diff options
Diffstat (limited to 'results/optionSBCB_smoke/results_cifar10.json')
| -rw-r--r-- | results/optionSBCB_smoke/results_cifar10.json | 220 |
1 files changed, 220 insertions, 0 deletions
diff --git a/results/optionSBCB_smoke/results_cifar10.json b/results/optionSBCB_smoke/results_cifar10.json new file mode 100644 index 0000000..c2ab728 --- /dev/null +++ b/results/optionSBCB_smoke/results_cifar10.json @@ -0,0 +1,220 @@ +{ + "42": { + "state_bridge": { + "log": { + "train_loss": [ + 2.32298729019165, + 2.3135665228271485, + 2.3065665995025633 + ], + "train_acc": [ + 0.09906, + 0.09972, + 0.09984 + ], + "test_acc": [ + 0.1088, + 0.0958, + 0.1037 + ], + "state_pred_error": [ + 0.13290744504570962, + 0.04644830721378326, + 0.02964444874405861 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.02890743687748909, + 0.029086019843816757, + 0.02471546083688736, + 0.02299082651734352 + ], + "perturbation_rho": [ + 0.06398507952690125, + 0.03261129930615425, + -0.0327291414141655, + 0.03132244944572449 + ], + "nudging": { + "0.001": [ + -8.400529623031616e-07, + -5.029141902923584e-07, + -2.8870999813079834e-07, + -1.3969838619232178e-07 + ], + "0.003": [ + -2.5480985641479492e-06, + -1.559033989906311e-06, + -7.82310962677002e-07, + -4.7497451305389404e-07 + ], + "0.01": [ + -8.374452590942383e-06, + -5.166977643966675e-06, + -2.5797635316848755e-06, + -1.5459954738616943e-06 + ] + }, + "hidden_norms_per_layer": [ + 382.91375732421875, + 1350.580810546875, + 2999.959716796875, + 4744.67626953125, + 6224.6923828125 + ], + "bp_grad_norms_per_layer": [ + 1.2822482858609874e-05, + 1.0445839507156052e-05, + 1.0443974133522715e-05, + 1.04420678326278e-05, + 1.0443762221257202e-05 + ] + }, + "drift": { + "embed.weight": 3.211158219942791, + "embed.bias": 7.062613177208634, + "blocks.0.ln.weight": 0.21760645508766174, + "blocks.0.w1.weight": 2.8678529976663016, + "blocks.0.w1.bias": 3.956961082186717, + "blocks.0.w2.weight": 9.394435771614729, + "blocks.1.ln.weight": 0.18258990347385406, + "blocks.1.w1.weight": 2.21922766779391, + "blocks.1.w1.bias": 2.8283530013194156, + "blocks.1.w2.weight": 7.529057036338004, + "blocks.2.ln.weight": 0.1940218061208725, + "blocks.2.w1.weight": 2.6529854316910684, + "blocks.2.w1.bias": 3.025569884204286, + "blocks.2.w2.weight": 8.47631449731586, + "blocks.3.ln.weight": 0.1730765700340271, + "blocks.3.w1.weight": 2.41631876651548, + "blocks.3.w1.bias": 2.7588911555760287, + "blocks.3.w2.weight": 7.962227392442747, + "out_ln.weight": 0.03852882981300354, + "out_head.weight": 0.2597139888044846, + "out_head.bias": 0.22334529177207701 + } + }, + "credit_bridge": { + "log": { + "train_loss": [ + 2.320697575531006, + 2.3160406340026856, + 2.307044482879639 + ], + "train_acc": [ + 0.09946, + 0.09836, + 0.10142 + ], + "test_acc": [ + 0.0998, + 0.1055, + 0.0925 + ], + "value_loss": [ + 0.36024896956682206, + 0.6458554600691795, + 0.018822102856636047 + ] + }, + "diagnostics": { + "bp_cosine": [ + -0.0007630798500031233, + -0.009494196623563766, + -0.009180156514048576, + -0.007028202060610056 + ], + "perturbation_rho": [ + -0.014483902603387833, + 0.01178690791130066, + -0.011402066797018051, + 0.02756977453827858 + ], + "nudging": { + "0.001": [ + -4.284083843231201e-08, + 6.146728992462158e-08, + 4.842877388000488e-08, + 3.91155481338501e-08 + ], + "0.003": [ + -1.4528632164001465e-07, + 1.5459954738616943e-07, + 1.471489667892456e-07, + 6.891787052154541e-08 + ], + "0.01": [ + -4.637986421585083e-07, + 6.239861249923706e-07, + 4.6193599700927734e-07, + 2.998858690261841e-07 + ] + }, + "hidden_norms_per_layer": [ + 1157.2740478515625, + 10477.9150390625, + 16051.2392578125, + 19347.453125, + 19974.22265625 + ], + "bp_grad_norms_per_layer": [ + 4.064333097630879e-06, + 3.1959411899151746e-06, + 3.1822878554521594e-06, + 3.1774463877809467e-06, + 3.1793581456440734e-06 + ] + }, + "drift": { + "embed.weight": 7.825915877663139, + "embed.bias": 10.392833847164916, + "blocks.0.ln.weight": 0.32047849893569946, + "blocks.0.w1.weight": 6.762743850808533, + "blocks.0.w1.bias": 8.151214078637885, + "blocks.0.w2.weight": 17.774887825977164, + "blocks.1.ln.weight": 0.28780925273895264, + "blocks.1.w1.weight": 6.646412153864217, + "blocks.1.w1.bias": 7.28996940865764, + "blocks.1.w2.weight": 16.59156331214822, + "blocks.2.ln.weight": 0.30320411920547485, + "blocks.2.w1.weight": 6.002501250935489, + "blocks.2.w1.bias": 5.318388908944272, + "blocks.2.w2.weight": 16.00213789556674, + "blocks.3.ln.weight": 0.24688510596752167, + "blocks.3.w1.weight": 3.6772500170447437, + "blocks.3.w1.bias": 4.008142009024597, + "blocks.3.w2.weight": 13.91740354386689, + "out_ln.weight": 0.03830864652991295, + "out_head.weight": 0.3261551698718769, + "out_head.bias": 0.2861540512185052 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 256, + "num_blocks": 4, + "batch_size": 128, + "epochs": 3, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 42 + ], + "gpu": 0, + "output_dir": "results/optionSBCB_smoke", + "methods": [ + "state_bridge", + "credit_bridge" + ], + "random_targets": true, + "num_classes": 10 + } +}
\ No newline at end of file |
