{ "456": { "dfa": { "log": { "train_loss": [ 1.9882178707504272, 1.9317677951049805, 1.9248256610870362, 1.9217043152618407, 1.917682172279358, 1.919907767944336, 1.9130486701202392, 1.9160204236221314, 1.9124437686920166, 1.910402643661499, 1.9057467068099976, 1.9108504774475097, 1.9094166152572631, 1.9078613684844972, 1.9042421661376954, 1.9058136753082275, 1.9045625204467773, 1.9027692486953736, 1.901805549621582, 1.9018799402236939, 1.8972413793945313, 1.8987792961883545, 1.897886729812622, 1.894330054550171, 1.8959275032806397, 1.8961896368408202, 1.8971351692962646, 1.8951918316650391, 1.89493817653656, 1.8952873357391358 ], "train_acc": [ 0.2782, 0.30628, 0.31294, 0.31586, 0.31784, 0.31944, 0.32352, 0.32056, 0.32366, 0.32092, 0.32656, 0.3254, 0.3272, 0.3234, 0.32672, 0.32684, 0.33252, 0.3313, 0.32936, 0.33102, 0.3319, 0.33272, 0.33364, 0.3357, 0.334, 0.3336, 0.33476, 0.33474, 0.3348, 0.33498 ], "test_acc": [ 0.3368, 0.3538, 0.3441, 0.3634, 0.343, 0.3574, 0.341, 0.3403, 0.3594, 0.3606, 0.3628, 0.3674, 0.3684, 0.3558, 0.3637, 0.3632, 0.3649, 0.3565, 0.3631, 0.3629, 0.3546, 0.3629, 0.3594, 0.3629, 0.3619, 0.3609, 0.3623, 0.3609, 0.3607, 0.3614 ] }, "diagnostics": { "bp_cosine": [ 0.35106080770492554, 0.12957611680030823, 0.12942053377628326, 0.15940426290035248 ], "perturbation_rho": [ 0.17636069655418396, 0.08117785304784775, 0.0472814217209816, 0.11043912172317505 ], "nudging": { "0.001": [ -1.4475401258096099e-05, -4.396220901980996e-06, -4.46141348220408e-06, -5.974114174023271e-06 ], "0.003": [ -4.3450010707601905e-05, -1.3241806300356984e-05, -1.3367069186642766e-05, -1.7999671399593353e-05 ], "0.01": [ -0.00014482333790510893, -4.394981078803539e-05, -4.4448592234402895e-05, -5.99866034463048e-05 ] }, "hidden_norms_per_layer": [ 12229.544921875, 12266.8916015625, 12257.5791015625, 12255.2705078125, 12254.3017578125 ], "bp_grad_norms_per_layer": [ 1.334045191470068e-05, 1.2721701750706416e-05, 1.251421963388566e-05, 1.2780437828041613e-05, 1.2897891792817973e-05 ] }, "drift": { "embed.weight": 96.08641084786761, "embed.bias": 127.15202633956196, "blocks.0.ln.weight": 0.3495213985443115, "blocks.0.w1.weight": 3.6819779928624996, "blocks.0.w1.bias": 6.228241220813173, "blocks.0.w2.weight": 17.84445507605505, "blocks.1.ln.weight": 0.34699368476867676, "blocks.1.w1.weight": 3.6269651247865355, "blocks.1.w1.bias": 6.23888543559435, "blocks.1.w2.weight": 17.036835876989706, "blocks.2.ln.weight": 0.3590497076511383, "blocks.2.w1.weight": 3.638377800312274, "blocks.2.w1.bias": 6.075326782548754, "blocks.2.w2.weight": 17.82232398341993, "blocks.3.ln.weight": 0.35772791504859924, "blocks.3.w1.weight": 3.76772753341028, "blocks.3.w1.bias": 6.390494916053956, "blocks.3.w2.weight": 18.92297820750716, "out_ln.weight": 0.18465575575828552, "out_head.weight": 2.515055641320841, "out_head.bias": 1.320315434071626 } } }, "config": { "dataset": "cifar10", "d_hidden": 256, "num_blocks": 4, "batch_size": 128, "epochs": 30, "lr": 0.001, "lr_fb": 0.001, "wd": 0.01, "lam": 0.1, "K": 4, "sigma_bridge": 0.05, "ema_momentum": 0.995, "term_grad_weight": 1.0, "seeds": [ 456 ], "gpu": 0, "output_dir": "results/round41_dfa_penalty_30ep_s456", "methods": [ "dfa" ], "random_targets": false, "penalty_lam": 0.01, "num_classes": 10 } }