{ "42": { "fa": { "log": { "train_loss": [ 2.0345889838027955, 1.9560137537384032, 1.9360403113555908, 1.9143632999420166, 1.8943112557601929, 1.883546539993286, 1.87789137008667, 1.8733353225708007, 1.8758181957626343, 1.8700244534301758, 1.8686021829605102, 1.8640803202056884, 1.8597132386779784, 1.8604257183837891, 1.860362275390625, 1.8570359252166748, 1.8561881994247436, 1.8504637549209595, 1.8508178236389161, 1.8495385042572021, 1.8523688412475585, 1.8491829050445556, 1.849259608154297, 1.8475904892730712, 1.846731012802124, 1.8426731698989869, 1.8409621490097046, 1.8425804473876952, 1.8410273541259765, 1.8429513037109375 ], "train_acc": [ 0.25574, 0.29268, 0.30346, 0.31094, 0.31784, 0.3242, 0.32714, 0.3321, 0.33104, 0.3346, 0.33454, 0.33844, 0.33788, 0.33896, 0.3398, 0.34038, 0.3428, 0.34448, 0.34504, 0.34356, 0.34638, 0.34678, 0.3455, 0.34862, 0.34636, 0.35102, 0.34858, 0.35316, 0.3505, 0.3521 ], "test_acc": [ 0.2909, 0.3244, 0.3269, 0.3335, 0.3455, 0.3577, 0.3581, 0.3473, 0.359, 0.3635, 0.3513, 0.3583, 0.3735, 0.3642, 0.3646, 0.364, 0.3653, 0.3734, 0.3717, 0.3682, 0.3792, 0.3722, 0.3728, 0.3747, 0.3749, 0.3751, 0.3748, 0.375, 0.3766, 0.3759 ] }, "diagnostics": { "bp_cosine": [ 0.03903631120920181, 0.014399020932614803, -0.033061157912015915, 0.8762983083724976 ], "perturbation_rho": [ 0.04046167433261871, 0.029874827712774277, 0.018399983644485474, 0.5844764113426208 ], "nudging": { "0.001": [ -3.4736585803329945e-06, -1.6225967556238174e-06, 8.612405508756638e-07, -4.002035711891949e-05 ], "0.003": [ -1.0443473001942039e-05, -4.940724465996027e-06, 2.589426003396511e-06, -0.00012009820784442127 ], "0.01": [ -3.477800055406988e-05, -1.6514735762029886e-05, 8.722592610865831e-06, -0.00040041320607997477 ] }, "hidden_norms_per_layer": [ 9302.052734375, 9300.826171875, 9304.12890625, 9388.5966796875, 9324.71484375 ], "bp_grad_norms_per_layer": [ 1.8842416466213763e-05, 1.7771730199456215e-05, 1.7083899365388788e-05, 1.6848869563546032e-05, 1.1624123544606846e-05 ] }, "drift": { "embed.weight": 59.712949390275256, "embed.bias": 109.7242290933353, "blocks.0.ln.weight": 0.4963775873184204, "blocks.0.w1.weight": 6.181720741000805, "blocks.0.w1.bias": 4.020397499075911, "blocks.0.w2.weight": 27.582674728139626, "blocks.1.ln.weight": 0.5040290951728821, "blocks.1.w1.weight": 6.5836738314372525, "blocks.1.w1.bias": 3.6793793299266104, "blocks.1.w2.weight": 30.666596548839138, "blocks.2.ln.weight": 0.5469347834587097, "blocks.2.w1.weight": 6.732229120538098, "blocks.2.w1.bias": 3.7361472645162905, "blocks.2.w2.weight": 32.57792259935943, "blocks.3.ln.weight": 0.5613139867782593, "blocks.3.w1.weight": 6.581817915516039, "blocks.3.w1.bias": 3.910112736502136, "blocks.3.w2.weight": 30.460380132293686, "out_ln.weight": 0.13369733095169067, "out_head.weight": 2.149734268892375, "out_head.bias": 1.8324297050244958 } } }, "123": { "fa": { "log": { "train_loss": [ 2.02592181602478, 1.9371592224121095, 1.8974695124053955, 1.8743731897735596, 1.86856899684906, 1.866699522743225, 1.8631246627807616, 1.8699150874328614, 1.8676455249404906, 1.8666385611724854, 1.8668112030792237, 1.8644982721710206, 1.8628531475448609, 1.8602032104492188, 1.859476708946228, 1.8544428533554078, 1.8534636458969116, 1.8551011206054688, 1.854274546432495, 1.850538496170044, 1.8506593602752686, 1.849616947631836, 1.8474625555419921, 1.8434547743988037, 1.8423766018676757, 1.8429830319976808, 1.8445982418823241, 1.842038772354126, 1.8427192529296874, 1.8390978100585937 ], "train_acc": [ 0.26002, 0.30074, 0.31632, 0.32608, 0.33058, 0.3327, 0.33324, 0.3331, 0.33306, 0.3358, 0.33708, 0.33474, 0.33836, 0.33936, 0.34254, 0.34292, 0.34406, 0.34572, 0.34532, 0.34724, 0.34686, 0.35018, 0.34834, 0.35062, 0.35238, 0.35146, 0.35322, 0.3507, 0.35174, 0.35298 ], "test_acc": [ 0.3099, 0.3371, 0.35, 0.3548, 0.3568, 0.3543, 0.3595, 0.3565, 0.3605, 0.3532, 0.3572, 0.3574, 0.3587, 0.3635, 0.3598, 0.3716, 0.3664, 0.3641, 0.3588, 0.3685, 0.3704, 0.3738, 0.3678, 0.3716, 0.3726, 0.3706, 0.3715, 0.3711, 0.3718, 0.3725 ] }, "diagnostics": { "bp_cosine": [ 0.12851236760616302, 0.0212344229221344, 0.06850548088550568, 0.912018895149231 ], "perturbation_rho": [ 0.1286340206861496, 0.05542958900332451, 0.095004141330719, 0.5912511944770813 ], "nudging": { "0.001": [ -1.2503878679126501e-05, -2.949964255094528e-06, -4.535817424766719e-06, -4.0970538975670934e-05 ], "0.003": [ -3.755756188184023e-05, -8.84209293872118e-06, -1.3517230399884284e-05, -0.00012284089461900294 ], "0.01": [ -0.00012522244651336223, -2.9436778277158737e-05, -4.5000226236879826e-05, -0.0004094060859642923 ] }, "hidden_norms_per_layer": [ 8440.2509765625, 8675.947265625, 8919.455078125, 9151.62890625, 9001.966796875 ], "bp_grad_norms_per_layer": [ 2.2785976398154162e-05, 1.9475846784189343e-05, 1.76876437762985e-05, 1.6414065612480044e-05, 1.2445364518498536e-05 ] }, "drift": { "embed.weight": 58.570146595416965, "embed.bias": 129.66757192630527, "blocks.0.ln.weight": 0.506428599357605, "blocks.0.w1.weight": 6.100287737701914, "blocks.0.w1.bias": 4.352361447004298, "blocks.0.w2.weight": 29.053039362183725, "blocks.1.ln.weight": 0.48886165022850037, "blocks.1.w1.weight": 6.289397911525807, "blocks.1.w1.bias": 4.491060429115057, "blocks.1.w2.weight": 31.966204529299326, "blocks.2.ln.weight": 0.47751307487487793, "blocks.2.w1.weight": 6.165341672963252, "blocks.2.w1.bias": 4.204746402092476, "blocks.2.w2.weight": 31.785153220803842, "blocks.3.ln.weight": 0.522526741027832, "blocks.3.w1.weight": 6.2976831221121925, "blocks.3.w1.bias": 3.31165977931123, "blocks.3.w2.weight": 29.399820039375125, "out_ln.weight": 0.12725675106048584, "out_head.weight": 2.171575181004019, "out_head.bias": 1.7024717076770008 } } }, "456": { "fa": { "log": { "train_loss": [ 2.0370823514556884, 1.9467987035751342, 1.912523896484375, 1.8945044506072999, 1.881802406349182, 1.8781717306137085, 1.8688158060073852, 1.8661776677703859, 1.8603027975845337, 1.8572819149017334, 1.8532151040267943, 1.8534327938461304, 1.8522534008407592, 1.8507964714050293, 1.8465819549179077, 1.8432143920135498, 1.8410576669311522, 1.8371334344863892, 1.8372844079208375, 1.8335014197540282, 1.8299405459213256, 1.8304111400985719, 1.8290630575561524, 1.8247105368041991, 1.8245959228515625, 1.825667812576294, 1.8260319690322877, 1.8243235848999024, 1.8212057236480712, 1.8206781775283813 ], "train_acc": [ 0.25624, 0.29612, 0.3112, 0.31836, 0.32406, 0.32822, 0.33338, 0.3358, 0.3376, 0.33728, 0.34174, 0.34368, 0.34628, 0.34296, 0.34426, 0.34552, 0.34848, 0.35006, 0.34902, 0.34998, 0.35384, 0.35252, 0.35216, 0.35542, 0.35508, 0.35538, 0.35446, 0.35524, 0.3556, 0.35744 ], "test_acc": [ 0.2999, 0.3434, 0.3434, 0.364, 0.3546, 0.3612, 0.3581, 0.3492, 0.367, 0.3567, 0.3655, 0.3723, 0.3761, 0.3771, 0.3778, 0.3763, 0.3825, 0.3744, 0.3812, 0.3831, 0.3751, 0.3821, 0.3809, 0.3833, 0.3812, 0.3832, 0.3835, 0.3836, 0.3841, 0.3837 ] }, "diagnostics": { "bp_cosine": [ 0.11049995571374893, 0.014905610121786594, -0.0519559383392334, 0.861151933670044 ], "perturbation_rho": [ 0.058121487498283386, 0.011935144662857056, -0.056115295737981796, 0.5436498522758484 ], "nudging": { "0.001": [ -7.875027222326025e-06, -1.205597072839737e-06, 2.052285708487034e-06, -3.838281554635614e-05 ], "0.003": [ -2.359108839300461e-05, -3.5760422179009765e-06, 6.082700565457344e-06, -0.00011519025429151952 ], "0.01": [ -7.864644430810586e-05, -1.1975058441748843e-05, 2.0268842490622774e-05, -0.000383934035198763 ] }, "hidden_norms_per_layer": [ 8074.6318359375, 8344.6298828125, 8543.16796875, 8806.9365234375, 8809.208984375 ], "bp_grad_norms_per_layer": [ 2.0788113033631817e-05, 1.6527425032109022e-05, 1.6617635992588475e-05, 1.6509349734405987e-05, 1.1822515261883382e-05 ] }, "drift": { "embed.weight": 55.0795347886669, "embed.bias": 104.1791057670674, "blocks.0.ln.weight": 0.5131628513336182, "blocks.0.w1.weight": 6.423288157104268, "blocks.0.w1.bias": 5.260214874942604, "blocks.0.w2.weight": 28.84901365790228, "blocks.1.ln.weight": 0.5011720657348633, "blocks.1.w1.weight": 6.239148515891604, "blocks.1.w1.bias": 3.694106675391347, "blocks.1.w2.weight": 28.607867363928534, "blocks.2.ln.weight": 0.46569541096687317, "blocks.2.w1.weight": 6.112045116014977, "blocks.2.w1.bias": 4.730623150261222, "blocks.2.w2.weight": 28.99578369272475, "blocks.3.ln.weight": 0.5072412490844727, "blocks.3.w1.weight": 6.376723566598171, "blocks.3.w1.bias": 4.743548803408704, "blocks.3.w2.weight": 30.777217385288502, "out_ln.weight": 0.1257992684841156, "out_head.weight": 2.0103689615464178, "out_head.bias": 1.8804179129019218 } } }, "config": { "dataset": "cifar10", "d_hidden": 256, "num_blocks": 4, "batch_size": 128, "epochs": 30, "lr": 0.001, "lr_fb": 0.001, "wd": 0.01, "lam": 0.1, "K": 4, "sigma_bridge": 0.05, "ema_momentum": 0.995, "term_grad_weight": 1.0, "seeds": [ 42, 123, 456 ], "gpu": 0, "output_dir": "results/fa_canonical_lam1e-4_30ep", "methods": [ "fa" ], "random_targets": false, "penalty_lam": 0.0001, "num_classes": 10 } }