diff options
Diffstat (limited to 'results/fa_canonical_lam1e-4_30ep/results_cifar10.json')
| -rw-r--r-- | results/fa_canonical_lam1e-4_30ep/results_cifar10.json | 549 |
1 files changed, 549 insertions, 0 deletions
diff --git a/results/fa_canonical_lam1e-4_30ep/results_cifar10.json b/results/fa_canonical_lam1e-4_30ep/results_cifar10.json new file mode 100644 index 0000000..d04a393 --- /dev/null +++ b/results/fa_canonical_lam1e-4_30ep/results_cifar10.json @@ -0,0 +1,549 @@ +{ + "42": { + "fa": { + "log": { + "train_loss": [ + 2.0345889838027955, + 1.9560137537384032, + 1.9360403113555908, + 1.9143632999420166, + 1.8943112557601929, + 1.883546539993286, + 1.87789137008667, + 1.8733353225708007, + 1.8758181957626343, + 1.8700244534301758, + 1.8686021829605102, + 1.8640803202056884, + 1.8597132386779784, + 1.8604257183837891, + 1.860362275390625, + 1.8570359252166748, + 1.8561881994247436, + 1.8504637549209595, + 1.8508178236389161, + 1.8495385042572021, + 1.8523688412475585, + 1.8491829050445556, + 1.849259608154297, + 1.8475904892730712, + 1.846731012802124, + 1.8426731698989869, + 1.8409621490097046, + 1.8425804473876952, + 1.8410273541259765, + 1.8429513037109375 + ], + "train_acc": [ + 0.25574, + 0.29268, + 0.30346, + 0.31094, + 0.31784, + 0.3242, + 0.32714, + 0.3321, + 0.33104, + 0.3346, + 0.33454, + 0.33844, + 0.33788, + 0.33896, + 0.3398, + 0.34038, + 0.3428, + 0.34448, + 0.34504, + 0.34356, + 0.34638, + 0.34678, + 0.3455, + 0.34862, + 0.34636, + 0.35102, + 0.34858, + 0.35316, + 0.3505, + 0.3521 + ], + "test_acc": [ + 0.2909, + 0.3244, + 0.3269, + 0.3335, + 0.3455, + 0.3577, + 0.3581, + 0.3473, + 0.359, + 0.3635, + 0.3513, + 0.3583, + 0.3735, + 0.3642, + 0.3646, + 0.364, + 0.3653, + 0.3734, + 0.3717, + 0.3682, + 0.3792, + 0.3722, + 0.3728, + 0.3747, + 0.3749, + 0.3751, + 0.3748, + 0.375, + 0.3766, + 0.3759 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.03903631120920181, + 0.014399020932614803, + -0.033061157912015915, + 0.8762983083724976 + ], + "perturbation_rho": [ + 0.04046167433261871, + 0.029874827712774277, + 0.018399983644485474, + 0.5844764113426208 + ], + "nudging": { + "0.001": [ + -3.4736585803329945e-06, + -1.6225967556238174e-06, + 8.612405508756638e-07, + -4.002035711891949e-05 + ], + "0.003": [ + -1.0443473001942039e-05, + -4.940724465996027e-06, + 2.589426003396511e-06, + -0.00012009820784442127 + ], + "0.01": [ + -3.477800055406988e-05, + -1.6514735762029886e-05, + 8.722592610865831e-06, + -0.00040041320607997477 + ] + }, + "hidden_norms_per_layer": [ + 9302.052734375, + 9300.826171875, + 9304.12890625, + 9388.5966796875, + 9324.71484375 + ], + "bp_grad_norms_per_layer": [ + 1.8842416466213763e-05, + 1.7771730199456215e-05, + 1.7083899365388788e-05, + 1.6848869563546032e-05, + 1.1624123544606846e-05 + ] + }, + "drift": { + "embed.weight": 59.712949390275256, + "embed.bias": 109.7242290933353, + "blocks.0.ln.weight": 0.4963775873184204, + "blocks.0.w1.weight": 6.181720741000805, + "blocks.0.w1.bias": 4.020397499075911, + "blocks.0.w2.weight": 27.582674728139626, + "blocks.1.ln.weight": 0.5040290951728821, + "blocks.1.w1.weight": 6.5836738314372525, + "blocks.1.w1.bias": 3.6793793299266104, + "blocks.1.w2.weight": 30.666596548839138, + "blocks.2.ln.weight": 0.5469347834587097, + "blocks.2.w1.weight": 6.732229120538098, + "blocks.2.w1.bias": 3.7361472645162905, + "blocks.2.w2.weight": 32.57792259935943, + "blocks.3.ln.weight": 0.5613139867782593, + "blocks.3.w1.weight": 6.581817915516039, + "blocks.3.w1.bias": 3.910112736502136, + "blocks.3.w2.weight": 30.460380132293686, + "out_ln.weight": 0.13369733095169067, + "out_head.weight": 2.149734268892375, + "out_head.bias": 1.8324297050244958 + } + } + }, + "123": { + "fa": { + "log": { + "train_loss": [ + 2.02592181602478, + 1.9371592224121095, + 1.8974695124053955, + 1.8743731897735596, + 1.86856899684906, + 1.866699522743225, + 1.8631246627807616, + 1.8699150874328614, + 1.8676455249404906, + 1.8666385611724854, + 1.8668112030792237, + 1.8644982721710206, + 1.8628531475448609, + 1.8602032104492188, + 1.859476708946228, + 1.8544428533554078, + 1.8534636458969116, + 1.8551011206054688, + 1.854274546432495, + 1.850538496170044, + 1.8506593602752686, + 1.849616947631836, + 1.8474625555419921, + 1.8434547743988037, + 1.8423766018676757, + 1.8429830319976808, + 1.8445982418823241, + 1.842038772354126, + 1.8427192529296874, + 1.8390978100585937 + ], + "train_acc": [ + 0.26002, + 0.30074, + 0.31632, + 0.32608, + 0.33058, + 0.3327, + 0.33324, + 0.3331, + 0.33306, + 0.3358, + 0.33708, + 0.33474, + 0.33836, + 0.33936, + 0.34254, + 0.34292, + 0.34406, + 0.34572, + 0.34532, + 0.34724, + 0.34686, + 0.35018, + 0.34834, + 0.35062, + 0.35238, + 0.35146, + 0.35322, + 0.3507, + 0.35174, + 0.35298 + ], + "test_acc": [ + 0.3099, + 0.3371, + 0.35, + 0.3548, + 0.3568, + 0.3543, + 0.3595, + 0.3565, + 0.3605, + 0.3532, + 0.3572, + 0.3574, + 0.3587, + 0.3635, + 0.3598, + 0.3716, + 0.3664, + 0.3641, + 0.3588, + 0.3685, + 0.3704, + 0.3738, + 0.3678, + 0.3716, + 0.3726, + 0.3706, + 0.3715, + 0.3711, + 0.3718, + 0.3725 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.12851236760616302, + 0.0212344229221344, + 0.06850548088550568, + 0.912018895149231 + ], + "perturbation_rho": [ + 0.1286340206861496, + 0.05542958900332451, + 0.095004141330719, + 0.5912511944770813 + ], + "nudging": { + "0.001": [ + -1.2503878679126501e-05, + -2.949964255094528e-06, + -4.535817424766719e-06, + -4.0970538975670934e-05 + ], + "0.003": [ + -3.755756188184023e-05, + -8.84209293872118e-06, + -1.3517230399884284e-05, + -0.00012284089461900294 + ], + "0.01": [ + -0.00012522244651336223, + -2.9436778277158737e-05, + -4.5000226236879826e-05, + -0.0004094060859642923 + ] + }, + "hidden_norms_per_layer": [ + 8440.2509765625, + 8675.947265625, + 8919.455078125, + 9151.62890625, + 9001.966796875 + ], + "bp_grad_norms_per_layer": [ + 2.2785976398154162e-05, + 1.9475846784189343e-05, + 1.76876437762985e-05, + 1.6414065612480044e-05, + 1.2445364518498536e-05 + ] + }, + "drift": { + "embed.weight": 58.570146595416965, + "embed.bias": 129.66757192630527, + "blocks.0.ln.weight": 0.506428599357605, + "blocks.0.w1.weight": 6.100287737701914, + "blocks.0.w1.bias": 4.352361447004298, + "blocks.0.w2.weight": 29.053039362183725, + "blocks.1.ln.weight": 0.48886165022850037, + "blocks.1.w1.weight": 6.289397911525807, + "blocks.1.w1.bias": 4.491060429115057, + "blocks.1.w2.weight": 31.966204529299326, + "blocks.2.ln.weight": 0.47751307487487793, + "blocks.2.w1.weight": 6.165341672963252, + "blocks.2.w1.bias": 4.204746402092476, + "blocks.2.w2.weight": 31.785153220803842, + "blocks.3.ln.weight": 0.522526741027832, + "blocks.3.w1.weight": 6.2976831221121925, + "blocks.3.w1.bias": 3.31165977931123, + "blocks.3.w2.weight": 29.399820039375125, + "out_ln.weight": 0.12725675106048584, + "out_head.weight": 2.171575181004019, + "out_head.bias": 1.7024717076770008 + } + } + }, + "456": { + "fa": { + "log": { + "train_loss": [ + 2.0370823514556884, + 1.9467987035751342, + 1.912523896484375, + 1.8945044506072999, + 1.881802406349182, + 1.8781717306137085, + 1.8688158060073852, + 1.8661776677703859, + 1.8603027975845337, + 1.8572819149017334, + 1.8532151040267943, + 1.8534327938461304, + 1.8522534008407592, + 1.8507964714050293, + 1.8465819549179077, + 1.8432143920135498, + 1.8410576669311522, + 1.8371334344863892, + 1.8372844079208375, + 1.8335014197540282, + 1.8299405459213256, + 1.8304111400985719, + 1.8290630575561524, + 1.8247105368041991, + 1.8245959228515625, + 1.825667812576294, + 1.8260319690322877, + 1.8243235848999024, + 1.8212057236480712, + 1.8206781775283813 + ], + "train_acc": [ + 0.25624, + 0.29612, + 0.3112, + 0.31836, + 0.32406, + 0.32822, + 0.33338, + 0.3358, + 0.3376, + 0.33728, + 0.34174, + 0.34368, + 0.34628, + 0.34296, + 0.34426, + 0.34552, + 0.34848, + 0.35006, + 0.34902, + 0.34998, + 0.35384, + 0.35252, + 0.35216, + 0.35542, + 0.35508, + 0.35538, + 0.35446, + 0.35524, + 0.3556, + 0.35744 + ], + "test_acc": [ + 0.2999, + 0.3434, + 0.3434, + 0.364, + 0.3546, + 0.3612, + 0.3581, + 0.3492, + 0.367, + 0.3567, + 0.3655, + 0.3723, + 0.3761, + 0.3771, + 0.3778, + 0.3763, + 0.3825, + 0.3744, + 0.3812, + 0.3831, + 0.3751, + 0.3821, + 0.3809, + 0.3833, + 0.3812, + 0.3832, + 0.3835, + 0.3836, + 0.3841, + 0.3837 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.11049995571374893, + 0.014905610121786594, + -0.0519559383392334, + 0.861151933670044 + ], + "perturbation_rho": [ + 0.058121487498283386, + 0.011935144662857056, + -0.056115295737981796, + 0.5436498522758484 + ], + "nudging": { + "0.001": [ + -7.875027222326025e-06, + -1.205597072839737e-06, + 2.052285708487034e-06, + -3.838281554635614e-05 + ], + "0.003": [ + -2.359108839300461e-05, + -3.5760422179009765e-06, + 6.082700565457344e-06, + -0.00011519025429151952 + ], + "0.01": [ + -7.864644430810586e-05, + -1.1975058441748843e-05, + 2.0268842490622774e-05, + -0.000383934035198763 + ] + }, + "hidden_norms_per_layer": [ + 8074.6318359375, + 8344.6298828125, + 8543.16796875, + 8806.9365234375, + 8809.208984375 + ], + "bp_grad_norms_per_layer": [ + 2.0788113033631817e-05, + 1.6527425032109022e-05, + 1.6617635992588475e-05, + 1.6509349734405987e-05, + 1.1822515261883382e-05 + ] + }, + "drift": { + "embed.weight": 55.0795347886669, + "embed.bias": 104.1791057670674, + "blocks.0.ln.weight": 0.5131628513336182, + "blocks.0.w1.weight": 6.423288157104268, + "blocks.0.w1.bias": 5.260214874942604, + "blocks.0.w2.weight": 28.84901365790228, + "blocks.1.ln.weight": 0.5011720657348633, + "blocks.1.w1.weight": 6.239148515891604, + "blocks.1.w1.bias": 3.694106675391347, + "blocks.1.w2.weight": 28.607867363928534, + "blocks.2.ln.weight": 0.46569541096687317, + "blocks.2.w1.weight": 6.112045116014977, + "blocks.2.w1.bias": 4.730623150261222, + "blocks.2.w2.weight": 28.99578369272475, + "blocks.3.ln.weight": 0.5072412490844727, + "blocks.3.w1.weight": 6.376723566598171, + "blocks.3.w1.bias": 4.743548803408704, + "blocks.3.w2.weight": 30.777217385288502, + "out_ln.weight": 0.1257992684841156, + "out_head.weight": 2.0103689615464178, + "out_head.bias": 1.8804179129019218 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 256, + "num_blocks": 4, + "batch_size": 128, + "epochs": 30, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 42, + 123, + 456 + ], + "gpu": 0, + "output_dir": "results/fa_canonical_lam1e-4_30ep", + "methods": [ + "fa" + ], + "random_targets": false, + "penalty_lam": 0.0001, + "num_classes": 10 + } +}
\ No newline at end of file |
