diff options
Diffstat (limited to 'results/fa_no_penalty_30ep/results_cifar10.json')
| -rw-r--r-- | results/fa_no_penalty_30ep/results_cifar10.json | 549 |
1 files changed, 549 insertions, 0 deletions
diff --git a/results/fa_no_penalty_30ep/results_cifar10.json b/results/fa_no_penalty_30ep/results_cifar10.json new file mode 100644 index 0000000..b555be4 --- /dev/null +++ b/results/fa_no_penalty_30ep/results_cifar10.json @@ -0,0 +1,549 @@ +{ + "42": { + "fa": { + "log": { + "train_loss": [ + 2.049124941177368, + 1.971962617225647, + 1.948333747253418, + 1.93432448513031, + 1.9224566204071045, + 1.911075694732666, + 1.9016322410583497, + 1.891839779663086, + 1.888196748046875, + 1.8842478870391846, + 1.8793783734512328, + 1.873243000831604, + 1.8691495156478881, + 1.8672306230163573, + 1.8655509171295166, + 1.8641471955108642, + 1.8627385038757325, + 1.8566293866348267, + 1.8572382849121094, + 1.854542728805542, + 1.853432930831909, + 1.853121408996582, + 1.8526418188476563, + 1.850091189842224, + 1.8493881246566772, + 1.8482141668319703, + 1.8446919596099853, + 1.8451624103164672, + 1.8452935827255248, + 1.842558771057129 + ], + "train_acc": [ + 0.2436, + 0.27344, + 0.28986, + 0.29662, + 0.30162, + 0.30408, + 0.31004, + 0.31504, + 0.31764, + 0.31756, + 0.32138, + 0.32436, + 0.32408, + 0.32572, + 0.33018, + 0.32902, + 0.3305, + 0.33264, + 0.33294, + 0.3324, + 0.3352, + 0.33392, + 0.33506, + 0.33476, + 0.33738, + 0.33864, + 0.3368, + 0.3421, + 0.34022, + 0.34144 + ], + "test_acc": [ + 0.2789, + 0.3106, + 0.3051, + 0.3149, + 0.3232, + 0.3345, + 0.3394, + 0.3473, + 0.3416, + 0.3427, + 0.3494, + 0.3508, + 0.3543, + 0.3612, + 0.3602, + 0.3571, + 0.3605, + 0.3616, + 0.3652, + 0.363, + 0.3649, + 0.3599, + 0.3637, + 0.3606, + 0.3631, + 0.3672, + 0.3653, + 0.3664, + 0.3659, + 0.3655 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.039796918630599976, + -0.0086748618632555, + -0.05908923223614693, + 0.9973034262657166 + ], + "perturbation_rho": [ + 0.014420752413570881, + 0.010810372419655323, + -0.0031975782476365566, + 0.018547791987657547 + ], + "nudging": { + "0.001": [ + -1.748558133840561e-06, + 4.516914486885071e-08, + 1.9744038581848145e-07, + -3.418419510126114e-06 + ], + "0.003": [ + -5.089212208986282e-06, + -9.313225746154785e-09, + 6.165355443954468e-07, + -1.0619405657052994e-05 + ], + "0.01": [ + -1.7355196177959442e-05, + -1.9045546650886536e-07, + 2.042856067419052e-06, + -3.545219078660011e-05 + ] + }, + "hidden_norms_per_layer": [ + 3135.737060546875, + 27729.080078125, + 142235.453125, + 221539.90625, + 98576.6796875 + ], + "bp_grad_norms_per_layer": [ + 2.2086309400037862e-05, + 2.296714228577912e-06, + 1.807363787520444e-06, + 1.8235258494314621e-06, + 1.7601513491172227e-06 + ] + }, + "drift": { + "embed.weight": 27.4249662858567, + "embed.bias": 13.120389146639068, + "blocks.0.ln.weight": 1.030566692352295, + "blocks.0.w1.weight": 12.544252287230748, + "blocks.0.w1.bias": 11.915807637034495, + "blocks.0.w2.weight": 40.35155370112267, + "blocks.1.ln.weight": 0.8229038715362549, + "blocks.1.w1.weight": 13.580795814003102, + "blocks.1.w1.bias": 13.535923105255597, + "blocks.1.w2.weight": 26.893514153884407, + "blocks.2.ln.weight": 0.7509615421295166, + "blocks.2.w1.weight": 12.776503039551056, + "blocks.2.w1.bias": 13.791977535501145, + "blocks.2.w2.weight": 24.833686221953496, + "blocks.3.ln.weight": 0.7571015954017639, + "blocks.3.w1.weight": 12.83601203126826, + "blocks.3.w1.bias": 14.312451854403847, + "blocks.3.w2.weight": 27.882819964995566, + "out_ln.weight": 0.14845088124275208, + "out_head.weight": 2.4655734611964566, + "out_head.bias": 1.0148059705009767 + } + } + }, + "123": { + "fa": { + "log": { + "train_loss": [ + 2.039442294845581, + 1.9535883404159546, + 1.926400708694458, + 1.911022697982788, + 1.9043336145401002, + 1.8967472591400147, + 1.8879544982147216, + 1.880874624710083, + 1.8684730854034424, + 1.8600735509490967, + 1.856403991165161, + 1.8502347243499755, + 1.8449485485458375, + 1.8409737787246705, + 1.8416496984100341, + 1.834242384109497, + 1.8294944548797607, + 1.8298376247406005, + 1.826137925453186, + 1.8233746087646485, + 1.82085874168396, + 1.8194894942855835, + 1.8188257317733765, + 1.8151497143936157, + 1.813930884475708, + 1.8120647540283203, + 1.813437055130005, + 1.8121181848526, + 1.8119350430297851, + 1.8114447354888916 + ], + "train_acc": [ + 0.25042, + 0.2898, + 0.30126, + 0.3061, + 0.3093, + 0.31594, + 0.31488, + 0.31876, + 0.3224, + 0.32746, + 0.3304, + 0.33324, + 0.33658, + 0.34018, + 0.34052, + 0.34376, + 0.34472, + 0.34468, + 0.34946, + 0.34906, + 0.34908, + 0.35074, + 0.35076, + 0.35142, + 0.35312, + 0.35506, + 0.3535, + 0.35112, + 0.35142, + 0.35268 + ], + "test_acc": [ + 0.2905, + 0.3334, + 0.3427, + 0.3456, + 0.347, + 0.3554, + 0.355, + 0.344, + 0.3537, + 0.3514, + 0.3589, + 0.3694, + 0.361, + 0.3679, + 0.3707, + 0.3645, + 0.3766, + 0.3737, + 0.3685, + 0.3753, + 0.3749, + 0.3762, + 0.3779, + 0.3776, + 0.3771, + 0.3778, + 0.3785, + 0.3791, + 0.3788, + 0.3793 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.04084893688559532, + 0.03916510194540024, + -0.05586155131459236, + 0.9959583282470703 + ], + "perturbation_rho": [ + 0.037160176783800125, + 0.010217813774943352, + 0.009163782931864262, + 0.09005501866340637 + ], + "nudging": { + "0.001": [ + -3.252178430557251e-06, + -4.302710294723511e-07, + 2.551823854446411e-07, + -5.400157533586025e-06 + ], + "0.003": [ + -9.553623385727406e-06, + -1.2621749192476273e-06, + 7.917406037449837e-07, + -1.6256701201200485e-05 + ], + "0.01": [ + -3.1867995858192444e-05, + -4.215282388031483e-06, + 2.7384376153349876e-06, + -5.423836410045624e-05 + ] + }, + "hidden_norms_per_layer": [ + 2199.5390625, + 23238.517578125, + 33747.62109375, + 60688.390625, + 42941.73828125 + ], + "bp_grad_norms_per_layer": [ + 3.650465077953413e-05, + 3.7466418234544108e-06, + 2.5758349693205673e-06, + 2.5369565719302045e-06, + 2.5125191314145923e-06 + ] + }, + "drift": { + "embed.weight": 22.579546770636362, + "embed.bias": 11.99376839559686, + "blocks.0.ln.weight": 0.9874183535575867, + "blocks.0.w1.weight": 11.111551574509331, + "blocks.0.w1.bias": 10.99245593612019, + "blocks.0.w2.weight": 37.55172206485303, + "blocks.1.ln.weight": 0.8036943674087524, + "blocks.1.w1.weight": 11.271966509968653, + "blocks.1.w1.bias": 9.571436867060473, + "blocks.1.w2.weight": 31.072497431661937, + "blocks.2.ln.weight": 0.7531070709228516, + "blocks.2.w1.weight": 12.21459668569711, + "blocks.2.w1.bias": 10.794010887239482, + "blocks.2.w2.weight": 27.176995851908416, + "blocks.3.ln.weight": 0.7162689566612244, + "blocks.3.w1.weight": 10.48127263461816, + "blocks.3.w1.bias": 11.865909016175292, + "blocks.3.w2.weight": 20.70822324996214, + "out_ln.weight": 0.11810445040464401, + "out_head.weight": 2.0129756552130713, + "out_head.bias": 2.429219803928322 + } + } + }, + "456": { + "fa": { + "log": { + "train_loss": [ + 2.064060781517029, + 1.9899379243469237, + 1.9583307926940918, + 1.9413945766830445, + 1.9259979123687745, + 1.917475108642578, + 1.9057074185180665, + 1.8973549193572998, + 1.8837018002319337, + 1.880822947654724, + 1.8696432660675049, + 1.8690105139541626, + 1.864671703262329, + 1.860279491958618, + 1.8527126348114014, + 1.852656520614624, + 1.8518596002197265, + 1.8463533575820923, + 1.8458245706939698, + 1.8417340802764892, + 1.8398325180053712, + 1.8388891097640991, + 1.8387157396697997, + 1.834438217163086, + 1.835012681427002, + 1.835493402671814, + 1.8353626998138428, + 1.8342380188751222, + 1.8330915405654906, + 1.8353773220443725 + ], + "train_acc": [ + 0.24014, + 0.26754, + 0.28264, + 0.29278, + 0.29936, + 0.30364, + 0.3098, + 0.3125, + 0.3179, + 0.31672, + 0.32526, + 0.32416, + 0.32716, + 0.32892, + 0.33012, + 0.3305, + 0.33536, + 0.33526, + 0.3355, + 0.33674, + 0.3366, + 0.33948, + 0.3374, + 0.34192, + 0.34176, + 0.33942, + 0.33996, + 0.34336, + 0.34066, + 0.34118 + ], + "test_acc": [ + 0.2713, + 0.3036, + 0.3138, + 0.3217, + 0.3206, + 0.3309, + 0.3341, + 0.3322, + 0.3431, + 0.3426, + 0.3568, + 0.3643, + 0.359, + 0.3584, + 0.3556, + 0.3673, + 0.3669, + 0.366, + 0.3678, + 0.3677, + 0.3664, + 0.3691, + 0.369, + 0.3699, + 0.3699, + 0.3695, + 0.3706, + 0.3701, + 0.3698, + 0.3699 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.005443892907351255, + 0.056010693311691284, + -0.02994656004011631, + 0.993930459022522 + ], + "perturbation_rho": [ + 0.041760556399822235, + 0.011946016922593117, + 0.03664176166057587, + 0.019783515483140945 + ], + "nudging": { + "0.001": [ + -4.184548743069172e-07, + -1.57160684466362e-07, + 1.1589145287871361e-07, + -2.3843604139983654e-06 + ], + "0.003": [ + -1.4954712241888046e-06, + -6.176414899528027e-07, + 2.6955967769026756e-07, + -7.573107723146677e-06 + ], + "0.01": [ + -4.984147381037474e-06, + -2.076325472444296e-06, + 7.987837307155132e-07, + -2.5291461497545242e-05 + ] + }, + "hidden_norms_per_layer": [ + 2585.784423828125, + 30518.099609375, + 125489.96875, + 234551.46875, + 135467.90625 + ], + "bp_grad_norms_per_layer": [ + 2.3132513888413087e-05, + 1.7084096270991722e-06, + 1.3069517308395007e-06, + 1.3104119034323958e-06, + 1.2661037089856109e-06 + ] + }, + "drift": { + "embed.weight": 25.036403923789077, + "embed.bias": 16.467870087010475, + "blocks.0.ln.weight": 1.1242742538452148, + "blocks.0.w1.weight": 13.107427774588935, + "blocks.0.w1.bias": 10.049346359618044, + "blocks.0.w2.weight": 42.48070715823368, + "blocks.1.ln.weight": 0.858128547668457, + "blocks.1.w1.weight": 13.379689964096187, + "blocks.1.w1.bias": 11.437785495003597, + "blocks.1.w2.weight": 32.57709335591351, + "blocks.2.ln.weight": 0.724311351776123, + "blocks.2.w1.weight": 13.542630387751261, + "blocks.2.w1.bias": 15.41909845322556, + "blocks.2.w2.weight": 27.566931719808178, + "blocks.3.ln.weight": 0.7379146218299866, + "blocks.3.w1.weight": 12.288692457217095, + "blocks.3.w1.bias": 12.33989285228832, + "blocks.3.w2.weight": 30.803534080173662, + "out_ln.weight": 0.14593681693077087, + "out_head.weight": 2.504909261880854, + "out_head.bias": 1.0678192714646333 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 256, + "num_blocks": 4, + "batch_size": 128, + "epochs": 30, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 42, + 123, + 456 + ], + "gpu": 0, + "output_dir": "results/fa_no_penalty_30ep", + "methods": [ + "fa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file |
