{ "42": { "fa": { "log": { "train_loss": [ 2.049124941177368, 1.971962617225647, 1.948333747253418, 1.93432448513031, 1.9224566204071045, 1.911075694732666, 1.9016322410583497, 1.891839779663086, 1.888196748046875, 1.8842478870391846, 1.8793783734512328, 1.873243000831604, 1.8691495156478881, 1.8672306230163573, 1.8655509171295166, 1.8641471955108642, 1.8627385038757325, 1.8566293866348267, 1.8572382849121094, 1.854542728805542, 1.853432930831909, 1.853121408996582, 1.8526418188476563, 1.850091189842224, 1.8493881246566772, 1.8482141668319703, 1.8446919596099853, 1.8451624103164672, 1.8452935827255248, 1.842558771057129 ], "train_acc": [ 0.2436, 0.27344, 0.28986, 0.29662, 0.30162, 0.30408, 0.31004, 0.31504, 0.31764, 0.31756, 0.32138, 0.32436, 0.32408, 0.32572, 0.33018, 0.32902, 0.3305, 0.33264, 0.33294, 0.3324, 0.3352, 0.33392, 0.33506, 0.33476, 0.33738, 0.33864, 0.3368, 0.3421, 0.34022, 0.34144 ], "test_acc": [ 0.2789, 0.3106, 0.3051, 0.3149, 0.3232, 0.3345, 0.3394, 0.3473, 0.3416, 0.3427, 0.3494, 0.3508, 0.3543, 0.3612, 0.3602, 0.3571, 0.3605, 0.3616, 0.3652, 0.363, 0.3649, 0.3599, 0.3637, 0.3606, 0.3631, 0.3672, 0.3653, 0.3664, 0.3659, 0.3655 ] }, "diagnostics": { "bp_cosine": [ 0.039796918630599976, -0.0086748618632555, -0.05908923223614693, 0.9973034262657166 ], "perturbation_rho": [ 0.014420752413570881, 0.010810372419655323, -0.0031975782476365566, 0.018547791987657547 ], "nudging": { "0.001": [ -1.748558133840561e-06, 4.516914486885071e-08, 1.9744038581848145e-07, -3.418419510126114e-06 ], "0.003": [ -5.089212208986282e-06, -9.313225746154785e-09, 6.165355443954468e-07, -1.0619405657052994e-05 ], "0.01": [ -1.7355196177959442e-05, -1.9045546650886536e-07, 2.042856067419052e-06, -3.545219078660011e-05 ] }, "hidden_norms_per_layer": [ 3135.737060546875, 27729.080078125, 142235.453125, 221539.90625, 98576.6796875 ], "bp_grad_norms_per_layer": [ 2.2086309400037862e-05, 2.296714228577912e-06, 1.807363787520444e-06, 1.8235258494314621e-06, 1.7601513491172227e-06 ] }, "drift": { "embed.weight": 27.4249662858567, "embed.bias": 13.120389146639068, "blocks.0.ln.weight": 1.030566692352295, "blocks.0.w1.weight": 12.544252287230748, "blocks.0.w1.bias": 11.915807637034495, "blocks.0.w2.weight": 40.35155370112267, "blocks.1.ln.weight": 0.8229038715362549, "blocks.1.w1.weight": 13.580795814003102, "blocks.1.w1.bias": 13.535923105255597, "blocks.1.w2.weight": 26.893514153884407, "blocks.2.ln.weight": 0.7509615421295166, "blocks.2.w1.weight": 12.776503039551056, "blocks.2.w1.bias": 13.791977535501145, "blocks.2.w2.weight": 24.833686221953496, "blocks.3.ln.weight": 0.7571015954017639, "blocks.3.w1.weight": 12.83601203126826, "blocks.3.w1.bias": 14.312451854403847, "blocks.3.w2.weight": 27.882819964995566, "out_ln.weight": 0.14845088124275208, "out_head.weight": 2.4655734611964566, "out_head.bias": 1.0148059705009767 } } }, "123": { "fa": { "log": { "train_loss": [ 2.039442294845581, 1.9535883404159546, 1.926400708694458, 1.911022697982788, 1.9043336145401002, 1.8967472591400147, 1.8879544982147216, 1.880874624710083, 1.8684730854034424, 1.8600735509490967, 1.856403991165161, 1.8502347243499755, 1.8449485485458375, 1.8409737787246705, 1.8416496984100341, 1.834242384109497, 1.8294944548797607, 1.8298376247406005, 1.826137925453186, 1.8233746087646485, 1.82085874168396, 1.8194894942855835, 1.8188257317733765, 1.8151497143936157, 1.813930884475708, 1.8120647540283203, 1.813437055130005, 1.8121181848526, 1.8119350430297851, 1.8114447354888916 ], "train_acc": [ 0.25042, 0.2898, 0.30126, 0.3061, 0.3093, 0.31594, 0.31488, 0.31876, 0.3224, 0.32746, 0.3304, 0.33324, 0.33658, 0.34018, 0.34052, 0.34376, 0.34472, 0.34468, 0.34946, 0.34906, 0.34908, 0.35074, 0.35076, 0.35142, 0.35312, 0.35506, 0.3535, 0.35112, 0.35142, 0.35268 ], "test_acc": [ 0.2905, 0.3334, 0.3427, 0.3456, 0.347, 0.3554, 0.355, 0.344, 0.3537, 0.3514, 0.3589, 0.3694, 0.361, 0.3679, 0.3707, 0.3645, 0.3766, 0.3737, 0.3685, 0.3753, 0.3749, 0.3762, 0.3779, 0.3776, 0.3771, 0.3778, 0.3785, 0.3791, 0.3788, 0.3793 ] }, "diagnostics": { "bp_cosine": [ 0.04084893688559532, 0.03916510194540024, -0.05586155131459236, 0.9959583282470703 ], "perturbation_rho": [ 0.037160176783800125, 0.010217813774943352, 0.009163782931864262, 0.09005501866340637 ], "nudging": { "0.001": [ -3.252178430557251e-06, -4.302710294723511e-07, 2.551823854446411e-07, -5.400157533586025e-06 ], "0.003": [ -9.553623385727406e-06, -1.2621749192476273e-06, 7.917406037449837e-07, -1.6256701201200485e-05 ], "0.01": [ -3.1867995858192444e-05, -4.215282388031483e-06, 2.7384376153349876e-06, -5.423836410045624e-05 ] }, "hidden_norms_per_layer": [ 2199.5390625, 23238.517578125, 33747.62109375, 60688.390625, 42941.73828125 ], "bp_grad_norms_per_layer": [ 3.650465077953413e-05, 3.7466418234544108e-06, 2.5758349693205673e-06, 2.5369565719302045e-06, 2.5125191314145923e-06 ] }, "drift": { "embed.weight": 22.579546770636362, "embed.bias": 11.99376839559686, "blocks.0.ln.weight": 0.9874183535575867, "blocks.0.w1.weight": 11.111551574509331, "blocks.0.w1.bias": 10.99245593612019, "blocks.0.w2.weight": 37.55172206485303, "blocks.1.ln.weight": 0.8036943674087524, "blocks.1.w1.weight": 11.271966509968653, "blocks.1.w1.bias": 9.571436867060473, "blocks.1.w2.weight": 31.072497431661937, "blocks.2.ln.weight": 0.7531070709228516, "blocks.2.w1.weight": 12.21459668569711, "blocks.2.w1.bias": 10.794010887239482, "blocks.2.w2.weight": 27.176995851908416, "blocks.3.ln.weight": 0.7162689566612244, "blocks.3.w1.weight": 10.48127263461816, "blocks.3.w1.bias": 11.865909016175292, "blocks.3.w2.weight": 20.70822324996214, "out_ln.weight": 0.11810445040464401, "out_head.weight": 2.0129756552130713, "out_head.bias": 2.429219803928322 } } }, "456": { "fa": { "log": { "train_loss": [ 2.064060781517029, 1.9899379243469237, 1.9583307926940918, 1.9413945766830445, 1.9259979123687745, 1.917475108642578, 1.9057074185180665, 1.8973549193572998, 1.8837018002319337, 1.880822947654724, 1.8696432660675049, 1.8690105139541626, 1.864671703262329, 1.860279491958618, 1.8527126348114014, 1.852656520614624, 1.8518596002197265, 1.8463533575820923, 1.8458245706939698, 1.8417340802764892, 1.8398325180053712, 1.8388891097640991, 1.8387157396697997, 1.834438217163086, 1.835012681427002, 1.835493402671814, 1.8353626998138428, 1.8342380188751222, 1.8330915405654906, 1.8353773220443725 ], "train_acc": [ 0.24014, 0.26754, 0.28264, 0.29278, 0.29936, 0.30364, 0.3098, 0.3125, 0.3179, 0.31672, 0.32526, 0.32416, 0.32716, 0.32892, 0.33012, 0.3305, 0.33536, 0.33526, 0.3355, 0.33674, 0.3366, 0.33948, 0.3374, 0.34192, 0.34176, 0.33942, 0.33996, 0.34336, 0.34066, 0.34118 ], "test_acc": [ 0.2713, 0.3036, 0.3138, 0.3217, 0.3206, 0.3309, 0.3341, 0.3322, 0.3431, 0.3426, 0.3568, 0.3643, 0.359, 0.3584, 0.3556, 0.3673, 0.3669, 0.366, 0.3678, 0.3677, 0.3664, 0.3691, 0.369, 0.3699, 0.3699, 0.3695, 0.3706, 0.3701, 0.3698, 0.3699 ] }, "diagnostics": { "bp_cosine": [ 0.005443892907351255, 0.056010693311691284, -0.02994656004011631, 0.993930459022522 ], "perturbation_rho": [ 0.041760556399822235, 0.011946016922593117, 0.03664176166057587, 0.019783515483140945 ], "nudging": { "0.001": [ -4.184548743069172e-07, -1.57160684466362e-07, 1.1589145287871361e-07, -2.3843604139983654e-06 ], "0.003": [ -1.4954712241888046e-06, -6.176414899528027e-07, 2.6955967769026756e-07, -7.573107723146677e-06 ], "0.01": [ -4.984147381037474e-06, -2.076325472444296e-06, 7.987837307155132e-07, -2.5291461497545242e-05 ] }, "hidden_norms_per_layer": [ 2585.784423828125, 30518.099609375, 125489.96875, 234551.46875, 135467.90625 ], "bp_grad_norms_per_layer": [ 2.3132513888413087e-05, 1.7084096270991722e-06, 1.3069517308395007e-06, 1.3104119034323958e-06, 1.2661037089856109e-06 ] }, "drift": { "embed.weight": 25.036403923789077, "embed.bias": 16.467870087010475, "blocks.0.ln.weight": 1.1242742538452148, "blocks.0.w1.weight": 13.107427774588935, "blocks.0.w1.bias": 10.049346359618044, "blocks.0.w2.weight": 42.48070715823368, "blocks.1.ln.weight": 0.858128547668457, "blocks.1.w1.weight": 13.379689964096187, "blocks.1.w1.bias": 11.437785495003597, "blocks.1.w2.weight": 32.57709335591351, "blocks.2.ln.weight": 0.724311351776123, "blocks.2.w1.weight": 13.542630387751261, "blocks.2.w1.bias": 15.41909845322556, "blocks.2.w2.weight": 27.566931719808178, "blocks.3.ln.weight": 0.7379146218299866, "blocks.3.w1.weight": 12.288692457217095, "blocks.3.w1.bias": 12.33989285228832, "blocks.3.w2.weight": 30.803534080173662, "out_ln.weight": 0.14593681693077087, "out_head.weight": 2.504909261880854, "out_head.bias": 1.0678192714646333 } } }, "config": { "dataset": "cifar10", "d_hidden": 256, "num_blocks": 4, "batch_size": 128, "epochs": 30, "lr": 0.001, "lr_fb": 0.001, "wd": 0.01, "lam": 0.1, "K": 4, "sigma_bridge": 0.05, "ema_momentum": 0.995, "term_grad_weight": 1.0, "seeds": [ 42, 123, 456 ], "gpu": 0, "output_dir": "results/fa_no_penalty_30ep", "methods": [ "fa" ], "random_targets": false, "penalty_lam": 0.0, "num_classes": 10 } }