{ "42": { "bp": { "log": { "train_loss": [ 3.975249941253662, 3.6423986249542235, 3.4934494227600097, 3.3944045304870607, 3.3128096391296387, 3.249527327423096, 3.202136681060791, 3.1545809954071045, 3.118299830780029, 3.08477955947876, 3.05101420211792, 3.014707311477661, 2.9815688012695314, 2.9542644719696045, 2.917737830657959, 2.8987481777954103, 2.877259644317627, 2.8476891691589357, 2.826910527801514, 2.8041292054748537, 2.7817903074645995, 2.7624645190429686, 2.736394059448242, 2.7153989936065672, 2.6938070545959474, 2.6722896592712404, 2.656808620452881, 2.6342206648254396, 2.6130940935516356, 2.601909016876221, 2.5847591175842286, 2.5616293630218507, 2.540946982421875, 2.528531006851196, 2.508262563323975, 2.494774736251831, 2.471127844619751, 2.455823439178467, 2.443439641647339, 2.419278503417969, 2.409900106124878, 2.3866433473205566, 2.3731185454559327, 2.348314314727783, 2.336296524658203, 2.3228698544311523, 2.299138896865845, 2.2919679093933105, 2.271020979156494, 2.263141948699951, 2.242116179199219, 2.2228171809387205, 2.210650175628662, 2.1947706311798094, 2.1783614316558837, 2.1615723634338377, 2.152301993675232, 2.139739892730713, 2.11690347366333, 2.1134400886535643, 2.0890519944000245, 2.076347621765137, 2.067469055480957, 2.0518165933990478, 2.041287666015625, 2.027727124786377, 2.0111099968719484, 1.9989793057250977, 1.991382496948242, 1.9685791171264648, 1.966265661239624, 1.9562527508544922, 1.942470475769043, 1.9250774355316163, 1.9187585319137572, 1.9093894129943847, 1.9017769010162353, 1.8909222549438476, 1.8789906490707398, 1.87238977394104, 1.876755567703247, 1.8579993398284913, 1.8549057474517823, 1.8437388555145264, 1.83396729637146, 1.8345417359161378, 1.83224958152771, 1.8185578295516969, 1.8260858072280883, 1.8161109666442872, 1.8139908428573608, 1.8139760601043702, 1.8037351627349854, 1.8002028354644775, 1.8035412215423583, 1.8000410940933227, 1.7957222201156615, 1.79567547290802, 1.7953530138397218, 1.7897127409744262 ], "train_acc": [ 0.09828, 0.14442, 0.16788, 0.18412, 0.1995, 0.21148, 0.21652, 0.2278, 0.23422, 0.24242, 0.24772, 0.25402, 0.26126, 0.2639, 0.27258, 0.27642, 0.2808, 0.28564, 0.2893, 0.29352, 0.29922, 0.3014, 0.30794, 0.31222, 0.31722, 0.322, 0.32392, 0.32726, 0.33092, 0.3352, 0.33782, 0.34382, 0.34482, 0.34966, 0.3531, 0.35308, 0.36068, 0.36312, 0.36942, 0.37268, 0.3705, 0.3762, 0.38242, 0.38778, 0.38854, 0.39198, 0.3985, 0.39664, 0.4058, 0.40534, 0.40916, 0.41304, 0.41528, 0.42002, 0.42598, 0.4278, 0.42834, 0.43244, 0.43612, 0.43712, 0.4414, 0.44592, 0.44574, 0.44904, 0.4551, 0.45958, 0.46296, 0.4616, 0.46646, 0.47288, 0.4693, 0.47354, 0.47626, 0.48042, 0.4848, 0.48688, 0.4857, 0.4874, 0.49078, 0.49406, 0.49478, 0.49768, 0.49744, 0.50144, 0.50262, 0.5014, 0.50426, 0.5064, 0.5074, 0.50692, 0.50832, 0.50964, 0.5119, 0.51018, 0.5099, 0.51064, 0.51258, 0.515, 0.51522, 0.5126 ], "test_acc": [ 0.1432, 0.181, 0.1871, 0.212, 0.2185, 0.2122, 0.2368, 0.2466, 0.241, 0.2492, 0.2563, 0.2629, 0.2656, 0.2695, 0.2731, 0.2753, 0.2737, 0.2823, 0.2832, 0.2883, 0.2946, 0.2915, 0.2957, 0.2922, 0.2981, 0.294, 0.2961, 0.301, 0.3015, 0.3078, 0.3127, 0.3097, 0.308, 0.3058, 0.3138, 0.3098, 0.3114, 0.3129, 0.3126, 0.3158, 0.3146, 0.3119, 0.313, 0.3137, 0.3127, 0.3167, 0.3186, 0.3181, 0.3165, 0.316, 0.3222, 0.3188, 0.3228, 0.3173, 0.3178, 0.3174, 0.3217, 0.3173, 0.3196, 0.3223, 0.3181, 0.3186, 0.3196, 0.3161, 0.3202, 0.314, 0.3178, 0.3172, 0.3174, 0.3207, 0.3193, 0.3214, 0.3213, 0.3175, 0.3228, 0.3207, 0.3208, 0.3213, 0.3187, 0.3197, 0.3226, 0.32, 0.3205, 0.3213, 0.3217, 0.3192, 0.3203, 0.3207, 0.3209, 0.3197, 0.3207, 0.3195, 0.3192, 0.3186, 0.3178, 0.3191, 0.3189, 0.3193, 0.3189, 0.3192 ] }, "diagnostics": { "bp_cosine": [ 1.0, 1.0, 1.0, 1.0 ], "perturbation_rho": [ 0.9980340600013733, 0.9982945919036865, 0.9984437227249146, 0.9987074136734009 ], "nudging": { "0.001": [ -0.0022835906129330397, -0.0023387258406728506, -0.002373999450355768, -0.002260879147797823 ], "0.003": [ -0.006846790201961994, -0.007012245710939169, -0.007117925677448511, -0.006779024377465248 ], "0.01": [ -0.022776642814278603, -0.02332628145813942, -0.023679494857788086, -0.02255532145500183 ] }, "hidden_norms_per_layer": [ 236.57855224609375, 217.48927307128906, 197.1617889404297, 187.24839782714844, 191.277587890625 ], "bp_grad_norms_per_layer": [ 0.0010384325869381428, 0.0010872395941987634, 0.0011213896796107292, 0.001103188842535019, 0.0009315301431342959 ] }, "drift": { "embed.weight": 7.433656905137662, "embed.bias": 13.602162006682997, "blocks.0.ln.weight": 0.14788760244846344, "blocks.0.w1.weight": 3.605816897627063, "blocks.0.w1.bias": 5.290238058535016, "blocks.0.w2.weight": 13.310045077395138, "blocks.1.ln.weight": 0.2914122939109802, "blocks.1.w1.weight": 3.997572101972956, "blocks.1.w1.bias": 4.732854205627193, "blocks.1.w2.weight": 14.921397012401421, "blocks.2.ln.weight": 0.35930144786834717, "blocks.2.w1.weight": 4.149029664687284, "blocks.2.w1.bias": 3.8845074849434162, "blocks.2.w2.weight": 16.00171210299565, "blocks.3.ln.weight": 0.5467617511749268, "blocks.3.w1.weight": 4.475958556672767, "blocks.3.w1.bias": 3.7954587976923664, "blocks.3.w2.weight": 17.52879838948756, "out_ln.weight": 0.1443399339914322, "out_head.weight": 2.871805586504566, "out_head.bias": 3.1293217686796893 } }, "dfa": { "log": { "train_loss": [ 4.17356710144043, 4.045213865203857, 4.0394830491638185, 4.052726871032715, 4.062585520324707, 4.077131650848389, 4.081288940353393, 4.082902023620606, 4.082292946929932, 4.084377682189942, 4.082009593811035, 4.078371176147461, 4.0787283967590335, 4.0781014080810545, 4.071053223266602, 4.073739369049072, 4.070565816497803, 4.068224396362305, 4.064294567718506, 4.062663984680176, 4.060789375686645, 4.060499965057373, 4.05942348953247, 4.05340063583374, 4.054450568389893, 4.051036394500732, 4.052292782592773, 4.04923277923584, 4.047891028900146, 4.046583199005127, 4.046011067352295, 4.0444303074646, 4.045907030334472, 4.044839619445801, 4.03859343673706, 4.043664588165283, 4.0400294355773925, 4.035496199493408, 4.038207025909424, 4.035675854187012, 4.037980937347412, 4.038932502746582, 4.036024468383789, 4.037031109313965, 4.034120268630981, 4.036895367584228, 4.037431150054932, 4.033238902282715, 4.036427326049805, 4.03467604888916, 4.03339782157898, 4.0328554144287105, 4.031013775024414, 4.031612381286621, 4.0287201686096195, 4.031132036132813, 4.029230642242432, 4.029863064575196, 4.03147859588623, 4.029825403366089, 4.030359633789063, 4.028107041168213, 4.02976943069458, 4.029035094909668, 4.028509668731689, 4.0304917379760745, 4.030941032562255, 4.026340734710693, 4.027734536590576, 4.024428620147705, 4.025526253967286, 4.025494504394532, 4.026692573547363, 4.0229616456604, 4.026408356018067, 4.024612335662842, 4.023342019500732, 4.024318858337402, 4.023249450683593, 4.023151823806763, 4.026289680709839, 4.023877305831909, 4.023189326248169, 4.022549798278809, 4.024784528045655, 4.022774189758301, 4.024208254547119, 4.023730496368408, 4.02367640625, 4.021903743209839, 4.02390080947876, 4.022859935150146, 4.023393632354736, 4.021766328201294, 4.02325630645752, 4.023518178710938, 4.021166778717041, 4.023976897583008, 4.023609465179443, 4.024444749069214 ], "train_acc": [ 0.06788, 0.08332, 0.08406, 0.0818, 0.08046, 0.08148, 0.07862, 0.07956, 0.08038, 0.07976, 0.08042, 0.0808, 0.08058, 0.08086, 0.08006, 0.08136, 0.08244, 0.08388, 0.08354, 0.084, 0.08448, 0.08518, 0.08594, 0.08624, 0.08568, 0.08764, 0.08492, 0.08604, 0.08814, 0.0876, 0.08836, 0.08956, 0.08702, 0.08854, 0.08976, 0.08954, 0.0895, 0.09006, 0.0894, 0.08964, 0.09064, 0.09034, 0.09288, 0.0913, 0.0914, 0.09116, 0.09094, 0.09192, 0.09044, 0.0909, 0.09252, 0.09214, 0.09332, 0.0931, 0.09364, 0.09176, 0.09246, 0.09302, 0.09276, 0.09334, 0.09404, 0.0942, 0.09392, 0.09358, 0.09336, 0.09392, 0.09252, 0.0949, 0.09444, 0.0958, 0.09542, 0.0949, 0.0954, 0.09438, 0.09338, 0.0958, 0.09376, 0.0959, 0.09448, 0.09388, 0.09458, 0.09422, 0.09542, 0.09522, 0.09468, 0.0951, 0.09564, 0.09586, 0.09536, 0.09624, 0.09588, 0.09594, 0.09642, 0.09588, 0.09496, 0.09496, 0.096, 0.09454, 0.09542, 0.09488 ], "test_acc": [ 0.0775, 0.0794, 0.0808, 0.0828, 0.0742, 0.0795, 0.0788, 0.0855, 0.0809, 0.0819, 0.0767, 0.0826, 0.0854, 0.0752, 0.0798, 0.0796, 0.0813, 0.0771, 0.0823, 0.0759, 0.0814, 0.0772, 0.0826, 0.079, 0.0803, 0.0815, 0.0818, 0.0819, 0.0818, 0.084, 0.0853, 0.0855, 0.0875, 0.0818, 0.0861, 0.0827, 0.0847, 0.0825, 0.0853, 0.0862, 0.0841, 0.0857, 0.0852, 0.0852, 0.0827, 0.0863, 0.0878, 0.0858, 0.0857, 0.0879, 0.0888, 0.088, 0.0878, 0.0871, 0.0878, 0.0845, 0.0863, 0.0852, 0.0872, 0.0879, 0.086, 0.0878, 0.0851, 0.0863, 0.0877, 0.0861, 0.0889, 0.0839, 0.0893, 0.0883, 0.0867, 0.0855, 0.0878, 0.086, 0.0876, 0.0882, 0.0895, 0.0877, 0.0876, 0.0871, 0.088, 0.0876, 0.088, 0.0885, 0.0879, 0.0888, 0.0882, 0.0876, 0.0882, 0.087, 0.0873, 0.0877, 0.0876, 0.0882, 0.0878, 0.0876, 0.0877, 0.0876, 0.0875, 0.0875 ] }, "diagnostics": { "bp_cosine": [ 0.14061881601810455, 0.0022556069307029247, -0.009000124409794807, -0.013949227519333363 ], "perturbation_rho": [ 0.04333949834108353, 0.0, 0.0, 0.0 ], "nudging": { "0.001": [ -5.513429641723633e-07, 0.0, 0.0, -3.725290298461914e-09 ], "0.003": [ -1.7480924725532532e-06, -1.4901161193847656e-08, 0.0, -1.862645149230957e-09 ], "0.01": [ -5.760230123996735e-06, 2.3283064365386963e-08, -1.862645149230957e-09, 0.0 ] }, "hidden_norms_per_layer": [ 20187.3125, 13252251.0, 93144488.0, 163952208.0, 202900624.0 ], "bp_grad_norms_per_layer": [ 1.3867399957234738e-06, 9.786202070927175e-09, 9.394233835280374e-09, 9.39475075512064e-09, 9.382485899323001e-09 ] }, "drift": { "embed.weight": 164.5243882671684, "embed.bias": 86.31975598233767, "blocks.0.ln.weight": 9.94965648651123, "blocks.0.w1.weight": 116.60864500939675, "blocks.0.w1.bias": 119.5896283256837, "blocks.0.w2.weight": 139.89574047116318, "blocks.1.ln.weight": 6.952559471130371, "blocks.1.w1.weight": 171.01066211955538, "blocks.1.w1.bias": 184.66872276388008, "blocks.1.w2.weight": 113.70390206644275, "blocks.2.ln.weight": 8.037435531616211, "blocks.2.w1.weight": 204.0534914689403, "blocks.2.w1.bias": 205.1741516813065, "blocks.2.w2.weight": 112.9529550385935, "blocks.3.ln.weight": 7.489717483520508, "blocks.3.w1.weight": 199.12470243900225, "blocks.3.w1.bias": 198.81992912624156, "blocks.3.w2.weight": 104.7339169635497, "out_ln.weight": 1.6446622610092163, "out_head.weight": 8.633984180685534, "out_head.bias": 0.8233496742420285 } }, "fa": { "log": { "train_loss": [ 4.184186522827148, 4.179929753265381, 4.089846964416504, 4.048904882202148, 4.019415323638916, 4.003474291839599, 3.9891197368621825, 3.9800468362426757, 3.9685846406555174, 3.9551162309265138, 3.942166231536865, 3.9271056993865967, 3.915979294891357, 3.9063652964782714, 3.8933532473754884, 3.892304434890747, 3.8851386541748045, 3.880706764526367, 3.8791183966064455, 3.874532577972412, 3.870268273162842, 3.8705162049865725, 3.8667305128479006, 3.8626690478515626, 3.8593408808898926, 3.8570751000213623, 3.858237083206177, 3.8517847180938722, 3.8476831745910642, 3.8457262559509275, 3.840909554901123, 3.835939132537842, 3.834127527542114, 3.830621910324097, 3.8192956964874267, 3.8194126902770997, 3.8129332183074953, 3.803470793991089, 3.8037892980194092, 3.7975124457550047, 3.7967339707946777, 3.7944636122131348, 3.786077890625, 3.7846675917053223, 3.7791504066467283, 3.7762106622314455, 3.777709127731323, 3.7638775648498535, 3.765332045440674, 3.7623159846496583, 3.761364775314331, 3.7529254064178468, 3.749479592971802, 3.744997635345459, 3.7406992932128906, 3.74050118598938, 3.740082041091919, 3.7403511964416505, 3.7391156189727783, 3.7338412855529786, 3.733470021286011, 3.726227327346802, 3.7261299742889404, 3.722200173187256, 3.7222453344726563, 3.7236415620422365, 3.720610160598755, 3.71873546585083, 3.7171892735290526, 3.710947210006714, 3.7083427098846435, 3.7072678149414062, 3.70656751663208, 3.704509799041748, 3.7028028456115725, 3.7015775603485106, 3.7001070789337156, 3.6981460167694093, 3.696165197067261, 3.6909815367889403, 3.6975155518341065, 3.693085638427734, 3.6924814643096924, 3.6891826426696777, 3.688821280517578, 3.689004655685425, 3.6925391107177736, 3.6897733489990236, 3.687680895843506, 3.6844489430236815, 3.688553215789795, 3.6839564624023438, 3.685328346557617, 3.6844889741516114, 3.682617470626831, 3.685823777923584, 3.6817508575439453, 3.6835061328125, 3.683719411773682, 3.685913360671997 ], "train_acc": [ 0.06392, 0.06052, 0.07254, 0.0799, 0.0863, 0.08926, 0.09172, 0.09366, 0.0967, 0.09784, 0.10026, 0.10226, 0.10344, 0.10226, 0.10548, 0.10614, 0.10576, 0.1094, 0.10852, 0.11026, 0.1109, 0.1113, 0.11464, 0.11204, 0.114, 0.11382, 0.11484, 0.11308, 0.11652, 0.116, 0.11544, 0.11934, 0.1164, 0.11756, 0.12118, 0.12098, 0.1208, 0.1226, 0.12196, 0.12346, 0.1241, 0.12418, 0.12376, 0.12556, 0.12682, 0.12578, 0.12646, 0.12802, 0.12834, 0.12896, 0.1285, 0.13022, 0.13064, 0.13266, 0.132, 0.13304, 0.13302, 0.13274, 0.13234, 0.13406, 0.1352, 0.1352, 0.13452, 0.13546, 0.1378, 0.1353, 0.13576, 0.13548, 0.13744, 0.1404, 0.13826, 0.13848, 0.14008, 0.13914, 0.13812, 0.14022, 0.14048, 0.14066, 0.14008, 0.1426, 0.1408, 0.1407, 0.1429, 0.14204, 0.14294, 0.14272, 0.14102, 0.14226, 0.14414, 0.1436, 0.14408, 0.14312, 0.1434, 0.14338, 0.14488, 0.14448, 0.14394, 0.1426, 0.14502, 0.14398 ], "test_acc": [ 0.0598, 0.0694, 0.069, 0.0805, 0.086, 0.0911, 0.0953, 0.0986, 0.0954, 0.0949, 0.0993, 0.1051, 0.1069, 0.1004, 0.1066, 0.11, 0.1087, 0.1039, 0.1151, 0.1101, 0.1136, 0.1096, 0.1118, 0.1138, 0.1155, 0.1151, 0.1171, 0.1197, 0.1188, 0.1212, 0.1182, 0.1194, 0.12, 0.122, 0.1259, 0.1204, 0.1205, 0.1259, 0.1231, 0.1247, 0.1263, 0.1294, 0.1233, 0.1265, 0.1299, 0.1267, 0.1297, 0.1338, 0.1276, 0.1332, 0.1337, 0.1302, 0.1399, 0.1354, 0.1322, 0.1382, 0.1372, 0.1363, 0.1369, 0.1397, 0.1357, 0.1394, 0.144, 0.1395, 0.1448, 0.1373, 0.1435, 0.1417, 0.1398, 0.14, 0.1409, 0.1409, 0.1408, 0.141, 0.145, 0.1425, 0.1425, 0.1452, 0.1449, 0.1457, 0.1432, 0.1449, 0.1446, 0.1486, 0.1429, 0.1477, 0.1458, 0.146, 0.1449, 0.1455, 0.1442, 0.1453, 0.1458, 0.1462, 0.1468, 0.1456, 0.1462, 0.1464, 0.1465, 0.1464 ] }, "diagnostics": { "bp_cosine": [ 0.032749101519584656, -0.02806573174893856, -0.13188865780830383, 0.9981366395950317 ], "perturbation_rho": [ 0.023128217086195946, -0.033153705298900604, -0.03114410489797592, -0.04209538549184799 ], "nudging": { "0.001": [ -3.4319236874580383e-07, 3.664754331111908e-07, 2.3958273231983185e-07, -1.4158431440591812e-06 ], "0.003": [ -1.9101426005363464e-06, 6.353948265314102e-07, 1.5594996511936188e-06, -8.973991498351097e-06 ], "0.01": [ -6.851274520158768e-06, 8.617062121629715e-07, 6.175599992275238e-06, -3.897189162671566e-05 ] }, "hidden_norms_per_layer": [ 10709.3916015625, 183619.875, 1852831.0, 3034211.25, 169794.0625 ], "bp_grad_norms_per_layer": [ 1.2192158465040848e-05, 2.159640871468582e-06, 1.9386970961932093e-06, 1.93888718058588e-06, 1.940315996762365e-06 ] }, "drift": { "embed.weight": 97.51059845469358, "embed.bias": 45.31500470139968, "blocks.0.ln.weight": 2.126800775527954, "blocks.0.w1.weight": 24.113425166766486, "blocks.0.w1.bias": 25.534244327420822, "blocks.0.w2.weight": 69.8748433766199, "blocks.1.ln.weight": 1.5964641571044922, "blocks.1.w1.weight": 35.519098992915914, "blocks.1.w1.bias": 42.27561830539123, "blocks.1.w2.weight": 61.05750225415427, "blocks.2.ln.weight": 1.4540088176727295, "blocks.2.w1.weight": 41.92037705190106, "blocks.2.w1.bias": 47.8882125375433, "blocks.2.w2.weight": 47.86373297818362, "blocks.3.ln.weight": 1.6165372133255005, "blocks.3.w1.weight": 47.10451321088829, "blocks.3.w1.bias": 53.07025310766248, "blocks.3.w2.weight": 48.00579573400754, "out_ln.weight": 0.49529579281806946, "out_head.weight": 4.173857948425521, "out_head.bias": 3.4238911704789996 } } }, "123": { "bp": { "log": { "train_loss": [ 3.967920153274536, 3.643245764923096, 3.486677552947998, 3.389170970993042, 3.3176012518310545, 3.2524516577148437, 3.196259712524414, 3.152468095245361, 3.116606701889038, 3.0753799269104003, 3.0412558711242674, 3.0103372289276122, 2.9851040869140624, 2.9536216430664064, 2.919733713989258, 2.8960993688964844, 2.872693371734619, 2.84370941696167, 2.8186388218688965, 2.802537234954834, 2.775104519882202, 2.752585594329834, 2.7324084952545165, 2.7153604712677004, 2.6842776251220704, 2.667392734527588, 2.6493602058410644, 2.6304399774932863, 2.6058559870910645, 2.5874294744110107, 2.5774721023559573, 2.560277436981201, 2.5345041821289063, 2.5185300437927247, 2.5040577131652832, 2.4822867348480226, 2.4557650662994384, 2.4456021406555175, 2.4341802147674563, 2.4113426206207276, 2.401808072052002, 2.3768489891052247, 2.3607859144592287, 2.3446667034912108, 2.329873125, 2.3144486641693116, 2.297909416656494, 2.2798938914489746, 2.266936881713867, 2.2467652272033694, 2.2354507537841797, 2.2205448300933837, 2.2017585570526124, 2.1915560935974123, 2.1722356983947755, 2.1580420698547362, 2.138712609024048, 2.1268783713531496, 2.1147646588897704, 2.1034324825286865, 2.088131596069336, 2.0740757128143312, 2.049090367126465, 2.0376090378570555, 2.0276603718566895, 2.0167199254989625, 2.0046445736694336, 1.9935844805145264, 1.9779361727905274, 1.9663786093521118, 1.9583420275497436, 1.94067984375, 1.929230265197754, 1.9221224881362915, 1.912958982810974, 1.9034557625579833, 1.8937049017715455, 1.8790358535003662, 1.876692020301819, 1.8658557400512696, 1.8551803729629517, 1.8528358583831788, 1.8547788270568848, 1.83579787399292, 1.825839135055542, 1.8247737094116212, 1.8170814221191407, 1.8188595218658448, 1.815419641456604, 1.8142681406402588, 1.8094764706802369, 1.7942750823593139, 1.7980619449615478, 1.7933076245498658, 1.7883917538452148, 1.7867490985870362, 1.7903605484771727, 1.79024905128479, 1.7870432864379884, 1.7758308526611328 ], "train_acc": [ 0.0986, 0.14448, 0.17068, 0.18832, 0.20038, 0.21186, 0.22002, 0.22894, 0.23506, 0.24404, 0.24964, 0.25374, 0.25782, 0.26606, 0.2711, 0.27704, 0.28214, 0.2849, 0.29088, 0.2938, 0.29992, 0.3054, 0.3096, 0.30988, 0.3165, 0.3225, 0.32436, 0.32574, 0.33332, 0.33762, 0.33924, 0.343, 0.34878, 0.3505, 0.35344, 0.35974, 0.36384, 0.36348, 0.3704, 0.36994, 0.37362, 0.38144, 0.38266, 0.3852, 0.3905, 0.3918, 0.39726, 0.40046, 0.40264, 0.40842, 0.40998, 0.41106, 0.4151, 0.4193, 0.4234, 0.42538, 0.43214, 0.43326, 0.43584, 0.4373, 0.44392, 0.4447, 0.44954, 0.45406, 0.45662, 0.45958, 0.46094, 0.46528, 0.46588, 0.47114, 0.47258, 0.47772, 0.47956, 0.48164, 0.48602, 0.48696, 0.48908, 0.49296, 0.49344, 0.49132, 0.49576, 0.49762, 0.49882, 0.49984, 0.50594, 0.5048, 0.50622, 0.50504, 0.50664, 0.50984, 0.50942, 0.5116, 0.5117, 0.51286, 0.51336, 0.51602, 0.5143, 0.51562, 0.51416, 0.51578 ], "test_acc": [ 0.1439, 0.1786, 0.2007, 0.208, 0.2165, 0.2315, 0.2325, 0.2385, 0.2483, 0.2501, 0.254, 0.2585, 0.2634, 0.2702, 0.2749, 0.2758, 0.2809, 0.2848, 0.2814, 0.2812, 0.2857, 0.2915, 0.2921, 0.2923, 0.2939, 0.295, 0.302, 0.2965, 0.3039, 0.3021, 0.3035, 0.3055, 0.3095, 0.3052, 0.3129, 0.3112, 0.3097, 0.308, 0.3123, 0.3104, 0.3135, 0.3145, 0.3105, 0.3173, 0.3165, 0.3154, 0.315, 0.3172, 0.3154, 0.316, 0.3159, 0.3184, 0.316, 0.3219, 0.3177, 0.3206, 0.3164, 0.3151, 0.3233, 0.3209, 0.3224, 0.3239, 0.3212, 0.318, 0.3248, 0.3255, 0.324, 0.3217, 0.323, 0.3212, 0.323, 0.3249, 0.3196, 0.3215, 0.3217, 0.3204, 0.3211, 0.3238, 0.3237, 0.3208, 0.3243, 0.3225, 0.3216, 0.3233, 0.3208, 0.3198, 0.3185, 0.319, 0.3196, 0.3201, 0.3197, 0.3214, 0.3217, 0.3219, 0.3217, 0.3221, 0.3217, 0.3217, 0.3218, 0.3218 ] }, "diagnostics": { "bp_cosine": [ 1.0, 1.0, 1.0, 1.0 ], "perturbation_rho": [ 0.9976038932800293, 0.9983073472976685, 0.9986611604690552, 0.9987543821334839 ], "nudging": { "0.001": [ -0.002336513716727495, -0.0024050232023000717, -0.002418666612356901, -0.0022877217270433903 ], "0.003": [ -0.007005014922469854, -0.007211083546280861, -0.007251948118209839, -0.006859402172267437 ], "0.01": [ -0.023301422595977783, -0.02398984506726265, -0.024125652387738228, -0.022821901366114616 ] }, "hidden_norms_per_layer": [ 228.35731506347656, 216.70431518554688, 197.1549835205078, 189.50289916992188, 193.0804901123047 ], "bp_grad_norms_per_layer": [ 0.0011750052217394114, 0.0012150296242907643, 0.00123639649245888, 0.0011825052788481116, 0.0009545879438519478 ] }, "drift": { "embed.weight": 7.3996956607158415, "embed.bias": 13.278713687472328, "blocks.0.ln.weight": 0.15684440732002258, "blocks.0.w1.weight": 3.6258136635529388, "blocks.0.w1.bias": 5.57490288506498, "blocks.0.w2.weight": 13.211600071846687, "blocks.1.ln.weight": 0.26902759075164795, "blocks.1.w1.weight": 3.9457858088997937, "blocks.1.w1.bias": 4.8656985397285295, "blocks.1.w2.weight": 14.784374975769294, "blocks.2.ln.weight": 0.37451720237731934, "blocks.2.w1.weight": 4.11706460399454, "blocks.2.w1.bias": 4.146142198756796, "blocks.2.w2.weight": 15.840220385160595, "blocks.3.ln.weight": 0.5431011319160461, "blocks.3.w1.weight": 4.464837126822249, "blocks.3.w1.bias": 3.6144859875754407, "blocks.3.w2.weight": 17.545783297025896, "out_ln.weight": 0.14564843475818634, "out_head.weight": 2.8817944671871123, "out_head.bias": 3.056171448565604 } }, "dfa": { "log": { "train_loss": [ 4.179048243713379, 4.067725786132812, 4.071110060424805, 4.091065592956543, 4.10388401260376, 4.1050505090332035, 4.10725640045166, 4.107924006652832, 4.103042182159424, 4.101259585266114, 4.0994670211791995, 4.092466098937988, 4.091690199432373, 4.088890562286377, 4.083471063919068, 4.082734613113403, 4.079294251174927, 4.074109273834228, 4.072821339111328, 4.072047854766845, 4.068868335876465, 4.064674437255859, 4.064519798278808, 4.062208421783447, 4.059947501296997, 4.0568214564514165, 4.0578119854736325, 4.05678455657959, 4.054043687744141, 4.04934491607666, 4.049620868835449, 4.048787186126709, 4.041555746765137, 4.046662612380981, 4.043773061065674, 4.0444277174377445, 4.043180988922119, 4.040115342483521, 4.041283304138184, 4.040324337005615, 4.03773109161377, 4.037001483154297, 4.036031613235473, 4.035188417053223, 4.035520594024658, 4.031777966766358, 4.036043049621582, 4.0324571522521975, 4.032010763549804, 4.032135240783691, 4.028632532043457, 4.028738806915284, 4.029276781768798, 4.030177063598633, 4.027558575515747, 4.0295089608764645, 4.02790505935669, 4.0268916735076905, 4.025881996612549, 4.024340693664551, 4.024784558410644, 4.022587103195191, 4.023770975189209, 4.024697297668457, 4.023757594070434, 4.025282996749878, 4.020828542404175, 4.02296846206665, 4.021536037902832, 4.021342518310547, 4.023781976318359, 4.020269456634521, 4.019074986038208, 4.020880366287232, 4.02205199508667, 4.019182727050781, 4.019642067718506, 4.019440497131348, 4.018397001342773, 4.020665705871582, 4.020597035064697, 4.019291881713867, 4.019497079620361, 4.018186092681884, 4.018166368408203, 4.017542911529541, 4.018283336334228, 4.015659646606445, 4.0151326177978515, 4.017795926361084, 4.016853940124512, 4.018234666442871, 4.015345659790039, 4.017251117095947, 4.018545875244141, 4.017435375213623, 4.015963760681152, 4.015863068695069, 4.015675266418457, 4.018068486938477 ], "train_acc": [ 0.06442, 0.07788, 0.07748, 0.07344, 0.07334, 0.0722, 0.07326, 0.07414, 0.07376, 0.07382, 0.07452, 0.07688, 0.07534, 0.07782, 0.08016, 0.07796, 0.08084, 0.0805, 0.0801, 0.08082, 0.08118, 0.08206, 0.08406, 0.08258, 0.08372, 0.08414, 0.08464, 0.08358, 0.0859, 0.08652, 0.08582, 0.08718, 0.08834, 0.08784, 0.08652, 0.08678, 0.08714, 0.08734, 0.0885, 0.08662, 0.08978, 0.08958, 0.08816, 0.08956, 0.08942, 0.09074, 0.08972, 0.08778, 0.08952, 0.08974, 0.09166, 0.08992, 0.0922, 0.09098, 0.09182, 0.092, 0.09068, 0.09088, 0.09264, 0.09206, 0.09184, 0.09252, 0.09102, 0.09206, 0.09348, 0.09252, 0.09214, 0.09232, 0.09276, 0.09236, 0.0915, 0.09298, 0.09236, 0.0928, 0.09326, 0.09264, 0.09282, 0.0924, 0.0927, 0.09328, 0.09474, 0.09338, 0.09336, 0.09342, 0.09358, 0.09408, 0.09404, 0.09384, 0.09388, 0.09484, 0.09448, 0.09396, 0.09364, 0.09346, 0.09286, 0.09446, 0.0944, 0.094, 0.0935, 0.09318 ], "test_acc": [ 0.0808, 0.0776, 0.0759, 0.0747, 0.0703, 0.0777, 0.0773, 0.0701, 0.0792, 0.0764, 0.0753, 0.0743, 0.0772, 0.0824, 0.0807, 0.081, 0.0784, 0.0813, 0.0774, 0.0803, 0.0826, 0.0865, 0.0793, 0.0798, 0.0821, 0.082, 0.0822, 0.0812, 0.0812, 0.0845, 0.0808, 0.087, 0.0849, 0.0823, 0.0861, 0.0851, 0.0873, 0.0833, 0.0826, 0.0855, 0.0843, 0.0861, 0.0904, 0.0866, 0.0882, 0.0861, 0.0875, 0.0871, 0.0843, 0.0852, 0.0874, 0.0892, 0.0868, 0.0887, 0.0882, 0.0874, 0.0816, 0.0865, 0.0861, 0.0856, 0.0874, 0.0869, 0.0905, 0.0844, 0.0838, 0.0898, 0.0881, 0.0865, 0.0891, 0.0868, 0.0861, 0.0874, 0.0909, 0.0889, 0.0856, 0.0875, 0.088, 0.0864, 0.0889, 0.0867, 0.0852, 0.0869, 0.0879, 0.0875, 0.0872, 0.0883, 0.0897, 0.0879, 0.0875, 0.0875, 0.0868, 0.087, 0.0869, 0.0868, 0.0875, 0.0873, 0.0871, 0.0872, 0.0872, 0.0872 ] }, "diagnostics": { "bp_cosine": [ 0.14216047525405884, -0.000992744229733944, -0.008401205763220787, -0.018219897523522377 ], "perturbation_rho": [ 0.053201157599687576, 0.0, 0.0, 0.0 ], "nudging": { "0.001": [ -6.109476089477539e-07, 3.725290298461914e-09, 0.0, 0.0 ], "0.003": [ -1.6260892152786255e-06, 3.725290298461914e-09, 3.725290298461914e-09, 0.0 ], "0.01": [ -5.261041224002838e-06, -1.4901161193847656e-08, 5.587935447692871e-09, 5.587935447692871e-09 ] }, "hidden_norms_per_layer": [ 19441.8984375, 14779116.0, 143599776.0, 226312160.0, 280974592.0 ], "bp_grad_norms_per_layer": [ 1.4361885405378416e-06, 8.184267485944474e-09, 7.640669430486469e-09, 7.643201627161034e-09, 7.633985887878225e-09 ] }, "drift": { "embed.weight": 162.24755066692174, "embed.bias": 87.51431369480697, "blocks.0.ln.weight": 10.42286205291748, "blocks.0.w1.weight": 122.86975821758014, "blocks.0.w1.bias": 127.51968896872295, "blocks.0.w2.weight": 144.60590878787062, "blocks.1.ln.weight": 7.615029811859131, "blocks.1.w1.weight": 201.6705191292493, "blocks.1.w1.bias": 211.79976689727667, "blocks.1.w2.weight": 125.73570047362797, "blocks.2.ln.weight": 8.678234100341797, "blocks.2.w1.weight": 226.59101485856874, "blocks.2.w1.bias": 218.5035230014578, "blocks.2.w2.weight": 132.07798132121874, "blocks.3.ln.weight": 8.401058197021484, "blocks.3.w1.weight": 221.76508002622688, "blocks.3.w1.bias": 204.23780375800553, "blocks.3.w2.weight": 123.66583688447245, "out_ln.weight": 1.673746943473816, "out_head.weight": 9.550233758399107, "out_head.bias": 0.9096587370527971 } }, "fa": { "log": { "train_loss": [ 4.1970798970031735, 4.170009743804932, 4.130948940124512, 4.094561860961914, 4.050638759155273, 4.024049156494141, 4.013653193511963, 4.014947728729248, 4.032731681671143, 4.047689598388672, 4.044174816589355, 4.0340768909454345, 4.026809699783326, 4.0191610552215575, 4.009859062652588, 4.004589066696167, 4.000014752273559, 3.9934563516235353, 3.989729327392578, 3.9867174030303953, 3.980116393737793, 3.979052162322998, 3.976570917816162, 3.972892049102783, 3.9706854679870607, 3.964727850341797, 3.9669337309265136, 3.962656950378418, 3.959514749221802, 3.950425816116333, 3.9518183033752443, 3.944525026168823, 3.9370513851928712, 3.9401658685302734, 3.9340728694152833, 3.933683126220703, 3.929854642791748, 3.9251790898132324, 3.925471938934326, 3.920428736419678, 3.9194086865997315, 3.916180799026489, 3.910477230758667, 3.9091271575164797, 3.9079003392791747, 3.9009140887451172, 3.903854856109619, 3.896608190765381, 3.8983821444702147, 3.8914879888153076, 3.8876359399414064, 3.8866743579101564, 3.886013005371094, 3.8823793061065675, 3.8820696644592285, 3.8807897300720215, 3.8792161915588377, 3.873366918258667, 3.872225737609863, 3.867956961669922, 3.8688921507263183, 3.864798822555542, 3.8635835083770753, 3.8616594306182863, 3.86274955078125, 3.8615631605529783, 3.855813476715088, 3.8575434510040285, 3.8537498898315428, 3.8516512804412844, 3.853169044036865, 3.848847638626099, 3.8501204053497315, 3.8475019567871094, 3.8478130152130126, 3.841899245147705, 3.8445189476776123, 3.8427941259002685, 3.841190276489258, 3.8432645402526857, 3.840385712738037, 3.841228730545044, 3.8406887775421144, 3.838410483932495, 3.8366693685913087, 3.8328694233703615, 3.8348467766571046, 3.8281507495880125, 3.8312854344177247, 3.8341920923614503, 3.832209959793091, 3.832971982574463, 3.827496012649536, 3.832459149017334, 3.8320746698760986, 3.833449503631592, 3.8311014671325685, 3.828391033859253, 3.8309260621643064, 3.832998709869385 ], "train_acc": [ 0.06318, 0.06262, 0.06896, 0.07402, 0.08474, 0.0894, 0.09134, 0.0919, 0.08782, 0.08538, 0.08668, 0.0868, 0.08842, 0.0918, 0.09218, 0.09394, 0.09588, 0.09596, 0.09472, 0.09676, 0.09782, 0.09964, 0.10034, 0.09964, 0.09956, 0.10158, 0.10016, 0.10198, 0.10194, 0.1036, 0.10318, 0.10394, 0.1047, 0.1039, 0.10536, 0.10528, 0.10612, 0.10692, 0.10644, 0.10704, 0.10844, 0.1082, 0.10744, 0.10862, 0.10826, 0.11176, 0.10998, 0.10964, 0.11052, 0.1107, 0.11298, 0.11352, 0.11398, 0.11322, 0.11342, 0.11344, 0.11324, 0.11226, 0.11314, 0.11472, 0.11338, 0.11524, 0.11458, 0.11598, 0.11536, 0.11664, 0.11622, 0.11714, 0.1169, 0.11656, 0.11594, 0.11838, 0.1178, 0.11898, 0.1183, 0.11852, 0.11904, 0.11914, 0.1193, 0.11876, 0.11858, 0.12086, 0.11934, 0.11944, 0.12044, 0.1214, 0.12184, 0.12008, 0.12106, 0.12018, 0.12146, 0.12004, 0.12064, 0.11986, 0.12106, 0.12248, 0.12166, 0.12014, 0.12266, 0.12278 ], "test_acc": [ 0.0708, 0.0659, 0.0746, 0.0752, 0.0902, 0.0946, 0.0937, 0.0824, 0.0936, 0.0847, 0.095, 0.084, 0.0884, 0.0959, 0.0927, 0.0944, 0.101, 0.0972, 0.098, 0.0997, 0.095, 0.1032, 0.0977, 0.0949, 0.1003, 0.1004, 0.0969, 0.101, 0.1019, 0.1037, 0.0979, 0.103, 0.1047, 0.1026, 0.1092, 0.1071, 0.1062, 0.1024, 0.1051, 0.1068, 0.1088, 0.105, 0.108, 0.1068, 0.1092, 0.1112, 0.1096, 0.1084, 0.108, 0.1091, 0.1102, 0.1141, 0.1109, 0.1138, 0.1085, 0.1143, 0.1107, 0.1135, 0.1153, 0.1135, 0.1181, 0.1159, 0.1171, 0.1126, 0.1165, 0.1193, 0.1206, 0.113, 0.1139, 0.1156, 0.1152, 0.1201, 0.1239, 0.1171, 0.1187, 0.1197, 0.1187, 0.1212, 0.1187, 0.1182, 0.1187, 0.1203, 0.1195, 0.1212, 0.122, 0.1207, 0.1192, 0.1205, 0.1205, 0.1215, 0.12, 0.1201, 0.1203, 0.1203, 0.1213, 0.1208, 0.1209, 0.1209, 0.1209, 0.1208 ] }, "diagnostics": { "bp_cosine": [ 0.017588060349225998, -0.016497433185577393, -0.053346455097198486, 0.9986427426338196 ], "perturbation_rho": [ 0.02255980297923088, 0.005564957857131958, -0.049259480088949203, -0.011541965417563915 ], "nudging": { "0.001": [ 1.280568540096283e-07, 1.62515789270401e-07, 7.82310962677002e-08, -5.41098415851593e-07 ], "0.003": [ 1.0710209608078003e-07, 2.7334317564964294e-07, 1.1641532182693481e-07, -2.648215740919113e-06 ], "0.01": [ 3.57162207365036e-07, 2.039596438407898e-07, 8.307397365570068e-07, -1.1221971362829208e-05 ] }, "hidden_norms_per_layer": [ 19381.54296875, 312615.90625, 2321965.25, 3640082.0, 692179.875 ], "bp_grad_norms_per_layer": [ 5.805031378258718e-06, 6.129388907538669e-07, 5.996270147079485e-07, 6.020890737090667e-07, 6.002287022965902e-07 ] }, "drift": { "embed.weight": 160.52406007787414, "embed.bias": 24.62414456524661, "blocks.0.ln.weight": 2.7208244800567627, "blocks.0.w1.weight": 31.32931103808957, "blocks.0.w1.bias": 24.578464288381767, "blocks.0.w2.weight": 76.69986787657376, "blocks.1.ln.weight": 2.1396796703338623, "blocks.1.w1.weight": 41.923375948001016, "blocks.1.w1.bias": 42.687481681423954, "blocks.1.w2.weight": 41.924463722510936, "blocks.2.ln.weight": 1.8985017538070679, "blocks.2.w1.weight": 49.16152213344094, "blocks.2.w1.bias": 49.20711111001434, "blocks.2.w2.weight": 40.28985166850181, "blocks.3.ln.weight": 1.8008719682693481, "blocks.3.w1.weight": 48.175959456780525, "blocks.3.w1.bias": 46.53396040604286, "blocks.3.w2.weight": 50.22310178120013, "out_ln.weight": 0.6654373407363892, "out_head.weight": 4.396233728367674, "out_head.bias": 1.1577362249288412 } } }, "456": { "bp": { "log": { "train_loss": [ 3.9721967333221437, 3.6422341596221925, 3.48317311088562, 3.384400603027344, 3.3024317309570312, 3.244081290435791, 3.1941285145568847, 3.1515661222839357, 3.110703917541504, 3.0678702397155764, 3.0385052178955076, 3.0125491227722168, 2.9755833444976805, 2.9489572755432127, 2.922896576004028, 2.8952785552215574, 2.8644921349334718, 2.837543485031128, 2.815750347290039, 2.7902291624450686, 2.7643565017700196, 2.749027015914917, 2.7195132698059084, 2.7054608350372313, 2.678812169647217, 2.655367773590088, 2.6455082903289795, 2.621019700469971, 2.6011389213562013, 2.592018772201538, 2.563096626586914, 2.5475857734680174, 2.5312666052246096, 2.507406205291748, 2.4910193432617187, 2.4760341359710694, 2.463254167022705, 2.443411533126831, 2.4220050102996824, 2.4045714128875733, 2.3885920139312744, 2.3683135289001465, 2.35701943649292, 2.339880431365967, 2.3229517127990724, 2.307848415145874, 2.2967623091125486, 2.277586201171875, 2.2599157135009764, 2.2420585137939453, 2.2226363661193846, 2.211539700012207, 2.1965325047302247, 2.181053132362366, 2.1636188303375246, 2.148667406234741, 2.1403865797424317, 2.118398011932373, 2.1047801040649414, 2.090780695877075, 2.0796419092178344, 2.0641602599716187, 2.046349721298218, 2.034338469238281, 2.0246941263580323, 2.010717328338623, 1.9879577135848998, 1.9908112752532958, 1.9648436877822877, 1.9547819673156739, 1.942468291015625, 1.9376066028594972, 1.9182865232086181, 1.9184452313232423, 1.9024611935424804, 1.8942070873641967, 1.8815567203521728, 1.8690018142318725, 1.8623281653213501, 1.8579870316314697, 1.8522963430404662, 1.836771893234253, 1.8371977347564696, 1.8297668377685548, 1.822632707901001, 1.8128088718032838, 1.8157146021270751, 1.8022041482162476, 1.8005397263336183, 1.80292332862854, 1.7940929901504516, 1.7858528171539307, 1.7898485187149047, 1.7881443228149414, 1.7818482210540771, 1.776848041419983, 1.7760994798278809, 1.7772335396957397, 1.7761591400146484, 1.7769086535644532 ], "train_acc": [ 0.09778, 0.1434, 0.17072, 0.18902, 0.20188, 0.21248, 0.22264, 0.22896, 0.23846, 0.24332, 0.2513, 0.25484, 0.26372, 0.26734, 0.27242, 0.27908, 0.2835, 0.28882, 0.29154, 0.29834, 0.30224, 0.30392, 0.31194, 0.3125, 0.31934, 0.32328, 0.32578, 0.3307, 0.33552, 0.33738, 0.34074, 0.34822, 0.3489, 0.35476, 0.35714, 0.3598, 0.36264, 0.3679, 0.373, 0.37474, 0.37926, 0.38364, 0.38482, 0.38824, 0.3921, 0.39568, 0.39682, 0.4015, 0.4051, 0.40902, 0.41356, 0.41618, 0.42064, 0.4233, 0.42546, 0.42778, 0.43316, 0.4337, 0.4397, 0.442, 0.44436, 0.45016, 0.4521, 0.45792, 0.45892, 0.4619, 0.46716, 0.46476, 0.47062, 0.475, 0.4752, 0.47938, 0.48198, 0.48142, 0.48788, 0.49032, 0.49212, 0.49664, 0.49638, 0.4973, 0.4982, 0.5018, 0.5012, 0.50402, 0.50828, 0.512, 0.50698, 0.51086, 0.51272, 0.5128, 0.51364, 0.5147, 0.51322, 0.51572, 0.51924, 0.51582, 0.52014, 0.51688, 0.519, 0.51792 ], "test_acc": [ 0.1436, 0.1724, 0.1976, 0.2161, 0.2193, 0.2286, 0.2386, 0.2425, 0.2508, 0.2496, 0.2572, 0.2667, 0.2678, 0.266, 0.2744, 0.2762, 0.278, 0.277, 0.2819, 0.2857, 0.2884, 0.2924, 0.2971, 0.2938, 0.2967, 0.2984, 0.2994, 0.2987, 0.3023, 0.3018, 0.3005, 0.3058, 0.3086, 0.3118, 0.3122, 0.317, 0.3139, 0.3083, 0.3131, 0.3166, 0.3193, 0.3185, 0.318, 0.3176, 0.319, 0.3176, 0.3157, 0.3158, 0.3241, 0.3165, 0.3253, 0.3217, 0.3224, 0.3244, 0.3217, 0.3181, 0.3206, 0.3261, 0.3208, 0.3204, 0.3198, 0.324, 0.322, 0.3227, 0.3184, 0.3193, 0.323, 0.3233, 0.3254, 0.3202, 0.3196, 0.3251, 0.3243, 0.3247, 0.3209, 0.3245, 0.3267, 0.322, 0.3224, 0.3177, 0.3213, 0.3196, 0.3218, 0.3207, 0.3219, 0.3223, 0.3244, 0.3202, 0.3248, 0.3217, 0.3236, 0.3228, 0.3211, 0.3231, 0.3222, 0.3215, 0.3217, 0.3216, 0.3217, 0.3219 ] }, "diagnostics": { "bp_cosine": [ 1.0, 1.0, 1.0, 1.0 ], "perturbation_rho": [ 0.9977043271064758, 0.9984360337257385, 0.9986849427223206, 0.998687744140625 ], "nudging": { "0.001": [ -0.0023326585069298744, -0.0024166624061763287, -0.002438523806631565, -0.002317411359399557 ], "0.003": [ -0.006994417868554592, -0.007245808374136686, -0.007311580237001181, -0.0069488403387367725 ], "0.01": [ -0.023275790736079216, -0.024107107892632484, -0.02432604506611824, -0.023123476654291153 ] }, "hidden_norms_per_layer": [ 233.81195068359375, 216.790283203125, 197.1819305419922, 192.0648956298828, 192.4108123779297 ], "bp_grad_norms_per_layer": [ 0.0011535885278135538, 0.0012452645460143685, 0.0012428142363205552, 0.0011899643577635288, 0.0009729627054184675 ] }, "drift": { "embed.weight": 7.39398977590873, "embed.bias": 12.21138556118351, "blocks.0.ln.weight": 0.14770954847335815, "blocks.0.w1.weight": 3.60408439890455, "blocks.0.w1.bias": 5.22994684696013, "blocks.0.w2.weight": 13.053238719433654, "blocks.1.ln.weight": 0.2946910560131073, "blocks.1.w1.weight": 4.020628252632815, "blocks.1.w1.bias": 4.08986730586542, "blocks.1.w2.weight": 14.948894323523715, "blocks.2.ln.weight": 0.3565492630004883, "blocks.2.w1.weight": 4.131160512042399, "blocks.2.w1.bias": 4.003626979292673, "blocks.2.w2.weight": 15.902707284756408, "blocks.3.ln.weight": 0.5485884547233582, "blocks.3.w1.weight": 4.454943236812576, "blocks.3.w1.bias": 3.7282169522367608, "blocks.3.w2.weight": 17.516109303089266, "out_ln.weight": 0.14718063175678253, "out_head.weight": 2.8761547030079035, "out_head.bias": 2.9449926838807814 } }, "dfa": { "log": { "train_loss": [ 4.161876879730225, 4.0187293235778805, 4.006406578216553, 4.022609137268066, 4.043629914093017, 4.065061766357422, 4.069896491699219, 4.078123603668213, 4.078062142028808, 4.077979364929199, 4.076888464050293, 4.075497508239746, 4.072547879943848, 4.0701713494110106, 4.069026535797119, 4.064811604537964, 4.061763154296875, 4.060759606628418, 4.057107001800537, 4.0601828897094725, 4.055534585723877, 4.054944921569824, 4.051256662597656, 4.050451954650879, 4.050393276824951, 4.046223313903808, 4.047258992919922, 4.047048767089843, 4.044600559692383, 4.0430317344665525, 4.043368485031128, 4.043571725158691, 4.04141174987793, 4.0433316299438475, 4.041590490188598, 4.042222282104492, 4.039775361480713, 4.0390912678527835, 4.040217839660644, 4.039096583099365, 4.040037224197388, 4.037792987365723, 4.037555375213623, 4.0370241506958005, 4.036846955261231, 4.037261250762939, 4.036293372497559, 4.036143635406495, 4.036682392578125, 4.037161629333496, 4.033746524200439, 4.0374753099060054, 4.035349784240722, 4.037173667144775, 4.0371518122863765, 4.035486927947998, 4.036137412185669, 4.036821200714112, 4.035519530792237, 4.035781366424561, 4.034954352722168, 4.03612970993042, 4.036103559722901, 4.03622392074585, 4.035975666046142, 4.036468890686035, 4.0351540557861325, 4.035398180618286, 4.034821971435547, 4.0340077725219725, 4.0375495939636235, 4.034684106140137, 4.034154214782715, 4.032997749557495, 4.033953489532471, 4.034259751586914, 4.034655684280396, 4.0341955810546875, 4.032625467605591, 4.033415794677734, 4.0320995791625975, 4.034218223724365, 4.033138923187256, 4.03524599899292, 4.03208328704834, 4.032160759277343, 4.034070838088989, 4.030838821868897, 4.031133236694336, 4.032454485397339, 4.0351671361541745, 4.032833539428711, 4.03182988456726, 4.035511425170898, 4.034130640563965, 4.030528937072754, 4.031213985443115, 4.02996834274292, 4.034611818542481, 4.032926365203857 ], "train_acc": [ 0.06838, 0.08666, 0.08636, 0.08428, 0.08436, 0.08, 0.07998, 0.0795, 0.08026, 0.07902, 0.08168, 0.08248, 0.08128, 0.08158, 0.0824, 0.08262, 0.08376, 0.08384, 0.08508, 0.08482, 0.0866, 0.08544, 0.08676, 0.08688, 0.08632, 0.08594, 0.0864, 0.08776, 0.08736, 0.0885, 0.08856, 0.08896, 0.08866, 0.08886, 0.08898, 0.09026, 0.08896, 0.09028, 0.09104, 0.08926, 0.08906, 0.08948, 0.09216, 0.09092, 0.0916, 0.09188, 0.09304, 0.09212, 0.09048, 0.0914, 0.09228, 0.09146, 0.09234, 0.09168, 0.09086, 0.09146, 0.09152, 0.0926, 0.0927, 0.09192, 0.09064, 0.09282, 0.09188, 0.09342, 0.09134, 0.09174, 0.09164, 0.09388, 0.09276, 0.0928, 0.09378, 0.0937, 0.09332, 0.09394, 0.09278, 0.09276, 0.09362, 0.09378, 0.0931, 0.09256, 0.09352, 0.0925, 0.09258, 0.0933, 0.09242, 0.09364, 0.09322, 0.09358, 0.09406, 0.09348, 0.09254, 0.09332, 0.09516, 0.09486, 0.09452, 0.09486, 0.09398, 0.09486, 0.0926, 0.09292 ], "test_acc": [ 0.0832, 0.0875, 0.0896, 0.0827, 0.0848, 0.0809, 0.0756, 0.0769, 0.0809, 0.0777, 0.0775, 0.0751, 0.0835, 0.0804, 0.0776, 0.0805, 0.0857, 0.0753, 0.0811, 0.0813, 0.0813, 0.0786, 0.0812, 0.0834, 0.0862, 0.0883, 0.0863, 0.0861, 0.0853, 0.0878, 0.0835, 0.087, 0.0873, 0.0833, 0.087, 0.0834, 0.08, 0.0867, 0.0897, 0.0872, 0.0834, 0.0871, 0.0804, 0.084, 0.0892, 0.0908, 0.0851, 0.0891, 0.0889, 0.0834, 0.0892, 0.0905, 0.0915, 0.0885, 0.09, 0.0895, 0.0863, 0.0889, 0.0923, 0.0884, 0.0901, 0.087, 0.0912, 0.0893, 0.0901, 0.0868, 0.09, 0.0877, 0.09, 0.0906, 0.0895, 0.0902, 0.0864, 0.0878, 0.0914, 0.0888, 0.0878, 0.0886, 0.089, 0.0879, 0.0894, 0.0902, 0.0885, 0.0887, 0.0886, 0.089, 0.0897, 0.0892, 0.09, 0.0898, 0.0898, 0.089, 0.0898, 0.0896, 0.0895, 0.0893, 0.0895, 0.0894, 0.0895, 0.0894 ] }, "diagnostics": { "bp_cosine": [ 0.1401812732219696, -0.0036733129527419806, -0.012468342669308186, -0.005749615840613842 ], "perturbation_rho": [ -0.006108707282692194, 0.0, 0.0, 0.0 ], "nudging": { "0.001": [ -6.239861249923706e-07, 0.0, -7.450580596923828e-09, 0.0 ], "0.003": [ -1.9390136003494263e-06, -1.862645149230957e-09, -5.587935447692871e-09, -9.313225746154785e-10 ], "0.01": [ -6.262678653001785e-06, 7.450580596923828e-09, -9.313225746154785e-09, -9.313225746154785e-10 ] }, "hidden_norms_per_layer": [ 18858.84765625, 30104360.0, 115109720.0, 142242432.0, 189834320.0 ], "bp_grad_norms_per_layer": [ 1.6800353250800981e-06, 1.0384521687001325e-08, 1.0259526561640087e-08, 1.025457141423658e-08, 1.027041474088719e-08 ] }, "drift": { "embed.weight": 159.1972472218232, "embed.bias": 106.18499737865416, "blocks.0.ln.weight": 8.374361991882324, "blocks.0.w1.weight": 130.25292802776573, "blocks.0.w1.bias": 147.17751025342662, "blocks.0.w2.weight": 140.99980409584657, "blocks.1.ln.weight": 7.522654056549072, "blocks.1.w1.weight": 183.9407903471226, "blocks.1.w1.bias": 180.51106512410223, "blocks.1.w2.weight": 111.67829579527385, "blocks.2.ln.weight": 7.498361110687256, "blocks.2.w1.weight": 174.07305808153814, "blocks.2.w1.bias": 174.07821632973472, "blocks.2.w2.weight": 98.850301691257, "blocks.3.ln.weight": 7.616756439208984, "blocks.3.w1.weight": 196.59852287605085, "blocks.3.w1.bias": 194.4284525943356, "blocks.3.w2.weight": 107.54506697831434, "out_ln.weight": 1.6382553577423096, "out_head.weight": 8.655421610551045, "out_head.bias": 0.8148936743952768 } }, "fa": { "log": { "train_loss": [ 4.217838704376221, 4.173818229370117, 4.116404933776855, 4.05976043182373, 4.010950881347656, 3.9877797143554687, 3.9654394758605958, 3.9516948413085937, 3.9414562885284425, 3.933943572845459, 3.9315726831054687, 3.9283136461639403, 3.918033894042969, 3.912524202957153, 3.9111721415710448, 3.9033537468719484, 3.898933872528076, 3.8940574435424806, 3.888040324020386, 3.8902594078826906, 3.881309941253662, 3.879018879852295, 3.874496072845459, 3.871587658691406, 3.867957287750244, 3.8586695248413085, 3.858678229446411, 3.8550938296508788, 3.848460863571167, 3.843934544219971, 3.841355255813599, 3.838310421142578, 3.832888546218872, 3.8317080396270753, 3.8244371617126465, 3.826680388870239, 3.820091895675659, 3.8168691710662843, 3.8143892570495606, 3.8094303827667235, 3.811340135421753, 3.808084242401123, 3.8061284660339356, 3.8042135272979736, 3.803864609298706, 3.8014156354522703, 3.796123163604736, 3.7960259418487547, 3.794198034515381, 3.793316494140625, 3.7887488148498534, 3.7955673222351076, 3.787652738494873, 3.789291185760498, 3.7856833452606202, 3.7837219773864748, 3.7850829206848147, 3.7822190380859375, 3.780595345993042, 3.780773038482666, 3.7798492553710936, 3.777712000579834, 3.77948525390625, 3.7754471015930178, 3.7783823108673094, 3.7751461651611327, 3.7731632305908205, 3.7698883753204346, 3.7678470989227293, 3.7675471087646484, 3.768726477050781, 3.7664906182861326, 3.7642928605651855, 3.7634823627471925, 3.7624606089782713, 3.7626713751983645, 3.76369431892395, 3.7644162739562987, 3.759299655914307, 3.759175650482178, 3.758095093460083, 3.758687426223755, 3.7558257189941404, 3.758380405960083, 3.7552133833312986, 3.7560367737579345, 3.757290748748779, 3.7513376335144044, 3.7552056824493407, 3.7553686474609376, 3.754800821685791, 3.752587652511597, 3.7521287742614744, 3.754502056884766, 3.7546933338928223, 3.750953895263672, 3.749489372558594, 3.7477175559997558, 3.753209332885742, 3.75081868347168 ], "train_acc": [ 0.06112, 0.06028, 0.0684, 0.07838, 0.08954, 0.09232, 0.0962, 0.09766, 0.099, 0.10078, 0.1022, 0.10314, 0.10234, 0.1031, 0.10412, 0.10498, 0.10776, 0.10782, 0.109, 0.10792, 0.10914, 0.11072, 0.10982, 0.11022, 0.11282, 0.11262, 0.11402, 0.11302, 0.11302, 0.11694, 0.11542, 0.11602, 0.11642, 0.1175, 0.11876, 0.12074, 0.11926, 0.11816, 0.12184, 0.12202, 0.12178, 0.12238, 0.1222, 0.12238, 0.12092, 0.12362, 0.12338, 0.12428, 0.1248, 0.12516, 0.12588, 0.12532, 0.12592, 0.12498, 0.12348, 0.1247, 0.12646, 0.12716, 0.12708, 0.1273, 0.12668, 0.12754, 0.1268, 0.1288, 0.12586, 0.12914, 0.12678, 0.12832, 0.1304, 0.12808, 0.12922, 0.13104, 0.12964, 0.13008, 0.13144, 0.1314, 0.12958, 0.1298, 0.1312, 0.13116, 0.13188, 0.1315, 0.1315, 0.13028, 0.13222, 0.1339, 0.13118, 0.13218, 0.13354, 0.1333, 0.13136, 0.13216, 0.13268, 0.13262, 0.13292, 0.13426, 0.13368, 0.13344, 0.13306, 0.13192 ], "test_acc": [ 0.0534, 0.0616, 0.0812, 0.0837, 0.0888, 0.0944, 0.0942, 0.0981, 0.1007, 0.0999, 0.1021, 0.0988, 0.1083, 0.1019, 0.1073, 0.1051, 0.1085, 0.107, 0.1044, 0.1125, 0.1066, 0.1037, 0.1053, 0.1129, 0.1129, 0.1183, 0.1149, 0.1188, 0.1188, 0.1138, 0.114, 0.1201, 0.1229, 0.1223, 0.1169, 0.1239, 0.1156, 0.1221, 0.1195, 0.1228, 0.1275, 0.1226, 0.1232, 0.1264, 0.1312, 0.1262, 0.126, 0.1266, 0.126, 0.124, 0.1272, 0.1229, 0.1293, 0.1247, 0.125, 0.1265, 0.1232, 0.1295, 0.1274, 0.1275, 0.1273, 0.1276, 0.1285, 0.1266, 0.1285, 0.128, 0.1299, 0.1295, 0.1308, 0.1252, 0.1315, 0.1292, 0.1291, 0.1296, 0.1298, 0.1308, 0.1314, 0.1297, 0.1304, 0.1307, 0.13, 0.1335, 0.1296, 0.1302, 0.1329, 0.1303, 0.1313, 0.1315, 0.132, 0.1311, 0.1311, 0.1322, 0.1311, 0.1317, 0.1308, 0.1309, 0.1306, 0.1308, 0.1311, 0.131 ] }, "diagnostics": { "bp_cosine": [ 0.08354228734970093, 0.008694879710674286, -0.10087701678276062, 0.9982630610466003 ], "perturbation_rho": [ -0.00011634547263383865, 0.02022995427250862, -0.014473065733909607, -0.005974383093416691 ], "nudging": { "0.001": [ -1.8016435205936432e-06, -6.216578185558319e-08, 9.220093488693237e-08, -1.9045546650886536e-06 ], "0.003": [ -5.271751433610916e-06, -1.3830140233039856e-07, 7.264316082000732e-07, -7.401220500469208e-06 ], "0.01": [ -1.7585232853889465e-05, -5.245674401521683e-07, 3.0526425689458847e-06, -2.642977051436901e-05 ] }, "hidden_norms_per_layer": [ 11478.736328125, 155429.828125, 870084.3125, 1247755.625, 225384.953125 ], "bp_grad_norms_per_layer": [ 9.544731256028172e-06, 1.3853283462594845e-06, 1.3375421303862822e-06, 1.3386792261371738e-06, 1.3382986026044819e-06 ] }, "drift": { "embed.weight": 105.03852710133908, "embed.bias": 26.837405563994206, "blocks.0.ln.weight": 2.3730220794677734, "blocks.0.w1.weight": 28.170015713455946, "blocks.0.w1.bias": 23.197896445244968, "blocks.0.w2.weight": 73.04463083219117, "blocks.1.ln.weight": 1.476442575454712, "blocks.1.w1.weight": 29.314224337417265, "blocks.1.w1.bias": 28.13876609135274, "blocks.1.w2.weight": 39.8446977611363, "blocks.2.ln.weight": 1.1785210371017456, "blocks.2.w1.weight": 29.01006529023273, "blocks.2.w1.bias": 32.13611131919081, "blocks.2.w2.weight": 31.240416399097963, "blocks.3.ln.weight": 1.4267656803131104, "blocks.3.w1.weight": 32.92178917188402, "blocks.3.w1.bias": 34.64484336575641, "blocks.3.w2.weight": 31.82487883605455, "out_ln.weight": 0.5113871097564697, "out_head.weight": 3.9161050246024547, "out_head.bias": 2.224345965208238 } } }, "config": { "dataset": "cifar100", "d_hidden": 256, "num_blocks": 4, "batch_size": 128, "epochs": 100, "lr": 0.001, "lr_fb": 0.001, "wd": 0.01, "lam": 0.1, "K": 4, "sigma_bridge": 0.05, "ema_momentum": 0.995, "term_grad_weight": 1.0, "seeds": [ 42, 123, 456 ], "gpu": 0, "output_dir": "results/cifar100_protocol_validation", "methods": [ "bp", "fa", "dfa" ], "random_targets": false, "penalty_lam": 0.0, "num_classes": 100 } }