{ "8": { "dfa": { "log": { "train_loss": [ 2.0621551152038573, 2.032111440887451, 2.026601074066162, 2.025331893157959, 2.020625417556763, 2.0179995822143555, 2.0146445192718505, 2.0131945146179198, 2.0136738064575197, 2.0107804653549195, 2.0097129691314697, 2.0098247957611086, 2.0069902968597413, 2.0106662986755373, 2.005746477012634, 2.006078488845825, 2.0048853853988646, 2.0049549353027345, 2.000735955581665, 2.0011458109283446, 2.000437735824585, 1.997751926422119, 2.0025174723052976, 1.998807412185669, 2.000533748397827, 1.9959854383087159, 1.9978079619979858, 1.9958966563415528, 1.996048821105957, 1.9932382696533204, 1.9939029093170166, 1.9939674211120606, 1.993965982017517, 1.9950546265411377, 1.9910492811584473, 1.9932873993301392, 1.9934390425109862, 1.9942208531188965, 1.993705040283203, 1.9926780229949952, 1.9916279154205323, 1.9933884371566772, 1.9898671300506592, 1.989496806678772, 1.990106160736084, 1.9908844341278076, 1.9897770419311522, 1.9911138761901856, 1.9896133652496337, 1.9886156379699707, 1.989099236602783, 1.9889647799682617, 1.987695361251831, 1.987419519920349, 1.9892369752502441, 1.9867283428955078, 1.985670655517578, 1.985864746055603, 1.9890831967163085, 1.986451468811035, 1.98608133934021, 1.986669381942749, 1.9860679926300049, 1.9857238763046265, 1.9861250590515136, 1.9867476695251465, 1.9855187320709229, 1.9870084239959718, 1.9870807974243163, 1.9854985891723633, 1.9862063644790648, 1.986035345840454, 1.9848036085510254, 1.9857490323638916, 1.9848638201141358, 1.9828469595336915, 1.984784072341919, 1.986132272644043, 1.9855829830932616, 1.9845155084228516, 1.9863489217376709, 1.983173909263611, 1.9847177695465088, 1.985843783493042, 1.9844750480651856, 1.9835583266448975, 1.9832866216278076, 1.9824517697906494, 1.9833141297912598, 1.9830520043182374, 1.9822975007629395, 1.9850699710464477, 1.9854454914855957, 1.984017345199585, 1.9839908868408203, 1.9843200086212158, 1.9820627519989014, 1.9816079531478883, 1.9821515436553956, 1.9855904679107665 ], "train_acc": [ 0.2372, 0.24852, 0.25134, 0.25132, 0.2549, 0.25304, 0.25902, 0.26106, 0.25824, 0.2579, 0.26096, 0.25852, 0.26276, 0.2618, 0.2637, 0.26306, 0.26492, 0.26136, 0.26604, 0.26754, 0.26836, 0.26768, 0.26738, 0.26802, 0.26928, 0.26922, 0.26836, 0.2679, 0.2719, 0.27114, 0.26944, 0.27052, 0.2709, 0.26888, 0.26714, 0.27096, 0.2726, 0.27154, 0.27206, 0.27296, 0.27414, 0.27206, 0.27384, 0.27372, 0.27318, 0.27372, 0.27472, 0.27246, 0.27264, 0.27428, 0.27502, 0.27448, 0.2742, 0.27528, 0.275, 0.27736, 0.2766, 0.2759, 0.27522, 0.27712, 0.27626, 0.27468, 0.27628, 0.27488, 0.27464, 0.27474, 0.27628, 0.2782, 0.27526, 0.27606, 0.27564, 0.27476, 0.27602, 0.27802, 0.2757, 0.27936, 0.27668, 0.27736, 0.2746, 0.27752, 0.27548, 0.27848, 0.2752, 0.27484, 0.27662, 0.27726, 0.2752, 0.27864, 0.27702, 0.27936, 0.27774, 0.27604, 0.27712, 0.27604, 0.27688, 0.27666, 0.27784, 0.27676, 0.27736, 0.27638 ], "test_acc": [ 0.2412, 0.2706, 0.2562, 0.262, 0.2857, 0.2628, 0.2833, 0.2782, 0.2575, 0.2859, 0.2873, 0.267, 0.288, 0.2774, 0.2724, 0.2759, 0.2718, 0.2937, 0.2699, 0.2666, 0.2763, 0.2678, 0.299, 0.2876, 0.2835, 0.2979, 0.28, 0.2829, 0.2768, 0.2923, 0.281, 0.2884, 0.2948, 0.2862, 0.2896, 0.2881, 0.2871, 0.2895, 0.2905, 0.298, 0.2897, 0.2784, 0.2898, 0.2972, 0.2879, 0.2895, 0.2881, 0.2884, 0.2944, 0.2935, 0.3039, 0.2886, 0.2993, 0.2857, 0.2931, 0.2941, 0.2933, 0.3021, 0.2938, 0.2991, 0.2841, 0.2896, 0.2945, 0.2911, 0.3, 0.2963, 0.2894, 0.2908, 0.2917, 0.2946, 0.2927, 0.2946, 0.296, 0.3, 0.2935, 0.2898, 0.2955, 0.2929, 0.2928, 0.2992, 0.2931, 0.2954, 0.2933, 0.2924, 0.2933, 0.2925, 0.2919, 0.2941, 0.2941, 0.2946, 0.2927, 0.2945, 0.294, 0.2931, 0.2945, 0.2935, 0.2943, 0.2943, 0.2946, 0.2946 ] }, "diagnostics": { "bp_cosine": [ 0.3852921724319458, -0.0006387766916304827, 0.00021705820108763874, -0.00021593061683233827, -0.0001868726685643196, 2.6933841581922024e-05, 0.00016013934509828687, 7.406133954646066e-05, -3.145005030091852e-05, 0.0004791871178895235, 0.0008787637343630195, -0.0007018762989901006 ], "perturbation_rho": [ 0.01893431320786476, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], "nudging": { "0.001": [ -5.145557224750519e-07, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], "0.003": [ -1.3075768947601318e-06, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 ], "0.01": [ -4.1211023926734924e-06, 9.313225746154785e-10, 0.0, 0.0, 0.0, 0.0, 0.0, 9.313225746154785e-10, 0.0, 0.0, 0.0, 0.0 ] }, "hidden_norms_per_layer": [ 55220.6953125, 703320128.0, 4374171648.0, 4628753408.0, 5259584512.0, 5602404864.0, 6980004864.0, 7249905664.0, 7601436160.0, 8436634624.0, 9617230848.0, 9769342976.0, 10366923776.0 ], "bp_grad_norms_per_layer": [ 2.842145931936102e-07, 1.8565524118052679e-10, 1.8530457723819893e-10, 1.8531473577887425e-10, 1.852697301130135e-10, 1.852322323303568e-10, 1.8537825441367062e-10, 1.8532649026514747e-10, 1.8534035417516748e-10, 1.8526735701129837e-10, 1.85539111852151e-10, 1.8548607094714953e-10, 1.8552165359508876e-10 ] }, "drift": { "embed.weight": 331.10430617288347, "embed.bias": 252.9571084710531, "blocks.0.ln.weight": 10.63590306325829, "blocks.0.w1.weight": 263.71773941078897, "blocks.0.w1.bias": 240.90214072476394, "blocks.0.w2.weight": 484.24212277052584, "blocks.1.ln.weight": 8.994925940820934, "blocks.1.w1.weight": 375.0257234388459, "blocks.1.w1.bias": 372.81964220264507, "blocks.1.w2.weight": 399.05181196420534, "blocks.2.ln.weight": 6.785381078745234, "blocks.2.w1.weight": 237.77081904994165, "blocks.2.w1.bias": 215.00165533028638, "blocks.2.w2.weight": 243.766610543445, "blocks.3.ln.weight": 8.749875392840245, "blocks.3.w1.weight": 331.37899522969747, "blocks.3.w1.bias": 315.8500446097451, "blocks.3.w2.weight": 312.3537327104354, "blocks.4.ln.weight": 8.073930968349993, "blocks.4.w1.weight": 291.9215675635427, "blocks.4.w1.bias": 265.90446228164745, "blocks.4.w2.weight": 273.96062778369446, "blocks.5.ln.weight": 9.487609121353676, "blocks.5.w1.weight": 379.3472655416328, "blocks.5.w1.bias": 364.18802309088414, "blocks.5.w2.weight": 375.20829167636464, "blocks.6.ln.weight": 7.271832594737899, "blocks.6.w1.weight": 274.22615021165615, "blocks.6.w1.bias": 253.82742699623324, "blocks.6.w2.weight": 263.6858849060695, "blocks.7.ln.weight": 8.060212000337511, "blocks.7.w1.weight": 280.8757929768, "blocks.7.w1.bias": 262.00267937269797, "blocks.7.w2.weight": 269.2018115874501, "blocks.8.ln.weight": 9.34446156262141, "blocks.8.w1.weight": 362.0470567061417, "blocks.8.w1.bias": 345.9730629510181, "blocks.8.w2.weight": 346.1165305240824, "blocks.9.ln.weight": 10.547709601128119, "blocks.9.w1.weight": 421.4247930203293, "blocks.9.w1.bias": 384.7717987562065, "blocks.9.w2.weight": 391.67325768744075, "blocks.10.ln.weight": 7.98341504623973, "blocks.10.w1.weight": 310.77250388889735, "blocks.10.w1.bias": 289.60855856986865, "blocks.10.w2.weight": 285.0024842791926, "blocks.11.ln.weight": 9.5014629592837, "blocks.11.w1.weight": 374.13082807575006, "blocks.11.w1.bias": 348.0579252323944, "blocks.11.w2.weight": 345.1186750982964, "out_ln.weight": 0.6314801307530133, "out_head.weight": 8.99875779950455, "out_head.bias": 0.46869532178721607 } }, "fa": { "log": { "train_loss": [ 2.0407437321472166, 1.9698008823394775, 1.9470496474838257, 1.928373962173462, 1.9022277151870728, 1.8882839569091796, 1.8752377725219727, 1.8682713878631592, 1.8643299547576904, 1.8616547422027587, 1.861611949081421, 1.8620045602416992, 1.8579721353149414, 1.8603198122406006, 1.852560590248108, 1.850361158065796, 1.8412007814788818, 1.8371962604141234, 1.830751480064392, 1.830014287109375, 1.8211512073135376, 1.8158404816055298, 1.8152668701553345, 1.8068659871673585, 1.807379986190796, 1.8042517069244384, 1.8004914111709596, 1.7970869720077514, 1.7892543896484374, 1.786518664894104, 1.7887954705047608, 1.787979825515747, 1.782947692489624, 1.7796524175643922, 1.7780501412963867, 1.7751860930252075, 1.77746077003479, 1.7734027153778076, 1.7719406731414795, 1.7689697579574586, 1.7694388663101197, 1.7689455612182616, 1.7627556784820557, 1.76162135345459, 1.7598590536880494, 1.7624462253189086, 1.7612236563873291, 1.7604118212127686, 1.757069122390747, 1.7525782149887086, 1.7570673566055297, 1.753389842262268, 1.7526489987564087, 1.7506390865325927, 1.7564489751434327, 1.7510275960922241, 1.7505046075057984, 1.7489783365249634, 1.7482558310699463, 1.7478452098846435, 1.746708115234375, 1.7485102898788452, 1.7510625556182862, 1.7457116416168212, 1.7465464822387695, 1.7432715910339356, 1.7428231796646119, 1.7424320980072021, 1.7456896788787841, 1.7436157192611694, 1.7397695401763915, 1.740845919532776, 1.7387019988250731, 1.7427006936645508, 1.7407404198455811, 1.7381612093734742, 1.742300903930664, 1.741065849647522, 1.7432577428817748, 1.7419394709014893, 1.7393424035644531, 1.7376308783721923, 1.7395985202407838, 1.7360640591812133, 1.7388901407623292, 1.7372601049804688, 1.737554390182495, 1.7332513166046142, 1.7339157095718383, 1.73852820854187, 1.7378087186050415, 1.7388774118423462, 1.732873987159729, 1.7344641219329835, 1.7370289881134033, 1.735849939918518, 1.731984726486206, 1.7332192292022706, 1.7361968645477295, 1.7349854880142213 ], "train_acc": [ 0.2475, 0.27976, 0.29052, 0.29796, 0.31032, 0.31312, 0.32008, 0.32376, 0.3275, 0.32858, 0.3309, 0.33454, 0.33414, 0.33476, 0.33654, 0.33644, 0.34346, 0.3414, 0.34694, 0.3445, 0.34916, 0.35076, 0.35364, 0.35626, 0.35816, 0.35708, 0.35872, 0.35684, 0.36218, 0.36264, 0.36392, 0.36252, 0.36352, 0.3664, 0.36594, 0.36678, 0.36554, 0.36668, 0.36746, 0.3698, 0.37054, 0.36998, 0.37094, 0.3744, 0.3719, 0.37366, 0.37076, 0.3724, 0.37426, 0.37956, 0.3765, 0.373, 0.37478, 0.37612, 0.37244, 0.37586, 0.37716, 0.37568, 0.3782, 0.37812, 0.37682, 0.3766, 0.37616, 0.3787, 0.3766, 0.37772, 0.38072, 0.37976, 0.37842, 0.3751, 0.3807, 0.3796, 0.37884, 0.37908, 0.38054, 0.38114, 0.37896, 0.37942, 0.3799, 0.37928, 0.38292, 0.37956, 0.38116, 0.38178, 0.38176, 0.38242, 0.38104, 0.38424, 0.38304, 0.38046, 0.3825, 0.38104, 0.38308, 0.38346, 0.38224, 0.38272, 0.38498, 0.38364, 0.3829, 0.3843 ], "test_acc": [ 0.2791, 0.3084, 0.3148, 0.3302, 0.3397, 0.3317, 0.3545, 0.3557, 0.3438, 0.3655, 0.3703, 0.3631, 0.3661, 0.3705, 0.3563, 0.3627, 0.374, 0.3834, 0.3758, 0.3803, 0.3716, 0.3648, 0.3972, 0.3934, 0.3921, 0.3901, 0.3811, 0.3987, 0.3921, 0.3942, 0.3883, 0.3901, 0.3797, 0.3967, 0.3989, 0.3944, 0.3874, 0.3912, 0.3979, 0.3907, 0.405, 0.4035, 0.4043, 0.4026, 0.3974, 0.397, 0.4022, 0.4051, 0.3848, 0.4045, 0.4084, 0.4049, 0.4029, 0.3967, 0.4055, 0.4057, 0.406, 0.4054, 0.4069, 0.4042, 0.3997, 0.4073, 0.4117, 0.406, 0.4061, 0.4051, 0.4068, 0.4082, 0.4055, 0.4063, 0.4081, 0.4109, 0.4081, 0.4085, 0.4103, 0.4019, 0.4102, 0.4103, 0.4103, 0.4095, 0.4108, 0.4093, 0.408, 0.4097, 0.4098, 0.4075, 0.4093, 0.4119, 0.4105, 0.4117, 0.4103, 0.4113, 0.4124, 0.4111, 0.4111, 0.4108, 0.4113, 0.4122, 0.4122, 0.4122 ] }, "diagnostics": { "bp_cosine": [ 0.03226801007986069, 0.06280244886875153, 0.010810771957039833, -0.03502938896417618, -0.02679986134171486, -0.043182265013456345, -0.04647787660360336, -0.03189649432897568, -0.0780816450715065, -0.05483850836753845, -0.02444492094218731, 0.987554669380188 ], "perturbation_rho": [ 0.0010241912677884102, -0.04057024046778679, 0.018125081434845924, 0.0397450290620327, -0.017151735723018646, 0.010231071151793003, 0.02530200220644474, -0.034604527056217194, -0.008924206718802452, 0.008240236900746822, 0.006566178053617477, -0.016290105879306793 ], "nudging": { "0.001": [ -2.0849984139204025e-06, -2.7567148208618164e-07, -1.979060471057892e-08, 8.824281394481659e-08, 7.82310962677002e-08, -1.6996636986732483e-08, 7.520429790019989e-08, 2.584420144557953e-08, -8.381903171539307e-09, 5.820766091346741e-09, 1.3504177331924438e-08, -1.0039657354354858e-06 ], "0.003": [ -6.289919838309288e-06, -7.05476850271225e-07, -2.3283064365386963e-08, 1.6996636986732483e-07, 1.1431984603404999e-07, 2.3702159523963928e-07, 1.7462298274040222e-07, 1.2898817658424377e-07, 3.096647560596466e-07, 1.2014061212539673e-07, 1.3527460396289825e-07, -3.6957208067178726e-06 ], "0.01": [ -2.0938459783792496e-05, -2.1811574697494507e-06, -1.424923539161682e-07, 4.1816383600234985e-07, 3.825407475233078e-07, 5.098991096019745e-07, 6.461050361394882e-07, 3.5669654607772827e-07, 1.1634547263383865e-06, 6.814952939748764e-07, 3.4994445741176605e-07, -1.281173899769783e-05 ] }, "hidden_norms_per_layer": [ 7157.4072265625, 94832.078125, 404715.40625, 900567.4375, 1081047.875, 1230157.0, 1365586.875, 1528390.375, 1618979.25, 1737319.75, 1867286.375, 1900228.625, 1396480.75 ], "bp_grad_norms_per_layer": [ 2.4219883925979957e-05, 1.7657896478340263e-06, 6.163170382933458e-07, 5.848775117556215e-07, 5.799907398795767e-07, 5.837350158799381e-07, 5.848508521921758e-07, 5.83334440307226e-07, 5.784736458736006e-07, 5.808138325846812e-07, 5.77644073018746e-07, 5.686248982783582e-07, 5.330333578967839e-07 ] }, "drift": { "embed.weight": 47.13343033798094, "embed.bias": 17.027552330320233, "blocks.0.ln.weight": 1.1320890684307203, "blocks.0.w1.weight": 16.27975609186272, "blocks.0.w1.bias": 12.808052799935883, "blocks.0.w2.weight": 51.91947766632093, "blocks.1.ln.weight": 0.9202706367200378, "blocks.1.w1.weight": 18.82744877577282, "blocks.1.w1.bias": 12.653020534114173, "blocks.1.w2.weight": 46.17134940964603, "blocks.2.ln.weight": 0.7482719581080549, "blocks.2.w1.weight": 19.557560479232055, "blocks.2.w1.bias": 16.890590269942404, "blocks.2.w2.weight": 39.02075074459777, "blocks.3.ln.weight": 0.5160212396517895, "blocks.3.w1.weight": 16.570363915867922, "blocks.3.w1.bias": 17.214082946851736, "blocks.3.w2.weight": 30.85580581534199, "blocks.4.ln.weight": 0.43511517848811326, "blocks.4.w1.weight": 16.64619937865781, "blocks.4.w1.bias": 18.15413776267707, "blocks.4.w2.weight": 29.35744053745253, "blocks.5.ln.weight": 0.46109930109565794, "blocks.5.w1.weight": 16.648426948133086, "blocks.5.w1.bias": 18.405016598411816, "blocks.5.w2.weight": 32.04993283017616, "blocks.6.ln.weight": 0.5239951706454276, "blocks.6.w1.weight": 17.307678602670318, "blocks.6.w1.bias": 17.94260026627366, "blocks.6.w2.weight": 35.649016996813835, "blocks.7.ln.weight": 0.549764892230928, "blocks.7.w1.weight": 17.37898974454947, "blocks.7.w1.bias": 17.920017558565252, "blocks.7.w2.weight": 47.618251557182425, "blocks.8.ln.weight": 0.5059874235487765, "blocks.8.w1.weight": 16.71552334731513, "blocks.8.w1.bias": 18.79123189462688, "blocks.8.w2.weight": 33.90958537646263, "blocks.9.ln.weight": 0.5547967624139118, "blocks.9.w1.weight": 17.469950204626425, "blocks.9.w1.bias": 17.514572577139017, "blocks.9.w2.weight": 47.637641198604065, "blocks.10.ln.weight": 0.4702376652398834, "blocks.10.w1.weight": 15.609742871134038, "blocks.10.w1.bias": 14.194008119737171, "blocks.10.w2.weight": 53.92588624506532, "blocks.11.ln.weight": 0.5638652470821104, "blocks.11.w1.weight": 18.329914062311936, "blocks.11.w1.bias": 16.960111542785533, "blocks.11.w2.weight": 61.54428413245766, "out_ln.weight": 0.36613157588839446, "out_head.weight": 6.704387093335344, "out_head.bias": 0.6590609045431134 } } }, "config": { "dataset": "cifar10", "d_hidden": 512, "num_blocks": 12, "batch_size": 128, "epochs": 100, "lr": 0.001, "lr_fb": 0.001, "wd": 0.01, "lam": 0.1, "K": 4, "sigma_bridge": 0.05, "ema_momentum": 0.995, "term_grad_weight": 1.0, "seeds": [ 8 ], "gpu": 0, "output_dir": "results/fa_dfa_d512_L12_seed8", "methods": [ "fa", "dfa" ], "random_targets": false, "penalty_lam": 0.0, "num_classes": 10 } }