diff options
Diffstat (limited to 'results/fa_dfa_d512_L12_seed8/results_cifar10.json')
| -rw-r--r-- | results/fa_dfa_d512_L12_seed8/results_cifar10.json | 969 |
1 files changed, 969 insertions, 0 deletions
diff --git a/results/fa_dfa_d512_L12_seed8/results_cifar10.json b/results/fa_dfa_d512_L12_seed8/results_cifar10.json new file mode 100644 index 0000000..8d804da --- /dev/null +++ b/results/fa_dfa_d512_L12_seed8/results_cifar10.json @@ -0,0 +1,969 @@ +{ + "8": { + "dfa": { + "log": { + "train_loss": [ + 2.0621551152038573, + 2.032111440887451, + 2.026601074066162, + 2.025331893157959, + 2.020625417556763, + 2.0179995822143555, + 2.0146445192718505, + 2.0131945146179198, + 2.0136738064575197, + 2.0107804653549195, + 2.0097129691314697, + 2.0098247957611086, + 2.0069902968597413, + 2.0106662986755373, + 2.005746477012634, + 2.006078488845825, + 2.0048853853988646, + 2.0049549353027345, + 2.000735955581665, + 2.0011458109283446, + 2.000437735824585, + 1.997751926422119, + 2.0025174723052976, + 1.998807412185669, + 2.000533748397827, + 1.9959854383087159, + 1.9978079619979858, + 1.9958966563415528, + 1.996048821105957, + 1.9932382696533204, + 1.9939029093170166, + 1.9939674211120606, + 1.993965982017517, + 1.9950546265411377, + 1.9910492811584473, + 1.9932873993301392, + 1.9934390425109862, + 1.9942208531188965, + 1.993705040283203, + 1.9926780229949952, + 1.9916279154205323, + 1.9933884371566772, + 1.9898671300506592, + 1.989496806678772, + 1.990106160736084, + 1.9908844341278076, + 1.9897770419311522, + 1.9911138761901856, + 1.9896133652496337, + 1.9886156379699707, + 1.989099236602783, + 1.9889647799682617, + 1.987695361251831, + 1.987419519920349, + 1.9892369752502441, + 1.9867283428955078, + 1.985670655517578, + 1.985864746055603, + 1.9890831967163085, + 1.986451468811035, + 1.98608133934021, + 1.986669381942749, + 1.9860679926300049, + 1.9857238763046265, + 1.9861250590515136, + 1.9867476695251465, + 1.9855187320709229, + 1.9870084239959718, + 1.9870807974243163, + 1.9854985891723633, + 1.9862063644790648, + 1.986035345840454, + 1.9848036085510254, + 1.9857490323638916, + 1.9848638201141358, + 1.9828469595336915, + 1.984784072341919, + 1.986132272644043, + 1.9855829830932616, + 1.9845155084228516, + 1.9863489217376709, + 1.983173909263611, + 1.9847177695465088, + 1.985843783493042, + 1.9844750480651856, + 1.9835583266448975, + 1.9832866216278076, + 1.9824517697906494, + 1.9833141297912598, + 1.9830520043182374, + 1.9822975007629395, + 1.9850699710464477, + 1.9854454914855957, + 1.984017345199585, + 1.9839908868408203, + 1.9843200086212158, + 1.9820627519989014, + 1.9816079531478883, + 1.9821515436553956, + 1.9855904679107665 + ], + "train_acc": [ + 0.2372, + 0.24852, + 0.25134, + 0.25132, + 0.2549, + 0.25304, + 0.25902, + 0.26106, + 0.25824, + 0.2579, + 0.26096, + 0.25852, + 0.26276, + 0.2618, + 0.2637, + 0.26306, + 0.26492, + 0.26136, + 0.26604, + 0.26754, + 0.26836, + 0.26768, + 0.26738, + 0.26802, + 0.26928, + 0.26922, + 0.26836, + 0.2679, + 0.2719, + 0.27114, + 0.26944, + 0.27052, + 0.2709, + 0.26888, + 0.26714, + 0.27096, + 0.2726, + 0.27154, + 0.27206, + 0.27296, + 0.27414, + 0.27206, + 0.27384, + 0.27372, + 0.27318, + 0.27372, + 0.27472, + 0.27246, + 0.27264, + 0.27428, + 0.27502, + 0.27448, + 0.2742, + 0.27528, + 0.275, + 0.27736, + 0.2766, + 0.2759, + 0.27522, + 0.27712, + 0.27626, + 0.27468, + 0.27628, + 0.27488, + 0.27464, + 0.27474, + 0.27628, + 0.2782, + 0.27526, + 0.27606, + 0.27564, + 0.27476, + 0.27602, + 0.27802, + 0.2757, + 0.27936, + 0.27668, + 0.27736, + 0.2746, + 0.27752, + 0.27548, + 0.27848, + 0.2752, + 0.27484, + 0.27662, + 0.27726, + 0.2752, + 0.27864, + 0.27702, + 0.27936, + 0.27774, + 0.27604, + 0.27712, + 0.27604, + 0.27688, + 0.27666, + 0.27784, + 0.27676, + 0.27736, + 0.27638 + ], + "test_acc": [ + 0.2412, + 0.2706, + 0.2562, + 0.262, + 0.2857, + 0.2628, + 0.2833, + 0.2782, + 0.2575, + 0.2859, + 0.2873, + 0.267, + 0.288, + 0.2774, + 0.2724, + 0.2759, + 0.2718, + 0.2937, + 0.2699, + 0.2666, + 0.2763, + 0.2678, + 0.299, + 0.2876, + 0.2835, + 0.2979, + 0.28, + 0.2829, + 0.2768, + 0.2923, + 0.281, + 0.2884, + 0.2948, + 0.2862, + 0.2896, + 0.2881, + 0.2871, + 0.2895, + 0.2905, + 0.298, + 0.2897, + 0.2784, + 0.2898, + 0.2972, + 0.2879, + 0.2895, + 0.2881, + 0.2884, + 0.2944, + 0.2935, + 0.3039, + 0.2886, + 0.2993, + 0.2857, + 0.2931, + 0.2941, + 0.2933, + 0.3021, + 0.2938, + 0.2991, + 0.2841, + 0.2896, + 0.2945, + 0.2911, + 0.3, + 0.2963, + 0.2894, + 0.2908, + 0.2917, + 0.2946, + 0.2927, + 0.2946, + 0.296, + 0.3, + 0.2935, + 0.2898, + 0.2955, + 0.2929, + 0.2928, + 0.2992, + 0.2931, + 0.2954, + 0.2933, + 0.2924, + 0.2933, + 0.2925, + 0.2919, + 0.2941, + 0.2941, + 0.2946, + 0.2927, + 0.2945, + 0.294, + 0.2931, + 0.2945, + 0.2935, + 0.2943, + 0.2943, + 0.2946, + 0.2946 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.3852921724319458, + -0.0006387766916304827, + 0.00021705820108763874, + -0.00021593061683233827, + -0.0001868726685643196, + 2.6933841581922024e-05, + 0.00016013934509828687, + 7.406133954646066e-05, + -3.145005030091852e-05, + 0.0004791871178895235, + 0.0008787637343630195, + -0.0007018762989901006 + ], + "perturbation_rho": [ + 0.01893431320786476, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -5.145557224750519e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.3075768947601318e-06, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -4.1211023926734924e-06, + 9.313225746154785e-10, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 9.313225746154785e-10, + 0.0, + 0.0, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 55220.6953125, + 703320128.0, + 4374171648.0, + 4628753408.0, + 5259584512.0, + 5602404864.0, + 6980004864.0, + 7249905664.0, + 7601436160.0, + 8436634624.0, + 9617230848.0, + 9769342976.0, + 10366923776.0 + ], + "bp_grad_norms_per_layer": [ + 2.842145931936102e-07, + 1.8565524118052679e-10, + 1.8530457723819893e-10, + 1.8531473577887425e-10, + 1.852697301130135e-10, + 1.852322323303568e-10, + 1.8537825441367062e-10, + 1.8532649026514747e-10, + 1.8534035417516748e-10, + 1.8526735701129837e-10, + 1.85539111852151e-10, + 1.8548607094714953e-10, + 1.8552165359508876e-10 + ] + }, + "drift": { + "embed.weight": 331.10430617288347, + "embed.bias": 252.9571084710531, + "blocks.0.ln.weight": 10.63590306325829, + "blocks.0.w1.weight": 263.71773941078897, + "blocks.0.w1.bias": 240.90214072476394, + "blocks.0.w2.weight": 484.24212277052584, + "blocks.1.ln.weight": 8.994925940820934, + "blocks.1.w1.weight": 375.0257234388459, + "blocks.1.w1.bias": 372.81964220264507, + "blocks.1.w2.weight": 399.05181196420534, + "blocks.2.ln.weight": 6.785381078745234, + "blocks.2.w1.weight": 237.77081904994165, + "blocks.2.w1.bias": 215.00165533028638, + "blocks.2.w2.weight": 243.766610543445, + "blocks.3.ln.weight": 8.749875392840245, + "blocks.3.w1.weight": 331.37899522969747, + "blocks.3.w1.bias": 315.8500446097451, + "blocks.3.w2.weight": 312.3537327104354, + "blocks.4.ln.weight": 8.073930968349993, + "blocks.4.w1.weight": 291.9215675635427, + "blocks.4.w1.bias": 265.90446228164745, + "blocks.4.w2.weight": 273.96062778369446, + "blocks.5.ln.weight": 9.487609121353676, + "blocks.5.w1.weight": 379.3472655416328, + "blocks.5.w1.bias": 364.18802309088414, + "blocks.5.w2.weight": 375.20829167636464, + "blocks.6.ln.weight": 7.271832594737899, + "blocks.6.w1.weight": 274.22615021165615, + "blocks.6.w1.bias": 253.82742699623324, + "blocks.6.w2.weight": 263.6858849060695, + "blocks.7.ln.weight": 8.060212000337511, + "blocks.7.w1.weight": 280.8757929768, + "blocks.7.w1.bias": 262.00267937269797, + "blocks.7.w2.weight": 269.2018115874501, + "blocks.8.ln.weight": 9.34446156262141, + "blocks.8.w1.weight": 362.0470567061417, + "blocks.8.w1.bias": 345.9730629510181, + "blocks.8.w2.weight": 346.1165305240824, + "blocks.9.ln.weight": 10.547709601128119, + "blocks.9.w1.weight": 421.4247930203293, + "blocks.9.w1.bias": 384.7717987562065, + "blocks.9.w2.weight": 391.67325768744075, + "blocks.10.ln.weight": 7.98341504623973, + "blocks.10.w1.weight": 310.77250388889735, + "blocks.10.w1.bias": 289.60855856986865, + "blocks.10.w2.weight": 285.0024842791926, + "blocks.11.ln.weight": 9.5014629592837, + "blocks.11.w1.weight": 374.13082807575006, + "blocks.11.w1.bias": 348.0579252323944, + "blocks.11.w2.weight": 345.1186750982964, + "out_ln.weight": 0.6314801307530133, + "out_head.weight": 8.99875779950455, + "out_head.bias": 0.46869532178721607 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0407437321472166, + 1.9698008823394775, + 1.9470496474838257, + 1.928373962173462, + 1.9022277151870728, + 1.8882839569091796, + 1.8752377725219727, + 1.8682713878631592, + 1.8643299547576904, + 1.8616547422027587, + 1.861611949081421, + 1.8620045602416992, + 1.8579721353149414, + 1.8603198122406006, + 1.852560590248108, + 1.850361158065796, + 1.8412007814788818, + 1.8371962604141234, + 1.830751480064392, + 1.830014287109375, + 1.8211512073135376, + 1.8158404816055298, + 1.8152668701553345, + 1.8068659871673585, + 1.807379986190796, + 1.8042517069244384, + 1.8004914111709596, + 1.7970869720077514, + 1.7892543896484374, + 1.786518664894104, + 1.7887954705047608, + 1.787979825515747, + 1.782947692489624, + 1.7796524175643922, + 1.7780501412963867, + 1.7751860930252075, + 1.77746077003479, + 1.7734027153778076, + 1.7719406731414795, + 1.7689697579574586, + 1.7694388663101197, + 1.7689455612182616, + 1.7627556784820557, + 1.76162135345459, + 1.7598590536880494, + 1.7624462253189086, + 1.7612236563873291, + 1.7604118212127686, + 1.757069122390747, + 1.7525782149887086, + 1.7570673566055297, + 1.753389842262268, + 1.7526489987564087, + 1.7506390865325927, + 1.7564489751434327, + 1.7510275960922241, + 1.7505046075057984, + 1.7489783365249634, + 1.7482558310699463, + 1.7478452098846435, + 1.746708115234375, + 1.7485102898788452, + 1.7510625556182862, + 1.7457116416168212, + 1.7465464822387695, + 1.7432715910339356, + 1.7428231796646119, + 1.7424320980072021, + 1.7456896788787841, + 1.7436157192611694, + 1.7397695401763915, + 1.740845919532776, + 1.7387019988250731, + 1.7427006936645508, + 1.7407404198455811, + 1.7381612093734742, + 1.742300903930664, + 1.741065849647522, + 1.7432577428817748, + 1.7419394709014893, + 1.7393424035644531, + 1.7376308783721923, + 1.7395985202407838, + 1.7360640591812133, + 1.7388901407623292, + 1.7372601049804688, + 1.737554390182495, + 1.7332513166046142, + 1.7339157095718383, + 1.73852820854187, + 1.7378087186050415, + 1.7388774118423462, + 1.732873987159729, + 1.7344641219329835, + 1.7370289881134033, + 1.735849939918518, + 1.731984726486206, + 1.7332192292022706, + 1.7361968645477295, + 1.7349854880142213 + ], + "train_acc": [ + 0.2475, + 0.27976, + 0.29052, + 0.29796, + 0.31032, + 0.31312, + 0.32008, + 0.32376, + 0.3275, + 0.32858, + 0.3309, + 0.33454, + 0.33414, + 0.33476, + 0.33654, + 0.33644, + 0.34346, + 0.3414, + 0.34694, + 0.3445, + 0.34916, + 0.35076, + 0.35364, + 0.35626, + 0.35816, + 0.35708, + 0.35872, + 0.35684, + 0.36218, + 0.36264, + 0.36392, + 0.36252, + 0.36352, + 0.3664, + 0.36594, + 0.36678, + 0.36554, + 0.36668, + 0.36746, + 0.3698, + 0.37054, + 0.36998, + 0.37094, + 0.3744, + 0.3719, + 0.37366, + 0.37076, + 0.3724, + 0.37426, + 0.37956, + 0.3765, + 0.373, + 0.37478, + 0.37612, + 0.37244, + 0.37586, + 0.37716, + 0.37568, + 0.3782, + 0.37812, + 0.37682, + 0.3766, + 0.37616, + 0.3787, + 0.3766, + 0.37772, + 0.38072, + 0.37976, + 0.37842, + 0.3751, + 0.3807, + 0.3796, + 0.37884, + 0.37908, + 0.38054, + 0.38114, + 0.37896, + 0.37942, + 0.3799, + 0.37928, + 0.38292, + 0.37956, + 0.38116, + 0.38178, + 0.38176, + 0.38242, + 0.38104, + 0.38424, + 0.38304, + 0.38046, + 0.3825, + 0.38104, + 0.38308, + 0.38346, + 0.38224, + 0.38272, + 0.38498, + 0.38364, + 0.3829, + 0.3843 + ], + "test_acc": [ + 0.2791, + 0.3084, + 0.3148, + 0.3302, + 0.3397, + 0.3317, + 0.3545, + 0.3557, + 0.3438, + 0.3655, + 0.3703, + 0.3631, + 0.3661, + 0.3705, + 0.3563, + 0.3627, + 0.374, + 0.3834, + 0.3758, + 0.3803, + 0.3716, + 0.3648, + 0.3972, + 0.3934, + 0.3921, + 0.3901, + 0.3811, + 0.3987, + 0.3921, + 0.3942, + 0.3883, + 0.3901, + 0.3797, + 0.3967, + 0.3989, + 0.3944, + 0.3874, + 0.3912, + 0.3979, + 0.3907, + 0.405, + 0.4035, + 0.4043, + 0.4026, + 0.3974, + 0.397, + 0.4022, + 0.4051, + 0.3848, + 0.4045, + 0.4084, + 0.4049, + 0.4029, + 0.3967, + 0.4055, + 0.4057, + 0.406, + 0.4054, + 0.4069, + 0.4042, + 0.3997, + 0.4073, + 0.4117, + 0.406, + 0.4061, + 0.4051, + 0.4068, + 0.4082, + 0.4055, + 0.4063, + 0.4081, + 0.4109, + 0.4081, + 0.4085, + 0.4103, + 0.4019, + 0.4102, + 0.4103, + 0.4103, + 0.4095, + 0.4108, + 0.4093, + 0.408, + 0.4097, + 0.4098, + 0.4075, + 0.4093, + 0.4119, + 0.4105, + 0.4117, + 0.4103, + 0.4113, + 0.4124, + 0.4111, + 0.4111, + 0.4108, + 0.4113, + 0.4122, + 0.4122, + 0.4122 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.03226801007986069, + 0.06280244886875153, + 0.010810771957039833, + -0.03502938896417618, + -0.02679986134171486, + -0.043182265013456345, + -0.04647787660360336, + -0.03189649432897568, + -0.0780816450715065, + -0.05483850836753845, + -0.02444492094218731, + 0.987554669380188 + ], + "perturbation_rho": [ + 0.0010241912677884102, + -0.04057024046778679, + 0.018125081434845924, + 0.0397450290620327, + -0.017151735723018646, + 0.010231071151793003, + 0.02530200220644474, + -0.034604527056217194, + -0.008924206718802452, + 0.008240236900746822, + 0.006566178053617477, + -0.016290105879306793 + ], + "nudging": { + "0.001": [ + -2.0849984139204025e-06, + -2.7567148208618164e-07, + -1.979060471057892e-08, + 8.824281394481659e-08, + 7.82310962677002e-08, + -1.6996636986732483e-08, + 7.520429790019989e-08, + 2.584420144557953e-08, + -8.381903171539307e-09, + 5.820766091346741e-09, + 1.3504177331924438e-08, + -1.0039657354354858e-06 + ], + "0.003": [ + -6.289919838309288e-06, + -7.05476850271225e-07, + -2.3283064365386963e-08, + 1.6996636986732483e-07, + 1.1431984603404999e-07, + 2.3702159523963928e-07, + 1.7462298274040222e-07, + 1.2898817658424377e-07, + 3.096647560596466e-07, + 1.2014061212539673e-07, + 1.3527460396289825e-07, + -3.6957208067178726e-06 + ], + "0.01": [ + -2.0938459783792496e-05, + -2.1811574697494507e-06, + -1.424923539161682e-07, + 4.1816383600234985e-07, + 3.825407475233078e-07, + 5.098991096019745e-07, + 6.461050361394882e-07, + 3.5669654607772827e-07, + 1.1634547263383865e-06, + 6.814952939748764e-07, + 3.4994445741176605e-07, + -1.281173899769783e-05 + ] + }, + "hidden_norms_per_layer": [ + 7157.4072265625, + 94832.078125, + 404715.40625, + 900567.4375, + 1081047.875, + 1230157.0, + 1365586.875, + 1528390.375, + 1618979.25, + 1737319.75, + 1867286.375, + 1900228.625, + 1396480.75 + ], + "bp_grad_norms_per_layer": [ + 2.4219883925979957e-05, + 1.7657896478340263e-06, + 6.163170382933458e-07, + 5.848775117556215e-07, + 5.799907398795767e-07, + 5.837350158799381e-07, + 5.848508521921758e-07, + 5.83334440307226e-07, + 5.784736458736006e-07, + 5.808138325846812e-07, + 5.77644073018746e-07, + 5.686248982783582e-07, + 5.330333578967839e-07 + ] + }, + "drift": { + "embed.weight": 47.13343033798094, + "embed.bias": 17.027552330320233, + "blocks.0.ln.weight": 1.1320890684307203, + "blocks.0.w1.weight": 16.27975609186272, + "blocks.0.w1.bias": 12.808052799935883, + "blocks.0.w2.weight": 51.91947766632093, + "blocks.1.ln.weight": 0.9202706367200378, + "blocks.1.w1.weight": 18.82744877577282, + "blocks.1.w1.bias": 12.653020534114173, + "blocks.1.w2.weight": 46.17134940964603, + "blocks.2.ln.weight": 0.7482719581080549, + "blocks.2.w1.weight": 19.557560479232055, + "blocks.2.w1.bias": 16.890590269942404, + "blocks.2.w2.weight": 39.02075074459777, + "blocks.3.ln.weight": 0.5160212396517895, + "blocks.3.w1.weight": 16.570363915867922, + "blocks.3.w1.bias": 17.214082946851736, + "blocks.3.w2.weight": 30.85580581534199, + "blocks.4.ln.weight": 0.43511517848811326, + "blocks.4.w1.weight": 16.64619937865781, + "blocks.4.w1.bias": 18.15413776267707, + "blocks.4.w2.weight": 29.35744053745253, + "blocks.5.ln.weight": 0.46109930109565794, + "blocks.5.w1.weight": 16.648426948133086, + "blocks.5.w1.bias": 18.405016598411816, + "blocks.5.w2.weight": 32.04993283017616, + "blocks.6.ln.weight": 0.5239951706454276, + "blocks.6.w1.weight": 17.307678602670318, + "blocks.6.w1.bias": 17.94260026627366, + "blocks.6.w2.weight": 35.649016996813835, + "blocks.7.ln.weight": 0.549764892230928, + "blocks.7.w1.weight": 17.37898974454947, + "blocks.7.w1.bias": 17.920017558565252, + "blocks.7.w2.weight": 47.618251557182425, + "blocks.8.ln.weight": 0.5059874235487765, + "blocks.8.w1.weight": 16.71552334731513, + "blocks.8.w1.bias": 18.79123189462688, + "blocks.8.w2.weight": 33.90958537646263, + "blocks.9.ln.weight": 0.5547967624139118, + "blocks.9.w1.weight": 17.469950204626425, + "blocks.9.w1.bias": 17.514572577139017, + "blocks.9.w2.weight": 47.637641198604065, + "blocks.10.ln.weight": 0.4702376652398834, + "blocks.10.w1.weight": 15.609742871134038, + "blocks.10.w1.bias": 14.194008119737171, + "blocks.10.w2.weight": 53.92588624506532, + "blocks.11.ln.weight": 0.5638652470821104, + "blocks.11.w1.weight": 18.329914062311936, + "blocks.11.w1.bias": 16.960111542785533, + "blocks.11.w2.weight": 61.54428413245766, + "out_ln.weight": 0.36613157588839446, + "out_head.weight": 6.704387093335344, + "out_head.bias": 0.6590609045431134 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 12, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 8 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L12_seed8", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file |
