diff options
Diffstat (limited to 'results/fa_dfa_d512_L12_seed2/results_cifar10.json')
| -rw-r--r-- | results/fa_dfa_d512_L12_seed2/results_cifar10.json | 969 |
1 files changed, 969 insertions, 0 deletions
diff --git a/results/fa_dfa_d512_L12_seed2/results_cifar10.json b/results/fa_dfa_d512_L12_seed2/results_cifar10.json new file mode 100644 index 0000000..68c6e2b --- /dev/null +++ b/results/fa_dfa_d512_L12_seed2/results_cifar10.json @@ -0,0 +1,969 @@ +{ + "2": { + "dfa": { + "log": { + "train_loss": [ + 2.067472825927734, + 2.050606523780823, + 2.0464368284606933, + 2.042041470413208, + 2.044630073509216, + 2.0444898017120363, + 2.0405262368774415, + 2.035152849884033, + 2.0376771257019044, + 2.03509138168335, + 2.0322119396972655, + 2.033472604637146, + 2.033374984664917, + 2.029572864456177, + 2.0276870463562013, + 2.0302749423980715, + 2.027638416900635, + 2.0297894455718994, + 2.0263528815078735, + 2.0278661029052736, + 2.0263561694335936, + 2.025871162261963, + 2.0294278043365477, + 2.0267604162979125, + 2.023654881286621, + 2.0251026361846924, + 2.023718124923706, + 2.023326771583557, + 2.0232533959960937, + 2.0244881047821046, + 2.0223184526062012, + 2.021666586151123, + 2.0216548426818846, + 2.01917991065979, + 2.021205049819946, + 2.019787854347229, + 2.0205126093292236, + 2.019609580001831, + 2.021551597328186, + 2.0192294607543944, + 2.019204094810486, + 2.0193405352020264, + 2.0203353549957277, + 2.0184646337127687, + 2.0181322897338867, + 2.019681806564331, + 2.0184515984344484, + 2.019875436248779, + 2.019927127380371, + 2.0191176706695555, + 2.016925164756775, + 2.0216545279693605, + 2.01788885345459, + 2.0193467868804933, + 2.0155374671936035, + 2.0189524417114257, + 2.018774903793335, + 2.0179057373046874, + 2.0154743661117553, + 2.016519417648315, + 2.0184439277648925, + 2.017523434753418, + 2.017258151779175, + 2.018665143585205, + 2.0169664744567872, + 2.016487240142822, + 2.01617966884613, + 2.0159906078338623, + 2.0176960159683226, + 2.015817363433838, + 2.014662687149048, + 2.0164543737030027, + 2.0170937897109984, + 2.017375130081177, + 2.0153453800964356, + 2.015449415817261, + 2.0149039567565916, + 2.0142875480651856, + 2.0150826512145996, + 2.0165929887390135, + 2.015268103866577, + 2.013780555686951, + 2.0156544293212892, + 2.0130539707183837, + 2.015054910621643, + 2.0157424531555175, + 2.0142131992340087, + 2.016000519104004, + 2.014644538726807, + 2.0129901065826417, + 2.014485344467163, + 2.015419366531372, + 2.014306447067261, + 2.013788412322998, + 2.014239661521912, + 2.0151051541137694, + 2.0142657162475586, + 2.015102813682556, + 2.0133784993743897, + 2.015375001487732 + ], + "train_acc": [ + 0.23898, + 0.24246, + 0.24622, + 0.24884, + 0.24668, + 0.2485, + 0.24792, + 0.25306, + 0.24956, + 0.25484, + 0.25486, + 0.2526, + 0.2561, + 0.256, + 0.25878, + 0.25726, + 0.2586, + 0.25762, + 0.25946, + 0.25782, + 0.26002, + 0.2601, + 0.25784, + 0.2578, + 0.26152, + 0.2606, + 0.26106, + 0.25782, + 0.25852, + 0.25776, + 0.26318, + 0.26254, + 0.26198, + 0.2636, + 0.26256, + 0.26578, + 0.2624, + 0.26176, + 0.25926, + 0.26508, + 0.26478, + 0.26082, + 0.26404, + 0.26548, + 0.26746, + 0.26278, + 0.26414, + 0.26246, + 0.26348, + 0.26282, + 0.26394, + 0.26348, + 0.26624, + 0.26506, + 0.26554, + 0.26198, + 0.26362, + 0.26472, + 0.26728, + 0.26684, + 0.26632, + 0.26602, + 0.26588, + 0.26584, + 0.26636, + 0.2642, + 0.26606, + 0.26868, + 0.26574, + 0.26688, + 0.267, + 0.2633, + 0.26504, + 0.26764, + 0.26732, + 0.26612, + 0.26802, + 0.26864, + 0.26808, + 0.26662, + 0.26826, + 0.26906, + 0.2658, + 0.26706, + 0.2689, + 0.26686, + 0.26868, + 0.2686, + 0.26812, + 0.26678, + 0.26878, + 0.26654, + 0.26678, + 0.2667, + 0.26656, + 0.26746, + 0.26736, + 0.26788, + 0.26794, + 0.27014 + ], + "test_acc": [ + 0.2486, + 0.2615, + 0.2803, + 0.2745, + 0.2627, + 0.2561, + 0.2643, + 0.2592, + 0.2832, + 0.2903, + 0.2737, + 0.2757, + 0.2706, + 0.2847, + 0.2742, + 0.2822, + 0.276, + 0.2661, + 0.2705, + 0.2742, + 0.2755, + 0.2899, + 0.2929, + 0.2661, + 0.2857, + 0.2777, + 0.2783, + 0.2498, + 0.287, + 0.2912, + 0.2833, + 0.2801, + 0.2912, + 0.2897, + 0.2936, + 0.2849, + 0.2874, + 0.2957, + 0.2753, + 0.2862, + 0.2964, + 0.2934, + 0.289, + 0.29, + 0.2951, + 0.2861, + 0.2858, + 0.2867, + 0.2885, + 0.3027, + 0.2822, + 0.2847, + 0.2933, + 0.2899, + 0.286, + 0.2958, + 0.2974, + 0.2957, + 0.288, + 0.2878, + 0.2944, + 0.2885, + 0.2913, + 0.2942, + 0.2965, + 0.2943, + 0.3, + 0.2924, + 0.2977, + 0.2917, + 0.2951, + 0.2848, + 0.2943, + 0.2944, + 0.2874, + 0.2901, + 0.2969, + 0.29, + 0.2948, + 0.2954, + 0.2894, + 0.2948, + 0.2908, + 0.2922, + 0.2904, + 0.2903, + 0.2947, + 0.2926, + 0.2912, + 0.292, + 0.2929, + 0.2922, + 0.2941, + 0.2952, + 0.2927, + 0.2937, + 0.2934, + 0.2933, + 0.2933, + 0.2933 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.3273237347602844, + 0.0001051856525009498, + -2.771663639578037e-05, + -0.0005365631077438593, + 0.0003001387231051922, + -7.278185512404889e-05, + -0.00019034843717236072, + -0.00018735270714387298, + -0.0003543527564033866, + -0.0005275406292639673, + -0.000327416870277375, + -0.00032380438642576337 + ], + "perturbation_rho": [ + -0.01584552228450775, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -3.3387914299964905e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -8.740462362766266e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -2.7050264179706573e-06, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 55119.203125, + 2542876672.0, + 5336881152.0, + 6906983936.0, + 7084125696.0, + 9782456320.0, + 9881924608.0, + 10504348672.0, + 10572271616.0, + 11673710592.0, + 12703504384.0, + 12870231040.0, + 13115667456.0 + ], + "bp_grad_norms_per_layer": [ + 1.9967485798133566e-07, + 2.110904229191135e-10, + 2.0921038512700108e-10, + 2.0923462962230133e-10, + 2.092699208366966e-10, + 2.0928372923556537e-10, + 2.0927498622924645e-10, + 2.0925521038162032e-10, + 2.0925566834861797e-10, + 2.0928303534617498e-10, + 2.0925450261444212e-10, + 2.093413220549678e-10, + 2.0937052092051545e-10 + ] + }, + "drift": { + "embed.weight": 355.47197791509984, + "embed.bias": 325.4137572298477, + "blocks.0.ln.weight": 10.183073943359524, + "blocks.0.w1.weight": 337.53798007019327, + "blocks.0.w1.bias": 368.2919019457867, + "blocks.0.w2.weight": 502.2465316680103, + "blocks.1.ln.weight": 9.790888407534407, + "blocks.1.w1.weight": 399.8591225924345, + "blocks.1.w1.bias": 381.91784742604096, + "blocks.1.w2.weight": 400.78281765228365, + "blocks.2.ln.weight": 9.816270926016012, + "blocks.2.w1.weight": 404.058717958719, + "blocks.2.w1.bias": 370.3941989263201, + "blocks.2.w2.weight": 389.67673069887354, + "blocks.3.ln.weight": 7.767040540127046, + "blocks.3.w1.weight": 279.00825869080103, + "blocks.3.w1.bias": 255.25224285416232, + "blocks.3.w2.weight": 275.49641344672904, + "blocks.4.ln.weight": 10.807571563453388, + "blocks.4.w1.weight": 443.61612820167835, + "blocks.4.w1.bias": 410.34345299471363, + "blocks.4.w2.weight": 435.1254085598178, + "blocks.5.ln.weight": 7.222077733457088, + "blocks.5.w1.weight": 277.46785316158423, + "blocks.5.w1.bias": 253.23890247800753, + "blocks.5.w2.weight": 256.5840082126155, + "blocks.6.ln.weight": 8.723424030262112, + "blocks.6.w1.weight": 347.79770296240184, + "blocks.6.w1.bias": 330.68249616268605, + "blocks.6.w2.weight": 337.991009055742, + "blocks.7.ln.weight": 6.095259128522855, + "blocks.7.w1.weight": 228.22905887133265, + "blocks.7.w1.bias": 207.27947768539244, + "blocks.7.w2.weight": 218.0278988490447, + "blocks.8.ln.weight": 10.272236445847241, + "blocks.8.w1.weight": 411.857394629554, + "blocks.8.w1.bias": 374.6759208224197, + "blocks.8.w2.weight": 382.12457040788746, + "blocks.9.ln.weight": 10.509629754259423, + "blocks.9.w1.weight": 419.2076511149967, + "blocks.9.w1.bias": 382.6992093043317, + "blocks.9.w2.weight": 400.5745998142284, + "blocks.10.ln.weight": 8.353122487257533, + "blocks.10.w1.weight": 326.130308345784, + "blocks.10.w1.bias": 311.08525380018006, + "blocks.10.w2.weight": 293.42915037063676, + "blocks.11.ln.weight": 9.253648926692552, + "blocks.11.w1.weight": 374.4659124025003, + "blocks.11.w1.bias": 354.20131257586644, + "blocks.11.w2.weight": 347.73755162930524, + "out_ln.weight": 0.6640411848895453, + "out_head.weight": 10.723191480707335, + "out_head.bias": 0.5785102998287991 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0563817890930176, + 1.9627269606781006, + 1.9333056550598144, + 1.9114726235198976, + 1.8997531073379517, + 1.8861957403945924, + 1.87444166015625, + 1.8636801738739013, + 1.8639895972442626, + 1.859206601295471, + 1.8560995433807372, + 1.8580559069824218, + 1.857329437828064, + 1.8486979720687866, + 1.8436245847320556, + 1.8427142208480836, + 1.8344886156463622, + 1.8326412610626222, + 1.8245765609741211, + 1.828027060470581, + 1.825153868484497, + 1.8163878149795532, + 1.824076413230896, + 1.81121192653656, + 1.8073615282821656, + 1.8063795041275024, + 1.8070646337509155, + 1.8042147310638428, + 1.8005332668685914, + 1.7986898645401002, + 1.8020886263275147, + 1.7947696161270141, + 1.7940981402206422, + 1.7886089331436157, + 1.7890249035263062, + 1.7892986505889892, + 1.7865815286636353, + 1.7821338254547119, + 1.7815338650512695, + 1.7780959701919556, + 1.7746984979629516, + 1.7739060324478149, + 1.7781290399169922, + 1.7747572018051148, + 1.7699797783660889, + 1.7655833713150024, + 1.767660176963806, + 1.7649168865203857, + 1.762353595352173, + 1.7620251220321654, + 1.7568823919296264, + 1.7637593435668946, + 1.758452812423706, + 1.7617782683944703, + 1.75097875831604, + 1.7536098889160157, + 1.7537783139801026, + 1.7496310552597045, + 1.7481205722045898, + 1.746379077758789, + 1.748953713645935, + 1.7454774743652344, + 1.7429656470108033, + 1.7443698706436157, + 1.7438769147491455, + 1.741790360183716, + 1.7384765851593018, + 1.737857723007202, + 1.7372428884124755, + 1.7377281158828735, + 1.7380989352416991, + 1.7357493377685547, + 1.7356955800628662, + 1.7379692654418946, + 1.7327075168609618, + 1.7319425884246826, + 1.7306698516082764, + 1.7282980773544312, + 1.7296418505477906, + 1.7297362589263916, + 1.728454496421814, + 1.7303657376098633, + 1.7310565896987915, + 1.7265128871917725, + 1.7292877883911133, + 1.7294651891708375, + 1.7270633559417725, + 1.7284353275299071, + 1.7280301824188233, + 1.7230924905776976, + 1.7238229986190796, + 1.7292125368499756, + 1.7265778392791749, + 1.727167644920349, + 1.7264636569595337, + 1.7247360497283934, + 1.7212902561187744, + 1.7221066110229493, + 1.7266186252212525, + 1.7269180416488648 + ], + "train_acc": [ + 0.24122, + 0.28436, + 0.299, + 0.308, + 0.31228, + 0.31872, + 0.32314, + 0.32812, + 0.32904, + 0.33368, + 0.33254, + 0.33406, + 0.33382, + 0.33696, + 0.33812, + 0.33908, + 0.34066, + 0.34344, + 0.34602, + 0.3438, + 0.34686, + 0.3481, + 0.3424, + 0.3496, + 0.3543, + 0.35444, + 0.35368, + 0.35296, + 0.35348, + 0.35554, + 0.35894, + 0.3566, + 0.35682, + 0.36212, + 0.35982, + 0.35886, + 0.36128, + 0.3635, + 0.36448, + 0.36314, + 0.36606, + 0.3656, + 0.36578, + 0.36614, + 0.36982, + 0.36962, + 0.37046, + 0.37016, + 0.36992, + 0.37214, + 0.3727, + 0.37368, + 0.37504, + 0.37302, + 0.37496, + 0.37264, + 0.37418, + 0.37714, + 0.37492, + 0.37612, + 0.377, + 0.37596, + 0.37848, + 0.37688, + 0.37832, + 0.3779, + 0.37974, + 0.38196, + 0.38234, + 0.37998, + 0.38076, + 0.37942, + 0.37988, + 0.38244, + 0.38388, + 0.38314, + 0.38488, + 0.38358, + 0.3841, + 0.38562, + 0.38478, + 0.3818, + 0.38364, + 0.38524, + 0.38452, + 0.38266, + 0.38434, + 0.38448, + 0.38532, + 0.38844, + 0.38614, + 0.38436, + 0.3866, + 0.38558, + 0.38382, + 0.38576, + 0.38752, + 0.3862, + 0.3878, + 0.38596 + ], + "test_acc": [ + 0.2906, + 0.315, + 0.3412, + 0.3441, + 0.3313, + 0.3339, + 0.3449, + 0.3436, + 0.3633, + 0.3441, + 0.359, + 0.3384, + 0.3562, + 0.3694, + 0.3664, + 0.3678, + 0.3625, + 0.3697, + 0.3793, + 0.3753, + 0.3826, + 0.3718, + 0.381, + 0.3826, + 0.3821, + 0.3792, + 0.3812, + 0.3575, + 0.3868, + 0.382, + 0.383, + 0.3859, + 0.3914, + 0.3883, + 0.3907, + 0.3941, + 0.3969, + 0.3849, + 0.387, + 0.3904, + 0.3945, + 0.3886, + 0.3923, + 0.3966, + 0.3963, + 0.3937, + 0.395, + 0.3874, + 0.3891, + 0.3962, + 0.3873, + 0.3903, + 0.3954, + 0.3911, + 0.3956, + 0.3948, + 0.3985, + 0.3938, + 0.3964, + 0.3949, + 0.398, + 0.3955, + 0.3973, + 0.3924, + 0.3965, + 0.4008, + 0.3918, + 0.4006, + 0.3975, + 0.3976, + 0.3974, + 0.3984, + 0.4018, + 0.3991, + 0.3957, + 0.4003, + 0.4007, + 0.4003, + 0.4027, + 0.4018, + 0.4003, + 0.4011, + 0.3991, + 0.4017, + 0.4012, + 0.4021, + 0.4007, + 0.4004, + 0.4021, + 0.4026, + 0.4015, + 0.4035, + 0.4023, + 0.4022, + 0.4028, + 0.4034, + 0.4025, + 0.4021, + 0.4025, + 0.4025 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.018669456243515015, + 0.056162357330322266, + -0.0088431341573596, + -0.008182319812476635, + -0.055274538695812225, + -0.005170345772057772, + 0.014380814507603645, + -0.0468558594584465, + -0.06233042851090431, + -0.04816172271966934, + -0.019947674125432968, + 0.9990314245223999 + ], + "perturbation_rho": [ + 0.03642716258764267, + 0.02892448753118515, + 0.03735572472214699, + -0.00921887531876564, + -0.014430028386414051, + 0.03254326060414314, + 0.005076523870229721, + 0.0074741230346262455, + 0.035068579018116, + 0.027692969888448715, + -0.0010530222207307816, + 0.018331632018089294 + ], + "nudging": { + "0.001": [ + -1.2880191206932068e-06, + -2.0337756723165512e-07, + -1.0291114449501038e-07, + -6.391201168298721e-08, + 8.12578946352005e-08, + -4.307366907596588e-09, + 9.313225746154785e-09, + 1.1568772606551647e-07, + 6.877235136926174e-08, + 4.470348358154297e-08, + 2.6775524020195007e-08, + -1.1273659765720367e-06 + ], + "0.003": [ + -3.7905119825154543e-06, + -6.791669875383377e-07, + -9.001814760267735e-08, + -7.08096195012331e-08, + 2.7276109904050827e-07, + -5.8818841353058815e-08, + -9.292853064835072e-08, + 2.6039197109639645e-07, + 2.7110218070447445e-07, + 1.3009412214159966e-07, + 6.56291376799345e-08, + -4.065892426297069e-06 + ], + "0.01": [ + -1.25557417050004e-05, + -2.2551976144313812e-06, + 8.774804882705212e-08, + -2.8230715543031693e-09, + 8.539936970919371e-07, + -6.600748747587204e-08, + -3.3914693631231785e-07, + 7.495400495827198e-07, + 8.926435839384794e-07, + 5.459296517074108e-07, + 2.251181285828352e-07, + -1.443939981982112e-05 + ] + }, + "hidden_norms_per_layer": [ + 6788.72314453125, + 99873.7890625, + 239906.21875, + 498097.5, + 936403.5, + 1120419.5, + 1148044.0, + 1243337.25, + 1465607.625, + 1820056.25, + 1916954.25, + 2080373.375, + 1057581.375 + ], + "bp_grad_norms_per_layer": [ + 3.21922343573533e-05, + 1.926036247823504e-06, + 7.882048862484226e-07, + 6.652410888818849e-07, + 6.507269176836417e-07, + 6.543205017806031e-07, + 6.41887766050786e-07, + 6.365870603985968e-07, + 6.355120376611012e-07, + 6.336023261610535e-07, + 6.346273266899516e-07, + 6.351577326313418e-07, + 6.350035732793913e-07 + ] + }, + "drift": { + "embed.weight": 47.675119188037286, + "embed.bias": 12.598388666523805, + "blocks.0.ln.weight": 1.151033318517311, + "blocks.0.w1.weight": 16.377677244547872, + "blocks.0.w1.bias": 12.516910644554024, + "blocks.0.w2.weight": 52.57068443004618, + "blocks.1.ln.weight": 0.9655458779833542, + "blocks.1.w1.weight": 18.881599150212853, + "blocks.1.w1.bias": 9.530569072308776, + "blocks.1.w2.weight": 43.92118513197058, + "blocks.2.ln.weight": 0.7887174233304558, + "blocks.2.w1.weight": 18.824409898016874, + "blocks.2.w1.bias": 13.563387947054077, + "blocks.2.w2.weight": 31.07735174337715, + "blocks.3.ln.weight": 0.8171227981326923, + "blocks.3.w1.weight": 19.539674249572744, + "blocks.3.w1.bias": 18.146478276910425, + "blocks.3.w2.weight": 35.53729462516465, + "blocks.4.ln.weight": 0.6263369507412071, + "blocks.4.w1.weight": 16.92487352504901, + "blocks.4.w1.bias": 15.976801095115205, + "blocks.4.w2.weight": 29.94254311653644, + "blocks.5.ln.weight": 0.6487257363749984, + "blocks.5.w1.weight": 17.08776989967922, + "blocks.5.w1.bias": 11.932105261833147, + "blocks.5.w2.weight": 57.38022751451892, + "blocks.6.ln.weight": 0.6775998262875462, + "blocks.6.w1.weight": 18.367003078140872, + "blocks.6.w1.bias": 14.665116127192782, + "blocks.6.w2.weight": 54.62506371390311, + "blocks.7.ln.weight": 0.7036757447327185, + "blocks.7.w1.weight": 19.393650716681655, + "blocks.7.w1.bias": 17.4299147560077, + "blocks.7.w2.weight": 46.77523522935725, + "blocks.8.ln.weight": 0.7270809437825937, + "blocks.8.w1.weight": 21.877072467037458, + "blocks.8.w1.bias": 19.917426863463785, + "blocks.8.w2.weight": 42.759258263025565, + "blocks.9.ln.weight": 0.6050917355676333, + "blocks.9.w1.weight": 17.155375601849993, + "blocks.9.w1.bias": 15.811293825336426, + "blocks.9.w2.weight": 33.7630968788062, + "blocks.10.ln.weight": 0.6513788383873166, + "blocks.10.w1.weight": 18.151371658733705, + "blocks.10.w1.bias": 17.557942417321982, + "blocks.10.w2.weight": 38.33917690306304, + "blocks.11.ln.weight": 0.6877634546861944, + "blocks.11.w1.weight": 19.58584700755457, + "blocks.11.w1.bias": 19.31054232174018, + "blocks.11.w2.weight": 38.09422640487883, + "out_ln.weight": 0.3216566822185731, + "out_head.weight": 6.321448993543088, + "out_head.bias": 1.6265612863801873 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 12, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 2 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L12_seed2", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file |
