diff options
Diffstat (limited to 'results/fa_dfa_d256_L8_seed3/results_cifar10.json')
| -rw-r--r-- | results/fa_dfa_d256_L8_seed3/results_cifar10.json | 881 |
1 files changed, 881 insertions, 0 deletions
diff --git a/results/fa_dfa_d256_L8_seed3/results_cifar10.json b/results/fa_dfa_d256_L8_seed3/results_cifar10.json new file mode 100644 index 0000000..5c3b9f9 --- /dev/null +++ b/results/fa_dfa_d256_L8_seed3/results_cifar10.json @@ -0,0 +1,881 @@ +{ + "3": { + "dfa": { + "log": { + "train_loss": [ + 2.068615023956299, + 2.047176646156311, + 2.045687096939087, + 2.041560323257446, + 2.039340658111572, + 2.037013464202881, + 2.035925048980713, + 2.0311173275756835, + 2.032121988372803, + 2.028200259399414, + 2.0268639810943605, + 2.0260205685043333, + 2.023872131958008, + 2.022557463645935, + 2.019715512161255, + 2.0188200274276733, + 2.0172054164886473, + 2.017824410171509, + 2.017126524734497, + 2.0166522624969483, + 2.0137256134414674, + 2.0104475395965578, + 2.014379795913696, + 2.014344210357666, + 2.0130453625488283, + 2.0132340604400634, + 2.015250590057373, + 2.0126567953872683, + 2.011817781829834, + 2.011758652381897, + 2.012901442489624, + 2.010367298049927, + 2.01117289024353, + 2.0130296683502196, + 2.011817899551392, + 2.0138639054107665, + 2.0104484258270263, + 2.01166593460083, + 2.0109901863861084, + 2.009072921295166, + 2.0114363181304933, + 2.0125079582214354, + 2.0117907460784914, + 2.0110322050476075, + 2.0118529551696778, + 2.0105218141174315, + 2.0101245944213866, + 2.011210913467407, + 2.00997705657959, + 2.0106901919555664, + 2.0133068067932127, + 2.010482346534729, + 2.01014770980835, + 2.013678374786377, + 2.0105705118560793, + 2.009467424964905, + 2.0119786975097655, + 2.012015775146484, + 2.0094030991363527, + 2.012004408950806, + 2.0111507415008547, + 2.0106979748535156, + 2.008728541183472, + 2.010878988723755, + 2.009884360809326, + 2.0100272506713868, + 2.010221890258789, + 2.0115498251342774, + 2.012461339263916, + 2.010008108291626, + 2.009727705307007, + 2.0100395127105712, + 2.009380127105713, + 2.0105259854888917, + 2.0112087518310546, + 2.0097919299316405, + 2.009896908798218, + 2.0096029611968995, + 2.0109228817749023, + 2.0104407540130613, + 2.008896068954468, + 2.010570963516235, + 2.0119053901672364, + 2.010287576522827, + 2.008419459609985, + 2.0072363785171508, + 2.00801055557251, + 2.008693112564087, + 2.0080592823791505, + 2.0085273458862303, + 2.009861908569336, + 2.0077927374267577, + 2.009273677444458, + 2.008448128814697, + 2.00934712600708, + 2.0098364579772947, + 2.0096740563964843, + 2.009671881637573, + 2.0100610285949707, + 2.0068935653305053 + ], + "train_acc": [ + 0.23866, + 0.24262, + 0.24576, + 0.25012, + 0.25268, + 0.25266, + 0.25262, + 0.25804, + 0.2571, + 0.25908, + 0.26094, + 0.26028, + 0.26044, + 0.26054, + 0.262, + 0.26448, + 0.26688, + 0.26672, + 0.266, + 0.26744, + 0.26834, + 0.26974, + 0.26904, + 0.2705, + 0.26832, + 0.26774, + 0.26668, + 0.26762, + 0.267, + 0.27284, + 0.27026, + 0.27038, + 0.27058, + 0.27046, + 0.27236, + 0.26896, + 0.27174, + 0.27068, + 0.27064, + 0.2734, + 0.27182, + 0.27226, + 0.27142, + 0.2746, + 0.27316, + 0.27024, + 0.27572, + 0.27118, + 0.2743, + 0.27594, + 0.27086, + 0.27004, + 0.27266, + 0.26996, + 0.27428, + 0.27528, + 0.27046, + 0.27342, + 0.27552, + 0.27356, + 0.27532, + 0.27676, + 0.27258, + 0.27636, + 0.27536, + 0.27252, + 0.2765, + 0.27326, + 0.2722, + 0.27532, + 0.27574, + 0.2746, + 0.27538, + 0.27316, + 0.2756, + 0.27282, + 0.27362, + 0.27488, + 0.2711, + 0.27402, + 0.27544, + 0.27228, + 0.27592, + 0.2756, + 0.27716, + 0.27642, + 0.27756, + 0.27424, + 0.27854, + 0.27518, + 0.27258, + 0.2778, + 0.27602, + 0.277, + 0.27468, + 0.27454, + 0.27424, + 0.27666, + 0.27484, + 0.27578 + ], + "test_acc": [ + 0.2605, + 0.2721, + 0.2761, + 0.2552, + 0.2751, + 0.2671, + 0.2772, + 0.279, + 0.2708, + 0.2664, + 0.2807, + 0.2803, + 0.2787, + 0.2911, + 0.2852, + 0.2856, + 0.2844, + 0.2747, + 0.2802, + 0.2843, + 0.294, + 0.2912, + 0.2858, + 0.276, + 0.2842, + 0.2825, + 0.2921, + 0.2849, + 0.289, + 0.2758, + 0.2964, + 0.2852, + 0.2834, + 0.2877, + 0.2999, + 0.2849, + 0.293, + 0.2873, + 0.2934, + 0.2883, + 0.293, + 0.2895, + 0.2914, + 0.292, + 0.3002, + 0.2865, + 0.2926, + 0.2945, + 0.2989, + 0.2977, + 0.2916, + 0.2894, + 0.2977, + 0.2938, + 0.3004, + 0.2967, + 0.3009, + 0.2818, + 0.2924, + 0.2934, + 0.2852, + 0.2866, + 0.2939, + 0.2929, + 0.2948, + 0.296, + 0.2999, + 0.2936, + 0.2911, + 0.2955, + 0.2922, + 0.2948, + 0.292, + 0.2922, + 0.2954, + 0.2909, + 0.2971, + 0.2964, + 0.2964, + 0.2975, + 0.2926, + 0.2936, + 0.2895, + 0.2968, + 0.295, + 0.2928, + 0.2962, + 0.2962, + 0.2981, + 0.295, + 0.2952, + 0.2962, + 0.2954, + 0.2942, + 0.294, + 0.2946, + 0.2946, + 0.2945, + 0.2946, + 0.2946 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.39051711559295654, + 0.00047808888484723866, + 0.00286676874384284, + -0.0025043508503586054, + 0.000322053674608469, + -0.0008587267366237938, + -0.0018792838091030717, + 0.0018272181041538715 + ], + "perturbation_rho": [ + 0.005894219968467951, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -2.915039658546448e-07, + -9.313225746154785e-10, + 9.313225746154785e-10, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.0211952030658722e-06, + -1.862645149230957e-09, + 9.313225746154785e-10, + 0.0, + 0.0, + 0.0, + 9.313225746154785e-10, + 0.0 + ], + "0.01": [ + -3.0533410608768463e-06, + -3.725290298461914e-09, + 9.313225746154785e-10, + 3.725290298461914e-09, + 0.0, + 0.0, + -9.313225746154785e-10, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 40316.796875, + 307326208.0, + 913832192.0, + 988009024.0, + 1508025088.0, + 1806359168.0, + 1849629568.0, + 2188687616.0, + 2485296896.0 + ], + "bp_grad_norms_per_layer": [ + 3.1247913057086407e-07, + 6.341530633946491e-10, + 6.226453796998044e-10, + 6.2297295100322e-10, + 6.209277536584068e-10, + 6.210325587119314e-10, + 6.210361669367614e-10, + 6.206388181162481e-10, + 6.205727043351317e-10 + ] + }, + "drift": { + "embed.weight": 346.8648177248426, + "embed.bias": 294.36548366042695, + "blocks.0.ln.weight": 11.012950897216797, + "blocks.0.w1.weight": 218.82511475631335, + "blocks.0.w1.bias": 208.27393479202811, + "blocks.0.w2.weight": 491.86978296944665, + "blocks.1.ln.weight": 10.164929389953613, + "blocks.1.w1.weight": 277.9108744662706, + "blocks.1.w1.bias": 255.17942982496433, + "blocks.1.w2.weight": 416.2935804854679, + "blocks.2.ln.weight": 9.155071258544922, + "blocks.2.w1.weight": 230.32353546806806, + "blocks.2.w1.bias": 225.3918473151377, + "blocks.2.w2.weight": 335.1338975810203, + "blocks.3.ln.weight": 10.646790504455566, + "blocks.3.w1.weight": 305.38387277965444, + "blocks.3.w1.bias": 272.0924461450336, + "blocks.3.w2.weight": 449.71231243467605, + "blocks.4.ln.weight": 10.511990547180176, + "blocks.4.w1.weight": 301.02475276314414, + "blocks.4.w1.bias": 283.9163308436812, + "blocks.4.w2.weight": 425.48006497712345, + "blocks.5.ln.weight": 7.359297275543213, + "blocks.5.w1.weight": 197.05490869212008, + "blocks.5.w1.bias": 175.3607301882127, + "blocks.5.w2.weight": 279.18290459751876, + "blocks.6.ln.weight": 11.279300689697266, + "blocks.6.w1.weight": 321.6075481759436, + "blocks.6.w1.bias": 299.55963820199565, + "blocks.6.w2.weight": 437.1964645534032, + "blocks.7.ln.weight": 11.766033172607422, + "blocks.7.w1.weight": 330.68186483881703, + "blocks.7.w1.bias": 322.85133764510664, + "blocks.7.w2.weight": 455.9640356105679, + "out_ln.weight": 0.8627486824989319, + "out_head.weight": 8.827849511384864, + "out_head.bias": 1.0810052204688796 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.058981375579834, + 1.9834817496490478, + 1.9641429858398438, + 1.9436818535614013, + 1.9326764168930053, + 1.9252216508483886, + 1.9228894757080077, + 1.9135075526428222, + 1.908241348800659, + 1.898921445579529, + 1.8944130271148683, + 1.8912477577972413, + 1.8830120736312865, + 1.881072061805725, + 1.8768733514022826, + 1.8708937075805665, + 1.8682633953094483, + 1.867644786605835, + 1.862151336288452, + 1.8556909922027587, + 1.8531654512786866, + 1.8516737128448486, + 1.856813112487793, + 1.8525701689147949, + 1.845026340560913, + 1.839306372718811, + 1.8322341234970092, + 1.8263341422271728, + 1.8188115433502197, + 1.8107766916275025, + 1.8092199557113648, + 1.8012385620880127, + 1.7975863833999635, + 1.7922526489257813, + 1.7880666454315186, + 1.7886801773071288, + 1.7807807125091553, + 1.779724262084961, + 1.775158731918335, + 1.7758396688842772, + 1.7778481522369385, + 1.774957112350464, + 1.7728896438598634, + 1.7728447552490234, + 1.7720583478546144, + 1.7720315837860108, + 1.7645318777084351, + 1.7661468465423584, + 1.7653337018203736, + 1.7627277339935303, + 1.7612493091583252, + 1.7608684145736695, + 1.7618030507659912, + 1.7599208697128297, + 1.7579380053710938, + 1.7533478388214112, + 1.7546381701278686, + 1.7539206622314454, + 1.74741821723938, + 1.750472043838501, + 1.7494391228866577, + 1.748632820663452, + 1.7438091558837892, + 1.7478703462982177, + 1.7456060763549806, + 1.7437335445785522, + 1.7426370120239258, + 1.7418463362884522, + 1.7448427515029907, + 1.7404597217559814, + 1.743501665992737, + 1.7376666475677491, + 1.7360430716705322, + 1.7362179106903075, + 1.7341359579086304, + 1.733851330947876, + 1.7334849172210693, + 1.7326775600814819, + 1.7341204782104491, + 1.7362167639541626, + 1.7309519817352295, + 1.7323176555633546, + 1.733476494140625, + 1.7358508117294311, + 1.7305281336212157, + 1.7303411197662353, + 1.73070291847229, + 1.7317729217147828, + 1.727743486557007, + 1.7267417626571655, + 1.7302013341522218, + 1.7299649307250977, + 1.7291040048217774, + 1.7297229943847656, + 1.7304346767807006, + 1.7294611694335937, + 1.731980471458435, + 1.7267779105377197, + 1.728856870651245, + 1.7232196910858155 + ], + "train_acc": [ + 0.24234, + 0.2742, + 0.29098, + 0.29598, + 0.29902, + 0.29986, + 0.3002, + 0.30472, + 0.30692, + 0.31152, + 0.31082, + 0.31398, + 0.3191, + 0.31702, + 0.32336, + 0.32422, + 0.32658, + 0.32714, + 0.32938, + 0.33382, + 0.33342, + 0.33472, + 0.33338, + 0.33292, + 0.33888, + 0.3423, + 0.34322, + 0.3461, + 0.34668, + 0.35166, + 0.35128, + 0.35728, + 0.3546, + 0.35978, + 0.361, + 0.3614, + 0.36304, + 0.36228, + 0.36516, + 0.36602, + 0.36758, + 0.36578, + 0.36746, + 0.3672, + 0.36552, + 0.3666, + 0.37044, + 0.3676, + 0.37066, + 0.37068, + 0.37034, + 0.37252, + 0.37222, + 0.37374, + 0.372, + 0.37456, + 0.37316, + 0.37352, + 0.37646, + 0.37608, + 0.37806, + 0.37702, + 0.37736, + 0.3746, + 0.37672, + 0.37986, + 0.37954, + 0.38086, + 0.3791, + 0.37724, + 0.3785, + 0.38128, + 0.37962, + 0.3815, + 0.38328, + 0.38234, + 0.38302, + 0.38246, + 0.38054, + 0.37982, + 0.38474, + 0.38062, + 0.382, + 0.3824, + 0.38212, + 0.38174, + 0.38348, + 0.38414, + 0.38298, + 0.38414, + 0.38316, + 0.38198, + 0.38374, + 0.3837, + 0.38268, + 0.38232, + 0.38248, + 0.38382, + 0.38298, + 0.38342 + ], + "test_acc": [ + 0.2801, + 0.3073, + 0.3193, + 0.3203, + 0.3257, + 0.3225, + 0.3347, + 0.3351, + 0.3319, + 0.3249, + 0.337, + 0.3477, + 0.3485, + 0.3531, + 0.3504, + 0.3551, + 0.3542, + 0.3603, + 0.3587, + 0.3676, + 0.3669, + 0.3616, + 0.3612, + 0.3648, + 0.3724, + 0.3675, + 0.3764, + 0.3806, + 0.3759, + 0.3844, + 0.38, + 0.3841, + 0.3848, + 0.3885, + 0.3898, + 0.3917, + 0.3918, + 0.3932, + 0.3872, + 0.3948, + 0.3953, + 0.3905, + 0.3913, + 0.3936, + 0.3956, + 0.3946, + 0.3982, + 0.3893, + 0.4001, + 0.3968, + 0.3968, + 0.3963, + 0.3964, + 0.4, + 0.3958, + 0.3986, + 0.397, + 0.398, + 0.3948, + 0.3973, + 0.3975, + 0.401, + 0.4026, + 0.3986, + 0.3986, + 0.3997, + 0.4009, + 0.3998, + 0.3987, + 0.4021, + 0.4003, + 0.4008, + 0.4017, + 0.4014, + 0.4009, + 0.4024, + 0.3995, + 0.4047, + 0.4028, + 0.4026, + 0.4041, + 0.4028, + 0.4013, + 0.4016, + 0.4038, + 0.404, + 0.404, + 0.405, + 0.4042, + 0.4014, + 0.4029, + 0.404, + 0.4044, + 0.4035, + 0.4035, + 0.4036, + 0.4035, + 0.4036, + 0.4036, + 0.4035 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.015765948221087456, + 0.07965049147605896, + 0.01825195923447609, + -0.06194503605365753, + -0.06945411115884781, + -0.027306973934173584, + -0.06518878042697906, + 0.9981832504272461 + ], + "perturbation_rho": [ + -0.003869679756462574, + 0.028149917721748352, + -0.03722488507628441, + 0.010893862694501877, + -0.053424231708049774, + 0.0007988980505615473, + 0.014475717209279537, + 0.039686620235443115 + ], + "nudging": { + "0.001": [ + 1.698499545454979e-07, + -3.166496753692627e-07, + -3.213062882423401e-08, + 1.387670636177063e-07, + 1.8265563994646072e-07, + 7.741618901491165e-08, + 1.4121178537607193e-07, + -1.2140953913331032e-06 + ], + "0.003": [ + 3.164168447256088e-07, + -1.0326039046049118e-06, + -5.611218512058258e-08, + 2.748565748333931e-07, + 3.5634730011224747e-07, + 1.3550743460655212e-07, + 3.384193405508995e-07, + -4.2710453271865845e-06 + ], + "0.01": [ + 1.0926742106676102e-06, + -3.4980475902557373e-06, + -2.4691689759492874e-07, + 1.008971594274044e-06, + 1.0706717148423195e-06, + 4.578614607453346e-07, + 1.044594682753086e-06, + -1.4978926628828049e-05 + ] + }, + "hidden_norms_per_layer": [ + 5578.46875, + 33612.796875, + 156752.953125, + 338066.0625, + 628331.5, + 779574.8125, + 844256.6875, + 888649.0, + 425876.65625 + ], + "bp_grad_norms_per_layer": [ + 2.167309503420256e-05, + 2.129869471900747e-06, + 8.520624419361411e-07, + 8.023235977816512e-07, + 8.024675253182068e-07, + 8.062054348556558e-07, + 8.076636390796921e-07, + 8.067011663115409e-07, + 8.030539788705937e-07 + ] + }, + "drift": { + "embed.weight": 54.49055679435881, + "embed.bias": 20.29269603704931, + "blocks.0.ln.weight": 1.6682279109954834, + "blocks.0.w1.weight": 17.062151194987237, + "blocks.0.w1.bias": 13.499766617268852, + "blocks.0.w2.weight": 71.40829154639394, + "blocks.1.ln.weight": 1.4543473720550537, + "blocks.1.w1.weight": 19.719236452544127, + "blocks.1.w1.bias": 9.828062345144708, + "blocks.1.w2.weight": 58.72293185943862, + "blocks.2.ln.weight": 1.0721585750579834, + "blocks.2.w1.weight": 18.58227228825852, + "blocks.2.w1.bias": 15.746003179119816, + "blocks.2.w2.weight": 41.71235474869442, + "blocks.3.ln.weight": 0.8726892471313477, + "blocks.3.w1.weight": 20.471342330584488, + "blocks.3.w1.bias": 19.512499778298427, + "blocks.3.w2.weight": 36.10169832252701, + "blocks.4.ln.weight": 0.7409772276878357, + "blocks.4.w1.weight": 17.51931629450496, + "blocks.4.w1.bias": 18.723236583560368, + "blocks.4.w2.weight": 30.24901961387597, + "blocks.5.ln.weight": 0.7194885015487671, + "blocks.5.w1.weight": 16.539786302289123, + "blocks.5.w1.bias": 16.75240225769585, + "blocks.5.w2.weight": 28.9854050706238, + "blocks.6.ln.weight": 0.7770423293113708, + "blocks.6.w1.weight": 17.2275004107886, + "blocks.6.w1.bias": 18.38493950420895, + "blocks.6.w2.weight": 32.31440391348023, + "blocks.7.ln.weight": 0.9231076240539551, + "blocks.7.w1.weight": 19.50195743824471, + "blocks.7.w1.bias": 21.20774557625887, + "blocks.7.w2.weight": 46.39053908141515, + "out_ln.weight": 0.307411789894104, + "out_head.weight": 4.604781316777015, + "out_head.bias": 1.258175899516633 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 256, + "num_blocks": 8, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 3 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d256_L8_seed3", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file |
