diff options
Diffstat (limited to 'results/fa_dfa_d512_L6_seed1/results_cifar10.json')
| -rw-r--r-- | results/fa_dfa_d512_L6_seed1/results_cifar10.json | 837 |
1 files changed, 837 insertions, 0 deletions
diff --git a/results/fa_dfa_d512_L6_seed1/results_cifar10.json b/results/fa_dfa_d512_L6_seed1/results_cifar10.json new file mode 100644 index 0000000..e0848f8 --- /dev/null +++ b/results/fa_dfa_d512_L6_seed1/results_cifar10.json @@ -0,0 +1,837 @@ +{ + "1": { + "dfa": { + "log": { + "train_loss": [ + 2.0748964222717285, + 2.0451132692718508, + 2.032143096008301, + 2.029643494186401, + 2.030650337371826, + 2.0243890724945066, + 2.020762248840332, + 2.0232248431396482, + 2.0159964645004274, + 2.0192399645996093, + 2.01540469039917, + 2.0154635834503174, + 2.017197366409302, + 2.0137894206237794, + 2.0133308319091796, + 2.0108140522003173, + 2.011235432815552, + 2.009914517288208, + 2.0094243881988527, + 2.0100407147979737, + 2.010185157546997, + 2.0089511253356935, + 2.0095393753051756, + 2.0059863204574584, + 2.0064435525512696, + 2.005170561828613, + 2.0074975017547607, + 2.004913715057373, + 2.0057138012313844, + 2.004565502243042, + 2.006841945838928, + 2.0055636949157716, + 2.0060591010284425, + 2.006927216835022, + 2.0074666262817384, + 2.004335132293701, + 2.005483384475708, + 2.006595082168579, + 2.003282996673584, + 2.0056812954330443, + 2.003032290878296, + 2.0057049587249756, + 2.004697043457031, + 2.006551830406189, + 2.0058861305236815, + 2.0044844329071045, + 2.0043990351867675, + 2.0037572646331787, + 2.0045921185302733, + 2.0048506913757325, + 2.003375824584961, + 2.004632271575928, + 2.0042632717895508, + 2.0026413610076905, + 2.001700767211914, + 2.003248519668579, + 2.003593999633789, + 2.003497390823364, + 2.003082277145386, + 2.001226206436157, + 2.0015557537841797, + 2.0028394889831542, + 2.0041756005859375, + 2.0014186894226076, + 2.0017097331237794, + 2.0027389540863036, + 2.00000232421875, + 2.001385536727905, + 2.0014734981536866, + 2.000507820701599, + 2.0018744316101076, + 2.00272723903656, + 2.0029756539535524, + 2.0013647467422486, + 2.0018114362335204, + 2.0025039096832273, + 2.0007914069366457, + 2.0015052102279665, + 2.0013790615844727, + 2.000559573059082, + 2.000183110809326, + 2.002135623321533, + 1.9992622534561157, + 2.0023144091033935, + 1.9984413458633423, + 2.0006315287780763, + 2.0000011035919187, + 1.9984883616638183, + 1.9980969067382812, + 1.999547445678711, + 1.999142066078186, + 2.0002035105895994, + 1.9980618365859986, + 1.9986838175201416, + 1.9983668603515625, + 1.9985755539321899, + 2.000544935379028, + 1.9968827368545532, + 1.9982192471313476, + 2.0020621044921874 + ], + "train_acc": [ + 0.23312, + 0.24122, + 0.2498, + 0.25198, + 0.25274, + 0.25466, + 0.25756, + 0.25584, + 0.26256, + 0.25706, + 0.26104, + 0.26366, + 0.26034, + 0.2643, + 0.2637, + 0.26512, + 0.26362, + 0.26534, + 0.2631, + 0.26636, + 0.26484, + 0.26708, + 0.2655, + 0.26972, + 0.26856, + 0.27134, + 0.2676, + 0.26984, + 0.26974, + 0.27138, + 0.2693, + 0.27122, + 0.27032, + 0.27198, + 0.27264, + 0.27422, + 0.26884, + 0.27136, + 0.27186, + 0.27214, + 0.27222, + 0.27306, + 0.27124, + 0.26954, + 0.27338, + 0.27444, + 0.27396, + 0.27308, + 0.27134, + 0.27374, + 0.27228, + 0.27202, + 0.2734, + 0.27426, + 0.27536, + 0.2763, + 0.27336, + 0.2738, + 0.2753, + 0.27466, + 0.2761, + 0.27344, + 0.27534, + 0.2757, + 0.27462, + 0.27636, + 0.27752, + 0.27438, + 0.27618, + 0.27512, + 0.27614, + 0.27672, + 0.2751, + 0.2764, + 0.27506, + 0.27662, + 0.2763, + 0.27732, + 0.27492, + 0.27632, + 0.27788, + 0.27568, + 0.27686, + 0.27594, + 0.27624, + 0.27706, + 0.27618, + 0.27852, + 0.27466, + 0.27688, + 0.27624, + 0.2765, + 0.27688, + 0.27766, + 0.27692, + 0.27586, + 0.27718, + 0.27828, + 0.27846, + 0.27564 + ], + "test_acc": [ + 0.2476, + 0.2684, + 0.2584, + 0.2859, + 0.2672, + 0.2792, + 0.2735, + 0.2954, + 0.2679, + 0.29, + 0.2763, + 0.2726, + 0.2807, + 0.2893, + 0.296, + 0.2701, + 0.2694, + 0.2782, + 0.2885, + 0.2811, + 0.2919, + 0.2734, + 0.2917, + 0.2961, + 0.2922, + 0.3011, + 0.2957, + 0.2783, + 0.3042, + 0.2892, + 0.2859, + 0.2808, + 0.2889, + 0.2977, + 0.2867, + 0.2998, + 0.2953, + 0.2908, + 0.2808, + 0.287, + 0.2953, + 0.303, + 0.287, + 0.3021, + 0.2834, + 0.3003, + 0.2861, + 0.2915, + 0.2953, + 0.3065, + 0.2839, + 0.2883, + 0.298, + 0.2928, + 0.2955, + 0.2906, + 0.3041, + 0.2956, + 0.2883, + 0.2887, + 0.2903, + 0.2939, + 0.294, + 0.2878, + 0.2835, + 0.2948, + 0.2838, + 0.2948, + 0.296, + 0.2941, + 0.294, + 0.3002, + 0.2949, + 0.2918, + 0.2977, + 0.2982, + 0.2944, + 0.2961, + 0.2936, + 0.2989, + 0.2956, + 0.2995, + 0.2953, + 0.2967, + 0.3003, + 0.2945, + 0.2989, + 0.2961, + 0.2974, + 0.3003, + 0.2941, + 0.2969, + 0.2982, + 0.2977, + 0.2981, + 0.2977, + 0.2979, + 0.2972, + 0.297, + 0.2969 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.4029613733291626, + 7.883789658080786e-05, + -0.0002453301858622581, + -0.0004192243213765323, + 0.000372876413166523, + -0.000354595307726413 + ], + "perturbation_rho": [ + 0.011547078378498554, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -3.4691765904426575e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.1422671377658844e-06, + 0.0, + -2.7939677238464355e-09, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -3.50363552570343e-06, + 1.862645149230957e-09, + -3.725290298461914e-09, + 0.0, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 56897.21875, + 1696502784.0, + 3738960896.0, + 4607917568.0, + 5758332928.0, + 6969167872.0, + 7904643072.0 + ], + "bp_grad_norms_per_layer": [ + 2.473913411904505e-07, + 2.2789505005338384e-10, + 2.2798352095065866e-10, + 2.2797391752149565e-10, + 2.2786038333943992e-10, + 2.2787945141988786e-10, + 2.2788908260462648e-10 + ] + }, + "drift": { + "embed.weight": 341.6512135112792, + "embed.bias": 266.6380085288841, + "blocks.0.ln.weight": 9.881568196594484, + "blocks.0.w1.weight": 316.78039360087587, + "blocks.0.w1.bias": 286.8114997527113, + "blocks.0.w2.weight": 487.5637400185808, + "blocks.1.ln.weight": 9.2020409527447, + "blocks.1.w1.weight": 361.84956095939293, + "blocks.1.w1.bias": 334.52465546675654, + "blocks.1.w2.weight": 335.23042288085577, + "blocks.2.ln.weight": 8.38680422611515, + "blocks.2.w1.weight": 335.6889109899642, + "blocks.2.w1.bias": 310.7077386931781, + "blocks.2.w2.weight": 315.3949169119556, + "blocks.3.ln.weight": 8.903503018515195, + "blocks.3.w1.weight": 371.4244307750354, + "blocks.3.w1.bias": 345.3889053690205, + "blocks.3.w2.weight": 352.95729115252703, + "blocks.4.ln.weight": 9.577081150302154, + "blocks.4.w1.weight": 387.7879530619586, + "blocks.4.w1.bias": 358.359826994576, + "blocks.4.w2.weight": 353.9896695590463, + "blocks.5.ln.weight": 9.043917438368451, + "blocks.5.w1.weight": 368.8104037409108, + "blocks.5.w1.bias": 337.4941181161536, + "blocks.5.w2.weight": 342.83001538637893, + "out_ln.weight": 0.6049025805082792, + "out_head.weight": 8.662451359844601, + "out_head.bias": 1.2452406037396404 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0533182458114623, + 1.9470771118927002, + 1.9098005801391602, + 1.8933470011138915, + 1.8866070096206664, + 1.8806784909820558, + 1.8700345465469361, + 1.8668433471298218, + 1.8549686681747437, + 1.8534734383773803, + 1.844026010055542, + 1.8412904165649413, + 1.8354822329711915, + 1.8315302365493775, + 1.8307162652206421, + 1.8245070376968384, + 1.820267495956421, + 1.8159903725814819, + 1.8120850400543214, + 1.8062479650115968, + 1.8023771206665038, + 1.7989153610992432, + 1.7973320705795288, + 1.789391294517517, + 1.7857570403289795, + 1.7807958832550048, + 1.7800961505126953, + 1.7727591732025147, + 1.7719479108047484, + 1.7722896636199952, + 1.765616308631897, + 1.765071000442505, + 1.7588437004852295, + 1.7560785748672485, + 1.7530314648056031, + 1.7516267270278931, + 1.7478641785049438, + 1.747710930519104, + 1.7394591223526001, + 1.7430037671279908, + 1.7430737688827516, + 1.743327683944702, + 1.735208967590332, + 1.7367942264175416, + 1.735928176651001, + 1.7336044045639039, + 1.7319979761123658, + 1.7286449816131593, + 1.7276164590072631, + 1.7342356995010375, + 1.7273106130599976, + 1.728026152114868, + 1.7241678939437866, + 1.7218856454467772, + 1.720424718055725, + 1.7259459881973267, + 1.723678625831604, + 1.7201445708465577, + 1.7179317428588867, + 1.714501748046875, + 1.7184516412353517, + 1.7182959340667725, + 1.7173260328674316, + 1.7162155276870728, + 1.7148893152618407, + 1.7136942127990722, + 1.7128957564926148, + 1.7095485464859008, + 1.711679192199707, + 1.7081169268417358, + 1.7124231735992432, + 1.7100938080215453, + 1.7099676737213134, + 1.7056838412094115, + 1.7102637582397462, + 1.7076803800201417, + 1.703552028427124, + 1.7047434192657471, + 1.7060186026382447, + 1.7038543602752685, + 1.7023552209091186, + 1.7033933319854737, + 1.7048375073623656, + 1.7029183816146851, + 1.7019960340118407, + 1.7012598460006714, + 1.700243671836853, + 1.6951217443847657, + 1.7031349285125732, + 1.6982279485702514, + 1.7003905519866944, + 1.7008577185821534, + 1.69846902469635, + 1.7006194688796996, + 1.6990861594009399, + 1.7007253762817383, + 1.702599566040039, + 1.7018861039352418, + 1.699664118347168, + 1.7012134171295166 + ], + "train_acc": [ + 0.24282, + 0.28774, + 0.30648, + 0.31656, + 0.32152, + 0.3235, + 0.33028, + 0.32876, + 0.33768, + 0.33628, + 0.33892, + 0.341, + 0.3464, + 0.34646, + 0.3456, + 0.35168, + 0.34876, + 0.35232, + 0.35214, + 0.3571, + 0.3564, + 0.35782, + 0.35768, + 0.36332, + 0.3616, + 0.36234, + 0.3655, + 0.36546, + 0.36836, + 0.36622, + 0.36982, + 0.36954, + 0.3712, + 0.37104, + 0.37318, + 0.37486, + 0.37374, + 0.37526, + 0.37592, + 0.37592, + 0.37662, + 0.37494, + 0.37758, + 0.37708, + 0.37796, + 0.38076, + 0.37962, + 0.38058, + 0.3806, + 0.37936, + 0.38016, + 0.38172, + 0.38376, + 0.384, + 0.38752, + 0.3876, + 0.38602, + 0.38746, + 0.38644, + 0.386, + 0.38718, + 0.38552, + 0.38468, + 0.3868, + 0.38882, + 0.38934, + 0.3874, + 0.38992, + 0.3904, + 0.3908, + 0.38916, + 0.38954, + 0.39022, + 0.39024, + 0.39018, + 0.39104, + 0.39206, + 0.39178, + 0.39074, + 0.3921, + 0.39292, + 0.39326, + 0.39202, + 0.3935, + 0.39312, + 0.3942, + 0.39594, + 0.39766, + 0.39178, + 0.39502, + 0.39626, + 0.39468, + 0.39338, + 0.39468, + 0.39366, + 0.39276, + 0.39356, + 0.39354, + 0.39138, + 0.39408 + ], + "test_acc": [ + 0.2926, + 0.3237, + 0.3296, + 0.3577, + 0.3485, + 0.3454, + 0.3678, + 0.3648, + 0.3607, + 0.3756, + 0.3775, + 0.3817, + 0.3786, + 0.3781, + 0.3803, + 0.3682, + 0.369, + 0.371, + 0.3834, + 0.3875, + 0.3822, + 0.3805, + 0.392, + 0.401, + 0.3844, + 0.4015, + 0.3866, + 0.3901, + 0.3987, + 0.3939, + 0.3837, + 0.3972, + 0.3877, + 0.3994, + 0.4002, + 0.4033, + 0.4085, + 0.4058, + 0.391, + 0.399, + 0.3986, + 0.4086, + 0.404, + 0.4086, + 0.4062, + 0.4047, + 0.3991, + 0.4037, + 0.3968, + 0.4118, + 0.4017, + 0.4095, + 0.4117, + 0.4037, + 0.4073, + 0.4087, + 0.4137, + 0.4088, + 0.3997, + 0.4059, + 0.409, + 0.4099, + 0.4096, + 0.412, + 0.4102, + 0.4137, + 0.4102, + 0.4121, + 0.4119, + 0.4132, + 0.4149, + 0.412, + 0.415, + 0.4148, + 0.4154, + 0.416, + 0.4112, + 0.4109, + 0.4153, + 0.4148, + 0.4183, + 0.4172, + 0.4152, + 0.4157, + 0.418, + 0.4115, + 0.4187, + 0.4127, + 0.4108, + 0.4153, + 0.4141, + 0.4132, + 0.415, + 0.4124, + 0.4151, + 0.4147, + 0.4136, + 0.4142, + 0.4147, + 0.4144 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.030343683436512947, + 0.09938552975654602, + -0.07437123358249664, + -0.07549507170915604, + -0.09523595124483109, + 0.9972963929176331 + ], + "perturbation_rho": [ + 0.04541456699371338, + 0.008701281622052193, + -0.00402827700600028, + 0.03399529308080673, + -0.00989921111613512, + 0.004614755045622587 + ], + "nudging": { + "0.001": [ + -2.914457581937313e-06, + -5.584442988038063e-07, + 1.0908115655183792e-07, + 4.6566128730773926e-08, + 4.377216100692749e-08, + -1.0116491466760635e-06 + ], + "0.003": [ + -8.360599167644978e-06, + -2.0274892449378967e-06, + 2.825399860739708e-07, + 3.023305907845497e-07, + 2.789311110973358e-07, + -4.071509465575218e-06 + ], + "0.01": [ + -2.804200630635023e-05, + -6.612506695091724e-06, + 9.683426469564438e-07, + 1.1706724762916565e-06, + 1.3328390195965767e-06, + -1.4974735677242279e-05 + ] + }, + "hidden_norms_per_layer": [ + 5724.40771484375, + 72733.609375, + 478685.65625, + 1483182.625, + 1809620.875, + 2091372.375, + 857882.6875 + ], + "bp_grad_norms_per_layer": [ + 3.0453806175501086e-05, + 2.1086596007080516e-06, + 6.627448101426126e-07, + 6.460473969127634e-07, + 6.491723638646363e-07, + 6.540217896144895e-07, + 6.466638637903088e-07 + ] + }, + "drift": { + "embed.weight": 42.22188941497859, + "embed.bias": 17.68183956971274, + "blocks.0.ln.weight": 1.180802481067003, + "blocks.0.w1.weight": 15.4315958124657, + "blocks.0.w1.bias": 12.305080689576908, + "blocks.0.w2.weight": 57.44824499129088, + "blocks.1.ln.weight": 0.9692930700548912, + "blocks.1.w1.weight": 19.880441088833255, + "blocks.1.w1.bias": 16.257331118504066, + "blocks.1.w2.weight": 48.06735579536956, + "blocks.2.ln.weight": 0.7719428930423391, + "blocks.2.w1.weight": 23.427567219106724, + "blocks.2.w1.bias": 24.716070592776813, + "blocks.2.w2.weight": 33.2055102304801, + "blocks.3.ln.weight": 0.6610316048264212, + "blocks.3.w1.weight": 21.14916966984416, + "blocks.3.w1.bias": 22.188834381211755, + "blocks.3.w2.weight": 37.41900122482242, + "blocks.4.ln.weight": 0.6045383052903652, + "blocks.4.w1.weight": 21.6093498763088, + "blocks.4.w1.bias": 23.188519739977856, + "blocks.4.w2.weight": 29.43277495491079, + "blocks.5.ln.weight": 0.7110348796366065, + "blocks.5.w1.weight": 22.91348409054967, + "blocks.5.w1.bias": 24.128500394824375, + "blocks.5.w2.weight": 42.82735324390181, + "out_ln.weight": 0.2879508142897712, + "out_head.weight": 5.9584913076844686, + "out_head.bias": 1.1797514883024003 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 6, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 1 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L6_seed1", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file |
