diff options
Diffstat (limited to 'results/fa_dfa_d512_L6_seed3/results_cifar10.json')
| -rw-r--r-- | results/fa_dfa_d512_L6_seed3/results_cifar10.json | 837 |
1 files changed, 837 insertions, 0 deletions
diff --git a/results/fa_dfa_d512_L6_seed3/results_cifar10.json b/results/fa_dfa_d512_L6_seed3/results_cifar10.json new file mode 100644 index 0000000..fe507fc --- /dev/null +++ b/results/fa_dfa_d512_L6_seed3/results_cifar10.json @@ -0,0 +1,837 @@ +{ + "3": { + "dfa": { + "log": { + "train_loss": [ + 2.0551145947265623, + 2.0254818302154542, + 2.0194406120300292, + 2.0175874392700197, + 2.0163199629211426, + 2.0142830805969236, + 2.007013447418213, + 2.003593424758911, + 2.0067723297500613, + 2.003558956604004, + 2.00101790184021, + 2.0011146197509766, + 1.998018801345825, + 1.9996520843887329, + 1.9962160342407227, + 1.997622791786194, + 1.9969604161071777, + 1.9916633380126954, + 1.9958815657806397, + 1.9930840515899657, + 1.9948459027862548, + 1.9917297480773926, + 1.9886382999420167, + 1.9919039403533936, + 1.990138112449646, + 1.9900766090393067, + 1.9897496450042724, + 1.9914394456481934, + 1.9891233996582032, + 1.985649783477783, + 1.9873360358428955, + 1.989273772125244, + 1.987579068336487, + 1.9871572724151612, + 1.9870523969268798, + 1.983975528640747, + 1.987108448867798, + 1.9884108324813843, + 1.988470245361328, + 1.9860010179138183, + 1.9886363947296142, + 1.987370449295044, + 1.9879705474853515, + 1.9867627616882324, + 1.9860119804382323, + 1.9856234351348876, + 1.9848077922058105, + 1.9845034539794921, + 1.9853366620635986, + 1.9832583249664306, + 1.9855356827163697, + 1.9846282432556153, + 1.9862506673431397, + 1.9845512745666505, + 1.984809038848877, + 1.983686294631958, + 1.9839595748138428, + 1.9834054181671144, + 1.9851946828460694, + 1.9813065106964112, + 1.982595512084961, + 1.9824060856628418, + 1.9832570594787597, + 1.9817028414916993, + 1.9841908657836913, + 1.9825086151123046, + 1.9821907202148437, + 1.9809494961166383, + 1.9812863923645019, + 1.9832594239807129, + 1.9819738615417481, + 1.982517120513916, + 1.9800974131011964, + 1.983135057144165, + 1.983559645614624, + 1.9801312906646729, + 1.9810623525238038, + 1.9804602764511108, + 1.9805444803619385, + 1.9829668161010743, + 1.9795248139190673, + 1.9794620516967774, + 1.9808693537902833, + 1.9817010179138184, + 1.9800636986541749, + 1.9797203030395507, + 1.980884309463501, + 1.9790666972351074, + 1.9815667838287354, + 1.9785900994110108, + 1.9793143473815917, + 1.9813185803985596, + 1.9775366342926026, + 1.9791302056884765, + 1.9800324520874024, + 1.9808550524902344, + 1.9790050820541383, + 1.9809321939086915, + 1.9801498863220215, + 1.9817508765029908 + ], + "train_acc": [ + 0.24236, + 0.25538, + 0.26038, + 0.25914, + 0.2596, + 0.26144, + 0.26392, + 0.26632, + 0.2648, + 0.26606, + 0.27124, + 0.26782, + 0.26864, + 0.2686, + 0.27008, + 0.27032, + 0.2686, + 0.2709, + 0.27116, + 0.26994, + 0.27146, + 0.27376, + 0.2744, + 0.2734, + 0.27522, + 0.27308, + 0.27442, + 0.27432, + 0.27566, + 0.27572, + 0.27774, + 0.2749, + 0.275, + 0.27594, + 0.27506, + 0.27894, + 0.27722, + 0.27576, + 0.27618, + 0.27836, + 0.27742, + 0.27672, + 0.27846, + 0.27852, + 0.27952, + 0.27852, + 0.2798, + 0.2786, + 0.27982, + 0.27886, + 0.27922, + 0.28008, + 0.28086, + 0.27944, + 0.27862, + 0.2795, + 0.2811, + 0.28046, + 0.28042, + 0.28068, + 0.2808, + 0.2807, + 0.28074, + 0.28092, + 0.28042, + 0.28318, + 0.28114, + 0.28196, + 0.28128, + 0.28244, + 0.28616, + 0.28052, + 0.28304, + 0.28028, + 0.28134, + 0.283, + 0.281, + 0.28442, + 0.2821, + 0.2828, + 0.28364, + 0.28418, + 0.2818, + 0.2828, + 0.28202, + 0.28332, + 0.2823, + 0.28336, + 0.27986, + 0.28428, + 0.28444, + 0.28348, + 0.28552, + 0.28302, + 0.28178, + 0.28176, + 0.28462, + 0.28336, + 0.28208, + 0.28378 + ], + "test_acc": [ + 0.2839, + 0.276, + 0.2703, + 0.2882, + 0.3045, + 0.2916, + 0.2881, + 0.2861, + 0.2942, + 0.2742, + 0.286, + 0.3032, + 0.3043, + 0.284, + 0.3085, + 0.2889, + 0.2865, + 0.2944, + 0.288, + 0.2904, + 0.2951, + 0.2866, + 0.3007, + 0.2883, + 0.2858, + 0.3049, + 0.2761, + 0.3085, + 0.2795, + 0.2969, + 0.2937, + 0.3004, + 0.2812, + 0.2902, + 0.3023, + 0.2997, + 0.2918, + 0.3121, + 0.2969, + 0.2938, + 0.2951, + 0.3021, + 0.3015, + 0.31, + 0.2967, + 0.2972, + 0.3, + 0.3063, + 0.3104, + 0.3045, + 0.3005, + 0.3049, + 0.3048, + 0.3025, + 0.3043, + 0.3031, + 0.2936, + 0.2981, + 0.3033, + 0.2941, + 0.3064, + 0.2988, + 0.3068, + 0.3013, + 0.2997, + 0.3068, + 0.3062, + 0.3059, + 0.293, + 0.3029, + 0.3083, + 0.3108, + 0.3016, + 0.3031, + 0.2946, + 0.2992, + 0.3066, + 0.3046, + 0.3006, + 0.3035, + 0.2988, + 0.3031, + 0.3015, + 0.3028, + 0.3008, + 0.3022, + 0.3064, + 0.3045, + 0.3051, + 0.3041, + 0.3025, + 0.3045, + 0.3034, + 0.3039, + 0.3049, + 0.3044, + 0.3042, + 0.3046, + 0.3046, + 0.3046 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.3834345042705536, + 0.0005603223107755184, + 0.0008055042708292603, + 4.550980156636797e-05, + -0.0009304977720603347, + 0.0005176510312594473 + ], + "perturbation_rho": [ + -0.015653517097234726, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -3.976747393608093e-07, + 0.0, + 0.0, + -9.313225746154785e-10, + 0.0, + 0.0 + ], + "0.003": [ + -1.353677362203598e-06, + 0.0, + 0.0, + -9.313225746154785e-10, + 0.0, + 0.0 + ], + "0.01": [ + -4.4209882616996765e-06, + -1.862645149230957e-09, + 0.0, + -9.313225746154785e-10, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 53414.71484375, + 1171344000.0, + 1695788416.0, + 3132320256.0, + 5559488000.0, + 6114624000.0, + 6167857152.0 + ], + "bp_grad_norms_per_layer": [ + 2.961761254027806e-07, + 3.1574928782696077e-10, + 3.1493549434991053e-10, + 3.145896598777398e-10, + 3.146878591042679e-10, + 3.147946070480856e-10, + 3.1481975359959335e-10 + ] + }, + "drift": { + "embed.weight": 323.54545540843105, + "embed.bias": 220.1187911959123, + "blocks.0.ln.weight": 9.685558373681214, + "blocks.0.w1.weight": 284.1235348747452, + "blocks.0.w1.bias": 249.08037975739575, + "blocks.0.w2.weight": 467.48682118152, + "blocks.1.ln.weight": 7.467562562572266, + "blocks.1.w1.weight": 239.93081695201815, + "blocks.1.w1.bias": 214.84010837594673, + "blocks.1.w2.weight": 270.7171266420411, + "blocks.2.ln.weight": 8.6999741976989, + "blocks.2.w1.weight": 335.1621989545166, + "blocks.2.w1.bias": 297.63643437529385, + "blocks.2.w2.weight": 332.9487355280909, + "blocks.3.ln.weight": 9.65496650463451, + "blocks.3.w1.weight": 396.7552349422605, + "blocks.3.w1.bias": 368.25716787661366, + "blocks.3.w2.weight": 397.6707533080763, + "blocks.4.ln.weight": 8.086181202960429, + "blocks.4.w1.weight": 327.52147542974734, + "blocks.4.w1.bias": 308.89251019375376, + "blocks.4.w2.weight": 317.4051434598822, + "blocks.5.ln.weight": 5.9782378958674425, + "blocks.5.w1.weight": 215.37270785666712, + "blocks.5.w1.bias": 206.13969443200085, + "blocks.5.w2.weight": 210.55284579153434, + "out_ln.weight": 0.6330900560999851, + "out_head.weight": 8.856435188669105, + "out_head.bias": 0.9683577161846454 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0186621208190916, + 1.9331099752044678, + 1.905914333114624, + 1.8951857843399047, + 1.8854292852783203, + 1.8709850173568725, + 1.8602817873382569, + 1.8479190076065064, + 1.8414129400253296, + 1.8321620767974853, + 1.826335980758667, + 1.8197622241210938, + 1.8174528274917603, + 1.8127154489898682, + 1.8082266244888305, + 1.811778229446411, + 1.8112502433013915, + 1.8042263799667357, + 1.8090753686141967, + 1.8053230755233765, + 1.8077443563842774, + 1.804732971496582, + 1.8016516897583008, + 1.8038146850204468, + 1.8026063943862916, + 1.802457709350586, + 1.8002039365386964, + 1.8020965616607667, + 1.7966074966812133, + 1.796929923324585, + 1.7942455569076539, + 1.7956397313690187, + 1.7895081832504272, + 1.7901598165893555, + 1.785636817932129, + 1.7812746408843994, + 1.7844691129302979, + 1.783790927810669, + 1.7788431832885743, + 1.7756607747650146, + 1.774973991355896, + 1.769966243019104, + 1.7683391579818726, + 1.7691140979766846, + 1.7666235674285888, + 1.763746886062622, + 1.7614172232437133, + 1.7630763125610351, + 1.7593659213256836, + 1.7556221802139282, + 1.7563741372680663, + 1.7534167012786865, + 1.7579979578018188, + 1.7498029040145875, + 1.7481240466308594, + 1.747141012802124, + 1.7512671886444091, + 1.7454403200531006, + 1.747818590774536, + 1.7458769092559814, + 1.7429523513412475, + 1.7409421573638917, + 1.7443068244171143, + 1.7444597329330445, + 1.7395000383758545, + 1.7387146155548097, + 1.7404073351669311, + 1.7386165090560912, + 1.7302196057891845, + 1.7337853803253174, + 1.734050626296997, + 1.7332715703582764, + 1.7335029886245727, + 1.7333770494842529, + 1.7329201885223389, + 1.7302788830947875, + 1.7308894277572633, + 1.7317599598693847, + 1.7300064459609985, + 1.730283342819214, + 1.7256998642349244, + 1.7275242566680908, + 1.7297832077026367, + 1.7274928902435303, + 1.7220390670776367, + 1.725278727684021, + 1.7244994747924804, + 1.725174829978943, + 1.72525224609375, + 1.7249654501724243, + 1.7217949364852905, + 1.724819582977295, + 1.7225480925750734, + 1.7213666839981079, + 1.7232744509887696, + 1.7264412049102784, + 1.7210735564804076, + 1.7247192889404297, + 1.7228876683807373, + 1.7252232053375245 + ], + "train_acc": [ + 0.26044, + 0.2964, + 0.30886, + 0.31392, + 0.32094, + 0.32556, + 0.32806, + 0.33438, + 0.33584, + 0.34332, + 0.34448, + 0.3469, + 0.3493, + 0.34834, + 0.34978, + 0.3522, + 0.35074, + 0.35222, + 0.3523, + 0.35548, + 0.35364, + 0.35582, + 0.35566, + 0.3538, + 0.35346, + 0.35642, + 0.3576, + 0.35768, + 0.35674, + 0.35842, + 0.35762, + 0.35934, + 0.36148, + 0.36034, + 0.36122, + 0.3633, + 0.36236, + 0.3648, + 0.36606, + 0.36512, + 0.36748, + 0.36804, + 0.36888, + 0.36944, + 0.37288, + 0.37268, + 0.37384, + 0.37068, + 0.37262, + 0.375, + 0.37486, + 0.37624, + 0.37518, + 0.3761, + 0.37538, + 0.3772, + 0.37746, + 0.37694, + 0.37704, + 0.37738, + 0.37926, + 0.37986, + 0.37926, + 0.3778, + 0.38224, + 0.38084, + 0.3805, + 0.381, + 0.38198, + 0.38366, + 0.38362, + 0.38348, + 0.38632, + 0.38202, + 0.38298, + 0.38632, + 0.38186, + 0.38376, + 0.38438, + 0.3837, + 0.38756, + 0.3857, + 0.38298, + 0.38544, + 0.38558, + 0.3877, + 0.38692, + 0.38824, + 0.38624, + 0.38652, + 0.38646, + 0.38884, + 0.38832, + 0.3871, + 0.38898, + 0.38446, + 0.38604, + 0.38742, + 0.38538, + 0.3872 + ], + "test_acc": [ + 0.3158, + 0.3361, + 0.343, + 0.3494, + 0.3693, + 0.3591, + 0.3652, + 0.3642, + 0.376, + 0.3646, + 0.3753, + 0.3821, + 0.3889, + 0.3824, + 0.3807, + 0.3765, + 0.3749, + 0.3867, + 0.3837, + 0.3793, + 0.3829, + 0.3792, + 0.3865, + 0.3772, + 0.3856, + 0.3842, + 0.3784, + 0.3846, + 0.3821, + 0.3885, + 0.3845, + 0.3915, + 0.3893, + 0.3836, + 0.3942, + 0.4018, + 0.3946, + 0.397, + 0.3956, + 0.3996, + 0.3947, + 0.3958, + 0.3944, + 0.4035, + 0.4029, + 0.4009, + 0.393, + 0.4026, + 0.4079, + 0.4063, + 0.3963, + 0.4017, + 0.4033, + 0.4045, + 0.4081, + 0.4059, + 0.4068, + 0.4035, + 0.4051, + 0.4058, + 0.402, + 0.4074, + 0.4037, + 0.404, + 0.4017, + 0.4036, + 0.4057, + 0.409, + 0.404, + 0.4081, + 0.407, + 0.4083, + 0.4085, + 0.4041, + 0.4044, + 0.4056, + 0.4118, + 0.4081, + 0.4097, + 0.4096, + 0.4116, + 0.4104, + 0.4077, + 0.4095, + 0.4088, + 0.4114, + 0.41, + 0.4093, + 0.4113, + 0.4112, + 0.4105, + 0.4115, + 0.4109, + 0.4115, + 0.4116, + 0.4127, + 0.4116, + 0.4116, + 0.4116, + 0.4118 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.033788226544857025, + 0.08954796195030212, + -0.06740664690732956, + -0.10690448433160782, + -0.11852722615003586, + 0.9977210760116577 + ], + "perturbation_rho": [ + 0.03511377051472664, + 0.04956234246492386, + -0.005039767827838659, + -0.028402434661984444, + 0.002629645634442568, + -0.0117247449234128 + ], + "nudging": { + "0.001": [ + -2.101645804941654e-06, + -5.005858838558197e-07, + 6.752088665962219e-09, + 1.1606607586145401e-07, + 8.440110832452774e-08, + -1.3562384992837906e-06 + ], + "0.003": [ + -5.929498001933098e-06, + -1.1969823390245438e-06, + 3.577442839741707e-07, + 4.066387191414833e-07, + 6.486661732196808e-07, + -5.192705430090427e-06 + ], + "0.01": [ + -1.9490602426230907e-05, + -3.955909051001072e-06, + 1.2312084436416626e-06, + 1.8166610971093178e-06, + 2.1688174456357956e-06, + -1.8663820810616016e-05 + ] + }, + "hidden_norms_per_layer": [ + 7009.63232421875, + 187427.9375, + 535894.875, + 1119580.875, + 1710404.625, + 2170302.75, + 769555.6875 + ], + "bp_grad_norms_per_layer": [ + 2.6194647944066674e-05, + 1.3078431493340759e-06, + 7.762633913444006e-07, + 7.287014227586042e-07, + 7.358628977272019e-07, + 7.391291774183628e-07, + 7.346811230490857e-07 + ] + }, + "drift": { + "embed.weight": 48.84389712380156, + "embed.bias": 18.04622672355978, + "blocks.0.ln.weight": 1.2715512223724241, + "blocks.0.w1.weight": 18.323188610788566, + "blocks.0.w1.bias": 14.621705904927168, + "blocks.0.w2.weight": 62.82097007591977, + "blocks.1.ln.weight": 1.0780729921051364, + "blocks.1.w1.weight": 19.649865156855387, + "blocks.1.w1.bias": 13.846457460989065, + "blocks.1.w2.weight": 46.895166874548835, + "blocks.2.ln.weight": 0.8170823371574294, + "blocks.2.w1.weight": 21.511415879383534, + "blocks.2.w1.bias": 20.453359426824278, + "blocks.2.w2.weight": 36.12877482089504, + "blocks.3.ln.weight": 0.7815298682497922, + "blocks.3.w1.weight": 23.873373589893692, + "blocks.3.w1.bias": 23.438832639917354, + "blocks.3.w2.weight": 34.65004625568547, + "blocks.4.ln.weight": 0.7049287284135428, + "blocks.4.w1.weight": 24.756294779706568, + "blocks.4.w1.bias": 25.075713132594746, + "blocks.4.w2.weight": 39.3474190645009, + "blocks.5.ln.weight": 0.825085939532927, + "blocks.5.w1.weight": 25.260469003772034, + "blocks.5.w1.bias": 24.835263327096893, + "blocks.5.w2.weight": 47.04696050246029, + "out_ln.weight": 0.3016529235625585, + "out_head.weight": 6.069315898628023, + "out_head.bias": 1.8625596673903535 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 6, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 3 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L6_seed3", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file |
