diff options
Diffstat (limited to 'results/fa_dfa_d512_L8_seed8/results_cifar10.json')
| -rw-r--r-- | results/fa_dfa_d512_L8_seed8/results_cifar10.json | 881 |
1 files changed, 881 insertions, 0 deletions
diff --git a/results/fa_dfa_d512_L8_seed8/results_cifar10.json b/results/fa_dfa_d512_L8_seed8/results_cifar10.json new file mode 100644 index 0000000..8f97591 --- /dev/null +++ b/results/fa_dfa_d512_L8_seed8/results_cifar10.json @@ -0,0 +1,881 @@ +{ + "8": { + "dfa": { + "log": { + "train_loss": [ + 2.068326115875244, + 2.038449552383423, + 2.032887363433838, + 2.0267745724487303, + 2.026417502670288, + 2.0202374043273927, + 2.018483938064575, + 2.0165669372558592, + 2.019748689651489, + 2.0128509746551515, + 2.0144077210235594, + 2.0115078826904296, + 2.0115395934295655, + 2.007768662490845, + 2.0097858222961427, + 2.0063318170166013, + 2.0055432247924805, + 2.0048474722290037, + 2.0065497072982788, + 2.0015513079071043, + 2.0043911225128173, + 2.0010596670150758, + 2.0026096622467042, + 2.003728278236389, + 2.0012191329193114, + 2.0005321977233885, + 2.0005435330581665, + 1.999249365234375, + 1.9984628038024903, + 1.9983316033935548, + 1.997845711593628, + 1.9972105752563476, + 1.9978763916778564, + 1.9953950925064088, + 1.9970173344421387, + 1.9984811785888672, + 1.9950866604232789, + 1.9947769330596923, + 1.9955189801788331, + 1.9929481762313843, + 1.9966953428649903, + 1.9945243342590333, + 1.9926670757675171, + 1.993055216293335, + 1.9953397652435303, + 1.992445609664917, + 1.9928514415740968, + 1.9945266257476806, + 1.9926752143096924, + 1.992415104598999, + 1.9914240314483642, + 1.9914591967773438, + 1.9917445565032958, + 1.9892081171417237, + 1.9890185669708251, + 1.9918106182479858, + 1.9911041564178467, + 1.9896882055282592, + 1.9890501625823975, + 1.9907431673431397, + 1.9901649180603027, + 1.9891316648101807, + 1.9877268949127198, + 1.989798042602539, + 1.9878116387939453, + 1.9890665167236328, + 1.9905334783935547, + 1.9886393180847168, + 1.988864939727783, + 1.9884007026672363, + 1.9877286835479737, + 1.989529699935913, + 1.9890546997070313, + 1.9868773061752318, + 1.9869191469573975, + 1.9882244151306152, + 1.987383364868164, + 1.9855076770782472, + 1.9868913415908813, + 1.9883932913970948, + 1.9862220487213136, + 1.9841738792419434, + 1.9836644219970703, + 1.9869539430999756, + 1.984350054588318, + 1.9856882331848145, + 1.9835283684539795, + 1.9859053189849853, + 1.9878566506576538, + 1.986811107788086, + 1.9838481521606446, + 1.986321011314392, + 1.9864019583892822, + 1.9851219155883788, + 1.9863925772476196, + 1.9855677154541016, + 1.984962483253479, + 1.9849489275360108, + 1.985427260131836, + 1.984917903213501 + ], + "train_acc": [ + 0.23632, + 0.24556, + 0.24874, + 0.25046, + 0.25146, + 0.25464, + 0.25568, + 0.25664, + 0.25548, + 0.25852, + 0.25636, + 0.26028, + 0.25868, + 0.26176, + 0.2607, + 0.26174, + 0.26266, + 0.26672, + 0.26472, + 0.2636, + 0.26466, + 0.26618, + 0.2643, + 0.26808, + 0.2664, + 0.26632, + 0.27, + 0.26608, + 0.26812, + 0.26848, + 0.26888, + 0.27018, + 0.27012, + 0.2694, + 0.2686, + 0.2696, + 0.27142, + 0.27298, + 0.27042, + 0.27126, + 0.2717, + 0.2718, + 0.27386, + 0.27346, + 0.26924, + 0.27342, + 0.27324, + 0.27112, + 0.27066, + 0.27258, + 0.27196, + 0.27386, + 0.27262, + 0.2753, + 0.274, + 0.27484, + 0.27268, + 0.27362, + 0.27338, + 0.27272, + 0.274, + 0.27568, + 0.27592, + 0.27312, + 0.27434, + 0.2757, + 0.27476, + 0.27544, + 0.27548, + 0.27548, + 0.2741, + 0.27606, + 0.27428, + 0.27634, + 0.27604, + 0.2735, + 0.27494, + 0.27588, + 0.27586, + 0.27474, + 0.27652, + 0.27834, + 0.27718, + 0.27562, + 0.27728, + 0.27692, + 0.27862, + 0.27782, + 0.27452, + 0.27704, + 0.27826, + 0.27674, + 0.27832, + 0.27826, + 0.27584, + 0.27546, + 0.27866, + 0.27724, + 0.2748, + 0.2757 + ], + "test_acc": [ + 0.2602, + 0.2704, + 0.2727, + 0.2629, + 0.2497, + 0.2721, + 0.2855, + 0.2759, + 0.2772, + 0.2885, + 0.2833, + 0.2656, + 0.2889, + 0.2844, + 0.2696, + 0.2757, + 0.2739, + 0.2826, + 0.2819, + 0.2819, + 0.2892, + 0.2867, + 0.2852, + 0.2806, + 0.2973, + 0.2863, + 0.2939, + 0.2937, + 0.2926, + 0.2764, + 0.2824, + 0.2885, + 0.2758, + 0.2824, + 0.2965, + 0.2933, + 0.2839, + 0.2859, + 0.2924, + 0.2871, + 0.2828, + 0.2905, + 0.2969, + 0.2792, + 0.286, + 0.2835, + 0.2903, + 0.2892, + 0.2897, + 0.2923, + 0.2943, + 0.2885, + 0.2928, + 0.2967, + 0.3031, + 0.2862, + 0.2863, + 0.2915, + 0.2885, + 0.2905, + 0.2855, + 0.3001, + 0.2961, + 0.2865, + 0.2976, + 0.2898, + 0.2962, + 0.2908, + 0.2936, + 0.2822, + 0.2934, + 0.2904, + 0.2952, + 0.2923, + 0.29, + 0.2923, + 0.2933, + 0.2869, + 0.2957, + 0.2945, + 0.2951, + 0.2981, + 0.2954, + 0.2891, + 0.2935, + 0.294, + 0.2948, + 0.2916, + 0.2959, + 0.2929, + 0.296, + 0.2939, + 0.2944, + 0.2937, + 0.2942, + 0.294, + 0.2942, + 0.2945, + 0.2944, + 0.2943 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.38314950466156006, + -0.0002771378494799137, + -0.0004160030803177506, + 3.204246604582295e-05, + -4.1770588723011315e-05, + -0.0001344233169220388, + -3.0795046768616885e-05, + -3.3676558814477175e-05 + ], + "perturbation_rho": [ + -0.00034889206290245056, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -3.711320459842682e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.1324882507324219e-06, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -3.98978590965271e-06, + 9.313225746154785e-10, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 54124.7890625, + 783932864.0, + 4690777600.0, + 5000870912.0, + 5716334080.0, + 6005877760.0, + 7473982976.0, + 7776250880.0, + 8210811904.0 + ], + "bp_grad_norms_per_layer": [ + 3.0676110895910824e-07, + 1.972915580905621e-10, + 1.953913281171893e-10, + 1.953771588958375e-10, + 1.9510017212898134e-10, + 1.950706401965263e-10, + 1.9518447968991381e-10, + 1.951617339956968e-10, + 1.953130296383776e-10 + ] + }, + "drift": { + "embed.weight": 328.1038460126514, + "embed.bias": 254.00296968816264, + "blocks.0.ln.weight": 10.375421373661911, + "blocks.0.w1.weight": 272.1998177697825, + "blocks.0.w1.bias": 246.55078363508565, + "blocks.0.w2.weight": 489.8754501153931, + "blocks.1.ln.weight": 9.050343989939378, + "blocks.1.w1.weight": 388.3338705775239, + "blocks.1.w1.bias": 383.7188912712258, + "blocks.1.w2.weight": 402.9058555114096, + "blocks.2.ln.weight": 6.952665632270595, + "blocks.2.w1.weight": 255.3639402973493, + "blocks.2.w1.bias": 230.39667811143107, + "blocks.2.w2.weight": 247.93087044444246, + "blocks.3.ln.weight": 8.807891710920156, + "blocks.3.w1.weight": 351.3359831200073, + "blocks.3.w1.bias": 336.28061721166756, + "blocks.3.w2.weight": 321.5850139623348, + "blocks.4.ln.weight": 8.196055678685234, + "blocks.4.w1.weight": 287.52820992582446, + "blocks.4.w1.bias": 262.4079024326842, + "blocks.4.w2.weight": 263.46643755724006, + "blocks.5.ln.weight": 9.62846119408936, + "blocks.5.w1.weight": 390.9965336931924, + "blocks.5.w1.bias": 375.54893253589324, + "blocks.5.w2.weight": 382.6688713102951, + "blocks.6.ln.weight": 7.376838940788987, + "blocks.6.w1.weight": 284.9872318771198, + "blocks.6.w1.bias": 263.7123597178357, + "blocks.6.w2.weight": 270.37673819187216, + "blocks.7.ln.weight": 8.135998104403308, + "blocks.7.w1.weight": 306.058248194457, + "blocks.7.w1.bias": 285.31467158190975, + "blocks.7.w2.weight": 284.5221337965431, + "out_ln.weight": 0.540826641326857, + "out_head.weight": 7.9794751282903125, + "out_head.bias": 0.727890481081825 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0412177281188963, + 1.9586744415664672, + 1.9367398124694823, + 1.9134309594726562, + 1.9032167110824585, + 1.8894411625671386, + 1.8745269380950929, + 1.8681198519515991, + 1.859885403137207, + 1.8490976187515258, + 1.8433319261932373, + 1.8375304843139648, + 1.8348215142822266, + 1.8295704886627198, + 1.8297615969085694, + 1.8252419507217408, + 1.8235907801055908, + 1.8161062512207031, + 1.8182137090301513, + 1.8134747228240966, + 1.8125705044555664, + 1.8099720315551757, + 1.8140770354843139, + 1.812659885635376, + 1.8111054842376708, + 1.8094129010772706, + 1.8020060912322997, + 1.803116989402771, + 1.795589069519043, + 1.7962562110137938, + 1.793900502319336, + 1.7944407584381104, + 1.7943759982681275, + 1.7872746685028076, + 1.788272954788208, + 1.7918251754379273, + 1.783637704849243, + 1.7797057202529907, + 1.7784698824691771, + 1.7769115085601808, + 1.7821795818328858, + 1.774635576171875, + 1.777582476463318, + 1.7744439294815064, + 1.7718625156402588, + 1.7697111307525635, + 1.77068788394928, + 1.764987317237854, + 1.7658291906356811, + 1.7618338104629516, + 1.763578690109253, + 1.7595625869750977, + 1.7586208461761474, + 1.7597406827163695, + 1.7557958205795288, + 1.756042088279724, + 1.7561137405014038, + 1.75862396736145, + 1.7520068192672729, + 1.7528972547149657, + 1.7537218439102173, + 1.7509230569076537, + 1.7497916799163817, + 1.7516448692703248, + 1.747637153892517, + 1.7501363418579101, + 1.7462628519058228, + 1.7452370420074463, + 1.751023518638611, + 1.7453709937286377, + 1.7441615590667725, + 1.745248391342163, + 1.7455106867218018, + 1.744859725265503, + 1.7450293384170532, + 1.741427905960083, + 1.7391855532836915, + 1.739177918357849, + 1.7413748574447632, + 1.740176322555542, + 1.7360426489639282, + 1.7357201895904542, + 1.7336676422119142, + 1.7378975563812256, + 1.7353016548156739, + 1.735442308998108, + 1.7332442119979858, + 1.7347565906524658, + 1.7399420615386962, + 1.735971279335022, + 1.732453648071289, + 1.7338156942749023, + 1.7323283444976807, + 1.733454556350708, + 1.7349756786727906, + 1.730115432472229, + 1.7333317721176147, + 1.730795558128357, + 1.7303683010482789, + 1.7296705428695678 + ], + "train_acc": [ + 0.24972, + 0.2857, + 0.29716, + 0.3051, + 0.31388, + 0.32028, + 0.32718, + 0.32794, + 0.33452, + 0.33888, + 0.33738, + 0.34414, + 0.3427, + 0.344, + 0.34354, + 0.34574, + 0.34834, + 0.34704, + 0.34938, + 0.34914, + 0.34736, + 0.34888, + 0.34864, + 0.3503, + 0.35144, + 0.35012, + 0.35328, + 0.35404, + 0.3563, + 0.3579, + 0.35846, + 0.35674, + 0.35756, + 0.35986, + 0.35896, + 0.35632, + 0.36026, + 0.35954, + 0.36354, + 0.36492, + 0.36274, + 0.36668, + 0.3658, + 0.36558, + 0.36578, + 0.36772, + 0.36466, + 0.36692, + 0.36736, + 0.36832, + 0.36896, + 0.36776, + 0.37266, + 0.37066, + 0.3741, + 0.37286, + 0.3711, + 0.37272, + 0.37522, + 0.37178, + 0.37578, + 0.37454, + 0.37386, + 0.37256, + 0.37668, + 0.3778, + 0.3781, + 0.37876, + 0.37596, + 0.37894, + 0.37748, + 0.3801, + 0.37678, + 0.3776, + 0.37852, + 0.37916, + 0.38186, + 0.38062, + 0.38234, + 0.3802, + 0.38074, + 0.38048, + 0.38312, + 0.38086, + 0.38172, + 0.38188, + 0.37984, + 0.38142, + 0.37894, + 0.38204, + 0.38274, + 0.38072, + 0.38316, + 0.38076, + 0.38282, + 0.38266, + 0.38244, + 0.3807, + 0.38074, + 0.38534 + ], + "test_acc": [ + 0.305, + 0.3111, + 0.3184, + 0.3374, + 0.3246, + 0.3486, + 0.3605, + 0.3591, + 0.3631, + 0.3644, + 0.3656, + 0.37, + 0.3724, + 0.3813, + 0.3682, + 0.3692, + 0.3665, + 0.3747, + 0.3765, + 0.3701, + 0.3749, + 0.3749, + 0.3821, + 0.377, + 0.3807, + 0.3795, + 0.3856, + 0.3799, + 0.3833, + 0.383, + 0.3687, + 0.3841, + 0.3836, + 0.377, + 0.3894, + 0.3861, + 0.3944, + 0.384, + 0.3911, + 0.3879, + 0.3843, + 0.3919, + 0.389, + 0.3888, + 0.3918, + 0.3954, + 0.3994, + 0.3935, + 0.3926, + 0.3914, + 0.3866, + 0.3981, + 0.3864, + 0.3947, + 0.4018, + 0.3938, + 0.3965, + 0.3931, + 0.3998, + 0.4003, + 0.401, + 0.3984, + 0.4009, + 0.3958, + 0.3936, + 0.4004, + 0.3968, + 0.3984, + 0.4015, + 0.4012, + 0.4042, + 0.397, + 0.4023, + 0.3968, + 0.4001, + 0.402, + 0.3965, + 0.3973, + 0.3964, + 0.4025, + 0.4011, + 0.4017, + 0.4014, + 0.3981, + 0.3992, + 0.4015, + 0.4024, + 0.4008, + 0.4012, + 0.4004, + 0.4017, + 0.4023, + 0.4025, + 0.4032, + 0.4018, + 0.4015, + 0.4018, + 0.4028, + 0.4024, + 0.4026 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.016079379245638847, + 0.0893096849322319, + -0.03535890951752663, + -0.027881216257810593, + -0.04659418389201164, + -0.039142537862062454, + -0.024059921503067017, + 0.9954550266265869 + ], + "perturbation_rho": [ + -0.003716413863003254, + -0.01681683585047722, + 0.0011358268093317747, + 0.011708367615938187, + -0.0036298027262091637, + -0.028253771364688873, + -0.03197161853313446, + -0.0007108366116881371 + ], + "nudging": { + "0.001": [ + -1.6684643924236298e-06, + -3.836903488263488e-07, + 2.2817403078079224e-08, + -4.656612873077393e-10, + -4.0745362639427185e-08, + 4.21423465013504e-08, + 5.2852556109428406e-08, + -1.6444828361272812e-06 + ], + "0.003": [ + -4.838439053855836e-06, + -1.1676456779241562e-06, + 1.3969838619232178e-07, + 2.7101486921310425e-07, + 2.761371433734894e-07, + 2.1047890186309814e-07, + 8.940696716308594e-08, + -5.75091689825058e-06 + ], + "0.01": [ + -1.587084261700511e-05, + -3.770226612687111e-06, + 4.6566128730773926e-07, + 6.083864718675613e-07, + 9.522773325443268e-07, + 8.021015673875809e-07, + 3.6670826375484467e-07, + -2.0256484276615083e-05 + ] + }, + "hidden_norms_per_layer": [ + 7878.02783203125, + 160404.015625, + 657860.0625, + 1298493.75, + 1530210.875, + 1789493.5, + 1949845.875, + 2001972.0, + 954169.0 + ], + "bp_grad_norms_per_layer": [ + 2.3713459086138755e-05, + 1.3362353001866722e-06, + 8.249007805716246e-07, + 8.118395271594636e-07, + 8.120087500174122e-07, + 8.141144007822732e-07, + 8.151110932885786e-07, + 8.161479740920186e-07, + 7.983429100022477e-07 + ] + }, + "drift": { + "embed.weight": 50.27203344624106, + "embed.bias": 18.164593935621223, + "blocks.0.ln.weight": 1.2923599647753492, + "blocks.0.w1.weight": 17.615281639137443, + "blocks.0.w1.bias": 15.943020869292715, + "blocks.0.w2.weight": 62.07043407165766, + "blocks.1.ln.weight": 0.9933874122365621, + "blocks.1.w1.weight": 19.89251765717899, + "blocks.1.w1.bias": 15.397253448680312, + "blocks.1.w2.weight": 48.46816742349729, + "blocks.2.ln.weight": 0.7602194940473055, + "blocks.2.w1.weight": 21.862181220405073, + "blocks.2.w1.bias": 20.159047961824015, + "blocks.2.w2.weight": 35.30605863540395, + "blocks.3.ln.weight": 0.6625976974501915, + "blocks.3.w1.weight": 19.25465609702534, + "blocks.3.w1.bias": 19.348925466014506, + "blocks.3.w2.weight": 46.43927447797268, + "blocks.4.ln.weight": 0.5980307474752141, + "blocks.4.w1.weight": 19.617144750384867, + "blocks.4.w1.bias": 20.038154099436944, + "blocks.4.w2.weight": 42.571562389062414, + "blocks.5.ln.weight": 0.6412290763020658, + "blocks.5.w1.weight": 18.227445383614373, + "blocks.5.w1.bias": 17.78546251590179, + "blocks.5.w2.weight": 48.65373336198152, + "blocks.6.ln.weight": 0.6350627815251478, + "blocks.6.w1.weight": 16.896056503722676, + "blocks.6.w1.bias": 14.642090090469127, + "blocks.6.w2.weight": 58.25817193229909, + "blocks.7.ln.weight": 0.7771808191712193, + "blocks.7.w1.weight": 20.486284886878302, + "blocks.7.w1.bias": 19.32003589686942, + "blocks.7.w2.weight": 62.85071102943643, + "out_ln.weight": 0.3595629051418566, + "out_head.weight": 6.471892657261998, + "out_head.bias": 1.5825345450853237 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 8, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 8 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L8_seed8", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file |
