diff options
Diffstat (limited to 'results/fa_dfa_d512_L6_seed5/results_cifar10.json')
| -rw-r--r-- | results/fa_dfa_d512_L6_seed5/results_cifar10.json | 837 |
1 files changed, 837 insertions, 0 deletions
diff --git a/results/fa_dfa_d512_L6_seed5/results_cifar10.json b/results/fa_dfa_d512_L6_seed5/results_cifar10.json new file mode 100644 index 0000000..e974678 --- /dev/null +++ b/results/fa_dfa_d512_L6_seed5/results_cifar10.json @@ -0,0 +1,837 @@ +{ + "5": { + "dfa": { + "log": { + "train_loss": [ + 2.0602571796417237, + 2.0357400941467283, + 2.0281838052368166, + 2.023866386795044, + 2.0154901094818114, + 2.019438636016846, + 2.0162963735580446, + 2.0127672254180906, + 2.0099401387786866, + 2.011543902282715, + 2.006219865684509, + 2.0047207692718505, + 2.0029416635894775, + 2.0006940887069704, + 2.0026997183609008, + 2.0016165689468384, + 2.000759786224365, + 1.9997175212860108, + 1.9980090442276002, + 1.998993673324585, + 1.996654589538574, + 1.9994389457702637, + 1.9997356970977784, + 1.993837992324829, + 1.9959214011383057, + 1.9948663213729858, + 1.9960267625427246, + 1.9953035424041747, + 1.9930475028991699, + 1.9938180725479127, + 1.9931120376586915, + 1.989824568710327, + 1.9910550234985351, + 1.9906865880584717, + 1.9898072389984132, + 1.9914395150375366, + 1.9915398007202147, + 1.9909269763183595, + 1.9913815157699586, + 1.9884502619934081, + 1.991486932411194, + 1.989191399459839, + 1.989710302581787, + 1.9897258306884766, + 1.9906724350738525, + 1.9914843771362305, + 1.9885967330169678, + 1.98751413482666, + 1.989536333770752, + 1.98715817817688, + 1.9881259790802002, + 1.9869521377182007, + 1.9907182498931886, + 1.9890414456939698, + 1.9886535820770264, + 1.9867755039215087, + 1.988041506919861, + 1.9891718532943725, + 1.9870871344375611, + 1.985717644920349, + 1.9884497722625731, + 1.9873963590240478, + 1.9863690199279784, + 1.9873637462997435, + 1.9868278685760499, + 1.9860613510894776, + 1.9854717936706543, + 1.9856478284454346, + 1.9866084854888917, + 1.987229610748291, + 1.9831793658828736, + 1.9870755458831788, + 1.985736505126953, + 1.9868509146881104, + 1.9870286437225342, + 1.9859972204971315, + 1.9860080694961548, + 1.9868842920684815, + 1.9851076070404052, + 1.985850848121643, + 1.9842121643447876, + 1.9848132551574706, + 1.9846837445831298, + 1.9864671915435792, + 1.9831796479034425, + 1.984968702774048, + 1.983607007446289, + 1.9863699700546265, + 1.9863424487304688, + 1.98700967628479, + 1.9837391945648193, + 1.9848160073471068, + 1.9845281774139405, + 1.9848566493988038, + 1.9822382134628296, + 1.9836605060577392, + 1.9821798385620117, + 1.9824478280639648, + 1.9817402114486695, + 1.9846896728515624 + ], + "train_acc": [ + 0.24254, + 0.2517, + 0.25592, + 0.25594, + 0.26192, + 0.25714, + 0.26284, + 0.26238, + 0.26512, + 0.26496, + 0.26548, + 0.26742, + 0.2712, + 0.2708, + 0.268, + 0.27104, + 0.27124, + 0.27076, + 0.2733, + 0.27132, + 0.27188, + 0.26918, + 0.27278, + 0.27414, + 0.2739, + 0.27446, + 0.27544, + 0.2747, + 0.27416, + 0.27496, + 0.27546, + 0.27906, + 0.27846, + 0.27636, + 0.27836, + 0.27738, + 0.27604, + 0.27838, + 0.27826, + 0.28024, + 0.27756, + 0.28068, + 0.2774, + 0.27826, + 0.27808, + 0.2748, + 0.27776, + 0.27942, + 0.28244, + 0.28036, + 0.28156, + 0.28332, + 0.27896, + 0.28098, + 0.28062, + 0.27874, + 0.2805, + 0.28012, + 0.2826, + 0.28204, + 0.27786, + 0.2819, + 0.28144, + 0.28236, + 0.28224, + 0.28264, + 0.2807, + 0.28432, + 0.28308, + 0.28156, + 0.28262, + 0.28086, + 0.28328, + 0.28292, + 0.28262, + 0.2819, + 0.28378, + 0.28152, + 0.28326, + 0.28238, + 0.28308, + 0.28294, + 0.28316, + 0.28292, + 0.28588, + 0.2832, + 0.28346, + 0.28494, + 0.2829, + 0.28252, + 0.28328, + 0.28276, + 0.28382, + 0.28216, + 0.28412, + 0.28436, + 0.28506, + 0.28418, + 0.28462, + 0.28428 + ], + "test_acc": [ + 0.2458, + 0.2841, + 0.28, + 0.2941, + 0.2898, + 0.2696, + 0.2791, + 0.2876, + 0.2822, + 0.3025, + 0.2919, + 0.292, + 0.2972, + 0.2946, + 0.2821, + 0.2924, + 0.2897, + 0.2957, + 0.3053, + 0.2873, + 0.3059, + 0.2873, + 0.2865, + 0.3017, + 0.3023, + 0.2803, + 0.3064, + 0.2856, + 0.2919, + 0.2974, + 0.2917, + 0.2949, + 0.2862, + 0.2929, + 0.2952, + 0.2864, + 0.3013, + 0.3011, + 0.2934, + 0.2975, + 0.2961, + 0.304, + 0.3058, + 0.2954, + 0.3001, + 0.2882, + 0.304, + 0.2976, + 0.3002, + 0.291, + 0.3019, + 0.3012, + 0.3085, + 0.3014, + 0.3042, + 0.2999, + 0.2951, + 0.305, + 0.3104, + 0.3037, + 0.3011, + 0.3046, + 0.2999, + 0.3087, + 0.2981, + 0.3044, + 0.298, + 0.2931, + 0.3016, + 0.306, + 0.2997, + 0.3019, + 0.2984, + 0.3028, + 0.2957, + 0.3086, + 0.3062, + 0.2985, + 0.2995, + 0.31, + 0.3028, + 0.305, + 0.2993, + 0.3029, + 0.3047, + 0.3011, + 0.3028, + 0.3008, + 0.302, + 0.3028, + 0.3053, + 0.305, + 0.304, + 0.3041, + 0.3021, + 0.3024, + 0.3022, + 0.3019, + 0.3021, + 0.3022 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.4137295186519623, + 0.00037798876292072237, + 0.00035168789327144623, + -0.00011552235810086131, + 0.0004928180132992566, + 0.0006692470051348209 + ], + "perturbation_rho": [ + -0.004769737366586924, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -4.731118679046631e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.455657184123993e-06, + -9.313225746154785e-10, + -9.313225746154785e-10, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -4.32552769780159e-06, + -1.862645149230957e-09, + 0.0, + 0.0, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 53850.484375, + 999738112.0, + 1825332736.0, + 2847918592.0, + 3948823552.0, + 7465979392.0, + 8693204992.0 + ], + "bp_grad_norms_per_layer": [ + 2.7038154826186656e-07, + 3.3068986438067327e-10, + 3.3043778824293213e-10, + 3.3058311643685556e-10, + 3.306660778523707e-10, + 3.3053956793871464e-10, + 3.3055713721807933e-10 + ] + }, + "drift": { + "embed.weight": 326.316807505004, + "embed.bias": 247.64016879156944, + "blocks.0.ln.weight": 10.506143366890935, + "blocks.0.w1.weight": 266.6641005309724, + "blocks.0.w1.bias": 237.30788612441194, + "blocks.0.w2.weight": 463.4592869979373, + "blocks.1.ln.weight": 8.041758423869682, + "blocks.1.w1.weight": 275.6017801051782, + "blocks.1.w1.bias": 242.84249700605667, + "blocks.1.w2.weight": 293.6559726136312, + "blocks.2.ln.weight": 8.38043701730794, + "blocks.2.w1.weight": 303.9003341110909, + "blocks.2.w1.bias": 274.77693120914864, + "blocks.2.w2.weight": 311.7253398049225, + "blocks.3.ln.weight": 8.344881260946064, + "blocks.3.w1.weight": 324.91067284816666, + "blocks.3.w1.bias": 320.84528008040263, + "blocks.3.w2.weight": 323.2526807364217, + "blocks.4.ln.weight": 10.649638215660875, + "blocks.4.w1.weight": 442.18421448330207, + "blocks.4.w1.bias": 419.08703726739526, + "blocks.4.w2.weight": 435.13630410704593, + "blocks.5.ln.weight": 10.016310662065466, + "blocks.5.w1.weight": 397.1121641226028, + "blocks.5.w1.bias": 370.9478671282672, + "blocks.5.w2.weight": 383.0732880205366, + "out_ln.weight": 0.7323829331223068, + "out_head.weight": 9.997613880770942, + "out_head.bias": 0.47019353243584805 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0245736110687256, + 1.9432434065246582, + 1.9171724227905274, + 1.9035633584594727, + 1.8866331351470946, + 1.8834057013702392, + 1.8780549462509155, + 1.873760415649414, + 1.86404331199646, + 1.8641292553329467, + 1.8578160486602784, + 1.8527875860595704, + 1.8485828662109376, + 1.8464561464691163, + 1.8462472164154053, + 1.8430843777084351, + 1.8429961044692993, + 1.8410946324920654, + 1.8362357471466064, + 1.8319243711090087, + 1.8236686227416992, + 1.8258946035766601, + 1.8234262411117554, + 1.8137903323364257, + 1.811674436264038, + 1.808376842956543, + 1.809517276611328, + 1.8052343613052368, + 1.799909278907776, + 1.7993224626922608, + 1.7999323937225342, + 1.7926906624984742, + 1.7933230242156983, + 1.7922531407928466, + 1.7895995733261107, + 1.7906105719375611, + 1.7885604776763917, + 1.7840546982192993, + 1.7896481491088867, + 1.7832089348602296, + 1.782369646949768, + 1.7770448540878296, + 1.7808699810409545, + 1.7771298468017578, + 1.7774345711898805, + 1.7765879013061523, + 1.7742172146987916, + 1.7695884064102172, + 1.775497437171936, + 1.767691201248169, + 1.7683605837249756, + 1.7657782846069336, + 1.7674894732666016, + 1.7704336560058593, + 1.7661452458190918, + 1.7612660248947143, + 1.7608598317718507, + 1.7622494547271728, + 1.7596149068450928, + 1.7570250023651124, + 1.7613954134368897, + 1.7564489492034911, + 1.7566620055389404, + 1.7558204865264893, + 1.7551631838989257, + 1.7566033951187134, + 1.7519007582855224, + 1.7499195809936523, + 1.7534941720581054, + 1.7503434635162354, + 1.74504672996521, + 1.7483937967681884, + 1.7457905151367188, + 1.7486124094009399, + 1.7461996807479858, + 1.7445868758392333, + 1.7431557595062255, + 1.7459273419570922, + 1.7443120336151123, + 1.7433632315063476, + 1.7413235707855224, + 1.7421810306167602, + 1.7423726794815064, + 1.7430526740264893, + 1.7426294480133058, + 1.7443564529037476, + 1.7412052463531493, + 1.7419821909332276, + 1.742747957763672, + 1.7393953549194336, + 1.7356409253692626, + 1.7367648685455321, + 1.7391734333038331, + 1.7373552332305908, + 1.7321791064071654, + 1.737231011619568, + 1.7380055041122437, + 1.7336925805664063, + 1.735386132774353, + 1.7398383757781983 + ], + "train_acc": [ + 0.25622, + 0.29234, + 0.30374, + 0.307, + 0.31608, + 0.31724, + 0.32094, + 0.32354, + 0.32676, + 0.33046, + 0.33084, + 0.33444, + 0.33722, + 0.33964, + 0.3378, + 0.33812, + 0.33888, + 0.34132, + 0.3422, + 0.34414, + 0.34496, + 0.34604, + 0.3475, + 0.34998, + 0.35366, + 0.3559, + 0.35212, + 0.35394, + 0.35768, + 0.35582, + 0.35698, + 0.3617, + 0.35966, + 0.35904, + 0.36126, + 0.36076, + 0.36356, + 0.36338, + 0.3616, + 0.36454, + 0.36376, + 0.36512, + 0.36348, + 0.36704, + 0.36486, + 0.36646, + 0.36906, + 0.368, + 0.36894, + 0.37176, + 0.36712, + 0.37086, + 0.37036, + 0.37114, + 0.37032, + 0.37276, + 0.37386, + 0.37112, + 0.37256, + 0.3721, + 0.37356, + 0.37602, + 0.37426, + 0.37504, + 0.37378, + 0.37652, + 0.37642, + 0.37658, + 0.3768, + 0.3798, + 0.37862, + 0.37708, + 0.37848, + 0.37812, + 0.37732, + 0.37934, + 0.38062, + 0.37766, + 0.37892, + 0.38066, + 0.38076, + 0.38064, + 0.38124, + 0.38186, + 0.38218, + 0.37966, + 0.38064, + 0.38088, + 0.37996, + 0.38164, + 0.38374, + 0.38408, + 0.38228, + 0.3827, + 0.38392, + 0.38448, + 0.38308, + 0.38368, + 0.38292, + 0.38126 + ], + "test_acc": [ + 0.2956, + 0.3353, + 0.3321, + 0.3526, + 0.3442, + 0.3427, + 0.3492, + 0.3563, + 0.354, + 0.3594, + 0.3655, + 0.3603, + 0.3701, + 0.3726, + 0.3688, + 0.3702, + 0.3634, + 0.3695, + 0.3742, + 0.3755, + 0.3706, + 0.3715, + 0.3676, + 0.3816, + 0.3786, + 0.3874, + 0.3836, + 0.3703, + 0.3798, + 0.379, + 0.3806, + 0.3918, + 0.3845, + 0.3862, + 0.3933, + 0.3876, + 0.3924, + 0.3857, + 0.3748, + 0.3943, + 0.3942, + 0.389, + 0.398, + 0.3839, + 0.3964, + 0.3949, + 0.3943, + 0.3936, + 0.3949, + 0.3982, + 0.3982, + 0.4012, + 0.3968, + 0.3988, + 0.3977, + 0.3977, + 0.401, + 0.3978, + 0.4017, + 0.4016, + 0.4013, + 0.3972, + 0.3961, + 0.4032, + 0.3959, + 0.4004, + 0.3967, + 0.3993, + 0.4021, + 0.4044, + 0.4053, + 0.4023, + 0.4054, + 0.4061, + 0.4047, + 0.403, + 0.4036, + 0.4032, + 0.3985, + 0.4059, + 0.4049, + 0.4061, + 0.4017, + 0.4059, + 0.4063, + 0.4067, + 0.4039, + 0.4053, + 0.4048, + 0.4084, + 0.4051, + 0.4074, + 0.4073, + 0.4054, + 0.4059, + 0.4059, + 0.4056, + 0.4062, + 0.4061, + 0.4058 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.02408183179795742, + 0.061381518840789795, + -0.06236571818590164, + -0.022614534944295883, + 0.005173904821276665, + 0.9961987137794495 + ], + "perturbation_rho": [ + -0.0075406357645988464, + 0.015521236695349216, + -0.03143823519349098, + -0.03976670280098915, + 0.044851042330265045, + -0.006162412464618683 + ], + "nudging": { + "0.001": [ + -2.3316824808716774e-06, + -2.2514723241329193e-07, + 1.0617077350616455e-07, + 2.3283064365386963e-09, + -6.05359673500061e-09, + -1.5987316146492958e-06 + ], + "0.003": [ + -6.882240995764732e-06, + -6.901100277900696e-07, + 3.80445271730423e-07, + 1.1408701539039612e-08, + -5.21540641784668e-08, + -5.463254638016224e-06 + ], + "0.01": [ + -2.2817635908722878e-05, + -2.484419383108616e-06, + 1.1968659237027168e-06, + 3.1816307455301285e-07, + -1.9674189388751984e-07, + -1.9047758542001247e-05 + ] + }, + "hidden_norms_per_layer": [ + 8055.30419921875, + 113125.40625, + 1163908.5, + 1491131.625, + 1596391.25, + 1615158.375, + 896264.875 + ], + "bp_grad_norms_per_layer": [ + 2.2761472791898996e-05, + 1.4049503533897223e-06, + 7.91284321621788e-07, + 7.916268032204243e-07, + 7.900576974861906e-07, + 7.786943569954019e-07, + 7.593308168907242e-07 + ] + }, + "drift": { + "embed.weight": 54.562706327622614, + "embed.bias": 17.31102523118792, + "blocks.0.ln.weight": 1.1930605549913966, + "blocks.0.w1.weight": 17.719005515431316, + "blocks.0.w1.bias": 12.672636747852074, + "blocks.0.w2.weight": 57.38339155194176, + "blocks.1.ln.weight": 1.1832029127181742, + "blocks.1.w1.weight": 25.16862327064878, + "blocks.1.w1.bias": 22.86442569965511, + "blocks.1.w2.weight": 40.800794703713116, + "blocks.2.ln.weight": 0.7781541585073879, + "blocks.2.w1.weight": 21.816615196629947, + "blocks.2.w1.bias": 21.85112016896182, + "blocks.2.w2.weight": 56.80267641124345, + "blocks.3.ln.weight": 0.6733768384350268, + "blocks.3.w1.weight": 19.172490261311314, + "blocks.3.w1.bias": 19.28818718647845, + "blocks.3.w2.weight": 56.812396875482676, + "blocks.4.ln.weight": 0.4559901335911821, + "blocks.4.w1.weight": 14.442025794898862, + "blocks.4.w1.bias": 12.350979488836783, + "blocks.4.w2.weight": 51.455152706019774, + "blocks.5.ln.weight": 0.6410944171187689, + "blocks.5.w1.weight": 19.462352016085756, + "blocks.5.w1.bias": 18.71679203171091, + "blocks.5.w2.weight": 54.03878336670424, + "out_ln.weight": 0.3528795738919825, + "out_head.weight": 6.138358709909979, + "out_head.bias": 0.7832523450909458 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 6, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 5 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L6_seed5", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file |
