diff options
Diffstat (limited to 'results/fa_dfa_d512_L8_seed4/results_cifar10.json')
| -rw-r--r-- | results/fa_dfa_d512_L8_seed4/results_cifar10.json | 881 |
1 files changed, 881 insertions, 0 deletions
diff --git a/results/fa_dfa_d512_L8_seed4/results_cifar10.json b/results/fa_dfa_d512_L8_seed4/results_cifar10.json new file mode 100644 index 0000000..335a670 --- /dev/null +++ b/results/fa_dfa_d512_L8_seed4/results_cifar10.json @@ -0,0 +1,881 @@ +{ + "4": { + "dfa": { + "log": { + "train_loss": [ + 2.0558988859558105, + 2.0445796598815917, + 2.039279856033325, + 2.038156519508362, + 2.036371453819275, + 2.035036249847412, + 2.031745447311401, + 2.0292039811706544, + 2.0318929217529296, + 2.0292139208984374, + 2.0309901475524903, + 2.0294140184020995, + 2.024980134963989, + 2.026833506088257, + 2.0269724795532227, + 2.0259553061676026, + 2.025906521835327, + 2.0246393172454833, + 2.0217932791137696, + 2.024892644882202, + 2.0215970348739623, + 2.022969856185913, + 2.0212113690567017, + 2.018885112991333, + 2.019870955734253, + 2.017834800796509, + 2.0190272177124022, + 2.0189630345916747, + 2.01638854927063, + 2.017822699356079, + 2.019127551727295, + 2.0166009580230715, + 2.016471000518799, + 2.0156495739746094, + 2.0154129775238037, + 2.017227215194702, + 2.015604530029297, + 2.018137004623413, + 2.013566918640137, + 2.0139937634277345, + 2.014001354904175, + 2.014924657058716, + 2.01404189201355, + 2.012948843307495, + 2.0132822931671144, + 2.0135159986114504, + 2.0149466477966307, + 2.0134463012313843, + 2.0127882065200806, + 2.011855206069946, + 2.0118354721450804, + 2.009852321395874, + 2.0144452475357055, + 2.0108168384170533, + 2.0125812700653074, + 2.009986862640381, + 2.0114155954360964, + 2.010974427947998, + 2.012690294647217, + 2.0099948442077635, + 2.0100676802062987, + 2.0103798513031004, + 2.009976401939392, + 2.009263822669983, + 2.0103307348251342, + 2.0066765419006347, + 2.0081610918426516, + 2.009749242553711, + 2.009436781463623, + 2.008263199005127, + 2.008898328781128, + 2.0088115994262696, + 2.0076247560882567, + 2.008243568115234, + 2.009206538734436, + 2.0073812493896486, + 2.0084627660369874, + 2.0082589547729492, + 2.006824525222778, + 2.006816029586792, + 2.005350517425537, + 2.005616167564392, + 2.0048646172714233, + 2.005117626724243, + 2.007857433204651, + 2.006735478591919, + 2.0058193558502198, + 2.0068686953353883, + 2.0066705658721924, + 2.0060114856719973, + 2.0083387131118773, + 2.0070924770736696, + 2.006274801864624, + 2.005065950050354, + 2.007637509765625, + 2.0068550647354124, + 2.0040754894256594, + 2.0044231857681276, + 2.00634932926178, + 2.004043010559082 + ], + "train_acc": [ + 0.2432, + 0.24314, + 0.24398, + 0.244, + 0.2482, + 0.24582, + 0.24994, + 0.25054, + 0.24894, + 0.24966, + 0.2472, + 0.2524, + 0.25424, + 0.25128, + 0.25402, + 0.2517, + 0.25458, + 0.25468, + 0.2556, + 0.25626, + 0.25828, + 0.25636, + 0.2575, + 0.25626, + 0.2598, + 0.25736, + 0.25858, + 0.25966, + 0.261, + 0.26086, + 0.25872, + 0.26222, + 0.25918, + 0.26136, + 0.2628, + 0.26148, + 0.26234, + 0.26032, + 0.26366, + 0.26014, + 0.26316, + 0.2627, + 0.26332, + 0.26396, + 0.2624, + 0.2651, + 0.26332, + 0.26546, + 0.26388, + 0.2643, + 0.26664, + 0.26656, + 0.26452, + 0.26586, + 0.26372, + 0.26434, + 0.26688, + 0.265, + 0.26536, + 0.2676, + 0.26708, + 0.26556, + 0.26724, + 0.26668, + 0.26684, + 0.26658, + 0.26898, + 0.26802, + 0.26746, + 0.26814, + 0.2674, + 0.2696, + 0.26804, + 0.26742, + 0.26854, + 0.2678, + 0.26776, + 0.26772, + 0.26896, + 0.26856, + 0.2709, + 0.26856, + 0.27094, + 0.27032, + 0.26868, + 0.26798, + 0.27036, + 0.26862, + 0.2704, + 0.26882, + 0.26632, + 0.2691, + 0.26788, + 0.26786, + 0.26864, + 0.26766, + 0.27096, + 0.27096, + 0.26832, + 0.27034 + ], + "test_acc": [ + 0.255, + 0.282, + 0.2597, + 0.2682, + 0.2706, + 0.2653, + 0.268, + 0.2831, + 0.2608, + 0.2611, + 0.2809, + 0.2728, + 0.2628, + 0.2678, + 0.2917, + 0.2633, + 0.2587, + 0.2789, + 0.2667, + 0.255, + 0.2702, + 0.2763, + 0.2721, + 0.2728, + 0.2836, + 0.2812, + 0.2586, + 0.2753, + 0.2767, + 0.2765, + 0.2893, + 0.2684, + 0.2739, + 0.2781, + 0.283, + 0.2702, + 0.2792, + 0.2779, + 0.2757, + 0.2829, + 0.2798, + 0.2777, + 0.2673, + 0.2797, + 0.2958, + 0.272, + 0.2749, + 0.2875, + 0.2807, + 0.2796, + 0.2828, + 0.2825, + 0.2827, + 0.2782, + 0.2898, + 0.2783, + 0.2754, + 0.2926, + 0.2676, + 0.2797, + 0.2879, + 0.2839, + 0.2887, + 0.2792, + 0.2699, + 0.2925, + 0.2741, + 0.2804, + 0.2827, + 0.282, + 0.2908, + 0.2818, + 0.2801, + 0.2873, + 0.2889, + 0.2898, + 0.2934, + 0.2874, + 0.2894, + 0.2882, + 0.2751, + 0.2786, + 0.2871, + 0.2813, + 0.2875, + 0.2837, + 0.2827, + 0.2853, + 0.2816, + 0.2853, + 0.2848, + 0.2829, + 0.2862, + 0.2857, + 0.2852, + 0.2855, + 0.2859, + 0.2859, + 0.2859, + 0.2861 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.348124623298645, + 0.0005955962114967406, + 0.0003339378454256803, + -0.00018519387231208384, + -0.00039015739457681775, + -6.0401107475627214e-05, + 0.0005056928494013846, + -0.00041366269579157233 + ], + "perturbation_rho": [ + 0.023487141355872154, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -2.4354085326194763e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 9.313225746154785e-10, + 0.0, + 0.0 + ], + "0.003": [ + -9.727664291858673e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 9.313225746154785e-10, + 0.0, + 0.0 + ], + "0.01": [ + -3.107357770204544e-06, + 0.0, + -1.862645149230957e-09, + 0.0, + 0.0, + 9.313225746154785e-10, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 57046.83203125, + 1460526592.0, + 3493817088.0, + 5286084096.0, + 7466274304.0, + 9068557312.0, + 9149831168.0, + 9557196800.0, + 9628340224.0 + ], + "bp_grad_norms_per_layer": [ + 2.206747353739047e-07, + 1.568134094798168e-10, + 1.5691002663853482e-10, + 1.5639808892409235e-10, + 1.5647411144570356e-10, + 1.564954832389276e-10, + 1.564743612458841e-10, + 1.564741808346426e-10, + 1.5647404205676452e-10 + ] + }, + "drift": { + "embed.weight": 345.4826524750452, + "embed.bias": 268.77599398354755, + "blocks.0.ln.weight": 9.693802971740622, + "blocks.0.w1.weight": 305.69328663379747, + "blocks.0.w1.bias": 282.72127878032245, + "blocks.0.w2.weight": 506.67910364132115, + "blocks.1.ln.weight": 9.022706446436455, + "blocks.1.w1.weight": 344.52276292946954, + "blocks.1.w1.bias": 336.8078507211997, + "blocks.1.w2.weight": 344.5230830295154, + "blocks.2.ln.weight": 9.205345940074093, + "blocks.2.w1.weight": 390.2517774750357, + "blocks.2.w1.bias": 365.62094868403284, + "blocks.2.w2.weight": 372.1071143869873, + "blocks.3.ln.weight": 10.058504715768814, + "blocks.3.w1.weight": 409.96586790482206, + "blocks.3.w1.bias": 389.26772963997206, + "blocks.3.w2.weight": 398.4000450175067, + "blocks.4.ln.weight": 10.413518753619888, + "blocks.4.w1.weight": 429.20878622659194, + "blocks.4.w1.bias": 400.403481558048, + "blocks.4.w2.weight": 399.0461533995044, + "blocks.5.ln.weight": 7.53017659614122, + "blocks.5.w1.weight": 303.5211766091988, + "blocks.5.w1.bias": 290.0904456815074, + "blocks.5.w2.weight": 267.46908289070075, + "blocks.6.ln.weight": 8.989001105343268, + "blocks.6.w1.weight": 361.1863333213492, + "blocks.6.w1.bias": 343.25957937988704, + "blocks.6.w2.weight": 317.54530185898403, + "blocks.7.ln.weight": 7.256623314136568, + "blocks.7.w1.weight": 264.5076613280686, + "blocks.7.w1.bias": 244.7481419757745, + "blocks.7.w2.weight": 244.11940761827694, + "out_ln.weight": 0.5537612143282766, + "out_head.weight": 8.260097616924732, + "out_head.bias": 0.594355583013062 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0499230930328367, + 1.9647031881713868, + 1.9228250009536743, + 1.8920866240692138, + 1.8762959116363525, + 1.8783360265731812, + 1.8759148642349244, + 1.8639714459609986, + 1.859312229270935, + 1.8467648499298095, + 1.8394229892730714, + 1.825405124130249, + 1.820375383529663, + 1.8109127722549438, + 1.8068262866210938, + 1.7986448593521118, + 1.7920247109603882, + 1.7862463021087647, + 1.7800769235610963, + 1.7746711435317992, + 1.7699551953125, + 1.7670180697250366, + 1.7607733086395263, + 1.7561056157684327, + 1.752474511489868, + 1.7487609728240967, + 1.7504161080551148, + 1.7488452796173095, + 1.744940209083557, + 1.7447595501327515, + 1.7444546957015992, + 1.7363371981811524, + 1.7366404415893555, + 1.7359863372802735, + 1.733716072654724, + 1.7326603939056398, + 1.7307178776550294, + 1.7296201685333252, + 1.7284672827911376, + 1.7325348129272462, + 1.7272048714828492, + 1.7266432265472411, + 1.7268222411727905, + 1.7249765619659423, + 1.7266766318130493, + 1.7234420058441162, + 1.7232857625579834, + 1.7222659621429444, + 1.7246627559280396, + 1.7202154187011718, + 1.721605923423767, + 1.7191662756729127, + 1.7233360834503173, + 1.7196103755950927, + 1.721537714920044, + 1.7234055492401124, + 1.7208966232681275, + 1.719355527381897, + 1.716101904335022, + 1.7131741651153565, + 1.716256374435425, + 1.7143639738082885, + 1.7126432967376708, + 1.7113130987167358, + 1.7133349226760863, + 1.7085220684051514, + 1.7085635126495362, + 1.7085992751312256, + 1.7081326258087157, + 1.7065643463897706, + 1.7067539226913453, + 1.7028010097503663, + 1.7049776620864867, + 1.7025865216445923, + 1.7059548104095459, + 1.7012168072128295, + 1.7034333710479737, + 1.700839725112915, + 1.7046385177612304, + 1.6998081121063233, + 1.6977149985122681, + 1.6993804161834716, + 1.696179700050354, + 1.696841403427124, + 1.6989550568771363, + 1.6974260115814208, + 1.691993281288147, + 1.6956287409210204, + 1.6960039244842529, + 1.693833966407776, + 1.7002782623672485, + 1.6935484520721436, + 1.6925058234405517, + 1.693256030807495, + 1.6941557180786133, + 1.6950180599594116, + 1.697810530014038, + 1.6920377853393556, + 1.694213403968811, + 1.6934792825317382 + ], + "train_acc": [ + 0.2421, + 0.2836, + 0.30398, + 0.31868, + 0.32722, + 0.32272, + 0.32508, + 0.33034, + 0.33142, + 0.33714, + 0.33564, + 0.34428, + 0.34574, + 0.35108, + 0.35066, + 0.35318, + 0.35484, + 0.35766, + 0.36236, + 0.36052, + 0.36336, + 0.3664, + 0.36704, + 0.36724, + 0.37118, + 0.37224, + 0.3698, + 0.36986, + 0.37486, + 0.3753, + 0.37402, + 0.37556, + 0.37578, + 0.37696, + 0.37936, + 0.3799, + 0.37784, + 0.38368, + 0.38064, + 0.37848, + 0.38158, + 0.38068, + 0.38108, + 0.38166, + 0.37934, + 0.38318, + 0.38062, + 0.38266, + 0.38166, + 0.3839, + 0.38078, + 0.38148, + 0.38032, + 0.38514, + 0.38458, + 0.38238, + 0.38438, + 0.38428, + 0.3852, + 0.38752, + 0.38766, + 0.38474, + 0.38774, + 0.38698, + 0.38748, + 0.38616, + 0.38806, + 0.38998, + 0.3884, + 0.39208, + 0.39082, + 0.39204, + 0.38986, + 0.39288, + 0.39172, + 0.3936, + 0.39076, + 0.3933, + 0.39058, + 0.3929, + 0.39284, + 0.39224, + 0.3934, + 0.3943, + 0.39238, + 0.39528, + 0.3933, + 0.39514, + 0.39354, + 0.39286, + 0.39398, + 0.39674, + 0.39442, + 0.39522, + 0.3961, + 0.39508, + 0.39172, + 0.39722, + 0.3963, + 0.39622 + ], + "test_acc": [ + 0.298, + 0.3175, + 0.3253, + 0.3526, + 0.3529, + 0.3373, + 0.3593, + 0.3592, + 0.3528, + 0.3615, + 0.3745, + 0.3626, + 0.3649, + 0.3807, + 0.3926, + 0.3892, + 0.3738, + 0.3794, + 0.3869, + 0.3749, + 0.3867, + 0.3913, + 0.3851, + 0.3865, + 0.3911, + 0.3974, + 0.3817, + 0.3933, + 0.3944, + 0.3958, + 0.3981, + 0.3966, + 0.3975, + 0.4001, + 0.4023, + 0.4001, + 0.402, + 0.4049, + 0.4088, + 0.4076, + 0.4051, + 0.4027, + 0.3994, + 0.4035, + 0.4076, + 0.3913, + 0.4047, + 0.4061, + 0.4018, + 0.4118, + 0.4097, + 0.41, + 0.4085, + 0.401, + 0.4083, + 0.4014, + 0.405, + 0.4018, + 0.4114, + 0.4104, + 0.4074, + 0.4083, + 0.4103, + 0.4046, + 0.4011, + 0.4139, + 0.408, + 0.4094, + 0.4125, + 0.4088, + 0.414, + 0.4114, + 0.4124, + 0.4154, + 0.4121, + 0.4153, + 0.4115, + 0.4125, + 0.4144, + 0.4152, + 0.4117, + 0.411, + 0.414, + 0.4126, + 0.4137, + 0.4151, + 0.4134, + 0.4126, + 0.4132, + 0.4127, + 0.4138, + 0.4143, + 0.4148, + 0.414, + 0.4142, + 0.4145, + 0.4145, + 0.415, + 0.4144, + 0.4143 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.026830948889255524, + 0.10699465870857239, + 0.09640855342149734, + 0.016582896932959557, + -0.06015268713235855, + -0.09386980533599854, + 0.010263346135616302, + 0.9927012920379639 + ], + "perturbation_rho": [ + 0.02561907097697258, + -0.004561614245176315, + 0.008805938996374607, + -0.004548710770905018, + -0.06699962168931961, + -0.0668429285287857, + -0.03943357244133949, + 0.02623065561056137 + ], + "nudging": { + "0.001": [ + -2.3067113943398e-06, + -4.3620821088552475e-07, + -2.4866312742233276e-07, + 7.08969309926033e-08, + 1.3213139027357101e-07, + 1.5960540622472763e-07, + 4.423782229423523e-09, + -1.1902302503585815e-06 + ], + "0.003": [ + -7.16338399797678e-06, + -1.5725381672382355e-06, + -7.672933861613274e-07, + -1.3835960999131203e-07, + 2.973247319459915e-07, + 5.42961061000824e-07, + -2.5727786123752594e-08, + -4.3523614294826984e-06 + ], + "0.01": [ + -2.3563567083328962e-05, + -5.601265002042055e-06, + -2.8641661629080772e-06, + -5.615875124931335e-07, + 9.248033165931702e-07, + 1.622654963284731e-06, + -2.4889595806598663e-07, + -1.5358731616288424e-05 + ] + }, + "hidden_norms_per_layer": [ + 5353.1064453125, + 80361.6875, + 198367.984375, + 279507.6875, + 680688.0, + 1020542.375, + 1562414.75, + 1590694.75, + 789908.0 + ], + "bp_grad_norms_per_layer": [ + 3.106619624304585e-05, + 1.862773842731258e-06, + 9.139845928984869e-07, + 7.041780349936744e-07, + 6.676064572275209e-07, + 6.690797818009742e-07, + 6.687893119305954e-07, + 6.670686047982599e-07, + 6.49854484890966e-07 + ] + }, + "drift": { + "embed.weight": 40.22494125084113, + "embed.bias": 15.798132334624936, + "blocks.0.ln.weight": 1.2241361855735886, + "blocks.0.w1.weight": 16.34451235786821, + "blocks.0.w1.bias": 11.623359166710161, + "blocks.0.w2.weight": 59.40609750891924, + "blocks.1.ln.weight": 0.9394658291469571, + "blocks.1.w1.weight": 16.816482551922167, + "blocks.1.w1.bias": 7.284029885203612, + "blocks.1.w2.weight": 43.964019309740266, + "blocks.2.ln.weight": 0.7618290822194834, + "blocks.2.w1.weight": 15.551125367027769, + "blocks.2.w1.bias": 9.075794440045083, + "blocks.2.w2.weight": 44.423765980175624, + "blocks.3.ln.weight": 0.7978062227799843, + "blocks.3.w1.weight": 18.895887551107087, + "blocks.3.w1.bias": 16.322155802924105, + "blocks.3.w2.weight": 40.59294466107463, + "blocks.4.ln.weight": 0.7028791271353807, + "blocks.4.w1.weight": 18.831247940509048, + "blocks.4.w1.bias": 19.397899365978574, + "blocks.4.w2.weight": 33.77994253855422, + "blocks.5.ln.weight": 0.5882412988040494, + "blocks.5.w1.weight": 20.16928056506279, + "blocks.5.w1.bias": 23.309656656430743, + "blocks.5.w2.weight": 30.02848330872152, + "blocks.6.ln.weight": 0.6458882840441386, + "blocks.6.w1.weight": 15.641543003808316, + "blocks.6.w1.bias": 12.528164451431063, + "blocks.6.w2.weight": 65.83396405007723, + "blocks.7.ln.weight": 0.8531338247599758, + "blocks.7.w1.weight": 19.29027360202624, + "blocks.7.w1.bias": 18.640476186308685, + "blocks.7.w2.weight": 55.45516056132436, + "out_ln.weight": 0.3127678274218022, + "out_head.weight": 5.776369546198508, + "out_head.bias": 1.402401683707018 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 8, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 4 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L8_seed4", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file |
