diff options
30 files changed, 26870 insertions, 0 deletions
diff --git a/results/fa_dfa_d512_L12_seed0/results_cifar10.json b/results/fa_dfa_d512_L12_seed0/results_cifar10.json new file mode 100644 index 0000000..0938122 --- /dev/null +++ b/results/fa_dfa_d512_L12_seed0/results_cifar10.json @@ -0,0 +1,969 @@ +{ + "0": { + "dfa": { + "log": { + "train_loss": [ + 2.0727519187164307, + 2.0449732803344727, + 2.0343947634887694, + 2.0317279723739623, + 2.029190194129944, + 2.0266742729187013, + 2.022695488433838, + 2.0220756021881106, + 2.017278121871948, + 2.013867749862671, + 2.0141937175750733, + 2.0114837906265257, + 2.0090159717559812, + 2.006871261138916, + 2.0085684817504883, + 2.004453958129883, + 2.0050127017211916, + 2.004625142745972, + 2.001847840270996, + 2.0038710110855105, + 2.0015532569885255, + 2.001803522415161, + 2.0029406197357176, + 1.9984209733581544, + 1.9999778453063966, + 1.997875029144287, + 2.002456622467041, + 2.000510860939026, + 1.9972556433105468, + 1.9980127339935303, + 1.9964525595092772, + 1.9945909706878662, + 1.994441053237915, + 1.9951845182037353, + 1.995466745147705, + 1.992624706878662, + 1.9937170362091063, + 1.9947755416870117, + 1.9944197441101075, + 1.9952830042266845, + 1.9904139096832276, + 1.991399830932617, + 1.9951595304870606, + 1.9930326363754272, + 1.9904268531799316, + 1.9908876788330079, + 1.9936137868499755, + 1.9875998815917968, + 1.992842328338623, + 1.990330719833374, + 1.9921186029815674, + 1.9902390911102295, + 1.9888944667053223, + 1.9905146474456787, + 1.9888609869384766, + 1.9911943308258058, + 1.9893802894592285, + 1.9878305652618409, + 1.9877847812652587, + 1.9883021939849854, + 1.988304048690796, + 1.9873467050170899, + 1.9866977008438111, + 1.9872942990875244, + 1.9863059562683105, + 1.9860430270004272, + 1.9863528005981446, + 1.9857840370941162, + 1.9861036871337892, + 1.985937850112915, + 1.9876243210601807, + 1.9859837552642823, + 1.9863339701080323, + 1.9869920307922364, + 1.9855867569732666, + 1.984609903526306, + 1.9867486194229127, + 1.9843175145721434, + 1.9860691648101807, + 1.9842630282592773, + 1.986168493347168, + 1.9858933181762695, + 1.98474868850708, + 1.9866244787979126, + 1.9833790422058106, + 1.9852294052505493, + 1.9821201675033568, + 1.9854196563339233, + 1.9846102389526368, + 1.9820537503433227, + 1.981782052307129, + 1.984344421157837, + 1.983909532699585, + 1.9853303438949585, + 1.984236333694458, + 1.9829996084594728, + 1.984313102684021, + 1.9833407458496093, + 1.983147784729004, + 1.9833076025390626 + ], + "train_acc": [ + 0.2332, + 0.24532, + 0.2542, + 0.2564, + 0.25326, + 0.25504, + 0.25822, + 0.26022, + 0.26324, + 0.26234, + 0.26278, + 0.26454, + 0.26348, + 0.26876, + 0.26432, + 0.26712, + 0.268, + 0.26852, + 0.27126, + 0.26796, + 0.2701, + 0.27052, + 0.26954, + 0.27056, + 0.27256, + 0.27258, + 0.26872, + 0.27102, + 0.2732, + 0.27298, + 0.27452, + 0.27374, + 0.27488, + 0.273, + 0.27604, + 0.2756, + 0.27576, + 0.27472, + 0.27344, + 0.27474, + 0.27776, + 0.27826, + 0.27318, + 0.2762, + 0.27666, + 0.27602, + 0.2763, + 0.2777, + 0.27724, + 0.27716, + 0.27566, + 0.27832, + 0.28138, + 0.27918, + 0.27922, + 0.27668, + 0.27846, + 0.27786, + 0.27796, + 0.2762, + 0.27946, + 0.27984, + 0.2795, + 0.27926, + 0.28092, + 0.27924, + 0.27978, + 0.28098, + 0.28132, + 0.2802, + 0.27814, + 0.28184, + 0.28178, + 0.28284, + 0.28128, + 0.2829, + 0.28048, + 0.28194, + 0.28236, + 0.2801, + 0.28106, + 0.28278, + 0.28308, + 0.27886, + 0.2815, + 0.28188, + 0.28254, + 0.28202, + 0.28158, + 0.28336, + 0.2825, + 0.28316, + 0.28312, + 0.28218, + 0.28288, + 0.2816, + 0.28218, + 0.2807, + 0.2835, + 0.28354 + ], + "test_acc": [ + 0.2507, + 0.2596, + 0.274, + 0.2704, + 0.2841, + 0.2803, + 0.2788, + 0.2781, + 0.303, + 0.2941, + 0.2905, + 0.3016, + 0.2779, + 0.3052, + 0.2819, + 0.2911, + 0.2931, + 0.2886, + 0.2812, + 0.2924, + 0.2947, + 0.2701, + 0.28, + 0.2912, + 0.3037, + 0.2894, + 0.3012, + 0.3011, + 0.3056, + 0.2981, + 0.2923, + 0.2961, + 0.3082, + 0.3035, + 0.2868, + 0.2981, + 0.3042, + 0.2956, + 0.3012, + 0.296, + 0.3077, + 0.3082, + 0.3, + 0.3141, + 0.2967, + 0.3024, + 0.3004, + 0.3016, + 0.308, + 0.3028, + 0.3072, + 0.2933, + 0.2907, + 0.3126, + 0.3036, + 0.2973, + 0.3001, + 0.3054, + 0.3115, + 0.3004, + 0.3026, + 0.3022, + 0.3051, + 0.3109, + 0.2968, + 0.3092, + 0.2998, + 0.3062, + 0.3081, + 0.3035, + 0.3032, + 0.3114, + 0.3147, + 0.308, + 0.3035, + 0.3098, + 0.3091, + 0.3065, + 0.3094, + 0.3071, + 0.3072, + 0.309, + 0.3012, + 0.3067, + 0.3016, + 0.3028, + 0.3048, + 0.3083, + 0.3082, + 0.308, + 0.3097, + 0.3087, + 0.3117, + 0.3104, + 0.3086, + 0.3085, + 0.3088, + 0.3084, + 0.3085, + 0.3086 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.395052969455719, + 0.0004171400796622038, + 8.336821338161826e-06, + 0.0005622187163680792, + 0.00013701531861443073, + -0.00035643568844534457, + 0.00012600264744833112, + -0.00027335749473422766, + 0.00016040689661167562, + -6.0928698076168075e-05, + 3.278384974692017e-05, + 0.0003701794194057584 + ], + "perturbation_rho": [ + 0.018501652404665947, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -4.1211023926734924e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.2032687664031982e-06, + 0.0, + 0.0, + 0.0, + 0.0, + 3.725290298461914e-09, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -4.2067840695381165e-06, + -9.313225746154785e-10, + -2.7939677238464355e-09, + 9.313225746154785e-10, + 0.0, + 4.6566128730773926e-09, + 1.862645149230957e-09, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 52639.97265625, + 1088866944.0, + 4048803840.0, + 5257923584.0, + 5847087616.0, + 5938909184.0, + 7423066624.0, + 9731870720.0, + 11342387200.0, + 11317005312.0, + 11353008128.0, + 12463034368.0, + 13189959680.0 + ], + "bp_grad_norms_per_layer": [ + 2.804916334753216e-07, + 1.9205773083008637e-10, + 1.9170887100017353e-10, + 1.9189858035950635e-10, + 1.919657766080718e-10, + 1.919622516499686e-10, + 1.919622516499686e-10, + 1.9191119526862366e-10, + 1.9192891720365424e-10, + 1.919416292572862e-10, + 1.9193641120907046e-10, + 1.9192181177629664e-10, + 1.9190903033372564e-10 + ] + }, + "drift": { + "embed.weight": 330.26007887921685, + "embed.bias": 274.2168771279626, + "blocks.0.ln.weight": 10.94628999409354, + "blocks.0.w1.weight": 277.3643033410537, + "blocks.0.w1.bias": 261.86497048407, + "blocks.0.w2.weight": 484.6109947045105, + "blocks.1.ln.weight": 9.35886499546542, + "blocks.1.w1.weight": 360.26286845371646, + "blocks.1.w1.bias": 355.66126981713177, + "blocks.1.w2.weight": 392.1776082515924, + "blocks.2.ln.weight": 9.350239389888953, + "blocks.2.w1.weight": 370.0336198354507, + "blocks.2.w1.bias": 337.8932267522991, + "blocks.2.w2.weight": 352.1258021274022, + "blocks.3.ln.weight": 9.767929501471052, + "blocks.3.w1.weight": 331.5909899943081, + "blocks.3.w1.bias": 303.3604214345198, + "blocks.3.w2.weight": 312.3336342294667, + "blocks.4.ln.weight": 6.8894143383412505, + "blocks.4.w1.weight": 245.69582220582208, + "blocks.4.w1.bias": 226.6077326792198, + "blocks.4.w2.weight": 237.1578303477898, + "blocks.5.ln.weight": 10.087708773364, + "blocks.5.w1.weight": 391.6451916277919, + "blocks.5.w1.bias": 364.5077034653599, + "blocks.5.w2.weight": 375.80268056062386, + "blocks.6.ln.weight": 10.894168165759849, + "blocks.6.w1.weight": 446.1083385424109, + "blocks.6.w1.bias": 408.24079100200436, + "blocks.6.w2.weight": 421.6066446657385, + "blocks.7.ln.weight": 10.280446652077675, + "blocks.7.w1.weight": 417.5786268538863, + "blocks.7.w1.bias": 412.3650615213133, + "blocks.7.w2.weight": 413.1057523107626, + "blocks.8.ln.weight": 7.628061136196448, + "blocks.8.w1.weight": 280.87597195145304, + "blocks.8.w1.bias": 261.4439841235217, + "blocks.8.w2.weight": 269.85556299140023, + "blocks.9.ln.weight": 7.035433277618009, + "blocks.9.w1.weight": 236.54101277402214, + "blocks.9.w1.bias": 227.70919757896647, + "blocks.9.w2.weight": 225.58244999977697, + "blocks.10.ln.weight": 10.07787136157841, + "blocks.10.w1.weight": 405.6014059372381, + "blocks.10.w1.bias": 387.7928293241623, + "blocks.10.w2.weight": 391.3899843395933, + "blocks.11.ln.weight": 9.615189319853535, + "blocks.11.w1.weight": 379.5363902874608, + "blocks.11.w1.bias": 358.37006314701347, + "blocks.11.w2.weight": 355.59941733335387, + "out_ln.weight": 0.6824993975107903, + "out_head.weight": 9.861581662622491, + "out_head.bias": 0.35061659338833756 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0293911264038087, + 1.950565330429077, + 1.917781063232422, + 1.9023740824127198, + 1.8861673935699463, + 1.8825682291412353, + 1.873201879234314, + 1.8714005821990967, + 1.8628303081512452, + 1.8579199737548828, + 1.8572168459320069, + 1.8525300827789306, + 1.845312434463501, + 1.8409330677032472, + 1.8335270806121826, + 1.822513469543457, + 1.8199686001205444, + 1.8136085485458373, + 1.8064511907196046, + 1.803108533859253, + 1.7962512873077392, + 1.7911701647949219, + 1.7911696646118165, + 1.7887402313995362, + 1.783379939842224, + 1.7759505123901367, + 1.780573166732788, + 1.7734921991729737, + 1.7670989339828491, + 1.7612831104278563, + 1.7572847888183594, + 1.753526630859375, + 1.750715301132202, + 1.7472047467041016, + 1.7457971502304077, + 1.7424137032699585, + 1.7345080102920531, + 1.734222209701538, + 1.7304457287216186, + 1.7239187670898437, + 1.7218291164398194, + 1.7199537884902953, + 1.7217532263565063, + 1.7196246533584594, + 1.7135296297836304, + 1.706606083908081, + 1.7079890319824218, + 1.7041183599472045, + 1.705560855026245, + 1.7011269199371337, + 1.7035633499908447, + 1.6992430791854858, + 1.6950508019638062, + 1.7004030841064453, + 1.691479129295349, + 1.691718464050293, + 1.6919280102157592, + 1.6887580330657959, + 1.6883217154693604, + 1.688851436805725, + 1.6853341622543334, + 1.680581435546875, + 1.678009653892517, + 1.6784401907730102, + 1.6803946784210204, + 1.6761323783493043, + 1.676516137046814, + 1.6778211946868897, + 1.6767865496444703, + 1.6731242126083374, + 1.675690205078125, + 1.669817829246521, + 1.6760586986923218, + 1.670416958580017, + 1.6687565314483643, + 1.668258262901306, + 1.6666069919204711, + 1.666242490005493, + 1.6675299571990967, + 1.663490775527954, + 1.6672910994720458, + 1.6639445114517213, + 1.6617960903167726, + 1.6636806215667725, + 1.6643552365112304, + 1.6602695792007447, + 1.6589048775100708, + 1.6614284314727783, + 1.6610555962753295, + 1.6578762405014038, + 1.6580958423614502, + 1.6621724097061157, + 1.6586555422210694, + 1.657684158668518, + 1.6587366760253905, + 1.657404851913452, + 1.6599628324127198, + 1.6565218897247314, + 1.6586303088760377, + 1.6615223189544677 + ], + "train_acc": [ + 0.2526, + 0.29054, + 0.30538, + 0.31188, + 0.31418, + 0.31852, + 0.32254, + 0.3243, + 0.32728, + 0.33104, + 0.33096, + 0.33338, + 0.33726, + 0.33962, + 0.3428, + 0.34614, + 0.34728, + 0.34656, + 0.35102, + 0.35328, + 0.35598, + 0.35864, + 0.35488, + 0.35678, + 0.35954, + 0.36188, + 0.36066, + 0.36522, + 0.36488, + 0.36676, + 0.36986, + 0.3702, + 0.3739, + 0.37346, + 0.37434, + 0.37574, + 0.3752, + 0.3788, + 0.37904, + 0.38092, + 0.38054, + 0.38164, + 0.37934, + 0.38172, + 0.38678, + 0.3875, + 0.38734, + 0.38996, + 0.38752, + 0.39106, + 0.38978, + 0.39084, + 0.39086, + 0.39326, + 0.39296, + 0.39256, + 0.39342, + 0.39414, + 0.39416, + 0.39432, + 0.39614, + 0.39536, + 0.3965, + 0.39922, + 0.39722, + 0.39968, + 0.3982, + 0.39886, + 0.40146, + 0.4016, + 0.40122, + 0.4017, + 0.39858, + 0.40386, + 0.40168, + 0.4028, + 0.4026, + 0.4035, + 0.40378, + 0.40566, + 0.4031, + 0.4049, + 0.40506, + 0.4035, + 0.40618, + 0.40566, + 0.406, + 0.40476, + 0.40564, + 0.40724, + 0.40646, + 0.4061, + 0.4079, + 0.40568, + 0.40498, + 0.40842, + 0.40674, + 0.40746, + 0.40728, + 0.40684 + ], + "test_acc": [ + 0.2924, + 0.3363, + 0.3349, + 0.3386, + 0.347, + 0.3457, + 0.3423, + 0.3555, + 0.3653, + 0.3606, + 0.3632, + 0.3617, + 0.3575, + 0.3723, + 0.3703, + 0.3715, + 0.3741, + 0.3774, + 0.374, + 0.3745, + 0.3836, + 0.3907, + 0.3702, + 0.3831, + 0.385, + 0.3843, + 0.3922, + 0.4003, + 0.3967, + 0.3978, + 0.3897, + 0.3828, + 0.401, + 0.4012, + 0.3991, + 0.3999, + 0.4055, + 0.4085, + 0.3968, + 0.4064, + 0.4066, + 0.4082, + 0.404, + 0.4042, + 0.4085, + 0.4091, + 0.4104, + 0.4155, + 0.4157, + 0.4125, + 0.4139, + 0.4166, + 0.4116, + 0.4187, + 0.4186, + 0.4123, + 0.4161, + 0.4171, + 0.4184, + 0.414, + 0.4179, + 0.4184, + 0.4197, + 0.4193, + 0.421, + 0.4205, + 0.423, + 0.4233, + 0.4201, + 0.422, + 0.4204, + 0.4151, + 0.4182, + 0.4218, + 0.4239, + 0.4198, + 0.425, + 0.4239, + 0.4204, + 0.4215, + 0.4253, + 0.4199, + 0.4235, + 0.4206, + 0.4235, + 0.4255, + 0.4222, + 0.423, + 0.4216, + 0.426, + 0.4245, + 0.4245, + 0.4239, + 0.4248, + 0.4224, + 0.4245, + 0.4246, + 0.4241, + 0.4242, + 0.4239 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.005966864060610533, + 0.04795818775892258, + 0.043208200484514236, + -0.05298762395977974, + -0.11785908043384552, + -0.05540306121110916, + -0.10012105107307434, + -0.09065192937850952, + -0.04056818038225174, + -0.03824863210320473, + -0.07007172703742981, + 0.9948829412460327 + ], + "perturbation_rho": [ + 0.028052061796188354, + 0.03165658190846443, + -0.006041648797690868, + -0.027337782084941864, + 0.01562969572842121, + 0.01757640205323696, + -0.025423342362046242, + -0.024989785626530647, + -0.03021504357457161, + 0.021996641531586647, + 0.03194758668541908, + 0.01976931467652321 + ], + "nudging": { + "0.001": [ + 3.7101563066244125e-07, + -6.126938387751579e-07, + -3.066379576921463e-07, + 1.6076955944299698e-07, + 3.3061951398849487e-07, + 1.5937257558107376e-07, + 2.9441434890031815e-07, + 2.859160304069519e-07, + 1.284060999751091e-07, + 1.5040859580039978e-07, + 2.1443702280521393e-07, + -3.2152747735381126e-06 + ], + "0.003": [ + 1.007108949124813e-06, + -1.6828998923301697e-06, + -6.601912900805473e-07, + 4.4493936002254486e-07, + 1.1789379641413689e-06, + 5.08967787027359e-07, + 1.0151416063308716e-06, + 8.791685104370117e-07, + 3.866152837872505e-07, + 3.688037395477295e-07, + 6.126938387751579e-07, + -1.0434770956635475e-05 + ], + "0.01": [ + 3.6874553188681602e-06, + -5.368026904761791e-06, + -2.2364547476172447e-06, + 1.735752448439598e-06, + 3.992114216089249e-06, + 1.860782504081726e-06, + 3.5137636587023735e-06, + 3.010733053088188e-06, + 1.437612809240818e-06, + 1.2825476005673409e-06, + 2.246466465294361e-06, + -3.491528332233429e-05 + ] + }, + "hidden_norms_per_layer": [ + 4702.6123046875, + 44530.10546875, + 108911.2109375, + 164933.125, + 283322.40625, + 377155.46875, + 407664.6875, + 517485.0625, + 562702.0625, + 564809.5, + 577993.1875, + 617885.5625, + 296847.03125 + ], + "bp_grad_norms_per_layer": [ + 4.941995575791225e-05, + 3.9530186768388376e-06, + 1.528447455712012e-06, + 1.3358264823182253e-06, + 1.3305660786500084e-06, + 1.3287758520164061e-06, + 1.3350502285902621e-06, + 1.339361574537179e-06, + 1.3375677099247696e-06, + 1.3184284171074978e-06, + 1.3012635236009373e-06, + 1.3201109823057777e-06, + 1.27659131976543e-06 + ] + }, + "drift": { + "embed.weight": 36.945692873367385, + "embed.bias": 13.300137685811876, + "blocks.0.ln.weight": 0.901147599699004, + "blocks.0.w1.weight": 13.367758537525685, + "blocks.0.w1.bias": 10.72336098907488, + "blocks.0.w2.weight": 46.304733049049524, + "blocks.1.ln.weight": 0.7424575473753766, + "blocks.1.w1.weight": 15.226852479991834, + "blocks.1.w1.bias": 7.345119575250527, + "blocks.1.w2.weight": 40.81729281725618, + "blocks.2.ln.weight": 0.6752024292095198, + "blocks.2.w1.weight": 13.877425620596014, + "blocks.2.w1.bias": 7.829951186600198, + "blocks.2.w2.weight": 39.52886223656965, + "blocks.3.ln.weight": 0.6072219216215275, + "blocks.3.w1.weight": 13.849387698880346, + "blocks.3.w1.bias": 11.302805784789012, + "blocks.3.w2.weight": 31.48991587682597, + "blocks.4.ln.weight": 0.3513996294280428, + "blocks.4.w1.weight": 12.017687634070212, + "blocks.4.w1.bias": 12.545759692163585, + "blocks.4.w2.weight": 19.78672753090035, + "blocks.5.ln.weight": 0.27340079470413037, + "blocks.5.w1.weight": 10.049144366681992, + "blocks.5.w1.bias": 9.943637751012846, + "blocks.5.w2.weight": 19.227980350358227, + "blocks.6.ln.weight": 0.3688735234588941, + "blocks.6.w1.weight": 12.460300257534458, + "blocks.6.w1.bias": 13.383277160534908, + "blocks.6.w2.weight": 19.760938996380943, + "blocks.7.ln.weight": 0.34359755221214905, + "blocks.7.w1.weight": 11.768674295896531, + "blocks.7.w1.bias": 13.150434923761187, + "blocks.7.w2.weight": 21.174060669259852, + "blocks.8.ln.weight": 0.3997077790355334, + "blocks.8.w1.weight": 11.48289450791943, + "blocks.8.w1.bias": 8.028224303872328, + "blocks.8.w2.weight": 35.011661428887535, + "blocks.9.ln.weight": 0.43894400900661307, + "blocks.9.w1.weight": 11.84935422288265, + "blocks.9.w1.bias": 8.899824293598352, + "blocks.9.w2.weight": 34.666647832790865, + "blocks.10.ln.weight": 0.4701858246153278, + "blocks.10.w1.weight": 12.779979416376127, + "blocks.10.w1.bias": 12.40660396279328, + "blocks.10.w2.weight": 30.770398182409856, + "blocks.11.ln.weight": 0.502840402610306, + "blocks.11.w1.weight": 15.51482213849637, + "blocks.11.w1.bias": 16.777463254010712, + "blocks.11.w2.weight": 27.32990799926724, + "out_ln.weight": 0.2952693196309993, + "out_head.weight": 4.4773532602382, + "out_head.bias": 1.8150038464128413 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 12, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 0 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L12_seed0", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d512_L12_seed1/results_cifar10.json b/results/fa_dfa_d512_L12_seed1/results_cifar10.json new file mode 100644 index 0000000..e317465 --- /dev/null +++ b/results/fa_dfa_d512_L12_seed1/results_cifar10.json @@ -0,0 +1,969 @@ +{ + "1": { + "dfa": { + "log": { + "train_loss": [ + 2.08784130317688, + 2.052014544372559, + 2.045669429321289, + 2.0424993614196776, + 2.0406430601501464, + 2.038676989364624, + 2.0339627771759035, + 2.0320363275527953, + 2.0293006126403808, + 2.027781917953491, + 2.0250831903839113, + 2.0254856860351564, + 2.025074804534912, + 2.020526984100342, + 2.017961084213257, + 2.0189661613464356, + 2.0154323001098633, + 2.012715454673767, + 2.0142161911010743, + 2.0169401965713503, + 2.015820147628784, + 2.0139186489105225, + 2.0170457207489014, + 2.010938446311951, + 2.0119680180358888, + 2.011925876312256, + 2.0143484900665283, + 2.011023857269287, + 2.011966758041382, + 2.011277076072693, + 2.0121180039978026, + 2.011552553100586, + 2.0112545052337647, + 2.0096049158477785, + 2.0116290228271483, + 2.012332228317261, + 2.0100498531341553, + 2.013163764266968, + 2.0090835204696655, + 2.0087667613983156, + 2.008153441734314, + 2.008515095252991, + 2.0076544715118407, + 2.006217385635376, + 2.0079916049957274, + 2.0093279348754884, + 2.0085637417602538, + 2.0080473442077635, + 2.005413489227295, + 2.006327581214905, + 2.0087718325042725, + 2.005806240501404, + 2.0063090213012695, + 2.005670380554199, + 2.008137595367432, + 2.0058074869537355, + 2.00653946685791, + 2.008738963394165, + 2.005000798873901, + 2.005233299484253, + 2.0051020904159547, + 2.0054547385406494, + 2.006043081893921, + 2.0066007862091064, + 2.0075411237335206, + 2.004597886505127, + 2.004815984649658, + 2.0049288347625733, + 2.004783443069458, + 2.006772222366333, + 2.0067188888549805, + 2.005615498046875, + 2.0044576259613036, + 2.006375897064209, + 2.0036719734954835, + 2.0061926458740236, + 2.004457794265747, + 2.0043624324035645, + 2.00480925743103, + 2.0047112017822264, + 2.0034663817977907, + 2.002827211380005, + 2.003827619934082, + 2.0031036682891847, + 2.0055768743133546, + 2.002000348739624, + 2.0022059215545656, + 2.0022215761566162, + 2.0029819064331056, + 2.0014546299743654, + 2.00309593082428, + 2.0011430258178713, + 2.004017787246704, + 2.001931449203491, + 2.000619831314087, + 2.0033977560043335, + 2.0014762798690797, + 2.0025222287368774, + 2.003771315956116, + 2.0038832190704348 + ], + "train_acc": [ + 0.22474, + 0.23978, + 0.242, + 0.24386, + 0.2415, + 0.24352, + 0.24826, + 0.2474, + 0.25112, + 0.24912, + 0.25434, + 0.25094, + 0.25204, + 0.25484, + 0.2583, + 0.25824, + 0.2601, + 0.26048, + 0.261, + 0.25998, + 0.2624, + 0.2619, + 0.25918, + 0.26472, + 0.2644, + 0.26346, + 0.2648, + 0.26354, + 0.26368, + 0.26704, + 0.26128, + 0.26468, + 0.26524, + 0.26586, + 0.26558, + 0.26324, + 0.26776, + 0.2652, + 0.26808, + 0.26752, + 0.26614, + 0.26558, + 0.2692, + 0.27084, + 0.27106, + 0.26932, + 0.2685, + 0.26908, + 0.26978, + 0.27274, + 0.26924, + 0.27318, + 0.27128, + 0.27216, + 0.27004, + 0.27288, + 0.26872, + 0.27234, + 0.27316, + 0.27282, + 0.27218, + 0.2717, + 0.27198, + 0.27498, + 0.27174, + 0.27282, + 0.27204, + 0.27394, + 0.27538, + 0.26966, + 0.27104, + 0.27454, + 0.27422, + 0.27358, + 0.27468, + 0.27304, + 0.27244, + 0.27386, + 0.27408, + 0.27234, + 0.27666, + 0.2753, + 0.2739, + 0.27624, + 0.27434, + 0.27818, + 0.27466, + 0.27586, + 0.27504, + 0.27554, + 0.27526, + 0.27624, + 0.27486, + 0.27606, + 0.27568, + 0.27474, + 0.27578, + 0.27652, + 0.27508, + 0.27436 + ], + "test_acc": [ + 0.2444, + 0.2437, + 0.2469, + 0.2484, + 0.2807, + 0.266, + 0.2731, + 0.2735, + 0.2783, + 0.2609, + 0.2648, + 0.2716, + 0.2541, + 0.2846, + 0.2739, + 0.2748, + 0.28, + 0.2759, + 0.2345, + 0.2676, + 0.271, + 0.2659, + 0.2785, + 0.2728, + 0.2906, + 0.269, + 0.2846, + 0.2651, + 0.2937, + 0.2971, + 0.2925, + 0.2807, + 0.2778, + 0.2837, + 0.2793, + 0.292, + 0.2866, + 0.2911, + 0.2915, + 0.2833, + 0.2907, + 0.2971, + 0.2829, + 0.2891, + 0.2955, + 0.2991, + 0.295, + 0.2912, + 0.2697, + 0.2874, + 0.2741, + 0.2949, + 0.2836, + 0.2944, + 0.2934, + 0.2975, + 0.2873, + 0.2882, + 0.2889, + 0.2901, + 0.2789, + 0.2908, + 0.2953, + 0.2911, + 0.2987, + 0.2861, + 0.291, + 0.2942, + 0.2938, + 0.301, + 0.2972, + 0.2922, + 0.2872, + 0.2859, + 0.298, + 0.2942, + 0.2918, + 0.2964, + 0.2973, + 0.2944, + 0.2903, + 0.2885, + 0.2969, + 0.2897, + 0.291, + 0.294, + 0.2903, + 0.2931, + 0.2894, + 0.293, + 0.2936, + 0.2923, + 0.291, + 0.291, + 0.2933, + 0.293, + 0.2935, + 0.2933, + 0.2932, + 0.2932 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.3784182071685791, + 0.00024469412164762616, + -6.5529711719136685e-06, + -2.4012413632590324e-05, + -2.2034288122085854e-05, + 0.0002968982153106481, + -0.0007156325737014413, + 0.0002482909185346216, + -0.00023413923918269575, + -0.000343983992934227, + -0.0004118037468288094, + -0.0001832617272157222 + ], + "perturbation_rho": [ + 0.02268090471625328, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -2.682209014892578e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -8.391216397285461e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 9.313225746154785e-10, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -3.262888640165329e-06, + -2.7939677238464355e-09, + 0.0, + 0.0, + 9.313225746154785e-10, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 58511.79296875, + 1607470848.0, + 3539406336.0, + 4483452928.0, + 5316571136.0, + 6553798144.0, + 7388260352.0, + 9242703872.0, + 9554661376.0, + 11009835008.0, + 11375313920.0, + 12110455808.0, + 12125570048.0 + ], + "bp_grad_norms_per_layer": [ + 2.394743603417737e-07, + 1.8629858766772145e-10, + 1.8620747999076315e-10, + 1.8607929086478237e-10, + 1.8619832065081e-10, + 1.8618356856237028e-10, + 1.862058007784384e-10, + 1.8604620621864854e-10, + 1.860521597896181e-10, + 1.860489817762101e-10, + 1.860557541366603e-10, + 1.860564341482629e-10, + 1.8607769491918447e-10 + ] + }, + "drift": { + "embed.weight": 354.85094069636483, + "embed.bias": 257.5504888266541, + "blocks.0.ln.weight": 10.020600874142511, + "blocks.0.w1.weight": 311.43953558716794, + "blocks.0.w1.bias": 277.95760174306895, + "blocks.0.w2.weight": 495.0366657957393, + "blocks.1.ln.weight": 9.233680086701126, + "blocks.1.w1.weight": 352.291334675127, + "blocks.1.w1.bias": 326.78546394466474, + "blocks.1.w2.weight": 328.5686664739385, + "blocks.2.ln.weight": 8.349584849320378, + "blocks.2.w1.weight": 347.4686920651646, + "blocks.2.w1.bias": 314.54485739427827, + "blocks.2.w2.weight": 322.05654919163993, + "blocks.3.ln.weight": 8.748114666066721, + "blocks.3.w1.weight": 341.6411014867908, + "blocks.3.w1.bias": 316.14847884372415, + "blocks.3.w2.weight": 332.52492325797556, + "blocks.4.ln.weight": 9.48080425966404, + "blocks.4.w1.weight": 385.71563399378635, + "blocks.4.w1.bias": 355.1208489434028, + "blocks.4.w2.weight": 353.2677244575998, + "blocks.5.ln.weight": 8.852085211151078, + "blocks.5.w1.weight": 359.24445324084536, + "blocks.5.w1.bias": 327.2053984662166, + "blocks.5.w2.weight": 334.59987322510256, + "blocks.6.ln.weight": 11.348583355888922, + "blocks.6.w1.weight": 445.597521308562, + "blocks.6.w1.bias": 417.10230766347513, + "blocks.6.w2.weight": 410.28435436855875, + "blocks.7.ln.weight": 8.64827114045918, + "blocks.7.w1.weight": 341.9936697385893, + "blocks.7.w1.bias": 328.17910031845435, + "blocks.7.w2.weight": 322.2651678366388, + "blocks.8.ln.weight": 10.71699832162817, + "blocks.8.w1.weight": 430.40137342022086, + "blocks.8.w1.bias": 395.45655885652957, + "blocks.8.w2.weight": 418.09527663040274, + "blocks.9.ln.weight": 7.689168672786446, + "blocks.9.w1.weight": 307.285121525094, + "blocks.9.w1.bias": 286.6467639719644, + "blocks.9.w2.weight": 284.90185966096766, + "blocks.10.ln.weight": 9.362189538932856, + "blocks.10.w1.weight": 374.1142646081974, + "blocks.10.w1.bias": 340.73445175290055, + "blocks.10.w2.weight": 334.3762368608616, + "blocks.11.ln.weight": 6.971496170157957, + "blocks.11.w1.weight": 273.28051237448034, + "blocks.11.w1.bias": 249.7041416371339, + "blocks.11.w2.weight": 250.47506193148064, + "out_ln.weight": 0.6766551046620063, + "out_head.weight": 9.660594953000444, + "out_head.bias": 0.6081606213236193 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.039813977584839, + 1.9617359282684326, + 1.9338554121398925, + 1.920875029220581, + 1.9130360520172118, + 1.9065295135116578, + 1.8998144156646728, + 1.893558436279297, + 1.8832812244033814, + 1.8782934536361695, + 1.8706854000091553, + 1.8697611987304688, + 1.8661894456481933, + 1.8597797629165649, + 1.8567528272247313, + 1.8511868425750733, + 1.8451919631195068, + 1.8397227505493163, + 1.834096513671875, + 1.8383717123413086, + 1.8340178047943114, + 1.828574577331543, + 1.8276408141326905, + 1.816076180152893, + 1.8154300131225587, + 1.8111229518890382, + 1.8086885763549805, + 1.8008410723114014, + 1.797977260093689, + 1.791234312095642, + 1.790951509628296, + 1.783079252281189, + 1.779609817199707, + 1.7747913509750366, + 1.7767339920043945, + 1.7752550736236572, + 1.7656426385879516, + 1.766551993637085, + 1.763684893760681, + 1.7585593047332764, + 1.7559137664413451, + 1.7532807390975953, + 1.75251891330719, + 1.749091358680725, + 1.7431825846099853, + 1.745732957458496, + 1.744267059288025, + 1.7419162277603149, + 1.738112122116089, + 1.7364409768676758, + 1.7408631985473633, + 1.7369310165405274, + 1.7362467727279662, + 1.7374004998016357, + 1.736051294517517, + 1.734863005027771, + 1.7319254892730713, + 1.7279368935775756, + 1.728024648475647, + 1.72793106716156, + 1.726857622718811, + 1.7304030743408203, + 1.725222360267639, + 1.7216177057647706, + 1.7253669234466553, + 1.7201292371368408, + 1.7183555184173585, + 1.7207783557891845, + 1.7150025174713135, + 1.7195079833221436, + 1.718960595779419, + 1.7189737926483155, + 1.7145323584365846, + 1.7151868480682373, + 1.7150000100708007, + 1.7164085680389405, + 1.7131129906463622, + 1.7118962685775756, + 1.7095365018463136, + 1.7063884579086304, + 1.7068846228790284, + 1.7066734680175781, + 1.7093646648406982, + 1.7075168813323975, + 1.7101067148590088, + 1.7065433209228515, + 1.70673194480896, + 1.706905789451599, + 1.7064489783096313, + 1.7049209537124634, + 1.702039204940796, + 1.7093150380706787, + 1.7065809494018556, + 1.7064716823959352, + 1.7058918637466431, + 1.7017672801971435, + 1.7050170278549195, + 1.7040833249664307, + 1.7052951337432862, + 1.7028977381134034 + ], + "train_acc": [ + 0.2476, + 0.28516, + 0.30182, + 0.30518, + 0.30668, + 0.3081, + 0.30996, + 0.31082, + 0.31996, + 0.31962, + 0.32402, + 0.32084, + 0.32474, + 0.3262, + 0.33392, + 0.33128, + 0.33602, + 0.33814, + 0.34208, + 0.33954, + 0.34316, + 0.34226, + 0.34322, + 0.34824, + 0.35108, + 0.35332, + 0.35372, + 0.35524, + 0.35692, + 0.3583, + 0.3558, + 0.36152, + 0.36236, + 0.36616, + 0.36662, + 0.3641, + 0.36936, + 0.36966, + 0.36902, + 0.37004, + 0.3724, + 0.37158, + 0.37278, + 0.3725, + 0.375, + 0.37618, + 0.3764, + 0.3774, + 0.37752, + 0.37724, + 0.37826, + 0.37848, + 0.38024, + 0.3791, + 0.37902, + 0.38188, + 0.38038, + 0.38478, + 0.3799, + 0.38192, + 0.38352, + 0.38058, + 0.38324, + 0.38586, + 0.38004, + 0.38654, + 0.3842, + 0.38632, + 0.38732, + 0.38408, + 0.38422, + 0.38632, + 0.38902, + 0.38782, + 0.38674, + 0.38694, + 0.38626, + 0.38678, + 0.38948, + 0.39038, + 0.39018, + 0.38814, + 0.38618, + 0.3908, + 0.38982, + 0.39066, + 0.38904, + 0.38854, + 0.38872, + 0.39008, + 0.39052, + 0.38934, + 0.38852, + 0.39056, + 0.39032, + 0.3915, + 0.39092, + 0.39138, + 0.3895, + 0.39112 + ], + "test_acc": [ + 0.2816, + 0.3167, + 0.3194, + 0.3093, + 0.3417, + 0.3344, + 0.3297, + 0.3418, + 0.3479, + 0.3372, + 0.3432, + 0.3446, + 0.3458, + 0.3646, + 0.3616, + 0.3529, + 0.3617, + 0.3576, + 0.329, + 0.3542, + 0.3701, + 0.3647, + 0.3697, + 0.3697, + 0.3808, + 0.3735, + 0.3695, + 0.3735, + 0.3711, + 0.379, + 0.3877, + 0.3805, + 0.3751, + 0.3909, + 0.381, + 0.3909, + 0.3902, + 0.3862, + 0.3899, + 0.3879, + 0.3994, + 0.3963, + 0.3954, + 0.3882, + 0.3978, + 0.3919, + 0.3928, + 0.3909, + 0.3966, + 0.3986, + 0.3939, + 0.4016, + 0.3973, + 0.397, + 0.4019, + 0.4004, + 0.4018, + 0.4052, + 0.3951, + 0.4016, + 0.396, + 0.4024, + 0.4075, + 0.4051, + 0.4025, + 0.4037, + 0.4041, + 0.4061, + 0.4056, + 0.4041, + 0.4009, + 0.405, + 0.399, + 0.4012, + 0.4076, + 0.4079, + 0.4096, + 0.407, + 0.4049, + 0.4059, + 0.4026, + 0.406, + 0.4048, + 0.4027, + 0.407, + 0.4022, + 0.4067, + 0.4063, + 0.4062, + 0.4052, + 0.4059, + 0.4068, + 0.4063, + 0.4076, + 0.4063, + 0.4065, + 0.4062, + 0.4069, + 0.4069, + 0.4067 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.027296170592308044, + 0.04133676737546921, + 0.08515161275863647, + 0.026729261502623558, + 0.01583785191178322, + -0.025018498301506042, + -0.044713616371154785, + -0.05714074894785881, + -0.040867358446121216, + 0.012529742904007435, + -0.027012888342142105, + 0.9971734881401062 + ], + "perturbation_rho": [ + 0.053132910281419754, + -0.014070656150579453, + 0.02090507186949253, + 0.005164717324078083, + -0.031424831598997116, + -0.010921415872871876, + -0.005948010832071304, + 0.01678430661559105, + -0.00991929043084383, + -0.006591915152966976, + -0.010621514171361923, + -0.019635088741779327 + ], + "nudging": { + "0.001": [ + -2.4188775569200516e-06, + -2.6740599423646927e-07, + -1.7171259969472885e-07, + -1.0791700333356857e-07, + -3.4458935260772705e-08, + -4.493631422519684e-08, + 5.0407834351062775e-08, + 1.2456439435482025e-08, + 9.19681042432785e-09, + -1.6065314412117004e-08, + 2.444721758365631e-08, + -6.272457540035248e-07 + ], + "0.003": [ + -7.179100066423416e-06, + -6.825430318713188e-07, + -5.516340024769306e-07, + -1.1979136615991592e-07, + -2.8172507882118225e-08, + -1.9907020032405853e-08, + 1.2316741049289703e-07, + 7.264316082000732e-08, + 9.010545909404755e-08, + -2.7706846594810486e-08, + 9.778887033462524e-09, + -2.26777046918869e-06 + ], + "0.01": [ + -2.3640692234039307e-05, + -2.3529864847660065e-06, + -1.5455298125743866e-06, + -3.109453245997429e-07, + -1.5425030142068863e-07, + 1.0291114449501038e-07, + 3.3457763493061066e-07, + 3.76836396753788e-07, + 3.371387720108032e-07, + -1.2211967259645462e-07, + 1.5588011592626572e-07, + -8.008093573153019e-06 + ] + }, + "hidden_norms_per_layer": [ + 6886.94775390625, + 87584.2109375, + 126785.4921875, + 355667.78125, + 552057.5625, + 850422.75, + 1293051.0, + 1541609.25, + 1868753.125, + 2007669.375, + 2071472.125, + 2260452.75, + 1769309.25 + ], + "bp_grad_norms_per_layer": [ + 3.3799024095060304e-05, + 2.5621802706154995e-06, + 8.161263167494326e-07, + 4.5329542786021193e-07, + 4.0827075054039597e-07, + 4.0655893940311216e-07, + 4.057951343838795e-07, + 4.061944878230861e-07, + 4.055657427670667e-07, + 4.0522803601561463e-07, + 4.0508828647034534e-07, + 4.0517943489248864e-07, + 3.946254025777307e-07 + ] + }, + "drift": { + "embed.weight": 46.66262768106957, + "embed.bias": 12.349060830406593, + "blocks.0.ln.weight": 1.186887811746546, + "blocks.0.w1.weight": 16.691636363051998, + "blocks.0.w1.bias": 10.396716096626863, + "blocks.0.w2.weight": 53.63063338700071, + "blocks.1.ln.weight": 0.9868002811700606, + "blocks.1.w1.weight": 18.44033798724087, + "blocks.1.w1.bias": 4.605202402802353, + "blocks.1.w2.weight": 52.226844329919246, + "blocks.2.ln.weight": 1.0614278526441627, + "blocks.2.w1.weight": 19.34190693735112, + "blocks.2.w1.bias": 11.075717016343752, + "blocks.2.w2.weight": 42.37755333929534, + "blocks.3.ln.weight": 0.8798892347084698, + "blocks.3.w1.weight": 19.160031225604996, + "blocks.3.w1.bias": 13.88384021345869, + "blocks.3.w2.weight": 33.30996859649038, + "blocks.4.ln.weight": 0.7104088303381113, + "blocks.4.w1.weight": 18.209430962072435, + "blocks.4.w1.bias": 16.92336837763847, + "blocks.4.w2.weight": 28.747777526920554, + "blocks.5.ln.weight": 0.7207815914026496, + "blocks.5.w1.weight": 20.407344689317952, + "blocks.5.w1.bias": 19.928474035997958, + "blocks.5.w2.weight": 30.668259780333205, + "blocks.6.ln.weight": 0.6859569564344036, + "blocks.6.w1.weight": 20.174584059207987, + "blocks.6.w1.bias": 20.229323693427677, + "blocks.6.w2.weight": 30.149329891021953, + "blocks.7.ln.weight": 0.7233680592463317, + "blocks.7.w1.weight": 21.7410787215414, + "blocks.7.w1.bias": 22.40005573271207, + "blocks.7.w2.weight": 28.827371299185597, + "blocks.8.ln.weight": 0.6511450076677763, + "blocks.8.w1.weight": 17.427880699003644, + "blocks.8.w1.bias": 16.729590061303803, + "blocks.8.w2.weight": 28.2153738883236, + "blocks.9.ln.weight": 0.6038210923569808, + "blocks.9.w1.weight": 16.214819848855626, + "blocks.9.w1.bias": 15.839234916532192, + "blocks.9.w2.weight": 28.312285773435068, + "blocks.10.ln.weight": 0.6503320370929363, + "blocks.10.w1.weight": 19.037981824154166, + "blocks.10.w1.bias": 18.568708970692136, + "blocks.10.w2.weight": 20.586050451276414, + "blocks.11.ln.weight": 0.5703826672599819, + "blocks.11.w1.weight": 16.05034192522954, + "blocks.11.w1.bias": 14.75267875282651, + "blocks.11.w2.weight": 45.75154443737838, + "out_ln.weight": 0.36468260935864044, + "out_head.weight": 6.277188782444725, + "out_head.bias": 0.6248347589534139 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 12, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 1 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L12_seed1", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d512_L12_seed2/results_cifar10.json b/results/fa_dfa_d512_L12_seed2/results_cifar10.json new file mode 100644 index 0000000..68c6e2b --- /dev/null +++ b/results/fa_dfa_d512_L12_seed2/results_cifar10.json @@ -0,0 +1,969 @@ +{ + "2": { + "dfa": { + "log": { + "train_loss": [ + 2.067472825927734, + 2.050606523780823, + 2.0464368284606933, + 2.042041470413208, + 2.044630073509216, + 2.0444898017120363, + 2.0405262368774415, + 2.035152849884033, + 2.0376771257019044, + 2.03509138168335, + 2.0322119396972655, + 2.033472604637146, + 2.033374984664917, + 2.029572864456177, + 2.0276870463562013, + 2.0302749423980715, + 2.027638416900635, + 2.0297894455718994, + 2.0263528815078735, + 2.0278661029052736, + 2.0263561694335936, + 2.025871162261963, + 2.0294278043365477, + 2.0267604162979125, + 2.023654881286621, + 2.0251026361846924, + 2.023718124923706, + 2.023326771583557, + 2.0232533959960937, + 2.0244881047821046, + 2.0223184526062012, + 2.021666586151123, + 2.0216548426818846, + 2.01917991065979, + 2.021205049819946, + 2.019787854347229, + 2.0205126093292236, + 2.019609580001831, + 2.021551597328186, + 2.0192294607543944, + 2.019204094810486, + 2.0193405352020264, + 2.0203353549957277, + 2.0184646337127687, + 2.0181322897338867, + 2.019681806564331, + 2.0184515984344484, + 2.019875436248779, + 2.019927127380371, + 2.0191176706695555, + 2.016925164756775, + 2.0216545279693605, + 2.01788885345459, + 2.0193467868804933, + 2.0155374671936035, + 2.0189524417114257, + 2.018774903793335, + 2.0179057373046874, + 2.0154743661117553, + 2.016519417648315, + 2.0184439277648925, + 2.017523434753418, + 2.017258151779175, + 2.018665143585205, + 2.0169664744567872, + 2.016487240142822, + 2.01617966884613, + 2.0159906078338623, + 2.0176960159683226, + 2.015817363433838, + 2.014662687149048, + 2.0164543737030027, + 2.0170937897109984, + 2.017375130081177, + 2.0153453800964356, + 2.015449415817261, + 2.0149039567565916, + 2.0142875480651856, + 2.0150826512145996, + 2.0165929887390135, + 2.015268103866577, + 2.013780555686951, + 2.0156544293212892, + 2.0130539707183837, + 2.015054910621643, + 2.0157424531555175, + 2.0142131992340087, + 2.016000519104004, + 2.014644538726807, + 2.0129901065826417, + 2.014485344467163, + 2.015419366531372, + 2.014306447067261, + 2.013788412322998, + 2.014239661521912, + 2.0151051541137694, + 2.0142657162475586, + 2.015102813682556, + 2.0133784993743897, + 2.015375001487732 + ], + "train_acc": [ + 0.23898, + 0.24246, + 0.24622, + 0.24884, + 0.24668, + 0.2485, + 0.24792, + 0.25306, + 0.24956, + 0.25484, + 0.25486, + 0.2526, + 0.2561, + 0.256, + 0.25878, + 0.25726, + 0.2586, + 0.25762, + 0.25946, + 0.25782, + 0.26002, + 0.2601, + 0.25784, + 0.2578, + 0.26152, + 0.2606, + 0.26106, + 0.25782, + 0.25852, + 0.25776, + 0.26318, + 0.26254, + 0.26198, + 0.2636, + 0.26256, + 0.26578, + 0.2624, + 0.26176, + 0.25926, + 0.26508, + 0.26478, + 0.26082, + 0.26404, + 0.26548, + 0.26746, + 0.26278, + 0.26414, + 0.26246, + 0.26348, + 0.26282, + 0.26394, + 0.26348, + 0.26624, + 0.26506, + 0.26554, + 0.26198, + 0.26362, + 0.26472, + 0.26728, + 0.26684, + 0.26632, + 0.26602, + 0.26588, + 0.26584, + 0.26636, + 0.2642, + 0.26606, + 0.26868, + 0.26574, + 0.26688, + 0.267, + 0.2633, + 0.26504, + 0.26764, + 0.26732, + 0.26612, + 0.26802, + 0.26864, + 0.26808, + 0.26662, + 0.26826, + 0.26906, + 0.2658, + 0.26706, + 0.2689, + 0.26686, + 0.26868, + 0.2686, + 0.26812, + 0.26678, + 0.26878, + 0.26654, + 0.26678, + 0.2667, + 0.26656, + 0.26746, + 0.26736, + 0.26788, + 0.26794, + 0.27014 + ], + "test_acc": [ + 0.2486, + 0.2615, + 0.2803, + 0.2745, + 0.2627, + 0.2561, + 0.2643, + 0.2592, + 0.2832, + 0.2903, + 0.2737, + 0.2757, + 0.2706, + 0.2847, + 0.2742, + 0.2822, + 0.276, + 0.2661, + 0.2705, + 0.2742, + 0.2755, + 0.2899, + 0.2929, + 0.2661, + 0.2857, + 0.2777, + 0.2783, + 0.2498, + 0.287, + 0.2912, + 0.2833, + 0.2801, + 0.2912, + 0.2897, + 0.2936, + 0.2849, + 0.2874, + 0.2957, + 0.2753, + 0.2862, + 0.2964, + 0.2934, + 0.289, + 0.29, + 0.2951, + 0.2861, + 0.2858, + 0.2867, + 0.2885, + 0.3027, + 0.2822, + 0.2847, + 0.2933, + 0.2899, + 0.286, + 0.2958, + 0.2974, + 0.2957, + 0.288, + 0.2878, + 0.2944, + 0.2885, + 0.2913, + 0.2942, + 0.2965, + 0.2943, + 0.3, + 0.2924, + 0.2977, + 0.2917, + 0.2951, + 0.2848, + 0.2943, + 0.2944, + 0.2874, + 0.2901, + 0.2969, + 0.29, + 0.2948, + 0.2954, + 0.2894, + 0.2948, + 0.2908, + 0.2922, + 0.2904, + 0.2903, + 0.2947, + 0.2926, + 0.2912, + 0.292, + 0.2929, + 0.2922, + 0.2941, + 0.2952, + 0.2927, + 0.2937, + 0.2934, + 0.2933, + 0.2933, + 0.2933 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.3273237347602844, + 0.0001051856525009498, + -2.771663639578037e-05, + -0.0005365631077438593, + 0.0003001387231051922, + -7.278185512404889e-05, + -0.00019034843717236072, + -0.00018735270714387298, + -0.0003543527564033866, + -0.0005275406292639673, + -0.000327416870277375, + -0.00032380438642576337 + ], + "perturbation_rho": [ + -0.01584552228450775, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -3.3387914299964905e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -8.740462362766266e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -2.7050264179706573e-06, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 55119.203125, + 2542876672.0, + 5336881152.0, + 6906983936.0, + 7084125696.0, + 9782456320.0, + 9881924608.0, + 10504348672.0, + 10572271616.0, + 11673710592.0, + 12703504384.0, + 12870231040.0, + 13115667456.0 + ], + "bp_grad_norms_per_layer": [ + 1.9967485798133566e-07, + 2.110904229191135e-10, + 2.0921038512700108e-10, + 2.0923462962230133e-10, + 2.092699208366966e-10, + 2.0928372923556537e-10, + 2.0927498622924645e-10, + 2.0925521038162032e-10, + 2.0925566834861797e-10, + 2.0928303534617498e-10, + 2.0925450261444212e-10, + 2.093413220549678e-10, + 2.0937052092051545e-10 + ] + }, + "drift": { + "embed.weight": 355.47197791509984, + "embed.bias": 325.4137572298477, + "blocks.0.ln.weight": 10.183073943359524, + "blocks.0.w1.weight": 337.53798007019327, + "blocks.0.w1.bias": 368.2919019457867, + "blocks.0.w2.weight": 502.2465316680103, + "blocks.1.ln.weight": 9.790888407534407, + "blocks.1.w1.weight": 399.8591225924345, + "blocks.1.w1.bias": 381.91784742604096, + "blocks.1.w2.weight": 400.78281765228365, + "blocks.2.ln.weight": 9.816270926016012, + "blocks.2.w1.weight": 404.058717958719, + "blocks.2.w1.bias": 370.3941989263201, + "blocks.2.w2.weight": 389.67673069887354, + "blocks.3.ln.weight": 7.767040540127046, + "blocks.3.w1.weight": 279.00825869080103, + "blocks.3.w1.bias": 255.25224285416232, + "blocks.3.w2.weight": 275.49641344672904, + "blocks.4.ln.weight": 10.807571563453388, + "blocks.4.w1.weight": 443.61612820167835, + "blocks.4.w1.bias": 410.34345299471363, + "blocks.4.w2.weight": 435.1254085598178, + "blocks.5.ln.weight": 7.222077733457088, + "blocks.5.w1.weight": 277.46785316158423, + "blocks.5.w1.bias": 253.23890247800753, + "blocks.5.w2.weight": 256.5840082126155, + "blocks.6.ln.weight": 8.723424030262112, + "blocks.6.w1.weight": 347.79770296240184, + "blocks.6.w1.bias": 330.68249616268605, + "blocks.6.w2.weight": 337.991009055742, + "blocks.7.ln.weight": 6.095259128522855, + "blocks.7.w1.weight": 228.22905887133265, + "blocks.7.w1.bias": 207.27947768539244, + "blocks.7.w2.weight": 218.0278988490447, + "blocks.8.ln.weight": 10.272236445847241, + "blocks.8.w1.weight": 411.857394629554, + "blocks.8.w1.bias": 374.6759208224197, + "blocks.8.w2.weight": 382.12457040788746, + "blocks.9.ln.weight": 10.509629754259423, + "blocks.9.w1.weight": 419.2076511149967, + "blocks.9.w1.bias": 382.6992093043317, + "blocks.9.w2.weight": 400.5745998142284, + "blocks.10.ln.weight": 8.353122487257533, + "blocks.10.w1.weight": 326.130308345784, + "blocks.10.w1.bias": 311.08525380018006, + "blocks.10.w2.weight": 293.42915037063676, + "blocks.11.ln.weight": 9.253648926692552, + "blocks.11.w1.weight": 374.4659124025003, + "blocks.11.w1.bias": 354.20131257586644, + "blocks.11.w2.weight": 347.73755162930524, + "out_ln.weight": 0.6640411848895453, + "out_head.weight": 10.723191480707335, + "out_head.bias": 0.5785102998287991 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0563817890930176, + 1.9627269606781006, + 1.9333056550598144, + 1.9114726235198976, + 1.8997531073379517, + 1.8861957403945924, + 1.87444166015625, + 1.8636801738739013, + 1.8639895972442626, + 1.859206601295471, + 1.8560995433807372, + 1.8580559069824218, + 1.857329437828064, + 1.8486979720687866, + 1.8436245847320556, + 1.8427142208480836, + 1.8344886156463622, + 1.8326412610626222, + 1.8245765609741211, + 1.828027060470581, + 1.825153868484497, + 1.8163878149795532, + 1.824076413230896, + 1.81121192653656, + 1.8073615282821656, + 1.8063795041275024, + 1.8070646337509155, + 1.8042147310638428, + 1.8005332668685914, + 1.7986898645401002, + 1.8020886263275147, + 1.7947696161270141, + 1.7940981402206422, + 1.7886089331436157, + 1.7890249035263062, + 1.7892986505889892, + 1.7865815286636353, + 1.7821338254547119, + 1.7815338650512695, + 1.7780959701919556, + 1.7746984979629516, + 1.7739060324478149, + 1.7781290399169922, + 1.7747572018051148, + 1.7699797783660889, + 1.7655833713150024, + 1.767660176963806, + 1.7649168865203857, + 1.762353595352173, + 1.7620251220321654, + 1.7568823919296264, + 1.7637593435668946, + 1.758452812423706, + 1.7617782683944703, + 1.75097875831604, + 1.7536098889160157, + 1.7537783139801026, + 1.7496310552597045, + 1.7481205722045898, + 1.746379077758789, + 1.748953713645935, + 1.7454774743652344, + 1.7429656470108033, + 1.7443698706436157, + 1.7438769147491455, + 1.741790360183716, + 1.7384765851593018, + 1.737857723007202, + 1.7372428884124755, + 1.7377281158828735, + 1.7380989352416991, + 1.7357493377685547, + 1.7356955800628662, + 1.7379692654418946, + 1.7327075168609618, + 1.7319425884246826, + 1.7306698516082764, + 1.7282980773544312, + 1.7296418505477906, + 1.7297362589263916, + 1.728454496421814, + 1.7303657376098633, + 1.7310565896987915, + 1.7265128871917725, + 1.7292877883911133, + 1.7294651891708375, + 1.7270633559417725, + 1.7284353275299071, + 1.7280301824188233, + 1.7230924905776976, + 1.7238229986190796, + 1.7292125368499756, + 1.7265778392791749, + 1.727167644920349, + 1.7264636569595337, + 1.7247360497283934, + 1.7212902561187744, + 1.7221066110229493, + 1.7266186252212525, + 1.7269180416488648 + ], + "train_acc": [ + 0.24122, + 0.28436, + 0.299, + 0.308, + 0.31228, + 0.31872, + 0.32314, + 0.32812, + 0.32904, + 0.33368, + 0.33254, + 0.33406, + 0.33382, + 0.33696, + 0.33812, + 0.33908, + 0.34066, + 0.34344, + 0.34602, + 0.3438, + 0.34686, + 0.3481, + 0.3424, + 0.3496, + 0.3543, + 0.35444, + 0.35368, + 0.35296, + 0.35348, + 0.35554, + 0.35894, + 0.3566, + 0.35682, + 0.36212, + 0.35982, + 0.35886, + 0.36128, + 0.3635, + 0.36448, + 0.36314, + 0.36606, + 0.3656, + 0.36578, + 0.36614, + 0.36982, + 0.36962, + 0.37046, + 0.37016, + 0.36992, + 0.37214, + 0.3727, + 0.37368, + 0.37504, + 0.37302, + 0.37496, + 0.37264, + 0.37418, + 0.37714, + 0.37492, + 0.37612, + 0.377, + 0.37596, + 0.37848, + 0.37688, + 0.37832, + 0.3779, + 0.37974, + 0.38196, + 0.38234, + 0.37998, + 0.38076, + 0.37942, + 0.37988, + 0.38244, + 0.38388, + 0.38314, + 0.38488, + 0.38358, + 0.3841, + 0.38562, + 0.38478, + 0.3818, + 0.38364, + 0.38524, + 0.38452, + 0.38266, + 0.38434, + 0.38448, + 0.38532, + 0.38844, + 0.38614, + 0.38436, + 0.3866, + 0.38558, + 0.38382, + 0.38576, + 0.38752, + 0.3862, + 0.3878, + 0.38596 + ], + "test_acc": [ + 0.2906, + 0.315, + 0.3412, + 0.3441, + 0.3313, + 0.3339, + 0.3449, + 0.3436, + 0.3633, + 0.3441, + 0.359, + 0.3384, + 0.3562, + 0.3694, + 0.3664, + 0.3678, + 0.3625, + 0.3697, + 0.3793, + 0.3753, + 0.3826, + 0.3718, + 0.381, + 0.3826, + 0.3821, + 0.3792, + 0.3812, + 0.3575, + 0.3868, + 0.382, + 0.383, + 0.3859, + 0.3914, + 0.3883, + 0.3907, + 0.3941, + 0.3969, + 0.3849, + 0.387, + 0.3904, + 0.3945, + 0.3886, + 0.3923, + 0.3966, + 0.3963, + 0.3937, + 0.395, + 0.3874, + 0.3891, + 0.3962, + 0.3873, + 0.3903, + 0.3954, + 0.3911, + 0.3956, + 0.3948, + 0.3985, + 0.3938, + 0.3964, + 0.3949, + 0.398, + 0.3955, + 0.3973, + 0.3924, + 0.3965, + 0.4008, + 0.3918, + 0.4006, + 0.3975, + 0.3976, + 0.3974, + 0.3984, + 0.4018, + 0.3991, + 0.3957, + 0.4003, + 0.4007, + 0.4003, + 0.4027, + 0.4018, + 0.4003, + 0.4011, + 0.3991, + 0.4017, + 0.4012, + 0.4021, + 0.4007, + 0.4004, + 0.4021, + 0.4026, + 0.4015, + 0.4035, + 0.4023, + 0.4022, + 0.4028, + 0.4034, + 0.4025, + 0.4021, + 0.4025, + 0.4025 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.018669456243515015, + 0.056162357330322266, + -0.0088431341573596, + -0.008182319812476635, + -0.055274538695812225, + -0.005170345772057772, + 0.014380814507603645, + -0.0468558594584465, + -0.06233042851090431, + -0.04816172271966934, + -0.019947674125432968, + 0.9990314245223999 + ], + "perturbation_rho": [ + 0.03642716258764267, + 0.02892448753118515, + 0.03735572472214699, + -0.00921887531876564, + -0.014430028386414051, + 0.03254326060414314, + 0.005076523870229721, + 0.0074741230346262455, + 0.035068579018116, + 0.027692969888448715, + -0.0010530222207307816, + 0.018331632018089294 + ], + "nudging": { + "0.001": [ + -1.2880191206932068e-06, + -2.0337756723165512e-07, + -1.0291114449501038e-07, + -6.391201168298721e-08, + 8.12578946352005e-08, + -4.307366907596588e-09, + 9.313225746154785e-09, + 1.1568772606551647e-07, + 6.877235136926174e-08, + 4.470348358154297e-08, + 2.6775524020195007e-08, + -1.1273659765720367e-06 + ], + "0.003": [ + -3.7905119825154543e-06, + -6.791669875383377e-07, + -9.001814760267735e-08, + -7.08096195012331e-08, + 2.7276109904050827e-07, + -5.8818841353058815e-08, + -9.292853064835072e-08, + 2.6039197109639645e-07, + 2.7110218070447445e-07, + 1.3009412214159966e-07, + 6.56291376799345e-08, + -4.065892426297069e-06 + ], + "0.01": [ + -1.25557417050004e-05, + -2.2551976144313812e-06, + 8.774804882705212e-08, + -2.8230715543031693e-09, + 8.539936970919371e-07, + -6.600748747587204e-08, + -3.3914693631231785e-07, + 7.495400495827198e-07, + 8.926435839384794e-07, + 5.459296517074108e-07, + 2.251181285828352e-07, + -1.443939981982112e-05 + ] + }, + "hidden_norms_per_layer": [ + 6788.72314453125, + 99873.7890625, + 239906.21875, + 498097.5, + 936403.5, + 1120419.5, + 1148044.0, + 1243337.25, + 1465607.625, + 1820056.25, + 1916954.25, + 2080373.375, + 1057581.375 + ], + "bp_grad_norms_per_layer": [ + 3.21922343573533e-05, + 1.926036247823504e-06, + 7.882048862484226e-07, + 6.652410888818849e-07, + 6.507269176836417e-07, + 6.543205017806031e-07, + 6.41887766050786e-07, + 6.365870603985968e-07, + 6.355120376611012e-07, + 6.336023261610535e-07, + 6.346273266899516e-07, + 6.351577326313418e-07, + 6.350035732793913e-07 + ] + }, + "drift": { + "embed.weight": 47.675119188037286, + "embed.bias": 12.598388666523805, + "blocks.0.ln.weight": 1.151033318517311, + "blocks.0.w1.weight": 16.377677244547872, + "blocks.0.w1.bias": 12.516910644554024, + "blocks.0.w2.weight": 52.57068443004618, + "blocks.1.ln.weight": 0.9655458779833542, + "blocks.1.w1.weight": 18.881599150212853, + "blocks.1.w1.bias": 9.530569072308776, + "blocks.1.w2.weight": 43.92118513197058, + "blocks.2.ln.weight": 0.7887174233304558, + "blocks.2.w1.weight": 18.824409898016874, + "blocks.2.w1.bias": 13.563387947054077, + "blocks.2.w2.weight": 31.07735174337715, + "blocks.3.ln.weight": 0.8171227981326923, + "blocks.3.w1.weight": 19.539674249572744, + "blocks.3.w1.bias": 18.146478276910425, + "blocks.3.w2.weight": 35.53729462516465, + "blocks.4.ln.weight": 0.6263369507412071, + "blocks.4.w1.weight": 16.92487352504901, + "blocks.4.w1.bias": 15.976801095115205, + "blocks.4.w2.weight": 29.94254311653644, + "blocks.5.ln.weight": 0.6487257363749984, + "blocks.5.w1.weight": 17.08776989967922, + "blocks.5.w1.bias": 11.932105261833147, + "blocks.5.w2.weight": 57.38022751451892, + "blocks.6.ln.weight": 0.6775998262875462, + "blocks.6.w1.weight": 18.367003078140872, + "blocks.6.w1.bias": 14.665116127192782, + "blocks.6.w2.weight": 54.62506371390311, + "blocks.7.ln.weight": 0.7036757447327185, + "blocks.7.w1.weight": 19.393650716681655, + "blocks.7.w1.bias": 17.4299147560077, + "blocks.7.w2.weight": 46.77523522935725, + "blocks.8.ln.weight": 0.7270809437825937, + "blocks.8.w1.weight": 21.877072467037458, + "blocks.8.w1.bias": 19.917426863463785, + "blocks.8.w2.weight": 42.759258263025565, + "blocks.9.ln.weight": 0.6050917355676333, + "blocks.9.w1.weight": 17.155375601849993, + "blocks.9.w1.bias": 15.811293825336426, + "blocks.9.w2.weight": 33.7630968788062, + "blocks.10.ln.weight": 0.6513788383873166, + "blocks.10.w1.weight": 18.151371658733705, + "blocks.10.w1.bias": 17.557942417321982, + "blocks.10.w2.weight": 38.33917690306304, + "blocks.11.ln.weight": 0.6877634546861944, + "blocks.11.w1.weight": 19.58584700755457, + "blocks.11.w1.bias": 19.31054232174018, + "blocks.11.w2.weight": 38.09422640487883, + "out_ln.weight": 0.3216566822185731, + "out_head.weight": 6.321448993543088, + "out_head.bias": 1.6265612863801873 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 12, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 2 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L12_seed2", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d512_L12_seed3/results_cifar10.json b/results/fa_dfa_d512_L12_seed3/results_cifar10.json new file mode 100644 index 0000000..b0d47db --- /dev/null +++ b/results/fa_dfa_d512_L12_seed3/results_cifar10.json @@ -0,0 +1,969 @@ +{ + "3": { + "dfa": { + "log": { + "train_loss": [ + 2.0758749111938477, + 2.0449016705703738, + 2.0355339753723145, + 2.033782032623291, + 2.0313166973876955, + 2.0272680778503416, + 2.0246013010406494, + 2.0257189083099365, + 2.0217166822052004, + 2.0203594020080566, + 2.0174152685165407, + 2.018405201034546, + 2.0147499544525145, + 2.014394051055908, + 2.016394521636963, + 2.012079556236267, + 2.0137909897613526, + 2.014015982284546, + 2.0109746593475344, + 2.014003984375, + 2.0116691515350342, + 2.0092717248535155, + 2.008394868850708, + 2.0091352093887327, + 2.0081222701644896, + 2.008312914390564, + 2.0076249710464475, + 2.007539535140991, + 2.006514790344238, + 2.0067833641052246, + 2.002747939300537, + 2.0058067621612548, + 2.0007595862197878, + 2.0042390581512453, + 2.0029916175842284, + 2.0036084170150756, + 2.000785510559082, + 2.0037371703720095, + 2.001269952163696, + 2.0031689390563967, + 2.002326145706177, + 2.0042856772232054, + 2.001963946914673, + 1.9995613592147827, + 2.000965595703125, + 2.0001352091979983, + 2.0018488079071046, + 2.00168183052063, + 2.002008575668335, + 1.9990670106506347, + 2.000030261154175, + 2.0025694244766234, + 2.000111082458496, + 2.0006912731170656, + 1.9988810729980468, + 1.9988630979156494, + 1.9988078353118897, + 1.9998760187530518, + 1.999488633041382, + 1.9987801266479492, + 2.0005314921569823, + 1.9992797033691405, + 1.9988488864135743, + 1.9994260061645508, + 1.99964618309021, + 1.997838856201172, + 1.9974179007339476, + 1.9974302843475342, + 1.9990705501556396, + 1.9974287271499633, + 1.9969496033477783, + 1.9965652766418458, + 1.9951063619613647, + 1.997466404724121, + 1.9966685424804687, + 1.9972340461730957, + 1.9965911829376222, + 1.9977326393127441, + 1.9962291263580323, + 1.9955562910461426, + 1.9967020976257324, + 1.9967882759094238, + 1.9950329151916504, + 1.99679522026062, + 1.9955974411773683, + 1.9971681539154054, + 1.996667978439331, + 1.99505258934021, + 1.9948117360687256, + 1.9953517591094971, + 1.994836011428833, + 1.9956397792816163, + 1.9950232530212402, + 1.9973082242584228, + 1.9949973779296875, + 1.995920922012329, + 1.9955849953460694, + 1.9965253795623779, + 1.995772368774414, + 1.9941626037216187 + ], + "train_acc": [ + 0.2279, + 0.24072, + 0.24646, + 0.24854, + 0.25044, + 0.25614, + 0.25482, + 0.25184, + 0.25618, + 0.25788, + 0.25794, + 0.25986, + 0.25826, + 0.26054, + 0.26198, + 0.26164, + 0.26276, + 0.26056, + 0.26338, + 0.26482, + 0.26374, + 0.27, + 0.26544, + 0.26496, + 0.26726, + 0.26608, + 0.2677, + 0.26784, + 0.26822, + 0.26688, + 0.27298, + 0.2674, + 0.27316, + 0.26942, + 0.27202, + 0.2707, + 0.27076, + 0.2708, + 0.27202, + 0.27326, + 0.27412, + 0.27078, + 0.27212, + 0.27422, + 0.27278, + 0.27348, + 0.27218, + 0.27472, + 0.27368, + 0.27624, + 0.2752, + 0.27146, + 0.27238, + 0.27332, + 0.27558, + 0.27502, + 0.274, + 0.27478, + 0.2765, + 0.27494, + 0.2753, + 0.2748, + 0.27594, + 0.27726, + 0.27356, + 0.27716, + 0.27596, + 0.27972, + 0.277, + 0.27616, + 0.27622, + 0.2769, + 0.27584, + 0.27614, + 0.27692, + 0.27662, + 0.27758, + 0.27612, + 0.27734, + 0.27778, + 0.2763, + 0.2761, + 0.2776, + 0.27728, + 0.27692, + 0.27838, + 0.27818, + 0.2777, + 0.27922, + 0.27792, + 0.27694, + 0.27868, + 0.27922, + 0.2776, + 0.27966, + 0.27774, + 0.28024, + 0.27544, + 0.27852, + 0.28028 + ], + "test_acc": [ + 0.2245, + 0.2534, + 0.2569, + 0.2594, + 0.2706, + 0.2666, + 0.2759, + 0.276, + 0.2493, + 0.2911, + 0.2656, + 0.2816, + 0.2759, + 0.2916, + 0.302, + 0.2776, + 0.2854, + 0.2814, + 0.2838, + 0.2796, + 0.2881, + 0.2831, + 0.2907, + 0.2949, + 0.2872, + 0.2857, + 0.3067, + 0.301, + 0.2915, + 0.2936, + 0.2727, + 0.2971, + 0.2768, + 0.2756, + 0.2752, + 0.2918, + 0.2942, + 0.2706, + 0.2698, + 0.3002, + 0.2978, + 0.2896, + 0.2879, + 0.2922, + 0.2897, + 0.2955, + 0.2922, + 0.2979, + 0.3049, + 0.2838, + 0.2864, + 0.2929, + 0.3024, + 0.2947, + 0.2939, + 0.3001, + 0.2966, + 0.2875, + 0.288, + 0.2917, + 0.2948, + 0.2922, + 0.2926, + 0.2969, + 0.2958, + 0.2974, + 0.3028, + 0.2923, + 0.2926, + 0.2949, + 0.2849, + 0.2976, + 0.299, + 0.3005, + 0.291, + 0.297, + 0.2947, + 0.294, + 0.2974, + 0.2953, + 0.2958, + 0.293, + 0.292, + 0.3, + 0.2927, + 0.2969, + 0.2968, + 0.297, + 0.296, + 0.2958, + 0.2974, + 0.2971, + 0.2965, + 0.2957, + 0.2976, + 0.2961, + 0.2969, + 0.2969, + 0.2968, + 0.297 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.3807877004146576, + -5.573713860940188e-05, + 0.00026205976610071957, + 5.7832341553876176e-05, + -0.0006758540403097868, + 9.82169367489405e-05, + -0.0004489597922656685, + 0.00012056646664859727, + 0.00039241870399564505, + 0.0004099192447029054, + -0.00038764585042372346, + 9.827437315834686e-05 + ], + "perturbation_rho": [ + 0.0020497534424066544, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -3.2549723982810974e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.0975636541843414e-06, + 0.0, + -5.122274160385132e-09, + -9.313225746154785e-10, + 0.0, + 0.0, + 0.0, + -2.7939677238464355e-09, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -3.623310476541519e-06, + 2.7939677238464355e-09, + -3.259629011154175e-09, + -2.7939677238464355e-09, + 0.0, + 0.0, + 0.0, + -1.862645149230957e-09, + -9.313225746154785e-10, + 0.0, + 0.0, + -1.862645149230957e-09 + ] + }, + "hidden_norms_per_layer": [ + 53572.55078125, + 1372884736.0, + 1772287744.0, + 3241988608.0, + 5805380608.0, + 6317006848.0, + 6381533184.0, + 6840990720.0, + 8109968896.0, + 8596752384.0, + 8790169600.0, + 9575925760.0, + 10357586944.0 + ], + "bp_grad_norms_per_layer": [ + 2.5510743739687314e-07, + 2.2633883656197895e-10, + 2.2594967563627222e-10, + 2.2604293437034073e-10, + 2.2608663552414754e-10, + 2.2616231110106355e-10, + 2.2606677640979456e-10, + 2.2607062055701732e-10, + 2.2602890392686703e-10, + 2.2608405425561529e-10, + 2.2606443106365504e-10, + 2.26052662699594e-10, + 2.2604719485119773e-10 + ] + }, + "drift": { + "embed.weight": 327.43241116948093, + "embed.bias": 223.24151222733428, + "blocks.0.ln.weight": 9.973600730927519, + "blocks.0.w1.weight": 298.8679532598482, + "blocks.0.w1.bias": 266.1095556704987, + "blocks.0.w2.weight": 476.45333568353897, + "blocks.1.ln.weight": 7.306781436470832, + "blocks.1.w1.weight": 230.21983409061758, + "blocks.1.w1.bias": 212.39608458469817, + "blocks.1.w2.weight": 257.8517804786634, + "blocks.2.ln.weight": 8.853410982440527, + "blocks.2.w1.weight": 338.8735798549029, + "blocks.2.w1.bias": 298.9157804791883, + "blocks.2.w2.weight": 337.76450040174166, + "blocks.3.ln.weight": 9.715802278340659, + "blocks.3.w1.weight": 404.66894924511104, + "blocks.3.w1.bias": 372.47402373621406, + "blocks.3.w2.weight": 404.8085148871707, + "blocks.4.ln.weight": 8.048402116007853, + "blocks.4.w1.weight": 327.8249969525614, + "blocks.4.w1.bias": 304.9268816131629, + "blocks.4.w2.weight": 317.1280523416376, + "blocks.5.ln.weight": 6.247399813356893, + "blocks.5.w1.weight": 221.81137144150296, + "blocks.5.w1.bias": 207.34062578724078, + "blocks.5.w2.weight": 219.43839912847176, + "blocks.6.ln.weight": 8.827579347133504, + "blocks.6.w1.weight": 332.6040891021855, + "blocks.6.w1.bias": 301.2387289035309, + "blocks.6.w2.weight": 301.27670926969427, + "blocks.7.ln.weight": 10.285108430777552, + "blocks.7.w1.weight": 406.88401472243066, + "blocks.7.w1.bias": 378.73147854832933, + "blocks.7.w2.weight": 385.868939931865, + "blocks.8.ln.weight": 8.800845432078118, + "blocks.8.w1.weight": 349.4923999527594, + "blocks.8.w1.bias": 314.4618082483143, + "blocks.8.w2.weight": 312.34600503490987, + "blocks.9.ln.weight": 7.894181737045302, + "blocks.9.w1.weight": 310.5073132382578, + "blocks.9.w1.bias": 286.9185161607037, + "blocks.9.w2.weight": 283.5451705498602, + "blocks.10.ln.weight": 9.369016654158887, + "blocks.10.w1.weight": 380.3135567962096, + "blocks.10.w1.bias": 366.5904487269434, + "blocks.10.w2.weight": 364.5707271107058, + "blocks.11.ln.weight": 9.823259210793506, + "blocks.11.w1.weight": 387.5149132383442, + "blocks.11.w1.bias": 358.0799340264483, + "blocks.11.w2.weight": 346.4609492966194, + "out_ln.weight": 0.671169228619093, + "out_head.weight": 9.171587508256877, + "out_head.bias": 0.43138812866351306 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0396657500457764, + 1.9608277139282226, + 1.9326482776641847, + 1.9163381662750245, + 1.9034836812591553, + 1.890145294265747, + 1.8797314615631104, + 1.8768412320327759, + 1.867796627883911, + 1.8607218304824829, + 1.8534223027801513, + 1.8497662923431397, + 1.8423005596923827, + 1.836013819656372, + 1.8322983874511718, + 1.8273781113052368, + 1.8277745475006104, + 1.8251262734985352, + 1.820411690635681, + 1.8177711023712158, + 1.8161355939102173, + 1.811638991355896, + 1.8096345615005494, + 1.809067364845276, + 1.8052070534896851, + 1.806075843887329, + 1.8016806897735596, + 1.8036705466079712, + 1.8012689702606202, + 1.7954118814086915, + 1.7932103118515015, + 1.7936329647064209, + 1.7884179528427124, + 1.7905398846817016, + 1.7887225045013428, + 1.7856135736846923, + 1.7844085126495361, + 1.7840350121307373, + 1.774809980545044, + 1.778959913368225, + 1.771520369796753, + 1.775222759399414, + 1.7740441353607177, + 1.7697435302734374, + 1.7660783060073852, + 1.7702557649993897, + 1.7655815365600587, + 1.767026780052185, + 1.7612031787490845, + 1.7588948153686523, + 1.7596543489837646, + 1.7577299001312257, + 1.7578089023590089, + 1.7566554468154907, + 1.7528888186264038, + 1.7549751110076903, + 1.7530736223983765, + 1.7534073949813842, + 1.7489603903198243, + 1.7487286296844482, + 1.7519352731704712, + 1.7486104052734375, + 1.7467161587142945, + 1.748152449569702, + 1.7439867239379883, + 1.7447851418304443, + 1.7409665993499757, + 1.7442577163314819, + 1.7420410083770752, + 1.7413771923446655, + 1.7416430898666382, + 1.7407120821762085, + 1.7343293948745728, + 1.7396172916412354, + 1.7371713684844972, + 1.7387296481323242, + 1.736112067642212, + 1.733493857727051, + 1.7360445062637329, + 1.7351057822036744, + 1.734473055152893, + 1.7352992589569092, + 1.7343194945907592, + 1.735215076599121, + 1.733106077194214, + 1.7345250820159912, + 1.7314182668304443, + 1.731066604309082, + 1.730784634399414, + 1.7321079010391236, + 1.7304316757583618, + 1.7294625887298585, + 1.730209889831543, + 1.7305309671401978, + 1.7275332722854615, + 1.7291330585098266, + 1.7289624264526366, + 1.7314536703109742, + 1.7267744286346436, + 1.7254689588165284 + ], + "train_acc": [ + 0.2449, + 0.27998, + 0.2977, + 0.30396, + 0.3107, + 0.31656, + 0.32312, + 0.31882, + 0.32358, + 0.3291, + 0.32752, + 0.33508, + 0.33836, + 0.33794, + 0.34144, + 0.34536, + 0.34406, + 0.34554, + 0.34712, + 0.34862, + 0.34952, + 0.35122, + 0.35074, + 0.35326, + 0.3523, + 0.35342, + 0.35338, + 0.35376, + 0.35524, + 0.35476, + 0.35774, + 0.3577, + 0.3623, + 0.3571, + 0.35802, + 0.36086, + 0.3604, + 0.36042, + 0.36226, + 0.3633, + 0.3655, + 0.36332, + 0.36392, + 0.3675, + 0.36718, + 0.36608, + 0.36696, + 0.36686, + 0.37226, + 0.37178, + 0.37022, + 0.3701, + 0.3706, + 0.37212, + 0.375, + 0.37474, + 0.37334, + 0.37296, + 0.37436, + 0.37478, + 0.37246, + 0.37528, + 0.37576, + 0.37532, + 0.37498, + 0.37548, + 0.3776, + 0.37842, + 0.37692, + 0.37798, + 0.37794, + 0.37798, + 0.38094, + 0.38068, + 0.37932, + 0.37848, + 0.38182, + 0.37976, + 0.38048, + 0.381, + 0.3807, + 0.37908, + 0.38194, + 0.38326, + 0.38306, + 0.38104, + 0.3811, + 0.3813, + 0.38256, + 0.38198, + 0.38276, + 0.3842, + 0.3819, + 0.38484, + 0.38298, + 0.38164, + 0.38186, + 0.38012, + 0.38642, + 0.38538 + ], + "test_acc": [ + 0.2636, + 0.3156, + 0.3187, + 0.3379, + 0.3384, + 0.3469, + 0.3529, + 0.3526, + 0.3431, + 0.3598, + 0.3512, + 0.3619, + 0.3655, + 0.3755, + 0.3804, + 0.3699, + 0.3784, + 0.3643, + 0.3669, + 0.3644, + 0.3728, + 0.376, + 0.3802, + 0.3798, + 0.388, + 0.3631, + 0.3848, + 0.3802, + 0.3861, + 0.3829, + 0.3808, + 0.3865, + 0.3601, + 0.3915, + 0.371, + 0.3833, + 0.3932, + 0.3933, + 0.3827, + 0.3896, + 0.394, + 0.3837, + 0.3944, + 0.388, + 0.3896, + 0.3983, + 0.3933, + 0.3988, + 0.3997, + 0.3956, + 0.3967, + 0.3981, + 0.4041, + 0.4021, + 0.3873, + 0.4015, + 0.4031, + 0.3996, + 0.4016, + 0.401, + 0.403, + 0.4044, + 0.4021, + 0.4006, + 0.4062, + 0.4035, + 0.4027, + 0.4066, + 0.4081, + 0.4059, + 0.4023, + 0.4111, + 0.4095, + 0.406, + 0.4046, + 0.4048, + 0.4081, + 0.4067, + 0.4088, + 0.4074, + 0.4045, + 0.4073, + 0.408, + 0.4073, + 0.4066, + 0.4076, + 0.4064, + 0.4063, + 0.4088, + 0.4087, + 0.4081, + 0.4092, + 0.4088, + 0.4086, + 0.4093, + 0.4103, + 0.4091, + 0.4094, + 0.4087, + 0.4088 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.033972106873989105, + 0.070818230509758, + 0.01647794246673584, + -0.04525557532906532, + -0.03256663307547569, + -0.03431400656700134, + 0.011294779367744923, + -0.034145478159189224, + -0.05482051521539688, + 0.018059976398944855, + -0.030339818447828293, + 0.9974545836448669 + ], + "perturbation_rho": [ + 0.06674668192863464, + 0.0040874360129237175, + -0.028121206909418106, + 0.033479828387498856, + -0.023402733728289604, + -0.02146291360259056, + 0.01769183948636055, + -0.01436183787882328, + 0.009981930255889893, + 0.04906001687049866, + 0.004424326121807098, + 0.01695885881781578 + ], + "nudging": { + "0.001": [ + -2.7818605303764343e-06, + -2.752931322902441e-07, + -6.530899554491043e-08, + -3.67872416973114e-08, + 3.2247044146060944e-08, + 4.889443516731262e-09, + -1.862645149230957e-09, + -4.237517714500427e-08, + -4.889443516731262e-09, + -1.3737007975578308e-08, + -2.7008354663848877e-08, + -6.719492375850677e-07 + ], + "0.003": [ + -8.350558346137404e-06, + -6.812333595007658e-07, + -1.474982127547264e-07, + 9.033828973770142e-08, + 5.844049155712128e-08, + 7.008202373981476e-08, + 6.984919309616089e-09, + 1.0186340659856796e-07, + 1.2223608791828156e-07, + -8.253846317529678e-08, + 7.729977369308472e-08, + -2.513494109734893e-06 + ], + "0.01": [ + -2.8048030799254775e-05, + -2.1727464627474546e-06, + -3.3923424780368805e-07, + 3.7439167499542236e-07, + 2.1606683731079102e-07, + 2.60770320892334e-07, + -9.790528565645218e-08, + 2.130400389432907e-07, + 3.862660378217697e-07, + -1.123407855629921e-07, + 2.7267378754913807e-07, + -8.881674148142338e-06 + ] + }, + "hidden_norms_per_layer": [ + 6954.1630859375, + 111134.6796875, + 557813.6875, + 996516.875, + 1403786.0, + 1685541.375, + 2023717.25, + 2137581.0, + 2278237.0, + 2302800.5, + 2343287.75, + 2358363.25, + 1859393.25 + ], + "bp_grad_norms_per_layer": [ + 2.7945565307163633e-05, + 1.218010652337398e-06, + 4.458847513433284e-07, + 4.3005411498597823e-07, + 4.3150785700163397e-07, + 4.261477215550258e-07, + 4.2959365487149626e-07, + 4.2929926280521613e-07, + 4.298903206745308e-07, + 4.3081271883238514e-07, + 4.2645430653465155e-07, + 4.254479506471398e-07, + 4.048590369620797e-07 + ] + }, + "drift": { + "embed.weight": 48.84206517073718, + "embed.bias": 16.190805729960193, + "blocks.0.ln.weight": 1.1044050790999422, + "blocks.0.w1.weight": 16.916577725001215, + "blocks.0.w1.bias": 13.172179767406938, + "blocks.0.w2.weight": 57.6138058078061, + "blocks.1.ln.weight": 1.006892864310832, + "blocks.1.w1.weight": 20.474460499736093, + "blocks.1.w1.bias": 14.809778597172114, + "blocks.1.w2.weight": 56.78242938569469, + "blocks.2.ln.weight": 0.7251084712385265, + "blocks.2.w1.weight": 20.378823416992166, + "blocks.2.w1.bias": 19.146090983258652, + "blocks.2.w2.weight": 43.602875378949854, + "blocks.3.ln.weight": 0.5778874616748163, + "blocks.3.w1.weight": 19.87346377423969, + "blocks.3.w1.bias": 20.45559649243161, + "blocks.3.w2.weight": 29.273505770749868, + "blocks.4.ln.weight": 0.5907735236527697, + "blocks.4.w1.weight": 19.310944988230343, + "blocks.4.w1.bias": 19.98019865055604, + "blocks.4.w2.weight": 35.158811303772694, + "blocks.5.ln.weight": 0.5664442970803276, + "blocks.5.w1.weight": 21.492887279178838, + "blocks.5.w1.bias": 23.514956599646894, + "blocks.5.w2.weight": 31.178552638993263, + "blocks.6.ln.weight": 0.5602955775395394, + "blocks.6.w1.weight": 19.61043120210454, + "blocks.6.w1.bias": 21.095333255326253, + "blocks.6.w2.weight": 35.811762604119366, + "blocks.7.ln.weight": 0.5625727296567063, + "blocks.7.w1.weight": 18.807090394262456, + "blocks.7.w1.bias": 21.21463438586343, + "blocks.7.w2.weight": 37.013283750347604, + "blocks.8.ln.weight": 0.6306802678482845, + "blocks.8.w1.weight": 18.641671150147772, + "blocks.8.w1.bias": 18.967183417640936, + "blocks.8.w2.weight": 48.3826747365419, + "blocks.9.ln.weight": 0.587358912368524, + "blocks.9.w1.weight": 16.64124632095154, + "blocks.9.w1.bias": 15.919474132420179, + "blocks.9.w2.weight": 52.31462590866729, + "blocks.10.ln.weight": 0.5508428821937401, + "blocks.10.w1.weight": 15.626845109574424, + "blocks.10.w1.bias": 15.545502528098213, + "blocks.10.w2.weight": 49.44591721207919, + "blocks.11.ln.weight": 0.5806268804343621, + "blocks.11.w1.weight": 16.372036995921366, + "blocks.11.w1.bias": 13.944650611793428, + "blocks.11.w2.weight": 55.112106178217005, + "out_ln.weight": 0.3649173251620146, + "out_head.weight": 6.863263504617185, + "out_head.bias": 0.881340072926514 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 12, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 3 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L12_seed3", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d512_L12_seed4/results_cifar10.json b/results/fa_dfa_d512_L12_seed4/results_cifar10.json new file mode 100644 index 0000000..7eb8f8a --- /dev/null +++ b/results/fa_dfa_d512_L12_seed4/results_cifar10.json @@ -0,0 +1,969 @@ +{ + "4": { + "dfa": { + "log": { + "train_loss": [ + 2.0759089875793455, + 2.049546967468262, + 2.050352345352173, + 2.0449130432891844, + 2.0451894146728518, + 2.0407823961639404, + 2.031551725692749, + 2.0308942835998534, + 2.027821557159424, + 2.0279336640930175, + 2.030788267745972, + 2.027367984313965, + 2.023764753379822, + 2.0222284102630614, + 2.021787865447998, + 2.0210360749053957, + 2.020778656768799, + 2.0178001490020754, + 2.015317197265625, + 2.0181816483306885, + 2.0157879919433594, + 2.0184039527893067, + 2.0140462054443358, + 2.0133035342025756, + 2.0117346116638184, + 2.0124337507629395, + 2.015013382720947, + 2.0131209099578857, + 2.0119356175994874, + 2.0105034595489504, + 2.0114617578125, + 2.010921164550781, + 2.0091999086761474, + 2.0087246141052244, + 2.008987555999756, + 2.007724672088623, + 2.0079352700424193, + 2.0069879000854494, + 2.0099592947769165, + 2.0069592917633057, + 2.0085809383773805, + 2.007678118972778, + 2.006203598976135, + 2.0054306941986084, + 2.004828847351074, + 2.0060369828796385, + 2.0055570992279055, + 2.0053103733825686, + 2.004637515335083, + 2.0055320530700684, + 2.0035772978973387, + 2.0045177731323243, + 2.0053057807922365, + 2.004057818374634, + 2.0017144575500487, + 2.0030729360580444, + 2.002487395324707, + 2.0019396072387696, + 1.9998790828704833, + 2.003716873703003, + 2.001451046066284, + 1.9996818887329102, + 2.0031208934783935, + 2.0009771130371092, + 2.0052372956848146, + 2.000408243637085, + 2.0017312159729004, + 2.00103297542572, + 2.0006809278106688, + 1.999061644744873, + 1.9998556817626953, + 2.000588871498108, + 2.0007700952148437, + 2.001544753036499, + 2.000170311355591, + 2.0008300536346435, + 2.00009318069458, + 2.0018883587646483, + 1.9953095329284667, + 1.9990402968597412, + 1.9993244941711426, + 1.9997524154281616, + 1.9993522864151, + 1.999408567199707, + 2.000375003089905, + 2.0002292515563966, + 1.99836263092041, + 1.999543589744568, + 1.9990913265609742, + 1.9996358081436156, + 1.9965649541854857, + 1.9988786743164062, + 2.000610784988403, + 1.9997782401275634, + 1.9977047104644776, + 1.998138542137146, + 1.9980609845733643, + 1.9994125312805175, + 1.9971481538391114, + 1.999670766143799 + ], + "train_acc": [ + 0.2298, + 0.24208, + 0.24002, + 0.24094, + 0.24586, + 0.24526, + 0.2481, + 0.24888, + 0.25388, + 0.25086, + 0.24904, + 0.25404, + 0.25272, + 0.2537, + 0.2557, + 0.25446, + 0.25566, + 0.25962, + 0.25888, + 0.25898, + 0.25988, + 0.25998, + 0.26148, + 0.2599, + 0.26144, + 0.2642, + 0.2616, + 0.2624, + 0.26138, + 0.26398, + 0.2628, + 0.26448, + 0.26602, + 0.2662, + 0.26392, + 0.26558, + 0.26618, + 0.26806, + 0.26572, + 0.26504, + 0.26606, + 0.26642, + 0.26868, + 0.26806, + 0.2681, + 0.26742, + 0.26938, + 0.26904, + 0.26912, + 0.26822, + 0.26928, + 0.26642, + 0.26932, + 0.26744, + 0.2692, + 0.27166, + 0.27004, + 0.27224, + 0.26922, + 0.26924, + 0.2704, + 0.2713, + 0.27188, + 0.27038, + 0.2696, + 0.27172, + 0.26974, + 0.27234, + 0.27158, + 0.27154, + 0.27218, + 0.27162, + 0.271, + 0.27146, + 0.27242, + 0.27194, + 0.27084, + 0.27166, + 0.2737, + 0.27286, + 0.27226, + 0.27186, + 0.27026, + 0.27378, + 0.27114, + 0.27044, + 0.27224, + 0.27336, + 0.27488, + 0.27168, + 0.27362, + 0.27224, + 0.27022, + 0.2718, + 0.27058, + 0.27258, + 0.27366, + 0.2716, + 0.2725, + 0.27256 + ], + "test_acc": [ + 0.26, + 0.261, + 0.2579, + 0.2572, + 0.2579, + 0.2482, + 0.2715, + 0.2746, + 0.2616, + 0.2747, + 0.2443, + 0.2845, + 0.2756, + 0.2732, + 0.274, + 0.2875, + 0.2614, + 0.2732, + 0.2699, + 0.2708, + 0.2874, + 0.277, + 0.2787, + 0.2872, + 0.2775, + 0.2813, + 0.2711, + 0.2819, + 0.2716, + 0.2875, + 0.2749, + 0.279, + 0.2977, + 0.282, + 0.2738, + 0.2876, + 0.2788, + 0.2943, + 0.2736, + 0.2936, + 0.2773, + 0.2777, + 0.2891, + 0.2962, + 0.2734, + 0.2955, + 0.2867, + 0.2864, + 0.2834, + 0.2845, + 0.2853, + 0.2937, + 0.2705, + 0.292, + 0.2952, + 0.2875, + 0.2771, + 0.2837, + 0.2843, + 0.2887, + 0.291, + 0.2917, + 0.2763, + 0.2862, + 0.2897, + 0.2876, + 0.2865, + 0.2799, + 0.2772, + 0.2898, + 0.2823, + 0.2833, + 0.2875, + 0.2866, + 0.2771, + 0.28, + 0.2827, + 0.2895, + 0.2925, + 0.2896, + 0.2889, + 0.2882, + 0.2886, + 0.2864, + 0.2873, + 0.2909, + 0.2878, + 0.2848, + 0.286, + 0.2834, + 0.287, + 0.2888, + 0.2892, + 0.2871, + 0.2873, + 0.2878, + 0.2872, + 0.2875, + 0.2877, + 0.2878 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.36940303444862366, + 0.00047992257168516517, + 0.00055807048920542, + -0.000561900029424578, + -0.00015003856969997287, + 0.0003543531056493521, + -0.000369079178199172, + -7.983684918144718e-05, + -0.0001259066048078239, + 0.0002819746732711792, + -2.340562059544027e-05, + -5.241552571533248e-05 + ], + "perturbation_rho": [ + -0.0012638717889785767, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -2.9383227229118347e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 1.1175870895385742e-08, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.0235235095024109e-06, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 1.1175870895385742e-08, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -3.4552067518234253e-06, + -8.381903171539307e-09, + 4.656612873077393e-10, + 9.313225746154785e-10, + -3.725290298461914e-09, + 0.0, + 1.1175870895385742e-08, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 56077.015625, + 1373153024.0, + 3371879936.0, + 5206262272.0, + 7341849088.0, + 8992748544.0, + 9065292800.0, + 9464700928.0, + 9539394560.0, + 10031855616.0, + 10931398656.0, + 11201608704.0, + 11850394624.0 + ], + "bp_grad_norms_per_layer": [ + 2.49191884904576e-07, + 1.9557305774853262e-10, + 1.9563668740563145e-10, + 1.9543285045831027e-10, + 1.9541313012183537e-10, + 1.9539742046603692e-10, + 1.9536555706523018e-10, + 1.953735784265831e-10, + 1.9537529927227126e-10, + 1.9540973006382245e-10, + 1.9539764251064184e-10, + 1.9554310948244336e-10, + 1.9556442576451616e-10 + ] + }, + "drift": { + "embed.weight": 341.4389143853609, + "embed.bias": 269.18455752628876, + "blocks.0.ln.weight": 10.301521425392954, + "blocks.0.w1.weight": 298.4302439253081, + "blocks.0.w1.bias": 278.6169359900818, + "blocks.0.w2.weight": 499.7366737832077, + "blocks.1.ln.weight": 8.816575984910754, + "blocks.1.w1.weight": 341.9389165053106, + "blocks.1.w1.bias": 332.3162371142644, + "blocks.1.w2.weight": 347.85214832372776, + "blocks.2.ln.weight": 9.211748215059862, + "blocks.2.w1.weight": 389.3615048816918, + "blocks.2.w1.bias": 365.40848517889776, + "blocks.2.w2.weight": 370.8759731237476, + "blocks.3.ln.weight": 10.03198861563026, + "blocks.3.w1.weight": 406.5535727387553, + "blocks.3.w1.bias": 385.3233146773144, + "blocks.3.w2.weight": 394.381764481299, + "blocks.4.ln.weight": 10.474602013624727, + "blocks.4.w1.weight": 429.8395174539667, + "blocks.4.w1.bias": 400.4483643636673, + "blocks.4.w2.weight": 397.09630840545793, + "blocks.5.ln.weight": 7.5593503080049596, + "blocks.5.w1.weight": 296.69313861733195, + "blocks.5.w1.bias": 284.6870161884467, + "blocks.5.w2.weight": 262.57737559088133, + "blocks.6.ln.weight": 9.004407296024212, + "blocks.6.w1.weight": 356.77068113879443, + "blocks.6.w1.bias": 339.0443824568545, + "blocks.6.w2.weight": 315.61513347531445, + "blocks.7.ln.weight": 7.295378859292153, + "blocks.7.w1.weight": 264.4208821925752, + "blocks.7.w1.bias": 244.22970176166734, + "blocks.7.w2.weight": 243.628762418729, + "blocks.8.ln.weight": 8.932564788347374, + "blocks.8.w1.weight": 351.408355367191, + "blocks.8.w1.bias": 334.01069244258156, + "blocks.8.w2.weight": 326.1916980797317, + "blocks.9.ln.weight": 10.358840465999167, + "blocks.9.w1.weight": 415.0022329456538, + "blocks.9.w1.bias": 381.22980098380185, + "blocks.9.w2.weight": 375.2902593077766, + "blocks.10.ln.weight": 9.099883733701708, + "blocks.10.w1.weight": 357.2725104478134, + "blocks.10.w1.bias": 319.59512329402446, + "blocks.10.w2.weight": 347.7682207115722, + "blocks.11.ln.weight": 9.671063229293274, + "blocks.11.w1.weight": 383.4569677056522, + "blocks.11.w1.bias": 366.9661288886149, + "blocks.11.w2.weight": 358.31090602111163, + "out_ln.weight": 0.6468211624050442, + "out_head.weight": 9.099909010510617, + "out_head.bias": 0.5580580979411405 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0478644479370116, + 1.9653604916763305, + 1.9457170885849, + 1.9292129779052734, + 1.911267426147461, + 1.8966463186264038, + 1.8790605630874633, + 1.8708782619476318, + 1.8626127404403687, + 1.8581517023468017, + 1.8520171240234375, + 1.8454141430282593, + 1.8327462087249755, + 1.8297313425064088, + 1.8218167012786866, + 1.8236521685791016, + 1.810682963180542, + 1.8065698401641845, + 1.7991282674407958, + 1.794059274559021, + 1.7904878783416749, + 1.7887911895370483, + 1.7848259868621825, + 1.7783562173461913, + 1.7769769561767579, + 1.7767581796264649, + 1.7756539735412598, + 1.776255124130249, + 1.7723581911849975, + 1.7729071472549438, + 1.7715669748687743, + 1.771372833633423, + 1.767975373764038, + 1.7652683026123046, + 1.766329913673401, + 1.7672014434051513, + 1.758714213027954, + 1.7554267383193969, + 1.756759292869568, + 1.7521009868621826, + 1.7497902758789063, + 1.750489810180664, + 1.7499482775497437, + 1.747922197303772, + 1.744107078781128, + 1.7441058347320557, + 1.7407303936004639, + 1.7397420559310912, + 1.7378707089996337, + 1.7362638066864013, + 1.739966582069397, + 1.7335311608505248, + 1.7373196209716797, + 1.7352568884658814, + 1.7298812759399413, + 1.732886531715393, + 1.7291758307647704, + 1.724468434715271, + 1.7238299334716798, + 1.7226758923339844, + 1.7246140502548217, + 1.7225423165130616, + 1.7256555751800537, + 1.7229330892944337, + 1.7288363692855835, + 1.7249921246337891, + 1.7212819675445556, + 1.7200141415023804, + 1.7191423548126221, + 1.7222559392929078, + 1.7194412873077392, + 1.7188739904022217, + 1.7199222569274903, + 1.7203983585357665, + 1.7149837328338624, + 1.7162825980377197, + 1.7136023723602294, + 1.7168209014129638, + 1.7104442378997802, + 1.7158753946685792, + 1.7140782101821899, + 1.7154304480743408, + 1.7131923955535888, + 1.711485964012146, + 1.7109355539703368, + 1.716393593826294, + 1.7096369751358031, + 1.7133884111785889, + 1.7087384057998658, + 1.7118981928253174, + 1.7084463762664794, + 1.7090828707504273, + 1.7140569379425048, + 1.7086596160507201, + 1.706718920211792, + 1.7098103982925414, + 1.7067520601654054, + 1.7073699936676026, + 1.7088190727996826, + 1.7079839348983765 + ], + "train_acc": [ + 0.2395, + 0.28052, + 0.2902, + 0.297, + 0.30538, + 0.31214, + 0.32012, + 0.3235, + 0.32874, + 0.3302, + 0.33236, + 0.336, + 0.3379, + 0.33908, + 0.34476, + 0.34302, + 0.34752, + 0.35002, + 0.3536, + 0.35486, + 0.35646, + 0.35768, + 0.3591, + 0.36082, + 0.36046, + 0.36418, + 0.36422, + 0.3634, + 0.36582, + 0.36498, + 0.3655, + 0.3643, + 0.36678, + 0.36726, + 0.36698, + 0.3653, + 0.37052, + 0.37094, + 0.36996, + 0.37132, + 0.37166, + 0.3736, + 0.37276, + 0.37378, + 0.37664, + 0.37462, + 0.37638, + 0.37634, + 0.3792, + 0.37858, + 0.3749, + 0.37682, + 0.37684, + 0.37932, + 0.3824, + 0.38052, + 0.38102, + 0.38218, + 0.3842, + 0.3825, + 0.38362, + 0.38164, + 0.38222, + 0.38416, + 0.3834, + 0.38208, + 0.38782, + 0.38568, + 0.3865, + 0.38586, + 0.3853, + 0.3871, + 0.3852, + 0.38532, + 0.38646, + 0.38714, + 0.3898, + 0.38778, + 0.38802, + 0.38644, + 0.38714, + 0.39032, + 0.39064, + 0.39112, + 0.39104, + 0.39074, + 0.38882, + 0.38916, + 0.38948, + 0.38788, + 0.38876, + 0.39034, + 0.3877, + 0.38964, + 0.39044, + 0.38936, + 0.39126, + 0.3913, + 0.3887, + 0.3921 + ], + "test_acc": [ + 0.2937, + 0.318, + 0.3284, + 0.3218, + 0.3414, + 0.3309, + 0.3539, + 0.3542, + 0.3588, + 0.3564, + 0.3555, + 0.3677, + 0.3666, + 0.3693, + 0.3683, + 0.3771, + 0.3637, + 0.3767, + 0.3747, + 0.3788, + 0.3798, + 0.3875, + 0.3865, + 0.3832, + 0.3831, + 0.3875, + 0.3819, + 0.3915, + 0.3952, + 0.392, + 0.3842, + 0.3883, + 0.4019, + 0.3939, + 0.3936, + 0.3968, + 0.3946, + 0.3997, + 0.3989, + 0.3971, + 0.4026, + 0.3991, + 0.4026, + 0.4039, + 0.4021, + 0.4032, + 0.4022, + 0.4026, + 0.407, + 0.4115, + 0.4082, + 0.4054, + 0.4011, + 0.4083, + 0.4078, + 0.4068, + 0.4023, + 0.4005, + 0.4032, + 0.4055, + 0.4038, + 0.4083, + 0.4038, + 0.408, + 0.4047, + 0.4058, + 0.4078, + 0.4108, + 0.4074, + 0.41, + 0.406, + 0.4068, + 0.408, + 0.4086, + 0.4098, + 0.4093, + 0.411, + 0.4114, + 0.4128, + 0.4126, + 0.4111, + 0.4122, + 0.4107, + 0.4102, + 0.4096, + 0.4108, + 0.4108, + 0.4128, + 0.4113, + 0.4082, + 0.411, + 0.4112, + 0.4109, + 0.4108, + 0.4107, + 0.4111, + 0.4116, + 0.411, + 0.4108, + 0.4108 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.03154352679848671, + 0.05562606453895569, + 0.031104888767004013, + -0.07920745015144348, + -0.07174454629421234, + -0.02221393957734108, + -0.0608971044421196, + -0.043849505484104156, + -0.07298076152801514, + -0.004631989635527134, + 0.026790393516421318, + 0.9985308051109314 + ], + "perturbation_rho": [ + 0.027997372671961784, + 0.017702028155326843, + -0.01458565704524517, + 0.0021418300457298756, + 0.01191677525639534, + 0.033614952117204666, + 0.024964284151792526, + 0.01625584065914154, + 0.024897336959838867, + 0.035478636622428894, + 0.016699712723493576, + 0.03735386207699776 + ], + "nudging": { + "0.001": [ + -3.6178389564156532e-06, + -1.6938429325819016e-07, + -6.28642737865448e-09, + 5.6694261729717255e-08, + 5.681067705154419e-08, + 1.909211277961731e-08, + 2.3748725652694702e-08, + 3.166496753692627e-08, + 3.3993273973464966e-08, + 4.9243681132793427e-08, + -2.3283064365386963e-08, + -8.114147931337357e-07 + ], + "0.003": [ + -1.0870513506233692e-05, + -7.745111361145973e-07, + -1.3748649507761002e-07, + 2.558808773756027e-07, + 1.755543053150177e-07, + 4.9709342420101166e-08, + 4.563480615615845e-08, + 9.592622518539429e-08, + 2.1245796233415604e-07, + 2.9569491744041443e-08, + -1.1490192264318466e-07, + -2.9135262593626976e-06 + ], + "0.01": [ + -3.632775042206049e-05, + -2.4959444999694824e-06, + -4.919711500406265e-07, + 7.352791726589203e-07, + 7.244525477290154e-07, + 1.8265563994646072e-07, + 6.683403626084328e-07, + 4.987232387065887e-07, + 7.337657734751701e-07, + 2.223532646894455e-08, + -3.0745286494493484e-07, + -1.0411371476948261e-05 + ] + }, + "hidden_norms_per_layer": [ + 5638.3818359375, + 81270.953125, + 444372.78125, + 1134060.625, + 1891502.0, + 2104572.25, + 2255040.75, + 2375941.75, + 2391241.75, + 2426907.25, + 2452151.0, + 2467291.0, + 1570105.625 + ], + "bp_grad_norms_per_layer": [ + 3.355086664669216e-05, + 1.9466469893814065e-06, + 5.659199473484477e-07, + 4.918285867461236e-07, + 4.922185325995088e-07, + 4.924264089822827e-07, + 4.924931431560253e-07, + 4.925626626572921e-07, + 4.927623535877501e-07, + 4.928857038066781e-07, + 4.913110842608148e-07, + 4.904794081994623e-07, + 4.797909127773892e-07 + ] + }, + "drift": { + "embed.weight": 41.710097786001306, + "embed.bias": 15.464283033590101, + "blocks.0.ln.weight": 1.200268762028184, + "blocks.0.w1.weight": 16.308146249299604, + "blocks.0.w1.bias": 12.841405630221256, + "blocks.0.w2.weight": 57.47900576989446, + "blocks.1.ln.weight": 1.058833461192825, + "blocks.1.w1.weight": 20.255632368172027, + "blocks.1.w1.bias": 14.323810463836294, + "blocks.1.w2.weight": 51.70227234444696, + "blocks.2.ln.weight": 1.0067914589915794, + "blocks.2.w1.weight": 23.883193038734614, + "blocks.2.w1.bias": 21.70680288297347, + "blocks.2.w2.weight": 32.15341601636593, + "blocks.3.ln.weight": 0.6991583611383122, + "blocks.3.w1.weight": 26.213152107729808, + "blocks.3.w1.bias": 27.835553865312857, + "blocks.3.w2.weight": 22.33686846223663, + "blocks.4.ln.weight": 0.49537939877182396, + "blocks.4.w1.weight": 20.812006160361022, + "blocks.4.w1.bias": 22.653788785176967, + "blocks.4.w2.weight": 18.947997726554537, + "blocks.5.ln.weight": 0.5246639568490442, + "blocks.5.w1.weight": 21.161102756452053, + "blocks.5.w1.bias": 23.494613405758418, + "blocks.5.w2.weight": 18.27047815490039, + "blocks.6.ln.weight": 0.5056438845806848, + "blocks.6.w1.weight": 21.366617590276153, + "blocks.6.w1.bias": 23.57646482535375, + "blocks.6.w2.weight": 17.743866768773838, + "blocks.7.ln.weight": 0.37623071960771737, + "blocks.7.w1.weight": 15.484194372550624, + "blocks.7.w1.bias": 16.374712028510555, + "blocks.7.w2.weight": 18.270059859207745, + "blocks.8.ln.weight": 0.4246821474291729, + "blocks.8.w1.weight": 15.052279864294677, + "blocks.8.w1.bias": 15.735595602596712, + "blocks.8.w2.weight": 20.64493053094595, + "blocks.9.ln.weight": 0.40094293459913244, + "blocks.9.w1.weight": 13.315401582838119, + "blocks.9.w1.bias": 11.448042864028588, + "blocks.9.w2.weight": 39.460291915843406, + "blocks.10.ln.weight": 0.3986931357871173, + "blocks.10.w1.weight": 12.861490846141741, + "blocks.10.w1.bias": 10.433659114876445, + "blocks.10.w2.weight": 37.16747442552621, + "blocks.11.ln.weight": 0.5241503553443005, + "blocks.11.w1.weight": 18.756889297523845, + "blocks.11.w1.bias": 18.863150794441257, + "blocks.11.w2.weight": 46.497694834593375, + "out_ln.weight": 0.3799708731554326, + "out_head.weight": 6.730072302834575, + "out_head.bias": 0.6968175874942788 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 12, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 4 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L12_seed4", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d512_L12_seed5/results_cifar10.json b/results/fa_dfa_d512_L12_seed5/results_cifar10.json new file mode 100644 index 0000000..1e9a983 --- /dev/null +++ b/results/fa_dfa_d512_L12_seed5/results_cifar10.json @@ -0,0 +1,969 @@ +{ + "5": { + "dfa": { + "log": { + "train_loss": [ + 2.0684647270965577, + 2.04593505569458, + 2.0332052132797243, + 2.0338553718566894, + 2.026373544998169, + 2.0213139679336547, + 2.0205811246109007, + 2.018696953125, + 2.014947237319946, + 2.0151999055480956, + 2.0129982511138915, + 2.011509538650513, + 2.0100906581115723, + 2.0089198484802244, + 2.0063697845458983, + 2.006922910385132, + 2.0044513999176026, + 2.0016603435897826, + 2.000636997833252, + 2.002304845352173, + 2.0021789043426512, + 2.001000444030762, + 2.00005261428833, + 2.002928783569336, + 2.0017156188201906, + 1.9988520227050781, + 1.9994586238861083, + 1.9980162719726562, + 1.9998335108184815, + 1.9996961016845702, + 1.9991928104400636, + 1.9968728270721436, + 1.9977278369903564, + 1.9969172173309326, + 1.997944600830078, + 1.9979880140686035, + 1.994605475616455, + 1.9973402403259277, + 1.997146150894165, + 1.99599303855896, + 1.9974721060943603, + 1.9941783197402954, + 1.9956128702545166, + 1.9942147409057618, + 1.994319501800537, + 1.994643907546997, + 1.9942114786529541, + 1.994346463241577, + 1.9929966555023193, + 1.9937490250396728, + 1.992175287437439, + 1.9944900625610351, + 1.9938098908233644, + 1.9900078340911864, + 1.994505626373291, + 1.9926387735366822, + 1.990931240310669, + 1.9906283693695068, + 1.9929009646606446, + 1.9926953937530518, + 1.9925627197647096, + 1.990387219581604, + 1.9893561473846435, + 1.9920542861557007, + 1.9893005572509765, + 1.9888617028808593, + 1.991559531326294, + 1.9911002109527587, + 1.9900375119781495, + 1.9892236702728272, + 1.9937436754608153, + 1.991619889755249, + 1.9916846366882324, + 1.9896835166931153, + 1.9901411660003663, + 1.9924274538421631, + 1.9892655798721313, + 1.9914114519500732, + 1.990216528968811, + 1.9892477695465087, + 1.9900167115783691, + 1.98849309715271, + 1.9889385766983032, + 1.988801861000061, + 1.9898810375976563, + 1.9873209413909911, + 1.9886231661224365, + 1.988806999168396, + 1.9894798669052125, + 1.988367915725708, + 1.9881281168365479, + 1.9901167456054687, + 1.9896381842041015, + 1.9841947576522827, + 1.98667507106781, + 1.9886395093154907, + 1.9892304638290406, + 1.9895199391174316, + 1.9884832098388672, + 1.9883703183746337 + ], + "train_acc": [ + 0.23802, + 0.24804, + 0.25088, + 0.25142, + 0.25734, + 0.2584, + 0.26202, + 0.26208, + 0.265, + 0.26464, + 0.26442, + 0.26658, + 0.26774, + 0.26682, + 0.27064, + 0.26844, + 0.26844, + 0.27148, + 0.2714, + 0.26686, + 0.27288, + 0.26878, + 0.27184, + 0.27342, + 0.2741, + 0.27264, + 0.27204, + 0.27416, + 0.27192, + 0.27138, + 0.27496, + 0.27304, + 0.27252, + 0.2761, + 0.2752, + 0.27484, + 0.27692, + 0.2757, + 0.27436, + 0.27528, + 0.27534, + 0.276, + 0.27886, + 0.27704, + 0.27484, + 0.27556, + 0.27668, + 0.27734, + 0.27718, + 0.27692, + 0.27726, + 0.27706, + 0.2785, + 0.27902, + 0.2764, + 0.27802, + 0.27758, + 0.2812, + 0.27958, + 0.2804, + 0.27626, + 0.27992, + 0.2817, + 0.27822, + 0.2799, + 0.28178, + 0.2783, + 0.27782, + 0.28144, + 0.28136, + 0.2789, + 0.2784, + 0.27892, + 0.28034, + 0.28046, + 0.2792, + 0.28034, + 0.28004, + 0.28032, + 0.28042, + 0.28062, + 0.28206, + 0.28064, + 0.28308, + 0.2811, + 0.28224, + 0.28148, + 0.2803, + 0.2814, + 0.27944, + 0.28004, + 0.27996, + 0.28, + 0.28104, + 0.28072, + 0.28176, + 0.28028, + 0.27976, + 0.28074, + 0.27914 + ], + "test_acc": [ + 0.2445, + 0.2726, + 0.2811, + 0.2819, + 0.2748, + 0.2867, + 0.2884, + 0.2833, + 0.2868, + 0.2909, + 0.2961, + 0.2829, + 0.2919, + 0.3048, + 0.2947, + 0.2998, + 0.2984, + 0.2733, + 0.2898, + 0.2992, + 0.2974, + 0.289, + 0.2923, + 0.2983, + 0.2849, + 0.3036, + 0.2861, + 0.2991, + 0.2961, + 0.3025, + 0.2979, + 0.2897, + 0.2946, + 0.2973, + 0.2984, + 0.3042, + 0.2986, + 0.2994, + 0.3083, + 0.2802, + 0.3018, + 0.2976, + 0.301, + 0.301, + 0.2973, + 0.3045, + 0.297, + 0.2933, + 0.306, + 0.3031, + 0.2937, + 0.3027, + 0.2977, + 0.2974, + 0.3053, + 0.301, + 0.2965, + 0.3001, + 0.2994, + 0.2956, + 0.3025, + 0.286, + 0.2916, + 0.2956, + 0.3058, + 0.2961, + 0.2973, + 0.2996, + 0.2925, + 0.3068, + 0.3085, + 0.3058, + 0.2965, + 0.3035, + 0.3075, + 0.301, + 0.3066, + 0.3077, + 0.3019, + 0.3036, + 0.2997, + 0.3005, + 0.297, + 0.3008, + 0.3006, + 0.3103, + 0.3054, + 0.3048, + 0.2988, + 0.3009, + 0.3011, + 0.3039, + 0.3005, + 0.3037, + 0.3031, + 0.3022, + 0.302, + 0.3018, + 0.3018, + 0.3018 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.41004806756973267, + 0.0008555407403036952, + 0.0005728952819481492, + -0.0003633289597928524, + 0.00043969464604742825, + 0.0006190181011334062, + -0.0006453525274991989, + 0.0001250960340257734, + -0.0005502170533873141, + -6.381357525242493e-05, + -0.00011379925126675516, + -0.00034056592267006636 + ], + "perturbation_rho": [ + -0.0048940712586045265, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -3.5297125577926636e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.1865049600601196e-06, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -3.998167812824249e-06, + 0.0, + 0.0, + 9.313225746154785e-10, + -8.381903171539307e-09, + 0.0, + 0.0, + 9.313225746154785e-10, + 0.0, + 0.0, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 55109.36328125, + 840561728.0, + 1444462336.0, + 2124835456.0, + 3210963200.0, + 6536487936.0, + 7593719808.0, + 8378088960.0, + 8459614208.0, + 9916916736.0, + 10216921088.0, + 11792236544.0, + 12205541376.0 + ], + "bp_grad_norms_per_layer": [ + 2.8213216296535393e-07, + 2.0590699978395577e-10, + 2.0399454347952428e-10, + 2.03904518270015e-10, + 2.0418219892626155e-10, + 2.0476981221762003e-10, + 2.0479024032127313e-10, + 2.0459070548817238e-10, + 2.0458530702871514e-10, + 2.045856123400469e-10, + 2.04587194407857e-10, + 2.044076713447751e-10, + 2.045200814260184e-10 + ] + }, + "drift": { + "embed.weight": 331.27602666259105, + "embed.bias": 239.81762105942474, + "blocks.0.ln.weight": 10.927184995923978, + "blocks.0.w1.weight": 251.71912543831303, + "blocks.0.w1.bias": 221.3073991198696, + "blocks.0.w2.weight": 446.18400062248946, + "blocks.1.ln.weight": 7.981967217674905, + "blocks.1.w1.weight": 253.74455824953162, + "blocks.1.w1.bias": 216.76732500414306, + "blocks.1.w2.weight": 295.48008203993817, + "blocks.2.ln.weight": 8.227632098130004, + "blocks.2.w1.weight": 269.5007877113237, + "blocks.2.w1.bias": 241.96437719605447, + "blocks.2.w2.weight": 310.98985782960006, + "blocks.3.ln.weight": 8.112799129886726, + "blocks.3.w1.weight": 308.3323380552167, + "blocks.3.w1.bias": 309.27230622114104, + "blocks.3.w2.weight": 318.17414451568425, + "blocks.4.ln.weight": 10.16216573439241, + "blocks.4.w1.weight": 424.58007413849407, + "blocks.4.w1.bias": 404.55597818760236, + "blocks.4.w2.weight": 422.3987925739278, + "blocks.5.ln.weight": 9.551306834710088, + "blocks.5.w1.weight": 374.36129372347426, + "blocks.5.w1.bias": 352.23829526395434, + "blocks.5.w2.weight": 368.0730794757206, + "blocks.6.ln.weight": 8.854285613855794, + "blocks.6.w1.weight": 345.52287116736915, + "blocks.6.w1.bias": 319.68248259727653, + "blocks.6.w2.weight": 327.22678020367175, + "blocks.7.ln.weight": 7.811231342959617, + "blocks.7.w1.weight": 277.64478372749727, + "blocks.7.w1.bias": 254.112362538096, + "blocks.7.w2.weight": 265.1743796968286, + "blocks.8.ln.weight": 10.228506898132666, + "blocks.8.w1.weight": 404.4219524430621, + "blocks.8.w1.bias": 393.5696990939405, + "blocks.8.w2.weight": 394.0798993515089, + "blocks.9.ln.weight": 8.0107262045422, + "blocks.9.w1.weight": 316.5928270106835, + "blocks.9.w1.bias": 286.78238448653866, + "blocks.9.w2.weight": 310.9127649917408, + "blocks.10.ln.weight": 11.136451857382143, + "blocks.10.w1.weight": 438.2046003550788, + "blocks.10.w1.bias": 405.1181282489877, + "blocks.10.w2.weight": 413.8473423192705, + "blocks.11.ln.weight": 9.410383955267694, + "blocks.11.w1.weight": 372.4463938503806, + "blocks.11.w1.bias": 347.63407553366176, + "blocks.11.w2.weight": 344.49269974034115, + "out_ln.weight": 0.7270416629191093, + "out_head.weight": 10.238250621105578, + "out_head.bias": 0.4975266289585721 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0707826377868654, + 1.9932800539779663, + 1.959168056678772, + 1.9474205780029297, + 1.9326355227279663, + 1.9199382480239868, + 1.9140991415405273, + 1.9034375612640382, + 1.8959864074707031, + 1.8931433800506592, + 1.8902097482681275, + 1.8859386821746826, + 1.882199794769287, + 1.8849088175201416, + 1.8818455212020875, + 1.8821384326171875, + 1.8799380892944335, + 1.875362028427124, + 1.870530955467224, + 1.8683246834564209, + 1.8638563344573975, + 1.8618846607208253, + 1.8603020819854736, + 1.8594753393936156, + 1.8566845929718017, + 1.8526717345428467, + 1.849912834777832, + 1.8465362030029298, + 1.848408186569214, + 1.845232035217285, + 1.843059235229492, + 1.839803340072632, + 1.8417775817871094, + 1.8390681932067872, + 1.8388364002227784, + 1.8360300784683228, + 1.8323893267059326, + 1.834402112121582, + 1.8309936769866944, + 1.8322431957244874, + 1.8288187261581421, + 1.8238647890472413, + 1.824506708908081, + 1.8221080670166017, + 1.8222445755767822, + 1.8188702960205079, + 1.817204086036682, + 1.8124210216522216, + 1.8137152634429932, + 1.8142603408050537, + 1.8083323223876953, + 1.8079692990493774, + 1.8071039364624024, + 1.8000855495834351, + 1.8052594284057617, + 1.802986600036621, + 1.7997608633422852, + 1.7976583826065065, + 1.798347628555298, + 1.7966360546875, + 1.7926319261932373, + 1.7933891333007812, + 1.7893145151138306, + 1.7920790769195556, + 1.7883893395614625, + 1.785349788131714, + 1.7879791070938111, + 1.7846136389541627, + 1.783509234313965, + 1.7843902270507812, + 1.7877061660385132, + 1.7827738708877563, + 1.7823649167251587, + 1.7814125487899781, + 1.7809069869995118, + 1.7828298908233642, + 1.7791348586273192, + 1.7774614221191407, + 1.7751106949615478, + 1.7760692443466186, + 1.7797742696380616, + 1.7786880410003663, + 1.7746904688262939, + 1.776022590560913, + 1.7778167572402954, + 1.775718899459839, + 1.7761091399383544, + 1.7744762685394286, + 1.7767131290435791, + 1.7732363131713866, + 1.7731415704345703, + 1.7759572997665405, + 1.7733897400283813, + 1.7683659911346437, + 1.7711752317047118, + 1.772973984146118, + 1.772844222984314, + 1.774391251449585, + 1.7711415176010132, + 1.7753938333511352 + ], + "train_acc": [ + 0.22922, + 0.26562, + 0.28014, + 0.286, + 0.29238, + 0.29874, + 0.30312, + 0.30664, + 0.30876, + 0.31168, + 0.3128, + 0.31518, + 0.31798, + 0.317, + 0.3238, + 0.32118, + 0.32352, + 0.32418, + 0.329, + 0.32642, + 0.33084, + 0.3318, + 0.3332, + 0.33264, + 0.33466, + 0.33646, + 0.33754, + 0.34266, + 0.3406, + 0.33896, + 0.34342, + 0.34406, + 0.3431, + 0.34494, + 0.34444, + 0.346, + 0.34776, + 0.34624, + 0.34814, + 0.34848, + 0.34938, + 0.35124, + 0.35154, + 0.3523, + 0.35154, + 0.35416, + 0.3531, + 0.3559, + 0.35634, + 0.35504, + 0.35722, + 0.35778, + 0.35632, + 0.36152, + 0.36064, + 0.35976, + 0.35954, + 0.36118, + 0.36272, + 0.36392, + 0.36202, + 0.36176, + 0.3667, + 0.36318, + 0.36542, + 0.36726, + 0.36706, + 0.36818, + 0.3674, + 0.36874, + 0.36462, + 0.36766, + 0.36724, + 0.3694, + 0.36714, + 0.36818, + 0.36918, + 0.37, + 0.36942, + 0.37088, + 0.36958, + 0.3705, + 0.3704, + 0.37006, + 0.37176, + 0.37004, + 0.37138, + 0.37146, + 0.37036, + 0.36844, + 0.37048, + 0.37066, + 0.37288, + 0.37296, + 0.37426, + 0.37316, + 0.37076, + 0.37046, + 0.37382, + 0.37288 + ], + "test_acc": [ + 0.2587, + 0.2962, + 0.3064, + 0.3196, + 0.323, + 0.3355, + 0.3259, + 0.3316, + 0.345, + 0.3454, + 0.347, + 0.3503, + 0.3501, + 0.3606, + 0.35, + 0.366, + 0.3606, + 0.345, + 0.3598, + 0.3644, + 0.365, + 0.3731, + 0.3727, + 0.3741, + 0.3592, + 0.3701, + 0.3692, + 0.375, + 0.3687, + 0.3686, + 0.3813, + 0.3762, + 0.3719, + 0.3836, + 0.3831, + 0.3884, + 0.3927, + 0.3866, + 0.3775, + 0.3781, + 0.3852, + 0.3899, + 0.3881, + 0.3895, + 0.3869, + 0.3874, + 0.3932, + 0.3817, + 0.3908, + 0.3893, + 0.3806, + 0.3918, + 0.3899, + 0.39, + 0.3949, + 0.3925, + 0.391, + 0.3933, + 0.3968, + 0.3904, + 0.3935, + 0.3933, + 0.3933, + 0.3992, + 0.4014, + 0.3955, + 0.3973, + 0.396, + 0.3963, + 0.3933, + 0.4005, + 0.4025, + 0.3997, + 0.4007, + 0.4027, + 0.4043, + 0.4044, + 0.4033, + 0.4033, + 0.4026, + 0.3994, + 0.4039, + 0.3988, + 0.4037, + 0.4041, + 0.4071, + 0.4054, + 0.4045, + 0.4049, + 0.4049, + 0.404, + 0.4069, + 0.4028, + 0.4041, + 0.4052, + 0.4053, + 0.4038, + 0.4034, + 0.4041, + 0.404 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.02494833618402481, + 0.06461166590452194, + 0.007773830089718103, + -0.053252629935741425, + -0.03213752806186676, + -0.0018765359418466687, + -0.020534943789243698, + -0.05978960543870926, + -0.019162429496645927, + 0.007563438266515732, + 0.033061787486076355, + 0.989224910736084 + ], + "perturbation_rho": [ + -0.0004792527761310339, + -0.012973977252840996, + 0.01789543777704239, + 0.0009404178708791733, + -0.03582464158535004, + -0.024608338251709938, + 0.03623630106449127, + -0.0018152520060539246, + 0.017969228327274323, + 0.0033451307099312544, + 0.0, + 0.029760660603642464 + ], + "nudging": { + "0.001": [ + -9.243376553058624e-07, + -5.587935447692871e-08, + 2.0721927285194397e-08, + -3.119930624961853e-08, + -2.1420419216156006e-08, + -2.9336661100387573e-08, + -2.0023435354232788e-08, + -5.122274160385132e-09, + -6.05359673500061e-09, + 3.725290298461914e-09, + -2.3748725652694702e-08, + -2.4866312742233276e-07 + ], + "0.003": [ + -2.8724316507577896e-06, + -2.635642886161804e-07, + -5.4249539971351624e-08, + -1.5599653124809265e-08, + -4.44706529378891e-08, + -1.6996636986732483e-08, + 1.1641532182693481e-09, + 5.704350769519806e-08, + -1.3504177331924438e-08, + -1.3737007975578308e-08, + -3.259629011154175e-08, + -1.0190997272729874e-06 + ], + "0.01": [ + -9.626368409954011e-06, + -7.129274308681488e-07, + 5.145557224750519e-08, + 1.8137507140636444e-07, + 1.0221265256404877e-07, + 1.3969838619232178e-08, + 3.608874976634979e-08, + 2.377200871706009e-07, + -1.6298145055770874e-08, + -8.731149137020111e-08, + -1.6693957149982452e-07, + -3.8032885640859604e-06 + ] + }, + "hidden_norms_per_layer": [ + 11572.41796875, + 170961.703125, + 761668.5625, + 2149773.25, + 3106495.5, + 3628764.75, + 3844090.5, + 4201663.0, + 4523281.0, + 4742705.5, + 4761602.0, + 4759073.5, + 4570665.0 + ], + "bp_grad_norms_per_layer": [ + 2.5100242055486888e-05, + 6.707207944600668e-07, + 2.330790778160008e-07, + 2.3257328507497732e-07, + 2.3236127333348122e-07, + 2.322931180742671e-07, + 2.323264851611384e-07, + 2.3226489531680272e-07, + 2.3237980428802985e-07, + 2.3232749413182319e-07, + 2.322744165894619e-07, + 2.3242134261636238e-07, + 2.2277541233961529e-07 + ] + }, + "drift": { + "embed.weight": 67.4183636503971, + "embed.bias": 12.639534947709132, + "blocks.0.ln.weight": 1.4281653737839686, + "blocks.0.w1.weight": 22.149022125188107, + "blocks.0.w1.bias": 14.43109450707943, + "blocks.0.w2.weight": 73.54315291854846, + "blocks.1.ln.weight": 1.3725125667153013, + "blocks.1.w1.weight": 28.041388459852282, + "blocks.1.w1.bias": 16.737535867388896, + "blocks.1.w2.weight": 48.10538352181479, + "blocks.2.ln.weight": 0.9692921428242081, + "blocks.2.w1.weight": 29.126634688998955, + "blocks.2.w1.bias": 28.160099907002987, + "blocks.2.w2.weight": 30.43668721582192, + "blocks.3.ln.weight": 0.8517630406355475, + "blocks.3.w1.weight": 30.698936977122976, + "blocks.3.w1.bias": 30.008595592413137, + "blocks.3.w2.weight": 25.53125025342729, + "blocks.4.ln.weight": 0.8000730489190893, + "blocks.4.w1.weight": 28.294012201626252, + "blocks.4.w1.bias": 27.681602003963505, + "blocks.4.w2.weight": 31.236916248398295, + "blocks.5.ln.weight": 0.7726872906934783, + "blocks.5.w1.weight": 27.361431129938182, + "blocks.5.w1.bias": 27.206264858292855, + "blocks.5.w2.weight": 30.441424834889585, + "blocks.6.ln.weight": 0.836359631646653, + "blocks.6.w1.weight": 26.47170019895073, + "blocks.6.w1.bias": 25.167535850732502, + "blocks.6.w2.weight": 27.854377209764337, + "blocks.7.ln.weight": 0.7549166617706077, + "blocks.7.w1.weight": 26.946400539940182, + "blocks.7.w1.bias": 26.486714874573558, + "blocks.7.w2.weight": 31.532602647761905, + "blocks.8.ln.weight": 0.7532469721914246, + "blocks.8.w1.weight": 26.170218010343223, + "blocks.8.w1.bias": 27.052147560472978, + "blocks.8.w2.weight": 31.57827072835312, + "blocks.9.ln.weight": 0.5491388007855835, + "blocks.9.w1.weight": 17.942432655743463, + "blocks.9.w1.bias": 16.168189388817968, + "blocks.9.w2.weight": 38.437476826047074, + "blocks.10.ln.weight": 0.44584462833770383, + "blocks.10.w1.weight": 14.61733255882144, + "blocks.10.w1.bias": 9.906228706328685, + "blocks.10.w2.weight": 63.74207530119445, + "blocks.11.ln.weight": 0.4261726286054206, + "blocks.11.w1.weight": 16.108342263191403, + "blocks.11.w1.bias": 10.380547619047618, + "blocks.11.w2.weight": 90.22616819735002, + "out_ln.weight": 0.4534589624136927, + "out_head.weight": 7.737075841258268, + "out_head.bias": 0.6965283841940496 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 12, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 5 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L12_seed5", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d512_L12_seed6/results_cifar10.json b/results/fa_dfa_d512_L12_seed6/results_cifar10.json new file mode 100644 index 0000000..ccf37e3 --- /dev/null +++ b/results/fa_dfa_d512_L12_seed6/results_cifar10.json @@ -0,0 +1,969 @@ +{ + "6": { + "dfa": { + "log": { + "train_loss": [ + 2.0821874879455566, + 2.0482772801208498, + 2.047201838607788, + 2.0446329682159425, + 2.0396667890930176, + 2.036854875450134, + 2.035803484649658, + 2.0357003843688966, + 2.030295910644531, + 2.027509428253174, + 2.027801731262207, + 2.026494747467041, + 2.023713701324463, + 2.0224919934082033, + 2.0218288256835937, + 2.0183722845840455, + 2.0188908798217775, + 2.0174379263305666, + 2.0145048645782473, + 2.0172035501098633, + 2.01550293838501, + 2.013378846511841, + 2.015203825531006, + 2.016428701705933, + 2.011487791137695, + 2.010817546157837, + 2.01103433883667, + 2.011530419845581, + 2.0120579361724853, + 2.0116470166778564, + 2.008692329330444, + 2.009709241294861, + 2.008519637374878, + 2.0078509032440186, + 2.0077528884124756, + 2.006431950531006, + 2.0055743618011475, + 2.005430025024414, + 2.0056805452728272, + 2.007569041595459, + 2.004868392677307, + 2.0057909046936033, + 2.0064586280822754, + 2.005987978057861, + 2.00688329624176, + 2.00672064743042, + 2.0048528481292727, + 2.005383738861084, + 2.0018666375732423, + 2.0059955949401855, + 2.003435557899475, + 2.001197138328552, + 2.0012222803497313, + 2.003032783164978, + 2.003918945236206, + 2.0043967723083496, + 2.0035451693725586, + 2.003359081878662, + 2.001155726585388, + 2.001671184616089, + 2.0038728633880614, + 2.00498755947113, + 2.002938235092163, + 2.000668586883545, + 2.0018935997772216, + 2.0039958557128905, + 2.001610350112915, + 2.000534170913696, + 2.001975674972534, + 2.002237940979004, + 2.0009369605255127, + 2.003139477081299, + 2.00191648399353, + 2.003141210899353, + 1.9998547933578492, + 2.000947899513245, + 2.0015833082580565, + 2.0008020196151732, + 2.0014297840118407, + 2.00010564907074, + 2.0010018926239015, + 2.0015807587051393, + 2.0006349613952636, + 2.0010920279693605, + 2.000114615020752, + 2.001875145263672, + 2.001294381980896, + 2.001243436965942, + 1.9999162873077392, + 1.998048712120056, + 2.000587540740967, + 2.0002040885162353, + 2.00153217338562, + 2.0005496265411375, + 2.000687266998291, + 2.000036004295349, + 1.9989564297866822, + 1.9997446877288818, + 1.9997422592163085, + 2.000641355895996 + ], + "train_acc": [ + 0.23088, + 0.24298, + 0.242, + 0.24386, + 0.24422, + 0.24566, + 0.24754, + 0.24998, + 0.25214, + 0.25368, + 0.25348, + 0.2556, + 0.2568, + 0.25746, + 0.2569, + 0.25902, + 0.26116, + 0.2581, + 0.26084, + 0.26254, + 0.26022, + 0.26164, + 0.26092, + 0.26324, + 0.26598, + 0.26604, + 0.26308, + 0.26346, + 0.26408, + 0.26498, + 0.2656, + 0.26436, + 0.26714, + 0.26778, + 0.26842, + 0.26646, + 0.26986, + 0.26788, + 0.26944, + 0.2679, + 0.26858, + 0.269, + 0.26802, + 0.26894, + 0.26892, + 0.26996, + 0.26784, + 0.27018, + 0.26922, + 0.2704, + 0.26998, + 0.27222, + 0.2733, + 0.27004, + 0.2688, + 0.26874, + 0.26814, + 0.27284, + 0.26872, + 0.272, + 0.26908, + 0.27148, + 0.27176, + 0.27322, + 0.2711, + 0.26946, + 0.27102, + 0.27338, + 0.27226, + 0.2703, + 0.27122, + 0.27198, + 0.27138, + 0.27356, + 0.27036, + 0.27104, + 0.27176, + 0.27334, + 0.27254, + 0.27312, + 0.27288, + 0.27076, + 0.2713, + 0.27096, + 0.27414, + 0.27154, + 0.27082, + 0.27156, + 0.27418, + 0.27186, + 0.27248, + 0.27316, + 0.27146, + 0.27196, + 0.27248, + 0.27434, + 0.27286, + 0.27322, + 0.26998, + 0.272 + ], + "test_acc": [ + 0.2459, + 0.2608, + 0.2635, + 0.2517, + 0.253, + 0.2671, + 0.2464, + 0.2581, + 0.2752, + 0.2714, + 0.2801, + 0.2721, + 0.2781, + 0.2735, + 0.2757, + 0.2879, + 0.2731, + 0.2825, + 0.271, + 0.2808, + 0.2798, + 0.2898, + 0.2713, + 0.2935, + 0.2878, + 0.2873, + 0.268, + 0.2934, + 0.2831, + 0.2992, + 0.2915, + 0.2887, + 0.2797, + 0.2998, + 0.2957, + 0.2824, + 0.2818, + 0.289, + 0.2951, + 0.2906, + 0.2842, + 0.2772, + 0.2962, + 0.2715, + 0.3031, + 0.2834, + 0.299, + 0.2881, + 0.2849, + 0.2939, + 0.2801, + 0.279, + 0.2819, + 0.2841, + 0.3013, + 0.2908, + 0.2906, + 0.2845, + 0.2931, + 0.2816, + 0.2975, + 0.2963, + 0.2953, + 0.288, + 0.2836, + 0.2878, + 0.2906, + 0.2879, + 0.2969, + 0.2892, + 0.286, + 0.282, + 0.2914, + 0.2885, + 0.2892, + 0.2904, + 0.295, + 0.2865, + 0.2949, + 0.2902, + 0.2894, + 0.2898, + 0.2913, + 0.2929, + 0.2868, + 0.2869, + 0.2905, + 0.2895, + 0.29, + 0.2896, + 0.2882, + 0.2908, + 0.2912, + 0.2901, + 0.2893, + 0.2888, + 0.2895, + 0.2894, + 0.2896, + 0.2896 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.36949819326400757, + 0.0005479701794683933, + -0.0006032834062352777, + 0.00028805271722376347, + -0.0005946755409240723, + -0.0002685927611310035, + -0.00046556672896258533, + -0.00010807066428242251, + -0.0003351868945173919, + 0.00010062567162094638, + -0.000464106589788571, + -4.078936035512015e-05 + ], + "perturbation_rho": [ + -0.011094596236944199, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -2.4400651454925537e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -9.699724614620209e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -3.3657997846603394e-06, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 55177.16015625, + 1459404800.0, + 2815841792.0, + 3553956096.0, + 3947802112.0, + 6848872448.0, + 7047305728.0, + 7256152064.0, + 9300344832.0, + 10056428544.0, + 10370693120.0, + 10568456192.0, + 11512604672.0 + ], + "bp_grad_norms_per_layer": [ + 2.3892781086942705e-07, + 2.0915398579735012e-10, + 2.0916321452624231e-10, + 2.0916490761635487e-10, + 2.0915806586696561e-10, + 2.0912831188990566e-10, + 2.0913649978471227e-10, + 2.0914339704525275e-10, + 2.089406564431684e-10, + 2.0894751207034545e-10, + 2.0897203412140186e-10, + 2.0899702801724374e-10, + 2.090319445313682e-10 + ] + }, + "drift": { + "embed.weight": 337.6841411506247, + "embed.bias": 244.65544843260258, + "blocks.0.ln.weight": 9.921388539749072, + "blocks.0.w1.weight": 303.34325560691656, + "blocks.0.w1.bias": 271.5154938432058, + "blocks.0.w2.weight": 490.11291855747174, + "blocks.1.ln.weight": 8.358331837822632, + "blocks.1.w1.weight": 306.46180391751284, + "blocks.1.w1.bias": 289.91192320867975, + "blocks.1.w2.weight": 307.16882665457837, + "blocks.2.ln.weight": 8.171380552921143, + "blocks.2.w1.weight": 309.0280257845424, + "blocks.2.w1.bias": 278.09046067069085, + "blocks.2.w2.weight": 303.83002249941313, + "blocks.3.ln.weight": 7.530980420849777, + "blocks.3.w1.weight": 291.36754150240233, + "blocks.3.w1.bias": 267.02199332509406, + "blocks.3.w2.weight": 279.4411092845991, + "blocks.4.ln.weight": 10.6408318843949, + "blocks.4.w1.weight": 441.74974808763915, + "blocks.4.w1.bias": 407.5025978091534, + "blocks.4.w2.weight": 396.8841237755263, + "blocks.5.ln.weight": 7.3188489223429976, + "blocks.5.w1.weight": 282.40725775096377, + "blocks.5.w1.bias": 268.64915470838923, + "blocks.5.w2.weight": 255.5102203441866, + "blocks.6.ln.weight": 7.484712621287041, + "blocks.6.w1.weight": 289.3900145716271, + "blocks.6.w1.bias": 273.2836157437038, + "blocks.6.w2.weight": 269.0639311186208, + "blocks.7.ln.weight": 10.645635950857786, + "blocks.7.w1.weight": 435.3403361556039, + "blocks.7.w1.bias": 407.2628750706809, + "blocks.7.w2.weight": 422.88459144783326, + "blocks.8.ln.weight": 9.19535275353374, + "blocks.8.w1.weight": 359.87499481651184, + "blocks.8.w1.bias": 333.4216491141516, + "blocks.8.w2.weight": 337.7077986951993, + "blocks.9.ln.weight": 7.799912385129816, + "blocks.9.w1.weight": 297.0043322415887, + "blocks.9.w1.bias": 289.12017704180437, + "blocks.9.w2.weight": 272.7064790358395, + "blocks.10.ln.weight": 8.43728199015399, + "blocks.10.w1.weight": 330.9678935021042, + "blocks.10.w1.bias": 306.6806776473047, + "blocks.10.w2.weight": 309.957908747728, + "blocks.11.ln.weight": 10.237677378176008, + "blocks.11.w1.weight": 400.7826698559754, + "blocks.11.w1.bias": 368.5214108931406, + "blocks.11.w2.weight": 374.837703368063, + "out_ln.weight": 0.67814086539114, + "out_head.weight": 9.623079895238506, + "out_head.bias": 0.4268739303655545 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.05228896446228, + 1.9686302278900147, + 1.937331941871643, + 1.9156952641296388, + 1.8984398557281494, + 1.8918078393936157, + 1.8841382266616822, + 1.8769169528198242, + 1.8724429541397094, + 1.868889035949707, + 1.8656937331390382, + 1.8661101271820069, + 1.8578856301116944, + 1.8565830435180664, + 1.856787547569275, + 1.8557513214492798, + 1.8538894052124024, + 1.8534770782089234, + 1.848385778579712, + 1.846604812889099, + 1.8453827571868897, + 1.8429374709320068, + 1.8401580144500733, + 1.8401220611190796, + 1.8288005982208253, + 1.8259171481323242, + 1.8221615615463256, + 1.8216705709075929, + 1.821623645401001, + 1.818600112838745, + 1.812706976966858, + 1.8111941592788696, + 1.8062977261734008, + 1.805511683959961, + 1.8029492014312745, + 1.800798831100464, + 1.7953546783065797, + 1.7982503284454345, + 1.7914396514511108, + 1.7916672002792358, + 1.7845456259155272, + 1.78576413356781, + 1.783857018814087, + 1.7810697037506102, + 1.7796821146011352, + 1.7769692651367188, + 1.778120286178589, + 1.7701848324966432, + 1.7680266930770874, + 1.7723119870376587, + 1.7669956749725342, + 1.764911948814392, + 1.7598207390975953, + 1.7630078075408935, + 1.7617963638687133, + 1.7589407592391968, + 1.7578953286361694, + 1.755873950805664, + 1.7555263995742798, + 1.7516939831924438, + 1.7528417974090575, + 1.7498321814346314, + 1.7489991750335694, + 1.744125429725647, + 1.7467560472869874, + 1.7484809857940673, + 1.7442166653823852, + 1.7426679946136474, + 1.7441077096939086, + 1.7404415154647828, + 1.734541618347168, + 1.7417204373931885, + 1.7390217791748046, + 1.7405015720367432, + 1.739689345970154, + 1.7386328066253662, + 1.7377501021575927, + 1.7382229836654663, + 1.7361252673721312, + 1.7335023355865478, + 1.7335243579483033, + 1.7341424390029907, + 1.7341539621734618, + 1.7341088440322876, + 1.7317719327545167, + 1.7339295767593383, + 1.7329442672729491, + 1.7313460754776, + 1.7338177963256836, + 1.7318969781112672, + 1.7321792919921875, + 1.7318955504989624, + 1.7337695608520507, + 1.7288571945953368, + 1.7327508489990235, + 1.7287590840911866, + 1.730326948890686, + 1.7341352017593383, + 1.7290034577178954, + 1.72969926902771 + ], + "train_acc": [ + 0.2384, + 0.27844, + 0.2943, + 0.30138, + 0.30742, + 0.30942, + 0.3195, + 0.31888, + 0.32204, + 0.32418, + 0.3248, + 0.32782, + 0.33146, + 0.33268, + 0.32912, + 0.33066, + 0.33278, + 0.33552, + 0.3367, + 0.34006, + 0.3393, + 0.33932, + 0.3395, + 0.33874, + 0.34818, + 0.34772, + 0.34958, + 0.3503, + 0.35032, + 0.35086, + 0.35212, + 0.35332, + 0.35488, + 0.3559, + 0.35578, + 0.35756, + 0.35926, + 0.35818, + 0.36006, + 0.3605, + 0.36376, + 0.3638, + 0.36622, + 0.36562, + 0.36488, + 0.36572, + 0.36658, + 0.3694, + 0.37002, + 0.36866, + 0.37106, + 0.37212, + 0.37664, + 0.37196, + 0.37114, + 0.37274, + 0.37342, + 0.37482, + 0.37588, + 0.37478, + 0.37494, + 0.3766, + 0.37674, + 0.3785, + 0.37762, + 0.37722, + 0.37866, + 0.38074, + 0.37858, + 0.3806, + 0.38312, + 0.37868, + 0.3786, + 0.38086, + 0.37808, + 0.37986, + 0.38126, + 0.38078, + 0.38162, + 0.3829, + 0.38342, + 0.3823, + 0.3824, + 0.38028, + 0.38296, + 0.383, + 0.38242, + 0.38388, + 0.3826, + 0.38382, + 0.38268, + 0.38512, + 0.38288, + 0.38526, + 0.38352, + 0.38476, + 0.3849, + 0.38008, + 0.3831, + 0.3847 + ], + "test_acc": [ + 0.2743, + 0.3072, + 0.3046, + 0.3269, + 0.3349, + 0.3431, + 0.3427, + 0.3386, + 0.3512, + 0.3518, + 0.3496, + 0.3458, + 0.3629, + 0.3681, + 0.364, + 0.3676, + 0.3514, + 0.3685, + 0.353, + 0.3779, + 0.3693, + 0.3647, + 0.3759, + 0.3607, + 0.374, + 0.3735, + 0.3711, + 0.3721, + 0.3821, + 0.3752, + 0.3707, + 0.3851, + 0.3838, + 0.3893, + 0.374, + 0.392, + 0.3839, + 0.3977, + 0.3956, + 0.3924, + 0.3957, + 0.3936, + 0.394, + 0.3912, + 0.3993, + 0.3945, + 0.3978, + 0.3946, + 0.398, + 0.3963, + 0.3976, + 0.3972, + 0.3971, + 0.3984, + 0.3988, + 0.3958, + 0.4012, + 0.4042, + 0.4017, + 0.3974, + 0.3975, + 0.4052, + 0.4061, + 0.4066, + 0.4056, + 0.4057, + 0.406, + 0.4017, + 0.4026, + 0.4095, + 0.4043, + 0.4072, + 0.4077, + 0.4031, + 0.4083, + 0.407, + 0.4077, + 0.4099, + 0.4067, + 0.4065, + 0.4057, + 0.4092, + 0.4093, + 0.4055, + 0.4097, + 0.4081, + 0.4083, + 0.407, + 0.407, + 0.4086, + 0.4084, + 0.4074, + 0.4073, + 0.4072, + 0.4063, + 0.4075, + 0.407, + 0.4074, + 0.4075, + 0.4076 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.0210530087351799, + 0.03225391358137131, + 0.030373934656381607, + -0.01976676471531391, + -0.039145730435848236, + -0.07764401286840439, + -0.04457290470600128, + -0.0037843044847249985, + -0.0476202666759491, + -0.000595096789766103, + 0.02137608267366886, + 0.995658278465271 + ], + "perturbation_rho": [ + -0.0567036010324955, + -0.0030030906200408936, + -0.022865712642669678, + -0.009168766438961029, + -0.0182923823595047, + 0.03648798167705536, + 0.04260994493961334, + 0.0037925627548247576, + -0.03891247510910034, + -0.052096571773290634, + -0.05355001986026764, + 0.005782400257885456 + ], + "nudging": { + "0.001": [ + -7.315538823604584e-07, + -1.1478550732135773e-07, + -6.658956408500671e-08, + 7.450580596923828e-09, + 3.702007234096527e-08, + 5.634501576423645e-08, + 5.51808625459671e-08, + 9.080395102500916e-09, + 2.7241185307502747e-08, + -1.3969838619232178e-09, + -3.3527612686157227e-08, + -7.827766239643097e-07 + ], + "0.003": [ + -2.304092049598694e-06, + -3.343448042869568e-07, + -1.4924444258213043e-07, + 1.979060471057892e-08, + 1.2828968465328217e-07, + 2.391170710325241e-07, + 1.1990778148174286e-07, + -4.959292709827423e-08, + 1.4039687812328339e-07, + -6.752088665962219e-09, + -3.050081431865692e-08, + -2.78581865131855e-06 + ], + "0.01": [ + -7.684342563152313e-06, + -1.07521191239357e-06, + -4.4563785195350647e-07, + 1.5692785382270813e-07, + 3.7904828786849976e-07, + 7.476191967725754e-07, + 4.507601261138916e-07, + -3.306195139884949e-08, + 5.138572305440903e-07, + 2.9569491744041443e-08, + -2.5634653866291046e-07, + -9.92906279861927e-06 + ] + }, + "hidden_norms_per_layer": [ + 7711.2001953125, + 86607.8359375, + 328385.03125, + 717089.75, + 1100718.375, + 1366338.125, + 1659771.0, + 1846383.5, + 2026219.75, + 2144626.25, + 2170055.0, + 2174719.5, + 1710646.75 + ], + "bp_grad_norms_per_layer": [ + 2.5522365831420757e-05, + 1.6940738305493142e-06, + 5.418342539087462e-07, + 4.801818249688949e-07, + 4.7660145696681866e-07, + 4.771524686475459e-07, + 4.772307420353172e-07, + 4.777210733664106e-07, + 4.776471200784727e-07, + 4.787051466337289e-07, + 4.790993557435286e-07, + 4.780670792570163e-07, + 4.5420676997309783e-07 + ] + }, + "drift": { + "embed.weight": 51.639575695075386, + "embed.bias": 13.420626933240067, + "blocks.0.ln.weight": 1.165106572944437, + "blocks.0.w1.weight": 17.074257617816983, + "blocks.0.w1.bias": 12.55405788253026, + "blocks.0.w2.weight": 56.84408822778898, + "blocks.1.ln.weight": 1.0814484489664455, + "blocks.1.w1.weight": 20.255692646568928, + "blocks.1.w1.bias": 12.590175436666444, + "blocks.1.w2.weight": 48.43073026110698, + "blocks.2.ln.weight": 0.9210710896806559, + "blocks.2.w1.weight": 20.147092711724806, + "blocks.2.w1.bias": 16.898392906551216, + "blocks.2.w2.weight": 46.45488641124572, + "blocks.3.ln.weight": 0.6699388356494572, + "blocks.3.w1.weight": 19.04886241071522, + "blocks.3.w1.bias": 18.822886783231823, + "blocks.3.w2.weight": 35.62237142992444, + "blocks.4.ln.weight": 0.5724221532943284, + "blocks.4.w1.weight": 18.93452177775512, + "blocks.4.w1.bias": 18.487677483264793, + "blocks.4.w2.weight": 30.11841300499853, + "blocks.5.ln.weight": 0.5318503739647785, + "blocks.5.w1.weight": 19.752732313152933, + "blocks.5.w1.bias": 21.012803450001744, + "blocks.5.w2.weight": 26.781939546320714, + "blocks.6.ln.weight": 0.4938677177304124, + "blocks.6.w1.weight": 18.79282421249984, + "blocks.6.w1.bias": 20.24614504362752, + "blocks.6.w2.weight": 23.954722502437257, + "blocks.7.ln.weight": 0.520902940610959, + "blocks.7.w1.weight": 19.15384116500715, + "blocks.7.w1.bias": 20.22154003333216, + "blocks.7.w2.weight": 28.09055042594723, + "blocks.8.ln.weight": 0.5342907608290218, + "blocks.8.w1.weight": 18.38918909035725, + "blocks.8.w1.bias": 18.8314837769314, + "blocks.8.w2.weight": 31.203969055913323, + "blocks.9.ln.weight": 0.5198835347685796, + "blocks.9.w1.weight": 17.551735719168683, + "blocks.9.w1.bias": 17.643006584631404, + "blocks.9.w2.weight": 50.56607469544128, + "blocks.10.ln.weight": 0.4499630077104289, + "blocks.10.w1.weight": 15.14684109843433, + "blocks.10.w1.bias": 12.774970121083086, + "blocks.10.w2.weight": 53.261273057743395, + "blocks.11.ln.weight": 0.4529064172202585, + "blocks.11.w1.weight": 17.52348612725844, + "blocks.11.w1.bias": 15.699238779618904, + "blocks.11.w2.weight": 54.34198491601119, + "out_ln.weight": 0.38172881823690796, + "out_head.weight": 6.667353449848178, + "out_head.bias": 0.7683726336162054 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 12, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 6 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L12_seed6", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d512_L12_seed7/results_cifar10.json b/results/fa_dfa_d512_L12_seed7/results_cifar10.json new file mode 100644 index 0000000..d1c18ce --- /dev/null +++ b/results/fa_dfa_d512_L12_seed7/results_cifar10.json @@ -0,0 +1,969 @@ +{ + "7": { + "dfa": { + "log": { + "train_loss": [ + 2.0717703549194337, + 2.041298008880615, + 2.029167268066406, + 2.0265366765975954, + 2.0233016680145264, + 2.021149669342041, + 2.015149740600586, + 2.014331086883545, + 2.011163010597229, + 2.00646972366333, + 2.0077928015899658, + 2.0069499212265014, + 2.0002946398925783, + 2.002099399795532, + 1.9983450998687744, + 1.9959249508666992, + 1.9950883417510987, + 1.9943942831802368, + 1.993684328918457, + 1.9930962839508057, + 1.9938797711181642, + 1.986891388015747, + 1.9910820366668702, + 1.9907979718780517, + 1.989411315689087, + 1.9852583177947998, + 1.981110195388794, + 1.9853213675308228, + 1.9821887520599366, + 1.9824637595367431, + 1.9810781491470337, + 1.978963046951294, + 1.9793350392913818, + 1.9822494831848145, + 1.9770296029281615, + 1.9799606243515016, + 1.979586162147522, + 1.978336156539917, + 1.9767711865234374, + 1.9781431746673583, + 1.97637092502594, + 1.9791246725845337, + 1.9776694247055053, + 1.9774175772857665, + 1.974560000038147, + 1.97618024269104, + 1.9750861001968383, + 1.9740860368347168, + 1.975063199005127, + 1.975644574661255, + 1.974392618637085, + 1.976005935974121, + 1.974110288734436, + 1.9724776383209228, + 1.9728016277313232, + 1.973982884864807, + 1.972937359275818, + 1.971426241798401, + 1.970759379310608, + 1.97244817237854, + 1.9711870779800416, + 1.9723809194564819, + 1.970773144493103, + 1.9708155387115478, + 1.9702037267684938, + 1.9704236566925049, + 1.9704266147613525, + 1.9674897221374512, + 1.969771149520874, + 1.9684535709381104, + 1.972261046295166, + 1.9708796706390381, + 1.9701639178848267, + 1.970189701461792, + 1.9690214687728882, + 1.970269794769287, + 1.9686412525177002, + 1.9686531394195557, + 1.9690335285186769, + 1.9683185077667236, + 1.9699563415527344, + 1.9683909454345703, + 1.9685154306793213, + 1.966210771751404, + 1.969103935775757, + 1.9681334160614015, + 1.9663462246322632, + 1.9694912530899047, + 1.9677201830291748, + 1.9683091250610352, + 1.9689178394317628, + 1.966240083847046, + 1.968869041519165, + 1.96721680809021, + 1.9639848593139648, + 1.9679099475097657, + 1.9662394170379638, + 1.966985820236206, + 1.9662641632843016, + 1.9668700101089478 + ], + "train_acc": [ + 0.23978, + 0.24322, + 0.25136, + 0.25116, + 0.25264, + 0.25374, + 0.26024, + 0.2603, + 0.26, + 0.26422, + 0.26226, + 0.26102, + 0.26648, + 0.2665, + 0.26764, + 0.26976, + 0.27012, + 0.27084, + 0.27144, + 0.27138, + 0.26928, + 0.2735, + 0.27452, + 0.27408, + 0.27312, + 0.276, + 0.27856, + 0.27458, + 0.27752, + 0.27688, + 0.27972, + 0.2782, + 0.2808, + 0.27646, + 0.28084, + 0.28012, + 0.2808, + 0.2793, + 0.28086, + 0.2807, + 0.27956, + 0.27968, + 0.28138, + 0.28184, + 0.28282, + 0.28208, + 0.28344, + 0.2835, + 0.27944, + 0.28146, + 0.28096, + 0.28186, + 0.28142, + 0.2833, + 0.28372, + 0.28258, + 0.28264, + 0.28458, + 0.28644, + 0.2822, + 0.2842, + 0.28418, + 0.2849, + 0.28526, + 0.28614, + 0.28488, + 0.2833, + 0.28626, + 0.28546, + 0.28682, + 0.28376, + 0.2851, + 0.28242, + 0.28406, + 0.28672, + 0.28582, + 0.28482, + 0.28584, + 0.28422, + 0.28666, + 0.28424, + 0.28532, + 0.28416, + 0.28974, + 0.28814, + 0.28682, + 0.28648, + 0.28618, + 0.28602, + 0.2883, + 0.2841, + 0.28796, + 0.28582, + 0.28556, + 0.29042, + 0.28562, + 0.28642, + 0.28704, + 0.28854, + 0.28752 + ], + "test_acc": [ + 0.2673, + 0.2593, + 0.2397, + 0.2765, + 0.284, + 0.2617, + 0.2814, + 0.2855, + 0.277, + 0.2629, + 0.287, + 0.2628, + 0.2838, + 0.2918, + 0.29, + 0.3014, + 0.2878, + 0.2773, + 0.2764, + 0.299, + 0.2977, + 0.2922, + 0.2967, + 0.2866, + 0.2876, + 0.2988, + 0.296, + 0.3046, + 0.3027, + 0.2977, + 0.3168, + 0.2957, + 0.2942, + 0.3079, + 0.2781, + 0.3022, + 0.294, + 0.3143, + 0.3169, + 0.3066, + 0.3105, + 0.3005, + 0.3035, + 0.3135, + 0.3035, + 0.2983, + 0.2928, + 0.3032, + 0.299, + 0.3029, + 0.2978, + 0.3047, + 0.3054, + 0.2984, + 0.3004, + 0.3, + 0.3146, + 0.3113, + 0.3079, + 0.3115, + 0.3101, + 0.3124, + 0.3111, + 0.3014, + 0.3064, + 0.3106, + 0.3105, + 0.3112, + 0.3019, + 0.3067, + 0.3096, + 0.2973, + 0.3033, + 0.3053, + 0.3119, + 0.3105, + 0.3093, + 0.3053, + 0.31, + 0.3055, + 0.3091, + 0.3076, + 0.3069, + 0.3131, + 0.3058, + 0.3119, + 0.3091, + 0.3084, + 0.3126, + 0.3089, + 0.3102, + 0.3105, + 0.31, + 0.3113, + 0.309, + 0.3103, + 0.3103, + 0.3109, + 0.3108, + 0.3108 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.4225231409072876, + 0.000332531810272485, + -0.00028742029098793864, + -0.0005942026618868113, + 0.0011352845467627048, + 5.1041941333096474e-05, + 4.8770005378173664e-05, + -0.0001115800259867683, + -0.00025915325386449695, + -0.0001579285744810477, + -0.00034282656270079315, + -0.001362925162538886 + ], + "perturbation_rho": [ + 0.005214178003370762, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -6.705522537231445e-07, + -1.862645149230957e-09, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.5972182154655457e-06, + 5.587935447692871e-09, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -5.433335900306702e-06, + 0.0, + 3.725290298461914e-09, + 0.0, + -1.862645149230957e-09, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 52073.1640625, + 624600448.0, + 2354930944.0, + 3962745600.0, + 4607439360.0, + 6940876800.0, + 10061239296.0, + 10270245888.0, + 10501487616.0, + 10991346688.0, + 11392520192.0, + 11497131008.0, + 11767929856.0 + ], + "bp_grad_norms_per_layer": [ + 3.116530820079788e-07, + 2.391457587513912e-10, + 2.334610560428274e-10, + 2.3252627601166864e-10, + 2.3254012604390084e-10, + 2.3251603420426648e-10, + 2.3252098857451386e-10, + 2.324976183798455e-10, + 2.3242860414107724e-10, + 2.3244653424292494e-10, + 2.3248170055722994e-10, + 2.3248254710228622e-10, + 2.324954950783109e-10 + ] + }, + "drift": { + "embed.weight": 314.5708156367549, + "embed.bias": 173.06181327916602, + "blocks.0.ln.weight": 10.608293168088462, + "blocks.0.w1.weight": 248.85531881291706, + "blocks.0.w1.bias": 197.42523217269257, + "blocks.0.w2.weight": 461.7533133123923, + "blocks.1.ln.weight": 8.793695977749161, + "blocks.1.w1.weight": 307.77495139687693, + "blocks.1.w1.bias": 271.79664056051047, + "blocks.1.w2.weight": 356.28012985496673, + "blocks.2.ln.weight": 8.954547910557306, + "blocks.2.w1.weight": 349.2950006278737, + "blocks.2.w1.bias": 313.10534483438977, + "blocks.2.w2.weight": 340.97526886427016, + "blocks.3.ln.weight": 7.664064660684567, + "blocks.3.w1.weight": 303.1221009157576, + "blocks.3.w1.bias": 276.7648976947458, + "blocks.3.w2.weight": 276.8184093796655, + "blocks.4.ln.weight": 10.090295578382477, + "blocks.4.w1.weight": 416.58752319563774, + "blocks.4.w1.bias": 396.4529594058789, + "blocks.4.w2.weight": 411.8714989953588, + "blocks.5.ln.weight": 11.222968212087666, + "blocks.5.w1.weight": 460.41052960846133, + "blocks.5.w1.bias": 446.32020235508855, + "blocks.5.w2.weight": 461.9898764832566, + "blocks.6.ln.weight": 9.029697428612298, + "blocks.6.w1.weight": 363.0948995365021, + "blocks.6.w1.bias": 335.6612472751257, + "blocks.6.w2.weight": 358.45859133995083, + "blocks.7.ln.weight": 8.68285178731722, + "blocks.7.w1.weight": 347.30737462136295, + "blocks.7.w1.bias": 319.98700238133193, + "blocks.7.w2.weight": 330.4186701898697, + "blocks.8.ln.weight": 8.786812217157763, + "blocks.8.w1.weight": 347.961851087332, + "blocks.8.w1.bias": 341.22606981083646, + "blocks.8.w2.weight": 315.32952774172026, + "blocks.9.ln.weight": 9.311416409774596, + "blocks.9.w1.weight": 343.96115770328134, + "blocks.9.w1.bias": 321.3536311695321, + "blocks.9.w2.weight": 315.28472186823876, + "blocks.10.ln.weight": 6.752703446423726, + "blocks.10.w1.weight": 265.99193594775096, + "blocks.10.w1.bias": 250.46671872343975, + "blocks.10.w2.weight": 243.26857071036574, + "blocks.11.ln.weight": 9.311969376436522, + "blocks.11.w1.weight": 375.3561731401925, + "blocks.11.w1.bias": 348.39190827099014, + "blocks.11.w2.weight": 350.6780634465036, + "out_ln.weight": 0.7156480208724685, + "out_head.weight": 9.90130634348424, + "out_head.bias": 0.6155786911344596 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.030823752403259, + 1.934500171585083, + 1.9079237392807007, + 1.8947180710220337, + 1.8812753409576417, + 1.8702804034423828, + 1.8603069189834596, + 1.854407600440979, + 1.8499454795074464, + 1.8409444109344482, + 1.8404085549545288, + 1.8384266778182983, + 1.8317015438842774, + 1.8293640671157836, + 1.8229795181655883, + 1.8220855783081054, + 1.8241054327392578, + 1.8256892261123656, + 1.825209083328247, + 1.8221923596572875, + 1.827516168899536, + 1.8242645990371704, + 1.8307866805648805, + 1.8350352270126342, + 1.8312144650268554, + 1.8282124835205078, + 1.822820876083374, + 1.8260399611663818, + 1.8204992460632323, + 1.8168495791625976, + 1.810229619064331, + 1.811380881576538, + 1.805999729537964, + 1.8007601963043214, + 1.7961979135513306, + 1.7947537366485595, + 1.7928297024536133, + 1.788902147216797, + 1.7878808304214477, + 1.7845082584381105, + 1.7827687833023071, + 1.7805255111694336, + 1.7768453490829468, + 1.7761483609390258, + 1.7748139315414428, + 1.7749142253875732, + 1.7714660557174682, + 1.770267225265503, + 1.7713737234497071, + 1.7634125463867187, + 1.7577275490570068, + 1.7656006281280519, + 1.7626705096435547, + 1.7589925038909913, + 1.756216689376831, + 1.7574205046844482, + 1.7555658932113647, + 1.7512263652801514, + 1.7533771200180053, + 1.7522863424301147, + 1.743942953414917, + 1.7457706503677368, + 1.744990401649475, + 1.7453525008773805, + 1.743213934249878, + 1.7438006609344483, + 1.7438085187530517, + 1.738191604385376, + 1.7366509014129639, + 1.739820227279663, + 1.7397972118377685, + 1.7396622115325928, + 1.7390950318145753, + 1.7385543502044678, + 1.733802723083496, + 1.735742342300415, + 1.7329303577041626, + 1.732147887878418, + 1.7361309533691407, + 1.7338640795516969, + 1.7344183361053467, + 1.7314273851776123, + 1.732653318786621, + 1.726430778465271, + 1.7266450793838501, + 1.7317952347564698, + 1.7268288860321044, + 1.7301931661605836, + 1.729054831314087, + 1.728853748779297, + 1.7290257139587402, + 1.7282057024383546, + 1.7276880645370483, + 1.7283826892089844, + 1.7221429102325438, + 1.7275538332366944, + 1.7293567386627198, + 1.7251165560913087, + 1.727771372756958, + 1.7276002161026 + ], + "train_acc": [ + 0.25468, + 0.29668, + 0.3079, + 0.31446, + 0.31972, + 0.32522, + 0.32856, + 0.32842, + 0.33384, + 0.3401, + 0.33566, + 0.3388, + 0.3399, + 0.34344, + 0.3465, + 0.3477, + 0.34726, + 0.34478, + 0.34704, + 0.34592, + 0.34452, + 0.34292, + 0.34314, + 0.3423, + 0.33992, + 0.34576, + 0.3454, + 0.34362, + 0.3478, + 0.34758, + 0.35328, + 0.35146, + 0.35478, + 0.35634, + 0.35704, + 0.35806, + 0.3582, + 0.35946, + 0.3603, + 0.35836, + 0.36136, + 0.36142, + 0.36514, + 0.36682, + 0.36542, + 0.36722, + 0.36676, + 0.36892, + 0.36556, + 0.3685, + 0.37052, + 0.3698, + 0.37064, + 0.37166, + 0.37218, + 0.37034, + 0.37416, + 0.3751, + 0.37026, + 0.37354, + 0.377, + 0.37554, + 0.3771, + 0.37508, + 0.37768, + 0.37622, + 0.37688, + 0.3797, + 0.37742, + 0.3776, + 0.37718, + 0.37764, + 0.38156, + 0.3803, + 0.38212, + 0.38114, + 0.37938, + 0.38004, + 0.37876, + 0.38, + 0.37908, + 0.37994, + 0.38146, + 0.38242, + 0.38336, + 0.38234, + 0.38252, + 0.3794, + 0.38194, + 0.3836, + 0.3818, + 0.3845, + 0.3839, + 0.38346, + 0.385, + 0.38244, + 0.38364, + 0.38556, + 0.38002, + 0.38376 + ], + "test_acc": [ + 0.312, + 0.3272, + 0.3242, + 0.3601, + 0.3595, + 0.3486, + 0.3611, + 0.3545, + 0.3528, + 0.3443, + 0.3558, + 0.3534, + 0.3612, + 0.3665, + 0.3685, + 0.377, + 0.3552, + 0.3599, + 0.3585, + 0.3623, + 0.3614, + 0.3579, + 0.3684, + 0.3467, + 0.3545, + 0.3635, + 0.3553, + 0.3726, + 0.3762, + 0.3545, + 0.3576, + 0.3804, + 0.3752, + 0.3737, + 0.3726, + 0.3765, + 0.3783, + 0.3783, + 0.3863, + 0.3813, + 0.3837, + 0.3841, + 0.3885, + 0.3917, + 0.3897, + 0.3896, + 0.3819, + 0.3844, + 0.394, + 0.391, + 0.3903, + 0.3962, + 0.3984, + 0.3917, + 0.3958, + 0.3898, + 0.3995, + 0.3985, + 0.3973, + 0.3971, + 0.3961, + 0.3993, + 0.4031, + 0.396, + 0.3996, + 0.4015, + 0.4009, + 0.4025, + 0.4007, + 0.4015, + 0.4021, + 0.397, + 0.3991, + 0.405, + 0.4053, + 0.4045, + 0.4016, + 0.4024, + 0.4055, + 0.4033, + 0.4033, + 0.4071, + 0.4049, + 0.4068, + 0.4048, + 0.4049, + 0.4049, + 0.4039, + 0.4045, + 0.4055, + 0.4063, + 0.4046, + 0.406, + 0.4066, + 0.4061, + 0.4062, + 0.4059, + 0.406, + 0.4061, + 0.4062 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.010068703442811966, + 0.1245778352022171, + 0.09253278374671936, + 0.08268401026725769, + 0.05013114959001541, + 0.041134029626846313, + 0.017736738547682762, + 0.00011690100654959679, + -0.07415217161178589, + 0.024469580501317978, + -0.045304883271455765, + 0.9951483011245728 + ], + "perturbation_rho": [ + -0.006299033761024475, + 0.014822498895227909, + 0.003820352256298065, + 0.018621522933244705, + -0.0024977736175060272, + -0.0020938459783792496, + -0.018873507156968117, + 0.056286461651325226, + 0.008058521896600723, + 0.012508060783147812, + 0.03497573360800743, + -0.005283009260892868 + ], + "nudging": { + "0.001": [ + -8.329516276717186e-07, + -4.055909812450409e-07, + -1.9476283341646194e-07, + -1.578591763973236e-07, + -7.811468094587326e-08, + -4.6566128730773926e-08, + -2.0256265997886658e-08, + 1.7113052308559418e-08, + 2.9802322387695312e-08, + 6.984919309616089e-10, + 1.280568540096283e-08, + -5.175825208425522e-07 + ], + "0.003": [ + -2.361135557293892e-06, + -1.1622905731201172e-06, + -4.988396540284157e-07, + -3.294553607702255e-07, + -1.5029218047857285e-07, + -1.0221265256404877e-07, + -3.748573362827301e-08, + 1.4202669262886047e-08, + 1.280568540096283e-07, + -5.366746336221695e-08, + 6.51925802230835e-08, + -1.7427373677492142e-06 + ], + "0.01": [ + -7.986207492649555e-06, + -3.8052676245570183e-06, + -1.7613638192415237e-06, + -9.794021025300026e-07, + -5.171168595552444e-07, + -2.832384780049324e-07, + -1.4971010386943817e-07, + -7.182825356721878e-08, + 4.4121406972408295e-07, + -2.468004822731018e-07, + 2.0524021238088608e-07, + -6.320537067949772e-06 + ] + }, + "hidden_norms_per_layer": [ + 7009.81494140625, + 120237.1953125, + 234765.75, + 332169.78125, + 346779.53125, + 390112.375, + 463708.90625, + 668915.8125, + 1122988.5, + 1967430.625, + 1988171.0, + 2240154.75, + 1633562.75 + ], + "bp_grad_norms_per_layer": [ + 2.4624005163786933e-05, + 1.2478232065404882e-06, + 6.924460080881545e-07, + 5.278115509099734e-07, + 4.257075261193677e-07, + 3.652528164366231e-07, + 3.4045956454065163e-07, + 3.226113562959654e-07, + 3.1934513344822335e-07, + 3.1868384553490614e-07, + 3.1644472642256005e-07, + 3.1672169598095934e-07, + 3.163899009450688e-07 + ] + }, + "drift": { + "embed.weight": 47.005374347880874, + "embed.bias": 13.243067966808145, + "blocks.0.ln.weight": 1.2686282540844722, + "blocks.0.w1.weight": 17.553869325595105, + "blocks.0.w1.bias": 13.820052496499288, + "blocks.0.w2.weight": 60.50343593281807, + "blocks.1.ln.weight": 0.9603227875454038, + "blocks.1.w1.weight": 17.719833102219965, + "blocks.1.w1.bias": 8.605211656313518, + "blocks.1.w2.weight": 46.36885490302975, + "blocks.2.ln.weight": 0.8397655185331075, + "blocks.2.w1.weight": 17.32854478894422, + "blocks.2.w1.bias": 8.14502863318518, + "blocks.2.w2.weight": 46.8756195290007, + "blocks.3.ln.weight": 0.8514772849977531, + "blocks.3.w1.weight": 17.625201649093363, + "blocks.3.w1.bias": 4.891298958534819, + "blocks.3.w2.weight": 49.35104323212137, + "blocks.4.ln.weight": 0.7181238110900667, + "blocks.4.w1.weight": 17.236589560962, + "blocks.4.w1.bias": 6.048278381119212, + "blocks.4.w2.weight": 37.01349206570791, + "blocks.5.ln.weight": 0.7889239428916937, + "blocks.5.w1.weight": 18.624277821727198, + "blocks.5.w1.bias": 8.232516711563722, + "blocks.5.w2.weight": 34.44857991057927, + "blocks.6.ln.weight": 0.9087775281092089, + "blocks.6.w1.weight": 20.478021505238107, + "blocks.6.w1.bias": 9.991577823388699, + "blocks.6.w2.weight": 41.635213960063496, + "blocks.7.ln.weight": 0.8672691193491717, + "blocks.7.w1.weight": 23.031638161138872, + "blocks.7.w1.bias": 15.432108567671811, + "blocks.7.w2.weight": 30.494694204720314, + "blocks.8.ln.weight": 0.9279180982195232, + "blocks.8.w1.weight": 25.623083539907853, + "blocks.8.w1.bias": 21.104273424027447, + "blocks.8.w2.weight": 30.431336626547836, + "blocks.9.ln.weight": 0.6368710077977572, + "blocks.9.w1.weight": 17.55317251649144, + "blocks.9.w1.bias": 12.498837888749929, + "blocks.9.w2.weight": 63.802683742207456, + "blocks.10.ln.weight": 0.6261645701278474, + "blocks.10.w1.weight": 19.149302928700966, + "blocks.10.w1.bias": 19.63354445246865, + "blocks.10.w2.weight": 28.244603543415494, + "blocks.11.ln.weight": 0.7596072689153585, + "blocks.11.w1.weight": 19.668235342459408, + "blocks.11.w1.bias": 15.490287106269353, + "blocks.11.w2.weight": 55.03718141211597, + "out_ln.weight": 0.33939420479137183, + "out_head.weight": 5.440112536505558, + "out_head.bias": 2.010159660293023 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 12, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 7 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L12_seed7", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d512_L12_seed8/results_cifar10.json b/results/fa_dfa_d512_L12_seed8/results_cifar10.json new file mode 100644 index 0000000..8d804da --- /dev/null +++ b/results/fa_dfa_d512_L12_seed8/results_cifar10.json @@ -0,0 +1,969 @@ +{ + "8": { + "dfa": { + "log": { + "train_loss": [ + 2.0621551152038573, + 2.032111440887451, + 2.026601074066162, + 2.025331893157959, + 2.020625417556763, + 2.0179995822143555, + 2.0146445192718505, + 2.0131945146179198, + 2.0136738064575197, + 2.0107804653549195, + 2.0097129691314697, + 2.0098247957611086, + 2.0069902968597413, + 2.0106662986755373, + 2.005746477012634, + 2.006078488845825, + 2.0048853853988646, + 2.0049549353027345, + 2.000735955581665, + 2.0011458109283446, + 2.000437735824585, + 1.997751926422119, + 2.0025174723052976, + 1.998807412185669, + 2.000533748397827, + 1.9959854383087159, + 1.9978079619979858, + 1.9958966563415528, + 1.996048821105957, + 1.9932382696533204, + 1.9939029093170166, + 1.9939674211120606, + 1.993965982017517, + 1.9950546265411377, + 1.9910492811584473, + 1.9932873993301392, + 1.9934390425109862, + 1.9942208531188965, + 1.993705040283203, + 1.9926780229949952, + 1.9916279154205323, + 1.9933884371566772, + 1.9898671300506592, + 1.989496806678772, + 1.990106160736084, + 1.9908844341278076, + 1.9897770419311522, + 1.9911138761901856, + 1.9896133652496337, + 1.9886156379699707, + 1.989099236602783, + 1.9889647799682617, + 1.987695361251831, + 1.987419519920349, + 1.9892369752502441, + 1.9867283428955078, + 1.985670655517578, + 1.985864746055603, + 1.9890831967163085, + 1.986451468811035, + 1.98608133934021, + 1.986669381942749, + 1.9860679926300049, + 1.9857238763046265, + 1.9861250590515136, + 1.9867476695251465, + 1.9855187320709229, + 1.9870084239959718, + 1.9870807974243163, + 1.9854985891723633, + 1.9862063644790648, + 1.986035345840454, + 1.9848036085510254, + 1.9857490323638916, + 1.9848638201141358, + 1.9828469595336915, + 1.984784072341919, + 1.986132272644043, + 1.9855829830932616, + 1.9845155084228516, + 1.9863489217376709, + 1.983173909263611, + 1.9847177695465088, + 1.985843783493042, + 1.9844750480651856, + 1.9835583266448975, + 1.9832866216278076, + 1.9824517697906494, + 1.9833141297912598, + 1.9830520043182374, + 1.9822975007629395, + 1.9850699710464477, + 1.9854454914855957, + 1.984017345199585, + 1.9839908868408203, + 1.9843200086212158, + 1.9820627519989014, + 1.9816079531478883, + 1.9821515436553956, + 1.9855904679107665 + ], + "train_acc": [ + 0.2372, + 0.24852, + 0.25134, + 0.25132, + 0.2549, + 0.25304, + 0.25902, + 0.26106, + 0.25824, + 0.2579, + 0.26096, + 0.25852, + 0.26276, + 0.2618, + 0.2637, + 0.26306, + 0.26492, + 0.26136, + 0.26604, + 0.26754, + 0.26836, + 0.26768, + 0.26738, + 0.26802, + 0.26928, + 0.26922, + 0.26836, + 0.2679, + 0.2719, + 0.27114, + 0.26944, + 0.27052, + 0.2709, + 0.26888, + 0.26714, + 0.27096, + 0.2726, + 0.27154, + 0.27206, + 0.27296, + 0.27414, + 0.27206, + 0.27384, + 0.27372, + 0.27318, + 0.27372, + 0.27472, + 0.27246, + 0.27264, + 0.27428, + 0.27502, + 0.27448, + 0.2742, + 0.27528, + 0.275, + 0.27736, + 0.2766, + 0.2759, + 0.27522, + 0.27712, + 0.27626, + 0.27468, + 0.27628, + 0.27488, + 0.27464, + 0.27474, + 0.27628, + 0.2782, + 0.27526, + 0.27606, + 0.27564, + 0.27476, + 0.27602, + 0.27802, + 0.2757, + 0.27936, + 0.27668, + 0.27736, + 0.2746, + 0.27752, + 0.27548, + 0.27848, + 0.2752, + 0.27484, + 0.27662, + 0.27726, + 0.2752, + 0.27864, + 0.27702, + 0.27936, + 0.27774, + 0.27604, + 0.27712, + 0.27604, + 0.27688, + 0.27666, + 0.27784, + 0.27676, + 0.27736, + 0.27638 + ], + "test_acc": [ + 0.2412, + 0.2706, + 0.2562, + 0.262, + 0.2857, + 0.2628, + 0.2833, + 0.2782, + 0.2575, + 0.2859, + 0.2873, + 0.267, + 0.288, + 0.2774, + 0.2724, + 0.2759, + 0.2718, + 0.2937, + 0.2699, + 0.2666, + 0.2763, + 0.2678, + 0.299, + 0.2876, + 0.2835, + 0.2979, + 0.28, + 0.2829, + 0.2768, + 0.2923, + 0.281, + 0.2884, + 0.2948, + 0.2862, + 0.2896, + 0.2881, + 0.2871, + 0.2895, + 0.2905, + 0.298, + 0.2897, + 0.2784, + 0.2898, + 0.2972, + 0.2879, + 0.2895, + 0.2881, + 0.2884, + 0.2944, + 0.2935, + 0.3039, + 0.2886, + 0.2993, + 0.2857, + 0.2931, + 0.2941, + 0.2933, + 0.3021, + 0.2938, + 0.2991, + 0.2841, + 0.2896, + 0.2945, + 0.2911, + 0.3, + 0.2963, + 0.2894, + 0.2908, + 0.2917, + 0.2946, + 0.2927, + 0.2946, + 0.296, + 0.3, + 0.2935, + 0.2898, + 0.2955, + 0.2929, + 0.2928, + 0.2992, + 0.2931, + 0.2954, + 0.2933, + 0.2924, + 0.2933, + 0.2925, + 0.2919, + 0.2941, + 0.2941, + 0.2946, + 0.2927, + 0.2945, + 0.294, + 0.2931, + 0.2945, + 0.2935, + 0.2943, + 0.2943, + 0.2946, + 0.2946 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.3852921724319458, + -0.0006387766916304827, + 0.00021705820108763874, + -0.00021593061683233827, + -0.0001868726685643196, + 2.6933841581922024e-05, + 0.00016013934509828687, + 7.406133954646066e-05, + -3.145005030091852e-05, + 0.0004791871178895235, + 0.0008787637343630195, + -0.0007018762989901006 + ], + "perturbation_rho": [ + 0.01893431320786476, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -5.145557224750519e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.3075768947601318e-06, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -4.1211023926734924e-06, + 9.313225746154785e-10, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 9.313225746154785e-10, + 0.0, + 0.0, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 55220.6953125, + 703320128.0, + 4374171648.0, + 4628753408.0, + 5259584512.0, + 5602404864.0, + 6980004864.0, + 7249905664.0, + 7601436160.0, + 8436634624.0, + 9617230848.0, + 9769342976.0, + 10366923776.0 + ], + "bp_grad_norms_per_layer": [ + 2.842145931936102e-07, + 1.8565524118052679e-10, + 1.8530457723819893e-10, + 1.8531473577887425e-10, + 1.852697301130135e-10, + 1.852322323303568e-10, + 1.8537825441367062e-10, + 1.8532649026514747e-10, + 1.8534035417516748e-10, + 1.8526735701129837e-10, + 1.85539111852151e-10, + 1.8548607094714953e-10, + 1.8552165359508876e-10 + ] + }, + "drift": { + "embed.weight": 331.10430617288347, + "embed.bias": 252.9571084710531, + "blocks.0.ln.weight": 10.63590306325829, + "blocks.0.w1.weight": 263.71773941078897, + "blocks.0.w1.bias": 240.90214072476394, + "blocks.0.w2.weight": 484.24212277052584, + "blocks.1.ln.weight": 8.994925940820934, + "blocks.1.w1.weight": 375.0257234388459, + "blocks.1.w1.bias": 372.81964220264507, + "blocks.1.w2.weight": 399.05181196420534, + "blocks.2.ln.weight": 6.785381078745234, + "blocks.2.w1.weight": 237.77081904994165, + "blocks.2.w1.bias": 215.00165533028638, + "blocks.2.w2.weight": 243.766610543445, + "blocks.3.ln.weight": 8.749875392840245, + "blocks.3.w1.weight": 331.37899522969747, + "blocks.3.w1.bias": 315.8500446097451, + "blocks.3.w2.weight": 312.3537327104354, + "blocks.4.ln.weight": 8.073930968349993, + "blocks.4.w1.weight": 291.9215675635427, + "blocks.4.w1.bias": 265.90446228164745, + "blocks.4.w2.weight": 273.96062778369446, + "blocks.5.ln.weight": 9.487609121353676, + "blocks.5.w1.weight": 379.3472655416328, + "blocks.5.w1.bias": 364.18802309088414, + "blocks.5.w2.weight": 375.20829167636464, + "blocks.6.ln.weight": 7.271832594737899, + "blocks.6.w1.weight": 274.22615021165615, + "blocks.6.w1.bias": 253.82742699623324, + "blocks.6.w2.weight": 263.6858849060695, + "blocks.7.ln.weight": 8.060212000337511, + "blocks.7.w1.weight": 280.8757929768, + "blocks.7.w1.bias": 262.00267937269797, + "blocks.7.w2.weight": 269.2018115874501, + "blocks.8.ln.weight": 9.34446156262141, + "blocks.8.w1.weight": 362.0470567061417, + "blocks.8.w1.bias": 345.9730629510181, + "blocks.8.w2.weight": 346.1165305240824, + "blocks.9.ln.weight": 10.547709601128119, + "blocks.9.w1.weight": 421.4247930203293, + "blocks.9.w1.bias": 384.7717987562065, + "blocks.9.w2.weight": 391.67325768744075, + "blocks.10.ln.weight": 7.98341504623973, + "blocks.10.w1.weight": 310.77250388889735, + "blocks.10.w1.bias": 289.60855856986865, + "blocks.10.w2.weight": 285.0024842791926, + "blocks.11.ln.weight": 9.5014629592837, + "blocks.11.w1.weight": 374.13082807575006, + "blocks.11.w1.bias": 348.0579252323944, + "blocks.11.w2.weight": 345.1186750982964, + "out_ln.weight": 0.6314801307530133, + "out_head.weight": 8.99875779950455, + "out_head.bias": 0.46869532178721607 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0407437321472166, + 1.9698008823394775, + 1.9470496474838257, + 1.928373962173462, + 1.9022277151870728, + 1.8882839569091796, + 1.8752377725219727, + 1.8682713878631592, + 1.8643299547576904, + 1.8616547422027587, + 1.861611949081421, + 1.8620045602416992, + 1.8579721353149414, + 1.8603198122406006, + 1.852560590248108, + 1.850361158065796, + 1.8412007814788818, + 1.8371962604141234, + 1.830751480064392, + 1.830014287109375, + 1.8211512073135376, + 1.8158404816055298, + 1.8152668701553345, + 1.8068659871673585, + 1.807379986190796, + 1.8042517069244384, + 1.8004914111709596, + 1.7970869720077514, + 1.7892543896484374, + 1.786518664894104, + 1.7887954705047608, + 1.787979825515747, + 1.782947692489624, + 1.7796524175643922, + 1.7780501412963867, + 1.7751860930252075, + 1.77746077003479, + 1.7734027153778076, + 1.7719406731414795, + 1.7689697579574586, + 1.7694388663101197, + 1.7689455612182616, + 1.7627556784820557, + 1.76162135345459, + 1.7598590536880494, + 1.7624462253189086, + 1.7612236563873291, + 1.7604118212127686, + 1.757069122390747, + 1.7525782149887086, + 1.7570673566055297, + 1.753389842262268, + 1.7526489987564087, + 1.7506390865325927, + 1.7564489751434327, + 1.7510275960922241, + 1.7505046075057984, + 1.7489783365249634, + 1.7482558310699463, + 1.7478452098846435, + 1.746708115234375, + 1.7485102898788452, + 1.7510625556182862, + 1.7457116416168212, + 1.7465464822387695, + 1.7432715910339356, + 1.7428231796646119, + 1.7424320980072021, + 1.7456896788787841, + 1.7436157192611694, + 1.7397695401763915, + 1.740845919532776, + 1.7387019988250731, + 1.7427006936645508, + 1.7407404198455811, + 1.7381612093734742, + 1.742300903930664, + 1.741065849647522, + 1.7432577428817748, + 1.7419394709014893, + 1.7393424035644531, + 1.7376308783721923, + 1.7395985202407838, + 1.7360640591812133, + 1.7388901407623292, + 1.7372601049804688, + 1.737554390182495, + 1.7332513166046142, + 1.7339157095718383, + 1.73852820854187, + 1.7378087186050415, + 1.7388774118423462, + 1.732873987159729, + 1.7344641219329835, + 1.7370289881134033, + 1.735849939918518, + 1.731984726486206, + 1.7332192292022706, + 1.7361968645477295, + 1.7349854880142213 + ], + "train_acc": [ + 0.2475, + 0.27976, + 0.29052, + 0.29796, + 0.31032, + 0.31312, + 0.32008, + 0.32376, + 0.3275, + 0.32858, + 0.3309, + 0.33454, + 0.33414, + 0.33476, + 0.33654, + 0.33644, + 0.34346, + 0.3414, + 0.34694, + 0.3445, + 0.34916, + 0.35076, + 0.35364, + 0.35626, + 0.35816, + 0.35708, + 0.35872, + 0.35684, + 0.36218, + 0.36264, + 0.36392, + 0.36252, + 0.36352, + 0.3664, + 0.36594, + 0.36678, + 0.36554, + 0.36668, + 0.36746, + 0.3698, + 0.37054, + 0.36998, + 0.37094, + 0.3744, + 0.3719, + 0.37366, + 0.37076, + 0.3724, + 0.37426, + 0.37956, + 0.3765, + 0.373, + 0.37478, + 0.37612, + 0.37244, + 0.37586, + 0.37716, + 0.37568, + 0.3782, + 0.37812, + 0.37682, + 0.3766, + 0.37616, + 0.3787, + 0.3766, + 0.37772, + 0.38072, + 0.37976, + 0.37842, + 0.3751, + 0.3807, + 0.3796, + 0.37884, + 0.37908, + 0.38054, + 0.38114, + 0.37896, + 0.37942, + 0.3799, + 0.37928, + 0.38292, + 0.37956, + 0.38116, + 0.38178, + 0.38176, + 0.38242, + 0.38104, + 0.38424, + 0.38304, + 0.38046, + 0.3825, + 0.38104, + 0.38308, + 0.38346, + 0.38224, + 0.38272, + 0.38498, + 0.38364, + 0.3829, + 0.3843 + ], + "test_acc": [ + 0.2791, + 0.3084, + 0.3148, + 0.3302, + 0.3397, + 0.3317, + 0.3545, + 0.3557, + 0.3438, + 0.3655, + 0.3703, + 0.3631, + 0.3661, + 0.3705, + 0.3563, + 0.3627, + 0.374, + 0.3834, + 0.3758, + 0.3803, + 0.3716, + 0.3648, + 0.3972, + 0.3934, + 0.3921, + 0.3901, + 0.3811, + 0.3987, + 0.3921, + 0.3942, + 0.3883, + 0.3901, + 0.3797, + 0.3967, + 0.3989, + 0.3944, + 0.3874, + 0.3912, + 0.3979, + 0.3907, + 0.405, + 0.4035, + 0.4043, + 0.4026, + 0.3974, + 0.397, + 0.4022, + 0.4051, + 0.3848, + 0.4045, + 0.4084, + 0.4049, + 0.4029, + 0.3967, + 0.4055, + 0.4057, + 0.406, + 0.4054, + 0.4069, + 0.4042, + 0.3997, + 0.4073, + 0.4117, + 0.406, + 0.4061, + 0.4051, + 0.4068, + 0.4082, + 0.4055, + 0.4063, + 0.4081, + 0.4109, + 0.4081, + 0.4085, + 0.4103, + 0.4019, + 0.4102, + 0.4103, + 0.4103, + 0.4095, + 0.4108, + 0.4093, + 0.408, + 0.4097, + 0.4098, + 0.4075, + 0.4093, + 0.4119, + 0.4105, + 0.4117, + 0.4103, + 0.4113, + 0.4124, + 0.4111, + 0.4111, + 0.4108, + 0.4113, + 0.4122, + 0.4122, + 0.4122 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.03226801007986069, + 0.06280244886875153, + 0.010810771957039833, + -0.03502938896417618, + -0.02679986134171486, + -0.043182265013456345, + -0.04647787660360336, + -0.03189649432897568, + -0.0780816450715065, + -0.05483850836753845, + -0.02444492094218731, + 0.987554669380188 + ], + "perturbation_rho": [ + 0.0010241912677884102, + -0.04057024046778679, + 0.018125081434845924, + 0.0397450290620327, + -0.017151735723018646, + 0.010231071151793003, + 0.02530200220644474, + -0.034604527056217194, + -0.008924206718802452, + 0.008240236900746822, + 0.006566178053617477, + -0.016290105879306793 + ], + "nudging": { + "0.001": [ + -2.0849984139204025e-06, + -2.7567148208618164e-07, + -1.979060471057892e-08, + 8.824281394481659e-08, + 7.82310962677002e-08, + -1.6996636986732483e-08, + 7.520429790019989e-08, + 2.584420144557953e-08, + -8.381903171539307e-09, + 5.820766091346741e-09, + 1.3504177331924438e-08, + -1.0039657354354858e-06 + ], + "0.003": [ + -6.289919838309288e-06, + -7.05476850271225e-07, + -2.3283064365386963e-08, + 1.6996636986732483e-07, + 1.1431984603404999e-07, + 2.3702159523963928e-07, + 1.7462298274040222e-07, + 1.2898817658424377e-07, + 3.096647560596466e-07, + 1.2014061212539673e-07, + 1.3527460396289825e-07, + -3.6957208067178726e-06 + ], + "0.01": [ + -2.0938459783792496e-05, + -2.1811574697494507e-06, + -1.424923539161682e-07, + 4.1816383600234985e-07, + 3.825407475233078e-07, + 5.098991096019745e-07, + 6.461050361394882e-07, + 3.5669654607772827e-07, + 1.1634547263383865e-06, + 6.814952939748764e-07, + 3.4994445741176605e-07, + -1.281173899769783e-05 + ] + }, + "hidden_norms_per_layer": [ + 7157.4072265625, + 94832.078125, + 404715.40625, + 900567.4375, + 1081047.875, + 1230157.0, + 1365586.875, + 1528390.375, + 1618979.25, + 1737319.75, + 1867286.375, + 1900228.625, + 1396480.75 + ], + "bp_grad_norms_per_layer": [ + 2.4219883925979957e-05, + 1.7657896478340263e-06, + 6.163170382933458e-07, + 5.848775117556215e-07, + 5.799907398795767e-07, + 5.837350158799381e-07, + 5.848508521921758e-07, + 5.83334440307226e-07, + 5.784736458736006e-07, + 5.808138325846812e-07, + 5.77644073018746e-07, + 5.686248982783582e-07, + 5.330333578967839e-07 + ] + }, + "drift": { + "embed.weight": 47.13343033798094, + "embed.bias": 17.027552330320233, + "blocks.0.ln.weight": 1.1320890684307203, + "blocks.0.w1.weight": 16.27975609186272, + "blocks.0.w1.bias": 12.808052799935883, + "blocks.0.w2.weight": 51.91947766632093, + "blocks.1.ln.weight": 0.9202706367200378, + "blocks.1.w1.weight": 18.82744877577282, + "blocks.1.w1.bias": 12.653020534114173, + "blocks.1.w2.weight": 46.17134940964603, + "blocks.2.ln.weight": 0.7482719581080549, + "blocks.2.w1.weight": 19.557560479232055, + "blocks.2.w1.bias": 16.890590269942404, + "blocks.2.w2.weight": 39.02075074459777, + "blocks.3.ln.weight": 0.5160212396517895, + "blocks.3.w1.weight": 16.570363915867922, + "blocks.3.w1.bias": 17.214082946851736, + "blocks.3.w2.weight": 30.85580581534199, + "blocks.4.ln.weight": 0.43511517848811326, + "blocks.4.w1.weight": 16.64619937865781, + "blocks.4.w1.bias": 18.15413776267707, + "blocks.4.w2.weight": 29.35744053745253, + "blocks.5.ln.weight": 0.46109930109565794, + "blocks.5.w1.weight": 16.648426948133086, + "blocks.5.w1.bias": 18.405016598411816, + "blocks.5.w2.weight": 32.04993283017616, + "blocks.6.ln.weight": 0.5239951706454276, + "blocks.6.w1.weight": 17.307678602670318, + "blocks.6.w1.bias": 17.94260026627366, + "blocks.6.w2.weight": 35.649016996813835, + "blocks.7.ln.weight": 0.549764892230928, + "blocks.7.w1.weight": 17.37898974454947, + "blocks.7.w1.bias": 17.920017558565252, + "blocks.7.w2.weight": 47.618251557182425, + "blocks.8.ln.weight": 0.5059874235487765, + "blocks.8.w1.weight": 16.71552334731513, + "blocks.8.w1.bias": 18.79123189462688, + "blocks.8.w2.weight": 33.90958537646263, + "blocks.9.ln.weight": 0.5547967624139118, + "blocks.9.w1.weight": 17.469950204626425, + "blocks.9.w1.bias": 17.514572577139017, + "blocks.9.w2.weight": 47.637641198604065, + "blocks.10.ln.weight": 0.4702376652398834, + "blocks.10.w1.weight": 15.609742871134038, + "blocks.10.w1.bias": 14.194008119737171, + "blocks.10.w2.weight": 53.92588624506532, + "blocks.11.ln.weight": 0.5638652470821104, + "blocks.11.w1.weight": 18.329914062311936, + "blocks.11.w1.bias": 16.960111542785533, + "blocks.11.w2.weight": 61.54428413245766, + "out_ln.weight": 0.36613157588839446, + "out_head.weight": 6.704387093335344, + "out_head.bias": 0.6590609045431134 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 12, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 8 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L12_seed8", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d512_L12_seed9/results_cifar10.json b/results/fa_dfa_d512_L12_seed9/results_cifar10.json new file mode 100644 index 0000000..974d42a --- /dev/null +++ b/results/fa_dfa_d512_L12_seed9/results_cifar10.json @@ -0,0 +1,969 @@ +{ + "9": { + "dfa": { + "log": { + "train_loss": [ + 2.086543095245361, + 2.049035365447998, + 2.0382854364776612, + 2.0298845935058596, + 2.0281315744018555, + 2.0237447773742674, + 2.0194822165679933, + 2.019161204986572, + 2.015748862915039, + 2.0162334437561036, + 2.0126681255340575, + 2.012035894165039, + 2.009547615966797, + 2.011429062194824, + 2.0080011304473877, + 2.0085202750396727, + 2.0067744227600097, + 2.0047744031143186, + 2.004743169174194, + 2.001354461517334, + 2.0038469007873534, + 2.0027169017028807, + 1.9995321035003661, + 1.9969835998916625, + 1.9975377194595336, + 1.9985439079284668, + 1.9960253533172607, + 1.9974639236831666, + 1.9950202367401122, + 1.9968991885375977, + 1.9976718412017822, + 1.9939492235565186, + 1.9947053424835206, + 1.995585137901306, + 1.9956871920776367, + 1.9927901021575927, + 1.992329425201416, + 1.9917698760604858, + 1.9922489316558838, + 1.9921125721740722, + 1.9919519045257568, + 1.9915309338378906, + 1.9927355318450928, + 1.9899454330825805, + 1.9910321056365967, + 1.9931654618835448, + 1.9889872648620606, + 1.9899863806915283, + 1.9893665783309937, + 1.989453801651001, + 1.9890669647216797, + 1.9882669693374633, + 1.9893041844177246, + 1.9891512668609619, + 1.9883573954772948, + 1.9891788550186158, + 1.9858848489379883, + 1.9877911152648926, + 1.987854264564514, + 1.9873136006164551, + 1.9871731573486329, + 1.9879692407989502, + 1.987359190750122, + 1.9866330204772948, + 1.98554376373291, + 1.9857093753051758, + 1.986483130493164, + 1.9841259546279908, + 1.98654155418396, + 1.9857370235824585, + 1.9854363512420654, + 1.9837415993881227, + 1.9846706900024413, + 1.9846486893463136, + 1.9852562395477296, + 1.985122490158081, + 1.9856149569702148, + 1.9855838787460327, + 1.985352071609497, + 1.9836603102874757, + 1.984020567855835, + 1.9842208641052246, + 1.9867661368179321, + 1.9825923907852172, + 1.9839700649261474, + 1.985303124923706, + 1.984482919921875, + 1.9825283669281006, + 1.9836111863708497, + 1.9835795726013183, + 1.9827597061920166, + 1.98389965385437, + 1.9854092786407471, + 1.9815458943939208, + 1.9817595046615601, + 1.9855254986190796, + 1.9835386077880859, + 1.9825453355407714, + 1.9826166521835327, + 1.9829287356567382 + ], + "train_acc": [ + 0.22926, + 0.24766, + 0.24966, + 0.25552, + 0.25304, + 0.25922, + 0.25868, + 0.25894, + 0.2602, + 0.26156, + 0.26166, + 0.26588, + 0.26356, + 0.26296, + 0.2648, + 0.26752, + 0.26888, + 0.26516, + 0.2687, + 0.2697, + 0.27084, + 0.26762, + 0.26938, + 0.27176, + 0.2698, + 0.27094, + 0.2722, + 0.2721, + 0.27342, + 0.27132, + 0.27128, + 0.2735, + 0.27358, + 0.27302, + 0.2743, + 0.27702, + 0.27592, + 0.2762, + 0.27586, + 0.27454, + 0.27708, + 0.27588, + 0.27488, + 0.27664, + 0.27778, + 0.27754, + 0.27744, + 0.27638, + 0.2782, + 0.27892, + 0.27888, + 0.27894, + 0.27778, + 0.2784, + 0.2806, + 0.2788, + 0.27942, + 0.27968, + 0.27888, + 0.2771, + 0.27966, + 0.27918, + 0.28102, + 0.2799, + 0.27998, + 0.27866, + 0.28044, + 0.28208, + 0.28314, + 0.28018, + 0.2797, + 0.28004, + 0.2795, + 0.28196, + 0.28128, + 0.2824, + 0.28326, + 0.28176, + 0.28084, + 0.2816, + 0.28268, + 0.28344, + 0.28014, + 0.283, + 0.2824, + 0.28114, + 0.28384, + 0.28424, + 0.28492, + 0.28422, + 0.28338, + 0.28276, + 0.28152, + 0.28358, + 0.28352, + 0.28086, + 0.2832, + 0.28484, + 0.28126, + 0.28462 + ], + "test_acc": [ + 0.2701, + 0.2621, + 0.2798, + 0.281, + 0.2593, + 0.2849, + 0.2797, + 0.2768, + 0.272, + 0.2901, + 0.2986, + 0.2839, + 0.2741, + 0.2916, + 0.2865, + 0.2818, + 0.3006, + 0.2972, + 0.2972, + 0.3049, + 0.2762, + 0.2939, + 0.2754, + 0.2954, + 0.2984, + 0.3, + 0.2821, + 0.2798, + 0.2887, + 0.3018, + 0.2833, + 0.3076, + 0.2891, + 0.291, + 0.2959, + 0.284, + 0.2942, + 0.3106, + 0.2895, + 0.3004, + 0.3048, + 0.3058, + 0.303, + 0.2951, + 0.3031, + 0.2955, + 0.2961, + 0.3036, + 0.3045, + 0.2976, + 0.2995, + 0.2965, + 0.2912, + 0.3007, + 0.2987, + 0.308, + 0.3073, + 0.3, + 0.3117, + 0.2891, + 0.2945, + 0.3064, + 0.2987, + 0.2929, + 0.2984, + 0.2945, + 0.2945, + 0.3028, + 0.2994, + 0.3008, + 0.3, + 0.3069, + 0.3033, + 0.3088, + 0.3052, + 0.3023, + 0.2992, + 0.2946, + 0.305, + 0.3024, + 0.3081, + 0.3014, + 0.3053, + 0.3039, + 0.3079, + 0.305, + 0.3035, + 0.3018, + 0.3052, + 0.3063, + 0.3025, + 0.3056, + 0.3037, + 0.3046, + 0.3045, + 0.3041, + 0.3043, + 0.3044, + 0.3044, + 0.3043 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.3948982357978821, + 8.632082608528435e-05, + -0.00022141945373732597, + -0.0004791135434061289, + 0.00010106388799613342, + 7.346954225795344e-05, + -7.074545283103362e-05, + -5.661595787387341e-05, + 0.00012481751036830246, + -0.0005871393950656056, + 0.00015422290016431361, + -0.0005310022970661521 + ], + "perturbation_rho": [ + 0.0077664791606366634, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -3.594905138015747e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.3024546205997467e-06, + -3.725290298461914e-09, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -4.144385457038879e-06, + -3.725290298461914e-09, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + -9.313225746154785e-10, + 0.0, + 0.0, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 56626.58984375, + 921777728.0, + 2723525376.0, + 5562149376.0, + 6096715776.0, + 6586259968.0, + 9732721664.0, + 9669530624.0, + 10051646464.0, + 10812058624.0, + 10900319232.0, + 11575299072.0, + 12058745856.0 + ], + "bp_grad_norms_per_layer": [ + 2.953412661099719e-07, + 1.7980278377294212e-10, + 1.7774372251810888e-10, + 1.7754615833087684e-10, + 1.77546435886633e-10, + 1.7753745695792134e-10, + 1.775574409723646e-10, + 1.7755288905796363e-10, + 1.7755619197146189e-10, + 1.7750646785774649e-10, + 1.775143226856457e-10, + 1.7751018710487898e-10, + 1.775422031613516e-10 + ] + }, + "drift": { + "embed.weight": 330.97935370781335, + "embed.bias": 242.7352200338295, + "blocks.0.ln.weight": 10.118660070740958, + "blocks.0.w1.weight": 268.02039354554194, + "blocks.0.w1.bias": 236.60527981405795, + "blocks.0.w2.weight": 479.95056180068593, + "blocks.1.ln.weight": 8.173523635910902, + "blocks.1.w1.weight": 314.00233907878504, + "blocks.1.w1.bias": 303.2790322538351, + "blocks.1.w2.weight": 332.3008689530667, + "blocks.2.ln.weight": 9.503933776173088, + "blocks.2.w1.weight": 407.2148671186256, + "blocks.2.w1.bias": 377.7677645021183, + "blocks.2.w2.weight": 405.2326915964652, + "blocks.3.ln.weight": 8.261827691373458, + "blocks.3.w1.weight": 324.0550552979263, + "blocks.3.w1.bias": 305.54524457584483, + "blocks.3.w2.weight": 300.9480221394592, + "blocks.4.ln.weight": 8.47590401409121, + "blocks.4.w1.weight": 338.56186029144186, + "blocks.4.w1.bias": 322.0375352385375, + "blocks.4.w2.weight": 319.77753486779426, + "blocks.5.ln.weight": 11.22139697754829, + "blocks.5.w1.weight": 458.2444114055581, + "blocks.5.w1.bias": 424.3745814588518, + "blocks.5.w2.weight": 450.83821734650985, + "blocks.6.ln.weight": 7.796159629674181, + "blocks.6.w1.weight": 306.7435557133061, + "blocks.6.w1.bias": 295.1474090093065, + "blocks.6.w2.weight": 287.3353674909529, + "blocks.7.ln.weight": 8.077037696900597, + "blocks.7.w1.weight": 320.0610116694296, + "blocks.7.w1.bias": 297.55111326321224, + "blocks.7.w2.weight": 294.8277249235264, + "blocks.8.ln.weight": 9.437782779016567, + "blocks.8.w1.weight": 381.01065244180916, + "blocks.8.w1.bias": 346.5987668046489, + "blocks.8.w2.weight": 354.1595052479188, + "blocks.9.ln.weight": 7.2340406951431575, + "blocks.9.w1.weight": 277.2401387021752, + "blocks.9.w1.bias": 262.2340251864824, + "blocks.9.w2.weight": 255.1111568089683, + "blocks.10.ln.weight": 9.961816471882193, + "blocks.10.w1.weight": 400.6058052128251, + "blocks.10.w1.bias": 369.00187630886694, + "blocks.10.w2.weight": 388.64956558131564, + "blocks.11.ln.weight": 8.356343855238048, + "blocks.11.w1.weight": 316.91850723934095, + "blocks.11.w1.bias": 297.45861181164963, + "blocks.11.w2.weight": 290.23124955665594, + "out_ln.weight": 0.6534850850308469, + "out_head.weight": 9.848963989978573, + "out_head.bias": 0.41699188717651064 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0459222147369385, + 1.9676443615722656, + 1.9359465097045898, + 1.9049253499984742, + 1.8857684122085572, + 1.8734163967514037, + 1.866455598678589, + 1.8637321212387086, + 1.8555012069320678, + 1.8584735760879516, + 1.84622566532135, + 1.8429653158950805, + 1.8373867583847046, + 1.837373632888794, + 1.8269891134643554, + 1.8270549670791627, + 1.8241438305282593, + 1.8183621447372436, + 1.8176076436233521, + 1.8132607741546631, + 1.8171432834243775, + 1.8106282345199585, + 1.8068263914489746, + 1.8001747159194947, + 1.797019803085327, + 1.799570650062561, + 1.7992187567520141, + 1.7956272640228272, + 1.7893521131134034, + 1.7947827518463135, + 1.792629102745056, + 1.7834515933990478, + 1.784611135520935, + 1.7758458992767334, + 1.77485143119812, + 1.7703186184692383, + 1.7679582196044923, + 1.7615163513946532, + 1.760532038192749, + 1.7643762328338624, + 1.7579109369659425, + 1.755184786720276, + 1.7560388659286499, + 1.7505403981781005, + 1.7491300173950195, + 1.7485786712646485, + 1.7421914972686767, + 1.7423645967864991, + 1.7408842813110352, + 1.7322719787979126, + 1.7337682635498046, + 1.7296864183807372, + 1.7320984534454347, + 1.7284361850357055, + 1.7295022455596925, + 1.7249998714447021, + 1.7229057648086548, + 1.7212110678863526, + 1.72249207862854, + 1.7190113651275636, + 1.717387024230957, + 1.7169812508392335, + 1.7162931756210327, + 1.7126198623275757, + 1.7117092602539064, + 1.7112801861190796, + 1.7114282751846313, + 1.7101980803680419, + 1.7129489760971068, + 1.7100260018539428, + 1.7092081976699829, + 1.703683881187439, + 1.7068217246246338, + 1.7049527828216553, + 1.7057187732315064, + 1.7080302432250976, + 1.7038126770401, + 1.7049555835723877, + 1.7032458050155639, + 1.7034490664672852, + 1.7006872876739503, + 1.7008496173095704, + 1.7032257048797608, + 1.6975849852371216, + 1.7016590633392334, + 1.7036013817596436, + 1.7003546350097656, + 1.6987783340835572, + 1.7004486318969727, + 1.6981003344726562, + 1.6952576486968993, + 1.6982201540374755, + 1.6985680112075805, + 1.6973068224334718, + 1.698023046951294, + 1.6962739462661742, + 1.7007662552642822, + 1.694658960800171, + 1.6946139693069457, + 1.6979838909912108 + ], + "train_acc": [ + 0.24304, + 0.27768, + 0.29718, + 0.3114, + 0.3155, + 0.32342, + 0.32646, + 0.32848, + 0.33164, + 0.33126, + 0.33432, + 0.33792, + 0.33882, + 0.33816, + 0.34222, + 0.34396, + 0.34814, + 0.34378, + 0.34546, + 0.3467, + 0.34866, + 0.35218, + 0.3533, + 0.3518, + 0.35448, + 0.3562, + 0.35548, + 0.358, + 0.35764, + 0.35846, + 0.35716, + 0.3626, + 0.36082, + 0.36478, + 0.36298, + 0.36532, + 0.36858, + 0.36974, + 0.37074, + 0.3673, + 0.37104, + 0.372, + 0.36924, + 0.37348, + 0.37412, + 0.37354, + 0.37672, + 0.37626, + 0.3795, + 0.3796, + 0.37736, + 0.37876, + 0.37752, + 0.38226, + 0.3828, + 0.38434, + 0.38232, + 0.38234, + 0.38028, + 0.38442, + 0.38296, + 0.38464, + 0.3859, + 0.38794, + 0.38652, + 0.3878, + 0.38752, + 0.38512, + 0.3885, + 0.38786, + 0.38678, + 0.3892, + 0.38872, + 0.39076, + 0.38828, + 0.38848, + 0.38858, + 0.38922, + 0.38946, + 0.39026, + 0.38864, + 0.38984, + 0.39182, + 0.3923, + 0.39104, + 0.39152, + 0.39114, + 0.3885, + 0.39122, + 0.39272, + 0.39182, + 0.39102, + 0.39104, + 0.3916, + 0.39454, + 0.39062, + 0.38996, + 0.39216, + 0.39174, + 0.39228 + ], + "test_acc": [ + 0.2729, + 0.3136, + 0.3293, + 0.3473, + 0.3313, + 0.3519, + 0.3629, + 0.3562, + 0.3516, + 0.3661, + 0.3733, + 0.3723, + 0.3632, + 0.3667, + 0.3676, + 0.3685, + 0.3723, + 0.3608, + 0.3781, + 0.3773, + 0.3671, + 0.3711, + 0.3595, + 0.377, + 0.3849, + 0.3701, + 0.3727, + 0.3603, + 0.3725, + 0.382, + 0.3783, + 0.3762, + 0.3832, + 0.3806, + 0.387, + 0.3873, + 0.3862, + 0.3864, + 0.3908, + 0.3939, + 0.3997, + 0.396, + 0.393, + 0.3846, + 0.3913, + 0.3967, + 0.3972, + 0.4013, + 0.3994, + 0.4043, + 0.3988, + 0.4024, + 0.3951, + 0.3982, + 0.3985, + 0.4081, + 0.4033, + 0.4029, + 0.4082, + 0.3986, + 0.4028, + 0.405, + 0.4054, + 0.4051, + 0.408, + 0.4069, + 0.4066, + 0.4085, + 0.4073, + 0.4096, + 0.4103, + 0.4066, + 0.4131, + 0.4106, + 0.4108, + 0.41, + 0.4121, + 0.41, + 0.4102, + 0.4076, + 0.4129, + 0.4109, + 0.4094, + 0.4103, + 0.41, + 0.4094, + 0.4121, + 0.4119, + 0.4126, + 0.4125, + 0.4116, + 0.4109, + 0.4128, + 0.4123, + 0.4129, + 0.4113, + 0.411, + 0.4114, + 0.4114, + 0.4115 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.03677885979413986, + 0.03784121572971344, + 0.08952198922634125, + 0.04931114614009857, + -0.00528424559161067, + 0.03716714680194855, + -0.02366219088435173, + -0.042923711240291595, + -0.07109162956476212, + -0.028443587943911552, + 0.01791619509458542, + 0.9962450861930847 + ], + "perturbation_rho": [ + 0.0005302987992763519, + 0.012386747635900974, + 0.023949606344103813, + 0.006188662722706795, + 0.05080874264240265, + -0.01257331669330597, + -0.003029853105545044, + 0.003304736688733101, + 0.043240927159786224, + 0.0031258384697139263, + -0.018814031034708023, + -0.018657812848687172 + ], + "nudging": { + "0.001": [ + -3.0247028917074203e-06, + -1.51805579662323e-07, + -1.8300488591194153e-07, + -1.150183379650116e-07, + -8.847564458847046e-09, + -3.64379957318306e-08, + 9.778887033462524e-09, + 3.096647560596466e-08, + 5.3551048040390015e-08, + -1.3737007975578308e-08, + -5.820766091346741e-09, + -7.356284186244011e-07 + ], + "0.003": [ + -9.037903510034084e-06, + -4.105968400835991e-07, + -4.7474168241024017e-07, + -2.3958273231983185e-07, + -1.0593794286251068e-08, + -1.551816239953041e-07, + 3.8067810237407684e-08, + 1.2014061212539673e-07, + 1.5227124094963074e-07, + 1.1094380170106888e-07, + -8.183997124433517e-08, + -2.6408815756440163e-06 + ], + "0.01": [ + -3.033224493265152e-05, + -1.35018490254879e-06, + -1.6904668882489204e-06, + -6.724148988723755e-07, + 3.341119736433029e-08, + -5.158362910151482e-07, + 1.4738179743289948e-07, + 3.360910341143608e-07, + 6.420304998755455e-07, + 2.4598557502031326e-07, + -2.3667234927415848e-07, + -9.222421795129776e-06 + ] + }, + "hidden_norms_per_layer": [ + 6368.87646484375, + 60059.984375, + 180441.8125, + 274150.96875, + 517201.65625, + 857839.4375, + 862732.0, + 1219729.5, + 1498813.125, + 1794381.5, + 1887047.25, + 1900776.375, + 1523186.0 + ], + "bp_grad_norms_per_layer": [ + 3.379595364094712e-05, + 2.1159430616535246e-06, + 6.931001053089858e-07, + 4.934564117320406e-07, + 4.4718211711369804e-07, + 4.4051026293345785e-07, + 4.3748320877057267e-07, + 4.3679304440047417e-07, + 4.368349380001746e-07, + 4.3717255948649836e-07, + 4.362396737178642e-07, + 4.269320186267578e-07, + 4.208905863833934e-07 + ] + }, + "drift": { + "embed.weight": 43.40305026092655, + "embed.bias": 12.884723360242011, + "blocks.0.ln.weight": 1.1396957317801488, + "blocks.0.w1.weight": 15.815325196455687, + "blocks.0.w1.bias": 9.997007586540093, + "blocks.0.w2.weight": 51.54342945808938, + "blocks.1.ln.weight": 0.9665440839605698, + "blocks.1.w1.weight": 17.964098193399778, + "blocks.1.w1.bias": 7.662859769714302, + "blocks.1.w2.weight": 45.06560123113467, + "blocks.2.ln.weight": 0.7981284775892137, + "blocks.2.w1.weight": 16.830439973227175, + "blocks.2.w1.bias": 8.949450279000775, + "blocks.2.w2.weight": 46.74550325782192, + "blocks.3.ln.weight": 0.7868855526754271, + "blocks.3.w1.weight": 17.044252690417927, + "blocks.3.w1.bias": 13.670590135892402, + "blocks.3.w2.weight": 38.40914010958083, + "blocks.4.ln.weight": 0.6395411371371652, + "blocks.4.w1.weight": 17.982212663546917, + "blocks.4.w1.bias": 18.982959249125763, + "blocks.4.w2.weight": 28.60564989778907, + "blocks.5.ln.weight": 0.5328988611331281, + "blocks.5.w1.weight": 14.634932988244662, + "blocks.5.w1.bias": 10.974172599914699, + "blocks.5.w2.weight": 39.20486954308308, + "blocks.6.ln.weight": 0.5485338249117044, + "blocks.6.w1.weight": 18.159621784839892, + "blocks.6.w1.bias": 18.891505344910815, + "blocks.6.w2.weight": 28.93284323285378, + "blocks.7.ln.weight": 0.5894477945101706, + "blocks.7.w1.weight": 18.009195934295505, + "blocks.7.w1.bias": 18.37430010215222, + "blocks.7.w2.weight": 30.406749000767654, + "blocks.8.ln.weight": 0.6068497649428072, + "blocks.8.w1.weight": 19.638841310683983, + "blocks.8.w1.bias": 20.60396737480574, + "blocks.8.w2.weight": 33.89217379886503, + "blocks.9.ln.weight": 0.678936766492041, + "blocks.9.w1.weight": 16.2394672260477, + "blocks.9.w1.bias": 14.562794517299237, + "blocks.9.w2.weight": 50.979251931044914, + "blocks.10.ln.weight": 0.6040394973297021, + "blocks.10.w1.weight": 14.292818466799657, + "blocks.10.w1.bias": 9.974035975062883, + "blocks.10.w2.weight": 60.410553219844616, + "blocks.11.ln.weight": 0.6493019680977011, + "blocks.11.w1.weight": 15.455381797619573, + "blocks.11.w1.bias": 12.883608145619089, + "blocks.11.w2.weight": 64.05421036694803, + "out_ln.weight": 0.3508285396209464, + "out_head.weight": 6.026469963854456, + "out_head.bias": 0.7776800281309532 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 12, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 9 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L12_seed9", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d512_L6_seed0/results_cifar10.json b/results/fa_dfa_d512_L6_seed0/results_cifar10.json new file mode 100644 index 0000000..821b0e0 --- /dev/null +++ b/results/fa_dfa_d512_L6_seed0/results_cifar10.json @@ -0,0 +1,837 @@ +{ + "0": { + "dfa": { + "log": { + "train_loss": [ + 2.0753555574798583, + 2.0437081289672854, + 2.031278556365967, + 2.027659920578003, + 2.0243590501403808, + 2.0202790953826906, + 2.0175082940673827, + 2.015957561340332, + 2.016036532974243, + 2.013858056335449, + 2.0126232041931154, + 2.006905050048828, + 2.0110664825439453, + 2.0065614931488036, + 2.007456555709839, + 2.0070649071502684, + 2.0075456425476075, + 2.004424341583252, + 2.005271594772339, + 2.0039944921875, + 2.0024667238998415, + 2.000112854309082, + 1.9993625988769532, + 2.002483249664307, + 2.0039204889678954, + 1.9985878760528564, + 2.0006166967010497, + 2.0003138301086425, + 1.9996023558807372, + 1.9986100010681151, + 1.99738998298645, + 1.9980814916229248, + 1.996693759994507, + 2.0003671773529055, + 1.9974340956878662, + 1.9980345281982421, + 1.9958612198638916, + 1.9961950925445557, + 1.9993210222625732, + 1.9953513426971436, + 1.9962950857925414, + 1.9969137393951415, + 1.9964947815704346, + 1.9980549166870116, + 1.9973483322525025, + 1.9953596134185791, + 1.995900859451294, + 1.9963403786849976, + 1.9947577867126465, + 1.996345650177002, + 1.9976896784210205, + 1.9959460092163086, + 1.9935208990478515, + 1.9943153414154053, + 1.9918208218765259, + 1.9932028875350951, + 1.9944638661956786, + 1.995019360809326, + 1.9929348825073243, + 1.9962419706726073, + 1.9949486227035522, + 1.991672697906494, + 1.9935600751495361, + 1.9919422644424438, + 1.994082801437378, + 1.9919944891738892, + 1.9940126430511476, + 1.9923382126617433, + 1.9926940571594238, + 1.9947953038024901, + 1.993876279335022, + 1.992678325843811, + 1.992137566757202, + 1.9900086630630494, + 1.9903694836044312, + 1.9928425230407716, + 1.9906801569366455, + 1.9912908507919311, + 1.9918341095352172, + 1.9906631398773194, + 1.9910551013183593, + 1.9912693448257446, + 1.9916500932312011, + 1.992011311416626, + 1.9930540170669555, + 1.9900453987121582, + 1.9914034769058226, + 1.9921153451156617, + 1.9884680337142944, + 1.9897785013198852, + 1.9896507069396974, + 1.9915314351654052, + 1.9887127932357789, + 1.9907834010314942, + 1.990130773010254, + 1.9915763851547241, + 1.9891119548797607, + 1.9894035186386108, + 1.9906070321273803, + 1.990175763015747 + ], + "train_acc": [ + 0.23202, + 0.24612, + 0.2534, + 0.25296, + 0.25586, + 0.25736, + 0.26078, + 0.25968, + 0.26376, + 0.26194, + 0.26464, + 0.26644, + 0.26328, + 0.26692, + 0.26742, + 0.26744, + 0.26808, + 0.2689, + 0.26662, + 0.26898, + 0.27116, + 0.27066, + 0.27046, + 0.2737, + 0.26932, + 0.27098, + 0.27496, + 0.2742, + 0.27396, + 0.27362, + 0.27356, + 0.27542, + 0.2752, + 0.27348, + 0.27492, + 0.2737, + 0.27614, + 0.27596, + 0.27528, + 0.27582, + 0.27624, + 0.27556, + 0.27432, + 0.27686, + 0.27726, + 0.278, + 0.27874, + 0.27706, + 0.27906, + 0.27922, + 0.27678, + 0.27996, + 0.27996, + 0.27956, + 0.27936, + 0.27786, + 0.27958, + 0.27888, + 0.2801, + 0.2786, + 0.28002, + 0.281, + 0.28102, + 0.27928, + 0.27852, + 0.2814, + 0.28026, + 0.28168, + 0.28254, + 0.27894, + 0.28102, + 0.28146, + 0.27998, + 0.28272, + 0.28324, + 0.28034, + 0.28126, + 0.28156, + 0.28228, + 0.28094, + 0.28084, + 0.28196, + 0.28344, + 0.28236, + 0.28084, + 0.28244, + 0.2821, + 0.28254, + 0.28398, + 0.2826, + 0.28278, + 0.28214, + 0.28376, + 0.28172, + 0.28234, + 0.28182, + 0.28146, + 0.28132, + 0.2826, + 0.28366 + ], + "test_acc": [ + 0.2527, + 0.2731, + 0.2652, + 0.2735, + 0.2758, + 0.2862, + 0.2692, + 0.2901, + 0.2963, + 0.2897, + 0.2793, + 0.2945, + 0.2734, + 0.3044, + 0.2915, + 0.2965, + 0.2975, + 0.2951, + 0.3018, + 0.2954, + 0.2868, + 0.2943, + 0.3068, + 0.2639, + 0.3007, + 0.303, + 0.3009, + 0.3059, + 0.3024, + 0.2876, + 0.2984, + 0.2926, + 0.3024, + 0.3034, + 0.2906, + 0.3054, + 0.2978, + 0.2872, + 0.3036, + 0.3052, + 0.2874, + 0.3045, + 0.3028, + 0.2949, + 0.2981, + 0.3047, + 0.3073, + 0.2937, + 0.3078, + 0.2894, + 0.3098, + 0.2913, + 0.2897, + 0.306, + 0.3075, + 0.302, + 0.3025, + 0.2982, + 0.2983, + 0.31, + 0.3071, + 0.3013, + 0.309, + 0.3042, + 0.3009, + 0.2958, + 0.3024, + 0.3013, + 0.3055, + 0.3016, + 0.3041, + 0.3077, + 0.3023, + 0.3111, + 0.308, + 0.3005, + 0.3051, + 0.3101, + 0.3103, + 0.3006, + 0.3025, + 0.3034, + 0.3043, + 0.3082, + 0.3011, + 0.3069, + 0.3051, + 0.3023, + 0.3033, + 0.3051, + 0.3041, + 0.3044, + 0.3029, + 0.3047, + 0.3047, + 0.3048, + 0.3056, + 0.3056, + 0.3053, + 0.3053 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.3933757543563843, + -0.00033686644746921957, + -0.00029227498453110456, + 0.00023268867516890168, + -0.00033252357388846576, + -0.0006308911251835525 + ], + "perturbation_rho": [ + -0.005786933470517397, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -4.791654646396637e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.2507662177085876e-06, + 0.0, + 0.0, + -3.725290298461914e-09, + 0.0, + 0.0 + ], + "0.01": [ + -4.01865690946579e-06, + 0.0, + 1.862645149230957e-09, + -3.725290298461914e-09, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 52006.18359375, + 1299919104.0, + 4471346176.0, + 5541193216.0, + 6115124736.0, + 6220777984.0, + 7776075776.0 + ], + "bp_grad_norms_per_layer": [ + 2.7690890647136257e-07, + 2.9755062302960766e-10, + 2.9703242643286387e-10, + 2.9693505987360425e-10, + 2.969274826014612e-10, + 2.96939195454371e-10, + 2.9694927072831945e-10 + ] + }, + "drift": { + "embed.weight": 330.18669911453446, + "embed.bias": 275.0701726252062, + "blocks.0.ln.weight": 9.87127425013801, + "blocks.0.w1.weight": 288.2752398777733, + "blocks.0.w1.bias": 276.1693244742186, + "blocks.0.w2.weight": 476.3482702241823, + "blocks.1.ln.weight": 9.208131003871356, + "blocks.1.w1.weight": 379.93945965650295, + "blocks.1.w1.bias": 370.165908360661, + "blocks.1.w2.weight": 391.3759720182112, + "blocks.2.ln.weight": 9.291829925999405, + "blocks.2.w1.weight": 372.47037498699524, + "blocks.2.w1.bias": 340.94957924841253, + "blocks.2.w2.weight": 343.62414266271355, + "blocks.3.ln.weight": 9.704848817987399, + "blocks.3.w1.weight": 334.76567440579134, + "blocks.3.w1.bias": 307.03384934243365, + "blocks.3.w2.weight": 310.86785550010995, + "blocks.4.ln.weight": 6.86934636896043, + "blocks.4.w1.weight": 263.30065941816594, + "blocks.4.w1.bias": 243.63618296008045, + "blocks.4.w2.weight": 243.60310044137827, + "blocks.5.ln.weight": 10.060319390509356, + "blocks.5.w1.weight": 402.62899842011797, + "blocks.5.w1.bias": 374.4871410963097, + "blocks.5.w2.weight": 379.02482222844293, + "out_ln.weight": 0.614063493216844, + "out_head.weight": 8.991195841592733, + "out_head.bias": 0.8181543795651063 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0268119436264036, + 1.9347526413726808, + 1.9036040418243407, + 1.8875383782958985, + 1.8758800409317016, + 1.8637015169525146, + 1.8550788135147094, + 1.8481883679580688, + 1.8380642176055908, + 1.831563844680786, + 1.82749318901062, + 1.814368692855835, + 1.817901651649475, + 1.8037349676895142, + 1.8005951739883423, + 1.7895659107208253, + 1.7900659426879884, + 1.781074461402893, + 1.7845501969146729, + 1.776343214149475, + 1.7684208422470094, + 1.7643969812774658, + 1.7617348025894164, + 1.7578030713653565, + 1.7565854375457763, + 1.7562964432525634, + 1.7528118307113647, + 1.7484837558364867, + 1.743814429550171, + 1.7366932116699219, + 1.7326225136566162, + 1.728937064590454, + 1.7266727477645873, + 1.7240253580474854, + 1.7245794469833373, + 1.7228408333969116, + 1.7203827739715576, + 1.711478896446228, + 1.7089925986099244, + 1.7088872208023071, + 1.7068994373321533, + 1.7094496100234986, + 1.7018463577270508, + 1.7043216220855713, + 1.6993152593231202, + 1.6993111734771729, + 1.6920886862182618, + 1.6940990381622314, + 1.6973865487670898, + 1.693602544631958, + 1.6929154531478883, + 1.6890407666397094, + 1.685359003982544, + 1.684633869857788, + 1.6784778961181641, + 1.6846174112701415, + 1.6849634936904907, + 1.6800030670928956, + 1.6778543420410157, + 1.6783534344482423, + 1.6748548553848266, + 1.675967246170044, + 1.674832339515686, + 1.6731126987075806, + 1.6737006020355225, + 1.670804006958008, + 1.6716182923126222, + 1.6689927773284912, + 1.6716524493026734, + 1.6704032614517212, + 1.6676370931625366, + 1.6686026779174805, + 1.6649452212142943, + 1.6682810161972046, + 1.6657524144744873, + 1.666575444908142, + 1.6638936541748046, + 1.6632415933609008, + 1.6630835901641845, + 1.6650469580078124, + 1.661507155380249, + 1.6635552446746826, + 1.6633718565368651, + 1.6654531225204467, + 1.660396284713745, + 1.6597816809463501, + 1.6641134811019898, + 1.6597988692855834, + 1.6597074938583374, + 1.6637430121612549, + 1.657964035987854, + 1.6597277429580688, + 1.65512205078125, + 1.66050142578125, + 1.6606662057495116, + 1.6582156159210204, + 1.657457571105957, + 1.6597890829086304, + 1.6602586605834961, + 1.6589343154525757 + ], + "train_acc": [ + 0.2612, + 0.30246, + 0.31784, + 0.3241, + 0.32576, + 0.33054, + 0.33502, + 0.33912, + 0.34148, + 0.34222, + 0.34698, + 0.34792, + 0.34712, + 0.35504, + 0.35516, + 0.35796, + 0.35952, + 0.36286, + 0.36188, + 0.36396, + 0.36778, + 0.36878, + 0.3676, + 0.36954, + 0.37154, + 0.36936, + 0.37584, + 0.3732, + 0.37446, + 0.37702, + 0.37832, + 0.38196, + 0.37868, + 0.3824, + 0.38276, + 0.38352, + 0.38208, + 0.3852, + 0.3863, + 0.38682, + 0.38838, + 0.38784, + 0.3886, + 0.38966, + 0.38966, + 0.39112, + 0.39534, + 0.39184, + 0.39354, + 0.39498, + 0.39364, + 0.39514, + 0.3952, + 0.39942, + 0.40118, + 0.39886, + 0.3962, + 0.39814, + 0.39946, + 0.40066, + 0.39902, + 0.40112, + 0.40356, + 0.40358, + 0.40162, + 0.40426, + 0.40428, + 0.4024, + 0.40284, + 0.40386, + 0.4073, + 0.4049, + 0.40354, + 0.40386, + 0.40588, + 0.4046, + 0.40704, + 0.40718, + 0.40888, + 0.405, + 0.41078, + 0.40724, + 0.40814, + 0.40858, + 0.40918, + 0.40666, + 0.40436, + 0.41216, + 0.40734, + 0.40822, + 0.40996, + 0.40958, + 0.411, + 0.40964, + 0.40704, + 0.40942, + 0.41122, + 0.40922, + 0.40942, + 0.40982 + ], + "test_acc": [ + 0.3205, + 0.3401, + 0.3549, + 0.3534, + 0.3599, + 0.3678, + 0.3633, + 0.3657, + 0.3754, + 0.3736, + 0.3895, + 0.3818, + 0.3742, + 0.3765, + 0.383, + 0.3823, + 0.3883, + 0.4012, + 0.3985, + 0.3927, + 0.3954, + 0.3928, + 0.3965, + 0.3899, + 0.4035, + 0.4001, + 0.4034, + 0.4104, + 0.4022, + 0.4106, + 0.3951, + 0.4038, + 0.4094, + 0.4181, + 0.4124, + 0.4094, + 0.4171, + 0.417, + 0.4163, + 0.418, + 0.4157, + 0.4251, + 0.4269, + 0.4201, + 0.4151, + 0.4222, + 0.4264, + 0.4171, + 0.4209, + 0.4171, + 0.4187, + 0.4239, + 0.4214, + 0.4154, + 0.4241, + 0.4166, + 0.424, + 0.4208, + 0.4265, + 0.427, + 0.4223, + 0.4248, + 0.4301, + 0.4312, + 0.4273, + 0.4313, + 0.4304, + 0.4296, + 0.426, + 0.4255, + 0.4306, + 0.4314, + 0.4315, + 0.4326, + 0.4331, + 0.432, + 0.432, + 0.4294, + 0.4335, + 0.433, + 0.437, + 0.4319, + 0.4304, + 0.4327, + 0.4321, + 0.4319, + 0.4333, + 0.4357, + 0.4315, + 0.4337, + 0.434, + 0.4346, + 0.4342, + 0.4334, + 0.4336, + 0.4328, + 0.434, + 0.4344, + 0.434, + 0.434 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.020984500646591187, + 0.06538723409175873, + -0.02862982451915741, + -0.14415404200553894, + -0.12824469804763794, + 0.9942382574081421 + ], + "perturbation_rho": [ + 0.04626474529504776, + 0.00916149839758873, + 0.02606651559472084, + -0.0706678107380867, + 0.0447549931704998, + -0.07931974530220032 + ], + "nudging": { + "0.001": [ + -2.3448956198990345e-06, + -6.940681487321854e-07, + 9.400537237524986e-08, + 4.384201020002365e-07, + 2.918532118201256e-07, + -2.525397576391697e-06 + ], + "0.003": [ + -7.02321995049715e-06, + -2.0685838535428047e-06, + 3.417953848838806e-07, + 1.5673576854169369e-06, + 1.2195087037980556e-06, + -9.568408131599426e-06 + ], + "0.01": [ + -2.358935307711363e-05, + -7.018155883997679e-06, + 9.208451956510544e-07, + 4.6023051254451275e-06, + 4.1344319470226765e-06, + -3.3148215152323246e-05 + ] + }, + "hidden_norms_per_layer": [ + 3922.13720703125, + 49122.51171875, + 264047.5, + 465089.875, + 831614.3125, + 1037417.9375, + 282876.625 + ], + "bp_grad_norms_per_layer": [ + 3.9657443267060444e-05, + 4.083395651832689e-06, + 1.3594802794614225e-06, + 1.2777863958035596e-06, + 1.2765268593284418e-06, + 1.2795584325431264e-06, + 1.2760270919898176e-06 + ] + }, + "drift": { + "embed.weight": 31.47039846350604, + "embed.bias": 18.02217544364072, + "blocks.0.ln.weight": 0.9789876040215849, + "blocks.0.w1.weight": 13.428105491977634, + "blocks.0.w1.bias": 11.014509469722494, + "blocks.0.w2.weight": 44.375832792200086, + "blocks.1.ln.weight": 0.9256838094480255, + "blocks.1.w1.weight": 16.049693958454462, + "blocks.1.w1.bias": 12.671067703031834, + "blocks.1.w2.weight": 46.940642430560914, + "blocks.2.ln.weight": 0.768237594938939, + "blocks.2.w1.weight": 16.878210463024953, + "blocks.2.w1.bias": 14.246556953188842, + "blocks.2.w2.weight": 45.47089120587146, + "blocks.3.ln.weight": 0.6449156612044069, + "blocks.3.w1.weight": 17.300866761556396, + "blocks.3.w1.bias": 17.793175424183687, + "blocks.3.w2.weight": 34.146538375073234, + "blocks.4.ln.weight": 0.536074289047981, + "blocks.4.w1.weight": 15.870552255415648, + "blocks.4.w1.bias": 17.679587984495768, + "blocks.4.w2.weight": 27.222411899497516, + "blocks.5.ln.weight": 0.5977975068115631, + "blocks.5.w1.weight": 18.813068681696937, + "blocks.5.w1.bias": 22.173876530236612, + "blocks.5.w2.weight": 27.925157214653215, + "out_ln.weight": 0.29706709348499905, + "out_head.weight": 4.788666797102501, + "out_head.bias": 2.266760510860064 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 6, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 0 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L6_seed0", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d512_L6_seed1/results_cifar10.json b/results/fa_dfa_d512_L6_seed1/results_cifar10.json new file mode 100644 index 0000000..e0848f8 --- /dev/null +++ b/results/fa_dfa_d512_L6_seed1/results_cifar10.json @@ -0,0 +1,837 @@ +{ + "1": { + "dfa": { + "log": { + "train_loss": [ + 2.0748964222717285, + 2.0451132692718508, + 2.032143096008301, + 2.029643494186401, + 2.030650337371826, + 2.0243890724945066, + 2.020762248840332, + 2.0232248431396482, + 2.0159964645004274, + 2.0192399645996093, + 2.01540469039917, + 2.0154635834503174, + 2.017197366409302, + 2.0137894206237794, + 2.0133308319091796, + 2.0108140522003173, + 2.011235432815552, + 2.009914517288208, + 2.0094243881988527, + 2.0100407147979737, + 2.010185157546997, + 2.0089511253356935, + 2.0095393753051756, + 2.0059863204574584, + 2.0064435525512696, + 2.005170561828613, + 2.0074975017547607, + 2.004913715057373, + 2.0057138012313844, + 2.004565502243042, + 2.006841945838928, + 2.0055636949157716, + 2.0060591010284425, + 2.006927216835022, + 2.0074666262817384, + 2.004335132293701, + 2.005483384475708, + 2.006595082168579, + 2.003282996673584, + 2.0056812954330443, + 2.003032290878296, + 2.0057049587249756, + 2.004697043457031, + 2.006551830406189, + 2.0058861305236815, + 2.0044844329071045, + 2.0043990351867675, + 2.0037572646331787, + 2.0045921185302733, + 2.0048506913757325, + 2.003375824584961, + 2.004632271575928, + 2.0042632717895508, + 2.0026413610076905, + 2.001700767211914, + 2.003248519668579, + 2.003593999633789, + 2.003497390823364, + 2.003082277145386, + 2.001226206436157, + 2.0015557537841797, + 2.0028394889831542, + 2.0041756005859375, + 2.0014186894226076, + 2.0017097331237794, + 2.0027389540863036, + 2.00000232421875, + 2.001385536727905, + 2.0014734981536866, + 2.000507820701599, + 2.0018744316101076, + 2.00272723903656, + 2.0029756539535524, + 2.0013647467422486, + 2.0018114362335204, + 2.0025039096832273, + 2.0007914069366457, + 2.0015052102279665, + 2.0013790615844727, + 2.000559573059082, + 2.000183110809326, + 2.002135623321533, + 1.9992622534561157, + 2.0023144091033935, + 1.9984413458633423, + 2.0006315287780763, + 2.0000011035919187, + 1.9984883616638183, + 1.9980969067382812, + 1.999547445678711, + 1.999142066078186, + 2.0002035105895994, + 1.9980618365859986, + 1.9986838175201416, + 1.9983668603515625, + 1.9985755539321899, + 2.000544935379028, + 1.9968827368545532, + 1.9982192471313476, + 2.0020621044921874 + ], + "train_acc": [ + 0.23312, + 0.24122, + 0.2498, + 0.25198, + 0.25274, + 0.25466, + 0.25756, + 0.25584, + 0.26256, + 0.25706, + 0.26104, + 0.26366, + 0.26034, + 0.2643, + 0.2637, + 0.26512, + 0.26362, + 0.26534, + 0.2631, + 0.26636, + 0.26484, + 0.26708, + 0.2655, + 0.26972, + 0.26856, + 0.27134, + 0.2676, + 0.26984, + 0.26974, + 0.27138, + 0.2693, + 0.27122, + 0.27032, + 0.27198, + 0.27264, + 0.27422, + 0.26884, + 0.27136, + 0.27186, + 0.27214, + 0.27222, + 0.27306, + 0.27124, + 0.26954, + 0.27338, + 0.27444, + 0.27396, + 0.27308, + 0.27134, + 0.27374, + 0.27228, + 0.27202, + 0.2734, + 0.27426, + 0.27536, + 0.2763, + 0.27336, + 0.2738, + 0.2753, + 0.27466, + 0.2761, + 0.27344, + 0.27534, + 0.2757, + 0.27462, + 0.27636, + 0.27752, + 0.27438, + 0.27618, + 0.27512, + 0.27614, + 0.27672, + 0.2751, + 0.2764, + 0.27506, + 0.27662, + 0.2763, + 0.27732, + 0.27492, + 0.27632, + 0.27788, + 0.27568, + 0.27686, + 0.27594, + 0.27624, + 0.27706, + 0.27618, + 0.27852, + 0.27466, + 0.27688, + 0.27624, + 0.2765, + 0.27688, + 0.27766, + 0.27692, + 0.27586, + 0.27718, + 0.27828, + 0.27846, + 0.27564 + ], + "test_acc": [ + 0.2476, + 0.2684, + 0.2584, + 0.2859, + 0.2672, + 0.2792, + 0.2735, + 0.2954, + 0.2679, + 0.29, + 0.2763, + 0.2726, + 0.2807, + 0.2893, + 0.296, + 0.2701, + 0.2694, + 0.2782, + 0.2885, + 0.2811, + 0.2919, + 0.2734, + 0.2917, + 0.2961, + 0.2922, + 0.3011, + 0.2957, + 0.2783, + 0.3042, + 0.2892, + 0.2859, + 0.2808, + 0.2889, + 0.2977, + 0.2867, + 0.2998, + 0.2953, + 0.2908, + 0.2808, + 0.287, + 0.2953, + 0.303, + 0.287, + 0.3021, + 0.2834, + 0.3003, + 0.2861, + 0.2915, + 0.2953, + 0.3065, + 0.2839, + 0.2883, + 0.298, + 0.2928, + 0.2955, + 0.2906, + 0.3041, + 0.2956, + 0.2883, + 0.2887, + 0.2903, + 0.2939, + 0.294, + 0.2878, + 0.2835, + 0.2948, + 0.2838, + 0.2948, + 0.296, + 0.2941, + 0.294, + 0.3002, + 0.2949, + 0.2918, + 0.2977, + 0.2982, + 0.2944, + 0.2961, + 0.2936, + 0.2989, + 0.2956, + 0.2995, + 0.2953, + 0.2967, + 0.3003, + 0.2945, + 0.2989, + 0.2961, + 0.2974, + 0.3003, + 0.2941, + 0.2969, + 0.2982, + 0.2977, + 0.2981, + 0.2977, + 0.2979, + 0.2972, + 0.297, + 0.2969 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.4029613733291626, + 7.883789658080786e-05, + -0.0002453301858622581, + -0.0004192243213765323, + 0.000372876413166523, + -0.000354595307726413 + ], + "perturbation_rho": [ + 0.011547078378498554, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -3.4691765904426575e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.1422671377658844e-06, + 0.0, + -2.7939677238464355e-09, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -3.50363552570343e-06, + 1.862645149230957e-09, + -3.725290298461914e-09, + 0.0, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 56897.21875, + 1696502784.0, + 3738960896.0, + 4607917568.0, + 5758332928.0, + 6969167872.0, + 7904643072.0 + ], + "bp_grad_norms_per_layer": [ + 2.473913411904505e-07, + 2.2789505005338384e-10, + 2.2798352095065866e-10, + 2.2797391752149565e-10, + 2.2786038333943992e-10, + 2.2787945141988786e-10, + 2.2788908260462648e-10 + ] + }, + "drift": { + "embed.weight": 341.6512135112792, + "embed.bias": 266.6380085288841, + "blocks.0.ln.weight": 9.881568196594484, + "blocks.0.w1.weight": 316.78039360087587, + "blocks.0.w1.bias": 286.8114997527113, + "blocks.0.w2.weight": 487.5637400185808, + "blocks.1.ln.weight": 9.2020409527447, + "blocks.1.w1.weight": 361.84956095939293, + "blocks.1.w1.bias": 334.52465546675654, + "blocks.1.w2.weight": 335.23042288085577, + "blocks.2.ln.weight": 8.38680422611515, + "blocks.2.w1.weight": 335.6889109899642, + "blocks.2.w1.bias": 310.7077386931781, + "blocks.2.w2.weight": 315.3949169119556, + "blocks.3.ln.weight": 8.903503018515195, + "blocks.3.w1.weight": 371.4244307750354, + "blocks.3.w1.bias": 345.3889053690205, + "blocks.3.w2.weight": 352.95729115252703, + "blocks.4.ln.weight": 9.577081150302154, + "blocks.4.w1.weight": 387.7879530619586, + "blocks.4.w1.bias": 358.359826994576, + "blocks.4.w2.weight": 353.9896695590463, + "blocks.5.ln.weight": 9.043917438368451, + "blocks.5.w1.weight": 368.8104037409108, + "blocks.5.w1.bias": 337.4941181161536, + "blocks.5.w2.weight": 342.83001538637893, + "out_ln.weight": 0.6049025805082792, + "out_head.weight": 8.662451359844601, + "out_head.bias": 1.2452406037396404 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0533182458114623, + 1.9470771118927002, + 1.9098005801391602, + 1.8933470011138915, + 1.8866070096206664, + 1.8806784909820558, + 1.8700345465469361, + 1.8668433471298218, + 1.8549686681747437, + 1.8534734383773803, + 1.844026010055542, + 1.8412904165649413, + 1.8354822329711915, + 1.8315302365493775, + 1.8307162652206421, + 1.8245070376968384, + 1.820267495956421, + 1.8159903725814819, + 1.8120850400543214, + 1.8062479650115968, + 1.8023771206665038, + 1.7989153610992432, + 1.7973320705795288, + 1.789391294517517, + 1.7857570403289795, + 1.7807958832550048, + 1.7800961505126953, + 1.7727591732025147, + 1.7719479108047484, + 1.7722896636199952, + 1.765616308631897, + 1.765071000442505, + 1.7588437004852295, + 1.7560785748672485, + 1.7530314648056031, + 1.7516267270278931, + 1.7478641785049438, + 1.747710930519104, + 1.7394591223526001, + 1.7430037671279908, + 1.7430737688827516, + 1.743327683944702, + 1.735208967590332, + 1.7367942264175416, + 1.735928176651001, + 1.7336044045639039, + 1.7319979761123658, + 1.7286449816131593, + 1.7276164590072631, + 1.7342356995010375, + 1.7273106130599976, + 1.728026152114868, + 1.7241678939437866, + 1.7218856454467772, + 1.720424718055725, + 1.7259459881973267, + 1.723678625831604, + 1.7201445708465577, + 1.7179317428588867, + 1.714501748046875, + 1.7184516412353517, + 1.7182959340667725, + 1.7173260328674316, + 1.7162155276870728, + 1.7148893152618407, + 1.7136942127990722, + 1.7128957564926148, + 1.7095485464859008, + 1.711679192199707, + 1.7081169268417358, + 1.7124231735992432, + 1.7100938080215453, + 1.7099676737213134, + 1.7056838412094115, + 1.7102637582397462, + 1.7076803800201417, + 1.703552028427124, + 1.7047434192657471, + 1.7060186026382447, + 1.7038543602752685, + 1.7023552209091186, + 1.7033933319854737, + 1.7048375073623656, + 1.7029183816146851, + 1.7019960340118407, + 1.7012598460006714, + 1.700243671836853, + 1.6951217443847657, + 1.7031349285125732, + 1.6982279485702514, + 1.7003905519866944, + 1.7008577185821534, + 1.69846902469635, + 1.7006194688796996, + 1.6990861594009399, + 1.7007253762817383, + 1.702599566040039, + 1.7018861039352418, + 1.699664118347168, + 1.7012134171295166 + ], + "train_acc": [ + 0.24282, + 0.28774, + 0.30648, + 0.31656, + 0.32152, + 0.3235, + 0.33028, + 0.32876, + 0.33768, + 0.33628, + 0.33892, + 0.341, + 0.3464, + 0.34646, + 0.3456, + 0.35168, + 0.34876, + 0.35232, + 0.35214, + 0.3571, + 0.3564, + 0.35782, + 0.35768, + 0.36332, + 0.3616, + 0.36234, + 0.3655, + 0.36546, + 0.36836, + 0.36622, + 0.36982, + 0.36954, + 0.3712, + 0.37104, + 0.37318, + 0.37486, + 0.37374, + 0.37526, + 0.37592, + 0.37592, + 0.37662, + 0.37494, + 0.37758, + 0.37708, + 0.37796, + 0.38076, + 0.37962, + 0.38058, + 0.3806, + 0.37936, + 0.38016, + 0.38172, + 0.38376, + 0.384, + 0.38752, + 0.3876, + 0.38602, + 0.38746, + 0.38644, + 0.386, + 0.38718, + 0.38552, + 0.38468, + 0.3868, + 0.38882, + 0.38934, + 0.3874, + 0.38992, + 0.3904, + 0.3908, + 0.38916, + 0.38954, + 0.39022, + 0.39024, + 0.39018, + 0.39104, + 0.39206, + 0.39178, + 0.39074, + 0.3921, + 0.39292, + 0.39326, + 0.39202, + 0.3935, + 0.39312, + 0.3942, + 0.39594, + 0.39766, + 0.39178, + 0.39502, + 0.39626, + 0.39468, + 0.39338, + 0.39468, + 0.39366, + 0.39276, + 0.39356, + 0.39354, + 0.39138, + 0.39408 + ], + "test_acc": [ + 0.2926, + 0.3237, + 0.3296, + 0.3577, + 0.3485, + 0.3454, + 0.3678, + 0.3648, + 0.3607, + 0.3756, + 0.3775, + 0.3817, + 0.3786, + 0.3781, + 0.3803, + 0.3682, + 0.369, + 0.371, + 0.3834, + 0.3875, + 0.3822, + 0.3805, + 0.392, + 0.401, + 0.3844, + 0.4015, + 0.3866, + 0.3901, + 0.3987, + 0.3939, + 0.3837, + 0.3972, + 0.3877, + 0.3994, + 0.4002, + 0.4033, + 0.4085, + 0.4058, + 0.391, + 0.399, + 0.3986, + 0.4086, + 0.404, + 0.4086, + 0.4062, + 0.4047, + 0.3991, + 0.4037, + 0.3968, + 0.4118, + 0.4017, + 0.4095, + 0.4117, + 0.4037, + 0.4073, + 0.4087, + 0.4137, + 0.4088, + 0.3997, + 0.4059, + 0.409, + 0.4099, + 0.4096, + 0.412, + 0.4102, + 0.4137, + 0.4102, + 0.4121, + 0.4119, + 0.4132, + 0.4149, + 0.412, + 0.415, + 0.4148, + 0.4154, + 0.416, + 0.4112, + 0.4109, + 0.4153, + 0.4148, + 0.4183, + 0.4172, + 0.4152, + 0.4157, + 0.418, + 0.4115, + 0.4187, + 0.4127, + 0.4108, + 0.4153, + 0.4141, + 0.4132, + 0.415, + 0.4124, + 0.4151, + 0.4147, + 0.4136, + 0.4142, + 0.4147, + 0.4144 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.030343683436512947, + 0.09938552975654602, + -0.07437123358249664, + -0.07549507170915604, + -0.09523595124483109, + 0.9972963929176331 + ], + "perturbation_rho": [ + 0.04541456699371338, + 0.008701281622052193, + -0.00402827700600028, + 0.03399529308080673, + -0.00989921111613512, + 0.004614755045622587 + ], + "nudging": { + "0.001": [ + -2.914457581937313e-06, + -5.584442988038063e-07, + 1.0908115655183792e-07, + 4.6566128730773926e-08, + 4.377216100692749e-08, + -1.0116491466760635e-06 + ], + "0.003": [ + -8.360599167644978e-06, + -2.0274892449378967e-06, + 2.825399860739708e-07, + 3.023305907845497e-07, + 2.789311110973358e-07, + -4.071509465575218e-06 + ], + "0.01": [ + -2.804200630635023e-05, + -6.612506695091724e-06, + 9.683426469564438e-07, + 1.1706724762916565e-06, + 1.3328390195965767e-06, + -1.4974735677242279e-05 + ] + }, + "hidden_norms_per_layer": [ + 5724.40771484375, + 72733.609375, + 478685.65625, + 1483182.625, + 1809620.875, + 2091372.375, + 857882.6875 + ], + "bp_grad_norms_per_layer": [ + 3.0453806175501086e-05, + 2.1086596007080516e-06, + 6.627448101426126e-07, + 6.460473969127634e-07, + 6.491723638646363e-07, + 6.540217896144895e-07, + 6.466638637903088e-07 + ] + }, + "drift": { + "embed.weight": 42.22188941497859, + "embed.bias": 17.68183956971274, + "blocks.0.ln.weight": 1.180802481067003, + "blocks.0.w1.weight": 15.4315958124657, + "blocks.0.w1.bias": 12.305080689576908, + "blocks.0.w2.weight": 57.44824499129088, + "blocks.1.ln.weight": 0.9692930700548912, + "blocks.1.w1.weight": 19.880441088833255, + "blocks.1.w1.bias": 16.257331118504066, + "blocks.1.w2.weight": 48.06735579536956, + "blocks.2.ln.weight": 0.7719428930423391, + "blocks.2.w1.weight": 23.427567219106724, + "blocks.2.w1.bias": 24.716070592776813, + "blocks.2.w2.weight": 33.2055102304801, + "blocks.3.ln.weight": 0.6610316048264212, + "blocks.3.w1.weight": 21.14916966984416, + "blocks.3.w1.bias": 22.188834381211755, + "blocks.3.w2.weight": 37.41900122482242, + "blocks.4.ln.weight": 0.6045383052903652, + "blocks.4.w1.weight": 21.6093498763088, + "blocks.4.w1.bias": 23.188519739977856, + "blocks.4.w2.weight": 29.43277495491079, + "blocks.5.ln.weight": 0.7110348796366065, + "blocks.5.w1.weight": 22.91348409054967, + "blocks.5.w1.bias": 24.128500394824375, + "blocks.5.w2.weight": 42.82735324390181, + "out_ln.weight": 0.2879508142897712, + "out_head.weight": 5.9584913076844686, + "out_head.bias": 1.1797514883024003 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 6, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 1 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L6_seed1", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d512_L6_seed2/results_cifar10.json b/results/fa_dfa_d512_L6_seed2/results_cifar10.json new file mode 100644 index 0000000..0d6122d --- /dev/null +++ b/results/fa_dfa_d512_L6_seed2/results_cifar10.json @@ -0,0 +1,837 @@ +{ + "2": { + "dfa": { + "log": { + "train_loss": [ + 2.0704764087677003, + 2.049675481185913, + 2.0393172933197024, + 2.0385254068756105, + 2.0333104403305056, + 2.037317512435913, + 2.035111841583252, + 2.033784921951294, + 2.0276396758270265, + 2.0287634895324707, + 2.0255478504943847, + 2.0249928087997437, + 2.0251559844207763, + 2.026155082015991, + 2.0227601895141603, + 2.0238794365692137, + 2.0220592249298095, + 2.0211007302474977, + 2.0220025036621094, + 2.0187504733657837, + 2.022589133682251, + 2.0209386083221434, + 2.022369726409912, + 2.019175484046936, + 2.0184574550628662, + 2.020228290786743, + 2.0170737294769285, + 2.0200500258636476, + 2.0192940770721437, + 2.017934571380615, + 2.018111932220459, + 2.019744345703125, + 2.0181864041137696, + 2.018692120895386, + 2.019279464416504, + 2.020759454421997, + 2.0175593240356444, + 2.0189450312805177, + 2.018363641014099, + 2.0204193601989746, + 2.018774921836853, + 2.020020353240967, + 2.020256604042053, + 2.018686157913208, + 2.017777068939209, + 2.01642233127594, + 2.0193701077270507, + 2.0175296588897704, + 2.0180459953308105, + 2.0163426136016844, + 2.0169748413085937, + 2.017522889404297, + 2.0171759409332277, + 2.0182337244415285, + 2.0202614056396486, + 2.0193526766967773, + 2.0176297589874266, + 2.0176475778961183, + 2.017951414642334, + 2.0159765984725952, + 2.0174861521911622, + 2.0162583628082276, + 2.0189596444702147, + 2.017551812057495, + 2.0174899821472168, + 2.017485563354492, + 2.0167667126464845, + 2.017466169166565, + 2.017418462524414, + 2.0175614359283447, + 2.016368521194458, + 2.017146753158569, + 2.016855639877319, + 2.016222254562378, + 2.016020330963135, + 2.0157443789672853, + 2.014490191497803, + 2.0179072814559937, + 2.0176085803985595, + 2.016617900123596, + 2.016193874168396, + 2.0163377814483643, + 2.017562115097046, + 2.017051735992432, + 2.015485999984741, + 2.0135854721450808, + 2.0146356997299195, + 2.0149522621154787, + 2.0146646865844726, + 2.0153391693878175, + 2.0160440145874023, + 2.0161946767425536, + 2.014449896469116, + 2.015591969642639, + 2.01485006980896, + 2.01346685836792, + 2.0139751796722414, + 2.0144011250305174, + 2.014900064163208, + 2.016459233779907 + ], + "train_acc": [ + 0.23802, + 0.24392, + 0.24906, + 0.25122, + 0.25396, + 0.25104, + 0.2552, + 0.25244, + 0.2577, + 0.25632, + 0.25834, + 0.2557, + 0.25944, + 0.25876, + 0.26044, + 0.25964, + 0.2593, + 0.26036, + 0.26106, + 0.26136, + 0.26018, + 0.25898, + 0.25858, + 0.2613, + 0.26334, + 0.26364, + 0.26186, + 0.26224, + 0.26166, + 0.26282, + 0.26364, + 0.26358, + 0.2645, + 0.26294, + 0.26592, + 0.26282, + 0.26398, + 0.26414, + 0.26538, + 0.26506, + 0.26578, + 0.26148, + 0.26312, + 0.26586, + 0.26478, + 0.26574, + 0.26416, + 0.26488, + 0.26366, + 0.26614, + 0.2656, + 0.26508, + 0.2675, + 0.26694, + 0.26532, + 0.26614, + 0.26596, + 0.26538, + 0.2652, + 0.26606, + 0.26698, + 0.26806, + 0.26674, + 0.26714, + 0.26704, + 0.26768, + 0.26714, + 0.2651, + 0.26544, + 0.26656, + 0.2676, + 0.26678, + 0.26898, + 0.26788, + 0.26736, + 0.2677, + 0.26732, + 0.26766, + 0.26812, + 0.2675, + 0.26916, + 0.26876, + 0.26754, + 0.26748, + 0.26802, + 0.27004, + 0.27052, + 0.2671, + 0.27224, + 0.26654, + 0.26818, + 0.26902, + 0.2691, + 0.26962, + 0.2702, + 0.26948, + 0.2712, + 0.27026, + 0.26876, + 0.26952 + ], + "test_acc": [ + 0.2537, + 0.2494, + 0.2765, + 0.2473, + 0.2851, + 0.2759, + 0.2702, + 0.2833, + 0.2841, + 0.2663, + 0.2858, + 0.2432, + 0.2894, + 0.2933, + 0.2938, + 0.2901, + 0.2777, + 0.2769, + 0.2907, + 0.2833, + 0.2973, + 0.2692, + 0.277, + 0.2841, + 0.2917, + 0.2694, + 0.2929, + 0.2737, + 0.2955, + 0.2877, + 0.2891, + 0.302, + 0.272, + 0.2931, + 0.2882, + 0.2806, + 0.2849, + 0.2989, + 0.2988, + 0.3036, + 0.2869, + 0.2951, + 0.3014, + 0.289, + 0.2834, + 0.2896, + 0.2887, + 0.2932, + 0.2855, + 0.2882, + 0.2846, + 0.3018, + 0.2981, + 0.3019, + 0.2846, + 0.2968, + 0.2915, + 0.2935, + 0.2912, + 0.2978, + 0.2989, + 0.2953, + 0.2999, + 0.292, + 0.2923, + 0.2856, + 0.2956, + 0.2993, + 0.2906, + 0.2931, + 0.2925, + 0.2948, + 0.2968, + 0.295, + 0.2965, + 0.3019, + 0.2889, + 0.2941, + 0.2958, + 0.2959, + 0.2955, + 0.2926, + 0.2921, + 0.2964, + 0.2981, + 0.2912, + 0.2982, + 0.2995, + 0.2958, + 0.2963, + 0.2965, + 0.2963, + 0.2947, + 0.2957, + 0.2953, + 0.2947, + 0.2953, + 0.2951, + 0.2949, + 0.295 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.3428037762641907, + -3.763916902244091e-05, + -0.0007522967061959207, + -0.00019637049990706146, + -0.0005234384443610907, + 0.00033836261718533933 + ], + "perturbation_rho": [ + -0.03871288150548935, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -3.3015385270118713e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -8.167698979377747e-07, + 0.0, + 0.0, + 0.0, + 1.862645149230957e-09, + 0.0 + ], + "0.01": [ + -2.7455389499664307e-06, + 0.0, + 0.0, + 1.862645149230957e-09, + 1.862645149230957e-09, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 53848.7734375, + 2442806016.0, + 5245538816.0, + 6828209664.0, + 7018600960.0, + 9658986496.0, + 9768150016.0 + ], + "bp_grad_norms_per_layer": [ + 2.0601439132406085e-07, + 2.350288574870518e-10, + 2.3180254937749112e-10, + 2.322748937633179e-10, + 2.3222145040247e-10, + 2.3218917066802902e-10, + 2.321651759729093e-10 + ] + }, + "drift": { + "embed.weight": 347.726745764923, + "embed.bias": 320.686611820448, + "blocks.0.ln.weight": 9.997068770929598, + "blocks.0.w1.weight": 332.0183119830964, + "blocks.0.w1.bias": 361.6276454599223, + "blocks.0.w2.weight": 491.26444464340926, + "blocks.1.ln.weight": 9.760206175642947, + "blocks.1.w1.weight": 399.42793179083463, + "blocks.1.w1.bias": 382.1218468905457, + "blocks.1.w2.weight": 398.4896403314863, + "blocks.2.ln.weight": 9.863790992763134, + "blocks.2.w1.weight": 403.4218387582533, + "blocks.2.w1.bias": 369.67326277166103, + "blocks.2.w2.weight": 388.1030551430935, + "blocks.3.ln.weight": 7.7321395772148405, + "blocks.3.w1.weight": 284.159080804305, + "blocks.3.w1.bias": 261.08860693920894, + "blocks.3.w2.weight": 276.021700238577, + "blocks.4.ln.weight": 10.766653379169998, + "blocks.4.w1.weight": 441.4219333141434, + "blocks.4.w1.bias": 408.4716885612093, + "blocks.4.w2.weight": 432.48056855561003, + "blocks.5.ln.weight": 7.252784916283292, + "blocks.5.w1.weight": 280.38847615534195, + "blocks.5.w1.bias": 255.99327290804064, + "blocks.5.w2.weight": 255.63025547094998, + "out_ln.weight": 0.644730004333539, + "out_head.weight": 9.313913439210426, + "out_head.bias": 0.978185244031994 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0382984645843507, + 1.9496157778167724, + 1.9119072325897217, + 1.8929361431121827, + 1.8775380432128905, + 1.8741492847442627, + 1.8671779189300537, + 1.860015816307068, + 1.8485039702987671, + 1.844084996986389, + 1.8363114721679688, + 1.8330332998275758, + 1.8273526552581787, + 1.825867492904663, + 1.821722242088318, + 1.8222067623519898, + 1.8212668908691407, + 1.8164310580825807, + 1.8167414797592163, + 1.8158153560638428, + 1.8170397219848633, + 1.8139575901031495, + 1.814438596458435, + 1.807250874671936, + 1.806480390663147, + 1.805527213973999, + 1.7998141473770142, + 1.8049808203125, + 1.8055741637420655, + 1.796914314918518, + 1.8009305011749268, + 1.797931548538208, + 1.7981645835113524, + 1.793643822402954, + 1.7937120150375365, + 1.7967921892929077, + 1.7892387997436523, + 1.785188671875, + 1.786605785293579, + 1.787917324485779, + 1.7853582260894776, + 1.7880721422576904, + 1.7864411516571046, + 1.7777207873916625, + 1.7769269942855834, + 1.777174091796875, + 1.7795673489761352, + 1.773414126586914, + 1.7774351581192016, + 1.7697969200897217, + 1.771698450050354, + 1.768428716278076, + 1.7676336987304688, + 1.765194714126587, + 1.7675122838974, + 1.7682551845932006, + 1.7667323706436158, + 1.7661600426864623, + 1.7620434980010986, + 1.7610514661026, + 1.7625030724716186, + 1.7606126858520508, + 1.760137271118164, + 1.7585180844879151, + 1.7585363976287842, + 1.7592174551010131, + 1.755930234375, + 1.7548912310791016, + 1.7563516518783568, + 1.7501784167099, + 1.7541064535522461, + 1.7512517785263062, + 1.7541993662261963, + 1.7516761654663087, + 1.7516398949813843, + 1.749719626121521, + 1.7497738579559325, + 1.752181930885315, + 1.7519074490356445, + 1.7518238845443725, + 1.7466175811004638, + 1.748845227355957, + 1.746575064430237, + 1.7496823914337158, + 1.7468877982330322, + 1.7439219277191162, + 1.7428745371246337, + 1.746016036453247, + 1.742904917678833, + 1.7454003299713134, + 1.7449493420028686, + 1.744745844039917, + 1.7438948041152955, + 1.7453510579681397, + 1.7438724200439453, + 1.7425698751068115, + 1.742772998123169, + 1.7421956006622314, + 1.7445682699966432, + 1.7442712462615966 + ], + "train_acc": [ + 0.25024, + 0.28724, + 0.30528, + 0.31264, + 0.32216, + 0.32208, + 0.32792, + 0.33064, + 0.3365, + 0.3396, + 0.34, + 0.34242, + 0.34468, + 0.34422, + 0.34752, + 0.34912, + 0.34524, + 0.34978, + 0.34856, + 0.3501, + 0.34806, + 0.35048, + 0.35214, + 0.35416, + 0.35338, + 0.35482, + 0.35432, + 0.35244, + 0.35388, + 0.35836, + 0.35712, + 0.3568, + 0.3591, + 0.36118, + 0.36208, + 0.35824, + 0.36296, + 0.3625, + 0.3616, + 0.3628, + 0.36246, + 0.36298, + 0.3605, + 0.36632, + 0.36574, + 0.36678, + 0.36478, + 0.3649, + 0.3662, + 0.36734, + 0.3665, + 0.36722, + 0.36714, + 0.3702, + 0.36908, + 0.37058, + 0.36998, + 0.36858, + 0.37244, + 0.37104, + 0.3722, + 0.37344, + 0.3726, + 0.37172, + 0.3741, + 0.37406, + 0.3731, + 0.37294, + 0.37284, + 0.37628, + 0.37682, + 0.37742, + 0.37544, + 0.37656, + 0.37632, + 0.37814, + 0.37808, + 0.37642, + 0.37748, + 0.37462, + 0.37968, + 0.37766, + 0.37704, + 0.37846, + 0.37784, + 0.379, + 0.37904, + 0.37854, + 0.38202, + 0.37828, + 0.37712, + 0.37962, + 0.38004, + 0.37898, + 0.3783, + 0.37898, + 0.38082, + 0.38006, + 0.37922, + 0.37846 + ], + "test_acc": [ + 0.3027, + 0.306, + 0.3365, + 0.3382, + 0.3465, + 0.3597, + 0.3579, + 0.3625, + 0.3757, + 0.3702, + 0.3663, + 0.3612, + 0.374, + 0.378, + 0.3778, + 0.3778, + 0.3807, + 0.3691, + 0.3833, + 0.3714, + 0.3788, + 0.3629, + 0.3794, + 0.382, + 0.3843, + 0.3732, + 0.3818, + 0.373, + 0.3819, + 0.3776, + 0.3882, + 0.3926, + 0.385, + 0.3853, + 0.3879, + 0.3927, + 0.3894, + 0.3945, + 0.3903, + 0.3924, + 0.3896, + 0.3941, + 0.3919, + 0.3888, + 0.3863, + 0.3927, + 0.3922, + 0.3903, + 0.3939, + 0.3828, + 0.3963, + 0.3935, + 0.3958, + 0.3971, + 0.4048, + 0.4003, + 0.3934, + 0.3945, + 0.395, + 0.3979, + 0.3908, + 0.3969, + 0.4012, + 0.3998, + 0.3987, + 0.3965, + 0.4025, + 0.3984, + 0.4038, + 0.3982, + 0.4015, + 0.4027, + 0.3997, + 0.4022, + 0.4045, + 0.4031, + 0.4028, + 0.4041, + 0.3998, + 0.4012, + 0.3998, + 0.3979, + 0.4026, + 0.401, + 0.4006, + 0.4048, + 0.4018, + 0.4009, + 0.4025, + 0.4024, + 0.4009, + 0.4036, + 0.4025, + 0.4022, + 0.4026, + 0.401, + 0.4018, + 0.401, + 0.4015, + 0.4016 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.02052094228565693, + 0.07671980559825897, + -0.10412630438804626, + -0.03588568791747093, + -0.07592153549194336, + 0.9841817617416382 + ], + "perturbation_rho": [ + -0.0499441958963871, + 0.03510870411992073, + 0.012152664363384247, + -0.01593763567507267, + -0.030490349978208542, + 0.01056537963449955 + ], + "nudging": { + "0.001": [ + -1.460895873606205e-06, + -1.8533319234848022e-07, + 8.102506399154663e-08, + 3.9814040064811707e-08, + 7.660128176212311e-08, + -1.0418007150292397e-06 + ], + "0.003": [ + -4.51505184173584e-06, + -7.73230567574501e-07, + 4.012836143374443e-07, + 1.9150320440530777e-07, + 2.3317988961935043e-07, + -3.8853613659739494e-06 + ], + "0.01": [ + -1.504761166870594e-05, + -2.689310349524021e-06, + 1.5775440260767937e-06, + 6.139744073152542e-07, + 9.952345862984657e-07, + -1.3828510418534279e-05 + ] + }, + "hidden_norms_per_layer": [ + 7937.806640625, + 157942.125, + 845318.25, + 1734418.625, + 1908211.875, + 2166763.75, + 1424229.125 + ], + "bp_grad_norms_per_layer": [ + 2.3930177121656016e-05, + 1.1552400565051357e-06, + 6.149262503640784e-07, + 6.002979375807627e-07, + 6.000758503432735e-07, + 5.996230356686283e-07, + 5.751607545789739e-07 + ] + }, + "drift": { + "embed.weight": 54.05447299340841, + "embed.bias": 16.031038115248215, + "blocks.0.ln.weight": 1.2416108593211508, + "blocks.0.w1.weight": 17.922589218376636, + "blocks.0.w1.bias": 14.956067338203475, + "blocks.0.w2.weight": 64.67351730215137, + "blocks.1.ln.weight": 1.1286859632363149, + "blocks.1.w1.weight": 23.71804846947617, + "blocks.1.w1.bias": 17.43068124645083, + "blocks.1.w2.weight": 41.38902946126262, + "blocks.2.ln.weight": 0.7406672335136909, + "blocks.2.w1.weight": 24.88918465627309, + "blocks.2.w1.bias": 25.668393301356875, + "blocks.2.w2.weight": 26.33124026065157, + "blocks.3.ln.weight": 0.5408212886770045, + "blocks.3.w1.weight": 19.746562187924944, + "blocks.3.w1.bias": 21.605426127504696, + "blocks.3.w2.weight": 26.408093498021923, + "blocks.4.ln.weight": 0.7373842468395975, + "blocks.4.w1.weight": 21.153183487980545, + "blocks.4.w1.bias": 21.145538252519305, + "blocks.4.w2.weight": 45.59224517841596, + "blocks.5.ln.weight": 0.8316077429831186, + "blocks.5.w1.weight": 20.11472175424346, + "blocks.5.w1.bias": 17.387776840886495, + "blocks.5.w2.weight": 81.52952222436008, + "out_ln.weight": 0.38061390763416836, + "out_head.weight": 7.016437743402715, + "out_head.bias": 0.6179754221306049 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 6, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 2 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L6_seed2", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d512_L6_seed3/results_cifar10.json b/results/fa_dfa_d512_L6_seed3/results_cifar10.json new file mode 100644 index 0000000..fe507fc --- /dev/null +++ b/results/fa_dfa_d512_L6_seed3/results_cifar10.json @@ -0,0 +1,837 @@ +{ + "3": { + "dfa": { + "log": { + "train_loss": [ + 2.0551145947265623, + 2.0254818302154542, + 2.0194406120300292, + 2.0175874392700197, + 2.0163199629211426, + 2.0142830805969236, + 2.007013447418213, + 2.003593424758911, + 2.0067723297500613, + 2.003558956604004, + 2.00101790184021, + 2.0011146197509766, + 1.998018801345825, + 1.9996520843887329, + 1.9962160342407227, + 1.997622791786194, + 1.9969604161071777, + 1.9916633380126954, + 1.9958815657806397, + 1.9930840515899657, + 1.9948459027862548, + 1.9917297480773926, + 1.9886382999420167, + 1.9919039403533936, + 1.990138112449646, + 1.9900766090393067, + 1.9897496450042724, + 1.9914394456481934, + 1.9891233996582032, + 1.985649783477783, + 1.9873360358428955, + 1.989273772125244, + 1.987579068336487, + 1.9871572724151612, + 1.9870523969268798, + 1.983975528640747, + 1.987108448867798, + 1.9884108324813843, + 1.988470245361328, + 1.9860010179138183, + 1.9886363947296142, + 1.987370449295044, + 1.9879705474853515, + 1.9867627616882324, + 1.9860119804382323, + 1.9856234351348876, + 1.9848077922058105, + 1.9845034539794921, + 1.9853366620635986, + 1.9832583249664306, + 1.9855356827163697, + 1.9846282432556153, + 1.9862506673431397, + 1.9845512745666505, + 1.984809038848877, + 1.983686294631958, + 1.9839595748138428, + 1.9834054181671144, + 1.9851946828460694, + 1.9813065106964112, + 1.982595512084961, + 1.9824060856628418, + 1.9832570594787597, + 1.9817028414916993, + 1.9841908657836913, + 1.9825086151123046, + 1.9821907202148437, + 1.9809494961166383, + 1.9812863923645019, + 1.9832594239807129, + 1.9819738615417481, + 1.982517120513916, + 1.9800974131011964, + 1.983135057144165, + 1.983559645614624, + 1.9801312906646729, + 1.9810623525238038, + 1.9804602764511108, + 1.9805444803619385, + 1.9829668161010743, + 1.9795248139190673, + 1.9794620516967774, + 1.9808693537902833, + 1.9817010179138184, + 1.9800636986541749, + 1.9797203030395507, + 1.980884309463501, + 1.9790666972351074, + 1.9815667838287354, + 1.9785900994110108, + 1.9793143473815917, + 1.9813185803985596, + 1.9775366342926026, + 1.9791302056884765, + 1.9800324520874024, + 1.9808550524902344, + 1.9790050820541383, + 1.9809321939086915, + 1.9801498863220215, + 1.9817508765029908 + ], + "train_acc": [ + 0.24236, + 0.25538, + 0.26038, + 0.25914, + 0.2596, + 0.26144, + 0.26392, + 0.26632, + 0.2648, + 0.26606, + 0.27124, + 0.26782, + 0.26864, + 0.2686, + 0.27008, + 0.27032, + 0.2686, + 0.2709, + 0.27116, + 0.26994, + 0.27146, + 0.27376, + 0.2744, + 0.2734, + 0.27522, + 0.27308, + 0.27442, + 0.27432, + 0.27566, + 0.27572, + 0.27774, + 0.2749, + 0.275, + 0.27594, + 0.27506, + 0.27894, + 0.27722, + 0.27576, + 0.27618, + 0.27836, + 0.27742, + 0.27672, + 0.27846, + 0.27852, + 0.27952, + 0.27852, + 0.2798, + 0.2786, + 0.27982, + 0.27886, + 0.27922, + 0.28008, + 0.28086, + 0.27944, + 0.27862, + 0.2795, + 0.2811, + 0.28046, + 0.28042, + 0.28068, + 0.2808, + 0.2807, + 0.28074, + 0.28092, + 0.28042, + 0.28318, + 0.28114, + 0.28196, + 0.28128, + 0.28244, + 0.28616, + 0.28052, + 0.28304, + 0.28028, + 0.28134, + 0.283, + 0.281, + 0.28442, + 0.2821, + 0.2828, + 0.28364, + 0.28418, + 0.2818, + 0.2828, + 0.28202, + 0.28332, + 0.2823, + 0.28336, + 0.27986, + 0.28428, + 0.28444, + 0.28348, + 0.28552, + 0.28302, + 0.28178, + 0.28176, + 0.28462, + 0.28336, + 0.28208, + 0.28378 + ], + "test_acc": [ + 0.2839, + 0.276, + 0.2703, + 0.2882, + 0.3045, + 0.2916, + 0.2881, + 0.2861, + 0.2942, + 0.2742, + 0.286, + 0.3032, + 0.3043, + 0.284, + 0.3085, + 0.2889, + 0.2865, + 0.2944, + 0.288, + 0.2904, + 0.2951, + 0.2866, + 0.3007, + 0.2883, + 0.2858, + 0.3049, + 0.2761, + 0.3085, + 0.2795, + 0.2969, + 0.2937, + 0.3004, + 0.2812, + 0.2902, + 0.3023, + 0.2997, + 0.2918, + 0.3121, + 0.2969, + 0.2938, + 0.2951, + 0.3021, + 0.3015, + 0.31, + 0.2967, + 0.2972, + 0.3, + 0.3063, + 0.3104, + 0.3045, + 0.3005, + 0.3049, + 0.3048, + 0.3025, + 0.3043, + 0.3031, + 0.2936, + 0.2981, + 0.3033, + 0.2941, + 0.3064, + 0.2988, + 0.3068, + 0.3013, + 0.2997, + 0.3068, + 0.3062, + 0.3059, + 0.293, + 0.3029, + 0.3083, + 0.3108, + 0.3016, + 0.3031, + 0.2946, + 0.2992, + 0.3066, + 0.3046, + 0.3006, + 0.3035, + 0.2988, + 0.3031, + 0.3015, + 0.3028, + 0.3008, + 0.3022, + 0.3064, + 0.3045, + 0.3051, + 0.3041, + 0.3025, + 0.3045, + 0.3034, + 0.3039, + 0.3049, + 0.3044, + 0.3042, + 0.3046, + 0.3046, + 0.3046 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.3834345042705536, + 0.0005603223107755184, + 0.0008055042708292603, + 4.550980156636797e-05, + -0.0009304977720603347, + 0.0005176510312594473 + ], + "perturbation_rho": [ + -0.015653517097234726, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -3.976747393608093e-07, + 0.0, + 0.0, + -9.313225746154785e-10, + 0.0, + 0.0 + ], + "0.003": [ + -1.353677362203598e-06, + 0.0, + 0.0, + -9.313225746154785e-10, + 0.0, + 0.0 + ], + "0.01": [ + -4.4209882616996765e-06, + -1.862645149230957e-09, + 0.0, + -9.313225746154785e-10, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 53414.71484375, + 1171344000.0, + 1695788416.0, + 3132320256.0, + 5559488000.0, + 6114624000.0, + 6167857152.0 + ], + "bp_grad_norms_per_layer": [ + 2.961761254027806e-07, + 3.1574928782696077e-10, + 3.1493549434991053e-10, + 3.145896598777398e-10, + 3.146878591042679e-10, + 3.147946070480856e-10, + 3.1481975359959335e-10 + ] + }, + "drift": { + "embed.weight": 323.54545540843105, + "embed.bias": 220.1187911959123, + "blocks.0.ln.weight": 9.685558373681214, + "blocks.0.w1.weight": 284.1235348747452, + "blocks.0.w1.bias": 249.08037975739575, + "blocks.0.w2.weight": 467.48682118152, + "blocks.1.ln.weight": 7.467562562572266, + "blocks.1.w1.weight": 239.93081695201815, + "blocks.1.w1.bias": 214.84010837594673, + "blocks.1.w2.weight": 270.7171266420411, + "blocks.2.ln.weight": 8.6999741976989, + "blocks.2.w1.weight": 335.1621989545166, + "blocks.2.w1.bias": 297.63643437529385, + "blocks.2.w2.weight": 332.9487355280909, + "blocks.3.ln.weight": 9.65496650463451, + "blocks.3.w1.weight": 396.7552349422605, + "blocks.3.w1.bias": 368.25716787661366, + "blocks.3.w2.weight": 397.6707533080763, + "blocks.4.ln.weight": 8.086181202960429, + "blocks.4.w1.weight": 327.52147542974734, + "blocks.4.w1.bias": 308.89251019375376, + "blocks.4.w2.weight": 317.4051434598822, + "blocks.5.ln.weight": 5.9782378958674425, + "blocks.5.w1.weight": 215.37270785666712, + "blocks.5.w1.bias": 206.13969443200085, + "blocks.5.w2.weight": 210.55284579153434, + "out_ln.weight": 0.6330900560999851, + "out_head.weight": 8.856435188669105, + "out_head.bias": 0.9683577161846454 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0186621208190916, + 1.9331099752044678, + 1.905914333114624, + 1.8951857843399047, + 1.8854292852783203, + 1.8709850173568725, + 1.8602817873382569, + 1.8479190076065064, + 1.8414129400253296, + 1.8321620767974853, + 1.826335980758667, + 1.8197622241210938, + 1.8174528274917603, + 1.8127154489898682, + 1.8082266244888305, + 1.811778229446411, + 1.8112502433013915, + 1.8042263799667357, + 1.8090753686141967, + 1.8053230755233765, + 1.8077443563842774, + 1.804732971496582, + 1.8016516897583008, + 1.8038146850204468, + 1.8026063943862916, + 1.802457709350586, + 1.8002039365386964, + 1.8020965616607667, + 1.7966074966812133, + 1.796929923324585, + 1.7942455569076539, + 1.7956397313690187, + 1.7895081832504272, + 1.7901598165893555, + 1.785636817932129, + 1.7812746408843994, + 1.7844691129302979, + 1.783790927810669, + 1.7788431832885743, + 1.7756607747650146, + 1.774973991355896, + 1.769966243019104, + 1.7683391579818726, + 1.7691140979766846, + 1.7666235674285888, + 1.763746886062622, + 1.7614172232437133, + 1.7630763125610351, + 1.7593659213256836, + 1.7556221802139282, + 1.7563741372680663, + 1.7534167012786865, + 1.7579979578018188, + 1.7498029040145875, + 1.7481240466308594, + 1.747141012802124, + 1.7512671886444091, + 1.7454403200531006, + 1.747818590774536, + 1.7458769092559814, + 1.7429523513412475, + 1.7409421573638917, + 1.7443068244171143, + 1.7444597329330445, + 1.7395000383758545, + 1.7387146155548097, + 1.7404073351669311, + 1.7386165090560912, + 1.7302196057891845, + 1.7337853803253174, + 1.734050626296997, + 1.7332715703582764, + 1.7335029886245727, + 1.7333770494842529, + 1.7329201885223389, + 1.7302788830947875, + 1.7308894277572633, + 1.7317599598693847, + 1.7300064459609985, + 1.730283342819214, + 1.7256998642349244, + 1.7275242566680908, + 1.7297832077026367, + 1.7274928902435303, + 1.7220390670776367, + 1.725278727684021, + 1.7244994747924804, + 1.725174829978943, + 1.72525224609375, + 1.7249654501724243, + 1.7217949364852905, + 1.724819582977295, + 1.7225480925750734, + 1.7213666839981079, + 1.7232744509887696, + 1.7264412049102784, + 1.7210735564804076, + 1.7247192889404297, + 1.7228876683807373, + 1.7252232053375245 + ], + "train_acc": [ + 0.26044, + 0.2964, + 0.30886, + 0.31392, + 0.32094, + 0.32556, + 0.32806, + 0.33438, + 0.33584, + 0.34332, + 0.34448, + 0.3469, + 0.3493, + 0.34834, + 0.34978, + 0.3522, + 0.35074, + 0.35222, + 0.3523, + 0.35548, + 0.35364, + 0.35582, + 0.35566, + 0.3538, + 0.35346, + 0.35642, + 0.3576, + 0.35768, + 0.35674, + 0.35842, + 0.35762, + 0.35934, + 0.36148, + 0.36034, + 0.36122, + 0.3633, + 0.36236, + 0.3648, + 0.36606, + 0.36512, + 0.36748, + 0.36804, + 0.36888, + 0.36944, + 0.37288, + 0.37268, + 0.37384, + 0.37068, + 0.37262, + 0.375, + 0.37486, + 0.37624, + 0.37518, + 0.3761, + 0.37538, + 0.3772, + 0.37746, + 0.37694, + 0.37704, + 0.37738, + 0.37926, + 0.37986, + 0.37926, + 0.3778, + 0.38224, + 0.38084, + 0.3805, + 0.381, + 0.38198, + 0.38366, + 0.38362, + 0.38348, + 0.38632, + 0.38202, + 0.38298, + 0.38632, + 0.38186, + 0.38376, + 0.38438, + 0.3837, + 0.38756, + 0.3857, + 0.38298, + 0.38544, + 0.38558, + 0.3877, + 0.38692, + 0.38824, + 0.38624, + 0.38652, + 0.38646, + 0.38884, + 0.38832, + 0.3871, + 0.38898, + 0.38446, + 0.38604, + 0.38742, + 0.38538, + 0.3872 + ], + "test_acc": [ + 0.3158, + 0.3361, + 0.343, + 0.3494, + 0.3693, + 0.3591, + 0.3652, + 0.3642, + 0.376, + 0.3646, + 0.3753, + 0.3821, + 0.3889, + 0.3824, + 0.3807, + 0.3765, + 0.3749, + 0.3867, + 0.3837, + 0.3793, + 0.3829, + 0.3792, + 0.3865, + 0.3772, + 0.3856, + 0.3842, + 0.3784, + 0.3846, + 0.3821, + 0.3885, + 0.3845, + 0.3915, + 0.3893, + 0.3836, + 0.3942, + 0.4018, + 0.3946, + 0.397, + 0.3956, + 0.3996, + 0.3947, + 0.3958, + 0.3944, + 0.4035, + 0.4029, + 0.4009, + 0.393, + 0.4026, + 0.4079, + 0.4063, + 0.3963, + 0.4017, + 0.4033, + 0.4045, + 0.4081, + 0.4059, + 0.4068, + 0.4035, + 0.4051, + 0.4058, + 0.402, + 0.4074, + 0.4037, + 0.404, + 0.4017, + 0.4036, + 0.4057, + 0.409, + 0.404, + 0.4081, + 0.407, + 0.4083, + 0.4085, + 0.4041, + 0.4044, + 0.4056, + 0.4118, + 0.4081, + 0.4097, + 0.4096, + 0.4116, + 0.4104, + 0.4077, + 0.4095, + 0.4088, + 0.4114, + 0.41, + 0.4093, + 0.4113, + 0.4112, + 0.4105, + 0.4115, + 0.4109, + 0.4115, + 0.4116, + 0.4127, + 0.4116, + 0.4116, + 0.4116, + 0.4118 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.033788226544857025, + 0.08954796195030212, + -0.06740664690732956, + -0.10690448433160782, + -0.11852722615003586, + 0.9977210760116577 + ], + "perturbation_rho": [ + 0.03511377051472664, + 0.04956234246492386, + -0.005039767827838659, + -0.028402434661984444, + 0.002629645634442568, + -0.0117247449234128 + ], + "nudging": { + "0.001": [ + -2.101645804941654e-06, + -5.005858838558197e-07, + 6.752088665962219e-09, + 1.1606607586145401e-07, + 8.440110832452774e-08, + -1.3562384992837906e-06 + ], + "0.003": [ + -5.929498001933098e-06, + -1.1969823390245438e-06, + 3.577442839741707e-07, + 4.066387191414833e-07, + 6.486661732196808e-07, + -5.192705430090427e-06 + ], + "0.01": [ + -1.9490602426230907e-05, + -3.955909051001072e-06, + 1.2312084436416626e-06, + 1.8166610971093178e-06, + 2.1688174456357956e-06, + -1.8663820810616016e-05 + ] + }, + "hidden_norms_per_layer": [ + 7009.63232421875, + 187427.9375, + 535894.875, + 1119580.875, + 1710404.625, + 2170302.75, + 769555.6875 + ], + "bp_grad_norms_per_layer": [ + 2.6194647944066674e-05, + 1.3078431493340759e-06, + 7.762633913444006e-07, + 7.287014227586042e-07, + 7.358628977272019e-07, + 7.391291774183628e-07, + 7.346811230490857e-07 + ] + }, + "drift": { + "embed.weight": 48.84389712380156, + "embed.bias": 18.04622672355978, + "blocks.0.ln.weight": 1.2715512223724241, + "blocks.0.w1.weight": 18.323188610788566, + "blocks.0.w1.bias": 14.621705904927168, + "blocks.0.w2.weight": 62.82097007591977, + "blocks.1.ln.weight": 1.0780729921051364, + "blocks.1.w1.weight": 19.649865156855387, + "blocks.1.w1.bias": 13.846457460989065, + "blocks.1.w2.weight": 46.895166874548835, + "blocks.2.ln.weight": 0.8170823371574294, + "blocks.2.w1.weight": 21.511415879383534, + "blocks.2.w1.bias": 20.453359426824278, + "blocks.2.w2.weight": 36.12877482089504, + "blocks.3.ln.weight": 0.7815298682497922, + "blocks.3.w1.weight": 23.873373589893692, + "blocks.3.w1.bias": 23.438832639917354, + "blocks.3.w2.weight": 34.65004625568547, + "blocks.4.ln.weight": 0.7049287284135428, + "blocks.4.w1.weight": 24.756294779706568, + "blocks.4.w1.bias": 25.075713132594746, + "blocks.4.w2.weight": 39.3474190645009, + "blocks.5.ln.weight": 0.825085939532927, + "blocks.5.w1.weight": 25.260469003772034, + "blocks.5.w1.bias": 24.835263327096893, + "blocks.5.w2.weight": 47.04696050246029, + "out_ln.weight": 0.3016529235625585, + "out_head.weight": 6.069315898628023, + "out_head.bias": 1.8625596673903535 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 6, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 3 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L6_seed3", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d512_L6_seed4/results_cifar10.json b/results/fa_dfa_d512_L6_seed4/results_cifar10.json new file mode 100644 index 0000000..0f4b59f --- /dev/null +++ b/results/fa_dfa_d512_L6_seed4/results_cifar10.json @@ -0,0 +1,837 @@ +{ + "4": { + "dfa": { + "log": { + "train_loss": [ + 2.0741304068756103, + 2.052452068710327, + 2.0495207961273194, + 2.0484457649230956, + 2.045468647079468, + 2.0459753023529053, + 2.0450184237670896, + 2.0391295627593995, + 2.0377573050689697, + 2.0382228170394896, + 2.0312761193466184, + 2.0324994396591185, + 2.0278026767730712, + 2.0305476247787477, + 2.0286661254119873, + 2.029820890045166, + 2.0290051824188233, + 2.0265300660705567, + 2.025750561065674, + 2.027355994720459, + 2.0275561265563966, + 2.023750612411499, + 2.025079263343811, + 2.025395411605835, + 2.0240028636932372, + 2.022815622253418, + 2.0226645655822755, + 2.022443874359131, + 2.022390442466736, + 2.023733573875427, + 2.0224531535339354, + 2.0227799210357666, + 2.0218398332214353, + 2.020151731872559, + 2.021256269683838, + 2.0194612783432007, + 2.018333575744629, + 2.0183729624938964, + 2.0209142852020263, + 2.022426343536377, + 2.018248730201721, + 2.0204831772613527, + 2.0188777527618407, + 2.0191313500213623, + 2.0171565660858155, + 2.0189703364562988, + 2.0182820797729493, + 2.0170327036285403, + 2.0180798105239868, + 2.0163722726821898, + 2.0178759913635256, + 2.017920862388611, + 2.015365661468506, + 2.0167020557403563, + 2.0157576428985595, + 2.0196311264801023, + 2.0176754986190795, + 2.017508302345276, + 2.0164641773223875, + 2.016444677734375, + 2.0168316576385497, + 2.0152849128723145, + 2.015149051208496, + 2.0158757721710203, + 2.016044344520569, + 2.0154035729217528, + 2.012803511734009, + 2.0160995071792605, + 2.015346680755615, + 2.0140808686828615, + 2.0151058874893186, + 2.0144621922302246, + 2.0154705238342285, + 2.0149063832092287, + 2.0115139068603516, + 2.0131744523620605, + 2.0158527814483644, + 2.0125201052474977, + 2.0122518685913087, + 2.0132210369873045, + 2.0136120219421385, + 2.0142880089950563, + 2.013032984085083, + 2.011840599975586, + 2.0118305902862548, + 2.012003014450073, + 2.011992978248596, + 2.0092738487243653, + 2.0135212693023683, + 2.010035700378418, + 2.011592872314453, + 2.010264990081787, + 2.0128248121643066, + 2.0130243259048464, + 2.0120003854751585, + 2.011955912322998, + 2.011813560256958, + 2.0142602909851073, + 2.0124067852020264, + 2.0116278426742555 + ], + "train_acc": [ + 0.23226, + 0.2375, + 0.23894, + 0.2411, + 0.24176, + 0.24224, + 0.24692, + 0.24654, + 0.24822, + 0.24668, + 0.25084, + 0.24936, + 0.2519, + 0.24896, + 0.25208, + 0.25194, + 0.2549, + 0.25158, + 0.25292, + 0.25402, + 0.25358, + 0.2561, + 0.25688, + 0.25572, + 0.25782, + 0.25752, + 0.2589, + 0.25694, + 0.2558, + 0.25686, + 0.25668, + 0.259, + 0.2579, + 0.25902, + 0.25906, + 0.25856, + 0.26018, + 0.2611, + 0.25834, + 0.26182, + 0.26058, + 0.25944, + 0.26134, + 0.25952, + 0.26058, + 0.2594, + 0.26156, + 0.26526, + 0.26156, + 0.2652, + 0.26178, + 0.26122, + 0.2618, + 0.26384, + 0.26214, + 0.26386, + 0.26256, + 0.2644, + 0.26262, + 0.2629, + 0.26074, + 0.26266, + 0.2634, + 0.26182, + 0.26266, + 0.26396, + 0.26654, + 0.2633, + 0.2613, + 0.26562, + 0.26356, + 0.26384, + 0.26406, + 0.26388, + 0.26606, + 0.26592, + 0.265, + 0.26578, + 0.268, + 0.2666, + 0.26434, + 0.2666, + 0.26418, + 0.26448, + 0.26558, + 0.2644, + 0.26708, + 0.26714, + 0.2644, + 0.2675, + 0.26548, + 0.26528, + 0.26508, + 0.26738, + 0.26266, + 0.2658, + 0.26572, + 0.26576, + 0.26514, + 0.2658 + ], + "test_acc": [ + 0.2469, + 0.2437, + 0.255, + 0.2587, + 0.2612, + 0.2707, + 0.2697, + 0.2504, + 0.279, + 0.2707, + 0.2664, + 0.2693, + 0.2715, + 0.2832, + 0.2849, + 0.2633, + 0.2682, + 0.2874, + 0.2852, + 0.2671, + 0.273, + 0.2819, + 0.2654, + 0.2763, + 0.279, + 0.2924, + 0.281, + 0.2755, + 0.2706, + 0.2758, + 0.2812, + 0.2801, + 0.2847, + 0.2684, + 0.2679, + 0.2819, + 0.2548, + 0.2726, + 0.2794, + 0.2838, + 0.2834, + 0.2788, + 0.283, + 0.2721, + 0.2782, + 0.2742, + 0.2817, + 0.2752, + 0.2722, + 0.2812, + 0.2797, + 0.2792, + 0.2799, + 0.2747, + 0.2879, + 0.2769, + 0.2842, + 0.2793, + 0.2857, + 0.2797, + 0.2793, + 0.283, + 0.2889, + 0.2789, + 0.2748, + 0.2811, + 0.2814, + 0.2775, + 0.282, + 0.2787, + 0.2862, + 0.2861, + 0.2788, + 0.2763, + 0.272, + 0.2788, + 0.2818, + 0.2839, + 0.2865, + 0.2837, + 0.2898, + 0.2814, + 0.2784, + 0.2823, + 0.2852, + 0.2835, + 0.2848, + 0.2846, + 0.2821, + 0.2809, + 0.2812, + 0.2826, + 0.2818, + 0.2832, + 0.2834, + 0.2832, + 0.2838, + 0.2838, + 0.2838, + 0.2837 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.337674617767334, + 0.0006247189012356102, + 0.0002945333835668862, + -7.626566366525367e-05, + -0.00035525468410924077, + 4.714205351774581e-05 + ], + "perturbation_rho": [ + 0.005920294672250748, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -3.096647560596466e-07, + 0.0, + 0.0, + 0.0, + 9.313225746154785e-10, + 0.0 + ], + "0.003": [ + -9.313225746154785e-07, + 9.313225746154785e-10, + 0.0, + 0.0, + 9.313225746154785e-10, + 0.0 + ], + "0.01": [ + -2.87545844912529e-06, + 0.0, + 0.0, + 9.313225746154785e-10, + 9.313225746154785e-10, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 58616.81640625, + 1590750592.0, + 3689531648.0, + 5549292544.0, + 7886680064.0, + 9552689152.0, + 9643235328.0 + ], + "bp_grad_norms_per_layer": [ + 1.978580996819801e-07, + 1.773336893995392e-10, + 1.7791043638304416e-10, + 1.7622008019468893e-10, + 1.761738255279255e-10, + 1.7640035265831244e-10, + 1.7640640337379665e-10 + ] + }, + "drift": { + "embed.weight": 352.51471697650226, + "embed.bias": 269.49001616797665, + "blocks.0.ln.weight": 9.43372656624646, + "blocks.0.w1.weight": 313.54087904771035, + "blocks.0.w1.bias": 290.62855542890503, + "blocks.0.w2.weight": 505.8716937587808, + "blocks.1.ln.weight": 8.808169542950294, + "blocks.1.w1.weight": 358.5357409826361, + "blocks.1.w1.bias": 345.5274778587847, + "blocks.1.w2.weight": 349.75977929168596, + "blocks.2.ln.weight": 9.276124324101515, + "blocks.2.w1.weight": 395.6550378108265, + "blocks.2.w1.bias": 371.0546436106137, + "blocks.2.w2.weight": 375.3367421271525, + "blocks.3.ln.weight": 10.24244368106198, + "blocks.3.w1.weight": 420.44883795775684, + "blocks.3.w1.bias": 397.1721710589656, + "blocks.3.w2.weight": 405.8311891944568, + "blocks.4.ln.weight": 10.577140914534366, + "blocks.4.w1.weight": 436.864273411592, + "blocks.4.w1.bias": 406.9723932351042, + "blocks.4.w2.weight": 404.6446833274338, + "blocks.5.ln.weight": 7.782034028860308, + "blocks.5.w1.weight": 310.2471895048895, + "blocks.5.w1.bias": 297.2855707982679, + "blocks.5.w2.weight": 270.3724536312007, + "out_ln.weight": 0.5880164032165464, + "out_head.weight": 8.450568347443314, + "out_head.bias": 0.7740018910126388 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.038021390991211, + 1.9517871558380127, + 1.923053580093384, + 1.9045403602600097, + 1.8952905172729493, + 1.8863004998397828, + 1.8793055545043946, + 1.868332847251892, + 1.860797550354004, + 1.8578368386077881, + 1.8483906720352172, + 1.8424116570663451, + 1.8290137046051025, + 1.8305858782577515, + 1.823958169631958, + 1.8175954293823242, + 1.8124929406356811, + 1.8102095357513428, + 1.8039513228607178, + 1.8015951740264893, + 1.7953115439605714, + 1.7889242324447632, + 1.7881110052871705, + 1.7843929150390625, + 1.7783852965545655, + 1.777275090942383, + 1.7724220377349853, + 1.7673138269424438, + 1.7659701934814453, + 1.7594620475006104, + 1.753105486755371, + 1.7537677935028075, + 1.7502301782989502, + 1.743755245666504, + 1.7426675580596924, + 1.7381902320098876, + 1.7371277405548096, + 1.7380020862197876, + 1.7344951665878297, + 1.732631077194214, + 1.7297903375244141, + 1.7305091592025756, + 1.72773092628479, + 1.7255595262908936, + 1.7265567044067383, + 1.7226529630279541, + 1.7242008368301391, + 1.7205993241119384, + 1.714460986251831, + 1.7183882989501953, + 1.7157616848373414, + 1.7143050561141968, + 1.711619351425171, + 1.712949155960083, + 1.7116730854797364, + 1.7097789264297485, + 1.7088176770401, + 1.7031783606338502, + 1.705644631690979, + 1.702429769935608, + 1.7007943072891236, + 1.6994391622924805, + 1.698121346435547, + 1.6975506435775758, + 1.6984819930648805, + 1.6961595495224, + 1.6988952493286134, + 1.700139475479126, + 1.698183412246704, + 1.6925607485580445, + 1.6942372484970092, + 1.695046120262146, + 1.6923927303695678, + 1.6946959600448608, + 1.6924981302261353, + 1.688026438217163, + 1.6864081888580322, + 1.6876143859863282, + 1.687906523399353, + 1.6851787896347046, + 1.6870249395370482, + 1.6865479514312745, + 1.6857007721710204, + 1.686013254776001, + 1.685618132247925, + 1.6866127777099609, + 1.682880577697754, + 1.6817467867660523, + 1.6864028040695191, + 1.683715857887268, + 1.6836617621612548, + 1.6802171184921264, + 1.6822573038101196, + 1.6837931204223633, + 1.6831412961196899, + 1.6796661951065064, + 1.682324230041504, + 1.6844535943222045, + 1.6789856439590454, + 1.680855456161499 + ], + "train_acc": [ + 0.2477, + 0.28914, + 0.30422, + 0.30836, + 0.31314, + 0.31906, + 0.3247, + 0.32558, + 0.33112, + 0.33134, + 0.33596, + 0.33794, + 0.34382, + 0.34438, + 0.3429, + 0.3491, + 0.34826, + 0.351, + 0.35252, + 0.3536, + 0.35608, + 0.36104, + 0.35948, + 0.36006, + 0.36364, + 0.36444, + 0.36608, + 0.367, + 0.36846, + 0.36866, + 0.37048, + 0.37416, + 0.37428, + 0.37498, + 0.37714, + 0.37884, + 0.37872, + 0.3782, + 0.381, + 0.38058, + 0.3804, + 0.38016, + 0.38418, + 0.38552, + 0.38428, + 0.3811, + 0.38204, + 0.38364, + 0.38758, + 0.38694, + 0.38588, + 0.38568, + 0.388, + 0.38704, + 0.39158, + 0.38884, + 0.38912, + 0.38936, + 0.39034, + 0.39182, + 0.39316, + 0.3944, + 0.39182, + 0.39272, + 0.39394, + 0.39482, + 0.39348, + 0.39242, + 0.39554, + 0.3963, + 0.39508, + 0.39252, + 0.39634, + 0.39332, + 0.39612, + 0.39644, + 0.39648, + 0.39668, + 0.3985, + 0.3995, + 0.39528, + 0.39904, + 0.39988, + 0.39688, + 0.39954, + 0.39648, + 0.4014, + 0.39972, + 0.39808, + 0.4019, + 0.39968, + 0.40222, + 0.39884, + 0.39954, + 0.39804, + 0.40034, + 0.39982, + 0.39856, + 0.40168, + 0.4006 + ], + "test_acc": [ + 0.2942, + 0.311, + 0.3308, + 0.342, + 0.3419, + 0.3528, + 0.3504, + 0.3462, + 0.3605, + 0.367, + 0.3696, + 0.3674, + 0.3716, + 0.3801, + 0.3769, + 0.3624, + 0.3792, + 0.3785, + 0.3758, + 0.3779, + 0.3853, + 0.3821, + 0.3816, + 0.3891, + 0.3866, + 0.3936, + 0.3896, + 0.3908, + 0.397, + 0.3971, + 0.4013, + 0.4023, + 0.393, + 0.3983, + 0.3977, + 0.3969, + 0.3982, + 0.405, + 0.4014, + 0.4089, + 0.4078, + 0.3985, + 0.4063, + 0.4039, + 0.4008, + 0.4096, + 0.4111, + 0.4059, + 0.4128, + 0.4079, + 0.4066, + 0.4115, + 0.407, + 0.4156, + 0.4156, + 0.4208, + 0.4164, + 0.4206, + 0.4124, + 0.4186, + 0.4142, + 0.4123, + 0.4105, + 0.4153, + 0.4198, + 0.4192, + 0.419, + 0.4198, + 0.4202, + 0.417, + 0.4233, + 0.4224, + 0.4164, + 0.4183, + 0.4193, + 0.4178, + 0.4189, + 0.4232, + 0.4184, + 0.4206, + 0.4223, + 0.4222, + 0.417, + 0.4199, + 0.421, + 0.4202, + 0.4181, + 0.4177, + 0.4214, + 0.4186, + 0.4201, + 0.4189, + 0.4221, + 0.4205, + 0.4194, + 0.42, + 0.4203, + 0.4199, + 0.4196, + 0.4195 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.015272621065378189, + 0.05551045760512352, + 0.03781959414482117, + -0.09142173081636429, + -0.12766994535923004, + 0.9957026839256287 + ], + "perturbation_rho": [ + -0.044239602982997894, + 0.018639111891388893, + -0.002674652263522148, + -0.024060344323515892, + -0.007919290103018284, + -0.016996072605252266 + ], + "nudging": { + "0.001": [ + -2.239597961306572e-06, + -6.057089194655418e-07, + -1.341104507446289e-07, + 1.417938619852066e-07, + 1.6426201909780502e-07, + -1.8667196854948997e-06 + ], + "0.003": [ + -6.492482498288155e-06, + -1.5690457075834274e-06, + -4.0302984416484833e-07, + 5.105976015329361e-07, + 7.660128176212311e-07, + -6.384681910276413e-06 + ], + "0.01": [ + -2.1643005311489105e-05, + -5.265465006232262e-06, + -1.4334218576550484e-06, + 2.071727067232132e-06, + 2.557528205215931e-06, + -2.2026244550943375e-05 + ] + }, + "hidden_norms_per_layer": [ + 5698.91357421875, + 74089.609375, + 464746.75, + 571451.9375, + 1133723.625, + 1542811.875, + 774708.375 + ], + "bp_grad_norms_per_layer": [ + 3.8776230212533846e-05, + 3.033989969480899e-06, + 9.73796886682976e-07, + 8.570191312173847e-07, + 8.443464594165562e-07, + 8.415062779931759e-07, + 8.336798487107444e-07 + ] + }, + "drift": { + "embed.weight": 39.486431642766554, + "embed.bias": 14.997056918535808, + "blocks.0.ln.weight": 1.193207647495217, + "blocks.0.w1.weight": 15.819034065039398, + "blocks.0.w1.bias": 13.818478530432463, + "blocks.0.w2.weight": 51.89139321984625, + "blocks.1.ln.weight": 1.0430825935788601, + "blocks.1.w1.weight": 19.906462986471904, + "blocks.1.w1.bias": 13.39027258705086, + "blocks.1.w2.weight": 46.62053425901664, + "blocks.2.ln.weight": 0.7864666972877575, + "blocks.2.w1.weight": 18.270196046735624, + "blocks.2.w1.bias": 12.944067249835973, + "blocks.2.w2.weight": 49.53460850460699, + "blocks.3.ln.weight": 0.64141877927046, + "blocks.3.w1.weight": 20.764880942811217, + "blocks.3.w1.bias": 20.679665524082093, + "blocks.3.w2.weight": 25.665386885036355, + "blocks.4.ln.weight": 0.5680829244316266, + "blocks.4.w1.weight": 20.078876458586997, + "blocks.4.w1.bias": 21.20100063860491, + "blocks.4.w2.weight": 30.60469406190784, + "blocks.5.ln.weight": 0.6546824348706847, + "blocks.5.w1.weight": 18.8873912461224, + "blocks.5.w1.bias": 18.79210459184489, + "blocks.5.w2.weight": 45.45712755218628, + "out_ln.weight": 0.3591377698736513, + "out_head.weight": 6.323626106875271, + "out_head.bias": 0.8856670357666573 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 6, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 4 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L6_seed4", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d512_L6_seed5/results_cifar10.json b/results/fa_dfa_d512_L6_seed5/results_cifar10.json new file mode 100644 index 0000000..e974678 --- /dev/null +++ b/results/fa_dfa_d512_L6_seed5/results_cifar10.json @@ -0,0 +1,837 @@ +{ + "5": { + "dfa": { + "log": { + "train_loss": [ + 2.0602571796417237, + 2.0357400941467283, + 2.0281838052368166, + 2.023866386795044, + 2.0154901094818114, + 2.019438636016846, + 2.0162963735580446, + 2.0127672254180906, + 2.0099401387786866, + 2.011543902282715, + 2.006219865684509, + 2.0047207692718505, + 2.0029416635894775, + 2.0006940887069704, + 2.0026997183609008, + 2.0016165689468384, + 2.000759786224365, + 1.9997175212860108, + 1.9980090442276002, + 1.998993673324585, + 1.996654589538574, + 1.9994389457702637, + 1.9997356970977784, + 1.993837992324829, + 1.9959214011383057, + 1.9948663213729858, + 1.9960267625427246, + 1.9953035424041747, + 1.9930475028991699, + 1.9938180725479127, + 1.9931120376586915, + 1.989824568710327, + 1.9910550234985351, + 1.9906865880584717, + 1.9898072389984132, + 1.9914395150375366, + 1.9915398007202147, + 1.9909269763183595, + 1.9913815157699586, + 1.9884502619934081, + 1.991486932411194, + 1.989191399459839, + 1.989710302581787, + 1.9897258306884766, + 1.9906724350738525, + 1.9914843771362305, + 1.9885967330169678, + 1.98751413482666, + 1.989536333770752, + 1.98715817817688, + 1.9881259790802002, + 1.9869521377182007, + 1.9907182498931886, + 1.9890414456939698, + 1.9886535820770264, + 1.9867755039215087, + 1.988041506919861, + 1.9891718532943725, + 1.9870871344375611, + 1.985717644920349, + 1.9884497722625731, + 1.9873963590240478, + 1.9863690199279784, + 1.9873637462997435, + 1.9868278685760499, + 1.9860613510894776, + 1.9854717936706543, + 1.9856478284454346, + 1.9866084854888917, + 1.987229610748291, + 1.9831793658828736, + 1.9870755458831788, + 1.985736505126953, + 1.9868509146881104, + 1.9870286437225342, + 1.9859972204971315, + 1.9860080694961548, + 1.9868842920684815, + 1.9851076070404052, + 1.985850848121643, + 1.9842121643447876, + 1.9848132551574706, + 1.9846837445831298, + 1.9864671915435792, + 1.9831796479034425, + 1.984968702774048, + 1.983607007446289, + 1.9863699700546265, + 1.9863424487304688, + 1.98700967628479, + 1.9837391945648193, + 1.9848160073471068, + 1.9845281774139405, + 1.9848566493988038, + 1.9822382134628296, + 1.9836605060577392, + 1.9821798385620117, + 1.9824478280639648, + 1.9817402114486695, + 1.9846896728515624 + ], + "train_acc": [ + 0.24254, + 0.2517, + 0.25592, + 0.25594, + 0.26192, + 0.25714, + 0.26284, + 0.26238, + 0.26512, + 0.26496, + 0.26548, + 0.26742, + 0.2712, + 0.2708, + 0.268, + 0.27104, + 0.27124, + 0.27076, + 0.2733, + 0.27132, + 0.27188, + 0.26918, + 0.27278, + 0.27414, + 0.2739, + 0.27446, + 0.27544, + 0.2747, + 0.27416, + 0.27496, + 0.27546, + 0.27906, + 0.27846, + 0.27636, + 0.27836, + 0.27738, + 0.27604, + 0.27838, + 0.27826, + 0.28024, + 0.27756, + 0.28068, + 0.2774, + 0.27826, + 0.27808, + 0.2748, + 0.27776, + 0.27942, + 0.28244, + 0.28036, + 0.28156, + 0.28332, + 0.27896, + 0.28098, + 0.28062, + 0.27874, + 0.2805, + 0.28012, + 0.2826, + 0.28204, + 0.27786, + 0.2819, + 0.28144, + 0.28236, + 0.28224, + 0.28264, + 0.2807, + 0.28432, + 0.28308, + 0.28156, + 0.28262, + 0.28086, + 0.28328, + 0.28292, + 0.28262, + 0.2819, + 0.28378, + 0.28152, + 0.28326, + 0.28238, + 0.28308, + 0.28294, + 0.28316, + 0.28292, + 0.28588, + 0.2832, + 0.28346, + 0.28494, + 0.2829, + 0.28252, + 0.28328, + 0.28276, + 0.28382, + 0.28216, + 0.28412, + 0.28436, + 0.28506, + 0.28418, + 0.28462, + 0.28428 + ], + "test_acc": [ + 0.2458, + 0.2841, + 0.28, + 0.2941, + 0.2898, + 0.2696, + 0.2791, + 0.2876, + 0.2822, + 0.3025, + 0.2919, + 0.292, + 0.2972, + 0.2946, + 0.2821, + 0.2924, + 0.2897, + 0.2957, + 0.3053, + 0.2873, + 0.3059, + 0.2873, + 0.2865, + 0.3017, + 0.3023, + 0.2803, + 0.3064, + 0.2856, + 0.2919, + 0.2974, + 0.2917, + 0.2949, + 0.2862, + 0.2929, + 0.2952, + 0.2864, + 0.3013, + 0.3011, + 0.2934, + 0.2975, + 0.2961, + 0.304, + 0.3058, + 0.2954, + 0.3001, + 0.2882, + 0.304, + 0.2976, + 0.3002, + 0.291, + 0.3019, + 0.3012, + 0.3085, + 0.3014, + 0.3042, + 0.2999, + 0.2951, + 0.305, + 0.3104, + 0.3037, + 0.3011, + 0.3046, + 0.2999, + 0.3087, + 0.2981, + 0.3044, + 0.298, + 0.2931, + 0.3016, + 0.306, + 0.2997, + 0.3019, + 0.2984, + 0.3028, + 0.2957, + 0.3086, + 0.3062, + 0.2985, + 0.2995, + 0.31, + 0.3028, + 0.305, + 0.2993, + 0.3029, + 0.3047, + 0.3011, + 0.3028, + 0.3008, + 0.302, + 0.3028, + 0.3053, + 0.305, + 0.304, + 0.3041, + 0.3021, + 0.3024, + 0.3022, + 0.3019, + 0.3021, + 0.3022 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.4137295186519623, + 0.00037798876292072237, + 0.00035168789327144623, + -0.00011552235810086131, + 0.0004928180132992566, + 0.0006692470051348209 + ], + "perturbation_rho": [ + -0.004769737366586924, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -4.731118679046631e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.455657184123993e-06, + -9.313225746154785e-10, + -9.313225746154785e-10, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -4.32552769780159e-06, + -1.862645149230957e-09, + 0.0, + 0.0, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 53850.484375, + 999738112.0, + 1825332736.0, + 2847918592.0, + 3948823552.0, + 7465979392.0, + 8693204992.0 + ], + "bp_grad_norms_per_layer": [ + 2.7038154826186656e-07, + 3.3068986438067327e-10, + 3.3043778824293213e-10, + 3.3058311643685556e-10, + 3.306660778523707e-10, + 3.3053956793871464e-10, + 3.3055713721807933e-10 + ] + }, + "drift": { + "embed.weight": 326.316807505004, + "embed.bias": 247.64016879156944, + "blocks.0.ln.weight": 10.506143366890935, + "blocks.0.w1.weight": 266.6641005309724, + "blocks.0.w1.bias": 237.30788612441194, + "blocks.0.w2.weight": 463.4592869979373, + "blocks.1.ln.weight": 8.041758423869682, + "blocks.1.w1.weight": 275.6017801051782, + "blocks.1.w1.bias": 242.84249700605667, + "blocks.1.w2.weight": 293.6559726136312, + "blocks.2.ln.weight": 8.38043701730794, + "blocks.2.w1.weight": 303.9003341110909, + "blocks.2.w1.bias": 274.77693120914864, + "blocks.2.w2.weight": 311.7253398049225, + "blocks.3.ln.weight": 8.344881260946064, + "blocks.3.w1.weight": 324.91067284816666, + "blocks.3.w1.bias": 320.84528008040263, + "blocks.3.w2.weight": 323.2526807364217, + "blocks.4.ln.weight": 10.649638215660875, + "blocks.4.w1.weight": 442.18421448330207, + "blocks.4.w1.bias": 419.08703726739526, + "blocks.4.w2.weight": 435.13630410704593, + "blocks.5.ln.weight": 10.016310662065466, + "blocks.5.w1.weight": 397.1121641226028, + "blocks.5.w1.bias": 370.9478671282672, + "blocks.5.w2.weight": 383.0732880205366, + "out_ln.weight": 0.7323829331223068, + "out_head.weight": 9.997613880770942, + "out_head.bias": 0.47019353243584805 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0245736110687256, + 1.9432434065246582, + 1.9171724227905274, + 1.9035633584594727, + 1.8866331351470946, + 1.8834057013702392, + 1.8780549462509155, + 1.873760415649414, + 1.86404331199646, + 1.8641292553329467, + 1.8578160486602784, + 1.8527875860595704, + 1.8485828662109376, + 1.8464561464691163, + 1.8462472164154053, + 1.8430843777084351, + 1.8429961044692993, + 1.8410946324920654, + 1.8362357471466064, + 1.8319243711090087, + 1.8236686227416992, + 1.8258946035766601, + 1.8234262411117554, + 1.8137903323364257, + 1.811674436264038, + 1.808376842956543, + 1.809517276611328, + 1.8052343613052368, + 1.799909278907776, + 1.7993224626922608, + 1.7999323937225342, + 1.7926906624984742, + 1.7933230242156983, + 1.7922531407928466, + 1.7895995733261107, + 1.7906105719375611, + 1.7885604776763917, + 1.7840546982192993, + 1.7896481491088867, + 1.7832089348602296, + 1.782369646949768, + 1.7770448540878296, + 1.7808699810409545, + 1.7771298468017578, + 1.7774345711898805, + 1.7765879013061523, + 1.7742172146987916, + 1.7695884064102172, + 1.775497437171936, + 1.767691201248169, + 1.7683605837249756, + 1.7657782846069336, + 1.7674894732666016, + 1.7704336560058593, + 1.7661452458190918, + 1.7612660248947143, + 1.7608598317718507, + 1.7622494547271728, + 1.7596149068450928, + 1.7570250023651124, + 1.7613954134368897, + 1.7564489492034911, + 1.7566620055389404, + 1.7558204865264893, + 1.7551631838989257, + 1.7566033951187134, + 1.7519007582855224, + 1.7499195809936523, + 1.7534941720581054, + 1.7503434635162354, + 1.74504672996521, + 1.7483937967681884, + 1.7457905151367188, + 1.7486124094009399, + 1.7461996807479858, + 1.7445868758392333, + 1.7431557595062255, + 1.7459273419570922, + 1.7443120336151123, + 1.7433632315063476, + 1.7413235707855224, + 1.7421810306167602, + 1.7423726794815064, + 1.7430526740264893, + 1.7426294480133058, + 1.7443564529037476, + 1.7412052463531493, + 1.7419821909332276, + 1.742747957763672, + 1.7393953549194336, + 1.7356409253692626, + 1.7367648685455321, + 1.7391734333038331, + 1.7373552332305908, + 1.7321791064071654, + 1.737231011619568, + 1.7380055041122437, + 1.7336925805664063, + 1.735386132774353, + 1.7398383757781983 + ], + "train_acc": [ + 0.25622, + 0.29234, + 0.30374, + 0.307, + 0.31608, + 0.31724, + 0.32094, + 0.32354, + 0.32676, + 0.33046, + 0.33084, + 0.33444, + 0.33722, + 0.33964, + 0.3378, + 0.33812, + 0.33888, + 0.34132, + 0.3422, + 0.34414, + 0.34496, + 0.34604, + 0.3475, + 0.34998, + 0.35366, + 0.3559, + 0.35212, + 0.35394, + 0.35768, + 0.35582, + 0.35698, + 0.3617, + 0.35966, + 0.35904, + 0.36126, + 0.36076, + 0.36356, + 0.36338, + 0.3616, + 0.36454, + 0.36376, + 0.36512, + 0.36348, + 0.36704, + 0.36486, + 0.36646, + 0.36906, + 0.368, + 0.36894, + 0.37176, + 0.36712, + 0.37086, + 0.37036, + 0.37114, + 0.37032, + 0.37276, + 0.37386, + 0.37112, + 0.37256, + 0.3721, + 0.37356, + 0.37602, + 0.37426, + 0.37504, + 0.37378, + 0.37652, + 0.37642, + 0.37658, + 0.3768, + 0.3798, + 0.37862, + 0.37708, + 0.37848, + 0.37812, + 0.37732, + 0.37934, + 0.38062, + 0.37766, + 0.37892, + 0.38066, + 0.38076, + 0.38064, + 0.38124, + 0.38186, + 0.38218, + 0.37966, + 0.38064, + 0.38088, + 0.37996, + 0.38164, + 0.38374, + 0.38408, + 0.38228, + 0.3827, + 0.38392, + 0.38448, + 0.38308, + 0.38368, + 0.38292, + 0.38126 + ], + "test_acc": [ + 0.2956, + 0.3353, + 0.3321, + 0.3526, + 0.3442, + 0.3427, + 0.3492, + 0.3563, + 0.354, + 0.3594, + 0.3655, + 0.3603, + 0.3701, + 0.3726, + 0.3688, + 0.3702, + 0.3634, + 0.3695, + 0.3742, + 0.3755, + 0.3706, + 0.3715, + 0.3676, + 0.3816, + 0.3786, + 0.3874, + 0.3836, + 0.3703, + 0.3798, + 0.379, + 0.3806, + 0.3918, + 0.3845, + 0.3862, + 0.3933, + 0.3876, + 0.3924, + 0.3857, + 0.3748, + 0.3943, + 0.3942, + 0.389, + 0.398, + 0.3839, + 0.3964, + 0.3949, + 0.3943, + 0.3936, + 0.3949, + 0.3982, + 0.3982, + 0.4012, + 0.3968, + 0.3988, + 0.3977, + 0.3977, + 0.401, + 0.3978, + 0.4017, + 0.4016, + 0.4013, + 0.3972, + 0.3961, + 0.4032, + 0.3959, + 0.4004, + 0.3967, + 0.3993, + 0.4021, + 0.4044, + 0.4053, + 0.4023, + 0.4054, + 0.4061, + 0.4047, + 0.403, + 0.4036, + 0.4032, + 0.3985, + 0.4059, + 0.4049, + 0.4061, + 0.4017, + 0.4059, + 0.4063, + 0.4067, + 0.4039, + 0.4053, + 0.4048, + 0.4084, + 0.4051, + 0.4074, + 0.4073, + 0.4054, + 0.4059, + 0.4059, + 0.4056, + 0.4062, + 0.4061, + 0.4058 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.02408183179795742, + 0.061381518840789795, + -0.06236571818590164, + -0.022614534944295883, + 0.005173904821276665, + 0.9961987137794495 + ], + "perturbation_rho": [ + -0.0075406357645988464, + 0.015521236695349216, + -0.03143823519349098, + -0.03976670280098915, + 0.044851042330265045, + -0.006162412464618683 + ], + "nudging": { + "0.001": [ + -2.3316824808716774e-06, + -2.2514723241329193e-07, + 1.0617077350616455e-07, + 2.3283064365386963e-09, + -6.05359673500061e-09, + -1.5987316146492958e-06 + ], + "0.003": [ + -6.882240995764732e-06, + -6.901100277900696e-07, + 3.80445271730423e-07, + 1.1408701539039612e-08, + -5.21540641784668e-08, + -5.463254638016224e-06 + ], + "0.01": [ + -2.2817635908722878e-05, + -2.484419383108616e-06, + 1.1968659237027168e-06, + 3.1816307455301285e-07, + -1.9674189388751984e-07, + -1.9047758542001247e-05 + ] + }, + "hidden_norms_per_layer": [ + 8055.30419921875, + 113125.40625, + 1163908.5, + 1491131.625, + 1596391.25, + 1615158.375, + 896264.875 + ], + "bp_grad_norms_per_layer": [ + 2.2761472791898996e-05, + 1.4049503533897223e-06, + 7.91284321621788e-07, + 7.916268032204243e-07, + 7.900576974861906e-07, + 7.786943569954019e-07, + 7.593308168907242e-07 + ] + }, + "drift": { + "embed.weight": 54.562706327622614, + "embed.bias": 17.31102523118792, + "blocks.0.ln.weight": 1.1930605549913966, + "blocks.0.w1.weight": 17.719005515431316, + "blocks.0.w1.bias": 12.672636747852074, + "blocks.0.w2.weight": 57.38339155194176, + "blocks.1.ln.weight": 1.1832029127181742, + "blocks.1.w1.weight": 25.16862327064878, + "blocks.1.w1.bias": 22.86442569965511, + "blocks.1.w2.weight": 40.800794703713116, + "blocks.2.ln.weight": 0.7781541585073879, + "blocks.2.w1.weight": 21.816615196629947, + "blocks.2.w1.bias": 21.85112016896182, + "blocks.2.w2.weight": 56.80267641124345, + "blocks.3.ln.weight": 0.6733768384350268, + "blocks.3.w1.weight": 19.172490261311314, + "blocks.3.w1.bias": 19.28818718647845, + "blocks.3.w2.weight": 56.812396875482676, + "blocks.4.ln.weight": 0.4559901335911821, + "blocks.4.w1.weight": 14.442025794898862, + "blocks.4.w1.bias": 12.350979488836783, + "blocks.4.w2.weight": 51.455152706019774, + "blocks.5.ln.weight": 0.6410944171187689, + "blocks.5.w1.weight": 19.462352016085756, + "blocks.5.w1.bias": 18.71679203171091, + "blocks.5.w2.weight": 54.03878336670424, + "out_ln.weight": 0.3528795738919825, + "out_head.weight": 6.138358709909979, + "out_head.bias": 0.7832523450909458 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 6, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 5 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L6_seed5", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d512_L6_seed6/results_cifar10.json b/results/fa_dfa_d512_L6_seed6/results_cifar10.json new file mode 100644 index 0000000..e57be14 --- /dev/null +++ b/results/fa_dfa_d512_L6_seed6/results_cifar10.json @@ -0,0 +1,837 @@ +{ + "6": { + "dfa": { + "log": { + "train_loss": [ + 2.058094557647705, + 2.0324147889709474, + 2.0218327713394166, + 2.0282914637756346, + 2.021828155441284, + 2.0190604560852052, + 2.0181676456069946, + 2.015587470474243, + 2.0169739764022827, + 2.012877261199951, + 2.015299163284302, + 2.015902668991089, + 2.010847349205017, + 2.0069280560302736, + 2.008257807006836, + 2.0082109774017334, + 2.006445485191345, + 2.0060603087615965, + 2.0080129043579102, + 2.0026243621444704, + 2.0042781149291993, + 2.0038208263397217, + 2.00284217376709, + 2.0032863708496094, + 2.003503814620972, + 2.0014031100082397, + 2.00154658203125, + 2.0027565859985352, + 1.9988653813934327, + 2.0048675049591065, + 2.002612951965332, + 1.9998337776184083, + 2.0022170097351073, + 2.0006915353012085, + 1.9998700122451782, + 2.001532240638733, + 2.0009784855651858, + 2.001896736717224, + 1.997888755760193, + 1.9997327941894532, + 1.998836244468689, + 1.9972671249008178, + 1.9991955379867554, + 1.996907271156311, + 1.9989860800933839, + 1.9979305069732667, + 1.9979226346588135, + 1.9973365603637696, + 1.9991827986145019, + 1.9979995524597167, + 1.9976954571533203, + 1.9975597812652588, + 1.9963330318450927, + 1.9988955184173585, + 1.9985223587036134, + 1.9940103855895996, + 1.9969634999084473, + 1.9979491520690917, + 1.9940481422424317, + 1.9955107457733154, + 1.9963652723693848, + 1.9967138528060913, + 1.9949829993438721, + 1.9970767431259155, + 1.996754500579834, + 1.9966884062194825, + 1.9968673025131225, + 1.9936358183288574, + 1.994392073059082, + 1.9950825340270997, + 1.9949580252838135, + 1.9944661192321778, + 1.994148543624878, + 1.992705733795166, + 1.9950622162246705, + 1.9951953311538697, + 1.9943858066558837, + 1.9932064359283448, + 1.9938985696411133, + 1.992524094696045, + 1.9919383232116699, + 1.9938886001586915, + 1.992321823425293, + 1.993851443786621, + 1.99483976688385, + 1.991985835800171, + 1.995015989379883, + 1.9928585873413085, + 1.9930935015487672, + 1.9924035341644286, + 1.992943452758789, + 1.992181587677002, + 1.992508976135254, + 1.9930797433471679, + 1.9915329122924805, + 1.9912273866271972, + 1.9927945401000977, + 1.991996759376526, + 1.992920351409912, + 1.99362177444458 + ], + "train_acc": [ + 0.24312, + 0.25066, + 0.25642, + 0.25462, + 0.25868, + 0.25902, + 0.25752, + 0.26142, + 0.26284, + 0.26284, + 0.26276, + 0.261, + 0.26512, + 0.2669, + 0.26932, + 0.2642, + 0.26996, + 0.26718, + 0.26772, + 0.26848, + 0.2686, + 0.27064, + 0.26932, + 0.26956, + 0.2707, + 0.27038, + 0.27084, + 0.27048, + 0.27238, + 0.26878, + 0.27158, + 0.27342, + 0.27218, + 0.27104, + 0.2718, + 0.27454, + 0.27166, + 0.27142, + 0.2736, + 0.27368, + 0.27276, + 0.2748, + 0.2733, + 0.2743, + 0.27712, + 0.2743, + 0.27604, + 0.2745, + 0.27354, + 0.27452, + 0.27538, + 0.27732, + 0.27562, + 0.27438, + 0.27546, + 0.27798, + 0.27778, + 0.27474, + 0.27972, + 0.27802, + 0.27448, + 0.27894, + 0.27802, + 0.27462, + 0.27436, + 0.27914, + 0.27646, + 0.27898, + 0.27676, + 0.27804, + 0.27688, + 0.2795, + 0.27758, + 0.27814, + 0.27706, + 0.27766, + 0.27888, + 0.2781, + 0.27842, + 0.27892, + 0.2802, + 0.27858, + 0.2792, + 0.2788, + 0.27574, + 0.28088, + 0.27956, + 0.27906, + 0.27764, + 0.27988, + 0.27716, + 0.27956, + 0.2782, + 0.2787, + 0.2789, + 0.2799, + 0.27836, + 0.28092, + 0.27914, + 0.27792 + ], + "test_acc": [ + 0.2564, + 0.2576, + 0.2815, + 0.2842, + 0.2869, + 0.2771, + 0.2864, + 0.2704, + 0.2864, + 0.2827, + 0.3005, + 0.2939, + 0.2817, + 0.2771, + 0.3033, + 0.2802, + 0.2776, + 0.3002, + 0.3, + 0.2773, + 0.3036, + 0.2858, + 0.2917, + 0.2964, + 0.2956, + 0.2996, + 0.2972, + 0.2926, + 0.3109, + 0.2826, + 0.3046, + 0.3077, + 0.278, + 0.2856, + 0.2955, + 0.2948, + 0.3034, + 0.3088, + 0.2955, + 0.2871, + 0.303, + 0.3009, + 0.2893, + 0.2922, + 0.3096, + 0.3061, + 0.2974, + 0.3028, + 0.2996, + 0.2988, + 0.3022, + 0.3003, + 0.3068, + 0.2961, + 0.2908, + 0.2971, + 0.3027, + 0.2998, + 0.3008, + 0.3071, + 0.2999, + 0.2988, + 0.2959, + 0.2982, + 0.3025, + 0.295, + 0.2934, + 0.2965, + 0.3014, + 0.2975, + 0.2893, + 0.2951, + 0.3003, + 0.3061, + 0.297, + 0.2987, + 0.3, + 0.3016, + 0.2974, + 0.303, + 0.3003, + 0.3043, + 0.3029, + 0.3004, + 0.2992, + 0.2983, + 0.3017, + 0.299, + 0.3005, + 0.2977, + 0.3004, + 0.2986, + 0.3, + 0.2993, + 0.2988, + 0.2994, + 0.2998, + 0.2994, + 0.2993, + 0.2994 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.3915177881717682, + 0.00031804549507796764, + -0.0005461883265525103, + 0.00020462644170038402, + -0.0005664663622155786, + -0.0008316519670188427 + ], + "perturbation_rho": [ + -0.011084532365202904, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -4.0885061025619507e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.2046657502651215e-06, + 0.0, + 9.313225746154785e-10, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -4.0549784898757935e-06, + 0.0, + 9.313225746154785e-10, + 0.0, + 9.313225746154785e-10, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 52265.20703125, + 1156494592.0, + 2602998784.0, + 3437189888.0, + 3901714432.0, + 6607706112.0, + 6800756736.0 + ], + "bp_grad_norms_per_layer": [ + 2.745953793237277e-07, + 2.664374554317561e-10, + 2.662880471682172e-10, + 2.6685945120341614e-10, + 2.6688468102165075e-10, + 2.6681562514951906e-10, + 2.668691656548816e-10 + ] + }, + "drift": { + "embed.weight": 323.8438777567112, + "embed.bias": 236.32127317549046, + "blocks.0.ln.weight": 9.81678478040185, + "blocks.0.w1.weight": 283.8033448856046, + "blocks.0.w1.bias": 254.7986676902803, + "blocks.0.w2.weight": 459.2312703971922, + "blocks.1.ln.weight": 7.9902441845145225, + "blocks.1.w1.weight": 303.8032457038149, + "blocks.1.w1.bias": 290.75689909603165, + "blocks.1.w2.weight": 302.7987642593431, + "blocks.2.ln.weight": 7.8083033170497576, + "blocks.2.w1.weight": 313.18221523970163, + "blocks.2.w1.bias": 288.235969181505, + "blocks.2.w2.weight": 301.0694759577726, + "blocks.3.ln.weight": 7.457261198270327, + "blocks.3.w1.weight": 299.37309231794677, + "blocks.3.w1.bias": 276.9324398819875, + "blocks.3.w2.weight": 279.5917777002267, + "blocks.4.ln.weight": 10.517843332237796, + "blocks.4.w1.weight": 436.93554951444736, + "blocks.4.w1.bias": 404.54404533704337, + "blocks.4.w2.weight": 386.63677119691914, + "blocks.5.ln.weight": 7.187391888063363, + "blocks.5.w1.weight": 278.782404520272, + "blocks.5.w1.bias": 266.36329153767053, + "blocks.5.w2.weight": 249.56060346351742, + "out_ln.weight": 0.5731338871373126, + "out_head.weight": 8.11049488578885, + "out_head.bias": 1.1188503504473455 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0280703507995606, + 1.9438096570587158, + 1.9075839419174194, + 1.891309684791565, + 1.8722532345199585, + 1.8634102462768554, + 1.8554693552017212, + 1.8489897765350343, + 1.846228702659607, + 1.8386287690734864, + 1.8406133193206786, + 1.8387405684661866, + 1.8276975162506104, + 1.8232349169921875, + 1.8223698708724976, + 1.821195379638672, + 1.8214911682510375, + 1.8160597175598145, + 1.8229843518066406, + 1.813365035133362, + 1.8151773623275758, + 1.8169539303207398, + 1.8105776592254639, + 1.8114565746307374, + 1.807515641746521, + 1.806776870765686, + 1.8001547024154663, + 1.8010753986358643, + 1.7956665130233764, + 1.8011931986236571, + 1.7943034625625611, + 1.791777073059082, + 1.791583038711548, + 1.7873410034179686, + 1.7855097869873047, + 1.7860526992416381, + 1.7824570555877686, + 1.7813435136795044, + 1.7782593814086913, + 1.77326356174469, + 1.7763885403060913, + 1.7732242642974854, + 1.7696506113052368, + 1.7713894509887695, + 1.7732664206695556, + 1.7675420838165283, + 1.763011986732483, + 1.7635349313354491, + 1.7613904175186157, + 1.7604380879974366, + 1.7606450772476196, + 1.7594652207183838, + 1.7573485291290283, + 1.7578170404434204, + 1.754877228012085, + 1.7504150357437134, + 1.7527595220565797, + 1.7520960317230225, + 1.7485238824081422, + 1.7481630941772461, + 1.7490973169326782, + 1.7442448379516602, + 1.7454284634399415, + 1.7441554889678954, + 1.744502698326111, + 1.7424594699478149, + 1.7411711275482178, + 1.737814305076599, + 1.7386144577789306, + 1.7368740311431885, + 1.7391422924804687, + 1.737129613571167, + 1.7340563317108155, + 1.733810231666565, + 1.7361049320220947, + 1.7318184774398804, + 1.7328989402008057, + 1.731393734779358, + 1.7313611135482787, + 1.7314666943740844, + 1.7308115518188476, + 1.7282487900543213, + 1.7280186172485352, + 1.7302542147445679, + 1.729862018966675, + 1.7284494402313233, + 1.7274188668060302, + 1.72759757938385, + 1.728316011276245, + 1.7254533053588867, + 1.7282261031341553, + 1.7274837893676758, + 1.7255484241104126, + 1.7243229049682618, + 1.7256911779022217, + 1.7249952478790282, + 1.7246788320541382, + 1.724236849632263, + 1.7243867401885986, + 1.7224459433746337 + ], + "train_acc": [ + 0.26004, + 0.2935, + 0.3105, + 0.31688, + 0.32726, + 0.3268, + 0.33294, + 0.33576, + 0.33912, + 0.34126, + 0.3408, + 0.33982, + 0.34284, + 0.34334, + 0.34554, + 0.3478, + 0.34762, + 0.34808, + 0.34944, + 0.35122, + 0.35068, + 0.35214, + 0.35338, + 0.35102, + 0.35642, + 0.35468, + 0.35298, + 0.35718, + 0.35686, + 0.3546, + 0.3602, + 0.36098, + 0.35922, + 0.35992, + 0.36128, + 0.36012, + 0.36176, + 0.3639, + 0.36318, + 0.36768, + 0.36394, + 0.36712, + 0.36688, + 0.36672, + 0.36676, + 0.36582, + 0.37102, + 0.36946, + 0.37328, + 0.37212, + 0.36806, + 0.37476, + 0.3764, + 0.37204, + 0.37148, + 0.37532, + 0.37424, + 0.37422, + 0.37674, + 0.37706, + 0.37652, + 0.37612, + 0.37884, + 0.37688, + 0.37654, + 0.37978, + 0.37676, + 0.37814, + 0.3802, + 0.38226, + 0.3787, + 0.38016, + 0.38294, + 0.38174, + 0.37992, + 0.38192, + 0.38114, + 0.38394, + 0.38458, + 0.38406, + 0.38478, + 0.38358, + 0.38324, + 0.38232, + 0.38468, + 0.3854, + 0.3863, + 0.38538, + 0.38582, + 0.38406, + 0.38552, + 0.38434, + 0.3848, + 0.38542, + 0.38594, + 0.38728, + 0.38536, + 0.3858, + 0.38608, + 0.386 + ], + "test_acc": [ + 0.3013, + 0.3142, + 0.3483, + 0.3543, + 0.3608, + 0.3458, + 0.3633, + 0.3576, + 0.3713, + 0.3632, + 0.3817, + 0.3741, + 0.3661, + 0.3795, + 0.3734, + 0.3749, + 0.3706, + 0.3813, + 0.3822, + 0.3692, + 0.3725, + 0.3749, + 0.3836, + 0.3822, + 0.389, + 0.3821, + 0.3846, + 0.3777, + 0.3861, + 0.3809, + 0.3857, + 0.3855, + 0.3765, + 0.3955, + 0.3914, + 0.3948, + 0.3997, + 0.3994, + 0.3935, + 0.3989, + 0.3987, + 0.4008, + 0.3925, + 0.3922, + 0.4023, + 0.4055, + 0.3953, + 0.4008, + 0.4003, + 0.3971, + 0.3987, + 0.3931, + 0.4076, + 0.4019, + 0.4046, + 0.3981, + 0.4081, + 0.4046, + 0.4043, + 0.406, + 0.4118, + 0.4076, + 0.4057, + 0.414, + 0.3979, + 0.4079, + 0.4044, + 0.4087, + 0.4065, + 0.4024, + 0.3997, + 0.4078, + 0.4117, + 0.4092, + 0.409, + 0.406, + 0.4132, + 0.4133, + 0.4066, + 0.4131, + 0.4083, + 0.413, + 0.4126, + 0.4117, + 0.4109, + 0.4096, + 0.4111, + 0.4099, + 0.4094, + 0.4105, + 0.4105, + 0.4081, + 0.4122, + 0.4113, + 0.412, + 0.4095, + 0.41, + 0.4114, + 0.4111, + 0.4117 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.03050209954380989, + 0.07341398298740387, + -0.030213337391614914, + -0.1065329909324646, + -0.112009696662426, + 0.9969239234924316 + ], + "perturbation_rho": [ + 0.060943812131881714, + 0.02977900393307209, + 0.030837206169962883, + 0.03361157327890396, + 0.003993029240518808, + -0.012494717724621296 + ], + "nudging": { + "0.001": [ + -2.1707965061068535e-06, + -2.555316314101219e-07, + 1.9907020032405853e-08, + 1.1094380170106888e-07, + 1.1106021702289581e-07, + -1.6409903764724731e-06 + ], + "0.003": [ + -5.929847247898579e-06, + -7.075723260641098e-07, + 1.384178176522255e-07, + 6.816117092967033e-07, + 7.138587534427643e-07, + -6.536138243973255e-06 + ], + "0.01": [ + -2.0072446204721928e-05, + -2.6080524548888206e-06, + 4.3155159801244736e-07, + 2.434244379401207e-06, + 2.515967935323715e-06, + -2.3723463527858257e-05 + ] + }, + "hidden_norms_per_layer": [ + 6712.94189453125, + 241810.65625, + 672440.6875, + 1111939.0, + 1518122.75, + 1746060.875, + 545338.8125 + ], + "bp_grad_norms_per_layer": [ + 2.4450268028886057e-05, + 1.2711601584669552e-06, + 9.635764399718028e-07, + 9.082662018045085e-07, + 9.076145488506882e-07, + 9.084363341571589e-07, + 8.959328283708601e-07 + ] + }, + "drift": { + "embed.weight": 52.75751575367878, + "embed.bias": 16.290300308868286, + "blocks.0.ln.weight": 1.3122960145180722, + "blocks.0.w1.weight": 18.35861512742778, + "blocks.0.w1.bias": 15.033623290031253, + "blocks.0.w2.weight": 59.867189750033205, + "blocks.1.ln.weight": 0.9612442862570167, + "blocks.1.w1.weight": 19.510654541967284, + "blocks.1.w1.bias": 16.988315121645034, + "blocks.1.w2.weight": 44.17488250656146, + "blocks.2.ln.weight": 0.8653865881813659, + "blocks.2.w1.weight": 21.845526237032836, + "blocks.2.w1.bias": 20.228308191109985, + "blocks.2.w2.weight": 54.8483328488635, + "blocks.3.ln.weight": 0.687489668753582, + "blocks.3.w1.weight": 21.235696780055214, + "blocks.3.w1.bias": 22.153293813124844, + "blocks.3.w2.weight": 37.488286386992314, + "blocks.4.ln.weight": 0.6826937366325999, + "blocks.4.w1.weight": 20.105546147824473, + "blocks.4.w1.bias": 21.27470328328852, + "blocks.4.w2.weight": 45.57345483069108, + "blocks.5.ln.weight": 0.7529120733274255, + "blocks.5.w1.weight": 23.26064002188114, + "blocks.5.w1.bias": 24.637626686580436, + "blocks.5.w2.weight": 40.48651407120801, + "out_ln.weight": 0.28684521814071196, + "out_head.weight": 5.505824885038432, + "out_head.bias": 1.5561206526473979 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 6, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 6 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L6_seed6", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d512_L6_seed7/results_cifar10.json b/results/fa_dfa_d512_L6_seed7/results_cifar10.json new file mode 100644 index 0000000..1518de5 --- /dev/null +++ b/results/fa_dfa_d512_L6_seed7/results_cifar10.json @@ -0,0 +1,837 @@ +{ + "7": { + "dfa": { + "log": { + "train_loss": [ + 2.0644660066223146, + 2.0393594007873537, + 2.0298658728027346, + 2.027508737716675, + 2.021048464202881, + 2.0180776037979125, + 2.016010649871826, + 2.0145508810424806, + 2.0100066609954834, + 2.006050896835327, + 2.007042580833435, + 2.0072416454315185, + 2.0022777138519285, + 2.003505378875732, + 2.0038011445617676, + 2.0008850508880616, + 1.9970258136749268, + 1.9960380639648438, + 1.9933054499816893, + 1.9941288608551024, + 1.99546089138031, + 1.9936006065368652, + 1.994488134994507, + 1.9934685417938232, + 1.9926122019195556, + 1.9915543088531493, + 1.9883341995239259, + 1.9901361241149902, + 1.988699857711792, + 1.988091276512146, + 1.9886641019439697, + 1.9861467250061036, + 1.9892435473251342, + 1.986944725379944, + 1.9878197789001464, + 1.9859363149261475, + 1.9881446804046632, + 1.98439239112854, + 1.9870981986999512, + 1.9842346769714356, + 1.9852807202529907, + 1.9832774765396117, + 1.9852560285949707, + 1.985099944229126, + 1.984175789642334, + 1.9816588903808594, + 1.9819236499404906, + 1.9812517197418213, + 1.9811725481414795, + 1.9830146681213379, + 1.9799175099945068, + 1.9814645065689087, + 1.9808358678817748, + 1.9797256357192994, + 1.9818459476470947, + 1.980439381980896, + 1.9798791062164307, + 1.9782852731323242, + 1.9786473375701905, + 1.9781282710266113, + 1.9775994555664063, + 1.9782113193511963, + 1.9787678694915771, + 1.9774463504028321, + 1.9799045008850098, + 1.9775051036834717, + 1.977933309020996, + 1.9787949396514892, + 1.9781452545547484, + 1.9794640335845948, + 1.976175922241211, + 1.9797193655776977, + 1.9766233335113526, + 1.9788971273422242, + 1.9771730890274049, + 1.9750644179916381, + 1.9780011644744873, + 1.978542350692749, + 1.9770074579238892, + 1.9780642249298095, + 1.9761155870819092, + 1.9755325689697265, + 1.976641792564392, + 1.976419013442993, + 1.9755548094177247, + 1.9737617702484131, + 1.9747001873397827, + 1.974268023376465, + 1.974451787185669, + 1.9748253519439698, + 1.9772220765686035, + 1.9760730503845214, + 1.9763396139526368, + 1.9743848707580567, + 1.9757111903381348, + 1.9740122735214234, + 1.9744470761489867, + 1.9736765104675293, + 1.9756489211654662, + 1.975818688316345 + ], + "train_acc": [ + 0.23738, + 0.24404, + 0.25038, + 0.25232, + 0.25616, + 0.25784, + 0.2552, + 0.2584, + 0.26018, + 0.2647, + 0.26414, + 0.26396, + 0.26708, + 0.26626, + 0.2656, + 0.26712, + 0.26824, + 0.2697, + 0.27158, + 0.26954, + 0.27018, + 0.27384, + 0.27396, + 0.2686, + 0.27122, + 0.27376, + 0.2733, + 0.2745, + 0.276, + 0.27458, + 0.27622, + 0.27596, + 0.27604, + 0.2743, + 0.27596, + 0.27608, + 0.2749, + 0.27588, + 0.27448, + 0.27836, + 0.27578, + 0.27956, + 0.27798, + 0.27912, + 0.27864, + 0.27886, + 0.27796, + 0.2803, + 0.27986, + 0.27928, + 0.28028, + 0.28038, + 0.27754, + 0.2806, + 0.28016, + 0.27942, + 0.28168, + 0.28164, + 0.28036, + 0.2808, + 0.28056, + 0.2808, + 0.28214, + 0.283, + 0.28266, + 0.28086, + 0.28166, + 0.28236, + 0.28182, + 0.28274, + 0.28322, + 0.28242, + 0.28414, + 0.28278, + 0.2849, + 0.28342, + 0.28298, + 0.2835, + 0.28288, + 0.28306, + 0.28192, + 0.284, + 0.28374, + 0.2818, + 0.284, + 0.28564, + 0.28466, + 0.2856, + 0.28366, + 0.28388, + 0.28242, + 0.28532, + 0.2828, + 0.28576, + 0.28524, + 0.28524, + 0.28596, + 0.28682, + 0.28318, + 0.28228 + ], + "test_acc": [ + 0.2747, + 0.2448, + 0.2761, + 0.275, + 0.2784, + 0.2789, + 0.2712, + 0.2923, + 0.2752, + 0.2898, + 0.2816, + 0.2913, + 0.2903, + 0.2968, + 0.2885, + 0.2852, + 0.2949, + 0.2793, + 0.2989, + 0.2934, + 0.2914, + 0.2923, + 0.2946, + 0.3013, + 0.3014, + 0.2906, + 0.3031, + 0.297, + 0.2962, + 0.2942, + 0.3013, + 0.3006, + 0.3016, + 0.3082, + 0.311, + 0.2978, + 0.2803, + 0.299, + 0.3028, + 0.2966, + 0.3076, + 0.2768, + 0.3014, + 0.2929, + 0.3045, + 0.2953, + 0.2998, + 0.3013, + 0.3052, + 0.2902, + 0.3055, + 0.2939, + 0.3027, + 0.2966, + 0.3039, + 0.3032, + 0.294, + 0.3002, + 0.2989, + 0.307, + 0.3064, + 0.3031, + 0.3103, + 0.3015, + 0.2935, + 0.3131, + 0.2974, + 0.2958, + 0.3098, + 0.308, + 0.301, + 0.3079, + 0.306, + 0.3089, + 0.3077, + 0.304, + 0.3059, + 0.302, + 0.3017, + 0.3008, + 0.3065, + 0.3071, + 0.3039, + 0.3027, + 0.3065, + 0.3079, + 0.3051, + 0.305, + 0.3068, + 0.3084, + 0.3043, + 0.3041, + 0.3055, + 0.3043, + 0.3066, + 0.3062, + 0.3059, + 0.3066, + 0.3066, + 0.3065 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.4079825282096863, + 0.00017449544975534081, + -0.0002950271009467542, + -0.00047933883615769446, + 0.0008355096215382218, + -0.00013302025035955012 + ], + "perturbation_rho": [ + 0.005086352117359638, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -3.762543201446533e-07, + -2.7939677238464355e-09, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.4076940715312958e-06, + -2.7939677238464355e-09, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -4.495494067668915e-06, + -9.313225746154785e-10, + 0.0, + 2.7939677238464355e-09, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 53469.4921875, + 722511616.0, + 2513154304.0, + 4196997888.0, + 4850079232.0, + 7170064896.0, + 10128478208.0 + ], + "bp_grad_norms_per_layer": [ + 3.0659566618851386e-07, + 2.1547569284408041e-10, + 2.1372952019316216e-10, + 2.1350407553022421e-10, + 2.1338901479150962e-10, + 2.1328670773979042e-10, + 2.1351549694959004e-10 + ] + }, + "drift": { + "embed.weight": 319.2060524889922, + "embed.bias": 168.91177351992346, + "blocks.0.ln.weight": 10.20084372934541, + "blocks.0.w1.weight": 259.3585707340092, + "blocks.0.w1.bias": 204.8860780612092, + "blocks.0.w2.weight": 474.58360990249685, + "blocks.1.ln.weight": 8.524169911482344, + "blocks.1.w1.weight": 317.8100676412464, + "blocks.1.w1.bias": 286.625861663325, + "blocks.1.w2.weight": 337.7969456452116, + "blocks.2.ln.weight": 8.931831095995939, + "blocks.2.w1.weight": 362.27088509151724, + "blocks.2.w1.bias": 323.3719624301686, + "blocks.2.w2.weight": 340.6308866629528, + "blocks.3.ln.weight": 7.651484669125739, + "blocks.3.w1.weight": 309.48260206501516, + "blocks.3.w1.bias": 279.96119268361133, + "blocks.3.w2.weight": 277.85914995873645, + "blocks.4.ln.weight": 10.210800501008027, + "blocks.4.w1.weight": 421.6457628353572, + "blocks.4.w1.bias": 399.4367334675059, + "blocks.4.w2.weight": 412.14310110719924, + "blocks.5.ln.weight": 11.351197134890906, + "blocks.5.w1.weight": 456.9953588488586, + "blocks.5.w1.bias": 441.75351526808663, + "blocks.5.w2.weight": 455.08343692703954, + "out_ln.weight": 0.6738960876175676, + "out_head.weight": 9.13993717128016, + "out_head.bias": 0.8636649912030224 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.035428274383545, + 1.9543501361465454, + 1.921069111099243, + 1.9030546938323976, + 1.8895003170013427, + 1.884778947982788, + 1.8827891104888916, + 1.8784568830108643, + 1.86878865776062, + 1.860254260787964, + 1.8565264916229247, + 1.8507944106674195, + 1.8448910778808594, + 1.8358412002563476, + 1.8325608110809326, + 1.8244803615570069, + 1.8170998212051392, + 1.8116112271499634, + 1.8029299909210206, + 1.798451124572754, + 1.796333960533142, + 1.7877936901092528, + 1.7880024145889282, + 1.783990569152832, + 1.773157628097534, + 1.7710246792984008, + 1.7636069051742553, + 1.7616489712142944, + 1.7549441582870484, + 1.754057723007202, + 1.7550610286712647, + 1.7466887496185304, + 1.7430288970565795, + 1.7387338048553467, + 1.7371670000839234, + 1.7317205599975587, + 1.733508980026245, + 1.7296288764190675, + 1.7255350440216064, + 1.7244919805908203, + 1.7210525772857665, + 1.7164554761123658, + 1.716478593711853, + 1.717660334854126, + 1.7179905541992186, + 1.7098763641738892, + 1.70570148979187, + 1.7075587792587281, + 1.7068590453720094, + 1.70389818359375, + 1.6986094250488282, + 1.6995252443695068, + 1.6985104891967773, + 1.6972654037094117, + 1.695931503982544, + 1.6940053173065186, + 1.6959078594589234, + 1.6945061222076416, + 1.6924745792388916, + 1.6908132946777343, + 1.6884509930038452, + 1.6876910009384156, + 1.684639096031189, + 1.6838580658721924, + 1.6875753708267212, + 1.6831429992294311, + 1.6840416891479493, + 1.6803303512573242, + 1.6848778852081299, + 1.6803966582870484, + 1.676663829689026, + 1.6796003302383422, + 1.677098304748535, + 1.678186548423767, + 1.675721337814331, + 1.671638519821167, + 1.6730699136734009, + 1.673575770187378, + 1.6733202564239502, + 1.671583014907837, + 1.6726769936370849, + 1.6701723910522461, + 1.6703911999893188, + 1.6733231628417968, + 1.6666682750701904, + 1.6670018822860717, + 1.6669657723999023, + 1.6693463718795776, + 1.6684244076156616, + 1.6675063357162476, + 1.668329490966797, + 1.6683633364486694, + 1.6668814519882202, + 1.6666266248321533, + 1.6659794528198242, + 1.6643282043075562, + 1.666822133102417, + 1.6650706377410889, + 1.6686715842056274, + 1.6671581310653687 + ], + "train_acc": [ + 0.252, + 0.28866, + 0.30228, + 0.31138, + 0.31636, + 0.32068, + 0.32168, + 0.32222, + 0.32708, + 0.32854, + 0.33246, + 0.33542, + 0.33984, + 0.34196, + 0.34244, + 0.34556, + 0.34844, + 0.35176, + 0.35512, + 0.35748, + 0.35646, + 0.3622, + 0.35886, + 0.36206, + 0.36288, + 0.36684, + 0.37276, + 0.37276, + 0.37378, + 0.37246, + 0.37274, + 0.37874, + 0.37508, + 0.37808, + 0.3804, + 0.38156, + 0.37756, + 0.37946, + 0.38278, + 0.38208, + 0.38226, + 0.3882, + 0.38378, + 0.38596, + 0.38588, + 0.38736, + 0.38724, + 0.38704, + 0.38846, + 0.387, + 0.39326, + 0.39342, + 0.3919, + 0.3942, + 0.39288, + 0.39274, + 0.3934, + 0.3913, + 0.39564, + 0.39482, + 0.39314, + 0.39526, + 0.39544, + 0.3977, + 0.39616, + 0.39674, + 0.39592, + 0.39896, + 0.39744, + 0.3976, + 0.39928, + 0.39962, + 0.39968, + 0.40054, + 0.39988, + 0.40192, + 0.40246, + 0.40078, + 0.40022, + 0.39994, + 0.40256, + 0.40522, + 0.40094, + 0.40278, + 0.40524, + 0.40424, + 0.4035, + 0.40222, + 0.40502, + 0.40334, + 0.40314, + 0.40426, + 0.40582, + 0.40816, + 0.40362, + 0.40492, + 0.40314, + 0.40624, + 0.40386, + 0.4031 + ], + "test_acc": [ + 0.2994, + 0.315, + 0.3222, + 0.3432, + 0.3472, + 0.3579, + 0.3389, + 0.357, + 0.3572, + 0.3638, + 0.3595, + 0.3655, + 0.3683, + 0.3749, + 0.3736, + 0.3761, + 0.379, + 0.3672, + 0.3847, + 0.3897, + 0.3818, + 0.3922, + 0.3878, + 0.3942, + 0.395, + 0.3922, + 0.3942, + 0.3994, + 0.393, + 0.3938, + 0.3987, + 0.3997, + 0.3985, + 0.4053, + 0.406, + 0.4058, + 0.4051, + 0.4097, + 0.4081, + 0.4058, + 0.4093, + 0.401, + 0.4153, + 0.4048, + 0.4131, + 0.4069, + 0.404, + 0.414, + 0.412, + 0.414, + 0.4144, + 0.4179, + 0.4122, + 0.4187, + 0.4136, + 0.4228, + 0.414, + 0.4186, + 0.418, + 0.4167, + 0.4195, + 0.4178, + 0.4212, + 0.4175, + 0.4217, + 0.4212, + 0.4164, + 0.4237, + 0.4199, + 0.4199, + 0.4209, + 0.4234, + 0.4238, + 0.4226, + 0.4199, + 0.4251, + 0.4239, + 0.4209, + 0.4222, + 0.4257, + 0.423, + 0.424, + 0.4253, + 0.4242, + 0.4253, + 0.4239, + 0.4255, + 0.4247, + 0.4241, + 0.4253, + 0.4256, + 0.4259, + 0.4276, + 0.426, + 0.4246, + 0.4258, + 0.4269, + 0.4251, + 0.4254, + 0.4253 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.05678346008062363, + 0.06278860569000244, + -0.06489825248718262, + -0.13550668954849243, + -0.030537210404872894, + 0.9970009326934814 + ], + "perturbation_rho": [ + 0.021435417234897614, + -0.006943210028111935, + 0.002432417357340455, + -0.022404946386814117, + 0.010333601385354996, + -0.011944804340600967 + ], + "nudging": { + "0.001": [ + -6.006448529660702e-06, + -6.604241207242012e-07, + 6.123445928096771e-08, + 1.4808028936386108e-07, + 1.4319084584712982e-08, + -1.7423881217837334e-06 + ], + "0.003": [ + -1.811189576983452e-05, + -1.56438909471035e-06, + 3.5529956221580505e-07, + 8.135102689266205e-07, + 1.6961712390184402e-07, + -6.420654244720936e-06 + ], + "0.01": [ + -6.019952706992626e-05, + -4.958710633218288e-06, + 1.4510005712509155e-06, + 2.9762741178274155e-06, + 6.504124030470848e-07, + -2.2383523173630238e-05 + ] + }, + "hidden_norms_per_layer": [ + 4713.947265625, + 59263.7578125, + 803386.0625, + 994639.875, + 1492731.0, + 1505302.75, + 597029.1875 + ], + "bp_grad_norms_per_layer": [ + 3.536270742188208e-05, + 2.4848127395671327e-06, + 9.235278071173525e-07, + 9.185976637127169e-07, + 9.248900596503518e-07, + 9.24963046600169e-07, + 9.076006222130673e-07 + ] + }, + "drift": { + "embed.weight": 37.15779008870908, + "embed.bias": 18.146058604345335, + "blocks.0.ln.weight": 1.0848731333476576, + "blocks.0.w1.weight": 14.826104871554262, + "blocks.0.w1.bias": 11.802494272115224, + "blocks.0.w2.weight": 46.31577951436631, + "blocks.1.ln.weight": 0.9864104228146627, + "blocks.1.w1.weight": 20.750805358036015, + "blocks.1.w1.bias": 18.84914683848346, + "blocks.1.w2.weight": 45.31226900575547, + "blocks.2.ln.weight": 0.6561709772918677, + "blocks.2.w1.weight": 17.738489694580174, + "blocks.2.w1.bias": 16.431735764543493, + "blocks.2.w2.weight": 39.339953277678816, + "blocks.3.ln.weight": 0.5804518445695007, + "blocks.3.w1.weight": 19.985841730818343, + "blocks.3.w1.bias": 21.371168400070296, + "blocks.3.w2.weight": 31.937812891223096, + "blocks.4.ln.weight": 0.634509772716372, + "blocks.4.w1.weight": 17.17337766921684, + "blocks.4.w1.bias": 15.654490632053776, + "blocks.4.w2.weight": 45.84886660282379, + "blocks.5.ln.weight": 0.6629717085902781, + "blocks.5.w1.weight": 20.18167535513518, + "blocks.5.w1.bias": 21.32865390385067, + "blocks.5.w2.weight": 40.56800371651544, + "out_ln.weight": 0.3071358282526009, + "out_head.weight": 5.834156740127778, + "out_head.bias": 1.541462699862782 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 6, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 7 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L6_seed7", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d512_L6_seed8/results_cifar10.json b/results/fa_dfa_d512_L6_seed8/results_cifar10.json new file mode 100644 index 0000000..120e901 --- /dev/null +++ b/results/fa_dfa_d512_L6_seed8/results_cifar10.json @@ -0,0 +1,837 @@ +{ + "8": { + "dfa": { + "log": { + "train_loss": [ + 2.080832463722229, + 2.048995506820679, + 2.0368573234558105, + 2.0319985338974, + 2.0285650724029543, + 2.0264119197082517, + 2.0246589180755614, + 2.0194223052597047, + 2.023226359863281, + 2.0190831980895996, + 2.0185574532699584, + 2.0163141152572632, + 2.0120892976379396, + 2.0131693548965455, + 2.0107243951416014, + 2.0126246429443357, + 2.0099691207504273, + 2.007364001235962, + 2.009500975265503, + 2.005168029327393, + 2.0078421032333376, + 2.005056644668579, + 2.003016718902588, + 2.002305891647339, + 2.004946018447876, + 2.0044735367202757, + 2.0007521640014647, + 2.0023752281188965, + 2.002063782119751, + 1.9980181777954102, + 1.999666137161255, + 2.0014348561096194, + 1.9975938603973389, + 1.997772049255371, + 1.9968431232452393, + 1.9957593490600587, + 1.9965335370254516, + 1.9956621952819824, + 1.9998830209732055, + 1.9981224115371705, + 1.9981820908355712, + 1.9963909757995606, + 1.9932935749816894, + 1.9949091632843017, + 1.9948957403564453, + 1.9962669714736938, + 1.9956311498641968, + 1.993200020980835, + 1.9959270191192626, + 1.9959917138671874, + 1.99261492729187, + 1.9928764194107056, + 1.9886184701538085, + 1.9945403707504272, + 1.9950171528625489, + 1.9918433042144776, + 1.9921051956939697, + 1.9939103940582275, + 1.9926260836791991, + 1.9888539260864257, + 1.9864782820129394, + 1.9928740283203126, + 1.9936177950286864, + 1.9908017050933837, + 1.9916553665924073, + 1.991268727722168, + 1.9904707485198974, + 1.990502048187256, + 1.9922669997406006, + 1.990579651412964, + 1.989777621154785, + 1.9894770226669312, + 1.9896350140762329, + 1.9901242208480836, + 1.9889081002426148, + 1.9884048733520507, + 1.991174902420044, + 1.9889308002090453, + 1.9896702439880372, + 1.9888165955352783, + 1.9881730316925048, + 1.9870906281280518, + 1.9875952992248536, + 1.987178568344116, + 1.98706350440979, + 1.987743454208374, + 1.9882577419281007, + 1.98791315574646, + 1.9878161795806886, + 1.9868540209197998, + 1.9870264992523194, + 1.9860009889984132, + 1.9875505159759521, + 1.9869904022979736, + 1.9855010274887086, + 1.9865515293884277, + 1.9860705053710936, + 1.9874219760131835, + 1.9881664807128907, + 1.9873311661148072 + ], + "train_acc": [ + 0.22856, + 0.2384, + 0.24456, + 0.24494, + 0.24732, + 0.25098, + 0.24904, + 0.2523, + 0.25028, + 0.2514, + 0.25612, + 0.25722, + 0.25858, + 0.25916, + 0.26012, + 0.26044, + 0.26232, + 0.26452, + 0.26066, + 0.26248, + 0.2637, + 0.26254, + 0.26474, + 0.26712, + 0.2677, + 0.26322, + 0.26788, + 0.26452, + 0.26682, + 0.26908, + 0.2686, + 0.26734, + 0.27066, + 0.26914, + 0.26942, + 0.27018, + 0.26882, + 0.2706, + 0.26786, + 0.26876, + 0.27074, + 0.27, + 0.27148, + 0.27242, + 0.27346, + 0.27144, + 0.27246, + 0.27478, + 0.2713, + 0.27238, + 0.2732, + 0.27598, + 0.27788, + 0.27462, + 0.27216, + 0.27474, + 0.27372, + 0.27488, + 0.2751, + 0.27356, + 0.27528, + 0.2739, + 0.2744, + 0.27592, + 0.27666, + 0.27542, + 0.2749, + 0.27564, + 0.27486, + 0.27662, + 0.27766, + 0.27678, + 0.27682, + 0.27618, + 0.27544, + 0.2767, + 0.27538, + 0.27816, + 0.27422, + 0.27562, + 0.2772, + 0.27786, + 0.27924, + 0.2775, + 0.2789, + 0.27834, + 0.27724, + 0.27724, + 0.27694, + 0.27716, + 0.27862, + 0.27786, + 0.27676, + 0.2771, + 0.278, + 0.27708, + 0.27678, + 0.27732, + 0.27858, + 0.27724 + ], + "test_acc": [ + 0.25, + 0.2553, + 0.2484, + 0.2586, + 0.2481, + 0.2619, + 0.2688, + 0.2577, + 0.2807, + 0.2806, + 0.2671, + 0.2735, + 0.2663, + 0.272, + 0.2824, + 0.2808, + 0.2859, + 0.2837, + 0.282, + 0.2895, + 0.2788, + 0.2787, + 0.275, + 0.2726, + 0.283, + 0.2729, + 0.2866, + 0.2909, + 0.2806, + 0.2892, + 0.2818, + 0.2915, + 0.288, + 0.2928, + 0.2895, + 0.3015, + 0.2906, + 0.2869, + 0.2936, + 0.2929, + 0.2996, + 0.2852, + 0.295, + 0.284, + 0.2772, + 0.302, + 0.278, + 0.2875, + 0.2899, + 0.3026, + 0.2806, + 0.2945, + 0.2936, + 0.298, + 0.2945, + 0.2987, + 0.2926, + 0.2891, + 0.2897, + 0.2928, + 0.2947, + 0.2867, + 0.2913, + 0.2912, + 0.2907, + 0.2955, + 0.2948, + 0.2955, + 0.2934, + 0.2874, + 0.2917, + 0.2904, + 0.2907, + 0.2893, + 0.2937, + 0.2975, + 0.2948, + 0.2938, + 0.2921, + 0.2903, + 0.2921, + 0.2932, + 0.2969, + 0.2915, + 0.294, + 0.2947, + 0.2891, + 0.2975, + 0.2948, + 0.2956, + 0.2919, + 0.2948, + 0.2936, + 0.2931, + 0.293, + 0.2934, + 0.2934, + 0.294, + 0.2937, + 0.2937 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.37266281247138977, + -0.00045336512266658247, + -0.00016533058078493923, + -0.00022915060981176794, + -9.622798825148493e-05, + -0.0005005986895412207 + ], + "perturbation_rho": [ + -0.022135162726044655, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -4.153698682785034e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.184176653623581e-06, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -4.217028617858887e-06, + 9.313225746154785e-10, + 0.0, + 0.0, + -4.6566128730773926e-09, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 50847.30078125, + 848742848.0, + 4660130304.0, + 4956724224.0, + 5474357248.0, + 5779449344.0, + 6897274368.0 + ], + "bp_grad_norms_per_layer": [ + 3.144729134874069e-07, + 2.3805521442987754e-10, + 2.377963659316862e-10, + 2.3782173452779887e-10, + 2.382432029435222e-10, + 2.381181640753738e-10, + 2.3811083660341126e-10 + ] + }, + "drift": { + "embed.weight": 319.35155723550326, + "embed.bias": 248.60053068216845, + "blocks.0.ln.weight": 9.427908278003652, + "blocks.0.w1.weight": 267.21029834357404, + "blocks.0.w1.bias": 240.44244412845688, + "blocks.0.w2.weight": 467.22266822391686, + "blocks.1.ln.weight": 9.176869505684051, + "blocks.1.w1.weight": 396.8672687385826, + "blocks.1.w1.bias": 384.3213865917358, + "blocks.1.w2.weight": 392.8080177067534, + "blocks.2.ln.weight": 6.748169794145516, + "blocks.2.w1.weight": 260.5475963025436, + "blocks.2.w1.bias": 233.50388419740455, + "blocks.2.w2.weight": 243.9744268156159, + "blocks.3.ln.weight": 8.622995843562022, + "blocks.3.w1.weight": 331.24292230819924, + "blocks.3.w1.bias": 315.5877913098073, + "blocks.3.w2.weight": 298.8313159517988, + "blocks.4.ln.weight": 8.025346777953455, + "blocks.4.w1.weight": 289.7124732778298, + "blocks.4.w1.bias": 265.5485861970709, + "blocks.4.w2.weight": 262.14080086870655, + "blocks.5.ln.weight": 9.46176669645325, + "blocks.5.w1.weight": 364.3981229248546, + "blocks.5.w1.bias": 349.70853696261963, + "blocks.5.w2.weight": 357.5708723169573, + "out_ln.weight": 0.5682786122526117, + "out_head.weight": 8.418164907479396, + "out_head.bias": 0.662517650973914 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0414198583984375, + 1.9522941863250733, + 1.912906623916626, + 1.8914309909057616, + 1.8828500344848633, + 1.8722443673706055, + 1.867602448654175, + 1.856888031578064, + 1.8552497367095948, + 1.843837871170044, + 1.8456551111602784, + 1.8392053551864624, + 1.832319408721924, + 1.8315165106582643, + 1.8281906174468994, + 1.8275285326766968, + 1.823442045288086, + 1.8223411087799073, + 1.8216783404922485, + 1.8156820178604125, + 1.8176694821166992, + 1.8109132452392578, + 1.8122728036880493, + 1.8086374733734132, + 1.8135607794189452, + 1.807762541885376, + 1.8036900131225586, + 1.8017769944000244, + 1.8010728991699219, + 1.7973168952178955, + 1.792228649635315, + 1.7974130847549439, + 1.7889769022369384, + 1.78288419921875, + 1.7853075231552125, + 1.7797698288726806, + 1.7783561060333253, + 1.7755371138763427, + 1.7767201525115968, + 1.7773720627593994, + 1.7749587761688232, + 1.7682355377960206, + 1.7642274634552002, + 1.7686031677627563, + 1.763687038230896, + 1.7665823318862914, + 1.7619021569824218, + 1.7553145666885377, + 1.757571950340271, + 1.7612224188995362, + 1.7522613665390014, + 1.7504500290298461, + 1.7475368619537353, + 1.7495313736343383, + 1.7496113208389281, + 1.7413074100494386, + 1.7437634180450439, + 1.7434646717071534, + 1.746056427268982, + 1.740490657081604, + 1.7333746536254884, + 1.7381776495742798, + 1.73842948387146, + 1.7355398804092408, + 1.7360767101287842, + 1.7332445461273194, + 1.732193847579956, + 1.730364631652832, + 1.732538067970276, + 1.7276911224746705, + 1.7270672924804686, + 1.7267360440826416, + 1.725644203414917, + 1.7283870937347412, + 1.72088895652771, + 1.7216768536376954, + 1.723486647377014, + 1.7211247494125366, + 1.7208690827178954, + 1.7219977252578735, + 1.7204493698120118, + 1.7233893868255614, + 1.7198170611190795, + 1.7190866617202758, + 1.7203575790405274, + 1.7177474556350707, + 1.7174966396713256, + 1.7169412566375732, + 1.714385998878479, + 1.7148360108184815, + 1.7158790673446656, + 1.7134223749542237, + 1.714623501663208, + 1.7133040780639648, + 1.715691188583374, + 1.7187325539398193, + 1.7134061930084228, + 1.713903324661255, + 1.7177725345230102, + 1.7178887582397462 + ], + "train_acc": [ + 0.24766, + 0.28764, + 0.30724, + 0.31554, + 0.31774, + 0.32862, + 0.32412, + 0.32998, + 0.33022, + 0.33776, + 0.33938, + 0.33914, + 0.3422, + 0.33992, + 0.34394, + 0.34276, + 0.3413, + 0.34586, + 0.347, + 0.34834, + 0.34908, + 0.35144, + 0.35026, + 0.3513, + 0.34898, + 0.35394, + 0.35424, + 0.35282, + 0.3581, + 0.35672, + 0.35968, + 0.35698, + 0.36002, + 0.36412, + 0.36052, + 0.36534, + 0.36372, + 0.36502, + 0.36444, + 0.36414, + 0.36688, + 0.36802, + 0.37136, + 0.3681, + 0.3721, + 0.36958, + 0.37192, + 0.37408, + 0.37342, + 0.37124, + 0.37412, + 0.37666, + 0.37718, + 0.3741, + 0.37748, + 0.37834, + 0.3806, + 0.37612, + 0.3775, + 0.37774, + 0.38072, + 0.38068, + 0.38082, + 0.37998, + 0.382, + 0.38172, + 0.38358, + 0.3851, + 0.38346, + 0.38376, + 0.38542, + 0.38474, + 0.38684, + 0.38692, + 0.3899, + 0.38944, + 0.38642, + 0.38598, + 0.38606, + 0.38656, + 0.38402, + 0.38854, + 0.38994, + 0.38908, + 0.3902, + 0.38896, + 0.39004, + 0.39054, + 0.3903, + 0.38856, + 0.38994, + 0.38868, + 0.38934, + 0.39004, + 0.39128, + 0.39024, + 0.39048, + 0.3904, + 0.38948, + 0.39032 + ], + "test_acc": [ + 0.2817, + 0.3086, + 0.321, + 0.3426, + 0.3496, + 0.3478, + 0.3546, + 0.3478, + 0.3568, + 0.3627, + 0.3663, + 0.3752, + 0.3646, + 0.3581, + 0.3779, + 0.3748, + 0.3759, + 0.3762, + 0.3858, + 0.3739, + 0.3779, + 0.3667, + 0.3721, + 0.3827, + 0.369, + 0.3761, + 0.3719, + 0.3794, + 0.38, + 0.3844, + 0.3733, + 0.3869, + 0.3885, + 0.3911, + 0.387, + 0.3906, + 0.3904, + 0.395, + 0.3849, + 0.3987, + 0.3901, + 0.4009, + 0.3893, + 0.385, + 0.3918, + 0.3995, + 0.392, + 0.3934, + 0.3954, + 0.4044, + 0.3976, + 0.3997, + 0.3967, + 0.402, + 0.404, + 0.3987, + 0.3997, + 0.3959, + 0.3955, + 0.3962, + 0.4028, + 0.4067, + 0.3984, + 0.4075, + 0.4072, + 0.4011, + 0.4028, + 0.4082, + 0.4069, + 0.4041, + 0.4058, + 0.4022, + 0.4061, + 0.4029, + 0.4059, + 0.4071, + 0.4086, + 0.4069, + 0.4058, + 0.4081, + 0.4083, + 0.4114, + 0.4078, + 0.4083, + 0.4074, + 0.4097, + 0.4064, + 0.4087, + 0.4057, + 0.4068, + 0.4067, + 0.407, + 0.4072, + 0.4089, + 0.4094, + 0.4092, + 0.4088, + 0.4081, + 0.4078, + 0.408 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.016317440196871758, + 0.08700302243232727, + -0.07087832689285278, + -0.04807429015636444, + -0.011868046596646309, + 0.9980045557022095 + ], + "perturbation_rho": [ + 0.021565234288573265, + 0.02034841850399971, + 0.026769006624817848, + -0.05038269981741905, + 0.007762039080262184, + 0.04057261347770691 + ], + "nudging": { + "0.001": [ + -1.8766731955111027e-06, + -6.295740604400635e-07, + 1.050066202878952e-07, + 5.390029400587082e-08, + -8.824281394481659e-08, + -1.8120626918971539e-06 + ], + "0.003": [ + -6.020389264449477e-06, + -1.8251012079417706e-06, + 4.4528860598802567e-07, + 2.2060703486204147e-07, + 2.1478626877069473e-08, + -6.290327291935682e-06 + ], + "0.01": [ + -2.0035397028550506e-05, + -6.116693839430809e-06, + 1.5703844837844372e-06, + 9.065261110663414e-07, + 1.710723154246807e-07, + -2.1861022105440497e-05 + ] + }, + "hidden_norms_per_layer": [ + 7090.23486328125, + 97328.1015625, + 1195400.0, + 1509493.75, + 1603199.125, + 1626782.875, + 733314.5 + ], + "bp_grad_norms_per_layer": [ + 2.7028392651118338e-05, + 1.845057795435423e-06, + 8.682639531798486e-07, + 8.779788345236739e-07, + 8.714667956155608e-07, + 8.789162393441075e-07, + 8.642545594739204e-07 + ] + }, + "drift": { + "embed.weight": 49.97107099246965, + "embed.bias": 16.073336112282973, + "blocks.0.ln.weight": 1.1864794930711844, + "blocks.0.w1.weight": 16.772962175548443, + "blocks.0.w1.bias": 11.717179109423967, + "blocks.0.w2.weight": 54.31408520827723, + "blocks.1.ln.weight": 1.1429608481901679, + "blocks.1.w1.weight": 25.1727265395903, + "blocks.1.w1.bias": 23.582538699977512, + "blocks.1.w2.weight": 44.89348831927897, + "blocks.2.ln.weight": 0.6307651937494874, + "blocks.2.w1.weight": 20.089313457148293, + "blocks.2.w1.bias": 20.43961041250799, + "blocks.2.w2.weight": 36.04199442331163, + "blocks.3.ln.weight": 0.5625176858716091, + "blocks.3.w1.weight": 17.29253500275298, + "blocks.3.w1.bias": 18.353681256446446, + "blocks.3.w2.weight": 45.166175379509205, + "blocks.4.ln.weight": 0.5780873641807247, + "blocks.4.w1.weight": 15.563905559896059, + "blocks.4.w1.bias": 13.46488029874075, + "blocks.4.w2.weight": 59.00645861860142, + "blocks.5.ln.weight": 0.7521925423173332, + "blocks.5.w1.weight": 19.685866374226443, + "blocks.5.w1.bias": 20.12611595031584, + "blocks.5.w2.weight": 41.84701231716978, + "out_ln.weight": 0.3374467885491731, + "out_head.weight": 5.972151190658582, + "out_head.bias": 0.6096349208966545 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 6, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 8 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L6_seed8", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d512_L6_seed9/results_cifar10.json b/results/fa_dfa_d512_L6_seed9/results_cifar10.json new file mode 100644 index 0000000..b808ce9 --- /dev/null +++ b/results/fa_dfa_d512_L6_seed9/results_cifar10.json @@ -0,0 +1,837 @@ +{ + "9": { + "dfa": { + "log": { + "train_loss": [ + 2.073234083404541, + 2.0497977373504637, + 2.0341352867126465, + 2.031250879135132, + 2.026975922088623, + 2.021300171508789, + 2.0191297291564942, + 2.0188821714782716, + 2.017022749557495, + 2.016341851234436, + 2.0123678436279295, + 2.010649602279663, + 2.0090650662994385, + 2.010900563201904, + 2.0095366540527344, + 2.0077656902313232, + 2.0068030431365966, + 2.008629825515747, + 2.003841092376709, + 2.005812280654907, + 2.0057511726379396, + 2.004702599334717, + 2.0014493046569823, + 2.004868564796448, + 2.001082904891968, + 1.9998853713989257, + 2.0003265306854248, + 2.0005562553405762, + 1.9986425912094117, + 2.0000717253875733, + 1.9990827807617189, + 1.9992364362335204, + 1.9985607556152343, + 1.996207259979248, + 1.9981809964370727, + 1.9958161113739015, + 1.9976731842422486, + 1.9966179808807374, + 1.9954889965057374, + 1.9954629190826416, + 1.9958389172363282, + 1.994535606842041, + 1.9974303654479981, + 1.9959670455932617, + 1.9949795317840575, + 1.995265456085205, + 1.9942493894195557, + 1.995007307357788, + 1.996318332901001, + 1.9937139911651611, + 1.994312198791504, + 1.9913738401031493, + 1.9951153566741944, + 1.9924673150253296, + 1.9923780603790284, + 1.9935618125152588, + 1.992571726989746, + 1.9926865383911132, + 1.992229995956421, + 1.993404683456421, + 1.9912095419311524, + 1.991911597442627, + 1.9921949435806274, + 1.9902286859512328, + 1.9926082902526856, + 1.9909677504730225, + 1.9914979708099365, + 1.9912472879791259, + 1.990876022491455, + 1.9902124596405029, + 1.9935787561035156, + 1.991565812225342, + 1.9911484326171875, + 1.9914972548675538, + 1.990185121498108, + 1.9900826383972168, + 1.9883790439605713, + 1.9883859337997436, + 1.9883267873382569, + 1.9883251065444947, + 1.9901439308166504, + 1.9902003237915038, + 1.9887106338500977, + 1.9913360192871095, + 1.9886777478027344, + 1.99161105342865, + 1.9905986673736573, + 1.9889384605407714, + 1.990373504333496, + 1.988581312599182, + 1.9881270119476318, + 1.986687230491638, + 1.9884908292007446, + 1.9869463809204102, + 1.9887974228668213, + 1.986062038192749, + 1.9904356650543213, + 1.9901889600372313, + 1.989139567527771, + 1.9876907534790038 + ], + "train_acc": [ + 0.23456, + 0.24532, + 0.2504, + 0.25112, + 0.25282, + 0.26, + 0.2606, + 0.2593, + 0.25506, + 0.2613, + 0.26512, + 0.26252, + 0.26454, + 0.2637, + 0.26436, + 0.26586, + 0.26598, + 0.26426, + 0.26796, + 0.26508, + 0.26656, + 0.26618, + 0.26934, + 0.26756, + 0.26878, + 0.2704, + 0.27164, + 0.2692, + 0.27146, + 0.27, + 0.27024, + 0.2712, + 0.27242, + 0.27516, + 0.27246, + 0.27374, + 0.27374, + 0.2729, + 0.27492, + 0.27392, + 0.2749, + 0.27492, + 0.2729, + 0.2735, + 0.27376, + 0.2749, + 0.27598, + 0.27244, + 0.27474, + 0.27518, + 0.27482, + 0.27532, + 0.27598, + 0.277, + 0.27772, + 0.27702, + 0.27816, + 0.2744, + 0.27456, + 0.27834, + 0.27612, + 0.27584, + 0.27394, + 0.27708, + 0.27626, + 0.27684, + 0.27762, + 0.27826, + 0.2762, + 0.27832, + 0.277, + 0.27744, + 0.27744, + 0.27842, + 0.27896, + 0.27732, + 0.27968, + 0.28094, + 0.27758, + 0.28126, + 0.28152, + 0.27998, + 0.2792, + 0.27652, + 0.27846, + 0.27788, + 0.27668, + 0.28002, + 0.27868, + 0.27792, + 0.2785, + 0.28064, + 0.27878, + 0.28056, + 0.279, + 0.28108, + 0.2801, + 0.27866, + 0.27774, + 0.27842 + ], + "test_acc": [ + 0.2428, + 0.2739, + 0.2627, + 0.2812, + 0.2892, + 0.2843, + 0.2907, + 0.2804, + 0.276, + 0.3009, + 0.2792, + 0.2917, + 0.2992, + 0.2872, + 0.2869, + 0.2731, + 0.2863, + 0.3004, + 0.2738, + 0.2964, + 0.2885, + 0.2907, + 0.2934, + 0.2869, + 0.283, + 0.295, + 0.2853, + 0.2785, + 0.2809, + 0.3069, + 0.2927, + 0.2929, + 0.3024, + 0.296, + 0.2988, + 0.2998, + 0.3001, + 0.3009, + 0.2946, + 0.3043, + 0.3071, + 0.2996, + 0.308, + 0.2894, + 0.2951, + 0.292, + 0.286, + 0.295, + 0.2997, + 0.2967, + 0.2942, + 0.2974, + 0.2921, + 0.2946, + 0.3006, + 0.303, + 0.303, + 0.3102, + 0.3042, + 0.31, + 0.3053, + 0.294, + 0.2993, + 0.3065, + 0.2955, + 0.2908, + 0.2943, + 0.3014, + 0.3029, + 0.3044, + 0.3066, + 0.2909, + 0.3097, + 0.3031, + 0.3, + 0.3012, + 0.2993, + 0.2967, + 0.3001, + 0.3051, + 0.2993, + 0.3091, + 0.3013, + 0.3013, + 0.3007, + 0.3031, + 0.3047, + 0.3032, + 0.3053, + 0.3041, + 0.3041, + 0.3041, + 0.3037, + 0.3036, + 0.3046, + 0.3036, + 0.3044, + 0.3041, + 0.3042, + 0.3041 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.3925938308238983, + 0.000656805990729481, + 0.0001544215774629265, + -0.0004282527952454984, + 0.0003690449520945549, + -0.00013921636855229735 + ], + "perturbation_rho": [ + 0.005831995978951454, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -3.3574178814888e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.0384246706962585e-06, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -3.6191195249557495e-06, + 1.862645149230957e-09, + 0.0, + 0.0, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 54551.35546875, + 1099375360.0, + 3004748800.0, + 5508783616.0, + 6090545664.0, + 6586548736.0, + 9585897472.0 + ], + "bp_grad_norms_per_layer": [ + 2.399317509116372e-07, + 1.9445174637144902e-10, + 1.943899208267652e-10, + 1.9486748326080772e-10, + 1.9483341329173953e-10, + 1.9483435698131046e-10, + 1.9498901104864075e-10 + ] + }, + "drift": { + "embed.weight": 328.6355950373237, + "embed.bias": 242.36454815135576, + "blocks.0.ln.weight": 10.003397541810306, + "blocks.0.w1.weight": 280.20952373585186, + "blocks.0.w1.bias": 245.5434949270726, + "blocks.0.w2.weight": 491.79926117548297, + "blocks.1.ln.weight": 8.729605792932698, + "blocks.1.w1.weight": 325.11308424934106, + "blocks.1.w1.bias": 311.5625818317441, + "blocks.1.w2.weight": 334.530484796484, + "blocks.2.ln.weight": 9.417363473500547, + "blocks.2.w1.weight": 404.6194334537652, + "blocks.2.w1.bias": 374.09843900052965, + "blocks.2.w2.weight": 394.7925902511279, + "blocks.3.ln.weight": 8.451752857956773, + "blocks.3.w1.weight": 326.2362218443471, + "blocks.3.w1.bias": 306.77785286979747, + "blocks.3.w2.weight": 303.662545367935, + "blocks.4.ln.weight": 8.451660472063256, + "blocks.4.w1.weight": 344.2742835972674, + "blocks.4.w1.bias": 326.1264884495552, + "blocks.4.w2.weight": 324.1161613020223, + "blocks.5.ln.weight": 11.13712283522192, + "blocks.5.w1.weight": 457.04149442154977, + "blocks.5.w1.bias": 422.6989592235929, + "blocks.5.w2.weight": 446.6716640278749, + "out_ln.weight": 0.5876580707043741, + "out_head.weight": 9.25778651436399, + "out_head.bias": 0.5027234759174332 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0332628253936766, + 1.9553094621276856, + 1.9304233611679078, + 1.9176126587677003, + 1.9049958919906615, + 1.8898755237197875, + 1.8779738095474243, + 1.87334542842865, + 1.8662727130126953, + 1.8637196960830689, + 1.858073745689392, + 1.8548571952819823, + 1.8493252722930908, + 1.851236528892517, + 1.848223992576599, + 1.8478172241210937, + 1.8419879977416993, + 1.8413512692260743, + 1.8363069415664672, + 1.8369076172256469, + 1.8356854833984375, + 1.8313965267562866, + 1.826233511695862, + 1.8260947198867798, + 1.8246617177581788, + 1.8188705309677125, + 1.8175858071899413, + 1.817788968887329, + 1.8145698094940186, + 1.811288226928711, + 1.8075631722259522, + 1.8072492791366577, + 1.8021571556854248, + 1.8001763897705079, + 1.8002913983535767, + 1.7960401634979248, + 1.7964820532226562, + 1.7898353637695312, + 1.7879999541854859, + 1.789700000267029, + 1.7842918268585206, + 1.7852493328475951, + 1.7841659473037719, + 1.7837900876617432, + 1.7787483280181884, + 1.7782914597320556, + 1.7754856842422486, + 1.7753860149765015, + 1.7756032290267945, + 1.7689688110351562, + 1.7688034854507446, + 1.7665690698623657, + 1.767441110267639, + 1.7627149563598632, + 1.7611339742279053, + 1.7607593900299072, + 1.7611287790679933, + 1.7574685613250733, + 1.754263912963867, + 1.7551698053741456, + 1.7525737002563477, + 1.7504821019744874, + 1.7550537628936766, + 1.749086598892212, + 1.746996110610962, + 1.7465003091812135, + 1.7470869120025634, + 1.7463814974975587, + 1.7397632767486573, + 1.7418430507659912, + 1.7430473554229737, + 1.741324652786255, + 1.7399064194488525, + 1.7389730927276612, + 1.7398483694076539, + 1.7381602671051026, + 1.7356981131362914, + 1.7315244303131103, + 1.7377736062622071, + 1.7310680517196655, + 1.7351790799713134, + 1.733889009361267, + 1.73604018699646, + 1.7337274952697754, + 1.7322516064453124, + 1.7330827197647094, + 1.7328004323577881, + 1.7354140316772462, + 1.7350243264007568, + 1.7329780157470702, + 1.732491442489624, + 1.7300388946151732, + 1.7312415341949463, + 1.7289181127929687, + 1.7315491919326782, + 1.7262304349517823, + 1.733718437461853, + 1.7327616833877564, + 1.732974825668335, + 1.7304670404434204 + ], + "train_acc": [ + 0.25128, + 0.2863, + 0.29668, + 0.30188, + 0.30926, + 0.31842, + 0.32158, + 0.32524, + 0.326, + 0.32964, + 0.33238, + 0.33504, + 0.33644, + 0.33418, + 0.33848, + 0.33768, + 0.34274, + 0.34018, + 0.34388, + 0.34162, + 0.34312, + 0.3448, + 0.34788, + 0.34724, + 0.34808, + 0.34792, + 0.35052, + 0.3484, + 0.34834, + 0.35138, + 0.35202, + 0.3548, + 0.35396, + 0.35878, + 0.35564, + 0.35712, + 0.35584, + 0.36054, + 0.36252, + 0.3624, + 0.36288, + 0.36378, + 0.36368, + 0.36364, + 0.36672, + 0.36452, + 0.36916, + 0.36916, + 0.36646, + 0.36894, + 0.37018, + 0.37108, + 0.36976, + 0.3718, + 0.37092, + 0.37276, + 0.37434, + 0.37378, + 0.37382, + 0.37552, + 0.37628, + 0.37336, + 0.37356, + 0.37596, + 0.37708, + 0.37716, + 0.37758, + 0.37918, + 0.37974, + 0.37924, + 0.3793, + 0.38072, + 0.3823, + 0.38326, + 0.3807, + 0.3811, + 0.38186, + 0.3819, + 0.37932, + 0.38418, + 0.38282, + 0.38236, + 0.38112, + 0.38594, + 0.38248, + 0.38184, + 0.38116, + 0.3824, + 0.38242, + 0.38396, + 0.3811, + 0.38402, + 0.38436, + 0.38572, + 0.38324, + 0.38604, + 0.38198, + 0.38274, + 0.3843, + 0.38394 + ], + "test_acc": [ + 0.2784, + 0.3102, + 0.3139, + 0.3338, + 0.3473, + 0.3407, + 0.3546, + 0.341, + 0.3527, + 0.3631, + 0.3575, + 0.361, + 0.3661, + 0.3657, + 0.3671, + 0.3545, + 0.3752, + 0.3558, + 0.3637, + 0.3696, + 0.3788, + 0.3719, + 0.3783, + 0.3713, + 0.3801, + 0.3776, + 0.3826, + 0.3803, + 0.3804, + 0.38, + 0.3788, + 0.3834, + 0.3783, + 0.385, + 0.3843, + 0.3798, + 0.3854, + 0.3913, + 0.3868, + 0.3793, + 0.3823, + 0.387, + 0.3862, + 0.3885, + 0.393, + 0.3914, + 0.3913, + 0.388, + 0.395, + 0.3924, + 0.3895, + 0.3884, + 0.3872, + 0.3949, + 0.3852, + 0.3969, + 0.3964, + 0.3952, + 0.4008, + 0.3959, + 0.3952, + 0.3928, + 0.3987, + 0.3957, + 0.3957, + 0.4041, + 0.4015, + 0.4019, + 0.4027, + 0.4016, + 0.4043, + 0.3973, + 0.4001, + 0.3968, + 0.4013, + 0.4044, + 0.4001, + 0.4014, + 0.4009, + 0.4, + 0.3997, + 0.3998, + 0.4038, + 0.3999, + 0.402, + 0.4017, + 0.4004, + 0.402, + 0.4016, + 0.4016, + 0.4032, + 0.4014, + 0.4028, + 0.4035, + 0.4031, + 0.4026, + 0.403, + 0.4021, + 0.4026, + 0.4025 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.04259001836180687, + 0.06560904532670975, + -0.04747920483350754, + -0.030196242034435272, + -0.011228787712752819, + 0.9897887706756592 + ], + "perturbation_rho": [ + 0.027144353836774826, + 0.02096467837691307, + -0.03647351637482643, + 0.004128730855882168, + 0.035667650401592255, + 0.0024879188276827335 + ], + "nudging": { + "0.001": [ + -3.065855707973242e-06, + -3.6228448152542114e-07, + -7.62520357966423e-09, + 4.0279701352119446e-08, + -3.4924596548080444e-09, + -1.401233021169901e-06 + ], + "0.003": [ + -9.421346476301551e-06, + -1.0454095900058746e-06, + 1.4924444258213043e-07, + 1.5087425708770752e-07, + 2.1478626877069473e-08, + -4.659174010157585e-06 + ], + "0.01": [ + -3.1415780540555716e-05, + -3.479945007711649e-06, + 5.55417500436306e-07, + 4.3446198105812073e-07, + 1.6833655536174774e-07, + -1.60514609888196e-05 + ] + }, + "hidden_norms_per_layer": [ + 8874.3623046875, + 79337.484375, + 1180381.75, + 1586145.875, + 1886674.0, + 1911188.375, + 1308062.125 + ], + "bp_grad_norms_per_layer": [ + 2.749456871242728e-05, + 1.8702693296290818e-06, + 6.362390649883309e-07, + 6.333205533337605e-07, + 6.349519026116468e-07, + 6.351035040097486e-07, + 6.164591468404979e-07 + ] + }, + "drift": { + "embed.weight": 53.51461020835447, + "embed.bias": 12.540707882150965, + "blocks.0.ln.weight": 1.1919744306866826, + "blocks.0.w1.weight": 17.44693961444689, + "blocks.0.w1.bias": 11.761966180483919, + "blocks.0.w2.weight": 54.05207721791165, + "blocks.1.ln.weight": 1.2851156800356192, + "blocks.1.w1.weight": 27.13129237243264, + "blocks.1.w1.bias": 21.945665839852136, + "blocks.1.w2.weight": 45.75957303201888, + "blocks.2.ln.weight": 0.765602321043846, + "blocks.2.w1.weight": 21.68864597033105, + "blocks.2.w1.bias": 20.84889617417975, + "blocks.2.w2.weight": 36.85421923299837, + "blocks.3.ln.weight": 0.7387797290176757, + "blocks.3.w1.weight": 21.304625635934347, + "blocks.3.w1.bias": 21.302804910719708, + "blocks.3.w2.weight": 42.158761396062424, + "blocks.4.ln.weight": 0.5093491405372358, + "blocks.4.w1.weight": 16.62521527836923, + "blocks.4.w1.bias": 14.569854707495779, + "blocks.4.w2.weight": 46.72830505948521, + "blocks.5.ln.weight": 0.569690236673946, + "blocks.5.w1.weight": 18.425801313816322, + "blocks.5.w1.bias": 15.707781883071574, + "blocks.5.w2.weight": 65.35620786546646, + "out_ln.weight": 0.42037958632530303, + "out_head.weight": 6.6986797412678145, + "out_head.bias": 0.660644788913502 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 6, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 9 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L6_seed9", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d512_L8_seed0/results_cifar10.json b/results/fa_dfa_d512_L8_seed0/results_cifar10.json new file mode 100644 index 0000000..de5d08f --- /dev/null +++ b/results/fa_dfa_d512_L8_seed0/results_cifar10.json @@ -0,0 +1,881 @@ +{ + "0": { + "dfa": { + "log": { + "train_loss": [ + 2.068570284347534, + 2.0450995418930056, + 2.040309367141724, + 2.0384985535430906, + 2.0337540267944334, + 2.028315601158142, + 2.0283870655059815, + 2.0261440406036377, + 2.0277783988952636, + 2.0236783444976805, + 2.0177216738128663, + 2.020857338027954, + 2.0178855153656006, + 2.0204481651306154, + 2.0144878174209593, + 2.012771276702881, + 2.0133113939666747, + 2.0119590814208985, + 2.009890116882324, + 2.008774749298096, + 2.0083382148742674, + 2.0076066046142578, + 2.0096171319580076, + 2.0058218814849855, + 2.0037864183044434, + 2.005599175491333, + 2.0061829089355467, + 2.0027509896087645, + 2.0058307901763914, + 2.005169650306702, + 2.001157734375, + 2.0021650346374513, + 2.001039823226929, + 2.0046913961029054, + 2.0023456330871583, + 2.00130324256897, + 2.0025460794067382, + 2.001611663970947, + 2.0031372194671633, + 2.0014712918090822, + 2.0003627224731444, + 1.9975626316070556, + 1.9982662202453614, + 1.9976352671813964, + 1.9969971923828125, + 2.0001233780288694, + 1.997716463356018, + 1.9996397301483155, + 1.9975465998840332, + 1.9973710316467286, + 1.9996610871887206, + 1.9990007359313964, + 1.9958738163757324, + 1.9966175972747802, + 2.0009265493774415, + 1.9975428602600098, + 1.9962558139038087, + 1.9977977807998657, + 1.9964274404144287, + 1.9977582873535156, + 1.9967647107696533, + 1.9964400707626342, + 1.9988198545074463, + 1.9962505130386352, + 1.9980460896301269, + 1.9944342567443847, + 1.9969512873077393, + 1.9951286602783203, + 1.9959452591705322, + 1.9949243884658814, + 1.997910827407837, + 1.9961381217956542, + 1.9937760234832764, + 1.9977886120605468, + 1.9961791613006592, + 1.99590330657959, + 1.9961589616394042, + 1.996615062561035, + 1.9954028228759766, + 1.994140139312744, + 1.9964727613067628, + 1.995709580001831, + 1.9935905380630494, + 1.9950566864013672, + 1.9948614299011231, + 1.9930699211120606, + 1.9950269234466553, + 1.996148171005249, + 1.9932368953704833, + 1.9957756786346434, + 1.9943457048034667, + 1.9930462799072266, + 1.9908040158081055, + 1.9929836280059814, + 1.995184083518982, + 1.9943841827392579, + 1.99466581741333, + 1.9947607112121581, + 1.9959496617889405, + 1.9939526372909546 + ], + "train_acc": [ + 0.23544, + 0.24512, + 0.24644, + 0.25124, + 0.25002, + 0.25482, + 0.25544, + 0.25706, + 0.25856, + 0.2579, + 0.26, + 0.25966, + 0.26192, + 0.26044, + 0.26436, + 0.2634, + 0.26574, + 0.26464, + 0.26664, + 0.26776, + 0.26856, + 0.26684, + 0.269, + 0.26998, + 0.272, + 0.27086, + 0.269, + 0.27068, + 0.27142, + 0.27146, + 0.27248, + 0.2718, + 0.27584, + 0.27106, + 0.2725, + 0.27434, + 0.27146, + 0.27324, + 0.27368, + 0.2727, + 0.27496, + 0.2771, + 0.27492, + 0.27596, + 0.27874, + 0.2729, + 0.27634, + 0.2747, + 0.27376, + 0.277, + 0.275, + 0.2776, + 0.27882, + 0.27748, + 0.27616, + 0.2745, + 0.2764, + 0.2782, + 0.27698, + 0.276, + 0.27666, + 0.27568, + 0.27892, + 0.2781, + 0.27582, + 0.2795, + 0.27838, + 0.27616, + 0.2783, + 0.2789, + 0.2776, + 0.27716, + 0.28014, + 0.27904, + 0.2768, + 0.2776, + 0.27682, + 0.279, + 0.28004, + 0.27846, + 0.28068, + 0.27798, + 0.28202, + 0.27836, + 0.28026, + 0.28116, + 0.27894, + 0.28088, + 0.28236, + 0.27878, + 0.2797, + 0.28002, + 0.28094, + 0.27994, + 0.27928, + 0.27954, + 0.27828, + 0.28064, + 0.27654, + 0.2811 + ], + "test_acc": [ + 0.2555, + 0.2658, + 0.2338, + 0.2589, + 0.2645, + 0.2829, + 0.2836, + 0.2386, + 0.2797, + 0.2828, + 0.2825, + 0.2743, + 0.2832, + 0.2778, + 0.277, + 0.2861, + 0.2887, + 0.2843, + 0.2986, + 0.3013, + 0.2914, + 0.2909, + 0.2788, + 0.2839, + 0.301, + 0.3034, + 0.295, + 0.2851, + 0.3031, + 0.2935, + 0.3072, + 0.2842, + 0.2977, + 0.3087, + 0.2878, + 0.2992, + 0.2958, + 0.2776, + 0.3095, + 0.302, + 0.3019, + 0.3096, + 0.3102, + 0.2911, + 0.2998, + 0.2978, + 0.2993, + 0.3104, + 0.2967, + 0.289, + 0.3004, + 0.3059, + 0.3001, + 0.2963, + 0.3022, + 0.2988, + 0.3028, + 0.2962, + 0.3041, + 0.3057, + 0.2973, + 0.305, + 0.3004, + 0.3098, + 0.2968, + 0.3054, + 0.3037, + 0.2995, + 0.3053, + 0.3065, + 0.3013, + 0.3067, + 0.3097, + 0.2996, + 0.3024, + 0.3038, + 0.2982, + 0.3071, + 0.3011, + 0.3049, + 0.3004, + 0.3033, + 0.3033, + 0.3029, + 0.3018, + 0.3025, + 0.3059, + 0.3053, + 0.3051, + 0.3062, + 0.3074, + 0.3024, + 0.3045, + 0.3056, + 0.3046, + 0.3048, + 0.3052, + 0.3054, + 0.3055, + 0.3055 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.3844696283340454, + -0.0003293692716397345, + 0.00036326167173683643, + -7.58874011808075e-06, + -0.000848759722430259, + -0.0005374888423830271, + -0.00015908177010715008, + 0.00029008230194449425 + ], + "perturbation_rho": [ + 0.009705127216875553, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -5.657784640789032e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.2135133147239685e-06, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -3.6587007343769073e-06, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 51559.4296875, + 1329486592.0, + 4660075520.0, + 5993569792.0, + 6888088576.0, + 7015652864.0, + 8532604416.0, + 10583718912.0, + 12350084096.0 + ], + "bp_grad_norms_per_layer": [ + 2.5801111291912093e-07, + 2.506394813917012e-10, + 2.495726680873389e-10, + 2.497514417498792e-10, + 2.4978316637280784e-10, + 2.497747564333963e-10, + 2.497407836088428e-10, + 2.497392292966083e-10, + 2.5016502758212766e-10 + ] + }, + "drift": { + "embed.weight": 331.3816715738323, + "embed.bias": 280.1972794762848, + "blocks.0.ln.weight": 10.206860276367006, + "blocks.0.w1.weight": 293.0648280946435, + "blocks.0.w1.bias": 287.8102020652532, + "blocks.0.w2.weight": 480.05673936823246, + "blocks.1.ln.weight": 9.536572296218509, + "blocks.1.w1.weight": 384.85005936378496, + "blocks.1.w1.bias": 372.907458869564, + "blocks.1.w2.weight": 395.5604232032487, + "blocks.2.ln.weight": 9.583575811181442, + "blocks.2.w1.weight": 391.35917488090115, + "blocks.2.w1.bias": 358.7794005832253, + "blocks.2.w2.weight": 359.6693258728706, + "blocks.3.ln.weight": 9.98809250356752, + "blocks.3.w1.weight": 372.15298556466314, + "blocks.3.w1.bias": 341.69408442244566, + "blocks.3.w2.weight": 332.4997523924133, + "blocks.4.ln.weight": 7.10483666283608, + "blocks.4.w1.weight": 274.0268087874156, + "blocks.4.w1.bias": 252.79226182282545, + "blocks.4.w2.weight": 253.45621037491264, + "blocks.5.ln.weight": 10.243956247187224, + "blocks.5.w1.weight": 408.60736547082587, + "blocks.5.w1.bias": 378.8848879049959, + "blocks.5.w2.weight": 385.0032037459413, + "blocks.6.ln.weight": 11.012226548081168, + "blocks.6.w1.weight": 445.2115675036214, + "blocks.6.w1.bias": 406.90392571558743, + "blocks.6.w2.weight": 422.7651604516687, + "blocks.7.ln.weight": 10.678682452403773, + "blocks.7.w1.weight": 427.86871943047703, + "blocks.7.w1.bias": 421.26775677167313, + "blocks.7.w2.weight": 424.8177101015246, + "out_ln.weight": 0.7424186036866861, + "out_head.weight": 9.83199206815889, + "out_head.bias": 0.4556331945087151 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.031117898826599, + 1.9475624210357667, + 1.920816502456665, + 1.909251974143982, + 1.8920108963775635, + 1.8767158478546142, + 1.8721334871673585, + 1.863781995162964, + 1.8590689016342163, + 1.8527897924041747, + 1.8401800704956055, + 1.846341034011841, + 1.8388335055160523, + 1.8359079233169555, + 1.8243437384414674, + 1.8181520553970336, + 1.8202268659210206, + 1.808049402732849, + 1.8043210766601563, + 1.804519454650879, + 1.805484197998047, + 1.8011124628448487, + 1.8000532245635987, + 1.793024960975647, + 1.7795401358413696, + 1.780609351196289, + 1.777570998802185, + 1.768387048072815, + 1.7707503821563721, + 1.7622215390396119, + 1.7559357610321045, + 1.7557656387710572, + 1.7530760443115234, + 1.7580088625717163, + 1.746208298110962, + 1.7444442225646972, + 1.7456918414306641, + 1.742843345336914, + 1.7410870483398437, + 1.7379502767181396, + 1.7418859932708741, + 1.7290691452789306, + 1.7275336447906493, + 1.7247933518218994, + 1.7241094170761109, + 1.723441693687439, + 1.7219038082122802, + 1.7236041189575195, + 1.7184861129379272, + 1.7169820540618896, + 1.7187074862289429, + 1.7181550318145753, + 1.7141529751205444, + 1.7122508407592774, + 1.7160141869735719, + 1.712083070716858, + 1.709077308998108, + 1.7120566661834717, + 1.7138131452178955, + 1.7056159759902954, + 1.706560867576599, + 1.7041735248565675, + 1.7031678924942018, + 1.7053270459747314, + 1.7001814685440064, + 1.698059520263672, + 1.700954571838379, + 1.6969071603012085, + 1.6964463064575195, + 1.6973033560562134, + 1.697128847579956, + 1.6953863137054443, + 1.6925591326141358, + 1.6960328076171876, + 1.694064889831543, + 1.6925205464935302, + 1.691927748451233, + 1.6897403769683839, + 1.6869727429580688, + 1.6902123848724364, + 1.692222232322693, + 1.6904909725952149, + 1.6880250762557982, + 1.6899029476165772, + 1.6866879183959962, + 1.6873126499176025, + 1.689984903640747, + 1.6877681387329102, + 1.6844988162994385, + 1.6888493407821654, + 1.6853452449798585, + 1.6879416900253297, + 1.6831971020889283, + 1.681332094078064, + 1.681463975868225, + 1.6855041525268555, + 1.6846018813323975, + 1.688128840942383, + 1.6876728596115111, + 1.6832955141830444 + ], + "train_acc": [ + 0.25122, + 0.29002, + 0.30074, + 0.3078, + 0.31202, + 0.32116, + 0.32302, + 0.32474, + 0.32998, + 0.3327, + 0.33662, + 0.3378, + 0.34062, + 0.3423, + 0.3468, + 0.3493, + 0.34718, + 0.35114, + 0.35658, + 0.35406, + 0.35438, + 0.356, + 0.35496, + 0.35712, + 0.36442, + 0.36318, + 0.3635, + 0.36856, + 0.36718, + 0.37118, + 0.37414, + 0.37068, + 0.37418, + 0.36996, + 0.3753, + 0.37762, + 0.3755, + 0.37454, + 0.37586, + 0.37612, + 0.37538, + 0.38164, + 0.3825, + 0.37992, + 0.38294, + 0.38242, + 0.38152, + 0.38306, + 0.3826, + 0.38608, + 0.38346, + 0.38492, + 0.38532, + 0.38858, + 0.38614, + 0.38406, + 0.38632, + 0.38606, + 0.38602, + 0.39134, + 0.39088, + 0.39142, + 0.39166, + 0.39208, + 0.39016, + 0.39156, + 0.3904, + 0.39498, + 0.39106, + 0.39302, + 0.39394, + 0.39172, + 0.39746, + 0.39332, + 0.39366, + 0.39408, + 0.39414, + 0.39544, + 0.3967, + 0.39562, + 0.39768, + 0.39624, + 0.397, + 0.39732, + 0.39628, + 0.39652, + 0.39712, + 0.39694, + 0.39822, + 0.39396, + 0.39708, + 0.39744, + 0.39826, + 0.39852, + 0.39892, + 0.39592, + 0.3976, + 0.39714, + 0.39696, + 0.40022 + ], + "test_acc": [ + 0.2945, + 0.3266, + 0.3121, + 0.33, + 0.3343, + 0.3566, + 0.3538, + 0.3381, + 0.3562, + 0.3602, + 0.3694, + 0.3632, + 0.3618, + 0.3697, + 0.3812, + 0.3821, + 0.3828, + 0.3851, + 0.3757, + 0.3917, + 0.389, + 0.3894, + 0.3834, + 0.3912, + 0.3926, + 0.3955, + 0.3907, + 0.3903, + 0.3941, + 0.384, + 0.3972, + 0.3875, + 0.399, + 0.4015, + 0.3924, + 0.3957, + 0.4009, + 0.3991, + 0.3996, + 0.4008, + 0.4036, + 0.4005, + 0.409, + 0.4097, + 0.4021, + 0.4009, + 0.4126, + 0.4071, + 0.4122, + 0.4089, + 0.4131, + 0.4113, + 0.4146, + 0.4059, + 0.4163, + 0.4114, + 0.4068, + 0.4117, + 0.4182, + 0.4115, + 0.4082, + 0.4193, + 0.4177, + 0.4224, + 0.416, + 0.4171, + 0.4137, + 0.4155, + 0.4184, + 0.4188, + 0.4182, + 0.4169, + 0.4182, + 0.414, + 0.4182, + 0.412, + 0.4183, + 0.4217, + 0.4169, + 0.4204, + 0.4189, + 0.4154, + 0.4163, + 0.4166, + 0.4175, + 0.4195, + 0.418, + 0.4198, + 0.4191, + 0.4194, + 0.4182, + 0.4198, + 0.4177, + 0.4183, + 0.4178, + 0.4181, + 0.4188, + 0.4187, + 0.4191, + 0.4191 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.04550348222255707, + 0.04952041804790497, + 0.012564106844365597, + -0.048967309296131134, + -0.018994076177477837, + -0.08921081572771072, + -0.09729112684726715, + 0.9970568418502808 + ], + "perturbation_rho": [ + -0.007405851036310196, + -0.01778452657163143, + 0.03651121258735657, + -0.004411454312503338, + -0.017335545271635056, + 0.024473480880260468, + 0.033633388578891754, + -0.0013661051634699106 + ], + "nudging": { + "0.001": [ + -5.257490556687117e-06, + -3.859749995172024e-07, + -9.487848728895187e-08, + 9.802170097827911e-08, + 2.2584572434425354e-08, + 1.0256189852952957e-07, + 7.171183824539185e-08, + -1.4471588656306267e-06 + ], + "0.003": [ + -1.5992671251296997e-05, + -1.0106596164405346e-06, + -1.9604340195655823e-07, + 2.4406472221016884e-07, + 1.200241968035698e-07, + 4.641478881239891e-07, + 5.108886398375034e-07, + -5.204754415899515e-06 + ], + "0.01": [ + -5.306815728545189e-05, + -3.1923409551382065e-06, + -3.415043465793133e-07, + 9.238137863576412e-07, + 3.079185262322426e-07, + 1.4764373190701008e-06, + 1.8483842723071575e-06, + -1.84740056283772e-05 + ] + }, + "hidden_norms_per_layer": [ + 5801.0615234375, + 58545.69140625, + 278396.125, + 541219.0625, + 842750.8125, + 1009016.125, + 1188189.0, + 1390662.625, + 689550.6875 + ], + "bp_grad_norms_per_layer": [ + 3.488941365503706e-05, + 2.940359536296455e-06, + 8.656722911837278e-07, + 7.742645493635791e-07, + 7.646057156307506e-07, + 7.609253316331888e-07, + 7.622682005603565e-07, + 7.812644753357745e-07, + 7.56423219172575e-07 + ] + }, + "drift": { + "embed.weight": 40.85701563091728, + "embed.bias": 16.257987544795743, + "blocks.0.ln.weight": 1.0155844718531961, + "blocks.0.w1.weight": 14.308616002505659, + "blocks.0.w1.bias": 11.682066059725996, + "blocks.0.w2.weight": 47.88448262527169, + "blocks.1.ln.weight": 0.9800293898409066, + "blocks.1.w1.weight": 18.22850941673304, + "blocks.1.w1.bias": 10.248089109744662, + "blocks.1.w2.weight": 46.5074943877367, + "blocks.2.ln.weight": 0.6484944766132613, + "blocks.2.w1.weight": 18.04882879127194, + "blocks.2.w1.bias": 15.21848342753742, + "blocks.2.w2.weight": 28.777360387947045, + "blocks.3.ln.weight": 0.5971288470484941, + "blocks.3.w1.weight": 17.899823557577733, + "blocks.3.w1.bias": 18.289110041973974, + "blocks.3.w2.weight": 26.844107923153718, + "blocks.4.ln.weight": 0.5827124751217686, + "blocks.4.w1.weight": 16.72204012093031, + "blocks.4.w1.bias": 17.062801665778103, + "blocks.4.w2.weight": 33.8783390116795, + "blocks.5.ln.weight": 0.4109250365181375, + "blocks.5.w1.weight": 17.501541376807705, + "blocks.5.w1.bias": 19.653202019435096, + "blocks.5.w2.weight": 21.067728941101397, + "blocks.6.ln.weight": 0.5111244501205948, + "blocks.6.w1.weight": 18.004625850717634, + "blocks.6.w1.bias": 18.9556455723282, + "blocks.6.w2.weight": 26.773409027964927, + "blocks.7.ln.weight": 0.6137697718245447, + "blocks.7.w1.weight": 19.644875839051267, + "blocks.7.w1.bias": 20.525336825132296, + "blocks.7.w2.weight": 43.29822559398887, + "out_ln.weight": 0.2984864729265921, + "out_head.weight": 5.62935512400422, + "out_head.bias": 0.701363478107578 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 8, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 0 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L8_seed0", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d512_L8_seed1/results_cifar10.json b/results/fa_dfa_d512_L8_seed1/results_cifar10.json new file mode 100644 index 0000000..7ae4222 --- /dev/null +++ b/results/fa_dfa_d512_L8_seed1/results_cifar10.json @@ -0,0 +1,881 @@ +{ + "1": { + "dfa": { + "log": { + "train_loss": [ + 2.082363905029297, + 2.0549054863739014, + 2.048207590179443, + 2.040793383560181, + 2.0366951902008057, + 2.032741487388611, + 2.0325103788757324, + 2.030692584877014, + 2.0259502252197263, + 2.025390202407837, + 2.0222666513061522, + 2.0214237771606447, + 2.0199728786468505, + 2.021625373916626, + 2.017548585968018, + 2.0168718279266358, + 2.0175807970428465, + 2.0172574797058105, + 2.01521342338562, + 2.0168276077270506, + 2.012039394607544, + 2.0129323637390137, + 2.014951186904907, + 2.0105557980346678, + 2.01169651550293, + 2.0122193618011472, + 2.0088330200576783, + 2.0112308924102784, + 2.010762939796448, + 2.0086743901824953, + 2.00971054649353, + 2.010672675628662, + 2.007623846206665, + 2.0087671311187743, + 2.0092544104766845, + 2.0102373889541627, + 2.0070104084014893, + 2.00739478553772, + 2.0070192804336546, + 2.0084682094573973, + 2.00599662361145, + 2.005214340057373, + 2.0051871164703368, + 2.0050434196472167, + 2.005824287185669, + 2.004784624862671, + 2.006481015930176, + 2.004454507408142, + 2.004789038391113, + 2.004796143836975, + 2.004693070678711, + 2.0054635766601563, + 2.005378037185669, + 2.004756614151001, + 2.0019625535583496, + 2.002588889465332, + 2.0051779277038575, + 2.0044091219711304, + 2.003338354034424, + 2.0033947316741942, + 2.003234750213623, + 2.0028414460754393, + 2.0036003881072997, + 2.0034564432525634, + 2.0025813438415527, + 2.0027055737304686, + 2.003683521270752, + 2.000025806732178, + 2.000833299789429, + 2.000070794067383, + 2.0004121925354004, + 1.9998590943908692, + 2.001034607772827, + 1.9999368872451782, + 2.001410151634216, + 1.9992089236831665, + 2.000929930076599, + 2.0007802046966554, + 1.999412225341797, + 2.00004220161438, + 1.9992640100860595, + 1.9985904244995116, + 1.9986971343231201, + 1.998013607559204, + 1.9992886389160156, + 1.999632513961792, + 1.9996888436889648, + 1.997718791885376, + 1.9993746613311767, + 1.9998883880233764, + 1.9990022793960571, + 1.997506809692383, + 2.0001587979507445, + 1.9999388419342041, + 1.997678748703003, + 1.9990675243759155, + 1.9979941717910767, + 1.99825477684021, + 1.9982349634552001, + 1.9987702852630616 + ], + "train_acc": [ + 0.22766, + 0.23814, + 0.24064, + 0.24662, + 0.24896, + 0.24744, + 0.25128, + 0.25148, + 0.257, + 0.25682, + 0.25768, + 0.25754, + 0.25738, + 0.25788, + 0.25796, + 0.26228, + 0.26038, + 0.26018, + 0.2624, + 0.26194, + 0.26886, + 0.26334, + 0.26348, + 0.26562, + 0.26452, + 0.26388, + 0.26886, + 0.26554, + 0.26736, + 0.26842, + 0.26788, + 0.2677, + 0.26986, + 0.26932, + 0.2696, + 0.26988, + 0.27094, + 0.26952, + 0.27026, + 0.26998, + 0.27264, + 0.27378, + 0.27244, + 0.27328, + 0.27004, + 0.27324, + 0.27278, + 0.27568, + 0.27154, + 0.27282, + 0.27324, + 0.27192, + 0.27224, + 0.27294, + 0.27522, + 0.27348, + 0.27266, + 0.2733, + 0.27448, + 0.27446, + 0.27554, + 0.27596, + 0.27772, + 0.2751, + 0.27624, + 0.2762, + 0.27634, + 0.27822, + 0.27598, + 0.2772, + 0.2767, + 0.27624, + 0.27632, + 0.27776, + 0.27558, + 0.27776, + 0.27686, + 0.27808, + 0.27808, + 0.27784, + 0.27804, + 0.2769, + 0.27848, + 0.27982, + 0.27622, + 0.27712, + 0.27716, + 0.27708, + 0.2775, + 0.27748, + 0.27844, + 0.27692, + 0.2778, + 0.27656, + 0.27824, + 0.27638, + 0.27978, + 0.27886, + 0.27804, + 0.27882 + ], + "test_acc": [ + 0.2423, + 0.2394, + 0.2347, + 0.2736, + 0.2726, + 0.255, + 0.2481, + 0.2855, + 0.2847, + 0.266, + 0.291, + 0.2613, + 0.2775, + 0.2742, + 0.268, + 0.2726, + 0.2918, + 0.2421, + 0.2813, + 0.2608, + 0.2622, + 0.2733, + 0.2684, + 0.2941, + 0.2935, + 0.2823, + 0.2868, + 0.2952, + 0.3002, + 0.2952, + 0.3004, + 0.2797, + 0.2936, + 0.2993, + 0.2878, + 0.291, + 0.2839, + 0.2917, + 0.2933, + 0.2925, + 0.2814, + 0.2948, + 0.2987, + 0.2876, + 0.2737, + 0.2985, + 0.3022, + 0.2788, + 0.2868, + 0.2958, + 0.2886, + 0.302, + 0.2925, + 0.2965, + 0.2882, + 0.2941, + 0.3024, + 0.2895, + 0.3013, + 0.2994, + 0.2891, + 0.2866, + 0.291, + 0.2939, + 0.2834, + 0.2973, + 0.2851, + 0.2965, + 0.2951, + 0.2911, + 0.297, + 0.2923, + 0.2987, + 0.2954, + 0.2925, + 0.2892, + 0.2923, + 0.2956, + 0.2976, + 0.2978, + 0.2955, + 0.2986, + 0.2959, + 0.294, + 0.294, + 0.297, + 0.296, + 0.2964, + 0.2979, + 0.2999, + 0.2958, + 0.2963, + 0.2963, + 0.2954, + 0.2959, + 0.2953, + 0.2952, + 0.2956, + 0.2958, + 0.2958 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.396922767162323, + -0.0003933884436264634, + -8.022795373108238e-05, + -0.00015045293548610061, + 0.0005392992752604187, + -0.00036555560654960573, + 0.0002907244488596916, + -9.105022036237642e-05 + ], + "perturbation_rho": [ + -0.022967863827943802, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -3.4226104617118835e-07, + 0.0, + 0.0, + 0.0, + -1.862645149230957e-09, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.051928848028183e-06, + 0.0, + -1.862645149230957e-09, + 0.0, + -2.7939677238464355e-09, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -3.5013072192668915e-06, + 0.0, + -1.862645149230957e-09, + 0.0, + -4.6566128730773926e-09, + 0.0, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 56414.3359375, + 1650990848.0, + 3797263872.0, + 4648061440.0, + 5705044480.0, + 6883173376.0, + 7810007552.0, + 9771259904.0, + 10072083456.0 + ], + "bp_grad_norms_per_layer": [ + 2.3933893089633784e-07, + 2.075998678519042e-10, + 2.073478749808899e-10, + 2.0727551619525997e-10, + 2.0692902946706226e-10, + 2.0691065527600472e-10, + 2.0689701341058964e-10, + 2.0701990122162783e-10, + 2.070805887877114e-10 + ] + }, + "drift": { + "embed.weight": 342.6478357645448, + "embed.bias": 262.14247134543683, + "blocks.0.ln.weight": 10.104765097486084, + "blocks.0.w1.weight": 316.5544638552012, + "blocks.0.w1.bias": 286.1293710348026, + "blocks.0.w2.weight": 488.0042854468774, + "blocks.1.ln.weight": 9.276528933854145, + "blocks.1.w1.weight": 365.09493896949925, + "blocks.1.w1.bias": 338.1199629883399, + "blocks.1.w2.weight": 335.8119384773775, + "blocks.2.ln.weight": 8.33234375341126, + "blocks.2.w1.weight": 339.8426755615624, + "blocks.2.w1.bias": 311.7235998065332, + "blocks.2.w2.weight": 316.14958807276867, + "blocks.3.ln.weight": 8.800684936876243, + "blocks.3.w1.weight": 365.78353354881364, + "blocks.3.w1.bias": 339.9117633918553, + "blocks.3.w2.weight": 349.88606585029197, + "blocks.4.ln.weight": 9.453209200185142, + "blocks.4.w1.weight": 385.95491307373965, + "blocks.4.w1.bias": 356.64512170760173, + "blocks.4.w2.weight": 352.5237601257774, + "blocks.5.ln.weight": 9.069459103352756, + "blocks.5.w1.weight": 368.17480746032606, + "blocks.5.w1.bias": 336.75699627476126, + "blocks.5.w2.weight": 341.8227235483115, + "blocks.6.ln.weight": 11.485193095368288, + "blocks.6.w1.weight": 454.812566497913, + "blocks.6.w1.bias": 427.10330552698105, + "blocks.6.w2.weight": 417.7750307221631, + "blocks.7.ln.weight": 8.765686193273819, + "blocks.7.w1.weight": 347.8255816484655, + "blocks.7.w1.bias": 334.696723010558, + "blocks.7.w2.weight": 324.7278045130555, + "out_ln.weight": 0.6473491781322253, + "out_head.weight": 9.841211699963578, + "out_head.bias": 1.0361314019488228 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0216404482269286, + 1.9374429712677002, + 1.9162016125488281, + 1.8889766452026366, + 1.8700911392211914, + 1.8582790970611571, + 1.8527047716903686, + 1.848144301109314, + 1.8374218865966796, + 1.8318119472503662, + 1.8252220174407958, + 1.8180473458862305, + 1.8125171118927002, + 1.8029107147216796, + 1.8021541842651367, + 1.7970342992401123, + 1.7941553127288818, + 1.7909175275421143, + 1.782177930908203, + 1.78192981880188, + 1.7714600928115845, + 1.7727110540008546, + 1.7706272528076172, + 1.7628321991348266, + 1.7623195043563842, + 1.7597255520629882, + 1.7538186774063111, + 1.7597168767547609, + 1.751037622909546, + 1.7449284631729125, + 1.7485404402923583, + 1.7483091131973267, + 1.7475471432113647, + 1.7437016841888429, + 1.744876121864319, + 1.7462471084213256, + 1.739538355178833, + 1.740323678817749, + 1.7387758260345458, + 1.7394548602294921, + 1.7387311511611938, + 1.7320650988388062, + 1.7338726557159423, + 1.7331176211547852, + 1.7323205828475952, + 1.7314462683486938, + 1.7310566067886353, + 1.7290524214935303, + 1.728844566001892, + 1.7304431410980226, + 1.7348097546386718, + 1.7295909213638305, + 1.7310686675262452, + 1.7292217221069337, + 1.7266970907211303, + 1.7262331484985352, + 1.7316008935928344, + 1.732737080116272, + 1.72913513130188, + 1.7267526404190063, + 1.7301805443954468, + 1.726527798461914, + 1.7265644375991822, + 1.7238530459213257, + 1.7280543838500977, + 1.7257318978881835, + 1.726536202392578, + 1.7238884717559815, + 1.724731280784607, + 1.7219169683456421, + 1.725569347229004, + 1.7213600470352173, + 1.7229674570083617, + 1.723882184753418, + 1.7203116341781617, + 1.723835189590454, + 1.7169267540740967, + 1.7185560147094727, + 1.7191566823577882, + 1.7162863437652587, + 1.7187646448135376, + 1.7191588035202026, + 1.716515462913513, + 1.7150226160430908, + 1.7141532082748414, + 1.7141116730499268, + 1.7139857364273072, + 1.7161692111587525, + 1.7135670114135741, + 1.7172551581573485, + 1.711785320739746, + 1.710454179458618, + 1.7139234024429322, + 1.712289864768982, + 1.7146155573272706, + 1.7152564708709717, + 1.713687544631958, + 1.7137356586456298, + 1.7130619507217406, + 1.7119491666030884 + ], + "train_acc": [ + 0.25728, + 0.2904, + 0.30204, + 0.31214, + 0.32092, + 0.32608, + 0.32958, + 0.333, + 0.34152, + 0.34204, + 0.34534, + 0.34792, + 0.34984, + 0.35522, + 0.35358, + 0.35932, + 0.35884, + 0.35804, + 0.36126, + 0.3618, + 0.36572, + 0.36352, + 0.36422, + 0.36884, + 0.36892, + 0.36766, + 0.3732, + 0.36848, + 0.37482, + 0.37654, + 0.374, + 0.37336, + 0.37234, + 0.37542, + 0.37588, + 0.37886, + 0.3797, + 0.37564, + 0.37638, + 0.37848, + 0.37414, + 0.38038, + 0.38024, + 0.37908, + 0.37766, + 0.38196, + 0.38054, + 0.3817, + 0.37988, + 0.3799, + 0.37976, + 0.3801, + 0.38004, + 0.3791, + 0.38402, + 0.38122, + 0.38052, + 0.38022, + 0.37968, + 0.38174, + 0.37974, + 0.3824, + 0.38198, + 0.38646, + 0.38242, + 0.38228, + 0.37926, + 0.386, + 0.38334, + 0.3845, + 0.38592, + 0.38384, + 0.3829, + 0.38474, + 0.38294, + 0.38496, + 0.3877, + 0.3868, + 0.38626, + 0.38876, + 0.3878, + 0.38576, + 0.38496, + 0.387, + 0.38818, + 0.38784, + 0.38816, + 0.38708, + 0.38834, + 0.38604, + 0.38942, + 0.39, + 0.38986, + 0.39166, + 0.38932, + 0.3872, + 0.38822, + 0.38954, + 0.38726, + 0.39068 + ], + "test_acc": [ + 0.3016, + 0.3144, + 0.318, + 0.344, + 0.3607, + 0.3449, + 0.3552, + 0.3675, + 0.3696, + 0.3786, + 0.3818, + 0.374, + 0.3837, + 0.3739, + 0.3839, + 0.3766, + 0.3871, + 0.3763, + 0.3964, + 0.3769, + 0.375, + 0.3863, + 0.3847, + 0.4028, + 0.3957, + 0.4041, + 0.3988, + 0.3948, + 0.4082, + 0.4017, + 0.4074, + 0.4028, + 0.4108, + 0.4004, + 0.4041, + 0.4025, + 0.4049, + 0.4087, + 0.4, + 0.402, + 0.3985, + 0.4059, + 0.4004, + 0.4091, + 0.4071, + 0.4143, + 0.4132, + 0.409, + 0.4076, + 0.4057, + 0.4061, + 0.4117, + 0.41, + 0.4148, + 0.4041, + 0.4149, + 0.4053, + 0.4078, + 0.4003, + 0.4132, + 0.4143, + 0.4119, + 0.4157, + 0.4033, + 0.417, + 0.4029, + 0.4075, + 0.4054, + 0.4092, + 0.4144, + 0.4062, + 0.4099, + 0.4126, + 0.4157, + 0.4137, + 0.4152, + 0.4095, + 0.4124, + 0.4125, + 0.4084, + 0.4123, + 0.4109, + 0.4112, + 0.4101, + 0.4096, + 0.4125, + 0.4075, + 0.4126, + 0.4119, + 0.4123, + 0.412, + 0.4125, + 0.4119, + 0.4121, + 0.4112, + 0.4123, + 0.4125, + 0.4116, + 0.4123, + 0.4123 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.018269643187522888, + 0.07727976143360138, + -0.007720204535871744, + -0.03330487757921219, + -0.08965514600276947, + -0.05200214684009552, + -0.026286372914910316, + 0.9987640380859375 + ], + "perturbation_rho": [ + -0.009564901702105999, + -0.008453571237623692, + 0.004972926340997219, + -0.0027248896658420563, + -0.002009802497923374, + -0.036599986255168915, + -0.004199513234198093, + -0.03066324070096016 + ], + "nudging": { + "0.001": [ + -2.971384674310684e-06, + -4.7439243644475937e-07, + -6.693881005048752e-08, + 8.987262845039368e-08, + 6.495974957942963e-08, + 3.585591912269592e-08, + 5.820766091346741e-08, + -1.373467966914177e-06 + ], + "0.003": [ + -8.388538844883442e-06, + -1.432374119758606e-06, + 3.236345946788788e-08, + 1.6565900295972824e-07, + 4.4424086809158325e-07, + 1.909211277961731e-07, + 1.6344711184501648e-07, + -5.066161975264549e-06 + ], + "0.01": [ + -2.80656386166811e-05, + -5.379552021622658e-06, + 6.239861249923706e-08, + 5.534384399652481e-07, + 1.6264384612441063e-06, + 8.010538294911385e-07, + 5.328329280018806e-07, + -1.849012915045023e-05 + ] + }, + "hidden_norms_per_layer": [ + 6323.6376953125, + 105527.390625, + 956923.0625, + 1390351.75, + 1878290.125, + 2305144.75, + 2408578.0, + 2446730.0, + 1101165.75 + ], + "bp_grad_norms_per_layer": [ + 3.628878766903654e-05, + 2.4742682853684528e-06, + 7.756235049782845e-07, + 7.634440635229112e-07, + 7.605355563100602e-07, + 7.63955029015051e-07, + 7.672065862607269e-07, + 7.676999871364387e-07, + 7.604297138641414e-07 + ] + }, + "drift": { + "embed.weight": 41.248137439097945, + "embed.bias": 19.492810839395194, + "blocks.0.ln.weight": 1.069830164213397, + "blocks.0.w1.weight": 14.662790472591759, + "blocks.0.w1.bias": 14.121256961200007, + "blocks.0.w2.weight": 58.06593910253397, + "blocks.1.ln.weight": 1.133609473869923, + "blocks.1.w1.weight": 22.616267702800116, + "blocks.1.w1.bias": 18.62883152876521, + "blocks.1.w2.weight": 55.42300201602086, + "blocks.2.ln.weight": 0.8395001619703416, + "blocks.2.w1.weight": 23.18964915987391, + "blocks.2.w1.bias": 23.25897523020434, + "blocks.2.w2.weight": 51.12589199717472, + "blocks.3.ln.weight": 0.8079243325814617, + "blocks.3.w1.weight": 23.270223091205043, + "blocks.3.w1.bias": 23.973451384958736, + "blocks.3.w2.weight": 44.38945707970049, + "blocks.4.ln.weight": 0.7352731954552547, + "blocks.4.w1.weight": 23.461113331044203, + "blocks.4.w1.bias": 25.70390171443948, + "blocks.4.w2.weight": 35.45845934569411, + "blocks.5.ln.weight": 0.5783243137671081, + "blocks.5.w1.weight": 18.59117833907557, + "blocks.5.w1.bias": 19.752451457096242, + "blocks.5.w2.weight": 34.26869815664233, + "blocks.6.ln.weight": 0.5587106874210115, + "blocks.6.w1.weight": 16.34076044382607, + "blocks.6.w1.bias": 16.11418290554864, + "blocks.6.w2.weight": 46.04419499078452, + "blocks.7.ln.weight": 0.7410616437288059, + "blocks.7.w1.weight": 24.360145562410782, + "blocks.7.w1.bias": 27.19985092354318, + "blocks.7.w2.weight": 37.619326829386324, + "out_ln.weight": 0.35428257928264884, + "out_head.weight": 7.449367264588818, + "out_head.bias": 2.03709619180192 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 8, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 1 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L8_seed1", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d512_L8_seed2/results_cifar10.json b/results/fa_dfa_d512_L8_seed2/results_cifar10.json new file mode 100644 index 0000000..5a01422 --- /dev/null +++ b/results/fa_dfa_d512_L8_seed2/results_cifar10.json @@ -0,0 +1,881 @@ +{ + "2": { + "dfa": { + "log": { + "train_loss": [ + 2.0644361302948, + 2.046419815826416, + 2.038333583984375, + 2.038490813598633, + 2.039234817237854, + 2.036761814575195, + 2.0334856452941894, + 2.034733783721924, + 2.029182948684692, + 2.0294196990585327, + 2.0262659832000733, + 2.026484178543091, + 2.025475791244507, + 2.0258196518707274, + 2.0231178736114503, + 2.0239224526977537, + 2.0206442892456056, + 2.020261905860901, + 2.0215636526489256, + 2.0221778102874755, + 2.019456211929321, + 2.0221493022155763, + 2.0212540914916994, + 2.0227064220809936, + 2.019565500793457, + 2.0192780477905274, + 2.022175419692993, + 2.0185312504577637, + 2.0168647090911866, + 2.018404118041992, + 2.0194629988098143, + 2.019041424560547, + 2.0179552308654785, + 2.0175965952301027, + 2.019192621688843, + 2.0174668720245363, + 2.0164142420196534, + 2.015060181007385, + 2.0180635737609864, + 2.0148490827941896, + 2.017637328643799, + 2.0145212058258055, + 2.0134287954711914, + 2.016226960449219, + 2.017133801612854, + 2.015110319671631, + 2.0140358378601073, + 2.0136930332946776, + 2.013491507110596, + 2.0132310498046877, + 2.0138910511779784, + 2.0127566445159912, + 2.014643051528931, + 2.013907117614746, + 2.013747940750122, + 2.013379987182617, + 2.014386905593872, + 2.0132828955841062, + 2.012256466674805, + 2.0110202868652345, + 2.011350602493286, + 2.0109373377990725, + 2.0134030670928955, + 2.013690048866272, + 2.010493644256592, + 2.0127015099716186, + 2.014034610977173, + 2.0104107666015625, + 2.009855454788208, + 2.0113264336395265, + 2.0088184381866454, + 2.009741211090088, + 2.0078707803726195, + 2.0089734397888184, + 2.010708229217529, + 2.0102198361206054, + 2.0088808136367797, + 2.010095508155823, + 2.0100783850097654, + 2.0090800459289553, + 2.0093112369155883, + 2.0093213819122315, + 2.0104708361053465, + 2.0093392966079713, + 2.007901397628784, + 2.0074678485870363, + 2.0079102828216553, + 2.0076543350982665, + 2.008981750946045, + 2.0082327671051026, + 2.0088174480819703, + 2.009487823638916, + 2.0088063831329346, + 2.008092264633179, + 2.009373797225952, + 2.0082830452728273, + 2.009356221847534, + 2.0067892125701903, + 2.0079211888122557, + 2.009279239501953 + ], + "train_acc": [ + 0.23872, + 0.2456, + 0.24996, + 0.2495, + 0.24748, + 0.2497, + 0.25566, + 0.25088, + 0.25386, + 0.2538, + 0.25732, + 0.25644, + 0.2601, + 0.25942, + 0.2592, + 0.25872, + 0.26188, + 0.26174, + 0.26336, + 0.2605, + 0.2632, + 0.2612, + 0.2603, + 0.2612, + 0.26016, + 0.2645, + 0.26032, + 0.26266, + 0.26196, + 0.26384, + 0.26492, + 0.26296, + 0.26584, + 0.26768, + 0.26482, + 0.2648, + 0.26722, + 0.26534, + 0.26276, + 0.2668, + 0.26668, + 0.26698, + 0.2691, + 0.26718, + 0.26602, + 0.26738, + 0.26584, + 0.26838, + 0.2676, + 0.26986, + 0.268, + 0.2675, + 0.26946, + 0.2669, + 0.2694, + 0.26946, + 0.26752, + 0.26804, + 0.27002, + 0.26888, + 0.27004, + 0.27052, + 0.2715, + 0.26972, + 0.2714, + 0.26728, + 0.26934, + 0.27246, + 0.27128, + 0.27064, + 0.27078, + 0.27072, + 0.27496, + 0.2735, + 0.27134, + 0.27228, + 0.2706, + 0.27152, + 0.27204, + 0.27414, + 0.27122, + 0.27298, + 0.27146, + 0.27378, + 0.27276, + 0.2735, + 0.27246, + 0.2746, + 0.2735, + 0.27246, + 0.27278, + 0.27358, + 0.27442, + 0.27286, + 0.2721, + 0.27286, + 0.27204, + 0.27472, + 0.27256, + 0.27478 + ], + "test_acc": [ + 0.2603, + 0.2353, + 0.2607, + 0.265, + 0.2673, + 0.2661, + 0.2701, + 0.2571, + 0.2897, + 0.288, + 0.2911, + 0.2945, + 0.265, + 0.2754, + 0.2857, + 0.2812, + 0.2695, + 0.2887, + 0.2854, + 0.288, + 0.2916, + 0.2906, + 0.2793, + 0.2867, + 0.2901, + 0.3, + 0.2805, + 0.2864, + 0.2983, + 0.2846, + 0.2886, + 0.2877, + 0.2925, + 0.2778, + 0.2959, + 0.2912, + 0.2908, + 0.2908, + 0.2801, + 0.2988, + 0.2829, + 0.2906, + 0.3007, + 0.2911, + 0.2975, + 0.2959, + 0.3009, + 0.2959, + 0.2901, + 0.2841, + 0.2989, + 0.2867, + 0.2927, + 0.2886, + 0.2802, + 0.281, + 0.3007, + 0.2873, + 0.2891, + 0.3031, + 0.2983, + 0.2973, + 0.2972, + 0.2921, + 0.2927, + 0.2971, + 0.2975, + 0.2988, + 0.3026, + 0.2926, + 0.2941, + 0.2971, + 0.2967, + 0.2972, + 0.2957, + 0.2998, + 0.2961, + 0.2934, + 0.2937, + 0.2949, + 0.2969, + 0.2975, + 0.2933, + 0.2965, + 0.2925, + 0.299, + 0.2975, + 0.2993, + 0.2935, + 0.2975, + 0.2962, + 0.2959, + 0.2958, + 0.2974, + 0.2975, + 0.2967, + 0.2967, + 0.2967, + 0.2967, + 0.2966 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.3420855402946472, + -0.0001374588318867609, + -0.0004025904054287821, + -0.0002873847261071205, + -0.0005695301806554198, + 0.00037305167643353343, + 0.0003001938748639077, + 6.27083791187033e-05 + ], + "perturbation_rho": [ + 0.05308223515748978, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -3.8510188460350037e-07, + 0.0, + 0.0, + 0.0, + -3.725290298461914e-09, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.0007061064243317e-06, + 0.0, + 0.0, + 0.0, + -3.725290298461914e-09, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -3.0365772545337677e-06, + 0.0, + 0.0, + 0.0, + -3.725290298461914e-09, + 0.0, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 53974.390625, + 1994035456.0, + 4991160320.0, + 6590162432.0, + 6723744768.0, + 9356510208.0, + 9466019840.0, + 10094161920.0, + 10161934336.0 + ], + "bp_grad_norms_per_layer": [ + 2.1625525903345988e-07, + 2.0878114515010537e-10, + 2.0495614927451555e-10, + 2.0494976549212396e-10, + 2.0494639318968666e-10, + 2.0494643482305008e-10, + 2.0495241614959525e-10, + 2.0495422026201027e-10, + 2.0496979113993063e-10 + ] + }, + "drift": { + "embed.weight": 347.8854750017766, + "embed.bias": 315.64569824736526, + "blocks.0.ln.weight": 10.235928115345473, + "blocks.0.w1.weight": 318.5495193049707, + "blocks.0.w1.bias": 347.2457558883921, + "blocks.0.w2.weight": 498.50725738614966, + "blocks.1.ln.weight": 9.559401727161022, + "blocks.1.w1.weight": 396.35172799806554, + "blocks.1.w1.bias": 380.45360123690216, + "blocks.1.w2.weight": 397.06562678988604, + "blocks.2.ln.weight": 9.795519840502836, + "blocks.2.w1.weight": 400.5282974464668, + "blocks.2.w1.bias": 367.0528578182135, + "blocks.2.w2.weight": 385.20621819830745, + "blocks.3.ln.weight": 7.79115393268457, + "blocks.3.w1.weight": 267.38195610990635, + "blocks.3.w1.bias": 241.5060314212707, + "blocks.3.w2.weight": 261.60447846418674, + "blocks.4.ln.weight": 10.663570278143073, + "blocks.4.w1.weight": 440.19027772109945, + "blocks.4.w1.bias": 406.52995649086876, + "blocks.4.w2.weight": 429.9566257006414, + "blocks.5.ln.weight": 7.336168242804289, + "blocks.5.w1.weight": 278.99456283486023, + "blocks.5.w1.bias": 254.26825064498672, + "blocks.5.w2.weight": 255.6469209546951, + "blocks.6.ln.weight": 8.774665158034248, + "blocks.6.w1.weight": 349.580580396389, + "blocks.6.w1.bias": 332.01322875711713, + "blocks.6.w2.weight": 335.2183411011385, + "blocks.7.ln.weight": 6.103809206945497, + "blocks.7.w1.weight": 223.73607116076752, + "blocks.7.w1.bias": 204.11858072638188, + "blocks.7.w2.weight": 212.2909923599191, + "out_ln.weight": 0.5939581817276044, + "out_head.weight": 9.736202389552009, + "out_head.bias": 0.7653619259023434 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0312406940460206, + 1.9362668811798096, + 1.8983942519378663, + 1.881630844039917, + 1.8686952627563476, + 1.8598585787200927, + 1.8500165731430054, + 1.8523509564208984, + 1.841215087814331, + 1.8375128533935547, + 1.82443065284729, + 1.82105125, + 1.8107502059555054, + 1.8082057354736327, + 1.8022028189468384, + 1.7978119366836547, + 1.7920289902496338, + 1.7873877236938476, + 1.7848837859725952, + 1.7817217052459717, + 1.7746633721542358, + 1.7798608938598632, + 1.7749403537750243, + 1.7722899240112304, + 1.7705043783187866, + 1.765470712814331, + 1.7667155236434937, + 1.7605387261199952, + 1.7600090964126587, + 1.7570182471466065, + 1.7548303343963623, + 1.753013416786194, + 1.7530071124267579, + 1.752319090499878, + 1.7505883599090577, + 1.7475732799530028, + 1.7479963735961914, + 1.7440779452514648, + 1.7415211089706422, + 1.7423380712127685, + 1.7422383227157592, + 1.73590397315979, + 1.7367827083587646, + 1.7335861996078492, + 1.732941851272583, + 1.726208869934082, + 1.7261467092895508, + 1.731659492225647, + 1.7264601552963257, + 1.7286788833618163, + 1.722340958251953, + 1.7210906581497192, + 1.7243633469390869, + 1.7231188234710693, + 1.7203610498809814, + 1.7219875589752198, + 1.719448472518921, + 1.7194024643707275, + 1.7188453897857665, + 1.715792247581482, + 1.7124444417572022, + 1.71695018119812, + 1.7176160364532471, + 1.7146047933578492, + 1.7122148095321654, + 1.7136172246551513, + 1.7148900774765015, + 1.7101783513641358, + 1.7065966332244873, + 1.7109612771606446, + 1.7086562002182006, + 1.708388620033264, + 1.7043865182113647, + 1.7084569076919556, + 1.7085095125961303, + 1.7060216832733155, + 1.7013830905532836, + 1.706795428390503, + 1.7063351504898072, + 1.70022035030365, + 1.7059846701049806, + 1.706136099281311, + 1.7015950707626344, + 1.7021336114120484, + 1.6970344146347045, + 1.7002056908416747, + 1.6996512441635132, + 1.701994123878479, + 1.6985623202896118, + 1.7015460013580321, + 1.7006773757171632, + 1.7014044579696654, + 1.7009879675674437, + 1.699486920852661, + 1.6986795735931397, + 1.6978497576904297, + 1.7015439191436768, + 1.699127428817749, + 1.6978760889434814, + 1.69874500705719 + ], + "train_acc": [ + 0.25366, + 0.295, + 0.31024, + 0.322, + 0.3275, + 0.3335, + 0.33654, + 0.33282, + 0.33834, + 0.3379, + 0.34392, + 0.3437, + 0.34912, + 0.35138, + 0.35342, + 0.3525, + 0.35808, + 0.36144, + 0.36196, + 0.36088, + 0.3609, + 0.3617, + 0.36304, + 0.36386, + 0.36488, + 0.36634, + 0.3662, + 0.37146, + 0.36906, + 0.37174, + 0.37052, + 0.37144, + 0.37146, + 0.37096, + 0.37404, + 0.37546, + 0.37712, + 0.37372, + 0.37698, + 0.37738, + 0.37506, + 0.38058, + 0.379, + 0.37758, + 0.38042, + 0.38246, + 0.38102, + 0.38084, + 0.38438, + 0.38092, + 0.38366, + 0.38522, + 0.38544, + 0.38474, + 0.38626, + 0.385, + 0.38362, + 0.38682, + 0.38782, + 0.3882, + 0.38862, + 0.38608, + 0.38642, + 0.3857, + 0.38758, + 0.38714, + 0.38588, + 0.39092, + 0.38846, + 0.39068, + 0.38904, + 0.39098, + 0.39194, + 0.39138, + 0.38832, + 0.39186, + 0.39308, + 0.3901, + 0.39034, + 0.3932, + 0.39182, + 0.39044, + 0.39078, + 0.3919, + 0.39418, + 0.39302, + 0.39488, + 0.39178, + 0.39388, + 0.39272, + 0.39092, + 0.39096, + 0.3937, + 0.39176, + 0.39522, + 0.39246, + 0.39252, + 0.39446, + 0.3936, + 0.39362 + ], + "test_acc": [ + 0.2876, + 0.3293, + 0.3436, + 0.3566, + 0.3569, + 0.3491, + 0.3632, + 0.36, + 0.3748, + 0.3697, + 0.3686, + 0.3785, + 0.3636, + 0.3798, + 0.3817, + 0.3661, + 0.3711, + 0.386, + 0.3761, + 0.3774, + 0.3908, + 0.3855, + 0.3821, + 0.3861, + 0.3921, + 0.398, + 0.3973, + 0.3814, + 0.3875, + 0.3951, + 0.3951, + 0.3873, + 0.399, + 0.3912, + 0.4011, + 0.3901, + 0.4079, + 0.3961, + 0.397, + 0.4004, + 0.4022, + 0.4, + 0.4042, + 0.3988, + 0.4114, + 0.4031, + 0.4057, + 0.4077, + 0.4027, + 0.4028, + 0.4083, + 0.4001, + 0.4068, + 0.411, + 0.4046, + 0.4106, + 0.4098, + 0.4054, + 0.4117, + 0.4084, + 0.407, + 0.4054, + 0.4116, + 0.4124, + 0.4119, + 0.412, + 0.4066, + 0.4118, + 0.4119, + 0.4107, + 0.413, + 0.4056, + 0.4103, + 0.4076, + 0.4136, + 0.4117, + 0.4165, + 0.4112, + 0.4129, + 0.4125, + 0.413, + 0.4136, + 0.4145, + 0.4119, + 0.4112, + 0.4123, + 0.4126, + 0.4137, + 0.4137, + 0.4146, + 0.4117, + 0.4117, + 0.4125, + 0.4122, + 0.4119, + 0.4125, + 0.4126, + 0.4123, + 0.4122, + 0.412 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.018279677256941795, + 0.05121004581451416, + -0.0035886913537979126, + -0.08221495151519775, + -0.07729659974575043, + -0.03265474736690521, + -0.04502299427986145, + 0.9937314391136169 + ], + "perturbation_rho": [ + -0.0045250579714775085, + -0.0025405026972293854, + 0.02932029776275158, + -0.04425422102212906, + -0.024078164249658585, + 0.036674171686172485, + 0.03741011023521423, + -0.027221273630857468 + ], + "nudging": { + "0.001": [ + -1.8192222341895103e-06, + -1.5320256352424622e-07, + -4.225876182317734e-08, + 1.200241968035698e-07, + 7.438939064741135e-08, + 2.7241185307502747e-08, + 2.759043127298355e-08, + -1.1578667908906937e-06 + ], + "0.003": [ + -5.577923730015755e-06, + -8.606584742665291e-07, + -8.975621312856674e-08, + 4.239846020936966e-07, + 4.441244527697563e-07, + 1.2828968465328217e-07, + 2.665910869836807e-07, + -4.686298780143261e-06 + ], + "0.01": [ + -1.855997834354639e-05, + -2.620276063680649e-06, + -2.4156179279088974e-07, + 1.257285475730896e-06, + 1.3473909348249435e-06, + 5.852198228240013e-07, + 7.433118298649788e-07, + -1.6578123904764652e-05 + ] + }, + "hidden_norms_per_layer": [ + 4948.6533203125, + 63535.5390625, + 485750.46875, + 729670.4375, + 1132140.625, + 1303753.625, + 1400111.125, + 1525175.75, + 735722.25 + ], + "bp_grad_norms_per_layer": [ + 3.38389327225741e-05, + 2.3188813429442234e-06, + 7.230223104670586e-07, + 6.714369078508753e-07, + 6.712992899338133e-07, + 6.754108312634344e-07, + 6.760963060514769e-07, + 6.705485020574997e-07, + 6.541473567267531e-07 + ] + }, + "drift": { + "embed.weight": 38.30467980586698, + "embed.bias": 18.474777878278143, + "blocks.0.ln.weight": 1.0140204865718874, + "blocks.0.w1.weight": 14.619711688794071, + "blocks.0.w1.bias": 11.460165338986966, + "blocks.0.w2.weight": 49.811132056190225, + "blocks.1.ln.weight": 0.943966438295369, + "blocks.1.w1.weight": 19.03349845703188, + "blocks.1.w1.bias": 16.732407132612977, + "blocks.1.w2.weight": 43.9070524360131, + "blocks.2.ln.weight": 0.6544196492657218, + "blocks.2.w1.weight": 18.12360528225708, + "blocks.2.w1.bias": 17.453784588368148, + "blocks.2.w2.weight": 43.32806943352846, + "blocks.3.ln.weight": 0.5531295594988335, + "blocks.3.w1.weight": 18.884523077230778, + "blocks.3.w1.bias": 20.07589187473831, + "blocks.3.w2.weight": 35.34492100476457, + "blocks.4.ln.weight": 0.5269231122615889, + "blocks.4.w1.weight": 17.103043932812323, + "blocks.4.w1.bias": 18.102481284342456, + "blocks.4.w2.weight": 42.32555419027007, + "blocks.5.ln.weight": 0.5628789686632275, + "blocks.5.w1.weight": 16.33193833700236, + "blocks.5.w1.bias": 15.909636919669264, + "blocks.5.w2.weight": 48.27717558625265, + "blocks.6.ln.weight": 0.6467844103525138, + "blocks.6.w1.weight": 16.86172393719663, + "blocks.6.w1.bias": 16.921984405373085, + "blocks.6.w2.weight": 56.44865850476068, + "blocks.7.ln.weight": 0.6477260974049089, + "blocks.7.w1.weight": 19.61181966070048, + "blocks.7.w1.bias": 20.268823478099563, + "blocks.7.w2.weight": 50.05420170873082, + "out_ln.weight": 0.28931782627119323, + "out_head.weight": 5.596138162617972, + "out_head.bias": 1.6180902727109185 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 8, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 2 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L8_seed2", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d512_L8_seed3/results_cifar10.json b/results/fa_dfa_d512_L8_seed3/results_cifar10.json new file mode 100644 index 0000000..4a81c5e --- /dev/null +++ b/results/fa_dfa_d512_L8_seed3/results_cifar10.json @@ -0,0 +1,881 @@ +{ + "3": { + "dfa": { + "log": { + "train_loss": [ + 2.067654705734253, + 2.0439451234436037, + 2.0388121842193603, + 2.0374467311859132, + 2.032723134689331, + 2.024335552711487, + 2.0244412200927733, + 2.020215712852478, + 2.02228947303772, + 2.0196216506958007, + 2.018127802886963, + 2.0154445655822752, + 2.014929994125366, + 2.0130414514160155, + 2.010970745010376, + 2.009714426345825, + 2.008820608253479, + 2.0076065615081786, + 2.0063576023864744, + 2.0080780532455442, + 2.0033639809799193, + 2.0052446017456056, + 2.003892343444824, + 2.0035709592437745, + 2.004005387611389, + 2.003134113922119, + 2.002949758758545, + 2.003510911102295, + 2.0014034066009523, + 2.0013524534606932, + 2.001380777282715, + 2.0003092810058596, + 2.001631733970642, + 1.9991252640533448, + 2.001369930343628, + 2.0026023275375366, + 1.9995970624542236, + 2.001532023696899, + 1.9995446050643921, + 1.9996646937561036, + 2.0011661471176145, + 2.001093216934204, + 1.9989905255126954, + 1.9983071016693115, + 2.0015053617095946, + 2.0005502851867676, + 1.9974795049667358, + 2.001641445236206, + 1.9987062954711914, + 1.9984554448699952, + 2.000685492324829, + 1.9992525644683838, + 2.0004551610565184, + 1.9981906829452514, + 1.998261117515564, + 1.9979981622314453, + 1.999091849937439, + 1.999358012046814, + 1.9977104736709594, + 1.998026010055542, + 1.9991155099487306, + 1.998898390197754, + 1.997283667640686, + 1.9974989557266236, + 1.9966122649765015, + 1.99706481754303, + 1.9956817004394531, + 1.9957520567321778, + 1.9964876544570922, + 1.9971850046539306, + 1.9973575018310548, + 1.9963464212417603, + 1.995423618736267, + 1.9970853455352784, + 1.9958270948028565, + 1.997421721458435, + 1.9960898080825806, + 1.996168349533081, + 1.9963192137145995, + 1.997360442123413, + 1.9975355083465576, + 1.996066244506836, + 1.9953448879241944, + 1.9958293856430054, + 1.9962540533828734, + 1.9953726630401611, + 1.9940088095092774, + 1.9959244760894774, + 1.9942969249725342, + 1.9947067263793945, + 1.9935712906646728, + 1.994222310256958, + 1.9953250341796875, + 1.995568331222534, + 1.9945212261199952, + 1.9952520877075195, + 1.9954078897857666, + 1.9954364456176759, + 1.9938959969329835, + 1.9937770639038086 + ], + "train_acc": [ + 0.23262, + 0.24192, + 0.24632, + 0.24536, + 0.2464, + 0.2547, + 0.25628, + 0.25992, + 0.25686, + 0.25908, + 0.25972, + 0.2614, + 0.26032, + 0.26244, + 0.26444, + 0.26376, + 0.26774, + 0.26756, + 0.26628, + 0.2646, + 0.26774, + 0.26684, + 0.26744, + 0.2703, + 0.26722, + 0.2705, + 0.26978, + 0.26872, + 0.2695, + 0.27244, + 0.27246, + 0.27274, + 0.26828, + 0.27612, + 0.27204, + 0.27338, + 0.27352, + 0.26948, + 0.27384, + 0.27384, + 0.27366, + 0.27284, + 0.27418, + 0.27444, + 0.2734, + 0.27474, + 0.27514, + 0.27438, + 0.274, + 0.27478, + 0.2748, + 0.27494, + 0.27578, + 0.27282, + 0.27458, + 0.27628, + 0.27448, + 0.27492, + 0.27672, + 0.27708, + 0.2768, + 0.27398, + 0.27716, + 0.27682, + 0.27596, + 0.27722, + 0.2767, + 0.27738, + 0.27778, + 0.27676, + 0.27824, + 0.27798, + 0.27824, + 0.27748, + 0.27828, + 0.2766, + 0.2783, + 0.27724, + 0.27768, + 0.27614, + 0.27838, + 0.27748, + 0.27688, + 0.27912, + 0.27784, + 0.27692, + 0.27876, + 0.27918, + 0.27702, + 0.27866, + 0.2807, + 0.27914, + 0.27646, + 0.27638, + 0.27878, + 0.27844, + 0.27988, + 0.27484, + 0.27874, + 0.27812 + ], + "test_acc": [ + 0.2578, + 0.2681, + 0.2679, + 0.2839, + 0.2546, + 0.2725, + 0.2785, + 0.2856, + 0.2934, + 0.2638, + 0.2995, + 0.288, + 0.2773, + 0.2846, + 0.3003, + 0.2936, + 0.2826, + 0.2985, + 0.2836, + 0.2804, + 0.282, + 0.2777, + 0.2854, + 0.2789, + 0.3056, + 0.296, + 0.2922, + 0.2934, + 0.2854, + 0.2952, + 0.3047, + 0.2955, + 0.2902, + 0.2858, + 0.3016, + 0.2931, + 0.2948, + 0.2922, + 0.2924, + 0.2932, + 0.2975, + 0.2989, + 0.2968, + 0.3084, + 0.295, + 0.2818, + 0.2943, + 0.2957, + 0.2895, + 0.2846, + 0.2928, + 0.2932, + 0.2923, + 0.2923, + 0.2948, + 0.2839, + 0.2885, + 0.2986, + 0.2955, + 0.292, + 0.3058, + 0.2945, + 0.302, + 0.2895, + 0.294, + 0.2968, + 0.293, + 0.3034, + 0.2963, + 0.2881, + 0.295, + 0.2915, + 0.2951, + 0.2916, + 0.3019, + 0.2971, + 0.2965, + 0.2974, + 0.297, + 0.2973, + 0.2983, + 0.3006, + 0.2984, + 0.2948, + 0.2929, + 0.2974, + 0.2957, + 0.2979, + 0.2947, + 0.2944, + 0.2955, + 0.2957, + 0.2938, + 0.296, + 0.2973, + 0.2969, + 0.2962, + 0.2965, + 0.2967, + 0.2967 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.3848226070404053, + 0.0005948130274191499, + 0.0008828549180179834, + -0.0005318043986335397, + -0.0011384200770407915, + 0.001081241061910987, + -0.0007273855153471231, + -9.393676009494811e-05 + ], + "perturbation_rho": [ + 0.02956267260015011, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -4.009343683719635e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.071486622095108e-06, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -3.686174750328064e-06, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 53208.375, + 1418977024.0, + 1839794304.0, + 3366365440.0, + 5876559872.0, + 6415245312.0, + 6484437504.0, + 6990735872.0, + 8058666496.0 + ], + "bp_grad_norms_per_layer": [ + 2.507335352675e-07, + 3.0165914211011113e-10, + 3.015434568709452e-10, + 3.022892214321615e-10, + 3.0200991707474145e-10, + 3.020122762986688e-10, + 3.0200916767419983e-10, + 3.020158290123476e-10, + 3.0227947922512044e-10 + ] + }, + "drift": { + "embed.weight": 327.65976028345034, + "embed.bias": 222.60798330036593, + "blocks.0.ln.weight": 9.50619149859276, + "blocks.0.w1.weight": 302.2719637669162, + "blocks.0.w1.bias": 270.6809804281448, + "blocks.0.w2.weight": 471.5962929911042, + "blocks.1.ln.weight": 7.339419282166665, + "blocks.1.w1.weight": 236.07033236306975, + "blocks.1.w1.bias": 214.61371141440935, + "blocks.1.w2.weight": 257.6350959469773, + "blocks.2.ln.weight": 8.814285893710872, + "blocks.2.w1.weight": 346.78316046198574, + "blocks.2.w1.bias": 305.8043449728941, + "blocks.2.w2.weight": 337.46416422582774, + "blocks.3.ln.weight": 9.745238986543606, + "blocks.3.w1.weight": 405.8018654296332, + "blocks.3.w1.bias": 374.1566493279044, + "blocks.3.w2.weight": 402.68490503996316, + "blocks.4.ln.weight": 8.109738257108088, + "blocks.4.w1.weight": 332.27298936873524, + "blocks.4.w1.bias": 309.4368090711017, + "blocks.4.w2.weight": 317.97600700647837, + "blocks.5.ln.weight": 6.209367170959338, + "blocks.5.w1.weight": 230.00050402864986, + "blocks.5.w1.bias": 215.5148975730586, + "blocks.5.w2.weight": 220.6587410113886, + "blocks.6.ln.weight": 8.869002619258092, + "blocks.6.w1.weight": 341.5561743855726, + "blocks.6.w1.bias": 310.4841079276583, + "blocks.6.w2.weight": 307.6831168444481, + "blocks.7.ln.weight": 10.348632836289921, + "blocks.7.w1.weight": 389.83507647770097, + "blocks.7.w1.bias": 362.80530475665813, + "blocks.7.w2.weight": 372.02137428373555, + "out_ln.weight": 0.6700943575231241, + "out_head.weight": 9.265882212393693, + "out_head.bias": 0.5155313240128788 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0319221758270265, + 1.9485809626770019, + 1.9236068560409545, + 1.9108332887268067, + 1.8946271410751343, + 1.8748885510635376, + 1.8679874765777589, + 1.8528113153839112, + 1.854252926864624, + 1.8431586810302734, + 1.835074407119751, + 1.830763704071045, + 1.8243982820892335, + 1.8189660712051392, + 1.8128780780792237, + 1.8087021814346314, + 1.804141354446411, + 1.8017899124526977, + 1.7928460983657837, + 1.7929035697174072, + 1.7876431453704833, + 1.7875543393325806, + 1.7795426641845704, + 1.775789865951538, + 1.7698374728012085, + 1.768277823562622, + 1.7634769374847412, + 1.7641023599624635, + 1.757076796989441, + 1.7488349988174439, + 1.7501668548965454, + 1.7444925582122803, + 1.7474665740585327, + 1.7382987225723268, + 1.734984835205078, + 1.7357169647979735, + 1.7307214682388306, + 1.7250416897201537, + 1.7257672270965576, + 1.7227689398574828, + 1.7207815472030639, + 1.7202168838882446, + 1.7142020806503295, + 1.714960380783081, + 1.71727346408844, + 1.712963445739746, + 1.7071009867095948, + 1.7091050637435914, + 1.7093897729492187, + 1.7002552864837646, + 1.7015939654922485, + 1.7008016619873048, + 1.6995257330703735, + 1.697141215133667, + 1.6948078282928467, + 1.6939659241485596, + 1.69270690864563, + 1.6911025055313111, + 1.6915674340820313, + 1.6882078439712525, + 1.6849368871307373, + 1.6877099935913087, + 1.6871322372817994, + 1.683017247543335, + 1.6802406833648682, + 1.6818945336532594, + 1.6764708988189698, + 1.6813461883163452, + 1.679410824356079, + 1.6801742096328736, + 1.6759367601776123, + 1.6751640189361572, + 1.6755147110366821, + 1.6753325751113892, + 1.669490467529297, + 1.6727212616348266, + 1.6710585062026977, + 1.6708744039535524, + 1.6716560121536255, + 1.6683519116210936, + 1.669690319480896, + 1.6661786761474608, + 1.6632559734725951, + 1.6698375412750244, + 1.6648968856430053, + 1.6646921353912354, + 1.6623535321044922, + 1.6634717670440673, + 1.662731397628784, + 1.6652179148101807, + 1.664047002029419, + 1.6637420905303955, + 1.6616315328979492, + 1.6613892125320435, + 1.6618764827728272, + 1.6608539768218995, + 1.662214938583374, + 1.6588589643859863, + 1.6624627486419679, + 1.6634692792129517 + ], + "train_acc": [ + 0.25286, + 0.28656, + 0.29892, + 0.30622, + 0.31524, + 0.32466, + 0.33028, + 0.33336, + 0.33322, + 0.33718, + 0.34028, + 0.34314, + 0.3488, + 0.34888, + 0.3513, + 0.35072, + 0.35386, + 0.35666, + 0.35802, + 0.35736, + 0.36152, + 0.36052, + 0.36226, + 0.36666, + 0.3689, + 0.3657, + 0.36826, + 0.36686, + 0.36968, + 0.37314, + 0.37168, + 0.37526, + 0.3725, + 0.37586, + 0.37854, + 0.38008, + 0.38192, + 0.38086, + 0.38108, + 0.38154, + 0.38472, + 0.38546, + 0.38504, + 0.3864, + 0.3857, + 0.3858, + 0.38726, + 0.38836, + 0.38902, + 0.39314, + 0.39324, + 0.39162, + 0.39194, + 0.3928, + 0.39618, + 0.39266, + 0.39604, + 0.39298, + 0.39408, + 0.39738, + 0.3988, + 0.39496, + 0.39628, + 0.39638, + 0.3968, + 0.40014, + 0.40092, + 0.3965, + 0.39798, + 0.39902, + 0.40112, + 0.40126, + 0.39922, + 0.40196, + 0.40252, + 0.40146, + 0.40286, + 0.4025, + 0.40382, + 0.40394, + 0.40502, + 0.40518, + 0.40502, + 0.4029, + 0.40602, + 0.40654, + 0.40788, + 0.40838, + 0.40608, + 0.40626, + 0.40682, + 0.4054, + 0.40548, + 0.40818, + 0.4063, + 0.40556, + 0.40728, + 0.40878, + 0.41036, + 0.40504 + ], + "test_acc": [ + 0.3107, + 0.3197, + 0.3396, + 0.3501, + 0.3445, + 0.3533, + 0.3594, + 0.3665, + 0.3638, + 0.3697, + 0.3705, + 0.3731, + 0.3731, + 0.3779, + 0.3879, + 0.3797, + 0.3855, + 0.3895, + 0.389, + 0.3836, + 0.3898, + 0.3899, + 0.3957, + 0.3975, + 0.3903, + 0.3904, + 0.3925, + 0.4026, + 0.3948, + 0.404, + 0.407, + 0.4052, + 0.4016, + 0.4038, + 0.4077, + 0.4055, + 0.4139, + 0.4125, + 0.4071, + 0.41, + 0.4157, + 0.4116, + 0.4057, + 0.4093, + 0.4107, + 0.4121, + 0.4178, + 0.4151, + 0.4102, + 0.415, + 0.4171, + 0.4152, + 0.4107, + 0.4169, + 0.4165, + 0.4128, + 0.4211, + 0.4239, + 0.419, + 0.4169, + 0.4175, + 0.4204, + 0.4172, + 0.42, + 0.4223, + 0.4193, + 0.4249, + 0.4239, + 0.4247, + 0.4237, + 0.4241, + 0.4249, + 0.4246, + 0.4236, + 0.4222, + 0.4256, + 0.4236, + 0.4254, + 0.4218, + 0.4219, + 0.4253, + 0.4236, + 0.4242, + 0.4266, + 0.4265, + 0.4244, + 0.4236, + 0.4244, + 0.4257, + 0.426, + 0.4278, + 0.4252, + 0.4256, + 0.4257, + 0.425, + 0.4251, + 0.4257, + 0.4253, + 0.4252, + 0.4251 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.011332567781209946, + 0.06513819098472595, + 0.0006735082715749741, + 0.01789894700050354, + -0.04844595864415169, + -0.0789085328578949, + -0.08352568745613098, + 0.9912235736846924 + ], + "perturbation_rho": [ + -0.019055387005209923, + 0.000641997903585434, + 0.007458926644176245, + -0.0017425119876861572, + 0.01768876612186432, + -0.035874560475349426, + -0.009836452081799507, + -0.0037038950249552727 + ], + "nudging": { + "0.001": [ + 3.5937409847974777e-07, + -8.093193173408508e-07, + 5.8673322200775146e-08, + -9.988434612751007e-08, + 1.5122350305318832e-07, + 2.3562461137771606e-07, + 1.7601996660232544e-07, + -2.587912604212761e-06 + ], + "0.003": [ + 4.564644768834114e-07, + -2.398388460278511e-06, + -8.882489055395126e-08, + -2.555316314101219e-07, + 4.91039827466011e-07, + 6.976770237088203e-07, + 7.48317688703537e-07, + -8.602859452366829e-06 + ], + "0.01": [ + 2.159271389245987e-06, + -7.903669029474258e-06, + -2.2118911147117615e-09, + -8.208444342017174e-07, + 1.3509998098015785e-06, + 2.2408785298466682e-06, + 2.4311011657118797e-06, + -2.9218033887445927e-05 + ] + }, + "hidden_norms_per_layer": [ + 4438.09326171875, + 51045.953125, + 224884.15625, + 281807.5625, + 313976.5, + 377254.8125, + 619965.6875, + 803209.875, + 395948.40625 + ], + "bp_grad_norms_per_layer": [ + 5.1603383326437324e-05, + 4.058777449245099e-06, + 1.2677732001975528e-06, + 1.2549784287330112e-06, + 1.1549120699783089e-06, + 1.1174195151397726e-06, + 1.1277491012151586e-06, + 1.1274379403403145e-06, + 1.1047595762647688e-06 + ] + }, + "drift": { + "embed.weight": 35.94724430826174, + "embed.bias": 12.93641303808505, + "blocks.0.ln.weight": 1.0316107269800443, + "blocks.0.w1.weight": 14.343462705354373, + "blocks.0.w1.bias": 10.48509752557022, + "blocks.0.w2.weight": 50.82449203404063, + "blocks.1.ln.weight": 0.8954057658407036, + "blocks.1.w1.weight": 17.084567030271085, + "blocks.1.w1.bias": 8.321442582132649, + "blocks.1.w2.weight": 43.15792770243562, + "blocks.2.ln.weight": 0.50963325244791, + "blocks.2.w1.weight": 13.992497779476563, + "blocks.2.w1.bias": 8.388377688031984, + "blocks.2.w2.weight": 36.31652372933758, + "blocks.3.ln.weight": 0.5126350353439263, + "blocks.3.w1.weight": 13.376631353463463, + "blocks.3.w1.bias": 7.3308712021855715, + "blocks.3.w2.weight": 38.8692971792842, + "blocks.4.ln.weight": 0.44132703400905127, + "blocks.4.w1.weight": 13.832310366505041, + "blocks.4.w1.bias": 8.752010456823355, + "blocks.4.w2.weight": 29.67447860418571, + "blocks.5.ln.weight": 0.44899228147891274, + "blocks.5.w1.weight": 15.279703613622448, + "blocks.5.w1.bias": 14.678313736857362, + "blocks.5.w2.weight": 23.989192930697847, + "blocks.6.ln.weight": 0.48252983596530574, + "blocks.6.w1.weight": 15.592232386492284, + "blocks.6.w1.bias": 15.403926884582539, + "blocks.6.w2.weight": 26.0662939832218, + "blocks.7.ln.weight": 0.5873549927115453, + "blocks.7.w1.weight": 16.643576121850806, + "blocks.7.w1.bias": 15.88707616632278, + "blocks.7.w2.weight": 36.93738990528072, + "out_ln.weight": 0.2817508020334675, + "out_head.weight": 5.01870628445436, + "out_head.bias": 1.5909890644945488 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 8, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 3 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L8_seed3", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d512_L8_seed4/results_cifar10.json b/results/fa_dfa_d512_L8_seed4/results_cifar10.json new file mode 100644 index 0000000..335a670 --- /dev/null +++ b/results/fa_dfa_d512_L8_seed4/results_cifar10.json @@ -0,0 +1,881 @@ +{ + "4": { + "dfa": { + "log": { + "train_loss": [ + 2.0558988859558105, + 2.0445796598815917, + 2.039279856033325, + 2.038156519508362, + 2.036371453819275, + 2.035036249847412, + 2.031745447311401, + 2.0292039811706544, + 2.0318929217529296, + 2.0292139208984374, + 2.0309901475524903, + 2.0294140184020995, + 2.024980134963989, + 2.026833506088257, + 2.0269724795532227, + 2.0259553061676026, + 2.025906521835327, + 2.0246393172454833, + 2.0217932791137696, + 2.024892644882202, + 2.0215970348739623, + 2.022969856185913, + 2.0212113690567017, + 2.018885112991333, + 2.019870955734253, + 2.017834800796509, + 2.0190272177124022, + 2.0189630345916747, + 2.01638854927063, + 2.017822699356079, + 2.019127551727295, + 2.0166009580230715, + 2.016471000518799, + 2.0156495739746094, + 2.0154129775238037, + 2.017227215194702, + 2.015604530029297, + 2.018137004623413, + 2.013566918640137, + 2.0139937634277345, + 2.014001354904175, + 2.014924657058716, + 2.01404189201355, + 2.012948843307495, + 2.0132822931671144, + 2.0135159986114504, + 2.0149466477966307, + 2.0134463012313843, + 2.0127882065200806, + 2.011855206069946, + 2.0118354721450804, + 2.009852321395874, + 2.0144452475357055, + 2.0108168384170533, + 2.0125812700653074, + 2.009986862640381, + 2.0114155954360964, + 2.010974427947998, + 2.012690294647217, + 2.0099948442077635, + 2.0100676802062987, + 2.0103798513031004, + 2.009976401939392, + 2.009263822669983, + 2.0103307348251342, + 2.0066765419006347, + 2.0081610918426516, + 2.009749242553711, + 2.009436781463623, + 2.008263199005127, + 2.008898328781128, + 2.0088115994262696, + 2.0076247560882567, + 2.008243568115234, + 2.009206538734436, + 2.0073812493896486, + 2.0084627660369874, + 2.0082589547729492, + 2.006824525222778, + 2.006816029586792, + 2.005350517425537, + 2.005616167564392, + 2.0048646172714233, + 2.005117626724243, + 2.007857433204651, + 2.006735478591919, + 2.0058193558502198, + 2.0068686953353883, + 2.0066705658721924, + 2.0060114856719973, + 2.0083387131118773, + 2.0070924770736696, + 2.006274801864624, + 2.005065950050354, + 2.007637509765625, + 2.0068550647354124, + 2.0040754894256594, + 2.0044231857681276, + 2.00634932926178, + 2.004043010559082 + ], + "train_acc": [ + 0.2432, + 0.24314, + 0.24398, + 0.244, + 0.2482, + 0.24582, + 0.24994, + 0.25054, + 0.24894, + 0.24966, + 0.2472, + 0.2524, + 0.25424, + 0.25128, + 0.25402, + 0.2517, + 0.25458, + 0.25468, + 0.2556, + 0.25626, + 0.25828, + 0.25636, + 0.2575, + 0.25626, + 0.2598, + 0.25736, + 0.25858, + 0.25966, + 0.261, + 0.26086, + 0.25872, + 0.26222, + 0.25918, + 0.26136, + 0.2628, + 0.26148, + 0.26234, + 0.26032, + 0.26366, + 0.26014, + 0.26316, + 0.2627, + 0.26332, + 0.26396, + 0.2624, + 0.2651, + 0.26332, + 0.26546, + 0.26388, + 0.2643, + 0.26664, + 0.26656, + 0.26452, + 0.26586, + 0.26372, + 0.26434, + 0.26688, + 0.265, + 0.26536, + 0.2676, + 0.26708, + 0.26556, + 0.26724, + 0.26668, + 0.26684, + 0.26658, + 0.26898, + 0.26802, + 0.26746, + 0.26814, + 0.2674, + 0.2696, + 0.26804, + 0.26742, + 0.26854, + 0.2678, + 0.26776, + 0.26772, + 0.26896, + 0.26856, + 0.2709, + 0.26856, + 0.27094, + 0.27032, + 0.26868, + 0.26798, + 0.27036, + 0.26862, + 0.2704, + 0.26882, + 0.26632, + 0.2691, + 0.26788, + 0.26786, + 0.26864, + 0.26766, + 0.27096, + 0.27096, + 0.26832, + 0.27034 + ], + "test_acc": [ + 0.255, + 0.282, + 0.2597, + 0.2682, + 0.2706, + 0.2653, + 0.268, + 0.2831, + 0.2608, + 0.2611, + 0.2809, + 0.2728, + 0.2628, + 0.2678, + 0.2917, + 0.2633, + 0.2587, + 0.2789, + 0.2667, + 0.255, + 0.2702, + 0.2763, + 0.2721, + 0.2728, + 0.2836, + 0.2812, + 0.2586, + 0.2753, + 0.2767, + 0.2765, + 0.2893, + 0.2684, + 0.2739, + 0.2781, + 0.283, + 0.2702, + 0.2792, + 0.2779, + 0.2757, + 0.2829, + 0.2798, + 0.2777, + 0.2673, + 0.2797, + 0.2958, + 0.272, + 0.2749, + 0.2875, + 0.2807, + 0.2796, + 0.2828, + 0.2825, + 0.2827, + 0.2782, + 0.2898, + 0.2783, + 0.2754, + 0.2926, + 0.2676, + 0.2797, + 0.2879, + 0.2839, + 0.2887, + 0.2792, + 0.2699, + 0.2925, + 0.2741, + 0.2804, + 0.2827, + 0.282, + 0.2908, + 0.2818, + 0.2801, + 0.2873, + 0.2889, + 0.2898, + 0.2934, + 0.2874, + 0.2894, + 0.2882, + 0.2751, + 0.2786, + 0.2871, + 0.2813, + 0.2875, + 0.2837, + 0.2827, + 0.2853, + 0.2816, + 0.2853, + 0.2848, + 0.2829, + 0.2862, + 0.2857, + 0.2852, + 0.2855, + 0.2859, + 0.2859, + 0.2859, + 0.2861 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.348124623298645, + 0.0005955962114967406, + 0.0003339378454256803, + -0.00018519387231208384, + -0.00039015739457681775, + -6.0401107475627214e-05, + 0.0005056928494013846, + -0.00041366269579157233 + ], + "perturbation_rho": [ + 0.023487141355872154, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -2.4354085326194763e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 9.313225746154785e-10, + 0.0, + 0.0 + ], + "0.003": [ + -9.727664291858673e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 9.313225746154785e-10, + 0.0, + 0.0 + ], + "0.01": [ + -3.107357770204544e-06, + 0.0, + -1.862645149230957e-09, + 0.0, + 0.0, + 9.313225746154785e-10, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 57046.83203125, + 1460526592.0, + 3493817088.0, + 5286084096.0, + 7466274304.0, + 9068557312.0, + 9149831168.0, + 9557196800.0, + 9628340224.0 + ], + "bp_grad_norms_per_layer": [ + 2.206747353739047e-07, + 1.568134094798168e-10, + 1.5691002663853482e-10, + 1.5639808892409235e-10, + 1.5647411144570356e-10, + 1.564954832389276e-10, + 1.564743612458841e-10, + 1.564741808346426e-10, + 1.5647404205676452e-10 + ] + }, + "drift": { + "embed.weight": 345.4826524750452, + "embed.bias": 268.77599398354755, + "blocks.0.ln.weight": 9.693802971740622, + "blocks.0.w1.weight": 305.69328663379747, + "blocks.0.w1.bias": 282.72127878032245, + "blocks.0.w2.weight": 506.67910364132115, + "blocks.1.ln.weight": 9.022706446436455, + "blocks.1.w1.weight": 344.52276292946954, + "blocks.1.w1.bias": 336.8078507211997, + "blocks.1.w2.weight": 344.5230830295154, + "blocks.2.ln.weight": 9.205345940074093, + "blocks.2.w1.weight": 390.2517774750357, + "blocks.2.w1.bias": 365.62094868403284, + "blocks.2.w2.weight": 372.1071143869873, + "blocks.3.ln.weight": 10.058504715768814, + "blocks.3.w1.weight": 409.96586790482206, + "blocks.3.w1.bias": 389.26772963997206, + "blocks.3.w2.weight": 398.4000450175067, + "blocks.4.ln.weight": 10.413518753619888, + "blocks.4.w1.weight": 429.20878622659194, + "blocks.4.w1.bias": 400.403481558048, + "blocks.4.w2.weight": 399.0461533995044, + "blocks.5.ln.weight": 7.53017659614122, + "blocks.5.w1.weight": 303.5211766091988, + "blocks.5.w1.bias": 290.0904456815074, + "blocks.5.w2.weight": 267.46908289070075, + "blocks.6.ln.weight": 8.989001105343268, + "blocks.6.w1.weight": 361.1863333213492, + "blocks.6.w1.bias": 343.25957937988704, + "blocks.6.w2.weight": 317.54530185898403, + "blocks.7.ln.weight": 7.256623314136568, + "blocks.7.w1.weight": 264.5076613280686, + "blocks.7.w1.bias": 244.7481419757745, + "blocks.7.w2.weight": 244.11940761827694, + "out_ln.weight": 0.5537612143282766, + "out_head.weight": 8.260097616924732, + "out_head.bias": 0.594355583013062 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0499230930328367, + 1.9647031881713868, + 1.9228250009536743, + 1.8920866240692138, + 1.8762959116363525, + 1.8783360265731812, + 1.8759148642349244, + 1.8639714459609986, + 1.859312229270935, + 1.8467648499298095, + 1.8394229892730714, + 1.825405124130249, + 1.820375383529663, + 1.8109127722549438, + 1.8068262866210938, + 1.7986448593521118, + 1.7920247109603882, + 1.7862463021087647, + 1.7800769235610963, + 1.7746711435317992, + 1.7699551953125, + 1.7670180697250366, + 1.7607733086395263, + 1.7561056157684327, + 1.752474511489868, + 1.7487609728240967, + 1.7504161080551148, + 1.7488452796173095, + 1.744940209083557, + 1.7447595501327515, + 1.7444546957015992, + 1.7363371981811524, + 1.7366404415893555, + 1.7359863372802735, + 1.733716072654724, + 1.7326603939056398, + 1.7307178776550294, + 1.7296201685333252, + 1.7284672827911376, + 1.7325348129272462, + 1.7272048714828492, + 1.7266432265472411, + 1.7268222411727905, + 1.7249765619659423, + 1.7266766318130493, + 1.7234420058441162, + 1.7232857625579834, + 1.7222659621429444, + 1.7246627559280396, + 1.7202154187011718, + 1.721605923423767, + 1.7191662756729127, + 1.7233360834503173, + 1.7196103755950927, + 1.721537714920044, + 1.7234055492401124, + 1.7208966232681275, + 1.719355527381897, + 1.716101904335022, + 1.7131741651153565, + 1.716256374435425, + 1.7143639738082885, + 1.7126432967376708, + 1.7113130987167358, + 1.7133349226760863, + 1.7085220684051514, + 1.7085635126495362, + 1.7085992751312256, + 1.7081326258087157, + 1.7065643463897706, + 1.7067539226913453, + 1.7028010097503663, + 1.7049776620864867, + 1.7025865216445923, + 1.7059548104095459, + 1.7012168072128295, + 1.7034333710479737, + 1.700839725112915, + 1.7046385177612304, + 1.6998081121063233, + 1.6977149985122681, + 1.6993804161834716, + 1.696179700050354, + 1.696841403427124, + 1.6989550568771363, + 1.6974260115814208, + 1.691993281288147, + 1.6956287409210204, + 1.6960039244842529, + 1.693833966407776, + 1.7002782623672485, + 1.6935484520721436, + 1.6925058234405517, + 1.693256030807495, + 1.6941557180786133, + 1.6950180599594116, + 1.697810530014038, + 1.6920377853393556, + 1.694213403968811, + 1.6934792825317382 + ], + "train_acc": [ + 0.2421, + 0.2836, + 0.30398, + 0.31868, + 0.32722, + 0.32272, + 0.32508, + 0.33034, + 0.33142, + 0.33714, + 0.33564, + 0.34428, + 0.34574, + 0.35108, + 0.35066, + 0.35318, + 0.35484, + 0.35766, + 0.36236, + 0.36052, + 0.36336, + 0.3664, + 0.36704, + 0.36724, + 0.37118, + 0.37224, + 0.3698, + 0.36986, + 0.37486, + 0.3753, + 0.37402, + 0.37556, + 0.37578, + 0.37696, + 0.37936, + 0.3799, + 0.37784, + 0.38368, + 0.38064, + 0.37848, + 0.38158, + 0.38068, + 0.38108, + 0.38166, + 0.37934, + 0.38318, + 0.38062, + 0.38266, + 0.38166, + 0.3839, + 0.38078, + 0.38148, + 0.38032, + 0.38514, + 0.38458, + 0.38238, + 0.38438, + 0.38428, + 0.3852, + 0.38752, + 0.38766, + 0.38474, + 0.38774, + 0.38698, + 0.38748, + 0.38616, + 0.38806, + 0.38998, + 0.3884, + 0.39208, + 0.39082, + 0.39204, + 0.38986, + 0.39288, + 0.39172, + 0.3936, + 0.39076, + 0.3933, + 0.39058, + 0.3929, + 0.39284, + 0.39224, + 0.3934, + 0.3943, + 0.39238, + 0.39528, + 0.3933, + 0.39514, + 0.39354, + 0.39286, + 0.39398, + 0.39674, + 0.39442, + 0.39522, + 0.3961, + 0.39508, + 0.39172, + 0.39722, + 0.3963, + 0.39622 + ], + "test_acc": [ + 0.298, + 0.3175, + 0.3253, + 0.3526, + 0.3529, + 0.3373, + 0.3593, + 0.3592, + 0.3528, + 0.3615, + 0.3745, + 0.3626, + 0.3649, + 0.3807, + 0.3926, + 0.3892, + 0.3738, + 0.3794, + 0.3869, + 0.3749, + 0.3867, + 0.3913, + 0.3851, + 0.3865, + 0.3911, + 0.3974, + 0.3817, + 0.3933, + 0.3944, + 0.3958, + 0.3981, + 0.3966, + 0.3975, + 0.4001, + 0.4023, + 0.4001, + 0.402, + 0.4049, + 0.4088, + 0.4076, + 0.4051, + 0.4027, + 0.3994, + 0.4035, + 0.4076, + 0.3913, + 0.4047, + 0.4061, + 0.4018, + 0.4118, + 0.4097, + 0.41, + 0.4085, + 0.401, + 0.4083, + 0.4014, + 0.405, + 0.4018, + 0.4114, + 0.4104, + 0.4074, + 0.4083, + 0.4103, + 0.4046, + 0.4011, + 0.4139, + 0.408, + 0.4094, + 0.4125, + 0.4088, + 0.414, + 0.4114, + 0.4124, + 0.4154, + 0.4121, + 0.4153, + 0.4115, + 0.4125, + 0.4144, + 0.4152, + 0.4117, + 0.411, + 0.414, + 0.4126, + 0.4137, + 0.4151, + 0.4134, + 0.4126, + 0.4132, + 0.4127, + 0.4138, + 0.4143, + 0.4148, + 0.414, + 0.4142, + 0.4145, + 0.4145, + 0.415, + 0.4144, + 0.4143 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.026830948889255524, + 0.10699465870857239, + 0.09640855342149734, + 0.016582896932959557, + -0.06015268713235855, + -0.09386980533599854, + 0.010263346135616302, + 0.9927012920379639 + ], + "perturbation_rho": [ + 0.02561907097697258, + -0.004561614245176315, + 0.008805938996374607, + -0.004548710770905018, + -0.06699962168931961, + -0.0668429285287857, + -0.03943357244133949, + 0.02623065561056137 + ], + "nudging": { + "0.001": [ + -2.3067113943398e-06, + -4.3620821088552475e-07, + -2.4866312742233276e-07, + 7.08969309926033e-08, + 1.3213139027357101e-07, + 1.5960540622472763e-07, + 4.423782229423523e-09, + -1.1902302503585815e-06 + ], + "0.003": [ + -7.16338399797678e-06, + -1.5725381672382355e-06, + -7.672933861613274e-07, + -1.3835960999131203e-07, + 2.973247319459915e-07, + 5.42961061000824e-07, + -2.5727786123752594e-08, + -4.3523614294826984e-06 + ], + "0.01": [ + -2.3563567083328962e-05, + -5.601265002042055e-06, + -2.8641661629080772e-06, + -5.615875124931335e-07, + 9.248033165931702e-07, + 1.622654963284731e-06, + -2.4889595806598663e-07, + -1.5358731616288424e-05 + ] + }, + "hidden_norms_per_layer": [ + 5353.1064453125, + 80361.6875, + 198367.984375, + 279507.6875, + 680688.0, + 1020542.375, + 1562414.75, + 1590694.75, + 789908.0 + ], + "bp_grad_norms_per_layer": [ + 3.106619624304585e-05, + 1.862773842731258e-06, + 9.139845928984869e-07, + 7.041780349936744e-07, + 6.676064572275209e-07, + 6.690797818009742e-07, + 6.687893119305954e-07, + 6.670686047982599e-07, + 6.49854484890966e-07 + ] + }, + "drift": { + "embed.weight": 40.22494125084113, + "embed.bias": 15.798132334624936, + "blocks.0.ln.weight": 1.2241361855735886, + "blocks.0.w1.weight": 16.34451235786821, + "blocks.0.w1.bias": 11.623359166710161, + "blocks.0.w2.weight": 59.40609750891924, + "blocks.1.ln.weight": 0.9394658291469571, + "blocks.1.w1.weight": 16.816482551922167, + "blocks.1.w1.bias": 7.284029885203612, + "blocks.1.w2.weight": 43.964019309740266, + "blocks.2.ln.weight": 0.7618290822194834, + "blocks.2.w1.weight": 15.551125367027769, + "blocks.2.w1.bias": 9.075794440045083, + "blocks.2.w2.weight": 44.423765980175624, + "blocks.3.ln.weight": 0.7978062227799843, + "blocks.3.w1.weight": 18.895887551107087, + "blocks.3.w1.bias": 16.322155802924105, + "blocks.3.w2.weight": 40.59294466107463, + "blocks.4.ln.weight": 0.7028791271353807, + "blocks.4.w1.weight": 18.831247940509048, + "blocks.4.w1.bias": 19.397899365978574, + "blocks.4.w2.weight": 33.77994253855422, + "blocks.5.ln.weight": 0.5882412988040494, + "blocks.5.w1.weight": 20.16928056506279, + "blocks.5.w1.bias": 23.309656656430743, + "blocks.5.w2.weight": 30.02848330872152, + "blocks.6.ln.weight": 0.6458882840441386, + "blocks.6.w1.weight": 15.641543003808316, + "blocks.6.w1.bias": 12.528164451431063, + "blocks.6.w2.weight": 65.83396405007723, + "blocks.7.ln.weight": 0.8531338247599758, + "blocks.7.w1.weight": 19.29027360202624, + "blocks.7.w1.bias": 18.640476186308685, + "blocks.7.w2.weight": 55.45516056132436, + "out_ln.weight": 0.3127678274218022, + "out_head.weight": 5.776369546198508, + "out_head.bias": 1.402401683707018 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 8, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 4 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L8_seed4", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d512_L8_seed5/results_cifar10.json b/results/fa_dfa_d512_L8_seed5/results_cifar10.json new file mode 100644 index 0000000..e2ec1f0 --- /dev/null +++ b/results/fa_dfa_d512_L8_seed5/results_cifar10.json @@ -0,0 +1,881 @@ +{ + "5": { + "dfa": { + "log": { + "train_loss": [ + 2.0787187157440186, + 2.046413578338623, + 2.043807250671387, + 2.0408004566955564, + 2.034887484970093, + 2.03150455116272, + 2.0333121584320066, + 2.030504135971069, + 2.0268674066925048, + 2.026082299346924, + 2.0260079474639894, + 2.025023748931885, + 2.0206153302001955, + 2.019373860206604, + 2.019724616012573, + 2.0159671471405027, + 2.0133701262664796, + 2.013524337539673, + 2.01570996799469, + 2.0106682904815676, + 2.011510214996338, + 2.0109367053222655, + 2.0096939754486085, + 2.0094474443054198, + 2.0071240294647215, + 2.009241544647217, + 2.0069837924957277, + 2.006896128387451, + 2.0078313034439086, + 2.0056955588531493, + 2.0059358129119875, + 2.005650292816162, + 2.0021781530380247, + 2.0034559716033935, + 2.0014139038848877, + 2.0027021182250975, + 2.002708815307617, + 2.0024726174926757, + 2.0010244245910647, + 2.0033527210235595, + 2.0017753661727906, + 2.0007596390151976, + 2.0027179621124267, + 2.000149852218628, + 2.000393838119507, + 1.9997536290740967, + 1.998189719390869, + 2.0013531698226927, + 1.9993105652236938, + 2.002620627365112, + 2.000056958694458, + 1.998213128967285, + 1.9998394576263427, + 1.9988756994628907, + 2.000797734146118, + 1.9982664052581787, + 1.9998580239105224, + 1.9990567274475097, + 2.0013695433807372, + 2.000919705581665, + 2.00011759311676, + 1.998085468597412, + 1.9998952405548096, + 1.9984590853881836, + 2.0001076091766357, + 1.9984168599700927, + 1.998167473297119, + 1.9996371339416503, + 1.998458631324768, + 1.9980697260284423, + 1.99891614402771, + 1.9961408318328857, + 1.9967646357727051, + 1.9982722332000733, + 1.9978370711517335, + 1.9981338930892945, + 1.9954986195373534, + 1.998370800704956, + 1.9966814602661134, + 1.9975791915893555, + 1.9963215632629394, + 1.9960799507141114, + 1.9947424390029906, + 1.995765002822876, + 1.9970874053955079, + 1.9967461769866943, + 1.9952786428070068, + 1.9961291760253905, + 1.9962149816894532, + 1.9956222714614869, + 1.9963563278579712, + 1.9963637481307983, + 1.995376283531189, + 1.9958238674545288, + 1.996031088180542, + 1.9979531326675415, + 1.9962067671966552, + 1.9966400229644776, + 1.9965521046447754, + 1.9952978050994874 + ], + "train_acc": [ + 0.2307, + 0.2439, + 0.241, + 0.24388, + 0.24862, + 0.25128, + 0.2482, + 0.25152, + 0.25226, + 0.25402, + 0.25412, + 0.25508, + 0.25796, + 0.25802, + 0.25836, + 0.26098, + 0.26092, + 0.2638, + 0.2603, + 0.2651, + 0.26208, + 0.26628, + 0.2672, + 0.26436, + 0.26628, + 0.264, + 0.26676, + 0.26936, + 0.26734, + 0.26656, + 0.268, + 0.27012, + 0.26954, + 0.2697, + 0.2733, + 0.27156, + 0.2711, + 0.27136, + 0.27432, + 0.27026, + 0.2729, + 0.27246, + 0.27116, + 0.27356, + 0.27474, + 0.27462, + 0.27356, + 0.27106, + 0.27586, + 0.2728, + 0.2735, + 0.27494, + 0.27414, + 0.27566, + 0.2745, + 0.275, + 0.27516, + 0.27464, + 0.27214, + 0.27562, + 0.27386, + 0.27616, + 0.27478, + 0.27452, + 0.27438, + 0.27868, + 0.27604, + 0.27778, + 0.2761, + 0.2758, + 0.27738, + 0.27662, + 0.27798, + 0.2747, + 0.27758, + 0.27598, + 0.27646, + 0.27686, + 0.27664, + 0.2775, + 0.27674, + 0.27976, + 0.27856, + 0.27662, + 0.27848, + 0.2782, + 0.27792, + 0.27528, + 0.28002, + 0.27594, + 0.27606, + 0.27738, + 0.27828, + 0.2771, + 0.27888, + 0.2776, + 0.27728, + 0.27756, + 0.27762, + 0.27778 + ], + "test_acc": [ + 0.2557, + 0.2621, + 0.2511, + 0.2468, + 0.2652, + 0.2808, + 0.268, + 0.268, + 0.2707, + 0.2646, + 0.2731, + 0.2756, + 0.2675, + 0.2771, + 0.2753, + 0.2594, + 0.2718, + 0.2876, + 0.2822, + 0.29, + 0.2898, + 0.2714, + 0.2849, + 0.2855, + 0.2765, + 0.281, + 0.2888, + 0.2819, + 0.2876, + 0.2929, + 0.278, + 0.2912, + 0.2877, + 0.2921, + 0.2827, + 0.2961, + 0.2852, + 0.2869, + 0.2852, + 0.2944, + 0.2755, + 0.2905, + 0.2862, + 0.2873, + 0.2769, + 0.2929, + 0.2913, + 0.2949, + 0.291, + 0.2811, + 0.2879, + 0.2854, + 0.282, + 0.2891, + 0.2937, + 0.2909, + 0.2957, + 0.289, + 0.292, + 0.2901, + 0.2975, + 0.2944, + 0.2887, + 0.2984, + 0.2938, + 0.295, + 0.2949, + 0.2936, + 0.2911, + 0.2947, + 0.2942, + 0.2841, + 0.2989, + 0.2916, + 0.2949, + 0.2871, + 0.2863, + 0.2936, + 0.2936, + 0.294, + 0.2902, + 0.2943, + 0.2913, + 0.2937, + 0.2942, + 0.294, + 0.2913, + 0.2949, + 0.2934, + 0.294, + 0.293, + 0.2946, + 0.2939, + 0.2929, + 0.2932, + 0.2933, + 0.2937, + 0.2937, + 0.2939, + 0.2938 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.38104039430618286, + 0.0003684713738039136, + -0.00036610430106520653, + 7.536964403698221e-06, + -7.538420322816819e-05, + 0.00021099840523675084, + -0.00022237180382944643, + -0.0001967815769603476 + ], + "perturbation_rho": [ + -0.022990737110376358, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -3.380700945854187e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.1343508958816528e-06, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -3.5706907510757446e-06, + 0.0, + 1.862645149230957e-09, + 0.0, + 1.862645149230957e-09, + 0.0, + 1.862645149230957e-09, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 53305.52734375, + 1104618240.0, + 1890710528.0, + 2939089664.0, + 4109807360.0, + 7493927424.0, + 8776990720.0, + 9704909824.0, + 9853197312.0 + ], + "bp_grad_norms_per_layer": [ + 2.442081949993735e-07, + 1.9960764985338386e-10, + 1.943529920334086e-10, + 1.947161321069757e-10, + 1.9485994762202807e-10, + 1.949839456560909e-10, + 1.9488108349285937e-10, + 1.9481991020420253e-10, + 1.9481959101508295e-10 + ] + }, + "drift": { + "embed.weight": 332.9361658873903, + "embed.bias": 249.6745044186471, + "blocks.0.ln.weight": 9.964419461290774, + "blocks.0.w1.weight": 277.0099379887824, + "blocks.0.w1.bias": 243.60625234836664, + "blocks.0.w2.weight": 475.0412234341198, + "blocks.1.ln.weight": 7.805044859841917, + "blocks.1.w1.weight": 276.45375034840094, + "blocks.1.w1.bias": 248.51195524041702, + "blocks.1.w2.weight": 286.4144000014896, + "blocks.2.ln.weight": 8.343399040552265, + "blocks.2.w1.weight": 312.7393216527087, + "blocks.2.w1.bias": 278.64438111710933, + "blocks.2.w2.weight": 319.41728918519345, + "blocks.3.ln.weight": 8.2818066466087, + "blocks.3.w1.weight": 331.8178756864101, + "blocks.3.w1.bias": 324.7690510217321, + "blocks.3.w2.weight": 327.59347009101197, + "blocks.4.ln.weight": 10.593671245978031, + "blocks.4.w1.weight": 439.88191861683595, + "blocks.4.w1.bias": 415.51184102478516, + "blocks.4.w2.weight": 435.66114149915, + "blocks.5.ln.weight": 10.04837328756298, + "blocks.5.w1.weight": 400.6014717531882, + "blocks.5.w1.bias": 372.89187324310467, + "blocks.5.w2.weight": 389.61950905043466, + "blocks.6.ln.weight": 9.160507087287726, + "blocks.6.w1.weight": 366.7858107672568, + "blocks.6.w1.bias": 337.0123511781895, + "blocks.6.w2.weight": 345.65905679108874, + "blocks.7.ln.weight": 8.151913429022978, + "blocks.7.w1.weight": 320.48348360714385, + "blocks.7.w1.bias": 294.0973824112796, + "blocks.7.w2.weight": 295.11322720454183, + "out_ln.weight": 0.6460140923890967, + "out_head.weight": 9.185934457998872, + "out_head.bias": 0.6276268606859465 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.038202630004883, + 1.9484270775604249, + 1.9142503852081298, + 1.8978817923355102, + 1.8827772109603882, + 1.8714566381073, + 1.866606608543396, + 1.8608839841079712, + 1.851916226119995, + 1.8518026924133302, + 1.8441847302246093, + 1.8414982928466797, + 1.832841236038208, + 1.8305379946517943, + 1.824911393432617, + 1.8228179180908204, + 1.815796693687439, + 1.8136043480682373, + 1.8111609268569946, + 1.8098963827133179, + 1.8101768481063842, + 1.8083936654663086, + 1.806380335998535, + 1.8081299662017822, + 1.8065022414398193, + 1.8069096044158937, + 1.808437180557251, + 1.8035286350250244, + 1.803345340270996, + 1.7989226748657225, + 1.8019704568481445, + 1.799195940284729, + 1.7924841125106812, + 1.7943892532348633, + 1.7934645865249634, + 1.7921251168823242, + 1.789522707901001, + 1.7859628525161744, + 1.7830697146606445, + 1.7806333806991577, + 1.7807968478775025, + 1.7813195696258546, + 1.7742907537460326, + 1.7763463638687134, + 1.7789305100250243, + 1.7736491188812256, + 1.768717915878296, + 1.7715998685073853, + 1.7694499214935302, + 1.7765150137329102, + 1.7702046938323974, + 1.7672565545272827, + 1.7681742614746094, + 1.7645444506835937, + 1.7642444360351563, + 1.7655976934814452, + 1.7601742944717407, + 1.7602788580703734, + 1.7589675115585328, + 1.7637487964630127, + 1.7622764212036133, + 1.7553310860443114, + 1.7552539752197265, + 1.7542964382553101, + 1.7557507891464232, + 1.7533866836929322, + 1.752781385498047, + 1.7534792293930053, + 1.751649098548889, + 1.7528042163848876, + 1.7522574480438233, + 1.748868000869751, + 1.745579310951233, + 1.748803058128357, + 1.7481703802490234, + 1.7455506887817382, + 1.7438273191070557, + 1.7489568131256104, + 1.7463487658691406, + 1.7455516606903076, + 1.7449686437225342, + 1.7442902671051026, + 1.7408798983383178, + 1.7403113724136352, + 1.7415612424087525, + 1.7422372088623046, + 1.7399942685317993, + 1.7430598278045655, + 1.7437233737182618, + 1.7388530453872681, + 1.742972678489685, + 1.7391708444213867, + 1.7380083013153076, + 1.7414154996109008, + 1.7416090111541749, + 1.7426657040405273, + 1.7410819094085694, + 1.7388902407455444, + 1.7386211107635499, + 1.7390259966278077 + ], + "train_acc": [ + 0.24962, + 0.29246, + 0.30748, + 0.31596, + 0.32232, + 0.32566, + 0.32894, + 0.33218, + 0.33564, + 0.33338, + 0.33816, + 0.33802, + 0.34154, + 0.34408, + 0.3454, + 0.3464, + 0.35, + 0.35064, + 0.34942, + 0.35102, + 0.3499, + 0.35208, + 0.354, + 0.35186, + 0.35288, + 0.35062, + 0.35306, + 0.35098, + 0.35422, + 0.35166, + 0.35522, + 0.35382, + 0.35856, + 0.35798, + 0.35914, + 0.35776, + 0.36182, + 0.36158, + 0.36558, + 0.36348, + 0.36282, + 0.36244, + 0.36576, + 0.36514, + 0.3636, + 0.36724, + 0.36794, + 0.36594, + 0.36652, + 0.36342, + 0.36684, + 0.37162, + 0.36754, + 0.36882, + 0.36786, + 0.36904, + 0.36942, + 0.37216, + 0.37156, + 0.37028, + 0.37302, + 0.37194, + 0.37354, + 0.37204, + 0.37152, + 0.37402, + 0.3749, + 0.37556, + 0.37472, + 0.37472, + 0.3748, + 0.37612, + 0.37782, + 0.37806, + 0.37734, + 0.37744, + 0.37922, + 0.37866, + 0.37972, + 0.37874, + 0.37706, + 0.37788, + 0.37796, + 0.38026, + 0.37858, + 0.37822, + 0.37768, + 0.3786, + 0.37856, + 0.3791, + 0.37826, + 0.3811, + 0.38088, + 0.37906, + 0.3784, + 0.37828, + 0.37966, + 0.38174, + 0.38126, + 0.38014 + ], + "test_acc": [ + 0.3045, + 0.3393, + 0.3346, + 0.3359, + 0.3456, + 0.3582, + 0.3589, + 0.3566, + 0.3662, + 0.3668, + 0.3696, + 0.3757, + 0.3629, + 0.3738, + 0.3716, + 0.3674, + 0.3792, + 0.3837, + 0.3815, + 0.3839, + 0.3912, + 0.3753, + 0.3781, + 0.3804, + 0.3753, + 0.3771, + 0.3802, + 0.3792, + 0.3805, + 0.3876, + 0.3826, + 0.3862, + 0.3827, + 0.3927, + 0.382, + 0.393, + 0.3874, + 0.3935, + 0.381, + 0.3979, + 0.3786, + 0.3946, + 0.3908, + 0.3995, + 0.3955, + 0.3971, + 0.4012, + 0.399, + 0.3975, + 0.3922, + 0.3985, + 0.3901, + 0.3931, + 0.3976, + 0.3921, + 0.3985, + 0.3962, + 0.4034, + 0.3942, + 0.4, + 0.4027, + 0.4062, + 0.4005, + 0.4042, + 0.4061, + 0.4041, + 0.4029, + 0.4026, + 0.4045, + 0.4, + 0.4023, + 0.4018, + 0.4061, + 0.4049, + 0.4069, + 0.4042, + 0.4047, + 0.4007, + 0.4039, + 0.4028, + 0.4057, + 0.4022, + 0.4017, + 0.4046, + 0.4012, + 0.4026, + 0.4039, + 0.4032, + 0.4027, + 0.4051, + 0.4043, + 0.4052, + 0.4041, + 0.4048, + 0.4033, + 0.4037, + 0.4032, + 0.4021, + 0.4023, + 0.4025 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.014374179765582085, + 0.09000293165445328, + -0.03108237311244011, + -0.02239440567791462, + -0.02732698619365692, + -0.04657082259654999, + -0.06352561712265015, + 0.9887357950210571 + ], + "perturbation_rho": [ + 0.03167568892240524, + -0.015739459544420242, + 0.0075595797970891, + 0.03960222005844116, + 0.0035968590527772903, + 0.03037603199481964, + 0.029943909496068954, + 0.005924902856349945 + ], + "nudging": { + "0.001": [ + 5.2677933126688e-07, + -4.189787432551384e-07, + 9.185168892145157e-08, + -3.841705620288849e-09, + -3.655441105365753e-08, + 2.3981556296348572e-08, + 2.3981556296348572e-08, + -1.473352313041687e-06 + ], + "0.003": [ + 1.9101426005363464e-06, + -1.223525032401085e-06, + 1.5366822481155396e-07, + -2.537854015827179e-08, + 1.5622936189174652e-07, + 9.872019290924072e-08, + 1.771841198205948e-07, + -4.843459464609623e-06 + ], + "0.01": [ + 6.739639502484351e-06, + -3.795139491558075e-06, + 4.704343155026436e-07, + 2.0337756723165512e-07, + 3.941822797060013e-07, + 6.516929715871811e-07, + 1.0146759450435638e-06, + -1.683842856436968e-05 + ] + }, + "hidden_norms_per_layer": [ + 7750.18701171875, + 163374.5, + 1020835.0, + 1417354.375, + 1653246.5, + 1891510.125, + 2137834.5, + 2238698.25, + 1372594.75 + ], + "bp_grad_norms_per_layer": [ + 2.8991271392442286e-05, + 1.4679561672892305e-06, + 6.962879979255376e-07, + 6.957282039365964e-07, + 6.946868325030664e-07, + 6.940763341845013e-07, + 6.940714456504793e-07, + 6.966275805098121e-07, + 6.714612368341477e-07 + ] + }, + "drift": { + "embed.weight": 55.14922199302306, + "embed.bias": 15.427529434718334, + "blocks.0.ln.weight": 1.3139840801235205, + "blocks.0.w1.weight": 18.28738161809814, + "blocks.0.w1.bias": 13.519470867121564, + "blocks.0.w2.weight": 65.29381196065084, + "blocks.1.ln.weight": 1.2381725193607873, + "blocks.1.w1.weight": 24.224592130996264, + "blocks.1.w1.bias": 19.040373052112855, + "blocks.1.w2.weight": 51.77163849918677, + "blocks.2.ln.weight": 0.7724310378501466, + "blocks.2.w1.weight": 20.96874606199096, + "blocks.2.w1.bias": 19.420703681847986, + "blocks.2.w2.weight": 49.62414572233535, + "blocks.3.ln.weight": 0.6858974872301369, + "blocks.3.w1.weight": 19.410601791558882, + "blocks.3.w1.bias": 18.939055453950672, + "blocks.3.w2.weight": 45.97967492141307, + "blocks.4.ln.weight": 0.6186727990894257, + "blocks.4.w1.weight": 19.69072198327253, + "blocks.4.w1.bias": 19.749605410301402, + "blocks.4.w2.weight": 41.99820530820194, + "blocks.5.ln.weight": 0.6406844547162872, + "blocks.5.w1.weight": 20.510058050945528, + "blocks.5.w1.bias": 20.988299652643033, + "blocks.5.w2.weight": 34.25267498831366, + "blocks.6.ln.weight": 0.6310276000328071, + "blocks.6.w1.weight": 18.188089381701026, + "blocks.6.w1.bias": 17.13681902701148, + "blocks.6.w2.weight": 39.811714763649384, + "blocks.7.ln.weight": 0.7307433363934756, + "blocks.7.w1.weight": 19.56130269038455, + "blocks.7.w1.bias": 16.899275966635617, + "blocks.7.w2.weight": 58.89283532862689, + "out_ln.weight": 0.430748638467109, + "out_head.weight": 7.024186774080115, + "out_head.bias": 0.6948984479282404 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 8, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 5 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L8_seed5", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d512_L8_seed6/results_cifar10.json b/results/fa_dfa_d512_L8_seed6/results_cifar10.json new file mode 100644 index 0000000..996693e --- /dev/null +++ b/results/fa_dfa_d512_L8_seed6/results_cifar10.json @@ -0,0 +1,881 @@ +{ + "6": { + "dfa": { + "log": { + "train_loss": [ + 2.048823083343506, + 2.036137353439331, + 2.0293412883758544, + 2.0249873413085937, + 2.022372166824341, + 2.0224948783874512, + 2.0168244441223147, + 2.016312102279663, + 2.0174264653015137, + 2.0148667056655882, + 2.016306040496826, + 2.012973408279419, + 2.009305914993286, + 2.0127887310409545, + 2.010646263999939, + 2.0090836833953856, + 2.010158688583374, + 2.0119627042007444, + 2.0057619748306275, + 2.00755533531189, + 2.001894345550537, + 2.004210508117676, + 2.002892424316406, + 2.0056438186264036, + 2.006109398841858, + 2.0008294903945925, + 2.004116688156128, + 2.003745227279663, + 2.0009986640930175, + 2.0015462801361084, + 2.001444738845825, + 1.9975958602142334, + 1.9996052980804444, + 1.999814730834961, + 2.0010388219451904, + 1.9962302626800537, + 1.9987594539642335, + 1.9968312421417236, + 1.9977108895111084, + 1.997722573852539, + 1.9974460966491698, + 1.9960338827514648, + 1.997143808517456, + 1.9972000302505493, + 1.9973672123718262, + 1.9960454425811767, + 1.9950894018936158, + 1.9955249545669556, + 1.9957580101776122, + 1.9944090689086913, + 1.9936514922332764, + 1.99337042137146, + 1.9966205854034424, + 1.9951700267791748, + 1.995979843597412, + 1.9958187718200684, + 1.9929929906463624, + 1.996409147720337, + 1.992997929534912, + 1.9945113285827636, + 1.9946366765975951, + 1.9921776248550416, + 1.9923859210586548, + 1.9960651620864869, + 1.992535831222534, + 1.9948228299331665, + 1.992319468231201, + 1.991410069732666, + 1.990960274810791, + 1.9932797208023072, + 1.9914824683380128, + 1.9920508996963502, + 1.9927404278564453, + 1.9916280765533447, + 1.9920522421646119, + 1.9932328916931152, + 1.9915541756439208, + 1.992103966407776, + 1.9913361803817748, + 1.9909088207626342, + 1.9888611060333252, + 1.9917893444061279, + 1.990510139694214, + 1.9895336349105834, + 1.9925078125, + 1.9905930145263673, + 1.9911131750488282, + 1.9901078464126587, + 1.9898306075286865, + 1.9894448529052735, + 1.9916437425231934, + 1.9892843076324462, + 1.9908900201797486, + 1.991130443496704, + 1.9902202153778077, + 1.9898065716934203, + 1.9898762433242798, + 1.989882501220703, + 1.9897464168548584, + 1.991097922897339 + ], + "train_acc": [ + 0.24736, + 0.24926, + 0.25592, + 0.25554, + 0.25688, + 0.25748, + 0.25952, + 0.25872, + 0.26084, + 0.26068, + 0.25918, + 0.26338, + 0.2641, + 0.2628, + 0.2662, + 0.26518, + 0.26584, + 0.26602, + 0.26744, + 0.26454, + 0.2692, + 0.26846, + 0.27026, + 0.26784, + 0.26908, + 0.27298, + 0.27076, + 0.27126, + 0.26854, + 0.27136, + 0.27188, + 0.27314, + 0.27166, + 0.27166, + 0.27, + 0.27358, + 0.27228, + 0.27568, + 0.27252, + 0.27382, + 0.275, + 0.27676, + 0.27532, + 0.27448, + 0.2742, + 0.27754, + 0.27586, + 0.27378, + 0.27572, + 0.27606, + 0.27584, + 0.27508, + 0.2744, + 0.27512, + 0.27518, + 0.27536, + 0.2768, + 0.27628, + 0.2759, + 0.27418, + 0.27668, + 0.27618, + 0.27754, + 0.2755, + 0.2797, + 0.27622, + 0.27722, + 0.2781, + 0.27882, + 0.27704, + 0.27728, + 0.27598, + 0.27658, + 0.2793, + 0.2785, + 0.27512, + 0.27576, + 0.27754, + 0.2809, + 0.27864, + 0.27994, + 0.27934, + 0.278, + 0.2789, + 0.27944, + 0.2777, + 0.2785, + 0.28082, + 0.27722, + 0.27896, + 0.27776, + 0.27826, + 0.27702, + 0.2777, + 0.28018, + 0.27868, + 0.27942, + 0.277, + 0.2778, + 0.2796 + ], + "test_acc": [ + 0.2851, + 0.2882, + 0.2834, + 0.2969, + 0.2682, + 0.2744, + 0.2751, + 0.2761, + 0.2883, + 0.2869, + 0.2764, + 0.2839, + 0.2682, + 0.2743, + 0.2888, + 0.2735, + 0.2847, + 0.2852, + 0.2974, + 0.2908, + 0.2957, + 0.3032, + 0.2908, + 0.272, + 0.2959, + 0.2858, + 0.2995, + 0.2807, + 0.292, + 0.302, + 0.2824, + 0.2802, + 0.2916, + 0.2913, + 0.3048, + 0.2908, + 0.2934, + 0.284, + 0.3083, + 0.302, + 0.3039, + 0.2868, + 0.3047, + 0.2997, + 0.2914, + 0.2968, + 0.2848, + 0.3035, + 0.2934, + 0.2966, + 0.2973, + 0.2946, + 0.2953, + 0.296, + 0.2932, + 0.2959, + 0.2864, + 0.2976, + 0.293, + 0.2961, + 0.2957, + 0.3012, + 0.2948, + 0.2926, + 0.2969, + 0.3043, + 0.3005, + 0.2957, + 0.2956, + 0.2994, + 0.2919, + 0.2945, + 0.2915, + 0.2903, + 0.2934, + 0.2967, + 0.2953, + 0.3, + 0.3006, + 0.2939, + 0.2995, + 0.2965, + 0.292, + 0.294, + 0.3006, + 0.3013, + 0.2978, + 0.2961, + 0.3011, + 0.2982, + 0.2984, + 0.2974, + 0.2977, + 0.297, + 0.2967, + 0.2972, + 0.2972, + 0.2962, + 0.2961, + 0.296 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.385863333940506, + 0.0001165914727607742, + 0.00017788054537959397, + 0.00018135752179659903, + -0.00028566765831783414, + -0.0002498100802768022, + 0.0006119838217273355, + -0.00013276470417622477 + ], + "perturbation_rho": [ + -0.01099667139351368, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -3.4226104617118835e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.0388903319835663e-06, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -3.7979334592819214e-06, + 0.0, + -9.313225746154785e-10, + 0.0, + 3.725290298461914e-09, + 0.0, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 54199.1640625, + 1216095872.0, + 2414873600.0, + 3109203712.0, + 3571607296.0, + 6284516352.0, + 6465921536.0, + 6651301376.0, + 8724734976.0 + ], + "bp_grad_norms_per_layer": [ + 2.6299613864466664e-07, + 2.240632540617682e-10, + 2.239412544291497e-10, + 2.2397983467925542e-10, + 2.240085755778054e-10, + 2.2390612974820812e-10, + 2.2391909160202061e-10, + 2.2392485088396086e-10, + 2.239693430716727e-10 + ] + }, + "drift": { + "embed.weight": 328.7167633164963, + "embed.bias": 238.81345295557173, + "blocks.0.ln.weight": 10.101694109463628, + "blocks.0.w1.weight": 287.42152472298335, + "blocks.0.w1.bias": 258.1218430687453, + "blocks.0.w2.weight": 482.380198439405, + "blocks.1.ln.weight": 8.527375095072756, + "blocks.1.w1.weight": 281.10785120372356, + "blocks.1.w1.bias": 271.84962810099614, + "blocks.1.w2.weight": 308.13809976512954, + "blocks.2.ln.weight": 8.093445971068885, + "blocks.2.w1.weight": 291.96840313708185, + "blocks.2.w1.bias": 267.27308802490285, + "blocks.2.w2.weight": 296.34890032183836, + "blocks.3.ln.weight": 7.507548795725433, + "blocks.3.w1.weight": 290.43783218773393, + "blocks.3.w1.bias": 271.71149018462404, + "blocks.3.w2.weight": 280.50007531299553, + "blocks.4.ln.weight": 10.4676649794159, + "blocks.4.w1.weight": 425.7246219342384, + "blocks.4.w1.bias": 395.9832771459978, + "blocks.4.w2.weight": 387.2972211221749, + "blocks.5.ln.weight": 7.145103425417736, + "blocks.5.w1.weight": 272.3772890298663, + "blocks.5.w1.bias": 261.31341956376576, + "blocks.5.w2.weight": 248.44511726184933, + "blocks.6.ln.weight": 7.2864592372954435, + "blocks.6.w1.weight": 277.1704677507615, + "blocks.6.w1.bias": 265.20680207190264, + "blocks.6.w2.weight": 260.3666734377331, + "blocks.7.ln.weight": 10.55594745569165, + "blocks.7.w1.weight": 426.2374140444505, + "blocks.7.w1.bias": 401.49349715227146, + "blocks.7.w2.weight": 419.0556854355582, + "out_ln.weight": 0.5880689181906897, + "out_head.weight": 8.642576606816835, + "out_head.bias": 0.6087240911810886 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0266723140716554, + 1.9453410259246826, + 1.9245470600128174, + 1.9017182848739624, + 1.889633105506897, + 1.8787115466308595, + 1.869741219520569, + 1.859507017288208, + 1.8565170881652833, + 1.8455627252197266, + 1.8404858825683594, + 1.8341104727172852, + 1.8273077993774414, + 1.821364828453064, + 1.8166592828369141, + 1.8075398138046264, + 1.8040798845672608, + 1.7960231017303467, + 1.785357122154236, + 1.7876761278915405, + 1.77504403591156, + 1.7683619609832764, + 1.769164297103882, + 1.7614449658966065, + 1.7590879248428344, + 1.7571340868759155, + 1.7514765051651, + 1.7532490229034423, + 1.7487156938934325, + 1.7469101626205443, + 1.7441981435394287, + 1.7379362894058228, + 1.7393075399017335, + 1.7378688484954834, + 1.7407434060668945, + 1.7369408278656007, + 1.7355171717453004, + 1.7350507015228271, + 1.733909783859253, + 1.7398296880722046, + 1.7312470238876343, + 1.7290490047836304, + 1.7353802160644531, + 1.7334120412445069, + 1.7291723877334595, + 1.726144882774353, + 1.7297367659759522, + 1.72646170627594, + 1.724361591567993, + 1.7226831272125245, + 1.7208573818969726, + 1.7199689587402345, + 1.7188016274261475, + 1.7211026276016235, + 1.7158281423568726, + 1.7192893993759155, + 1.711656453590393, + 1.717024673728943, + 1.710442032470703, + 1.7105829236602783, + 1.711408941345215, + 1.706308694000244, + 1.7078606945419312, + 1.7072623968887328, + 1.703292763900757, + 1.7017784185028075, + 1.7011818041992188, + 1.7006354034805298, + 1.6977454791259765, + 1.6999914080429077, + 1.6981370053863525, + 1.699045623397827, + 1.7020124697494508, + 1.6962461737823487, + 1.6977318247222901, + 1.696797931213379, + 1.697660752220154, + 1.695864578933716, + 1.6946413637542725, + 1.6933105139923095, + 1.6918804026031493, + 1.69379876953125, + 1.692444557876587, + 1.692386579284668, + 1.6933874633026123, + 1.6912192764282226, + 1.6896385999298096, + 1.6908740300750733, + 1.6846979732894898, + 1.6878035149765014, + 1.6874957360839844, + 1.6844608339691163, + 1.687544917602539, + 1.6878573336791993, + 1.6888155934906006, + 1.6884653591537475, + 1.684945572128296, + 1.6854769310760498, + 1.689350791053772, + 1.684672553062439 + ], + "train_acc": [ + 0.25516, + 0.2934, + 0.30626, + 0.31242, + 0.32024, + 0.32414, + 0.32588, + 0.32934, + 0.33358, + 0.3364, + 0.33836, + 0.34152, + 0.34374, + 0.34868, + 0.34688, + 0.35322, + 0.35536, + 0.35502, + 0.3615, + 0.35864, + 0.365, + 0.36708, + 0.36492, + 0.36656, + 0.36836, + 0.36936, + 0.3728, + 0.37342, + 0.37252, + 0.37256, + 0.37424, + 0.3751, + 0.37706, + 0.3752, + 0.37348, + 0.37624, + 0.37684, + 0.37782, + 0.37856, + 0.3766, + 0.37936, + 0.38126, + 0.37778, + 0.37912, + 0.38126, + 0.38394, + 0.37856, + 0.38026, + 0.38072, + 0.38172, + 0.3842, + 0.38356, + 0.38258, + 0.38382, + 0.38378, + 0.38544, + 0.38808, + 0.38518, + 0.3869, + 0.386, + 0.3856, + 0.38972, + 0.38756, + 0.38892, + 0.39094, + 0.38878, + 0.39068, + 0.3887, + 0.39396, + 0.39364, + 0.39368, + 0.39064, + 0.39046, + 0.39298, + 0.39274, + 0.39592, + 0.39298, + 0.39386, + 0.39488, + 0.39222, + 0.39268, + 0.39366, + 0.39478, + 0.39506, + 0.39528, + 0.39594, + 0.39594, + 0.39416, + 0.39786, + 0.39686, + 0.39512, + 0.39482, + 0.39516, + 0.3969, + 0.39614, + 0.39604, + 0.39616, + 0.3983, + 0.39634, + 0.39874 + ], + "test_acc": [ + 0.3093, + 0.3342, + 0.3399, + 0.3542, + 0.3538, + 0.3515, + 0.3467, + 0.3593, + 0.3629, + 0.3586, + 0.3658, + 0.3651, + 0.3634, + 0.368, + 0.3662, + 0.3772, + 0.3706, + 0.3806, + 0.3767, + 0.3884, + 0.3928, + 0.3913, + 0.3908, + 0.3845, + 0.394, + 0.3977, + 0.3963, + 0.3919, + 0.3927, + 0.4001, + 0.3845, + 0.3968, + 0.3923, + 0.3972, + 0.4001, + 0.3927, + 0.3949, + 0.392, + 0.4012, + 0.3949, + 0.4069, + 0.3964, + 0.4031, + 0.3987, + 0.3974, + 0.4014, + 0.3977, + 0.4103, + 0.3991, + 0.3969, + 0.3986, + 0.4054, + 0.4046, + 0.4081, + 0.4036, + 0.4082, + 0.4012, + 0.4036, + 0.4027, + 0.4102, + 0.4079, + 0.4038, + 0.4044, + 0.4118, + 0.4118, + 0.4102, + 0.4106, + 0.4039, + 0.4082, + 0.4046, + 0.4096, + 0.4129, + 0.408, + 0.4041, + 0.4099, + 0.4103, + 0.4081, + 0.4101, + 0.4125, + 0.4133, + 0.4071, + 0.4109, + 0.4092, + 0.412, + 0.4117, + 0.4117, + 0.4081, + 0.412, + 0.412, + 0.41, + 0.4116, + 0.4107, + 0.409, + 0.4105, + 0.4105, + 0.4117, + 0.411, + 0.4107, + 0.4109, + 0.4109 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.036382660269737244, + 0.07720986753702164, + 0.015243301168084145, + -0.03982359915971756, + -0.08008211106061935, + -0.07353264093399048, + 0.004661812447011471, + 0.9986914992332458 + ], + "perturbation_rho": [ + 0.046086542308330536, + -0.012133870273828506, + 0.0135452039539814, + 0.027485966682434082, + -0.0412953719496727, + -0.01517622172832489, + 0.015256978571414948, + -0.014813247136771679 + ], + "nudging": { + "0.001": [ + -2.9372749850153923e-06, + -4.57162968814373e-07, + 5.78584149479866e-08, + 1.1955853551626205e-07, + 9.022187441587448e-08, + 7.35744833946228e-08, + 2.5029294192790985e-08, + -1.23586505651474e-06 + ], + "0.003": [ + -9.18388832360506e-06, + -1.2764940038323402e-06, + -1.8684659153223038e-07, + 2.1711457520723343e-07, + 3.7939753383398056e-07, + 3.0547380447387695e-07, + 1.57160684466362e-08, + -4.404224455356598e-06 + ], + "0.01": [ + -3.0463445000350475e-05, + -4.363246262073517e-06, + -3.3760443329811096e-07, + 6.594927981495857e-07, + 1.332256942987442e-06, + 1.2486707419157028e-06, + -1.0861549526453018e-07, + -1.546763814985752e-05 + ] + }, + "hidden_norms_per_layer": [ + 6046.84130859375, + 74194.140625, + 477900.9375, + 689443.0, + 1004220.0625, + 1523434.625, + 1680752.625, + 1682025.375, + 898568.625 + ], + "bp_grad_norms_per_layer": [ + 3.200672654202208e-05, + 2.357817038500798e-06, + 7.007323006291699e-07, + 6.70125302804081e-07, + 6.57040743590187e-07, + 6.679950388388534e-07, + 6.679492230432515e-07, + 6.673712391602749e-07, + 6.578414968316793e-07 + ] + }, + "drift": { + "embed.weight": 41.37337372663241, + "embed.bias": 21.739351895524546, + "blocks.0.ln.weight": 1.096869475338319, + "blocks.0.w1.weight": 14.853427419399033, + "blocks.0.w1.bias": 14.626173478331832, + "blocks.0.w2.weight": 52.544426520863894, + "blocks.1.ln.weight": 0.989631116445591, + "blocks.1.w1.weight": 18.73171250459504, + "blocks.1.w1.bias": 13.329767291528706, + "blocks.1.w2.weight": 51.37681586378203, + "blocks.2.ln.weight": 0.7265939790865649, + "blocks.2.w1.weight": 18.824924434649834, + "blocks.2.w1.bias": 13.21966047090143, + "blocks.2.w2.weight": 36.1324487703447, + "blocks.3.ln.weight": 0.6455349248601757, + "blocks.3.w1.weight": 19.270974488434813, + "blocks.3.w1.bias": 16.976194451895758, + "blocks.3.w2.weight": 28.417177242878225, + "blocks.4.ln.weight": 0.6212274460619375, + "blocks.4.w1.weight": 20.86703490367653, + "blocks.4.w1.bias": 22.972850990124385, + "blocks.4.w2.weight": 29.740214683313905, + "blocks.5.ln.weight": 0.6222289394933932, + "blocks.5.w1.weight": 18.57775599009712, + "blocks.5.w1.bias": 20.08003721644836, + "blocks.5.w2.weight": 40.53068019154897, + "blocks.6.ln.weight": 0.5581383332084382, + "blocks.6.w1.weight": 15.461885552051882, + "blocks.6.w1.bias": 11.684922950989124, + "blocks.6.w2.weight": 53.18350858546716, + "blocks.7.ln.weight": 0.6558470787555182, + "blocks.7.w1.weight": 20.23429444317466, + "blocks.7.w1.bias": 21.51332878594272, + "blocks.7.w2.weight": 38.38263927504239, + "out_ln.weight": 0.3257959664285173, + "out_head.weight": 5.900526651995236, + "out_head.bias": 0.9300218587303252 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 8, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 6 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L8_seed6", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d512_L8_seed7/results_cifar10.json b/results/fa_dfa_d512_L8_seed7/results_cifar10.json new file mode 100644 index 0000000..1afcf83 --- /dev/null +++ b/results/fa_dfa_d512_L8_seed7/results_cifar10.json @@ -0,0 +1,881 @@ +{ + "7": { + "dfa": { + "log": { + "train_loss": [ + 2.0667303594207764, + 2.0324611769866943, + 2.0354494356536867, + 2.029560486984253, + 2.0285463153076173, + 2.0250388471984864, + 2.024455110168457, + 2.019448572616577, + 2.016967566986084, + 2.015476321334839, + 2.0111602995300295, + 2.011849951324463, + 2.009353925704956, + 2.0070058068084715, + 2.008019479827881, + 2.004458062477112, + 2.0052689391326903, + 1.9994013024520874, + 1.9999373209381104, + 1.998676916770935, + 1.9997672403717042, + 2.0009046767807006, + 1.9968020938491822, + 1.995622604598999, + 1.996247008934021, + 1.9960846052551269, + 1.994126517906189, + 1.9923845544433594, + 1.993994869995117, + 1.9906352404403687, + 1.993604411315918, + 1.9903970510864257, + 1.9910701220321656, + 1.9923721322250367, + 1.9914608909606935, + 1.990251951751709, + 1.9879735006332397, + 1.990091123123169, + 1.989167085533142, + 1.9870023511505126, + 1.9876113708496095, + 1.987236508255005, + 1.9868223078918457, + 1.986799183616638, + 1.9872047930145265, + 1.9893735236358643, + 1.985566644592285, + 1.9861162490081787, + 1.9881098473739625, + 1.9854161669540404, + 1.9856359400939942, + 1.9845690382385255, + 1.98623692237854, + 1.9848303674316405, + 1.985502159729004, + 1.9834946334075927, + 1.9829965356063843, + 1.982283455657959, + 1.984253911705017, + 1.9829701402282716, + 1.9815960638046264, + 1.9816721053314208, + 1.9825859942245483, + 1.980166824951172, + 1.9817002365493774, + 1.9804359143447876, + 1.9821267053604126, + 1.9813222342681884, + 1.980442075805664, + 1.9812367477416992, + 1.9801365872192382, + 1.980766887664795, + 1.979991530380249, + 1.9813088734436035, + 1.9798352686309815, + 1.979272435836792, + 1.9787259462738036, + 1.9776743465805053, + 1.9783772805404662, + 1.9771632527160645, + 1.9797238985824586, + 1.9785139586639404, + 1.978402091369629, + 1.9799914836883545, + 1.9797544343566895, + 1.9781852695465088, + 1.9793489038848877, + 1.97948084274292, + 1.9786187704849243, + 1.9765155652236939, + 1.9782553648376464, + 1.980494052734375, + 1.9774739639282226, + 1.9782270874786376, + 1.9778608634948731, + 1.9789943241119385, + 1.9787702282333375, + 1.9780272876358032, + 1.9778476651000976, + 1.9772668927764891 + ], + "train_acc": [ + 0.23342, + 0.24908, + 0.24646, + 0.24752, + 0.25104, + 0.25202, + 0.2535, + 0.25812, + 0.25712, + 0.2563, + 0.26068, + 0.25952, + 0.26266, + 0.261, + 0.26174, + 0.26348, + 0.26424, + 0.26552, + 0.2655, + 0.2661, + 0.26904, + 0.26678, + 0.27024, + 0.26984, + 0.26944, + 0.27048, + 0.26884, + 0.27052, + 0.27058, + 0.26958, + 0.27028, + 0.27444, + 0.27172, + 0.27178, + 0.27452, + 0.2702, + 0.27174, + 0.27434, + 0.27366, + 0.27622, + 0.27582, + 0.2751, + 0.27744, + 0.2755, + 0.2772, + 0.27482, + 0.27754, + 0.2768, + 0.27594, + 0.27634, + 0.27546, + 0.27758, + 0.27588, + 0.27868, + 0.27538, + 0.27782, + 0.27744, + 0.27858, + 0.27892, + 0.27808, + 0.27762, + 0.28194, + 0.27864, + 0.28092, + 0.27878, + 0.28206, + 0.27952, + 0.27822, + 0.28132, + 0.28178, + 0.2819, + 0.28098, + 0.28214, + 0.2826, + 0.28144, + 0.2839, + 0.28118, + 0.28474, + 0.28238, + 0.28314, + 0.28064, + 0.27932, + 0.28206, + 0.28026, + 0.28226, + 0.2815, + 0.28228, + 0.27942, + 0.2824, + 0.28296, + 0.2822, + 0.27914, + 0.28222, + 0.2813, + 0.28184, + 0.28122, + 0.28294, + 0.28278, + 0.28304, + 0.28276 + ], + "test_acc": [ + 0.2395, + 0.2584, + 0.278, + 0.2745, + 0.2714, + 0.2845, + 0.279, + 0.2849, + 0.2824, + 0.2736, + 0.2673, + 0.2695, + 0.2977, + 0.2632, + 0.2837, + 0.2899, + 0.2993, + 0.2917, + 0.2987, + 0.2881, + 0.2865, + 0.2963, + 0.307, + 0.2818, + 0.2905, + 0.297, + 0.3003, + 0.284, + 0.2824, + 0.3007, + 0.3045, + 0.3001, + 0.2875, + 0.3014, + 0.2948, + 0.2938, + 0.2869, + 0.3033, + 0.3048, + 0.2973, + 0.2897, + 0.2889, + 0.3027, + 0.309, + 0.2915, + 0.3052, + 0.3026, + 0.3069, + 0.2847, + 0.3014, + 0.3036, + 0.3065, + 0.3041, + 0.3006, + 0.296, + 0.3047, + 0.299, + 0.2993, + 0.3054, + 0.3044, + 0.3094, + 0.3083, + 0.2916, + 0.2964, + 0.3022, + 0.3068, + 0.3073, + 0.2973, + 0.3006, + 0.2954, + 0.3078, + 0.3015, + 0.3052, + 0.2978, + 0.3006, + 0.2996, + 0.3006, + 0.3029, + 0.2958, + 0.2935, + 0.3002, + 0.2972, + 0.3032, + 0.3043, + 0.3048, + 0.3077, + 0.3024, + 0.3073, + 0.3056, + 0.3067, + 0.3072, + 0.3058, + 0.3066, + 0.3057, + 0.3058, + 0.3058, + 0.306, + 0.3059, + 0.3055, + 0.3055 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.38665446639060974, + 0.000932278111577034, + -3.8507863791892305e-05, + 0.0002074055082630366, + 0.0004886630922555923, + -2.073507675959263e-05, + 9.8392330983188e-05, + -0.00026387785328552127 + ], + "perturbation_rho": [ + -0.015509183518588543, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -4.4563785195350647e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.2372620403766632e-06, + 0.0, + 9.313225746154785e-10, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -3.957655280828476e-06, + -5.587935447692871e-09, + 9.313225746154785e-10, + -3.725290298461914e-09, + 0.0, + 0.0, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 54526.05078125, + 813104704.0, + 2402701568.0, + 3855742976.0, + 4565018112.0, + 7107309056.0, + 10427996160.0, + 10594944000.0, + 10802217984.0 + ], + "bp_grad_norms_per_layer": [ + 2.7473959107737755e-07, + 1.7227932969099413e-10, + 1.702108870516028e-10, + 1.6996638818600474e-10, + 1.6991698326140892e-10, + 1.6980272743438718e-10, + 1.6994737561670803e-10, + 1.6996060114848888e-10, + 1.6999159024866373e-10 + ] + }, + "drift": { + "embed.weight": 327.4346996072575, + "embed.bias": 170.27189912672551, + "blocks.0.ln.weight": 10.029448340733337, + "blocks.0.w1.weight": 271.09694985433896, + "blocks.0.w1.bias": 210.3344533990309, + "blocks.0.w2.weight": 492.9386447153367, + "blocks.1.ln.weight": 8.828382497492473, + "blocks.1.w1.weight": 304.4175908830063, + "blocks.1.w1.bias": 277.43931427160345, + "blocks.1.w2.weight": 335.16168952866406, + "blocks.2.ln.weight": 8.838151125620117, + "blocks.2.w1.weight": 344.66383116618897, + "blocks.2.w1.bias": 306.24514451886506, + "blocks.2.w2.weight": 332.4200685911524, + "blocks.3.ln.weight": 7.729321470287778, + "blocks.3.w1.weight": 307.1583996497501, + "blocks.3.w1.bias": 280.7450378625442, + "blocks.3.w2.weight": 283.2991400409648, + "blocks.4.ln.weight": 10.279472891273015, + "blocks.4.w1.weight": 425.65902304929637, + "blocks.4.w1.bias": 405.30162642773695, + "blocks.4.w2.weight": 417.0126410755384, + "blocks.5.ln.weight": 11.518133724029006, + "blocks.5.w1.weight": 467.0126802605906, + "blocks.5.w1.bias": 452.55789718669956, + "blocks.5.w2.weight": 467.65851514176296, + "blocks.6.ln.weight": 9.037508419886805, + "blocks.6.w1.weight": 358.31655559732087, + "blocks.6.w1.bias": 330.36950370925376, + "blocks.6.w2.weight": 350.477939939699, + "blocks.7.ln.weight": 8.758836150161805, + "blocks.7.w1.weight": 347.69073535976315, + "blocks.7.w1.bias": 320.1104471013453, + "blocks.7.w2.weight": 332.8564062769089, + "out_ln.weight": 0.6350668697695233, + "out_head.weight": 9.197192445712407, + "out_head.bias": 0.8845643723650614 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0300379509735107, + 1.9401999561309815, + 1.913938886642456, + 1.8904116683578491, + 1.8764363840103149, + 1.873182268371582, + 1.8717842712402344, + 1.8644564879608154, + 1.8566048559570312, + 1.850687060508728, + 1.8460483242797852, + 1.8433684701538087, + 1.833488243789673, + 1.8278405266571045, + 1.8265484925079345, + 1.818034171447754, + 1.8155431734466552, + 1.8074933514022826, + 1.8059410440444947, + 1.8039924750137328, + 1.8001109600067138, + 1.7982124895477294, + 1.7878496411514282, + 1.7872236651611328, + 1.7860211572265625, + 1.7805663021469116, + 1.779324355392456, + 1.771492137527466, + 1.772376263961792, + 1.7669432696914673, + 1.7704145129776, + 1.7629687177276612, + 1.759504497718811, + 1.7608156897735596, + 1.7560134534072875, + 1.7523908859634398, + 1.7503845543670655, + 1.7478893075180053, + 1.7472649411773682, + 1.7424788817596435, + 1.74519405002594, + 1.7384309884262086, + 1.7379801452636718, + 1.7321213195800782, + 1.732442968559265, + 1.7347256170654297, + 1.7268475212860108, + 1.7259371692276, + 1.730874881286621, + 1.7255464766311646, + 1.7273355867767335, + 1.7193929712295533, + 1.7231387823104858, + 1.7167245266342164, + 1.7185063571929933, + 1.7160026277923583, + 1.71593848777771, + 1.7101723334121703, + 1.7123066550445556, + 1.7091578268432617, + 1.710422399520874, + 1.7071148763275146, + 1.703984679031372, + 1.7055902273559571, + 1.7043205630493163, + 1.702749596977234, + 1.7015760723114013, + 1.6997603458023072, + 1.7010435842895508, + 1.7012611554718018, + 1.7004539463043213, + 1.6962661600494384, + 1.6981626427841185, + 1.698192346496582, + 1.6980322202682494, + 1.6932597018814086, + 1.6941705449676514, + 1.6931912238311768, + 1.6942447135162353, + 1.6932169602203369, + 1.6921008915328979, + 1.691394665260315, + 1.692651755027771, + 1.690492251586914, + 1.6901156521606446, + 1.6877212131118775, + 1.6918383533859254, + 1.6916584229278564, + 1.6862779132843018, + 1.690645524635315, + 1.689451340637207, + 1.6918565393829346, + 1.6878593984222412, + 1.6896748791122436, + 1.6869253600692748, + 1.6864034613037109, + 1.6866692586517333, + 1.6912582437515258, + 1.6882677157211303, + 1.6879832342147827 + ], + "train_acc": [ + 0.25058, + 0.29132, + 0.30476, + 0.31654, + 0.32498, + 0.32736, + 0.32762, + 0.3327, + 0.33604, + 0.33698, + 0.33776, + 0.33972, + 0.34396, + 0.34484, + 0.34528, + 0.34798, + 0.351, + 0.35216, + 0.35302, + 0.35184, + 0.35472, + 0.3551, + 0.35822, + 0.35894, + 0.36172, + 0.36298, + 0.36054, + 0.36104, + 0.36522, + 0.36498, + 0.364, + 0.36804, + 0.36678, + 0.36776, + 0.3693, + 0.37214, + 0.37256, + 0.37198, + 0.37316, + 0.37604, + 0.37578, + 0.37634, + 0.37724, + 0.38006, + 0.38006, + 0.3758, + 0.38288, + 0.37976, + 0.37962, + 0.38206, + 0.38228, + 0.38356, + 0.38352, + 0.38466, + 0.38582, + 0.3873, + 0.3864, + 0.38692, + 0.38786, + 0.38986, + 0.38712, + 0.38938, + 0.389, + 0.39036, + 0.38952, + 0.3926, + 0.3918, + 0.39226, + 0.39226, + 0.39486, + 0.39342, + 0.39342, + 0.39574, + 0.39404, + 0.39118, + 0.39872, + 0.39752, + 0.39638, + 0.39602, + 0.39634, + 0.39706, + 0.39678, + 0.39506, + 0.39628, + 0.39606, + 0.39918, + 0.39864, + 0.39598, + 0.39636, + 0.39802, + 0.39958, + 0.39712, + 0.39732, + 0.39682, + 0.39824, + 0.39882, + 0.39722, + 0.39738, + 0.39682, + 0.39604 + ], + "test_acc": [ + 0.2982, + 0.3236, + 0.3362, + 0.355, + 0.3615, + 0.3565, + 0.3706, + 0.3689, + 0.3752, + 0.3721, + 0.3648, + 0.3757, + 0.358, + 0.3682, + 0.3671, + 0.3888, + 0.3867, + 0.3821, + 0.3853, + 0.3871, + 0.3942, + 0.387, + 0.3844, + 0.3793, + 0.3865, + 0.3944, + 0.3914, + 0.3841, + 0.3867, + 0.3946, + 0.3922, + 0.3911, + 0.3974, + 0.3923, + 0.4016, + 0.4003, + 0.398, + 0.3965, + 0.4044, + 0.4054, + 0.3959, + 0.3964, + 0.3997, + 0.4038, + 0.4096, + 0.4134, + 0.4049, + 0.4077, + 0.4074, + 0.4021, + 0.4087, + 0.4057, + 0.4133, + 0.415, + 0.4057, + 0.414, + 0.4129, + 0.4116, + 0.4118, + 0.4059, + 0.4124, + 0.414, + 0.4067, + 0.4166, + 0.4129, + 0.4136, + 0.4099, + 0.4092, + 0.418, + 0.4107, + 0.4178, + 0.4112, + 0.4128, + 0.4178, + 0.4138, + 0.4147, + 0.4157, + 0.4178, + 0.4179, + 0.4197, + 0.4188, + 0.414, + 0.416, + 0.4182, + 0.4217, + 0.4191, + 0.4173, + 0.4185, + 0.421, + 0.42, + 0.4176, + 0.4186, + 0.421, + 0.4193, + 0.4192, + 0.42, + 0.4192, + 0.42, + 0.4203, + 0.42 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.04720165580511093, + 0.023647643625736237, + -0.033746276050806046, + -0.02436545118689537, + -0.028243303298950195, + -0.01576988771557808, + -0.08274193108081818, + 0.9948431253433228 + ], + "perturbation_rho": [ + -0.019907839596271515, + 0.026347745209932327, + 0.00029688142240047455, + -0.015195554122328758, + 0.043242912739515305, + 0.008055397309362888, + 0.003536408767104149, + -0.039388515055179596 + ], + "nudging": { + "0.001": [ + -7.520895451307297e-06, + -1.26776285469532e-07, + 9.906943887472153e-08, + -4.656612873077393e-10, + -4.0978193283081055e-08, + -3.585591912269592e-08, + 6.740447133779526e-08, + -1.3819662854075432e-06 + ], + "0.003": [ + -2.2122403606772423e-05, + -3.012828528881073e-07, + 2.1152663975954056e-07, + 6.798654794692993e-08, + 1.9441358745098114e-07, + 2.6775524020195007e-08, + 3.398745320737362e-07, + -5.154346581548452e-06 + ], + "0.01": [ + -7.367332000285387e-05, + -1.0807416401803493e-06, + 6.495974957942963e-07, + 3.817840479314327e-07, + 5.302717909216881e-07, + 2.4068867787718773e-07, + 1.5024561434984207e-06, + -1.799390884116292e-05 + ] + }, + "hidden_norms_per_layer": [ + 5806.3515625, + 63154.71875, + 345991.1875, + 741559.875, + 923550.8125, + 1119541.375, + 1242674.125, + 1367644.625, + 780196.0625 + ], + "bp_grad_norms_per_layer": [ + 5.0447550165699795e-05, + 2.6470297598280013e-06, + 8.134598488140909e-07, + 7.909251280580065e-07, + 7.877957273194625e-07, + 7.858119488446391e-07, + 7.467841669495101e-07, + 7.52233631828858e-07, + 7.457880428773933e-07 + ] + }, + "drift": { + "embed.weight": 40.32473097964277, + "embed.bias": 16.685566967564547, + "blocks.0.ln.weight": 1.0445740862794894, + "blocks.0.w1.weight": 15.133344163582892, + "blocks.0.w1.bias": 12.448585097052113, + "blocks.0.w2.weight": 47.37614824821112, + "blocks.1.ln.weight": 0.9771977116283915, + "blocks.1.w1.weight": 19.65973837310052, + "blocks.1.w1.bias": 15.073340004663, + "blocks.1.w2.weight": 45.15493814957138, + "blocks.2.ln.weight": 0.7677471742012729, + "blocks.2.w1.weight": 18.286316393259757, + "blocks.2.w1.bias": 16.668691780331002, + "blocks.2.w2.weight": 43.33024072420251, + "blocks.3.ln.weight": 0.7157196705161631, + "blocks.3.w1.weight": 17.769177715348974, + "blocks.3.w1.bias": 17.48172961098867, + "blocks.3.w2.weight": 46.41797990739132, + "blocks.4.ln.weight": 0.7158938212971907, + "blocks.4.w1.weight": 18.203867219610753, + "blocks.4.w1.bias": 18.870397314936728, + "blocks.4.w2.weight": 51.23112107531505, + "blocks.5.ln.weight": 0.6884334631484388, + "blocks.5.w1.weight": 18.258883275015975, + "blocks.5.w1.bias": 18.71714897198837, + "blocks.5.w2.weight": 51.991090351166356, + "blocks.6.ln.weight": 0.633491884160565, + "blocks.6.w1.weight": 17.95984323437285, + "blocks.6.w1.bias": 18.493378505745028, + "blocks.6.w2.weight": 38.75838405347133, + "blocks.7.ln.weight": 0.5989570930744887, + "blocks.7.w1.weight": 18.705839523958478, + "blocks.7.w1.bias": 20.505853112621466, + "blocks.7.w2.weight": 33.59326297500316, + "out_ln.weight": 0.2917390152456112, + "out_head.weight": 5.883284575632486, + "out_head.bias": 0.8881859391580925 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 8, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 7 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L8_seed7", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d512_L8_seed8/results_cifar10.json b/results/fa_dfa_d512_L8_seed8/results_cifar10.json new file mode 100644 index 0000000..8f97591 --- /dev/null +++ b/results/fa_dfa_d512_L8_seed8/results_cifar10.json @@ -0,0 +1,881 @@ +{ + "8": { + "dfa": { + "log": { + "train_loss": [ + 2.068326115875244, + 2.038449552383423, + 2.032887363433838, + 2.0267745724487303, + 2.026417502670288, + 2.0202374043273927, + 2.018483938064575, + 2.0165669372558592, + 2.019748689651489, + 2.0128509746551515, + 2.0144077210235594, + 2.0115078826904296, + 2.0115395934295655, + 2.007768662490845, + 2.0097858222961427, + 2.0063318170166013, + 2.0055432247924805, + 2.0048474722290037, + 2.0065497072982788, + 2.0015513079071043, + 2.0043911225128173, + 2.0010596670150758, + 2.0026096622467042, + 2.003728278236389, + 2.0012191329193114, + 2.0005321977233885, + 2.0005435330581665, + 1.999249365234375, + 1.9984628038024903, + 1.9983316033935548, + 1.997845711593628, + 1.9972105752563476, + 1.9978763916778564, + 1.9953950925064088, + 1.9970173344421387, + 1.9984811785888672, + 1.9950866604232789, + 1.9947769330596923, + 1.9955189801788331, + 1.9929481762313843, + 1.9966953428649903, + 1.9945243342590333, + 1.9926670757675171, + 1.993055216293335, + 1.9953397652435303, + 1.992445609664917, + 1.9928514415740968, + 1.9945266257476806, + 1.9926752143096924, + 1.992415104598999, + 1.9914240314483642, + 1.9914591967773438, + 1.9917445565032958, + 1.9892081171417237, + 1.9890185669708251, + 1.9918106182479858, + 1.9911041564178467, + 1.9896882055282592, + 1.9890501625823975, + 1.9907431673431397, + 1.9901649180603027, + 1.9891316648101807, + 1.9877268949127198, + 1.989798042602539, + 1.9878116387939453, + 1.9890665167236328, + 1.9905334783935547, + 1.9886393180847168, + 1.988864939727783, + 1.9884007026672363, + 1.9877286835479737, + 1.989529699935913, + 1.9890546997070313, + 1.9868773061752318, + 1.9869191469573975, + 1.9882244151306152, + 1.987383364868164, + 1.9855076770782472, + 1.9868913415908813, + 1.9883932913970948, + 1.9862220487213136, + 1.9841738792419434, + 1.9836644219970703, + 1.9869539430999756, + 1.984350054588318, + 1.9856882331848145, + 1.9835283684539795, + 1.9859053189849853, + 1.9878566506576538, + 1.986811107788086, + 1.9838481521606446, + 1.986321011314392, + 1.9864019583892822, + 1.9851219155883788, + 1.9863925772476196, + 1.9855677154541016, + 1.984962483253479, + 1.9849489275360108, + 1.985427260131836, + 1.984917903213501 + ], + "train_acc": [ + 0.23632, + 0.24556, + 0.24874, + 0.25046, + 0.25146, + 0.25464, + 0.25568, + 0.25664, + 0.25548, + 0.25852, + 0.25636, + 0.26028, + 0.25868, + 0.26176, + 0.2607, + 0.26174, + 0.26266, + 0.26672, + 0.26472, + 0.2636, + 0.26466, + 0.26618, + 0.2643, + 0.26808, + 0.2664, + 0.26632, + 0.27, + 0.26608, + 0.26812, + 0.26848, + 0.26888, + 0.27018, + 0.27012, + 0.2694, + 0.2686, + 0.2696, + 0.27142, + 0.27298, + 0.27042, + 0.27126, + 0.2717, + 0.2718, + 0.27386, + 0.27346, + 0.26924, + 0.27342, + 0.27324, + 0.27112, + 0.27066, + 0.27258, + 0.27196, + 0.27386, + 0.27262, + 0.2753, + 0.274, + 0.27484, + 0.27268, + 0.27362, + 0.27338, + 0.27272, + 0.274, + 0.27568, + 0.27592, + 0.27312, + 0.27434, + 0.2757, + 0.27476, + 0.27544, + 0.27548, + 0.27548, + 0.2741, + 0.27606, + 0.27428, + 0.27634, + 0.27604, + 0.2735, + 0.27494, + 0.27588, + 0.27586, + 0.27474, + 0.27652, + 0.27834, + 0.27718, + 0.27562, + 0.27728, + 0.27692, + 0.27862, + 0.27782, + 0.27452, + 0.27704, + 0.27826, + 0.27674, + 0.27832, + 0.27826, + 0.27584, + 0.27546, + 0.27866, + 0.27724, + 0.2748, + 0.2757 + ], + "test_acc": [ + 0.2602, + 0.2704, + 0.2727, + 0.2629, + 0.2497, + 0.2721, + 0.2855, + 0.2759, + 0.2772, + 0.2885, + 0.2833, + 0.2656, + 0.2889, + 0.2844, + 0.2696, + 0.2757, + 0.2739, + 0.2826, + 0.2819, + 0.2819, + 0.2892, + 0.2867, + 0.2852, + 0.2806, + 0.2973, + 0.2863, + 0.2939, + 0.2937, + 0.2926, + 0.2764, + 0.2824, + 0.2885, + 0.2758, + 0.2824, + 0.2965, + 0.2933, + 0.2839, + 0.2859, + 0.2924, + 0.2871, + 0.2828, + 0.2905, + 0.2969, + 0.2792, + 0.286, + 0.2835, + 0.2903, + 0.2892, + 0.2897, + 0.2923, + 0.2943, + 0.2885, + 0.2928, + 0.2967, + 0.3031, + 0.2862, + 0.2863, + 0.2915, + 0.2885, + 0.2905, + 0.2855, + 0.3001, + 0.2961, + 0.2865, + 0.2976, + 0.2898, + 0.2962, + 0.2908, + 0.2936, + 0.2822, + 0.2934, + 0.2904, + 0.2952, + 0.2923, + 0.29, + 0.2923, + 0.2933, + 0.2869, + 0.2957, + 0.2945, + 0.2951, + 0.2981, + 0.2954, + 0.2891, + 0.2935, + 0.294, + 0.2948, + 0.2916, + 0.2959, + 0.2929, + 0.296, + 0.2939, + 0.2944, + 0.2937, + 0.2942, + 0.294, + 0.2942, + 0.2945, + 0.2944, + 0.2943 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.38314950466156006, + -0.0002771378494799137, + -0.0004160030803177506, + 3.204246604582295e-05, + -4.1770588723011315e-05, + -0.0001344233169220388, + -3.0795046768616885e-05, + -3.3676558814477175e-05 + ], + "perturbation_rho": [ + -0.00034889206290245056, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -3.711320459842682e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.1324882507324219e-06, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -3.98978590965271e-06, + 9.313225746154785e-10, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ] + }, + "hidden_norms_per_layer": [ + 54124.7890625, + 783932864.0, + 4690777600.0, + 5000870912.0, + 5716334080.0, + 6005877760.0, + 7473982976.0, + 7776250880.0, + 8210811904.0 + ], + "bp_grad_norms_per_layer": [ + 3.0676110895910824e-07, + 1.972915580905621e-10, + 1.953913281171893e-10, + 1.953771588958375e-10, + 1.9510017212898134e-10, + 1.950706401965263e-10, + 1.9518447968991381e-10, + 1.951617339956968e-10, + 1.953130296383776e-10 + ] + }, + "drift": { + "embed.weight": 328.1038460126514, + "embed.bias": 254.00296968816264, + "blocks.0.ln.weight": 10.375421373661911, + "blocks.0.w1.weight": 272.1998177697825, + "blocks.0.w1.bias": 246.55078363508565, + "blocks.0.w2.weight": 489.8754501153931, + "blocks.1.ln.weight": 9.050343989939378, + "blocks.1.w1.weight": 388.3338705775239, + "blocks.1.w1.bias": 383.7188912712258, + "blocks.1.w2.weight": 402.9058555114096, + "blocks.2.ln.weight": 6.952665632270595, + "blocks.2.w1.weight": 255.3639402973493, + "blocks.2.w1.bias": 230.39667811143107, + "blocks.2.w2.weight": 247.93087044444246, + "blocks.3.ln.weight": 8.807891710920156, + "blocks.3.w1.weight": 351.3359831200073, + "blocks.3.w1.bias": 336.28061721166756, + "blocks.3.w2.weight": 321.5850139623348, + "blocks.4.ln.weight": 8.196055678685234, + "blocks.4.w1.weight": 287.52820992582446, + "blocks.4.w1.bias": 262.4079024326842, + "blocks.4.w2.weight": 263.46643755724006, + "blocks.5.ln.weight": 9.62846119408936, + "blocks.5.w1.weight": 390.9965336931924, + "blocks.5.w1.bias": 375.54893253589324, + "blocks.5.w2.weight": 382.6688713102951, + "blocks.6.ln.weight": 7.376838940788987, + "blocks.6.w1.weight": 284.9872318771198, + "blocks.6.w1.bias": 263.7123597178357, + "blocks.6.w2.weight": 270.37673819187216, + "blocks.7.ln.weight": 8.135998104403308, + "blocks.7.w1.weight": 306.058248194457, + "blocks.7.w1.bias": 285.31467158190975, + "blocks.7.w2.weight": 284.5221337965431, + "out_ln.weight": 0.540826641326857, + "out_head.weight": 7.9794751282903125, + "out_head.bias": 0.727890481081825 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.0412177281188963, + 1.9586744415664672, + 1.9367398124694823, + 1.9134309594726562, + 1.9032167110824585, + 1.8894411625671386, + 1.8745269380950929, + 1.8681198519515991, + 1.859885403137207, + 1.8490976187515258, + 1.8433319261932373, + 1.8375304843139648, + 1.8348215142822266, + 1.8295704886627198, + 1.8297615969085694, + 1.8252419507217408, + 1.8235907801055908, + 1.8161062512207031, + 1.8182137090301513, + 1.8134747228240966, + 1.8125705044555664, + 1.8099720315551757, + 1.8140770354843139, + 1.812659885635376, + 1.8111054842376708, + 1.8094129010772706, + 1.8020060912322997, + 1.803116989402771, + 1.795589069519043, + 1.7962562110137938, + 1.793900502319336, + 1.7944407584381104, + 1.7943759982681275, + 1.7872746685028076, + 1.788272954788208, + 1.7918251754379273, + 1.783637704849243, + 1.7797057202529907, + 1.7784698824691771, + 1.7769115085601808, + 1.7821795818328858, + 1.774635576171875, + 1.777582476463318, + 1.7744439294815064, + 1.7718625156402588, + 1.7697111307525635, + 1.77068788394928, + 1.764987317237854, + 1.7658291906356811, + 1.7618338104629516, + 1.763578690109253, + 1.7595625869750977, + 1.7586208461761474, + 1.7597406827163695, + 1.7557958205795288, + 1.756042088279724, + 1.7561137405014038, + 1.75862396736145, + 1.7520068192672729, + 1.7528972547149657, + 1.7537218439102173, + 1.7509230569076537, + 1.7497916799163817, + 1.7516448692703248, + 1.747637153892517, + 1.7501363418579101, + 1.7462628519058228, + 1.7452370420074463, + 1.751023518638611, + 1.7453709937286377, + 1.7441615590667725, + 1.745248391342163, + 1.7455106867218018, + 1.744859725265503, + 1.7450293384170532, + 1.741427905960083, + 1.7391855532836915, + 1.739177918357849, + 1.7413748574447632, + 1.740176322555542, + 1.7360426489639282, + 1.7357201895904542, + 1.7336676422119142, + 1.7378975563812256, + 1.7353016548156739, + 1.735442308998108, + 1.7332442119979858, + 1.7347565906524658, + 1.7399420615386962, + 1.735971279335022, + 1.732453648071289, + 1.7338156942749023, + 1.7323283444976807, + 1.733454556350708, + 1.7349756786727906, + 1.730115432472229, + 1.7333317721176147, + 1.730795558128357, + 1.7303683010482789, + 1.7296705428695678 + ], + "train_acc": [ + 0.24972, + 0.2857, + 0.29716, + 0.3051, + 0.31388, + 0.32028, + 0.32718, + 0.32794, + 0.33452, + 0.33888, + 0.33738, + 0.34414, + 0.3427, + 0.344, + 0.34354, + 0.34574, + 0.34834, + 0.34704, + 0.34938, + 0.34914, + 0.34736, + 0.34888, + 0.34864, + 0.3503, + 0.35144, + 0.35012, + 0.35328, + 0.35404, + 0.3563, + 0.3579, + 0.35846, + 0.35674, + 0.35756, + 0.35986, + 0.35896, + 0.35632, + 0.36026, + 0.35954, + 0.36354, + 0.36492, + 0.36274, + 0.36668, + 0.3658, + 0.36558, + 0.36578, + 0.36772, + 0.36466, + 0.36692, + 0.36736, + 0.36832, + 0.36896, + 0.36776, + 0.37266, + 0.37066, + 0.3741, + 0.37286, + 0.3711, + 0.37272, + 0.37522, + 0.37178, + 0.37578, + 0.37454, + 0.37386, + 0.37256, + 0.37668, + 0.3778, + 0.3781, + 0.37876, + 0.37596, + 0.37894, + 0.37748, + 0.3801, + 0.37678, + 0.3776, + 0.37852, + 0.37916, + 0.38186, + 0.38062, + 0.38234, + 0.3802, + 0.38074, + 0.38048, + 0.38312, + 0.38086, + 0.38172, + 0.38188, + 0.37984, + 0.38142, + 0.37894, + 0.38204, + 0.38274, + 0.38072, + 0.38316, + 0.38076, + 0.38282, + 0.38266, + 0.38244, + 0.3807, + 0.38074, + 0.38534 + ], + "test_acc": [ + 0.305, + 0.3111, + 0.3184, + 0.3374, + 0.3246, + 0.3486, + 0.3605, + 0.3591, + 0.3631, + 0.3644, + 0.3656, + 0.37, + 0.3724, + 0.3813, + 0.3682, + 0.3692, + 0.3665, + 0.3747, + 0.3765, + 0.3701, + 0.3749, + 0.3749, + 0.3821, + 0.377, + 0.3807, + 0.3795, + 0.3856, + 0.3799, + 0.3833, + 0.383, + 0.3687, + 0.3841, + 0.3836, + 0.377, + 0.3894, + 0.3861, + 0.3944, + 0.384, + 0.3911, + 0.3879, + 0.3843, + 0.3919, + 0.389, + 0.3888, + 0.3918, + 0.3954, + 0.3994, + 0.3935, + 0.3926, + 0.3914, + 0.3866, + 0.3981, + 0.3864, + 0.3947, + 0.4018, + 0.3938, + 0.3965, + 0.3931, + 0.3998, + 0.4003, + 0.401, + 0.3984, + 0.4009, + 0.3958, + 0.3936, + 0.4004, + 0.3968, + 0.3984, + 0.4015, + 0.4012, + 0.4042, + 0.397, + 0.4023, + 0.3968, + 0.4001, + 0.402, + 0.3965, + 0.3973, + 0.3964, + 0.4025, + 0.4011, + 0.4017, + 0.4014, + 0.3981, + 0.3992, + 0.4015, + 0.4024, + 0.4008, + 0.4012, + 0.4004, + 0.4017, + 0.4023, + 0.4025, + 0.4032, + 0.4018, + 0.4015, + 0.4018, + 0.4028, + 0.4024, + 0.4026 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.016079379245638847, + 0.0893096849322319, + -0.03535890951752663, + -0.027881216257810593, + -0.04659418389201164, + -0.039142537862062454, + -0.024059921503067017, + 0.9954550266265869 + ], + "perturbation_rho": [ + -0.003716413863003254, + -0.01681683585047722, + 0.0011358268093317747, + 0.011708367615938187, + -0.0036298027262091637, + -0.028253771364688873, + -0.03197161853313446, + -0.0007108366116881371 + ], + "nudging": { + "0.001": [ + -1.6684643924236298e-06, + -3.836903488263488e-07, + 2.2817403078079224e-08, + -4.656612873077393e-10, + -4.0745362639427185e-08, + 4.21423465013504e-08, + 5.2852556109428406e-08, + -1.6444828361272812e-06 + ], + "0.003": [ + -4.838439053855836e-06, + -1.1676456779241562e-06, + 1.3969838619232178e-07, + 2.7101486921310425e-07, + 2.761371433734894e-07, + 2.1047890186309814e-07, + 8.940696716308594e-08, + -5.75091689825058e-06 + ], + "0.01": [ + -1.587084261700511e-05, + -3.770226612687111e-06, + 4.6566128730773926e-07, + 6.083864718675613e-07, + 9.522773325443268e-07, + 8.021015673875809e-07, + 3.6670826375484467e-07, + -2.0256484276615083e-05 + ] + }, + "hidden_norms_per_layer": [ + 7878.02783203125, + 160404.015625, + 657860.0625, + 1298493.75, + 1530210.875, + 1789493.5, + 1949845.875, + 2001972.0, + 954169.0 + ], + "bp_grad_norms_per_layer": [ + 2.3713459086138755e-05, + 1.3362353001866722e-06, + 8.249007805716246e-07, + 8.118395271594636e-07, + 8.120087500174122e-07, + 8.141144007822732e-07, + 8.151110932885786e-07, + 8.161479740920186e-07, + 7.983429100022477e-07 + ] + }, + "drift": { + "embed.weight": 50.27203344624106, + "embed.bias": 18.164593935621223, + "blocks.0.ln.weight": 1.2923599647753492, + "blocks.0.w1.weight": 17.615281639137443, + "blocks.0.w1.bias": 15.943020869292715, + "blocks.0.w2.weight": 62.07043407165766, + "blocks.1.ln.weight": 0.9933874122365621, + "blocks.1.w1.weight": 19.89251765717899, + "blocks.1.w1.bias": 15.397253448680312, + "blocks.1.w2.weight": 48.46816742349729, + "blocks.2.ln.weight": 0.7602194940473055, + "blocks.2.w1.weight": 21.862181220405073, + "blocks.2.w1.bias": 20.159047961824015, + "blocks.2.w2.weight": 35.30605863540395, + "blocks.3.ln.weight": 0.6625976974501915, + "blocks.3.w1.weight": 19.25465609702534, + "blocks.3.w1.bias": 19.348925466014506, + "blocks.3.w2.weight": 46.43927447797268, + "blocks.4.ln.weight": 0.5980307474752141, + "blocks.4.w1.weight": 19.617144750384867, + "blocks.4.w1.bias": 20.038154099436944, + "blocks.4.w2.weight": 42.571562389062414, + "blocks.5.ln.weight": 0.6412290763020658, + "blocks.5.w1.weight": 18.227445383614373, + "blocks.5.w1.bias": 17.78546251590179, + "blocks.5.w2.weight": 48.65373336198152, + "blocks.6.ln.weight": 0.6350627815251478, + "blocks.6.w1.weight": 16.896056503722676, + "blocks.6.w1.bias": 14.642090090469127, + "blocks.6.w2.weight": 58.25817193229909, + "blocks.7.ln.weight": 0.7771808191712193, + "blocks.7.w1.weight": 20.486284886878302, + "blocks.7.w1.bias": 19.32003589686942, + "blocks.7.w2.weight": 62.85071102943643, + "out_ln.weight": 0.3595629051418566, + "out_head.weight": 6.471892657261998, + "out_head.bias": 1.5825345450853237 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 8, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 8 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L8_seed8", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file diff --git a/results/fa_dfa_d512_L8_seed9/results_cifar10.json b/results/fa_dfa_d512_L8_seed9/results_cifar10.json new file mode 100644 index 0000000..b70df2d --- /dev/null +++ b/results/fa_dfa_d512_L8_seed9/results_cifar10.json @@ -0,0 +1,881 @@ +{ + "9": { + "dfa": { + "log": { + "train_loss": [ + 2.0749315245056152, + 2.0436887923812868, + 2.0410750790023804, + 2.0344971646881103, + 2.030566175842285, + 2.0275797971343996, + 2.0244680599212646, + 2.024917350387573, + 2.027327563819885, + 2.018273809814453, + 2.0191563272094726, + 2.0152392296600343, + 2.0181246972274782, + 2.0139052614974977, + 2.011121483078003, + 2.007120831375122, + 2.009763352279663, + 2.010490125274658, + 2.0083817123413086, + 2.010725986175537, + 2.006365931854248, + 2.0057907654571534, + 2.006084675445557, + 2.0068956647491456, + 2.0047656631851196, + 2.005573451766968, + 2.003518327026367, + 2.003638141479492, + 2.0002300247192384, + 2.001433812713623, + 2.004064089279175, + 2.001738834075928, + 1.999301480255127, + 2.0016421214294433, + 1.9995300130462645, + 1.9971350913238526, + 2.002013059692383, + 2.000174626083374, + 1.9979997787475585, + 1.9982461312103272, + 1.9966425020599365, + 1.9979894361114503, + 1.9972480834579467, + 1.9982498080444335, + 1.9934808911514281, + 1.9956281592559815, + 1.99603251953125, + 1.9947702381134034, + 1.9941040051269532, + 1.9968071872329711, + 1.9977976748657227, + 1.9950814849853515, + 1.996515994567871, + 1.9940743815612794, + 1.9959130297470093, + 1.9953608598327637, + 1.995676156616211, + 1.9918495462036132, + 1.9927745639801024, + 1.9944643795013428, + 1.9957545782852173, + 1.99323220954895, + 1.9948831502914428, + 1.99567852684021, + 1.9960307669830322, + 1.993249538116455, + 1.992333110961914, + 1.9930713208007813, + 1.9917521324157714, + 1.9922201361846923, + 1.990846061668396, + 1.9919507978057862, + 1.993395569152832, + 1.9929897798919678, + 1.9927783852386474, + 1.992465722427368, + 1.9913483853149414, + 1.9910357999420165, + 1.9905700182724, + 1.9907023773956298, + 1.9905259375, + 1.9902730661010741, + 1.9915288207244872, + 1.9918687856292725, + 1.9918737398910522, + 1.9905504457092285, + 1.991807127685547, + 1.991159518737793, + 1.9909059362030028, + 1.987746954574585, + 1.9886708137512208, + 1.991118684539795, + 1.9877248714447022, + 1.9914885107040405, + 1.9913817571258545, + 1.990808463897705, + 1.9888677963638306, + 1.9901370908355713, + 1.9902113021850585, + 1.9896661295318603 + ], + "train_acc": [ + 0.233, + 0.24974, + 0.24902, + 0.2555, + 0.2535, + 0.25314, + 0.2584, + 0.25894, + 0.25562, + 0.25858, + 0.25952, + 0.26262, + 0.2592, + 0.26194, + 0.26504, + 0.26518, + 0.26234, + 0.26346, + 0.2654, + 0.2614, + 0.2644, + 0.26158, + 0.26868, + 0.26646, + 0.26696, + 0.26778, + 0.26766, + 0.26614, + 0.2709, + 0.26686, + 0.26754, + 0.26956, + 0.27002, + 0.27106, + 0.27018, + 0.27268, + 0.26956, + 0.26954, + 0.27276, + 0.2717, + 0.27314, + 0.27052, + 0.27232, + 0.2731, + 0.27476, + 0.27392, + 0.27266, + 0.27468, + 0.27238, + 0.27302, + 0.27042, + 0.27416, + 0.27348, + 0.27274, + 0.27358, + 0.27494, + 0.27386, + 0.27458, + 0.2768, + 0.27558, + 0.27496, + 0.27546, + 0.27332, + 0.27632, + 0.27632, + 0.27756, + 0.27488, + 0.27564, + 0.27622, + 0.27424, + 0.27614, + 0.27482, + 0.27548, + 0.2777, + 0.27614, + 0.27582, + 0.27502, + 0.27412, + 0.27556, + 0.2774, + 0.27578, + 0.27622, + 0.27676, + 0.27774, + 0.27778, + 0.27504, + 0.276, + 0.27678, + 0.2769, + 0.27788, + 0.2765, + 0.27636, + 0.27804, + 0.27786, + 0.27472, + 0.2759, + 0.2799, + 0.27662, + 0.27648, + 0.27812 + ], + "test_acc": [ + 0.2539, + 0.2722, + 0.2611, + 0.265, + 0.2773, + 0.2768, + 0.2858, + 0.2962, + 0.2833, + 0.2576, + 0.2732, + 0.2985, + 0.2776, + 0.2927, + 0.2865, + 0.2838, + 0.2825, + 0.283, + 0.2827, + 0.2834, + 0.2795, + 0.2866, + 0.2944, + 0.2847, + 0.297, + 0.2895, + 0.2903, + 0.2906, + 0.2904, + 0.2849, + 0.3028, + 0.2955, + 0.2932, + 0.2846, + 0.2898, + 0.2915, + 0.2827, + 0.3025, + 0.2708, + 0.2993, + 0.2941, + 0.2879, + 0.3013, + 0.2873, + 0.3017, + 0.3012, + 0.2932, + 0.3037, + 0.2939, + 0.2959, + 0.2899, + 0.2903, + 0.291, + 0.2999, + 0.292, + 0.3001, + 0.3011, + 0.299, + 0.292, + 0.3016, + 0.2875, + 0.301, + 0.2948, + 0.2952, + 0.2906, + 0.2961, + 0.2975, + 0.2937, + 0.2969, + 0.2938, + 0.3015, + 0.2993, + 0.3007, + 0.3012, + 0.3012, + 0.2974, + 0.2929, + 0.3003, + 0.2973, + 0.2988, + 0.301, + 0.2941, + 0.2997, + 0.2995, + 0.2979, + 0.2993, + 0.2998, + 0.2963, + 0.3029, + 0.2969, + 0.2976, + 0.2979, + 0.2958, + 0.2979, + 0.2971, + 0.2974, + 0.2977, + 0.2977, + 0.2977, + 0.2977 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.3850422501564026, + 0.0012538384180516005, + 0.0006158786127343774, + -0.0003950851387344301, + 0.000671692076139152, + 0.00011835923214675859, + -0.00013412139378488064, + -0.0009948844090104103 + ], + "perturbation_rho": [ + -0.009509662166237831, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -3.688037395477295e-07, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -1.0244548320770264e-06, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + -1.862645149230957e-09 + ], + "0.01": [ + -3.604218363761902e-06, + -8.381903171539307e-09, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + -1.862645149230957e-09 + ] + }, + "hidden_norms_per_layer": [ + 57815.328125, + 1306904576.0, + 3130353408.0, + 6042526720.0, + 6570734080.0, + 7140251648.0, + 10449125376.0, + 10382031872.0, + 10819989504.0 + ], + "bp_grad_norms_per_layer": [ + 2.387568827089126e-07, + 2.7390667511895117e-10, + 2.7443888828138086e-10, + 2.747598537578e-10, + 2.7503455068966787e-10, + 2.749568628335197e-10, + 2.749303285032312e-10, + 2.749313832151046e-10, + 2.7492974563614325e-10 + ] + }, + "drift": { + "embed.weight": 338.964341619299, + "embed.bias": 255.6470029656094, + "blocks.0.ln.weight": 9.7660440200238, + "blocks.0.w1.weight": 293.6860368082436, + "blocks.0.w1.bias": 259.7815916326078, + "blocks.0.w2.weight": 507.1631892613405, + "blocks.1.ln.weight": 8.6057770011893, + "blocks.1.w1.weight": 327.85104096601293, + "blocks.1.w1.bias": 315.96204358536954, + "blocks.1.w2.weight": 340.58083302512796, + "blocks.2.ln.weight": 9.649643863338673, + "blocks.2.w1.weight": 416.26890827062306, + "blocks.2.w1.bias": 387.4960514327406, + "blocks.2.w2.weight": 412.7077111053307, + "blocks.3.ln.weight": 8.443154226363815, + "blocks.3.w1.weight": 329.2601915023256, + "blocks.3.w1.bias": 308.6467481709907, + "blocks.3.w2.weight": 307.62522052468177, + "blocks.4.ln.weight": 8.653036768995564, + "blocks.4.w1.weight": 354.6654001843918, + "blocks.4.w1.bias": 335.86010162934014, + "blocks.4.w2.weight": 337.73623750534483, + "blocks.5.ln.weight": 11.320001722963196, + "blocks.5.w1.weight": 468.93827725593786, + "blocks.5.w1.bias": 433.59224246544227, + "blocks.5.w2.weight": 463.6712206215392, + "blocks.6.ln.weight": 8.063403696936168, + "blocks.6.w1.weight": 318.91137867276336, + "blocks.6.w1.bias": 306.65686506595944, + "blocks.6.w2.weight": 299.5449959675926, + "blocks.7.ln.weight": 8.29383434585519, + "blocks.7.w1.weight": 333.33394053359984, + "blocks.7.w1.bias": 309.27674432467256, + "blocks.7.w2.weight": 305.2383919884082, + "out_ln.weight": 0.6921153697505151, + "out_head.weight": 9.920257520136026, + "out_head.bias": 0.6756283909065531 + } + }, + "fa": { + "log": { + "train_loss": [ + 2.054712998123169, + 1.9743254754638673, + 1.9496866632080079, + 1.9313411603164672, + 1.916562248878479, + 1.9087221280288695, + 1.8928792087173463, + 1.8860099752426147, + 1.8823522333526612, + 1.8718870676040649, + 1.8658955471038818, + 1.8600263873672485, + 1.861494415512085, + 1.8535768915176392, + 1.848890178527832, + 1.8419178400421143, + 1.843526672897339, + 1.8426634002685547, + 1.836815809020996, + 1.8399362145233153, + 1.8320682555770873, + 1.8253674346923827, + 1.8266462970733643, + 1.8268505213165283, + 1.819463009338379, + 1.815872594833374, + 1.8166155709075928, + 1.8089701477432252, + 1.8076338037490844, + 1.8046819745635987, + 1.8099775692749023, + 1.804258670349121, + 1.800314033203125, + 1.802857616043091, + 1.795948698425293, + 1.7952545282745362, + 1.7960923547363281, + 1.7939829620361327, + 1.7962147301483153, + 1.7921875751113892, + 1.7896775894927979, + 1.7867036238861085, + 1.7867207586669922, + 1.7872415407562257, + 1.782658879699707, + 1.780779080429077, + 1.7818151058959961, + 1.7821691263580322, + 1.7803733910369872, + 1.7798878842163086, + 1.7819366843032838, + 1.779062672958374, + 1.7781674340438842, + 1.7732916201019286, + 1.7719713204574585, + 1.7729518437957763, + 1.7732273389053346, + 1.7703840225982665, + 1.768649151878357, + 1.7698098583221435, + 1.7702393863296508, + 1.7676599294662476, + 1.7648498352813722, + 1.767660079307556, + 1.7671956900024415, + 1.7631733521270752, + 1.7601489026260375, + 1.7607763108062744, + 1.7581446547698976, + 1.7573072035980224, + 1.7607083935546874, + 1.7579230298233033, + 1.7577186632537842, + 1.7563656659317017, + 1.7547048263931275, + 1.7598694692230226, + 1.757342479248047, + 1.7562967572021484, + 1.7538646789169312, + 1.7546308361816407, + 1.753781385269165, + 1.7569293072128296, + 1.7535972839736937, + 1.756253225708008, + 1.752462748413086, + 1.750845112876892, + 1.7530096230316161, + 1.753320961074829, + 1.751194098739624, + 1.7476333127593995, + 1.7507887158966065, + 1.75599822265625, + 1.7483607616424561, + 1.750292029685974, + 1.7498931618881226, + 1.7509470245742798, + 1.7501217209243773, + 1.7520614090728759, + 1.750200431213379, + 1.7495508447265624 + ], + "train_acc": [ + 0.23466, + 0.27712, + 0.2872, + 0.29944, + 0.30162, + 0.30732, + 0.31528, + 0.31818, + 0.31696, + 0.32156, + 0.32474, + 0.32968, + 0.32702, + 0.33288, + 0.33656, + 0.33372, + 0.33702, + 0.33554, + 0.33892, + 0.33848, + 0.34342, + 0.34664, + 0.34692, + 0.34766, + 0.34448, + 0.3486, + 0.346, + 0.35034, + 0.35144, + 0.35384, + 0.35184, + 0.35544, + 0.35718, + 0.35464, + 0.35864, + 0.358, + 0.35758, + 0.35774, + 0.35722, + 0.36054, + 0.35958, + 0.36134, + 0.36046, + 0.3609, + 0.3655, + 0.36438, + 0.36688, + 0.36438, + 0.36392, + 0.36454, + 0.36186, + 0.36844, + 0.36298, + 0.3657, + 0.36936, + 0.3674, + 0.36656, + 0.36678, + 0.3693, + 0.36684, + 0.36768, + 0.36934, + 0.36892, + 0.37064, + 0.37, + 0.3704, + 0.37166, + 0.37122, + 0.3729, + 0.37234, + 0.37368, + 0.37366, + 0.37094, + 0.3736, + 0.3747, + 0.37176, + 0.37262, + 0.37366, + 0.374, + 0.37594, + 0.37354, + 0.374, + 0.37318, + 0.3751, + 0.375, + 0.3755, + 0.3763, + 0.3747, + 0.37738, + 0.3774, + 0.37554, + 0.37554, + 0.37726, + 0.37876, + 0.3747, + 0.37554, + 0.37696, + 0.37494, + 0.37724, + 0.37714 + ], + "test_acc": [ + 0.2763, + 0.3104, + 0.3093, + 0.3052, + 0.3209, + 0.3254, + 0.3374, + 0.3255, + 0.3509, + 0.3324, + 0.3485, + 0.3621, + 0.3572, + 0.3522, + 0.3638, + 0.3522, + 0.3562, + 0.3678, + 0.3662, + 0.3637, + 0.3713, + 0.372, + 0.3708, + 0.3802, + 0.3793, + 0.3859, + 0.3685, + 0.3665, + 0.3765, + 0.3782, + 0.3917, + 0.3905, + 0.3833, + 0.3867, + 0.3788, + 0.3843, + 0.3776, + 0.3794, + 0.3847, + 0.383, + 0.3929, + 0.392, + 0.392, + 0.3834, + 0.3857, + 0.3926, + 0.3914, + 0.387, + 0.3899, + 0.3859, + 0.3952, + 0.3883, + 0.3936, + 0.3901, + 0.3938, + 0.3899, + 0.3887, + 0.3938, + 0.3882, + 0.395, + 0.3878, + 0.3907, + 0.3952, + 0.3954, + 0.393, + 0.3931, + 0.3952, + 0.3999, + 0.3926, + 0.3963, + 0.3966, + 0.3973, + 0.3991, + 0.4007, + 0.3957, + 0.399, + 0.3988, + 0.3977, + 0.3955, + 0.395, + 0.3975, + 0.3971, + 0.3999, + 0.4003, + 0.3973, + 0.3988, + 0.3966, + 0.3972, + 0.4, + 0.3995, + 0.4008, + 0.3991, + 0.3981, + 0.3983, + 0.4006, + 0.3998, + 0.4001, + 0.4001, + 0.3992, + 0.3992 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.009833792224526405, + 0.05025029927492142, + 0.011137298308312893, + -0.039807502180337906, + -0.012322505004703999, + -0.023192159831523895, + 0.01924338936805725, + 0.9853411912918091 + ], + "perturbation_rho": [ + 0.009103747084736824, + -0.019891876727342606, + -0.013953445479273796, + -0.03727542236447334, + 0.01967245526611805, + 0.006721112877130508, + 0.013294404372572899, + 0.0555168092250824 + ], + "nudging": { + "0.001": [ + -6.459013093262911e-07, + -2.127781044691801e-07, + -3.3585820347070694e-08, + 5.916808731853962e-08, + 5.3318217396736145e-08, + 1.6763806343078613e-08, + 2.9802322387695312e-08, + -1.0665098670870066e-06 + ], + "0.003": [ + -1.868669642135501e-06, + -6.879272405058146e-07, + -5.1280949264764786e-08, + 2.151064109057188e-07, + 3.4226104617118835e-08, + 5.404581315815449e-08, + -1.3181124813854694e-07, + -3.7869031075388193e-06 + ], + "0.01": [ + -6.252725142985582e-06, + -2.3539469111710787e-06, + -1.600128598511219e-07, + 5.455221980810165e-07, + 1.2578675523400307e-07, + 2.0262086763978004e-07, + -3.8879807107150555e-07, + -1.3263634173199534e-05 + ] + }, + "hidden_norms_per_layer": [ + 9282.8193359375, + 113090.3984375, + 1271631.0, + 1386270.5, + 1726514.25, + 1932736.5, + 2234588.0, + 2239280.5, + 1909485.875 + ], + "bp_grad_norms_per_layer": [ + 2.4564533305238e-05, + 1.5358415339505882e-06, + 5.894418677598878e-07, + 5.805990781482251e-07, + 5.811043592984788e-07, + 5.801387032988714e-07, + 5.800572466796439e-07, + 5.741365498579398e-07, + 5.41225858796679e-07 + ] + }, + "drift": { + "embed.weight": 57.80682284068776, + "embed.bias": 15.88198491033531, + "blocks.0.ln.weight": 1.158570355271808, + "blocks.0.w1.weight": 17.652543030292062, + "blocks.0.w1.bias": 13.648960422025777, + "blocks.0.w2.weight": 62.30914257814011, + "blocks.1.ln.weight": 0.9953869430578365, + "blocks.1.w1.weight": 25.201451590168976, + "blocks.1.w1.bias": 21.042828750553948, + "blocks.1.w2.weight": 46.84933855533669, + "blocks.2.ln.weight": 0.5242024488499516, + "blocks.2.w1.weight": 18.18564601499141, + "blocks.2.w1.bias": 16.795126625795433, + "blocks.2.w2.weight": 37.67242795528057, + "blocks.3.ln.weight": 0.6028075449266447, + "blocks.3.w1.weight": 22.217273085841715, + "blocks.3.w1.bias": 23.55749047158996, + "blocks.3.w2.weight": 31.067562938269838, + "blocks.4.ln.weight": 0.5882814647513678, + "blocks.4.w1.weight": 19.789776992865228, + "blocks.4.w1.bias": 19.617707942323612, + "blocks.4.w2.weight": 40.08965442413366, + "blocks.5.ln.weight": 0.6533636599582089, + "blocks.5.w1.weight": 22.778934917253743, + "blocks.5.w1.bias": 23.182695560236425, + "blocks.5.w2.weight": 28.95545561908214, + "blocks.6.ln.weight": 0.5059341920781962, + "blocks.6.w1.weight": 15.068172940394142, + "blocks.6.w1.bias": 10.870554507643952, + "blocks.6.w2.weight": 64.80351856105872, + "blocks.7.ln.weight": 0.5640754334192314, + "blocks.7.w1.weight": 17.02027959939073, + "blocks.7.w1.bias": 12.619662219928584, + "blocks.7.w2.weight": 75.96627478247885, + "out_ln.weight": 0.44409435398278874, + "out_head.weight": 7.673916065937935, + "out_head.bias": 0.9250628284434071 + } + } + }, + "config": { + "dataset": "cifar10", + "d_hidden": 512, + "num_blocks": 8, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 9 + ], + "gpu": 0, + "output_dir": "results/fa_dfa_d512_L8_seed9", + "methods": [ + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 10 + } +}
\ No newline at end of file |
