summaryrefslogtreecommitdiff
path: root/results
diff options
context:
space:
mode:
authorYurenHao0426 <Blackhao0426@gmail.com>2026-04-26 18:25:50 -0500
committerYurenHao0426 <Blackhao0426@gmail.com>2026-04-26 18:25:50 -0500
commit827c658fa9a750f3c6ebdb87703762f10f69f6ff (patch)
tree58fc01d936fe1dc40b1dd580d50342deede33a5f /results
parentc4bd8d321b3cb2e77a6727ecfc2c2ae999065c4a (diff)
d=512 deep scan complete: FA+DFA at L=6,8,12 (10 seeds each)
FA is depth-invariant at ~0.41 for L>=4, never below frozen 0.349. Only L=2 has enough variance (σ=0.027) for 3/10 seeds to qualify. Deeper L does not produce the "both FA and DFA fail" panel. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat (limited to 'results')
-rw-r--r--results/fa_dfa_d512_L12_seed0/results_cifar10.json969
-rw-r--r--results/fa_dfa_d512_L12_seed1/results_cifar10.json969
-rw-r--r--results/fa_dfa_d512_L12_seed2/results_cifar10.json969
-rw-r--r--results/fa_dfa_d512_L12_seed3/results_cifar10.json969
-rw-r--r--results/fa_dfa_d512_L12_seed4/results_cifar10.json969
-rw-r--r--results/fa_dfa_d512_L12_seed5/results_cifar10.json969
-rw-r--r--results/fa_dfa_d512_L12_seed6/results_cifar10.json969
-rw-r--r--results/fa_dfa_d512_L12_seed7/results_cifar10.json969
-rw-r--r--results/fa_dfa_d512_L12_seed8/results_cifar10.json969
-rw-r--r--results/fa_dfa_d512_L12_seed9/results_cifar10.json969
-rw-r--r--results/fa_dfa_d512_L6_seed0/results_cifar10.json837
-rw-r--r--results/fa_dfa_d512_L6_seed1/results_cifar10.json837
-rw-r--r--results/fa_dfa_d512_L6_seed2/results_cifar10.json837
-rw-r--r--results/fa_dfa_d512_L6_seed3/results_cifar10.json837
-rw-r--r--results/fa_dfa_d512_L6_seed4/results_cifar10.json837
-rw-r--r--results/fa_dfa_d512_L6_seed5/results_cifar10.json837
-rw-r--r--results/fa_dfa_d512_L6_seed6/results_cifar10.json837
-rw-r--r--results/fa_dfa_d512_L6_seed7/results_cifar10.json837
-rw-r--r--results/fa_dfa_d512_L6_seed8/results_cifar10.json837
-rw-r--r--results/fa_dfa_d512_L6_seed9/results_cifar10.json837
-rw-r--r--results/fa_dfa_d512_L8_seed0/results_cifar10.json881
-rw-r--r--results/fa_dfa_d512_L8_seed1/results_cifar10.json881
-rw-r--r--results/fa_dfa_d512_L8_seed2/results_cifar10.json881
-rw-r--r--results/fa_dfa_d512_L8_seed3/results_cifar10.json881
-rw-r--r--results/fa_dfa_d512_L8_seed4/results_cifar10.json881
-rw-r--r--results/fa_dfa_d512_L8_seed5/results_cifar10.json881
-rw-r--r--results/fa_dfa_d512_L8_seed6/results_cifar10.json881
-rw-r--r--results/fa_dfa_d512_L8_seed7/results_cifar10.json881
-rw-r--r--results/fa_dfa_d512_L8_seed8/results_cifar10.json881
-rw-r--r--results/fa_dfa_d512_L8_seed9/results_cifar10.json881
30 files changed, 26870 insertions, 0 deletions
diff --git a/results/fa_dfa_d512_L12_seed0/results_cifar10.json b/results/fa_dfa_d512_L12_seed0/results_cifar10.json
new file mode 100644
index 0000000..0938122
--- /dev/null
+++ b/results/fa_dfa_d512_L12_seed0/results_cifar10.json
@@ -0,0 +1,969 @@
+{
+ "0": {
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 2.0727519187164307,
+ 2.0449732803344727,
+ 2.0343947634887694,
+ 2.0317279723739623,
+ 2.029190194129944,
+ 2.0266742729187013,
+ 2.022695488433838,
+ 2.0220756021881106,
+ 2.017278121871948,
+ 2.013867749862671,
+ 2.0141937175750733,
+ 2.0114837906265257,
+ 2.0090159717559812,
+ 2.006871261138916,
+ 2.0085684817504883,
+ 2.004453958129883,
+ 2.0050127017211916,
+ 2.004625142745972,
+ 2.001847840270996,
+ 2.0038710110855105,
+ 2.0015532569885255,
+ 2.001803522415161,
+ 2.0029406197357176,
+ 1.9984209733581544,
+ 1.9999778453063966,
+ 1.997875029144287,
+ 2.002456622467041,
+ 2.000510860939026,
+ 1.9972556433105468,
+ 1.9980127339935303,
+ 1.9964525595092772,
+ 1.9945909706878662,
+ 1.994441053237915,
+ 1.9951845182037353,
+ 1.995466745147705,
+ 1.992624706878662,
+ 1.9937170362091063,
+ 1.9947755416870117,
+ 1.9944197441101075,
+ 1.9952830042266845,
+ 1.9904139096832276,
+ 1.991399830932617,
+ 1.9951595304870606,
+ 1.9930326363754272,
+ 1.9904268531799316,
+ 1.9908876788330079,
+ 1.9936137868499755,
+ 1.9875998815917968,
+ 1.992842328338623,
+ 1.990330719833374,
+ 1.9921186029815674,
+ 1.9902390911102295,
+ 1.9888944667053223,
+ 1.9905146474456787,
+ 1.9888609869384766,
+ 1.9911943308258058,
+ 1.9893802894592285,
+ 1.9878305652618409,
+ 1.9877847812652587,
+ 1.9883021939849854,
+ 1.988304048690796,
+ 1.9873467050170899,
+ 1.9866977008438111,
+ 1.9872942990875244,
+ 1.9863059562683105,
+ 1.9860430270004272,
+ 1.9863528005981446,
+ 1.9857840370941162,
+ 1.9861036871337892,
+ 1.985937850112915,
+ 1.9876243210601807,
+ 1.9859837552642823,
+ 1.9863339701080323,
+ 1.9869920307922364,
+ 1.9855867569732666,
+ 1.984609903526306,
+ 1.9867486194229127,
+ 1.9843175145721434,
+ 1.9860691648101807,
+ 1.9842630282592773,
+ 1.986168493347168,
+ 1.9858933181762695,
+ 1.98474868850708,
+ 1.9866244787979126,
+ 1.9833790422058106,
+ 1.9852294052505493,
+ 1.9821201675033568,
+ 1.9854196563339233,
+ 1.9846102389526368,
+ 1.9820537503433227,
+ 1.981782052307129,
+ 1.984344421157837,
+ 1.983909532699585,
+ 1.9853303438949585,
+ 1.984236333694458,
+ 1.9829996084594728,
+ 1.984313102684021,
+ 1.9833407458496093,
+ 1.983147784729004,
+ 1.9833076025390626
+ ],
+ "train_acc": [
+ 0.2332,
+ 0.24532,
+ 0.2542,
+ 0.2564,
+ 0.25326,
+ 0.25504,
+ 0.25822,
+ 0.26022,
+ 0.26324,
+ 0.26234,
+ 0.26278,
+ 0.26454,
+ 0.26348,
+ 0.26876,
+ 0.26432,
+ 0.26712,
+ 0.268,
+ 0.26852,
+ 0.27126,
+ 0.26796,
+ 0.2701,
+ 0.27052,
+ 0.26954,
+ 0.27056,
+ 0.27256,
+ 0.27258,
+ 0.26872,
+ 0.27102,
+ 0.2732,
+ 0.27298,
+ 0.27452,
+ 0.27374,
+ 0.27488,
+ 0.273,
+ 0.27604,
+ 0.2756,
+ 0.27576,
+ 0.27472,
+ 0.27344,
+ 0.27474,
+ 0.27776,
+ 0.27826,
+ 0.27318,
+ 0.2762,
+ 0.27666,
+ 0.27602,
+ 0.2763,
+ 0.2777,
+ 0.27724,
+ 0.27716,
+ 0.27566,
+ 0.27832,
+ 0.28138,
+ 0.27918,
+ 0.27922,
+ 0.27668,
+ 0.27846,
+ 0.27786,
+ 0.27796,
+ 0.2762,
+ 0.27946,
+ 0.27984,
+ 0.2795,
+ 0.27926,
+ 0.28092,
+ 0.27924,
+ 0.27978,
+ 0.28098,
+ 0.28132,
+ 0.2802,
+ 0.27814,
+ 0.28184,
+ 0.28178,
+ 0.28284,
+ 0.28128,
+ 0.2829,
+ 0.28048,
+ 0.28194,
+ 0.28236,
+ 0.2801,
+ 0.28106,
+ 0.28278,
+ 0.28308,
+ 0.27886,
+ 0.2815,
+ 0.28188,
+ 0.28254,
+ 0.28202,
+ 0.28158,
+ 0.28336,
+ 0.2825,
+ 0.28316,
+ 0.28312,
+ 0.28218,
+ 0.28288,
+ 0.2816,
+ 0.28218,
+ 0.2807,
+ 0.2835,
+ 0.28354
+ ],
+ "test_acc": [
+ 0.2507,
+ 0.2596,
+ 0.274,
+ 0.2704,
+ 0.2841,
+ 0.2803,
+ 0.2788,
+ 0.2781,
+ 0.303,
+ 0.2941,
+ 0.2905,
+ 0.3016,
+ 0.2779,
+ 0.3052,
+ 0.2819,
+ 0.2911,
+ 0.2931,
+ 0.2886,
+ 0.2812,
+ 0.2924,
+ 0.2947,
+ 0.2701,
+ 0.28,
+ 0.2912,
+ 0.3037,
+ 0.2894,
+ 0.3012,
+ 0.3011,
+ 0.3056,
+ 0.2981,
+ 0.2923,
+ 0.2961,
+ 0.3082,
+ 0.3035,
+ 0.2868,
+ 0.2981,
+ 0.3042,
+ 0.2956,
+ 0.3012,
+ 0.296,
+ 0.3077,
+ 0.3082,
+ 0.3,
+ 0.3141,
+ 0.2967,
+ 0.3024,
+ 0.3004,
+ 0.3016,
+ 0.308,
+ 0.3028,
+ 0.3072,
+ 0.2933,
+ 0.2907,
+ 0.3126,
+ 0.3036,
+ 0.2973,
+ 0.3001,
+ 0.3054,
+ 0.3115,
+ 0.3004,
+ 0.3026,
+ 0.3022,
+ 0.3051,
+ 0.3109,
+ 0.2968,
+ 0.3092,
+ 0.2998,
+ 0.3062,
+ 0.3081,
+ 0.3035,
+ 0.3032,
+ 0.3114,
+ 0.3147,
+ 0.308,
+ 0.3035,
+ 0.3098,
+ 0.3091,
+ 0.3065,
+ 0.3094,
+ 0.3071,
+ 0.3072,
+ 0.309,
+ 0.3012,
+ 0.3067,
+ 0.3016,
+ 0.3028,
+ 0.3048,
+ 0.3083,
+ 0.3082,
+ 0.308,
+ 0.3097,
+ 0.3087,
+ 0.3117,
+ 0.3104,
+ 0.3086,
+ 0.3085,
+ 0.3088,
+ 0.3084,
+ 0.3085,
+ 0.3086
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.395052969455719,
+ 0.0004171400796622038,
+ 8.336821338161826e-06,
+ 0.0005622187163680792,
+ 0.00013701531861443073,
+ -0.00035643568844534457,
+ 0.00012600264744833112,
+ -0.00027335749473422766,
+ 0.00016040689661167562,
+ -6.0928698076168075e-05,
+ 3.278384974692017e-05,
+ 0.0003701794194057584
+ ],
+ "perturbation_rho": [
+ 0.018501652404665947,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "nudging": {
+ "0.001": [
+ -4.1211023926734924e-07,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.003": [
+ -1.2032687664031982e-06,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 3.725290298461914e-09,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.01": [
+ -4.2067840695381165e-06,
+ -9.313225746154785e-10,
+ -2.7939677238464355e-09,
+ 9.313225746154785e-10,
+ 0.0,
+ 4.6566128730773926e-09,
+ 1.862645149230957e-09,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 52639.97265625,
+ 1088866944.0,
+ 4048803840.0,
+ 5257923584.0,
+ 5847087616.0,
+ 5938909184.0,
+ 7423066624.0,
+ 9731870720.0,
+ 11342387200.0,
+ 11317005312.0,
+ 11353008128.0,
+ 12463034368.0,
+ 13189959680.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.804916334753216e-07,
+ 1.9205773083008637e-10,
+ 1.9170887100017353e-10,
+ 1.9189858035950635e-10,
+ 1.919657766080718e-10,
+ 1.919622516499686e-10,
+ 1.919622516499686e-10,
+ 1.9191119526862366e-10,
+ 1.9192891720365424e-10,
+ 1.919416292572862e-10,
+ 1.9193641120907046e-10,
+ 1.9192181177629664e-10,
+ 1.9190903033372564e-10
+ ]
+ },
+ "drift": {
+ "embed.weight": 330.26007887921685,
+ "embed.bias": 274.2168771279626,
+ "blocks.0.ln.weight": 10.94628999409354,
+ "blocks.0.w1.weight": 277.3643033410537,
+ "blocks.0.w1.bias": 261.86497048407,
+ "blocks.0.w2.weight": 484.6109947045105,
+ "blocks.1.ln.weight": 9.35886499546542,
+ "blocks.1.w1.weight": 360.26286845371646,
+ "blocks.1.w1.bias": 355.66126981713177,
+ "blocks.1.w2.weight": 392.1776082515924,
+ "blocks.2.ln.weight": 9.350239389888953,
+ "blocks.2.w1.weight": 370.0336198354507,
+ "blocks.2.w1.bias": 337.8932267522991,
+ "blocks.2.w2.weight": 352.1258021274022,
+ "blocks.3.ln.weight": 9.767929501471052,
+ "blocks.3.w1.weight": 331.5909899943081,
+ "blocks.3.w1.bias": 303.3604214345198,
+ "blocks.3.w2.weight": 312.3336342294667,
+ "blocks.4.ln.weight": 6.8894143383412505,
+ "blocks.4.w1.weight": 245.69582220582208,
+ "blocks.4.w1.bias": 226.6077326792198,
+ "blocks.4.w2.weight": 237.1578303477898,
+ "blocks.5.ln.weight": 10.087708773364,
+ "blocks.5.w1.weight": 391.6451916277919,
+ "blocks.5.w1.bias": 364.5077034653599,
+ "blocks.5.w2.weight": 375.80268056062386,
+ "blocks.6.ln.weight": 10.894168165759849,
+ "blocks.6.w1.weight": 446.1083385424109,
+ "blocks.6.w1.bias": 408.24079100200436,
+ "blocks.6.w2.weight": 421.6066446657385,
+ "blocks.7.ln.weight": 10.280446652077675,
+ "blocks.7.w1.weight": 417.5786268538863,
+ "blocks.7.w1.bias": 412.3650615213133,
+ "blocks.7.w2.weight": 413.1057523107626,
+ "blocks.8.ln.weight": 7.628061136196448,
+ "blocks.8.w1.weight": 280.87597195145304,
+ "blocks.8.w1.bias": 261.4439841235217,
+ "blocks.8.w2.weight": 269.85556299140023,
+ "blocks.9.ln.weight": 7.035433277618009,
+ "blocks.9.w1.weight": 236.54101277402214,
+ "blocks.9.w1.bias": 227.70919757896647,
+ "blocks.9.w2.weight": 225.58244999977697,
+ "blocks.10.ln.weight": 10.07787136157841,
+ "blocks.10.w1.weight": 405.6014059372381,
+ "blocks.10.w1.bias": 387.7928293241623,
+ "blocks.10.w2.weight": 391.3899843395933,
+ "blocks.11.ln.weight": 9.615189319853535,
+ "blocks.11.w1.weight": 379.5363902874608,
+ "blocks.11.w1.bias": 358.37006314701347,
+ "blocks.11.w2.weight": 355.59941733335387,
+ "out_ln.weight": 0.6824993975107903,
+ "out_head.weight": 9.861581662622491,
+ "out_head.bias": 0.35061659338833756
+ }
+ },
+ "fa": {
+ "log": {
+ "train_loss": [
+ 2.0293911264038087,
+ 1.950565330429077,
+ 1.917781063232422,
+ 1.9023740824127198,
+ 1.8861673935699463,
+ 1.8825682291412353,
+ 1.873201879234314,
+ 1.8714005821990967,
+ 1.8628303081512452,
+ 1.8579199737548828,
+ 1.8572168459320069,
+ 1.8525300827789306,
+ 1.845312434463501,
+ 1.8409330677032472,
+ 1.8335270806121826,
+ 1.822513469543457,
+ 1.8199686001205444,
+ 1.8136085485458373,
+ 1.8064511907196046,
+ 1.803108533859253,
+ 1.7962512873077392,
+ 1.7911701647949219,
+ 1.7911696646118165,
+ 1.7887402313995362,
+ 1.783379939842224,
+ 1.7759505123901367,
+ 1.780573166732788,
+ 1.7734921991729737,
+ 1.7670989339828491,
+ 1.7612831104278563,
+ 1.7572847888183594,
+ 1.753526630859375,
+ 1.750715301132202,
+ 1.7472047467041016,
+ 1.7457971502304077,
+ 1.7424137032699585,
+ 1.7345080102920531,
+ 1.734222209701538,
+ 1.7304457287216186,
+ 1.7239187670898437,
+ 1.7218291164398194,
+ 1.7199537884902953,
+ 1.7217532263565063,
+ 1.7196246533584594,
+ 1.7135296297836304,
+ 1.706606083908081,
+ 1.7079890319824218,
+ 1.7041183599472045,
+ 1.705560855026245,
+ 1.7011269199371337,
+ 1.7035633499908447,
+ 1.6992430791854858,
+ 1.6950508019638062,
+ 1.7004030841064453,
+ 1.691479129295349,
+ 1.691718464050293,
+ 1.6919280102157592,
+ 1.6887580330657959,
+ 1.6883217154693604,
+ 1.688851436805725,
+ 1.6853341622543334,
+ 1.680581435546875,
+ 1.678009653892517,
+ 1.6784401907730102,
+ 1.6803946784210204,
+ 1.6761323783493043,
+ 1.676516137046814,
+ 1.6778211946868897,
+ 1.6767865496444703,
+ 1.6731242126083374,
+ 1.675690205078125,
+ 1.669817829246521,
+ 1.6760586986923218,
+ 1.670416958580017,
+ 1.6687565314483643,
+ 1.668258262901306,
+ 1.6666069919204711,
+ 1.666242490005493,
+ 1.6675299571990967,
+ 1.663490775527954,
+ 1.6672910994720458,
+ 1.6639445114517213,
+ 1.6617960903167726,
+ 1.6636806215667725,
+ 1.6643552365112304,
+ 1.6602695792007447,
+ 1.6589048775100708,
+ 1.6614284314727783,
+ 1.6610555962753295,
+ 1.6578762405014038,
+ 1.6580958423614502,
+ 1.6621724097061157,
+ 1.6586555422210694,
+ 1.657684158668518,
+ 1.6587366760253905,
+ 1.657404851913452,
+ 1.6599628324127198,
+ 1.6565218897247314,
+ 1.6586303088760377,
+ 1.6615223189544677
+ ],
+ "train_acc": [
+ 0.2526,
+ 0.29054,
+ 0.30538,
+ 0.31188,
+ 0.31418,
+ 0.31852,
+ 0.32254,
+ 0.3243,
+ 0.32728,
+ 0.33104,
+ 0.33096,
+ 0.33338,
+ 0.33726,
+ 0.33962,
+ 0.3428,
+ 0.34614,
+ 0.34728,
+ 0.34656,
+ 0.35102,
+ 0.35328,
+ 0.35598,
+ 0.35864,
+ 0.35488,
+ 0.35678,
+ 0.35954,
+ 0.36188,
+ 0.36066,
+ 0.36522,
+ 0.36488,
+ 0.36676,
+ 0.36986,
+ 0.3702,
+ 0.3739,
+ 0.37346,
+ 0.37434,
+ 0.37574,
+ 0.3752,
+ 0.3788,
+ 0.37904,
+ 0.38092,
+ 0.38054,
+ 0.38164,
+ 0.37934,
+ 0.38172,
+ 0.38678,
+ 0.3875,
+ 0.38734,
+ 0.38996,
+ 0.38752,
+ 0.39106,
+ 0.38978,
+ 0.39084,
+ 0.39086,
+ 0.39326,
+ 0.39296,
+ 0.39256,
+ 0.39342,
+ 0.39414,
+ 0.39416,
+ 0.39432,
+ 0.39614,
+ 0.39536,
+ 0.3965,
+ 0.39922,
+ 0.39722,
+ 0.39968,
+ 0.3982,
+ 0.39886,
+ 0.40146,
+ 0.4016,
+ 0.40122,
+ 0.4017,
+ 0.39858,
+ 0.40386,
+ 0.40168,
+ 0.4028,
+ 0.4026,
+ 0.4035,
+ 0.40378,
+ 0.40566,
+ 0.4031,
+ 0.4049,
+ 0.40506,
+ 0.4035,
+ 0.40618,
+ 0.40566,
+ 0.406,
+ 0.40476,
+ 0.40564,
+ 0.40724,
+ 0.40646,
+ 0.4061,
+ 0.4079,
+ 0.40568,
+ 0.40498,
+ 0.40842,
+ 0.40674,
+ 0.40746,
+ 0.40728,
+ 0.40684
+ ],
+ "test_acc": [
+ 0.2924,
+ 0.3363,
+ 0.3349,
+ 0.3386,
+ 0.347,
+ 0.3457,
+ 0.3423,
+ 0.3555,
+ 0.3653,
+ 0.3606,
+ 0.3632,
+ 0.3617,
+ 0.3575,
+ 0.3723,
+ 0.3703,
+ 0.3715,
+ 0.3741,
+ 0.3774,
+ 0.374,
+ 0.3745,
+ 0.3836,
+ 0.3907,
+ 0.3702,
+ 0.3831,
+ 0.385,
+ 0.3843,
+ 0.3922,
+ 0.4003,
+ 0.3967,
+ 0.3978,
+ 0.3897,
+ 0.3828,
+ 0.401,
+ 0.4012,
+ 0.3991,
+ 0.3999,
+ 0.4055,
+ 0.4085,
+ 0.3968,
+ 0.4064,
+ 0.4066,
+ 0.4082,
+ 0.404,
+ 0.4042,
+ 0.4085,
+ 0.4091,
+ 0.4104,
+ 0.4155,
+ 0.4157,
+ 0.4125,
+ 0.4139,
+ 0.4166,
+ 0.4116,
+ 0.4187,
+ 0.4186,
+ 0.4123,
+ 0.4161,
+ 0.4171,
+ 0.4184,
+ 0.414,
+ 0.4179,
+ 0.4184,
+ 0.4197,
+ 0.4193,
+ 0.421,
+ 0.4205,
+ 0.423,
+ 0.4233,
+ 0.4201,
+ 0.422,
+ 0.4204,
+ 0.4151,
+ 0.4182,
+ 0.4218,
+ 0.4239,
+ 0.4198,
+ 0.425,
+ 0.4239,
+ 0.4204,
+ 0.4215,
+ 0.4253,
+ 0.4199,
+ 0.4235,
+ 0.4206,
+ 0.4235,
+ 0.4255,
+ 0.4222,
+ 0.423,
+ 0.4216,
+ 0.426,
+ 0.4245,
+ 0.4245,
+ 0.4239,
+ 0.4248,
+ 0.4224,
+ 0.4245,
+ 0.4246,
+ 0.4241,
+ 0.4242,
+ 0.4239
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.005966864060610533,
+ 0.04795818775892258,
+ 0.043208200484514236,
+ -0.05298762395977974,
+ -0.11785908043384552,
+ -0.05540306121110916,
+ -0.10012105107307434,
+ -0.09065192937850952,
+ -0.04056818038225174,
+ -0.03824863210320473,
+ -0.07007172703742981,
+ 0.9948829412460327
+ ],
+ "perturbation_rho": [
+ 0.028052061796188354,
+ 0.03165658190846443,
+ -0.006041648797690868,
+ -0.027337782084941864,
+ 0.01562969572842121,
+ 0.01757640205323696,
+ -0.025423342362046242,
+ -0.024989785626530647,
+ -0.03021504357457161,
+ 0.021996641531586647,
+ 0.03194758668541908,
+ 0.01976931467652321
+ ],
+ "nudging": {
+ "0.001": [
+ 3.7101563066244125e-07,
+ -6.126938387751579e-07,
+ -3.066379576921463e-07,
+ 1.6076955944299698e-07,
+ 3.3061951398849487e-07,
+ 1.5937257558107376e-07,
+ 2.9441434890031815e-07,
+ 2.859160304069519e-07,
+ 1.284060999751091e-07,
+ 1.5040859580039978e-07,
+ 2.1443702280521393e-07,
+ -3.2152747735381126e-06
+ ],
+ "0.003": [
+ 1.007108949124813e-06,
+ -1.6828998923301697e-06,
+ -6.601912900805473e-07,
+ 4.4493936002254486e-07,
+ 1.1789379641413689e-06,
+ 5.08967787027359e-07,
+ 1.0151416063308716e-06,
+ 8.791685104370117e-07,
+ 3.866152837872505e-07,
+ 3.688037395477295e-07,
+ 6.126938387751579e-07,
+ -1.0434770956635475e-05
+ ],
+ "0.01": [
+ 3.6874553188681602e-06,
+ -5.368026904761791e-06,
+ -2.2364547476172447e-06,
+ 1.735752448439598e-06,
+ 3.992114216089249e-06,
+ 1.860782504081726e-06,
+ 3.5137636587023735e-06,
+ 3.010733053088188e-06,
+ 1.437612809240818e-06,
+ 1.2825476005673409e-06,
+ 2.246466465294361e-06,
+ -3.491528332233429e-05
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 4702.6123046875,
+ 44530.10546875,
+ 108911.2109375,
+ 164933.125,
+ 283322.40625,
+ 377155.46875,
+ 407664.6875,
+ 517485.0625,
+ 562702.0625,
+ 564809.5,
+ 577993.1875,
+ 617885.5625,
+ 296847.03125
+ ],
+ "bp_grad_norms_per_layer": [
+ 4.941995575791225e-05,
+ 3.9530186768388376e-06,
+ 1.528447455712012e-06,
+ 1.3358264823182253e-06,
+ 1.3305660786500084e-06,
+ 1.3287758520164061e-06,
+ 1.3350502285902621e-06,
+ 1.339361574537179e-06,
+ 1.3375677099247696e-06,
+ 1.3184284171074978e-06,
+ 1.3012635236009373e-06,
+ 1.3201109823057777e-06,
+ 1.27659131976543e-06
+ ]
+ },
+ "drift": {
+ "embed.weight": 36.945692873367385,
+ "embed.bias": 13.300137685811876,
+ "blocks.0.ln.weight": 0.901147599699004,
+ "blocks.0.w1.weight": 13.367758537525685,
+ "blocks.0.w1.bias": 10.72336098907488,
+ "blocks.0.w2.weight": 46.304733049049524,
+ "blocks.1.ln.weight": 0.7424575473753766,
+ "blocks.1.w1.weight": 15.226852479991834,
+ "blocks.1.w1.bias": 7.345119575250527,
+ "blocks.1.w2.weight": 40.81729281725618,
+ "blocks.2.ln.weight": 0.6752024292095198,
+ "blocks.2.w1.weight": 13.877425620596014,
+ "blocks.2.w1.bias": 7.829951186600198,
+ "blocks.2.w2.weight": 39.52886223656965,
+ "blocks.3.ln.weight": 0.6072219216215275,
+ "blocks.3.w1.weight": 13.849387698880346,
+ "blocks.3.w1.bias": 11.302805784789012,
+ "blocks.3.w2.weight": 31.48991587682597,
+ "blocks.4.ln.weight": 0.3513996294280428,
+ "blocks.4.w1.weight": 12.017687634070212,
+ "blocks.4.w1.bias": 12.545759692163585,
+ "blocks.4.w2.weight": 19.78672753090035,
+ "blocks.5.ln.weight": 0.27340079470413037,
+ "blocks.5.w1.weight": 10.049144366681992,
+ "blocks.5.w1.bias": 9.943637751012846,
+ "blocks.5.w2.weight": 19.227980350358227,
+ "blocks.6.ln.weight": 0.3688735234588941,
+ "blocks.6.w1.weight": 12.460300257534458,
+ "blocks.6.w1.bias": 13.383277160534908,
+ "blocks.6.w2.weight": 19.760938996380943,
+ "blocks.7.ln.weight": 0.34359755221214905,
+ "blocks.7.w1.weight": 11.768674295896531,
+ "blocks.7.w1.bias": 13.150434923761187,
+ "blocks.7.w2.weight": 21.174060669259852,
+ "blocks.8.ln.weight": 0.3997077790355334,
+ "blocks.8.w1.weight": 11.48289450791943,
+ "blocks.8.w1.bias": 8.028224303872328,
+ "blocks.8.w2.weight": 35.011661428887535,
+ "blocks.9.ln.weight": 0.43894400900661307,
+ "blocks.9.w1.weight": 11.84935422288265,
+ "blocks.9.w1.bias": 8.899824293598352,
+ "blocks.9.w2.weight": 34.666647832790865,
+ "blocks.10.ln.weight": 0.4701858246153278,
+ "blocks.10.w1.weight": 12.779979416376127,
+ "blocks.10.w1.bias": 12.40660396279328,
+ "blocks.10.w2.weight": 30.770398182409856,
+ "blocks.11.ln.weight": 0.502840402610306,
+ "blocks.11.w1.weight": 15.51482213849637,
+ "blocks.11.w1.bias": 16.777463254010712,
+ "blocks.11.w2.weight": 27.32990799926724,
+ "out_ln.weight": 0.2952693196309993,
+ "out_head.weight": 4.4773532602382,
+ "out_head.bias": 1.8150038464128413
+ }
+ }
+ },
+ "config": {
+ "dataset": "cifar10",
+ "d_hidden": 512,
+ "num_blocks": 12,
+ "batch_size": 128,
+ "epochs": 100,
+ "lr": 0.001,
+ "lr_fb": 0.001,
+ "wd": 0.01,
+ "lam": 0.1,
+ "K": 4,
+ "sigma_bridge": 0.05,
+ "ema_momentum": 0.995,
+ "term_grad_weight": 1.0,
+ "seeds": [
+ 0
+ ],
+ "gpu": 0,
+ "output_dir": "results/fa_dfa_d512_L12_seed0",
+ "methods": [
+ "fa",
+ "dfa"
+ ],
+ "random_targets": false,
+ "penalty_lam": 0.0,
+ "num_classes": 10
+ }
+} \ No newline at end of file
diff --git a/results/fa_dfa_d512_L12_seed1/results_cifar10.json b/results/fa_dfa_d512_L12_seed1/results_cifar10.json
new file mode 100644
index 0000000..e317465
--- /dev/null
+++ b/results/fa_dfa_d512_L12_seed1/results_cifar10.json
@@ -0,0 +1,969 @@
+{
+ "1": {
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 2.08784130317688,
+ 2.052014544372559,
+ 2.045669429321289,
+ 2.0424993614196776,
+ 2.0406430601501464,
+ 2.038676989364624,
+ 2.0339627771759035,
+ 2.0320363275527953,
+ 2.0293006126403808,
+ 2.027781917953491,
+ 2.0250831903839113,
+ 2.0254856860351564,
+ 2.025074804534912,
+ 2.020526984100342,
+ 2.017961084213257,
+ 2.0189661613464356,
+ 2.0154323001098633,
+ 2.012715454673767,
+ 2.0142161911010743,
+ 2.0169401965713503,
+ 2.015820147628784,
+ 2.0139186489105225,
+ 2.0170457207489014,
+ 2.010938446311951,
+ 2.0119680180358888,
+ 2.011925876312256,
+ 2.0143484900665283,
+ 2.011023857269287,
+ 2.011966758041382,
+ 2.011277076072693,
+ 2.0121180039978026,
+ 2.011552553100586,
+ 2.0112545052337647,
+ 2.0096049158477785,
+ 2.0116290228271483,
+ 2.012332228317261,
+ 2.0100498531341553,
+ 2.013163764266968,
+ 2.0090835204696655,
+ 2.0087667613983156,
+ 2.008153441734314,
+ 2.008515095252991,
+ 2.0076544715118407,
+ 2.006217385635376,
+ 2.0079916049957274,
+ 2.0093279348754884,
+ 2.0085637417602538,
+ 2.0080473442077635,
+ 2.005413489227295,
+ 2.006327581214905,
+ 2.0087718325042725,
+ 2.005806240501404,
+ 2.0063090213012695,
+ 2.005670380554199,
+ 2.008137595367432,
+ 2.0058074869537355,
+ 2.00653946685791,
+ 2.008738963394165,
+ 2.005000798873901,
+ 2.005233299484253,
+ 2.0051020904159547,
+ 2.0054547385406494,
+ 2.006043081893921,
+ 2.0066007862091064,
+ 2.0075411237335206,
+ 2.004597886505127,
+ 2.004815984649658,
+ 2.0049288347625733,
+ 2.004783443069458,
+ 2.006772222366333,
+ 2.0067188888549805,
+ 2.005615498046875,
+ 2.0044576259613036,
+ 2.006375897064209,
+ 2.0036719734954835,
+ 2.0061926458740236,
+ 2.004457794265747,
+ 2.0043624324035645,
+ 2.00480925743103,
+ 2.0047112017822264,
+ 2.0034663817977907,
+ 2.002827211380005,
+ 2.003827619934082,
+ 2.0031036682891847,
+ 2.0055768743133546,
+ 2.002000348739624,
+ 2.0022059215545656,
+ 2.0022215761566162,
+ 2.0029819064331056,
+ 2.0014546299743654,
+ 2.00309593082428,
+ 2.0011430258178713,
+ 2.004017787246704,
+ 2.001931449203491,
+ 2.000619831314087,
+ 2.0033977560043335,
+ 2.0014762798690797,
+ 2.0025222287368774,
+ 2.003771315956116,
+ 2.0038832190704348
+ ],
+ "train_acc": [
+ 0.22474,
+ 0.23978,
+ 0.242,
+ 0.24386,
+ 0.2415,
+ 0.24352,
+ 0.24826,
+ 0.2474,
+ 0.25112,
+ 0.24912,
+ 0.25434,
+ 0.25094,
+ 0.25204,
+ 0.25484,
+ 0.2583,
+ 0.25824,
+ 0.2601,
+ 0.26048,
+ 0.261,
+ 0.25998,
+ 0.2624,
+ 0.2619,
+ 0.25918,
+ 0.26472,
+ 0.2644,
+ 0.26346,
+ 0.2648,
+ 0.26354,
+ 0.26368,
+ 0.26704,
+ 0.26128,
+ 0.26468,
+ 0.26524,
+ 0.26586,
+ 0.26558,
+ 0.26324,
+ 0.26776,
+ 0.2652,
+ 0.26808,
+ 0.26752,
+ 0.26614,
+ 0.26558,
+ 0.2692,
+ 0.27084,
+ 0.27106,
+ 0.26932,
+ 0.2685,
+ 0.26908,
+ 0.26978,
+ 0.27274,
+ 0.26924,
+ 0.27318,
+ 0.27128,
+ 0.27216,
+ 0.27004,
+ 0.27288,
+ 0.26872,
+ 0.27234,
+ 0.27316,
+ 0.27282,
+ 0.27218,
+ 0.2717,
+ 0.27198,
+ 0.27498,
+ 0.27174,
+ 0.27282,
+ 0.27204,
+ 0.27394,
+ 0.27538,
+ 0.26966,
+ 0.27104,
+ 0.27454,
+ 0.27422,
+ 0.27358,
+ 0.27468,
+ 0.27304,
+ 0.27244,
+ 0.27386,
+ 0.27408,
+ 0.27234,
+ 0.27666,
+ 0.2753,
+ 0.2739,
+ 0.27624,
+ 0.27434,
+ 0.27818,
+ 0.27466,
+ 0.27586,
+ 0.27504,
+ 0.27554,
+ 0.27526,
+ 0.27624,
+ 0.27486,
+ 0.27606,
+ 0.27568,
+ 0.27474,
+ 0.27578,
+ 0.27652,
+ 0.27508,
+ 0.27436
+ ],
+ "test_acc": [
+ 0.2444,
+ 0.2437,
+ 0.2469,
+ 0.2484,
+ 0.2807,
+ 0.266,
+ 0.2731,
+ 0.2735,
+ 0.2783,
+ 0.2609,
+ 0.2648,
+ 0.2716,
+ 0.2541,
+ 0.2846,
+ 0.2739,
+ 0.2748,
+ 0.28,
+ 0.2759,
+ 0.2345,
+ 0.2676,
+ 0.271,
+ 0.2659,
+ 0.2785,
+ 0.2728,
+ 0.2906,
+ 0.269,
+ 0.2846,
+ 0.2651,
+ 0.2937,
+ 0.2971,
+ 0.2925,
+ 0.2807,
+ 0.2778,
+ 0.2837,
+ 0.2793,
+ 0.292,
+ 0.2866,
+ 0.2911,
+ 0.2915,
+ 0.2833,
+ 0.2907,
+ 0.2971,
+ 0.2829,
+ 0.2891,
+ 0.2955,
+ 0.2991,
+ 0.295,
+ 0.2912,
+ 0.2697,
+ 0.2874,
+ 0.2741,
+ 0.2949,
+ 0.2836,
+ 0.2944,
+ 0.2934,
+ 0.2975,
+ 0.2873,
+ 0.2882,
+ 0.2889,
+ 0.2901,
+ 0.2789,
+ 0.2908,
+ 0.2953,
+ 0.2911,
+ 0.2987,
+ 0.2861,
+ 0.291,
+ 0.2942,
+ 0.2938,
+ 0.301,
+ 0.2972,
+ 0.2922,
+ 0.2872,
+ 0.2859,
+ 0.298,
+ 0.2942,
+ 0.2918,
+ 0.2964,
+ 0.2973,
+ 0.2944,
+ 0.2903,
+ 0.2885,
+ 0.2969,
+ 0.2897,
+ 0.291,
+ 0.294,
+ 0.2903,
+ 0.2931,
+ 0.2894,
+ 0.293,
+ 0.2936,
+ 0.2923,
+ 0.291,
+ 0.291,
+ 0.2933,
+ 0.293,
+ 0.2935,
+ 0.2933,
+ 0.2932,
+ 0.2932
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.3784182071685791,
+ 0.00024469412164762616,
+ -6.5529711719136685e-06,
+ -2.4012413632590324e-05,
+ -2.2034288122085854e-05,
+ 0.0002968982153106481,
+ -0.0007156325737014413,
+ 0.0002482909185346216,
+ -0.00023413923918269575,
+ -0.000343983992934227,
+ -0.0004118037468288094,
+ -0.0001832617272157222
+ ],
+ "perturbation_rho": [
+ 0.02268090471625328,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "nudging": {
+ "0.001": [
+ -2.682209014892578e-07,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.003": [
+ -8.391216397285461e-07,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 9.313225746154785e-10,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.01": [
+ -3.262888640165329e-06,
+ -2.7939677238464355e-09,
+ 0.0,
+ 0.0,
+ 9.313225746154785e-10,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 58511.79296875,
+ 1607470848.0,
+ 3539406336.0,
+ 4483452928.0,
+ 5316571136.0,
+ 6553798144.0,
+ 7388260352.0,
+ 9242703872.0,
+ 9554661376.0,
+ 11009835008.0,
+ 11375313920.0,
+ 12110455808.0,
+ 12125570048.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.394743603417737e-07,
+ 1.8629858766772145e-10,
+ 1.8620747999076315e-10,
+ 1.8607929086478237e-10,
+ 1.8619832065081e-10,
+ 1.8618356856237028e-10,
+ 1.862058007784384e-10,
+ 1.8604620621864854e-10,
+ 1.860521597896181e-10,
+ 1.860489817762101e-10,
+ 1.860557541366603e-10,
+ 1.860564341482629e-10,
+ 1.8607769491918447e-10
+ ]
+ },
+ "drift": {
+ "embed.weight": 354.85094069636483,
+ "embed.bias": 257.5504888266541,
+ "blocks.0.ln.weight": 10.020600874142511,
+ "blocks.0.w1.weight": 311.43953558716794,
+ "blocks.0.w1.bias": 277.95760174306895,
+ "blocks.0.w2.weight": 495.0366657957393,
+ "blocks.1.ln.weight": 9.233680086701126,
+ "blocks.1.w1.weight": 352.291334675127,
+ "blocks.1.w1.bias": 326.78546394466474,
+ "blocks.1.w2.weight": 328.5686664739385,
+ "blocks.2.ln.weight": 8.349584849320378,
+ "blocks.2.w1.weight": 347.4686920651646,
+ "blocks.2.w1.bias": 314.54485739427827,
+ "blocks.2.w2.weight": 322.05654919163993,
+ "blocks.3.ln.weight": 8.748114666066721,
+ "blocks.3.w1.weight": 341.6411014867908,
+ "blocks.3.w1.bias": 316.14847884372415,
+ "blocks.3.w2.weight": 332.52492325797556,
+ "blocks.4.ln.weight": 9.48080425966404,
+ "blocks.4.w1.weight": 385.71563399378635,
+ "blocks.4.w1.bias": 355.1208489434028,
+ "blocks.4.w2.weight": 353.2677244575998,
+ "blocks.5.ln.weight": 8.852085211151078,
+ "blocks.5.w1.weight": 359.24445324084536,
+ "blocks.5.w1.bias": 327.2053984662166,
+ "blocks.5.w2.weight": 334.59987322510256,
+ "blocks.6.ln.weight": 11.348583355888922,
+ "blocks.6.w1.weight": 445.597521308562,
+ "blocks.6.w1.bias": 417.10230766347513,
+ "blocks.6.w2.weight": 410.28435436855875,
+ "blocks.7.ln.weight": 8.64827114045918,
+ "blocks.7.w1.weight": 341.9936697385893,
+ "blocks.7.w1.bias": 328.17910031845435,
+ "blocks.7.w2.weight": 322.2651678366388,
+ "blocks.8.ln.weight": 10.71699832162817,
+ "blocks.8.w1.weight": 430.40137342022086,
+ "blocks.8.w1.bias": 395.45655885652957,
+ "blocks.8.w2.weight": 418.09527663040274,
+ "blocks.9.ln.weight": 7.689168672786446,
+ "blocks.9.w1.weight": 307.285121525094,
+ "blocks.9.w1.bias": 286.6467639719644,
+ "blocks.9.w2.weight": 284.90185966096766,
+ "blocks.10.ln.weight": 9.362189538932856,
+ "blocks.10.w1.weight": 374.1142646081974,
+ "blocks.10.w1.bias": 340.73445175290055,
+ "blocks.10.w2.weight": 334.3762368608616,
+ "blocks.11.ln.weight": 6.971496170157957,
+ "blocks.11.w1.weight": 273.28051237448034,
+ "blocks.11.w1.bias": 249.7041416371339,
+ "blocks.11.w2.weight": 250.47506193148064,
+ "out_ln.weight": 0.6766551046620063,
+ "out_head.weight": 9.660594953000444,
+ "out_head.bias": 0.6081606213236193
+ }
+ },
+ "fa": {
+ "log": {
+ "train_loss": [
+ 2.039813977584839,
+ 1.9617359282684326,
+ 1.9338554121398925,
+ 1.920875029220581,
+ 1.9130360520172118,
+ 1.9065295135116578,
+ 1.8998144156646728,
+ 1.893558436279297,
+ 1.8832812244033814,
+ 1.8782934536361695,
+ 1.8706854000091553,
+ 1.8697611987304688,
+ 1.8661894456481933,
+ 1.8597797629165649,
+ 1.8567528272247313,
+ 1.8511868425750733,
+ 1.8451919631195068,
+ 1.8397227505493163,
+ 1.834096513671875,
+ 1.8383717123413086,
+ 1.8340178047943114,
+ 1.828574577331543,
+ 1.8276408141326905,
+ 1.816076180152893,
+ 1.8154300131225587,
+ 1.8111229518890382,
+ 1.8086885763549805,
+ 1.8008410723114014,
+ 1.797977260093689,
+ 1.791234312095642,
+ 1.790951509628296,
+ 1.783079252281189,
+ 1.779609817199707,
+ 1.7747913509750366,
+ 1.7767339920043945,
+ 1.7752550736236572,
+ 1.7656426385879516,
+ 1.766551993637085,
+ 1.763684893760681,
+ 1.7585593047332764,
+ 1.7559137664413451,
+ 1.7532807390975953,
+ 1.75251891330719,
+ 1.749091358680725,
+ 1.7431825846099853,
+ 1.745732957458496,
+ 1.744267059288025,
+ 1.7419162277603149,
+ 1.738112122116089,
+ 1.7364409768676758,
+ 1.7408631985473633,
+ 1.7369310165405274,
+ 1.7362467727279662,
+ 1.7374004998016357,
+ 1.736051294517517,
+ 1.734863005027771,
+ 1.7319254892730713,
+ 1.7279368935775756,
+ 1.728024648475647,
+ 1.72793106716156,
+ 1.726857622718811,
+ 1.7304030743408203,
+ 1.725222360267639,
+ 1.7216177057647706,
+ 1.7253669234466553,
+ 1.7201292371368408,
+ 1.7183555184173585,
+ 1.7207783557891845,
+ 1.7150025174713135,
+ 1.7195079833221436,
+ 1.718960595779419,
+ 1.7189737926483155,
+ 1.7145323584365846,
+ 1.7151868480682373,
+ 1.7150000100708007,
+ 1.7164085680389405,
+ 1.7131129906463622,
+ 1.7118962685775756,
+ 1.7095365018463136,
+ 1.7063884579086304,
+ 1.7068846228790284,
+ 1.7066734680175781,
+ 1.7093646648406982,
+ 1.7075168813323975,
+ 1.7101067148590088,
+ 1.7065433209228515,
+ 1.70673194480896,
+ 1.706905789451599,
+ 1.7064489783096313,
+ 1.7049209537124634,
+ 1.702039204940796,
+ 1.7093150380706787,
+ 1.7065809494018556,
+ 1.7064716823959352,
+ 1.7058918637466431,
+ 1.7017672801971435,
+ 1.7050170278549195,
+ 1.7040833249664307,
+ 1.7052951337432862,
+ 1.7028977381134034
+ ],
+ "train_acc": [
+ 0.2476,
+ 0.28516,
+ 0.30182,
+ 0.30518,
+ 0.30668,
+ 0.3081,
+ 0.30996,
+ 0.31082,
+ 0.31996,
+ 0.31962,
+ 0.32402,
+ 0.32084,
+ 0.32474,
+ 0.3262,
+ 0.33392,
+ 0.33128,
+ 0.33602,
+ 0.33814,
+ 0.34208,
+ 0.33954,
+ 0.34316,
+ 0.34226,
+ 0.34322,
+ 0.34824,
+ 0.35108,
+ 0.35332,
+ 0.35372,
+ 0.35524,
+ 0.35692,
+ 0.3583,
+ 0.3558,
+ 0.36152,
+ 0.36236,
+ 0.36616,
+ 0.36662,
+ 0.3641,
+ 0.36936,
+ 0.36966,
+ 0.36902,
+ 0.37004,
+ 0.3724,
+ 0.37158,
+ 0.37278,
+ 0.3725,
+ 0.375,
+ 0.37618,
+ 0.3764,
+ 0.3774,
+ 0.37752,
+ 0.37724,
+ 0.37826,
+ 0.37848,
+ 0.38024,
+ 0.3791,
+ 0.37902,
+ 0.38188,
+ 0.38038,
+ 0.38478,
+ 0.3799,
+ 0.38192,
+ 0.38352,
+ 0.38058,
+ 0.38324,
+ 0.38586,
+ 0.38004,
+ 0.38654,
+ 0.3842,
+ 0.38632,
+ 0.38732,
+ 0.38408,
+ 0.38422,
+ 0.38632,
+ 0.38902,
+ 0.38782,
+ 0.38674,
+ 0.38694,
+ 0.38626,
+ 0.38678,
+ 0.38948,
+ 0.39038,
+ 0.39018,
+ 0.38814,
+ 0.38618,
+ 0.3908,
+ 0.38982,
+ 0.39066,
+ 0.38904,
+ 0.38854,
+ 0.38872,
+ 0.39008,
+ 0.39052,
+ 0.38934,
+ 0.38852,
+ 0.39056,
+ 0.39032,
+ 0.3915,
+ 0.39092,
+ 0.39138,
+ 0.3895,
+ 0.39112
+ ],
+ "test_acc": [
+ 0.2816,
+ 0.3167,
+ 0.3194,
+ 0.3093,
+ 0.3417,
+ 0.3344,
+ 0.3297,
+ 0.3418,
+ 0.3479,
+ 0.3372,
+ 0.3432,
+ 0.3446,
+ 0.3458,
+ 0.3646,
+ 0.3616,
+ 0.3529,
+ 0.3617,
+ 0.3576,
+ 0.329,
+ 0.3542,
+ 0.3701,
+ 0.3647,
+ 0.3697,
+ 0.3697,
+ 0.3808,
+ 0.3735,
+ 0.3695,
+ 0.3735,
+ 0.3711,
+ 0.379,
+ 0.3877,
+ 0.3805,
+ 0.3751,
+ 0.3909,
+ 0.381,
+ 0.3909,
+ 0.3902,
+ 0.3862,
+ 0.3899,
+ 0.3879,
+ 0.3994,
+ 0.3963,
+ 0.3954,
+ 0.3882,
+ 0.3978,
+ 0.3919,
+ 0.3928,
+ 0.3909,
+ 0.3966,
+ 0.3986,
+ 0.3939,
+ 0.4016,
+ 0.3973,
+ 0.397,
+ 0.4019,
+ 0.4004,
+ 0.4018,
+ 0.4052,
+ 0.3951,
+ 0.4016,
+ 0.396,
+ 0.4024,
+ 0.4075,
+ 0.4051,
+ 0.4025,
+ 0.4037,
+ 0.4041,
+ 0.4061,
+ 0.4056,
+ 0.4041,
+ 0.4009,
+ 0.405,
+ 0.399,
+ 0.4012,
+ 0.4076,
+ 0.4079,
+ 0.4096,
+ 0.407,
+ 0.4049,
+ 0.4059,
+ 0.4026,
+ 0.406,
+ 0.4048,
+ 0.4027,
+ 0.407,
+ 0.4022,
+ 0.4067,
+ 0.4063,
+ 0.4062,
+ 0.4052,
+ 0.4059,
+ 0.4068,
+ 0.4063,
+ 0.4076,
+ 0.4063,
+ 0.4065,
+ 0.4062,
+ 0.4069,
+ 0.4069,
+ 0.4067
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.027296170592308044,
+ 0.04133676737546921,
+ 0.08515161275863647,
+ 0.026729261502623558,
+ 0.01583785191178322,
+ -0.025018498301506042,
+ -0.044713616371154785,
+ -0.05714074894785881,
+ -0.040867358446121216,
+ 0.012529742904007435,
+ -0.027012888342142105,
+ 0.9971734881401062
+ ],
+ "perturbation_rho": [
+ 0.053132910281419754,
+ -0.014070656150579453,
+ 0.02090507186949253,
+ 0.005164717324078083,
+ -0.031424831598997116,
+ -0.010921415872871876,
+ -0.005948010832071304,
+ 0.01678430661559105,
+ -0.00991929043084383,
+ -0.006591915152966976,
+ -0.010621514171361923,
+ -0.019635088741779327
+ ],
+ "nudging": {
+ "0.001": [
+ -2.4188775569200516e-06,
+ -2.6740599423646927e-07,
+ -1.7171259969472885e-07,
+ -1.0791700333356857e-07,
+ -3.4458935260772705e-08,
+ -4.493631422519684e-08,
+ 5.0407834351062775e-08,
+ 1.2456439435482025e-08,
+ 9.19681042432785e-09,
+ -1.6065314412117004e-08,
+ 2.444721758365631e-08,
+ -6.272457540035248e-07
+ ],
+ "0.003": [
+ -7.179100066423416e-06,
+ -6.825430318713188e-07,
+ -5.516340024769306e-07,
+ -1.1979136615991592e-07,
+ -2.8172507882118225e-08,
+ -1.9907020032405853e-08,
+ 1.2316741049289703e-07,
+ 7.264316082000732e-08,
+ 9.010545909404755e-08,
+ -2.7706846594810486e-08,
+ 9.778887033462524e-09,
+ -2.26777046918869e-06
+ ],
+ "0.01": [
+ -2.3640692234039307e-05,
+ -2.3529864847660065e-06,
+ -1.5455298125743866e-06,
+ -3.109453245997429e-07,
+ -1.5425030142068863e-07,
+ 1.0291114449501038e-07,
+ 3.3457763493061066e-07,
+ 3.76836396753788e-07,
+ 3.371387720108032e-07,
+ -1.2211967259645462e-07,
+ 1.5588011592626572e-07,
+ -8.008093573153019e-06
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 6886.94775390625,
+ 87584.2109375,
+ 126785.4921875,
+ 355667.78125,
+ 552057.5625,
+ 850422.75,
+ 1293051.0,
+ 1541609.25,
+ 1868753.125,
+ 2007669.375,
+ 2071472.125,
+ 2260452.75,
+ 1769309.25
+ ],
+ "bp_grad_norms_per_layer": [
+ 3.3799024095060304e-05,
+ 2.5621802706154995e-06,
+ 8.161263167494326e-07,
+ 4.5329542786021193e-07,
+ 4.0827075054039597e-07,
+ 4.0655893940311216e-07,
+ 4.057951343838795e-07,
+ 4.061944878230861e-07,
+ 4.055657427670667e-07,
+ 4.0522803601561463e-07,
+ 4.0508828647034534e-07,
+ 4.0517943489248864e-07,
+ 3.946254025777307e-07
+ ]
+ },
+ "drift": {
+ "embed.weight": 46.66262768106957,
+ "embed.bias": 12.349060830406593,
+ "blocks.0.ln.weight": 1.186887811746546,
+ "blocks.0.w1.weight": 16.691636363051998,
+ "blocks.0.w1.bias": 10.396716096626863,
+ "blocks.0.w2.weight": 53.63063338700071,
+ "blocks.1.ln.weight": 0.9868002811700606,
+ "blocks.1.w1.weight": 18.44033798724087,
+ "blocks.1.w1.bias": 4.605202402802353,
+ "blocks.1.w2.weight": 52.226844329919246,
+ "blocks.2.ln.weight": 1.0614278526441627,
+ "blocks.2.w1.weight": 19.34190693735112,
+ "blocks.2.w1.bias": 11.075717016343752,
+ "blocks.2.w2.weight": 42.37755333929534,
+ "blocks.3.ln.weight": 0.8798892347084698,
+ "blocks.3.w1.weight": 19.160031225604996,
+ "blocks.3.w1.bias": 13.88384021345869,
+ "blocks.3.w2.weight": 33.30996859649038,
+ "blocks.4.ln.weight": 0.7104088303381113,
+ "blocks.4.w1.weight": 18.209430962072435,
+ "blocks.4.w1.bias": 16.92336837763847,
+ "blocks.4.w2.weight": 28.747777526920554,
+ "blocks.5.ln.weight": 0.7207815914026496,
+ "blocks.5.w1.weight": 20.407344689317952,
+ "blocks.5.w1.bias": 19.928474035997958,
+ "blocks.5.w2.weight": 30.668259780333205,
+ "blocks.6.ln.weight": 0.6859569564344036,
+ "blocks.6.w1.weight": 20.174584059207987,
+ "blocks.6.w1.bias": 20.229323693427677,
+ "blocks.6.w2.weight": 30.149329891021953,
+ "blocks.7.ln.weight": 0.7233680592463317,
+ "blocks.7.w1.weight": 21.7410787215414,
+ "blocks.7.w1.bias": 22.40005573271207,
+ "blocks.7.w2.weight": 28.827371299185597,
+ "blocks.8.ln.weight": 0.6511450076677763,
+ "blocks.8.w1.weight": 17.427880699003644,
+ "blocks.8.w1.bias": 16.729590061303803,
+ "blocks.8.w2.weight": 28.2153738883236,
+ "blocks.9.ln.weight": 0.6038210923569808,
+ "blocks.9.w1.weight": 16.214819848855626,
+ "blocks.9.w1.bias": 15.839234916532192,
+ "blocks.9.w2.weight": 28.312285773435068,
+ "blocks.10.ln.weight": 0.6503320370929363,
+ "blocks.10.w1.weight": 19.037981824154166,
+ "blocks.10.w1.bias": 18.568708970692136,
+ "blocks.10.w2.weight": 20.586050451276414,
+ "blocks.11.ln.weight": 0.5703826672599819,
+ "blocks.11.w1.weight": 16.05034192522954,
+ "blocks.11.w1.bias": 14.75267875282651,
+ "blocks.11.w2.weight": 45.75154443737838,
+ "out_ln.weight": 0.36468260935864044,
+ "out_head.weight": 6.277188782444725,
+ "out_head.bias": 0.6248347589534139
+ }
+ }
+ },
+ "config": {
+ "dataset": "cifar10",
+ "d_hidden": 512,
+ "num_blocks": 12,
+ "batch_size": 128,
+ "epochs": 100,
+ "lr": 0.001,
+ "lr_fb": 0.001,
+ "wd": 0.01,
+ "lam": 0.1,
+ "K": 4,
+ "sigma_bridge": 0.05,
+ "ema_momentum": 0.995,
+ "term_grad_weight": 1.0,
+ "seeds": [
+ 1
+ ],
+ "gpu": 0,
+ "output_dir": "results/fa_dfa_d512_L12_seed1",
+ "methods": [
+ "fa",
+ "dfa"
+ ],
+ "random_targets": false,
+ "penalty_lam": 0.0,
+ "num_classes": 10
+ }
+} \ No newline at end of file
diff --git a/results/fa_dfa_d512_L12_seed2/results_cifar10.json b/results/fa_dfa_d512_L12_seed2/results_cifar10.json
new file mode 100644
index 0000000..68c6e2b
--- /dev/null
+++ b/results/fa_dfa_d512_L12_seed2/results_cifar10.json
@@ -0,0 +1,969 @@
+{
+ "2": {
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 2.067472825927734,
+ 2.050606523780823,
+ 2.0464368284606933,
+ 2.042041470413208,
+ 2.044630073509216,
+ 2.0444898017120363,
+ 2.0405262368774415,
+ 2.035152849884033,
+ 2.0376771257019044,
+ 2.03509138168335,
+ 2.0322119396972655,
+ 2.033472604637146,
+ 2.033374984664917,
+ 2.029572864456177,
+ 2.0276870463562013,
+ 2.0302749423980715,
+ 2.027638416900635,
+ 2.0297894455718994,
+ 2.0263528815078735,
+ 2.0278661029052736,
+ 2.0263561694335936,
+ 2.025871162261963,
+ 2.0294278043365477,
+ 2.0267604162979125,
+ 2.023654881286621,
+ 2.0251026361846924,
+ 2.023718124923706,
+ 2.023326771583557,
+ 2.0232533959960937,
+ 2.0244881047821046,
+ 2.0223184526062012,
+ 2.021666586151123,
+ 2.0216548426818846,
+ 2.01917991065979,
+ 2.021205049819946,
+ 2.019787854347229,
+ 2.0205126093292236,
+ 2.019609580001831,
+ 2.021551597328186,
+ 2.0192294607543944,
+ 2.019204094810486,
+ 2.0193405352020264,
+ 2.0203353549957277,
+ 2.0184646337127687,
+ 2.0181322897338867,
+ 2.019681806564331,
+ 2.0184515984344484,
+ 2.019875436248779,
+ 2.019927127380371,
+ 2.0191176706695555,
+ 2.016925164756775,
+ 2.0216545279693605,
+ 2.01788885345459,
+ 2.0193467868804933,
+ 2.0155374671936035,
+ 2.0189524417114257,
+ 2.018774903793335,
+ 2.0179057373046874,
+ 2.0154743661117553,
+ 2.016519417648315,
+ 2.0184439277648925,
+ 2.017523434753418,
+ 2.017258151779175,
+ 2.018665143585205,
+ 2.0169664744567872,
+ 2.016487240142822,
+ 2.01617966884613,
+ 2.0159906078338623,
+ 2.0176960159683226,
+ 2.015817363433838,
+ 2.014662687149048,
+ 2.0164543737030027,
+ 2.0170937897109984,
+ 2.017375130081177,
+ 2.0153453800964356,
+ 2.015449415817261,
+ 2.0149039567565916,
+ 2.0142875480651856,
+ 2.0150826512145996,
+ 2.0165929887390135,
+ 2.015268103866577,
+ 2.013780555686951,
+ 2.0156544293212892,
+ 2.0130539707183837,
+ 2.015054910621643,
+ 2.0157424531555175,
+ 2.0142131992340087,
+ 2.016000519104004,
+ 2.014644538726807,
+ 2.0129901065826417,
+ 2.014485344467163,
+ 2.015419366531372,
+ 2.014306447067261,
+ 2.013788412322998,
+ 2.014239661521912,
+ 2.0151051541137694,
+ 2.0142657162475586,
+ 2.015102813682556,
+ 2.0133784993743897,
+ 2.015375001487732
+ ],
+ "train_acc": [
+ 0.23898,
+ 0.24246,
+ 0.24622,
+ 0.24884,
+ 0.24668,
+ 0.2485,
+ 0.24792,
+ 0.25306,
+ 0.24956,
+ 0.25484,
+ 0.25486,
+ 0.2526,
+ 0.2561,
+ 0.256,
+ 0.25878,
+ 0.25726,
+ 0.2586,
+ 0.25762,
+ 0.25946,
+ 0.25782,
+ 0.26002,
+ 0.2601,
+ 0.25784,
+ 0.2578,
+ 0.26152,
+ 0.2606,
+ 0.26106,
+ 0.25782,
+ 0.25852,
+ 0.25776,
+ 0.26318,
+ 0.26254,
+ 0.26198,
+ 0.2636,
+ 0.26256,
+ 0.26578,
+ 0.2624,
+ 0.26176,
+ 0.25926,
+ 0.26508,
+ 0.26478,
+ 0.26082,
+ 0.26404,
+ 0.26548,
+ 0.26746,
+ 0.26278,
+ 0.26414,
+ 0.26246,
+ 0.26348,
+ 0.26282,
+ 0.26394,
+ 0.26348,
+ 0.26624,
+ 0.26506,
+ 0.26554,
+ 0.26198,
+ 0.26362,
+ 0.26472,
+ 0.26728,
+ 0.26684,
+ 0.26632,
+ 0.26602,
+ 0.26588,
+ 0.26584,
+ 0.26636,
+ 0.2642,
+ 0.26606,
+ 0.26868,
+ 0.26574,
+ 0.26688,
+ 0.267,
+ 0.2633,
+ 0.26504,
+ 0.26764,
+ 0.26732,
+ 0.26612,
+ 0.26802,
+ 0.26864,
+ 0.26808,
+ 0.26662,
+ 0.26826,
+ 0.26906,
+ 0.2658,
+ 0.26706,
+ 0.2689,
+ 0.26686,
+ 0.26868,
+ 0.2686,
+ 0.26812,
+ 0.26678,
+ 0.26878,
+ 0.26654,
+ 0.26678,
+ 0.2667,
+ 0.26656,
+ 0.26746,
+ 0.26736,
+ 0.26788,
+ 0.26794,
+ 0.27014
+ ],
+ "test_acc": [
+ 0.2486,
+ 0.2615,
+ 0.2803,
+ 0.2745,
+ 0.2627,
+ 0.2561,
+ 0.2643,
+ 0.2592,
+ 0.2832,
+ 0.2903,
+ 0.2737,
+ 0.2757,
+ 0.2706,
+ 0.2847,
+ 0.2742,
+ 0.2822,
+ 0.276,
+ 0.2661,
+ 0.2705,
+ 0.2742,
+ 0.2755,
+ 0.2899,
+ 0.2929,
+ 0.2661,
+ 0.2857,
+ 0.2777,
+ 0.2783,
+ 0.2498,
+ 0.287,
+ 0.2912,
+ 0.2833,
+ 0.2801,
+ 0.2912,
+ 0.2897,
+ 0.2936,
+ 0.2849,
+ 0.2874,
+ 0.2957,
+ 0.2753,
+ 0.2862,
+ 0.2964,
+ 0.2934,
+ 0.289,
+ 0.29,
+ 0.2951,
+ 0.2861,
+ 0.2858,
+ 0.2867,
+ 0.2885,
+ 0.3027,
+ 0.2822,
+ 0.2847,
+ 0.2933,
+ 0.2899,
+ 0.286,
+ 0.2958,
+ 0.2974,
+ 0.2957,
+ 0.288,
+ 0.2878,
+ 0.2944,
+ 0.2885,
+ 0.2913,
+ 0.2942,
+ 0.2965,
+ 0.2943,
+ 0.3,
+ 0.2924,
+ 0.2977,
+ 0.2917,
+ 0.2951,
+ 0.2848,
+ 0.2943,
+ 0.2944,
+ 0.2874,
+ 0.2901,
+ 0.2969,
+ 0.29,
+ 0.2948,
+ 0.2954,
+ 0.2894,
+ 0.2948,
+ 0.2908,
+ 0.2922,
+ 0.2904,
+ 0.2903,
+ 0.2947,
+ 0.2926,
+ 0.2912,
+ 0.292,
+ 0.2929,
+ 0.2922,
+ 0.2941,
+ 0.2952,
+ 0.2927,
+ 0.2937,
+ 0.2934,
+ 0.2933,
+ 0.2933,
+ 0.2933
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.3273237347602844,
+ 0.0001051856525009498,
+ -2.771663639578037e-05,
+ -0.0005365631077438593,
+ 0.0003001387231051922,
+ -7.278185512404889e-05,
+ -0.00019034843717236072,
+ -0.00018735270714387298,
+ -0.0003543527564033866,
+ -0.0005275406292639673,
+ -0.000327416870277375,
+ -0.00032380438642576337
+ ],
+ "perturbation_rho": [
+ -0.01584552228450775,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "nudging": {
+ "0.001": [
+ -3.3387914299964905e-07,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.003": [
+ -8.740462362766266e-07,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.01": [
+ -2.7050264179706573e-06,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 55119.203125,
+ 2542876672.0,
+ 5336881152.0,
+ 6906983936.0,
+ 7084125696.0,
+ 9782456320.0,
+ 9881924608.0,
+ 10504348672.0,
+ 10572271616.0,
+ 11673710592.0,
+ 12703504384.0,
+ 12870231040.0,
+ 13115667456.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 1.9967485798133566e-07,
+ 2.110904229191135e-10,
+ 2.0921038512700108e-10,
+ 2.0923462962230133e-10,
+ 2.092699208366966e-10,
+ 2.0928372923556537e-10,
+ 2.0927498622924645e-10,
+ 2.0925521038162032e-10,
+ 2.0925566834861797e-10,
+ 2.0928303534617498e-10,
+ 2.0925450261444212e-10,
+ 2.093413220549678e-10,
+ 2.0937052092051545e-10
+ ]
+ },
+ "drift": {
+ "embed.weight": 355.47197791509984,
+ "embed.bias": 325.4137572298477,
+ "blocks.0.ln.weight": 10.183073943359524,
+ "blocks.0.w1.weight": 337.53798007019327,
+ "blocks.0.w1.bias": 368.2919019457867,
+ "blocks.0.w2.weight": 502.2465316680103,
+ "blocks.1.ln.weight": 9.790888407534407,
+ "blocks.1.w1.weight": 399.8591225924345,
+ "blocks.1.w1.bias": 381.91784742604096,
+ "blocks.1.w2.weight": 400.78281765228365,
+ "blocks.2.ln.weight": 9.816270926016012,
+ "blocks.2.w1.weight": 404.058717958719,
+ "blocks.2.w1.bias": 370.3941989263201,
+ "blocks.2.w2.weight": 389.67673069887354,
+ "blocks.3.ln.weight": 7.767040540127046,
+ "blocks.3.w1.weight": 279.00825869080103,
+ "blocks.3.w1.bias": 255.25224285416232,
+ "blocks.3.w2.weight": 275.49641344672904,
+ "blocks.4.ln.weight": 10.807571563453388,
+ "blocks.4.w1.weight": 443.61612820167835,
+ "blocks.4.w1.bias": 410.34345299471363,
+ "blocks.4.w2.weight": 435.1254085598178,
+ "blocks.5.ln.weight": 7.222077733457088,
+ "blocks.5.w1.weight": 277.46785316158423,
+ "blocks.5.w1.bias": 253.23890247800753,
+ "blocks.5.w2.weight": 256.5840082126155,
+ "blocks.6.ln.weight": 8.723424030262112,
+ "blocks.6.w1.weight": 347.79770296240184,
+ "blocks.6.w1.bias": 330.68249616268605,
+ "blocks.6.w2.weight": 337.991009055742,
+ "blocks.7.ln.weight": 6.095259128522855,
+ "blocks.7.w1.weight": 228.22905887133265,
+ "blocks.7.w1.bias": 207.27947768539244,
+ "blocks.7.w2.weight": 218.0278988490447,
+ "blocks.8.ln.weight": 10.272236445847241,
+ "blocks.8.w1.weight": 411.857394629554,
+ "blocks.8.w1.bias": 374.6759208224197,
+ "blocks.8.w2.weight": 382.12457040788746,
+ "blocks.9.ln.weight": 10.509629754259423,
+ "blocks.9.w1.weight": 419.2076511149967,
+ "blocks.9.w1.bias": 382.6992093043317,
+ "blocks.9.w2.weight": 400.5745998142284,
+ "blocks.10.ln.weight": 8.353122487257533,
+ "blocks.10.w1.weight": 326.130308345784,
+ "blocks.10.w1.bias": 311.08525380018006,
+ "blocks.10.w2.weight": 293.42915037063676,
+ "blocks.11.ln.weight": 9.253648926692552,
+ "blocks.11.w1.weight": 374.4659124025003,
+ "blocks.11.w1.bias": 354.20131257586644,
+ "blocks.11.w2.weight": 347.73755162930524,
+ "out_ln.weight": 0.6640411848895453,
+ "out_head.weight": 10.723191480707335,
+ "out_head.bias": 0.5785102998287991
+ }
+ },
+ "fa": {
+ "log": {
+ "train_loss": [
+ 2.0563817890930176,
+ 1.9627269606781006,
+ 1.9333056550598144,
+ 1.9114726235198976,
+ 1.8997531073379517,
+ 1.8861957403945924,
+ 1.87444166015625,
+ 1.8636801738739013,
+ 1.8639895972442626,
+ 1.859206601295471,
+ 1.8560995433807372,
+ 1.8580559069824218,
+ 1.857329437828064,
+ 1.8486979720687866,
+ 1.8436245847320556,
+ 1.8427142208480836,
+ 1.8344886156463622,
+ 1.8326412610626222,
+ 1.8245765609741211,
+ 1.828027060470581,
+ 1.825153868484497,
+ 1.8163878149795532,
+ 1.824076413230896,
+ 1.81121192653656,
+ 1.8073615282821656,
+ 1.8063795041275024,
+ 1.8070646337509155,
+ 1.8042147310638428,
+ 1.8005332668685914,
+ 1.7986898645401002,
+ 1.8020886263275147,
+ 1.7947696161270141,
+ 1.7940981402206422,
+ 1.7886089331436157,
+ 1.7890249035263062,
+ 1.7892986505889892,
+ 1.7865815286636353,
+ 1.7821338254547119,
+ 1.7815338650512695,
+ 1.7780959701919556,
+ 1.7746984979629516,
+ 1.7739060324478149,
+ 1.7781290399169922,
+ 1.7747572018051148,
+ 1.7699797783660889,
+ 1.7655833713150024,
+ 1.767660176963806,
+ 1.7649168865203857,
+ 1.762353595352173,
+ 1.7620251220321654,
+ 1.7568823919296264,
+ 1.7637593435668946,
+ 1.758452812423706,
+ 1.7617782683944703,
+ 1.75097875831604,
+ 1.7536098889160157,
+ 1.7537783139801026,
+ 1.7496310552597045,
+ 1.7481205722045898,
+ 1.746379077758789,
+ 1.748953713645935,
+ 1.7454774743652344,
+ 1.7429656470108033,
+ 1.7443698706436157,
+ 1.7438769147491455,
+ 1.741790360183716,
+ 1.7384765851593018,
+ 1.737857723007202,
+ 1.7372428884124755,
+ 1.7377281158828735,
+ 1.7380989352416991,
+ 1.7357493377685547,
+ 1.7356955800628662,
+ 1.7379692654418946,
+ 1.7327075168609618,
+ 1.7319425884246826,
+ 1.7306698516082764,
+ 1.7282980773544312,
+ 1.7296418505477906,
+ 1.7297362589263916,
+ 1.728454496421814,
+ 1.7303657376098633,
+ 1.7310565896987915,
+ 1.7265128871917725,
+ 1.7292877883911133,
+ 1.7294651891708375,
+ 1.7270633559417725,
+ 1.7284353275299071,
+ 1.7280301824188233,
+ 1.7230924905776976,
+ 1.7238229986190796,
+ 1.7292125368499756,
+ 1.7265778392791749,
+ 1.727167644920349,
+ 1.7264636569595337,
+ 1.7247360497283934,
+ 1.7212902561187744,
+ 1.7221066110229493,
+ 1.7266186252212525,
+ 1.7269180416488648
+ ],
+ "train_acc": [
+ 0.24122,
+ 0.28436,
+ 0.299,
+ 0.308,
+ 0.31228,
+ 0.31872,
+ 0.32314,
+ 0.32812,
+ 0.32904,
+ 0.33368,
+ 0.33254,
+ 0.33406,
+ 0.33382,
+ 0.33696,
+ 0.33812,
+ 0.33908,
+ 0.34066,
+ 0.34344,
+ 0.34602,
+ 0.3438,
+ 0.34686,
+ 0.3481,
+ 0.3424,
+ 0.3496,
+ 0.3543,
+ 0.35444,
+ 0.35368,
+ 0.35296,
+ 0.35348,
+ 0.35554,
+ 0.35894,
+ 0.3566,
+ 0.35682,
+ 0.36212,
+ 0.35982,
+ 0.35886,
+ 0.36128,
+ 0.3635,
+ 0.36448,
+ 0.36314,
+ 0.36606,
+ 0.3656,
+ 0.36578,
+ 0.36614,
+ 0.36982,
+ 0.36962,
+ 0.37046,
+ 0.37016,
+ 0.36992,
+ 0.37214,
+ 0.3727,
+ 0.37368,
+ 0.37504,
+ 0.37302,
+ 0.37496,
+ 0.37264,
+ 0.37418,
+ 0.37714,
+ 0.37492,
+ 0.37612,
+ 0.377,
+ 0.37596,
+ 0.37848,
+ 0.37688,
+ 0.37832,
+ 0.3779,
+ 0.37974,
+ 0.38196,
+ 0.38234,
+ 0.37998,
+ 0.38076,
+ 0.37942,
+ 0.37988,
+ 0.38244,
+ 0.38388,
+ 0.38314,
+ 0.38488,
+ 0.38358,
+ 0.3841,
+ 0.38562,
+ 0.38478,
+ 0.3818,
+ 0.38364,
+ 0.38524,
+ 0.38452,
+ 0.38266,
+ 0.38434,
+ 0.38448,
+ 0.38532,
+ 0.38844,
+ 0.38614,
+ 0.38436,
+ 0.3866,
+ 0.38558,
+ 0.38382,
+ 0.38576,
+ 0.38752,
+ 0.3862,
+ 0.3878,
+ 0.38596
+ ],
+ "test_acc": [
+ 0.2906,
+ 0.315,
+ 0.3412,
+ 0.3441,
+ 0.3313,
+ 0.3339,
+ 0.3449,
+ 0.3436,
+ 0.3633,
+ 0.3441,
+ 0.359,
+ 0.3384,
+ 0.3562,
+ 0.3694,
+ 0.3664,
+ 0.3678,
+ 0.3625,
+ 0.3697,
+ 0.3793,
+ 0.3753,
+ 0.3826,
+ 0.3718,
+ 0.381,
+ 0.3826,
+ 0.3821,
+ 0.3792,
+ 0.3812,
+ 0.3575,
+ 0.3868,
+ 0.382,
+ 0.383,
+ 0.3859,
+ 0.3914,
+ 0.3883,
+ 0.3907,
+ 0.3941,
+ 0.3969,
+ 0.3849,
+ 0.387,
+ 0.3904,
+ 0.3945,
+ 0.3886,
+ 0.3923,
+ 0.3966,
+ 0.3963,
+ 0.3937,
+ 0.395,
+ 0.3874,
+ 0.3891,
+ 0.3962,
+ 0.3873,
+ 0.3903,
+ 0.3954,
+ 0.3911,
+ 0.3956,
+ 0.3948,
+ 0.3985,
+ 0.3938,
+ 0.3964,
+ 0.3949,
+ 0.398,
+ 0.3955,
+ 0.3973,
+ 0.3924,
+ 0.3965,
+ 0.4008,
+ 0.3918,
+ 0.4006,
+ 0.3975,
+ 0.3976,
+ 0.3974,
+ 0.3984,
+ 0.4018,
+ 0.3991,
+ 0.3957,
+ 0.4003,
+ 0.4007,
+ 0.4003,
+ 0.4027,
+ 0.4018,
+ 0.4003,
+ 0.4011,
+ 0.3991,
+ 0.4017,
+ 0.4012,
+ 0.4021,
+ 0.4007,
+ 0.4004,
+ 0.4021,
+ 0.4026,
+ 0.4015,
+ 0.4035,
+ 0.4023,
+ 0.4022,
+ 0.4028,
+ 0.4034,
+ 0.4025,
+ 0.4021,
+ 0.4025,
+ 0.4025
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.018669456243515015,
+ 0.056162357330322266,
+ -0.0088431341573596,
+ -0.008182319812476635,
+ -0.055274538695812225,
+ -0.005170345772057772,
+ 0.014380814507603645,
+ -0.0468558594584465,
+ -0.06233042851090431,
+ -0.04816172271966934,
+ -0.019947674125432968,
+ 0.9990314245223999
+ ],
+ "perturbation_rho": [
+ 0.03642716258764267,
+ 0.02892448753118515,
+ 0.03735572472214699,
+ -0.00921887531876564,
+ -0.014430028386414051,
+ 0.03254326060414314,
+ 0.005076523870229721,
+ 0.0074741230346262455,
+ 0.035068579018116,
+ 0.027692969888448715,
+ -0.0010530222207307816,
+ 0.018331632018089294
+ ],
+ "nudging": {
+ "0.001": [
+ -1.2880191206932068e-06,
+ -2.0337756723165512e-07,
+ -1.0291114449501038e-07,
+ -6.391201168298721e-08,
+ 8.12578946352005e-08,
+ -4.307366907596588e-09,
+ 9.313225746154785e-09,
+ 1.1568772606551647e-07,
+ 6.877235136926174e-08,
+ 4.470348358154297e-08,
+ 2.6775524020195007e-08,
+ -1.1273659765720367e-06
+ ],
+ "0.003": [
+ -3.7905119825154543e-06,
+ -6.791669875383377e-07,
+ -9.001814760267735e-08,
+ -7.08096195012331e-08,
+ 2.7276109904050827e-07,
+ -5.8818841353058815e-08,
+ -9.292853064835072e-08,
+ 2.6039197109639645e-07,
+ 2.7110218070447445e-07,
+ 1.3009412214159966e-07,
+ 6.56291376799345e-08,
+ -4.065892426297069e-06
+ ],
+ "0.01": [
+ -1.25557417050004e-05,
+ -2.2551976144313812e-06,
+ 8.774804882705212e-08,
+ -2.8230715543031693e-09,
+ 8.539936970919371e-07,
+ -6.600748747587204e-08,
+ -3.3914693631231785e-07,
+ 7.495400495827198e-07,
+ 8.926435839384794e-07,
+ 5.459296517074108e-07,
+ 2.251181285828352e-07,
+ -1.443939981982112e-05
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 6788.72314453125,
+ 99873.7890625,
+ 239906.21875,
+ 498097.5,
+ 936403.5,
+ 1120419.5,
+ 1148044.0,
+ 1243337.25,
+ 1465607.625,
+ 1820056.25,
+ 1916954.25,
+ 2080373.375,
+ 1057581.375
+ ],
+ "bp_grad_norms_per_layer": [
+ 3.21922343573533e-05,
+ 1.926036247823504e-06,
+ 7.882048862484226e-07,
+ 6.652410888818849e-07,
+ 6.507269176836417e-07,
+ 6.543205017806031e-07,
+ 6.41887766050786e-07,
+ 6.365870603985968e-07,
+ 6.355120376611012e-07,
+ 6.336023261610535e-07,
+ 6.346273266899516e-07,
+ 6.351577326313418e-07,
+ 6.350035732793913e-07
+ ]
+ },
+ "drift": {
+ "embed.weight": 47.675119188037286,
+ "embed.bias": 12.598388666523805,
+ "blocks.0.ln.weight": 1.151033318517311,
+ "blocks.0.w1.weight": 16.377677244547872,
+ "blocks.0.w1.bias": 12.516910644554024,
+ "blocks.0.w2.weight": 52.57068443004618,
+ "blocks.1.ln.weight": 0.9655458779833542,
+ "blocks.1.w1.weight": 18.881599150212853,
+ "blocks.1.w1.bias": 9.530569072308776,
+ "blocks.1.w2.weight": 43.92118513197058,
+ "blocks.2.ln.weight": 0.7887174233304558,
+ "blocks.2.w1.weight": 18.824409898016874,
+ "blocks.2.w1.bias": 13.563387947054077,
+ "blocks.2.w2.weight": 31.07735174337715,
+ "blocks.3.ln.weight": 0.8171227981326923,
+ "blocks.3.w1.weight": 19.539674249572744,
+ "blocks.3.w1.bias": 18.146478276910425,
+ "blocks.3.w2.weight": 35.53729462516465,
+ "blocks.4.ln.weight": 0.6263369507412071,
+ "blocks.4.w1.weight": 16.92487352504901,
+ "blocks.4.w1.bias": 15.976801095115205,
+ "blocks.4.w2.weight": 29.94254311653644,
+ "blocks.5.ln.weight": 0.6487257363749984,
+ "blocks.5.w1.weight": 17.08776989967922,
+ "blocks.5.w1.bias": 11.932105261833147,
+ "blocks.5.w2.weight": 57.38022751451892,
+ "blocks.6.ln.weight": 0.6775998262875462,
+ "blocks.6.w1.weight": 18.367003078140872,
+ "blocks.6.w1.bias": 14.665116127192782,
+ "blocks.6.w2.weight": 54.62506371390311,
+ "blocks.7.ln.weight": 0.7036757447327185,
+ "blocks.7.w1.weight": 19.393650716681655,
+ "blocks.7.w1.bias": 17.4299147560077,
+ "blocks.7.w2.weight": 46.77523522935725,
+ "blocks.8.ln.weight": 0.7270809437825937,
+ "blocks.8.w1.weight": 21.877072467037458,
+ "blocks.8.w1.bias": 19.917426863463785,
+ "blocks.8.w2.weight": 42.759258263025565,
+ "blocks.9.ln.weight": 0.6050917355676333,
+ "blocks.9.w1.weight": 17.155375601849993,
+ "blocks.9.w1.bias": 15.811293825336426,
+ "blocks.9.w2.weight": 33.7630968788062,
+ "blocks.10.ln.weight": 0.6513788383873166,
+ "blocks.10.w1.weight": 18.151371658733705,
+ "blocks.10.w1.bias": 17.557942417321982,
+ "blocks.10.w2.weight": 38.33917690306304,
+ "blocks.11.ln.weight": 0.6877634546861944,
+ "blocks.11.w1.weight": 19.58584700755457,
+ "blocks.11.w1.bias": 19.31054232174018,
+ "blocks.11.w2.weight": 38.09422640487883,
+ "out_ln.weight": 0.3216566822185731,
+ "out_head.weight": 6.321448993543088,
+ "out_head.bias": 1.6265612863801873
+ }
+ }
+ },
+ "config": {
+ "dataset": "cifar10",
+ "d_hidden": 512,
+ "num_blocks": 12,
+ "batch_size": 128,
+ "epochs": 100,
+ "lr": 0.001,
+ "lr_fb": 0.001,
+ "wd": 0.01,
+ "lam": 0.1,
+ "K": 4,
+ "sigma_bridge": 0.05,
+ "ema_momentum": 0.995,
+ "term_grad_weight": 1.0,
+ "seeds": [
+ 2
+ ],
+ "gpu": 0,
+ "output_dir": "results/fa_dfa_d512_L12_seed2",
+ "methods": [
+ "fa",
+ "dfa"
+ ],
+ "random_targets": false,
+ "penalty_lam": 0.0,
+ "num_classes": 10
+ }
+} \ No newline at end of file
diff --git a/results/fa_dfa_d512_L12_seed3/results_cifar10.json b/results/fa_dfa_d512_L12_seed3/results_cifar10.json
new file mode 100644
index 0000000..b0d47db
--- /dev/null
+++ b/results/fa_dfa_d512_L12_seed3/results_cifar10.json
@@ -0,0 +1,969 @@
+{
+ "3": {
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 2.0758749111938477,
+ 2.0449016705703738,
+ 2.0355339753723145,
+ 2.033782032623291,
+ 2.0313166973876955,
+ 2.0272680778503416,
+ 2.0246013010406494,
+ 2.0257189083099365,
+ 2.0217166822052004,
+ 2.0203594020080566,
+ 2.0174152685165407,
+ 2.018405201034546,
+ 2.0147499544525145,
+ 2.014394051055908,
+ 2.016394521636963,
+ 2.012079556236267,
+ 2.0137909897613526,
+ 2.014015982284546,
+ 2.0109746593475344,
+ 2.014003984375,
+ 2.0116691515350342,
+ 2.0092717248535155,
+ 2.008394868850708,
+ 2.0091352093887327,
+ 2.0081222701644896,
+ 2.008312914390564,
+ 2.0076249710464475,
+ 2.007539535140991,
+ 2.006514790344238,
+ 2.0067833641052246,
+ 2.002747939300537,
+ 2.0058067621612548,
+ 2.0007595862197878,
+ 2.0042390581512453,
+ 2.0029916175842284,
+ 2.0036084170150756,
+ 2.000785510559082,
+ 2.0037371703720095,
+ 2.001269952163696,
+ 2.0031689390563967,
+ 2.002326145706177,
+ 2.0042856772232054,
+ 2.001963946914673,
+ 1.9995613592147827,
+ 2.000965595703125,
+ 2.0001352091979983,
+ 2.0018488079071046,
+ 2.00168183052063,
+ 2.002008575668335,
+ 1.9990670106506347,
+ 2.000030261154175,
+ 2.0025694244766234,
+ 2.000111082458496,
+ 2.0006912731170656,
+ 1.9988810729980468,
+ 1.9988630979156494,
+ 1.9988078353118897,
+ 1.9998760187530518,
+ 1.999488633041382,
+ 1.9987801266479492,
+ 2.0005314921569823,
+ 1.9992797033691405,
+ 1.9988488864135743,
+ 1.9994260061645508,
+ 1.99964618309021,
+ 1.997838856201172,
+ 1.9974179007339476,
+ 1.9974302843475342,
+ 1.9990705501556396,
+ 1.9974287271499633,
+ 1.9969496033477783,
+ 1.9965652766418458,
+ 1.9951063619613647,
+ 1.997466404724121,
+ 1.9966685424804687,
+ 1.9972340461730957,
+ 1.9965911829376222,
+ 1.9977326393127441,
+ 1.9962291263580323,
+ 1.9955562910461426,
+ 1.9967020976257324,
+ 1.9967882759094238,
+ 1.9950329151916504,
+ 1.99679522026062,
+ 1.9955974411773683,
+ 1.9971681539154054,
+ 1.996667978439331,
+ 1.99505258934021,
+ 1.9948117360687256,
+ 1.9953517591094971,
+ 1.994836011428833,
+ 1.9956397792816163,
+ 1.9950232530212402,
+ 1.9973082242584228,
+ 1.9949973779296875,
+ 1.995920922012329,
+ 1.9955849953460694,
+ 1.9965253795623779,
+ 1.995772368774414,
+ 1.9941626037216187
+ ],
+ "train_acc": [
+ 0.2279,
+ 0.24072,
+ 0.24646,
+ 0.24854,
+ 0.25044,
+ 0.25614,
+ 0.25482,
+ 0.25184,
+ 0.25618,
+ 0.25788,
+ 0.25794,
+ 0.25986,
+ 0.25826,
+ 0.26054,
+ 0.26198,
+ 0.26164,
+ 0.26276,
+ 0.26056,
+ 0.26338,
+ 0.26482,
+ 0.26374,
+ 0.27,
+ 0.26544,
+ 0.26496,
+ 0.26726,
+ 0.26608,
+ 0.2677,
+ 0.26784,
+ 0.26822,
+ 0.26688,
+ 0.27298,
+ 0.2674,
+ 0.27316,
+ 0.26942,
+ 0.27202,
+ 0.2707,
+ 0.27076,
+ 0.2708,
+ 0.27202,
+ 0.27326,
+ 0.27412,
+ 0.27078,
+ 0.27212,
+ 0.27422,
+ 0.27278,
+ 0.27348,
+ 0.27218,
+ 0.27472,
+ 0.27368,
+ 0.27624,
+ 0.2752,
+ 0.27146,
+ 0.27238,
+ 0.27332,
+ 0.27558,
+ 0.27502,
+ 0.274,
+ 0.27478,
+ 0.2765,
+ 0.27494,
+ 0.2753,
+ 0.2748,
+ 0.27594,
+ 0.27726,
+ 0.27356,
+ 0.27716,
+ 0.27596,
+ 0.27972,
+ 0.277,
+ 0.27616,
+ 0.27622,
+ 0.2769,
+ 0.27584,
+ 0.27614,
+ 0.27692,
+ 0.27662,
+ 0.27758,
+ 0.27612,
+ 0.27734,
+ 0.27778,
+ 0.2763,
+ 0.2761,
+ 0.2776,
+ 0.27728,
+ 0.27692,
+ 0.27838,
+ 0.27818,
+ 0.2777,
+ 0.27922,
+ 0.27792,
+ 0.27694,
+ 0.27868,
+ 0.27922,
+ 0.2776,
+ 0.27966,
+ 0.27774,
+ 0.28024,
+ 0.27544,
+ 0.27852,
+ 0.28028
+ ],
+ "test_acc": [
+ 0.2245,
+ 0.2534,
+ 0.2569,
+ 0.2594,
+ 0.2706,
+ 0.2666,
+ 0.2759,
+ 0.276,
+ 0.2493,
+ 0.2911,
+ 0.2656,
+ 0.2816,
+ 0.2759,
+ 0.2916,
+ 0.302,
+ 0.2776,
+ 0.2854,
+ 0.2814,
+ 0.2838,
+ 0.2796,
+ 0.2881,
+ 0.2831,
+ 0.2907,
+ 0.2949,
+ 0.2872,
+ 0.2857,
+ 0.3067,
+ 0.301,
+ 0.2915,
+ 0.2936,
+ 0.2727,
+ 0.2971,
+ 0.2768,
+ 0.2756,
+ 0.2752,
+ 0.2918,
+ 0.2942,
+ 0.2706,
+ 0.2698,
+ 0.3002,
+ 0.2978,
+ 0.2896,
+ 0.2879,
+ 0.2922,
+ 0.2897,
+ 0.2955,
+ 0.2922,
+ 0.2979,
+ 0.3049,
+ 0.2838,
+ 0.2864,
+ 0.2929,
+ 0.3024,
+ 0.2947,
+ 0.2939,
+ 0.3001,
+ 0.2966,
+ 0.2875,
+ 0.288,
+ 0.2917,
+ 0.2948,
+ 0.2922,
+ 0.2926,
+ 0.2969,
+ 0.2958,
+ 0.2974,
+ 0.3028,
+ 0.2923,
+ 0.2926,
+ 0.2949,
+ 0.2849,
+ 0.2976,
+ 0.299,
+ 0.3005,
+ 0.291,
+ 0.297,
+ 0.2947,
+ 0.294,
+ 0.2974,
+ 0.2953,
+ 0.2958,
+ 0.293,
+ 0.292,
+ 0.3,
+ 0.2927,
+ 0.2969,
+ 0.2968,
+ 0.297,
+ 0.296,
+ 0.2958,
+ 0.2974,
+ 0.2971,
+ 0.2965,
+ 0.2957,
+ 0.2976,
+ 0.2961,
+ 0.2969,
+ 0.2969,
+ 0.2968,
+ 0.297
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.3807877004146576,
+ -5.573713860940188e-05,
+ 0.00026205976610071957,
+ 5.7832341553876176e-05,
+ -0.0006758540403097868,
+ 9.82169367489405e-05,
+ -0.0004489597922656685,
+ 0.00012056646664859727,
+ 0.00039241870399564505,
+ 0.0004099192447029054,
+ -0.00038764585042372346,
+ 9.827437315834686e-05
+ ],
+ "perturbation_rho": [
+ 0.0020497534424066544,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "nudging": {
+ "0.001": [
+ -3.2549723982810974e-07,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.003": [
+ -1.0975636541843414e-06,
+ 0.0,
+ -5.122274160385132e-09,
+ -9.313225746154785e-10,
+ 0.0,
+ 0.0,
+ 0.0,
+ -2.7939677238464355e-09,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.01": [
+ -3.623310476541519e-06,
+ 2.7939677238464355e-09,
+ -3.259629011154175e-09,
+ -2.7939677238464355e-09,
+ 0.0,
+ 0.0,
+ 0.0,
+ -1.862645149230957e-09,
+ -9.313225746154785e-10,
+ 0.0,
+ 0.0,
+ -1.862645149230957e-09
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 53572.55078125,
+ 1372884736.0,
+ 1772287744.0,
+ 3241988608.0,
+ 5805380608.0,
+ 6317006848.0,
+ 6381533184.0,
+ 6840990720.0,
+ 8109968896.0,
+ 8596752384.0,
+ 8790169600.0,
+ 9575925760.0,
+ 10357586944.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.5510743739687314e-07,
+ 2.2633883656197895e-10,
+ 2.2594967563627222e-10,
+ 2.2604293437034073e-10,
+ 2.2608663552414754e-10,
+ 2.2616231110106355e-10,
+ 2.2606677640979456e-10,
+ 2.2607062055701732e-10,
+ 2.2602890392686703e-10,
+ 2.2608405425561529e-10,
+ 2.2606443106365504e-10,
+ 2.26052662699594e-10,
+ 2.2604719485119773e-10
+ ]
+ },
+ "drift": {
+ "embed.weight": 327.43241116948093,
+ "embed.bias": 223.24151222733428,
+ "blocks.0.ln.weight": 9.973600730927519,
+ "blocks.0.w1.weight": 298.8679532598482,
+ "blocks.0.w1.bias": 266.1095556704987,
+ "blocks.0.w2.weight": 476.45333568353897,
+ "blocks.1.ln.weight": 7.306781436470832,
+ "blocks.1.w1.weight": 230.21983409061758,
+ "blocks.1.w1.bias": 212.39608458469817,
+ "blocks.1.w2.weight": 257.8517804786634,
+ "blocks.2.ln.weight": 8.853410982440527,
+ "blocks.2.w1.weight": 338.8735798549029,
+ "blocks.2.w1.bias": 298.9157804791883,
+ "blocks.2.w2.weight": 337.76450040174166,
+ "blocks.3.ln.weight": 9.715802278340659,
+ "blocks.3.w1.weight": 404.66894924511104,
+ "blocks.3.w1.bias": 372.47402373621406,
+ "blocks.3.w2.weight": 404.8085148871707,
+ "blocks.4.ln.weight": 8.048402116007853,
+ "blocks.4.w1.weight": 327.8249969525614,
+ "blocks.4.w1.bias": 304.9268816131629,
+ "blocks.4.w2.weight": 317.1280523416376,
+ "blocks.5.ln.weight": 6.247399813356893,
+ "blocks.5.w1.weight": 221.81137144150296,
+ "blocks.5.w1.bias": 207.34062578724078,
+ "blocks.5.w2.weight": 219.43839912847176,
+ "blocks.6.ln.weight": 8.827579347133504,
+ "blocks.6.w1.weight": 332.6040891021855,
+ "blocks.6.w1.bias": 301.2387289035309,
+ "blocks.6.w2.weight": 301.27670926969427,
+ "blocks.7.ln.weight": 10.285108430777552,
+ "blocks.7.w1.weight": 406.88401472243066,
+ "blocks.7.w1.bias": 378.73147854832933,
+ "blocks.7.w2.weight": 385.868939931865,
+ "blocks.8.ln.weight": 8.800845432078118,
+ "blocks.8.w1.weight": 349.4923999527594,
+ "blocks.8.w1.bias": 314.4618082483143,
+ "blocks.8.w2.weight": 312.34600503490987,
+ "blocks.9.ln.weight": 7.894181737045302,
+ "blocks.9.w1.weight": 310.5073132382578,
+ "blocks.9.w1.bias": 286.9185161607037,
+ "blocks.9.w2.weight": 283.5451705498602,
+ "blocks.10.ln.weight": 9.369016654158887,
+ "blocks.10.w1.weight": 380.3135567962096,
+ "blocks.10.w1.bias": 366.5904487269434,
+ "blocks.10.w2.weight": 364.5707271107058,
+ "blocks.11.ln.weight": 9.823259210793506,
+ "blocks.11.w1.weight": 387.5149132383442,
+ "blocks.11.w1.bias": 358.0799340264483,
+ "blocks.11.w2.weight": 346.4609492966194,
+ "out_ln.weight": 0.671169228619093,
+ "out_head.weight": 9.171587508256877,
+ "out_head.bias": 0.43138812866351306
+ }
+ },
+ "fa": {
+ "log": {
+ "train_loss": [
+ 2.0396657500457764,
+ 1.9608277139282226,
+ 1.9326482776641847,
+ 1.9163381662750245,
+ 1.9034836812591553,
+ 1.890145294265747,
+ 1.8797314615631104,
+ 1.8768412320327759,
+ 1.867796627883911,
+ 1.8607218304824829,
+ 1.8534223027801513,
+ 1.8497662923431397,
+ 1.8423005596923827,
+ 1.836013819656372,
+ 1.8322983874511718,
+ 1.8273781113052368,
+ 1.8277745475006104,
+ 1.8251262734985352,
+ 1.820411690635681,
+ 1.8177711023712158,
+ 1.8161355939102173,
+ 1.811638991355896,
+ 1.8096345615005494,
+ 1.809067364845276,
+ 1.8052070534896851,
+ 1.806075843887329,
+ 1.8016806897735596,
+ 1.8036705466079712,
+ 1.8012689702606202,
+ 1.7954118814086915,
+ 1.7932103118515015,
+ 1.7936329647064209,
+ 1.7884179528427124,
+ 1.7905398846817016,
+ 1.7887225045013428,
+ 1.7856135736846923,
+ 1.7844085126495361,
+ 1.7840350121307373,
+ 1.774809980545044,
+ 1.778959913368225,
+ 1.771520369796753,
+ 1.775222759399414,
+ 1.7740441353607177,
+ 1.7697435302734374,
+ 1.7660783060073852,
+ 1.7702557649993897,
+ 1.7655815365600587,
+ 1.767026780052185,
+ 1.7612031787490845,
+ 1.7588948153686523,
+ 1.7596543489837646,
+ 1.7577299001312257,
+ 1.7578089023590089,
+ 1.7566554468154907,
+ 1.7528888186264038,
+ 1.7549751110076903,
+ 1.7530736223983765,
+ 1.7534073949813842,
+ 1.7489603903198243,
+ 1.7487286296844482,
+ 1.7519352731704712,
+ 1.7486104052734375,
+ 1.7467161587142945,
+ 1.748152449569702,
+ 1.7439867239379883,
+ 1.7447851418304443,
+ 1.7409665993499757,
+ 1.7442577163314819,
+ 1.7420410083770752,
+ 1.7413771923446655,
+ 1.7416430898666382,
+ 1.7407120821762085,
+ 1.7343293948745728,
+ 1.7396172916412354,
+ 1.7371713684844972,
+ 1.7387296481323242,
+ 1.736112067642212,
+ 1.733493857727051,
+ 1.7360445062637329,
+ 1.7351057822036744,
+ 1.734473055152893,
+ 1.7352992589569092,
+ 1.7343194945907592,
+ 1.735215076599121,
+ 1.733106077194214,
+ 1.7345250820159912,
+ 1.7314182668304443,
+ 1.731066604309082,
+ 1.730784634399414,
+ 1.7321079010391236,
+ 1.7304316757583618,
+ 1.7294625887298585,
+ 1.730209889831543,
+ 1.7305309671401978,
+ 1.7275332722854615,
+ 1.7291330585098266,
+ 1.7289624264526366,
+ 1.7314536703109742,
+ 1.7267744286346436,
+ 1.7254689588165284
+ ],
+ "train_acc": [
+ 0.2449,
+ 0.27998,
+ 0.2977,
+ 0.30396,
+ 0.3107,
+ 0.31656,
+ 0.32312,
+ 0.31882,
+ 0.32358,
+ 0.3291,
+ 0.32752,
+ 0.33508,
+ 0.33836,
+ 0.33794,
+ 0.34144,
+ 0.34536,
+ 0.34406,
+ 0.34554,
+ 0.34712,
+ 0.34862,
+ 0.34952,
+ 0.35122,
+ 0.35074,
+ 0.35326,
+ 0.3523,
+ 0.35342,
+ 0.35338,
+ 0.35376,
+ 0.35524,
+ 0.35476,
+ 0.35774,
+ 0.3577,
+ 0.3623,
+ 0.3571,
+ 0.35802,
+ 0.36086,
+ 0.3604,
+ 0.36042,
+ 0.36226,
+ 0.3633,
+ 0.3655,
+ 0.36332,
+ 0.36392,
+ 0.3675,
+ 0.36718,
+ 0.36608,
+ 0.36696,
+ 0.36686,
+ 0.37226,
+ 0.37178,
+ 0.37022,
+ 0.3701,
+ 0.3706,
+ 0.37212,
+ 0.375,
+ 0.37474,
+ 0.37334,
+ 0.37296,
+ 0.37436,
+ 0.37478,
+ 0.37246,
+ 0.37528,
+ 0.37576,
+ 0.37532,
+ 0.37498,
+ 0.37548,
+ 0.3776,
+ 0.37842,
+ 0.37692,
+ 0.37798,
+ 0.37794,
+ 0.37798,
+ 0.38094,
+ 0.38068,
+ 0.37932,
+ 0.37848,
+ 0.38182,
+ 0.37976,
+ 0.38048,
+ 0.381,
+ 0.3807,
+ 0.37908,
+ 0.38194,
+ 0.38326,
+ 0.38306,
+ 0.38104,
+ 0.3811,
+ 0.3813,
+ 0.38256,
+ 0.38198,
+ 0.38276,
+ 0.3842,
+ 0.3819,
+ 0.38484,
+ 0.38298,
+ 0.38164,
+ 0.38186,
+ 0.38012,
+ 0.38642,
+ 0.38538
+ ],
+ "test_acc": [
+ 0.2636,
+ 0.3156,
+ 0.3187,
+ 0.3379,
+ 0.3384,
+ 0.3469,
+ 0.3529,
+ 0.3526,
+ 0.3431,
+ 0.3598,
+ 0.3512,
+ 0.3619,
+ 0.3655,
+ 0.3755,
+ 0.3804,
+ 0.3699,
+ 0.3784,
+ 0.3643,
+ 0.3669,
+ 0.3644,
+ 0.3728,
+ 0.376,
+ 0.3802,
+ 0.3798,
+ 0.388,
+ 0.3631,
+ 0.3848,
+ 0.3802,
+ 0.3861,
+ 0.3829,
+ 0.3808,
+ 0.3865,
+ 0.3601,
+ 0.3915,
+ 0.371,
+ 0.3833,
+ 0.3932,
+ 0.3933,
+ 0.3827,
+ 0.3896,
+ 0.394,
+ 0.3837,
+ 0.3944,
+ 0.388,
+ 0.3896,
+ 0.3983,
+ 0.3933,
+ 0.3988,
+ 0.3997,
+ 0.3956,
+ 0.3967,
+ 0.3981,
+ 0.4041,
+ 0.4021,
+ 0.3873,
+ 0.4015,
+ 0.4031,
+ 0.3996,
+ 0.4016,
+ 0.401,
+ 0.403,
+ 0.4044,
+ 0.4021,
+ 0.4006,
+ 0.4062,
+ 0.4035,
+ 0.4027,
+ 0.4066,
+ 0.4081,
+ 0.4059,
+ 0.4023,
+ 0.4111,
+ 0.4095,
+ 0.406,
+ 0.4046,
+ 0.4048,
+ 0.4081,
+ 0.4067,
+ 0.4088,
+ 0.4074,
+ 0.4045,
+ 0.4073,
+ 0.408,
+ 0.4073,
+ 0.4066,
+ 0.4076,
+ 0.4064,
+ 0.4063,
+ 0.4088,
+ 0.4087,
+ 0.4081,
+ 0.4092,
+ 0.4088,
+ 0.4086,
+ 0.4093,
+ 0.4103,
+ 0.4091,
+ 0.4094,
+ 0.4087,
+ 0.4088
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.033972106873989105,
+ 0.070818230509758,
+ 0.01647794246673584,
+ -0.04525557532906532,
+ -0.03256663307547569,
+ -0.03431400656700134,
+ 0.011294779367744923,
+ -0.034145478159189224,
+ -0.05482051521539688,
+ 0.018059976398944855,
+ -0.030339818447828293,
+ 0.9974545836448669
+ ],
+ "perturbation_rho": [
+ 0.06674668192863464,
+ 0.0040874360129237175,
+ -0.028121206909418106,
+ 0.033479828387498856,
+ -0.023402733728289604,
+ -0.02146291360259056,
+ 0.01769183948636055,
+ -0.01436183787882328,
+ 0.009981930255889893,
+ 0.04906001687049866,
+ 0.004424326121807098,
+ 0.01695885881781578
+ ],
+ "nudging": {
+ "0.001": [
+ -2.7818605303764343e-06,
+ -2.752931322902441e-07,
+ -6.530899554491043e-08,
+ -3.67872416973114e-08,
+ 3.2247044146060944e-08,
+ 4.889443516731262e-09,
+ -1.862645149230957e-09,
+ -4.237517714500427e-08,
+ -4.889443516731262e-09,
+ -1.3737007975578308e-08,
+ -2.7008354663848877e-08,
+ -6.719492375850677e-07
+ ],
+ "0.003": [
+ -8.350558346137404e-06,
+ -6.812333595007658e-07,
+ -1.474982127547264e-07,
+ 9.033828973770142e-08,
+ 5.844049155712128e-08,
+ 7.008202373981476e-08,
+ 6.984919309616089e-09,
+ 1.0186340659856796e-07,
+ 1.2223608791828156e-07,
+ -8.253846317529678e-08,
+ 7.729977369308472e-08,
+ -2.513494109734893e-06
+ ],
+ "0.01": [
+ -2.8048030799254775e-05,
+ -2.1727464627474546e-06,
+ -3.3923424780368805e-07,
+ 3.7439167499542236e-07,
+ 2.1606683731079102e-07,
+ 2.60770320892334e-07,
+ -9.790528565645218e-08,
+ 2.130400389432907e-07,
+ 3.862660378217697e-07,
+ -1.123407855629921e-07,
+ 2.7267378754913807e-07,
+ -8.881674148142338e-06
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 6954.1630859375,
+ 111134.6796875,
+ 557813.6875,
+ 996516.875,
+ 1403786.0,
+ 1685541.375,
+ 2023717.25,
+ 2137581.0,
+ 2278237.0,
+ 2302800.5,
+ 2343287.75,
+ 2358363.25,
+ 1859393.25
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.7945565307163633e-05,
+ 1.218010652337398e-06,
+ 4.458847513433284e-07,
+ 4.3005411498597823e-07,
+ 4.3150785700163397e-07,
+ 4.261477215550258e-07,
+ 4.2959365487149626e-07,
+ 4.2929926280521613e-07,
+ 4.298903206745308e-07,
+ 4.3081271883238514e-07,
+ 4.2645430653465155e-07,
+ 4.254479506471398e-07,
+ 4.048590369620797e-07
+ ]
+ },
+ "drift": {
+ "embed.weight": 48.84206517073718,
+ "embed.bias": 16.190805729960193,
+ "blocks.0.ln.weight": 1.1044050790999422,
+ "blocks.0.w1.weight": 16.916577725001215,
+ "blocks.0.w1.bias": 13.172179767406938,
+ "blocks.0.w2.weight": 57.6138058078061,
+ "blocks.1.ln.weight": 1.006892864310832,
+ "blocks.1.w1.weight": 20.474460499736093,
+ "blocks.1.w1.bias": 14.809778597172114,
+ "blocks.1.w2.weight": 56.78242938569469,
+ "blocks.2.ln.weight": 0.7251084712385265,
+ "blocks.2.w1.weight": 20.378823416992166,
+ "blocks.2.w1.bias": 19.146090983258652,
+ "blocks.2.w2.weight": 43.602875378949854,
+ "blocks.3.ln.weight": 0.5778874616748163,
+ "blocks.3.w1.weight": 19.87346377423969,
+ "blocks.3.w1.bias": 20.45559649243161,
+ "blocks.3.w2.weight": 29.273505770749868,
+ "blocks.4.ln.weight": 0.5907735236527697,
+ "blocks.4.w1.weight": 19.310944988230343,
+ "blocks.4.w1.bias": 19.98019865055604,
+ "blocks.4.w2.weight": 35.158811303772694,
+ "blocks.5.ln.weight": 0.5664442970803276,
+ "blocks.5.w1.weight": 21.492887279178838,
+ "blocks.5.w1.bias": 23.514956599646894,
+ "blocks.5.w2.weight": 31.178552638993263,
+ "blocks.6.ln.weight": 0.5602955775395394,
+ "blocks.6.w1.weight": 19.61043120210454,
+ "blocks.6.w1.bias": 21.095333255326253,
+ "blocks.6.w2.weight": 35.811762604119366,
+ "blocks.7.ln.weight": 0.5625727296567063,
+ "blocks.7.w1.weight": 18.807090394262456,
+ "blocks.7.w1.bias": 21.21463438586343,
+ "blocks.7.w2.weight": 37.013283750347604,
+ "blocks.8.ln.weight": 0.6306802678482845,
+ "blocks.8.w1.weight": 18.641671150147772,
+ "blocks.8.w1.bias": 18.967183417640936,
+ "blocks.8.w2.weight": 48.3826747365419,
+ "blocks.9.ln.weight": 0.587358912368524,
+ "blocks.9.w1.weight": 16.64124632095154,
+ "blocks.9.w1.bias": 15.919474132420179,
+ "blocks.9.w2.weight": 52.31462590866729,
+ "blocks.10.ln.weight": 0.5508428821937401,
+ "blocks.10.w1.weight": 15.626845109574424,
+ "blocks.10.w1.bias": 15.545502528098213,
+ "blocks.10.w2.weight": 49.44591721207919,
+ "blocks.11.ln.weight": 0.5806268804343621,
+ "blocks.11.w1.weight": 16.372036995921366,
+ "blocks.11.w1.bias": 13.944650611793428,
+ "blocks.11.w2.weight": 55.112106178217005,
+ "out_ln.weight": 0.3649173251620146,
+ "out_head.weight": 6.863263504617185,
+ "out_head.bias": 0.881340072926514
+ }
+ }
+ },
+ "config": {
+ "dataset": "cifar10",
+ "d_hidden": 512,
+ "num_blocks": 12,
+ "batch_size": 128,
+ "epochs": 100,
+ "lr": 0.001,
+ "lr_fb": 0.001,
+ "wd": 0.01,
+ "lam": 0.1,
+ "K": 4,
+ "sigma_bridge": 0.05,
+ "ema_momentum": 0.995,
+ "term_grad_weight": 1.0,
+ "seeds": [
+ 3
+ ],
+ "gpu": 0,
+ "output_dir": "results/fa_dfa_d512_L12_seed3",
+ "methods": [
+ "fa",
+ "dfa"
+ ],
+ "random_targets": false,
+ "penalty_lam": 0.0,
+ "num_classes": 10
+ }
+} \ No newline at end of file
diff --git a/results/fa_dfa_d512_L12_seed4/results_cifar10.json b/results/fa_dfa_d512_L12_seed4/results_cifar10.json
new file mode 100644
index 0000000..7eb8f8a
--- /dev/null
+++ b/results/fa_dfa_d512_L12_seed4/results_cifar10.json
@@ -0,0 +1,969 @@
+{
+ "4": {
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 2.0759089875793455,
+ 2.049546967468262,
+ 2.050352345352173,
+ 2.0449130432891844,
+ 2.0451894146728518,
+ 2.0407823961639404,
+ 2.031551725692749,
+ 2.0308942835998534,
+ 2.027821557159424,
+ 2.0279336640930175,
+ 2.030788267745972,
+ 2.027367984313965,
+ 2.023764753379822,
+ 2.0222284102630614,
+ 2.021787865447998,
+ 2.0210360749053957,
+ 2.020778656768799,
+ 2.0178001490020754,
+ 2.015317197265625,
+ 2.0181816483306885,
+ 2.0157879919433594,
+ 2.0184039527893067,
+ 2.0140462054443358,
+ 2.0133035342025756,
+ 2.0117346116638184,
+ 2.0124337507629395,
+ 2.015013382720947,
+ 2.0131209099578857,
+ 2.0119356175994874,
+ 2.0105034595489504,
+ 2.0114617578125,
+ 2.010921164550781,
+ 2.0091999086761474,
+ 2.0087246141052244,
+ 2.008987555999756,
+ 2.007724672088623,
+ 2.0079352700424193,
+ 2.0069879000854494,
+ 2.0099592947769165,
+ 2.0069592917633057,
+ 2.0085809383773805,
+ 2.007678118972778,
+ 2.006203598976135,
+ 2.0054306941986084,
+ 2.004828847351074,
+ 2.0060369828796385,
+ 2.0055570992279055,
+ 2.0053103733825686,
+ 2.004637515335083,
+ 2.0055320530700684,
+ 2.0035772978973387,
+ 2.0045177731323243,
+ 2.0053057807922365,
+ 2.004057818374634,
+ 2.0017144575500487,
+ 2.0030729360580444,
+ 2.002487395324707,
+ 2.0019396072387696,
+ 1.9998790828704833,
+ 2.003716873703003,
+ 2.001451046066284,
+ 1.9996818887329102,
+ 2.0031208934783935,
+ 2.0009771130371092,
+ 2.0052372956848146,
+ 2.000408243637085,
+ 2.0017312159729004,
+ 2.00103297542572,
+ 2.0006809278106688,
+ 1.999061644744873,
+ 1.9998556817626953,
+ 2.000588871498108,
+ 2.0007700952148437,
+ 2.001544753036499,
+ 2.000170311355591,
+ 2.0008300536346435,
+ 2.00009318069458,
+ 2.0018883587646483,
+ 1.9953095329284667,
+ 1.9990402968597412,
+ 1.9993244941711426,
+ 1.9997524154281616,
+ 1.9993522864151,
+ 1.999408567199707,
+ 2.000375003089905,
+ 2.0002292515563966,
+ 1.99836263092041,
+ 1.999543589744568,
+ 1.9990913265609742,
+ 1.9996358081436156,
+ 1.9965649541854857,
+ 1.9988786743164062,
+ 2.000610784988403,
+ 1.9997782401275634,
+ 1.9977047104644776,
+ 1.998138542137146,
+ 1.9980609845733643,
+ 1.9994125312805175,
+ 1.9971481538391114,
+ 1.999670766143799
+ ],
+ "train_acc": [
+ 0.2298,
+ 0.24208,
+ 0.24002,
+ 0.24094,
+ 0.24586,
+ 0.24526,
+ 0.2481,
+ 0.24888,
+ 0.25388,
+ 0.25086,
+ 0.24904,
+ 0.25404,
+ 0.25272,
+ 0.2537,
+ 0.2557,
+ 0.25446,
+ 0.25566,
+ 0.25962,
+ 0.25888,
+ 0.25898,
+ 0.25988,
+ 0.25998,
+ 0.26148,
+ 0.2599,
+ 0.26144,
+ 0.2642,
+ 0.2616,
+ 0.2624,
+ 0.26138,
+ 0.26398,
+ 0.2628,
+ 0.26448,
+ 0.26602,
+ 0.2662,
+ 0.26392,
+ 0.26558,
+ 0.26618,
+ 0.26806,
+ 0.26572,
+ 0.26504,
+ 0.26606,
+ 0.26642,
+ 0.26868,
+ 0.26806,
+ 0.2681,
+ 0.26742,
+ 0.26938,
+ 0.26904,
+ 0.26912,
+ 0.26822,
+ 0.26928,
+ 0.26642,
+ 0.26932,
+ 0.26744,
+ 0.2692,
+ 0.27166,
+ 0.27004,
+ 0.27224,
+ 0.26922,
+ 0.26924,
+ 0.2704,
+ 0.2713,
+ 0.27188,
+ 0.27038,
+ 0.2696,
+ 0.27172,
+ 0.26974,
+ 0.27234,
+ 0.27158,
+ 0.27154,
+ 0.27218,
+ 0.27162,
+ 0.271,
+ 0.27146,
+ 0.27242,
+ 0.27194,
+ 0.27084,
+ 0.27166,
+ 0.2737,
+ 0.27286,
+ 0.27226,
+ 0.27186,
+ 0.27026,
+ 0.27378,
+ 0.27114,
+ 0.27044,
+ 0.27224,
+ 0.27336,
+ 0.27488,
+ 0.27168,
+ 0.27362,
+ 0.27224,
+ 0.27022,
+ 0.2718,
+ 0.27058,
+ 0.27258,
+ 0.27366,
+ 0.2716,
+ 0.2725,
+ 0.27256
+ ],
+ "test_acc": [
+ 0.26,
+ 0.261,
+ 0.2579,
+ 0.2572,
+ 0.2579,
+ 0.2482,
+ 0.2715,
+ 0.2746,
+ 0.2616,
+ 0.2747,
+ 0.2443,
+ 0.2845,
+ 0.2756,
+ 0.2732,
+ 0.274,
+ 0.2875,
+ 0.2614,
+ 0.2732,
+ 0.2699,
+ 0.2708,
+ 0.2874,
+ 0.277,
+ 0.2787,
+ 0.2872,
+ 0.2775,
+ 0.2813,
+ 0.2711,
+ 0.2819,
+ 0.2716,
+ 0.2875,
+ 0.2749,
+ 0.279,
+ 0.2977,
+ 0.282,
+ 0.2738,
+ 0.2876,
+ 0.2788,
+ 0.2943,
+ 0.2736,
+ 0.2936,
+ 0.2773,
+ 0.2777,
+ 0.2891,
+ 0.2962,
+ 0.2734,
+ 0.2955,
+ 0.2867,
+ 0.2864,
+ 0.2834,
+ 0.2845,
+ 0.2853,
+ 0.2937,
+ 0.2705,
+ 0.292,
+ 0.2952,
+ 0.2875,
+ 0.2771,
+ 0.2837,
+ 0.2843,
+ 0.2887,
+ 0.291,
+ 0.2917,
+ 0.2763,
+ 0.2862,
+ 0.2897,
+ 0.2876,
+ 0.2865,
+ 0.2799,
+ 0.2772,
+ 0.2898,
+ 0.2823,
+ 0.2833,
+ 0.2875,
+ 0.2866,
+ 0.2771,
+ 0.28,
+ 0.2827,
+ 0.2895,
+ 0.2925,
+ 0.2896,
+ 0.2889,
+ 0.2882,
+ 0.2886,
+ 0.2864,
+ 0.2873,
+ 0.2909,
+ 0.2878,
+ 0.2848,
+ 0.286,
+ 0.2834,
+ 0.287,
+ 0.2888,
+ 0.2892,
+ 0.2871,
+ 0.2873,
+ 0.2878,
+ 0.2872,
+ 0.2875,
+ 0.2877,
+ 0.2878
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.36940303444862366,
+ 0.00047992257168516517,
+ 0.00055807048920542,
+ -0.000561900029424578,
+ -0.00015003856969997287,
+ 0.0003543531056493521,
+ -0.000369079178199172,
+ -7.983684918144718e-05,
+ -0.0001259066048078239,
+ 0.0002819746732711792,
+ -2.340562059544027e-05,
+ -5.241552571533248e-05
+ ],
+ "perturbation_rho": [
+ -0.0012638717889785767,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "nudging": {
+ "0.001": [
+ -2.9383227229118347e-07,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 1.1175870895385742e-08,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.003": [
+ -1.0235235095024109e-06,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 1.1175870895385742e-08,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.01": [
+ -3.4552067518234253e-06,
+ -8.381903171539307e-09,
+ 4.656612873077393e-10,
+ 9.313225746154785e-10,
+ -3.725290298461914e-09,
+ 0.0,
+ 1.1175870895385742e-08,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 56077.015625,
+ 1373153024.0,
+ 3371879936.0,
+ 5206262272.0,
+ 7341849088.0,
+ 8992748544.0,
+ 9065292800.0,
+ 9464700928.0,
+ 9539394560.0,
+ 10031855616.0,
+ 10931398656.0,
+ 11201608704.0,
+ 11850394624.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.49191884904576e-07,
+ 1.9557305774853262e-10,
+ 1.9563668740563145e-10,
+ 1.9543285045831027e-10,
+ 1.9541313012183537e-10,
+ 1.9539742046603692e-10,
+ 1.9536555706523018e-10,
+ 1.953735784265831e-10,
+ 1.9537529927227126e-10,
+ 1.9540973006382245e-10,
+ 1.9539764251064184e-10,
+ 1.9554310948244336e-10,
+ 1.9556442576451616e-10
+ ]
+ },
+ "drift": {
+ "embed.weight": 341.4389143853609,
+ "embed.bias": 269.18455752628876,
+ "blocks.0.ln.weight": 10.301521425392954,
+ "blocks.0.w1.weight": 298.4302439253081,
+ "blocks.0.w1.bias": 278.6169359900818,
+ "blocks.0.w2.weight": 499.7366737832077,
+ "blocks.1.ln.weight": 8.816575984910754,
+ "blocks.1.w1.weight": 341.9389165053106,
+ "blocks.1.w1.bias": 332.3162371142644,
+ "blocks.1.w2.weight": 347.85214832372776,
+ "blocks.2.ln.weight": 9.211748215059862,
+ "blocks.2.w1.weight": 389.3615048816918,
+ "blocks.2.w1.bias": 365.40848517889776,
+ "blocks.2.w2.weight": 370.8759731237476,
+ "blocks.3.ln.weight": 10.03198861563026,
+ "blocks.3.w1.weight": 406.5535727387553,
+ "blocks.3.w1.bias": 385.3233146773144,
+ "blocks.3.w2.weight": 394.381764481299,
+ "blocks.4.ln.weight": 10.474602013624727,
+ "blocks.4.w1.weight": 429.8395174539667,
+ "blocks.4.w1.bias": 400.4483643636673,
+ "blocks.4.w2.weight": 397.09630840545793,
+ "blocks.5.ln.weight": 7.5593503080049596,
+ "blocks.5.w1.weight": 296.69313861733195,
+ "blocks.5.w1.bias": 284.6870161884467,
+ "blocks.5.w2.weight": 262.57737559088133,
+ "blocks.6.ln.weight": 9.004407296024212,
+ "blocks.6.w1.weight": 356.77068113879443,
+ "blocks.6.w1.bias": 339.0443824568545,
+ "blocks.6.w2.weight": 315.61513347531445,
+ "blocks.7.ln.weight": 7.295378859292153,
+ "blocks.7.w1.weight": 264.4208821925752,
+ "blocks.7.w1.bias": 244.22970176166734,
+ "blocks.7.w2.weight": 243.628762418729,
+ "blocks.8.ln.weight": 8.932564788347374,
+ "blocks.8.w1.weight": 351.408355367191,
+ "blocks.8.w1.bias": 334.01069244258156,
+ "blocks.8.w2.weight": 326.1916980797317,
+ "blocks.9.ln.weight": 10.358840465999167,
+ "blocks.9.w1.weight": 415.0022329456538,
+ "blocks.9.w1.bias": 381.22980098380185,
+ "blocks.9.w2.weight": 375.2902593077766,
+ "blocks.10.ln.weight": 9.099883733701708,
+ "blocks.10.w1.weight": 357.2725104478134,
+ "blocks.10.w1.bias": 319.59512329402446,
+ "blocks.10.w2.weight": 347.7682207115722,
+ "blocks.11.ln.weight": 9.671063229293274,
+ "blocks.11.w1.weight": 383.4569677056522,
+ "blocks.11.w1.bias": 366.9661288886149,
+ "blocks.11.w2.weight": 358.31090602111163,
+ "out_ln.weight": 0.6468211624050442,
+ "out_head.weight": 9.099909010510617,
+ "out_head.bias": 0.5580580979411405
+ }
+ },
+ "fa": {
+ "log": {
+ "train_loss": [
+ 2.0478644479370116,
+ 1.9653604916763305,
+ 1.9457170885849,
+ 1.9292129779052734,
+ 1.911267426147461,
+ 1.8966463186264038,
+ 1.8790605630874633,
+ 1.8708782619476318,
+ 1.8626127404403687,
+ 1.8581517023468017,
+ 1.8520171240234375,
+ 1.8454141430282593,
+ 1.8327462087249755,
+ 1.8297313425064088,
+ 1.8218167012786866,
+ 1.8236521685791016,
+ 1.810682963180542,
+ 1.8065698401641845,
+ 1.7991282674407958,
+ 1.794059274559021,
+ 1.7904878783416749,
+ 1.7887911895370483,
+ 1.7848259868621825,
+ 1.7783562173461913,
+ 1.7769769561767579,
+ 1.7767581796264649,
+ 1.7756539735412598,
+ 1.776255124130249,
+ 1.7723581911849975,
+ 1.7729071472549438,
+ 1.7715669748687743,
+ 1.771372833633423,
+ 1.767975373764038,
+ 1.7652683026123046,
+ 1.766329913673401,
+ 1.7672014434051513,
+ 1.758714213027954,
+ 1.7554267383193969,
+ 1.756759292869568,
+ 1.7521009868621826,
+ 1.7497902758789063,
+ 1.750489810180664,
+ 1.7499482775497437,
+ 1.747922197303772,
+ 1.744107078781128,
+ 1.7441058347320557,
+ 1.7407303936004639,
+ 1.7397420559310912,
+ 1.7378707089996337,
+ 1.7362638066864013,
+ 1.739966582069397,
+ 1.7335311608505248,
+ 1.7373196209716797,
+ 1.7352568884658814,
+ 1.7298812759399413,
+ 1.732886531715393,
+ 1.7291758307647704,
+ 1.724468434715271,
+ 1.7238299334716798,
+ 1.7226758923339844,
+ 1.7246140502548217,
+ 1.7225423165130616,
+ 1.7256555751800537,
+ 1.7229330892944337,
+ 1.7288363692855835,
+ 1.7249921246337891,
+ 1.7212819675445556,
+ 1.7200141415023804,
+ 1.7191423548126221,
+ 1.7222559392929078,
+ 1.7194412873077392,
+ 1.7188739904022217,
+ 1.7199222569274903,
+ 1.7203983585357665,
+ 1.7149837328338624,
+ 1.7162825980377197,
+ 1.7136023723602294,
+ 1.7168209014129638,
+ 1.7104442378997802,
+ 1.7158753946685792,
+ 1.7140782101821899,
+ 1.7154304480743408,
+ 1.7131923955535888,
+ 1.711485964012146,
+ 1.7109355539703368,
+ 1.716393593826294,
+ 1.7096369751358031,
+ 1.7133884111785889,
+ 1.7087384057998658,
+ 1.7118981928253174,
+ 1.7084463762664794,
+ 1.7090828707504273,
+ 1.7140569379425048,
+ 1.7086596160507201,
+ 1.706718920211792,
+ 1.7098103982925414,
+ 1.7067520601654054,
+ 1.7073699936676026,
+ 1.7088190727996826,
+ 1.7079839348983765
+ ],
+ "train_acc": [
+ 0.2395,
+ 0.28052,
+ 0.2902,
+ 0.297,
+ 0.30538,
+ 0.31214,
+ 0.32012,
+ 0.3235,
+ 0.32874,
+ 0.3302,
+ 0.33236,
+ 0.336,
+ 0.3379,
+ 0.33908,
+ 0.34476,
+ 0.34302,
+ 0.34752,
+ 0.35002,
+ 0.3536,
+ 0.35486,
+ 0.35646,
+ 0.35768,
+ 0.3591,
+ 0.36082,
+ 0.36046,
+ 0.36418,
+ 0.36422,
+ 0.3634,
+ 0.36582,
+ 0.36498,
+ 0.3655,
+ 0.3643,
+ 0.36678,
+ 0.36726,
+ 0.36698,
+ 0.3653,
+ 0.37052,
+ 0.37094,
+ 0.36996,
+ 0.37132,
+ 0.37166,
+ 0.3736,
+ 0.37276,
+ 0.37378,
+ 0.37664,
+ 0.37462,
+ 0.37638,
+ 0.37634,
+ 0.3792,
+ 0.37858,
+ 0.3749,
+ 0.37682,
+ 0.37684,
+ 0.37932,
+ 0.3824,
+ 0.38052,
+ 0.38102,
+ 0.38218,
+ 0.3842,
+ 0.3825,
+ 0.38362,
+ 0.38164,
+ 0.38222,
+ 0.38416,
+ 0.3834,
+ 0.38208,
+ 0.38782,
+ 0.38568,
+ 0.3865,
+ 0.38586,
+ 0.3853,
+ 0.3871,
+ 0.3852,
+ 0.38532,
+ 0.38646,
+ 0.38714,
+ 0.3898,
+ 0.38778,
+ 0.38802,
+ 0.38644,
+ 0.38714,
+ 0.39032,
+ 0.39064,
+ 0.39112,
+ 0.39104,
+ 0.39074,
+ 0.38882,
+ 0.38916,
+ 0.38948,
+ 0.38788,
+ 0.38876,
+ 0.39034,
+ 0.3877,
+ 0.38964,
+ 0.39044,
+ 0.38936,
+ 0.39126,
+ 0.3913,
+ 0.3887,
+ 0.3921
+ ],
+ "test_acc": [
+ 0.2937,
+ 0.318,
+ 0.3284,
+ 0.3218,
+ 0.3414,
+ 0.3309,
+ 0.3539,
+ 0.3542,
+ 0.3588,
+ 0.3564,
+ 0.3555,
+ 0.3677,
+ 0.3666,
+ 0.3693,
+ 0.3683,
+ 0.3771,
+ 0.3637,
+ 0.3767,
+ 0.3747,
+ 0.3788,
+ 0.3798,
+ 0.3875,
+ 0.3865,
+ 0.3832,
+ 0.3831,
+ 0.3875,
+ 0.3819,
+ 0.3915,
+ 0.3952,
+ 0.392,
+ 0.3842,
+ 0.3883,
+ 0.4019,
+ 0.3939,
+ 0.3936,
+ 0.3968,
+ 0.3946,
+ 0.3997,
+ 0.3989,
+ 0.3971,
+ 0.4026,
+ 0.3991,
+ 0.4026,
+ 0.4039,
+ 0.4021,
+ 0.4032,
+ 0.4022,
+ 0.4026,
+ 0.407,
+ 0.4115,
+ 0.4082,
+ 0.4054,
+ 0.4011,
+ 0.4083,
+ 0.4078,
+ 0.4068,
+ 0.4023,
+ 0.4005,
+ 0.4032,
+ 0.4055,
+ 0.4038,
+ 0.4083,
+ 0.4038,
+ 0.408,
+ 0.4047,
+ 0.4058,
+ 0.4078,
+ 0.4108,
+ 0.4074,
+ 0.41,
+ 0.406,
+ 0.4068,
+ 0.408,
+ 0.4086,
+ 0.4098,
+ 0.4093,
+ 0.411,
+ 0.4114,
+ 0.4128,
+ 0.4126,
+ 0.4111,
+ 0.4122,
+ 0.4107,
+ 0.4102,
+ 0.4096,
+ 0.4108,
+ 0.4108,
+ 0.4128,
+ 0.4113,
+ 0.4082,
+ 0.411,
+ 0.4112,
+ 0.4109,
+ 0.4108,
+ 0.4107,
+ 0.4111,
+ 0.4116,
+ 0.411,
+ 0.4108,
+ 0.4108
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.03154352679848671,
+ 0.05562606453895569,
+ 0.031104888767004013,
+ -0.07920745015144348,
+ -0.07174454629421234,
+ -0.02221393957734108,
+ -0.0608971044421196,
+ -0.043849505484104156,
+ -0.07298076152801514,
+ -0.004631989635527134,
+ 0.026790393516421318,
+ 0.9985308051109314
+ ],
+ "perturbation_rho": [
+ 0.027997372671961784,
+ 0.017702028155326843,
+ -0.01458565704524517,
+ 0.0021418300457298756,
+ 0.01191677525639534,
+ 0.033614952117204666,
+ 0.024964284151792526,
+ 0.01625584065914154,
+ 0.024897336959838867,
+ 0.035478636622428894,
+ 0.016699712723493576,
+ 0.03735386207699776
+ ],
+ "nudging": {
+ "0.001": [
+ -3.6178389564156532e-06,
+ -1.6938429325819016e-07,
+ -6.28642737865448e-09,
+ 5.6694261729717255e-08,
+ 5.681067705154419e-08,
+ 1.909211277961731e-08,
+ 2.3748725652694702e-08,
+ 3.166496753692627e-08,
+ 3.3993273973464966e-08,
+ 4.9243681132793427e-08,
+ -2.3283064365386963e-08,
+ -8.114147931337357e-07
+ ],
+ "0.003": [
+ -1.0870513506233692e-05,
+ -7.745111361145973e-07,
+ -1.3748649507761002e-07,
+ 2.558808773756027e-07,
+ 1.755543053150177e-07,
+ 4.9709342420101166e-08,
+ 4.563480615615845e-08,
+ 9.592622518539429e-08,
+ 2.1245796233415604e-07,
+ 2.9569491744041443e-08,
+ -1.1490192264318466e-07,
+ -2.9135262593626976e-06
+ ],
+ "0.01": [
+ -3.632775042206049e-05,
+ -2.4959444999694824e-06,
+ -4.919711500406265e-07,
+ 7.352791726589203e-07,
+ 7.244525477290154e-07,
+ 1.8265563994646072e-07,
+ 6.683403626084328e-07,
+ 4.987232387065887e-07,
+ 7.337657734751701e-07,
+ 2.223532646894455e-08,
+ -3.0745286494493484e-07,
+ -1.0411371476948261e-05
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 5638.3818359375,
+ 81270.953125,
+ 444372.78125,
+ 1134060.625,
+ 1891502.0,
+ 2104572.25,
+ 2255040.75,
+ 2375941.75,
+ 2391241.75,
+ 2426907.25,
+ 2452151.0,
+ 2467291.0,
+ 1570105.625
+ ],
+ "bp_grad_norms_per_layer": [
+ 3.355086664669216e-05,
+ 1.9466469893814065e-06,
+ 5.659199473484477e-07,
+ 4.918285867461236e-07,
+ 4.922185325995088e-07,
+ 4.924264089822827e-07,
+ 4.924931431560253e-07,
+ 4.925626626572921e-07,
+ 4.927623535877501e-07,
+ 4.928857038066781e-07,
+ 4.913110842608148e-07,
+ 4.904794081994623e-07,
+ 4.797909127773892e-07
+ ]
+ },
+ "drift": {
+ "embed.weight": 41.710097786001306,
+ "embed.bias": 15.464283033590101,
+ "blocks.0.ln.weight": 1.200268762028184,
+ "blocks.0.w1.weight": 16.308146249299604,
+ "blocks.0.w1.bias": 12.841405630221256,
+ "blocks.0.w2.weight": 57.47900576989446,
+ "blocks.1.ln.weight": 1.058833461192825,
+ "blocks.1.w1.weight": 20.255632368172027,
+ "blocks.1.w1.bias": 14.323810463836294,
+ "blocks.1.w2.weight": 51.70227234444696,
+ "blocks.2.ln.weight": 1.0067914589915794,
+ "blocks.2.w1.weight": 23.883193038734614,
+ "blocks.2.w1.bias": 21.70680288297347,
+ "blocks.2.w2.weight": 32.15341601636593,
+ "blocks.3.ln.weight": 0.6991583611383122,
+ "blocks.3.w1.weight": 26.213152107729808,
+ "blocks.3.w1.bias": 27.835553865312857,
+ "blocks.3.w2.weight": 22.33686846223663,
+ "blocks.4.ln.weight": 0.49537939877182396,
+ "blocks.4.w1.weight": 20.812006160361022,
+ "blocks.4.w1.bias": 22.653788785176967,
+ "blocks.4.w2.weight": 18.947997726554537,
+ "blocks.5.ln.weight": 0.5246639568490442,
+ "blocks.5.w1.weight": 21.161102756452053,
+ "blocks.5.w1.bias": 23.494613405758418,
+ "blocks.5.w2.weight": 18.27047815490039,
+ "blocks.6.ln.weight": 0.5056438845806848,
+ "blocks.6.w1.weight": 21.366617590276153,
+ "blocks.6.w1.bias": 23.57646482535375,
+ "blocks.6.w2.weight": 17.743866768773838,
+ "blocks.7.ln.weight": 0.37623071960771737,
+ "blocks.7.w1.weight": 15.484194372550624,
+ "blocks.7.w1.bias": 16.374712028510555,
+ "blocks.7.w2.weight": 18.270059859207745,
+ "blocks.8.ln.weight": 0.4246821474291729,
+ "blocks.8.w1.weight": 15.052279864294677,
+ "blocks.8.w1.bias": 15.735595602596712,
+ "blocks.8.w2.weight": 20.64493053094595,
+ "blocks.9.ln.weight": 0.40094293459913244,
+ "blocks.9.w1.weight": 13.315401582838119,
+ "blocks.9.w1.bias": 11.448042864028588,
+ "blocks.9.w2.weight": 39.460291915843406,
+ "blocks.10.ln.weight": 0.3986931357871173,
+ "blocks.10.w1.weight": 12.861490846141741,
+ "blocks.10.w1.bias": 10.433659114876445,
+ "blocks.10.w2.weight": 37.16747442552621,
+ "blocks.11.ln.weight": 0.5241503553443005,
+ "blocks.11.w1.weight": 18.756889297523845,
+ "blocks.11.w1.bias": 18.863150794441257,
+ "blocks.11.w2.weight": 46.497694834593375,
+ "out_ln.weight": 0.3799708731554326,
+ "out_head.weight": 6.730072302834575,
+ "out_head.bias": 0.6968175874942788
+ }
+ }
+ },
+ "config": {
+ "dataset": "cifar10",
+ "d_hidden": 512,
+ "num_blocks": 12,
+ "batch_size": 128,
+ "epochs": 100,
+ "lr": 0.001,
+ "lr_fb": 0.001,
+ "wd": 0.01,
+ "lam": 0.1,
+ "K": 4,
+ "sigma_bridge": 0.05,
+ "ema_momentum": 0.995,
+ "term_grad_weight": 1.0,
+ "seeds": [
+ 4
+ ],
+ "gpu": 0,
+ "output_dir": "results/fa_dfa_d512_L12_seed4",
+ "methods": [
+ "fa",
+ "dfa"
+ ],
+ "random_targets": false,
+ "penalty_lam": 0.0,
+ "num_classes": 10
+ }
+} \ No newline at end of file
diff --git a/results/fa_dfa_d512_L12_seed5/results_cifar10.json b/results/fa_dfa_d512_L12_seed5/results_cifar10.json
new file mode 100644
index 0000000..1e9a983
--- /dev/null
+++ b/results/fa_dfa_d512_L12_seed5/results_cifar10.json
@@ -0,0 +1,969 @@
+{
+ "5": {
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 2.0684647270965577,
+ 2.04593505569458,
+ 2.0332052132797243,
+ 2.0338553718566894,
+ 2.026373544998169,
+ 2.0213139679336547,
+ 2.0205811246109007,
+ 2.018696953125,
+ 2.014947237319946,
+ 2.0151999055480956,
+ 2.0129982511138915,
+ 2.011509538650513,
+ 2.0100906581115723,
+ 2.0089198484802244,
+ 2.0063697845458983,
+ 2.006922910385132,
+ 2.0044513999176026,
+ 2.0016603435897826,
+ 2.000636997833252,
+ 2.002304845352173,
+ 2.0021789043426512,
+ 2.001000444030762,
+ 2.00005261428833,
+ 2.002928783569336,
+ 2.0017156188201906,
+ 1.9988520227050781,
+ 1.9994586238861083,
+ 1.9980162719726562,
+ 1.9998335108184815,
+ 1.9996961016845702,
+ 1.9991928104400636,
+ 1.9968728270721436,
+ 1.9977278369903564,
+ 1.9969172173309326,
+ 1.997944600830078,
+ 1.9979880140686035,
+ 1.994605475616455,
+ 1.9973402403259277,
+ 1.997146150894165,
+ 1.99599303855896,
+ 1.9974721060943603,
+ 1.9941783197402954,
+ 1.9956128702545166,
+ 1.9942147409057618,
+ 1.994319501800537,
+ 1.994643907546997,
+ 1.9942114786529541,
+ 1.994346463241577,
+ 1.9929966555023193,
+ 1.9937490250396728,
+ 1.992175287437439,
+ 1.9944900625610351,
+ 1.9938098908233644,
+ 1.9900078340911864,
+ 1.994505626373291,
+ 1.9926387735366822,
+ 1.990931240310669,
+ 1.9906283693695068,
+ 1.9929009646606446,
+ 1.9926953937530518,
+ 1.9925627197647096,
+ 1.990387219581604,
+ 1.9893561473846435,
+ 1.9920542861557007,
+ 1.9893005572509765,
+ 1.9888617028808593,
+ 1.991559531326294,
+ 1.9911002109527587,
+ 1.9900375119781495,
+ 1.9892236702728272,
+ 1.9937436754608153,
+ 1.991619889755249,
+ 1.9916846366882324,
+ 1.9896835166931153,
+ 1.9901411660003663,
+ 1.9924274538421631,
+ 1.9892655798721313,
+ 1.9914114519500732,
+ 1.990216528968811,
+ 1.9892477695465087,
+ 1.9900167115783691,
+ 1.98849309715271,
+ 1.9889385766983032,
+ 1.988801861000061,
+ 1.9898810375976563,
+ 1.9873209413909911,
+ 1.9886231661224365,
+ 1.988806999168396,
+ 1.9894798669052125,
+ 1.988367915725708,
+ 1.9881281168365479,
+ 1.9901167456054687,
+ 1.9896381842041015,
+ 1.9841947576522827,
+ 1.98667507106781,
+ 1.9886395093154907,
+ 1.9892304638290406,
+ 1.9895199391174316,
+ 1.9884832098388672,
+ 1.9883703183746337
+ ],
+ "train_acc": [
+ 0.23802,
+ 0.24804,
+ 0.25088,
+ 0.25142,
+ 0.25734,
+ 0.2584,
+ 0.26202,
+ 0.26208,
+ 0.265,
+ 0.26464,
+ 0.26442,
+ 0.26658,
+ 0.26774,
+ 0.26682,
+ 0.27064,
+ 0.26844,
+ 0.26844,
+ 0.27148,
+ 0.2714,
+ 0.26686,
+ 0.27288,
+ 0.26878,
+ 0.27184,
+ 0.27342,
+ 0.2741,
+ 0.27264,
+ 0.27204,
+ 0.27416,
+ 0.27192,
+ 0.27138,
+ 0.27496,
+ 0.27304,
+ 0.27252,
+ 0.2761,
+ 0.2752,
+ 0.27484,
+ 0.27692,
+ 0.2757,
+ 0.27436,
+ 0.27528,
+ 0.27534,
+ 0.276,
+ 0.27886,
+ 0.27704,
+ 0.27484,
+ 0.27556,
+ 0.27668,
+ 0.27734,
+ 0.27718,
+ 0.27692,
+ 0.27726,
+ 0.27706,
+ 0.2785,
+ 0.27902,
+ 0.2764,
+ 0.27802,
+ 0.27758,
+ 0.2812,
+ 0.27958,
+ 0.2804,
+ 0.27626,
+ 0.27992,
+ 0.2817,
+ 0.27822,
+ 0.2799,
+ 0.28178,
+ 0.2783,
+ 0.27782,
+ 0.28144,
+ 0.28136,
+ 0.2789,
+ 0.2784,
+ 0.27892,
+ 0.28034,
+ 0.28046,
+ 0.2792,
+ 0.28034,
+ 0.28004,
+ 0.28032,
+ 0.28042,
+ 0.28062,
+ 0.28206,
+ 0.28064,
+ 0.28308,
+ 0.2811,
+ 0.28224,
+ 0.28148,
+ 0.2803,
+ 0.2814,
+ 0.27944,
+ 0.28004,
+ 0.27996,
+ 0.28,
+ 0.28104,
+ 0.28072,
+ 0.28176,
+ 0.28028,
+ 0.27976,
+ 0.28074,
+ 0.27914
+ ],
+ "test_acc": [
+ 0.2445,
+ 0.2726,
+ 0.2811,
+ 0.2819,
+ 0.2748,
+ 0.2867,
+ 0.2884,
+ 0.2833,
+ 0.2868,
+ 0.2909,
+ 0.2961,
+ 0.2829,
+ 0.2919,
+ 0.3048,
+ 0.2947,
+ 0.2998,
+ 0.2984,
+ 0.2733,
+ 0.2898,
+ 0.2992,
+ 0.2974,
+ 0.289,
+ 0.2923,
+ 0.2983,
+ 0.2849,
+ 0.3036,
+ 0.2861,
+ 0.2991,
+ 0.2961,
+ 0.3025,
+ 0.2979,
+ 0.2897,
+ 0.2946,
+ 0.2973,
+ 0.2984,
+ 0.3042,
+ 0.2986,
+ 0.2994,
+ 0.3083,
+ 0.2802,
+ 0.3018,
+ 0.2976,
+ 0.301,
+ 0.301,
+ 0.2973,
+ 0.3045,
+ 0.297,
+ 0.2933,
+ 0.306,
+ 0.3031,
+ 0.2937,
+ 0.3027,
+ 0.2977,
+ 0.2974,
+ 0.3053,
+ 0.301,
+ 0.2965,
+ 0.3001,
+ 0.2994,
+ 0.2956,
+ 0.3025,
+ 0.286,
+ 0.2916,
+ 0.2956,
+ 0.3058,
+ 0.2961,
+ 0.2973,
+ 0.2996,
+ 0.2925,
+ 0.3068,
+ 0.3085,
+ 0.3058,
+ 0.2965,
+ 0.3035,
+ 0.3075,
+ 0.301,
+ 0.3066,
+ 0.3077,
+ 0.3019,
+ 0.3036,
+ 0.2997,
+ 0.3005,
+ 0.297,
+ 0.3008,
+ 0.3006,
+ 0.3103,
+ 0.3054,
+ 0.3048,
+ 0.2988,
+ 0.3009,
+ 0.3011,
+ 0.3039,
+ 0.3005,
+ 0.3037,
+ 0.3031,
+ 0.3022,
+ 0.302,
+ 0.3018,
+ 0.3018,
+ 0.3018
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.41004806756973267,
+ 0.0008555407403036952,
+ 0.0005728952819481492,
+ -0.0003633289597928524,
+ 0.00043969464604742825,
+ 0.0006190181011334062,
+ -0.0006453525274991989,
+ 0.0001250960340257734,
+ -0.0005502170533873141,
+ -6.381357525242493e-05,
+ -0.00011379925126675516,
+ -0.00034056592267006636
+ ],
+ "perturbation_rho": [
+ -0.0048940712586045265,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "nudging": {
+ "0.001": [
+ -3.5297125577926636e-07,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.003": [
+ -1.1865049600601196e-06,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.01": [
+ -3.998167812824249e-06,
+ 0.0,
+ 0.0,
+ 9.313225746154785e-10,
+ -8.381903171539307e-09,
+ 0.0,
+ 0.0,
+ 9.313225746154785e-10,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 55109.36328125,
+ 840561728.0,
+ 1444462336.0,
+ 2124835456.0,
+ 3210963200.0,
+ 6536487936.0,
+ 7593719808.0,
+ 8378088960.0,
+ 8459614208.0,
+ 9916916736.0,
+ 10216921088.0,
+ 11792236544.0,
+ 12205541376.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.8213216296535393e-07,
+ 2.0590699978395577e-10,
+ 2.0399454347952428e-10,
+ 2.03904518270015e-10,
+ 2.0418219892626155e-10,
+ 2.0476981221762003e-10,
+ 2.0479024032127313e-10,
+ 2.0459070548817238e-10,
+ 2.0458530702871514e-10,
+ 2.045856123400469e-10,
+ 2.04587194407857e-10,
+ 2.044076713447751e-10,
+ 2.045200814260184e-10
+ ]
+ },
+ "drift": {
+ "embed.weight": 331.27602666259105,
+ "embed.bias": 239.81762105942474,
+ "blocks.0.ln.weight": 10.927184995923978,
+ "blocks.0.w1.weight": 251.71912543831303,
+ "blocks.0.w1.bias": 221.3073991198696,
+ "blocks.0.w2.weight": 446.18400062248946,
+ "blocks.1.ln.weight": 7.981967217674905,
+ "blocks.1.w1.weight": 253.74455824953162,
+ "blocks.1.w1.bias": 216.76732500414306,
+ "blocks.1.w2.weight": 295.48008203993817,
+ "blocks.2.ln.weight": 8.227632098130004,
+ "blocks.2.w1.weight": 269.5007877113237,
+ "blocks.2.w1.bias": 241.96437719605447,
+ "blocks.2.w2.weight": 310.98985782960006,
+ "blocks.3.ln.weight": 8.112799129886726,
+ "blocks.3.w1.weight": 308.3323380552167,
+ "blocks.3.w1.bias": 309.27230622114104,
+ "blocks.3.w2.weight": 318.17414451568425,
+ "blocks.4.ln.weight": 10.16216573439241,
+ "blocks.4.w1.weight": 424.58007413849407,
+ "blocks.4.w1.bias": 404.55597818760236,
+ "blocks.4.w2.weight": 422.3987925739278,
+ "blocks.5.ln.weight": 9.551306834710088,
+ "blocks.5.w1.weight": 374.36129372347426,
+ "blocks.5.w1.bias": 352.23829526395434,
+ "blocks.5.w2.weight": 368.0730794757206,
+ "blocks.6.ln.weight": 8.854285613855794,
+ "blocks.6.w1.weight": 345.52287116736915,
+ "blocks.6.w1.bias": 319.68248259727653,
+ "blocks.6.w2.weight": 327.22678020367175,
+ "blocks.7.ln.weight": 7.811231342959617,
+ "blocks.7.w1.weight": 277.64478372749727,
+ "blocks.7.w1.bias": 254.112362538096,
+ "blocks.7.w2.weight": 265.1743796968286,
+ "blocks.8.ln.weight": 10.228506898132666,
+ "blocks.8.w1.weight": 404.4219524430621,
+ "blocks.8.w1.bias": 393.5696990939405,
+ "blocks.8.w2.weight": 394.0798993515089,
+ "blocks.9.ln.weight": 8.0107262045422,
+ "blocks.9.w1.weight": 316.5928270106835,
+ "blocks.9.w1.bias": 286.78238448653866,
+ "blocks.9.w2.weight": 310.9127649917408,
+ "blocks.10.ln.weight": 11.136451857382143,
+ "blocks.10.w1.weight": 438.2046003550788,
+ "blocks.10.w1.bias": 405.1181282489877,
+ "blocks.10.w2.weight": 413.8473423192705,
+ "blocks.11.ln.weight": 9.410383955267694,
+ "blocks.11.w1.weight": 372.4463938503806,
+ "blocks.11.w1.bias": 347.63407553366176,
+ "blocks.11.w2.weight": 344.49269974034115,
+ "out_ln.weight": 0.7270416629191093,
+ "out_head.weight": 10.238250621105578,
+ "out_head.bias": 0.4975266289585721
+ }
+ },
+ "fa": {
+ "log": {
+ "train_loss": [
+ 2.0707826377868654,
+ 1.9932800539779663,
+ 1.959168056678772,
+ 1.9474205780029297,
+ 1.9326355227279663,
+ 1.9199382480239868,
+ 1.9140991415405273,
+ 1.9034375612640382,
+ 1.8959864074707031,
+ 1.8931433800506592,
+ 1.8902097482681275,
+ 1.8859386821746826,
+ 1.882199794769287,
+ 1.8849088175201416,
+ 1.8818455212020875,
+ 1.8821384326171875,
+ 1.8799380892944335,
+ 1.875362028427124,
+ 1.870530955467224,
+ 1.8683246834564209,
+ 1.8638563344573975,
+ 1.8618846607208253,
+ 1.8603020819854736,
+ 1.8594753393936156,
+ 1.8566845929718017,
+ 1.8526717345428467,
+ 1.849912834777832,
+ 1.8465362030029298,
+ 1.848408186569214,
+ 1.845232035217285,
+ 1.843059235229492,
+ 1.839803340072632,
+ 1.8417775817871094,
+ 1.8390681932067872,
+ 1.8388364002227784,
+ 1.8360300784683228,
+ 1.8323893267059326,
+ 1.834402112121582,
+ 1.8309936769866944,
+ 1.8322431957244874,
+ 1.8288187261581421,
+ 1.8238647890472413,
+ 1.824506708908081,
+ 1.8221080670166017,
+ 1.8222445755767822,
+ 1.8188702960205079,
+ 1.817204086036682,
+ 1.8124210216522216,
+ 1.8137152634429932,
+ 1.8142603408050537,
+ 1.8083323223876953,
+ 1.8079692990493774,
+ 1.8071039364624024,
+ 1.8000855495834351,
+ 1.8052594284057617,
+ 1.802986600036621,
+ 1.7997608633422852,
+ 1.7976583826065065,
+ 1.798347628555298,
+ 1.7966360546875,
+ 1.7926319261932373,
+ 1.7933891333007812,
+ 1.7893145151138306,
+ 1.7920790769195556,
+ 1.7883893395614625,
+ 1.785349788131714,
+ 1.7879791070938111,
+ 1.7846136389541627,
+ 1.783509234313965,
+ 1.7843902270507812,
+ 1.7877061660385132,
+ 1.7827738708877563,
+ 1.7823649167251587,
+ 1.7814125487899781,
+ 1.7809069869995118,
+ 1.7828298908233642,
+ 1.7791348586273192,
+ 1.7774614221191407,
+ 1.7751106949615478,
+ 1.7760692443466186,
+ 1.7797742696380616,
+ 1.7786880410003663,
+ 1.7746904688262939,
+ 1.776022590560913,
+ 1.7778167572402954,
+ 1.775718899459839,
+ 1.7761091399383544,
+ 1.7744762685394286,
+ 1.7767131290435791,
+ 1.7732363131713866,
+ 1.7731415704345703,
+ 1.7759572997665405,
+ 1.7733897400283813,
+ 1.7683659911346437,
+ 1.7711752317047118,
+ 1.772973984146118,
+ 1.772844222984314,
+ 1.774391251449585,
+ 1.7711415176010132,
+ 1.7753938333511352
+ ],
+ "train_acc": [
+ 0.22922,
+ 0.26562,
+ 0.28014,
+ 0.286,
+ 0.29238,
+ 0.29874,
+ 0.30312,
+ 0.30664,
+ 0.30876,
+ 0.31168,
+ 0.3128,
+ 0.31518,
+ 0.31798,
+ 0.317,
+ 0.3238,
+ 0.32118,
+ 0.32352,
+ 0.32418,
+ 0.329,
+ 0.32642,
+ 0.33084,
+ 0.3318,
+ 0.3332,
+ 0.33264,
+ 0.33466,
+ 0.33646,
+ 0.33754,
+ 0.34266,
+ 0.3406,
+ 0.33896,
+ 0.34342,
+ 0.34406,
+ 0.3431,
+ 0.34494,
+ 0.34444,
+ 0.346,
+ 0.34776,
+ 0.34624,
+ 0.34814,
+ 0.34848,
+ 0.34938,
+ 0.35124,
+ 0.35154,
+ 0.3523,
+ 0.35154,
+ 0.35416,
+ 0.3531,
+ 0.3559,
+ 0.35634,
+ 0.35504,
+ 0.35722,
+ 0.35778,
+ 0.35632,
+ 0.36152,
+ 0.36064,
+ 0.35976,
+ 0.35954,
+ 0.36118,
+ 0.36272,
+ 0.36392,
+ 0.36202,
+ 0.36176,
+ 0.3667,
+ 0.36318,
+ 0.36542,
+ 0.36726,
+ 0.36706,
+ 0.36818,
+ 0.3674,
+ 0.36874,
+ 0.36462,
+ 0.36766,
+ 0.36724,
+ 0.3694,
+ 0.36714,
+ 0.36818,
+ 0.36918,
+ 0.37,
+ 0.36942,
+ 0.37088,
+ 0.36958,
+ 0.3705,
+ 0.3704,
+ 0.37006,
+ 0.37176,
+ 0.37004,
+ 0.37138,
+ 0.37146,
+ 0.37036,
+ 0.36844,
+ 0.37048,
+ 0.37066,
+ 0.37288,
+ 0.37296,
+ 0.37426,
+ 0.37316,
+ 0.37076,
+ 0.37046,
+ 0.37382,
+ 0.37288
+ ],
+ "test_acc": [
+ 0.2587,
+ 0.2962,
+ 0.3064,
+ 0.3196,
+ 0.323,
+ 0.3355,
+ 0.3259,
+ 0.3316,
+ 0.345,
+ 0.3454,
+ 0.347,
+ 0.3503,
+ 0.3501,
+ 0.3606,
+ 0.35,
+ 0.366,
+ 0.3606,
+ 0.345,
+ 0.3598,
+ 0.3644,
+ 0.365,
+ 0.3731,
+ 0.3727,
+ 0.3741,
+ 0.3592,
+ 0.3701,
+ 0.3692,
+ 0.375,
+ 0.3687,
+ 0.3686,
+ 0.3813,
+ 0.3762,
+ 0.3719,
+ 0.3836,
+ 0.3831,
+ 0.3884,
+ 0.3927,
+ 0.3866,
+ 0.3775,
+ 0.3781,
+ 0.3852,
+ 0.3899,
+ 0.3881,
+ 0.3895,
+ 0.3869,
+ 0.3874,
+ 0.3932,
+ 0.3817,
+ 0.3908,
+ 0.3893,
+ 0.3806,
+ 0.3918,
+ 0.3899,
+ 0.39,
+ 0.3949,
+ 0.3925,
+ 0.391,
+ 0.3933,
+ 0.3968,
+ 0.3904,
+ 0.3935,
+ 0.3933,
+ 0.3933,
+ 0.3992,
+ 0.4014,
+ 0.3955,
+ 0.3973,
+ 0.396,
+ 0.3963,
+ 0.3933,
+ 0.4005,
+ 0.4025,
+ 0.3997,
+ 0.4007,
+ 0.4027,
+ 0.4043,
+ 0.4044,
+ 0.4033,
+ 0.4033,
+ 0.4026,
+ 0.3994,
+ 0.4039,
+ 0.3988,
+ 0.4037,
+ 0.4041,
+ 0.4071,
+ 0.4054,
+ 0.4045,
+ 0.4049,
+ 0.4049,
+ 0.404,
+ 0.4069,
+ 0.4028,
+ 0.4041,
+ 0.4052,
+ 0.4053,
+ 0.4038,
+ 0.4034,
+ 0.4041,
+ 0.404
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.02494833618402481,
+ 0.06461166590452194,
+ 0.007773830089718103,
+ -0.053252629935741425,
+ -0.03213752806186676,
+ -0.0018765359418466687,
+ -0.020534943789243698,
+ -0.05978960543870926,
+ -0.019162429496645927,
+ 0.007563438266515732,
+ 0.033061787486076355,
+ 0.989224910736084
+ ],
+ "perturbation_rho": [
+ -0.0004792527761310339,
+ -0.012973977252840996,
+ 0.01789543777704239,
+ 0.0009404178708791733,
+ -0.03582464158535004,
+ -0.024608338251709938,
+ 0.03623630106449127,
+ -0.0018152520060539246,
+ 0.017969228327274323,
+ 0.0033451307099312544,
+ 0.0,
+ 0.029760660603642464
+ ],
+ "nudging": {
+ "0.001": [
+ -9.243376553058624e-07,
+ -5.587935447692871e-08,
+ 2.0721927285194397e-08,
+ -3.119930624961853e-08,
+ -2.1420419216156006e-08,
+ -2.9336661100387573e-08,
+ -2.0023435354232788e-08,
+ -5.122274160385132e-09,
+ -6.05359673500061e-09,
+ 3.725290298461914e-09,
+ -2.3748725652694702e-08,
+ -2.4866312742233276e-07
+ ],
+ "0.003": [
+ -2.8724316507577896e-06,
+ -2.635642886161804e-07,
+ -5.4249539971351624e-08,
+ -1.5599653124809265e-08,
+ -4.44706529378891e-08,
+ -1.6996636986732483e-08,
+ 1.1641532182693481e-09,
+ 5.704350769519806e-08,
+ -1.3504177331924438e-08,
+ -1.3737007975578308e-08,
+ -3.259629011154175e-08,
+ -1.0190997272729874e-06
+ ],
+ "0.01": [
+ -9.626368409954011e-06,
+ -7.129274308681488e-07,
+ 5.145557224750519e-08,
+ 1.8137507140636444e-07,
+ 1.0221265256404877e-07,
+ 1.3969838619232178e-08,
+ 3.608874976634979e-08,
+ 2.377200871706009e-07,
+ -1.6298145055770874e-08,
+ -8.731149137020111e-08,
+ -1.6693957149982452e-07,
+ -3.8032885640859604e-06
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 11572.41796875,
+ 170961.703125,
+ 761668.5625,
+ 2149773.25,
+ 3106495.5,
+ 3628764.75,
+ 3844090.5,
+ 4201663.0,
+ 4523281.0,
+ 4742705.5,
+ 4761602.0,
+ 4759073.5,
+ 4570665.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.5100242055486888e-05,
+ 6.707207944600668e-07,
+ 2.330790778160008e-07,
+ 2.3257328507497732e-07,
+ 2.3236127333348122e-07,
+ 2.322931180742671e-07,
+ 2.323264851611384e-07,
+ 2.3226489531680272e-07,
+ 2.3237980428802985e-07,
+ 2.3232749413182319e-07,
+ 2.322744165894619e-07,
+ 2.3242134261636238e-07,
+ 2.2277541233961529e-07
+ ]
+ },
+ "drift": {
+ "embed.weight": 67.4183636503971,
+ "embed.bias": 12.639534947709132,
+ "blocks.0.ln.weight": 1.4281653737839686,
+ "blocks.0.w1.weight": 22.149022125188107,
+ "blocks.0.w1.bias": 14.43109450707943,
+ "blocks.0.w2.weight": 73.54315291854846,
+ "blocks.1.ln.weight": 1.3725125667153013,
+ "blocks.1.w1.weight": 28.041388459852282,
+ "blocks.1.w1.bias": 16.737535867388896,
+ "blocks.1.w2.weight": 48.10538352181479,
+ "blocks.2.ln.weight": 0.9692921428242081,
+ "blocks.2.w1.weight": 29.126634688998955,
+ "blocks.2.w1.bias": 28.160099907002987,
+ "blocks.2.w2.weight": 30.43668721582192,
+ "blocks.3.ln.weight": 0.8517630406355475,
+ "blocks.3.w1.weight": 30.698936977122976,
+ "blocks.3.w1.bias": 30.008595592413137,
+ "blocks.3.w2.weight": 25.53125025342729,
+ "blocks.4.ln.weight": 0.8000730489190893,
+ "blocks.4.w1.weight": 28.294012201626252,
+ "blocks.4.w1.bias": 27.681602003963505,
+ "blocks.4.w2.weight": 31.236916248398295,
+ "blocks.5.ln.weight": 0.7726872906934783,
+ "blocks.5.w1.weight": 27.361431129938182,
+ "blocks.5.w1.bias": 27.206264858292855,
+ "blocks.5.w2.weight": 30.441424834889585,
+ "blocks.6.ln.weight": 0.836359631646653,
+ "blocks.6.w1.weight": 26.47170019895073,
+ "blocks.6.w1.bias": 25.167535850732502,
+ "blocks.6.w2.weight": 27.854377209764337,
+ "blocks.7.ln.weight": 0.7549166617706077,
+ "blocks.7.w1.weight": 26.946400539940182,
+ "blocks.7.w1.bias": 26.486714874573558,
+ "blocks.7.w2.weight": 31.532602647761905,
+ "blocks.8.ln.weight": 0.7532469721914246,
+ "blocks.8.w1.weight": 26.170218010343223,
+ "blocks.8.w1.bias": 27.052147560472978,
+ "blocks.8.w2.weight": 31.57827072835312,
+ "blocks.9.ln.weight": 0.5491388007855835,
+ "blocks.9.w1.weight": 17.942432655743463,
+ "blocks.9.w1.bias": 16.168189388817968,
+ "blocks.9.w2.weight": 38.437476826047074,
+ "blocks.10.ln.weight": 0.44584462833770383,
+ "blocks.10.w1.weight": 14.61733255882144,
+ "blocks.10.w1.bias": 9.906228706328685,
+ "blocks.10.w2.weight": 63.74207530119445,
+ "blocks.11.ln.weight": 0.4261726286054206,
+ "blocks.11.w1.weight": 16.108342263191403,
+ "blocks.11.w1.bias": 10.380547619047618,
+ "blocks.11.w2.weight": 90.22616819735002,
+ "out_ln.weight": 0.4534589624136927,
+ "out_head.weight": 7.737075841258268,
+ "out_head.bias": 0.6965283841940496
+ }
+ }
+ },
+ "config": {
+ "dataset": "cifar10",
+ "d_hidden": 512,
+ "num_blocks": 12,
+ "batch_size": 128,
+ "epochs": 100,
+ "lr": 0.001,
+ "lr_fb": 0.001,
+ "wd": 0.01,
+ "lam": 0.1,
+ "K": 4,
+ "sigma_bridge": 0.05,
+ "ema_momentum": 0.995,
+ "term_grad_weight": 1.0,
+ "seeds": [
+ 5
+ ],
+ "gpu": 0,
+ "output_dir": "results/fa_dfa_d512_L12_seed5",
+ "methods": [
+ "fa",
+ "dfa"
+ ],
+ "random_targets": false,
+ "penalty_lam": 0.0,
+ "num_classes": 10
+ }
+} \ No newline at end of file
diff --git a/results/fa_dfa_d512_L12_seed6/results_cifar10.json b/results/fa_dfa_d512_L12_seed6/results_cifar10.json
new file mode 100644
index 0000000..ccf37e3
--- /dev/null
+++ b/results/fa_dfa_d512_L12_seed6/results_cifar10.json
@@ -0,0 +1,969 @@
+{
+ "6": {
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 2.0821874879455566,
+ 2.0482772801208498,
+ 2.047201838607788,
+ 2.0446329682159425,
+ 2.0396667890930176,
+ 2.036854875450134,
+ 2.035803484649658,
+ 2.0357003843688966,
+ 2.030295910644531,
+ 2.027509428253174,
+ 2.027801731262207,
+ 2.026494747467041,
+ 2.023713701324463,
+ 2.0224919934082033,
+ 2.0218288256835937,
+ 2.0183722845840455,
+ 2.0188908798217775,
+ 2.0174379263305666,
+ 2.0145048645782473,
+ 2.0172035501098633,
+ 2.01550293838501,
+ 2.013378846511841,
+ 2.015203825531006,
+ 2.016428701705933,
+ 2.011487791137695,
+ 2.010817546157837,
+ 2.01103433883667,
+ 2.011530419845581,
+ 2.0120579361724853,
+ 2.0116470166778564,
+ 2.008692329330444,
+ 2.009709241294861,
+ 2.008519637374878,
+ 2.0078509032440186,
+ 2.0077528884124756,
+ 2.006431950531006,
+ 2.0055743618011475,
+ 2.005430025024414,
+ 2.0056805452728272,
+ 2.007569041595459,
+ 2.004868392677307,
+ 2.0057909046936033,
+ 2.0064586280822754,
+ 2.005987978057861,
+ 2.00688329624176,
+ 2.00672064743042,
+ 2.0048528481292727,
+ 2.005383738861084,
+ 2.0018666375732423,
+ 2.0059955949401855,
+ 2.003435557899475,
+ 2.001197138328552,
+ 2.0012222803497313,
+ 2.003032783164978,
+ 2.003918945236206,
+ 2.0043967723083496,
+ 2.0035451693725586,
+ 2.003359081878662,
+ 2.001155726585388,
+ 2.001671184616089,
+ 2.0038728633880614,
+ 2.00498755947113,
+ 2.002938235092163,
+ 2.000668586883545,
+ 2.0018935997772216,
+ 2.0039958557128905,
+ 2.001610350112915,
+ 2.000534170913696,
+ 2.001975674972534,
+ 2.002237940979004,
+ 2.0009369605255127,
+ 2.003139477081299,
+ 2.00191648399353,
+ 2.003141210899353,
+ 1.9998547933578492,
+ 2.000947899513245,
+ 2.0015833082580565,
+ 2.0008020196151732,
+ 2.0014297840118407,
+ 2.00010564907074,
+ 2.0010018926239015,
+ 2.0015807587051393,
+ 2.0006349613952636,
+ 2.0010920279693605,
+ 2.000114615020752,
+ 2.001875145263672,
+ 2.001294381980896,
+ 2.001243436965942,
+ 1.9999162873077392,
+ 1.998048712120056,
+ 2.000587540740967,
+ 2.0002040885162353,
+ 2.00153217338562,
+ 2.0005496265411375,
+ 2.000687266998291,
+ 2.000036004295349,
+ 1.9989564297866822,
+ 1.9997446877288818,
+ 1.9997422592163085,
+ 2.000641355895996
+ ],
+ "train_acc": [
+ 0.23088,
+ 0.24298,
+ 0.242,
+ 0.24386,
+ 0.24422,
+ 0.24566,
+ 0.24754,
+ 0.24998,
+ 0.25214,
+ 0.25368,
+ 0.25348,
+ 0.2556,
+ 0.2568,
+ 0.25746,
+ 0.2569,
+ 0.25902,
+ 0.26116,
+ 0.2581,
+ 0.26084,
+ 0.26254,
+ 0.26022,
+ 0.26164,
+ 0.26092,
+ 0.26324,
+ 0.26598,
+ 0.26604,
+ 0.26308,
+ 0.26346,
+ 0.26408,
+ 0.26498,
+ 0.2656,
+ 0.26436,
+ 0.26714,
+ 0.26778,
+ 0.26842,
+ 0.26646,
+ 0.26986,
+ 0.26788,
+ 0.26944,
+ 0.2679,
+ 0.26858,
+ 0.269,
+ 0.26802,
+ 0.26894,
+ 0.26892,
+ 0.26996,
+ 0.26784,
+ 0.27018,
+ 0.26922,
+ 0.2704,
+ 0.26998,
+ 0.27222,
+ 0.2733,
+ 0.27004,
+ 0.2688,
+ 0.26874,
+ 0.26814,
+ 0.27284,
+ 0.26872,
+ 0.272,
+ 0.26908,
+ 0.27148,
+ 0.27176,
+ 0.27322,
+ 0.2711,
+ 0.26946,
+ 0.27102,
+ 0.27338,
+ 0.27226,
+ 0.2703,
+ 0.27122,
+ 0.27198,
+ 0.27138,
+ 0.27356,
+ 0.27036,
+ 0.27104,
+ 0.27176,
+ 0.27334,
+ 0.27254,
+ 0.27312,
+ 0.27288,
+ 0.27076,
+ 0.2713,
+ 0.27096,
+ 0.27414,
+ 0.27154,
+ 0.27082,
+ 0.27156,
+ 0.27418,
+ 0.27186,
+ 0.27248,
+ 0.27316,
+ 0.27146,
+ 0.27196,
+ 0.27248,
+ 0.27434,
+ 0.27286,
+ 0.27322,
+ 0.26998,
+ 0.272
+ ],
+ "test_acc": [
+ 0.2459,
+ 0.2608,
+ 0.2635,
+ 0.2517,
+ 0.253,
+ 0.2671,
+ 0.2464,
+ 0.2581,
+ 0.2752,
+ 0.2714,
+ 0.2801,
+ 0.2721,
+ 0.2781,
+ 0.2735,
+ 0.2757,
+ 0.2879,
+ 0.2731,
+ 0.2825,
+ 0.271,
+ 0.2808,
+ 0.2798,
+ 0.2898,
+ 0.2713,
+ 0.2935,
+ 0.2878,
+ 0.2873,
+ 0.268,
+ 0.2934,
+ 0.2831,
+ 0.2992,
+ 0.2915,
+ 0.2887,
+ 0.2797,
+ 0.2998,
+ 0.2957,
+ 0.2824,
+ 0.2818,
+ 0.289,
+ 0.2951,
+ 0.2906,
+ 0.2842,
+ 0.2772,
+ 0.2962,
+ 0.2715,
+ 0.3031,
+ 0.2834,
+ 0.299,
+ 0.2881,
+ 0.2849,
+ 0.2939,
+ 0.2801,
+ 0.279,
+ 0.2819,
+ 0.2841,
+ 0.3013,
+ 0.2908,
+ 0.2906,
+ 0.2845,
+ 0.2931,
+ 0.2816,
+ 0.2975,
+ 0.2963,
+ 0.2953,
+ 0.288,
+ 0.2836,
+ 0.2878,
+ 0.2906,
+ 0.2879,
+ 0.2969,
+ 0.2892,
+ 0.286,
+ 0.282,
+ 0.2914,
+ 0.2885,
+ 0.2892,
+ 0.2904,
+ 0.295,
+ 0.2865,
+ 0.2949,
+ 0.2902,
+ 0.2894,
+ 0.2898,
+ 0.2913,
+ 0.2929,
+ 0.2868,
+ 0.2869,
+ 0.2905,
+ 0.2895,
+ 0.29,
+ 0.2896,
+ 0.2882,
+ 0.2908,
+ 0.2912,
+ 0.2901,
+ 0.2893,
+ 0.2888,
+ 0.2895,
+ 0.2894,
+ 0.2896,
+ 0.2896
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.36949819326400757,
+ 0.0005479701794683933,
+ -0.0006032834062352777,
+ 0.00028805271722376347,
+ -0.0005946755409240723,
+ -0.0002685927611310035,
+ -0.00046556672896258533,
+ -0.00010807066428242251,
+ -0.0003351868945173919,
+ 0.00010062567162094638,
+ -0.000464106589788571,
+ -4.078936035512015e-05
+ ],
+ "perturbation_rho": [
+ -0.011094596236944199,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "nudging": {
+ "0.001": [
+ -2.4400651454925537e-07,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.003": [
+ -9.699724614620209e-07,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.01": [
+ -3.3657997846603394e-06,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 55177.16015625,
+ 1459404800.0,
+ 2815841792.0,
+ 3553956096.0,
+ 3947802112.0,
+ 6848872448.0,
+ 7047305728.0,
+ 7256152064.0,
+ 9300344832.0,
+ 10056428544.0,
+ 10370693120.0,
+ 10568456192.0,
+ 11512604672.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.3892781086942705e-07,
+ 2.0915398579735012e-10,
+ 2.0916321452624231e-10,
+ 2.0916490761635487e-10,
+ 2.0915806586696561e-10,
+ 2.0912831188990566e-10,
+ 2.0913649978471227e-10,
+ 2.0914339704525275e-10,
+ 2.089406564431684e-10,
+ 2.0894751207034545e-10,
+ 2.0897203412140186e-10,
+ 2.0899702801724374e-10,
+ 2.090319445313682e-10
+ ]
+ },
+ "drift": {
+ "embed.weight": 337.6841411506247,
+ "embed.bias": 244.65544843260258,
+ "blocks.0.ln.weight": 9.921388539749072,
+ "blocks.0.w1.weight": 303.34325560691656,
+ "blocks.0.w1.bias": 271.5154938432058,
+ "blocks.0.w2.weight": 490.11291855747174,
+ "blocks.1.ln.weight": 8.358331837822632,
+ "blocks.1.w1.weight": 306.46180391751284,
+ "blocks.1.w1.bias": 289.91192320867975,
+ "blocks.1.w2.weight": 307.16882665457837,
+ "blocks.2.ln.weight": 8.171380552921143,
+ "blocks.2.w1.weight": 309.0280257845424,
+ "blocks.2.w1.bias": 278.09046067069085,
+ "blocks.2.w2.weight": 303.83002249941313,
+ "blocks.3.ln.weight": 7.530980420849777,
+ "blocks.3.w1.weight": 291.36754150240233,
+ "blocks.3.w1.bias": 267.02199332509406,
+ "blocks.3.w2.weight": 279.4411092845991,
+ "blocks.4.ln.weight": 10.6408318843949,
+ "blocks.4.w1.weight": 441.74974808763915,
+ "blocks.4.w1.bias": 407.5025978091534,
+ "blocks.4.w2.weight": 396.8841237755263,
+ "blocks.5.ln.weight": 7.3188489223429976,
+ "blocks.5.w1.weight": 282.40725775096377,
+ "blocks.5.w1.bias": 268.64915470838923,
+ "blocks.5.w2.weight": 255.5102203441866,
+ "blocks.6.ln.weight": 7.484712621287041,
+ "blocks.6.w1.weight": 289.3900145716271,
+ "blocks.6.w1.bias": 273.2836157437038,
+ "blocks.6.w2.weight": 269.0639311186208,
+ "blocks.7.ln.weight": 10.645635950857786,
+ "blocks.7.w1.weight": 435.3403361556039,
+ "blocks.7.w1.bias": 407.2628750706809,
+ "blocks.7.w2.weight": 422.88459144783326,
+ "blocks.8.ln.weight": 9.19535275353374,
+ "blocks.8.w1.weight": 359.87499481651184,
+ "blocks.8.w1.bias": 333.4216491141516,
+ "blocks.8.w2.weight": 337.7077986951993,
+ "blocks.9.ln.weight": 7.799912385129816,
+ "blocks.9.w1.weight": 297.0043322415887,
+ "blocks.9.w1.bias": 289.12017704180437,
+ "blocks.9.w2.weight": 272.7064790358395,
+ "blocks.10.ln.weight": 8.43728199015399,
+ "blocks.10.w1.weight": 330.9678935021042,
+ "blocks.10.w1.bias": 306.6806776473047,
+ "blocks.10.w2.weight": 309.957908747728,
+ "blocks.11.ln.weight": 10.237677378176008,
+ "blocks.11.w1.weight": 400.7826698559754,
+ "blocks.11.w1.bias": 368.5214108931406,
+ "blocks.11.w2.weight": 374.837703368063,
+ "out_ln.weight": 0.67814086539114,
+ "out_head.weight": 9.623079895238506,
+ "out_head.bias": 0.4268739303655545
+ }
+ },
+ "fa": {
+ "log": {
+ "train_loss": [
+ 2.05228896446228,
+ 1.9686302278900147,
+ 1.937331941871643,
+ 1.9156952641296388,
+ 1.8984398557281494,
+ 1.8918078393936157,
+ 1.8841382266616822,
+ 1.8769169528198242,
+ 1.8724429541397094,
+ 1.868889035949707,
+ 1.8656937331390382,
+ 1.8661101271820069,
+ 1.8578856301116944,
+ 1.8565830435180664,
+ 1.856787547569275,
+ 1.8557513214492798,
+ 1.8538894052124024,
+ 1.8534770782089234,
+ 1.848385778579712,
+ 1.846604812889099,
+ 1.8453827571868897,
+ 1.8429374709320068,
+ 1.8401580144500733,
+ 1.8401220611190796,
+ 1.8288005982208253,
+ 1.8259171481323242,
+ 1.8221615615463256,
+ 1.8216705709075929,
+ 1.821623645401001,
+ 1.818600112838745,
+ 1.812706976966858,
+ 1.8111941592788696,
+ 1.8062977261734008,
+ 1.805511683959961,
+ 1.8029492014312745,
+ 1.800798831100464,
+ 1.7953546783065797,
+ 1.7982503284454345,
+ 1.7914396514511108,
+ 1.7916672002792358,
+ 1.7845456259155272,
+ 1.78576413356781,
+ 1.783857018814087,
+ 1.7810697037506102,
+ 1.7796821146011352,
+ 1.7769692651367188,
+ 1.778120286178589,
+ 1.7701848324966432,
+ 1.7680266930770874,
+ 1.7723119870376587,
+ 1.7669956749725342,
+ 1.764911948814392,
+ 1.7598207390975953,
+ 1.7630078075408935,
+ 1.7617963638687133,
+ 1.7589407592391968,
+ 1.7578953286361694,
+ 1.755873950805664,
+ 1.7555263995742798,
+ 1.7516939831924438,
+ 1.7528417974090575,
+ 1.7498321814346314,
+ 1.7489991750335694,
+ 1.744125429725647,
+ 1.7467560472869874,
+ 1.7484809857940673,
+ 1.7442166653823852,
+ 1.7426679946136474,
+ 1.7441077096939086,
+ 1.7404415154647828,
+ 1.734541618347168,
+ 1.7417204373931885,
+ 1.7390217791748046,
+ 1.7405015720367432,
+ 1.739689345970154,
+ 1.7386328066253662,
+ 1.7377501021575927,
+ 1.7382229836654663,
+ 1.7361252673721312,
+ 1.7335023355865478,
+ 1.7335243579483033,
+ 1.7341424390029907,
+ 1.7341539621734618,
+ 1.7341088440322876,
+ 1.7317719327545167,
+ 1.7339295767593383,
+ 1.7329442672729491,
+ 1.7313460754776,
+ 1.7338177963256836,
+ 1.7318969781112672,
+ 1.7321792919921875,
+ 1.7318955504989624,
+ 1.7337695608520507,
+ 1.7288571945953368,
+ 1.7327508489990235,
+ 1.7287590840911866,
+ 1.730326948890686,
+ 1.7341352017593383,
+ 1.7290034577178954,
+ 1.72969926902771
+ ],
+ "train_acc": [
+ 0.2384,
+ 0.27844,
+ 0.2943,
+ 0.30138,
+ 0.30742,
+ 0.30942,
+ 0.3195,
+ 0.31888,
+ 0.32204,
+ 0.32418,
+ 0.3248,
+ 0.32782,
+ 0.33146,
+ 0.33268,
+ 0.32912,
+ 0.33066,
+ 0.33278,
+ 0.33552,
+ 0.3367,
+ 0.34006,
+ 0.3393,
+ 0.33932,
+ 0.3395,
+ 0.33874,
+ 0.34818,
+ 0.34772,
+ 0.34958,
+ 0.3503,
+ 0.35032,
+ 0.35086,
+ 0.35212,
+ 0.35332,
+ 0.35488,
+ 0.3559,
+ 0.35578,
+ 0.35756,
+ 0.35926,
+ 0.35818,
+ 0.36006,
+ 0.3605,
+ 0.36376,
+ 0.3638,
+ 0.36622,
+ 0.36562,
+ 0.36488,
+ 0.36572,
+ 0.36658,
+ 0.3694,
+ 0.37002,
+ 0.36866,
+ 0.37106,
+ 0.37212,
+ 0.37664,
+ 0.37196,
+ 0.37114,
+ 0.37274,
+ 0.37342,
+ 0.37482,
+ 0.37588,
+ 0.37478,
+ 0.37494,
+ 0.3766,
+ 0.37674,
+ 0.3785,
+ 0.37762,
+ 0.37722,
+ 0.37866,
+ 0.38074,
+ 0.37858,
+ 0.3806,
+ 0.38312,
+ 0.37868,
+ 0.3786,
+ 0.38086,
+ 0.37808,
+ 0.37986,
+ 0.38126,
+ 0.38078,
+ 0.38162,
+ 0.3829,
+ 0.38342,
+ 0.3823,
+ 0.3824,
+ 0.38028,
+ 0.38296,
+ 0.383,
+ 0.38242,
+ 0.38388,
+ 0.3826,
+ 0.38382,
+ 0.38268,
+ 0.38512,
+ 0.38288,
+ 0.38526,
+ 0.38352,
+ 0.38476,
+ 0.3849,
+ 0.38008,
+ 0.3831,
+ 0.3847
+ ],
+ "test_acc": [
+ 0.2743,
+ 0.3072,
+ 0.3046,
+ 0.3269,
+ 0.3349,
+ 0.3431,
+ 0.3427,
+ 0.3386,
+ 0.3512,
+ 0.3518,
+ 0.3496,
+ 0.3458,
+ 0.3629,
+ 0.3681,
+ 0.364,
+ 0.3676,
+ 0.3514,
+ 0.3685,
+ 0.353,
+ 0.3779,
+ 0.3693,
+ 0.3647,
+ 0.3759,
+ 0.3607,
+ 0.374,
+ 0.3735,
+ 0.3711,
+ 0.3721,
+ 0.3821,
+ 0.3752,
+ 0.3707,
+ 0.3851,
+ 0.3838,
+ 0.3893,
+ 0.374,
+ 0.392,
+ 0.3839,
+ 0.3977,
+ 0.3956,
+ 0.3924,
+ 0.3957,
+ 0.3936,
+ 0.394,
+ 0.3912,
+ 0.3993,
+ 0.3945,
+ 0.3978,
+ 0.3946,
+ 0.398,
+ 0.3963,
+ 0.3976,
+ 0.3972,
+ 0.3971,
+ 0.3984,
+ 0.3988,
+ 0.3958,
+ 0.4012,
+ 0.4042,
+ 0.4017,
+ 0.3974,
+ 0.3975,
+ 0.4052,
+ 0.4061,
+ 0.4066,
+ 0.4056,
+ 0.4057,
+ 0.406,
+ 0.4017,
+ 0.4026,
+ 0.4095,
+ 0.4043,
+ 0.4072,
+ 0.4077,
+ 0.4031,
+ 0.4083,
+ 0.407,
+ 0.4077,
+ 0.4099,
+ 0.4067,
+ 0.4065,
+ 0.4057,
+ 0.4092,
+ 0.4093,
+ 0.4055,
+ 0.4097,
+ 0.4081,
+ 0.4083,
+ 0.407,
+ 0.407,
+ 0.4086,
+ 0.4084,
+ 0.4074,
+ 0.4073,
+ 0.4072,
+ 0.4063,
+ 0.4075,
+ 0.407,
+ 0.4074,
+ 0.4075,
+ 0.4076
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.0210530087351799,
+ 0.03225391358137131,
+ 0.030373934656381607,
+ -0.01976676471531391,
+ -0.039145730435848236,
+ -0.07764401286840439,
+ -0.04457290470600128,
+ -0.0037843044847249985,
+ -0.0476202666759491,
+ -0.000595096789766103,
+ 0.02137608267366886,
+ 0.995658278465271
+ ],
+ "perturbation_rho": [
+ -0.0567036010324955,
+ -0.0030030906200408936,
+ -0.022865712642669678,
+ -0.009168766438961029,
+ -0.0182923823595047,
+ 0.03648798167705536,
+ 0.04260994493961334,
+ 0.0037925627548247576,
+ -0.03891247510910034,
+ -0.052096571773290634,
+ -0.05355001986026764,
+ 0.005782400257885456
+ ],
+ "nudging": {
+ "0.001": [
+ -7.315538823604584e-07,
+ -1.1478550732135773e-07,
+ -6.658956408500671e-08,
+ 7.450580596923828e-09,
+ 3.702007234096527e-08,
+ 5.634501576423645e-08,
+ 5.51808625459671e-08,
+ 9.080395102500916e-09,
+ 2.7241185307502747e-08,
+ -1.3969838619232178e-09,
+ -3.3527612686157227e-08,
+ -7.827766239643097e-07
+ ],
+ "0.003": [
+ -2.304092049598694e-06,
+ -3.343448042869568e-07,
+ -1.4924444258213043e-07,
+ 1.979060471057892e-08,
+ 1.2828968465328217e-07,
+ 2.391170710325241e-07,
+ 1.1990778148174286e-07,
+ -4.959292709827423e-08,
+ 1.4039687812328339e-07,
+ -6.752088665962219e-09,
+ -3.050081431865692e-08,
+ -2.78581865131855e-06
+ ],
+ "0.01": [
+ -7.684342563152313e-06,
+ -1.07521191239357e-06,
+ -4.4563785195350647e-07,
+ 1.5692785382270813e-07,
+ 3.7904828786849976e-07,
+ 7.476191967725754e-07,
+ 4.507601261138916e-07,
+ -3.306195139884949e-08,
+ 5.138572305440903e-07,
+ 2.9569491744041443e-08,
+ -2.5634653866291046e-07,
+ -9.92906279861927e-06
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 7711.2001953125,
+ 86607.8359375,
+ 328385.03125,
+ 717089.75,
+ 1100718.375,
+ 1366338.125,
+ 1659771.0,
+ 1846383.5,
+ 2026219.75,
+ 2144626.25,
+ 2170055.0,
+ 2174719.5,
+ 1710646.75
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.5522365831420757e-05,
+ 1.6940738305493142e-06,
+ 5.418342539087462e-07,
+ 4.801818249688949e-07,
+ 4.7660145696681866e-07,
+ 4.771524686475459e-07,
+ 4.772307420353172e-07,
+ 4.777210733664106e-07,
+ 4.776471200784727e-07,
+ 4.787051466337289e-07,
+ 4.790993557435286e-07,
+ 4.780670792570163e-07,
+ 4.5420676997309783e-07
+ ]
+ },
+ "drift": {
+ "embed.weight": 51.639575695075386,
+ "embed.bias": 13.420626933240067,
+ "blocks.0.ln.weight": 1.165106572944437,
+ "blocks.0.w1.weight": 17.074257617816983,
+ "blocks.0.w1.bias": 12.55405788253026,
+ "blocks.0.w2.weight": 56.84408822778898,
+ "blocks.1.ln.weight": 1.0814484489664455,
+ "blocks.1.w1.weight": 20.255692646568928,
+ "blocks.1.w1.bias": 12.590175436666444,
+ "blocks.1.w2.weight": 48.43073026110698,
+ "blocks.2.ln.weight": 0.9210710896806559,
+ "blocks.2.w1.weight": 20.147092711724806,
+ "blocks.2.w1.bias": 16.898392906551216,
+ "blocks.2.w2.weight": 46.45488641124572,
+ "blocks.3.ln.weight": 0.6699388356494572,
+ "blocks.3.w1.weight": 19.04886241071522,
+ "blocks.3.w1.bias": 18.822886783231823,
+ "blocks.3.w2.weight": 35.62237142992444,
+ "blocks.4.ln.weight": 0.5724221532943284,
+ "blocks.4.w1.weight": 18.93452177775512,
+ "blocks.4.w1.bias": 18.487677483264793,
+ "blocks.4.w2.weight": 30.11841300499853,
+ "blocks.5.ln.weight": 0.5318503739647785,
+ "blocks.5.w1.weight": 19.752732313152933,
+ "blocks.5.w1.bias": 21.012803450001744,
+ "blocks.5.w2.weight": 26.781939546320714,
+ "blocks.6.ln.weight": 0.4938677177304124,
+ "blocks.6.w1.weight": 18.79282421249984,
+ "blocks.6.w1.bias": 20.24614504362752,
+ "blocks.6.w2.weight": 23.954722502437257,
+ "blocks.7.ln.weight": 0.520902940610959,
+ "blocks.7.w1.weight": 19.15384116500715,
+ "blocks.7.w1.bias": 20.22154003333216,
+ "blocks.7.w2.weight": 28.09055042594723,
+ "blocks.8.ln.weight": 0.5342907608290218,
+ "blocks.8.w1.weight": 18.38918909035725,
+ "blocks.8.w1.bias": 18.8314837769314,
+ "blocks.8.w2.weight": 31.203969055913323,
+ "blocks.9.ln.weight": 0.5198835347685796,
+ "blocks.9.w1.weight": 17.551735719168683,
+ "blocks.9.w1.bias": 17.643006584631404,
+ "blocks.9.w2.weight": 50.56607469544128,
+ "blocks.10.ln.weight": 0.4499630077104289,
+ "blocks.10.w1.weight": 15.14684109843433,
+ "blocks.10.w1.bias": 12.774970121083086,
+ "blocks.10.w2.weight": 53.261273057743395,
+ "blocks.11.ln.weight": 0.4529064172202585,
+ "blocks.11.w1.weight": 17.52348612725844,
+ "blocks.11.w1.bias": 15.699238779618904,
+ "blocks.11.w2.weight": 54.34198491601119,
+ "out_ln.weight": 0.38172881823690796,
+ "out_head.weight": 6.667353449848178,
+ "out_head.bias": 0.7683726336162054
+ }
+ }
+ },
+ "config": {
+ "dataset": "cifar10",
+ "d_hidden": 512,
+ "num_blocks": 12,
+ "batch_size": 128,
+ "epochs": 100,
+ "lr": 0.001,
+ "lr_fb": 0.001,
+ "wd": 0.01,
+ "lam": 0.1,
+ "K": 4,
+ "sigma_bridge": 0.05,
+ "ema_momentum": 0.995,
+ "term_grad_weight": 1.0,
+ "seeds": [
+ 6
+ ],
+ "gpu": 0,
+ "output_dir": "results/fa_dfa_d512_L12_seed6",
+ "methods": [
+ "fa",
+ "dfa"
+ ],
+ "random_targets": false,
+ "penalty_lam": 0.0,
+ "num_classes": 10
+ }
+} \ No newline at end of file
diff --git a/results/fa_dfa_d512_L12_seed7/results_cifar10.json b/results/fa_dfa_d512_L12_seed7/results_cifar10.json
new file mode 100644
index 0000000..d1c18ce
--- /dev/null
+++ b/results/fa_dfa_d512_L12_seed7/results_cifar10.json
@@ -0,0 +1,969 @@
+{
+ "7": {
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 2.0717703549194337,
+ 2.041298008880615,
+ 2.029167268066406,
+ 2.0265366765975954,
+ 2.0233016680145264,
+ 2.021149669342041,
+ 2.015149740600586,
+ 2.014331086883545,
+ 2.011163010597229,
+ 2.00646972366333,
+ 2.0077928015899658,
+ 2.0069499212265014,
+ 2.0002946398925783,
+ 2.002099399795532,
+ 1.9983450998687744,
+ 1.9959249508666992,
+ 1.9950883417510987,
+ 1.9943942831802368,
+ 1.993684328918457,
+ 1.9930962839508057,
+ 1.9938797711181642,
+ 1.986891388015747,
+ 1.9910820366668702,
+ 1.9907979718780517,
+ 1.989411315689087,
+ 1.9852583177947998,
+ 1.981110195388794,
+ 1.9853213675308228,
+ 1.9821887520599366,
+ 1.9824637595367431,
+ 1.9810781491470337,
+ 1.978963046951294,
+ 1.9793350392913818,
+ 1.9822494831848145,
+ 1.9770296029281615,
+ 1.9799606243515016,
+ 1.979586162147522,
+ 1.978336156539917,
+ 1.9767711865234374,
+ 1.9781431746673583,
+ 1.97637092502594,
+ 1.9791246725845337,
+ 1.9776694247055053,
+ 1.9774175772857665,
+ 1.974560000038147,
+ 1.97618024269104,
+ 1.9750861001968383,
+ 1.9740860368347168,
+ 1.975063199005127,
+ 1.975644574661255,
+ 1.974392618637085,
+ 1.976005935974121,
+ 1.974110288734436,
+ 1.9724776383209228,
+ 1.9728016277313232,
+ 1.973982884864807,
+ 1.972937359275818,
+ 1.971426241798401,
+ 1.970759379310608,
+ 1.97244817237854,
+ 1.9711870779800416,
+ 1.9723809194564819,
+ 1.970773144493103,
+ 1.9708155387115478,
+ 1.9702037267684938,
+ 1.9704236566925049,
+ 1.9704266147613525,
+ 1.9674897221374512,
+ 1.969771149520874,
+ 1.9684535709381104,
+ 1.972261046295166,
+ 1.9708796706390381,
+ 1.9701639178848267,
+ 1.970189701461792,
+ 1.9690214687728882,
+ 1.970269794769287,
+ 1.9686412525177002,
+ 1.9686531394195557,
+ 1.9690335285186769,
+ 1.9683185077667236,
+ 1.9699563415527344,
+ 1.9683909454345703,
+ 1.9685154306793213,
+ 1.966210771751404,
+ 1.969103935775757,
+ 1.9681334160614015,
+ 1.9663462246322632,
+ 1.9694912530899047,
+ 1.9677201830291748,
+ 1.9683091250610352,
+ 1.9689178394317628,
+ 1.966240083847046,
+ 1.968869041519165,
+ 1.96721680809021,
+ 1.9639848593139648,
+ 1.9679099475097657,
+ 1.9662394170379638,
+ 1.966985820236206,
+ 1.9662641632843016,
+ 1.9668700101089478
+ ],
+ "train_acc": [
+ 0.23978,
+ 0.24322,
+ 0.25136,
+ 0.25116,
+ 0.25264,
+ 0.25374,
+ 0.26024,
+ 0.2603,
+ 0.26,
+ 0.26422,
+ 0.26226,
+ 0.26102,
+ 0.26648,
+ 0.2665,
+ 0.26764,
+ 0.26976,
+ 0.27012,
+ 0.27084,
+ 0.27144,
+ 0.27138,
+ 0.26928,
+ 0.2735,
+ 0.27452,
+ 0.27408,
+ 0.27312,
+ 0.276,
+ 0.27856,
+ 0.27458,
+ 0.27752,
+ 0.27688,
+ 0.27972,
+ 0.2782,
+ 0.2808,
+ 0.27646,
+ 0.28084,
+ 0.28012,
+ 0.2808,
+ 0.2793,
+ 0.28086,
+ 0.2807,
+ 0.27956,
+ 0.27968,
+ 0.28138,
+ 0.28184,
+ 0.28282,
+ 0.28208,
+ 0.28344,
+ 0.2835,
+ 0.27944,
+ 0.28146,
+ 0.28096,
+ 0.28186,
+ 0.28142,
+ 0.2833,
+ 0.28372,
+ 0.28258,
+ 0.28264,
+ 0.28458,
+ 0.28644,
+ 0.2822,
+ 0.2842,
+ 0.28418,
+ 0.2849,
+ 0.28526,
+ 0.28614,
+ 0.28488,
+ 0.2833,
+ 0.28626,
+ 0.28546,
+ 0.28682,
+ 0.28376,
+ 0.2851,
+ 0.28242,
+ 0.28406,
+ 0.28672,
+ 0.28582,
+ 0.28482,
+ 0.28584,
+ 0.28422,
+ 0.28666,
+ 0.28424,
+ 0.28532,
+ 0.28416,
+ 0.28974,
+ 0.28814,
+ 0.28682,
+ 0.28648,
+ 0.28618,
+ 0.28602,
+ 0.2883,
+ 0.2841,
+ 0.28796,
+ 0.28582,
+ 0.28556,
+ 0.29042,
+ 0.28562,
+ 0.28642,
+ 0.28704,
+ 0.28854,
+ 0.28752
+ ],
+ "test_acc": [
+ 0.2673,
+ 0.2593,
+ 0.2397,
+ 0.2765,
+ 0.284,
+ 0.2617,
+ 0.2814,
+ 0.2855,
+ 0.277,
+ 0.2629,
+ 0.287,
+ 0.2628,
+ 0.2838,
+ 0.2918,
+ 0.29,
+ 0.3014,
+ 0.2878,
+ 0.2773,
+ 0.2764,
+ 0.299,
+ 0.2977,
+ 0.2922,
+ 0.2967,
+ 0.2866,
+ 0.2876,
+ 0.2988,
+ 0.296,
+ 0.3046,
+ 0.3027,
+ 0.2977,
+ 0.3168,
+ 0.2957,
+ 0.2942,
+ 0.3079,
+ 0.2781,
+ 0.3022,
+ 0.294,
+ 0.3143,
+ 0.3169,
+ 0.3066,
+ 0.3105,
+ 0.3005,
+ 0.3035,
+ 0.3135,
+ 0.3035,
+ 0.2983,
+ 0.2928,
+ 0.3032,
+ 0.299,
+ 0.3029,
+ 0.2978,
+ 0.3047,
+ 0.3054,
+ 0.2984,
+ 0.3004,
+ 0.3,
+ 0.3146,
+ 0.3113,
+ 0.3079,
+ 0.3115,
+ 0.3101,
+ 0.3124,
+ 0.3111,
+ 0.3014,
+ 0.3064,
+ 0.3106,
+ 0.3105,
+ 0.3112,
+ 0.3019,
+ 0.3067,
+ 0.3096,
+ 0.2973,
+ 0.3033,
+ 0.3053,
+ 0.3119,
+ 0.3105,
+ 0.3093,
+ 0.3053,
+ 0.31,
+ 0.3055,
+ 0.3091,
+ 0.3076,
+ 0.3069,
+ 0.3131,
+ 0.3058,
+ 0.3119,
+ 0.3091,
+ 0.3084,
+ 0.3126,
+ 0.3089,
+ 0.3102,
+ 0.3105,
+ 0.31,
+ 0.3113,
+ 0.309,
+ 0.3103,
+ 0.3103,
+ 0.3109,
+ 0.3108,
+ 0.3108
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.4225231409072876,
+ 0.000332531810272485,
+ -0.00028742029098793864,
+ -0.0005942026618868113,
+ 0.0011352845467627048,
+ 5.1041941333096474e-05,
+ 4.8770005378173664e-05,
+ -0.0001115800259867683,
+ -0.00025915325386449695,
+ -0.0001579285744810477,
+ -0.00034282656270079315,
+ -0.001362925162538886
+ ],
+ "perturbation_rho": [
+ 0.005214178003370762,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "nudging": {
+ "0.001": [
+ -6.705522537231445e-07,
+ -1.862645149230957e-09,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.003": [
+ -1.5972182154655457e-06,
+ 5.587935447692871e-09,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.01": [
+ -5.433335900306702e-06,
+ 0.0,
+ 3.725290298461914e-09,
+ 0.0,
+ -1.862645149230957e-09,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 52073.1640625,
+ 624600448.0,
+ 2354930944.0,
+ 3962745600.0,
+ 4607439360.0,
+ 6940876800.0,
+ 10061239296.0,
+ 10270245888.0,
+ 10501487616.0,
+ 10991346688.0,
+ 11392520192.0,
+ 11497131008.0,
+ 11767929856.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 3.116530820079788e-07,
+ 2.391457587513912e-10,
+ 2.334610560428274e-10,
+ 2.3252627601166864e-10,
+ 2.3254012604390084e-10,
+ 2.3251603420426648e-10,
+ 2.3252098857451386e-10,
+ 2.324976183798455e-10,
+ 2.3242860414107724e-10,
+ 2.3244653424292494e-10,
+ 2.3248170055722994e-10,
+ 2.3248254710228622e-10,
+ 2.324954950783109e-10
+ ]
+ },
+ "drift": {
+ "embed.weight": 314.5708156367549,
+ "embed.bias": 173.06181327916602,
+ "blocks.0.ln.weight": 10.608293168088462,
+ "blocks.0.w1.weight": 248.85531881291706,
+ "blocks.0.w1.bias": 197.42523217269257,
+ "blocks.0.w2.weight": 461.7533133123923,
+ "blocks.1.ln.weight": 8.793695977749161,
+ "blocks.1.w1.weight": 307.77495139687693,
+ "blocks.1.w1.bias": 271.79664056051047,
+ "blocks.1.w2.weight": 356.28012985496673,
+ "blocks.2.ln.weight": 8.954547910557306,
+ "blocks.2.w1.weight": 349.2950006278737,
+ "blocks.2.w1.bias": 313.10534483438977,
+ "blocks.2.w2.weight": 340.97526886427016,
+ "blocks.3.ln.weight": 7.664064660684567,
+ "blocks.3.w1.weight": 303.1221009157576,
+ "blocks.3.w1.bias": 276.7648976947458,
+ "blocks.3.w2.weight": 276.8184093796655,
+ "blocks.4.ln.weight": 10.090295578382477,
+ "blocks.4.w1.weight": 416.58752319563774,
+ "blocks.4.w1.bias": 396.4529594058789,
+ "blocks.4.w2.weight": 411.8714989953588,
+ "blocks.5.ln.weight": 11.222968212087666,
+ "blocks.5.w1.weight": 460.41052960846133,
+ "blocks.5.w1.bias": 446.32020235508855,
+ "blocks.5.w2.weight": 461.9898764832566,
+ "blocks.6.ln.weight": 9.029697428612298,
+ "blocks.6.w1.weight": 363.0948995365021,
+ "blocks.6.w1.bias": 335.6612472751257,
+ "blocks.6.w2.weight": 358.45859133995083,
+ "blocks.7.ln.weight": 8.68285178731722,
+ "blocks.7.w1.weight": 347.30737462136295,
+ "blocks.7.w1.bias": 319.98700238133193,
+ "blocks.7.w2.weight": 330.4186701898697,
+ "blocks.8.ln.weight": 8.786812217157763,
+ "blocks.8.w1.weight": 347.961851087332,
+ "blocks.8.w1.bias": 341.22606981083646,
+ "blocks.8.w2.weight": 315.32952774172026,
+ "blocks.9.ln.weight": 9.311416409774596,
+ "blocks.9.w1.weight": 343.96115770328134,
+ "blocks.9.w1.bias": 321.3536311695321,
+ "blocks.9.w2.weight": 315.28472186823876,
+ "blocks.10.ln.weight": 6.752703446423726,
+ "blocks.10.w1.weight": 265.99193594775096,
+ "blocks.10.w1.bias": 250.46671872343975,
+ "blocks.10.w2.weight": 243.26857071036574,
+ "blocks.11.ln.weight": 9.311969376436522,
+ "blocks.11.w1.weight": 375.3561731401925,
+ "blocks.11.w1.bias": 348.39190827099014,
+ "blocks.11.w2.weight": 350.6780634465036,
+ "out_ln.weight": 0.7156480208724685,
+ "out_head.weight": 9.90130634348424,
+ "out_head.bias": 0.6155786911344596
+ }
+ },
+ "fa": {
+ "log": {
+ "train_loss": [
+ 2.030823752403259,
+ 1.934500171585083,
+ 1.9079237392807007,
+ 1.8947180710220337,
+ 1.8812753409576417,
+ 1.8702804034423828,
+ 1.8603069189834596,
+ 1.854407600440979,
+ 1.8499454795074464,
+ 1.8409444109344482,
+ 1.8404085549545288,
+ 1.8384266778182983,
+ 1.8317015438842774,
+ 1.8293640671157836,
+ 1.8229795181655883,
+ 1.8220855783081054,
+ 1.8241054327392578,
+ 1.8256892261123656,
+ 1.825209083328247,
+ 1.8221923596572875,
+ 1.827516168899536,
+ 1.8242645990371704,
+ 1.8307866805648805,
+ 1.8350352270126342,
+ 1.8312144650268554,
+ 1.8282124835205078,
+ 1.822820876083374,
+ 1.8260399611663818,
+ 1.8204992460632323,
+ 1.8168495791625976,
+ 1.810229619064331,
+ 1.811380881576538,
+ 1.805999729537964,
+ 1.8007601963043214,
+ 1.7961979135513306,
+ 1.7947537366485595,
+ 1.7928297024536133,
+ 1.788902147216797,
+ 1.7878808304214477,
+ 1.7845082584381105,
+ 1.7827687833023071,
+ 1.7805255111694336,
+ 1.7768453490829468,
+ 1.7761483609390258,
+ 1.7748139315414428,
+ 1.7749142253875732,
+ 1.7714660557174682,
+ 1.770267225265503,
+ 1.7713737234497071,
+ 1.7634125463867187,
+ 1.7577275490570068,
+ 1.7656006281280519,
+ 1.7626705096435547,
+ 1.7589925038909913,
+ 1.756216689376831,
+ 1.7574205046844482,
+ 1.7555658932113647,
+ 1.7512263652801514,
+ 1.7533771200180053,
+ 1.7522863424301147,
+ 1.743942953414917,
+ 1.7457706503677368,
+ 1.744990401649475,
+ 1.7453525008773805,
+ 1.743213934249878,
+ 1.7438006609344483,
+ 1.7438085187530517,
+ 1.738191604385376,
+ 1.7366509014129639,
+ 1.739820227279663,
+ 1.7397972118377685,
+ 1.7396622115325928,
+ 1.7390950318145753,
+ 1.7385543502044678,
+ 1.733802723083496,
+ 1.735742342300415,
+ 1.7329303577041626,
+ 1.732147887878418,
+ 1.7361309533691407,
+ 1.7338640795516969,
+ 1.7344183361053467,
+ 1.7314273851776123,
+ 1.732653318786621,
+ 1.726430778465271,
+ 1.7266450793838501,
+ 1.7317952347564698,
+ 1.7268288860321044,
+ 1.7301931661605836,
+ 1.729054831314087,
+ 1.728853748779297,
+ 1.7290257139587402,
+ 1.7282057024383546,
+ 1.7276880645370483,
+ 1.7283826892089844,
+ 1.7221429102325438,
+ 1.7275538332366944,
+ 1.7293567386627198,
+ 1.7251165560913087,
+ 1.727771372756958,
+ 1.7276002161026
+ ],
+ "train_acc": [
+ 0.25468,
+ 0.29668,
+ 0.3079,
+ 0.31446,
+ 0.31972,
+ 0.32522,
+ 0.32856,
+ 0.32842,
+ 0.33384,
+ 0.3401,
+ 0.33566,
+ 0.3388,
+ 0.3399,
+ 0.34344,
+ 0.3465,
+ 0.3477,
+ 0.34726,
+ 0.34478,
+ 0.34704,
+ 0.34592,
+ 0.34452,
+ 0.34292,
+ 0.34314,
+ 0.3423,
+ 0.33992,
+ 0.34576,
+ 0.3454,
+ 0.34362,
+ 0.3478,
+ 0.34758,
+ 0.35328,
+ 0.35146,
+ 0.35478,
+ 0.35634,
+ 0.35704,
+ 0.35806,
+ 0.3582,
+ 0.35946,
+ 0.3603,
+ 0.35836,
+ 0.36136,
+ 0.36142,
+ 0.36514,
+ 0.36682,
+ 0.36542,
+ 0.36722,
+ 0.36676,
+ 0.36892,
+ 0.36556,
+ 0.3685,
+ 0.37052,
+ 0.3698,
+ 0.37064,
+ 0.37166,
+ 0.37218,
+ 0.37034,
+ 0.37416,
+ 0.3751,
+ 0.37026,
+ 0.37354,
+ 0.377,
+ 0.37554,
+ 0.3771,
+ 0.37508,
+ 0.37768,
+ 0.37622,
+ 0.37688,
+ 0.3797,
+ 0.37742,
+ 0.3776,
+ 0.37718,
+ 0.37764,
+ 0.38156,
+ 0.3803,
+ 0.38212,
+ 0.38114,
+ 0.37938,
+ 0.38004,
+ 0.37876,
+ 0.38,
+ 0.37908,
+ 0.37994,
+ 0.38146,
+ 0.38242,
+ 0.38336,
+ 0.38234,
+ 0.38252,
+ 0.3794,
+ 0.38194,
+ 0.3836,
+ 0.3818,
+ 0.3845,
+ 0.3839,
+ 0.38346,
+ 0.385,
+ 0.38244,
+ 0.38364,
+ 0.38556,
+ 0.38002,
+ 0.38376
+ ],
+ "test_acc": [
+ 0.312,
+ 0.3272,
+ 0.3242,
+ 0.3601,
+ 0.3595,
+ 0.3486,
+ 0.3611,
+ 0.3545,
+ 0.3528,
+ 0.3443,
+ 0.3558,
+ 0.3534,
+ 0.3612,
+ 0.3665,
+ 0.3685,
+ 0.377,
+ 0.3552,
+ 0.3599,
+ 0.3585,
+ 0.3623,
+ 0.3614,
+ 0.3579,
+ 0.3684,
+ 0.3467,
+ 0.3545,
+ 0.3635,
+ 0.3553,
+ 0.3726,
+ 0.3762,
+ 0.3545,
+ 0.3576,
+ 0.3804,
+ 0.3752,
+ 0.3737,
+ 0.3726,
+ 0.3765,
+ 0.3783,
+ 0.3783,
+ 0.3863,
+ 0.3813,
+ 0.3837,
+ 0.3841,
+ 0.3885,
+ 0.3917,
+ 0.3897,
+ 0.3896,
+ 0.3819,
+ 0.3844,
+ 0.394,
+ 0.391,
+ 0.3903,
+ 0.3962,
+ 0.3984,
+ 0.3917,
+ 0.3958,
+ 0.3898,
+ 0.3995,
+ 0.3985,
+ 0.3973,
+ 0.3971,
+ 0.3961,
+ 0.3993,
+ 0.4031,
+ 0.396,
+ 0.3996,
+ 0.4015,
+ 0.4009,
+ 0.4025,
+ 0.4007,
+ 0.4015,
+ 0.4021,
+ 0.397,
+ 0.3991,
+ 0.405,
+ 0.4053,
+ 0.4045,
+ 0.4016,
+ 0.4024,
+ 0.4055,
+ 0.4033,
+ 0.4033,
+ 0.4071,
+ 0.4049,
+ 0.4068,
+ 0.4048,
+ 0.4049,
+ 0.4049,
+ 0.4039,
+ 0.4045,
+ 0.4055,
+ 0.4063,
+ 0.4046,
+ 0.406,
+ 0.4066,
+ 0.4061,
+ 0.4062,
+ 0.4059,
+ 0.406,
+ 0.4061,
+ 0.4062
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.010068703442811966,
+ 0.1245778352022171,
+ 0.09253278374671936,
+ 0.08268401026725769,
+ 0.05013114959001541,
+ 0.041134029626846313,
+ 0.017736738547682762,
+ 0.00011690100654959679,
+ -0.07415217161178589,
+ 0.024469580501317978,
+ -0.045304883271455765,
+ 0.9951483011245728
+ ],
+ "perturbation_rho": [
+ -0.006299033761024475,
+ 0.014822498895227909,
+ 0.003820352256298065,
+ 0.018621522933244705,
+ -0.0024977736175060272,
+ -0.0020938459783792496,
+ -0.018873507156968117,
+ 0.056286461651325226,
+ 0.008058521896600723,
+ 0.012508060783147812,
+ 0.03497573360800743,
+ -0.005283009260892868
+ ],
+ "nudging": {
+ "0.001": [
+ -8.329516276717186e-07,
+ -4.055909812450409e-07,
+ -1.9476283341646194e-07,
+ -1.578591763973236e-07,
+ -7.811468094587326e-08,
+ -4.6566128730773926e-08,
+ -2.0256265997886658e-08,
+ 1.7113052308559418e-08,
+ 2.9802322387695312e-08,
+ 6.984919309616089e-10,
+ 1.280568540096283e-08,
+ -5.175825208425522e-07
+ ],
+ "0.003": [
+ -2.361135557293892e-06,
+ -1.1622905731201172e-06,
+ -4.988396540284157e-07,
+ -3.294553607702255e-07,
+ -1.5029218047857285e-07,
+ -1.0221265256404877e-07,
+ -3.748573362827301e-08,
+ 1.4202669262886047e-08,
+ 1.280568540096283e-07,
+ -5.366746336221695e-08,
+ 6.51925802230835e-08,
+ -1.7427373677492142e-06
+ ],
+ "0.01": [
+ -7.986207492649555e-06,
+ -3.8052676245570183e-06,
+ -1.7613638192415237e-06,
+ -9.794021025300026e-07,
+ -5.171168595552444e-07,
+ -2.832384780049324e-07,
+ -1.4971010386943817e-07,
+ -7.182825356721878e-08,
+ 4.4121406972408295e-07,
+ -2.468004822731018e-07,
+ 2.0524021238088608e-07,
+ -6.320537067949772e-06
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 7009.81494140625,
+ 120237.1953125,
+ 234765.75,
+ 332169.78125,
+ 346779.53125,
+ 390112.375,
+ 463708.90625,
+ 668915.8125,
+ 1122988.5,
+ 1967430.625,
+ 1988171.0,
+ 2240154.75,
+ 1633562.75
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.4624005163786933e-05,
+ 1.2478232065404882e-06,
+ 6.924460080881545e-07,
+ 5.278115509099734e-07,
+ 4.257075261193677e-07,
+ 3.652528164366231e-07,
+ 3.4045956454065163e-07,
+ 3.226113562959654e-07,
+ 3.1934513344822335e-07,
+ 3.1868384553490614e-07,
+ 3.1644472642256005e-07,
+ 3.1672169598095934e-07,
+ 3.163899009450688e-07
+ ]
+ },
+ "drift": {
+ "embed.weight": 47.005374347880874,
+ "embed.bias": 13.243067966808145,
+ "blocks.0.ln.weight": 1.2686282540844722,
+ "blocks.0.w1.weight": 17.553869325595105,
+ "blocks.0.w1.bias": 13.820052496499288,
+ "blocks.0.w2.weight": 60.50343593281807,
+ "blocks.1.ln.weight": 0.9603227875454038,
+ "blocks.1.w1.weight": 17.719833102219965,
+ "blocks.1.w1.bias": 8.605211656313518,
+ "blocks.1.w2.weight": 46.36885490302975,
+ "blocks.2.ln.weight": 0.8397655185331075,
+ "blocks.2.w1.weight": 17.32854478894422,
+ "blocks.2.w1.bias": 8.14502863318518,
+ "blocks.2.w2.weight": 46.8756195290007,
+ "blocks.3.ln.weight": 0.8514772849977531,
+ "blocks.3.w1.weight": 17.625201649093363,
+ "blocks.3.w1.bias": 4.891298958534819,
+ "blocks.3.w2.weight": 49.35104323212137,
+ "blocks.4.ln.weight": 0.7181238110900667,
+ "blocks.4.w1.weight": 17.236589560962,
+ "blocks.4.w1.bias": 6.048278381119212,
+ "blocks.4.w2.weight": 37.01349206570791,
+ "blocks.5.ln.weight": 0.7889239428916937,
+ "blocks.5.w1.weight": 18.624277821727198,
+ "blocks.5.w1.bias": 8.232516711563722,
+ "blocks.5.w2.weight": 34.44857991057927,
+ "blocks.6.ln.weight": 0.9087775281092089,
+ "blocks.6.w1.weight": 20.478021505238107,
+ "blocks.6.w1.bias": 9.991577823388699,
+ "blocks.6.w2.weight": 41.635213960063496,
+ "blocks.7.ln.weight": 0.8672691193491717,
+ "blocks.7.w1.weight": 23.031638161138872,
+ "blocks.7.w1.bias": 15.432108567671811,
+ "blocks.7.w2.weight": 30.494694204720314,
+ "blocks.8.ln.weight": 0.9279180982195232,
+ "blocks.8.w1.weight": 25.623083539907853,
+ "blocks.8.w1.bias": 21.104273424027447,
+ "blocks.8.w2.weight": 30.431336626547836,
+ "blocks.9.ln.weight": 0.6368710077977572,
+ "blocks.9.w1.weight": 17.55317251649144,
+ "blocks.9.w1.bias": 12.498837888749929,
+ "blocks.9.w2.weight": 63.802683742207456,
+ "blocks.10.ln.weight": 0.6261645701278474,
+ "blocks.10.w1.weight": 19.149302928700966,
+ "blocks.10.w1.bias": 19.63354445246865,
+ "blocks.10.w2.weight": 28.244603543415494,
+ "blocks.11.ln.weight": 0.7596072689153585,
+ "blocks.11.w1.weight": 19.668235342459408,
+ "blocks.11.w1.bias": 15.490287106269353,
+ "blocks.11.w2.weight": 55.03718141211597,
+ "out_ln.weight": 0.33939420479137183,
+ "out_head.weight": 5.440112536505558,
+ "out_head.bias": 2.010159660293023
+ }
+ }
+ },
+ "config": {
+ "dataset": "cifar10",
+ "d_hidden": 512,
+ "num_blocks": 12,
+ "batch_size": 128,
+ "epochs": 100,
+ "lr": 0.001,
+ "lr_fb": 0.001,
+ "wd": 0.01,
+ "lam": 0.1,
+ "K": 4,
+ "sigma_bridge": 0.05,
+ "ema_momentum": 0.995,
+ "term_grad_weight": 1.0,
+ "seeds": [
+ 7
+ ],
+ "gpu": 0,
+ "output_dir": "results/fa_dfa_d512_L12_seed7",
+ "methods": [
+ "fa",
+ "dfa"
+ ],
+ "random_targets": false,
+ "penalty_lam": 0.0,
+ "num_classes": 10
+ }
+} \ No newline at end of file
diff --git a/results/fa_dfa_d512_L12_seed8/results_cifar10.json b/results/fa_dfa_d512_L12_seed8/results_cifar10.json
new file mode 100644
index 0000000..8d804da
--- /dev/null
+++ b/results/fa_dfa_d512_L12_seed8/results_cifar10.json
@@ -0,0 +1,969 @@
+{
+ "8": {
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 2.0621551152038573,
+ 2.032111440887451,
+ 2.026601074066162,
+ 2.025331893157959,
+ 2.020625417556763,
+ 2.0179995822143555,
+ 2.0146445192718505,
+ 2.0131945146179198,
+ 2.0136738064575197,
+ 2.0107804653549195,
+ 2.0097129691314697,
+ 2.0098247957611086,
+ 2.0069902968597413,
+ 2.0106662986755373,
+ 2.005746477012634,
+ 2.006078488845825,
+ 2.0048853853988646,
+ 2.0049549353027345,
+ 2.000735955581665,
+ 2.0011458109283446,
+ 2.000437735824585,
+ 1.997751926422119,
+ 2.0025174723052976,
+ 1.998807412185669,
+ 2.000533748397827,
+ 1.9959854383087159,
+ 1.9978079619979858,
+ 1.9958966563415528,
+ 1.996048821105957,
+ 1.9932382696533204,
+ 1.9939029093170166,
+ 1.9939674211120606,
+ 1.993965982017517,
+ 1.9950546265411377,
+ 1.9910492811584473,
+ 1.9932873993301392,
+ 1.9934390425109862,
+ 1.9942208531188965,
+ 1.993705040283203,
+ 1.9926780229949952,
+ 1.9916279154205323,
+ 1.9933884371566772,
+ 1.9898671300506592,
+ 1.989496806678772,
+ 1.990106160736084,
+ 1.9908844341278076,
+ 1.9897770419311522,
+ 1.9911138761901856,
+ 1.9896133652496337,
+ 1.9886156379699707,
+ 1.989099236602783,
+ 1.9889647799682617,
+ 1.987695361251831,
+ 1.987419519920349,
+ 1.9892369752502441,
+ 1.9867283428955078,
+ 1.985670655517578,
+ 1.985864746055603,
+ 1.9890831967163085,
+ 1.986451468811035,
+ 1.98608133934021,
+ 1.986669381942749,
+ 1.9860679926300049,
+ 1.9857238763046265,
+ 1.9861250590515136,
+ 1.9867476695251465,
+ 1.9855187320709229,
+ 1.9870084239959718,
+ 1.9870807974243163,
+ 1.9854985891723633,
+ 1.9862063644790648,
+ 1.986035345840454,
+ 1.9848036085510254,
+ 1.9857490323638916,
+ 1.9848638201141358,
+ 1.9828469595336915,
+ 1.984784072341919,
+ 1.986132272644043,
+ 1.9855829830932616,
+ 1.9845155084228516,
+ 1.9863489217376709,
+ 1.983173909263611,
+ 1.9847177695465088,
+ 1.985843783493042,
+ 1.9844750480651856,
+ 1.9835583266448975,
+ 1.9832866216278076,
+ 1.9824517697906494,
+ 1.9833141297912598,
+ 1.9830520043182374,
+ 1.9822975007629395,
+ 1.9850699710464477,
+ 1.9854454914855957,
+ 1.984017345199585,
+ 1.9839908868408203,
+ 1.9843200086212158,
+ 1.9820627519989014,
+ 1.9816079531478883,
+ 1.9821515436553956,
+ 1.9855904679107665
+ ],
+ "train_acc": [
+ 0.2372,
+ 0.24852,
+ 0.25134,
+ 0.25132,
+ 0.2549,
+ 0.25304,
+ 0.25902,
+ 0.26106,
+ 0.25824,
+ 0.2579,
+ 0.26096,
+ 0.25852,
+ 0.26276,
+ 0.2618,
+ 0.2637,
+ 0.26306,
+ 0.26492,
+ 0.26136,
+ 0.26604,
+ 0.26754,
+ 0.26836,
+ 0.26768,
+ 0.26738,
+ 0.26802,
+ 0.26928,
+ 0.26922,
+ 0.26836,
+ 0.2679,
+ 0.2719,
+ 0.27114,
+ 0.26944,
+ 0.27052,
+ 0.2709,
+ 0.26888,
+ 0.26714,
+ 0.27096,
+ 0.2726,
+ 0.27154,
+ 0.27206,
+ 0.27296,
+ 0.27414,
+ 0.27206,
+ 0.27384,
+ 0.27372,
+ 0.27318,
+ 0.27372,
+ 0.27472,
+ 0.27246,
+ 0.27264,
+ 0.27428,
+ 0.27502,
+ 0.27448,
+ 0.2742,
+ 0.27528,
+ 0.275,
+ 0.27736,
+ 0.2766,
+ 0.2759,
+ 0.27522,
+ 0.27712,
+ 0.27626,
+ 0.27468,
+ 0.27628,
+ 0.27488,
+ 0.27464,
+ 0.27474,
+ 0.27628,
+ 0.2782,
+ 0.27526,
+ 0.27606,
+ 0.27564,
+ 0.27476,
+ 0.27602,
+ 0.27802,
+ 0.2757,
+ 0.27936,
+ 0.27668,
+ 0.27736,
+ 0.2746,
+ 0.27752,
+ 0.27548,
+ 0.27848,
+ 0.2752,
+ 0.27484,
+ 0.27662,
+ 0.27726,
+ 0.2752,
+ 0.27864,
+ 0.27702,
+ 0.27936,
+ 0.27774,
+ 0.27604,
+ 0.27712,
+ 0.27604,
+ 0.27688,
+ 0.27666,
+ 0.27784,
+ 0.27676,
+ 0.27736,
+ 0.27638
+ ],
+ "test_acc": [
+ 0.2412,
+ 0.2706,
+ 0.2562,
+ 0.262,
+ 0.2857,
+ 0.2628,
+ 0.2833,
+ 0.2782,
+ 0.2575,
+ 0.2859,
+ 0.2873,
+ 0.267,
+ 0.288,
+ 0.2774,
+ 0.2724,
+ 0.2759,
+ 0.2718,
+ 0.2937,
+ 0.2699,
+ 0.2666,
+ 0.2763,
+ 0.2678,
+ 0.299,
+ 0.2876,
+ 0.2835,
+ 0.2979,
+ 0.28,
+ 0.2829,
+ 0.2768,
+ 0.2923,
+ 0.281,
+ 0.2884,
+ 0.2948,
+ 0.2862,
+ 0.2896,
+ 0.2881,
+ 0.2871,
+ 0.2895,
+ 0.2905,
+ 0.298,
+ 0.2897,
+ 0.2784,
+ 0.2898,
+ 0.2972,
+ 0.2879,
+ 0.2895,
+ 0.2881,
+ 0.2884,
+ 0.2944,
+ 0.2935,
+ 0.3039,
+ 0.2886,
+ 0.2993,
+ 0.2857,
+ 0.2931,
+ 0.2941,
+ 0.2933,
+ 0.3021,
+ 0.2938,
+ 0.2991,
+ 0.2841,
+ 0.2896,
+ 0.2945,
+ 0.2911,
+ 0.3,
+ 0.2963,
+ 0.2894,
+ 0.2908,
+ 0.2917,
+ 0.2946,
+ 0.2927,
+ 0.2946,
+ 0.296,
+ 0.3,
+ 0.2935,
+ 0.2898,
+ 0.2955,
+ 0.2929,
+ 0.2928,
+ 0.2992,
+ 0.2931,
+ 0.2954,
+ 0.2933,
+ 0.2924,
+ 0.2933,
+ 0.2925,
+ 0.2919,
+ 0.2941,
+ 0.2941,
+ 0.2946,
+ 0.2927,
+ 0.2945,
+ 0.294,
+ 0.2931,
+ 0.2945,
+ 0.2935,
+ 0.2943,
+ 0.2943,
+ 0.2946,
+ 0.2946
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.3852921724319458,
+ -0.0006387766916304827,
+ 0.00021705820108763874,
+ -0.00021593061683233827,
+ -0.0001868726685643196,
+ 2.6933841581922024e-05,
+ 0.00016013934509828687,
+ 7.406133954646066e-05,
+ -3.145005030091852e-05,
+ 0.0004791871178895235,
+ 0.0008787637343630195,
+ -0.0007018762989901006
+ ],
+ "perturbation_rho": [
+ 0.01893431320786476,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "nudging": {
+ "0.001": [
+ -5.145557224750519e-07,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.003": [
+ -1.3075768947601318e-06,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.01": [
+ -4.1211023926734924e-06,
+ 9.313225746154785e-10,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 9.313225746154785e-10,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 55220.6953125,
+ 703320128.0,
+ 4374171648.0,
+ 4628753408.0,
+ 5259584512.0,
+ 5602404864.0,
+ 6980004864.0,
+ 7249905664.0,
+ 7601436160.0,
+ 8436634624.0,
+ 9617230848.0,
+ 9769342976.0,
+ 10366923776.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.842145931936102e-07,
+ 1.8565524118052679e-10,
+ 1.8530457723819893e-10,
+ 1.8531473577887425e-10,
+ 1.852697301130135e-10,
+ 1.852322323303568e-10,
+ 1.8537825441367062e-10,
+ 1.8532649026514747e-10,
+ 1.8534035417516748e-10,
+ 1.8526735701129837e-10,
+ 1.85539111852151e-10,
+ 1.8548607094714953e-10,
+ 1.8552165359508876e-10
+ ]
+ },
+ "drift": {
+ "embed.weight": 331.10430617288347,
+ "embed.bias": 252.9571084710531,
+ "blocks.0.ln.weight": 10.63590306325829,
+ "blocks.0.w1.weight": 263.71773941078897,
+ "blocks.0.w1.bias": 240.90214072476394,
+ "blocks.0.w2.weight": 484.24212277052584,
+ "blocks.1.ln.weight": 8.994925940820934,
+ "blocks.1.w1.weight": 375.0257234388459,
+ "blocks.1.w1.bias": 372.81964220264507,
+ "blocks.1.w2.weight": 399.05181196420534,
+ "blocks.2.ln.weight": 6.785381078745234,
+ "blocks.2.w1.weight": 237.77081904994165,
+ "blocks.2.w1.bias": 215.00165533028638,
+ "blocks.2.w2.weight": 243.766610543445,
+ "blocks.3.ln.weight": 8.749875392840245,
+ "blocks.3.w1.weight": 331.37899522969747,
+ "blocks.3.w1.bias": 315.8500446097451,
+ "blocks.3.w2.weight": 312.3537327104354,
+ "blocks.4.ln.weight": 8.073930968349993,
+ "blocks.4.w1.weight": 291.9215675635427,
+ "blocks.4.w1.bias": 265.90446228164745,
+ "blocks.4.w2.weight": 273.96062778369446,
+ "blocks.5.ln.weight": 9.487609121353676,
+ "blocks.5.w1.weight": 379.3472655416328,
+ "blocks.5.w1.bias": 364.18802309088414,
+ "blocks.5.w2.weight": 375.20829167636464,
+ "blocks.6.ln.weight": 7.271832594737899,
+ "blocks.6.w1.weight": 274.22615021165615,
+ "blocks.6.w1.bias": 253.82742699623324,
+ "blocks.6.w2.weight": 263.6858849060695,
+ "blocks.7.ln.weight": 8.060212000337511,
+ "blocks.7.w1.weight": 280.8757929768,
+ "blocks.7.w1.bias": 262.00267937269797,
+ "blocks.7.w2.weight": 269.2018115874501,
+ "blocks.8.ln.weight": 9.34446156262141,
+ "blocks.8.w1.weight": 362.0470567061417,
+ "blocks.8.w1.bias": 345.9730629510181,
+ "blocks.8.w2.weight": 346.1165305240824,
+ "blocks.9.ln.weight": 10.547709601128119,
+ "blocks.9.w1.weight": 421.4247930203293,
+ "blocks.9.w1.bias": 384.7717987562065,
+ "blocks.9.w2.weight": 391.67325768744075,
+ "blocks.10.ln.weight": 7.98341504623973,
+ "blocks.10.w1.weight": 310.77250388889735,
+ "blocks.10.w1.bias": 289.60855856986865,
+ "blocks.10.w2.weight": 285.0024842791926,
+ "blocks.11.ln.weight": 9.5014629592837,
+ "blocks.11.w1.weight": 374.13082807575006,
+ "blocks.11.w1.bias": 348.0579252323944,
+ "blocks.11.w2.weight": 345.1186750982964,
+ "out_ln.weight": 0.6314801307530133,
+ "out_head.weight": 8.99875779950455,
+ "out_head.bias": 0.46869532178721607
+ }
+ },
+ "fa": {
+ "log": {
+ "train_loss": [
+ 2.0407437321472166,
+ 1.9698008823394775,
+ 1.9470496474838257,
+ 1.928373962173462,
+ 1.9022277151870728,
+ 1.8882839569091796,
+ 1.8752377725219727,
+ 1.8682713878631592,
+ 1.8643299547576904,
+ 1.8616547422027587,
+ 1.861611949081421,
+ 1.8620045602416992,
+ 1.8579721353149414,
+ 1.8603198122406006,
+ 1.852560590248108,
+ 1.850361158065796,
+ 1.8412007814788818,
+ 1.8371962604141234,
+ 1.830751480064392,
+ 1.830014287109375,
+ 1.8211512073135376,
+ 1.8158404816055298,
+ 1.8152668701553345,
+ 1.8068659871673585,
+ 1.807379986190796,
+ 1.8042517069244384,
+ 1.8004914111709596,
+ 1.7970869720077514,
+ 1.7892543896484374,
+ 1.786518664894104,
+ 1.7887954705047608,
+ 1.787979825515747,
+ 1.782947692489624,
+ 1.7796524175643922,
+ 1.7780501412963867,
+ 1.7751860930252075,
+ 1.77746077003479,
+ 1.7734027153778076,
+ 1.7719406731414795,
+ 1.7689697579574586,
+ 1.7694388663101197,
+ 1.7689455612182616,
+ 1.7627556784820557,
+ 1.76162135345459,
+ 1.7598590536880494,
+ 1.7624462253189086,
+ 1.7612236563873291,
+ 1.7604118212127686,
+ 1.757069122390747,
+ 1.7525782149887086,
+ 1.7570673566055297,
+ 1.753389842262268,
+ 1.7526489987564087,
+ 1.7506390865325927,
+ 1.7564489751434327,
+ 1.7510275960922241,
+ 1.7505046075057984,
+ 1.7489783365249634,
+ 1.7482558310699463,
+ 1.7478452098846435,
+ 1.746708115234375,
+ 1.7485102898788452,
+ 1.7510625556182862,
+ 1.7457116416168212,
+ 1.7465464822387695,
+ 1.7432715910339356,
+ 1.7428231796646119,
+ 1.7424320980072021,
+ 1.7456896788787841,
+ 1.7436157192611694,
+ 1.7397695401763915,
+ 1.740845919532776,
+ 1.7387019988250731,
+ 1.7427006936645508,
+ 1.7407404198455811,
+ 1.7381612093734742,
+ 1.742300903930664,
+ 1.741065849647522,
+ 1.7432577428817748,
+ 1.7419394709014893,
+ 1.7393424035644531,
+ 1.7376308783721923,
+ 1.7395985202407838,
+ 1.7360640591812133,
+ 1.7388901407623292,
+ 1.7372601049804688,
+ 1.737554390182495,
+ 1.7332513166046142,
+ 1.7339157095718383,
+ 1.73852820854187,
+ 1.7378087186050415,
+ 1.7388774118423462,
+ 1.732873987159729,
+ 1.7344641219329835,
+ 1.7370289881134033,
+ 1.735849939918518,
+ 1.731984726486206,
+ 1.7332192292022706,
+ 1.7361968645477295,
+ 1.7349854880142213
+ ],
+ "train_acc": [
+ 0.2475,
+ 0.27976,
+ 0.29052,
+ 0.29796,
+ 0.31032,
+ 0.31312,
+ 0.32008,
+ 0.32376,
+ 0.3275,
+ 0.32858,
+ 0.3309,
+ 0.33454,
+ 0.33414,
+ 0.33476,
+ 0.33654,
+ 0.33644,
+ 0.34346,
+ 0.3414,
+ 0.34694,
+ 0.3445,
+ 0.34916,
+ 0.35076,
+ 0.35364,
+ 0.35626,
+ 0.35816,
+ 0.35708,
+ 0.35872,
+ 0.35684,
+ 0.36218,
+ 0.36264,
+ 0.36392,
+ 0.36252,
+ 0.36352,
+ 0.3664,
+ 0.36594,
+ 0.36678,
+ 0.36554,
+ 0.36668,
+ 0.36746,
+ 0.3698,
+ 0.37054,
+ 0.36998,
+ 0.37094,
+ 0.3744,
+ 0.3719,
+ 0.37366,
+ 0.37076,
+ 0.3724,
+ 0.37426,
+ 0.37956,
+ 0.3765,
+ 0.373,
+ 0.37478,
+ 0.37612,
+ 0.37244,
+ 0.37586,
+ 0.37716,
+ 0.37568,
+ 0.3782,
+ 0.37812,
+ 0.37682,
+ 0.3766,
+ 0.37616,
+ 0.3787,
+ 0.3766,
+ 0.37772,
+ 0.38072,
+ 0.37976,
+ 0.37842,
+ 0.3751,
+ 0.3807,
+ 0.3796,
+ 0.37884,
+ 0.37908,
+ 0.38054,
+ 0.38114,
+ 0.37896,
+ 0.37942,
+ 0.3799,
+ 0.37928,
+ 0.38292,
+ 0.37956,
+ 0.38116,
+ 0.38178,
+ 0.38176,
+ 0.38242,
+ 0.38104,
+ 0.38424,
+ 0.38304,
+ 0.38046,
+ 0.3825,
+ 0.38104,
+ 0.38308,
+ 0.38346,
+ 0.38224,
+ 0.38272,
+ 0.38498,
+ 0.38364,
+ 0.3829,
+ 0.3843
+ ],
+ "test_acc": [
+ 0.2791,
+ 0.3084,
+ 0.3148,
+ 0.3302,
+ 0.3397,
+ 0.3317,
+ 0.3545,
+ 0.3557,
+ 0.3438,
+ 0.3655,
+ 0.3703,
+ 0.3631,
+ 0.3661,
+ 0.3705,
+ 0.3563,
+ 0.3627,
+ 0.374,
+ 0.3834,
+ 0.3758,
+ 0.3803,
+ 0.3716,
+ 0.3648,
+ 0.3972,
+ 0.3934,
+ 0.3921,
+ 0.3901,
+ 0.3811,
+ 0.3987,
+ 0.3921,
+ 0.3942,
+ 0.3883,
+ 0.3901,
+ 0.3797,
+ 0.3967,
+ 0.3989,
+ 0.3944,
+ 0.3874,
+ 0.3912,
+ 0.3979,
+ 0.3907,
+ 0.405,
+ 0.4035,
+ 0.4043,
+ 0.4026,
+ 0.3974,
+ 0.397,
+ 0.4022,
+ 0.4051,
+ 0.3848,
+ 0.4045,
+ 0.4084,
+ 0.4049,
+ 0.4029,
+ 0.3967,
+ 0.4055,
+ 0.4057,
+ 0.406,
+ 0.4054,
+ 0.4069,
+ 0.4042,
+ 0.3997,
+ 0.4073,
+ 0.4117,
+ 0.406,
+ 0.4061,
+ 0.4051,
+ 0.4068,
+ 0.4082,
+ 0.4055,
+ 0.4063,
+ 0.4081,
+ 0.4109,
+ 0.4081,
+ 0.4085,
+ 0.4103,
+ 0.4019,
+ 0.4102,
+ 0.4103,
+ 0.4103,
+ 0.4095,
+ 0.4108,
+ 0.4093,
+ 0.408,
+ 0.4097,
+ 0.4098,
+ 0.4075,
+ 0.4093,
+ 0.4119,
+ 0.4105,
+ 0.4117,
+ 0.4103,
+ 0.4113,
+ 0.4124,
+ 0.4111,
+ 0.4111,
+ 0.4108,
+ 0.4113,
+ 0.4122,
+ 0.4122,
+ 0.4122
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.03226801007986069,
+ 0.06280244886875153,
+ 0.010810771957039833,
+ -0.03502938896417618,
+ -0.02679986134171486,
+ -0.043182265013456345,
+ -0.04647787660360336,
+ -0.03189649432897568,
+ -0.0780816450715065,
+ -0.05483850836753845,
+ -0.02444492094218731,
+ 0.987554669380188
+ ],
+ "perturbation_rho": [
+ 0.0010241912677884102,
+ -0.04057024046778679,
+ 0.018125081434845924,
+ 0.0397450290620327,
+ -0.017151735723018646,
+ 0.010231071151793003,
+ 0.02530200220644474,
+ -0.034604527056217194,
+ -0.008924206718802452,
+ 0.008240236900746822,
+ 0.006566178053617477,
+ -0.016290105879306793
+ ],
+ "nudging": {
+ "0.001": [
+ -2.0849984139204025e-06,
+ -2.7567148208618164e-07,
+ -1.979060471057892e-08,
+ 8.824281394481659e-08,
+ 7.82310962677002e-08,
+ -1.6996636986732483e-08,
+ 7.520429790019989e-08,
+ 2.584420144557953e-08,
+ -8.381903171539307e-09,
+ 5.820766091346741e-09,
+ 1.3504177331924438e-08,
+ -1.0039657354354858e-06
+ ],
+ "0.003": [
+ -6.289919838309288e-06,
+ -7.05476850271225e-07,
+ -2.3283064365386963e-08,
+ 1.6996636986732483e-07,
+ 1.1431984603404999e-07,
+ 2.3702159523963928e-07,
+ 1.7462298274040222e-07,
+ 1.2898817658424377e-07,
+ 3.096647560596466e-07,
+ 1.2014061212539673e-07,
+ 1.3527460396289825e-07,
+ -3.6957208067178726e-06
+ ],
+ "0.01": [
+ -2.0938459783792496e-05,
+ -2.1811574697494507e-06,
+ -1.424923539161682e-07,
+ 4.1816383600234985e-07,
+ 3.825407475233078e-07,
+ 5.098991096019745e-07,
+ 6.461050361394882e-07,
+ 3.5669654607772827e-07,
+ 1.1634547263383865e-06,
+ 6.814952939748764e-07,
+ 3.4994445741176605e-07,
+ -1.281173899769783e-05
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 7157.4072265625,
+ 94832.078125,
+ 404715.40625,
+ 900567.4375,
+ 1081047.875,
+ 1230157.0,
+ 1365586.875,
+ 1528390.375,
+ 1618979.25,
+ 1737319.75,
+ 1867286.375,
+ 1900228.625,
+ 1396480.75
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.4219883925979957e-05,
+ 1.7657896478340263e-06,
+ 6.163170382933458e-07,
+ 5.848775117556215e-07,
+ 5.799907398795767e-07,
+ 5.837350158799381e-07,
+ 5.848508521921758e-07,
+ 5.83334440307226e-07,
+ 5.784736458736006e-07,
+ 5.808138325846812e-07,
+ 5.77644073018746e-07,
+ 5.686248982783582e-07,
+ 5.330333578967839e-07
+ ]
+ },
+ "drift": {
+ "embed.weight": 47.13343033798094,
+ "embed.bias": 17.027552330320233,
+ "blocks.0.ln.weight": 1.1320890684307203,
+ "blocks.0.w1.weight": 16.27975609186272,
+ "blocks.0.w1.bias": 12.808052799935883,
+ "blocks.0.w2.weight": 51.91947766632093,
+ "blocks.1.ln.weight": 0.9202706367200378,
+ "blocks.1.w1.weight": 18.82744877577282,
+ "blocks.1.w1.bias": 12.653020534114173,
+ "blocks.1.w2.weight": 46.17134940964603,
+ "blocks.2.ln.weight": 0.7482719581080549,
+ "blocks.2.w1.weight": 19.557560479232055,
+ "blocks.2.w1.bias": 16.890590269942404,
+ "blocks.2.w2.weight": 39.02075074459777,
+ "blocks.3.ln.weight": 0.5160212396517895,
+ "blocks.3.w1.weight": 16.570363915867922,
+ "blocks.3.w1.bias": 17.214082946851736,
+ "blocks.3.w2.weight": 30.85580581534199,
+ "blocks.4.ln.weight": 0.43511517848811326,
+ "blocks.4.w1.weight": 16.64619937865781,
+ "blocks.4.w1.bias": 18.15413776267707,
+ "blocks.4.w2.weight": 29.35744053745253,
+ "blocks.5.ln.weight": 0.46109930109565794,
+ "blocks.5.w1.weight": 16.648426948133086,
+ "blocks.5.w1.bias": 18.405016598411816,
+ "blocks.5.w2.weight": 32.04993283017616,
+ "blocks.6.ln.weight": 0.5239951706454276,
+ "blocks.6.w1.weight": 17.307678602670318,
+ "blocks.6.w1.bias": 17.94260026627366,
+ "blocks.6.w2.weight": 35.649016996813835,
+ "blocks.7.ln.weight": 0.549764892230928,
+ "blocks.7.w1.weight": 17.37898974454947,
+ "blocks.7.w1.bias": 17.920017558565252,
+ "blocks.7.w2.weight": 47.618251557182425,
+ "blocks.8.ln.weight": 0.5059874235487765,
+ "blocks.8.w1.weight": 16.71552334731513,
+ "blocks.8.w1.bias": 18.79123189462688,
+ "blocks.8.w2.weight": 33.90958537646263,
+ "blocks.9.ln.weight": 0.5547967624139118,
+ "blocks.9.w1.weight": 17.469950204626425,
+ "blocks.9.w1.bias": 17.514572577139017,
+ "blocks.9.w2.weight": 47.637641198604065,
+ "blocks.10.ln.weight": 0.4702376652398834,
+ "blocks.10.w1.weight": 15.609742871134038,
+ "blocks.10.w1.bias": 14.194008119737171,
+ "blocks.10.w2.weight": 53.92588624506532,
+ "blocks.11.ln.weight": 0.5638652470821104,
+ "blocks.11.w1.weight": 18.329914062311936,
+ "blocks.11.w1.bias": 16.960111542785533,
+ "blocks.11.w2.weight": 61.54428413245766,
+ "out_ln.weight": 0.36613157588839446,
+ "out_head.weight": 6.704387093335344,
+ "out_head.bias": 0.6590609045431134
+ }
+ }
+ },
+ "config": {
+ "dataset": "cifar10",
+ "d_hidden": 512,
+ "num_blocks": 12,
+ "batch_size": 128,
+ "epochs": 100,
+ "lr": 0.001,
+ "lr_fb": 0.001,
+ "wd": 0.01,
+ "lam": 0.1,
+ "K": 4,
+ "sigma_bridge": 0.05,
+ "ema_momentum": 0.995,
+ "term_grad_weight": 1.0,
+ "seeds": [
+ 8
+ ],
+ "gpu": 0,
+ "output_dir": "results/fa_dfa_d512_L12_seed8",
+ "methods": [
+ "fa",
+ "dfa"
+ ],
+ "random_targets": false,
+ "penalty_lam": 0.0,
+ "num_classes": 10
+ }
+} \ No newline at end of file
diff --git a/results/fa_dfa_d512_L12_seed9/results_cifar10.json b/results/fa_dfa_d512_L12_seed9/results_cifar10.json
new file mode 100644
index 0000000..974d42a
--- /dev/null
+++ b/results/fa_dfa_d512_L12_seed9/results_cifar10.json
@@ -0,0 +1,969 @@
+{
+ "9": {
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 2.086543095245361,
+ 2.049035365447998,
+ 2.0382854364776612,
+ 2.0298845935058596,
+ 2.0281315744018555,
+ 2.0237447773742674,
+ 2.0194822165679933,
+ 2.019161204986572,
+ 2.015748862915039,
+ 2.0162334437561036,
+ 2.0126681255340575,
+ 2.012035894165039,
+ 2.009547615966797,
+ 2.011429062194824,
+ 2.0080011304473877,
+ 2.0085202750396727,
+ 2.0067744227600097,
+ 2.0047744031143186,
+ 2.004743169174194,
+ 2.001354461517334,
+ 2.0038469007873534,
+ 2.0027169017028807,
+ 1.9995321035003661,
+ 1.9969835998916625,
+ 1.9975377194595336,
+ 1.9985439079284668,
+ 1.9960253533172607,
+ 1.9974639236831666,
+ 1.9950202367401122,
+ 1.9968991885375977,
+ 1.9976718412017822,
+ 1.9939492235565186,
+ 1.9947053424835206,
+ 1.995585137901306,
+ 1.9956871920776367,
+ 1.9927901021575927,
+ 1.992329425201416,
+ 1.9917698760604858,
+ 1.9922489316558838,
+ 1.9921125721740722,
+ 1.9919519045257568,
+ 1.9915309338378906,
+ 1.9927355318450928,
+ 1.9899454330825805,
+ 1.9910321056365967,
+ 1.9931654618835448,
+ 1.9889872648620606,
+ 1.9899863806915283,
+ 1.9893665783309937,
+ 1.989453801651001,
+ 1.9890669647216797,
+ 1.9882669693374633,
+ 1.9893041844177246,
+ 1.9891512668609619,
+ 1.9883573954772948,
+ 1.9891788550186158,
+ 1.9858848489379883,
+ 1.9877911152648926,
+ 1.987854264564514,
+ 1.9873136006164551,
+ 1.9871731573486329,
+ 1.9879692407989502,
+ 1.987359190750122,
+ 1.9866330204772948,
+ 1.98554376373291,
+ 1.9857093753051758,
+ 1.986483130493164,
+ 1.9841259546279908,
+ 1.98654155418396,
+ 1.9857370235824585,
+ 1.9854363512420654,
+ 1.9837415993881227,
+ 1.9846706900024413,
+ 1.9846486893463136,
+ 1.9852562395477296,
+ 1.985122490158081,
+ 1.9856149569702148,
+ 1.9855838787460327,
+ 1.985352071609497,
+ 1.9836603102874757,
+ 1.984020567855835,
+ 1.9842208641052246,
+ 1.9867661368179321,
+ 1.9825923907852172,
+ 1.9839700649261474,
+ 1.985303124923706,
+ 1.984482919921875,
+ 1.9825283669281006,
+ 1.9836111863708497,
+ 1.9835795726013183,
+ 1.9827597061920166,
+ 1.98389965385437,
+ 1.9854092786407471,
+ 1.9815458943939208,
+ 1.9817595046615601,
+ 1.9855254986190796,
+ 1.9835386077880859,
+ 1.9825453355407714,
+ 1.9826166521835327,
+ 1.9829287356567382
+ ],
+ "train_acc": [
+ 0.22926,
+ 0.24766,
+ 0.24966,
+ 0.25552,
+ 0.25304,
+ 0.25922,
+ 0.25868,
+ 0.25894,
+ 0.2602,
+ 0.26156,
+ 0.26166,
+ 0.26588,
+ 0.26356,
+ 0.26296,
+ 0.2648,
+ 0.26752,
+ 0.26888,
+ 0.26516,
+ 0.2687,
+ 0.2697,
+ 0.27084,
+ 0.26762,
+ 0.26938,
+ 0.27176,
+ 0.2698,
+ 0.27094,
+ 0.2722,
+ 0.2721,
+ 0.27342,
+ 0.27132,
+ 0.27128,
+ 0.2735,
+ 0.27358,
+ 0.27302,
+ 0.2743,
+ 0.27702,
+ 0.27592,
+ 0.2762,
+ 0.27586,
+ 0.27454,
+ 0.27708,
+ 0.27588,
+ 0.27488,
+ 0.27664,
+ 0.27778,
+ 0.27754,
+ 0.27744,
+ 0.27638,
+ 0.2782,
+ 0.27892,
+ 0.27888,
+ 0.27894,
+ 0.27778,
+ 0.2784,
+ 0.2806,
+ 0.2788,
+ 0.27942,
+ 0.27968,
+ 0.27888,
+ 0.2771,
+ 0.27966,
+ 0.27918,
+ 0.28102,
+ 0.2799,
+ 0.27998,
+ 0.27866,
+ 0.28044,
+ 0.28208,
+ 0.28314,
+ 0.28018,
+ 0.2797,
+ 0.28004,
+ 0.2795,
+ 0.28196,
+ 0.28128,
+ 0.2824,
+ 0.28326,
+ 0.28176,
+ 0.28084,
+ 0.2816,
+ 0.28268,
+ 0.28344,
+ 0.28014,
+ 0.283,
+ 0.2824,
+ 0.28114,
+ 0.28384,
+ 0.28424,
+ 0.28492,
+ 0.28422,
+ 0.28338,
+ 0.28276,
+ 0.28152,
+ 0.28358,
+ 0.28352,
+ 0.28086,
+ 0.2832,
+ 0.28484,
+ 0.28126,
+ 0.28462
+ ],
+ "test_acc": [
+ 0.2701,
+ 0.2621,
+ 0.2798,
+ 0.281,
+ 0.2593,
+ 0.2849,
+ 0.2797,
+ 0.2768,
+ 0.272,
+ 0.2901,
+ 0.2986,
+ 0.2839,
+ 0.2741,
+ 0.2916,
+ 0.2865,
+ 0.2818,
+ 0.3006,
+ 0.2972,
+ 0.2972,
+ 0.3049,
+ 0.2762,
+ 0.2939,
+ 0.2754,
+ 0.2954,
+ 0.2984,
+ 0.3,
+ 0.2821,
+ 0.2798,
+ 0.2887,
+ 0.3018,
+ 0.2833,
+ 0.3076,
+ 0.2891,
+ 0.291,
+ 0.2959,
+ 0.284,
+ 0.2942,
+ 0.3106,
+ 0.2895,
+ 0.3004,
+ 0.3048,
+ 0.3058,
+ 0.303,
+ 0.2951,
+ 0.3031,
+ 0.2955,
+ 0.2961,
+ 0.3036,
+ 0.3045,
+ 0.2976,
+ 0.2995,
+ 0.2965,
+ 0.2912,
+ 0.3007,
+ 0.2987,
+ 0.308,
+ 0.3073,
+ 0.3,
+ 0.3117,
+ 0.2891,
+ 0.2945,
+ 0.3064,
+ 0.2987,
+ 0.2929,
+ 0.2984,
+ 0.2945,
+ 0.2945,
+ 0.3028,
+ 0.2994,
+ 0.3008,
+ 0.3,
+ 0.3069,
+ 0.3033,
+ 0.3088,
+ 0.3052,
+ 0.3023,
+ 0.2992,
+ 0.2946,
+ 0.305,
+ 0.3024,
+ 0.3081,
+ 0.3014,
+ 0.3053,
+ 0.3039,
+ 0.3079,
+ 0.305,
+ 0.3035,
+ 0.3018,
+ 0.3052,
+ 0.3063,
+ 0.3025,
+ 0.3056,
+ 0.3037,
+ 0.3046,
+ 0.3045,
+ 0.3041,
+ 0.3043,
+ 0.3044,
+ 0.3044,
+ 0.3043
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.3948982357978821,
+ 8.632082608528435e-05,
+ -0.00022141945373732597,
+ -0.0004791135434061289,
+ 0.00010106388799613342,
+ 7.346954225795344e-05,
+ -7.074545283103362e-05,
+ -5.661595787387341e-05,
+ 0.00012481751036830246,
+ -0.0005871393950656056,
+ 0.00015422290016431361,
+ -0.0005310022970661521
+ ],
+ "perturbation_rho": [
+ 0.0077664791606366634,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "nudging": {
+ "0.001": [
+ -3.594905138015747e-07,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.003": [
+ -1.3024546205997467e-06,
+ -3.725290298461914e-09,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.01": [
+ -4.144385457038879e-06,
+ -3.725290298461914e-09,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ -9.313225746154785e-10,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 56626.58984375,
+ 921777728.0,
+ 2723525376.0,
+ 5562149376.0,
+ 6096715776.0,
+ 6586259968.0,
+ 9732721664.0,
+ 9669530624.0,
+ 10051646464.0,
+ 10812058624.0,
+ 10900319232.0,
+ 11575299072.0,
+ 12058745856.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.953412661099719e-07,
+ 1.7980278377294212e-10,
+ 1.7774372251810888e-10,
+ 1.7754615833087684e-10,
+ 1.77546435886633e-10,
+ 1.7753745695792134e-10,
+ 1.775574409723646e-10,
+ 1.7755288905796363e-10,
+ 1.7755619197146189e-10,
+ 1.7750646785774649e-10,
+ 1.775143226856457e-10,
+ 1.7751018710487898e-10,
+ 1.775422031613516e-10
+ ]
+ },
+ "drift": {
+ "embed.weight": 330.97935370781335,
+ "embed.bias": 242.7352200338295,
+ "blocks.0.ln.weight": 10.118660070740958,
+ "blocks.0.w1.weight": 268.02039354554194,
+ "blocks.0.w1.bias": 236.60527981405795,
+ "blocks.0.w2.weight": 479.95056180068593,
+ "blocks.1.ln.weight": 8.173523635910902,
+ "blocks.1.w1.weight": 314.00233907878504,
+ "blocks.1.w1.bias": 303.2790322538351,
+ "blocks.1.w2.weight": 332.3008689530667,
+ "blocks.2.ln.weight": 9.503933776173088,
+ "blocks.2.w1.weight": 407.2148671186256,
+ "blocks.2.w1.bias": 377.7677645021183,
+ "blocks.2.w2.weight": 405.2326915964652,
+ "blocks.3.ln.weight": 8.261827691373458,
+ "blocks.3.w1.weight": 324.0550552979263,
+ "blocks.3.w1.bias": 305.54524457584483,
+ "blocks.3.w2.weight": 300.9480221394592,
+ "blocks.4.ln.weight": 8.47590401409121,
+ "blocks.4.w1.weight": 338.56186029144186,
+ "blocks.4.w1.bias": 322.0375352385375,
+ "blocks.4.w2.weight": 319.77753486779426,
+ "blocks.5.ln.weight": 11.22139697754829,
+ "blocks.5.w1.weight": 458.2444114055581,
+ "blocks.5.w1.bias": 424.3745814588518,
+ "blocks.5.w2.weight": 450.83821734650985,
+ "blocks.6.ln.weight": 7.796159629674181,
+ "blocks.6.w1.weight": 306.7435557133061,
+ "blocks.6.w1.bias": 295.1474090093065,
+ "blocks.6.w2.weight": 287.3353674909529,
+ "blocks.7.ln.weight": 8.077037696900597,
+ "blocks.7.w1.weight": 320.0610116694296,
+ "blocks.7.w1.bias": 297.55111326321224,
+ "blocks.7.w2.weight": 294.8277249235264,
+ "blocks.8.ln.weight": 9.437782779016567,
+ "blocks.8.w1.weight": 381.01065244180916,
+ "blocks.8.w1.bias": 346.5987668046489,
+ "blocks.8.w2.weight": 354.1595052479188,
+ "blocks.9.ln.weight": 7.2340406951431575,
+ "blocks.9.w1.weight": 277.2401387021752,
+ "blocks.9.w1.bias": 262.2340251864824,
+ "blocks.9.w2.weight": 255.1111568089683,
+ "blocks.10.ln.weight": 9.961816471882193,
+ "blocks.10.w1.weight": 400.6058052128251,
+ "blocks.10.w1.bias": 369.00187630886694,
+ "blocks.10.w2.weight": 388.64956558131564,
+ "blocks.11.ln.weight": 8.356343855238048,
+ "blocks.11.w1.weight": 316.91850723934095,
+ "blocks.11.w1.bias": 297.45861181164963,
+ "blocks.11.w2.weight": 290.23124955665594,
+ "out_ln.weight": 0.6534850850308469,
+ "out_head.weight": 9.848963989978573,
+ "out_head.bias": 0.41699188717651064
+ }
+ },
+ "fa": {
+ "log": {
+ "train_loss": [
+ 2.0459222147369385,
+ 1.9676443615722656,
+ 1.9359465097045898,
+ 1.9049253499984742,
+ 1.8857684122085572,
+ 1.8734163967514037,
+ 1.866455598678589,
+ 1.8637321212387086,
+ 1.8555012069320678,
+ 1.8584735760879516,
+ 1.84622566532135,
+ 1.8429653158950805,
+ 1.8373867583847046,
+ 1.837373632888794,
+ 1.8269891134643554,
+ 1.8270549670791627,
+ 1.8241438305282593,
+ 1.8183621447372436,
+ 1.8176076436233521,
+ 1.8132607741546631,
+ 1.8171432834243775,
+ 1.8106282345199585,
+ 1.8068263914489746,
+ 1.8001747159194947,
+ 1.797019803085327,
+ 1.799570650062561,
+ 1.7992187567520141,
+ 1.7956272640228272,
+ 1.7893521131134034,
+ 1.7947827518463135,
+ 1.792629102745056,
+ 1.7834515933990478,
+ 1.784611135520935,
+ 1.7758458992767334,
+ 1.77485143119812,
+ 1.7703186184692383,
+ 1.7679582196044923,
+ 1.7615163513946532,
+ 1.760532038192749,
+ 1.7643762328338624,
+ 1.7579109369659425,
+ 1.755184786720276,
+ 1.7560388659286499,
+ 1.7505403981781005,
+ 1.7491300173950195,
+ 1.7485786712646485,
+ 1.7421914972686767,
+ 1.7423645967864991,
+ 1.7408842813110352,
+ 1.7322719787979126,
+ 1.7337682635498046,
+ 1.7296864183807372,
+ 1.7320984534454347,
+ 1.7284361850357055,
+ 1.7295022455596925,
+ 1.7249998714447021,
+ 1.7229057648086548,
+ 1.7212110678863526,
+ 1.72249207862854,
+ 1.7190113651275636,
+ 1.717387024230957,
+ 1.7169812508392335,
+ 1.7162931756210327,
+ 1.7126198623275757,
+ 1.7117092602539064,
+ 1.7112801861190796,
+ 1.7114282751846313,
+ 1.7101980803680419,
+ 1.7129489760971068,
+ 1.7100260018539428,
+ 1.7092081976699829,
+ 1.703683881187439,
+ 1.7068217246246338,
+ 1.7049527828216553,
+ 1.7057187732315064,
+ 1.7080302432250976,
+ 1.7038126770401,
+ 1.7049555835723877,
+ 1.7032458050155639,
+ 1.7034490664672852,
+ 1.7006872876739503,
+ 1.7008496173095704,
+ 1.7032257048797608,
+ 1.6975849852371216,
+ 1.7016590633392334,
+ 1.7036013817596436,
+ 1.7003546350097656,
+ 1.6987783340835572,
+ 1.7004486318969727,
+ 1.6981003344726562,
+ 1.6952576486968993,
+ 1.6982201540374755,
+ 1.6985680112075805,
+ 1.6973068224334718,
+ 1.698023046951294,
+ 1.6962739462661742,
+ 1.7007662552642822,
+ 1.694658960800171,
+ 1.6946139693069457,
+ 1.6979838909912108
+ ],
+ "train_acc": [
+ 0.24304,
+ 0.27768,
+ 0.29718,
+ 0.3114,
+ 0.3155,
+ 0.32342,
+ 0.32646,
+ 0.32848,
+ 0.33164,
+ 0.33126,
+ 0.33432,
+ 0.33792,
+ 0.33882,
+ 0.33816,
+ 0.34222,
+ 0.34396,
+ 0.34814,
+ 0.34378,
+ 0.34546,
+ 0.3467,
+ 0.34866,
+ 0.35218,
+ 0.3533,
+ 0.3518,
+ 0.35448,
+ 0.3562,
+ 0.35548,
+ 0.358,
+ 0.35764,
+ 0.35846,
+ 0.35716,
+ 0.3626,
+ 0.36082,
+ 0.36478,
+ 0.36298,
+ 0.36532,
+ 0.36858,
+ 0.36974,
+ 0.37074,
+ 0.3673,
+ 0.37104,
+ 0.372,
+ 0.36924,
+ 0.37348,
+ 0.37412,
+ 0.37354,
+ 0.37672,
+ 0.37626,
+ 0.3795,
+ 0.3796,
+ 0.37736,
+ 0.37876,
+ 0.37752,
+ 0.38226,
+ 0.3828,
+ 0.38434,
+ 0.38232,
+ 0.38234,
+ 0.38028,
+ 0.38442,
+ 0.38296,
+ 0.38464,
+ 0.3859,
+ 0.38794,
+ 0.38652,
+ 0.3878,
+ 0.38752,
+ 0.38512,
+ 0.3885,
+ 0.38786,
+ 0.38678,
+ 0.3892,
+ 0.38872,
+ 0.39076,
+ 0.38828,
+ 0.38848,
+ 0.38858,
+ 0.38922,
+ 0.38946,
+ 0.39026,
+ 0.38864,
+ 0.38984,
+ 0.39182,
+ 0.3923,
+ 0.39104,
+ 0.39152,
+ 0.39114,
+ 0.3885,
+ 0.39122,
+ 0.39272,
+ 0.39182,
+ 0.39102,
+ 0.39104,
+ 0.3916,
+ 0.39454,
+ 0.39062,
+ 0.38996,
+ 0.39216,
+ 0.39174,
+ 0.39228
+ ],
+ "test_acc": [
+ 0.2729,
+ 0.3136,
+ 0.3293,
+ 0.3473,
+ 0.3313,
+ 0.3519,
+ 0.3629,
+ 0.3562,
+ 0.3516,
+ 0.3661,
+ 0.3733,
+ 0.3723,
+ 0.3632,
+ 0.3667,
+ 0.3676,
+ 0.3685,
+ 0.3723,
+ 0.3608,
+ 0.3781,
+ 0.3773,
+ 0.3671,
+ 0.3711,
+ 0.3595,
+ 0.377,
+ 0.3849,
+ 0.3701,
+ 0.3727,
+ 0.3603,
+ 0.3725,
+ 0.382,
+ 0.3783,
+ 0.3762,
+ 0.3832,
+ 0.3806,
+ 0.387,
+ 0.3873,
+ 0.3862,
+ 0.3864,
+ 0.3908,
+ 0.3939,
+ 0.3997,
+ 0.396,
+ 0.393,
+ 0.3846,
+ 0.3913,
+ 0.3967,
+ 0.3972,
+ 0.4013,
+ 0.3994,
+ 0.4043,
+ 0.3988,
+ 0.4024,
+ 0.3951,
+ 0.3982,
+ 0.3985,
+ 0.4081,
+ 0.4033,
+ 0.4029,
+ 0.4082,
+ 0.3986,
+ 0.4028,
+ 0.405,
+ 0.4054,
+ 0.4051,
+ 0.408,
+ 0.4069,
+ 0.4066,
+ 0.4085,
+ 0.4073,
+ 0.4096,
+ 0.4103,
+ 0.4066,
+ 0.4131,
+ 0.4106,
+ 0.4108,
+ 0.41,
+ 0.4121,
+ 0.41,
+ 0.4102,
+ 0.4076,
+ 0.4129,
+ 0.4109,
+ 0.4094,
+ 0.4103,
+ 0.41,
+ 0.4094,
+ 0.4121,
+ 0.4119,
+ 0.4126,
+ 0.4125,
+ 0.4116,
+ 0.4109,
+ 0.4128,
+ 0.4123,
+ 0.4129,
+ 0.4113,
+ 0.411,
+ 0.4114,
+ 0.4114,
+ 0.4115
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.03677885979413986,
+ 0.03784121572971344,
+ 0.08952198922634125,
+ 0.04931114614009857,
+ -0.00528424559161067,
+ 0.03716714680194855,
+ -0.02366219088435173,
+ -0.042923711240291595,
+ -0.07109162956476212,
+ -0.028443587943911552,
+ 0.01791619509458542,
+ 0.9962450861930847
+ ],
+ "perturbation_rho": [
+ 0.0005302987992763519,
+ 0.012386747635900974,
+ 0.023949606344103813,
+ 0.006188662722706795,
+ 0.05080874264240265,
+ -0.01257331669330597,
+ -0.003029853105545044,
+ 0.003304736688733101,
+ 0.043240927159786224,
+ 0.0031258384697139263,
+ -0.018814031034708023,
+ -0.018657812848687172
+ ],
+ "nudging": {
+ "0.001": [
+ -3.0247028917074203e-06,
+ -1.51805579662323e-07,
+ -1.8300488591194153e-07,
+ -1.150183379650116e-07,
+ -8.847564458847046e-09,
+ -3.64379957318306e-08,
+ 9.778887033462524e-09,
+ 3.096647560596466e-08,
+ 5.3551048040390015e-08,
+ -1.3737007975578308e-08,
+ -5.820766091346741e-09,
+ -7.356284186244011e-07
+ ],
+ "0.003": [
+ -9.037903510034084e-06,
+ -4.105968400835991e-07,
+ -4.7474168241024017e-07,
+ -2.3958273231983185e-07,
+ -1.0593794286251068e-08,
+ -1.551816239953041e-07,
+ 3.8067810237407684e-08,
+ 1.2014061212539673e-07,
+ 1.5227124094963074e-07,
+ 1.1094380170106888e-07,
+ -8.183997124433517e-08,
+ -2.6408815756440163e-06
+ ],
+ "0.01": [
+ -3.033224493265152e-05,
+ -1.35018490254879e-06,
+ -1.6904668882489204e-06,
+ -6.724148988723755e-07,
+ 3.341119736433029e-08,
+ -5.158362910151482e-07,
+ 1.4738179743289948e-07,
+ 3.360910341143608e-07,
+ 6.420304998755455e-07,
+ 2.4598557502031326e-07,
+ -2.3667234927415848e-07,
+ -9.222421795129776e-06
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 6368.87646484375,
+ 60059.984375,
+ 180441.8125,
+ 274150.96875,
+ 517201.65625,
+ 857839.4375,
+ 862732.0,
+ 1219729.5,
+ 1498813.125,
+ 1794381.5,
+ 1887047.25,
+ 1900776.375,
+ 1523186.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 3.379595364094712e-05,
+ 2.1159430616535246e-06,
+ 6.931001053089858e-07,
+ 4.934564117320406e-07,
+ 4.4718211711369804e-07,
+ 4.4051026293345785e-07,
+ 4.3748320877057267e-07,
+ 4.3679304440047417e-07,
+ 4.368349380001746e-07,
+ 4.3717255948649836e-07,
+ 4.362396737178642e-07,
+ 4.269320186267578e-07,
+ 4.208905863833934e-07
+ ]
+ },
+ "drift": {
+ "embed.weight": 43.40305026092655,
+ "embed.bias": 12.884723360242011,
+ "blocks.0.ln.weight": 1.1396957317801488,
+ "blocks.0.w1.weight": 15.815325196455687,
+ "blocks.0.w1.bias": 9.997007586540093,
+ "blocks.0.w2.weight": 51.54342945808938,
+ "blocks.1.ln.weight": 0.9665440839605698,
+ "blocks.1.w1.weight": 17.964098193399778,
+ "blocks.1.w1.bias": 7.662859769714302,
+ "blocks.1.w2.weight": 45.06560123113467,
+ "blocks.2.ln.weight": 0.7981284775892137,
+ "blocks.2.w1.weight": 16.830439973227175,
+ "blocks.2.w1.bias": 8.949450279000775,
+ "blocks.2.w2.weight": 46.74550325782192,
+ "blocks.3.ln.weight": 0.7868855526754271,
+ "blocks.3.w1.weight": 17.044252690417927,
+ "blocks.3.w1.bias": 13.670590135892402,
+ "blocks.3.w2.weight": 38.40914010958083,
+ "blocks.4.ln.weight": 0.6395411371371652,
+ "blocks.4.w1.weight": 17.982212663546917,
+ "blocks.4.w1.bias": 18.982959249125763,
+ "blocks.4.w2.weight": 28.60564989778907,
+ "blocks.5.ln.weight": 0.5328988611331281,
+ "blocks.5.w1.weight": 14.634932988244662,
+ "blocks.5.w1.bias": 10.974172599914699,
+ "blocks.5.w2.weight": 39.20486954308308,
+ "blocks.6.ln.weight": 0.5485338249117044,
+ "blocks.6.w1.weight": 18.159621784839892,
+ "blocks.6.w1.bias": 18.891505344910815,
+ "blocks.6.w2.weight": 28.93284323285378,
+ "blocks.7.ln.weight": 0.5894477945101706,
+ "blocks.7.w1.weight": 18.009195934295505,
+ "blocks.7.w1.bias": 18.37430010215222,
+ "blocks.7.w2.weight": 30.406749000767654,
+ "blocks.8.ln.weight": 0.6068497649428072,
+ "blocks.8.w1.weight": 19.638841310683983,
+ "blocks.8.w1.bias": 20.60396737480574,
+ "blocks.8.w2.weight": 33.89217379886503,
+ "blocks.9.ln.weight": 0.678936766492041,
+ "blocks.9.w1.weight": 16.2394672260477,
+ "blocks.9.w1.bias": 14.562794517299237,
+ "blocks.9.w2.weight": 50.979251931044914,
+ "blocks.10.ln.weight": 0.6040394973297021,
+ "blocks.10.w1.weight": 14.292818466799657,
+ "blocks.10.w1.bias": 9.974035975062883,
+ "blocks.10.w2.weight": 60.410553219844616,
+ "blocks.11.ln.weight": 0.6493019680977011,
+ "blocks.11.w1.weight": 15.455381797619573,
+ "blocks.11.w1.bias": 12.883608145619089,
+ "blocks.11.w2.weight": 64.05421036694803,
+ "out_ln.weight": 0.3508285396209464,
+ "out_head.weight": 6.026469963854456,
+ "out_head.bias": 0.7776800281309532
+ }
+ }
+ },
+ "config": {
+ "dataset": "cifar10",
+ "d_hidden": 512,
+ "num_blocks": 12,
+ "batch_size": 128,
+ "epochs": 100,
+ "lr": 0.001,
+ "lr_fb": 0.001,
+ "wd": 0.01,
+ "lam": 0.1,
+ "K": 4,
+ "sigma_bridge": 0.05,
+ "ema_momentum": 0.995,
+ "term_grad_weight": 1.0,
+ "seeds": [
+ 9
+ ],
+ "gpu": 0,
+ "output_dir": "results/fa_dfa_d512_L12_seed9",
+ "methods": [
+ "fa",
+ "dfa"
+ ],
+ "random_targets": false,
+ "penalty_lam": 0.0,
+ "num_classes": 10
+ }
+} \ No newline at end of file
diff --git a/results/fa_dfa_d512_L6_seed0/results_cifar10.json b/results/fa_dfa_d512_L6_seed0/results_cifar10.json
new file mode 100644
index 0000000..821b0e0
--- /dev/null
+++ b/results/fa_dfa_d512_L6_seed0/results_cifar10.json
@@ -0,0 +1,837 @@
+{
+ "0": {
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 2.0753555574798583,
+ 2.0437081289672854,
+ 2.031278556365967,
+ 2.027659920578003,
+ 2.0243590501403808,
+ 2.0202790953826906,
+ 2.0175082940673827,
+ 2.015957561340332,
+ 2.016036532974243,
+ 2.013858056335449,
+ 2.0126232041931154,
+ 2.006905050048828,
+ 2.0110664825439453,
+ 2.0065614931488036,
+ 2.007456555709839,
+ 2.0070649071502684,
+ 2.0075456425476075,
+ 2.004424341583252,
+ 2.005271594772339,
+ 2.0039944921875,
+ 2.0024667238998415,
+ 2.000112854309082,
+ 1.9993625988769532,
+ 2.002483249664307,
+ 2.0039204889678954,
+ 1.9985878760528564,
+ 2.0006166967010497,
+ 2.0003138301086425,
+ 1.9996023558807372,
+ 1.9986100010681151,
+ 1.99738998298645,
+ 1.9980814916229248,
+ 1.996693759994507,
+ 2.0003671773529055,
+ 1.9974340956878662,
+ 1.9980345281982421,
+ 1.9958612198638916,
+ 1.9961950925445557,
+ 1.9993210222625732,
+ 1.9953513426971436,
+ 1.9962950857925414,
+ 1.9969137393951415,
+ 1.9964947815704346,
+ 1.9980549166870116,
+ 1.9973483322525025,
+ 1.9953596134185791,
+ 1.995900859451294,
+ 1.9963403786849976,
+ 1.9947577867126465,
+ 1.996345650177002,
+ 1.9976896784210205,
+ 1.9959460092163086,
+ 1.9935208990478515,
+ 1.9943153414154053,
+ 1.9918208218765259,
+ 1.9932028875350951,
+ 1.9944638661956786,
+ 1.995019360809326,
+ 1.9929348825073243,
+ 1.9962419706726073,
+ 1.9949486227035522,
+ 1.991672697906494,
+ 1.9935600751495361,
+ 1.9919422644424438,
+ 1.994082801437378,
+ 1.9919944891738892,
+ 1.9940126430511476,
+ 1.9923382126617433,
+ 1.9926940571594238,
+ 1.9947953038024901,
+ 1.993876279335022,
+ 1.992678325843811,
+ 1.992137566757202,
+ 1.9900086630630494,
+ 1.9903694836044312,
+ 1.9928425230407716,
+ 1.9906801569366455,
+ 1.9912908507919311,
+ 1.9918341095352172,
+ 1.9906631398773194,
+ 1.9910551013183593,
+ 1.9912693448257446,
+ 1.9916500932312011,
+ 1.992011311416626,
+ 1.9930540170669555,
+ 1.9900453987121582,
+ 1.9914034769058226,
+ 1.9921153451156617,
+ 1.9884680337142944,
+ 1.9897785013198852,
+ 1.9896507069396974,
+ 1.9915314351654052,
+ 1.9887127932357789,
+ 1.9907834010314942,
+ 1.990130773010254,
+ 1.9915763851547241,
+ 1.9891119548797607,
+ 1.9894035186386108,
+ 1.9906070321273803,
+ 1.990175763015747
+ ],
+ "train_acc": [
+ 0.23202,
+ 0.24612,
+ 0.2534,
+ 0.25296,
+ 0.25586,
+ 0.25736,
+ 0.26078,
+ 0.25968,
+ 0.26376,
+ 0.26194,
+ 0.26464,
+ 0.26644,
+ 0.26328,
+ 0.26692,
+ 0.26742,
+ 0.26744,
+ 0.26808,
+ 0.2689,
+ 0.26662,
+ 0.26898,
+ 0.27116,
+ 0.27066,
+ 0.27046,
+ 0.2737,
+ 0.26932,
+ 0.27098,
+ 0.27496,
+ 0.2742,
+ 0.27396,
+ 0.27362,
+ 0.27356,
+ 0.27542,
+ 0.2752,
+ 0.27348,
+ 0.27492,
+ 0.2737,
+ 0.27614,
+ 0.27596,
+ 0.27528,
+ 0.27582,
+ 0.27624,
+ 0.27556,
+ 0.27432,
+ 0.27686,
+ 0.27726,
+ 0.278,
+ 0.27874,
+ 0.27706,
+ 0.27906,
+ 0.27922,
+ 0.27678,
+ 0.27996,
+ 0.27996,
+ 0.27956,
+ 0.27936,
+ 0.27786,
+ 0.27958,
+ 0.27888,
+ 0.2801,
+ 0.2786,
+ 0.28002,
+ 0.281,
+ 0.28102,
+ 0.27928,
+ 0.27852,
+ 0.2814,
+ 0.28026,
+ 0.28168,
+ 0.28254,
+ 0.27894,
+ 0.28102,
+ 0.28146,
+ 0.27998,
+ 0.28272,
+ 0.28324,
+ 0.28034,
+ 0.28126,
+ 0.28156,
+ 0.28228,
+ 0.28094,
+ 0.28084,
+ 0.28196,
+ 0.28344,
+ 0.28236,
+ 0.28084,
+ 0.28244,
+ 0.2821,
+ 0.28254,
+ 0.28398,
+ 0.2826,
+ 0.28278,
+ 0.28214,
+ 0.28376,
+ 0.28172,
+ 0.28234,
+ 0.28182,
+ 0.28146,
+ 0.28132,
+ 0.2826,
+ 0.28366
+ ],
+ "test_acc": [
+ 0.2527,
+ 0.2731,
+ 0.2652,
+ 0.2735,
+ 0.2758,
+ 0.2862,
+ 0.2692,
+ 0.2901,
+ 0.2963,
+ 0.2897,
+ 0.2793,
+ 0.2945,
+ 0.2734,
+ 0.3044,
+ 0.2915,
+ 0.2965,
+ 0.2975,
+ 0.2951,
+ 0.3018,
+ 0.2954,
+ 0.2868,
+ 0.2943,
+ 0.3068,
+ 0.2639,
+ 0.3007,
+ 0.303,
+ 0.3009,
+ 0.3059,
+ 0.3024,
+ 0.2876,
+ 0.2984,
+ 0.2926,
+ 0.3024,
+ 0.3034,
+ 0.2906,
+ 0.3054,
+ 0.2978,
+ 0.2872,
+ 0.3036,
+ 0.3052,
+ 0.2874,
+ 0.3045,
+ 0.3028,
+ 0.2949,
+ 0.2981,
+ 0.3047,
+ 0.3073,
+ 0.2937,
+ 0.3078,
+ 0.2894,
+ 0.3098,
+ 0.2913,
+ 0.2897,
+ 0.306,
+ 0.3075,
+ 0.302,
+ 0.3025,
+ 0.2982,
+ 0.2983,
+ 0.31,
+ 0.3071,
+ 0.3013,
+ 0.309,
+ 0.3042,
+ 0.3009,
+ 0.2958,
+ 0.3024,
+ 0.3013,
+ 0.3055,
+ 0.3016,
+ 0.3041,
+ 0.3077,
+ 0.3023,
+ 0.3111,
+ 0.308,
+ 0.3005,
+ 0.3051,
+ 0.3101,
+ 0.3103,
+ 0.3006,
+ 0.3025,
+ 0.3034,
+ 0.3043,
+ 0.3082,
+ 0.3011,
+ 0.3069,
+ 0.3051,
+ 0.3023,
+ 0.3033,
+ 0.3051,
+ 0.3041,
+ 0.3044,
+ 0.3029,
+ 0.3047,
+ 0.3047,
+ 0.3048,
+ 0.3056,
+ 0.3056,
+ 0.3053,
+ 0.3053
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.3933757543563843,
+ -0.00033686644746921957,
+ -0.00029227498453110456,
+ 0.00023268867516890168,
+ -0.00033252357388846576,
+ -0.0006308911251835525
+ ],
+ "perturbation_rho": [
+ -0.005786933470517397,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "nudging": {
+ "0.001": [
+ -4.791654646396637e-07,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.003": [
+ -1.2507662177085876e-06,
+ 0.0,
+ 0.0,
+ -3.725290298461914e-09,
+ 0.0,
+ 0.0
+ ],
+ "0.01": [
+ -4.01865690946579e-06,
+ 0.0,
+ 1.862645149230957e-09,
+ -3.725290298461914e-09,
+ 0.0,
+ 0.0
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 52006.18359375,
+ 1299919104.0,
+ 4471346176.0,
+ 5541193216.0,
+ 6115124736.0,
+ 6220777984.0,
+ 7776075776.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.7690890647136257e-07,
+ 2.9755062302960766e-10,
+ 2.9703242643286387e-10,
+ 2.9693505987360425e-10,
+ 2.969274826014612e-10,
+ 2.96939195454371e-10,
+ 2.9694927072831945e-10
+ ]
+ },
+ "drift": {
+ "embed.weight": 330.18669911453446,
+ "embed.bias": 275.0701726252062,
+ "blocks.0.ln.weight": 9.87127425013801,
+ "blocks.0.w1.weight": 288.2752398777733,
+ "blocks.0.w1.bias": 276.1693244742186,
+ "blocks.0.w2.weight": 476.3482702241823,
+ "blocks.1.ln.weight": 9.208131003871356,
+ "blocks.1.w1.weight": 379.93945965650295,
+ "blocks.1.w1.bias": 370.165908360661,
+ "blocks.1.w2.weight": 391.3759720182112,
+ "blocks.2.ln.weight": 9.291829925999405,
+ "blocks.2.w1.weight": 372.47037498699524,
+ "blocks.2.w1.bias": 340.94957924841253,
+ "blocks.2.w2.weight": 343.62414266271355,
+ "blocks.3.ln.weight": 9.704848817987399,
+ "blocks.3.w1.weight": 334.76567440579134,
+ "blocks.3.w1.bias": 307.03384934243365,
+ "blocks.3.w2.weight": 310.86785550010995,
+ "blocks.4.ln.weight": 6.86934636896043,
+ "blocks.4.w1.weight": 263.30065941816594,
+ "blocks.4.w1.bias": 243.63618296008045,
+ "blocks.4.w2.weight": 243.60310044137827,
+ "blocks.5.ln.weight": 10.060319390509356,
+ "blocks.5.w1.weight": 402.62899842011797,
+ "blocks.5.w1.bias": 374.4871410963097,
+ "blocks.5.w2.weight": 379.02482222844293,
+ "out_ln.weight": 0.614063493216844,
+ "out_head.weight": 8.991195841592733,
+ "out_head.bias": 0.8181543795651063
+ }
+ },
+ "fa": {
+ "log": {
+ "train_loss": [
+ 2.0268119436264036,
+ 1.9347526413726808,
+ 1.9036040418243407,
+ 1.8875383782958985,
+ 1.8758800409317016,
+ 1.8637015169525146,
+ 1.8550788135147094,
+ 1.8481883679580688,
+ 1.8380642176055908,
+ 1.831563844680786,
+ 1.82749318901062,
+ 1.814368692855835,
+ 1.817901651649475,
+ 1.8037349676895142,
+ 1.8005951739883423,
+ 1.7895659107208253,
+ 1.7900659426879884,
+ 1.781074461402893,
+ 1.7845501969146729,
+ 1.776343214149475,
+ 1.7684208422470094,
+ 1.7643969812774658,
+ 1.7617348025894164,
+ 1.7578030713653565,
+ 1.7565854375457763,
+ 1.7562964432525634,
+ 1.7528118307113647,
+ 1.7484837558364867,
+ 1.743814429550171,
+ 1.7366932116699219,
+ 1.7326225136566162,
+ 1.728937064590454,
+ 1.7266727477645873,
+ 1.7240253580474854,
+ 1.7245794469833373,
+ 1.7228408333969116,
+ 1.7203827739715576,
+ 1.711478896446228,
+ 1.7089925986099244,
+ 1.7088872208023071,
+ 1.7068994373321533,
+ 1.7094496100234986,
+ 1.7018463577270508,
+ 1.7043216220855713,
+ 1.6993152593231202,
+ 1.6993111734771729,
+ 1.6920886862182618,
+ 1.6940990381622314,
+ 1.6973865487670898,
+ 1.693602544631958,
+ 1.6929154531478883,
+ 1.6890407666397094,
+ 1.685359003982544,
+ 1.684633869857788,
+ 1.6784778961181641,
+ 1.6846174112701415,
+ 1.6849634936904907,
+ 1.6800030670928956,
+ 1.6778543420410157,
+ 1.6783534344482423,
+ 1.6748548553848266,
+ 1.675967246170044,
+ 1.674832339515686,
+ 1.6731126987075806,
+ 1.6737006020355225,
+ 1.670804006958008,
+ 1.6716182923126222,
+ 1.6689927773284912,
+ 1.6716524493026734,
+ 1.6704032614517212,
+ 1.6676370931625366,
+ 1.6686026779174805,
+ 1.6649452212142943,
+ 1.6682810161972046,
+ 1.6657524144744873,
+ 1.666575444908142,
+ 1.6638936541748046,
+ 1.6632415933609008,
+ 1.6630835901641845,
+ 1.6650469580078124,
+ 1.661507155380249,
+ 1.6635552446746826,
+ 1.6633718565368651,
+ 1.6654531225204467,
+ 1.660396284713745,
+ 1.6597816809463501,
+ 1.6641134811019898,
+ 1.6597988692855834,
+ 1.6597074938583374,
+ 1.6637430121612549,
+ 1.657964035987854,
+ 1.6597277429580688,
+ 1.65512205078125,
+ 1.66050142578125,
+ 1.6606662057495116,
+ 1.6582156159210204,
+ 1.657457571105957,
+ 1.6597890829086304,
+ 1.6602586605834961,
+ 1.6589343154525757
+ ],
+ "train_acc": [
+ 0.2612,
+ 0.30246,
+ 0.31784,
+ 0.3241,
+ 0.32576,
+ 0.33054,
+ 0.33502,
+ 0.33912,
+ 0.34148,
+ 0.34222,
+ 0.34698,
+ 0.34792,
+ 0.34712,
+ 0.35504,
+ 0.35516,
+ 0.35796,
+ 0.35952,
+ 0.36286,
+ 0.36188,
+ 0.36396,
+ 0.36778,
+ 0.36878,
+ 0.3676,
+ 0.36954,
+ 0.37154,
+ 0.36936,
+ 0.37584,
+ 0.3732,
+ 0.37446,
+ 0.37702,
+ 0.37832,
+ 0.38196,
+ 0.37868,
+ 0.3824,
+ 0.38276,
+ 0.38352,
+ 0.38208,
+ 0.3852,
+ 0.3863,
+ 0.38682,
+ 0.38838,
+ 0.38784,
+ 0.3886,
+ 0.38966,
+ 0.38966,
+ 0.39112,
+ 0.39534,
+ 0.39184,
+ 0.39354,
+ 0.39498,
+ 0.39364,
+ 0.39514,
+ 0.3952,
+ 0.39942,
+ 0.40118,
+ 0.39886,
+ 0.3962,
+ 0.39814,
+ 0.39946,
+ 0.40066,
+ 0.39902,
+ 0.40112,
+ 0.40356,
+ 0.40358,
+ 0.40162,
+ 0.40426,
+ 0.40428,
+ 0.4024,
+ 0.40284,
+ 0.40386,
+ 0.4073,
+ 0.4049,
+ 0.40354,
+ 0.40386,
+ 0.40588,
+ 0.4046,
+ 0.40704,
+ 0.40718,
+ 0.40888,
+ 0.405,
+ 0.41078,
+ 0.40724,
+ 0.40814,
+ 0.40858,
+ 0.40918,
+ 0.40666,
+ 0.40436,
+ 0.41216,
+ 0.40734,
+ 0.40822,
+ 0.40996,
+ 0.40958,
+ 0.411,
+ 0.40964,
+ 0.40704,
+ 0.40942,
+ 0.41122,
+ 0.40922,
+ 0.40942,
+ 0.40982
+ ],
+ "test_acc": [
+ 0.3205,
+ 0.3401,
+ 0.3549,
+ 0.3534,
+ 0.3599,
+ 0.3678,
+ 0.3633,
+ 0.3657,
+ 0.3754,
+ 0.3736,
+ 0.3895,
+ 0.3818,
+ 0.3742,
+ 0.3765,
+ 0.383,
+ 0.3823,
+ 0.3883,
+ 0.4012,
+ 0.3985,
+ 0.3927,
+ 0.3954,
+ 0.3928,
+ 0.3965,
+ 0.3899,
+ 0.4035,
+ 0.4001,
+ 0.4034,
+ 0.4104,
+ 0.4022,
+ 0.4106,
+ 0.3951,
+ 0.4038,
+ 0.4094,
+ 0.4181,
+ 0.4124,
+ 0.4094,
+ 0.4171,
+ 0.417,
+ 0.4163,
+ 0.418,
+ 0.4157,
+ 0.4251,
+ 0.4269,
+ 0.4201,
+ 0.4151,
+ 0.4222,
+ 0.4264,
+ 0.4171,
+ 0.4209,
+ 0.4171,
+ 0.4187,
+ 0.4239,
+ 0.4214,
+ 0.4154,
+ 0.4241,
+ 0.4166,
+ 0.424,
+ 0.4208,
+ 0.4265,
+ 0.427,
+ 0.4223,
+ 0.4248,
+ 0.4301,
+ 0.4312,
+ 0.4273,
+ 0.4313,
+ 0.4304,
+ 0.4296,
+ 0.426,
+ 0.4255,
+ 0.4306,
+ 0.4314,
+ 0.4315,
+ 0.4326,
+ 0.4331,
+ 0.432,
+ 0.432,
+ 0.4294,
+ 0.4335,
+ 0.433,
+ 0.437,
+ 0.4319,
+ 0.4304,
+ 0.4327,
+ 0.4321,
+ 0.4319,
+ 0.4333,
+ 0.4357,
+ 0.4315,
+ 0.4337,
+ 0.434,
+ 0.4346,
+ 0.4342,
+ 0.4334,
+ 0.4336,
+ 0.4328,
+ 0.434,
+ 0.4344,
+ 0.434,
+ 0.434
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.020984500646591187,
+ 0.06538723409175873,
+ -0.02862982451915741,
+ -0.14415404200553894,
+ -0.12824469804763794,
+ 0.9942382574081421
+ ],
+ "perturbation_rho": [
+ 0.04626474529504776,
+ 0.00916149839758873,
+ 0.02606651559472084,
+ -0.0706678107380867,
+ 0.0447549931704998,
+ -0.07931974530220032
+ ],
+ "nudging": {
+ "0.001": [
+ -2.3448956198990345e-06,
+ -6.940681487321854e-07,
+ 9.400537237524986e-08,
+ 4.384201020002365e-07,
+ 2.918532118201256e-07,
+ -2.525397576391697e-06
+ ],
+ "0.003": [
+ -7.02321995049715e-06,
+ -2.0685838535428047e-06,
+ 3.417953848838806e-07,
+ 1.5673576854169369e-06,
+ 1.2195087037980556e-06,
+ -9.568408131599426e-06
+ ],
+ "0.01": [
+ -2.358935307711363e-05,
+ -7.018155883997679e-06,
+ 9.208451956510544e-07,
+ 4.6023051254451275e-06,
+ 4.1344319470226765e-06,
+ -3.3148215152323246e-05
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 3922.13720703125,
+ 49122.51171875,
+ 264047.5,
+ 465089.875,
+ 831614.3125,
+ 1037417.9375,
+ 282876.625
+ ],
+ "bp_grad_norms_per_layer": [
+ 3.9657443267060444e-05,
+ 4.083395651832689e-06,
+ 1.3594802794614225e-06,
+ 1.2777863958035596e-06,
+ 1.2765268593284418e-06,
+ 1.2795584325431264e-06,
+ 1.2760270919898176e-06
+ ]
+ },
+ "drift": {
+ "embed.weight": 31.47039846350604,
+ "embed.bias": 18.02217544364072,
+ "blocks.0.ln.weight": 0.9789876040215849,
+ "blocks.0.w1.weight": 13.428105491977634,
+ "blocks.0.w1.bias": 11.014509469722494,
+ "blocks.0.w2.weight": 44.375832792200086,
+ "blocks.1.ln.weight": 0.9256838094480255,
+ "blocks.1.w1.weight": 16.049693958454462,
+ "blocks.1.w1.bias": 12.671067703031834,
+ "blocks.1.w2.weight": 46.940642430560914,
+ "blocks.2.ln.weight": 0.768237594938939,
+ "blocks.2.w1.weight": 16.878210463024953,
+ "blocks.2.w1.bias": 14.246556953188842,
+ "blocks.2.w2.weight": 45.47089120587146,
+ "blocks.3.ln.weight": 0.6449156612044069,
+ "blocks.3.w1.weight": 17.300866761556396,
+ "blocks.3.w1.bias": 17.793175424183687,
+ "blocks.3.w2.weight": 34.146538375073234,
+ "blocks.4.ln.weight": 0.536074289047981,
+ "blocks.4.w1.weight": 15.870552255415648,
+ "blocks.4.w1.bias": 17.679587984495768,
+ "blocks.4.w2.weight": 27.222411899497516,
+ "blocks.5.ln.weight": 0.5977975068115631,
+ "blocks.5.w1.weight": 18.813068681696937,
+ "blocks.5.w1.bias": 22.173876530236612,
+ "blocks.5.w2.weight": 27.925157214653215,
+ "out_ln.weight": 0.29706709348499905,
+ "out_head.weight": 4.788666797102501,
+ "out_head.bias": 2.266760510860064
+ }
+ }
+ },
+ "config": {
+ "dataset": "cifar10",
+ "d_hidden": 512,
+ "num_blocks": 6,
+ "batch_size": 128,
+ "epochs": 100,
+ "lr": 0.001,
+ "lr_fb": 0.001,
+ "wd": 0.01,
+ "lam": 0.1,
+ "K": 4,
+ "sigma_bridge": 0.05,
+ "ema_momentum": 0.995,
+ "term_grad_weight": 1.0,
+ "seeds": [
+ 0
+ ],
+ "gpu": 0,
+ "output_dir": "results/fa_dfa_d512_L6_seed0",
+ "methods": [
+ "fa",
+ "dfa"
+ ],
+ "random_targets": false,
+ "penalty_lam": 0.0,
+ "num_classes": 10
+ }
+} \ No newline at end of file
diff --git a/results/fa_dfa_d512_L6_seed1/results_cifar10.json b/results/fa_dfa_d512_L6_seed1/results_cifar10.json
new file mode 100644
index 0000000..e0848f8
--- /dev/null
+++ b/results/fa_dfa_d512_L6_seed1/results_cifar10.json
@@ -0,0 +1,837 @@
+{
+ "1": {
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 2.0748964222717285,
+ 2.0451132692718508,
+ 2.032143096008301,
+ 2.029643494186401,
+ 2.030650337371826,
+ 2.0243890724945066,
+ 2.020762248840332,
+ 2.0232248431396482,
+ 2.0159964645004274,
+ 2.0192399645996093,
+ 2.01540469039917,
+ 2.0154635834503174,
+ 2.017197366409302,
+ 2.0137894206237794,
+ 2.0133308319091796,
+ 2.0108140522003173,
+ 2.011235432815552,
+ 2.009914517288208,
+ 2.0094243881988527,
+ 2.0100407147979737,
+ 2.010185157546997,
+ 2.0089511253356935,
+ 2.0095393753051756,
+ 2.0059863204574584,
+ 2.0064435525512696,
+ 2.005170561828613,
+ 2.0074975017547607,
+ 2.004913715057373,
+ 2.0057138012313844,
+ 2.004565502243042,
+ 2.006841945838928,
+ 2.0055636949157716,
+ 2.0060591010284425,
+ 2.006927216835022,
+ 2.0074666262817384,
+ 2.004335132293701,
+ 2.005483384475708,
+ 2.006595082168579,
+ 2.003282996673584,
+ 2.0056812954330443,
+ 2.003032290878296,
+ 2.0057049587249756,
+ 2.004697043457031,
+ 2.006551830406189,
+ 2.0058861305236815,
+ 2.0044844329071045,
+ 2.0043990351867675,
+ 2.0037572646331787,
+ 2.0045921185302733,
+ 2.0048506913757325,
+ 2.003375824584961,
+ 2.004632271575928,
+ 2.0042632717895508,
+ 2.0026413610076905,
+ 2.001700767211914,
+ 2.003248519668579,
+ 2.003593999633789,
+ 2.003497390823364,
+ 2.003082277145386,
+ 2.001226206436157,
+ 2.0015557537841797,
+ 2.0028394889831542,
+ 2.0041756005859375,
+ 2.0014186894226076,
+ 2.0017097331237794,
+ 2.0027389540863036,
+ 2.00000232421875,
+ 2.001385536727905,
+ 2.0014734981536866,
+ 2.000507820701599,
+ 2.0018744316101076,
+ 2.00272723903656,
+ 2.0029756539535524,
+ 2.0013647467422486,
+ 2.0018114362335204,
+ 2.0025039096832273,
+ 2.0007914069366457,
+ 2.0015052102279665,
+ 2.0013790615844727,
+ 2.000559573059082,
+ 2.000183110809326,
+ 2.002135623321533,
+ 1.9992622534561157,
+ 2.0023144091033935,
+ 1.9984413458633423,
+ 2.0006315287780763,
+ 2.0000011035919187,
+ 1.9984883616638183,
+ 1.9980969067382812,
+ 1.999547445678711,
+ 1.999142066078186,
+ 2.0002035105895994,
+ 1.9980618365859986,
+ 1.9986838175201416,
+ 1.9983668603515625,
+ 1.9985755539321899,
+ 2.000544935379028,
+ 1.9968827368545532,
+ 1.9982192471313476,
+ 2.0020621044921874
+ ],
+ "train_acc": [
+ 0.23312,
+ 0.24122,
+ 0.2498,
+ 0.25198,
+ 0.25274,
+ 0.25466,
+ 0.25756,
+ 0.25584,
+ 0.26256,
+ 0.25706,
+ 0.26104,
+ 0.26366,
+ 0.26034,
+ 0.2643,
+ 0.2637,
+ 0.26512,
+ 0.26362,
+ 0.26534,
+ 0.2631,
+ 0.26636,
+ 0.26484,
+ 0.26708,
+ 0.2655,
+ 0.26972,
+ 0.26856,
+ 0.27134,
+ 0.2676,
+ 0.26984,
+ 0.26974,
+ 0.27138,
+ 0.2693,
+ 0.27122,
+ 0.27032,
+ 0.27198,
+ 0.27264,
+ 0.27422,
+ 0.26884,
+ 0.27136,
+ 0.27186,
+ 0.27214,
+ 0.27222,
+ 0.27306,
+ 0.27124,
+ 0.26954,
+ 0.27338,
+ 0.27444,
+ 0.27396,
+ 0.27308,
+ 0.27134,
+ 0.27374,
+ 0.27228,
+ 0.27202,
+ 0.2734,
+ 0.27426,
+ 0.27536,
+ 0.2763,
+ 0.27336,
+ 0.2738,
+ 0.2753,
+ 0.27466,
+ 0.2761,
+ 0.27344,
+ 0.27534,
+ 0.2757,
+ 0.27462,
+ 0.27636,
+ 0.27752,
+ 0.27438,
+ 0.27618,
+ 0.27512,
+ 0.27614,
+ 0.27672,
+ 0.2751,
+ 0.2764,
+ 0.27506,
+ 0.27662,
+ 0.2763,
+ 0.27732,
+ 0.27492,
+ 0.27632,
+ 0.27788,
+ 0.27568,
+ 0.27686,
+ 0.27594,
+ 0.27624,
+ 0.27706,
+ 0.27618,
+ 0.27852,
+ 0.27466,
+ 0.27688,
+ 0.27624,
+ 0.2765,
+ 0.27688,
+ 0.27766,
+ 0.27692,
+ 0.27586,
+ 0.27718,
+ 0.27828,
+ 0.27846,
+ 0.27564
+ ],
+ "test_acc": [
+ 0.2476,
+ 0.2684,
+ 0.2584,
+ 0.2859,
+ 0.2672,
+ 0.2792,
+ 0.2735,
+ 0.2954,
+ 0.2679,
+ 0.29,
+ 0.2763,
+ 0.2726,
+ 0.2807,
+ 0.2893,
+ 0.296,
+ 0.2701,
+ 0.2694,
+ 0.2782,
+ 0.2885,
+ 0.2811,
+ 0.2919,
+ 0.2734,
+ 0.2917,
+ 0.2961,
+ 0.2922,
+ 0.3011,
+ 0.2957,
+ 0.2783,
+ 0.3042,
+ 0.2892,
+ 0.2859,
+ 0.2808,
+ 0.2889,
+ 0.2977,
+ 0.2867,
+ 0.2998,
+ 0.2953,
+ 0.2908,
+ 0.2808,
+ 0.287,
+ 0.2953,
+ 0.303,
+ 0.287,
+ 0.3021,
+ 0.2834,
+ 0.3003,
+ 0.2861,
+ 0.2915,
+ 0.2953,
+ 0.3065,
+ 0.2839,
+ 0.2883,
+ 0.298,
+ 0.2928,
+ 0.2955,
+ 0.2906,
+ 0.3041,
+ 0.2956,
+ 0.2883,
+ 0.2887,
+ 0.2903,
+ 0.2939,
+ 0.294,
+ 0.2878,
+ 0.2835,
+ 0.2948,
+ 0.2838,
+ 0.2948,
+ 0.296,
+ 0.2941,
+ 0.294,
+ 0.3002,
+ 0.2949,
+ 0.2918,
+ 0.2977,
+ 0.2982,
+ 0.2944,
+ 0.2961,
+ 0.2936,
+ 0.2989,
+ 0.2956,
+ 0.2995,
+ 0.2953,
+ 0.2967,
+ 0.3003,
+ 0.2945,
+ 0.2989,
+ 0.2961,
+ 0.2974,
+ 0.3003,
+ 0.2941,
+ 0.2969,
+ 0.2982,
+ 0.2977,
+ 0.2981,
+ 0.2977,
+ 0.2979,
+ 0.2972,
+ 0.297,
+ 0.2969
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.4029613733291626,
+ 7.883789658080786e-05,
+ -0.0002453301858622581,
+ -0.0004192243213765323,
+ 0.000372876413166523,
+ -0.000354595307726413
+ ],
+ "perturbation_rho": [
+ 0.011547078378498554,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "nudging": {
+ "0.001": [
+ -3.4691765904426575e-07,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.003": [
+ -1.1422671377658844e-06,
+ 0.0,
+ -2.7939677238464355e-09,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.01": [
+ -3.50363552570343e-06,
+ 1.862645149230957e-09,
+ -3.725290298461914e-09,
+ 0.0,
+ 0.0,
+ 0.0
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 56897.21875,
+ 1696502784.0,
+ 3738960896.0,
+ 4607917568.0,
+ 5758332928.0,
+ 6969167872.0,
+ 7904643072.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.473913411904505e-07,
+ 2.2789505005338384e-10,
+ 2.2798352095065866e-10,
+ 2.2797391752149565e-10,
+ 2.2786038333943992e-10,
+ 2.2787945141988786e-10,
+ 2.2788908260462648e-10
+ ]
+ },
+ "drift": {
+ "embed.weight": 341.6512135112792,
+ "embed.bias": 266.6380085288841,
+ "blocks.0.ln.weight": 9.881568196594484,
+ "blocks.0.w1.weight": 316.78039360087587,
+ "blocks.0.w1.bias": 286.8114997527113,
+ "blocks.0.w2.weight": 487.5637400185808,
+ "blocks.1.ln.weight": 9.2020409527447,
+ "blocks.1.w1.weight": 361.84956095939293,
+ "blocks.1.w1.bias": 334.52465546675654,
+ "blocks.1.w2.weight": 335.23042288085577,
+ "blocks.2.ln.weight": 8.38680422611515,
+ "blocks.2.w1.weight": 335.6889109899642,
+ "blocks.2.w1.bias": 310.7077386931781,
+ "blocks.2.w2.weight": 315.3949169119556,
+ "blocks.3.ln.weight": 8.903503018515195,
+ "blocks.3.w1.weight": 371.4244307750354,
+ "blocks.3.w1.bias": 345.3889053690205,
+ "blocks.3.w2.weight": 352.95729115252703,
+ "blocks.4.ln.weight": 9.577081150302154,
+ "blocks.4.w1.weight": 387.7879530619586,
+ "blocks.4.w1.bias": 358.359826994576,
+ "blocks.4.w2.weight": 353.9896695590463,
+ "blocks.5.ln.weight": 9.043917438368451,
+ "blocks.5.w1.weight": 368.8104037409108,
+ "blocks.5.w1.bias": 337.4941181161536,
+ "blocks.5.w2.weight": 342.83001538637893,
+ "out_ln.weight": 0.6049025805082792,
+ "out_head.weight": 8.662451359844601,
+ "out_head.bias": 1.2452406037396404
+ }
+ },
+ "fa": {
+ "log": {
+ "train_loss": [
+ 2.0533182458114623,
+ 1.9470771118927002,
+ 1.9098005801391602,
+ 1.8933470011138915,
+ 1.8866070096206664,
+ 1.8806784909820558,
+ 1.8700345465469361,
+ 1.8668433471298218,
+ 1.8549686681747437,
+ 1.8534734383773803,
+ 1.844026010055542,
+ 1.8412904165649413,
+ 1.8354822329711915,
+ 1.8315302365493775,
+ 1.8307162652206421,
+ 1.8245070376968384,
+ 1.820267495956421,
+ 1.8159903725814819,
+ 1.8120850400543214,
+ 1.8062479650115968,
+ 1.8023771206665038,
+ 1.7989153610992432,
+ 1.7973320705795288,
+ 1.789391294517517,
+ 1.7857570403289795,
+ 1.7807958832550048,
+ 1.7800961505126953,
+ 1.7727591732025147,
+ 1.7719479108047484,
+ 1.7722896636199952,
+ 1.765616308631897,
+ 1.765071000442505,
+ 1.7588437004852295,
+ 1.7560785748672485,
+ 1.7530314648056031,
+ 1.7516267270278931,
+ 1.7478641785049438,
+ 1.747710930519104,
+ 1.7394591223526001,
+ 1.7430037671279908,
+ 1.7430737688827516,
+ 1.743327683944702,
+ 1.735208967590332,
+ 1.7367942264175416,
+ 1.735928176651001,
+ 1.7336044045639039,
+ 1.7319979761123658,
+ 1.7286449816131593,
+ 1.7276164590072631,
+ 1.7342356995010375,
+ 1.7273106130599976,
+ 1.728026152114868,
+ 1.7241678939437866,
+ 1.7218856454467772,
+ 1.720424718055725,
+ 1.7259459881973267,
+ 1.723678625831604,
+ 1.7201445708465577,
+ 1.7179317428588867,
+ 1.714501748046875,
+ 1.7184516412353517,
+ 1.7182959340667725,
+ 1.7173260328674316,
+ 1.7162155276870728,
+ 1.7148893152618407,
+ 1.7136942127990722,
+ 1.7128957564926148,
+ 1.7095485464859008,
+ 1.711679192199707,
+ 1.7081169268417358,
+ 1.7124231735992432,
+ 1.7100938080215453,
+ 1.7099676737213134,
+ 1.7056838412094115,
+ 1.7102637582397462,
+ 1.7076803800201417,
+ 1.703552028427124,
+ 1.7047434192657471,
+ 1.7060186026382447,
+ 1.7038543602752685,
+ 1.7023552209091186,
+ 1.7033933319854737,
+ 1.7048375073623656,
+ 1.7029183816146851,
+ 1.7019960340118407,
+ 1.7012598460006714,
+ 1.700243671836853,
+ 1.6951217443847657,
+ 1.7031349285125732,
+ 1.6982279485702514,
+ 1.7003905519866944,
+ 1.7008577185821534,
+ 1.69846902469635,
+ 1.7006194688796996,
+ 1.6990861594009399,
+ 1.7007253762817383,
+ 1.702599566040039,
+ 1.7018861039352418,
+ 1.699664118347168,
+ 1.7012134171295166
+ ],
+ "train_acc": [
+ 0.24282,
+ 0.28774,
+ 0.30648,
+ 0.31656,
+ 0.32152,
+ 0.3235,
+ 0.33028,
+ 0.32876,
+ 0.33768,
+ 0.33628,
+ 0.33892,
+ 0.341,
+ 0.3464,
+ 0.34646,
+ 0.3456,
+ 0.35168,
+ 0.34876,
+ 0.35232,
+ 0.35214,
+ 0.3571,
+ 0.3564,
+ 0.35782,
+ 0.35768,
+ 0.36332,
+ 0.3616,
+ 0.36234,
+ 0.3655,
+ 0.36546,
+ 0.36836,
+ 0.36622,
+ 0.36982,
+ 0.36954,
+ 0.3712,
+ 0.37104,
+ 0.37318,
+ 0.37486,
+ 0.37374,
+ 0.37526,
+ 0.37592,
+ 0.37592,
+ 0.37662,
+ 0.37494,
+ 0.37758,
+ 0.37708,
+ 0.37796,
+ 0.38076,
+ 0.37962,
+ 0.38058,
+ 0.3806,
+ 0.37936,
+ 0.38016,
+ 0.38172,
+ 0.38376,
+ 0.384,
+ 0.38752,
+ 0.3876,
+ 0.38602,
+ 0.38746,
+ 0.38644,
+ 0.386,
+ 0.38718,
+ 0.38552,
+ 0.38468,
+ 0.3868,
+ 0.38882,
+ 0.38934,
+ 0.3874,
+ 0.38992,
+ 0.3904,
+ 0.3908,
+ 0.38916,
+ 0.38954,
+ 0.39022,
+ 0.39024,
+ 0.39018,
+ 0.39104,
+ 0.39206,
+ 0.39178,
+ 0.39074,
+ 0.3921,
+ 0.39292,
+ 0.39326,
+ 0.39202,
+ 0.3935,
+ 0.39312,
+ 0.3942,
+ 0.39594,
+ 0.39766,
+ 0.39178,
+ 0.39502,
+ 0.39626,
+ 0.39468,
+ 0.39338,
+ 0.39468,
+ 0.39366,
+ 0.39276,
+ 0.39356,
+ 0.39354,
+ 0.39138,
+ 0.39408
+ ],
+ "test_acc": [
+ 0.2926,
+ 0.3237,
+ 0.3296,
+ 0.3577,
+ 0.3485,
+ 0.3454,
+ 0.3678,
+ 0.3648,
+ 0.3607,
+ 0.3756,
+ 0.3775,
+ 0.3817,
+ 0.3786,
+ 0.3781,
+ 0.3803,
+ 0.3682,
+ 0.369,
+ 0.371,
+ 0.3834,
+ 0.3875,
+ 0.3822,
+ 0.3805,
+ 0.392,
+ 0.401,
+ 0.3844,
+ 0.4015,
+ 0.3866,
+ 0.3901,
+ 0.3987,
+ 0.3939,
+ 0.3837,
+ 0.3972,
+ 0.3877,
+ 0.3994,
+ 0.4002,
+ 0.4033,
+ 0.4085,
+ 0.4058,
+ 0.391,
+ 0.399,
+ 0.3986,
+ 0.4086,
+ 0.404,
+ 0.4086,
+ 0.4062,
+ 0.4047,
+ 0.3991,
+ 0.4037,
+ 0.3968,
+ 0.4118,
+ 0.4017,
+ 0.4095,
+ 0.4117,
+ 0.4037,
+ 0.4073,
+ 0.4087,
+ 0.4137,
+ 0.4088,
+ 0.3997,
+ 0.4059,
+ 0.409,
+ 0.4099,
+ 0.4096,
+ 0.412,
+ 0.4102,
+ 0.4137,
+ 0.4102,
+ 0.4121,
+ 0.4119,
+ 0.4132,
+ 0.4149,
+ 0.412,
+ 0.415,
+ 0.4148,
+ 0.4154,
+ 0.416,
+ 0.4112,
+ 0.4109,
+ 0.4153,
+ 0.4148,
+ 0.4183,
+ 0.4172,
+ 0.4152,
+ 0.4157,
+ 0.418,
+ 0.4115,
+ 0.4187,
+ 0.4127,
+ 0.4108,
+ 0.4153,
+ 0.4141,
+ 0.4132,
+ 0.415,
+ 0.4124,
+ 0.4151,
+ 0.4147,
+ 0.4136,
+ 0.4142,
+ 0.4147,
+ 0.4144
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.030343683436512947,
+ 0.09938552975654602,
+ -0.07437123358249664,
+ -0.07549507170915604,
+ -0.09523595124483109,
+ 0.9972963929176331
+ ],
+ "perturbation_rho": [
+ 0.04541456699371338,
+ 0.008701281622052193,
+ -0.00402827700600028,
+ 0.03399529308080673,
+ -0.00989921111613512,
+ 0.004614755045622587
+ ],
+ "nudging": {
+ "0.001": [
+ -2.914457581937313e-06,
+ -5.584442988038063e-07,
+ 1.0908115655183792e-07,
+ 4.6566128730773926e-08,
+ 4.377216100692749e-08,
+ -1.0116491466760635e-06
+ ],
+ "0.003": [
+ -8.360599167644978e-06,
+ -2.0274892449378967e-06,
+ 2.825399860739708e-07,
+ 3.023305907845497e-07,
+ 2.789311110973358e-07,
+ -4.071509465575218e-06
+ ],
+ "0.01": [
+ -2.804200630635023e-05,
+ -6.612506695091724e-06,
+ 9.683426469564438e-07,
+ 1.1706724762916565e-06,
+ 1.3328390195965767e-06,
+ -1.4974735677242279e-05
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 5724.40771484375,
+ 72733.609375,
+ 478685.65625,
+ 1483182.625,
+ 1809620.875,
+ 2091372.375,
+ 857882.6875
+ ],
+ "bp_grad_norms_per_layer": [
+ 3.0453806175501086e-05,
+ 2.1086596007080516e-06,
+ 6.627448101426126e-07,
+ 6.460473969127634e-07,
+ 6.491723638646363e-07,
+ 6.540217896144895e-07,
+ 6.466638637903088e-07
+ ]
+ },
+ "drift": {
+ "embed.weight": 42.22188941497859,
+ "embed.bias": 17.68183956971274,
+ "blocks.0.ln.weight": 1.180802481067003,
+ "blocks.0.w1.weight": 15.4315958124657,
+ "blocks.0.w1.bias": 12.305080689576908,
+ "blocks.0.w2.weight": 57.44824499129088,
+ "blocks.1.ln.weight": 0.9692930700548912,
+ "blocks.1.w1.weight": 19.880441088833255,
+ "blocks.1.w1.bias": 16.257331118504066,
+ "blocks.1.w2.weight": 48.06735579536956,
+ "blocks.2.ln.weight": 0.7719428930423391,
+ "blocks.2.w1.weight": 23.427567219106724,
+ "blocks.2.w1.bias": 24.716070592776813,
+ "blocks.2.w2.weight": 33.2055102304801,
+ "blocks.3.ln.weight": 0.6610316048264212,
+ "blocks.3.w1.weight": 21.14916966984416,
+ "blocks.3.w1.bias": 22.188834381211755,
+ "blocks.3.w2.weight": 37.41900122482242,
+ "blocks.4.ln.weight": 0.6045383052903652,
+ "blocks.4.w1.weight": 21.6093498763088,
+ "blocks.4.w1.bias": 23.188519739977856,
+ "blocks.4.w2.weight": 29.43277495491079,
+ "blocks.5.ln.weight": 0.7110348796366065,
+ "blocks.5.w1.weight": 22.91348409054967,
+ "blocks.5.w1.bias": 24.128500394824375,
+ "blocks.5.w2.weight": 42.82735324390181,
+ "out_ln.weight": 0.2879508142897712,
+ "out_head.weight": 5.9584913076844686,
+ "out_head.bias": 1.1797514883024003
+ }
+ }
+ },
+ "config": {
+ "dataset": "cifar10",
+ "d_hidden": 512,
+ "num_blocks": 6,
+ "batch_size": 128,
+ "epochs": 100,
+ "lr": 0.001,
+ "lr_fb": 0.001,
+ "wd": 0.01,
+ "lam": 0.1,
+ "K": 4,
+ "sigma_bridge": 0.05,
+ "ema_momentum": 0.995,
+ "term_grad_weight": 1.0,
+ "seeds": [
+ 1
+ ],
+ "gpu": 0,
+ "output_dir": "results/fa_dfa_d512_L6_seed1",
+ "methods": [
+ "fa",
+ "dfa"
+ ],
+ "random_targets": false,
+ "penalty_lam": 0.0,
+ "num_classes": 10
+ }
+} \ No newline at end of file
diff --git a/results/fa_dfa_d512_L6_seed2/results_cifar10.json b/results/fa_dfa_d512_L6_seed2/results_cifar10.json
new file mode 100644
index 0000000..0d6122d
--- /dev/null
+++ b/results/fa_dfa_d512_L6_seed2/results_cifar10.json
@@ -0,0 +1,837 @@
+{
+ "2": {
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 2.0704764087677003,
+ 2.049675481185913,
+ 2.0393172933197024,
+ 2.0385254068756105,
+ 2.0333104403305056,
+ 2.037317512435913,
+ 2.035111841583252,
+ 2.033784921951294,
+ 2.0276396758270265,
+ 2.0287634895324707,
+ 2.0255478504943847,
+ 2.0249928087997437,
+ 2.0251559844207763,
+ 2.026155082015991,
+ 2.0227601895141603,
+ 2.0238794365692137,
+ 2.0220592249298095,
+ 2.0211007302474977,
+ 2.0220025036621094,
+ 2.0187504733657837,
+ 2.022589133682251,
+ 2.0209386083221434,
+ 2.022369726409912,
+ 2.019175484046936,
+ 2.0184574550628662,
+ 2.020228290786743,
+ 2.0170737294769285,
+ 2.0200500258636476,
+ 2.0192940770721437,
+ 2.017934571380615,
+ 2.018111932220459,
+ 2.019744345703125,
+ 2.0181864041137696,
+ 2.018692120895386,
+ 2.019279464416504,
+ 2.020759454421997,
+ 2.0175593240356444,
+ 2.0189450312805177,
+ 2.018363641014099,
+ 2.0204193601989746,
+ 2.018774921836853,
+ 2.020020353240967,
+ 2.020256604042053,
+ 2.018686157913208,
+ 2.017777068939209,
+ 2.01642233127594,
+ 2.0193701077270507,
+ 2.0175296588897704,
+ 2.0180459953308105,
+ 2.0163426136016844,
+ 2.0169748413085937,
+ 2.017522889404297,
+ 2.0171759409332277,
+ 2.0182337244415285,
+ 2.0202614056396486,
+ 2.0193526766967773,
+ 2.0176297589874266,
+ 2.0176475778961183,
+ 2.017951414642334,
+ 2.0159765984725952,
+ 2.0174861521911622,
+ 2.0162583628082276,
+ 2.0189596444702147,
+ 2.017551812057495,
+ 2.0174899821472168,
+ 2.017485563354492,
+ 2.0167667126464845,
+ 2.017466169166565,
+ 2.017418462524414,
+ 2.0175614359283447,
+ 2.016368521194458,
+ 2.017146753158569,
+ 2.016855639877319,
+ 2.016222254562378,
+ 2.016020330963135,
+ 2.0157443789672853,
+ 2.014490191497803,
+ 2.0179072814559937,
+ 2.0176085803985595,
+ 2.016617900123596,
+ 2.016193874168396,
+ 2.0163377814483643,
+ 2.017562115097046,
+ 2.017051735992432,
+ 2.015485999984741,
+ 2.0135854721450808,
+ 2.0146356997299195,
+ 2.0149522621154787,
+ 2.0146646865844726,
+ 2.0153391693878175,
+ 2.0160440145874023,
+ 2.0161946767425536,
+ 2.014449896469116,
+ 2.015591969642639,
+ 2.01485006980896,
+ 2.01346685836792,
+ 2.0139751796722414,
+ 2.0144011250305174,
+ 2.014900064163208,
+ 2.016459233779907
+ ],
+ "train_acc": [
+ 0.23802,
+ 0.24392,
+ 0.24906,
+ 0.25122,
+ 0.25396,
+ 0.25104,
+ 0.2552,
+ 0.25244,
+ 0.2577,
+ 0.25632,
+ 0.25834,
+ 0.2557,
+ 0.25944,
+ 0.25876,
+ 0.26044,
+ 0.25964,
+ 0.2593,
+ 0.26036,
+ 0.26106,
+ 0.26136,
+ 0.26018,
+ 0.25898,
+ 0.25858,
+ 0.2613,
+ 0.26334,
+ 0.26364,
+ 0.26186,
+ 0.26224,
+ 0.26166,
+ 0.26282,
+ 0.26364,
+ 0.26358,
+ 0.2645,
+ 0.26294,
+ 0.26592,
+ 0.26282,
+ 0.26398,
+ 0.26414,
+ 0.26538,
+ 0.26506,
+ 0.26578,
+ 0.26148,
+ 0.26312,
+ 0.26586,
+ 0.26478,
+ 0.26574,
+ 0.26416,
+ 0.26488,
+ 0.26366,
+ 0.26614,
+ 0.2656,
+ 0.26508,
+ 0.2675,
+ 0.26694,
+ 0.26532,
+ 0.26614,
+ 0.26596,
+ 0.26538,
+ 0.2652,
+ 0.26606,
+ 0.26698,
+ 0.26806,
+ 0.26674,
+ 0.26714,
+ 0.26704,
+ 0.26768,
+ 0.26714,
+ 0.2651,
+ 0.26544,
+ 0.26656,
+ 0.2676,
+ 0.26678,
+ 0.26898,
+ 0.26788,
+ 0.26736,
+ 0.2677,
+ 0.26732,
+ 0.26766,
+ 0.26812,
+ 0.2675,
+ 0.26916,
+ 0.26876,
+ 0.26754,
+ 0.26748,
+ 0.26802,
+ 0.27004,
+ 0.27052,
+ 0.2671,
+ 0.27224,
+ 0.26654,
+ 0.26818,
+ 0.26902,
+ 0.2691,
+ 0.26962,
+ 0.2702,
+ 0.26948,
+ 0.2712,
+ 0.27026,
+ 0.26876,
+ 0.26952
+ ],
+ "test_acc": [
+ 0.2537,
+ 0.2494,
+ 0.2765,
+ 0.2473,
+ 0.2851,
+ 0.2759,
+ 0.2702,
+ 0.2833,
+ 0.2841,
+ 0.2663,
+ 0.2858,
+ 0.2432,
+ 0.2894,
+ 0.2933,
+ 0.2938,
+ 0.2901,
+ 0.2777,
+ 0.2769,
+ 0.2907,
+ 0.2833,
+ 0.2973,
+ 0.2692,
+ 0.277,
+ 0.2841,
+ 0.2917,
+ 0.2694,
+ 0.2929,
+ 0.2737,
+ 0.2955,
+ 0.2877,
+ 0.2891,
+ 0.302,
+ 0.272,
+ 0.2931,
+ 0.2882,
+ 0.2806,
+ 0.2849,
+ 0.2989,
+ 0.2988,
+ 0.3036,
+ 0.2869,
+ 0.2951,
+ 0.3014,
+ 0.289,
+ 0.2834,
+ 0.2896,
+ 0.2887,
+ 0.2932,
+ 0.2855,
+ 0.2882,
+ 0.2846,
+ 0.3018,
+ 0.2981,
+ 0.3019,
+ 0.2846,
+ 0.2968,
+ 0.2915,
+ 0.2935,
+ 0.2912,
+ 0.2978,
+ 0.2989,
+ 0.2953,
+ 0.2999,
+ 0.292,
+ 0.2923,
+ 0.2856,
+ 0.2956,
+ 0.2993,
+ 0.2906,
+ 0.2931,
+ 0.2925,
+ 0.2948,
+ 0.2968,
+ 0.295,
+ 0.2965,
+ 0.3019,
+ 0.2889,
+ 0.2941,
+ 0.2958,
+ 0.2959,
+ 0.2955,
+ 0.2926,
+ 0.2921,
+ 0.2964,
+ 0.2981,
+ 0.2912,
+ 0.2982,
+ 0.2995,
+ 0.2958,
+ 0.2963,
+ 0.2965,
+ 0.2963,
+ 0.2947,
+ 0.2957,
+ 0.2953,
+ 0.2947,
+ 0.2953,
+ 0.2951,
+ 0.2949,
+ 0.295
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.3428037762641907,
+ -3.763916902244091e-05,
+ -0.0007522967061959207,
+ -0.00019637049990706146,
+ -0.0005234384443610907,
+ 0.00033836261718533933
+ ],
+ "perturbation_rho": [
+ -0.03871288150548935,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "nudging": {
+ "0.001": [
+ -3.3015385270118713e-07,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.003": [
+ -8.167698979377747e-07,
+ 0.0,
+ 0.0,
+ 0.0,
+ 1.862645149230957e-09,
+ 0.0
+ ],
+ "0.01": [
+ -2.7455389499664307e-06,
+ 0.0,
+ 0.0,
+ 1.862645149230957e-09,
+ 1.862645149230957e-09,
+ 0.0
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 53848.7734375,
+ 2442806016.0,
+ 5245538816.0,
+ 6828209664.0,
+ 7018600960.0,
+ 9658986496.0,
+ 9768150016.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.0601439132406085e-07,
+ 2.350288574870518e-10,
+ 2.3180254937749112e-10,
+ 2.322748937633179e-10,
+ 2.3222145040247e-10,
+ 2.3218917066802902e-10,
+ 2.321651759729093e-10
+ ]
+ },
+ "drift": {
+ "embed.weight": 347.726745764923,
+ "embed.bias": 320.686611820448,
+ "blocks.0.ln.weight": 9.997068770929598,
+ "blocks.0.w1.weight": 332.0183119830964,
+ "blocks.0.w1.bias": 361.6276454599223,
+ "blocks.0.w2.weight": 491.26444464340926,
+ "blocks.1.ln.weight": 9.760206175642947,
+ "blocks.1.w1.weight": 399.42793179083463,
+ "blocks.1.w1.bias": 382.1218468905457,
+ "blocks.1.w2.weight": 398.4896403314863,
+ "blocks.2.ln.weight": 9.863790992763134,
+ "blocks.2.w1.weight": 403.4218387582533,
+ "blocks.2.w1.bias": 369.67326277166103,
+ "blocks.2.w2.weight": 388.1030551430935,
+ "blocks.3.ln.weight": 7.7321395772148405,
+ "blocks.3.w1.weight": 284.159080804305,
+ "blocks.3.w1.bias": 261.08860693920894,
+ "blocks.3.w2.weight": 276.021700238577,
+ "blocks.4.ln.weight": 10.766653379169998,
+ "blocks.4.w1.weight": 441.4219333141434,
+ "blocks.4.w1.bias": 408.4716885612093,
+ "blocks.4.w2.weight": 432.48056855561003,
+ "blocks.5.ln.weight": 7.252784916283292,
+ "blocks.5.w1.weight": 280.38847615534195,
+ "blocks.5.w1.bias": 255.99327290804064,
+ "blocks.5.w2.weight": 255.63025547094998,
+ "out_ln.weight": 0.644730004333539,
+ "out_head.weight": 9.313913439210426,
+ "out_head.bias": 0.978185244031994
+ }
+ },
+ "fa": {
+ "log": {
+ "train_loss": [
+ 2.0382984645843507,
+ 1.9496157778167724,
+ 1.9119072325897217,
+ 1.8929361431121827,
+ 1.8775380432128905,
+ 1.8741492847442627,
+ 1.8671779189300537,
+ 1.860015816307068,
+ 1.8485039702987671,
+ 1.844084996986389,
+ 1.8363114721679688,
+ 1.8330332998275758,
+ 1.8273526552581787,
+ 1.825867492904663,
+ 1.821722242088318,
+ 1.8222067623519898,
+ 1.8212668908691407,
+ 1.8164310580825807,
+ 1.8167414797592163,
+ 1.8158153560638428,
+ 1.8170397219848633,
+ 1.8139575901031495,
+ 1.814438596458435,
+ 1.807250874671936,
+ 1.806480390663147,
+ 1.805527213973999,
+ 1.7998141473770142,
+ 1.8049808203125,
+ 1.8055741637420655,
+ 1.796914314918518,
+ 1.8009305011749268,
+ 1.797931548538208,
+ 1.7981645835113524,
+ 1.793643822402954,
+ 1.7937120150375365,
+ 1.7967921892929077,
+ 1.7892387997436523,
+ 1.785188671875,
+ 1.786605785293579,
+ 1.787917324485779,
+ 1.7853582260894776,
+ 1.7880721422576904,
+ 1.7864411516571046,
+ 1.7777207873916625,
+ 1.7769269942855834,
+ 1.777174091796875,
+ 1.7795673489761352,
+ 1.773414126586914,
+ 1.7774351581192016,
+ 1.7697969200897217,
+ 1.771698450050354,
+ 1.768428716278076,
+ 1.7676336987304688,
+ 1.765194714126587,
+ 1.7675122838974,
+ 1.7682551845932006,
+ 1.7667323706436158,
+ 1.7661600426864623,
+ 1.7620434980010986,
+ 1.7610514661026,
+ 1.7625030724716186,
+ 1.7606126858520508,
+ 1.760137271118164,
+ 1.7585180844879151,
+ 1.7585363976287842,
+ 1.7592174551010131,
+ 1.755930234375,
+ 1.7548912310791016,
+ 1.7563516518783568,
+ 1.7501784167099,
+ 1.7541064535522461,
+ 1.7512517785263062,
+ 1.7541993662261963,
+ 1.7516761654663087,
+ 1.7516398949813843,
+ 1.749719626121521,
+ 1.7497738579559325,
+ 1.752181930885315,
+ 1.7519074490356445,
+ 1.7518238845443725,
+ 1.7466175811004638,
+ 1.748845227355957,
+ 1.746575064430237,
+ 1.7496823914337158,
+ 1.7468877982330322,
+ 1.7439219277191162,
+ 1.7428745371246337,
+ 1.746016036453247,
+ 1.742904917678833,
+ 1.7454003299713134,
+ 1.7449493420028686,
+ 1.744745844039917,
+ 1.7438948041152955,
+ 1.7453510579681397,
+ 1.7438724200439453,
+ 1.7425698751068115,
+ 1.742772998123169,
+ 1.7421956006622314,
+ 1.7445682699966432,
+ 1.7442712462615966
+ ],
+ "train_acc": [
+ 0.25024,
+ 0.28724,
+ 0.30528,
+ 0.31264,
+ 0.32216,
+ 0.32208,
+ 0.32792,
+ 0.33064,
+ 0.3365,
+ 0.3396,
+ 0.34,
+ 0.34242,
+ 0.34468,
+ 0.34422,
+ 0.34752,
+ 0.34912,
+ 0.34524,
+ 0.34978,
+ 0.34856,
+ 0.3501,
+ 0.34806,
+ 0.35048,
+ 0.35214,
+ 0.35416,
+ 0.35338,
+ 0.35482,
+ 0.35432,
+ 0.35244,
+ 0.35388,
+ 0.35836,
+ 0.35712,
+ 0.3568,
+ 0.3591,
+ 0.36118,
+ 0.36208,
+ 0.35824,
+ 0.36296,
+ 0.3625,
+ 0.3616,
+ 0.3628,
+ 0.36246,
+ 0.36298,
+ 0.3605,
+ 0.36632,
+ 0.36574,
+ 0.36678,
+ 0.36478,
+ 0.3649,
+ 0.3662,
+ 0.36734,
+ 0.3665,
+ 0.36722,
+ 0.36714,
+ 0.3702,
+ 0.36908,
+ 0.37058,
+ 0.36998,
+ 0.36858,
+ 0.37244,
+ 0.37104,
+ 0.3722,
+ 0.37344,
+ 0.3726,
+ 0.37172,
+ 0.3741,
+ 0.37406,
+ 0.3731,
+ 0.37294,
+ 0.37284,
+ 0.37628,
+ 0.37682,
+ 0.37742,
+ 0.37544,
+ 0.37656,
+ 0.37632,
+ 0.37814,
+ 0.37808,
+ 0.37642,
+ 0.37748,
+ 0.37462,
+ 0.37968,
+ 0.37766,
+ 0.37704,
+ 0.37846,
+ 0.37784,
+ 0.379,
+ 0.37904,
+ 0.37854,
+ 0.38202,
+ 0.37828,
+ 0.37712,
+ 0.37962,
+ 0.38004,
+ 0.37898,
+ 0.3783,
+ 0.37898,
+ 0.38082,
+ 0.38006,
+ 0.37922,
+ 0.37846
+ ],
+ "test_acc": [
+ 0.3027,
+ 0.306,
+ 0.3365,
+ 0.3382,
+ 0.3465,
+ 0.3597,
+ 0.3579,
+ 0.3625,
+ 0.3757,
+ 0.3702,
+ 0.3663,
+ 0.3612,
+ 0.374,
+ 0.378,
+ 0.3778,
+ 0.3778,
+ 0.3807,
+ 0.3691,
+ 0.3833,
+ 0.3714,
+ 0.3788,
+ 0.3629,
+ 0.3794,
+ 0.382,
+ 0.3843,
+ 0.3732,
+ 0.3818,
+ 0.373,
+ 0.3819,
+ 0.3776,
+ 0.3882,
+ 0.3926,
+ 0.385,
+ 0.3853,
+ 0.3879,
+ 0.3927,
+ 0.3894,
+ 0.3945,
+ 0.3903,
+ 0.3924,
+ 0.3896,
+ 0.3941,
+ 0.3919,
+ 0.3888,
+ 0.3863,
+ 0.3927,
+ 0.3922,
+ 0.3903,
+ 0.3939,
+ 0.3828,
+ 0.3963,
+ 0.3935,
+ 0.3958,
+ 0.3971,
+ 0.4048,
+ 0.4003,
+ 0.3934,
+ 0.3945,
+ 0.395,
+ 0.3979,
+ 0.3908,
+ 0.3969,
+ 0.4012,
+ 0.3998,
+ 0.3987,
+ 0.3965,
+ 0.4025,
+ 0.3984,
+ 0.4038,
+ 0.3982,
+ 0.4015,
+ 0.4027,
+ 0.3997,
+ 0.4022,
+ 0.4045,
+ 0.4031,
+ 0.4028,
+ 0.4041,
+ 0.3998,
+ 0.4012,
+ 0.3998,
+ 0.3979,
+ 0.4026,
+ 0.401,
+ 0.4006,
+ 0.4048,
+ 0.4018,
+ 0.4009,
+ 0.4025,
+ 0.4024,
+ 0.4009,
+ 0.4036,
+ 0.4025,
+ 0.4022,
+ 0.4026,
+ 0.401,
+ 0.4018,
+ 0.401,
+ 0.4015,
+ 0.4016
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.02052094228565693,
+ 0.07671980559825897,
+ -0.10412630438804626,
+ -0.03588568791747093,
+ -0.07592153549194336,
+ 0.9841817617416382
+ ],
+ "perturbation_rho": [
+ -0.0499441958963871,
+ 0.03510870411992073,
+ 0.012152664363384247,
+ -0.01593763567507267,
+ -0.030490349978208542,
+ 0.01056537963449955
+ ],
+ "nudging": {
+ "0.001": [
+ -1.460895873606205e-06,
+ -1.8533319234848022e-07,
+ 8.102506399154663e-08,
+ 3.9814040064811707e-08,
+ 7.660128176212311e-08,
+ -1.0418007150292397e-06
+ ],
+ "0.003": [
+ -4.51505184173584e-06,
+ -7.73230567574501e-07,
+ 4.012836143374443e-07,
+ 1.9150320440530777e-07,
+ 2.3317988961935043e-07,
+ -3.8853613659739494e-06
+ ],
+ "0.01": [
+ -1.504761166870594e-05,
+ -2.689310349524021e-06,
+ 1.5775440260767937e-06,
+ 6.139744073152542e-07,
+ 9.952345862984657e-07,
+ -1.3828510418534279e-05
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 7937.806640625,
+ 157942.125,
+ 845318.25,
+ 1734418.625,
+ 1908211.875,
+ 2166763.75,
+ 1424229.125
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.3930177121656016e-05,
+ 1.1552400565051357e-06,
+ 6.149262503640784e-07,
+ 6.002979375807627e-07,
+ 6.000758503432735e-07,
+ 5.996230356686283e-07,
+ 5.751607545789739e-07
+ ]
+ },
+ "drift": {
+ "embed.weight": 54.05447299340841,
+ "embed.bias": 16.031038115248215,
+ "blocks.0.ln.weight": 1.2416108593211508,
+ "blocks.0.w1.weight": 17.922589218376636,
+ "blocks.0.w1.bias": 14.956067338203475,
+ "blocks.0.w2.weight": 64.67351730215137,
+ "blocks.1.ln.weight": 1.1286859632363149,
+ "blocks.1.w1.weight": 23.71804846947617,
+ "blocks.1.w1.bias": 17.43068124645083,
+ "blocks.1.w2.weight": 41.38902946126262,
+ "blocks.2.ln.weight": 0.7406672335136909,
+ "blocks.2.w1.weight": 24.88918465627309,
+ "blocks.2.w1.bias": 25.668393301356875,
+ "blocks.2.w2.weight": 26.33124026065157,
+ "blocks.3.ln.weight": 0.5408212886770045,
+ "blocks.3.w1.weight": 19.746562187924944,
+ "blocks.3.w1.bias": 21.605426127504696,
+ "blocks.3.w2.weight": 26.408093498021923,
+ "blocks.4.ln.weight": 0.7373842468395975,
+ "blocks.4.w1.weight": 21.153183487980545,
+ "blocks.4.w1.bias": 21.145538252519305,
+ "blocks.4.w2.weight": 45.59224517841596,
+ "blocks.5.ln.weight": 0.8316077429831186,
+ "blocks.5.w1.weight": 20.11472175424346,
+ "blocks.5.w1.bias": 17.387776840886495,
+ "blocks.5.w2.weight": 81.52952222436008,
+ "out_ln.weight": 0.38061390763416836,
+ "out_head.weight": 7.016437743402715,
+ "out_head.bias": 0.6179754221306049
+ }
+ }
+ },
+ "config": {
+ "dataset": "cifar10",
+ "d_hidden": 512,
+ "num_blocks": 6,
+ "batch_size": 128,
+ "epochs": 100,
+ "lr": 0.001,
+ "lr_fb": 0.001,
+ "wd": 0.01,
+ "lam": 0.1,
+ "K": 4,
+ "sigma_bridge": 0.05,
+ "ema_momentum": 0.995,
+ "term_grad_weight": 1.0,
+ "seeds": [
+ 2
+ ],
+ "gpu": 0,
+ "output_dir": "results/fa_dfa_d512_L6_seed2",
+ "methods": [
+ "fa",
+ "dfa"
+ ],
+ "random_targets": false,
+ "penalty_lam": 0.0,
+ "num_classes": 10
+ }
+} \ No newline at end of file
diff --git a/results/fa_dfa_d512_L6_seed3/results_cifar10.json b/results/fa_dfa_d512_L6_seed3/results_cifar10.json
new file mode 100644
index 0000000..fe507fc
--- /dev/null
+++ b/results/fa_dfa_d512_L6_seed3/results_cifar10.json
@@ -0,0 +1,837 @@
+{
+ "3": {
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 2.0551145947265623,
+ 2.0254818302154542,
+ 2.0194406120300292,
+ 2.0175874392700197,
+ 2.0163199629211426,
+ 2.0142830805969236,
+ 2.007013447418213,
+ 2.003593424758911,
+ 2.0067723297500613,
+ 2.003558956604004,
+ 2.00101790184021,
+ 2.0011146197509766,
+ 1.998018801345825,
+ 1.9996520843887329,
+ 1.9962160342407227,
+ 1.997622791786194,
+ 1.9969604161071777,
+ 1.9916633380126954,
+ 1.9958815657806397,
+ 1.9930840515899657,
+ 1.9948459027862548,
+ 1.9917297480773926,
+ 1.9886382999420167,
+ 1.9919039403533936,
+ 1.990138112449646,
+ 1.9900766090393067,
+ 1.9897496450042724,
+ 1.9914394456481934,
+ 1.9891233996582032,
+ 1.985649783477783,
+ 1.9873360358428955,
+ 1.989273772125244,
+ 1.987579068336487,
+ 1.9871572724151612,
+ 1.9870523969268798,
+ 1.983975528640747,
+ 1.987108448867798,
+ 1.9884108324813843,
+ 1.988470245361328,
+ 1.9860010179138183,
+ 1.9886363947296142,
+ 1.987370449295044,
+ 1.9879705474853515,
+ 1.9867627616882324,
+ 1.9860119804382323,
+ 1.9856234351348876,
+ 1.9848077922058105,
+ 1.9845034539794921,
+ 1.9853366620635986,
+ 1.9832583249664306,
+ 1.9855356827163697,
+ 1.9846282432556153,
+ 1.9862506673431397,
+ 1.9845512745666505,
+ 1.984809038848877,
+ 1.983686294631958,
+ 1.9839595748138428,
+ 1.9834054181671144,
+ 1.9851946828460694,
+ 1.9813065106964112,
+ 1.982595512084961,
+ 1.9824060856628418,
+ 1.9832570594787597,
+ 1.9817028414916993,
+ 1.9841908657836913,
+ 1.9825086151123046,
+ 1.9821907202148437,
+ 1.9809494961166383,
+ 1.9812863923645019,
+ 1.9832594239807129,
+ 1.9819738615417481,
+ 1.982517120513916,
+ 1.9800974131011964,
+ 1.983135057144165,
+ 1.983559645614624,
+ 1.9801312906646729,
+ 1.9810623525238038,
+ 1.9804602764511108,
+ 1.9805444803619385,
+ 1.9829668161010743,
+ 1.9795248139190673,
+ 1.9794620516967774,
+ 1.9808693537902833,
+ 1.9817010179138184,
+ 1.9800636986541749,
+ 1.9797203030395507,
+ 1.980884309463501,
+ 1.9790666972351074,
+ 1.9815667838287354,
+ 1.9785900994110108,
+ 1.9793143473815917,
+ 1.9813185803985596,
+ 1.9775366342926026,
+ 1.9791302056884765,
+ 1.9800324520874024,
+ 1.9808550524902344,
+ 1.9790050820541383,
+ 1.9809321939086915,
+ 1.9801498863220215,
+ 1.9817508765029908
+ ],
+ "train_acc": [
+ 0.24236,
+ 0.25538,
+ 0.26038,
+ 0.25914,
+ 0.2596,
+ 0.26144,
+ 0.26392,
+ 0.26632,
+ 0.2648,
+ 0.26606,
+ 0.27124,
+ 0.26782,
+ 0.26864,
+ 0.2686,
+ 0.27008,
+ 0.27032,
+ 0.2686,
+ 0.2709,
+ 0.27116,
+ 0.26994,
+ 0.27146,
+ 0.27376,
+ 0.2744,
+ 0.2734,
+ 0.27522,
+ 0.27308,
+ 0.27442,
+ 0.27432,
+ 0.27566,
+ 0.27572,
+ 0.27774,
+ 0.2749,
+ 0.275,
+ 0.27594,
+ 0.27506,
+ 0.27894,
+ 0.27722,
+ 0.27576,
+ 0.27618,
+ 0.27836,
+ 0.27742,
+ 0.27672,
+ 0.27846,
+ 0.27852,
+ 0.27952,
+ 0.27852,
+ 0.2798,
+ 0.2786,
+ 0.27982,
+ 0.27886,
+ 0.27922,
+ 0.28008,
+ 0.28086,
+ 0.27944,
+ 0.27862,
+ 0.2795,
+ 0.2811,
+ 0.28046,
+ 0.28042,
+ 0.28068,
+ 0.2808,
+ 0.2807,
+ 0.28074,
+ 0.28092,
+ 0.28042,
+ 0.28318,
+ 0.28114,
+ 0.28196,
+ 0.28128,
+ 0.28244,
+ 0.28616,
+ 0.28052,
+ 0.28304,
+ 0.28028,
+ 0.28134,
+ 0.283,
+ 0.281,
+ 0.28442,
+ 0.2821,
+ 0.2828,
+ 0.28364,
+ 0.28418,
+ 0.2818,
+ 0.2828,
+ 0.28202,
+ 0.28332,
+ 0.2823,
+ 0.28336,
+ 0.27986,
+ 0.28428,
+ 0.28444,
+ 0.28348,
+ 0.28552,
+ 0.28302,
+ 0.28178,
+ 0.28176,
+ 0.28462,
+ 0.28336,
+ 0.28208,
+ 0.28378
+ ],
+ "test_acc": [
+ 0.2839,
+ 0.276,
+ 0.2703,
+ 0.2882,
+ 0.3045,
+ 0.2916,
+ 0.2881,
+ 0.2861,
+ 0.2942,
+ 0.2742,
+ 0.286,
+ 0.3032,
+ 0.3043,
+ 0.284,
+ 0.3085,
+ 0.2889,
+ 0.2865,
+ 0.2944,
+ 0.288,
+ 0.2904,
+ 0.2951,
+ 0.2866,
+ 0.3007,
+ 0.2883,
+ 0.2858,
+ 0.3049,
+ 0.2761,
+ 0.3085,
+ 0.2795,
+ 0.2969,
+ 0.2937,
+ 0.3004,
+ 0.2812,
+ 0.2902,
+ 0.3023,
+ 0.2997,
+ 0.2918,
+ 0.3121,
+ 0.2969,
+ 0.2938,
+ 0.2951,
+ 0.3021,
+ 0.3015,
+ 0.31,
+ 0.2967,
+ 0.2972,
+ 0.3,
+ 0.3063,
+ 0.3104,
+ 0.3045,
+ 0.3005,
+ 0.3049,
+ 0.3048,
+ 0.3025,
+ 0.3043,
+ 0.3031,
+ 0.2936,
+ 0.2981,
+ 0.3033,
+ 0.2941,
+ 0.3064,
+ 0.2988,
+ 0.3068,
+ 0.3013,
+ 0.2997,
+ 0.3068,
+ 0.3062,
+ 0.3059,
+ 0.293,
+ 0.3029,
+ 0.3083,
+ 0.3108,
+ 0.3016,
+ 0.3031,
+ 0.2946,
+ 0.2992,
+ 0.3066,
+ 0.3046,
+ 0.3006,
+ 0.3035,
+ 0.2988,
+ 0.3031,
+ 0.3015,
+ 0.3028,
+ 0.3008,
+ 0.3022,
+ 0.3064,
+ 0.3045,
+ 0.3051,
+ 0.3041,
+ 0.3025,
+ 0.3045,
+ 0.3034,
+ 0.3039,
+ 0.3049,
+ 0.3044,
+ 0.3042,
+ 0.3046,
+ 0.3046,
+ 0.3046
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.3834345042705536,
+ 0.0005603223107755184,
+ 0.0008055042708292603,
+ 4.550980156636797e-05,
+ -0.0009304977720603347,
+ 0.0005176510312594473
+ ],
+ "perturbation_rho": [
+ -0.015653517097234726,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "nudging": {
+ "0.001": [
+ -3.976747393608093e-07,
+ 0.0,
+ 0.0,
+ -9.313225746154785e-10,
+ 0.0,
+ 0.0
+ ],
+ "0.003": [
+ -1.353677362203598e-06,
+ 0.0,
+ 0.0,
+ -9.313225746154785e-10,
+ 0.0,
+ 0.0
+ ],
+ "0.01": [
+ -4.4209882616996765e-06,
+ -1.862645149230957e-09,
+ 0.0,
+ -9.313225746154785e-10,
+ 0.0,
+ 0.0
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 53414.71484375,
+ 1171344000.0,
+ 1695788416.0,
+ 3132320256.0,
+ 5559488000.0,
+ 6114624000.0,
+ 6167857152.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.961761254027806e-07,
+ 3.1574928782696077e-10,
+ 3.1493549434991053e-10,
+ 3.145896598777398e-10,
+ 3.146878591042679e-10,
+ 3.147946070480856e-10,
+ 3.1481975359959335e-10
+ ]
+ },
+ "drift": {
+ "embed.weight": 323.54545540843105,
+ "embed.bias": 220.1187911959123,
+ "blocks.0.ln.weight": 9.685558373681214,
+ "blocks.0.w1.weight": 284.1235348747452,
+ "blocks.0.w1.bias": 249.08037975739575,
+ "blocks.0.w2.weight": 467.48682118152,
+ "blocks.1.ln.weight": 7.467562562572266,
+ "blocks.1.w1.weight": 239.93081695201815,
+ "blocks.1.w1.bias": 214.84010837594673,
+ "blocks.1.w2.weight": 270.7171266420411,
+ "blocks.2.ln.weight": 8.6999741976989,
+ "blocks.2.w1.weight": 335.1621989545166,
+ "blocks.2.w1.bias": 297.63643437529385,
+ "blocks.2.w2.weight": 332.9487355280909,
+ "blocks.3.ln.weight": 9.65496650463451,
+ "blocks.3.w1.weight": 396.7552349422605,
+ "blocks.3.w1.bias": 368.25716787661366,
+ "blocks.3.w2.weight": 397.6707533080763,
+ "blocks.4.ln.weight": 8.086181202960429,
+ "blocks.4.w1.weight": 327.52147542974734,
+ "blocks.4.w1.bias": 308.89251019375376,
+ "blocks.4.w2.weight": 317.4051434598822,
+ "blocks.5.ln.weight": 5.9782378958674425,
+ "blocks.5.w1.weight": 215.37270785666712,
+ "blocks.5.w1.bias": 206.13969443200085,
+ "blocks.5.w2.weight": 210.55284579153434,
+ "out_ln.weight": 0.6330900560999851,
+ "out_head.weight": 8.856435188669105,
+ "out_head.bias": 0.9683577161846454
+ }
+ },
+ "fa": {
+ "log": {
+ "train_loss": [
+ 2.0186621208190916,
+ 1.9331099752044678,
+ 1.905914333114624,
+ 1.8951857843399047,
+ 1.8854292852783203,
+ 1.8709850173568725,
+ 1.8602817873382569,
+ 1.8479190076065064,
+ 1.8414129400253296,
+ 1.8321620767974853,
+ 1.826335980758667,
+ 1.8197622241210938,
+ 1.8174528274917603,
+ 1.8127154489898682,
+ 1.8082266244888305,
+ 1.811778229446411,
+ 1.8112502433013915,
+ 1.8042263799667357,
+ 1.8090753686141967,
+ 1.8053230755233765,
+ 1.8077443563842774,
+ 1.804732971496582,
+ 1.8016516897583008,
+ 1.8038146850204468,
+ 1.8026063943862916,
+ 1.802457709350586,
+ 1.8002039365386964,
+ 1.8020965616607667,
+ 1.7966074966812133,
+ 1.796929923324585,
+ 1.7942455569076539,
+ 1.7956397313690187,
+ 1.7895081832504272,
+ 1.7901598165893555,
+ 1.785636817932129,
+ 1.7812746408843994,
+ 1.7844691129302979,
+ 1.783790927810669,
+ 1.7788431832885743,
+ 1.7756607747650146,
+ 1.774973991355896,
+ 1.769966243019104,
+ 1.7683391579818726,
+ 1.7691140979766846,
+ 1.7666235674285888,
+ 1.763746886062622,
+ 1.7614172232437133,
+ 1.7630763125610351,
+ 1.7593659213256836,
+ 1.7556221802139282,
+ 1.7563741372680663,
+ 1.7534167012786865,
+ 1.7579979578018188,
+ 1.7498029040145875,
+ 1.7481240466308594,
+ 1.747141012802124,
+ 1.7512671886444091,
+ 1.7454403200531006,
+ 1.747818590774536,
+ 1.7458769092559814,
+ 1.7429523513412475,
+ 1.7409421573638917,
+ 1.7443068244171143,
+ 1.7444597329330445,
+ 1.7395000383758545,
+ 1.7387146155548097,
+ 1.7404073351669311,
+ 1.7386165090560912,
+ 1.7302196057891845,
+ 1.7337853803253174,
+ 1.734050626296997,
+ 1.7332715703582764,
+ 1.7335029886245727,
+ 1.7333770494842529,
+ 1.7329201885223389,
+ 1.7302788830947875,
+ 1.7308894277572633,
+ 1.7317599598693847,
+ 1.7300064459609985,
+ 1.730283342819214,
+ 1.7256998642349244,
+ 1.7275242566680908,
+ 1.7297832077026367,
+ 1.7274928902435303,
+ 1.7220390670776367,
+ 1.725278727684021,
+ 1.7244994747924804,
+ 1.725174829978943,
+ 1.72525224609375,
+ 1.7249654501724243,
+ 1.7217949364852905,
+ 1.724819582977295,
+ 1.7225480925750734,
+ 1.7213666839981079,
+ 1.7232744509887696,
+ 1.7264412049102784,
+ 1.7210735564804076,
+ 1.7247192889404297,
+ 1.7228876683807373,
+ 1.7252232053375245
+ ],
+ "train_acc": [
+ 0.26044,
+ 0.2964,
+ 0.30886,
+ 0.31392,
+ 0.32094,
+ 0.32556,
+ 0.32806,
+ 0.33438,
+ 0.33584,
+ 0.34332,
+ 0.34448,
+ 0.3469,
+ 0.3493,
+ 0.34834,
+ 0.34978,
+ 0.3522,
+ 0.35074,
+ 0.35222,
+ 0.3523,
+ 0.35548,
+ 0.35364,
+ 0.35582,
+ 0.35566,
+ 0.3538,
+ 0.35346,
+ 0.35642,
+ 0.3576,
+ 0.35768,
+ 0.35674,
+ 0.35842,
+ 0.35762,
+ 0.35934,
+ 0.36148,
+ 0.36034,
+ 0.36122,
+ 0.3633,
+ 0.36236,
+ 0.3648,
+ 0.36606,
+ 0.36512,
+ 0.36748,
+ 0.36804,
+ 0.36888,
+ 0.36944,
+ 0.37288,
+ 0.37268,
+ 0.37384,
+ 0.37068,
+ 0.37262,
+ 0.375,
+ 0.37486,
+ 0.37624,
+ 0.37518,
+ 0.3761,
+ 0.37538,
+ 0.3772,
+ 0.37746,
+ 0.37694,
+ 0.37704,
+ 0.37738,
+ 0.37926,
+ 0.37986,
+ 0.37926,
+ 0.3778,
+ 0.38224,
+ 0.38084,
+ 0.3805,
+ 0.381,
+ 0.38198,
+ 0.38366,
+ 0.38362,
+ 0.38348,
+ 0.38632,
+ 0.38202,
+ 0.38298,
+ 0.38632,
+ 0.38186,
+ 0.38376,
+ 0.38438,
+ 0.3837,
+ 0.38756,
+ 0.3857,
+ 0.38298,
+ 0.38544,
+ 0.38558,
+ 0.3877,
+ 0.38692,
+ 0.38824,
+ 0.38624,
+ 0.38652,
+ 0.38646,
+ 0.38884,
+ 0.38832,
+ 0.3871,
+ 0.38898,
+ 0.38446,
+ 0.38604,
+ 0.38742,
+ 0.38538,
+ 0.3872
+ ],
+ "test_acc": [
+ 0.3158,
+ 0.3361,
+ 0.343,
+ 0.3494,
+ 0.3693,
+ 0.3591,
+ 0.3652,
+ 0.3642,
+ 0.376,
+ 0.3646,
+ 0.3753,
+ 0.3821,
+ 0.3889,
+ 0.3824,
+ 0.3807,
+ 0.3765,
+ 0.3749,
+ 0.3867,
+ 0.3837,
+ 0.3793,
+ 0.3829,
+ 0.3792,
+ 0.3865,
+ 0.3772,
+ 0.3856,
+ 0.3842,
+ 0.3784,
+ 0.3846,
+ 0.3821,
+ 0.3885,
+ 0.3845,
+ 0.3915,
+ 0.3893,
+ 0.3836,
+ 0.3942,
+ 0.4018,
+ 0.3946,
+ 0.397,
+ 0.3956,
+ 0.3996,
+ 0.3947,
+ 0.3958,
+ 0.3944,
+ 0.4035,
+ 0.4029,
+ 0.4009,
+ 0.393,
+ 0.4026,
+ 0.4079,
+ 0.4063,
+ 0.3963,
+ 0.4017,
+ 0.4033,
+ 0.4045,
+ 0.4081,
+ 0.4059,
+ 0.4068,
+ 0.4035,
+ 0.4051,
+ 0.4058,
+ 0.402,
+ 0.4074,
+ 0.4037,
+ 0.404,
+ 0.4017,
+ 0.4036,
+ 0.4057,
+ 0.409,
+ 0.404,
+ 0.4081,
+ 0.407,
+ 0.4083,
+ 0.4085,
+ 0.4041,
+ 0.4044,
+ 0.4056,
+ 0.4118,
+ 0.4081,
+ 0.4097,
+ 0.4096,
+ 0.4116,
+ 0.4104,
+ 0.4077,
+ 0.4095,
+ 0.4088,
+ 0.4114,
+ 0.41,
+ 0.4093,
+ 0.4113,
+ 0.4112,
+ 0.4105,
+ 0.4115,
+ 0.4109,
+ 0.4115,
+ 0.4116,
+ 0.4127,
+ 0.4116,
+ 0.4116,
+ 0.4116,
+ 0.4118
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.033788226544857025,
+ 0.08954796195030212,
+ -0.06740664690732956,
+ -0.10690448433160782,
+ -0.11852722615003586,
+ 0.9977210760116577
+ ],
+ "perturbation_rho": [
+ 0.03511377051472664,
+ 0.04956234246492386,
+ -0.005039767827838659,
+ -0.028402434661984444,
+ 0.002629645634442568,
+ -0.0117247449234128
+ ],
+ "nudging": {
+ "0.001": [
+ -2.101645804941654e-06,
+ -5.005858838558197e-07,
+ 6.752088665962219e-09,
+ 1.1606607586145401e-07,
+ 8.440110832452774e-08,
+ -1.3562384992837906e-06
+ ],
+ "0.003": [
+ -5.929498001933098e-06,
+ -1.1969823390245438e-06,
+ 3.577442839741707e-07,
+ 4.066387191414833e-07,
+ 6.486661732196808e-07,
+ -5.192705430090427e-06
+ ],
+ "0.01": [
+ -1.9490602426230907e-05,
+ -3.955909051001072e-06,
+ 1.2312084436416626e-06,
+ 1.8166610971093178e-06,
+ 2.1688174456357956e-06,
+ -1.8663820810616016e-05
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 7009.63232421875,
+ 187427.9375,
+ 535894.875,
+ 1119580.875,
+ 1710404.625,
+ 2170302.75,
+ 769555.6875
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.6194647944066674e-05,
+ 1.3078431493340759e-06,
+ 7.762633913444006e-07,
+ 7.287014227586042e-07,
+ 7.358628977272019e-07,
+ 7.391291774183628e-07,
+ 7.346811230490857e-07
+ ]
+ },
+ "drift": {
+ "embed.weight": 48.84389712380156,
+ "embed.bias": 18.04622672355978,
+ "blocks.0.ln.weight": 1.2715512223724241,
+ "blocks.0.w1.weight": 18.323188610788566,
+ "blocks.0.w1.bias": 14.621705904927168,
+ "blocks.0.w2.weight": 62.82097007591977,
+ "blocks.1.ln.weight": 1.0780729921051364,
+ "blocks.1.w1.weight": 19.649865156855387,
+ "blocks.1.w1.bias": 13.846457460989065,
+ "blocks.1.w2.weight": 46.895166874548835,
+ "blocks.2.ln.weight": 0.8170823371574294,
+ "blocks.2.w1.weight": 21.511415879383534,
+ "blocks.2.w1.bias": 20.453359426824278,
+ "blocks.2.w2.weight": 36.12877482089504,
+ "blocks.3.ln.weight": 0.7815298682497922,
+ "blocks.3.w1.weight": 23.873373589893692,
+ "blocks.3.w1.bias": 23.438832639917354,
+ "blocks.3.w2.weight": 34.65004625568547,
+ "blocks.4.ln.weight": 0.7049287284135428,
+ "blocks.4.w1.weight": 24.756294779706568,
+ "blocks.4.w1.bias": 25.075713132594746,
+ "blocks.4.w2.weight": 39.3474190645009,
+ "blocks.5.ln.weight": 0.825085939532927,
+ "blocks.5.w1.weight": 25.260469003772034,
+ "blocks.5.w1.bias": 24.835263327096893,
+ "blocks.5.w2.weight": 47.04696050246029,
+ "out_ln.weight": 0.3016529235625585,
+ "out_head.weight": 6.069315898628023,
+ "out_head.bias": 1.8625596673903535
+ }
+ }
+ },
+ "config": {
+ "dataset": "cifar10",
+ "d_hidden": 512,
+ "num_blocks": 6,
+ "batch_size": 128,
+ "epochs": 100,
+ "lr": 0.001,
+ "lr_fb": 0.001,
+ "wd": 0.01,
+ "lam": 0.1,
+ "K": 4,
+ "sigma_bridge": 0.05,
+ "ema_momentum": 0.995,
+ "term_grad_weight": 1.0,
+ "seeds": [
+ 3
+ ],
+ "gpu": 0,
+ "output_dir": "results/fa_dfa_d512_L6_seed3",
+ "methods": [
+ "fa",
+ "dfa"
+ ],
+ "random_targets": false,
+ "penalty_lam": 0.0,
+ "num_classes": 10
+ }
+} \ No newline at end of file
diff --git a/results/fa_dfa_d512_L6_seed4/results_cifar10.json b/results/fa_dfa_d512_L6_seed4/results_cifar10.json
new file mode 100644
index 0000000..0f4b59f
--- /dev/null
+++ b/results/fa_dfa_d512_L6_seed4/results_cifar10.json
@@ -0,0 +1,837 @@
+{
+ "4": {
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 2.0741304068756103,
+ 2.052452068710327,
+ 2.0495207961273194,
+ 2.0484457649230956,
+ 2.045468647079468,
+ 2.0459753023529053,
+ 2.0450184237670896,
+ 2.0391295627593995,
+ 2.0377573050689697,
+ 2.0382228170394896,
+ 2.0312761193466184,
+ 2.0324994396591185,
+ 2.0278026767730712,
+ 2.0305476247787477,
+ 2.0286661254119873,
+ 2.029820890045166,
+ 2.0290051824188233,
+ 2.0265300660705567,
+ 2.025750561065674,
+ 2.027355994720459,
+ 2.0275561265563966,
+ 2.023750612411499,
+ 2.025079263343811,
+ 2.025395411605835,
+ 2.0240028636932372,
+ 2.022815622253418,
+ 2.0226645655822755,
+ 2.022443874359131,
+ 2.022390442466736,
+ 2.023733573875427,
+ 2.0224531535339354,
+ 2.0227799210357666,
+ 2.0218398332214353,
+ 2.020151731872559,
+ 2.021256269683838,
+ 2.0194612783432007,
+ 2.018333575744629,
+ 2.0183729624938964,
+ 2.0209142852020263,
+ 2.022426343536377,
+ 2.018248730201721,
+ 2.0204831772613527,
+ 2.0188777527618407,
+ 2.0191313500213623,
+ 2.0171565660858155,
+ 2.0189703364562988,
+ 2.0182820797729493,
+ 2.0170327036285403,
+ 2.0180798105239868,
+ 2.0163722726821898,
+ 2.0178759913635256,
+ 2.017920862388611,
+ 2.015365661468506,
+ 2.0167020557403563,
+ 2.0157576428985595,
+ 2.0196311264801023,
+ 2.0176754986190795,
+ 2.017508302345276,
+ 2.0164641773223875,
+ 2.016444677734375,
+ 2.0168316576385497,
+ 2.0152849128723145,
+ 2.015149051208496,
+ 2.0158757721710203,
+ 2.016044344520569,
+ 2.0154035729217528,
+ 2.012803511734009,
+ 2.0160995071792605,
+ 2.015346680755615,
+ 2.0140808686828615,
+ 2.0151058874893186,
+ 2.0144621922302246,
+ 2.0154705238342285,
+ 2.0149063832092287,
+ 2.0115139068603516,
+ 2.0131744523620605,
+ 2.0158527814483644,
+ 2.0125201052474977,
+ 2.0122518685913087,
+ 2.0132210369873045,
+ 2.0136120219421385,
+ 2.0142880089950563,
+ 2.013032984085083,
+ 2.011840599975586,
+ 2.0118305902862548,
+ 2.012003014450073,
+ 2.011992978248596,
+ 2.0092738487243653,
+ 2.0135212693023683,
+ 2.010035700378418,
+ 2.011592872314453,
+ 2.010264990081787,
+ 2.0128248121643066,
+ 2.0130243259048464,
+ 2.0120003854751585,
+ 2.011955912322998,
+ 2.011813560256958,
+ 2.0142602909851073,
+ 2.0124067852020264,
+ 2.0116278426742555
+ ],
+ "train_acc": [
+ 0.23226,
+ 0.2375,
+ 0.23894,
+ 0.2411,
+ 0.24176,
+ 0.24224,
+ 0.24692,
+ 0.24654,
+ 0.24822,
+ 0.24668,
+ 0.25084,
+ 0.24936,
+ 0.2519,
+ 0.24896,
+ 0.25208,
+ 0.25194,
+ 0.2549,
+ 0.25158,
+ 0.25292,
+ 0.25402,
+ 0.25358,
+ 0.2561,
+ 0.25688,
+ 0.25572,
+ 0.25782,
+ 0.25752,
+ 0.2589,
+ 0.25694,
+ 0.2558,
+ 0.25686,
+ 0.25668,
+ 0.259,
+ 0.2579,
+ 0.25902,
+ 0.25906,
+ 0.25856,
+ 0.26018,
+ 0.2611,
+ 0.25834,
+ 0.26182,
+ 0.26058,
+ 0.25944,
+ 0.26134,
+ 0.25952,
+ 0.26058,
+ 0.2594,
+ 0.26156,
+ 0.26526,
+ 0.26156,
+ 0.2652,
+ 0.26178,
+ 0.26122,
+ 0.2618,
+ 0.26384,
+ 0.26214,
+ 0.26386,
+ 0.26256,
+ 0.2644,
+ 0.26262,
+ 0.2629,
+ 0.26074,
+ 0.26266,
+ 0.2634,
+ 0.26182,
+ 0.26266,
+ 0.26396,
+ 0.26654,
+ 0.2633,
+ 0.2613,
+ 0.26562,
+ 0.26356,
+ 0.26384,
+ 0.26406,
+ 0.26388,
+ 0.26606,
+ 0.26592,
+ 0.265,
+ 0.26578,
+ 0.268,
+ 0.2666,
+ 0.26434,
+ 0.2666,
+ 0.26418,
+ 0.26448,
+ 0.26558,
+ 0.2644,
+ 0.26708,
+ 0.26714,
+ 0.2644,
+ 0.2675,
+ 0.26548,
+ 0.26528,
+ 0.26508,
+ 0.26738,
+ 0.26266,
+ 0.2658,
+ 0.26572,
+ 0.26576,
+ 0.26514,
+ 0.2658
+ ],
+ "test_acc": [
+ 0.2469,
+ 0.2437,
+ 0.255,
+ 0.2587,
+ 0.2612,
+ 0.2707,
+ 0.2697,
+ 0.2504,
+ 0.279,
+ 0.2707,
+ 0.2664,
+ 0.2693,
+ 0.2715,
+ 0.2832,
+ 0.2849,
+ 0.2633,
+ 0.2682,
+ 0.2874,
+ 0.2852,
+ 0.2671,
+ 0.273,
+ 0.2819,
+ 0.2654,
+ 0.2763,
+ 0.279,
+ 0.2924,
+ 0.281,
+ 0.2755,
+ 0.2706,
+ 0.2758,
+ 0.2812,
+ 0.2801,
+ 0.2847,
+ 0.2684,
+ 0.2679,
+ 0.2819,
+ 0.2548,
+ 0.2726,
+ 0.2794,
+ 0.2838,
+ 0.2834,
+ 0.2788,
+ 0.283,
+ 0.2721,
+ 0.2782,
+ 0.2742,
+ 0.2817,
+ 0.2752,
+ 0.2722,
+ 0.2812,
+ 0.2797,
+ 0.2792,
+ 0.2799,
+ 0.2747,
+ 0.2879,
+ 0.2769,
+ 0.2842,
+ 0.2793,
+ 0.2857,
+ 0.2797,
+ 0.2793,
+ 0.283,
+ 0.2889,
+ 0.2789,
+ 0.2748,
+ 0.2811,
+ 0.2814,
+ 0.2775,
+ 0.282,
+ 0.2787,
+ 0.2862,
+ 0.2861,
+ 0.2788,
+ 0.2763,
+ 0.272,
+ 0.2788,
+ 0.2818,
+ 0.2839,
+ 0.2865,
+ 0.2837,
+ 0.2898,
+ 0.2814,
+ 0.2784,
+ 0.2823,
+ 0.2852,
+ 0.2835,
+ 0.2848,
+ 0.2846,
+ 0.2821,
+ 0.2809,
+ 0.2812,
+ 0.2826,
+ 0.2818,
+ 0.2832,
+ 0.2834,
+ 0.2832,
+ 0.2838,
+ 0.2838,
+ 0.2838,
+ 0.2837
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.337674617767334,
+ 0.0006247189012356102,
+ 0.0002945333835668862,
+ -7.626566366525367e-05,
+ -0.00035525468410924077,
+ 4.714205351774581e-05
+ ],
+ "perturbation_rho": [
+ 0.005920294672250748,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "nudging": {
+ "0.001": [
+ -3.096647560596466e-07,
+ 0.0,
+ 0.0,
+ 0.0,
+ 9.313225746154785e-10,
+ 0.0
+ ],
+ "0.003": [
+ -9.313225746154785e-07,
+ 9.313225746154785e-10,
+ 0.0,
+ 0.0,
+ 9.313225746154785e-10,
+ 0.0
+ ],
+ "0.01": [
+ -2.87545844912529e-06,
+ 0.0,
+ 0.0,
+ 9.313225746154785e-10,
+ 9.313225746154785e-10,
+ 0.0
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 58616.81640625,
+ 1590750592.0,
+ 3689531648.0,
+ 5549292544.0,
+ 7886680064.0,
+ 9552689152.0,
+ 9643235328.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 1.978580996819801e-07,
+ 1.773336893995392e-10,
+ 1.7791043638304416e-10,
+ 1.7622008019468893e-10,
+ 1.761738255279255e-10,
+ 1.7640035265831244e-10,
+ 1.7640640337379665e-10
+ ]
+ },
+ "drift": {
+ "embed.weight": 352.51471697650226,
+ "embed.bias": 269.49001616797665,
+ "blocks.0.ln.weight": 9.43372656624646,
+ "blocks.0.w1.weight": 313.54087904771035,
+ "blocks.0.w1.bias": 290.62855542890503,
+ "blocks.0.w2.weight": 505.8716937587808,
+ "blocks.1.ln.weight": 8.808169542950294,
+ "blocks.1.w1.weight": 358.5357409826361,
+ "blocks.1.w1.bias": 345.5274778587847,
+ "blocks.1.w2.weight": 349.75977929168596,
+ "blocks.2.ln.weight": 9.276124324101515,
+ "blocks.2.w1.weight": 395.6550378108265,
+ "blocks.2.w1.bias": 371.0546436106137,
+ "blocks.2.w2.weight": 375.3367421271525,
+ "blocks.3.ln.weight": 10.24244368106198,
+ "blocks.3.w1.weight": 420.44883795775684,
+ "blocks.3.w1.bias": 397.1721710589656,
+ "blocks.3.w2.weight": 405.8311891944568,
+ "blocks.4.ln.weight": 10.577140914534366,
+ "blocks.4.w1.weight": 436.864273411592,
+ "blocks.4.w1.bias": 406.9723932351042,
+ "blocks.4.w2.weight": 404.6446833274338,
+ "blocks.5.ln.weight": 7.782034028860308,
+ "blocks.5.w1.weight": 310.2471895048895,
+ "blocks.5.w1.bias": 297.2855707982679,
+ "blocks.5.w2.weight": 270.3724536312007,
+ "out_ln.weight": 0.5880164032165464,
+ "out_head.weight": 8.450568347443314,
+ "out_head.bias": 0.7740018910126388
+ }
+ },
+ "fa": {
+ "log": {
+ "train_loss": [
+ 2.038021390991211,
+ 1.9517871558380127,
+ 1.923053580093384,
+ 1.9045403602600097,
+ 1.8952905172729493,
+ 1.8863004998397828,
+ 1.8793055545043946,
+ 1.868332847251892,
+ 1.860797550354004,
+ 1.8578368386077881,
+ 1.8483906720352172,
+ 1.8424116570663451,
+ 1.8290137046051025,
+ 1.8305858782577515,
+ 1.823958169631958,
+ 1.8175954293823242,
+ 1.8124929406356811,
+ 1.8102095357513428,
+ 1.8039513228607178,
+ 1.8015951740264893,
+ 1.7953115439605714,
+ 1.7889242324447632,
+ 1.7881110052871705,
+ 1.7843929150390625,
+ 1.7783852965545655,
+ 1.777275090942383,
+ 1.7724220377349853,
+ 1.7673138269424438,
+ 1.7659701934814453,
+ 1.7594620475006104,
+ 1.753105486755371,
+ 1.7537677935028075,
+ 1.7502301782989502,
+ 1.743755245666504,
+ 1.7426675580596924,
+ 1.7381902320098876,
+ 1.7371277405548096,
+ 1.7380020862197876,
+ 1.7344951665878297,
+ 1.732631077194214,
+ 1.7297903375244141,
+ 1.7305091592025756,
+ 1.72773092628479,
+ 1.7255595262908936,
+ 1.7265567044067383,
+ 1.7226529630279541,
+ 1.7242008368301391,
+ 1.7205993241119384,
+ 1.714460986251831,
+ 1.7183882989501953,
+ 1.7157616848373414,
+ 1.7143050561141968,
+ 1.711619351425171,
+ 1.712949155960083,
+ 1.7116730854797364,
+ 1.7097789264297485,
+ 1.7088176770401,
+ 1.7031783606338502,
+ 1.705644631690979,
+ 1.702429769935608,
+ 1.7007943072891236,
+ 1.6994391622924805,
+ 1.698121346435547,
+ 1.6975506435775758,
+ 1.6984819930648805,
+ 1.6961595495224,
+ 1.6988952493286134,
+ 1.700139475479126,
+ 1.698183412246704,
+ 1.6925607485580445,
+ 1.6942372484970092,
+ 1.695046120262146,
+ 1.6923927303695678,
+ 1.6946959600448608,
+ 1.6924981302261353,
+ 1.688026438217163,
+ 1.6864081888580322,
+ 1.6876143859863282,
+ 1.687906523399353,
+ 1.6851787896347046,
+ 1.6870249395370482,
+ 1.6865479514312745,
+ 1.6857007721710204,
+ 1.686013254776001,
+ 1.685618132247925,
+ 1.6866127777099609,
+ 1.682880577697754,
+ 1.6817467867660523,
+ 1.6864028040695191,
+ 1.683715857887268,
+ 1.6836617621612548,
+ 1.6802171184921264,
+ 1.6822573038101196,
+ 1.6837931204223633,
+ 1.6831412961196899,
+ 1.6796661951065064,
+ 1.682324230041504,
+ 1.6844535943222045,
+ 1.6789856439590454,
+ 1.680855456161499
+ ],
+ "train_acc": [
+ 0.2477,
+ 0.28914,
+ 0.30422,
+ 0.30836,
+ 0.31314,
+ 0.31906,
+ 0.3247,
+ 0.32558,
+ 0.33112,
+ 0.33134,
+ 0.33596,
+ 0.33794,
+ 0.34382,
+ 0.34438,
+ 0.3429,
+ 0.3491,
+ 0.34826,
+ 0.351,
+ 0.35252,
+ 0.3536,
+ 0.35608,
+ 0.36104,
+ 0.35948,
+ 0.36006,
+ 0.36364,
+ 0.36444,
+ 0.36608,
+ 0.367,
+ 0.36846,
+ 0.36866,
+ 0.37048,
+ 0.37416,
+ 0.37428,
+ 0.37498,
+ 0.37714,
+ 0.37884,
+ 0.37872,
+ 0.3782,
+ 0.381,
+ 0.38058,
+ 0.3804,
+ 0.38016,
+ 0.38418,
+ 0.38552,
+ 0.38428,
+ 0.3811,
+ 0.38204,
+ 0.38364,
+ 0.38758,
+ 0.38694,
+ 0.38588,
+ 0.38568,
+ 0.388,
+ 0.38704,
+ 0.39158,
+ 0.38884,
+ 0.38912,
+ 0.38936,
+ 0.39034,
+ 0.39182,
+ 0.39316,
+ 0.3944,
+ 0.39182,
+ 0.39272,
+ 0.39394,
+ 0.39482,
+ 0.39348,
+ 0.39242,
+ 0.39554,
+ 0.3963,
+ 0.39508,
+ 0.39252,
+ 0.39634,
+ 0.39332,
+ 0.39612,
+ 0.39644,
+ 0.39648,
+ 0.39668,
+ 0.3985,
+ 0.3995,
+ 0.39528,
+ 0.39904,
+ 0.39988,
+ 0.39688,
+ 0.39954,
+ 0.39648,
+ 0.4014,
+ 0.39972,
+ 0.39808,
+ 0.4019,
+ 0.39968,
+ 0.40222,
+ 0.39884,
+ 0.39954,
+ 0.39804,
+ 0.40034,
+ 0.39982,
+ 0.39856,
+ 0.40168,
+ 0.4006
+ ],
+ "test_acc": [
+ 0.2942,
+ 0.311,
+ 0.3308,
+ 0.342,
+ 0.3419,
+ 0.3528,
+ 0.3504,
+ 0.3462,
+ 0.3605,
+ 0.367,
+ 0.3696,
+ 0.3674,
+ 0.3716,
+ 0.3801,
+ 0.3769,
+ 0.3624,
+ 0.3792,
+ 0.3785,
+ 0.3758,
+ 0.3779,
+ 0.3853,
+ 0.3821,
+ 0.3816,
+ 0.3891,
+ 0.3866,
+ 0.3936,
+ 0.3896,
+ 0.3908,
+ 0.397,
+ 0.3971,
+ 0.4013,
+ 0.4023,
+ 0.393,
+ 0.3983,
+ 0.3977,
+ 0.3969,
+ 0.3982,
+ 0.405,
+ 0.4014,
+ 0.4089,
+ 0.4078,
+ 0.3985,
+ 0.4063,
+ 0.4039,
+ 0.4008,
+ 0.4096,
+ 0.4111,
+ 0.4059,
+ 0.4128,
+ 0.4079,
+ 0.4066,
+ 0.4115,
+ 0.407,
+ 0.4156,
+ 0.4156,
+ 0.4208,
+ 0.4164,
+ 0.4206,
+ 0.4124,
+ 0.4186,
+ 0.4142,
+ 0.4123,
+ 0.4105,
+ 0.4153,
+ 0.4198,
+ 0.4192,
+ 0.419,
+ 0.4198,
+ 0.4202,
+ 0.417,
+ 0.4233,
+ 0.4224,
+ 0.4164,
+ 0.4183,
+ 0.4193,
+ 0.4178,
+ 0.4189,
+ 0.4232,
+ 0.4184,
+ 0.4206,
+ 0.4223,
+ 0.4222,
+ 0.417,
+ 0.4199,
+ 0.421,
+ 0.4202,
+ 0.4181,
+ 0.4177,
+ 0.4214,
+ 0.4186,
+ 0.4201,
+ 0.4189,
+ 0.4221,
+ 0.4205,
+ 0.4194,
+ 0.42,
+ 0.4203,
+ 0.4199,
+ 0.4196,
+ 0.4195
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.015272621065378189,
+ 0.05551045760512352,
+ 0.03781959414482117,
+ -0.09142173081636429,
+ -0.12766994535923004,
+ 0.9957026839256287
+ ],
+ "perturbation_rho": [
+ -0.044239602982997894,
+ 0.018639111891388893,
+ -0.002674652263522148,
+ -0.024060344323515892,
+ -0.007919290103018284,
+ -0.016996072605252266
+ ],
+ "nudging": {
+ "0.001": [
+ -2.239597961306572e-06,
+ -6.057089194655418e-07,
+ -1.341104507446289e-07,
+ 1.417938619852066e-07,
+ 1.6426201909780502e-07,
+ -1.8667196854948997e-06
+ ],
+ "0.003": [
+ -6.492482498288155e-06,
+ -1.5690457075834274e-06,
+ -4.0302984416484833e-07,
+ 5.105976015329361e-07,
+ 7.660128176212311e-07,
+ -6.384681910276413e-06
+ ],
+ "0.01": [
+ -2.1643005311489105e-05,
+ -5.265465006232262e-06,
+ -1.4334218576550484e-06,
+ 2.071727067232132e-06,
+ 2.557528205215931e-06,
+ -2.2026244550943375e-05
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 5698.91357421875,
+ 74089.609375,
+ 464746.75,
+ 571451.9375,
+ 1133723.625,
+ 1542811.875,
+ 774708.375
+ ],
+ "bp_grad_norms_per_layer": [
+ 3.8776230212533846e-05,
+ 3.033989969480899e-06,
+ 9.73796886682976e-07,
+ 8.570191312173847e-07,
+ 8.443464594165562e-07,
+ 8.415062779931759e-07,
+ 8.336798487107444e-07
+ ]
+ },
+ "drift": {
+ "embed.weight": 39.486431642766554,
+ "embed.bias": 14.997056918535808,
+ "blocks.0.ln.weight": 1.193207647495217,
+ "blocks.0.w1.weight": 15.819034065039398,
+ "blocks.0.w1.bias": 13.818478530432463,
+ "blocks.0.w2.weight": 51.89139321984625,
+ "blocks.1.ln.weight": 1.0430825935788601,
+ "blocks.1.w1.weight": 19.906462986471904,
+ "blocks.1.w1.bias": 13.39027258705086,
+ "blocks.1.w2.weight": 46.62053425901664,
+ "blocks.2.ln.weight": 0.7864666972877575,
+ "blocks.2.w1.weight": 18.270196046735624,
+ "blocks.2.w1.bias": 12.944067249835973,
+ "blocks.2.w2.weight": 49.53460850460699,
+ "blocks.3.ln.weight": 0.64141877927046,
+ "blocks.3.w1.weight": 20.764880942811217,
+ "blocks.3.w1.bias": 20.679665524082093,
+ "blocks.3.w2.weight": 25.665386885036355,
+ "blocks.4.ln.weight": 0.5680829244316266,
+ "blocks.4.w1.weight": 20.078876458586997,
+ "blocks.4.w1.bias": 21.20100063860491,
+ "blocks.4.w2.weight": 30.60469406190784,
+ "blocks.5.ln.weight": 0.6546824348706847,
+ "blocks.5.w1.weight": 18.8873912461224,
+ "blocks.5.w1.bias": 18.79210459184489,
+ "blocks.5.w2.weight": 45.45712755218628,
+ "out_ln.weight": 0.3591377698736513,
+ "out_head.weight": 6.323626106875271,
+ "out_head.bias": 0.8856670357666573
+ }
+ }
+ },
+ "config": {
+ "dataset": "cifar10",
+ "d_hidden": 512,
+ "num_blocks": 6,
+ "batch_size": 128,
+ "epochs": 100,
+ "lr": 0.001,
+ "lr_fb": 0.001,
+ "wd": 0.01,
+ "lam": 0.1,
+ "K": 4,
+ "sigma_bridge": 0.05,
+ "ema_momentum": 0.995,
+ "term_grad_weight": 1.0,
+ "seeds": [
+ 4
+ ],
+ "gpu": 0,
+ "output_dir": "results/fa_dfa_d512_L6_seed4",
+ "methods": [
+ "fa",
+ "dfa"
+ ],
+ "random_targets": false,
+ "penalty_lam": 0.0,
+ "num_classes": 10
+ }
+} \ No newline at end of file
diff --git a/results/fa_dfa_d512_L6_seed5/results_cifar10.json b/results/fa_dfa_d512_L6_seed5/results_cifar10.json
new file mode 100644
index 0000000..e974678
--- /dev/null
+++ b/results/fa_dfa_d512_L6_seed5/results_cifar10.json
@@ -0,0 +1,837 @@
+{
+ "5": {
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 2.0602571796417237,
+ 2.0357400941467283,
+ 2.0281838052368166,
+ 2.023866386795044,
+ 2.0154901094818114,
+ 2.019438636016846,
+ 2.0162963735580446,
+ 2.0127672254180906,
+ 2.0099401387786866,
+ 2.011543902282715,
+ 2.006219865684509,
+ 2.0047207692718505,
+ 2.0029416635894775,
+ 2.0006940887069704,
+ 2.0026997183609008,
+ 2.0016165689468384,
+ 2.000759786224365,
+ 1.9997175212860108,
+ 1.9980090442276002,
+ 1.998993673324585,
+ 1.996654589538574,
+ 1.9994389457702637,
+ 1.9997356970977784,
+ 1.993837992324829,
+ 1.9959214011383057,
+ 1.9948663213729858,
+ 1.9960267625427246,
+ 1.9953035424041747,
+ 1.9930475028991699,
+ 1.9938180725479127,
+ 1.9931120376586915,
+ 1.989824568710327,
+ 1.9910550234985351,
+ 1.9906865880584717,
+ 1.9898072389984132,
+ 1.9914395150375366,
+ 1.9915398007202147,
+ 1.9909269763183595,
+ 1.9913815157699586,
+ 1.9884502619934081,
+ 1.991486932411194,
+ 1.989191399459839,
+ 1.989710302581787,
+ 1.9897258306884766,
+ 1.9906724350738525,
+ 1.9914843771362305,
+ 1.9885967330169678,
+ 1.98751413482666,
+ 1.989536333770752,
+ 1.98715817817688,
+ 1.9881259790802002,
+ 1.9869521377182007,
+ 1.9907182498931886,
+ 1.9890414456939698,
+ 1.9886535820770264,
+ 1.9867755039215087,
+ 1.988041506919861,
+ 1.9891718532943725,
+ 1.9870871344375611,
+ 1.985717644920349,
+ 1.9884497722625731,
+ 1.9873963590240478,
+ 1.9863690199279784,
+ 1.9873637462997435,
+ 1.9868278685760499,
+ 1.9860613510894776,
+ 1.9854717936706543,
+ 1.9856478284454346,
+ 1.9866084854888917,
+ 1.987229610748291,
+ 1.9831793658828736,
+ 1.9870755458831788,
+ 1.985736505126953,
+ 1.9868509146881104,
+ 1.9870286437225342,
+ 1.9859972204971315,
+ 1.9860080694961548,
+ 1.9868842920684815,
+ 1.9851076070404052,
+ 1.985850848121643,
+ 1.9842121643447876,
+ 1.9848132551574706,
+ 1.9846837445831298,
+ 1.9864671915435792,
+ 1.9831796479034425,
+ 1.984968702774048,
+ 1.983607007446289,
+ 1.9863699700546265,
+ 1.9863424487304688,
+ 1.98700967628479,
+ 1.9837391945648193,
+ 1.9848160073471068,
+ 1.9845281774139405,
+ 1.9848566493988038,
+ 1.9822382134628296,
+ 1.9836605060577392,
+ 1.9821798385620117,
+ 1.9824478280639648,
+ 1.9817402114486695,
+ 1.9846896728515624
+ ],
+ "train_acc": [
+ 0.24254,
+ 0.2517,
+ 0.25592,
+ 0.25594,
+ 0.26192,
+ 0.25714,
+ 0.26284,
+ 0.26238,
+ 0.26512,
+ 0.26496,
+ 0.26548,
+ 0.26742,
+ 0.2712,
+ 0.2708,
+ 0.268,
+ 0.27104,
+ 0.27124,
+ 0.27076,
+ 0.2733,
+ 0.27132,
+ 0.27188,
+ 0.26918,
+ 0.27278,
+ 0.27414,
+ 0.2739,
+ 0.27446,
+ 0.27544,
+ 0.2747,
+ 0.27416,
+ 0.27496,
+ 0.27546,
+ 0.27906,
+ 0.27846,
+ 0.27636,
+ 0.27836,
+ 0.27738,
+ 0.27604,
+ 0.27838,
+ 0.27826,
+ 0.28024,
+ 0.27756,
+ 0.28068,
+ 0.2774,
+ 0.27826,
+ 0.27808,
+ 0.2748,
+ 0.27776,
+ 0.27942,
+ 0.28244,
+ 0.28036,
+ 0.28156,
+ 0.28332,
+ 0.27896,
+ 0.28098,
+ 0.28062,
+ 0.27874,
+ 0.2805,
+ 0.28012,
+ 0.2826,
+ 0.28204,
+ 0.27786,
+ 0.2819,
+ 0.28144,
+ 0.28236,
+ 0.28224,
+ 0.28264,
+ 0.2807,
+ 0.28432,
+ 0.28308,
+ 0.28156,
+ 0.28262,
+ 0.28086,
+ 0.28328,
+ 0.28292,
+ 0.28262,
+ 0.2819,
+ 0.28378,
+ 0.28152,
+ 0.28326,
+ 0.28238,
+ 0.28308,
+ 0.28294,
+ 0.28316,
+ 0.28292,
+ 0.28588,
+ 0.2832,
+ 0.28346,
+ 0.28494,
+ 0.2829,
+ 0.28252,
+ 0.28328,
+ 0.28276,
+ 0.28382,
+ 0.28216,
+ 0.28412,
+ 0.28436,
+ 0.28506,
+ 0.28418,
+ 0.28462,
+ 0.28428
+ ],
+ "test_acc": [
+ 0.2458,
+ 0.2841,
+ 0.28,
+ 0.2941,
+ 0.2898,
+ 0.2696,
+ 0.2791,
+ 0.2876,
+ 0.2822,
+ 0.3025,
+ 0.2919,
+ 0.292,
+ 0.2972,
+ 0.2946,
+ 0.2821,
+ 0.2924,
+ 0.2897,
+ 0.2957,
+ 0.3053,
+ 0.2873,
+ 0.3059,
+ 0.2873,
+ 0.2865,
+ 0.3017,
+ 0.3023,
+ 0.2803,
+ 0.3064,
+ 0.2856,
+ 0.2919,
+ 0.2974,
+ 0.2917,
+ 0.2949,
+ 0.2862,
+ 0.2929,
+ 0.2952,
+ 0.2864,
+ 0.3013,
+ 0.3011,
+ 0.2934,
+ 0.2975,
+ 0.2961,
+ 0.304,
+ 0.3058,
+ 0.2954,
+ 0.3001,
+ 0.2882,
+ 0.304,
+ 0.2976,
+ 0.3002,
+ 0.291,
+ 0.3019,
+ 0.3012,
+ 0.3085,
+ 0.3014,
+ 0.3042,
+ 0.2999,
+ 0.2951,
+ 0.305,
+ 0.3104,
+ 0.3037,
+ 0.3011,
+ 0.3046,
+ 0.2999,
+ 0.3087,
+ 0.2981,
+ 0.3044,
+ 0.298,
+ 0.2931,
+ 0.3016,
+ 0.306,
+ 0.2997,
+ 0.3019,
+ 0.2984,
+ 0.3028,
+ 0.2957,
+ 0.3086,
+ 0.3062,
+ 0.2985,
+ 0.2995,
+ 0.31,
+ 0.3028,
+ 0.305,
+ 0.2993,
+ 0.3029,
+ 0.3047,
+ 0.3011,
+ 0.3028,
+ 0.3008,
+ 0.302,
+ 0.3028,
+ 0.3053,
+ 0.305,
+ 0.304,
+ 0.3041,
+ 0.3021,
+ 0.3024,
+ 0.3022,
+ 0.3019,
+ 0.3021,
+ 0.3022
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.4137295186519623,
+ 0.00037798876292072237,
+ 0.00035168789327144623,
+ -0.00011552235810086131,
+ 0.0004928180132992566,
+ 0.0006692470051348209
+ ],
+ "perturbation_rho": [
+ -0.004769737366586924,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "nudging": {
+ "0.001": [
+ -4.731118679046631e-07,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.003": [
+ -1.455657184123993e-06,
+ -9.313225746154785e-10,
+ -9.313225746154785e-10,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.01": [
+ -4.32552769780159e-06,
+ -1.862645149230957e-09,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 53850.484375,
+ 999738112.0,
+ 1825332736.0,
+ 2847918592.0,
+ 3948823552.0,
+ 7465979392.0,
+ 8693204992.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.7038154826186656e-07,
+ 3.3068986438067327e-10,
+ 3.3043778824293213e-10,
+ 3.3058311643685556e-10,
+ 3.306660778523707e-10,
+ 3.3053956793871464e-10,
+ 3.3055713721807933e-10
+ ]
+ },
+ "drift": {
+ "embed.weight": 326.316807505004,
+ "embed.bias": 247.64016879156944,
+ "blocks.0.ln.weight": 10.506143366890935,
+ "blocks.0.w1.weight": 266.6641005309724,
+ "blocks.0.w1.bias": 237.30788612441194,
+ "blocks.0.w2.weight": 463.4592869979373,
+ "blocks.1.ln.weight": 8.041758423869682,
+ "blocks.1.w1.weight": 275.6017801051782,
+ "blocks.1.w1.bias": 242.84249700605667,
+ "blocks.1.w2.weight": 293.6559726136312,
+ "blocks.2.ln.weight": 8.38043701730794,
+ "blocks.2.w1.weight": 303.9003341110909,
+ "blocks.2.w1.bias": 274.77693120914864,
+ "blocks.2.w2.weight": 311.7253398049225,
+ "blocks.3.ln.weight": 8.344881260946064,
+ "blocks.3.w1.weight": 324.91067284816666,
+ "blocks.3.w1.bias": 320.84528008040263,
+ "blocks.3.w2.weight": 323.2526807364217,
+ "blocks.4.ln.weight": 10.649638215660875,
+ "blocks.4.w1.weight": 442.18421448330207,
+ "blocks.4.w1.bias": 419.08703726739526,
+ "blocks.4.w2.weight": 435.13630410704593,
+ "blocks.5.ln.weight": 10.016310662065466,
+ "blocks.5.w1.weight": 397.1121641226028,
+ "blocks.5.w1.bias": 370.9478671282672,
+ "blocks.5.w2.weight": 383.0732880205366,
+ "out_ln.weight": 0.7323829331223068,
+ "out_head.weight": 9.997613880770942,
+ "out_head.bias": 0.47019353243584805
+ }
+ },
+ "fa": {
+ "log": {
+ "train_loss": [
+ 2.0245736110687256,
+ 1.9432434065246582,
+ 1.9171724227905274,
+ 1.9035633584594727,
+ 1.8866331351470946,
+ 1.8834057013702392,
+ 1.8780549462509155,
+ 1.873760415649414,
+ 1.86404331199646,
+ 1.8641292553329467,
+ 1.8578160486602784,
+ 1.8527875860595704,
+ 1.8485828662109376,
+ 1.8464561464691163,
+ 1.8462472164154053,
+ 1.8430843777084351,
+ 1.8429961044692993,
+ 1.8410946324920654,
+ 1.8362357471466064,
+ 1.8319243711090087,
+ 1.8236686227416992,
+ 1.8258946035766601,
+ 1.8234262411117554,
+ 1.8137903323364257,
+ 1.811674436264038,
+ 1.808376842956543,
+ 1.809517276611328,
+ 1.8052343613052368,
+ 1.799909278907776,
+ 1.7993224626922608,
+ 1.7999323937225342,
+ 1.7926906624984742,
+ 1.7933230242156983,
+ 1.7922531407928466,
+ 1.7895995733261107,
+ 1.7906105719375611,
+ 1.7885604776763917,
+ 1.7840546982192993,
+ 1.7896481491088867,
+ 1.7832089348602296,
+ 1.782369646949768,
+ 1.7770448540878296,
+ 1.7808699810409545,
+ 1.7771298468017578,
+ 1.7774345711898805,
+ 1.7765879013061523,
+ 1.7742172146987916,
+ 1.7695884064102172,
+ 1.775497437171936,
+ 1.767691201248169,
+ 1.7683605837249756,
+ 1.7657782846069336,
+ 1.7674894732666016,
+ 1.7704336560058593,
+ 1.7661452458190918,
+ 1.7612660248947143,
+ 1.7608598317718507,
+ 1.7622494547271728,
+ 1.7596149068450928,
+ 1.7570250023651124,
+ 1.7613954134368897,
+ 1.7564489492034911,
+ 1.7566620055389404,
+ 1.7558204865264893,
+ 1.7551631838989257,
+ 1.7566033951187134,
+ 1.7519007582855224,
+ 1.7499195809936523,
+ 1.7534941720581054,
+ 1.7503434635162354,
+ 1.74504672996521,
+ 1.7483937967681884,
+ 1.7457905151367188,
+ 1.7486124094009399,
+ 1.7461996807479858,
+ 1.7445868758392333,
+ 1.7431557595062255,
+ 1.7459273419570922,
+ 1.7443120336151123,
+ 1.7433632315063476,
+ 1.7413235707855224,
+ 1.7421810306167602,
+ 1.7423726794815064,
+ 1.7430526740264893,
+ 1.7426294480133058,
+ 1.7443564529037476,
+ 1.7412052463531493,
+ 1.7419821909332276,
+ 1.742747957763672,
+ 1.7393953549194336,
+ 1.7356409253692626,
+ 1.7367648685455321,
+ 1.7391734333038331,
+ 1.7373552332305908,
+ 1.7321791064071654,
+ 1.737231011619568,
+ 1.7380055041122437,
+ 1.7336925805664063,
+ 1.735386132774353,
+ 1.7398383757781983
+ ],
+ "train_acc": [
+ 0.25622,
+ 0.29234,
+ 0.30374,
+ 0.307,
+ 0.31608,
+ 0.31724,
+ 0.32094,
+ 0.32354,
+ 0.32676,
+ 0.33046,
+ 0.33084,
+ 0.33444,
+ 0.33722,
+ 0.33964,
+ 0.3378,
+ 0.33812,
+ 0.33888,
+ 0.34132,
+ 0.3422,
+ 0.34414,
+ 0.34496,
+ 0.34604,
+ 0.3475,
+ 0.34998,
+ 0.35366,
+ 0.3559,
+ 0.35212,
+ 0.35394,
+ 0.35768,
+ 0.35582,
+ 0.35698,
+ 0.3617,
+ 0.35966,
+ 0.35904,
+ 0.36126,
+ 0.36076,
+ 0.36356,
+ 0.36338,
+ 0.3616,
+ 0.36454,
+ 0.36376,
+ 0.36512,
+ 0.36348,
+ 0.36704,
+ 0.36486,
+ 0.36646,
+ 0.36906,
+ 0.368,
+ 0.36894,
+ 0.37176,
+ 0.36712,
+ 0.37086,
+ 0.37036,
+ 0.37114,
+ 0.37032,
+ 0.37276,
+ 0.37386,
+ 0.37112,
+ 0.37256,
+ 0.3721,
+ 0.37356,
+ 0.37602,
+ 0.37426,
+ 0.37504,
+ 0.37378,
+ 0.37652,
+ 0.37642,
+ 0.37658,
+ 0.3768,
+ 0.3798,
+ 0.37862,
+ 0.37708,
+ 0.37848,
+ 0.37812,
+ 0.37732,
+ 0.37934,
+ 0.38062,
+ 0.37766,
+ 0.37892,
+ 0.38066,
+ 0.38076,
+ 0.38064,
+ 0.38124,
+ 0.38186,
+ 0.38218,
+ 0.37966,
+ 0.38064,
+ 0.38088,
+ 0.37996,
+ 0.38164,
+ 0.38374,
+ 0.38408,
+ 0.38228,
+ 0.3827,
+ 0.38392,
+ 0.38448,
+ 0.38308,
+ 0.38368,
+ 0.38292,
+ 0.38126
+ ],
+ "test_acc": [
+ 0.2956,
+ 0.3353,
+ 0.3321,
+ 0.3526,
+ 0.3442,
+ 0.3427,
+ 0.3492,
+ 0.3563,
+ 0.354,
+ 0.3594,
+ 0.3655,
+ 0.3603,
+ 0.3701,
+ 0.3726,
+ 0.3688,
+ 0.3702,
+ 0.3634,
+ 0.3695,
+ 0.3742,
+ 0.3755,
+ 0.3706,
+ 0.3715,
+ 0.3676,
+ 0.3816,
+ 0.3786,
+ 0.3874,
+ 0.3836,
+ 0.3703,
+ 0.3798,
+ 0.379,
+ 0.3806,
+ 0.3918,
+ 0.3845,
+ 0.3862,
+ 0.3933,
+ 0.3876,
+ 0.3924,
+ 0.3857,
+ 0.3748,
+ 0.3943,
+ 0.3942,
+ 0.389,
+ 0.398,
+ 0.3839,
+ 0.3964,
+ 0.3949,
+ 0.3943,
+ 0.3936,
+ 0.3949,
+ 0.3982,
+ 0.3982,
+ 0.4012,
+ 0.3968,
+ 0.3988,
+ 0.3977,
+ 0.3977,
+ 0.401,
+ 0.3978,
+ 0.4017,
+ 0.4016,
+ 0.4013,
+ 0.3972,
+ 0.3961,
+ 0.4032,
+ 0.3959,
+ 0.4004,
+ 0.3967,
+ 0.3993,
+ 0.4021,
+ 0.4044,
+ 0.4053,
+ 0.4023,
+ 0.4054,
+ 0.4061,
+ 0.4047,
+ 0.403,
+ 0.4036,
+ 0.4032,
+ 0.3985,
+ 0.4059,
+ 0.4049,
+ 0.4061,
+ 0.4017,
+ 0.4059,
+ 0.4063,
+ 0.4067,
+ 0.4039,
+ 0.4053,
+ 0.4048,
+ 0.4084,
+ 0.4051,
+ 0.4074,
+ 0.4073,
+ 0.4054,
+ 0.4059,
+ 0.4059,
+ 0.4056,
+ 0.4062,
+ 0.4061,
+ 0.4058
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.02408183179795742,
+ 0.061381518840789795,
+ -0.06236571818590164,
+ -0.022614534944295883,
+ 0.005173904821276665,
+ 0.9961987137794495
+ ],
+ "perturbation_rho": [
+ -0.0075406357645988464,
+ 0.015521236695349216,
+ -0.03143823519349098,
+ -0.03976670280098915,
+ 0.044851042330265045,
+ -0.006162412464618683
+ ],
+ "nudging": {
+ "0.001": [
+ -2.3316824808716774e-06,
+ -2.2514723241329193e-07,
+ 1.0617077350616455e-07,
+ 2.3283064365386963e-09,
+ -6.05359673500061e-09,
+ -1.5987316146492958e-06
+ ],
+ "0.003": [
+ -6.882240995764732e-06,
+ -6.901100277900696e-07,
+ 3.80445271730423e-07,
+ 1.1408701539039612e-08,
+ -5.21540641784668e-08,
+ -5.463254638016224e-06
+ ],
+ "0.01": [
+ -2.2817635908722878e-05,
+ -2.484419383108616e-06,
+ 1.1968659237027168e-06,
+ 3.1816307455301285e-07,
+ -1.9674189388751984e-07,
+ -1.9047758542001247e-05
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 8055.30419921875,
+ 113125.40625,
+ 1163908.5,
+ 1491131.625,
+ 1596391.25,
+ 1615158.375,
+ 896264.875
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.2761472791898996e-05,
+ 1.4049503533897223e-06,
+ 7.91284321621788e-07,
+ 7.916268032204243e-07,
+ 7.900576974861906e-07,
+ 7.786943569954019e-07,
+ 7.593308168907242e-07
+ ]
+ },
+ "drift": {
+ "embed.weight": 54.562706327622614,
+ "embed.bias": 17.31102523118792,
+ "blocks.0.ln.weight": 1.1930605549913966,
+ "blocks.0.w1.weight": 17.719005515431316,
+ "blocks.0.w1.bias": 12.672636747852074,
+ "blocks.0.w2.weight": 57.38339155194176,
+ "blocks.1.ln.weight": 1.1832029127181742,
+ "blocks.1.w1.weight": 25.16862327064878,
+ "blocks.1.w1.bias": 22.86442569965511,
+ "blocks.1.w2.weight": 40.800794703713116,
+ "blocks.2.ln.weight": 0.7781541585073879,
+ "blocks.2.w1.weight": 21.816615196629947,
+ "blocks.2.w1.bias": 21.85112016896182,
+ "blocks.2.w2.weight": 56.80267641124345,
+ "blocks.3.ln.weight": 0.6733768384350268,
+ "blocks.3.w1.weight": 19.172490261311314,
+ "blocks.3.w1.bias": 19.28818718647845,
+ "blocks.3.w2.weight": 56.812396875482676,
+ "blocks.4.ln.weight": 0.4559901335911821,
+ "blocks.4.w1.weight": 14.442025794898862,
+ "blocks.4.w1.bias": 12.350979488836783,
+ "blocks.4.w2.weight": 51.455152706019774,
+ "blocks.5.ln.weight": 0.6410944171187689,
+ "blocks.5.w1.weight": 19.462352016085756,
+ "blocks.5.w1.bias": 18.71679203171091,
+ "blocks.5.w2.weight": 54.03878336670424,
+ "out_ln.weight": 0.3528795738919825,
+ "out_head.weight": 6.138358709909979,
+ "out_head.bias": 0.7832523450909458
+ }
+ }
+ },
+ "config": {
+ "dataset": "cifar10",
+ "d_hidden": 512,
+ "num_blocks": 6,
+ "batch_size": 128,
+ "epochs": 100,
+ "lr": 0.001,
+ "lr_fb": 0.001,
+ "wd": 0.01,
+ "lam": 0.1,
+ "K": 4,
+ "sigma_bridge": 0.05,
+ "ema_momentum": 0.995,
+ "term_grad_weight": 1.0,
+ "seeds": [
+ 5
+ ],
+ "gpu": 0,
+ "output_dir": "results/fa_dfa_d512_L6_seed5",
+ "methods": [
+ "fa",
+ "dfa"
+ ],
+ "random_targets": false,
+ "penalty_lam": 0.0,
+ "num_classes": 10
+ }
+} \ No newline at end of file
diff --git a/results/fa_dfa_d512_L6_seed6/results_cifar10.json b/results/fa_dfa_d512_L6_seed6/results_cifar10.json
new file mode 100644
index 0000000..e57be14
--- /dev/null
+++ b/results/fa_dfa_d512_L6_seed6/results_cifar10.json
@@ -0,0 +1,837 @@
+{
+ "6": {
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 2.058094557647705,
+ 2.0324147889709474,
+ 2.0218327713394166,
+ 2.0282914637756346,
+ 2.021828155441284,
+ 2.0190604560852052,
+ 2.0181676456069946,
+ 2.015587470474243,
+ 2.0169739764022827,
+ 2.012877261199951,
+ 2.015299163284302,
+ 2.015902668991089,
+ 2.010847349205017,
+ 2.0069280560302736,
+ 2.008257807006836,
+ 2.0082109774017334,
+ 2.006445485191345,
+ 2.0060603087615965,
+ 2.0080129043579102,
+ 2.0026243621444704,
+ 2.0042781149291993,
+ 2.0038208263397217,
+ 2.00284217376709,
+ 2.0032863708496094,
+ 2.003503814620972,
+ 2.0014031100082397,
+ 2.00154658203125,
+ 2.0027565859985352,
+ 1.9988653813934327,
+ 2.0048675049591065,
+ 2.002612951965332,
+ 1.9998337776184083,
+ 2.0022170097351073,
+ 2.0006915353012085,
+ 1.9998700122451782,
+ 2.001532240638733,
+ 2.0009784855651858,
+ 2.001896736717224,
+ 1.997888755760193,
+ 1.9997327941894532,
+ 1.998836244468689,
+ 1.9972671249008178,
+ 1.9991955379867554,
+ 1.996907271156311,
+ 1.9989860800933839,
+ 1.9979305069732667,
+ 1.9979226346588135,
+ 1.9973365603637696,
+ 1.9991827986145019,
+ 1.9979995524597167,
+ 1.9976954571533203,
+ 1.9975597812652588,
+ 1.9963330318450927,
+ 1.9988955184173585,
+ 1.9985223587036134,
+ 1.9940103855895996,
+ 1.9969634999084473,
+ 1.9979491520690917,
+ 1.9940481422424317,
+ 1.9955107457733154,
+ 1.9963652723693848,
+ 1.9967138528060913,
+ 1.9949829993438721,
+ 1.9970767431259155,
+ 1.996754500579834,
+ 1.9966884062194825,
+ 1.9968673025131225,
+ 1.9936358183288574,
+ 1.994392073059082,
+ 1.9950825340270997,
+ 1.9949580252838135,
+ 1.9944661192321778,
+ 1.994148543624878,
+ 1.992705733795166,
+ 1.9950622162246705,
+ 1.9951953311538697,
+ 1.9943858066558837,
+ 1.9932064359283448,
+ 1.9938985696411133,
+ 1.992524094696045,
+ 1.9919383232116699,
+ 1.9938886001586915,
+ 1.992321823425293,
+ 1.993851443786621,
+ 1.99483976688385,
+ 1.991985835800171,
+ 1.995015989379883,
+ 1.9928585873413085,
+ 1.9930935015487672,
+ 1.9924035341644286,
+ 1.992943452758789,
+ 1.992181587677002,
+ 1.992508976135254,
+ 1.9930797433471679,
+ 1.9915329122924805,
+ 1.9912273866271972,
+ 1.9927945401000977,
+ 1.991996759376526,
+ 1.992920351409912,
+ 1.99362177444458
+ ],
+ "train_acc": [
+ 0.24312,
+ 0.25066,
+ 0.25642,
+ 0.25462,
+ 0.25868,
+ 0.25902,
+ 0.25752,
+ 0.26142,
+ 0.26284,
+ 0.26284,
+ 0.26276,
+ 0.261,
+ 0.26512,
+ 0.2669,
+ 0.26932,
+ 0.2642,
+ 0.26996,
+ 0.26718,
+ 0.26772,
+ 0.26848,
+ 0.2686,
+ 0.27064,
+ 0.26932,
+ 0.26956,
+ 0.2707,
+ 0.27038,
+ 0.27084,
+ 0.27048,
+ 0.27238,
+ 0.26878,
+ 0.27158,
+ 0.27342,
+ 0.27218,
+ 0.27104,
+ 0.2718,
+ 0.27454,
+ 0.27166,
+ 0.27142,
+ 0.2736,
+ 0.27368,
+ 0.27276,
+ 0.2748,
+ 0.2733,
+ 0.2743,
+ 0.27712,
+ 0.2743,
+ 0.27604,
+ 0.2745,
+ 0.27354,
+ 0.27452,
+ 0.27538,
+ 0.27732,
+ 0.27562,
+ 0.27438,
+ 0.27546,
+ 0.27798,
+ 0.27778,
+ 0.27474,
+ 0.27972,
+ 0.27802,
+ 0.27448,
+ 0.27894,
+ 0.27802,
+ 0.27462,
+ 0.27436,
+ 0.27914,
+ 0.27646,
+ 0.27898,
+ 0.27676,
+ 0.27804,
+ 0.27688,
+ 0.2795,
+ 0.27758,
+ 0.27814,
+ 0.27706,
+ 0.27766,
+ 0.27888,
+ 0.2781,
+ 0.27842,
+ 0.27892,
+ 0.2802,
+ 0.27858,
+ 0.2792,
+ 0.2788,
+ 0.27574,
+ 0.28088,
+ 0.27956,
+ 0.27906,
+ 0.27764,
+ 0.27988,
+ 0.27716,
+ 0.27956,
+ 0.2782,
+ 0.2787,
+ 0.2789,
+ 0.2799,
+ 0.27836,
+ 0.28092,
+ 0.27914,
+ 0.27792
+ ],
+ "test_acc": [
+ 0.2564,
+ 0.2576,
+ 0.2815,
+ 0.2842,
+ 0.2869,
+ 0.2771,
+ 0.2864,
+ 0.2704,
+ 0.2864,
+ 0.2827,
+ 0.3005,
+ 0.2939,
+ 0.2817,
+ 0.2771,
+ 0.3033,
+ 0.2802,
+ 0.2776,
+ 0.3002,
+ 0.3,
+ 0.2773,
+ 0.3036,
+ 0.2858,
+ 0.2917,
+ 0.2964,
+ 0.2956,
+ 0.2996,
+ 0.2972,
+ 0.2926,
+ 0.3109,
+ 0.2826,
+ 0.3046,
+ 0.3077,
+ 0.278,
+ 0.2856,
+ 0.2955,
+ 0.2948,
+ 0.3034,
+ 0.3088,
+ 0.2955,
+ 0.2871,
+ 0.303,
+ 0.3009,
+ 0.2893,
+ 0.2922,
+ 0.3096,
+ 0.3061,
+ 0.2974,
+ 0.3028,
+ 0.2996,
+ 0.2988,
+ 0.3022,
+ 0.3003,
+ 0.3068,
+ 0.2961,
+ 0.2908,
+ 0.2971,
+ 0.3027,
+ 0.2998,
+ 0.3008,
+ 0.3071,
+ 0.2999,
+ 0.2988,
+ 0.2959,
+ 0.2982,
+ 0.3025,
+ 0.295,
+ 0.2934,
+ 0.2965,
+ 0.3014,
+ 0.2975,
+ 0.2893,
+ 0.2951,
+ 0.3003,
+ 0.3061,
+ 0.297,
+ 0.2987,
+ 0.3,
+ 0.3016,
+ 0.2974,
+ 0.303,
+ 0.3003,
+ 0.3043,
+ 0.3029,
+ 0.3004,
+ 0.2992,
+ 0.2983,
+ 0.3017,
+ 0.299,
+ 0.3005,
+ 0.2977,
+ 0.3004,
+ 0.2986,
+ 0.3,
+ 0.2993,
+ 0.2988,
+ 0.2994,
+ 0.2998,
+ 0.2994,
+ 0.2993,
+ 0.2994
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.3915177881717682,
+ 0.00031804549507796764,
+ -0.0005461883265525103,
+ 0.00020462644170038402,
+ -0.0005664663622155786,
+ -0.0008316519670188427
+ ],
+ "perturbation_rho": [
+ -0.011084532365202904,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "nudging": {
+ "0.001": [
+ -4.0885061025619507e-07,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.003": [
+ -1.2046657502651215e-06,
+ 0.0,
+ 9.313225746154785e-10,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.01": [
+ -4.0549784898757935e-06,
+ 0.0,
+ 9.313225746154785e-10,
+ 0.0,
+ 9.313225746154785e-10,
+ 0.0
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 52265.20703125,
+ 1156494592.0,
+ 2602998784.0,
+ 3437189888.0,
+ 3901714432.0,
+ 6607706112.0,
+ 6800756736.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.745953793237277e-07,
+ 2.664374554317561e-10,
+ 2.662880471682172e-10,
+ 2.6685945120341614e-10,
+ 2.6688468102165075e-10,
+ 2.6681562514951906e-10,
+ 2.668691656548816e-10
+ ]
+ },
+ "drift": {
+ "embed.weight": 323.8438777567112,
+ "embed.bias": 236.32127317549046,
+ "blocks.0.ln.weight": 9.81678478040185,
+ "blocks.0.w1.weight": 283.8033448856046,
+ "blocks.0.w1.bias": 254.7986676902803,
+ "blocks.0.w2.weight": 459.2312703971922,
+ "blocks.1.ln.weight": 7.9902441845145225,
+ "blocks.1.w1.weight": 303.8032457038149,
+ "blocks.1.w1.bias": 290.75689909603165,
+ "blocks.1.w2.weight": 302.7987642593431,
+ "blocks.2.ln.weight": 7.8083033170497576,
+ "blocks.2.w1.weight": 313.18221523970163,
+ "blocks.2.w1.bias": 288.235969181505,
+ "blocks.2.w2.weight": 301.0694759577726,
+ "blocks.3.ln.weight": 7.457261198270327,
+ "blocks.3.w1.weight": 299.37309231794677,
+ "blocks.3.w1.bias": 276.9324398819875,
+ "blocks.3.w2.weight": 279.5917777002267,
+ "blocks.4.ln.weight": 10.517843332237796,
+ "blocks.4.w1.weight": 436.93554951444736,
+ "blocks.4.w1.bias": 404.54404533704337,
+ "blocks.4.w2.weight": 386.63677119691914,
+ "blocks.5.ln.weight": 7.187391888063363,
+ "blocks.5.w1.weight": 278.782404520272,
+ "blocks.5.w1.bias": 266.36329153767053,
+ "blocks.5.w2.weight": 249.56060346351742,
+ "out_ln.weight": 0.5731338871373126,
+ "out_head.weight": 8.11049488578885,
+ "out_head.bias": 1.1188503504473455
+ }
+ },
+ "fa": {
+ "log": {
+ "train_loss": [
+ 2.0280703507995606,
+ 1.9438096570587158,
+ 1.9075839419174194,
+ 1.891309684791565,
+ 1.8722532345199585,
+ 1.8634102462768554,
+ 1.8554693552017212,
+ 1.8489897765350343,
+ 1.846228702659607,
+ 1.8386287690734864,
+ 1.8406133193206786,
+ 1.8387405684661866,
+ 1.8276975162506104,
+ 1.8232349169921875,
+ 1.8223698708724976,
+ 1.821195379638672,
+ 1.8214911682510375,
+ 1.8160597175598145,
+ 1.8229843518066406,
+ 1.813365035133362,
+ 1.8151773623275758,
+ 1.8169539303207398,
+ 1.8105776592254639,
+ 1.8114565746307374,
+ 1.807515641746521,
+ 1.806776870765686,
+ 1.8001547024154663,
+ 1.8010753986358643,
+ 1.7956665130233764,
+ 1.8011931986236571,
+ 1.7943034625625611,
+ 1.791777073059082,
+ 1.791583038711548,
+ 1.7873410034179686,
+ 1.7855097869873047,
+ 1.7860526992416381,
+ 1.7824570555877686,
+ 1.7813435136795044,
+ 1.7782593814086913,
+ 1.77326356174469,
+ 1.7763885403060913,
+ 1.7732242642974854,
+ 1.7696506113052368,
+ 1.7713894509887695,
+ 1.7732664206695556,
+ 1.7675420838165283,
+ 1.763011986732483,
+ 1.7635349313354491,
+ 1.7613904175186157,
+ 1.7604380879974366,
+ 1.7606450772476196,
+ 1.7594652207183838,
+ 1.7573485291290283,
+ 1.7578170404434204,
+ 1.754877228012085,
+ 1.7504150357437134,
+ 1.7527595220565797,
+ 1.7520960317230225,
+ 1.7485238824081422,
+ 1.7481630941772461,
+ 1.7490973169326782,
+ 1.7442448379516602,
+ 1.7454284634399415,
+ 1.7441554889678954,
+ 1.744502698326111,
+ 1.7424594699478149,
+ 1.7411711275482178,
+ 1.737814305076599,
+ 1.7386144577789306,
+ 1.7368740311431885,
+ 1.7391422924804687,
+ 1.737129613571167,
+ 1.7340563317108155,
+ 1.733810231666565,
+ 1.7361049320220947,
+ 1.7318184774398804,
+ 1.7328989402008057,
+ 1.731393734779358,
+ 1.7313611135482787,
+ 1.7314666943740844,
+ 1.7308115518188476,
+ 1.7282487900543213,
+ 1.7280186172485352,
+ 1.7302542147445679,
+ 1.729862018966675,
+ 1.7284494402313233,
+ 1.7274188668060302,
+ 1.72759757938385,
+ 1.728316011276245,
+ 1.7254533053588867,
+ 1.7282261031341553,
+ 1.7274837893676758,
+ 1.7255484241104126,
+ 1.7243229049682618,
+ 1.7256911779022217,
+ 1.7249952478790282,
+ 1.7246788320541382,
+ 1.724236849632263,
+ 1.7243867401885986,
+ 1.7224459433746337
+ ],
+ "train_acc": [
+ 0.26004,
+ 0.2935,
+ 0.3105,
+ 0.31688,
+ 0.32726,
+ 0.3268,
+ 0.33294,
+ 0.33576,
+ 0.33912,
+ 0.34126,
+ 0.3408,
+ 0.33982,
+ 0.34284,
+ 0.34334,
+ 0.34554,
+ 0.3478,
+ 0.34762,
+ 0.34808,
+ 0.34944,
+ 0.35122,
+ 0.35068,
+ 0.35214,
+ 0.35338,
+ 0.35102,
+ 0.35642,
+ 0.35468,
+ 0.35298,
+ 0.35718,
+ 0.35686,
+ 0.3546,
+ 0.3602,
+ 0.36098,
+ 0.35922,
+ 0.35992,
+ 0.36128,
+ 0.36012,
+ 0.36176,
+ 0.3639,
+ 0.36318,
+ 0.36768,
+ 0.36394,
+ 0.36712,
+ 0.36688,
+ 0.36672,
+ 0.36676,
+ 0.36582,
+ 0.37102,
+ 0.36946,
+ 0.37328,
+ 0.37212,
+ 0.36806,
+ 0.37476,
+ 0.3764,
+ 0.37204,
+ 0.37148,
+ 0.37532,
+ 0.37424,
+ 0.37422,
+ 0.37674,
+ 0.37706,
+ 0.37652,
+ 0.37612,
+ 0.37884,
+ 0.37688,
+ 0.37654,
+ 0.37978,
+ 0.37676,
+ 0.37814,
+ 0.3802,
+ 0.38226,
+ 0.3787,
+ 0.38016,
+ 0.38294,
+ 0.38174,
+ 0.37992,
+ 0.38192,
+ 0.38114,
+ 0.38394,
+ 0.38458,
+ 0.38406,
+ 0.38478,
+ 0.38358,
+ 0.38324,
+ 0.38232,
+ 0.38468,
+ 0.3854,
+ 0.3863,
+ 0.38538,
+ 0.38582,
+ 0.38406,
+ 0.38552,
+ 0.38434,
+ 0.3848,
+ 0.38542,
+ 0.38594,
+ 0.38728,
+ 0.38536,
+ 0.3858,
+ 0.38608,
+ 0.386
+ ],
+ "test_acc": [
+ 0.3013,
+ 0.3142,
+ 0.3483,
+ 0.3543,
+ 0.3608,
+ 0.3458,
+ 0.3633,
+ 0.3576,
+ 0.3713,
+ 0.3632,
+ 0.3817,
+ 0.3741,
+ 0.3661,
+ 0.3795,
+ 0.3734,
+ 0.3749,
+ 0.3706,
+ 0.3813,
+ 0.3822,
+ 0.3692,
+ 0.3725,
+ 0.3749,
+ 0.3836,
+ 0.3822,
+ 0.389,
+ 0.3821,
+ 0.3846,
+ 0.3777,
+ 0.3861,
+ 0.3809,
+ 0.3857,
+ 0.3855,
+ 0.3765,
+ 0.3955,
+ 0.3914,
+ 0.3948,
+ 0.3997,
+ 0.3994,
+ 0.3935,
+ 0.3989,
+ 0.3987,
+ 0.4008,
+ 0.3925,
+ 0.3922,
+ 0.4023,
+ 0.4055,
+ 0.3953,
+ 0.4008,
+ 0.4003,
+ 0.3971,
+ 0.3987,
+ 0.3931,
+ 0.4076,
+ 0.4019,
+ 0.4046,
+ 0.3981,
+ 0.4081,
+ 0.4046,
+ 0.4043,
+ 0.406,
+ 0.4118,
+ 0.4076,
+ 0.4057,
+ 0.414,
+ 0.3979,
+ 0.4079,
+ 0.4044,
+ 0.4087,
+ 0.4065,
+ 0.4024,
+ 0.3997,
+ 0.4078,
+ 0.4117,
+ 0.4092,
+ 0.409,
+ 0.406,
+ 0.4132,
+ 0.4133,
+ 0.4066,
+ 0.4131,
+ 0.4083,
+ 0.413,
+ 0.4126,
+ 0.4117,
+ 0.4109,
+ 0.4096,
+ 0.4111,
+ 0.4099,
+ 0.4094,
+ 0.4105,
+ 0.4105,
+ 0.4081,
+ 0.4122,
+ 0.4113,
+ 0.412,
+ 0.4095,
+ 0.41,
+ 0.4114,
+ 0.4111,
+ 0.4117
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.03050209954380989,
+ 0.07341398298740387,
+ -0.030213337391614914,
+ -0.1065329909324646,
+ -0.112009696662426,
+ 0.9969239234924316
+ ],
+ "perturbation_rho": [
+ 0.060943812131881714,
+ 0.02977900393307209,
+ 0.030837206169962883,
+ 0.03361157327890396,
+ 0.003993029240518808,
+ -0.012494717724621296
+ ],
+ "nudging": {
+ "0.001": [
+ -2.1707965061068535e-06,
+ -2.555316314101219e-07,
+ 1.9907020032405853e-08,
+ 1.1094380170106888e-07,
+ 1.1106021702289581e-07,
+ -1.6409903764724731e-06
+ ],
+ "0.003": [
+ -5.929847247898579e-06,
+ -7.075723260641098e-07,
+ 1.384178176522255e-07,
+ 6.816117092967033e-07,
+ 7.138587534427643e-07,
+ -6.536138243973255e-06
+ ],
+ "0.01": [
+ -2.0072446204721928e-05,
+ -2.6080524548888206e-06,
+ 4.3155159801244736e-07,
+ 2.434244379401207e-06,
+ 2.515967935323715e-06,
+ -2.3723463527858257e-05
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 6712.94189453125,
+ 241810.65625,
+ 672440.6875,
+ 1111939.0,
+ 1518122.75,
+ 1746060.875,
+ 545338.8125
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.4450268028886057e-05,
+ 1.2711601584669552e-06,
+ 9.635764399718028e-07,
+ 9.082662018045085e-07,
+ 9.076145488506882e-07,
+ 9.084363341571589e-07,
+ 8.959328283708601e-07
+ ]
+ },
+ "drift": {
+ "embed.weight": 52.75751575367878,
+ "embed.bias": 16.290300308868286,
+ "blocks.0.ln.weight": 1.3122960145180722,
+ "blocks.0.w1.weight": 18.35861512742778,
+ "blocks.0.w1.bias": 15.033623290031253,
+ "blocks.0.w2.weight": 59.867189750033205,
+ "blocks.1.ln.weight": 0.9612442862570167,
+ "blocks.1.w1.weight": 19.510654541967284,
+ "blocks.1.w1.bias": 16.988315121645034,
+ "blocks.1.w2.weight": 44.17488250656146,
+ "blocks.2.ln.weight": 0.8653865881813659,
+ "blocks.2.w1.weight": 21.845526237032836,
+ "blocks.2.w1.bias": 20.228308191109985,
+ "blocks.2.w2.weight": 54.8483328488635,
+ "blocks.3.ln.weight": 0.687489668753582,
+ "blocks.3.w1.weight": 21.235696780055214,
+ "blocks.3.w1.bias": 22.153293813124844,
+ "blocks.3.w2.weight": 37.488286386992314,
+ "blocks.4.ln.weight": 0.6826937366325999,
+ "blocks.4.w1.weight": 20.105546147824473,
+ "blocks.4.w1.bias": 21.27470328328852,
+ "blocks.4.w2.weight": 45.57345483069108,
+ "blocks.5.ln.weight": 0.7529120733274255,
+ "blocks.5.w1.weight": 23.26064002188114,
+ "blocks.5.w1.bias": 24.637626686580436,
+ "blocks.5.w2.weight": 40.48651407120801,
+ "out_ln.weight": 0.28684521814071196,
+ "out_head.weight": 5.505824885038432,
+ "out_head.bias": 1.5561206526473979
+ }
+ }
+ },
+ "config": {
+ "dataset": "cifar10",
+ "d_hidden": 512,
+ "num_blocks": 6,
+ "batch_size": 128,
+ "epochs": 100,
+ "lr": 0.001,
+ "lr_fb": 0.001,
+ "wd": 0.01,
+ "lam": 0.1,
+ "K": 4,
+ "sigma_bridge": 0.05,
+ "ema_momentum": 0.995,
+ "term_grad_weight": 1.0,
+ "seeds": [
+ 6
+ ],
+ "gpu": 0,
+ "output_dir": "results/fa_dfa_d512_L6_seed6",
+ "methods": [
+ "fa",
+ "dfa"
+ ],
+ "random_targets": false,
+ "penalty_lam": 0.0,
+ "num_classes": 10
+ }
+} \ No newline at end of file
diff --git a/results/fa_dfa_d512_L6_seed7/results_cifar10.json b/results/fa_dfa_d512_L6_seed7/results_cifar10.json
new file mode 100644
index 0000000..1518de5
--- /dev/null
+++ b/results/fa_dfa_d512_L6_seed7/results_cifar10.json
@@ -0,0 +1,837 @@
+{
+ "7": {
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 2.0644660066223146,
+ 2.0393594007873537,
+ 2.0298658728027346,
+ 2.027508737716675,
+ 2.021048464202881,
+ 2.0180776037979125,
+ 2.016010649871826,
+ 2.0145508810424806,
+ 2.0100066609954834,
+ 2.006050896835327,
+ 2.007042580833435,
+ 2.0072416454315185,
+ 2.0022777138519285,
+ 2.003505378875732,
+ 2.0038011445617676,
+ 2.0008850508880616,
+ 1.9970258136749268,
+ 1.9960380639648438,
+ 1.9933054499816893,
+ 1.9941288608551024,
+ 1.99546089138031,
+ 1.9936006065368652,
+ 1.994488134994507,
+ 1.9934685417938232,
+ 1.9926122019195556,
+ 1.9915543088531493,
+ 1.9883341995239259,
+ 1.9901361241149902,
+ 1.988699857711792,
+ 1.988091276512146,
+ 1.9886641019439697,
+ 1.9861467250061036,
+ 1.9892435473251342,
+ 1.986944725379944,
+ 1.9878197789001464,
+ 1.9859363149261475,
+ 1.9881446804046632,
+ 1.98439239112854,
+ 1.9870981986999512,
+ 1.9842346769714356,
+ 1.9852807202529907,
+ 1.9832774765396117,
+ 1.9852560285949707,
+ 1.985099944229126,
+ 1.984175789642334,
+ 1.9816588903808594,
+ 1.9819236499404906,
+ 1.9812517197418213,
+ 1.9811725481414795,
+ 1.9830146681213379,
+ 1.9799175099945068,
+ 1.9814645065689087,
+ 1.9808358678817748,
+ 1.9797256357192994,
+ 1.9818459476470947,
+ 1.980439381980896,
+ 1.9798791062164307,
+ 1.9782852731323242,
+ 1.9786473375701905,
+ 1.9781282710266113,
+ 1.9775994555664063,
+ 1.9782113193511963,
+ 1.9787678694915771,
+ 1.9774463504028321,
+ 1.9799045008850098,
+ 1.9775051036834717,
+ 1.977933309020996,
+ 1.9787949396514892,
+ 1.9781452545547484,
+ 1.9794640335845948,
+ 1.976175922241211,
+ 1.9797193655776977,
+ 1.9766233335113526,
+ 1.9788971273422242,
+ 1.9771730890274049,
+ 1.9750644179916381,
+ 1.9780011644744873,
+ 1.978542350692749,
+ 1.9770074579238892,
+ 1.9780642249298095,
+ 1.9761155870819092,
+ 1.9755325689697265,
+ 1.976641792564392,
+ 1.976419013442993,
+ 1.9755548094177247,
+ 1.9737617702484131,
+ 1.9747001873397827,
+ 1.974268023376465,
+ 1.974451787185669,
+ 1.9748253519439698,
+ 1.9772220765686035,
+ 1.9760730503845214,
+ 1.9763396139526368,
+ 1.9743848707580567,
+ 1.9757111903381348,
+ 1.9740122735214234,
+ 1.9744470761489867,
+ 1.9736765104675293,
+ 1.9756489211654662,
+ 1.975818688316345
+ ],
+ "train_acc": [
+ 0.23738,
+ 0.24404,
+ 0.25038,
+ 0.25232,
+ 0.25616,
+ 0.25784,
+ 0.2552,
+ 0.2584,
+ 0.26018,
+ 0.2647,
+ 0.26414,
+ 0.26396,
+ 0.26708,
+ 0.26626,
+ 0.2656,
+ 0.26712,
+ 0.26824,
+ 0.2697,
+ 0.27158,
+ 0.26954,
+ 0.27018,
+ 0.27384,
+ 0.27396,
+ 0.2686,
+ 0.27122,
+ 0.27376,
+ 0.2733,
+ 0.2745,
+ 0.276,
+ 0.27458,
+ 0.27622,
+ 0.27596,
+ 0.27604,
+ 0.2743,
+ 0.27596,
+ 0.27608,
+ 0.2749,
+ 0.27588,
+ 0.27448,
+ 0.27836,
+ 0.27578,
+ 0.27956,
+ 0.27798,
+ 0.27912,
+ 0.27864,
+ 0.27886,
+ 0.27796,
+ 0.2803,
+ 0.27986,
+ 0.27928,
+ 0.28028,
+ 0.28038,
+ 0.27754,
+ 0.2806,
+ 0.28016,
+ 0.27942,
+ 0.28168,
+ 0.28164,
+ 0.28036,
+ 0.2808,
+ 0.28056,
+ 0.2808,
+ 0.28214,
+ 0.283,
+ 0.28266,
+ 0.28086,
+ 0.28166,
+ 0.28236,
+ 0.28182,
+ 0.28274,
+ 0.28322,
+ 0.28242,
+ 0.28414,
+ 0.28278,
+ 0.2849,
+ 0.28342,
+ 0.28298,
+ 0.2835,
+ 0.28288,
+ 0.28306,
+ 0.28192,
+ 0.284,
+ 0.28374,
+ 0.2818,
+ 0.284,
+ 0.28564,
+ 0.28466,
+ 0.2856,
+ 0.28366,
+ 0.28388,
+ 0.28242,
+ 0.28532,
+ 0.2828,
+ 0.28576,
+ 0.28524,
+ 0.28524,
+ 0.28596,
+ 0.28682,
+ 0.28318,
+ 0.28228
+ ],
+ "test_acc": [
+ 0.2747,
+ 0.2448,
+ 0.2761,
+ 0.275,
+ 0.2784,
+ 0.2789,
+ 0.2712,
+ 0.2923,
+ 0.2752,
+ 0.2898,
+ 0.2816,
+ 0.2913,
+ 0.2903,
+ 0.2968,
+ 0.2885,
+ 0.2852,
+ 0.2949,
+ 0.2793,
+ 0.2989,
+ 0.2934,
+ 0.2914,
+ 0.2923,
+ 0.2946,
+ 0.3013,
+ 0.3014,
+ 0.2906,
+ 0.3031,
+ 0.297,
+ 0.2962,
+ 0.2942,
+ 0.3013,
+ 0.3006,
+ 0.3016,
+ 0.3082,
+ 0.311,
+ 0.2978,
+ 0.2803,
+ 0.299,
+ 0.3028,
+ 0.2966,
+ 0.3076,
+ 0.2768,
+ 0.3014,
+ 0.2929,
+ 0.3045,
+ 0.2953,
+ 0.2998,
+ 0.3013,
+ 0.3052,
+ 0.2902,
+ 0.3055,
+ 0.2939,
+ 0.3027,
+ 0.2966,
+ 0.3039,
+ 0.3032,
+ 0.294,
+ 0.3002,
+ 0.2989,
+ 0.307,
+ 0.3064,
+ 0.3031,
+ 0.3103,
+ 0.3015,
+ 0.2935,
+ 0.3131,
+ 0.2974,
+ 0.2958,
+ 0.3098,
+ 0.308,
+ 0.301,
+ 0.3079,
+ 0.306,
+ 0.3089,
+ 0.3077,
+ 0.304,
+ 0.3059,
+ 0.302,
+ 0.3017,
+ 0.3008,
+ 0.3065,
+ 0.3071,
+ 0.3039,
+ 0.3027,
+ 0.3065,
+ 0.3079,
+ 0.3051,
+ 0.305,
+ 0.3068,
+ 0.3084,
+ 0.3043,
+ 0.3041,
+ 0.3055,
+ 0.3043,
+ 0.3066,
+ 0.3062,
+ 0.3059,
+ 0.3066,
+ 0.3066,
+ 0.3065
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.4079825282096863,
+ 0.00017449544975534081,
+ -0.0002950271009467542,
+ -0.00047933883615769446,
+ 0.0008355096215382218,
+ -0.00013302025035955012
+ ],
+ "perturbation_rho": [
+ 0.005086352117359638,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "nudging": {
+ "0.001": [
+ -3.762543201446533e-07,
+ -2.7939677238464355e-09,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.003": [
+ -1.4076940715312958e-06,
+ -2.7939677238464355e-09,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.01": [
+ -4.495494067668915e-06,
+ -9.313225746154785e-10,
+ 0.0,
+ 2.7939677238464355e-09,
+ 0.0,
+ 0.0
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 53469.4921875,
+ 722511616.0,
+ 2513154304.0,
+ 4196997888.0,
+ 4850079232.0,
+ 7170064896.0,
+ 10128478208.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 3.0659566618851386e-07,
+ 2.1547569284408041e-10,
+ 2.1372952019316216e-10,
+ 2.1350407553022421e-10,
+ 2.1338901479150962e-10,
+ 2.1328670773979042e-10,
+ 2.1351549694959004e-10
+ ]
+ },
+ "drift": {
+ "embed.weight": 319.2060524889922,
+ "embed.bias": 168.91177351992346,
+ "blocks.0.ln.weight": 10.20084372934541,
+ "blocks.0.w1.weight": 259.3585707340092,
+ "blocks.0.w1.bias": 204.8860780612092,
+ "blocks.0.w2.weight": 474.58360990249685,
+ "blocks.1.ln.weight": 8.524169911482344,
+ "blocks.1.w1.weight": 317.8100676412464,
+ "blocks.1.w1.bias": 286.625861663325,
+ "blocks.1.w2.weight": 337.7969456452116,
+ "blocks.2.ln.weight": 8.931831095995939,
+ "blocks.2.w1.weight": 362.27088509151724,
+ "blocks.2.w1.bias": 323.3719624301686,
+ "blocks.2.w2.weight": 340.6308866629528,
+ "blocks.3.ln.weight": 7.651484669125739,
+ "blocks.3.w1.weight": 309.48260206501516,
+ "blocks.3.w1.bias": 279.96119268361133,
+ "blocks.3.w2.weight": 277.85914995873645,
+ "blocks.4.ln.weight": 10.210800501008027,
+ "blocks.4.w1.weight": 421.6457628353572,
+ "blocks.4.w1.bias": 399.4367334675059,
+ "blocks.4.w2.weight": 412.14310110719924,
+ "blocks.5.ln.weight": 11.351197134890906,
+ "blocks.5.w1.weight": 456.9953588488586,
+ "blocks.5.w1.bias": 441.75351526808663,
+ "blocks.5.w2.weight": 455.08343692703954,
+ "out_ln.weight": 0.6738960876175676,
+ "out_head.weight": 9.13993717128016,
+ "out_head.bias": 0.8636649912030224
+ }
+ },
+ "fa": {
+ "log": {
+ "train_loss": [
+ 2.035428274383545,
+ 1.9543501361465454,
+ 1.921069111099243,
+ 1.9030546938323976,
+ 1.8895003170013427,
+ 1.884778947982788,
+ 1.8827891104888916,
+ 1.8784568830108643,
+ 1.86878865776062,
+ 1.860254260787964,
+ 1.8565264916229247,
+ 1.8507944106674195,
+ 1.8448910778808594,
+ 1.8358412002563476,
+ 1.8325608110809326,
+ 1.8244803615570069,
+ 1.8170998212051392,
+ 1.8116112271499634,
+ 1.8029299909210206,
+ 1.798451124572754,
+ 1.796333960533142,
+ 1.7877936901092528,
+ 1.7880024145889282,
+ 1.783990569152832,
+ 1.773157628097534,
+ 1.7710246792984008,
+ 1.7636069051742553,
+ 1.7616489712142944,
+ 1.7549441582870484,
+ 1.754057723007202,
+ 1.7550610286712647,
+ 1.7466887496185304,
+ 1.7430288970565795,
+ 1.7387338048553467,
+ 1.7371670000839234,
+ 1.7317205599975587,
+ 1.733508980026245,
+ 1.7296288764190675,
+ 1.7255350440216064,
+ 1.7244919805908203,
+ 1.7210525772857665,
+ 1.7164554761123658,
+ 1.716478593711853,
+ 1.717660334854126,
+ 1.7179905541992186,
+ 1.7098763641738892,
+ 1.70570148979187,
+ 1.7075587792587281,
+ 1.7068590453720094,
+ 1.70389818359375,
+ 1.6986094250488282,
+ 1.6995252443695068,
+ 1.6985104891967773,
+ 1.6972654037094117,
+ 1.695931503982544,
+ 1.6940053173065186,
+ 1.6959078594589234,
+ 1.6945061222076416,
+ 1.6924745792388916,
+ 1.6908132946777343,
+ 1.6884509930038452,
+ 1.6876910009384156,
+ 1.684639096031189,
+ 1.6838580658721924,
+ 1.6875753708267212,
+ 1.6831429992294311,
+ 1.6840416891479493,
+ 1.6803303512573242,
+ 1.6848778852081299,
+ 1.6803966582870484,
+ 1.676663829689026,
+ 1.6796003302383422,
+ 1.677098304748535,
+ 1.678186548423767,
+ 1.675721337814331,
+ 1.671638519821167,
+ 1.6730699136734009,
+ 1.673575770187378,
+ 1.6733202564239502,
+ 1.671583014907837,
+ 1.6726769936370849,
+ 1.6701723910522461,
+ 1.6703911999893188,
+ 1.6733231628417968,
+ 1.6666682750701904,
+ 1.6670018822860717,
+ 1.6669657723999023,
+ 1.6693463718795776,
+ 1.6684244076156616,
+ 1.6675063357162476,
+ 1.668329490966797,
+ 1.6683633364486694,
+ 1.6668814519882202,
+ 1.6666266248321533,
+ 1.6659794528198242,
+ 1.6643282043075562,
+ 1.666822133102417,
+ 1.6650706377410889,
+ 1.6686715842056274,
+ 1.6671581310653687
+ ],
+ "train_acc": [
+ 0.252,
+ 0.28866,
+ 0.30228,
+ 0.31138,
+ 0.31636,
+ 0.32068,
+ 0.32168,
+ 0.32222,
+ 0.32708,
+ 0.32854,
+ 0.33246,
+ 0.33542,
+ 0.33984,
+ 0.34196,
+ 0.34244,
+ 0.34556,
+ 0.34844,
+ 0.35176,
+ 0.35512,
+ 0.35748,
+ 0.35646,
+ 0.3622,
+ 0.35886,
+ 0.36206,
+ 0.36288,
+ 0.36684,
+ 0.37276,
+ 0.37276,
+ 0.37378,
+ 0.37246,
+ 0.37274,
+ 0.37874,
+ 0.37508,
+ 0.37808,
+ 0.3804,
+ 0.38156,
+ 0.37756,
+ 0.37946,
+ 0.38278,
+ 0.38208,
+ 0.38226,
+ 0.3882,
+ 0.38378,
+ 0.38596,
+ 0.38588,
+ 0.38736,
+ 0.38724,
+ 0.38704,
+ 0.38846,
+ 0.387,
+ 0.39326,
+ 0.39342,
+ 0.3919,
+ 0.3942,
+ 0.39288,
+ 0.39274,
+ 0.3934,
+ 0.3913,
+ 0.39564,
+ 0.39482,
+ 0.39314,
+ 0.39526,
+ 0.39544,
+ 0.3977,
+ 0.39616,
+ 0.39674,
+ 0.39592,
+ 0.39896,
+ 0.39744,
+ 0.3976,
+ 0.39928,
+ 0.39962,
+ 0.39968,
+ 0.40054,
+ 0.39988,
+ 0.40192,
+ 0.40246,
+ 0.40078,
+ 0.40022,
+ 0.39994,
+ 0.40256,
+ 0.40522,
+ 0.40094,
+ 0.40278,
+ 0.40524,
+ 0.40424,
+ 0.4035,
+ 0.40222,
+ 0.40502,
+ 0.40334,
+ 0.40314,
+ 0.40426,
+ 0.40582,
+ 0.40816,
+ 0.40362,
+ 0.40492,
+ 0.40314,
+ 0.40624,
+ 0.40386,
+ 0.4031
+ ],
+ "test_acc": [
+ 0.2994,
+ 0.315,
+ 0.3222,
+ 0.3432,
+ 0.3472,
+ 0.3579,
+ 0.3389,
+ 0.357,
+ 0.3572,
+ 0.3638,
+ 0.3595,
+ 0.3655,
+ 0.3683,
+ 0.3749,
+ 0.3736,
+ 0.3761,
+ 0.379,
+ 0.3672,
+ 0.3847,
+ 0.3897,
+ 0.3818,
+ 0.3922,
+ 0.3878,
+ 0.3942,
+ 0.395,
+ 0.3922,
+ 0.3942,
+ 0.3994,
+ 0.393,
+ 0.3938,
+ 0.3987,
+ 0.3997,
+ 0.3985,
+ 0.4053,
+ 0.406,
+ 0.4058,
+ 0.4051,
+ 0.4097,
+ 0.4081,
+ 0.4058,
+ 0.4093,
+ 0.401,
+ 0.4153,
+ 0.4048,
+ 0.4131,
+ 0.4069,
+ 0.404,
+ 0.414,
+ 0.412,
+ 0.414,
+ 0.4144,
+ 0.4179,
+ 0.4122,
+ 0.4187,
+ 0.4136,
+ 0.4228,
+ 0.414,
+ 0.4186,
+ 0.418,
+ 0.4167,
+ 0.4195,
+ 0.4178,
+ 0.4212,
+ 0.4175,
+ 0.4217,
+ 0.4212,
+ 0.4164,
+ 0.4237,
+ 0.4199,
+ 0.4199,
+ 0.4209,
+ 0.4234,
+ 0.4238,
+ 0.4226,
+ 0.4199,
+ 0.4251,
+ 0.4239,
+ 0.4209,
+ 0.4222,
+ 0.4257,
+ 0.423,
+ 0.424,
+ 0.4253,
+ 0.4242,
+ 0.4253,
+ 0.4239,
+ 0.4255,
+ 0.4247,
+ 0.4241,
+ 0.4253,
+ 0.4256,
+ 0.4259,
+ 0.4276,
+ 0.426,
+ 0.4246,
+ 0.4258,
+ 0.4269,
+ 0.4251,
+ 0.4254,
+ 0.4253
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.05678346008062363,
+ 0.06278860569000244,
+ -0.06489825248718262,
+ -0.13550668954849243,
+ -0.030537210404872894,
+ 0.9970009326934814
+ ],
+ "perturbation_rho": [
+ 0.021435417234897614,
+ -0.006943210028111935,
+ 0.002432417357340455,
+ -0.022404946386814117,
+ 0.010333601385354996,
+ -0.011944804340600967
+ ],
+ "nudging": {
+ "0.001": [
+ -6.006448529660702e-06,
+ -6.604241207242012e-07,
+ 6.123445928096771e-08,
+ 1.4808028936386108e-07,
+ 1.4319084584712982e-08,
+ -1.7423881217837334e-06
+ ],
+ "0.003": [
+ -1.811189576983452e-05,
+ -1.56438909471035e-06,
+ 3.5529956221580505e-07,
+ 8.135102689266205e-07,
+ 1.6961712390184402e-07,
+ -6.420654244720936e-06
+ ],
+ "0.01": [
+ -6.019952706992626e-05,
+ -4.958710633218288e-06,
+ 1.4510005712509155e-06,
+ 2.9762741178274155e-06,
+ 6.504124030470848e-07,
+ -2.2383523173630238e-05
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 4713.947265625,
+ 59263.7578125,
+ 803386.0625,
+ 994639.875,
+ 1492731.0,
+ 1505302.75,
+ 597029.1875
+ ],
+ "bp_grad_norms_per_layer": [
+ 3.536270742188208e-05,
+ 2.4848127395671327e-06,
+ 9.235278071173525e-07,
+ 9.185976637127169e-07,
+ 9.248900596503518e-07,
+ 9.24963046600169e-07,
+ 9.076006222130673e-07
+ ]
+ },
+ "drift": {
+ "embed.weight": 37.15779008870908,
+ "embed.bias": 18.146058604345335,
+ "blocks.0.ln.weight": 1.0848731333476576,
+ "blocks.0.w1.weight": 14.826104871554262,
+ "blocks.0.w1.bias": 11.802494272115224,
+ "blocks.0.w2.weight": 46.31577951436631,
+ "blocks.1.ln.weight": 0.9864104228146627,
+ "blocks.1.w1.weight": 20.750805358036015,
+ "blocks.1.w1.bias": 18.84914683848346,
+ "blocks.1.w2.weight": 45.31226900575547,
+ "blocks.2.ln.weight": 0.6561709772918677,
+ "blocks.2.w1.weight": 17.738489694580174,
+ "blocks.2.w1.bias": 16.431735764543493,
+ "blocks.2.w2.weight": 39.339953277678816,
+ "blocks.3.ln.weight": 0.5804518445695007,
+ "blocks.3.w1.weight": 19.985841730818343,
+ "blocks.3.w1.bias": 21.371168400070296,
+ "blocks.3.w2.weight": 31.937812891223096,
+ "blocks.4.ln.weight": 0.634509772716372,
+ "blocks.4.w1.weight": 17.17337766921684,
+ "blocks.4.w1.bias": 15.654490632053776,
+ "blocks.4.w2.weight": 45.84886660282379,
+ "blocks.5.ln.weight": 0.6629717085902781,
+ "blocks.5.w1.weight": 20.18167535513518,
+ "blocks.5.w1.bias": 21.32865390385067,
+ "blocks.5.w2.weight": 40.56800371651544,
+ "out_ln.weight": 0.3071358282526009,
+ "out_head.weight": 5.834156740127778,
+ "out_head.bias": 1.541462699862782
+ }
+ }
+ },
+ "config": {
+ "dataset": "cifar10",
+ "d_hidden": 512,
+ "num_blocks": 6,
+ "batch_size": 128,
+ "epochs": 100,
+ "lr": 0.001,
+ "lr_fb": 0.001,
+ "wd": 0.01,
+ "lam": 0.1,
+ "K": 4,
+ "sigma_bridge": 0.05,
+ "ema_momentum": 0.995,
+ "term_grad_weight": 1.0,
+ "seeds": [
+ 7
+ ],
+ "gpu": 0,
+ "output_dir": "results/fa_dfa_d512_L6_seed7",
+ "methods": [
+ "fa",
+ "dfa"
+ ],
+ "random_targets": false,
+ "penalty_lam": 0.0,
+ "num_classes": 10
+ }
+} \ No newline at end of file
diff --git a/results/fa_dfa_d512_L6_seed8/results_cifar10.json b/results/fa_dfa_d512_L6_seed8/results_cifar10.json
new file mode 100644
index 0000000..120e901
--- /dev/null
+++ b/results/fa_dfa_d512_L6_seed8/results_cifar10.json
@@ -0,0 +1,837 @@
+{
+ "8": {
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 2.080832463722229,
+ 2.048995506820679,
+ 2.0368573234558105,
+ 2.0319985338974,
+ 2.0285650724029543,
+ 2.0264119197082517,
+ 2.0246589180755614,
+ 2.0194223052597047,
+ 2.023226359863281,
+ 2.0190831980895996,
+ 2.0185574532699584,
+ 2.0163141152572632,
+ 2.0120892976379396,
+ 2.0131693548965455,
+ 2.0107243951416014,
+ 2.0126246429443357,
+ 2.0099691207504273,
+ 2.007364001235962,
+ 2.009500975265503,
+ 2.005168029327393,
+ 2.0078421032333376,
+ 2.005056644668579,
+ 2.003016718902588,
+ 2.002305891647339,
+ 2.004946018447876,
+ 2.0044735367202757,
+ 2.0007521640014647,
+ 2.0023752281188965,
+ 2.002063782119751,
+ 1.9980181777954102,
+ 1.999666137161255,
+ 2.0014348561096194,
+ 1.9975938603973389,
+ 1.997772049255371,
+ 1.9968431232452393,
+ 1.9957593490600587,
+ 1.9965335370254516,
+ 1.9956621952819824,
+ 1.9998830209732055,
+ 1.9981224115371705,
+ 1.9981820908355712,
+ 1.9963909757995606,
+ 1.9932935749816894,
+ 1.9949091632843017,
+ 1.9948957403564453,
+ 1.9962669714736938,
+ 1.9956311498641968,
+ 1.993200020980835,
+ 1.9959270191192626,
+ 1.9959917138671874,
+ 1.99261492729187,
+ 1.9928764194107056,
+ 1.9886184701538085,
+ 1.9945403707504272,
+ 1.9950171528625489,
+ 1.9918433042144776,
+ 1.9921051956939697,
+ 1.9939103940582275,
+ 1.9926260836791991,
+ 1.9888539260864257,
+ 1.9864782820129394,
+ 1.9928740283203126,
+ 1.9936177950286864,
+ 1.9908017050933837,
+ 1.9916553665924073,
+ 1.991268727722168,
+ 1.9904707485198974,
+ 1.990502048187256,
+ 1.9922669997406006,
+ 1.990579651412964,
+ 1.989777621154785,
+ 1.9894770226669312,
+ 1.9896350140762329,
+ 1.9901242208480836,
+ 1.9889081002426148,
+ 1.9884048733520507,
+ 1.991174902420044,
+ 1.9889308002090453,
+ 1.9896702439880372,
+ 1.9888165955352783,
+ 1.9881730316925048,
+ 1.9870906281280518,
+ 1.9875952992248536,
+ 1.987178568344116,
+ 1.98706350440979,
+ 1.987743454208374,
+ 1.9882577419281007,
+ 1.98791315574646,
+ 1.9878161795806886,
+ 1.9868540209197998,
+ 1.9870264992523194,
+ 1.9860009889984132,
+ 1.9875505159759521,
+ 1.9869904022979736,
+ 1.9855010274887086,
+ 1.9865515293884277,
+ 1.9860705053710936,
+ 1.9874219760131835,
+ 1.9881664807128907,
+ 1.9873311661148072
+ ],
+ "train_acc": [
+ 0.22856,
+ 0.2384,
+ 0.24456,
+ 0.24494,
+ 0.24732,
+ 0.25098,
+ 0.24904,
+ 0.2523,
+ 0.25028,
+ 0.2514,
+ 0.25612,
+ 0.25722,
+ 0.25858,
+ 0.25916,
+ 0.26012,
+ 0.26044,
+ 0.26232,
+ 0.26452,
+ 0.26066,
+ 0.26248,
+ 0.2637,
+ 0.26254,
+ 0.26474,
+ 0.26712,
+ 0.2677,
+ 0.26322,
+ 0.26788,
+ 0.26452,
+ 0.26682,
+ 0.26908,
+ 0.2686,
+ 0.26734,
+ 0.27066,
+ 0.26914,
+ 0.26942,
+ 0.27018,
+ 0.26882,
+ 0.2706,
+ 0.26786,
+ 0.26876,
+ 0.27074,
+ 0.27,
+ 0.27148,
+ 0.27242,
+ 0.27346,
+ 0.27144,
+ 0.27246,
+ 0.27478,
+ 0.2713,
+ 0.27238,
+ 0.2732,
+ 0.27598,
+ 0.27788,
+ 0.27462,
+ 0.27216,
+ 0.27474,
+ 0.27372,
+ 0.27488,
+ 0.2751,
+ 0.27356,
+ 0.27528,
+ 0.2739,
+ 0.2744,
+ 0.27592,
+ 0.27666,
+ 0.27542,
+ 0.2749,
+ 0.27564,
+ 0.27486,
+ 0.27662,
+ 0.27766,
+ 0.27678,
+ 0.27682,
+ 0.27618,
+ 0.27544,
+ 0.2767,
+ 0.27538,
+ 0.27816,
+ 0.27422,
+ 0.27562,
+ 0.2772,
+ 0.27786,
+ 0.27924,
+ 0.2775,
+ 0.2789,
+ 0.27834,
+ 0.27724,
+ 0.27724,
+ 0.27694,
+ 0.27716,
+ 0.27862,
+ 0.27786,
+ 0.27676,
+ 0.2771,
+ 0.278,
+ 0.27708,
+ 0.27678,
+ 0.27732,
+ 0.27858,
+ 0.27724
+ ],
+ "test_acc": [
+ 0.25,
+ 0.2553,
+ 0.2484,
+ 0.2586,
+ 0.2481,
+ 0.2619,
+ 0.2688,
+ 0.2577,
+ 0.2807,
+ 0.2806,
+ 0.2671,
+ 0.2735,
+ 0.2663,
+ 0.272,
+ 0.2824,
+ 0.2808,
+ 0.2859,
+ 0.2837,
+ 0.282,
+ 0.2895,
+ 0.2788,
+ 0.2787,
+ 0.275,
+ 0.2726,
+ 0.283,
+ 0.2729,
+ 0.2866,
+ 0.2909,
+ 0.2806,
+ 0.2892,
+ 0.2818,
+ 0.2915,
+ 0.288,
+ 0.2928,
+ 0.2895,
+ 0.3015,
+ 0.2906,
+ 0.2869,
+ 0.2936,
+ 0.2929,
+ 0.2996,
+ 0.2852,
+ 0.295,
+ 0.284,
+ 0.2772,
+ 0.302,
+ 0.278,
+ 0.2875,
+ 0.2899,
+ 0.3026,
+ 0.2806,
+ 0.2945,
+ 0.2936,
+ 0.298,
+ 0.2945,
+ 0.2987,
+ 0.2926,
+ 0.2891,
+ 0.2897,
+ 0.2928,
+ 0.2947,
+ 0.2867,
+ 0.2913,
+ 0.2912,
+ 0.2907,
+ 0.2955,
+ 0.2948,
+ 0.2955,
+ 0.2934,
+ 0.2874,
+ 0.2917,
+ 0.2904,
+ 0.2907,
+ 0.2893,
+ 0.2937,
+ 0.2975,
+ 0.2948,
+ 0.2938,
+ 0.2921,
+ 0.2903,
+ 0.2921,
+ 0.2932,
+ 0.2969,
+ 0.2915,
+ 0.294,
+ 0.2947,
+ 0.2891,
+ 0.2975,
+ 0.2948,
+ 0.2956,
+ 0.2919,
+ 0.2948,
+ 0.2936,
+ 0.2931,
+ 0.293,
+ 0.2934,
+ 0.2934,
+ 0.294,
+ 0.2937,
+ 0.2937
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.37266281247138977,
+ -0.00045336512266658247,
+ -0.00016533058078493923,
+ -0.00022915060981176794,
+ -9.622798825148493e-05,
+ -0.0005005986895412207
+ ],
+ "perturbation_rho": [
+ -0.022135162726044655,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "nudging": {
+ "0.001": [
+ -4.153698682785034e-07,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.003": [
+ -1.184176653623581e-06,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.01": [
+ -4.217028617858887e-06,
+ 9.313225746154785e-10,
+ 0.0,
+ 0.0,
+ -4.6566128730773926e-09,
+ 0.0
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 50847.30078125,
+ 848742848.0,
+ 4660130304.0,
+ 4956724224.0,
+ 5474357248.0,
+ 5779449344.0,
+ 6897274368.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 3.144729134874069e-07,
+ 2.3805521442987754e-10,
+ 2.377963659316862e-10,
+ 2.3782173452779887e-10,
+ 2.382432029435222e-10,
+ 2.381181640753738e-10,
+ 2.3811083660341126e-10
+ ]
+ },
+ "drift": {
+ "embed.weight": 319.35155723550326,
+ "embed.bias": 248.60053068216845,
+ "blocks.0.ln.weight": 9.427908278003652,
+ "blocks.0.w1.weight": 267.21029834357404,
+ "blocks.0.w1.bias": 240.44244412845688,
+ "blocks.0.w2.weight": 467.22266822391686,
+ "blocks.1.ln.weight": 9.176869505684051,
+ "blocks.1.w1.weight": 396.8672687385826,
+ "blocks.1.w1.bias": 384.3213865917358,
+ "blocks.1.w2.weight": 392.8080177067534,
+ "blocks.2.ln.weight": 6.748169794145516,
+ "blocks.2.w1.weight": 260.5475963025436,
+ "blocks.2.w1.bias": 233.50388419740455,
+ "blocks.2.w2.weight": 243.9744268156159,
+ "blocks.3.ln.weight": 8.622995843562022,
+ "blocks.3.w1.weight": 331.24292230819924,
+ "blocks.3.w1.bias": 315.5877913098073,
+ "blocks.3.w2.weight": 298.8313159517988,
+ "blocks.4.ln.weight": 8.025346777953455,
+ "blocks.4.w1.weight": 289.7124732778298,
+ "blocks.4.w1.bias": 265.5485861970709,
+ "blocks.4.w2.weight": 262.14080086870655,
+ "blocks.5.ln.weight": 9.46176669645325,
+ "blocks.5.w1.weight": 364.3981229248546,
+ "blocks.5.w1.bias": 349.70853696261963,
+ "blocks.5.w2.weight": 357.5708723169573,
+ "out_ln.weight": 0.5682786122526117,
+ "out_head.weight": 8.418164907479396,
+ "out_head.bias": 0.662517650973914
+ }
+ },
+ "fa": {
+ "log": {
+ "train_loss": [
+ 2.0414198583984375,
+ 1.9522941863250733,
+ 1.912906623916626,
+ 1.8914309909057616,
+ 1.8828500344848633,
+ 1.8722443673706055,
+ 1.867602448654175,
+ 1.856888031578064,
+ 1.8552497367095948,
+ 1.843837871170044,
+ 1.8456551111602784,
+ 1.8392053551864624,
+ 1.832319408721924,
+ 1.8315165106582643,
+ 1.8281906174468994,
+ 1.8275285326766968,
+ 1.823442045288086,
+ 1.8223411087799073,
+ 1.8216783404922485,
+ 1.8156820178604125,
+ 1.8176694821166992,
+ 1.8109132452392578,
+ 1.8122728036880493,
+ 1.8086374733734132,
+ 1.8135607794189452,
+ 1.807762541885376,
+ 1.8036900131225586,
+ 1.8017769944000244,
+ 1.8010728991699219,
+ 1.7973168952178955,
+ 1.792228649635315,
+ 1.7974130847549439,
+ 1.7889769022369384,
+ 1.78288419921875,
+ 1.7853075231552125,
+ 1.7797698288726806,
+ 1.7783561060333253,
+ 1.7755371138763427,
+ 1.7767201525115968,
+ 1.7773720627593994,
+ 1.7749587761688232,
+ 1.7682355377960206,
+ 1.7642274634552002,
+ 1.7686031677627563,
+ 1.763687038230896,
+ 1.7665823318862914,
+ 1.7619021569824218,
+ 1.7553145666885377,
+ 1.757571950340271,
+ 1.7612224188995362,
+ 1.7522613665390014,
+ 1.7504500290298461,
+ 1.7475368619537353,
+ 1.7495313736343383,
+ 1.7496113208389281,
+ 1.7413074100494386,
+ 1.7437634180450439,
+ 1.7434646717071534,
+ 1.746056427268982,
+ 1.740490657081604,
+ 1.7333746536254884,
+ 1.7381776495742798,
+ 1.73842948387146,
+ 1.7355398804092408,
+ 1.7360767101287842,
+ 1.7332445461273194,
+ 1.732193847579956,
+ 1.730364631652832,
+ 1.732538067970276,
+ 1.7276911224746705,
+ 1.7270672924804686,
+ 1.7267360440826416,
+ 1.725644203414917,
+ 1.7283870937347412,
+ 1.72088895652771,
+ 1.7216768536376954,
+ 1.723486647377014,
+ 1.7211247494125366,
+ 1.7208690827178954,
+ 1.7219977252578735,
+ 1.7204493698120118,
+ 1.7233893868255614,
+ 1.7198170611190795,
+ 1.7190866617202758,
+ 1.7203575790405274,
+ 1.7177474556350707,
+ 1.7174966396713256,
+ 1.7169412566375732,
+ 1.714385998878479,
+ 1.7148360108184815,
+ 1.7158790673446656,
+ 1.7134223749542237,
+ 1.714623501663208,
+ 1.7133040780639648,
+ 1.715691188583374,
+ 1.7187325539398193,
+ 1.7134061930084228,
+ 1.713903324661255,
+ 1.7177725345230102,
+ 1.7178887582397462
+ ],
+ "train_acc": [
+ 0.24766,
+ 0.28764,
+ 0.30724,
+ 0.31554,
+ 0.31774,
+ 0.32862,
+ 0.32412,
+ 0.32998,
+ 0.33022,
+ 0.33776,
+ 0.33938,
+ 0.33914,
+ 0.3422,
+ 0.33992,
+ 0.34394,
+ 0.34276,
+ 0.3413,
+ 0.34586,
+ 0.347,
+ 0.34834,
+ 0.34908,
+ 0.35144,
+ 0.35026,
+ 0.3513,
+ 0.34898,
+ 0.35394,
+ 0.35424,
+ 0.35282,
+ 0.3581,
+ 0.35672,
+ 0.35968,
+ 0.35698,
+ 0.36002,
+ 0.36412,
+ 0.36052,
+ 0.36534,
+ 0.36372,
+ 0.36502,
+ 0.36444,
+ 0.36414,
+ 0.36688,
+ 0.36802,
+ 0.37136,
+ 0.3681,
+ 0.3721,
+ 0.36958,
+ 0.37192,
+ 0.37408,
+ 0.37342,
+ 0.37124,
+ 0.37412,
+ 0.37666,
+ 0.37718,
+ 0.3741,
+ 0.37748,
+ 0.37834,
+ 0.3806,
+ 0.37612,
+ 0.3775,
+ 0.37774,
+ 0.38072,
+ 0.38068,
+ 0.38082,
+ 0.37998,
+ 0.382,
+ 0.38172,
+ 0.38358,
+ 0.3851,
+ 0.38346,
+ 0.38376,
+ 0.38542,
+ 0.38474,
+ 0.38684,
+ 0.38692,
+ 0.3899,
+ 0.38944,
+ 0.38642,
+ 0.38598,
+ 0.38606,
+ 0.38656,
+ 0.38402,
+ 0.38854,
+ 0.38994,
+ 0.38908,
+ 0.3902,
+ 0.38896,
+ 0.39004,
+ 0.39054,
+ 0.3903,
+ 0.38856,
+ 0.38994,
+ 0.38868,
+ 0.38934,
+ 0.39004,
+ 0.39128,
+ 0.39024,
+ 0.39048,
+ 0.3904,
+ 0.38948,
+ 0.39032
+ ],
+ "test_acc": [
+ 0.2817,
+ 0.3086,
+ 0.321,
+ 0.3426,
+ 0.3496,
+ 0.3478,
+ 0.3546,
+ 0.3478,
+ 0.3568,
+ 0.3627,
+ 0.3663,
+ 0.3752,
+ 0.3646,
+ 0.3581,
+ 0.3779,
+ 0.3748,
+ 0.3759,
+ 0.3762,
+ 0.3858,
+ 0.3739,
+ 0.3779,
+ 0.3667,
+ 0.3721,
+ 0.3827,
+ 0.369,
+ 0.3761,
+ 0.3719,
+ 0.3794,
+ 0.38,
+ 0.3844,
+ 0.3733,
+ 0.3869,
+ 0.3885,
+ 0.3911,
+ 0.387,
+ 0.3906,
+ 0.3904,
+ 0.395,
+ 0.3849,
+ 0.3987,
+ 0.3901,
+ 0.4009,
+ 0.3893,
+ 0.385,
+ 0.3918,
+ 0.3995,
+ 0.392,
+ 0.3934,
+ 0.3954,
+ 0.4044,
+ 0.3976,
+ 0.3997,
+ 0.3967,
+ 0.402,
+ 0.404,
+ 0.3987,
+ 0.3997,
+ 0.3959,
+ 0.3955,
+ 0.3962,
+ 0.4028,
+ 0.4067,
+ 0.3984,
+ 0.4075,
+ 0.4072,
+ 0.4011,
+ 0.4028,
+ 0.4082,
+ 0.4069,
+ 0.4041,
+ 0.4058,
+ 0.4022,
+ 0.4061,
+ 0.4029,
+ 0.4059,
+ 0.4071,
+ 0.4086,
+ 0.4069,
+ 0.4058,
+ 0.4081,
+ 0.4083,
+ 0.4114,
+ 0.4078,
+ 0.4083,
+ 0.4074,
+ 0.4097,
+ 0.4064,
+ 0.4087,
+ 0.4057,
+ 0.4068,
+ 0.4067,
+ 0.407,
+ 0.4072,
+ 0.4089,
+ 0.4094,
+ 0.4092,
+ 0.4088,
+ 0.4081,
+ 0.4078,
+ 0.408
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.016317440196871758,
+ 0.08700302243232727,
+ -0.07087832689285278,
+ -0.04807429015636444,
+ -0.011868046596646309,
+ 0.9980045557022095
+ ],
+ "perturbation_rho": [
+ 0.021565234288573265,
+ 0.02034841850399971,
+ 0.026769006624817848,
+ -0.05038269981741905,
+ 0.007762039080262184,
+ 0.04057261347770691
+ ],
+ "nudging": {
+ "0.001": [
+ -1.8766731955111027e-06,
+ -6.295740604400635e-07,
+ 1.050066202878952e-07,
+ 5.390029400587082e-08,
+ -8.824281394481659e-08,
+ -1.8120626918971539e-06
+ ],
+ "0.003": [
+ -6.020389264449477e-06,
+ -1.8251012079417706e-06,
+ 4.4528860598802567e-07,
+ 2.2060703486204147e-07,
+ 2.1478626877069473e-08,
+ -6.290327291935682e-06
+ ],
+ "0.01": [
+ -2.0035397028550506e-05,
+ -6.116693839430809e-06,
+ 1.5703844837844372e-06,
+ 9.065261110663414e-07,
+ 1.710723154246807e-07,
+ -2.1861022105440497e-05
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 7090.23486328125,
+ 97328.1015625,
+ 1195400.0,
+ 1509493.75,
+ 1603199.125,
+ 1626782.875,
+ 733314.5
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.7028392651118338e-05,
+ 1.845057795435423e-06,
+ 8.682639531798486e-07,
+ 8.779788345236739e-07,
+ 8.714667956155608e-07,
+ 8.789162393441075e-07,
+ 8.642545594739204e-07
+ ]
+ },
+ "drift": {
+ "embed.weight": 49.97107099246965,
+ "embed.bias": 16.073336112282973,
+ "blocks.0.ln.weight": 1.1864794930711844,
+ "blocks.0.w1.weight": 16.772962175548443,
+ "blocks.0.w1.bias": 11.717179109423967,
+ "blocks.0.w2.weight": 54.31408520827723,
+ "blocks.1.ln.weight": 1.1429608481901679,
+ "blocks.1.w1.weight": 25.1727265395903,
+ "blocks.1.w1.bias": 23.582538699977512,
+ "blocks.1.w2.weight": 44.89348831927897,
+ "blocks.2.ln.weight": 0.6307651937494874,
+ "blocks.2.w1.weight": 20.089313457148293,
+ "blocks.2.w1.bias": 20.43961041250799,
+ "blocks.2.w2.weight": 36.04199442331163,
+ "blocks.3.ln.weight": 0.5625176858716091,
+ "blocks.3.w1.weight": 17.29253500275298,
+ "blocks.3.w1.bias": 18.353681256446446,
+ "blocks.3.w2.weight": 45.166175379509205,
+ "blocks.4.ln.weight": 0.5780873641807247,
+ "blocks.4.w1.weight": 15.563905559896059,
+ "blocks.4.w1.bias": 13.46488029874075,
+ "blocks.4.w2.weight": 59.00645861860142,
+ "blocks.5.ln.weight": 0.7521925423173332,
+ "blocks.5.w1.weight": 19.685866374226443,
+ "blocks.5.w1.bias": 20.12611595031584,
+ "blocks.5.w2.weight": 41.84701231716978,
+ "out_ln.weight": 0.3374467885491731,
+ "out_head.weight": 5.972151190658582,
+ "out_head.bias": 0.6096349208966545
+ }
+ }
+ },
+ "config": {
+ "dataset": "cifar10",
+ "d_hidden": 512,
+ "num_blocks": 6,
+ "batch_size": 128,
+ "epochs": 100,
+ "lr": 0.001,
+ "lr_fb": 0.001,
+ "wd": 0.01,
+ "lam": 0.1,
+ "K": 4,
+ "sigma_bridge": 0.05,
+ "ema_momentum": 0.995,
+ "term_grad_weight": 1.0,
+ "seeds": [
+ 8
+ ],
+ "gpu": 0,
+ "output_dir": "results/fa_dfa_d512_L6_seed8",
+ "methods": [
+ "fa",
+ "dfa"
+ ],
+ "random_targets": false,
+ "penalty_lam": 0.0,
+ "num_classes": 10
+ }
+} \ No newline at end of file
diff --git a/results/fa_dfa_d512_L6_seed9/results_cifar10.json b/results/fa_dfa_d512_L6_seed9/results_cifar10.json
new file mode 100644
index 0000000..b808ce9
--- /dev/null
+++ b/results/fa_dfa_d512_L6_seed9/results_cifar10.json
@@ -0,0 +1,837 @@
+{
+ "9": {
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 2.073234083404541,
+ 2.0497977373504637,
+ 2.0341352867126465,
+ 2.031250879135132,
+ 2.026975922088623,
+ 2.021300171508789,
+ 2.0191297291564942,
+ 2.0188821714782716,
+ 2.017022749557495,
+ 2.016341851234436,
+ 2.0123678436279295,
+ 2.010649602279663,
+ 2.0090650662994385,
+ 2.010900563201904,
+ 2.0095366540527344,
+ 2.0077656902313232,
+ 2.0068030431365966,
+ 2.008629825515747,
+ 2.003841092376709,
+ 2.005812280654907,
+ 2.0057511726379396,
+ 2.004702599334717,
+ 2.0014493046569823,
+ 2.004868564796448,
+ 2.001082904891968,
+ 1.9998853713989257,
+ 2.0003265306854248,
+ 2.0005562553405762,
+ 1.9986425912094117,
+ 2.0000717253875733,
+ 1.9990827807617189,
+ 1.9992364362335204,
+ 1.9985607556152343,
+ 1.996207259979248,
+ 1.9981809964370727,
+ 1.9958161113739015,
+ 1.9976731842422486,
+ 1.9966179808807374,
+ 1.9954889965057374,
+ 1.9954629190826416,
+ 1.9958389172363282,
+ 1.994535606842041,
+ 1.9974303654479981,
+ 1.9959670455932617,
+ 1.9949795317840575,
+ 1.995265456085205,
+ 1.9942493894195557,
+ 1.995007307357788,
+ 1.996318332901001,
+ 1.9937139911651611,
+ 1.994312198791504,
+ 1.9913738401031493,
+ 1.9951153566741944,
+ 1.9924673150253296,
+ 1.9923780603790284,
+ 1.9935618125152588,
+ 1.992571726989746,
+ 1.9926865383911132,
+ 1.992229995956421,
+ 1.993404683456421,
+ 1.9912095419311524,
+ 1.991911597442627,
+ 1.9921949435806274,
+ 1.9902286859512328,
+ 1.9926082902526856,
+ 1.9909677504730225,
+ 1.9914979708099365,
+ 1.9912472879791259,
+ 1.990876022491455,
+ 1.9902124596405029,
+ 1.9935787561035156,
+ 1.991565812225342,
+ 1.9911484326171875,
+ 1.9914972548675538,
+ 1.990185121498108,
+ 1.9900826383972168,
+ 1.9883790439605713,
+ 1.9883859337997436,
+ 1.9883267873382569,
+ 1.9883251065444947,
+ 1.9901439308166504,
+ 1.9902003237915038,
+ 1.9887106338500977,
+ 1.9913360192871095,
+ 1.9886777478027344,
+ 1.99161105342865,
+ 1.9905986673736573,
+ 1.9889384605407714,
+ 1.990373504333496,
+ 1.988581312599182,
+ 1.9881270119476318,
+ 1.986687230491638,
+ 1.9884908292007446,
+ 1.9869463809204102,
+ 1.9887974228668213,
+ 1.986062038192749,
+ 1.9904356650543213,
+ 1.9901889600372313,
+ 1.989139567527771,
+ 1.9876907534790038
+ ],
+ "train_acc": [
+ 0.23456,
+ 0.24532,
+ 0.2504,
+ 0.25112,
+ 0.25282,
+ 0.26,
+ 0.2606,
+ 0.2593,
+ 0.25506,
+ 0.2613,
+ 0.26512,
+ 0.26252,
+ 0.26454,
+ 0.2637,
+ 0.26436,
+ 0.26586,
+ 0.26598,
+ 0.26426,
+ 0.26796,
+ 0.26508,
+ 0.26656,
+ 0.26618,
+ 0.26934,
+ 0.26756,
+ 0.26878,
+ 0.2704,
+ 0.27164,
+ 0.2692,
+ 0.27146,
+ 0.27,
+ 0.27024,
+ 0.2712,
+ 0.27242,
+ 0.27516,
+ 0.27246,
+ 0.27374,
+ 0.27374,
+ 0.2729,
+ 0.27492,
+ 0.27392,
+ 0.2749,
+ 0.27492,
+ 0.2729,
+ 0.2735,
+ 0.27376,
+ 0.2749,
+ 0.27598,
+ 0.27244,
+ 0.27474,
+ 0.27518,
+ 0.27482,
+ 0.27532,
+ 0.27598,
+ 0.277,
+ 0.27772,
+ 0.27702,
+ 0.27816,
+ 0.2744,
+ 0.27456,
+ 0.27834,
+ 0.27612,
+ 0.27584,
+ 0.27394,
+ 0.27708,
+ 0.27626,
+ 0.27684,
+ 0.27762,
+ 0.27826,
+ 0.2762,
+ 0.27832,
+ 0.277,
+ 0.27744,
+ 0.27744,
+ 0.27842,
+ 0.27896,
+ 0.27732,
+ 0.27968,
+ 0.28094,
+ 0.27758,
+ 0.28126,
+ 0.28152,
+ 0.27998,
+ 0.2792,
+ 0.27652,
+ 0.27846,
+ 0.27788,
+ 0.27668,
+ 0.28002,
+ 0.27868,
+ 0.27792,
+ 0.2785,
+ 0.28064,
+ 0.27878,
+ 0.28056,
+ 0.279,
+ 0.28108,
+ 0.2801,
+ 0.27866,
+ 0.27774,
+ 0.27842
+ ],
+ "test_acc": [
+ 0.2428,
+ 0.2739,
+ 0.2627,
+ 0.2812,
+ 0.2892,
+ 0.2843,
+ 0.2907,
+ 0.2804,
+ 0.276,
+ 0.3009,
+ 0.2792,
+ 0.2917,
+ 0.2992,
+ 0.2872,
+ 0.2869,
+ 0.2731,
+ 0.2863,
+ 0.3004,
+ 0.2738,
+ 0.2964,
+ 0.2885,
+ 0.2907,
+ 0.2934,
+ 0.2869,
+ 0.283,
+ 0.295,
+ 0.2853,
+ 0.2785,
+ 0.2809,
+ 0.3069,
+ 0.2927,
+ 0.2929,
+ 0.3024,
+ 0.296,
+ 0.2988,
+ 0.2998,
+ 0.3001,
+ 0.3009,
+ 0.2946,
+ 0.3043,
+ 0.3071,
+ 0.2996,
+ 0.308,
+ 0.2894,
+ 0.2951,
+ 0.292,
+ 0.286,
+ 0.295,
+ 0.2997,
+ 0.2967,
+ 0.2942,
+ 0.2974,
+ 0.2921,
+ 0.2946,
+ 0.3006,
+ 0.303,
+ 0.303,
+ 0.3102,
+ 0.3042,
+ 0.31,
+ 0.3053,
+ 0.294,
+ 0.2993,
+ 0.3065,
+ 0.2955,
+ 0.2908,
+ 0.2943,
+ 0.3014,
+ 0.3029,
+ 0.3044,
+ 0.3066,
+ 0.2909,
+ 0.3097,
+ 0.3031,
+ 0.3,
+ 0.3012,
+ 0.2993,
+ 0.2967,
+ 0.3001,
+ 0.3051,
+ 0.2993,
+ 0.3091,
+ 0.3013,
+ 0.3013,
+ 0.3007,
+ 0.3031,
+ 0.3047,
+ 0.3032,
+ 0.3053,
+ 0.3041,
+ 0.3041,
+ 0.3041,
+ 0.3037,
+ 0.3036,
+ 0.3046,
+ 0.3036,
+ 0.3044,
+ 0.3041,
+ 0.3042,
+ 0.3041
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.3925938308238983,
+ 0.000656805990729481,
+ 0.0001544215774629265,
+ -0.0004282527952454984,
+ 0.0003690449520945549,
+ -0.00013921636855229735
+ ],
+ "perturbation_rho": [
+ 0.005831995978951454,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "nudging": {
+ "0.001": [
+ -3.3574178814888e-07,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.003": [
+ -1.0384246706962585e-06,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.01": [
+ -3.6191195249557495e-06,
+ 1.862645149230957e-09,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 54551.35546875,
+ 1099375360.0,
+ 3004748800.0,
+ 5508783616.0,
+ 6090545664.0,
+ 6586548736.0,
+ 9585897472.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.399317509116372e-07,
+ 1.9445174637144902e-10,
+ 1.943899208267652e-10,
+ 1.9486748326080772e-10,
+ 1.9483341329173953e-10,
+ 1.9483435698131046e-10,
+ 1.9498901104864075e-10
+ ]
+ },
+ "drift": {
+ "embed.weight": 328.6355950373237,
+ "embed.bias": 242.36454815135576,
+ "blocks.0.ln.weight": 10.003397541810306,
+ "blocks.0.w1.weight": 280.20952373585186,
+ "blocks.0.w1.bias": 245.5434949270726,
+ "blocks.0.w2.weight": 491.79926117548297,
+ "blocks.1.ln.weight": 8.729605792932698,
+ "blocks.1.w1.weight": 325.11308424934106,
+ "blocks.1.w1.bias": 311.5625818317441,
+ "blocks.1.w2.weight": 334.530484796484,
+ "blocks.2.ln.weight": 9.417363473500547,
+ "blocks.2.w1.weight": 404.6194334537652,
+ "blocks.2.w1.bias": 374.09843900052965,
+ "blocks.2.w2.weight": 394.7925902511279,
+ "blocks.3.ln.weight": 8.451752857956773,
+ "blocks.3.w1.weight": 326.2362218443471,
+ "blocks.3.w1.bias": 306.77785286979747,
+ "blocks.3.w2.weight": 303.662545367935,
+ "blocks.4.ln.weight": 8.451660472063256,
+ "blocks.4.w1.weight": 344.2742835972674,
+ "blocks.4.w1.bias": 326.1264884495552,
+ "blocks.4.w2.weight": 324.1161613020223,
+ "blocks.5.ln.weight": 11.13712283522192,
+ "blocks.5.w1.weight": 457.04149442154977,
+ "blocks.5.w1.bias": 422.6989592235929,
+ "blocks.5.w2.weight": 446.6716640278749,
+ "out_ln.weight": 0.5876580707043741,
+ "out_head.weight": 9.25778651436399,
+ "out_head.bias": 0.5027234759174332
+ }
+ },
+ "fa": {
+ "log": {
+ "train_loss": [
+ 2.0332628253936766,
+ 1.9553094621276856,
+ 1.9304233611679078,
+ 1.9176126587677003,
+ 1.9049958919906615,
+ 1.8898755237197875,
+ 1.8779738095474243,
+ 1.87334542842865,
+ 1.8662727130126953,
+ 1.8637196960830689,
+ 1.858073745689392,
+ 1.8548571952819823,
+ 1.8493252722930908,
+ 1.851236528892517,
+ 1.848223992576599,
+ 1.8478172241210937,
+ 1.8419879977416993,
+ 1.8413512692260743,
+ 1.8363069415664672,
+ 1.8369076172256469,
+ 1.8356854833984375,
+ 1.8313965267562866,
+ 1.826233511695862,
+ 1.8260947198867798,
+ 1.8246617177581788,
+ 1.8188705309677125,
+ 1.8175858071899413,
+ 1.817788968887329,
+ 1.8145698094940186,
+ 1.811288226928711,
+ 1.8075631722259522,
+ 1.8072492791366577,
+ 1.8021571556854248,
+ 1.8001763897705079,
+ 1.8002913983535767,
+ 1.7960401634979248,
+ 1.7964820532226562,
+ 1.7898353637695312,
+ 1.7879999541854859,
+ 1.789700000267029,
+ 1.7842918268585206,
+ 1.7852493328475951,
+ 1.7841659473037719,
+ 1.7837900876617432,
+ 1.7787483280181884,
+ 1.7782914597320556,
+ 1.7754856842422486,
+ 1.7753860149765015,
+ 1.7756032290267945,
+ 1.7689688110351562,
+ 1.7688034854507446,
+ 1.7665690698623657,
+ 1.767441110267639,
+ 1.7627149563598632,
+ 1.7611339742279053,
+ 1.7607593900299072,
+ 1.7611287790679933,
+ 1.7574685613250733,
+ 1.754263912963867,
+ 1.7551698053741456,
+ 1.7525737002563477,
+ 1.7504821019744874,
+ 1.7550537628936766,
+ 1.749086598892212,
+ 1.746996110610962,
+ 1.7465003091812135,
+ 1.7470869120025634,
+ 1.7463814974975587,
+ 1.7397632767486573,
+ 1.7418430507659912,
+ 1.7430473554229737,
+ 1.741324652786255,
+ 1.7399064194488525,
+ 1.7389730927276612,
+ 1.7398483694076539,
+ 1.7381602671051026,
+ 1.7356981131362914,
+ 1.7315244303131103,
+ 1.7377736062622071,
+ 1.7310680517196655,
+ 1.7351790799713134,
+ 1.733889009361267,
+ 1.73604018699646,
+ 1.7337274952697754,
+ 1.7322516064453124,
+ 1.7330827197647094,
+ 1.7328004323577881,
+ 1.7354140316772462,
+ 1.7350243264007568,
+ 1.7329780157470702,
+ 1.732491442489624,
+ 1.7300388946151732,
+ 1.7312415341949463,
+ 1.7289181127929687,
+ 1.7315491919326782,
+ 1.7262304349517823,
+ 1.733718437461853,
+ 1.7327616833877564,
+ 1.732974825668335,
+ 1.7304670404434204
+ ],
+ "train_acc": [
+ 0.25128,
+ 0.2863,
+ 0.29668,
+ 0.30188,
+ 0.30926,
+ 0.31842,
+ 0.32158,
+ 0.32524,
+ 0.326,
+ 0.32964,
+ 0.33238,
+ 0.33504,
+ 0.33644,
+ 0.33418,
+ 0.33848,
+ 0.33768,
+ 0.34274,
+ 0.34018,
+ 0.34388,
+ 0.34162,
+ 0.34312,
+ 0.3448,
+ 0.34788,
+ 0.34724,
+ 0.34808,
+ 0.34792,
+ 0.35052,
+ 0.3484,
+ 0.34834,
+ 0.35138,
+ 0.35202,
+ 0.3548,
+ 0.35396,
+ 0.35878,
+ 0.35564,
+ 0.35712,
+ 0.35584,
+ 0.36054,
+ 0.36252,
+ 0.3624,
+ 0.36288,
+ 0.36378,
+ 0.36368,
+ 0.36364,
+ 0.36672,
+ 0.36452,
+ 0.36916,
+ 0.36916,
+ 0.36646,
+ 0.36894,
+ 0.37018,
+ 0.37108,
+ 0.36976,
+ 0.3718,
+ 0.37092,
+ 0.37276,
+ 0.37434,
+ 0.37378,
+ 0.37382,
+ 0.37552,
+ 0.37628,
+ 0.37336,
+ 0.37356,
+ 0.37596,
+ 0.37708,
+ 0.37716,
+ 0.37758,
+ 0.37918,
+ 0.37974,
+ 0.37924,
+ 0.3793,
+ 0.38072,
+ 0.3823,
+ 0.38326,
+ 0.3807,
+ 0.3811,
+ 0.38186,
+ 0.3819,
+ 0.37932,
+ 0.38418,
+ 0.38282,
+ 0.38236,
+ 0.38112,
+ 0.38594,
+ 0.38248,
+ 0.38184,
+ 0.38116,
+ 0.3824,
+ 0.38242,
+ 0.38396,
+ 0.3811,
+ 0.38402,
+ 0.38436,
+ 0.38572,
+ 0.38324,
+ 0.38604,
+ 0.38198,
+ 0.38274,
+ 0.3843,
+ 0.38394
+ ],
+ "test_acc": [
+ 0.2784,
+ 0.3102,
+ 0.3139,
+ 0.3338,
+ 0.3473,
+ 0.3407,
+ 0.3546,
+ 0.341,
+ 0.3527,
+ 0.3631,
+ 0.3575,
+ 0.361,
+ 0.3661,
+ 0.3657,
+ 0.3671,
+ 0.3545,
+ 0.3752,
+ 0.3558,
+ 0.3637,
+ 0.3696,
+ 0.3788,
+ 0.3719,
+ 0.3783,
+ 0.3713,
+ 0.3801,
+ 0.3776,
+ 0.3826,
+ 0.3803,
+ 0.3804,
+ 0.38,
+ 0.3788,
+ 0.3834,
+ 0.3783,
+ 0.385,
+ 0.3843,
+ 0.3798,
+ 0.3854,
+ 0.3913,
+ 0.3868,
+ 0.3793,
+ 0.3823,
+ 0.387,
+ 0.3862,
+ 0.3885,
+ 0.393,
+ 0.3914,
+ 0.3913,
+ 0.388,
+ 0.395,
+ 0.3924,
+ 0.3895,
+ 0.3884,
+ 0.3872,
+ 0.3949,
+ 0.3852,
+ 0.3969,
+ 0.3964,
+ 0.3952,
+ 0.4008,
+ 0.3959,
+ 0.3952,
+ 0.3928,
+ 0.3987,
+ 0.3957,
+ 0.3957,
+ 0.4041,
+ 0.4015,
+ 0.4019,
+ 0.4027,
+ 0.4016,
+ 0.4043,
+ 0.3973,
+ 0.4001,
+ 0.3968,
+ 0.4013,
+ 0.4044,
+ 0.4001,
+ 0.4014,
+ 0.4009,
+ 0.4,
+ 0.3997,
+ 0.3998,
+ 0.4038,
+ 0.3999,
+ 0.402,
+ 0.4017,
+ 0.4004,
+ 0.402,
+ 0.4016,
+ 0.4016,
+ 0.4032,
+ 0.4014,
+ 0.4028,
+ 0.4035,
+ 0.4031,
+ 0.4026,
+ 0.403,
+ 0.4021,
+ 0.4026,
+ 0.4025
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.04259001836180687,
+ 0.06560904532670975,
+ -0.04747920483350754,
+ -0.030196242034435272,
+ -0.011228787712752819,
+ 0.9897887706756592
+ ],
+ "perturbation_rho": [
+ 0.027144353836774826,
+ 0.02096467837691307,
+ -0.03647351637482643,
+ 0.004128730855882168,
+ 0.035667650401592255,
+ 0.0024879188276827335
+ ],
+ "nudging": {
+ "0.001": [
+ -3.065855707973242e-06,
+ -3.6228448152542114e-07,
+ -7.62520357966423e-09,
+ 4.0279701352119446e-08,
+ -3.4924596548080444e-09,
+ -1.401233021169901e-06
+ ],
+ "0.003": [
+ -9.421346476301551e-06,
+ -1.0454095900058746e-06,
+ 1.4924444258213043e-07,
+ 1.5087425708770752e-07,
+ 2.1478626877069473e-08,
+ -4.659174010157585e-06
+ ],
+ "0.01": [
+ -3.1415780540555716e-05,
+ -3.479945007711649e-06,
+ 5.55417500436306e-07,
+ 4.3446198105812073e-07,
+ 1.6833655536174774e-07,
+ -1.60514609888196e-05
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 8874.3623046875,
+ 79337.484375,
+ 1180381.75,
+ 1586145.875,
+ 1886674.0,
+ 1911188.375,
+ 1308062.125
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.749456871242728e-05,
+ 1.8702693296290818e-06,
+ 6.362390649883309e-07,
+ 6.333205533337605e-07,
+ 6.349519026116468e-07,
+ 6.351035040097486e-07,
+ 6.164591468404979e-07
+ ]
+ },
+ "drift": {
+ "embed.weight": 53.51461020835447,
+ "embed.bias": 12.540707882150965,
+ "blocks.0.ln.weight": 1.1919744306866826,
+ "blocks.0.w1.weight": 17.44693961444689,
+ "blocks.0.w1.bias": 11.761966180483919,
+ "blocks.0.w2.weight": 54.05207721791165,
+ "blocks.1.ln.weight": 1.2851156800356192,
+ "blocks.1.w1.weight": 27.13129237243264,
+ "blocks.1.w1.bias": 21.945665839852136,
+ "blocks.1.w2.weight": 45.75957303201888,
+ "blocks.2.ln.weight": 0.765602321043846,
+ "blocks.2.w1.weight": 21.68864597033105,
+ "blocks.2.w1.bias": 20.84889617417975,
+ "blocks.2.w2.weight": 36.85421923299837,
+ "blocks.3.ln.weight": 0.7387797290176757,
+ "blocks.3.w1.weight": 21.304625635934347,
+ "blocks.3.w1.bias": 21.302804910719708,
+ "blocks.3.w2.weight": 42.158761396062424,
+ "blocks.4.ln.weight": 0.5093491405372358,
+ "blocks.4.w1.weight": 16.62521527836923,
+ "blocks.4.w1.bias": 14.569854707495779,
+ "blocks.4.w2.weight": 46.72830505948521,
+ "blocks.5.ln.weight": 0.569690236673946,
+ "blocks.5.w1.weight": 18.425801313816322,
+ "blocks.5.w1.bias": 15.707781883071574,
+ "blocks.5.w2.weight": 65.35620786546646,
+ "out_ln.weight": 0.42037958632530303,
+ "out_head.weight": 6.6986797412678145,
+ "out_head.bias": 0.660644788913502
+ }
+ }
+ },
+ "config": {
+ "dataset": "cifar10",
+ "d_hidden": 512,
+ "num_blocks": 6,
+ "batch_size": 128,
+ "epochs": 100,
+ "lr": 0.001,
+ "lr_fb": 0.001,
+ "wd": 0.01,
+ "lam": 0.1,
+ "K": 4,
+ "sigma_bridge": 0.05,
+ "ema_momentum": 0.995,
+ "term_grad_weight": 1.0,
+ "seeds": [
+ 9
+ ],
+ "gpu": 0,
+ "output_dir": "results/fa_dfa_d512_L6_seed9",
+ "methods": [
+ "fa",
+ "dfa"
+ ],
+ "random_targets": false,
+ "penalty_lam": 0.0,
+ "num_classes": 10
+ }
+} \ No newline at end of file
diff --git a/results/fa_dfa_d512_L8_seed0/results_cifar10.json b/results/fa_dfa_d512_L8_seed0/results_cifar10.json
new file mode 100644
index 0000000..de5d08f
--- /dev/null
+++ b/results/fa_dfa_d512_L8_seed0/results_cifar10.json
@@ -0,0 +1,881 @@
+{
+ "0": {
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 2.068570284347534,
+ 2.0450995418930056,
+ 2.040309367141724,
+ 2.0384985535430906,
+ 2.0337540267944334,
+ 2.028315601158142,
+ 2.0283870655059815,
+ 2.0261440406036377,
+ 2.0277783988952636,
+ 2.0236783444976805,
+ 2.0177216738128663,
+ 2.020857338027954,
+ 2.0178855153656006,
+ 2.0204481651306154,
+ 2.0144878174209593,
+ 2.012771276702881,
+ 2.0133113939666747,
+ 2.0119590814208985,
+ 2.009890116882324,
+ 2.008774749298096,
+ 2.0083382148742674,
+ 2.0076066046142578,
+ 2.0096171319580076,
+ 2.0058218814849855,
+ 2.0037864183044434,
+ 2.005599175491333,
+ 2.0061829089355467,
+ 2.0027509896087645,
+ 2.0058307901763914,
+ 2.005169650306702,
+ 2.001157734375,
+ 2.0021650346374513,
+ 2.001039823226929,
+ 2.0046913961029054,
+ 2.0023456330871583,
+ 2.00130324256897,
+ 2.0025460794067382,
+ 2.001611663970947,
+ 2.0031372194671633,
+ 2.0014712918090822,
+ 2.0003627224731444,
+ 1.9975626316070556,
+ 1.9982662202453614,
+ 1.9976352671813964,
+ 1.9969971923828125,
+ 2.0001233780288694,
+ 1.997716463356018,
+ 1.9996397301483155,
+ 1.9975465998840332,
+ 1.9973710316467286,
+ 1.9996610871887206,
+ 1.9990007359313964,
+ 1.9958738163757324,
+ 1.9966175972747802,
+ 2.0009265493774415,
+ 1.9975428602600098,
+ 1.9962558139038087,
+ 1.9977977807998657,
+ 1.9964274404144287,
+ 1.9977582873535156,
+ 1.9967647107696533,
+ 1.9964400707626342,
+ 1.9988198545074463,
+ 1.9962505130386352,
+ 1.9980460896301269,
+ 1.9944342567443847,
+ 1.9969512873077393,
+ 1.9951286602783203,
+ 1.9959452591705322,
+ 1.9949243884658814,
+ 1.997910827407837,
+ 1.9961381217956542,
+ 1.9937760234832764,
+ 1.9977886120605468,
+ 1.9961791613006592,
+ 1.99590330657959,
+ 1.9961589616394042,
+ 1.996615062561035,
+ 1.9954028228759766,
+ 1.994140139312744,
+ 1.9964727613067628,
+ 1.995709580001831,
+ 1.9935905380630494,
+ 1.9950566864013672,
+ 1.9948614299011231,
+ 1.9930699211120606,
+ 1.9950269234466553,
+ 1.996148171005249,
+ 1.9932368953704833,
+ 1.9957756786346434,
+ 1.9943457048034667,
+ 1.9930462799072266,
+ 1.9908040158081055,
+ 1.9929836280059814,
+ 1.995184083518982,
+ 1.9943841827392579,
+ 1.99466581741333,
+ 1.9947607112121581,
+ 1.9959496617889405,
+ 1.9939526372909546
+ ],
+ "train_acc": [
+ 0.23544,
+ 0.24512,
+ 0.24644,
+ 0.25124,
+ 0.25002,
+ 0.25482,
+ 0.25544,
+ 0.25706,
+ 0.25856,
+ 0.2579,
+ 0.26,
+ 0.25966,
+ 0.26192,
+ 0.26044,
+ 0.26436,
+ 0.2634,
+ 0.26574,
+ 0.26464,
+ 0.26664,
+ 0.26776,
+ 0.26856,
+ 0.26684,
+ 0.269,
+ 0.26998,
+ 0.272,
+ 0.27086,
+ 0.269,
+ 0.27068,
+ 0.27142,
+ 0.27146,
+ 0.27248,
+ 0.2718,
+ 0.27584,
+ 0.27106,
+ 0.2725,
+ 0.27434,
+ 0.27146,
+ 0.27324,
+ 0.27368,
+ 0.2727,
+ 0.27496,
+ 0.2771,
+ 0.27492,
+ 0.27596,
+ 0.27874,
+ 0.2729,
+ 0.27634,
+ 0.2747,
+ 0.27376,
+ 0.277,
+ 0.275,
+ 0.2776,
+ 0.27882,
+ 0.27748,
+ 0.27616,
+ 0.2745,
+ 0.2764,
+ 0.2782,
+ 0.27698,
+ 0.276,
+ 0.27666,
+ 0.27568,
+ 0.27892,
+ 0.2781,
+ 0.27582,
+ 0.2795,
+ 0.27838,
+ 0.27616,
+ 0.2783,
+ 0.2789,
+ 0.2776,
+ 0.27716,
+ 0.28014,
+ 0.27904,
+ 0.2768,
+ 0.2776,
+ 0.27682,
+ 0.279,
+ 0.28004,
+ 0.27846,
+ 0.28068,
+ 0.27798,
+ 0.28202,
+ 0.27836,
+ 0.28026,
+ 0.28116,
+ 0.27894,
+ 0.28088,
+ 0.28236,
+ 0.27878,
+ 0.2797,
+ 0.28002,
+ 0.28094,
+ 0.27994,
+ 0.27928,
+ 0.27954,
+ 0.27828,
+ 0.28064,
+ 0.27654,
+ 0.2811
+ ],
+ "test_acc": [
+ 0.2555,
+ 0.2658,
+ 0.2338,
+ 0.2589,
+ 0.2645,
+ 0.2829,
+ 0.2836,
+ 0.2386,
+ 0.2797,
+ 0.2828,
+ 0.2825,
+ 0.2743,
+ 0.2832,
+ 0.2778,
+ 0.277,
+ 0.2861,
+ 0.2887,
+ 0.2843,
+ 0.2986,
+ 0.3013,
+ 0.2914,
+ 0.2909,
+ 0.2788,
+ 0.2839,
+ 0.301,
+ 0.3034,
+ 0.295,
+ 0.2851,
+ 0.3031,
+ 0.2935,
+ 0.3072,
+ 0.2842,
+ 0.2977,
+ 0.3087,
+ 0.2878,
+ 0.2992,
+ 0.2958,
+ 0.2776,
+ 0.3095,
+ 0.302,
+ 0.3019,
+ 0.3096,
+ 0.3102,
+ 0.2911,
+ 0.2998,
+ 0.2978,
+ 0.2993,
+ 0.3104,
+ 0.2967,
+ 0.289,
+ 0.3004,
+ 0.3059,
+ 0.3001,
+ 0.2963,
+ 0.3022,
+ 0.2988,
+ 0.3028,
+ 0.2962,
+ 0.3041,
+ 0.3057,
+ 0.2973,
+ 0.305,
+ 0.3004,
+ 0.3098,
+ 0.2968,
+ 0.3054,
+ 0.3037,
+ 0.2995,
+ 0.3053,
+ 0.3065,
+ 0.3013,
+ 0.3067,
+ 0.3097,
+ 0.2996,
+ 0.3024,
+ 0.3038,
+ 0.2982,
+ 0.3071,
+ 0.3011,
+ 0.3049,
+ 0.3004,
+ 0.3033,
+ 0.3033,
+ 0.3029,
+ 0.3018,
+ 0.3025,
+ 0.3059,
+ 0.3053,
+ 0.3051,
+ 0.3062,
+ 0.3074,
+ 0.3024,
+ 0.3045,
+ 0.3056,
+ 0.3046,
+ 0.3048,
+ 0.3052,
+ 0.3054,
+ 0.3055,
+ 0.3055
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.3844696283340454,
+ -0.0003293692716397345,
+ 0.00036326167173683643,
+ -7.58874011808075e-06,
+ -0.000848759722430259,
+ -0.0005374888423830271,
+ -0.00015908177010715008,
+ 0.00029008230194449425
+ ],
+ "perturbation_rho": [
+ 0.009705127216875553,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "nudging": {
+ "0.001": [
+ -5.657784640789032e-07,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.003": [
+ -1.2135133147239685e-06,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.01": [
+ -3.6587007343769073e-06,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 51559.4296875,
+ 1329486592.0,
+ 4660075520.0,
+ 5993569792.0,
+ 6888088576.0,
+ 7015652864.0,
+ 8532604416.0,
+ 10583718912.0,
+ 12350084096.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.5801111291912093e-07,
+ 2.506394813917012e-10,
+ 2.495726680873389e-10,
+ 2.497514417498792e-10,
+ 2.4978316637280784e-10,
+ 2.497747564333963e-10,
+ 2.497407836088428e-10,
+ 2.497392292966083e-10,
+ 2.5016502758212766e-10
+ ]
+ },
+ "drift": {
+ "embed.weight": 331.3816715738323,
+ "embed.bias": 280.1972794762848,
+ "blocks.0.ln.weight": 10.206860276367006,
+ "blocks.0.w1.weight": 293.0648280946435,
+ "blocks.0.w1.bias": 287.8102020652532,
+ "blocks.0.w2.weight": 480.05673936823246,
+ "blocks.1.ln.weight": 9.536572296218509,
+ "blocks.1.w1.weight": 384.85005936378496,
+ "blocks.1.w1.bias": 372.907458869564,
+ "blocks.1.w2.weight": 395.5604232032487,
+ "blocks.2.ln.weight": 9.583575811181442,
+ "blocks.2.w1.weight": 391.35917488090115,
+ "blocks.2.w1.bias": 358.7794005832253,
+ "blocks.2.w2.weight": 359.6693258728706,
+ "blocks.3.ln.weight": 9.98809250356752,
+ "blocks.3.w1.weight": 372.15298556466314,
+ "blocks.3.w1.bias": 341.69408442244566,
+ "blocks.3.w2.weight": 332.4997523924133,
+ "blocks.4.ln.weight": 7.10483666283608,
+ "blocks.4.w1.weight": 274.0268087874156,
+ "blocks.4.w1.bias": 252.79226182282545,
+ "blocks.4.w2.weight": 253.45621037491264,
+ "blocks.5.ln.weight": 10.243956247187224,
+ "blocks.5.w1.weight": 408.60736547082587,
+ "blocks.5.w1.bias": 378.8848879049959,
+ "blocks.5.w2.weight": 385.0032037459413,
+ "blocks.6.ln.weight": 11.012226548081168,
+ "blocks.6.w1.weight": 445.2115675036214,
+ "blocks.6.w1.bias": 406.90392571558743,
+ "blocks.6.w2.weight": 422.7651604516687,
+ "blocks.7.ln.weight": 10.678682452403773,
+ "blocks.7.w1.weight": 427.86871943047703,
+ "blocks.7.w1.bias": 421.26775677167313,
+ "blocks.7.w2.weight": 424.8177101015246,
+ "out_ln.weight": 0.7424186036866861,
+ "out_head.weight": 9.83199206815889,
+ "out_head.bias": 0.4556331945087151
+ }
+ },
+ "fa": {
+ "log": {
+ "train_loss": [
+ 2.031117898826599,
+ 1.9475624210357667,
+ 1.920816502456665,
+ 1.909251974143982,
+ 1.8920108963775635,
+ 1.8767158478546142,
+ 1.8721334871673585,
+ 1.863781995162964,
+ 1.8590689016342163,
+ 1.8527897924041747,
+ 1.8401800704956055,
+ 1.846341034011841,
+ 1.8388335055160523,
+ 1.8359079233169555,
+ 1.8243437384414674,
+ 1.8181520553970336,
+ 1.8202268659210206,
+ 1.808049402732849,
+ 1.8043210766601563,
+ 1.804519454650879,
+ 1.805484197998047,
+ 1.8011124628448487,
+ 1.8000532245635987,
+ 1.793024960975647,
+ 1.7795401358413696,
+ 1.780609351196289,
+ 1.777570998802185,
+ 1.768387048072815,
+ 1.7707503821563721,
+ 1.7622215390396119,
+ 1.7559357610321045,
+ 1.7557656387710572,
+ 1.7530760443115234,
+ 1.7580088625717163,
+ 1.746208298110962,
+ 1.7444442225646972,
+ 1.7456918414306641,
+ 1.742843345336914,
+ 1.7410870483398437,
+ 1.7379502767181396,
+ 1.7418859932708741,
+ 1.7290691452789306,
+ 1.7275336447906493,
+ 1.7247933518218994,
+ 1.7241094170761109,
+ 1.723441693687439,
+ 1.7219038082122802,
+ 1.7236041189575195,
+ 1.7184861129379272,
+ 1.7169820540618896,
+ 1.7187074862289429,
+ 1.7181550318145753,
+ 1.7141529751205444,
+ 1.7122508407592774,
+ 1.7160141869735719,
+ 1.712083070716858,
+ 1.709077308998108,
+ 1.7120566661834717,
+ 1.7138131452178955,
+ 1.7056159759902954,
+ 1.706560867576599,
+ 1.7041735248565675,
+ 1.7031678924942018,
+ 1.7053270459747314,
+ 1.7001814685440064,
+ 1.698059520263672,
+ 1.700954571838379,
+ 1.6969071603012085,
+ 1.6964463064575195,
+ 1.6973033560562134,
+ 1.697128847579956,
+ 1.6953863137054443,
+ 1.6925591326141358,
+ 1.6960328076171876,
+ 1.694064889831543,
+ 1.6925205464935302,
+ 1.691927748451233,
+ 1.6897403769683839,
+ 1.6869727429580688,
+ 1.6902123848724364,
+ 1.692222232322693,
+ 1.6904909725952149,
+ 1.6880250762557982,
+ 1.6899029476165772,
+ 1.6866879183959962,
+ 1.6873126499176025,
+ 1.689984903640747,
+ 1.6877681387329102,
+ 1.6844988162994385,
+ 1.6888493407821654,
+ 1.6853452449798585,
+ 1.6879416900253297,
+ 1.6831971020889283,
+ 1.681332094078064,
+ 1.681463975868225,
+ 1.6855041525268555,
+ 1.6846018813323975,
+ 1.688128840942383,
+ 1.6876728596115111,
+ 1.6832955141830444
+ ],
+ "train_acc": [
+ 0.25122,
+ 0.29002,
+ 0.30074,
+ 0.3078,
+ 0.31202,
+ 0.32116,
+ 0.32302,
+ 0.32474,
+ 0.32998,
+ 0.3327,
+ 0.33662,
+ 0.3378,
+ 0.34062,
+ 0.3423,
+ 0.3468,
+ 0.3493,
+ 0.34718,
+ 0.35114,
+ 0.35658,
+ 0.35406,
+ 0.35438,
+ 0.356,
+ 0.35496,
+ 0.35712,
+ 0.36442,
+ 0.36318,
+ 0.3635,
+ 0.36856,
+ 0.36718,
+ 0.37118,
+ 0.37414,
+ 0.37068,
+ 0.37418,
+ 0.36996,
+ 0.3753,
+ 0.37762,
+ 0.3755,
+ 0.37454,
+ 0.37586,
+ 0.37612,
+ 0.37538,
+ 0.38164,
+ 0.3825,
+ 0.37992,
+ 0.38294,
+ 0.38242,
+ 0.38152,
+ 0.38306,
+ 0.3826,
+ 0.38608,
+ 0.38346,
+ 0.38492,
+ 0.38532,
+ 0.38858,
+ 0.38614,
+ 0.38406,
+ 0.38632,
+ 0.38606,
+ 0.38602,
+ 0.39134,
+ 0.39088,
+ 0.39142,
+ 0.39166,
+ 0.39208,
+ 0.39016,
+ 0.39156,
+ 0.3904,
+ 0.39498,
+ 0.39106,
+ 0.39302,
+ 0.39394,
+ 0.39172,
+ 0.39746,
+ 0.39332,
+ 0.39366,
+ 0.39408,
+ 0.39414,
+ 0.39544,
+ 0.3967,
+ 0.39562,
+ 0.39768,
+ 0.39624,
+ 0.397,
+ 0.39732,
+ 0.39628,
+ 0.39652,
+ 0.39712,
+ 0.39694,
+ 0.39822,
+ 0.39396,
+ 0.39708,
+ 0.39744,
+ 0.39826,
+ 0.39852,
+ 0.39892,
+ 0.39592,
+ 0.3976,
+ 0.39714,
+ 0.39696,
+ 0.40022
+ ],
+ "test_acc": [
+ 0.2945,
+ 0.3266,
+ 0.3121,
+ 0.33,
+ 0.3343,
+ 0.3566,
+ 0.3538,
+ 0.3381,
+ 0.3562,
+ 0.3602,
+ 0.3694,
+ 0.3632,
+ 0.3618,
+ 0.3697,
+ 0.3812,
+ 0.3821,
+ 0.3828,
+ 0.3851,
+ 0.3757,
+ 0.3917,
+ 0.389,
+ 0.3894,
+ 0.3834,
+ 0.3912,
+ 0.3926,
+ 0.3955,
+ 0.3907,
+ 0.3903,
+ 0.3941,
+ 0.384,
+ 0.3972,
+ 0.3875,
+ 0.399,
+ 0.4015,
+ 0.3924,
+ 0.3957,
+ 0.4009,
+ 0.3991,
+ 0.3996,
+ 0.4008,
+ 0.4036,
+ 0.4005,
+ 0.409,
+ 0.4097,
+ 0.4021,
+ 0.4009,
+ 0.4126,
+ 0.4071,
+ 0.4122,
+ 0.4089,
+ 0.4131,
+ 0.4113,
+ 0.4146,
+ 0.4059,
+ 0.4163,
+ 0.4114,
+ 0.4068,
+ 0.4117,
+ 0.4182,
+ 0.4115,
+ 0.4082,
+ 0.4193,
+ 0.4177,
+ 0.4224,
+ 0.416,
+ 0.4171,
+ 0.4137,
+ 0.4155,
+ 0.4184,
+ 0.4188,
+ 0.4182,
+ 0.4169,
+ 0.4182,
+ 0.414,
+ 0.4182,
+ 0.412,
+ 0.4183,
+ 0.4217,
+ 0.4169,
+ 0.4204,
+ 0.4189,
+ 0.4154,
+ 0.4163,
+ 0.4166,
+ 0.4175,
+ 0.4195,
+ 0.418,
+ 0.4198,
+ 0.4191,
+ 0.4194,
+ 0.4182,
+ 0.4198,
+ 0.4177,
+ 0.4183,
+ 0.4178,
+ 0.4181,
+ 0.4188,
+ 0.4187,
+ 0.4191,
+ 0.4191
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.04550348222255707,
+ 0.04952041804790497,
+ 0.012564106844365597,
+ -0.048967309296131134,
+ -0.018994076177477837,
+ -0.08921081572771072,
+ -0.09729112684726715,
+ 0.9970568418502808
+ ],
+ "perturbation_rho": [
+ -0.007405851036310196,
+ -0.01778452657163143,
+ 0.03651121258735657,
+ -0.004411454312503338,
+ -0.017335545271635056,
+ 0.024473480880260468,
+ 0.033633388578891754,
+ -0.0013661051634699106
+ ],
+ "nudging": {
+ "0.001": [
+ -5.257490556687117e-06,
+ -3.859749995172024e-07,
+ -9.487848728895187e-08,
+ 9.802170097827911e-08,
+ 2.2584572434425354e-08,
+ 1.0256189852952957e-07,
+ 7.171183824539185e-08,
+ -1.4471588656306267e-06
+ ],
+ "0.003": [
+ -1.5992671251296997e-05,
+ -1.0106596164405346e-06,
+ -1.9604340195655823e-07,
+ 2.4406472221016884e-07,
+ 1.200241968035698e-07,
+ 4.641478881239891e-07,
+ 5.108886398375034e-07,
+ -5.204754415899515e-06
+ ],
+ "0.01": [
+ -5.306815728545189e-05,
+ -3.1923409551382065e-06,
+ -3.415043465793133e-07,
+ 9.238137863576412e-07,
+ 3.079185262322426e-07,
+ 1.4764373190701008e-06,
+ 1.8483842723071575e-06,
+ -1.84740056283772e-05
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 5801.0615234375,
+ 58545.69140625,
+ 278396.125,
+ 541219.0625,
+ 842750.8125,
+ 1009016.125,
+ 1188189.0,
+ 1390662.625,
+ 689550.6875
+ ],
+ "bp_grad_norms_per_layer": [
+ 3.488941365503706e-05,
+ 2.940359536296455e-06,
+ 8.656722911837278e-07,
+ 7.742645493635791e-07,
+ 7.646057156307506e-07,
+ 7.609253316331888e-07,
+ 7.622682005603565e-07,
+ 7.812644753357745e-07,
+ 7.56423219172575e-07
+ ]
+ },
+ "drift": {
+ "embed.weight": 40.85701563091728,
+ "embed.bias": 16.257987544795743,
+ "blocks.0.ln.weight": 1.0155844718531961,
+ "blocks.0.w1.weight": 14.308616002505659,
+ "blocks.0.w1.bias": 11.682066059725996,
+ "blocks.0.w2.weight": 47.88448262527169,
+ "blocks.1.ln.weight": 0.9800293898409066,
+ "blocks.1.w1.weight": 18.22850941673304,
+ "blocks.1.w1.bias": 10.248089109744662,
+ "blocks.1.w2.weight": 46.5074943877367,
+ "blocks.2.ln.weight": 0.6484944766132613,
+ "blocks.2.w1.weight": 18.04882879127194,
+ "blocks.2.w1.bias": 15.21848342753742,
+ "blocks.2.w2.weight": 28.777360387947045,
+ "blocks.3.ln.weight": 0.5971288470484941,
+ "blocks.3.w1.weight": 17.899823557577733,
+ "blocks.3.w1.bias": 18.289110041973974,
+ "blocks.3.w2.weight": 26.844107923153718,
+ "blocks.4.ln.weight": 0.5827124751217686,
+ "blocks.4.w1.weight": 16.72204012093031,
+ "blocks.4.w1.bias": 17.062801665778103,
+ "blocks.4.w2.weight": 33.8783390116795,
+ "blocks.5.ln.weight": 0.4109250365181375,
+ "blocks.5.w1.weight": 17.501541376807705,
+ "blocks.5.w1.bias": 19.653202019435096,
+ "blocks.5.w2.weight": 21.067728941101397,
+ "blocks.6.ln.weight": 0.5111244501205948,
+ "blocks.6.w1.weight": 18.004625850717634,
+ "blocks.6.w1.bias": 18.9556455723282,
+ "blocks.6.w2.weight": 26.773409027964927,
+ "blocks.7.ln.weight": 0.6137697718245447,
+ "blocks.7.w1.weight": 19.644875839051267,
+ "blocks.7.w1.bias": 20.525336825132296,
+ "blocks.7.w2.weight": 43.29822559398887,
+ "out_ln.weight": 0.2984864729265921,
+ "out_head.weight": 5.62935512400422,
+ "out_head.bias": 0.701363478107578
+ }
+ }
+ },
+ "config": {
+ "dataset": "cifar10",
+ "d_hidden": 512,
+ "num_blocks": 8,
+ "batch_size": 128,
+ "epochs": 100,
+ "lr": 0.001,
+ "lr_fb": 0.001,
+ "wd": 0.01,
+ "lam": 0.1,
+ "K": 4,
+ "sigma_bridge": 0.05,
+ "ema_momentum": 0.995,
+ "term_grad_weight": 1.0,
+ "seeds": [
+ 0
+ ],
+ "gpu": 0,
+ "output_dir": "results/fa_dfa_d512_L8_seed0",
+ "methods": [
+ "fa",
+ "dfa"
+ ],
+ "random_targets": false,
+ "penalty_lam": 0.0,
+ "num_classes": 10
+ }
+} \ No newline at end of file
diff --git a/results/fa_dfa_d512_L8_seed1/results_cifar10.json b/results/fa_dfa_d512_L8_seed1/results_cifar10.json
new file mode 100644
index 0000000..7ae4222
--- /dev/null
+++ b/results/fa_dfa_d512_L8_seed1/results_cifar10.json
@@ -0,0 +1,881 @@
+{
+ "1": {
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 2.082363905029297,
+ 2.0549054863739014,
+ 2.048207590179443,
+ 2.040793383560181,
+ 2.0366951902008057,
+ 2.032741487388611,
+ 2.0325103788757324,
+ 2.030692584877014,
+ 2.0259502252197263,
+ 2.025390202407837,
+ 2.0222666513061522,
+ 2.0214237771606447,
+ 2.0199728786468505,
+ 2.021625373916626,
+ 2.017548585968018,
+ 2.0168718279266358,
+ 2.0175807970428465,
+ 2.0172574797058105,
+ 2.01521342338562,
+ 2.0168276077270506,
+ 2.012039394607544,
+ 2.0129323637390137,
+ 2.014951186904907,
+ 2.0105557980346678,
+ 2.01169651550293,
+ 2.0122193618011472,
+ 2.0088330200576783,
+ 2.0112308924102784,
+ 2.010762939796448,
+ 2.0086743901824953,
+ 2.00971054649353,
+ 2.010672675628662,
+ 2.007623846206665,
+ 2.0087671311187743,
+ 2.0092544104766845,
+ 2.0102373889541627,
+ 2.0070104084014893,
+ 2.00739478553772,
+ 2.0070192804336546,
+ 2.0084682094573973,
+ 2.00599662361145,
+ 2.005214340057373,
+ 2.0051871164703368,
+ 2.0050434196472167,
+ 2.005824287185669,
+ 2.004784624862671,
+ 2.006481015930176,
+ 2.004454507408142,
+ 2.004789038391113,
+ 2.004796143836975,
+ 2.004693070678711,
+ 2.0054635766601563,
+ 2.005378037185669,
+ 2.004756614151001,
+ 2.0019625535583496,
+ 2.002588889465332,
+ 2.0051779277038575,
+ 2.0044091219711304,
+ 2.003338354034424,
+ 2.0033947316741942,
+ 2.003234750213623,
+ 2.0028414460754393,
+ 2.0036003881072997,
+ 2.0034564432525634,
+ 2.0025813438415527,
+ 2.0027055737304686,
+ 2.003683521270752,
+ 2.000025806732178,
+ 2.000833299789429,
+ 2.000070794067383,
+ 2.0004121925354004,
+ 1.9998590943908692,
+ 2.001034607772827,
+ 1.9999368872451782,
+ 2.001410151634216,
+ 1.9992089236831665,
+ 2.000929930076599,
+ 2.0007802046966554,
+ 1.999412225341797,
+ 2.00004220161438,
+ 1.9992640100860595,
+ 1.9985904244995116,
+ 1.9986971343231201,
+ 1.998013607559204,
+ 1.9992886389160156,
+ 1.999632513961792,
+ 1.9996888436889648,
+ 1.997718791885376,
+ 1.9993746613311767,
+ 1.9998883880233764,
+ 1.9990022793960571,
+ 1.997506809692383,
+ 2.0001587979507445,
+ 1.9999388419342041,
+ 1.997678748703003,
+ 1.9990675243759155,
+ 1.9979941717910767,
+ 1.99825477684021,
+ 1.9982349634552001,
+ 1.9987702852630616
+ ],
+ "train_acc": [
+ 0.22766,
+ 0.23814,
+ 0.24064,
+ 0.24662,
+ 0.24896,
+ 0.24744,
+ 0.25128,
+ 0.25148,
+ 0.257,
+ 0.25682,
+ 0.25768,
+ 0.25754,
+ 0.25738,
+ 0.25788,
+ 0.25796,
+ 0.26228,
+ 0.26038,
+ 0.26018,
+ 0.2624,
+ 0.26194,
+ 0.26886,
+ 0.26334,
+ 0.26348,
+ 0.26562,
+ 0.26452,
+ 0.26388,
+ 0.26886,
+ 0.26554,
+ 0.26736,
+ 0.26842,
+ 0.26788,
+ 0.2677,
+ 0.26986,
+ 0.26932,
+ 0.2696,
+ 0.26988,
+ 0.27094,
+ 0.26952,
+ 0.27026,
+ 0.26998,
+ 0.27264,
+ 0.27378,
+ 0.27244,
+ 0.27328,
+ 0.27004,
+ 0.27324,
+ 0.27278,
+ 0.27568,
+ 0.27154,
+ 0.27282,
+ 0.27324,
+ 0.27192,
+ 0.27224,
+ 0.27294,
+ 0.27522,
+ 0.27348,
+ 0.27266,
+ 0.2733,
+ 0.27448,
+ 0.27446,
+ 0.27554,
+ 0.27596,
+ 0.27772,
+ 0.2751,
+ 0.27624,
+ 0.2762,
+ 0.27634,
+ 0.27822,
+ 0.27598,
+ 0.2772,
+ 0.2767,
+ 0.27624,
+ 0.27632,
+ 0.27776,
+ 0.27558,
+ 0.27776,
+ 0.27686,
+ 0.27808,
+ 0.27808,
+ 0.27784,
+ 0.27804,
+ 0.2769,
+ 0.27848,
+ 0.27982,
+ 0.27622,
+ 0.27712,
+ 0.27716,
+ 0.27708,
+ 0.2775,
+ 0.27748,
+ 0.27844,
+ 0.27692,
+ 0.2778,
+ 0.27656,
+ 0.27824,
+ 0.27638,
+ 0.27978,
+ 0.27886,
+ 0.27804,
+ 0.27882
+ ],
+ "test_acc": [
+ 0.2423,
+ 0.2394,
+ 0.2347,
+ 0.2736,
+ 0.2726,
+ 0.255,
+ 0.2481,
+ 0.2855,
+ 0.2847,
+ 0.266,
+ 0.291,
+ 0.2613,
+ 0.2775,
+ 0.2742,
+ 0.268,
+ 0.2726,
+ 0.2918,
+ 0.2421,
+ 0.2813,
+ 0.2608,
+ 0.2622,
+ 0.2733,
+ 0.2684,
+ 0.2941,
+ 0.2935,
+ 0.2823,
+ 0.2868,
+ 0.2952,
+ 0.3002,
+ 0.2952,
+ 0.3004,
+ 0.2797,
+ 0.2936,
+ 0.2993,
+ 0.2878,
+ 0.291,
+ 0.2839,
+ 0.2917,
+ 0.2933,
+ 0.2925,
+ 0.2814,
+ 0.2948,
+ 0.2987,
+ 0.2876,
+ 0.2737,
+ 0.2985,
+ 0.3022,
+ 0.2788,
+ 0.2868,
+ 0.2958,
+ 0.2886,
+ 0.302,
+ 0.2925,
+ 0.2965,
+ 0.2882,
+ 0.2941,
+ 0.3024,
+ 0.2895,
+ 0.3013,
+ 0.2994,
+ 0.2891,
+ 0.2866,
+ 0.291,
+ 0.2939,
+ 0.2834,
+ 0.2973,
+ 0.2851,
+ 0.2965,
+ 0.2951,
+ 0.2911,
+ 0.297,
+ 0.2923,
+ 0.2987,
+ 0.2954,
+ 0.2925,
+ 0.2892,
+ 0.2923,
+ 0.2956,
+ 0.2976,
+ 0.2978,
+ 0.2955,
+ 0.2986,
+ 0.2959,
+ 0.294,
+ 0.294,
+ 0.297,
+ 0.296,
+ 0.2964,
+ 0.2979,
+ 0.2999,
+ 0.2958,
+ 0.2963,
+ 0.2963,
+ 0.2954,
+ 0.2959,
+ 0.2953,
+ 0.2952,
+ 0.2956,
+ 0.2958,
+ 0.2958
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.396922767162323,
+ -0.0003933884436264634,
+ -8.022795373108238e-05,
+ -0.00015045293548610061,
+ 0.0005392992752604187,
+ -0.00036555560654960573,
+ 0.0002907244488596916,
+ -9.105022036237642e-05
+ ],
+ "perturbation_rho": [
+ -0.022967863827943802,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "nudging": {
+ "0.001": [
+ -3.4226104617118835e-07,
+ 0.0,
+ 0.0,
+ 0.0,
+ -1.862645149230957e-09,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.003": [
+ -1.051928848028183e-06,
+ 0.0,
+ -1.862645149230957e-09,
+ 0.0,
+ -2.7939677238464355e-09,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.01": [
+ -3.5013072192668915e-06,
+ 0.0,
+ -1.862645149230957e-09,
+ 0.0,
+ -4.6566128730773926e-09,
+ 0.0,
+ 0.0,
+ 0.0
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 56414.3359375,
+ 1650990848.0,
+ 3797263872.0,
+ 4648061440.0,
+ 5705044480.0,
+ 6883173376.0,
+ 7810007552.0,
+ 9771259904.0,
+ 10072083456.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.3933893089633784e-07,
+ 2.075998678519042e-10,
+ 2.073478749808899e-10,
+ 2.0727551619525997e-10,
+ 2.0692902946706226e-10,
+ 2.0691065527600472e-10,
+ 2.0689701341058964e-10,
+ 2.0701990122162783e-10,
+ 2.070805887877114e-10
+ ]
+ },
+ "drift": {
+ "embed.weight": 342.6478357645448,
+ "embed.bias": 262.14247134543683,
+ "blocks.0.ln.weight": 10.104765097486084,
+ "blocks.0.w1.weight": 316.5544638552012,
+ "blocks.0.w1.bias": 286.1293710348026,
+ "blocks.0.w2.weight": 488.0042854468774,
+ "blocks.1.ln.weight": 9.276528933854145,
+ "blocks.1.w1.weight": 365.09493896949925,
+ "blocks.1.w1.bias": 338.1199629883399,
+ "blocks.1.w2.weight": 335.8119384773775,
+ "blocks.2.ln.weight": 8.33234375341126,
+ "blocks.2.w1.weight": 339.8426755615624,
+ "blocks.2.w1.bias": 311.7235998065332,
+ "blocks.2.w2.weight": 316.14958807276867,
+ "blocks.3.ln.weight": 8.800684936876243,
+ "blocks.3.w1.weight": 365.78353354881364,
+ "blocks.3.w1.bias": 339.9117633918553,
+ "blocks.3.w2.weight": 349.88606585029197,
+ "blocks.4.ln.weight": 9.453209200185142,
+ "blocks.4.w1.weight": 385.95491307373965,
+ "blocks.4.w1.bias": 356.64512170760173,
+ "blocks.4.w2.weight": 352.5237601257774,
+ "blocks.5.ln.weight": 9.069459103352756,
+ "blocks.5.w1.weight": 368.17480746032606,
+ "blocks.5.w1.bias": 336.75699627476126,
+ "blocks.5.w2.weight": 341.8227235483115,
+ "blocks.6.ln.weight": 11.485193095368288,
+ "blocks.6.w1.weight": 454.812566497913,
+ "blocks.6.w1.bias": 427.10330552698105,
+ "blocks.6.w2.weight": 417.7750307221631,
+ "blocks.7.ln.weight": 8.765686193273819,
+ "blocks.7.w1.weight": 347.8255816484655,
+ "blocks.7.w1.bias": 334.696723010558,
+ "blocks.7.w2.weight": 324.7278045130555,
+ "out_ln.weight": 0.6473491781322253,
+ "out_head.weight": 9.841211699963578,
+ "out_head.bias": 1.0361314019488228
+ }
+ },
+ "fa": {
+ "log": {
+ "train_loss": [
+ 2.0216404482269286,
+ 1.9374429712677002,
+ 1.9162016125488281,
+ 1.8889766452026366,
+ 1.8700911392211914,
+ 1.8582790970611571,
+ 1.8527047716903686,
+ 1.848144301109314,
+ 1.8374218865966796,
+ 1.8318119472503662,
+ 1.8252220174407958,
+ 1.8180473458862305,
+ 1.8125171118927002,
+ 1.8029107147216796,
+ 1.8021541842651367,
+ 1.7970342992401123,
+ 1.7941553127288818,
+ 1.7909175275421143,
+ 1.782177930908203,
+ 1.78192981880188,
+ 1.7714600928115845,
+ 1.7727110540008546,
+ 1.7706272528076172,
+ 1.7628321991348266,
+ 1.7623195043563842,
+ 1.7597255520629882,
+ 1.7538186774063111,
+ 1.7597168767547609,
+ 1.751037622909546,
+ 1.7449284631729125,
+ 1.7485404402923583,
+ 1.7483091131973267,
+ 1.7475471432113647,
+ 1.7437016841888429,
+ 1.744876121864319,
+ 1.7462471084213256,
+ 1.739538355178833,
+ 1.740323678817749,
+ 1.7387758260345458,
+ 1.7394548602294921,
+ 1.7387311511611938,
+ 1.7320650988388062,
+ 1.7338726557159423,
+ 1.7331176211547852,
+ 1.7323205828475952,
+ 1.7314462683486938,
+ 1.7310566067886353,
+ 1.7290524214935303,
+ 1.728844566001892,
+ 1.7304431410980226,
+ 1.7348097546386718,
+ 1.7295909213638305,
+ 1.7310686675262452,
+ 1.7292217221069337,
+ 1.7266970907211303,
+ 1.7262331484985352,
+ 1.7316008935928344,
+ 1.732737080116272,
+ 1.72913513130188,
+ 1.7267526404190063,
+ 1.7301805443954468,
+ 1.726527798461914,
+ 1.7265644375991822,
+ 1.7238530459213257,
+ 1.7280543838500977,
+ 1.7257318978881835,
+ 1.726536202392578,
+ 1.7238884717559815,
+ 1.724731280784607,
+ 1.7219169683456421,
+ 1.725569347229004,
+ 1.7213600470352173,
+ 1.7229674570083617,
+ 1.723882184753418,
+ 1.7203116341781617,
+ 1.723835189590454,
+ 1.7169267540740967,
+ 1.7185560147094727,
+ 1.7191566823577882,
+ 1.7162863437652587,
+ 1.7187646448135376,
+ 1.7191588035202026,
+ 1.716515462913513,
+ 1.7150226160430908,
+ 1.7141532082748414,
+ 1.7141116730499268,
+ 1.7139857364273072,
+ 1.7161692111587525,
+ 1.7135670114135741,
+ 1.7172551581573485,
+ 1.711785320739746,
+ 1.710454179458618,
+ 1.7139234024429322,
+ 1.712289864768982,
+ 1.7146155573272706,
+ 1.7152564708709717,
+ 1.713687544631958,
+ 1.7137356586456298,
+ 1.7130619507217406,
+ 1.7119491666030884
+ ],
+ "train_acc": [
+ 0.25728,
+ 0.2904,
+ 0.30204,
+ 0.31214,
+ 0.32092,
+ 0.32608,
+ 0.32958,
+ 0.333,
+ 0.34152,
+ 0.34204,
+ 0.34534,
+ 0.34792,
+ 0.34984,
+ 0.35522,
+ 0.35358,
+ 0.35932,
+ 0.35884,
+ 0.35804,
+ 0.36126,
+ 0.3618,
+ 0.36572,
+ 0.36352,
+ 0.36422,
+ 0.36884,
+ 0.36892,
+ 0.36766,
+ 0.3732,
+ 0.36848,
+ 0.37482,
+ 0.37654,
+ 0.374,
+ 0.37336,
+ 0.37234,
+ 0.37542,
+ 0.37588,
+ 0.37886,
+ 0.3797,
+ 0.37564,
+ 0.37638,
+ 0.37848,
+ 0.37414,
+ 0.38038,
+ 0.38024,
+ 0.37908,
+ 0.37766,
+ 0.38196,
+ 0.38054,
+ 0.3817,
+ 0.37988,
+ 0.3799,
+ 0.37976,
+ 0.3801,
+ 0.38004,
+ 0.3791,
+ 0.38402,
+ 0.38122,
+ 0.38052,
+ 0.38022,
+ 0.37968,
+ 0.38174,
+ 0.37974,
+ 0.3824,
+ 0.38198,
+ 0.38646,
+ 0.38242,
+ 0.38228,
+ 0.37926,
+ 0.386,
+ 0.38334,
+ 0.3845,
+ 0.38592,
+ 0.38384,
+ 0.3829,
+ 0.38474,
+ 0.38294,
+ 0.38496,
+ 0.3877,
+ 0.3868,
+ 0.38626,
+ 0.38876,
+ 0.3878,
+ 0.38576,
+ 0.38496,
+ 0.387,
+ 0.38818,
+ 0.38784,
+ 0.38816,
+ 0.38708,
+ 0.38834,
+ 0.38604,
+ 0.38942,
+ 0.39,
+ 0.38986,
+ 0.39166,
+ 0.38932,
+ 0.3872,
+ 0.38822,
+ 0.38954,
+ 0.38726,
+ 0.39068
+ ],
+ "test_acc": [
+ 0.3016,
+ 0.3144,
+ 0.318,
+ 0.344,
+ 0.3607,
+ 0.3449,
+ 0.3552,
+ 0.3675,
+ 0.3696,
+ 0.3786,
+ 0.3818,
+ 0.374,
+ 0.3837,
+ 0.3739,
+ 0.3839,
+ 0.3766,
+ 0.3871,
+ 0.3763,
+ 0.3964,
+ 0.3769,
+ 0.375,
+ 0.3863,
+ 0.3847,
+ 0.4028,
+ 0.3957,
+ 0.4041,
+ 0.3988,
+ 0.3948,
+ 0.4082,
+ 0.4017,
+ 0.4074,
+ 0.4028,
+ 0.4108,
+ 0.4004,
+ 0.4041,
+ 0.4025,
+ 0.4049,
+ 0.4087,
+ 0.4,
+ 0.402,
+ 0.3985,
+ 0.4059,
+ 0.4004,
+ 0.4091,
+ 0.4071,
+ 0.4143,
+ 0.4132,
+ 0.409,
+ 0.4076,
+ 0.4057,
+ 0.4061,
+ 0.4117,
+ 0.41,
+ 0.4148,
+ 0.4041,
+ 0.4149,
+ 0.4053,
+ 0.4078,
+ 0.4003,
+ 0.4132,
+ 0.4143,
+ 0.4119,
+ 0.4157,
+ 0.4033,
+ 0.417,
+ 0.4029,
+ 0.4075,
+ 0.4054,
+ 0.4092,
+ 0.4144,
+ 0.4062,
+ 0.4099,
+ 0.4126,
+ 0.4157,
+ 0.4137,
+ 0.4152,
+ 0.4095,
+ 0.4124,
+ 0.4125,
+ 0.4084,
+ 0.4123,
+ 0.4109,
+ 0.4112,
+ 0.4101,
+ 0.4096,
+ 0.4125,
+ 0.4075,
+ 0.4126,
+ 0.4119,
+ 0.4123,
+ 0.412,
+ 0.4125,
+ 0.4119,
+ 0.4121,
+ 0.4112,
+ 0.4123,
+ 0.4125,
+ 0.4116,
+ 0.4123,
+ 0.4123
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.018269643187522888,
+ 0.07727976143360138,
+ -0.007720204535871744,
+ -0.03330487757921219,
+ -0.08965514600276947,
+ -0.05200214684009552,
+ -0.026286372914910316,
+ 0.9987640380859375
+ ],
+ "perturbation_rho": [
+ -0.009564901702105999,
+ -0.008453571237623692,
+ 0.004972926340997219,
+ -0.0027248896658420563,
+ -0.002009802497923374,
+ -0.036599986255168915,
+ -0.004199513234198093,
+ -0.03066324070096016
+ ],
+ "nudging": {
+ "0.001": [
+ -2.971384674310684e-06,
+ -4.7439243644475937e-07,
+ -6.693881005048752e-08,
+ 8.987262845039368e-08,
+ 6.495974957942963e-08,
+ 3.585591912269592e-08,
+ 5.820766091346741e-08,
+ -1.373467966914177e-06
+ ],
+ "0.003": [
+ -8.388538844883442e-06,
+ -1.432374119758606e-06,
+ 3.236345946788788e-08,
+ 1.6565900295972824e-07,
+ 4.4424086809158325e-07,
+ 1.909211277961731e-07,
+ 1.6344711184501648e-07,
+ -5.066161975264549e-06
+ ],
+ "0.01": [
+ -2.80656386166811e-05,
+ -5.379552021622658e-06,
+ 6.239861249923706e-08,
+ 5.534384399652481e-07,
+ 1.6264384612441063e-06,
+ 8.010538294911385e-07,
+ 5.328329280018806e-07,
+ -1.849012915045023e-05
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 6323.6376953125,
+ 105527.390625,
+ 956923.0625,
+ 1390351.75,
+ 1878290.125,
+ 2305144.75,
+ 2408578.0,
+ 2446730.0,
+ 1101165.75
+ ],
+ "bp_grad_norms_per_layer": [
+ 3.628878766903654e-05,
+ 2.4742682853684528e-06,
+ 7.756235049782845e-07,
+ 7.634440635229112e-07,
+ 7.605355563100602e-07,
+ 7.63955029015051e-07,
+ 7.672065862607269e-07,
+ 7.676999871364387e-07,
+ 7.604297138641414e-07
+ ]
+ },
+ "drift": {
+ "embed.weight": 41.248137439097945,
+ "embed.bias": 19.492810839395194,
+ "blocks.0.ln.weight": 1.069830164213397,
+ "blocks.0.w1.weight": 14.662790472591759,
+ "blocks.0.w1.bias": 14.121256961200007,
+ "blocks.0.w2.weight": 58.06593910253397,
+ "blocks.1.ln.weight": 1.133609473869923,
+ "blocks.1.w1.weight": 22.616267702800116,
+ "blocks.1.w1.bias": 18.62883152876521,
+ "blocks.1.w2.weight": 55.42300201602086,
+ "blocks.2.ln.weight": 0.8395001619703416,
+ "blocks.2.w1.weight": 23.18964915987391,
+ "blocks.2.w1.bias": 23.25897523020434,
+ "blocks.2.w2.weight": 51.12589199717472,
+ "blocks.3.ln.weight": 0.8079243325814617,
+ "blocks.3.w1.weight": 23.270223091205043,
+ "blocks.3.w1.bias": 23.973451384958736,
+ "blocks.3.w2.weight": 44.38945707970049,
+ "blocks.4.ln.weight": 0.7352731954552547,
+ "blocks.4.w1.weight": 23.461113331044203,
+ "blocks.4.w1.bias": 25.70390171443948,
+ "blocks.4.w2.weight": 35.45845934569411,
+ "blocks.5.ln.weight": 0.5783243137671081,
+ "blocks.5.w1.weight": 18.59117833907557,
+ "blocks.5.w1.bias": 19.752451457096242,
+ "blocks.5.w2.weight": 34.26869815664233,
+ "blocks.6.ln.weight": 0.5587106874210115,
+ "blocks.6.w1.weight": 16.34076044382607,
+ "blocks.6.w1.bias": 16.11418290554864,
+ "blocks.6.w2.weight": 46.04419499078452,
+ "blocks.7.ln.weight": 0.7410616437288059,
+ "blocks.7.w1.weight": 24.360145562410782,
+ "blocks.7.w1.bias": 27.19985092354318,
+ "blocks.7.w2.weight": 37.619326829386324,
+ "out_ln.weight": 0.35428257928264884,
+ "out_head.weight": 7.449367264588818,
+ "out_head.bias": 2.03709619180192
+ }
+ }
+ },
+ "config": {
+ "dataset": "cifar10",
+ "d_hidden": 512,
+ "num_blocks": 8,
+ "batch_size": 128,
+ "epochs": 100,
+ "lr": 0.001,
+ "lr_fb": 0.001,
+ "wd": 0.01,
+ "lam": 0.1,
+ "K": 4,
+ "sigma_bridge": 0.05,
+ "ema_momentum": 0.995,
+ "term_grad_weight": 1.0,
+ "seeds": [
+ 1
+ ],
+ "gpu": 0,
+ "output_dir": "results/fa_dfa_d512_L8_seed1",
+ "methods": [
+ "fa",
+ "dfa"
+ ],
+ "random_targets": false,
+ "penalty_lam": 0.0,
+ "num_classes": 10
+ }
+} \ No newline at end of file
diff --git a/results/fa_dfa_d512_L8_seed2/results_cifar10.json b/results/fa_dfa_d512_L8_seed2/results_cifar10.json
new file mode 100644
index 0000000..5a01422
--- /dev/null
+++ b/results/fa_dfa_d512_L8_seed2/results_cifar10.json
@@ -0,0 +1,881 @@
+{
+ "2": {
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 2.0644361302948,
+ 2.046419815826416,
+ 2.038333583984375,
+ 2.038490813598633,
+ 2.039234817237854,
+ 2.036761814575195,
+ 2.0334856452941894,
+ 2.034733783721924,
+ 2.029182948684692,
+ 2.0294196990585327,
+ 2.0262659832000733,
+ 2.026484178543091,
+ 2.025475791244507,
+ 2.0258196518707274,
+ 2.0231178736114503,
+ 2.0239224526977537,
+ 2.0206442892456056,
+ 2.020261905860901,
+ 2.0215636526489256,
+ 2.0221778102874755,
+ 2.019456211929321,
+ 2.0221493022155763,
+ 2.0212540914916994,
+ 2.0227064220809936,
+ 2.019565500793457,
+ 2.0192780477905274,
+ 2.022175419692993,
+ 2.0185312504577637,
+ 2.0168647090911866,
+ 2.018404118041992,
+ 2.0194629988098143,
+ 2.019041424560547,
+ 2.0179552308654785,
+ 2.0175965952301027,
+ 2.019192621688843,
+ 2.0174668720245363,
+ 2.0164142420196534,
+ 2.015060181007385,
+ 2.0180635737609864,
+ 2.0148490827941896,
+ 2.017637328643799,
+ 2.0145212058258055,
+ 2.0134287954711914,
+ 2.016226960449219,
+ 2.017133801612854,
+ 2.015110319671631,
+ 2.0140358378601073,
+ 2.0136930332946776,
+ 2.013491507110596,
+ 2.0132310498046877,
+ 2.0138910511779784,
+ 2.0127566445159912,
+ 2.014643051528931,
+ 2.013907117614746,
+ 2.013747940750122,
+ 2.013379987182617,
+ 2.014386905593872,
+ 2.0132828955841062,
+ 2.012256466674805,
+ 2.0110202868652345,
+ 2.011350602493286,
+ 2.0109373377990725,
+ 2.0134030670928955,
+ 2.013690048866272,
+ 2.010493644256592,
+ 2.0127015099716186,
+ 2.014034610977173,
+ 2.0104107666015625,
+ 2.009855454788208,
+ 2.0113264336395265,
+ 2.0088184381866454,
+ 2.009741211090088,
+ 2.0078707803726195,
+ 2.0089734397888184,
+ 2.010708229217529,
+ 2.0102198361206054,
+ 2.0088808136367797,
+ 2.010095508155823,
+ 2.0100783850097654,
+ 2.0090800459289553,
+ 2.0093112369155883,
+ 2.0093213819122315,
+ 2.0104708361053465,
+ 2.0093392966079713,
+ 2.007901397628784,
+ 2.0074678485870363,
+ 2.0079102828216553,
+ 2.0076543350982665,
+ 2.008981750946045,
+ 2.0082327671051026,
+ 2.0088174480819703,
+ 2.009487823638916,
+ 2.0088063831329346,
+ 2.008092264633179,
+ 2.009373797225952,
+ 2.0082830452728273,
+ 2.009356221847534,
+ 2.0067892125701903,
+ 2.0079211888122557,
+ 2.009279239501953
+ ],
+ "train_acc": [
+ 0.23872,
+ 0.2456,
+ 0.24996,
+ 0.2495,
+ 0.24748,
+ 0.2497,
+ 0.25566,
+ 0.25088,
+ 0.25386,
+ 0.2538,
+ 0.25732,
+ 0.25644,
+ 0.2601,
+ 0.25942,
+ 0.2592,
+ 0.25872,
+ 0.26188,
+ 0.26174,
+ 0.26336,
+ 0.2605,
+ 0.2632,
+ 0.2612,
+ 0.2603,
+ 0.2612,
+ 0.26016,
+ 0.2645,
+ 0.26032,
+ 0.26266,
+ 0.26196,
+ 0.26384,
+ 0.26492,
+ 0.26296,
+ 0.26584,
+ 0.26768,
+ 0.26482,
+ 0.2648,
+ 0.26722,
+ 0.26534,
+ 0.26276,
+ 0.2668,
+ 0.26668,
+ 0.26698,
+ 0.2691,
+ 0.26718,
+ 0.26602,
+ 0.26738,
+ 0.26584,
+ 0.26838,
+ 0.2676,
+ 0.26986,
+ 0.268,
+ 0.2675,
+ 0.26946,
+ 0.2669,
+ 0.2694,
+ 0.26946,
+ 0.26752,
+ 0.26804,
+ 0.27002,
+ 0.26888,
+ 0.27004,
+ 0.27052,
+ 0.2715,
+ 0.26972,
+ 0.2714,
+ 0.26728,
+ 0.26934,
+ 0.27246,
+ 0.27128,
+ 0.27064,
+ 0.27078,
+ 0.27072,
+ 0.27496,
+ 0.2735,
+ 0.27134,
+ 0.27228,
+ 0.2706,
+ 0.27152,
+ 0.27204,
+ 0.27414,
+ 0.27122,
+ 0.27298,
+ 0.27146,
+ 0.27378,
+ 0.27276,
+ 0.2735,
+ 0.27246,
+ 0.2746,
+ 0.2735,
+ 0.27246,
+ 0.27278,
+ 0.27358,
+ 0.27442,
+ 0.27286,
+ 0.2721,
+ 0.27286,
+ 0.27204,
+ 0.27472,
+ 0.27256,
+ 0.27478
+ ],
+ "test_acc": [
+ 0.2603,
+ 0.2353,
+ 0.2607,
+ 0.265,
+ 0.2673,
+ 0.2661,
+ 0.2701,
+ 0.2571,
+ 0.2897,
+ 0.288,
+ 0.2911,
+ 0.2945,
+ 0.265,
+ 0.2754,
+ 0.2857,
+ 0.2812,
+ 0.2695,
+ 0.2887,
+ 0.2854,
+ 0.288,
+ 0.2916,
+ 0.2906,
+ 0.2793,
+ 0.2867,
+ 0.2901,
+ 0.3,
+ 0.2805,
+ 0.2864,
+ 0.2983,
+ 0.2846,
+ 0.2886,
+ 0.2877,
+ 0.2925,
+ 0.2778,
+ 0.2959,
+ 0.2912,
+ 0.2908,
+ 0.2908,
+ 0.2801,
+ 0.2988,
+ 0.2829,
+ 0.2906,
+ 0.3007,
+ 0.2911,
+ 0.2975,
+ 0.2959,
+ 0.3009,
+ 0.2959,
+ 0.2901,
+ 0.2841,
+ 0.2989,
+ 0.2867,
+ 0.2927,
+ 0.2886,
+ 0.2802,
+ 0.281,
+ 0.3007,
+ 0.2873,
+ 0.2891,
+ 0.3031,
+ 0.2983,
+ 0.2973,
+ 0.2972,
+ 0.2921,
+ 0.2927,
+ 0.2971,
+ 0.2975,
+ 0.2988,
+ 0.3026,
+ 0.2926,
+ 0.2941,
+ 0.2971,
+ 0.2967,
+ 0.2972,
+ 0.2957,
+ 0.2998,
+ 0.2961,
+ 0.2934,
+ 0.2937,
+ 0.2949,
+ 0.2969,
+ 0.2975,
+ 0.2933,
+ 0.2965,
+ 0.2925,
+ 0.299,
+ 0.2975,
+ 0.2993,
+ 0.2935,
+ 0.2975,
+ 0.2962,
+ 0.2959,
+ 0.2958,
+ 0.2974,
+ 0.2975,
+ 0.2967,
+ 0.2967,
+ 0.2967,
+ 0.2967,
+ 0.2966
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.3420855402946472,
+ -0.0001374588318867609,
+ -0.0004025904054287821,
+ -0.0002873847261071205,
+ -0.0005695301806554198,
+ 0.00037305167643353343,
+ 0.0003001938748639077,
+ 6.27083791187033e-05
+ ],
+ "perturbation_rho": [
+ 0.05308223515748978,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "nudging": {
+ "0.001": [
+ -3.8510188460350037e-07,
+ 0.0,
+ 0.0,
+ 0.0,
+ -3.725290298461914e-09,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.003": [
+ -1.0007061064243317e-06,
+ 0.0,
+ 0.0,
+ 0.0,
+ -3.725290298461914e-09,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.01": [
+ -3.0365772545337677e-06,
+ 0.0,
+ 0.0,
+ 0.0,
+ -3.725290298461914e-09,
+ 0.0,
+ 0.0,
+ 0.0
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 53974.390625,
+ 1994035456.0,
+ 4991160320.0,
+ 6590162432.0,
+ 6723744768.0,
+ 9356510208.0,
+ 9466019840.0,
+ 10094161920.0,
+ 10161934336.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.1625525903345988e-07,
+ 2.0878114515010537e-10,
+ 2.0495614927451555e-10,
+ 2.0494976549212396e-10,
+ 2.0494639318968666e-10,
+ 2.0494643482305008e-10,
+ 2.0495241614959525e-10,
+ 2.0495422026201027e-10,
+ 2.0496979113993063e-10
+ ]
+ },
+ "drift": {
+ "embed.weight": 347.8854750017766,
+ "embed.bias": 315.64569824736526,
+ "blocks.0.ln.weight": 10.235928115345473,
+ "blocks.0.w1.weight": 318.5495193049707,
+ "blocks.0.w1.bias": 347.2457558883921,
+ "blocks.0.w2.weight": 498.50725738614966,
+ "blocks.1.ln.weight": 9.559401727161022,
+ "blocks.1.w1.weight": 396.35172799806554,
+ "blocks.1.w1.bias": 380.45360123690216,
+ "blocks.1.w2.weight": 397.06562678988604,
+ "blocks.2.ln.weight": 9.795519840502836,
+ "blocks.2.w1.weight": 400.5282974464668,
+ "blocks.2.w1.bias": 367.0528578182135,
+ "blocks.2.w2.weight": 385.20621819830745,
+ "blocks.3.ln.weight": 7.79115393268457,
+ "blocks.3.w1.weight": 267.38195610990635,
+ "blocks.3.w1.bias": 241.5060314212707,
+ "blocks.3.w2.weight": 261.60447846418674,
+ "blocks.4.ln.weight": 10.663570278143073,
+ "blocks.4.w1.weight": 440.19027772109945,
+ "blocks.4.w1.bias": 406.52995649086876,
+ "blocks.4.w2.weight": 429.9566257006414,
+ "blocks.5.ln.weight": 7.336168242804289,
+ "blocks.5.w1.weight": 278.99456283486023,
+ "blocks.5.w1.bias": 254.26825064498672,
+ "blocks.5.w2.weight": 255.6469209546951,
+ "blocks.6.ln.weight": 8.774665158034248,
+ "blocks.6.w1.weight": 349.580580396389,
+ "blocks.6.w1.bias": 332.01322875711713,
+ "blocks.6.w2.weight": 335.2183411011385,
+ "blocks.7.ln.weight": 6.103809206945497,
+ "blocks.7.w1.weight": 223.73607116076752,
+ "blocks.7.w1.bias": 204.11858072638188,
+ "blocks.7.w2.weight": 212.2909923599191,
+ "out_ln.weight": 0.5939581817276044,
+ "out_head.weight": 9.736202389552009,
+ "out_head.bias": 0.7653619259023434
+ }
+ },
+ "fa": {
+ "log": {
+ "train_loss": [
+ 2.0312406940460206,
+ 1.9362668811798096,
+ 1.8983942519378663,
+ 1.881630844039917,
+ 1.8686952627563476,
+ 1.8598585787200927,
+ 1.8500165731430054,
+ 1.8523509564208984,
+ 1.841215087814331,
+ 1.8375128533935547,
+ 1.82443065284729,
+ 1.82105125,
+ 1.8107502059555054,
+ 1.8082057354736327,
+ 1.8022028189468384,
+ 1.7978119366836547,
+ 1.7920289902496338,
+ 1.7873877236938476,
+ 1.7848837859725952,
+ 1.7817217052459717,
+ 1.7746633721542358,
+ 1.7798608938598632,
+ 1.7749403537750243,
+ 1.7722899240112304,
+ 1.7705043783187866,
+ 1.765470712814331,
+ 1.7667155236434937,
+ 1.7605387261199952,
+ 1.7600090964126587,
+ 1.7570182471466065,
+ 1.7548303343963623,
+ 1.753013416786194,
+ 1.7530071124267579,
+ 1.752319090499878,
+ 1.7505883599090577,
+ 1.7475732799530028,
+ 1.7479963735961914,
+ 1.7440779452514648,
+ 1.7415211089706422,
+ 1.7423380712127685,
+ 1.7422383227157592,
+ 1.73590397315979,
+ 1.7367827083587646,
+ 1.7335861996078492,
+ 1.732941851272583,
+ 1.726208869934082,
+ 1.7261467092895508,
+ 1.731659492225647,
+ 1.7264601552963257,
+ 1.7286788833618163,
+ 1.722340958251953,
+ 1.7210906581497192,
+ 1.7243633469390869,
+ 1.7231188234710693,
+ 1.7203610498809814,
+ 1.7219875589752198,
+ 1.719448472518921,
+ 1.7194024643707275,
+ 1.7188453897857665,
+ 1.715792247581482,
+ 1.7124444417572022,
+ 1.71695018119812,
+ 1.7176160364532471,
+ 1.7146047933578492,
+ 1.7122148095321654,
+ 1.7136172246551513,
+ 1.7148900774765015,
+ 1.7101783513641358,
+ 1.7065966332244873,
+ 1.7109612771606446,
+ 1.7086562002182006,
+ 1.708388620033264,
+ 1.7043865182113647,
+ 1.7084569076919556,
+ 1.7085095125961303,
+ 1.7060216832733155,
+ 1.7013830905532836,
+ 1.706795428390503,
+ 1.7063351504898072,
+ 1.70022035030365,
+ 1.7059846701049806,
+ 1.706136099281311,
+ 1.7015950707626344,
+ 1.7021336114120484,
+ 1.6970344146347045,
+ 1.7002056908416747,
+ 1.6996512441635132,
+ 1.701994123878479,
+ 1.6985623202896118,
+ 1.7015460013580321,
+ 1.7006773757171632,
+ 1.7014044579696654,
+ 1.7009879675674437,
+ 1.699486920852661,
+ 1.6986795735931397,
+ 1.6978497576904297,
+ 1.7015439191436768,
+ 1.699127428817749,
+ 1.6978760889434814,
+ 1.69874500705719
+ ],
+ "train_acc": [
+ 0.25366,
+ 0.295,
+ 0.31024,
+ 0.322,
+ 0.3275,
+ 0.3335,
+ 0.33654,
+ 0.33282,
+ 0.33834,
+ 0.3379,
+ 0.34392,
+ 0.3437,
+ 0.34912,
+ 0.35138,
+ 0.35342,
+ 0.3525,
+ 0.35808,
+ 0.36144,
+ 0.36196,
+ 0.36088,
+ 0.3609,
+ 0.3617,
+ 0.36304,
+ 0.36386,
+ 0.36488,
+ 0.36634,
+ 0.3662,
+ 0.37146,
+ 0.36906,
+ 0.37174,
+ 0.37052,
+ 0.37144,
+ 0.37146,
+ 0.37096,
+ 0.37404,
+ 0.37546,
+ 0.37712,
+ 0.37372,
+ 0.37698,
+ 0.37738,
+ 0.37506,
+ 0.38058,
+ 0.379,
+ 0.37758,
+ 0.38042,
+ 0.38246,
+ 0.38102,
+ 0.38084,
+ 0.38438,
+ 0.38092,
+ 0.38366,
+ 0.38522,
+ 0.38544,
+ 0.38474,
+ 0.38626,
+ 0.385,
+ 0.38362,
+ 0.38682,
+ 0.38782,
+ 0.3882,
+ 0.38862,
+ 0.38608,
+ 0.38642,
+ 0.3857,
+ 0.38758,
+ 0.38714,
+ 0.38588,
+ 0.39092,
+ 0.38846,
+ 0.39068,
+ 0.38904,
+ 0.39098,
+ 0.39194,
+ 0.39138,
+ 0.38832,
+ 0.39186,
+ 0.39308,
+ 0.3901,
+ 0.39034,
+ 0.3932,
+ 0.39182,
+ 0.39044,
+ 0.39078,
+ 0.3919,
+ 0.39418,
+ 0.39302,
+ 0.39488,
+ 0.39178,
+ 0.39388,
+ 0.39272,
+ 0.39092,
+ 0.39096,
+ 0.3937,
+ 0.39176,
+ 0.39522,
+ 0.39246,
+ 0.39252,
+ 0.39446,
+ 0.3936,
+ 0.39362
+ ],
+ "test_acc": [
+ 0.2876,
+ 0.3293,
+ 0.3436,
+ 0.3566,
+ 0.3569,
+ 0.3491,
+ 0.3632,
+ 0.36,
+ 0.3748,
+ 0.3697,
+ 0.3686,
+ 0.3785,
+ 0.3636,
+ 0.3798,
+ 0.3817,
+ 0.3661,
+ 0.3711,
+ 0.386,
+ 0.3761,
+ 0.3774,
+ 0.3908,
+ 0.3855,
+ 0.3821,
+ 0.3861,
+ 0.3921,
+ 0.398,
+ 0.3973,
+ 0.3814,
+ 0.3875,
+ 0.3951,
+ 0.3951,
+ 0.3873,
+ 0.399,
+ 0.3912,
+ 0.4011,
+ 0.3901,
+ 0.4079,
+ 0.3961,
+ 0.397,
+ 0.4004,
+ 0.4022,
+ 0.4,
+ 0.4042,
+ 0.3988,
+ 0.4114,
+ 0.4031,
+ 0.4057,
+ 0.4077,
+ 0.4027,
+ 0.4028,
+ 0.4083,
+ 0.4001,
+ 0.4068,
+ 0.411,
+ 0.4046,
+ 0.4106,
+ 0.4098,
+ 0.4054,
+ 0.4117,
+ 0.4084,
+ 0.407,
+ 0.4054,
+ 0.4116,
+ 0.4124,
+ 0.4119,
+ 0.412,
+ 0.4066,
+ 0.4118,
+ 0.4119,
+ 0.4107,
+ 0.413,
+ 0.4056,
+ 0.4103,
+ 0.4076,
+ 0.4136,
+ 0.4117,
+ 0.4165,
+ 0.4112,
+ 0.4129,
+ 0.4125,
+ 0.413,
+ 0.4136,
+ 0.4145,
+ 0.4119,
+ 0.4112,
+ 0.4123,
+ 0.4126,
+ 0.4137,
+ 0.4137,
+ 0.4146,
+ 0.4117,
+ 0.4117,
+ 0.4125,
+ 0.4122,
+ 0.4119,
+ 0.4125,
+ 0.4126,
+ 0.4123,
+ 0.4122,
+ 0.412
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.018279677256941795,
+ 0.05121004581451416,
+ -0.0035886913537979126,
+ -0.08221495151519775,
+ -0.07729659974575043,
+ -0.03265474736690521,
+ -0.04502299427986145,
+ 0.9937314391136169
+ ],
+ "perturbation_rho": [
+ -0.0045250579714775085,
+ -0.0025405026972293854,
+ 0.02932029776275158,
+ -0.04425422102212906,
+ -0.024078164249658585,
+ 0.036674171686172485,
+ 0.03741011023521423,
+ -0.027221273630857468
+ ],
+ "nudging": {
+ "0.001": [
+ -1.8192222341895103e-06,
+ -1.5320256352424622e-07,
+ -4.225876182317734e-08,
+ 1.200241968035698e-07,
+ 7.438939064741135e-08,
+ 2.7241185307502747e-08,
+ 2.759043127298355e-08,
+ -1.1578667908906937e-06
+ ],
+ "0.003": [
+ -5.577923730015755e-06,
+ -8.606584742665291e-07,
+ -8.975621312856674e-08,
+ 4.239846020936966e-07,
+ 4.441244527697563e-07,
+ 1.2828968465328217e-07,
+ 2.665910869836807e-07,
+ -4.686298780143261e-06
+ ],
+ "0.01": [
+ -1.855997834354639e-05,
+ -2.620276063680649e-06,
+ -2.4156179279088974e-07,
+ 1.257285475730896e-06,
+ 1.3473909348249435e-06,
+ 5.852198228240013e-07,
+ 7.433118298649788e-07,
+ -1.6578123904764652e-05
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 4948.6533203125,
+ 63535.5390625,
+ 485750.46875,
+ 729670.4375,
+ 1132140.625,
+ 1303753.625,
+ 1400111.125,
+ 1525175.75,
+ 735722.25
+ ],
+ "bp_grad_norms_per_layer": [
+ 3.38389327225741e-05,
+ 2.3188813429442234e-06,
+ 7.230223104670586e-07,
+ 6.714369078508753e-07,
+ 6.712992899338133e-07,
+ 6.754108312634344e-07,
+ 6.760963060514769e-07,
+ 6.705485020574997e-07,
+ 6.541473567267531e-07
+ ]
+ },
+ "drift": {
+ "embed.weight": 38.30467980586698,
+ "embed.bias": 18.474777878278143,
+ "blocks.0.ln.weight": 1.0140204865718874,
+ "blocks.0.w1.weight": 14.619711688794071,
+ "blocks.0.w1.bias": 11.460165338986966,
+ "blocks.0.w2.weight": 49.811132056190225,
+ "blocks.1.ln.weight": 0.943966438295369,
+ "blocks.1.w1.weight": 19.03349845703188,
+ "blocks.1.w1.bias": 16.732407132612977,
+ "blocks.1.w2.weight": 43.9070524360131,
+ "blocks.2.ln.weight": 0.6544196492657218,
+ "blocks.2.w1.weight": 18.12360528225708,
+ "blocks.2.w1.bias": 17.453784588368148,
+ "blocks.2.w2.weight": 43.32806943352846,
+ "blocks.3.ln.weight": 0.5531295594988335,
+ "blocks.3.w1.weight": 18.884523077230778,
+ "blocks.3.w1.bias": 20.07589187473831,
+ "blocks.3.w2.weight": 35.34492100476457,
+ "blocks.4.ln.weight": 0.5269231122615889,
+ "blocks.4.w1.weight": 17.103043932812323,
+ "blocks.4.w1.bias": 18.102481284342456,
+ "blocks.4.w2.weight": 42.32555419027007,
+ "blocks.5.ln.weight": 0.5628789686632275,
+ "blocks.5.w1.weight": 16.33193833700236,
+ "blocks.5.w1.bias": 15.909636919669264,
+ "blocks.5.w2.weight": 48.27717558625265,
+ "blocks.6.ln.weight": 0.6467844103525138,
+ "blocks.6.w1.weight": 16.86172393719663,
+ "blocks.6.w1.bias": 16.921984405373085,
+ "blocks.6.w2.weight": 56.44865850476068,
+ "blocks.7.ln.weight": 0.6477260974049089,
+ "blocks.7.w1.weight": 19.61181966070048,
+ "blocks.7.w1.bias": 20.268823478099563,
+ "blocks.7.w2.weight": 50.05420170873082,
+ "out_ln.weight": 0.28931782627119323,
+ "out_head.weight": 5.596138162617972,
+ "out_head.bias": 1.6180902727109185
+ }
+ }
+ },
+ "config": {
+ "dataset": "cifar10",
+ "d_hidden": 512,
+ "num_blocks": 8,
+ "batch_size": 128,
+ "epochs": 100,
+ "lr": 0.001,
+ "lr_fb": 0.001,
+ "wd": 0.01,
+ "lam": 0.1,
+ "K": 4,
+ "sigma_bridge": 0.05,
+ "ema_momentum": 0.995,
+ "term_grad_weight": 1.0,
+ "seeds": [
+ 2
+ ],
+ "gpu": 0,
+ "output_dir": "results/fa_dfa_d512_L8_seed2",
+ "methods": [
+ "fa",
+ "dfa"
+ ],
+ "random_targets": false,
+ "penalty_lam": 0.0,
+ "num_classes": 10
+ }
+} \ No newline at end of file
diff --git a/results/fa_dfa_d512_L8_seed3/results_cifar10.json b/results/fa_dfa_d512_L8_seed3/results_cifar10.json
new file mode 100644
index 0000000..4a81c5e
--- /dev/null
+++ b/results/fa_dfa_d512_L8_seed3/results_cifar10.json
@@ -0,0 +1,881 @@
+{
+ "3": {
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 2.067654705734253,
+ 2.0439451234436037,
+ 2.0388121842193603,
+ 2.0374467311859132,
+ 2.032723134689331,
+ 2.024335552711487,
+ 2.0244412200927733,
+ 2.020215712852478,
+ 2.02228947303772,
+ 2.0196216506958007,
+ 2.018127802886963,
+ 2.0154445655822752,
+ 2.014929994125366,
+ 2.0130414514160155,
+ 2.010970745010376,
+ 2.009714426345825,
+ 2.008820608253479,
+ 2.0076065615081786,
+ 2.0063576023864744,
+ 2.0080780532455442,
+ 2.0033639809799193,
+ 2.0052446017456056,
+ 2.003892343444824,
+ 2.0035709592437745,
+ 2.004005387611389,
+ 2.003134113922119,
+ 2.002949758758545,
+ 2.003510911102295,
+ 2.0014034066009523,
+ 2.0013524534606932,
+ 2.001380777282715,
+ 2.0003092810058596,
+ 2.001631733970642,
+ 1.9991252640533448,
+ 2.001369930343628,
+ 2.0026023275375366,
+ 1.9995970624542236,
+ 2.001532023696899,
+ 1.9995446050643921,
+ 1.9996646937561036,
+ 2.0011661471176145,
+ 2.001093216934204,
+ 1.9989905255126954,
+ 1.9983071016693115,
+ 2.0015053617095946,
+ 2.0005502851867676,
+ 1.9974795049667358,
+ 2.001641445236206,
+ 1.9987062954711914,
+ 1.9984554448699952,
+ 2.000685492324829,
+ 1.9992525644683838,
+ 2.0004551610565184,
+ 1.9981906829452514,
+ 1.998261117515564,
+ 1.9979981622314453,
+ 1.999091849937439,
+ 1.999358012046814,
+ 1.9977104736709594,
+ 1.998026010055542,
+ 1.9991155099487306,
+ 1.998898390197754,
+ 1.997283667640686,
+ 1.9974989557266236,
+ 1.9966122649765015,
+ 1.99706481754303,
+ 1.9956817004394531,
+ 1.9957520567321778,
+ 1.9964876544570922,
+ 1.9971850046539306,
+ 1.9973575018310548,
+ 1.9963464212417603,
+ 1.995423618736267,
+ 1.9970853455352784,
+ 1.9958270948028565,
+ 1.997421721458435,
+ 1.9960898080825806,
+ 1.996168349533081,
+ 1.9963192137145995,
+ 1.997360442123413,
+ 1.9975355083465576,
+ 1.996066244506836,
+ 1.9953448879241944,
+ 1.9958293856430054,
+ 1.9962540533828734,
+ 1.9953726630401611,
+ 1.9940088095092774,
+ 1.9959244760894774,
+ 1.9942969249725342,
+ 1.9947067263793945,
+ 1.9935712906646728,
+ 1.994222310256958,
+ 1.9953250341796875,
+ 1.995568331222534,
+ 1.9945212261199952,
+ 1.9952520877075195,
+ 1.9954078897857666,
+ 1.9954364456176759,
+ 1.9938959969329835,
+ 1.9937770639038086
+ ],
+ "train_acc": [
+ 0.23262,
+ 0.24192,
+ 0.24632,
+ 0.24536,
+ 0.2464,
+ 0.2547,
+ 0.25628,
+ 0.25992,
+ 0.25686,
+ 0.25908,
+ 0.25972,
+ 0.2614,
+ 0.26032,
+ 0.26244,
+ 0.26444,
+ 0.26376,
+ 0.26774,
+ 0.26756,
+ 0.26628,
+ 0.2646,
+ 0.26774,
+ 0.26684,
+ 0.26744,
+ 0.2703,
+ 0.26722,
+ 0.2705,
+ 0.26978,
+ 0.26872,
+ 0.2695,
+ 0.27244,
+ 0.27246,
+ 0.27274,
+ 0.26828,
+ 0.27612,
+ 0.27204,
+ 0.27338,
+ 0.27352,
+ 0.26948,
+ 0.27384,
+ 0.27384,
+ 0.27366,
+ 0.27284,
+ 0.27418,
+ 0.27444,
+ 0.2734,
+ 0.27474,
+ 0.27514,
+ 0.27438,
+ 0.274,
+ 0.27478,
+ 0.2748,
+ 0.27494,
+ 0.27578,
+ 0.27282,
+ 0.27458,
+ 0.27628,
+ 0.27448,
+ 0.27492,
+ 0.27672,
+ 0.27708,
+ 0.2768,
+ 0.27398,
+ 0.27716,
+ 0.27682,
+ 0.27596,
+ 0.27722,
+ 0.2767,
+ 0.27738,
+ 0.27778,
+ 0.27676,
+ 0.27824,
+ 0.27798,
+ 0.27824,
+ 0.27748,
+ 0.27828,
+ 0.2766,
+ 0.2783,
+ 0.27724,
+ 0.27768,
+ 0.27614,
+ 0.27838,
+ 0.27748,
+ 0.27688,
+ 0.27912,
+ 0.27784,
+ 0.27692,
+ 0.27876,
+ 0.27918,
+ 0.27702,
+ 0.27866,
+ 0.2807,
+ 0.27914,
+ 0.27646,
+ 0.27638,
+ 0.27878,
+ 0.27844,
+ 0.27988,
+ 0.27484,
+ 0.27874,
+ 0.27812
+ ],
+ "test_acc": [
+ 0.2578,
+ 0.2681,
+ 0.2679,
+ 0.2839,
+ 0.2546,
+ 0.2725,
+ 0.2785,
+ 0.2856,
+ 0.2934,
+ 0.2638,
+ 0.2995,
+ 0.288,
+ 0.2773,
+ 0.2846,
+ 0.3003,
+ 0.2936,
+ 0.2826,
+ 0.2985,
+ 0.2836,
+ 0.2804,
+ 0.282,
+ 0.2777,
+ 0.2854,
+ 0.2789,
+ 0.3056,
+ 0.296,
+ 0.2922,
+ 0.2934,
+ 0.2854,
+ 0.2952,
+ 0.3047,
+ 0.2955,
+ 0.2902,
+ 0.2858,
+ 0.3016,
+ 0.2931,
+ 0.2948,
+ 0.2922,
+ 0.2924,
+ 0.2932,
+ 0.2975,
+ 0.2989,
+ 0.2968,
+ 0.3084,
+ 0.295,
+ 0.2818,
+ 0.2943,
+ 0.2957,
+ 0.2895,
+ 0.2846,
+ 0.2928,
+ 0.2932,
+ 0.2923,
+ 0.2923,
+ 0.2948,
+ 0.2839,
+ 0.2885,
+ 0.2986,
+ 0.2955,
+ 0.292,
+ 0.3058,
+ 0.2945,
+ 0.302,
+ 0.2895,
+ 0.294,
+ 0.2968,
+ 0.293,
+ 0.3034,
+ 0.2963,
+ 0.2881,
+ 0.295,
+ 0.2915,
+ 0.2951,
+ 0.2916,
+ 0.3019,
+ 0.2971,
+ 0.2965,
+ 0.2974,
+ 0.297,
+ 0.2973,
+ 0.2983,
+ 0.3006,
+ 0.2984,
+ 0.2948,
+ 0.2929,
+ 0.2974,
+ 0.2957,
+ 0.2979,
+ 0.2947,
+ 0.2944,
+ 0.2955,
+ 0.2957,
+ 0.2938,
+ 0.296,
+ 0.2973,
+ 0.2969,
+ 0.2962,
+ 0.2965,
+ 0.2967,
+ 0.2967
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.3848226070404053,
+ 0.0005948130274191499,
+ 0.0008828549180179834,
+ -0.0005318043986335397,
+ -0.0011384200770407915,
+ 0.001081241061910987,
+ -0.0007273855153471231,
+ -9.393676009494811e-05
+ ],
+ "perturbation_rho": [
+ 0.02956267260015011,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "nudging": {
+ "0.001": [
+ -4.009343683719635e-07,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.003": [
+ -1.071486622095108e-06,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.01": [
+ -3.686174750328064e-06,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 53208.375,
+ 1418977024.0,
+ 1839794304.0,
+ 3366365440.0,
+ 5876559872.0,
+ 6415245312.0,
+ 6484437504.0,
+ 6990735872.0,
+ 8058666496.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.507335352675e-07,
+ 3.0165914211011113e-10,
+ 3.015434568709452e-10,
+ 3.022892214321615e-10,
+ 3.0200991707474145e-10,
+ 3.020122762986688e-10,
+ 3.0200916767419983e-10,
+ 3.020158290123476e-10,
+ 3.0227947922512044e-10
+ ]
+ },
+ "drift": {
+ "embed.weight": 327.65976028345034,
+ "embed.bias": 222.60798330036593,
+ "blocks.0.ln.weight": 9.50619149859276,
+ "blocks.0.w1.weight": 302.2719637669162,
+ "blocks.0.w1.bias": 270.6809804281448,
+ "blocks.0.w2.weight": 471.5962929911042,
+ "blocks.1.ln.weight": 7.339419282166665,
+ "blocks.1.w1.weight": 236.07033236306975,
+ "blocks.1.w1.bias": 214.61371141440935,
+ "blocks.1.w2.weight": 257.6350959469773,
+ "blocks.2.ln.weight": 8.814285893710872,
+ "blocks.2.w1.weight": 346.78316046198574,
+ "blocks.2.w1.bias": 305.8043449728941,
+ "blocks.2.w2.weight": 337.46416422582774,
+ "blocks.3.ln.weight": 9.745238986543606,
+ "blocks.3.w1.weight": 405.8018654296332,
+ "blocks.3.w1.bias": 374.1566493279044,
+ "blocks.3.w2.weight": 402.68490503996316,
+ "blocks.4.ln.weight": 8.109738257108088,
+ "blocks.4.w1.weight": 332.27298936873524,
+ "blocks.4.w1.bias": 309.4368090711017,
+ "blocks.4.w2.weight": 317.97600700647837,
+ "blocks.5.ln.weight": 6.209367170959338,
+ "blocks.5.w1.weight": 230.00050402864986,
+ "blocks.5.w1.bias": 215.5148975730586,
+ "blocks.5.w2.weight": 220.6587410113886,
+ "blocks.6.ln.weight": 8.869002619258092,
+ "blocks.6.w1.weight": 341.5561743855726,
+ "blocks.6.w1.bias": 310.4841079276583,
+ "blocks.6.w2.weight": 307.6831168444481,
+ "blocks.7.ln.weight": 10.348632836289921,
+ "blocks.7.w1.weight": 389.83507647770097,
+ "blocks.7.w1.bias": 362.80530475665813,
+ "blocks.7.w2.weight": 372.02137428373555,
+ "out_ln.weight": 0.6700943575231241,
+ "out_head.weight": 9.265882212393693,
+ "out_head.bias": 0.5155313240128788
+ }
+ },
+ "fa": {
+ "log": {
+ "train_loss": [
+ 2.0319221758270265,
+ 1.9485809626770019,
+ 1.9236068560409545,
+ 1.9108332887268067,
+ 1.8946271410751343,
+ 1.8748885510635376,
+ 1.8679874765777589,
+ 1.8528113153839112,
+ 1.854252926864624,
+ 1.8431586810302734,
+ 1.835074407119751,
+ 1.830763704071045,
+ 1.8243982820892335,
+ 1.8189660712051392,
+ 1.8128780780792237,
+ 1.8087021814346314,
+ 1.804141354446411,
+ 1.8017899124526977,
+ 1.7928460983657837,
+ 1.7929035697174072,
+ 1.7876431453704833,
+ 1.7875543393325806,
+ 1.7795426641845704,
+ 1.775789865951538,
+ 1.7698374728012085,
+ 1.768277823562622,
+ 1.7634769374847412,
+ 1.7641023599624635,
+ 1.757076796989441,
+ 1.7488349988174439,
+ 1.7501668548965454,
+ 1.7444925582122803,
+ 1.7474665740585327,
+ 1.7382987225723268,
+ 1.734984835205078,
+ 1.7357169647979735,
+ 1.7307214682388306,
+ 1.7250416897201537,
+ 1.7257672270965576,
+ 1.7227689398574828,
+ 1.7207815472030639,
+ 1.7202168838882446,
+ 1.7142020806503295,
+ 1.714960380783081,
+ 1.71727346408844,
+ 1.712963445739746,
+ 1.7071009867095948,
+ 1.7091050637435914,
+ 1.7093897729492187,
+ 1.7002552864837646,
+ 1.7015939654922485,
+ 1.7008016619873048,
+ 1.6995257330703735,
+ 1.697141215133667,
+ 1.6948078282928467,
+ 1.6939659241485596,
+ 1.69270690864563,
+ 1.6911025055313111,
+ 1.6915674340820313,
+ 1.6882078439712525,
+ 1.6849368871307373,
+ 1.6877099935913087,
+ 1.6871322372817994,
+ 1.683017247543335,
+ 1.6802406833648682,
+ 1.6818945336532594,
+ 1.6764708988189698,
+ 1.6813461883163452,
+ 1.679410824356079,
+ 1.6801742096328736,
+ 1.6759367601776123,
+ 1.6751640189361572,
+ 1.6755147110366821,
+ 1.6753325751113892,
+ 1.669490467529297,
+ 1.6727212616348266,
+ 1.6710585062026977,
+ 1.6708744039535524,
+ 1.6716560121536255,
+ 1.6683519116210936,
+ 1.669690319480896,
+ 1.6661786761474608,
+ 1.6632559734725951,
+ 1.6698375412750244,
+ 1.6648968856430053,
+ 1.6646921353912354,
+ 1.6623535321044922,
+ 1.6634717670440673,
+ 1.662731397628784,
+ 1.6652179148101807,
+ 1.664047002029419,
+ 1.6637420905303955,
+ 1.6616315328979492,
+ 1.6613892125320435,
+ 1.6618764827728272,
+ 1.6608539768218995,
+ 1.662214938583374,
+ 1.6588589643859863,
+ 1.6624627486419679,
+ 1.6634692792129517
+ ],
+ "train_acc": [
+ 0.25286,
+ 0.28656,
+ 0.29892,
+ 0.30622,
+ 0.31524,
+ 0.32466,
+ 0.33028,
+ 0.33336,
+ 0.33322,
+ 0.33718,
+ 0.34028,
+ 0.34314,
+ 0.3488,
+ 0.34888,
+ 0.3513,
+ 0.35072,
+ 0.35386,
+ 0.35666,
+ 0.35802,
+ 0.35736,
+ 0.36152,
+ 0.36052,
+ 0.36226,
+ 0.36666,
+ 0.3689,
+ 0.3657,
+ 0.36826,
+ 0.36686,
+ 0.36968,
+ 0.37314,
+ 0.37168,
+ 0.37526,
+ 0.3725,
+ 0.37586,
+ 0.37854,
+ 0.38008,
+ 0.38192,
+ 0.38086,
+ 0.38108,
+ 0.38154,
+ 0.38472,
+ 0.38546,
+ 0.38504,
+ 0.3864,
+ 0.3857,
+ 0.3858,
+ 0.38726,
+ 0.38836,
+ 0.38902,
+ 0.39314,
+ 0.39324,
+ 0.39162,
+ 0.39194,
+ 0.3928,
+ 0.39618,
+ 0.39266,
+ 0.39604,
+ 0.39298,
+ 0.39408,
+ 0.39738,
+ 0.3988,
+ 0.39496,
+ 0.39628,
+ 0.39638,
+ 0.3968,
+ 0.40014,
+ 0.40092,
+ 0.3965,
+ 0.39798,
+ 0.39902,
+ 0.40112,
+ 0.40126,
+ 0.39922,
+ 0.40196,
+ 0.40252,
+ 0.40146,
+ 0.40286,
+ 0.4025,
+ 0.40382,
+ 0.40394,
+ 0.40502,
+ 0.40518,
+ 0.40502,
+ 0.4029,
+ 0.40602,
+ 0.40654,
+ 0.40788,
+ 0.40838,
+ 0.40608,
+ 0.40626,
+ 0.40682,
+ 0.4054,
+ 0.40548,
+ 0.40818,
+ 0.4063,
+ 0.40556,
+ 0.40728,
+ 0.40878,
+ 0.41036,
+ 0.40504
+ ],
+ "test_acc": [
+ 0.3107,
+ 0.3197,
+ 0.3396,
+ 0.3501,
+ 0.3445,
+ 0.3533,
+ 0.3594,
+ 0.3665,
+ 0.3638,
+ 0.3697,
+ 0.3705,
+ 0.3731,
+ 0.3731,
+ 0.3779,
+ 0.3879,
+ 0.3797,
+ 0.3855,
+ 0.3895,
+ 0.389,
+ 0.3836,
+ 0.3898,
+ 0.3899,
+ 0.3957,
+ 0.3975,
+ 0.3903,
+ 0.3904,
+ 0.3925,
+ 0.4026,
+ 0.3948,
+ 0.404,
+ 0.407,
+ 0.4052,
+ 0.4016,
+ 0.4038,
+ 0.4077,
+ 0.4055,
+ 0.4139,
+ 0.4125,
+ 0.4071,
+ 0.41,
+ 0.4157,
+ 0.4116,
+ 0.4057,
+ 0.4093,
+ 0.4107,
+ 0.4121,
+ 0.4178,
+ 0.4151,
+ 0.4102,
+ 0.415,
+ 0.4171,
+ 0.4152,
+ 0.4107,
+ 0.4169,
+ 0.4165,
+ 0.4128,
+ 0.4211,
+ 0.4239,
+ 0.419,
+ 0.4169,
+ 0.4175,
+ 0.4204,
+ 0.4172,
+ 0.42,
+ 0.4223,
+ 0.4193,
+ 0.4249,
+ 0.4239,
+ 0.4247,
+ 0.4237,
+ 0.4241,
+ 0.4249,
+ 0.4246,
+ 0.4236,
+ 0.4222,
+ 0.4256,
+ 0.4236,
+ 0.4254,
+ 0.4218,
+ 0.4219,
+ 0.4253,
+ 0.4236,
+ 0.4242,
+ 0.4266,
+ 0.4265,
+ 0.4244,
+ 0.4236,
+ 0.4244,
+ 0.4257,
+ 0.426,
+ 0.4278,
+ 0.4252,
+ 0.4256,
+ 0.4257,
+ 0.425,
+ 0.4251,
+ 0.4257,
+ 0.4253,
+ 0.4252,
+ 0.4251
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.011332567781209946,
+ 0.06513819098472595,
+ 0.0006735082715749741,
+ 0.01789894700050354,
+ -0.04844595864415169,
+ -0.0789085328578949,
+ -0.08352568745613098,
+ 0.9912235736846924
+ ],
+ "perturbation_rho": [
+ -0.019055387005209923,
+ 0.000641997903585434,
+ 0.007458926644176245,
+ -0.0017425119876861572,
+ 0.01768876612186432,
+ -0.035874560475349426,
+ -0.009836452081799507,
+ -0.0037038950249552727
+ ],
+ "nudging": {
+ "0.001": [
+ 3.5937409847974777e-07,
+ -8.093193173408508e-07,
+ 5.8673322200775146e-08,
+ -9.988434612751007e-08,
+ 1.5122350305318832e-07,
+ 2.3562461137771606e-07,
+ 1.7601996660232544e-07,
+ -2.587912604212761e-06
+ ],
+ "0.003": [
+ 4.564644768834114e-07,
+ -2.398388460278511e-06,
+ -8.882489055395126e-08,
+ -2.555316314101219e-07,
+ 4.91039827466011e-07,
+ 6.976770237088203e-07,
+ 7.48317688703537e-07,
+ -8.602859452366829e-06
+ ],
+ "0.01": [
+ 2.159271389245987e-06,
+ -7.903669029474258e-06,
+ -2.2118911147117615e-09,
+ -8.208444342017174e-07,
+ 1.3509998098015785e-06,
+ 2.2408785298466682e-06,
+ 2.4311011657118797e-06,
+ -2.9218033887445927e-05
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 4438.09326171875,
+ 51045.953125,
+ 224884.15625,
+ 281807.5625,
+ 313976.5,
+ 377254.8125,
+ 619965.6875,
+ 803209.875,
+ 395948.40625
+ ],
+ "bp_grad_norms_per_layer": [
+ 5.1603383326437324e-05,
+ 4.058777449245099e-06,
+ 1.2677732001975528e-06,
+ 1.2549784287330112e-06,
+ 1.1549120699783089e-06,
+ 1.1174195151397726e-06,
+ 1.1277491012151586e-06,
+ 1.1274379403403145e-06,
+ 1.1047595762647688e-06
+ ]
+ },
+ "drift": {
+ "embed.weight": 35.94724430826174,
+ "embed.bias": 12.93641303808505,
+ "blocks.0.ln.weight": 1.0316107269800443,
+ "blocks.0.w1.weight": 14.343462705354373,
+ "blocks.0.w1.bias": 10.48509752557022,
+ "blocks.0.w2.weight": 50.82449203404063,
+ "blocks.1.ln.weight": 0.8954057658407036,
+ "blocks.1.w1.weight": 17.084567030271085,
+ "blocks.1.w1.bias": 8.321442582132649,
+ "blocks.1.w2.weight": 43.15792770243562,
+ "blocks.2.ln.weight": 0.50963325244791,
+ "blocks.2.w1.weight": 13.992497779476563,
+ "blocks.2.w1.bias": 8.388377688031984,
+ "blocks.2.w2.weight": 36.31652372933758,
+ "blocks.3.ln.weight": 0.5126350353439263,
+ "blocks.3.w1.weight": 13.376631353463463,
+ "blocks.3.w1.bias": 7.3308712021855715,
+ "blocks.3.w2.weight": 38.8692971792842,
+ "blocks.4.ln.weight": 0.44132703400905127,
+ "blocks.4.w1.weight": 13.832310366505041,
+ "blocks.4.w1.bias": 8.752010456823355,
+ "blocks.4.w2.weight": 29.67447860418571,
+ "blocks.5.ln.weight": 0.44899228147891274,
+ "blocks.5.w1.weight": 15.279703613622448,
+ "blocks.5.w1.bias": 14.678313736857362,
+ "blocks.5.w2.weight": 23.989192930697847,
+ "blocks.6.ln.weight": 0.48252983596530574,
+ "blocks.6.w1.weight": 15.592232386492284,
+ "blocks.6.w1.bias": 15.403926884582539,
+ "blocks.6.w2.weight": 26.0662939832218,
+ "blocks.7.ln.weight": 0.5873549927115453,
+ "blocks.7.w1.weight": 16.643576121850806,
+ "blocks.7.w1.bias": 15.88707616632278,
+ "blocks.7.w2.weight": 36.93738990528072,
+ "out_ln.weight": 0.2817508020334675,
+ "out_head.weight": 5.01870628445436,
+ "out_head.bias": 1.5909890644945488
+ }
+ }
+ },
+ "config": {
+ "dataset": "cifar10",
+ "d_hidden": 512,
+ "num_blocks": 8,
+ "batch_size": 128,
+ "epochs": 100,
+ "lr": 0.001,
+ "lr_fb": 0.001,
+ "wd": 0.01,
+ "lam": 0.1,
+ "K": 4,
+ "sigma_bridge": 0.05,
+ "ema_momentum": 0.995,
+ "term_grad_weight": 1.0,
+ "seeds": [
+ 3
+ ],
+ "gpu": 0,
+ "output_dir": "results/fa_dfa_d512_L8_seed3",
+ "methods": [
+ "fa",
+ "dfa"
+ ],
+ "random_targets": false,
+ "penalty_lam": 0.0,
+ "num_classes": 10
+ }
+} \ No newline at end of file
diff --git a/results/fa_dfa_d512_L8_seed4/results_cifar10.json b/results/fa_dfa_d512_L8_seed4/results_cifar10.json
new file mode 100644
index 0000000..335a670
--- /dev/null
+++ b/results/fa_dfa_d512_L8_seed4/results_cifar10.json
@@ -0,0 +1,881 @@
+{
+ "4": {
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 2.0558988859558105,
+ 2.0445796598815917,
+ 2.039279856033325,
+ 2.038156519508362,
+ 2.036371453819275,
+ 2.035036249847412,
+ 2.031745447311401,
+ 2.0292039811706544,
+ 2.0318929217529296,
+ 2.0292139208984374,
+ 2.0309901475524903,
+ 2.0294140184020995,
+ 2.024980134963989,
+ 2.026833506088257,
+ 2.0269724795532227,
+ 2.0259553061676026,
+ 2.025906521835327,
+ 2.0246393172454833,
+ 2.0217932791137696,
+ 2.024892644882202,
+ 2.0215970348739623,
+ 2.022969856185913,
+ 2.0212113690567017,
+ 2.018885112991333,
+ 2.019870955734253,
+ 2.017834800796509,
+ 2.0190272177124022,
+ 2.0189630345916747,
+ 2.01638854927063,
+ 2.017822699356079,
+ 2.019127551727295,
+ 2.0166009580230715,
+ 2.016471000518799,
+ 2.0156495739746094,
+ 2.0154129775238037,
+ 2.017227215194702,
+ 2.015604530029297,
+ 2.018137004623413,
+ 2.013566918640137,
+ 2.0139937634277345,
+ 2.014001354904175,
+ 2.014924657058716,
+ 2.01404189201355,
+ 2.012948843307495,
+ 2.0132822931671144,
+ 2.0135159986114504,
+ 2.0149466477966307,
+ 2.0134463012313843,
+ 2.0127882065200806,
+ 2.011855206069946,
+ 2.0118354721450804,
+ 2.009852321395874,
+ 2.0144452475357055,
+ 2.0108168384170533,
+ 2.0125812700653074,
+ 2.009986862640381,
+ 2.0114155954360964,
+ 2.010974427947998,
+ 2.012690294647217,
+ 2.0099948442077635,
+ 2.0100676802062987,
+ 2.0103798513031004,
+ 2.009976401939392,
+ 2.009263822669983,
+ 2.0103307348251342,
+ 2.0066765419006347,
+ 2.0081610918426516,
+ 2.009749242553711,
+ 2.009436781463623,
+ 2.008263199005127,
+ 2.008898328781128,
+ 2.0088115994262696,
+ 2.0076247560882567,
+ 2.008243568115234,
+ 2.009206538734436,
+ 2.0073812493896486,
+ 2.0084627660369874,
+ 2.0082589547729492,
+ 2.006824525222778,
+ 2.006816029586792,
+ 2.005350517425537,
+ 2.005616167564392,
+ 2.0048646172714233,
+ 2.005117626724243,
+ 2.007857433204651,
+ 2.006735478591919,
+ 2.0058193558502198,
+ 2.0068686953353883,
+ 2.0066705658721924,
+ 2.0060114856719973,
+ 2.0083387131118773,
+ 2.0070924770736696,
+ 2.006274801864624,
+ 2.005065950050354,
+ 2.007637509765625,
+ 2.0068550647354124,
+ 2.0040754894256594,
+ 2.0044231857681276,
+ 2.00634932926178,
+ 2.004043010559082
+ ],
+ "train_acc": [
+ 0.2432,
+ 0.24314,
+ 0.24398,
+ 0.244,
+ 0.2482,
+ 0.24582,
+ 0.24994,
+ 0.25054,
+ 0.24894,
+ 0.24966,
+ 0.2472,
+ 0.2524,
+ 0.25424,
+ 0.25128,
+ 0.25402,
+ 0.2517,
+ 0.25458,
+ 0.25468,
+ 0.2556,
+ 0.25626,
+ 0.25828,
+ 0.25636,
+ 0.2575,
+ 0.25626,
+ 0.2598,
+ 0.25736,
+ 0.25858,
+ 0.25966,
+ 0.261,
+ 0.26086,
+ 0.25872,
+ 0.26222,
+ 0.25918,
+ 0.26136,
+ 0.2628,
+ 0.26148,
+ 0.26234,
+ 0.26032,
+ 0.26366,
+ 0.26014,
+ 0.26316,
+ 0.2627,
+ 0.26332,
+ 0.26396,
+ 0.2624,
+ 0.2651,
+ 0.26332,
+ 0.26546,
+ 0.26388,
+ 0.2643,
+ 0.26664,
+ 0.26656,
+ 0.26452,
+ 0.26586,
+ 0.26372,
+ 0.26434,
+ 0.26688,
+ 0.265,
+ 0.26536,
+ 0.2676,
+ 0.26708,
+ 0.26556,
+ 0.26724,
+ 0.26668,
+ 0.26684,
+ 0.26658,
+ 0.26898,
+ 0.26802,
+ 0.26746,
+ 0.26814,
+ 0.2674,
+ 0.2696,
+ 0.26804,
+ 0.26742,
+ 0.26854,
+ 0.2678,
+ 0.26776,
+ 0.26772,
+ 0.26896,
+ 0.26856,
+ 0.2709,
+ 0.26856,
+ 0.27094,
+ 0.27032,
+ 0.26868,
+ 0.26798,
+ 0.27036,
+ 0.26862,
+ 0.2704,
+ 0.26882,
+ 0.26632,
+ 0.2691,
+ 0.26788,
+ 0.26786,
+ 0.26864,
+ 0.26766,
+ 0.27096,
+ 0.27096,
+ 0.26832,
+ 0.27034
+ ],
+ "test_acc": [
+ 0.255,
+ 0.282,
+ 0.2597,
+ 0.2682,
+ 0.2706,
+ 0.2653,
+ 0.268,
+ 0.2831,
+ 0.2608,
+ 0.2611,
+ 0.2809,
+ 0.2728,
+ 0.2628,
+ 0.2678,
+ 0.2917,
+ 0.2633,
+ 0.2587,
+ 0.2789,
+ 0.2667,
+ 0.255,
+ 0.2702,
+ 0.2763,
+ 0.2721,
+ 0.2728,
+ 0.2836,
+ 0.2812,
+ 0.2586,
+ 0.2753,
+ 0.2767,
+ 0.2765,
+ 0.2893,
+ 0.2684,
+ 0.2739,
+ 0.2781,
+ 0.283,
+ 0.2702,
+ 0.2792,
+ 0.2779,
+ 0.2757,
+ 0.2829,
+ 0.2798,
+ 0.2777,
+ 0.2673,
+ 0.2797,
+ 0.2958,
+ 0.272,
+ 0.2749,
+ 0.2875,
+ 0.2807,
+ 0.2796,
+ 0.2828,
+ 0.2825,
+ 0.2827,
+ 0.2782,
+ 0.2898,
+ 0.2783,
+ 0.2754,
+ 0.2926,
+ 0.2676,
+ 0.2797,
+ 0.2879,
+ 0.2839,
+ 0.2887,
+ 0.2792,
+ 0.2699,
+ 0.2925,
+ 0.2741,
+ 0.2804,
+ 0.2827,
+ 0.282,
+ 0.2908,
+ 0.2818,
+ 0.2801,
+ 0.2873,
+ 0.2889,
+ 0.2898,
+ 0.2934,
+ 0.2874,
+ 0.2894,
+ 0.2882,
+ 0.2751,
+ 0.2786,
+ 0.2871,
+ 0.2813,
+ 0.2875,
+ 0.2837,
+ 0.2827,
+ 0.2853,
+ 0.2816,
+ 0.2853,
+ 0.2848,
+ 0.2829,
+ 0.2862,
+ 0.2857,
+ 0.2852,
+ 0.2855,
+ 0.2859,
+ 0.2859,
+ 0.2859,
+ 0.2861
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.348124623298645,
+ 0.0005955962114967406,
+ 0.0003339378454256803,
+ -0.00018519387231208384,
+ -0.00039015739457681775,
+ -6.0401107475627214e-05,
+ 0.0005056928494013846,
+ -0.00041366269579157233
+ ],
+ "perturbation_rho": [
+ 0.023487141355872154,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "nudging": {
+ "0.001": [
+ -2.4354085326194763e-07,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 9.313225746154785e-10,
+ 0.0,
+ 0.0
+ ],
+ "0.003": [
+ -9.727664291858673e-07,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 9.313225746154785e-10,
+ 0.0,
+ 0.0
+ ],
+ "0.01": [
+ -3.107357770204544e-06,
+ 0.0,
+ -1.862645149230957e-09,
+ 0.0,
+ 0.0,
+ 9.313225746154785e-10,
+ 0.0,
+ 0.0
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 57046.83203125,
+ 1460526592.0,
+ 3493817088.0,
+ 5286084096.0,
+ 7466274304.0,
+ 9068557312.0,
+ 9149831168.0,
+ 9557196800.0,
+ 9628340224.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.206747353739047e-07,
+ 1.568134094798168e-10,
+ 1.5691002663853482e-10,
+ 1.5639808892409235e-10,
+ 1.5647411144570356e-10,
+ 1.564954832389276e-10,
+ 1.564743612458841e-10,
+ 1.564741808346426e-10,
+ 1.5647404205676452e-10
+ ]
+ },
+ "drift": {
+ "embed.weight": 345.4826524750452,
+ "embed.bias": 268.77599398354755,
+ "blocks.0.ln.weight": 9.693802971740622,
+ "blocks.0.w1.weight": 305.69328663379747,
+ "blocks.0.w1.bias": 282.72127878032245,
+ "blocks.0.w2.weight": 506.67910364132115,
+ "blocks.1.ln.weight": 9.022706446436455,
+ "blocks.1.w1.weight": 344.52276292946954,
+ "blocks.1.w1.bias": 336.8078507211997,
+ "blocks.1.w2.weight": 344.5230830295154,
+ "blocks.2.ln.weight": 9.205345940074093,
+ "blocks.2.w1.weight": 390.2517774750357,
+ "blocks.2.w1.bias": 365.62094868403284,
+ "blocks.2.w2.weight": 372.1071143869873,
+ "blocks.3.ln.weight": 10.058504715768814,
+ "blocks.3.w1.weight": 409.96586790482206,
+ "blocks.3.w1.bias": 389.26772963997206,
+ "blocks.3.w2.weight": 398.4000450175067,
+ "blocks.4.ln.weight": 10.413518753619888,
+ "blocks.4.w1.weight": 429.20878622659194,
+ "blocks.4.w1.bias": 400.403481558048,
+ "blocks.4.w2.weight": 399.0461533995044,
+ "blocks.5.ln.weight": 7.53017659614122,
+ "blocks.5.w1.weight": 303.5211766091988,
+ "blocks.5.w1.bias": 290.0904456815074,
+ "blocks.5.w2.weight": 267.46908289070075,
+ "blocks.6.ln.weight": 8.989001105343268,
+ "blocks.6.w1.weight": 361.1863333213492,
+ "blocks.6.w1.bias": 343.25957937988704,
+ "blocks.6.w2.weight": 317.54530185898403,
+ "blocks.7.ln.weight": 7.256623314136568,
+ "blocks.7.w1.weight": 264.5076613280686,
+ "blocks.7.w1.bias": 244.7481419757745,
+ "blocks.7.w2.weight": 244.11940761827694,
+ "out_ln.weight": 0.5537612143282766,
+ "out_head.weight": 8.260097616924732,
+ "out_head.bias": 0.594355583013062
+ }
+ },
+ "fa": {
+ "log": {
+ "train_loss": [
+ 2.0499230930328367,
+ 1.9647031881713868,
+ 1.9228250009536743,
+ 1.8920866240692138,
+ 1.8762959116363525,
+ 1.8783360265731812,
+ 1.8759148642349244,
+ 1.8639714459609986,
+ 1.859312229270935,
+ 1.8467648499298095,
+ 1.8394229892730714,
+ 1.825405124130249,
+ 1.820375383529663,
+ 1.8109127722549438,
+ 1.8068262866210938,
+ 1.7986448593521118,
+ 1.7920247109603882,
+ 1.7862463021087647,
+ 1.7800769235610963,
+ 1.7746711435317992,
+ 1.7699551953125,
+ 1.7670180697250366,
+ 1.7607733086395263,
+ 1.7561056157684327,
+ 1.752474511489868,
+ 1.7487609728240967,
+ 1.7504161080551148,
+ 1.7488452796173095,
+ 1.744940209083557,
+ 1.7447595501327515,
+ 1.7444546957015992,
+ 1.7363371981811524,
+ 1.7366404415893555,
+ 1.7359863372802735,
+ 1.733716072654724,
+ 1.7326603939056398,
+ 1.7307178776550294,
+ 1.7296201685333252,
+ 1.7284672827911376,
+ 1.7325348129272462,
+ 1.7272048714828492,
+ 1.7266432265472411,
+ 1.7268222411727905,
+ 1.7249765619659423,
+ 1.7266766318130493,
+ 1.7234420058441162,
+ 1.7232857625579834,
+ 1.7222659621429444,
+ 1.7246627559280396,
+ 1.7202154187011718,
+ 1.721605923423767,
+ 1.7191662756729127,
+ 1.7233360834503173,
+ 1.7196103755950927,
+ 1.721537714920044,
+ 1.7234055492401124,
+ 1.7208966232681275,
+ 1.719355527381897,
+ 1.716101904335022,
+ 1.7131741651153565,
+ 1.716256374435425,
+ 1.7143639738082885,
+ 1.7126432967376708,
+ 1.7113130987167358,
+ 1.7133349226760863,
+ 1.7085220684051514,
+ 1.7085635126495362,
+ 1.7085992751312256,
+ 1.7081326258087157,
+ 1.7065643463897706,
+ 1.7067539226913453,
+ 1.7028010097503663,
+ 1.7049776620864867,
+ 1.7025865216445923,
+ 1.7059548104095459,
+ 1.7012168072128295,
+ 1.7034333710479737,
+ 1.700839725112915,
+ 1.7046385177612304,
+ 1.6998081121063233,
+ 1.6977149985122681,
+ 1.6993804161834716,
+ 1.696179700050354,
+ 1.696841403427124,
+ 1.6989550568771363,
+ 1.6974260115814208,
+ 1.691993281288147,
+ 1.6956287409210204,
+ 1.6960039244842529,
+ 1.693833966407776,
+ 1.7002782623672485,
+ 1.6935484520721436,
+ 1.6925058234405517,
+ 1.693256030807495,
+ 1.6941557180786133,
+ 1.6950180599594116,
+ 1.697810530014038,
+ 1.6920377853393556,
+ 1.694213403968811,
+ 1.6934792825317382
+ ],
+ "train_acc": [
+ 0.2421,
+ 0.2836,
+ 0.30398,
+ 0.31868,
+ 0.32722,
+ 0.32272,
+ 0.32508,
+ 0.33034,
+ 0.33142,
+ 0.33714,
+ 0.33564,
+ 0.34428,
+ 0.34574,
+ 0.35108,
+ 0.35066,
+ 0.35318,
+ 0.35484,
+ 0.35766,
+ 0.36236,
+ 0.36052,
+ 0.36336,
+ 0.3664,
+ 0.36704,
+ 0.36724,
+ 0.37118,
+ 0.37224,
+ 0.3698,
+ 0.36986,
+ 0.37486,
+ 0.3753,
+ 0.37402,
+ 0.37556,
+ 0.37578,
+ 0.37696,
+ 0.37936,
+ 0.3799,
+ 0.37784,
+ 0.38368,
+ 0.38064,
+ 0.37848,
+ 0.38158,
+ 0.38068,
+ 0.38108,
+ 0.38166,
+ 0.37934,
+ 0.38318,
+ 0.38062,
+ 0.38266,
+ 0.38166,
+ 0.3839,
+ 0.38078,
+ 0.38148,
+ 0.38032,
+ 0.38514,
+ 0.38458,
+ 0.38238,
+ 0.38438,
+ 0.38428,
+ 0.3852,
+ 0.38752,
+ 0.38766,
+ 0.38474,
+ 0.38774,
+ 0.38698,
+ 0.38748,
+ 0.38616,
+ 0.38806,
+ 0.38998,
+ 0.3884,
+ 0.39208,
+ 0.39082,
+ 0.39204,
+ 0.38986,
+ 0.39288,
+ 0.39172,
+ 0.3936,
+ 0.39076,
+ 0.3933,
+ 0.39058,
+ 0.3929,
+ 0.39284,
+ 0.39224,
+ 0.3934,
+ 0.3943,
+ 0.39238,
+ 0.39528,
+ 0.3933,
+ 0.39514,
+ 0.39354,
+ 0.39286,
+ 0.39398,
+ 0.39674,
+ 0.39442,
+ 0.39522,
+ 0.3961,
+ 0.39508,
+ 0.39172,
+ 0.39722,
+ 0.3963,
+ 0.39622
+ ],
+ "test_acc": [
+ 0.298,
+ 0.3175,
+ 0.3253,
+ 0.3526,
+ 0.3529,
+ 0.3373,
+ 0.3593,
+ 0.3592,
+ 0.3528,
+ 0.3615,
+ 0.3745,
+ 0.3626,
+ 0.3649,
+ 0.3807,
+ 0.3926,
+ 0.3892,
+ 0.3738,
+ 0.3794,
+ 0.3869,
+ 0.3749,
+ 0.3867,
+ 0.3913,
+ 0.3851,
+ 0.3865,
+ 0.3911,
+ 0.3974,
+ 0.3817,
+ 0.3933,
+ 0.3944,
+ 0.3958,
+ 0.3981,
+ 0.3966,
+ 0.3975,
+ 0.4001,
+ 0.4023,
+ 0.4001,
+ 0.402,
+ 0.4049,
+ 0.4088,
+ 0.4076,
+ 0.4051,
+ 0.4027,
+ 0.3994,
+ 0.4035,
+ 0.4076,
+ 0.3913,
+ 0.4047,
+ 0.4061,
+ 0.4018,
+ 0.4118,
+ 0.4097,
+ 0.41,
+ 0.4085,
+ 0.401,
+ 0.4083,
+ 0.4014,
+ 0.405,
+ 0.4018,
+ 0.4114,
+ 0.4104,
+ 0.4074,
+ 0.4083,
+ 0.4103,
+ 0.4046,
+ 0.4011,
+ 0.4139,
+ 0.408,
+ 0.4094,
+ 0.4125,
+ 0.4088,
+ 0.414,
+ 0.4114,
+ 0.4124,
+ 0.4154,
+ 0.4121,
+ 0.4153,
+ 0.4115,
+ 0.4125,
+ 0.4144,
+ 0.4152,
+ 0.4117,
+ 0.411,
+ 0.414,
+ 0.4126,
+ 0.4137,
+ 0.4151,
+ 0.4134,
+ 0.4126,
+ 0.4132,
+ 0.4127,
+ 0.4138,
+ 0.4143,
+ 0.4148,
+ 0.414,
+ 0.4142,
+ 0.4145,
+ 0.4145,
+ 0.415,
+ 0.4144,
+ 0.4143
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.026830948889255524,
+ 0.10699465870857239,
+ 0.09640855342149734,
+ 0.016582896932959557,
+ -0.06015268713235855,
+ -0.09386980533599854,
+ 0.010263346135616302,
+ 0.9927012920379639
+ ],
+ "perturbation_rho": [
+ 0.02561907097697258,
+ -0.004561614245176315,
+ 0.008805938996374607,
+ -0.004548710770905018,
+ -0.06699962168931961,
+ -0.0668429285287857,
+ -0.03943357244133949,
+ 0.02623065561056137
+ ],
+ "nudging": {
+ "0.001": [
+ -2.3067113943398e-06,
+ -4.3620821088552475e-07,
+ -2.4866312742233276e-07,
+ 7.08969309926033e-08,
+ 1.3213139027357101e-07,
+ 1.5960540622472763e-07,
+ 4.423782229423523e-09,
+ -1.1902302503585815e-06
+ ],
+ "0.003": [
+ -7.16338399797678e-06,
+ -1.5725381672382355e-06,
+ -7.672933861613274e-07,
+ -1.3835960999131203e-07,
+ 2.973247319459915e-07,
+ 5.42961061000824e-07,
+ -2.5727786123752594e-08,
+ -4.3523614294826984e-06
+ ],
+ "0.01": [
+ -2.3563567083328962e-05,
+ -5.601265002042055e-06,
+ -2.8641661629080772e-06,
+ -5.615875124931335e-07,
+ 9.248033165931702e-07,
+ 1.622654963284731e-06,
+ -2.4889595806598663e-07,
+ -1.5358731616288424e-05
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 5353.1064453125,
+ 80361.6875,
+ 198367.984375,
+ 279507.6875,
+ 680688.0,
+ 1020542.375,
+ 1562414.75,
+ 1590694.75,
+ 789908.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 3.106619624304585e-05,
+ 1.862773842731258e-06,
+ 9.139845928984869e-07,
+ 7.041780349936744e-07,
+ 6.676064572275209e-07,
+ 6.690797818009742e-07,
+ 6.687893119305954e-07,
+ 6.670686047982599e-07,
+ 6.49854484890966e-07
+ ]
+ },
+ "drift": {
+ "embed.weight": 40.22494125084113,
+ "embed.bias": 15.798132334624936,
+ "blocks.0.ln.weight": 1.2241361855735886,
+ "blocks.0.w1.weight": 16.34451235786821,
+ "blocks.0.w1.bias": 11.623359166710161,
+ "blocks.0.w2.weight": 59.40609750891924,
+ "blocks.1.ln.weight": 0.9394658291469571,
+ "blocks.1.w1.weight": 16.816482551922167,
+ "blocks.1.w1.bias": 7.284029885203612,
+ "blocks.1.w2.weight": 43.964019309740266,
+ "blocks.2.ln.weight": 0.7618290822194834,
+ "blocks.2.w1.weight": 15.551125367027769,
+ "blocks.2.w1.bias": 9.075794440045083,
+ "blocks.2.w2.weight": 44.423765980175624,
+ "blocks.3.ln.weight": 0.7978062227799843,
+ "blocks.3.w1.weight": 18.895887551107087,
+ "blocks.3.w1.bias": 16.322155802924105,
+ "blocks.3.w2.weight": 40.59294466107463,
+ "blocks.4.ln.weight": 0.7028791271353807,
+ "blocks.4.w1.weight": 18.831247940509048,
+ "blocks.4.w1.bias": 19.397899365978574,
+ "blocks.4.w2.weight": 33.77994253855422,
+ "blocks.5.ln.weight": 0.5882412988040494,
+ "blocks.5.w1.weight": 20.16928056506279,
+ "blocks.5.w1.bias": 23.309656656430743,
+ "blocks.5.w2.weight": 30.02848330872152,
+ "blocks.6.ln.weight": 0.6458882840441386,
+ "blocks.6.w1.weight": 15.641543003808316,
+ "blocks.6.w1.bias": 12.528164451431063,
+ "blocks.6.w2.weight": 65.83396405007723,
+ "blocks.7.ln.weight": 0.8531338247599758,
+ "blocks.7.w1.weight": 19.29027360202624,
+ "blocks.7.w1.bias": 18.640476186308685,
+ "blocks.7.w2.weight": 55.45516056132436,
+ "out_ln.weight": 0.3127678274218022,
+ "out_head.weight": 5.776369546198508,
+ "out_head.bias": 1.402401683707018
+ }
+ }
+ },
+ "config": {
+ "dataset": "cifar10",
+ "d_hidden": 512,
+ "num_blocks": 8,
+ "batch_size": 128,
+ "epochs": 100,
+ "lr": 0.001,
+ "lr_fb": 0.001,
+ "wd": 0.01,
+ "lam": 0.1,
+ "K": 4,
+ "sigma_bridge": 0.05,
+ "ema_momentum": 0.995,
+ "term_grad_weight": 1.0,
+ "seeds": [
+ 4
+ ],
+ "gpu": 0,
+ "output_dir": "results/fa_dfa_d512_L8_seed4",
+ "methods": [
+ "fa",
+ "dfa"
+ ],
+ "random_targets": false,
+ "penalty_lam": 0.0,
+ "num_classes": 10
+ }
+} \ No newline at end of file
diff --git a/results/fa_dfa_d512_L8_seed5/results_cifar10.json b/results/fa_dfa_d512_L8_seed5/results_cifar10.json
new file mode 100644
index 0000000..e2ec1f0
--- /dev/null
+++ b/results/fa_dfa_d512_L8_seed5/results_cifar10.json
@@ -0,0 +1,881 @@
+{
+ "5": {
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 2.0787187157440186,
+ 2.046413578338623,
+ 2.043807250671387,
+ 2.0408004566955564,
+ 2.034887484970093,
+ 2.03150455116272,
+ 2.0333121584320066,
+ 2.030504135971069,
+ 2.0268674066925048,
+ 2.026082299346924,
+ 2.0260079474639894,
+ 2.025023748931885,
+ 2.0206153302001955,
+ 2.019373860206604,
+ 2.019724616012573,
+ 2.0159671471405027,
+ 2.0133701262664796,
+ 2.013524337539673,
+ 2.01570996799469,
+ 2.0106682904815676,
+ 2.011510214996338,
+ 2.0109367053222655,
+ 2.0096939754486085,
+ 2.0094474443054198,
+ 2.0071240294647215,
+ 2.009241544647217,
+ 2.0069837924957277,
+ 2.006896128387451,
+ 2.0078313034439086,
+ 2.0056955588531493,
+ 2.0059358129119875,
+ 2.005650292816162,
+ 2.0021781530380247,
+ 2.0034559716033935,
+ 2.0014139038848877,
+ 2.0027021182250975,
+ 2.002708815307617,
+ 2.0024726174926757,
+ 2.0010244245910647,
+ 2.0033527210235595,
+ 2.0017753661727906,
+ 2.0007596390151976,
+ 2.0027179621124267,
+ 2.000149852218628,
+ 2.000393838119507,
+ 1.9997536290740967,
+ 1.998189719390869,
+ 2.0013531698226927,
+ 1.9993105652236938,
+ 2.002620627365112,
+ 2.000056958694458,
+ 1.998213128967285,
+ 1.9998394576263427,
+ 1.9988756994628907,
+ 2.000797734146118,
+ 1.9982664052581787,
+ 1.9998580239105224,
+ 1.9990567274475097,
+ 2.0013695433807372,
+ 2.000919705581665,
+ 2.00011759311676,
+ 1.998085468597412,
+ 1.9998952405548096,
+ 1.9984590853881836,
+ 2.0001076091766357,
+ 1.9984168599700927,
+ 1.998167473297119,
+ 1.9996371339416503,
+ 1.998458631324768,
+ 1.9980697260284423,
+ 1.99891614402771,
+ 1.9961408318328857,
+ 1.9967646357727051,
+ 1.9982722332000733,
+ 1.9978370711517335,
+ 1.9981338930892945,
+ 1.9954986195373534,
+ 1.998370800704956,
+ 1.9966814602661134,
+ 1.9975791915893555,
+ 1.9963215632629394,
+ 1.9960799507141114,
+ 1.9947424390029906,
+ 1.995765002822876,
+ 1.9970874053955079,
+ 1.9967461769866943,
+ 1.9952786428070068,
+ 1.9961291760253905,
+ 1.9962149816894532,
+ 1.9956222714614869,
+ 1.9963563278579712,
+ 1.9963637481307983,
+ 1.995376283531189,
+ 1.9958238674545288,
+ 1.996031088180542,
+ 1.9979531326675415,
+ 1.9962067671966552,
+ 1.9966400229644776,
+ 1.9965521046447754,
+ 1.9952978050994874
+ ],
+ "train_acc": [
+ 0.2307,
+ 0.2439,
+ 0.241,
+ 0.24388,
+ 0.24862,
+ 0.25128,
+ 0.2482,
+ 0.25152,
+ 0.25226,
+ 0.25402,
+ 0.25412,
+ 0.25508,
+ 0.25796,
+ 0.25802,
+ 0.25836,
+ 0.26098,
+ 0.26092,
+ 0.2638,
+ 0.2603,
+ 0.2651,
+ 0.26208,
+ 0.26628,
+ 0.2672,
+ 0.26436,
+ 0.26628,
+ 0.264,
+ 0.26676,
+ 0.26936,
+ 0.26734,
+ 0.26656,
+ 0.268,
+ 0.27012,
+ 0.26954,
+ 0.2697,
+ 0.2733,
+ 0.27156,
+ 0.2711,
+ 0.27136,
+ 0.27432,
+ 0.27026,
+ 0.2729,
+ 0.27246,
+ 0.27116,
+ 0.27356,
+ 0.27474,
+ 0.27462,
+ 0.27356,
+ 0.27106,
+ 0.27586,
+ 0.2728,
+ 0.2735,
+ 0.27494,
+ 0.27414,
+ 0.27566,
+ 0.2745,
+ 0.275,
+ 0.27516,
+ 0.27464,
+ 0.27214,
+ 0.27562,
+ 0.27386,
+ 0.27616,
+ 0.27478,
+ 0.27452,
+ 0.27438,
+ 0.27868,
+ 0.27604,
+ 0.27778,
+ 0.2761,
+ 0.2758,
+ 0.27738,
+ 0.27662,
+ 0.27798,
+ 0.2747,
+ 0.27758,
+ 0.27598,
+ 0.27646,
+ 0.27686,
+ 0.27664,
+ 0.2775,
+ 0.27674,
+ 0.27976,
+ 0.27856,
+ 0.27662,
+ 0.27848,
+ 0.2782,
+ 0.27792,
+ 0.27528,
+ 0.28002,
+ 0.27594,
+ 0.27606,
+ 0.27738,
+ 0.27828,
+ 0.2771,
+ 0.27888,
+ 0.2776,
+ 0.27728,
+ 0.27756,
+ 0.27762,
+ 0.27778
+ ],
+ "test_acc": [
+ 0.2557,
+ 0.2621,
+ 0.2511,
+ 0.2468,
+ 0.2652,
+ 0.2808,
+ 0.268,
+ 0.268,
+ 0.2707,
+ 0.2646,
+ 0.2731,
+ 0.2756,
+ 0.2675,
+ 0.2771,
+ 0.2753,
+ 0.2594,
+ 0.2718,
+ 0.2876,
+ 0.2822,
+ 0.29,
+ 0.2898,
+ 0.2714,
+ 0.2849,
+ 0.2855,
+ 0.2765,
+ 0.281,
+ 0.2888,
+ 0.2819,
+ 0.2876,
+ 0.2929,
+ 0.278,
+ 0.2912,
+ 0.2877,
+ 0.2921,
+ 0.2827,
+ 0.2961,
+ 0.2852,
+ 0.2869,
+ 0.2852,
+ 0.2944,
+ 0.2755,
+ 0.2905,
+ 0.2862,
+ 0.2873,
+ 0.2769,
+ 0.2929,
+ 0.2913,
+ 0.2949,
+ 0.291,
+ 0.2811,
+ 0.2879,
+ 0.2854,
+ 0.282,
+ 0.2891,
+ 0.2937,
+ 0.2909,
+ 0.2957,
+ 0.289,
+ 0.292,
+ 0.2901,
+ 0.2975,
+ 0.2944,
+ 0.2887,
+ 0.2984,
+ 0.2938,
+ 0.295,
+ 0.2949,
+ 0.2936,
+ 0.2911,
+ 0.2947,
+ 0.2942,
+ 0.2841,
+ 0.2989,
+ 0.2916,
+ 0.2949,
+ 0.2871,
+ 0.2863,
+ 0.2936,
+ 0.2936,
+ 0.294,
+ 0.2902,
+ 0.2943,
+ 0.2913,
+ 0.2937,
+ 0.2942,
+ 0.294,
+ 0.2913,
+ 0.2949,
+ 0.2934,
+ 0.294,
+ 0.293,
+ 0.2946,
+ 0.2939,
+ 0.2929,
+ 0.2932,
+ 0.2933,
+ 0.2937,
+ 0.2937,
+ 0.2939,
+ 0.2938
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.38104039430618286,
+ 0.0003684713738039136,
+ -0.00036610430106520653,
+ 7.536964403698221e-06,
+ -7.538420322816819e-05,
+ 0.00021099840523675084,
+ -0.00022237180382944643,
+ -0.0001967815769603476
+ ],
+ "perturbation_rho": [
+ -0.022990737110376358,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "nudging": {
+ "0.001": [
+ -3.380700945854187e-07,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.003": [
+ -1.1343508958816528e-06,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.01": [
+ -3.5706907510757446e-06,
+ 0.0,
+ 1.862645149230957e-09,
+ 0.0,
+ 1.862645149230957e-09,
+ 0.0,
+ 1.862645149230957e-09,
+ 0.0
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 53305.52734375,
+ 1104618240.0,
+ 1890710528.0,
+ 2939089664.0,
+ 4109807360.0,
+ 7493927424.0,
+ 8776990720.0,
+ 9704909824.0,
+ 9853197312.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.442081949993735e-07,
+ 1.9960764985338386e-10,
+ 1.943529920334086e-10,
+ 1.947161321069757e-10,
+ 1.9485994762202807e-10,
+ 1.949839456560909e-10,
+ 1.9488108349285937e-10,
+ 1.9481991020420253e-10,
+ 1.9481959101508295e-10
+ ]
+ },
+ "drift": {
+ "embed.weight": 332.9361658873903,
+ "embed.bias": 249.6745044186471,
+ "blocks.0.ln.weight": 9.964419461290774,
+ "blocks.0.w1.weight": 277.0099379887824,
+ "blocks.0.w1.bias": 243.60625234836664,
+ "blocks.0.w2.weight": 475.0412234341198,
+ "blocks.1.ln.weight": 7.805044859841917,
+ "blocks.1.w1.weight": 276.45375034840094,
+ "blocks.1.w1.bias": 248.51195524041702,
+ "blocks.1.w2.weight": 286.4144000014896,
+ "blocks.2.ln.weight": 8.343399040552265,
+ "blocks.2.w1.weight": 312.7393216527087,
+ "blocks.2.w1.bias": 278.64438111710933,
+ "blocks.2.w2.weight": 319.41728918519345,
+ "blocks.3.ln.weight": 8.2818066466087,
+ "blocks.3.w1.weight": 331.8178756864101,
+ "blocks.3.w1.bias": 324.7690510217321,
+ "blocks.3.w2.weight": 327.59347009101197,
+ "blocks.4.ln.weight": 10.593671245978031,
+ "blocks.4.w1.weight": 439.88191861683595,
+ "blocks.4.w1.bias": 415.51184102478516,
+ "blocks.4.w2.weight": 435.66114149915,
+ "blocks.5.ln.weight": 10.04837328756298,
+ "blocks.5.w1.weight": 400.6014717531882,
+ "blocks.5.w1.bias": 372.89187324310467,
+ "blocks.5.w2.weight": 389.61950905043466,
+ "blocks.6.ln.weight": 9.160507087287726,
+ "blocks.6.w1.weight": 366.7858107672568,
+ "blocks.6.w1.bias": 337.0123511781895,
+ "blocks.6.w2.weight": 345.65905679108874,
+ "blocks.7.ln.weight": 8.151913429022978,
+ "blocks.7.w1.weight": 320.48348360714385,
+ "blocks.7.w1.bias": 294.0973824112796,
+ "blocks.7.w2.weight": 295.11322720454183,
+ "out_ln.weight": 0.6460140923890967,
+ "out_head.weight": 9.185934457998872,
+ "out_head.bias": 0.6276268606859465
+ }
+ },
+ "fa": {
+ "log": {
+ "train_loss": [
+ 2.038202630004883,
+ 1.9484270775604249,
+ 1.9142503852081298,
+ 1.8978817923355102,
+ 1.8827772109603882,
+ 1.8714566381073,
+ 1.866606608543396,
+ 1.8608839841079712,
+ 1.851916226119995,
+ 1.8518026924133302,
+ 1.8441847302246093,
+ 1.8414982928466797,
+ 1.832841236038208,
+ 1.8305379946517943,
+ 1.824911393432617,
+ 1.8228179180908204,
+ 1.815796693687439,
+ 1.8136043480682373,
+ 1.8111609268569946,
+ 1.8098963827133179,
+ 1.8101768481063842,
+ 1.8083936654663086,
+ 1.806380335998535,
+ 1.8081299662017822,
+ 1.8065022414398193,
+ 1.8069096044158937,
+ 1.808437180557251,
+ 1.8035286350250244,
+ 1.803345340270996,
+ 1.7989226748657225,
+ 1.8019704568481445,
+ 1.799195940284729,
+ 1.7924841125106812,
+ 1.7943892532348633,
+ 1.7934645865249634,
+ 1.7921251168823242,
+ 1.789522707901001,
+ 1.7859628525161744,
+ 1.7830697146606445,
+ 1.7806333806991577,
+ 1.7807968478775025,
+ 1.7813195696258546,
+ 1.7742907537460326,
+ 1.7763463638687134,
+ 1.7789305100250243,
+ 1.7736491188812256,
+ 1.768717915878296,
+ 1.7715998685073853,
+ 1.7694499214935302,
+ 1.7765150137329102,
+ 1.7702046938323974,
+ 1.7672565545272827,
+ 1.7681742614746094,
+ 1.7645444506835937,
+ 1.7642444360351563,
+ 1.7655976934814452,
+ 1.7601742944717407,
+ 1.7602788580703734,
+ 1.7589675115585328,
+ 1.7637487964630127,
+ 1.7622764212036133,
+ 1.7553310860443114,
+ 1.7552539752197265,
+ 1.7542964382553101,
+ 1.7557507891464232,
+ 1.7533866836929322,
+ 1.752781385498047,
+ 1.7534792293930053,
+ 1.751649098548889,
+ 1.7528042163848876,
+ 1.7522574480438233,
+ 1.748868000869751,
+ 1.745579310951233,
+ 1.748803058128357,
+ 1.7481703802490234,
+ 1.7455506887817382,
+ 1.7438273191070557,
+ 1.7489568131256104,
+ 1.7463487658691406,
+ 1.7455516606903076,
+ 1.7449686437225342,
+ 1.7442902671051026,
+ 1.7408798983383178,
+ 1.7403113724136352,
+ 1.7415612424087525,
+ 1.7422372088623046,
+ 1.7399942685317993,
+ 1.7430598278045655,
+ 1.7437233737182618,
+ 1.7388530453872681,
+ 1.742972678489685,
+ 1.7391708444213867,
+ 1.7380083013153076,
+ 1.7414154996109008,
+ 1.7416090111541749,
+ 1.7426657040405273,
+ 1.7410819094085694,
+ 1.7388902407455444,
+ 1.7386211107635499,
+ 1.7390259966278077
+ ],
+ "train_acc": [
+ 0.24962,
+ 0.29246,
+ 0.30748,
+ 0.31596,
+ 0.32232,
+ 0.32566,
+ 0.32894,
+ 0.33218,
+ 0.33564,
+ 0.33338,
+ 0.33816,
+ 0.33802,
+ 0.34154,
+ 0.34408,
+ 0.3454,
+ 0.3464,
+ 0.35,
+ 0.35064,
+ 0.34942,
+ 0.35102,
+ 0.3499,
+ 0.35208,
+ 0.354,
+ 0.35186,
+ 0.35288,
+ 0.35062,
+ 0.35306,
+ 0.35098,
+ 0.35422,
+ 0.35166,
+ 0.35522,
+ 0.35382,
+ 0.35856,
+ 0.35798,
+ 0.35914,
+ 0.35776,
+ 0.36182,
+ 0.36158,
+ 0.36558,
+ 0.36348,
+ 0.36282,
+ 0.36244,
+ 0.36576,
+ 0.36514,
+ 0.3636,
+ 0.36724,
+ 0.36794,
+ 0.36594,
+ 0.36652,
+ 0.36342,
+ 0.36684,
+ 0.37162,
+ 0.36754,
+ 0.36882,
+ 0.36786,
+ 0.36904,
+ 0.36942,
+ 0.37216,
+ 0.37156,
+ 0.37028,
+ 0.37302,
+ 0.37194,
+ 0.37354,
+ 0.37204,
+ 0.37152,
+ 0.37402,
+ 0.3749,
+ 0.37556,
+ 0.37472,
+ 0.37472,
+ 0.3748,
+ 0.37612,
+ 0.37782,
+ 0.37806,
+ 0.37734,
+ 0.37744,
+ 0.37922,
+ 0.37866,
+ 0.37972,
+ 0.37874,
+ 0.37706,
+ 0.37788,
+ 0.37796,
+ 0.38026,
+ 0.37858,
+ 0.37822,
+ 0.37768,
+ 0.3786,
+ 0.37856,
+ 0.3791,
+ 0.37826,
+ 0.3811,
+ 0.38088,
+ 0.37906,
+ 0.3784,
+ 0.37828,
+ 0.37966,
+ 0.38174,
+ 0.38126,
+ 0.38014
+ ],
+ "test_acc": [
+ 0.3045,
+ 0.3393,
+ 0.3346,
+ 0.3359,
+ 0.3456,
+ 0.3582,
+ 0.3589,
+ 0.3566,
+ 0.3662,
+ 0.3668,
+ 0.3696,
+ 0.3757,
+ 0.3629,
+ 0.3738,
+ 0.3716,
+ 0.3674,
+ 0.3792,
+ 0.3837,
+ 0.3815,
+ 0.3839,
+ 0.3912,
+ 0.3753,
+ 0.3781,
+ 0.3804,
+ 0.3753,
+ 0.3771,
+ 0.3802,
+ 0.3792,
+ 0.3805,
+ 0.3876,
+ 0.3826,
+ 0.3862,
+ 0.3827,
+ 0.3927,
+ 0.382,
+ 0.393,
+ 0.3874,
+ 0.3935,
+ 0.381,
+ 0.3979,
+ 0.3786,
+ 0.3946,
+ 0.3908,
+ 0.3995,
+ 0.3955,
+ 0.3971,
+ 0.4012,
+ 0.399,
+ 0.3975,
+ 0.3922,
+ 0.3985,
+ 0.3901,
+ 0.3931,
+ 0.3976,
+ 0.3921,
+ 0.3985,
+ 0.3962,
+ 0.4034,
+ 0.3942,
+ 0.4,
+ 0.4027,
+ 0.4062,
+ 0.4005,
+ 0.4042,
+ 0.4061,
+ 0.4041,
+ 0.4029,
+ 0.4026,
+ 0.4045,
+ 0.4,
+ 0.4023,
+ 0.4018,
+ 0.4061,
+ 0.4049,
+ 0.4069,
+ 0.4042,
+ 0.4047,
+ 0.4007,
+ 0.4039,
+ 0.4028,
+ 0.4057,
+ 0.4022,
+ 0.4017,
+ 0.4046,
+ 0.4012,
+ 0.4026,
+ 0.4039,
+ 0.4032,
+ 0.4027,
+ 0.4051,
+ 0.4043,
+ 0.4052,
+ 0.4041,
+ 0.4048,
+ 0.4033,
+ 0.4037,
+ 0.4032,
+ 0.4021,
+ 0.4023,
+ 0.4025
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.014374179765582085,
+ 0.09000293165445328,
+ -0.03108237311244011,
+ -0.02239440567791462,
+ -0.02732698619365692,
+ -0.04657082259654999,
+ -0.06352561712265015,
+ 0.9887357950210571
+ ],
+ "perturbation_rho": [
+ 0.03167568892240524,
+ -0.015739459544420242,
+ 0.0075595797970891,
+ 0.03960222005844116,
+ 0.0035968590527772903,
+ 0.03037603199481964,
+ 0.029943909496068954,
+ 0.005924902856349945
+ ],
+ "nudging": {
+ "0.001": [
+ 5.2677933126688e-07,
+ -4.189787432551384e-07,
+ 9.185168892145157e-08,
+ -3.841705620288849e-09,
+ -3.655441105365753e-08,
+ 2.3981556296348572e-08,
+ 2.3981556296348572e-08,
+ -1.473352313041687e-06
+ ],
+ "0.003": [
+ 1.9101426005363464e-06,
+ -1.223525032401085e-06,
+ 1.5366822481155396e-07,
+ -2.537854015827179e-08,
+ 1.5622936189174652e-07,
+ 9.872019290924072e-08,
+ 1.771841198205948e-07,
+ -4.843459464609623e-06
+ ],
+ "0.01": [
+ 6.739639502484351e-06,
+ -3.795139491558075e-06,
+ 4.704343155026436e-07,
+ 2.0337756723165512e-07,
+ 3.941822797060013e-07,
+ 6.516929715871811e-07,
+ 1.0146759450435638e-06,
+ -1.683842856436968e-05
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 7750.18701171875,
+ 163374.5,
+ 1020835.0,
+ 1417354.375,
+ 1653246.5,
+ 1891510.125,
+ 2137834.5,
+ 2238698.25,
+ 1372594.75
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.8991271392442286e-05,
+ 1.4679561672892305e-06,
+ 6.962879979255376e-07,
+ 6.957282039365964e-07,
+ 6.946868325030664e-07,
+ 6.940763341845013e-07,
+ 6.940714456504793e-07,
+ 6.966275805098121e-07,
+ 6.714612368341477e-07
+ ]
+ },
+ "drift": {
+ "embed.weight": 55.14922199302306,
+ "embed.bias": 15.427529434718334,
+ "blocks.0.ln.weight": 1.3139840801235205,
+ "blocks.0.w1.weight": 18.28738161809814,
+ "blocks.0.w1.bias": 13.519470867121564,
+ "blocks.0.w2.weight": 65.29381196065084,
+ "blocks.1.ln.weight": 1.2381725193607873,
+ "blocks.1.w1.weight": 24.224592130996264,
+ "blocks.1.w1.bias": 19.040373052112855,
+ "blocks.1.w2.weight": 51.77163849918677,
+ "blocks.2.ln.weight": 0.7724310378501466,
+ "blocks.2.w1.weight": 20.96874606199096,
+ "blocks.2.w1.bias": 19.420703681847986,
+ "blocks.2.w2.weight": 49.62414572233535,
+ "blocks.3.ln.weight": 0.6858974872301369,
+ "blocks.3.w1.weight": 19.410601791558882,
+ "blocks.3.w1.bias": 18.939055453950672,
+ "blocks.3.w2.weight": 45.97967492141307,
+ "blocks.4.ln.weight": 0.6186727990894257,
+ "blocks.4.w1.weight": 19.69072198327253,
+ "blocks.4.w1.bias": 19.749605410301402,
+ "blocks.4.w2.weight": 41.99820530820194,
+ "blocks.5.ln.weight": 0.6406844547162872,
+ "blocks.5.w1.weight": 20.510058050945528,
+ "blocks.5.w1.bias": 20.988299652643033,
+ "blocks.5.w2.weight": 34.25267498831366,
+ "blocks.6.ln.weight": 0.6310276000328071,
+ "blocks.6.w1.weight": 18.188089381701026,
+ "blocks.6.w1.bias": 17.13681902701148,
+ "blocks.6.w2.weight": 39.811714763649384,
+ "blocks.7.ln.weight": 0.7307433363934756,
+ "blocks.7.w1.weight": 19.56130269038455,
+ "blocks.7.w1.bias": 16.899275966635617,
+ "blocks.7.w2.weight": 58.89283532862689,
+ "out_ln.weight": 0.430748638467109,
+ "out_head.weight": 7.024186774080115,
+ "out_head.bias": 0.6948984479282404
+ }
+ }
+ },
+ "config": {
+ "dataset": "cifar10",
+ "d_hidden": 512,
+ "num_blocks": 8,
+ "batch_size": 128,
+ "epochs": 100,
+ "lr": 0.001,
+ "lr_fb": 0.001,
+ "wd": 0.01,
+ "lam": 0.1,
+ "K": 4,
+ "sigma_bridge": 0.05,
+ "ema_momentum": 0.995,
+ "term_grad_weight": 1.0,
+ "seeds": [
+ 5
+ ],
+ "gpu": 0,
+ "output_dir": "results/fa_dfa_d512_L8_seed5",
+ "methods": [
+ "fa",
+ "dfa"
+ ],
+ "random_targets": false,
+ "penalty_lam": 0.0,
+ "num_classes": 10
+ }
+} \ No newline at end of file
diff --git a/results/fa_dfa_d512_L8_seed6/results_cifar10.json b/results/fa_dfa_d512_L8_seed6/results_cifar10.json
new file mode 100644
index 0000000..996693e
--- /dev/null
+++ b/results/fa_dfa_d512_L8_seed6/results_cifar10.json
@@ -0,0 +1,881 @@
+{
+ "6": {
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 2.048823083343506,
+ 2.036137353439331,
+ 2.0293412883758544,
+ 2.0249873413085937,
+ 2.022372166824341,
+ 2.0224948783874512,
+ 2.0168244441223147,
+ 2.016312102279663,
+ 2.0174264653015137,
+ 2.0148667056655882,
+ 2.016306040496826,
+ 2.012973408279419,
+ 2.009305914993286,
+ 2.0127887310409545,
+ 2.010646263999939,
+ 2.0090836833953856,
+ 2.010158688583374,
+ 2.0119627042007444,
+ 2.0057619748306275,
+ 2.00755533531189,
+ 2.001894345550537,
+ 2.004210508117676,
+ 2.002892424316406,
+ 2.0056438186264036,
+ 2.006109398841858,
+ 2.0008294903945925,
+ 2.004116688156128,
+ 2.003745227279663,
+ 2.0009986640930175,
+ 2.0015462801361084,
+ 2.001444738845825,
+ 1.9975958602142334,
+ 1.9996052980804444,
+ 1.999814730834961,
+ 2.0010388219451904,
+ 1.9962302626800537,
+ 1.9987594539642335,
+ 1.9968312421417236,
+ 1.9977108895111084,
+ 1.997722573852539,
+ 1.9974460966491698,
+ 1.9960338827514648,
+ 1.997143808517456,
+ 1.9972000302505493,
+ 1.9973672123718262,
+ 1.9960454425811767,
+ 1.9950894018936158,
+ 1.9955249545669556,
+ 1.9957580101776122,
+ 1.9944090689086913,
+ 1.9936514922332764,
+ 1.99337042137146,
+ 1.9966205854034424,
+ 1.9951700267791748,
+ 1.995979843597412,
+ 1.9958187718200684,
+ 1.9929929906463624,
+ 1.996409147720337,
+ 1.992997929534912,
+ 1.9945113285827636,
+ 1.9946366765975951,
+ 1.9921776248550416,
+ 1.9923859210586548,
+ 1.9960651620864869,
+ 1.992535831222534,
+ 1.9948228299331665,
+ 1.992319468231201,
+ 1.991410069732666,
+ 1.990960274810791,
+ 1.9932797208023072,
+ 1.9914824683380128,
+ 1.9920508996963502,
+ 1.9927404278564453,
+ 1.9916280765533447,
+ 1.9920522421646119,
+ 1.9932328916931152,
+ 1.9915541756439208,
+ 1.992103966407776,
+ 1.9913361803817748,
+ 1.9909088207626342,
+ 1.9888611060333252,
+ 1.9917893444061279,
+ 1.990510139694214,
+ 1.9895336349105834,
+ 1.9925078125,
+ 1.9905930145263673,
+ 1.9911131750488282,
+ 1.9901078464126587,
+ 1.9898306075286865,
+ 1.9894448529052735,
+ 1.9916437425231934,
+ 1.9892843076324462,
+ 1.9908900201797486,
+ 1.991130443496704,
+ 1.9902202153778077,
+ 1.9898065716934203,
+ 1.9898762433242798,
+ 1.989882501220703,
+ 1.9897464168548584,
+ 1.991097922897339
+ ],
+ "train_acc": [
+ 0.24736,
+ 0.24926,
+ 0.25592,
+ 0.25554,
+ 0.25688,
+ 0.25748,
+ 0.25952,
+ 0.25872,
+ 0.26084,
+ 0.26068,
+ 0.25918,
+ 0.26338,
+ 0.2641,
+ 0.2628,
+ 0.2662,
+ 0.26518,
+ 0.26584,
+ 0.26602,
+ 0.26744,
+ 0.26454,
+ 0.2692,
+ 0.26846,
+ 0.27026,
+ 0.26784,
+ 0.26908,
+ 0.27298,
+ 0.27076,
+ 0.27126,
+ 0.26854,
+ 0.27136,
+ 0.27188,
+ 0.27314,
+ 0.27166,
+ 0.27166,
+ 0.27,
+ 0.27358,
+ 0.27228,
+ 0.27568,
+ 0.27252,
+ 0.27382,
+ 0.275,
+ 0.27676,
+ 0.27532,
+ 0.27448,
+ 0.2742,
+ 0.27754,
+ 0.27586,
+ 0.27378,
+ 0.27572,
+ 0.27606,
+ 0.27584,
+ 0.27508,
+ 0.2744,
+ 0.27512,
+ 0.27518,
+ 0.27536,
+ 0.2768,
+ 0.27628,
+ 0.2759,
+ 0.27418,
+ 0.27668,
+ 0.27618,
+ 0.27754,
+ 0.2755,
+ 0.2797,
+ 0.27622,
+ 0.27722,
+ 0.2781,
+ 0.27882,
+ 0.27704,
+ 0.27728,
+ 0.27598,
+ 0.27658,
+ 0.2793,
+ 0.2785,
+ 0.27512,
+ 0.27576,
+ 0.27754,
+ 0.2809,
+ 0.27864,
+ 0.27994,
+ 0.27934,
+ 0.278,
+ 0.2789,
+ 0.27944,
+ 0.2777,
+ 0.2785,
+ 0.28082,
+ 0.27722,
+ 0.27896,
+ 0.27776,
+ 0.27826,
+ 0.27702,
+ 0.2777,
+ 0.28018,
+ 0.27868,
+ 0.27942,
+ 0.277,
+ 0.2778,
+ 0.2796
+ ],
+ "test_acc": [
+ 0.2851,
+ 0.2882,
+ 0.2834,
+ 0.2969,
+ 0.2682,
+ 0.2744,
+ 0.2751,
+ 0.2761,
+ 0.2883,
+ 0.2869,
+ 0.2764,
+ 0.2839,
+ 0.2682,
+ 0.2743,
+ 0.2888,
+ 0.2735,
+ 0.2847,
+ 0.2852,
+ 0.2974,
+ 0.2908,
+ 0.2957,
+ 0.3032,
+ 0.2908,
+ 0.272,
+ 0.2959,
+ 0.2858,
+ 0.2995,
+ 0.2807,
+ 0.292,
+ 0.302,
+ 0.2824,
+ 0.2802,
+ 0.2916,
+ 0.2913,
+ 0.3048,
+ 0.2908,
+ 0.2934,
+ 0.284,
+ 0.3083,
+ 0.302,
+ 0.3039,
+ 0.2868,
+ 0.3047,
+ 0.2997,
+ 0.2914,
+ 0.2968,
+ 0.2848,
+ 0.3035,
+ 0.2934,
+ 0.2966,
+ 0.2973,
+ 0.2946,
+ 0.2953,
+ 0.296,
+ 0.2932,
+ 0.2959,
+ 0.2864,
+ 0.2976,
+ 0.293,
+ 0.2961,
+ 0.2957,
+ 0.3012,
+ 0.2948,
+ 0.2926,
+ 0.2969,
+ 0.3043,
+ 0.3005,
+ 0.2957,
+ 0.2956,
+ 0.2994,
+ 0.2919,
+ 0.2945,
+ 0.2915,
+ 0.2903,
+ 0.2934,
+ 0.2967,
+ 0.2953,
+ 0.3,
+ 0.3006,
+ 0.2939,
+ 0.2995,
+ 0.2965,
+ 0.292,
+ 0.294,
+ 0.3006,
+ 0.3013,
+ 0.2978,
+ 0.2961,
+ 0.3011,
+ 0.2982,
+ 0.2984,
+ 0.2974,
+ 0.2977,
+ 0.297,
+ 0.2967,
+ 0.2972,
+ 0.2972,
+ 0.2962,
+ 0.2961,
+ 0.296
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.385863333940506,
+ 0.0001165914727607742,
+ 0.00017788054537959397,
+ 0.00018135752179659903,
+ -0.00028566765831783414,
+ -0.0002498100802768022,
+ 0.0006119838217273355,
+ -0.00013276470417622477
+ ],
+ "perturbation_rho": [
+ -0.01099667139351368,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "nudging": {
+ "0.001": [
+ -3.4226104617118835e-07,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.003": [
+ -1.0388903319835663e-06,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.01": [
+ -3.7979334592819214e-06,
+ 0.0,
+ -9.313225746154785e-10,
+ 0.0,
+ 3.725290298461914e-09,
+ 0.0,
+ 0.0,
+ 0.0
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 54199.1640625,
+ 1216095872.0,
+ 2414873600.0,
+ 3109203712.0,
+ 3571607296.0,
+ 6284516352.0,
+ 6465921536.0,
+ 6651301376.0,
+ 8724734976.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.6299613864466664e-07,
+ 2.240632540617682e-10,
+ 2.239412544291497e-10,
+ 2.2397983467925542e-10,
+ 2.240085755778054e-10,
+ 2.2390612974820812e-10,
+ 2.2391909160202061e-10,
+ 2.2392485088396086e-10,
+ 2.239693430716727e-10
+ ]
+ },
+ "drift": {
+ "embed.weight": 328.7167633164963,
+ "embed.bias": 238.81345295557173,
+ "blocks.0.ln.weight": 10.101694109463628,
+ "blocks.0.w1.weight": 287.42152472298335,
+ "blocks.0.w1.bias": 258.1218430687453,
+ "blocks.0.w2.weight": 482.380198439405,
+ "blocks.1.ln.weight": 8.527375095072756,
+ "blocks.1.w1.weight": 281.10785120372356,
+ "blocks.1.w1.bias": 271.84962810099614,
+ "blocks.1.w2.weight": 308.13809976512954,
+ "blocks.2.ln.weight": 8.093445971068885,
+ "blocks.2.w1.weight": 291.96840313708185,
+ "blocks.2.w1.bias": 267.27308802490285,
+ "blocks.2.w2.weight": 296.34890032183836,
+ "blocks.3.ln.weight": 7.507548795725433,
+ "blocks.3.w1.weight": 290.43783218773393,
+ "blocks.3.w1.bias": 271.71149018462404,
+ "blocks.3.w2.weight": 280.50007531299553,
+ "blocks.4.ln.weight": 10.4676649794159,
+ "blocks.4.w1.weight": 425.7246219342384,
+ "blocks.4.w1.bias": 395.9832771459978,
+ "blocks.4.w2.weight": 387.2972211221749,
+ "blocks.5.ln.weight": 7.145103425417736,
+ "blocks.5.w1.weight": 272.3772890298663,
+ "blocks.5.w1.bias": 261.31341956376576,
+ "blocks.5.w2.weight": 248.44511726184933,
+ "blocks.6.ln.weight": 7.2864592372954435,
+ "blocks.6.w1.weight": 277.1704677507615,
+ "blocks.6.w1.bias": 265.20680207190264,
+ "blocks.6.w2.weight": 260.3666734377331,
+ "blocks.7.ln.weight": 10.55594745569165,
+ "blocks.7.w1.weight": 426.2374140444505,
+ "blocks.7.w1.bias": 401.49349715227146,
+ "blocks.7.w2.weight": 419.0556854355582,
+ "out_ln.weight": 0.5880689181906897,
+ "out_head.weight": 8.642576606816835,
+ "out_head.bias": 0.6087240911810886
+ }
+ },
+ "fa": {
+ "log": {
+ "train_loss": [
+ 2.0266723140716554,
+ 1.9453410259246826,
+ 1.9245470600128174,
+ 1.9017182848739624,
+ 1.889633105506897,
+ 1.8787115466308595,
+ 1.869741219520569,
+ 1.859507017288208,
+ 1.8565170881652833,
+ 1.8455627252197266,
+ 1.8404858825683594,
+ 1.8341104727172852,
+ 1.8273077993774414,
+ 1.821364828453064,
+ 1.8166592828369141,
+ 1.8075398138046264,
+ 1.8040798845672608,
+ 1.7960231017303467,
+ 1.785357122154236,
+ 1.7876761278915405,
+ 1.77504403591156,
+ 1.7683619609832764,
+ 1.769164297103882,
+ 1.7614449658966065,
+ 1.7590879248428344,
+ 1.7571340868759155,
+ 1.7514765051651,
+ 1.7532490229034423,
+ 1.7487156938934325,
+ 1.7469101626205443,
+ 1.7441981435394287,
+ 1.7379362894058228,
+ 1.7393075399017335,
+ 1.7378688484954834,
+ 1.7407434060668945,
+ 1.7369408278656007,
+ 1.7355171717453004,
+ 1.7350507015228271,
+ 1.733909783859253,
+ 1.7398296880722046,
+ 1.7312470238876343,
+ 1.7290490047836304,
+ 1.7353802160644531,
+ 1.7334120412445069,
+ 1.7291723877334595,
+ 1.726144882774353,
+ 1.7297367659759522,
+ 1.72646170627594,
+ 1.724361591567993,
+ 1.7226831272125245,
+ 1.7208573818969726,
+ 1.7199689587402345,
+ 1.7188016274261475,
+ 1.7211026276016235,
+ 1.7158281423568726,
+ 1.7192893993759155,
+ 1.711656453590393,
+ 1.717024673728943,
+ 1.710442032470703,
+ 1.7105829236602783,
+ 1.711408941345215,
+ 1.706308694000244,
+ 1.7078606945419312,
+ 1.7072623968887328,
+ 1.703292763900757,
+ 1.7017784185028075,
+ 1.7011818041992188,
+ 1.7006354034805298,
+ 1.6977454791259765,
+ 1.6999914080429077,
+ 1.6981370053863525,
+ 1.699045623397827,
+ 1.7020124697494508,
+ 1.6962461737823487,
+ 1.6977318247222901,
+ 1.696797931213379,
+ 1.697660752220154,
+ 1.695864578933716,
+ 1.6946413637542725,
+ 1.6933105139923095,
+ 1.6918804026031493,
+ 1.69379876953125,
+ 1.692444557876587,
+ 1.692386579284668,
+ 1.6933874633026123,
+ 1.6912192764282226,
+ 1.6896385999298096,
+ 1.6908740300750733,
+ 1.6846979732894898,
+ 1.6878035149765014,
+ 1.6874957360839844,
+ 1.6844608339691163,
+ 1.687544917602539,
+ 1.6878573336791993,
+ 1.6888155934906006,
+ 1.6884653591537475,
+ 1.684945572128296,
+ 1.6854769310760498,
+ 1.689350791053772,
+ 1.684672553062439
+ ],
+ "train_acc": [
+ 0.25516,
+ 0.2934,
+ 0.30626,
+ 0.31242,
+ 0.32024,
+ 0.32414,
+ 0.32588,
+ 0.32934,
+ 0.33358,
+ 0.3364,
+ 0.33836,
+ 0.34152,
+ 0.34374,
+ 0.34868,
+ 0.34688,
+ 0.35322,
+ 0.35536,
+ 0.35502,
+ 0.3615,
+ 0.35864,
+ 0.365,
+ 0.36708,
+ 0.36492,
+ 0.36656,
+ 0.36836,
+ 0.36936,
+ 0.3728,
+ 0.37342,
+ 0.37252,
+ 0.37256,
+ 0.37424,
+ 0.3751,
+ 0.37706,
+ 0.3752,
+ 0.37348,
+ 0.37624,
+ 0.37684,
+ 0.37782,
+ 0.37856,
+ 0.3766,
+ 0.37936,
+ 0.38126,
+ 0.37778,
+ 0.37912,
+ 0.38126,
+ 0.38394,
+ 0.37856,
+ 0.38026,
+ 0.38072,
+ 0.38172,
+ 0.3842,
+ 0.38356,
+ 0.38258,
+ 0.38382,
+ 0.38378,
+ 0.38544,
+ 0.38808,
+ 0.38518,
+ 0.3869,
+ 0.386,
+ 0.3856,
+ 0.38972,
+ 0.38756,
+ 0.38892,
+ 0.39094,
+ 0.38878,
+ 0.39068,
+ 0.3887,
+ 0.39396,
+ 0.39364,
+ 0.39368,
+ 0.39064,
+ 0.39046,
+ 0.39298,
+ 0.39274,
+ 0.39592,
+ 0.39298,
+ 0.39386,
+ 0.39488,
+ 0.39222,
+ 0.39268,
+ 0.39366,
+ 0.39478,
+ 0.39506,
+ 0.39528,
+ 0.39594,
+ 0.39594,
+ 0.39416,
+ 0.39786,
+ 0.39686,
+ 0.39512,
+ 0.39482,
+ 0.39516,
+ 0.3969,
+ 0.39614,
+ 0.39604,
+ 0.39616,
+ 0.3983,
+ 0.39634,
+ 0.39874
+ ],
+ "test_acc": [
+ 0.3093,
+ 0.3342,
+ 0.3399,
+ 0.3542,
+ 0.3538,
+ 0.3515,
+ 0.3467,
+ 0.3593,
+ 0.3629,
+ 0.3586,
+ 0.3658,
+ 0.3651,
+ 0.3634,
+ 0.368,
+ 0.3662,
+ 0.3772,
+ 0.3706,
+ 0.3806,
+ 0.3767,
+ 0.3884,
+ 0.3928,
+ 0.3913,
+ 0.3908,
+ 0.3845,
+ 0.394,
+ 0.3977,
+ 0.3963,
+ 0.3919,
+ 0.3927,
+ 0.4001,
+ 0.3845,
+ 0.3968,
+ 0.3923,
+ 0.3972,
+ 0.4001,
+ 0.3927,
+ 0.3949,
+ 0.392,
+ 0.4012,
+ 0.3949,
+ 0.4069,
+ 0.3964,
+ 0.4031,
+ 0.3987,
+ 0.3974,
+ 0.4014,
+ 0.3977,
+ 0.4103,
+ 0.3991,
+ 0.3969,
+ 0.3986,
+ 0.4054,
+ 0.4046,
+ 0.4081,
+ 0.4036,
+ 0.4082,
+ 0.4012,
+ 0.4036,
+ 0.4027,
+ 0.4102,
+ 0.4079,
+ 0.4038,
+ 0.4044,
+ 0.4118,
+ 0.4118,
+ 0.4102,
+ 0.4106,
+ 0.4039,
+ 0.4082,
+ 0.4046,
+ 0.4096,
+ 0.4129,
+ 0.408,
+ 0.4041,
+ 0.4099,
+ 0.4103,
+ 0.4081,
+ 0.4101,
+ 0.4125,
+ 0.4133,
+ 0.4071,
+ 0.4109,
+ 0.4092,
+ 0.412,
+ 0.4117,
+ 0.4117,
+ 0.4081,
+ 0.412,
+ 0.412,
+ 0.41,
+ 0.4116,
+ 0.4107,
+ 0.409,
+ 0.4105,
+ 0.4105,
+ 0.4117,
+ 0.411,
+ 0.4107,
+ 0.4109,
+ 0.4109
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.036382660269737244,
+ 0.07720986753702164,
+ 0.015243301168084145,
+ -0.03982359915971756,
+ -0.08008211106061935,
+ -0.07353264093399048,
+ 0.004661812447011471,
+ 0.9986914992332458
+ ],
+ "perturbation_rho": [
+ 0.046086542308330536,
+ -0.012133870273828506,
+ 0.0135452039539814,
+ 0.027485966682434082,
+ -0.0412953719496727,
+ -0.01517622172832489,
+ 0.015256978571414948,
+ -0.014813247136771679
+ ],
+ "nudging": {
+ "0.001": [
+ -2.9372749850153923e-06,
+ -4.57162968814373e-07,
+ 5.78584149479866e-08,
+ 1.1955853551626205e-07,
+ 9.022187441587448e-08,
+ 7.35744833946228e-08,
+ 2.5029294192790985e-08,
+ -1.23586505651474e-06
+ ],
+ "0.003": [
+ -9.18388832360506e-06,
+ -1.2764940038323402e-06,
+ -1.8684659153223038e-07,
+ 2.1711457520723343e-07,
+ 3.7939753383398056e-07,
+ 3.0547380447387695e-07,
+ 1.57160684466362e-08,
+ -4.404224455356598e-06
+ ],
+ "0.01": [
+ -3.0463445000350475e-05,
+ -4.363246262073517e-06,
+ -3.3760443329811096e-07,
+ 6.594927981495857e-07,
+ 1.332256942987442e-06,
+ 1.2486707419157028e-06,
+ -1.0861549526453018e-07,
+ -1.546763814985752e-05
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 6046.84130859375,
+ 74194.140625,
+ 477900.9375,
+ 689443.0,
+ 1004220.0625,
+ 1523434.625,
+ 1680752.625,
+ 1682025.375,
+ 898568.625
+ ],
+ "bp_grad_norms_per_layer": [
+ 3.200672654202208e-05,
+ 2.357817038500798e-06,
+ 7.007323006291699e-07,
+ 6.70125302804081e-07,
+ 6.57040743590187e-07,
+ 6.679950388388534e-07,
+ 6.679492230432515e-07,
+ 6.673712391602749e-07,
+ 6.578414968316793e-07
+ ]
+ },
+ "drift": {
+ "embed.weight": 41.37337372663241,
+ "embed.bias": 21.739351895524546,
+ "blocks.0.ln.weight": 1.096869475338319,
+ "blocks.0.w1.weight": 14.853427419399033,
+ "blocks.0.w1.bias": 14.626173478331832,
+ "blocks.0.w2.weight": 52.544426520863894,
+ "blocks.1.ln.weight": 0.989631116445591,
+ "blocks.1.w1.weight": 18.73171250459504,
+ "blocks.1.w1.bias": 13.329767291528706,
+ "blocks.1.w2.weight": 51.37681586378203,
+ "blocks.2.ln.weight": 0.7265939790865649,
+ "blocks.2.w1.weight": 18.824924434649834,
+ "blocks.2.w1.bias": 13.21966047090143,
+ "blocks.2.w2.weight": 36.1324487703447,
+ "blocks.3.ln.weight": 0.6455349248601757,
+ "blocks.3.w1.weight": 19.270974488434813,
+ "blocks.3.w1.bias": 16.976194451895758,
+ "blocks.3.w2.weight": 28.417177242878225,
+ "blocks.4.ln.weight": 0.6212274460619375,
+ "blocks.4.w1.weight": 20.86703490367653,
+ "blocks.4.w1.bias": 22.972850990124385,
+ "blocks.4.w2.weight": 29.740214683313905,
+ "blocks.5.ln.weight": 0.6222289394933932,
+ "blocks.5.w1.weight": 18.57775599009712,
+ "blocks.5.w1.bias": 20.08003721644836,
+ "blocks.5.w2.weight": 40.53068019154897,
+ "blocks.6.ln.weight": 0.5581383332084382,
+ "blocks.6.w1.weight": 15.461885552051882,
+ "blocks.6.w1.bias": 11.684922950989124,
+ "blocks.6.w2.weight": 53.18350858546716,
+ "blocks.7.ln.weight": 0.6558470787555182,
+ "blocks.7.w1.weight": 20.23429444317466,
+ "blocks.7.w1.bias": 21.51332878594272,
+ "blocks.7.w2.weight": 38.38263927504239,
+ "out_ln.weight": 0.3257959664285173,
+ "out_head.weight": 5.900526651995236,
+ "out_head.bias": 0.9300218587303252
+ }
+ }
+ },
+ "config": {
+ "dataset": "cifar10",
+ "d_hidden": 512,
+ "num_blocks": 8,
+ "batch_size": 128,
+ "epochs": 100,
+ "lr": 0.001,
+ "lr_fb": 0.001,
+ "wd": 0.01,
+ "lam": 0.1,
+ "K": 4,
+ "sigma_bridge": 0.05,
+ "ema_momentum": 0.995,
+ "term_grad_weight": 1.0,
+ "seeds": [
+ 6
+ ],
+ "gpu": 0,
+ "output_dir": "results/fa_dfa_d512_L8_seed6",
+ "methods": [
+ "fa",
+ "dfa"
+ ],
+ "random_targets": false,
+ "penalty_lam": 0.0,
+ "num_classes": 10
+ }
+} \ No newline at end of file
diff --git a/results/fa_dfa_d512_L8_seed7/results_cifar10.json b/results/fa_dfa_d512_L8_seed7/results_cifar10.json
new file mode 100644
index 0000000..1afcf83
--- /dev/null
+++ b/results/fa_dfa_d512_L8_seed7/results_cifar10.json
@@ -0,0 +1,881 @@
+{
+ "7": {
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 2.0667303594207764,
+ 2.0324611769866943,
+ 2.0354494356536867,
+ 2.029560486984253,
+ 2.0285463153076173,
+ 2.0250388471984864,
+ 2.024455110168457,
+ 2.019448572616577,
+ 2.016967566986084,
+ 2.015476321334839,
+ 2.0111602995300295,
+ 2.011849951324463,
+ 2.009353925704956,
+ 2.0070058068084715,
+ 2.008019479827881,
+ 2.004458062477112,
+ 2.0052689391326903,
+ 1.9994013024520874,
+ 1.9999373209381104,
+ 1.998676916770935,
+ 1.9997672403717042,
+ 2.0009046767807006,
+ 1.9968020938491822,
+ 1.995622604598999,
+ 1.996247008934021,
+ 1.9960846052551269,
+ 1.994126517906189,
+ 1.9923845544433594,
+ 1.993994869995117,
+ 1.9906352404403687,
+ 1.993604411315918,
+ 1.9903970510864257,
+ 1.9910701220321656,
+ 1.9923721322250367,
+ 1.9914608909606935,
+ 1.990251951751709,
+ 1.9879735006332397,
+ 1.990091123123169,
+ 1.989167085533142,
+ 1.9870023511505126,
+ 1.9876113708496095,
+ 1.987236508255005,
+ 1.9868223078918457,
+ 1.986799183616638,
+ 1.9872047930145265,
+ 1.9893735236358643,
+ 1.985566644592285,
+ 1.9861162490081787,
+ 1.9881098473739625,
+ 1.9854161669540404,
+ 1.9856359400939942,
+ 1.9845690382385255,
+ 1.98623692237854,
+ 1.9848303674316405,
+ 1.985502159729004,
+ 1.9834946334075927,
+ 1.9829965356063843,
+ 1.982283455657959,
+ 1.984253911705017,
+ 1.9829701402282716,
+ 1.9815960638046264,
+ 1.9816721053314208,
+ 1.9825859942245483,
+ 1.980166824951172,
+ 1.9817002365493774,
+ 1.9804359143447876,
+ 1.9821267053604126,
+ 1.9813222342681884,
+ 1.980442075805664,
+ 1.9812367477416992,
+ 1.9801365872192382,
+ 1.980766887664795,
+ 1.979991530380249,
+ 1.9813088734436035,
+ 1.9798352686309815,
+ 1.979272435836792,
+ 1.9787259462738036,
+ 1.9776743465805053,
+ 1.9783772805404662,
+ 1.9771632527160645,
+ 1.9797238985824586,
+ 1.9785139586639404,
+ 1.978402091369629,
+ 1.9799914836883545,
+ 1.9797544343566895,
+ 1.9781852695465088,
+ 1.9793489038848877,
+ 1.97948084274292,
+ 1.9786187704849243,
+ 1.9765155652236939,
+ 1.9782553648376464,
+ 1.980494052734375,
+ 1.9774739639282226,
+ 1.9782270874786376,
+ 1.9778608634948731,
+ 1.9789943241119385,
+ 1.9787702282333375,
+ 1.9780272876358032,
+ 1.9778476651000976,
+ 1.9772668927764891
+ ],
+ "train_acc": [
+ 0.23342,
+ 0.24908,
+ 0.24646,
+ 0.24752,
+ 0.25104,
+ 0.25202,
+ 0.2535,
+ 0.25812,
+ 0.25712,
+ 0.2563,
+ 0.26068,
+ 0.25952,
+ 0.26266,
+ 0.261,
+ 0.26174,
+ 0.26348,
+ 0.26424,
+ 0.26552,
+ 0.2655,
+ 0.2661,
+ 0.26904,
+ 0.26678,
+ 0.27024,
+ 0.26984,
+ 0.26944,
+ 0.27048,
+ 0.26884,
+ 0.27052,
+ 0.27058,
+ 0.26958,
+ 0.27028,
+ 0.27444,
+ 0.27172,
+ 0.27178,
+ 0.27452,
+ 0.2702,
+ 0.27174,
+ 0.27434,
+ 0.27366,
+ 0.27622,
+ 0.27582,
+ 0.2751,
+ 0.27744,
+ 0.2755,
+ 0.2772,
+ 0.27482,
+ 0.27754,
+ 0.2768,
+ 0.27594,
+ 0.27634,
+ 0.27546,
+ 0.27758,
+ 0.27588,
+ 0.27868,
+ 0.27538,
+ 0.27782,
+ 0.27744,
+ 0.27858,
+ 0.27892,
+ 0.27808,
+ 0.27762,
+ 0.28194,
+ 0.27864,
+ 0.28092,
+ 0.27878,
+ 0.28206,
+ 0.27952,
+ 0.27822,
+ 0.28132,
+ 0.28178,
+ 0.2819,
+ 0.28098,
+ 0.28214,
+ 0.2826,
+ 0.28144,
+ 0.2839,
+ 0.28118,
+ 0.28474,
+ 0.28238,
+ 0.28314,
+ 0.28064,
+ 0.27932,
+ 0.28206,
+ 0.28026,
+ 0.28226,
+ 0.2815,
+ 0.28228,
+ 0.27942,
+ 0.2824,
+ 0.28296,
+ 0.2822,
+ 0.27914,
+ 0.28222,
+ 0.2813,
+ 0.28184,
+ 0.28122,
+ 0.28294,
+ 0.28278,
+ 0.28304,
+ 0.28276
+ ],
+ "test_acc": [
+ 0.2395,
+ 0.2584,
+ 0.278,
+ 0.2745,
+ 0.2714,
+ 0.2845,
+ 0.279,
+ 0.2849,
+ 0.2824,
+ 0.2736,
+ 0.2673,
+ 0.2695,
+ 0.2977,
+ 0.2632,
+ 0.2837,
+ 0.2899,
+ 0.2993,
+ 0.2917,
+ 0.2987,
+ 0.2881,
+ 0.2865,
+ 0.2963,
+ 0.307,
+ 0.2818,
+ 0.2905,
+ 0.297,
+ 0.3003,
+ 0.284,
+ 0.2824,
+ 0.3007,
+ 0.3045,
+ 0.3001,
+ 0.2875,
+ 0.3014,
+ 0.2948,
+ 0.2938,
+ 0.2869,
+ 0.3033,
+ 0.3048,
+ 0.2973,
+ 0.2897,
+ 0.2889,
+ 0.3027,
+ 0.309,
+ 0.2915,
+ 0.3052,
+ 0.3026,
+ 0.3069,
+ 0.2847,
+ 0.3014,
+ 0.3036,
+ 0.3065,
+ 0.3041,
+ 0.3006,
+ 0.296,
+ 0.3047,
+ 0.299,
+ 0.2993,
+ 0.3054,
+ 0.3044,
+ 0.3094,
+ 0.3083,
+ 0.2916,
+ 0.2964,
+ 0.3022,
+ 0.3068,
+ 0.3073,
+ 0.2973,
+ 0.3006,
+ 0.2954,
+ 0.3078,
+ 0.3015,
+ 0.3052,
+ 0.2978,
+ 0.3006,
+ 0.2996,
+ 0.3006,
+ 0.3029,
+ 0.2958,
+ 0.2935,
+ 0.3002,
+ 0.2972,
+ 0.3032,
+ 0.3043,
+ 0.3048,
+ 0.3077,
+ 0.3024,
+ 0.3073,
+ 0.3056,
+ 0.3067,
+ 0.3072,
+ 0.3058,
+ 0.3066,
+ 0.3057,
+ 0.3058,
+ 0.3058,
+ 0.306,
+ 0.3059,
+ 0.3055,
+ 0.3055
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.38665446639060974,
+ 0.000932278111577034,
+ -3.8507863791892305e-05,
+ 0.0002074055082630366,
+ 0.0004886630922555923,
+ -2.073507675959263e-05,
+ 9.8392330983188e-05,
+ -0.00026387785328552127
+ ],
+ "perturbation_rho": [
+ -0.015509183518588543,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "nudging": {
+ "0.001": [
+ -4.4563785195350647e-07,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.003": [
+ -1.2372620403766632e-06,
+ 0.0,
+ 9.313225746154785e-10,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.01": [
+ -3.957655280828476e-06,
+ -5.587935447692871e-09,
+ 9.313225746154785e-10,
+ -3.725290298461914e-09,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 54526.05078125,
+ 813104704.0,
+ 2402701568.0,
+ 3855742976.0,
+ 4565018112.0,
+ 7107309056.0,
+ 10427996160.0,
+ 10594944000.0,
+ 10802217984.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.7473959107737755e-07,
+ 1.7227932969099413e-10,
+ 1.702108870516028e-10,
+ 1.6996638818600474e-10,
+ 1.6991698326140892e-10,
+ 1.6980272743438718e-10,
+ 1.6994737561670803e-10,
+ 1.6996060114848888e-10,
+ 1.6999159024866373e-10
+ ]
+ },
+ "drift": {
+ "embed.weight": 327.4346996072575,
+ "embed.bias": 170.27189912672551,
+ "blocks.0.ln.weight": 10.029448340733337,
+ "blocks.0.w1.weight": 271.09694985433896,
+ "blocks.0.w1.bias": 210.3344533990309,
+ "blocks.0.w2.weight": 492.9386447153367,
+ "blocks.1.ln.weight": 8.828382497492473,
+ "blocks.1.w1.weight": 304.4175908830063,
+ "blocks.1.w1.bias": 277.43931427160345,
+ "blocks.1.w2.weight": 335.16168952866406,
+ "blocks.2.ln.weight": 8.838151125620117,
+ "blocks.2.w1.weight": 344.66383116618897,
+ "blocks.2.w1.bias": 306.24514451886506,
+ "blocks.2.w2.weight": 332.4200685911524,
+ "blocks.3.ln.weight": 7.729321470287778,
+ "blocks.3.w1.weight": 307.1583996497501,
+ "blocks.3.w1.bias": 280.7450378625442,
+ "blocks.3.w2.weight": 283.2991400409648,
+ "blocks.4.ln.weight": 10.279472891273015,
+ "blocks.4.w1.weight": 425.65902304929637,
+ "blocks.4.w1.bias": 405.30162642773695,
+ "blocks.4.w2.weight": 417.0126410755384,
+ "blocks.5.ln.weight": 11.518133724029006,
+ "blocks.5.w1.weight": 467.0126802605906,
+ "blocks.5.w1.bias": 452.55789718669956,
+ "blocks.5.w2.weight": 467.65851514176296,
+ "blocks.6.ln.weight": 9.037508419886805,
+ "blocks.6.w1.weight": 358.31655559732087,
+ "blocks.6.w1.bias": 330.36950370925376,
+ "blocks.6.w2.weight": 350.477939939699,
+ "blocks.7.ln.weight": 8.758836150161805,
+ "blocks.7.w1.weight": 347.69073535976315,
+ "blocks.7.w1.bias": 320.1104471013453,
+ "blocks.7.w2.weight": 332.8564062769089,
+ "out_ln.weight": 0.6350668697695233,
+ "out_head.weight": 9.197192445712407,
+ "out_head.bias": 0.8845643723650614
+ }
+ },
+ "fa": {
+ "log": {
+ "train_loss": [
+ 2.0300379509735107,
+ 1.9401999561309815,
+ 1.913938886642456,
+ 1.8904116683578491,
+ 1.8764363840103149,
+ 1.873182268371582,
+ 1.8717842712402344,
+ 1.8644564879608154,
+ 1.8566048559570312,
+ 1.850687060508728,
+ 1.8460483242797852,
+ 1.8433684701538087,
+ 1.833488243789673,
+ 1.8278405266571045,
+ 1.8265484925079345,
+ 1.818034171447754,
+ 1.8155431734466552,
+ 1.8074933514022826,
+ 1.8059410440444947,
+ 1.8039924750137328,
+ 1.8001109600067138,
+ 1.7982124895477294,
+ 1.7878496411514282,
+ 1.7872236651611328,
+ 1.7860211572265625,
+ 1.7805663021469116,
+ 1.779324355392456,
+ 1.771492137527466,
+ 1.772376263961792,
+ 1.7669432696914673,
+ 1.7704145129776,
+ 1.7629687177276612,
+ 1.759504497718811,
+ 1.7608156897735596,
+ 1.7560134534072875,
+ 1.7523908859634398,
+ 1.7503845543670655,
+ 1.7478893075180053,
+ 1.7472649411773682,
+ 1.7424788817596435,
+ 1.74519405002594,
+ 1.7384309884262086,
+ 1.7379801452636718,
+ 1.7321213195800782,
+ 1.732442968559265,
+ 1.7347256170654297,
+ 1.7268475212860108,
+ 1.7259371692276,
+ 1.730874881286621,
+ 1.7255464766311646,
+ 1.7273355867767335,
+ 1.7193929712295533,
+ 1.7231387823104858,
+ 1.7167245266342164,
+ 1.7185063571929933,
+ 1.7160026277923583,
+ 1.71593848777771,
+ 1.7101723334121703,
+ 1.7123066550445556,
+ 1.7091578268432617,
+ 1.710422399520874,
+ 1.7071148763275146,
+ 1.703984679031372,
+ 1.7055902273559571,
+ 1.7043205630493163,
+ 1.702749596977234,
+ 1.7015760723114013,
+ 1.6997603458023072,
+ 1.7010435842895508,
+ 1.7012611554718018,
+ 1.7004539463043213,
+ 1.6962661600494384,
+ 1.6981626427841185,
+ 1.698192346496582,
+ 1.6980322202682494,
+ 1.6932597018814086,
+ 1.6941705449676514,
+ 1.6931912238311768,
+ 1.6942447135162353,
+ 1.6932169602203369,
+ 1.6921008915328979,
+ 1.691394665260315,
+ 1.692651755027771,
+ 1.690492251586914,
+ 1.6901156521606446,
+ 1.6877212131118775,
+ 1.6918383533859254,
+ 1.6916584229278564,
+ 1.6862779132843018,
+ 1.690645524635315,
+ 1.689451340637207,
+ 1.6918565393829346,
+ 1.6878593984222412,
+ 1.6896748791122436,
+ 1.6869253600692748,
+ 1.6864034613037109,
+ 1.6866692586517333,
+ 1.6912582437515258,
+ 1.6882677157211303,
+ 1.6879832342147827
+ ],
+ "train_acc": [
+ 0.25058,
+ 0.29132,
+ 0.30476,
+ 0.31654,
+ 0.32498,
+ 0.32736,
+ 0.32762,
+ 0.3327,
+ 0.33604,
+ 0.33698,
+ 0.33776,
+ 0.33972,
+ 0.34396,
+ 0.34484,
+ 0.34528,
+ 0.34798,
+ 0.351,
+ 0.35216,
+ 0.35302,
+ 0.35184,
+ 0.35472,
+ 0.3551,
+ 0.35822,
+ 0.35894,
+ 0.36172,
+ 0.36298,
+ 0.36054,
+ 0.36104,
+ 0.36522,
+ 0.36498,
+ 0.364,
+ 0.36804,
+ 0.36678,
+ 0.36776,
+ 0.3693,
+ 0.37214,
+ 0.37256,
+ 0.37198,
+ 0.37316,
+ 0.37604,
+ 0.37578,
+ 0.37634,
+ 0.37724,
+ 0.38006,
+ 0.38006,
+ 0.3758,
+ 0.38288,
+ 0.37976,
+ 0.37962,
+ 0.38206,
+ 0.38228,
+ 0.38356,
+ 0.38352,
+ 0.38466,
+ 0.38582,
+ 0.3873,
+ 0.3864,
+ 0.38692,
+ 0.38786,
+ 0.38986,
+ 0.38712,
+ 0.38938,
+ 0.389,
+ 0.39036,
+ 0.38952,
+ 0.3926,
+ 0.3918,
+ 0.39226,
+ 0.39226,
+ 0.39486,
+ 0.39342,
+ 0.39342,
+ 0.39574,
+ 0.39404,
+ 0.39118,
+ 0.39872,
+ 0.39752,
+ 0.39638,
+ 0.39602,
+ 0.39634,
+ 0.39706,
+ 0.39678,
+ 0.39506,
+ 0.39628,
+ 0.39606,
+ 0.39918,
+ 0.39864,
+ 0.39598,
+ 0.39636,
+ 0.39802,
+ 0.39958,
+ 0.39712,
+ 0.39732,
+ 0.39682,
+ 0.39824,
+ 0.39882,
+ 0.39722,
+ 0.39738,
+ 0.39682,
+ 0.39604
+ ],
+ "test_acc": [
+ 0.2982,
+ 0.3236,
+ 0.3362,
+ 0.355,
+ 0.3615,
+ 0.3565,
+ 0.3706,
+ 0.3689,
+ 0.3752,
+ 0.3721,
+ 0.3648,
+ 0.3757,
+ 0.358,
+ 0.3682,
+ 0.3671,
+ 0.3888,
+ 0.3867,
+ 0.3821,
+ 0.3853,
+ 0.3871,
+ 0.3942,
+ 0.387,
+ 0.3844,
+ 0.3793,
+ 0.3865,
+ 0.3944,
+ 0.3914,
+ 0.3841,
+ 0.3867,
+ 0.3946,
+ 0.3922,
+ 0.3911,
+ 0.3974,
+ 0.3923,
+ 0.4016,
+ 0.4003,
+ 0.398,
+ 0.3965,
+ 0.4044,
+ 0.4054,
+ 0.3959,
+ 0.3964,
+ 0.3997,
+ 0.4038,
+ 0.4096,
+ 0.4134,
+ 0.4049,
+ 0.4077,
+ 0.4074,
+ 0.4021,
+ 0.4087,
+ 0.4057,
+ 0.4133,
+ 0.415,
+ 0.4057,
+ 0.414,
+ 0.4129,
+ 0.4116,
+ 0.4118,
+ 0.4059,
+ 0.4124,
+ 0.414,
+ 0.4067,
+ 0.4166,
+ 0.4129,
+ 0.4136,
+ 0.4099,
+ 0.4092,
+ 0.418,
+ 0.4107,
+ 0.4178,
+ 0.4112,
+ 0.4128,
+ 0.4178,
+ 0.4138,
+ 0.4147,
+ 0.4157,
+ 0.4178,
+ 0.4179,
+ 0.4197,
+ 0.4188,
+ 0.414,
+ 0.416,
+ 0.4182,
+ 0.4217,
+ 0.4191,
+ 0.4173,
+ 0.4185,
+ 0.421,
+ 0.42,
+ 0.4176,
+ 0.4186,
+ 0.421,
+ 0.4193,
+ 0.4192,
+ 0.42,
+ 0.4192,
+ 0.42,
+ 0.4203,
+ 0.42
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.04720165580511093,
+ 0.023647643625736237,
+ -0.033746276050806046,
+ -0.02436545118689537,
+ -0.028243303298950195,
+ -0.01576988771557808,
+ -0.08274193108081818,
+ 0.9948431253433228
+ ],
+ "perturbation_rho": [
+ -0.019907839596271515,
+ 0.026347745209932327,
+ 0.00029688142240047455,
+ -0.015195554122328758,
+ 0.043242912739515305,
+ 0.008055397309362888,
+ 0.003536408767104149,
+ -0.039388515055179596
+ ],
+ "nudging": {
+ "0.001": [
+ -7.520895451307297e-06,
+ -1.26776285469532e-07,
+ 9.906943887472153e-08,
+ -4.656612873077393e-10,
+ -4.0978193283081055e-08,
+ -3.585591912269592e-08,
+ 6.740447133779526e-08,
+ -1.3819662854075432e-06
+ ],
+ "0.003": [
+ -2.2122403606772423e-05,
+ -3.012828528881073e-07,
+ 2.1152663975954056e-07,
+ 6.798654794692993e-08,
+ 1.9441358745098114e-07,
+ 2.6775524020195007e-08,
+ 3.398745320737362e-07,
+ -5.154346581548452e-06
+ ],
+ "0.01": [
+ -7.367332000285387e-05,
+ -1.0807416401803493e-06,
+ 6.495974957942963e-07,
+ 3.817840479314327e-07,
+ 5.302717909216881e-07,
+ 2.4068867787718773e-07,
+ 1.5024561434984207e-06,
+ -1.799390884116292e-05
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 5806.3515625,
+ 63154.71875,
+ 345991.1875,
+ 741559.875,
+ 923550.8125,
+ 1119541.375,
+ 1242674.125,
+ 1367644.625,
+ 780196.0625
+ ],
+ "bp_grad_norms_per_layer": [
+ 5.0447550165699795e-05,
+ 2.6470297598280013e-06,
+ 8.134598488140909e-07,
+ 7.909251280580065e-07,
+ 7.877957273194625e-07,
+ 7.858119488446391e-07,
+ 7.467841669495101e-07,
+ 7.52233631828858e-07,
+ 7.457880428773933e-07
+ ]
+ },
+ "drift": {
+ "embed.weight": 40.32473097964277,
+ "embed.bias": 16.685566967564547,
+ "blocks.0.ln.weight": 1.0445740862794894,
+ "blocks.0.w1.weight": 15.133344163582892,
+ "blocks.0.w1.bias": 12.448585097052113,
+ "blocks.0.w2.weight": 47.37614824821112,
+ "blocks.1.ln.weight": 0.9771977116283915,
+ "blocks.1.w1.weight": 19.65973837310052,
+ "blocks.1.w1.bias": 15.073340004663,
+ "blocks.1.w2.weight": 45.15493814957138,
+ "blocks.2.ln.weight": 0.7677471742012729,
+ "blocks.2.w1.weight": 18.286316393259757,
+ "blocks.2.w1.bias": 16.668691780331002,
+ "blocks.2.w2.weight": 43.33024072420251,
+ "blocks.3.ln.weight": 0.7157196705161631,
+ "blocks.3.w1.weight": 17.769177715348974,
+ "blocks.3.w1.bias": 17.48172961098867,
+ "blocks.3.w2.weight": 46.41797990739132,
+ "blocks.4.ln.weight": 0.7158938212971907,
+ "blocks.4.w1.weight": 18.203867219610753,
+ "blocks.4.w1.bias": 18.870397314936728,
+ "blocks.4.w2.weight": 51.23112107531505,
+ "blocks.5.ln.weight": 0.6884334631484388,
+ "blocks.5.w1.weight": 18.258883275015975,
+ "blocks.5.w1.bias": 18.71714897198837,
+ "blocks.5.w2.weight": 51.991090351166356,
+ "blocks.6.ln.weight": 0.633491884160565,
+ "blocks.6.w1.weight": 17.95984323437285,
+ "blocks.6.w1.bias": 18.493378505745028,
+ "blocks.6.w2.weight": 38.75838405347133,
+ "blocks.7.ln.weight": 0.5989570930744887,
+ "blocks.7.w1.weight": 18.705839523958478,
+ "blocks.7.w1.bias": 20.505853112621466,
+ "blocks.7.w2.weight": 33.59326297500316,
+ "out_ln.weight": 0.2917390152456112,
+ "out_head.weight": 5.883284575632486,
+ "out_head.bias": 0.8881859391580925
+ }
+ }
+ },
+ "config": {
+ "dataset": "cifar10",
+ "d_hidden": 512,
+ "num_blocks": 8,
+ "batch_size": 128,
+ "epochs": 100,
+ "lr": 0.001,
+ "lr_fb": 0.001,
+ "wd": 0.01,
+ "lam": 0.1,
+ "K": 4,
+ "sigma_bridge": 0.05,
+ "ema_momentum": 0.995,
+ "term_grad_weight": 1.0,
+ "seeds": [
+ 7
+ ],
+ "gpu": 0,
+ "output_dir": "results/fa_dfa_d512_L8_seed7",
+ "methods": [
+ "fa",
+ "dfa"
+ ],
+ "random_targets": false,
+ "penalty_lam": 0.0,
+ "num_classes": 10
+ }
+} \ No newline at end of file
diff --git a/results/fa_dfa_d512_L8_seed8/results_cifar10.json b/results/fa_dfa_d512_L8_seed8/results_cifar10.json
new file mode 100644
index 0000000..8f97591
--- /dev/null
+++ b/results/fa_dfa_d512_L8_seed8/results_cifar10.json
@@ -0,0 +1,881 @@
+{
+ "8": {
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 2.068326115875244,
+ 2.038449552383423,
+ 2.032887363433838,
+ 2.0267745724487303,
+ 2.026417502670288,
+ 2.0202374043273927,
+ 2.018483938064575,
+ 2.0165669372558592,
+ 2.019748689651489,
+ 2.0128509746551515,
+ 2.0144077210235594,
+ 2.0115078826904296,
+ 2.0115395934295655,
+ 2.007768662490845,
+ 2.0097858222961427,
+ 2.0063318170166013,
+ 2.0055432247924805,
+ 2.0048474722290037,
+ 2.0065497072982788,
+ 2.0015513079071043,
+ 2.0043911225128173,
+ 2.0010596670150758,
+ 2.0026096622467042,
+ 2.003728278236389,
+ 2.0012191329193114,
+ 2.0005321977233885,
+ 2.0005435330581665,
+ 1.999249365234375,
+ 1.9984628038024903,
+ 1.9983316033935548,
+ 1.997845711593628,
+ 1.9972105752563476,
+ 1.9978763916778564,
+ 1.9953950925064088,
+ 1.9970173344421387,
+ 1.9984811785888672,
+ 1.9950866604232789,
+ 1.9947769330596923,
+ 1.9955189801788331,
+ 1.9929481762313843,
+ 1.9966953428649903,
+ 1.9945243342590333,
+ 1.9926670757675171,
+ 1.993055216293335,
+ 1.9953397652435303,
+ 1.992445609664917,
+ 1.9928514415740968,
+ 1.9945266257476806,
+ 1.9926752143096924,
+ 1.992415104598999,
+ 1.9914240314483642,
+ 1.9914591967773438,
+ 1.9917445565032958,
+ 1.9892081171417237,
+ 1.9890185669708251,
+ 1.9918106182479858,
+ 1.9911041564178467,
+ 1.9896882055282592,
+ 1.9890501625823975,
+ 1.9907431673431397,
+ 1.9901649180603027,
+ 1.9891316648101807,
+ 1.9877268949127198,
+ 1.989798042602539,
+ 1.9878116387939453,
+ 1.9890665167236328,
+ 1.9905334783935547,
+ 1.9886393180847168,
+ 1.988864939727783,
+ 1.9884007026672363,
+ 1.9877286835479737,
+ 1.989529699935913,
+ 1.9890546997070313,
+ 1.9868773061752318,
+ 1.9869191469573975,
+ 1.9882244151306152,
+ 1.987383364868164,
+ 1.9855076770782472,
+ 1.9868913415908813,
+ 1.9883932913970948,
+ 1.9862220487213136,
+ 1.9841738792419434,
+ 1.9836644219970703,
+ 1.9869539430999756,
+ 1.984350054588318,
+ 1.9856882331848145,
+ 1.9835283684539795,
+ 1.9859053189849853,
+ 1.9878566506576538,
+ 1.986811107788086,
+ 1.9838481521606446,
+ 1.986321011314392,
+ 1.9864019583892822,
+ 1.9851219155883788,
+ 1.9863925772476196,
+ 1.9855677154541016,
+ 1.984962483253479,
+ 1.9849489275360108,
+ 1.985427260131836,
+ 1.984917903213501
+ ],
+ "train_acc": [
+ 0.23632,
+ 0.24556,
+ 0.24874,
+ 0.25046,
+ 0.25146,
+ 0.25464,
+ 0.25568,
+ 0.25664,
+ 0.25548,
+ 0.25852,
+ 0.25636,
+ 0.26028,
+ 0.25868,
+ 0.26176,
+ 0.2607,
+ 0.26174,
+ 0.26266,
+ 0.26672,
+ 0.26472,
+ 0.2636,
+ 0.26466,
+ 0.26618,
+ 0.2643,
+ 0.26808,
+ 0.2664,
+ 0.26632,
+ 0.27,
+ 0.26608,
+ 0.26812,
+ 0.26848,
+ 0.26888,
+ 0.27018,
+ 0.27012,
+ 0.2694,
+ 0.2686,
+ 0.2696,
+ 0.27142,
+ 0.27298,
+ 0.27042,
+ 0.27126,
+ 0.2717,
+ 0.2718,
+ 0.27386,
+ 0.27346,
+ 0.26924,
+ 0.27342,
+ 0.27324,
+ 0.27112,
+ 0.27066,
+ 0.27258,
+ 0.27196,
+ 0.27386,
+ 0.27262,
+ 0.2753,
+ 0.274,
+ 0.27484,
+ 0.27268,
+ 0.27362,
+ 0.27338,
+ 0.27272,
+ 0.274,
+ 0.27568,
+ 0.27592,
+ 0.27312,
+ 0.27434,
+ 0.2757,
+ 0.27476,
+ 0.27544,
+ 0.27548,
+ 0.27548,
+ 0.2741,
+ 0.27606,
+ 0.27428,
+ 0.27634,
+ 0.27604,
+ 0.2735,
+ 0.27494,
+ 0.27588,
+ 0.27586,
+ 0.27474,
+ 0.27652,
+ 0.27834,
+ 0.27718,
+ 0.27562,
+ 0.27728,
+ 0.27692,
+ 0.27862,
+ 0.27782,
+ 0.27452,
+ 0.27704,
+ 0.27826,
+ 0.27674,
+ 0.27832,
+ 0.27826,
+ 0.27584,
+ 0.27546,
+ 0.27866,
+ 0.27724,
+ 0.2748,
+ 0.2757
+ ],
+ "test_acc": [
+ 0.2602,
+ 0.2704,
+ 0.2727,
+ 0.2629,
+ 0.2497,
+ 0.2721,
+ 0.2855,
+ 0.2759,
+ 0.2772,
+ 0.2885,
+ 0.2833,
+ 0.2656,
+ 0.2889,
+ 0.2844,
+ 0.2696,
+ 0.2757,
+ 0.2739,
+ 0.2826,
+ 0.2819,
+ 0.2819,
+ 0.2892,
+ 0.2867,
+ 0.2852,
+ 0.2806,
+ 0.2973,
+ 0.2863,
+ 0.2939,
+ 0.2937,
+ 0.2926,
+ 0.2764,
+ 0.2824,
+ 0.2885,
+ 0.2758,
+ 0.2824,
+ 0.2965,
+ 0.2933,
+ 0.2839,
+ 0.2859,
+ 0.2924,
+ 0.2871,
+ 0.2828,
+ 0.2905,
+ 0.2969,
+ 0.2792,
+ 0.286,
+ 0.2835,
+ 0.2903,
+ 0.2892,
+ 0.2897,
+ 0.2923,
+ 0.2943,
+ 0.2885,
+ 0.2928,
+ 0.2967,
+ 0.3031,
+ 0.2862,
+ 0.2863,
+ 0.2915,
+ 0.2885,
+ 0.2905,
+ 0.2855,
+ 0.3001,
+ 0.2961,
+ 0.2865,
+ 0.2976,
+ 0.2898,
+ 0.2962,
+ 0.2908,
+ 0.2936,
+ 0.2822,
+ 0.2934,
+ 0.2904,
+ 0.2952,
+ 0.2923,
+ 0.29,
+ 0.2923,
+ 0.2933,
+ 0.2869,
+ 0.2957,
+ 0.2945,
+ 0.2951,
+ 0.2981,
+ 0.2954,
+ 0.2891,
+ 0.2935,
+ 0.294,
+ 0.2948,
+ 0.2916,
+ 0.2959,
+ 0.2929,
+ 0.296,
+ 0.2939,
+ 0.2944,
+ 0.2937,
+ 0.2942,
+ 0.294,
+ 0.2942,
+ 0.2945,
+ 0.2944,
+ 0.2943
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.38314950466156006,
+ -0.0002771378494799137,
+ -0.0004160030803177506,
+ 3.204246604582295e-05,
+ -4.1770588723011315e-05,
+ -0.0001344233169220388,
+ -3.0795046768616885e-05,
+ -3.3676558814477175e-05
+ ],
+ "perturbation_rho": [
+ -0.00034889206290245056,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "nudging": {
+ "0.001": [
+ -3.711320459842682e-07,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.003": [
+ -1.1324882507324219e-06,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.01": [
+ -3.98978590965271e-06,
+ 9.313225746154785e-10,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 54124.7890625,
+ 783932864.0,
+ 4690777600.0,
+ 5000870912.0,
+ 5716334080.0,
+ 6005877760.0,
+ 7473982976.0,
+ 7776250880.0,
+ 8210811904.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 3.0676110895910824e-07,
+ 1.972915580905621e-10,
+ 1.953913281171893e-10,
+ 1.953771588958375e-10,
+ 1.9510017212898134e-10,
+ 1.950706401965263e-10,
+ 1.9518447968991381e-10,
+ 1.951617339956968e-10,
+ 1.953130296383776e-10
+ ]
+ },
+ "drift": {
+ "embed.weight": 328.1038460126514,
+ "embed.bias": 254.00296968816264,
+ "blocks.0.ln.weight": 10.375421373661911,
+ "blocks.0.w1.weight": 272.1998177697825,
+ "blocks.0.w1.bias": 246.55078363508565,
+ "blocks.0.w2.weight": 489.8754501153931,
+ "blocks.1.ln.weight": 9.050343989939378,
+ "blocks.1.w1.weight": 388.3338705775239,
+ "blocks.1.w1.bias": 383.7188912712258,
+ "blocks.1.w2.weight": 402.9058555114096,
+ "blocks.2.ln.weight": 6.952665632270595,
+ "blocks.2.w1.weight": 255.3639402973493,
+ "blocks.2.w1.bias": 230.39667811143107,
+ "blocks.2.w2.weight": 247.93087044444246,
+ "blocks.3.ln.weight": 8.807891710920156,
+ "blocks.3.w1.weight": 351.3359831200073,
+ "blocks.3.w1.bias": 336.28061721166756,
+ "blocks.3.w2.weight": 321.5850139623348,
+ "blocks.4.ln.weight": 8.196055678685234,
+ "blocks.4.w1.weight": 287.52820992582446,
+ "blocks.4.w1.bias": 262.4079024326842,
+ "blocks.4.w2.weight": 263.46643755724006,
+ "blocks.5.ln.weight": 9.62846119408936,
+ "blocks.5.w1.weight": 390.9965336931924,
+ "blocks.5.w1.bias": 375.54893253589324,
+ "blocks.5.w2.weight": 382.6688713102951,
+ "blocks.6.ln.weight": 7.376838940788987,
+ "blocks.6.w1.weight": 284.9872318771198,
+ "blocks.6.w1.bias": 263.7123597178357,
+ "blocks.6.w2.weight": 270.37673819187216,
+ "blocks.7.ln.weight": 8.135998104403308,
+ "blocks.7.w1.weight": 306.058248194457,
+ "blocks.7.w1.bias": 285.31467158190975,
+ "blocks.7.w2.weight": 284.5221337965431,
+ "out_ln.weight": 0.540826641326857,
+ "out_head.weight": 7.9794751282903125,
+ "out_head.bias": 0.727890481081825
+ }
+ },
+ "fa": {
+ "log": {
+ "train_loss": [
+ 2.0412177281188963,
+ 1.9586744415664672,
+ 1.9367398124694823,
+ 1.9134309594726562,
+ 1.9032167110824585,
+ 1.8894411625671386,
+ 1.8745269380950929,
+ 1.8681198519515991,
+ 1.859885403137207,
+ 1.8490976187515258,
+ 1.8433319261932373,
+ 1.8375304843139648,
+ 1.8348215142822266,
+ 1.8295704886627198,
+ 1.8297615969085694,
+ 1.8252419507217408,
+ 1.8235907801055908,
+ 1.8161062512207031,
+ 1.8182137090301513,
+ 1.8134747228240966,
+ 1.8125705044555664,
+ 1.8099720315551757,
+ 1.8140770354843139,
+ 1.812659885635376,
+ 1.8111054842376708,
+ 1.8094129010772706,
+ 1.8020060912322997,
+ 1.803116989402771,
+ 1.795589069519043,
+ 1.7962562110137938,
+ 1.793900502319336,
+ 1.7944407584381104,
+ 1.7943759982681275,
+ 1.7872746685028076,
+ 1.788272954788208,
+ 1.7918251754379273,
+ 1.783637704849243,
+ 1.7797057202529907,
+ 1.7784698824691771,
+ 1.7769115085601808,
+ 1.7821795818328858,
+ 1.774635576171875,
+ 1.777582476463318,
+ 1.7744439294815064,
+ 1.7718625156402588,
+ 1.7697111307525635,
+ 1.77068788394928,
+ 1.764987317237854,
+ 1.7658291906356811,
+ 1.7618338104629516,
+ 1.763578690109253,
+ 1.7595625869750977,
+ 1.7586208461761474,
+ 1.7597406827163695,
+ 1.7557958205795288,
+ 1.756042088279724,
+ 1.7561137405014038,
+ 1.75862396736145,
+ 1.7520068192672729,
+ 1.7528972547149657,
+ 1.7537218439102173,
+ 1.7509230569076537,
+ 1.7497916799163817,
+ 1.7516448692703248,
+ 1.747637153892517,
+ 1.7501363418579101,
+ 1.7462628519058228,
+ 1.7452370420074463,
+ 1.751023518638611,
+ 1.7453709937286377,
+ 1.7441615590667725,
+ 1.745248391342163,
+ 1.7455106867218018,
+ 1.744859725265503,
+ 1.7450293384170532,
+ 1.741427905960083,
+ 1.7391855532836915,
+ 1.739177918357849,
+ 1.7413748574447632,
+ 1.740176322555542,
+ 1.7360426489639282,
+ 1.7357201895904542,
+ 1.7336676422119142,
+ 1.7378975563812256,
+ 1.7353016548156739,
+ 1.735442308998108,
+ 1.7332442119979858,
+ 1.7347565906524658,
+ 1.7399420615386962,
+ 1.735971279335022,
+ 1.732453648071289,
+ 1.7338156942749023,
+ 1.7323283444976807,
+ 1.733454556350708,
+ 1.7349756786727906,
+ 1.730115432472229,
+ 1.7333317721176147,
+ 1.730795558128357,
+ 1.7303683010482789,
+ 1.7296705428695678
+ ],
+ "train_acc": [
+ 0.24972,
+ 0.2857,
+ 0.29716,
+ 0.3051,
+ 0.31388,
+ 0.32028,
+ 0.32718,
+ 0.32794,
+ 0.33452,
+ 0.33888,
+ 0.33738,
+ 0.34414,
+ 0.3427,
+ 0.344,
+ 0.34354,
+ 0.34574,
+ 0.34834,
+ 0.34704,
+ 0.34938,
+ 0.34914,
+ 0.34736,
+ 0.34888,
+ 0.34864,
+ 0.3503,
+ 0.35144,
+ 0.35012,
+ 0.35328,
+ 0.35404,
+ 0.3563,
+ 0.3579,
+ 0.35846,
+ 0.35674,
+ 0.35756,
+ 0.35986,
+ 0.35896,
+ 0.35632,
+ 0.36026,
+ 0.35954,
+ 0.36354,
+ 0.36492,
+ 0.36274,
+ 0.36668,
+ 0.3658,
+ 0.36558,
+ 0.36578,
+ 0.36772,
+ 0.36466,
+ 0.36692,
+ 0.36736,
+ 0.36832,
+ 0.36896,
+ 0.36776,
+ 0.37266,
+ 0.37066,
+ 0.3741,
+ 0.37286,
+ 0.3711,
+ 0.37272,
+ 0.37522,
+ 0.37178,
+ 0.37578,
+ 0.37454,
+ 0.37386,
+ 0.37256,
+ 0.37668,
+ 0.3778,
+ 0.3781,
+ 0.37876,
+ 0.37596,
+ 0.37894,
+ 0.37748,
+ 0.3801,
+ 0.37678,
+ 0.3776,
+ 0.37852,
+ 0.37916,
+ 0.38186,
+ 0.38062,
+ 0.38234,
+ 0.3802,
+ 0.38074,
+ 0.38048,
+ 0.38312,
+ 0.38086,
+ 0.38172,
+ 0.38188,
+ 0.37984,
+ 0.38142,
+ 0.37894,
+ 0.38204,
+ 0.38274,
+ 0.38072,
+ 0.38316,
+ 0.38076,
+ 0.38282,
+ 0.38266,
+ 0.38244,
+ 0.3807,
+ 0.38074,
+ 0.38534
+ ],
+ "test_acc": [
+ 0.305,
+ 0.3111,
+ 0.3184,
+ 0.3374,
+ 0.3246,
+ 0.3486,
+ 0.3605,
+ 0.3591,
+ 0.3631,
+ 0.3644,
+ 0.3656,
+ 0.37,
+ 0.3724,
+ 0.3813,
+ 0.3682,
+ 0.3692,
+ 0.3665,
+ 0.3747,
+ 0.3765,
+ 0.3701,
+ 0.3749,
+ 0.3749,
+ 0.3821,
+ 0.377,
+ 0.3807,
+ 0.3795,
+ 0.3856,
+ 0.3799,
+ 0.3833,
+ 0.383,
+ 0.3687,
+ 0.3841,
+ 0.3836,
+ 0.377,
+ 0.3894,
+ 0.3861,
+ 0.3944,
+ 0.384,
+ 0.3911,
+ 0.3879,
+ 0.3843,
+ 0.3919,
+ 0.389,
+ 0.3888,
+ 0.3918,
+ 0.3954,
+ 0.3994,
+ 0.3935,
+ 0.3926,
+ 0.3914,
+ 0.3866,
+ 0.3981,
+ 0.3864,
+ 0.3947,
+ 0.4018,
+ 0.3938,
+ 0.3965,
+ 0.3931,
+ 0.3998,
+ 0.4003,
+ 0.401,
+ 0.3984,
+ 0.4009,
+ 0.3958,
+ 0.3936,
+ 0.4004,
+ 0.3968,
+ 0.3984,
+ 0.4015,
+ 0.4012,
+ 0.4042,
+ 0.397,
+ 0.4023,
+ 0.3968,
+ 0.4001,
+ 0.402,
+ 0.3965,
+ 0.3973,
+ 0.3964,
+ 0.4025,
+ 0.4011,
+ 0.4017,
+ 0.4014,
+ 0.3981,
+ 0.3992,
+ 0.4015,
+ 0.4024,
+ 0.4008,
+ 0.4012,
+ 0.4004,
+ 0.4017,
+ 0.4023,
+ 0.4025,
+ 0.4032,
+ 0.4018,
+ 0.4015,
+ 0.4018,
+ 0.4028,
+ 0.4024,
+ 0.4026
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.016079379245638847,
+ 0.0893096849322319,
+ -0.03535890951752663,
+ -0.027881216257810593,
+ -0.04659418389201164,
+ -0.039142537862062454,
+ -0.024059921503067017,
+ 0.9954550266265869
+ ],
+ "perturbation_rho": [
+ -0.003716413863003254,
+ -0.01681683585047722,
+ 0.0011358268093317747,
+ 0.011708367615938187,
+ -0.0036298027262091637,
+ -0.028253771364688873,
+ -0.03197161853313446,
+ -0.0007108366116881371
+ ],
+ "nudging": {
+ "0.001": [
+ -1.6684643924236298e-06,
+ -3.836903488263488e-07,
+ 2.2817403078079224e-08,
+ -4.656612873077393e-10,
+ -4.0745362639427185e-08,
+ 4.21423465013504e-08,
+ 5.2852556109428406e-08,
+ -1.6444828361272812e-06
+ ],
+ "0.003": [
+ -4.838439053855836e-06,
+ -1.1676456779241562e-06,
+ 1.3969838619232178e-07,
+ 2.7101486921310425e-07,
+ 2.761371433734894e-07,
+ 2.1047890186309814e-07,
+ 8.940696716308594e-08,
+ -5.75091689825058e-06
+ ],
+ "0.01": [
+ -1.587084261700511e-05,
+ -3.770226612687111e-06,
+ 4.6566128730773926e-07,
+ 6.083864718675613e-07,
+ 9.522773325443268e-07,
+ 8.021015673875809e-07,
+ 3.6670826375484467e-07,
+ -2.0256484276615083e-05
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 7878.02783203125,
+ 160404.015625,
+ 657860.0625,
+ 1298493.75,
+ 1530210.875,
+ 1789493.5,
+ 1949845.875,
+ 2001972.0,
+ 954169.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.3713459086138755e-05,
+ 1.3362353001866722e-06,
+ 8.249007805716246e-07,
+ 8.118395271594636e-07,
+ 8.120087500174122e-07,
+ 8.141144007822732e-07,
+ 8.151110932885786e-07,
+ 8.161479740920186e-07,
+ 7.983429100022477e-07
+ ]
+ },
+ "drift": {
+ "embed.weight": 50.27203344624106,
+ "embed.bias": 18.164593935621223,
+ "blocks.0.ln.weight": 1.2923599647753492,
+ "blocks.0.w1.weight": 17.615281639137443,
+ "blocks.0.w1.bias": 15.943020869292715,
+ "blocks.0.w2.weight": 62.07043407165766,
+ "blocks.1.ln.weight": 0.9933874122365621,
+ "blocks.1.w1.weight": 19.89251765717899,
+ "blocks.1.w1.bias": 15.397253448680312,
+ "blocks.1.w2.weight": 48.46816742349729,
+ "blocks.2.ln.weight": 0.7602194940473055,
+ "blocks.2.w1.weight": 21.862181220405073,
+ "blocks.2.w1.bias": 20.159047961824015,
+ "blocks.2.w2.weight": 35.30605863540395,
+ "blocks.3.ln.weight": 0.6625976974501915,
+ "blocks.3.w1.weight": 19.25465609702534,
+ "blocks.3.w1.bias": 19.348925466014506,
+ "blocks.3.w2.weight": 46.43927447797268,
+ "blocks.4.ln.weight": 0.5980307474752141,
+ "blocks.4.w1.weight": 19.617144750384867,
+ "blocks.4.w1.bias": 20.038154099436944,
+ "blocks.4.w2.weight": 42.571562389062414,
+ "blocks.5.ln.weight": 0.6412290763020658,
+ "blocks.5.w1.weight": 18.227445383614373,
+ "blocks.5.w1.bias": 17.78546251590179,
+ "blocks.5.w2.weight": 48.65373336198152,
+ "blocks.6.ln.weight": 0.6350627815251478,
+ "blocks.6.w1.weight": 16.896056503722676,
+ "blocks.6.w1.bias": 14.642090090469127,
+ "blocks.6.w2.weight": 58.25817193229909,
+ "blocks.7.ln.weight": 0.7771808191712193,
+ "blocks.7.w1.weight": 20.486284886878302,
+ "blocks.7.w1.bias": 19.32003589686942,
+ "blocks.7.w2.weight": 62.85071102943643,
+ "out_ln.weight": 0.3595629051418566,
+ "out_head.weight": 6.471892657261998,
+ "out_head.bias": 1.5825345450853237
+ }
+ }
+ },
+ "config": {
+ "dataset": "cifar10",
+ "d_hidden": 512,
+ "num_blocks": 8,
+ "batch_size": 128,
+ "epochs": 100,
+ "lr": 0.001,
+ "lr_fb": 0.001,
+ "wd": 0.01,
+ "lam": 0.1,
+ "K": 4,
+ "sigma_bridge": 0.05,
+ "ema_momentum": 0.995,
+ "term_grad_weight": 1.0,
+ "seeds": [
+ 8
+ ],
+ "gpu": 0,
+ "output_dir": "results/fa_dfa_d512_L8_seed8",
+ "methods": [
+ "fa",
+ "dfa"
+ ],
+ "random_targets": false,
+ "penalty_lam": 0.0,
+ "num_classes": 10
+ }
+} \ No newline at end of file
diff --git a/results/fa_dfa_d512_L8_seed9/results_cifar10.json b/results/fa_dfa_d512_L8_seed9/results_cifar10.json
new file mode 100644
index 0000000..b70df2d
--- /dev/null
+++ b/results/fa_dfa_d512_L8_seed9/results_cifar10.json
@@ -0,0 +1,881 @@
+{
+ "9": {
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 2.0749315245056152,
+ 2.0436887923812868,
+ 2.0410750790023804,
+ 2.0344971646881103,
+ 2.030566175842285,
+ 2.0275797971343996,
+ 2.0244680599212646,
+ 2.024917350387573,
+ 2.027327563819885,
+ 2.018273809814453,
+ 2.0191563272094726,
+ 2.0152392296600343,
+ 2.0181246972274782,
+ 2.0139052614974977,
+ 2.011121483078003,
+ 2.007120831375122,
+ 2.009763352279663,
+ 2.010490125274658,
+ 2.0083817123413086,
+ 2.010725986175537,
+ 2.006365931854248,
+ 2.0057907654571534,
+ 2.006084675445557,
+ 2.0068956647491456,
+ 2.0047656631851196,
+ 2.005573451766968,
+ 2.003518327026367,
+ 2.003638141479492,
+ 2.0002300247192384,
+ 2.001433812713623,
+ 2.004064089279175,
+ 2.001738834075928,
+ 1.999301480255127,
+ 2.0016421214294433,
+ 1.9995300130462645,
+ 1.9971350913238526,
+ 2.002013059692383,
+ 2.000174626083374,
+ 1.9979997787475585,
+ 1.9982461312103272,
+ 1.9966425020599365,
+ 1.9979894361114503,
+ 1.9972480834579467,
+ 1.9982498080444335,
+ 1.9934808911514281,
+ 1.9956281592559815,
+ 1.99603251953125,
+ 1.9947702381134034,
+ 1.9941040051269532,
+ 1.9968071872329711,
+ 1.9977976748657227,
+ 1.9950814849853515,
+ 1.996515994567871,
+ 1.9940743815612794,
+ 1.9959130297470093,
+ 1.9953608598327637,
+ 1.995676156616211,
+ 1.9918495462036132,
+ 1.9927745639801024,
+ 1.9944643795013428,
+ 1.9957545782852173,
+ 1.99323220954895,
+ 1.9948831502914428,
+ 1.99567852684021,
+ 1.9960307669830322,
+ 1.993249538116455,
+ 1.992333110961914,
+ 1.9930713208007813,
+ 1.9917521324157714,
+ 1.9922201361846923,
+ 1.990846061668396,
+ 1.9919507978057862,
+ 1.993395569152832,
+ 1.9929897798919678,
+ 1.9927783852386474,
+ 1.992465722427368,
+ 1.9913483853149414,
+ 1.9910357999420165,
+ 1.9905700182724,
+ 1.9907023773956298,
+ 1.9905259375,
+ 1.9902730661010741,
+ 1.9915288207244872,
+ 1.9918687856292725,
+ 1.9918737398910522,
+ 1.9905504457092285,
+ 1.991807127685547,
+ 1.991159518737793,
+ 1.9909059362030028,
+ 1.987746954574585,
+ 1.9886708137512208,
+ 1.991118684539795,
+ 1.9877248714447022,
+ 1.9914885107040405,
+ 1.9913817571258545,
+ 1.990808463897705,
+ 1.9888677963638306,
+ 1.9901370908355713,
+ 1.9902113021850585,
+ 1.9896661295318603
+ ],
+ "train_acc": [
+ 0.233,
+ 0.24974,
+ 0.24902,
+ 0.2555,
+ 0.2535,
+ 0.25314,
+ 0.2584,
+ 0.25894,
+ 0.25562,
+ 0.25858,
+ 0.25952,
+ 0.26262,
+ 0.2592,
+ 0.26194,
+ 0.26504,
+ 0.26518,
+ 0.26234,
+ 0.26346,
+ 0.2654,
+ 0.2614,
+ 0.2644,
+ 0.26158,
+ 0.26868,
+ 0.26646,
+ 0.26696,
+ 0.26778,
+ 0.26766,
+ 0.26614,
+ 0.2709,
+ 0.26686,
+ 0.26754,
+ 0.26956,
+ 0.27002,
+ 0.27106,
+ 0.27018,
+ 0.27268,
+ 0.26956,
+ 0.26954,
+ 0.27276,
+ 0.2717,
+ 0.27314,
+ 0.27052,
+ 0.27232,
+ 0.2731,
+ 0.27476,
+ 0.27392,
+ 0.27266,
+ 0.27468,
+ 0.27238,
+ 0.27302,
+ 0.27042,
+ 0.27416,
+ 0.27348,
+ 0.27274,
+ 0.27358,
+ 0.27494,
+ 0.27386,
+ 0.27458,
+ 0.2768,
+ 0.27558,
+ 0.27496,
+ 0.27546,
+ 0.27332,
+ 0.27632,
+ 0.27632,
+ 0.27756,
+ 0.27488,
+ 0.27564,
+ 0.27622,
+ 0.27424,
+ 0.27614,
+ 0.27482,
+ 0.27548,
+ 0.2777,
+ 0.27614,
+ 0.27582,
+ 0.27502,
+ 0.27412,
+ 0.27556,
+ 0.2774,
+ 0.27578,
+ 0.27622,
+ 0.27676,
+ 0.27774,
+ 0.27778,
+ 0.27504,
+ 0.276,
+ 0.27678,
+ 0.2769,
+ 0.27788,
+ 0.2765,
+ 0.27636,
+ 0.27804,
+ 0.27786,
+ 0.27472,
+ 0.2759,
+ 0.2799,
+ 0.27662,
+ 0.27648,
+ 0.27812
+ ],
+ "test_acc": [
+ 0.2539,
+ 0.2722,
+ 0.2611,
+ 0.265,
+ 0.2773,
+ 0.2768,
+ 0.2858,
+ 0.2962,
+ 0.2833,
+ 0.2576,
+ 0.2732,
+ 0.2985,
+ 0.2776,
+ 0.2927,
+ 0.2865,
+ 0.2838,
+ 0.2825,
+ 0.283,
+ 0.2827,
+ 0.2834,
+ 0.2795,
+ 0.2866,
+ 0.2944,
+ 0.2847,
+ 0.297,
+ 0.2895,
+ 0.2903,
+ 0.2906,
+ 0.2904,
+ 0.2849,
+ 0.3028,
+ 0.2955,
+ 0.2932,
+ 0.2846,
+ 0.2898,
+ 0.2915,
+ 0.2827,
+ 0.3025,
+ 0.2708,
+ 0.2993,
+ 0.2941,
+ 0.2879,
+ 0.3013,
+ 0.2873,
+ 0.3017,
+ 0.3012,
+ 0.2932,
+ 0.3037,
+ 0.2939,
+ 0.2959,
+ 0.2899,
+ 0.2903,
+ 0.291,
+ 0.2999,
+ 0.292,
+ 0.3001,
+ 0.3011,
+ 0.299,
+ 0.292,
+ 0.3016,
+ 0.2875,
+ 0.301,
+ 0.2948,
+ 0.2952,
+ 0.2906,
+ 0.2961,
+ 0.2975,
+ 0.2937,
+ 0.2969,
+ 0.2938,
+ 0.3015,
+ 0.2993,
+ 0.3007,
+ 0.3012,
+ 0.3012,
+ 0.2974,
+ 0.2929,
+ 0.3003,
+ 0.2973,
+ 0.2988,
+ 0.301,
+ 0.2941,
+ 0.2997,
+ 0.2995,
+ 0.2979,
+ 0.2993,
+ 0.2998,
+ 0.2963,
+ 0.3029,
+ 0.2969,
+ 0.2976,
+ 0.2979,
+ 0.2958,
+ 0.2979,
+ 0.2971,
+ 0.2974,
+ 0.2977,
+ 0.2977,
+ 0.2977,
+ 0.2977
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.3850422501564026,
+ 0.0012538384180516005,
+ 0.0006158786127343774,
+ -0.0003950851387344301,
+ 0.000671692076139152,
+ 0.00011835923214675859,
+ -0.00013412139378488064,
+ -0.0009948844090104103
+ ],
+ "perturbation_rho": [
+ -0.009509662166237831,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "nudging": {
+ "0.001": [
+ -3.688037395477295e-07,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.003": [
+ -1.0244548320770264e-06,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ -1.862645149230957e-09
+ ],
+ "0.01": [
+ -3.604218363761902e-06,
+ -8.381903171539307e-09,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ -1.862645149230957e-09
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 57815.328125,
+ 1306904576.0,
+ 3130353408.0,
+ 6042526720.0,
+ 6570734080.0,
+ 7140251648.0,
+ 10449125376.0,
+ 10382031872.0,
+ 10819989504.0
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.387568827089126e-07,
+ 2.7390667511895117e-10,
+ 2.7443888828138086e-10,
+ 2.747598537578e-10,
+ 2.7503455068966787e-10,
+ 2.749568628335197e-10,
+ 2.749303285032312e-10,
+ 2.749313832151046e-10,
+ 2.7492974563614325e-10
+ ]
+ },
+ "drift": {
+ "embed.weight": 338.964341619299,
+ "embed.bias": 255.6470029656094,
+ "blocks.0.ln.weight": 9.7660440200238,
+ "blocks.0.w1.weight": 293.6860368082436,
+ "blocks.0.w1.bias": 259.7815916326078,
+ "blocks.0.w2.weight": 507.1631892613405,
+ "blocks.1.ln.weight": 8.6057770011893,
+ "blocks.1.w1.weight": 327.85104096601293,
+ "blocks.1.w1.bias": 315.96204358536954,
+ "blocks.1.w2.weight": 340.58083302512796,
+ "blocks.2.ln.weight": 9.649643863338673,
+ "blocks.2.w1.weight": 416.26890827062306,
+ "blocks.2.w1.bias": 387.4960514327406,
+ "blocks.2.w2.weight": 412.7077111053307,
+ "blocks.3.ln.weight": 8.443154226363815,
+ "blocks.3.w1.weight": 329.2601915023256,
+ "blocks.3.w1.bias": 308.6467481709907,
+ "blocks.3.w2.weight": 307.62522052468177,
+ "blocks.4.ln.weight": 8.653036768995564,
+ "blocks.4.w1.weight": 354.6654001843918,
+ "blocks.4.w1.bias": 335.86010162934014,
+ "blocks.4.w2.weight": 337.73623750534483,
+ "blocks.5.ln.weight": 11.320001722963196,
+ "blocks.5.w1.weight": 468.93827725593786,
+ "blocks.5.w1.bias": 433.59224246544227,
+ "blocks.5.w2.weight": 463.6712206215392,
+ "blocks.6.ln.weight": 8.063403696936168,
+ "blocks.6.w1.weight": 318.91137867276336,
+ "blocks.6.w1.bias": 306.65686506595944,
+ "blocks.6.w2.weight": 299.5449959675926,
+ "blocks.7.ln.weight": 8.29383434585519,
+ "blocks.7.w1.weight": 333.33394053359984,
+ "blocks.7.w1.bias": 309.27674432467256,
+ "blocks.7.w2.weight": 305.2383919884082,
+ "out_ln.weight": 0.6921153697505151,
+ "out_head.weight": 9.920257520136026,
+ "out_head.bias": 0.6756283909065531
+ }
+ },
+ "fa": {
+ "log": {
+ "train_loss": [
+ 2.054712998123169,
+ 1.9743254754638673,
+ 1.9496866632080079,
+ 1.9313411603164672,
+ 1.916562248878479,
+ 1.9087221280288695,
+ 1.8928792087173463,
+ 1.8860099752426147,
+ 1.8823522333526612,
+ 1.8718870676040649,
+ 1.8658955471038818,
+ 1.8600263873672485,
+ 1.861494415512085,
+ 1.8535768915176392,
+ 1.848890178527832,
+ 1.8419178400421143,
+ 1.843526672897339,
+ 1.8426634002685547,
+ 1.836815809020996,
+ 1.8399362145233153,
+ 1.8320682555770873,
+ 1.8253674346923827,
+ 1.8266462970733643,
+ 1.8268505213165283,
+ 1.819463009338379,
+ 1.815872594833374,
+ 1.8166155709075928,
+ 1.8089701477432252,
+ 1.8076338037490844,
+ 1.8046819745635987,
+ 1.8099775692749023,
+ 1.804258670349121,
+ 1.800314033203125,
+ 1.802857616043091,
+ 1.795948698425293,
+ 1.7952545282745362,
+ 1.7960923547363281,
+ 1.7939829620361327,
+ 1.7962147301483153,
+ 1.7921875751113892,
+ 1.7896775894927979,
+ 1.7867036238861085,
+ 1.7867207586669922,
+ 1.7872415407562257,
+ 1.782658879699707,
+ 1.780779080429077,
+ 1.7818151058959961,
+ 1.7821691263580322,
+ 1.7803733910369872,
+ 1.7798878842163086,
+ 1.7819366843032838,
+ 1.779062672958374,
+ 1.7781674340438842,
+ 1.7732916201019286,
+ 1.7719713204574585,
+ 1.7729518437957763,
+ 1.7732273389053346,
+ 1.7703840225982665,
+ 1.768649151878357,
+ 1.7698098583221435,
+ 1.7702393863296508,
+ 1.7676599294662476,
+ 1.7648498352813722,
+ 1.767660079307556,
+ 1.7671956900024415,
+ 1.7631733521270752,
+ 1.7601489026260375,
+ 1.7607763108062744,
+ 1.7581446547698976,
+ 1.7573072035980224,
+ 1.7607083935546874,
+ 1.7579230298233033,
+ 1.7577186632537842,
+ 1.7563656659317017,
+ 1.7547048263931275,
+ 1.7598694692230226,
+ 1.757342479248047,
+ 1.7562967572021484,
+ 1.7538646789169312,
+ 1.7546308361816407,
+ 1.753781385269165,
+ 1.7569293072128296,
+ 1.7535972839736937,
+ 1.756253225708008,
+ 1.752462748413086,
+ 1.750845112876892,
+ 1.7530096230316161,
+ 1.753320961074829,
+ 1.751194098739624,
+ 1.7476333127593995,
+ 1.7507887158966065,
+ 1.75599822265625,
+ 1.7483607616424561,
+ 1.750292029685974,
+ 1.7498931618881226,
+ 1.7509470245742798,
+ 1.7501217209243773,
+ 1.7520614090728759,
+ 1.750200431213379,
+ 1.7495508447265624
+ ],
+ "train_acc": [
+ 0.23466,
+ 0.27712,
+ 0.2872,
+ 0.29944,
+ 0.30162,
+ 0.30732,
+ 0.31528,
+ 0.31818,
+ 0.31696,
+ 0.32156,
+ 0.32474,
+ 0.32968,
+ 0.32702,
+ 0.33288,
+ 0.33656,
+ 0.33372,
+ 0.33702,
+ 0.33554,
+ 0.33892,
+ 0.33848,
+ 0.34342,
+ 0.34664,
+ 0.34692,
+ 0.34766,
+ 0.34448,
+ 0.3486,
+ 0.346,
+ 0.35034,
+ 0.35144,
+ 0.35384,
+ 0.35184,
+ 0.35544,
+ 0.35718,
+ 0.35464,
+ 0.35864,
+ 0.358,
+ 0.35758,
+ 0.35774,
+ 0.35722,
+ 0.36054,
+ 0.35958,
+ 0.36134,
+ 0.36046,
+ 0.3609,
+ 0.3655,
+ 0.36438,
+ 0.36688,
+ 0.36438,
+ 0.36392,
+ 0.36454,
+ 0.36186,
+ 0.36844,
+ 0.36298,
+ 0.3657,
+ 0.36936,
+ 0.3674,
+ 0.36656,
+ 0.36678,
+ 0.3693,
+ 0.36684,
+ 0.36768,
+ 0.36934,
+ 0.36892,
+ 0.37064,
+ 0.37,
+ 0.3704,
+ 0.37166,
+ 0.37122,
+ 0.3729,
+ 0.37234,
+ 0.37368,
+ 0.37366,
+ 0.37094,
+ 0.3736,
+ 0.3747,
+ 0.37176,
+ 0.37262,
+ 0.37366,
+ 0.374,
+ 0.37594,
+ 0.37354,
+ 0.374,
+ 0.37318,
+ 0.3751,
+ 0.375,
+ 0.3755,
+ 0.3763,
+ 0.3747,
+ 0.37738,
+ 0.3774,
+ 0.37554,
+ 0.37554,
+ 0.37726,
+ 0.37876,
+ 0.3747,
+ 0.37554,
+ 0.37696,
+ 0.37494,
+ 0.37724,
+ 0.37714
+ ],
+ "test_acc": [
+ 0.2763,
+ 0.3104,
+ 0.3093,
+ 0.3052,
+ 0.3209,
+ 0.3254,
+ 0.3374,
+ 0.3255,
+ 0.3509,
+ 0.3324,
+ 0.3485,
+ 0.3621,
+ 0.3572,
+ 0.3522,
+ 0.3638,
+ 0.3522,
+ 0.3562,
+ 0.3678,
+ 0.3662,
+ 0.3637,
+ 0.3713,
+ 0.372,
+ 0.3708,
+ 0.3802,
+ 0.3793,
+ 0.3859,
+ 0.3685,
+ 0.3665,
+ 0.3765,
+ 0.3782,
+ 0.3917,
+ 0.3905,
+ 0.3833,
+ 0.3867,
+ 0.3788,
+ 0.3843,
+ 0.3776,
+ 0.3794,
+ 0.3847,
+ 0.383,
+ 0.3929,
+ 0.392,
+ 0.392,
+ 0.3834,
+ 0.3857,
+ 0.3926,
+ 0.3914,
+ 0.387,
+ 0.3899,
+ 0.3859,
+ 0.3952,
+ 0.3883,
+ 0.3936,
+ 0.3901,
+ 0.3938,
+ 0.3899,
+ 0.3887,
+ 0.3938,
+ 0.3882,
+ 0.395,
+ 0.3878,
+ 0.3907,
+ 0.3952,
+ 0.3954,
+ 0.393,
+ 0.3931,
+ 0.3952,
+ 0.3999,
+ 0.3926,
+ 0.3963,
+ 0.3966,
+ 0.3973,
+ 0.3991,
+ 0.4007,
+ 0.3957,
+ 0.399,
+ 0.3988,
+ 0.3977,
+ 0.3955,
+ 0.395,
+ 0.3975,
+ 0.3971,
+ 0.3999,
+ 0.4003,
+ 0.3973,
+ 0.3988,
+ 0.3966,
+ 0.3972,
+ 0.4,
+ 0.3995,
+ 0.4008,
+ 0.3991,
+ 0.3981,
+ 0.3983,
+ 0.4006,
+ 0.3998,
+ 0.4001,
+ 0.4001,
+ 0.3992,
+ 0.3992
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.009833792224526405,
+ 0.05025029927492142,
+ 0.011137298308312893,
+ -0.039807502180337906,
+ -0.012322505004703999,
+ -0.023192159831523895,
+ 0.01924338936805725,
+ 0.9853411912918091
+ ],
+ "perturbation_rho": [
+ 0.009103747084736824,
+ -0.019891876727342606,
+ -0.013953445479273796,
+ -0.03727542236447334,
+ 0.01967245526611805,
+ 0.006721112877130508,
+ 0.013294404372572899,
+ 0.0555168092250824
+ ],
+ "nudging": {
+ "0.001": [
+ -6.459013093262911e-07,
+ -2.127781044691801e-07,
+ -3.3585820347070694e-08,
+ 5.916808731853962e-08,
+ 5.3318217396736145e-08,
+ 1.6763806343078613e-08,
+ 2.9802322387695312e-08,
+ -1.0665098670870066e-06
+ ],
+ "0.003": [
+ -1.868669642135501e-06,
+ -6.879272405058146e-07,
+ -5.1280949264764786e-08,
+ 2.151064109057188e-07,
+ 3.4226104617118835e-08,
+ 5.404581315815449e-08,
+ -1.3181124813854694e-07,
+ -3.7869031075388193e-06
+ ],
+ "0.01": [
+ -6.252725142985582e-06,
+ -2.3539469111710787e-06,
+ -1.600128598511219e-07,
+ 5.455221980810165e-07,
+ 1.2578675523400307e-07,
+ 2.0262086763978004e-07,
+ -3.8879807107150555e-07,
+ -1.3263634173199534e-05
+ ]
+ },
+ "hidden_norms_per_layer": [
+ 9282.8193359375,
+ 113090.3984375,
+ 1271631.0,
+ 1386270.5,
+ 1726514.25,
+ 1932736.5,
+ 2234588.0,
+ 2239280.5,
+ 1909485.875
+ ],
+ "bp_grad_norms_per_layer": [
+ 2.4564533305238e-05,
+ 1.5358415339505882e-06,
+ 5.894418677598878e-07,
+ 5.805990781482251e-07,
+ 5.811043592984788e-07,
+ 5.801387032988714e-07,
+ 5.800572466796439e-07,
+ 5.741365498579398e-07,
+ 5.41225858796679e-07
+ ]
+ },
+ "drift": {
+ "embed.weight": 57.80682284068776,
+ "embed.bias": 15.88198491033531,
+ "blocks.0.ln.weight": 1.158570355271808,
+ "blocks.0.w1.weight": 17.652543030292062,
+ "blocks.0.w1.bias": 13.648960422025777,
+ "blocks.0.w2.weight": 62.30914257814011,
+ "blocks.1.ln.weight": 0.9953869430578365,
+ "blocks.1.w1.weight": 25.201451590168976,
+ "blocks.1.w1.bias": 21.042828750553948,
+ "blocks.1.w2.weight": 46.84933855533669,
+ "blocks.2.ln.weight": 0.5242024488499516,
+ "blocks.2.w1.weight": 18.18564601499141,
+ "blocks.2.w1.bias": 16.795126625795433,
+ "blocks.2.w2.weight": 37.67242795528057,
+ "blocks.3.ln.weight": 0.6028075449266447,
+ "blocks.3.w1.weight": 22.217273085841715,
+ "blocks.3.w1.bias": 23.55749047158996,
+ "blocks.3.w2.weight": 31.067562938269838,
+ "blocks.4.ln.weight": 0.5882814647513678,
+ "blocks.4.w1.weight": 19.789776992865228,
+ "blocks.4.w1.bias": 19.617707942323612,
+ "blocks.4.w2.weight": 40.08965442413366,
+ "blocks.5.ln.weight": 0.6533636599582089,
+ "blocks.5.w1.weight": 22.778934917253743,
+ "blocks.5.w1.bias": 23.182695560236425,
+ "blocks.5.w2.weight": 28.95545561908214,
+ "blocks.6.ln.weight": 0.5059341920781962,
+ "blocks.6.w1.weight": 15.068172940394142,
+ "blocks.6.w1.bias": 10.870554507643952,
+ "blocks.6.w2.weight": 64.80351856105872,
+ "blocks.7.ln.weight": 0.5640754334192314,
+ "blocks.7.w1.weight": 17.02027959939073,
+ "blocks.7.w1.bias": 12.619662219928584,
+ "blocks.7.w2.weight": 75.96627478247885,
+ "out_ln.weight": 0.44409435398278874,
+ "out_head.weight": 7.673916065937935,
+ "out_head.bias": 0.9250628284434071
+ }
+ }
+ },
+ "config": {
+ "dataset": "cifar10",
+ "d_hidden": 512,
+ "num_blocks": 8,
+ "batch_size": 128,
+ "epochs": 100,
+ "lr": 0.001,
+ "lr_fb": 0.001,
+ "wd": 0.01,
+ "lam": 0.1,
+ "K": 4,
+ "sigma_bridge": 0.05,
+ "ema_momentum": 0.995,
+ "term_grad_weight": 1.0,
+ "seeds": [
+ 9
+ ],
+ "gpu": 0,
+ "output_dir": "results/fa_dfa_d512_L8_seed9",
+ "methods": [
+ "fa",
+ "dfa"
+ ],
+ "random_targets": false,
+ "penalty_lam": 0.0,
+ "num_classes": 10
+ }
+} \ No newline at end of file