summaryrefslogtreecommitdiff
path: root/results/smoke_test2
diff options
context:
space:
mode:
authorYurenHao0426 <Blackhao0426@gmail.com>2026-03-23 18:21:26 -0500
committerYurenHao0426 <Blackhao0426@gmail.com>2026-03-23 18:21:26 -0500
commit6ed4fa50ddfa4c7957aaa909aaf72f0d7d317712 (patch)
treed7c63adcd19c4f5d46c8a937e5047fece55dea62 /results/smoke_test2
Initial implementation: all models, methods, toy and CIFAR experiments
Debug phase. Toy LQ experiments (3 seeds) complete with terminal gradient matching. Credit bridge matches state bridge on linear system (~0.94 cosine). CIFAR experiments in progress.
Diffstat (limited to 'results/smoke_test2')
-rw-r--r--results/smoke_test2/results_fashionmnist.json721
1 files changed, 721 insertions, 0 deletions
diff --git a/results/smoke_test2/results_fashionmnist.json b/results/smoke_test2/results_fashionmnist.json
new file mode 100644
index 0000000..e4b5d70
--- /dev/null
+++ b/results/smoke_test2/results_fashionmnist.json
@@ -0,0 +1,721 @@
+{
+ "42": {
+ "bp": {
+ "log": {
+ "train_loss": [
+ 0.7028828698158264,
+ 0.5380959739049276,
+ 0.4825267461776733,
+ 0.45401095023155214,
+ 0.4324853138923645,
+ 0.41239778510729475,
+ 0.39849929070472717,
+ 0.38395428166389467,
+ 0.3671353324095408,
+ 0.3565144721984863,
+ 0.3396712650140127,
+ 0.32847159377733864,
+ 0.3156646738688151,
+ 0.3063636976877848,
+ 0.29311317729949954,
+ 0.281319753352801,
+ 0.27345897892316184,
+ 0.26703561499913536,
+ 0.26238394471009574,
+ 0.2581080759366353
+ ],
+ "train_acc": [
+ 0.73755,
+ 0.7990666666666667,
+ 0.8184166666666667,
+ 0.8286,
+ 0.8373333333333334,
+ 0.8439833333333333,
+ 0.8508166666666667,
+ 0.85665,
+ 0.8617666666666667,
+ 0.86675,
+ 0.87145,
+ 0.8759333333333333,
+ 0.87975,
+ 0.8850666666666667,
+ 0.8894166666666666,
+ 0.8943333333333333,
+ 0.8967166666666667,
+ 0.8991833333333333,
+ 0.90125,
+ 0.9020333333333334
+ ],
+ "test_acc": [
+ 0.7939,
+ 0.8145,
+ 0.8348,
+ 0.8457,
+ 0.8515,
+ 0.8507,
+ 0.86,
+ 0.8616,
+ 0.8629,
+ 0.867,
+ 0.8749,
+ 0.8758,
+ 0.8749,
+ 0.8826,
+ 0.8871,
+ 0.8893,
+ 0.8887,
+ 0.8914,
+ 0.8918,
+ 0.8933
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 1.0,
+ 1.0,
+ 1.0,
+ 1.0,
+ 1.0,
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ "perturbation_rho": [
+ 0.999779462814331,
+ 0.9997812509536743,
+ 0.9997234344482422,
+ 0.9995989799499512,
+ 0.9995447993278503,
+ 0.999480128288269,
+ 0.999315083026886,
+ 0.9990702271461487
+ ],
+ "nudging": {
+ "0.001": [
+ -0.0015512802638113499,
+ -0.0014223953476175666,
+ -0.0012828018516302109,
+ -0.0011290921829640865,
+ -0.0010026510572060943,
+ -0.0008882835973054171,
+ -0.0007996053318493068,
+ -0.0007174870697781444
+ ],
+ "0.003": [
+ -0.00463186064735055,
+ -0.004248819313943386,
+ -0.003833592403680086,
+ -0.003375905565917492,
+ -0.0029987930320203304,
+ -0.0026576737873256207,
+ -0.002393170492723584,
+ -0.0021477085538208485
+ ],
+ "0.01": [
+ -0.015182608738541603,
+ -0.013949813321232796,
+ -0.012605881318449974,
+ -0.011121007613837719,
+ -0.009890388697385788,
+ -0.008775115013122559,
+ -0.007910609245300293,
+ -0.0071043153293430805
+ ]
+ }
+ },
+ "drift": {
+ "embed.weight": 2.2156592696361637,
+ "embed.bias": 1.1767430604557616,
+ "blocks.0.ln.weight": 0.10304339230060577,
+ "blocks.0.w1.weight": 1.482853423570453,
+ "blocks.0.w1.bias": 1.6160260440173806,
+ "blocks.0.w2.weight": 4.957564419601416,
+ "blocks.1.ln.weight": 0.10342221707105637,
+ "blocks.1.w1.weight": 1.4925957913309882,
+ "blocks.1.w1.bias": 1.387226988584735,
+ "blocks.1.w2.weight": 4.8677799360581435,
+ "blocks.2.ln.weight": 0.10164804011583328,
+ "blocks.2.w1.weight": 1.5187247212963315,
+ "blocks.2.w1.bias": 1.315003727574603,
+ "blocks.2.w2.weight": 4.892590160655272,
+ "blocks.3.ln.weight": 0.0980648621916771,
+ "blocks.3.w1.weight": 1.5309313086145322,
+ "blocks.3.w1.bias": 1.183681086835403,
+ "blocks.3.w2.weight": 4.853244692493399,
+ "blocks.4.ln.weight": 0.10139396041631699,
+ "blocks.4.w1.weight": 1.4821627671323643,
+ "blocks.4.w1.bias": 1.3175104220848128,
+ "blocks.4.w2.weight": 4.605731857817927,
+ "blocks.5.ln.weight": 0.11211488395929337,
+ "blocks.5.w1.weight": 1.4651151472668016,
+ "blocks.5.w1.bias": 1.2329307722821297,
+ "blocks.5.w2.weight": 4.4285985689227365,
+ "blocks.6.ln.weight": 0.10427453368902206,
+ "blocks.6.w1.weight": 1.4545050310842818,
+ "blocks.6.w1.bias": 1.2120760166211149,
+ "blocks.6.w2.weight": 4.21210005778474,
+ "blocks.7.ln.weight": 0.10458954423666,
+ "blocks.7.w1.weight": 1.4114832006479092,
+ "blocks.7.w1.bias": 1.3654059522290227,
+ "blocks.7.w2.weight": 4.118203814443419,
+ "out_ln.weight": 0.07383835315704346,
+ "out_head.weight": 0.9730150380688232,
+ "out_head.bias": 0.5616520351930565
+ }
+ },
+ "dfa": {
+ "log": {
+ "train_loss": [
+ 1.4100907169977823,
+ 1.436597176615397,
+ 1.4409521081288656,
+ 1.4391240961710612,
+ 1.431752833366394,
+ 1.4278661415100098,
+ 1.42600347849528,
+ 1.4258823367436726,
+ 1.4264747858683269,
+ 1.4247130299250286,
+ 1.4233300720850626,
+ 1.4235623852411905,
+ 1.423257282193502,
+ 1.4203537824630736,
+ 1.4199624996821085,
+ 1.4181565198898316,
+ 1.4212706031799316,
+ 1.4193874025344848,
+ 1.4183136430740357,
+ 1.4177714852015177
+ ],
+ "train_acc": [
+ 0.4805,
+ 0.4955833333333333,
+ 0.48483333333333334,
+ 0.48445,
+ 0.48668333333333336,
+ 0.48556666666666665,
+ 0.48233333333333334,
+ 0.48796666666666666,
+ 0.48468333333333335,
+ 0.48793333333333333,
+ 0.49155,
+ 0.49241666666666667,
+ 0.49341666666666667,
+ 0.4975,
+ 0.49946666666666667,
+ 0.5013666666666666,
+ 0.5008166666666667,
+ 0.5015833333333334,
+ 0.5064833333333333,
+ 0.5055166666666666
+ ],
+ "test_acc": [
+ 0.5175,
+ 0.5227,
+ 0.4511,
+ 0.5054,
+ 0.5238,
+ 0.4415,
+ 0.4969,
+ 0.5415,
+ 0.5148,
+ 0.463,
+ 0.5415,
+ 0.479,
+ 0.5198,
+ 0.5259,
+ 0.5167,
+ 0.5293,
+ 0.5023,
+ 0.513,
+ 0.5234,
+ 0.5205
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.18807855248451233,
+ 0.003098251298069954,
+ -0.0009170115226879716,
+ -0.0030592959374189377,
+ 0.001578816445544362,
+ -0.002526880707591772,
+ -0.0002142013981938362,
+ 0.002638747449964285
+ ],
+ "perturbation_rho": [
+ -0.02331582084298134,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.008287552744150162
+ ],
+ "nudging": {
+ "0.001": [
+ -1.016305759549141e-06,
+ 0.0,
+ 0.0,
+ 9.313225746154785e-10,
+ 0.0,
+ -1.862645149230957e-09,
+ 0.0,
+ 0.0
+ ],
+ "0.003": [
+ -3.2687094062566757e-06,
+ 0.0,
+ 0.0,
+ -1.3969838619232178e-09,
+ -9.313225746154785e-10,
+ -2.3283064365386963e-09,
+ -6.984919309616089e-10,
+ 0.0
+ ],
+ "0.01": [
+ -1.1139316484332085e-05,
+ 9.313225746154785e-10,
+ 9.313225746154785e-10,
+ 6.05359673500061e-09,
+ -9.313225746154785e-10,
+ -1.3969838619232178e-09,
+ 1.1641532182693481e-09,
+ -9.313225746154785e-10
+ ]
+ }
+ },
+ "drift": {
+ "embed.weight": 118.42757824386928,
+ "embed.bias": 91.53070095956268,
+ "blocks.0.ln.weight": 6.256274700164795,
+ "blocks.0.w1.weight": 139.8976340759611,
+ "blocks.0.w1.bias": 140.08208587154786,
+ "blocks.0.w2.weight": 332.94308388138757,
+ "blocks.1.ln.weight": 5.70231819152832,
+ "blocks.1.w1.weight": 129.75151109455834,
+ "blocks.1.w1.bias": 121.17432552115143,
+ "blocks.1.w2.weight": 248.7841682293662,
+ "blocks.2.ln.weight": 5.466833114624023,
+ "blocks.2.w1.weight": 124.61655550824435,
+ "blocks.2.w1.bias": 116.83509018118278,
+ "blocks.2.w2.weight": 237.4413053793645,
+ "blocks.3.ln.weight": 5.473834037780762,
+ "blocks.3.w1.weight": 130.23769942327985,
+ "blocks.3.w1.bias": 111.35214850610642,
+ "blocks.3.w2.weight": 253.67927391219192,
+ "blocks.4.ln.weight": 5.401853084564209,
+ "blocks.4.w1.weight": 138.8132811262774,
+ "blocks.4.w1.bias": 138.7305587293139,
+ "blocks.4.w2.weight": 257.0171443869646,
+ "blocks.5.ln.weight": 5.765963554382324,
+ "blocks.5.w1.weight": 155.34176828036547,
+ "blocks.5.w1.bias": 142.51555937014615,
+ "blocks.5.w2.weight": 294.4127013869835,
+ "blocks.6.ln.weight": 5.595874309539795,
+ "blocks.6.w1.weight": 145.1627005952125,
+ "blocks.6.w1.bias": 132.11466551434717,
+ "blocks.6.w2.weight": 265.955180270204,
+ "blocks.7.ln.weight": 5.609050750732422,
+ "blocks.7.w1.weight": 147.06679411967872,
+ "blocks.7.w1.bias": 140.14577658787653,
+ "blocks.7.w2.weight": 258.1442232900185,
+ "out_ln.weight": 1.0355137586593628,
+ "out_head.weight": 8.13612662750539,
+ "out_head.bias": 0.9719656640023888
+ }
+ },
+ "state_bridge": {
+ "log": {
+ "train_loss": [
+ 1.7336509002049765,
+ 1.5758645205815633,
+ 1.9318606907526652,
+ 2.191984078470866,
+ 1.8327842120488484,
+ 1.8950384799957276,
+ 2.377087445449829,
+ 2.1417452257792156,
+ 1.9545116751352947,
+ 2.1638838397979736,
+ 2.023908238474528,
+ 2.336049980545044,
+ 1.9471053196589152,
+ 1.7918469515482585,
+ 1.7594309553146363,
+ 1.7820972992579143,
+ 1.7949362015406292,
+ 1.7877578330993653,
+ 1.7872745515823365,
+ 1.7890100787480672
+ ],
+ "train_acc": [
+ 0.32705,
+ 0.35613333333333336,
+ 0.25498333333333334,
+ 0.14148333333333332,
+ 0.23143333333333332,
+ 0.23958333333333334,
+ 0.07806666666666667,
+ 0.16218333333333335,
+ 0.20036666666666667,
+ 0.1637,
+ 0.23035,
+ 0.13533333333333333,
+ 0.21553333333333333,
+ 0.25556666666666666,
+ 0.29751666666666665,
+ 0.32916666666666666,
+ 0.3303333333333333,
+ 0.33866666666666667,
+ 0.33645,
+ 0.3411
+ ],
+ "test_acc": [
+ 0.4036,
+ 0.3937,
+ 0.2369,
+ 0.2137,
+ 0.201,
+ 0.0282,
+ 0.1012,
+ 0.1994,
+ 0.1525,
+ 0.1935,
+ 0.136,
+ 0.1863,
+ 0.2135,
+ 0.287,
+ 0.3171,
+ 0.3199,
+ 0.3193,
+ 0.309,
+ 0.3431,
+ 0.3284
+ ],
+ "state_pred_error": [
+ 5076234.573784879,
+ 451395873.88586664,
+ 9768031180.663467,
+ 22542289907.438934,
+ 14961824114.824533,
+ 30110774233.224533,
+ 11799262674.944,
+ 64511141264.315735,
+ 46851419274.717865,
+ 25131605056.98987,
+ 35883748757.77707,
+ 48375831022.7968,
+ 69333597113.5488,
+ 40594997328.827736,
+ 42734315291.71627,
+ 38351667650.01387,
+ 32476750280.567467,
+ 28322754527.232,
+ 25330875547.648,
+ 23083374417.237335
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.1429949849843979,
+ 0.3180725574493408,
+ 0.20951706171035767,
+ 0.2099049985408783,
+ 0.2332497388124466,
+ 0.18252244591712952,
+ 0.16670912504196167,
+ 0.19962334632873535
+ ],
+ "perturbation_rho": [
+ 0.22952523827552795,
+ 0.06709301471710205,
+ 0.0,
+ 0.02129420079290867,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "nudging": {
+ "0.001": [
+ -2.1068379282951355e-05,
+ -8.009374141693115e-08,
+ 1.862645149230957e-09,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.003": [
+ -6.297603249549866e-05,
+ -1.4621764421463013e-07,
+ 2.7939677238464355e-09,
+ -7.450580596923828e-09,
+ 5.587935447692871e-09,
+ 0.0,
+ 9.313225746154785e-10,
+ 0.0
+ ],
+ "0.01": [
+ -0.00020940043032169342,
+ -4.3585896492004395e-07,
+ 0.0,
+ -5.587935447692871e-09,
+ 8.381903171539307e-09,
+ 0.0,
+ -4.6566128730773926e-09,
+ 0.0
+ ]
+ }
+ },
+ "drift": {
+ "embed.weight": 10.2616062216752,
+ "embed.bias": 9.637697583673647,
+ "blocks.0.ln.weight": 2.2154791355133057,
+ "blocks.0.w1.weight": 28.129394231034535,
+ "blocks.0.w1.bias": 29.44761280088024,
+ "blocks.0.w2.weight": 83.23565733006456,
+ "blocks.1.ln.weight": 1.6048834323883057,
+ "blocks.1.w1.weight": 72.41049473988551,
+ "blocks.1.w1.bias": 83.01353544678275,
+ "blocks.1.w2.weight": 118.5292924947854,
+ "blocks.2.ln.weight": 1.528304100036621,
+ "blocks.2.w1.weight": 22.974203066797124,
+ "blocks.2.w1.bias": 24.217216068101656,
+ "blocks.2.w2.weight": 45.236771862572276,
+ "blocks.3.ln.weight": 2.2928571701049805,
+ "blocks.3.w1.weight": 48.83060261437247,
+ "blocks.3.w1.bias": 44.264646679202606,
+ "blocks.3.w2.weight": 77.90843201854017,
+ "blocks.4.ln.weight": 1.569786548614502,
+ "blocks.4.w1.weight": 38.25042383023526,
+ "blocks.4.w1.bias": 46.7989781903236,
+ "blocks.4.w2.weight": 61.850484751580424,
+ "blocks.5.ln.weight": 1.3877884149551392,
+ "blocks.5.w1.weight": 25.667927717045547,
+ "blocks.5.w1.bias": 27.47666495212993,
+ "blocks.5.w2.weight": 56.74081708072433,
+ "blocks.6.ln.weight": 2.3657233715057373,
+ "blocks.6.w1.weight": 68.78499791753522,
+ "blocks.6.w1.bias": 66.7112278827284,
+ "blocks.6.w2.weight": 88.32659623386007,
+ "blocks.7.ln.weight": 1.5114411115646362,
+ "blocks.7.w1.weight": 40.81772220069051,
+ "blocks.7.w1.bias": 45.46224641191049,
+ "blocks.7.w2.weight": 78.43904162151014,
+ "out_ln.weight": 0.27722063660621643,
+ "out_head.weight": 3.4145406581484017,
+ "out_head.bias": 1.9963410657002738
+ }
+ },
+ "credit_bridge": {
+ "log": {
+ "train_loss": [
+ 1.4013389415105184,
+ 1.4349893891016643,
+ 1.442590291341146,
+ 1.4306699399312337,
+ 1.4177788179397584,
+ 1.4436163345336914,
+ 1.5327887171427408,
+ 1.5964955659866333,
+ 1.6370685063680013,
+ 1.6712419691085816,
+ 1.6618358172098795,
+ 1.6343142255147298,
+ 1.6132340278625488,
+ 1.5844669729232788,
+ 1.5616684883117675,
+ 1.541010483233134,
+ 1.525136720085144,
+ 1.5156665041605633,
+ 1.5106076243718465,
+ 1.5088000661214194
+ ],
+ "train_acc": [
+ 0.4805333333333333,
+ 0.48885,
+ 0.4822,
+ 0.48246666666666665,
+ 0.48988333333333334,
+ 0.4848,
+ 0.4645166666666667,
+ 0.44521666666666665,
+ 0.4347,
+ 0.4149833333333333,
+ 0.41485,
+ 0.42995,
+ 0.4308,
+ 0.43275,
+ 0.44,
+ 0.4475,
+ 0.44975,
+ 0.45098333333333335,
+ 0.4527333333333333,
+ 0.45531666666666665
+ ],
+ "test_acc": [
+ 0.4889,
+ 0.4758,
+ 0.4915,
+ 0.5234,
+ 0.5192,
+ 0.4658,
+ 0.4303,
+ 0.4477,
+ 0.4427,
+ 0.4192,
+ 0.4341,
+ 0.4369,
+ 0.4415,
+ 0.4713,
+ 0.4554,
+ 0.4785,
+ 0.4729,
+ 0.47,
+ 0.4671,
+ 0.4741
+ ],
+ "value_loss": [
+ 0.4515702169219653,
+ 0.10145944621364275,
+ 0.0791723535656929,
+ 0.06821208957632383,
+ 0.048990531079967814,
+ 0.04999728363355001,
+ 0.0479251907547315,
+ 0.04561474083264669,
+ 0.03763539131681124,
+ 0.040053354968627296,
+ 0.03465893845955531,
+ 0.029476907690366108,
+ 0.022895141302545864,
+ 0.020180800672868888,
+ 0.01736196913222472,
+ 0.012846514955163002,
+ 0.009126213994373878,
+ 0.008726185441513856,
+ 0.008221083876490592,
+ 0.007363871405273676
+ ]
+ },
+ "diagnostics": {
+ "bp_cosine": [
+ 0.15763823688030243,
+ 0.11252982914447784,
+ 0.23011641204357147,
+ 0.21486178040504456,
+ 0.20616328716278076,
+ 0.2209414690732956,
+ 0.22547751665115356,
+ 0.21971024572849274
+ ],
+ "perturbation_rho": [
+ 0.022450335323810577,
+ 0.03968421369791031,
+ -0.010952746495604515,
+ -0.02441849745810032,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "nudging": {
+ "0.001": [
+ -2.5480985641479492e-06,
+ -3.864988684654236e-08,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.003": [
+ -7.3623377829790115e-06,
+ -4.377216100692749e-08,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ],
+ "0.01": [
+ -2.4445587769150734e-05,
+ -8.102506399154663e-08,
+ -1.862645149230957e-09,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0
+ ]
+ }
+ },
+ "drift": {
+ "embed.weight": 56.40174417770624,
+ "embed.bias": 40.65522803248636,
+ "blocks.0.ln.weight": 3.5452845096588135,
+ "blocks.0.w1.weight": 60.29274021947322,
+ "blocks.0.w1.bias": 39.86138087773039,
+ "blocks.0.w2.weight": 158.13629039802228,
+ "blocks.1.ln.weight": 4.1766037940979,
+ "blocks.1.w1.weight": 97.47012476867003,
+ "blocks.1.w1.bias": 94.45373800951319,
+ "blocks.1.w2.weight": 182.69479749878457,
+ "blocks.2.ln.weight": 3.4605705738067627,
+ "blocks.2.w1.weight": 73.73554694789455,
+ "blocks.2.w1.bias": 70.46643111415602,
+ "blocks.2.w2.weight": 154.24816844331818,
+ "blocks.3.ln.weight": 3.458436965942383,
+ "blocks.3.w1.weight": 81.09956449846064,
+ "blocks.3.w1.bias": 70.54884627062913,
+ "blocks.3.w2.weight": 164.27884471330486,
+ "blocks.4.ln.weight": 3.470608949661255,
+ "blocks.4.w1.weight": 84.05857705229029,
+ "blocks.4.w1.bias": 85.61940602638978,
+ "blocks.4.w2.weight": 160.78897537314361,
+ "blocks.5.ln.weight": 3.7341277599334717,
+ "blocks.5.w1.weight": 97.50908269356889,
+ "blocks.5.w1.bias": 91.11903815503673,
+ "blocks.5.w2.weight": 187.97349614569833,
+ "blocks.6.ln.weight": 3.5159947872161865,
+ "blocks.6.w1.weight": 91.22877884822417,
+ "blocks.6.w1.bias": 83.71561937882119,
+ "blocks.6.w2.weight": 167.99172531428107,
+ "blocks.7.ln.weight": 3.4693915843963623,
+ "blocks.7.w1.weight": 88.21253603229287,
+ "blocks.7.w1.bias": 85.04756223959555,
+ "blocks.7.w2.weight": 156.73218429415434,
+ "out_ln.weight": 0.8287110924720764,
+ "out_head.weight": 7.02544389908901,
+ "out_head.bias": 2.6789805309811348
+ }
+ }
+ },
+ "config": {
+ "dataset": "fashionmnist",
+ "d_hidden": 256,
+ "num_blocks": 8,
+ "batch_size": 128,
+ "epochs": 20,
+ "lr": 0.001,
+ "lr_fb": 0.001,
+ "wd": 0.01,
+ "lam": 0.1,
+ "K": 4,
+ "sigma_bridge": 0.05,
+ "ema_momentum": 0.995,
+ "term_grad_weight": 1.0,
+ "seeds": [
+ 42
+ ],
+ "gpu": 0,
+ "output_dir": "results/smoke_test2",
+ "num_classes": 10
+ }
+} \ No newline at end of file