diff options
| author | YurenHao0426 <Blackhao0426@gmail.com> | 2026-03-23 18:21:26 -0500 |
|---|---|---|
| committer | YurenHao0426 <Blackhao0426@gmail.com> | 2026-03-23 18:21:26 -0500 |
| commit | 6ed4fa50ddfa4c7957aaa909aaf72f0d7d317712 (patch) | |
| tree | d7c63adcd19c4f5d46c8a937e5047fece55dea62 /results/smoke_test2/results_fashionmnist.json | |
Initial implementation: all models, methods, toy and CIFAR experiments
Debug phase. Toy LQ experiments (3 seeds) complete with terminal gradient matching.
Credit bridge matches state bridge on linear system (~0.94 cosine).
CIFAR experiments in progress.
Diffstat (limited to 'results/smoke_test2/results_fashionmnist.json')
| -rw-r--r-- | results/smoke_test2/results_fashionmnist.json | 721 |
1 files changed, 721 insertions, 0 deletions
diff --git a/results/smoke_test2/results_fashionmnist.json b/results/smoke_test2/results_fashionmnist.json new file mode 100644 index 0000000..e4b5d70 --- /dev/null +++ b/results/smoke_test2/results_fashionmnist.json @@ -0,0 +1,721 @@ +{ + "42": { + "bp": { + "log": { + "train_loss": [ + 0.7028828698158264, + 0.5380959739049276, + 0.4825267461776733, + 0.45401095023155214, + 0.4324853138923645, + 0.41239778510729475, + 0.39849929070472717, + 0.38395428166389467, + 0.3671353324095408, + 0.3565144721984863, + 0.3396712650140127, + 0.32847159377733864, + 0.3156646738688151, + 0.3063636976877848, + 0.29311317729949954, + 0.281319753352801, + 0.27345897892316184, + 0.26703561499913536, + 0.26238394471009574, + 0.2581080759366353 + ], + "train_acc": [ + 0.73755, + 0.7990666666666667, + 0.8184166666666667, + 0.8286, + 0.8373333333333334, + 0.8439833333333333, + 0.8508166666666667, + 0.85665, + 0.8617666666666667, + 0.86675, + 0.87145, + 0.8759333333333333, + 0.87975, + 0.8850666666666667, + 0.8894166666666666, + 0.8943333333333333, + 0.8967166666666667, + 0.8991833333333333, + 0.90125, + 0.9020333333333334 + ], + "test_acc": [ + 0.7939, + 0.8145, + 0.8348, + 0.8457, + 0.8515, + 0.8507, + 0.86, + 0.8616, + 0.8629, + 0.867, + 0.8749, + 0.8758, + 0.8749, + 0.8826, + 0.8871, + 0.8893, + 0.8887, + 0.8914, + 0.8918, + 0.8933 + ] + }, + "diagnostics": { + "bp_cosine": [ + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0 + ], + "perturbation_rho": [ + 0.999779462814331, + 0.9997812509536743, + 0.9997234344482422, + 0.9995989799499512, + 0.9995447993278503, + 0.999480128288269, + 0.999315083026886, + 0.9990702271461487 + ], + "nudging": { + "0.001": [ + -0.0015512802638113499, + -0.0014223953476175666, + -0.0012828018516302109, + -0.0011290921829640865, + -0.0010026510572060943, + -0.0008882835973054171, + -0.0007996053318493068, + -0.0007174870697781444 + ], + "0.003": [ + -0.00463186064735055, + -0.004248819313943386, + -0.003833592403680086, + -0.003375905565917492, + -0.0029987930320203304, + -0.0026576737873256207, + -0.002393170492723584, + -0.0021477085538208485 + ], + "0.01": [ + -0.015182608738541603, + -0.013949813321232796, + -0.012605881318449974, + -0.011121007613837719, + -0.009890388697385788, + -0.008775115013122559, + -0.007910609245300293, + -0.0071043153293430805 + ] + } + }, + "drift": { + "embed.weight": 2.2156592696361637, + "embed.bias": 1.1767430604557616, + "blocks.0.ln.weight": 0.10304339230060577, + "blocks.0.w1.weight": 1.482853423570453, + "blocks.0.w1.bias": 1.6160260440173806, + "blocks.0.w2.weight": 4.957564419601416, + "blocks.1.ln.weight": 0.10342221707105637, + "blocks.1.w1.weight": 1.4925957913309882, + "blocks.1.w1.bias": 1.387226988584735, + "blocks.1.w2.weight": 4.8677799360581435, + "blocks.2.ln.weight": 0.10164804011583328, + "blocks.2.w1.weight": 1.5187247212963315, + "blocks.2.w1.bias": 1.315003727574603, + "blocks.2.w2.weight": 4.892590160655272, + "blocks.3.ln.weight": 0.0980648621916771, + "blocks.3.w1.weight": 1.5309313086145322, + "blocks.3.w1.bias": 1.183681086835403, + "blocks.3.w2.weight": 4.853244692493399, + "blocks.4.ln.weight": 0.10139396041631699, + "blocks.4.w1.weight": 1.4821627671323643, + "blocks.4.w1.bias": 1.3175104220848128, + "blocks.4.w2.weight": 4.605731857817927, + "blocks.5.ln.weight": 0.11211488395929337, + "blocks.5.w1.weight": 1.4651151472668016, + "blocks.5.w1.bias": 1.2329307722821297, + "blocks.5.w2.weight": 4.4285985689227365, + "blocks.6.ln.weight": 0.10427453368902206, + "blocks.6.w1.weight": 1.4545050310842818, + "blocks.6.w1.bias": 1.2120760166211149, + "blocks.6.w2.weight": 4.21210005778474, + "blocks.7.ln.weight": 0.10458954423666, + "blocks.7.w1.weight": 1.4114832006479092, + "blocks.7.w1.bias": 1.3654059522290227, + "blocks.7.w2.weight": 4.118203814443419, + "out_ln.weight": 0.07383835315704346, + "out_head.weight": 0.9730150380688232, + "out_head.bias": 0.5616520351930565 + } + }, + "dfa": { + "log": { + "train_loss": [ + 1.4100907169977823, + 1.436597176615397, + 1.4409521081288656, + 1.4391240961710612, + 1.431752833366394, + 1.4278661415100098, + 1.42600347849528, + 1.4258823367436726, + 1.4264747858683269, + 1.4247130299250286, + 1.4233300720850626, + 1.4235623852411905, + 1.423257282193502, + 1.4203537824630736, + 1.4199624996821085, + 1.4181565198898316, + 1.4212706031799316, + 1.4193874025344848, + 1.4183136430740357, + 1.4177714852015177 + ], + "train_acc": [ + 0.4805, + 0.4955833333333333, + 0.48483333333333334, + 0.48445, + 0.48668333333333336, + 0.48556666666666665, + 0.48233333333333334, + 0.48796666666666666, + 0.48468333333333335, + 0.48793333333333333, + 0.49155, + 0.49241666666666667, + 0.49341666666666667, + 0.4975, + 0.49946666666666667, + 0.5013666666666666, + 0.5008166666666667, + 0.5015833333333334, + 0.5064833333333333, + 0.5055166666666666 + ], + "test_acc": [ + 0.5175, + 0.5227, + 0.4511, + 0.5054, + 0.5238, + 0.4415, + 0.4969, + 0.5415, + 0.5148, + 0.463, + 0.5415, + 0.479, + 0.5198, + 0.5259, + 0.5167, + 0.5293, + 0.5023, + 0.513, + 0.5234, + 0.5205 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.18807855248451233, + 0.003098251298069954, + -0.0009170115226879716, + -0.0030592959374189377, + 0.001578816445544362, + -0.002526880707591772, + -0.0002142013981938362, + 0.002638747449964285 + ], + "perturbation_rho": [ + -0.02331582084298134, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.008287552744150162 + ], + "nudging": { + "0.001": [ + -1.016305759549141e-06, + 0.0, + 0.0, + 9.313225746154785e-10, + 0.0, + -1.862645149230957e-09, + 0.0, + 0.0 + ], + "0.003": [ + -3.2687094062566757e-06, + 0.0, + 0.0, + -1.3969838619232178e-09, + -9.313225746154785e-10, + -2.3283064365386963e-09, + -6.984919309616089e-10, + 0.0 + ], + "0.01": [ + -1.1139316484332085e-05, + 9.313225746154785e-10, + 9.313225746154785e-10, + 6.05359673500061e-09, + -9.313225746154785e-10, + -1.3969838619232178e-09, + 1.1641532182693481e-09, + -9.313225746154785e-10 + ] + } + }, + "drift": { + "embed.weight": 118.42757824386928, + "embed.bias": 91.53070095956268, + "blocks.0.ln.weight": 6.256274700164795, + "blocks.0.w1.weight": 139.8976340759611, + "blocks.0.w1.bias": 140.08208587154786, + "blocks.0.w2.weight": 332.94308388138757, + "blocks.1.ln.weight": 5.70231819152832, + "blocks.1.w1.weight": 129.75151109455834, + "blocks.1.w1.bias": 121.17432552115143, + "blocks.1.w2.weight": 248.7841682293662, + "blocks.2.ln.weight": 5.466833114624023, + "blocks.2.w1.weight": 124.61655550824435, + "blocks.2.w1.bias": 116.83509018118278, + "blocks.2.w2.weight": 237.4413053793645, + "blocks.3.ln.weight": 5.473834037780762, + "blocks.3.w1.weight": 130.23769942327985, + "blocks.3.w1.bias": 111.35214850610642, + "blocks.3.w2.weight": 253.67927391219192, + "blocks.4.ln.weight": 5.401853084564209, + "blocks.4.w1.weight": 138.8132811262774, + "blocks.4.w1.bias": 138.7305587293139, + "blocks.4.w2.weight": 257.0171443869646, + "blocks.5.ln.weight": 5.765963554382324, + "blocks.5.w1.weight": 155.34176828036547, + "blocks.5.w1.bias": 142.51555937014615, + "blocks.5.w2.weight": 294.4127013869835, + "blocks.6.ln.weight": 5.595874309539795, + "blocks.6.w1.weight": 145.1627005952125, + "blocks.6.w1.bias": 132.11466551434717, + "blocks.6.w2.weight": 265.955180270204, + "blocks.7.ln.weight": 5.609050750732422, + "blocks.7.w1.weight": 147.06679411967872, + "blocks.7.w1.bias": 140.14577658787653, + "blocks.7.w2.weight": 258.1442232900185, + "out_ln.weight": 1.0355137586593628, + "out_head.weight": 8.13612662750539, + "out_head.bias": 0.9719656640023888 + } + }, + "state_bridge": { + "log": { + "train_loss": [ + 1.7336509002049765, + 1.5758645205815633, + 1.9318606907526652, + 2.191984078470866, + 1.8327842120488484, + 1.8950384799957276, + 2.377087445449829, + 2.1417452257792156, + 1.9545116751352947, + 2.1638838397979736, + 2.023908238474528, + 2.336049980545044, + 1.9471053196589152, + 1.7918469515482585, + 1.7594309553146363, + 1.7820972992579143, + 1.7949362015406292, + 1.7877578330993653, + 1.7872745515823365, + 1.7890100787480672 + ], + "train_acc": [ + 0.32705, + 0.35613333333333336, + 0.25498333333333334, + 0.14148333333333332, + 0.23143333333333332, + 0.23958333333333334, + 0.07806666666666667, + 0.16218333333333335, + 0.20036666666666667, + 0.1637, + 0.23035, + 0.13533333333333333, + 0.21553333333333333, + 0.25556666666666666, + 0.29751666666666665, + 0.32916666666666666, + 0.3303333333333333, + 0.33866666666666667, + 0.33645, + 0.3411 + ], + "test_acc": [ + 0.4036, + 0.3937, + 0.2369, + 0.2137, + 0.201, + 0.0282, + 0.1012, + 0.1994, + 0.1525, + 0.1935, + 0.136, + 0.1863, + 0.2135, + 0.287, + 0.3171, + 0.3199, + 0.3193, + 0.309, + 0.3431, + 0.3284 + ], + "state_pred_error": [ + 5076234.573784879, + 451395873.88586664, + 9768031180.663467, + 22542289907.438934, + 14961824114.824533, + 30110774233.224533, + 11799262674.944, + 64511141264.315735, + 46851419274.717865, + 25131605056.98987, + 35883748757.77707, + 48375831022.7968, + 69333597113.5488, + 40594997328.827736, + 42734315291.71627, + 38351667650.01387, + 32476750280.567467, + 28322754527.232, + 25330875547.648, + 23083374417.237335 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.1429949849843979, + 0.3180725574493408, + 0.20951706171035767, + 0.2099049985408783, + 0.2332497388124466, + 0.18252244591712952, + 0.16670912504196167, + 0.19962334632873535 + ], + "perturbation_rho": [ + 0.22952523827552795, + 0.06709301471710205, + 0.0, + 0.02129420079290867, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -2.1068379282951355e-05, + -8.009374141693115e-08, + 1.862645149230957e-09, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -6.297603249549866e-05, + -1.4621764421463013e-07, + 2.7939677238464355e-09, + -7.450580596923828e-09, + 5.587935447692871e-09, + 0.0, + 9.313225746154785e-10, + 0.0 + ], + "0.01": [ + -0.00020940043032169342, + -4.3585896492004395e-07, + 0.0, + -5.587935447692871e-09, + 8.381903171539307e-09, + 0.0, + -4.6566128730773926e-09, + 0.0 + ] + } + }, + "drift": { + "embed.weight": 10.2616062216752, + "embed.bias": 9.637697583673647, + "blocks.0.ln.weight": 2.2154791355133057, + "blocks.0.w1.weight": 28.129394231034535, + "blocks.0.w1.bias": 29.44761280088024, + "blocks.0.w2.weight": 83.23565733006456, + "blocks.1.ln.weight": 1.6048834323883057, + "blocks.1.w1.weight": 72.41049473988551, + "blocks.1.w1.bias": 83.01353544678275, + "blocks.1.w2.weight": 118.5292924947854, + "blocks.2.ln.weight": 1.528304100036621, + "blocks.2.w1.weight": 22.974203066797124, + "blocks.2.w1.bias": 24.217216068101656, + "blocks.2.w2.weight": 45.236771862572276, + "blocks.3.ln.weight": 2.2928571701049805, + "blocks.3.w1.weight": 48.83060261437247, + "blocks.3.w1.bias": 44.264646679202606, + "blocks.3.w2.weight": 77.90843201854017, + "blocks.4.ln.weight": 1.569786548614502, + "blocks.4.w1.weight": 38.25042383023526, + "blocks.4.w1.bias": 46.7989781903236, + "blocks.4.w2.weight": 61.850484751580424, + "blocks.5.ln.weight": 1.3877884149551392, + "blocks.5.w1.weight": 25.667927717045547, + "blocks.5.w1.bias": 27.47666495212993, + "blocks.5.w2.weight": 56.74081708072433, + "blocks.6.ln.weight": 2.3657233715057373, + "blocks.6.w1.weight": 68.78499791753522, + "blocks.6.w1.bias": 66.7112278827284, + "blocks.6.w2.weight": 88.32659623386007, + "blocks.7.ln.weight": 1.5114411115646362, + "blocks.7.w1.weight": 40.81772220069051, + "blocks.7.w1.bias": 45.46224641191049, + "blocks.7.w2.weight": 78.43904162151014, + "out_ln.weight": 0.27722063660621643, + "out_head.weight": 3.4145406581484017, + "out_head.bias": 1.9963410657002738 + } + }, + "credit_bridge": { + "log": { + "train_loss": [ + 1.4013389415105184, + 1.4349893891016643, + 1.442590291341146, + 1.4306699399312337, + 1.4177788179397584, + 1.4436163345336914, + 1.5327887171427408, + 1.5964955659866333, + 1.6370685063680013, + 1.6712419691085816, + 1.6618358172098795, + 1.6343142255147298, + 1.6132340278625488, + 1.5844669729232788, + 1.5616684883117675, + 1.541010483233134, + 1.525136720085144, + 1.5156665041605633, + 1.5106076243718465, + 1.5088000661214194 + ], + "train_acc": [ + 0.4805333333333333, + 0.48885, + 0.4822, + 0.48246666666666665, + 0.48988333333333334, + 0.4848, + 0.4645166666666667, + 0.44521666666666665, + 0.4347, + 0.4149833333333333, + 0.41485, + 0.42995, + 0.4308, + 0.43275, + 0.44, + 0.4475, + 0.44975, + 0.45098333333333335, + 0.4527333333333333, + 0.45531666666666665 + ], + "test_acc": [ + 0.4889, + 0.4758, + 0.4915, + 0.5234, + 0.5192, + 0.4658, + 0.4303, + 0.4477, + 0.4427, + 0.4192, + 0.4341, + 0.4369, + 0.4415, + 0.4713, + 0.4554, + 0.4785, + 0.4729, + 0.47, + 0.4671, + 0.4741 + ], + "value_loss": [ + 0.4515702169219653, + 0.10145944621364275, + 0.0791723535656929, + 0.06821208957632383, + 0.048990531079967814, + 0.04999728363355001, + 0.0479251907547315, + 0.04561474083264669, + 0.03763539131681124, + 0.040053354968627296, + 0.03465893845955531, + 0.029476907690366108, + 0.022895141302545864, + 0.020180800672868888, + 0.01736196913222472, + 0.012846514955163002, + 0.009126213994373878, + 0.008726185441513856, + 0.008221083876490592, + 0.007363871405273676 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.15763823688030243, + 0.11252982914447784, + 0.23011641204357147, + 0.21486178040504456, + 0.20616328716278076, + 0.2209414690732956, + 0.22547751665115356, + 0.21971024572849274 + ], + "perturbation_rho": [ + 0.022450335323810577, + 0.03968421369791031, + -0.010952746495604515, + -0.02441849745810032, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -2.5480985641479492e-06, + -3.864988684654236e-08, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.003": [ + -7.3623377829790115e-06, + -4.377216100692749e-08, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "0.01": [ + -2.4445587769150734e-05, + -8.102506399154663e-08, + -1.862645149230957e-09, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ] + } + }, + "drift": { + "embed.weight": 56.40174417770624, + "embed.bias": 40.65522803248636, + "blocks.0.ln.weight": 3.5452845096588135, + "blocks.0.w1.weight": 60.29274021947322, + "blocks.0.w1.bias": 39.86138087773039, + "blocks.0.w2.weight": 158.13629039802228, + "blocks.1.ln.weight": 4.1766037940979, + "blocks.1.w1.weight": 97.47012476867003, + "blocks.1.w1.bias": 94.45373800951319, + "blocks.1.w2.weight": 182.69479749878457, + "blocks.2.ln.weight": 3.4605705738067627, + "blocks.2.w1.weight": 73.73554694789455, + "blocks.2.w1.bias": 70.46643111415602, + "blocks.2.w2.weight": 154.24816844331818, + "blocks.3.ln.weight": 3.458436965942383, + "blocks.3.w1.weight": 81.09956449846064, + "blocks.3.w1.bias": 70.54884627062913, + "blocks.3.w2.weight": 164.27884471330486, + "blocks.4.ln.weight": 3.470608949661255, + "blocks.4.w1.weight": 84.05857705229029, + "blocks.4.w1.bias": 85.61940602638978, + "blocks.4.w2.weight": 160.78897537314361, + "blocks.5.ln.weight": 3.7341277599334717, + "blocks.5.w1.weight": 97.50908269356889, + "blocks.5.w1.bias": 91.11903815503673, + "blocks.5.w2.weight": 187.97349614569833, + "blocks.6.ln.weight": 3.5159947872161865, + "blocks.6.w1.weight": 91.22877884822417, + "blocks.6.w1.bias": 83.71561937882119, + "blocks.6.w2.weight": 167.99172531428107, + "blocks.7.ln.weight": 3.4693915843963623, + "blocks.7.w1.weight": 88.21253603229287, + "blocks.7.w1.bias": 85.04756223959555, + "blocks.7.w2.weight": 156.73218429415434, + "out_ln.weight": 0.8287110924720764, + "out_head.weight": 7.02544389908901, + "out_head.bias": 2.6789805309811348 + } + } + }, + "config": { + "dataset": "fashionmnist", + "d_hidden": 256, + "num_blocks": 8, + "batch_size": 128, + "epochs": 20, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 42 + ], + "gpu": 0, + "output_dir": "results/smoke_test2", + "num_classes": 10 + } +}
\ No newline at end of file |
