diff options
| -rw-r--r-- | results/cifar100_d256_L4/results_cifar100.json | 1175 | ||||
| -rw-r--r-- | results/cifar100_scan.log | 207 |
2 files changed, 1382 insertions, 0 deletions
diff --git a/results/cifar100_d256_L4/results_cifar100.json b/results/cifar100_d256_L4/results_cifar100.json new file mode 100644 index 0000000..a5894f0 --- /dev/null +++ b/results/cifar100_d256_L4/results_cifar100.json @@ -0,0 +1,1175 @@ +{ + "456": { + "bp": { + "log": { + "train_loss": [ + 3.9721967333221437, + 3.6422341596221925, + 3.48317311088562, + 3.384400603027344, + 3.3024317309570312, + 3.244081290435791, + 3.1941285145568847, + 3.1515661222839357, + 3.110703917541504, + 3.0678702397155764, + 3.0385052178955076, + 3.0125491227722168, + 2.9755833444976805, + 2.9489572755432127, + 2.922896576004028, + 2.8952785552215574, + 2.8644921349334718, + 2.837543485031128, + 2.815750347290039, + 2.7902291624450686, + 2.7643565017700196, + 2.749027015914917, + 2.7195132698059084, + 2.7054608350372313, + 2.678812169647217, + 2.655367773590088, + 2.6455082903289795, + 2.621019700469971, + 2.6011389213562013, + 2.592018772201538, + 2.563096626586914, + 2.5475857734680174, + 2.5312666052246096, + 2.507406205291748, + 2.4910193432617187, + 2.4760341359710694, + 2.463254167022705, + 2.443411533126831, + 2.4220050102996824, + 2.4045714128875733, + 2.3885920139312744, + 2.3683135289001465, + 2.35701943649292, + 2.339880431365967, + 2.3229517127990724, + 2.307848415145874, + 2.2967623091125486, + 2.277586201171875, + 2.2599157135009764, + 2.2420585137939453, + 2.2226363661193846, + 2.211539700012207, + 2.1965325047302247, + 2.181053132362366, + 2.1636188303375246, + 2.148667406234741, + 2.1403865797424317, + 2.118398011932373, + 2.1047801040649414, + 2.090780695877075, + 2.0796419092178344, + 2.0641602599716187, + 2.046349721298218, + 2.034338469238281, + 2.0246941263580323, + 2.010717328338623, + 1.9879577135848998, + 1.9908112752532958, + 1.9648436877822877, + 1.9547819673156739, + 1.942468291015625, + 1.9376066028594972, + 1.9182865232086181, + 1.9184452313232423, + 1.9024611935424804, + 1.8942070873641967, + 1.8815567203521728, + 1.8690018142318725, + 1.8623281653213501, + 1.8579870316314697, + 1.8522963430404662, + 1.836771893234253, + 1.8371977347564696, + 1.8297668377685548, + 1.822632707901001, + 1.8128088718032838, + 1.8157146021270751, + 1.8022041482162476, + 1.8005397263336183, + 1.80292332862854, + 1.7940929901504516, + 1.7858528171539307, + 1.7898485187149047, + 1.7881443228149414, + 1.7818482210540771, + 1.776848041419983, + 1.7760994798278809, + 1.7772335396957397, + 1.7761591400146484, + 1.7769086535644532 + ], + "train_acc": [ + 0.09778, + 0.1434, + 0.17072, + 0.18902, + 0.20188, + 0.21248, + 0.22264, + 0.22896, + 0.23846, + 0.24332, + 0.2513, + 0.25484, + 0.26372, + 0.26734, + 0.27242, + 0.27908, + 0.2835, + 0.28882, + 0.29154, + 0.29834, + 0.30224, + 0.30392, + 0.31194, + 0.3125, + 0.31934, + 0.32328, + 0.32578, + 0.3307, + 0.33552, + 0.33738, + 0.34074, + 0.34822, + 0.3489, + 0.35476, + 0.35714, + 0.3598, + 0.36264, + 0.3679, + 0.373, + 0.37474, + 0.37926, + 0.38364, + 0.38482, + 0.38824, + 0.3921, + 0.39568, + 0.39682, + 0.4015, + 0.4051, + 0.40902, + 0.41356, + 0.41618, + 0.42064, + 0.4233, + 0.42546, + 0.42778, + 0.43316, + 0.4337, + 0.4397, + 0.442, + 0.44436, + 0.45016, + 0.4521, + 0.45792, + 0.45892, + 0.4619, + 0.46716, + 0.46476, + 0.47062, + 0.475, + 0.4752, + 0.47938, + 0.48198, + 0.48142, + 0.48788, + 0.49032, + 0.49212, + 0.49664, + 0.49638, + 0.4973, + 0.4982, + 0.5018, + 0.5012, + 0.50402, + 0.50828, + 0.512, + 0.50698, + 0.51086, + 0.51272, + 0.5128, + 0.51364, + 0.5147, + 0.51322, + 0.51572, + 0.51924, + 0.51582, + 0.52014, + 0.51688, + 0.519, + 0.51792 + ], + "test_acc": [ + 0.1436, + 0.1724, + 0.1976, + 0.2161, + 0.2193, + 0.2286, + 0.2386, + 0.2425, + 0.2508, + 0.2496, + 0.2572, + 0.2667, + 0.2678, + 0.266, + 0.2744, + 0.2762, + 0.278, + 0.277, + 0.2819, + 0.2857, + 0.2884, + 0.2924, + 0.2971, + 0.2938, + 0.2967, + 0.2984, + 0.2994, + 0.2987, + 0.3023, + 0.3018, + 0.3005, + 0.3058, + 0.3086, + 0.3118, + 0.3122, + 0.317, + 0.3139, + 0.3083, + 0.3131, + 0.3166, + 0.3193, + 0.3185, + 0.318, + 0.3176, + 0.319, + 0.3176, + 0.3157, + 0.3158, + 0.3241, + 0.3165, + 0.3253, + 0.3217, + 0.3224, + 0.3244, + 0.3217, + 0.3181, + 0.3206, + 0.3261, + 0.3208, + 0.3204, + 0.3198, + 0.324, + 0.322, + 0.3227, + 0.3184, + 0.3193, + 0.323, + 0.3233, + 0.3254, + 0.3202, + 0.3196, + 0.3251, + 0.3243, + 0.3247, + 0.3209, + 0.3245, + 0.3267, + 0.322, + 0.3224, + 0.3177, + 0.3213, + 0.3196, + 0.3218, + 0.3207, + 0.3219, + 0.3223, + 0.3244, + 0.3202, + 0.3248, + 0.3217, + 0.3236, + 0.3228, + 0.3211, + 0.3231, + 0.3222, + 0.3215, + 0.3217, + 0.3216, + 0.3217, + 0.3219 + ] + }, + "diagnostics": { + "bp_cosine": [ + 1.0, + 1.0, + 1.0, + 1.0 + ], + "perturbation_rho": [ + 0.9977043271064758, + 0.9984360337257385, + 0.9986849427223206, + 0.998687744140625 + ], + "nudging": { + "0.001": [ + -0.0023326585069298744, + -0.0024166624061763287, + -0.002438523806631565, + -0.002317411359399557 + ], + "0.003": [ + -0.006994417868554592, + -0.007245808374136686, + -0.007311580237001181, + -0.0069488403387367725 + ], + "0.01": [ + -0.023275790736079216, + -0.024107107892632484, + -0.02432604506611824, + -0.023123476654291153 + ] + }, + "hidden_norms_per_layer": [ + 233.81195068359375, + 216.790283203125, + 197.1819305419922, + 192.0648956298828, + 192.4108123779297 + ], + "bp_grad_norms_per_layer": [ + 0.0011535885278135538, + 0.0012452645460143685, + 0.0012428142363205552, + 0.0011899643577635288, + 0.0009729627054184675 + ] + }, + "drift": { + "embed.weight": 7.39398977590873, + "embed.bias": 12.21138556118351, + "blocks.0.ln.weight": 0.14770954847335815, + "blocks.0.w1.weight": 3.60408439890455, + "blocks.0.w1.bias": 5.22994684696013, + "blocks.0.w2.weight": 13.053238719433654, + "blocks.1.ln.weight": 0.2946910560131073, + "blocks.1.w1.weight": 4.020628252632815, + "blocks.1.w1.bias": 4.08986730586542, + "blocks.1.w2.weight": 14.948894323523715, + "blocks.2.ln.weight": 0.3565492630004883, + "blocks.2.w1.weight": 4.131160512042399, + "blocks.2.w1.bias": 4.003626979292673, + "blocks.2.w2.weight": 15.902707284756408, + "blocks.3.ln.weight": 0.5485884547233582, + "blocks.3.w1.weight": 4.454943236812576, + "blocks.3.w1.bias": 3.7282169522367608, + "blocks.3.w2.weight": 17.516109303089266, + "out_ln.weight": 0.14718063175678253, + "out_head.weight": 2.8761547030079035, + "out_head.bias": 2.9449926838807814 + } + }, + "dfa": { + "log": { + "train_loss": [ + 4.161876879730225, + 4.0187293235778805, + 4.006406578216553, + 4.022609137268066, + 4.043629914093017, + 4.065061766357422, + 4.069896491699219, + 4.078123603668213, + 4.078062142028808, + 4.077979364929199, + 4.076888464050293, + 4.075497508239746, + 4.072547879943848, + 4.0701713494110106, + 4.069026535797119, + 4.064811604537964, + 4.061763154296875, + 4.060759606628418, + 4.057107001800537, + 4.0601828897094725, + 4.055534585723877, + 4.054944921569824, + 4.051256662597656, + 4.050451954650879, + 4.050393276824951, + 4.046223313903808, + 4.047258992919922, + 4.047048767089843, + 4.044600559692383, + 4.0430317344665525, + 4.043368485031128, + 4.043571725158691, + 4.04141174987793, + 4.0433316299438475, + 4.041590490188598, + 4.042222282104492, + 4.039775361480713, + 4.0390912678527835, + 4.040217839660644, + 4.039096583099365, + 4.040037224197388, + 4.037792987365723, + 4.037555375213623, + 4.0370241506958005, + 4.036846955261231, + 4.037261250762939, + 4.036293372497559, + 4.036143635406495, + 4.036682392578125, + 4.037161629333496, + 4.033746524200439, + 4.0374753099060054, + 4.035349784240722, + 4.037173667144775, + 4.0371518122863765, + 4.035486927947998, + 4.036137412185669, + 4.036821200714112, + 4.035519530792237, + 4.035781366424561, + 4.034954352722168, + 4.03612970993042, + 4.036103559722901, + 4.03622392074585, + 4.035975666046142, + 4.036468890686035, + 4.0351540557861325, + 4.035398180618286, + 4.034821971435547, + 4.0340077725219725, + 4.0375495939636235, + 4.034684106140137, + 4.034154214782715, + 4.032997749557495, + 4.033953489532471, + 4.034259751586914, + 4.034655684280396, + 4.0341955810546875, + 4.032625467605591, + 4.033415794677734, + 4.0320995791625975, + 4.034218223724365, + 4.033138923187256, + 4.03524599899292, + 4.03208328704834, + 4.032160759277343, + 4.034070838088989, + 4.030838821868897, + 4.031133236694336, + 4.032454485397339, + 4.0351671361541745, + 4.032833539428711, + 4.03182988456726, + 4.035511425170898, + 4.034130640563965, + 4.030528937072754, + 4.031213985443115, + 4.02996834274292, + 4.034611818542481, + 4.032926365203857 + ], + "train_acc": [ + 0.06838, + 0.08666, + 0.08636, + 0.08428, + 0.08436, + 0.08, + 0.07998, + 0.0795, + 0.08026, + 0.07902, + 0.08168, + 0.08248, + 0.08128, + 0.08158, + 0.0824, + 0.08262, + 0.08376, + 0.08384, + 0.08508, + 0.08482, + 0.0866, + 0.08544, + 0.08676, + 0.08688, + 0.08632, + 0.08594, + 0.0864, + 0.08776, + 0.08736, + 0.0885, + 0.08856, + 0.08896, + 0.08866, + 0.08886, + 0.08898, + 0.09026, + 0.08896, + 0.09028, + 0.09104, + 0.08926, + 0.08906, + 0.08948, + 0.09216, + 0.09092, + 0.0916, + 0.09188, + 0.09304, + 0.09212, + 0.09048, + 0.0914, + 0.09228, + 0.09146, + 0.09234, + 0.09168, + 0.09086, + 0.09146, + 0.09152, + 0.0926, + 0.0927, + 0.09192, + 0.09064, + 0.09282, + 0.09188, + 0.09342, + 0.09134, + 0.09174, + 0.09164, + 0.09388, + 0.09276, + 0.0928, + 0.09378, + 0.0937, + 0.09332, + 0.09394, + 0.09278, + 0.09276, + 0.09362, + 0.09378, + 0.0931, + 0.09256, + 0.09352, + 0.0925, + 0.09258, + 0.0933, + 0.09242, + 0.09364, + 0.09322, + 0.09358, + 0.09406, + 0.09348, + 0.09254, + 0.09332, + 0.09516, + 0.09486, + 0.09452, + 0.09486, + 0.09398, + 0.09486, + 0.0926, + 0.09292 + ], + "test_acc": [ + 0.0832, + 0.0875, + 0.0896, + 0.0827, + 0.0848, + 0.0809, + 0.0756, + 0.0769, + 0.0809, + 0.0777, + 0.0775, + 0.0751, + 0.0835, + 0.0804, + 0.0776, + 0.0805, + 0.0857, + 0.0753, + 0.0811, + 0.0813, + 0.0813, + 0.0786, + 0.0812, + 0.0834, + 0.0862, + 0.0883, + 0.0863, + 0.0861, + 0.0853, + 0.0878, + 0.0835, + 0.087, + 0.0873, + 0.0833, + 0.087, + 0.0834, + 0.08, + 0.0867, + 0.0897, + 0.0872, + 0.0834, + 0.0871, + 0.0804, + 0.084, + 0.0892, + 0.0908, + 0.0851, + 0.0891, + 0.0889, + 0.0834, + 0.0892, + 0.0905, + 0.0915, + 0.0885, + 0.09, + 0.0895, + 0.0863, + 0.0889, + 0.0923, + 0.0884, + 0.0901, + 0.087, + 0.0912, + 0.0893, + 0.0901, + 0.0868, + 0.09, + 0.0877, + 0.09, + 0.0906, + 0.0895, + 0.0902, + 0.0864, + 0.0878, + 0.0914, + 0.0888, + 0.0878, + 0.0886, + 0.089, + 0.0879, + 0.0894, + 0.0902, + 0.0885, + 0.0887, + 0.0886, + 0.089, + 0.0897, + 0.0892, + 0.09, + 0.0898, + 0.0898, + 0.089, + 0.0898, + 0.0896, + 0.0895, + 0.0893, + 0.0895, + 0.0894, + 0.0895, + 0.0894 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.1401812732219696, + -0.0036733129527419806, + -0.012468342669308186, + -0.005749615840613842 + ], + "perturbation_rho": [ + -0.006108707282692194, + 0.0, + 0.0, + 0.0 + ], + "nudging": { + "0.001": [ + -6.239861249923706e-07, + 0.0, + -7.450580596923828e-09, + 0.0 + ], + "0.003": [ + -1.9390136003494263e-06, + -1.862645149230957e-09, + -5.587935447692871e-09, + -9.313225746154785e-10 + ], + "0.01": [ + -6.262678653001785e-06, + 7.450580596923828e-09, + -9.313225746154785e-09, + -9.313225746154785e-10 + ] + }, + "hidden_norms_per_layer": [ + 18858.84765625, + 30104360.0, + 115109720.0, + 142242432.0, + 189834320.0 + ], + "bp_grad_norms_per_layer": [ + 1.6800353250800981e-06, + 1.0384521687001325e-08, + 1.0259526561640087e-08, + 1.025457141423658e-08, + 1.027041474088719e-08 + ] + }, + "drift": { + "embed.weight": 159.1972472218232, + "embed.bias": 106.18499737865416, + "blocks.0.ln.weight": 8.374361991882324, + "blocks.0.w1.weight": 130.25292802776573, + "blocks.0.w1.bias": 147.17751025342662, + "blocks.0.w2.weight": 140.99980409584657, + "blocks.1.ln.weight": 7.522654056549072, + "blocks.1.w1.weight": 183.9407903471226, + "blocks.1.w1.bias": 180.51106512410223, + "blocks.1.w2.weight": 111.67829579527385, + "blocks.2.ln.weight": 7.498361110687256, + "blocks.2.w1.weight": 174.07305808153814, + "blocks.2.w1.bias": 174.07821632973472, + "blocks.2.w2.weight": 98.850301691257, + "blocks.3.ln.weight": 7.616756439208984, + "blocks.3.w1.weight": 196.59852287605085, + "blocks.3.w1.bias": 194.4284525943356, + "blocks.3.w2.weight": 107.54506697831434, + "out_ln.weight": 1.6382553577423096, + "out_head.weight": 8.655421610551045, + "out_head.bias": 0.8148936743952768 + } + }, + "fa": { + "log": { + "train_loss": [ + 4.217838704376221, + 4.173818229370117, + 4.116404933776855, + 4.05976043182373, + 4.010950881347656, + 3.9877797143554687, + 3.9654394758605958, + 3.9516948413085937, + 3.9414562885284425, + 3.933943572845459, + 3.9315726831054687, + 3.9283136461639403, + 3.918033894042969, + 3.912524202957153, + 3.9111721415710448, + 3.9033537468719484, + 3.898933872528076, + 3.8940574435424806, + 3.888040324020386, + 3.8902594078826906, + 3.881309941253662, + 3.879018879852295, + 3.874496072845459, + 3.871587658691406, + 3.867957287750244, + 3.8586695248413085, + 3.858678229446411, + 3.8550938296508788, + 3.848460863571167, + 3.843934544219971, + 3.841355255813599, + 3.838310421142578, + 3.832888546218872, + 3.8317080396270753, + 3.8244371617126465, + 3.826680388870239, + 3.820091895675659, + 3.8168691710662843, + 3.8143892570495606, + 3.8094303827667235, + 3.811340135421753, + 3.808084242401123, + 3.8061284660339356, + 3.8042135272979736, + 3.803864609298706, + 3.8014156354522703, + 3.796123163604736, + 3.7960259418487547, + 3.794198034515381, + 3.793316494140625, + 3.7887488148498534, + 3.7955673222351076, + 3.787652738494873, + 3.789291185760498, + 3.7856833452606202, + 3.7837219773864748, + 3.7850829206848147, + 3.7822190380859375, + 3.780595345993042, + 3.780773038482666, + 3.7798492553710936, + 3.777712000579834, + 3.77948525390625, + 3.7754471015930178, + 3.7783823108673094, + 3.7751461651611327, + 3.7731632305908205, + 3.7698883753204346, + 3.7678470989227293, + 3.7675471087646484, + 3.768726477050781, + 3.7664906182861326, + 3.7642928605651855, + 3.7634823627471925, + 3.7624606089782713, + 3.7626713751983645, + 3.76369431892395, + 3.7644162739562987, + 3.759299655914307, + 3.759175650482178, + 3.758095093460083, + 3.758687426223755, + 3.7558257189941404, + 3.758380405960083, + 3.7552133833312986, + 3.7560367737579345, + 3.757290748748779, + 3.7513376335144044, + 3.7552056824493407, + 3.7553686474609376, + 3.754800821685791, + 3.752587652511597, + 3.7521287742614744, + 3.754502056884766, + 3.7546933338928223, + 3.750953895263672, + 3.749489372558594, + 3.7477175559997558, + 3.753209332885742, + 3.75081868347168 + ], + "train_acc": [ + 0.06112, + 0.06028, + 0.0684, + 0.07838, + 0.08954, + 0.09232, + 0.0962, + 0.09766, + 0.099, + 0.10078, + 0.1022, + 0.10314, + 0.10234, + 0.1031, + 0.10412, + 0.10498, + 0.10776, + 0.10782, + 0.109, + 0.10792, + 0.10914, + 0.11072, + 0.10982, + 0.11022, + 0.11282, + 0.11262, + 0.11402, + 0.11302, + 0.11302, + 0.11694, + 0.11542, + 0.11602, + 0.11642, + 0.1175, + 0.11876, + 0.12074, + 0.11926, + 0.11816, + 0.12184, + 0.12202, + 0.12178, + 0.12238, + 0.1222, + 0.12238, + 0.12092, + 0.12362, + 0.12338, + 0.12428, + 0.1248, + 0.12516, + 0.12588, + 0.12532, + 0.12592, + 0.12498, + 0.12348, + 0.1247, + 0.12646, + 0.12716, + 0.12708, + 0.1273, + 0.12668, + 0.12754, + 0.1268, + 0.1288, + 0.12586, + 0.12914, + 0.12678, + 0.12832, + 0.1304, + 0.12808, + 0.12922, + 0.13104, + 0.12964, + 0.13008, + 0.13144, + 0.1314, + 0.12958, + 0.1298, + 0.1312, + 0.13116, + 0.13188, + 0.1315, + 0.1315, + 0.13028, + 0.13222, + 0.1339, + 0.13118, + 0.13218, + 0.13354, + 0.1333, + 0.13136, + 0.13216, + 0.13268, + 0.13262, + 0.13292, + 0.13426, + 0.13368, + 0.13344, + 0.13306, + 0.13192 + ], + "test_acc": [ + 0.0534, + 0.0616, + 0.0812, + 0.0837, + 0.0888, + 0.0944, + 0.0942, + 0.0981, + 0.1007, + 0.0999, + 0.1021, + 0.0988, + 0.1083, + 0.1019, + 0.1073, + 0.1051, + 0.1085, + 0.107, + 0.1044, + 0.1125, + 0.1066, + 0.1037, + 0.1053, + 0.1129, + 0.1129, + 0.1183, + 0.1149, + 0.1188, + 0.1188, + 0.1138, + 0.114, + 0.1201, + 0.1229, + 0.1223, + 0.1169, + 0.1239, + 0.1156, + 0.1221, + 0.1195, + 0.1228, + 0.1275, + 0.1226, + 0.1232, + 0.1264, + 0.1312, + 0.1262, + 0.126, + 0.1266, + 0.126, + 0.124, + 0.1272, + 0.1229, + 0.1293, + 0.1247, + 0.125, + 0.1265, + 0.1232, + 0.1295, + 0.1274, + 0.1275, + 0.1273, + 0.1276, + 0.1285, + 0.1266, + 0.1285, + 0.128, + 0.1299, + 0.1295, + 0.1308, + 0.1252, + 0.1315, + 0.1292, + 0.1291, + 0.1296, + 0.1298, + 0.1308, + 0.1314, + 0.1297, + 0.1304, + 0.1307, + 0.13, + 0.1335, + 0.1296, + 0.1302, + 0.1329, + 0.1303, + 0.1313, + 0.1315, + 0.132, + 0.1311, + 0.1311, + 0.1322, + 0.1311, + 0.1317, + 0.1308, + 0.1309, + 0.1306, + 0.1308, + 0.1311, + 0.131 + ] + }, + "diagnostics": { + "bp_cosine": [ + 0.08354228734970093, + 0.008694879710674286, + -0.10087701678276062, + 0.9982630610466003 + ], + "perturbation_rho": [ + -0.00011634547263383865, + 0.02022995427250862, + -0.014473065733909607, + -0.005974383093416691 + ], + "nudging": { + "0.001": [ + -1.8016435205936432e-06, + -6.216578185558319e-08, + 9.220093488693237e-08, + -1.9045546650886536e-06 + ], + "0.003": [ + -5.271751433610916e-06, + -1.3830140233039856e-07, + 7.264316082000732e-07, + -7.401220500469208e-06 + ], + "0.01": [ + -1.7585232853889465e-05, + -5.245674401521683e-07, + 3.0526425689458847e-06, + -2.642977051436901e-05 + ] + }, + "hidden_norms_per_layer": [ + 11478.736328125, + 155429.828125, + 870084.3125, + 1247755.625, + 225384.953125 + ], + "bp_grad_norms_per_layer": [ + 9.544731256028172e-06, + 1.3853283462594845e-06, + 1.3375421303862822e-06, + 1.3386792261371738e-06, + 1.3382986026044819e-06 + ] + }, + "drift": { + "embed.weight": 105.03852710133908, + "embed.bias": 26.837405563994206, + "blocks.0.ln.weight": 2.3730220794677734, + "blocks.0.w1.weight": 28.170015713455946, + "blocks.0.w1.bias": 23.197896445244968, + "blocks.0.w2.weight": 73.04463083219117, + "blocks.1.ln.weight": 1.476442575454712, + "blocks.1.w1.weight": 29.314224337417265, + "blocks.1.w1.bias": 28.13876609135274, + "blocks.1.w2.weight": 39.8446977611363, + "blocks.2.ln.weight": 1.1785210371017456, + "blocks.2.w1.weight": 29.01006529023273, + "blocks.2.w1.bias": 32.13611131919081, + "blocks.2.w2.weight": 31.240416399097963, + "blocks.3.ln.weight": 1.4267656803131104, + "blocks.3.w1.weight": 32.92178917188402, + "blocks.3.w1.bias": 34.64484336575641, + "blocks.3.w2.weight": 31.82487883605455, + "out_ln.weight": 0.5113871097564697, + "out_head.weight": 3.9161050246024547, + "out_head.bias": 2.224345965208238 + } + } + }, + "config": { + "dataset": "cifar100", + "d_hidden": 256, + "num_blocks": 4, + "batch_size": 128, + "epochs": 100, + "lr": 0.001, + "lr_fb": 0.001, + "wd": 0.01, + "lam": 0.1, + "K": 4, + "sigma_bridge": 0.05, + "ema_momentum": 0.995, + "term_grad_weight": 1.0, + "seeds": [ + 456 + ], + "gpu": 0, + "output_dir": "results/cifar100_d256_L4", + "methods": [ + "bp", + "fa", + "dfa" + ], + "random_targets": false, + "penalty_lam": 0.0, + "num_classes": 100 + } +}
\ No newline at end of file diff --git a/results/cifar100_scan.log b/results/cifar100_scan.log new file mode 100644 index 0000000..637893f --- /dev/null +++ b/results/cifar100_scan.log @@ -0,0 +1,207 @@ +=== CIFAR-100: d=256 L=4 === +Start: Sun Apr 26 09:13:20 AM CDT 2026 + +--- Frozen baseline --- + frozen s=42 (Sun Apr 26 09:13:20 AM CDT 2026) +Device: cuda:0, seed=42, epochs=100, dataset=cifar100 + +=== BP shallow (ResMLP num_blocks=0), seed=42 === + n_params: 812900 (812900 trainable) + [BP-shallow] ep 1: test_acc=0.1119 + [BP-shallow] ep 10: test_acc=0.1320 + [BP-shallow] ep 20: test_acc=0.1286 + [BP-shallow] ep 30: test_acc=0.1324 + [BP-shallow] ep 40: test_acc=0.1407 + [BP-shallow] ep 50: test_acc=0.1555 + [BP-shallow] ep 60: test_acc=0.1599 + [BP-shallow] ep 70: test_acc=0.1706 + [BP-shallow] ep 80: test_acc=0.1738 + [BP-shallow] ep 90: test_acc=0.1780 + [BP-shallow] ep 100: test_acc=0.1787 +FINAL BP-shallow: 0.1787 + +=== BP frozen-blocks (ResMLP num_blocks=4, blocks frozen), seed=42 === + n_params: 1341284 (812900 trainable) + [BP-frozen] ep 1: test_acc=0.1109 + [BP-frozen] ep 10: test_acc=0.1313 + [BP-frozen] ep 20: test_acc=0.1252 + [BP-frozen] ep 30: test_acc=0.1271 + [BP-frozen] ep 40: test_acc=0.1338 + [BP-frozen] ep 50: test_acc=0.1557 + [BP-frozen] ep 60: test_acc=0.1613 + [BP-frozen] ep 70: test_acc=0.1713 + [BP-frozen] ep 80: test_acc=0.1751 + [BP-frozen] ep 90: test_acc=0.1764 + [BP-frozen] ep 100: test_acc=0.1770 +FINAL BP-frozen-blocks: 0.1770 + +=== DFA shallow (ResMLP num_blocks=0), seed=42 === + n_params: 812900 (812900 trainable) + [DFA-shallow] ep 1: test_acc=0.0914 + [DFA-shallow] ep 10: test_acc=0.1120 + [DFA-shallow] ep 20: test_acc=0.1130 + [DFA-shallow] ep 30: test_acc=0.1198 + [DFA-shallow] ep 40: test_acc=0.1170 + [DFA-shallow] ep 50: test_acc=0.1211 + [DFA-shallow] ep 60: test_acc=0.1248 + [DFA-shallow] ep 70: test_acc=0.1203 + [DFA-shallow] ep 80: test_acc=0.1248 + [DFA-shallow] ep 90: test_acc=0.1254 + [DFA-shallow] ep 100: test_acc=0.1255 +FINAL DFA-shallow: 0.1255 + +=== DFA frozen-blocks (ResMLP num_blocks=4, blocks frozen), seed=42 === + n_params: 1341284 (812900 trainable) + [DFA-frozen] ep 1: test_acc=0.0920 + [DFA-frozen] ep 10: test_acc=0.1004 + [DFA-frozen] ep 20: test_acc=0.1171 + [DFA-frozen] ep 30: test_acc=0.1141 + [DFA-frozen] ep 40: test_acc=0.1207 + [DFA-frozen] ep 50: test_acc=0.1208 + [DFA-frozen] ep 60: test_acc=0.1204 + [DFA-frozen] ep 70: test_acc=0.1235 + [DFA-frozen] ep 80: test_acc=0.1243 + [DFA-frozen] ep 90: test_acc=0.1262 + [DFA-frozen] ep 100: test_acc=0.1256 +FINAL DFA-frozen-blocks: 0.1256 + +=== ResMLP frozen/shallow baseline summary, seed=42 === + BP-shallow: 0.1787 + BP-frozen: 0.1770 + DFA-shallow: 0.1255 + DFA-frozen: 0.1256 + +Compare to trainable 4-block ResMLP (3-seed): BP=0.6147 100ep / 0.585 30ep, DFA=0.306 100ep / 0.301 30ep + +Interpretation: + If DFA-frozen ≈ DFA-trainable: blocks are passengers, walk-back parallels ViT + If DFA-frozen << DFA-trainable: ResMLP DFA actually trains the blocks (interesting contrast with ViT) + frozen s=123 (Sun Apr 26 09:54:18 AM CDT 2026) +Device: cuda:0, seed=123, epochs=100, dataset=cifar100 + +=== BP shallow (ResMLP num_blocks=0), seed=123 === + n_params: 812900 (812900 trainable) + [BP-shallow] ep 1: test_acc=0.1098 + [BP-shallow] ep 10: test_acc=0.1309 + [BP-shallow] ep 20: test_acc=0.1203 + [BP-shallow] ep 30: test_acc=0.1262 + [BP-shallow] ep 40: test_acc=0.1415 + [BP-shallow] ep 50: test_acc=0.1532 + [BP-shallow] ep 60: test_acc=0.1622 + [BP-shallow] ep 70: test_acc=0.1725 + [BP-shallow] ep 80: test_acc=0.1751 + [BP-shallow] ep 90: test_acc=0.1745 + [BP-shallow] ep 100: test_acc=0.1756 +FINAL BP-shallow: 0.1756 + +=== BP frozen-blocks (ResMLP num_blocks=4, blocks frozen), seed=123 === + n_params: 1341284 (812900 trainable) + [BP-frozen] ep 1: test_acc=0.1100 + [BP-frozen] ep 10: test_acc=0.1328 + [BP-frozen] ep 20: test_acc=0.1256 + [BP-frozen] ep 30: test_acc=0.1333 + [BP-frozen] ep 40: test_acc=0.1411 + [BP-frozen] ep 50: test_acc=0.1596 + [BP-frozen] ep 60: test_acc=0.1638 + [BP-frozen] ep 70: test_acc=0.1720 + [BP-frozen] ep 80: test_acc=0.1737 + [BP-frozen] ep 90: test_acc=0.1769 + [BP-frozen] ep 100: test_acc=0.1777 +FINAL BP-frozen-blocks: 0.1777 + +=== DFA shallow (ResMLP num_blocks=0), seed=123 === + n_params: 812900 (812900 trainable) + [DFA-shallow] ep 1: test_acc=0.0928 + [DFA-shallow] ep 10: test_acc=0.1025 +st=0.2812 + [BP] Epoch 30: loss=2.5874, train=0.3376, test=0.3021 + [BP] Epoch 40: loss=2.4113, train=0.3699, test=0.3104 + [BP] Epoch 50: loss=2.2468, train=0.4084, test=0.3160 + [BP] Epoch 60: loss=2.1034, train=0.4373, test=0.3209 + [BP] Epoch 70: loss=1.9664, train=0.4711, test=0.3212 + [BP] Epoch 80: loss=1.8659, train=0.4913, test=0.3208 + [BP] Epoch 90: loss=1.8143, train=0.5098, test=0.3201 + [BP] Epoch 100: loss=1.7758, train=0.5158, test=0.3218 + Final test acc: 0.3218 + +--- DFA --- + [DFA] Epoch 1: loss=4.1790, train=0.0644, test=0.0808 + [DFA] Epoch 10: loss=4.1013, train=0.0738, test=0.0764 + [DFA] Epoch 20: loss=4.0720, train=0.0808, test=0.0803 + [DFA] Epoch 30: loss=4.0493, train=0.0865, test=0.0845 + [DFA] Epoch 40: loss=4.0403, train=0.0866, test=0.0855 + [DFA] Epoch 50: loss=4.0321, train=0.0897, test=0.0852 + [DFA] Epoch 60: loss=4.0243, train=0.0921, test=0.0856 + [DFA] Epoch 70: loss=4.0213, train=0.0924, test=0.0868 + [DFA] Epoch 80: loss=4.0207, train=0.0933, test=0.0867 + [DFA] Epoch 90: loss=4.0178, train=0.0948, test=0.0875 + [DFA] Epoch 100: loss=4.0181, train=0.0932, test=0.0872 + Final test acc: 0.0872 + +--- FA --- + [FA] Epoch 1: loss=4.1971, train=0.0632, test=0.0708 + [FA] Epoch 10: loss=4.0477, train=0.0854, test=0.0847 + [FA] Epoch 20: loss=3.9867, train=0.0968, test=0.0997 + [FA] Epoch 30: loss=3.9504, train=0.1036, test=0.1037 + [FA] Epoch 40: loss=3.9204, train=0.1070, test=0.1068 + [FA] Epoch 50: loss=3.8915, train=0.1107, test=0.1091 + [FA] Epoch 60: loss=3.8680, train=0.1147, test=0.1135 + [FA] Epoch 70: loss=3.8517, train=0.1166, test=0.1156 + [FA] Epoch 80: loss=3.8433, train=0.1188, test=0.1182 + [FA] Epoch 90: loss=3.8342, train=0.1202, test=0.1215 + [FA] Epoch 100: loss=3.8330, train=0.1228, test=0.1208 + Final test acc: 0.1208 + +All results saved to results/cifar100_d256_L4/results_cifar100.json + methods s=456 (Sun Apr 26 09:51:41 AM CDT 2026) +Using device: cuda:0 + +============================================================ +Seed 456 +============================================================ + +--- BP --- + [BP] Epoch 1: loss=3.9722, train=0.0978, test=0.1436 + [BP] Epoch 10: loss=3.0679, train=0.2433, test=0.2496 + [BP] Epoch 20: loss=2.7902, train=0.2983, test=0.2857 + [BP] Epoch 30: loss=2.5920, train=0.3374, test=0.3018 + [BP] Epoch 40: loss=2.4046, train=0.3747, test=0.3166 + [BP] Epoch 50: loss=2.2421, train=0.4090, test=0.3165 + [BP] Epoch 60: loss=2.0908, train=0.4420, test=0.3204 + [BP] Epoch 70: loss=1.9548, train=0.4750, test=0.3202 + [BP] Epoch 80: loss=1.8580, train=0.4973, test=0.3177 + [BP] Epoch 90: loss=1.8029, train=0.5128, test=0.3217 + [BP] Epoch 100: loss=1.7769, train=0.5179, test=0.3219 + Final test acc: 0.3219 + +--- DFA --- + [DFA] Epoch 1: loss=4.1619, train=0.0684, test=0.0832 + [DFA] Epoch 10: loss=4.0780, train=0.0790, test=0.0777 + [DFA] Epoch 20: loss=4.0602, train=0.0848, test=0.0813 + [DFA] Epoch 30: loss=4.0430, train=0.0885, test=0.0878 + [DFA] Epoch 40: loss=4.0391, train=0.0893, test=0.0872 + [DFA] Epoch 50: loss=4.0372, train=0.0914, test=0.0834 + [DFA] Epoch 60: loss=4.0358, train=0.0919, test=0.0884 + [DFA] Epoch 70: loss=4.0340, train=0.0928, test=0.0906 + [DFA] Epoch 80: loss=4.0334, train=0.0926, test=0.0879 + [DFA] Epoch 90: loss=4.0325, train=0.0935, test=0.0898 + [DFA] Epoch 100: loss=4.0329, train=0.0929, test=0.0894 + Final test acc: 0.0894 + +--- FA --- + [FA] Epoch 1: loss=4.2178, train=0.0611, test=0.0534 + [FA] Epoch 10: loss=3.9339, train=0.1008, test=0.0999 + [FA] Epoch 20: loss=3.8903, train=0.1079, test=0.1125 + [FA] Epoch 30: loss=3.8439, train=0.1169, test=0.1138 + [FA] Epoch 40: loss=3.8094, train=0.1220, test=0.1228 + [FA] Epoch 50: loss=3.7933, train=0.1252, test=0.1240 + [FA] Epoch 60: loss=3.7808, train=0.1273, test=0.1275 + [FA] Epoch 70: loss=3.7675, train=0.1281, test=0.1252 + [FA] Epoch 80: loss=3.7592, train=0.1312, test=0.1307 + [FA] Epoch 90: loss=3.7554, train=0.1333, test=0.1311 + [FA] Epoch 100: loss=3.7508, train=0.1319, test=0.1310 + Final test acc: 0.1310 + +All results saved to results/cifar100_d256_L4/results_cifar100.json + +=== CIFAR-100 SCAN DONE (Sun Apr 26 10:11:18 AM CDT 2026) === |
