diff options
Diffstat (limited to 'results/cifar100_scan.log')
| -rw-r--r-- | results/cifar100_scan.log | 207 |
1 files changed, 207 insertions, 0 deletions
diff --git a/results/cifar100_scan.log b/results/cifar100_scan.log new file mode 100644 index 0000000..637893f --- /dev/null +++ b/results/cifar100_scan.log @@ -0,0 +1,207 @@ +=== CIFAR-100: d=256 L=4 === +Start: Sun Apr 26 09:13:20 AM CDT 2026 + +--- Frozen baseline --- + frozen s=42 (Sun Apr 26 09:13:20 AM CDT 2026) +Device: cuda:0, seed=42, epochs=100, dataset=cifar100 + +=== BP shallow (ResMLP num_blocks=0), seed=42 === + n_params: 812900 (812900 trainable) + [BP-shallow] ep 1: test_acc=0.1119 + [BP-shallow] ep 10: test_acc=0.1320 + [BP-shallow] ep 20: test_acc=0.1286 + [BP-shallow] ep 30: test_acc=0.1324 + [BP-shallow] ep 40: test_acc=0.1407 + [BP-shallow] ep 50: test_acc=0.1555 + [BP-shallow] ep 60: test_acc=0.1599 + [BP-shallow] ep 70: test_acc=0.1706 + [BP-shallow] ep 80: test_acc=0.1738 + [BP-shallow] ep 90: test_acc=0.1780 + [BP-shallow] ep 100: test_acc=0.1787 +FINAL BP-shallow: 0.1787 + +=== BP frozen-blocks (ResMLP num_blocks=4, blocks frozen), seed=42 === + n_params: 1341284 (812900 trainable) + [BP-frozen] ep 1: test_acc=0.1109 + [BP-frozen] ep 10: test_acc=0.1313 + [BP-frozen] ep 20: test_acc=0.1252 + [BP-frozen] ep 30: test_acc=0.1271 + [BP-frozen] ep 40: test_acc=0.1338 + [BP-frozen] ep 50: test_acc=0.1557 + [BP-frozen] ep 60: test_acc=0.1613 + [BP-frozen] ep 70: test_acc=0.1713 + [BP-frozen] ep 80: test_acc=0.1751 + [BP-frozen] ep 90: test_acc=0.1764 + [BP-frozen] ep 100: test_acc=0.1770 +FINAL BP-frozen-blocks: 0.1770 + +=== DFA shallow (ResMLP num_blocks=0), seed=42 === + n_params: 812900 (812900 trainable) + [DFA-shallow] ep 1: test_acc=0.0914 + [DFA-shallow] ep 10: test_acc=0.1120 + [DFA-shallow] ep 20: test_acc=0.1130 + [DFA-shallow] ep 30: test_acc=0.1198 + [DFA-shallow] ep 40: test_acc=0.1170 + [DFA-shallow] ep 50: test_acc=0.1211 + [DFA-shallow] ep 60: test_acc=0.1248 + [DFA-shallow] ep 70: test_acc=0.1203 + [DFA-shallow] ep 80: test_acc=0.1248 + [DFA-shallow] ep 90: test_acc=0.1254 + [DFA-shallow] ep 100: test_acc=0.1255 +FINAL DFA-shallow: 0.1255 + +=== DFA frozen-blocks (ResMLP num_blocks=4, blocks frozen), seed=42 === + n_params: 1341284 (812900 trainable) + [DFA-frozen] ep 1: test_acc=0.0920 + [DFA-frozen] ep 10: test_acc=0.1004 + [DFA-frozen] ep 20: test_acc=0.1171 + [DFA-frozen] ep 30: test_acc=0.1141 + [DFA-frozen] ep 40: test_acc=0.1207 + [DFA-frozen] ep 50: test_acc=0.1208 + [DFA-frozen] ep 60: test_acc=0.1204 + [DFA-frozen] ep 70: test_acc=0.1235 + [DFA-frozen] ep 80: test_acc=0.1243 + [DFA-frozen] ep 90: test_acc=0.1262 + [DFA-frozen] ep 100: test_acc=0.1256 +FINAL DFA-frozen-blocks: 0.1256 + +=== ResMLP frozen/shallow baseline summary, seed=42 === + BP-shallow: 0.1787 + BP-frozen: 0.1770 + DFA-shallow: 0.1255 + DFA-frozen: 0.1256 + +Compare to trainable 4-block ResMLP (3-seed): BP=0.6147 100ep / 0.585 30ep, DFA=0.306 100ep / 0.301 30ep + +Interpretation: + If DFA-frozen ≈ DFA-trainable: blocks are passengers, walk-back parallels ViT + If DFA-frozen << DFA-trainable: ResMLP DFA actually trains the blocks (interesting contrast with ViT) + frozen s=123 (Sun Apr 26 09:54:18 AM CDT 2026) +Device: cuda:0, seed=123, epochs=100, dataset=cifar100 + +=== BP shallow (ResMLP num_blocks=0), seed=123 === + n_params: 812900 (812900 trainable) + [BP-shallow] ep 1: test_acc=0.1098 + [BP-shallow] ep 10: test_acc=0.1309 + [BP-shallow] ep 20: test_acc=0.1203 + [BP-shallow] ep 30: test_acc=0.1262 + [BP-shallow] ep 40: test_acc=0.1415 + [BP-shallow] ep 50: test_acc=0.1532 + [BP-shallow] ep 60: test_acc=0.1622 + [BP-shallow] ep 70: test_acc=0.1725 + [BP-shallow] ep 80: test_acc=0.1751 + [BP-shallow] ep 90: test_acc=0.1745 + [BP-shallow] ep 100: test_acc=0.1756 +FINAL BP-shallow: 0.1756 + +=== BP frozen-blocks (ResMLP num_blocks=4, blocks frozen), seed=123 === + n_params: 1341284 (812900 trainable) + [BP-frozen] ep 1: test_acc=0.1100 + [BP-frozen] ep 10: test_acc=0.1328 + [BP-frozen] ep 20: test_acc=0.1256 + [BP-frozen] ep 30: test_acc=0.1333 + [BP-frozen] ep 40: test_acc=0.1411 + [BP-frozen] ep 50: test_acc=0.1596 + [BP-frozen] ep 60: test_acc=0.1638 + [BP-frozen] ep 70: test_acc=0.1720 + [BP-frozen] ep 80: test_acc=0.1737 + [BP-frozen] ep 90: test_acc=0.1769 + [BP-frozen] ep 100: test_acc=0.1777 +FINAL BP-frozen-blocks: 0.1777 + +=== DFA shallow (ResMLP num_blocks=0), seed=123 === + n_params: 812900 (812900 trainable) + [DFA-shallow] ep 1: test_acc=0.0928 + [DFA-shallow] ep 10: test_acc=0.1025 +st=0.2812 + [BP] Epoch 30: loss=2.5874, train=0.3376, test=0.3021 + [BP] Epoch 40: loss=2.4113, train=0.3699, test=0.3104 + [BP] Epoch 50: loss=2.2468, train=0.4084, test=0.3160 + [BP] Epoch 60: loss=2.1034, train=0.4373, test=0.3209 + [BP] Epoch 70: loss=1.9664, train=0.4711, test=0.3212 + [BP] Epoch 80: loss=1.8659, train=0.4913, test=0.3208 + [BP] Epoch 90: loss=1.8143, train=0.5098, test=0.3201 + [BP] Epoch 100: loss=1.7758, train=0.5158, test=0.3218 + Final test acc: 0.3218 + +--- DFA --- + [DFA] Epoch 1: loss=4.1790, train=0.0644, test=0.0808 + [DFA] Epoch 10: loss=4.1013, train=0.0738, test=0.0764 + [DFA] Epoch 20: loss=4.0720, train=0.0808, test=0.0803 + [DFA] Epoch 30: loss=4.0493, train=0.0865, test=0.0845 + [DFA] Epoch 40: loss=4.0403, train=0.0866, test=0.0855 + [DFA] Epoch 50: loss=4.0321, train=0.0897, test=0.0852 + [DFA] Epoch 60: loss=4.0243, train=0.0921, test=0.0856 + [DFA] Epoch 70: loss=4.0213, train=0.0924, test=0.0868 + [DFA] Epoch 80: loss=4.0207, train=0.0933, test=0.0867 + [DFA] Epoch 90: loss=4.0178, train=0.0948, test=0.0875 + [DFA] Epoch 100: loss=4.0181, train=0.0932, test=0.0872 + Final test acc: 0.0872 + +--- FA --- + [FA] Epoch 1: loss=4.1971, train=0.0632, test=0.0708 + [FA] Epoch 10: loss=4.0477, train=0.0854, test=0.0847 + [FA] Epoch 20: loss=3.9867, train=0.0968, test=0.0997 + [FA] Epoch 30: loss=3.9504, train=0.1036, test=0.1037 + [FA] Epoch 40: loss=3.9204, train=0.1070, test=0.1068 + [FA] Epoch 50: loss=3.8915, train=0.1107, test=0.1091 + [FA] Epoch 60: loss=3.8680, train=0.1147, test=0.1135 + [FA] Epoch 70: loss=3.8517, train=0.1166, test=0.1156 + [FA] Epoch 80: loss=3.8433, train=0.1188, test=0.1182 + [FA] Epoch 90: loss=3.8342, train=0.1202, test=0.1215 + [FA] Epoch 100: loss=3.8330, train=0.1228, test=0.1208 + Final test acc: 0.1208 + +All results saved to results/cifar100_d256_L4/results_cifar100.json + methods s=456 (Sun Apr 26 09:51:41 AM CDT 2026) +Using device: cuda:0 + +============================================================ +Seed 456 +============================================================ + +--- BP --- + [BP] Epoch 1: loss=3.9722, train=0.0978, test=0.1436 + [BP] Epoch 10: loss=3.0679, train=0.2433, test=0.2496 + [BP] Epoch 20: loss=2.7902, train=0.2983, test=0.2857 + [BP] Epoch 30: loss=2.5920, train=0.3374, test=0.3018 + [BP] Epoch 40: loss=2.4046, train=0.3747, test=0.3166 + [BP] Epoch 50: loss=2.2421, train=0.4090, test=0.3165 + [BP] Epoch 60: loss=2.0908, train=0.4420, test=0.3204 + [BP] Epoch 70: loss=1.9548, train=0.4750, test=0.3202 + [BP] Epoch 80: loss=1.8580, train=0.4973, test=0.3177 + [BP] Epoch 90: loss=1.8029, train=0.5128, test=0.3217 + [BP] Epoch 100: loss=1.7769, train=0.5179, test=0.3219 + Final test acc: 0.3219 + +--- DFA --- + [DFA] Epoch 1: loss=4.1619, train=0.0684, test=0.0832 + [DFA] Epoch 10: loss=4.0780, train=0.0790, test=0.0777 + [DFA] Epoch 20: loss=4.0602, train=0.0848, test=0.0813 + [DFA] Epoch 30: loss=4.0430, train=0.0885, test=0.0878 + [DFA] Epoch 40: loss=4.0391, train=0.0893, test=0.0872 + [DFA] Epoch 50: loss=4.0372, train=0.0914, test=0.0834 + [DFA] Epoch 60: loss=4.0358, train=0.0919, test=0.0884 + [DFA] Epoch 70: loss=4.0340, train=0.0928, test=0.0906 + [DFA] Epoch 80: loss=4.0334, train=0.0926, test=0.0879 + [DFA] Epoch 90: loss=4.0325, train=0.0935, test=0.0898 + [DFA] Epoch 100: loss=4.0329, train=0.0929, test=0.0894 + Final test acc: 0.0894 + +--- FA --- + [FA] Epoch 1: loss=4.2178, train=0.0611, test=0.0534 + [FA] Epoch 10: loss=3.9339, train=0.1008, test=0.0999 + [FA] Epoch 20: loss=3.8903, train=0.1079, test=0.1125 + [FA] Epoch 30: loss=3.8439, train=0.1169, test=0.1138 + [FA] Epoch 40: loss=3.8094, train=0.1220, test=0.1228 + [FA] Epoch 50: loss=3.7933, train=0.1252, test=0.1240 + [FA] Epoch 60: loss=3.7808, train=0.1273, test=0.1275 + [FA] Epoch 70: loss=3.7675, train=0.1281, test=0.1252 + [FA] Epoch 80: loss=3.7592, train=0.1312, test=0.1307 + [FA] Epoch 90: loss=3.7554, train=0.1333, test=0.1311 + [FA] Epoch 100: loss=3.7508, train=0.1319, test=0.1310 + Final test acc: 0.1310 + +All results saved to results/cifar100_d256_L4/results_cifar100.json + +=== CIFAR-100 SCAN DONE (Sun Apr 26 10:11:18 AM CDT 2026) === |
