From aa73718eb6427d7da3b9cb416275802d90c4b2ed Mon Sep 17 00:00:00 2001 From: YurenHao0426 Date: Sun, 14 Jun 2026 04:06:32 -0500 Subject: Add new experiment scripts, figures, and paper assets; untrack pyc/build artifacts Co-Authored-By: Claude Opus 4.8 (1M context) --- results/resmlp_frozen_blocks_s123.log | 73 +++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 results/resmlp_frozen_blocks_s123.log (limited to 'results/resmlp_frozen_blocks_s123.log') diff --git a/results/resmlp_frozen_blocks_s123.log b/results/resmlp_frozen_blocks_s123.log new file mode 100644 index 0000000..728da12 --- /dev/null +++ b/results/resmlp_frozen_blocks_s123.log @@ -0,0 +1,73 @@ +Device: cuda:0, seed=123, epochs=100 + +=== BP shallow (ResMLP num_blocks=0), seed=123 === + n_params: 789770 (789770 trainable) + [BP-shallow] ep 1: test_acc=0.3507 + [BP-shallow] ep 10: test_acc=0.3744 + [BP-shallow] ep 20: test_acc=0.3604 + [BP-shallow] ep 30: test_acc=0.3518 + [BP-shallow] ep 40: test_acc=0.3535 + [BP-shallow] ep 50: test_acc=0.3613 + [BP-shallow] ep 60: test_acc=0.3631 + [BP-shallow] ep 70: test_acc=0.3709 + [BP-shallow] ep 80: test_acc=0.3828 + [BP-shallow] ep 90: test_acc=0.3832 + [BP-shallow] ep 100: test_acc=0.3841 +FINAL BP-shallow: 0.3841 + +=== BP frozen-blocks (ResMLP num_blocks=4, blocks frozen), seed=123 === + n_params: 1318154 (789770 trainable) + [BP-frozen] ep 1: test_acc=0.3517 + [BP-frozen] ep 10: test_acc=0.3653 + [BP-frozen] ep 20: test_acc=0.3538 + [BP-frozen] ep 30: test_acc=0.3501 + [BP-frozen] ep 40: test_acc=0.3558 + [BP-frozen] ep 50: test_acc=0.3528 + [BP-frozen] ep 60: test_acc=0.3701 + [BP-frozen] ep 70: test_acc=0.3750 + [BP-frozen] ep 80: test_acc=0.3855 + [BP-frozen] ep 90: test_acc=0.3855 + [BP-frozen] ep 100: test_acc=0.3896 +FINAL BP-frozen-blocks: 0.3896 + +=== DFA shallow (ResMLP num_blocks=0), seed=123 === + n_params: 789770 (789770 trainable) + [DFA-shallow] ep 1: test_acc=0.3348 + [DFA-shallow] ep 10: test_acc=0.3457 + [DFA-shallow] ep 20: test_acc=0.3484 + [DFA-shallow] ep 30: test_acc=0.3323 + [DFA-shallow] ep 40: test_acc=0.3442 + [DFA-shallow] ep 50: test_acc=0.3460 + [DFA-shallow] ep 60: test_acc=0.3469 + [DFA-shallow] ep 70: test_acc=0.3427 + [DFA-shallow] ep 80: test_acc=0.3432 + [DFA-shallow] ep 90: test_acc=0.3485 + [DFA-shallow] ep 100: test_acc=0.3485 +FINAL DFA-shallow: 0.3485 + +=== DFA frozen-blocks (ResMLP num_blocks=4, blocks frozen), seed=123 === + n_params: 1318154 (789770 trainable) + [DFA-frozen] ep 1: test_acc=0.3182 + [DFA-frozen] ep 10: test_acc=0.3388 + [DFA-frozen] ep 20: test_acc=0.3429 + [DFA-frozen] ep 30: test_acc=0.3286 + [DFA-frozen] ep 40: test_acc=0.3372 + [DFA-frozen] ep 50: test_acc=0.3281 + [DFA-frozen] ep 60: test_acc=0.3417 + [DFA-frozen] ep 70: test_acc=0.3483 + [DFA-frozen] ep 80: test_acc=0.3469 + [DFA-frozen] ep 90: test_acc=0.3479 + [DFA-frozen] ep 100: test_acc=0.3497 +FINAL DFA-frozen-blocks: 0.3497 + +=== ResMLP frozen/shallow baseline summary, seed=123 === + BP-shallow: 0.3841 + BP-frozen: 0.3896 + DFA-shallow: 0.3485 + DFA-frozen: 0.3497 + +Compare to trainable 4-block ResMLP (3-seed mean): BP=0.609, DFA=0.308 + +Interpretation: + If DFA-frozen ≈ DFA-trainable (0.308): blocks are passengers, walk-back parallels ViT + If DFA-frozen << DFA-trainable: ResMLP DFA actually trains the blocks (interesting contrast with ViT) -- cgit v1.2.3