From 1118b7457c261de36ead6103503c00c321c75f9b Mon Sep 17 00:00:00 2001 From: YurenHao0426 Date: Sun, 14 Jun 2026 20:32:31 -0500 Subject: Depth-utility ladder: trainable-block sweep (BP/FA/DFA) on ResMLP CIFAR-10 Appendix experiment triangulating the depth-utility diagnostic (D3) by varying the number of trainable residual blocks k (last-k trainable, first L-k frozen at init; embed/LN/head always trained). - d=256 L=4 and d=512 L=2, 3 seeds, recipe identical to the main audit. - BP climbs monotonically (+22-23pp); DFA peaks at the frozen baseline (k=0) and declines once any deep block is trained; FA shows partial/no net depth utility. - Cross-checks reproduce existing anchors (BP 0.617, DFA 0.301, FA 0.402, frozen 0.349). - frozen_init_identity_check quantifies frozen stack as a near-norm-preserving random feature map (per-block ||f||/||h||~0.10, stack cos 0.981), explaining the above-chance k=0 rung. Co-Authored-By: Claude Opus 4.8 (1M context) --- results/depth_ladder/frozen_init_identity.json | 57 ++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 results/depth_ladder/frozen_init_identity.json (limited to 'results/depth_ladder/frozen_init_identity.json') diff --git a/results/depth_ladder/frozen_init_identity.json b/results/depth_ladder/frozen_init_identity.json new file mode 100644 index 0000000..1c7048b --- /dev/null +++ b/results/depth_ladder/frozen_init_identity.json @@ -0,0 +1,57 @@ +{ + "config": { + "d_hidden": 256, + "L": 4, + "num_classes": 10, + "batch": 256, + "dataset": "cifar10-test", + "seeds": [ + 42, + 123, + 456 + ] + }, + "per_seed": { + "42": { + "per_block_ratio": [ + 0.09595257043838501, + 0.0955488458275795, + 0.09637212753295898, + 0.09818045794963837 + ], + "rel_dev": 0.1959637552499771, + "cos": 0.9811521172523499 + }, + "123": { + "per_block_ratio": [ + 0.09584859013557434, + 0.09690074622631073, + 0.10017187148332596, + 0.09818752110004425 + ], + "rel_dev": 0.19837374985218048, + "cos": 0.9805399179458618 + }, + "456": { + "per_block_ratio": [ + 0.09482444077730179, + 0.09799206256866455, + 0.09791108965873718, + 0.09693857282400131 + ], + "rel_dev": 0.19332122802734375, + "cos": 0.9819751381874084 + } + }, + "per_block_ratio_mean": [ + 0.09554186711708705, + 0.09681388487418492, + 0.09815169622500737, + 0.09776885062456131 + ], + "per_block_ratio_grand_mean": 0.09706907471021016, + "rel_dev_mean": 0.19588624437650046, + "rel_dev_std": 0.0025271525773572136, + "cos_mean": 0.98122239112854, + "cos_std": 0.0007201861555822825 +} \ No newline at end of file -- cgit v1.2.3