summaryrefslogtreecommitdiff
path: root/results/depth_ladder/frozen_init_identity.json
diff options
context:
space:
mode:
authorYurenHao0426 <Blackhao0426@gmail.com>2026-06-14 20:32:31 -0500
committerYurenHao0426 <Blackhao0426@gmail.com>2026-06-14 20:32:31 -0500
commit1118b7457c261de36ead6103503c00c321c75f9b (patch)
tree7ea76b32f070cb58458caaa2897a5d8133561f48 /results/depth_ladder/frozen_init_identity.json
parentaa73718eb6427d7da3b9cb416275802d90c4b2ed (diff)
Depth-utility ladder: trainable-block sweep (BP/FA/DFA) on ResMLP CIFAR-10 (HEAD, master)
Appendix experiment triangulating the depth-utility diagnostic (D3) by varying the number of trainable residual blocks k (last k trainable, first L-k frozen at init; embed/LN/head always trained).

- d=256 L=4 and d=512 L=2, 3 seeds, recipe identical to the main audit.
- BP climbs monotonically (+22-23pp); DFA peaks at the frozen baseline (k=0) and declines once any deep block is trained; FA shows partial/no net depth utility.
- Cross-checks reproduce existing anchors (BP 0.617, DFA 0.301, FA 0.402, frozen 0.349).
- frozen_init_identity_check quantifies the frozen stack as a near-norm-preserving random feature map (per-block ||f||/||h|| ~ 0.10, stack cos 0.981), explaining the above-chance k=0 rung.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Diffstat (limited to 'results/depth_ladder/frozen_init_identity.json')
-rw-r--r--results/depth_ladder/frozen_init_identity.json57
1 files changed, 57 insertions, 0 deletions
diff --git a/results/depth_ladder/frozen_init_identity.json b/results/depth_ladder/frozen_init_identity.json
new file mode 100644
index 0000000..1c7048b
--- /dev/null
+++ b/results/depth_ladder/frozen_init_identity.json
@@ -0,0 +1,57 @@
+{
+ "config": {
+ "d_hidden": 256,
+ "L": 4,
+ "num_classes": 10,
+ "batch": 256,
+ "dataset": "cifar10-test",
+ "seeds": [
+ 42,
+ 123,
+ 456
+ ]
+ },
+ "per_seed": {
+ "42": {
+ "per_block_ratio": [
+ 0.09595257043838501,
+ 0.0955488458275795,
+ 0.09637212753295898,
+ 0.09818045794963837
+ ],
+ "rel_dev": 0.1959637552499771,
+ "cos": 0.9811521172523499
+ },
+ "123": {
+ "per_block_ratio": [
+ 0.09584859013557434,
+ 0.09690074622631073,
+ 0.10017187148332596,
+ 0.09818752110004425
+ ],
+ "rel_dev": 0.19837374985218048,
+ "cos": 0.9805399179458618
+ },
+ "456": {
+ "per_block_ratio": [
+ 0.09482444077730179,
+ 0.09799206256866455,
+ 0.09791108965873718,
+ 0.09693857282400131
+ ],
+ "rel_dev": 0.19332122802734375,
+ "cos": 0.9819751381874084
+ }
+ },
+ "per_block_ratio_mean": [
+ 0.09554186711708705,
+ 0.09681388487418492,
+ 0.09815169622500737,
+ 0.09776885062456131
+ ],
+ "per_block_ratio_grand_mean": 0.09706907471021016,
+ "rel_dev_mean": 0.19588624437650046,
+ "rel_dev_std": 0.0025271525773572136,
+ "cos_mean": 0.98122239112854,
+ "cos_std": 0.0007201861555822825
+}
\ No newline at end of file