summaryrefslogtreecommitdiff
path: root/experiments/plot_depth_ladder.py
diff options
context:
space:
mode:
author: YurenHao0426 <Blackhao0426@gmail.com> 2026-06-14 20:32:31 -0500
committer: YurenHao0426 <Blackhao0426@gmail.com> 2026-06-14 20:32:31 -0500
commit: 1118b7457c261de36ead6103503c00c321c75f9b (patch)
tree: 7ea76b32f070cb58458caaa2897a5d8133561f48 /experiments/plot_depth_ladder.py
parent: aa73718eb6427d7da3b9cb416275802d90c4b2ed (diff)
Depth-utility ladder: trainable-block sweep (BP/FA/DFA) on ResMLP CIFAR-10 (HEAD, master)
Appendix experiment triangulating the depth-utility diagnostic (D3) by varying the number of trainable residual blocks k (last-k trainable, first L-k frozen at init; embed/LN/head always trained).
- d=256 L=4 and d=512 L=2, 3 seeds, recipe identical to the main audit.
- BP climbs monotonically (+22-23pp); DFA peaks at the frozen baseline (k=0) and declines once any deep block is trained; FA shows partial/no net depth utility.
- Cross-checks reproduce existing anchors (BP 0.617, DFA 0.301, FA 0.402, frozen 0.349).
- frozen_init_identity_check quantifies frozen stack as a near-norm-preserving random feature map (per-block ||f||/||h||~0.10, stack cos 0.981), explaining the above-chance k=0 rung.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Diffstat (limited to 'experiments/plot_depth_ladder.py')
-rw-r--r--experiments/plot_depth_ladder.py63
1 files changed, 63 insertions, 0 deletions
diff --git a/experiments/plot_depth_ladder.py b/experiments/plot_depth_ladder.py
new file mode 100644
index 0000000..a5709bf
--- /dev/null
+++ b/experiments/plot_depth_ladder.py
@@ -0,0 +1,63 @@
+"""
+Plot the depth-utility ladder: test accuracy vs number of trainable blocks k,
+one curve per method (BP / FA / DFA), one panel per architecture.
+
+Usage:
+ python experiments/plot_depth_ladder.py
+"""
+import os, sys, json
+import numpy as np
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+
+CONFIGS = [
+ ('results/depth_ladder/ladder_d256_L4_cifar10.json', 'ResMLP d=256, L=4', 4),
+ ('results/depth_ladder/ladder_d512_L2_cifar10.json', 'ResMLP d=512, L=2', 2),
+]
+METHODS = [('bp', 'BP', 'tab:green', 'o'),
+ ('fa', 'FA', 'tab:orange', 's'),
+ ('dfa', 'DFA', 'tab:red', '^')]
+
+
+def agg(path, L):
+ d = json.load(open(path))['results']
+ out = {}
+ for m, _, _, _ in METHODS:
+ ks, mu, sd = [], [], []
+ for k in range(L + 1):
+ a = [v['final_acc'] for v in d[m][str(k)].values()]
+ ks.append(k); mu.append(np.mean(a))
+ sd.append(np.std(a, ddof=1) if len(a) > 1 else 0.0)
+ out[m] = (np.array(ks), np.array(mu), np.array(sd))
+ return out
+
+
+def main():
+ fig, axes = plt.subplots(1, len(CONFIGS), figsize=(11, 4.2))
+ if len(CONFIGS) == 1:
+ axes = [axes]
+ for ax, (path, title, L) in zip(axes, CONFIGS):
+ data = agg(path, L)
+ for m, label, color, mk in METHODS:
+ ks, mu, sd = data[m]
+ ax.errorbar(ks, mu, yerr=sd, marker=mk, color=color, label=label,
+ capsize=3, lw=2, ms=7)
+ # frozen baseline reference (k=0, averaged across methods is ~chance-of-readout)
+ ax.axhline(0.10, ls=':', color='gray', lw=1)
+ ax.text(0.02, 0.105, 'chance', color='gray', fontsize=8, transform=ax.get_yaxis_transform())
+ ax.set_xlabel('trainable blocks $k$ (last $k$ of $L$)')
+ ax.set_ylabel('CIFAR-10 test accuracy')
+ ax.set_title(title)
+ ax.set_xticks(range(L + 1))
+ ax.grid(alpha=0.3)
+ ax.legend(loc='center right')
+ fig.suptitle('Depth-utility ladder: does training deeper blocks raise accuracy?', y=1.02)
+ fig.tight_layout()
+ out = 'results/depth_ladder/depth_ladder.png'
+ fig.savefig(out, dpi=150, bbox_inches='tight')
+ print(f"Saved -> {out}")
+
+
+# Script entry point: build and save the figure only when this file is run
+# directly, not when it is imported.
+if __name__ == '__main__':
+ main()