# Provenance: this file was recovered from the patch below; the original
# commit metadata is preserved verbatim.
#
# From 1118b7457c261de36ead6103503c00c321c75f9b Mon Sep 17 00:00:00 2001
# From: YurenHao0426
# Date: Sun, 14 Jun 2026 20:32:31 -0500
# Subject: Depth-utility ladder: trainable-block sweep (BP/FA/DFA) on ResMLP CIFAR-10
#
# Appendix experiment triangulating the depth-utility diagnostic (D3) by
# varying the number of trainable residual blocks k (last-k trainable,
# first L-k frozen at init; embed/LN/head always trained).
#
# - d=256 L=4 and d=512 L=2, 3 seeds, recipe identical to the main audit.
# - BP climbs monotonically (+22-23pp); DFA peaks at the frozen baseline (k=0)
#   and declines once any deep block is trained; FA shows partial/no net depth
#   utility.
# - Cross-checks reproduce existing anchors (BP 0.617, DFA 0.301, FA 0.402,
#   frozen 0.349).
# - frozen_init_identity_check quantifies frozen stack as a near-norm-preserving
#   random feature map (per-block ||f||/||h||~0.10, stack cos 0.981), explaining
#   the above-chance k=0 rung.
#
# Co-Authored-By: Claude Opus 4.8 (1M context)
# ---
#  experiments/plot_depth_ladder.py | 63 ++++++++++++++++++++++++++++++++++++++++
#  1 file changed, 63 insertions(+)
#  create mode 100644 experiments/plot_depth_ladder.py
# (limited to 'experiments/plot_depth_ladder.py')
#
# diff --git a/experiments/plot_depth_ladder.py b/experiments/plot_depth_ladder.py
# new file mode 100644
# index 0000000..a5709bf
# --- /dev/null
# +++ b/experiments/plot_depth_ladder.py
# @@ -0,0 +1,63 @@
"""
Plot the depth-utility ladder: test accuracy vs number of trainable blocks k,
one curve per method (BP / FA / DFA), one panel per architecture.

Usage:
    python experiments/plot_depth_ladder.py
"""
import json
import os  # unused in this script; kept from the original import line
import sys  # unused in this script; kept from the original import line

import numpy as np

# One entry per panel: (results JSON path, panel title, L).
# L is the largest k read from the file; rungs k = 0..L are plotted.
CONFIGS = [
    ('results/depth_ladder/ladder_d256_L4_cifar10.json', 'ResMLP d=256, L=4', 4),
    ('results/depth_ladder/ladder_d512_L2_cifar10.json', 'ResMLP d=512, L=2', 2),
]

# One entry per curve: (key in the results JSON, legend label, colour, marker).
METHODS = [
    ('bp', 'BP', 'tab:green', 'o'),
    ('fa', 'FA', 'tab:orange', 's'),
    ('dfa', 'DFA', 'tab:red', '^'),
]


def agg(path, L):
    """Aggregate per-run final accuracies into one mean/std curve per method.

    Args:
        path: Path of a JSON file whose top-level ``'results'`` entry is nested
            as ``results[method][str(k)][run_id]['final_acc']``. The run ids
            are not interpreted here (presumably one per seed).
        L: Largest ``k`` to read; every method must provide rungs
            ``'0'`` .. ``str(L)``.

    Returns:
        A dict keyed by the method keys listed in ``METHODS``. Each value is a
        tuple ``(ks, mu, sd)`` of NumPy arrays of length ``L + 1`` holding the
        rung indices, the mean ``final_acc`` over runs, and the sample standard
        deviation (``ddof=1``), which is reported as 0.0 when a rung has a
        single run.

    Raises:
        KeyError: If a method, a rung, or a ``'final_acc'`` field is missing.
    """
    # Context manager so the file handle is closed deterministically.
    with open(path, encoding='utf-8') as fh:
        results = json.load(fh)['results']

    curves = {}
    for method, *_ in METHODS:
        rungs = list(range(L + 1))
        means, stds = [], []
        for k in rungs:
            accs = [run['final_acc'] for run in results[method][str(k)].values()]
            means.append(np.mean(accs))
            # The ddof=1 estimator is undefined for a single run; use 0.0 so
            # that the error bar simply collapses onto the marker.
            stds.append(np.std(accs, ddof=1) if len(accs) > 1 else 0.0)
        curves[method] = (np.array(rungs), np.array(means), np.array(stds))
    return curves


def main():
    """Draw one ladder panel per ``CONFIGS`` entry and save the figure as PNG.

    Reads every JSON file listed in ``CONFIGS`` through ``agg``, plots
    mean +/- std of the final accuracy against ``k`` for each method, writes
    ``results/depth_ladder/depth_ladder.png`` and prints the output path.
    """
    # Imported here rather than at module level so that importing this module
    # (e.g. to reuse ``agg``) neither requires matplotlib nor changes the
    # process-wide backend. Agg is non-interactive: the figure only goes to disk.
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    n_panels = len(CONFIGS)
    # 5.5 in per panel reproduces the original 11 in width for two panels;
    # squeeze=False keeps ``axes`` two-dimensional even for a single panel.
    fig, axes = plt.subplots(1, n_panels, figsize=(5.5 * n_panels, 4.2),
                             squeeze=False)

    for ax, (path, title, L) in zip(axes[0], CONFIGS):
        curves = agg(path, L)
        for method, label, color, marker in METHODS:
            ks, mu, sd = curves[method]
            ax.errorbar(ks, mu, yerr=sd, marker=marker, color=color,
                        label=label, capsize=3, lw=2, ms=7)

        # Chance-level reference for 10-class CIFAR-10 (accuracy 0.10). This
        # is not the frozen baseline: the k=0 rung is the first point of each
        # curve.
        ax.axhline(0.10, ls=':', color='gray', lw=1)
        # get_yaxis_transform(): x in axes fraction, y in data coordinates.
        ax.text(0.02, 0.105, 'chance', color='gray', fontsize=8,
                transform=ax.get_yaxis_transform())

        ax.set_xlabel('trainable blocks $k$ (last $k$ of $L$)')
        ax.set_ylabel('CIFAR-10 test accuracy')
        ax.set_title(title)
        ax.set_xticks(range(L + 1))
        ax.grid(alpha=0.3)
        ax.legend(loc='center right')

    fig.suptitle('Depth-utility ladder: does training deeper blocks raise accuracy?', y=1.02)
    fig.tight_layout()
    out = 'results/depth_ladder/depth_ladder.png'
    # bbox_inches='tight' keeps the suptitle placed above the axes (y=1.02).
    fig.savefig(out, dpi=150, bbox_inches='tight')
    plt.close(fig)
    print(f"Saved -> {out}")


if __name__ == '__main__':
    main()

# -- cgit v1.2.3