#!/bin/bash
#
# run_depth_ladder.sh
#
# Depth-utility ladder: trainable-block sweep (BP/FA/DFA) on ResMLP CIFAR-10.
#
# Provenance: commit 1118b7457c261de36ead6103503c00c321c75f9b
#   From: YurenHao0426
#   Date: Sun, 14 Jun 2026 20:32:31 -0500
#   Co-Authored-By: Claude Opus 4.8 (1M context)
#
# Notes carried over from the commit message that introduced this script
# (results are as reported by the author there, not re-verified here):
#
#   Appendix experiment triangulating the depth-utility diagnostic (D3) by
#   varying the number of trainable residual blocks k (last-k trainable,
#   first L-k frozen at init; embed/LN/head always trained).
#
#   - d=256 L=4 and d=512 L=2, 3 seeds, recipe identical to the main audit.
#   - BP climbs monotonically (+22-23pp); DFA peaks at the frozen baseline
#     (k=0) and declines once any deep block is trained; FA shows partial/no
#     net depth utility.
#   - Cross-checks reproduce existing anchors (BP 0.617, DFA 0.301, FA 0.402,
#     frozen 0.349).
#   - frozen_init_identity_check quantifies frozen stack as a
#     near-norm-preserving random feature map (per-block ||f||/||h||~0.10,
#     stack cos 0.981), explaining the above-chance k=0 rung.
#
# Usage:
#   ./run_depth_ladder.sh
#
# Optional environment overrides (defaults reproduce the original hard-coded
# values, so running with a clean environment behaves exactly as before):
#   DEPTH_LADDER_ROOT  project checkout to cd into   (default: /home/yurenh2/fa)
#   DEPTH_LADDER_GPU   value for CUDA_VISIBLE_DEVICES (default: 2)

# -e: stop at the first failing run so a broken sweep is not silently continued.
# -u / pipefail: catch typos in variable names and failures inside pipelines.
set -euo pipefail

readonly DEPTH_LADDER_ROOT="${DEPTH_LADDER_ROOT:-/home/yurenh2/fa}"
readonly DEPTH_LADDER_GPU="${DEPTH_LADDER_GPU:-2}"

#######################################
# Run one ladder sweep of experiments/depth_utility_ladder.py.
# Globals:   DEPTH_LADDER_GPU (read)
# Arguments: $1   - value passed as --d_hidden
#            $2   - value passed as --num_blocks
#            $3.. - values passed as --k_values
# Outputs:   whatever the Python script prints
# Returns:   exit status of the python3 process
#######################################
run_ladder() {
  local d_hidden=$1
  local num_blocks=$2
  shift 2
  local -a k_values=("$@")

  # NOTE(review): --gpu stays 0 while CUDA_VISIBLE_DEVICES selects the physical
  # card; presumably the Python script indexes into the visible devices, where
  # the single exposed GPU is device 0 -- confirm against the script.
  CUDA_VISIBLE_DEVICES="${DEPTH_LADDER_GPU}" \
    python3 experiments/depth_utility_ladder.py \
    --d_hidden "${d_hidden}" --num_blocks "${num_blocks}" --dataset cifar10 \
    --methods bp fa dfa --k_values "${k_values[@]}" --seeds 42 123 456 \
    --epochs 100 --gpu 0 --output_dir results/depth_ladder
}

#######################################
# Entry point: run the primary and the secondary ladder back to back.
# Globals:   DEPTH_LADDER_ROOT (read)
# Arguments: none
# Outputs:   timestamped progress lines on stdout
# Returns:   0 if both sweeps succeed; exits non-zero otherwise
#######################################
main() {
  # The experiment script and the output directory are relative paths, so the
  # working directory must be the project root.
  cd "${DEPTH_LADDER_ROOT}" || {
    printf 'error: cannot cd to %s\n' "${DEPTH_LADDER_ROOT}" >&2
    exit 1
  }

  echo "[$(date)] START primary d=256 L=4 ladder"
  run_ladder 256 4 0 1 2 3 4

  echo "[$(date)] START secondary d=512 L=2 FA-failure ladder"
  run_ladder 512 2 0 1 2

  echo "[$(date)] ALL DONE"
}

main "$@"