From aa73718eb6427d7da3b9cb416275802d90c4b2ed Mon Sep 17 00:00:00 2001 From: YurenHao0426 Date: Sun, 14 Jun 2026 04:06:32 -0500 Subject: Add new experiment scripts, figures, and paper assets; untrack pyc/build artifacts Co-Authored-By: Claude Opus 4.8 (1M context) --- .claude/scheduled_tasks.lock | 1 + .gitignore | 3 + CLAUDE.md | 45 +- FAEval__NeurIPS_template_final.pdf | Bin 0 -> 561851 bytes TRAINING_INVENTORY.md | 115 + experiments/__pycache__/__init__.cpython-313.pyc | Bin 137 -> 0 bytes experiments/__pycache__/toy_lq.cpython-313.pyc | Bin 19620 -> 0 bytes experiments/analyze_snapshot_evolution.py | 60 + experiments/dfa_penalty_freshB.py | 183 + experiments/dfa_penalty_trajectory.py | 135 + experiments/figure_snapshot_evolution.py | 178 + experiments/frozen_baselines_crossarch.py | 191 + experiments/resnet_frozen_blocks_baseline.py | 278 + experiments/resnet_protocol_validation.py | 343 ++ experiments/snapshot_compare_outln.py | 93 + experiments/snapshot_evolution_no_outln.py | 249 + .../snapshot_evolution_residual_explosion.py | 78 + experiments/snapshot_evolution_vit.py | 244 + experiments/snapshot_fa_crossarch.py | 243 + experiments/snapshot_fa_only.py | 38 + experiments/snapshot_fa_studentnet.py | 94 + experiments/snapshot_synth_residual_explosion.py | 195 + experiments/vit_frozen_blocks_baseline.py | 177 + experiments/vit_shallow_baseline.py | 147 + metrics/__pycache__/__init__.cpython-313.pyc | Bin 133 -> 0 bytes metrics/__pycache__/credit_metrics.cpython-313.pyc | Bin 6418 -> 0 bytes models/__pycache__/__init__.cpython-313.pyc | Bin 132 -> 0 bytes models/__pycache__/residual_mlp.cpython-313.pyc | Bin 4692 -> 0 bytes models/__pycache__/state_bridge.cpython-313.pyc | Bin 2468 -> 0 bytes models/__pycache__/value_net.cpython-313.pyc | Bin 5308 -> 0 bytes models/small_resnet.py | 74 + models/vit_mini.py | 109 + paper/figures/3arc.pdf | Bin 0 -> 22447 bytes paper/figures/3arcnew.pdf | Bin 0 -> 22454 bytes paper/figures/3arcnew_cropped.pdf | Bin 0 -> 58401 
bytes paper/figures/fig1_audit_hero.pdf | Bin 0 -> 54506 bytes paper/figures/fig1_audit_hero.png | Bin 0 -> 246321 bytes paper/figures/fig1_combined.pdf | Bin 0 -> 458517 bytes paper/figures/fig1_combined.png | Bin 0 -> 529831 bytes paper/figures/fig1_panels_abc.pdf | Bin 0 -> 47025 bytes paper/figures/fig3a_temporal_resmlp.pdf | Bin 0 -> 31175 bytes paper/figures/fig3a_temporal_resmlp.png | Bin 0 -> 150113 bytes paper/figures/fig3b_crossarch_3row.pdf | Bin 0 -> 52139 bytes paper/figures/fig3b_crossarch_3row.png | Bin 0 -> 380161 bytes paper/figures/fig3b_temporal_crossarch.pdf | Bin 0 -> 40564 bytes paper/figures/fig3b_temporal_crossarch.png | Bin 0 -> 254005 bytes paper/figures/fig4_penalty_rescue.pdf | Bin 34201 -> 37235 bytes paper/figures/fig4_penalty_rescue.png | Bin 0 -> 168931 bytes paper/figures/fig_d512L2_panelA.pdf | Bin 0 -> 30850 bytes paper/figures/fig_d512L2_panelA.png | Bin 0 -> 73483 bytes paper/figures/fig_nooutln_temporal.pdf | Bin 0 -> 30888 bytes paper/figures/fig_nooutln_temporal.png | Bin 0 -> 179525 bytes paper/figures/render_fig1_audit_hero.py | 210 + paper/figures/render_fig3_temporal.py | 192 + paper/figures/render_fig3b_crossarch_3row.py | 123 + paper/figures/render_fig4_penalty.py | 167 + paper/figures/render_fig_d512L2_panelA.py | 92 + paper/figures/render_fig_nooutln_temporal.py | 96 + paper/main.log | 595 ++ .../alpha_sweep_scaffold/alpha_sweep_t5_s42.json | 6489 ++++++++++++++++++++ results/audit_d512_L2_4method.log | 16 + results/audit_d512_L2_6method.log | 35 + results/blend_dissection/dissection_t5_s42.json | 4311 +++++++++++++ .../ablation_a1.0_L4_s42.json | 90 + .../ablation_a1.0_L4_s123.json | 46 + .../ablation_a1.0_L4_s456.json | 46 + .../ablation_a1.0_L4_s42.json | 90 + .../ablation_a1.0_L4_s42.json | 90 + .../ablation_a1.0_L4_s42.json | 90 + results/bp_with_penalty_run.log | 18 + results/checkpointed_handoff/handoff_s42.json | 5828 ++++++++++++++++++ results/cifar100_protocol_validation.log | 371 ++ 
.../results_cifar100.json | 3467 +++++++++++ results/cifar100_scan.log | 179 +- results/cifar_deltaL/cb_deltaL_d512_L4_s42.json | 428 ++ results/cifar_depth_scan_multiseed.log | 81 + .../cifar_depth_scan_multiseed/d512_L4_s123.json | 838 +++ .../cifar_depth_scan_multiseed/d512_L4_s456.json | 838 +++ results/cifar_depth_scan_multiseed/summary.json | 78 + results/cifar_depth_scan_s42/d512_L12_s42.json | 1421 +++++ results/cifar_depth_scan_s42/d512_L2_s42.json | 1151 ++++ results/cifar_depth_scan_s42/d512_L4_s42.json | 1205 ++++ results/cifar_depth_scan_s42/d512_L6_s42.json | 1259 ++++ results/cifar_depth_scan_s42/d512_L8_s42.json | 1313 ++++ results/cifar_depth_scan_s42/summary.json | 186 + .../confirmatory/T1_cifar_full_metrics_gpu1.csv | 41 + results/confirmatory/T2_support_sparsity_FIXED.csv | 961 +++ results/confirmatory/T2_support_sparsity_gpu1.csv | 801 +++ results/confirmatory/T4_active_gamma_gpu1.csv | 101 + .../element_grad_concentration_gpu1.csv | 49 + results/deeper_L_multiseed_scan.log | 1090 ++++ results/dfa_canonical_freshB.log | 13 + .../freshB_null_canonical_s42.json | 56 + .../results_cifar10.json | 549 ++ .../results_cifar10.json | 549 ++ results/dfa_canonical_penalty_sweep.log | 77 + results/dfa_canonical_penalty_trajectory.json | 1700 +++++ results/dfa_canonical_penalty_trajectory.log | 47 + results/dfa_direction_quality_run.log | 3 + results/dfa_pen_short_lam1e-1.log | 15 + results/dfa_pen_short_lam1e-4.log | 15 + results/dfa_pen_short_lam1e-4_s123.log | 15 + results/dfa_pen_short_lam1e-4_s456.log | 15 + results/dfa_pen_short_run.log | 15 + results/dfa_pen_short_s123.log | 15 + results/dfa_pen_short_s456.log | 15 + .../dfa_residual_penalty/dfa_pen_lam0.001_s42.json | 85 + .../dfa_residual_penalty/dfa_pen_lam0.01_s123.json | 85 + .../dfa_residual_penalty/dfa_pen_lam0.01_s42.json | 85 + .../dfa_residual_penalty/dfa_pen_lam0.01_s456.json | 85 + results/dfa_residual_penalty/run_lam1e-1_s42.log | 7 + results/dfa_residual_penalty/run_lam1e-2_s123.log | 
21 + results/dfa_residual_penalty/run_lam1e-2_s42.log | 21 + results/dfa_residual_penalty/run_lam1e-2_s456.log | 21 + results/dfa_residual_penalty/run_lam1e-3_s123.log | 7 + results/dfa_residual_penalty/run_lam1e-3_s42.log | 21 + results/dfa_residual_penalty/run_lam1e-3_s456.log | 7 + results/ep_baseline/ep_s2048.json | 23 + .../exploit_linesearch/linesearch_L4_d256_s42.json | 112 + .../linesearch_L4_d256_s42.json | 1850 ++++++ .../fa_canonical_lam1e-2_30ep/results_cifar10.json | 549 ++ .../fa_canonical_lam1e-4_30ep/results_cifar10.json | 549 ++ results/fa_canonical_penalty_sweep.log | 76 + results/figures/figure_snapshot_resmlp.pdf | Bin 0 -> 40868 bytes results/figures/figure_snapshot_vit.pdf | Bin 0 -> 28184 bytes .../freeze_with_decay_t5_s42.json | 5762 +++++++++++++++++ results/frozen_baselines_crossarch.json | 14 + results/frozen_baselines_crossarch.log | 120 + results/frozen_cifar/frozen_L4_d256_s42.json | 174 + results/frozen_cifar/frozen_L4_d512_s42.json | 174 + results/frozen_cifar/frozen_L6_d256_s42.json | 216 + .../frozen_cifar_vec/frozen_vec_L4_d256_s42.json | 188 + results/h2_no_residual_full_s123.log | 107 + .../snapshot_evolution_s123.json | 2749 +++++++++ results/h2_no_residual_full_s42.log | 210 + .../snapshot_evolution_s42.json | 5475 +++++++++++++++++ results/h2_no_residual_full_s456.log | 107 + .../snapshot_evolution_s456.json | 2749 +++++++++ results/h2_no_residual_s42.log | 16 + results/h2_smoke_w0.1/snapshot_evolution_s42.json | 237 + results/h2_smoke_w0.2/snapshot_evolution_s42.json | 237 + results/h2_smoke_w0.5/snapshot_evolution_s42.json | 237 + results/h2_smoketest/snapshot_evolution_s42.json | 129 + .../minimal_aux_compression_t5_s42.json | 5768 +++++++++++++++++ results/online_shallow/scan_s42.json | 422 ++ results/online_shallow_3seed/scan_s123.json | 62 + results/online_shallow_3seed/scan_s42.json | 62 + results/online_shallow_3seed/scan_s456.json | 62 + results/online_vec_pilot/pilot_s42.json | 177 + 
results/optionA_random_targets_s42.log | 107 + .../snapshot_evolution_s42.json | 2749 +++++++++ results/optionA_smoke/snapshot_evolution_s42.json | 130 + results/optionEP_random_targets_full.log | 7 + .../ep_random_s42.json | 23 + results/optionEP_smoke/ep_random_s42.json | 23 + results/optionSBCB_random_targets_s42.log | 36 + .../results_cifar10.json | 996 +++ results/optionSBCB_smoke/results_cifar10.json | 220 + results/periodic_refit/periodic_refit_s42.json | 414 ++ results/prefit_threshold/prefit_curve_t5_s42.json | 330 + results/resmlp_frozen_blocks_s123.log | 73 + results/resmlp_frozen_blocks_s42.log | 73 + results/resmlp_frozen_blocks_s456.log | 73 + results/resnet_baseline_s42.log | 6 + results/resnet_frozen_blocks_s42.log | 5 + results/resnet_protocol_validation.json | 2999 +++++++++ results/resnet_protocol_validation.log | 161 + results/round38_cb_penalty_30ep_s123.log | 15 + .../results_cifar10.json | 233 + results/round38_cb_penalty_30ep_s456.log | 15 + .../results_cifar10.json | 233 + results/round38_sb_penalty_30ep_s123.log | 14 + .../results_cifar10.json | 233 + results/round38_sb_penalty_30ep_s456.log | 14 + .../results_cifar10.json | 233 + results/round38_sbcb_baseline_30ep.log | 22 + .../results_cifar10.json | 437 ++ results/round38_sbcb_penalty_30ep.log | 22 + .../round38_sbcb_penalty_30ep/results_cifar10.json | 437 ++ .../round38_smoke_sbcb_pen/results_cifar10.json | 213 + results/round41_dfa_penalty_30ep.log | 14 + .../round41_dfa_penalty_30ep/results_cifar10.json | 201 + results/round41_dfa_penalty_30ep_s123.log | 14 + .../results_cifar10.json | 201 + results/round41_dfa_penalty_30ep_s456.log | 14 + .../results_cifar10.json | 201 + results/scaffold_replication/replication.json | 38 + results/schedule_timing/schedules_s42.json | 2018 ++++++ results/snapshot_evolution_v2/run_s123.log | 210 + results/snapshot_evolution_v2/run_s42.log | 210 + results/snapshot_evolution_v2/run_s456.log | 210 + .../snapshot_evolution_s123.json | 5473 
+++++++++++++++++ .../snapshot_evolution_s42.json | 5473 +++++++++++++++++ .../snapshot_evolution_s456.json | 5473 +++++++++++++++++ .../snapshot_fa_canonical_s42.json | 2734 +++++++++ results/snapshot_evolution_v2/snapshot_fa_s42.json | 2734 +++++++++ results/snapshot_evolution_v3_fa.log | 1 + results/snapshot_exploit/snapshot_L4_d256_s42.json | 209 + results/snapshot_fa_canonical_all.log | 142 + results/snapshot_fa_canonical_noln.log | 13 + results/snapshot_fa_crossarch.log | 28 + results/snapshot_fa_evolution.log | 103 + results/snapshot_fa_studentnet_vit.log | 37 + results/snapshot_no_outln_v1/run_s123.log | 51 + results/snapshot_no_outln_v1/run_s42.log | 51 + results/snapshot_no_outln_v1/run_s456.log | 51 + .../snapshot_fa_canonical_noln_s42.json | 1824 ++++++ .../snapshot_no_outln_v1/snapshot_fa_noln_s42.json | 1824 ++++++ .../snapshot_no_outln_v1/snapshot_noLN_s123.json | 4766 ++++++++++++++ .../snapshot_no_outln_v1/snapshot_noLN_s42.json | 4766 ++++++++++++++ .../snapshot_no_outln_v1/snapshot_noLN_s456.json | 4766 ++++++++++++++ results/snapshot_synth_v1/run_a1.0_s42.log | 44 + .../snapshot_fa_canonical_s42.json | 2276 +++++++ .../snapshot_fa_synth_a1.0_L4_s42.json | 2276 +++++++ .../snapshot_synth_a1.0_L4_s42.json | 4960 +++++++++++++++ results/snapshot_time/time_sweep_L4_d256_s42.json | 128 + results/snapshot_vit_v1/run_s123.log | 36 + results/snapshot_vit_v1/run_s42.log | 36 + results/snapshot_vit_v1/run_s456.log | 36 + .../snapshot_vit_v1/snapshot_fa_canonical_s42.json | 1104 ++++ results/snapshot_vit_v1/snapshot_fa_vit_s42.json | 1104 ++++ results/snapshot_vit_v1/snapshot_vit_s123.json | 4594 ++++++++++++++ results/snapshot_vit_v1/snapshot_vit_s42.json | 4594 ++++++++++++++ results/snapshot_vit_v1/snapshot_vit_s456.json | 4594 ++++++++++++++ results/structured_aux/structured_aux_t5_s42.json | 6481 +++++++++++++++++++ results/synth_ladder_smoke/config.json | 30 + results/synth_ladder_smoke/summary.json | 782 +++ 
results/synth_ladder_smoke/synth_a0.0_L2_s42.json | 1172 ++++ results/synth_ladder_smoke/synth_a0.0_L8_s42.json | 1298 ++++ results/synth_ladder_smoke/synth_a0.5_L2_s42.json | 1172 ++++ results/synth_ladder_smoke/synth_a0.5_L8_s42.json | 1298 ++++ results/synth_ladder_smoke/synth_a1.0_L2_s42.json | 1172 ++++ results/synth_ladder_smoke/synth_a1.0_L8_s42.json | 1298 ++++ results/synth_ladder_v2_hi/config.json | 33 + results/synth_ladder_v2_hi/summary.json | 3590 +++++++++++ .../synth_ladder_v2_hi/synth_a0.5_L12_s123.json | 1722 ++++++ results/synth_ladder_v2_hi/synth_a0.5_L12_s42.json | 1722 ++++++ .../synth_ladder_v2_hi/synth_a0.5_L12_s456.json | 1722 ++++++ results/synth_ladder_v2_hi/synth_a0.5_L2_s123.json | 1512 +++++ results/synth_ladder_v2_hi/synth_a0.5_L2_s42.json | 1512 +++++ results/synth_ladder_v2_hi/synth_a0.5_L2_s456.json | 1512 +++++ results/synth_ladder_v2_hi/synth_a0.5_L4_s123.json | 1554 +++++ results/synth_ladder_v2_hi/synth_a0.5_L4_s42.json | 1554 +++++ results/synth_ladder_v2_hi/synth_a0.5_L4_s456.json | 1554 +++++ results/synth_ladder_v2_hi/synth_a0.5_L8_s123.json | 1638 +++++ results/synth_ladder_v2_hi/synth_a0.5_L8_s42.json | 1638 +++++ results/synth_ladder_v2_hi/synth_a0.5_L8_s456.json | 1638 +++++ .../synth_ladder_v2_hi/synth_a1.0_L12_s123.json | 1722 ++++++ results/synth_ladder_v2_hi/synth_a1.0_L12_s42.json | 1722 ++++++ .../synth_ladder_v2_hi/synth_a1.0_L12_s456.json | 1722 ++++++ results/synth_ladder_v2_hi/synth_a1.0_L2_s123.json | 1512 +++++ results/synth_ladder_v2_hi/synth_a1.0_L2_s42.json | 1512 +++++ results/synth_ladder_v2_hi/synth_a1.0_L2_s456.json | 1512 +++++ results/synth_ladder_v2_hi/synth_a1.0_L4_s123.json | 1554 +++++ results/synth_ladder_v2_hi/synth_a1.0_L4_s42.json | 1554 +++++ results/synth_ladder_v2_hi/synth_a1.0_L4_s456.json | 1554 +++++ results/synth_ladder_v2_hi/synth_a1.0_L8_s123.json | 1638 +++++ results/synth_ladder_v2_hi/synth_a1.0_L8_s42.json | 1638 +++++ results/synth_ladder_v2_hi/synth_a1.0_L8_s456.json | 1638 
+++++ results/synth_ladder_v2_lo/config.json | 33 + results/synth_ladder_v2_lo/summary.json | 3590 +++++++++++ .../synth_ladder_v2_lo/synth_a0.0_L12_s123.json | 1722 ++++++ results/synth_ladder_v2_lo/synth_a0.0_L12_s42.json | 1722 ++++++ .../synth_ladder_v2_lo/synth_a0.0_L12_s456.json | 1722 ++++++ results/synth_ladder_v2_lo/synth_a0.0_L2_s123.json | 1512 +++++ results/synth_ladder_v2_lo/synth_a0.0_L2_s42.json | 1512 +++++ results/synth_ladder_v2_lo/synth_a0.0_L2_s456.json | 1512 +++++ results/synth_ladder_v2_lo/synth_a0.0_L4_s123.json | 1554 +++++ results/synth_ladder_v2_lo/synth_a0.0_L4_s42.json | 1554 +++++ results/synth_ladder_v2_lo/synth_a0.0_L4_s456.json | 1554 +++++ results/synth_ladder_v2_lo/synth_a0.0_L8_s123.json | 1638 +++++ results/synth_ladder_v2_lo/synth_a0.0_L8_s42.json | 1638 +++++ results/synth_ladder_v2_lo/synth_a0.0_L8_s456.json | 1638 +++++ .../synth_ladder_v2_lo/synth_a0.25_L12_s123.json | 1722 ++++++ .../synth_ladder_v2_lo/synth_a0.25_L12_s42.json | 1722 ++++++ .../synth_ladder_v2_lo/synth_a0.25_L12_s456.json | 1722 ++++++ .../synth_ladder_v2_lo/synth_a0.25_L2_s123.json | 1512 +++++ results/synth_ladder_v2_lo/synth_a0.25_L2_s42.json | 1512 +++++ .../synth_ladder_v2_lo/synth_a0.25_L2_s456.json | 1512 +++++ .../synth_ladder_v2_lo/synth_a0.25_L4_s123.json | 1554 +++++ results/synth_ladder_v2_lo/synth_a0.25_L4_s42.json | 1554 +++++ .../synth_ladder_v2_lo/synth_a0.25_L4_s456.json | 1554 +++++ .../synth_ladder_v2_lo/synth_a0.25_L8_s123.json | 1638 +++++ results/synth_ladder_v2_lo/synth_a0.25_L8_s42.json | 1638 +++++ .../synth_ladder_v2_lo/synth_a0.25_L8_s456.json | 1638 +++++ results/topdown_curriculum/topdown_s42.json | 620 ++ results/update_swap/update_swap_L4_d256_s42.json | 326 + results/vector_audit/audit_results.json | 184 + results/vector_audit_full/audit_results.json | 1346 ++++ results/vector_credit_pilot/results.json | 590 ++ results/vit_dfa_train_s42.log | 4 + results/vit_frozen_blocks_s123.log | 34 + results/vit_frozen_blocks_s42.log | 
34 + results/vit_frozen_blocks_s456.log | 34 + results/vit_shallow_baseline_s42.log | 28 + results/vit_test/snapshot_vit_s42.json | 244 + 296 files changed, 252591 insertions(+), 4 deletions(-) create mode 100644 .claude/scheduled_tasks.lock create mode 100644 FAEval__NeurIPS_template_final.pdf create mode 100644 TRAINING_INVENTORY.md delete mode 100644 experiments/__pycache__/__init__.cpython-313.pyc delete mode 100644 experiments/__pycache__/toy_lq.cpython-313.pyc create mode 100644 experiments/analyze_snapshot_evolution.py create mode 100644 experiments/dfa_penalty_freshB.py create mode 100644 experiments/dfa_penalty_trajectory.py create mode 100644 experiments/figure_snapshot_evolution.py create mode 100644 experiments/frozen_baselines_crossarch.py create mode 100644 experiments/resnet_frozen_blocks_baseline.py create mode 100644 experiments/resnet_protocol_validation.py create mode 100644 experiments/snapshot_compare_outln.py create mode 100644 experiments/snapshot_evolution_no_outln.py create mode 100644 experiments/snapshot_evolution_vit.py create mode 100644 experiments/snapshot_fa_crossarch.py create mode 100644 experiments/snapshot_fa_only.py create mode 100644 experiments/snapshot_fa_studentnet.py create mode 100644 experiments/snapshot_synth_residual_explosion.py create mode 100644 experiments/vit_frozen_blocks_baseline.py create mode 100644 experiments/vit_shallow_baseline.py delete mode 100644 metrics/__pycache__/__init__.cpython-313.pyc delete mode 100644 metrics/__pycache__/credit_metrics.cpython-313.pyc delete mode 100644 models/__pycache__/__init__.cpython-313.pyc delete mode 100644 models/__pycache__/residual_mlp.cpython-313.pyc delete mode 100644 models/__pycache__/state_bridge.cpython-313.pyc delete mode 100644 models/__pycache__/value_net.cpython-313.pyc create mode 100644 models/small_resnet.py create mode 100644 models/vit_mini.py create mode 100644 paper/figures/3arc.pdf create mode 100644 paper/figures/3arcnew.pdf create mode 100644 
paper/figures/3arcnew_cropped.pdf create mode 100644 paper/figures/fig1_audit_hero.pdf create mode 100644 paper/figures/fig1_audit_hero.png create mode 100644 paper/figures/fig1_combined.pdf create mode 100644 paper/figures/fig1_combined.png create mode 100644 paper/figures/fig1_panels_abc.pdf create mode 100644 paper/figures/fig3a_temporal_resmlp.pdf create mode 100644 paper/figures/fig3a_temporal_resmlp.png create mode 100644 paper/figures/fig3b_crossarch_3row.pdf create mode 100644 paper/figures/fig3b_crossarch_3row.png create mode 100644 paper/figures/fig3b_temporal_crossarch.pdf create mode 100644 paper/figures/fig3b_temporal_crossarch.png create mode 100644 paper/figures/fig4_penalty_rescue.png create mode 100644 paper/figures/fig_d512L2_panelA.pdf create mode 100644 paper/figures/fig_d512L2_panelA.png create mode 100644 paper/figures/fig_nooutln_temporal.pdf create mode 100644 paper/figures/fig_nooutln_temporal.png create mode 100644 paper/figures/render_fig1_audit_hero.py create mode 100644 paper/figures/render_fig3_temporal.py create mode 100644 paper/figures/render_fig3b_crossarch_3row.py create mode 100644 paper/figures/render_fig4_penalty.py create mode 100644 paper/figures/render_fig_d512L2_panelA.py create mode 100644 paper/figures/render_fig_nooutln_temporal.py create mode 100644 paper/main.log create mode 100644 results/alpha_sweep_scaffold/alpha_sweep_t5_s42.json create mode 100644 results/audit_d512_L2_4method.log create mode 100644 results/audit_d512_L2_6method.log create mode 100644 results/blend_dissection/dissection_t5_s42.json create mode 100644 results/boundary_ablation_deltaL_wr/ablation_a1.0_L4_s42.json create mode 100644 results/boundary_ablation_s123/ablation_a1.0_L4_s123.json create mode 100644 results/boundary_ablation_s456/ablation_a1.0_L4_s456.json create mode 100644 results/boundary_ablation_s_sweep/ablation_a1.0_L4_s42.json create mode 100644 results/boundary_ablation_tgw_sweep/ablation_a1.0_L4_s42.json create mode 100644 
results/boundary_ablation_wr_sweep/ablation_a1.0_L4_s42.json create mode 100644 results/bp_with_penalty_run.log create mode 100644 results/checkpointed_handoff/handoff_s42.json create mode 100644 results/cifar100_protocol_validation.log create mode 100644 results/cifar100_protocol_validation/results_cifar100.json create mode 100644 results/cifar_deltaL/cb_deltaL_d512_L4_s42.json create mode 100644 results/cifar_depth_scan_multiseed.log create mode 100644 results/cifar_depth_scan_multiseed/d512_L4_s123.json create mode 100644 results/cifar_depth_scan_multiseed/d512_L4_s456.json create mode 100644 results/cifar_depth_scan_multiseed/summary.json create mode 100644 results/cifar_depth_scan_s42/d512_L12_s42.json create mode 100644 results/cifar_depth_scan_s42/d512_L2_s42.json create mode 100644 results/cifar_depth_scan_s42/d512_L4_s42.json create mode 100644 results/cifar_depth_scan_s42/d512_L6_s42.json create mode 100644 results/cifar_depth_scan_s42/d512_L8_s42.json create mode 100644 results/cifar_depth_scan_s42/summary.json create mode 100644 results/confirmatory/T1_cifar_full_metrics_gpu1.csv create mode 100644 results/confirmatory/T2_support_sparsity_FIXED.csv create mode 100644 results/confirmatory/T2_support_sparsity_gpu1.csv create mode 100644 results/confirmatory/T4_active_gamma_gpu1.csv create mode 100644 results/confirmatory/element_grad_concentration_gpu1.csv create mode 100644 results/deeper_L_multiseed_scan.log create mode 100644 results/dfa_canonical_freshB.log create mode 100644 results/dfa_canonical_freshB/freshB_null_canonical_s42.json create mode 100644 results/dfa_canonical_lam1e-2_30ep/results_cifar10.json create mode 100644 results/dfa_canonical_lam1e-4_30ep/results_cifar10.json create mode 100644 results/dfa_canonical_penalty_sweep.log create mode 100644 results/dfa_canonical_penalty_trajectory.json create mode 100644 results/dfa_canonical_penalty_trajectory.log create mode 100644 results/dfa_direction_quality_run.log create mode 100644 
results/dfa_pen_short_lam1e-1.log create mode 100644 results/dfa_pen_short_lam1e-4.log create mode 100644 results/dfa_pen_short_lam1e-4_s123.log create mode 100644 results/dfa_pen_short_lam1e-4_s456.log create mode 100644 results/dfa_pen_short_run.log create mode 100644 results/dfa_pen_short_s123.log create mode 100644 results/dfa_pen_short_s456.log create mode 100644 results/dfa_residual_penalty/dfa_pen_lam0.001_s42.json create mode 100644 results/dfa_residual_penalty/dfa_pen_lam0.01_s123.json create mode 100644 results/dfa_residual_penalty/dfa_pen_lam0.01_s42.json create mode 100644 results/dfa_residual_penalty/dfa_pen_lam0.01_s456.json create mode 100644 results/dfa_residual_penalty/run_lam1e-1_s42.log create mode 100644 results/dfa_residual_penalty/run_lam1e-2_s123.log create mode 100644 results/dfa_residual_penalty/run_lam1e-2_s42.log create mode 100644 results/dfa_residual_penalty/run_lam1e-2_s456.log create mode 100644 results/dfa_residual_penalty/run_lam1e-3_s123.log create mode 100644 results/dfa_residual_penalty/run_lam1e-3_s42.log create mode 100644 results/dfa_residual_penalty/run_lam1e-3_s456.log create mode 100644 results/ep_baseline/ep_s2048.json create mode 100644 results/exploit_linesearch/linesearch_L4_d256_s42.json create mode 100644 results/exploit_linesearch_full/linesearch_L4_d256_s42.json create mode 100644 results/fa_canonical_lam1e-2_30ep/results_cifar10.json create mode 100644 results/fa_canonical_lam1e-4_30ep/results_cifar10.json create mode 100644 results/fa_canonical_penalty_sweep.log create mode 100644 results/figures/figure_snapshot_resmlp.pdf create mode 100644 results/figures/figure_snapshot_vit.pdf create mode 100644 results/freeze_with_decay/freeze_with_decay_t5_s42.json create mode 100644 results/frozen_baselines_crossarch.json create mode 100644 results/frozen_baselines_crossarch.log create mode 100644 results/frozen_cifar/frozen_L4_d256_s42.json create mode 100644 results/frozen_cifar/frozen_L4_d512_s42.json create mode 100644 
results/frozen_cifar/frozen_L6_d256_s42.json create mode 100644 results/frozen_cifar_vec/frozen_vec_L4_d256_s42.json create mode 100644 results/h2_no_residual_full_s123.log create mode 100644 results/h2_no_residual_full_s123/snapshot_evolution_s123.json create mode 100644 results/h2_no_residual_full_s42.log create mode 100644 results/h2_no_residual_full_s42/snapshot_evolution_s42.json create mode 100644 results/h2_no_residual_full_s456.log create mode 100644 results/h2_no_residual_full_s456/snapshot_evolution_s456.json create mode 100644 results/h2_no_residual_s42.log create mode 100644 results/h2_smoke_w0.1/snapshot_evolution_s42.json create mode 100644 results/h2_smoke_w0.2/snapshot_evolution_s42.json create mode 100644 results/h2_smoke_w0.5/snapshot_evolution_s42.json create mode 100644 results/h2_smoketest/snapshot_evolution_s42.json create mode 100644 results/minimal_aux_compression/minimal_aux_compression_t5_s42.json create mode 100644 results/online_shallow/scan_s42.json create mode 100644 results/online_shallow_3seed/scan_s123.json create mode 100644 results/online_shallow_3seed/scan_s42.json create mode 100644 results/online_shallow_3seed/scan_s456.json create mode 100644 results/online_vec_pilot/pilot_s42.json create mode 100644 results/optionA_random_targets_s42.log create mode 100644 results/optionA_random_targets_s42/snapshot_evolution_s42.json create mode 100644 results/optionA_smoke/snapshot_evolution_s42.json create mode 100644 results/optionEP_random_targets_full.log create mode 100644 results/optionEP_random_targets_full/ep_random_s42.json create mode 100644 results/optionEP_smoke/ep_random_s42.json create mode 100644 results/optionSBCB_random_targets_s42.log create mode 100644 results/optionSBCB_random_targets_s42/results_cifar10.json create mode 100644 results/optionSBCB_smoke/results_cifar10.json create mode 100644 results/periodic_refit/periodic_refit_s42.json create mode 100644 results/prefit_threshold/prefit_curve_t5_s42.json create mode 
100644 results/resmlp_frozen_blocks_s123.log create mode 100644 results/resmlp_frozen_blocks_s42.log create mode 100644 results/resmlp_frozen_blocks_s456.log create mode 100644 results/resnet_baseline_s42.log create mode 100644 results/resnet_frozen_blocks_s42.log create mode 100644 results/resnet_protocol_validation.json create mode 100644 results/resnet_protocol_validation.log create mode 100644 results/round38_cb_penalty_30ep_s123.log create mode 100644 results/round38_cb_penalty_30ep_s123/results_cifar10.json create mode 100644 results/round38_cb_penalty_30ep_s456.log create mode 100644 results/round38_cb_penalty_30ep_s456/results_cifar10.json create mode 100644 results/round38_sb_penalty_30ep_s123.log create mode 100644 results/round38_sb_penalty_30ep_s123/results_cifar10.json create mode 100644 results/round38_sb_penalty_30ep_s456.log create mode 100644 results/round38_sb_penalty_30ep_s456/results_cifar10.json create mode 100644 results/round38_sbcb_baseline_30ep.log create mode 100644 results/round38_sbcb_baseline_30ep/results_cifar10.json create mode 100644 results/round38_sbcb_penalty_30ep.log create mode 100644 results/round38_sbcb_penalty_30ep/results_cifar10.json create mode 100644 results/round38_smoke_sbcb_pen/results_cifar10.json create mode 100644 results/round41_dfa_penalty_30ep.log create mode 100644 results/round41_dfa_penalty_30ep/results_cifar10.json create mode 100644 results/round41_dfa_penalty_30ep_s123.log create mode 100644 results/round41_dfa_penalty_30ep_s123/results_cifar10.json create mode 100644 results/round41_dfa_penalty_30ep_s456.log create mode 100644 results/round41_dfa_penalty_30ep_s456/results_cifar10.json create mode 100644 results/scaffold_replication/replication.json create mode 100644 results/schedule_timing/schedules_s42.json create mode 100644 results/snapshot_evolution_v2/run_s123.log create mode 100644 results/snapshot_evolution_v2/run_s42.log create mode 100644 results/snapshot_evolution_v2/run_s456.log create mode 
100644 results/snapshot_evolution_v2/snapshot_evolution_s123.json create mode 100644 results/snapshot_evolution_v2/snapshot_evolution_s42.json create mode 100644 results/snapshot_evolution_v2/snapshot_evolution_s456.json create mode 100644 results/snapshot_evolution_v2/snapshot_fa_canonical_s42.json create mode 100644 results/snapshot_evolution_v2/snapshot_fa_s42.json create mode 100644 results/snapshot_evolution_v3_fa.log create mode 100644 results/snapshot_exploit/snapshot_L4_d256_s42.json create mode 100644 results/snapshot_fa_canonical_all.log create mode 100644 results/snapshot_fa_canonical_noln.log create mode 100644 results/snapshot_fa_crossarch.log create mode 100644 results/snapshot_fa_evolution.log create mode 100644 results/snapshot_fa_studentnet_vit.log create mode 100644 results/snapshot_no_outln_v1/run_s123.log create mode 100644 results/snapshot_no_outln_v1/run_s42.log create mode 100644 results/snapshot_no_outln_v1/run_s456.log create mode 100644 results/snapshot_no_outln_v1/snapshot_fa_canonical_noln_s42.json create mode 100644 results/snapshot_no_outln_v1/snapshot_fa_noln_s42.json create mode 100644 results/snapshot_no_outln_v1/snapshot_noLN_s123.json create mode 100644 results/snapshot_no_outln_v1/snapshot_noLN_s42.json create mode 100644 results/snapshot_no_outln_v1/snapshot_noLN_s456.json create mode 100644 results/snapshot_synth_v1/run_a1.0_s42.log create mode 100644 results/snapshot_synth_v1/snapshot_fa_canonical_s42.json create mode 100644 results/snapshot_synth_v1/snapshot_fa_synth_a1.0_L4_s42.json create mode 100644 results/snapshot_synth_v1/snapshot_synth_a1.0_L4_s42.json create mode 100644 results/snapshot_time/time_sweep_L4_d256_s42.json create mode 100644 results/snapshot_vit_v1/run_s123.log create mode 100644 results/snapshot_vit_v1/run_s42.log create mode 100644 results/snapshot_vit_v1/run_s456.log create mode 100644 results/snapshot_vit_v1/snapshot_fa_canonical_s42.json create mode 100644 
results/snapshot_vit_v1/snapshot_fa_vit_s42.json create mode 100644 results/snapshot_vit_v1/snapshot_vit_s123.json create mode 100644 results/snapshot_vit_v1/snapshot_vit_s42.json create mode 100644 results/snapshot_vit_v1/snapshot_vit_s456.json create mode 100644 results/structured_aux/structured_aux_t5_s42.json create mode 100644 results/synth_ladder_smoke/config.json create mode 100644 results/synth_ladder_smoke/summary.json create mode 100644 results/synth_ladder_smoke/synth_a0.0_L2_s42.json create mode 100644 results/synth_ladder_smoke/synth_a0.0_L8_s42.json create mode 100644 results/synth_ladder_smoke/synth_a0.5_L2_s42.json create mode 100644 results/synth_ladder_smoke/synth_a0.5_L8_s42.json create mode 100644 results/synth_ladder_smoke/synth_a1.0_L2_s42.json create mode 100644 results/synth_ladder_smoke/synth_a1.0_L8_s42.json create mode 100644 results/synth_ladder_v2_hi/config.json create mode 100644 results/synth_ladder_v2_hi/summary.json create mode 100644 results/synth_ladder_v2_hi/synth_a0.5_L12_s123.json create mode 100644 results/synth_ladder_v2_hi/synth_a0.5_L12_s42.json create mode 100644 results/synth_ladder_v2_hi/synth_a0.5_L12_s456.json create mode 100644 results/synth_ladder_v2_hi/synth_a0.5_L2_s123.json create mode 100644 results/synth_ladder_v2_hi/synth_a0.5_L2_s42.json create mode 100644 results/synth_ladder_v2_hi/synth_a0.5_L2_s456.json create mode 100644 results/synth_ladder_v2_hi/synth_a0.5_L4_s123.json create mode 100644 results/synth_ladder_v2_hi/synth_a0.5_L4_s42.json create mode 100644 results/synth_ladder_v2_hi/synth_a0.5_L4_s456.json create mode 100644 results/synth_ladder_v2_hi/synth_a0.5_L8_s123.json create mode 100644 results/synth_ladder_v2_hi/synth_a0.5_L8_s42.json create mode 100644 results/synth_ladder_v2_hi/synth_a0.5_L8_s456.json create mode 100644 results/synth_ladder_v2_hi/synth_a1.0_L12_s123.json create mode 100644 results/synth_ladder_v2_hi/synth_a1.0_L12_s42.json create mode 100644 
results/synth_ladder_v2_hi/synth_a1.0_L12_s456.json create mode 100644 results/synth_ladder_v2_hi/synth_a1.0_L2_s123.json create mode 100644 results/synth_ladder_v2_hi/synth_a1.0_L2_s42.json create mode 100644 results/synth_ladder_v2_hi/synth_a1.0_L2_s456.json create mode 100644 results/synth_ladder_v2_hi/synth_a1.0_L4_s123.json create mode 100644 results/synth_ladder_v2_hi/synth_a1.0_L4_s42.json create mode 100644 results/synth_ladder_v2_hi/synth_a1.0_L4_s456.json create mode 100644 results/synth_ladder_v2_hi/synth_a1.0_L8_s123.json create mode 100644 results/synth_ladder_v2_hi/synth_a1.0_L8_s42.json create mode 100644 results/synth_ladder_v2_hi/synth_a1.0_L8_s456.json create mode 100644 results/synth_ladder_v2_lo/config.json create mode 100644 results/synth_ladder_v2_lo/summary.json create mode 100644 results/synth_ladder_v2_lo/synth_a0.0_L12_s123.json create mode 100644 results/synth_ladder_v2_lo/synth_a0.0_L12_s42.json create mode 100644 results/synth_ladder_v2_lo/synth_a0.0_L12_s456.json create mode 100644 results/synth_ladder_v2_lo/synth_a0.0_L2_s123.json create mode 100644 results/synth_ladder_v2_lo/synth_a0.0_L2_s42.json create mode 100644 results/synth_ladder_v2_lo/synth_a0.0_L2_s456.json create mode 100644 results/synth_ladder_v2_lo/synth_a0.0_L4_s123.json create mode 100644 results/synth_ladder_v2_lo/synth_a0.0_L4_s42.json create mode 100644 results/synth_ladder_v2_lo/synth_a0.0_L4_s456.json create mode 100644 results/synth_ladder_v2_lo/synth_a0.0_L8_s123.json create mode 100644 results/synth_ladder_v2_lo/synth_a0.0_L8_s42.json create mode 100644 results/synth_ladder_v2_lo/synth_a0.0_L8_s456.json create mode 100644 results/synth_ladder_v2_lo/synth_a0.25_L12_s123.json create mode 100644 results/synth_ladder_v2_lo/synth_a0.25_L12_s42.json create mode 100644 results/synth_ladder_v2_lo/synth_a0.25_L12_s456.json create mode 100644 results/synth_ladder_v2_lo/synth_a0.25_L2_s123.json create mode 100644 results/synth_ladder_v2_lo/synth_a0.25_L2_s42.json create 
mode 100644 results/synth_ladder_v2_lo/synth_a0.25_L2_s456.json create mode 100644 results/synth_ladder_v2_lo/synth_a0.25_L4_s123.json create mode 100644 results/synth_ladder_v2_lo/synth_a0.25_L4_s42.json create mode 100644 results/synth_ladder_v2_lo/synth_a0.25_L4_s456.json create mode 100644 results/synth_ladder_v2_lo/synth_a0.25_L8_s123.json create mode 100644 results/synth_ladder_v2_lo/synth_a0.25_L8_s42.json create mode 100644 results/synth_ladder_v2_lo/synth_a0.25_L8_s456.json create mode 100644 results/topdown_curriculum/topdown_s42.json create mode 100644 results/update_swap/update_swap_L4_d256_s42.json create mode 100644 results/vector_audit/audit_results.json create mode 100644 results/vector_audit_full/audit_results.json create mode 100644 results/vector_credit_pilot/results.json create mode 100644 results/vit_dfa_train_s42.log create mode 100644 results/vit_frozen_blocks_s123.log create mode 100644 results/vit_frozen_blocks_s42.log create mode 100644 results/vit_frozen_blocks_s456.log create mode 100644 results/vit_shallow_baseline_s42.log create mode 100644 results/vit_test/snapshot_vit_s42.json diff --git a/.claude/scheduled_tasks.lock b/.claude/scheduled_tasks.lock new file mode 100644 index 0000000..297c171 --- /dev/null +++ b/.claude/scheduled_tasks.lock @@ -0,0 +1 @@ +{"sessionId":"f27e30df-1dac-49d4-b21a-ee801fd7f0b9","pid":1410767,"acquiredAt":1776912147023} \ No newline at end of file diff --git a/.gitignore b/.gitignore index 00d9c79..af48f6e 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,6 @@ __pycache__/ *.pyc data/ *.pt + +# nested standalone repo — push separately +submission_code/ diff --git a/CLAUDE.md b/CLAUDE.md index 6d9d08f..df986a2 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,2 +1,45 @@ -chor 的前提下,能不能学到一种有用的、terminal-conditioned 的局部 credit signal,用来替代 Feedback Alignment (FA/DFA) 中固定随机反馈矩阵带来的粗糙 credit assignment。 我们的目标不是立刻追求最高分类精度,而是验证一个新的假设是否可行:与其桥接 hidden state,不如桥接 credit / costate / value field。 也就是说,我们希望学习的对象不是 h_l \mapsto 
h_{l+1} 或者 h_l \mapsto h_L, 而是一个 terminal-conditioned 的标量 value / desirability 场 $V_\phi(h_l, t_l, s), \qquad t_l = l/L$, 并把每层的局部信用分配定义为 $a_l := \nabla_{h_l} V_\phi(h_l,t_l,s)$. 这里 $s$ 是来自终端误差的全局调制码。最简单的做法是 $e_T = \nabla_{\hat y}\ell(\hat y,y),\qquad s = P^\top \mathrm{sg}[e_T]$, 其中 $\mathrm{sg}[\cdot]$ 表示 stop-gradient。对于 10 类分类任务,可以先直接取 $P=I$,所以 $s=e_T\in\mathbb R^{10}$。 项目分成两个阶段。第一阶段是一个线性二次 residual sanity check,它的作用不是追任务性能,而是验证"credit bridge 学到的 $a_l$ 到底像不像真正的局部梯度"。第二阶段是一个深 residual MLP 主实验,用来比较 BP、DFA、state bridge 和 credit bridge。 整个项目的最重要约束是:训练时不允许在隐藏层使用 BP anchor。 也就是说,中间层不能用 exact backprop 的 hidden gradient 作为监督信号。允许的只有两件事:一是输出层本地 exact gradient,因为输出层本来就直接看到 loss;二是离线 evaluation 时计算 BP hidden gradient 作为诊断指标,但绝对不能用于训练。 请优先按下面的顺序推进:先把线性 sanity check 完成并拿到清晰诊断结果,再做主实验。不要反过来。 ⸻ 一、项目背景与核心假设 标准 BP 在隐藏层使用 $\delta_l^{\mathrm{BP}} = J_{l\to L}^\top e_T$, 其中 $J_{l\to L}$ 是从层 $l$ 到输出的 Jacobian。FA/DFA 则用一个固定随机反馈矩阵近似这个对象,例如 DFA 用 $\delta_l^{\mathrm{DFA}} = B_l e_T$. 我们的怀疑是:问题的关键不在于"找一个更好的静态 $B_l$",而在于学习一个状态相关、样本相关、深度相关的信用场。也就是说,我们想把隐藏层该收到的 top-down signal 看成某种 terminal-conditioned value field 的梯度: $a_l = \nabla_{h_l} V_\phi(h_l,t_l,s)$. 这个想法可以理解成一种无中间 anchor 的 credit bridge。它和单纯预测 hidden state 的 state bridge 是不同对象:state bridge 试图从 $h_l$ 预测 $h_L$ 或中间状态,而 credit bridge 直接学习 loss 对局部状态的敏感度。 我们需要验证两个具体命题: 第一,state bridge 是否不足以产生有用的局部 credit。也就是说,即使它能把 $h_L$ 预测得不错,它算出来的 $a_l^{\mathrm{state}} = \nabla_{h_l} \ell(W_{\rm out}\hat h_L,y)$ 也未必能真的指导局部更新。 第二,credit bridge 在没有中间 BP anchor 的情况下,能否产生比 DFA 更有用的局部 credit。这里"更有用"主要通过局部扰动相关性、nudging test、离线 BP cosine 等诊断指标体现,而不只是最终 test accuracy。 ⸻ 二、你需要实现的方法 你至少要实现下面 4 个方法,并保证所有方法使用尽可能相同的前向架构、优化器族和训练设置。 方法 1:BP baseline 这是上界基线。在这个方法里,允许标准 end-to-end backprop。它的主要作用是给出 accuracy upper bound,并在 evaluation 阶段提供 hidden gradient 参考。这个方法训练时当然可以调用整网 `loss.backward()`。 方法 2:DFA baseline 这是无 hidden BP anchor 的强基线。设网络有 $L$ 个 residual blocks,每个隐藏状态维度为 $d$,类别数为 $C$。对每个 block 采样一个固定随机矩阵 $B_l \in \mathbb R^{d\times C}$, 并保持训练中不变。输出误差是 $e_T = \nabla_{\hat y}\ell(\hat y,y)$. 
则该层使用 $a_l^{\mathrm{DFA}} = B_l e_T$. block $l$ 的参数更新使用局部 surrogate: $\mathcal L_l^{\mathrm{local}} = \langle F_l(h_l;\theta_l),\mathrm{sg}[a_{l+1}^{\mathrm{DFA}}]\rangle$. 请注意:DFA 方法里不能因为偷懒而让 `loss.backward()` 穿过隐藏层。每个 block 的更新必须只依赖自己的局部前向和本层收到的固定反馈。 方法 3:State bridge baseline 这是我们故意加入的"对象错位"基线。它的目的不是追最好效果,而是验证"桥接 state 不等于桥接 credit"。 做法如下。定义一个共享或半共享的预测器 $G_\psi(h_l,t_l,s)\to \hat h_L^{(l)}$. 它的输入是当前层 hidden state、层位置 $t_l=l/L$、以及终端调制码 $s$。它的训练目标是终端状态回归: $\mathcal L_{\mathrm{state}} = \sum_{l=0}^{L-1}\|G_\psi(h_l,t_l,s)-\mathrm{sg}[h_L]\|_2^2$. 这里建议只用 l