summaryrefslogtreecommitdiff
path: root/protocol
diff options
context:
space:
mode:
author: YurenHao0426 <Blackhao0426@gmail.com> 2026-04-08 19:24:06 -0500
committer: YurenHao0426 <Blackhao0426@gmail.com> 2026-04-08 19:24:06 -0500
commit: 2fa24acae8bb7f8c026db2f7fdade4a29b640d8d (patch)
tree: 98bf266ac07a1d6974769262dff916553223612f /protocol
parent: cebc4c4a81809a982a16dd07da41487aa2f30322 (diff)
Sync experiment+protocol scripts with v2.32 corrected control values
The pre-v2.31 unsourced values BP=0.609 and DFA=0.308 (which v2.31 fixed to 0.585 and 0.301 via matched 30-ep controls) were also hardcoded as "compare to" comments in 5 helper scripts: experiments/bp_with_penalty_control.py, experiments/dfa_residual_penalty_test.py, experiments/resmlp_frozen_blocks_baseline.py, protocol/examples/threshold_d_sensitivity.py, and protocol/examples/plot_penalty_rescue.py. These are non-paper-input scripts (their output goes to stdout, not to the paper), so the stale values didn't cause numerical errors in the paper itself. But the original v2.31 BP+pen=0.609 unsourced number bug came from exactly this kind of hardcoded "for-comparison" comment that was never measured. Updating them now to remove the same trap from future runs. Each script now references the matched 30-ep 3-seed values from results/bp_no_penalty_30ep, results/dfa_no_penalty_30ep, results/dfa_pen_short, and results/bp_with_penalty. protocol/EVIDENCE_SUMMARY.md and PAPER_OUTLINE.md still have stale numbers — these are project scratch documents and not user-facing. Deferred to a separate sweep if needed. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat (limited to 'protocol')
-rw-r--r-- protocol/examples/plot_penalty_rescue.py | 4
-rw-r--r-- protocol/examples/threshold_d_sensitivity.py | 14
2 files changed, 10 insertions, 8 deletions
diff --git a/protocol/examples/plot_penalty_rescue.py b/protocol/examples/plot_penalty_rescue.py
index 37b0fa9..fff300e 100644
--- a/protocol/examples/plot_penalty_rescue.py
+++ b/protocol/examples/plot_penalty_rescue.py
@@ -10,7 +10,7 @@ Data sources:
- vanilla DFA trajectory: results/snapshot_evolution_v2/snapshot_evolution_s42.json
- penalized DFA (lam=1e-2): results/dfa_residual_penalty/dfa_pen_lam0.01_s42.json
- DFA-shallow baseline 3-seed mean (drawn as horizontal line): 0.349
- - BP-trainable 3-seed mean: 0.609
+ - BP-trainable 3-seed mean: 0.6147 (100 ep) / 0.585 (matched 30 ep)
Run:
python -m protocol.examples.plot_penalty_rescue
@@ -91,7 +91,7 @@ def main():
ax.plot([e["epoch"] for e in penalty], [e["acc_eval"] for e in penalty],
label=r"DFA + $\lambda \|f_l\|^2$", color="C2", lw=2, marker="o", markersize=4)
ax.axhline(0.349, color="k", linestyle="--", lw=1.2, label="DFA-shallow 0.349")
- ax.axhline(0.609, color="C0", linestyle=":", lw=1, label="BP-trainable 0.609")
+ ax.axhline(0.6147, color="C0", linestyle=":", lw=1, label="BP-trainable 100ep 0.615")
ax.set_xlabel("epoch", fontsize=10)
ax.set_ylabel("test acc", fontsize=10)
ax.set_title("(d) headline accuracy", fontsize=11)
diff --git a/protocol/examples/threshold_d_sensitivity.py b/protocol/examples/threshold_d_sensitivity.py
index d3f2c58..065efc7 100644
--- a/protocol/examples/threshold_d_sensitivity.py
+++ b/protocol/examples/threshold_d_sensitivity.py
@@ -22,13 +22,15 @@ REPO_ROOT = os.path.dirname(
def main():
# 3-seed mean accuracies on 4-block d=256 ResMLP CIFAR-10
+ # Updated v2.32 with matched 30-epoch controls
conditions = [
- ("BP-trainable", 0.609, 0.004),
- ("DFA-shallow", 0.349, 0.002),
- ("DFA-vanilla", 0.308, 0.014),
- ("DFA-pen lam=1e-3", 0.372, None), # 1 seed
- ("DFA-pen lam=1e-2", 0.363, 0.0007),
- ("DFA-frozen-rand", 0.349, 0.002),
+ ("BP-trainable 100ep", 0.6147, 0.004), # protocol_audit
+ ("BP-trainable 30ep", 0.585, 0.001), # results/bp_no_penalty_30ep
+ ("BP+pen 30ep lam=1e-2", 0.532, 0.006), # results/bp_with_penalty
+ ("DFA-shallow", 0.349, 0.002), # frozen baseline
+ ("DFA-vanilla 100ep", 0.306, 0.006), # protocol_audit
+ ("DFA-vanilla 30ep", 0.301, 0.005), # results/dfa_no_penalty_30ep
+ ("DFA+pen 30ep lam=1e-2", 0.360, 0.001), # results/dfa_pen_short
]
shallow_acc = 0.349