From 2fa24acae8bb7f8c026db2f7fdade4a29b640d8d Mon Sep 17 00:00:00 2001 From: YurenHao0426 Date: Wed, 8 Apr 2026 19:24:06 -0500 Subject: Sync experiment+protocol scripts with v2.32 corrected control values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pre-v2.31 unsourced values BP=0.609 and DFA=0.308 (which v2.31 fixed to 0.585 and 0.301 via matched 30-ep controls) were also hardcoded as "compare to" comments in 5 helper scripts: experiments/bp_with_penalty_control.py experiments/dfa_residual_penalty_test.py experiments/resmlp_frozen_blocks_baseline.py protocol/examples/threshold_d_sensitivity.py protocol/examples/plot_penalty_rescue.py These are non-paper-input scripts (their output goes to stdout, not to the paper), so the stale values didn't cause numerical errors in the paper itself. But the original v2.31 BP+pen=0.609 unsourced number bug came from exactly this kind of hardcoded "for-comparison" comment that was never measured. Updating them now to remove the same trap from future runs. Each script now references the matched 30-ep 3-seed values from results/bp_no_penalty_30ep, results/dfa_no_penalty_30ep, results/dfa_pen_short, and results/bp_with_penalty. protocol/EVIDENCE_SUMMARY.md and PAPER_OUTLINE.md still have stale numbers — these are project scratch documents and not user-facing. Deferred to a separate sweep if needed.
Co-Authored-By: Claude Opus 4.6 (1M context) --- experiments/dfa_residual_penalty_test.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'experiments/dfa_residual_penalty_test.py') diff --git a/experiments/dfa_residual_penalty_test.py b/experiments/dfa_residual_penalty_test.py index 3fa5466..f2b43ec 100644 --- a/experiments/dfa_residual_penalty_test.py +++ b/experiments/dfa_residual_penalty_test.py @@ -182,11 +182,11 @@ def main(): final_test = evaluate(m, test_loader, dev) print(f"\nFINAL test acc: {final_test:.4f}") - print(f"Compare to:") - print(f" DFA-vanilla (3-seed mean): 0.308") - print(f" DFA-shallow (3-seed mean): 0.349") - print(f" DFA-frozen (3-seed mean): 0.349") - print(f" BP-trainable (3-seed mean): 0.609") + print(f"Compare to (matched 30-epoch 3-seed values, see paper v2.32):") + print(f" DFA-vanilla 30ep (3-seed): 0.301 ± 0.005") + print(f" DFA-shallow / DFA-frozen: 0.349 ± 0.002") + print(f" BP-trainable no-pen 30ep: 0.585 ± 0.001") + print(f" BP+pen lam=1e-2 30ep: 0.532 ± 0.006") out = {'config': vars(args), 'final_test_acc': final_test, 'log': log} out_path = os.path.join(args.output_dir, f'dfa_pen_lam{args.lam}_s{args.seed}.json') -- cgit v1.2.3