From 503bb06ed9ab00cfa72d2d9a532b0caf10cb83b3 Mon Sep 17 00:00:00 2001
From: YurenHao0426 <Blackhao0426@gmail.com>
Date: Tue, 7 Apr 2026 23:59:35 -0500
Subject: =?UTF-8?q?Add=20(d)=20frozen-baseline=20threshold=20sensitivity?=
 =?UTF-8?q?=20=E2=80=94=20IMPORTANT=20new=20finding?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Critical observation: at lambda=1e-3 (single seed), penalized DFA margin
above shallow baseline is +2.3 pp — which PASSES (d) at the 2 pp default
threshold. At lambda=1e-2 (3 seeds), the margin is +1.4 pp — FIRES (d)
at 2 pp.

So the (d) verdict on penalized DFA depends on BOTH the lambda choice
AND the threshold choice. This is a significantly weaker claim than
'two failure modes are separable via (d)'.

The honest framing per round 18 lesson: there is a real tradeoff between
penalty strength and depth utilization. Weaker penalty preserves more
depth contribution but also more scale pathology. Stronger penalty kills
depth contribution. The protocol surfaces this tradeoff but doesn't
establish the second failure mode by itself.

Compared to (a) 63x and (b) 24338x separation gaps, (d) is the LEAST
robust diagnostic and the most sensitive to threshold choice. Need to
flag this prominently in the paper.
---
 protocol/examples/threshold_d_sensitivity.py | 94 ++++++++++++++++++++++++++++
 1 file changed, 94 insertions(+)
 create mode 100644 protocol/examples/threshold_d_sensitivity.py

(limited to 'protocol/examples')

diff --git a/protocol/examples/threshold_d_sensitivity.py b/protocol/examples/threshold_d_sensitivity.py
new file mode 100644
index 0000000..d3f2c58
--- /dev/null
+++ b/protocol/examples/threshold_d_sensitivity.py
@@ -0,0 +1,94 @@
+"""
+Sensitivity of diagnostic (d) frozen-blocks margin threshold.
+
+Codex round 18 specifically called out the +1.4 pp margin on penalized
+DFA as fragile under the choice of threshold. This script sweeps the
+margin threshold from 0.5 pp to 10 pp and reports the verdict on each
+condition (BP, shallow/vanilla/frozen-random DFA, penalized DFA at 2 lambdas).
+
+Run:
+    python -m protocol.examples.threshold_d_sensitivity
+"""
+import os
+import sys
+import json
+
+import numpy as np
+
+REPO_ROOT = os.path.dirname(  # three levels up: examples/ -> protocol/ -> repo root
+    os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+)
+
+
+def main():
+    """Print the diagnostic (d) margin table, threshold sweep, and notes."""
+    # (name, accuracy, std) rows for the 4-block d=256 ResMLP on CIFAR-10:
+    # 3-seed means, except std=None marks a single-seed run.
+    conditions = [
+        ("BP-trainable",     0.609,  0.004),
+        ("DFA-shallow",      0.349,  0.002),
+        ("DFA-vanilla",      0.308,  0.014),
+        ("DFA-pen lam=1e-3", 0.372,  None),  # 1 seed
+        ("DFA-pen lam=1e-2", 0.363,  0.0007),
+        ("DFA-frozen-rand",  0.349,  0.002),
+    ]
+    shallow_acc = 0.349
+
+    print("=" * 80)
+    print("Diagnostic (d) frozen-baseline margin threshold sensitivity")
+    print("=" * 80)
+    print(f"  Reference baseline (DFA-frozen-random): {shallow_acc:.4f}")
+    print()
+    print(f"  {'condition':<22}{'acc':>10}{'std':>10}{'margin (pp)':>14}")
+    print("  " + "-" * 56)
+    for name, acc, std in conditions:
+        margin_pp = (acc - shallow_acc) * 100
+        std_str = f"±{std:.4f}" if std is not None else "(n=1)"
+        print(f"  {name:<22}{acc:>10.4f}{std_str:>10}{margin_pp:>14.2f}")
+    print()
+
+    # Sweep: (d) fires (walk-back) when the margin over the baseline falls
+    # below the threshold, so larger thresholds are stricter.
+    thresholds = [0.5, 1.0, 1.5, 2.0, 3.0, 5.0, 10.0]
+    print("Walk-back verdict at each threshold:")
+    print(f"  {'condition':<22}", end="")
+    print("".join(f"{'>' + str(t) + 'pp':>10}" for t in thresholds))
+    print("  " + "-" * (22 + 10 * len(thresholds)))
+    for name, acc, _ in conditions:
+        margin_pp = (acc - shallow_acc) * 100
+        verdicts = ("FIRE" if margin_pp < t else "ok" for t in thresholds)
+        print(f"  {name:<22}" + "".join(f"{v:>10}" for v in verdicts))
+
+    # The interpretation below is hand-written for the numbers hard-coded
+    # above; revise it whenever the conditions table or thresholds change.
+    print()
+    print("=" * 80)
+    print("INTERPRETATION")
+    print("=" * 80)
+    print()
+    print("  - DFA-vanilla margin = -4 pp: FIRES at ALL reasonable thresholds")
+    print("    (it's actively below the shallow baseline, not just close to it)")
+    print()
+    print("  - DFA-pen lam=1e-2 margin = +1.4 pp: knife-edge")
+    print("    fires at threshold ≥ 1.5 pp")
+    print("    passes at threshold ≤ 1.0 pp")
+    print()
+    print("    The default 2.0 pp gives a walk-back, but a reviewer setting")
+    print("    1.0 pp would say the penalized DFA passes (d). The conclusion")
+    print("    is sensitive to this choice.")
+    print()
+    print("  - DFA-pen lam=1e-3 margin = +2.3 pp: passes (d) at 2.0 pp threshold")
+    print("    (slightly weaker penalty, slightly better acc — would NOT walk back)")
+    print()
+    print("  Round 18 lesson: the +1.4 pp finding is real but the binary verdict")
+    print("  depends on a knife-edge threshold choice. The honest paper claim")
+    print("  should be: 'after the penalty correction, the depth contribution is")
+    print("  at most 1.4 pp above the random-blocks baseline — much smaller than")
+    print("  BP's +26 pp gap over shallow', not 'the deep blocks are passive'.")
+    print()
+    print("  Compare to (a) 63x and (b) 24338x separation gaps from")
+    print("  threshold_sensitivity.py — those diagnostics are robust; (d) is not.")
+
+
+if __name__ == "__main__":  # run the report when executed as a script or via `python -m`
+    main()
-- 
cgit v1.2.3