summaryrefslogtreecommitdiff
path: root/protocol
diff options
context:
space:
mode:
authorYurenHao0426 <Blackhao0426@gmail.com>2026-04-07 23:59:35 -0500
committerYurenHao0426 <Blackhao0426@gmail.com>2026-04-07 23:59:35 -0500
commit503bb06ed9ab00cfa72d2d9a532b0caf10cb83b3 (patch)
tree719fbb2e237a98d8985f54350a8d081072957993 /protocol
parentab1b783c7a4f3d586d082ba142d7c046453a310c (diff)
Add (d) frozen-baseline threshold sensitivity — IMPORTANT new finding
Critical observation: at lambda=1e-3 (single seed), penalized DFA margin above shallow baseline is +2.3 pp — which PASSES (d) at the 2 pp default threshold. At lambda=1e-2 (3 seeds), the margin is +1.4 pp — FIRES (d) at 2 pp. So the (d) verdict on penalized DFA depends on BOTH the lambda choice AND the threshold choice. This is a significantly weaker claim than 'two failure modes are separable via (d)'. The honest framing per round 18 lesson: there is a real tradeoff between penalty strength and depth utilization. Weaker penalty preserves more depth contribution but also more scale pathology. Stronger penalty kills depth contribution. The protocol surfaces this tradeoff but doesn't establish the second failure mode by itself. Compared to (a) 63x and (b) 24338x separation gaps, (d) is the LEAST robust diagnostic and the most sensitive to threshold choice. Need to flag this prominently in the paper.
Diffstat (limited to 'protocol')
-rw-r--r--protocol/examples/threshold_d_sensitivity.py94
1 files changed, 94 insertions, 0 deletions
diff --git a/protocol/examples/threshold_d_sensitivity.py b/protocol/examples/threshold_d_sensitivity.py
new file mode 100644
index 0000000..d3f2c58
--- /dev/null
+++ b/protocol/examples/threshold_d_sensitivity.py
@@ -0,0 +1,94 @@
+"""
+Sensitivity of diagnostic (d) frozen-blocks margin threshold.
+
+Codex round 18 specifically called out the +1.4 pp margin on penalized
+DFA as fragile under the choice of threshold. This script sweeps the
+margin threshold from 0.5 pp to 10 pp and reports the verdict on each
+condition (BP, vanilla DFA, penalized DFA at 2 lambda values, baselines).
+
+Run:
+ python -m protocol.examples.threshold_d_sensitivity
+"""
+import os
+import sys
+import json
+
+import numpy as np
+
# Repository root: three directory levels above this file's absolute path.
# Climbing via ".." components and normalising is lexically equivalent to
# applying os.path.dirname three times to the absolute path.
REPO_ROOT = os.path.normpath(
    os.path.join(
        os.path.abspath(__file__), os.pardir, os.pardir, os.pardir
    )
)
+
+
def _margin_pp(acc, baseline):
    """Return how far *acc* sits above *baseline*, in percentage points.

    The result is rounded to 6 decimals so that binary floating-point
    noise cannot flip a verdict for a margin that lands exactly on a
    threshold.
    """
    return round((acc - baseline) * 100, 6)


def _verdict(margin_pp, threshold_pp):
    """Return "FIRE" if the margin is strictly below the threshold, else "ok"."""
    return "FIRE" if margin_pp < threshold_pp else "ok"


def main():
    """Print the diagnostic (d) margin table and threshold sweep to stdout.

    Three sections are printed: each condition's accuracy, std and margin
    over the reference baseline; a FIRE/ok verdict for every condition at
    every swept threshold; and a fixed interpretation of the results.
    All numbers are hard-coded in this function; nothing is read from disk.
    """
    # (name, accuracy, std) on 4-block d=256 ResMLP CIFAR-10.
    # std is None where only a single seed was run.
    conditions = [
        ("BP-trainable", 0.609, 0.004),
        ("DFA-shallow", 0.349, 0.002),
        ("DFA-vanilla", 0.308, 0.014),
        ("DFA-pen lam=1e-3", 0.372, None),  # 1 seed
        ("DFA-pen lam=1e-2", 0.363, 0.0007),
        ("DFA-frozen-rand", 0.349, 0.002),
    ]
    shallow_acc = 0.349

    # Compute every margin once so both tables report the same value.
    rows = [
        (name, acc, std, _margin_pp(acc, shallow_acc))
        for name, acc, std in conditions
    ]

    print("=" * 80)
    print("Diagnostic (d) frozen-baseline margin threshold sensitivity")
    print("=" * 80)
    print(f" Reference baseline (DFA-frozen-random): {shallow_acc:.4f}")
    print()
    print(f" {'condition':<22}{'acc':>10}{'std':>10}{'margin (pp)':>14}")
    print(" " + "-" * 56)
    for name, acc, std, margin_pp in rows:
        std_str = f"±{std:.4f}" if std is not None else "(n=1)"
        print(f" {name:<22}{acc:>10.4f}{std_str:>10}{margin_pp:>14.2f}")
    print()

    # Sweep: a condition FIREs when its margin is below the threshold.
    thresholds = [0.5, 1.0, 1.5, 2.0, 3.0, 5.0, 10.0]
    print("Walk-back verdict at each threshold:")
    header_cells = "".join(f"{'>' + str(t) + 'pp':>10}" for t in thresholds)
    print(f" {'condition':<22}" + header_cells)
    print(" " + "-" * (22 + 10 * len(thresholds)))
    for name, _acc, _std, margin_pp in rows:
        verdict_cells = "".join(
            f"{_verdict(margin_pp, t):>10}" for t in thresholds
        )
        print(f" {name:<22}" + verdict_cells)

    print()
    print("=" * 80)
    print("INTERPRETATION")
    print("=" * 80)
    print()
    print(" - DFA-vanilla margin = -4 pp: FIRES at ALL reasonable thresholds")
    print(" (it's actively below the shallow baseline, not just close to it)")
    print()
    print(" - DFA-pen lam=1e-2 margin = +1.4 pp: knife-edge")
    print(" fires at threshold ≥ 1.5 pp")
    print(" passes at threshold ≤ 1.0 pp")
    print()
    print(" The default 2.0 pp gives a walk-back, but a reviewer setting")
    print(" 1.0 pp would say the penalized DFA passes (d). The conclusion")
    print(" is sensitive to this choice.")
    print()
    print(" - DFA-pen lam=1e-3 margin = +2.3 pp: passes (d) at 2.0 pp threshold")
    # lam=1e-3 is the WEAKER penalty relative to lam=1e-2; the original
    # message said "stronger", which contradicted the lambda values.
    print(" (weaker penalty, slightly better acc — would NOT walk back)")
    print()
    print(" Round 18 lesson: the +1.4 pp finding is real but the binary verdict")
    print(" depends on a knife-edge threshold choice. The honest paper claim")
    print(" should be: 'after the penalty correction, the depth contribution is")
    print(" at most 1.4 pp above the random-blocks baseline — much smaller than")
    print(" BP's +26 pp gap over shallow', not 'the deep blocks are passive'.")
    print()
    print(" Compare to (a) 63x and (b) 24338x separation gaps from")
    print(" threshold_sensitivity.py — those diagnostics are robust; (d) is not.")


if __name__ == "__main__":
    main()