# research/flossing/analysis_2x2/analyze_phase1.py

"""Analyze phase-1 results: E5 horizon sweep, E6 matched-objective pairs, E2 second-run replication.
Outputs: analysis_2x2/phase1/phase1_results.md + figures.
"""
from __future__ import annotations
from pathlib import Path
import matplotlib; matplotlib.use("Agg")
import matplotlib.pyplot as plt
import numpy as np

# Directory holding this script; inputs and outputs are resolved relative to it.
HERE = Path(__file__).resolve().parent
# Phase-1 artefacts: per-horizon/.npz inputs are read from here, the report and figure are written here.
P1 = HERE.joinpath("phase1")
# Earlier "retest" runs that supply the full-length and short reference .npz files.
RETEST = HERE.joinpath("retest")


def auc_rank(score, label):
    """Return the rank-based (Mann-Whitney) AUC of ``score`` for label 1 vs. label 0.

    Args:
        score: Array-like of scores; higher is expected to indicate label 1.
        label: Array-like of integer labels. Only entries equal to 1 (positives)
            and 0 (negatives) take part; anything else is ignored.

    Returns:
        AUC as a Python float. Tied scores share the average of the ranks they
        span. NaN is returned when either class has no samples.
    """
    values = np.asarray(score, float)
    classes = np.asarray(label, int)
    positives = values[classes == 1]
    negatives = values[classes == 0]
    n_pos, n_neg = len(positives), len(negatives)
    if n_pos == 0 or n_neg == 0:
        return float("nan")

    # Positives are placed first so their ranks occupy the leading n_pos slots.
    pooled = np.concatenate([positives, negatives])
    order = np.argsort(pooled, kind="mergesort")
    in_order = pooled[order]

    # A tie group starts at position 0 and wherever the sorted value changes.
    starts = np.concatenate([[0], np.flatnonzero(in_order[1:] != in_order[:-1]) + 1])
    stops = np.append(starts[1:], len(pooled))
    # The 1-based ranks start+1 .. stop average to (start + 1 + stop) / 2.
    group_rank = (starts + 1 + stops) / 2
    ranks = np.empty(len(pooled))
    ranks[order] = np.repeat(group_rank, stops - starts)

    # U statistic of the positives, normalised by the number of (pos, neg) pairs.
    rank_sum = ranks[:n_pos].sum()
    return float((rank_sum - n_pos * (n_pos + 1) / 2) / (n_pos * n_neg))


def otsu(x, nbins=256):
    """Return an Otsu threshold for the 1-D sample ``x``.

    The data are histogrammed into ``nbins`` bins and the centre of the bin that
    maximises the between-class variance is returned as a Python float. When
    several bins tie for the maximum, the lowest one wins.
    """
    counts, edges = np.histogram(x, bins=nbins)
    centers = 0.5 * (edges[:-1] + edges[1:])
    prob = counts.astype(float) / counts.sum()

    weight_low = np.cumsum(prob)          # mass at or below each bin
    cum_mean = np.cumsum(prob * centers)  # un-normalised mean of that mass
    total_mean = cum_mean[-1]

    # Splits that leave one side empty have zero denominator; mask them out
    # so they can never be selected.
    denom = weight_low * (1 - weight_low)
    denom = np.where(denom > 0, denom, np.nan)

    between_var = (total_mean * weight_low - cum_mean) ** 2 / denom
    return float(centers[np.nanargmax(between_var)])


def cells_of(d):
    """Summarise one run as a 2x2 grid: (low vs. high late drift) x (correct vs. wrong).

    Args:
        d: Mapping with array entries ``"drift_zH"`` (2-D; the last four columns
            are averaged), ``"exact_correct"`` and ``"lyap_spec"`` (2-D; column 0
            is used, reported as λ₁ in the tables).

    Returns:
        Dict with overall accuracy ``acc``, the Otsu threshold ``tau`` on
        log10 drift, the cell counts ``nA``..``nD``, the fraction of samples in
        cell D (``fD``), and the median of ``lyap_spec[:, 0]`` within cells A and
        D (``l1A``, ``l1D``; NaN when the cell is empty).
    """
    tail_drift = d["drift_zH"][:, -4:].mean(1)
    # Clip before the log so zero drift does not produce -inf.
    log_drift = np.log10(np.clip(tail_drift, 1e-12, None))
    correct = d["exact_correct"].astype(int)
    tau = otsu(log_drift)

    # Samples whose log-drift falls below the threshold form the low-drift side.
    low_drift = log_drift < tau
    is_right = correct == 1
    is_wrong = correct == 0
    A = low_drift & is_right
    B = low_drift & is_wrong
    C = ~low_drift & is_right
    D = ~low_drift & is_wrong

    lam1 = d["lyap_spec"][:, 0]

    def median_in(cell):
        # Median over an empty selection is undefined; report NaN instead.
        return float(np.median(lam1[cell])) if cell.sum() else float("nan")

    return {
        "acc": float(correct.mean()),
        "tau": tau,
        "nA": int(A.sum()),
        "nB": int(B.sum()),
        "nC": int(C.sum()),
        "nD": int(D.sum()),
        "fD": float(D.mean()),
        "l1A": median_in(A),
        "l1D": median_in(D),
    }


# Markdown report, accumulated one entry per output line; later sections append to it.
lines = ["# Phase-1 analysis (E5 horizon sweep, E6 matched-objective, E2 replication)", ""]

# ---------- E5 horizon sweep ----------
# For each prefix length H, look only at samples that are not yet exactly correct at H and
# measure how well each early signal ranks the ones that are correct in the full-length run.
lines.extend([
    "## E5: when does fate become legible? AUC(early signal -> FINAL correct), restricted to not-yet-correct@H",
    "",
])
finals = {
    "trm": np.load(RETEST / "trm_gbs768_step58590_full_n2048.npz"),
    "hrm": np.load(RETEST / "hrm_righteous_step26040_full_n2048.npz"),
}
# include h=4 from retest short runs
short4 = {
    "trm": np.load(RETEST / "trm_gbs768_step58590_short_n2048.npz"),
    "hrm": np.load(RETEST / "hrm_righteous_step26040_short_n2048.npz"),
}
sweep = {}
for model in ("trm", "hrm"):
    full_run = finals[model]
    full_idx = full_run["idx"]
    full_correct = full_run["exact_correct"].astype(int)
    rows = []
    for H in (2, 4, 6, 8, 10, 12):
        if H != 4:
            tag = f"{'trm_official58590' if model=='trm' else 'hrm26040'}_h{H}_n2048"
            prefix_run = np.load(P1 / f"{tag}.npz")
        else:
            prefix_run = short4[model]

        # Align the two runs on the sample ids they share.
        _, in_full, in_prefix = np.intersect1d(full_idx, prefix_run["idx"], return_indices=True)
        eventual = full_correct[in_full]
        correct_at_h = prefix_run["exact_correct"].astype(int)[in_prefix]
        lam1 = prefix_run["lyap_spec"][in_prefix, 0]
        log_drift = np.log10(np.clip(prefix_run["drift_zH"][in_prefix, -1], 1e-12, None))
        q_last = prefix_run["q_halt"][in_prefix, -1]

        undecided = correct_at_h == 0  # not yet correct at H
        n_undecided = int(undecided.sum())
        target = eventual[undecided]
        rows.append({
            "H": H,
            "n_und": n_undecided,
            "frac_eventual": float(target.mean()) if n_undecided else float("nan"),
            # λ₁ and drift are negated: lower values are scored as "more likely correct".
            "auc_l1": auc_rank(-lam1[undecided], target),
            "auc_drift": auc_rank(-log_drift[undecided], target),
            "auc_q": auc_rank(q_last[undecided], target),
            "solved_at_H": float(correct_at_h.mean()),
        })
    sweep[model] = rows

    # One markdown table per model.
    lines.append(f"### {model.upper()}")
    lines.append("| H | solved@H | undecided n | of which eventual✓ | AUC(-λ₁) | AUC(-drift) | AUC(q_halt) |")
    lines.append("|---|---|---|---|---|---|---|")
    lines.extend(
        f"| {r['H']} | {r['solved_at_H']:.3f} | {r['n_und']} | {r['frac_eventual']:.3f} | "
        f"{r['auc_l1']:.3f} | {r['auc_drift']:.3f} | {r['auc_q']:.3f} |"
        for r in rows
    )
    lines.append("")

# E5 figure: one panel per model, AUC of each early signal as a function of prefix length.
fig, axes = plt.subplots(1, 2, figsize=(11, 4))
# (row key, matplotlib format string, legend label) for every plotted signal.
curves = (
    ("auc_l1", "o-", "−λ₁"),
    ("auc_drift", "s-", "−drift"),
    ("auc_q", "^-", "q_halt"),
)
for ax, model in zip(axes, ["trm", "hrm"]):
    rows = sweep[model]
    horizons = [r["H"] for r in rows]
    for key, fmt, label in curves:
        ax.plot(horizons, [r[key] for r in rows], fmt, label=label)
    # Chance level for an AUC.
    ax.axhline(0.5, color="gray", lw=0.6, ls="--")
    ax.set_xlabel("prefix length H (ACT segments)")
    ax.set_ylabel("AUC → final correct (undecided@H only)")
    ax.set_ylim(0.3, 1.0)
    ax.set_title(f"{model.upper()}: legibility of fate vs prefix")
    ax.legend(fontsize=8)
fig.tight_layout()
fig.savefig(P1 / "fig_E5_horizon_sweep.png", dpi=150)
plt.close(fig)

# ---------- E6 matched-objective ----------
# Side-by-side comparison of a base run and its "multi4" counterpart at several checkpoints.
lines.extend(["## E6: matched-objective intervention (step9 fixed-unroll runs, n=512)", ""])
e6_pairs = (
    ("HRM (E base vs F multi4)", "step9E_hrm", "step9F_hrm"),
    ("TRM (G base vs H multi4)", "step9G_trm", "step9H_trm"),
)
for fam, base, mult in e6_pairs:
    lines.extend([
        f"### {fam}",
        "| ckpt | base acc | base fD | base λ₁(D) | multi4 acc | multi4 fD | multi4 λ₁(D) |",
        "|---|---|---|---|---|---|---|",
    ])
    for ck in ("step_12500", "step_25000", "best", "final"):
        bp = P1 / f"{base}_{ck}_n512.npz"
        mp = P1 / f"{mult}_{ck}_n512.npz"
        if bp.exists() and mp.exists():
            b = cells_of(np.load(bp))
            m = cells_of(np.load(mp))
            lines.append(f"| {ck} | {b['acc']:.3f} | {b['fD']:.3f} | {b['l1D']:+.4f} | "
                         f"{m['acc']:.3f} | {m['fD']:.3f} | {m['l1D']:+.4f} |")
        else:
            # Either side of the pair is absent: keep the row but flag it.
            lines.append(f"| {ck} | missing | | | | | |")
    lines.append("")

# ---------- E2 second-run replication ----------
# Full 2x2 cell decomposition for the "best" and "final" checkpoints of the step9E HRM run.
lines.extend([
    "## E2: decomposition on a SECOND HRM training run (step9_E fixed-unroll, n=2048)",
    "",
    "| ckpt | acc | A | B | C | D | λ₁(A) | λ₁(D) | settled-wrong frac of failures |",
    "|---|---|---|---|---|---|---|---|---|",
])
for ck in ("best", "final"):
    cc = cells_of(np.load(P1 / f"step9E_hrm_{ck}_full_n2048.npz"))
    # Failures are the two "wrong" cells; max(..., 1) below avoids dividing by zero.
    nfail = cc["nB"] + cc["nD"]
    lines.append(f"| {ck} | {cc['acc']:.3f} | {cc['nA']} | {cc['nB']} | {cc['nC']} | {cc['nD']} | "
                 f"{cc['l1A']:+.4f} | {cc['l1D']:+.4f} | {cc['nB']/max(nfail,1):.4f} |")

# Persist and echo the report. The text contains non-ASCII characters (e.g. "✓", "λ₁"),
# so the file encoding is pinned to UTF-8; the locale default (e.g. cp1252 on Windows)
# cannot encode them and would raise UnicodeEncodeError.
(P1 / "phase1_results.md").write_text("\n".join(lines), encoding="utf-8")
print("\n".join(lines))
print("\nwrote", P1 / "phase1_results.md", "and fig_E5_horizon_sweep.png")