# Provenance (taken from the patch header this file was exported with):
#   commit 66e0d8b9fd4d0f7a2231d689c055e26fdf1cf04a
#   From: YurenHao0426    Date: Sat, 13 Jun 2026 12:35:36 -0500
#   Subject: rrm workspace: TRM/HRM/SRM code, Maze dataset, dynamical-analysis pipeline
#     Curated export for clone-and-run Maze training (2x A6000) + diagnostics.
#     trm/hrm pretrain.py carry trajectory-augmentation code (backward-compatible).
#     Heavy artifacts (checkpoints/wandb/npz) gitignored; see PROVENANCE.md.
#     Co-Authored-By: Claude Fable 5
#   Path: research/flossing/analysis_2x2/analyze_phase1.py
"""Analyze phase-1 results: E5 horizon sweep, E6 matched-objective pairs, E2 second-run replication.

Outputs: analysis_2x2/phase1/phase1_results.md + figures.

Run as a script (``python analyze_phase1.py``).  Importing the module only
defines the helpers; no files are read or written until ``main()`` is called.
"""
from __future__ import annotations

from pathlib import Path

import numpy as np

HERE = Path(__file__).resolve().parent
P1 = HERE / "phase1"
RETEST = HERE / "retest"

# Arrays read by cells_of() and by the E5 sweep, respectively.
_CELL_KEYS = ("drift_zH", "exact_correct", "lyap_spec")
_PREFIX_KEYS = ("idx", "exact_correct", "lyap_spec", "drift_zH", "q_halt")

# E5 inputs: full-length runs, the H=4 "short" runs (both under retest/), and
# the file-name stem used for the remaining horizons under phase1/.
_E5_MODELS = ("trm", "hrm")
_E5_HORIZONS = (2, 4, 6, 8, 10, 12)
_E5_FINAL = {"trm": "trm_gbs768_step58590_full_n2048.npz",
             "hrm": "hrm_righteous_step26040_full_n2048.npz"}
_E5_SHORT = {"trm": "trm_gbs768_step58590_short_n2048.npz",
             "hrm": "hrm_righteous_step26040_short_n2048.npz"}
_E5_TAG = {"trm": "trm_official58590", "hrm": "hrm26040"}

# E6 inputs: (section title, base-run prefix, multi4-run prefix).
_E6_FAMILIES = (("HRM (E base vs F multi4)", "step9E_hrm", "step9F_hrm"),
                ("TRM (G base vs H multi4)", "step9G_trm", "step9H_trm"))
_E6_CKPTS = ("step_12500", "step_25000", "best", "final")


def _load_npz(path, keys):
    """Read the arrays named in *keys* from an ``.npz`` file into a dict.

    The archive is opened in a ``with`` block so its file handle is closed
    before returning; a bare ``np.load`` on an ``.npz`` keeps it open.
    """
    with np.load(path) as archive:
        return {key: archive[key] for key in keys}


def auc_rank(score, label):
    """Return the rank-based AUC of *score* for separating label 1 from label 0.

    Computed as the Mann-Whitney U statistic of the label-1 scores divided by
    ``n_pos * n_neg``.  Tied scores receive the mean of the ranks they span.
    Entries whose label is neither 0 nor 1 are ignored.

    Returns ``nan`` when either class is empty.
    """
    score = np.asarray(score, float)
    label = np.asarray(label, int)
    pos, neg = score[label == 1], score[label == 0]
    n_pos, n_neg = len(pos), len(neg)
    if n_pos == 0 or n_neg == 0:
        return float("nan")

    allv = np.concatenate([pos, neg])
    order = np.argsort(allv, kind="mergesort")
    sv = allv[order]

    # Group equal neighbours in sorted order.  ``!=`` is used on purpose so
    # NaN never ties with anything, exactly like an element-wise ``==`` scan.
    starts = np.concatenate(([True], sv[1:] != sv[:-1]))
    group = np.cumsum(starts) - 1
    counts = np.bincount(group)
    last_rank = np.cumsum(counts)               # highest 1-based rank in each group
    mean_rank = last_rank - (counts - 1) / 2.0  # mean of the ranks in each group

    ranks = np.empty(len(allv))
    ranks[order] = mean_rank[group]

    # Positives occupy the first n_pos slots of ``allv``.
    u_stat = ranks[:n_pos].sum() - n_pos * (n_pos + 1) / 2
    return float(u_stat / (n_pos * n_neg))


def otsu(x, nbins=256):
    """Return Otsu's threshold for the 1-D sample *x*.

    The sample is histogrammed into *nbins* equal-width bins and the centre of
    the bin that maximises the between-class variance is returned.

    Raises:
        ValueError: if *x* is empty or all of its mass falls into one bin, so
            that no split leaves both classes non-empty.
    """
    h, e = np.histogram(x, bins=nbins)
    h = h.astype(float)
    total = h.sum()
    if total == 0:
        raise ValueError("otsu: empty input, no threshold can be computed")

    c = (e[:-1] + e[1:]) / 2  # bin centres
    p = h / total
    om = np.cumsum(p)         # cumulative class-0 weight
    mu = np.cumsum(p * c)     # cumulative first moment
    mt = mu[-1]               # total mean

    den = om * (1 - om)
    splittable = den > 0
    if not splittable.any():
        raise ValueError("otsu: input occupies a single bin, no threshold exists")
    den[~splittable] = np.nan  # exclude splits with an empty class from the argmax
    return float(c[np.nanargmax((mt * om - mu) ** 2 / den)])


def cells_of(d):
    """Split the samples of one run into a 2x2 (low-drift x correct) table.

    *d* is a mapping (dict or opened ``.npz``) providing ``drift_zH``,
    ``exact_correct`` and ``lyap_spec``.  A sample counts as low-drift when
    log10 of its mean drift over the last four columns of ``drift_zH`` lies
    below the Otsu threshold of those log-drifts.

    Cells: A = low-drift & correct, B = low-drift & wrong,
    C = high-drift & correct, D = high-drift & wrong.

    Returns a dict with ``acc``, ``tau`` (the threshold), the cell counts
    ``nA``..``nD``, ``fD`` (fraction of samples in D) and ``l1A`` / ``l1D``,
    the median of ``lyap_spec[:, 0]`` in cells A and D (``nan`` if empty).
    """
    ld = np.log10(np.clip(d["drift_zH"][:, -4:].mean(1), 1e-12, None))
    c = d["exact_correct"].astype(int)
    tau = otsu(ld)
    conv = ld < tau

    A = conv & (c == 1)
    B = conv & (c == 0)
    C = (~conv) & (c == 1)
    D = (~conv) & (c == 0)
    l1 = d["lyap_spec"][:, 0]

    return dict(
        acc=float(c.mean()),
        tau=tau,
        nA=int(A.sum()), nB=int(B.sum()), nC=int(C.sum()), nD=int(D.sum()),
        fD=float(D.mean()),
        l1A=float(np.median(l1[A])) if A.any() else float("nan"),
        l1D=float(np.median(l1[D])) if D.any() else float("nan"),
    )


def _e5_rows(model):
    """Compute one E5 table row per prefix length H for *model*.

    Each prefix run is aligned with the full-length run on ``idx``; the AUCs
    use only samples that are not yet correct at H and score them against the
    final ``exact_correct``.
    """
    fin = _load_npz(RETEST / _E5_FINAL[model], ("idx", "exact_correct"))
    y_final_all = fin["exact_correct"].astype(int)

    rows = []
    for H in _E5_HORIZONS:
        if H == 4:
            # h=4 comes from the retest "short" runs rather than phase1/.
            path = RETEST / _E5_SHORT[model]
        else:
            path = P1 / f"{_E5_TAG[model]}_h{H}_n2048.npz"
        s = _load_npz(path, _PREFIX_KEYS)

        _, fp, sp = np.intersect1d(fin["idx"], s["idx"], return_indices=True)
        yf = y_final_all[fp]
        ye = s["exact_correct"].astype(int)[sp]
        l1 = s["lyap_spec"][sp, 0]
        dr = np.log10(np.clip(s["drift_zH"][sp, -1], 1e-12, None))
        q = s["q_halt"][sp, -1]

        m = ye == 0  # not yet correct at H
        rows.append(dict(
            H=H,
            n_und=int(m.sum()),
            frac_eventual=float(yf[m].mean()) if m.any() else float("nan"),
            auc_l1=auc_rank(-l1[m], yf[m]),
            auc_drift=auc_rank(-dr[m], yf[m]),
            auc_q=auc_rank(q[m], yf[m]),
            solved_at_H=float(ye.mean()),
        ))
    return rows


def _e5_lines(sweep):
    """Render the E5 markdown section from the per-model rows in *sweep*."""
    out = ["## E5: when does fate become legible? AUC(early signal -> FINAL correct), restricted to not-yet-correct@H", ""]
    for model in _E5_MODELS:
        out.append(f"### {model.upper()}")
        out.append("| H | solved@H | undecided n | of which eventual✓ | AUC(-λ₁) | AUC(-drift) | AUC(q_halt) |")
        out.append("|---|---|---|---|---|---|---|")
        for r in sweep[model]:
            out.append(f"| {r['H']} | {r['solved_at_H']:.3f} | {r['n_und']} | {r['frac_eventual']:.3f} | "
                       f"{r['auc_l1']:.3f} | {r['auc_drift']:.3f} | {r['auc_q']:.3f} |")
        out.append("")
    return out


def _plot_e5(sweep, out_path):
    """Save the E5 figure (AUC vs prefix length, one panel per model) to *out_path*."""
    # Imported here so the numeric helpers stay importable without matplotlib;
    # the Agg backend is still selected before pyplot is loaded.
    import matplotlib
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt

    fig, axes = plt.subplots(1, 2, figsize=(11, 4))
    for ax, model in zip(axes, _E5_MODELS):
        rows = sweep[model]
        horizons = [r["H"] for r in rows]
        ax.plot(horizons, [r["auc_l1"] for r in rows], "o-", label="−λ₁")
        ax.plot(horizons, [r["auc_drift"] for r in rows], "s-", label="−drift")
        ax.plot(horizons, [r["auc_q"] for r in rows], "^-", label="q_halt")
        ax.axhline(0.5, color="gray", lw=0.6, ls="--")
        ax.set_xlabel("prefix length H (ACT segments)")
        ax.set_ylabel("AUC → final correct (undecided@H only)")
        ax.set_ylim(0.3, 1.0)
        ax.set_title(f"{model.upper()}: legibility of fate vs prefix")
        ax.legend(fontsize=8)
    fig.tight_layout()
    fig.savefig(out_path, dpi=150)
    plt.close(fig)


def _e6_lines():
    """Render the E6 markdown section; checkpoints lacking either file are marked missing."""
    out = ["## E6: matched-objective intervention (step9 fixed-unroll runs, n=512)", ""]
    for fam, base, mult in _E6_FAMILIES:
        out.append(f"### {fam}")
        out.append("| ckpt | base acc | base fD | base λ₁(D) | multi4 acc | multi4 fD | multi4 λ₁(D) |")
        out.append("|---|---|---|---|---|---|---|")
        for ck in _E6_CKPTS:
            bp = P1 / f"{base}_{ck}_n512.npz"
            mp = P1 / f"{mult}_{ck}_n512.npz"
            if not (bp.exists() and mp.exists()):
                out.append(f"| {ck} | missing | | | | | |")
                continue
            b = cells_of(_load_npz(bp, _CELL_KEYS))
            m = cells_of(_load_npz(mp, _CELL_KEYS))
            out.append(f"| {ck} | {b['acc']:.3f} | {b['fD']:.3f} | {b['l1D']:+.4f} | "
                       f"{m['acc']:.3f} | {m['fD']:.3f} | {m['l1D']:+.4f} |")
        out.append("")
    return out


def _e2_lines():
    """Render the E2 markdown section for the ``best`` and ``final`` step9E_hrm checkpoints."""
    out = ["## E2: decomposition on a SECOND HRM training run (step9_E fixed-unroll, n=2048)", ""]
    out.append("| ckpt | acc | A | B | C | D | λ₁(A) | λ₁(D) | settled-wrong frac of failures |")
    out.append("|---|---|---|---|---|---|---|---|---|")
    for ck in ("best", "final"):
        cc = cells_of(_load_npz(P1 / f"step9E_hrm_{ck}_full_n2048.npz", _CELL_KEYS))
        nfail = cc["nB"] + cc["nD"]
        out.append(f"| {ck} | {cc['acc']:.3f} | {cc['nA']} | {cc['nB']} | {cc['nC']} | {cc['nD']} | "
                   f"{cc['l1A']:+.4f} | {cc['l1D']:+.4f} | {cc['nB']/max(nfail,1):.4f} |")
    return out


def main():
    """Run the E5, E6 and E2 analyses; write the figure and the markdown report."""
    lines = ["# Phase-1 analysis (E5 horizon sweep, E6 matched-objective, E2 replication)", ""]

    sweep = {model: _e5_rows(model) for model in _E5_MODELS}
    lines += _e5_lines(sweep)
    _plot_e5(sweep, P1 / "fig_E5_horizon_sweep.png")

    lines += _e6_lines()
    lines += _e2_lines()

    report = "\n".join(lines)
    # The report contains non-ASCII characters (λ₁, ✓), so the encoding is
    # pinned instead of depending on the platform locale.
    (P1 / "phase1_results.md").write_text(report, encoding="utf-8")
    print(report)
    print("\nwrote", P1 / "phase1_results.md", "and fig_E5_horizon_sweep.png")


if __name__ == "__main__":
    main()