1 files changed, 157 insertions, 0 deletions
diff --git a/research/flossing/merge_and_analyze.py b/research/flossing/merge_and_analyze.py
new file mode 100644
index 0000000..3aeb28f
--- /dev/null
+++ b/research/flossing/merge_and_analyze.py
@@ -0,0 +1,157 @@
+"""Merge shard npz files and produce top-k Lyapunov analysis + plots."""
+import numpy as np
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+from scipy import stats
+import argparse, glob, os
+
+ap = argparse.ArgumentParser()
+ap.add_argument("--in-glob", default="/home/yurenh2/rrm/research/flossing/diag_8k_shard*.npz")
+ap.add_argument("--out-dir", default="/home/yurenh2/rrm/research/flossing/plots_topk")
+ap.add_argument("--merged-npz", default="/home/yurenh2/rrm/research/flossing/diag_8k.npz")
+args = ap.parse_args()
+
+os.makedirs(args.out_dir, exist_ok=True)
+
+files = sorted(glob.glob(args.in_glob))
+print(f"Merging {len(files)} files:")
+for f in files: print(f"  {f}")
+
+merged = {}
+for f in files:
+    d = np.load(f)
+    for k in d.files:
+        merged.setdefault(k, []).append(d[k])
+for k in list(merged.keys()):
+    merged[k] = np.concatenate(merged[k], axis=0)
+np.savez_compressed(args.merged_npz, **merged)
+print(f"Merged → {args.merged_npz}")
+print(f"N={len(merged['exact_correct'])}")
+
+lyap_spec = merged["lyap_spec"]              # (N, K)
+exact = merged["exact_correct"].astype(bool)
+token_acc = merged["token_acc"]
+drift_zH = merged["drift_zH"]
+drift_zL = merged["drift_zL"]
+q_halt = merged["q_halt"]; q_continue = merged["q_continue"]
+N, K = lyap_spec.shape
+print(f"K (top-k) = {K}")
+
+succ = lyap_spec[exact]; fail = lyap_spec[~exact]
+print(f"acc = {exact.mean():.4f}")
+
+# --- per-lambda stats ---
+print(f"\n--per-λ stats (Δ = mean_fail - mean_succ; positive ⇒ failures are LESS contractive) --")
+print(f"{'i':>3} {'mean_all':>10} {'mean_succ':>10} {'mean_fail':>10} {'Δ':>8} {'cohen_d':>8} {'auroc':>7} {'p_t':>10}")
+for i in range(K):
+    li = lyap_spec[:, i]; ls = li[exact]; lf = li[~exact]
+    pooled = np.sqrt(((ls.size-1)*ls.var() + (lf.size-1)*lf.var()) / (ls.size+lf.size-2))
+    d_c = (ls.mean() - lf.mean()) / pooled
+    delta = lf.mean() - ls.mean()
+    auc = stats.mannwhitneyu(lf, ls, alternative="greater").statistic / (lf.size * ls.size)
+    t, p = stats.ttest_ind(ls, lf, equal_var=False)
+    print(f"{i+1:>3} {li.mean():+10.4f} {ls.mean():+10.4f} {lf.mean():+10.4f} {delta:+8.4f} {abs(d_c):>8.3f} {auc:>7.3f} {p:>10.2e}")
+
+# --- combined predictor: which λ best separates? sum? mean? λ_1 alone? ---
+print(f"\n--combined predictors--")
+def auc(score, target):
+    # AUC: probability the score is higher for fail than succ
+    return stats.mannwhitneyu(score[~exact], score[exact], alternative="greater").statistic / ((~exact).sum() * exact.sum())
+for label, score in [
+    ("λ_1 alone", lyap_spec[:,0]),
+    ("λ_K alone", lyap_spec[:,-1]),
+    ("mean λ", lyap_spec.mean(axis=1)),
+    ("λ_1 + λ_K", lyap_spec[:,0] + lyap_spec[:,-1]),
+    ("sum λ", lyap_spec.sum(axis=1)),
+    ("max(λ) (= λ_1)", lyap_spec.max(axis=1)),
+]:
+    print(f"  {label:18s}: AUROC = {auc(score, ~exact):.4f}")
+
+# --- plot 1: Lyapunov spectrum mean ± std ---
+fig, ax = plt.subplots(1, 1, figsize=(7,5))
+mean_s = succ.mean(0); std_s = succ.std(0)
+mean_f = fail.mean(0); std_f = fail.std(0)
+x = np.arange(1, K+1)
+ax.fill_between(x, mean_s-std_s, mean_s+std_s, color="C0", alpha=0.25)
+ax.plot(x, mean_s, "C0-o", label=f"success (n={succ.shape[0]})", lw=2)
+ax.fill_between(x, mean_f-std_f, mean_f+std_f, color="C3", alpha=0.25)
+ax.plot(x, mean_f, "C3-o", label=f"failure (n={fail.shape[0]})", lw=2)
+ax.axhline(0, color="k", ls=":", lw=0.6)
+ax.set_xlabel(r"Lyapunov index $i$")
+ax.set_ylabel(r"$\lambda_i$ (per inner cycle)")
+ax.set_title(f"HRM Sudoku-Extreme-1k @ step_26040: top-{K} Lyapunov spectrum\n"
+             f"N={N}, acc={exact.mean():.3f}")
+ax.legend()
+ax.grid(alpha=0.3)
+fig.tight_layout()
+fig.savefig(f"{args.out_dir}/lyap_spectrum.png", dpi=130)
+plt.close()
+
+# --- plot 2: λ_1 hist (re-do at 8k) ---
+fig, ax = plt.subplots(1, 1, figsize=(7,4))
+lo = min(lyap_spec[:,0].min(), -1.5); hi = max(lyap_spec[:,0].max(), -0.1)
+bins = np.linspace(lo, hi, 60)
+ax.hist(succ[:,0], bins=bins, alpha=0.55, label=f"success (n={succ.shape[0]})", color="C0", density=True)
+ax.hist(fail[:,0], bins=bins, alpha=0.55, label=f"failure (n={fail.shape[0]})", color="C3", density=True)
+ax.axvline(succ[:,0].mean(), color="C0", ls="--", lw=1)
+ax.axvline(fail[:,0].mean(), color="C3", ls="--", lw=1)
+ax.set_xlabel(r"$\lambda_1$ (top Lyapunov exponent)")
+ax.set_ylabel("density")
+auc1 = auc(lyap_spec[:,0], ~exact)
+ax.set_title(f"$\\lambda_1$ distribution by outcome (N={N}, AUROC={auc1:.3f})")
+ax.legend()
+fig.tight_layout()
+fig.savefig(f"{args.out_dir}/lyap1_hist.png", dpi=130)
+plt.close()
+
+# --- plot 3: 2D histogram of (λ_1, λ_8) ---
+fig, ax = plt.subplots(1, 1, figsize=(6.5,5.5))
+ax.scatter(lyap_spec[exact,0], lyap_spec[exact,-1], s=3, alpha=0.3, color="C0", label="success")
+ax.scatter(lyap_spec[~exact,0], lyap_spec[~exact,-1], s=3, alpha=0.3, color="C3", label="failure")
+ax.set_xlabel(r"$\lambda_1$")
+ax.set_ylabel(r"$\lambda_{%d}$" % K)
+ax.set_title(f"Top vs bottom of the top-{K} spectrum")
+ax.legend()
+ax.plot([-1.5,0], [-1.5,0], "k:", lw=0.6, label="diag")
+fig.tight_layout()
+fig.savefig(f"{args.out_dir}/lyap1_vs_lyapK.png", dpi=130)
+plt.close()
+
+# --- plot 4: spectrum slope and gap analysis ---
+slope = (lyap_spec[:, 0] - lyap_spec[:, -1])  # λ_1 - λ_K = "spread"
+spread_succ = slope[exact]; spread_fail = slope[~exact]
+fig, ax = plt.subplots(1, 1, figsize=(6,4))
+ax.hist(spread_succ, bins=40, alpha=0.55, label=f"success", color="C0", density=True)
+ax.hist(spread_fail, bins=40, alpha=0.55, label=f"failure", color="C3", density=True)
+ax.set_xlabel(r"$\lambda_1 - \lambda_{%d}$  (spectrum spread)" % K)
+ax.set_ylabel("density")
+spread_auc = auc(slope, ~exact)
+ax.set_title(f"Top-bottom Lyapunov spread (AUROC={spread_auc:.3f})\n"
+             f"succ={spread_succ.mean():.4f}±{spread_succ.std():.4f}, "
+             f"fail={spread_fail.mean():.4f}±{spread_fail.std():.4f}")
+ax.legend()
+fig.tight_layout()
+fig.savefig(f"{args.out_dir}/lyap_spread.png", dpi=130)
+plt.close()
+
+# --- plot 5: drift per ACT step (revisited) ---
+fig, axes = plt.subplots(1, 2, figsize=(12,4))
+for ax, drift, name in [(axes[0], drift_zH, "$\\|\\Delta z_H\\|$"),
+                         (axes[1], drift_zL, "$\\|\\Delta z_L\\|$")]:
+    mean_s = drift[exact].mean(0); std_s = drift[exact].std(0)
+    mean_f = drift[~exact].mean(0); std_f = drift[~exact].std(0)
+    x = np.arange(drift.shape[1])
+    ax.fill_between(x, mean_s-std_s, mean_s+std_s, color="C0", alpha=0.2)
+    ax.plot(x, mean_s, "C0-o", label="success", lw=2)
+    ax.fill_between(x, mean_f-std_f, mean_f+std_f, color="C3", alpha=0.2)
+    ax.plot(x, mean_f, "C3-o", label="failure", lw=2)
+    ax.set_xlabel("ACT step")
+    ax.set_title(name)
+    ax.legend()
+    ax.set_yscale("log")
+fig.tight_layout()
+fig.savefig(f"{args.out_dir}/drift_per_act.png", dpi=130)
+plt.close()
+
+print(f"\nplots saved to {args.out_dir}/")