"""Merge shard npz files and produce top-k Lyapunov analysis + plots."""
import numpy as np
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from scipy import stats
import argparse, glob, os

ap = argparse.ArgumentParser()
ap.add_argument("--in-glob", default="/home/yurenh2/rrm/research/flossing/diag_8k_shard*.npz")
ap.add_argument("--out-dir", default="/home/yurenh2/rrm/research/flossing/plots_topk")
ap.add_argument("--merged-npz", default="/home/yurenh2/rrm/research/flossing/diag_8k.npz")
args = ap.parse_args()

os.makedirs(args.out_dir, exist_ok=True)

files = sorted(glob.glob(args.in_glob))
print(f"Merging {len(files)} files:")
for f in files: print(f"  {f}")

merged = {}
for f in files:
    d = np.load(f)
    for k in d.files:
        merged.setdefault(k, []).append(d[k])
for k in list(merged.keys()):
    merged[k] = np.concatenate(merged[k], axis=0)
np.savez_compressed(args.merged_npz, **merged)
print(f"Merged → {args.merged_npz}")
print(f"N={len(merged['exact_correct'])}")

lyap_spec = merged["lyap_spec"]              # (N, K)
exact = merged["exact_correct"].astype(bool)
token_acc = merged["token_acc"]
drift_zH = merged["drift_zH"]
drift_zL = merged["drift_zL"]
q_halt = merged["q_halt"]; q_continue = merged["q_continue"]
N, K = lyap_spec.shape
print(f"K (top-k) = {K}")

succ = lyap_spec[exact]; fail = lyap_spec[~exact]
print(f"acc = {exact.mean():.4f}")

# --- per-lambda stats ---
print(f"\n--per-λ stats (Δ = mean_fail - mean_succ; positive ⇒ failures are LESS contractive) --")
print(f"{'i':>3} {'mean_all':>10} {'mean_succ':>10} {'mean_fail':>10} {'Δ':>8} {'cohen_d':>8} {'auroc':>7} {'p_t':>10}")
for i in range(K):
    li = lyap_spec[:, i]; ls = li[exact]; lf = li[~exact]
    pooled = np.sqrt(((ls.size-1)*ls.var() + (lf.size-1)*lf.var()) / (ls.size+lf.size-2))
    d_c = (ls.mean() - lf.mean()) / pooled
    delta = lf.mean() - ls.mean()
    auc = stats.mannwhitneyu(lf, ls, alternative="greater").statistic / (lf.size * ls.size)
    t, p = stats.ttest_ind(ls, lf, equal_var=False)
    print(f"{i+1:>3} {li.mean():+10.4f} {ls.mean():+10.4f} {lf.mean():+10.4f} {delta:+8.4f} {abs(d_c):>8.3f} {auc:>7.3f} {p:>10.2e}")

# --- combined predictor: which λ best separates? sum? mean? λ_1 alone? ---
print(f"\n--combined predictors--")
def auc(score, target):
    # AUC: probability the score is higher for fail than succ
    return stats.mannwhitneyu(score[~exact], score[exact], alternative="greater").statistic / ((~exact).sum() * exact.sum())
for label, score in [
    ("λ_1 alone", lyap_spec[:,0]),
    ("λ_K alone", lyap_spec[:,-1]),
    ("mean λ", lyap_spec.mean(axis=1)),
    ("λ_1 + λ_K", lyap_spec[:,0] + lyap_spec[:,-1]),
    ("sum λ", lyap_spec.sum(axis=1)),
    ("max(λ) (= λ_1)", lyap_spec.max(axis=1)),
]:
    print(f"  {label:18s}: AUROC = {auc(score, ~exact):.4f}")

# --- plot 1: Lyapunov spectrum mean ± std ---
fig, ax = plt.subplots(1, 1, figsize=(7,5))
mean_s = succ.mean(0); std_s = succ.std(0)
mean_f = fail.mean(0); std_f = fail.std(0)
x = np.arange(1, K+1)
ax.fill_between(x, mean_s-std_s, mean_s+std_s, color="C0", alpha=0.25)
ax.plot(x, mean_s, "C0-o", label=f"success (n={succ.shape[0]})", lw=2)
ax.fill_between(x, mean_f-std_f, mean_f+std_f, color="C3", alpha=0.25)
ax.plot(x, mean_f, "C3-o", label=f"failure (n={fail.shape[0]})", lw=2)
ax.axhline(0, color="k", ls=":", lw=0.6)
ax.set_xlabel(r"Lyapunov index $i$")
ax.set_ylabel(r"$\lambda_i$ (per inner cycle)")
ax.set_title(f"HRM Sudoku-Extreme-1k @ step_26040: top-{K} Lyapunov spectrum\n"
             f"N={N}, acc={exact.mean():.3f}")
ax.legend()
ax.grid(alpha=0.3)
fig.tight_layout()
fig.savefig(f"{args.out_dir}/lyap_spectrum.png", dpi=130)
plt.close()

# --- plot 2: λ_1 hist (re-do at 8k) ---
fig, ax = plt.subplots(1, 1, figsize=(7,4))
lo = min(lyap_spec[:,0].min(), -1.5); hi = max(lyap_spec[:,0].max(), -0.1)
bins = np.linspace(lo, hi, 60)
ax.hist(succ[:,0], bins=bins, alpha=0.55, label=f"success (n={succ.shape[0]})", color="C0", density=True)
ax.hist(fail[:,0], bins=bins, alpha=0.55, label=f"failure (n={fail.shape[0]})", color="C3", density=True)
ax.axvline(succ[:,0].mean(), color="C0", ls="--", lw=1)
ax.axvline(fail[:,0].mean(), color="C3", ls="--", lw=1)
ax.set_xlabel(r"$\lambda_1$ (top Lyapunov exponent)")
ax.set_ylabel("density")
auc1 = auc(lyap_spec[:,0], ~exact)
ax.set_title(f"$\\lambda_1$ distribution by outcome (N={N}, AUROC={auc1:.3f})")
ax.legend()
fig.tight_layout()
fig.savefig(f"{args.out_dir}/lyap1_hist.png", dpi=130)
plt.close()

# --- plot 3: 2D histogram of (λ_1, λ_8) ---
fig, ax = plt.subplots(1, 1, figsize=(6.5,5.5))
ax.scatter(lyap_spec[exact,0], lyap_spec[exact,-1], s=3, alpha=0.3, color="C0", label="success")
ax.scatter(lyap_spec[~exact,0], lyap_spec[~exact,-1], s=3, alpha=0.3, color="C3", label="failure")
ax.set_xlabel(r"$\lambda_1$")
ax.set_ylabel(r"$\lambda_{%d}$" % K)
ax.set_title(f"Top vs bottom of the top-{K} spectrum")
ax.legend()
ax.plot([-1.5,0], [-1.5,0], "k:", lw=0.6, label="diag")
fig.tight_layout()
fig.savefig(f"{args.out_dir}/lyap1_vs_lyapK.png", dpi=130)
plt.close()

# --- plot 4: spectrum slope and gap analysis ---
slope = (lyap_spec[:, 0] - lyap_spec[:, -1])  # λ_1 - λ_K = "spread"
spread_succ = slope[exact]; spread_fail = slope[~exact]
fig, ax = plt.subplots(1, 1, figsize=(6,4))
ax.hist(spread_succ, bins=40, alpha=0.55, label=f"success", color="C0", density=True)
ax.hist(spread_fail, bins=40, alpha=0.55, label=f"failure", color="C3", density=True)
ax.set_xlabel(r"$\lambda_1 - \lambda_{%d}$  (spectrum spread)" % K)
ax.set_ylabel("density")
spread_auc = auc(slope, ~exact)
ax.set_title(f"Top-bottom Lyapunov spread (AUROC={spread_auc:.3f})\n"
             f"succ={spread_succ.mean():.4f}±{spread_succ.std():.4f}, "
             f"fail={spread_fail.mean():.4f}±{spread_fail.std():.4f}")
ax.legend()
fig.tight_layout()
fig.savefig(f"{args.out_dir}/lyap_spread.png", dpi=130)
plt.close()

# --- plot 5: drift per ACT step (revisited) ---
fig, axes = plt.subplots(1, 2, figsize=(12,4))
for ax, drift, name in [(axes[0], drift_zH, "$\\|\\Delta z_H\\|$"),
                         (axes[1], drift_zL, "$\\|\\Delta z_L\\|$")]:
    mean_s = drift[exact].mean(0); std_s = drift[exact].std(0)
    mean_f = drift[~exact].mean(0); std_f = drift[~exact].std(0)
    x = np.arange(drift.shape[1])
    ax.fill_between(x, mean_s-std_s, mean_s+std_s, color="C0", alpha=0.2)
    ax.plot(x, mean_s, "C0-o", label="success", lw=2)
    ax.fill_between(x, mean_f-std_f, mean_f+std_f, color="C3", alpha=0.2)
    ax.plot(x, mean_f, "C3-o", label="failure", lw=2)
    ax.set_xlabel("ACT step")
    ax.set_title(name)
    ax.legend()
    ax.set_yscale("log")
fig.tight_layout()
fig.savefig(f"{args.out_dir}/drift_per_act.png", dpi=130)
plt.close()

print(f"\nplots saved to {args.out_dir}/")