# research/flossing/merge_and_analyze.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157

"""Merge shard npz files and produce top-k Lyapunov analysis + plots."""
import numpy as np
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from scipy import stats
import argparse, glob, os

# --- command-line options: shard glob, plot directory, merged-archive path ---
cli = argparse.ArgumentParser()
for flag, fallback in (
    ("--in-glob", "/home/yurenh2/rrm/research/flossing/diag_8k_shard*.npz"),
    ("--out-dir", "/home/yurenh2/rrm/research/flossing/plots_topk"),
    ("--merged-npz", "/home/yurenh2/rrm/research/flossing/diag_8k.npz"),
):
    cli.add_argument(flag, default=fallback)
args = cli.parse_args()

# The plot directory has to exist before any figure is written into it.
os.makedirs(args.out_dir, exist_ok=True)

# --- merge: concatenate every array key across all shard archives (along axis 0) ---
files = sorted(glob.glob(args.in_glob))
print(f"Merging {len(files)} files:")
for f in files:
    print(f"  {f}")
if not files:
    # Without this check the script only fails further down with an opaque
    # KeyError on 'exact_correct', after having written an empty archive.
    raise SystemExit(f"No shard files matched {args.in_glob!r}; nothing to merge.")

merged = {}
for f in files:
    # np.load on an .npz keeps the underlying file open until it is closed;
    # the context manager releases each shard's handle once its arrays are read.
    with np.load(f) as d:
        for k in d.files:
            merged.setdefault(k, []).append(d[k])
for k in list(merged.keys()):
    merged[k] = np.concatenate(merged[k], axis=0)

# Persist the merged arrays under their original key names.
np.savez_compressed(args.merged_npz, **merged)
print(f"Merged → {args.merged_npz}")
print(f"N={len(merged['exact_correct'])}")

# --- unpack the merged arrays (keys are read in the order listed) ---
lyap_spec, exact_raw, token_acc, drift_zH, drift_zL, q_halt, q_continue = (
    merged[key]
    for key in (
        "lyap_spec",        # unpacked below as (N, K)
        "exact_correct",
        "token_acc",
        "drift_zH",
        "drift_zL",
        "q_halt",
        "q_continue",
    )
)
exact = exact_raw.astype(bool)  # boolean mask: True selects the "success" rows
N, K = lyap_spec.shape
print(f"K (top-k) = {K}")

# Split the spectra by outcome; `fail` is the complement of `succ`.
succ = lyap_spec[exact]
fail = lyap_spec[np.logical_not(exact)]
print(f"acc = {exact.mean():.4f}")

# --- per-lambda stats ---
# One table row per exponent column: overall / success / failure means,
# Δ = mean_fail - mean_succ, |Cohen's d|, AUROC of "larger λ ⇒ failure"
# (Mann-Whitney U normalised by n_fail * n_succ) and the Welch t-test p-value.
print("\n--per-λ stats (Δ = mean_fail - mean_succ; positive ⇒ failures are LESS contractive) --")
print(f"{'i':>3} {'mean_all':>10} {'mean_succ':>10} {'mean_fail':>10} {'Δ':>8} {'cohen_d':>8} {'auroc':>7} {'p_t':>10}")
for idx, lam in enumerate(lyap_spec.T, start=1):
    lam_succ = lam[exact]
    lam_fail = lam[~exact]
    n_succ, n_fail = lam_succ.size, lam_fail.size
    # The pooled SD weights each group variance by (n - 1), which is only
    # consistent with the unbiased sample variance; NumPy's default is ddof=0.
    pooled = np.sqrt(
        ((n_succ - 1) * lam_succ.var(ddof=1) + (n_fail - 1) * lam_fail.var(ddof=1))
        / (n_succ + n_fail - 2)
    )
    d_c = (lam_succ.mean() - lam_fail.mean()) / pooled  # printed as a magnitude below
    delta = lam_fail.mean() - lam_succ.mean()
    # Named auroc_i (not `auc`) so it cannot be confused with the auc() helper in this file.
    auroc_i = stats.mannwhitneyu(lam_fail, lam_succ, alternative="greater").statistic / (n_fail * n_succ)
    # Welch's t-test (unequal variances); only the p-value is reported.
    _, p = stats.ttest_ind(lam_succ, lam_fail, equal_var=False)
    print(f"{idx:>3} {lam.mean():+10.4f} {lam_succ.mean():+10.4f} {lam_fail.mean():+10.4f} {delta:+8.4f} {abs(d_c):>8.3f} {auroc_i:>7.3f} {p:>10.2e}")

# --- combined predictor: which λ best separates? sum? mean? λ_1 alone? ---
print("\n--combined predictors--")


def auc(score, target):
    """Return the AUROC of ``score`` for detecting the samples flagged by ``target``.

    Computed as the Mann-Whitney U statistic of the flagged scores against the
    unflagged ones, divided by the number of (flagged, unflagged) pairs, i.e.
    the probability that a flagged sample scores higher than an unflagged one.

    Args:
        score: 1-D array-like of scalar scores, one per sample.
        target: boolean array-like of the same length; True marks the class
            expected to score higher (callers in this file pass ``~exact``).

    Returns:
        The AUROC as a float, or NaN when either class is empty.
    """
    # Use the mask that was passed in rather than the module-level `exact`,
    # so the result always matches the arguments.
    target = np.asarray(target, dtype=bool)
    score = np.asarray(score)
    pos = score[target]
    neg = score[~target]
    if pos.size == 0 or neg.size == 0:
        # AUROC is undefined with a single class; avoid a 0-division or a scipy error.
        return float("nan")
    u_stat = stats.mannwhitneyu(pos, neg, alternative="greater").statistic
    return u_stat / (pos.size * neg.size)
# Candidate scalar scores built from the spectrum; each one is ranked by its
# AUROC for flagging failures (the mask passed to auc is ~exact).
candidates = (
    ("λ_1 alone", lyap_spec[:, 0]),
    ("λ_K alone", lyap_spec[:, -1]),
    ("mean λ", lyap_spec.mean(axis=1)),
    ("λ_1 + λ_K", lyap_spec[:, 0] + lyap_spec[:, -1]),
    ("sum λ", lyap_spec.sum(axis=1)),
    ("max(λ) (= λ_1)", lyap_spec.max(axis=1)),
)
for name, values in candidates:
    auroc = auc(values, ~exact)
    print(f"  {name:18s}: AUROC = {auroc:.4f}")

# --- plot 1: Lyapunov spectrum mean ± std ---
# One curve per outcome group: column-wise mean with a ±1 std band.
fig, ax = plt.subplots(1, 1, figsize=(7, 5))
x = np.arange(1, K + 1)  # 1-based exponent index on the x axis
for group, tag, colour in ((succ, "success", "C0"), (fail, "failure", "C3")):
    centre = group.mean(0)
    band = group.std(0)
    ax.fill_between(x, centre - band, centre + band, color=colour, alpha=0.25)
    ax.plot(x, centre, f"{colour}-o", label=f"{tag} (n={group.shape[0]})", lw=2)
ax.axhline(0, color="k", ls=":", lw=0.6)  # zero reference line
ax.set_xlabel(r"Lyapunov index $i$")
ax.set_ylabel(r"$\lambda_i$ (per inner cycle)")
title_top = f"HRM Sudoku-Extreme-1k @ step_26040: top-{K} Lyapunov spectrum\n"
title_bottom = f"N={N}, acc={exact.mean():.3f}"
ax.set_title(title_top + title_bottom)
ax.legend()
ax.grid(alpha=0.3)
fig.tight_layout()
fig.savefig(f"{args.out_dir}/lyap_spectrum.png", dpi=130)
plt.close(fig)

# --- plot 2: λ_1 hist (re-do at 8k) ---
# Overlaid density histograms of the first exponent column for both outcome
# groups, on shared bin edges, with a dashed line at each group's mean.
fig, ax = plt.subplots(1, 1, figsize=(7, 4))
lam1 = lyap_spec[:, 0]
# The binned range always covers at least [-1.5, -0.1], widened to the data if needed.
lo = min(lam1.min(), -1.5)
hi = max(lam1.max(), -0.1)
bins = np.linspace(lo, hi, 60)
outcome_groups = ((succ, "success", "C0"), (fail, "failure", "C3"))
for group, tag, colour in outcome_groups:
    ax.hist(group[:, 0], bins=bins, alpha=0.55, label=f"{tag} (n={group.shape[0]})", color=colour, density=True)
for group, _, colour in outcome_groups:
    ax.axvline(group[:, 0].mean(), color=colour, ls="--", lw=1)
ax.set_xlabel(r"$\lambda_1$ (top Lyapunov exponent)")
ax.set_ylabel("density")
lam1_auroc = auc(lam1, ~exact)
ax.set_title(f"$\\lambda_1$ distribution by outcome (N={N}, AUROC={lam1_auroc:.3f})")
ax.legend()
fig.tight_layout()
fig.savefig(f"{args.out_dir}/lyap1_hist.png", dpi=130)
plt.close(fig)

# --- plot 3: scatter of (λ_1, λ_K): first vs last column of the top-K spectrum ---
fig, ax = plt.subplots(1, 1, figsize=(6.5, 5.5))
ax.scatter(lyap_spec[exact, 0], lyap_spec[exact, -1], s=3, alpha=0.3, color="C0", label="success")
ax.scatter(lyap_spec[~exact, 0], lyap_spec[~exact, -1], s=3, alpha=0.3, color="C3", label="failure")
# Dotted y = x reference line over [-1.5, 0].
ax.plot([-1.5, 0], [-1.5, 0], "k:", lw=0.6, label="diag")
ax.set_xlabel(r"$\lambda_1$")
ax.set_ylabel(r"$\lambda_{%d}$" % K)
ax.set_title(f"Top vs bottom of the top-{K} spectrum")
# legend() only lists artists that already exist, so it has to be called after
# the diagonal is drawn or the "diag" entry is silently missing.
ax.legend()
fig.tight_layout()
fig.savefig(f"{args.out_dir}/lyap1_vs_lyapK.png", dpi=130)
plt.close(fig)

# --- plot 4: spectrum spread (λ_1 - λ_K) by outcome ---
spread = lyap_spec[:, 0] - lyap_spec[:, -1]  # first minus last exponent column
spread_succ = spread[exact]
spread_fail = spread[~exact]
# Shared bin edges over the pooled data so the two overlaid density histograms
# are directly comparable bar-for-bar (with bins=40 each group would get its own edges).
bins = np.histogram_bin_edges(spread, bins=40)
fig, ax = plt.subplots(1, 1, figsize=(6, 4))
ax.hist(spread_succ, bins=bins, alpha=0.55, label="success", color="C0", density=True)
ax.hist(spread_fail, bins=bins, alpha=0.55, label="failure", color="C3", density=True)
ax.set_xlabel(r"$\lambda_1 - \lambda_{%d}$  (spectrum spread)" % K)
ax.set_ylabel("density")
# AUROC of the spread as a failure score (mask passed to auc is ~exact).
spread_auc = auc(spread, ~exact)
ax.set_title(f"Top-bottom Lyapunov spread (AUROC={spread_auc:.3f})\n"
             f"succ={spread_succ.mean():.4f}±{spread_succ.std():.4f}, "
             f"fail={spread_fail.mean():.4f}±{spread_fail.std():.4f}")
ax.legend()
fig.tight_layout()
fig.savefig(f"{args.out_dir}/lyap_spread.png", dpi=130)
plt.close(fig)

# --- plot 5: drift per ACT step (revisited) ---
# Two side-by-side panels (drift_zH, drift_zL); each shows the per-column mean
# with a ±1 std band for success and failure rows, on a log y axis.
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
panels = zip(axes, (drift_zH, drift_zL), ("$\\|\\Delta z_H\\|$", "$\\|\\Delta z_L\\|$"))
for panel, drift, heading in panels:
    steps = np.arange(drift.shape[1])  # one x position per column of the drift array
    for mask, tag, colour in ((exact, "success", "C0"), (~exact, "failure", "C3")):
        rows = drift[mask]
        centre = rows.mean(0)
        band = rows.std(0)
        panel.fill_between(steps, centre - band, centre + band, color=colour, alpha=0.2)
        panel.plot(steps, centre, f"{colour}-o", label=tag, lw=2)
    panel.set_xlabel("ACT step")
    panel.set_title(heading)
    panel.legend()
    panel.set_yscale("log")
fig.tight_layout()
fig.savefig(f"{args.out_dir}/drift_per_act.png", dpi=130)
plt.close(fig)

print(f"\nplots saved to {args.out_dir}/")