analysis_2x2/characterize_separation.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182

"""Characterization of the success/failure FTLE separation (describe WHAT, not WHY).
Char 1: full-spectrum vs lambda1 — is it one mode or a whole-spectrum shift?
Char 2: bimodality vs threshold-on-continuum.
Char 3: separation magnitude vs integration window (uses phase-1 horizon npz).
Char 4: effect size / overlap beyond AUC.
All offline, existing npz. numpy-only. Mechanism is NOT addressed here by design.
"""
from __future__ import annotations
from pathlib import Path
import matplotlib; matplotlib.use("Agg")
import matplotlib.pyplot as plt
import numpy as np

HERE = Path(__file__).resolve().parent
FLOSS = HERE.parent
P1 = HERE / "phase1"; RETEST = HERE / "retest"
OUT = HERE / "characterization"; OUT.mkdir(exist_ok=True)


def auc(score, label):
    score = np.asarray(score, float); label = np.asarray(label, int)
    pos, neg = score[label == 1], score[label == 0]
    if len(pos) == 0 or len(neg) == 0:
        return float("nan")
    a = np.concatenate([pos, neg]); o = np.argsort(a, kind="mergesort")
    r = np.empty(len(a)); r[o] = np.arange(1, len(a) + 1)
    s = a[o]; i = 0
    while i < len(s):
        j = i
        while j + 1 < len(s) and s[j + 1] == s[i]:
            j += 1
        if j > i:
            r[o[i:j + 1]] = r[o[i:j + 1]].mean()
        i = j + 1
    return float((r[:len(pos)].sum() - len(pos) * (len(pos) + 1) / 2) / (len(pos) * len(neg)))


def cohens_d(a, b):
    a, b = np.asarray(a, float), np.asarray(b, float)
    na, nb = len(a), len(b)
    sp = np.sqrt(((na - 1) * a.var(ddof=1) + (nb - 1) * b.var(ddof=1)) / (na + nb - 2))
    return float((a.mean() - b.mean()) / sp) if sp > 0 else float("nan")


def bhattacharyya_gauss(a, b):
    a, b = np.asarray(a, float), np.asarray(b, float)
    m1, m2, v1, v2 = a.mean(), b.mean(), a.var() + 1e-12, b.var() + 1e-12
    bd = 0.25 * np.log(0.25 * (v1 / v2 + v2 / v1 + 2)) + 0.25 * (m1 - m2) ** 2 / (v1 + v2)
    return float(np.exp(-bd))  # 1=identical, 0=disjoint


def hist_overlap(a, b, bins=60):
    lo, hi = min(a.min(), b.min()), max(a.max(), b.max())
    e = np.linspace(lo, hi, bins + 1)
    ha = np.histogram(a, e)[0] / len(a); hb = np.histogram(b, e)[0] / len(b)
    return float(np.minimum(ha, hb).sum())


def bimodality_coeff(x):
    x = np.asarray(x, float); n = len(x)
    m = x.mean(); s = x.std()
    if s == 0:
        return float("nan")
    g = np.mean(((x - m) / s) ** 3)              # skewness
    k = np.mean(((x - m) / s) ** 4) - 3.0         # excess kurtosis
    bc = (g ** 2 + 1) / (k + 3 * (n - 1) ** 2 / ((n - 2) * (n - 3)))
    return float(bc)  # >0.555 suggests bimodality (uniform=0.555)


HEAD = {
    "TRM official @58590 (n=2048)": RETEST / "trm_gbs768_step58590_full_n2048.npz",
    "HRM joint @26040 (n=2048)": RETEST / "hrm_righteous_step26040_full_n2048.npz",
    "HRM @26040 (n=8192, est-2)": FLOSS / "diag_8k.npz",
}

lines = ["# Characterization of the FTLE success/failure separation", "",
         "Describes the separation (shape, spectrum, time-scaling, effect size). Mechanism = open.", ""]

# ---------- Char 1: full spectrum ----------
lines += ["## Char 1 — full spectrum vs lambda1 (one mode or whole-spectrum shift?)", ""]
for name, p in HEAD.items():
    d = np.load(p); ly = d["lyap_spec"].astype(float); c = d["exact_correct"].astype(int)
    k = ly.shape[1]
    per = [auc(-ly[:, i], c) for i in range(k)]
    ks_proxy = np.clip(ly, 0, None).sum(1)        # sum of positive exponents (KS-entropy proxy)
    spec_mean = ly.mean(1)
    lines += [f"### {name}",
              f"- per-exponent AUC(-λ_i→correct), i=1..{k}: " + ", ".join(f"{v:.3f}" for v in per),
              f"- median spectrum | success: " + ", ".join(f"{np.median(ly[c==1,i]):+.3f}" for i in range(k)),
              f"- median spectrum | failure: " + ", ".join(f"{np.median(ly[c==0,i]):+.3f}" for i in range(k)),
              f"- AUC λ1={per[0]:.3f} | AUC spectral-mean={auc(-spec_mean,c):.3f} | "
              f"AUC KS-proxy(Σλ⁺)={auc(-ks_proxy,c):.3f}",
              f"- shift uniformity: success−failure median gap per exponent: " +
              ", ".join(f"{np.median(ly[c==1,i])-np.median(ly[c==0,i]):+.3f}" for i in range(k)), ""]
    # spectrum figure
    fig, ax = plt.subplots(figsize=(6, 4))
    xs = np.arange(1, k + 1)
    ax.plot(xs, [np.median(ly[c == 1, i]) for i in range(k)], "o-", color="tab:green", label="success (median)")
    ax.plot(xs, [np.median(ly[c == 0, i]) for i in range(k)], "s-", color="tab:red", label="failure (median)")
    ax.fill_between(xs, [np.percentile(ly[c==1,i],25) for i in range(k)], [np.percentile(ly[c==1,i],75) for i in range(k)], color="tab:green", alpha=0.15)
    ax.fill_between(xs, [np.percentile(ly[c==0,i],25) for i in range(k)], [np.percentile(ly[c==0,i],75) for i in range(k)], color="tab:red", alpha=0.15)
    ax.axhline(0, color="gray", lw=0.6); ax.set_xlabel("exponent index"); ax.set_ylabel("λ_i")
    ax.set_title(f"{name}: spectrum by outcome"); ax.legend(fontsize=8)
    fig.tight_layout(); fig.savefig(OUT / f"spectrum_{name.split()[0]}_{'n8192' if '8192' in name else 'n2048'}.png", dpi=140); plt.close(fig)

# ---------- Char 2: bimodality vs threshold ----------
lines += ["## Char 2 — bimodality vs threshold-on-continuum", "",
          "BC>0.555 hints bimodal. outcome-rate-vs-λ1 sharpness: width of 25→75% outcome transition,",
          "in λ1 units, divided by the 10–90 inter-percentile spread of λ1 (small = sharp threshold).", ""]
for name, p in HEAD.items():
    d = np.load(p); l1 = d["lyap_spec"][:, 0].astype(float); c = d["exact_correct"].astype(int)
    bc_all = bimodality_coeff(l1); bc_s = bimodality_coeff(l1[c == 1]); bc_f = bimodality_coeff(l1[c == 0])
    # outcome rate vs l1 (sorted, sliding)
    order = np.argsort(l1); ls = l1[order]; cs = c[order]
    win = max(50, len(l1) // 40); rate = np.convolve(cs, np.ones(win) / win, mode="valid")
    lcent = ls[win // 2: win // 2 + len(rate)]
    # find l1 where rate crosses 0.25 and 0.75 (rate decreases with l1)
    def cross(target):
        idx = np.where(np.diff((rate >= target).astype(int)) != 0)[0]
        return float(lcent[idx[0]]) if len(idx) else float("nan")
    l25, l75 = cross(0.25), cross(0.75)
    spread = np.percentile(l1, 90) - np.percentile(l1, 10)
    sharp = abs(l25 - l75) / spread if spread > 0 and np.isfinite(l25) and np.isfinite(l75) else float("nan")
    lines += [f"- {name}: BC(all)={bc_all:.3f}, BC(succ)={bc_s:.3f}, BC(fail)={bc_f:.3f}; "
              f"outcome transition width/spread={sharp:.3f} (λ1@75%={l75:+.3f}, @25%={l25:+.3f})"]
    fig, ax = plt.subplots(1, 2, figsize=(10, 3.6))
    ax[0].hist(l1[c == 1], bins=50, alpha=0.55, color="tab:green", label="success", density=True)
    ax[0].hist(l1[c == 0], bins=50, alpha=0.55, color="tab:red", label="failure", density=True)
    ax[0].set_xlabel("λ1"); ax[0].set_ylabel("density"); ax[0].legend(fontsize=8); ax[0].set_title(f"{name}: λ1 by outcome")
    ax[1].plot(lcent, rate, "-", color="tab:blue"); ax[1].axhline(0.5, color="gray", lw=0.5)
    ax[1].set_xlabel("λ1"); ax[1].set_ylabel("P(correct)"); ax[1].set_title("outcome rate vs λ1")
    fig.tight_layout(); fig.savefig(OUT / f"bimodal_{name.split()[0]}_{'n8192' if '8192' in name else 'n2048'}.png", dpi=140); plt.close(fig)
lines.append("")

# ---------- Char 3: separation vs integration window ----------
lines += ["## Char 3 — separation magnitude vs integration window H (all examples, final outcome)", "",
          "Distinct from E5 (which restricted to undecided@H for *prediction*). Here: how does the",
          "two-class λ1 separation BUILD as the integration window grows?", ""]
finals = {"trm": np.load(RETEST / "trm_gbs768_step58590_full_n2048.npz"),
          "hrm": np.load(RETEST / "hrm_righteous_step26040_full_n2048.npz")}
short4 = {"trm": RETEST / "trm_gbs768_step58590_short_n2048.npz",
          "hrm": RETEST / "hrm_righteous_step26040_short_n2048.npz"}
for model in ["trm", "hrm"]:
    fin = finals[model]; fi = fin["idx"]; yf_all = fin["exact_correct"].astype(int)
    rows = []
    for H in [2, 4, 6, 8, 10, 12, 16]:
        if H == 16:
            s = fin
        elif H == 4:
            s = np.load(short4[model])
        else:
            s = np.load(P1 / f"{'trm_official58590' if model=='trm' else 'hrm26040'}_h{H}_n2048.npz")
        si = s["idx"]; common, fp, sp = np.intersect1d(fi, si, return_indices=True)
        yf = yf_all[fp]; l1 = s["lyap_spec"][sp, 0].astype(float)
        rows.append((H, auc(-l1, yf), cohens_d(l1[yf == 0], l1[yf == 1])))
    lines.append(f"### {model.upper()}: H, AUC(-λ1→final correct), Cohen's d(fail−succ)")
    for H, a, dd in rows:
        lines.append(f"- H={H:2d}: AUC={a:.3f}, d={dd:+.3f}")
    lines.append("")
    fig, ax = plt.subplots(figsize=(6, 4)); H = [r[0] for r in rows]
    ax.plot(H, [r[1] for r in rows], "o-", label="AUC(-λ1→correct)")
    ax2 = ax.twinx(); ax2.plot(H, [abs(r[2]) for r in rows], "s--", color="tab:purple", label="|Cohen's d|")
    ax.axhline(0.5, color="gray", lw=0.5); ax.set_xlabel("integration window H (segments)")
    ax.set_ylabel("AUC"); ax2.set_ylabel("|Cohen's d|"); ax.set_title(f"{model.upper()}: λ1 separation vs window")
    ax.set_ylim(0.4, 1.0); fig.tight_layout(); fig.savefig(OUT / f"sep_vs_window_{model}.png", dpi=140); plt.close(fig)

# ---------- Char 4: effect size ----------
lines += ["## Char 4 — effect size / overlap beyond AUC (full-window λ1 and KS-proxy)", ""]
for name, p in HEAD.items():
    d = np.load(p); l1 = d["lyap_spec"][:, 0].astype(float); c = d["exact_correct"].astype(int)
    ksp = np.clip(d["lyap_spec"].astype(float), 0, None).sum(1)
    lines += [f"- {name}:",
              f"    λ1: AUC={auc(-l1,c):.3f}, Cohen's d(fail−succ)={cohens_d(l1[c==0],l1[c==1]):+.2f}, "
              f"Bhattacharyya(gauss)={bhattacharyya_gauss(l1[c==0],l1[c==1]):.3f}, "
              f"hist-overlap={hist_overlap(l1[c==0],l1[c==1]):.3f}",
              f"    KS-proxy: AUC={auc(-ksp,c):.3f}, Cohen's d={cohens_d(ksp[c==0],ksp[c==1]):+.2f}, "
              f"hist-overlap={hist_overlap(ksp[c==0],ksp[c==1]):.3f}"]
lines.append("")

(OUT / "characterization_results.md").write_text("\n".join(lines))
print("\n".join(lines))
print("\nfigures + md in", OUT)