"""Maze with CONNECTIVITY as the success criterion (not exact-match). Genuine failure = predicted path does NOT connect start->goal (broken/incomplete answer). Valid alternative paths (connected but != labeled) count as 'complete answer'. Joins per-cell preds (my dump, seed 20260616) with the friend's FTLE/drift npz (same seed/idx), and asks: do BROKEN (disconnected) predictions WANDER while CONNECTED ones SETTLE? If yes, the dynamical signal tracks answer-COMPLETENESS, and exact-match was the wrong lens for Maze. """ from __future__ import annotations from pathlib import Path from collections import deque import glob import numpy as np HERE = Path(__file__).resolve().parent FU = HERE / "maze_followup" FRIEND = "/tmp/friend_maze/maze_all_ckpts_lyap" def is_connected(inp, pred): g = inp.reshape(30, 30); pr = pred.reshape(30, 30) se = np.argwhere((g == 3) | (g == 4)) if len(se) < 2: return True # can't judge -> treat as connected (won't happen) s, e = tuple(se[0]), tuple(se[1]) pathset = set(map(tuple, np.argwhere(pr == 5))) | {s, e} if any(g[r, c] == 1 for r, c in pathset): return False # crosses wall = invalid seen = {s}; q = deque([s]) while q: r, c = q.popleft() if (r, c) == e: return True for dr, dc in [(-1, 0), (1, 0), (0, -1), (0, 1)]: nr, nc = r + dr, c + dc if 0 <= nr < 30 and 0 <= nc < 30 and (nr, nc) in pathset and (nr, nc) not in seen: seen.add((nr, nc)); q.append((nr, nc)) return False def auc(score, y): p, n = score[y == 1], score[y == 0] if len(p) == 0 or len(n) == 0: return float("nan") a = np.concatenate([p, n]); o = np.argsort(a); r = np.empty(len(a)); r[o] = np.arange(1, len(a) + 1) return float((r[:len(p)].sum() - len(p) * (len(p) + 1) / 2) / (len(p) * len(n))) def cohend(a, b): if len(a) < 2 or len(b) < 2: return float("nan") s = np.sqrt(((len(a) - 1) * a.var(ddof=1) + (len(b) - 1) * b.var(ddof=1)) / (len(a) + len(b) - 2)) return (a.mean() - b.mean()) / s if s > 0 else float("nan") for step in [13020, 52080, 130200]: pred_f = FU / f"mazepreds_step_{step}_seed20260616.npz" fr = glob.glob(f"{FRIEND}/maze_step_{step}_*.npz") if not pred_f.exists() or not fr: print(f"[pending] step {step}") continue P = np.load(pred_f); F = np.load(fr[0]) common, pi, fi = np.intersect1d(P["idx"], F["idx"], return_indices=True) preds = P["preds"][pi]; inputs = P["inputs"][pi] exact = P["exact_correct"][pi].astype(int) l1 = F["lyap_spec"][fi, 0].astype(float) late_drift = np.log10(np.clip(F["drift_zH"][fi, -4:].mean(1), 1e-12, None)) conn = np.array([is_connected(inputs[k], preds[k]) for k in range(len(common))]).astype(int) nb = int((conn == 0).sum()) print(f"\n=== step {step} (joined n={len(common)}) ===") print(f" exact-match acc={exact.mean():.3f} | CONNECTIVITY acc (valid complete path)={conn.mean():.3f} | broken={nb}") if nb < 3 or nb > len(common) - 3: print(f" too few broken/connected to condition dynamics (broken={nb})") continue # dynamics conditioned on CONNECTIVITY (broken=0 vs connected=1) print(f" late-drift (settling): connected median={np.median(late_drift[conn==1]):.2f} broken median={np.median(late_drift[conn==0]):.2f}") print(f" AUC(-late-drift -> connected) = {auc(-late_drift, conn):.3f} Cohen d(broken-conn)={cohend(late_drift[conn==0], late_drift[conn==1]):+.2f}") print(f" lambda1: connected median={np.median(l1[conn==1]):+.4f} broken median={np.median(l1[conn==0]):+.4f}") print(f" AUC(-lambda1 -> connected) = {auc(-l1, conn):.3f}") # compare: does connectivity separate dynamics BETTER than exact-match? print(f" [vs exact-match] AUC(-late-drift -> exact_correct) = {auc(-late_drift, exact):.3f}, " f"AUC(-lambda1 -> exact) = {auc(-l1, exact):.3f}") # within CONNECTED, does exact-match still separate? (should NOT, if dynamics track completeness) m = conn == 1 if 0 < exact[m].mean() < 1: print(f" within CONNECTED (n={m.sum()}): AUC(-late-drift -> exact) = {auc(-late_drift[m], exact[m]):.3f} " f"(near 0.5 => dynamics track completeness, not correctness)")