summaryrefslogtreecommitdiff
path: root/ep_run/watch_contraction.py
diff options
context:
space:
mode:
Diffstat (limited to 'ep_run/watch_contraction.py')
-rw-r--r--ep_run/watch_contraction.py41
1 files changed, 41 insertions, 0 deletions
diff --git a/ep_run/watch_contraction.py b/ep_run/watch_contraction.py
new file mode 100644
index 0000000..64f6a9f
--- /dev/null
+++ b/ep_run/watch_contraction.py
@@ -0,0 +1,41 @@
+"""Watcher for the two contraction experiments (c3 + specnorm). Fires (exits) when either run
+hits a decisive state: DIVERGED (res>0.2 or val>15), CLEARED the danger zone (step>=10200, res<0.06,
+val<2.5 -> survived past step ~9400, where the unconstrained run blew up), or process EXITED.
+Gives up after 15 h of wall time if no decisive event occurs."""
+import time, os, re
+# Runs to watch, one (tag, log_path, pid) tuple each:
+#   tag      - short label used in the watcher's messages
+#   log_path - training log that is scanned for lines starting with "step"
+#   pid      - process id whose liveness is probed
+# NOTE(review): paths and PIDs are hard-coded for one specific launch; they must be
+# updated by hand before the watcher is reused for other runs.
+RUNS = [
+ ("c3", "/home/yurenh2/ept/ep_run/runs/ep_c3_scratch.log", 1429784),
+ ("specnorm", "/home/yurenh2/ept/ep_run/runs/ep_specnorm09_scratch.log",1435898),
+]
+def alive(pid):
+    """Return True if a process with id `pid` currently exists.
+
+    Probes with signal 0, which runs the kernel's existence/permission checks
+    without delivering a signal.
+
+    Args:
+        pid: id of the process to probe; must be a positive integer to be
+            considered a single process.
+
+    Returns:
+        True if the process exists, including when it belongs to another user
+        and the probe is refused (EPERM). False if there is no such process,
+        if `pid` is not a positive process id, or if the probe fails for any
+        other reason.
+    """
+    try:
+        if pid <= 0:
+            # kill(0, ...) and kill(-n, ...) address process groups (or every
+            # process), so "success" would say nothing about one specific run.
+            return False
+        os.kill(pid, 0)
+    except PermissionError:
+        # EPERM: the process is there, we are just not allowed to signal it.
+        return True
+    except Exception:
+        # ESRCH (no such process) or an unusable pid value: not running.
+        return False
+    return True
+# One numeric field as printed in the log: nan/inf (any case, optional sign) or a
+# plain / scientific-notation number.
+_NUM = r"((?i:[-+]?(?:nan|inf(?:inity)?))|[\d.eE+-]+)"
+_STEP_RE = re.compile(rf"step (\d+)/.*val CE {_NUM}.*res={_NUM}")
+_INF = float("inf")
+
+
+def _metric(text):
+    """Parse one logged number; any non-finite value (nan, +/-inf) becomes +inf."""
+    value = float(text)
+    # NaN compares False against everything, so a bare `value > limit` check would
+    # never trip on it. Reporting +inf makes "above threshold" checks see a blown-up
+    # run for what it is.
+    return value if -_INF < value < _INF else _INF
+
+
+def latest(log):
+    """Return the most recent parsable step record of a training log.
+
+    Lines starting with "step" are scanned from the end of the file backwards, and
+    the first one that contains a step number, a "val CE" value and a "res=" value
+    is used. A trailing step line that lacks those fields, or that is only half
+    written, therefore does not hide the last complete record.
+
+    Args:
+        log: path of the log file.
+
+    Returns:
+        (step, val_ce, res, line) where `line` is the stripped source line, or
+        None if the file cannot be read or holds no parsable step line.
+        Non-finite val/res readings (nan, inf) are returned as float("inf").
+    """
+    try:
+        # errors="replace": a log that is being appended to can end in a partial
+        # multi-byte sequence; that must not take the watcher down.
+        with open(log, errors="replace") as fh:
+            step_lines = [ln for ln in fh if ln.startswith("step")]
+    except OSError:
+        # Missing or (temporarily) unreadable file: nothing to report yet.
+        return None
+    for line in reversed(step_lines):
+        m = _STEP_RE.search(line)
+        if not m:
+            continue
+        try:
+            return int(m.group(1)), _metric(m.group(2)), _metric(m.group(3)), line.strip()
+        except ValueError:
+            # The character class also admits junk such as "0.05." or "e"; skip
+            # that line and fall back to an earlier one.
+            continue
+    return None
+def status_all():
+    """Build a multi-line status summary, one line per entry of RUNS.
+
+    Each line reads "[tag] ALIVE|DEAD | <last parsed step line>"; the last part is
+    "no steps yet" when latest() returns nothing for that run's log.
+
+    Returns:
+        The per-run lines joined with newlines.
+    """
+    def describe(tag, log, pid):
+        # Same order as always: read the log first, then probe the process.
+        snapshot = latest(log)
+        state = 'ALIVE' if alive(pid) else 'DEAD'
+        last_line = snapshot[3] if snapshot else 'no steps yet'
+        return f"[{tag}] {state} | {last_line}"
+
+    return "\n".join(describe(*run) for run in RUNS)
+# Watch loop: poll every run until one of them reaches a decisive state or 15 h pass.
+# `fired` stays None until a trigger message is set; it is read again after the loop.
+# Elapsed time uses the monotonic clock so wall-clock adjustments (NTP, DST, manual
+# changes) can neither shorten nor extend the 15 h budget.
+t0 = time.monotonic(); fired = None
+while fired is None and time.monotonic() - t0 < 15 * 3600:
+    for tag, log, pid in RUNS:
+        # Probe liveness BEFORE reading the log. If the process is already gone here,
+        # the read below sees the log's final contents, so a last-moment divergence is
+        # classified from the final line instead of being reported as a plain EXITED
+        # with a stale line.
+        running = alive(pid)
+        d = latest(log)
+        if d:
+            step, val, res, _ = d
+            # Divergence: residual or validation CE above the blow-up thresholds.
+            if res > 0.2 or val > 15:
+                fired = f"{tag} DIVERGED (res={res:.2e}, val={val:.2f}) at step {step}"
+                break
+            # Cleared: well past the step where the earlier run failed, still healthy.
+            if step >= 10200 and res < 0.06 and val < 2.5:
+                fired = f"{tag} CLEARED danger zone: step {step}, val {val:.4f}, res {res:.2e} (survived past ~9400)"
+                break
+        if not running:
+            fired = f"{tag} process EXITED (abort_res / crash / done); last: {d[3] if d else 'no steps'}"
+            break
+    if fired:
+        # Skip the sleep so the report is printed right away.
+        break
+    time.sleep(300)
+# Final report: banner, what ended the watch (or the timeout notice), then the
+# current status line of every run.
+reason = fired or "max wall-time (15h) reached, no decisive event"
+print("=== CONTRACTION WATCHER FIRED ===")
+print("trigger:", reason)
+print(status_all())