diff options
Diffstat (limited to 'ep_run/watch_contraction.py')
| -rw-r--r-- | ep_run/watch_contraction.py | 41 |
1 files changed, 41 insertions, 0 deletions
"""Watcher for the two contraction experiments (c3 + specnorm).

Polls each run's log and fires (returns / exits) as soon as either run hits a
decisive state:

* DIVERGED: res > 0.2 or val > 15, or either metric is NaN/inf;
* CLEARED the danger zone: step >= 10200, res < 0.06 and val < 2.5, i.e. the
  run survived past the step ~9400 where the unconstrained run blew;
* process EXITED.

If nothing decisive happens within 15 hours the watcher gives up and reports
that instead.
"""
import math
import os
import re
import time

# (tag, log path, pid of the training process) for every watched run.
RUNS = [
    ("c3", "/home/yurenh2/ept/ep_run/runs/ep_c3_scratch.log", 1429784),
    ("specnorm", "/home/yurenh2/ept/ep_run/runs/ep_specnorm09_scratch.log", 1435898),
]

# Divergence thresholds (strictly greater-than fires).
RES_DIVERGED = 0.2
VAL_DIVERGED = 15
# "Cleared the danger zone" thresholds.
CLEAR_STEP = 10200
CLEAR_RES = 0.06
CLEAR_VAL = 2.5
# Polling cadence and overall wall-time budget, in seconds.
POLL_SECONDS = 300
MAX_WALL_SECONDS = 15 * 3600

# A metric token: either a non-finite spelling (nan / inf / infinity, any case,
# optional sign) or the original permissive numeric character class.  The
# non-finite alternative comes first so that "-inf" is not captured as "-".
_NUMBER = r"([+-]?(?i:nan|inf(?:inity)?)|[\d.eE+-]+)"
_STEP_RE = re.compile(r"step (\d+)/.*val CE " + _NUMBER + r".*res=" + _NUMBER)


def alive(pid):
    """Return True if a process with id *pid* currently exists.

    Uses signal 0, which performs the existence/permission check without
    delivering a signal.
    """
    try:
        os.kill(pid, 0)
    except PermissionError:
        # The process exists but belongs to someone else: still alive.
        return True
    except (OSError, OverflowError, TypeError):
        # No such process, or a pid the OS cannot even represent.
        return False
    return True


def latest(log):
    """Parse the most recent ``step`` line of *log*.

    Returns ``(step, val, res, stripped_line)``, or ``None`` when the log
    cannot be read, has no line starting with ``"step"``, or its last such
    line does not carry parseable ``val CE`` / ``res=`` fields.  ``val`` and
    ``res`` may be NaN or +/-inf if that is what the run logged.
    """
    last = None
    try:
        # Stream the file and remember only the newest step line; the context
        # manager closes the handle on every poll.
        with open(log, errors="replace") as fh:
            for line in fh:
                if line.startswith("step"):
                    last = line
    except OSError:
        return None
    if last is None:
        return None
    m = _STEP_RE.search(last)
    if not m:
        return None
    try:
        return int(m.group(1)), float(m.group(2)), float(m.group(3)), last.strip()
    except ValueError:
        # The permissive character class can capture junk such as "1.2.3";
        # treat it like an unparseable line instead of crashing the watcher.
        return None


def status_all(runs=None):
    """Return one status line per run: liveness plus its latest step line.

    *runs* defaults to the module-level ``RUNS``.
    """
    runs = RUNS if runs is None else runs
    out = []
    for tag, log, pid in runs:
        d = latest(log)
        out.append(f"[{tag}] {'ALIVE' if alive(pid) else 'DEAD'} | {d[3] if d else 'no steps yet'}")
    return "\n".join(out)


def _check_run(tag, log, pid):
    """Return the trigger message for one run, or None if nothing decisive."""
    d = latest(log)
    if d:
        step, val, res, _ = d
        # NaN compares False against everything, so test finiteness explicitly.
        non_finite = not (math.isfinite(res) and math.isfinite(val))
        if non_finite or res > RES_DIVERGED or val > VAL_DIVERGED:
            return f"{tag} DIVERGED (res={res:.2e}, val={val:.2f}) at step {step}"
        if step >= CLEAR_STEP and res < CLEAR_RES and val < CLEAR_VAL:
            return f"{tag} CLEARED danger zone: step {step}, val {val:.4f}, res {res:.2e} (survived past ~9400)"
    if not alive(pid):
        return f"{tag} process EXITED (abort_res / crash / done); last: {d[3] if d else 'no steps'}"
    return None


def watch(runs=None, poll_seconds=POLL_SECONDS, max_seconds=MAX_WALL_SECONDS):
    """Poll *runs* until one fires; return its message, or None on timeout.

    Runs are checked in order and the first decisive one wins.  The wall-time
    budget is tested before each polling round, and the watcher sleeps
    *poll_seconds* between rounds.
    """
    runs = RUNS if runs is None else runs
    deadline = time.time() + max_seconds
    while time.time() < deadline:
        for tag, log, pid in runs:
            fired = _check_run(tag, log, pid)
            if fired is not None:
                return fired
        time.sleep(poll_seconds)
    return None


def main():
    """Run the watcher and print the trigger plus a final status summary."""
    fired = watch()
    print("=== CONTRACTION WATCHER FIRED ===")
    print("trigger:", fired if fired else "max wall-time (15h) reached, no decisive event")
    print(status_all())


if __name__ == "__main__":
    main()
