"""Poll the ep_jacreg training log and report how the run evolves.

The script snapshots the step numbers already present in the log, then polls
once a minute (for at most four hours) and stops at the first of:

* EXITED        - no process matching the training command line is running;
* DIVERGED      - a new record has a non-finite or too-large ``val CE``/``res``;
* SUPPRESSED    - a new record has a low ``jr`` and a low ``val CE``;
* jr SATURATING - ``jr`` stayed at or above its ceiling for several
                  consecutive new records.

Run it as a script; importing the module only defines the helpers.
"""
import math
import os
import re
import subprocess
import time

RUN_DIR = "/home/yurenh2/ept/ep_run"
LOG = "runs/ep_jacreg.log"  # relative to RUN_DIR; main() chdirs there first
PROCESS_PATTERN = "ckpt runs/ep_jacreg.pt"

POLL_SECONDS = 60
TIMEOUT_SECONDS = 4 * 3600

# Decision thresholds, unchanged from the original inline literals.
DIVERGED_VAL = 15
DIVERGED_RES = 0.3
JR_SATURATED = 8
SATURATED_STREAK = 3
RELAXED_JR = 2.0
RECOVERED_VAL = 2.55

STEP_RE = re.compile(r"step\s+(\d+)")
# ``nan``/``inf`` are accepted for val CE and res so that a record with a
# non-finite value is still parsed and can be flagged as diverged.
VAL_RE = re.compile(r"val CE ([-+]?(?:nan|inf)|[\d.]+)")
JR_RE = re.compile(r"jr=([\d.eE+-]+)")
RES_RE = re.compile(r"res=([-+]?(?:nan|inf)|[\d.eE+-]+)")


def alive():
    """Return True if ``pgrep -f`` finds a process matching PROCESS_PATTERN."""
    probe = subprocess.run(
        ["pgrep", "-f", PROCESS_PATTERN], capture_output=True
    )
    return probe.returncode == 0


def parse_step_line(line):
    """Parse one log line into ``(step, val, jr, res)``.

    Returns None when the line does not start with ``step``, lacks one of the
    ``step`` / ``val CE`` / ``jr=`` fields, or carries a token that is not a
    valid number. ``res`` is optional and defaults to 0.
    """
    if not line.startswith("step"):
        return None
    step_m = STEP_RE.search(line)
    val_m = VAL_RE.search(line)
    jr_m = JR_RE.search(line)
    res_m = RES_RE.search(line)
    if not (step_m and val_m and jr_m):
        return None
    try:
        return (
            int(step_m.group(1)),
            float(val_m.group(1)),
            float(jr_m.group(1)),
            float(res_m.group(1)) if res_m else 0,
        )
    except ValueError:
        # The character classes admit tokens such as "1.2.3" or "-"; skip
        # just this line instead of discarding every record after it.
        return None


def steps(path=None):
    """Return every parseable step record in the log, in file order.

    Args:
        path: log file to read; defaults to the module-level ``LOG``.

    Returns:
        A list of ``(step, val, jr, res)`` tuples. An unreadable or missing
        file yields whatever was parsed before the error (possibly ``[]``).
    """
    log_path = LOG if path is None else path
    records = []
    try:
        # errors="replace": undecodable bytes must not abort the whole read.
        with open(log_path, errors="replace") as handle:
            for line in handle:
                record = parse_step_line(line)
                if record is not None:
                    records.append(record)
    except OSError:
        pass  # best effort: the log may not exist yet or may be rotating
    return records


def evaluate_record(record, hi):
    """Classify one new record.

    Args:
        record: a ``(step, val, jr, res)`` tuple as produced by ``steps()``.
        hi: number of consecutive previous records with ``jr >= JR_SATURATED``.

    Returns:
        ``(verdict, hi)`` where ``verdict`` is a report string, or None when
        the record triggers nothing, and ``hi`` is the updated streak count.
    """
    step, val, jr, res = record
    non_finite = not (math.isfinite(val) and math.isfinite(res))
    if non_finite or val > DIVERGED_VAL or res > DIVERGED_RES:
        return f"DIVERGED @{step} val{val:.2f} res{res:.1e} jr{jr:.1f}", hi

    hi = hi + 1 if jr >= JR_SATURATED else 0
    if jr < RELAXED_JR and val < RECOVERED_VAL:
        return (
            f"SUPPRESSED @{step}: jr relaxed to {jr:.1f}, CE recovered "
            f"{val:.3f} (best2.4381) res{res:.1e} -> controller WON the spike",
            hi,
        )
    if hi >= SATURATED_STREAK:
        return (
            f"jr SATURATING @{step}: jr>={JR_SATURATED} for {hi} logged-steps "
            f"(now {jr:.1f}), val{val:.3f} res{res:.1e} -> controller maxed, "
            f"not relaxing (early hijack/saturation)",
            hi,
        )
    return None, hi


def main():
    """Poll the log until a verdict fires or the timeout elapses, then report."""
    os.chdir(RUN_DIR)

    # ignore already-seen (incl the 6250 spike)
    seen = {record[0] for record in steps()}
    fired = None
    hi = 0
    # Monotonic clock: the timeout must not depend on wall-clock adjustments.
    deadline = time.monotonic() + TIMEOUT_SECONDS

    while fired is None and time.monotonic() < deadline:
        time.sleep(POLL_SECONDS)
        if not alive():
            history = steps()  # read once so the check and the value agree
            fired = f"EXITED last={history[-1] if history else None}"
            break
        for record in steps():
            if record[0] in seen:
                continue
            seen.add(record[0])
            fired, hi = evaluate_record(record, hi)
            if fired:
                break

    print("=== EP_JACREG SPIKE-RECOVERY ===")
    print(fired or "4h timeout")
    print("last5:", steps()[-5:])


if __name__ == "__main__":
    main()