summaryrefslogtreecommitdiff
path: root/ep_run/ep_jacreg_spike.py
blob: a72b89cfd679061189f4ee5aa4a4b765477d5786 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import time, os, re, subprocess
# Work from the experiment directory so the relative paths used below
# (the log file, the checkpoint pattern) resolve against it.
os.chdir("/home/yurenh2/ept/ep_run")

# Log file that steps() parses on every poll.
LOG = "runs/ep_jacreg.log"
def alive():
    """Report whether the watched run still has a live process.

    Uses ``pgrep -f`` so the pattern is matched against full command lines;
    the pattern is the checkpoint argument ``ckpt runs/ep_jacreg.pt``.

    Returns:
        True when pgrep exits with status 0, False otherwise.
    """
    probe = subprocess.run(
        ["pgrep", "-f", "ckpt runs/ep_jacreg.pt"],
        capture_output=True,  # keep the matched PIDs off our own stdout
    )
    return probe.returncode == 0
def steps(path=None):
    """Parse the run log into ``(step, val_ce, jr, res)`` tuples.

    Only lines that begin with ``step`` are considered. Such a line yields a
    row when it contains a step number, a ``val CE <number>`` field and a
    ``jr=<number>`` field; ``res=<number>`` is optional and is reported as 0
    when absent.

    Args:
        path: Log file to read. Defaults to the module-level ``LOG``.

    Returns:
        A list of ``(int, float, float, float)`` rows in file order. A missing
        or unreadable log gives the rows parsed so far (``[]`` if none); a
        line whose numeric fields cannot be converted is skipped on its own
        instead of discarding everything after it.
    """
    if path is None:
        path = LOG
    rows = []
    try:
        # errors="replace": a stray undecodable byte in the log must not
        # abort the whole parse.
        with open(path, errors="replace") as fh:
            for line in fh:
                if not line.startswith("step"):
                    continue
                m_step = re.search(r"step\s+(\d+)", line)
                m_val = re.search(r"val CE ([\d.]+)", line)
                m_jr = re.search(r"jr=([\d.eE+-]+)", line)
                m_res = re.search(r"res=([\d.eE+-]+)", line)
                if not (m_step and m_val and m_jr):
                    continue
                try:
                    row = (
                        int(m_step.group(1)),
                        float(m_val.group(1)),
                        float(m_jr.group(1)),
                        float(m_res.group(1)) if m_res else 0,
                    )
                except ValueError:
                    # The character classes above also match non-numbers
                    # such as "2.45." or "-"; drop just this line.
                    continue
                rows.append(row)
    except OSError:
        # Best effort: the log may not exist yet or may be unreadable right
        # now; the caller polls again later.
        pass
    return rows
# --- Spike-recovery watcher -------------------------------------------------
# Polls once a minute, for at most 4 hours, and stops at the first verdict:
#   EXITED      - alive() reports no matching process
#   DIVERGED    - a new row has val > 15 or res > 0.3
#   SUPPRESSED  - a new row has jr < 2.0 and val < 2.55
#   SATURATING  - 3 consecutive new rows have jr >= 8

# Rows already in the log at start-up are ignored (incl the 6250 spike), so
# only steps logged from now on are judged.
seen = {row[0] for row in steps()}
fired = None  # verdict message; stays None while still watching
hi = 0        # count of consecutive newly-logged rows with jr >= 8
# Monotonic clock: a wall-clock adjustment must not stretch or cut the budget.
t0 = time.monotonic()

while fired is None and time.monotonic() - t0 < 4 * 3600:
    time.sleep(60)
    if not alive():
        # Parse once so the emptiness check and the reported row come from
        # the same snapshot of the log.
        rows = steps()
        fired = f"EXITED last={rows[-1] if rows else None}"
        break
    for row in steps():
        if row[0] in seen:
            continue
        seen.add(row[0])
        step, val, jr, res = row
        if val > 15 or res > 0.3:
            fired = f"DIVERGED @{step} val{val:.2f} res{res:.1e} jr{jr:.1f}"
            break
        # Any row below the threshold resets the streak.
        hi = hi + 1 if jr >= 8 else 0
        if jr < 2.0 and val < 2.55:
            fired = f"SUPPRESSED @{step}: jr relaxed to {jr:.1f}, CE recovered {val:.3f} (best2.4381) res{res:.1e} -> controller WON the spike"
            break
        if hi >= 3:
            fired = f"jr SATURATING @{step}: jr>=8 for {hi} logged-steps (now {jr:.1f}), val{val:.3f} res{res:.1e} -> controller maxed, not relaxing (early hijack/saturation)"
            break
    # No explicit break needed here: the while condition stops on a verdict.

print("=== EP_JACREG SPIKE-RECOVERY ===")
print(fired or "4h timeout")
print("last5:", steps()[-5:])