blob: 942f81de455bc0b874260bc19ba9c2113fe87850 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
|
import time, os, re
# Switch into the run directory first; LOG below is a relative path and is
# opened only after this chdir has taken effect.
os.chdir("/home/yurenh2/ept/ep_run")
LOG = "runs/ep_resreg_fast.log"
def step(path=None):
    """Return the step number on the most recent ``step`` line of the log.

    Args:
        path: Log file to read. Defaults to the module-level ``LOG``.

    Returns:
        The integer that follows ``step`` on the last line starting with
        ``"step"``. Returns 0 when the file cannot be read, contains no such
        line, or that line carries no step number.
    """
    if path is None:
        path = LOG
    last = None
    try:
        # errors="replace": one undecodable byte must not zero the counter.
        with open(path, errors="replace") as fh:
            # Stream the file and keep only the latest matching line instead
            # of materialising every "step" line.
            for line in fh:
                if line.startswith("step"):
                    last = line
    except OSError:
        # Best effort: a missing or unreadable log reads as "no progress yet".
        return 0
    if last is None:
        return 0
    found = re.search(r"step\s+(\d+)", last)
    return int(found.group(1)) if found else 0
def _as_float(match):
    """Return group 1 of *match* as a float, or 0 if missing or unparseable."""
    if match is None:
        return 0
    try:
        return float(match.group(1))
    except ValueError:
        # The character-class patterns can capture non-numbers ("-", "1.2.").
        return 0


def vrb(path=None):
    """Extract ``(val CE, res, best)`` from the most recent ``step`` log line.

    Args:
        path: Log file to read. Defaults to the module-level ``LOG``.

    Returns:
        ``None`` when the file cannot be read or has no line starting with
        ``"step"``. Otherwise a 3-tuple of the values following ``val CE ``,
        ``res=`` and ``best `` on the last such line; a field that is absent
        or not parseable as a float is reported as 0. ``nan`` and ``inf`` are
        recognised so that a diverged value is not reported as 0.
    """
    if path is None:
        path = LOG
    last = None
    try:
        # errors="replace": tolerate stray undecodable bytes in the log.
        with open(path, errors="replace") as fh:
            for line in fh:
                if line.startswith("step"):
                    last = line
    except OSError:
        return None
    if last is None:
        return None
    # The nan/inf alternative comes first in the res pattern so that "-inf"
    # is not captured as a bare "-" by the character class.
    val = re.search(r"val CE (nan|inf|[\d.]+)", last)
    res = re.search(r"res=([+-]?(?:nan|inf)|[\d.eE+-]+)", last)
    best = re.search(r"best (nan|inf|[\d.]+)", last)
    return (_as_float(val), _as_float(res), _as_float(best))
# Phase 1: wait out the compile warm-up. Poll the log every 15 s until it
# reports step 100 or 900 s have elapsed, whichever happens first.
t0 = time.time()
while True:
    if step() >= 100:
        break
    if time.time() - t0 >= 900:
        break
    time.sleep(15)

# Phase 2: read the step counter on both sides of a 180 s sleep.
s1 = step()
t1 = time.time()
time.sleep(180)
s2 = step()
t2 = time.time()
ds = s2 - s1
dt = t2 - t1

# Phase 3: report throughput, or the current step if nothing advanced.
print("=== EP_RESREG_FAST compile+TF32 计时 ===")
if ds <= 0:
    print("no progress (还在编译/卡住), step", step())
else:
    print(f"{ds} steps / {dt:.0f}s = {dt/ds:.2f} s/step = {60*ds/dt:.1f} steps/min")
    print("对照:其他 eager run ~13.6 s/step(但那是 t2sel160;此为 t2sel40+compile+tf32)")
    print(f"sanity (val,res,best): {vrb()}  <- res 没爆+在降 = TF32 没破稳定")
|