1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
|
"""Aggregate multi-seed coloring results -> mean+/-std per architecture/config."""
import glob, json
import numpy as np
from collections import defaultdict
# Directory that is globbed further down for the color_*.json, le_color_*.json
# and ptrm_color_*.json result files.
R = '/home/yurenh2/rrog/runs'
def ms(xs):
    """Format the mean and standard deviation of the non-``None`` values in *xs*.

    Args:
        xs: Iterable of numbers. ``None`` entries are dropped before
            aggregating; everything else is converted to ``float``.

    Returns:
        ``"<mean>±<std> (n=<count>)"`` with three decimals each, where the
        std is NumPy's default (population, ``ddof=0``), or ``"—"`` when no
        values remain after dropping ``None``.
    """
    values = np.array([x for x in xs if x is not None], dtype=float)
    if not len(values):
        # Nothing to aggregate: avoid NumPy's mean-of-empty warning/NaN.
        return "—"
    return f"{values.mean():.3f}±{values.std():.3f} (n={len(values)})"
def load(pat):
    """Load every JSON file whose path matches the glob pattern *pat*.

    Loading is best-effort: a file that cannot be opened or does not contain
    valid JSON is skipped, with a one-line note on stderr, so that a single
    truncated result file does not abort the whole aggregation.

    Args:
        pat: Glob pattern handed to ``glob.glob``.

    Returns:
        List of decoded JSON documents, in sorted path order. Empty when
        nothing matches or nothing could be parsed.
    """
    import sys  # local import: only needed for the skip notice

    out = []
    # Sorted so repeated invocations see the files in the same order;
    # glob.glob itself returns them in arbitrary directory order.
    for path in sorted(glob.glob(pat)):
        try:
            # Context manager closes the handle even when parsing fails.
            with open(path, encoding="utf-8") as fh:
                out.append(json.load(fh))
        except (OSError, ValueError) as err:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"skipping {path}: {err}", file=sys.stderr)
    return out
def key(d):
    """Build the grouping key that identifies one run's configuration.

    Args:
        d: Decoded result dict for a single run.

    Returns:
        Tuple ``(arch, conv, pe, grad_mode, contract_flag)``. ``arch`` falls
        back to ``'legacy'`` and ``conv`` to ``'gin'`` when the field is
        absent; ``pe`` and ``grad_mode`` are ``None`` when absent; the last
        element is ``'ctr'`` when ``d['contract']`` is truthy, else ``'-'``.
    """
    contract_flag = 'ctr' if d.get('contract') else '-'
    return (
        d.get('arch', 'legacy'),
        d.get('conv', 'gin'),
        d.get('pe'),
        d.get('grad_mode'),
        contract_flag,
    )
# Per-configuration accumulators. Every table is keyed by key(d) and holds one
# entry per result file that contributed to that configuration.
solve, le, ml = defaultdict(list), defaultdict(list), defaultdict(list)
pk, ls, au, det = defaultdict(list), defaultdict(list), defaultdict(list), defaultdict(list)

# color_*.json: only results that report a solve_rate and a non-null 'pe'
# are counted.
for d in load(f"{R}/color_*.json"):
    if 'solve_rate' in d and d.get('pe') is not None:
        solve[key(d)].append(d['solve_rate'])

# le_color_*.json: a missing 'auroc' / 'mean_lam' is stored as None, which
# ms() drops before averaging.
for d in load(f"{R}/le_color_*.json"):
    k = key(d)
    le[k].append(d.get('auroc'))
    ml[k].append(d.get('mean_lam'))

# ptrm_color_*.json: 'det' is recorded for every file; the other three metrics
# come from the "0.2" entry of 'sigmas' when that entry exists.
for d in load(f"{R}/ptrm_color_*.json"):
    k = key(d)
    det[k].append(d.get('det'))
    # `or {}` also covers an explicit `"sigmas": null`, which the default
    # argument of dict.get would not.
    s2 = (d.get('sigmas') or {}).get('0.2')
    if s2:
        pk[k].append(s2.get('passk'))
        ls[k].append(s2.get('lamsel'))
        au[k].append(s2.get('auroc'))

# Single-metric sections share one layout: a header, then one line per config.
# Keys are sorted by their str() form because the tuples can contain None
# next to strings and are therefore not directly orderable.
for title, table in (
    ("=== best solve_rate (deterministic, EMA) ===", solve),
    ("=== LE AUROC(fail|lambda1) ===", le),
    ("=== LE mean_lambda1 (forced-contraction dose) ===", ml),
):
    print(title)
    for k in sorted(table, key=str):
        print(f" {k}: {ms(table[k])}")

# This section iterates pk, so only configurations with at least one
# sigma=0.2 entry are listed.
print("=== PTRM sigma=0.2 ===")
for k in sorted(pk, key=str):
    print(f" {k}: det {ms(det[k])} | pass@K {ms(pk[k])} | lambda-sel {ms(ls[k])} | AUROC {ms(au[k])}")
|