1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
|
host=timan1.cs.illinois.edu gpu=0 start=2026-06-16T11:50:28-05:00
===== seed=0 full none =====
ep20 solve_rate=0.000 mean_conflicts=114.06
ep40 solve_rate=0.163 mean_conflicts=61.27
ep60 solve_rate=0.410 mean_conflicts=17.81
ep80 solve_rate=0.610 mean_conflicts=4.32
ep100 solve_rate=0.440 mean_conflicts=16.00
ep120 solve_rate=0.287 mean_conflicts=19.54
ep140 solve_rate=0.090 mean_conflicts=59.83
ep150 solve_rate=0.063 mean_conflicts=70.46
[color_full_none_n50_k3_p0.2_T3_ns3_s0] best solve_rate=0.61 mean_conflicts=4.323 @ep80 (155.3s)
wrote /home/yurenh2/rrog/runs/ckpt_color_full_none_n50_k3_p0.2_T3_ns3_s0.pt
[full] LE n=300 fail_rate=0.39 | lambda1 SOLVED mean -0.3206 (n=183) | UNSOLVED mean +0.0485 (n=117) | sep=+0.3692 | AUROC(fail|lambda1)=0.919 | mean_lambda1=-0.1767
[pe=none s0] deterministic solve_rate = 0.627 (n=150, K=16)
sigma pass@K lam-sel random perRoll AUROC(s|-lam)
0.2 0.900 0.773 0.640 0.595 0.883
wrote /home/yurenh2/rrog/runs/ptrm_color_full_none_n50_k3_p0.2_T3_ns3_s0.json
===== seed=0 full rwse =====
ep20 solve_rate=0.000 mean_conflicts=51.22
ep40 solve_rate=0.083 mean_conflicts=55.16
ep60 solve_rate=0.203 mean_conflicts=58.38
ep80 solve_rate=0.350 mean_conflicts=37.23
ep100 solve_rate=0.383 mean_conflicts=31.48
ep120 solve_rate=0.203 mean_conflicts=53.56
ep140 solve_rate=0.273 mean_conflicts=37.79
ep150 solve_rate=0.280 mean_conflicts=36.98
[color_full_rwse_n50_k3_p0.2_T3_ns3_s0] best solve_rate=0.3833 mean_conflicts=31.483 @ep100 (153.3s)
wrote /home/yurenh2/rrog/runs/ckpt_color_full_rwse_n50_k3_p0.2_T3_ns3_s0.pt
[full] LE n=300 fail_rate=0.62 | lambda1 SOLVED mean -0.1834 (n=115) | UNSOLVED mean +0.0100 (n=185) | sep=+0.1934 | AUROC(fail|lambda1)=0.817 | mean_lambda1=-0.0641
[pe=rwse s0] deterministic solve_rate = 0.407 (n=150, K=16)
sigma pass@K lam-sel random perRoll AUROC(s|-lam)
0.2 0.740 0.587 0.493 0.426 0.808
wrote /home/yurenh2/rrog/runs/ptrm_color_full_rwse_n50_k3_p0.2_T3_ns3_s0.json
===== seed=0 1step none =====
ep20 solve_rate=0.000 mean_conflicts=44.63
ep40 solve_rate=0.123 mean_conflicts=19.01
ep60 solve_rate=0.200 mean_conflicts=18.33
ep80 solve_rate=0.067 mean_conflicts=42.81
ep100 solve_rate=0.027 mean_conflicts=42.48
ep120 solve_rate=0.087 mean_conflicts=23.53
ep140 solve_rate=0.100 mean_conflicts=10.46
ep150 solve_rate=0.127 mean_conflicts=18.32
[color_1step_none_n50_k3_p0.2_T3_ns3_s0] best solve_rate=0.2 mean_conflicts=18.333 @ep60 (127.1s)
wrote /home/yurenh2/rrog/runs/ckpt_color_1step_none_n50_k3_p0.2_T3_ns3_s0.pt
[1step] LE n=300 fail_rate=0.80 | lambda1 SOLVED mean -0.2978 (n=60) | UNSOLVED mean -0.1209 (n=240) | sep=+0.1769 | AUROC(fail|lambda1)=0.770 | mean_lambda1=-0.1563
===== seed=1 full none =====
ep20 solve_rate=0.000 mean_conflicts=76.94
ep40 solve_rate=0.070 mean_conflicts=41.69
ep60 solve_rate=0.207 mean_conflicts=33.02
ep80 solve_rate=0.453 mean_conflicts=11.75
ep100 solve_rate=0.203 mean_conflicts=21.24
ep120 solve_rate=0.163 mean_conflicts=19.60
ep140 solve_rate=0.067 mean_conflicts=41.63
ep150 solve_rate=0.027 mean_conflicts=50.77
[color_full_none_n50_k3_p0.2_T3_ns3_s1] best solve_rate=0.4533 mean_conflicts=11.753 @ep80 (151.0s)
wrote /home/yurenh2/rrog/runs/ckpt_color_full_none_n50_k3_p0.2_T3_ns3_s1.pt
[full] LE n=300 fail_rate=0.55 | lambda1 SOLVED mean -0.3182 (n=136) | UNSOLVED mean -0.0426 (n=164) | sep=+0.2757 | AUROC(fail|lambda1)=0.864 | mean_lambda1=-0.1675
[pe=none s1] deterministic solve_rate = 0.480 (n=150, K=16)
sigma pass@K lam-sel random perRoll AUROC(s|-lam)
0.2 0.740 0.593 0.440 0.456 0.869
wrote /home/yurenh2/rrog/runs/ptrm_color_full_none_n50_k3_p0.2_T3_ns3_s1.json
===== seed=1 full rwse =====
ep20 solve_rate=0.000 mean_conflicts=115.59
ep40 solve_rate=0.100 mean_conflicts=91.86
ep60 solve_rate=0.327 mean_conflicts=34.60
ep80 solve_rate=0.097 mean_conflicts=88.34
ep100 solve_rate=0.140 mean_conflicts=56.41
ep120 solve_rate=0.500 mean_conflicts=16.20
ep140 solve_rate=0.537 mean_conflicts=13.68
ep150 solve_rate=0.500 mean_conflicts=16.04
[color_full_rwse_n50_k3_p0.2_T3_ns3_s1] best solve_rate=0.5367 mean_conflicts=13.68 @ep140 (152.1s)
wrote /home/yurenh2/rrog/runs/ckpt_color_full_rwse_n50_k3_p0.2_T3_ns3_s1.pt
[full] LE n=300 fail_rate=0.46 | lambda1 SOLVED mean +0.0219 (n=161) | UNSOLVED mean +0.1773 (n=139) | sep=+0.1554 | AUROC(fail|lambda1)=0.774 | mean_lambda1=+0.0939
[pe=rwse s1] deterministic solve_rate = 0.547 (n=150, K=16)
sigma pass@K lam-sel random perRoll AUROC(s|-lam)
0.2 0.947 0.727 0.553 0.507 0.814
wrote /home/yurenh2/rrog/runs/ptrm_color_full_rwse_n50_k3_p0.2_T3_ns3_s1.json
===== seed=1 1step none =====
ep20 solve_rate=0.000 mean_conflicts=47.30
ep40 solve_rate=0.157 mean_conflicts=34.06
ep60 solve_rate=0.297 mean_conflicts=7.36
ep80 solve_rate=0.333 mean_conflicts=8.70
ep100 solve_rate=0.193 mean_conflicts=13.99
ep120 solve_rate=0.233 mean_conflicts=6.94
ep140 solve_rate=0.227 mean_conflicts=6.85
ep150 solve_rate=0.320 mean_conflicts=5.31
[color_1step_none_n50_k3_p0.2_T3_ns3_s1] best solve_rate=0.3333 mean_conflicts=8.7 @ep80 (123.3s)
wrote /home/yurenh2/rrog/runs/ckpt_color_1step_none_n50_k3_p0.2_T3_ns3_s1.pt
[1step] LE n=300 fail_rate=0.67 | lambda1 SOLVED mean -0.2773 (n=100) | UNSOLVED mean +0.0189 (n=200) | sep=+0.2963 | AUROC(fail|lambda1)=0.846 | mean_lambda1=-0.0798
===== seed=2 full none =====
ep20 solve_rate=0.000 mean_conflicts=137.74
ep40 solve_rate=0.087 mean_conflicts=44.48
ep60 solve_rate=0.140 mean_conflicts=42.89
ep80 solve_rate=0.413 mean_conflicts=16.23
ep100 solve_rate=0.123 mean_conflicts=60.71
ep120 solve_rate=0.027 mean_conflicts=58.64
ep140 solve_rate=0.017 mean_conflicts=79.05
ep150 solve_rate=0.013 mean_conflicts=83.54
[color_full_none_n50_k3_p0.2_T3_ns3_s2] best solve_rate=0.4133 mean_conflicts=16.23 @ep80 (154.4s)
wrote /home/yurenh2/rrog/runs/ckpt_color_full_none_n50_k3_p0.2_T3_ns3_s2.pt
[full] LE n=300 fail_rate=0.59 | lambda1 SOLVED mean -0.2913 (n=124) | UNSOLVED mean -0.0796 (n=176) | sep=+0.2117 | AUROC(fail|lambda1)=0.830 | mean_lambda1=-0.1671
[pe=none s2] deterministic solve_rate = 0.447 (n=150, K=16)
sigma pass@K lam-sel random perRoll AUROC(s|-lam)
0.2 0.773 0.620 0.467 0.435 0.821
wrote /home/yurenh2/rrog/runs/ptrm_color_full_none_n50_k3_p0.2_T3_ns3_s2.json
===== seed=2 full rwse =====
ep20 solve_rate=0.000 mean_conflicts=89.84
ep40 solve_rate=0.110 mean_conflicts=36.78
ep60 solve_rate=0.400 mean_conflicts=9.39
ep80 solve_rate=0.560 mean_conflicts=3.40
ep100 solve_rate=0.463 mean_conflicts=23.90
ep120 solve_rate=0.550 mean_conflicts=16.37
ep140 solve_rate=0.557 mean_conflicts=23.72
ep150 solve_rate=0.510 mean_conflicts=34.19
[color_full_rwse_n50_k3_p0.2_T3_ns3_s2] best solve_rate=0.56 mean_conflicts=3.4 @ep80 (151.8s)
wrote /home/yurenh2/rrog/runs/ckpt_color_full_rwse_n50_k3_p0.2_T3_ns3_s2.pt
[full] LE n=300 fail_rate=0.44 | lambda1 SOLVED mean -0.2328 (n=168) | UNSOLVED mean +0.0707 (n=132) | sep=+0.3034 | AUROC(fail|lambda1)=0.883 | mean_lambda1=-0.0993
[pe=rwse s2] deterministic solve_rate = 0.600 (n=150, K=16)
sigma pass@K lam-sel random perRoll AUROC(s|-lam)
0.2 0.893 0.787 0.573 0.556 0.873
wrote /home/yurenh2/rrog/runs/ptrm_color_full_rwse_n50_k3_p0.2_T3_ns3_s2.json
===== seed=2 1step none =====
ep20 solve_rate=0.000 mean_conflicts=38.00
ep40 solve_rate=0.153 mean_conflicts=18.17
ep60 solve_rate=0.327 mean_conflicts=7.32
ep80 solve_rate=0.320 mean_conflicts=5.31
ep100 solve_rate=0.357 mean_conflicts=4.67
ep120 solve_rate=0.293 mean_conflicts=7.30
ep140 solve_rate=0.273 mean_conflicts=7.96
ep150 solve_rate=0.227 mean_conflicts=10.33
[color_1step_none_n50_k3_p0.2_T3_ns3_s2] best solve_rate=0.3567 mean_conflicts=4.67 @ep100 (131.6s)
wrote /home/yurenh2/rrog/runs/ckpt_color_1step_none_n50_k3_p0.2_T3_ns3_s2.pt
[1step] LE n=300 fail_rate=0.64 | lambda1 SOLVED mean -0.2153 (n=107) | UNSOLVED mean +0.0916 (n=193) | sep=+0.3069 | AUROC(fail|lambda1)=0.867 | mean_lambda1=-0.0179
===== seed=3 full none =====
ep20 solve_rate=0.000 mean_conflicts=62.81
ep40 solve_rate=0.207 mean_conflicts=33.51
ep60 solve_rate=0.460 mean_conflicts=15.68
ep80 solve_rate=0.033 mean_conflicts=114.30
ep100 solve_rate=0.000 mean_conflicts=82.87
ep120 solve_rate=0.000 mean_conflicts=85.01
ep140 solve_rate=0.000 mean_conflicts=77.61
ep150 solve_rate=0.000 mean_conflicts=79.58
[color_full_none_n50_k3_p0.2_T3_ns3_s3] best solve_rate=0.46 mean_conflicts=15.68 @ep60 (151.6s)
wrote /home/yurenh2/rrog/runs/ckpt_color_full_none_n50_k3_p0.2_T3_ns3_s3.pt
[full] LE n=300 fail_rate=0.54 | lambda1 SOLVED mean -0.4216 (n=138) | UNSOLVED mean -0.0826 (n=162) | sep=+0.3390 | AUROC(fail|lambda1)=0.886 | mean_lambda1=-0.2385
[pe=none s3] deterministic solve_rate = 0.493 (n=150, K=16)
sigma pass@K lam-sel random perRoll AUROC(s|-lam)
0.2 0.733 0.587 0.493 0.472 0.856
wrote /home/yurenh2/rrog/runs/ptrm_color_full_none_n50_k3_p0.2_T3_ns3_s3.json
===== seed=3 full rwse =====
ep20 solve_rate=0.000 mean_conflicts=139.52
ep40 solve_rate=0.043 mean_conflicts=135.26
ep60 solve_rate=0.313 mean_conflicts=32.37
ep80 solve_rate=0.370 mean_conflicts=14.78
ep100 solve_rate=0.490 mean_conflicts=12.46
ep120 solve_rate=0.067 mean_conflicts=89.87
ep140 solve_rate=0.137 mean_conflicts=53.29
ep150 solve_rate=0.127 mean_conflicts=46.23
[color_full_rwse_n50_k3_p0.2_T3_ns3_s3] best solve_rate=0.49 mean_conflicts=12.46 @ep100 (152.1s)
wrote /home/yurenh2/rrog/runs/ckpt_color_full_rwse_n50_k3_p0.2_T3_ns3_s3.pt
[full] LE n=300 fail_rate=0.51 | lambda1 SOLVED mean -0.0438 (n=147) | UNSOLVED mean +0.1618 (n=153) | sep=+0.2056 | AUROC(fail|lambda1)=0.830 | mean_lambda1=+0.0610
[pe=rwse s3] deterministic solve_rate = 0.507 (n=150, K=16)
sigma pass@K lam-sel random perRoll AUROC(s|-lam)
0.2 0.893 0.727 0.473 0.497 0.823
wrote /home/yurenh2/rrog/runs/ptrm_color_full_rwse_n50_k3_p0.2_T3_ns3_s3.json
===== seed=3 1step none =====
ep20 solve_rate=0.000 mean_conflicts=54.09
ep40 solve_rate=0.103 mean_conflicts=17.25
ep60 solve_rate=0.187 mean_conflicts=28.48
ep80 solve_rate=0.120 mean_conflicts=29.85
ep100 solve_rate=0.200 mean_conflicts=16.34
ep120 solve_rate=0.337 mean_conflicts=7.23
ep140 solve_rate=0.157 mean_conflicts=16.67
ep150 solve_rate=0.087 mean_conflicts=19.67
[color_1step_none_n50_k3_p0.2_T3_ns3_s3] best solve_rate=0.3367 mean_conflicts=7.227 @ep120 (124.3s)
wrote /home/yurenh2/rrog/runs/ckpt_color_1step_none_n50_k3_p0.2_T3_ns3_s3.pt
[1step] LE n=300 fail_rate=0.66 | lambda1 SOLVED mean -0.0124 (n=101) | UNSOLVED mean +0.1694 (n=199) | sep=+0.1818 | AUROC(fail|lambda1)=0.786 | mean_lambda1=+0.1082
===== seed=4 full none =====
ep20 solve_rate=0.000 mean_conflicts=67.14
ep40 solve_rate=0.200 mean_conflicts=24.37
ep60 solve_rate=0.347 mean_conflicts=27.20
ep80 solve_rate=0.127 mean_conflicts=41.29
ep100 solve_rate=0.180 mean_conflicts=35.82
ep120 solve_rate=0.073 mean_conflicts=66.69
ep140 solve_rate=0.070 mean_conflicts=74.02
ep150 solve_rate=0.073 mean_conflicts=81.13
[color_full_none_n50_k3_p0.2_T3_ns3_s4] best solve_rate=0.3467 mean_conflicts=27.197 @ep60 (149.8s)
wrote /home/yurenh2/rrog/runs/ckpt_color_full_none_n50_k3_p0.2_T3_ns3_s4.pt
[full] LE n=300 fail_rate=0.65 | lambda1 SOLVED mean -0.4008 (n=104) | UNSOLVED mean -0.1440 (n=196) | sep=+0.2567 | AUROC(fail|lambda1)=0.842 | mean_lambda1=-0.2330
[pe=none s4] deterministic solve_rate = 0.353 (n=150, K=16)
sigma pass@K lam-sel random perRoll AUROC(s|-lam)
0.2 0.547 0.427 0.373 0.358 0.835
wrote /home/yurenh2/rrog/runs/ptrm_color_full_none_n50_k3_p0.2_T3_ns3_s4.json
===== seed=4 full rwse =====
ep20 solve_rate=0.000 mean_conflicts=56.06
ep40 solve_rate=0.050 mean_conflicts=55.24
ep60 solve_rate=0.303 mean_conflicts=39.93
ep80 solve_rate=0.357 mean_conflicts=15.34
ep100 solve_rate=0.547 mean_conflicts=6.19
ep120 solve_rate=0.440 mean_conflicts=18.82
ep140 solve_rate=0.240 mean_conflicts=41.55
ep150 solve_rate=0.233 mean_conflicts=46.02
[color_full_rwse_n50_k3_p0.2_T3_ns3_s4] best solve_rate=0.5467 mean_conflicts=6.193 @ep100 (154.2s)
wrote /home/yurenh2/rrog/runs/ckpt_color_full_rwse_n50_k3_p0.2_T3_ns3_s4.pt
[full] LE n=300 fail_rate=0.45 | lambda1 SOLVED mean -0.1742 (n=164) | UNSOLVED mean +0.1084 (n=136) | sep=+0.2827 | AUROC(fail|lambda1)=0.880 | mean_lambda1=-0.0461
[pe=rwse s4] deterministic solve_rate = 0.547 (n=150, K=16)
sigma pass@K lam-sel random perRoll AUROC(s|-lam)
0.2 0.940 0.853 0.493 0.557 0.868
wrote /home/yurenh2/rrog/runs/ptrm_color_full_rwse_n50_k3_p0.2_T3_ns3_s4.json
===== seed=4 1step none =====
ep20 solve_rate=0.000 mean_conflicts=63.11
ep40 solve_rate=0.187 mean_conflicts=11.62
ep60 solve_rate=0.237 mean_conflicts=22.39
ep80 solve_rate=0.200 mean_conflicts=24.78
ep100 solve_rate=0.183 mean_conflicts=15.49
ep120 solve_rate=0.207 mean_conflicts=12.25
ep140 solve_rate=0.093 mean_conflicts=21.66
ep150 solve_rate=0.067 mean_conflicts=30.98
[color_1step_none_n50_k3_p0.2_T3_ns3_s4] best solve_rate=0.2367 mean_conflicts=22.393 @ep60 (120.8s)
wrote /home/yurenh2/rrog/runs/ckpt_color_1step_none_n50_k3_p0.2_T3_ns3_s4.pt
[1step] LE n=300 fail_rate=0.76 | lambda1 SOLVED mean -0.2996 (n=71) | UNSOLVED mean -0.0523 (n=229) | sep=+0.2473 | AUROC(fail|lambda1)=0.835 | mean_lambda1=-0.1108
===== AGGREGATE =====
=== best solve_rate (deterministic, EMA) ===
('1step', 'none'): 0.293±0.062 (n=5)
('full', 'none'): 0.457±0.087 (n=5)
('full', 'rwse'): 0.503±0.064 (n=5)
=== LE AUROC(fail|lambda1) ===
('1step', 'none'): 0.821±0.036 (n=5)
('full', 'none'): 0.868±0.032 (n=5)
('full', 'rwse'): 0.837±0.041 (n=5)
=== PTRM sigma=0.2 ===
('full', 'none'): det 0.480±0.088 (n=5) | pass@K 0.739±0.113 (n=5) | lambda-sel 0.600±0.110 (n=5) | AUROC 0.853±0.023 (n=5)
('full', 'rwse'): det 0.521±0.065 (n=5) | pass@K 0.883±0.075 (n=5) | lambda-sel 0.736±0.088 (n=5) | AUROC 0.837±0.028 (n=5)
done=2026-06-16T12:41:20-05:00
|