summaryrefslogtreecommitdiff
path: root/runs/seeds_sweep.log
diff options
context:
space:
mode:
Diffstat (limited to 'runs/seeds_sweep.log')
-rw-r--r--runs/seeds_sweep.log234
1 files changed, 234 insertions, 0 deletions
diff --git a/runs/seeds_sweep.log b/runs/seeds_sweep.log
new file mode 100644
index 0000000..7e1b08f
--- /dev/null
+++ b/runs/seeds_sweep.log
@@ -0,0 +1,234 @@
+host=timan1.cs.illinois.edu gpu=0 start=2026-06-16T11:50:28-05:00
+===== seed=0 full none =====
+ep20 solve_rate=0.000 mean_conflicts=114.06
+ep40 solve_rate=0.163 mean_conflicts=61.27
+ep60 solve_rate=0.410 mean_conflicts=17.81
+ep80 solve_rate=0.610 mean_conflicts=4.32
+ep100 solve_rate=0.440 mean_conflicts=16.00
+ep120 solve_rate=0.287 mean_conflicts=19.54
+ep140 solve_rate=0.090 mean_conflicts=59.83
+ep150 solve_rate=0.063 mean_conflicts=70.46
+[color_full_none_n50_k3_p0.2_T3_ns3_s0] best solve_rate=0.61 mean_conflicts=4.323 @ep80 (155.3s)
+ wrote /home/yurenh2/rrog/runs/ckpt_color_full_none_n50_k3_p0.2_T3_ns3_s0.pt
+[full] LE n=300 fail_rate=0.39 | lambda1 SOLVED mean -0.3206 (n=183) | UNSOLVED mean +0.0485 (n=117) | sep=+0.3692 | AUROC(fail|lambda1)=0.919 | mean_lambda1=-0.1767
+[pe=none s0] deterministic solve_rate = 0.627 (n=150, K=16)
+ sigma pass@K lam-sel random perRoll AUROC(s|-lam)
+ 0.2 0.900 0.773 0.640 0.595 0.883
+ wrote /home/yurenh2/rrog/runs/ptrm_color_full_none_n50_k3_p0.2_T3_ns3_s0.json
+===== seed=0 full rwse =====
+ep20 solve_rate=0.000 mean_conflicts=51.22
+ep40 solve_rate=0.083 mean_conflicts=55.16
+ep60 solve_rate=0.203 mean_conflicts=58.38
+ep80 solve_rate=0.350 mean_conflicts=37.23
+ep100 solve_rate=0.383 mean_conflicts=31.48
+ep120 solve_rate=0.203 mean_conflicts=53.56
+ep140 solve_rate=0.273 mean_conflicts=37.79
+ep150 solve_rate=0.280 mean_conflicts=36.98
+[color_full_rwse_n50_k3_p0.2_T3_ns3_s0] best solve_rate=0.3833 mean_conflicts=31.483 @ep100 (153.3s)
+ wrote /home/yurenh2/rrog/runs/ckpt_color_full_rwse_n50_k3_p0.2_T3_ns3_s0.pt
+[full] LE n=300 fail_rate=0.62 | lambda1 SOLVED mean -0.1834 (n=115) | UNSOLVED mean +0.0100 (n=185) | sep=+0.1934 | AUROC(fail|lambda1)=0.817 | mean_lambda1=-0.0641
+[pe=rwse s0] deterministic solve_rate = 0.407 (n=150, K=16)
+ sigma pass@K lam-sel random perRoll AUROC(s|-lam)
+ 0.2 0.740 0.587 0.493 0.426 0.808
+ wrote /home/yurenh2/rrog/runs/ptrm_color_full_rwse_n50_k3_p0.2_T3_ns3_s0.json
+===== seed=0 1step none =====
+ep20 solve_rate=0.000 mean_conflicts=44.63
+ep40 solve_rate=0.123 mean_conflicts=19.01
+ep60 solve_rate=0.200 mean_conflicts=18.33
+ep80 solve_rate=0.067 mean_conflicts=42.81
+ep100 solve_rate=0.027 mean_conflicts=42.48
+ep120 solve_rate=0.087 mean_conflicts=23.53
+ep140 solve_rate=0.100 mean_conflicts=10.46
+ep150 solve_rate=0.127 mean_conflicts=18.32
+[color_1step_none_n50_k3_p0.2_T3_ns3_s0] best solve_rate=0.2 mean_conflicts=18.333 @ep60 (127.1s)
+ wrote /home/yurenh2/rrog/runs/ckpt_color_1step_none_n50_k3_p0.2_T3_ns3_s0.pt
+[1step] LE n=300 fail_rate=0.80 | lambda1 SOLVED mean -0.2978 (n=60) | UNSOLVED mean -0.1209 (n=240) | sep=+0.1769 | AUROC(fail|lambda1)=0.770 | mean_lambda1=-0.1563
+===== seed=1 full none =====
+ep20 solve_rate=0.000 mean_conflicts=76.94
+ep40 solve_rate=0.070 mean_conflicts=41.69
+ep60 solve_rate=0.207 mean_conflicts=33.02
+ep80 solve_rate=0.453 mean_conflicts=11.75
+ep100 solve_rate=0.203 mean_conflicts=21.24
+ep120 solve_rate=0.163 mean_conflicts=19.60
+ep140 solve_rate=0.067 mean_conflicts=41.63
+ep150 solve_rate=0.027 mean_conflicts=50.77
+[color_full_none_n50_k3_p0.2_T3_ns3_s1] best solve_rate=0.4533 mean_conflicts=11.753 @ep80 (151.0s)
+ wrote /home/yurenh2/rrog/runs/ckpt_color_full_none_n50_k3_p0.2_T3_ns3_s1.pt
+[full] LE n=300 fail_rate=0.55 | lambda1 SOLVED mean -0.3182 (n=136) | UNSOLVED mean -0.0426 (n=164) | sep=+0.2757 | AUROC(fail|lambda1)=0.864 | mean_lambda1=-0.1675
+[pe=none s1] deterministic solve_rate = 0.480 (n=150, K=16)
+ sigma pass@K lam-sel random perRoll AUROC(s|-lam)
+ 0.2 0.740 0.593 0.440 0.456 0.869
+ wrote /home/yurenh2/rrog/runs/ptrm_color_full_none_n50_k3_p0.2_T3_ns3_s1.json
+===== seed=1 full rwse =====
+ep20 solve_rate=0.000 mean_conflicts=115.59
+ep40 solve_rate=0.100 mean_conflicts=91.86
+ep60 solve_rate=0.327 mean_conflicts=34.60
+ep80 solve_rate=0.097 mean_conflicts=88.34
+ep100 solve_rate=0.140 mean_conflicts=56.41
+ep120 solve_rate=0.500 mean_conflicts=16.20
+ep140 solve_rate=0.537 mean_conflicts=13.68
+ep150 solve_rate=0.500 mean_conflicts=16.04
+[color_full_rwse_n50_k3_p0.2_T3_ns3_s1] best solve_rate=0.5367 mean_conflicts=13.68 @ep140 (152.1s)
+ wrote /home/yurenh2/rrog/runs/ckpt_color_full_rwse_n50_k3_p0.2_T3_ns3_s1.pt
+[full] LE n=300 fail_rate=0.46 | lambda1 SOLVED mean +0.0219 (n=161) | UNSOLVED mean +0.1773 (n=139) | sep=+0.1554 | AUROC(fail|lambda1)=0.774 | mean_lambda1=+0.0939
+[pe=rwse s1] deterministic solve_rate = 0.547 (n=150, K=16)
+ sigma pass@K lam-sel random perRoll AUROC(s|-lam)
+ 0.2 0.947 0.727 0.553 0.507 0.814
+ wrote /home/yurenh2/rrog/runs/ptrm_color_full_rwse_n50_k3_p0.2_T3_ns3_s1.json
+===== seed=1 1step none =====
+ep20 solve_rate=0.000 mean_conflicts=47.30
+ep40 solve_rate=0.157 mean_conflicts=34.06
+ep60 solve_rate=0.297 mean_conflicts=7.36
+ep80 solve_rate=0.333 mean_conflicts=8.70
+ep100 solve_rate=0.193 mean_conflicts=13.99
+ep120 solve_rate=0.233 mean_conflicts=6.94
+ep140 solve_rate=0.227 mean_conflicts=6.85
+ep150 solve_rate=0.320 mean_conflicts=5.31
+[color_1step_none_n50_k3_p0.2_T3_ns3_s1] best solve_rate=0.3333 mean_conflicts=8.7 @ep80 (123.3s)
+ wrote /home/yurenh2/rrog/runs/ckpt_color_1step_none_n50_k3_p0.2_T3_ns3_s1.pt
+[1step] LE n=300 fail_rate=0.67 | lambda1 SOLVED mean -0.2773 (n=100) | UNSOLVED mean +0.0189 (n=200) | sep=+0.2963 | AUROC(fail|lambda1)=0.846 | mean_lambda1=-0.0798
+===== seed=2 full none =====
+ep20 solve_rate=0.000 mean_conflicts=137.74
+ep40 solve_rate=0.087 mean_conflicts=44.48
+ep60 solve_rate=0.140 mean_conflicts=42.89
+ep80 solve_rate=0.413 mean_conflicts=16.23
+ep100 solve_rate=0.123 mean_conflicts=60.71
+ep120 solve_rate=0.027 mean_conflicts=58.64
+ep140 solve_rate=0.017 mean_conflicts=79.05
+ep150 solve_rate=0.013 mean_conflicts=83.54
+[color_full_none_n50_k3_p0.2_T3_ns3_s2] best solve_rate=0.4133 mean_conflicts=16.23 @ep80 (154.4s)
+ wrote /home/yurenh2/rrog/runs/ckpt_color_full_none_n50_k3_p0.2_T3_ns3_s2.pt
+[full] LE n=300 fail_rate=0.59 | lambda1 SOLVED mean -0.2913 (n=124) | UNSOLVED mean -0.0796 (n=176) | sep=+0.2117 | AUROC(fail|lambda1)=0.830 | mean_lambda1=-0.1671
+[pe=none s2] deterministic solve_rate = 0.447 (n=150, K=16)
+ sigma pass@K lam-sel random perRoll AUROC(s|-lam)
+ 0.2 0.773 0.620 0.467 0.435 0.821
+ wrote /home/yurenh2/rrog/runs/ptrm_color_full_none_n50_k3_p0.2_T3_ns3_s2.json
+===== seed=2 full rwse =====
+ep20 solve_rate=0.000 mean_conflicts=89.84
+ep40 solve_rate=0.110 mean_conflicts=36.78
+ep60 solve_rate=0.400 mean_conflicts=9.39
+ep80 solve_rate=0.560 mean_conflicts=3.40
+ep100 solve_rate=0.463 mean_conflicts=23.90
+ep120 solve_rate=0.550 mean_conflicts=16.37
+ep140 solve_rate=0.557 mean_conflicts=23.72
+ep150 solve_rate=0.510 mean_conflicts=34.19
+[color_full_rwse_n50_k3_p0.2_T3_ns3_s2] best solve_rate=0.56 mean_conflicts=3.4 @ep80 (151.8s)
+ wrote /home/yurenh2/rrog/runs/ckpt_color_full_rwse_n50_k3_p0.2_T3_ns3_s2.pt
+[full] LE n=300 fail_rate=0.44 | lambda1 SOLVED mean -0.2328 (n=168) | UNSOLVED mean +0.0707 (n=132) | sep=+0.3034 | AUROC(fail|lambda1)=0.883 | mean_lambda1=-0.0993
+[pe=rwse s2] deterministic solve_rate = 0.600 (n=150, K=16)
+ sigma pass@K lam-sel random perRoll AUROC(s|-lam)
+ 0.2 0.893 0.787 0.573 0.556 0.873
+ wrote /home/yurenh2/rrog/runs/ptrm_color_full_rwse_n50_k3_p0.2_T3_ns3_s2.json
+===== seed=2 1step none =====
+ep20 solve_rate=0.000 mean_conflicts=38.00
+ep40 solve_rate=0.153 mean_conflicts=18.17
+ep60 solve_rate=0.327 mean_conflicts=7.32
+ep80 solve_rate=0.320 mean_conflicts=5.31
+ep100 solve_rate=0.357 mean_conflicts=4.67
+ep120 solve_rate=0.293 mean_conflicts=7.30
+ep140 solve_rate=0.273 mean_conflicts=7.96
+ep150 solve_rate=0.227 mean_conflicts=10.33
+[color_1step_none_n50_k3_p0.2_T3_ns3_s2] best solve_rate=0.3567 mean_conflicts=4.67 @ep100 (131.6s)
+ wrote /home/yurenh2/rrog/runs/ckpt_color_1step_none_n50_k3_p0.2_T3_ns3_s2.pt
+[1step] LE n=300 fail_rate=0.64 | lambda1 SOLVED mean -0.2153 (n=107) | UNSOLVED mean +0.0916 (n=193) | sep=+0.3069 | AUROC(fail|lambda1)=0.867 | mean_lambda1=-0.0179
+===== seed=3 full none =====
+ep20 solve_rate=0.000 mean_conflicts=62.81
+ep40 solve_rate=0.207 mean_conflicts=33.51
+ep60 solve_rate=0.460 mean_conflicts=15.68
+ep80 solve_rate=0.033 mean_conflicts=114.30
+ep100 solve_rate=0.000 mean_conflicts=82.87
+ep120 solve_rate=0.000 mean_conflicts=85.01
+ep140 solve_rate=0.000 mean_conflicts=77.61
+ep150 solve_rate=0.000 mean_conflicts=79.58
+[color_full_none_n50_k3_p0.2_T3_ns3_s3] best solve_rate=0.46 mean_conflicts=15.68 @ep60 (151.6s)
+ wrote /home/yurenh2/rrog/runs/ckpt_color_full_none_n50_k3_p0.2_T3_ns3_s3.pt
+[full] LE n=300 fail_rate=0.54 | lambda1 SOLVED mean -0.4216 (n=138) | UNSOLVED mean -0.0826 (n=162) | sep=+0.3390 | AUROC(fail|lambda1)=0.886 | mean_lambda1=-0.2385
+[pe=none s3] deterministic solve_rate = 0.493 (n=150, K=16)
+ sigma pass@K lam-sel random perRoll AUROC(s|-lam)
+ 0.2 0.733 0.587 0.493 0.472 0.856
+ wrote /home/yurenh2/rrog/runs/ptrm_color_full_none_n50_k3_p0.2_T3_ns3_s3.json
+===== seed=3 full rwse =====
+ep20 solve_rate=0.000 mean_conflicts=139.52
+ep40 solve_rate=0.043 mean_conflicts=135.26
+ep60 solve_rate=0.313 mean_conflicts=32.37
+ep80 solve_rate=0.370 mean_conflicts=14.78
+ep100 solve_rate=0.490 mean_conflicts=12.46
+ep120 solve_rate=0.067 mean_conflicts=89.87
+ep140 solve_rate=0.137 mean_conflicts=53.29
+ep150 solve_rate=0.127 mean_conflicts=46.23
+[color_full_rwse_n50_k3_p0.2_T3_ns3_s3] best solve_rate=0.49 mean_conflicts=12.46 @ep100 (152.1s)
+ wrote /home/yurenh2/rrog/runs/ckpt_color_full_rwse_n50_k3_p0.2_T3_ns3_s3.pt
+[full] LE n=300 fail_rate=0.51 | lambda1 SOLVED mean -0.0438 (n=147) | UNSOLVED mean +0.1618 (n=153) | sep=+0.2056 | AUROC(fail|lambda1)=0.830 | mean_lambda1=+0.0610
+[pe=rwse s3] deterministic solve_rate = 0.507 (n=150, K=16)
+ sigma pass@K lam-sel random perRoll AUROC(s|-lam)
+ 0.2 0.893 0.727 0.473 0.497 0.823
+ wrote /home/yurenh2/rrog/runs/ptrm_color_full_rwse_n50_k3_p0.2_T3_ns3_s3.json
+===== seed=3 1step none =====
+ep20 solve_rate=0.000 mean_conflicts=54.09
+ep40 solve_rate=0.103 mean_conflicts=17.25
+ep60 solve_rate=0.187 mean_conflicts=28.48
+ep80 solve_rate=0.120 mean_conflicts=29.85
+ep100 solve_rate=0.200 mean_conflicts=16.34
+ep120 solve_rate=0.337 mean_conflicts=7.23
+ep140 solve_rate=0.157 mean_conflicts=16.67
+ep150 solve_rate=0.087 mean_conflicts=19.67
+[color_1step_none_n50_k3_p0.2_T3_ns3_s3] best solve_rate=0.3367 mean_conflicts=7.227 @ep120 (124.3s)
+ wrote /home/yurenh2/rrog/runs/ckpt_color_1step_none_n50_k3_p0.2_T3_ns3_s3.pt
+[1step] LE n=300 fail_rate=0.66 | lambda1 SOLVED mean -0.0124 (n=101) | UNSOLVED mean +0.1694 (n=199) | sep=+0.1818 | AUROC(fail|lambda1)=0.786 | mean_lambda1=+0.1082
+===== seed=4 full none =====
+ep20 solve_rate=0.000 mean_conflicts=67.14
+ep40 solve_rate=0.200 mean_conflicts=24.37
+ep60 solve_rate=0.347 mean_conflicts=27.20
+ep80 solve_rate=0.127 mean_conflicts=41.29
+ep100 solve_rate=0.180 mean_conflicts=35.82
+ep120 solve_rate=0.073 mean_conflicts=66.69
+ep140 solve_rate=0.070 mean_conflicts=74.02
+ep150 solve_rate=0.073 mean_conflicts=81.13
+[color_full_none_n50_k3_p0.2_T3_ns3_s4] best solve_rate=0.3467 mean_conflicts=27.197 @ep60 (149.8s)
+ wrote /home/yurenh2/rrog/runs/ckpt_color_full_none_n50_k3_p0.2_T3_ns3_s4.pt
+[full] LE n=300 fail_rate=0.65 | lambda1 SOLVED mean -0.4008 (n=104) | UNSOLVED mean -0.1440 (n=196) | sep=+0.2567 | AUROC(fail|lambda1)=0.842 | mean_lambda1=-0.2330
+[pe=none s4] deterministic solve_rate = 0.353 (n=150, K=16)
+ sigma pass@K lam-sel random perRoll AUROC(s|-lam)
+ 0.2 0.547 0.427 0.373 0.358 0.835
+ wrote /home/yurenh2/rrog/runs/ptrm_color_full_none_n50_k3_p0.2_T3_ns3_s4.json
+===== seed=4 full rwse =====
+ep20 solve_rate=0.000 mean_conflicts=56.06
+ep40 solve_rate=0.050 mean_conflicts=55.24
+ep60 solve_rate=0.303 mean_conflicts=39.93
+ep80 solve_rate=0.357 mean_conflicts=15.34
+ep100 solve_rate=0.547 mean_conflicts=6.19
+ep120 solve_rate=0.440 mean_conflicts=18.82
+ep140 solve_rate=0.240 mean_conflicts=41.55
+ep150 solve_rate=0.233 mean_conflicts=46.02
+[color_full_rwse_n50_k3_p0.2_T3_ns3_s4] best solve_rate=0.5467 mean_conflicts=6.193 @ep100 (154.2s)
+ wrote /home/yurenh2/rrog/runs/ckpt_color_full_rwse_n50_k3_p0.2_T3_ns3_s4.pt
+[full] LE n=300 fail_rate=0.45 | lambda1 SOLVED mean -0.1742 (n=164) | UNSOLVED mean +0.1084 (n=136) | sep=+0.2827 | AUROC(fail|lambda1)=0.880 | mean_lambda1=-0.0461
+[pe=rwse s4] deterministic solve_rate = 0.547 (n=150, K=16)
+ sigma pass@K lam-sel random perRoll AUROC(s|-lam)
+ 0.2 0.940 0.853 0.493 0.557 0.868
+ wrote /home/yurenh2/rrog/runs/ptrm_color_full_rwse_n50_k3_p0.2_T3_ns3_s4.json
+===== seed=4 1step none =====
+ep20 solve_rate=0.000 mean_conflicts=63.11
+ep40 solve_rate=0.187 mean_conflicts=11.62
+ep60 solve_rate=0.237 mean_conflicts=22.39
+ep80 solve_rate=0.200 mean_conflicts=24.78
+ep100 solve_rate=0.183 mean_conflicts=15.49
+ep120 solve_rate=0.207 mean_conflicts=12.25
+ep140 solve_rate=0.093 mean_conflicts=21.66
+ep150 solve_rate=0.067 mean_conflicts=30.98
+[color_1step_none_n50_k3_p0.2_T3_ns3_s4] best solve_rate=0.2367 mean_conflicts=22.393 @ep60 (120.8s)
+ wrote /home/yurenh2/rrog/runs/ckpt_color_1step_none_n50_k3_p0.2_T3_ns3_s4.pt
+[1step] LE n=300 fail_rate=0.76 | lambda1 SOLVED mean -0.2996 (n=71) | UNSOLVED mean -0.0523 (n=229) | sep=+0.2473 | AUROC(fail|lambda1)=0.835 | mean_lambda1=-0.1108
+===== AGGREGATE =====
+=== best solve_rate (deterministic, EMA) ===
+ ('1step', 'none'): 0.293±0.062 (n=5)
+ ('full', 'none'): 0.457±0.087 (n=5)
+ ('full', 'rwse'): 0.503±0.064 (n=5)
+=== LE AUROC(fail|lambda1) ===
+ ('1step', 'none'): 0.821±0.036 (n=5)
+ ('full', 'none'): 0.868±0.032 (n=5)
+ ('full', 'rwse'): 0.837±0.041 (n=5)
+=== PTRM sigma=0.2 ===
+ ('full', 'none'): det 0.480±0.088 (n=5) | pass@K 0.739±0.113 (n=5) | lambda-sel 0.600±0.110 (n=5) | AUROC 0.853±0.023 (n=5)
+ ('full', 'rwse'): det 0.521±0.065 (n=5) | pass@K 0.883±0.075 (n=5) | lambda-sel 0.736±0.088 (n=5) | AUROC 0.837±0.028 (n=5)
+done=2026-06-16T12:41:20-05:00