summaryrefslogtreecommitdiff
path: root/runs/pe2_sweep.log
diff options
context:
space:
mode:
Diffstat (limited to 'runs/pe2_sweep.log')
-rw-r--r--runs/pe2_sweep.log97
1 files changed, 97 insertions, 0 deletions
diff --git a/runs/pe2_sweep.log b/runs/pe2_sweep.log
new file mode 100644
index 0000000..9c83802
--- /dev/null
+++ b/runs/pe2_sweep.log
@@ -0,0 +1,97 @@
+host=timan1.cs.illinois.edu gpu=0 start=2026-06-16T12:46:37-05:00
+===== seed=0 full gsn =====
+ep20 solve_rate=0.000 mean_conflicts=89.71
+ep40 solve_rate=0.023 mean_conflicts=149.88
+ep60 solve_rate=0.113 mean_conflicts=86.64
+ep80 solve_rate=0.643 mean_conflicts=2.77
+ep100 solve_rate=0.587 mean_conflicts=8.41
+ep120 solve_rate=0.170 mean_conflicts=37.12
+ep140 solve_rate=0.053 mean_conflicts=54.13
+ep150 solve_rate=0.037 mean_conflicts=58.30
+[color_full_gsn_n50_k3_p0.2_T3_ns3_s0] best solve_rate=0.6433 mean_conflicts=2.77 @ep80 (153.5s)
+ wrote /home/yurenh2/rrog/runs/ckpt_color_full_gsn_n50_k3_p0.2_T3_ns3_s0.pt
+[full] LE n=300 fail_rate=0.36 | lambda1 SOLVED mean -0.3404 (n=193) | UNSOLVED mean +0.0177 (n=107) | sep=+0.3580 | AUROC(fail|lambda1)=0.898 | mean_lambda1=-0.2127
+[pe=gsn s0] deterministic solve_rate = 0.627 (n=150, K=16)
+ sigma pass@K lam-sel random perRoll AUROC(s|-lam)
+ 0.2 0.853 0.733 0.613 0.623 0.888
+ wrote /home/yurenh2/rrog/runs/ptrm_color_full_gsn_n50_k3_p0.2_T3_ns3_s0.json
+===== seed=1 full gsn =====
+ep20 solve_rate=0.000 mean_conflicts=64.37
+ep40 solve_rate=0.130 mean_conflicts=24.72
+ep60 solve_rate=0.073 mean_conflicts=52.57
+ep80 solve_rate=0.533 mean_conflicts=19.95
+ep100 solve_rate=0.143 mean_conflicts=81.78
+ep120 solve_rate=0.080 mean_conflicts=67.85
+ep140 solve_rate=0.033 mean_conflicts=97.71
+ep150 solve_rate=0.010 mean_conflicts=121.37
+[color_full_gsn_n50_k3_p0.2_T3_ns3_s1] best solve_rate=0.5333 mean_conflicts=19.95 @ep80 (150.6s)
+ wrote /home/yurenh2/rrog/runs/ckpt_color_full_gsn_n50_k3_p0.2_T3_ns3_s1.pt
+[full] LE n=300 fail_rate=0.47 | lambda1 SOLVED mean -0.2563 (n=160) | UNSOLVED mean +0.0329 (n=140) | sep=+0.2893 | AUROC(fail|lambda1)=0.854 | mean_lambda1=-0.1213
+[pe=gsn s1] deterministic solve_rate = 0.527 (n=150, K=16)
+ sigma pass@K lam-sel random perRoll AUROC(s|-lam)
+ 0.2 0.787 0.640 0.527 0.524 0.862
+ wrote /home/yurenh2/rrog/runs/ptrm_color_full_gsn_n50_k3_p0.2_T3_ns3_s1.json
+===== seed=2 full gsn =====
+ep20 solve_rate=0.000 mean_conflicts=73.69
+ep40 solve_rate=0.140 mean_conflicts=28.86
+ep60 solve_rate=0.350 mean_conflicts=19.34
+ep80 solve_rate=0.650 mean_conflicts=9.43
+ep100 solve_rate=0.460 mean_conflicts=16.59
+ep120 solve_rate=0.110 mean_conflicts=54.56
+ep140 solve_rate=0.030 mean_conflicts=77.26
+ep150 solve_rate=0.023 mean_conflicts=86.61
+[color_full_gsn_n50_k3_p0.2_T3_ns3_s2] best solve_rate=0.65 mean_conflicts=9.427 @ep80 (154.1s)
+ wrote /home/yurenh2/rrog/runs/ckpt_color_full_gsn_n50_k3_p0.2_T3_ns3_s2.pt
+[full] LE n=300 fail_rate=0.35 | lambda1 SOLVED mean -0.3873 (n=195) | UNSOLVED mean -0.0600 (n=105) | sep=+0.3273 | AUROC(fail|lambda1)=0.839 | mean_lambda1=-0.2727
+[pe=gsn s2] deterministic solve_rate = 0.627 (n=150, K=16)
+ sigma pass@K lam-sel random perRoll AUROC(s|-lam)
+ 0.2 0.827 0.713 0.627 0.625 0.863
+ wrote /home/yurenh2/rrog/runs/ptrm_color_full_gsn_n50_k3_p0.2_T3_ns3_s2.json
+===== seed=3 full gsn =====
+ep20 solve_rate=0.000 mean_conflicts=105.31
+ep40 solve_rate=0.190 mean_conflicts=13.04
+ep60 solve_rate=0.097 mean_conflicts=53.37
+ep80 solve_rate=0.577 mean_conflicts=7.48
+ep100 solve_rate=0.417 mean_conflicts=31.46
+ep120 solve_rate=0.180 mean_conflicts=58.09
+ep140 solve_rate=0.110 mean_conflicts=83.96
+ep150 solve_rate=0.103 mean_conflicts=93.02
+[color_full_gsn_n50_k3_p0.2_T3_ns3_s3] best solve_rate=0.5767 mean_conflicts=7.477 @ep80 (150.0s)
+ wrote /home/yurenh2/rrog/runs/ckpt_color_full_gsn_n50_k3_p0.2_T3_ns3_s3.pt
+[full] LE n=300 fail_rate=0.42 | lambda1 SOLVED mean -0.2939 (n=173) | UNSOLVED mean +0.0027 (n=127) | sep=+0.2966 | AUROC(fail|lambda1)=0.857 | mean_lambda1=-0.1684
+[pe=gsn s3] deterministic solve_rate = 0.573 (n=150, K=16)
+ sigma pass@K lam-sel random perRoll AUROC(s|-lam)
+ 0.2 0.807 0.660 0.573 0.565 0.865
+ wrote /home/yurenh2/rrog/runs/ptrm_color_full_gsn_n50_k3_p0.2_T3_ns3_s3.json
+===== seed=4 full gsn =====
+ep20 solve_rate=0.000 mean_conflicts=96.19
+ep40 solve_rate=0.140 mean_conflicts=21.32
+ep60 solve_rate=0.140 mean_conflicts=43.18
+ep80 solve_rate=0.367 mean_conflicts=17.62
+ep100 solve_rate=0.577 mean_conflicts=7.39
+ep120 solve_rate=0.163 mean_conflicts=76.12
+ep140 solve_rate=0.040 mean_conflicts=100.85
+ep150 solve_rate=0.040 mean_conflicts=104.33
+[color_full_gsn_n50_k3_p0.2_T3_ns3_s4] best solve_rate=0.5767 mean_conflicts=7.393 @ep100 (153.1s)
+ wrote /home/yurenh2/rrog/runs/ckpt_color_full_gsn_n50_k3_p0.2_T3_ns3_s4.pt
+[full] LE n=300 fail_rate=0.42 | lambda1 SOLVED mean -0.3064 (n=173) | UNSOLVED mean +0.0036 (n=127) | sep=+0.3100 | AUROC(fail|lambda1)=0.890 | mean_lambda1=-0.1751
+[pe=gsn s4] deterministic solve_rate = 0.620 (n=150, K=16)
+ sigma pass@K lam-sel random perRoll AUROC(s|-lam)
+ 0.2 0.847 0.740 0.627 0.635 0.896
+ wrote /home/yurenh2/rrog/runs/ptrm_color_full_gsn_n50_k3_p0.2_T3_ns3_s4.json
+===== AGGREGATE (all pe: none/rwse/gsn) =====
+=== best solve_rate (deterministic, EMA) ===
+ ('1step', 'none'): 0.293±0.062 (n=5)
+ ('full', 'gsn'): 0.596±0.044 (n=5)
+ ('full', 'none'): 0.457±0.087 (n=5)
+ ('full', 'rwse'): 0.503±0.064 (n=5)
+=== LE AUROC(fail|lambda1) ===
+ ('1step', 'none'): 0.821±0.036 (n=5)
+ ('full', 'gsn'): 0.868±0.023 (n=5)
+ ('full', 'none'): 0.868±0.032 (n=5)
+ ('full', 'rwse'): 0.837±0.041 (n=5)
+=== PTRM sigma=0.2 ===
+ ('full', 'gsn'): det 0.595±0.039 (n=5) | pass@K 0.824±0.025 (n=5) | lambda-sel 0.697±0.040 (n=5) | AUROC 0.875±0.014 (n=5)
+ ('full', 'none'): det 0.480±0.088 (n=5) | pass@K 0.739±0.113 (n=5) | lambda-sel 0.600±0.110 (n=5) | AUROC 0.853±0.023 (n=5)
+ ('full', 'rwse'): det 0.521±0.065 (n=5) | pass@K 0.883±0.075 (n=5) | lambda-sel 0.736±0.088 (n=5) | AUROC 0.837±0.028 (n=5)
+done=2026-06-16T13:05:45-05:00