diff options
Diffstat (limited to 'runs/pe2_sweep.log')
| -rw-r--r-- | runs/pe2_sweep.log | 97 |
1 files changed, 97 insertions, 0 deletions
diff --git a/runs/pe2_sweep.log b/runs/pe2_sweep.log new file mode 100644 index 0000000..9c83802 --- /dev/null +++ b/runs/pe2_sweep.log @@ -0,0 +1,97 @@ +host=timan1.cs.illinois.edu gpu=0 start=2026-06-16T12:46:37-05:00 +===== seed=0 full gsn ===== +ep20 solve_rate=0.000 mean_conflicts=89.71 +ep40 solve_rate=0.023 mean_conflicts=149.88 +ep60 solve_rate=0.113 mean_conflicts=86.64 +ep80 solve_rate=0.643 mean_conflicts=2.77 +ep100 solve_rate=0.587 mean_conflicts=8.41 +ep120 solve_rate=0.170 mean_conflicts=37.12 +ep140 solve_rate=0.053 mean_conflicts=54.13 +ep150 solve_rate=0.037 mean_conflicts=58.30 +[color_full_gsn_n50_k3_p0.2_T3_ns3_s0] best solve_rate=0.6433 mean_conflicts=2.77 @ep80 (153.5s) + wrote /home/yurenh2/rrog/runs/ckpt_color_full_gsn_n50_k3_p0.2_T3_ns3_s0.pt +[full] LE n=300 fail_rate=0.36 | lambda1 SOLVED mean -0.3404 (n=193) | UNSOLVED mean +0.0177 (n=107) | sep=+0.3580 | AUROC(fail|lambda1)=0.898 | mean_lambda1=-0.2127 +[pe=gsn s0] deterministic solve_rate = 0.627 (n=150, K=16) + sigma pass@K lam-sel random perRoll AUROC(s|-lam) + 0.2 0.853 0.733 0.613 0.623 0.888 + wrote /home/yurenh2/rrog/runs/ptrm_color_full_gsn_n50_k3_p0.2_T3_ns3_s0.json +===== seed=1 full gsn ===== +ep20 solve_rate=0.000 mean_conflicts=64.37 +ep40 solve_rate=0.130 mean_conflicts=24.72 +ep60 solve_rate=0.073 mean_conflicts=52.57 +ep80 solve_rate=0.533 mean_conflicts=19.95 +ep100 solve_rate=0.143 mean_conflicts=81.78 +ep120 solve_rate=0.080 mean_conflicts=67.85 +ep140 solve_rate=0.033 mean_conflicts=97.71 +ep150 solve_rate=0.010 mean_conflicts=121.37 +[color_full_gsn_n50_k3_p0.2_T3_ns3_s1] best solve_rate=0.5333 mean_conflicts=19.95 @ep80 (150.6s) + wrote /home/yurenh2/rrog/runs/ckpt_color_full_gsn_n50_k3_p0.2_T3_ns3_s1.pt +[full] LE n=300 fail_rate=0.47 | lambda1 SOLVED mean -0.2563 (n=160) | UNSOLVED mean +0.0329 (n=140) | sep=+0.2893 | AUROC(fail|lambda1)=0.854 | mean_lambda1=-0.1213 +[pe=gsn s1] deterministic solve_rate = 0.527 (n=150, K=16) + sigma pass@K lam-sel random perRoll AUROC(s|-lam) + 0.2 0.787 0.640 0.527 0.524 0.862 + wrote /home/yurenh2/rrog/runs/ptrm_color_full_gsn_n50_k3_p0.2_T3_ns3_s1.json +===== seed=2 full gsn ===== +ep20 solve_rate=0.000 mean_conflicts=73.69 +ep40 solve_rate=0.140 mean_conflicts=28.86 +ep60 solve_rate=0.350 mean_conflicts=19.34 +ep80 solve_rate=0.650 mean_conflicts=9.43 +ep100 solve_rate=0.460 mean_conflicts=16.59 +ep120 solve_rate=0.110 mean_conflicts=54.56 +ep140 solve_rate=0.030 mean_conflicts=77.26 +ep150 solve_rate=0.023 mean_conflicts=86.61 +[color_full_gsn_n50_k3_p0.2_T3_ns3_s2] best solve_rate=0.65 mean_conflicts=9.427 @ep80 (154.1s) + wrote /home/yurenh2/rrog/runs/ckpt_color_full_gsn_n50_k3_p0.2_T3_ns3_s2.pt +[full] LE n=300 fail_rate=0.35 | lambda1 SOLVED mean -0.3873 (n=195) | UNSOLVED mean -0.0600 (n=105) | sep=+0.3273 | AUROC(fail|lambda1)=0.839 | mean_lambda1=-0.2727 +[pe=gsn s2] deterministic solve_rate = 0.627 (n=150, K=16) + sigma pass@K lam-sel random perRoll AUROC(s|-lam) + 0.2 0.827 0.713 0.627 0.625 0.863 + wrote /home/yurenh2/rrog/runs/ptrm_color_full_gsn_n50_k3_p0.2_T3_ns3_s2.json +===== seed=3 full gsn ===== +ep20 solve_rate=0.000 mean_conflicts=105.31 +ep40 solve_rate=0.190 mean_conflicts=13.04 +ep60 solve_rate=0.097 mean_conflicts=53.37 +ep80 solve_rate=0.577 mean_conflicts=7.48 +ep100 solve_rate=0.417 mean_conflicts=31.46 +ep120 solve_rate=0.180 mean_conflicts=58.09 +ep140 solve_rate=0.110 mean_conflicts=83.96 +ep150 solve_rate=0.103 mean_conflicts=93.02 +[color_full_gsn_n50_k3_p0.2_T3_ns3_s3] best solve_rate=0.5767 mean_conflicts=7.477 @ep80 (150.0s) + wrote /home/yurenh2/rrog/runs/ckpt_color_full_gsn_n50_k3_p0.2_T3_ns3_s3.pt +[full] LE n=300 fail_rate=0.42 | lambda1 SOLVED mean -0.2939 (n=173) | UNSOLVED mean +0.0027 (n=127) | sep=+0.2966 | AUROC(fail|lambda1)=0.857 | mean_lambda1=-0.1684 +[pe=gsn s3] deterministic solve_rate = 0.573 (n=150, K=16) + sigma pass@K lam-sel random perRoll AUROC(s|-lam) + 0.2 0.807 0.660 0.573 0.565 0.865 + wrote /home/yurenh2/rrog/runs/ptrm_color_full_gsn_n50_k3_p0.2_T3_ns3_s3.json +===== seed=4 full gsn ===== +ep20 solve_rate=0.000 mean_conflicts=96.19 +ep40 solve_rate=0.140 mean_conflicts=21.32 +ep60 solve_rate=0.140 mean_conflicts=43.18 +ep80 solve_rate=0.367 mean_conflicts=17.62 +ep100 solve_rate=0.577 mean_conflicts=7.39 +ep120 solve_rate=0.163 mean_conflicts=76.12 +ep140 solve_rate=0.040 mean_conflicts=100.85 +ep150 solve_rate=0.040 mean_conflicts=104.33 +[color_full_gsn_n50_k3_p0.2_T3_ns3_s4] best solve_rate=0.5767 mean_conflicts=7.393 @ep100 (153.1s) + wrote /home/yurenh2/rrog/runs/ckpt_color_full_gsn_n50_k3_p0.2_T3_ns3_s4.pt +[full] LE n=300 fail_rate=0.42 | lambda1 SOLVED mean -0.3064 (n=173) | UNSOLVED mean +0.0036 (n=127) | sep=+0.3100 | AUROC(fail|lambda1)=0.890 | mean_lambda1=-0.1751 +[pe=gsn s4] deterministic solve_rate = 0.620 (n=150, K=16) + sigma pass@K lam-sel random perRoll AUROC(s|-lam) + 0.2 0.847 0.740 0.627 0.635 0.896 + wrote /home/yurenh2/rrog/runs/ptrm_color_full_gsn_n50_k3_p0.2_T3_ns3_s4.json +===== AGGREGATE (all pe: none/rwse/gsn) ===== +=== best solve_rate (deterministic, EMA) === + ('1step', 'none'): 0.293±0.062 (n=5) + ('full', 'gsn'): 0.596±0.044 (n=5) + ('full', 'none'): 0.457±0.087 (n=5) + ('full', 'rwse'): 0.503±0.064 (n=5) +=== LE AUROC(fail|lambda1) === + ('1step', 'none'): 0.821±0.036 (n=5) + ('full', 'gsn'): 0.868±0.023 (n=5) + ('full', 'none'): 0.868±0.032 (n=5) + ('full', 'rwse'): 0.837±0.041 (n=5) +=== PTRM sigma=0.2 === + ('full', 'gsn'): det 0.595±0.039 (n=5) | pass@K 0.824±0.025 (n=5) | lambda-sel 0.697±0.040 (n=5) | AUROC 0.875±0.014 (n=5) + ('full', 'none'): det 0.480±0.088 (n=5) | pass@K 0.739±0.113 (n=5) | lambda-sel 0.600±0.110 (n=5) | AUROC 0.853±0.023 (n=5) + ('full', 'rwse'): det 0.521±0.065 (n=5) | pass@K 0.883±0.075 (n=5) | lambda-sel 0.736±0.088 (n=5) | AUROC 0.837±0.028 (n=5) +done=2026-06-16T13:05:45-05:00 |
