1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
|
label,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,correct_count_q10,correct_count_q50,correct_count_q90,zero_frac,full_frac
trm_baseline_best,0.0,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8658000230789185,0.9502986073493958,0.8658000230789185,0.8658000230789185,6.926400184631348,2.7269365787506104,0.0,8.0,8.0,0.13420000672340393,0.8658000230789185
trm_baseline_best,3e-05,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.868399977684021,0.9510218501091003,0.9455999732017517,0.769599974155426,6.947199821472168,2.309980869293213,3.0,8.0,8.0,0.0544000007212162,0.769599974155426
trm_baseline_best,0.0001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8675500154495239,0.9506199955940247,0.9485999941825867,0.769599974155426,6.940400123596191,2.3122386932373047,2.0,8.0,8.0,0.05139999836683273,0.769599974155426
trm_baseline_best,0.0003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8685500025749207,0.9512001872062683,0.9476000070571899,0.7698000073432922,6.948400020599365,2.310267925262451,3.0,8.0,8.0,0.052400000393390656,0.7698000073432922
trm_baseline_best,0.001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8694999814033508,0.9514188766479492,0.9488000273704529,0.7712000012397766,6.955999851226807,2.289206027984619,3.0,8.0,8.0,0.05119999870657921,0.7712000012397766
trm_baseline_best,0.003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8676750063896179,0.950739860534668,0.9448000192642212,0.7666000127792358,6.941400051116943,2.311615467071533,3.0,8.0,8.0,0.0551999993622303,0.7666000127792358
trm_baseline_best,0.01,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669000267982483,0.9505268335342407,0.9452000260353088,0.7635999917984009,6.935200214385986,2.3034324645996094,3.0,8.0,8.0,0.05480000004172325,0.7635999917984009
trm_baseline_best,0.03,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8665000200271606,0.9502792954444885,0.9503999948501587,0.7573999762535095,6.932000160217285,2.2814416885375977,3.0,8.0,8.0,0.04960000142455101,0.7573999762535095
trm_baseline_best,0.1,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8664500117301941,0.9503796696662903,0.9553999900817871,0.7558000087738037,6.931600093841553,2.2636523246765137,3.0,8.0,8.0,0.044599998742341995,0.7558000087738037
trm_multi4_best,0.0,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8920000195503235,0.9585950970649719,0.8920000195503235,0.8920000195503235,7.136000156402588,2.4830431938171387,0.0,8.0,8.0,0.1080000028014183,0.8920000195503235
trm_multi4_best,3e-05,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8941249847412109,0.9596672654151917,0.9577999711036682,0.8095999956130981,7.1529998779296875,2.0940847396850586,4.0,8.0,8.0,0.0421999990940094,0.8095999956130981
trm_multi4_best,0.0001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8919249773025513,0.9588295817375183,0.9574000239372253,0.8040000200271606,7.13539981842041,2.107004165649414,4.0,8.0,8.0,0.04259999841451645,0.8040000200271606
trm_multi4_best,0.0003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.9595761895179749,0.9570000171661377,0.8118000030517578,7.1519999504089355,2.106584072113037,4.0,8.0,8.0,0.0430000014603138,0.8118000030517578
trm_multi4_best,0.001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8934500217437744,0.9593700170516968,0.9535999894142151,0.8091999888420105,7.147600173950195,2.1107852458953857,4.0,8.0,8.0,0.04639999940991402,0.8091999888420105
trm_multi4_best,0.003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8934749960899353,0.9593802690505981,0.9593999981880188,0.8101999759674072,7.147799968719482,2.1013221740722656,4.0,8.0,8.0,0.0406000018119812,0.8101999759674072
trm_multi4_best,0.01,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8938249945640564,0.9593154191970825,0.9599999785423279,0.7993999719619751,7.150599956512451,2.065894365310669,4.0,8.0,8.0,0.03999999910593033,0.7993999719619751
trm_multi4_best,0.03,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8929749727249146,0.9591527581214905,0.9606000185012817,0.7986000180244446,7.143799781799316,2.0669593811035156,4.0,8.0,8.0,0.039400000125169754,0.7986000180244446
trm_multi4_best,0.1,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8917250037193298,0.9587355256080627,0.9639999866485596,0.79339998960495,7.133800029754639,2.062788724899292,4.0,8.0,8.0,0.035999998450279236,0.79339998960495
trm_multi4_final,0.0,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8452000021934509,0.9396741390228271,0.8452000021934509,0.8452000021934509,6.761600017547607,2.8937113285064697,0.0,8.0,8.0,0.15479999780654907,0.8452000021934509
trm_multi4_final,3e-05,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.840499997138977,0.9371709823608398,0.9114000201225281,0.7591999769210815,6.723999977111816,2.6249237060546875,1.0,8.0,8.0,0.08860000222921371,0.7591999769210815
trm_multi4_final,0.0001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8407999873161316,0.9373589754104614,0.9136000275611877,0.7577999830245972,6.726399898529053,2.612344264984131,1.0,8.0,8.0,0.08640000224113464,0.7577999830245972
trm_multi4_final,0.0003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8399249911308289,0.9371064305305481,0.9092000126838684,0.7572000026702881,6.719399929046631,2.6245501041412354,1.0,8.0,8.0,0.09080000221729279,0.7572000026702881
trm_multi4_final,0.001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8385249972343445,0.9361668229103088,0.9089999794960022,0.7588000297546387,6.708199977874756,2.6401236057281494,1.0,8.0,8.0,0.09099999815225601,0.7588000297546387
trm_multi4_final,0.003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8406999707221985,0.9372812509536743,0.9165999889373779,0.753000020980835,6.725599765777588,2.6047465801239014,1.0,8.0,8.0,0.08340000361204147,0.753000020980835
trm_multi4_final,0.01,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.838100016117096,0.9362147450447083,0.9132000207901001,0.7477999925613403,6.704800128936768,2.612366199493408,1.0,8.0,8.0,0.0868000015616417,0.7477999925613403
trm_multi4_final,0.03,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8380249738693237,0.9364882707595825,0.9197999835014343,0.7378000020980835,6.70419979095459,2.575946807861328,1.0,8.0,8.0,0.08020000159740448,0.7378000020980835
trm_multi4_final,0.1,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.841949999332428,0.9376598596572876,0.9305999875068665,0.7305999994277954,6.735599994659424,2.5063304901123047,2.0,8.0,8.0,0.06939999759197235,0.7305999994277954
|