summaryrefslogtreecommitdiff
path: root/research/flossing/initial_perturb_robustness/trm_baseline_best_step58590_n5000_k8.summary.csv
blob: 94b9d4a82401ae9fd4d6d578a019793d51de2ac2 (plain)
1
2
3
4
5
6
7
8
9
10
label,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,correct_count_q10,correct_count_q50,correct_count_q90,zero_frac,full_frac
trm_baseline_best,0.0,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8658000230789185,0.9502986073493958,0.8658000230789185,0.8658000230789185,6.926400184631348,2.7269365787506104,0.0,8.0,8.0,0.13420000672340393,0.8658000230789185
trm_baseline_best,3e-05,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.868399977684021,0.9510218501091003,0.9455999732017517,0.769599974155426,6.947199821472168,2.309980869293213,3.0,8.0,8.0,0.0544000007212162,0.769599974155426
trm_baseline_best,0.0001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8675500154495239,0.9506199955940247,0.9485999941825867,0.769599974155426,6.940400123596191,2.3122386932373047,2.0,8.0,8.0,0.05139999836683273,0.769599974155426
trm_baseline_best,0.0003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8685500025749207,0.9512001872062683,0.9476000070571899,0.7698000073432922,6.948400020599365,2.310267925262451,3.0,8.0,8.0,0.052400000393390656,0.7698000073432922
trm_baseline_best,0.001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8694999814033508,0.9514188766479492,0.9488000273704529,0.7712000012397766,6.955999851226807,2.289206027984619,3.0,8.0,8.0,0.05119999870657921,0.7712000012397766
trm_baseline_best,0.003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8676750063896179,0.950739860534668,0.9448000192642212,0.7666000127792358,6.941400051116943,2.311615467071533,3.0,8.0,8.0,0.0551999993622303,0.7666000127792358
trm_baseline_best,0.01,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669000267982483,0.9505268335342407,0.9452000260353088,0.7635999917984009,6.935200214385986,2.3034324645996094,3.0,8.0,8.0,0.05480000004172325,0.7635999917984009
trm_baseline_best,0.03,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8665000200271606,0.9502792954444885,0.9503999948501587,0.7573999762535095,6.932000160217285,2.2814416885375977,3.0,8.0,8.0,0.04960000142455101,0.7573999762535095
trm_baseline_best,0.1,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8664500117301941,0.9503796696662903,0.9553999900817871,0.7558000087738037,6.931600093841553,2.2636523246765137,3.0,8.0,8.0,0.044599998742341995,0.7558000087738037