diff options
Diffstat (limited to 'research/flossing/initial_perturb_robustness/smoke_baseline.summary.csv')
| -rw-r--r-- | research/flossing/initial_perturb_robustness/smoke_baseline.summary.csv | 3 |
1 files changed, 3 insertions, 0 deletions
diff --git a/research/flossing/initial_perturb_robustness/smoke_baseline.summary.csv b/research/flossing/initial_perturb_robustness/smoke_baseline.summary.csv new file mode 100644 index 0000000..c5fd40c --- /dev/null +++ b/research/flossing/initial_perturb_robustness/smoke_baseline.summary.csv @@ -0,0 +1,3 @@ +label,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,correct_count_q10,correct_count_q50,correct_count_q90,zero_frac,full_frac
+trm_baseline_smoke,0.0,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.875,0.9537037014961243,0.875,0.875,1.75,0.6614378094673157,1.0,2.0,2.0,0.125,0.875
+trm_baseline_smoke,0.001,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8125,0.9309413433074951,0.875,0.75,1.625,0.6959705352783203,0.5,2.0,2.0,0.125,0.75
|
