diff options
Diffstat (limited to 'research/flossing/late_perturb_robustness/trm_baseline_best_step58590_n3000_k8_late.summary.csv')
| -rw-r--r-- | research/flossing/late_perturb_robustness/trm_baseline_best_step58590_n3000_k8_late.summary.csv | 31 |
1 files changed, 31 insertions, 0 deletions
diff --git a/research/flossing/late_perturb_robustness/trm_baseline_best_step58590_n3000_k8_late.summary.csv b/research/flossing/late_perturb_robustness/trm_baseline_best_step58590_n3000_k8_late.summary.csv new file mode 100644 index 0000000..9f376be --- /dev/null +++ b/research/flossing/late_perturb_robustness/trm_baseline_best_step58590_n3000_k8_late.summary.csv @@ -0,0 +1,31 @@ +label,perturb_after,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,zero_frac,full_frac,clean_acc,retain_mean_on_clean_success,allK_on_clean_success,rescue_mean_on_clean_fail,passK_on_clean_fail
+trm_baseline_best,0,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.950645923614502,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0
+trm_baseline_best,0,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.9504892826080322,0.9483333230018616,0.7639999985694885,6.935999870300293,2.303454875946045,0.05166666582226753,0.7639999985694885,0.8669999837875366,0.9578046798706055,0.875048041343689,0.2750626504421234,0.6315789222717285
+trm_baseline_best,0,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8684166669845581,0.9509722590446472,0.9456666707992554,0.7683333158493042,6.947333335876465,2.3008460998535156,0.05433333292603493,0.7683333158493042,0.8669999837875366,0.9602556824684143,0.8800461292266846,0.2697368562221527,0.6090225577354431
+trm_baseline_best,0,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.9504145383834839,0.9463333487510681,0.765333354473114,6.935999870300293,2.3080809116363525,0.05366666615009308,0.765333354473114,0.8669999837875366,0.9579008221626282,0.8785082697868347,0.2744360864162445,0.6190476417541504
+trm_baseline_best,0,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8675000071525574,0.9505621790885925,0.9496666789054871,0.7616666555404663,6.940000057220459,2.2861320972442627,0.050333332270383835,0.7616666555404663,0.8669999837875366,0.9567474126815796,0.8723567724227905,0.2857142984867096,0.6390977501869202
+trm_baseline_best,0,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8682083487510681,0.9508457183837891,0.9506666660308838,0.7593333125114441,6.945666790008545,2.265108108520508,0.04933333396911621,0.7593333125114441,0.8669999837875366,0.956122636795044,0.8696655035018921,0.2951127886772156,0.6441102623939514
+trm_baseline_best,4,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.950645923614502,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0
+trm_baseline_best,4,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8666250109672546,0.9501558542251587,0.9350000023841858,0.8009999990463257,6.933000087738037,2.4135406017303467,0.06499999761581421,0.8009999990463257,0.8669999837875366,0.9687620401382446,0.9211841821670532,0.20081453025341034,0.5263158082962036
+trm_baseline_best,4,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8679583072662354,0.950590968132019,0.9359999895095825,0.7996666431427002,6.943666458129883,2.404542922973633,0.06400000303983688,0.7996666431427002,0.8669999837875366,0.970588207244873,0.9200307726860046,0.19893483817577362,0.5338345766067505
+trm_baseline_best,4,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8680416941642761,0.9508261680603027,0.9399999976158142,0.8013333082199097,6.944333553314209,2.3910739421844482,0.05999999865889549,0.8013333082199097,0.8669999837875366,0.9685697555541992,0.9227220416069031,0.21271929144859314,0.5664160251617432
+trm_baseline_best,4,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8660833239555359,0.9499126076698303,0.934333324432373,0.7953333258628845,6.928666591644287,2.413485288619995,0.06566666811704636,0.7953333258628845,0.8669999837875366,0.967608630657196,0.915032684803009,0.20426064729690552,0.523809552192688
+trm_baseline_best,4,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8659999966621399,0.9500890970230103,0.9403333067893982,0.7973333597183228,6.927999973297119,2.399336576461792,0.05966666713356972,0.7973333597183228,0.8669999837875366,0.9667435884475708,0.9161860942840576,0.20927318930625916,0.5664160251617432
+trm_baseline_best,8,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.950645923614502,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0
+trm_baseline_best,8,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669583201408386,0.9507402181625366,0.9226666688919067,0.8306666612625122,6.935666561126709,2.5147950649261475,0.07733333110809326,0.8306666612625122,0.8669999837875366,0.9784217476844788,0.9573240876197815,0.14035087823867798,0.4436090290546417
+trm_baseline_best,8,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8678333163261414,0.9507989287376404,0.9246666431427002,0.831333339214325,6.942666530609131,2.5057361125946045,0.07533333450555801,0.831333339214325,0.8669999837875366,0.9795271158218384,0.9588619470596313,0.13972431421279907,0.451127827167511
+trm_baseline_best,8,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8676666617393494,0.9505447745323181,0.9229999780654907,0.8306666612625122,6.941333293914795,2.51287841796875,0.07699999958276749,0.8306666612625122,0.8669999837875366,0.980199933052063,0.9573240876197815,0.13408520817756653,0.4411027431488037
+trm_baseline_best,8,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8682083487510681,0.950954258441925,0.9236666560173035,0.8286666870117188,6.945666790008545,2.4972081184387207,0.07633333653211594,0.8286666870117188,0.8669999837875366,0.9788542985916138,0.9554017782211304,0.14692983031272888,0.448621541261673
+trm_baseline_best,8,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8687083125114441,0.9511321783065796,0.9286666512489319,0.824999988079071,6.949666500091553,2.46991229057312,0.07133333384990692,0.824999988079071,0.8669999837875366,0.9784698486328125,0.9504036903381348,0.15319548547267914,0.48120301961898804
+trm_baseline_best,12,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.950645923614502,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0
+trm_baseline_best,12,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8673333525657654,0.9506542682647705,0.8926666378974915,0.856333315372467,6.938666820526123,2.641887903213501,0.10733333230018616,0.856333315372467,0.8669999837875366,0.9928392767906189,0.9873125553131104,0.049185462296009064,0.2005012482404709
+trm_baseline_best,12,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8668749928474426,0.9505452513694763,0.8896666765213013,0.8560000061988831,6.934999942779541,2.651435136795044,0.11033333092927933,0.8560000061988831,0.8669999837875366,0.9924548268318176,0.9869281053543091,0.048245612531900406,0.18045112490653992
+trm_baseline_best,12,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669583201408386,0.9506661891937256,0.8913333415985107,0.8556666374206543,6.935666561126709,2.6479289531707764,0.10866666585206985,0.8556666374206543,0.8669999837875366,0.991782009601593,0.9869281053543091,0.05325814709067345,0.19548872113227844
+trm_baseline_best,12,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8671666383743286,0.9506459832191467,0.8989999890327454,0.8539999723434448,6.937333106994629,2.629715919494629,0.10100000351667404,0.8539999723434448,0.8669999837875366,0.991733968257904,0.985005795955658,0.055137842893600464,0.24561403691768646
+trm_baseline_best,12,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8677916526794434,0.950908362865448,0.9043333530426025,0.8516666889190674,6.942333221435547,2.602372169494629,0.09566666930913925,0.8516666889190674,0.8669999837875366,0.9901480078697205,0.9823144674301147,0.07017543911933899,0.2882205545902252
+trm_baseline_best,15,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.950645923614502,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0
+trm_baseline_best,15,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.9506445527076721,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0
+trm_baseline_best,15,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.9506717920303345,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0
+trm_baseline_best,15,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.9506852030754089,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0
+trm_baseline_best,15,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8670833110809326,0.9507201910018921,0.8673333525657654,0.8669999837875366,6.936666488647461,2.7151405811309814,0.1326666623353958,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0006265664123930037,0.002506265649572015
+trm_baseline_best,15,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8670833110809326,0.9508404731750488,0.8686666488647461,0.8656666874885559,6.936666488647461,2.7110862731933594,0.1313333362340927,0.8656666874885559,0.8669999837875366,0.9997597336769104,0.9984621405601501,0.0021929824724793434,0.01253132801502943
|
