From a6ec4288a2232988b130b2f00bb2565f81706966 Mon Sep 17 00:00:00 2001 From: YurenHao0426 Date: Mon, 29 Jun 2026 12:15:51 -0500 Subject: Recursive reasoning dynamics: analysis pipeline, paper drafts, toy models Failure=more-chaotic (task-general under validity labeling) reduces to convergence/completeness detection; mechanism (transient chaos vs multistability vs input-induced) under investigation. Co-Authored-By: Claude Fable 5 --- .../trm_baseline_best_step58590_n5000_k8.summary.csv | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 initial_perturb_robustness/trm_baseline_best_step58590_n5000_k8.summary.csv (limited to 'initial_perturb_robustness/trm_baseline_best_step58590_n5000_k8.summary.csv') diff --git a/initial_perturb_robustness/trm_baseline_best_step58590_n5000_k8.summary.csv b/initial_perturb_robustness/trm_baseline_best_step58590_n5000_k8.summary.csv new file mode 100644 index 0000000..94b9d4a --- /dev/null +++ b/initial_perturb_robustness/trm_baseline_best_step58590_n5000_k8.summary.csv @@ -0,0 +1,10 @@ +label,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,correct_count_q10,correct_count_q50,correct_count_q90,zero_frac,full_frac +trm_baseline_best,0.0,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8658000230789185,0.9502986073493958,0.8658000230789185,0.8658000230789185,6.926400184631348,2.7269365787506104,0.0,8.0,8.0,0.13420000672340393,0.8658000230789185 +trm_baseline_best,3e-05,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.868399977684021,0.9510218501091003,0.9455999732017517,0.769599974155426,6.947199821472168,2.309980869293213,3.0,8.0,8.0,0.0544000007212162,0.769599974155426 +trm_baseline_best,0.0001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8675500154495239,0.9506199955940247,0.9485999941825867,0.769599974155426,6.940400123596191,2.3122386932373047,2.0,8.0,8.0,0.05139999836683273,0.769599974155426 +trm_baseline_best,0.0003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8685500025749207,0.9512001872062683,0.9476000070571899,0.7698000073432922,6.948400020599365,2.310267925262451,3.0,8.0,8.0,0.052400000393390656,0.7698000073432922 +trm_baseline_best,0.001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8694999814033508,0.9514188766479492,0.9488000273704529,0.7712000012397766,6.955999851226807,2.289206027984619,3.0,8.0,8.0,0.05119999870657921,0.7712000012397766 +trm_baseline_best,0.003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8676750063896179,0.950739860534668,0.9448000192642212,0.7666000127792358,6.941400051116943,2.311615467071533,3.0,8.0,8.0,0.0551999993622303,0.7666000127792358 +trm_baseline_best,0.01,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669000267982483,0.9505268335342407,0.9452000260353088,0.7635999917984009,6.935200214385986,2.3034324645996094,3.0,8.0,8.0,0.05480000004172325,0.7635999917984009 +trm_baseline_best,0.03,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8665000200271606,0.9502792954444885,0.9503999948501587,0.7573999762535095,6.932000160217285,2.2814416885375977,3.0,8.0,8.0,0.04960000142455101,0.7573999762535095 +trm_baseline_best,0.1,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8664500117301941,0.9503796696662903,0.9553999900817871,0.7558000087738037,6.931600093841553,2.2636523246765137,3.0,8.0,8.0,0.044599998742341995,0.7558000087738037 -- cgit v1.2.3