diff options
| author | YurenHao0426 <blackhao0426@gmail.com> | 2026-06-13 12:35:36 -0500 |
|---|---|---|
| committer | YurenHao0426 <blackhao0426@gmail.com> | 2026-06-13 12:35:36 -0500 |
| commit | 66e0d8b9fd4d0f7a2231d689c055e26fdf1cf04a (patch) | |
| tree | c29cba61124018755a19b02c9d33e3ad5f2e05cc /research/flossing/smoke_step9_hrm_parallel_sampling.json | |
Curated export for clone-and-run Maze training (2x A6000) + diagnostics.
trm/hrm pretrain.py carry trajectory-augmentation code (backward-compatible).
Heavy artifacts (checkpoints/wandb/npz) gitignored; see PROVENANCE.md.
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
Diffstat (limited to 'research/flossing/smoke_step9_hrm_parallel_sampling.json')
| -rw-r--r-- | research/flossing/smoke_step9_hrm_parallel_sampling.json | 106 |
1 files changed, 106 insertions, 0 deletions
diff --git a/research/flossing/smoke_step9_hrm_parallel_sampling.json b/research/flossing/smoke_step9_hrm_parallel_sampling.json new file mode 100644 index 0000000..9d400b4 --- /dev/null +++ b/research/flossing/smoke_step9_hrm_parallel_sampling.json @@ -0,0 +1,106 @@ +{ + "args": { + "model": "hrm", + "ckpt_root": "/home/yurenh2/rrm/hrm/checkpoints/Sudoku-extreme-1k-aug-1000 ACT-torch/HierarchicalReasoningModel_ACTV1 righteous-python", + "ckpt_name": "step_26040", + "mode": "multi_perturbed_ce", + "train_steps": 2, + "batch_size": 2, + "lr": 1e-05, + "noise_std": 0.001, + "noise_min": 0.0001, + "noise_max": null, + "noise_sampling": "loguniform", + "clean_prob": 0.0, + "sigma_start": 0.0, + "sigma_ramp_steps": 10, + "n_trajectories": 4, + "rollout_impl": "parallel_fixed", + "perturb": "both", + "seed": 123, + "eval_every": 1, + "eval_n": 16, + "eval_batch_size": 8, + "out": "smoke_step9_hrm_parallel_sampling.json", + "save_dir": null, + "save_best": true, + "save_final": true, + "save_every_eval": false + }, + "initial_acc": 0.4375, + "initial_tok_acc": 0.816358024691358, + "steps": [ + { + "train_step": 1, + "loss": 0.2585001587867737, + "clean_loss": 0.22608037292957306, + "noisy_loss_mean": 0.2693067789077759, + "noise_std_target": 0.0001, + "noise_std_mean": 9.99999901978299e-05, + "noise_std_max": 9.99999901978299e-05, + "effective_batch": 8 + }, + { + "train_step": 2, + "loss": 0.08793824911117554, + "clean_loss": 0.08808630704879761, + "noisy_loss_mean": 0.08788891136646271, + "noise_std_target": 0.0002, + "noise_std_mean": 0.00015660555800423026, + "noise_std_max": 0.00019374192925170064, + "effective_batch": 8 + } + ], + "evals": [ + { + "kind": "initial", + "train_step": 0, + "acc": 0.4375, + "tok_acc": 0.816358024691358 + }, + { + "kind": "task", + "train_step": 1, + "acc": 0.4375, + "tok_acc": 0.8240740740740741 + }, + { + "kind": "task", + "train_step": 2, + "acc": 0.4375, + "tok_acc": 0.8263888888888888 + }, + { + "kind": "final", + "train_step": 2, + "acc": 0.4375, + "tok_acc": 0.8263888888888888 + } + ], + "checkpoints": [ + { + "kind": "best", + "train_step": 1, + "acc": 0.4375, + "path": "smoke_step9_hrm_parallel_sampling_ckpts/best.pt" + }, + { + "kind": "best", + "train_step": 2, + "acc": 0.4375, + "path": "smoke_step9_hrm_parallel_sampling_ckpts/best.pt" + }, + { + "kind": "final", + "train_step": 2, + "acc": 0.4375, + "path": "smoke_step9_hrm_parallel_sampling_ckpts/final.pt" + } + ], + "best_acc": 0.4375, + "best_step": 2, + "best_checkpoint": "smoke_step9_hrm_parallel_sampling_ckpts/best.pt", + "final_acc": 0.4375, + "final_tok_acc": 0.8263888888888888, + "final_checkpoint": "smoke_step9_hrm_parallel_sampling_ckpts/final.pt" +}
\ No newline at end of file |
