{ "args": { "model": "hrm", "ckpt_root": "/home/yurenh2/rrm/hrm/checkpoints/Sudoku-extreme-1k-aug-1000 ACT-torch/HierarchicalReasoningModel_ACTV1 righteous-python", "ckpt_name": "step_26040", "mode": "multi_perturbed_ce", "train_steps": 2, "batch_size": 2, "lr": 1e-05, "noise_std": 0.001, "noise_min": 0.0001, "noise_max": null, "noise_sampling": "loguniform", "clean_prob": 0.0, "sigma_start": 0.0, "sigma_ramp_steps": 10, "n_trajectories": 4, "rollout_impl": "parallel_fixed", "perturb": "both", "seed": 123, "eval_every": 1, "eval_n": 16, "eval_batch_size": 8, "out": "smoke_step9_hrm_parallel_sampling.json", "save_dir": null, "save_best": true, "save_final": true, "save_every_eval": false }, "initial_acc": 0.4375, "initial_tok_acc": 0.816358024691358, "steps": [ { "train_step": 1, "loss": 0.2585001587867737, "clean_loss": 0.22608037292957306, "noisy_loss_mean": 0.2693067789077759, "noise_std_target": 0.0001, "noise_std_mean": 9.99999901978299e-05, "noise_std_max": 9.99999901978299e-05, "effective_batch": 8 }, { "train_step": 2, "loss": 0.08793824911117554, "clean_loss": 0.08808630704879761, "noisy_loss_mean": 0.08788891136646271, "noise_std_target": 0.0002, "noise_std_mean": 0.00015660555800423026, "noise_std_max": 0.00019374192925170064, "effective_batch": 8 } ], "evals": [ { "kind": "initial", "train_step": 0, "acc": 0.4375, "tok_acc": 0.816358024691358 }, { "kind": "task", "train_step": 1, "acc": 0.4375, "tok_acc": 0.8240740740740741 }, { "kind": "task", "train_step": 2, "acc": 0.4375, "tok_acc": 0.8263888888888888 }, { "kind": "final", "train_step": 2, "acc": 0.4375, "tok_acc": 0.8263888888888888 } ], "checkpoints": [ { "kind": "best", "train_step": 1, "acc": 0.4375, "path": "smoke_step9_hrm_parallel_sampling_ckpts/best.pt" }, { "kind": "best", "train_step": 2, "acc": 0.4375, "path": "smoke_step9_hrm_parallel_sampling_ckpts/best.pt" }, { "kind": "final", "train_step": 2, "acc": 0.4375, "path": "smoke_step9_hrm_parallel_sampling_ckpts/final.pt" } ], "best_acc": 0.4375, "best_step": 2, "best_checkpoint": "smoke_step9_hrm_parallel_sampling_ckpts/best.pt", "final_acc": 0.4375, "final_tok_acc": 0.8263888888888888, "final_checkpoint": "smoke_step9_hrm_parallel_sampling_ckpts/final.pt" }