summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author YurenHao0426 <blackhao0426@gmail.com> 2026-06-29 12:15:51 -0500
committer YurenHao0426 <blackhao0426@gmail.com> 2026-06-29 12:15:51 -0500
commit a6ec4288a2232988b130b2f00bb2565f81706966 (patch)
tree 1bb86e7f0b899b823b9e7fdf383e832d30a181e0
Recursive reasoning dynamics: analysis pipeline, paper drafts, toy models
Failure=more-chaotic (task-general under validity labeling) reduces to convergence/completeness detection; mechanism (transient chaos vs multistability vs input-induced) under investigation. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
-rw-r--r--.gitignore12
-rw-r--r--README.md32
-rw-r--r--analysis_2x2/OBSERVATIONS.md155
-rw-r--r--analysis_2x2/analyze_2x2.py350
-rw-r--r--analysis_2x2/analyze_clv.py72
-rw-r--r--analysis_2x2/analyze_early_connectivity.py77
-rw-r--r--analysis_2x2/analyze_maze_connectivity.py87
-rw-r--r--analysis_2x2/analyze_phase1.py126
-rw-r--r--analysis_2x2/analyze_solution_space.py95
-rw-r--r--analysis_2x2/cells_hrm26040_joint_n2048.csv5
-rw-r--r--analysis_2x2/cells_hrm26040_n8192.csv5
-rw-r--r--analysis_2x2/cells_trm_official58590_n2048.csv5
-rw-r--r--analysis_2x2/cells_trm_official_gbs768_step58590_n512.csv5
-rw-r--r--analysis_2x2/cells_trm_singleGPU_step130205_n512.csv5
-rw-r--r--analysis_2x2/cells_trm_singleGPU_step260410_n512.csv5
-rw-r--r--analysis_2x2/cells_trm_step13020_n512.csv5
-rw-r--r--analysis_2x2/characterization/characterization_results.md70
-rw-r--r--analysis_2x2/characterize_separation.py182
-rwxr-xr-xanalysis_2x2/dump_early_ckpts.sh20
-rw-r--r--analysis_2x2/early_pairing_hrm26040_joint.md17
-rw-r--r--analysis_2x2/early_pairing_trm_official58590.md17
-rw-r--r--analysis_2x2/early_window_pairing.py85
-rw-r--r--analysis_2x2/evolution_hrm.csv11
-rw-r--r--analysis_2x2/evolution_trm.csv11
-rw-r--r--analysis_2x2/offline_followups.py229
-rw-r--r--analysis_2x2/offline_followups/followups.md88
-rw-r--r--analysis_2x2/offline_followups/phase1_e1.md27
-rw-r--r--analysis_2x2/offline_phase1_e1.py123
-rw-r--r--analysis_2x2/phase1/phase1_results.md48
-rwxr-xr-xanalysis_2x2/queue_maze_followups.sh50
-rw-r--r--analysis_2x2/results.md69
-rwxr-xr-xanalysis_2x2/run_clv_geom.sh23
-rwxr-xr-xanalysis_2x2/run_maze_diag_queue.sh40
-rwxr-xr-xanalysis_2x2/run_phase1_queue.sh92
-rwxr-xr-xanalysis_2x2/run_retest_2x2.sh75
-rw-r--r--analysis_2x2/sweep_hrm26040_joint_n2048.csv20
-rw-r--r--analysis_2x2/sweep_hrm26040_n8192.csv20
-rw-r--r--analysis_2x2/sweep_trm_official58590_n2048.csv20
-rw-r--r--analysis_2x2/sweep_trm_official_gbs768_step58590_n512.csv20
-rw-r--r--analysis_2x2/sweep_trm_singleGPU_step130205_n512.csv20
-rw-r--r--analysis_2x2/sweep_trm_singleGPU_step260410_n512.csv20
-rw-r--r--analysis_2x2/sweep_trm_step13020_n512.csv20
-rw-r--r--analyze_diag.py115
-rw-r--r--analyze_dynamics_experiments.py289
-rw-r--r--analyze_halt_bucket.py122
-rw-r--r--analyze_joint.py103
-rw-r--r--analyze_ptrm_rollout_cache.py135
-rw-r--r--analyze_separate.py124
-rw-r--r--analyze_spectrum_microscope.py360
-rw-r--r--analyze_step3.py76
-rw-r--r--analyze_step3_all.py94
-rw-r--r--analyze_tangent.py149
-rw-r--r--analyze_tangent_modes.py143
-rw-r--r--chaos_trm_multi4_step182287_clean_k4_n1000.summary.csv2
-rw-r--r--chaos_trm_multi4_step260410_clean_k4_n1000.summary.csv2
-rw-r--r--chaos_trm_multi4_success_failure_summary.csv11
-rw-r--r--compare_ptrm_rollout_counts.py203
-rw-r--r--diagnose_hrm.py306
-rw-r--r--diagnose_hrm_joint.py240
-rw-r--r--diagnose_hrm_joint_clv.py310
-rw-r--r--diagnose_hrm_joint_geom.py264
-rw-r--r--diagnose_hrm_joint_horizon.py240
-rw-r--r--diagnose_hrm_joint_short.py240
-rw-r--r--diagnose_hrm_separate.py242
-rw-r--r--diagnose_trm_joint.py225
-rw-r--r--diagnose_trm_joint_clv.py304
-rw-r--r--diagnose_trm_joint_geom.py251
-rw-r--r--diagnose_trm_joint_horizon.py225
-rw-r--r--diagnose_trm_joint_maze.py227
-rw-r--r--diagnose_trm_joint_short.py225
-rw-r--r--directional_lyap_perturb/plots/directional_lyap_perturb_combined.csv73
-rw-r--r--directional_lyap_perturb/plots/final_gpu_status.txt4
-rw-r--r--directional_lyap_perturb/smoke_baseline.summary.csv5
-rw-r--r--directional_lyap_perturb/smoke_baseline_fixedclean.summary.csv5
-rw-r--r--directional_lyap_perturb/trm_baseline_best_step58590_n1000_c8_dirlyap.summary.csv25
-rw-r--r--directional_lyap_perturb/trm_multi4_best_step35805_n1000_c8_dirlyap.summary.csv25
-rw-r--r--directional_lyap_perturb/trm_multi4_final_step65100_n1000_c8_dirlyap.summary.csv25
-rw-r--r--directional_lyap_perturb/watch_and_plot.sh32
-rw-r--r--directional_lyap_perturb_eps_sweep/plots/directional_lyap_perturb_combined.csv97
-rw-r--r--directional_lyap_perturb_eps_sweep/plots/final_gpu_status.txt4
-rw-r--r--directional_lyap_perturb_eps_sweep/trm_baseline_best_step58590_n1000_c8_fdeps001.summary.csv25
-rw-r--r--directional_lyap_perturb_eps_sweep/trm_baseline_best_step58590_n1000_c8_fdeps003.summary.csv25
-rw-r--r--directional_lyap_perturb_eps_sweep/trm_multi4_best_step35805_n1000_c8_fdeps001.summary.csv25
-rw-r--r--directional_lyap_perturb_eps_sweep/trm_multi4_best_step35805_n1000_c8_fdeps003.summary.csv25
-rwxr-xr-xdirectional_lyap_perturb_eps_sweep/watch_queue_and_plot.sh55
-rw-r--r--directional_lyap_perturb_robustness.py428
-rw-r--r--directional_multi4_eval_n1000.csv6
-rw-r--r--dynamics_experiment_report.md86
-rw-r--r--engelken_python_flossing.py336
-rw-r--r--eval_directional_ckpts.py86
-rw-r--r--eval_trm_gbs192_step260410_n1000.summary.csv2
-rw-r--r--eval_trm_gbs192_step52082_n1000.summary.csv2
-rw-r--r--eval_trm_official768_step13020_n1000.summary.csv2
-rw-r--r--extend_rollout.py68
-rw-r--r--external/GradientFlossing/README.md79
-rw-r--r--external/GradientFlossing/packages.txt6
-rw-r--r--external/julia-1.6.7/LICENSE.md26
-rw-r--r--external/julia-1.6.7/share/julia/base/ryu/LICENSE.md25
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/ArgTools/README.md431
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Artifacts/docs/src/index.md21
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Base64/docs/src/index.md10
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/CRC32c/docs/src/index.md6
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Dates/docs/src/index.md826
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/DelimitedFiles/docs/src/index.md11
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Distributed/docs/src/index.md69
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Downloads/README.md186
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Downloads/docs/src/index.md9
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/FileWatching/docs/src/index.md9
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Future/docs/src/index.md9
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/InteractiveUtils/docs/src/index.md28
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/LazyArtifacts/docs/src/index.md10
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/LibCURL/LICENSE.md23
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/LibCURL/README.md72
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/LibGit2/docs/src/index.md161
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Libdl/docs/src/index.md15
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/LinearAlgebra/docs/src/index.md681
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Logging/docs/src/index.md311
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Markdown/docs/src/index.md398
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Mmap/docs/src/index.md7
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/NetworkOptions/README.md270
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/LICENSE.md47
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/README.md34
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/api.md88
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/artifacts.md229
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/basedocs.md25
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/compatibility.md201
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/creating-packages.md290
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/environments.md108
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/faq.md3
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/getting-started.md216
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/glossary.md124
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/index.md65
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/managing-packages.md428
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/registries.md81
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/toml-files.md270
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/test/test_packages/SameNameDifferentUUID/dev/Unregistered/README.md2
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/test/test_packages/SandboxFallback2/dev/Unregistered/README.md2
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/test/test_packages/TestDepTrackingPath/test/dev/Unregistered/README.md2
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/test/test_packages/TransferSubgraph/dev/Unregistered/README.md2
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Printf/docs/src/index.md6
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Profile/docs/src/index.md17
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/REPL/docs/src/index.md692
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/REPL/src/TerminalMenus/LICENSE.md22
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Random/docs/src/index.md358
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/SHA/LICENSE.md58
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/SHA/docs/src/index.md75
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Serialization/docs/src/index.md7
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/SharedArrays/docs/src/index.md11
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Sockets/docs/src/index.md33
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/SparseArrays/docs/src/index.md232
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Statistics/LICENSE.md24
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Statistics/README.md19
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Statistics/docs/src/index.md19
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/SuiteSparse/docs/src/index.md34
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/TOML/docs/src/index.md132
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Tar/README.md467
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Test/docs/src/index.md293
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/UUIDs/docs/src/index.md8
-rw-r--r--external/julia-1.6.7/share/julia/stdlib/v1.6/Unicode/docs/src/index.md7
-rw-r--r--external/julia-1.6.7/share/julia/test/clangsa/lit.cfg.py27
-rw-r--r--external/julia-1.6.7/share/julia/test/llvmpasses/lit.cfg.py21
-rwxr-xr-xexternal/julia_depot/registries/General/.ci/instantiate.sh24
-rw-r--r--external/julia_depot/registries/General/CONTRIBUTING.md185
-rw-r--r--flossing_suite/README.md89
-rwxr-xr-xflossing_suite/archive_failed_oom_b8/trm_seed123_baseline_nofloss_b8_10000.cmd.sh30
-rwxr-xr-xflossing_suite/archive_failed_oom_b8/trm_seed123_pre_inter_0_500_b8_k4_10000.cmd.sh30
-rwxr-xr-xflossing_suite/archive_failed_oom_b8/trm_seed123_prefloss_0_b8_k4_10000.cmd.sh30
-rwxr-xr-xflossing_suite/launch_toy_official_suite.sh77
-rwxr-xr-xflossing_suite/launch_trm_faithful_suite.sh88
-rwxr-xr-xflossing_suite/launch_trm_variant_suite.sh87
-rw-r--r--flossing_suite/results/summary/flossing_eval_curves.csv399
-rw-r--r--flossing_suite/results/summary/flossing_runs_summary.csv32
-rwxr-xr-xflossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000.cmd.sh43
-rwxr-xr-xflossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000.cmd.sh43
-rwxr-xr-xflossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000.cmd.sh43
-rwxr-xr-xflossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_b4_10000.cmd.sh30
-rwxr-xr-xflossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000.cmd.sh35
-rwxr-xr-xflossing_suite/results/trm_faithful/trm_seed123_pre_inter_0_500_b4_k4_10000.cmd.sh30
-rwxr-xr-xflossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000.cmd.sh35
-rwxr-xr-xflossing_suite/results/trm_faithful/trm_seed123_prefloss_0_b4_k4_10000.cmd.sh30
-rwxr-xr-xflossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000.cmd.sh35
-rwxr-xr-xflossing_suite/smoke_test.sh61
-rwxr-xr-xflossing_suite/status.sh33
-rw-r--r--flossing_suite/summarize_flossing.py311
-rwxr-xr-xflossing_suite/watch_and_summarize.sh27
-rw-r--r--initial_perturb_robustness.py286
-rw-r--r--initial_perturb_robustness/plots/final_gpu_status.txt4
-rw-r--r--initial_perturb_robustness/plots/initial_perturb_robustness_combined.csv28
-rw-r--r--initial_perturb_robustness/plots/initial_perturb_robustness_conditional.csv28
-rw-r--r--initial_perturb_robustness/smoke_baseline.summary.csv3
-rw-r--r--initial_perturb_robustness/smoke_baseline_b32k8.summary.csv2
-rw-r--r--initial_perturb_robustness/smoke_plots/initial_perturb_robustness_combined.csv4
-rw-r--r--initial_perturb_robustness/trm_baseline_best_step58590_n5000_k8.summary.csv10
-rw-r--r--initial_perturb_robustness/trm_multi4_best_step35805_n5000_k8.summary.csv10
-rw-r--r--initial_perturb_robustness/trm_multi4_final_step65100_n5000_k8.summary.csv10
-rwxr-xr-xinitial_perturb_robustness/watch_and_plot.sh25
-rw-r--r--late_perturb_robustness.py323
-rw-r--r--late_perturb_robustness/plots/final_gpu_status.txt4
-rw-r--r--late_perturb_robustness/plots/late_perturb_robustness_combined.csv91
-rw-r--r--late_perturb_robustness/smoke_baseline.summary.csv5
-rw-r--r--late_perturb_robustness/smoke_baseline_expandedclean.summary.csv5
-rw-r--r--late_perturb_robustness/smoke_baseline_fastclean.summary.csv5
-rw-r--r--late_perturb_robustness/smoke_plots/late_perturb_robustness_combined.csv5
-rw-r--r--late_perturb_robustness/trm_baseline_best_step58590_n3000_k8_late.summary.csv31
-rw-r--r--late_perturb_robustness/trm_multi4_best_step35805_n3000_k8_late.summary.csv31
-rw-r--r--late_perturb_robustness/trm_multi4_final_step65100_n3000_k8_late.summary.csv31
-rwxr-xr-xlate_perturb_robustness/watch_and_plot.sh32
-rwxr-xr-xlaunch_10k_queue.sh124
-rwxr-xr-xlaunch_dynamics_variants_queue.sh186
-rwxr-xr-xlaunch_engelken_paper_faithful_trm_queue.sh78
-rwxr-xr-xlaunch_engelken_python_official_queue.sh74
-rwxr-xr-xlaunch_hrm_multi4_perturb_ablation.sh60
-rwxr-xr-xlaunch_interfloss_queue.sh84
-rwxr-xr-xlaunch_multi4_parallel_repro_config.sh70
-rwxr-xr-xlaunch_multi4_repro_config.sh68
-rwxr-xr-xlaunch_multi4_repro_config_all.sh10
-rwxr-xr-xlaunch_trajectory_perturb_queue.sh92
-rwxr-xr-xlaunch_trajectory_sampling_long.sh128
-rwxr-xr-xlaunch_trajectory_sampling_long_all.sh18
-rwxr-xr-xlaunch_trm_directional_multi4_long.sh43
-rwxr-xr-xlaunch_trm_official_gbs768_multi4.sh37
-rw-r--r--make_meeting_artifacts_v2.py364
-rw-r--r--make_q_lambda_scatter.py263
-rw-r--r--make_trm_multi4_meeting_artifacts.py133
-rw-r--r--maze_package/README.md30
-rw-r--r--maze_package/TRANSFER_README.md36
-rw-r--r--maze_package/diagnose_trm_joint.py225
-rwxr-xr-xmaze_package/launch_maze_trm.sh40
-rwxr-xr-xmaze_package/launch_maze_trm_portable.sh58
-rw-r--r--maze_package/pip-freeze.txt18
-rw-r--r--maze_package/requirements.txt13
-rw-r--r--maze_package/step7_interfloss.py589
-rw-r--r--maze_pred_dump.py85
-rw-r--r--meeting_artifacts/trm_multi4_dynamics_report.md57
-rw-r--r--meeting_artifacts_v2/fig5_qhead_vs_lambda1_ptrm_summary.csv3
-rw-r--r--meeting_artifacts_v2/hrm_trm_redesigned_summary.csv7
-rw-r--r--meeting_artifacts_v2/meeting_figures_v2_report.md26
-rw-r--r--merge_and_analyze.py157
-rwxr-xr-xmonitor_dynamics_experiments.sh38
-rw-r--r--multi4_eval_compare/hrm_baseline_eval.csv11
-rw-r--r--multi4_eval_compare/hrm_honly_step26040_eval.csv2
-rw-r--r--multi4_eval_compare/hrm_matched_compare.csv6
-rw-r--r--multi4_eval_compare/hrm_multi4_complete_eval.csv11
-rw-r--r--multi4_eval_compare/hrm_multi4_eval.csv6
-rw-r--r--multi4_eval_compare/hrm_multi4_horizon_sweep_768.csv41
-rw-r--r--multi4_eval_compare/trm_baseline_eval.csv11
-rw-r--r--multi4_eval_compare/trm_gbs768_multi4_step65100_det_n10000_seed20260602.summary.csv2
-rw-r--r--multi4_eval_compare/trm_matched_compare.csv2
-rw-r--r--multi4_eval_compare/trm_multi4_eval.csv2
-rw-r--r--multi4_eval_compare/trm_multi4_eval_full.csv11
-rw-r--r--multi4_eval_compare/trm_official_gbs768_eval.csv11
-rw-r--r--multi4_eval_compare/trm_official_gbs768_multi4_eval.csv21
-rw-r--r--multi4_eval_compare/trm_official_gbs768_multi4_step16275_eval.csv2
-rw-r--r--multi4_eval_compare/wallclock_eval.csv30
-rw-r--r--official_gbs768_spectrum/headline_trm_multi4_dynamics_table.csv4
-rw-r--r--official_gbs768_spectrum/summary_n512_k8_seed20260602.csv4
-rw-r--r--official_gbs768_spectrum/summary_n64_k8_seed20260602.csv5
-rw-r--r--ordinary_multi4_eval_n1000_seed20260611.csv6
-rw-r--r--paper/claims.md37
-rw-r--r--paper/experiment_framework.md55
-rw-r--r--paper/intro.md57
-rw-r--r--paper/outline.md79
-rw-r--r--paper/rainer_followup_draft.md37
-rw-r--r--paper/readiness.md119
-rw-r--r--paper/sample_intro.md49
-rw-r--r--paper/setup_results.md129
-rw-r--r--paper/style_contract.md47
-rw-r--r--paper/validation/validate_le_estimator.py107
-rw-r--r--paper/validation/validation_results.md17
-rw-r--r--papers/txt/engelken2023_gradient_flossing.txt1879
-rw-r--r--papers/txt/gram2025_generative_recursive.txt1532
-rw-r--r--papers/txt/gram2026_generative_recursive.txt1532
-rw-r--r--papers/txt/hrm2025_hierarchical_reasoning.txt1302
-rw-r--r--papers/txt/ptrm2025_probabilistic_trm.txt906
-rw-r--r--papers/txt/trm2025_tiny_recursive.txt796
-rw-r--r--plot_ckpt_evolution.py105
-rw-r--r--plot_directional_lyap_perturb.py136
-rw-r--r--plot_initial_perturb_robustness.py123
-rw-r--r--plot_late_perturb_robustness.py140
-rw-r--r--plot_trm_chaos_trajectory.py67
-rw-r--r--plot_trm_lyap_hist.py69
-rw-r--r--problem_tracks/hrm_learning_events.csv37
-rw-r--r--problem_tracks/hrm_learning_groups.csv6
-rw-r--r--problem_tracks/hrm_problem_tracks.csv513
-rw-r--r--problem_tracks/trm_learning_events.csv37
-rw-r--r--problem_tracks/trm_learning_groups.csv6
-rw-r--r--problem_tracks/trm_problem_tracks.csv513
-rw-r--r--ptrm_gbs768_base_vs_multi4_k100_d64_sigma03_Lonly_n1000_seed0.hist.csv102
-rw-r--r--ptrm_gbs768_base_vs_multi4_k100_d64_sigma03_Lonly_n1000_seed0.kcurve.csv101
-rw-r--r--ptrm_gbs768_base_vs_multi4_k100_d64_sigma03_Lonly_n1000_seed0.summary.csv2
-rw-r--r--ptrm_lambda_main_trm_base260410_k100_d64_sigma03_Lonly_n256.summary.csv2
-rw-r--r--ptrm_lambda_main_trm_multi4_104164_k100_d64_sigma03_Lonly_n256.summary.csv2
-rw-r--r--ptrm_lambda_quick_trm_base260410_k100_d64_sigma03_Lonly_n64.summary.csv2
-rw-r--r--ptrm_lambda_quick_trm_multi4_104164_k100_d64_sigma03_Lonly_n64.summary.csv2
-rw-r--r--ptrm_official_gbs768_base58590_k100_d64_sigma03_Lonly_n1000_seed0.summary.csv2
-rw-r--r--ptrm_official_gbs768_multi4_35805_k100_d64_sigma03_Lonly_n1000_seed0.summary.csv2
-rw-r--r--ptrm_official_main_trm_base260410_k100_d64_sigma03_Lonly_n1000.summary.csv2
-rw-r--r--ptrm_official_main_trm_multi4_104164_k100_d64_sigma03_Lonly_n1000.summary.csv2
-rw-r--r--ptrm_official_sweep_trm_base260410_k25_d16_sigma03_Lonly_n1000.summary.csv2
-rw-r--r--ptrm_official_sweep_trm_multi4_104164_k25_d16_sigma03_Lonly_n1000.summary.csv2
-rw-r--r--ptrm_rollout_selection.py643
-rw-r--r--ptrm_same_subset/base58590_k25_d64_sigma03_Lonly_n10000_seed20260602.summary.csv2
-rw-r--r--ptrm_same_subset/multi4_35805_k25_d64_sigma03_Lonly_n10000_seed20260602.summary.csv2
-rw-r--r--ptrm_same_subset/paired_ptrm_k100_n1000_seed0_summary.csv2
-rw-r--r--ptrm_selection_smoke_trm_base260410_k4_n64.summary.csv2
-rw-r--r--ptrm_selection_trm_base260410_k8_n512.summary.csv2
-rw-r--r--ptrm_selection_trm_multi4_104164_k8_n512.summary.csv2
-rw-r--r--ptrm_smoke_official_gbs768_base58590.summary.csv2
-rw-r--r--ptrm_spectrum_k4_cache_analysis.csv3
-rw-r--r--ptrm_spectrum_k4_trm_base260410_k100_d64_sigma03_Lonly_n64.summary.csv2
-rw-r--r--ptrm_spectrum_k4_trm_multi4_104164_k100_d64_sigma03_Lonly_n64.summary.csv2
-rw-r--r--ptrm_spectrum_smoke.summary.csv2
-rw-r--r--q_lambda_scatter/base58590_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.summary.csv2
-rw-r--r--q_lambda_scatter/multi4_35805_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.summary.csv2
-rw-r--r--rainer_email_bundle_20260605/README.md17
-rw-r--r--rainer_email_bundle_20260605/email_draft.md18
-rw-r--r--rainer_email_bundle_20260605/figure_captions.md23
-rw-r--r--report_bundle_20260603/MANIFEST.txt96
-rw-r--r--report_bundle_20260603/README.md34
-rw-r--r--report_bundle_20260603/scripts/make_meeting_artifacts_v2.py364
-rw-r--r--report_bundle_20260603/scripts/make_q_lambda_scatter.py263
-rw-r--r--report_bundle_20260603/tables/base58590_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.summary.csv2
-rw-r--r--report_bundle_20260603/tables/base58590_k25_d64_sigma03_Lonly_n10000_seed20260602.summary.csv2
-rw-r--r--report_bundle_20260603/tables/fig5_qhead_vs_lambda1_ptrm_summary.csv3
-rw-r--r--report_bundle_20260603/tables/headline_trm_multi4_dynamics_table.csv4
-rw-r--r--report_bundle_20260603/tables/hrm_baseline_eval.csv11
-rw-r--r--report_bundle_20260603/tables/hrm_honly_step26040_eval.csv2
-rw-r--r--report_bundle_20260603/tables/hrm_matched_compare.csv6
-rw-r--r--report_bundle_20260603/tables/hrm_multi4_complete_eval.csv11
-rw-r--r--report_bundle_20260603/tables/hrm_multi4_eval.csv6
-rw-r--r--report_bundle_20260603/tables/hrm_multi4_horizon_sweep_768.csv41
-rw-r--r--report_bundle_20260603/tables/hrm_trm_redesigned_summary.csv7
-rw-r--r--report_bundle_20260603/tables/meeting_figures_v2_report.md26
-rw-r--r--report_bundle_20260603/tables/multi4_35805_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.summary.csv2
-rw-r--r--report_bundle_20260603/tables/multi4_35805_k25_d64_sigma03_Lonly_n10000_seed20260602.summary.csv2
-rw-r--r--report_bundle_20260603/tables/paired_ptrm_k100_n1000_seed0_summary.csv2
-rw-r--r--report_bundle_20260603/tables/summary_n512_k8_seed20260602.csv4
-rw-r--r--report_bundle_20260603/tables/summary_n64_k8_seed20260602.csv5
-rw-r--r--report_bundle_20260603/tables/trm_baseline_eval.csv11
-rw-r--r--report_bundle_20260603/tables/trm_gbs768_multi4_step65100_det_n10000_seed20260602.summary.csv2
-rw-r--r--report_bundle_20260603/tables/trm_matched_compare.csv2
-rw-r--r--report_bundle_20260603/tables/trm_multi4_eval.csv2
-rw-r--r--report_bundle_20260603/tables/trm_multi4_eval_full.csv11
-rw-r--r--report_bundle_20260603/tables/trm_official_gbs768_eval.csv11
-rw-r--r--report_bundle_20260603/tables/trm_official_gbs768_multi4_eval.csv21
-rw-r--r--report_bundle_20260603/tables/trm_official_gbs768_multi4_step16275_eval.csv2
-rw-r--r--report_bundle_20260603/tables/wallclock_eval.csv30
-rw-r--r--reverse_floss_finetune.py289
-rwxr-xr-xrun_HRM256_after_H.sh27
-rwxr-xr-xrun_HRMOrth_after_SRM7M.sh29
-rwxr-xr-xrun_H_after_SRM.sh29
-rwxr-xr-xrun_SRM7M_after_HRM256.sh31
-rwxr-xr-xrun_checkpoint_evolution.sh59
-rwxr-xr-xrun_ckpt_evolution2.sh55
-rwxr-xr-xrun_hrm_diag_all_ckpts.sh26
-rwxr-xr-xrun_srm_after_W.sh32
-rwxr-xr-xrun_step4_W.sh27
-rwxr-xr-xrun_step4_pipeline.sh36
-rwxr-xr-xrun_step6_prefloss.sh40
-rwxr-xr-xrun_trm_cf_abcd.sh39
-rwxr-xr-xrun_trm_cf_ef.sh39
-rwxr-xr-xrun_trm_cf_long.sh36
-rwxr-xr-xrun_trm_chaos_onset_diag.sh31
-rw-r--r--sanity_lipschitz_check.py184
-rw-r--r--spectrum_microscope/checkpoint_summary.csv21
-rw-r--r--spectrum_microscope/feature_and_rank_summary.csv421
-rw-r--r--srm_design_codex.md69
-rw-r--r--srm_design_round2_claude.md155
-rw-r--r--step3_train_with_rf.py302
-rw-r--r--step4_from_scratch.py334
-rw-r--r--step5_train_trm_cf.py307
-rw-r--r--step6_prefloss.py310
-rw-r--r--step7_interfloss.py589
-rw-r--r--step8_basin_consistency.py199
-rw-r--r--step9_trajectory_perturb_train.py595
-rw-r--r--surrogate_flossing/MinimalGradientFlossinExample.py135
-rw-r--r--surrogate_flossing/MinimalSurrogateGradientFlossinExample.py228
-rw-r--r--surrogate_flossing/README.md106
-rw-r--r--surrogate_flossing/figures/binary_surrogate_gradient_flossing.md2
-rw-r--r--toy/toy_transient_chaos.py112
-rw-r--r--track_problem_learning.py375
-rw-r--r--trajectory_augmentation_notes.md190
-rwxr-xr-xwatch_ptrm_gbs768_compare.sh26
-rwxr-xr-xwatch_trm_directional_multi4_long.sh35
384 files changed, 41721 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0e73031
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,12 @@
+*.npz
+*.png
+*.pdf
+*.pt
+*.zip
+*.tar.gz
+*.log
+*.json
+*wandb_history*.csv
+__pycache__/
+wandb/
+checkpoints/
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..9b9d3cb
--- /dev/null
+++ b/README.md
@@ -0,0 +1,32 @@
+# Recursive Reasoning Dynamics
+
+Analysis of the inference-trajectory dynamics of recursive reasoning models (HRM, TRM) on
+Sudoku-Extreme and Maze-Hard. Central finding and its honest deflation:
+
+- Along the recurrent inference trajectory, **genuine failures are more chaotic** (higher
+ finite-time Lyapunov exponents / latent drift) than successes, in the *same* trained network —
+ task-general (Sudoku; Maze under a validity/connectivity criterion, not exact-match).
+- This signal **reduces to convergence / answer-completeness detection** (the FTLE separation is
+ recoverable from late drift + the model's own halting confidence; leading + full Ginelli CLV
+ geometry add nothing beyond it). On unique-solution tasks completeness aliases onto correctness.
+- Open: the *mechanism* of the non-convergence (multistability vs transient chaos vs input-induced) —
+ the current line of work (see `paper/readiness.md`, `extend_rollout.py`, `toy/`).
+
+This repo is the **analysis half** (diagnostics, experiments, paper drafts, toy models). The model
+code (TRM/HRM forks with trajectory-augmentation training) lives in the companion `rrm` repo;
+heavy artifacts (checkpoints, npz, wandb, figures) are not committed.
+
+## Entry points
+- `paper/` — `readiness.md` (live status + all results), `claims.md` (claim/evidence/counter table),
+ `intro.md`, `setup_results.md`, `style_contract.md`, `experiment_framework.md`.
+- `analysis_2x2/` — the 2×2 (settling × correctness) decomposition, characterization, CLV reducibility,
+ checkpoint-evolution, connectivity-labeled Maze analysis. Result `.md`/`.csv` kept; npz/png ignored.
+- `diagnose_{trm,hrm}_joint*.py` — per-example FTLE (JVP+QR), drift, CLV estimators. `_maze` variant
+ forces the math SDP backend (FlashAttention lacks the JVP double-backward); `_clv` adds Ginelli CLVs.
+- `extend_rollout.py` — long-rollout probe (transient vs multistable vs persistent non-convergence).
+- `toy/` — minimal analytically-grounded toy models reproducing "failure = more chaotic".
+- `maze_pred_dump.py` — cheap forward dump (connectivity + drift, no JVP) for Maze validity labeling.
+
+## Estimator validation
+`paper/validation/validate_le_estimator.py` — the QR/Benettin FTLE core recovers known spectra
+(diagonal/symmetric/non-normal linear maps, Hénon λ₁) to <1e-3.
diff --git a/analysis_2x2/OBSERVATIONS.md b/analysis_2x2/OBSERVATIONS.md
new file mode 100644
index 0000000..d0b0670
--- /dev/null
+++ b/analysis_2x2/OBSERVATIONS.md
@@ -0,0 +1,155 @@
+# 2×2 analysis: (late-trajectory settling) × (answer correctness) — observations
+
+Date: 2026-06-11. Code: `analyze_2x2.py` (+ ad-hoc strict-threshold checks logged in session).
+All statements below are measurements on existing diagnostic npz files; no new GPU runs.
+
+## Data provenance (CRITICAL: λ scales are NOT comparable across estimator versions)
+
+| dataset | model / ckpt | n | estimator | window |
+|---|---|---|---|---|
+| `diag_8k.npz` | HRM righteous-python @ step_26040 (H=2,L=2) | 8192 | `diagnose_hrm.py` (May 22 version) | full 16 ACT steps |
+| `diag_trm_singleGPU_step*_512.npz` ×10 | TRM mlp_t singleGPU @ 26041…260410 | 512 each | `diagnose_trm_joint.py` (joint JVP+QR, per-sub-update norm) | full 16 ACT steps |
+| `diag_hrm_step_*_512.npz` ×10 | HRM @ 2604…26040 | 512 each | joint estimator (−0.15-scale, matches step7 reports) | full 16 ACT steps |
+
+Within-dataset comparisons only. "Converged"/"settled" here = late z_H drift (mean over ACT steps 13–16)
+falls in the low-drift band; threshold from Otsu on pooled log10 drift, robustness via percentile sweep.
+Settled ≠ literal fixed point: the settled band has a narrow characteristic residual velocity
+(HRM ≈0.96/step, IQR-width <0.03 in log10; TRM ≈18.5/step, q10–q90 = 15.8–21.4).
+
+## Headline numbers
+
+### HRM @26040, n=8192, exact_acc 0.525 (full-window λ, May-22 estimator scale)
+Otsu τ → cells: A(settled,correct)=4103, B(settled,wrong)=63, C(unsettled,correct)=195, D(unsettled,wrong)=3831.
+
+- Failure mass is overwhelmingly unsettled: wrong-that-settled = 1.6% at Otsu τ; at a STRICT in-band
+ threshold (45th pct, drift<0.97) it is 21/3894 = **0.55% of failures**.
+- The strict-band B examples (n=21) have λ₁ median **−0.842** vs A −0.867 (success-like contraction),
+ q_halt(final) median **+7.47 — identical to A** (ACT-confident), token_acc 0.41–0.88 (median 0.62,
+ substantially wrong, not near-misses). These are confidently-wrong settled answers — invisible to
+ both a stability-based and an ACT-confidence-based selector.
+- λ₁ as a predictor: AUC(−λ₁→correct) = 0.984 ≈ AUC(−λ₁→settled) = 0.986. At the Otsu split the
+ within-settled outcome gap (Δλ₁ −0.27, AUC 0.852) is a THRESHOLD-MIXTURE ARTIFACT: the Otsu τ sits
+ above the band gap (drift distribution has a near-empty region between pct≈45 and pct≈55,
+ τ jumps −0.013→1.39), so Otsu-"settled" includes mid-drift examples. At the strict in-band τ the
+ within-settled gap nearly closes (−0.867 vs −0.842).
+- Residual outcome signal in the unsettled stratum: C vs D Δλ₁(median) = −0.094, AUC 0.818
+ (C n=195). Note C drifts MORE than D (median 55.4 vs 40.8) while being more contracting in λ₁.
+- AUC(−log late-drift→correct) = 0.964.
+
+### TRM singleGPU @260410 (final), n=512, exact_acc 0.770 (joint estimator)
+Cells: A=383, B=**0**, C=11, D=118.
+
+- **B = 0 at every threshold**: drift distributions of settled-correct (q90 = 21.4) and wrong
+ (q10 = 47.8) are completely separated — no wrong example reaches the settled band. Robust across
+ the entire percentile sweep (nB=0, nD=118 constant). Same at step 130205. Across the 10-ckpt series,
+ fB>0 only at step 26041 (1.2%).
+- λ₁ medians: A +0.0047 (≈0, consistent with "TRM success short-window λ₁>0 is acceptable"),
+ C +0.0998, D +0.1023. Within-unsettled outcome gap ≈ 0 (Δ −0.0025; AUC 0.619, C n=11).
+ AUC(−λ₁→correct) = 0.989 ≈ AUC(−λ₁→settled) = 0.996: at the final checkpoint, λ₁'s outcome
+ signal in TRM is (almost) entirely settling-regime detection.
+- q_halt(final) median: A +7.44, C +7.41, D −11.1.
+
+## Checkpoint evolution (512/ckpt; within-series comparisons)
+
+- TRM (26041→260410): fB≈0 from 52082 on; fD shrinks 0.41→0.23; λ₁(D) rises monotonically
+ +0.036→+0.102 while λ₁(A) stays ≈0 — the success/failure λ₁ gap widens over training via the
+ failure cell becoming more expansive.
+- HRM (2604→26040, joint-estimator series): mass migration over training:
+ fB 0.89→0.008 (early ckpts: nearly all examples low-drift & wrong), fD rises to ~0.52 mid-training,
+ fA grows with accuracy; λ₁(D) rises from −0.087 to +0.023 (sign flip ~step 15–18k), λ₁(A) stays
+ −0.10…−0.20.
+
+## Direct answers to the motivating questions
+
+1. **Is the failure FTLE cluster a mixture of wandering + wrong-fixed-point modes?**
+ Measured: yes, but extremely lopsided. Wandering dominates (HRM ≥98.4% of failures at Otsu τ,
+ 99.45% at strict τ; TRM final 100%). The wrong-fixed-point mode exists in HRM (21 examples) with
+ exactly the predicted signature (success-like λ₁), and is absent in TRM at late checkpoints.
+2. **Does conditioning on settling absorb the success/failure FTLE gap?**
+ TRM final: essentially yes (within-stratum AUC 0.619 with n=11; regime AUC 0.996).
+ HRM: mostly yes after threshold correction (strict-band within-settled gap ≈0.025), with one
+ genuine residual: the unsettled stratum retains an outcome gap (AUC 0.818, C n=195).
+3. **Relation to published taxonomies** (factual cross-references):
+ Ren & Liu's four HRM modes map onto our cells; their "non-trivial failure (converged to wrong
+ fixed point)" = our strict-band B (rare: 0.55% of failures); their "trivial failure (wanders or
+ oscillates)" = our D (dominant). Efstathiou & Balwani's "failed runs plateau at stable high-loss
+ attractors" (TRM): by our state-drift criterion, TRM failures are NOT settled (B=0, drift ≥~48/step,
+ ~0.77× the early-trajectory velocity); their "stable" refers to loss plateaus/bounded regions,
+ not state convergence. Our drift+λ₁ measurement distinguishes these.
+
+## Addendum 1 (same day, offline follow-ups — see `offline_followups/followups.md`)
+
+**REVISION of headline point 2.** "λ₁'s outcome signal is (almost) entirely settling-regime detection"
+was based on comparing raw AUCs and is too strong as a mediation claim. The proper control —
+AUC(−λ₁→correct) **within matched late-drift deciles** of the unsettled stratum — shows substantial
+independent signal (HRM: per-decile 0.97→0.69 from low to high drift, weighted mean 0.879;
+unconditioned within-unsettled 0.933). TRM official @58590 with a strict band τ shows the same
+qualitatively: unsettled-correct λ₁ ≈ +0.017 (n=141) vs unsettled-wrong +0.103 (n=64) at overlapping
+drift levels. Corrected statement: **λ₁ correlates strongly with settledness, but at matched drift
+level it still separates outcome** — drift and λ₁ are not redundant observables.
+
+**Difficulty control (#givens, crude proxy).** HRM n=8192: Spearman(correct, givens)=+0.28;
+Spearman(λ₁, givens)=−0.35 overall but −0.16/−0.18 within outcome. Within-givens-bin
+AUC(−λ₁→correct) = 0.976–0.987 (weighted 0.982, vs overall 0.984): at the #givens level, the
+FTLE-outcome separation is NOT a difficulty artifact. (Solver-backtrack difficulty not available
+offline; #givens is a weak proxy — flag for the writeup.)
+
+**Strict-B per-example (n=21).** All 21 have halted_at ∈ [4,9] (median 6) — under real ACT inference
+every one would have halted early, confidently (q_halt +7.4–7.5), and wrong (token_acc 0.41–0.88).
+The three lowest-token-acc cases are all 17-givens (minimum-clue) puzzles. Per-example table in
+followups.md; drift profiles indistinguishable from the A band (fig_hrm_strictB_profiles.png).
+
+**TRM official @58590 note.** 90% of its settled-correct examples are still descending at window end
+(slope median −0.147) — unlike singleGPU @260410 whose A-band is flat (~18.5/step). The "settled band"
+criterion is checkpoint-specific; cross-checkpoint comparisons must re-derive τ per dataset.
+
+## Addendum 2 (2026-06-12, n=2048 retest + early-window pairing; npz in retest/)
+
+Retest ran on GPU 0 (shared, 12h-fallback claim). Four diagnostics, seed 0, idx-paired.
+
+**1. TRM official @58590 (87.6%), full window, n=2048: B = 0 confirmed.**
+254 failures, none settled; the MINIMUM late-drift among wrong examples (log10 1.664 ≈ 46/step)
+exceeds the late-drift of 96.5% of correct examples — near-complete distribution separation,
+threshold-free. λ₁: wrong +0.103 / correct +0.012, AUC 0.993. Within-unsettled outcome AUC 0.848
+(C n=70) — residual signal confirmed at usable n.
+
+**2. HRM @26040, full window, joint estimator, n=2048: replicates diag_8k on a second estimator.**
+acc 0.526; A/B/C/D = 1020/14/57/957; λ₁(A) −0.152 vs λ₁(D) +0.032 (the email's −0.15/+0.04 scale);
+strict-band B n=5 (0.5% of failures) with λ₁ −0.141 ≈ A and q_halt +7.47 (selector-blind, replicated);
+unsettled within-decile AUC weighted 0.900 (was 0.879 on diag_8k).
+
+**3. Early-window (first 4 ACT steps) does NOT forecast eventual success among still-unsolved
+examples — and on HRM the dynamical signals point the OTHER way.**
+Unconditioned early AUCs are inflated by already-solved-at-4 examples (TRM 69.4% solved@4,
+HRM 34.5%). Restricted to not-yet-correct@4:
+
+| signal @ step 4 | TRM (n=626, 59.4% eventually correct) | HRM (n=1342, 27.6% eventually correct) |
+|---|---|---|
+| AUC(−λ₁_early → eventual correct) | 0.543 | **0.448** (reversed) |
+| AUC(−drift@4 → eventual correct) | 0.492 | **0.312** (reversed: MORE early movement ↔ eventual success; AUC(+drift@4 → eventual correct) = 0.688) |
+| AUC(q_halt@4 → eventual correct) | 0.521 | **0.734** |
+
+Observations: (i) the λ₁/outcome association is concurrent with the trajectory's fate (final window),
+not antecedent — early-window λ₁ has no forward predictive power at this granularity; (ii) on HRM the
+sign of the early association is inverted: among undecided examples, higher early drift (and
+marginally higher early λ₁) accompany eventual success; (iii) the one early signal with real forecast
+power is HRM's learned q_halt (0.734) — absent in TRM (0.521); factual architecture note: TRM removed
+HRM's Q-learning continue-head (BCE halt only). Window length 4 was chosen to match train-time; other
+horizons untested.
+
+**Consequences for writeup/claims:** "failure ↔ chaos" should be stated as an outcome-concurrent
+dynamical signature, not an early predictor; early-exit/reallocation applications are unsupported at
+this granularity (and sign-reversed on HRM); the within-stratum independence result (drift-matched
+AUC 0.88-0.90) plus the difficulty control (Addendum 1) remain the strongest positive claims.
+
+## Caveats
+
+- n(B)=21 (HRM strict band) and n(C)=11 (TRM) / 195 (HRM) are small; per-cell statements about those cells are low-precision.
+- "Settled" is a relative (band) criterion; both A-bands have nonzero characteristic residual velocity.
+- exact_correct is evaluated at ACT step 16 under fixed unroll; post-window corruption or recovery
+ (Ren & Liu's fixed-point violation) is not observable in these arrays.
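+- λ window = full trajectory (336 sub-updates TRM / 16 segments HRM); the early-window
+ (length-matched) version of this analysis is NOT covered by the npz files used for the headline
+ numbers — the `_short` diagnose scripts exist (4 ACT steps) but produced no saved npz we could find.
+ This was an open follow-up at the time of the main analysis; Addendum 2 reports a first
+ early-window (first 4 ACT steps) pairing from the retest npz files.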
+- diag_8k (HRM) and the HRM evolution series use different estimator normalizations; only
+ within-file comparisons are reported above.
diff --git a/analysis_2x2/analyze_2x2.py b/analysis_2x2/analyze_2x2.py
new file mode 100644
index 0000000..315ffc6
--- /dev/null
+++ b/analysis_2x2/analyze_2x2.py
@@ -0,0 +1,350 @@
+"""2x2 analysis: (terminal convergence) x (answer correctness), per-example FTLE per cell.
+
+Inputs: existing diagnostic npz files produced by diagnose_hrm*.py / diagnose_trm_joint.py:
+ drift_zH/drift_zL (N,16) per-ACT-step state displacement norms,
+ lyap_spec (N,8) full-window joint FTLE spectrum, exact_correct (N,),
+ token_acc, halted_at, q_halt/q_continue (N,16), idx.
+
+Convergence metric (measurement choice, reported alongside robustness sweep):
+ d_late = mean(drift_zH[:, -4:]) (late-trajectory z_H velocity, ACT steps 13-16)
+ primary threshold tau = Otsu on log10(d_late) pooled per dataset;
+ sensitivity: tau swept over pooled percentiles 5..95.
+
+Cells: A = converged & correct, B = converged & wrong,
+ C = non-converged & correct, D = non-converged & wrong.
+
+Outputs (in this directory): results_<tag>.json, cells_<tag>.csv, sweep_<tag>.csv,
+ fig_<tag>_{drift_hist,lyap_by_cell,scatter,spectrum}.png, evolution_{hrm,trm}.{csv,png},
+ results.md (combined human-readable summary).
+
+Observational only: this script reports counts, distributions and rank statistics; it does
+not test mechanisms.
+"""
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+import matplotlib
+
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+import numpy as np
+
+HERE = Path(__file__).resolve().parent
+FLOSS = HERE.parent
+
+LATE_K = 4 # ACT steps used for late drift
+
+
+def otsu_threshold(x: np.ndarray, nbins: int = 256) -> float:
+    """Return the Otsu threshold of the 1-D sample `x`.
+
+    The sample is binned into `nbins` equal-width bins; the returned value is the
+    centre of the bin that maximises the between-class variance of the two-way split.
+
+    Falls back to ``median(x)`` when no split can be scored: either the histogram is
+    empty, or every sample falls into a single bin (e.g. constant input). In the
+    latter case every candidate split has an empty class, all scores are NaN, and
+    ``np.nanargmax`` would raise ``ValueError``.
+    """
+    h, edges = np.histogram(x, bins=nbins)
+    h = h.astype(np.float64)
+    centers = 0.5 * (edges[:-1] + edges[1:])
+    w = h.sum()
+    if w == 0:
+        return float(np.median(x))
+    p = h / w
+    omega = np.cumsum(p)  # cumulative weight of the lower class
+    mu = np.cumsum(p * centers)  # cumulative first moment of the lower class
+    mu_t = mu[-1]
+    denom = omega * (1.0 - omega)
+    denom[denom <= 0] = np.nan  # splits with an empty class are not candidates
+    sigma_b2 = (mu_t * omega - mu) ** 2 / denom
+    if np.isnan(sigma_b2).all():
+        # Degenerate sample: no split separates two non-empty classes.
+        return float(np.median(x))
+    k = np.nanargmax(sigma_b2)
+    return float(centers[k])
+
+
+def auc_rank(score: np.ndarray, label: np.ndarray) -> float:
+    """Rank-based AUC of `score` for the event ``label == 1``.
+
+    Tied scores share the mean of the ranks they occupy. Returns NaN when either
+    class (label 1 or label 0) has no members.
+    """
+    positives = score[label == 1]
+    negatives = score[label == 0]
+    n_pos, n_neg = len(positives), len(negatives)
+    if n_pos == 0 or n_neg == 0:
+        return float("nan")
+    pooled = np.concatenate([positives, negatives])
+    order = np.argsort(pooled, kind="mergesort")
+    sorted_vals = pooled[order]
+    # Every run of equal values in sorted order is one tie group.
+    is_start = np.concatenate([[True], sorted_vals[1:] != sorted_vals[:-1]])
+    starts = np.flatnonzero(is_start)
+    stops = np.append(starts[1:], len(sorted_vals))
+    # The 1-based ranks starts+1 .. stops average to (starts + 1 + stops) / 2.
+    group_rank = (starts + 1 + stops) / 2.0
+    ranks = np.empty(len(pooled), dtype=np.float64)
+    ranks[order] = np.repeat(group_rank, stops - starts)
+    # Positives occupy the first n_pos slots of the pooled array.
+    rank_sum_pos = ranks[:n_pos].sum()
+    return float((rank_sum_pos - n_pos * (n_pos + 1) / 2) / (n_pos * n_neg))
+
+
+def cell_stats(lyap: np.ndarray, tok: np.ndarray, halted: np.ndarray, mask: np.ndarray) -> dict:
+    """Summarise the examples selected by `mask` (one 2x2 cell).
+
+    Returns ``{"n": 0}`` for an empty cell. Otherwise reports the cell size, the
+    median / mean / 25th-75th percentiles of the first spectrum column, the median
+    of the last spectrum column, the per-column median spectrum, and the medians
+    of `tok` and `halted` over the selected rows.
+    """
+    count = int(mask.sum())
+    if count == 0:
+        return {"n": 0}
+    lead = lyap[mask, 0]
+    n_exponents = lyap.shape[1]
+    quartiles = [float(np.percentile(lead, q)) for q in (25, 75)]
+    stats = {"n": count}
+    stats["lam1_median"] = float(np.median(lead))
+    stats["lam1_mean"] = float(lead.mean())
+    stats["lam1_iqr"] = quartiles
+    stats["lam8_median"] = float(np.median(lyap[mask, -1]))
+    stats["spectrum_median"] = [float(np.median(lyap[mask, col])) for col in range(n_exponents)]
+    stats["token_acc_median"] = float(np.median(tok[mask]))
+    stats["halted_at_median"] = float(np.median(halted[mask]))
+    return stats
+
+
+def analyze(npz_path: Path, tag: str, make_figs: bool = True) -> dict:
+ """Run the 2x2 (converged x correct) analysis on one diagnostic npz file.
+
+ Reads `lyap_spec`, `exact_correct`, `token_acc`, `halted_at`, `drift_zH`, `drift_zL` and
+ `q_halt` from `npz_path`, splits the examples into cells A-D at the Otsu threshold of the
+ log10 late drift, and writes `sweep_<tag>.csv`, `cells_<tag>.csv` and `results_<tag>.json`
+ (plus four `fig_<tag>_*.png` files when `make_figs` is true) into this script's directory.
+
+ Returns the result dict that is also serialised to `results_<tag>.json`.
+ """
+ # NOTE(review): the object returned by np.load is never closed in this function.
+ d = np.load(npz_path)
+ lyap = d["lyap_spec"].astype(np.float64)
+ correct = d["exact_correct"].astype(int)
+ tok = d["token_acc"].astype(np.float64)
+ halted = d["halted_at"].astype(np.float64)
+ drift_h = d["drift_zH"].astype(np.float64)
+ drift_l = d["drift_zL"].astype(np.float64)
+
+ # Late drift = mean over the last LATE_K columns; clipped before log10 to avoid log(0).
+ d_late = drift_h[:, -LATE_K:].mean(axis=1)
+ d_late_l = drift_l[:, -LATE_K:].mean(axis=1)
+ logd = np.log10(np.clip(d_late, 1e-12, None))
+
+ # "Converged" means log10 late drift strictly below the Otsu threshold.
+ tau = otsu_threshold(logd)
+ conv = logd < tau
+
+ cells = {
+ "A_conv_correct": (conv) & (correct == 1),
+ "B_conv_wrong": (conv) & (correct == 0),
+ "C_nonconv_correct": (~conv) & (correct == 1),
+ "D_nonconv_wrong": (~conv) & (correct == 0),
+ }
+
+ res = {
+ "npz": str(npz_path),
+ "n": int(len(correct)),
+ "exact_acc": float(correct.mean()),
+ "late_drift_def": f"mean(drift_zH[:, -{LATE_K}:])",
+ "otsu_tau_log10": tau,
+ "frac_converged": float(conv.mean()),
+ "cells": {k: cell_stats(lyap, tok, halted, m) for k, m in cells.items()},
+ "mixture": {
+ # max(..., 1) guards the division when a correctness class is empty.
+ "wrong_that_converged": float(
+ cells["B_conv_wrong"].sum() / max((correct == 0).sum(), 1)
+ ),
+ "correct_that_nonconverged": float(
+ cells["C_nonconv_correct"].sum() / max((correct == 1).sum(), 1)
+ ),
+ },
+ # Differences of median lambda_1; NaN when either cell of a contrast is empty.
+ "contrasts": {
+ "dlam1_correct_minus_wrong_overall": float(
+ np.median(lyap[correct == 1, 0]) - np.median(lyap[correct == 0, 0])
+ ),
+ "dlam1_within_converged": float(
+ np.median(lyap[cells["A_conv_correct"], 0]) - np.median(lyap[cells["B_conv_wrong"], 0])
+ )
+ if cells["B_conv_wrong"].sum() > 0 and cells["A_conv_correct"].sum() > 0
+ else float("nan"),
+ "dlam1_within_nonconverged": float(
+ np.median(lyap[cells["C_nonconv_correct"], 0]) - np.median(lyap[cells["D_nonconv_wrong"], 0])
+ )
+ if cells["C_nonconv_correct"].sum() > 0 and cells["D_nonconv_wrong"].sum() > 0
+ else float("nan"),
+ "dlam1_wrong_conv_minus_wrong_nonconv": float(
+ np.median(lyap[cells["B_conv_wrong"], 0]) - np.median(lyap[cells["D_nonconv_wrong"], 0])
+ )
+ if cells["B_conv_wrong"].sum() > 0 and cells["D_nonconv_wrong"].sum() > 0
+ else float("nan"),
+ },
+ # Scores are negated so that a lower lambda_1 / lower drift ranks as "more likely positive".
+ "auc": {
+ "neg_lam1_predicts_correct_overall": auc_rank(-lyap[:, 0], correct),
+ "neg_lam1_predicts_correct_within_conv": auc_rank(-lyap[conv, 0], correct[conv]),
+ "neg_lam1_predicts_correct_within_nonconv": auc_rank(-lyap[~conv, 0], correct[~conv]),
+ "neg_logdrift_predicts_correct": auc_rank(-logd, correct),
+ "neg_lam1_predicts_converged": auc_rank(-lyap[:, 0], conv.astype(int)),
+ },
+ }
+
+ # threshold sensitivity sweep
+ # Thresholds are the 5th..95th percentiles of logd in steps of 5.
+ sweep_rows = []
+ for pct in range(5, 96, 5):
+ t = np.percentile(logd, pct)
+ c = logd < t
+ row = {
+ "pct": pct,
+ "tau": float(t),
+ "nA": int((c & (correct == 1)).sum()),
+ "nB": int((c & (correct == 0)).sum()),
+ "nC": int((~c & (correct == 1)).sum()),
+ "nD": int((~c & (correct == 0)).sum()),
+ }
+ for nm, m in [
+ ("lam1_med_B", c & (correct == 0)),
+ ("lam1_med_D", ~c & (correct == 0)),
+ ]:
+ row[nm] = float(np.median(lyap[m, 0])) if m.sum() > 0 else float("nan")
+ sweep_rows.append(row)
+ sweep_csv = HERE / f"sweep_{tag}.csv"
+ with sweep_csv.open("w") as f:
+ keys = list(sweep_rows[0].keys())
+ f.write(",".join(keys) + "\n")
+ for r in sweep_rows:
+ f.write(",".join(str(r[k]) for k in keys) + "\n")
+
+ # per-cell csv
+ # Empty cells are written as a row with n=0 and blank statistic fields.
+ with (HERE / f"cells_{tag}.csv").open("w") as f:
+ f.write("cell,n,lam1_median,lam1_mean,lam1_q25,lam1_q75,lam8_median,token_acc_median,halted_at_median\n")
+ for k, m in cells.items():
+ s = res["cells"][k]
+ if s["n"] == 0:
+ f.write(f"{k},0,,,,,,,\n")
+ continue
+ f.write(
+ f"{k},{s['n']},{s['lam1_median']:.6f},{s['lam1_mean']:.6f},"
+ f"{s['lam1_iqr'][0]:.6f},{s['lam1_iqr'][1]:.6f},{s['lam8_median']:.6f},"
+ f"{s['token_acc_median']:.4f},{s['halted_at_median']:.1f}\n"
+ )
+
+ if make_figs:
+ colors = {"A_conv_correct": "tab:green", "B_conv_wrong": "tab:orange",
+ "C_nonconv_correct": "tab:blue", "D_nonconv_wrong": "tab:red"}
+
+ # Figure 1: histogram of log10 late drift split by correctness, with the Otsu threshold.
+ fig, ax = plt.subplots(figsize=(6, 4))
+ bins = np.linspace(logd.min(), logd.max(), 60)
+ ax.hist(logd[correct == 1], bins=bins, alpha=0.55, label=f"correct (n={int(correct.sum())})", color="tab:green")
+ ax.hist(logd[correct == 0], bins=bins, alpha=0.55, label=f"wrong (n={int((1-correct).sum())})", color="tab:red")
+ ax.axvline(tau, color="k", ls="--", lw=1, label=f"Otsu tau={tau:.2f}")
+ ax.set_xlabel("log10 late drift_zH (steps -4:)"); ax.set_ylabel("count")
+ ax.set_title(f"{tag}: late-drift distribution by correctness"); ax.legend(fontsize=8)
+ fig.tight_layout(); fig.savefig(HERE / f"fig_{tag}_drift_hist.png", dpi=150); plt.close(fig)
+
+ # Figure 2: jittered strip plot of lambda_1 per cell with the cell median as a bar.
+ fig, ax = plt.subplots(figsize=(6.5, 4))
+ for i, (k, m) in enumerate(cells.items()):
+ if m.sum() == 0:
+ continue
+ y = lyap[m, 0]
+ # The generator is re-created with seed 0 for every cell, so each cell starts from the same jitter stream.
+ x = np.full(y.shape, i) + (np.random.default_rng(0).uniform(-0.18, 0.18, y.shape))
+ ax.plot(x, y, ".", ms=3, alpha=0.35, color=colors[k])
+ ax.hlines(np.median(y), i - 0.28, i + 0.28, color=colors[k], lw=2.5)
+ ax.set_xticks(range(4)); ax.set_xticklabels([f"{k}\n(n={int(m.sum())})" for k, m in cells.items()], fontsize=7)
+ ax.set_ylabel("lambda_1 (full-window FTLE)"); ax.axhline(0, color="gray", lw=0.6)
+ ax.set_title(f"{tag}: lambda_1 by 2x2 cell")
+ fig.tight_layout(); fig.savefig(HERE / f"fig_{tag}_lyap_by_cell.png", dpi=150); plt.close(fig)
+
+ # Figure 3: scatter of log10 late drift against lambda_1, coloured by correctness.
+ fig, ax = plt.subplots(figsize=(6, 4.5))
+ ax.scatter(logd[correct == 1], lyap[correct == 1, 0], s=5, alpha=0.4, c="tab:green", label="correct")
+ ax.scatter(logd[correct == 0], lyap[correct == 0, 0], s=5, alpha=0.4, c="tab:red", label="wrong")
+ ax.axvline(tau, color="k", ls="--", lw=1); ax.axhline(0, color="gray", lw=0.6)
+ ax.set_xlabel("log10 late drift_zH"); ax.set_ylabel("lambda_1")
+ ax.set_title(f"{tag}: drift vs lambda_1"); ax.legend(fontsize=8)
+ fig.tight_layout(); fig.savefig(HERE / f"fig_{tag}_scatter.png", dpi=150); plt.close(fig)
+
+ # Figure 4: median spectrum per cell; cells with fewer than 3 examples are not drawn.
+ fig, ax = plt.subplots(figsize=(6, 4))
+ for k, m in cells.items():
+ if m.sum() < 3:
+ continue
+ ax.plot(range(1, lyap.shape[1] + 1), [np.median(lyap[m, i]) for i in range(lyap.shape[1])],
+ "o-", ms=4, label=f"{k} (n={int(m.sum())})", color=colors[k])
+ ax.axhline(0, color="gray", lw=0.6)
+ ax.set_xlabel("exponent index"); ax.set_ylabel("median lambda_i")
+ ax.set_title(f"{tag}: median FTLE spectrum per cell"); ax.legend(fontsize=7)
+ fig.tight_layout(); fig.savefig(HERE / f"fig_{tag}_spectrum.png", dpi=150); plt.close(fig)
+
+ # secondary observables
+ # Correlation of the log10 late drifts of zL and zH, and the per-cell median of the last q_halt column.
+ res["aux"] = {
+ "late_drift_zL_corr_with_zH_log": float(np.corrcoef(logd, np.log10(np.clip(d_late_l, 1e-12, None)))[0, 1]),
+ "q_halt_final_median_by_cell": {
+ k: float(np.median(d["q_halt"][m, -1])) if m.sum() > 0 else float("nan") for k, m in cells.items()
+ },
+ }
+ (HERE / f"results_{tag}.json").write_text(json.dumps(res, indent=2))
+ return res
+
+
+def evolution(series: list[tuple[str, Path]], out_tag: str) -> None:
+ """Track cell fractions and per-cell median lambda_1 across a checkpoint series.
+
+ `series` is a list of (label, npz path) pairs; paths that do not exist are skipped.
+ Each file gets its own Otsu threshold on log10 late drift. Writes
+ `evolution_<out_tag>.csv` and `evolution_<out_tag>.png` into this script's directory,
+ or nothing at all when none of the files exist.
+ """
+ rows = []
+ for label, p in series:
+ if not p.exists():
+ continue
+ # NOTE(review): the object returned by np.load is not closed here.
+ d = np.load(p)
+ lyap = d["lyap_spec"].astype(np.float64)
+ correct = d["exact_correct"].astype(int)
+ logd = np.log10(np.clip(d["drift_zH"][:, -LATE_K:].mean(axis=1), 1e-12, None))
+ tau = otsu_threshold(logd)
+ conv = logd < tau
+ # fA..fD are fractions of all examples in the file, so they sum to 1.
+ row = dict(step=label, acc=float(correct.mean()), tau=tau,
+ fA=float(((conv) & (correct == 1)).mean()), fB=float(((conv) & (correct == 0)).mean()),
+ fC=float(((~conv) & (correct == 1)).mean()), fD=float(((~conv) & (correct == 0)).mean()))
+ for nm, m in [("l1A", (conv) & (correct == 1)), ("l1B", (conv) & (correct == 0)),
+ ("l1C", (~conv) & (correct == 1)), ("l1D", (~conv) & (correct == 0))]:
+ # A cell median is reported only when the cell holds more than 2 examples.
+ row[nm] = float(np.median(lyap[m, 0])) if m.sum() > 2 else float("nan")
+ rows.append(row)
+ if not rows:
+ return
+ keys = list(rows[0].keys())
+ with (HERE / f"evolution_{out_tag}.csv").open("w") as f:
+ f.write(",".join(keys) + "\n")
+ for r in rows:
+ f.write(",".join(str(r[k]) for k in keys) + "\n")
+ # Left panel: cell fractions per checkpoint; right panel: per-cell median lambda_1.
+ fig, axes = plt.subplots(1, 2, figsize=(11, 4))
+ xs = range(len(rows))
+ for nm, c in [("fA", "tab:green"), ("fB", "tab:orange"), ("fC", "tab:blue"), ("fD", "tab:red")]:
+ axes[0].plot(xs, [r[nm] for r in rows], "o-", label=nm, color=c)
+ axes[0].set_xticks(list(xs)); axes[0].set_xticklabels([r["step"] for r in rows], rotation=45, fontsize=7)
+ axes[0].set_ylabel("cell fraction"); axes[0].legend(fontsize=8); axes[0].set_title(f"{out_tag}: cell fractions")
+ for nm, c in [("l1A", "tab:green"), ("l1B", "tab:orange"), ("l1C", "tab:blue"), ("l1D", "tab:red")]:
+ axes[1].plot(xs, [r[nm] for r in rows], "o-", label=nm, color=c)
+ axes[1].axhline(0, color="gray", lw=0.6)
+ axes[1].set_xticks(list(xs)); axes[1].set_xticklabels([r["step"] for r in rows], rotation=45, fontsize=7)
+ axes[1].set_ylabel("median lambda_1"); axes[1].legend(fontsize=8); axes[1].set_title(f"{out_tag}: per-cell lambda_1")
+ fig.tight_layout(); fig.savefig(HERE / f"evolution_{out_tag}.png", dpi=150); plt.close(fig)
+
+
+def main() -> None:
+ """Run `analyze` on the primary npz files, `evolution` on both checkpoint series, and write results.md.
+
+ All inputs are looked up in FLOSS (the parent of this script's directory); primary
+ files that do not exist are skipped without a message.
+ """
+ results = {}
+ primary = [
+ ("hrm26040_n8192", FLOSS / "diag_8k.npz"),
+ ("trm_singleGPU_step260410_n512", FLOSS / "diag_trm_singleGPU_step260410_512.npz"),
+ ("trm_singleGPU_step130205_n512", FLOSS / "diag_trm_singleGPU_step130205_512.npz"),
+ ("trm_step13020_n512", FLOSS / "diag_trm_step13020_512.npz"),
+ ]
+ for tag, p in primary:
+ if p.exists():
+ results[tag] = analyze(p, tag)
+ print(f"[done] {tag}")
+
+ evolution(
+ [(f"{s}", FLOSS / f"diag_hrm_step_{s}_512.npz") for s in
+ [2604, 5208, 7812, 10416, 13020, 15624, 18228, 20832, 23436, 26040]],
+ "hrm",
+ )
+ evolution(
+ [(f"{s}", FLOSS / f"diag_trm_singleGPU_step{s}_512.npz") for s in
+ [26041, 52082, 78123, 104164, 130205, 156246, 182287, 208328, 234369, 260410]],
+ "trm",
+ )
+
+ # combined human-readable summary
+ # One section per analysed tag; results.md is written even when `results` is empty.
+ lines = ["# 2x2 analysis (convergence x correctness) — generated " + __import__("datetime").date.today().isoformat(), ""]
+ for tag, r in results.items():
+ lines += [f"## {tag}", f"- npz: `{r['npz']}`, n={r['n']}, exact_acc={r['exact_acc']:.3f}",
+ f"- late-drift def: {r['late_drift_def']}, Otsu tau(log10)={r['otsu_tau_log10']:.3f}, frac_converged={r['frac_converged']:.3f}", ""]
+ lines.append("| cell | n | lam1 median | lam1 IQR | token_acc med |")
+ lines.append("|---|---|---|---|---|")
+ for k, s in r["cells"].items():
+ if s["n"] == 0:
+ lines.append(f"| {k} | 0 | - | - | - |")
+ else:
+ lines.append(f"| {k} | {s['n']} | {s['lam1_median']:+.4f} | [{s['lam1_iqr'][0]:+.4f}, {s['lam1_iqr'][1]:+.4f}] | {s['token_acc_median']:.3f} |")
+ c = r["contrasts"]; a = r["auc"]; m = r["mixture"]
+ lines += ["",
+ f"- mixture: wrong-that-converged = {m['wrong_that_converged']:.3f}; correct-that-nonconverged = {m['correct_that_nonconverged']:.3f}",
+ f"- dlam1(correct-wrong): overall {c['dlam1_correct_minus_wrong_overall']:+.4f}; within-conv {c['dlam1_within_converged']:+.4f}; within-nonconv {c['dlam1_within_nonconverged']:+.4f}",
+ f"- dlam1(wrong: conv - nonconv) = {c['dlam1_wrong_conv_minus_wrong_nonconv']:+.4f}",
+ f"- AUC(-lam1 -> correct): overall {a['neg_lam1_predicts_correct_overall']:.3f}; within-conv {a['neg_lam1_predicts_correct_within_conv']:.3f}; within-nonconv {a['neg_lam1_predicts_correct_within_nonconv']:.3f}",
+ f"- AUC(-log d_late -> correct) = {a['neg_logdrift_predicts_correct']:.3f}; AUC(-lam1 -> converged) = {a['neg_lam1_predicts_converged']:.3f}",
+ ""]
+ (HERE / "results.md").write_text("\n".join(lines))
+ print("wrote", HERE / "results.md")
+
+
+# Script entry point.
+if __name__ == "__main__":
+ main()
diff --git a/analysis_2x2/analyze_clv.py b/analysis_2x2/analyze_clv.py
new file mode 100644
index 0000000..b4ae12a
--- /dev/null
+++ b/analysis_2x2/analyze_clv.py
@@ -0,0 +1,72 @@
+"""Decisive test for (b): does the leading-CLV GEOMETRY predict correctness beyond
+drift + q_halt (which already fully explain the scalar spectrum)?
+
+For each geometric feature (leading-CLV H-fraction, token participation ratio, answer-token PR,
+readout alignment): raw AUC, partial corr with correctness | (drift+q_halt), and the multivariate
+held-out ridge-AUC GAIN from adding all CLV features on top of drift+q_halt.
+Gain ~ 0 => geometry also reducible (honest pivot to (a)).
+Gain > 0 => a non-reducible dynamical-geometry signal (chase further).
+"""
+from __future__ import annotations
+import sys, glob
+import numpy as np
+rng = np.random.default_rng(0)
+
+
+def resid(y, X):
+    """Return the residual of `y` after a least-squares fit on the columns of `X` plus a constant."""
+    intercept = np.ones((len(y), 1))
+    design = np.concatenate([X, intercept], 1)
+    coef = np.linalg.lstsq(design, y, rcond=None)[0]
+    return y - design @ coef
+
+
+def pcorr(a, b, Z):
+    """Partial correlation of `a` and `b` given `Z`: Pearson r between their `resid` residuals."""
+    res_a = resid(a.astype(float), Z)
+    res_b = resid(b.astype(float), Z)
+    return float(np.corrcoef(res_a, res_b)[0, 1])
+
+
+def auc(score, y):
+    """Rank-based AUC of `score` for the event ``y == 1``.
+
+    Tied scores share the mean of the ranks they occupy, so the result does not
+    depend on how the sort happens to order equal values (all-tied scores give
+    0.5). Returns NaN when either class is empty.
+    """
+    pos, neg = score[y == 1], score[y == 0]
+    if len(pos) == 0 or len(neg) == 0:
+        return float("nan")
+    a = np.concatenate([pos, neg])
+    o = np.argsort(a, kind="mergesort")
+    s = a[o]
+    # Every run of equal values in sorted order is one tie group.
+    starts = np.flatnonzero(np.concatenate([[True], s[1:] != s[:-1]]))
+    stops = np.append(starts[1:], len(s))
+    r = np.empty(len(a))
+    # The 1-based ranks starts+1 .. stops average to (starts + 1 + stops) / 2.
+    r[o] = np.repeat((starts + 1 + stops) / 2.0, stops - starts)
+    return float((r[:len(pos)].sum() - len(pos) * (len(pos) + 1) / 2) / (len(pos) * len(neg)))
+
+
+def ridge_cv_auc(X, y, folds=5, lam=1.0):
+    """Mean held-out AUC of a ridge regression of `y` on `X` over `folds` interleaved folds.
+
+    Columns of `X` are z-scored with statistics of the full matrix, a constant column
+    is appended, and the penalty `lam` is applied to every coefficient including the
+    constant. Fold membership comes from one permutation drawn from the module-level
+    `rng`. Folds whose AUC is NaN are ignored by the final nanmean.
+    """
+    n_samples = len(y)
+    shuffled = rng.permutation(n_samples)
+    Z = (X - X.mean(0)) / (X.std(0) + 1e-8)
+
+    def with_bias(rows):
+        return np.concatenate([Z[rows], np.ones((len(rows), 1))], 1)
+
+    fold_aucs = []
+    for k in range(folds):
+        test_rows = shuffled[k::folds]
+        train_rows = np.setdiff1d(shuffled, test_rows)
+        A = with_bias(train_rows)
+        gram = A.T @ A + lam * np.eye(A.shape[1])
+        weights = np.linalg.solve(gram, A.T @ y[train_rows])
+        held_out_pred = with_bias(test_rows) @ weights
+        fold_aucs.append(auc(held_out_pred, y[test_rows]))
+    return float(np.nanmean(fold_aucs))
+
+
+def go(tag, npz):
+ """Print the CLV-geometry report for one npz file.
+
+ For each of the four CLV features (first column only) prints the raw AUC and the partial
+ correlation with correctness given the controls, then the held-out ridge AUC of the CLV
+ features alone, the controls alone, and both together (GAIN = both minus controls).
+ """
+ # NOTE(review): the object returned by np.load is not closed here.
+ d = np.load(npz)
+ y = d["exact_correct"].astype(float)
+ # Controls: all columns of both drift arrays and both halting-head arrays, concatenated.
+ dr = np.concatenate([d["drift_zH"], d["drift_zL"]], 1).astype(float)
+ qh = np.concatenate([d["q_halt"], d["q_continue"]], 1).astype(float)
+ ctrl = np.concatenate([dr, qh], 1)
+ # Per-feature rows use column 0 only; the multivariate model below uses every column.
+ feats = {nm: d[nm][:, 0].astype(float) for nm in ["clv_hfrac", "clv_pr", "clv_pr_ans", "clv_readout"]}
+ clv_all = np.concatenate([d[nm].astype(float) for nm in ["clv_hfrac", "clv_pr", "clv_pr_ans", "clv_readout"]], 1)
+ print(f"\n=== {tag} (n={len(y)}, acc={y.mean():.3f}) ===")
+ print(f"{'feature':14s} {'rawAUC':>7} {'partial|drift+qhalt':>20}")
+ for nm, f in feats.items():
+ print(f"{nm:14s} {auc(f, y):>7.3f} {pcorr(f, y, ctrl):>20.3f}")
+ # Each ridge_cv_auc call draws its own fold permutation from the module-level rng.
+ base = ridge_cv_auc(ctrl, y)
+ plus = ridge_cv_auc(np.concatenate([ctrl, clv_all], 1), y)
+ clv_alone = ridge_cv_auc(clv_all, y)
+ print(f"held-out ridge AUC: CLV-alone={clv_alone:.3f} | drift+qhalt={base:.3f} | +CLV-geom={plus:.3f} GAIN={plus-base:+.3f}")
+ print(f" -> GAIN ~0 means geometry reduces to convergence+confidence; >0 means non-reducible signal")
+
+
+if __name__ == "__main__":
+    # Directory holding the CLV-geometry dumps (absolute path on the author's machine).
+    CLV = "/home/yurenh2/rrm/research/flossing/analysis_2x2/clv"
+    targets = {
+        "Sudoku TRM": "trm_geom_step58590_n512.npz",
+        "Sudoku HRM": "hrm_geom_step26040_n512.npz",
+    }
+    for tag, fname in targets.items():
+        pat = f"{CLV}/{fname}"
+        matches = glob.glob(pat)
+        if not matches:
+            # A missing dump is reported, not treated as an error.
+            print(f"[pending] {tag}: {pat} not found yet")
+            continue
+        go(tag, matches[0])
diff --git a/analysis_2x2/analyze_early_connectivity.py b/analysis_2x2/analyze_early_connectivity.py
new file mode 100644
index 0000000..c74023a
--- /dev/null
+++ b/analysis_2x2/analyze_early_connectivity.py
@@ -0,0 +1,77 @@
+"""Pooled broken-vs-complete dynamics across EARLY maze checkpoints (large-n robust version).
+Reads maze_earlysave dumps (each has preds, inputs, drift_zH, ans_drift, exact via maze_pred_dump).
+Connectivity = success criterion. Tests: do BROKEN (incomplete) predictions have higher latent
+drift (more chaotic / non-settling) than CONNECTED (complete) ones? Pools across checkpoints for n.
+"""
+from __future__ import annotations
+from pathlib import Path
+from collections import deque
+import glob
+import numpy as np
+
+HERE = Path(__file__).resolve().parent
+DUMPS = HERE / "maze_followup"
+rng = np.random.default_rng(0)
+
+
+def is_connected(inp, pred):
+    """Return True when the predicted path links the two endpoint cells of a 30x30 grid.
+
+    Endpoints are the first two cells (row-major) whose input value is 3 or 4. The
+    walkable set is every cell predicted as 5 plus both endpoints. The answer is
+    False if any walkable cell has input value 1 (presumably a wall), or if a
+    4-neighbour search from the first endpoint never reaches the second. Grids with
+    fewer than two endpoint cells are reported as connected.
+    """
+    grid = inp.reshape(30, 30)
+    path = pred.reshape(30, 30)
+    endpoints = np.argwhere((grid == 3) | (grid == 4))
+    if len(endpoints) < 2:
+        return True
+    start, goal = tuple(endpoints[0]), tuple(endpoints[1])
+    walkable = {tuple(rc) for rc in np.argwhere(path == 5)}
+    walkable.update((start, goal))
+    for r, c in walkable:
+        if grid[r, c] == 1:
+            return False
+    visited = {start}
+    frontier = deque([start])
+    while frontier:
+        r, c = frontier.popleft()
+        if (r, c) == goal:
+            return True
+        for nxt in ((r - 1, c), (r + 1, c), (r, c - 1), (r, c + 1)):
+            if not (0 <= nxt[0] < 30 and 0 <= nxt[1] < 30):
+                continue
+            if nxt not in walkable or nxt in visited:
+                continue
+            visited.add(nxt)
+            frontier.append(nxt)
+    return False
+
+
+def auc(score, y):
+    """Rank-based AUC of `score` for the event ``y == 1``.
+
+    Tied scores share the mean of the ranks they occupy, so equal values (for
+    example several identical drift readings) contribute 0.5 per tied pair
+    instead of an order-dependent 0 or 1. Returns NaN when either class is empty.
+    """
+    p, n = score[y == 1], score[y == 0]
+    if len(p) == 0 or len(n) == 0:
+        return float("nan")
+    a = np.concatenate([p, n])
+    o = np.argsort(a, kind="mergesort")
+    s = a[o]
+    # Every run of equal values in sorted order is one tie group.
+    starts = np.flatnonzero(np.concatenate([[True], s[1:] != s[:-1]]))
+    stops = np.append(starts[1:], len(s))
+    r = np.empty(len(a))
+    # The 1-based ranks starts+1 .. stops average to (starts + 1 + stops) / 2.
+    r[o] = np.repeat((starts + 1 + stops) / 2.0, stops - starts)
+    return float((r[:len(p)].sum() - len(p) * (len(p) + 1) / 2) / (len(p) * len(n)))
+
+
+def main():
+    """Print per-checkpoint and pooled broken-vs-connected drift statistics.
+
+    Reads every `earlydump_step_*.npz` under DUMPS in ascending step order, labels
+    each example with `is_connected`, and prints per-checkpoint AUCs (only when at
+    least 3 examples fall in each class) followed by pooled AUCs with a 5000-draw
+    bootstrap interval for the latent-drift AUC.
+    """
+    def step_label(path):
+        # Parse from the file name only, so a "step_" in a parent directory cannot interfere.
+        return Path(path).name.split("step_")[1].split(".")[0]
+
+    files = sorted(glob.glob(str(DUMPS / "earlydump_step_*.npz")),
+                   key=lambda f: int(step_label(f).split("_")[0]))
+    if not files:
+        print("[no earlydump files yet]"); return
+    pool_ld, pool_ad, pool_conn = [], [], []
+    print(f"{'ckpt':>10} {'connAcc':>8} {'broken':>7} {'AUC(ldrift)':>11} {'AUC(ansdrift)':>13}")
+    for f in files:
+        step = step_label(f)
+        # Pull each array out once: indexing an NpzFile re-reads the array from disk on
+        # every access, so doing it inside the per-example loop repeats the whole read.
+        with np.load(f) as d:
+            inputs, preds = d["inputs"], d["preds"]
+            drift_zh, ans_drift = d["drift_zH"], d["ans_drift_ans"]
+        conn = np.array([is_connected(inputs[k], preds[k]) for k in range(len(preds))]).astype(int)
+        ld = np.log10(np.clip(drift_zh[:, -4:].mean(1), 1e-12, None))
+        ad = ans_drift[:, -4:].mean(1).astype(float)
+        nb = int((conn == 0).sum())
+        enough = 3 <= nb <= len(conn) - 3  # at least 3 broken and 3 connected examples
+        a_ld = auc(-ld, conn) if enough else float("nan")
+        a_ad = auc(-ad, conn) if enough else float("nan")
+        print(f"{step:>10} {conn.mean():>8.3f} {nb:>7} {a_ld:>11.3f} {a_ad:>13.3f}")
+        pool_ld.append(ld); pool_ad.append(ad); pool_conn.append(conn)
+    ld = np.concatenate(pool_ld); ad = np.concatenate(pool_ad); conn = np.concatenate(pool_conn)
+    nb = int((conn == 0).sum())
+    a_ld = auc(-ld, conn); a_ad = auc(-ad, conn)
+    boot = [auc(-ld[i], conn[i]) for i in (rng.integers(0, len(conn), len(conn)) for _ in range(5000))]
+    boot = [b for b in boot if not np.isnan(b)]
+    # With a single class in the pool every resample is NaN and `boot` is empty.
+    if boot:
+        lo, hi = np.percentile(boot, 2.5), np.percentile(boot, 97.5)
+    else:
+        lo = hi = float("nan")
+    print(f"\nPOOLED: n={len(conn)}, broken={nb}, connectivity-acc={conn.mean():.3f}")
+    print(f" latent-drift: broken median={np.median(ld[conn==0]):.2f} vs connected={np.median(ld[conn==1]):.2f}")
+    print(f" AUC(-latent_drift -> connected/complete) = {a_ld:.3f} bootstrap95%CI=[{lo:.3f}, {hi:.3f}]")
+    print(f" AUC(-answer_drift -> connected) = {a_ad:.3f}")
+    print(f" => robust 'broken/incomplete = more chaotic'? CI excludes 0.5: {lo > 0.5}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/analysis_2x2/analyze_maze_connectivity.py b/analysis_2x2/analyze_maze_connectivity.py
new file mode 100644
index 0000000..f2fb01b
--- /dev/null
+++ b/analysis_2x2/analyze_maze_connectivity.py
@@ -0,0 +1,87 @@
+"""Maze with CONNECTIVITY as the success criterion (not exact-match).
+Genuine failure = predicted path does NOT connect start->goal (broken/incomplete answer).
+Valid alternative paths (connected but != labeled) count as 'complete answer'.
+
+Joins per-cell preds (my dump, seed 20260616) with the friend's FTLE/drift npz (same seed/idx),
+and asks: do BROKEN (disconnected) predictions WANDER while CONNECTED ones SETTLE?
+If yes, the dynamical signal tracks answer-COMPLETENESS, and exact-match was the wrong lens for Maze.
+"""
+from __future__ import annotations
+from pathlib import Path
+from collections import deque
+import glob
+import numpy as np
+
+HERE = Path(__file__).resolve().parent
+FU = HERE / "maze_followup"
+FRIEND = "/tmp/friend_maze/maze_all_ckpts_lyap"
+
+
def is_connected(inp, pred, grid=(30, 30)):
    """Check whether a predicted path links the two endpoint cells of a maze.

    Token values tested here: 3 and 4 mark the endpoint cells in ``inp``,
    1 marks a wall in ``inp``, and 5 marks a path cell in ``pred``.

    Args:
        inp: flattened maze input; reshaped to ``grid``.
        pred: flattened model prediction; reshaped to ``grid``.
        grid: (rows, cols) of the maze. Defaults to (30, 30), the size the
            original implementation hard-coded.

    Returns:
        True when the endpoints are joined through 4-connected predicted path
        cells and no path cell sits on a wall; False otherwise. Also True when
        fewer than two endpoint cells exist (the example cannot be judged).
    """
    g = np.asarray(inp).reshape(grid)
    pr = np.asarray(pred).reshape(grid)
    n_rows, n_cols = g.shape
    se = np.argwhere((g == 3) | (g == 4))
    if len(se) < 2:
        return True  # can't judge -> treat as connected (won't happen)
    s, e = tuple(se[0]), tuple(se[1])
    # Endpoints are always walkable, even if the model did not mark them as path.
    pathset = set(map(tuple, np.argwhere(pr == 5))) | {s, e}
    if any(g[r, c] == 1 for r, c in pathset):
        return False  # crosses wall = invalid
    # Breadth-first search from one endpoint over the predicted path cells.
    seen = {s}
    q = deque([s])
    while q:
        r, c = q.popleft()
        if (r, c) == e:
            return True
        for dr, dc in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
            nr, nc = r + dr, c + dc
            if 0 <= nr < n_rows and 0 <= nc < n_cols and (nr, nc) in pathset and (nr, nc) not in seen:
                seen.add((nr, nc))
                q.append((nr, nc))
    return False
+
+
def auc(score, y):
    """Rank-based AUC (Mann-Whitney U / (n_pos * n_neg)) of ``score`` for labels ``y``.

    Tied scores receive the average of the ranks they span, so a block of
    equal scores contributes 0.5 per positive/negative pair instead of an
    arbitrary order-dependent amount.

    Args:
        score: 1-D array-like of scores (higher = more likely label 1).
        y: 1-D array-like of labels; entries equal to 1 are positives and
            entries equal to 0 are negatives.

    Returns:
        AUC as a float, or NaN when either class is empty.
    """
    score = np.asarray(score, float)
    y = np.asarray(y)
    p, n = score[y == 1], score[y == 0]
    if len(p) == 0 or len(n) == 0:
        return float("nan")
    a = np.concatenate([p, n])
    s = np.sort(a)
    # A value occupies sorted positions lo+1 .. hi (1-based); its average rank is their midpoint.
    lo = np.searchsorted(s, a, side="left")
    hi = np.searchsorted(s, a, side="right")
    r = (lo + hi + 1) / 2.0
    return float((r[:len(p)].sum() - len(p) * (len(p) + 1) / 2) / (len(p) * len(n)))
+
+
def cohend(a, b):
    """Cohen's d of ``a`` relative to ``b`` using the pooled sample standard deviation.

    Returns NaN when either group has fewer than two values or when the
    pooled standard deviation is not positive.
    """
    size_a, size_b = len(a), len(b)
    if min(size_a, size_b) < 2:
        return float("nan")
    ss_a = (size_a - 1) * a.var(ddof=1)
    ss_b = (size_b - 1) * b.var(ddof=1)
    pooled_sd = np.sqrt((ss_a + ss_b) / (size_a + size_b - 2))
    if pooled_sd > 0:
        return (a.mean() - b.mean()) / pooled_sd
    return float("nan")
+
+
# Per-checkpoint analysis: join this repo's prediction dump with the external FTLE/drift dump on
# the shared example index, label each example by path connectivity, and compare the dynamics of
# broken vs connected predictions (and against the exact-match labeling).
for step in [13020, 52080, 130200]:
    pred_f = FU / f"mazepreds_step_{step}_seed20260616.npz"
    # glob returns matches in arbitrary order; sort so fr[0] is the same file on every run.
    fr = sorted(glob.glob(f"{FRIEND}/maze_step_{step}_*.npz"))
    if not pred_f.exists() or not fr:
        print(f"[pending] step {step}")
        continue
    P = np.load(pred_f)
    F = np.load(fr[0])
    # pi / fi index the examples present in both dumps, in the same (sorted idx) order.
    common, pi, fi = np.intersect1d(P["idx"], F["idx"], return_indices=True)
    preds = P["preds"][pi]
    inputs = P["inputs"][pi]
    exact = P["exact_correct"][pi].astype(int)
    l1 = F["lyap_spec"][fi, 0].astype(float)
    # log10 of the mean of the last four drift values, floored at 1e-12 to keep the log finite
    late_drift = np.log10(np.clip(F["drift_zH"][fi, -4:].mean(1), 1e-12, None))
    conn = np.array([is_connected(inputs[k], preds[k]) for k in range(len(common))]).astype(int)
    nb = int((conn == 0).sum())
    print(f"\n=== step {step} (joined n={len(common)}) ===")
    print(f" exact-match acc={exact.mean():.3f} | CONNECTIVITY acc (valid complete path)={conn.mean():.3f} | broken={nb}")
    if nb < 3 or nb > len(common) - 3:
        print(f" too few broken/connected to condition dynamics (broken={nb})")
        continue
    # dynamics conditioned on CONNECTIVITY (broken=0 vs connected=1)
    print(f" late-drift (settling): connected median={np.median(late_drift[conn==1]):.2f} broken median={np.median(late_drift[conn==0]):.2f}")
    print(f" AUC(-late-drift -> connected) = {auc(-late_drift, conn):.3f} Cohen d(broken-conn)={cohend(late_drift[conn==0], late_drift[conn==1]):+.2f}")
    print(f" lambda1: connected median={np.median(l1[conn==1]):+.4f} broken median={np.median(l1[conn==0]):+.4f}")
    print(f" AUC(-lambda1 -> connected) = {auc(-l1, conn):.3f}")
    # compare: does connectivity separate dynamics BETTER than exact-match?
    print(f" [vs exact-match] AUC(-late-drift -> exact_correct) = {auc(-late_drift, exact):.3f}, "
          f"AUC(-lambda1 -> exact) = {auc(-l1, exact):.3f}")
    # within CONNECTED, does exact-match still separate? (should NOT, if dynamics track completeness)
    m = conn == 1
    if 0 < exact[m].mean() < 1:
        print(f" within CONNECTED (n={m.sum()}): AUC(-late-drift -> exact) = {auc(-late_drift[m], exact[m]):.3f} "
              f"(near 0.5 => dynamics track completeness, not correctness)")
diff --git a/analysis_2x2/analyze_phase1.py b/analysis_2x2/analyze_phase1.py
new file mode 100644
index 0000000..2ffa58d
--- /dev/null
+++ b/analysis_2x2/analyze_phase1.py
@@ -0,0 +1,126 @@
+"""Analyze phase-1 results: E5 horizon sweep, E6 matched-objective pairs, E2 second-run replication.
+Outputs: analysis_2x2/phase1/phase1_results.md + figures.
+"""
+from __future__ import annotations
+from pathlib import Path
+import matplotlib; matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+import numpy as np
+
+HERE = Path(__file__).resolve().parent
+P1 = HERE / "phase1"
+RETEST = HERE / "retest"
+
+
def auc_rank(score, label):
    """Rank-based AUC of ``score`` for binary ``label`` with average ranks on ties.

    Entries with label 1 are positives, entries with label 0 negatives.
    Returns NaN when either class is empty.
    """
    score = np.asarray(score, float)
    label = np.asarray(label, int)
    pos = score[label == 1]
    neg = score[label == 0]
    n_pos, n_neg = len(pos), len(neg)
    if n_pos == 0 or n_neg == 0:
        return float("nan")

    pooled = np.concatenate([pos, neg])
    total = len(pooled)
    order = np.argsort(pooled, kind="mergesort")
    ranks = np.empty(total)
    ranks[order] = np.arange(1, total + 1)

    # Walk the sorted values; every run of equal values shares the mean of its ordinal ranks.
    ordered = pooled[order]
    start = 0
    while start < total:
        stop = start + 1
        while stop < total and ordered[stop] == ordered[start]:
            stop += 1
        if stop - start > 1:
            tied = order[start:stop]
            ranks[tied] = ranks[tied].mean()
        start = stop

    u_stat = ranks[:n_pos].sum() - n_pos * (n_pos + 1) / 2
    return float(u_stat / (n_pos * n_neg))
+
+
def otsu(x, nbins=256):
    """Otsu threshold of the 1-D sample ``x``.

    Builds an ``nbins`` histogram and returns the centre of the bin that
    maximises the between-class variance of the two-way split.

    Degenerate inputs have no defined split and previously raised inside
    ``np.nanargmax``; they now return a value instead:
      * empty ``x``            -> NaN
      * all values in one bin  -> that (constant) value

    Args:
        x: 1-D array-like of finite values.
        nbins: number of histogram bins.

    Returns:
        The threshold as a float.
    """
    x = np.asarray(x, float)
    if x.size == 0:
        return float("nan")
    h, e = np.histogram(x, bins=nbins)
    h = h.astype(float)
    c = (e[:-1] + e[1:]) / 2          # bin centres
    p = h / h.sum()                   # bin probabilities
    om = np.cumsum(p)                 # cumulative class-0 weight
    mu = np.cumsum(p * c)             # cumulative class-0 mean mass
    mt = mu[-1]                       # overall mean
    den = om * (1 - om)
    den[den <= 0] = np.nan            # splits that leave one class empty are not candidates
    between = (mt * om - mu) ** 2 / den
    if np.all(np.isnan(between)):
        # Every sample fell into a single bin: nothing to separate.
        return float(x.min())
    return float(c[np.nanargmax(between)])
+
+
def cells_of(d):
    """Summarise one dump as a 2x2 table of (converged?, correct?) cells.

    "Converged" means the log10 late drift (mean of the last four
    ``drift_zH`` columns, floored at 1e-12) is below its Otsu threshold.
    Cells: A = converged & correct, B = converged & wrong,
    C = not converged & correct, D = not converged & wrong.

    Returns a dict with accuracy, the threshold, the four cell counts, the
    fraction of examples in D, and the median first Lyapunov exponent in
    A and in D (NaN for an empty cell).
    """
    late_drift = d["drift_zH"][:, -4:].mean(1)
    ld = np.log10(np.clip(late_drift, 1e-12, None))
    c = d["exact_correct"].astype(int)
    tau = otsu(ld)
    conv = ld < tau
    right, wrong = c == 1, c == 0
    cell_a = conv & right
    cell_b = conv & wrong
    cell_c = ~conv & right
    cell_d = ~conv & wrong
    l1 = d["lyap_spec"][:, 0]

    def median_l1(mask):
        # Median of lambda_1 over a cell, NaN if the cell is empty.
        if mask.sum():
            return float(np.median(l1[mask]))
        return float("nan")

    return dict(
        acc=float(c.mean()),
        tau=tau,
        nA=int(cell_a.sum()),
        nB=int(cell_b.sum()),
        nC=int(cell_c.sum()),
        nD=int(cell_d.sum()),
        fD=float(cell_d.mean()),
        l1A=median_l1(cell_a),
        l1D=median_l1(cell_d),
    )
+
+
+ lines = ["# Phase-1 analysis (E5 horizon sweep, E6 matched-objective, E2 replication)", ""]  # markdown report, accumulated line by line
+
+ # ---------- E5 horizon sweep: per prefix length H, rank the FINAL outcome from signals of the H-segment run ----------
+ lines += ["## E5: when does fate become legible? AUC(early signal -> FINAL correct), restricted to not-yet-correct@H", ""]
+ finals = {"trm": np.load(RETEST / "trm_gbs768_step58590_full_n2048.npz"),  # full runs: source of the final-outcome labels
+ "hrm": np.load(RETEST / "hrm_righteous_step26040_full_n2048.npz")}
+ # include h=4 from retest short runs (H=4 is read from these files rather than from a phase1 h4 file)
+ short4 = {"trm": np.load(RETEST / "trm_gbs768_step58590_short_n2048.npz"),
+ "hrm": np.load(RETEST / "hrm_righteous_step26040_short_n2048.npz")}
+ sweep = {}  # model name -> list of per-H row dicts; reused by the figure code below
+ for model in ["trm", "hrm"]:
+ fin = finals[model]; fi = fin["idx"]; y_final_all = fin["exact_correct"].astype(int)
+ rows = []
+ for H in [2, 4, 6, 8, 10, 12]:
+ if H == 4:
+ s = short4[model]
+ else:
+ tag = f"{'trm_official58590' if model=='trm' else 'hrm26040'}_h{H}_n2048"
+ s = np.load(P1 / f"{tag}.npz")
+ si = s["idx"]; common, fp, sp = np.intersect1d(fi, si, return_indices=True)  # fp / sp: positions of the shared idx values in the full / prefix run
+ yf = y_final_all[fp]; ye = s["exact_correct"].astype(int)[sp]  # yf: final outcome, ye: outcome of the prefix run
+ l1 = s["lyap_spec"][sp, 0]; dr = np.log10(np.clip(s["drift_zH"][sp, -1], 1e-12, None)); q = s["q_halt"][sp, -1]  # drift and q_halt use the last recorded column
+ m = ye == 0 # not yet correct at H
+ rows.append(dict(H=H, n_und=int(m.sum()), frac_eventual=float(yf[m].mean()) if m.sum() else float("nan"),
+ auc_l1=auc_rank(-l1[m], yf[m]), auc_drift=auc_rank(-dr[m], yf[m]), auc_q=auc_rank(q[m], yf[m]),  # lambda_1 and drift are negated before ranking; q_halt is used as-is
+ solved_at_H=float(ye.mean())))
+ sweep[model] = rows
+ lines.append(f"### {model.upper()}")
+ lines.append("| H | solved@H | undecided n | of which eventual✓ | AUC(-λ₁) | AUC(-drift) | AUC(q_halt) |")
+ lines.append("|---|---|---|---|---|---|---|")
+ for r in rows:
+ lines.append(f"| {r['H']} | {r['solved_at_H']:.3f} | {r['n_und']} | {r['frac_eventual']:.3f} | "
+ f"{r['auc_l1']:.3f} | {r['auc_drift']:.3f} | {r['auc_q']:.3f} |")
+ lines.append("")
+
+ # E5 figure: one panel per model, the three AUC curves against prefix length H
+ fig, axes = plt.subplots(1, 2, figsize=(11, 4))
+ for ax, model in zip(axes, ["trm", "hrm"]):
+ rows = sweep[model]; H = [r["H"] for r in rows]
+ ax.plot(H, [r["auc_l1"] for r in rows], "o-", label="−λ₁")
+ ax.plot(H, [r["auc_drift"] for r in rows], "s-", label="−drift")
+ ax.plot(H, [r["auc_q"] for r in rows], "^-", label="q_halt")
+ ax.axhline(0.5, color="gray", lw=0.6, ls="--")  # reference line at AUC = 0.5
+ ax.set_xlabel("prefix length H (ACT segments)"); ax.set_ylabel("AUC → final correct (undecided@H only)")
+ ax.set_ylim(0.3, 1.0); ax.set_title(f"{model.upper()}: legibility of fate vs prefix"); ax.legend(fontsize=8)
+ fig.tight_layout(); fig.savefig(P1 / "fig_E5_horizon_sweep.png", dpi=150); plt.close(fig)
+
+ # ---------- E6 matched-objective: base vs multi4 run, side by side per checkpoint ----------
+ lines += ["## E6: matched-objective intervention (step9 fixed-unroll runs, n=512)", ""]
+ for fam, base, mult in [("HRM (E base vs F multi4)", "step9E_hrm", "step9F_hrm"),  # (section title, base file prefix, multi4 file prefix)
+ ("TRM (G base vs H multi4)", "step9G_trm", "step9H_trm")]:
+ lines.append(f"### {fam}")
+ lines.append("| ckpt | base acc | base fD | base λ₁(D) | multi4 acc | multi4 fD | multi4 λ₁(D) |")
+ lines.append("|---|---|---|---|---|---|---|")
+ for ck in ["step_12500", "step_25000", "best", "final"]:
+ bp = P1 / f"{base}_{ck}_n512.npz"; mp = P1 / f"{mult}_{ck}_n512.npz"
+ if not bp.exists() or not mp.exists():  # either dump absent: emit a placeholder row and move on
+ lines.append(f"| {ck} | missing | | | | | |"); continue
+ b = cells_of(np.load(bp)); m = cells_of(np.load(mp))
+ lines.append(f"| {ck} | {b['acc']:.3f} | {b['fD']:.3f} | {b['l1D']:+.4f} | "
+ f"{m['acc']:.3f} | {m['fD']:.3f} | {m['l1D']:+.4f} |")
+ lines.append("")
+
# ---------- E2 second-run replication ----------
# Same 2x2 decomposition (cells_of) applied to the "best" and "final" checkpoints of a second run.
lines += ["## E2: decomposition on a SECOND HRM training run (step9_E fixed-unroll, n=2048)", ""]
lines.append("| ckpt | acc | A | B | C | D | λ₁(A) | λ₁(D) | settled-wrong frac of failures |")
lines.append("|---|---|---|---|---|---|---|---|---|")
for ck in ["best", "final"]:
    e2_path = P1 / f"step9E_hrm_{ck}_full_n2048.npz"
    if not e2_path.exists():
        # Mirror the E6 section: a missing dump yields a placeholder row instead of aborting the
        # whole report after the E5/E6 work has already been done.
        lines.append(f"| {ck} | missing |" + " |" * 7)
        continue
    d = np.load(e2_path)
    cc = cells_of(d)
    nfail = cc["nB"] + cc["nD"]  # all wrong examples, converged or not
    lines.append(f"| {ck} | {cc['acc']:.3f} | {cc['nA']} | {cc['nB']} | {cc['nC']} | {cc['nD']} | "
                 f"{cc['l1A']:+.4f} | {cc['l1D']:+.4f} | {cc['nB']/max(nfail,1):.4f} |")

# The report contains non-ASCII characters (λ, ₁, ✓); write it as UTF-8 explicitly so the result
# does not depend on the locale's default encoding.
(P1 / "phase1_results.md").write_text("\n".join(lines), encoding="utf-8")
print("\n".join(lines))
print("\nwrote", P1 / "phase1_results.md", "and fig_E5_horizon_sweep.png")
diff --git a/analysis_2x2/analyze_solution_space.py b/analysis_2x2/analyze_solution_space.py
new file mode 100644
index 0000000..a1cd6d0
--- /dev/null
+++ b/analysis_2x2/analyze_solution_space.py
@@ -0,0 +1,95 @@
+"""Solution-space settling test (user's hypothesis): is the weak Maze separation an artifact of
+analyzing the FULL latent space (88% trivial copy) instead of the SOLUTION/output space?
+
+Measures per-step decoded-ANSWER Hamming drift over solution-space cells (label!=input) and asks:
+(1) does the DECODED ANSWER settle (stop changing) differently for success vs failure?
+(2) does solution-space settling separate outcome where full-latent drift did not (Maze)?
+(3) Sudoku control: full-latent DID separate; does solution-space too?
+Plus per-cell failure structure for Maze: are error cells a connected detour or scattered?
+"""
+from __future__ import annotations
+from pathlib import Path
+import numpy as np
+
+HERE = Path(__file__).resolve().parent
+FU = HERE / "maze_followup"
+
+
def auc(s, y):
    """Rank-based AUC (Mann-Whitney U / (n_pos * n_neg)) of score ``s`` for labels ``y``.

    Tied scores receive the average of the ranks they span. This matters
    here because the scores passed in are often heavily tied; with plain
    ordinal ranks the result would depend on the arbitrary order of equal
    values.

    Args:
        s: 1-D array-like of scores (higher = more likely label 1).
        y: 1-D array-like of labels; 1 = positive, 0 = negative.

    Returns:
        AUC as a float, or NaN when either class is empty.
    """
    s = np.asarray(s, float)
    y = np.asarray(y)
    p, n = s[y == 1], s[y == 0]
    if len(p) == 0 or len(n) == 0:
        return float("nan")
    a = np.concatenate([p, n])
    srt = np.sort(a)
    # A value occupies sorted positions lo+1 .. hi (1-based); its average rank is their midpoint.
    lo = np.searchsorted(srt, a, side="left")
    hi = np.searchsorted(srt, a, side="right")
    r = (lo + hi + 1) / 2.0
    return float((r[:len(p)].sum() - len(p) * (len(p) + 1) / 2) / (len(p) * len(n)))
+
+
def cohend(a, b):
    """Cohen's d of ``a`` relative to ``b`` using the pooled sample standard deviation.

    Args:
        a, b: 1-D array-likes of values for the two groups.

    Returns:
        (mean(a) - mean(b)) / pooled SD, or NaN when either group has fewer
        than two values (sample variance undefined) or the pooled SD is not
        positive.
    """
    a = np.asarray(a, float)
    b = np.asarray(b, float)
    # Guard explicitly: var(ddof=1) on fewer than two values is NaN and emits RuntimeWarnings.
    if len(a) < 2 or len(b) < 2:
        return float("nan")
    s = np.sqrt(((len(a) - 1) * a.var(ddof=1) + (len(b) - 1) * b.var(ddof=1)) / (len(a) + len(b) - 2))
    return (a.mean() - b.mean()) / s if s > 0 else float("nan")
+
+
def connected_components(mask2d):
    """Count 4-connected components of the truthy cells of a 2-D mask.

    Returns ``(count, sizes)`` where ``sizes[k]`` is the number of cells in
    the k-th component, components being discovered in row-major order of
    their first cell.
    """
    n_rows, n_cols = mask2d.shape
    visited = np.zeros_like(mask2d, bool)
    neighbours = ((-1, 0), (1, 0), (0, -1), (0, 1))
    sizes = []
    # argwhere lists truthy cells in row-major order, the same order a nested row/column scan visits them.
    for i, j in np.argwhere(mask2d):
        if visited[i, j]:
            continue
        visited[i, j] = True
        pending = [(i, j)]
        area = 0
        while pending:
            r, c = pending.pop()
            area += 1
            for dr, dc in neighbours:
                rr, cc = r + dr, c + dc
                inside = 0 <= rr < n_rows and 0 <= cc < n_cols
                if inside and mask2d[rr, cc] and not visited[rr, cc]:
                    visited[rr, cc] = True
                    pending.append((rr, cc))
        sizes.append(area)
    return len(sizes), sizes
+
+
def analyze(tag, npz, grid=None):
    """Print solution-space settling statistics for one prediction dump.

    Args:
        tag: label printed in the section header.
        npz: path of an ``.npz`` dump providing ``exact_correct``,
            ``ans_drift_ans`` and ``ans_drift_full`` (and ``preds`` /
            ``labels`` when ``grid`` is given).
        grid: optional (rows, cols) shape of one example. When given, the
            spatial structure of the wrong cells of each failed example is
            also reported.

    Returns:
        None; all results are printed.
    """
    d = np.load(npz)
    y = d["exact_correct"].astype(int)
    ad = d["ans_drift_ans"].astype(float)  # (N, steps) decoded-answer drift over solution cells
    late = ad[:, -4:].mean(1)  # late answer-drift = solution-space "still changing"
    print(f"\n=== {tag} (n={len(y)}, acc={y.mean():.3f}) — SOLUTION-SPACE settling ===")
    print(f" late answer-drift (cells/step changing) | success median={np.median(late[y==1]):.2f} "
          f"failure median={np.median(late[y==0]):.2f}")
    print(f" AUC(-late answer-drift -> correct) = {auc(-late, y):.3f} Cohen d(fail-succ)={cohend(late[y==0],late[y==1]):+.2f}")
    settled = late < 0.5
    print(f" fraction with SETTLED answer (drift<0.5 cells/step): success={settled[y==1].mean():.3f} failure={settled[y==0].mean():.3f}")
    # full-grid for reference
    adf = d["ans_drift_full"][:, -4:].mean(1).astype(float)
    print(f" [ref] AUC(-late FULL-grid answer-drift -> correct) = {auc(-adf, y):.3f}")

    if grid is not None:
        # per-cell failure structure: are error cells connected (detour) or scattered?
        preds = d["preds"]
        labels = d["labels"]
        fail = np.where(y == 0)[0]
        comps, errs, largest = [], [], []
        for k in fail:
            err = (preds[k] != labels[k]).reshape(grid)
            ne = int(err.sum())
            if ne == 0:
                continue
            c, sizes = connected_components(err)
            comps.append(c)
            errs.append(ne)
            # Share of this example's wrong cells that lie in its biggest component. The original
            # printed a constant placeholder for this statistic and never used `sizes`.
            largest.append(max(sizes) / ne)
        comps = np.array(comps)
        errs = np.array(errs)
        if len(comps):
            print(f" per-failure error cells: median={int(np.median(errs))}; connected components: median={int(np.median(comps))}; "
                  f"largest-component fraction median={np.median(largest):.2f}")
            frac_few = (comps <= 2).mean()
            print(f" -> {frac_few:.2f} of failures have <=2 error components (connected detour); "
                  f"{(comps>=5).mean():.2f} have >=5 (scattered)")
+
+
if __name__ == "__main__":
    # (header tag, dump file, grid shape of one example, message printed when the dump is absent)
    jobs = [
        ("MAZE", FU / "maze_preds_step130200.npz", (30, 30), "[pending] maze dump"),
        ("SUDOKU (control)", FU / "sudoku_preds_step58590.npz", (9, 9), "[pending] sudoku dump"),
    ]
    for tag, dump_path, shape, pending_msg in jobs:
        if not dump_path.exists():
            print(pending_msg)
            continue
        analyze(tag, dump_path, grid=shape)
diff --git a/analysis_2x2/cells_hrm26040_joint_n2048.csv b/analysis_2x2/cells_hrm26040_joint_n2048.csv
new file mode 100644
index 0000000..9ee8208
--- /dev/null
+++ b/analysis_2x2/cells_hrm26040_joint_n2048.csv
@@ -0,0 +1,5 @@
+cell,n,lam1_median,lam1_mean,lam1_q25,lam1_q75,lam8_median,token_acc_median,halted_at_median
+A_conv_correct,1020,-0.152252,-0.149175,-0.177945,-0.123537,-0.229215,1.0000,4.0
+B_conv_wrong,14,-0.035939,-0.045284,-0.100539,0.023329,-0.091743,0.6420,0.0
+C_nonconv_correct,57,-0.044267,-0.039474,-0.073953,-0.009610,-0.115885,1.0000,14.0
+D_nonconv_wrong,957,0.031881,0.037504,0.001404,0.070978,-0.064309,0.6296,0.0
diff --git a/analysis_2x2/cells_hrm26040_n8192.csv b/analysis_2x2/cells_hrm26040_n8192.csv
new file mode 100644
index 0000000..7d2c776
--- /dev/null
+++ b/analysis_2x2/cells_hrm26040_n8192.csv
@@ -0,0 +1,5 @@
+cell,n,lam1_median,lam1_mean,lam1_q25,lam1_q75,lam8_median,token_acc_median,halted_at_median
+A_conv_correct,4103,-0.861743,-0.856474,-0.902015,-0.815340,-0.974624,1.0000,4.0
+B_conv_wrong,63,-0.591228,-0.631761,-0.809065,-0.506116,-0.737965,0.6296,0.0
+C_nonconv_correct,195,-0.694297,-0.685632,-0.735417,-0.646644,-0.788653,1.0000,14.0
+D_nonconv_wrong,3831,-0.599770,-0.597011,-0.647486,-0.548848,-0.714284,0.6296,0.0
diff --git a/analysis_2x2/cells_trm_official58590_n2048.csv b/analysis_2x2/cells_trm_official58590_n2048.csv
new file mode 100644
index 0000000..d805cf9
--- /dev/null
+++ b/analysis_2x2/cells_trm_official58590_n2048.csv
@@ -0,0 +1,5 @@
+cell,n,lam1_median,lam1_mean,lam1_q25,lam1_q75,lam8_median,token_acc_median,halted_at_median
+A_conv_correct,1724,0.011282,0.015547,0.008467,0.016524,0.002090,1.0000,2.0
+B_conv_wrong,0,,,,,,,
+C_nonconv_correct,70,0.081975,0.077830,0.068631,0.090418,0.008649,1.0000,14.0
+D_nonconv_wrong,254,0.102872,0.102872,0.092413,0.112425,0.018793,0.6296,0.0
diff --git a/analysis_2x2/cells_trm_official_gbs768_step58590_n512.csv b/analysis_2x2/cells_trm_official_gbs768_step58590_n512.csv
new file mode 100644
index 0000000..2ff9465
--- /dev/null
+++ b/analysis_2x2/cells_trm_official_gbs768_step58590_n512.csv
@@ -0,0 +1,5 @@
+cell,n,lam1_median,lam1_mean,lam1_q25,lam1_q75,lam8_median,token_acc_median,halted_at_median
+A_conv_correct,434,0.011053,0.015590,0.008054,0.017474,0.002147,1.0000,2.0
+B_conv_wrong,0,,,,,,,
+C_nonconv_correct,14,0.082749,0.080288,0.070048,0.095862,0.006181,1.0000,14.0
+D_nonconv_wrong,64,0.103398,0.102595,0.092417,0.111892,0.021354,0.6296,0.0
diff --git a/analysis_2x2/cells_trm_singleGPU_step130205_n512.csv b/analysis_2x2/cells_trm_singleGPU_step130205_n512.csv
new file mode 100644
index 0000000..1e9aec5
--- /dev/null
+++ b/analysis_2x2/cells_trm_singleGPU_step130205_n512.csv
@@ -0,0 +1,5 @@
+cell,n,lam1_median,lam1_mean,lam1_q25,lam1_q75,lam8_median,token_acc_median,halted_at_median
+A_conv_correct,365,0.004312,0.009192,-0.000673,0.014061,-0.008585,1.0000,3.0
+B_conv_wrong,0,,,,,,,
+C_nonconv_correct,22,0.085199,0.084005,0.069379,0.098772,0.015529,1.0000,15.0
+D_nonconv_wrong,125,0.106698,0.106791,0.099522,0.112582,0.032808,0.6296,0.0
diff --git a/analysis_2x2/cells_trm_singleGPU_step260410_n512.csv b/analysis_2x2/cells_trm_singleGPU_step260410_n512.csv
new file mode 100644
index 0000000..4d11774
--- /dev/null
+++ b/analysis_2x2/cells_trm_singleGPU_step260410_n512.csv
@@ -0,0 +1,5 @@
+cell,n,lam1_median,lam1_mean,lam1_q25,lam1_q75,lam8_median,token_acc_median,halted_at_median
+A_conv_correct,383,0.004744,0.010418,0.000744,0.013660,-0.004220,1.0000,2.0
+B_conv_wrong,0,,,,,,,
+C_nonconv_correct,11,0.099828,0.089867,0.090672,0.103510,0.013320,1.0000,15.0
+D_nonconv_wrong,118,0.102321,0.102882,0.093520,0.111216,0.021455,0.6420,0.0
diff --git a/analysis_2x2/cells_trm_step13020_n512.csv b/analysis_2x2/cells_trm_step13020_n512.csv
new file mode 100644
index 0000000..d3d1ff0
--- /dev/null
+++ b/analysis_2x2/cells_trm_step13020_n512.csv
@@ -0,0 +1,5 @@
+cell,n,lam1_median,lam1_mean,lam1_q25,lam1_q75,lam8_median,token_acc_median,halted_at_median
+A_conv_correct,296,-0.004330,-0.002231,-0.011287,0.005197,-0.032494,1.0000,3.0
+B_conv_wrong,10,-0.002707,-0.003217,-0.008984,0.000914,-0.024164,0.5556,0.0
+C_nonconv_correct,9,0.041901,0.041647,0.032285,0.047971,-0.018014,1.0000,14.0
+D_nonconv_wrong,197,0.031663,0.030858,0.014350,0.046172,-0.012672,0.6420,0.0
diff --git a/analysis_2x2/characterization/characterization_results.md b/analysis_2x2/characterization/characterization_results.md
new file mode 100644
index 0000000..2e604aa
--- /dev/null
+++ b/analysis_2x2/characterization/characterization_results.md
@@ -0,0 +1,70 @@
+# Characterization of the FTLE success/failure separation
+
+Describes the separation (shape, spectrum, time-scaling, effect size). Mechanism = open.
+
+## Char 1 — full spectrum vs lambda1 (one mode or whole-spectrum shift?)
+
+### TRM official @58590 (n=2048)
+- per-exponent AUC(-λ_i→correct), i=1..8: 0.993, 0.994, 0.994, 0.993, 0.992, 0.987, 0.984, 0.977
+- median spectrum | success: +0.011, +0.009, +0.007, +0.006, +0.005, +0.004, +0.003, +0.002
+- median spectrum | failure: +0.103, +0.074, +0.057, +0.046, +0.036, +0.029, +0.024, +0.019
+- AUC λ1=0.993 | AUC spectral-mean=0.995 | AUC KS-proxy(Σλ⁺)=0.995
+- shift uniformity: success−failure median gap per exponent: -0.091, -0.065, -0.050, -0.040, -0.031, -0.026, -0.021, -0.017
+
+### HRM joint @26040 (n=2048)
+- per-exponent AUC(-λ_i→correct), i=1..8: 0.987, 0.990, 0.989, 0.990, 0.991, 0.992, 0.992, 0.993
+- median spectrum | success: -0.150, -0.163, -0.182, -0.191, -0.203, -0.212, -0.220, -0.227
+- median spectrum | failure: +0.031, +0.002, -0.018, -0.030, -0.041, -0.050, -0.056, -0.064
+- AUC λ1=0.987 | AUC spectral-mean=0.992 | AUC KS-proxy(Σλ⁺)=0.877
+- shift uniformity: success−failure median gap per exponent: -0.181, -0.165, -0.164, -0.161, -0.161, -0.162, -0.163, -0.163
+
+### HRM @26040 (n=8192, est-2)
+- per-exponent AUC(-λ_i→correct), i=1..8: 0.984, 0.989, 0.990, 0.991, 0.991, 0.991, 0.991, 0.991
+- median spectrum | success: -0.858, -0.887, -0.908, -0.927, -0.941, -0.952, -0.964, -0.972
+- median spectrum | failure: -0.600, -0.635, -0.656, -0.671, -0.684, -0.696, -0.706, -0.714
+- AUC λ1=0.984 | AUC spectral-mean=0.991 | AUC KS-proxy(Σλ⁺)=0.500
+- shift uniformity: success−failure median gap per exponent: -0.259, -0.252, -0.252, -0.255, -0.256, -0.257, -0.258, -0.258
+
+## Char 2 — bimodality vs threshold-on-continuum
+
+BC>0.555 hints bimodal. outcome-rate-vs-λ1 sharpness: width of 25→75% outcome transition,
+in λ1 units, divided by the 10–90 inter-percentile spread of λ1 (small = sharp threshold).
+
+- TRM official @58590 (n=2048): BC(all)=0.855, BC(succ)=0.780, BC(fail)=0.303; outcome transition width/spread=0.298 (λ1@75%=+0.068, @25%=+0.094)
+- HRM joint @26040 (n=2048): BC(all)=0.555, BC(succ)=0.331, BC(fail)=0.287; outcome transition width/spread=0.124 (λ1@75%=-0.069, @25%=-0.037)
+- HRM @26040 (n=8192, est-2): BC(all)=0.554, BC(succ)=0.401, BC(fail)=0.258; outcome transition width/spread=0.150 (λ1@75%=-0.749, @25%=-0.693)
+
+## Char 3 — separation magnitude vs integration window H (all examples, final outcome)
+
+Distinct from E5 (which restricted to undecided@H for *prediction*). Here: how does the
+two-class λ1 separation BUILD as the integration window grows?
+
+### TRM: H, AUC(-λ1→final correct), Cohen's d(fail−succ)
+- H= 2: AUC=0.782, d=+1.059
+- H= 4: AUC=0.891, d=+1.759
+- H= 6: AUC=0.929, d=+2.250
+- H= 8: AUC=0.947, d=+2.680
+- H=10: AUC=0.964, d=+3.118
+- H=12: AUC=0.975, d=+3.577
+- H=16: AUC=0.993, d=+4.840
+
+### HRM: H, AUC(-λ1→final correct), Cohen's d(fail−succ)
+- H= 2: AUC=0.428, d=+0.028
+- H= 4: AUC=0.728, d=+0.932
+- H= 6: AUC=0.871, d=+1.691
+- H= 8: AUC=0.918, d=+2.154
+- H=10: AUC=0.940, d=+2.497
+- H=12: AUC=0.960, d=+2.821
+- H=16: AUC=0.987, d=+3.448
+
+## Char 4 — effect size / overlap beyond AUC (full-window λ1 and KS-proxy)
+
+- TRM official @58590 (n=2048):
+ λ1: AUC=0.993, Cohen's d(fail−succ)=+4.84, Bhattacharyya(gauss)=0.042, hist-overlap=0.049
+ KS-proxy: AUC=0.995, Cohen's d=+5.55, hist-overlap=0.039
+- HRM joint @26040 (n=2048):
+ λ1: AUC=0.987, Cohen's d(fail−succ)=+3.45, Bhattacharyya(gauss)=0.227, hist-overlap=0.074
+ KS-proxy: AUC=0.877, Cohen's d=+1.14, hist-overlap=0.330
+- HRM @26040 (n=8192, est-2):
+ λ1: AUC=0.984, Cohen's d(fail−succ)=+3.34, Bhattacharyya(gauss)=0.248, hist-overlap=0.101
+ KS-proxy: AUC=0.500, Cohen's d=+nan, hist-overlap=1.000
diff --git a/analysis_2x2/characterize_separation.py b/analysis_2x2/characterize_separation.py
new file mode 100644
index 0000000..3677cf8
--- /dev/null
+++ b/analysis_2x2/characterize_separation.py
@@ -0,0 +1,182 @@
+"""Characterization of the success/failure FTLE separation (describe WHAT, not WHY).
+Char 1: full-spectrum vs lambda1 — is it one mode or a whole-spectrum shift?
+Char 2: bimodality vs threshold-on-continuum.
+Char 3: separation magnitude vs integration window (uses phase-1 horizon npz).
+Char 4: effect size / overlap beyond AUC.
+All offline, existing npz. numpy-only. Mechanism is NOT addressed here by design.
+"""
+from __future__ import annotations
+from pathlib import Path
+import matplotlib; matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+import numpy as np
+
+HERE = Path(__file__).resolve().parent
+FLOSS = HERE.parent
+P1 = HERE / "phase1"; RETEST = HERE / "retest"
+OUT = HERE / "characterization"; OUT.mkdir(exist_ok=True)
+
+
def auc(score, label):
    """Rank-based AUC of ``score`` for binary ``label`` (1 = positive, 0 = negative).

    Equal scores share the mean of the ordinal ranks they occupy. Returns
    NaN if either class has no members.
    """
    score = np.asarray(score, float)
    label = np.asarray(label, int)
    pos = score[label == 1]
    neg = score[label == 0]
    n_pos, n_neg = len(pos), len(neg)
    if not n_pos or not n_neg:
        return float("nan")

    merged = np.concatenate([pos, neg])
    size = len(merged)
    perm = np.argsort(merged, kind="mergesort")
    rank = np.empty(size)
    rank[perm] = np.arange(1, size + 1)

    # Replace the ranks inside every run of equal values by the run's mean rank.
    in_order = merged[perm]
    head = 0
    while head < size:
        tail = head
        while tail + 1 < size and in_order[tail + 1] == in_order[head]:
            tail += 1
        if tail != head:
            members = perm[head:tail + 1]
            rank[members] = rank[members].mean()
        head = tail + 1

    return float((rank[:n_pos].sum() - n_pos * (n_pos + 1) / 2) / (n_pos * n_neg))
+
+
def cohens_d(a, b):
    """Cohen's d of ``a`` relative to ``b`` using the pooled sample standard deviation.

    Args:
        a, b: 1-D array-likes of values for the two groups.

    Returns:
        (mean(a) - mean(b)) / pooled SD as a float, or NaN when either group
        has fewer than two values or the pooled SD is not positive.
    """
    a, b = np.asarray(a, float), np.asarray(b, float)
    na, nb = len(a), len(b)
    # Sample variance needs two values per group; without this guard numpy emits RuntimeWarnings
    # (and divides by zero when na + nb == 2) on the way to the same NaN result.
    if na < 2 or nb < 2:
        return float("nan")
    sp = np.sqrt(((na - 1) * a.var(ddof=1) + (nb - 1) * b.var(ddof=1)) / (na + nb - 2))
    return float((a.mean() - b.mean()) / sp) if sp > 0 else float("nan")
+
+
def bhattacharyya_gauss(a, b):
    """Bhattacharyya coefficient of two samples under a Gaussian fit to each.

    Each sample is summarised by its mean and population variance (plus
    1e-12 to keep the ratios finite). Returns exp(-distance): 1 for
    identical fits, approaching 0 as they separate.
    """
    a = np.asarray(a, float)
    b = np.asarray(b, float)
    mean_a, mean_b = a.mean(), b.mean()
    var_a = a.var() + 1e-12
    var_b = b.var() + 1e-12
    spread_term = 0.25 * np.log(0.25 * (var_a / var_b + var_b / var_a + 2))
    shift_term = 0.25 * (mean_a - mean_b) ** 2 / (var_a + var_b)
    distance = spread_term + shift_term
    return float(np.exp(-distance))  # 1=identical, 0=disjoint
+
+
def hist_overlap(a, b, bins=60):
    """Overlap of the normalised histograms of two samples on a shared binning.

    Both samples are binned on ``bins`` equal-width bins spanning their
    joint range; the result is the sum over bins of the smaller of the two
    bin fractions.

    Args:
        a, b: 1-D array-likes of values.
        bins: number of shared bins.

    Returns:
        Overlap in [0, 1] (1 = identical histograms, 0 = disjoint), or NaN
        when either sample is empty (previously this raised inside
        ``.min()``).
    """
    a = np.asarray(a, float)
    b = np.asarray(b, float)
    if a.size == 0 or b.size == 0:
        return float("nan")
    lo, hi = min(a.min(), b.min()), max(a.max(), b.max())
    e = np.linspace(lo, hi, bins + 1)
    ha = np.histogram(a, e)[0] / len(a)
    hb = np.histogram(b, e)[0] / len(b)
    return float(np.minimum(ha, hb).sum())
+
+
def bimodality_coeff(x):
    """Sample bimodality coefficient (g^2 + 1) / (k + 3(n-1)^2 / ((n-2)(n-3))).

    ``g`` is the skewness and ``k`` the excess kurtosis, both computed from
    population moments of ``x``.

    Args:
        x: 1-D array-like of values.

    Returns:
        The coefficient as a float; values above 0.555 suggest bimodality.
        NaN when ``x`` has fewer than four values (the finite-sample term
        divides by (n-2)(n-3), which previously raised ZeroDivisionError for
        n = 2 or 3) or zero spread.
    """
    x = np.asarray(x, float)
    n = len(x)
    if n < 4:
        return float("nan")
    m = x.mean()
    s = x.std()
    if s == 0:
        return float("nan")
    g = np.mean(((x - m) / s) ** 3)  # skewness
    k = np.mean(((x - m) / s) ** 4) - 3.0  # excess kurtosis
    bc = (g ** 2 + 1) / (k + 3 * (n - 1) ** 2 / ((n - 2) * (n - 3)))
    return float(bc)  # >0.555 suggests bimodality (uniform=0.555)
+
+
+ HEAD = {  # display name -> npz dump analysed in Char 1, 2 and 4
+ "TRM official @58590 (n=2048)": RETEST / "trm_gbs768_step58590_full_n2048.npz",
+ "HRM joint @26040 (n=2048)": RETEST / "hrm_righteous_step26040_full_n2048.npz",
+ "HRM @26040 (n=8192, est-2)": FLOSS / "diag_8k.npz",
+ }
+
+ lines = ["# Characterization of the FTLE success/failure separation", "",  # markdown report, accumulated line by line
+ "Describes the separation (shape, spectrum, time-scaling, effect size). Mechanism = open.", ""]
+
+ # ---------- Char 1: full spectrum (per-exponent AUC and median spectra by outcome, one figure per dump) ----------
+ lines += ["## Char 1 — full spectrum vs lambda1 (one mode or whole-spectrum shift?)", ""]
+ for name, p in HEAD.items():
+ d = np.load(p); ly = d["lyap_spec"].astype(float); c = d["exact_correct"].astype(int)
+ k = ly.shape[1]  # number of exponents stored per example
+ per = [auc(-ly[:, i], c) for i in range(k)]  # each exponent is negated before ranking against correctness
+ ks_proxy = np.clip(ly, 0, None).sum(1) # sum of positive exponents (KS-entropy proxy)
+ spec_mean = ly.mean(1)
+ lines += [f"### {name}",
+ f"- per-exponent AUC(-λ_i→correct), i=1..{k}: " + ", ".join(f"{v:.3f}" for v in per),
+ f"- median spectrum | success: " + ", ".join(f"{np.median(ly[c==1,i]):+.3f}" for i in range(k)),
+ f"- median spectrum | failure: " + ", ".join(f"{np.median(ly[c==0,i]):+.3f}" for i in range(k)),
+ f"- AUC λ1={per[0]:.3f} | AUC spectral-mean={auc(-spec_mean,c):.3f} | "
+ f"AUC KS-proxy(Σλ⁺)={auc(-ks_proxy,c):.3f}",
+ f"- shift uniformity: success−failure median gap per exponent: " +
+ ", ".join(f"{np.median(ly[c==1,i])-np.median(ly[c==0,i]):+.3f}" for i in range(k)), ""]
+ # spectrum figure: median exponent per index for each outcome, with 25-75 percentile bands
+ fig, ax = plt.subplots(figsize=(6, 4))
+ xs = np.arange(1, k + 1)
+ ax.plot(xs, [np.median(ly[c == 1, i]) for i in range(k)], "o-", color="tab:green", label="success (median)")
+ ax.plot(xs, [np.median(ly[c == 0, i]) for i in range(k)], "s-", color="tab:red", label="failure (median)")
+ ax.fill_between(xs, [np.percentile(ly[c==1,i],25) for i in range(k)], [np.percentile(ly[c==1,i],75) for i in range(k)], color="tab:green", alpha=0.15)
+ ax.fill_between(xs, [np.percentile(ly[c==0,i],25) for i in range(k)], [np.percentile(ly[c==0,i],75) for i in range(k)], color="tab:red", alpha=0.15)
+ ax.axhline(0, color="gray", lw=0.6); ax.set_xlabel("exponent index"); ax.set_ylabel("λ_i")
+ ax.set_title(f"{name}: spectrum by outcome"); ax.legend(fontsize=8)
+ fig.tight_layout(); fig.savefig(OUT / f"spectrum_{name.split()[0]}_{'n8192' if '8192' in name else 'n2048'}.png", dpi=140); plt.close(fig)  # file name = first word of the display name + sample-size tag
+
+ # ---------- Char 2: bimodality vs threshold (bimodality coefficients + sliding-window outcome rate over sorted λ1) ----------
+ lines += ["## Char 2 — bimodality vs threshold-on-continuum", "",
+ "BC>0.555 hints bimodal. outcome-rate-vs-λ1 sharpness: width of 25→75% outcome transition,",
+ "in λ1 units, divided by the 10–90 inter-percentile spread of λ1 (small = sharp threshold).", ""]
+ for name, p in HEAD.items():
+ d = np.load(p); l1 = d["lyap_spec"][:, 0].astype(float); c = d["exact_correct"].astype(int)
+ bc_all = bimodality_coeff(l1); bc_s = bimodality_coeff(l1[c == 1]); bc_f = bimodality_coeff(l1[c == 0])
+ # outcome rate vs l1 (sorted, sliding)
+ order = np.argsort(l1); ls = l1[order]; cs = c[order]
+ win = max(50, len(l1) // 40); rate = np.convolve(cs, np.ones(win) / win, mode="valid")  # moving average of correctness over `win` consecutive sorted examples
+ lcent = ls[win // 2: win // 2 + len(rate)]  # λ1 value assigned to each window position
+ # find l1 where rate crosses 0.25 and 0.75 (rate decreases with l1)
+ def cross(target):
+ idx = np.where(np.diff((rate >= target).astype(int)) != 0)[0]
+ return float(lcent[idx[0]]) if len(idx) else float("nan")  # first crossing only; NaN if the rate never crosses `target`
+ l25, l75 = cross(0.25), cross(0.75)
+ spread = np.percentile(l1, 90) - np.percentile(l1, 10)
+ sharp = abs(l25 - l75) / spread if spread > 0 and np.isfinite(l25) and np.isfinite(l75) else float("nan")
+ lines += [f"- {name}: BC(all)={bc_all:.3f}, BC(succ)={bc_s:.3f}, BC(fail)={bc_f:.3f}; "
+ f"outcome transition width/spread={sharp:.3f} (λ1@75%={l75:+.3f}, @25%={l25:+.3f})"]
+ fig, ax = plt.subplots(1, 2, figsize=(10, 3.6))  # left: λ1 histograms by outcome, right: outcome rate vs λ1
+ ax[0].hist(l1[c == 1], bins=50, alpha=0.55, color="tab:green", label="success", density=True)
+ ax[0].hist(l1[c == 0], bins=50, alpha=0.55, color="tab:red", label="failure", density=True)
+ ax[0].set_xlabel("λ1"); ax[0].set_ylabel("density"); ax[0].legend(fontsize=8); ax[0].set_title(f"{name}: λ1 by outcome")
+ ax[1].plot(lcent, rate, "-", color="tab:blue"); ax[1].axhline(0.5, color="gray", lw=0.5)
+ ax[1].set_xlabel("λ1"); ax[1].set_ylabel("P(correct)"); ax[1].set_title("outcome rate vs λ1")
+ fig.tight_layout(); fig.savefig(OUT / f"bimodal_{name.split()[0]}_{'n8192' if '8192' in name else 'n2048'}.png", dpi=140); plt.close(fig)
+ lines.append("")
+
+ # ---------- Char 3: separation vs integration window (AUC and Cohen's d of λ1 against the FINAL outcome, per window H) ----------
+ lines += ["## Char 3 — separation magnitude vs integration window H (all examples, final outcome)", "",
+ "Distinct from E5 (which restricted to undecided@H for *prediction*). Here: how does the",
+ "two-class λ1 separation BUILD as the integration window grows?", ""]
+ finals = {"trm": np.load(RETEST / "trm_gbs768_step58590_full_n2048.npz"),  # full runs: final-outcome labels, also used directly as the H=16 entry
+ "hrm": np.load(RETEST / "hrm_righteous_step26040_full_n2048.npz")}
+ short4 = {"trm": RETEST / "trm_gbs768_step58590_short_n2048.npz",  # paths only; loaded on demand for H=4
+ "hrm": RETEST / "hrm_righteous_step26040_short_n2048.npz"}
+ for model in ["trm", "hrm"]:
+ fin = finals[model]; fi = fin["idx"]; yf_all = fin["exact_correct"].astype(int)
+ rows = []  # (H, AUC, Cohen's d) tuples for this model
+ for H in [2, 4, 6, 8, 10, 12, 16]:
+ if H == 16:
+ s = fin
+ elif H == 4:
+ s = np.load(short4[model])
+ else:
+ s = np.load(P1 / f"{'trm_official58590' if model=='trm' else 'hrm26040'}_h{H}_n2048.npz")
+ si = s["idx"]; common, fp, sp = np.intersect1d(fi, si, return_indices=True)  # fp / sp: positions of the shared idx values in the full / window run
+ yf = yf_all[fp]; l1 = s["lyap_spec"][sp, 0].astype(float)
+ rows.append((H, auc(-l1, yf), cohens_d(l1[yf == 0], l1[yf == 1])))
+ lines.append(f"### {model.upper()}: H, AUC(-λ1→final correct), Cohen's d(fail−succ)")
+ for H, a, dd in rows:
+ lines.append(f"- H={H:2d}: AUC={a:.3f}, d={dd:+.3f}")
+ lines.append("")
+ fig, ax = plt.subplots(figsize=(6, 4)); H = [r[0] for r in rows]
+ ax.plot(H, [r[1] for r in rows], "o-", label="AUC(-λ1→correct)")
+ ax2 = ax.twinx(); ax2.plot(H, [abs(r[2]) for r in rows], "s--", color="tab:purple", label="|Cohen's d|")  # second y-axis for the effect size
+ ax.axhline(0.5, color="gray", lw=0.5); ax.set_xlabel("integration window H (segments)")
+ ax.set_ylabel("AUC"); ax2.set_ylabel("|Cohen's d|"); ax.set_title(f"{model.upper()}: λ1 separation vs window")
+ ax.set_ylim(0.4, 1.0); fig.tight_layout(); fig.savefig(OUT / f"sep_vs_window_{model}.png", dpi=140); plt.close(fig)
+
# ---------- Char 4: effect size ----------
# For every dump: AUC, Cohen's d and overlap measures of lambda_1 and of the KS-proxy
# (sum of the positive exponents) between failed and successful examples.
lines += ["## Char 4 — effect size / overlap beyond AUC (full-window λ1 and KS-proxy)", ""]
for name, p in HEAD.items():
    d = np.load(p)
    l1 = d["lyap_spec"][:, 0].astype(float)
    c = d["exact_correct"].astype(int)
    ksp = np.clip(d["lyap_spec"].astype(float), 0, None).sum(1)
    lines += [f"- {name}:",
              f" λ1: AUC={auc(-l1,c):.3f}, Cohen's d(fail−succ)={cohens_d(l1[c==0],l1[c==1]):+.2f}, "
              f"Bhattacharyya(gauss)={bhattacharyya_gauss(l1[c==0],l1[c==1]):.3f}, "
              f"hist-overlap={hist_overlap(l1[c==0],l1[c==1]):.3f}",
              f" KS-proxy: AUC={auc(-ksp,c):.3f}, Cohen's d={cohens_d(ksp[c==0],ksp[c==1]):+.2f}, "
              f"hist-overlap={hist_overlap(ksp[c==0],ksp[c==1]):.3f}"]
lines.append("")

# The report contains non-ASCII characters (λ, →, −, ⁺); write it as UTF-8 explicitly so the
# result does not depend on the locale's default encoding.
(OUT / "characterization_results.md").write_text("\n".join(lines), encoding="utf-8")
print("\n".join(lines))
print("\nfigures + md in", OUT)
diff --git a/analysis_2x2/dump_early_ckpts.sh b/analysis_2x2/dump_early_ckpts.sh
new file mode 100755
index 0000000..1dfb722
--- /dev/null
+++ b/analysis_2x2/dump_early_ckpts.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+# Run after the early-save training: cheap forward dumps (connectivity + drift_zH + ans_drift,
+# NO JVP) on all saved early checkpoints, then pooled broken-vs-complete analysis.
+#
+# Usage: dump_early_ckpts.sh [CUDA_DEVICE]   (device defaults to 1)
+# Dumps that already exist in $OUT are skipped, so the script can be re-run to resume.
+# Exits non-zero if any dump failed, after still running the pooled analysis on what exists.
+set -o pipefail
+# An empty checkpoint dir must yield zero iterations, not a literal "step_*" argument.
+shopt -s nullglob
+cd /home/yurenh2/rrm/research/flossing || { echo "cannot cd to flossing dir" >&2; exit 1; }
+# Strict mode (-e/-u) is deliberately not enabled: the sourced conda activation scripts
+# may not be strict-mode clean. Failures are checked explicitly instead.
+source /home/yurenh2/miniconda3/etc/profile.d/conda.sh || { echo "cannot source conda.sh" >&2; exit 1; }
+conda activate rrm || { echo "cannot activate conda env rrm" >&2; exit 1; }
+export CUDA_VISIBLE_DEVICES="${1:-1}"
+CK=/home/yurenh2/rrm/trm/checkpoints/Maze-30x30-hard-1k-ACT-torch/maze_earlysave_freshTRM
+DATA=/home/yurenh2/rrm/data/maze-30x30-hard-1k
+OUT=analysis_2x2/maze_followup
+mkdir -p "$OUT" || { echo "cannot create $OUT" >&2; exit 1; }
+ckpts=("$CK"/step_*)
+if (( ${#ckpts[@]} == 0 )); then
+  echo "no step_* checkpoints under $CK" >&2
+  exit 1
+fi
+failed=0
+for f in "${ckpts[@]}"; do
+  s=$(basename "$f")
+  if [[ -f "$OUT/earlydump_${s}.npz" ]]; then
+    echo "skip $s"
+    continue
+  fi
+  echo "=== dump $s ==="
+  # With pipefail the pipeline status reflects python, not just tail.
+  if ! python maze_pred_dump.py --ckpt-root "$CK" --ckpt-name "$s" --data "$DATA" \
+      --n 512 --seed 20260616 --out "$OUT/earlydump_${s}.npz" 2>&1 | tail -1; then
+    echo "dump failed for $s" >&2
+    failed=$((failed + 1))
+  fi
+done
+echo "=== pooled analysis ==="
+python analysis_2x2/analyze_early_connectivity.py || exit 1
+if (( failed > 0 )); then
+  echo "$failed checkpoint dump(s) failed" >&2
+  exit 1
+fi
diff --git a/analysis_2x2/early_pairing_hrm26040_joint.md b/analysis_2x2/early_pairing_hrm26040_joint.md
new file mode 100644
index 0000000..8d90e02
--- /dev/null
+++ b/analysis_2x2/early_pairing_hrm26040_joint.md
@@ -0,0 +1,17 @@
+# Early-window pairing — hrm26040_joint
+- paired n=2048; final acc=0.5259; already-correct@step4=0.3447
+- of final-correct, fraction already correct@4: 0.6555
+- early-window lam1: final-correct med -0.1714, final-wrong med -0.1314
+
+## Forecasting FINAL outcome from the first 4 ACT steps
+- AUC(-lam1_early -> final correct) = 0.728
+- AUC(-drift@4 -> final correct) = 0.486
+- AUC(q_halt@4 -> final correct) = 0.908
+- reference: AUC(-lam1_full -> final correct) = 0.987
+
+## Restricted to examples NOT yet correct at step 4 (the decision-relevant set)
+- n=1342, of which eventually correct: 371 (0.276)
+- AUC(-lam1_early -> eventually correct) = 0.448
+- AUC(-drift@4 -> eventually correct) = 0.312
+- AUC(q_halt@4 -> eventually correct) = 0.734
+- early lam1 med: eventually-correct -0.1225 vs never-correct -0.1314
\ No newline at end of file
diff --git a/analysis_2x2/early_pairing_trm_official58590.md b/analysis_2x2/early_pairing_trm_official58590.md
new file mode 100644
index 0000000..2b29ab5
--- /dev/null
+++ b/analysis_2x2/early_pairing_trm_official58590.md
@@ -0,0 +1,17 @@
+# Early-window pairing — trm_official58590
+- paired n=2048; final acc=0.8760; already-correct@step4=0.6943
+- of final-correct, fraction already correct@4: 0.7926
+- early-window lam1: final-correct med +0.0178, final-wrong med +0.1075
+
+## Forecasting FINAL outcome from the first 4 ACT steps
+- AUC(-lam1_early -> final correct) = 0.891
+- AUC(-drift@4 -> final correct) = 0.800
+- AUC(q_halt@4 -> final correct) = 0.901
+- reference: AUC(-lam1_full -> final correct) = 0.993
+
+## Restricted to examples NOT yet correct at step 4 (the decision-relevant set)
+- n=626, of which eventually correct: 372 (0.594)
+- AUC(-lam1_early -> eventually correct) = 0.543
+- AUC(-drift@4 -> eventually correct) = 0.492
+- AUC(q_halt@4 -> eventually correct) = 0.521
+- early lam1 med: eventually-correct +0.1060 vs never-correct +0.1075
\ No newline at end of file
diff --git a/analysis_2x2/early_window_pairing.py b/analysis_2x2/early_window_pairing.py
new file mode 100644
index 0000000..6277df2
--- /dev/null
+++ b/analysis_2x2/early_window_pairing.py
@@ -0,0 +1,85 @@
+"""Early-window (first 4 ACT steps) vs full-window pairing.
+
+Question: does the early-window FTLE forecast FINAL failure before convergence?
+- join full/short npz per example via idx (same seed/n => same sampling)
+- target label = final exact_correct from the FULL-window run
+- report: AUC(-lam1_early -> final correct), same for early drift and q_halt@4,
+ and the conditional version restricted to examples NOT yet correct at step 4
+ (short run's own exact_correct == 0) — the practically relevant population for
+ early-exit / compute-reallocation decisions.
+
+Observational only. Usage: python early_window_pairing.py FULL.npz SHORT.npz TAG
+"""
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+import numpy as np
+
+HERE = Path(__file__).resolve().parent
+
+
+def auc_rank(score: np.ndarray, label: np.ndarray) -> float:
+    """Rank-based (Mann-Whitney) AUC of `score` for the class `label == 1`.
+
+    Args:
+        score: 1-D scores; larger means "more likely positive".
+        label: 1-D array aligned with `score`; entries equal to 1 are positives,
+            entries equal to 0 are negatives, anything else is ignored.
+
+    Returns:
+        U / (n_pos * n_neg), with tied scores sharing their average rank, or NaN
+        when either class is empty.
+    """
+    pos, neg = score[label == 1], score[label == 0]
+    n_pos, n_neg = len(pos), len(neg)
+    if n_pos == 0 or n_neg == 0:
+        return float("nan")
+    allv = np.concatenate([pos, neg])
+    order = np.argsort(allv, kind="mergesort")
+    sv = allv[order]
+    # Tie groups are maximal runs of equal sorted values; run k covers [starts[k], ends[k]).
+    starts = np.flatnonzero(np.concatenate(([True], sv[1:] != sv[:-1])))
+    ends = np.append(starts[1:], len(sv))
+    # The mean of the 1-based ranks starts+1 .. ends is (starts + 1 + ends) / 2.
+    ranks = np.empty(len(allv))
+    ranks[order] = np.repeat((starts + 1 + ends) / 2.0, ends - starts)
+    # Positives occupy the first n_pos slots of allv.
+    return float((ranks[:n_pos].sum() - n_pos * (n_pos + 1) / 2) / (n_pos * n_neg))
+
+
+def main(full_path: str, short_path: str, tag: str) -> None:
+    """Pair a full-window and a short-window dump per example and write the report.
+
+    Args:
+        full_path: npz of the full-window run; "idx", "exact_correct" and
+            "lyap_spec" are read from it.
+        short_path: npz of the short-window run; "idx", "exact_correct",
+            "lyap_spec", "drift_zH" and "q_halt" are read from it.
+        tag: label used in the report title and in the output filename.
+
+    Writes early_pairing_{tag}.md next to this script and echoes it to stdout.
+    """
+    f = np.load(full_path)
+    s = np.load(short_path)
+
+    # Join the two runs on example index; f_pos / s_pos are row positions into each file.
+    fi, si = f["idx"], s["idx"]
+    common, f_pos, s_pos = np.intersect1d(fi, si, return_indices=True)
+    print(f"[{tag}] paired {len(common)} examples (full n={len(fi)}, short n={len(si)})")
+
+    y_final = f["exact_correct"].astype(int)[f_pos]  # FINAL outcome (step 16)
+    y_early = s["exact_correct"].astype(int)[s_pos]  # correct already at step 4
+    l1_early = s["lyap_spec"][s_pos, 0]
+    l1_full = f["lyap_spec"][f_pos, 0]
+    # Last column of the short run, floored at 1e-12 before the log.
+    drift4 = np.log10(np.clip(s["drift_zH"][s_pos, -1], 1e-12, None))  # drift at ACT step 4
+    q4 = s["q_halt"][s_pos, -1]
+
+    # Scores are negated where a smaller value is expected to indicate a correct outcome.
+    lines = [f"# Early-window pairing — {tag}",
+             f"- paired n={len(common)}; final acc={y_final.mean():.4f}; already-correct@step4={y_early.mean():.4f}",
+             f"- of final-correct, fraction already correct@4: {y_early[y_final==1].mean():.4f}",
+             f"- early-window lam1: final-correct med {np.median(l1_early[y_final==1]):+.4f}, "
+             f"final-wrong med {np.median(l1_early[y_final==0]):+.4f}",
+             "",
+             "## Forecasting FINAL outcome from the first 4 ACT steps",
+             f"- AUC(-lam1_early -> final correct) = {auc_rank(-l1_early, y_final):.3f}",
+             f"- AUC(-drift@4 -> final correct) = {auc_rank(-drift4, y_final):.3f}",
+             f"- AUC(q_halt@4 -> final correct) = {auc_rank(q4, y_final):.3f}",
+             f"- reference: AUC(-lam1_full -> final correct) = {auc_rank(-l1_full, y_final):.3f}",
+             "",
+             "## Restricted to examples NOT yet correct at step 4 (the decision-relevant set)"]
+    # m selects examples the short run had not solved; the label stays the FINAL outcome.
+    m = y_early == 0
+    n_m = int(m.sum()); n_pos = int(y_final[m].sum())
+    lines += [f"- n={n_m}, of which eventually correct: {n_pos} ({n_pos/max(n_m,1):.3f})",
+              f"- AUC(-lam1_early -> eventually correct) = {auc_rank(-l1_early[m], y_final[m]):.3f}",
+              f"- AUC(-drift@4 -> eventually correct) = {auc_rank(-drift4[m], y_final[m]):.3f}",
+              f"- AUC(q_halt@4 -> eventually correct) = {auc_rank(q4[m], y_final[m]):.3f}",
+              f"- early lam1 med: eventually-correct {np.median(l1_early[m & (y_final==1)]):+.4f} vs "
+              f"never-correct {np.median(l1_early[m & (y_final==0)]):+.4f}"]
+
+    out = HERE / f"early_pairing_{tag}.md"
+    out.write_text("\n".join(lines))
+    print("\n".join(lines))
+    print("wrote", out)
+
+
+if __name__ == "__main__":
+    # Fail with a usage line instead of a bare IndexError when arguments are missing.
+    if len(sys.argv) < 4:
+        sys.exit("usage: python early_window_pairing.py FULL.npz SHORT.npz TAG")
+    main(sys.argv[1], sys.argv[2], sys.argv[3])
diff --git a/analysis_2x2/evolution_hrm.csv b/analysis_2x2/evolution_hrm.csv
new file mode 100644
index 0000000..c6a6b10
--- /dev/null
+++ b/analysis_2x2/evolution_hrm.csv
@@ -0,0 +1,11 @@
+step,acc,tau,fA,fB,fC,fD,l1A,l1B,l1C,l1D
+2604,0.015625,0.32445240020751953,0.015625,0.892578125,0.0,0.091796875,-0.11765776202082634,-0.10138508677482605,nan,-0.08651512861251831
+5208,0.048828125,0.40048032999038696,0.029296875,0.900390625,0.01953125,0.05078125,-0.17876404523849487,-0.12725482881069183,-0.18487446755170822,-0.11079395189881325
+7812,0.15234375,0.6775624752044678,0.119140625,0.7734375,0.033203125,0.07421875,-0.20277076959609985,-0.1001129113137722,-0.19649504125118256,-0.12255467846989632
+10416,0.1796875,0.7741286754608154,0.166015625,0.599609375,0.013671875,0.220703125,-0.18029353022575378,-0.06513682007789612,-0.038433440029621124,-0.0504322350025177
+13020,0.30078125,0.8470979928970337,0.283203125,0.2421875,0.017578125,0.45703125,-0.17255809903144836,-0.024668633937835693,-0.07013614475727081,-0.010587955359369516
+15624,0.333984375,0.8144785761833191,0.3125,0.1484375,0.021484375,0.517578125,-0.19566883146762848,0.0031415667617693543,-0.05802934989333153,0.009074798785150051
+18228,0.474609375,0.8437007665634155,0.439453125,0.001953125,0.03515625,0.5234375,-0.08657147735357285,nan,-0.0343703031539917,0.012321019545197487
+20832,0.45703125,0.6549708247184753,0.423828125,0.0078125,0.033203125,0.53515625,-0.09613814204931259,-0.022231140173971653,-0.04714604467153549,0.001996797218453139
+23436,0.505859375,0.7171876430511475,0.484375,0.01171875,0.021484375,0.482421875,-0.14968957751989365,-0.04964173026382923,-0.06192338466644287,-0.0044072768650949
+26040,0.5,0.7611898183822632,0.47265625,0.0078125,0.02734375,0.4921875,-0.1546676605939865,0.0052672732854261994,-0.03868532460182905,0.023184756748378277
diff --git a/analysis_2x2/evolution_trm.csv b/analysis_2x2/evolution_trm.csv
new file mode 100644
index 0000000..a0b19f5
--- /dev/null
+++ b/analysis_2x2/evolution_trm.csv
@@ -0,0 +1,11 @@
+step,acc,tau,fA,fB,fC,fD,l1A,l1B,l1C,l1D
+26041,0.576171875,0.7283560037612915,0.560546875,0.01171875,0.015625,0.412109375,-0.04085663706064224,-0.0017999516567215323,0.015885045286267996,0.035852354019880295
+52082,0.6484375,0.8146334886550903,0.626953125,0.0,0.021484375,0.3515625,-0.005659707821905613,nan,0.05140006169676781,0.060247575864195824
+78123,0.6875,0.9091860055923462,0.66015625,0.0,0.02734375,0.3125,-0.004423868143931031,nan,0.05615279637277126,0.08166220411658287
+104164,0.7109375,1.0002410411834717,0.681640625,0.0,0.029296875,0.2890625,-0.006436891388148069,nan,0.08611240983009338,0.09516968205571175
+130205,0.755859375,1.2130658626556396,0.712890625,0.0,0.04296875,0.244140625,0.004312093835324049,nan,0.08519911020994186,0.10669758170843124
+156246,0.7578125,1.2462413311004639,0.73046875,0.0,0.02734375,0.2421875,0.03479858115315437,nan,0.08626002073287964,0.10777830705046654
+182287,0.7421875,1.3035683631896973,0.712890625,0.0,0.029296875,0.2578125,-0.006856455933302641,nan,0.09087501466274261,0.10055964067578316
+208328,0.75,1.3776456117630005,0.720703125,0.0,0.029296875,0.25,0.03374895453453064,nan,0.08769012242555618,0.10804034024477005
+234369,0.7734375,1.36112380027771,0.7421875,0.0,0.03125,0.2265625,0.011585433036088943,nan,0.09419485554099083,0.10411781445145607
+260410,0.76953125,1.406412124633789,0.748046875,0.0,0.021484375,0.23046875,0.004744160454720259,nan,0.09982780367136002,0.10232088342308998
diff --git a/analysis_2x2/offline_followups.py b/analysis_2x2/offline_followups.py
new file mode 100644
index 0000000..8101acb
--- /dev/null
+++ b/analysis_2x2/offline_followups.py
@@ -0,0 +1,229 @@
+"""Offline follow-ups to the 2x2 analysis (no GPU):
+
+1. Residual outcome signal within the unsettled stratum (HRM diag_8k primary,
+ TRM official @58590 secondary): per-cell drift profiles over the 16 ACT steps,
+ end-of-window drift slope, q_halt trajectories, halted_at, lambda spectra with a
+ STRICT in-band threshold, and per-drift-decile AUC(lambda1 -> correct) within the
+ unsettled stratum (does lambda1 add signal beyond drift level?).
+2. Per-example profile of the strict-band settled-but-wrong examples (HRM, n~21).
+3. Difficulty control: #givens per puzzle (input tokens != 1) joined via idx;
+ lambda1 ~ givens rank correlation overall/within outcome, and per-givens-bin
+ AUC(-lambda1 -> correct).
+
+Observational only. Outputs to analysis_2x2/offline_followups/.
+"""
+from __future__ import annotations
+
+from pathlib import Path
+
+import matplotlib
+
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+import numpy as np
+
+HERE = Path(__file__).resolve().parent
+FLOSS = HERE.parent
+OUT = HERE / "offline_followups"
+OUT.mkdir(exist_ok=True)
+
+DATA_TEST_INPUTS = Path("/home/yurenh2/rrm/data/sudoku-extreme-1k-aug-1000/test/all__inputs.npy")
+
+CELL_COLORS = {"A": "tab:green", "B": "tab:orange", "C": "tab:blue", "D": "tab:red"}
+
+
+def auc_rank(score: np.ndarray, label: np.ndarray) -> float:
+    """Rank-based (Mann-Whitney) AUC of `score` for the class `label == 1`.
+
+    Args:
+        score: 1-D scores; larger means "more likely positive".
+        label: 1-D array aligned with `score`; entries equal to 1 are positives,
+            entries equal to 0 are negatives, anything else is ignored.
+
+    Returns:
+        U / (n_pos * n_neg), with tied scores sharing their average rank, or NaN
+        when either class is empty.
+    """
+    pos, neg = score[label == 1], score[label == 0]
+    n_pos, n_neg = len(pos), len(neg)
+    if n_pos == 0 or n_neg == 0:
+        return float("nan")
+    allv = np.concatenate([pos, neg])
+    order = np.argsort(allv, kind="mergesort")
+    sv = allv[order]
+    # Tie groups are maximal runs of equal sorted values; run k covers [starts[k], ends[k]).
+    starts = np.flatnonzero(np.concatenate(([True], sv[1:] != sv[:-1])))
+    ends = np.append(starts[1:], len(sv))
+    # The mean of the 1-based ranks starts+1 .. ends is (starts + 1 + ends) / 2.
+    ranks = np.empty(len(allv))
+    ranks[order] = np.repeat((starts + 1 + ends) / 2.0, ends - starts)
+    # Positives occupy the first n_pos slots of allv.
+    return float((ranks[:n_pos].sum() - n_pos * (n_pos + 1) / 2) / (n_pos * n_neg))
+
+
+def spearman(a: np.ndarray, b: np.ndarray) -> float:
+    """Spearman rank correlation of two aligned 1-D arrays, with tie correction.
+
+    Each input is converted to average ranks (tied values share the mean of the
+    ranks they span) and the Pearson correlation of those ranks is returned.
+    Ranking by argsort-of-argsort would hand tied values arbitrary distinct
+    ranks, which distorts the result for heavily tied inputs such as integer
+    counts or 0/1 outcomes.
+    """
+    def _midranks(x: np.ndarray) -> np.ndarray:
+        # Average 1-based rank of every element; equal values get equal ranks.
+        x = np.asarray(x)
+        order = np.argsort(x, kind="mergesort")
+        sx = x[order]
+        starts = np.flatnonzero(np.concatenate(([True], sx[1:] != sx[:-1])))
+        ends = np.append(starts[1:], len(sx))
+        r = np.empty(len(sx))
+        r[order] = np.repeat((starts + 1 + ends) / 2.0, ends - starts)
+        return r
+
+    return float(np.corrcoef(_midranks(a), _midranks(b))[0, 1])
+
+
+def load(npz_path: Path, strict_pct: float):
+    """Read a dump npz into a dict and attach the strict 2x2 cell masks.
+
+    Added keys: "logd_late" (log10 of the mean of the last four drift_zH columns,
+    floored at 1e-12), "tau_strict" (the `strict_pct` percentile of logd_late) and
+    "cells" (boolean masks A/B/C/D: settled = logd_late < tau_strict, crossed with
+    exact_correct being 1 or 0).
+    """
+    archive = np.load(npz_path)
+    data = {}
+    for key in archive.files:
+        data[key] = archive[key]
+
+    late_mean = data["drift_zH"][:, -4:].mean(1)
+    data["logd_late"] = np.log10(np.clip(late_mean, 1e-12, None))
+    data["tau_strict"] = float(np.percentile(data["logd_late"], strict_pct))
+
+    settled = data["logd_late"] < data["tau_strict"]
+    unsettled = ~settled
+    outcome = data["exact_correct"].astype(int)
+    right, wrong = outcome == 1, outcome == 0
+    data["cells"] = {
+        "A": settled & right,
+        "B": settled & wrong,
+        "C": unsettled & right,
+        "D": unsettled & wrong,
+    }
+    return data
+
+
+def givens_for(idx: np.ndarray) -> np.ndarray:
+    """Count, for each example index in `idx`, the input tokens that differ from 1.
+
+    The inputs file is memory-mapped, so only the requested rows are read.
+    """
+    puzzles = np.load(DATA_TEST_INPUTS, mmap_mode="r")
+    counts = []
+    for i in idx:
+        counts.append((puzzles[i] != 1).sum())
+    return np.array(counts)
+
+
+def drift_profiles_fig(ds, tag, lines):
+    """Plot per-cell drift and q_halt profiles and append the end-of-window slope table.
+
+    Args:
+        ds: dict as returned by `load` ("drift_zH", "q_halt" and "cells" are read).
+        tag: label used in the figure titles, the figure filename and the section header.
+        lines: report lines; the slope section is appended in place.
+
+    Returns:
+        The per-example end-of-window slope array.
+    """
+    fig, axes = plt.subplots(1, 2, figsize=(11, 4))
+    steps = np.arange(1, ds["drift_zH"].shape[1] + 1)
+    for nm, m in ds["cells"].items():
+        # Cells with fewer than 3 examples are left out of the plots.
+        if m.sum() < 3:
+            continue
+        med = np.median(ds["drift_zH"][m], axis=0)
+        q1 = np.percentile(ds["drift_zH"][m], 25, axis=0)
+        q3 = np.percentile(ds["drift_zH"][m], 75, axis=0)
+        axes[0].plot(steps, med, "o-", ms=3, color=CELL_COLORS[nm], label=f"{nm} (n={int(m.sum())})")
+        axes[0].fill_between(steps, q1, q3, color=CELL_COLORS[nm], alpha=0.15)
+        qm = np.median(ds["q_halt"][m], axis=0)
+        axes[1].plot(steps, qm, "o-", ms=3, color=CELL_COLORS[nm], label=nm)
+    axes[0].set_yscale("log"); axes[0].set_xlabel("ACT step"); axes[0].set_ylabel("drift_zH (median, IQR)")
+    axes[0].legend(fontsize=8); axes[0].set_title(f"{tag}: drift profiles per cell")
+    axes[1].set_xlabel("ACT step"); axes[1].set_ylabel("q_halt (median)"); axes[1].axhline(0, color="gray", lw=0.6)
+    axes[1].set_title(f"{tag}: q_halt per cell"); axes[1].legend(fontsize=8)
+    fig.tight_layout(); fig.savefig(OUT / f"fig_{tag}_profiles.png", dpi=150); plt.close(fig)
+
+    # end-of-window slope: log10 mean(drift over ACT steps 13-16) - log10 mean(drift over
+    # ACT steps 9-12), i.e. 0-based columns 12:16 and 8:12; both means are floored at 1e-12.
+    slope = (np.log10(np.clip(ds["drift_zH"][:, 12:16].mean(1), 1e-12, None))
+             - np.log10(np.clip(ds["drift_zH"][:, 8:12].mean(1), 1e-12, None)))
+    lines.append(f"\n### {tag}: end-of-window drift slope (log10 steps13-16 vs 9-12; <0 = still descending)")
+    for nm, m in ds["cells"].items():
+        # Unlike the plots, the table lists every cell, including empty ones.
+        if m.sum() == 0:
+            lines.append(f"- {nm}: n=0")
+            continue
+        lines.append(f"- {nm}: n={int(m.sum())}, slope median {np.median(slope[m]):+.4f}, "
+                     f"IQR [{np.percentile(slope[m],25):+.4f}, {np.percentile(slope[m],75):+.4f}], "
+                     f"frac still descending (<-0.01): {float((slope[m] < -0.01).mean()):.2f}")
+    return slope
+
+
+def main() -> None:
+    """Build offline_followups/followups.md and its figures from the cached dumps.
+
+    Sections: HRM strict 2x2 cell table, drift profiles and end-of-window slope,
+    per-drift-decile AUC inside the unsettled stratum, per-example table of the
+    strict settled-but-wrong cell, #givens difficulty control, and the TRM
+    official @58590 secondary table.
+    """
+    lines = ["# Offline follow-ups (no GPU) — 2026-06-11",
+             "",
+             "Strict in-band thresholds: HRM pct45 of pooled log10 late-drift; TRM pct60 (band edge; B=0 regardless).",
+             "All numbers observational; within-dataset comparisons only."]
+
+    # ---------- HRM diag_8k ----------
+    hrm = load(FLOSS / "diag_8k.npz", strict_pct=45)
+    tag = "hrm26040_n8192_strict"
+    lines.append(f"\n## HRM @26040 (n=8192), strict tau(log10)={hrm['tau_strict']:.4f}")
+    lines.append("| cell | n | lam1 med | lam8 med | token_acc med | halted_at med | q_halt_final med | givens med |")
+    lines.append("|---|---|---|---|---|---|---|---|")
+    g_hrm = givens_for(hrm["idx"])
+    for nm, m in hrm["cells"].items():
+        if m.sum() == 0:
+            lines.append(f"| {nm} | 0 | | | | | | |")
+            continue
+        lines.append(
+            f"| {nm} | {int(m.sum())} | {np.median(hrm['lyap_spec'][m,0]):+.4f} | {np.median(hrm['lyap_spec'][m,-1]):+.4f} "
+            f"| {np.median(hrm['token_acc'][m]):.3f} | {np.median(hrm['halted_at'][m]):.0f} "
+            f"| {np.median(hrm['q_halt'][m,-1]):+.2f} | {np.median(g_hrm[m]):.0f} |")
+
+    slope = drift_profiles_fig(hrm, tag, lines)
+
+    # residual signal within unsettled stratum: per-drift-decile AUC
+    uns = ~(hrm["cells"]["A"] | hrm["cells"]["B"])
+    c = hrm["exact_correct"].astype(int)
+    lines.append("\n### HRM unsettled stratum: AUC(-lam1 -> correct) per log-drift decile")
+    lines.append("| decile | drift range (log10) | n | n_correct | AUC |")
+    lines.append("|---|---|---|---|---|")
+    ld_u, l1_u, c_u = hrm["logd_late"][uns], hrm["lyap_spec"][uns, 0], c[uns]
+    qs = np.percentile(ld_u, np.arange(0, 101, 10))
+    aucs, ws = [], []
+    for i in range(10):
+        # Half-open bins, except the last one which includes its upper edge.
+        m = (ld_u >= qs[i]) & (ld_u <= qs[i + 1] if i == 9 else ld_u < qs[i + 1])
+        a = auc_rank(-l1_u[m], c_u[m])
+        # Only deciles with at least 5 correct examples enter the weighted mean;
+        # every decile is still listed in the table.
+        if not np.isnan(a) and c_u[m].sum() >= 5:
+            aucs.append(a); ws.append(m.sum())
+        auc_txt = f"{a:.3f}" if not np.isnan(a) else "n/a"
+        lines.append(f"| {i+1} | [{qs[i]:.2f}, {qs[i+1]:.2f}] | {int(m.sum())} | {int(c_u[m].sum())} | {auc_txt} |")
+    if aucs:
+        lines.append(f"- weighted mean within-decile AUC = {np.average(aucs, weights=ws):.3f} "
+                     f"(vs unconditioned within-unsettled AUC {auc_rank(-l1_u, c_u):.3f})")
+
+    # also: does end-slope separate C from D?
+    lines.append(f"- AUC(-end_slope -> correct | unsettled) = {auc_rank(-slope[uns], c_u):.3f} "
+                 f"(C still-descending fraction vs D, see slope table above)")
+
+    # ---------- strict-B per-example table ----------
+    B = hrm["cells"]["B"]
+    lines.append(f"\n## HRM strict-band settled-but-wrong examples (n={int(B.sum())})")
+    lines.append("| idx | givens | token_acc | lam1 | drift_final | halted_at | q_halt_final |")
+    lines.append("|---|---|---|---|---|---|---|")
+    bi = np.where(B)[0]
+    order = np.argsort(hrm["token_acc"][bi])
+    for j in bi[order]:
+        lines.append(
+            f"| {int(hrm['idx'][j])} | {int(g_hrm[j])} | {hrm['token_acc'][j]:.3f} | {hrm['lyap_spec'][j,0]:+.3f} "
+            f"| {hrm['drift_zH'][j,-1]:.3f} | {int(hrm['halted_at'][j])} | {hrm['q_halt'][j,-1]:+.2f} |")
+    # B drift profiles vs A band
+    fig, ax = plt.subplots(figsize=(6.5, 4))
+    # Step axis follows the dump's own number of ACT steps instead of assuming 16.
+    steps = np.arange(1, hrm["drift_zH"].shape[1] + 1)
+    A = hrm["cells"]["A"]
+    ax.fill_between(steps, np.percentile(hrm["drift_zH"][A], 10, axis=0),
+                    np.percentile(hrm["drift_zH"][A], 90, axis=0), color="tab:green", alpha=0.2,
+                    label=f"A q10-q90 (n={int(A.sum())})")
+    for j in bi:
+        ax.plot(steps, hrm["drift_zH"][j], "-", lw=1, alpha=0.8, color="tab:orange")
+    ax.set_yscale("log"); ax.set_xlabel("ACT step"); ax.set_ylabel("drift_zH")
+    ax.set_title("HRM: strict-B drift profiles vs A band"); ax.legend(fontsize=8)
+    fig.tight_layout(); fig.savefig(OUT / "fig_hrm_strictB_profiles.png", dpi=150); plt.close(fig)
+
+    # ---------- difficulty control (HRM) ----------
+    l1 = hrm["lyap_spec"][:, 0]
+    lines.append("\n## HRM difficulty control (#givens, input tokens != 1)")
+    lines.append(f"- givens: min {g_hrm.min()}, median {np.median(g_hrm):.0f}, max {g_hrm.max()}")
+    lines.append(f"- Spearman(lam1, givens): overall {spearman(l1, g_hrm):+.3f}; "
+                 f"correct-only {spearman(l1[c==1], g_hrm[c==1]):+.3f}; "
+                 f"wrong-only {spearman(l1[c==0], g_hrm[c==0]):+.3f}")
+    lines.append(f"- Spearman(correct, givens) = {spearman(c.astype(float), g_hrm):+.3f}")
+    lines.append("\n| givens bin | n | acc | AUC(-lam1 -> correct) |")
+    lines.append("|---|---|---|---|")
+    # np.unique drops duplicate quantile edges, so there may be fewer than five bins.
+    edges = np.unique(np.percentile(g_hrm, [0, 20, 40, 60, 80, 100]))
+    bin_aucs, bin_ws = [], []
+    for i in range(len(edges) - 1):
+        m = (g_hrm >= edges[i]) & (g_hrm <= edges[i + 1] if i == len(edges) - 2 else g_hrm < edges[i + 1])
+        a = auc_rank(-l1[m], c[m])
+        lines.append(f"| [{edges[i]:.0f}, {edges[i+1]:.0f}] | {int(m.sum())} | {c[m].mean():.3f} | {a:.3f} |")
+        if not np.isnan(a):
+            bin_aucs.append(a); bin_ws.append(m.sum())
+    # The overall AUC is computed from the same data rather than pasted in as a constant,
+    # and the summary is skipped when no bin has a defined AUC.
+    if bin_aucs:
+        lines.append(f"- weighted mean within-bin AUC = {np.average(bin_aucs, weights=bin_ws):.3f} "
+                     f"(overall {auc_rank(-l1, c):.3f})")
+
+    # ---------- TRM official @58590 (secondary, n=512) ----------
+    trm = load(FLOSS / "official_gbs768_spectrum/trm_gbs768_base_step58590_n512_k8_seed20260602.npz", strict_pct=60)
+    g_trm = givens_for(trm["idx"])
+    ct = trm["exact_correct"].astype(int)
+    lines.append(f"\n## TRM official @58590 (n=512), strict tau(log10)={trm['tau_strict']:.4f}")
+    lines.append("| cell | n | lam1 med | token_acc med | q_halt_final med | givens med |")
+    lines.append("|---|---|---|---|---|---|")
+    for nm, m in trm["cells"].items():
+        if m.sum() == 0:
+            lines.append(f"| {nm} | 0 | | | | |")
+            continue
+        lines.append(f"| {nm} | {int(m.sum())} | {np.median(trm['lyap_spec'][m,0]):+.4f} "
+                     f"| {np.median(trm['token_acc'][m]):.3f} | {np.median(trm['q_halt'][m,-1]):+.2f} "
+                     f"| {np.median(g_trm[m]):.0f} |")
+    drift_profiles_fig(trm, "trm_official58590_n512_strict", lines)
+    l1t = trm["lyap_spec"][:, 0]
+    lines.append(f"- Spearman(lam1, givens): overall {spearman(l1t, g_trm):+.3f}; "
+                 f"wrong-only {spearman(l1t[ct==0], g_trm[ct==0]):+.3f}")
+    lines.append(f"- Spearman(correct, givens) = {spearman(ct.astype(float), g_trm):+.3f}")
+
+    (OUT / "followups.md").write_text("\n".join(lines))
+    # Only the report header is echoed; the full text goes to the file.
+    print("\n".join(lines[:6]))
+    print("wrote", OUT / "followups.md")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/analysis_2x2/offline_followups/followups.md b/analysis_2x2/offline_followups/followups.md
new file mode 100644
index 0000000..be5f519
--- /dev/null
+++ b/analysis_2x2/offline_followups/followups.md
@@ -0,0 +1,88 @@
+# Offline follow-ups (no GPU) — 2026-06-11
+
+Strict in-band thresholds: HRM pct45 of pooled log10 late-drift; TRM pct60 (band edge; B=0 regardless).
+All numbers observational; within-dataset comparisons only.
+
+## HRM @26040 (n=8192), strict tau(log10)=-0.0129
+| cell | n | lam1 med | lam8 med | token_acc med | halted_at med | q_halt_final med | givens med |
+|---|---|---|---|---|---|---|---|
+| A | 3665 | -0.8670 | -0.9787 | 1.000 | 4 | +7.47 | 26 |
+| B | 21 | -0.8421 | -0.9495 | 0.617 | 6 | +7.47 | 25 |
+| C | 633 | -0.7796 | -0.8815 | 1.000 | 10 | +7.47 | 25 |
+| D | 3873 | -0.5991 | -0.7140 | 0.630 | 0 | -9.62 | 25 |
+
+### hrm26040_n8192_strict: end-of-window drift slope (log10 steps13-16 vs 9-12; <0 = still descending)
+- A: n=3665, slope median -0.0023, IQR [-0.0073, +0.0012], frac still descending (<-0.01): 0.20
+- B: n=21, slope median -0.0063, IQR [-0.4005, -0.0009], frac still descending (<-0.01): 0.48
+- C: n=633, slope median -0.0006, IQR [-1.4084, +0.0088], frac still descending (<-0.01): 0.44
+- D: n=3873, slope median -0.0031, IQR [-0.0556, +0.0459], frac still descending (<-0.01): 0.46
+
+### HRM unsettled stratum: AUC(-lam1 -> correct) per log-drift decile
+| decile | drift range (log10) | n | n_correct | AUC |
+|---|---|---|---|---|
+| 1 | [-0.01, 0.66] | 451 | 422 | 0.966 |
+| 2 | [0.66, 1.42] | 450 | 50 | 0.972 |
+| 3 | [1.42, 1.52] | 451 | 4 | 0.988 |
+| 4 | [1.52, 1.56] | 450 | 6 | 0.964 |
+| 5 | [1.56, 1.60] | 451 | 1 | 0.984 |
+| 6 | [1.60, 1.62] | 450 | 7 | 0.949 |
+| 7 | [1.62, 1.65] | 451 | 5 | 0.837 |
+| 8 | [1.65, 1.68] | 450 | 5 | 0.851 |
+| 9 | [1.68, 1.71] | 451 | 16 | 0.804 |
+| 10 | [1.71, 1.93] | 451 | 117 | 0.685 |
+- weighted mean within-decile AUC = 0.879 (vs unconditioned within-unsettled AUC 0.933)
+- AUC(-end_slope -> correct | unsettled) = 0.605 (C still-descending fraction vs D, see slope table above)
+
+## HRM strict-band settled-but-wrong examples (n=21)
+| idx | givens | token_acc | lam1 | drift_final | halted_at | q_halt_final |
+|---|---|---|---|---|---|---|
+| 342267 | 17 | 0.407 | -0.867 | 0.976 | 5 | +7.41 |
+| 212705 | 17 | 0.469 | -0.838 | 0.964 | 8 | +7.44 |
+| 329832 | 17 | 0.481 | -0.703 | 0.970 | 8 | +7.41 |
+| 20075 | 27 | 0.519 | -0.812 | 0.966 | 5 | +7.50 |
+| 198242 | 25 | 0.568 | -0.843 | 0.980 | 7 | +7.47 |
+| 223591 | 24 | 0.580 | -0.939 | 0.951 | 4 | +7.47 |
+| 238704 | 27 | 0.593 | -0.931 | 0.953 | 5 | +7.47 |
+| 364431 | 25 | 0.593 | -0.806 | 0.956 | 6 | +7.44 |
+| 274637 | 26 | 0.593 | -0.859 | 0.979 | 6 | +7.47 |
+| 182424 | 24 | 0.605 | -0.985 | 0.949 | 6 | +7.47 |
+| 351919 | 25 | 0.617 | -0.742 | 0.965 | 5 | +7.47 |
+| 123022 | 27 | 0.617 | -0.826 | 0.951 | 7 | +7.50 |
+| 150426 | 25 | 0.630 | -0.767 | 0.963 | 9 | +7.47 |
+| 175427 | 26 | 0.630 | -0.843 | 0.946 | 8 | +7.50 |
+| 422185 | 26 | 0.642 | -0.841 | 0.946 | 7 | +7.47 |
+| 344032 | 24 | 0.654 | -0.903 | 0.965 | 4 | +7.53 |
+| 30703 | 25 | 0.691 | -0.732 | 0.972 | 6 | +7.53 |
+| 386549 | 23 | 0.691 | -0.842 | 0.966 | 4 | +7.47 |
+| 3370 | 26 | 0.716 | -0.861 | 0.955 | 6 | +7.47 |
+| 243909 | 24 | 0.753 | -0.786 | 0.969 | 8 | +7.50 |
+| 258307 | 25 | 0.877 | -0.918 | 0.952 | 5 | +7.47 |
+
+## HRM difficulty control (#givens, input tokens != 1)
+- givens: min 17, median 25, max 36
+- Spearman(lam1, givens): overall -0.350; correct-only -0.155; wrong-only -0.180
+- Spearman(correct, givens) = +0.276
+
+| givens bin | n | acc | AUC(-lam1 -> correct) |
+|---|---|---|---|
+| [17, 24] | 1152 | 0.321 | 0.976 |
+| [24, 25] | 1795 | 0.373 | 0.980 |
+| [25, 26] | 1764 | 0.503 | 0.987 |
+| [26, 36] | 3481 | 0.681 | 0.983 |
+- weighted mean within-bin AUC = 0.982 (overall 0.984)
+
+## TRM official @58590 (n=512), strict tau(log10)=1.0240
+| cell | n | lam1 med | token_acc med | q_halt_final med | givens med |
+|---|---|---|---|---|---|
+| A | 307 | +0.0105 | 1.000 | +7.78 | 26 |
+| B | 0 | | | | |
+| C | 141 | +0.0174 | 1.000 | +7.81 | 25 |
+| D | 64 | +0.1034 | 0.630 | -11.12 | 25 |
+
+### trm_official58590_n512_strict: end-of-window drift slope (log10 steps13-16 vs 9-12; <0 = still descending)
+- A: n=307, slope median -0.1471, IQR [-0.2267, -0.0641], frac still descending (<-0.01): 0.90
+- B: n=0
+- C: n=141, slope median -0.0080, IQR [-0.2603, +0.0808], frac still descending (<-0.01): 0.49
+- D: n=64, slope median -0.0125, IQR [-0.0525, +0.0276], frac still descending (<-0.01): 0.53
+- Spearman(lam1, givens): overall -0.240; wrong-only -0.238
+- Spearman(correct, givens) = +0.148
\ No newline at end of file
diff --git a/analysis_2x2/offline_followups/phase1_e1.md b/analysis_2x2/offline_followups/phase1_e1.md
new file mode 100644
index 0000000..b3e3f40
--- /dev/null
+++ b/analysis_2x2/offline_followups/phase1_e1.md
@@ -0,0 +1,27 @@
+# E1 offline batch — bootstrap CIs, settling robustness, TRM multi4 pair
+
+## Bootstrap / exact CIs (TRM official @58590, n=2048)
+- settled-wrong fraction: observed 0/254; exact 95% upper bound 0.0117 (1.17% of failures)
+- AUC(-lam1->correct) = 0.9935, bootstrap 95% CI (0.9908244697676584, 0.9957330475628791)
+- lam1(wrong) median 95% CI (0.10100110620260239, 0.10556983947753906)
+- lam1(correct) median 95% CI (0.011215815320611, 0.011744528077542782)
+
+## Bootstrap CIs (HRM @26040, n=8192, strict band)
+- strict settled-wrong fraction of failures: observed 0.0054, bootstrap 95% CI (0.0032613427182413084, 0.007798538095694945)
+- AUC(-lam1->correct) = 0.9841, bootstrap 95% CI (0.9815470536412456, 0.9865145187475995)
+
+## Settling-criterion robustness (B-cell counts under alternative drift definitions)
+- TRM official n=2048 | zH: B=0/A=1724 (tau=1.36) | zL: B=0/A=1728 (tau=1.42) | combined: B=0/A=1727 (tau=1.54)
+- HRM n=8192 | zH: B=63/A=4103 (tau=0.77) | zL: B=59/A=4083 (tau=1.01) | combined: B=60/A=4087 (tau=1.07)
+
+## TRM official-pipeline multi4 vs baseline (matched objective, n=512 each)
+- baseline @58590: acc=0.875; A/B/C/D=434/0/14/64; fD=0.125; lam1(D)=+0.1034; lam1(A)=+0.0111
+- multi4 @35805 (best): acc=0.900; A/B/C/D=452/0/9/51; fD=0.100; lam1(D)=+0.1019; lam1(A)=+0.0039
+- multi4 @65100 (final): acc=0.824; A/B/C/D=408/1/14/89; fD=0.174; lam1(D)=+0.0946; lam1(A)=+0.0133
+
+## hrm_multi4 provenance (E6a)
+- diag_hrm_multi4_step_{20832,23436,26040}_512.npz step grid matches HRM pretrain numbering;
+ multi4_eval_compare/logs should contain the eval invocations — checked manually below.
+- ACTION: if the hrm_multi4 run is pretrain-pipeline (ACT-streaming + perturbation), then the
+ May-28 multi4 vs righteous baseline comparison IS matched-pipeline and Sec 3.4's caveat is
+ narrower than written; step9 E-vs-F pair (queued) covers the fixed-unroll objective regardless.
\ No newline at end of file
diff --git a/analysis_2x2/offline_phase1_e1.py b/analysis_2x2/offline_phase1_e1.py
new file mode 100644
index 0000000..7c42825
--- /dev/null
+++ b/analysis_2x2/offline_phase1_e1.py
@@ -0,0 +1,123 @@
+"""E1 offline batch (experiment_framework.md):
+(1) bootstrap CIs for headline quantities; (2) settling-criterion robustness (z_L / combined);
+(3) TRM official multi4 vs baseline matched-pipeline 2x2; (4) provenance note for hrm_multi4.
+Outputs: offline_followups/phase1_e1.md
+"""
+from __future__ import annotations
+
+from pathlib import Path
+
+import numpy as np
+
+HERE = Path(__file__).resolve().parent
+FLOSS = HERE.parent
+OUT = HERE / "offline_followups"
+RNG = np.random.default_rng(0)
+
+
+def auc_rank(score, label):
+    """Rank-based (Mann-Whitney) AUC of `score` for the class `label == 1`.
+
+    Tied scores share their average rank. Returns NaN when either class is empty.
+    The tie handling is vectorized because this function is called once per
+    bootstrap resample by `boot_ci`.
+    """
+    pos, neg = score[label == 1], score[label == 0]
+    n_pos, n_neg = len(pos), len(neg)
+    if n_pos == 0 or n_neg == 0:
+        return float("nan")
+    allv = np.concatenate([pos, neg])
+    order = np.argsort(allv, kind="mergesort")
+    sv = allv[order]
+    # Tie groups are maximal runs of equal sorted values; run k covers [starts[k], ends[k]).
+    starts = np.flatnonzero(np.concatenate(([True], sv[1:] != sv[:-1])))
+    ends = np.append(starts[1:], len(sv))
+    # The mean of the 1-based ranks starts+1 .. ends is (starts + 1 + ends) / 2.
+    ranks = np.empty(len(allv))
+    ranks[order] = np.repeat((starts + 1 + ends) / 2.0, ends - starts)
+    # Positives occupy the first n_pos slots of allv.
+    return float((ranks[:n_pos].sum() - n_pos * (n_pos + 1) / 2) / (n_pos * n_neg))
+
+
+def boot_ci(stat_fn, n, B=10000):
+    """Percentile-bootstrap 95% interval of a statistic.
+
+    Args:
+        stat_fn: callable taking an index array of length `n` (drawn with
+            replacement from range(n)) and returning a float.
+        n: number of examples to resample.
+        B: number of bootstrap resamples.
+
+    Returns:
+        (2.5th, 97.5th) percentiles over the resamples whose statistic is not NaN,
+        or (nan, nan) when every resample was NaN.
+    """
+    vals = []
+    for _ in range(B):
+        idx = RNG.integers(0, n, n)
+        v = stat_fn(idx)
+        if not np.isnan(v):
+            vals.append(v)
+    # Taking percentiles of an empty list is not meaningful; report an undefined interval.
+    if not vals:
+        return float("nan"), float("nan")
+    lo, hi = np.percentile(vals, [2.5, 97.5])
+    return float(lo), float(hi)
+
+
+def late(d, key="drift_zH"):
+    """log10 of the mean of the last four columns of d[key], floored at 1e-12."""
+    tail_mean = d[key][:, -4:].mean(1)
+    floored = np.clip(tail_mean, 1e-12, None)
+    return np.log10(floored)
+
+
+def otsu(x, nbins=256):
+    """Otsu threshold of a 1-D sample, computed on an `nbins`-bin histogram.
+
+    Returns the bin center that maximizes the between-class variance
+    (mu_total * omega - mu)^2 / (omega * (1 - omega)); split points that leave
+    one side empty are excluded.
+    """
+    counts, edges = np.histogram(x, bins=nbins)
+    counts = counts.astype(float)
+    centers = (edges[:-1] + edges[1:]) / 2
+    prob = counts / counts.sum()
+    omega = np.cumsum(prob)
+    mu = np.cumsum(prob * centers)
+    mu_total = mu[-1]
+    denom = omega * (1 - omega)
+    denom[denom <= 0] = np.nan
+    between = (mu_total * omega - mu) ** 2 / denom
+    return float(centers[np.nanargmax(between)])
+
+
+lines = ["# E1 offline batch — bootstrap CIs, settling robustness, TRM multi4 pair", ""]
+
+# ---------- (1) bootstrap CIs ----------
+trm = np.load(FLOSS / "analysis_2x2/retest/trm_gbs768_step58590_full_n2048.npz")
+c_t = trm["exact_correct"].astype(int); l1_t = trm["lyap_spec"][:, 0]; ld_t = late(trm)
+n_wrong = int((c_t == 0).sum())
+# Upper bound p solving (1 - p)^n = 0.05, valid only for zero observed events;
+# undefined (NaN) when there are no failures at all.
+cp_upper = 1 - 0.05 ** (1 / n_wrong) if n_wrong else float("nan")  # Clopper-Pearson upper for 0 events
+# NOTE(review): the "observed 0" below is asserted, not recomputed from ld_t here —
+# confirm it against the B count for this file in the settling-robustness section.
+lines += ["## Bootstrap / exact CIs (TRM official @58590, n=2048)",
+          f"- settled-wrong fraction: observed 0/{n_wrong}; exact 95% upper bound {cp_upper:.4f} "
+          f"({cp_upper*100:.2f}% of failures)",
+          f"- AUC(-lam1->correct) = {auc_rank(-l1_t, c_t):.4f}, bootstrap 95% CI "
+          f"{boot_ci(lambda i: auc_rank(-l1_t[i], c_t[i]), len(c_t))}",
+          f"- lam1(wrong) median 95% CI {boot_ci(lambda i: float(np.median(l1_t[i][c_t[i]==0])), len(c_t))}",
+          f"- lam1(correct) median 95% CI {boot_ci(lambda i: float(np.median(l1_t[i][c_t[i]==1])), len(c_t))}"]
+
+hrm = np.load(FLOSS / "diag_8k.npz")
+c_h = hrm["exact_correct"].astype(int); l1_h = hrm["lyap_spec"][:, 0]; ld_h = late(hrm)
+# The strict threshold is fixed on the full sample and reused inside every resample.
+tau_strict = float(np.percentile(ld_h, 45))
+def strictB_frac(idx):
+    """Fraction of failures in the resample `idx` that lie below the strict threshold."""
+    c, ld = c_h[idx], ld_h[idx]
+    w = (c == 0).sum()
+    return float(((ld < tau_strict) & (c == 0)).sum() / max(w, 1))
+# The observed value comes from the same statistic on the unresampled data,
+# so it cannot drift from the data the way a pasted-in ratio can.
+obs_strictB = strictB_frac(np.arange(len(c_h)))
+lines += ["", "## Bootstrap CIs (HRM @26040, n=8192, strict band)",
+          f"- strict settled-wrong fraction of failures: observed {obs_strictB:.4f}, bootstrap 95% CI "
+          f"{boot_ci(strictB_frac, len(c_h))}",
+          f"- AUC(-lam1->correct) = {auc_rank(-l1_h, c_h):.4f}, bootstrap 95% CI "
+          f"{boot_ci(lambda i: auc_rank(-l1_h[i], c_h[i]), len(c_h))}"]
+
+# ---------- (2) settling-criterion robustness ----------
+lines += ["", "## Settling-criterion robustness (B-cell counts under alternative drift definitions)"]
+for tag, d, c in [("TRM official n=2048", trm, c_t), ("HRM n=8192", hrm, c_h)]:
+    row = [tag]
+    for key, nm in [("drift_zH", "zH"), ("drift_zL", "zL")]:
+        ld = late(d, key); tau = otsu(ld); conv = ld < tau
+        row.append(f"{nm}: B={int((conv & (c==0)).sum())}/A={int((conv & (c==1)).sum())} (tau={tau:.2f})")
+    # Combined drift: Euclidean norm of the two late-window means, then log10.
+    comb = np.log10(np.clip(np.sqrt(d["drift_zH"][:, -4:].mean(1) ** 2 + d["drift_zL"][:, -4:].mean(1) ** 2), 1e-12, None))
+    tau = otsu(comb); conv = comb < tau
+    row.append(f"combined: B={int((conv & (c==0)).sum())}/A={int((conv & (c==1)).sum())} (tau={tau:.2f})")
+    lines.append("- " + " | ".join(row))
+
+# ---------- (3) TRM official multi4 vs baseline (matched pipeline) ----------
+lines += ["", "## TRM official-pipeline multi4 vs baseline (matched objective, n=512 each)"]
+spec = FLOSS / "official_gbs768_spectrum"
+for nm, f in [("baseline @58590", spec / "trm_gbs768_base_step58590_n512_k8_seed20260602.npz"),
+              ("multi4 @35805 (best)", spec / "trm_gbs768_multi4_step35805_n512_k8_seed20260602.npz"),
+              ("multi4 @65100 (final)", spec / "trm_gbs768_multi4_step65100_n512_k8_seed20260602.npz")]:
+    # A missing dump is reported in the table rather than aborting the batch.
+    if not f.exists():
+        lines.append(f"- {nm}: npz missing"); continue
+    d = np.load(f); c = d["exact_correct"].astype(int); l1 = d["lyap_spec"][:, 0]
+    ld = late(d); tau = otsu(ld); conv = ld < tau
+    A = conv & (c == 1); B = conv & (c == 0); C = (~conv) & (c == 1); D = (~conv) & (c == 0)
+    lines.append(f"- {nm}: acc={c.mean():.3f}; A/B/C/D={int(A.sum())}/{int(B.sum())}/{int(C.sum())}/{int(D.sum())}; "
+                 f"fD={float(D.mean()):.3f}; lam1(D)={np.median(l1[D]) if D.sum()>0 else float('nan'):+.4f}; "
+                 f"lam1(A)={np.median(l1[A]) if A.sum()>0 else float('nan'):+.4f}")
+
+# ---------- (4) provenance note ----------
+lines += ["", "## hrm_multi4 provenance (E6a)",
+          "- diag_hrm_multi4_step_{20832,23436,26040}_512.npz step grid matches HRM pretrain numbering;",
+          "  multi4_eval_compare/logs should contain the eval invocations — checked manually below.",
+          "- ACTION: if the hrm_multi4 run is pretrain-pipeline (ACT-streaming + perturbation), then the",
+          "  May-28 multi4 vs righteous baseline comparison IS matched-pipeline and Sec 3.4's caveat is",
+          "  narrower than written; step9 E-vs-F pair (queued) covers the fixed-unroll objective regardless."]
+
+OUT.mkdir(exist_ok=True)
+(OUT / "phase1_e1.md").write_text("\n".join(lines))
+print("\n".join(lines))
diff --git a/analysis_2x2/phase1/phase1_results.md b/analysis_2x2/phase1/phase1_results.md
new file mode 100644
index 0000000..5dde25d
--- /dev/null
+++ b/analysis_2x2/phase1/phase1_results.md
@@ -0,0 +1,48 @@
+# Phase-1 analysis (E5 horizon sweep, E6 matched-objective, E2 replication)
+
+## E5: when does fate become legible? AUC(early signal -> FINAL correct), restricted to not-yet-correct@H
+
+### TRM
+| H | solved@H | undecided n | of which eventual✓ | AUC(-λ₁) | AUC(-drift) | AUC(q_halt) |
+|---|---|---|---|---|---|---|
+| 2 | 0.542 | 939 | 0.729 | 0.526 | 0.411 | 0.608 |
+| 4 | 0.694 | 626 | 0.594 | 0.543 | 0.492 | 0.521 |
+| 6 | 0.766 | 479 | 0.470 | 0.523 | 0.441 | 0.531 |
+| 8 | 0.801 | 407 | 0.376 | 0.483 | 0.495 | 0.514 |
+| 10 | 0.831 | 347 | 0.268 | 0.477 | 0.536 | 0.476 |
+| 12 | 0.846 | 315 | 0.194 | 0.483 | 0.509 | 0.509 |
+
+### HRM
+| H | solved@H | undecided n | of which eventual✓ | AUC(-λ₁) | AUC(-drift) | AUC(q_halt) |
+|---|---|---|---|---|---|---|
+| 2 | 0.071 | 1903 | 0.490 | 0.375 | 0.193 | 0.810 |
+| 4 | 0.345 | 1342 | 0.276 | 0.448 | 0.312 | 0.734 |
+| 6 | 0.436 | 1155 | 0.159 | 0.438 | 0.349 | 0.676 |
+| 8 | 0.471 | 1083 | 0.103 | 0.432 | 0.371 | 0.705 |
+| 10 | 0.489 | 1046 | 0.072 | 0.478 | 0.407 | 0.705 |
+| 12 | 0.506 | 1012 | 0.041 | 0.503 | 0.348 | 0.635 |
+
+## E6: matched-objective intervention (step9 fixed-unroll runs, n=512)
+
+### HRM (E base vs F multi4)
+| ckpt | base acc | base fD | base λ₁(D) | multi4 acc | multi4 fD | multi4 λ₁(D) |
+|---|---|---|---|---|---|---|
+| step_12500 | 0.613 | 0.387 | -0.0157 | 0.631 | 0.369 | -0.0215 |
+| step_25000 | 0.615 | 0.385 | +0.0138 | 0.617 | 0.379 | +0.0101 |
+| best | 0.619 | 0.381 | -0.0214 | 0.613 | 0.387 | -0.0181 |
+| final | 0.588 | 0.410 | +0.0456 | 0.609 | 0.387 | +0.0335 |
+
+### TRM (G base vs H multi4)
+| ckpt | base acc | base fD | base λ₁(D) | multi4 acc | multi4 fD | multi4 λ₁(D) |
+|---|---|---|---|---|---|---|
+| step_12500 | 0.553 | 0.344 | +0.0219 | 0.600 | 0.305 | +0.0284 |
+| step_25000 | 0.525 | 0.396 | +0.0209 | 0.545 | 0.361 | +0.0222 |
+| best | 0.596 | 0.334 | +0.0233 | 0.580 | 0.361 | +0.0366 |
+| final | 0.477 | 0.312 | +0.0164 | 0.537 | 0.270 | +0.0249 |
+
+## E2: decomposition on a SECOND HRM training run (step9_E fixed-unroll, n=2048)
+
+| ckpt | acc | A | B | C | D | λ₁(A) | λ₁(D) | settled-wrong frac of failures |
+|---|---|---|---|---|---|---|---|---|
+| best | 0.637 | 1244 | 1 | 61 | 742 | -0.1685 | -0.0308 | 0.0013 |
+| final | 0.594 | 1124 | 3 | 92 | 829 | -0.0144 | +0.0444 | 0.0036 | \ No newline at end of file
diff --git a/analysis_2x2/queue_maze_followups.sh b/analysis_2x2/queue_maze_followups.sh
new file mode 100755
index 0000000..2257013
--- /dev/null
+++ b/analysis_2x2/queue_maze_followups.sh
@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+# Pre-HRM-Maze queue (user request): (1) per-cell failure structure of TRM-Maze (cheap inference),
+# (2) continue-train TRM-Maze from step_130200 to test saturation (does acc climb toward ~1.0?).
+# Waits for a genuinely free GPU; good-neighbor on the shared box.
+set -o pipefail
+cd /home/yurenh2/rrm/research/flossing
+source /home/yurenh2/miniconda3/etc/profile.d/conda.sh
+conda activate rrm
+OUTDIR=analysis_2x2/maze_followup; mkdir -p "$OUTDIR"
+ST="$OUTDIR/queue.log"; log(){ echo "[$(date '+%m-%d %H:%M:%S')] $*" >> "$ST"; }
+CKDIR=/home/yurenh2/rrm/trm/checkpoints/maze-30x30-hard-1k-ACT-torch/pretrain_att_maze30x30_2gpu_gbs384
+DATA=/home/yurenh2/rrm/data/maze-30x30-hard-1k
+free_gpu(){ nvidia-smi --query-gpu=index,utilization.gpu,memory.used --format=csv,noheader,nounits |
+  awk -F', ' '{ if ($2 < 20 && $3 < 4000) { print $1; exit } }'; }  # first GPU under both limits, else nothing
+
+log "queue started; waiting for a free GPU (util<20, mem<4G; 12h fallback)"
+DEADLINE=$(( $(date +%s) + 12*3600 )); GPU=""
+while true; do
+ g1="$(free_gpu)"; if [[ -n "$g1" ]]; then sleep 60; g2="$(free_gpu)"; [[ "$g2" == "$g1" ]] && GPU="$g1" && break; fi
+ if (( $(date +%s) > DEADLINE )); then GPU="$(nvidia-smi --query-gpu=index,memory.used --format=csv,noheader,nounits|sort -t, -k2 -n|head -1|cut -d, -f1)"; log "fallback GPU $GPU"; break; fi
+ sleep 300
+done
+log "claimed GPU $GPU"; export CUDA_VISIBLE_DEVICES="$GPU"
+
+# (1) Solution-space dynamics + per-cell structure: TRM-Maze final ckpt (saves preds, inputs,
+# labels, AND per-step decoded-answer Hamming drift over solution-space cells).
+log "start MAZE solution-space dump (inference, no JVP)"
+if python maze_pred_dump.py --ckpt-root "$CKDIR" --ckpt-name step_130200 --data "$DATA" \
+ --n 512 --out "$OUTDIR/maze_preds_step130200.npz" > "$OUTDIR/preds_maze.log" 2>&1; then
+ log "done maze dump"; else log "FAILED maze dump"; fi
+# (1b) SUDOKU control: solution-space dynamics where full-space DID separate.
+SUD_CK=/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro
+SUD_DATA=/home/yurenh2/rrm/data/sudoku-extreme-1k-aug-1000
+log "start SUDOKU solution-space control dump"
+if python maze_pred_dump.py --ckpt-root "$SUD_CK" --ckpt-name step_58590 --data "$SUD_DATA" \
+ --n 512 --out "$OUTDIR/sudoku_preds_step58590.npz" > "$OUTDIR/preds_sudoku.log" 2>&1; then
+ log "done sudoku dump"; else log "FAILED sudoku dump"; fi
+
+# (2) continue-train TRM-Maze from step_130200 (saturation test). gbs 192 on 1 GPU; constant LR.
+EPOCHS=12500; EVAL_INTERVAL=2500  # single source for both the log line and the overrides below
+log "start continue-train TRM-Maze from step_130200 (epochs=$EPOCHS, eval every $EVAL_INTERVAL)"
+cd /home/yurenh2/rrm/trm || { log "FAILED continue-train: cannot cd to /home/yurenh2/rrm/trm"; exit 1; }
+export WANDB_MODE=offline; RUN=pretrain_att_maze30x30_CONTINUE_from130200
+nohup python pretrain.py arch=trm "data_paths=[$DATA]" "evaluators=[]" \
+  epochs="$EPOCHS" eval_interval="$EVAL_INTERVAL" \
+  lr=1e-4 puzzle_emb_lr=1e-4 weight_decay=1.0 puzzle_emb_weight_decay=1.0 global_batch_size=192 \
+  arch.L_layers=2 arch.H_cycles=3 arch.L_cycles=4 +run_name="$RUN" ema=True \
+  +checkpoint_every_eval=true "+load_checkpoint=$CKDIR/step_130200" \
+  > "/home/yurenh2/rrm/research/flossing/$OUTDIR/continue_train.log" 2>&1
+echo "continue-train exited (rc=$?)" >> "/home/yurenh2/rrm/research/flossing/$ST"
diff --git a/analysis_2x2/results.md b/analysis_2x2/results.md
new file mode 100644
index 0000000..a62bba2
--- /dev/null
+++ b/analysis_2x2/results.md
@@ -0,0 +1,69 @@
+# 2x2 analysis (convergence x correctness) — generated 2026-06-11
+
+## hrm26040_n8192
+- npz: `/home/yurenh2/rrm/research/flossing/diag_8k.npz`, n=8192, exact_acc=0.525
+- late-drift def: mean(drift_zH[:, -4:]), Otsu tau(log10)=0.766, frac_converged=0.509
+
+| cell | n | lam1 median | lam1 IQR | token_acc med |
+|---|---|---|---|---|
+| A_conv_correct | 4103 | -0.8617 | [-0.9020, -0.8153] | 1.000 |
+| B_conv_wrong | 63 | -0.5912 | [-0.8091, -0.5061] | 0.630 |
+| C_nonconv_correct | 195 | -0.6943 | [-0.7354, -0.6466] | 1.000 |
+| D_nonconv_wrong | 3831 | -0.5998 | [-0.6475, -0.5488] | 0.630 |
+
+- mixture: wrong-that-converged = 0.016; correct-that-nonconverged = 0.045
+- dlam1(correct-wrong): overall -0.2586; within-conv -0.2705; within-nonconv -0.0945
+- dlam1(wrong: conv - nonconv) = +0.0085
+- AUC(-lam1 -> correct): overall 0.984; within-conv 0.852; within-nonconv 0.818
+- AUC(-log d_late -> correct) = 0.964; AUC(-lam1 -> converged) = 0.986
+
+## trm_singleGPU_step260410_n512
+- npz: `/home/yurenh2/rrm/research/flossing/diag_trm_singleGPU_step260410_512.npz`, n=512, exact_acc=0.770
+- late-drift def: mean(drift_zH[:, -4:]), Otsu tau(log10)=1.406, frac_converged=0.748
+
+| cell | n | lam1 median | lam1 IQR | token_acc med |
+|---|---|---|---|---|
+| A_conv_correct | 383 | +0.0047 | [+0.0007, +0.0137] | 1.000 |
+| B_conv_wrong | 0 | - | - | - |
+| C_nonconv_correct | 11 | +0.0998 | [+0.0907, +0.1035] | 1.000 |
+| D_nonconv_wrong | 118 | +0.1023 | [+0.0935, +0.1112] | 0.642 |
+
+- mixture: wrong-that-converged = 0.000; correct-that-nonconverged = 0.028
+- dlam1(correct-wrong): overall -0.0973; within-conv +nan; within-nonconv -0.0025
+- dlam1(wrong: conv - nonconv) = +nan
+- AUC(-lam1 -> correct): overall 0.989; within-conv nan; within-nonconv 0.619
+- AUC(-log d_late -> correct) = 0.975; AUC(-lam1 -> converged) = 0.996
+
+## trm_singleGPU_step130205_n512
+- npz: `/home/yurenh2/rrm/research/flossing/diag_trm_singleGPU_step130205_512.npz`, n=512, exact_acc=0.756
+- late-drift def: mean(drift_zH[:, -4:]), Otsu tau(log10)=1.213, frac_converged=0.713
+
+| cell | n | lam1 median | lam1 IQR | token_acc med |
+|---|---|---|---|---|
+| A_conv_correct | 365 | +0.0043 | [-0.0007, +0.0141] | 1.000 |
+| B_conv_wrong | 0 | - | - | - |
+| C_nonconv_correct | 22 | +0.0852 | [+0.0694, +0.0988] | 1.000 |
+| D_nonconv_wrong | 125 | +0.1067 | [+0.0995, +0.1126] | 0.630 |
+
+- mixture: wrong-that-converged = 0.000; correct-that-nonconverged = 0.057
+- dlam1(correct-wrong): overall -0.1017; within-conv +nan; within-nonconv -0.0215
+- dlam1(wrong: conv - nonconv) = +nan
+- AUC(-lam1 -> correct): overall 0.989; within-conv nan; within-nonconv 0.805
+- AUC(-log d_late -> correct) = 0.957; AUC(-lam1 -> converged) = 0.996
+
+## trm_step13020_n512
+- npz: `/home/yurenh2/rrm/research/flossing/diag_trm_step13020_512.npz`, n=512, exact_acc=0.596
+- late-drift def: mean(drift_zH[:, -4:]), Otsu tau(log10)=0.730, frac_converged=0.598
+
+| cell | n | lam1 median | lam1 IQR | token_acc med |
+|---|---|---|---|---|
+| A_conv_correct | 296 | -0.0043 | [-0.0113, +0.0052] | 1.000 |
+| B_conv_wrong | 10 | -0.0027 | [-0.0090, +0.0009] | 0.556 |
+| C_nonconv_correct | 9 | +0.0419 | [+0.0323, +0.0480] | 1.000 |
+| D_nonconv_wrong | 197 | +0.0317 | [+0.0143, +0.0462] | 0.642 |
+
+- mixture: wrong-that-converged = 0.048; correct-that-nonconverged = 0.030
+- dlam1(correct-wrong): overall -0.0339; within-conv -0.0016; within-nonconv +0.0102
+- dlam1(wrong: conv - nonconv) = -0.0344
+- AUC(-lam1 -> correct): overall 0.849; within-conv 0.515; within-nonconv 0.360
+- AUC(-log d_late -> correct) = 0.976; AUC(-lam1 -> converged) = 0.887
diff --git a/analysis_2x2/run_clv_geom.sh b/analysis_2x2/run_clv_geom.sh
new file mode 100755
index 0000000..d271370
--- /dev/null
+++ b/analysis_2x2/run_clv_geom.sh
@@ -0,0 +1,23 @@
+#!/usr/bin/env bash
+# CLV leading-vector geometry probe (b): does the EXPANSION DIRECTION carry outcome signal
+# beyond drift+q_halt? Sudoku TRM + HRM (mlp_t, no flash issue), n=512, on the free GPU (index = $1, default 1).
+set -o pipefail
+cd /home/yurenh2/rrm/research/flossing
+source /home/yurenh2/miniconda3/etc/profile.d/conda.sh
+conda activate rrm
+export CUDA_VISIBLE_DEVICES="${1:-1}"  # GPU index comes from the first CLI argument; falls back to 1
+OUTDIR=analysis_2x2/clv; mkdir -p "$OUTDIR"
+ST="$OUTDIR/status.log"; log(){ echo "[$(date '+%H:%M:%S')] $*" >> "$ST"; }  # log: append a timestamped line to $ST
+TRM=/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro
+HRM="/home/yurenh2/rrm/hrm/checkpoints/Sudoku-extreme-1k-aug-1000 ACT-torch/HierarchicalReasoningModel_ACTV1 righteous-python"
+
+log "start TRM geom n=512 (GPU $CUDA_VISIBLE_DEVICES)"
+if python diagnose_trm_joint_geom.py --ckpt-root "$TRM" --ckpt-name step_58590 \
+ --n-samples 512 --batch-size 8 --k-lyap 8 --t-ons 1 --seed 0 \
+ --out "$OUTDIR/trm_geom_step58590_n512.npz" > "$OUTDIR/trm.log" 2>&1; then log "done TRM"; else log "FAILED TRM"; fi  # failure is only logged; the HRM run below still starts
+
+log "start HRM geom n=512"
+if python diagnose_hrm_joint_geom.py --ckpt-root "$HRM" --ckpt-name step_26040 \
+ --n-samples 512 --batch-size 16 --k-lyap 8 --t-ons 1 --seed 0 \
+ --out "$OUTDIR/hrm_geom_step26040_n512.npz" > "$OUTDIR/hrm.log" 2>&1; then log "done HRM"; else log "FAILED HRM"; fi  # python output goes to hrm.log, outcome to $ST
+log "clv geom finished"
diff --git a/analysis_2x2/run_maze_diag_queue.sh b/analysis_2x2/run_maze_diag_queue.sh
new file mode 100755
index 0000000..7dfc757
--- /dev/null
+++ b/analysis_2x2/run_maze_diag_queue.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+# Maze FTLE diagnostics (att+rope, math SDP backend). Final checkpoint first (answers the
+# headline separation question), then two more for an evolution view. Waits for a GPU.
+set -o pipefail
+cd /home/yurenh2/rrm/research/flossing
+source /home/yurenh2/miniconda3/etc/profile.d/conda.sh
+conda activate rrm
+
+CKPT_ROOT=/home/yurenh2/rrm/trm/checkpoints/maze-30x30-hard-1k-ACT-torch/pretrain_att_maze30x30_2gpu_gbs384
+OUTDIR=analysis_2x2/maze; mkdir -p "$OUTDIR"
+STATUS="$OUTDIR/queue_status.log"
+log(){ echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" >> "$STATUS"; }
+free_gpu(){ nvidia-smi --query-gpu=index,utilization.gpu,memory.used --format=csv,noheader,nounits |
+  awk -F', ' '{ if ($2 < 25 && $3 < 6000) { print $1; exit } }'; }  # first GPU under both limits, else nothing
+
+log "maze diag queue started (n=512, batch 2, math SDP; final ckpt first)"
+DEADLINE=$(( $(date +%s) + 12*3600 )); GPU=""
+while true; do
+ g1="$(free_gpu)"; if [[ -n "$g1" ]]; then sleep 60; g2="$(free_gpu)"; [[ "$g2" == "$g1" ]] && GPU="$g1" && break; fi
+ if (( $(date +%s) > DEADLINE )); then
+ GPU="$(nvidia-smi --query-gpu=index,memory.used --format=csv,noheader,nounits | sort -t, -k2 -n | head -1 | cut -d, -f1)"
+ log "12h fallback: GPU $GPU"; break; fi
+ sleep 300
+done
+log "claimed GPU $GPU"; export CUDA_VISIBLE_DEVICES="$GPU"
+
+for CK in step_130200 step_65100 step_26040; do
+  OUT="$OUTDIR/maze_${CK}_n512.npz"
+  # Skip checkpoints whose output npz already exists.
+  if [[ -f "$OUT" ]]; then log "skip $CK"; continue; fi
+  log "start $CK"
+  if ! python diagnose_trm_joint_maze.py --ckpt-root "$CKPT_ROOT" --ckpt-name "$CK" \
+      --n-samples 512 --batch-size 2 --k-lyap 8 --t-ons 1 --seed 0 \
+      --out "$OUT" > "$OUTDIR/${CK}.log" 2>&1; then
+    log "FAILED $CK (see $OUTDIR/${CK}.log)"; continue
+  fi
+  acc=$(grep -oE "acc=[0-9.]+" "$OUTDIR/${CK}.log" | tail -1)  # last acc=<number> match in the job log
+  log "done $CK ($acc)"
+done
+log "maze diag queue finished"
diff --git a/analysis_2x2/run_phase1_queue.sh b/analysis_2x2/run_phase1_queue.sh
new file mode 100755
index 0000000..989c7ee
--- /dev/null
+++ b/analysis_2x2/run_phase1_queue.sh
@@ -0,0 +1,92 @@
+#!/usr/bin/env bash
+# Phase-1 queue (experiment_framework.md): E5 horizon sweeps, E2 run-level replication,
+# E6 matched-objective step9 pairs. Waits for a free GPU (12h fallback), runs sequentially.
+set -o pipefail
+
+cd /home/yurenh2/rrm/research/flossing
+source /home/yurenh2/miniconda3/etc/profile.d/conda.sh
+conda activate rrm
+
+OUTDIR=analysis_2x2/phase1
+mkdir -p "$OUTDIR"
+STATUS="$OUTDIR/queue_status.log"
+TRM_OFF="/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro"
+TRM_SGL="/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_singleGPU"
+HRM_ROOT="/home/yurenh2/rrm/hrm/checkpoints/Sudoku-extreme-1k-aug-1000 ACT-torch/HierarchicalReasoningModel_ACTV1 righteous-python"
+S9=/home/yurenh2/rrm/research/flossing
+
+log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" >> "$STATUS"; }
+free_gpu() {  # echo the index of the first GPU with util<30 and memory.used<8000, or nothing
+  nvidia-smi --query-gpu=index,utilization.gpu,memory.used --format=csv,noheader,nounits |
+    awk -F', ' '{ if ($2 < 30 && $3 < 8000) { print $1; exit } }'
+}
+
+log "phase-1 queue started (E5 horizon sweeps, E2 step9_E replication, E6 step9 pairs)"
+DEADLINE=$(( $(date +%s) + 12*3600 ))
+GPU=""
+while true; do
+ g1="$(free_gpu)"
+ if [[ -n "$g1" ]]; then
+ sleep 60; g2="$(free_gpu)"
+ if [[ "$g2" == "$g1" ]]; then GPU="$g1"; break; fi
+ fi
+ if (( $(date +%s) > DEADLINE )); then
+ GPU="$(nvidia-smi --query-gpu=index,memory.used --format=csv,noheader,nounits | sort -t, -k2 -n | head -1 | cut -d, -f1)"
+ log "12h fallback: taking GPU $GPU"
+ break
+ fi
+ sleep 300
+done
+log "claimed GPU $GPU"
+export CUDA_VISIBLE_DEVICES="$GPU"
+
+run_job() { # usage: run_job NAME HORIZON SCRIPT [ARGS...]; writes $OUTDIR/NAME.npz and NAME.log
+  local job="$1" horizon="$2"; shift 2
+  local npz="$OUTDIR/${job}.npz" joblog="$OUTDIR/${job}.log"
+  [[ -f "$npz" ]] && { log "skip $job"; return 0; }
+  log "start $job"
+  if ! DIAG_HORIZON="$horizon" python "$@" --out "$npz" > "$joblog" 2>&1; then
+    log "FAILED $job"; return
+  fi
+  log "done $job"
+}
+
+# --- E5: TRM horizon sweep (h=4 already exists in retest/) ---
+for H in 2 6 8 10 12; do
+ run_job "trm_official58590_h${H}_n2048" "$H" diagnose_trm_joint_horizon.py \
+ --ckpt-root "$TRM_OFF" --ckpt-name step_58590 --n-samples 2048 --batch-size 16 \
+ --k-lyap 8 --t-ons 1 --seed 0
+done
+
+# --- E5: HRM horizon sweep ---
+for H in 2 6 8 10 12; do
+ run_job "hrm26040_h${H}_n2048" "$H" diagnose_hrm_joint_horizon.py \
+ --ckpt-root "$HRM_ROOT" --ckpt-name step_26040 --n-samples 2048 --batch-size 32 \
+ --k-lyap 8 --t-ons 1 --seed 0
+done
+
+# --- E2: HRM second training run (step9_E fixed-unroll baseline), full window ---
+run_job "step9E_hrm_best_full_n2048" 16 diagnose_hrm_joint.py \
+ --ckpt-root "$HRM_ROOT" --ckpt-name "$S9/step9_E_hrm_baseline_parallel_fixed_26040_50k_ckpts/best.pt" \
+ --n-samples 2048 --batch-size 32 --k-lyap 8 --t-ons 1 --seed 0
+run_job "step9E_hrm_final_full_n2048" 16 diagnose_hrm_joint.py \
+ --ckpt-root "$HRM_ROOT" --ckpt-name "$S9/step9_E_hrm_baseline_parallel_fixed_26040_50k_ckpts/final.pt" \
+ --n-samples 2048 --batch-size 32 --k-lyap 8 --t-ons 1 --seed 0
+
+# --- E6: matched-objective pairs (n=512): HRM E vs F, TRM G vs H ---
+for CK in step_12500 step_25000 best final; do
+ run_job "step9E_hrm_${CK}_n512" 16 diagnose_hrm_joint.py \
+ --ckpt-root "$HRM_ROOT" --ckpt-name "$S9/step9_E_hrm_baseline_parallel_fixed_26040_50k_ckpts/${CK}.pt" \
+ --n-samples 512 --batch-size 32 --k-lyap 8 --t-ons 1 --seed 0
+ run_job "step9F_hrm_${CK}_n512" 16 diagnose_hrm_joint.py \
+ --ckpt-root "$HRM_ROOT" --ckpt-name "$S9/step9_F_hrm_multi4_loguniform_ramp_26040_50k_ckpts/${CK}.pt" \
+ --n-samples 512 --batch-size 32 --k-lyap 8 --t-ons 1 --seed 0
+ run_job "step9G_trm_${CK}_n512" 16 diagnose_trm_joint.py \
+ --ckpt-root "$TRM_SGL" --ckpt-name "$S9/step9_G_trm_baseline_parallel_fixed_26041_batch4_50k_ckpts/${CK}.pt" \
+ --n-samples 512 --batch-size 16 --k-lyap 8 --t-ons 1 --seed 0
+ run_job "step9H_trm_${CK}_n512" 16 diagnose_trm_joint.py \
+ --ckpt-root "$TRM_SGL" --ckpt-name "$S9/step9_H_trm_multi4_loguniform_ramp_26041_batch4_50k_ckpts/${CK}.pt" \
+ --n-samples 512 --batch-size 16 --k-lyap 8 --t-ons 1 --seed 0
+done
+
+log "phase-1 queue finished"
diff --git a/analysis_2x2/run_retest_2x2.sh b/analysis_2x2/run_retest_2x2.sh
new file mode 100755
index 0000000..9cdcc13
--- /dev/null
+++ b/analysis_2x2/run_retest_2x2.sh
@@ -0,0 +1,75 @@
+#!/usr/bin/env bash
+# Queue: wait for a free GPU, then run the 2x2 re-test diagnostics:
+# 1. TRM official_gbs768 @ step_58590 (86.9% ckpt) full-window n=2048
+# 2. TRM official_gbs768 @ step_58590 early-window n=2048 (first 4 ACT steps)
+# 3. HRM righteous-python @ step_26040 (joint est.) full-window n=2048
+# 4. HRM righteous-python @ step_26040 early-window n=2048
+# Same --seed 0 and same n across full/short pairs so idx fields pair up.
+set -o pipefail
+
+cd /home/yurenh2/rrm/research/flossing
+source /home/yurenh2/miniconda3/etc/profile.d/conda.sh
+conda activate rrm
+
+OUTDIR=analysis_2x2/retest
+mkdir -p "$OUTDIR"
+STATUS="$OUTDIR/queue_status.log"
+TRM_ROOT="/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro"
+HRM_ROOT="/home/yurenh2/rrm/hrm/checkpoints/Sudoku-extreme-1k-aug-1000 ACT-torch/HierarchicalReasoningModel_ACTV1 righteous-python"
+
+log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" >> "$STATUS"; }
+
+free_gpu() {
+  # Emit the index of the first listed GPU with util<30% and mem<8GB; emit nothing if none qualifies.
+  nvidia-smi --query-gpu=index,utilization.gpu,memory.used --format=csv,noheader,nounits |
+    awk -F', ' '{ if ($2 < 30 && $3 < 8000) { print $1; exit } }'
+}
+
+log "queue started, waiting for a free GPU (util<30%, mem<8GB, two checks 60s apart; 12h fallback)"
+DEADLINE=$(( $(date +%s) + 12*3600 ))
+GPU=""
+while true; do
+ g1="$(free_gpu)"
+ if [[ -n "$g1" ]]; then
+ sleep 60
+ g2="$(free_gpu)"
+ if [[ "$g2" == "$g1" ]]; then GPU="$g1"; break; fi
+ fi
+ if (( $(date +%s) > DEADLINE )); then
+ GPU="$(nvidia-smi --query-gpu=index,memory.used --format=csv,noheader,nounits | sort -t, -k2 -n | head -1 | cut -d, -f1)"
+ log "12h fallback: taking GPU $GPU (most free memory) despite utilization"
+ break
+ fi
+ sleep 300
+done
+log "claimed GPU $GPU"
+export CUDA_VISIBLE_DEVICES="$GPU"
+
+run_job() {  # usage: run_job NAME SCRIPT [ARGS...]
+  local job="$1"; shift
+  local npz="$OUTDIR/${job}.npz" joblog="$OUTDIR/${job}.log"
+  # An npz that is already present is never regenerated.
+  [[ -f "$npz" ]] && { log "skip $job (output exists)"; return 0; }
+  log "start $job"
+  # The script's stdout and stderr both land in the per-job log file.
+  if python "$@" --out "$npz" > "$joblog" 2>&1; then log "done $job"; return; fi
+  log "FAILED $job (see $joblog)"
+}
+
+run_job trm_gbs768_step58590_full_n2048 \
+ diagnose_trm_joint.py --ckpt-root "$TRM_ROOT" --ckpt-name step_58590 \
+ --n-samples 2048 --batch-size 16 --k-lyap 8 --t-ons 1 --seed 0
+
+run_job trm_gbs768_step58590_short_n2048 \
+ diagnose_trm_joint_short.py --ckpt-root "$TRM_ROOT" --ckpt-name step_58590 \
+ --n-samples 2048 --batch-size 16 --k-lyap 8 --t-ons 1 --seed 0
+
+run_job hrm_righteous_step26040_full_n2048 \
+ diagnose_hrm_joint.py --ckpt-root "$HRM_ROOT" --ckpt-name step_26040 \
+ --n-samples 2048 --batch-size 32 --k-lyap 8 --t-ons 1 --seed 0
+
+run_job hrm_righteous_step26040_short_n2048 \
+ diagnose_hrm_joint_short.py --ckpt-root "$HRM_ROOT" --ckpt-name step_26040 \
+ --n-samples 2048 --batch-size 32 --k-lyap 8 --t-ons 1 --seed 0
+
+log "all retest diagnostics finished"
diff --git a/analysis_2x2/sweep_hrm26040_joint_n2048.csv b/analysis_2x2/sweep_hrm26040_joint_n2048.csv
new file mode 100644
index 0000000..c4a8b6c
--- /dev/null
+++ b/analysis_2x2/sweep_hrm26040_joint_n2048.csv
@@ -0,0 +1,20 @@
+pct,tau,nA,nB,nC,nD,lam1_med_B,lam1_med_D
+5,-0.011992306107950053,102,1,975,970,-0.22750887274742126,0.03138776309788227
+10,-0.01058249634236916,203,2,874,969,-0.15784957632422447,0.0314599983394146
+15,-0.009196568905867631,306,2,771,969,-0.15784957632422447,0.0314599983394146
+20,-0.008030915298460381,408,2,669,969,-0.15784957632422447,0.0314599983394146
+25,-0.006957811036151525,509,3,568,968,-0.10465513914823532,0.03146406076848507
+30,-0.006023327926233211,612,3,465,968,-0.10465513914823532,0.03146406076848507
+35,-0.004937302122343026,714,3,363,968,-0.10465513914823532,0.03146406076848507
+40,-0.0033157474065108714,814,5,263,966,-0.14139553904533386,0.03153638541698456
+45,-0.0012688460769353994,917,5,160,966,-0.14139553904533386,0.03153638541698456
+50,0.5973517463361264,1013,11,64,960,-0.0452079139649868,0.03175541199743748
+55,1.3993508404545523,1034,92,43,879,0.033553484827280045,0.031315527856349945
+60,1.4994241881984267,1036,193,41,778,0.029089782387018204,0.03191038407385349
+65,1.547490873090265,1038,293,39,678,0.02823900803923607,0.032731860876083374
+70,1.5799635318056726,1040,393,37,578,0.029996931552886963,0.03191038407385349
+75,1.6038922156413973,1040,496,37,475,0.029781382530927658,0.03269977867603302
+80,1.6256149248592253,1040,598,37,373,0.03138776309788227,0.030956093221902847
+85,1.6518692868467673,1042,698,35,273,0.02991918846964836,0.03507234901189804
+90,1.677233478220475,1044,799,33,172,0.028961632400751114,0.044597823172807693
+95,1.708463281897988,1049,896,28,75,0.029092321172356606,0.052752863615751266
diff --git a/analysis_2x2/sweep_hrm26040_n8192.csv b/analysis_2x2/sweep_hrm26040_n8192.csv
new file mode 100644
index 0000000..b2f9f82
--- /dev/null
+++ b/analysis_2x2/sweep_hrm26040_n8192.csv
@@ -0,0 +1,20 @@
+pct,tau,nA,nB,nC,nD,lam1_med_B,lam1_med_D
+5,-0.024403165931412795,410,0,3888,3894,nan,-0.5996554493904114
+10,-0.02256924601928628,818,2,3480,3892,-0.8345716893672943,-0.5995031297206879
+15,-0.021242109610698556,1225,4,3073,3890,-0.8419434428215027,-0.5994212925434113
+20,-0.020075217197184483,1634,5,2664,3889,-0.8429288864135742,-0.599414587020874
+25,-0.019023493082334777,2039,9,2259,3885,-0.8409579992294312,-0.5993443727493286
+30,-0.017916215347934226,2444,14,1854,3880,-0.8415209650993347,-0.599242627620697
+35,-0.016770285088033448,2852,15,1446,3879,-0.8420839309692383,-0.5992319583892822
+40,-0.015257325632230992,3260,17,1038,3877,-0.8420839309692383,-0.599224328994751
+45,-0.012884453933432984,3665,21,633,3873,-0.8420839309692383,-0.5990880727767944
+50,0.371726245629762,4067,29,231,3865,-0.8250086307525635,-0.5989847183227539
+55,1.3871392863925978,4136,370,162,3524,-0.5518185496330261,-0.6034163236618042
+60,1.4972477780890994,4138,777,160,3117,-0.5671209096908569,-0.6080851554870605
+65,1.5481401201578837,4144,1181,154,2713,-0.5731313228607178,-0.6117637753486633
+70,1.5835620683527638,4147,1587,151,2307,-0.5808747410774231,-0.6136600971221924
+75,1.6110709254128324,4153,1991,145,1903,-0.584848165512085,-0.616129994392395
+80,1.6353553466380342,4156,2397,142,1497,-0.5879771709442139,-0.6185622215270996
+85,1.6574195825233073,4161,2802,137,1092,-0.5930990278720856,-0.6169184744358063
+90,1.6817109395003784,4167,3205,131,689,-0.5954653024673462,-0.619819164276123
+95,1.7160308725239535,4184,3598,114,296,-0.5972362160682678,-0.6263471245765686
diff --git a/analysis_2x2/sweep_trm_official58590_n2048.csv b/analysis_2x2/sweep_trm_official58590_n2048.csv
new file mode 100644
index 0000000..747eb40
--- /dev/null
+++ b/analysis_2x2/sweep_trm_official58590_n2048.csv
@@ -0,0 +1,20 @@
+pct,tau,nA,nB,nC,nD,lam1_med_B,lam1_med_D
+5,0.6425822518044539,103,0,1691,254,nan,0.10287182405591011
+10,0.7090247691558889,205,0,1589,254,nan,0.10287182405591011
+15,0.7579128424918095,308,0,1486,254,nan,0.10287182405591011
+20,0.7959777945095498,410,0,1384,254,nan,0.10287182405591011
+25,0.8365441996415908,512,0,1282,254,nan,0.10287182405591011
+30,0.8629687925201839,615,0,1179,254,nan,0.10287182405591011
+35,0.8888206300392528,717,0,1077,254,nan,0.10287182405591011
+40,0.9161186566022281,819,0,975,254,nan,0.10287182405591011
+45,0.9434356854634222,922,0,872,254,nan,0.10287182405591011
+50,0.9675603217529236,1024,0,770,254,nan,0.10287182405591011
+55,0.9912876432316691,1126,0,668,254,nan,0.10287182405591011
+60,1.01543024505144,1229,0,565,254,nan,0.10287182405591011
+65,1.0446947959840196,1331,0,463,254,nan,0.10287182405591011
+70,1.0695176837117564,1433,0,361,254,nan,0.10287182405591011
+75,1.100648956309921,1536,0,258,254,nan,0.10287182405591011
+80,1.2184501775276642,1638,0,156,254,nan,0.10287182405591011
+85,1.696239099229016,1733,7,61,247,0.1096716970205307,0.10244401544332504
+90,1.794792940908479,1740,103,54,151,0.10650952905416489,0.10119745135307312
+95,1.8549837815942356,1749,196,45,58,0.10514867305755615,0.09396017715334892
diff --git a/analysis_2x2/sweep_trm_official_gbs768_step58590_n512.csv b/analysis_2x2/sweep_trm_official_gbs768_step58590_n512.csv
new file mode 100644
index 0000000..011c7cc
--- /dev/null
+++ b/analysis_2x2/sweep_trm_official_gbs768_step58590_n512.csv
@@ -0,0 +1,20 @@
+pct,tau,nA,nB,nC,nD,lam1_med_B,lam1_med_D
+5,0.6520149305579591,26,0,422,64,nan,0.10339764878153801
+10,0.7055634417495923,52,0,396,64,nan,0.10339764878153801
+15,0.7503615854928217,77,0,371,64,nan,0.10339764878153801
+20,0.8005556182318934,103,0,345,64,nan,0.10339764878153801
+25,0.840565017355408,128,0,320,64,nan,0.10339764878153801
+30,0.8627818507451074,154,0,294,64,nan,0.10339764878153801
+35,0.8956748981953134,179,0,269,64,nan,0.10339764878153801
+40,0.923274365661878,205,0,243,64,nan,0.10339764878153801
+45,0.9484961694413702,230,0,218,64,nan,0.10339764878153801
+50,0.9733951084873498,256,0,192,64,nan,0.10339764878153801
+55,1.0041568660376061,282,0,166,64,nan,0.10339764878153801
+60,1.02397895505604,307,0,141,64,nan,0.10339764878153801
+65,1.04513267210118,333,0,115,64,nan,0.10339764878153801
+70,1.0684270138688392,358,0,90,64,nan,0.10339764878153801
+75,1.097585285269502,384,0,64,64,nan,0.10339764878153801
+80,1.1958246561577712,409,0,39,64,nan,0.10339764878153801
+85,1.3747578433518455,435,0,13,64,nan,0.10339764878153801
+90,1.785234311475947,437,23,11,41,0.10155700892210007,0.10475098341703415
+95,1.8427352264688373,438,48,10,16,0.10240473970770836,0.10433581843972206
diff --git a/analysis_2x2/sweep_trm_singleGPU_step130205_n512.csv b/analysis_2x2/sweep_trm_singleGPU_step130205_n512.csv
new file mode 100644
index 0000000..2c44154
--- /dev/null
+++ b/analysis_2x2/sweep_trm_singleGPU_step130205_n512.csv
@@ -0,0 +1,20 @@
+pct,tau,nA,nB,nC,nD,lam1_med_B,lam1_med_D
+5,0.3525141817739674,26,0,361,125,nan,0.10669758170843124
+10,0.5997049779303102,52,0,335,125,nan,0.10669758170843124
+15,0.7276586924161174,77,0,310,125,nan,0.10669758170843124
+20,0.745895549719167,103,0,284,125,nan,0.10669758170843124
+25,0.7574238256855987,128,0,259,125,nan,0.10669758170843124
+30,0.7756026094327713,154,0,233,125,nan,0.10669758170843124
+35,0.7948198738771284,179,0,208,125,nan,0.10669758170843124
+40,0.8150679269326413,205,0,182,125,nan,0.10669758170843124
+45,0.8328527737072317,230,0,157,125,nan,0.10669758170843124
+50,0.8680717056251492,256,0,131,125,nan,0.10669758170843124
+55,0.8878173590916981,282,0,105,125,nan,0.10669758170843124
+60,0.9114474129601072,307,0,80,125,nan,0.10669758170843124
+65,0.9357973777374493,333,0,54,125,nan,0.10669758170843124
+70,0.9914630324419641,358,0,29,125,nan,0.10669758170843124
+75,1.6265775245886065,370,14,17,111,0.10315298289060593,0.10740122199058533
+80,1.6638789305051203,370,39,17,86,0.10702066123485565,0.10642785206437111
+85,1.690579189299017,370,65,17,60,0.10719160735607147,0.10596203431487083
+90,1.722529342618924,370,90,17,35,0.10658939182758331,0.10669758170843124
+95,1.7704888842696966,372,114,15,11,0.10589195787906647,0.10992329567670822
diff --git a/analysis_2x2/sweep_trm_singleGPU_step260410_n512.csv b/analysis_2x2/sweep_trm_singleGPU_step260410_n512.csv
new file mode 100644
index 0000000..fac3a6d
--- /dev/null
+++ b/analysis_2x2/sweep_trm_singleGPU_step260410_n512.csv
@@ -0,0 +1,20 @@
+pct,tau,nA,nB,nC,nD,lam1_med_B,lam1_med_D
+5,1.1904157471082346,26,0,368,118,nan,0.10232088342308998
+10,1.205761365098233,52,0,342,118,nan,0.10232088342308998
+15,1.220964285676207,77,0,317,118,nan,0.10232088342308998
+20,1.2340382280630668,103,0,291,118,nan,0.10232088342308998
+25,1.2434017565460773,128,0,266,118,nan,0.10232088342308998
+30,1.253923051119596,154,0,240,118,nan,0.10232088342308998
+35,1.260020693170751,179,0,215,118,nan,0.10232088342308998
+40,1.2729663625028254,205,0,189,118,nan,0.10232088342308998
+45,1.286432864193943,230,0,164,118,nan,0.10232088342308998
+50,1.29425001787462,256,0,138,118,nan,0.10232088342308998
+55,1.3019951688280547,282,0,112,118,nan,0.10232088342308998
+60,1.3106991625583733,307,0,87,118,nan,0.10232088342308998
+65,1.323434760415417,333,0,61,118,nan,0.10232088342308998
+70,1.3423083391290647,358,0,36,118,nan,0.10232088342308998
+75,1.4595699782590752,384,0,10,118,nan,0.10232088342308998
+80,1.6965097149553376,384,25,10,93,0.10151073336601257,0.10370030999183655
+85,1.7416635003129175,384,51,10,67,0.1015518382191658,0.10389910638332367
+90,1.7702691318062869,384,76,10,42,0.10161199420690536,0.10456060990691185
+95,1.7933802257282094,384,102,10,16,0.10197825729846954,0.10976629704236984
diff --git a/analysis_2x2/sweep_trm_step13020_n512.csv b/analysis_2x2/sweep_trm_step13020_n512.csv
new file mode 100644
index 0000000..4dba5eb
--- /dev/null
+++ b/analysis_2x2/sweep_trm_step13020_n512.csv
@@ -0,0 +1,20 @@
+pct,tau,nA,nB,nC,nD,lam1_med_B,lam1_med_D
+5,0.1461886368890553,26,0,279,207,nan,0.02986210025846958
+10,0.1484773952154017,52,0,253,207,nan,0.02986210025846958
+15,0.14987173414316576,77,0,228,207,nan,0.02986210025846958
+20,0.15185786893932013,103,0,202,207,nan,0.02986210025846958
+25,0.15372304028625772,128,0,177,207,nan,0.02986210025846958
+30,0.15513756643739401,154,0,151,207,nan,0.02986210025846958
+35,0.15748307719056248,179,0,126,207,nan,0.02986210025846958
+40,0.16089218583944756,205,0,100,207,nan,0.02986210025846958
+45,0.16811494142658162,230,0,75,207,nan,0.02986210025846958
+50,0.21289853477178294,256,0,49,207,nan,0.02986210025846958
+55,0.31298600561046025,281,1,24,206,-0.014628570526838303,0.030006153509020805
+60,0.8658403204807424,296,11,9,196,-0.003207409055903554,0.03166834078729153
+65,1.1762548477525878,298,35,7,172,-0.0013167093275114894,0.03500013053417206
+70,1.3359059506661692,298,60,7,147,0.008530525024980307,0.03881623595952988
+75,1.4369319380372483,298,86,7,121,0.01115282904356718,0.043723251670598984
+80,1.5107383605809237,298,111,7,96,0.014448095113039017,0.045386193320155144
+85,1.5691906481172995,298,137,7,70,0.020133845508098602,0.04726334474980831
+90,1.6297611879974812,298,162,7,45,0.022078031674027443,0.04918263107538223
+95,1.7017159174665506,298,188,7,19,0.0257729422301054,0.061704523861408234
diff --git a/analyze_diag.py b/analyze_diag.py
new file mode 100644
index 0000000..0382d3c
--- /dev/null
+++ b/analyze_diag.py
@@ -0,0 +1,115 @@
+"""Analyze the HRM diagnostic output: stats + plots."""
+import numpy as np
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+from scipy import stats
+import argparse
+
+ap = argparse.ArgumentParser()
+ap.add_argument("--in", dest="inp", required=True)
+ap.add_argument("--out-dir", default="/home/yurenh2/rrm/research/flossing/plots")
+args = ap.parse_args()
+
+import os
+os.makedirs(args.out_dir, exist_ok=True)
+
+d = np.load(args.inp)
+print("keys:", list(d.keys()))
+lyap = d["lyap_max"] # (N,)
+exact = d["exact_correct"].astype(bool) # (N,)
+drift_zH = d["drift_zH"] # (N, T) ACT-step level drift
+drift_zL = d["drift_zL"] # (N, T)
+q_halt = d["q_halt"] # (N, T)
+q_continue = d["q_continue"] # (N, T)
+halted_at = d["halted_at"] # (N,) — ACT step at which q_halt>q_continue first
+
+N = len(lyap)
+print(f"N={N}, exact_acc={exact.mean():.3f}")
+
+succ = lyap[exact]; fail = lyap[~exact]
+print(f"\n=== Lyapunov max ===")
+print(f"  success: mean={succ.mean():+.4f}  std={succ.std():.4f}  n={succ.size}")
+print(f"  failure: mean={fail.mean():+.4f}  std={fail.std():.4f}  n={fail.size}")
+
+# Stats tests
+t, p = stats.ttest_ind(succ, fail, equal_var=False)
+print(f"  Welch t-test: t={t:.3f}  p={p:.2e}")
+u, p_u = stats.mannwhitneyu(succ, fail, alternative="two-sided")
+print(f"  Mann-Whitney U: U={u:.0f}  p={p_u:.2e}")
+# Effect size (Cohen's d)
+# The (n-1)-weighted pooling below is defined on the unbiased sample variances,
+# so request ddof=1 explicitly (numpy's default is the population form, ddof=0).
+pooled_std = np.sqrt(((succ.size-1)*succ.var(ddof=1) + (fail.size-1)*fail.var(ddof=1)) / (succ.size+fail.size-2))
+d_cohen = (succ.mean() - fail.mean()) / pooled_std
+print(f"  Cohen's d: {d_cohen:.3f}")
+# AUC: lyap predicts failure
+# Failure tends to have higher (less negative) lyap.
+# U for the first sample divided by n_fail*n_succ is the AUROC of "lyap ranks failure higher".
+auc = stats.mannwhitneyu(fail, succ, alternative="greater").statistic / (fail.size * succ.size)
+print(f"  AUROC (lyap predicting failure): {auc:.3f}")
+
+# ----- plot 1: histogram of lyap by success/failure -----
+fig, ax = plt.subplots(1, 1, figsize=(6,4))
+bins = np.linspace(min(lyap.min(),-1.5), max(lyap.max(), -0.1), 40)
+ax.hist(succ, bins=bins, alpha=0.55, label=f"success (n={succ.size})", color="C0", density=True)
+ax.hist(fail, bins=bins, alpha=0.55, label=f"failure (n={fail.size})", color="C3", density=True)
+ax.axvline(succ.mean(), color="C0", ls="--", lw=1)
+ax.axvline(fail.mean(), color="C3", ls="--", lw=1)
+ax.set_xlabel(r"$\lambda_{max}$ (top Lyapunov exponent, per inner cycle)")
+ax.set_ylabel("density")
+ax.set_title(f"HRM Sudoku 1k @ step_26040: λ_max distribution\nCohen's d={d_cohen:.2f}, Welch p={p:.1e}, AUROC={auc:.2f}")
+ax.legend()
+fig.tight_layout()
+fig.savefig(f"{args.out_dir}/lyap_hist.png", dpi=130)
+plt.close()
+
+# ----- plot 2: drift trajectory means -----
+fig, axes = plt.subplots(1, 2, figsize=(11,4), sharey=False)
+for ax, drift, name in [(axes[0], drift_zH, r"$\|z_H^{(t+1)} - z_H^{(t)}\|$"),
+ (axes[1], drift_zL, r"$\|z_L^{(t+1)} - z_L^{(t)}\|$")]:
+ mean_s = drift[exact].mean(0); std_s = drift[exact].std(0)
+ mean_f = drift[~exact].mean(0); std_f = drift[~exact].std(0)
+ x = np.arange(drift.shape[1])
+ ax.fill_between(x, mean_s-std_s, mean_s+std_s, color="C0", alpha=0.25)
+ ax.plot(x, mean_s, "C0", label="success")
+ ax.fill_between(x, mean_f-std_f, mean_f+std_f, color="C3", alpha=0.25)
+ ax.plot(x, mean_f, "C3", label="failure")
+ ax.set_xlabel("ACT step")
+ ax.set_title(f"drift per ACT step: {name}")
+ ax.legend()
+ ax.set_yscale("log")
+fig.tight_layout()
+fig.savefig(f"{args.out_dir}/drift_per_act.png", dpi=130)
+plt.close()
+
+# ----- plot 3: Q-halt gating dynamics -----
+fig, ax = plt.subplots(1, 1, figsize=(6,4))
+diff = q_halt - q_continue
+mean_s = diff[exact].mean(0); std_s = diff[exact].std(0)
+mean_f = diff[~exact].mean(0); std_f = diff[~exact].std(0)
+x = np.arange(diff.shape[1])
+ax.fill_between(x, mean_s-std_s, mean_s+std_s, color="C0", alpha=0.25)
+ax.plot(x, mean_s, "C0", label="success")
+ax.fill_between(x, mean_f-std_f, mean_f+std_f, color="C3", alpha=0.25)
+ax.plot(x, mean_f, "C3", label="failure")
+ax.axhline(0, color="k", ls=":", lw=0.6)
+ax.set_xlabel("ACT step")
+ax.set_ylabel("q_halt − q_continue")
+ax.set_title("ACT Q-head gating signal over ACT steps")
+ax.legend()
+fig.tight_layout()
+fig.savefig(f"{args.out_dir}/q_gating.png", dpi=130)
+plt.close()
+
+# ----- plot 4: lyap vs token_acc scatter -----
+token_acc = d["token_acc"]
+fig, ax = plt.subplots(1, 1, figsize=(6,4))
+ax.scatter(lyap[exact], token_acc[exact], s=8, alpha=0.3, color="C0", label="exact-correct")
+ax.scatter(lyap[~exact], token_acc[~exact], s=8, alpha=0.3, color="C3", label="exact-fail")
+ax.set_xlabel(r"$\lambda_{max}$")
+ax.set_ylabel("token-level accuracy")
+ax.legend()
+ax.set_title("λ_max vs token accuracy")
+fig.tight_layout()
+fig.savefig(f"{args.out_dir}/lyap_vs_tokenacc.png", dpi=130)
+plt.close()
+
+print(f"\nplots saved to {args.out_dir}/")
diff --git a/analyze_dynamics_experiments.py b/analyze_dynamics_experiments.py
new file mode 100644
index 0000000..22491d0
--- /dev/null
+++ b/analyze_dynamics_experiments.py
@@ -0,0 +1,289 @@
+"""Summarize dynamics-control experiments into a markdown report."""
+from __future__ import annotations
+
+import json
+import re
+from dataclasses import dataclass
+from datetime import datetime
+from pathlib import Path
+
+
+ROOT = Path(__file__).resolve().parent
+
+
+@dataclass(frozen=True)
+class RunSpec:
+    """Static, immutable description of one experiment run listed in ``RUNS``."""
+    name: str  # human-readable label printed in the report's "Run" column
+    model: str  # model tag; used as the key of the per-model baseline lookup
+    family: str  # experiment family; rows with "baseline" provide the "Vs Baseline" reference
+    json_name: str  # results JSON file name, resolved relative to ROOT
+    log_name: str | None = None  # optional log file name; no entry in RUNS sets it
+
+
+RUNS = [
+ RunSpec("HRM baseline 10k", "HRM", "baseline", "step3_L_baseline_26040_fast_10k.json"),
+ RunSpec("HRM mixed volume-CF", "HRM", "mixed_loss", "step3_M_volume_cf_26040_lstar_neg015_k8_a10_10k.json"),
+ RunSpec("HRM Engelken interfloss", "HRM", "interfloss", "step7_A_hrm_engelken_interfloss_26040_k8_10k.json"),
+ RunSpec("HRM Engelken+KL interfloss", "HRM", "interfloss_kl", "step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k.json"),
+ RunSpec("HRM conservative Engelken+KL", "HRM", "interfloss_kl_conservative", "step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k.json"),
+ RunSpec("HRM late Engelken+KL", "HRM", "late_interfloss_kl", "step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k.json"),
+ RunSpec("HRM volume-envelope+KL", "HRM", "volume_interfloss_kl", "step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k.json"),
+ RunSpec("HRM basin consistency", "HRM", "basin", "step8_A_hrm_basin_consistency_beta1_noise002_after8_26040_10k.json"),
+ RunSpec("HRM single perturbed CE", "HRM", "trajectory_augment_single", "step9_A_hrm_single_perturb_sigma1e-3_26040_10k.json"),
+ RunSpec("HRM clean+multi perturbed CE", "HRM", "trajectory_augment_multi", "step9_B_hrm_multi4_perturb_sigma1e-3_26040_10k.json"),
+ RunSpec("HRM fixed-unroll baseline 50k", "HRM", "trajectory_fixed_baseline", "step9_E_hrm_baseline_parallel_fixed_26040_50k.json"),
+ RunSpec("HRM multi4 loguniform 50k", "HRM", "trajectory_augment_loguniform", "step9_F_hrm_multi4_loguniform_ramp_26040_50k.json"),
+ RunSpec("TRM baseline 10k", "TRM", "baseline", "step5_L_trm_baseline_26041_batch4_fast_10k.json"),
+ RunSpec("TRM mixed volume-CF", "TRM", "mixed_loss", "step5_M_trm_volume_cf_26041_lstar002_batch4_k4_a10_10k.json"),
+ RunSpec("TRM Engelken interfloss", "TRM", "interfloss", "step7_B_trm_engelken_interfloss_26041_k4_batch4_10k.json"),
+ RunSpec("TRM Engelken+KL interfloss", "TRM", "interfloss_kl", "step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k.json"),
+ RunSpec("TRM late Engelken+KL", "TRM", "late_interfloss_kl", "step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k.json"),
+ RunSpec("TRM volume-envelope+KL", "TRM", "volume_interfloss_kl", "step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k.json"),
+ RunSpec("TRM basin consistency", "TRM", "basin", "step8_B_trm_basin_consistency_beta1_noise002_after8_26041_batch4_10k.json"),
+ RunSpec("TRM single perturbed CE", "TRM", "trajectory_augment_single", "step9_C_trm_single_perturb_sigma1e-3_26041_batch4_10k.json"),
+ RunSpec("TRM clean+multi perturbed CE", "TRM", "trajectory_augment_multi", "step9_D_trm_multi4_perturb_sigma1e-3_26041_batch4_10k.json"),
+ RunSpec("TRM fixed-unroll baseline 50k", "TRM", "trajectory_fixed_baseline", "step9_G_trm_baseline_parallel_fixed_26041_batch4_50k.json"),
+ RunSpec("TRM multi4 loguniform 50k", "TRM", "trajectory_augment_loguniform", "step9_H_trm_multi4_loguniform_ramp_26041_batch4_50k.json"),
+]
+
+
+def load_json(path: Path):
+    """Load one run's results JSON without ever raising.
+
+    Returns:
+        ``None`` when ``path`` does not exist; the decoded dict when the file
+        holds a JSON object; otherwise ``{"_bad_json": <reason>}`` so the run is
+        reported as ``bad_json``.
+    """
+    if not path.exists():
+        return None
+    try:
+        data = json.loads(path.read_text())
+    except Exception as exc:  # noqa: BLE001 - best effort: any read/parse failure is "bad"
+        return {"_bad_json": str(exc)}
+    # Every consumer calls ``.get`` on the payload, so a syntactically valid but
+    # non-object document (list, number, null, ...) is as unusable as a corrupt file.
+    if not isinstance(data, dict):
+        return {"_bad_json": f"expected a JSON object, got {type(data).__name__}"}
+    return data
+
+
+def step_of(ev: dict):
+    """Return the record's ``step`` entry, or ``train_step`` when ``step`` is absent."""
+    if "step" in ev:
+        return ev["step"]
+    return ev.get("train_step")
+
+
+def acc_of(ev: dict):
+    """Return the record's ``acc`` as a float, or ``None`` when it is missing or None."""
+    raw = ev.get("acc")
+    if raw is None:
+        return None
+    return float(raw)
+
+
+def evals_of(data: dict):
+    """List the entries of ``data["evals"]`` that carry a usable accuracy, in order."""
+    kept = []
+    for record in data.get("evals", []):
+        if acc_of(record) is not None:
+            kept.append(record)
+    return kept
+
+
+def best_eval(evals: list[dict]):
+    """Return the eval with the highest accuracy (earliest on ties); ``None`` if empty."""
+    return max(evals, key=acc_of) if evals else None
+
+
+def last_eval(evals: list[dict]):
+    """Return the final entry of ``evals``, or ``None`` when there is none."""
+    return evals[-1] if evals else None
+
+
+def fmt(x, digits=4):
+    """Render ``x`` with ``digits`` decimal places; ``None`` is shown as ``"NA"``."""
+    return "NA" if x is None else f"{float(x):.{digits}f}"
+
+
+def completion_status(data):
+    """Classify a loaded results payload.
+
+    Returns "missing" (no payload), "bad_json" (load-failure marker present),
+    "complete" (``final_acc`` set), "partial" (some usable evals), else "started".
+    """
+    if data is None:
+        return "missing"
+    if "_bad_json" in data:
+        return "bad_json"
+    if data.get("final_acc") is not None:
+        return "complete"
+    return "partial" if evals_of(data) else "started"
+
+
+def run_summary(spec: RunSpec):
+    """Load ``spec``'s results JSON and condense it into one report row (a dict)."""
+    data = load_json(ROOT / spec.json_name)
+    row = {
+        "spec": spec,
+        "status": completion_status(data),
+        "initial": None,
+        "final": None,
+        "last": None,
+        "best": None,
+        "n_evals": 0,
+        "data": data,
+    }
+    # Missing or unreadable payloads keep the all-None placeholder row.
+    if data is None or "_bad_json" in data:
+        return row
+    usable = evals_of(data)
+    latest = last_eval(usable)
+    final_acc = data.get("final_acc")
+    # Without a recorded final accuracy, fall back to the most recent eval.
+    if final_acc is None and latest is not None:
+        final_acc = acc_of(latest)
+    row["initial"] = data.get("initial_acc")
+    row["final"] = final_acc
+    row["last"] = latest
+    row["best"] = best_eval(usable)
+    row["n_evals"] = len(usable)
+    return row
+
+
+def collect_process_snapshot():
+    """Best-effort list of running processes that look like monitored experiments.
+
+    Scans ``/proc/<pid>/cmdline`` (avoids a psutil dependency) and keeps every
+    process whose command line contains one of the known run/launcher fragments.
+
+    Returns:
+        ``(pid, command line)`` tuples sorted by pid; an empty list when ``/proc``
+        cannot be listed.
+    """
+    patterns = [
+        "step3_M_volume_cf_26040_lstar_neg015",
+        "step7_C_hrm_engelken_interfloss_kl10",
+        "step7_D_trm_engelken_interfloss_kl10",
+        "launch_dynamics_variants_queue",
+        "step7_E_hrm_late",
+        "step7_F_trm_late",
+        "step7_G_hrm_volume",
+        "step7_H_trm_volume",
+        "step8_A_hrm_basin",
+        "step8_B_trm_basin",
+        "step9_A_hrm_single",
+        "step9_B_hrm_multi4",
+        "step9_C_trm_single",
+        "step9_D_trm_multi4",
+        "step9_E_hrm_baseline",
+        "step9_F_hrm_multi4",
+        "step9_G_trm_baseline",
+        "step9_H_trm_multi4",
+        "launch_trajectory_perturb_queue",
+        "launch_trajectory_sampling_long",
+    ]
+    try:
+        entries = list(Path("/proc").iterdir())
+    except OSError:
+        # The snapshot is advisory: if /proc is unavailable, report "nothing
+        # active" instead of aborting the whole report.
+        return []
+    rows = []
+    for proc in entries:
+        if not proc.name.isdigit():
+            continue
+        try:
+            # cmdline is NUL-separated; join the arguments with spaces for matching/display.
+            cmd = (proc / "cmdline").read_bytes().replace(b"\x00", b" ").decode("utf-8", "ignore")
+        except Exception:  # noqa: BLE001 - processes may exit or be unreadable mid-scan
+            continue
+        if any(p in cmd for p in patterns):
+            rows.append((int(proc.name), cmd.strip()))
+    return sorted(rows)
+
+
+def log_tail_metrics(log_name: str, total_steps: int = 10000):
+    """Extract training progress and eval accuracy from a log file under ``ROOT``.
+
+    Args:
+        log_name: log file name, resolved relative to ``ROOT``.
+        total_steps: denominator of the ``[step/total]`` progress markers to
+            match. Defaults to 10000 (the previously hard-coded value); pass
+            e.g. 50000 for runs with a longer schedule.
+
+    Returns:
+        ``{}`` when the log does not exist; otherwise a dict with ``log_step``
+        (last progress step or None) and ``log_last_eval`` / ``log_best_eval``
+        (``(step, acc)`` tuples or None).
+    """
+    path = ROOT / log_name
+    if not path.exists():
+        return {}
+    text = path.read_text(errors="ignore")
+    evals = [
+        (int(step), float(acc))
+        for step, acc in re.findall(r"EVAL @ step\s+(\d+): exact_acc=([0-9.]+)", text)
+    ]
+    progress = re.findall(rf"\[\s*(\d+)/{int(total_steps)}\]", text)
+    return {
+        "log_step": int(progress[-1]) if progress else None,
+        "log_last_eval": evals[-1] if evals else None,
+        "log_best_eval": max(evals, key=lambda x: x[1]) if evals else None,
+    }
+
+
+def floss_episode_summary(data: dict):
+    """Build one diagnostics line per floss episode that recorded at least one step."""
+    summaries = []
+    for ep in (data.get("floss_episodes", []) if data else []):
+        steps = ep.get("steps", [])
+        if not steps:
+            continue
+        head, tail = steps[0], steps[-1]
+        kl_values = [s.get("kl_loss", 0.0) for s in steps]
+        max_kl = max(kl_values, default=0.0)
+        mean_kl = sum(kl_values) / max(len(steps), 1)
+        # Each line reports first -> last values within the episode.
+        summaries.append(
+            f"episode {ep.get('episode')} @ train_step {ep.get('train_step')}: "
+            f"floss {fmt(head.get('floss_loss', head.get('loss')), 6)} -> "
+            f"{fmt(tail.get('floss_loss', tail.get('loss')), 6)}, "
+            f"lyap1 {fmt(head.get('lyap1_mean'))} -> {fmt(tail.get('lyap1_mean'))}, "
+            f"volume {fmt(head.get('volume_mean'))} -> {fmt(tail.get('volume_mean'))}, "
+            f"KL mean/max {fmt(mean_kl, 6)}/{fmt(max_kl, 6)}"
+        )
+    return summaries
+
+
+def markdown_report():
+    """Assemble the full markdown report and return it as a single string.
+
+    Sections, in order: summary table (one row per entry of ``RUNS``), floss
+    episode diagnostics, a snapshot of matching running processes, and notes.
+    """
+    rows = [run_summary(spec) for spec in RUNS]
+    # Reference accuracy per model, taken from rows whose family is exactly "baseline".
+    baseline_final = {
+        row["spec"].model: row["final"]
+        for row in rows
+        if row["spec"].family == "baseline" and row["final"] is not None
+    }
+
+    out = []
+    out.append("# Dynamics Control Experiment Report")
+    out.append("")
+    out.append(f"Generated: {datetime.now().isoformat(timespec='seconds')}")
+    out.append("")
+    out.append("## Summary Table")
+    out.append("")
+    out.append("| Model | Run | Status | Init | Final/Last | Delta | Best | Best Step | Vs Baseline | Evals |")
+    out.append("|---|---|---:|---:|---:|---:|---:|---:|---:|---:|")
+    for row in rows:
+        spec = row["spec"]
+        init = row["initial"]
+        final = row["final"]
+        best = row["best"]
+        best_acc = acc_of(best) if best else None
+        best_step = step_of(best) if best else None
+        # Delta = change from the initial accuracy; NA when either end is unknown.
+        delta = None if init is None or final is None else final - init
+        base = baseline_final.get(spec.model)
+        # Baseline rows are not compared against themselves.
+        vs_base = None if base is None or final is None or spec.family == "baseline" else final - base
+        out.append(
+            f"| {spec.model} | {spec.name} | {row['status']} | {fmt(init)} | {fmt(final)} | "
+            f"{fmt(delta)} | {fmt(best_acc)} | {best_step if best_step is not None else 'NA'} | "
+            f"{fmt(vs_base)} | {row['n_evals']} |"
+        )
+
+    out.append("")
+    out.append("## Floss Episode Diagnostics")
+    out.append("")
+    any_floss = False
+    for row in rows:
+        # Missing payloads (data is None) are treated as having no episodes.
+        lines = floss_episode_summary(row.get("data") or {})
+        if not lines:
+            continue
+        any_floss = True
+        out.append(f"### {row['spec'].name}")
+        for line in lines:
+            out.append(f"- {line}")
+        out.append("")
+    if not any_floss:
+        out.append("No completed floss episode diagnostics found yet.")
+        out.append("")
+
+    out.append("## Incomplete Runs / Process Snapshot")
+    out.append("")
+    active = collect_process_snapshot()
+    if active:
+        for pid, cmd in active:
+            # Command lines are truncated to 220 characters for display.
+            out.append(f"- PID {pid}: `{cmd[:220]}`")
+    else:
+        out.append("- No monitored experiment processes are active.")
+    out.append("")
+
+    out.append("## Notes")
+    out.append("")
+    out.append("- `Final/Last` is `final_acc` when present, otherwise the latest eval accuracy.")
+    out.append("- `Vs Baseline` compares against the matching HRM/TRM 10k no-floss baseline.")
+    out.append("- A complete report may still show partial rows if an experiment crashed or was interrupted.")
+    out.append("")
+    return "\n".join(out)
+
+
+def main():
+    """Generate the report, write it into ROOT, and echo it to stdout."""
+    text = markdown_report()
+    destination = ROOT / "dynamics_experiment_report.md"
+    destination.write_text(text)
+    print(text)
+    print(f"\nWrote {destination}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/analyze_halt_bucket.py b/analyze_halt_bucket.py
new file mode 100644
index 0000000..2ad80fc
--- /dev/null
+++ b/analyze_halt_bucket.py
@@ -0,0 +1,122 @@
+"""Validate codex's λ*(h, β) = (1/h) log β framework using halted_at buckets.
+
+If the framework is right, then across halt buckets:
+- success samples should all satisfy λ_1 × h_micro × halted_at ≈ log(β_target) (a constant ≪ 0)
+- equivalently: success λ_1 should scale as ~1/halted_at
+
+We use halted_at as a proxy for "effective computation budget" (h in codex's notation).
+For each ACT step we have cycles_per_act micro-Lyapunov-steps:
+ HRM (H=2, L=2): 2*(2+1) = 6
+ TRM (H=3, L=6): 3*(6+1) = 21
+"""
+import numpy as np
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+
+ROOT = "/home/yurenh2/rrm/research/flossing"
+
+cases = {
+ "HRM_step26040": dict(npz=f"{ROOT}/diag_joint_1k.npz", cycles_per_act=6),
+ "TRM_step13020": dict(npz=f"{ROOT}/diag_trm_step13020_512.npz", cycles_per_act=21),
+}
+
+fig, axes = plt.subplots(2, 3, figsize=(15, 9))
+
+for row, (name, meta) in enumerate(cases.items()):
+ d = np.load(meta["npz"])
+ cpa = meta["cycles_per_act"]
+ lam = d["lyap_spec"][:, 0] # top-1 Lyap per sample (per micro step)
+ succ = d["exact_correct"] > 0.5
+ halted = d["halted_at"].copy()
+ halted[halted == 0] = 16 # never-halt → used full 16 ACT steps
+
+ h_micro = halted * cpa # effective micro-step horizon
+ logbeta = lam * h_micro # log of cumulative decay over h_micro
+ beta = np.exp(np.clip(logbeta, -20, 20)) # implied β
+
+ print(f"\n=== {name} ===")
+ print(f" N={len(lam)} acc={succ.mean():.3f} cycles/ACT={cpa}")
+ print(f" λ_1 mean: succ={lam[succ].mean():+.4f} fail={lam[~succ].mean():+.4f}")
+ print(f" halted_at: succ={halted[succ].mean():.2f} fail={halted[~succ].mean():.2f}")
+ print(f" log β implied succ={logbeta[succ].mean():+.3f} fail={logbeta[~succ].mean():+.3f}")
+ print(f" β implied: succ={beta[succ].mean():.3f} fail={beta[~succ].mean():.3f}")
+
+ # By halt bucket
+ print(f" bucket n_succ n_fail λ_succ λ_fail logβ_succ logβ_fail")
+ buckets = sorted(set(halted.tolist()))
+ for b in buckets:
+ ms = (halted == b) & succ
+ mf = (halted == b) & ~succ
+ if ms.sum() + mf.sum() < 5: continue
+ lam_s = lam[ms].mean() if ms.sum() > 0 else np.nan
+ lam_f = lam[mf].mean() if mf.sum() > 0 else np.nan
+ lb_s = lam_s * b * cpa
+ lb_f = lam_f * b * cpa
+ print(f" h={b:>3} {ms.sum():>4} {mf.sum():>4} "
+ f"{lam_s:+7.4f} {lam_f:+7.4f} {lb_s:+8.3f} {lb_f:+8.3f}")
+
+ # ----- Panel A: λ vs halted_at, succ vs fail -----
+ ax = axes[row, 0]
+ ax.scatter(halted[succ] + np.random.uniform(-0.15, 0.15, succ.sum()), lam[succ],
+ s=5, alpha=0.35, c="C2", label=f"succ (n={succ.sum()})")
+ ax.scatter(halted[~succ] + np.random.uniform(-0.15, 0.15, (~succ).sum()), lam[~succ],
+ s=5, alpha=0.35, c="C3", label=f"fail (n={(~succ).sum()})")
+ # Per-bucket mean
+ means_s, means_f, bs = [], [], []
+ for b in buckets:
+ ms = (halted == b) & succ
+ mf = (halted == b) & ~succ
+ if ms.sum() >= 3: means_s.append((b, lam[ms].mean()))
+ if mf.sum() >= 3: means_f.append((b, lam[mf].mean()))
+ if means_s:
+ xs, ys = zip(*means_s); ax.plot(xs, ys, "C2o-", lw=2, ms=8, label="succ mean")
+ if means_f:
+ xf, yf = zip(*means_f); ax.plot(xf, yf, "C3o-", lw=2, ms=8, label="fail mean")
+ # 1/h reference: codex prediction λ ∝ 1/h
+ if means_s:
+ h_ref = np.array([b for b, _ in means_s])
+ # fit constant = mean of λ*h*cpa over succ
+ c = (lam[succ] * halted[succ] * cpa).mean()
+ ref = c / (h_ref * cpa)
+ ax.plot(h_ref, ref, "k--", lw=1, alpha=0.5, label=f"λ=1/h·log β (log β={c:.2f})")
+ ax.axhline(0, color="k", lw=0.5, ls=":")
+ ax.set_xlabel("halted_at (ACT steps)"); ax.set_ylabel("λ_1 (per micro-step)")
+ ax.set_title(f"{name}: λ_1 vs halt step")
+ ax.legend(fontsize=7); ax.grid(alpha=0.3)
+
+ # ----- Panel B: log β = λ × h_micro by halt bucket -----
+ ax = axes[row, 1]
+ ax.scatter(halted[succ] + np.random.uniform(-0.15, 0.15, succ.sum()), logbeta[succ],
+ s=5, alpha=0.35, c="C2", label="succ")
+ ax.scatter(halted[~succ] + np.random.uniform(-0.15, 0.15, (~succ).sum()), logbeta[~succ],
+ s=5, alpha=0.35, c="C3", label="fail")
+ if means_s:
+ xs = [b for b, _ in means_s]
+ ys = [lam[(halted==b)&succ].mean() * b * cpa for b in xs]
+ ax.plot(xs, ys, "C2o-", lw=2, ms=8, label="succ mean")
+ if means_f:
+ xf = [b for b, _ in means_f]
+ yf = [lam[(halted==b)&~succ].mean() * b * cpa for b in xf]
+ ax.plot(xf, yf, "C3o-", lw=2, ms=8, label="fail mean")
+ ax.axhline(0, color="k", lw=0.5, ls=":")
+ ax.set_xlabel("halted_at"); ax.set_ylabel("log β = λ_1 × halt × cycles/ACT")
+ ax.set_title(f"{name}: implied log β (constant ⇒ 1/h scaling holds)")
+ ax.legend(fontsize=8); ax.grid(alpha=0.3)
+
+ # ----- Panel C: histogram of implied β by succ/fail -----
+ ax = axes[row, 2]
+ bins = np.linspace(-6, 4, 50)
+ ax.hist(logbeta[succ], bins=bins, alpha=0.6, color="C2", label=f"succ μ={logbeta[succ].mean():+.2f}")
+ ax.hist(logbeta[~succ], bins=bins, alpha=0.6, color="C3", label=f"fail μ={logbeta[~succ].mean():+.2f}")
+ ax.axvline(0, color="k", lw=0.5, ls=":")
+ ax.set_xlabel("log β implied"); ax.set_ylabel("count")
+ ax.set_title(f"{name}: log β distribution")
+ ax.legend(fontsize=8); ax.grid(alpha=0.3)
+
+fig.suptitle("Codex's λ*(h, β) = (1/h) log β prediction: across halt buckets, "
+ "λ should scale as 1/h with constant log β", fontsize=11)
+fig.tight_layout()
+out = f"{ROOT}/plots_halt_bucket.png"
+fig.savefig(out, dpi=130)
+print(f"\n→ {out}")
diff --git a/analyze_joint.py b/analyze_joint.py
new file mode 100644
index 0000000..1b65d0b
--- /dev/null
+++ b/analyze_joint.py
@@ -0,0 +1,103 @@
+"""Analyze the JOINT-tangent diagnostic (1024 samples)."""
+import numpy as np
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+from scipy import stats
+import glob, os
+
+ROOT = "/home/yurenh2/rrm/research/flossing"
+OUT = f"{ROOT}/plots_joint"
+os.makedirs(OUT, exist_ok=True)
+
+# Merge
+files = sorted(glob.glob(f"{ROOT}/diag_joint_1k_shard*.npz"))
+m = {}
+for f in files:
+ d = np.load(f)
+ for k in d.files: m.setdefault(k, []).append(d[k])
+for k in list(m.keys()): m[k] = np.concatenate(m[k], 0)
+np.savez_compressed(f"{ROOT}/diag_joint_1k.npz", **m)
+print(f"merged: N={len(m['exact_correct'])} acc={m['exact_correct'].mean():.4f}")
+
+ls = m["lyap_spec"] # (N, K)
+exact = m["exact_correct"].astype(bool)
+N, K = ls.shape
+succ = ls[exact]; fail = ls[~exact]
+
+print(f"\n--per-λ stats (joint tangent, k={K}) --")
+print(f"{'i':>3} {'mean_all':>10} {'mean_succ':>10} {'mean_fail':>10} {'Δ':>8} {'d':>7} {'auroc':>7} {'p_t':>9}")
+# One table row per exponent index: group means, effect size, AUROC for failure, Welch p-value.
+for i in range(K):
+    li = ls[:, i]; s_ = li[exact]; f_ = li[~exact]
+    # (n-1)-weighted pooling expects unbiased sample variances, hence ddof=1
+    # (numpy's default ddof=0 is the population variance).
+    pooled = np.sqrt(((s_.size-1)*s_.var(ddof=1) + (f_.size-1)*f_.var(ddof=1)) / (s_.size+f_.size-2))
+    dc = (s_.mean() - f_.mean()) / pooled
+    delta = f_.mean() - s_.mean()
+    # Named auc_i so it does not share a name with the auc() helper defined below.
+    auc_i = stats.mannwhitneyu(f_, s_, alternative="greater").statistic / (f_.size * s_.size)
+    _t, p = stats.ttest_ind(s_, f_, equal_var=False)
+    print(f"{i+1:>3} {li.mean():+10.4f} {s_.mean():+10.4f} {f_.mean():+10.4f} {delta:+8.4f} {abs(dc):>7.3f} {auc_i:>7.3f} {p:>9.2e}")
+
+def auc(score, target_pos):
+    """AUROC of ``score`` for the boolean mask ``target_pos``: Mann-Whitney U / (n_pos * n_neg)."""
+    pos, neg = score[target_pos], score[~target_pos]
+    u_stat = stats.mannwhitneyu(pos, neg, alternative="greater").statistic
+    return u_stat / (target_pos.sum() * (~target_pos).sum())
+
+print(f"\n--combined predictors AUROC (failure)--")
+for label, score in [
+ ("λ_1", ls[:, 0]),
+ ("λ_K (most-neg)", ls[:, -1]),
+ ("mean λ", ls.mean(1)),
+ ("max λ", ls.max(1)),
+ ("# of positive λ", (ls > 0).sum(1).astype(float)),
+]:
+ print(f" {label:18s}: {auc(score, ~exact):.4f}")
+
+# --- plot 1: spectrum mean ± std ---
+fig, ax = plt.subplots(1, 1, figsize=(7, 5))
+mean_s = succ.mean(0); std_s = succ.std(0)
+mean_f = fail.mean(0); std_f = fail.std(0)
+x = np.arange(1, K+1)
+ax.fill_between(x, mean_s-std_s, mean_s+std_s, color="C0", alpha=0.25)
+ax.plot(x, mean_s, "C0-o", label=f"success n={succ.shape[0]}", lw=2)
+ax.fill_between(x, mean_f-std_f, mean_f+std_f, color="C3", alpha=0.25)
+ax.plot(x, mean_f, "C3-o", label=f"failure n={fail.shape[0]}", lw=2)
+ax.axhline(0, color="k", ls="--", lw=1, label="critical λ=0")
+ax.set_xlabel(r"Lyapunov index $i$")
+ax.set_ylabel(r"$\lambda_i$ (per inner cycle, joint $(z_H,z_L)$ tangent)")
+ax.set_title(f"Joint-tangent top-{K} Lyapunov spectrum @ step_26040 (N={N}, acc={exact.mean():.3f})")
+ax.legend()
+ax.grid(alpha=0.3)
+fig.tight_layout()
+fig.savefig(f"{OUT}/lyap_spectrum_joint.png", dpi=130)
+plt.close()
+
+# --- plot 2: λ_1 histogram with critical line ---
+fig, ax = plt.subplots(1, 1, figsize=(7, 4))
+lo, hi = ls[:,0].min(), ls[:,0].max()
+bins = np.linspace(lo, hi, 60)
+ax.hist(succ[:,0], bins=bins, alpha=0.55, label=f"succ", color="C0", density=True)
+ax.hist(fail[:,0], bins=bins, alpha=0.55, label=f"fail", color="C3", density=True)
+ax.axvline(0, color="k", ls="--", lw=1, label="λ=0")
+ax.axvline(succ[:,0].mean(), color="C0", ls=":", lw=1)
+ax.axvline(fail[:,0].mean(), color="C3", ls=":", lw=1)
+ax.set_xlabel(r"$\lambda_1$")
+ax.set_ylabel("density")
+ax.set_title(f"Joint $\\lambda_1$ distribution (AUROC={auc(ls[:,0], ~exact):.3f})")
+ax.legend()
+fig.tight_layout()
+fig.savefig(f"{OUT}/lyap1_hist_joint.png", dpi=130)
+plt.close()
+
+# --- plot 3: # positive Lyapunov per sample (each sample, how many of top-K are positive?) ---
+n_pos_per_sample = (ls > 0).sum(1)
+fig, ax = plt.subplots(1, 1, figsize=(6.5, 4))
+# Unit-width bins centred on the integer counts 0..K.
+bins = np.arange(K+2) - 0.5
+ax.hist(n_pos_per_sample[exact], bins=bins, alpha=0.55, label="success", color="C0", density=True, align="mid")
+ax.hist(n_pos_per_sample[~exact], bins=bins, alpha=0.55, label="failure", color="C3", density=True, align="mid")
+ax.set_xticks(range(K+1))
+# K comes from the loaded spectrum, so the label follows it instead of assuming 8.
+ax.set_xlabel(f"# of positive (expansive) Lyapunov exponents in top-{K}")
+ax.set_ylabel("fraction of samples in group")
+ax.set_title("Counts of expansive modes per sample\n(joint dynamics has unstable manifold → no fixed point)")
+ax.legend()
+fig.tight_layout()
+fig.savefig(f"{OUT}/n_positive_modes.png", dpi=130)
+plt.close()
+
+print(f"\nplots → {OUT}/")
diff --git a/analyze_ptrm_rollout_cache.py b/analyze_ptrm_rollout_cache.py
new file mode 100644
index 0000000..b7413e8
--- /dev/null
+++ b/analyze_ptrm_rollout_cache.py
@@ -0,0 +1,135 @@
+"""Analyze cached PTRM rollout/spectrum files without rerunning the model."""
+from __future__ import annotations
+
+import argparse
+import csv
+from pathlib import Path
+
+import numpy as np
+
+
+def _safe_corr(a: np.ndarray, b: np.ndarray) -> float:
+    """Pearson correlation of the flattened inputs; NaN if either is empty or constant."""
+    x = np.asarray(a, dtype=np.float64).ravel()
+    y = np.asarray(b, dtype=np.float64).ravel()
+    degenerate = x.size == 0 or y.size == 0 or np.std(x) == 0 or np.std(y) == 0
+    if degenerate:
+        return float("nan")
+    return float(np.corrcoef(x, y)[0, 1])
+
+
+def _take(values: np.ndarray, idx: np.ndarray) -> np.ndarray:
+    """Pick ``values[i, idx[i]]`` for every row ``i``."""
+    row_ids = np.arange(values.shape[0])
+    return values[row_ids, idx]
+
+
+def _selector_metrics(exact: np.ndarray, token_acc: np.ndarray, idx: np.ndarray, prefix: str) -> dict[str, float]:
+    """Mean exact-match and token accuracy of the per-row selection ``idx``, keyed under ``prefix``."""
+    metrics: dict[str, float] = {}
+    metrics[f"{prefix}/exact"] = float(_take(exact, idx).mean())
+    metrics[f"{prefix}/token_acc"] = float(_take(token_acc, idx).mean())
+    return metrics
+
+
+def _feature_table(data: np.lib.npyio.NpzFile) -> dict[str, np.ndarray]:
+    """Collect the selector features available in ``data``, keyed by feature name.
+
+    ``lyap`` (if present and non-empty) is exposed as ``lambda1``; ``lyap_spec``
+    (if present and non-empty) yields summaries reduced over its last axis.
+    """
+    table: dict[str, np.ndarray] = {}
+    if "lyap" in data.files:
+        lyap = data["lyap"]
+        if lyap.size:
+            table["lambda1"] = lyap
+
+    if "lyap_spec" in data.files:
+        spec = data["lyap_spec"]
+        if spec.size:
+            positive = np.maximum(spec, 0)  # negative exponents clipped to zero
+            table["spec_lambda1"] = spec[..., 0]
+            table["spec_pos_mass"] = positive.sum(axis=-1)
+            table["spec_pos_l2"] = np.sqrt((positive ** 2).mean(axis=-1))
+            table["spec_mean"] = spec.mean(axis=-1)
+            table["spec_count_pos"] = (spec > 0).sum(axis=-1).astype(np.float32)
+            table["spec_spread"] = spec[..., 0] - spec[..., -1]
+    return table
+
+
+def analyze(path: Path) -> dict[str, float | str]:
+    """Summarize one cached rollout file as a flat metric dict.
+
+    Reads ``exact``, ``token_acc`` and ``q_halt`` (axis 0 is reported as
+    ``n_samples``, axis 1 as ``rollouts``) plus the optional ``det_exact``,
+    ``lyap`` and ``lyap_spec`` arrays.
+
+    Returns:
+        Mapping of metric name to value; ``"file"`` holds ``str(path)``, every
+        other entry is a float.
+    """
+    # np.load returns an NpzFile that keeps the archive open until it is closed.
+    # Pull everything needed into memory, then release the handle right away.
+    with np.load(path) as data:
+        exact = data["exact"].astype(bool)
+        token_acc = data["token_acc"].astype(np.float32)
+        q_halt = data["q_halt"].astype(np.float32)
+        det_exact = data["det_exact"].astype(bool) if "det_exact" in data.files else np.asarray([])
+        features = _feature_table(data)
+    correct_count = exact.sum(axis=1).astype(np.float32)
+    any_correct = exact.any(axis=1)
+
+    out: dict[str, float | str] = {
+        "file": str(path),
+        "n_samples": float(exact.shape[0]),
+        "rollouts": float(exact.shape[1]),
+        "deterministic/exact": float(det_exact.mean()) if det_exact.size else float("nan"),
+        "mean_rollout/exact": float(exact.mean()),
+        "oracle_pass/exact": float(any_correct.mean()),
+        "correct_count/mean": float(correct_count.mean()),
+        "correct_count/std": float(correct_count.std()),
+        "correct_count/median": float(np.median(correct_count)),
+        "correct_count/q10": float(np.quantile(correct_count, 0.10)),
+        "correct_count/q25": float(np.quantile(correct_count, 0.25)),
+        "correct_count/q75": float(np.quantile(correct_count, 0.75)),
+        "correct_count/q90": float(np.quantile(correct_count, 0.90)),
+        "correct_count/zero_frac": float((correct_count == 0).mean()),
+        "correct_count/full_frac": float((correct_count == exact.shape[1]).mean()),
+    }
+    # Thresholds above the rollout count can never be met, so they are omitted.
+    for threshold in (1, 5, 10, 25, 50, 75, 90):
+        if threshold <= exact.shape[1]:
+            out[f"correct_count/ge_{threshold}_frac"] = float((correct_count >= threshold).mean())
+
+    # Selector 1: per sample, the rollout with the highest q_halt.
+    q_idx = q_halt.argmax(axis=1)
+    out.update(_selector_metrics(exact, token_acc, q_idx, "q_max"))
+    out["corr_q_correct"] = _safe_corr(q_halt, exact.astype(np.float32))
+    q_selected = _take(exact, q_idx).astype(bool)
+
+    # Break the same metrics down by whether det_exact is set for the sample.
+    if det_exact.size:
+        det_fail = ~det_exact
+        if det_exact.any():
+            out["correct_count/det_success_mean"] = float(correct_count[det_exact].mean())
+            out["oracle_pass/det_success_frac"] = float(any_correct[det_exact].mean())
+            out["q_max/det_success_frac"] = float(q_selected[det_exact].mean())
+        if det_fail.any():
+            out["correct_count/det_fail_mean"] = float(correct_count[det_fail].mean())
+            out["oracle_pass/det_fail_frac"] = float(any_correct[det_fail].mean())
+            out["q_max/det_fail_frac"] = float(q_selected[det_fail].mean())
+
+    # Selector 2..n: per sample, the rollout minimizing each feature.
+    for name, feature in features.items():
+        idx = feature.argmin(axis=1)
+        out.update(_selector_metrics(exact, token_acc, idx, f"{name}_min"))
+        out[f"corr_neg_{name}_correct"] = _safe_corr(-feature, exact.astype(np.float32))
+        out[f"corr_q_{name}"] = _safe_corr(q_halt, feature)
+
+        # Head-to-head against the q_max selection (computed once above).
+        f_sel = _take(exact, idx).astype(bool)
+        out[f"{name}_q_wins"] = float((q_selected & ~f_sel).sum())
+        out[f"{name}_feature_wins"] = float((~q_selected & f_sel).sum())
+        out[f"{name}_both_fail"] = float((~q_selected & ~f_sel).sum())
+
+    return out
+
+
+def main() -> None:
+    """CLI entry point: analyze each cached .npz, optionally save a CSV, print every metric."""
+    cli = argparse.ArgumentParser()
+    cli.add_argument("npz", nargs="+", help="Cached .npz files from ptrm_rollout_selection.py")
+    cli.add_argument("--out-csv", default=None)
+    opts = cli.parse_args()
+
+    rows = [analyze(Path(item)) for item in opts.npz]
+    # Union of all metric names: files may yield different metric sets.
+    fieldnames = sorted({key for row in rows for key in row})
+
+    if opts.out_csv:
+        csv_path = Path(opts.out_csv)
+        csv_path.parent.mkdir(parents=True, exist_ok=True)
+        with csv_path.open("w", newline="") as handle:
+            writer = csv.DictWriter(handle, fieldnames=fieldnames)
+            writer.writeheader()
+            writer.writerows(rows)
+        print(f"saved {csv_path}")
+
+    for row in rows:
+        print(f"\n{row['file']}")
+        for key in fieldnames:
+            if key != "file" and key in row:
+                print(f"{key}: {row[key]}")
+
+if __name__ == "__main__":
+ main()
diff --git a/analyze_separate.py b/analyze_separate.py
new file mode 100644
index 0000000..d5759ab
--- /dev/null
+++ b/analyze_separate.py
@@ -0,0 +1,124 @@
+"""Analyze separated λ_L and λ_H, and their predictive power for failure."""
+import numpy as np
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+from scipy import stats
+import glob, os
+
ROOT = "/home/yurenh2/rrm/research/flossing"
OUT = f"{ROOT}/plots_separate"
os.makedirs(OUT, exist_ok=True)

# Merge every shard into one dict of arrays (concatenated along axis 0) and
# persist the merged result next to the shards.
files = sorted(glob.glob(f"{ROOT}/diag_sep_1k_shard*.npz"))
if not files:
    # Without this guard the failure shows up later as a KeyError on
    # "exact_correct", which hides the real cause.
    raise FileNotFoundError(f"No shard files matching {ROOT}/diag_sep_1k_shard*.npz")

m = {}
for f in files:
    # Use the NpzFile as a context manager so the archive handle is released
    # as soon as its arrays have been read.
    with np.load(f) as d:
        for k in d.files:
            m.setdefault(k, []).append(d[k])
for k in list(m.keys()):
    m[k] = np.concatenate(m[k], 0)
np.savez_compressed(f"{ROOT}/diag_sep_1k.npz", **m)

N = len(m["exact_correct"])
exact = m["exact_correct"].astype(bool)
acc = exact.mean()
print(f"N={N} acc={acc:.4f}\n")
+
def auc(score, target):
    """AUROC of ``score`` for detecting ``target == True``.

    Computed from the rank-sum (Mann-Whitney U of the positive class divided by
    ``n_pos * n_neg``), with average ranks for ties.

    Args:
        score: 1-D array of scores; higher should mean "more likely positive".
        target: 1-D boolean array of the same length.

    Returns:
        AUROC as a float, or NaN when either class is empty.
    """
    score = np.asarray(score, dtype=np.float64)
    target = np.asarray(target, dtype=bool)
    n_pos = int(target.sum())
    n_neg = int(target.size - n_pos)
    if n_pos == 0 or n_neg == 0:
        return float("nan")
    # Derive U from ranks directly instead of reading mannwhitneyu().statistic:
    # NOTE(review): which sample's U that field reports has, to my knowledge,
    # differed between SciPy releases — the rank-sum form does not depend on it.
    ranks = stats.rankdata(score)
    u_pos = ranks[target].sum() - n_pos * (n_pos + 1) / 2.0
    return float(u_pos / (n_pos * n_neg))
+
SPECTRA = ["lyap_joint", "lyap_L", "lyap_H"]

# Per-exponent table: group means, failure-minus-success gap, AUROC for
# predicting failure, and |Cohen's d|.
results = {}
for name in SPECTRA:
    ls = m[name]
    print(f"== {name} ==")
    print(f"{'i':>3} {'mean_all':>10} {'mean_succ':>10} {'mean_fail':>10} {'Δ':>8} {'AUROC':>7} {'cohen_d':>8}")
    for i in range(ls.shape[1]):
        li = ls[:, i]
        s_ = li[exact]
        f_ = li[~exact]
        # The (n-1) weights belong with the unbiased sample variance, so use
        # ddof=1; the original mixed them with the population variance.
        pooled = np.sqrt(
            ((s_.size - 1) * s_.var(ddof=1) + (f_.size - 1) * f_.var(ddof=1))
            / (s_.size + f_.size - 2)
        )
        dc = (s_.mean() - f_.mean()) / pooled
        delta = f_.mean() - s_.mean()
        au = auc(li, ~exact)
        print(f"{i+1:>3} {li.mean():+10.4f} {s_.mean():+10.4f} {f_.mean():+10.4f} {delta:+8.4f} {au:>7.3f} {abs(dc):>8.3f}")
    results[name] = ls
    print()

# AUROC comparison
print("=== Single-feature AUROC predicting failure ===")
for name in SPECTRA:
    ls = m[name]
    for label, score in [("λ_1", ls[:, 0]), ("mean", ls.mean(1)), ("λ_K", ls[:, -1])]:
        print(f" {name}.{label:6s}: AUROC = {auc(score, ~exact):.4f}")

# --- plot 1: three spectra overlay ---
fig, axes = plt.subplots(1, 3, figsize=(15, 4.5), sharex=True)
for ax, name in zip(axes, SPECTRA):
    ls = m[name]
    K = ls.shape[1]
    x = np.arange(1, K+1)
    s_ = ls[exact]
    f_ = ls[~exact]
    ax.fill_between(x, s_.mean(0)-s_.std(0), s_.mean(0)+s_.std(0), color="C0", alpha=0.25)
    ax.plot(x, s_.mean(0), "C0-o", label="succ", lw=2)
    ax.fill_between(x, f_.mean(0)-f_.std(0), f_.mean(0)+f_.std(0), color="C3", alpha=0.25)
    ax.plot(x, f_.mean(0), "C3-o", label="fail", lw=2)
    ax.axhline(0, color="k", ls="--", lw=1, label="critical")
    ax.set_xlabel("λ index")
    ax.set_ylabel(r"$\lambda_i$ (per cycle)")
    ax.set_title(name)
    ax.legend()
    ax.grid(alpha=0.3)
fig.suptitle(f"HRM step_26040 — separated Lyapunov spectra (N={N}, acc={acc:.2%})")
fig.tight_layout()
fig.savefig(f"{OUT}/three_spectra.png", dpi=130)
plt.close(fig)


def _plot_top_exponent_hist(values, xlabel, title, filename, mark_zero):
    """Save overlaid success/failure density histograms of one exponent.

    Both groups share 60 bin edges spanning the full range of ``values``;
    dotted lines mark each group's mean and, when ``mark_zero`` is true, a
    dashed line marks zero.
    """
    fig, ax = plt.subplots(1, 1, figsize=(7, 4))
    bins = np.linspace(values.min(), values.max(), 60)
    ax.hist(values[exact], bins=bins, alpha=0.55, label="succ", color="C0", density=True)
    ax.hist(values[~exact], bins=bins, alpha=0.55, label="fail", color="C3", density=True)
    if mark_zero:
        ax.axvline(0, color="k", ls="--", lw=1)
    ax.axvline(values[exact].mean(), color="C0", ls=":", lw=1)
    ax.axvline(values[~exact].mean(), color="C3", ls=":", lw=1)
    ax.set_xlabel(xlabel)
    ax.set_ylabel("density")
    ax.set_title(title)
    ax.legend()
    fig.tight_layout()
    fig.savefig(f"{OUT}/{filename}", dpi=130)
    plt.close(fig)


# --- plot 2: λ_H_1 histogram (the differentiator) ---
ls = m["lyap_H"][:, 0]
_plot_top_exponent_hist(
    ls,
    r"$\lambda_{H,1}$ — H subsystem top Lyapunov",
    f"H-module Lyapunov is the differentiator (AUROC={auc(ls, ~exact):.3f})",
    "lyap_H_hist.png",
    mark_zero=True,
)

# --- plot 3: λ_L_1 histogram (NOT a differentiator) ---
ls = m["lyap_L"][:, 0]
_plot_top_exponent_hist(
    ls,
    r"$\lambda_{L,1}$ — L subsystem top Lyapunov",
    f"L-module Lyapunov is NOT a differentiator (AUROC={auc(ls, ~exact):.3f})",
    "lyap_L_hist.png",
    mark_zero=False,
)

# --- plot 4: scatter λ_L vs λ_H, colored by success/failure ---
fig, ax = plt.subplots(1, 1, figsize=(7, 6))
xL = m["lyap_L"][:, 0]
xH = m["lyap_H"][:, 0]
ax.scatter(xL[exact], xH[exact], s=5, alpha=0.4, color="C0", label="succ")
ax.scatter(xL[~exact], xH[~exact], s=5, alpha=0.4, color="C3", label="fail")
ax.axhline(0, color="k", ls="--", lw=0.5)
ax.set_xlabel(r"$\lambda_{L,1}$ (L subsystem)")
ax.set_ylabel(r"$\lambda_{H,1}$ (H subsystem)")
ax.set_title("Success/failure separates along the λ_H axis")
ax.legend()
ax.grid(alpha=0.3)
fig.tight_layout()
fig.savefig(f"{OUT}/lyap_L_vs_H_scatter.png", dpi=130)
plt.close(fig)

print(f"\nplots → {OUT}/")
diff --git a/analyze_spectrum_microscope.py b/analyze_spectrum_microscope.py
new file mode 100644
index 0000000..d2b4d82
--- /dev/null
+++ b/analyze_spectrum_microscope.py
@@ -0,0 +1,360 @@
+"""Full-spectrum microscope for HRM/TRM joint Lyapunov diagnostics.
+
+This script intentionally treats the finite-time QR columns as an unordered
+top-k estimate per sample and analyzes both the raw column-0 value and the
+sorted spectrum. The sorted features are safer for per-sample questions like
+"how many unstable directions does this trajectory have?".
+"""
+from __future__ import annotations
+
+import argparse
+import csv
+import json
+import math
+import re
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+ROOT = Path("/home/yurenh2/rrm/research/flossing")
+
+
def auc_score(score: np.ndarray, label: np.ndarray) -> float:
    """Rank-based AUROC of ``score`` for predicting ``label == True``.

    Tied scores receive the average of the ranks they span. Returns NaN when
    either class is empty.
    """
    values = np.asarray(score, dtype=np.float64)
    is_pos = np.asarray(label, dtype=bool)
    pos_count = int(is_pos.sum())
    neg_count = int((~is_pos).sum())
    if pos_count == 0 or neg_count == 0:
        return float("nan")

    total = len(values)
    order = np.argsort(values)
    ordered = values[order]

    # A run of equal values starts wherever a sorted value differs from its
    # predecessor; run [a, b) holds 1-based ranks a+1..b, whose mean is
    # (a + 1 + b) / 2.
    run_starts = np.concatenate(([0], np.flatnonzero(ordered[1:] != ordered[:-1]) + 1))
    run_stops = np.append(run_starts[1:], total)
    run_rank = (run_starts + 1 + run_stops) / 2.0

    ranks = np.empty(total, dtype=np.float64)
    ranks[order] = np.repeat(run_rank, run_stops - run_starts)

    positive_rank_sum = ranks[is_pos].sum()
    u_stat = positive_rank_sum - pos_count * (pos_count + 1) / 2.0
    return float(u_stat / (pos_count * neg_count))
+
+
def cohen_d(success: np.ndarray, failure: np.ndarray) -> float:
    """Cohen's d for (failure mean - success mean) with pooled sample variance.

    Returns NaN when either group has fewer than two values or the pooled
    variance is not positive.
    """
    ok = np.asarray(success, dtype=np.float64)
    bad = np.asarray(failure, dtype=np.float64)
    n_ok, n_bad = len(ok), len(bad)
    if n_ok < 2 or n_bad < 2:
        return float("nan")

    weighted_var = (n_ok - 1) * ok.var(ddof=1) + (n_bad - 1) * bad.var(ddof=1)
    pooled_var = weighted_var / (n_ok + n_bad - 2)
    if pooled_var <= 0:
        return float("nan")

    mean_gap = bad.mean() - ok.mean()
    return float(mean_gap / math.sqrt(pooled_var))
+
+
def safe_mean(x: np.ndarray) -> float:
    """Mean of ``x`` as a Python float; NaN for an empty input."""
    if not len(x):
        return float("nan")
    return float(np.mean(x))
+
+
def safe_std(x: np.ndarray) -> float:
    """Standard deviation of ``x`` (NumPy default, ddof=0); NaN for an empty input."""
    if not len(x):
        return float("nan")
    return float(np.std(x))
+
+
def parse_step(path: Path, kind: str) -> int:
    """Extract the checkpoint step number from a diagnostic file name.

    HRM file names carry ``step_<digits>_512``; every other kind is matched
    against ``step<digits>_512``.

    Raises:
        ValueError: if the file name does not contain the expected pattern
            (previously an opaque ``AttributeError`` on ``None``).
    """
    pattern = r"step_(\d+)_512" if kind == "HRM" else r"step(\d+)_512"
    match = re.search(pattern, path.name)
    if match is None:
        raise ValueError(f"Cannot parse {kind} checkpoint step from file name {path.name!r}")
    return int(match.group(1))
+
+
def discover(kind: str) -> list[Path]:
    """Return the diagnostic ``.npz`` files for ``kind`` under ROOT, ordered by step.

    Raises:
        FileNotFoundError: if no file matches.
    """
    pattern = "diag_hrm_step_*_512.npz" if kind == "HRM" else "diag_trm_singleGPU_step*_512.npz"
    found = sorted(ROOT.glob(pattern), key=lambda candidate: parse_step(candidate, kind))
    if found:
        return found
    raise FileNotFoundError(f"No {kind} diagnostic files found under {ROOT}")
+
+
def feature_dict(raw_lam: np.ndarray) -> dict[str, np.ndarray]:
    """Per-sample scalar features of a (samples, k) exponent array.

    Every feature except ``raw_col0`` is computed from the row-wise
    descending-sorted spectrum. ``linear_slope`` is the least-squares slope of
    the sorted exponents against their (centred) rank index.
    """
    descending = np.sort(raw_lam, axis=1)[:, ::-1]
    n_exponents = descending.shape[1]

    rank = np.arange(n_exponents, dtype=np.float64)
    rank = rank - rank.mean()
    rank_sq_sum = float((rank**2).sum())

    largest = descending[:, 0]
    second = descending[:, 1]
    smallest = descending[:, -1]
    nonneg_part = np.clip(descending, 0.0, None)

    features: dict[str, np.ndarray] = {}
    features["raw_col0"] = raw_lam[:, 0]
    features["lambda_max"] = largest
    features["lambda_2"] = second
    features["lambda_min8"] = smallest
    features["mean8"] = descending.mean(axis=1)
    features["sum8"] = descending.sum(axis=1)
    features["tail_mean_5_8"] = descending[:, 4:].mean(axis=1)
    features["positive_sum"] = nonneg_part.sum(axis=1)
    features["positive_count"] = (descending > 0).sum(axis=1)
    features["spread"] = largest - smallest
    features["gap12"] = largest - second
    features["std8"] = descending.std(axis=1)
    features["linear_slope"] = (descending @ rank) / rank_sq_sum
    return features
+
+
def summarize_file(kind: str, path: Path) -> tuple[dict, list[dict]]:
    """Summarise one diagnostic ``.npz`` checkpoint file.

    Reads ``lyap_spec``, ``exact_correct`` and ``token_acc`` from ``path``.

    Returns:
        ``(summary, rows)`` where ``summary`` is one flat dict of
        checkpoint-level statistics and ``rows`` is the list of per-feature
        rows followed by the per-sorted-rank rows.
    """
    # Copy the arrays out and close the archive right away; the NpzFile would
    # otherwise keep its file handle open for every checkpoint processed.
    with np.load(path) as data:
        raw_lam = np.asarray(data["lyap_spec"], dtype=np.float64)
        exact = data["exact_correct"].astype(bool)
        token_acc = np.asarray(data["token_acc"], dtype=np.float64)
    sorted_lam = np.sort(raw_lam, axis=1)[:, ::-1]
    fail = ~exact
    features = feature_dict(raw_lam)

    # Fraction of adjacent raw columns that are (nearly) non-increasing, i.e.
    # how often the raw QR column order already looks sorted.
    monotone_frac = float((np.diff(raw_lam, axis=1) <= 1e-5).mean())
    summary = {
        "kind": kind,
        "file": str(path),
        "step": parse_step(path, kind),
        "n": int(len(exact)),
        "k": int(raw_lam.shape[1]),
        "acc": float(exact.mean()),
        "n_success": int(exact.sum()),
        "n_failure": int(fail.sum()),
        "raw_monotone_adjacent_fraction": monotone_frac,
        "raw_col0_is_sample_max_fraction": float((raw_lam[:, 0] >= sorted_lam[:, 0] - 1e-7).mean()),
    }

    # Group means for the most interpretable features.
    for name in [
        "raw_col0",
        "lambda_max",
        "mean8",
        "tail_mean_5_8",
        "positive_sum",
        "positive_count",
        "spread",
        "gap12",
    ]:
        arr = features[name]
        summary[f"{name}_success_mean"] = safe_mean(arr[exact])
        summary[f"{name}_failure_mean"] = safe_mean(arr[fail])
        summary[f"{name}_delta_failure_minus_success"] = (
            summary[f"{name}_failure_mean"] - summary[f"{name}_success_mean"]
        )
        summary[f"{name}_auc_failure"] = auc_score(arr, fail)

    # Continuous token-accuracy correlations catch near-misses, not only exact success.
    for name in ["lambda_max", "mean8", "tail_mean_5_8", "positive_sum", "positive_count"]:
        arr = features[name]
        # corrcoef is undefined when either side is constant.
        if arr.std() > 0 and token_acc.std() > 0:
            summary[f"{name}_corr_token_acc"] = float(np.corrcoef(arr, token_acc)[0, 1])
        else:
            summary[f"{name}_corr_token_acc"] = float("nan")

    feature_rows = []
    for name, arr in features.items():
        feature_rows.append(
            {
                "kind": kind,
                "step": summary["step"],
                "feature": name,
                "success_mean": safe_mean(arr[exact]),
                "failure_mean": safe_mean(arr[fail]),
                "success_std": safe_std(arr[exact]),
                "failure_std": safe_std(arr[fail]),
                "delta_failure_minus_success": safe_mean(arr[fail]) - safe_mean(arr[exact]),
                "cohen_d_failure_minus_success": cohen_d(arr[exact], arr[fail]),
                "auc_failure": auc_score(arr, fail),
            }
        )

    spectrum_rows = []
    for i in range(raw_lam.shape[1]):
        spectrum_rows.append(
            {
                "kind": kind,
                "step": summary["step"],
                "rank": i + 1,
                "success_mean": safe_mean(sorted_lam[exact, i]),
                "failure_mean": safe_mean(sorted_lam[fail, i]),
                "delta_failure_minus_success": safe_mean(sorted_lam[fail, i])
                - safe_mean(sorted_lam[exact, i]),
                "auc_failure": auc_score(sorted_lam[:, i], fail),
            }
        )

    return summary, feature_rows + spectrum_rows
+
+
def write_csv(path: Path, rows: list[dict]) -> None:
    """Write ``rows`` to ``path`` as CSV; do nothing when ``rows`` is empty.

    The header is the union of all row keys in first-seen order; keys missing
    from a row are left blank by ``csv.DictWriter``.
    """
    if not rows:
        return
    # dict.fromkeys keeps first-seen order while dropping duplicates.
    header = list(dict.fromkeys(key for row in rows for key in row))
    with path.open("w", newline="") as handle:
        sink = csv.DictWriter(handle, fieldnames=header)
        sink.writeheader()
        sink.writerows(rows)
+
+
def plot_series(kind: str, summaries: list[dict], out_dir: Path) -> None:
    """Save a 2x2 figure of checkpoint-level series for one model kind.

    Panels: exact accuracy, mean sorted λmax, mean of the sorted spectrum, and
    mean count of positive exponents — the last three split by success/failure.
    """
    steps = np.array([entry["step"] for entry in summaries])
    accuracy = np.array([entry["acc"] for entry in summaries])

    def column(field: str) -> list:
        return [entry[field] for entry in summaries]

    fig, axes = plt.subplots(2, 2, figsize=(12, 8))

    acc_ax = axes[0, 0]
    acc_ax.plot(steps, accuracy, "ko-", label="exact acc")
    acc_ax.set_title(f"{kind}: accuracy")
    acc_ax.set_xlabel("checkpoint step")
    acc_ax.set_ylabel("accuracy")
    acc_ax.grid(alpha=0.3)

    # (axis, summary-key prefix, success label, failure label, zero line?, title, y label)
    panels = [
        (axes[0, 1], "lambda_max", "success λmax", "failure λmax", True,
         "Most unstable measured mode", "sorted λmax"),
        (axes[1, 0], "mean8", "success mean top-8", "failure mean top-8", True,
         "Top-8 volume proxy", "mean sorted λ1..λ8"),
        (axes[1, 1], "positive_count", "success", "failure", False,
         "Dimensionality of expansion", "# positive exponents among top 8"),
    ]
    for panel, prefix, ok_label, bad_label, zero_line, title, ylabel in panels:
        panel.plot(steps, column(f"{prefix}_success_mean"), "C0-o", label=ok_label)
        panel.plot(steps, column(f"{prefix}_failure_mean"), "C3-o", label=bad_label)
        if zero_line:
            panel.axhline(0, color="k", lw=1, alpha=0.35)
        panel.set_title(title)
        panel.set_xlabel("checkpoint step")
        panel.set_ylabel(ylabel)
        panel.legend()
        panel.grid(alpha=0.3)

    fig.tight_layout()
    fig.savefig(out_dir / f"{kind.lower()}_checkpoint_spectrum_features.png", dpi=150)
    plt.close(fig)
+
+
def plot_spectra(kind: str, files: list[Path], out_dir: Path) -> None:
    """Save a grid of mean sorted spectra (success vs failure), one panel per file.

    The grid has at most five columns; unused panels are hidden. Does nothing
    when ``files`` is empty.
    """
    n = len(files)
    if n == 0:
        # cols would be 0 and the row count a division by zero.
        return
    cols = min(5, n)
    rows = int(math.ceil(n / cols))
    fig, axes = plt.subplots(rows, cols, figsize=(3.4 * cols, 2.7 * rows), squeeze=False)
    for ax, path in zip(axes.flat, files):
        # Close each archive as soon as its arrays are copied out.
        with np.load(path) as data:
            raw_lam = np.asarray(data["lyap_spec"], dtype=np.float64)
            exact = data["exact_correct"].astype(bool)
        sorted_lam = np.sort(raw_lam, axis=1)[:, ::-1]
        fail = ~exact
        x = np.arange(1, sorted_lam.shape[1] + 1)
        # Skip an empty group rather than plotting a NaN mean.
        if exact.any():
            ax.plot(x, sorted_lam[exact].mean(axis=0), "C0-o", lw=1.6, ms=3, label="success")
        if fail.any():
            ax.plot(x, sorted_lam[fail].mean(axis=0), "C3-o", lw=1.6, ms=3, label="failure")
        ax.axhline(0, color="k", lw=0.8, alpha=0.35)
        ax.set_title(f"step {parse_step(path, kind)} acc={exact.mean():.2f}")
        ax.set_xlabel("sorted rank")
        ax.set_ylabel("λ")
        ax.grid(alpha=0.25)
    for ax in axes.flat[len(files) :]:
        ax.axis("off")
    # One shared legend, taken from the first panel.
    handles, labels = axes.flat[0].get_legend_handles_labels()
    if handles:
        fig.legend(handles, labels, loc="upper center", ncol=2)
    fig.tight_layout(rect=[0, 0, 1, 0.96])
    fig.savefig(out_dir / f"{kind.lower()}_mean_sorted_spectra_grid.png", dpi=150)
    plt.close(fig)
+
+
def plot_feature_auc(kind: str, feature_rows: list[dict], out_dir: Path) -> None:
    """Save a line plot of failure-prediction AUROC vs checkpoint step.

    One line per selected feature; rows without a ``feature`` key (the
    per-rank rows) never match and are ignored.
    """
    tracked = (
        "raw_col0",
        "lambda_max",
        "mean8",
        "tail_mean_5_8",
        "positive_sum",
        "positive_count",
        "spread",
        "gap12",
    )
    fig, ax = plt.subplots(figsize=(12, 5))
    for feature_name in tracked:
        matching = sorted(
            (row for row in feature_rows
             if row.get("feature") == feature_name and row["kind"] == kind),
            key=lambda row: row["step"],
        )
        xs = [row["step"] for row in matching]
        ys = [row["auc_failure"] for row in matching]
        ax.plot(xs, ys, marker="o", label=feature_name)
    ax.axhline(0.5, color="k", lw=1, alpha=0.35)
    ax.set_title(f"{kind}: feature AUROC for predicting exact failure")
    ax.set_xlabel("checkpoint step")
    ax.set_ylabel("AUROC")
    ax.set_ylim(0.0, 1.02)
    ax.legend(ncol=4, fontsize=8)
    ax.grid(alpha=0.3)
    fig.tight_layout()
    fig.savefig(out_dir / f"{kind.lower()}_feature_auc.png", dpi=150)
    plt.close(fig)
+
+
def main() -> None:
    """Run the spectrum microscope for HRM and TRM.

    For each kind: discover its diagnostic files, summarise each one, and
    write the three figures. Then write the combined CSV/JSON summaries into
    ``--out-dir``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--out-dir", default=str(ROOT / "spectrum_microscope"))
    opts = cli.parse_args()

    out_dir = Path(opts.out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    all_summaries: list[dict] = []
    all_feature_rows: list[dict] = []
    for kind in ("HRM", "TRM"):
        files = discover(kind)
        per_file = [summarize_file(kind, path) for path in files]
        summaries = sorted((summary for summary, _ in per_file), key=lambda r: r["step"])
        feature_rows = [row for _, rows in per_file for row in rows]

        all_summaries += summaries
        all_feature_rows += feature_rows

        plot_series(kind, summaries, out_dir)
        plot_spectra(kind, files, out_dir)
        plot_feature_auc(kind, feature_rows, out_dir)

    summary_csv = out_dir / "checkpoint_summary.csv"
    feature_csv = out_dir / "feature_and_rank_summary.csv"
    write_csv(summary_csv, all_summaries)
    write_csv(feature_csv, all_feature_rows)
    (out_dir / "checkpoint_summary.json").write_text(json.dumps(all_summaries, indent=2))

    print(f"Wrote {out_dir}")
    print(f" {summary_csv}")
    print(f" {feature_csv}")
+
+if __name__ == "__main__":
+ main()
diff --git a/analyze_step3.py b/analyze_step3.py
new file mode 100644
index 0000000..825408c
--- /dev/null
+++ b/analyze_step3.py
@@ -0,0 +1,76 @@
+"""Plot Step 3 A/B/C trajectories: λ and acc over training."""
+import json, os
+import numpy as np
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+
ROOT = "/home/yurenh2/rrm/research/flossing"
OUT = f"{ROOT}/plots_step3"
os.makedirs(OUT, exist_ok=True)

runs = {
    "A: baseline (α=0)\nfrom step_18228": "step3_A_baseline_18228.json",
    "B: CF α=10 λ*=-0.15\nfrom step_18228": "step3_B_rf_18228.json",
    "C: CF α=10 λ*=-0.05\nfrom step_26040": "step3_C_rf_26040.json",
}
colors = {"A": "C0", "B": "C3", "C": "C2"}


def load_run(fn):
    """Parse one run log (JSON) from ROOT, closing the file afterwards."""
    with open(f"{ROOT}/{fn}") as fh:
        return json.load(fh)


def smooth(x, w=20):
    """Moving average of width ``w``; series shorter than ``w`` are returned as-is.

    ``mode="same"`` zero-pads, so the first and last ~w/2 points are pulled
    towards zero.
    """
    if len(x) < w:
        return x
    kernel = np.ones(w) / w
    return np.convolve(x, kernel, mode="same")


# Parse every log once; both the figure and the summary below reuse it.
logs = {label: load_run(fn) for label, fn in runs.items()}

fig, axes = plt.subplots(2, 2, figsize=(13, 9))

for label, d in logs.items():
    key = label.split(":")[0]
    steps = [r["step"] for r in d["steps"]]
    sup = np.array([r["sup_loss"] for r in d["steps"]])
    lyap_mean = np.array([r["lyap1_mean"] for r in d["steps"]])
    lyap_max = np.array([r["lyap1_max"] for r in d["steps"]])
    frac = np.array([r["frac_above_star"] for r in d["steps"]])

    # acc evals
    eval_steps = [e["step"] for e in d["evals"]]
    eval_accs = [e["acc"] for e in d["evals"]]

    axes[0,0].plot(eval_steps, eval_accs, f"{colors[key]}-o", label=label)
    axes[0,1].plot(steps, smooth(sup), f"{colors[key]}-", label=label, alpha=0.8)
    axes[1,0].plot(steps, smooth(lyap_mean), f"{colors[key]}-", label=f"{key} mean", alpha=0.8)
    axes[1,0].plot(steps, smooth(lyap_max), f"{colors[key]}--", label=f"{key} max", alpha=0.4)
    axes[1,1].plot(steps, smooth(frac), f"{colors[key]}-", label=label, alpha=0.8)

axes[0,0].set_title("Test exact accuracy vs training step")
axes[0,0].set_xlabel("step"); axes[0,0].set_ylabel("exact_acc"); axes[0,0].legend(fontsize=8, loc="best"); axes[0,0].grid(alpha=0.3)

axes[0,1].set_title("Supervised loss (smoothed, w=20)")
axes[0,1].set_xlabel("step"); axes[0,1].set_ylabel("sup_loss"); axes[0,1].legend(fontsize=8); axes[0,1].grid(alpha=0.3)

axes[1,0].set_title("λ_joint_1 trajectory (smoothed)")
axes[1,0].axhline(0, color="k", ls=":", lw=0.6)
axes[1,0].axhline(-0.05, color="C2", ls="--", lw=0.5, label="C λ*")
axes[1,0].axhline(-0.15, color="C3", ls="--", lw=0.5, label="B λ*")
axes[1,0].set_xlabel("step"); axes[1,0].set_ylabel(r"$\lambda_{joint,1}$"); axes[1,0].legend(fontsize=7); axes[1,0].grid(alpha=0.3)

axes[1,1].set_title(r"fraction of batch samples with $\lambda > \lambda^*$")
axes[1,1].set_xlabel("step"); axes[1,1].set_ylabel("fraction"); axes[1,1].legend(fontsize=8); axes[1,1].grid(alpha=0.3)

fig.suptitle("Step 3: contractive flossing as training-time regularizer on HRM")
fig.tight_layout()
fig.savefig(f"{OUT}/step3_trajectories.png", dpi=130)
plt.close(fig)

print("\n== summary ==")
for label, d in logs.items():
    init = d["initial_acc"]; fin = d["final_acc"]
    last_lyap = d["steps"][-1]
    print(f" {label.split(':')[0]}: acc {init:.3f} → {fin:.3f} (Δ {fin-init:+.4f}) "
          f"final λ_mean={last_lyap['lyap1_mean']:+.3f} max={last_lyap['lyap1_max']:+.3f} "
          f"final frac>λ*={last_lyap['frac_above_star']:.2f}")

print(f"\nplots → {OUT}/")
diff --git a/analyze_step3_all.py b/analyze_step3_all.py
new file mode 100644
index 0000000..bfd2676
--- /dev/null
+++ b/analyze_step3_all.py
@@ -0,0 +1,94 @@
+"""Compile ABCDEF Step 3 final analysis."""
+import json, os
+import numpy as np
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+
ROOT = "/home/yurenh2/rrm/research/flossing"
OUT = f"{ROOT}/plots_step3_final"
os.makedirs(OUT, exist_ok=True)

# label -> (log file under ROOT, matplotlib colour, line style)
runs = {
    "A: baseline α=0 from 18228": ("step3_A_baseline_18228.json", "C0", "-"),
    "B: CF α=10 λ*=-0.15 from 18228": ("step3_B_rf_18228.json", "C3", "-"),
    "C: CF α=10 λ*=-0.05 from 26040": ("step3_C_rf_26040.json", "C2", "-"),
    "D: CF α=10 λ*=0 from 26040": ("step3_D_rf_26040_lstar0.json", "C4", "-"),
    "E: CF α=10 λ*=0 from 18228": ("step3_E_rf_18228_lstar0.json", "C1", "-"),
    "F: extended D (1500 step)": ("step3_F_rf_26040_lstar0_1500.json", "C2", "--"),
}


def smooth(x, w=20):
    """Moving average of width ``w``; series shorter than ``w`` are returned as-is."""
    if len(x) < w:
        return x
    return np.convolve(x, np.ones(w)/w, mode="same")


fig, axes = plt.subplots(2, 2, figsize=(15, 9))

summary = []
for label, (fn, color, ls) in runs.items():
    # Context manager so the log file handle is closed after parsing.
    with open(f"{ROOT}/{fn}") as fh:
        d = json.load(fh)
    key = label.split(":")[0]

    eval_steps = [e["step"] for e in d["evals"]]
    eval_accs = [e["acc"] for e in d["evals"]]
    steps = [r["step"] for r in d["steps"]]
    sup = np.array([r["sup_loss"] for r in d["steps"]])
    lyap_mean = np.array([r["lyap1_mean"] for r in d["steps"]])
    frac = np.array([r["frac_above_star"] for r in d["steps"]])

    axes[0,0].plot(eval_steps, eval_accs, f"{color}{ls}", marker="o", label=label, lw=1.5, alpha=0.85)
    axes[0,1].plot(steps, smooth(sup), f"{color}{ls}", label=key, alpha=0.7)
    axes[1,0].plot(steps, smooth(lyap_mean), f"{color}{ls}", label=key, alpha=0.7)
    axes[1,1].plot(steps, smooth(frac), f"{color}{ls}", label=key, alpha=0.7)

    summary.append({
        "key": key, "label": label,
        "color": color,  # reused for the bar chart so it cannot drift from `runs`
        "init_acc": d["initial_acc"],
        "final_acc": d["final_acc"],
        "delta": d["final_acc"] - d["initial_acc"],
        "final_lyap_mean": d["steps"][-1]["lyap1_mean"],
        "final_frac_above": d["steps"][-1]["frac_above_star"],
        "n_steps": len(d["steps"]),
    })

axes[0,0].set_title("Test exact accuracy vs training step")
axes[0,0].set_xlabel("step"); axes[0,0].set_ylabel("exact_acc"); axes[0,0].legend(fontsize=8, loc="best"); axes[0,0].grid(alpha=0.3)

axes[0,1].set_title("Supervised loss (smoothed)")
axes[0,1].set_xlabel("step"); axes[0,1].set_ylabel("sup_loss"); axes[0,1].legend(fontsize=8); axes[0,1].grid(alpha=0.3)

axes[1,0].set_title(r"$\lambda_{joint,1}$ mean trajectory (smoothed)")
axes[1,0].axhline(0, color="k", ls=":", lw=0.6, alpha=0.6)
axes[1,0].set_xlabel("step"); axes[1,0].set_ylabel(r"$\lambda_{1,joint}$"); axes[1,0].legend(fontsize=8); axes[1,0].grid(alpha=0.3)

axes[1,1].set_title(r"Fraction of batch with $\lambda > \lambda^*$ (smoothed)")
axes[1,1].set_xlabel("step"); axes[1,1].set_ylabel("frac > λ*"); axes[1,1].legend(fontsize=8); axes[1,1].grid(alpha=0.3)

fig.suptitle("Step 3 — Contractive Flossing (CF) as training-time regularizer on HRM Sudoku-Extreme-1k", fontsize=11)
fig.tight_layout()
fig.savefig(f"{OUT}/abcdef_full.png", dpi=130)
plt.close(fig)

# Bar chart of final Δ acc (final_acc - initial_acc of each run, in percentage points)
fig, ax = plt.subplots(1, 1, figsize=(9, 5))
keys = [s["key"] for s in summary]
deltas = [s["delta"]*100 for s in summary]
colors = [s["color"] for s in summary]
bars = ax.bar(keys, deltas, color=colors)
ax.axhline(0, color="k", lw=0.6)
for bar, delta in zip(bars, deltas):
    # Put the value label just above positive bars and below negative ones.
    ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + (0.3 if delta>0 else -1),
            f"{delta:+.1f}%", ha="center", fontsize=9, fontweight="bold")
ax.set_ylabel("Δ test exact_accuracy (pp)")
ax.set_title("CF intervention: final accuracy change vs baseline")
ax.grid(alpha=0.3, axis="y")
fig.tight_layout()
fig.savefig(f"{OUT}/abcdef_deltas.png", dpi=130)
plt.close(fig)

print(f"{'key':>3} {'init':>7} {'final':>7} {'Δ':>7} {'n_steps':>8} {'final_λ':>9} {'frac>λ*':>9}")
for s in summary:
    print(f" {s['key']:>3} {s['init_acc']:>7.3f} {s['final_acc']:>7.3f} {s['delta']*100:>6.1f}% {s['n_steps']:>8} "
          f"{s['final_lyap_mean']:>+9.3f} {s['final_frac_above']:>9.2f}")

print(f"\nplots → {OUT}/")
diff --git a/analyze_tangent.py b/analyze_tangent.py
new file mode 100644
index 0000000..32fde5e
--- /dev/null
+++ b/analyze_tangent.py
@@ -0,0 +1,149 @@
+"""Analyze the saved tangent basis Q (final, after all ACT steps).
+
+Q shape: (N, seq_full=82, hidden=512, k=4).
+Splits by success/failure and computes:
+ - position activity per mode: ||Q[:, s, :, i]||_2 averaged
+ - hidden-dim activity per mode
+ - cosine similarity between succ and fail mode subspaces
+"""
+import numpy as np
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+import os
+
# Read the saved tangent basis and correctness flags, then close the archive
# (an NpzFile keeps its file handle open until closed).
with np.load("/home/yurenh2/rrm/research/flossing/tangent_modes_512.npz") as d:
    Q = d["Q_final"]  # (N, seq, hidden, k)
    exact = d["exact_correct"].astype(bool)  # (N,)
N, seq, hidden, K = Q.shape
print(f"N={N}, seq={seq}, hidden={hidden}, K={K}, acc={exact.mean():.3f}")

OUT = "/home/yurenh2/rrm/research/flossing/plots_tangent"
os.makedirs(OUT, exist_ok=True)
+
def pos_activity(Q_, exact_mask):
    """Per-position activity of each tangent mode within one group of samples.

    Selects the samples where ``exact_mask`` is true, takes the L2 norm over
    axis 2 (the hidden axis of an (n, seq, hidden, k) array) and returns the
    mean and standard deviation over samples, each of shape (seq, k).
    """
    group = Q_[exact_mask]                    # (n, seq, hidden, k)
    norms = np.linalg.norm(group, axis=2)     # (n, seq, k)
    mean_by_position = norms.mean(axis=0)
    std_by_position = norms.std(axis=0)
    return mean_by_position, std_by_position
+
+pa_s_mean, pa_s_std = pos_activity(Q, exact)
+pa_f_mean, pa_f_std = pos_activity(Q, ~exact)
+
def hid_activity(Q_, exact_mask):
    """Per-hidden-dimension activity of each tangent mode within one group.

    Selects the samples where ``exact_mask`` is true, takes the L2 norm over
    axis 1 (the position axis of an (n, seq, hidden, k) array) and returns the
    mean and standard deviation over samples, each of shape (hidden, k).
    """
    group = Q_[exact_mask]                    # (n, seq, hidden, k)
    norms = np.linalg.norm(group, axis=1)     # (n, hidden, k)
    mean_by_dim = norms.mean(axis=0)
    std_by_dim = norms.std(axis=0)
    return mean_by_dim, std_by_dim
+
ha_s_mean, _ = hid_activity(Q, exact)
ha_f_mean, _ = hid_activity(Q, ~exact)

# ---- Plot 1: position activity per mode ----
# Fixed 2x2 grid: shows at most the first four modes; surplus panels are hidden.
fig, axes = plt.subplots(2, 2, figsize=(13, 7), sharex=True, sharey=True)
for i, ax in enumerate(axes.flat):
    if i >= K:
        ax.set_visible(False)
        continue
    x = np.arange(seq)
    ax.plot(x, pa_s_mean[:, i], "C0-", label="succ", lw=1.5)
    ax.fill_between(x, pa_s_mean[:, i]-pa_s_std[:, i], pa_s_mean[:, i]+pa_s_std[:, i], color="C0", alpha=0.2)
    ax.plot(x, pa_f_mean[:, i], "C3-", label="fail", lw=1.5)
    ax.fill_between(x, pa_f_mean[:, i]-pa_f_std[:, i], pa_f_mean[:, i]+pa_f_std[:, i], color="C3", alpha=0.2)
    ax.axvline(0.5, color="k", ls=":", lw=0.6)  # mark puzzle_emb boundary
    ax.set_title(f"mode {i+1}: position activity")
    ax.set_xlabel("seq position (0=puzzle_emb, 1-81=Sudoku cells)")
    ax.set_ylabel(r"$\|Q[\cdot,:,i]\|_2$")
    ax.legend()
    ax.grid(alpha=0.3)
fig.suptitle(f"Per-mode position activity (N={N}, acc={exact.mean():.2%})")
fig.tight_layout()
fig.savefig(f"{OUT}/position_activity.png", dpi=130)
plt.close(fig)

# ---- Plot 2: position activity as 9x9 grid (positions 1-81 = Sudoku cells) ----
# squeeze=False keeps `axes` two-dimensional even when K == 1; without it the
# axes[0, i] indexing below fails for a single mode.
fig, axes = plt.subplots(2, K, figsize=(K*3, 6), squeeze=False)
for i in range(K):
    s_grid = pa_s_mean[1:82, i].reshape(9, 9)
    f_grid = pa_f_mean[1:82, i].reshape(9, 9)
    # Shared colour scale so the success and failure panels are comparable.
    vmax = max(s_grid.max(), f_grid.max())
    im0 = axes[0, i].imshow(s_grid, vmin=0, vmax=vmax, cmap="viridis")
    axes[0, i].set_title(f"succ mode {i+1}")
    axes[0, i].set_xticks([]); axes[0, i].set_yticks([])
    im1 = axes[1, i].imshow(f_grid, vmin=0, vmax=vmax, cmap="viridis")
    axes[1, i].set_title(f"fail mode {i+1}")
    axes[1, i].set_xticks([]); axes[1, i].set_yticks([])
    plt.colorbar(im1, ax=axes[:, i], fraction=0.046)
fig.suptitle("Tangent mode activity over the 9x9 Sudoku board")
fig.savefig(f"{OUT}/sudoku_grid_activity.png", dpi=130, bbox_inches="tight")
plt.close(fig)

# ---- Plot 3: hidden activity per mode ----
fig, axes = plt.subplots(2, 2, figsize=(13, 7), sharex=True)
for i, ax in enumerate(axes.flat):
    if i >= K:
        ax.set_visible(False)
        continue
    # sort for cleaner viewing
    order = np.argsort(-(ha_s_mean[:, i] + ha_f_mean[:, i]))
    ax.plot(ha_s_mean[order, i], "C0-", label="succ (sorted)", lw=1)
    ax.plot(ha_f_mean[order, i], "C3-", label="fail (sorted)", lw=1)
    ax.set_title(f"mode {i+1}: hidden-dim activity (sorted by sum)")
    ax.set_xlabel("hidden dim (sorted)")
    ax.set_ylabel(r"$\|Q[:,h,i]\|_2$")
    ax.legend()
    ax.grid(alpha=0.3)
fig.tight_layout()
fig.savefig(f"{OUT}/hidden_activity.png", dpi=130)
plt.close(fig)
+
+# ---- Subspace cosine analysis ----
+# Flatten Q to (N, seq*hidden, k). The k columns span a k-d subspace per sample.
+# Average outer projector P_succ = (1/n) Σ Q_n Q_n^T (over success samples). Same for fail.
+# Then compare top eigenvalues / overlap.
+Qflat = Q.reshape(N, seq*hidden, K)
def projector_sample(qf):
    """Return ``qf`` unchanged.

    Placeholder: the overlap statistics in this script work on the basis
    itself (sum of squared entries of one basis transposed times another), so
    no projector matrix is ever built.
    """
    basis = qf
    return basis
+
def avg_pos_outer(qflat_subset):
    """Mean and std of the pairwise subspace overlap within one group.

    ``qflat_subset`` is (n, state_dim, k). For randomly drawn pairs of distinct
    samples (a, b) the overlap is ``||Q_a^T Q_b||_F^2 / k``. Up to 5000 pairs
    are drawn (with a fixed seed), capped at the number of distinct pairs.
    Returns ``(nan, nan)`` when fewer than two samples are given.
    """
    count = qflat_subset.shape[0]
    generator = np.random.default_rng(0)
    if count < 2:
        return np.nan, np.nan

    n_draws = min(count * (count - 1) // 2, 5000)
    n_modes = qflat_subset.shape[-1]

    def one_overlap():
        first, second = generator.choice(count, size=2, replace=False)
        cross = qflat_subset[first].T @ qflat_subset[second]  # (k, k)
        return (cross**2).sum() / n_modes

    scores = [one_overlap() for _ in range(n_draws)]
    return float(np.mean(scores)), float(np.std(scores))
+
+print("\nSubspace overlap (squared cosine, averaged over random pairs):")
+ovl_ss, std_ss = avg_pos_outer(Qflat[exact])
+ovl_ff, std_ff = avg_pos_outer(Qflat[~exact])
+# Cross-group overlap
def cross_overlap(A, B):
    """Mean and std of the subspace overlap between samples of two groups.

    ``A`` and ``B`` are (n, state_dim, k). Each draw picks one random sample
    from each group (fixed seed) and scores ``||A_a^T B_b||_F^2 / k``; up to
    5000 draws are made, capped at ``len(A) * len(B)``.
    """
    size_a, size_b = A.shape[0], B.shape[0]
    generator = np.random.default_rng(0)
    n_draws = min(size_a * size_b, 5000)
    n_modes = A.shape[-1]

    scores = []
    for _ in range(n_draws):
        pick_a = generator.integers(size_a)
        pick_b = generator.integers(size_b)
        cross = A[pick_a].T @ B[pick_b]
        scores.append((cross**2).sum() / n_modes)
    return float(np.mean(scores)), float(np.std(scores))
+ovl_sf, std_sf = cross_overlap(Qflat[exact], Qflat[~exact])
+
+print(f" succ-succ: {ovl_ss:.4f} ± {std_ss:.4f}")
+print(f" fail-fail: {ovl_ff:.4f} ± {std_ff:.4f}")
+print(f" succ-fail: {ovl_sf:.4f} ± {std_sf:.4f}")
+
+# ---- Final ratio of subspace overlap inside vs cross-group ----
+# If succ-succ and fail-fail >> succ-fail, the two groups live in DIFFERENT subspaces.
+# If they're similar, the unstable subspace is shared.
+
+print("\nplots saved to", OUT)
diff --git a/analyze_tangent_modes.py b/analyze_tangent_modes.py
new file mode 100644
index 0000000..5801ad8
--- /dev/null
+++ b/analyze_tangent_modes.py
@@ -0,0 +1,143 @@
+"""(a) lite: For each test sample, save the final tangent basis Q (top-k modes after
+running through the full inference). Compute position/hidden activity profiles per
+mode and compare success vs failure groups.
+"""
+from __future__ import annotations
+import sys, os, yaml, json, time, argparse
+from pathlib import Path
+import numpy as np
+import torch
+
+HRM_DIR = Path("/home/yurenh2/rrm/hrm")
+sys.path.insert(0, str(HRM_DIR))
+from models.hrm.hrm_act_v1 import HierarchicalReasoningModel_ACTV1
+
+
def load_model(ckpt_root, ckpt_name, device):
    """Build the HRM model from a checkpoint directory and load its weights.

    Args:
        ckpt_root: directory (``Path`` or str) holding ``all_config.yaml`` and
            the checkpoint file.
        ckpt_name: file name of the checkpoint inside ``ckpt_root``.
        device: device the model is moved to.

    Returns:
        (model, cfg, train_meta): the model in eval mode, the parsed YAML config
        and the parsed ``train/dataset.json`` metadata.
    """
    ckpt_root = Path(ckpt_root)
    cfg = yaml.safe_load((ckpt_root / "all_config.yaml").read_text())
    arch_cfg = dict(cfg["arch"])
    # Sequence/vocabulary sizes are not stored in the arch config; they come
    # from the training-set metadata.
    train_meta = json.loads((Path(cfg["data_path"]) / "train" / "dataset.json").read_text())
    arch_cfg.update(batch_size=cfg["global_batch_size"], seq_len=train_meta["seq_len"],
                    vocab_size=train_meta["vocab_size"],
                    num_puzzle_identifiers=train_meta["num_puzzle_identifiers"], causal=False)
    model = HierarchicalReasoningModel_ACTV1(arch_cfg)
    sd = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True)
    # Strip wrapper prefixes from the saved keys, in this order, only when they
    # appear at the start of the key.
    stripped = {}
    for k, v in sd.items():
        nk = k
        for p in ("_orig_mod.", "model."):
            if nk.startswith(p):
                nk = nk[len(p):]
        stripped[nk] = v
    # strict=False tolerates mismatches, but report them: a silent mismatch
    # would leave parts of the model at their initial values.
    missing, unexpected = model.load_state_dict(stripped, strict=False)
    if missing or unexpected:
        print(f"[load] missing={len(missing)} unexpected={len(unexpected)}; "
              f"sample missing={missing[:3]}, sample unexpected={unexpected[:3]}")
    model.to(device).eval()
    return model, cfg, train_meta
+
+
def jvp_one(f, x, v):
    """Evaluate ``f`` at ``x`` and its Jacobian-vector product along ``v``.

    Thin wrapper around ``torch.autograd.functional.jvp`` with graph creation
    and strict mode switched off.  Returns the pair ``(f(x), J_f(x) v)``.
    """
    value, tangent = torch.autograd.functional.jvp(
        f, x, v=v, create_graph=False, strict=False
    )
    return value, tangent
+
+
def run_save_final_Q(model, batch, k_lyap, device, seed):
    """Carry a k-column tangent basis through the whole recursion and return it.

    Starting from a random orthonormal basis, every L_level / H_level update
    pushes each basis column through the update's Jacobian (one JVP per column)
    and the result is re-orthonormalised with QR.  This is repeated for all
    ``halt_max_steps`` ACT steps.

    Args:
        model: object exposing ``model.inner`` (config, H_init/L_init, L_level,
            H_level, lm_head, ...).
        batch: dict with "inputs", "puzzle_identifiers" and "labels" tensors.
        k_lyap: number of tangent directions tracked.
        device: device for the random basis and the batch tensors.
        seed: seed of the generator used for the initial basis.

    Returns:
        (Q_final, exact): ``Q_final`` is a float32 numpy array reshaped to
        (B, seq_full, hidden, k_lyap); ``exact`` is a float numpy array with one
        entry per sample, 1.0 when every position with label > 0 is predicted
        correctly.
    """
    inner = model.inner
    cfg = inner.config
    n_batch = batch["inputs"].shape[0]
    seq_full = cfg.seq_len + inner.puzzle_emb_len
    hidden = cfg.hidden_size
    state_dim = seq_full * hidden

    def _broadcast_init(init_state):
        # Same initial state for every sample, cast to the model's forward dtype.
        return init_state.unsqueeze(0).expand(n_batch, seq_full, hidden).clone().to(inner.forward_dtype)

    z_H = _broadcast_init(inner.H_init)
    z_L = _broadcast_init(inner.L_init)
    seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None)
    input_embeddings = inner._input_embeddings(batch["inputs"].to(device),
                                               batch["puzzle_identifiers"].to(device))

    gen = torch.Generator(device=device).manual_seed(seed)
    Q, _ = torch.linalg.qr(
        torch.randn(n_batch, state_dim, k_lyap, device=device, dtype=torch.float32, generator=gen)
    )

    def _advance(step_fn, state, basis):
        """Apply ``step_fn`` to ``state`` and map ``basis`` through its Jacobian, then QR."""
        pushed = []
        new_state = None
        for col in range(k_lyap):
            direction = basis[..., col].view_as(state)
            new_state, d_col = jvp_one(step_fn, state, direction)
            pushed.append(d_col.reshape(n_batch, state_dim).to(torch.float32))
        ortho, _r = torch.linalg.qr(torch.stack(pushed, dim=-1))
        return new_state, ortho

    with torch.enable_grad():
        for _act in range(cfg.halt_max_steps):
            zH, zL = z_H.detach(), z_L.detach()
            for _h in range(cfg.H_cycles):
                for _l in range(cfg.L_cycles):
                    # Low-level update: z_H and the input embeddings are held fixed.
                    zL, Q = _advance(
                        lambda x: inner.L_level(x, zH + input_embeddings, **seq_info), zL, Q
                    )
                # High-level update: the freshly updated z_L is held fixed.
                zH, Q = _advance(lambda x: inner.H_level(x, zL, **seq_info), zH, Q)
            z_H, z_L = zH, zL

    with torch.no_grad():
        # Drop the puzzle-embedding prefix positions before decoding.
        logits = inner.lm_head(z_H)[:, inner.puzzle_emb_len:].float()
        preds = logits.argmax(dim=-1)
        labels = batch["labels"].to(device)
        mask = labels > 0
        exact = ((preds == labels) | ~mask).all(dim=-1).cpu().float().numpy()

    Q_final = Q.reshape(n_batch, seq_full, hidden, k_lyap).cpu().float().numpy()
    return Q_final, exact
+
+
def main():
    """CLI driver: sample test puzzles, collect their final tangent bases, save an npz."""
    cli = argparse.ArgumentParser()
    cli.add_argument("--ckpt-root", required=True)
    cli.add_argument("--ckpt-name", default="step_26040")
    for flag, default in (("--n-samples", 256), ("--batch-size", 32),
                          ("--k-lyap", 4), ("--seed", 0)):
        cli.add_argument(flag, type=int, default=default)
    cli.add_argument("--out", default="tangent_modes.npz")
    args = cli.parse_args()
    device = "cuda"

    model, cfg, train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device)

    rng = np.random.default_rng(args.seed)
    test_dir = Path(cfg["data_path"]) / "test"
    inputs = np.load(test_dir / "all__inputs.npy")
    labels = np.load(test_dir / "all__labels.npy")
    pid = np.load(test_dir / "all__puzzle_identifiers.npy")
    # Draw the evaluation subset once, without repetition.
    idx = rng.choice(len(inputs), size=args.n_samples, replace=False)

    q_chunks, exact_chunks = [], []
    started = time.time()
    for start in range(0, args.n_samples, args.batch_size):
        stop = min(start + args.batch_size, args.n_samples)
        rows = idx[start:stop]
        batch = {
            name: torch.from_numpy(arr[rows].astype(np.int32))
            for name, arr in (("inputs", inputs), ("labels", labels),
                              ("puzzle_identifiers", pid))
        }
        # A different seed per batch gives each batch its own initial basis.
        Q_final, exact = run_save_final_Q(model, batch, args.k_lyap, device,
                                          seed=args.seed + start)
        q_chunks.append(Q_final)
        exact_chunks.append(exact)
        print(f" [{stop}/{args.n_samples}] dt={time.time()-started:.1f}s exact={exact.mean():.3f}", flush=True)

    Q_all = np.concatenate(q_chunks, axis=0)  # (N, seq, hidden, k)
    exact_all = np.concatenate(exact_chunks, axis=0)  # (N,)
    print(f"saved shape Q={Q_all.shape}, exact={exact_all.shape}, acc={exact_all.mean():.3f}")
    np.savez_compressed(args.out, Q_final=Q_all, exact_correct=exact_all, sample_idx=idx)
    print(f"saved → {args.out}")


if __name__ == "__main__":
    main()
diff --git a/chaos_trm_multi4_step182287_clean_k4_n1000.summary.csv b/chaos_trm_multi4_step182287_clean_k4_n1000.summary.csv
new file mode 100644
index 0000000..88ab53b
--- /dev/null
+++ b/chaos_trm_multi4_step182287_clean_k4_n1000.summary.csv
@@ -0,0 +1,2 @@
+deterministic/exact,deterministic/token_acc,fd_lyap,fd_spectrum_k,include_clean,lambda_fail_mean,lambda_mean,lambda_success_mean,lyap_min/exact,lyap_min/token_acc,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/exact,oracle_pass/token_acc,perturb_both,perturb_h,perturb_l,q_fail_mean,q_max/exact,q_max/token_acc,q_mean,q_minus_0.25lambda/exact,q_minus_0.25lambda/token_acc,q_minus_0.5lambda/exact,q_minus_0.5lambda/token_acc,q_minus_1lambda/exact,q_minus_1lambda/token_acc,q_minus_2lambda/exact,q_minus_2lambda/token_acc,q_success_mean,rollout0/exact,rollout0/token_acc,rollouts,spec_count_pos_fail_mean,spec_count_pos_mean,spec_count_pos_min/exact,spec_count_pos_min/token_acc,spec_count_pos_success_mean,spec_k,spec_mean_fail_mean,spec_mean_mean,spec_mean_min/exact,spec_mean_min/token_acc,spec_mean_success_mean,spec_pos_l2_mean,spec_pos_l2_min/exact,spec_pos_l2_min/token_acc,spec_pos_mass_fail_mean,spec_pos_mass_mean,spec_pos_mass_min/exact,spec_pos_mass_min/token_acc,spec_pos_mass_success_mean,spec_spread_mean,spec_spread_min/exact,spec_spread_min/token_acc,steps
+0.8640000224113464,0.9498025178909302,0.0,4.0,0.0,7.309304714202881,6.157678127288818,5.977945327758789,0.8650000095367432,0.9495925307273865,0.8650000095367432,0.9495925307273865,1000.0,0.0,0.8650000095367432,0.9495925307273865,1.0,0.0,0.0,-10.708564758300781,0.8650000095367432,0.9495925307273865,5.329593658447266,0.8650000095367432,0.9495925307273865,0.8650000095367432,0.9495925307273865,0.8650000095367432,0.9495925307273865,0.8650000095367432,0.9495925307273865,7.832658767700195,0.8650000095367432,0.9495925307273865,1.0,4.0,4.0,0.8650000095367432,0.9495925307273865,4.0,4.0,7.008894443511963,5.972970008850098,0.8650000095367432,0.9495925307273865,5.811293601989746,5.974251747131348,0.8650000095367432,0.9495925307273865,28.03557777404785,23.89188003540039,0.8650000095367432,0.9495925307273865,23.245174407958984,0.30898988246917725,0.8650000095367432,0.9495925307273865,16.0
diff --git a/chaos_trm_multi4_step260410_clean_k4_n1000.summary.csv b/chaos_trm_multi4_step260410_clean_k4_n1000.summary.csv
new file mode 100644
index 0000000..e640bc4
--- /dev/null
+++ b/chaos_trm_multi4_step260410_clean_k4_n1000.summary.csv
@@ -0,0 +1,2 @@
+deterministic/exact,deterministic/token_acc,fd_lyap,fd_spectrum_k,include_clean,lambda_fail_mean,lambda_mean,lambda_success_mean,lyap_min/exact,lyap_min/token_acc,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/exact,oracle_pass/token_acc,perturb_both,perturb_h,perturb_l,q_fail_mean,q_max/exact,q_max/token_acc,q_mean,q_minus_0.25lambda/exact,q_minus_0.25lambda/token_acc,q_minus_0.5lambda/exact,q_minus_0.5lambda/token_acc,q_minus_1lambda/exact,q_minus_1lambda/token_acc,q_minus_2lambda/exact,q_minus_2lambda/token_acc,q_success_mean,rollout0/exact,rollout0/token_acc,rollouts,spec_count_pos_fail_mean,spec_count_pos_mean,spec_count_pos_min/exact,spec_count_pos_min/token_acc,spec_count_pos_success_mean,spec_k,spec_mean_fail_mean,spec_mean_mean,spec_mean_min/exact,spec_mean_min/token_acc,spec_mean_success_mean,spec_pos_l2_mean,spec_pos_l2_min/exact,spec_pos_l2_min/token_acc,spec_pos_mass_fail_mean,spec_pos_mass_mean,spec_pos_mass_min/exact,spec_pos_mass_min/token_acc,spec_pos_mass_success_mean,spec_spread_mean,spec_spread_min/exact,spec_spread_min/token_acc,steps
+0.8320000171661377,0.9365678429603577,0.0,4.0,0.0,7.217503547668457,6.245500564575195,6.071272850036621,0.8479999899864197,0.9428519010543823,0.8479999899864197,0.9428519010543823,1000.0,0.0,0.8479999899864197,0.9428519010543823,1.0,0.0,0.0,-10.5178861618042,0.8479999899864197,0.9428519010543823,5.794531345367432,0.8479999899864197,0.9428519010543823,0.8479999899864197,0.9428519010543823,0.8479999899864197,0.9428519010543823,0.8479999899864197,0.9428519010543823,8.71845531463623,0.8479999899864197,0.9428519010543823,1.0,4.0,4.0,0.8479999899864197,0.9428519010543823,4.0,4.0,6.9086737632751465,6.057910919189453,0.8479999899864197,0.9428519010543823,5.905416011810303,6.059232711791992,0.8479999899864197,0.9428519010543823,27.634695053100586,24.231643676757812,0.8479999899864197,0.9428519010543823,23.62166404724121,0.3121836483478546,0.8479999899864197,0.9428519010543823,16.0
diff --git a/chaos_trm_multi4_success_failure_summary.csv b/chaos_trm_multi4_success_failure_summary.csv
new file mode 100644
index 0000000..ff2d00f
--- /dev/null
+++ b/chaos_trm_multi4_success_failure_summary.csv
@@ -0,0 +1,11 @@
+checkpoint,feature,n,det_acc,rollout_acc,disagree,succ_n,fail_n,succ_mean,fail_mean,delta_fail_minus_succ,succ_median,fail_median,succ_q25,succ_q75,fail_q25,fail_q75,auc_failure,corr_token_acc
+step182287_best,lambda1,1000,0.864,0.865,0.065,864,136,6.0026164054870605,7.142782688140869,1.1401662826538086,5.860308647155762,7.266811370849609,5.824850082397461,6.004127025604248,7.107278347015381,7.365792274475098,0.9588524645969498,-0.7372595375732337
+step182287_best,spec_mean,1000,0.864,0.865,0.065,864,136,5.8336968421936035,6.857765197753906,1.0240683555603027,5.705381870269775,6.9673967361450195,5.6780009269714355,5.8299102783203125,6.840461730957031,7.037558078765869,0.9598481753812637,-0.7403875443857514
+step182287_best,pos_mass,1000,0.864,0.865,0.065,864,136,23.334787368774414,27.431060791015625,4.096273422241211,22.8215274810791,27.869586944580078,22.712003707885742,23.31964111328125,27.361846923828125,28.150232315063477,0.9598481753812637,-0.7403875443857514
+step182287_best,pos_l2,1000,0.864,0.865,0.065,864,136,5.834743499755859,6.8605427742004395,1.02579927444458,5.706200122833252,6.97038459777832,5.678751468658447,5.831027984619141,6.842685699462891,7.040294170379639,0.9598396650326797,-0.7403654596306658
+step182287_best,spread,1000,0.864,0.865,0.065,864,136,0.2796080410480499,0.49565085768699646,0.21604281663894653,0.2521345615386963,0.49982571601867676,0.23640167713165283,0.2870197296142578,0.38921797275543213,0.5801196098327637,0.9201473992374728,-0.6308994031700919
+step260410_final,lambda1,1000,0.832,0.848,0.052,832,168,6.075295448303223,7.088419437408447,1.0131239891052246,5.967571258544922,7.155555725097656,5.938466548919678,6.082776069641113,7.029343128204346,7.297847270965576,0.9660957532051282,-0.7909342942372484
+step260410_final,spec_mean,1000,0.832,0.848,0.052,832,168,5.9091668128967285,6.794551849365234,0.8853850364685059,5.8149518966674805,6.87446928024292,5.792088031768799,5.9152512550354,6.788240432739258,6.948907375335693,0.9685997596153846,-0.7938247800283409
+step260410_final,pos_mass,1000,0.832,0.848,0.052,832,168,23.636667251586914,27.178207397460938,3.5415401458740234,23.259807586669922,27.49787712097168,23.168352127075195,23.6610050201416,27.15296173095703,27.795629501342773,0.9685997596153846,-0.7938247800283409
+step260410_final,pos_l2,1000,0.832,0.848,0.052,832,168,5.910159587860107,6.7974982261657715,0.8873386383056641,5.815634250640869,6.875983238220215,5.792854309082031,5.915988445281982,6.789846897125244,6.953376770019531,0.968585451007326,-0.793837702081877
+step260410_final,spread,1000,0.832,0.848,0.052,832,168,0.273774653673172,0.5023994445800781,0.22862479090690613,0.2473444938659668,0.49890947341918945,0.23368358612060547,0.27810585498809814,0.39853525161743164,0.595841646194458,0.9267900068681318,-0.6832077001617288
diff --git a/compare_ptrm_rollout_counts.py b/compare_ptrm_rollout_counts.py
new file mode 100644
index 0000000..0528820
--- /dev/null
+++ b/compare_ptrm_rollout_counts.py
@@ -0,0 +1,203 @@
+"""Paired comparison of PTRM rollout caches.
+
+This focuses on per-problem correct rollout counts, not only best-Q accuracy.
+Use it when two caches were generated with the same sample seed so `idx` aligns.
+"""
+from __future__ import annotations
+
+import argparse
+import csv
+from pathlib import Path
+
+import numpy as np
+
+
+def _load(path: Path) -> dict[str, np.ndarray]:
+ data = np.load(path)
+ return {name: data[name] for name in data.files}
+
+
+def _take(values: np.ndarray, idx: np.ndarray) -> np.ndarray:
+ return values[np.arange(values.shape[0]), idx]
+
+
def _summary(prefix: str, exact: np.ndarray, q_halt: np.ndarray, det_exact: np.ndarray) -> dict[str, float]:
    """Aggregate statistics of one rollout cache, keyed as ``"<prefix>/<name>"``.

    Args:
        prefix: label prepended to every key.
        exact: per-problem, per-rollout correctness, indexed [problem, rollout].
        q_halt: per-rollout score, same indexing; its argmax along axis 1 selects
            the rollout used for the ``q_max`` statistics.
        det_exact: per-problem correctness of the deterministic run.

    Returns:
        Flat dict of floats: accuracies, correct-count distribution statistics,
        threshold fractions (only thresholds not exceeding the rollout count),
        and the same headline numbers split by deterministic success / failure
        (each split only when it is non-empty).
    """
    n_rollouts = exact.shape[1]
    counts = exact.sum(axis=1).astype(np.float32)
    picked = _take(exact, q_halt.argmax(axis=1)).astype(bool)
    any_hit = exact.any(axis=1)

    stats = {
        f"{prefix}/det_exact": float(det_exact.mean()),
        f"{prefix}/q_max_exact": float(picked.mean()),
        f"{prefix}/oracle_pass": float(any_hit.mean()),
        f"{prefix}/mean_rollout_exact": float(exact.mean()),
        f"{prefix}/correct_count_mean": float(counts.mean()),
        f"{prefix}/correct_count_median": float(np.median(counts)),
    }
    for tag, level in (("q10", 0.10), ("q25", 0.25), ("q75", 0.75), ("q90", 0.90)):
        stats[f"{prefix}/correct_count_{tag}"] = float(np.quantile(counts, level))
    stats[f"{prefix}/zero_correct_frac"] = float((counts == 0).mean())
    stats[f"{prefix}/all_correct_frac"] = float((counts == n_rollouts).mean())

    # "At least T correct rollouts" is only meaningful when T rollouts exist.
    for threshold in (1, 5, 10, 25, 50, 75, 90):
        if threshold > n_rollouts:
            continue
        stats[f"{prefix}/correct_count_ge_{threshold}_frac"] = float((counts >= threshold).mean())

    det_ok = det_exact.astype(bool)
    for group, members in (("det_success", det_ok), ("det_fail", ~det_ok)):
        if not members.any():
            continue
        stats[f"{prefix}/correct_count_{group}_mean"] = float(counts[members].mean())
        stats[f"{prefix}/oracle_{group}_frac"] = float(any_hit[members].mean())
        stats[f"{prefix}/q_max_{group}_frac"] = float(picked[members].mean())
    return stats
+
+
def _prefix_curves(prefix: str, exact: np.ndarray, q_halt: np.ndarray) -> tuple[dict[str, float], list[dict[str, float]]]:
    """Statistics as a function of the number K of rollouts used (first K columns).

    Args:
        prefix: label prepended to every metric key.
        exact: per-problem, per-rollout correctness, indexed [problem, rollout].
        q_halt: per-rollout score with the same indexing.

    Returns:
        (summary, rows).  ``rows`` holds one dict per K = 1..n_rollouts.
        ``summary`` maps ``"<prefix>/K_for_<metric>_<target>"`` to the smallest K
        whose metric reaches the target, or nan when no K does.
    """
    rows: list[dict[str, float]] = []
    for k in range(1, exact.shape[1] + 1):
        head = exact[:, :k]
        any_hit = head.any(axis=1)
        chosen = _take(head, q_halt[:, :k].argmax(axis=1)).astype(bool)
        hits = head.sum(axis=1).astype(np.float32)
        rows.append(
            {
                "K": float(k),
                f"{prefix}/oracle_pass": float(any_hit.mean()),
                f"{prefix}/q_max_exact": float(chosen.mean()),
                f"{prefix}/correct_count_mean": float(hits.mean()),
                f"{prefix}/zero_correct_frac": float((hits == 0).mean()),
                # For K < 5 the threshold is lowered to K (i.e. "all correct").
                f"{prefix}/correct_count_ge_5_frac": float((hits >= min(5, k)).mean()),
                f"{prefix}/correct_count_ge_half_frac": float((hits >= (k / 2.0)).mean()),
            }
        )

    out: dict[str, float] = {}
    for target in (0.90, 0.95, 0.975, 0.99):
        for metric in ("oracle_pass", "q_max_exact"):
            column = f"{prefix}/{metric}"
            first_k = float("nan")
            for row in rows:
                if row[column] >= target:
                    first_k = float(int(row["K"]))
                    break
            out[f"{prefix}/K_for_{metric}_{target:g}"] = first_k
    return out, rows
+
+
def compare(base_path: Path, test_path: Path, base_label: str, test_label: str) -> tuple[dict[str, float | str], list[dict[str, float]], list[dict[str, float]]]:
    """Paired comparison of two rollout caches.

    Args:
        base_path: ``.npz`` cache of the baseline run.
        test_path: ``.npz`` cache of the run being compared against it.
        base_label: key prefix for baseline statistics.
        test_label: key prefix for test statistics.

    Returns:
        (summary, hist_rows, curve_rows):
        ``summary`` is a flat dict with per-cache statistics and paired deltas;
        ``hist_rows`` has one row per correct-count value with the fraction of
        problems at that count in each cache; ``curve_rows`` has one row per
        rollout budget K with both caches' metrics and their differences.

    Raises:
        ValueError: if both caches carry ``idx`` arrays that differ, or if their
            ``exact`` arrays do not have the same shape (the per-problem deltas
            would not be paired).
    """
    base = _load(base_path)
    test = _load(test_path)
    if "idx" in base and "idx" in test and not np.array_equal(base["idx"], test["idx"]):
        raise ValueError("Caches are not paired: idx arrays differ.")

    base_exact = base["exact"].astype(bool)
    test_exact = test["exact"].astype(bool)
    # Every paired statistic below subtracts/combines arrays elementwise, so the
    # two caches must cover the same problems and the same number of rollouts.
    if base_exact.shape != test_exact.shape:
        raise ValueError(
            f"Caches are not paired: exact shapes differ "
            f"({base_exact.shape} vs {test_exact.shape})."
        )
    base_q = base["q_halt"].astype(np.float32)
    test_q = test["q_halt"].astype(np.float32)
    base_det = base["det_exact"].astype(bool)
    test_det = test["det_exact"].astype(bool)

    base_count = base_exact.sum(axis=1).astype(np.float32)
    test_count = test_exact.sum(axis=1).astype(np.float32)
    delta = test_count - base_count
    base_oracle = base_exact.any(axis=1)
    test_oracle = test_exact.any(axis=1)
    base_q_sel = _take(base_exact, base_q.argmax(axis=1)).astype(bool)
    test_q_sel = _take(test_exact, test_q.argmax(axis=1)).astype(bool)

    out: dict[str, float | str] = {
        "base_file": str(base_path),
        "test_file": str(test_path),
        "n_samples": float(base_exact.shape[0]),
        "rollouts": float(base_exact.shape[1]),
    }
    out.update(_summary(base_label, base_exact, base_q, base_det))
    out.update(_summary(test_label, test_exact, test_q, test_det))
    base_curve_summary, base_curve_rows = _prefix_curves(base_label, base_exact, base_q)
    test_curve_summary, test_curve_rows = _prefix_curves(test_label, test_exact, test_q)
    out.update(base_curve_summary)
    out.update(test_curve_summary)

    base_prefix = f"{base_label}/"
    test_prefix = f"{test_label}/"
    curve_rows: list[dict[str, float]] = []
    for base_row, test_row in zip(base_curve_rows, test_curve_rows, strict=True):
        # Merge both caches' metrics for this K; "K" itself is taken from base.
        row = {**base_row, **{k: v for k, v in test_row.items() if k != "K"}}
        row["delta/oracle_pass"] = row[f"{test_prefix}oracle_pass"] - row[f"{base_prefix}oracle_pass"]
        row["delta/q_max_exact"] = row[f"{test_prefix}q_max_exact"] - row[f"{base_prefix}q_max_exact"]
        row["delta/correct_count_mean"] = row[f"{test_prefix}correct_count_mean"] - row[f"{base_prefix}correct_count_mean"]
        curve_rows.append(row)
    out.update(
        {
            "delta_correct_count_mean": float(delta.mean()),
            "delta_correct_count_median": float(np.median(delta)),
            "delta_correct_count_q10": float(np.quantile(delta, 0.10)),
            "delta_correct_count_q25": float(np.quantile(delta, 0.25)),
            "delta_correct_count_q75": float(np.quantile(delta, 0.75)),
            "delta_correct_count_q90": float(np.quantile(delta, 0.90)),
            "test_more_correct_frac": float((delta > 0).mean()),
            "test_equal_correct_frac": float((delta == 0).mean()),
            "test_fewer_correct_frac": float((delta < 0).mean()),
            "base_zero_test_nonzero_frac": float(((base_count == 0) & (test_count > 0)).mean()),
            "base_nonzero_test_zero_frac": float(((base_count > 0) & (test_count == 0)).mean()),
            "both_oracle_success_test_more_frac": float((base_oracle & test_oracle & (delta > 0)).mean()),
            "oracle_test_only_frac": float((~base_oracle & test_oracle).mean()),
            "oracle_base_only_frac": float((base_oracle & ~test_oracle).mean()),
            "oracle_both_success_frac": float((base_oracle & test_oracle).mean()),
            "oracle_both_fail_frac": float((~base_oracle & ~test_oracle).mean()),
            "q_max_test_only_frac": float((~base_q_sel & test_q_sel).mean()),
            "q_max_base_only_frac": float((base_q_sel & ~test_q_sel).mean()),
            "q_max_both_success_frac": float((base_q_sel & test_q_sel).mean()),
            "q_max_both_fail_frac": float((~base_q_sel & ~test_q_sel).mean()),
        }
    )

    hist_rows: list[dict[str, float]] = []
    max_count = int(max(base_count.max(), test_count.max()))
    for count in range(max_count + 1):
        hist_rows.append(
            {
                "correct_count": float(count),
                f"{base_label}_frac": float((base_count == count).mean()),
                f"{test_label}_frac": float((test_count == count).mean()),
            }
        )
    return out, hist_rows, curve_rows
+
+
def main() -> None:
    """CLI entry point: compare two caches, print the summary, optionally write CSVs.

    With ``--out-prefix P`` three files are written: ``P.summary.csv``,
    ``P.hist.csv`` and ``P.kcurve.csv``.  The extension is appended to the
    prefix as given, so a prefix whose last component contains a dot
    (e.g. ``run_k0.5``) keeps its full name.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--base", required=True)
    parser.add_argument("--test", required=True)
    parser.add_argument("--base-label", default="base")
    parser.add_argument("--test-label", default="test")
    parser.add_argument("--out-prefix", default=None)
    args = parser.parse_args()

    summary, hist_rows, curve_rows = compare(Path(args.base), Path(args.test), args.base_label, args.test_label)
    for key in sorted(summary):
        print(f"{key}: {summary[key]}")

    if args.out_prefix:
        prefix = Path(args.out_prefix)
        prefix.parent.mkdir(parents=True, exist_ok=True)

        def _out_path(extension: str) -> Path:
            # Append rather than Path.with_suffix(): with_suffix would replace
            # whatever follows the last dot already present in the prefix name.
            return prefix.with_name(prefix.name + extension)

        summary_path = _out_path(".summary.csv")
        hist_path = _out_path(".hist.csv")
        kcurve_path = _out_path(".kcurve.csv")

        with summary_path.open("w", newline="") as f:
            writer = csv.DictWriter(f, fieldnames=sorted(summary))
            writer.writeheader()
            writer.writerow(summary)
        with hist_path.open("w", newline="") as f:
            writer = csv.DictWriter(f, fieldnames=list(hist_rows[0]))
            writer.writeheader()
            writer.writerows(hist_rows)
        with kcurve_path.open("w", newline="") as f:
            writer = csv.DictWriter(f, fieldnames=list(curve_rows[0]))
            writer.writeheader()
            writer.writerows(curve_rows)
        print(f"saved {summary_path}, {hist_path}, and {kcurve_path}")


if __name__ == "__main__":
    main()
diff --git a/diagnose_hrm.py b/diagnose_hrm.py
new file mode 100644
index 0000000..193fa41
--- /dev/null
+++ b/diagnose_hrm.py
@@ -0,0 +1,306 @@
+"""HRM Sudoku Lyapunov / trajectory diagnostic.
+
+Loads a trained HRM checkpoint, runs inference on a sample of the test set,
+records the recursion trajectory (z_H, z_L at every (act_step, h_cycle, l_cycle)),
+and computes the top Lyapunov exponent of the recursion Jacobian via power
+iteration with JVP. Splits samples by success / failure and writes a npz.
+"""
+from __future__ import annotations
+import os, sys, yaml, math, argparse, json, time
+from pathlib import Path
+import numpy as np
+import torch
+import torch.nn.functional as F
+
+HRM_DIR = Path("/home/yurenh2/rrm/hrm")
+sys.path.insert(0, str(HRM_DIR))
+
+from models.hrm.hrm_act_v1 import (
+ HierarchicalReasoningModel_ACTV1,
+ HierarchicalReasoningModel_ACTV1Config,
+ HierarchicalReasoningModel_ACTV1Carry,
+ HierarchicalReasoningModel_ACTV1InnerCarry,
+)
+
+
def load_model(ckpt_root: Path, ckpt_name: str, device: str = "cuda"):
    """Instantiate the HRM model described in ``ckpt_root`` and load its weights.

    Args:
        ckpt_root: directory containing ``all_config.yaml`` and the checkpoint.
        ckpt_name: checkpoint file name inside ``ckpt_root``.
        device: device the model is moved to.

    Returns:
        (model, cfg, train_meta): the model in eval mode, the parsed YAML config
        and the parsed ``train/dataset.json`` metadata.
    """
    cfg = yaml.safe_load((ckpt_root / "all_config.yaml").read_text())
    # batch_size, seq_len, vocab_size and num_puzzle_identifiers are not part of
    # the arch section; they are filled in from the config and train metadata.
    train_meta = json.loads((Path(cfg["data_path"]) / "train" / "dataset.json").read_text())
    arch_cfg = dict(cfg["arch"])
    arch_cfg.update(
        batch_size=cfg["global_batch_size"],
        seq_len=train_meta["seq_len"],
        vocab_size=train_meta["vocab_size"],
        num_puzzle_identifiers=train_meta["num_puzzle_identifiers"],
        causal=False,
    )
    model = HierarchicalReasoningModel_ACTV1(arch_cfg)
    state = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True)

    def _bare_key(name):
        # Drop the torch.compile (`_orig_mod.`) and ACTLossHead wrapper
        # (`model.`) prefixes, in that order, when present at the start.
        for wrapper in ("_orig_mod.", "model."):
            if name.startswith(wrapper):
                name = name[len(wrapper):]
        return name

    stripped = {}
    for name, tensor in state.items():
        stripped[_bare_key(name)] = tensor

    missing, unexpected = model.load_state_dict(stripped, strict=False)
    if missing or unexpected:
        print(f"[load] missing={len(missing)} unexpected={len(unexpected)}; "
              f"sample missing={missing[:3]}, sample unexpected={unexpected[:3]}")
    model.to(device).eval()
    return model, cfg, train_meta
+
+
def load_test_samples(data_path: Path, n_total: int, shard_id: int = 0, num_shards: int = 1, seed: int = 0):
    """Draw a seed-determined pool of ``n_total`` test samples and return one shard.

    The pool is sampled without replacement from the test set and cut into
    ``num_shards`` contiguous pieces of ``ceil(n_total / num_shards)`` samples
    (the last piece may be shorter).

    Returns:
        Dict with int32 tensors "inputs", "labels", "puzzle_identifiers" for the
        requested shard, plus "idx", the numpy array of dataset indices used.
    """
    rng = np.random.default_rng(seed)
    test_dir = data_path / "test"
    arrays = {
        name: np.load(test_dir / f"all__{name}.npy")
        for name in ("inputs", "labels", "puzzle_identifiers")
    }
    pool = rng.choice(len(arrays["inputs"]), size=n_total, replace=False)

    per_shard = (n_total + num_shards - 1) // num_shards
    lo = shard_id * per_shard
    hi = min(lo + per_shard, n_total)
    idx = pool[lo:hi]

    shard = {name: torch.from_numpy(arr[idx].astype(np.int32)) for name, arr in arrays.items()}
    shard["idx"] = idx
    return shard
+
+
def inner_step(inner, z_H, z_L, input_embeddings, seq_info):
    """Run one full inner forward (H_cycles x L_cycles) and record every state.

    No ``no_grad`` context is entered here, so the updates stay differentiable
    for Jacobian computations.  Each L_level application is one recorded step;
    the H_level update that closes every H cycle is recorded as a step too.

    Returns:
        (z_H, z_L, trajectory): the updated states and a list of detached
        ``(z_H, z_L)`` snapshots, beginning with the initial state.
    """
    n_high = inner.config.H_cycles
    snapshots = []

    def record():
        # Detached copies, so later updates cannot alias stored snapshots.
        snapshots.append((z_H.detach().clone(), z_L.detach().clone()))

    record()
    for _high in range(n_high):
        for _low in range(inner.config.L_cycles):
            z_L = inner.L_level(z_L, z_H + input_embeddings, **seq_info)
            record()
        z_H = inner.H_level(z_H, z_L, **seq_info)
        record()
    return z_H, z_L, snapshots
+
+
+def _flatten(z):
+ """(B, seq, hidden) → (B, seq*hidden)."""
+ return z.reshape(z.shape[0], -1)
+
+
+def _unflatten(v_flat, B, seq, hidden):
+ return v_flat.reshape(B, seq, hidden)
+
+
def jvp_apply_D(f, x, V):
    """Apply the Jacobian of ``f`` at ``x`` to every column of ``V``.

    Args:
        f: function of the state ``x``.
        x: point at which the Jacobian is taken.
        V: tangent vectors of shape (B, state_dim, k); each column is viewed
            with the shape of ``x`` before being pushed through.

    Returns:
        (f(x), DV): ``f(x)`` as returned by the last of the k JVP calls (None
        when k == 0 never gets that far because stacking an empty list fails),
        and ``DV``, float32, with the same shape as ``V``.
    """
    _, _, n_cols = V.shape
    value = None
    pushed = []
    for col in range(n_cols):
        direction = V[..., col].view_as(x)  # (B, seq, hidden)
        value, d_col = torch.autograd.functional.jvp(
            f, x, v=direction, create_graph=False, strict=False
        )
        pushed.append(_flatten(d_col).to(torch.float32))
    return value, torch.stack(pushed, dim=-1)  # (B, state_dim, k)
+
+
def run_diagnose_batch(model, batch, device, halt_max_steps, compute_lyap=True, k_lyap=8, t_ons=1, seed=0):
    """Run inference on one batch and collect drift, halting and Lyapunov statistics.

    For Lyapunov: an orthonormal basis Q of shape (B, state_dim, k_lyap) is kept
    per sample.  At each L_level / H_level update the update's Jacobian is
    applied to Q via JVP; every ``t_ons`` updates Q is re-orthonormalised with QR
    and log|R_ii| is accumulated.  The reported spectrum is the accumulated sum
    divided by the number of QR steps.

    Args:
        model: object exposing ``model.inner``.
        batch: dict with "inputs", "puzzle_identifiers" and "labels" tensors.
        device: device used for the basis and the batch tensors.
        halt_max_steps: number of ACT steps to run (all are always executed).
        compute_lyap: when False (or ``k_lyap`` <= 0) the recursion runs under
            ``no_grad`` and no spectrum is computed.
        k_lyap: number of exponents tracked.
        t_ons: QR re-orthonormalisation interval, in updates.
        seed: passed to ``torch.manual_seed`` before drawing the initial basis
            (note: this reseeds torch's global generator).

    Returns:
        Dict of numpy arrays: "drift_zH" / "drift_zL" (per-ACT-step change norm),
        "halted_at" (first ACT step, 1-based, where q_halt > q_continue; 0 if
        never), "q_halt" / "q_continue", "lyap_spec" ((B, k_lyap) or None),
        "exact_correct" and "token_acc".
    """
    inner = model.inner
    B = batch["inputs"].shape[0]
    # Take the padded sequence length from the model itself (same formula as
    # analyze_tangent_modes.run_save_final_Q) so this function works without
    # main() having populated a module-level global first.
    seq_full = inner.config.seq_len + inner.puzzle_emb_len
    hidden = inner.config.hidden_size
    state_dim = seq_full * hidden

    # Initialize carry
    z_H = inner.H_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
    z_L = inner.L_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)

    seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None)
    input_embeddings = inner._input_embeddings(batch["inputs"].to(device),
                                               batch["puzzle_identifiers"].to(device))

    # Initialize orthonormal Q basis for top-k Lyapunov
    if compute_lyap and k_lyap > 0:
        torch.manual_seed(seed)
        # Init random Gaussian then QR
        Q0 = torch.randn(B, state_dim, k_lyap, device=device, dtype=torch.float32)
        Q, _ = torch.linalg.qr(Q0)  # Q: (B, state_dim, k_lyap), orthonormal columns
        log_R_sum = torch.zeros(B, k_lyap, device=device, dtype=torch.float32)
    n_lyap_steps = 0
    step_counter = 0

    drift_zH_per_step = []
    drift_zL_per_step = []
    halted_at = torch.zeros(B, dtype=torch.long, device=device)
    q_halt_history = []

    final_logits = None
    for act_step in range(halt_max_steps):
        z_H_prev = z_H.detach().clone()
        z_L_prev = z_L.detach().clone()

        if compute_lyap and k_lyap > 0:
            with torch.enable_grad():
                zH = z_H.detach()
                zL = z_L.detach()
                for _h in range(inner.config.H_cycles):
                    for _l in range(inner.config.L_cycles):
                        f = lambda x: inner.L_level(x, zH + input_embeddings, **seq_info)
                        zL_new, DV = jvp_apply_D(f, zL, Q)  # DV: (B, state_dim, k)
                        Q = DV  # evolved tangent
                        zL = zL_new
                        step_counter += 1
                        if step_counter % t_ons == 0:
                            Q, R = torch.linalg.qr(Q)  # Q (B, state_dim, k), R (B, k, k)
                            # clamp avoids log(0) for a collapsed direction
                            log_R_sum += R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
                            n_lyap_steps += 1
                    f = lambda x: inner.H_level(x, zL, **seq_info)
                    zH_new, DV = jvp_apply_D(f, zH, Q)
                    Q = DV
                    zH = zH_new
                    step_counter += 1
                    if step_counter % t_ons == 0:
                        Q, R = torch.linalg.qr(Q)
                        log_R_sum += R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
                        n_lyap_steps += 1
                z_H = zH
                z_L = zL
        else:
            with torch.no_grad():
                for _h in range(inner.config.H_cycles):
                    for _l in range(inner.config.L_cycles):
                        z_L = inner.L_level(z_L, z_H + input_embeddings, **seq_info)
                    z_H = inner.H_level(z_H, z_L, **seq_info)

        drift_zH_per_step.append((z_H - z_H_prev).float().flatten(1).norm(dim=1).cpu())
        drift_zL_per_step.append((z_L - z_L_prev).float().flatten(1).norm(dim=1).cpu())

        with torch.no_grad():
            q_logits = inner.q_head(z_H[:, 0]).float()
            q_halt = q_logits[..., 0]
            q_continue = q_logits[..., 1]
            q_halt_history.append((q_halt.cpu(), q_continue.cpu()))
            # Record only the first step at which a sample votes to halt;
            # the recursion itself keeps running for every sample.
            newly = (q_halt > q_continue) & (halted_at == 0)
            halted_at[newly] = act_step + 1
            output = inner.lm_head(z_H)[:, inner.puzzle_emb_len:].float()
            final_logits = output

    lyap_spec = (log_R_sum / max(n_lyap_steps, 1)).cpu().numpy() if (compute_lyap and k_lyap > 0) else None

    with torch.no_grad():
        preds = final_logits.argmax(dim=-1)
        labels = batch["labels"].to(device)
        mask = labels > 0  # positions with label <= 0 are not scored
        exact = ((preds == labels) | ~mask).all(dim=-1).cpu().float()
        token_acc = ((preds == labels) & mask).sum(-1).float() / mask.sum(-1).float().clamp_min(1)
        token_acc = token_acc.cpu()

    return {
        "drift_zH": torch.stack(drift_zH_per_step, dim=1).numpy(),
        "drift_zL": torch.stack(drift_zL_per_step, dim=1).numpy(),
        "halted_at": halted_at.cpu().numpy(),
        "q_halt": torch.stack([h[0] for h in q_halt_history], dim=1).numpy(),
        "q_continue": torch.stack([h[1] for h in q_halt_history], dim=1).numpy(),
        "lyap_spec": lyap_spec,  # (B, k_lyap)
        "exact_correct": exact.numpy(),
        "token_acc": token_acc.numpy(),
    }
+
+
def main():
    """CLI driver: load a checkpoint, diagnose one shard of test samples, save an npz.

    Also prints a per-exponent summary split by success / failure when a
    Lyapunov spectrum was computed.
    """
    global train_meta_seq_full

    cli = argparse.ArgumentParser()
    cli.add_argument("--ckpt-root", required=True,
                     help="path containing all_config.yaml and step_X")
    cli.add_argument("--ckpt-name", default="step_26040")
    cli.add_argument("--n-samples", type=int, default=5, help="total sample pool")
    cli.add_argument("--shard-id", type=int, default=0)
    cli.add_argument("--num-shards", type=int, default=1)
    cli.add_argument("--batch-size", type=int, default=64)
    cli.add_argument("--out", default="diagnose_out.npz")
    cli.add_argument("--seed", type=int, default=0)
    cli.add_argument("--no-lyap", action="store_true")
    cli.add_argument("--k-lyap", type=int, default=8, help="top-k Lyapunov exponents to compute")
    cli.add_argument("--t-ons", type=int, default=1, help="QR reorthonormalization interval")
    args = cli.parse_args()

    device = "cuda"
    print(f"Loading model from {args.ckpt_root}/{args.ckpt_name} ...")
    model, cfg, train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device)
    inner_cfg = model.inner.config
    # Module-level value consumed by run_diagnose_batch.
    train_meta_seq_full = train_meta["seq_len"] + model.inner.puzzle_emb_len
    print(f" hidden={inner_cfg.hidden_size}, seq_full={train_meta_seq_full}, "
          f"halt_max_steps={inner_cfg.halt_max_steps}, "
          f"H_cycles={inner_cfg.H_cycles}, L_cycles={inner_cfg.L_cycles}")

    test_samples = load_test_samples(Path(cfg["data_path"]), args.n_samples,
                                     shard_id=args.shard_id, num_shards=args.num_shards,
                                     seed=args.seed)
    n_this_shard = len(test_samples["inputs"])
    print(f"Loaded shard {args.shard_id}/{args.num_shards}: {n_this_shard} samples")

    collected = {name: [] for name in ("drift_zH", "drift_zL", "halted_at", "q_halt",
                                       "q_continue", "lyap_spec", "exact_correct",
                                       "token_acc", "idx")}
    started = time.time()
    for start in range(0, n_this_shard, args.batch_size):
        stop = min(start + args.batch_size, n_this_shard)
        batch = {name: test_samples[name][start:stop].to(device)
                 for name in ("inputs", "labels", "puzzle_identifiers")}
        out = run_diagnose_batch(
            model, batch, device,
            halt_max_steps=model.inner.config.halt_max_steps,
            compute_lyap=not args.no_lyap, k_lyap=args.k_lyap, t_ons=args.t_ons,
            seed=args.seed + start,
        )
        for name, value in out.items():
            # lyap_spec is None when the spectrum was not computed.
            if value is not None:
                collected[name].append(value)
        collected["idx"].append(test_samples["idx"][start:stop])

        if out["lyap_spec"] is not None:
            lyap_str = (f" lyap_max={out['lyap_spec'][:,0].mean():.4f} "
                        f"lyap_min={out['lyap_spec'][:,-1].mean():.4f}")
        else:
            lyap_str = ""
        print(f" [{stop}/{n_this_shard}] dt={time.time()-started:.1f}s "
              f"exact={out['exact_correct'].mean():.3f}{lyap_str}", flush=True)

    # Stack
    saved = {}
    for name, chunks in collected.items():
        if not chunks:
            continue
        try:
            saved[name] = np.concatenate(chunks, axis=0)
        except ValueError:
            saved[name] = np.stack(chunks, axis=0)
    np.savez_compressed(args.out, **saved)
    print(f"saved to {args.out}")
    print(f"summary:")
    print(f" N={len(saved['exact_correct'])} acc={saved['exact_correct'].mean():.3f}")
    if "lyap_spec" in saved:
        ls = saved["lyap_spec"]  # (N, k)
        succ = saved["exact_correct"] > 0.5
        print(f" lyap_spec shape: {ls.shape}")
        for i in range(ls.shape[1]):
            li = ls[:, i]
            succ_mean = li[succ].mean()
            fail_mean = li[~succ].mean()
            print(f" λ_{i+1}: overall={li.mean():+.4f}±{li.std():.4f} "
                  f"succ={succ_mean:+.4f} fail={fail_mean:+.4f} "
                  f"Δ={fail_mean-succ_mean:+.4f}")


if __name__ == "__main__":
    main()
diff --git a/diagnose_hrm_joint.py b/diagnose_hrm_joint.py
new file mode 100644
index 0000000..3ba39cf
--- /dev/null
+++ b/diagnose_hrm_joint.py
@@ -0,0 +1,240 @@
+"""HRM Sudoku Lyapunov diagnostic with CORRECTED joint (z_H, z_L) tangent tracking.
+
+Key fix over diagnose_hrm.py:
+ - State is conceptually (z_H, z_L) ∈ R^{2D} where D = seq_full * hidden.
+ - L_level update: z_L_new = layers_L(z_L + z_H + input_embeddings), so
+ v_L_new = J_L · (v_H + v_L), v_H_new = v_H
+ - H_level update: z_H_new = layers_H(z_H + z_L), so
+ v_H_new = J_H · (v_H + v_L), v_L_new = v_L
+ - Each L or H cycle = ONE JVP per tangent column (same cost as before),
+ but operating on the combined tangent v_H + v_L.
+ - Q is (B, 2D, k); QR over the 2D dimension keeps an orthonormal basis.
+"""
+from __future__ import annotations
+import sys, os, yaml, math, argparse, json, time
+from pathlib import Path
+import numpy as np
+import torch
+
+HRM_DIR = Path("/home/yurenh2/rrm/hrm")
+sys.path.insert(0, str(HRM_DIR))
+
+from models.hrm.hrm_act_v1 import HierarchicalReasoningModel_ACTV1
+
+
+ def load_model(ckpt_root: Path, ckpt_name: str, device: str):
+     """Build the HRM ACT-v1 model described by a checkpoint directory and load its weights.
+
+     Args:
+         ckpt_root: directory holding ``all_config.yaml`` and the checkpoint file.
+         ckpt_name: file name of the checkpoint inside ``ckpt_root``.
+         device: torch device string the model is moved to.
+
+     Returns:
+         ``(model, cfg, train_meta)``: the model in eval mode on ``device``, the parsed
+         ``all_config.yaml`` dict, and the parsed ``train/dataset.json`` dict found under
+         ``cfg["data_path"]``.
+
+     Raises:
+         RuntimeError: if no checkpoint key matches any model parameter, i.e. the model
+             would otherwise silently keep its random initialisation.
+     """
+     cfg = yaml.safe_load((ckpt_root / "all_config.yaml").read_text())
+     arch_cfg = dict(cfg["arch"])
+     train_meta = json.loads((Path(cfg["data_path"]) / "train" / "dataset.json").read_text())
+     arch_cfg.update(batch_size=cfg["global_batch_size"], seq_len=train_meta["seq_len"],
+                     vocab_size=train_meta["vocab_size"],
+                     num_puzzle_identifiers=train_meta["num_puzzle_identifiers"], causal=False)
+     model = HierarchicalReasoningModel_ACTV1(arch_cfg)
+     sd = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True)
+     # Remove wrapper fragments from the key names so they line up with the bare model
+     # (presumably added by torch.compile and a training-time wrapper — TODO confirm).
+     stripped = {k.replace("_orig_mod.", "").replace("model.", ""): v for k, v in sd.items()}
+     # strict=False accepts partial matches; inspect the result instead of discarding it,
+     # otherwise a naming mismatch yields diagnostics of an untrained network.
+     load_result = model.load_state_dict(stripped, strict=False)
+     missing = list(load_result.missing_keys)
+     unexpected = list(load_result.unexpected_keys)
+     if stripped and len(unexpected) == len(stripped):
+         raise RuntimeError(
+             f"no key of checkpoint {ckpt_root / ckpt_name} matches the model "
+             f"(e.g. {unexpected[:3]}); weights were not loaded")
+     if missing or unexpected:
+         print(f"WARNING: partial checkpoint load: {len(missing)} missing keys "
+               f"(e.g. {missing[:3]}), {len(unexpected)} unexpected keys "
+               f"(e.g. {unexpected[:3]})", file=sys.stderr)
+     model.to(device).eval()
+     return model, cfg, train_meta
+
+
+ def load_test_samples(data_path: Path, n_total: int, shard_id: int, num_shards: int, seed: int):
+     """Draw a seeded random subset of the test split and return one shard of it.
+
+     Every call with the same ``seed`` and ``n_total`` draws the same global index list;
+     shard ``shard_id`` takes the contiguous slice of size ``ceil(n_total / num_shards)``
+     starting at ``shard_id * shard_size``. Because of the ceil division, trailing shards
+     can be shorter than the others or empty.
+
+     Args:
+         data_path: dataset root containing ``test/all__inputs.npy``,
+             ``test/all__labels.npy`` and ``test/all__puzzle_identifiers.npy``.
+         n_total: number of test rows to sample across all shards. Values larger than
+             the number of available rows are clamped (with a warning on stderr).
+         shard_id: index of the shard to return.
+         num_shards: number of shards the sample is divided into.
+         seed: seed of the NumPy generator that picks the rows.
+
+     Returns:
+         dict with int32 tensors ``"inputs"``, ``"labels"``, ``"puzzle_identifiers"`` for
+         the shard and ``"idx"``, the NumPy array of selected dataset row indices.
+     """
+     rng = np.random.default_rng(seed)
+     inputs = np.load(data_path / "test" / "all__inputs.npy")
+     labels = np.load(data_path / "test" / "all__labels.npy")
+     pid = np.load(data_path / "test" / "all__puzzle_identifiers.npy")
+     # Sampling without replacement cannot return more rows than exist; clamp rather
+     # than let rng.choice raise ValueError.
+     if n_total > len(inputs):
+         print(f"WARNING: requested {n_total} samples but the test split has only "
+               f"{len(inputs)}; using all of them", file=sys.stderr)
+         n_total = len(inputs)
+     all_idx = rng.choice(len(inputs), size=n_total, replace=False)
+     shard_size = (n_total + num_shards - 1) // num_shards
+     s, e = shard_id * shard_size, min((shard_id + 1) * shard_size, n_total)
+     idx = all_idx[s:e]
+     return {
+         "inputs": torch.from_numpy(inputs[idx].astype(np.int32)),
+         "labels": torch.from_numpy(labels[idx].astype(np.int32)),
+         "puzzle_identifiers": torch.from_numpy(pid[idx].astype(np.int32)),
+         "idx": idx,
+     }
+
+
+ def jvp_through(f, x, v):
+     """Evaluate ``f`` at ``x`` together with one Jacobian-vector product along ``v``.
+
+     Returns the pair ``(f(x), J_f(x) @ v)``. The result carries no graph
+     (``create_graph=False``) because it is only consumed for diagnostics.
+     """
+     primal_out, tangent_out = torch.autograd.functional.jvp(
+         f, x, v=v, create_graph=False, strict=False
+     )
+     return primal_out, tangent_out
+
+
+ def run_diagnose_batch(model, batch, device, k_lyap, t_ons, seed):
+ """Compute joint top-k Lyapunov spectrum over (z_H, z_L) joint tangent.
+
+ Runs all cfg.halt_max_steps ACT steps for every sample (the halting head is only
+ recorded, never used to stop), pushing k_lyap tangent columns through each L/H update
+ and re-orthonormalising them by QR every t_ons updates.
+
+ Per L_level step:
+ v_L_new = J_L · (v_H + v_L), v_H_new = v_H
+ Per H_level step:
+ v_H_new = J_H · (v_H + v_L), v_L_new = v_L
+
+ Args:
+ model: object exposing `.inner` with config, H_init, L_init, L_level, H_level,
+ q_head, lm_head and _input_embeddings.
+ batch: dict with "inputs", "puzzle_identifiers" and "labels" tensors.
+ device: torch device used for the tangent basis and the RNG.
+ k_lyap: number of tangent columns (exponents) tracked.
+ t_ons: number of L/H updates between QR re-orthonormalisations.
+ seed: seed of the generator that draws the initial tangent basis.
+
+ Returns:
+ dict of numpy arrays: "drift_zH"/"drift_zL" (per-sample L2 change of each state per
+ ACT step), "halted_at" (1-based first step with q_halt > q_continue, 0 if never),
+ "q_halt"/"q_continue" (per step), "lyap_spec" (B, k), "exact_correct", "token_acc".
+
+ NOTE(review): zL_new and final_logits are only bound inside the loops, so this
+ presumably requires k_lyap >= 1, L_cycles >= 1 and halt_max_steps >= 1 — confirm.
+ """
+ inner = model.inner
+ cfg = inner.config
+ B = batch["inputs"].shape[0]
+ seq_full = cfg.seq_len + inner.puzzle_emb_len
+ hidden = cfg.hidden_size
+ state_dim = seq_full * hidden # one of (z_H or z_L)
+ total_dim = 2 * state_dim # joint (v_H, v_L)
+
+ # Carry init
+ z_H = inner.H_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
+ z_L = inner.L_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
+ seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None)
+ input_embeddings = inner._input_embeddings(batch["inputs"].to(device),
+ batch["puzzle_identifiers"].to(device))
+
+ # Joint orthonormal tangent basis
+ g = torch.Generator(device=device).manual_seed(seed)
+ Q0 = torch.randn(B, total_dim, k_lyap, device=device, dtype=torch.float32, generator=g)
+ Q, _ = torch.linalg.qr(Q0) # (B, 2D, k)
+ # Running sum of log|R_ii| over all QR steps, and the number of QR steps taken.
+ log_R_sum = torch.zeros(B, k_lyap, device=device, dtype=torch.float32)
+ n_lyap_steps = 0
+ step_counter = 0
+
+ drift_zH_per_step, drift_zL_per_step = [], []
+ halted_at = torch.zeros(B, dtype=torch.long, device=device)
+ q_halt_hist, q_continue_hist = [], []
+
+ for act_step in range(cfg.halt_max_steps):
+ z_H_prev = z_H.detach().clone()
+ z_L_prev = z_L.detach().clone()
+
+ with torch.enable_grad():
+ zH, zL = z_H.detach(), z_L.detach()
+ for _h in range(cfg.H_cycles):
+ for _l in range(cfg.L_cycles):
+ # --- joint tangent: prep v_combined = v_H + v_L ---
+ v_H_all = Q[:, :state_dim, :] # (B, D, k)
+ v_L_all = Q[:, state_dim:, :]
+ v_comb = v_H_all + v_L_all
+ # --- k JVPs through L_level ---
+ new_v_L_cols = []
+ f_L = lambda z: inner.L_level(z, zH + input_embeddings, **seq_info)
+ for i in range(k_lyap):
+ v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype)
+ # Each call re-evaluates f_L(zL); the primal output is the same for every column.
+ zL_new, Dv = jvp_through(f_L, zL, v_i)
+ new_v_L_cols.append(Dv.reshape(B, state_dim).to(torch.float32))
+ new_v_L = torch.stack(new_v_L_cols, dim=-1) # (B, D, k)
+ # Reassemble Q (v_H unchanged, v_L updated)
+ Q = torch.cat([v_H_all, new_v_L], dim=1)
+ zL = zL_new
+ step_counter += 1
+ if step_counter % t_ons == 0:
+ Q, R = torch.linalg.qr(Q)
+ log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
+ n_lyap_steps += 1
+
+ # --- H step: v_comb = v_H + v_L, JVP through H_level ---
+ v_H_all = Q[:, :state_dim, :]
+ v_L_all = Q[:, state_dim:, :]
+ v_comb = v_H_all + v_L_all
+ new_v_H_cols = []
+ f_H = lambda z: inner.H_level(z, zL, **seq_info)
+ for i in range(k_lyap):
+ v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype)
+ zH_new, Dv = jvp_through(f_H, zH, v_i)
+ new_v_H_cols.append(Dv.reshape(B, state_dim).to(torch.float32))
+ new_v_H = torch.stack(new_v_H_cols, dim=-1)
+ Q = torch.cat([new_v_H, v_L_all], dim=1)
+ zH = zH_new
+ step_counter += 1
+ if step_counter % t_ons == 0:
+ Q, R = torch.linalg.qr(Q)
+ log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
+ n_lyap_steps += 1
+
+ z_H, z_L = zH, zL
+
+ drift_zH_per_step.append((z_H - z_H_prev).float().flatten(1).norm(dim=1).cpu())
+ drift_zL_per_step.append((z_L - z_L_prev).float().flatten(1).norm(dim=1).cpu())
+
+ with torch.no_grad():
+ q_logits = inner.q_head(z_H[:, 0]).float()
+ q_halt, q_continue = q_logits[..., 0], q_logits[..., 1]
+ q_halt_hist.append(q_halt.cpu()); q_continue_hist.append(q_continue.cpu())
+ # Record only the first step at which halting would trigger (0 means "not yet").
+ new_halt = (q_halt > q_continue) & (halted_at == 0)
+ halted_at[new_halt] = act_step + 1
+ output = inner.lm_head(z_H)[:, inner.puzzle_emb_len:].float()
+ # Overwritten every step, so accuracy below is measured on the last ACT step.
+ final_logits = output
+
+ # NOTE(review): the average is per QR call, not per L/H update; with t_ons > 1 each QR
+ # spans t_ons updates, so values are not directly comparable across t_ons — confirm.
+ lyap_spec = (log_R_sum / max(n_lyap_steps, 1)).cpu().numpy() # (B, k)
+
+ with torch.no_grad():
+ preds = final_logits.argmax(dim=-1)
+ labels = batch["labels"].to(device)
+ # Positions with label <= 0 are excluded from both accuracy measures.
+ mask = labels > 0
+ exact = ((preds == labels) | ~mask).all(dim=-1).cpu().float()
+ token_acc = ((preds == labels) & mask).sum(-1).float() / mask.sum(-1).float().clamp_min(1)
+ token_acc = token_acc.cpu()
+
+ return {
+ "drift_zH": torch.stack(drift_zH_per_step, dim=1).numpy(),
+ "drift_zL": torch.stack(drift_zL_per_step, dim=1).numpy(),
+ "halted_at": halted_at.cpu().numpy(),
+ "q_halt": torch.stack(q_halt_hist, dim=1).numpy(),
+ "q_continue": torch.stack(q_continue_hist, dim=1).numpy(),
+ "lyap_spec": lyap_spec,
+ "exact_correct": exact.numpy(),
+ "token_acc": token_acc.numpy(),
+ }
+
+
+ def main():
+     """CLI entry point: run the joint Lyapunov diagnostic on one shard and save an ``.npz``.
+
+     Parses the command line, loads the checkpoint and the requested shard of test
+     samples, runs ``run_diagnose_batch`` batch by batch, concatenates the per-batch
+     arrays (plus the sampled dataset indices under ``"idx"``) into ``--out`` and prints
+     each Lyapunov exponent averaged overall, over solved and over unsolved puzzles.
+     An empty shard still writes ``--out`` but skips the summary table.
+     """
+     ap = argparse.ArgumentParser()
+     ap.add_argument("--ckpt-root", required=True)
+     ap.add_argument("--ckpt-name", default="step_26040")
+     ap.add_argument("--n-samples", type=int, default=1024)
+     ap.add_argument("--shard-id", type=int, default=0)
+     ap.add_argument("--num-shards", type=int, default=1)
+     ap.add_argument("--batch-size", type=int, default=32)
+     ap.add_argument("--k-lyap", type=int, default=8)
+     ap.add_argument("--t-ons", type=int, default=1)
+     ap.add_argument("--seed", type=int, default=0)
+     ap.add_argument("--out", default="diag_joint.npz")
+     args = ap.parse_args()
+
+     device = "cuda"
+     model, cfg, train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device)
+     print(f"loaded {args.ckpt_name}: hidden={model.inner.config.hidden_size}, "
+           f"seq_full={train_meta['seq_len'] + model.inner.puzzle_emb_len}, "
+           f"halt_max_steps={model.inner.config.halt_max_steps}, "
+           f"H={model.inner.config.H_cycles} L={model.inner.config.L_cycles}")
+
+     test_samples = load_test_samples(Path(cfg["data_path"]), args.n_samples,
+                                      args.shard_id, args.num_shards, args.seed)
+     n_this = len(test_samples["inputs"])
+     print(f"shard {args.shard_id}/{args.num_shards}: {n_this} samples")
+
+     results = {k: [] for k in ["drift_zH","drift_zL","halted_at","q_halt","q_continue",
+                                "lyap_spec","exact_correct","token_acc","idx"]}
+     t0 = time.time()
+     for s in range(0, n_this, args.batch_size):
+         e = min(s + args.batch_size, n_this)
+         batch = {k: test_samples[k][s:e].to(device) for k in ["inputs","labels","puzzle_identifiers"]}
+         # Offset the seed by the batch start so each batch gets its own tangent basis.
+         out = run_diagnose_batch(model, batch, device, args.k_lyap, args.t_ons, args.seed + s)
+         for k, v in out.items():
+             if v is not None:
+                 results[k].append(v)
+         results["idx"].append(test_samples["idx"][s:e])
+         ls = out["lyap_spec"]
+         print(f" [{e}/{n_this}] dt={time.time()-t0:.1f}s exact={out['exact_correct'].mean():.3f} "
+               f"λ_1={ls[:,0].mean():.4f} λ_{args.k_lyap}={ls[:,-1].mean():.4f}", flush=True)
+
+     saved = {}
+     for k, v in results.items():
+         if not v:
+             continue
+         try:
+             saved[k] = np.concatenate(v, 0)
+         except ValueError:
+             saved[k] = np.stack(v, 0)
+     np.savez_compressed(args.out, **saved)
+
+     if "lyap_spec" not in saved:
+         # The shard slice is min()-clamped in load_test_samples, so a trailing shard can
+         # be empty; there is nothing to summarise then and indexing `saved` would raise.
+         print(f"\nN=0: shard {args.shard_id}/{args.num_shards} is empty, no summary")
+         print(f"\nsaved → {args.out}")
+         return
+
+     def _group_mean(values):
+         # Mean of a possibly empty group (all puzzles solved, or none) without a
+         # numpy "mean of empty slice" warning.
+         return values.mean() if values.size else float("nan")
+
+     ls = saved["lyap_spec"]
+     succ = saved["exact_correct"] > 0.5
+     print(f"\nN={len(saved['exact_correct'])} acc={succ.mean():.4f}")
+     print(f"{'i':>3} {'mean':>10} {'succ':>10} {'fail':>10} {'Δ(f-s)':>10}")
+     for i in range(ls.shape[1]):
+         li = ls[:, i]
+         m_succ, m_fail = _group_mean(li[succ]), _group_mean(li[~succ])
+         print(f"{i+1:>3} {li.mean():+10.4f} {m_succ:+10.4f} {m_fail:+10.4f} "
+               f"{m_fail-m_succ:+10.4f}")
+     print(f"\nsaved → {args.out}")
+
+
+ if __name__ == "__main__":
+     main()
diff --git a/diagnose_hrm_joint_clv.py b/diagnose_hrm_joint_clv.py
new file mode 100644
index 0000000..227ce67
--- /dev/null
+++ b/diagnose_hrm_joint_clv.py
@@ -0,0 +1,310 @@
+"""HRM Sudoku Lyapunov diagnostic with CORRECTED joint (z_H, z_L) tangent tracking.
+
+Key fix over diagnose_hrm.py:
+ - State is conceptually (z_H, z_L) ∈ R^{2D} where D = seq_full * hidden.
+ - L_level update: z_L_new = layers_L(z_L + z_H + input_embeddings), so
+ v_L_new = J_L · (v_H + v_L), v_H_new = v_H
+ - H_level update: z_H_new = layers_H(z_H + z_L), so
+ v_H_new = J_H · (v_H + v_L), v_L_new = v_L
+ - Each L or H cycle = ONE JVP per tangent column (same cost as before),
+ but operating on the combined tangent v_H + v_L.
+ - Q is (B, 2D, k); QR over the 2D dimension keeps an orthonormal basis.
+"""
+from __future__ import annotations
+import sys, os, yaml, math, argparse, json, time
+from pathlib import Path
+import numpy as np
+import torch
+
+HRM_DIR = Path("/home/yurenh2/rrm/hrm")
+sys.path.insert(0, str(HRM_DIR))
+
+from models.hrm.hrm_act_v1 import HierarchicalReasoningModel_ACTV1
+
+
+ def load_model(ckpt_root: Path, ckpt_name: str, device: str):
+     """Build the HRM ACT-v1 model described by a checkpoint directory and load its weights.
+
+     Args:
+         ckpt_root: directory holding ``all_config.yaml`` and the checkpoint file.
+         ckpt_name: file name of the checkpoint inside ``ckpt_root``.
+         device: torch device string the model is moved to.
+
+     Returns:
+         ``(model, cfg, train_meta)``: the model in eval mode on ``device``, the parsed
+         ``all_config.yaml`` dict, and the parsed ``train/dataset.json`` dict found under
+         ``cfg["data_path"]``.
+
+     Raises:
+         RuntimeError: if no checkpoint key matches any model parameter, i.e. the model
+             would otherwise silently keep its random initialisation.
+     """
+     cfg = yaml.safe_load((ckpt_root / "all_config.yaml").read_text())
+     arch_cfg = dict(cfg["arch"])
+     train_meta = json.loads((Path(cfg["data_path"]) / "train" / "dataset.json").read_text())
+     arch_cfg.update(batch_size=cfg["global_batch_size"], seq_len=train_meta["seq_len"],
+                     vocab_size=train_meta["vocab_size"],
+                     num_puzzle_identifiers=train_meta["num_puzzle_identifiers"], causal=False)
+     model = HierarchicalReasoningModel_ACTV1(arch_cfg)
+     sd = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True)
+     # Remove wrapper fragments from the key names so they line up with the bare model
+     # (presumably added by torch.compile and a training-time wrapper — TODO confirm).
+     stripped = {k.replace("_orig_mod.", "").replace("model.", ""): v for k, v in sd.items()}
+     # strict=False accepts partial matches; inspect the result instead of discarding it,
+     # otherwise a naming mismatch yields diagnostics of an untrained network.
+     load_result = model.load_state_dict(stripped, strict=False)
+     missing = list(load_result.missing_keys)
+     unexpected = list(load_result.unexpected_keys)
+     if stripped and len(unexpected) == len(stripped):
+         raise RuntimeError(
+             f"no key of checkpoint {ckpt_root / ckpt_name} matches the model "
+             f"(e.g. {unexpected[:3]}); weights were not loaded")
+     if missing or unexpected:
+         print(f"WARNING: partial checkpoint load: {len(missing)} missing keys "
+               f"(e.g. {missing[:3]}), {len(unexpected)} unexpected keys "
+               f"(e.g. {unexpected[:3]})", file=sys.stderr)
+     model.to(device).eval()
+     return model, cfg, train_meta
+
+
+ def load_test_samples(data_path: Path, n_total: int, shard_id: int, num_shards: int, seed: int):
+     """Draw a seeded random subset of the test split and return one shard of it.
+
+     Every call with the same ``seed`` and ``n_total`` draws the same global index list;
+     shard ``shard_id`` takes the contiguous slice of size ``ceil(n_total / num_shards)``
+     starting at ``shard_id * shard_size``. Because of the ceil division, trailing shards
+     can be shorter than the others or empty.
+
+     Args:
+         data_path: dataset root containing ``test/all__inputs.npy``,
+             ``test/all__labels.npy`` and ``test/all__puzzle_identifiers.npy``.
+         n_total: number of test rows to sample across all shards. Values larger than
+             the number of available rows are clamped (with a warning on stderr).
+         shard_id: index of the shard to return.
+         num_shards: number of shards the sample is divided into.
+         seed: seed of the NumPy generator that picks the rows.
+
+     Returns:
+         dict with int32 tensors ``"inputs"``, ``"labels"``, ``"puzzle_identifiers"`` for
+         the shard and ``"idx"``, the NumPy array of selected dataset row indices.
+     """
+     rng = np.random.default_rng(seed)
+     inputs = np.load(data_path / "test" / "all__inputs.npy")
+     labels = np.load(data_path / "test" / "all__labels.npy")
+     pid = np.load(data_path / "test" / "all__puzzle_identifiers.npy")
+     # Sampling without replacement cannot return more rows than exist; clamp rather
+     # than let rng.choice raise ValueError.
+     if n_total > len(inputs):
+         print(f"WARNING: requested {n_total} samples but the test split has only "
+               f"{len(inputs)}; using all of them", file=sys.stderr)
+         n_total = len(inputs)
+     all_idx = rng.choice(len(inputs), size=n_total, replace=False)
+     shard_size = (n_total + num_shards - 1) // num_shards
+     s, e = shard_id * shard_size, min((shard_id + 1) * shard_size, n_total)
+     idx = all_idx[s:e]
+     return {
+         "inputs": torch.from_numpy(inputs[idx].astype(np.int32)),
+         "labels": torch.from_numpy(labels[idx].astype(np.int32)),
+         "puzzle_identifiers": torch.from_numpy(pid[idx].astype(np.int32)),
+         "idx": idx,
+     }
+
+
+ def jvp_through(f, x, v):
+     """Evaluate ``f`` at ``x`` together with one Jacobian-vector product along ``v``.
+
+     Returns the pair ``(f(x), J_f(x) @ v)``. The result carries no graph
+     (``create_graph=False``) because it is only consumed for diagnostics.
+     """
+     primal_out, tangent_out = torch.autograd.functional.jvp(
+         f, x, v=v, create_graph=False, strict=False
+     )
+     return primal_out, tangent_out
+
+
+ def run_diagnose_batch(model, batch, device, k_lyap, t_ons, seed):
+ """Compute joint top-k Lyapunov spectrum over (z_H, z_L) joint tangent, plus CLV statistics.
+
+ Runs all cfg.halt_max_steps ACT steps for every sample (the halting head is only
+ recorded, never used to stop), pushing k_lyap tangent columns through each L/H update
+ and re-orthonormalising them by QR every t_ons updates. The R factors are kept so a
+ backward triangular-solve pass can express vectors in the basis snapshotted half-way.
+
+ Per L_level step:
+ v_L_new = J_L · (v_H + v_L), v_H_new = v_H
+ Per H_level step:
+ v_H_new = J_H · (v_H + v_L), v_L_new = v_L
+
+ Args:
+ model: object exposing `.inner` with config, H_init, L_init, L_level, H_level,
+ q_head, lm_head and _input_embeddings.
+ batch: dict with "inputs", "puzzle_identifiers" and "labels" tensors.
+ device: torch device used for the tangent basis and the RNGs.
+ k_lyap: number of tangent columns (exponents) tracked.
+ t_ons: number of L/H updates between QR re-orthonormalisations.
+ seed: seed for the initial tangent basis; seed + 9973 seeds the backward-pass init.
+
+ Returns:
+ dict of numpy arrays: the forward diagnostics ("drift_zH", "drift_zL", "halted_at",
+ "q_halt", "q_continue", "lyap_spec", "exact_correct", "token_acc"), statistics of
+ the first min(3, k_lyap) columns of the final Q ("clv_hfrac", "clv_pr",
+ "clv_pr_ans", "clv_readout"), and statistics of the backward-pass vectors V
+ ("clv_maxcos", "clv_meancos", "clv_minangle", "clv_true_hfrac",
+ "clv_true_pr_ans", "clv_true_readout").
+
+ NOTE(review): zL_new and final_logits are only bound inside the loops, so this
+ presumably requires k_lyap >= 1, L_cycles >= 1 and halt_max_steps >= 1 — confirm.
+ """
+ inner = model.inner
+ cfg = inner.config
+ B = batch["inputs"].shape[0]
+ seq_full = cfg.seq_len + inner.puzzle_emb_len
+ hidden = cfg.hidden_size
+ state_dim = seq_full * hidden # one of (z_H or z_L)
+ total_dim = 2 * state_dim # joint (v_H, v_L)
+
+ # Carry init
+ z_H = inner.H_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
+ z_L = inner.L_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
+ seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None)
+ input_embeddings = inner._input_embeddings(batch["inputs"].to(device),
+ batch["puzzle_identifiers"].to(device))
+
+ # Joint orthonormal tangent basis
+ g = torch.Generator(device=device).manual_seed(seed)
+ Q0 = torch.randn(B, total_dim, k_lyap, device=device, dtype=torch.float32, generator=g)
+ Q, _ = torch.linalg.qr(Q0) # (B, 2D, k)
+ # Running sum of log|R_ii| over all QR steps, and the number of QR steps taken.
+ log_R_sum = torch.zeros(B, k_lyap, device=device, dtype=torch.float32)
+ n_lyap_steps = 0
+ step_counter = 0
+
+ drift_zH_per_step, drift_zL_per_step = [], []
+ halted_at = torch.zeros(B, dtype=torch.long, device=device)
+ q_halt_hist, q_continue_hist = [], []
+
+ # R factor of every QR step (forward order) and the basis Q right after QR number
+ # target_qr, which is half of the expected total number of QR steps.
+ R_list = []
+ Q_mid = None
+ target_qr = (cfg.halt_max_steps * cfg.H_cycles * (cfg.L_cycles + 1) // t_ons) // 2
+
+ for act_step in range(cfg.halt_max_steps):
+ z_H_prev = z_H.detach().clone()
+ z_L_prev = z_L.detach().clone()
+
+ with torch.enable_grad():
+ zH, zL = z_H.detach(), z_L.detach()
+ for _h in range(cfg.H_cycles):
+ for _l in range(cfg.L_cycles):
+ # --- joint tangent: prep v_combined = v_H + v_L ---
+ v_H_all = Q[:, :state_dim, :] # (B, D, k)
+ v_L_all = Q[:, state_dim:, :]
+ v_comb = v_H_all + v_L_all
+ # --- k JVPs through L_level ---
+ new_v_L_cols = []
+ f_L = lambda z: inner.L_level(z, zH + input_embeddings, **seq_info)
+ for i in range(k_lyap):
+ v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype)
+ # Each call re-evaluates f_L(zL); the primal output is the same for every column.
+ zL_new, Dv = jvp_through(f_L, zL, v_i)
+ new_v_L_cols.append(Dv.reshape(B, state_dim).to(torch.float32))
+ new_v_L = torch.stack(new_v_L_cols, dim=-1) # (B, D, k)
+ # Reassemble Q (v_H unchanged, v_L updated)
+ Q = torch.cat([v_H_all, new_v_L], dim=1)
+ zL = zL_new
+ step_counter += 1
+ if step_counter % t_ons == 0:
+ Q, R = torch.linalg.qr(Q)
+ log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
+ n_lyap_steps += 1
+ R_list.append(R.detach())
+ if n_lyap_steps == target_qr:
+ Q_mid = Q.detach().clone()
+
+ # --- H step: v_comb = v_H + v_L, JVP through H_level ---
+ v_H_all = Q[:, :state_dim, :]
+ v_L_all = Q[:, state_dim:, :]
+ v_comb = v_H_all + v_L_all
+ new_v_H_cols = []
+ f_H = lambda z: inner.H_level(z, zL, **seq_info)
+ for i in range(k_lyap):
+ v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype)
+ zH_new, Dv = jvp_through(f_H, zH, v_i)
+ new_v_H_cols.append(Dv.reshape(B, state_dim).to(torch.float32))
+ new_v_H = torch.stack(new_v_H_cols, dim=-1)
+ Q = torch.cat([new_v_H, v_L_all], dim=1)
+ zH = zH_new
+ step_counter += 1
+ if step_counter % t_ons == 0:
+ Q, R = torch.linalg.qr(Q)
+ log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
+ n_lyap_steps += 1
+ R_list.append(R.detach())
+ if n_lyap_steps == target_qr:
+ Q_mid = Q.detach().clone()
+
+ z_H, z_L = zH, zL
+
+ drift_zH_per_step.append((z_H - z_H_prev).float().flatten(1).norm(dim=1).cpu())
+ drift_zL_per_step.append((z_L - z_L_prev).float().flatten(1).norm(dim=1).cpu())
+
+ with torch.no_grad():
+ q_logits = inner.q_head(z_H[:, 0]).float()
+ q_halt, q_continue = q_logits[..., 0], q_logits[..., 1]
+ q_halt_hist.append(q_halt.cpu()); q_continue_hist.append(q_continue.cpu())
+ # Record only the first step at which halting would trigger (0 means "not yet").
+ new_halt = (q_halt > q_continue) & (halted_at == 0)
+ halted_at[new_halt] = act_step + 1
+ output = inner.lm_head(z_H)[:, inner.puzzle_emb_len:].float()
+ # Overwritten every step, so accuracy below is measured on the last ACT step.
+ final_logits = output
+
+ # NOTE(review): the average is per QR call, not per L/H update; with t_ons > 1 each QR
+ # spans t_ons updates, so values are not directly comparable across t_ons — confirm.
+ lyap_spec = (log_R_sum / max(n_lyap_steps, 1)).cpu().numpy() # (B, k)
+
+ # Ginelli CLV backward pass -> covariant Lyapunov vectors at the midpoint; pairwise CLV angles.
+ with torch.no_grad():
+ # NOTE(review): when target_qr == 0 the snapshot is never taken and the final Q is
+ # used, while the loop below still walks back over every stored R — confirm intended.
+ if Q_mid is None:
+ Q_mid = Q
+ g2 = torch.Generator(device=device).manual_seed(seed + 9973)
+ # Random upper-triangular coefficient matrix with unit-norm columns.
+ C = torch.triu(torch.randn(B, k_lyap, k_lyap, device=device, dtype=torch.float32, generator=g2))
+ C = C / C.norm(dim=1, keepdim=True).clamp_min(1e-20)
+ # Walk the R factors recorded after the midpoint in reverse, solving R X = C each time.
+ for R in reversed(R_list[target_qr:]):
+ Rf = R.float()
+ diag = Rf.diagonal(dim1=-2, dim2=-1)
+ # Add 1e-8 to diagonal entries smaller than 1e-8 in magnitude before solving.
+ Rf = Rf + torch.diag_embed(torch.where(diag.abs() < 1e-8, torch.full_like(diag, 1e-8), torch.zeros_like(diag)))
+ C = torch.linalg.solve_triangular(Rf, C, upper=True)
+ C = C / C.norm(dim=1, keepdim=True).clamp_min(1e-20)
+ # Vectors in state space: midpoint basis times coefficients, columns renormalised.
+ V = torch.matmul(Q_mid.float(), C)
+ V = V / V.norm(dim=1, keepdim=True).clamp_min(1e-20)
+ # |cosine| between every pair of columns of V; the diagonal is masked out.
+ gram = torch.matmul(V.transpose(1, 2), V).abs()
+ eye = torch.eye(k_lyap, device=device).unsqueeze(0)
+ off = gram * (1 - eye)
+ clv_maxcos = off.amax(dim=(1, 2)).cpu().numpy()
+ # NOTE(review): the denominator k_lyap * (k_lyap - 1) is zero for k_lyap == 1.
+ clv_meancos = (off.sum(dim=(1, 2)) / (k_lyap * (k_lyap - 1))).cpu().numpy()
+ clv_minangle = torch.acos(off.amax(dim=(1, 2)).clamp(max=1 - 1e-7)).cpu().numpy()
+ # Statistics of column 0 of V: share of squared norm in the z_H half, normalised
+ # participation ratio over positions after the puzzle-embedding prefix, and the
+ # norm ratio of its z_H part before/after multiplication by the lm_head weight.
+ Dh0 = seq_full * hidden; pe0 = inner.puzzle_emb_len
+ vH = V[:, :Dh0, 0].reshape(B, seq_full, hidden)
+ eH = (vH ** 2).sum(-1); eL = (V[:, Dh0:, 0].reshape(B, seq_full, hidden) ** 2).sum(-1)
+ e = eH + eL
+ clv_true_hfrac = (eH.sum(-1) / e.sum(-1).clamp_min(1e-20)).cpu().numpy()
+ clv_true_pr_ans = (e[:, pe0:].sum(-1) ** 2 / (e[:, pe0:] ** 2).sum(-1).clamp_min(1e-20) / max(seq_full - pe0, 1)).cpu().numpy()
+ ro = torch.matmul(vH[:, pe0:], inner.lm_head.weight.float().t())
+ clv_true_readout = (ro.flatten(1).norm(dim=1) / vH[:, pe0:].flatten(1).norm(dim=1).clamp_min(1e-20)).cpu().numpy()
+
+ with torch.no_grad():
+ preds = final_logits.argmax(dim=-1)
+ labels = batch["labels"].to(device)
+ # Positions with label <= 0 are excluded from both accuracy measures.
+ mask = labels > 0
+ exact = ((preds == labels) | ~mask).all(dim=-1).cpu().float()
+ token_acc = ((preds == labels) & mask).sum(-1).float() / mask.sum(-1).float().clamp_min(1)
+ token_acc = token_acc.cpu()
+
+ # Geometry of the leading columns of the final orthonormal basis Q.
+ # NOTE(review): these are the QR-orthonormalised vectors, not the backward-pass vectors V
+ # computed above; presumably only column 0 coincides with a covariant Lyapunov
+ # vector, so the "clv_" prefix on these keys is loose — confirm.
+ with torch.no_grad():
+ Dh = seq_full * hidden
+ n_top = min(3, k_lyap)
+ pe = inner.puzzle_emb_len
+ W = inner.lm_head.weight.float()
+ hfrac_c, pr_c, prans_c, ro_c = [], [], [], []
+ for j in range(n_top):
+ col = Q[:, :, j].float()
+ qH = col[:, :Dh].reshape(B, seq_full, hidden)
+ qL = col[:, Dh:].reshape(B, seq_full, hidden)
+ # Per-position squared norm in each half and in total.
+ eH = (qH ** 2).sum(-1); eL = (qL ** 2).sum(-1)
+ e = eH + eL
+ hfrac_c.append((eH.sum(-1) / e.sum(-1).clamp_min(1e-20)).cpu())
+ # Participation ratio (sum e)^2 / sum e^2, divided by the number of positions.
+ pr_c.append((e.sum(-1) ** 2 / (e ** 2).sum(-1).clamp_min(1e-20) / seq_full).cpu())
+ ea = e[:, pe:]
+ prans_c.append((ea.sum(-1) ** 2 / (ea ** 2).sum(-1).clamp_min(1e-20) / max(seq_full - pe, 1)).cpu())
+ ro = torch.matmul(qH[:, pe:], W.t())
+ ro_c.append((ro.flatten(1).norm(dim=1) / qH[:, pe:].flatten(1).norm(dim=1).clamp_min(1e-20)).cpu())
+
+ return {
+ "drift_zH": torch.stack(drift_zH_per_step, dim=1).numpy(),
+ "drift_zL": torch.stack(drift_zL_per_step, dim=1).numpy(),
+ "halted_at": halted_at.cpu().numpy(),
+ "q_halt": torch.stack(q_halt_hist, dim=1).numpy(),
+ "q_continue": torch.stack(q_continue_hist, dim=1).numpy(),
+ "lyap_spec": lyap_spec,
+ "exact_correct": exact.numpy(),
+ "token_acc": token_acc.numpy(),
+ "clv_hfrac": torch.stack(hfrac_c, 1).numpy(),
+ "clv_pr": torch.stack(pr_c, 1).numpy(),
+ "clv_pr_ans": torch.stack(prans_c, 1).numpy(),
+ "clv_readout": torch.stack(ro_c, 1).numpy(),
+ "clv_maxcos": clv_maxcos,
+ "clv_meancos": clv_meancos,
+ "clv_minangle": clv_minangle,
+ "clv_true_hfrac": clv_true_hfrac,
+ "clv_true_pr_ans": clv_true_pr_ans,
+ "clv_true_readout": clv_true_readout,
+ }
+
+
+ def main():
+     """CLI entry point: run the joint Lyapunov/CLV diagnostic on one shard and save an ``.npz``.
+
+     Parses the command line, loads the checkpoint and the requested shard of test
+     samples, runs ``run_diagnose_batch`` batch by batch, concatenates every per-batch
+     array it returns (plus the sampled dataset indices under ``"idx"``) into ``--out``
+     and prints each Lyapunov exponent averaged overall, over solved and over unsolved
+     puzzles. An empty shard still writes ``--out`` but skips the summary table.
+     """
+     ap = argparse.ArgumentParser()
+     ap.add_argument("--ckpt-root", required=True)
+     ap.add_argument("--ckpt-name", default="step_26040")
+     ap.add_argument("--n-samples", type=int, default=1024)
+     ap.add_argument("--shard-id", type=int, default=0)
+     ap.add_argument("--num-shards", type=int, default=1)
+     ap.add_argument("--batch-size", type=int, default=32)
+     ap.add_argument("--k-lyap", type=int, default=8)
+     ap.add_argument("--t-ons", type=int, default=1)
+     ap.add_argument("--seed", type=int, default=0)
+     ap.add_argument("--out", default="diag_joint.npz")
+     args = ap.parse_args()
+
+     device = "cuda"
+     model, cfg, train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device)
+     print(f"loaded {args.ckpt_name}: hidden={model.inner.config.hidden_size}, "
+           f"seq_full={train_meta['seq_len'] + model.inner.puzzle_emb_len}, "
+           f"halt_max_steps={model.inner.config.halt_max_steps}, "
+           f"H={model.inner.config.H_cycles} L={model.inner.config.L_cycles}")
+
+     test_samples = load_test_samples(Path(cfg["data_path"]), args.n_samples,
+                                      args.shard_id, args.num_shards, args.seed)
+     n_this = len(test_samples["inputs"])
+     print(f"shard {args.shard_id}/{args.num_shards}: {n_this} samples")
+
+     results = {k: [] for k in ["drift_zH","drift_zL","halted_at","q_halt","q_continue",
+                                "lyap_spec","exact_correct","token_acc","idx"]}
+     t0 = time.time()
+     for s in range(0, n_this, args.batch_size):
+         e = min(s + args.batch_size, n_this)
+         batch = {k: test_samples[k][s:e].to(device) for k in ["inputs","labels","puzzle_identifiers"]}
+         # Offset the seed by the batch start so each batch gets its own tangent basis.
+         out = run_diagnose_batch(model, batch, device, args.k_lyap, args.t_ons, args.seed + s)
+         for k, v in out.items():
+             # setdefault: the batch function returns keys beyond the pre-seeded ones.
+             if v is not None:
+                 results.setdefault(k, []).append(v)
+         results.setdefault("idx", []).append(test_samples["idx"][s:e])
+         ls = out["lyap_spec"]
+         print(f" [{e}/{n_this}] dt={time.time()-t0:.1f}s exact={out['exact_correct'].mean():.3f} "
+               f"λ_1={ls[:,0].mean():.4f} λ_{args.k_lyap}={ls[:,-1].mean():.4f}", flush=True)
+
+     saved = {}
+     for k, v in results.items():
+         if not v:
+             continue
+         try:
+             saved[k] = np.concatenate(v, 0)
+         except ValueError:
+             saved[k] = np.stack(v, 0)
+     np.savez_compressed(args.out, **saved)
+
+     if "lyap_spec" not in saved:
+         # The shard slice is min()-clamped in load_test_samples, so a trailing shard can
+         # be empty; there is nothing to summarise then and indexing `saved` would raise.
+         print(f"\nN=0: shard {args.shard_id}/{args.num_shards} is empty, no summary")
+         print(f"\nsaved → {args.out}")
+         return
+
+     def _group_mean(values):
+         # Mean of a possibly empty group (all puzzles solved, or none) without a
+         # numpy "mean of empty slice" warning.
+         return values.mean() if values.size else float("nan")
+
+     ls = saved["lyap_spec"]
+     succ = saved["exact_correct"] > 0.5
+     print(f"\nN={len(saved['exact_correct'])} acc={succ.mean():.4f}")
+     print(f"{'i':>3} {'mean':>10} {'succ':>10} {'fail':>10} {'Δ(f-s)':>10}")
+     for i in range(ls.shape[1]):
+         li = ls[:, i]
+         m_succ, m_fail = _group_mean(li[succ]), _group_mean(li[~succ])
+         print(f"{i+1:>3} {li.mean():+10.4f} {m_succ:+10.4f} {m_fail:+10.4f} "
+               f"{m_fail-m_succ:+10.4f}")
+     print(f"\nsaved → {args.out}")
+
+
+ if __name__ == "__main__":
+     main()
diff --git a/diagnose_hrm_joint_geom.py b/diagnose_hrm_joint_geom.py
new file mode 100644
index 0000000..41c8629
--- /dev/null
+++ b/diagnose_hrm_joint_geom.py
@@ -0,0 +1,264 @@
+"""HRM Sudoku Lyapunov diagnostic with CORRECTED joint (z_H, z_L) tangent tracking.
+
+Key fix over diagnose_hrm.py:
+ - State is conceptually (z_H, z_L) ∈ R^{2D} where D = seq_full * hidden.
+ - L_level update: z_L_new = layers_L(z_L + z_H + input_embeddings), so
+ v_L_new = J_L · (v_H + v_L), v_H_new = v_H
+ - H_level update: z_H_new = layers_H(z_H + z_L), so
+ v_H_new = J_H · (v_H + v_L), v_L_new = v_L
+ - Each L or H cycle = ONE JVP per tangent column (same cost as before),
+ but operating on the combined tangent v_H + v_L.
+ - Q is (B, 2D, k); QR over the 2D dimension keeps an orthonormal basis.
+"""
+from __future__ import annotations
+import sys, os, yaml, math, argparse, json, time
+from pathlib import Path
+import numpy as np
+import torch
+
+HRM_DIR = Path("/home/yurenh2/rrm/hrm")
+sys.path.insert(0, str(HRM_DIR))
+
+from models.hrm.hrm_act_v1 import HierarchicalReasoningModel_ACTV1
+
+
+ def load_model(ckpt_root: Path, ckpt_name: str, device: str):
+     """Build the HRM ACT-v1 model described by a checkpoint directory and load its weights.
+
+     Args:
+         ckpt_root: directory holding ``all_config.yaml`` and the checkpoint file.
+         ckpt_name: file name of the checkpoint inside ``ckpt_root``.
+         device: torch device string the model is moved to.
+
+     Returns:
+         ``(model, cfg, train_meta)``: the model in eval mode on ``device``, the parsed
+         ``all_config.yaml`` dict, and the parsed ``train/dataset.json`` dict found under
+         ``cfg["data_path"]``.
+
+     Raises:
+         RuntimeError: if no checkpoint key matches any model parameter, i.e. the model
+             would otherwise silently keep its random initialisation.
+     """
+     cfg = yaml.safe_load((ckpt_root / "all_config.yaml").read_text())
+     arch_cfg = dict(cfg["arch"])
+     train_meta = json.loads((Path(cfg["data_path"]) / "train" / "dataset.json").read_text())
+     arch_cfg.update(batch_size=cfg["global_batch_size"], seq_len=train_meta["seq_len"],
+                     vocab_size=train_meta["vocab_size"],
+                     num_puzzle_identifiers=train_meta["num_puzzle_identifiers"], causal=False)
+     model = HierarchicalReasoningModel_ACTV1(arch_cfg)
+     sd = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True)
+     # Remove wrapper fragments from the key names so they line up with the bare model
+     # (presumably added by torch.compile and a training-time wrapper — TODO confirm).
+     stripped = {k.replace("_orig_mod.", "").replace("model.", ""): v for k, v in sd.items()}
+     # strict=False accepts partial matches; inspect the result instead of discarding it,
+     # otherwise a naming mismatch yields diagnostics of an untrained network.
+     load_result = model.load_state_dict(stripped, strict=False)
+     missing = list(load_result.missing_keys)
+     unexpected = list(load_result.unexpected_keys)
+     if stripped and len(unexpected) == len(stripped):
+         raise RuntimeError(
+             f"no key of checkpoint {ckpt_root / ckpt_name} matches the model "
+             f"(e.g. {unexpected[:3]}); weights were not loaded")
+     if missing or unexpected:
+         print(f"WARNING: partial checkpoint load: {len(missing)} missing keys "
+               f"(e.g. {missing[:3]}), {len(unexpected)} unexpected keys "
+               f"(e.g. {unexpected[:3]})", file=sys.stderr)
+     model.to(device).eval()
+     return model, cfg, train_meta
+
+
+ def load_test_samples(data_path: Path, n_total: int, shard_id: int, num_shards: int, seed: int):
+     """Draw a seeded random subset of the test split and return one shard of it.
+
+     Every call with the same ``seed`` and ``n_total`` draws the same global index list;
+     shard ``shard_id`` takes the contiguous slice of size ``ceil(n_total / num_shards)``
+     starting at ``shard_id * shard_size``. Because of the ceil division, trailing shards
+     can be shorter than the others or empty.
+
+     Args:
+         data_path: dataset root containing ``test/all__inputs.npy``,
+             ``test/all__labels.npy`` and ``test/all__puzzle_identifiers.npy``.
+         n_total: number of test rows to sample across all shards. Values larger than
+             the number of available rows are clamped (with a warning on stderr).
+         shard_id: index of the shard to return.
+         num_shards: number of shards the sample is divided into.
+         seed: seed of the NumPy generator that picks the rows.
+
+     Returns:
+         dict with int32 tensors ``"inputs"``, ``"labels"``, ``"puzzle_identifiers"`` for
+         the shard and ``"idx"``, the NumPy array of selected dataset row indices.
+     """
+     rng = np.random.default_rng(seed)
+     inputs = np.load(data_path / "test" / "all__inputs.npy")
+     labels = np.load(data_path / "test" / "all__labels.npy")
+     pid = np.load(data_path / "test" / "all__puzzle_identifiers.npy")
+     # Sampling without replacement cannot return more rows than exist; clamp rather
+     # than let rng.choice raise ValueError.
+     if n_total > len(inputs):
+         print(f"WARNING: requested {n_total} samples but the test split has only "
+               f"{len(inputs)}; using all of them", file=sys.stderr)
+         n_total = len(inputs)
+     all_idx = rng.choice(len(inputs), size=n_total, replace=False)
+     shard_size = (n_total + num_shards - 1) // num_shards
+     s, e = shard_id * shard_size, min((shard_id + 1) * shard_size, n_total)
+     idx = all_idx[s:e]
+     return {
+         "inputs": torch.from_numpy(inputs[idx].astype(np.int32)),
+         "labels": torch.from_numpy(labels[idx].astype(np.int32)),
+         "puzzle_identifiers": torch.from_numpy(pid[idx].astype(np.int32)),
+         "idx": idx,
+     }
+
+
+ def jvp_through(f, x, v):
+     """Evaluate ``f`` at ``x`` together with one Jacobian-vector product along ``v``.
+
+     Returns the pair ``(f(x), J_f(x) @ v)``. The result carries no graph
+     (``create_graph=False``) because it is only consumed for diagnostics.
+     """
+     primal_out, tangent_out = torch.autograd.functional.jvp(
+         f, x, v=v, create_graph=False, strict=False
+     )
+     return primal_out, tangent_out
+
+
+ def run_diagnose_batch(model, batch, device, k_lyap, t_ons, seed):
+ """Compute joint top-k Lyapunov spectrum over (z_H, z_L) joint tangent, plus basis geometry.
+
+ Runs all cfg.halt_max_steps ACT steps for every sample (the halting head is only
+ recorded, never used to stop), pushing k_lyap tangent columns through each L/H update
+ and re-orthonormalising them by QR every t_ons updates. Afterwards it summarises the
+ first min(3, k_lyap) columns of the final basis Q.
+
+ Per L_level step:
+ v_L_new = J_L · (v_H + v_L), v_H_new = v_H
+ Per H_level step:
+ v_H_new = J_H · (v_H + v_L), v_L_new = v_L
+
+ Args:
+ model: object exposing `.inner` with config, H_init, L_init, L_level, H_level,
+ q_head, lm_head and _input_embeddings.
+ batch: dict with "inputs", "puzzle_identifiers" and "labels" tensors.
+ device: torch device used for the tangent basis and the RNG.
+ k_lyap: number of tangent columns (exponents) tracked.
+ t_ons: number of L/H updates between QR re-orthonormalisations.
+ seed: seed of the generator that draws the initial tangent basis.
+
+ Returns:
+ dict of numpy arrays: "drift_zH"/"drift_zL" (per-sample L2 change of each state per
+ ACT step), "halted_at" (1-based first step with q_halt > q_continue, 0 if never),
+ "q_halt"/"q_continue" (per step), "lyap_spec" (B, k), "exact_correct", "token_acc",
+ and per-column statistics "clv_hfrac", "clv_pr", "clv_pr_ans", "clv_readout".
+
+ NOTE(review): zL_new and final_logits are only bound inside the loops, so this
+ presumably requires k_lyap >= 1, L_cycles >= 1 and halt_max_steps >= 1 — confirm.
+ """
+ inner = model.inner
+ cfg = inner.config
+ B = batch["inputs"].shape[0]
+ seq_full = cfg.seq_len + inner.puzzle_emb_len
+ hidden = cfg.hidden_size
+ state_dim = seq_full * hidden # one of (z_H or z_L)
+ total_dim = 2 * state_dim # joint (v_H, v_L)
+
+ # Carry init
+ z_H = inner.H_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
+ z_L = inner.L_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
+ seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None)
+ input_embeddings = inner._input_embeddings(batch["inputs"].to(device),
+ batch["puzzle_identifiers"].to(device))
+
+ # Joint orthonormal tangent basis
+ g = torch.Generator(device=device).manual_seed(seed)
+ Q0 = torch.randn(B, total_dim, k_lyap, device=device, dtype=torch.float32, generator=g)
+ Q, _ = torch.linalg.qr(Q0) # (B, 2D, k)
+ # Running sum of log|R_ii| over all QR steps, and the number of QR steps taken.
+ log_R_sum = torch.zeros(B, k_lyap, device=device, dtype=torch.float32)
+ n_lyap_steps = 0
+ step_counter = 0
+
+ drift_zH_per_step, drift_zL_per_step = [], []
+ halted_at = torch.zeros(B, dtype=torch.long, device=device)
+ q_halt_hist, q_continue_hist = [], []
+
+ for act_step in range(cfg.halt_max_steps):
+ z_H_prev = z_H.detach().clone()
+ z_L_prev = z_L.detach().clone()
+
+ with torch.enable_grad():
+ zH, zL = z_H.detach(), z_L.detach()
+ for _h in range(cfg.H_cycles):
+ for _l in range(cfg.L_cycles):
+ # --- joint tangent: prep v_combined = v_H + v_L ---
+ v_H_all = Q[:, :state_dim, :] # (B, D, k)
+ v_L_all = Q[:, state_dim:, :]
+ v_comb = v_H_all + v_L_all
+ # --- k JVPs through L_level ---
+ new_v_L_cols = []
+ f_L = lambda z: inner.L_level(z, zH + input_embeddings, **seq_info)
+ for i in range(k_lyap):
+ v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype)
+ # Each call re-evaluates f_L(zL); the primal output is the same for every column.
+ zL_new, Dv = jvp_through(f_L, zL, v_i)
+ new_v_L_cols.append(Dv.reshape(B, state_dim).to(torch.float32))
+ new_v_L = torch.stack(new_v_L_cols, dim=-1) # (B, D, k)
+ # Reassemble Q (v_H unchanged, v_L updated)
+ Q = torch.cat([v_H_all, new_v_L], dim=1)
+ zL = zL_new
+ step_counter += 1
+ if step_counter % t_ons == 0:
+ Q, R = torch.linalg.qr(Q)
+ log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
+ n_lyap_steps += 1
+
+ # --- H step: v_comb = v_H + v_L, JVP through H_level ---
+ v_H_all = Q[:, :state_dim, :]
+ v_L_all = Q[:, state_dim:, :]
+ v_comb = v_H_all + v_L_all
+ new_v_H_cols = []
+ f_H = lambda z: inner.H_level(z, zL, **seq_info)
+ for i in range(k_lyap):
+ v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype)
+ zH_new, Dv = jvp_through(f_H, zH, v_i)
+ new_v_H_cols.append(Dv.reshape(B, state_dim).to(torch.float32))
+ new_v_H = torch.stack(new_v_H_cols, dim=-1)
+ Q = torch.cat([new_v_H, v_L_all], dim=1)
+ zH = zH_new
+ step_counter += 1
+ if step_counter % t_ons == 0:
+ Q, R = torch.linalg.qr(Q)
+ log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
+ n_lyap_steps += 1
+
+ z_H, z_L = zH, zL
+
+ drift_zH_per_step.append((z_H - z_H_prev).float().flatten(1).norm(dim=1).cpu())
+ drift_zL_per_step.append((z_L - z_L_prev).float().flatten(1).norm(dim=1).cpu())
+
+ with torch.no_grad():
+ q_logits = inner.q_head(z_H[:, 0]).float()
+ q_halt, q_continue = q_logits[..., 0], q_logits[..., 1]
+ q_halt_hist.append(q_halt.cpu()); q_continue_hist.append(q_continue.cpu())
+ # Record only the first step at which halting would trigger (0 means "not yet").
+ new_halt = (q_halt > q_continue) & (halted_at == 0)
+ halted_at[new_halt] = act_step + 1
+ output = inner.lm_head(z_H)[:, inner.puzzle_emb_len:].float()
+ # Overwritten every step, so accuracy below is measured on the last ACT step.
+ final_logits = output
+
+ # NOTE(review): the average is per QR call, not per L/H update; with t_ons > 1 each QR
+ # spans t_ons updates, so values are not directly comparable across t_ons — confirm.
+ lyap_spec = (log_R_sum / max(n_lyap_steps, 1)).cpu().numpy() # (B, k)
+
+ with torch.no_grad():
+ preds = final_logits.argmax(dim=-1)
+ labels = batch["labels"].to(device)
+ # Positions with label <= 0 are excluded from both accuracy measures.
+ mask = labels > 0
+ exact = ((preds == labels) | ~mask).all(dim=-1).cpu().float()
+ token_acc = ((preds == labels) & mask).sum(-1).float() / mask.sum(-1).float().clamp_min(1)
+ token_acc = token_acc.cpu()
+
+ # Geometry of the leading columns of the final orthonormal basis Q.
+ # NOTE(review): these are the QR-orthonormalised vectors; presumably only column 0
+ # coincides with a covariant Lyapunov vector, so the "clv_" prefix on these keys
+ # is loose — confirm.
+ with torch.no_grad():
+ Dh = seq_full * hidden
+ n_top = min(3, k_lyap)
+ pe = inner.puzzle_emb_len
+ W = inner.lm_head.weight.float()
+ hfrac_c, pr_c, prans_c, ro_c = [], [], [], []
+ for j in range(n_top):
+ col = Q[:, :, j].float()
+ qH = col[:, :Dh].reshape(B, seq_full, hidden)
+ qL = col[:, Dh:].reshape(B, seq_full, hidden)
+ # Per-position squared norm in each half and in total.
+ eH = (qH ** 2).sum(-1); eL = (qL ** 2).sum(-1)
+ e = eH + eL
+ # Share of the column's squared norm that lies in the z_H half.
+ hfrac_c.append((eH.sum(-1) / e.sum(-1).clamp_min(1e-20)).cpu())
+ # Participation ratio (sum e)^2 / sum e^2, divided by the number of positions.
+ pr_c.append((e.sum(-1) ** 2 / (e ** 2).sum(-1).clamp_min(1e-20) / seq_full).cpu())
+ # Same ratio restricted to positions after the puzzle-embedding prefix.
+ ea = e[:, pe:]
+ prans_c.append((ea.sum(-1) ** 2 / (ea ** 2).sum(-1).clamp_min(1e-20) / max(seq_full - pe, 1)).cpu())
+ # Norm of the z_H part after multiplication by the lm_head weight, over its norm before.
+ ro = torch.matmul(qH[:, pe:], W.t())
+ ro_c.append((ro.flatten(1).norm(dim=1) / qH[:, pe:].flatten(1).norm(dim=1).clamp_min(1e-20)).cpu())
+
+ return {
+ "drift_zH": torch.stack(drift_zH_per_step, dim=1).numpy(),
+ "drift_zL": torch.stack(drift_zL_per_step, dim=1).numpy(),
+ "halted_at": halted_at.cpu().numpy(),
+ "q_halt": torch.stack(q_halt_hist, dim=1).numpy(),
+ "q_continue": torch.stack(q_continue_hist, dim=1).numpy(),
+ "lyap_spec": lyap_spec,
+ "exact_correct": exact.numpy(),
+ "token_acc": token_acc.numpy(),
+ "clv_hfrac": torch.stack(hfrac_c, 1).numpy(),
+ "clv_pr": torch.stack(pr_c, 1).numpy(),
+ "clv_pr_ans": torch.stack(prans_c, 1).numpy(),
+ "clv_readout": torch.stack(ro_c, 1).numpy(),
+ }
+
+
+ def main():
+     """CLI entry point: run the joint Lyapunov/geometry diagnostic on one shard and save an ``.npz``.
+
+     Parses the command line, loads the checkpoint and the requested shard of test
+     samples, runs ``run_diagnose_batch`` batch by batch, concatenates every per-batch
+     array it returns (plus the sampled dataset indices under ``"idx"``) into ``--out``
+     and prints each Lyapunov exponent averaged overall, over solved and over unsolved
+     puzzles. An empty shard still writes ``--out`` but skips the summary table.
+     """
+     ap = argparse.ArgumentParser()
+     ap.add_argument("--ckpt-root", required=True)
+     ap.add_argument("--ckpt-name", default="step_26040")
+     ap.add_argument("--n-samples", type=int, default=1024)
+     ap.add_argument("--shard-id", type=int, default=0)
+     ap.add_argument("--num-shards", type=int, default=1)
+     ap.add_argument("--batch-size", type=int, default=32)
+     ap.add_argument("--k-lyap", type=int, default=8)
+     ap.add_argument("--t-ons", type=int, default=1)
+     ap.add_argument("--seed", type=int, default=0)
+     ap.add_argument("--out", default="diag_joint.npz")
+     args = ap.parse_args()
+
+     device = "cuda"
+     model, cfg, train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device)
+     print(f"loaded {args.ckpt_name}: hidden={model.inner.config.hidden_size}, "
+           f"seq_full={train_meta['seq_len'] + model.inner.puzzle_emb_len}, "
+           f"halt_max_steps={model.inner.config.halt_max_steps}, "
+           f"H={model.inner.config.H_cycles} L={model.inner.config.L_cycles}")
+
+     test_samples = load_test_samples(Path(cfg["data_path"]), args.n_samples,
+                                      args.shard_id, args.num_shards, args.seed)
+     n_this = len(test_samples["inputs"])
+     print(f"shard {args.shard_id}/{args.num_shards}: {n_this} samples")
+
+     results = {k: [] for k in ["drift_zH","drift_zL","halted_at","q_halt","q_continue",
+                                "lyap_spec","exact_correct","token_acc","idx"]}
+     t0 = time.time()
+     for s in range(0, n_this, args.batch_size):
+         e = min(s + args.batch_size, n_this)
+         batch = {k: test_samples[k][s:e].to(device) for k in ["inputs","labels","puzzle_identifiers"]}
+         # Offset the seed by the batch start so each batch gets its own tangent basis.
+         out = run_diagnose_batch(model, batch, device, args.k_lyap, args.t_ons, args.seed + s)
+         for k, v in out.items():
+             # setdefault: the batch function returns keys beyond the pre-seeded ones.
+             if v is not None:
+                 results.setdefault(k, []).append(v)
+         results.setdefault("idx", []).append(test_samples["idx"][s:e])
+         ls = out["lyap_spec"]
+         print(f" [{e}/{n_this}] dt={time.time()-t0:.1f}s exact={out['exact_correct'].mean():.3f} "
+               f"λ_1={ls[:,0].mean():.4f} λ_{args.k_lyap}={ls[:,-1].mean():.4f}", flush=True)
+
+     saved = {}
+     for k, v in results.items():
+         if not v:
+             continue
+         try:
+             saved[k] = np.concatenate(v, 0)
+         except ValueError:
+             saved[k] = np.stack(v, 0)
+     np.savez_compressed(args.out, **saved)
+
+     if "lyap_spec" not in saved:
+         # The shard slice is min()-clamped in load_test_samples, so a trailing shard can
+         # be empty; there is nothing to summarise then and indexing `saved` would raise.
+         print(f"\nN=0: shard {args.shard_id}/{args.num_shards} is empty, no summary")
+         print(f"\nsaved → {args.out}")
+         return
+
+     def _group_mean(values):
+         # Mean of a possibly empty group (all puzzles solved, or none) without a
+         # numpy "mean of empty slice" warning.
+         return values.mean() if values.size else float("nan")
+
+     ls = saved["lyap_spec"]
+     succ = saved["exact_correct"] > 0.5
+     print(f"\nN={len(saved['exact_correct'])} acc={succ.mean():.4f}")
+     print(f"{'i':>3} {'mean':>10} {'succ':>10} {'fail':>10} {'Δ(f-s)':>10}")
+     for i in range(ls.shape[1]):
+         li = ls[:, i]
+         m_succ, m_fail = _group_mean(li[succ]), _group_mean(li[~succ])
+         print(f"{i+1:>3} {li.mean():+10.4f} {m_succ:+10.4f} {m_fail:+10.4f} "
+               f"{m_fail-m_succ:+10.4f}")
+     print(f"\nsaved → {args.out}")
+
+
+ if __name__ == "__main__":
+     main()
diff --git a/diagnose_hrm_joint_horizon.py b/diagnose_hrm_joint_horizon.py
new file mode 100644
index 0000000..156d2d8
--- /dev/null
+++ b/diagnose_hrm_joint_horizon.py
@@ -0,0 +1,240 @@
+"""HRM Sudoku Lyapunov diagnostic with CORRECTED joint (z_H, z_L) tangent tracking.
+
+Key fix over diagnose_hrm.py:
+ - State is conceptually (z_H, z_L) ∈ R^{2D} where D = seq_full * hidden.
+ - L_level update: z_L_new = layers_L(z_L + z_H + input_embeddings), so
+ v_L_new = J_L · (v_H + v_L), v_H_new = v_H
+ - H_level update: z_H_new = layers_H(z_H + z_L), so
+ v_H_new = J_H · (v_H + v_L), v_L_new = v_L
+ - Each L or H cycle = ONE JVP per tangent column (same cost as before),
+ but operating on the combined tangent v_H + v_L.
+ - Q is (B, 2D, k); QR over the 2D dimension keeps an orthonormal basis.
+"""
+from __future__ import annotations
+import sys, os, yaml, math, argparse, json, time
+from pathlib import Path
+import numpy as np
+import torch
+
+HRM_DIR = Path("/home/yurenh2/rrm/hrm")
+sys.path.insert(0, str(HRM_DIR))
+
+from models.hrm.hrm_act_v1 import HierarchicalReasoningModel_ACTV1
+
+
def load_model(ckpt_root: Path, ckpt_name: str, device: str):
    """Build the HRM ACT-v1 model described by a checkpoint directory and load its weights.

    Args:
        ckpt_root: Directory holding ``all_config.yaml`` and the checkpoint file.
        ckpt_name: File name of the checkpoint inside ``ckpt_root``.
        device: Device the model is moved to after loading.

    Returns:
        ``(model, cfg, train_meta)``: the model in eval mode, the parsed
        ``all_config.yaml`` and the parsed ``train/dataset.json`` of the
        dataset named by ``cfg["data_path"]``.
    """
    cfg = yaml.safe_load((ckpt_root / "all_config.yaml").read_text())
    train_meta = json.loads((Path(cfg["data_path"]) / "train" / "dataset.json").read_text())

    # The architecture config is completed with sizes taken from the dataset metadata.
    arch_cfg = dict(cfg["arch"])
    arch_cfg.update(batch_size=cfg["global_batch_size"], seq_len=train_meta["seq_len"],
                    vocab_size=train_meta["vocab_size"],
                    num_puzzle_identifiers=train_meta["num_puzzle_identifiers"], causal=False)
    model = HierarchicalReasoningModel_ACTV1(arch_cfg)

    sd = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True)
    # Drop the "_orig_mod." / "model." fragments from checkpoint keys
    # (presumably added by torch.compile / a training wrapper -- confirm).
    stripped = {k.replace("_orig_mod.", "").replace("model.", ""): v for k, v in sd.items()}

    # strict=False accepts any key mismatch, so surface mismatches explicitly:
    # a silently half-loaded model would make every downstream number meaningless.
    missing, unexpected = model.load_state_dict(stripped, strict=False)
    print(f"[load] missing={len(missing)} unexpected={len(unexpected)}")
    if missing:
        print(f" sample missing: {missing[:3]}")
    if unexpected:
        print(f" sample unexpected: {unexpected[:3]}")

    model.to(device).eval()
    return model, cfg, train_meta
+
+
def load_test_samples(data_path: Path, n_total: int, shard_id: int, num_shards: int, seed: int):
    """Draw a seeded random subset of the test split and return one shard of it.

    ``n_total`` distinct row indices are sampled without replacement from the
    arrays under ``<data_path>/test``.  The draw is cut into ``num_shards``
    contiguous chunks of ``ceil(n_total / num_shards)`` rows and chunk
    ``shard_id`` is returned, so shards that share ``seed`` and ``n_total``
    receive disjoint slices of the same draw.

    If ``n_total`` is larger than the number of test rows it is clamped to that
    number (sampling without replacement would otherwise raise).

    Returns:
        dict with int32 tensors ``"inputs"``, ``"labels"``,
        ``"puzzle_identifiers"`` and the numpy index array ``"idx"``.

    Raises:
        ValueError: if ``num_shards`` is smaller than 1.
    """
    if num_shards < 1:
        raise ValueError(f"num_shards must be >= 1, got {num_shards}")

    test_dir = Path(data_path) / "test"
    inputs = np.load(test_dir / "all__inputs.npy")
    labels = np.load(test_dir / "all__labels.npy")
    pid = np.load(test_dir / "all__puzzle_identifiers.npy")

    # Never ask for more distinct rows than exist.
    n_total = min(n_total, len(inputs))
    rng = np.random.default_rng(seed)
    all_idx = rng.choice(len(inputs), size=n_total, replace=False)

    shard_size = (n_total + num_shards - 1) // num_shards  # ceil division
    start = shard_id * shard_size
    stop = min(start + shard_size, n_total)
    idx = all_idx[start:stop]

    return {
        "inputs": torch.from_numpy(inputs[idx].astype(np.int32)),
        "labels": torch.from_numpy(labels[idx].astype(np.int32)),
        "puzzle_identifiers": torch.from_numpy(pid[idx].astype(np.int32)),
        "idx": idx,
    }
+
+
def jvp_through(f, x, v):
    """Evaluate ``f`` at ``x`` and push the tangent ``v`` through it.

    Returns the pair ``(f(x), J_f(x) @ v)``.  No graph is built for the result
    (``create_graph=False``) because the value is only used for diagnostics.
    """
    primal_out, tangent_out = torch.autograd.functional.jvp(
        f, x, v, create_graph=False, strict=False
    )
    return primal_out, tangent_out
+
+
def run_diagnose_batch(model, batch, device, k_lyap, t_ons, seed):
    """Compute the joint top-k Lyapunov spectrum over the (z_H, z_L) tangent space.

    The tangent basis ``Q`` has shape (B, 2D, k) with D = seq_full * hidden;
    rows ``[:D]`` hold the z_H tangents and rows ``[D:]`` the z_L tangents.

    Per L_level step:  v_L_new = J_L · (v_H + v_L),  v_H_new = v_H
    Per H_level step:  v_H_new = J_H · (v_H + v_L),  v_L_new = v_L

    The number of ACT steps unrolled is read from the ``DIAG_HORIZON``
    environment variable (default 4).

    Args:
        model: Object exposing ``model.inner`` (config, H_init/L_init, L_level,
            H_level, q_head, lm_head, ...).
        batch: Dict with "inputs", "labels" and "puzzle_identifiers" tensors.
        device: Device for the tangent basis and the accumulators.
        k_lyap: Number of tangent directions tracked (>= 1).
        t_ons: Re-orthonormalise with QR every ``t_ons`` L/H steps (>= 1).
        seed: Seed of the random initial tangent basis.

    Returns:
        Dict of numpy arrays: ``drift_zH`` / ``drift_zL`` (B, horizon) state
        change norms per ACT step, ``halted_at`` (B,) 1-based step at which
        q_halt first exceeded q_continue (0 = never), ``q_halt`` /
        ``q_continue`` (B, horizon), ``lyap_spec`` (B, k), ``exact_correct``
        (B,) and ``token_acc`` (B,).

    Raises:
        ValueError: if the horizon, ``k_lyap`` or ``t_ons`` is smaller than 1.

    NOTE(review): ``lyap_spec`` is the mean log|diag R| per QR event, so with
    ``t_ons`` > 1 it is a growth rate per ``t_ons`` steps, and growth after the
    last QR is not counted -- confirm this is intended.
    """
    # `os` is imported at file top; no need for an __import__ indirection.
    horizon = int(os.environ.get("DIAG_HORIZON", "4"))
    if horizon < 1:
        raise ValueError(f"DIAG_HORIZON must be >= 1, got {horizon}")
    if k_lyap < 1 or t_ons < 1:
        raise ValueError(f"k_lyap and t_ons must be >= 1, got k_lyap={k_lyap}, t_ons={t_ons}")

    inner = model.inner
    cfg = inner.config
    B = batch["inputs"].shape[0]
    seq_full = cfg.seq_len + inner.puzzle_emb_len
    hidden = cfg.hidden_size
    state_dim = seq_full * hidden   # size of one of z_H / z_L
    total_dim = 2 * state_dim       # joint (v_H, v_L)
    state_shape = (B, seq_full, hidden)
    work_dtype = inner.forward_dtype

    def push_columns(step_fn, z, tangents):
        """Return (step_fn(z), J · tangents) using one JVP per column of the (B, D, k) block."""
        z_next, cols = None, []
        for i in range(k_lyap):
            v_i = tangents[:, :, i].reshape(state_shape).to(work_dtype)
            z_next, Dv = jvp_through(step_fn, z, v_i)
            cols.append(Dv.reshape(B, state_dim).to(torch.float32))
        return z_next, torch.stack(cols, dim=-1)

    def qr_accumulate(basis, log_sum):
        """Re-orthonormalise ``basis`` and add log|diag R| to ``log_sum``."""
        basis, R = torch.linalg.qr(basis)
        return basis, log_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()

    # Carry init
    z_H = inner.H_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(work_dtype)
    z_L = inner.L_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(work_dtype)
    seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None)
    input_embeddings = inner._input_embeddings(batch["inputs"].to(device),
                                               batch["puzzle_identifiers"].to(device))

    # Joint orthonormal tangent basis
    g = torch.Generator(device=device).manual_seed(seed)
    Q0 = torch.randn(B, total_dim, k_lyap, device=device, dtype=torch.float32, generator=g)
    Q, _ = torch.linalg.qr(Q0)  # (B, 2D, k)
    log_R_sum = torch.zeros(B, k_lyap, device=device, dtype=torch.float32)
    n_lyap_steps = 0
    step_counter = 0

    drift_zH_per_step, drift_zL_per_step = [], []
    halted_at = torch.zeros(B, dtype=torch.long, device=device)
    q_halt_hist, q_continue_hist = [], []

    for act_step in range(horizon):
        z_H_prev = z_H.detach().clone()
        z_L_prev = z_L.detach().clone()

        with torch.enable_grad():
            zH, zL = z_H.detach(), z_L.detach()
            for _h in range(cfg.H_cycles):
                for _l in range(cfg.L_cycles):
                    # L step: only the z_L half of the tangent changes.
                    v_H_all = Q[:, :state_dim, :]
                    v_L_all = Q[:, state_dim:, :]
                    f_L = lambda z: inner.L_level(z, zH + input_embeddings, **seq_info)
                    zL_new, new_v_L = push_columns(f_L, zL, v_H_all + v_L_all)
                    Q = torch.cat([v_H_all, new_v_L], dim=1)
                    zL = zL_new
                    step_counter += 1
                    if step_counter % t_ons == 0:
                        Q, log_R_sum = qr_accumulate(Q, log_R_sum)
                        n_lyap_steps += 1

                # H step: only the z_H half of the tangent changes.
                v_H_all = Q[:, :state_dim, :]
                v_L_all = Q[:, state_dim:, :]
                f_H = lambda z: inner.H_level(z, zL, **seq_info)
                zH_new, new_v_H = push_columns(f_H, zH, v_H_all + v_L_all)
                Q = torch.cat([new_v_H, v_L_all], dim=1)
                zH = zH_new
                step_counter += 1
                if step_counter % t_ons == 0:
                    Q, log_R_sum = qr_accumulate(Q, log_R_sum)
                    n_lyap_steps += 1

        z_H, z_L = zH, zL

        drift_zH_per_step.append((z_H - z_H_prev).float().flatten(1).norm(dim=1).cpu())
        drift_zL_per_step.append((z_L - z_L_prev).float().flatten(1).norm(dim=1).cpu())

        with torch.no_grad():
            q_logits = inner.q_head(z_H[:, 0]).float()
            q_halt, q_continue = q_logits[..., 0], q_logits[..., 1]
            q_halt_hist.append(q_halt.cpu())
            q_continue_hist.append(q_continue.cpu())
            # Record only the first step at which a sample would halt.
            new_halt = (q_halt > q_continue) & (halted_at == 0)
            halted_at[new_halt] = act_step + 1

    lyap_spec = (log_R_sum / max(n_lyap_steps, 1)).cpu().numpy()  # (B, k)

    with torch.no_grad():
        # Only the logits of the final state are scored, so compute them once.
        final_logits = inner.lm_head(z_H)[:, inner.puzzle_emb_len:].float()
        preds = final_logits.argmax(dim=-1)
        labels = batch["labels"].to(device)
        mask = labels > 0  # positions with label <= 0 are not scored
        exact = ((preds == labels) | ~mask).all(dim=-1).cpu().float()
        token_acc = ((preds == labels) & mask).sum(-1).float() / mask.sum(-1).float().clamp_min(1)
        token_acc = token_acc.cpu()

    return {
        "drift_zH": torch.stack(drift_zH_per_step, dim=1).numpy(),
        "drift_zL": torch.stack(drift_zL_per_step, dim=1).numpy(),
        "halted_at": halted_at.cpu().numpy(),
        "q_halt": torch.stack(q_halt_hist, dim=1).numpy(),
        "q_continue": torch.stack(q_continue_hist, dim=1).numpy(),
        "lyap_spec": lyap_spec,
        "exact_correct": exact.numpy(),
        "token_acc": token_acc.numpy(),
    }
+
+
def main():
    """Command-line entry point: run the joint Lyapunov diagnostic on one shard.

    Loads the checkpoint, draws this shard's test samples, runs
    ``run_diagnose_batch`` batch by batch, writes the per-sample arrays to
    ``--out`` as a compressed ``.npz`` and prints, for every tracked exponent,
    its mean over all / solved / unsolved samples.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--ckpt-root", required=True)
    ap.add_argument("--ckpt-name", default="step_26040")
    ap.add_argument("--n-samples", type=int, default=1024)
    ap.add_argument("--shard-id", type=int, default=0)
    ap.add_argument("--num-shards", type=int, default=1)
    ap.add_argument("--batch-size", type=int, default=32)
    ap.add_argument("--k-lyap", type=int, default=8)
    ap.add_argument("--t-ons", type=int, default=1)
    ap.add_argument("--seed", type=int, default=0)
    ap.add_argument("--out", default="diag_joint.npz")
    args = ap.parse_args()

    device = "cuda"
    model, cfg, train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device)
    inner_cfg = model.inner.config
    print(f"loaded {args.ckpt_name}: hidden={inner_cfg.hidden_size}, "
          f"seq_full={train_meta['seq_len'] + model.inner.puzzle_emb_len}, "
          f"halt_max_steps={inner_cfg.halt_max_steps}, "
          f"H={inner_cfg.H_cycles} L={inner_cfg.L_cycles}")

    test_samples = load_test_samples(Path(cfg["data_path"]), args.n_samples,
                                     args.shard_id, args.num_shards, args.seed)
    n_this = len(test_samples["inputs"])
    print(f"shard {args.shard_id}/{args.num_shards}: {n_this} samples")
    if n_this == 0:
        # An empty shard has nothing to save or summarise; bail out instead of
        # failing later on the missing "lyap_spec" entry.
        print("no samples in this shard; nothing to do")
        return

    results = {k: [] for k in ["drift_zH", "drift_zL", "halted_at", "q_halt", "q_continue",
                               "lyap_spec", "exact_correct", "token_acc", "idx"]}
    t0 = time.time()
    for s in range(0, n_this, args.batch_size):
        e = min(s + args.batch_size, n_this)
        batch = {k: test_samples[k][s:e].to(device) for k in ["inputs", "labels", "puzzle_identifiers"]}
        # Offset the seed per batch so each batch gets its own initial tangent basis.
        out = run_diagnose_batch(model, batch, device, args.k_lyap, args.t_ons, args.seed + s)
        for k, v in out.items():
            results[k].append(v)
        results["idx"].append(test_samples["idx"][s:e])
        ls = out["lyap_spec"]
        print(f" [{e}/{n_this}] dt={time.time()-t0:.1f}s exact={out['exact_correct'].mean():.3f} "
              f"λ_1={ls[:,0].mean():.4f} λ_{args.k_lyap}={ls[:,-1].mean():.4f}", flush=True)

    saved = {}
    for k, v in results.items():
        if not v:
            continue
        try:
            saved[k] = np.concatenate(v, 0)
        except ValueError:
            saved[k] = np.stack(v, 0)
    np.savez_compressed(args.out, **saved)

    def group_mean(x):
        # An empty group (all solved or all failed) reports NaN without
        # triggering NumPy's empty-slice warning.
        return x.mean() if x.size else float("nan")

    ls = saved["lyap_spec"]
    succ = saved["exact_correct"] > 0.5
    print(f"\nN={len(saved['exact_correct'])} acc={succ.mean():.4f}")
    print(f"{'i':>3} {'mean':>10} {'succ':>10} {'fail':>10} {'Δ(f-s)':>10}")
    for i in range(ls.shape[1]):
        li = ls[:, i]
        m_succ, m_fail = group_mean(li[succ]), group_mean(li[~succ])
        print(f"{i+1:>3} {li.mean():+10.4f} {m_succ:+10.4f} {m_fail:+10.4f} "
              f"{m_fail-m_succ:+10.4f}")
    print(f"\nsaved → {args.out}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/diagnose_hrm_joint_short.py b/diagnose_hrm_joint_short.py
new file mode 100644
index 0000000..2160470
--- /dev/null
+++ b/diagnose_hrm_joint_short.py
@@ -0,0 +1,240 @@
+"""HRM Sudoku Lyapunov diagnostic with CORRECTED joint (z_H, z_L) tangent tracking.
+
+Key fix over diagnose_hrm.py:
+ - State is conceptually (z_H, z_L) ∈ R^{2D} where D = seq_full * hidden.
+ - L_level update: z_L_new = layers_L(z_L + z_H + input_embeddings), so
+ v_L_new = J_L · (v_H + v_L), v_H_new = v_H
+ - H_level update: z_H_new = layers_H(z_H + z_L), so
+ v_H_new = J_H · (v_H + v_L), v_L_new = v_L
+ - Each L or H cycle = ONE JVP per tangent column (same cost as before),
+ but operating on the combined tangent v_H + v_L.
+ - Q is (B, 2D, k); QR over the 2D dimension keeps an orthonormal basis.
+"""
+from __future__ import annotations
+import sys, os, yaml, math, argparse, json, time
+from pathlib import Path
+import numpy as np
+import torch
+
+HRM_DIR = Path("/home/yurenh2/rrm/hrm")
+sys.path.insert(0, str(HRM_DIR))
+
+from models.hrm.hrm_act_v1 import HierarchicalReasoningModel_ACTV1
+
+
def load_model(ckpt_root: Path, ckpt_name: str, device: str):
    """Instantiate the HRM ACT-v1 model for a checkpoint directory and load its weights.

    Args:
        ckpt_root: Directory holding ``all_config.yaml`` and the checkpoint file.
        ckpt_name: File name of the checkpoint inside ``ckpt_root``.
        device: Device the model is moved to after loading.

    Returns:
        ``(model, cfg, train_meta)``: the model in eval mode, the parsed
        ``all_config.yaml`` and the parsed ``train/dataset.json`` of the
        dataset named by ``cfg["data_path"]``.
    """
    cfg = yaml.safe_load((ckpt_root / "all_config.yaml").read_text())
    train_meta = json.loads((Path(cfg["data_path"]) / "train" / "dataset.json").read_text())

    # Sizes that depend on the dataset are injected into the architecture config.
    arch_cfg = dict(cfg["arch"])
    arch_cfg.update(batch_size=cfg["global_batch_size"], seq_len=train_meta["seq_len"],
                    vocab_size=train_meta["vocab_size"],
                    num_puzzle_identifiers=train_meta["num_puzzle_identifiers"], causal=False)
    model = HierarchicalReasoningModel_ACTV1(arch_cfg)

    sd = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True)
    # Remove the "_orig_mod." / "model." fragments from checkpoint keys
    # (presumably added by torch.compile / a training wrapper -- confirm).
    stripped = {k.replace("_orig_mod.", "").replace("model.", ""): v for k, v in sd.items()}

    # strict=False never fails on mismatched keys, so report them: a model that
    # silently kept its random init would invalidate the whole diagnostic.
    missing, unexpected = model.load_state_dict(stripped, strict=False)
    print(f"[load] missing={len(missing)} unexpected={len(unexpected)}")
    if missing:
        print(f" sample missing: {missing[:3]}")
    if unexpected:
        print(f" sample unexpected: {unexpected[:3]}")

    model.to(device).eval()
    return model, cfg, train_meta
+
+
def load_test_samples(data_path: Path, n_total: int, shard_id: int, num_shards: int, seed: int):
    """Return one shard of a seeded random sample of the test split.

    ``n_total`` distinct rows are drawn without replacement from the arrays in
    ``<data_path>/test``; the draw is split into ``num_shards`` contiguous
    chunks of ``ceil(n_total / num_shards)`` rows and chunk ``shard_id`` is
    returned.  Runs that share ``seed`` and ``n_total`` therefore partition the
    same draw between their shards.

    ``n_total`` is clamped to the number of available rows, since sampling
    more distinct rows than exist would raise.

    Returns:
        dict with int32 tensors ``"inputs"``, ``"labels"``,
        ``"puzzle_identifiers"`` and the numpy index array ``"idx"``.

    Raises:
        ValueError: if ``num_shards`` is smaller than 1.
    """
    if num_shards < 1:
        raise ValueError(f"num_shards must be >= 1, got {num_shards}")

    test_dir = Path(data_path) / "test"
    inputs = np.load(test_dir / "all__inputs.npy")
    labels = np.load(test_dir / "all__labels.npy")
    pid = np.load(test_dir / "all__puzzle_identifiers.npy")

    # Cap the request at the dataset size before sampling without replacement.
    n_total = min(n_total, len(inputs))
    rng = np.random.default_rng(seed)
    all_idx = rng.choice(len(inputs), size=n_total, replace=False)

    shard_size = (n_total + num_shards - 1) // num_shards  # ceil division
    start = shard_id * shard_size
    stop = min(start + shard_size, n_total)
    idx = all_idx[start:stop]

    return {
        "inputs": torch.from_numpy(inputs[idx].astype(np.int32)),
        "labels": torch.from_numpy(labels[idx].astype(np.int32)),
        "puzzle_identifiers": torch.from_numpy(pid[idx].astype(np.int32)),
        "idx": idx,
    }
+
+
def jvp_through(f, x, v):
    """Single Jacobian-vector product of ``f`` at ``x`` along ``v``.

    Gives back ``(f(x), J_f(x) @ v)``.  ``create_graph`` stays off: the result
    feeds a diagnostic and is never differentiated again.
    """
    value, directional = torch.autograd.functional.jvp(
        f, x, v, create_graph=False, strict=False
    )
    return value, directional
+
+
def run_diagnose_batch(model, batch, device, k_lyap, t_ons, seed, n_act_steps=4):
    """Compute the joint top-k Lyapunov spectrum over a short ACT horizon.

    The tangent basis ``Q`` has shape (B, 2D, k) with D = seq_full * hidden;
    rows ``[:D]`` hold the z_H tangents and rows ``[D:]`` the z_L tangents.

    Per L_level step:  v_L_new = J_L · (v_H + v_L),  v_H_new = v_H
    Per H_level step:  v_H_new = J_H · (v_H + v_L),  v_L_new = v_L

    Args:
        model: Object exposing ``model.inner`` (config, H_init/L_init, L_level,
            H_level, q_head, lm_head, ...).
        batch: Dict with "inputs", "labels" and "puzzle_identifiers" tensors.
        device: Device for the tangent basis and the accumulators.
        k_lyap: Number of tangent directions tracked (>= 1).
        t_ons: Re-orthonormalise with QR every ``t_ons`` L/H steps (>= 1).
        seed: Seed of the random initial tangent basis.
        n_act_steps: Number of ACT steps to unroll (>= 1).  Defaults to 4, the
            short horizon this script was written for.

    Returns:
        Dict of numpy arrays: ``drift_zH`` / ``drift_zL`` (B, n_act_steps)
        state change norms per ACT step, ``halted_at`` (B,) 1-based step at
        which q_halt first exceeded q_continue (0 = never), ``q_halt`` /
        ``q_continue`` (B, n_act_steps), ``lyap_spec`` (B, k),
        ``exact_correct`` (B,) and ``token_acc`` (B,).

    Raises:
        ValueError: if ``n_act_steps``, ``k_lyap`` or ``t_ons`` is smaller than 1.

    NOTE(review): ``lyap_spec`` is the mean log|diag R| per QR event, so with
    ``t_ons`` > 1 it is a growth rate per ``t_ons`` steps, and growth after the
    last QR is not counted -- confirm this is intended.
    """
    if n_act_steps < 1:
        raise ValueError(f"n_act_steps must be >= 1, got {n_act_steps}")
    if k_lyap < 1 or t_ons < 1:
        raise ValueError(f"k_lyap and t_ons must be >= 1, got k_lyap={k_lyap}, t_ons={t_ons}")

    inner = model.inner
    cfg = inner.config
    B = batch["inputs"].shape[0]
    seq_full = cfg.seq_len + inner.puzzle_emb_len
    hidden = cfg.hidden_size
    state_dim = seq_full * hidden   # size of one of z_H / z_L
    total_dim = 2 * state_dim       # joint (v_H, v_L)
    state_shape = (B, seq_full, hidden)
    work_dtype = inner.forward_dtype

    def push_columns(step_fn, z, tangents):
        """Return (step_fn(z), J · tangents) using one JVP per column of the (B, D, k) block."""
        z_next, cols = None, []
        for i in range(k_lyap):
            v_i = tangents[:, :, i].reshape(state_shape).to(work_dtype)
            z_next, Dv = jvp_through(step_fn, z, v_i)
            cols.append(Dv.reshape(B, state_dim).to(torch.float32))
        return z_next, torch.stack(cols, dim=-1)

    def qr_accumulate(basis, log_sum):
        """Re-orthonormalise ``basis`` and add log|diag R| to ``log_sum``."""
        basis, R = torch.linalg.qr(basis)
        return basis, log_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()

    # Carry init
    z_H = inner.H_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(work_dtype)
    z_L = inner.L_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(work_dtype)
    seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None)
    input_embeddings = inner._input_embeddings(batch["inputs"].to(device),
                                               batch["puzzle_identifiers"].to(device))

    # Joint orthonormal tangent basis
    g = torch.Generator(device=device).manual_seed(seed)
    Q0 = torch.randn(B, total_dim, k_lyap, device=device, dtype=torch.float32, generator=g)
    Q, _ = torch.linalg.qr(Q0)  # (B, 2D, k)
    log_R_sum = torch.zeros(B, k_lyap, device=device, dtype=torch.float32)
    n_lyap_steps = 0
    step_counter = 0

    drift_zH_per_step, drift_zL_per_step = [], []
    halted_at = torch.zeros(B, dtype=torch.long, device=device)
    q_halt_hist, q_continue_hist = [], []

    for act_step in range(n_act_steps):
        z_H_prev = z_H.detach().clone()
        z_L_prev = z_L.detach().clone()

        with torch.enable_grad():
            zH, zL = z_H.detach(), z_L.detach()
            for _h in range(cfg.H_cycles):
                for _l in range(cfg.L_cycles):
                    # L step: only the z_L half of the tangent changes.
                    v_H_all = Q[:, :state_dim, :]
                    v_L_all = Q[:, state_dim:, :]
                    f_L = lambda z: inner.L_level(z, zH + input_embeddings, **seq_info)
                    zL_new, new_v_L = push_columns(f_L, zL, v_H_all + v_L_all)
                    Q = torch.cat([v_H_all, new_v_L], dim=1)
                    zL = zL_new
                    step_counter += 1
                    if step_counter % t_ons == 0:
                        Q, log_R_sum = qr_accumulate(Q, log_R_sum)
                        n_lyap_steps += 1

                # H step: only the z_H half of the tangent changes.
                v_H_all = Q[:, :state_dim, :]
                v_L_all = Q[:, state_dim:, :]
                f_H = lambda z: inner.H_level(z, zL, **seq_info)
                zH_new, new_v_H = push_columns(f_H, zH, v_H_all + v_L_all)
                Q = torch.cat([new_v_H, v_L_all], dim=1)
                zH = zH_new
                step_counter += 1
                if step_counter % t_ons == 0:
                    Q, log_R_sum = qr_accumulate(Q, log_R_sum)
                    n_lyap_steps += 1

        z_H, z_L = zH, zL

        drift_zH_per_step.append((z_H - z_H_prev).float().flatten(1).norm(dim=1).cpu())
        drift_zL_per_step.append((z_L - z_L_prev).float().flatten(1).norm(dim=1).cpu())

        with torch.no_grad():
            q_logits = inner.q_head(z_H[:, 0]).float()
            q_halt, q_continue = q_logits[..., 0], q_logits[..., 1]
            q_halt_hist.append(q_halt.cpu())
            q_continue_hist.append(q_continue.cpu())
            # Record only the first step at which a sample would halt.
            new_halt = (q_halt > q_continue) & (halted_at == 0)
            halted_at[new_halt] = act_step + 1

    lyap_spec = (log_R_sum / max(n_lyap_steps, 1)).cpu().numpy()  # (B, k)

    with torch.no_grad():
        # Only the logits of the final state are scored, so compute them once.
        final_logits = inner.lm_head(z_H)[:, inner.puzzle_emb_len:].float()
        preds = final_logits.argmax(dim=-1)
        labels = batch["labels"].to(device)
        mask = labels > 0  # positions with label <= 0 are not scored
        exact = ((preds == labels) | ~mask).all(dim=-1).cpu().float()
        token_acc = ((preds == labels) & mask).sum(-1).float() / mask.sum(-1).float().clamp_min(1)
        token_acc = token_acc.cpu()

    return {
        "drift_zH": torch.stack(drift_zH_per_step, dim=1).numpy(),
        "drift_zL": torch.stack(drift_zL_per_step, dim=1).numpy(),
        "halted_at": halted_at.cpu().numpy(),
        "q_halt": torch.stack(q_halt_hist, dim=1).numpy(),
        "q_continue": torch.stack(q_continue_hist, dim=1).numpy(),
        "lyap_spec": lyap_spec,
        "exact_correct": exact.numpy(),
        "token_acc": token_acc.numpy(),
    }
+
+
def main():
    """Command-line entry point: run the short-horizon joint diagnostic on one shard.

    Loads the checkpoint, draws this shard's test samples, runs
    ``run_diagnose_batch`` batch by batch, writes the per-sample arrays to
    ``--out`` as a compressed ``.npz`` and prints, for every tracked exponent,
    its mean over all / solved / unsolved samples.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--ckpt-root", required=True)
    ap.add_argument("--ckpt-name", default="step_26040")
    ap.add_argument("--n-samples", type=int, default=1024)
    ap.add_argument("--shard-id", type=int, default=0)
    ap.add_argument("--num-shards", type=int, default=1)
    ap.add_argument("--batch-size", type=int, default=32)
    ap.add_argument("--k-lyap", type=int, default=8)
    ap.add_argument("--t-ons", type=int, default=1)
    ap.add_argument("--seed", type=int, default=0)
    ap.add_argument("--out", default="diag_joint.npz")
    args = ap.parse_args()

    device = "cuda"
    model, cfg, train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device)
    inner_cfg = model.inner.config
    print(f"loaded {args.ckpt_name}: hidden={inner_cfg.hidden_size}, "
          f"seq_full={train_meta['seq_len'] + model.inner.puzzle_emb_len}, "
          f"halt_max_steps={inner_cfg.halt_max_steps}, "
          f"H={inner_cfg.H_cycles} L={inner_cfg.L_cycles}")

    test_samples = load_test_samples(Path(cfg["data_path"]), args.n_samples,
                                     args.shard_id, args.num_shards, args.seed)
    n_this = len(test_samples["inputs"])
    print(f"shard {args.shard_id}/{args.num_shards}: {n_this} samples")
    if n_this == 0:
        # Nothing to compute for an empty shard; stop before the summary code
        # looks up arrays that were never produced.
        print("no samples in this shard; nothing to do")
        return

    results = {k: [] for k in ["drift_zH", "drift_zL", "halted_at", "q_halt", "q_continue",
                               "lyap_spec", "exact_correct", "token_acc", "idx"]}
    t0 = time.time()
    for s in range(0, n_this, args.batch_size):
        e = min(s + args.batch_size, n_this)
        batch = {k: test_samples[k][s:e].to(device) for k in ["inputs", "labels", "puzzle_identifiers"]}
        # Offset the seed per batch so each batch gets its own initial tangent basis.
        out = run_diagnose_batch(model, batch, device, args.k_lyap, args.t_ons, args.seed + s)
        for k, v in out.items():
            results[k].append(v)
        results["idx"].append(test_samples["idx"][s:e])
        ls = out["lyap_spec"]
        print(f" [{e}/{n_this}] dt={time.time()-t0:.1f}s exact={out['exact_correct'].mean():.3f} "
              f"λ_1={ls[:,0].mean():.4f} λ_{args.k_lyap}={ls[:,-1].mean():.4f}", flush=True)

    saved = {}
    for k, v in results.items():
        if not v:
            continue
        try:
            saved[k] = np.concatenate(v, 0)
        except ValueError:
            saved[k] = np.stack(v, 0)
    np.savez_compressed(args.out, **saved)

    def group_mean(x):
        # An empty group (all solved or all failed) reports NaN without
        # triggering NumPy's empty-slice warning.
        return x.mean() if x.size else float("nan")

    ls = saved["lyap_spec"]
    succ = saved["exact_correct"] > 0.5
    print(f"\nN={len(saved['exact_correct'])} acc={succ.mean():.4f}")
    print(f"{'i':>3} {'mean':>10} {'succ':>10} {'fail':>10} {'Δ(f-s)':>10}")
    for i in range(ls.shape[1]):
        li = ls[:, i]
        m_succ, m_fail = group_mean(li[succ]), group_mean(li[~succ])
        print(f"{i+1:>3} {li.mean():+10.4f} {m_succ:+10.4f} {m_fail:+10.4f} "
              f"{m_fail-m_succ:+10.4f}")
    print(f"\nsaved → {args.out}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/diagnose_hrm_separate.py b/diagnose_hrm_separate.py
new file mode 100644
index 0000000..1157c16
--- /dev/null
+++ b/diagnose_hrm_separate.py
@@ -0,0 +1,242 @@
+"""HRM diagnostic: separate λ_L (L subsystem with z_H held fixed) and λ_H (H subsystem after z_L has converged),
+in addition to the joint λ from diagnose_hrm_joint.py.
+
+Three orthonormal bases evolved in parallel:
+- Q_joint: (B, 2D, k) — joint (v_H, v_L). Block-matrix update per L/H step.
+- Q_L: (B, D, k) — only updated during L steps via J_L. Unchanged during H steps.
+- Q_H: (B, D, k) — only updated during H steps via J_H. Unchanged during L steps.
+
+Note: Q_L's evolution uses J_L evaluated at the current trajectory's z_L+z_H+ie,
+which means we measure the L sub-system Lyapunov "along the actual z_H trajectory"
+(z_H changes between H-cycles). Similarly for Q_H.
+"""
+from __future__ import annotations
+import sys, os, yaml, math, argparse, json, time
+from pathlib import Path
+import numpy as np
+import torch
+
+HRM_DIR = Path("/home/yurenh2/rrm/hrm")
+sys.path.insert(0, str(HRM_DIR))
+from models.hrm.hrm_act_v1 import HierarchicalReasoningModel_ACTV1
+
+
def load_model(ckpt_root: Path, ckpt_name: str, device: str):
    """Create the HRM ACT-v1 model for a checkpoint directory and load its weights.

    Args:
        ckpt_root: Directory holding ``all_config.yaml`` and the checkpoint file.
        ckpt_name: File name of the checkpoint inside ``ckpt_root``.
        device: Device the model is moved to after loading.

    Returns:
        ``(model, cfg, train_meta)``: the model in eval mode, the parsed
        ``all_config.yaml`` and the parsed ``train/dataset.json`` of the
        dataset named by ``cfg["data_path"]``.
    """
    cfg = yaml.safe_load((ckpt_root / "all_config.yaml").read_text())
    train_meta = json.loads((Path(cfg["data_path"]) / "train" / "dataset.json").read_text())

    # Dataset-dependent sizes are merged into the architecture config.
    arch_cfg = dict(cfg["arch"])
    arch_cfg.update(batch_size=cfg["global_batch_size"], seq_len=train_meta["seq_len"],
                    vocab_size=train_meta["vocab_size"],
                    num_puzzle_identifiers=train_meta["num_puzzle_identifiers"], causal=False)
    model = HierarchicalReasoningModel_ACTV1(arch_cfg)

    sd = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True)
    # Strip the "_orig_mod." / "model." fragments from checkpoint keys
    # (presumably added by torch.compile / a training wrapper -- confirm).
    stripped = {k.replace("_orig_mod.", "").replace("model.", ""): v for k, v in sd.items()}

    # With strict=False a key mismatch is not an error, so print what did not
    # line up; otherwise a partially initialised model would go unnoticed.
    missing, unexpected = model.load_state_dict(stripped, strict=False)
    print(f"[load] missing={len(missing)} unexpected={len(unexpected)}")
    if missing:
        print(f" sample missing: {missing[:3]}")
    if unexpected:
        print(f" sample unexpected: {unexpected[:3]}")

    model.to(device).eval()
    return model, cfg, train_meta
+
+
def load_test_samples(data_path, n_total, shard_id, num_shards, seed):
    """Return one shard of a seeded random sample of the test split.

    ``n_total`` distinct rows are drawn without replacement from the arrays in
    ``<data_path>/test``; the draw is split into ``num_shards`` contiguous
    chunks of ``ceil(n_total / num_shards)`` rows and chunk ``shard_id`` is
    returned.  Runs that share ``seed`` and ``n_total`` therefore partition the
    same draw between their shards.

    ``n_total`` is clamped to the number of available rows, since sampling
    more distinct rows than exist would raise.

    Returns:
        dict with int32 tensors ``"inputs"``, ``"labels"``,
        ``"puzzle_identifiers"`` and the numpy index array ``"idx"``.

    Raises:
        ValueError: if ``num_shards`` is smaller than 1.
    """
    if num_shards < 1:
        raise ValueError(f"num_shards must be >= 1, got {num_shards}")

    test_dir = Path(data_path) / "test"
    inputs = np.load(test_dir / "all__inputs.npy")
    labels = np.load(test_dir / "all__labels.npy")
    pid = np.load(test_dir / "all__puzzle_identifiers.npy")

    # Cap the request at the dataset size before sampling without replacement.
    n_total = min(n_total, len(inputs))
    rng = np.random.default_rng(seed)
    all_idx = rng.choice(len(inputs), size=n_total, replace=False)

    shard_size = (n_total + num_shards - 1) // num_shards  # ceil division
    start = shard_id * shard_size
    stop = min(start + shard_size, n_total)
    idx = all_idx[start:stop]

    return {
        "inputs": torch.from_numpy(inputs[idx].astype(np.int32)),
        "labels": torch.from_numpy(labels[idx].astype(np.int32)),
        "puzzle_identifiers": torch.from_numpy(pid[idx].astype(np.int32)),
        "idx": idx,
    }
+
+
def jvp(f, x, v):
    """Return ``(f(x), J_f(x) @ v)`` without building a graph for the result."""
    primal, tangent = torch.autograd.functional.jvp(
        f, x, v, create_graph=False, strict=False
    )
    return primal, tangent
+
+
def run_diagnose_batch(model, batch, device, k_lyap, t_ons, seed):
    """Estimate joint, L-only and H-only Lyapunov spectra for one batch.

    Three tangent bases are evolved side by side over ``cfg.halt_max_steps``
    ACT steps:

    - ``Q_joint`` (B, 2D, k): rows ``[:D]`` are z_H tangents, rows ``[D:]`` are
      z_L tangents.  L step: v_L_new = J_L · (v_H + v_L); H step:
      v_H_new = J_H · (v_H + v_L); the other half is left unchanged.
    - ``Q_L`` (B, D, k): multiplied by J_L on L steps only.
    - ``Q_H`` (B, D, k): multiplied by J_H on H steps only.

    All Jacobians are evaluated at the current states of the actual trajectory.

    Args:
        model: Object exposing ``model.inner`` (config, H_init/L_init, L_level,
            H_level, lm_head, ...).
        batch: Dict with "inputs", "labels" and "puzzle_identifiers" tensors.
        device: Device for the tangent bases and the accumulators.
        k_lyap: Number of tangent directions per basis (>= 1).
        t_ons: Re-orthonormalise each basis with QR every ``t_ons`` of its own
            steps (>= 1).
        seed: Seed of the generator that draws the three initial bases.

    Returns:
        Dict of numpy arrays: ``lyap_joint``, ``lyap_L``, ``lyap_H`` (each
        (B, k)), ``exact_correct`` (B,) and ``token_acc`` (B,).

    Raises:
        ValueError: if ``k_lyap`` or ``t_ons`` is smaller than 1.

    NOTE(review): each spectrum is the mean log|diag R| per QR event of that
    basis, so with ``t_ons`` > 1 it is a growth rate per ``t_ons`` steps --
    confirm this is intended.
    """
    if k_lyap < 1 or t_ons < 1:
        raise ValueError(f"k_lyap and t_ons must be >= 1, got k_lyap={k_lyap}, t_ons={t_ons}")

    inner = model.inner
    cfg = inner.config
    B = batch["inputs"].shape[0]
    seq_full = cfg.seq_len + inner.puzzle_emb_len
    hidden = cfg.hidden_size
    D = seq_full * hidden
    state_shape = (B, seq_full, hidden)
    work_dtype = inner.forward_dtype

    z_H = inner.H_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(work_dtype)
    z_L = inner.L_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(work_dtype)
    seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None)
    input_embeddings = inner._input_embeddings(batch["inputs"].to(device),
                                               batch["puzzle_identifiers"].to(device))

    g = torch.Generator(device=device).manual_seed(seed)

    def random_basis(rows):
        """Draw a (B, rows, k) Gaussian block from ``g`` and orthonormalise its columns."""
        return torch.linalg.qr(
            torch.randn(B, rows, k_lyap, device=device, dtype=torch.float32, generator=g))[0]

    def push_columns(step_fn, z, tangents):
        """Return (step_fn(z), J · tangents) using one JVP per column of the (B, D, k) block."""
        z_next, cols = None, []
        for i in range(k_lyap):
            v_i = tangents[:, :, i].reshape(state_shape).to(work_dtype)
            z_next, Dv = jvp(step_fn, z, v_i)
            cols.append(Dv.reshape(B, D).to(torch.float32))
        return z_next, torch.stack(cols, dim=-1)

    def qr_accumulate(basis, log_sum):
        """Re-orthonormalise ``basis`` and add log|diag R| to ``log_sum``."""
        basis, R = torch.linalg.qr(basis)
        return basis, log_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()

    # Three independent orthonormal bases (drawn in this order from the same generator).
    Q_joint = random_basis(2 * D)
    Q_L = random_basis(D)
    Q_H = random_basis(D)

    log_R_joint = torch.zeros(B, k_lyap, device=device, dtype=torch.float32)
    log_R_L = torch.zeros(B, k_lyap, device=device, dtype=torch.float32)
    log_R_H = torch.zeros(B, k_lyap, device=device, dtype=torch.float32)
    n_joint_steps = n_L_steps = n_H_steps = 0
    step_counter_joint = step_counter_L = step_counter_H = 0

    for _act_step in range(cfg.halt_max_steps):
        with torch.enable_grad():
            zH, zL = z_H.detach(), z_L.detach()
            for _h in range(cfg.H_cycles):
                for _l in range(cfg.L_cycles):
                    f_L = lambda z: inner.L_level(z, zH + input_embeddings, **seq_info)

                    # Joint basis: only the z_L half changes on an L step.
                    v_H_j = Q_joint[:, :D, :]
                    v_L_j = Q_joint[:, D:, :]
                    zL_new, new_v_L_j = push_columns(f_L, zL, v_H_j + v_L_j)
                    Q_joint = torch.cat([v_H_j, new_v_L_j], dim=1)

                    # L-only basis, Jacobian at the same (pre-update) z_L.
                    _, Q_L = push_columns(f_L, zL, Q_L)

                    # Q_H is untouched during an L step.
                    zL = zL_new

                    step_counter_joint += 1
                    step_counter_L += 1
                    if step_counter_joint % t_ons == 0:
                        Q_joint, log_R_joint = qr_accumulate(Q_joint, log_R_joint)
                        n_joint_steps += 1
                    if step_counter_L % t_ons == 0:
                        Q_L, log_R_L = qr_accumulate(Q_L, log_R_L)
                        n_L_steps += 1

                f_H = lambda z: inner.H_level(z, zL, **seq_info)

                # Joint basis: only the z_H half changes on an H step.
                v_H_j = Q_joint[:, :D, :]
                v_L_j = Q_joint[:, D:, :]
                zH_new, new_v_H_j = push_columns(f_H, zH, v_H_j + v_L_j)
                Q_joint = torch.cat([new_v_H_j, v_L_j], dim=1)

                # H-only basis, Jacobian at the same (pre-update) z_H.
                _, Q_H = push_columns(f_H, zH, Q_H)

                # Q_L is untouched during an H step.
                zH = zH_new

                step_counter_joint += 1
                step_counter_H += 1
                if step_counter_joint % t_ons == 0:
                    Q_joint, log_R_joint = qr_accumulate(Q_joint, log_R_joint)
                    n_joint_steps += 1
                if step_counter_H % t_ons == 0:
                    Q_H, log_R_H = qr_accumulate(Q_H, log_R_H)
                    n_H_steps += 1

        z_H, z_L = zH, zL

    lyap_joint = (log_R_joint / max(n_joint_steps, 1)).cpu().numpy()
    lyap_L = (log_R_L / max(n_L_steps, 1)).cpu().numpy()
    lyap_H = (log_R_H / max(n_H_steps, 1)).cpu().numpy()

    with torch.no_grad():
        # Only the logits of the final state are scored, so compute them once.
        final_logits = inner.lm_head(z_H)[:, inner.puzzle_emb_len:].float()
        preds = final_logits.argmax(dim=-1)
        labels = batch["labels"].to(device)
        mask = labels > 0  # positions with label <= 0 are not scored
        exact = ((preds == labels) | ~mask).all(dim=-1).cpu().float()
        token_acc = ((preds == labels) & mask).sum(-1).float() / mask.sum(-1).float().clamp_min(1)
        token_acc = token_acc.cpu()

    return {
        "lyap_joint": lyap_joint,
        "lyap_L": lyap_L,
        "lyap_H": lyap_H,
        "exact_correct": exact.numpy(),
        "token_acc": token_acc.numpy(),
    }
+
+
def main():
    """Command-line entry point: run the joint / L-only / H-only diagnostic on one shard.

    Loads the checkpoint, draws this shard's test samples, runs
    ``run_diagnose_batch`` batch by batch, writes the per-sample arrays to
    ``--out`` as a compressed ``.npz`` and prints, for each of the three
    spectra, the mean of every exponent over solved and unsolved samples.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--ckpt-root", required=True)
    ap.add_argument("--ckpt-name", default="step_26040")
    ap.add_argument("--n-samples", type=int, default=1024)
    ap.add_argument("--shard-id", type=int, default=0)
    ap.add_argument("--num-shards", type=int, default=1)
    ap.add_argument("--batch-size", type=int, default=32)
    ap.add_argument("--k-lyap", type=int, default=8)
    ap.add_argument("--t-ons", type=int, default=1)
    ap.add_argument("--seed", type=int, default=0)
    ap.add_argument("--out", default="diag_separate.npz")
    args = ap.parse_args()

    device = "cuda"
    model, cfg, train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device)
    test = load_test_samples(Path(cfg["data_path"]), args.n_samples,
                             args.shard_id, args.num_shards, args.seed)
    n = len(test["inputs"])
    print(f"shard {args.shard_id}/{args.num_shards}: {n} samples")
    print(f"H_cycles={model.inner.config.H_cycles} L_cycles={model.inner.config.L_cycles} halt={model.inner.config.halt_max_steps}")
    if n == 0:
        # Nothing to compute for an empty shard; stop before the summary code
        # looks up arrays that were never produced.
        print("no samples in this shard; nothing to do")
        return

    res = {k: [] for k in ["lyap_joint", "lyap_L", "lyap_H", "exact_correct", "token_acc", "idx"]}
    t0 = time.time()
    for s in range(0, n, args.batch_size):
        e = min(s + args.batch_size, n)
        batch = {k: test[k][s:e].to(device) for k in ["inputs", "labels", "puzzle_identifiers"]}
        # Offset the seed per batch so each batch gets its own initial tangent bases.
        out = run_diagnose_batch(model, batch, device, args.k_lyap, args.t_ons, args.seed + s)
        for k, v in out.items():
            res[k].append(v)
        res["idx"].append(test["idx"][s:e])
        print(f" [{e}/{n}] dt={time.time()-t0:.1f}s exact={out['exact_correct'].mean():.3f} "
              f"λj1={out['lyap_joint'][:,0].mean():+.3f} "
              f"λL1={out['lyap_L'][:,0].mean():+.3f} "
              f"λH1={out['lyap_H'][:,0].mean():+.3f}", flush=True)

    saved = {}
    for k, v in res.items():
        if not v:
            continue
        try:
            saved[k] = np.concatenate(v, 0)
        except ValueError:
            saved[k] = np.stack(v, 0)
    np.savez_compressed(args.out, **saved)

    def group_mean(x):
        # An empty group (all solved or all failed) reports NaN without
        # triggering NumPy's empty-slice warning.
        return x.mean() if x.size else float("nan")

    succ = saved["exact_correct"] > 0.5
    print(f"\nN={len(succ)} acc={succ.mean():.4f}")
    for name in ["lyap_joint", "lyap_L", "lyap_H"]:
        ls = saved[name]
        print(f"\n{name}:")
        print(f" i mean_succ mean_fail Δ")
        for i in range(ls.shape[1]):
            ms, mf = group_mean(ls[succ, i]), group_mean(ls[~succ, i])
            print(f" {i+1} {ms:+8.4f} {mf:+8.4f} {mf-ms:+8.4f}")
    print(f"\nsaved → {args.out}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/diagnose_trm_joint.py b/diagnose_trm_joint.py
new file mode 100644
index 0000000..160a7cb
--- /dev/null
+++ b/diagnose_trm_joint.py
@@ -0,0 +1,225 @@
+"""TRM Sudoku joint Lyapunov diagnostic — TRM version of diagnose_hrm_joint.py.
+
+Key differences from HRM:
+- TRM has ONE shared L_level (H_layers config is "ignored")
+- z_L update: z_L = L_level(z_L, z_H + input_embeddings)
+- z_H update: z_H = L_level(z_H, z_L) ← same L_level!
+- H_cycles=3, L_cycles=6 (vs HRM 2,2)
+
+Joint tangent block structure:
+- L step: v_L_new = J · (v_L + v_H), v_H_new = v_H, J at (z_L + z_H + ie)
+- H step: v_H_new = J' · (v_H + v_L), v_L_new = v_L, J' at (z_H + z_L)
+J and J' share weights but evaluated at different points.
+"""
+from __future__ import annotations
+import sys, os, yaml, math, argparse, json, time
+from pathlib import Path
+import numpy as np
+import torch
+
+TRM_DIR = Path("/home/yurenh2/rrm/trm")
+sys.path.insert(0, str(TRM_DIR))
+
+from models.recursive_reasoning.trm import TinyRecursiveReasoningModel_ACTV1
+
+
def load_model(ckpt_root: Path, ckpt_name: str, device: str):
    """Rebuild the TRM model from a run directory and load one checkpoint.

    Args:
        ckpt_root: directory holding ``all_config.yaml`` and the checkpoint file.
        ckpt_name: checkpoint file name inside ``ckpt_root``.
        device: torch device string the model is moved to.

    Returns:
        ``(model, cfg, train_meta)``: the model in eval mode, the parsed YAML
        config, and the parsed ``train/dataset.json`` of the first data path.
    """
    cfg = yaml.safe_load((ckpt_root / "all_config.yaml").read_text())
    arch_cfg = dict(cfg["arch"])

    # Dataset-dependent sizes are read from the training split's metadata.
    meta_file = Path(cfg["data_paths"][0]) / "train" / "dataset.json"
    train_meta = json.loads(meta_file.read_text())
    overrides = {
        "batch_size": cfg["global_batch_size"],
        "seq_len": train_meta["seq_len"],
        "vocab_size": train_meta["vocab_size"],
        "num_puzzle_identifiers": train_meta["num_puzzle_identifiers"],
    }
    arch_cfg.update(overrides)

    model = TinyRecursiveReasoningModel_ACTV1(arch_cfg)

    state = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True)
    # Drop the "_orig_mod." / "model." name fragments (presumably added by
    # torch.compile and a wrapper module -- TODO confirm against the trainer).
    renamed = {}
    for key, tensor in state.items():
        renamed[key.replace("_orig_mod.", "").replace("model.", "")] = tensor

    # Non-strict load: mismatches are reported below, not raised.
    missing, unexpected = model.load_state_dict(renamed, strict=False)
    print(f"[load] missing={len(missing)} unexpected={len(unexpected)}")
    if missing[:3]:
        print(f" sample missing: {missing[:3]}")
    if unexpected[:3]:
        print(f" sample unexpected: {unexpected[:3]}")

    model.to(device)
    model.eval()
    return model, cfg, train_meta
+
+
def load_test_samples(data_path, n_total, shard_id, num_shards, seed):
    """Draw a seeded random subset of the test split and return one shard of it.

    The same ``seed`` yields the same ``n_total`` indices for every shard, so
    the shards are disjoint slices of one common sample.

    Args:
        data_path: dataset root (``str`` or ``Path``) containing ``test/all__*.npy``.
        n_total: number of test examples to sample across all shards; clamped
            to the size of the test set (sampling is without replacement).
        shard_id: index of the shard to return, ``0 <= shard_id < num_shards``.
        num_shards: number of shards the sample is split into.
        seed: seed for ``numpy.random.default_rng``.

    Returns:
        dict with int32 tensors ``inputs``, ``labels``, ``puzzle_identifiers``
        and the NumPy array ``idx`` of the selected test-set row indices.
        Shards use a ceil-divided size, so trailing shards may be short or empty.

    Raises:
        ValueError: if ``num_shards < 1`` or ``shard_id`` is out of range.
    """
    if num_shards < 1:
        raise ValueError(f"num_shards must be >= 1, got {num_shards}")
    if not 0 <= shard_id < num_shards:
        raise ValueError(f"shard_id must be in [0, {num_shards}), got {shard_id}")

    test_dir = Path(data_path) / "test"
    inputs = np.load(test_dir / "all__inputs.npy")
    labels = np.load(test_dir / "all__labels.npy")
    pid = np.load(test_dir / "all__puzzle_identifiers.npy")

    # Sampling without replacement cannot return more rows than exist.
    n_total = min(n_total, len(inputs))
    rng = np.random.default_rng(seed)
    all_idx = rng.choice(len(inputs), size=n_total, replace=False)

    shard_size = (n_total + num_shards - 1) // num_shards
    start = shard_id * shard_size
    stop = min(start + shard_size, n_total)
    idx = all_idx[start:stop]
    return {
        "inputs": torch.from_numpy(inputs[idx].astype(np.int32)),
        "labels": torch.from_numpy(labels[idx].astype(np.int32)),
        "puzzle_identifiers": torch.from_numpy(pid[idx].astype(np.int32)),
        "idx": idx,
    }
+
+
def jvp_through(f, x, v):
    """Evaluate ``f`` at ``x`` and its Jacobian-vector product with ``v``.

    Returns the ``(f(x), J_f(x) @ v)`` pair produced by
    ``torch.autograd.functional.jvp`` (no graph is kept, non-strict mode).
    """
    value, tangent = torch.autograd.functional.jvp(
        f, x, v=v, create_graph=False, strict=False
    )
    return value, tangent
+
+
def run_diagnose_batch(model, batch, device, k_lyap, t_ons, seed):
    """Run the full recursion on one batch and estimate joint Lyapunov exponents.

    The joint state is ``(z_H, z_L)``; ``k_lyap`` tangent vectors of size ``2*D``
    (H block first, L block second) are pushed through every L sub-step and
    every H sub-step with JVPs and re-orthonormalised by QR every ``t_ons``
    sub-steps. The recursion always runs all ``halt_max_steps`` outer steps;
    the halting head is only recorded, never acted on.

    Args:
        model: object exposing ``model.inner`` (config, ``L_level``, heads, inits).
        batch: dict with ``inputs``, ``labels``, ``puzzle_identifiers`` tensors.
        device: torch device used for the tangent basis and RNG.
        k_lyap: number of tangent vectors / exponents (must be >= 1).
        t_ons: number of sub-steps between QR re-orthonormalisations (>= 1).
        seed: seed of the generator that draws the initial tangent basis.

    Returns:
        dict of NumPy arrays: ``drift_zH`` / ``drift_zL`` (B, steps) state change
        norms per outer step, ``halted_at`` (B,) first 1-based step where
        ``q_halt > q_continue`` (0 = never), ``q_halt`` / ``q_continue``
        (B, steps), ``lyap_spec`` (B, k_lyap) mean ``log|R_ii|`` per QR event,
        ``exact_correct`` (B,) and ``token_acc`` (B,) scored where ``labels > 0``.

    Raises:
        ValueError: if ``k_lyap < 1`` or ``t_ons < 1``.
    """
    if k_lyap < 1:
        raise ValueError(f"k_lyap must be >= 1, got {k_lyap}")
    if t_ons < 1:
        raise ValueError(f"t_ons must be >= 1, got {t_ons}")

    inner = model.inner
    cfg = inner.config
    B = batch["inputs"].shape[0]
    seq_full = cfg.seq_len + inner.puzzle_emb_len
    hidden = cfg.hidden_size
    D = seq_full * hidden

    z_H = inner.H_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
    z_L = inner.L_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
    seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None)
    input_embeddings = inner._input_embeddings(batch["inputs"].to(device),
                                               batch["puzzle_identifiers"].to(device))

    # Random orthonormal start basis for the joint tangent space.
    g = torch.Generator(device=device).manual_seed(seed)
    Q0 = torch.randn(B, 2 * D, k_lyap, device=device, dtype=torch.float32, generator=g)
    Q, _ = torch.linalg.qr(Q0)
    log_R_sum = torch.zeros(B, k_lyap, device=device, dtype=torch.float32)
    n_lyap_steps = 0
    step_counter = 0

    def push_tangents(f, z, tangents):
        """Return ``f(z)`` and the (B, D, k_lyap) float32 image of ``tangents`` under its Jacobian."""
        cols = []
        for i in range(k_lyap):
            v_i = tangents[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype)
            z_next, Dv = jvp_through(f, z, v_i)
            cols.append(Dv.reshape(B, D).to(torch.float32))
        return z_next, torch.stack(cols, dim=-1)

    def finish_substep():
        """Count one sub-step and re-orthonormalise every ``t_ons`` sub-steps."""
        nonlocal Q, log_R_sum, n_lyap_steps, step_counter
        step_counter += 1
        if step_counter % t_ons == 0:
            Q, R = torch.linalg.qr(Q)
            log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
            n_lyap_steps += 1

    drift_zH_per_step, drift_zL_per_step = [], []
    halted_at = torch.zeros(B, dtype=torch.long, device=device)
    q_halt_hist, q_continue_hist = [], []

    for act_step in range(cfg.halt_max_steps):
        z_H_prev = z_H.detach().clone()
        z_L_prev = z_L.detach().clone()

        with torch.enable_grad():
            zH, zL = z_H.detach(), z_L.detach()
            for _h in range(cfg.H_cycles):
                # zH is fixed during the L cycles, so the injection is computed once.
                injection = zH + input_embeddings
                f_L = lambda z: inner.L_level(z, injection, **seq_info)
                for _l in range(cfg.L_cycles):
                    # L step: v_L <- J (v_H + v_L), v_H unchanged.
                    v_H_j = Q[:, :D, :]
                    v_L_j = Q[:, D:, :]
                    zL, new_v_L = push_tangents(f_L, zL, v_H_j + v_L_j)
                    Q = torch.cat([v_H_j, new_v_L], dim=1)
                    finish_substep()

                # H step (uses the SAME L_level): v_H <- J' (v_H + v_L), v_L unchanged.
                v_H_j = Q[:, :D, :]
                v_L_j = Q[:, D:, :]
                f_H = lambda z: inner.L_level(z, zL, **seq_info)
                zH, new_v_H = push_tangents(f_H, zH, v_H_j + v_L_j)
                Q = torch.cat([new_v_H, v_L_j], dim=1)
                finish_substep()

        z_H, z_L = zH, zL

        drift_zH_per_step.append((z_H - z_H_prev).float().flatten(1).norm(dim=1).cpu())
        drift_zL_per_step.append((z_L - z_L_prev).float().flatten(1).norm(dim=1).cpu())

        with torch.no_grad():
            q_logits = inner.q_head(z_H[:, 0]).float()
            q_halt, q_continue = q_logits[..., 0], q_logits[..., 1]
            q_halt_hist.append(q_halt.cpu())
            q_continue_hist.append(q_continue.cpu())
            # Record only the first step at which the halting head fires.
            new_halt = (q_halt > q_continue) & (halted_at == 0)
            halted_at[new_halt] = act_step + 1
            output = inner.lm_head(z_H)[:, inner.puzzle_emb_len:].float()
            final_logits = output

    lyap_spec = (log_R_sum / max(n_lyap_steps, 1)).cpu().numpy()

    with torch.no_grad():
        preds = final_logits.argmax(dim=-1)
        labels = batch["labels"].to(device)
        mask = labels > 0
        exact = ((preds == labels) | ~mask).all(dim=-1).cpu().float()
        token_acc = ((preds == labels) & mask).sum(-1).float() / mask.sum(-1).float().clamp_min(1)
        token_acc = token_acc.cpu()

    return {
        "drift_zH": torch.stack(drift_zH_per_step, dim=1).numpy(),
        "drift_zL": torch.stack(drift_zL_per_step, dim=1).numpy(),
        "halted_at": halted_at.cpu().numpy(),
        "q_halt": torch.stack(q_halt_hist, dim=1).numpy(),
        "q_continue": torch.stack(q_continue_hist, dim=1).numpy(),
        "lyap_spec": lyap_spec,
        "exact_correct": exact.numpy(),
        "token_acc": token_acc.numpy(),
    }
+
+
def main():
    """CLI entry point: diagnose one shard of test samples and save the results.

    Parses the arguments, loads the checkpoint, runs ``run_diagnose_batch``
    over the shard in batches, writes all per-sample arrays to ``--out`` as a
    compressed ``.npz`` and prints the Lyapunov spectrum split by success.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--ckpt-root", required=True)
    ap.add_argument("--ckpt-name", default="step_13020")
    ap.add_argument("--n-samples", type=int, default=512)
    ap.add_argument("--shard-id", type=int, default=0)
    ap.add_argument("--num-shards", type=int, default=1)
    ap.add_argument("--batch-size", type=int, default=16)
    ap.add_argument("--k-lyap", type=int, default=8)
    ap.add_argument("--t-ons", type=int, default=1)
    ap.add_argument("--seed", type=int, default=0)
    ap.add_argument("--out", default="diag_trm.npz")
    args = ap.parse_args()

    device = "cuda"
    model, cfg, train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device)
    print(f"loaded {args.ckpt_name}: hidden={model.inner.config.hidden_size}, "
          f"seq_full={train_meta['seq_len'] + model.inner.puzzle_emb_len}, "
          f"halt_max_steps={model.inner.config.halt_max_steps}, "
          f"H={model.inner.config.H_cycles} L={model.inner.config.L_cycles}")

    test = load_test_samples(Path(cfg["data_paths"][0]), args.n_samples, args.shard_id, args.num_shards, args.seed)
    n = len(test["inputs"])
    print(f"shard {args.shard_id}/{args.num_shards}: {n} samples")

    res = {k: [] for k in ["drift_zH","drift_zL","halted_at","q_halt","q_continue","lyap_spec","exact_correct","token_acc","idx"]}
    t0 = time.time()
    for s in range(0, n, args.batch_size):
        e = min(s + args.batch_size, n)
        batch = {k: test[k][s:e].to(device) for k in ["inputs","labels","puzzle_identifiers"]}
        # Offsetting the seed by the batch start gives each batch its own tangent basis.
        out = run_diagnose_batch(model, batch, device, args.k_lyap, args.t_ons, args.seed + s)
        for k, v in out.items():
            res.setdefault(k, []).append(v)
        res["idx"].append(test["idx"][s:e])
        ls = out["lyap_spec"]
        print(f" [{e}/{n}] dt={time.time()-t0:.1f}s exact={out['exact_correct'].mean():.3f} "
              f"λ_1={ls[:,0].mean():+.4f} λ_{args.k_lyap}={ls[:,-1].mean():+.4f}", flush=True)

    saved = {}
    for k, v in res.items():
        if not v:
            continue
        try:
            saved[k] = np.concatenate(v, 0)
        except ValueError:
            saved[k] = np.stack(v, 0)
    np.savez_compressed(args.out, **saved)

    # An empty shard produces no arrays; there is nothing to summarise.
    if "exact_correct" not in saved:
        print("no samples in this shard; skipping success/failure summary")
        return

    def group_mean(x):
        # NaN for an empty group, without NumPy's empty-slice warning.
        return x.mean() if x.size else float("nan")

    succ = saved["exact_correct"] > 0.5
    print(f"\nN={len(succ)} acc={succ.mean():.4f}")
    print(f"{'i':>3} {'all':>10} {'succ':>10} {'fail':>10} {'Δ':>9}")
    for i in range(saved["lyap_spec"].shape[1]):
        li = saved["lyap_spec"][:, i]
        m_succ, m_fail = group_mean(li[succ]), group_mean(li[~succ])
        print(f"{i+1:>3} {li.mean():+10.4f} {m_succ:+10.4f} {m_fail:+10.4f} {m_fail-m_succ:+9.4f}")


if __name__ == "__main__":
    main()
diff --git a/diagnose_trm_joint_clv.py b/diagnose_trm_joint_clv.py
new file mode 100644
index 0000000..f4c939e
--- /dev/null
+++ b/diagnose_trm_joint_clv.py
@@ -0,0 +1,304 @@
+"""TRM Sudoku joint Lyapunov diagnostic — TRM version of diagnose_hrm_joint.py.
+
+Key differences from HRM:
+- TRM has ONE shared L_level (H_layers config is "ignored")
+- z_L update: z_L = L_level(z_L, z_H + input_embeddings)
+- z_H update: z_H = L_level(z_H, z_L) ← same L_level!
+- H_cycles=3, L_cycles=6 (vs HRM 2,2)
+
+Joint tangent block structure:
+- L step: v_L_new = J · (v_L + v_H), v_H_new = v_H, J at (z_L + z_H + ie)
+- H step: v_H_new = J' · (v_H + v_L), v_L_new = v_L, J' at (z_H + z_L)
+J and J' share weights but evaluated at different points.
+"""
+from __future__ import annotations
+import sys, os, yaml, math, argparse, json, time
+from pathlib import Path
+import numpy as np
+import torch
+
+TRM_DIR = Path("/home/yurenh2/rrm/trm")
+sys.path.insert(0, str(TRM_DIR))
+
+from models.recursive_reasoning.trm import TinyRecursiveReasoningModel_ACTV1
+
+
def load_model(ckpt_root: Path, ckpt_name: str, device: str):
    """Rebuild the TRM model from a run directory and load one checkpoint.

    Args:
        ckpt_root: directory holding ``all_config.yaml`` and the checkpoint file.
        ckpt_name: checkpoint file name inside ``ckpt_root``.
        device: torch device string the model is moved to.

    Returns:
        ``(model, cfg, train_meta)``: the model in eval mode, the parsed YAML
        config, and the parsed ``train/dataset.json`` of the first data path.
    """
    cfg = yaml.safe_load((ckpt_root / "all_config.yaml").read_text())
    arch_cfg = dict(cfg["arch"])

    # Dataset-dependent sizes are read from the training split's metadata.
    meta_file = Path(cfg["data_paths"][0]) / "train" / "dataset.json"
    train_meta = json.loads(meta_file.read_text())
    overrides = {
        "batch_size": cfg["global_batch_size"],
        "seq_len": train_meta["seq_len"],
        "vocab_size": train_meta["vocab_size"],
        "num_puzzle_identifiers": train_meta["num_puzzle_identifiers"],
    }
    arch_cfg.update(overrides)

    model = TinyRecursiveReasoningModel_ACTV1(arch_cfg)

    state = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True)
    # Drop the "_orig_mod." / "model." name fragments (presumably added by
    # torch.compile and a wrapper module -- TODO confirm against the trainer).
    renamed = {}
    for key, tensor in state.items():
        renamed[key.replace("_orig_mod.", "").replace("model.", "")] = tensor

    # Non-strict load: mismatches are reported below, not raised.
    missing, unexpected = model.load_state_dict(renamed, strict=False)
    print(f"[load] missing={len(missing)} unexpected={len(unexpected)}")
    if missing[:3]:
        print(f" sample missing: {missing[:3]}")
    if unexpected[:3]:
        print(f" sample unexpected: {unexpected[:3]}")

    model.to(device)
    model.eval()
    return model, cfg, train_meta
+
+
def load_test_samples(data_path, n_total, shard_id, num_shards, seed):
    """Draw a seeded random subset of the test split and return one shard of it.

    The same ``seed`` yields the same ``n_total`` indices for every shard, so
    the shards are disjoint slices of one common sample.

    Args:
        data_path: dataset root (``str`` or ``Path``) containing ``test/all__*.npy``.
        n_total: number of test examples to sample across all shards; clamped
            to the size of the test set (sampling is without replacement).
        shard_id: index of the shard to return, ``0 <= shard_id < num_shards``.
        num_shards: number of shards the sample is split into.
        seed: seed for ``numpy.random.default_rng``.

    Returns:
        dict with int32 tensors ``inputs``, ``labels``, ``puzzle_identifiers``
        and the NumPy array ``idx`` of the selected test-set row indices.
        Shards use a ceil-divided size, so trailing shards may be short or empty.

    Raises:
        ValueError: if ``num_shards < 1`` or ``shard_id`` is out of range.
    """
    if num_shards < 1:
        raise ValueError(f"num_shards must be >= 1, got {num_shards}")
    if not 0 <= shard_id < num_shards:
        raise ValueError(f"shard_id must be in [0, {num_shards}), got {shard_id}")

    test_dir = Path(data_path) / "test"
    inputs = np.load(test_dir / "all__inputs.npy")
    labels = np.load(test_dir / "all__labels.npy")
    pid = np.load(test_dir / "all__puzzle_identifiers.npy")

    # Sampling without replacement cannot return more rows than exist.
    n_total = min(n_total, len(inputs))
    rng = np.random.default_rng(seed)
    all_idx = rng.choice(len(inputs), size=n_total, replace=False)

    shard_size = (n_total + num_shards - 1) // num_shards
    start = shard_id * shard_size
    stop = min(start + shard_size, n_total)
    idx = all_idx[start:stop]
    return {
        "inputs": torch.from_numpy(inputs[idx].astype(np.int32)),
        "labels": torch.from_numpy(labels[idx].astype(np.int32)),
        "puzzle_identifiers": torch.from_numpy(pid[idx].astype(np.int32)),
        "idx": idx,
    }
+
+
def jvp_through(f, x, v):
    """Evaluate ``f`` at ``x`` and its Jacobian-vector product with ``v``.

    Returns the ``(f(x), J_f(x) @ v)`` pair produced by
    ``torch.autograd.functional.jvp`` (no graph is kept, non-strict mode).
    """
    value, tangent = torch.autograd.functional.jvp(
        f, x, v=v, create_graph=False, strict=False
    )
    return value, tangent
+
+
def run_diagnose_batch(model, batch, device, k_lyap, t_ons, seed):
    """Run the recursion on one batch; estimate Lyapunov exponents and CLV geometry.

    The joint state is ``(z_H, z_L)``; ``k_lyap`` tangent vectors of size ``2*D``
    (H block first, L block second) are pushed through every L and H sub-step
    with JVPs and re-orthonormalised by QR every ``t_ons`` sub-steps. Every R
    factor is stored so that a Ginelli-style backward pass can reconstruct
    covariant Lyapunov vectors (CLVs) at the trajectory midpoint. The recursion
    always runs all ``halt_max_steps`` outer steps; halting is only recorded.

    Args:
        model: object exposing ``model.inner`` (config, ``L_level``, heads, inits).
        batch: dict with ``inputs``, ``labels``, ``puzzle_identifiers`` tensors.
        device: torch device used for the tangent basis and RNGs.
        k_lyap: number of tangent vectors / exponents (must be >= 1).
        t_ons: number of sub-steps between QR re-orthonormalisations (>= 1).
        seed: seed for the initial tangent basis (``seed + 9973`` seeds the
            backward-pass coefficients).

    Returns:
        dict of NumPy arrays. Trajectory: ``drift_zH``, ``drift_zL``,
        ``halted_at`` (first 1-based step where ``q_halt > q_continue``, 0 =
        never), ``q_halt``, ``q_continue``, ``lyap_spec`` (B, k_lyap),
        ``exact_correct``, ``token_acc``. Final Gram-Schmidt vectors (up to 3
        leading columns of the last Q): ``clv_hfrac``, ``clv_pr``,
        ``clv_pr_ans``, ``clv_readout``. Midpoint CLVs: ``clv_maxcos``,
        ``clv_meancos``, ``clv_minangle``, ``clv_true_hfrac``,
        ``clv_true_pr_ans``, ``clv_true_readout``.

    Raises:
        ValueError: if ``k_lyap < 1`` or ``t_ons < 1``.
    """
    if k_lyap < 1:
        raise ValueError(f"k_lyap must be >= 1, got {k_lyap}")
    if t_ons < 1:
        raise ValueError(f"t_ons must be >= 1, got {t_ons}")

    inner = model.inner
    cfg = inner.config
    B = batch["inputs"].shape[0]
    seq_full = cfg.seq_len + inner.puzzle_emb_len
    hidden = cfg.hidden_size
    D = seq_full * hidden

    z_H = inner.H_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
    z_L = inner.L_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
    seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None)
    input_embeddings = inner._input_embeddings(batch["inputs"].to(device),
                                               batch["puzzle_identifiers"].to(device))

    # Random orthonormal start basis for the joint tangent space.
    g = torch.Generator(device=device).manual_seed(seed)
    Q0 = torch.randn(B, 2 * D, k_lyap, device=device, dtype=torch.float32, generator=g)
    Q, _ = torch.linalg.qr(Q0)
    log_R_sum = torch.zeros(B, k_lyap, device=device, dtype=torch.float32)
    n_lyap_steps = 0
    step_counter = 0

    drift_zH_per_step, drift_zL_per_step = [], []
    halted_at = torch.zeros(B, dtype=torch.long, device=device)
    q_halt_hist, q_continue_hist = [], []

    # Ginelli CLV: store R at every QR, capture Q at the trajectory midpoint (analysis time).
    R_list = []
    target_qr = (cfg.halt_max_steps * cfg.H_cycles * (cfg.L_cycles + 1) // t_ons) // 2
    # With fewer than two QR events the "midpoint" is time 0: the basis that
    # matches coefficients pulled back through all of R_list is the initial Q.
    Q_mid = Q.detach().clone() if target_qr == 0 else None

    def push_tangents(f, z, tangents):
        """Return ``f(z)`` and the (B, D, k_lyap) float32 image of ``tangents`` under its Jacobian."""
        cols = []
        for i in range(k_lyap):
            v_i = tangents[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype)
            z_next, Dv = jvp_through(f, z, v_i)
            cols.append(Dv.reshape(B, D).to(torch.float32))
        return z_next, torch.stack(cols, dim=-1)

    def finish_substep():
        """Count one sub-step; on every ``t_ons``-th, QR, accumulate and record R / Q_mid."""
        nonlocal Q, log_R_sum, n_lyap_steps, step_counter, Q_mid
        step_counter += 1
        if step_counter % t_ons == 0:
            Q, R = torch.linalg.qr(Q)
            log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
            n_lyap_steps += 1
            R_list.append(R.detach())
            if n_lyap_steps == target_qr:
                Q_mid = Q.detach().clone()

    for act_step in range(cfg.halt_max_steps):
        z_H_prev = z_H.detach().clone()
        z_L_prev = z_L.detach().clone()

        with torch.enable_grad():
            zH, zL = z_H.detach(), z_L.detach()
            for _h in range(cfg.H_cycles):
                # zH is fixed during the L cycles, so the injection is computed once.
                injection = zH + input_embeddings
                f_L = lambda z: inner.L_level(z, injection, **seq_info)
                for _l in range(cfg.L_cycles):
                    # L step: v_L <- J (v_H + v_L), v_H unchanged.
                    v_H_j = Q[:, :D, :]
                    v_L_j = Q[:, D:, :]
                    zL, new_v_L = push_tangents(f_L, zL, v_H_j + v_L_j)
                    Q = torch.cat([v_H_j, new_v_L], dim=1)
                    finish_substep()

                # H step (uses the SAME L_level): v_H <- J' (v_H + v_L), v_L unchanged.
                v_H_j = Q[:, :D, :]
                v_L_j = Q[:, D:, :]
                f_H = lambda z: inner.L_level(z, zL, **seq_info)
                zH, new_v_H = push_tangents(f_H, zH, v_H_j + v_L_j)
                Q = torch.cat([new_v_H, v_L_j], dim=1)
                finish_substep()

        z_H, z_L = zH, zL

        drift_zH_per_step.append((z_H - z_H_prev).float().flatten(1).norm(dim=1).cpu())
        drift_zL_per_step.append((z_L - z_L_prev).float().flatten(1).norm(dim=1).cpu())

        with torch.no_grad():
            q_logits = inner.q_head(z_H[:, 0]).float()
            q_halt, q_continue = q_logits[..., 0], q_logits[..., 1]
            q_halt_hist.append(q_halt.cpu())
            q_continue_hist.append(q_continue.cpu())
            # Record only the first step at which the halting head fires.
            new_halt = (q_halt > q_continue) & (halted_at == 0)
            halted_at[new_halt] = act_step + 1
            output = inner.lm_head(z_H)[:, inner.puzzle_emb_len:].float()
            final_logits = output

    lyap_spec = (log_R_sum / max(n_lyap_steps, 1)).cpu().numpy()

    # Ginelli CLV backward pass: iterate C = normalize(R^{-1} C) from the end back to the
    # midpoint analysis time, then CLVs = Q_mid @ C. Geometry NOT captured by exponents:
    # pairwise CLV angles (Oseledets-splitting tangency).
    with torch.no_grad():
        if Q_mid is None:
            Q_mid = Q  # defensive fallback; not expected to trigger
        g2 = torch.Generator(device=device).manual_seed(seed + 9973)
        C = torch.triu(torch.randn(B, k_lyap, k_lyap, device=device, dtype=torch.float32, generator=g2))
        C = C / C.norm(dim=1, keepdim=True).clamp_min(1e-20)
        # R_list[i] belongs to QR event i + 1, so the factors after the midpoint start at target_qr.
        for R in reversed(R_list[target_qr:]):
            Rf = R.float()
            # guard against tiny diagonal (rank-deficient tangent) before triangular solve
            diag = Rf.diagonal(dim1=-2, dim2=-1)
            Rf = Rf + torch.diag_embed(torch.where(diag.abs() < 1e-8, torch.full_like(diag, 1e-8), torch.zeros_like(diag)))
            C = torch.linalg.solve_triangular(Rf, C, upper=True)
            C = C / C.norm(dim=1, keepdim=True).clamp_min(1e-20)
        V = torch.matmul(Q_mid.float(), C)  # (B, 2D, k) covariant LVs
        V = V / V.norm(dim=1, keepdim=True).clamp_min(1e-20)  # unit columns (non-orthogonal)
        gram = torch.matmul(V.transpose(1, 2), V).abs()  # (B, k, k) |cos| between CLVs
        eye = torch.eye(k_lyap, device=device).unsqueeze(0)
        off = gram * (1 - eye)
        max_cos = off.amax(dim=(1, 2))
        clv_maxcos = max_cos.cpu().numpy()  # tangency: max |cos| (1=degenerate)
        # k_lyap == 1 has no vector pairs; avoid 0/0 and report 0.
        n_pairs = max(k_lyap * (k_lyap - 1), 1)
        clv_meancos = (off.sum(dim=(1, 2)) / n_pairs).cpu().numpy()
        clv_minangle = torch.acos(max_cos.clamp(max=1 - 1e-7)).cpu().numpy()
        # leading TRUE CLV localization/readout (vs the leading-GS-vector version below)
        pe0 = inner.puzzle_emb_len
        vH = V[:, :D, 0].reshape(B, seq_full, hidden)
        eH = (vH ** 2).sum(-1)
        eL = (V[:, D:, 0].reshape(B, seq_full, hidden) ** 2).sum(-1)
        e = eH + eL
        clv_true_hfrac = (eH.sum(-1) / e.sum(-1).clamp_min(1e-20)).cpu().numpy()
        clv_true_pr_ans = (e[:, pe0:].sum(-1) ** 2 / (e[:, pe0:] ** 2).sum(-1).clamp_min(1e-20) / max(seq_full - pe0, 1)).cpu().numpy()
        ro = torch.matmul(vH[:, pe0:], inner.lm_head.weight.float().t())
        clv_true_readout = (ro.flatten(1).norm(dim=1) / vH[:, pe0:].flatten(1).norm(dim=1).clamp_min(1e-20)).cpu().numpy()

    with torch.no_grad():
        preds = final_logits.argmax(dim=-1)
        labels = batch["labels"].to(device)
        mask = labels > 0
        exact = ((preds == labels) | ~mask).all(dim=-1).cpu().float()
        token_acc = ((preds == labels) & mask).sum(-1).float() / mask.sum(-1).float().clamp_min(1)
        token_acc = token_acc.cpu()

    # Final-time geometry of the leading columns of Q. These are Gram-Schmidt
    # (orthonormalised) Lyapunov vectors; only column 0 coincides with the
    # leading CLV. All features are scale-invariant ratios, DIRECTION-based
    # (not growth rates): H/L energy split, token participation ratio
    # (localization), and alignment with the decode/readout direction.
    with torch.no_grad():
        n_top = min(3, k_lyap)
        pe = inner.puzzle_emb_len
        W = inner.lm_head.weight.float()  # readout weight used as a projection
        hfrac_c, pr_c, prans_c, ro_c = [], [], [], []
        for j in range(n_top):
            col = Q[:, :, j].float()  # (B, 2D)
            qH = col[:, :D].reshape(B, seq_full, hidden)
            qL = col[:, D:].reshape(B, seq_full, hidden)
            eH = (qH ** 2).sum(-1)
            eL = (qL ** 2).sum(-1)  # (B, seq_full) per-token energy
            e = eH + eL
            hfrac_c.append((eH.sum(-1) / e.sum(-1).clamp_min(1e-20)).cpu())
            pr_c.append((e.sum(-1) ** 2 / (e ** 2).sum(-1).clamp_min(1e-20) / seq_full).cpu())
            ea = e[:, pe:]
            prans_c.append((ea.sum(-1) ** 2 / (ea ** 2).sum(-1).clamp_min(1e-20) / max(seq_full - pe, 1)).cpu())
            ro = torch.matmul(qH[:, pe:], W.t())  # readout projection of the answer tokens
            ro_c.append((ro.flatten(1).norm(dim=1) / qH[:, pe:].flatten(1).norm(dim=1).clamp_min(1e-20)).cpu())

    return {
        "drift_zH": torch.stack(drift_zH_per_step, dim=1).numpy(),
        "drift_zL": torch.stack(drift_zL_per_step, dim=1).numpy(),
        "halted_at": halted_at.cpu().numpy(),
        "q_halt": torch.stack(q_halt_hist, dim=1).numpy(),
        "q_continue": torch.stack(q_continue_hist, dim=1).numpy(),
        "lyap_spec": lyap_spec,
        "exact_correct": exact.numpy(),
        "token_acc": token_acc.numpy(),
        "clv_hfrac": torch.stack(hfrac_c, 1).numpy(),
        "clv_pr": torch.stack(pr_c, 1).numpy(),
        "clv_pr_ans": torch.stack(prans_c, 1).numpy(),
        "clv_readout": torch.stack(ro_c, 1).numpy(),
        "clv_maxcos": clv_maxcos,
        "clv_meancos": clv_meancos,
        "clv_minangle": clv_minangle,
        "clv_true_hfrac": clv_true_hfrac,
        "clv_true_pr_ans": clv_true_pr_ans,
        "clv_true_readout": clv_true_readout,
    }
+
+
def main():
    """CLI entry point: diagnose one shard of test samples and save the results.

    Parses the arguments, loads the checkpoint, runs ``run_diagnose_batch``
    over the shard in batches, writes every returned per-sample array (plus
    the sample indices) to ``--out`` as a compressed ``.npz`` and prints the
    Lyapunov spectrum split by success.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--ckpt-root", required=True)
    ap.add_argument("--ckpt-name", default="step_13020")
    ap.add_argument("--n-samples", type=int, default=512)
    ap.add_argument("--shard-id", type=int, default=0)
    ap.add_argument("--num-shards", type=int, default=1)
    ap.add_argument("--batch-size", type=int, default=16)
    ap.add_argument("--k-lyap", type=int, default=8)
    ap.add_argument("--t-ons", type=int, default=1)
    ap.add_argument("--seed", type=int, default=0)
    ap.add_argument("--out", default="diag_trm.npz")
    args = ap.parse_args()

    device = "cuda"
    model, cfg, train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device)
    print(f"loaded {args.ckpt_name}: hidden={model.inner.config.hidden_size}, "
          f"seq_full={train_meta['seq_len'] + model.inner.puzzle_emb_len}, "
          f"halt_max_steps={model.inner.config.halt_max_steps}, "
          f"H={model.inner.config.H_cycles} L={model.inner.config.L_cycles}")

    test = load_test_samples(Path(cfg["data_paths"][0]), args.n_samples, args.shard_id, args.num_shards, args.seed)
    n = len(test["inputs"])
    print(f"shard {args.shard_id}/{args.num_shards}: {n} samples")

    res = {k: [] for k in ["drift_zH","drift_zL","halted_at","q_halt","q_continue","lyap_spec","exact_correct","token_acc","idx"]}
    t0 = time.time()
    for s in range(0, n, args.batch_size):
        e = min(s + args.batch_size, n)
        batch = {k: test[k][s:e].to(device) for k in ["inputs","labels","puzzle_identifiers"]}
        # Offsetting the seed by the batch start gives each batch its own tangent basis.
        out = run_diagnose_batch(model, batch, device, args.k_lyap, args.t_ons, args.seed + s)
        # setdefault: the batch result carries keys beyond the ones pre-seeded above.
        for k, v in out.items():
            res.setdefault(k, []).append(v)
        res.setdefault("idx", []).append(test["idx"][s:e])
        ls = out["lyap_spec"]
        print(f" [{e}/{n}] dt={time.time()-t0:.1f}s exact={out['exact_correct'].mean():.3f} "
              f"λ_1={ls[:,0].mean():+.4f} λ_{args.k_lyap}={ls[:,-1].mean():+.4f}", flush=True)

    saved = {}
    for k, v in res.items():
        if not v:
            continue
        try:
            saved[k] = np.concatenate(v, 0)
        except ValueError:
            saved[k] = np.stack(v, 0)
    np.savez_compressed(args.out, **saved)

    # An empty shard produces no arrays; there is nothing to summarise.
    if "exact_correct" not in saved:
        print("no samples in this shard; skipping success/failure summary")
        return

    def group_mean(x):
        # NaN for an empty group, without NumPy's empty-slice warning.
        return x.mean() if x.size else float("nan")

    succ = saved["exact_correct"] > 0.5
    print(f"\nN={len(succ)} acc={succ.mean():.4f}")
    print(f"{'i':>3} {'all':>10} {'succ':>10} {'fail':>10} {'Δ':>9}")
    for i in range(saved["lyap_spec"].shape[1]):
        li = saved["lyap_spec"][:, i]
        m_succ, m_fail = group_mean(li[succ]), group_mean(li[~succ])
        print(f"{i+1:>3} {li.mean():+10.4f} {m_succ:+10.4f} {m_fail:+10.4f} {m_fail-m_succ:+9.4f}")


if __name__ == "__main__":
    main()
diff --git a/diagnose_trm_joint_geom.py b/diagnose_trm_joint_geom.py
new file mode 100644
index 0000000..f130305
--- /dev/null
+++ b/diagnose_trm_joint_geom.py
@@ -0,0 +1,251 @@
+"""TRM Sudoku joint Lyapunov diagnostic — TRM version of diagnose_hrm_joint.py.
+
+Key differences from HRM:
+- TRM has ONE shared L_level (H_layers config is "ignored")
+- z_L update: z_L = L_level(z_L, z_H + input_embeddings)
+- z_H update: z_H = L_level(z_H, z_L) ← same L_level!
+- H_cycles=3, L_cycles=6 (vs HRM 2,2)
+
+Joint tangent block structure:
+- L step: v_L_new = J · (v_L + v_H), v_H_new = v_H, J at (z_L + z_H + ie)
+- H step: v_H_new = J' · (v_H + v_L), v_L_new = v_L, J' at (z_H + z_L)
+J and J' share weights but evaluated at different points.
+"""
+from __future__ import annotations
+import sys, os, yaml, math, argparse, json, time
+from pathlib import Path
+import numpy as np
+import torch
+
+TRM_DIR = Path("/home/yurenh2/rrm/trm")
+sys.path.insert(0, str(TRM_DIR))
+
+from models.recursive_reasoning.trm import TinyRecursiveReasoningModel_ACTV1
+
+
def load_model(ckpt_root: Path, ckpt_name: str, device: str):
    """Rebuild the TRM model from a run directory and load one checkpoint.

    Args:
        ckpt_root: directory holding ``all_config.yaml`` and the checkpoint file.
        ckpt_name: checkpoint file name inside ``ckpt_root``.
        device: torch device string the model is moved to.

    Returns:
        ``(model, cfg, train_meta)``: the model in eval mode, the parsed YAML
        config, and the parsed ``train/dataset.json`` of the first data path.
    """
    cfg = yaml.safe_load((ckpt_root / "all_config.yaml").read_text())
    arch_cfg = dict(cfg["arch"])

    # Dataset-dependent sizes are read from the training split's metadata.
    meta_file = Path(cfg["data_paths"][0]) / "train" / "dataset.json"
    train_meta = json.loads(meta_file.read_text())
    overrides = {
        "batch_size": cfg["global_batch_size"],
        "seq_len": train_meta["seq_len"],
        "vocab_size": train_meta["vocab_size"],
        "num_puzzle_identifiers": train_meta["num_puzzle_identifiers"],
    }
    arch_cfg.update(overrides)

    model = TinyRecursiveReasoningModel_ACTV1(arch_cfg)

    state = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True)
    # Drop the "_orig_mod." / "model." name fragments (presumably added by
    # torch.compile and a wrapper module -- TODO confirm against the trainer).
    renamed = {}
    for key, tensor in state.items():
        renamed[key.replace("_orig_mod.", "").replace("model.", "")] = tensor

    # Non-strict load: mismatches are reported below, not raised.
    missing, unexpected = model.load_state_dict(renamed, strict=False)
    print(f"[load] missing={len(missing)} unexpected={len(unexpected)}")
    if missing[:3]:
        print(f" sample missing: {missing[:3]}")
    if unexpected[:3]:
        print(f" sample unexpected: {unexpected[:3]}")

    model.to(device)
    model.eval()
    return model, cfg, train_meta
+
+
def load_test_samples(data_path, n_total, shard_id, num_shards, seed):
    """Draw a seeded random subset of the test split and return one shard of it.

    The same ``seed`` yields the same ``n_total`` indices for every shard, so
    the shards are disjoint slices of one common sample.

    Args:
        data_path: dataset root (``str`` or ``Path``) containing ``test/all__*.npy``.
        n_total: number of test examples to sample across all shards; clamped
            to the size of the test set (sampling is without replacement).
        shard_id: index of the shard to return, ``0 <= shard_id < num_shards``.
        num_shards: number of shards the sample is split into.
        seed: seed for ``numpy.random.default_rng``.

    Returns:
        dict with int32 tensors ``inputs``, ``labels``, ``puzzle_identifiers``
        and the NumPy array ``idx`` of the selected test-set row indices.
        Shards use a ceil-divided size, so trailing shards may be short or empty.

    Raises:
        ValueError: if ``num_shards < 1`` or ``shard_id`` is out of range.
    """
    if num_shards < 1:
        raise ValueError(f"num_shards must be >= 1, got {num_shards}")
    if not 0 <= shard_id < num_shards:
        raise ValueError(f"shard_id must be in [0, {num_shards}), got {shard_id}")

    test_dir = Path(data_path) / "test"
    inputs = np.load(test_dir / "all__inputs.npy")
    labels = np.load(test_dir / "all__labels.npy")
    pid = np.load(test_dir / "all__puzzle_identifiers.npy")

    # Sampling without replacement cannot return more rows than exist.
    n_total = min(n_total, len(inputs))
    rng = np.random.default_rng(seed)
    all_idx = rng.choice(len(inputs), size=n_total, replace=False)

    shard_size = (n_total + num_shards - 1) // num_shards
    start = shard_id * shard_size
    stop = min(start + shard_size, n_total)
    idx = all_idx[start:stop]
    return {
        "inputs": torch.from_numpy(inputs[idx].astype(np.int32)),
        "labels": torch.from_numpy(labels[idx].astype(np.int32)),
        "puzzle_identifiers": torch.from_numpy(pid[idx].astype(np.int32)),
        "idx": idx,
    }
+
+
def jvp_through(f, x, v):
    """Evaluate ``f`` at ``x`` and its Jacobian-vector product with ``v``.

    Returns the ``(f(x), J_f(x) @ v)`` pair produced by
    ``torch.autograd.functional.jvp`` (no graph is kept, non-strict mode).
    """
    value, tangent = torch.autograd.functional.jvp(
        f, x, v=v, create_graph=False, strict=False
    )
    return value, tangent
+
+
def run_diagnose_batch(model, batch, device, k_lyap, t_ons, seed):
    """Run the recursion on one batch; estimate Lyapunov exponents and vector geometry.

    The joint state is ``(z_H, z_L)``; ``k_lyap`` tangent vectors of size ``2*D``
    (H block first, L block second) are pushed through every L and H sub-step
    with JVPs and re-orthonormalised by QR every ``t_ons`` sub-steps. After the
    last step, direction-based features of the leading columns of Q are
    computed. The recursion always runs all ``halt_max_steps`` outer steps;
    the halting head is only recorded, never acted on.

    Args:
        model: object exposing ``model.inner`` (config, ``L_level``, heads, inits).
        batch: dict with ``inputs``, ``labels``, ``puzzle_identifiers`` tensors.
        device: torch device used for the tangent basis and RNG.
        k_lyap: number of tangent vectors / exponents (must be >= 1).
        t_ons: number of sub-steps between QR re-orthonormalisations (>= 1).
        seed: seed of the generator that draws the initial tangent basis.

    Returns:
        dict of NumPy arrays: ``drift_zH``, ``drift_zL``, ``halted_at`` (first
        1-based step where ``q_halt > q_continue``, 0 = never), ``q_halt``,
        ``q_continue``, ``lyap_spec`` (B, k_lyap), ``exact_correct``,
        ``token_acc``, and for up to 3 leading columns of the final Q:
        ``clv_hfrac`` (H-block energy fraction), ``clv_pr`` / ``clv_pr_ans``
        (token participation ratio over all / answer tokens) and
        ``clv_readout`` (norm ratio after projecting through ``lm_head``).

    Raises:
        ValueError: if ``k_lyap < 1`` or ``t_ons < 1``.
    """
    if k_lyap < 1:
        raise ValueError(f"k_lyap must be >= 1, got {k_lyap}")
    if t_ons < 1:
        raise ValueError(f"t_ons must be >= 1, got {t_ons}")

    inner = model.inner
    cfg = inner.config
    B = batch["inputs"].shape[0]
    seq_full = cfg.seq_len + inner.puzzle_emb_len
    hidden = cfg.hidden_size
    D = seq_full * hidden

    z_H = inner.H_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
    z_L = inner.L_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
    seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None)
    input_embeddings = inner._input_embeddings(batch["inputs"].to(device),
                                               batch["puzzle_identifiers"].to(device))

    # Random orthonormal start basis for the joint tangent space.
    g = torch.Generator(device=device).manual_seed(seed)
    Q0 = torch.randn(B, 2 * D, k_lyap, device=device, dtype=torch.float32, generator=g)
    Q, _ = torch.linalg.qr(Q0)
    log_R_sum = torch.zeros(B, k_lyap, device=device, dtype=torch.float32)
    n_lyap_steps = 0
    step_counter = 0

    def push_tangents(f, z, tangents):
        """Return ``f(z)`` and the (B, D, k_lyap) float32 image of ``tangents`` under its Jacobian."""
        cols = []
        for i in range(k_lyap):
            v_i = tangents[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype)
            z_next, Dv = jvp_through(f, z, v_i)
            cols.append(Dv.reshape(B, D).to(torch.float32))
        return z_next, torch.stack(cols, dim=-1)

    def finish_substep():
        """Count one sub-step and re-orthonormalise every ``t_ons`` sub-steps."""
        nonlocal Q, log_R_sum, n_lyap_steps, step_counter
        step_counter += 1
        if step_counter % t_ons == 0:
            Q, R = torch.linalg.qr(Q)
            log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
            n_lyap_steps += 1

    drift_zH_per_step, drift_zL_per_step = [], []
    halted_at = torch.zeros(B, dtype=torch.long, device=device)
    q_halt_hist, q_continue_hist = [], []

    for act_step in range(cfg.halt_max_steps):
        z_H_prev = z_H.detach().clone()
        z_L_prev = z_L.detach().clone()

        with torch.enable_grad():
            zH, zL = z_H.detach(), z_L.detach()
            for _h in range(cfg.H_cycles):
                # zH is fixed during the L cycles, so the injection is computed once.
                injection = zH + input_embeddings
                f_L = lambda z: inner.L_level(z, injection, **seq_info)
                for _l in range(cfg.L_cycles):
                    # L step: v_L <- J (v_H + v_L), v_H unchanged.
                    v_H_j = Q[:, :D, :]
                    v_L_j = Q[:, D:, :]
                    zL, new_v_L = push_tangents(f_L, zL, v_H_j + v_L_j)
                    Q = torch.cat([v_H_j, new_v_L], dim=1)
                    finish_substep()

                # H step (uses the SAME L_level): v_H <- J' (v_H + v_L), v_L unchanged.
                v_H_j = Q[:, :D, :]
                v_L_j = Q[:, D:, :]
                f_H = lambda z: inner.L_level(z, zL, **seq_info)
                zH, new_v_H = push_tangents(f_H, zH, v_H_j + v_L_j)
                Q = torch.cat([new_v_H, v_L_j], dim=1)
                finish_substep()

        z_H, z_L = zH, zL

        drift_zH_per_step.append((z_H - z_H_prev).float().flatten(1).norm(dim=1).cpu())
        drift_zL_per_step.append((z_L - z_L_prev).float().flatten(1).norm(dim=1).cpu())

        with torch.no_grad():
            q_logits = inner.q_head(z_H[:, 0]).float()
            q_halt, q_continue = q_logits[..., 0], q_logits[..., 1]
            q_halt_hist.append(q_halt.cpu())
            q_continue_hist.append(q_continue.cpu())
            # Record only the first step at which the halting head fires.
            new_halt = (q_halt > q_continue) & (halted_at == 0)
            halted_at[new_halt] = act_step + 1
            output = inner.lm_head(z_H)[:, inner.puzzle_emb_len:].float()
            final_logits = output

    lyap_spec = (log_R_sum / max(n_lyap_steps, 1)).cpu().numpy()

    with torch.no_grad():
        preds = final_logits.argmax(dim=-1)
        labels = batch["labels"].to(device)
        mask = labels > 0
        exact = ((preds == labels) | ~mask).all(dim=-1).cpu().float()
        token_acc = ((preds == labels) & mask).sum(-1).float() / mask.sum(-1).float().clamp_min(1)
        token_acc = token_acc.cpu()

    # Final-time geometry of the leading columns of Q. These are Gram-Schmidt
    # (orthonormalised) Lyapunov vectors; only column 0 coincides with the
    # leading covariant Lyapunov vector. All features are scale-invariant
    # ratios, DIRECTION-based (not growth rates): H/L energy split, token
    # participation ratio (localization), and alignment with the
    # decode/readout direction.
    with torch.no_grad():
        n_top = min(3, k_lyap)
        pe = inner.puzzle_emb_len
        W = inner.lm_head.weight.float()  # readout weight used as a projection
        hfrac_c, pr_c, prans_c, ro_c = [], [], [], []
        for j in range(n_top):
            col = Q[:, :, j].float()  # (B, 2D)
            qH = col[:, :D].reshape(B, seq_full, hidden)
            qL = col[:, D:].reshape(B, seq_full, hidden)
            eH = (qH ** 2).sum(-1)
            eL = (qL ** 2).sum(-1)  # (B, seq_full) per-token energy
            e = eH + eL
            hfrac_c.append((eH.sum(-1) / e.sum(-1).clamp_min(1e-20)).cpu())
            pr_c.append((e.sum(-1) ** 2 / (e ** 2).sum(-1).clamp_min(1e-20) / seq_full).cpu())
            ea = e[:, pe:]
            prans_c.append((ea.sum(-1) ** 2 / (ea ** 2).sum(-1).clamp_min(1e-20) / max(seq_full - pe, 1)).cpu())
            ro = torch.matmul(qH[:, pe:], W.t())  # readout projection of the answer tokens
            ro_c.append((ro.flatten(1).norm(dim=1) / qH[:, pe:].flatten(1).norm(dim=1).clamp_min(1e-20)).cpu())

    return {
        "drift_zH": torch.stack(drift_zH_per_step, dim=1).numpy(),
        "drift_zL": torch.stack(drift_zL_per_step, dim=1).numpy(),
        "halted_at": halted_at.cpu().numpy(),
        "q_halt": torch.stack(q_halt_hist, dim=1).numpy(),
        "q_continue": torch.stack(q_continue_hist, dim=1).numpy(),
        "lyap_spec": lyap_spec,
        "exact_correct": exact.numpy(),
        "token_acc": token_acc.numpy(),
        "clv_hfrac": torch.stack(hfrac_c, 1).numpy(),
        "clv_pr": torch.stack(pr_c, 1).numpy(),
        "clv_pr_ans": torch.stack(prans_c, 1).numpy(),
        "clv_readout": torch.stack(ro_c, 1).numpy(),
    }
+
+
def main():
    """CLI entry point: diagnose one shard of test samples and save the results.

    Parses the arguments, loads the checkpoint, runs ``run_diagnose_batch``
    over the shard in batches, writes every returned per-sample array (plus
    the sample indices) to ``--out`` as a compressed ``.npz`` and prints the
    Lyapunov spectrum split by success.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--ckpt-root", required=True)
    ap.add_argument("--ckpt-name", default="step_13020")
    ap.add_argument("--n-samples", type=int, default=512)
    ap.add_argument("--shard-id", type=int, default=0)
    ap.add_argument("--num-shards", type=int, default=1)
    ap.add_argument("--batch-size", type=int, default=16)
    ap.add_argument("--k-lyap", type=int, default=8)
    ap.add_argument("--t-ons", type=int, default=1)
    ap.add_argument("--seed", type=int, default=0)
    ap.add_argument("--out", default="diag_trm.npz")
    args = ap.parse_args()

    device = "cuda"
    model, cfg, train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device)
    print(f"loaded {args.ckpt_name}: hidden={model.inner.config.hidden_size}, "
          f"seq_full={train_meta['seq_len'] + model.inner.puzzle_emb_len}, "
          f"halt_max_steps={model.inner.config.halt_max_steps}, "
          f"H={model.inner.config.H_cycles} L={model.inner.config.L_cycles}")

    test = load_test_samples(Path(cfg["data_paths"][0]), args.n_samples, args.shard_id, args.num_shards, args.seed)
    n = len(test["inputs"])
    print(f"shard {args.shard_id}/{args.num_shards}: {n} samples")

    res = {k: [] for k in ["drift_zH","drift_zL","halted_at","q_halt","q_continue","lyap_spec","exact_correct","token_acc","idx"]}
    t0 = time.time()
    for s in range(0, n, args.batch_size):
        e = min(s + args.batch_size, n)
        batch = {k: test[k][s:e].to(device) for k in ["inputs","labels","puzzle_identifiers"]}
        # Offsetting the seed by the batch start gives each batch its own tangent basis.
        out = run_diagnose_batch(model, batch, device, args.k_lyap, args.t_ons, args.seed + s)
        # setdefault: the batch result carries keys beyond the ones pre-seeded above.
        for k, v in out.items():
            res.setdefault(k, []).append(v)
        res.setdefault("idx", []).append(test["idx"][s:e])
        ls = out["lyap_spec"]
        print(f" [{e}/{n}] dt={time.time()-t0:.1f}s exact={out['exact_correct'].mean():.3f} "
              f"λ_1={ls[:,0].mean():+.4f} λ_{args.k_lyap}={ls[:,-1].mean():+.4f}", flush=True)

    saved = {}
    for k, v in res.items():
        if not v:
            continue
        try:
            saved[k] = np.concatenate(v, 0)
        except ValueError:
            saved[k] = np.stack(v, 0)
    np.savez_compressed(args.out, **saved)

    # An empty shard produces no arrays; there is nothing to summarise.
    if "exact_correct" not in saved:
        print("no samples in this shard; skipping success/failure summary")
        return

    def group_mean(x):
        # NaN for an empty group, without NumPy's empty-slice warning.
        return x.mean() if x.size else float("nan")

    succ = saved["exact_correct"] > 0.5
    print(f"\nN={len(succ)} acc={succ.mean():.4f}")
    print(f"{'i':>3} {'all':>10} {'succ':>10} {'fail':>10} {'Δ':>9}")
    for i in range(saved["lyap_spec"].shape[1]):
        li = saved["lyap_spec"][:, i]
        m_succ, m_fail = group_mean(li[succ]), group_mean(li[~succ])
        print(f"{i+1:>3} {li.mean():+10.4f} {m_succ:+10.4f} {m_fail:+10.4f} {m_fail-m_succ:+9.4f}")


if __name__ == "__main__":
    main()
diff --git a/diagnose_trm_joint_horizon.py b/diagnose_trm_joint_horizon.py
new file mode 100644
index 0000000..0ba5d71
--- /dev/null
+++ b/diagnose_trm_joint_horizon.py
@@ -0,0 +1,225 @@
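+"""TRM Sudoku joint Lyapunov diagnostic — TRM version of diagnose_hrm_joint.py.
+
+Horizon variant: the number of ACT steps is read from the DIAG_HORIZON environment variable (default 4).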
+
+Key differences from HRM:
+- TRM has ONE shared L_level (H_layers config is "ignored")
+- z_L update: z_L = L_level(z_L, z_H + input_embeddings)
+- z_H update: z_H = L_level(z_H, z_L) ← same L_level!
+- H_cycles=3, L_cycles=6 (vs HRM 2,2)
+
+Joint tangent block structure:
+- L step: v_L_new = J · (v_L + v_H), v_H_new = v_H, J at (z_L + z_H + ie)
+- H step: v_H_new = J' · (v_H + v_L), v_L_new = v_L, J' at (z_H + z_L)
+J and J' share weights but evaluated at different points.
+"""
+from __future__ import annotations
+import sys, os, yaml, math, argparse, json, time
+from pathlib import Path
+import numpy as np
+import torch
+
+TRM_DIR = Path("/home/yurenh2/rrm/trm")
+sys.path.insert(0, str(TRM_DIR))
+
+from models.recursive_reasoning.trm import TinyRecursiveReasoningModel_ACTV1
+
+
+def load_model(ckpt_root: Path, ckpt_name: str, device: str):
+    """Instantiate the TRM model described by a checkpoint directory and load its weights.
+
+    Args:
+        ckpt_root: directory holding ``all_config.yaml`` and the checkpoint file.
+        ckpt_name: checkpoint file name inside ``ckpt_root``.
+        device: torch device the model is moved to.
+
+    Returns:
+        ``(model, cfg, train_meta)``: the model in eval mode, the parsed YAML
+        config, and the parsed ``train/dataset.json`` of the first data path.
+    """
+    config_text = (ckpt_root / "all_config.yaml").read_text()
+    cfg = yaml.safe_load(config_text)
+    arch_cfg = dict(cfg["arch"])
+
+    meta_file = Path(cfg["data_paths"][0]) / "train" / "dataset.json"
+    train_meta = json.loads(meta_file.read_text())
+
+    # Architecture config = the run's "arch" section plus dataset-derived sizes.
+    arch_cfg["batch_size"] = cfg["global_batch_size"]
+    for field in ("seq_len", "vocab_size", "num_puzzle_identifiers"):
+        arch_cfg[field] = train_meta[field]
+    model = TinyRecursiveReasoningModel_ACTV1(arch_cfg)
+
+    state = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True)
+    # Remove every "_orig_mod." / "model." fragment from the checkpoint keys.
+    # NOTE(review): presumably these come from torch.compile and a wrapper
+    # module used at training time; confirm against the training script.
+    renamed = {}
+    for key, tensor in state.items():
+        renamed[key.replace("_orig_mod.", "").replace("model.", "")] = tensor
+
+    # strict=False: key mismatches are only reported below, never fatal.
+    missing, unexpected = model.load_state_dict(renamed, strict=False)
+    print(f"[load] missing={len(missing)} unexpected={len(unexpected)}")
+    head_missing, head_unexpected = missing[:3], unexpected[:3]
+    if head_missing:
+        print(f"  sample missing: {head_missing}")
+    if head_unexpected:
+        print(f"  sample unexpected: {head_unexpected}")
+
+    model.to(device).eval()
+    return model, cfg, train_meta
+
+
+def load_test_samples(data_path, n_total, shard_id, num_shards, seed):
+    """Draw a seeded random subset of the test split and return one shard of it.
+
+    Args:
+        data_path: dataset root (a ``Path``); arrays are read from ``data_path / "test"``.
+        n_total: number of test samples to draw (without replacement) across
+            all shards. Clamped to the size of the test split.
+        shard_id: index of the shard to return, ``0 <= shard_id < num_shards``.
+        num_shards: number of contiguous shards the drawn subset is cut into.
+        seed: seed of the NumPy generator; every shard must use the same seed
+            so that the shards partition one common subset.
+
+    Returns:
+        dict with int32 tensors ``inputs``, ``labels``, ``puzzle_identifiers``
+        and the NumPy array ``idx`` of the selected test-set row indices.
+        A trailing shard can be empty when ``n_total`` is small.
+
+    Raises:
+        ValueError: if ``num_shards < 1`` or ``shard_id`` is out of range.
+    """
+    if num_shards < 1 or not 0 <= shard_id < num_shards:
+        raise ValueError(f"invalid shard {shard_id}/{num_shards}")
+
+    rng = np.random.default_rng(seed)
+    inputs = np.load(data_path / "test" / "all__inputs.npy")
+    labels = np.load(data_path / "test" / "all__labels.npy")
+    pid = np.load(data_path / "test" / "all__puzzle_identifiers.npy")
+
+    # Sampling without replacement cannot return more rows than exist;
+    # fall back to the whole split instead of raising inside rng.choice.
+    if n_total > len(inputs):
+        print(f"[data] requested {n_total} samples, test split has {len(inputs)}; using all of them")
+        n_total = len(inputs)
+
+    all_idx = rng.choice(len(inputs), size=n_total, replace=False)
+    shard_size = (n_total + num_shards - 1) // num_shards  # ceil division
+    s, e = shard_id * shard_size, min((shard_id + 1) * shard_size, n_total)
+    idx = all_idx[s:e]
+    return {
+        "inputs": torch.from_numpy(inputs[idx].astype(np.int32)),
+        "labels": torch.from_numpy(labels[idx].astype(np.int32)),
+        "puzzle_identifiers": torch.from_numpy(pid[idx].astype(np.int32)),
+        "idx": idx,
+    }
+
+
+def jvp_through(f, x, v):
+    """Evaluate ``f`` at ``x`` and its Jacobian-vector product with ``v``.
+
+    Returns the ``(f(x), J_f(x) @ v)`` pair produced by
+    ``torch.autograd.functional.jvp`` (no graph is kept for the result).
+    """
+    primal_out, tangent_out = torch.autograd.functional.jvp(
+        f, x, v=v, create_graph=False, strict=False
+    )
+    return primal_out, tangent_out
+
+
+def run_diagnose_batch(model, batch, device, k_lyap, t_ons, seed):
+    """Run the TRM recursion on one batch while tracking a joint (z_H, z_L) tangent basis.
+
+    The number of ACT steps comes from the ``DIAG_HORIZON`` environment
+    variable (default 4). A ``k_lyap``-column tangent basis over the stacked
+    state ``[z_H; z_L]`` is pushed through every L and H update with JVPs and
+    re-orthonormalised by QR every ``t_ons`` updates; the log of ``|diag(R)|``
+    is accumulated to estimate the leading Lyapunov exponents.
+
+    Args:
+        model: wrapper whose ``.inner`` exposes ``config``, ``H_init``,
+            ``L_init``, ``L_level``, ``q_head``, ``lm_head``,
+            ``_input_embeddings``, ``puzzle_emb_len`` and ``forward_dtype``.
+        batch: dict with ``inputs``, ``labels`` and ``puzzle_identifiers`` tensors.
+        device: torch device for the tangent basis and accumulators.
+        k_lyap: number of tangent directions (exponents) to track.
+        t_ons: QR re-orthonormalisation period, counted in L/H updates.
+        seed: seed of the generator that draws the initial tangent basis.
+
+    Returns:
+        dict of NumPy arrays: ``drift_zH`` / ``drift_zL`` (per-ACT-step L2 change
+        of each state), ``halted_at`` (1-based first step with
+        ``q_halt > q_continue``, 0 if never), ``q_halt`` / ``q_continue``
+        histories, ``lyap_spec``, and ``exact_correct`` / ``token_acc``
+        computed from the logits of the LAST ACT step.
+
+    Raises:
+        ValueError: if ``DIAG_HORIZON`` is not an integer >= 1.
+    """
+    # `os` is already imported at module level; validate early so a bad value
+    # fails here instead of as an undefined `final_logits` after the loop.
+    horizon = int(os.environ.get("DIAG_HORIZON", "4"))
+    if horizon < 1:
+        raise ValueError(f"DIAG_HORIZON must be >= 1, got {horizon}")
+
+    inner = model.inner
+    cfg = inner.config
+    B = batch["inputs"].shape[0]
+    seq_full = cfg.seq_len + inner.puzzle_emb_len
+    hidden = cfg.hidden_size
+    D = seq_full * hidden  # flattened size of one state (z_H or z_L)
+
+    z_H = inner.H_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
+    z_L = inner.L_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
+    seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None)
+    input_embeddings = inner._input_embeddings(batch["inputs"].to(device),
+                                               batch["puzzle_identifiers"].to(device))
+
+    # Random orthonormal tangent basis; rows [:D] are the z_H block, rows [D:] the z_L block.
+    g = torch.Generator(device=device).manual_seed(seed)
+    Q0 = torch.randn(B, 2*D, k_lyap, device=device, dtype=torch.float32, generator=g)
+    Q, _ = torch.linalg.qr(Q0)
+    log_R_sum = torch.zeros(B, k_lyap, device=device, dtype=torch.float32)
+    n_lyap_steps = 0   # number of QR re-orthonormalisations performed
+    step_counter = 0   # number of L/H updates performed
+
+    drift_zH_per_step, drift_zL_per_step = [], []
+    halted_at = torch.zeros(B, dtype=torch.long, device=device)
+    q_halt_hist, q_continue_hist = [], []
+
+    for act_step in range(horizon):
+        z_H_prev = z_H.detach().clone()
+        z_L_prev = z_L.detach().clone()
+
+        with torch.enable_grad():
+            zH, zL = z_H.detach(), z_L.detach()
+            for _h in range(cfg.H_cycles):
+                # L cycles: z_L <- L_level(z_L, z_H + input_embeddings); z_H block untouched.
+                for _l in range(cfg.L_cycles):
+                    v_H_j = Q[:, :D, :]
+                    v_L_j = Q[:, D:, :]
+                    # NOTE(review): feeding v_H + v_L through the Jacobian w.r.t. the first
+                    # argument presumes L_level sums its two inputs before its layers — confirm.
+                    v_comb = v_H_j + v_L_j
+                    new_v_L_cols = []
+                    f_L = lambda z: inner.L_level(z, zH + input_embeddings, **seq_info)
+                    for i in range(k_lyap):
+                        v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype)
+                        zL_new, Dv = jvp_through(f_L, zL, v_i)
+                        new_v_L_cols.append(Dv.reshape(B, D).to(torch.float32))
+                    new_v_L = torch.stack(new_v_L_cols, dim=-1)
+                    Q = torch.cat([v_H_j, new_v_L], dim=1)
+                    zL = zL_new
+                    step_counter += 1
+                    if step_counter % t_ons == 0:
+                        Q, R = torch.linalg.qr(Q)
+                        log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
+                        n_lyap_steps += 1
+
+                # H step (uses SAME L_level!): z_H <- L_level(z_H, z_L); z_L block untouched.
+                v_H_j = Q[:, :D, :]
+                v_L_j = Q[:, D:, :]
+                v_comb = v_H_j + v_L_j
+                new_v_H_cols = []
+                f_H = lambda z: inner.L_level(z, zL, **seq_info)
+                for i in range(k_lyap):
+                    v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype)
+                    zH_new, Dv = jvp_through(f_H, zH, v_i)
+                    new_v_H_cols.append(Dv.reshape(B, D).to(torch.float32))
+                new_v_H = torch.stack(new_v_H_cols, dim=-1)
+                Q = torch.cat([new_v_H, v_L_j], dim=1)
+                zH = zH_new
+                step_counter += 1
+                if step_counter % t_ons == 0:
+                    Q, R = torch.linalg.qr(Q)
+                    log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
+                    n_lyap_steps += 1
+
+            z_H, z_L = zH, zL
+
+        drift_zH_per_step.append((z_H - z_H_prev).float().flatten(1).norm(dim=1).cpu())
+        drift_zL_per_step.append((z_L - z_L_prev).float().flatten(1).norm(dim=1).cpu())
+
+        with torch.no_grad():
+            q_logits = inner.q_head(z_H[:, 0]).float()
+            q_halt, q_continue = q_logits[..., 0], q_logits[..., 1]
+            q_halt_hist.append(q_halt.cpu()); q_continue_hist.append(q_continue.cpu())
+            # Record only the FIRST step at which halting is preferred (0 = not yet halted).
+            new_halt = (q_halt > q_continue) & (halted_at == 0)
+            halted_at[new_halt] = act_step + 1
+            output = inner.lm_head(z_H)[:, inner.puzzle_emb_len:].float()
+            final_logits = output  # overwritten every step: the last ACT step wins
+
+    # NOTE(review): this averages per QR event, so with t_ons > 1 the values are
+    # growth rates per t_ons updates rather than per update — confirm intent.
+    lyap_spec = (log_R_sum / max(n_lyap_steps, 1)).cpu().numpy()
+
+    with torch.no_grad():
+        preds = final_logits.argmax(dim=-1)
+        labels = batch["labels"].to(device)
+        mask = labels > 0  # positions with label <= 0 are excluded from scoring
+        exact = ((preds == labels) | ~mask).all(dim=-1).cpu().float()
+        token_acc = ((preds == labels) & mask).sum(-1).float() / mask.sum(-1).float().clamp_min(1)
+        token_acc = token_acc.cpu()
+
+    return {
+        "drift_zH": torch.stack(drift_zH_per_step, dim=1).numpy(),
+        "drift_zL": torch.stack(drift_zL_per_step, dim=1).numpy(),
+        "halted_at": halted_at.cpu().numpy(),
+        "q_halt": torch.stack(q_halt_hist, dim=1).numpy(),
+        "q_continue": torch.stack(q_continue_hist, dim=1).numpy(),
+        "lyap_spec": lyap_spec,
+        "exact_correct": exact.numpy(),
+        "token_acc": token_acc.numpy(),
+    }
+
+
+def main():
+    """Command-line entry point: run the diagnostic on one shard and save the results.
+
+    Loads the checkpoint, draws this shard's test samples, runs
+    ``run_diagnose_batch`` batch by batch, writes the concatenated per-sample
+    arrays to ``--out`` with ``np.savez_compressed`` and prints the mean
+    Lyapunov spectrum for all / exactly-solved / failed samples.
+    """
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--ckpt-root", required=True)
+    ap.add_argument("--ckpt-name", default="step_13020")
+    ap.add_argument("--n-samples", type=int, default=512)
+    ap.add_argument("--shard-id", type=int, default=0)
+    ap.add_argument("--num-shards", type=int, default=1)
+    ap.add_argument("--batch-size", type=int, default=16)
+    ap.add_argument("--k-lyap", type=int, default=8)
+    ap.add_argument("--t-ons", type=int, default=1)
+    ap.add_argument("--seed", type=int, default=0)
+    ap.add_argument("--out", default="diag_trm.npz")
+    args = ap.parse_args()
+
+    device = "cuda"
+    model, cfg, train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device)
+    print(f"loaded {args.ckpt_name}: hidden={model.inner.config.hidden_size}, "
+          f"seq_full={train_meta['seq_len'] + model.inner.puzzle_emb_len}, "
+          f"halt_max_steps={model.inner.config.halt_max_steps}, "
+          f"H={model.inner.config.H_cycles} L={model.inner.config.L_cycles}")
+
+    test = load_test_samples(Path(cfg["data_paths"][0]), args.n_samples, args.shard_id, args.num_shards, args.seed)
+    n = len(test["inputs"])
+    print(f"shard {args.shard_id}/{args.num_shards}: {n} samples")
+
+    res = {k: [] for k in ["drift_zH","drift_zL","halted_at","q_halt","q_continue","lyap_spec","exact_correct","token_acc","idx"]}
+    t0 = time.time()
+    for s in range(0, n, args.batch_size):
+        e = min(s + args.batch_size, n)
+        batch = {k: test[k][s:e].to(device) for k in ["inputs","labels","puzzle_identifiers"]}
+        # The seed is offset by the batch start, so each batch draws its own tangent basis.
+        out = run_diagnose_batch(model, batch, device, args.k_lyap, args.t_ons, args.seed + s)
+        for k, v in out.items():
+            res[k].append(v)
+        res["idx"].append(test["idx"][s:e])
+        ls = out["lyap_spec"]
+        print(f"  [{e}/{n}] dt={time.time()-t0:.1f}s  exact={out['exact_correct'].mean():.3f}  "
+              f"λ_1={ls[:,0].mean():+.4f}  λ_{args.k_lyap}={ls[:,-1].mean():+.4f}", flush=True)
+
+    saved = {}
+    for k, v in res.items():
+        if not v:
+            continue
+        try:
+            saved[k] = np.concatenate(v, 0)
+        except ValueError:
+            # Per-batch arrays that cannot be concatenated are stacked instead.
+            saved[k] = np.stack(v, 0)
+    np.savez_compressed(args.out, **saved)
+
+    # An empty shard produces no arrays at all; stop cleanly instead of
+    # failing on the missing "exact_correct" key below.
+    if "exact_correct" not in saved:
+        print("\nN=0 (shard is empty): nothing to summarize")
+        return
+
+    def group_mean(values):
+        # nan without the "mean of empty slice" warning when a group is empty
+        return values.mean() if values.size else np.nan
+
+    succ = saved["exact_correct"] > 0.5
+    print(f"\nN={len(succ)}  acc={succ.mean():.4f}")
+    print(f"{'i':>3} {'all':>10} {'succ':>10} {'fail':>10} {'Δ':>9}")
+    for i in range(saved["lyap_spec"].shape[1]):
+        li = saved["lyap_spec"][:, i]
+        m_succ, m_fail = group_mean(li[succ]), group_mean(li[~succ])
+        print(f"{i+1:>3} {li.mean():+10.4f} {m_succ:+10.4f} {m_fail:+10.4f} {m_fail-m_succ:+9.4f}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/diagnose_trm_joint_maze.py b/diagnose_trm_joint_maze.py
new file mode 100644
index 0000000..ab4bf73
--- /dev/null
+++ b/diagnose_trm_joint_maze.py
@@ -0,0 +1,227 @@
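+"""TRM Maze joint Lyapunov diagnostic — Maze variant of the TRM Sudoku script (TRM version of diagnose_hrm_joint.py).
+
+Maze-specific change: every batch runs under the math SDPA backend, which supports the JVP double-backward.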
+
+Key differences from HRM:
+- TRM has ONE shared L_level (H_layers config is "ignored")
+- z_L update: z_L = L_level(z_L, z_H + input_embeddings)
+- z_H update: z_H = L_level(z_H, z_L) ← same L_level!
+- H_cycles=3, L_cycles=6 (vs HRM 2,2)
+
+Joint tangent block structure:
+- L step: v_L_new = J · (v_L + v_H), v_H_new = v_H, J at (z_L + z_H + ie)
+- H step: v_H_new = J' · (v_H + v_L), v_L_new = v_L, J' at (z_H + z_L)
+J and J' share weights but evaluated at different points.
+"""
+from __future__ import annotations
+import sys, os, yaml, math, argparse, json, time
+from pathlib import Path
+import numpy as np
+import torch
+from torch.nn.attention import sdpa_kernel, SDPBackend # MAZE: force math SDP for JVP double-backward
+
+TRM_DIR = Path("/home/yurenh2/rrm/trm")
+sys.path.insert(0, str(TRM_DIR))
+
+from models.recursive_reasoning.trm import TinyRecursiveReasoningModel_ACTV1
+
+
+def load_model(ckpt_root: Path, ckpt_name: str, device: str):
+    """Instantiate the TRM model described by a checkpoint directory and load its weights.
+
+    Args:
+        ckpt_root: directory holding ``all_config.yaml`` and the checkpoint file.
+        ckpt_name: checkpoint file name inside ``ckpt_root``.
+        device: torch device the model is moved to.
+
+    Returns:
+        ``(model, cfg, train_meta)``: the model in eval mode, the parsed YAML
+        config, and the parsed ``train/dataset.json`` of the first data path.
+    """
+    config_text = (ckpt_root / "all_config.yaml").read_text()
+    cfg = yaml.safe_load(config_text)
+    arch_cfg = dict(cfg["arch"])
+
+    meta_file = Path(cfg["data_paths"][0]) / "train" / "dataset.json"
+    train_meta = json.loads(meta_file.read_text())
+
+    # Architecture config = the run's "arch" section plus dataset-derived sizes.
+    arch_cfg["batch_size"] = cfg["global_batch_size"]
+    for field in ("seq_len", "vocab_size", "num_puzzle_identifiers"):
+        arch_cfg[field] = train_meta[field]
+    model = TinyRecursiveReasoningModel_ACTV1(arch_cfg)
+
+    state = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True)
+    # Remove every "_orig_mod." / "model." fragment from the checkpoint keys.
+    # NOTE(review): presumably these come from torch.compile and a wrapper
+    # module used at training time; confirm against the training script.
+    renamed = {}
+    for key, tensor in state.items():
+        renamed[key.replace("_orig_mod.", "").replace("model.", "")] = tensor
+
+    # strict=False: key mismatches are only reported below, never fatal.
+    missing, unexpected = model.load_state_dict(renamed, strict=False)
+    print(f"[load] missing={len(missing)} unexpected={len(unexpected)}")
+    head_missing, head_unexpected = missing[:3], unexpected[:3]
+    if head_missing:
+        print(f"  sample missing: {head_missing}")
+    if head_unexpected:
+        print(f"  sample unexpected: {head_unexpected}")
+
+    model.to(device).eval()
+    return model, cfg, train_meta
+
+
+def load_test_samples(data_path, n_total, shard_id, num_shards, seed):
+    """Draw a seeded random subset of the test split and return one shard of it.
+
+    Args:
+        data_path: dataset root (a ``Path``); arrays are read from ``data_path / "test"``.
+        n_total: number of test samples to draw (without replacement) across
+            all shards. Clamped to the size of the test split.
+        shard_id: index of the shard to return, ``0 <= shard_id < num_shards``.
+        num_shards: number of contiguous shards the drawn subset is cut into.
+        seed: seed of the NumPy generator; every shard must use the same seed
+            so that the shards partition one common subset.
+
+    Returns:
+        dict with int32 tensors ``inputs``, ``labels``, ``puzzle_identifiers``
+        and the NumPy array ``idx`` of the selected test-set row indices.
+        A trailing shard can be empty when ``n_total`` is small.
+
+    Raises:
+        ValueError: if ``num_shards < 1`` or ``shard_id`` is out of range.
+    """
+    if num_shards < 1 or not 0 <= shard_id < num_shards:
+        raise ValueError(f"invalid shard {shard_id}/{num_shards}")
+
+    rng = np.random.default_rng(seed)
+    inputs = np.load(data_path / "test" / "all__inputs.npy")
+    labels = np.load(data_path / "test" / "all__labels.npy")
+    pid = np.load(data_path / "test" / "all__puzzle_identifiers.npy")
+
+    # Sampling without replacement cannot return more rows than exist;
+    # fall back to the whole split instead of raising inside rng.choice.
+    if n_total > len(inputs):
+        print(f"[data] requested {n_total} samples, test split has {len(inputs)}; using all of them")
+        n_total = len(inputs)
+
+    all_idx = rng.choice(len(inputs), size=n_total, replace=False)
+    shard_size = (n_total + num_shards - 1) // num_shards  # ceil division
+    s, e = shard_id * shard_size, min((shard_id + 1) * shard_size, n_total)
+    idx = all_idx[s:e]
+    return {
+        "inputs": torch.from_numpy(inputs[idx].astype(np.int32)),
+        "labels": torch.from_numpy(labels[idx].astype(np.int32)),
+        "puzzle_identifiers": torch.from_numpy(pid[idx].astype(np.int32)),
+        "idx": idx,
+    }
+
+
+def jvp_through(f, x, v):
+    """Evaluate ``f`` at ``x`` and its Jacobian-vector product with ``v``.
+
+    Returns the ``(f(x), J_f(x) @ v)`` pair produced by
+    ``torch.autograd.functional.jvp`` (no graph is kept for the result).
+    """
+    primal_out, tangent_out = torch.autograd.functional.jvp(
+        f, x, v=v, create_graph=False, strict=False
+    )
+    return primal_out, tangent_out
+
+
+def run_diagnose_batch(model, batch, device, k_lyap, t_ons, seed):
+ """Run the TRM recursion on one batch while tracking a joint (z_H, z_L) tangent basis.
+
+ Runs cfg.halt_max_steps ACT steps. A k_lyap-column tangent basis over the
+ stacked state [z_H; z_L] is pushed through every L and H update with JVPs and
+ re-orthonormalised by QR every t_ons updates; log|diag(R)| is accumulated.
+
+ Returns a dict of NumPy arrays: drift_zH / drift_zL (per-ACT-step L2 change of
+ each state), halted_at (1-based first step with q_halt > q_continue, 0 if
+ never), q_halt / q_continue histories, lyap_spec, and exact_correct /
+ token_acc computed from the logits of the LAST ACT step.
+ """
+ inner = model.inner
+ cfg = inner.config
+ B = batch["inputs"].shape[0]
+ seq_full = cfg.seq_len + inner.puzzle_emb_len
+ hidden = cfg.hidden_size
+ # D: flattened size of one state (z_H or z_L)
+ D = seq_full * hidden
+
+ z_H = inner.H_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
+ z_L = inner.L_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
+ seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None)
+ input_embeddings = inner._input_embeddings(batch["inputs"].to(device),
+ batch["puzzle_identifiers"].to(device))
+
+ # Random orthonormal tangent basis; rows [:D] are the z_H block, rows [D:] the z_L block.
+ g = torch.Generator(device=device).manual_seed(seed)
+ Q0 = torch.randn(B, 2*D, k_lyap, device=device, dtype=torch.float32, generator=g)
+ Q, _ = torch.linalg.qr(Q0)
+ log_R_sum = torch.zeros(B, k_lyap, device=device, dtype=torch.float32)
+ # n_lyap_steps counts QR re-orthonormalisations; step_counter counts L/H updates.
+ n_lyap_steps = 0
+ step_counter = 0
+
+ drift_zH_per_step, drift_zL_per_step = [], []
+ halted_at = torch.zeros(B, dtype=torch.long, device=device)
+ q_halt_hist, q_continue_hist = [], []
+
+ for act_step in range(cfg.halt_max_steps):
+ z_H_prev = z_H.detach().clone()
+ z_L_prev = z_L.detach().clone()
+
+ with torch.enable_grad():
+ zH, zL = z_H.detach(), z_L.detach()
+ for _h in range(cfg.H_cycles):
+ # L cycles
+ # Each L update: z_L <- L_level(z_L, z_H + input_embeddings); the z_H block is kept as is.
+ for _l in range(cfg.L_cycles):
+ v_H_j = Q[:, :D, :]
+ v_L_j = Q[:, D:, :]
+ # NOTE(review): feeding v_H + v_L through the Jacobian w.r.t. the first argument
+ # presumes L_level sums its two inputs before its layers — confirm.
+ v_comb = v_H_j + v_L_j
+ new_v_L_cols = []
+ f_L = lambda z: inner.L_level(z, zH + input_embeddings, **seq_info)
+ # One JVP per tangent column; the primal output of the last call is the new z_L.
+ for i in range(k_lyap):
+ v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype)
+ zL_new, Dv = jvp_through(f_L, zL, v_i)
+ new_v_L_cols.append(Dv.reshape(B, D).to(torch.float32))
+ new_v_L = torch.stack(new_v_L_cols, dim=-1)
+ Q = torch.cat([v_H_j, new_v_L], dim=1)
+ zL = zL_new
+ step_counter += 1
+ if step_counter % t_ons == 0:
+ Q, R = torch.linalg.qr(Q)
+ log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
+ n_lyap_steps += 1
+
+ # H step (uses SAME L_level!)
+ # z_H <- L_level(z_H, z_L); the z_L block is kept as is.
+ v_H_j = Q[:, :D, :]
+ v_L_j = Q[:, D:, :]
+ v_comb = v_H_j + v_L_j
+ new_v_H_cols = []
+ f_H = lambda z: inner.L_level(z, zL, **seq_info)
+ for i in range(k_lyap):
+ v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype)
+ zH_new, Dv = jvp_through(f_H, zH, v_i)
+ new_v_H_cols.append(Dv.reshape(B, D).to(torch.float32))
+ new_v_H = torch.stack(new_v_H_cols, dim=-1)
+ Q = torch.cat([new_v_H, v_L_j], dim=1)
+ zH = zH_new
+ step_counter += 1
+ if step_counter % t_ons == 0:
+ Q, R = torch.linalg.qr(Q)
+ log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
+ n_lyap_steps += 1
+
+ z_H, z_L = zH, zL
+
+ # L2 norm of the change of each state over this ACT step, per sample.
+ drift_zH_per_step.append((z_H - z_H_prev).float().flatten(1).norm(dim=1).cpu())
+ drift_zL_per_step.append((z_L - z_L_prev).float().flatten(1).norm(dim=1).cpu())
+
+ with torch.no_grad():
+ q_logits = inner.q_head(z_H[:, 0]).float()
+ q_halt, q_continue = q_logits[..., 0], q_logits[..., 1]
+ q_halt_hist.append(q_halt.cpu()); q_continue_hist.append(q_continue.cpu())
+ # Record only the FIRST step at which halting is preferred (0 = not yet halted).
+ new_halt = (q_halt > q_continue) & (halted_at == 0)
+ halted_at[new_halt] = act_step + 1
+ output = inner.lm_head(z_H)[:, inner.puzzle_emb_len:].float()
+ # Overwritten every step: the logits of the last ACT step are the ones scored.
+ final_logits = output
+
+ # NOTE(review): this averages per QR event, so with t_ons > 1 the values are
+ # growth rates per t_ons updates rather than per update — confirm intent.
+ lyap_spec = (log_R_sum / max(n_lyap_steps, 1)).cpu().numpy()
+
+ with torch.no_grad():
+ preds = final_logits.argmax(dim=-1)
+ labels = batch["labels"].to(device)
+ # Positions with label <= 0 are excluded from both accuracy measures.
+ mask = labels > 0
+ exact = ((preds == labels) | ~mask).all(dim=-1).cpu().float()
+ token_acc = ((preds == labels) & mask).sum(-1).float() / mask.sum(-1).float().clamp_min(1)
+ token_acc = token_acc.cpu()
+
+ return {
+ "drift_zH": torch.stack(drift_zH_per_step, dim=1).numpy(),
+ "drift_zL": torch.stack(drift_zL_per_step, dim=1).numpy(),
+ "halted_at": halted_at.cpu().numpy(),
+ "q_halt": torch.stack(q_halt_hist, dim=1).numpy(),
+ "q_continue": torch.stack(q_continue_hist, dim=1).numpy(),
+ "lyap_spec": lyap_spec,
+ "exact_correct": exact.numpy(),
+ "token_acc": token_acc.numpy(),
+ }
+
+
+def main():
+    """Command-line entry point: run the diagnostic on one shard and save the results.
+
+    Loads the checkpoint, draws this shard's test samples, runs
+    ``run_diagnose_batch`` batch by batch under the math SDPA backend, writes
+    the concatenated per-sample arrays to ``--out`` with
+    ``np.savez_compressed`` and prints the mean Lyapunov spectrum for
+    all / exactly-solved / failed samples.
+    """
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--ckpt-root", required=True)
+    ap.add_argument("--ckpt-name", default="step_13020")
+    ap.add_argument("--n-samples", type=int, default=512)
+    ap.add_argument("--shard-id", type=int, default=0)
+    ap.add_argument("--num-shards", type=int, default=1)
+    ap.add_argument("--batch-size", type=int, default=16)
+    ap.add_argument("--k-lyap", type=int, default=8)
+    ap.add_argument("--t-ons", type=int, default=1)
+    ap.add_argument("--seed", type=int, default=0)
+    ap.add_argument("--out", default="diag_trm.npz")
+    args = ap.parse_args()
+
+    device = "cuda"
+    model, cfg, train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device)
+    print(f"loaded {args.ckpt_name}: hidden={model.inner.config.hidden_size}, "
+          f"seq_full={train_meta['seq_len'] + model.inner.puzzle_emb_len}, "
+          f"halt_max_steps={model.inner.config.halt_max_steps}, "
+          f"H={model.inner.config.H_cycles} L={model.inner.config.L_cycles}")
+
+    test = load_test_samples(Path(cfg["data_paths"][0]), args.n_samples, args.shard_id, args.num_shards, args.seed)
+    n = len(test["inputs"])
+    print(f"shard {args.shard_id}/{args.num_shards}: {n} samples")
+
+    res = {k: [] for k in ["drift_zH","drift_zL","halted_at","q_halt","q_continue","lyap_spec","exact_correct","token_acc","idx"]}
+    t0 = time.time()
+    for s in range(0, n, args.batch_size):
+        e = min(s + args.batch_size, n)
+        batch = {k: test[k][s:e].to(device) for k in ["inputs","labels","puzzle_identifiers"]}
+        # The seed is offset by the batch start, so each batch draws its own tangent basis.
+        with sdpa_kernel(SDPBackend.MATH):  # MAZE: math backend supports the JVP double-backward
+            out = run_diagnose_batch(model, batch, device, args.k_lyap, args.t_ons, args.seed + s)
+        for k, v in out.items():
+            res[k].append(v)
+        res["idx"].append(test["idx"][s:e])
+        ls = out["lyap_spec"]
+        print(f"  [{e}/{n}] dt={time.time()-t0:.1f}s  exact={out['exact_correct'].mean():.3f}  "
+              f"λ_1={ls[:,0].mean():+.4f}  λ_{args.k_lyap}={ls[:,-1].mean():+.4f}", flush=True)
+
+    saved = {}
+    for k, v in res.items():
+        if not v:
+            continue
+        try:
+            saved[k] = np.concatenate(v, 0)
+        except ValueError:
+            # Per-batch arrays that cannot be concatenated are stacked instead.
+            saved[k] = np.stack(v, 0)
+    np.savez_compressed(args.out, **saved)
+
+    # An empty shard produces no arrays at all; stop cleanly instead of
+    # failing on the missing "exact_correct" key below.
+    if "exact_correct" not in saved:
+        print("\nN=0 (shard is empty): nothing to summarize")
+        return
+
+    def group_mean(values):
+        # nan without the "mean of empty slice" warning when a group is empty
+        return values.mean() if values.size else np.nan
+
+    succ = saved["exact_correct"] > 0.5
+    print(f"\nN={len(succ)}  acc={succ.mean():.4f}")
+    print(f"{'i':>3} {'all':>10} {'succ':>10} {'fail':>10} {'Δ':>9}")
+    for i in range(saved["lyap_spec"].shape[1]):
+        li = saved["lyap_spec"][:, i]
+        m_succ, m_fail = group_mean(li[succ]), group_mean(li[~succ])
+        print(f"{i+1:>3} {li.mean():+10.4f} {m_succ:+10.4f} {m_fail:+10.4f} {m_fail-m_succ:+9.4f}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/diagnose_trm_joint_short.py b/diagnose_trm_joint_short.py
new file mode 100644
index 0000000..956afe7
--- /dev/null
+++ b/diagnose_trm_joint_short.py
@@ -0,0 +1,225 @@
+"""TRM Sudoku joint Lyapunov diagnostic — TRM version of diagnose_hrm_joint.py.
+
+Short variant: runs a fixed 4 ACT steps, to match train-time.
+
+Key differences from HRM:
+- TRM has ONE shared L_level (H_layers config is "ignored")
+- z_L update: z_L = L_level(z_L, z_H + input_embeddings)
+- z_H update: z_H = L_level(z_H, z_L) ← same L_level!
+- H_cycles=3, L_cycles=6 (vs HRM 2,2)
+
+Joint tangent block structure:
+- L step: v_L_new = J · (v_L + v_H), v_H_new = v_H, J at (z_L + z_H + ie)
+- H step: v_H_new = J' · (v_H + v_L), v_L_new = v_L, J' at (z_H + z_L)
+J and J' share weights but evaluated at different points.
+"""
+from __future__ import annotations
+import sys, os, yaml, math, argparse, json, time
+from pathlib import Path
+import numpy as np
+import torch
+
+TRM_DIR = Path("/home/yurenh2/rrm/trm")
+sys.path.insert(0, str(TRM_DIR))
+
+from models.recursive_reasoning.trm import TinyRecursiveReasoningModel_ACTV1
+
+
+def load_model(ckpt_root: Path, ckpt_name: str, device: str):
+    """Instantiate the TRM model described by a checkpoint directory and load its weights.
+
+    Args:
+        ckpt_root: directory holding ``all_config.yaml`` and the checkpoint file.
+        ckpt_name: checkpoint file name inside ``ckpt_root``.
+        device: torch device the model is moved to.
+
+    Returns:
+        ``(model, cfg, train_meta)``: the model in eval mode, the parsed YAML
+        config, and the parsed ``train/dataset.json`` of the first data path.
+    """
+    config_text = (ckpt_root / "all_config.yaml").read_text()
+    cfg = yaml.safe_load(config_text)
+    arch_cfg = dict(cfg["arch"])
+
+    meta_file = Path(cfg["data_paths"][0]) / "train" / "dataset.json"
+    train_meta = json.loads(meta_file.read_text())
+
+    # Architecture config = the run's "arch" section plus dataset-derived sizes.
+    arch_cfg["batch_size"] = cfg["global_batch_size"]
+    for field in ("seq_len", "vocab_size", "num_puzzle_identifiers"):
+        arch_cfg[field] = train_meta[field]
+    model = TinyRecursiveReasoningModel_ACTV1(arch_cfg)
+
+    state = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True)
+    # Remove every "_orig_mod." / "model." fragment from the checkpoint keys.
+    # NOTE(review): presumably these come from torch.compile and a wrapper
+    # module used at training time; confirm against the training script.
+    renamed = {}
+    for key, tensor in state.items():
+        renamed[key.replace("_orig_mod.", "").replace("model.", "")] = tensor
+
+    # strict=False: key mismatches are only reported below, never fatal.
+    missing, unexpected = model.load_state_dict(renamed, strict=False)
+    print(f"[load] missing={len(missing)} unexpected={len(unexpected)}")
+    head_missing, head_unexpected = missing[:3], unexpected[:3]
+    if head_missing:
+        print(f"  sample missing: {head_missing}")
+    if head_unexpected:
+        print(f"  sample unexpected: {head_unexpected}")
+
+    model.to(device).eval()
+    return model, cfg, train_meta
+
+
+def load_test_samples(data_path, n_total, shard_id, num_shards, seed):
+    """Draw a seeded random subset of the test split and return one shard of it.
+
+    Args:
+        data_path: dataset root (a ``Path``); arrays are read from ``data_path / "test"``.
+        n_total: number of test samples to draw (without replacement) across
+            all shards. Clamped to the size of the test split.
+        shard_id: index of the shard to return, ``0 <= shard_id < num_shards``.
+        num_shards: number of contiguous shards the drawn subset is cut into.
+        seed: seed of the NumPy generator; every shard must use the same seed
+            so that the shards partition one common subset.
+
+    Returns:
+        dict with int32 tensors ``inputs``, ``labels``, ``puzzle_identifiers``
+        and the NumPy array ``idx`` of the selected test-set row indices.
+        A trailing shard can be empty when ``n_total`` is small.
+
+    Raises:
+        ValueError: if ``num_shards < 1`` or ``shard_id`` is out of range.
+    """
+    if num_shards < 1 or not 0 <= shard_id < num_shards:
+        raise ValueError(f"invalid shard {shard_id}/{num_shards}")
+
+    rng = np.random.default_rng(seed)
+    inputs = np.load(data_path / "test" / "all__inputs.npy")
+    labels = np.load(data_path / "test" / "all__labels.npy")
+    pid = np.load(data_path / "test" / "all__puzzle_identifiers.npy")
+
+    # Sampling without replacement cannot return more rows than exist;
+    # fall back to the whole split instead of raising inside rng.choice.
+    if n_total > len(inputs):
+        print(f"[data] requested {n_total} samples, test split has {len(inputs)}; using all of them")
+        n_total = len(inputs)
+
+    all_idx = rng.choice(len(inputs), size=n_total, replace=False)
+    shard_size = (n_total + num_shards - 1) // num_shards  # ceil division
+    s, e = shard_id * shard_size, min((shard_id + 1) * shard_size, n_total)
+    idx = all_idx[s:e]
+    return {
+        "inputs": torch.from_numpy(inputs[idx].astype(np.int32)),
+        "labels": torch.from_numpy(labels[idx].astype(np.int32)),
+        "puzzle_identifiers": torch.from_numpy(pid[idx].astype(np.int32)),
+        "idx": idx,
+    }
+
+
+def jvp_through(f, x, v):
+    """Evaluate ``f`` at ``x`` and its Jacobian-vector product with ``v``.
+
+    Returns the ``(f(x), J_f(x) @ v)`` pair produced by
+    ``torch.autograd.functional.jvp`` (no graph is kept for the result).
+    """
+    primal_out, tangent_out = torch.autograd.functional.jvp(
+        f, x, v=v, create_graph=False, strict=False
+    )
+    return primal_out, tangent_out
+
+
+def run_diagnose_batch(model, batch, device, k_lyap, t_ons, seed):
+ """Run the TRM recursion on one batch while tracking a joint (z_H, z_L) tangent basis.
+
+ Runs a fixed 4 ACT steps. A k_lyap-column tangent basis over the stacked
+ state [z_H; z_L] is pushed through every L and H update with JVPs and
+ re-orthonormalised by QR every t_ons updates; log|diag(R)| is accumulated.
+
+ Returns a dict of NumPy arrays: drift_zH / drift_zL (per-ACT-step L2 change of
+ each state), halted_at (1-based first step with q_halt > q_continue, 0 if
+ never), q_halt / q_continue histories, lyap_spec, and exact_correct /
+ token_acc computed from the logits of the LAST ACT step.
+ """
+ inner = model.inner
+ cfg = inner.config
+ B = batch["inputs"].shape[0]
+ seq_full = cfg.seq_len + inner.puzzle_emb_len
+ hidden = cfg.hidden_size
+ # D: flattened size of one state (z_H or z_L)
+ D = seq_full * hidden
+
+ z_H = inner.H_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
+ z_L = inner.L_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
+ seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None)
+ input_embeddings = inner._input_embeddings(batch["inputs"].to(device),
+ batch["puzzle_identifiers"].to(device))
+
+ # Random orthonormal tangent basis; rows [:D] are the z_H block, rows [D:] the z_L block.
+ g = torch.Generator(device=device).manual_seed(seed)
+ Q0 = torch.randn(B, 2*D, k_lyap, device=device, dtype=torch.float32, generator=g)
+ Q, _ = torch.linalg.qr(Q0)
+ log_R_sum = torch.zeros(B, k_lyap, device=device, dtype=torch.float32)
+ # n_lyap_steps counts QR re-orthonormalisations; step_counter counts L/H updates.
+ n_lyap_steps = 0
+ step_counter = 0
+
+ drift_zH_per_step, drift_zL_per_step = [], []
+ halted_at = torch.zeros(B, dtype=torch.long, device=device)
+ q_halt_hist, q_continue_hist = [], []
+
+ for act_step in range(4): # SHORT: only 4 ACT steps to match train-time
+ z_H_prev = z_H.detach().clone()
+ z_L_prev = z_L.detach().clone()
+
+ with torch.enable_grad():
+ zH, zL = z_H.detach(), z_L.detach()
+ for _h in range(cfg.H_cycles):
+ # L cycles
+ # Each L update: z_L <- L_level(z_L, z_H + input_embeddings); the z_H block is kept as is.
+ for _l in range(cfg.L_cycles):
+ v_H_j = Q[:, :D, :]
+ v_L_j = Q[:, D:, :]
+ # NOTE(review): feeding v_H + v_L through the Jacobian w.r.t. the first argument
+ # presumes L_level sums its two inputs before its layers — confirm.
+ v_comb = v_H_j + v_L_j
+ new_v_L_cols = []
+ f_L = lambda z: inner.L_level(z, zH + input_embeddings, **seq_info)
+ # One JVP per tangent column; the primal output of the last call is the new z_L.
+ for i in range(k_lyap):
+ v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype)
+ zL_new, Dv = jvp_through(f_L, zL, v_i)
+ new_v_L_cols.append(Dv.reshape(B, D).to(torch.float32))
+ new_v_L = torch.stack(new_v_L_cols, dim=-1)
+ Q = torch.cat([v_H_j, new_v_L], dim=1)
+ zL = zL_new
+ step_counter += 1
+ if step_counter % t_ons == 0:
+ Q, R = torch.linalg.qr(Q)
+ log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
+ n_lyap_steps += 1
+
+ # H step (uses SAME L_level!)
+ # z_H <- L_level(z_H, z_L); the z_L block is kept as is.
+ v_H_j = Q[:, :D, :]
+ v_L_j = Q[:, D:, :]
+ v_comb = v_H_j + v_L_j
+ new_v_H_cols = []
+ f_H = lambda z: inner.L_level(z, zL, **seq_info)
+ for i in range(k_lyap):
+ v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype)
+ zH_new, Dv = jvp_through(f_H, zH, v_i)
+ new_v_H_cols.append(Dv.reshape(B, D).to(torch.float32))
+ new_v_H = torch.stack(new_v_H_cols, dim=-1)
+ Q = torch.cat([new_v_H, v_L_j], dim=1)
+ zH = zH_new
+ step_counter += 1
+ if step_counter % t_ons == 0:
+ Q, R = torch.linalg.qr(Q)
+ log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
+ n_lyap_steps += 1
+
+ z_H, z_L = zH, zL
+
+ # L2 norm of the change of each state over this ACT step, per sample.
+ drift_zH_per_step.append((z_H - z_H_prev).float().flatten(1).norm(dim=1).cpu())
+ drift_zL_per_step.append((z_L - z_L_prev).float().flatten(1).norm(dim=1).cpu())
+
+ with torch.no_grad():
+ q_logits = inner.q_head(z_H[:, 0]).float()
+ q_halt, q_continue = q_logits[..., 0], q_logits[..., 1]
+ q_halt_hist.append(q_halt.cpu()); q_continue_hist.append(q_continue.cpu())
+ # Record only the FIRST step at which halting is preferred (0 = not yet halted).
+ new_halt = (q_halt > q_continue) & (halted_at == 0)
+ halted_at[new_halt] = act_step + 1
+ output = inner.lm_head(z_H)[:, inner.puzzle_emb_len:].float()
+ # Overwritten every step: the logits of the last ACT step are the ones scored.
+ final_logits = output
+
+ # NOTE(review): this averages per QR event, so with t_ons > 1 the values are
+ # growth rates per t_ons updates rather than per update — confirm intent.
+ lyap_spec = (log_R_sum / max(n_lyap_steps, 1)).cpu().numpy()
+
+ with torch.no_grad():
+ preds = final_logits.argmax(dim=-1)
+ labels = batch["labels"].to(device)
+ # Positions with label <= 0 are excluded from both accuracy measures.
+ mask = labels > 0
+ exact = ((preds == labels) | ~mask).all(dim=-1).cpu().float()
+ token_acc = ((preds == labels) & mask).sum(-1).float() / mask.sum(-1).float().clamp_min(1)
+ token_acc = token_acc.cpu()
+
+ return {
+ "drift_zH": torch.stack(drift_zH_per_step, dim=1).numpy(),
+ "drift_zL": torch.stack(drift_zL_per_step, dim=1).numpy(),
+ "halted_at": halted_at.cpu().numpy(),
+ "q_halt": torch.stack(q_halt_hist, dim=1).numpy(),
+ "q_continue": torch.stack(q_continue_hist, dim=1).numpy(),
+ "lyap_spec": lyap_spec,
+ "exact_correct": exact.numpy(),
+ "token_acc": token_acc.numpy(),
+ }
+
+
+def main():
+    """Command-line entry point: run the diagnostic on one shard and save the results.
+
+    Loads the checkpoint, draws this shard's test samples, runs
+    ``run_diagnose_batch`` batch by batch, writes the concatenated per-sample
+    arrays to ``--out`` with ``np.savez_compressed`` and prints the mean
+    Lyapunov spectrum for all / exactly-solved / failed samples.
+    """
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--ckpt-root", required=True)
+    ap.add_argument("--ckpt-name", default="step_13020")
+    ap.add_argument("--n-samples", type=int, default=512)
+    ap.add_argument("--shard-id", type=int, default=0)
+    ap.add_argument("--num-shards", type=int, default=1)
+    ap.add_argument("--batch-size", type=int, default=16)
+    ap.add_argument("--k-lyap", type=int, default=8)
+    ap.add_argument("--t-ons", type=int, default=1)
+    ap.add_argument("--seed", type=int, default=0)
+    ap.add_argument("--out", default="diag_trm.npz")
+    args = ap.parse_args()
+
+    device = "cuda"
+    model, cfg, train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device)
+    print(f"loaded {args.ckpt_name}: hidden={model.inner.config.hidden_size}, "
+          f"seq_full={train_meta['seq_len'] + model.inner.puzzle_emb_len}, "
+          f"halt_max_steps={model.inner.config.halt_max_steps}, "
+          f"H={model.inner.config.H_cycles} L={model.inner.config.L_cycles}")
+
+    test = load_test_samples(Path(cfg["data_paths"][0]), args.n_samples, args.shard_id, args.num_shards, args.seed)
+    n = len(test["inputs"])
+    print(f"shard {args.shard_id}/{args.num_shards}: {n} samples")
+
+    res = {k: [] for k in ["drift_zH","drift_zL","halted_at","q_halt","q_continue","lyap_spec","exact_correct","token_acc","idx"]}
+    t0 = time.time()
+    for s in range(0, n, args.batch_size):
+        e = min(s + args.batch_size, n)
+        batch = {k: test[k][s:e].to(device) for k in ["inputs","labels","puzzle_identifiers"]}
+        # The seed is offset by the batch start, so each batch draws its own tangent basis.
+        out = run_diagnose_batch(model, batch, device, args.k_lyap, args.t_ons, args.seed + s)
+        for k, v in out.items():
+            res[k].append(v)
+        res["idx"].append(test["idx"][s:e])
+        ls = out["lyap_spec"]
+        print(f"  [{e}/{n}] dt={time.time()-t0:.1f}s  exact={out['exact_correct'].mean():.3f}  "
+              f"λ_1={ls[:,0].mean():+.4f}  λ_{args.k_lyap}={ls[:,-1].mean():+.4f}", flush=True)
+
+    saved = {}
+    for k, v in res.items():
+        if not v:
+            continue
+        try:
+            saved[k] = np.concatenate(v, 0)
+        except ValueError:
+            # Per-batch arrays that cannot be concatenated are stacked instead.
+            saved[k] = np.stack(v, 0)
+    np.savez_compressed(args.out, **saved)
+
+    # An empty shard produces no arrays at all; stop cleanly instead of
+    # failing on the missing "exact_correct" key below.
+    if "exact_correct" not in saved:
+        print("\nN=0 (shard is empty): nothing to summarize")
+        return
+
+    def group_mean(values):
+        # nan without the "mean of empty slice" warning when a group is empty
+        return values.mean() if values.size else np.nan
+
+    succ = saved["exact_correct"] > 0.5
+    print(f"\nN={len(succ)}  acc={succ.mean():.4f}")
+    print(f"{'i':>3} {'all':>10} {'succ':>10} {'fail':>10} {'Δ':>9}")
+    for i in range(saved["lyap_spec"].shape[1]):
+        li = saved["lyap_spec"][:, i]
+        m_succ, m_fail = group_mean(li[succ]), group_mean(li[~succ])
+        print(f"{i+1:>3} {li.mean():+10.4f} {m_succ:+10.4f} {m_fail:+10.4f} {m_fail-m_succ:+9.4f}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/directional_lyap_perturb/plots/directional_lyap_perturb_combined.csv b/directional_lyap_perturb/plots/directional_lyap_perturb_combined.csv
new file mode 100644
index 0000000..0491f8c
--- /dev/null
+++ b/directional_lyap_perturb/plots/directional_lyap_perturb_combined.csv
@@ -0,0 +1,73 @@
+label,perturb_after,sigma,n_samples,candidates,fd_eps,ckpt_root,ckpt_name,clean_acc,mean_sign_exact,mean_sign_token_acc,worst_sign_exact,best_sign_exact,selected_growth_mean,selected_growth_q90,retain_mean_on_clean_success,retain_worst_on_clean_success,rescue_mean_on_clean_fail,rescue_best_on_clean_fail
+trm_baseline_best,0,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8579999804496765,0.8579999804496765,0.9472222328186035,0.8579999804496765,0.8579999804496765,-3.885612726211548,-3.8856122493743896,1.0,1.0,0.0,0.0
+trm_baseline_best,0,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8579999804496765,0.8579999804496765,0.9472222328186035,0.8579999804496765,0.8579999804496765,-3.885612726211548,-3.8856122493743896,1.0,1.0,0.0,0.0
+trm_baseline_best,0,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8579999804496765,0.8535000085830688,0.9457222819328308,0.8289999961853027,0.878000020980835,-3.885612726211548,-3.8856122493743896,0.9825174808502197,0.9650349617004395,0.07394365966320038,0.14084507524967194
+trm_baseline_best,0,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8579999804496765,0.8615000247955322,0.9481049180030823,0.8240000009536743,0.8989999890327454,-3.885612726211548,-3.8856122493743896,0.9603729844093323,0.9382284283638,0.26408451795578003,0.39436620473861694
+trm_baseline_best,0,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8579999804496765,0.8560000061988831,0.9467840790748596,0.8119999766349792,0.8999999761581421,-3.885612726211548,-3.8856122493743896,0.9545454382896423,0.9219114184379578,0.26056337356567383,0.3732394278049469
+trm_baseline_best,0,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8579999804496765,0.8619999885559082,0.9479444622993469,0.8240000009536743,0.8999999761581421,-3.885612726211548,-3.8856122493743896,0.9586247205734253,0.939393937587738,0.27816900610923767,0.42957746982574463
+trm_baseline_best,4,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8489999771118164,0.8489999771118164,0.9435185194015503,0.8489999771118164,0.8489999771118164,-1.2879749536514282,0.8905364871025085,1.0,1.0,0.0,0.0
+trm_baseline_best,4,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8489999771118164,0.8510000109672546,0.944135844707489,0.8379999995231628,0.8640000224113464,-1.2879749536514282,0.8905364871025085,0.9929328560829163,0.9858657121658325,0.05298013240098953,0.09933774918317795
+trm_baseline_best,4,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8489999771118164,0.8554999828338623,0.9452839493751526,0.8270000219345093,0.8840000033378601,-1.2879749536514282,0.8905364871025085,0.9734982252120972,0.9611307382583618,0.19205297529697418,0.3112582862377167
+trm_baseline_best,4,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8489999771118164,0.8565000295639038,0.9460123181343079,0.828000009059906,0.8849999904632568,-1.2879749536514282,0.8905364871025085,0.9723203778266907,0.9587750434875488,0.20529800653457642,0.3178808093070984
+trm_baseline_best,4,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8489999771118164,0.8560000061988831,0.9458826184272766,0.828000009059906,0.8840000033378601,-1.2879749536514282,0.8905364871025085,0.9699646830558777,0.9564192891120911,0.215231791138649,0.3245033025741577
+trm_baseline_best,4,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8489999771118164,0.8550000190734863,0.9454135894775391,0.8289999961853027,0.8809999823570251,-1.2879749536514282,0.8905364871025085,0.9699646830558777,0.9552414417266846,0.20860926806926727,0.29801324009895325
+trm_baseline_best,8,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8539999723434448,0.8539999723434448,0.9449505805969238,0.8539999723434448,0.8539999723434448,-1.811623215675354,1.4589354991912842,1.0,1.0,0.0,0.0
+trm_baseline_best,8,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8539999723434448,0.8544999957084656,0.9452963471412659,0.8429999947547913,0.8659999966621399,-1.811623215675354,1.4589354991912842,0.993559718132019,0.9871194362640381,0.04109589010477066,0.08219178020954132
+trm_baseline_best,8,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8539999723434448,0.8600000143051147,0.9476419687271118,0.8420000076293945,0.878000020980835,-1.811623215675354,1.4589354991912842,0.983021080493927,0.976580798625946,0.14041095972061157,0.22602739930152893
+trm_baseline_best,8,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8539999723434448,0.8544999957084656,0.9457098245620728,0.8320000171661377,0.8769999742507935,-1.811623215675354,1.4589354991912842,0.9783372282981873,0.966042160987854,0.13013698160648346,0.21232876181602478
+trm_baseline_best,8,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8539999723434448,0.8529999852180481,0.9448826909065247,0.8360000252723694,0.8700000047683716,-1.811623215675354,1.4589354991912842,0.9783372282981873,0.9695550203323364,0.11986301094293594,0.18493150174617767
+trm_baseline_best,8,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8539999723434448,0.8544999957084656,0.9452345371246338,0.8379999995231628,0.8709999918937683,-1.811623215675354,1.4589354991912842,0.9800936579704285,0.9730679392814636,0.11986301094293594,0.19178082048892975
+trm_baseline_best,12,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8550000190734863,0.8550000190734863,0.9453826546669006,0.8550000190734863,0.8550000190734863,-3.6991007328033447,2.8621342182159424,1.0,1.0,0.0,0.0
+trm_baseline_best,12,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8550000190734863,0.8525000214576721,0.944302499294281,0.847000002861023,0.8579999804496765,-3.6991007328033447,2.8621342182159424,0.9947368502616882,0.9906432628631592,0.013793103396892548,0.027586206793785095
+trm_baseline_best,12,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8550000190734863,0.8519999980926514,0.9443457126617432,0.8429999947547913,0.8610000014305115,-3.6991007328033447,2.8621342182159424,0.988304078578949,0.984795331954956,0.04827586188912392,0.08965517580509186
+trm_baseline_best,12,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8550000190734863,0.8489999771118164,0.9430555105209351,0.8460000157356262,0.8519999980926514,-3.6991007328033447,2.8621342182159424,0.988304078578949,0.9871345162391663,0.027586206793785095,0.04137931019067764
+trm_baseline_best,12,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8550000190734863,0.8554999828338623,0.945648193359375,0.8479999899864197,0.8629999756813049,-3.6991007328033447,2.8621342182159424,0.9888888597488403,0.9859648942947388,0.06896551698446274,0.1034482792019844
+trm_baseline_best,12,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8550000190734863,0.8500000238418579,0.9435864090919495,0.8460000157356262,0.8539999723434448,-3.6991007328033447,2.8621342182159424,0.988304078578949,0.9871345162391663,0.03448275849223137,0.05517241358757019
+trm_multi4_best,0,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8930000066757202,0.8930000066757202,0.9589999914169312,0.8930000066757202,0.8930000066757202,-3.885612726211548,-3.8856122493743896,1.0,1.0,0.0,0.0
+trm_multi4_best,0,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8930000066757202,0.8930000066757202,0.9589999914169312,0.8930000066757202,0.8930000066757202,-3.885612726211548,-3.8856122493743896,1.0,1.0,0.0,0.0
+trm_multi4_best,0,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8930000066757202,0.8970000147819519,0.960277795791626,0.8840000033378601,0.9100000262260437,-3.885612726211548,-3.8856122493743896,0.9938409924507141,0.9876819849014282,0.08878504484891891,0.15887850522994995
+trm_multi4_best,0,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8930000066757202,0.8884999752044678,0.9585925340652466,0.8600000143051147,0.9169999957084656,-3.885612726211548,-3.8856122493743896,0.9591265320777893,0.9406495094299316,0.29906541109085083,0.4112149477005005
+trm_multi4_best,0,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8930000066757202,0.8865000009536743,0.9574320316314697,0.8519999980926514,0.9210000038146973,-3.885612726211548,-3.8856122493743896,0.9596864581108093,0.9372900128364563,0.2757009267807007,0.4112149477005005
+trm_multi4_best,0,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8930000066757202,0.8989999890327454,0.9610432386398315,0.871999979019165,0.9259999990463257,-3.885612726211548,-3.8856122493743896,0.9664053916931152,0.9462485909461975,0.336448609828949,0.420560747385025
+trm_multi4_best,4,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8840000033378601,0.9556913375854492,0.8840000033378601,0.8840000033378601,-1.7770261764526367,0.8977539539337158,1.0,1.0,0.0,0.0
+trm_multi4_best,4,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.887499988079071,0.9567221403121948,0.8769999742507935,0.8980000019073486,-1.7770261764526367,0.8977539539337158,0.9954751133918762,0.9909502267837524,0.06465516984462738,0.12068965286016464
+trm_multi4_best,4,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8884999752044678,0.9570987820625305,0.8659999966621399,0.9110000133514404,-1.7770261764526367,0.8977539539337158,0.9796379804611206,0.9683257937431335,0.19396552443504333,0.3017241358757019
+trm_multi4_best,4,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8865000009536743,0.9563456773757935,0.8610000014305115,0.9120000004768372,-1.7770261764526367,0.8977539539337158,0.9717194437980652,0.959276020526886,0.23706896603107452,0.36206895112991333
+trm_multi4_best,4,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8899999856948853,0.9573581218719482,0.8659999966621399,0.9139999747276306,-1.7770261764526367,0.8977539539337158,0.9739819169044495,0.959276020526886,0.25,0.3448275923728943
+trm_multi4_best,4,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8960000276565552,0.9607962369918823,0.8700000047683716,0.921999990940094,-1.7770261764526367,0.8977539539337158,0.9739819169044495,0.9615384340286255,0.3017241358757019,0.43103447556495667
+trm_multi4_best,8,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8889999985694885,0.8889999985694885,0.9573332667350769,0.8889999985694885,0.8889999985694885,-2.587038040161133,1.1594672203063965,1.0,1.0,0.0,0.0
+trm_multi4_best,8,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8889999985694885,0.8920000195503235,0.9578518271446228,0.8870000243186951,0.8970000147819519,-2.587038040161133,1.1594672203063965,0.9977502822875977,0.9955005645751953,0.045045044273138046,0.07207207381725311
+trm_multi4_best,8,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8889999985694885,0.8880000114440918,0.9568086266517639,0.8709999918937683,0.9049999713897705,-2.587038040161133,1.1594672203063965,0.9842519760131836,0.9752531051635742,0.11711711436510086,0.19819819927215576
+trm_multi4_best,8,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8889999985694885,0.8889999985694885,0.9574383497238159,0.8730000257492065,0.9049999713897705,-2.587038040161133,1.1594672203063965,0.9848144054412842,0.9775028228759766,0.12162162363529205,0.20720720291137695
+trm_multi4_best,8,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8889999985694885,0.8895000219345093,0.957660436630249,0.871999979019165,0.9070000052452087,-2.587038040161133,1.1594672203063965,0.9859392642974854,0.9797525405883789,0.11711711436510086,0.22522522509098053
+trm_multi4_best,8,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8889999985694885,0.8914999961853027,0.9589135646820068,0.8769999742507935,0.906000018119812,-2.587038040161133,1.1594672203063965,0.9848144054412842,0.9786276817321777,0.14414414763450623,0.22522522509098053
+trm_multi4_best,12,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8840000033378601,0.956098735332489,0.8840000033378601,0.8840000033378601,-4.764944553375244,2.045381546020508,1.0,1.0,0.0,0.0
+trm_multi4_best,12,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8849999904632568,0.9564691781997681,0.8820000290870667,0.8880000114440918,-4.764944553375244,2.045381546020508,0.9977375268936157,0.9966063499450684,0.0258620698004961,0.043103449046611786
+trm_multi4_best,12,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8855000138282776,0.9565123915672302,0.8809999823570251,0.8899999856948853,-4.764944553375244,2.045381546020508,0.9949095249176025,0.9943438768386841,0.0517241396009922,0.08620689809322357
+trm_multi4_best,12,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8855000138282776,0.9570556282997131,0.8799999952316284,0.890999972820282,-4.764944553375244,2.045381546020508,0.9954751133918762,0.9943438768386841,0.047413792461156845,0.08620689809322357
+trm_multi4_best,12,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8849999904632568,0.9560925960540771,0.8809999823570251,0.8889999985694885,-4.764944553375244,2.045381546020508,0.9949095249176025,0.9943438768386841,0.047413792461156845,0.07758620381355286
+trm_multi4_best,12,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8849999904632568,0.9559259414672852,0.8799999952316284,0.8899999856948853,-4.764944553375244,2.045381546020508,0.9943438768386841,0.9932126402854919,0.0517241396009922,0.08620689809322357
+trm_multi4_final,0,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8299999833106995,0.9333704113960266,0.8299999833106995,0.8299999833106995,-3.885612726211548,-3.8856122493743896,1.0,1.0,0.0,0.0
+trm_multi4_final,0,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8299999833106995,0.9333704113960266,0.8299999833106995,0.8299999833106995,-3.885612726211548,-3.8856122493743896,1.0,1.0,0.0,0.0
+trm_multi4_final,0,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8289999961853027,0.9334814548492432,0.8199999928474426,0.8379999995231628,-3.885612726211548,-3.8856122493743896,0.9927710890769958,0.9855421781539917,0.029411764815449715,0.0470588244497776
+trm_multi4_final,0,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8255000114440918,0.9313827157020569,0.7990000247955322,0.8519999980926514,-3.885612726211548,-3.8856122493743896,0.9572288990020752,0.9385542273521423,0.1823529452085495,0.24705882370471954
+trm_multi4_final,0,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8295000195503235,0.9325369596481323,0.7960000038146973,0.8629999756813049,-3.885612726211548,-3.8856122493743896,0.9590361714363098,0.9385542273521423,0.19705882668495178,0.29411765933036804
+trm_multi4_final,0,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8309999704360962,0.933006227016449,0.7990000247955322,0.8629999756813049,-3.885612726211548,-3.8856122493743896,0.9554216861724854,0.9325301051139832,0.2235294133424759,0.30000001192092896
+trm_multi4_final,4,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8370000123977661,0.8370000123977661,0.9353950619697571,0.8370000123977661,0.8370000123977661,-1.9923189878463745,0.9806549549102783,1.0,1.0,0.0,0.0
+trm_multi4_final,4,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8370000123977661,0.8355000019073486,0.9351357817649841,0.8240000009536743,0.847000002861023,-1.9923189878463745,0.9806549549102783,0.9910394549369812,0.9820788502693176,0.03680981695652008,0.061349693685770035
+trm_multi4_final,4,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8370000123977661,0.8355000019073486,0.9351975321769714,0.8130000233650208,0.8579999804496765,-1.9923189878463745,0.9806549549102783,0.9731183052062988,0.9617682099342346,0.12883435189723969,0.20858895778656006
+trm_multi4_final,4,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8370000123977661,0.8299999833106995,0.9334691166877747,0.8059999942779541,0.8539999723434448,-1.9923189878463745,0.9806549549102783,0.965352475643158,0.9498208165168762,0.1349693238735199,0.20245398581027985
+trm_multi4_final,4,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8370000123977661,0.8309999704360962,0.9334567785263062,0.8069999814033508,0.8550000190734863,-1.9923189878463745,0.9806549549102783,0.9677419066429138,0.9534050226211548,0.12883435189723969,0.20245398581027985
+trm_multi4_final,4,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8370000123977661,0.8330000042915344,0.933746874332428,0.8069999814033508,0.859000027179718,-1.9923189878463745,0.9806549549102783,0.9701313972473145,0.9569892287254333,0.12883435189723969,0.22085890173912048
+trm_multi4_final,8,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8299999833106995,0.9329506754875183,0.8299999833106995,0.8299999833106995,-2.597419500350952,1.466092586517334,1.0,1.0,0.0,0.0
+trm_multi4_final,8,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8299999833106995,0.9333766102790833,0.8220000267028809,0.8379999995231628,-2.597419500350952,1.466092586517334,0.9951807260513306,0.9903614521026611,0.0235294122248888,0.0470588244497776
+trm_multi4_final,8,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.828000009059906,0.9320307374000549,0.8130000233650208,0.8429999947547913,-2.597419500350952,1.466092586517334,0.9849397540092468,0.9771084189414978,0.061764705926179886,0.11176470667123795
+trm_multi4_final,8,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8314999938011169,0.9331419467926025,0.8190000057220459,0.843999981880188,-2.597419500350952,1.466092586517334,0.9849397540092468,0.9819276928901672,0.08235294371843338,0.1411764770746231
+trm_multi4_final,8,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8309999704360962,0.9327530860900879,0.8169999718666077,0.8450000286102295,-2.597419500350952,1.466092586517334,0.9819276928901672,0.9771084189414978,0.0941176488995552,0.15294118225574493
+trm_multi4_final,8,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8335000276565552,0.9352839589118958,0.8180000185966492,0.8489999771118164,-2.597419500350952,1.466092586517334,0.9855421781539917,0.9819276928901672,0.09117647260427475,0.16470588743686676
+trm_multi4_final,12,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8339999914169312,0.8339999914169312,0.9340246319770813,0.8339999914169312,0.8339999914169312,-5.133573055267334,2.7817435264587402,1.0,1.0,0.0,0.0
+trm_multi4_final,12,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8339999914169312,0.8335000276565552,0.9341234564781189,0.8289999961853027,0.8379999995231628,-5.133573055267334,2.7817435264587402,0.9970024228096008,0.9940047860145569,0.012048192322254181,0.024096384644508362
+trm_multi4_final,12,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8339999914169312,0.8345000147819519,0.9352160692214966,0.8289999961853027,0.8399999737739563,-5.133573055267334,2.7817435264587402,0.9922062158584595,0.9892086386680603,0.04216867312788963,0.0602409653365612
+trm_multi4_final,12,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8339999914169312,0.8320000171661377,0.9335123300552368,0.824999988079071,0.8389999866485596,-5.133573055267334,2.7817435264587402,0.9892086386680603,0.986810564994812,0.04216867312788963,0.07228915393352509
+trm_multi4_final,12,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8339999914169312,0.8299999833106995,0.9333456754684448,0.8240000009536743,0.8360000252723694,-5.133573055267334,2.7817435264587402,0.9880095720291138,0.9856114983558655,0.03614457696676254,0.0602409653365612
+trm_multi4_final,12,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8339999914169312,0.8309999704360962,0.9343765377998352,0.824999988079071,0.8370000123977661,-5.133573055267334,2.7817435264587402,0.9898081421852112,0.9880095720291138,0.03313253074884415,0.0602409653365612
diff --git a/directional_lyap_perturb/plots/final_gpu_status.txt b/directional_lyap_perturb/plots/final_gpu_status.txt
new file mode 100644
index 0000000..7c03653
--- /dev/null
+++ b/directional_lyap_perturb/plots/final_gpu_status.txt
@@ -0,0 +1,4 @@
+0, 480, 49140, 25
+1, 13, 49140, 0
+2, 45494, 49140, 0
+3, 13, 49140, 0
diff --git a/directional_lyap_perturb/smoke_baseline.summary.csv b/directional_lyap_perturb/smoke_baseline.summary.csv
new file mode 100644
index 0000000..49a997c
--- /dev/null
+++ b/directional_lyap_perturb/smoke_baseline.summary.csv
@@ -0,0 +1,5 @@
+label,perturb_after,sigma,n_samples,candidates,fd_eps,ckpt_root,ckpt_name,clean_acc,mean_sign_exact,mean_sign_token_acc,worst_sign_exact,best_sign_exact,selected_growth_mean,selected_growth_q90,retain_mean_on_clean_success,retain_worst_on_clean_success,rescue_mean_on_clean_fail,rescue_best_on_clean_fail
+smoke_baseline,0,0.0,16,4,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.875,0.9375,0.979938268661499,0.9375,0.9375,-3.8856122493743896,-3.8856122493743896,1.0,1.0,0.5,0.5
+smoke_baseline,0,0.03,16,4,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.875,0.875,0.9475308656692505,0.8125,0.9375,-3.8856122493743896,-3.8856122493743896,0.9642857313156128,0.9285714030265808,0.25,0.5
+smoke_baseline,8,0.0,16,4,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,1.0,1.0,1.0,1.0,1.0,-5.557522296905518,1.0598664283752441,1.0,1.0,nan,nan
+smoke_baseline,8,0.03,16,4,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,1.0,0.9375,0.9764660596847534,0.9375,0.9375,-5.557522296905518,1.0598664283752441,0.9375,0.9375,nan,nan
diff --git a/directional_lyap_perturb/smoke_baseline_fixedclean.summary.csv b/directional_lyap_perturb/smoke_baseline_fixedclean.summary.csv
new file mode 100644
index 0000000..26ecd07
--- /dev/null
+++ b/directional_lyap_perturb/smoke_baseline_fixedclean.summary.csv
@@ -0,0 +1,5 @@
+label,perturb_after,sigma,n_samples,candidates,fd_eps,ckpt_root,ckpt_name,clean_acc,mean_sign_exact,mean_sign_token_acc,worst_sign_exact,best_sign_exact,selected_growth_mean,selected_growth_q90,retain_mean_on_clean_success,retain_worst_on_clean_success,rescue_mean_on_clean_fail,rescue_best_on_clean_fail
+smoke_baseline,0,0.0,16,4,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.9375,0.9375,0.979938268661499,0.9375,0.9375,-3.8856122493743896,-3.8856122493743896,1.0,1.0,0.0,0.0
+smoke_baseline,0,0.03,16,4,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.9375,0.875,0.9475308656692505,0.8125,0.9375,-3.8856122493743896,-3.8856122493743896,0.9333333373069763,0.8666666746139526,0.0,0.0
+smoke_baseline,8,0.0,16,4,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,1.0,1.0,1.0,1.0,1.0,-5.557522296905518,1.0598664283752441,1.0,1.0,nan,nan
+smoke_baseline,8,0.03,16,4,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,1.0,0.9375,0.9764660596847534,0.9375,0.9375,-5.557522296905518,1.0598664283752441,0.9375,0.9375,nan,nan
diff --git a/directional_lyap_perturb/trm_baseline_best_step58590_n1000_c8_dirlyap.summary.csv b/directional_lyap_perturb/trm_baseline_best_step58590_n1000_c8_dirlyap.summary.csv
new file mode 100644
index 0000000..5e1db0a
--- /dev/null
+++ b/directional_lyap_perturb/trm_baseline_best_step58590_n1000_c8_dirlyap.summary.csv
@@ -0,0 +1,25 @@
+label,perturb_after,sigma,n_samples,candidates,fd_eps,ckpt_root,ckpt_name,clean_acc,mean_sign_exact,mean_sign_token_acc,worst_sign_exact,best_sign_exact,selected_growth_mean,selected_growth_q90,retain_mean_on_clean_success,retain_worst_on_clean_success,rescue_mean_on_clean_fail,rescue_best_on_clean_fail
+trm_baseline_best,0,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8579999804496765,0.8579999804496765,0.9472222328186035,0.8579999804496765,0.8579999804496765,-3.885612726211548,-3.8856122493743896,1.0,1.0,0.0,0.0
+trm_baseline_best,0,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8579999804496765,0.8579999804496765,0.9472222328186035,0.8579999804496765,0.8579999804496765,-3.885612726211548,-3.8856122493743896,1.0,1.0,0.0,0.0
+trm_baseline_best,0,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8579999804496765,0.8535000085830688,0.9457222819328308,0.8289999961853027,0.878000020980835,-3.885612726211548,-3.8856122493743896,0.9825174808502197,0.9650349617004395,0.07394365966320038,0.14084507524967194
+trm_baseline_best,0,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8579999804496765,0.8615000247955322,0.9481049180030823,0.8240000009536743,0.8989999890327454,-3.885612726211548,-3.8856122493743896,0.9603729844093323,0.9382284283638,0.26408451795578003,0.39436620473861694
+trm_baseline_best,0,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8579999804496765,0.8560000061988831,0.9467840790748596,0.8119999766349792,0.8999999761581421,-3.885612726211548,-3.8856122493743896,0.9545454382896423,0.9219114184379578,0.26056337356567383,0.3732394278049469
+trm_baseline_best,0,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8579999804496765,0.8619999885559082,0.9479444622993469,0.8240000009536743,0.8999999761581421,-3.885612726211548,-3.8856122493743896,0.9586247205734253,0.939393937587738,0.27816900610923767,0.42957746982574463
+trm_baseline_best,4,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8489999771118164,0.8489999771118164,0.9435185194015503,0.8489999771118164,0.8489999771118164,-1.2879749536514282,0.8905364871025085,1.0,1.0,0.0,0.0
+trm_baseline_best,4,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8489999771118164,0.8510000109672546,0.944135844707489,0.8379999995231628,0.8640000224113464,-1.2879749536514282,0.8905364871025085,0.9929328560829163,0.9858657121658325,0.05298013240098953,0.09933774918317795
+trm_baseline_best,4,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8489999771118164,0.8554999828338623,0.9452839493751526,0.8270000219345093,0.8840000033378601,-1.2879749536514282,0.8905364871025085,0.9734982252120972,0.9611307382583618,0.19205297529697418,0.3112582862377167
+trm_baseline_best,4,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8489999771118164,0.8565000295639038,0.9460123181343079,0.828000009059906,0.8849999904632568,-1.2879749536514282,0.8905364871025085,0.9723203778266907,0.9587750434875488,0.20529800653457642,0.3178808093070984
+trm_baseline_best,4,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8489999771118164,0.8560000061988831,0.9458826184272766,0.828000009059906,0.8840000033378601,-1.2879749536514282,0.8905364871025085,0.9699646830558777,0.9564192891120911,0.215231791138649,0.3245033025741577
+trm_baseline_best,4,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8489999771118164,0.8550000190734863,0.9454135894775391,0.8289999961853027,0.8809999823570251,-1.2879749536514282,0.8905364871025085,0.9699646830558777,0.9552414417266846,0.20860926806926727,0.29801324009895325
+trm_baseline_best,8,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8539999723434448,0.8539999723434448,0.9449505805969238,0.8539999723434448,0.8539999723434448,-1.811623215675354,1.4589354991912842,1.0,1.0,0.0,0.0
+trm_baseline_best,8,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8539999723434448,0.8544999957084656,0.9452963471412659,0.8429999947547913,0.8659999966621399,-1.811623215675354,1.4589354991912842,0.993559718132019,0.9871194362640381,0.04109589010477066,0.08219178020954132
+trm_baseline_best,8,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8539999723434448,0.8600000143051147,0.9476419687271118,0.8420000076293945,0.878000020980835,-1.811623215675354,1.4589354991912842,0.983021080493927,0.976580798625946,0.14041095972061157,0.22602739930152893
+trm_baseline_best,8,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8539999723434448,0.8544999957084656,0.9457098245620728,0.8320000171661377,0.8769999742507935,-1.811623215675354,1.4589354991912842,0.9783372282981873,0.966042160987854,0.13013698160648346,0.21232876181602478
+trm_baseline_best,8,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8539999723434448,0.8529999852180481,0.9448826909065247,0.8360000252723694,0.8700000047683716,-1.811623215675354,1.4589354991912842,0.9783372282981873,0.9695550203323364,0.11986301094293594,0.18493150174617767
+trm_baseline_best,8,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8539999723434448,0.8544999957084656,0.9452345371246338,0.8379999995231628,0.8709999918937683,-1.811623215675354,1.4589354991912842,0.9800936579704285,0.9730679392814636,0.11986301094293594,0.19178082048892975
+trm_baseline_best,12,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8550000190734863,0.8550000190734863,0.9453826546669006,0.8550000190734863,0.8550000190734863,-3.6991007328033447,2.8621342182159424,1.0,1.0,0.0,0.0
+trm_baseline_best,12,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8550000190734863,0.8525000214576721,0.944302499294281,0.847000002861023,0.8579999804496765,-3.6991007328033447,2.8621342182159424,0.9947368502616882,0.9906432628631592,0.013793103396892548,0.027586206793785095
+trm_baseline_best,12,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8550000190734863,0.8519999980926514,0.9443457126617432,0.8429999947547913,0.8610000014305115,-3.6991007328033447,2.8621342182159424,0.988304078578949,0.984795331954956,0.04827586188912392,0.08965517580509186
+trm_baseline_best,12,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8550000190734863,0.8489999771118164,0.9430555105209351,0.8460000157356262,0.8519999980926514,-3.6991007328033447,2.8621342182159424,0.988304078578949,0.9871345162391663,0.027586206793785095,0.04137931019067764
+trm_baseline_best,12,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8550000190734863,0.8554999828338623,0.945648193359375,0.8479999899864197,0.8629999756813049,-3.6991007328033447,2.8621342182159424,0.9888888597488403,0.9859648942947388,0.06896551698446274,0.1034482792019844
+trm_baseline_best,12,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8550000190734863,0.8500000238418579,0.9435864090919495,0.8460000157356262,0.8539999723434448,-3.6991007328033447,2.8621342182159424,0.988304078578949,0.9871345162391663,0.03448275849223137,0.05517241358757019
diff --git a/directional_lyap_perturb/trm_multi4_best_step35805_n1000_c8_dirlyap.summary.csv b/directional_lyap_perturb/trm_multi4_best_step35805_n1000_c8_dirlyap.summary.csv
new file mode 100644
index 0000000..34bb554
--- /dev/null
+++ b/directional_lyap_perturb/trm_multi4_best_step35805_n1000_c8_dirlyap.summary.csv
@@ -0,0 +1,25 @@
+label,perturb_after,sigma,n_samples,candidates,fd_eps,ckpt_root,ckpt_name,clean_acc,mean_sign_exact,mean_sign_token_acc,worst_sign_exact,best_sign_exact,selected_growth_mean,selected_growth_q90,retain_mean_on_clean_success,retain_worst_on_clean_success,rescue_mean_on_clean_fail,rescue_best_on_clean_fail
+trm_multi4_best,0,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8930000066757202,0.8930000066757202,0.9589999914169312,0.8930000066757202,0.8930000066757202,-3.885612726211548,-3.8856122493743896,1.0,1.0,0.0,0.0
+trm_multi4_best,0,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8930000066757202,0.8930000066757202,0.9589999914169312,0.8930000066757202,0.8930000066757202,-3.885612726211548,-3.8856122493743896,1.0,1.0,0.0,0.0
+trm_multi4_best,0,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8930000066757202,0.8970000147819519,0.960277795791626,0.8840000033378601,0.9100000262260437,-3.885612726211548,-3.8856122493743896,0.9938409924507141,0.9876819849014282,0.08878504484891891,0.15887850522994995
+trm_multi4_best,0,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8930000066757202,0.8884999752044678,0.9585925340652466,0.8600000143051147,0.9169999957084656,-3.885612726211548,-3.8856122493743896,0.9591265320777893,0.9406495094299316,0.29906541109085083,0.4112149477005005
+trm_multi4_best,0,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8930000066757202,0.8865000009536743,0.9574320316314697,0.8519999980926514,0.9210000038146973,-3.885612726211548,-3.8856122493743896,0.9596864581108093,0.9372900128364563,0.2757009267807007,0.4112149477005005
+trm_multi4_best,0,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8930000066757202,0.8989999890327454,0.9610432386398315,0.871999979019165,0.9259999990463257,-3.885612726211548,-3.8856122493743896,0.9664053916931152,0.9462485909461975,0.336448609828949,0.420560747385025
+trm_multi4_best,4,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8840000033378601,0.9556913375854492,0.8840000033378601,0.8840000033378601,-1.7770261764526367,0.8977539539337158,1.0,1.0,0.0,0.0
+trm_multi4_best,4,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.887499988079071,0.9567221403121948,0.8769999742507935,0.8980000019073486,-1.7770261764526367,0.8977539539337158,0.9954751133918762,0.9909502267837524,0.06465516984462738,0.12068965286016464
+trm_multi4_best,4,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8884999752044678,0.9570987820625305,0.8659999966621399,0.9110000133514404,-1.7770261764526367,0.8977539539337158,0.9796379804611206,0.9683257937431335,0.19396552443504333,0.3017241358757019
+trm_multi4_best,4,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8865000009536743,0.9563456773757935,0.8610000014305115,0.9120000004768372,-1.7770261764526367,0.8977539539337158,0.9717194437980652,0.959276020526886,0.23706896603107452,0.36206895112991333
+trm_multi4_best,4,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8899999856948853,0.9573581218719482,0.8659999966621399,0.9139999747276306,-1.7770261764526367,0.8977539539337158,0.9739819169044495,0.959276020526886,0.25,0.3448275923728943
+trm_multi4_best,4,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8960000276565552,0.9607962369918823,0.8700000047683716,0.921999990940094,-1.7770261764526367,0.8977539539337158,0.9739819169044495,0.9615384340286255,0.3017241358757019,0.43103447556495667
+trm_multi4_best,8,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8889999985694885,0.8889999985694885,0.9573332667350769,0.8889999985694885,0.8889999985694885,-2.587038040161133,1.1594672203063965,1.0,1.0,0.0,0.0
+trm_multi4_best,8,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8889999985694885,0.8920000195503235,0.9578518271446228,0.8870000243186951,0.8970000147819519,-2.587038040161133,1.1594672203063965,0.9977502822875977,0.9955005645751953,0.045045044273138046,0.07207207381725311
+trm_multi4_best,8,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8889999985694885,0.8880000114440918,0.9568086266517639,0.8709999918937683,0.9049999713897705,-2.587038040161133,1.1594672203063965,0.9842519760131836,0.9752531051635742,0.11711711436510086,0.19819819927215576
+trm_multi4_best,8,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8889999985694885,0.8889999985694885,0.9574383497238159,0.8730000257492065,0.9049999713897705,-2.587038040161133,1.1594672203063965,0.9848144054412842,0.9775028228759766,0.12162162363529205,0.20720720291137695
+trm_multi4_best,8,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8889999985694885,0.8895000219345093,0.957660436630249,0.871999979019165,0.9070000052452087,-2.587038040161133,1.1594672203063965,0.9859392642974854,0.9797525405883789,0.11711711436510086,0.22522522509098053
+trm_multi4_best,8,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8889999985694885,0.8914999961853027,0.9589135646820068,0.8769999742507935,0.906000018119812,-2.587038040161133,1.1594672203063965,0.9848144054412842,0.9786276817321777,0.14414414763450623,0.22522522509098053
+trm_multi4_best,12,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8840000033378601,0.956098735332489,0.8840000033378601,0.8840000033378601,-4.764944553375244,2.045381546020508,1.0,1.0,0.0,0.0
+trm_multi4_best,12,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8849999904632568,0.9564691781997681,0.8820000290870667,0.8880000114440918,-4.764944553375244,2.045381546020508,0.9977375268936157,0.9966063499450684,0.0258620698004961,0.043103449046611786
+trm_multi4_best,12,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8855000138282776,0.9565123915672302,0.8809999823570251,0.8899999856948853,-4.764944553375244,2.045381546020508,0.9949095249176025,0.9943438768386841,0.0517241396009922,0.08620689809322357
+trm_multi4_best,12,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8855000138282776,0.9570556282997131,0.8799999952316284,0.890999972820282,-4.764944553375244,2.045381546020508,0.9954751133918762,0.9943438768386841,0.047413792461156845,0.08620689809322357
+trm_multi4_best,12,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8849999904632568,0.9560925960540771,0.8809999823570251,0.8889999985694885,-4.764944553375244,2.045381546020508,0.9949095249176025,0.9943438768386841,0.047413792461156845,0.07758620381355286
+trm_multi4_best,12,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8849999904632568,0.9559259414672852,0.8799999952316284,0.8899999856948853,-4.764944553375244,2.045381546020508,0.9943438768386841,0.9932126402854919,0.0517241396009922,0.08620689809322357
diff --git a/directional_lyap_perturb/trm_multi4_final_step65100_n1000_c8_dirlyap.summary.csv b/directional_lyap_perturb/trm_multi4_final_step65100_n1000_c8_dirlyap.summary.csv
new file mode 100644
index 0000000..9df33ba
--- /dev/null
+++ b/directional_lyap_perturb/trm_multi4_final_step65100_n1000_c8_dirlyap.summary.csv
@@ -0,0 +1,25 @@
+label,perturb_after,sigma,n_samples,candidates,fd_eps,ckpt_root,ckpt_name,clean_acc,mean_sign_exact,mean_sign_token_acc,worst_sign_exact,best_sign_exact,selected_growth_mean,selected_growth_q90,retain_mean_on_clean_success,retain_worst_on_clean_success,rescue_mean_on_clean_fail,rescue_best_on_clean_fail
+trm_multi4_final,0,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8299999833106995,0.9333704113960266,0.8299999833106995,0.8299999833106995,-3.885612726211548,-3.8856122493743896,1.0,1.0,0.0,0.0
+trm_multi4_final,0,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8299999833106995,0.9333704113960266,0.8299999833106995,0.8299999833106995,-3.885612726211548,-3.8856122493743896,1.0,1.0,0.0,0.0
+trm_multi4_final,0,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8289999961853027,0.9334814548492432,0.8199999928474426,0.8379999995231628,-3.885612726211548,-3.8856122493743896,0.9927710890769958,0.9855421781539917,0.029411764815449715,0.0470588244497776
+trm_multi4_final,0,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8255000114440918,0.9313827157020569,0.7990000247955322,0.8519999980926514,-3.885612726211548,-3.8856122493743896,0.9572288990020752,0.9385542273521423,0.1823529452085495,0.24705882370471954
+trm_multi4_final,0,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8295000195503235,0.9325369596481323,0.7960000038146973,0.8629999756813049,-3.885612726211548,-3.8856122493743896,0.9590361714363098,0.9385542273521423,0.19705882668495178,0.29411765933036804
+trm_multi4_final,0,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8309999704360962,0.933006227016449,0.7990000247955322,0.8629999756813049,-3.885612726211548,-3.8856122493743896,0.9554216861724854,0.9325301051139832,0.2235294133424759,0.30000001192092896
+trm_multi4_final,4,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8370000123977661,0.8370000123977661,0.9353950619697571,0.8370000123977661,0.8370000123977661,-1.9923189878463745,0.9806549549102783,1.0,1.0,0.0,0.0
+trm_multi4_final,4,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8370000123977661,0.8355000019073486,0.9351357817649841,0.8240000009536743,0.847000002861023,-1.9923189878463745,0.9806549549102783,0.9910394549369812,0.9820788502693176,0.03680981695652008,0.061349693685770035
+trm_multi4_final,4,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8370000123977661,0.8355000019073486,0.9351975321769714,0.8130000233650208,0.8579999804496765,-1.9923189878463745,0.9806549549102783,0.9731183052062988,0.9617682099342346,0.12883435189723969,0.20858895778656006
+trm_multi4_final,4,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8370000123977661,0.8299999833106995,0.9334691166877747,0.8059999942779541,0.8539999723434448,-1.9923189878463745,0.9806549549102783,0.965352475643158,0.9498208165168762,0.1349693238735199,0.20245398581027985
+trm_multi4_final,4,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8370000123977661,0.8309999704360962,0.9334567785263062,0.8069999814033508,0.8550000190734863,-1.9923189878463745,0.9806549549102783,0.9677419066429138,0.9534050226211548,0.12883435189723969,0.20245398581027985
+trm_multi4_final,4,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8370000123977661,0.8330000042915344,0.933746874332428,0.8069999814033508,0.859000027179718,-1.9923189878463745,0.9806549549102783,0.9701313972473145,0.9569892287254333,0.12883435189723969,0.22085890173912048
+trm_multi4_final,8,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8299999833106995,0.9329506754875183,0.8299999833106995,0.8299999833106995,-2.597419500350952,1.466092586517334,1.0,1.0,0.0,0.0
+trm_multi4_final,8,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8299999833106995,0.9333766102790833,0.8220000267028809,0.8379999995231628,-2.597419500350952,1.466092586517334,0.9951807260513306,0.9903614521026611,0.0235294122248888,0.0470588244497776
+trm_multi4_final,8,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.828000009059906,0.9320307374000549,0.8130000233650208,0.8429999947547913,-2.597419500350952,1.466092586517334,0.9849397540092468,0.9771084189414978,0.061764705926179886,0.11176470667123795
+trm_multi4_final,8,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8314999938011169,0.9331419467926025,0.8190000057220459,0.843999981880188,-2.597419500350952,1.466092586517334,0.9849397540092468,0.9819276928901672,0.08235294371843338,0.1411764770746231
+trm_multi4_final,8,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8309999704360962,0.9327530860900879,0.8169999718666077,0.8450000286102295,-2.597419500350952,1.466092586517334,0.9819276928901672,0.9771084189414978,0.0941176488995552,0.15294118225574493
+trm_multi4_final,8,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8335000276565552,0.9352839589118958,0.8180000185966492,0.8489999771118164,-2.597419500350952,1.466092586517334,0.9855421781539917,0.9819276928901672,0.09117647260427475,0.16470588743686676
+trm_multi4_final,12,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8339999914169312,0.8339999914169312,0.9340246319770813,0.8339999914169312,0.8339999914169312,-5.133573055267334,2.7817435264587402,1.0,1.0,0.0,0.0
+trm_multi4_final,12,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8339999914169312,0.8335000276565552,0.9341234564781189,0.8289999961853027,0.8379999995231628,-5.133573055267334,2.7817435264587402,0.9970024228096008,0.9940047860145569,0.012048192322254181,0.024096384644508362
+trm_multi4_final,12,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8339999914169312,0.8345000147819519,0.9352160692214966,0.8289999961853027,0.8399999737739563,-5.133573055267334,2.7817435264587402,0.9922062158584595,0.9892086386680603,0.04216867312788963,0.0602409653365612
+trm_multi4_final,12,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8339999914169312,0.8320000171661377,0.9335123300552368,0.824999988079071,0.8389999866485596,-5.133573055267334,2.7817435264587402,0.9892086386680603,0.986810564994812,0.04216867312788963,0.07228915393352509
+trm_multi4_final,12,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8339999914169312,0.8299999833106995,0.9333456754684448,0.8240000009536743,0.8360000252723694,-5.133573055267334,2.7817435264587402,0.9880095720291138,0.9856114983558655,0.03614457696676254,0.0602409653365612
+trm_multi4_final,12,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8339999914169312,0.8309999704360962,0.9343765377998352,0.824999988079071,0.8370000123977661,-5.133573055267334,2.7817435264587402,0.9898081421852112,0.9880095720291138,0.03313253074884415,0.0602409653365612
diff --git a/directional_lyap_perturb/watch_and_plot.sh b/directional_lyap_perturb/watch_and_plot.sh
new file mode 100644
index 0000000..683d1b8
--- /dev/null
+++ b/directional_lyap_perturb/watch_and_plot.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+set -euo pipefail
+# Wait for every run listed in PIDS to exit, then plot the summary CSVs and record GPU status.
+ROOT=/home/yurenh2/rrm
+PY=/home/yurenh2/miniconda3/envs/rrm/bin/python
+cd "${ROOT}"
+# PID files of the runs to wait on; the paths are relative to ROOT (the cwd set by the cd above).
+PIDS=(
+ research/flossing/directional_lyap_perturb/logs/trm_baseline_best_step58590_n1000_c8_dirlyap.pid
+ research/flossing/directional_lyap_perturb/logs/trm_multi4_best_step35805_n1000_c8_dirlyap.pid
+ research/flossing/directional_lyap_perturb/logs/trm_multi4_final_step65100_n1000_c8_dirlyap.pid
+)
+# Runs are watched one after another: kill -0 sends no signal and only succeeds while the PID can be signalled, so it is polled every 60 s.
+for pf in "${PIDS[@]}"; do
+ pid=$(cat "${pf}")  # a missing/unreadable PID file makes cat fail, which aborts the script under set -e
+ echo "watch ${pf}: ${pid}"
+ while kill -0 "${pid}" 2>/dev/null; do
+ sleep 60
+ done
+ echo "done ${pf}: ${pid}"
+done
+# Every watched PID is gone: hand the three summary CSVs to the plotting script, writing into the plots directory.
+"${PY}" research/flossing/plot_directional_lyap_perturb.py \
+ --summaries \
+ research/flossing/directional_lyap_perturb/trm_baseline_best_step58590_n1000_c8_dirlyap.summary.csv \
+ research/flossing/directional_lyap_perturb/trm_multi4_best_step35805_n1000_c8_dirlyap.summary.csv \
+ research/flossing/directional_lyap_perturb/trm_multi4_final_step65100_n1000_c8_dirlyap.summary.csv \
+ --out-dir research/flossing/directional_lyap_perturb/plots \
+ --slice-sigma 0.03
+# Save per-GPU index, memory used/total and utilization as CSV without header or units, next to the plots.
+nvidia-smi --query-gpu=index,memory.used,memory.total,utilization.gpu --format=csv,noheader,nounits \
+ > research/flossing/directional_lyap_perturb/plots/final_gpu_status.txt
diff --git a/directional_lyap_perturb_eps_sweep/plots/directional_lyap_perturb_combined.csv b/directional_lyap_perturb_eps_sweep/plots/directional_lyap_perturb_combined.csv
new file mode 100644
index 0000000..9aacd7a
--- /dev/null
+++ b/directional_lyap_perturb_eps_sweep/plots/directional_lyap_perturb_combined.csv
@@ -0,0 +1,97 @@
+label,perturb_after,sigma,n_samples,candidates,fd_eps,ckpt_root,ckpt_name,clean_acc,mean_sign_exact,mean_sign_token_acc,worst_sign_exact,best_sign_exact,selected_growth_mean,selected_growth_q90,retain_mean_on_clean_success,retain_worst_on_clean_success,rescue_mean_on_clean_fail,rescue_best_on_clean_fail
+trm_baseline_best,0,0.0,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8949999809265137,0.961814820766449,0.8949999809265137,0.8949999809265137,0.5487902164459229,0.6468369364738464,1.0,1.0,0.0,0.0
+trm_baseline_best,0,0.001,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8949999809265137,0.961814820766449,0.8949999809265137,0.8949999809265137,0.5487902164459229,0.6468369364738464,1.0,1.0,0.0,0.0
+trm_baseline_best,0,0.003,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8934999704360962,0.961506187915802,0.8790000081062317,0.9079999923706055,0.5487902164459229,0.6468369364738464,0.9905027747154236,0.9821228981018066,0.06666667014360428,0.13333334028720856
+trm_baseline_best,0,0.01,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.890500009059906,0.9606789350509644,0.8539999723434448,0.9269999861717224,0.5487902164459229,0.6468369364738464,0.9631285071372986,0.9396647810935974,0.27142858505249023,0.41904762387275696
+trm_baseline_best,0,0.03,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8859999775886536,0.9576419591903687,0.8610000014305115,0.9110000133514404,0.5487902164459229,0.6468369364738464,0.9642457962036133,0.9486033320426941,0.21904762089252472,0.32380953431129456
+trm_baseline_best,0,0.1,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.890500009059906,0.9597654342651367,0.8579999804496765,0.9229999780654907,0.5487902164459229,0.6468369364738464,0.9653631448745728,0.9474860429763794,0.25238096714019775,0.4095238149166107
+trm_baseline_best,4,0.0,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8980000019073486,0.9629135727882385,0.8980000019073486,0.8980000019073486,0.6417636275291443,0.8611953854560852,1.0,1.0,0.0,0.0
+trm_baseline_best,4,0.001,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8974999785423279,0.9625309109687805,0.8920000195503235,0.902999997138977,0.6417636275291443,0.8611953854560852,0.9966592192649841,0.993318498134613,0.02450980432331562,0.04901960864663124
+trm_baseline_best,4,0.003,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8934999704360962,0.9609135389328003,0.8659999966621399,0.9210000038146973,0.6417636275291443,0.8611953854560852,0.9760578870773315,0.9587973356246948,0.1666666716337204,0.28431373834609985
+trm_baseline_best,4,0.01,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8859999775886536,0.9590432047843933,0.8629999756813049,0.9089999794960022,0.6417636275291443,0.8611953854560852,0.9660356640815735,0.948775053024292,0.18137255311012268,0.2549019753932953
+trm_baseline_best,4,0.03,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8840000033378601,0.957679033279419,0.8560000061988831,0.9120000004768372,0.6417636275291443,0.8611953854560852,0.9649220705032349,0.9476614594459534,0.1715686321258545,0.29411765933036804
+trm_baseline_best,4,0.1,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8889999985694885,0.9591296315193176,0.8629999756813049,0.9150000214576721,0.6417636275291443,0.8611953854560852,0.9671491980552673,0.9521158337593079,0.20098039507865906,0.3235294222831726
+trm_baseline_best,8,0.0,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8970000147819519,0.9637901186943054,0.8970000147819519,0.8970000147819519,0.8348738551139832,1.2223771810531616,1.0,1.0,0.0,0.0
+trm_baseline_best,8,0.001,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8970000147819519,0.9635308980941772,0.8939999938011169,0.8999999761581421,0.8348738551139832,1.2223771810531616,0.997770369052887,0.9966555237770081,0.019417475908994675,0.03883495181798935
+trm_baseline_best,8,0.003,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8964999914169312,0.9620987772941589,0.8769999742507935,0.9160000085830688,0.8348738551139832,1.2223771810531616,0.9832776188850403,0.9732441306114197,0.14077669382095337,0.24271844327449799
+trm_baseline_best,8,0.01,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8945000171661377,0.9619569182395935,0.8790000081062317,0.9100000262260437,0.8348738551139832,1.2223771810531616,0.9777034521102905,0.9732441306114197,0.16990290582180023,0.28155338764190674
+trm_baseline_best,8,0.03,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8964999914169312,0.9622469544410706,0.8809999823570251,0.9120000004768372,0.8348738551139832,1.2223771810531616,0.9821627736091614,0.9754738211631775,0.15048544108867645,0.24271844327449799
+trm_baseline_best,8,0.1,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.890500009059906,0.9603580236434937,0.875,0.906000018119812,0.8348738551139832,1.2223771810531616,0.9771460294723511,0.9676700234413147,0.13592232763767242,0.20388349890708923
+trm_baseline_best,12,0.0,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8930000066757202,0.9613826274871826,0.8930000066757202,0.8930000066757202,1.5429991483688354,2.3959367275238037,1.0,1.0,0.0,0.0
+trm_baseline_best,12,0.001,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8924999833106995,0.961481511592865,0.8920000195503235,0.8930000066757202,1.5429991483688354,2.3959367275238037,0.99944007396698,0.9988802075386047,0.0,0.0
+trm_baseline_best,12,0.003,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8914999961853027,0.9610123634338379,0.8859999775886536,0.8970000147819519,1.5429991483688354,2.3959367275238037,0.9938409924507141,0.9910414218902588,0.037383176386356354,0.06542056053876877
+trm_baseline_best,12,0.01,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8914999961853027,0.9606543183326721,0.8849999904632568,0.8980000019073486,1.5429991483688354,2.3959367275238037,0.9921612739562988,0.9899216294288635,0.05140186846256256,0.09345794469118118
+trm_baseline_best,12,0.03,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8930000066757202,0.9610803127288818,0.8849999904632568,0.9010000228881836,1.5429991483688354,2.3959367275238037,0.9927211403846741,0.9899216294288635,0.0607476644217968,0.11214952915906906
+trm_baseline_best,12,0.1,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8930000066757202,0.961530864238739,0.8859999775886536,0.8999999761581421,1.5429991483688354,2.3959367275238037,0.9938409924507141,0.9910414218902588,0.05140186846256256,0.09345794469118118
+trm_multi4_best,0,0.0,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.8899999856948853,0.9582962989807129,0.8899999856948853,0.8899999856948853,0.5268718004226685,0.6456454992294312,1.0,1.0,0.0,0.0
+trm_multi4_best,0,0.001,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.8899999856948853,0.9582962989807129,0.8899999856948853,0.8899999856948853,0.5268718004226685,0.6456454992294312,1.0,1.0,0.0,0.0
+trm_multi4_best,0,0.003,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.8955000042915344,0.9605987668037415,0.8870000243186951,0.9039999842643738,0.5268718004226685,0.6456454992294312,0.9960674047470093,0.9932584166526794,0.08181817829608917,0.13636364042758942
+trm_multi4_best,0,0.01,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.9075000286102295,0.9647963047027588,0.8820000290870667,0.9330000281333923,0.5268718004226685,0.6456454992294312,0.9820224642753601,0.9674157500267029,0.30454546213150024,0.41818180680274963
+trm_multi4_best,0,0.03,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.8970000147819519,0.9618827104568481,0.8679999709129333,0.9259999990463257,0.5268718004226685,0.6456454992294312,0.9764044880867004,0.9606741666793823,0.2545454502105713,0.3909091055393219
+trm_multi4_best,0,0.1,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.9089999794960022,0.9657654166221619,0.8790000081062317,0.9390000104904175,0.5268718004226685,0.6456454992294312,0.983146071434021,0.968539297580719,0.30909091234207153,0.4636363685131073
+trm_multi4_best,4,0.0,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.921999990940094,0.970037043094635,0.921999990940094,0.921999990940094,0.5925552845001221,0.8220179677009583,1.0,1.0,0.0,0.0
+trm_multi4_best,4,0.001,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9175000190734863,0.9687839150428772,0.9120000004768372,0.9229999780654907,0.5925552845001221,0.8220179677009583,0.9940347075462341,0.989154040813446,0.012820512987673283,0.025641025975346565
+trm_multi4_best,4,0.003,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9135000109672546,0.9665740132331848,0.8980000019073486,0.9290000200271606,0.5925552845001221,0.8220179677009583,0.9772234559059143,0.9674620628356934,0.16025641560554504,0.24358974397182465
+trm_multi4_best,4,0.01,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9110000133514404,0.9670863747596741,0.8920000195503235,0.9300000071525574,0.5925552845001221,0.8220179677009583,0.9701735377311707,0.9598698616027832,0.21153846383094788,0.3333333432674408
+trm_multi4_best,4,0.03,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9125000238418579,0.9668148159980774,0.8930000066757202,0.9319999814033508,0.5925552845001221,0.8220179677009583,0.9718004465103149,0.9566160440444946,0.21153846383094788,0.28205129504203796
+trm_multi4_best,4,0.1,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9150000214576721,0.967555582523346,0.8949999809265137,0.9350000023841858,0.5925552845001221,0.8220179677009583,0.9750542044639587,0.962039053440094,0.20512820780277252,0.3076923191547394
+trm_multi4_best,8,0.0,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9110000133514404,0.9664074182510376,0.9110000133514404,0.9110000133514404,0.8208635449409485,1.2134735584259033,1.0,1.0,0.0,0.0
+trm_multi4_best,8,0.001,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9089999794960022,0.9657962918281555,0.9079999923706055,0.9100000262260437,0.8208635449409485,1.2134735584259033,0.9972557425498962,0.9967069029808044,0.00561797758564353,0.01123595517128706
+trm_multi4_best,8,0.003,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9120000004768372,0.9669011831283569,0.8960000276565552,0.9279999732971191,0.8208635449409485,1.2134735584259033,0.9868276715278625,0.9791437983512878,0.14606741070747375,0.24719101190567017
+trm_multi4_best,8,0.01,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9120000004768372,0.966401219367981,0.8980000019073486,0.9259999990463257,0.8208635449409485,1.2134735584259033,0.9868276715278625,0.9780461192131042,0.14606741070747375,0.21348313987255096
+trm_multi4_best,8,0.03,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9120000004768372,0.9667222499847412,0.8960000276565552,0.9279999732971191,0.8208635449409485,1.2134735584259033,0.983534574508667,0.9769483804702759,0.17977528274059296,0.2921348214149475
+trm_multi4_best,8,0.1,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9129999876022339,0.9660493731498718,0.8999999761581421,0.9259999990463257,0.8208635449409485,1.2134735584259033,0.9862788319587708,0.9802414774894714,0.16292135417461395,0.24719101190567017
+trm_multi4_best,12,0.0,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9190000295639038,0.9690986275672913,0.9190000295639038,0.9190000295639038,1.5447064638137817,1.7272931337356567,1.0,1.0,0.0,0.0
+trm_multi4_best,12,0.001,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9175000190734863,0.9684197902679443,0.9150000214576721,0.9200000166893005,1.5447064638137817,1.7272931337356567,0.9972796440124512,0.9956474304199219,0.012345679104328156,0.02469135820865631
+trm_multi4_best,12,0.003,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9175000190734863,0.9685803055763245,0.9110000133514404,0.9240000247955322,1.5447064638137817,1.7272931337356567,0.9940152168273926,0.9902067184448242,0.04938271641731262,0.08641975373029709
+trm_multi4_best,12,0.01,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9154999852180481,0.9677098989486694,0.9110000133514404,0.9200000166893005,1.5447064638137817,1.7272931337356567,0.990750789642334,0.9891186356544495,0.06172839552164078,0.09876543283462524
+trm_multi4_best,12,0.03,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9175000190734863,0.9684074521064758,0.9110000133514404,0.9240000247955322,1.5447064638137817,1.7272931337356567,0.9912948608398438,0.9902067184448242,0.08024691045284271,0.14814814925193787
+trm_multi4_best,12,0.1,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9200000166893005,0.9692777991294861,0.9139999747276306,0.9259999990463257,1.5447064638137817,1.7272931337356567,0.992927074432373,0.9912948608398438,0.09259258955717087,0.14814814925193787
+trm_baseline_best,0,0.0,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8949999809265137,0.961814820766449,0.8949999809265137,0.8949999809265137,0.4821663796901703,0.5784756541252136,1.0,1.0,0.0,0.0
+trm_baseline_best,0,0.001,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8949999809265137,0.961814820766449,0.8949999809265137,0.8949999809265137,0.4821663796901703,0.5784756541252136,1.0,1.0,0.0,0.0
+trm_baseline_best,0,0.003,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8980000019073486,0.9630987644195557,0.8790000081062317,0.9169999957084656,0.4821663796901703,0.5784756541252136,0.9893854856491089,0.9798882603645325,0.1190476194024086,0.21904762089252472
+trm_baseline_best,0,0.01,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.887499988079071,0.9590184688568115,0.8600000143051147,0.9150000214576721,0.4821663796901703,0.5784756541252136,0.9653631448745728,0.9463686943054199,0.22380952537059784,0.32380953431129456
+trm_baseline_best,0,0.03,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8895000219345093,0.9602900743484497,0.8619999885559082,0.9169999957084656,0.4821663796901703,0.5784756541252136,0.9670391082763672,0.9508379697799683,0.22857142984867096,0.3523809611797333
+trm_baseline_best,0,0.1,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8930000066757202,0.961030900478363,0.8619999885559082,0.9240000247955322,0.4821663796901703,0.5784756541252136,0.9670391082763672,0.9474860429763794,0.261904776096344,0.39047619700431824
+trm_baseline_best,4,0.0,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8980000019073486,0.9629135727882385,0.8980000019073486,0.8980000019073486,0.5513403415679932,0.7693743705749512,1.0,1.0,0.0,0.0
+trm_baseline_best,4,0.001,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8955000042915344,0.9619382619857788,0.8899999856948853,0.9010000228881836,0.5513403415679932,0.7693743705749512,0.9955456852912903,0.9910913109779358,0.014705882407724857,0.029411764815449715
+trm_baseline_best,4,0.003,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8880000114440918,0.9597716331481934,0.8669999837875366,0.9089999794960022,0.5513403415679932,0.7693743705749512,0.9716035723686218,0.9599109292030334,0.15196079015731812,0.2549019753932953
+trm_baseline_best,4,0.01,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8934999704360962,0.9613086581230164,0.8700000047683716,0.9169999957084656,0.5513403415679932,0.7693743705749512,0.9738307595252991,0.9610245227813721,0.18627451360225677,0.30392158031463623
+trm_baseline_best,4,0.03,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.890999972820282,0.9601975083351135,0.8650000095367432,0.9169999957084656,0.5513403415679932,0.7693743705749512,0.9688196182250977,0.9510022401809692,0.20588235557079315,0.30392158031463623
+trm_baseline_best,4,0.1,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8880000114440918,0.9588024616241455,0.8640000224113464,0.9120000004768372,0.5513403415679932,0.7693743705749512,0.9699332118034363,0.955456554889679,0.1666666716337204,0.27450981736183167
+trm_baseline_best,8,0.0,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8970000147819519,0.9637901186943054,0.8970000147819519,0.8970000147819519,0.6981183886528015,1.082965612411499,1.0,1.0,0.0,0.0
+trm_baseline_best,8,0.001,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8974999785423279,0.963746964931488,0.8939999938011169,0.9010000228881836,0.6981183886528015,1.082965612411499,0.9983277320861816,0.9966555237770081,0.019417475908994675,0.03883495181798935
+trm_baseline_best,8,0.003,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8934999704360962,0.9613950848579407,0.8769999742507935,0.9100000262260437,0.6981183886528015,1.082965612411499,0.9793757200241089,0.9698996543884277,0.1456310749053955,0.223300963640213
+trm_baseline_best,8,0.01,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8914999961853027,0.9603024125099182,0.8759999871253967,0.9070000052452087,0.6981183886528015,1.082965612411499,0.9788182973861694,0.9710144996643066,0.13106796145439148,0.21359223127365112
+trm_baseline_best,8,0.03,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8985000252723694,0.9633148312568665,0.8820000290870667,0.9150000214576721,0.6981183886528015,1.082965612411499,0.9816053509712219,0.9743589758872986,0.17475728690624237,0.27184465527534485
+trm_baseline_best,8,0.1,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8920000195503235,0.9607345461845398,0.8790000081062317,0.9049999713897705,0.6981183886528015,1.082965612411499,0.97826087474823,0.9698996543884277,0.14077669382095337,0.19417475163936615
+trm_baseline_best,12,0.0,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8930000066757202,0.9613826274871826,0.8930000066757202,0.8930000066757202,1.2681450843811035,2.1185879707336426,1.0,1.0,0.0,0.0
+trm_baseline_best,12,0.001,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8914999961853027,0.9607530236244202,0.8899999856948853,0.8930000066757202,1.2681450843811035,2.1185879707336426,0.9983202815055847,0.9966405630111694,0.0,0.0
+trm_baseline_best,12,0.003,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8930000066757202,0.9613209962844849,0.8880000114440918,0.8980000019073486,1.2681450843811035,2.1185879707336426,0.9955207109451294,0.9921612739562988,0.037383176386356354,0.05607476457953453
+trm_baseline_best,12,0.01,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8899999856948853,0.9602900743484497,0.8849999904632568,0.8949999809265137,1.2681450843811035,2.1185879707336426,0.9904814958572388,0.9888017773628235,0.05140186846256256,0.08411215245723724
+trm_baseline_best,12,0.03,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.890999972820282,0.9605308175086975,0.8870000243186951,0.8949999809265137,1.2681450843811035,2.1185879707336426,0.9921612739562988,0.9910414218902588,0.04672897234559059,0.07476635277271271
+trm_baseline_best,12,0.1,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8895000219345093,0.9602345824241638,0.8840000033378601,0.8949999809265137,1.2681450843811035,2.1185879707336426,0.9921612739562988,0.9899216294288635,0.032710280269384384,0.06542056053876877
+trm_multi4_best,0,0.0,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.8899999856948853,0.9582962989807129,0.8899999856948853,0.8899999856948853,0.4601479172706604,0.5775974988937378,1.0,1.0,0.0,0.0
+trm_multi4_best,0,0.001,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.8899999856948853,0.9582962989807129,0.8899999856948853,0.8899999856948853,0.4601479172706604,0.5775974988937378,1.0,1.0,0.0,0.0
+trm_multi4_best,0,0.003,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.8964999914169312,0.9609135985374451,0.8889999985694885,0.9039999842643738,0.4601479172706604,0.5775974988937378,0.9977527856826782,0.9955056309700012,0.07727272808551788,0.12727272510528564
+trm_multi4_best,0,0.01,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.9139999747276306,0.9676605463027954,0.8849999904632568,0.9430000185966492,0.4601479172706604,0.5775974988937378,0.9853932857513428,0.9719101190567017,0.33636364340782166,0.4909090995788574
+trm_multi4_best,0,0.03,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.9104999899864197,0.9665431380271912,0.8880000114440918,0.9330000281333923,0.4601479172706604,0.5775974988937378,0.9876404404640198,0.9786517024040222,0.2863636314868927,0.41818180680274963
+trm_multi4_best,0,0.1,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.9035000205039978,0.9636295437812805,0.8709999918937683,0.9359999895095825,0.4601479172706604,0.5775974988937378,0.9764044880867004,0.9606741666793823,0.3136363625526428,0.48181816935539246
+trm_multi4_best,4,0.0,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.921999990940094,0.970037043094635,0.921999990940094,0.921999990940094,0.5034276247024536,0.7669552564620972,1.0,1.0,0.0,0.0
+trm_multi4_best,4,0.001,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9204999804496765,0.9694691300392151,0.9169999957084656,0.9240000247955322,0.5034276247024536,0.7669552564620972,0.9972885251045227,0.9945769906044006,0.012820512987673283,0.025641025975346565
+trm_multi4_best,4,0.003,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9139999747276306,0.9672160148620605,0.8999999761581421,0.9279999732971191,0.5034276247024536,0.7669552564620972,0.9783080220222473,0.9674620628356934,0.1538461595773697,0.20512820780277252
+trm_multi4_best,4,0.01,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9164999723434448,0.9680370092391968,0.8980000019073486,0.9350000023841858,0.5034276247024536,0.7669552564620972,0.9761388301849365,0.9652928709983826,0.21153846383094788,0.3205128312110901
+trm_multi4_best,4,0.03,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9075000286102295,0.9649505615234375,0.8899999856948853,0.925000011920929,0.5034276247024536,0.7669552564620972,0.9690889120101929,0.9566160440444946,0.1794871836900711,0.25641027092933655
+trm_multi4_best,4,0.1,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9139999747276306,0.9669753313064575,0.8939999938011169,0.9340000152587891,0.5034276247024536,0.7669552564620972,0.97559654712677,0.9642082452774048,0.18589743971824646,0.3076923191547394
+trm_multi4_best,8,0.0,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9110000133514404,0.9664074182510376,0.9110000133514404,0.9110000133514404,0.6825441718101501,1.0761522054672241,1.0,1.0,0.0,0.0
+trm_multi4_best,8,0.001,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9125000238418579,0.9669321179389954,0.9089999794960022,0.9160000085830688,0.6825441718101501,1.0761522054672241,0.9989023208618164,0.997804582118988,0.028089888393878937,0.056179776787757874
+trm_multi4_best,8,0.003,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9139999747276306,0.9670432806015015,0.8980000019073486,0.9300000071525574,0.6825441718101501,1.0761522054672241,0.9890230298042297,0.983534574508667,0.14606741070747375,0.26966291666030884
+trm_multi4_best,8,0.01,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9160000085830688,0.9678518772125244,0.9020000100135803,0.9300000071525574,0.6825441718101501,1.0761522054672241,0.9879253506660461,0.983534574508667,0.17977528274059296,0.2921348214149475
+trm_multi4_best,8,0.03,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9125000238418579,0.9668642282485962,0.9020000100135803,0.9229999780654907,0.6825441718101501,1.0761522054672241,0.9879253506660461,0.9824368953704834,0.1404494345188141,0.20224718749523163
+trm_multi4_best,8,0.1,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9179999828338623,0.9684135317802429,0.9049999713897705,0.9309999942779541,0.6825441718101501,1.0761522054672241,0.9884741902351379,0.983534574508667,0.19662921130657196,0.2921348214149475
+trm_multi4_best,12,0.0,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9190000295639038,0.9690986275672913,0.9190000295639038,0.9190000295639038,1.2695486545562744,1.4881229400634766,1.0,1.0,0.0,0.0
+trm_multi4_best,12,0.001,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9175000190734863,0.9683518409729004,0.9160000085830688,0.9190000295639038,1.2695486545562744,1.4881229400634766,0.9978237152099609,0.9967355728149414,0.006172839552164078,0.012345679104328156
+trm_multi4_best,12,0.003,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9164999723434448,0.9678456783294678,0.9100000262260437,0.9229999780654907,1.2695486545562744,1.4881229400634766,0.992927074432373,0.9891186356544495,0.04938271641731262,0.08641975373029709
+trm_multi4_best,12,0.01,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9190000295639038,0.9691110849380493,0.9120000004768372,0.9259999990463257,1.2695486545562744,1.4881229400634766,0.9940152168273926,0.9912948608398438,0.06790123134851456,0.12345679104328156
+trm_multi4_best,12,0.03,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9164999723434448,0.9683271646499634,0.9120000004768372,0.9210000038146973,1.2695486545562744,1.4881229400634766,0.992927074432373,0.9912948608398438,0.04938271641731262,0.08641975373029709
+trm_multi4_best,12,0.1,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9200000166893005,0.9692345857620239,0.9139999747276306,0.9259999990463257,1.2695486545562744,1.4881229400634766,0.9934711456298828,0.9923830032348633,0.08641975373029709,0.14814814925193787
diff --git a/directional_lyap_perturb_eps_sweep/plots/final_gpu_status.txt b/directional_lyap_perturb_eps_sweep/plots/final_gpu_status.txt
new file mode 100644
index 0000000..4ddfa59
--- /dev/null
+++ b/directional_lyap_perturb_eps_sweep/plots/final_gpu_status.txt
@@ -0,0 +1,4 @@
+0, 13, 49140, 0
+1, 618, 49140, 29
+2, 45494, 49140, 0
+3, 13, 49140, 0
diff --git a/directional_lyap_perturb_eps_sweep/trm_baseline_best_step58590_n1000_c8_fdeps001.summary.csv b/directional_lyap_perturb_eps_sweep/trm_baseline_best_step58590_n1000_c8_fdeps001.summary.csv
new file mode 100644
index 0000000..fe53bc3
--- /dev/null
+++ b/directional_lyap_perturb_eps_sweep/trm_baseline_best_step58590_n1000_c8_fdeps001.summary.csv
@@ -0,0 +1,25 @@
+label,perturb_after,sigma,n_samples,candidates,fd_eps,ckpt_root,ckpt_name,clean_acc,mean_sign_exact,mean_sign_token_acc,worst_sign_exact,best_sign_exact,selected_growth_mean,selected_growth_q90,retain_mean_on_clean_success,retain_worst_on_clean_success,rescue_mean_on_clean_fail,rescue_best_on_clean_fail
+trm_baseline_best,0,0.0,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8949999809265137,0.961814820766449,0.8949999809265137,0.8949999809265137,0.5487902164459229,0.6468369364738464,1.0,1.0,0.0,0.0
+trm_baseline_best,0,0.001,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8949999809265137,0.961814820766449,0.8949999809265137,0.8949999809265137,0.5487902164459229,0.6468369364738464,1.0,1.0,0.0,0.0
+trm_baseline_best,0,0.003,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8934999704360962,0.961506187915802,0.8790000081062317,0.9079999923706055,0.5487902164459229,0.6468369364738464,0.9905027747154236,0.9821228981018066,0.06666667014360428,0.13333334028720856
+trm_baseline_best,0,0.01,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.890500009059906,0.9606789350509644,0.8539999723434448,0.9269999861717224,0.5487902164459229,0.6468369364738464,0.9631285071372986,0.9396647810935974,0.27142858505249023,0.41904762387275696
+trm_baseline_best,0,0.03,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8859999775886536,0.9576419591903687,0.8610000014305115,0.9110000133514404,0.5487902164459229,0.6468369364738464,0.9642457962036133,0.9486033320426941,0.21904762089252472,0.32380953431129456
+trm_baseline_best,0,0.1,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.890500009059906,0.9597654342651367,0.8579999804496765,0.9229999780654907,0.5487902164459229,0.6468369364738464,0.9653631448745728,0.9474860429763794,0.25238096714019775,0.4095238149166107
+trm_baseline_best,4,0.0,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8980000019073486,0.9629135727882385,0.8980000019073486,0.8980000019073486,0.6417636275291443,0.8611953854560852,1.0,1.0,0.0,0.0
+trm_baseline_best,4,0.001,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8974999785423279,0.9625309109687805,0.8920000195503235,0.902999997138977,0.6417636275291443,0.8611953854560852,0.9966592192649841,0.993318498134613,0.02450980432331562,0.04901960864663124
+trm_baseline_best,4,0.003,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8934999704360962,0.9609135389328003,0.8659999966621399,0.9210000038146973,0.6417636275291443,0.8611953854560852,0.9760578870773315,0.9587973356246948,0.1666666716337204,0.28431373834609985
+trm_baseline_best,4,0.01,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8859999775886536,0.9590432047843933,0.8629999756813049,0.9089999794960022,0.6417636275291443,0.8611953854560852,0.9660356640815735,0.948775053024292,0.18137255311012268,0.2549019753932953
+trm_baseline_best,4,0.03,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8840000033378601,0.957679033279419,0.8560000061988831,0.9120000004768372,0.6417636275291443,0.8611953854560852,0.9649220705032349,0.9476614594459534,0.1715686321258545,0.29411765933036804
+trm_baseline_best,4,0.1,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8889999985694885,0.9591296315193176,0.8629999756813049,0.9150000214576721,0.6417636275291443,0.8611953854560852,0.9671491980552673,0.9521158337593079,0.20098039507865906,0.3235294222831726
+trm_baseline_best,8,0.0,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8970000147819519,0.9637901186943054,0.8970000147819519,0.8970000147819519,0.8348738551139832,1.2223771810531616,1.0,1.0,0.0,0.0
+trm_baseline_best,8,0.001,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8970000147819519,0.9635308980941772,0.8939999938011169,0.8999999761581421,0.8348738551139832,1.2223771810531616,0.997770369052887,0.9966555237770081,0.019417475908994675,0.03883495181798935
+trm_baseline_best,8,0.003,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8964999914169312,0.9620987772941589,0.8769999742507935,0.9160000085830688,0.8348738551139832,1.2223771810531616,0.9832776188850403,0.9732441306114197,0.14077669382095337,0.24271844327449799
+trm_baseline_best,8,0.01,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8945000171661377,0.9619569182395935,0.8790000081062317,0.9100000262260437,0.8348738551139832,1.2223771810531616,0.9777034521102905,0.9732441306114197,0.16990290582180023,0.28155338764190674
+trm_baseline_best,8,0.03,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8964999914169312,0.9622469544410706,0.8809999823570251,0.9120000004768372,0.8348738551139832,1.2223771810531616,0.9821627736091614,0.9754738211631775,0.15048544108867645,0.24271844327449799
+trm_baseline_best,8,0.1,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.890500009059906,0.9603580236434937,0.875,0.906000018119812,0.8348738551139832,1.2223771810531616,0.9771460294723511,0.9676700234413147,0.13592232763767242,0.20388349890708923
+trm_baseline_best,12,0.0,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8930000066757202,0.9613826274871826,0.8930000066757202,0.8930000066757202,1.5429991483688354,2.3959367275238037,1.0,1.0,0.0,0.0
+trm_baseline_best,12,0.001,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8924999833106995,0.961481511592865,0.8920000195503235,0.8930000066757202,1.5429991483688354,2.3959367275238037,0.99944007396698,0.9988802075386047,0.0,0.0
+trm_baseline_best,12,0.003,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8914999961853027,0.9610123634338379,0.8859999775886536,0.8970000147819519,1.5429991483688354,2.3959367275238037,0.9938409924507141,0.9910414218902588,0.037383176386356354,0.06542056053876877
+trm_baseline_best,12,0.01,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8914999961853027,0.9606543183326721,0.8849999904632568,0.8980000019073486,1.5429991483688354,2.3959367275238037,0.9921612739562988,0.9899216294288635,0.05140186846256256,0.09345794469118118
+trm_baseline_best,12,0.03,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8930000066757202,0.9610803127288818,0.8849999904632568,0.9010000228881836,1.5429991483688354,2.3959367275238037,0.9927211403846741,0.9899216294288635,0.0607476644217968,0.11214952915906906
+trm_baseline_best,12,0.1,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8930000066757202,0.961530864238739,0.8859999775886536,0.8999999761581421,1.5429991483688354,2.3959367275238037,0.9938409924507141,0.9910414218902588,0.05140186846256256,0.09345794469118118
diff --git a/directional_lyap_perturb_eps_sweep/trm_baseline_best_step58590_n1000_c8_fdeps003.summary.csv b/directional_lyap_perturb_eps_sweep/trm_baseline_best_step58590_n1000_c8_fdeps003.summary.csv
new file mode 100644
index 0000000..696a9e9
--- /dev/null
+++ b/directional_lyap_perturb_eps_sweep/trm_baseline_best_step58590_n1000_c8_fdeps003.summary.csv
@@ -0,0 +1,25 @@
+label,perturb_after,sigma,n_samples,candidates,fd_eps,ckpt_root,ckpt_name,clean_acc,mean_sign_exact,mean_sign_token_acc,worst_sign_exact,best_sign_exact,selected_growth_mean,selected_growth_q90,retain_mean_on_clean_success,retain_worst_on_clean_success,rescue_mean_on_clean_fail,rescue_best_on_clean_fail
+trm_baseline_best,0,0.0,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8949999809265137,0.961814820766449,0.8949999809265137,0.8949999809265137,0.4821663796901703,0.5784756541252136,1.0,1.0,0.0,0.0
+trm_baseline_best,0,0.001,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8949999809265137,0.961814820766449,0.8949999809265137,0.8949999809265137,0.4821663796901703,0.5784756541252136,1.0,1.0,0.0,0.0
+trm_baseline_best,0,0.003,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8980000019073486,0.9630987644195557,0.8790000081062317,0.9169999957084656,0.4821663796901703,0.5784756541252136,0.9893854856491089,0.9798882603645325,0.1190476194024086,0.21904762089252472
+trm_baseline_best,0,0.01,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.887499988079071,0.9590184688568115,0.8600000143051147,0.9150000214576721,0.4821663796901703,0.5784756541252136,0.9653631448745728,0.9463686943054199,0.22380952537059784,0.32380953431129456
+trm_baseline_best,0,0.03,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8895000219345093,0.9602900743484497,0.8619999885559082,0.9169999957084656,0.4821663796901703,0.5784756541252136,0.9670391082763672,0.9508379697799683,0.22857142984867096,0.3523809611797333
+trm_baseline_best,0,0.1,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8930000066757202,0.961030900478363,0.8619999885559082,0.9240000247955322,0.4821663796901703,0.5784756541252136,0.9670391082763672,0.9474860429763794,0.261904776096344,0.39047619700431824
+trm_baseline_best,4,0.0,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8980000019073486,0.9629135727882385,0.8980000019073486,0.8980000019073486,0.5513403415679932,0.7693743705749512,1.0,1.0,0.0,0.0
+trm_baseline_best,4,0.001,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8955000042915344,0.9619382619857788,0.8899999856948853,0.9010000228881836,0.5513403415679932,0.7693743705749512,0.9955456852912903,0.9910913109779358,0.014705882407724857,0.029411764815449715
+trm_baseline_best,4,0.003,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8880000114440918,0.9597716331481934,0.8669999837875366,0.9089999794960022,0.5513403415679932,0.7693743705749512,0.9716035723686218,0.9599109292030334,0.15196079015731812,0.2549019753932953
+trm_baseline_best,4,0.01,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8934999704360962,0.9613086581230164,0.8700000047683716,0.9169999957084656,0.5513403415679932,0.7693743705749512,0.9738307595252991,0.9610245227813721,0.18627451360225677,0.30392158031463623
+trm_baseline_best,4,0.03,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.890999972820282,0.9601975083351135,0.8650000095367432,0.9169999957084656,0.5513403415679932,0.7693743705749512,0.9688196182250977,0.9510022401809692,0.20588235557079315,0.30392158031463623
+trm_baseline_best,4,0.1,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8880000114440918,0.9588024616241455,0.8640000224113464,0.9120000004768372,0.5513403415679932,0.7693743705749512,0.9699332118034363,0.955456554889679,0.1666666716337204,0.27450981736183167
+trm_baseline_best,8,0.0,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8970000147819519,0.9637901186943054,0.8970000147819519,0.8970000147819519,0.6981183886528015,1.082965612411499,1.0,1.0,0.0,0.0
+trm_baseline_best,8,0.001,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8974999785423279,0.963746964931488,0.8939999938011169,0.9010000228881836,0.6981183886528015,1.082965612411499,0.9983277320861816,0.9966555237770081,0.019417475908994675,0.03883495181798935
+trm_baseline_best,8,0.003,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8934999704360962,0.9613950848579407,0.8769999742507935,0.9100000262260437,0.6981183886528015,1.082965612411499,0.9793757200241089,0.9698996543884277,0.1456310749053955,0.223300963640213
+trm_baseline_best,8,0.01,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8914999961853027,0.9603024125099182,0.8759999871253967,0.9070000052452087,0.6981183886528015,1.082965612411499,0.9788182973861694,0.9710144996643066,0.13106796145439148,0.21359223127365112
+trm_baseline_best,8,0.03,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8985000252723694,0.9633148312568665,0.8820000290870667,0.9150000214576721,0.6981183886528015,1.082965612411499,0.9816053509712219,0.9743589758872986,0.17475728690624237,0.27184465527534485
+trm_baseline_best,8,0.1,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8920000195503235,0.9607345461845398,0.8790000081062317,0.9049999713897705,0.6981183886528015,1.082965612411499,0.97826087474823,0.9698996543884277,0.14077669382095337,0.19417475163936615
+trm_baseline_best,12,0.0,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8930000066757202,0.9613826274871826,0.8930000066757202,0.8930000066757202,1.2681450843811035,2.1185879707336426,1.0,1.0,0.0,0.0
+trm_baseline_best,12,0.001,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8914999961853027,0.9607530236244202,0.8899999856948853,0.8930000066757202,1.2681450843811035,2.1185879707336426,0.9983202815055847,0.9966405630111694,0.0,0.0
+trm_baseline_best,12,0.003,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8930000066757202,0.9613209962844849,0.8880000114440918,0.8980000019073486,1.2681450843811035,2.1185879707336426,0.9955207109451294,0.9921612739562988,0.037383176386356354,0.05607476457953453
+trm_baseline_best,12,0.01,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8899999856948853,0.9602900743484497,0.8849999904632568,0.8949999809265137,1.2681450843811035,2.1185879707336426,0.9904814958572388,0.9888017773628235,0.05140186846256256,0.08411215245723724
+trm_baseline_best,12,0.03,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.890999972820282,0.9605308175086975,0.8870000243186951,0.8949999809265137,1.2681450843811035,2.1185879707336426,0.9921612739562988,0.9910414218902588,0.04672897234559059,0.07476635277271271
+trm_baseline_best,12,0.1,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8895000219345093,0.9602345824241638,0.8840000033378601,0.8949999809265137,1.2681450843811035,2.1185879707336426,0.9921612739562988,0.9899216294288635,0.032710280269384384,0.06542056053876877
diff --git a/directional_lyap_perturb_eps_sweep/trm_multi4_best_step35805_n1000_c8_fdeps001.summary.csv b/directional_lyap_perturb_eps_sweep/trm_multi4_best_step35805_n1000_c8_fdeps001.summary.csv
new file mode 100644
index 0000000..3cd781e
--- /dev/null
+++ b/directional_lyap_perturb_eps_sweep/trm_multi4_best_step35805_n1000_c8_fdeps001.summary.csv
@@ -0,0 +1,25 @@
+label,perturb_after,sigma,n_samples,candidates,fd_eps,ckpt_root,ckpt_name,clean_acc,mean_sign_exact,mean_sign_token_acc,worst_sign_exact,best_sign_exact,selected_growth_mean,selected_growth_q90,retain_mean_on_clean_success,retain_worst_on_clean_success,rescue_mean_on_clean_fail,rescue_best_on_clean_fail
+trm_multi4_best,0,0.0,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.8899999856948853,0.9582962989807129,0.8899999856948853,0.8899999856948853,0.5268718004226685,0.6456454992294312,1.0,1.0,0.0,0.0
+trm_multi4_best,0,0.001,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.8899999856948853,0.9582962989807129,0.8899999856948853,0.8899999856948853,0.5268718004226685,0.6456454992294312,1.0,1.0,0.0,0.0
+trm_multi4_best,0,0.003,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.8955000042915344,0.9605987668037415,0.8870000243186951,0.9039999842643738,0.5268718004226685,0.6456454992294312,0.9960674047470093,0.9932584166526794,0.08181817829608917,0.13636364042758942
+trm_multi4_best,0,0.01,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.9075000286102295,0.9647963047027588,0.8820000290870667,0.9330000281333923,0.5268718004226685,0.6456454992294312,0.9820224642753601,0.9674157500267029,0.30454546213150024,0.41818180680274963
+trm_multi4_best,0,0.03,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.8970000147819519,0.9618827104568481,0.8679999709129333,0.9259999990463257,0.5268718004226685,0.6456454992294312,0.9764044880867004,0.9606741666793823,0.2545454502105713,0.3909091055393219
+trm_multi4_best,0,0.1,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.9089999794960022,0.9657654166221619,0.8790000081062317,0.9390000104904175,0.5268718004226685,0.6456454992294312,0.983146071434021,0.968539297580719,0.30909091234207153,0.4636363685131073
+trm_multi4_best,4,0.0,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.921999990940094,0.970037043094635,0.921999990940094,0.921999990940094,0.5925552845001221,0.8220179677009583,1.0,1.0,0.0,0.0
+trm_multi4_best,4,0.001,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9175000190734863,0.9687839150428772,0.9120000004768372,0.9229999780654907,0.5925552845001221,0.8220179677009583,0.9940347075462341,0.989154040813446,0.012820512987673283,0.025641025975346565
+trm_multi4_best,4,0.003,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9135000109672546,0.9665740132331848,0.8980000019073486,0.9290000200271606,0.5925552845001221,0.8220179677009583,0.9772234559059143,0.9674620628356934,0.16025641560554504,0.24358974397182465
+trm_multi4_best,4,0.01,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9110000133514404,0.9670863747596741,0.8920000195503235,0.9300000071525574,0.5925552845001221,0.8220179677009583,0.9701735377311707,0.9598698616027832,0.21153846383094788,0.3333333432674408
+trm_multi4_best,4,0.03,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9125000238418579,0.9668148159980774,0.8930000066757202,0.9319999814033508,0.5925552845001221,0.8220179677009583,0.9718004465103149,0.9566160440444946,0.21153846383094788,0.28205129504203796
+trm_multi4_best,4,0.1,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9150000214576721,0.967555582523346,0.8949999809265137,0.9350000023841858,0.5925552845001221,0.8220179677009583,0.9750542044639587,0.962039053440094,0.20512820780277252,0.3076923191547394
+trm_multi4_best,8,0.0,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9110000133514404,0.9664074182510376,0.9110000133514404,0.9110000133514404,0.8208635449409485,1.2134735584259033,1.0,1.0,0.0,0.0
+trm_multi4_best,8,0.001,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9089999794960022,0.9657962918281555,0.9079999923706055,0.9100000262260437,0.8208635449409485,1.2134735584259033,0.9972557425498962,0.9967069029808044,0.00561797758564353,0.01123595517128706
+trm_multi4_best,8,0.003,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9120000004768372,0.9669011831283569,0.8960000276565552,0.9279999732971191,0.8208635449409485,1.2134735584259033,0.9868276715278625,0.9791437983512878,0.14606741070747375,0.24719101190567017
+trm_multi4_best,8,0.01,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9120000004768372,0.966401219367981,0.8980000019073486,0.9259999990463257,0.8208635449409485,1.2134735584259033,0.9868276715278625,0.9780461192131042,0.14606741070747375,0.21348313987255096
+trm_multi4_best,8,0.03,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9120000004768372,0.9667222499847412,0.8960000276565552,0.9279999732971191,0.8208635449409485,1.2134735584259033,0.983534574508667,0.9769483804702759,0.17977528274059296,0.2921348214149475
+trm_multi4_best,8,0.1,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9129999876022339,0.9660493731498718,0.8999999761581421,0.9259999990463257,0.8208635449409485,1.2134735584259033,0.9862788319587708,0.9802414774894714,0.16292135417461395,0.24719101190567017
+trm_multi4_best,12,0.0,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9190000295639038,0.9690986275672913,0.9190000295639038,0.9190000295639038,1.5447064638137817,1.7272931337356567,1.0,1.0,0.0,0.0
+trm_multi4_best,12,0.001,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9175000190734863,0.9684197902679443,0.9150000214576721,0.9200000166893005,1.5447064638137817,1.7272931337356567,0.9972796440124512,0.9956474304199219,0.012345679104328156,0.02469135820865631
+trm_multi4_best,12,0.003,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9175000190734863,0.9685803055763245,0.9110000133514404,0.9240000247955322,1.5447064638137817,1.7272931337356567,0.9940152168273926,0.9902067184448242,0.04938271641731262,0.08641975373029709
+trm_multi4_best,12,0.01,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9154999852180481,0.9677098989486694,0.9110000133514404,0.9200000166893005,1.5447064638137817,1.7272931337356567,0.990750789642334,0.9891186356544495,0.06172839552164078,0.09876543283462524
+trm_multi4_best,12,0.03,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9175000190734863,0.9684074521064758,0.9110000133514404,0.9240000247955322,1.5447064638137817,1.7272931337356567,0.9912948608398438,0.9902067184448242,0.08024691045284271,0.14814814925193787
+trm_multi4_best,12,0.1,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9200000166893005,0.9692777991294861,0.9139999747276306,0.9259999990463257,1.5447064638137817,1.7272931337356567,0.992927074432373,0.9912948608398438,0.09259258955717087,0.14814814925193787
diff --git a/directional_lyap_perturb_eps_sweep/trm_multi4_best_step35805_n1000_c8_fdeps003.summary.csv b/directional_lyap_perturb_eps_sweep/trm_multi4_best_step35805_n1000_c8_fdeps003.summary.csv
new file mode 100644
index 0000000..ee59fd9
--- /dev/null
+++ b/directional_lyap_perturb_eps_sweep/trm_multi4_best_step35805_n1000_c8_fdeps003.summary.csv
@@ -0,0 +1,25 @@
+label,perturb_after,sigma,n_samples,candidates,fd_eps,ckpt_root,ckpt_name,clean_acc,mean_sign_exact,mean_sign_token_acc,worst_sign_exact,best_sign_exact,selected_growth_mean,selected_growth_q90,retain_mean_on_clean_success,retain_worst_on_clean_success,rescue_mean_on_clean_fail,rescue_best_on_clean_fail
+trm_multi4_best,0,0.0,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.8899999856948853,0.9582962989807129,0.8899999856948853,0.8899999856948853,0.4601479172706604,0.5775974988937378,1.0,1.0,0.0,0.0
+trm_multi4_best,0,0.001,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.8899999856948853,0.9582962989807129,0.8899999856948853,0.8899999856948853,0.4601479172706604,0.5775974988937378,1.0,1.0,0.0,0.0
+trm_multi4_best,0,0.003,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.8964999914169312,0.9609135985374451,0.8889999985694885,0.9039999842643738,0.4601479172706604,0.5775974988937378,0.9977527856826782,0.9955056309700012,0.07727272808551788,0.12727272510528564
+trm_multi4_best,0,0.01,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.9139999747276306,0.9676605463027954,0.8849999904632568,0.9430000185966492,0.4601479172706604,0.5775974988937378,0.9853932857513428,0.9719101190567017,0.33636364340782166,0.4909090995788574
+trm_multi4_best,0,0.03,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.9104999899864197,0.9665431380271912,0.8880000114440918,0.9330000281333923,0.4601479172706604,0.5775974988937378,0.9876404404640198,0.9786517024040222,0.2863636314868927,0.41818180680274963
+trm_multi4_best,0,0.1,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.9035000205039978,0.9636295437812805,0.8709999918937683,0.9359999895095825,0.4601479172706604,0.5775974988937378,0.9764044880867004,0.9606741666793823,0.3136363625526428,0.48181816935539246
+trm_multi4_best,4,0.0,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.921999990940094,0.970037043094635,0.921999990940094,0.921999990940094,0.5034276247024536,0.7669552564620972,1.0,1.0,0.0,0.0
+trm_multi4_best,4,0.001,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9204999804496765,0.9694691300392151,0.9169999957084656,0.9240000247955322,0.5034276247024536,0.7669552564620972,0.9972885251045227,0.9945769906044006,0.012820512987673283,0.025641025975346565
+trm_multi4_best,4,0.003,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9139999747276306,0.9672160148620605,0.8999999761581421,0.9279999732971191,0.5034276247024536,0.7669552564620972,0.9783080220222473,0.9674620628356934,0.1538461595773697,0.20512820780277252
+trm_multi4_best,4,0.01,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9164999723434448,0.9680370092391968,0.8980000019073486,0.9350000023841858,0.5034276247024536,0.7669552564620972,0.9761388301849365,0.9652928709983826,0.21153846383094788,0.3205128312110901
+trm_multi4_best,4,0.03,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9075000286102295,0.9649505615234375,0.8899999856948853,0.925000011920929,0.5034276247024536,0.7669552564620972,0.9690889120101929,0.9566160440444946,0.1794871836900711,0.25641027092933655
+trm_multi4_best,4,0.1,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9139999747276306,0.9669753313064575,0.8939999938011169,0.9340000152587891,0.5034276247024536,0.7669552564620972,0.97559654712677,0.9642082452774048,0.18589743971824646,0.3076923191547394
+trm_multi4_best,8,0.0,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9110000133514404,0.9664074182510376,0.9110000133514404,0.9110000133514404,0.6825441718101501,1.0761522054672241,1.0,1.0,0.0,0.0
+trm_multi4_best,8,0.001,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9125000238418579,0.9669321179389954,0.9089999794960022,0.9160000085830688,0.6825441718101501,1.0761522054672241,0.9989023208618164,0.997804582118988,0.028089888393878937,0.056179776787757874
+trm_multi4_best,8,0.003,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9139999747276306,0.9670432806015015,0.8980000019073486,0.9300000071525574,0.6825441718101501,1.0761522054672241,0.9890230298042297,0.983534574508667,0.14606741070747375,0.26966291666030884
+trm_multi4_best,8,0.01,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9160000085830688,0.9678518772125244,0.9020000100135803,0.9300000071525574,0.6825441718101501,1.0761522054672241,0.9879253506660461,0.983534574508667,0.17977528274059296,0.2921348214149475
+trm_multi4_best,8,0.03,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9125000238418579,0.9668642282485962,0.9020000100135803,0.9229999780654907,0.6825441718101501,1.0761522054672241,0.9879253506660461,0.9824368953704834,0.1404494345188141,0.20224718749523163
+trm_multi4_best,8,0.1,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9179999828338623,0.9684135317802429,0.9049999713897705,0.9309999942779541,0.6825441718101501,1.0761522054672241,0.9884741902351379,0.983534574508667,0.19662921130657196,0.2921348214149475
+trm_multi4_best,12,0.0,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9190000295639038,0.9690986275672913,0.9190000295639038,0.9190000295639038,1.2695486545562744,1.4881229400634766,1.0,1.0,0.0,0.0
+trm_multi4_best,12,0.001,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9175000190734863,0.9683518409729004,0.9160000085830688,0.9190000295639038,1.2695486545562744,1.4881229400634766,0.9978237152099609,0.9967355728149414,0.006172839552164078,0.012345679104328156
+trm_multi4_best,12,0.003,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9164999723434448,0.9678456783294678,0.9100000262260437,0.9229999780654907,1.2695486545562744,1.4881229400634766,0.992927074432373,0.9891186356544495,0.04938271641731262,0.08641975373029709
+trm_multi4_best,12,0.01,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9190000295639038,0.9691110849380493,0.9120000004768372,0.9259999990463257,1.2695486545562744,1.4881229400634766,0.9940152168273926,0.9912948608398438,0.06790123134851456,0.12345679104328156
+trm_multi4_best,12,0.03,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9164999723434448,0.9683271646499634,0.9120000004768372,0.9210000038146973,1.2695486545562744,1.4881229400634766,0.992927074432373,0.9912948608398438,0.04938271641731262,0.08641975373029709
+trm_multi4_best,12,0.1,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9200000166893005,0.9692345857620239,0.9139999747276306,0.9259999990463257,1.2695486545562744,1.4881229400634766,0.9934711456298828,0.9923830032348633,0.08641975373029709,0.14814814925193787
diff --git a/directional_lyap_perturb_eps_sweep/watch_queue_and_plot.sh b/directional_lyap_perturb_eps_sweep/watch_queue_and_plot.sh
new file mode 100755
index 0000000..4a44936
--- /dev/null
+++ b/directional_lyap_perturb_eps_sweep/watch_queue_and_plot.sh
@@ -0,0 +1,55 @@
#!/usr/bin/env bash
# Queue watcher for the directional-Lyapunov fd-eps sweep.
#
# Waits on jobs tracked through PID files, launches one more run on GPU 0,
# waits for the remaining jobs, then plots all summary CSVs.
set -euo pipefail

# Repository checkout and the interpreter used for every Python invocation.
ROOT="/home/yurenh2/rrm"
PY="/home/yurenh2/miniconda3/envs/rrm/bin/python"

# Everything below uses paths relative to the repository root.
cd "${ROOT}"

# Sweep directory holding logs/, plots/ and the per-run output files.
DIR="research/flossing/directional_lyap_perturb_eps_sweep"

# Checkpoint directories of the two runs that are compared.
BASE_CKPT="${ROOT}/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro"
MULTI_CKPT="${ROOT}/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro"

# Sweep grid: perturbation magnitudes and the recurrent steps to perturb after.
SIGMAS="0,0.001,0.003,0.01,0.03,0.1"
AFTERS="0,4,8,12"

# Evaluation size, number of candidate directions, batch size and RNG seed.
N=1000
CAND=8
BS=16
SEED=20260608
+
#######################################
# Block until the process recorded in a PID file has exited.
#
# The PID is validated before polling. Without that check an empty or
# corrupt PID file makes `kill -0` fail immediately (its stderr is hidden),
# which would be reported as "done" while the job may still be running, and
# a PID of 0 or a negative number would probe a whole process group.
#
# Arguments:
#   $1 - path of a file whose content is the PID to watch
#   $2 - polling interval in seconds (optional, default 60)
# Outputs:
#   "watch <file>: <pid>" before polling and "done <file>: <pid>" once the
#   process is gone, both on stdout; diagnostics on stderr.
# Returns:
#   0 when the process no longer exists; non-zero when the PID file cannot
#   be read or does not hold a positive integer.
#######################################
wait_pid_file() {
  local pf="$1"
  local interval="${2:-60}"
  local pid

  pid=$(cat -- "${pf}") || return 1
  # Tolerate stray blanks or CR/LF around the number.
  pid="${pid//[[:space:]]/}"
  if [[ ! "${pid}" =~ ^[1-9][0-9]*$ ]]; then
    printf 'wait_pid_file: %s does not contain a valid PID\n' "${pf}" >&2
    return 1
  fi

  echo "watch ${pf}: ${pid}"
  # kill -0 delivers no signal; it only reports whether the PID can be
  # signalled by this user.
  while kill -0 "${pid}" 2>/dev/null; do
    sleep "${interval}"
  done
  echo "done ${pf}: ${pid}"
}
+
#######################################
# Launch the multi4 checkpoint sweep with --fd-eps 0.03 as a detached job
# restricted to GPU 0, and record its PID next to its log file.
#
# Globals (read):
#   ROOT, PY, DIR, MULTI_CKPT, N, BS, CAND, SIGMAS, AFTERS, SEED
# Outputs:
#   Writes ${DIR}/logs/<run>.log (job stdout+stderr) and
#   ${DIR}/logs/<run>.pid, and prints "launched <run>: <pid>" to stdout.
#######################################
launch_multi_eps003_on_gpu0() {
  local run_name=trm_multi4_best_step35805_n1000_c8_fdeps003
  local log_path="${DIR}/logs/${run_name}.log"
  local pid_path="${DIR}/logs/${run_name}.pid"
  local job_pid

  # Command line handed to the detached shell; `exec` replaces that shell
  # with the Python process.
  local cmd="cd '${ROOT}'"
  cmd+=" && export CUDA_VISIBLE_DEVICES='0' PYTHONUNBUFFERED=1"
  cmd+=" && exec '${PY}' research/flossing/directional_lyap_perturb_robustness.py"
  cmd+=" --ckpt-root '${MULTI_CKPT}' --ckpt-name step_35805 --label trm_multi4_best"
  cmd+=" --n-samples '${N}' --batch-size '${BS}' --candidates '${CAND}' --fd-eps 0.03"
  cmd+=" --sigmas '${SIGMAS}' --perturb-afters '${AFTERS}' --seed '${SEED}'"
  cmd+=" --out-prefix '${DIR}/${run_name}'"

  # setsid starts the job in its own session; stdin is detached and all
  # output goes to the log file.
  setsid bash -c "${cmd}" > "${log_path}" 2>&1 < /dev/null &
  job_pid=$!

  echo "${job_pid}" > "${pid_path}"
  echo "launched ${run_name}: ${job_pid}"
}
+
# Run names shared by the PID files under logs/ and the summary CSVs.
RUN_BASE_EPS001=trm_baseline_best_step58590_n1000_c8_fdeps001
RUN_MULTI_EPS001=trm_multi4_best_step35805_n1000_c8_fdeps001
RUN_BASE_EPS003=trm_baseline_best_step58590_n1000_c8_fdeps003
RUN_MULTI_EPS003=trm_multi4_best_step35805_n1000_c8_fdeps003

# The extra multi4 job is started only after the first baseline job exits.
wait_pid_file "${DIR}/logs/${RUN_BASE_EPS001}.pid"
launch_multi_eps003_on_gpu0

# Barrier: every remaining job must have exited before plotting.
for run in "${RUN_MULTI_EPS001}" "${RUN_BASE_EPS003}" "${RUN_MULTI_EPS003}"; do
  wait_pid_file "${DIR}/logs/${run}.pid"
done

# Plot the four summaries together.
"${PY}" research/flossing/plot_directional_lyap_perturb.py \
  --summaries \
  "${DIR}/${RUN_BASE_EPS001}.summary.csv" \
  "${DIR}/${RUN_MULTI_EPS001}.summary.csv" \
  "${DIR}/${RUN_BASE_EPS003}.summary.csv" \
  "${DIR}/${RUN_MULTI_EPS003}.summary.csv" \
  --out-dir "${DIR}/plots" \
  --slice-sigma 0.03

# Snapshot of GPU memory/utilisation after the whole queue has finished.
nvidia-smi \
  --query-gpu=index,memory.used,memory.total,utilization.gpu \
  --format=csv,noheader,nounits \
  > "${DIR}/plots/final_gpu_status.txt"
diff --git a/directional_lyap_perturb_robustness.py b/directional_lyap_perturb_robustness.py
new file mode 100644
index 0000000..c0a803c
--- /dev/null
+++ b/directional_lyap_perturb_robustness.py
@@ -0,0 +1,428 @@
+"""Directional late-state perturbation using finite-difference Lyapunov search.
+
+At a chosen recurrent step, sample several unit tangent directions, propagate
+small shadow trajectories through the remaining deterministic dynamics, choose
+the direction with maximal final hidden-state expansion, then perturb along
+that selected direction with +/- sigma and measure answer robustness.
+
+This is a practical finite-difference proxy for perturbing along a local top
+Lyapunov direction without paying exact JVP costs.
+"""
+from __future__ import annotations
+
+import argparse
+import csv
+import json
+import math
+import sys
+from dataclasses import replace
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+import torch
+import yaml
+
+
+TRM_DIR = Path("/home/yurenh2/rrm/trm")
+sys.path.insert(0, str(TRM_DIR))
+
+from models.recursive_reasoning.trm import ( # noqa: E402
+ TinyRecursiveReasoningModel_ACTV1,
+ TinyRecursiveReasoningModel_ACTV1InnerCarry,
+)
+
+
+IGNORE_LABEL_ID = -100
+
+
def parse_float_list(text: str) -> list[float]:
    """Parse a comma-separated string into a list of floats.

    Whitespace around each item is ignored and empty items (for example from
    a trailing comma) are skipped. Raises ``ValueError`` if a non-empty item
    is not a valid float literal.
    """
    values: list[float] = []
    for piece in text.split(","):
        token = piece.strip()
        if token:
            values.append(float(token))
    return values
+
+
def parse_int_list(text: str) -> list[int]:
    """Parse a comma-separated string into a list of ints.

    Whitespace around each item is ignored and empty items are skipped.
    Raises ``ValueError`` if a non-empty item is not a valid integer literal.
    """
    values: list[int] = []
    for piece in text.split(","):
        token = piece.strip()
        if token:
            values.append(int(token))
    return values
+
+
def load_model(ckpt_root: Path, ckpt_name: str, device: str):
    """Build the TRM model described by a checkpoint directory and load weights.

    Args:
        ckpt_root: Directory containing ``all_config.yaml`` and the checkpoint.
        ckpt_name: File name of the checkpoint inside ``ckpt_root``.
        device: Device the model is moved to after loading.

    Returns:
        ``(model, cfg, data_path)``: the model in eval mode on ``device``, the
        parsed config dict, and the dataset directory named by the config
        (``data_path``, falling back to the first entry of ``data_paths``).

    The state dict is loaded with ``strict=False``; mismatches are only
    reported through the printed missing/unexpected key counts.
    """
    config_file = ckpt_root / "all_config.yaml"
    cfg = yaml.safe_load(config_file.read_text())
    data_path = Path(cfg.get("data_path") or cfg["data_paths"][0])
    meta_file = data_path / "train" / "dataset.json"
    train_meta = json.loads(meta_file.read_text())

    # Architecture config = saved arch section + values taken from the
    # training config and the dataset metadata.
    arch_cfg = dict(cfg["arch"])
    arch_cfg["batch_size"] = cfg["global_batch_size"]
    arch_cfg["seq_len"] = train_meta["seq_len"]
    arch_cfg["vocab_size"] = train_meta["vocab_size"]
    arch_cfg["num_puzzle_identifiers"] = train_meta["num_puzzle_identifiers"]
    arch_cfg["causal"] = False

    model = TinyRecursiveReasoningModel_ACTV1(arch_cfg)
    state = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True)

    # Remove every "_orig_mod." and "model." substring from the key names
    # (presumably torch.compile / wrapper prefixes -- TODO confirm; note that
    # str.replace also strips occurrences in the middle of a key).
    stripped = {}
    for key, value in state.items():
        stripped[key.replace("_orig_mod.", "").replace("model.", "")] = value

    missing, unexpected = model.load_state_dict(stripped, strict=False)
    print(f"[load] {ckpt_root.name}/{ckpt_name} missing={len(missing)} unexpected={len(unexpected)}", flush=True)
    model.to(device).eval()
    return model, cfg, data_path
+
+
def load_test_samples(data_path: Path, n_samples: int, seed: int):
    """Draw a random subset of the test split without replacement.

    Reads ``all__inputs.npy``, ``all__labels.npy`` and
    ``all__puzzle_identifiers.npy`` from ``data_path / "test"`` and selects
    ``min(n_samples, len(inputs))`` rows using ``np.random.default_rng(seed)``.

    Returns:
        Dict with int32 tensors ``inputs``, ``labels``, ``puzzle_identifiers``
        (all indexed by the same rows) and ``idx``, the numpy array of chosen
        row indices.
    """
    test_dir = data_path / "test"
    rng = np.random.default_rng(seed)
    inputs = np.load(test_dir / "all__inputs.npy")
    labels = np.load(test_dir / "all__labels.npy")
    puzzle_ids = np.load(test_dir / "all__puzzle_identifiers.npy")

    total = len(inputs)
    idx = rng.choice(total, size=min(n_samples, total), replace=False)

    def pick(array):
        # Same row selection for every array, converted to an int32 tensor.
        return torch.from_numpy(array[idx].astype(np.int32))

    return {
        "inputs": pick(inputs),
        "labels": pick(labels),
        "puzzle_identifiers": pick(puzzle_ids),
        "idx": idx,
    }
+
+
def batch_slice(samples: dict[str, Any], start: int, end: int, device: str):
    """Return rows ``start:end`` of the model-input tensors, moved to ``device``.

    Only the ``inputs``, ``labels`` and ``puzzle_identifiers`` entries are
    kept; any other entry of ``samples`` (such as ``idx``) is dropped.
    """
    wanted = ("inputs", "labels", "puzzle_identifiers")
    out = {}
    for name, value in samples.items():
        if name not in wanted:
            continue
        out[name] = value[start:end].to(device, non_blocking=True)
    return out
+
+
def repeat_batch(batch: dict[str, torch.Tensor], repeats: int):
    """Repeat every row of every tensor ``repeats`` times, consecutively.

    Row ``i`` of the input occupies rows ``i * repeats .. (i + 1) * repeats - 1``
    of the output. With ``repeats == 1`` the input dict itself is returned.
    """
    if repeats == 1:
        return batch
    expanded = {}
    for name, tensor in batch.items():
        expanded[name] = tensor.repeat_interleave(repeats, dim=0)
    return expanded
+
+
def cat_batches(a: dict[str, torch.Tensor], b: dict[str, torch.Tensor]):
    """Concatenate two batches along dim 0, key by key.

    The keys (and their order) are taken from ``a``; ``b`` must contain every
    key of ``a``, otherwise ``KeyError`` is raised.
    """
    joined = {}
    for name, first in a.items():
        joined[name] = torch.cat((first, b[name]), dim=0)
    return joined
+
+
def cat_inner(a: TinyRecursiveReasoningModel_ACTV1InnerCarry, b: TinyRecursiveReasoningModel_ACTV1InnerCarry):
    """Stack two inner carries along dim 0: rows of ``a`` first, then ``b``."""
    stacked_h = torch.cat((a.z_H, b.z_H), dim=0)
    stacked_l = torch.cat((a.z_L, b.z_L), dim=0)
    return TinyRecursiveReasoningModel_ACTV1InnerCarry(z_H=stacked_h, z_L=stacked_l)
+
+
def split_inner(inner: TinyRecursiveReasoningModel_ACTV1InnerCarry, n_main: int):
    """Split a stacked carry into ``(first n_main rows, remaining rows)``.

    Both halves are returned as new carries holding contiguous tensors.
    """
    head = TinyRecursiveReasoningModel_ACTV1InnerCarry(
        z_H=inner.z_H[:n_main].contiguous(),
        z_L=inner.z_L[:n_main].contiguous(),
    )
    tail = TinyRecursiveReasoningModel_ACTV1InnerCarry(
        z_H=inner.z_H[n_main:].contiguous(),
        z_L=inner.z_L[n_main:].contiguous(),
    )
    return head, tail
+
+
def correctness(logits: torch.Tensor, labels: torch.Tensor):
    """Score argmax predictions against labels, skipping ignored positions.

    Positions whose label equals ``IGNORE_LABEL_ID`` do not count.

    Returns:
        ``(exact, token_acc)``: ``exact`` is a bool tensor that is True where
        every counted position of a row is predicted correctly; ``token_acc``
        is the fraction of counted positions predicted correctly (the
        denominator is clamped to at least 1, so a row without counted
        positions gets 0).
    """
    preds = logits.argmax(dim=-1)
    counted = labels != IGNORE_LABEL_ID
    hits = preds == labels

    # A row is exact when each position is either ignored or a hit.
    exact = (hits | ~counted).all(dim=-1)

    n_counted = counted.sum(-1).clamp_min(1)
    token_acc = (hits & counted).sum(-1).float() / n_counted.float()
    return exact, token_acc
+
+
def rand_unit_dirs(
    inner: TinyRecursiveReasoningModel_ACTV1InnerCarry,
    candidates: int,
    generator: torch.Generator,
):
    """Sample ``candidates`` random unit directions per sample in carry space.

    Gaussian noise is drawn in float32 for ``z_H`` first and ``z_L`` second
    (the draw order fixes the random stream), normalised jointly so that each
    candidate's combined (z_H, z_L) direction has L2 norm 1, and then cast to
    the dtypes of the carry tensors.

    Returns:
        ``(h_dirs, l_dirs)`` with shapes ``(bsz, candidates, *z_H.shape[1:])``
        and ``(bsz, candidates, *z_L.shape[1:])``.
    """
    bsz = inner.z_H.shape[0]

    def draw(reference: torch.Tensor) -> torch.Tensor:
        return torch.randn(
            (bsz, candidates) + tuple(reference.shape[1:]),
            device=reference.device,
            dtype=torch.float32,
            generator=generator,
        )

    h_dirs = draw(inner.z_H)
    l_dirs = draw(inner.z_L)

    sq_h = h_dirs.flatten(2).square().sum(-1)
    sq_l = l_dirs.flatten(2).square().sum(-1)
    norm = torch.sqrt(sq_h + sq_l).clamp_min(1e-30)

    # Give the (bsz, candidates) norm trailing singleton dims for broadcasting.
    h_norm = norm.view((bsz, candidates) + (1,) * (h_dirs.ndim - 2))
    l_norm = norm.view((bsz, candidates) + (1,) * (l_dirs.ndim - 2))

    unit_h = (h_dirs / h_norm).to(inner.z_H.dtype)
    unit_l = (l_dirs / l_norm).to(inner.z_L.dtype)
    return unit_h, unit_l
+
+
def make_shadow_inner(
    inner: TinyRecursiveReasoningModel_ACTV1InnerCarry,
    h_dirs: torch.Tensor,
    l_dirs: torch.Tensor,
    eps: float,
):
    """Build shadow carries offset from ``inner`` by ``eps`` along each direction.

    ``h_dirs`` / ``l_dirs`` have a leading ``(bsz, candidates)`` shape. The
    result folds those two dims into one batch dim of size
    ``bsz * candidates`` (all candidates of sample 0 first, then sample 1, ...)
    and is detached from any autograd graph.
    """
    bsz, candidates = h_dirs.shape[:2]
    flat_rows = bsz * candidates

    shifted_h = inner.z_H[:, None] + eps * h_dirs
    shifted_l = inner.z_L[:, None] + eps * l_dirs

    flat_h = shifted_h.reshape((flat_rows,) + tuple(inner.z_H.shape[1:]))
    flat_l = shifted_l.reshape((flat_rows,) + tuple(inner.z_L.shape[1:]))
    return TinyRecursiveReasoningModel_ACTV1InnerCarry(
        z_H=flat_h.detach(),
        z_L=flat_l.detach(),
    )
+
+
def separation(
    main: TinyRecursiveReasoningModel_ACTV1InnerCarry,
    shadow: TinyRecursiveReasoningModel_ACTV1InnerCarry,
    candidates: int,
):
    """L2 distance between each shadow carry and its main carry.

    ``shadow`` holds ``bsz * candidates`` rows grouped per sample. The
    distance is taken jointly over ``z_H`` and ``z_L`` in float32 and clamped
    to at least ``1e-30``.

    Returns:
        Tensor of shape ``(bsz, candidates)``.
    """
    bsz = main.z_H.shape[0]

    def squared_distance(reference: torch.Tensor, moved: torch.Tensor) -> torch.Tensor:
        grouped = moved.reshape((bsz, candidates) + tuple(reference.shape[1:])).float()
        diff = (grouped - reference[:, None].float()).flatten(2)
        return diff.square().sum(-1)

    total_sq = squared_distance(main.z_H, shadow.z_H) + squared_distance(main.z_L, shadow.z_L)
    return torch.sqrt(total_sq).clamp_min(1e-30)
+
+
def gather_dirs(h_dirs: torch.Tensor, l_dirs: torch.Tensor, idx: torch.Tensor):
    """Select one candidate direction per sample.

    For every batch row ``b`` the candidate ``idx[b]`` is taken from dim 1 of
    ``h_dirs`` and ``l_dirs``; both results are returned contiguous.
    """
    rows = torch.arange(h_dirs.shape[0], device=h_dirs.device)
    chosen_h = h_dirs[rows, idx]
    chosen_l = l_dirs[rows, idx]
    return chosen_h.contiguous(), chosen_l.contiguous()
+
+
def perturb_inner(
    inner: TinyRecursiveReasoningModel_ACTV1InnerCarry,
    h_dir: torch.Tensor,
    l_dir: torch.Tensor,
    sigma: float,
    sign: float,
):
    """Return a copy of ``inner`` shifted by ``sign * sigma`` along a direction.

    ``h_dir`` / ``l_dir`` are cast to the dtype of ``z_H`` / ``z_L`` before
    being added. ``inner`` itself is not modified; any other fields of the
    dataclass are carried over unchanged by ``dataclasses.replace``.
    """
    step = sign * sigma
    shifted_h = inner.z_H + step * h_dir.to(inner.z_H.dtype)
    shifted_l = inner.z_L + step * l_dir.to(inner.z_L.dtype)
    return replace(inner, z_H=shifted_h, z_L=shifted_l)
+
+
@torch.inference_mode()
def warmup_inner(model, batch: dict[str, torch.Tensor], after: int):
    """Run ``after`` unperturbed recurrent steps from a freshly reset carry.

    The initial carry is created on the device of ``batch["inputs"]`` and the
    inner carry is reset for every sample before stepping.

    Returns:
        ``(inner, logits)``: the inner carry after ``after`` calls of
        ``model.inner`` and the logits of the last call (``None`` when
        ``after`` is 0).
    """
    device = batch["inputs"].device
    n_rows = batch["inputs"].shape[0]

    with torch.device(device):
        carry = model.initial_carry(batch)

    reset_all = torch.ones(n_rows, device=device, dtype=torch.bool)
    inner = model.inner.reset_carry(reset_all, carry.inner_carry)

    logits = None
    steps_done = 0
    while steps_done < after:
        inner, logits, _q = model.inner(inner, batch)
        steps_done += 1
    return inner, logits
+
+
@torch.inference_mode()
def search_direction(
    model,
    warm: TinyRecursiveReasoningModel_ACTV1InnerCarry,
    batch: dict[str, torch.Tensor],
    after: int,
    candidates: int,
    fd_eps: float,
    generator: torch.Generator,
):
    """Pick, per sample, the random direction whose shadow diverges the most.

    ``candidates`` unit directions are sampled at the warm state, each shadow
    is offset by ``fd_eps`` and propagated together with the unperturbed
    state for the remaining ``halt_max_steps - after`` steps. The candidate
    with the largest final carry separation is selected.

    Returns:
        ``(best_h, best_l, growth, clean_exact, clean_token)`` where
        ``growth = log(best_separation / fd_eps) / max(remaining, 1)`` and the
        two ``clean_*`` tensors score the unperturbed trajectory.

    Raises:
        AssertionError: if no step remains (``after >= halt_max_steps``), since
            no logits are produced then.
    """
    n_rows = batch["inputs"].shape[0]
    remaining = model.config.halt_max_steps - after

    h_dirs, l_dirs = rand_unit_dirs(warm, candidates, generator)
    main = TinyRecursiveReasoningModel_ACTV1InnerCarry(
        z_H=warm.z_H.detach(),
        z_L=warm.z_L.detach(),
    )
    shadow = make_shadow_inner(main, h_dirs, l_dirs, fd_eps)

    # One forward batch: the n_rows clean states first, followed by the
    # n_rows * candidates shadows (inputs repeated per candidate to match).
    state = cat_inner(main, shadow)
    stacked_batch = cat_batches(batch, repeat_batch(batch, candidates))

    logits = None
    for _step in range(remaining):
        state, logits, _q = model.inner(state, stacked_batch)
    assert logits is not None

    main_final, shadow_final = split_inner(state, n_rows)
    clean_exact, clean_token = correctness(logits[:n_rows], batch["labels"])

    sep = separation(main_final, shadow_final, candidates)
    best_idx = sep.argmax(dim=1)
    best_sep = sep.gather(1, best_idx.unsqueeze(1)).squeeze(1)
    best_h, best_l = gather_dirs(h_dirs, l_dirs, best_idx)

    # Average log expansion per remaining step of the selected direction.
    growth = torch.log(best_sep / fd_eps).float() / max(remaining, 1)
    return best_h, best_l, growth, clean_exact, clean_token
+
+
@torch.inference_mode()
def eval_directional_sigma(
    model,
    warm: TinyRecursiveReasoningModel_ACTV1InnerCarry,
    batch: dict[str, torch.Tensor],
    after: int,
    h_dir: torch.Tensor,
    l_dir: torch.Tensor,
    sigma: float,
):
    """Evaluate answers after perturbing the warm state by ``+sigma`` and ``-sigma``.

    Both signed perturbations are propagated in one stacked batch for the
    remaining ``halt_max_steps - after`` steps and scored against the labels.

    Returns:
        ``(exact, token_acc)``, each of shape ``(bsz, 2)``; column 0 is the
        ``+sigma`` result and column 1 the ``-sigma`` result.

    Raises:
        AssertionError: if no step remains (``after >= halt_max_steps``).
    """
    n_rows = batch["inputs"].shape[0]
    remaining = model.config.halt_max_steps - after

    plus = perturb_inner(warm, h_dir, l_dir, sigma, +1.0)
    minus = perturb_inner(warm, h_dir, l_dir, sigma, -1.0)

    # Rows [0, n_rows) carry +sigma, rows [n_rows, 2 * n_rows) carry -sigma.
    state = cat_inner(plus, minus)
    doubled_batch = cat_batches(batch, batch)

    logits = None
    for _step in range(remaining):
        state, logits, _q = model.inner(state, doubled_batch)
    assert logits is not None

    exact, token = correctness(logits, doubled_batch["labels"])
    exact_pairs = torch.stack((exact[:n_rows], exact[n_rows:]), dim=1)
    token_pairs = torch.stack((token[:n_rows], token[n_rows:]), dim=1)
    return exact_pairs, token_pairs
+
+
def summarize(exact: torch.Tensor, token: torch.Tensor, clean_exact: torch.Tensor, growth: torch.Tensor):
    """Aggregate per-sample perturbation results into scalar metrics.

    Args:
        exact: Bool tensor ``(n, 2)`` of exact-match results per sign.
        token: Tensor ``(n, 2)`` of token accuracies per sign.
        clean_exact: Per-sample exact-match result of the unperturbed run.
        growth: Per-sample growth rate of the selected direction.

    Returns:
        Dict of Python floats. "worst" counts a sample only if both signs are
        exact, "best" if at least one is. The ``retain_*`` entries are computed
        over samples that were solved cleanly and the ``rescue_*`` entries over
        samples that were not; each is NaN when its subset is empty.
    """
    solved = clean_exact.bool()
    unsolved = ~solved
    both_signs = exact.all(dim=1)
    any_sign = exact.any(dim=1)

    def rate_on(values: torch.Tensor, subset: torch.Tensor) -> float:
        if not subset.any().item():
            return float("nan")
        return values[subset].float().mean().item()

    # Key order is significant: it defines the CSV column order downstream.
    return {
        "clean_acc": solved.float().mean().item(),
        "mean_sign_exact": exact.float().mean().item(),
        "mean_sign_token_acc": token.mean().item(),
        "worst_sign_exact": both_signs.float().mean().item(),
        "best_sign_exact": any_sign.float().mean().item(),
        "selected_growth_mean": growth.mean().item(),
        "selected_growth_q90": torch.quantile(growth, 0.90).item(),
        "retain_mean_on_clean_success": rate_on(exact, solved),
        "retain_worst_on_clean_success": rate_on(both_signs, solved),
        "rescue_mean_on_clean_fail": rate_on(exact, unsolved),
        "rescue_best_on_clean_fail": rate_on(any_sign, unsolved),
    }
+
+
def write_summary(path: Path, rows: list[dict[str, Any]]) -> None:
    """Write ``rows`` to ``path`` as CSV with a header line.

    Columns are the keys of the first row followed by any keys first seen in
    later rows, in order of appearance. Rows lacking a column get an empty
    cell. Raises ``IndexError`` when ``rows`` is empty.
    """
    columns = list(rows[0])
    known = set(columns)
    for later_row in rows[1:]:
        for name in later_row:
            if name in known:
                continue
            known.add(name)
            columns.append(name)

    with path.open("w", newline="") as handle:
        writer = csv.DictWriter(handle, fieldnames=columns)
        writer.writeheader()
        for record in rows:
            writer.writerow(record)
+
+
def _artifact_path(out_prefix: Path, suffix: str) -> Path:
    """Return ``out_prefix`` with ``suffix`` appended to its last component.

    ``Path.with_suffix`` is not used because it replaces an existing suffix:
    a prefix such as ``run_fdeps0.03`` would be cut down to ``run_fdeps0``.
    """
    return out_prefix.with_name(out_prefix.name + suffix)


def main() -> None:
    """CLI entry point: run the directional perturbation sweep and save results.

    For every ``--perturb-afters`` step the test subset is processed in
    batches: the model is warmed up for that many steps, the most expanding
    candidate direction is selected, and every sigma is evaluated with both
    signs along that direction. The sigma treated as zero provides the clean
    success/failure grouping for the summary metrics.

    Outputs, all named ``<out-prefix><suffix>``:
        ``.summary.csv``: one row per (perturb_after, sigma) pair.
        ``.meta.json``: arguments and run metadata.
        ``.npz``: sample indices plus per-sample exact/token results stacked
            in (perturb_after, sigma) loop order and per-sample growth stacked
            per perturb_after.

    The device is fixed to ``"cuda"``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--ckpt-root", required=True)
    parser.add_argument("--ckpt-name", required=True)
    parser.add_argument("--label", required=True)
    parser.add_argument("--n-samples", type=int, default=1000)
    parser.add_argument("--batch-size", type=int, default=16)
    parser.add_argument("--candidates", type=int, default=8)
    parser.add_argument("--fd-eps", type=float, default=1e-3)
    parser.add_argument("--sigmas", default="0,0.001,0.003,0.01,0.03,0.1")
    parser.add_argument("--perturb-afters", default="0,4,8,12")
    parser.add_argument("--seed", type=int, default=20260607)
    parser.add_argument("--out-prefix", required=True)
    args = parser.parse_args()

    device = "cuda"
    sigmas = parse_float_list(args.sigmas)
    # Remember which entry acts as the clean (zero) sigma. Looking it up later
    # with a literal 0.0 key would fail for a value such as 1e-13 that passes
    # this tolerance check without being equal to 0.0.
    zero_sigma = next((s for s in sigmas if abs(s) <= 1e-12), None)
    if zero_sigma is None:
        zero_sigma = 0.0
        sigmas = [zero_sigma] + sigmas
    afters = parse_int_list(args.perturb_afters)
    torch.manual_seed(args.seed)
    # Separate generator (different seed offset) for the direction sampling.
    generator = torch.Generator(device=device).manual_seed(args.seed + 17)

    model, cfg, data_path = load_model(Path(args.ckpt_root), args.ckpt_name, device)
    samples = load_test_samples(data_path, args.n_samples, args.seed)
    n = len(samples["inputs"])
    print(
        f"[run] label={args.label} n={n} batch={args.batch_size} candidates={args.candidates} "
        f"afters={afters} sigmas={sigmas}",
        flush=True,
    )

    rows: list[dict[str, Any]] = []
    exact_store = []
    token_store = []
    growth_store = []
    for after in afters:
        sigma_exact = {sigma: [] for sigma in sigmas}
        sigma_token = {sigma: [] for sigma in sigmas}
        growth_parts = []
        for start in range(0, n, args.batch_size):
            end = min(start + args.batch_size, n)
            batch = batch_slice(samples, start, end, device)
            warm, _ = warmup_inner(model, batch, after)
            h_dir, l_dir, growth, _search_clean_exact, _search_clean_token = search_direction(
                model, warm, batch, after, args.candidates, args.fd_eps, generator
            )
            growth_parts.append(growth.cpu())
            # The same selected direction is reused for every sigma.
            for sigma in sigmas:
                exact, token = eval_directional_sigma(model, warm, batch, after, h_dir, l_dir, sigma)
                sigma_exact[sigma].append(exact.cpu())
                sigma_token[sigma].append(token.cpu())
            if end == n or (end // args.batch_size) % 10 == 0:
                print(f"  after={after} [{end}/{n}]", flush=True)

        growth_all = torch.cat(growth_parts, dim=0)
        growth_store.append(growth_all.numpy())
        exact_by_sigma = {sigma: torch.cat(sigma_exact[sigma], dim=0) for sigma in sigmas}
        token_by_sigma = {sigma: torch.cat(sigma_token[sigma], dim=0) for sigma in sigmas}
        # Clean success/failure = result of the zero-sigma run (first sign column).
        clean_all = exact_by_sigma[zero_sigma][:, 0].clone()
        print(f"  after={after} clean grouping done clean_acc={clean_all.float().mean().item():.4f}", flush=True)
        for sigma in sigmas:
            exact_all = exact_by_sigma[sigma]
            token_all = token_by_sigma[sigma]
            row: dict[str, Any] = {
                "label": args.label,
                "perturb_after": after,
                "sigma": sigma,
                "n_samples": n,
                "candidates": args.candidates,
                "fd_eps": args.fd_eps,
                "ckpt_root": str(Path(args.ckpt_root)),
                "ckpt_name": args.ckpt_name,
                **summarize(exact_all, token_all, clean_all, growth_all),
            }
            rows.append(row)
            exact_store.append(exact_all.numpy())
            token_store.append(token_all.numpy())
            print(
                f"  after={after} sigma={sigma:g} clean={row['clean_acc']:.4f} "
                f"mean={row['mean_sign_exact']:.4f} worst={row['worst_sign_exact']:.4f} "
                f"retain_worst={row['retain_worst_on_clean_success']:.4f} "
                f"rescue_best={row['rescue_best_on_clean_fail']:.4f}",
                flush=True,
            )

    out_prefix = Path(args.out_prefix)
    out_prefix.parent.mkdir(parents=True, exist_ok=True)
    summary_path = _artifact_path(out_prefix, ".summary.csv")
    write_summary(summary_path, rows)
    meta = {
        "args": vars(args),
        "data_path": str(data_path),
        "config_global_batch_size": cfg.get("global_batch_size"),
        "sigmas": sigmas,
        "perturb_afters": afters,
        "n_samples": n,
    }
    _artifact_path(out_prefix, ".meta.json").write_text(json.dumps(meta, indent=2, sort_keys=True))
    np.savez_compressed(
        _artifact_path(out_prefix, ".npz"),
        idx=samples["idx"],
        sigmas=np.asarray(sigmas, dtype=np.float32),
        perturb_afters=np.asarray(afters, dtype=np.int32),
        exact=np.stack(exact_store, axis=0),
        token_acc=np.stack(token_store, axis=0),
        selected_growth=np.stack(growth_store, axis=0),
        meta_json=np.asarray(json.dumps(meta, sort_keys=True)),
    )
    print(f"[done] {summary_path}", flush=True)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/directional_multi4_eval_n1000.csv b/directional_multi4_eval_n1000.csv
new file mode 100644
index 0000000..42dfad7
--- /dev/null
+++ b/directional_multi4_eval_n1000.csv
@@ -0,0 +1,6 @@
+step,n,seed,exact_acc,token_acc,elapsed_sec
+6510,1000,20260611,0.117,0.7025432098765432,39.42588257789612
+13020,1000,20260611,0.464,0.8220246913580247,37.25507998466492
+19530,1000,20260611,0.692,0.888753086419753,37.18044900894165
+26040,1000,20260611,0.719,0.8994074074074074,37.06992506980896
+32550,1000,20260611,0.761,0.9130493827160494,37.29014587402344
diff --git a/dynamics_experiment_report.md b/dynamics_experiment_report.md
new file mode 100644
index 0000000..489fb91
--- /dev/null
+++ b/dynamics_experiment_report.md
@@ -0,0 +1,86 @@
+# Dynamics Control Experiment Report
+
+Generated: 2026-05-27T20:24:05
+
+## Summary Table
+
+| Model | Run | Status | Init | Final/Last | Delta | Best | Best Step | Vs Baseline | Evals |
+|---|---|---:|---:|---:|---:|---:|---:|---:|---:|
+| HRM | HRM baseline 10k | complete | 0.5176 | 0.6504 | 0.1328 | 0.6699 | 9000 | NA | 12 |
+| HRM | HRM mixed volume-CF | complete | 0.5176 | 0.6562 | 0.1387 | 0.6660 | 7000 | 0.0059 | 12 |
+| HRM | HRM Engelken interfloss | complete | 0.5176 | 0.6465 | 0.1289 | 0.6621 | 8000 | -0.0039 | 14 |
+| HRM | HRM Engelken+KL interfloss | complete | 0.5176 | 0.6211 | 0.1035 | 0.6250 | 9000 | -0.0293 | 14 |
+| HRM | HRM conservative Engelken+KL | complete | 0.5176 | 0.6367 | 0.1191 | 0.6777 | 7000 | -0.0137 | 14 |
+| HRM | HRM late Engelken+KL | complete | 0.5176 | 0.6191 | 0.1016 | 0.6367 | 7000 | -0.0312 | 14 |
+| HRM | HRM volume-envelope+KL | complete | 0.5176 | 0.6074 | 0.0898 | 0.6211 | 8000 | -0.0430 | 14 |
+| HRM | HRM basin consistency | complete | 0.5176 | 0.6152 | 0.0977 | 0.6152 | 10000 | -0.0352 | 12 |
+| HRM | HRM single perturbed CE | complete | 0.5176 | 0.6660 | 0.1484 | 0.6660 | 5000 | 0.0156 | 12 |
+| HRM | HRM clean+multi perturbed CE | complete | 0.5176 | 0.6543 | 0.1367 | 0.6660 | 7000 | 0.0039 | 12 |
+| HRM | HRM fixed-unroll baseline 50k | complete | 0.5176 | 0.5801 | 0.0625 | 0.6328 | 5000 | -0.0703 | 22 |
+| HRM | HRM multi4 loguniform 50k | complete | 0.5176 | 0.5889 | 0.0713 | 0.6250 | 7500 | -0.0615 | 22 |
+| TRM | TRM baseline 10k | complete | 0.5977 | 0.5762 | -0.0215 | 0.6621 | 2000 | NA | 12 |
+| TRM | TRM mixed volume-CF | complete | 0.5977 | 0.5078 | -0.0898 | 0.6133 | 1000 | -0.0684 | 12 |
+| TRM | TRM Engelken interfloss | complete | 0.5977 | 0.5195 | -0.0781 | 0.5977 | 0 | -0.0566 | 14 |
+| TRM | TRM Engelken+KL interfloss | complete | 0.5977 | 0.5957 | -0.0020 | 0.5977 | 0 | 0.0195 | 14 |
+| TRM | TRM late Engelken+KL | complete | 0.5977 | 0.5449 | -0.0527 | 0.5977 | 0 | -0.0312 | 14 |
+| TRM | TRM volume-envelope+KL | complete | 0.5977 | 0.5508 | -0.0469 | 0.5977 | 0 | -0.0254 | 14 |
+| TRM | TRM basin consistency | complete | 0.5977 | 0.2422 | -0.3555 | 0.5977 | 0 | -0.3340 | 12 |
+| TRM | TRM single perturbed CE | complete | 0.5977 | 0.6191 | 0.0215 | 0.6582 | 2000 | 0.0430 | 12 |
+| TRM | TRM clean+multi perturbed CE | complete | 0.5977 | 0.5918 | -0.0059 | 0.6719 | 4000 | 0.0156 | 12 |
+| TRM | TRM fixed-unroll baseline 50k | complete | 0.5615 | 0.4971 | -0.0645 | 0.5947 | 22500 | -0.0791 | 22 |
+| TRM | TRM multi4 loguniform 50k | complete | 0.5615 | 0.5508 | -0.0107 | 0.6084 | 42500 | -0.0254 | 22 |
+
+## Floss Episode Diagnostics
+
+### HRM Engelken interfloss
+- episode 0 @ train_step 0: floss 0.030840 -> 0.000022, lyap1 -0.1476 -> 0.0047, volume -0.1693 -> 0.0013, KL mean/max 0.000000/0.000000
+- episode 1 @ train_step 500: floss 0.019141 -> 0.000014, lyap1 -0.1208 -> 0.0017, volume -0.1357 -> 0.0005, KL mean/max 0.000000/0.000000
+
+### HRM Engelken+KL interfloss
+- episode 0 @ train_step 0: floss 0.030840 -> 0.001082, lyap1 -0.1476 -> 0.0107, volume -0.1693 -> -0.0180, KL mean/max 0.645335/14.346942
+- episode 1 @ train_step 500: floss 0.007646 -> 0.000361, lyap1 -0.0588 -> 0.0122, volume -0.0812 -> -0.0068, KL mean/max 0.130172/4.959406
+
+### HRM conservative Engelken+KL
+- episode 0 @ train_step 0: floss 0.030840 -> 0.046181, lyap1 -0.1476 -> -0.2018, volume -0.1693 -> -0.2144, KL mean/max 2.819328/11.853225
+- episode 1 @ train_step 500: floss 0.016849 -> 0.048128, lyap1 -0.0850 -> -0.2127, volume -0.1248 -> -0.2180, KL mean/max 1.228630/5.586896
+
+### HRM late Engelken+KL
+- episode 0 @ train_step 0: floss 0.046401 -> 0.000793, lyap1 -0.1729 -> 0.0256, volume -0.1976 -> 0.0086, KL mean/max 0.418510/6.242658
+- episode 1 @ train_step 500: floss 0.007572 -> 0.001378, lyap1 0.0810 -> 0.0078, volume 0.0456 -> -0.0055, KL mean/max 0.132306/2.906027
+
+### HRM volume-envelope+KL
+- episode 0 @ train_step 0: floss 0.000046 -> 0.000000, lyap1 -0.1476 -> -0.2818, volume -0.1693 -> -0.3005, KL mean/max 0.436240/8.779900
+- episode 1 @ train_step 500: floss 0.000801 -> 0.000000, lyap1 -0.1108 -> -0.2791, volume -0.1299 -> -0.2994, KL mean/max 0.112411/1.281256
+
+### TRM Engelken interfloss
+- episode 0 @ train_step 0: floss 0.000883 -> 0.000047, lyap1 0.0089 -> -0.0011, volume 0.0023 -> -0.0047, KL mean/max 0.000000/0.000000
+- episode 1 @ train_step 500: floss 0.000257 -> 0.000006, lyap1 -0.0149 -> -0.0008, volume -0.0157 -> -0.0015, KL mean/max 0.000000/0.000000
+
+### TRM Engelken+KL interfloss
+- episode 0 @ train_step 0: floss 0.000883 -> 0.000242, lyap1 0.0089 -> 0.0158, volume 0.0023 -> 0.0064, KL mean/max 0.309170/7.308133
+- episode 1 @ train_step 500: floss 0.000063 -> 0.000059, lyap1 0.0031 -> 0.0088, volume -0.0017 -> 0.0018, KL mean/max 0.086242/0.927036
+
+### TRM late Engelken+KL
+- episode 0 @ train_step 0: floss 0.004981 -> 0.001447, lyap1 -0.0423 -> -0.0327, volume -0.0482 -> -0.0368, KL mean/max 0.296968/3.760708
+- episode 1 @ train_step 500: floss 0.002911 -> 0.001941, lyap1 -0.0540 -> -0.0372, volume -0.0538 -> -0.0426, KL mean/max 0.092765/3.175512
+
+### TRM volume-envelope+KL
+- episode 0 @ train_step 0: floss 0.000074 -> 0.000000, lyap1 0.0089 -> 0.0145, volume 0.0023 -> -0.0076, KL mean/max 0.286836/6.571321
+- episode 1 @ train_step 500: floss 0.000000 -> 0.000000, lyap1 0.0055 -> -0.0251, volume -0.0009 -> -0.0281, KL mean/max 0.089595/2.901087
+
+## Incomplete Runs / Process Snapshot
+
+- No monitored experiment processes are active.
+
+## Notes
+
+- `Final/Last` is `final_acc` when present, otherwise the latest eval accuracy.
+- `Vs Baseline` compares against the matching HRM/TRM 10k no-floss baseline.
+- A complete report may still show partial rows if an experiment crashed or was interrupted.
+
+## Next Experiment Questions
+
+- Close the faithful-flossing loop before making a claim that flossing is ineffective. Required matrix: from-scratch baseline, direct flossing loss as negative control, faithful prefloss, faithful interfloss with separated floss optimizer steps, and optionally volume/spectrum interfloss.
+- Treat continuation runs as screening only. Final claims about flossing should use from-scratch full training, because continuation can confound optimizer state, EMA horizon, puzzle embeddings, and data-order effects.
+- Run GRM/PTRM after or in parallel with faithful flossing. This answers a different question: whether stochastic multi-rollout plus Q-selection is learning a low-dimensional stability/Lyapunov-spectrum observer.
+- For GRM/PTRM, compare learned Q selection with lambda1 and top-spectrum-feature selection, and measure Q-score correlation with stability features.
diff --git a/engelken_python_flossing.py b/engelken_python_flossing.py
new file mode 100644
index 0000000..b83be2f
--- /dev/null
+++ b/engelken_python_flossing.py
@@ -0,0 +1,336 @@
+"""PyTorch port of Rainer Engelken's GradientFlossing_ExampleCode.jl.
+
+This script is intentionally a vanilla-RNN delayed-XOR reproduction scaffold,
+not an HRM/TRM experiment. It keeps the algorithmic choices that matter for
+checking gradient flossing itself:
+
+ - flossing is a separate phase, not mixed with task loss;
+ - flossing optimizes only input/recurrent/bias parameters, not readout;
+ - flossing uses mean((lambda_i - target)^2);
+ - flossing batch size is one, as in the Julia code;
+ - QR is differentiable and participates in the backward pass;
+ - Lyapunov accumulation discards the initial transient.
+
+Reference:
+ research/flossing/external/GradientFlossing/GradientFlossing_ExampleCode.jl
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import math
+import time
+from dataclasses import asdict, dataclass
+from pathlib import Path
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+
+
def recabs(bits: np.ndarray) -> np.ndarray:
    """Fold ``bits`` along its first axis with the absolute difference.

    The running value starts at ``bits[0]``; each later row ``b`` replaces it
    with ``|running - b|``.  For 0/1-valued rows this is a running XOR.

    Args:
        bits: array whose first axis enumerates the rows to fold (at least one).

    Returns:
        The folded value, shaped like a single row of ``bits``.
    """
    acc = bits[0]
    row_count = len(bits)
    idx = 1
    while idx < row_count:
        acc = np.abs(acc - bits[idx])
        idx += 1
    return acc
+
+
def generate_input_output(
    batch_size: int,
    steps: int,
    seed: int,
    input_dim: int,
    input_scale: float,
    delay: int,
    task: int,
    device: torch.device,
) -> tuple[torch.Tensor, torch.Tensor]:
    """Port of generateInputOutput in GradientFlossing_XOR.jl.

    A random 0/1 input stream of ``steps + delay`` raw steps is drawn from
    ``np.random.default_rng(seed)`` and a target is derived from channel 0:

      - ``task == 0``: the target at raw step ``i`` is the input from
        ``delay`` steps earlier (delayed copy);
      - otherwise, with ``h = abs(task)``: the target at raw step ``i`` is the
        ``recabs`` fold of the inputs at the ``2**h`` lags
        ``delay, 2*delay, ..., (2**h)*delay``.

    Raw steps that never receive a target keep the value zero.  The first
    ``delay`` raw steps are dropped from both arrays before returning, and
    only the returned input is multiplied by ``input_scale``.

    Returns tensors shaped as Julia uses conceptually:
      s: [T, S, B]
      target: [T, 1, B]
    """
    total_steps = steps + delay
    rng = np.random.default_rng(seed)
    raw_bits = rng.integers(0, 2, size=(total_steps, input_dim, batch_size), dtype=np.int64)
    s = raw_bits.astype(np.float32)
    target = np.zeros((total_steps, 1, batch_size), dtype=np.float32)

    if task != 0:
        n_lags = 2 ** abs(task)
        first_valid = delay * n_lags
        for t in range(first_valid, total_steps):
            lagged_rows = [t - delay * k for k in range(1, n_lags + 1)]
            # Flip so the fold starts from the oldest lag, as in the original port.
            ordered = np.flip(s[lagged_rows, 0, :], axis=0)
            target[t, 0, :] = recabs(ordered)
    else:
        for t in range(delay, total_steps):
            target[t, :, :] = s[t - delay, :1, :]

    scaled_inputs = s[delay:] * np.float32(input_scale)
    sx = torch.from_numpy(scaled_inputs).to(device)
    ty = torch.from_numpy(target[delay:]).to(device)
    return sx, ty
+
+
@dataclass
class Config:
    """Run configuration; every field is also exposed as a ``--kebab-case`` CLI flag.

    Field order matters: it defines the positional constructor signature and
    the order in which ``parse_args`` registers command-line options.
    """

    # --- network size and training schedule ---
    hidden_size: int = 80  # number of recurrent units
    train_epochs: int = 3001  # task-training epochs (one optimizer step each)
    inter_period: int = 100  # an inter-flossing episode may start when epoch % inter_period == 0
    inter_epochs: int = 500  # flossing steps per inter-flossing episode
    pre_epochs: int = 500  # flossing steps run at epoch 1, before the first task step
    max_inter_episodes: int = 2  # cap on the number of inter-flossing episodes
    batch_size: int = 16  # task batch size (also the batch width of x_init)
    input_dim: int = 1  # number of input channels
    train_steps: int = 300  # time steps per task sequence
    lyap_steps: int = 55  # time steps iterated when estimating the Lyapunov spectrum
    floss_input_steps: int = 300  # length of the input sequence generated for each flossing step
    # --- seeds ---
    seed_ic: int = 1  # seeds the random draw behind the initial state
    seed_input: int = 1  # enters the per-epoch data seed (train_epochs * seed_input + ...)
    seed_net: int = 1  # passed to torch.manual_seed before weight initialisation
    seed_ons: int = 1  # seeds the random matrix whose QR gives the initial orthonormal basis
    # --- optimizer (shared by the task and flossing AdamW instances) ---
    lr: float = 1e-3
    beta1: float = 0.9
    beta2: float = 0.999
    # --- initialisation ---
    init_type: int = 1  # 1: scaled Gaussian recurrent matrix; 3: scaled orthogonal (QR) matrix
    recurrent_gain: float = 1.0  # multiplies the recurrent matrix at initialisation
    recurrent_mean_gain: float = 0.0  # if nonzero, recurrent_mean_gain / hidden_size is added (init_type 1 only)
    input_scale: float = 1.0  # multiplies the generated 0/1 inputs
    delay: int = 10  # lag (in steps) used by the copy / XOR target
    ws_std: float = 1.0  # input weights: ws_std * randn + ws_mean
    ws_mean: float = 0.0
    wr_std: float = 1.0  # readout weights: wr_std * randn + wr_mean
    wr_mean: float = 0.0
    b_std: float = 0.1  # bias: b_mean + b_std * rand
    b_mean: float = 0.0
    # --- flossing objective and task selection ---
    n_lyap: int = 75  # number of Lyapunov exponents estimated (must be <= hidden_size)
    task: int = -1  # 0: delayed copy; otherwise XOR-style target over 2**abs(task) lags
    lyap_target: float = 0.0  # value every estimated exponent is pushed towards
    # --- logging / output ---
    eval_every: int = 100  # evaluate every this many epochs (plus first and last)
    eval_batches: int = 4  # freshly generated batches per evaluation
    log_every_floss: int = 50  # log every this many flossing steps (plus first and last)
    device: str = "cpu"  # torch device string
    out: str = "research/flossing/engelken_python/run.json"  # path of the JSON log written at the end
+
+
class VanillaRNN:
    """Vanilla tanh RNN with explicitly managed parameters.

    The state update used by every method is
    ``x <- j @ tanh(x) + b + (ws @ s_t) / input_dim`` and the readout is
    ``wr @ x + offset``.  Parameters are plain ``torch.nn.Parameter``
    attributes (this is not an ``nn.Module``); ``task_parameters`` and
    ``floss_parameters`` return the subsets given to the two optimizers.
    """

    def __init__(self, cfg: Config, device: torch.device):
        """Draw all parameters from ``cfg`` after seeding torch with ``cfg.seed_net``.

        The order of the random draws below (ws, wr, b, j) determines the
        initial weights for a given seed, so it must not be rearranged.

        Raises:
            ValueError: if ``cfg.init_type`` is neither 1 nor 3.
        """
        self.cfg = cfg
        self.device = device
        torch.manual_seed(cfg.seed_net)
        n = cfg.hidden_size

        # Input weights, shape (hidden_size, input_dim).
        self.ws = torch.nn.Parameter(
            cfg.ws_std * torch.randn(n, cfg.input_dim, device=device) + cfg.ws_mean
        )
        # Readout weights, shape (1, hidden_size).
        self.wr = torch.nn.Parameter(
            cfg.wr_std * torch.randn(1, n, device=device) + cfg.wr_mean
        )
        # NOTE(review): the bias uses torch.rand (uniform on [0, 1)) while the
        # other weights use torch.randn; confirm against the Julia reference.
        self.b = torch.nn.Parameter(
            cfg.b_mean + cfg.b_std * torch.rand(n, device=device)
        )
        # Scalar readout offset, initialised to 0.001.
        self.offset = torch.nn.Parameter(torch.tensor([0.001], device=device))

        if cfg.init_type == 1:
            # Gaussian recurrent matrix scaled by gain / sqrt(n), plus an optional uniform shift.
            j = cfg.recurrent_gain * torch.randn(n, n, device=device) / math.sqrt(n)
            if cfg.recurrent_mean_gain != 0:
                j = j + cfg.recurrent_mean_gain / n
        elif cfg.init_type == 3:
            # Orthogonal recurrent matrix: Q factor of a Gaussian matrix, scaled by the gain.
            j0 = cfg.recurrent_gain * torch.randn(n, n, device=device) / math.sqrt(n)
            q, _ = torch.linalg.qr(j0)
            j = cfg.recurrent_gain * q
        else:
            raise ValueError(f"unsupported init_type={cfg.init_type}")
        self.j = torch.nn.Parameter(j)

        # Trainable initial state, obtained by relaxing a random state for 100 input-free steps.
        self.x_init = torch.nn.Parameter(self._relax_initial_condition(cfg.seed_ic, 100))

    def _relax_initial_condition(self, seed: int, steps: int) -> torch.Tensor:
        """Return a (hidden_size, batch_size) state after ``steps`` input-free updates.

        The starting state is Gaussian, drawn from a dedicated generator seeded
        with ``seed``; the updates run without gradient tracking.
        """
        gen = torch.Generator(device=self.device).manual_seed(seed)
        x = torch.randn(self.cfg.hidden_size, self.cfg.batch_size, generator=gen, device=self.device)
        with torch.no_grad():
            for _ in range(steps):
                x = self.j.detach() @ torch.tanh(x) + self.b.detach().unsqueeze(1)
        return x

    def task_parameters(self) -> list[torch.nn.Parameter]:
        """Parameters updated by the task optimizer (all of them)."""
        return [self.ws, self.j, self.wr, self.b, self.offset, self.x_init]

    def floss_parameters(self) -> list[torch.nn.Parameter]:
        """Parameters updated by the flossing optimizer: input, recurrent and bias only."""
        return [self.ws, self.j, self.b]

    def task_loss_and_accuracy(self, s: torch.Tensor, target: torch.Tensor) -> tuple[torch.Tensor, float]:
        """Run the RNN over ``cfg.train_steps`` steps and score the readout.

        Args:
            s: inputs indexed as ``s[ti]`` per time step; each slice is
                multiplied by ``ws`` and added to the (hidden_size, batch_size) state.
            target: 0/1 targets indexed as ``target[ti]``, matching the logits' shape.

        Returns:
            ``(loss, accuracy)``: the summed binary cross-entropy divided by
            ``cfg.batch_size * max(train_steps - first_loss_idx, 1)`` (a tensor
            that carries gradients), and the fraction of correct thresholded
            predictions as a Python float.  Steps before ``first_loss_idx``
            contribute to neither.
        """
        cfg = self.cfg
        first_loss_idx = 2 * cfg.delay if cfg.task > 0 else cfg.delay * (2 ** abs(cfg.task))
        # Julia is 1-indexed and starts loss at ti >= firstlossidx.
        first_loss_idx = max(first_loss_idx - 1, 0)
        x = self.x_init
        loss = torch.zeros((), device=self.device)
        correct = 0.0
        count = 0
        for ti in range(cfg.train_steps):
            x = self.j @ torch.tanh(x) + self.b.unsqueeze(1) + (self.ws @ s[ti]) / cfg.input_dim
            if ti >= first_loss_idx:
                logits = self.wr @ x + self.offset.unsqueeze(1)
                loss = loss + F.binary_cross_entropy_with_logits(logits, target[ti], reduction="sum")
                pred = (torch.sigmoid(logits) >= 0.5).to(target.dtype)
                correct += (pred == target[ti]).float().sum().item()
                count += target[ti].numel()
        denom = cfg.batch_size * max(cfg.train_steps - first_loss_idx, 1)
        return loss / denom, correct / max(count, 1)

    def lyapunov_spectrum(self, s: torch.Tensor) -> torch.Tensor:
        """Estimate the first ``cfg.n_lyap`` Lyapunov exponents along one trajectory.

        Only batch column 0 of ``x_init`` and of ``s`` is used.  An orthonormal
        basis is pushed through the per-step tangent map and re-orthonormalised
        with a QR decomposition; ``log|diag(R)|`` is averaged over the steps
        after the discarded transient.  All operations stay in the autograd
        graph, so the result is differentiable with respect to the parameters.

        Args:
            s: inputs indexed as ``s[n - 1, :, 0]``; needs at least
                ``cfg.lyap_steps`` time steps.

        Returns:
            Tensor of length ``cfg.n_lyap`` (empty if ``cfg.n_lyap <= 0``).

        Raises:
            ValueError: if no step is left after the transient discard.
        """
        cfg = self.cfg
        if cfg.n_lyap <= 0:
            return torch.empty(0, device=self.device)

        x = self.x_init[:, 0].clone()
        # Initial orthonormal basis: Q factor of a Gaussian matrix seeded by seed_ons.
        gen = torch.Generator(device=self.device).manual_seed(cfg.seed_ons)
        q0 = torch.randn(cfg.hidden_size, cfg.n_lyap, generator=gen, device=self.device)
        q, _ = torch.linalg.qr(q0)
        accum = torch.zeros(cfg.n_lyap, device=self.device)
        tsim = 0  # number of steps that contributed to accum
        transient = 10
        transient_ons = math.ceil(cfg.lyap_steps / 10)
        diag_idx = torch.arange(cfg.n_lyap, device=self.device)

        for n in range(1, cfg.lyap_steps + 1):
            x = self.j @ torch.tanh(x) + self.b + (self.ws @ s[n - 1, :, 0]) / cfg.input_dim
            # Derivative of tanh, evaluated at the state after this step's update.
            phi_prime = 1.0 / torch.cosh(x).square()
            # Row i of j is scaled by phi_prime[i].
            # NOTE(review): this is diag(phi'(x_new)) @ j rather than
            # j @ diag(phi'(x_old)); confirm it matches the Julia reference.
            tangent = phi_prime.unsqueeze(1) * self.j
            q = tangent @ q
            q, r = torch.linalg.qr(q)
            # Accumulate only once both transients (state and basis) have passed.
            if n > transient + transient_ons:
                # clamp_min keeps log() finite if a diagonal entry is exactly zero.
                accum = accum + r[diag_idx, diag_idx].abs().clamp_min(1e-30).log()
                tsim += 1

        if tsim == 0:
            raise ValueError("lyap_steps too short after transient discard")
        return accum / tsim
+
+
def evaluate(model: VanillaRNN, cfg: Config, epoch: int) -> dict[str, float]:
    """Score ``model`` on ``cfg.eval_batches`` freshly generated batches.

    Batch ``i`` is generated with seed ``10_000_000 + epoch * 1000 + i``.
    Everything runs under ``torch.no_grad()``.

    Returns:
        ``{"loss": ..., "accuracy": ...}``, each the mean over the batches.
    """
    base_seed = 10_000_000 + epoch * 1000
    loss_values = []
    acc_values = []
    with torch.no_grad():
        for batch_idx in range(cfg.eval_batches):
            inputs, targets = generate_input_output(
                cfg.batch_size, cfg.train_steps, base_seed + batch_idx, cfg.input_dim,
                cfg.input_scale, cfg.delay, cfg.task, model.device,
            )
            batch_loss, batch_acc = model.task_loss_and_accuracy(inputs, targets)
            loss_values.append(float(batch_loss.item()))
            acc_values.append(batch_acc)
    mean_loss = float(np.mean(loss_values))
    mean_acc = float(np.mean(acc_values))
    return {"loss": mean_loss, "accuracy": mean_acc}
+
+
def run(cfg: Config) -> dict:
    """Train the vanilla RNN with optional pre- and inter-flossing phases.

    Each epoch performs one task-gradient step on a freshly generated batch.
    Before that step, a flossing episode may run:

      - pre-flossing: ``cfg.pre_epochs`` steps at epoch 1;
      - inter-flossing: ``cfg.inter_epochs`` steps whenever
        ``epoch % cfg.inter_period == 0``, at most ``cfg.max_inter_episodes``
        times.  ``cfg.inter_period == 0`` disables inter-flossing.

    A flossing step generates a batch-size-one input sequence, computes the
    Lyapunov spectrum and minimises ``mean((lambda_i - lyap_target)^2)`` with
    a separate optimizer over ``model.floss_parameters()``.

    Args:
        cfg: run configuration.

    Returns:
        The log dict (``config``, ``evals``, ``floss``, ``task``), which is
        also written as JSON to ``cfg.out`` once training has finished.

    Raises:
        ValueError: if ``n_lyap > hidden_size``, if ``lyap_steps`` leaves no
            step after the transient discard, or if a flossing episode is
            about to start with ``floss_input_steps < lyap_steps``.
    """
    device = torch.device(cfg.device)
    if cfg.n_lyap > cfg.hidden_size:
        raise ValueError("n_lyap must be <= hidden_size")
    if cfg.lyap_steps <= 10 + math.ceil(cfg.lyap_steps / 10):
        raise ValueError("lyap_steps too short for official transient discard")

    model = VanillaRNN(cfg, device)
    task_optim = torch.optim.AdamW(
        model.task_parameters(), lr=cfg.lr, betas=(cfg.beta1, cfg.beta2), weight_decay=0.0
    )
    floss_optim = torch.optim.AdamW(
        model.floss_parameters(), lr=cfg.lr, betas=(cfg.beta1, cfg.beta2), weight_decay=0.0
    )
    target = torch.full((cfg.n_lyap,), cfg.lyap_target, device=device)
    log: dict = {"config": asdict(cfg), "evals": [], "floss": [], "task": []}

    t0 = time.time()
    inter_count = 0
    for epoch in range(1, cfg.train_epochs + 1):
        should_prefloss = epoch == 1 and cfg.pre_epochs > 0
        # Check inter_period != 0 before taking the modulo: a zero period used
        # to raise ZeroDivisionError here and is now treated as "disabled".
        should_interfloss = (
            cfg.inter_epochs > 0
            and cfg.inter_period != 0
            and inter_count < cfg.max_inter_episodes
            and epoch % cfg.inter_period == 0
        )
        if should_prefloss or should_interfloss:
            # The spectrum estimate indexes the input up to lyap_steps; fail
            # with a clear message instead of an IndexError deep in the loop.
            if cfg.n_lyap > 0 and cfg.floss_input_steps < cfg.lyap_steps:
                raise ValueError("floss_input_steps must be >= lyap_steps")
            n_steps = cfg.pre_epochs if should_prefloss else cfg.inter_epochs
            kind = "pre" if should_prefloss else "inter"
            if should_interfloss:
                inter_count += 1
            for floss_step in range(1, n_steps + 1):
                seed = cfg.train_epochs * cfg.seed_input + epoch + floss_step
                # Flossing uses batch size one.
                s, _ = generate_input_output(
                    1, cfg.floss_input_steps, seed, cfg.input_dim,
                    cfg.input_scale, cfg.delay, cfg.task, device,
                )
                floss_optim.zero_grad(set_to_none=True)
                spectrum = model.lyapunov_spectrum(s)
                floss_loss = (spectrum - target).square().mean()
                floss_loss.backward()
                floss_optim.step()
                if floss_step == 1 or floss_step == n_steps or floss_step % cfg.log_every_floss == 0:
                    rec = {
                        "epoch": epoch,
                        "kind": kind,
                        "floss_step": floss_step,
                        "loss": float(floss_loss.item()),
                        "lambda_mean": float(spectrum.detach().mean().item()),
                        "lambda_1": float(spectrum.detach()[0].item()),
                        "elapsed": time.time() - t0,
                    }
                    log["floss"].append(rec)
                    print(
                        f"F {kind} epoch={epoch} step={floss_step}/{n_steps} "
                        f"loss={rec['loss']:.6f} lambda1={rec['lambda_1']:+.4f}",
                        flush=True,
                    )

        # One task-gradient step per epoch on a freshly generated batch.
        seed = cfg.train_epochs * cfg.seed_input + epoch
        s, target_batch = generate_input_output(
            cfg.batch_size, cfg.train_steps, seed, cfg.input_dim,
            cfg.input_scale, cfg.delay, cfg.task, device,
        )
        # Also clears gradients that flossing left on parameters it does not update.
        task_optim.zero_grad(set_to_none=True)
        task_loss, task_acc = model.task_loss_and_accuracy(s, target_batch)
        task_loss.backward()
        torch.nn.utils.clip_grad_norm_(model.task_parameters(), 0.03)
        task_optim.step()

        if epoch == 1 or epoch == cfg.train_epochs or epoch % cfg.eval_every == 0:
            eval_metrics = evaluate(model, cfg, epoch)
            rec = {
                "epoch": epoch,
                "train_loss": float(task_loss.item()),
                "train_accuracy": float(task_acc),
                "eval_loss": eval_metrics["loss"],
                "eval_accuracy": eval_metrics["accuracy"],
                "elapsed": time.time() - t0,
            }
            log["evals"].append(rec)
            print(
                f"T epoch={epoch}/{cfg.train_epochs} loss={rec['train_loss']:.4f} "
                f"train_acc={rec['train_accuracy']:.3f} eval_acc={rec['eval_accuracy']:.3f}",
                flush=True,
            )

    out_path = Path(cfg.out)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(json.dumps(log, indent=2))
    return log
+
+
def parse_args() -> Config:
    """Build a ``Config`` from the command line.

    Every ``Config`` field becomes a ``--kebab-case`` option whose default is
    the field's default.  Boolean fields use ``BooleanOptionalAction``; all
    other fields are converted with the type of their default value.
    """
    parser = argparse.ArgumentParser()
    for name, value in asdict(Config()).items():
        flag = "--" + name.replace("_", "-")
        if isinstance(value, bool):
            options = {"action": argparse.BooleanOptionalAction}
        else:
            options = {"type": type(value)}
        parser.add_argument(flag, default=value, **options)
    parsed = parser.parse_args()
    return Config(**vars(parsed))
+
+
+if __name__ == "__main__":
+ run(parse_args())
diff --git a/eval_directional_ckpts.py b/eval_directional_ckpts.py
new file mode 100644
index 0000000..68653e6
--- /dev/null
+++ b/eval_directional_ckpts.py
@@ -0,0 +1,86 @@
+from __future__ import annotations
+
+import argparse
+import csv
+import json
+import sys
+import time
+from pathlib import Path
+
+import torch
+
+
+FLOSS_DIR = Path(__file__).resolve().parent
+sys.path.insert(0, str(FLOSS_DIR))
+
+from step7_interfloss import evaluate, load_model # noqa: E402
+
+
def parse_steps(text: str) -> list[int]:
    """Parse a comma-separated list of integers, skipping blank items.

    Whitespace around each item is ignored; a non-blank item that is not an
    integer makes ``int`` raise ``ValueError``.
    """
    steps = []
    for piece in text.split(","):
        piece = piece.strip()
        if piece:
            steps.append(int(piece))
    return steps
+
+
def main() -> None:
    """Evaluate a list of TRM checkpoints and write the results as JSON and CSV.

    For every step in ``--steps`` whose ``step_<N>`` entry exists under
    ``--ckpt-root``, the checkpoint is loaded on CUDA, evaluated on ``--n``
    samples and one result row is recorded.  Missing checkpoints are skipped
    with a message.  Rows are written to ``--out`` as CSV and to the same path
    with a ``.json`` suffix as JSON.

    If no checkpoint could be evaluated, a header-only CSV and an empty JSON
    list are written instead of failing.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--ckpt-root", required=True)
    parser.add_argument("--steps", required=True)
    parser.add_argument("--n", type=int, default=1000)
    parser.add_argument("--batch-size", type=int, default=32)
    parser.add_argument("--seed", type=int, default=20260611)
    parser.add_argument("--out", required=True)
    args = parser.parse_args()

    # Fixed column order, so the CSV header does not depend on rows[0]
    # (which does not exist when every checkpoint was skipped).
    field_names = ["step", "n", "seed", "exact_acc", "token_acc", "elapsed_sec"]

    ckpt_root = Path(args.ckpt_root)
    rows = []
    for step in parse_steps(args.steps):
        ckpt_name = f"step_{step}"
        if not (ckpt_root / ckpt_name).exists():
            print(f"[skip] missing {ckpt_name}", flush=True)
            continue

        t0 = time.time()
        head, base, cfg, _adam_cls, _sparse_cls = load_model(
            "trm",
            ckpt_root,
            ckpt_name,
            "cuda",
            batch_size_override=args.batch_size,
        )
        data_path = Path(cfg["data_path"])
        acc, tok = evaluate(
            head,
            base,
            data_path,
            n_samples=args.n,
            batch_size=args.batch_size,
            device="cuda",
            seed=args.seed,
        )
        elapsed = time.time() - t0
        row = {
            "step": step,
            "n": args.n,
            "seed": args.seed,
            "exact_acc": acc,
            "token_acc": tok,
            "elapsed_sec": elapsed,
        }
        rows.append(row)
        print(
            f"step={step} exact={acc:.4f} token={tok:.4f} elapsed={elapsed:.1f}s",
            flush=True,
        )
        # Drop the model before loading the next checkpoint to free GPU memory.
        del head, base
        torch.cuda.empty_cache()

    if not rows:
        print("[warn] no checkpoints were evaluated; writing empty outputs", flush=True)

    out_path = Path(args.out)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    # NOTE(review): if --out itself ends in ".json", this JSON file and the CSV
    # below share one path and the CSV overwrites the JSON.
    out_path.with_suffix(".json").write_text(json.dumps(rows, indent=2))
    with out_path.open("w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=field_names)
        writer.writeheader()
        writer.writerows(rows)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/eval_trm_gbs192_step260410_n1000.summary.csv b/eval_trm_gbs192_step260410_n1000.summary.csv
new file mode 100644
index 0000000..1d25685
--- /dev/null
+++ b/eval_trm_gbs192_step260410_n1000.summary.csv
@@ -0,0 +1,2 @@
+deterministic/exact,deterministic/token_acc,fd_lyap,fd_spectrum_k,include_clean,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/exact,oracle_pass/token_acc,perturb_both,perturb_h,perturb_l,q_max/exact,q_max/token_acc,q_mean,rollout0/exact,rollout0/token_acc,rollouts,steps
+0.7879999876022339,0.9226666688919067,0.0,0.0,0.0,0.7879999876022339,0.9226666688919067,1000.0,0.0,0.7879999876022339,0.9226666688919067,0.0,0.0,1.0,0.7879999876022339,0.9226666688919067,3.5457303524017334,0.7879999876022339,0.9226666688919067,1.0,16.0
diff --git a/eval_trm_gbs192_step52082_n1000.summary.csv b/eval_trm_gbs192_step52082_n1000.summary.csv
new file mode 100644
index 0000000..53637db
--- /dev/null
+++ b/eval_trm_gbs192_step52082_n1000.summary.csv
@@ -0,0 +1,2 @@
+deterministic/exact,deterministic/token_acc,fd_lyap,fd_spectrum_k,include_clean,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/exact,oracle_pass/token_acc,perturb_both,perturb_h,perturb_l,q_max/exact,q_max/token_acc,q_mean,rollout0/exact,rollout0/token_acc,rollouts,steps
+0.6610000133514404,0.8824321031570435,0.0,0.0,0.0,0.6610000133514404,0.8824321031570435,1000.0,0.0,0.6610000133514404,0.8824321031570435,0.0,0.0,1.0,0.6610000133514404,0.8824321031570435,1.226062536239624,0.6610000133514404,0.8824321031570435,1.0,16.0
diff --git a/eval_trm_official768_step13020_n1000.summary.csv b/eval_trm_official768_step13020_n1000.summary.csv
new file mode 100644
index 0000000..fb4d943
--- /dev/null
+++ b/eval_trm_official768_step13020_n1000.summary.csv
@@ -0,0 +1,2 @@
+deterministic/exact,deterministic/token_acc,fd_lyap,fd_spectrum_k,include_clean,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/exact,oracle_pass/token_acc,perturb_both,perturb_h,perturb_l,q_max/exact,q_max/token_acc,q_mean,rollout0/exact,rollout0/token_acc,rollouts,steps
+0.6050000190734863,0.8624938130378723,0.0,0.0,0.0,0.6050000190734863,0.8624938130378723,1000.0,0.0,0.6050000190734863,0.8624938130378723,0.0,0.0,1.0,0.6050000190734863,0.8624938130378723,0.3627968728542328,0.6050000190734863,0.8624938130378723,1.0,16.0
diff --git a/extend_rollout.py b/extend_rollout.py
new file mode 100644
index 0000000..09af1f8
--- /dev/null
+++ b/extend_rollout.py
@@ -0,0 +1,68 @@
+"""Discriminate Rainer's hypotheses: run the trained recurrence FAR beyond the 16-segment budget
+and watch the fate of trajectories that FAIL at segment 16.
+ - settle to CORRECT later => transient that would self-resolve (more compute helps)
+ - settle to WRONG (drift->0) => multistable WRONG attractor (genuine bistability)
+ - never settle (drift stays high) => chaotic saddle / persistent non-convergence
+Plain forward (no JVP). Saves per-segment decoded-exactness and per-segment z_H drift.
+"""
+from __future__ import annotations
+import sys, argparse
+from pathlib import Path
+import numpy as np
+import torch
+
+sys.path.insert(0, "/home/yurenh2/rrm/research/flossing")
+from diagnose_trm_joint_maze import load_model, load_test_samples
+
+
def main():
    """Roll the trained recurrence out for ``--n-seg`` segments and record its fate.

    For every test batch the hidden states start from ``inner.H_init`` /
    ``inner.L_init`` and are iterated without gradients.  After each segment
    the script stores

      - whether the decoded prediction matches the labels at every position
        where ``labels > 0`` (``exact_seg``), and
      - the L2 norm of the change in ``z_H`` since the previous segment
        (``drift_seg``; zero for the first segment).

    Both arrays, together with the sample indices, are saved to ``--out`` via
    ``np.savez_compressed``.  The progress line reports exactness at segment
    16 (or at the last segment when ``--n-seg`` is smaller than 16) and at
    the final segment.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--ckpt-root", required=True)
    ap.add_argument("--ckpt-name", required=True)
    ap.add_argument("--data", required=True)
    ap.add_argument("--n", type=int, default=512)
    ap.add_argument("--batch-size", type=int, default=32)
    ap.add_argument("--n-seg", type=int, default=128)
    ap.add_argument("--seed", type=int, default=0)
    ap.add_argument("--out", required=True)
    args = ap.parse_args()
    if args.n_seg < 1:
        ap.error("--n-seg must be at least 1")
    device = "cuda"
    model, cfg, train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device)
    inner = model.inner
    test = load_test_samples(Path(args.data), args.n, 0, 1, args.seed)
    n = len(test["inputs"])
    pe = inner.puzzle_emb_len
    # Segment reported as the reference budget; clamped so that runs with
    # fewer than 16 segments do not index past the recorded columns.
    ref_seg = min(16, args.n_seg)

    EX, DR, IDX = [], [], []
    for s in range(0, n, args.batch_size):
        e = min(s + args.batch_size, n)
        batch = {k: test[k][s:e].to(device) for k in ["inputs", "labels", "puzzle_identifiers"]}
        B = batch["inputs"].shape[0]
        seq_full = inner.config.seq_len + pe
        hidden = inner.config.hidden_size
        with torch.no_grad():
            z_H = inner.H_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
            z_L = inner.L_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
            seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None)
            inp_emb = inner._input_embeddings(batch["inputs"], batch["puzzle_identifiers"])
            labels = batch["labels"]
            mask = labels > 0
            prev_zH = None
            ex_seg, dr_seg = [], []
            for seg in range(args.n_seg):
                for _h in range(inner.config.H_cycles):
                    for _l in range(inner.config.L_cycles):
                        z_L = inner.L_level(z_L, z_H + inp_emb, **seq_info)
                    z_H = inner.L_level(z_H, z_L, **seq_info)
                # Decode, dropping the first pe (puzzle-embedding) positions.
                p = inner.lm_head(z_H)[:, pe:].float().argmax(-1)
                # Exact only if every position with labels > 0 is correct.
                ex_seg.append(((p == labels) | ~mask).all(-1).float().cpu())
                dr_seg.append((torch.zeros(B) if prev_zH is None
                               else (z_H - prev_zH).float().flatten(1).norm(dim=1).cpu()))
                prev_zH = z_H.detach()
        ex_mat = torch.stack(ex_seg, 1)
        EX.append(ex_mat.numpy())
        DR.append(torch.stack(dr_seg, 1).numpy())
        IDX.append(test["idx"][s:e])
        print(f" [{e}/{n}] exact@{ref_seg}={ex_mat[:, ref_seg - 1].mean():.3f} exact@{args.n_seg}={ex_mat[:, -1].mean():.3f}", flush=True)

    np.savez_compressed(args.out, exact_seg=np.concatenate(EX), drift_seg=np.concatenate(DR),
                        idx=np.concatenate(IDX))
    print("saved", args.out)
+
+if __name__ == "__main__":
+ main()
diff --git a/external/GradientFlossing/README.md b/external/GradientFlossing/README.md
new file mode 100644
index 0000000..962c955
--- /dev/null
+++ b/external/GradientFlossing/README.md
@@ -0,0 +1,79 @@
+# Gradient Flossing: Improving Gradient Descent through Dynamic Control of Jacobians
+
+This repository contains the implementation code for the manuscript: <br>
+__Gradient Flossing: Improving Gradient Descent through Dynamic Control of Jacobians__ <br>
+## Overview
+In this work we show that RNN training can be improved by pushing Lyapunov exponents of the forward pass to zero during or before training.
+
+## Installation
+
+#### Prerequisites
+- Download [Julia](https://julialang.org/downloads/)
+
+#### Dependencies
+- Julia (1.6)
+- Flux, BackwardsLinalg
+
+## Getting started
+To install the required packages, run the following in the julia REPL after installing Julia:
+
+```
+using Pkg
+
+for pkg in ["Flux", "BackwardsLinalg"]
+ Pkg.add(pkg)
+end
+```
+
+For example, to train an RNN on the delayed XOR task, run:
+```
+include("GradientFlossing_ExampleCode.jl")
+# setting parameters:
+N, E, Ef, Ei, Ep, Ni, B, S, T, Tp, Ti, sIC, sIn, sNet, sONS, lr, b1, b2, IC, g, gbar, I1, delay, wsS, wsM, wrS, wrM, bS, bM, nLE, task, intype, Lwnt=
+80, 3001, 100, 500, 500, 2, 16, 1, 300, 55, 300, 1,1,1,1, 0.001f0, 0.9, 0.999, 1, 1.0, 0.0, 1.0,10, 1.0f0, 0.0f0, 1.0f0, 0.0f0, 0.1f0, 0.0f0,75, -1, 3, 0.0
+
+trainRNNflossing(N, E, Ef, Ei, Ep, Ni, B, S, T, Tp, Ti, sIC, sIn, sNet, sONS, lr, b1, b2, IC, g, gbar, I1, delay, wsS, wsM, wrS, wrM, bS, bM, nLE, task, intype, Lwnt)
+```
+
+## Repository Overview
+_GradientFlossing_ExampleCode.jl_:\
+Example scripts for training networks with gradient flossing before training, with gradient flossing before and during training and without gradient flossing.
+
+
+_GradientFlossing_XOR.jl_:\
+Generates input and target output for copy task and delayed XOR task.
+
+<!---
+
+runOneStimulus.jl trains an RNN on tracking one OU-signal showing that the network becomes more tightly balanced over training epochs.\
+runTwoStimuli.jl trains an RNN on two OU-signal stimulus showing that the network becomes more tightly balanced over training epochs and breaks up into two weakly-connected subnetworks.\
+runTheeStimuli.jl trains an RNN on two OU-signal stimulus showing that the network becomes more tightly balanced over training epochs and breaks up into three weakly-connected subnetworks.\
+![Training RNN on two signals leads to balanced subpopulations](/figures/S=2.svg?raw=true "balanced subnetworks emerge after runTheeStimuli.jl")
+-->
+
+
+<!---
+
+### Training dynamics of eigenvalues:
+Here is a visualization of the recurrent weight matrix and the eigenvalues throughout across training epochs.
+![Training dynamics of networks trained on multiple signals shows first tracking of global mean input](eigenvalue_movie_2D_task.gif)
+-->
+
+
+### Implementation details
+A full specification of packages used and their versions can be found in _packages.txt_ .\
+For learning rates the default ADAM parameters were used to avoid any impression of fine-tuning.\
+All simulations were run on a single CPU and took on the order of minutes to a few hours.
+
+
+
+<!---
+### figures/
+Contains all figures of the main text and the supplement.
+-->
+
+
+<!---
+### tex/
+Contains the raw text of the main text and the supplement.
+-->
diff --git a/external/GradientFlossing/packages.txt b/external/GradientFlossing/packages.txt
new file mode 100644
index 0000000..e90facc
--- /dev/null
+++ b/external/GradientFlossing/packages.txt
@@ -0,0 +1,6 @@
+julia => v"1.6.7"
+
+[442b4e1a] BackwardsLinalg v0.1.1
+[587475ba] Flux v0.11.3
+[e6cf234a] RandomNumbers v1.5.3
+[e88e6eb3] Zygote v0.5.1
diff --git a/external/julia-1.6.7/LICENSE.md b/external/julia-1.6.7/LICENSE.md
new file mode 100644
index 0000000..7912722
--- /dev/null
+++ b/external/julia-1.6.7/LICENSE.md
@@ -0,0 +1,26 @@
+MIT License
+
+Copyright (c) 2009-2021: Jeff Bezanson, Stefan Karpinski, Viral B. Shah, and other contributors: https://github.com/JuliaLang/julia/contributors
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+end of terms and conditions
+
+Please see THIRDPARTY.md for license information for other software used in this project.
diff --git a/external/julia-1.6.7/share/julia/base/ryu/LICENSE.md b/external/julia-1.6.7/share/julia/base/ryu/LICENSE.md
new file mode 100644
index 0000000..74c7186
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/base/ryu/LICENSE.md
@@ -0,0 +1,25 @@
+The code in this directory (base/ryu) is a derivative based on the work in the https://github.com/ulfjack/ryu repository, which allows the use of the Boost software license, included below.
+
+Boost Software License - Version 1.0 - August 17th, 2003
+
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE. \ No newline at end of file
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/ArgTools/README.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/ArgTools/README.md
new file mode 100644
index 0000000..a0415d7
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/ArgTools/README.md
@@ -0,0 +1,431 @@
+# ArgTools
+
+[![Build Status](https://travis-ci.org/JuliaIO/ArgTools.jl.svg?branch=master)](https://travis-ci.org/JuliaIO/ArgTools.jl)
+[![Codecov](https://codecov.io/gh/JuliaIO/ArgTools.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/JuliaIO/ArgTools.jl)
+
+`ArgTools` provides tools for creating consistent, flexible APIs that work with
+various kinds of function arguments. In the current version, it helps deal with
+arguments that are, at their core, IO handles, but which you'd like to allow the
+user to specify directly as file names, commands, pipelines, or, of course, as
+raw IO handles. For write arguments, it's also possible to use `nothing` and
+write to a temporary file whose path is returned.
+
+## API
+
+There are two parts to the `ArgTools` API:
+
+1. Functions and types for helping define flexible function APIs.
+2. Functions for helping to test APIs defined with the above.
+
+While it's great to be able to define a flexible API, if you're not sure that
+it works the way it's supposed to, what's the benefit? Since it can be quite
+verbose to test such a combinatorial explosion of methods, `ArgTools` also
+provides tools to help test all the ways your tools can be called to make
+sure everything is working as intended.
+
+### Argument Handling
+
+The API for helping define flexible function signatures consists of two types
+and four helper functions: `ArgRead` and `ArgWrite`; `arg_read`, `arg_write`,
+`arg_isdir` and `arg_mkdir`.
+
+<!-- BEGIN: copied from inline doc strings -->
+
+#### ArgRead
+
+```jl
+ArgRead = Union{AbstractString, AbstractCmd, IO}
+```
+The `ArgRead` type is a union of the types that the `arg_read` function knows
+how to convert into readable IO handles. See [`arg_read`](@ref) for details.
+
+#### ArgWrite
+
+```jl
+ArgWrite = Union{AbstractString, AbstractCmd, IO}
+```
+The `ArgWrite` type is a union of the types that the `arg_write` function knows
+how to convert into writeable IO handles, except for `Nothing` which `arg_write`
+handles by generating a temporary file. See [`arg_write`](@ref) for details.
+
+#### arg_read
+
+```jl
+arg_read(f::Function, arg::ArgRead) -> f(arg_io)
+```
+The `arg_read` function accepts an argument `arg` that can be any of these:
+
+- `AbstractString`: a file path to be opened for reading
+- `AbstractCmd`: a command to be run, reading from its standard output
+- `IO`: an open IO handle to be read from
+
+Whether the body returns normally or throws an error, a path which is opened
+will be closed before returning from `arg_read` and an `IO` handle will be
+flushed but not closed before returning from `arg_read`.
+
+#### arg_write
+
+```jl
+arg_write(f::Function, arg::ArgWrite) -> arg
+arg_write(f::Function, arg::Nothing) -> tempname()
+```
+The `arg_write` function accepts an argument `arg` that can be any of these:
+
+- `AbstractString`: a file path to be opened for writing
+- `AbstractCmd`: a command to be run, writing to its standard input
+- `IO`: an open IO handle to be written to
+- `Nothing`: a temporary path should be written to
+
+If the body returns normally, a path that is opened will be closed upon
+completion; an IO handle argument is left open but flushed before return. If the
+argument is `nothing` then a temporary path is opened for writing and closed
+upon completion and the path is returned from `arg_write`. In all other cases,
+`arg` itself is returned. This is a useful pattern since you can consistently
+return whatever was written, whether an argument was passed or not.
+
+If there is an error during the evaluation of the body, a path that is opened by
+`arg_write` for writing will be deleted, whether it's passed in as a string or a
+temporary path generated when `arg` is `nothing`.
+
+#### arg_isdir
+
+```jl
+arg_isdir(f::Function, arg::AbstractString) -> f(arg)
+```
+The `arg_isdir` function takes `arg` which must be the path to an existing
+directory (an error is raised otherwise) and passes that path to `f` finally
+returning the result of `f(arg)`. This is definitely the least useful tool
+offered by `ArgTools` and mostly exists for symmetry with `arg_mkdir` and to
+give consistent error messages.
+
+#### arg_mkdir
+
+```jl
+arg_mkdir(f::Function, arg::AbstractString) -> arg
+arg_mkdir(f::Function, arg::Nothing) -> mktempdir()
+```
+The `arg_mkdir` function takes `arg` which must either be one of:
+
+- a path to an already existing empty directory,
+- a non-existent path which can be created as a directory, or
+- `nothing` in which case a temporary directory is created.
+
+In all cases the path to the directory is returned. If an error occurs during
+`f(arg)`, the directory is returned to its original state: if it already existed
+but was empty, it will be emptied; if it did not exist it will be deleted.
+
+<!-- END: copied from inline doc strings -->
+
+### Function Testing
+
+Using `ArgTools` is easy; thoroughly testing flexible functions defined using
+`ArgTools` is a bit trickier, but the package includes testing tools that help.
+The API for testing functions defined with the argument handling API consists
+of two functions and a macro: `arg_readers`, `arg_writers` and `@arg_test`.
+
+<!-- BEGIN: copied from inline doc strings -->
+
+#### arg_readers
+
+```jl
+arg_readers(arg :: AbstractString, [ type = ArgRead ]) do arg::Function
+ ## pre-test setup ##
+ @arg_test arg begin
+ arg :: ArgRead
+ ## test using `arg` ##
+ end
+ ## post-test cleanup ##
+end
+```
+
+The `arg_readers` function takes a path to be read and a single-argument do
+block, which is invoked once for each test reader type that `arg_read` can
+handle. If the optional `type` argument is given then the do block is only
+invoked for readers that produce arguments of that type.
+
+The `arg` passed to the do block is not the argument value itself, because some
+of the test argument types need to be initialized and finalized for each test case.
+Consider an open file handle argument: once you've used it for one test, you
+can't use it again; you need to close it and open the file again for the next
+test. This function `arg` can be converted into an `ArgRead` instance using
+`@arg_test arg begin ... end`.
+
+#### arg_writers
+
+```jl
+arg_writers([ type = ArgWrite ]) do path::String, arg::Function
+ ## pre-test setup ##
+ @arg_test arg begin
+ arg :: ArgWrite
+ ## test using `arg` ##
+ end
+ ## post-test cleanup ##
+end
+```
+
+The `arg_writers` function takes a do block, which is invoked once for each test
+writer type that `arg_write` can handle with a temporary (non-existent) `path`
+and `arg` which can be converted into various writable argument types which
+write to `path`. If the optional `type` argument is given then the do block is
+only invoked for writers that produce arguments of that type.
+
+The `arg` passed to the do block is not the argument value itself, because some
+of the test argument types need to be initialized and finalized for each test case.
+Consider an open file handle argument: once you've used it for one test, you
+can't use it again; you need to close it and open the file again for the next
+test. This function `arg` can be converted into an `ArgWrite` instance using
+`@arg_test arg begin ... end`.
+
+There is also an `arg_writers` method that takes a path name like `arg_readers`:
+
+```jl
+arg_writers(path::AbstractString, [ type = ArgWrite ]) do arg::Function
+ ## pre-test setup ##
+ @arg_test arg begin
+ arg :: ArgWrite
+ ## test using `arg` ##
+ end
+ ## post-test cleanup ##
+end
+```
+
+This method is useful if you need to specify `path` instead of using the path name
+generated by `tempname()`. Since `path` is passed from outside of `arg_writers`,
+the path is not an argument to the do block in this form.
+
+#### @arg_test
+
+```jl
+@arg_test arg1 arg2 ... body
+```
+
+The `@arg_test` macro is used to convert `arg` functions provided by
+`arg_readers` and `arg_writers` into actual argument values. When you write
+`@arg_test arg body` it is equivalent to `arg(arg -> body)`.
+
+<!-- END: copied from inline doc strings -->
+
+## Examples
+
+The examples, like the API, are split into two parts:
+
+1. An example of defining a function with a flexible API using the main API;
+2. Examples of how to thoroughly test that function using the test utilities.
+
+### Usage Example
+
+The best explanation may be an example, which is also used for testing:
+
+```jl
+using ArgTools
+
+function send_data(src::ArgRead, dst::Union{ArgWrite, Nothing} = nothing)
+ arg_read(src) do src_io
+ arg_write(dst) do dst_io
+ buffer = Vector{UInt8}(undef, 2*1024*1024)
+ while !eof(src_io)
+ n = readbytes!(src_io, buffer)
+ write(dst_io, view(buffer, 1:n))
+ end
+ end
+ end
+end
+```
+
+This defines the `send_data` function which reads data from a source and writes
+it to a destination, specified by the `src` and `dst` arguments, respectively.
+Thanks to `ArgTools`, this relatively simple definition acts as a swiss-army
+knife for sending data from a source to a destination. Here are some examples:
+
+```jl
+julia> cd(mktempdir())
+
+julia> write("hello.txt", "Hello, world.\n")
+14
+
+julia> run(`cat hello.txt`);
+Hello, world.
+
+julia> send_data("hello.txt", "hello_copy.txt")
+"hello_copy.txt"
+
+julia> run(`cat $ans`);
+Hello, world.
+
+julia> rm("hello_copy.txt")
+
+julia> send_data("hello.txt", stdout);
+Hello, world.
+
+julia> send_data("hello.txt", pipeline(`gzip -9`, "hello.gz"));
+
+julia> run(`gzcat hello.gz`);
+Hello, world.
+
+julia> hello_copy = send_data(`gzcat hello.gz`)
+"/var/folders/4g/b8p546px3nd550b3k288mhp80000gp/T/jl_cguepi"
+
+julia> run(`cat $hello_copy`);
+Hello, world.
+```
+
+To understand the definition of `send_data`, let's work from the inside out:
+
+* The main body of the function operates on the `src_io` and `dst_io` IO
+ handles, using a buffer to read data from the former to the latter in 2MiB
+ blocks.
+
+* The calls to `arg_read` and `arg_write` transform the `src` and `dst`
+ arguments from various types to `src_io` and `dst_io` IO handles. This allows
+ the inner body to handle the core case of dealing with IO handles, without
+ having to worry about the various possible incoming argument types. See the API
+ section above for more details about how `arg_read` and `arg_write` work on
+ different types.
+
+* The arguments to `send_data` are `src::ArgRead` and `dst::ArgWrite` where
+ `dst` is optional and defaults to `nothing` if not given. The `ArgRead` type is
+ a union including all the types that `arg_read` knows how to handle. Similarly,
+ the `ArgWrite` type is a union including the types that `arg_write` knows how to
+ handle, except for `nothing` which must be explicitly opted into, for which
+ `arg_write` creates a temporary file and returns its path.
+
+Taken altogether, this allows the `send_data` function to work with a combinatorial
+explosion of type signatures:
+
+* `send_data(src::AbstractString)`
+* `send_data(src::AbstractCmd)`
+* `send_data(src::IO)`
+* `send_data(src::AbstractString, dst::AbstractString)`
+* `send_data(src::AbstractCmd, dst::AbstractString)`
+* `send_data(src::IO, dst::AbstractString)`
+* `send_data(src::AbstractString, dst::AbstractCmd)`
+* `send_data(src::AbstractCmd, dst::AbstractCmd)`
+* `send_data(src::IO, dst::AbstractCmd)`
+* `send_data(src::AbstractString, dst::IO)`
+* `send_data(src::AbstractCmd, dst::IO)`
+* `send_data(src::IO, dst::IO)`
+
+Each combination guarantees the proper initialization and cleanup of its
+arguments whether it is opening a file and closing it upon completion or error,
+or creating a temporary output file and returning it upon completion or deleting
+it on error. If the arguments are commands or pipelines, those are correctly
+opened with the necessary read/write options.
+
+### Testing Example
+
+Now that we've defined the `send_data` function, we must test it. But it has so
+many different kinds of arguments that it can accept, how do we produce tests
+for all of these combinations? `ArgTools` also offers tools to help with testing
+APIs that it lets you define. The example tests assume that the above definition
+of `send_data` has already been evaluated in the same Julia session.
+
+```jl
+using Test
+
+# create a source file
+src_file = tempname()
+data = rand(UInt8, 666)
+write(src_file, data)
+
+print_sig(args...) =
+ println("send_data(", join(map(typeof, args), ", "), ")")
+
+arg_readers(src_file) do src
+ # test 1-arg methods
+ @arg_test src begin
+ print_sig(src)
+ dst_file = send_data(src)
+ @test data == read(dst_file)
+ rm(dst_file)
+ end
+
+ # test 2-arg methods
+ arg_writers() do dst_file, dst
+ @arg_test src dst begin
+ print_sig(src, dst)
+ @test dst == send_data(src, dst)
+ end
+ @test data == read(dst_file)
+ end
+end
+
+# cleanup
+rm(src_file)
+```
+
+Evaluating this testing code prints the following output:
+```jl
+send_data(String)
+send_data(String, String)
+send_data(String, Cmd)
+send_data(String, Base.CmdRedirect)
+send_data(String, IOStream)
+send_data(String, Base.Process)
+send_data(Cmd)
+send_data(Cmd, String)
+send_data(Cmd, Cmd)
+send_data(Cmd, Base.CmdRedirect)
+send_data(Cmd, IOStream)
+send_data(Cmd, Base.Process)
+send_data(Base.CmdRedirect)
+send_data(Base.CmdRedirect, String)
+send_data(Base.CmdRedirect, Cmd)
+send_data(Base.CmdRedirect, Base.CmdRedirect)
+send_data(Base.CmdRedirect, IOStream)
+send_data(Base.CmdRedirect, Base.Process)
+send_data(IOStream)
+send_data(IOStream, String)
+send_data(IOStream, Cmd)
+send_data(IOStream, Base.CmdRedirect)
+send_data(IOStream, IOStream)
+send_data(IOStream, Base.Process)
+send_data(Base.Process)
+send_data(Base.Process, String)
+send_data(Base.Process, Cmd)
+send_data(Base.Process, Base.CmdRedirect)
+send_data(Base.Process, IOStream)
+send_data(Base.Process, Base.Process)
+```
+
+Test code isn't normally this verbose, but for this example it may be
+helpful to understand what's happening. What this output shows is the various
+ways in which this short bit of code tests invoking the `send_data` function.
+Here are some details about what's happening:
+
+* The call to `arg_readers(src_file)` evaluates the attached do block with five
+ different `arg` values, which can be converted to readable arguments of the
+ types: `String`, `Cmd`, `CmdRedirect`, `IOStream` and `Process`.
+
+* The call to `@arg_test src begin ... end` converts `src` into readable
+ arguments of those same types and closes or finalizes each at the end.
+
+* The call to `arg_writers()` evaluates the attached do block with five
+ different `arg` values, which can be converted to writable arguments of the
+ types: `String`, `Cmd`, `CmdRedirect`, `IOStream` and `Process`.
+
+* The call to `@arg_test src dst begin ... end` converts `src` into readable
+ arguments and `dst` into writeable arguments of the same set of types, and
+ closes or otherwise finalizes each one at the end of the block.
+
+This example test code illustrates some of the reasoning features of the testing
+API which might initially seem puzzling. For example, it shows why `arg_readers`
+and `arg_writers` don't simply produce argument values that can be passed to the
+function being tested, instead requiring conversion by the `@arg_test` macro.
+There are two reasons:
+
+1. The same value returned from `arg_readers` or `arg_writers` may need to be
+ used in multiple tests and some argument types, such as IO handles, need to
+ be initialized before each test and finalized after. The `@arg_test` block
+ delimits where initialization and finalization occur.
+
+2. Sometimes operations need to be done after the `@arg_test` block but before
+ the end of the enclosing `arg_readers` or `arg_writers` block. Testing that
+ `dst_file` has the expected contents, i.e. `@test data == read(dst_file)`,
+ will not work reliably inside of the `@arg_test` block: data is not guaranteed
+ to have been fully written to `dst_file` until `dst` is finalized. This is an
+ issue when `dst` is an already-opened process, for example: `arg_write` leaves
+ the process open since it received it that way (you might want to write more
+ data to it), and while it does flush the handle, there is no guarantee that
+ the process will get data to its final destination until the process has
+ exited. Putting the test after the `@arg_test` block ensures that the process
+ has terminated, so we can reliably test the contents of `dst_file`.
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Artifacts/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Artifacts/docs/src/index.md
new file mode 100644
index 0000000..80f4c62
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Artifacts/docs/src/index.md
@@ -0,0 +1,21 @@
+# Artifacts
+
+```@meta
+DocTestSetup = :(using Artifacts)
+```
+
+Starting with Julia 1.6, the artifacts support has moved from `Pkg.jl` to Julia itself.
+Until proper documentation can be added here, you can learn more about artifacts in the
+`Pkg.jl` manual at <https://julialang.github.io/Pkg.jl/v1/artifacts/>.
+
+!!! compat "Julia 1.6"
+ Julia's artifacts API requires at least Julia 1.6. In Julia
+ versions 1.3 to 1.5, you can use `Pkg.Artifacts` instead.
+
+
+```@docs
+Artifacts.artifact_meta
+Artifacts.artifact_hash
+Artifacts.find_artifacts_toml
+Artifacts.@artifact_str
+```
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Base64/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Base64/docs/src/index.md
new file mode 100644
index 0000000..6bc647f
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Base64/docs/src/index.md
@@ -0,0 +1,10 @@
+# Base64
+
+```@docs
+Base64.Base64
+Base64.Base64EncodePipe
+Base64.base64encode
+Base64.Base64DecodePipe
+Base64.base64decode
+Base64.stringmime
+```
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/CRC32c/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/CRC32c/docs/src/index.md
new file mode 100644
index 0000000..1304709
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/CRC32c/docs/src/index.md
@@ -0,0 +1,6 @@
+# CRC32c
+
+```@docs
+CRC32c.crc32c
+CRC32c.crc32c(::IO, ::Integer, ::UInt32)
+```
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Dates/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Dates/docs/src/index.md
new file mode 100644
index 0000000..975c087
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Dates/docs/src/index.md
@@ -0,0 +1,826 @@
+# Dates
+
+```@meta
+DocTestSetup = :(using Dates)
+```
+
+The `Dates` module provides two types for working with dates: [`Date`](@ref) and [`DateTime`](@ref),
+representing day and millisecond precision, respectively; both are subtypes of the abstract [`TimeType`](@ref).
+The motivation for distinct types is simple: some operations are much simpler, both in terms of
+code and mental reasoning, when the complexities of greater precision don't have to be dealt with.
+For example, since the [`Date`](@ref) type only resolves to the precision of a single date (i.e.
+no hours, minutes, or seconds), normal considerations for time zones, daylight savings/summer
+time, and leap seconds are unnecessary and avoided.
+
+Both [`Date`](@ref) and [`DateTime`](@ref) are basically immutable [`Int64`](@ref) wrappers.
+The single `instant` field of either type is actually a `UTInstant{P}` type, which
+represents a continuously increasing machine timeline based on the UT second [^1]. The
+[`DateTime`](@ref) type is not aware of time zones (*naive*, in Python parlance),
+analogous to a *LocalDateTime* in Java 8. Additional time zone functionality
+can be added through the [TimeZones.jl package](https://github.com/JuliaTime/TimeZones.jl/), which
+compiles the [IANA time zone database](http://www.iana.org/time-zones). Both [`Date`](@ref) and
+[`DateTime`](@ref) are based on the [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) standard, which follows the proleptic Gregorian calendar.
+One note is that the ISO 8601 standard is particular about BC/BCE dates. In general, the last
+day of the BC/BCE era, 1-12-31 BC/BCE, was followed by 1-1-1 AD/CE, thus no year zero exists.
+The ISO standard, however, states that 1 BC/BCE is year zero, so `0000-12-31` is the day before
+`0001-01-01`, and year `-0001` (yes, negative one for the year) is 2 BC/BCE, year `-0002` is 3
+BC/BCE, etc.
+
+[^1]:
+ The notion of the UT second is actually quite fundamental. There are basically two different notions
+ of time generally accepted, one based on the physical rotation of the earth (one full rotation
+ = 1 day), the other based on the SI second (a fixed, constant value). These are radically different!
+ Think about it, a "UT second", as defined relative to the rotation of the earth, may have a different
+ absolute length depending on the day! Anyway, the fact that [`Date`](@ref) and [`DateTime`](@ref)
+ are based on UT seconds is a simplifying, yet honest assumption so that things like leap seconds
+ and all their complexity can be avoided. This basis of time is formally called [UT](https://en.wikipedia.org/wiki/Universal_Time)
+ or UT1. Basing types on the UT second basically means that every minute has 60 seconds and every
+ day has 24 hours and leads to more natural calculations when working with calendar dates.
+
+## Constructors
+
+[`Date`](@ref) and [`DateTime`](@ref) types can be constructed by integer or [`Period`](@ref)
+types, by parsing, or through adjusters (more on those later):
+
+```jldoctest
+julia> DateTime(2013)
+2013-01-01T00:00:00
+
+julia> DateTime(2013,7)
+2013-07-01T00:00:00
+
+julia> DateTime(2013,7,1)
+2013-07-01T00:00:00
+
+julia> DateTime(2013,7,1,12)
+2013-07-01T12:00:00
+
+julia> DateTime(2013,7,1,12,30)
+2013-07-01T12:30:00
+
+julia> DateTime(2013,7,1,12,30,59)
+2013-07-01T12:30:59
+
+julia> DateTime(2013,7,1,12,30,59,1)
+2013-07-01T12:30:59.001
+
+julia> Date(2013)
+2013-01-01
+
+julia> Date(2013,7)
+2013-07-01
+
+julia> Date(2013,7,1)
+2013-07-01
+
+julia> Date(Dates.Year(2013),Dates.Month(7),Dates.Day(1))
+2013-07-01
+
+julia> Date(Dates.Month(7),Dates.Year(2013))
+2013-07-01
+```
+
+[`Date`](@ref) or [`DateTime`](@ref) parsing is accomplished by the use of format strings. Format
+strings work by the notion of defining *delimited* or *fixed-width* "slots" that contain a period
+to parse and passing the text to parse and format string to a [`Date`](@ref) or [`DateTime`](@ref)
+constructor, of the form `Date("2015-01-01","y-m-d")` or `DateTime("20150101","yyyymmdd")`.
+
+Delimited slots are marked by specifying the delimiter the parser should expect between two subsequent
+periods; so `"y-m-d"` lets the parser know that between the first and second slots in a date string
+like `"2014-07-16"`, it should find the `-` character. The `y`, `m`, and `d` characters let the
+parser know which periods to parse in each slot.
+
+Fixed-width slots are specified by repeating the period character the number of times corresponding
+to the width with no delimiter between characters. So `"yyyymmdd"` would correspond to a date
+string like `"20140716"`. The parser distinguishes a fixed-width slot by the absence of a delimiter,
+noting the transition `"yyyymm"` from one period character to the next.
+
+Text-form month parsing is also supported through the `u` and `U` characters, for
+abbreviated and full-length month names, respectively. By default, only English month names are
+supported, so `u` corresponds to "Jan", "Feb", "Mar", etc. And `U` corresponds to "January", "February",
+"March", etc. Similar to other name=>value mapping functions [`dayname`](@ref) and [`monthname`](@ref),
+custom locales can be loaded by passing in the `locale=>Dict{String,Int}` mapping to the `MONTHTOVALUEABBR`
+and `MONTHTOVALUE` dicts for abbreviated and full-name month names, respectively.
+
+One note on parsing performance: using the `Date(date_string,format_string)` function is fine
+if only called a few times. If there are many similarly formatted date strings to parse however,
+it is much more efficient to first create a [`Dates.DateFormat`](@ref), and pass it instead of
+a raw format string.
+
+```jldoctest
+julia> df = DateFormat("y-m-d");
+
+julia> dt = Date("2015-01-01",df)
+2015-01-01
+
+julia> dt2 = Date("2015-01-02",df)
+2015-01-02
+```
+
+You can also use the `dateformat""` string macro. This macro creates the `DateFormat` object once when the macro is expanded and uses the same `DateFormat` object even if a code snippet is run multiple times.
+
+```jldoctest
+julia> for i = 1:10^5
+ Date("2015-01-01", dateformat"y-m-d")
+ end
+```
+
+A full suite of parsing and formatting tests and examples is available in [`stdlib/Dates/test/io.jl`](https://github.com/JuliaLang/julia/blob/master/stdlib/Dates/test/io.jl).
+
+## Durations/Comparisons
+
+Finding the length of time between two [`Date`](@ref) or [`DateTime`](@ref) is straightforward
+given their underlying representation as `UTInstant{Day}` and `UTInstant{Millisecond}`, respectively.
+The difference between [`Date`](@ref) is returned in the number of [`Day`](@ref), and [`DateTime`](@ref)
+in the number of [`Millisecond`](@ref). Similarly, comparing [`TimeType`](@ref) is a simple matter
+of comparing the underlying machine instants (which in turn compares the internal [`Int64`](@ref) values).
+
+```jldoctest
+julia> dt = Date(2012,2,29)
+2012-02-29
+
+julia> dt2 = Date(2000,2,1)
+2000-02-01
+
+julia> dump(dt)
+Date
+ instant: Dates.UTInstant{Day}
+ periods: Day
+ value: Int64 734562
+
+julia> dump(dt2)
+Date
+ instant: Dates.UTInstant{Day}
+ periods: Day
+ value: Int64 730151
+
+julia> dt > dt2
+true
+
+julia> dt != dt2
+true
+
+julia> dt + dt2
+ERROR: MethodError: no method matching +(::Date, ::Date)
+[...]
+
+julia> dt * dt2
+ERROR: MethodError: no method matching *(::Date, ::Date)
+[...]
+
+julia> dt / dt2
+ERROR: MethodError: no method matching /(::Date, ::Date)
+
+julia> dt - dt2
+4411 days
+
+julia> dt2 - dt
+-4411 days
+
+julia> dt = DateTime(2012,2,29)
+2012-02-29T00:00:00
+
+julia> dt2 = DateTime(2000,2,1)
+2000-02-01T00:00:00
+
+julia> dt - dt2
+381110400000 milliseconds
+```
+
+## Accessor Functions
+
+Because the [`Date`](@ref) and [`DateTime`](@ref) types are stored as single [`Int64`](@ref) values, date
+parts or fields can be retrieved through accessor functions. The lowercase accessors return the
+field as an integer:
+
+```jldoctest tdate
+julia> t = Date(2014, 1, 31)
+2014-01-31
+
+julia> Dates.year(t)
+2014
+
+julia> Dates.month(t)
+1
+
+julia> Dates.week(t)
+5
+
+julia> Dates.day(t)
+31
+```
+
+While the propercase accessors return the same value in the corresponding [`Period`](@ref) type:
+
+```jldoctest tdate
+julia> Dates.Year(t)
+2014 years
+
+julia> Dates.Day(t)
+31 days
+```
+
+Compound methods are provided because it is more efficient to access multiple fields at the same time than individually:
+
+```jldoctest tdate
+julia> Dates.yearmonth(t)
+(2014, 1)
+
+julia> Dates.monthday(t)
+(1, 31)
+
+julia> Dates.yearmonthday(t)
+(2014, 1, 31)
+```
+
+One may also access the underlying `UTInstant` or integer value:
+
+```jldoctest tdate
+julia> dump(t)
+Date
+ instant: Dates.UTInstant{Day}
+ periods: Day
+ value: Int64 735264
+
+julia> t.instant
+Dates.UTInstant{Day}(Day(735264))
+
+julia> Dates.value(t)
+735264
+```
+
+## Query Functions
+
+Query functions provide calendrical information about a [`TimeType`](@ref). They include information
+about the day of the week:
+
+```jldoctest tdate2
+julia> t = Date(2014, 1, 31)
+2014-01-31
+
+julia> Dates.dayofweek(t)
+5
+
+julia> Dates.dayname(t)
+"Friday"
+
+julia> Dates.dayofweekofmonth(t) # 5th Friday of January
+5
+```
+
+Month of the year:
+
+```jldoctest tdate2
+julia> Dates.monthname(t)
+"January"
+
+julia> Dates.daysinmonth(t)
+31
+```
+
+As well as information about the [`TimeType`](@ref)'s year and quarter:
+
+```jldoctest tdate2
+julia> Dates.isleapyear(t)
+false
+
+julia> Dates.dayofyear(t)
+31
+
+julia> Dates.quarterofyear(t)
+1
+
+julia> Dates.dayofquarter(t)
+31
+```
+
+The [`dayname`](@ref) and [`monthname`](@ref) methods can also take an optional `locale` keyword
+that can be used to return the name of the day or month of the year for other languages/locales.
+There are also versions of these functions returning the abbreviated names, namely
+[`dayabbr`](@ref) and [`monthabbr`](@ref).
+First the mapping is loaded into the `LOCALES` variable:
+
+```jldoctest tdate2
+julia> french_months = ["janvier", "février", "mars", "avril", "mai", "juin",
+ "juillet", "août", "septembre", "octobre", "novembre", "décembre"];
+
+julia> french_monts_abbrev = ["janv","févr","mars","avril","mai","juin",
+ "juil","août","sept","oct","nov","déc"];
+
+julia> french_days = ["lundi","mardi","mercredi","jeudi","vendredi","samedi","dimanche"];
+
+julia> Dates.LOCALES["french"] = Dates.DateLocale(french_months, french_monts_abbrev, french_days, [""]);
+```
+
+The above-mentioned functions can then be used to perform the queries:
+
+```jldoctest tdate2
+julia> Dates.dayname(t;locale="french")
+"vendredi"
+
+julia> Dates.monthname(t;locale="french")
+"janvier"
+
+julia> Dates.monthabbr(t;locale="french")
+"janv"
+```
+
+Since the abbreviated versions of the days are not loaded, trying to use the
+function `dayabbr` will error.
+
+```jldoctest tdate2
+julia> Dates.dayabbr(t;locale="french")
+ERROR: BoundsError: attempt to access 1-element Vector{String} at index [5]
+Stacktrace:
+[...]
+```
+
+
+## TimeType-Period Arithmetic
+
+It's good practice when using any language/date framework to be familiar with how date-period
+arithmetic is handled as there are some [tricky issues](https://codeblog.jonskeet.uk/2010/12/01/the-joys-of-date-time-arithmetic/)
+to deal with (though much less so for day-precision types).
+
+The `Dates` module approach tries to follow the simple principle of trying to change as
+little as possible when doing [`Period`](@ref) arithmetic. This approach is also often known as
+*calendrical* arithmetic or what you would probably guess if someone were to ask you the same
+calculation in a conversation. Why all the fuss about this? Let's take a classic example: add
+1 month to January 31st, 2014. What's the answer? Javascript will say [March 3](https://markhneedham.com/blog/2009/01/07/javascript-add-a-month-to-a-date/)
+(assumes 31 days). PHP says [March 2](https://stackoverflow.com/questions/5760262/php-adding-months-to-a-date-while-not-exceeding-the-last-day-of-the-month)
+(assumes 30 days). The fact is, there is no right answer. In the `Dates` module, it gives
+the result of February 28th. How does it figure that out? Consider the classic 7-7-7
+gambling game in casinos.
+
+Now just imagine that instead of 7-7-7, the slots are Year-Month-Day, or in our example, 2014-01-31.
+When you ask to add 1 month to this date, the month slot is incremented, so now we have 2014-02-31.
+Then the day number is checked to see whether it is greater than the last valid day of the new month; if it
+is (as in the case above), the day number is adjusted down to the last valid day (28). What are
+the ramifications of this approach? Go ahead and add another month to our date, `2014-02-28 + Month(1) == 2014-03-28`.
+What? Were you expecting the last day of March? Nope, sorry, remember the 7-7-7 slots. As few
+slots as possible are going to change, so we first increment the month slot by 1, 2014-03-28,
+and boom, we're done because that's a valid date. On the other hand, if we were to add 2 months
+to our original date, 2014-01-31, then we end up with 2014-03-31, as expected. The other ramification
+of this approach is a loss in associativity when a specific ordering is forced (i.e. adding things
+in different orders results in different outcomes). For example:
+
+```jldoctest
+julia> (Date(2014,1,29)+Dates.Day(1)) + Dates.Month(1)
+2014-02-28
+
+julia> (Date(2014,1,29)+Dates.Month(1)) + Dates.Day(1)
+2014-03-01
+```
+
+What's going on there? In the first line, we're adding 1 day to January 29th, which results in
+2014-01-30; then we add 1 month, so we get 2014-02-30, which then adjusts down to 2014-02-28.
+In the second example, we add 1 month *first*, where we get 2014-02-29, which adjusts down to
+2014-02-28, and *then* add 1 day, which results in 2014-03-01. One design principle that helps
+in this case is that, in the presence of multiple Periods, the operations will be ordered by the
+Periods' *types*, not their value or positional order; this means `Year` will always be added
+first, then `Month`, then `Week`, etc. Hence the following *does* result in associativity and
+Just Works:
+
+```jldoctest
+julia> Date(2014,1,29) + Dates.Day(1) + Dates.Month(1)
+2014-03-01
+
+julia> Date(2014,1,29) + Dates.Month(1) + Dates.Day(1)
+2014-03-01
+```
+
+Tricky? Perhaps. What is an innocent `Dates` user to do? The bottom line is to be aware
+that explicitly forcing a certain associativity, when dealing with months, may lead to some unexpected
+results, but otherwise, everything should work as expected. Thankfully, that's pretty much the
+extent of the odd cases in date-period arithmetic when dealing with time in UT (avoiding the "joys"
+of dealing with daylight savings, leap seconds, etc.).
+
+As a bonus, all period arithmetic objects work directly with ranges:
+
+```jldoctest
+julia> dr = Date(2014,1,29):Day(1):Date(2014,2,3)
+Date("2014-01-29"):Day(1):Date("2014-02-03")
+
+julia> collect(dr)
+6-element Vector{Date}:
+ 2014-01-29
+ 2014-01-30
+ 2014-01-31
+ 2014-02-01
+ 2014-02-02
+ 2014-02-03
+
+julia> dr = Date(2014,1,29):Dates.Month(1):Date(2014,07,29)
+Date("2014-01-29"):Month(1):Date("2014-07-29")
+
+julia> collect(dr)
+7-element Vector{Date}:
+ 2014-01-29
+ 2014-02-28
+ 2014-03-29
+ 2014-04-29
+ 2014-05-29
+ 2014-06-29
+ 2014-07-29
+```
+
+## Adjuster Functions
+
+As convenient as date-period arithmetic is, often the kinds of calculations needed on dates
+take on a *calendrical* or *temporal* nature rather than a fixed number of periods. Holidays are
+a perfect example; most follow rules such as "Memorial Day = Last Monday of May", or "Thanksgiving
+= 4th Thursday of November". These kinds of temporal expressions deal with rules relative to the
+calendar, like first or last of the month, next Tuesday, or the first and third Wednesdays, etc.
+
+The `Dates` module provides the *adjuster* API through several convenient methods that
+aid in simply and succinctly expressing temporal rules. The first group of adjuster methods deal
+with the first and last of weeks, months, quarters, and years. They each take a single [`TimeType`](@ref)
+as input and return or *adjust to* the first or last of the desired period relative to the input.
+
+```jldoctest
+julia> Dates.firstdayofweek(Date(2014,7,16)) # Adjusts the input to the Monday of the input's week
+2014-07-14
+
+julia> Dates.lastdayofmonth(Date(2014,7,16)) # Adjusts to the last day of the input's month
+2014-07-31
+
+julia> Dates.lastdayofquarter(Date(2014,7,16)) # Adjusts to the last day of the input's quarter
+2014-09-30
+```
+
+The next two higher-order methods, [`tonext`](@ref), and [`toprev`](@ref), generalize working
+with temporal expressions by taking a `DateFunction` as first argument, along with a starting
+[`TimeType`](@ref). A `DateFunction` is just a function, usually anonymous, that takes a single
+[`TimeType`](@ref) as input and returns a [`Bool`](@ref), `true` indicating a satisfied
+adjustment criterion.
+For example:
+
+```jldoctest
+julia> istuesday = x->Dates.dayofweek(x) == Dates.Tuesday; # Returns true if the day of the week of x is Tuesday
+
+julia> Dates.tonext(istuesday, Date(2014,7,13)) # 2014-07-13 is a Sunday
+2014-07-15
+
+julia> Dates.tonext(Date(2014,7,13), Dates.Tuesday) # Convenience method provided for day of the week adjustments
+2014-07-15
+```
+
+This is useful with the do-block syntax for more complex temporal expressions:
+
+```jldoctest
+julia> Dates.tonext(Date(2014,7,13)) do x
+ # Return true on the 4th Thursday of November (Thanksgiving)
+ Dates.dayofweek(x) == Dates.Thursday &&
+ Dates.dayofweekofmonth(x) == 4 &&
+ Dates.month(x) == Dates.November
+ end
+2014-11-27
+```
+
+The [`Base.filter`](@ref) method can be used to obtain all valid dates/moments in a specified
+range:
+
+```jldoctest
+# Pittsburgh street cleaning; Every 2nd Tuesday from April to November
+# Date range from January 1st, 2014 to January 1st, 2015
+julia> dr = Dates.Date(2014):Day(1):Dates.Date(2015);
+
+julia> filter(dr) do x
+ Dates.dayofweek(x) == Dates.Tue &&
+ Dates.April <= Dates.month(x) <= Dates.Nov &&
+ Dates.dayofweekofmonth(x) == 2
+ end
+8-element Vector{Date}:
+ 2014-04-08
+ 2014-05-13
+ 2014-06-10
+ 2014-07-08
+ 2014-08-12
+ 2014-09-09
+ 2014-10-14
+ 2014-11-11
+```
+
+Additional examples and tests are available in [`stdlib/Dates/test/adjusters.jl`](https://github.com/JuliaLang/julia/blob/master/stdlib/Dates/test/adjusters.jl).
+
+## Period Types
+
+Periods are a human view of discrete, sometimes irregular durations of time. Consider 1 month;
+it could represent, in days, a value of 28, 29, 30, or 31 depending on the year and month context.
+Or a year could represent 365 or 366 days in the case of a leap year. [`Period`](@ref) types are
+simple [`Int64`](@ref) wrappers and are constructed by wrapping any `Int64` convertible type, i.e. `Year(1)`
+or `Month(3.0)`. Arithmetic between [`Period`](@ref) of the same type behaves like integers, and
+limited `Period-Real` arithmetic is available. You can extract the underlying integer with
+[`Dates.value`](@ref).
+
+```jldoctest
+julia> y1 = Dates.Year(1)
+1 year
+
+julia> y2 = Dates.Year(2)
+2 years
+
+julia> y3 = Dates.Year(10)
+10 years
+
+julia> y1 + y2
+3 years
+
+julia> div(y3,y2)
+5
+
+julia> y3 - y2
+8 years
+
+julia> y3 % y2
+0 years
+
+julia> div(y3,3) # mirrors integer division
+3 years
+
+julia> Dates.value(Dates.Millisecond(10))
+10
+```
+
+## Rounding
+
+[`Date`](@ref) and [`DateTime`](@ref) values can be rounded to a specified resolution (e.g., 1
+month or 15 minutes) with [`floor`](@ref), [`ceil`](@ref), or [`round`](@ref):
+
+```jldoctest
+julia> floor(Date(1985, 8, 16), Dates.Month)
+1985-08-01
+
+julia> ceil(DateTime(2013, 2, 13, 0, 31, 20), Dates.Minute(15))
+2013-02-13T00:45:00
+
+julia> round(DateTime(2016, 8, 6, 20, 15), Dates.Day)
+2016-08-07T00:00:00
+```
+
+Unlike the numeric [`round`](@ref) method, which breaks ties toward the even number by default,
+the [`TimeType`](@ref) [`round`](@ref) method uses the `RoundNearestTiesUp` rounding mode. (It's
+difficult to guess what breaking ties to nearest "even" [`TimeType`](@ref) would entail.) Further
+details on the available `RoundingMode` s can be found in the [API reference](@ref stdlib-dates-api).
+
+Rounding should generally behave as expected, but there are a few cases in which the expected
+behaviour is not obvious.
+
+### Rounding Epoch
+
+In many cases, the resolution specified for rounding (e.g., `Dates.Second(30)`) divides evenly
+into the next largest period (in this case, `Dates.Minute(1)`). But rounding behaviour in cases
+in which this is not true may lead to confusion. What is the expected result of rounding a [`DateTime`](@ref)
+to the nearest 10 hours?
+
+```jldoctest
+julia> round(DateTime(2016, 7, 17, 11, 55), Dates.Hour(10))
+2016-07-17T12:00:00
+```
+
+That may seem confusing, given that the hour (12) is not divisible by 10. The reason that `2016-07-17T12:00:00`
+was chosen is that it is 17,676,660 hours after `0000-01-01T00:00:00`, and 17,676,660 is divisible
+by 10.
+
+As Julia [`Date`](@ref) and [`DateTime`](@ref) values are represented according to the ISO 8601
+standard, `0000-01-01T00:00:00` was chosen as base (or "rounding epoch") from which to begin the
+count of days (and milliseconds) used in rounding calculations. (Note that this differs slightly
+from Julia's internal representation of [`Date`](@ref) s using Rata Die notation; but since the
+ISO 8601 standard is most visible to the end user, `0000-01-01T00:00:00` was chosen as the rounding
+epoch instead of the `0000-12-31T00:00:00` used internally to minimize confusion.)
+
+The only exception to the use of `0000-01-01T00:00:00` as the rounding epoch is when rounding
+to weeks. Rounding to the nearest week will always return a Monday (the first day of the week
+as specified by ISO 8601). For this reason, we use `0000-01-03T00:00:00` (the first day of the
+first week of year 0000, as defined by ISO 8601) as the base when rounding to a number of weeks.
+
+Here is a related case in which the expected behaviour is not necessarily obvious: What happens
+when we round to the nearest `P(2)`, where `P` is a [`Period`](@ref) type? In some cases (specifically,
+when `P <: Dates.TimePeriod`) the answer is clear:
+
+```jldoctest
+julia> round(DateTime(2016, 7, 17, 8, 55, 30), Dates.Hour(2))
+2016-07-17T08:00:00
+
+julia> round(DateTime(2016, 7, 17, 8, 55, 30), Dates.Minute(2))
+2016-07-17T08:56:00
+```
+
+This seems obvious, because two of each of these periods still divides evenly into the next larger
+order period. But in the case of two months (which still divides evenly into one year), the answer
+may be surprising:
+
+```jldoctest
+julia> round(DateTime(2016, 7, 17, 8, 55, 30), Dates.Month(2))
+2016-07-01T00:00:00
+```
+
+Why round to the first day in July, even though it is month 7 (an odd number)? The key is that
+months are 1-indexed (the first month is assigned 1), unlike hours, minutes, seconds, and milliseconds
+(the first of which are assigned 0).
+
+This means that rounding a [`DateTime`](@ref) to an even multiple of seconds, minutes, hours,
+or years (because the ISO 8601 specification includes a year zero) will result in a [`DateTime`](@ref)
+with an even value in that field, while rounding a [`DateTime`](@ref) to an even multiple of months
+will result in the months field having an odd value. Because both months and years may contain
+an irregular number of days, whether rounding to an even number of days will result in an even
+value in the days field is uncertain.
+
+See the [API reference](@ref stdlib-dates-api) for additional information
+on methods exported from the `Dates` module.
+
+# [API reference](@id stdlib-dates-api)
+
+## Dates and Time Types
+
+```@docs
+Dates.Period
+Dates.CompoundPeriod
+Dates.Instant
+Dates.UTInstant
+Dates.TimeType
+Dates.DateTime
+Dates.Date
+Dates.Time
+Dates.TimeZone
+Dates.UTC
+```
+
+## Dates Functions
+
+```@docs
+Dates.DateTime(::Int64, ::Int64, ::Int64, ::Int64, ::Int64, ::Int64, ::Int64)
+Dates.DateTime(::Dates.Period)
+Dates.DateTime(::Function, ::Any...)
+Dates.DateTime(::Dates.TimeType)
+Dates.DateTime(::AbstractString, ::AbstractString)
+Dates.format(::Dates.TimeType, ::AbstractString)
+Dates.DateFormat
+Dates.@dateformat_str
+Dates.DateTime(::AbstractString, ::Dates.DateFormat)
+Dates.Date(::Int64, ::Int64, ::Int64)
+Dates.Date(::Dates.Period)
+Dates.Date(::Function, ::Any, ::Any, ::Any)
+Dates.Date(::Dates.TimeType)
+Dates.Date(::AbstractString, ::AbstractString)
+Dates.Date(::AbstractString, ::Dates.DateFormat)
+Dates.Time(::Int64, ::Int64, ::Int64, ::Int64, ::Int64, ::Int64)
+Dates.Time(::Dates.TimePeriod)
+Dates.Time(::Function, ::Any...)
+Dates.Time(::Dates.DateTime)
+Dates.now()
+Dates.now(::Type{Dates.UTC})
+Base.eps(::Union{Type{DateTime}, Type{Date}, Type{Time}, TimeType})
+```
+
+### Accessor Functions
+
+```@docs
+Dates.year
+Dates.month
+Dates.week
+Dates.day
+Dates.hour
+Dates.minute
+Dates.second
+Dates.millisecond
+Dates.microsecond
+Dates.nanosecond
+Dates.Year(::Dates.TimeType)
+Dates.Month(::Dates.TimeType)
+Dates.Week(::Dates.TimeType)
+Dates.Day(::Dates.TimeType)
+Dates.Hour(::DateTime)
+Dates.Minute(::DateTime)
+Dates.Second(::DateTime)
+Dates.Millisecond(::DateTime)
+Dates.Microsecond(::Dates.Time)
+Dates.Nanosecond(::Dates.Time)
+Dates.yearmonth
+Dates.monthday
+Dates.yearmonthday
+```
+
+### Query Functions
+
+```@docs
+Dates.dayname
+Dates.dayabbr
+Dates.dayofweek
+Dates.dayofmonth
+Dates.dayofweekofmonth
+Dates.daysofweekinmonth
+Dates.monthname
+Dates.monthabbr
+Dates.daysinmonth
+Dates.isleapyear
+Dates.dayofyear
+Dates.daysinyear
+Dates.quarterofyear
+Dates.dayofquarter
+```
+
+### Adjuster Functions
+
+```@docs
+Base.trunc(::Dates.TimeType, ::Type{Dates.Period})
+Dates.firstdayofweek
+Dates.lastdayofweek
+Dates.firstdayofmonth
+Dates.lastdayofmonth
+Dates.firstdayofyear
+Dates.lastdayofyear
+Dates.firstdayofquarter
+Dates.lastdayofquarter
+Dates.tonext(::Dates.TimeType, ::Int)
+Dates.toprev(::Dates.TimeType, ::Int)
+Dates.tofirst
+Dates.tolast
+Dates.tonext(::Function, ::Dates.TimeType)
+Dates.toprev(::Function, ::Dates.TimeType)
+```
+
+### Periods
+
+```@docs
+Dates.Period(::Any)
+Dates.CompoundPeriod(::Vector{<:Dates.Period})
+Dates.value
+Dates.default
+```
+
+### Rounding Functions
+
+`Date` and `DateTime` values can be rounded to a specified resolution (e.g., 1 month or 15 minutes)
+with `floor`, `ceil`, or `round`.
+
+```@docs
+Base.floor(::Dates.TimeType, ::Dates.Period)
+Base.ceil(::Dates.TimeType, ::Dates.Period)
+Base.round(::Dates.TimeType, ::Dates.Period, ::RoundingMode{:NearestTiesUp})
+```
+
+Most `Period` values can also be rounded to a specified resolution:
+
+```@docs
+Base.floor(::Dates.ConvertiblePeriod, ::T) where T <: Dates.ConvertiblePeriod
+Base.ceil(::Dates.ConvertiblePeriod, ::Dates.ConvertiblePeriod)
+Base.round(::Dates.ConvertiblePeriod, ::Dates.ConvertiblePeriod, ::RoundingMode{:NearestTiesUp})
+```
+
+The following functions are not exported:
+
+```@docs
+Dates.floorceil
+Dates.epochdays2date
+Dates.epochms2datetime
+Dates.date2epochdays
+Dates.datetime2epochms
+```
+
+### Conversion Functions
+
+```@docs
+Dates.today
+Dates.unix2datetime
+Dates.datetime2unix
+Dates.julian2datetime
+Dates.datetime2julian
+Dates.rata2datetime
+Dates.datetime2rata
+```
+
+### Constants
+
+Days of the Week:
+
+| Variable | Abbr. | Value (Int) |
+|:----------- |:----- |:----------- |
+| `Monday` | `Mon` | 1 |
+| `Tuesday` | `Tue` | 2 |
+| `Wednesday` | `Wed` | 3 |
+| `Thursday` | `Thu` | 4 |
+| `Friday` | `Fri` | 5 |
+| `Saturday` | `Sat` | 6 |
+| `Sunday` | `Sun` | 7 |
+
+Months of the Year:
+
+| Variable | Abbr. | Value (Int) |
+|:----------- |:----- |:----------- |
+| `January` | `Jan` | 1 |
+| `February` | `Feb` | 2 |
+| `March` | `Mar` | 3 |
+| `April` | `Apr` | 4 |
+| `May` | `May` | 5 |
+| `June` | `Jun` | 6 |
+| `July` | `Jul` | 7 |
+| `August` | `Aug` | 8 |
+| `September` | `Sep` | 9 |
+| `October` | `Oct` | 10 |
+| `November` | `Nov` | 11 |
+| `December` | `Dec` | 12 |
+
+```@meta
+DocTestSetup = nothing
+```
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/DelimitedFiles/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/DelimitedFiles/docs/src/index.md
new file mode 100644
index 0000000..11e24f1
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/DelimitedFiles/docs/src/index.md
@@ -0,0 +1,11 @@
+# Delimited Files
+
+```@docs
+DelimitedFiles.readdlm(::Any, ::AbstractChar, ::Type, ::AbstractChar)
+DelimitedFiles.readdlm(::Any, ::AbstractChar, ::AbstractChar)
+DelimitedFiles.readdlm(::Any, ::AbstractChar, ::Type)
+DelimitedFiles.readdlm(::Any, ::AbstractChar)
+DelimitedFiles.readdlm(::Any, ::Type)
+DelimitedFiles.readdlm(::Any)
+DelimitedFiles.writedlm
+```
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Distributed/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Distributed/docs/src/index.md
new file mode 100644
index 0000000..1b1675e
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Distributed/docs/src/index.md
@@ -0,0 +1,69 @@
+# Distributed Computing
+
+```@docs
+Distributed.addprocs
+Distributed.nprocs
+Distributed.nworkers
+Distributed.procs()
+Distributed.procs(::Integer)
+Distributed.workers
+Distributed.rmprocs
+Distributed.interrupt
+Distributed.myid
+Distributed.pmap
+Distributed.RemoteException
+Distributed.Future
+Distributed.RemoteChannel
+Distributed.fetch(::Distributed.Future)
+Distributed.fetch(::RemoteChannel)
+Distributed.remotecall(::Any, ::Integer, ::Any...)
+Distributed.remotecall_wait(::Any, ::Integer, ::Any...)
+Distributed.remotecall_fetch(::Any, ::Integer, ::Any...)
+Distributed.remote_do(::Any, ::Integer, ::Any...)
+Distributed.put!(::RemoteChannel, ::Any...)
+Distributed.put!(::Distributed.Future, ::Any)
+Distributed.take!(::RemoteChannel, ::Any...)
+Distributed.isready(::RemoteChannel, ::Any...)
+Distributed.isready(::Distributed.Future)
+Distributed.AbstractWorkerPool
+Distributed.WorkerPool
+Distributed.CachingPool
+Distributed.default_worker_pool
+Distributed.clear!(::CachingPool)
+Distributed.remote
+Distributed.remotecall(::Any, ::AbstractWorkerPool, ::Any...)
+Distributed.remotecall_wait(::Any, ::AbstractWorkerPool, ::Any...)
+Distributed.remotecall_fetch(::Any, ::AbstractWorkerPool, ::Any...)
+Distributed.remote_do(::Any, ::AbstractWorkerPool, ::Any...)
+Distributed.@spawnat
+Distributed.@fetch
+Distributed.@fetchfrom
+Distributed.@distributed
+Distributed.@everywhere
+Distributed.clear!(::Any, ::Any; ::Any)
+Distributed.remoteref_id
+Distributed.channel_from_id
+Distributed.worker_id_from_socket
+Distributed.cluster_cookie()
+Distributed.cluster_cookie(::Any)
+```
+
+## Cluster Manager Interface
+
+This interface provides a mechanism to launch and manage Julia workers on different cluster environments.
+There are two types of managers present in Base: `LocalManager`, for launching additional workers on the
+same host, and `SSHManager`, for launching on remote hosts via `ssh`. TCP/IP sockets are used to connect
+and transport messages between processes. It is possible for Cluster Managers to provide a different transport.
+
+```@docs
+Distributed.ClusterManager
+Distributed.WorkerConfig
+Distributed.launch
+Distributed.manage
+Distributed.kill(::ClusterManager, ::Int, ::WorkerConfig)
+Distributed.connect(::ClusterManager, ::Int, ::WorkerConfig)
+Distributed.init_worker
+Distributed.start_worker
+Distributed.process_messages
+Distributed.default_addprocs_params
+```
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Downloads/README.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Downloads/README.md
new file mode 100644
index 0000000..132a564
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Downloads/README.md
@@ -0,0 +1,186 @@
+# Downloads
+
+[![Build Status](https://travis-ci.com/JuliaLang/Downloads.jl.svg?branch=master)](https://travis-ci.com/JuliaLang/Downloads.jl)
+[![Codecov](https://codecov.io/gh/JuliaLang/Downloads.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/JuliaLang/Downloads.jl)
+
+The `Downloads` package provides a single function, `download`, which provides
+cross-platform, multi-protocol, in-process download functionality implemented
+with [libcurl](https://curl.haxx.se/libcurl/). It uses libcurl's multi-handle
+callback API to present a Julian API: `download(url)` blocks the task in which
+it occurs but yields to Julia's scheduler, allowing arbitrarily many tasks to
+download URLs concurrently and efficiently. As of Julia 1.6, this package is a
+standard library that is included with Julia, but this package can be used with
+Julia 1.3 through 1.5 as well.
+
+## API
+
+The public API of `Downloads` consists of two functions and three types:
+
+- `download` — download a file from a URL, erroring if it can't be downloaded
+- `request` — request a URL, returning a `Response` object indicating success
+- `Response` — a type capturing the status and other metadata about a request
+- `RequestError` — an error type thrown by `download` and `request` on error
+- `Downloader` — an object encapsulating shared resources for downloading
+
+### download
+
+```jl
+download(url, [ output = tempfile() ];
+ [ method = "GET", ]
+ [ headers = <none>, ]
+ [ timeout = <none>, ]
+ [ progress = <none>, ]
+ [ verbose = false, ]
+ [ downloader = <default>, ]
+) -> output
+```
+- `url :: AbstractString`
+- `output :: Union{AbstractString, AbstractCmd, IO}`
+- `method :: AbstractString`
+- `headers :: Union{AbstractVector, AbstractDict}`
+- `timeout :: Real`
+- `progress :: (total::Integer, now::Integer) --> Any`
+- `verbose :: Bool`
+- `downloader :: Downloader`
+
+Download a file from the given url, saving it to `output` or if not specified, a
+temporary path. The `output` can also be an `IO` handle, in which case the body
+of the response is streamed to that handle and the handle is returned. If
+`output` is a command, the command is run and output is sent to it on stdin.
+
+If the `downloader` keyword argument is provided, it must be a `Downloader`
+object. Resources and connections will be shared between downloads performed by
+the same `Downloader` and cleaned up automatically when the object is garbage
+collected or there have been no downloads performed with it for a grace period.
+See `Downloader` for more info about configuration and usage.
+
+If the `headers` keyword argument is provided, it must be a vector or dictionary
+whose elements are all pairs of strings. These pairs are passed as headers when
+downloading URLs with protocols that support them, such as HTTP/S.
+
+The `timeout` keyword argument specifies a timeout for the download in seconds,
+with a resolution of milliseconds. By default no timeout is set, but this can
+also be explicitly requested by passing a timeout value of `Inf`.
+
+If the `progress` keyword argument is provided, it must be a callback function
+which will be called whenever there are updates about the size and status of the
+ongoing download. The callback must take two integer arguments: `total` and
+`now` which are the total size of the download in bytes, and the number of bytes
+which have been downloaded so far. Note that `total` starts out as zero and
+remains zero until the server gives an indication of the total size of the
+download (e.g. with a `Content-Length` header), which may never happen. So a
+well-behaved progress callback should handle a total size of zero gracefully.
+
+If the `verbose` option is set to true, `libcurl`, which is used to implement
+the download functionality, will print debugging information to `stderr`.
+
+### request
+
+```jl
+request(url;
+ [ input = <none>, ]
+ [ output = <none>, ]
+ [ method = input ? "PUT" : output ? "GET" : "HEAD", ]
+ [ headers = <none>, ]
+ [ timeout = <none>, ]
+ [ progress = <none>, ]
+ [ verbose = false, ]
+ [ throw = true, ]
+ [ downloader = <default>, ]
+) -> Union{Response, RequestError}
+```
+- `url :: AbstractString`
+- `input :: Union{AbstractString, AbstractCmd, IO}`
+- `output :: Union{AbstractString, AbstractCmd, IO}`
+- `method :: AbstractString`
+- `headers :: Union{AbstractVector, AbstractDict}`
+- `timeout :: Real`
+- `progress :: (dl_total, dl_now, ul_total, ul_now) --> Any`
+- `verbose :: Bool`
+- `throw :: Bool`
+- `downloader :: Downloader`
+
+Make a request to the given url, returning a `Response` object capturing the
+status, headers and other information about the response. The body of the
+response is written to `output` if specified and discarded otherwise. For HTTP/S
+requests, if an `input` stream is given, a `PUT` request is made; otherwise if
+an `output` stream is given, a `GET` request is made; if neither is given a
+`HEAD` request is made. For other protocols, appropriate default methods are
+used based on what combination of input and output are requested. The following
+options differ from the `download` function:
+
+- `input` allows providing a request body; if provided, the method defaults to `PUT`
+- `progress` is a callback taking four integers for upload and download progress
+- `throw` controls whether to throw or return a `RequestError` on request error
+
+Note that unlike `download` which throws an error if the requested URL could not
+be downloaded (indicated by non-2xx status code), `request` returns a `Response`
+object no matter what the status code of the response is. If there is an error
+with getting a response at all, then a `RequestError` is thrown or returned.
+
+### Response
+
+```jl
+struct Response
+ proto :: String
+ url :: String
+ status :: Int
+ message :: String
+ headers :: Vector{Pair{String,String}}
+end
+```
+`Response` is a type capturing the properties of a successful response to a
+request as an object. It has the following fields:
+
+- `proto`: the protocol that was used to get the response
+- `url`: the URL that was ultimately requested after following redirects
+- `status`: the status code of the response, indicating success, failure, etc.
+- `message`: a textual message describing the nature of the response
+- `headers`: any headers that were returned with the response
+
+The meaning and availability of some of these responses depends on the protocol
+used for the request. For many protocols, including HTTP/S and S/FTP, a 2xx
+status code indicates a successful response. For responses in protocols that do
+not support headers, the headers vector will be empty. HTTP/2 does not include a
+status message, only a status code, so the message will be empty.
+
+### RequestError
+
+```jl
+struct RequestError <: ErrorException
+ url :: String
+ code :: Int
+ message :: String
+ response :: Response
+end
+```
+`RequestError` is a type capturing the properties of a failed response to a
+request as an exception object:
+
+- `url`: the original URL that was requested without any redirects
+- `code`: the libcurl error code; `0` if a protocol-only error occurred
+- `message`: the libcurl error message indicating what went wrong
+- `response`: response object capturing what response info is available
+
+The same `RequestError` type is thrown by `download` if the request was
+successful but there was a protocol-level error indicated by a status code that
+is not in the 2xx range, in which case `code` will be zero and the `message`
+field will be the empty string. The `request` API only throws a `RequestError`
+if the libcurl error `code` is non-zero, in which case the included `response`
+object is likely to have a `status` of zero and an empty message. There are,
+however, situations where a curl-level error is thrown due to a protocol error,
+in which case both the inner and outer code and message may be of interest.
+
+### Downloader
+
+```jl
+Downloader(; [ grace::Real = 30 ])
+```
+`Downloader` objects are used to perform individual `download` operations.
+Connections, name lookups and other resources are shared within a `Downloader`.
+These connections and resources are cleaned up after a configurable grace period
+(default: 30 seconds) since anything was downloaded with it, or when it is
+garbage collected, whichever comes first. If the grace period is set to zero,
+all resources will be cleaned up immediately as soon as there are no more
+ongoing downloads in progress. If the grace period is set to `Inf` then
+resources are not cleaned up until `Downloader` is garbage collected.
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Downloads/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Downloads/docs/src/index.md
new file mode 100644
index 0000000..23628fd
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Downloads/docs/src/index.md
@@ -0,0 +1,9 @@
+# Downloads
+
+```@docs
+Downloads.download
+Downloads.request
+Downloads.Response
+Downloads.RequestError
+Downloads.Downloader
+```
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/FileWatching/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/FileWatching/docs/src/index.md
new file mode 100644
index 0000000..3944f5d
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/FileWatching/docs/src/index.md
@@ -0,0 +1,9 @@
+# [File Events](@id lib-filewatching)
+
+```@docs
+FileWatching.poll_fd
+FileWatching.poll_file
+FileWatching.watch_file
+FileWatching.watch_folder
+FileWatching.unwatch_folder
+```
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Future/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Future/docs/src/index.md
new file mode 100644
index 0000000..dcb1a36
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Future/docs/src/index.md
@@ -0,0 +1,9 @@
+# Future
+
+The `Future` module implements future behavior of already existing functions,
+which will replace the current version in a future release of Julia.
+
+```@docs
+Future.copy!
+Future.randjump
+```
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/InteractiveUtils/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/InteractiveUtils/docs/src/index.md
new file mode 100644
index 0000000..a8a675e
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/InteractiveUtils/docs/src/index.md
@@ -0,0 +1,28 @@
+# Interactive Utilities
+
+```@docs
+InteractiveUtils.apropos
+InteractiveUtils.varinfo
+InteractiveUtils.versioninfo
+InteractiveUtils.methodswith
+InteractiveUtils.subtypes
+InteractiveUtils.supertypes
+InteractiveUtils.edit(::AbstractString, ::Integer)
+InteractiveUtils.edit(::Any)
+InteractiveUtils.@edit
+InteractiveUtils.define_editor
+InteractiveUtils.less(::AbstractString)
+InteractiveUtils.less(::Any)
+InteractiveUtils.@less
+InteractiveUtils.@which
+InteractiveUtils.@functionloc
+InteractiveUtils.@code_lowered
+InteractiveUtils.@code_typed
+InteractiveUtils.code_warntype
+InteractiveUtils.@code_warntype
+InteractiveUtils.code_llvm
+InteractiveUtils.@code_llvm
+InteractiveUtils.code_native
+InteractiveUtils.@code_native
+InteractiveUtils.clipboard
+```
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/LazyArtifacts/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/LazyArtifacts/docs/src/index.md
new file mode 100644
index 0000000..9de6b21
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/LazyArtifacts/docs/src/index.md
@@ -0,0 +1,10 @@
+# Lazy Artifacts
+
+```@meta
+DocTestSetup = :(using LazyArtifacts)
+```
+
+In order for a package to download artifacts lazily, `LazyArtifacts` must be
+explicitly listed as a dependency of that package.
+
+For further information on artifacts, see [Artifacts](@ref).
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/LibCURL/LICENSE.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/LibCURL/LICENSE.md
new file mode 100644
index 0000000..b6f1d7e
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/LibCURL/LICENSE.md
@@ -0,0 +1,23 @@
+LibCURL.jl is licensed under the MIT "Expat" License:
+
+> Copyright (c) 2013: JuliaWeb contributors
+>
+> Permission is hereby granted, free of charge, to any person obtaining
+> a copy of this software and associated documentation files (the
+> "Software"), to deal in the Software without restriction, including
+> without limitation the rights to use, copy, modify, merge, publish,
+> distribute, sublicense, and/or sell copies of the Software, and to
+> permit persons to whom the Software is furnished to do so, subject to
+> the following conditions:
+>
+> The above copyright notice and this permission notice shall be
+> included in all copies or substantial portions of the Software.
+>
+> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+> EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+> MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+> NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+> LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+> OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+> WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/LibCURL/README.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/LibCURL/README.md
new file mode 100644
index 0000000..14b256b
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/LibCURL/README.md
@@ -0,0 +1,72 @@
+LibCURL.jl
+==========
+
+*Julia wrapper for libCURL*
+
+[![Build Status](https://travis-ci.com/JuliaWeb/LibCURL.jl.svg?branch=master)](https://travis-ci.com/JuliaWeb/LibCURL.jl)
+[![Appveyor](https://ci.appveyor.com/api/projects/status/github/JuliaWeb/LibCurl.jl?svg=true)](https://ci.appveyor.com/project/shashi/libcurl-jl)
+[![codecov.io](http://codecov.io/github/JuliaWeb/LibCURL.jl/coverage.svg?branch=master)](http://codecov.io/github/JuliaWeb/LibCURL.jl?branch=master)
+
+---
+This is a simple Julia wrapper around http://curl.haxx.se/libcurl/ generated using [Clang.jl](https://github.com/ihnorton/Clang.jl). Please see the [libcurl API documentation](https://curl.haxx.se/libcurl/c/) for help on how to use this package.
+
+### Example (fetch a URL)
+
+```julia
+using LibCURL
+
+# init a curl handle
+curl = curl_easy_init()
+
+# set the URL and request to follow redirects
+curl_easy_setopt(curl, CURLOPT_URL, "http://example.com")
+curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1)
+
+
+# setup the callback function to recv data
+function curl_write_cb(curlbuf::Ptr{Cvoid}, s::Csize_t, n::Csize_t, p_ctxt::Ptr{Cvoid})
+ sz = s * n
+ data = Array{UInt8}(undef, sz)
+
+ ccall(:memcpy, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, UInt64), data, curlbuf, sz)
+ println("recd: ", String(data))
+
+ sz::Csize_t
+end
+
+c_curl_write_cb = @cfunction(curl_write_cb, Csize_t, (Ptr{Cvoid}, Csize_t, Csize_t, Ptr{Cvoid}))
+curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, c_curl_write_cb)
+
+
+# execute the query
+res = curl_easy_perform(curl)
+println("curl url exec response : ", res)
+
+# retrieve HTTP code
+http_code = Array{Clong}(undef, 1)
+curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, http_code)
+println("httpcode : ", http_code)
+
+# release handle
+curl_easy_cleanup(curl)
+
+```
+
+### Binaries
+
+This package uses the [LibCURL_jll](https://github.com/JuliaBinaryWrappers/libCURL_jll.jl) binary package to install compiled libCURL binaries on all supported platforms. The following products are defined in the jll:
+
+* `libcurl`: A `LibraryProduct` referencing the shared library
+* `curl`: An `ExecutableProduct` referencing the binary
+
+This package also uses the [MozillaCACerts_jll](https://github.com/JuliaBinaryWrappers/MozillaCACerts_jll.jl) package to supply the Mozilla CA root certificate bundle. Note that the `cacert` symbol is re-exported from this package for ease of use.
+
+* `cacert`: A `FileProduct` referencing the Mozilla CA certificate bundle
+
+### SSL certificates
+
+Making SSL/TLS connections usually needs access to a CA certificate to validate peers. The Mozilla CA bundle can be used via this package. To use this certificate bundle, set the following option:
+
+```julia
+curl_easy_setopt(curl, CURLOPT_CAINFO, LibCURL.cacert)
+```
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/LibGit2/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/LibGit2/docs/src/index.md
new file mode 100644
index 0000000..e53a933
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/LibGit2/docs/src/index.md
@@ -0,0 +1,161 @@
+# LibGit2
+
+The LibGit2 module provides bindings to [libgit2](https://libgit2.org/), a portable C library that
+implements core functionality for the [Git](https://git-scm.com/) version control system.
+These bindings are currently used to power Julia's package manager.
+It is expected that this module will eventually be moved into a separate package.
+
+### Functionality
+
+Some of this documentation assumes some prior knowledge of the libgit2 API.
+For more information on some of the objects and methods referenced here, consult the upstream
+[libgit2 API reference](https://libgit2.org/libgit2/#v0.25.1).
+
+```@docs
+LibGit2.Buffer
+LibGit2.CheckoutOptions
+LibGit2.CloneOptions
+LibGit2.DescribeOptions
+LibGit2.DescribeFormatOptions
+LibGit2.DiffDelta
+LibGit2.DiffFile
+LibGit2.DiffOptionsStruct
+LibGit2.FetchHead
+LibGit2.FetchOptions
+LibGit2.GitAnnotated
+LibGit2.GitBlame
+LibGit2.GitBlob
+LibGit2.GitCommit
+LibGit2.GitHash
+LibGit2.GitObject
+LibGit2.GitRemote
+LibGit2.GitRemoteAnon
+LibGit2.GitRepo
+LibGit2.GitRepoExt
+LibGit2.GitRevWalker
+LibGit2.GitShortHash
+LibGit2.GitSignature
+LibGit2.GitStatus
+LibGit2.GitTag
+LibGit2.GitTree
+LibGit2.IndexEntry
+LibGit2.IndexTime
+LibGit2.BlameOptions
+LibGit2.MergeOptions
+LibGit2.ProxyOptions
+LibGit2.PushOptions
+LibGit2.RebaseOperation
+LibGit2.RebaseOptions
+LibGit2.RemoteCallbacks
+LibGit2.SignatureStruct
+LibGit2.StatusEntry
+LibGit2.StatusOptions
+LibGit2.StrArrayStruct
+LibGit2.TimeStruct
+LibGit2.add!
+LibGit2.add_fetch!
+LibGit2.add_push!
+LibGit2.addblob!
+LibGit2.author
+LibGit2.authors
+LibGit2.branch
+LibGit2.branch!
+LibGit2.checkout!
+LibGit2.clone
+LibGit2.commit
+LibGit2.committer
+LibGit2.count
+LibGit2.counthunks
+LibGit2.create_branch
+LibGit2.credentials_callback
+LibGit2.credentials_cb
+LibGit2.default_signature
+LibGit2.delete_branch
+LibGit2.diff_files
+LibGit2.entryid
+LibGit2.entrytype
+LibGit2.fetch
+LibGit2.fetchheads
+LibGit2.fetch_refspecs
+LibGit2.fetchhead_foreach_cb
+LibGit2.merge_base
+LibGit2.merge!(::LibGit2.GitRepo; ::Any...)
+LibGit2.merge!(::LibGit2.GitRepo, ::Vector{LibGit2.GitAnnotated}; ::LibGit2.MergeOptions, ::LibGit2.CheckoutOptions)
+LibGit2.merge!(::LibGit2.GitRepo, ::Vector{LibGit2.GitAnnotated}, ::Bool; ::LibGit2.MergeOptions, ::LibGit2.CheckoutOptions)
+LibGit2.ffmerge!
+LibGit2.fullname
+LibGit2.features
+LibGit2.filename
+LibGit2.filemode
+LibGit2.gitdir
+LibGit2.git_url
+LibGit2.@githash_str
+LibGit2.head
+LibGit2.head!
+LibGit2.head_oid
+LibGit2.headname
+LibGit2.init
+LibGit2.is_ancestor_of
+LibGit2.isbinary
+LibGit2.iscommit
+LibGit2.isdiff
+LibGit2.isdirty
+LibGit2.isorphan
+LibGit2.isset
+LibGit2.iszero
+LibGit2.lookup_branch
+LibGit2.map
+LibGit2.mirror_callback
+LibGit2.mirror_cb
+LibGit2.message
+LibGit2.merge_analysis
+LibGit2.name
+LibGit2.need_update
+LibGit2.objtype
+LibGit2.path
+LibGit2.peel
+LibGit2.posixpath
+LibGit2.push
+LibGit2.push!(::LibGit2.GitRevWalker, ::LibGit2.GitHash)
+LibGit2.push_head!
+LibGit2.push_refspecs
+LibGit2.raw
+LibGit2.read_tree!
+LibGit2.rebase!
+LibGit2.ref_list
+LibGit2.reftype
+LibGit2.remotes
+LibGit2.remove!
+LibGit2.reset
+LibGit2.reset!
+LibGit2.restore
+LibGit2.revcount
+LibGit2.set_remote_url
+LibGit2.shortname
+LibGit2.snapshot
+LibGit2.split_cfg_entry
+LibGit2.status
+LibGit2.stage
+LibGit2.tag_create
+LibGit2.tag_delete
+LibGit2.tag_list
+LibGit2.target
+LibGit2.toggle
+LibGit2.transact
+LibGit2.treewalk
+LibGit2.upstream
+LibGit2.update!
+LibGit2.url
+LibGit2.version
+LibGit2.with
+LibGit2.with_warn
+LibGit2.workdir
+LibGit2.GitObject(::LibGit2.GitTreeEntry)
+LibGit2.UserPasswordCredential
+LibGit2.SSHCredential
+LibGit2.isfilled
+LibGit2.CachedCredentials
+LibGit2.CredentialPayload
+LibGit2.approve
+LibGit2.reject
+```
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Libdl/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Libdl/docs/src/index.md
new file mode 100644
index 0000000..62f9837
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Libdl/docs/src/index.md
@@ -0,0 +1,15 @@
+# Dynamic Linker
+
+```@docs
+Libdl.dlopen
+Libdl.dlopen_e
+Libdl.RTLD_NOW
+Libdl.dlsym
+Libdl.dlsym_e
+Libdl.dlclose
+Libdl.dlext
+Libdl.dllist
+Libdl.dlpath
+Libdl.find_library
+Libdl.DL_LOAD_PATH
+```
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/LinearAlgebra/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/LinearAlgebra/docs/src/index.md
new file mode 100644
index 0000000..52e7860
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/LinearAlgebra/docs/src/index.md
@@ -0,0 +1,681 @@
+# [Linear Algebra](@id man-linalg)
+
+```@meta
+DocTestSetup = :(using LinearAlgebra, SparseArrays, SuiteSparse)
+```
+
+In addition to (and as part of) its support for multi-dimensional arrays, Julia provides native implementations
+of many common and useful linear algebra operations which can be loaded with `using LinearAlgebra`. Basic operations, such as [`tr`](@ref), [`det`](@ref),
+and [`inv`](@ref) are all supported:
+
+```jldoctest
+julia> A = [1 2 3; 4 1 6; 7 8 1]
+3×3 Matrix{Int64}:
+ 1 2 3
+ 4 1 6
+ 7 8 1
+
+julia> tr(A)
+3
+
+julia> det(A)
+104.0
+
+julia> inv(A)
+3×3 Matrix{Float64}:
+ -0.451923 0.211538 0.0865385
+ 0.365385 -0.192308 0.0576923
+ 0.240385 0.0576923 -0.0673077
+```
+
+As well as other useful operations, such as finding eigenvalues or eigenvectors:
+
+```jldoctest
+julia> A = [-4. -17.; 2. 2.]
+2×2 Matrix{Float64}:
+ -4.0 -17.0
+ 2.0 2.0
+
+julia> eigvals(A)
+2-element Vector{ComplexF64}:
+ -1.0 - 5.0im
+ -1.0 + 5.0im
+
+julia> eigvecs(A)
+2×2 Matrix{ComplexF64}:
+ 0.945905-0.0im 0.945905+0.0im
+ -0.166924+0.278207im -0.166924-0.278207im
+```
+
+In addition, Julia provides many [factorizations](@ref man-linalg-factorizations) which can be used to
+speed up problems such as linear solve or matrix exponentiation by pre-factorizing a matrix into a form
+more amenable (for performance or memory reasons) to the problem. See the documentation on [`factorize`](@ref)
+for more information. As an example:
+
+```jldoctest
+julia> A = [1.5 2 -4; 3 -1 -6; -10 2.3 4]
+3×3 Matrix{Float64}:
+ 1.5 2.0 -4.0
+ 3.0 -1.0 -6.0
+ -10.0 2.3 4.0
+
+julia> factorize(A)
+LU{Float64, Matrix{Float64}}
+L factor:
+3×3 Matrix{Float64}:
+ 1.0 0.0 0.0
+ -0.15 1.0 0.0
+ -0.3 -0.132196 1.0
+U factor:
+3×3 Matrix{Float64}:
+ -10.0 2.3 4.0
+ 0.0 2.345 -3.4
+ 0.0 0.0 -5.24947
+```
+
+Since `A` is not Hermitian, symmetric, triangular, tridiagonal, or bidiagonal, an LU factorization may be the
+best we can do. Compare with:
+
+```jldoctest
+julia> B = [1.5 2 -4; 2 -1 -3; -4 -3 5]
+3×3 Matrix{Float64}:
+ 1.5 2.0 -4.0
+ 2.0 -1.0 -3.0
+ -4.0 -3.0 5.0
+
+julia> factorize(B)
+BunchKaufman{Float64, Matrix{Float64}}
+D factor:
+3×3 Tridiagonal{Float64, Vector{Float64}}:
+ -1.64286 0.0 ⋅
+ 0.0 -2.8 0.0
+ ⋅ 0.0 5.0
+U factor:
+3×3 UnitUpperTriangular{Float64, Matrix{Float64}}:
+ 1.0 0.142857 -0.8
+ ⋅ 1.0 -0.6
+ ⋅ ⋅ 1.0
+permutation:
+3-element Vector{Int64}:
+ 1
+ 2
+ 3
+```
+
+Here, Julia was able to detect that `B` is in fact symmetric, and used a more appropriate factorization.
+Often it's possible to write more efficient code for a matrix that is known to have certain properties, e.g.
+it is symmetric or tridiagonal. Julia provides some special types so that you can "tag" matrices as having
+these properties. For instance:
+
+```jldoctest
+julia> B = [1.5 2 -4; 2 -1 -3; -4 -3 5]
+3×3 Matrix{Float64}:
+ 1.5 2.0 -4.0
+ 2.0 -1.0 -3.0
+ -4.0 -3.0 5.0
+
+julia> sB = Symmetric(B)
+3×3 Symmetric{Float64, Matrix{Float64}}:
+ 1.5 2.0 -4.0
+ 2.0 -1.0 -3.0
+ -4.0 -3.0 5.0
+```
+
+`sB` has been tagged as a matrix that's (real) symmetric, so for later operations we might perform on it,
+such as eigenfactorization or computing matrix-vector products, efficiencies can be found by only referencing
+half of it. For example:
+
+```jldoctest
+julia> B = [1.5 2 -4; 2 -1 -3; -4 -3 5]
+3×3 Matrix{Float64}:
+ 1.5 2.0 -4.0
+ 2.0 -1.0 -3.0
+ -4.0 -3.0 5.0
+
+julia> sB = Symmetric(B)
+3×3 Symmetric{Float64, Matrix{Float64}}:
+ 1.5 2.0 -4.0
+ 2.0 -1.0 -3.0
+ -4.0 -3.0 5.0
+
+julia> x = [1; 2; 3]
+3-element Vector{Int64}:
+ 1
+ 2
+ 3
+
+julia> sB\x
+3-element Vector{Float64}:
+ -1.7391304347826084
+ -1.1086956521739126
+ -1.4565217391304346
+```
+The `\` operation here performs the linear solution. The left-division operator is pretty powerful and it's easy to write compact, readable code that is flexible enough to solve all sorts of systems of linear equations.
+
+## Special matrices
+
+[Matrices with special symmetries and structures](http://www2.imm.dtu.dk/pubdb/views/publication_details.php?id=3274)
+arise often in linear algebra and are frequently associated with various matrix factorizations.
+Julia features a rich collection of special matrix types, which allow for fast computation with
+specialized routines that are specially developed for particular matrix types.
+
+The following tables summarize the types of special matrices that have been implemented in Julia,
+as well as whether hooks to various optimized methods for them in LAPACK are available.
+
+| Type | Description |
+|:----------------------------- |:--------------------------------------------------------------------------------------------- |
+| [`Symmetric`](@ref) | [Symmetric matrix](https://en.wikipedia.org/wiki/Symmetric_matrix) |
+| [`Hermitian`](@ref) | [Hermitian matrix](https://en.wikipedia.org/wiki/Hermitian_matrix) |
+| [`UpperTriangular`](@ref) | Upper [triangular matrix](https://en.wikipedia.org/wiki/Triangular_matrix) |
+| [`UnitUpperTriangular`](@ref) | Upper [triangular matrix](https://en.wikipedia.org/wiki/Triangular_matrix) with unit diagonal |
+| [`LowerTriangular`](@ref) | Lower [triangular matrix](https://en.wikipedia.org/wiki/Triangular_matrix) |
+| [`UnitLowerTriangular`](@ref) | Lower [triangular matrix](https://en.wikipedia.org/wiki/Triangular_matrix) with unit diagonal |
+| [`UpperHessenberg`](@ref) | Upper [Hessenberg matrix](https://en.wikipedia.org/wiki/Hessenberg_matrix) |
+| [`Tridiagonal`](@ref) | [Tridiagonal matrix](https://en.wikipedia.org/wiki/Tridiagonal_matrix) |
+| [`SymTridiagonal`](@ref) | Symmetric tridiagonal matrix |
+| [`Bidiagonal`](@ref) | Upper/lower [bidiagonal matrix](https://en.wikipedia.org/wiki/Bidiagonal_matrix) |
+| [`Diagonal`](@ref) | [Diagonal matrix](https://en.wikipedia.org/wiki/Diagonal_matrix) |
+| [`UniformScaling`](@ref) | [Uniform scaling operator](https://en.wikipedia.org/wiki/Uniform_scaling) |
+
+### Elementary operations
+
+| Matrix type | `+` | `-` | `*` | `\` | Other functions with optimized methods |
+|:----------------------------- |:--- |:--- |:--- |:--- |:----------------------------------------------------------- |
+| [`Symmetric`](@ref) | | | | MV | [`inv`](@ref), [`sqrt`](@ref), [`exp`](@ref) |
+| [`Hermitian`](@ref) | | | | MV | [`inv`](@ref), [`sqrt`](@ref), [`exp`](@ref) |
+| [`UpperTriangular`](@ref) | | | MV | MV | [`inv`](@ref), [`det`](@ref) |
+| [`UnitUpperTriangular`](@ref) | | | MV | MV | [`inv`](@ref), [`det`](@ref) |
+| [`LowerTriangular`](@ref) | | | MV | MV | [`inv`](@ref), [`det`](@ref) |
+| [`UnitLowerTriangular`](@ref) | | | MV | MV | [`inv`](@ref), [`det`](@ref) |
+| [`UpperHessenberg`](@ref) | | | | MM | [`inv`](@ref), [`det`](@ref) |
+| [`SymTridiagonal`](@ref) | M | M | MS | MV | [`eigmax`](@ref), [`eigmin`](@ref) |
+| [`Tridiagonal`](@ref) | M | M | MS | MV | |
+| [`Bidiagonal`](@ref) | M | M | MS | MV | |
+| [`Diagonal`](@ref) | M | M | MV | MV | [`inv`](@ref), [`det`](@ref), [`logdet`](@ref), [`/`](@ref) |
+| [`UniformScaling`](@ref) | M | M | MVS | MVS | [`/`](@ref) |
+
+Legend:
+
+| Key | Description |
+|:---------- |:------------------------------------------------------------- |
+| M (matrix) | An optimized method for matrix-matrix operations is available |
+| V (vector) | An optimized method for matrix-vector operations is available |
+| S (scalar) | An optimized method for matrix-scalar operations is available |
+
+### Matrix factorizations
+
+| Matrix type | LAPACK | [`eigen`](@ref) | [`eigvals`](@ref) | [`eigvecs`](@ref) | [`svd`](@ref) | [`svdvals`](@ref) |
+|:----------------------------- |:------ |:------------- |:----------------- |:----------------- |:------------- |:----------------- |
+| [`Symmetric`](@ref) | SY | | ARI | | | |
+| [`Hermitian`](@ref) | HE | | ARI | | | |
+| [`UpperTriangular`](@ref) | TR | A | A | A | | |
+| [`UnitUpperTriangular`](@ref) | TR | A | A | A | | |
+| [`LowerTriangular`](@ref) | TR | A | A | A | | |
+| [`UnitLowerTriangular`](@ref) | TR | A | A | A | | |
+| [`SymTridiagonal`](@ref) | ST | A | ARI | AV | | |
+| [`Tridiagonal`](@ref) | GT | | | | | |
+| [`Bidiagonal`](@ref) | BD | | | | A | A |
+| [`Diagonal`](@ref) | DI | | A | | | |
+
+Legend:
+
+| Key | Description | Example |
+|:------------ |:------------------------------------------------------------------------------------------------------------------------------- |:-------------------- |
+| A (all) | An optimized method to find all the characteristic values and/or vectors is available | e.g. `eigvals(M)` |
+| R (range) | An optimized method to find the `il`th through the `ih`th characteristic values is available | `eigvals(M, il, ih)` |
+| I (interval) | An optimized method to find the characteristic values in the interval [`vl`, `vh`] is available | `eigvals(M, vl, vh)` |
+| V (vectors) | An optimized method to find the characteristic vectors corresponding to the characteristic values `x=[x1, x2,...]` is available | `eigvecs(M, x)` |
+
+### The uniform scaling operator
+
+A [`UniformScaling`](@ref) operator represents a scalar times the identity operator, `λ*I`. The identity
+operator `I` is defined as a constant and is an instance of `UniformScaling`. The size of these
+operators is generic and matches the other matrix in the binary operations [`+`](@ref), [`-`](@ref),
+[`*`](@ref) and [`\`](@ref). For `A+I` and `A-I` this means that `A` must be square. Multiplication
+with the identity operator `I` is a noop (except for checking that the scaling factor is one)
+and therefore almost without overhead.
+
+To see the `UniformScaling` operator in action:
+
+```jldoctest
+julia> U = UniformScaling(2);
+
+julia> a = [1 2; 3 4]
+2×2 Matrix{Int64}:
+ 1 2
+ 3 4
+
+julia> a + U
+2×2 Matrix{Int64}:
+ 3 2
+ 3 6
+
+julia> a * U
+2×2 Matrix{Int64}:
+ 2 4
+ 6 8
+
+julia> [a U]
+2×4 Matrix{Int64}:
+ 1 2 2 0
+ 3 4 0 2
+
+julia> b = [1 2 3; 4 5 6]
+2×3 Matrix{Int64}:
+ 1 2 3
+ 4 5 6
+
+julia> b - U
+ERROR: DimensionMismatch("matrix is not square: dimensions are (2, 3)")
+Stacktrace:
+[...]
+```
+
+If you need to solve many systems of the form `(A+μI)x = b` for the same `A` and different `μ`, it might be beneficial
+to first compute the Hessenberg factorization `F` of `A` via the [`hessenberg`](@ref) function.
+Given `F`, Julia employs an efficient algorithm for `(F+μ*I) \ b` (equivalent to `(A+μ*I) \ b`) and related
+operations like determinants.
+
+
+## [Matrix factorizations](@id man-linalg-factorizations)
+
+[Matrix factorizations (a.k.a. matrix decompositions)](https://en.wikipedia.org/wiki/Matrix_decomposition)
+compute the factorization of a matrix into a product of matrices, and are one of the central concepts
+in linear algebra.
+
+The following table summarizes the types of matrix factorizations that have been implemented in
+Julia. Details of their associated methods can be found in the [Standard functions](@ref) section
+of the Linear Algebra documentation.
+
+| Type | Description |
+|:------------------ |:-------------------------------------------------------------------------------------------------------------- |
+| `BunchKaufman` | Bunch-Kaufman factorization |
+| `Cholesky` | [Cholesky factorization](https://en.wikipedia.org/wiki/Cholesky_decomposition) |
+| `CholeskyPivoted` | [Pivoted](https://en.wikipedia.org/wiki/Pivot_element) Cholesky factorization |
+| `LDLt` | [LDL(T) factorization](https://en.wikipedia.org/wiki/Cholesky_decomposition#LDL_decomposition) |
+| `LU` | [LU factorization](https://en.wikipedia.org/wiki/LU_decomposition) |
+| `QR` | [QR factorization](https://en.wikipedia.org/wiki/QR_decomposition) |
+| `QRCompactWY` | Compact WY form of the QR factorization |
+| `QRPivoted` | Pivoted [QR factorization](https://en.wikipedia.org/wiki/QR_decomposition) |
+| `LQ` | [QR factorization](https://en.wikipedia.org/wiki/QR_decomposition) of `transpose(A)` |
+| `Hessenberg` | [Hessenberg decomposition](http://mathworld.wolfram.com/HessenbergDecomposition.html) |
+| `Eigen` | [Spectral decomposition](https://en.wikipedia.org/wiki/Eigendecomposition_of_a_matrix) |
+| `GeneralizedEigen` | [Generalized spectral decomposition](https://en.wikipedia.org/wiki/Eigendecomposition_of_a_matrix#Generalized_eigenvalue_problem) |
+| `SVD` | [Singular value decomposition](https://en.wikipedia.org/wiki/Singular_value_decomposition) |
+| `GeneralizedSVD` | [Generalized SVD](https://en.wikipedia.org/wiki/Generalized_singular_value_decomposition#Higher_order_version) |
+| `Schur` | [Schur decomposition](https://en.wikipedia.org/wiki/Schur_decomposition) |
+| `GeneralizedSchur` | [Generalized Schur decomposition](https://en.wikipedia.org/wiki/Schur_decomposition#Generalized_Schur_decomposition) |
+
+## Standard functions
+
+Linear algebra functions in Julia are largely implemented by calling functions from [LAPACK](http://www.netlib.org/lapack/). Sparse matrix factorizations call functions from [SuiteSparse](http://suitesparse.com). Other sparse solvers are available as Julia packages.
+
+```@docs
+Base.:*(::AbstractMatrix, ::AbstractMatrix)
+Base.:\(::AbstractMatrix, ::AbstractVecOrMat)
+LinearAlgebra.SingularException
+LinearAlgebra.PosDefException
+LinearAlgebra.ZeroPivotException
+LinearAlgebra.dot
+LinearAlgebra.dot(::Any, ::Any, ::Any)
+LinearAlgebra.cross
+LinearAlgebra.factorize
+LinearAlgebra.Diagonal
+LinearAlgebra.Bidiagonal
+LinearAlgebra.SymTridiagonal
+LinearAlgebra.Tridiagonal
+LinearAlgebra.Symmetric
+LinearAlgebra.Hermitian
+LinearAlgebra.LowerTriangular
+LinearAlgebra.UpperTriangular
+LinearAlgebra.UnitLowerTriangular
+LinearAlgebra.UnitUpperTriangular
+LinearAlgebra.UpperHessenberg
+LinearAlgebra.UniformScaling
+LinearAlgebra.I
+LinearAlgebra.UniformScaling(::Integer)
+LinearAlgebra.Factorization
+LinearAlgebra.LU
+LinearAlgebra.lu
+LinearAlgebra.lu!
+LinearAlgebra.Cholesky
+LinearAlgebra.CholeskyPivoted
+LinearAlgebra.cholesky
+LinearAlgebra.cholesky!
+LinearAlgebra.lowrankupdate
+LinearAlgebra.lowrankdowndate
+LinearAlgebra.lowrankupdate!
+LinearAlgebra.lowrankdowndate!
+LinearAlgebra.LDLt
+LinearAlgebra.ldlt
+LinearAlgebra.ldlt!
+LinearAlgebra.QR
+LinearAlgebra.QRCompactWY
+LinearAlgebra.QRPivoted
+LinearAlgebra.qr
+LinearAlgebra.qr!
+LinearAlgebra.LQ
+LinearAlgebra.lq
+LinearAlgebra.lq!
+LinearAlgebra.BunchKaufman
+LinearAlgebra.bunchkaufman
+LinearAlgebra.bunchkaufman!
+LinearAlgebra.Eigen
+LinearAlgebra.GeneralizedEigen
+LinearAlgebra.eigvals
+LinearAlgebra.eigvals!
+LinearAlgebra.eigmax
+LinearAlgebra.eigmin
+LinearAlgebra.eigvecs
+LinearAlgebra.eigen
+LinearAlgebra.eigen!
+LinearAlgebra.Hessenberg
+LinearAlgebra.hessenberg
+LinearAlgebra.hessenberg!
+LinearAlgebra.Schur
+LinearAlgebra.GeneralizedSchur
+LinearAlgebra.schur
+LinearAlgebra.schur!
+LinearAlgebra.ordschur
+LinearAlgebra.ordschur!
+LinearAlgebra.SVD
+LinearAlgebra.GeneralizedSVD
+LinearAlgebra.svd
+LinearAlgebra.svd!
+LinearAlgebra.svdvals
+LinearAlgebra.svdvals!
+LinearAlgebra.Givens
+LinearAlgebra.givens
+LinearAlgebra.triu
+LinearAlgebra.triu!
+LinearAlgebra.tril
+LinearAlgebra.tril!
+LinearAlgebra.diagind
+LinearAlgebra.diag
+LinearAlgebra.diagm
+LinearAlgebra.rank
+LinearAlgebra.norm
+LinearAlgebra.opnorm
+LinearAlgebra.normalize!
+LinearAlgebra.normalize
+LinearAlgebra.cond
+LinearAlgebra.condskeel
+LinearAlgebra.tr
+LinearAlgebra.det
+LinearAlgebra.logdet
+LinearAlgebra.logabsdet
+Base.inv(::AbstractMatrix)
+LinearAlgebra.pinv
+LinearAlgebra.nullspace
+Base.kron
+Base.kron!
+LinearAlgebra.exp(::StridedMatrix{<:LinearAlgebra.BlasFloat})
+Base.:^(::AbstractMatrix, ::Number)
+Base.:^(::Number, ::AbstractMatrix)
+LinearAlgebra.log(::StridedMatrix)
+LinearAlgebra.sqrt(::StridedMatrix{<:Real})
+LinearAlgebra.cos(::StridedMatrix{<:Real})
+LinearAlgebra.sin(::StridedMatrix{<:Real})
+LinearAlgebra.sincos(::StridedMatrix{<:Real})
+LinearAlgebra.tan(::StridedMatrix{<:Real})
+LinearAlgebra.sec(::StridedMatrix)
+LinearAlgebra.csc(::StridedMatrix)
+LinearAlgebra.cot(::StridedMatrix)
+LinearAlgebra.cosh(::StridedMatrix)
+LinearAlgebra.sinh(::StridedMatrix)
+LinearAlgebra.tanh(::StridedMatrix)
+LinearAlgebra.sech(::StridedMatrix)
+LinearAlgebra.csch(::StridedMatrix)
+LinearAlgebra.coth(::StridedMatrix)
+LinearAlgebra.acos(::StridedMatrix)
+LinearAlgebra.asin(::StridedMatrix)
+LinearAlgebra.atan(::StridedMatrix)
+LinearAlgebra.asec(::StridedMatrix)
+LinearAlgebra.acsc(::StridedMatrix)
+LinearAlgebra.acot(::StridedMatrix)
+LinearAlgebra.acosh(::StridedMatrix)
+LinearAlgebra.asinh(::StridedMatrix)
+LinearAlgebra.atanh(::StridedMatrix)
+LinearAlgebra.asech(::StridedMatrix)
+LinearAlgebra.acsch(::StridedMatrix)
+LinearAlgebra.acoth(::StridedMatrix)
+LinearAlgebra.lyap
+LinearAlgebra.sylvester
+LinearAlgebra.issuccess
+LinearAlgebra.issymmetric
+LinearAlgebra.isposdef
+LinearAlgebra.isposdef!
+LinearAlgebra.istril
+LinearAlgebra.istriu
+LinearAlgebra.isdiag
+LinearAlgebra.ishermitian
+Base.transpose
+LinearAlgebra.transpose!
+LinearAlgebra.Transpose
+Base.adjoint
+LinearAlgebra.adjoint!
+LinearAlgebra.Adjoint
+Base.copy(::Union{Transpose,Adjoint})
+LinearAlgebra.stride1
+LinearAlgebra.checksquare
+LinearAlgebra.peakflops
+```
+
+## Low-level matrix operations
+
+In many cases there are in-place versions of matrix operations that allow you to supply
+a pre-allocated output vector or matrix. This is useful when optimizing critical code in order
+to avoid the overhead of repeated allocations. These in-place operations are suffixed with `!`
+below (e.g. `mul!`) according to the usual Julia convention.
+
+```@docs
+LinearAlgebra.mul!
+LinearAlgebra.lmul!
+LinearAlgebra.rmul!
+LinearAlgebra.ldiv!
+LinearAlgebra.rdiv!
+```
+
+## BLAS functions
+
+In Julia (as in much of scientific computation), dense linear-algebra operations are based on
+the [LAPACK library](http://www.netlib.org/lapack/), which in turn is built on top of basic linear-algebra
+building-blocks known as the [BLAS](http://www.netlib.org/blas/). There are highly optimized
+implementations of BLAS available for every computer architecture, and sometimes in high-performance
+linear algebra routines it is useful to call the BLAS functions directly.
+
+`LinearAlgebra.BLAS` provides wrappers for some of the BLAS functions. Those BLAS functions
+that overwrite one of the input arrays have names ending in `'!'`. Usually, a BLAS function has
+four methods defined, for [`Float64`](@ref), [`Float32`](@ref), `ComplexF64`, and `ComplexF32` arrays.
+
+### [BLAS character arguments](@id stdlib-blas-chars)
+Many BLAS functions accept arguments that determine whether to transpose an argument (`trans`),
+which triangle of a matrix to reference (`uplo` or `ul`),
+whether the diagonal of a triangular matrix can be assumed to
+be all ones (`dA`) or which side of a matrix multiplication
+the input argument belongs on (`side`). The possibilities are:
+
+#### [Multiplication order](@id stdlib-blas-side)
+| `side` | Meaning |
+|:-------|:--------------------------------------------------------------------|
+| `'L'` | The argument goes on the *left* side of a matrix-matrix operation. |
+| `'R'` | The argument goes on the *right* side of a matrix-matrix operation. |
+
+#### [Triangle referencing](@id stdlib-blas-uplo)
+| `uplo`/`ul` | Meaning |
+|:------------|:------------------------------------------------------|
+| `'U'` | Only the *upper* triangle of the matrix will be used. |
+| `'L'` | Only the *lower* triangle of the matrix will be used. |
+
+#### [Transposition operation](@id stdlib-blas-trans)
+| `trans`/`tX` | Meaning |
+|:-------------|:--------------------------------------------------------|
+| `'N'` | The input matrix `X` is not transposed or conjugated. |
+| `'T'` | The input matrix `X` will be transposed. |
+| `'C'` | The input matrix `X` will be conjugated and transposed. |
+
+#### [Unit diagonal](@id stdlib-blas-diag)
+| `diag`/`dX` | Meaning |
+|:------------|:----------------------------------------------------------|
+| `'N'` | The diagonal values of the matrix `X` will be read. |
+| `'U'` | The diagonal of the matrix `X` is assumed to be all ones. |
+
+```@docs
+LinearAlgebra.BLAS
+LinearAlgebra.BLAS.dot
+LinearAlgebra.BLAS.dotu
+LinearAlgebra.BLAS.dotc
+LinearAlgebra.BLAS.blascopy!
+LinearAlgebra.BLAS.nrm2
+LinearAlgebra.BLAS.asum
+LinearAlgebra.axpy!
+LinearAlgebra.axpby!
+LinearAlgebra.BLAS.scal!
+LinearAlgebra.BLAS.scal
+LinearAlgebra.BLAS.iamax
+LinearAlgebra.BLAS.ger!
+LinearAlgebra.BLAS.syr!
+LinearAlgebra.BLAS.syrk!
+LinearAlgebra.BLAS.syrk
+LinearAlgebra.BLAS.syr2k!
+LinearAlgebra.BLAS.syr2k
+LinearAlgebra.BLAS.her!
+LinearAlgebra.BLAS.herk!
+LinearAlgebra.BLAS.herk
+LinearAlgebra.BLAS.her2k!
+LinearAlgebra.BLAS.her2k
+LinearAlgebra.BLAS.gbmv!
+LinearAlgebra.BLAS.gbmv
+LinearAlgebra.BLAS.sbmv!
+LinearAlgebra.BLAS.sbmv(::Any, ::Any, ::Any, ::Any, ::Any)
+LinearAlgebra.BLAS.sbmv(::Any, ::Any, ::Any, ::Any)
+LinearAlgebra.BLAS.gemm!
+LinearAlgebra.BLAS.gemm(::Any, ::Any, ::Any, ::Any, ::Any)
+LinearAlgebra.BLAS.gemm(::Any, ::Any, ::Any, ::Any)
+LinearAlgebra.BLAS.gemv!
+LinearAlgebra.BLAS.gemv(::Any, ::Any, ::Any, ::Any)
+LinearAlgebra.BLAS.gemv(::Any, ::Any, ::Any)
+LinearAlgebra.BLAS.symm!
+LinearAlgebra.BLAS.symm(::Any, ::Any, ::Any, ::Any, ::Any)
+LinearAlgebra.BLAS.symm(::Any, ::Any, ::Any, ::Any)
+LinearAlgebra.BLAS.symv!
+LinearAlgebra.BLAS.symv(::Any, ::Any, ::Any, ::Any)
+LinearAlgebra.BLAS.symv(::Any, ::Any, ::Any)
+LinearAlgebra.BLAS.hemm!
+LinearAlgebra.BLAS.hemm(::Any, ::Any, ::Any, ::Any, ::Any)
+LinearAlgebra.BLAS.hemm(::Any, ::Any, ::Any, ::Any)
+LinearAlgebra.BLAS.hemv!
+LinearAlgebra.BLAS.hemv(::Any, ::Any, ::Any, ::Any)
+LinearAlgebra.BLAS.hemv(::Any, ::Any, ::Any)
+LinearAlgebra.BLAS.trmm!
+LinearAlgebra.BLAS.trmm
+LinearAlgebra.BLAS.trsm!
+LinearAlgebra.BLAS.trsm
+LinearAlgebra.BLAS.trmv!
+LinearAlgebra.BLAS.trmv
+LinearAlgebra.BLAS.trsv!
+LinearAlgebra.BLAS.trsv
+LinearAlgebra.BLAS.set_num_threads
+```
+
+## LAPACK functions
+
+`LinearAlgebra.LAPACK` provides wrappers for some of the LAPACK functions for linear algebra.
+ Those functions that overwrite one of the input arrays have names ending in `'!'`.
+
+Usually a function has 4 methods defined, one each for [`Float64`](@ref), [`Float32`](@ref),
+`ComplexF64` and `ComplexF32` arrays.
+
+Note that the LAPACK API provided by Julia can and will change in the future. Since this API is
+not user-facing, there is no commitment to support/deprecate this specific set of functions in
+future releases.
+
+```@docs
+LinearAlgebra.LAPACK
+LinearAlgebra.LAPACK.gbtrf!
+LinearAlgebra.LAPACK.gbtrs!
+LinearAlgebra.LAPACK.gebal!
+LinearAlgebra.LAPACK.gebak!
+LinearAlgebra.LAPACK.gebrd!
+LinearAlgebra.LAPACK.gelqf!
+LinearAlgebra.LAPACK.geqlf!
+LinearAlgebra.LAPACK.geqrf!
+LinearAlgebra.LAPACK.geqp3!
+LinearAlgebra.LAPACK.gerqf!
+LinearAlgebra.LAPACK.geqrt!
+LinearAlgebra.LAPACK.geqrt3!
+LinearAlgebra.LAPACK.getrf!
+LinearAlgebra.LAPACK.tzrzf!
+LinearAlgebra.LAPACK.ormrz!
+LinearAlgebra.LAPACK.gels!
+LinearAlgebra.LAPACK.gesv!
+LinearAlgebra.LAPACK.getrs!
+LinearAlgebra.LAPACK.getri!
+LinearAlgebra.LAPACK.gesvx!
+LinearAlgebra.LAPACK.gelsd!
+LinearAlgebra.LAPACK.gelsy!
+LinearAlgebra.LAPACK.gglse!
+LinearAlgebra.LAPACK.geev!
+LinearAlgebra.LAPACK.gesdd!
+LinearAlgebra.LAPACK.gesvd!
+LinearAlgebra.LAPACK.ggsvd!
+LinearAlgebra.LAPACK.ggsvd3!
+LinearAlgebra.LAPACK.geevx!
+LinearAlgebra.LAPACK.ggev!
+LinearAlgebra.LAPACK.gtsv!
+LinearAlgebra.LAPACK.gttrf!
+LinearAlgebra.LAPACK.gttrs!
+LinearAlgebra.LAPACK.orglq!
+LinearAlgebra.LAPACK.orgqr!
+LinearAlgebra.LAPACK.orgql!
+LinearAlgebra.LAPACK.orgrq!
+LinearAlgebra.LAPACK.ormlq!
+LinearAlgebra.LAPACK.ormqr!
+LinearAlgebra.LAPACK.ormql!
+LinearAlgebra.LAPACK.ormrq!
+LinearAlgebra.LAPACK.gemqrt!
+LinearAlgebra.LAPACK.posv!
+LinearAlgebra.LAPACK.potrf!
+LinearAlgebra.LAPACK.potri!
+LinearAlgebra.LAPACK.potrs!
+LinearAlgebra.LAPACK.pstrf!
+LinearAlgebra.LAPACK.ptsv!
+LinearAlgebra.LAPACK.pttrf!
+LinearAlgebra.LAPACK.pttrs!
+LinearAlgebra.LAPACK.trtri!
+LinearAlgebra.LAPACK.trtrs!
+LinearAlgebra.LAPACK.trcon!
+LinearAlgebra.LAPACK.trevc!
+LinearAlgebra.LAPACK.trrfs!
+LinearAlgebra.LAPACK.stev!
+LinearAlgebra.LAPACK.stebz!
+LinearAlgebra.LAPACK.stegr!
+LinearAlgebra.LAPACK.stein!
+LinearAlgebra.LAPACK.syconv!
+LinearAlgebra.LAPACK.sysv!
+LinearAlgebra.LAPACK.sytrf!
+LinearAlgebra.LAPACK.sytri!
+LinearAlgebra.LAPACK.sytrs!
+LinearAlgebra.LAPACK.hesv!
+LinearAlgebra.LAPACK.hetrf!
+LinearAlgebra.LAPACK.hetri!
+LinearAlgebra.LAPACK.hetrs!
+LinearAlgebra.LAPACK.syev!
+LinearAlgebra.LAPACK.syevr!
+LinearAlgebra.LAPACK.sygvd!
+LinearAlgebra.LAPACK.bdsqr!
+LinearAlgebra.LAPACK.bdsdc!
+LinearAlgebra.LAPACK.gecon!
+LinearAlgebra.LAPACK.gehrd!
+LinearAlgebra.LAPACK.orghr!
+LinearAlgebra.LAPACK.gees!
+LinearAlgebra.LAPACK.gges!
+LinearAlgebra.LAPACK.trexc!
+LinearAlgebra.LAPACK.trsen!
+LinearAlgebra.LAPACK.tgsen!
+LinearAlgebra.LAPACK.trsyl!
+```
+
+```@meta
+DocTestSetup = nothing
+```
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Logging/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Logging/docs/src/index.md
new file mode 100644
index 0000000..e1d6022
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Logging/docs/src/index.md
@@ -0,0 +1,311 @@
+# Logging
+
+The [`Logging`](@ref Logging.Logging) module provides a way to record the history and progress of a
+computation as a log of events. Events are created by inserting a logging
+statement into the source code, for example:
+
+```julia
+@warn "Abandon printf debugging, all ye who enter here!"
+┌ Warning: Abandon printf debugging, all ye who enter here!
+└ @ Main REPL[1]:1
+```
+
+The system provides several advantages over peppering your source code with
+calls to `println()`. First, it allows you to control the visibility and
+presentation of messages without editing the source code. For example, in
+contrast to the `@warn` above
+
+```julia
+@debug "The sum of some values $(sum(rand(100)))"
+```
+
+will produce no output by default. Furthermore, it's very cheap to leave debug
+statements like this in the source code because the system avoids evaluating
+the message if it would later be ignored. In this case `sum(rand(100))` and
+the associated string processing will never be executed unless debug logging is
+enabled.
+
+Second, the logging tools allow you to attach arbitrary data to each event as a
+set of key--value pairs. This allows you to capture local variables and other
+program state for later analysis. For example, to attach the local array
+variable `A` and the sum of a vector `v` as the key `s` you can use
+
+```jldoctest
+A = ones(Int, 4, 4)
+v = ones(100)
+@info "Some variables" A s=sum(v)
+
+# output
+┌ Info: Some variables
+│ A =
+│ 4×4 Matrix{Int64}:
+│ 1 1 1 1
+│ 1 1 1 1
+│ 1 1 1 1
+│ 1 1 1 1
+└ s = 100.0
+```
+
+All of the logging macros `@debug`, `@info`, `@warn` and `@error` share common
+features that are described in detail in the documentation for the more
+general macro [`@logmsg`](@ref).
+
+## Log event structure
+
+Each event generates several pieces of data, some provided by the user and some
+automatically extracted. Let's examine the user-defined data first:
+
+* The *log level* is a broad category for the message that is used for early
+ filtering. There are several standard levels of type [`LogLevel`](@ref);
+ user-defined levels are also possible.
+ Each is distinct in purpose:
+ - `Debug` is information intended for the developer of the program.
+ These events are disabled by default.
+ - `Info` is for general information to the user.
+ Think of it as an alternative to using `println` directly.
+ - `Warn` means something is wrong and action is likely required
+ but that for now the program is still working.
+ - `Error` means something is wrong and it is unlikely to be recovered,
+ at least by this part of the code.
+ Often this log-level is unneeded as throwing an exception can convey
+ all the required information.
+
+* The *message* is an object describing the event. By convention
+ `AbstractString`s passed as messages are assumed to be in markdown format.
+ Other types will be displayed using `print(io, obj)` or `string(obj)` for
+ text-based output and possibly `show(io,mime,obj)` for other multimedia
+ displays used in the installed logger.
+* Optional *key--value pairs* allow arbitrary data to be attached to each event.
+ Some keys have conventional meaning that can affect the way an event is
+ interpreted (see [`@logmsg`](@ref)).
+
+The system also generates some standard information for each event:
+
+* The `module` in which the logging macro was expanded.
+* The `file` and `line` where the logging macro occurs in the source code.
+* A message `id` that is a unique, fixed identifier for the *source code
+ statement* where the logging macro appears. This identifier is designed to be
+ fairly stable even if the source code of the file changes, as long as the
+ logging statement itself remains the same.
+* A `group` for the event, which is set to the base name of the file by default,
+ without extension. This can be used to group messages into categories more
+ finely than the log level (for example, all deprecation warnings have group
+ `:depwarn`), or into logical groupings across or within modules.
+
+Notice that some useful information such as the event time is not included by
+default. This is because such information can be expensive to extract and is
+also *dynamically* available to the current logger. It's simple to define a
+[custom logger](@ref AbstractLogger-interface) to augment event data with the
+time, backtrace, values of global variables and other useful information as
+required.
+
+
+## Processing log events
+
+As you can see in the examples, logging statements make no mention of
+where log events go or how they are processed. This is a key design feature
+that makes the system composable and natural for concurrent use. It does this
+by separating two different concerns:
+
+* *Creating* log events is the concern of the module author who needs to
+ decide where events are triggered and which information to include.
+* *Processing* of log events — that is, display, filtering, aggregation and
+ recording — is the concern of the application author who needs to bring
+ multiple modules together into a cooperating application.
+
+### Loggers
+
+Processing of events is performed by a *logger*, which is the first piece of
+user configurable code to see the event. All loggers must be subtypes of
+[`AbstractLogger`](@ref).
+
+When an event is triggered, the appropriate logger is found by looking for a
+task-local logger with the global logger as fallback. The idea here is that
+the application code knows how log events should be processed and exists
+somewhere at the top of the call stack. So we should look up through the call
+stack to discover the logger — that is, the logger should be *dynamically
+scoped*. (This is a point of contrast with logging frameworks where the
+logger is *lexically scoped*; provided explicitly by the module author or as a
+simple global variable. In such a system it's awkward to control logging while
+composing functionality from multiple modules.)
+
+The global logger may be set with [`global_logger`](@ref), and task-local
+loggers controlled using [`with_logger`](@ref). Newly spawned tasks inherit
+the logger of the parent task.
+
+There are three logger types provided by the library. [`ConsoleLogger`](@ref)
+is the default logger you see when starting the REPL. It displays events in a
+readable text format and tries to give simple but user friendly control over
+formatting and filtering. [`NullLogger`](@ref) is a convenient way to drop all
+messages where necessary; it is the logging equivalent of the [`devnull`](@ref)
+stream. [`SimpleLogger`](@ref) is a very simplistic text formatting logger,
+mainly useful for debugging the logging system itself.
+
+Custom loggers should come with overloads for the functions described in the
+[reference section](@ref AbstractLogger-interface).
+
+### Early filtering and message handling
+
+When an event occurs, a few steps of early filtering occur to avoid generating
+messages that will be discarded:
+
+1. The message log level is checked against a global minimum level (set via
+ [`disable_logging`](@ref)). This is a crude but extremely cheap global
+ setting.
+2. The current logger state is looked up and the message level checked against the
+ logger's cached minimum level, as found by calling [`Logging.min_enabled_level`](@ref).
+ This behavior can be overridden via environment variables (more on this later).
+3. The [`Logging.shouldlog`](@ref) function is called with the current logger, taking
+ some minimal information (level, module, group, id) which can be computed
+ statically. Most usefully, `shouldlog` is passed an event `id` which can be
+ used to discard events early based on a cached predicate.
+
+If all these checks pass, the message and key--value pairs are evaluated in full
+and passed to the current logger via the [`Logging.handle_message`](@ref) function.
+`handle_message()` may perform additional filtering as required and display the
+event to the screen, save it to a file, etc.
+
+Exceptions that occur while generating the log event are captured and logged
+by default. This prevents individual broken events from crashing the
+application, which is helpful when enabling little-used debug events in a
+production system. This behavior can be customized per logger type by
+extending [`Logging.catch_exceptions`](@ref).
+
+## Testing log events
+
+Log events are a side effect of running normal code, but you might find
+yourself wanting to test particular informational messages and warnings. The
+`Test` module provides a [`@test_logs`](@ref) macro that can be used to
+pattern match against the log event stream.
+
+## Environment variables
+
+Message filtering can be influenced through the `JULIA_DEBUG` environment
+variable, which serves as an easy way to enable debug logging for a file or
+module. For example, loading julia with `JULIA_DEBUG=loading` will activate
+`@debug` log messages in `loading.jl`:
+
+```
+$ JULIA_DEBUG=loading julia -e 'using OhMyREPL'
+┌ Debug: Rejecting cache file /home/user/.julia/compiled/v0.7/OhMyREPL.ji due to it containing an invalid cache header
+└ @ Base loading.jl:1328
+[ Info: Recompiling stale cache file /home/user/.julia/compiled/v0.7/OhMyREPL.ji for module OhMyREPL
+┌ Debug: Rejecting cache file /home/user/.julia/compiled/v0.7/Tokenize.ji due to it containing an invalid cache header
+└ @ Base loading.jl:1328
+...
+```
+
+Similarly, the environment variable can be used to enable debug logging of
+modules, such as `Pkg`, or module roots (see [`Base.moduleroot`](@ref)). To
+enable all debug logging, use the special value `all`.
+
+To turn debug logging on from the REPL, set `ENV["JULIA_DEBUG"]` to the
+name of the module of interest. Functions defined in the REPL belong to
+module `Main`; logging for them can be enabled like this:
+```julia-repl
+julia> foo() = @debug "foo"
+foo (generic function with 1 method)
+
+julia> foo()
+
+julia> ENV["JULIA_DEBUG"] = Main
+Main
+
+julia> foo()
+┌ Debug: foo
+└ @ Main REPL[1]:1
+
+```
+
+## Writing log events to a file
+
+Sometimes it can be useful to write log events to a file. Here is an example
+of how to use a task-local and global logger to write information to a text
+file:
+
+```julia-repl
+# Load the logging module
+julia> using Logging
+
+# Open a textfile for writing
+julia> io = open("log.txt", "w+")
+IOStream(<file log.txt>)
+
+# Create a simple logger
+julia> logger = SimpleLogger(io)
+SimpleLogger(IOStream(<file log.txt>), Info, Dict{Any,Int64}())
+
+# Log a task-specific message
+julia> with_logger(logger) do
+ @info("a context specific log message")
+ end
+
+# Write all buffered messages to the file
+julia> flush(io)
+
+# Set the global logger to logger
+julia> global_logger(logger)
+SimpleLogger(IOStream(<file log.txt>), Info, Dict{Any,Int64}())
+
+# This message will now also be written to the file
+julia> @info("a global log message")
+
+# Close the file
+julia> close(io)
+```
+
+
+## Reference
+
+### Logging module
+```@docs
+Logging.Logging
+```
+
+### Creating events
+
+```@docs
+Logging.@logmsg
+Logging.LogLevel
+```
+
+### [Processing events with AbstractLogger](@id AbstractLogger-interface)
+
+Event processing is controlled by overriding functions associated with
+`AbstractLogger`:
+
+| Methods to implement | | Brief description |
+|:----------------------------------- |:---------------------- |:---------------------------------------- |
+| [`Logging.handle_message`](@ref) | | Handle a log event |
+| [`Logging.shouldlog`](@ref) | | Early filtering of events |
+| [`Logging.min_enabled_level`](@ref) | | Lower bound for log level of accepted events |
+| **Optional methods** | **Default definition** | **Brief description** |
+| [`Logging.catch_exceptions`](@ref) | `true` | Catch exceptions during event evaluation |
+
+
+```@docs
+Logging.AbstractLogger
+Logging.handle_message
+Logging.shouldlog
+Logging.min_enabled_level
+Logging.catch_exceptions
+Logging.disable_logging
+```
+
+### Using Loggers
+
+Logger installation and inspection:
+
+```@docs
+Logging.global_logger
+Logging.with_logger
+Logging.current_logger
+```
+
+Loggers that are supplied with the system:
+
+```@docs
+Logging.NullLogger
+Logging.ConsoleLogger
+Logging.SimpleLogger
+```
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Markdown/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Markdown/docs/src/index.md
new file mode 100644
index 0000000..a107929
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Markdown/docs/src/index.md
@@ -0,0 +1,398 @@
+# [Markdown](@id markdown_stdlib)
+
+This section describes Julia's markdown syntax, which is enabled by the
+Markdown standard library. The following Markdown elements are supported:
+
+## Inline elements
+
+Here "inline" refers to elements that can be found within blocks of text, i.e. paragraphs. These
+include the following elements.
+
+### Bold
+
+Surround words with two asterisks, `**`, to display the enclosed text in boldface.
+
+```
+A paragraph containing a **bold** word.
+```
+
+### Italics
+
+Surround words with one asterisk, `*`, to display the enclosed text in italics.
+
+```
+A paragraph containing an *italicized* word.
+```
+
+### Literals
+
+Surround text that should be displayed exactly as written with single backticks, ``` ` ``` .
+
+```
+A paragraph containing a `literal` word.
+```
+
+Literals should be used when writing text that refers to names of variables, functions, or other
+parts of a Julia program.
+
+!!! tip
+ To include a backtick character within literal text use three backticks rather than one to enclose
+ the text.
+
+ ```
+ A paragraph containing ``` `backtick` characters ```.
+ ```
+
+ By extension any odd number of backticks may be used to enclose a lesser number of backticks.
+
+### ``\LaTeX``
+
+Surround text that should be displayed as mathematics using ``\LaTeX`` syntax with double backticks,
+``` `` ``` .
+
+```
+A paragraph containing some ``\LaTeX`` markup.
+```
+
+!!! tip
+ As with literals in the previous section, if literal backticks need to be written within double
+ backticks use an even number greater than two. Note that if a single literal backtick needs to
+ be included within ``\LaTeX`` markup then two enclosing backticks is sufficient.
+
+!!! note
+ The `\` character should be escaped appropriately if the text is embedded in a Julia source code,
+ for example, ``` "``\\LaTeX`` syntax in a docstring." ```, since it is interpreted as a string
+ literal. Alternatively, in order to avoid escaping, it is possible to use the `raw` string macro
+ together with the `@doc` macro:
+ ```
+ @doc raw"``\LaTeX`` syntax in a docstring." functionname
+ ```
+
+### Links
+
+Links to either external or internal targets can be written using the following syntax, where
+the text enclosed in square brackets, `[ ]`, is the name of the link and the text enclosed in
+parentheses, `( )`, is the URL.
+
+```
+A paragraph containing a link to [Julia](http://www.julialang.org).
+```
+
+It's also possible to add cross-references to other documented functions/methods/variables within
+the Julia documentation itself. For example:
+
+```julia
+"""
+ tryparse(type, str; base)
+
+Like [`parse`](@ref), but returns either a value of the requested type,
+or [`nothing`](@ref) if the string does not contain a valid number.
+"""
+```
+
+This will create a link in the generated docs to the [`parse`](@ref) documentation
+(which has more information about what this function actually does), and to the
+[`nothing`](@ref) documentation. It's good to include cross references to mutating/non-mutating
+versions of a function, or to highlight a difference between two similar-seeming functions.
+
+!!! note
+ The above cross referencing is *not* a Markdown feature, and relies on
+ [Documenter.jl](https://github.com/JuliaDocs/Documenter.jl), which is
+ used to build base Julia's documentation.
+
+### Footnote references
+
+Named and numbered footnote references can be written using the following syntax. A footnote name
+must be a single alphanumeric word containing no punctuation.
+
+```
+A paragraph containing a numbered footnote [^1] and a named one [^named].
+```
+
+!!! note
+ The text associated with a footnote can be written anywhere within the same page as the footnote
+ reference. The syntax used to define the footnote text is discussed in the [Footnotes](@ref) section
+ below.
+
+## Toplevel elements
+
+The following elements can be written either at the "toplevel" of a document or within another
+"toplevel" element.
+
+### Paragraphs
+
+A paragraph is a block of plain text, possibly containing any number of inline elements defined
+in the [Inline elements](@ref) section above, with one or more blank lines above and below it.
+
+```
+This is a paragraph.
+
+And this is *another* paragraph containing some emphasized text.
+A new line, but still part of the same paragraph.
+```
+
+### Headers
+
+A document can be split up into different sections using headers. Headers use the following syntax:
+
+```julia
+# Level One
+## Level Two
+### Level Three
+#### Level Four
+##### Level Five
+###### Level Six
+```
+
+A header line can contain any inline syntax in the same way as a paragraph can.
+
+!!! tip
+ Try to avoid using too many levels of header within a single document. A heavily nested document
+ may be indicative of a need to restructure it or split it into several pages covering separate
+ topics.
+
+### Code blocks
+
+Source code can be displayed as a literal block using an indent of four spaces as shown in the
+following example.
+
+```
+This is a paragraph.
+
+ function func(x)
+ # ...
+ end
+
+Another paragraph.
+```
+
+Additionally, code blocks can be enclosed using triple backticks with an optional "language" to
+specify how a block of code should be highlighted.
+
+````
+A code block without a "language":
+
+```
+function func(x)
+ # ...
+end
+```
+
+and another one with the "language" specified as `julia`:
+
+```julia
+function func(x)
+ # ...
+end
+```
+````
+
+!!! note
+ "Fenced" code blocks, as shown in the last example, should be preferred over indented code blocks
+ since there is no way to specify what language an indented code block is written in.
+
+### Block quotes
+
+Text from external sources, such as quotations from books or websites, can be quoted using `>`
+characters prepended to each line of the quote as follows.
+
+```
+Here's a quote:
+
+> Julia is a high-level, high-performance dynamic programming language for
+> technical computing, with syntax that is familiar to users of other
+> technical computing environments.
+```
+
+Note that a single space must appear after the `>` character on each line. Quoted blocks may themselves
+contain other toplevel or inline elements.
+
+### Images
+
+The syntax for images is similar to the link syntax mentioned above. Prepending a `!` character
+to a link will display an image from the specified URL rather than a link to it.
+
+```julia
+![alternative text](link/to/image.png)
+```
+
+### Lists
+
+Unordered lists can be written by prepending each item in a list with either `*`, `+`, or `-`.
+
+```
+A list of items:
+
+ * item one
+ * item two
+ * item three
+```
+
+Note the two spaces before each `*` and the single space after each one.
+
+Lists can contain other nested toplevel elements such as lists, code blocks, or quoteblocks. A
+blank line should be left between each list item when including any toplevel elements within a
+list.
+
+```
+Another list:
+
+ * item one
+
+ * item two
+
+ ```
+ f(x) = x
+ ```
+
+ * And a sublist:
+
+ + sub-item one
+ + sub-item two
+```
+
+!!! note
+ The contents of each item in the list must line up with the first line of the item. In the above
+ example the fenced code block must be indented by four spaces to align with the `i` in `item two`.
+
+Ordered lists are written by replacing the "bullet" character, either `*`, `+`, or `-`, with a
+positive integer followed by either `.` or `)`.
+
+```
+Two ordered lists:
+
+ 1. item one
+ 2. item two
+ 3. item three
+
+ 5) item five
+ 6) item six
+ 7) item seven
+```
+
+An ordered list may start from a number other than one, as in the second list of the above example,
+where it is numbered from five. As with unordered lists, ordered lists can contain nested toplevel
+elements.
+
+### Display equations
+
+Large ``\LaTeX`` equations that do not fit inline within a paragraph may be written as display
+equations using a fenced code block with the "language" `math` as in the example below.
+
+````julia
+```math
+f(a) = \frac{1}{2\pi}\int_{0}^{2\pi} (\alpha+R\cos(\theta))d\theta
+```
+````
+
+### Footnotes
+
+This syntax is paired with the inline syntax for [Footnote references](@ref). Make sure to read
+that section as well.
+
+Footnote text is defined using the following syntax, which is similar to footnote reference syntax,
+aside from the `:` character that is appended to the footnote label.
+
+```
+[^1]: Numbered footnote text.
+
+[^note]:
+
+ Named footnote text containing several toplevel elements.
+
+ * item one
+ * item two
+ * item three
+
+ ```julia
+ function func(x)
+ # ...
+ end
+ ```
+```
+
+!!! note
+ No checks are done during parsing to make sure that all footnote references have matching footnotes.
+
+### Horizontal rules
+
+The equivalent of an `<hr>` HTML tag can be achieved using three hyphens (`---`).
+For example:
+
+```
+Text above the line.
+
+---
+
+And text below the line.
+```
+
+### Tables
+
+Basic tables can be written using the syntax described below. Note that markdown tables have limited
+features and cannot contain nested toplevel elements unlike other elements discussed above –
+only inline elements are allowed. Tables must always contain a header row with column names. Cells
+cannot span multiple rows or columns of the table.
+
+```
+| Column One | Column Two | Column Three |
+|:---------- | ---------- |:------------:|
+| Row `1` | Column `2` | |
+| *Row* 2 | **Row** 2 | Column ``3`` |
+```
+
+!!! note
+ As illustrated in the above example each column of `|` characters must be aligned vertically.
+
+ A `:` character on either end of a column's header separator (the row containing `-` characters)
+ specifies whether the column is left-aligned, right-aligned, or (when `:` appears on both ends) center-aligned.
+ Providing no `:` characters will default to right-aligning the column.
+
+### Admonitions
+
+Specially formatted blocks, known as admonitions, can be used to highlight particular remarks.
+They can be defined using the following `!!!` syntax:
+
+```
+!!! note
+
+ This is the content of the note.
+
+!!! warning "Beware!"
+
+ And this is another one.
+
+ This warning admonition has a custom title: `"Beware!"`.
+```
+
+The first word after `!!!` declares the type of the admonition.
+There are standard admonition types that should produce special styling.
+Namely (in order of decreasing severity): `danger`, `warning`, `info`/`note`, and `tip`.
+
+You can also use your own admonition types, as long as the type name only contains lowercase Latin characters (a-z).
+For example, you could have a `terminology` block like this:
+
+```
+!!! terminology "julia vs Julia"
+
+ Strictly speaking, "Julia" refers to the language,
+ and "julia" to the standard implementation.
+```
+
+However, unless the code rendering the Markdown special-cases that particular admonition type, it will get the default styling.
+
+A custom title for the box can be provided as a string (in double quotes) after the admonition type.
+If no title text is specified after the admonition type, then the type name will be used as the title (e.g. `"Note"` for the `note` admonition).
+
+Admonitions, like most other toplevel elements, can contain other toplevel elements (e.g. lists, images).
+
+## Markdown Syntax Extensions
+
+Julia's markdown supports interpolation in a very similar way to basic string literals, with the
+difference that it will store the object itself in the Markdown tree (as opposed to converting
+it to a string). When the Markdown content is rendered the usual `show` methods will be called,
+and these can be overridden as usual. This design allows the Markdown to be extended with arbitrarily
+complex features (such as references) without cluttering the basic syntax.
+
+In principle, the Markdown parser itself can also be arbitrarily extended by packages, or an entirely
+custom flavour of Markdown can be used, but this should generally be unnecessary.
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Mmap/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Mmap/docs/src/index.md
new file mode 100644
index 0000000..ada88b1
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Mmap/docs/src/index.md
@@ -0,0 +1,7 @@
+# Memory-mapped I/O
+
+```@docs
+Mmap.Anonymous
+Mmap.mmap
+Mmap.sync!
+```
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/NetworkOptions/README.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/NetworkOptions/README.md
new file mode 100644
index 0000000..f8df24d
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/NetworkOptions/README.md
@@ -0,0 +1,270 @@
+# NetworkOptions
+
+[![Build Status](https://travis-ci.org/JuliaLang/NetworkOptions.jl.svg?branch=master)](https://travis-ci.org/JuliaLang/NetworkOptions.jl)
+[![Codecov](https://codecov.io/gh/JuliaLang/NetworkOptions.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/JuliaLang/NetworkOptions.jl)
+
+The `NetworkOptions` package acts as a mediator between ways of configuring
+network transport mechanisms (SSL/TLS, SSH, proxies, etc.) and Julia packages
+that provide access to transport mechanisms. This allows for a common interface
+to configuring things like TLS and SSH host verification and proxies via
+environment variables (currently) and other configuration mechanisms (in the
+future), while packages that need to configure these mechanisms can simply
+ask `NetworkOptions` what to do in specific situations without worrying about
+how that configuration is expressed.
+
+## API
+
+### ca_roots
+
+```jl
+ca_roots() :: Union{Nothing, String}
+```
+The `ca_roots()` function tells the caller where, if anywhere, to find a file or
+directory of PEM-encoded certificate authority roots. By default, on systems
+like Windows and macOS where the built-in TLS engines know how to verify hosts
+using the system's built-in certificate verification mechanism, this function
+will return `nothing`. On classic UNIX systems (excluding macOS), root
+certificates are typically stored in a file in `/etc`: the common places for the
+current UNIX system will be searched and if one of these paths exists, it will
+be returned; if none of these typical root certificate paths exist, then the
+path to the set of root certificates that are bundled with Julia is returned.
+
+The default value returned by `ca_roots()` may be overridden by setting the
+`JULIA_SSL_CA_ROOTS_PATH`, `SSL_CERT_DIR`, or `SSL_CERT_FILE` environment
+variables, in which case this function will always return the value of the first
+of these variables that is set (whether the path exists or not). If
+`JULIA_SSL_CA_ROOTS_PATH` is set to the empty string, then the other variables
+are ignored (as if unset); if the other variables are set to the empty string,
+they behave as if they are not set.
+
+### ca_roots_path
+
+```jl
+ca_roots_path() :: String
+```
+The `ca_roots_path()` function is similar to the `ca_roots()` function except
+that it always returns a path to a file or directory of PEM-encoded certificate
+authority roots. When called on a system like Windows or macOS, where system
+root certificates are not stored in the file system, it will currently return
+the path to the set of root certificates that are bundled with Julia. (In the
+future, this function may instead extract the root certificates from the system
+and save them to a file whose path would be returned.)
+
+If it is possible to configure a library that uses TLS to use the system
+certificates, that is generally preferable: i.e. it is better to use
+`ca_roots()` which returns `nothing` to indicate that the system certs should be
+used. The `ca_roots_path()` function should only be used when configuring
+libraries which _require_ a path to a file or directory for root certificates.
+
+The default value returned by `ca_roots_path()` may be overridden by setting the
+`JULIA_SSL_CA_ROOTS_PATH`, `SSL_CERT_DIR`, or `SSL_CERT_FILE` environment
+variables, in which case this function will always return the value of the first
+of these variables that is set (whether the path exists or not). If
+`JULIA_SSL_CA_ROOTS_PATH` is set to the empty string, then the other variables
+are ignored (as if unset); if the other variables are set to the empty string,
+they behave as if they are not set.
+
+### ssh_dir
+
+```jl
+ssh_dir() :: String
+```
+The `ssh_dir()` function returns the location of the directory where the `ssh`
+program keeps/looks for configuration files. By default this is `~/.ssh` but
+this can be overridden by setting the environment variable `SSH_DIR`.
+
+### ssh_key_name
+
+```jl
+ssh_key_name() :: String
+```
+The `ssh_key_name()` function returns the base name of key files that SSH should
+use when establishing a connection. There is usually no reason that this
+function should be called directly and libraries should generally use the
+`ssh_key_path` and `ssh_pub_key_path` functions to get full paths. If the
+environment variable `SSH_KEY_NAME` is set then this function returns that;
+otherwise it returns `id_rsa` by default.
+
+### ssh_key_path
+
+```jl
+ssh_key_path() :: String
+```
+The `ssh_key_path()` function returns the path of the SSH private key file that
+should be used for SSH connections. If the `SSH_KEY_PATH` environment variable
+is set then it will return that value. Otherwise it defaults to returning
+```jl
+joinpath(ssh_dir(), ssh_key_name())
+```
+This default value in turn depends on the `SSH_DIR` and `SSH_KEY_NAME`
+environment variables.
+
+### ssh_pub_key_path
+
+```jl
+ssh_pub_key_path() :: String
+```
+The `ssh_pub_key_path()` function returns the path of the SSH public key file
+that should be used for SSH connections. If the `SSH_PUB_KEY_PATH` environment
+variable is set then it will return that value. If that isn't set but
+`SSH_KEY_PATH` is set, it will return that path with the `.pub` suffix appended.
+If neither is set, it defaults to returning
+```jl
+joinpath(ssh_dir(), ssh_key_name() * ".pub")
+```
+This default value in turn depends on the `SSH_DIR` and `SSH_KEY_NAME`
+environment variables.
+
+### ssh_key_pass
+
+```jl
+ssh_key_pass() :: Union{Nothing, String}
+```
+The `ssh_key_pass()` function returns the value of the environment variable
+`SSH_KEY_PASS` if it is set or `nothing` if it is not set. In the future, this
+may be able to find a password by other means, such as secure system storage, so
+packages that need a password to decrypt an SSH private key should use this API
+instead of directly checking the environment variable so that they gain such
+capabilities automatically when they are added.
+
+### ssh_known_hosts_files
+
+```jl
+ssh_known_hosts_files() :: Vector{String}
+```
+The `ssh_known_hosts_files()` function returns a vector of paths of SSH known
+hosts files that should be used when establishing the identities of remote
+servers for SSH connections. By default this function returns
+```jl
+[joinpath(ssh_dir(), "known_hosts"), bundled_known_hosts]
+```
+where `bundled_known_hosts` is the path of a copy of a known hosts file that is
+bundled with this package (containing known hosts keys for `github.com` and
+`gitlab.com`). If the environment variable `SSH_KNOWN_HOSTS_FILES` is set,
+however, then its value is split into paths on the `:` character (or on `;` on
+Windows) and this vector of paths is returned instead. If any component of this
+vector is empty, it is expanded to the default known hosts paths.
+
+Packages that use `ssh_known_hosts_files()` should ideally look for matching
+entries by comparing the host name and key types, considering the first entry in
+any of the files which matches to be the definitive identity of the host. If the
+caller cannot compare the key type (e.g. because it has been hashed) then it
+must approximate the above algorithm by looking for all matching entries for a
+host in each file: if a file has any entries for a host then one of them must
+match; the caller should only continue to search further known hosts files if
+there are no entries for the host in question in an earlier file.
+
+### ssh_known_hosts_file
+
+```jl
+ssh_known_hosts_file() :: String
+```
+The `ssh_known_hosts_file()` function returns a single path of an SSH known
+hosts file that should be used when establishing the identities of remote
+servers for SSH connections. It returns the first path returned by
+`ssh_known_hosts_files` that actually exists. Callers who can look in more than
+one known hosts file should use `ssh_known_hosts_files` instead and look for
+host matches in all the files returned as described in that function's docs.
+
+### verify_host
+
+```jl
+verify_host(url::AbstractString, [transport::AbstractString]) :: Bool
+```
+The `verify_host` function tells the caller whether the identity of a host
+should be verified when communicating over secure transports like TLS or SSH.
+The `url` argument may be:
+
+1. a proper URL starting with `proto://`
+2. an `ssh`-style bare host name or host name prefixed with `user@`
+3. an `scp`-style host as above, followed by `:` and a path location
+
+In each case the host name part is parsed out and the decision about whether to
+verify or not is made based solely on the host name, not anything else about the
+input URL. In particular, the protocol of the URL does not matter (more below).
+
+The `transport` argument indicates the kind of transport that the query is
+about. The currently known values are `SSL` (alias `TLS`) and `SSH`. If the
+transport is omitted, the query will return `true` only if the host name should
+not be verified regardless of transport.
+
+The host name is matched against the host patterns in the relevant environment
+variables depending on whether `transport` is supplied and what its value is:
+
+- `JULIA_NO_VERIFY_HOSTS` — hosts that should not be verified for any transport
+- `JULIA_SSL_NO_VERIFY_HOSTS` — hosts that should not be verified for SSL/TLS
+- `JULIA_SSH_NO_VERIFY_HOSTS` — hosts that should not be verified for SSH
+- `JULIA_ALWAYS_VERIFY_HOSTS` — hosts that should always be verified
+
+The values of each of these variables is a comma-separated list of host name
+patterns with the following syntax — each pattern is split on `.` into parts and
+each part must be one of:
+
+1. A literal domain name component consisting of one or more ASCII letters,
+ digits, hyphens or underscores (technically not part of a legal host name, but
+ sometimes used). A literal domain name component matches only itself.
+2. A `**`, which matches zero or more domain name components.
+3. A `*`, which matches any one domain name component.
+
+When matching a host name against a pattern list in one of these variables, the
+host name is split on `.` into components and that sequence of words is matched
+against the pattern: a literal pattern matches exactly one host name component
+with that value; a `*` pattern matches exactly one host name component with any
+value; a `**` pattern matches any number of host name components. For example:
+
+- `**` matches any host name
+- `**.org` matches any host name in the `.org` top-level domain
+- `example.com` matches only the exact host name `example.com`
+- `*.example.com` matches `api.example.com` but not `example.com` or
+ `v1.api.example.com`
+- `**.example.com` matches any domain under `example.com`, including
+ `example.com` itself, `api.example.com` and `v1.api.example.com`
+
+#### Example scenarios
+
+Suppose you want to not verify any hosts under `safe.example.com` for all
+protocols, skip SSL host verification for just `ssl.example.com`, and skip SSH
+host verification for `ssh.example.com` and its immediate first level
+subdomains. Then you could set the following environment variable values:
+```sh
+export JULIA_NO_VERIFY_HOSTS="**.safe.example.com"
+export JULIA_SSL_NO_VERIFY_HOSTS="ssl.example.com"
+export JULIA_SSH_NO_VERIFY_HOSTS="ssh.example.com,*.ssh.example.com"
+```
+With this configuration:
+
+- `example.com` would be verified for all protocols
+- `safe.example.com`, `api.safe.example.com`, `v1.api.safe.example.com` and so
+ on would be unverified for all transports
+- `ssl.example.com` would be unverified for SSL/TLS transport
+- `sub.ssl.example.com` would be verified for all transports, including SSL/TLS
+- `ssh.example.com` and `sub.ssh.example.com` would be unverified for SSH only
+- `sub.sub.ssh.example.com` would be verified for all transports
+
+Note that the protocol of `url` need not match the transport mechanism being
+queried: the protocol of the URL is entirely discarded. The reason for this is
+that the typical usage of this utility function is to configure a library to
+enable or disable specific features like TLS host verification based on a URL.
+If the URL does not actually use the TLS transport mechanism, then it doesn't
+matter if verification for that transport is enabled or not. Moreover, different
+protocols can use the same transport: for example, `https` and `ftps` protocols
+both use TLS and `ssh`, `scp` and `sftp` protocols all use SSH.
+
+A common scenario that occurs behind firewalls is for all connections to external
+systems to go through a transparent man-in-the-middle proxy: any SSL/TLS
+connection to a host under `example.com` would be internal and should have a
+valid certificate but any connection outside of `example.com` would go through
+the proxy, which uses a self-signed certificate. For such a scenario the best
+solution would be to deploy a CA root certificate to all clients, but if that's
+not possible, then configuring clients to verify hosts under `example.com` but
+not verify other SSL/TLS connections would be a viable solution. In fact, as
+long as the man-in-the-middle proxy verifies all upstream TLS connections, this
+is still secure (although not private from the proxy, of course). Such a
+configuration can be accomplished with the following exports:
+```sh
+export JULIA_ALWAYS_VERIFY_HOSTS="**.example.com"
+export JULIA_SSL_NO_VERIFY_HOSTS="**"
+```
+This configuration causes all domains under `example.com` to always be verified
+for all protocols, including SSL/TLS, while skipping host verification for SSL/TLS
+connections to all other hosts.
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/LICENSE.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/LICENSE.md
new file mode 100644
index 0000000..eb29596
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/LICENSE.md
@@ -0,0 +1,47 @@
+The Pkg.jl package is licensed under the MIT "Expat" License:
+
+> Copyright (c) 2017: Stefan Karpinski.
+>
+> Permission is hereby granted, free of charge, to any person obtaining a copy
+> of this software and associated documentation files (the "Software"), to deal
+> in the Software without restriction, including without limitation the rights
+> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+> copies of the Software, and to permit persons to whom the Software is
+> furnished to do so, subject to the following conditions:
+>
+> The above copyright notice and this permission notice shall be included in all
+> copies or substantial portions of the Software.
+>
+> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+> SOFTWARE.
+>
+
+The file PlatformEngines.jl is under the following license:
+
+The BinaryProvider.jl package is licensed under the MIT "Expat" License:
+
+> Copyright (c) 2017: SimonDanisch.
+>
+> Permission is hereby granted, free of charge, to any person obtaining a copy
+> of this software and associated documentation files (the "Software"), to deal
+> in the Software without restriction, including without limitation the rights
+> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+> copies of the Software, and to permit persons to whom the Software is
+> furnished to do so, subject to the following conditions:
+>
+> The above copyright notice and this permission notice shall be included in all
+> copies or substantial portions of the Software.
+>
+> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+> SOFTWARE.
+>
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/README.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/README.md
new file mode 100644
index 0000000..3389eeb
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/README.md
@@ -0,0 +1,34 @@
+# Pkg
+
+| **Documentation** | **Build Status** |
+|:-----------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------:|
+| [![][docs-v1-img]][docs-v1-url] [![][docs-dev-img]][docs-dev-url] | [![][travis-img]][travis-url] [![][appveyor-img]][appveyor-url] [![][codecov-img]][codecov-url] |
+
+Development repository for Julia's package manager,
+shipped with Julia v1.0 and above.
+
+#### Using the development version of Pkg.jl
+
+If you want to develop this package do the following steps:
+- Clone the repo anywhere.
+- In line 2 of the `Project.toml` file (the line that begins with `uuid = ...`), modify the UUID, e.g. change the `44c` to `54c`.
+- Change the current directory to the Pkg repo you just cloned and start julia with `julia --project`.
+- `import Pkg` will now load the files in the cloned repo instead of the Pkg stdlib.
+- To test your changes, simply do `include("test/runtests.jl")`.
+
+If you need to build Julia from source with a git checkout of Pkg, then instead use `make DEPS_GIT=Pkg` when building Julia. The `Pkg` repo is in `stdlib/Pkg`, and created initially with a detached `HEAD`. If you're doing this from a pre-existing Julia repository, you may need to `make clean` beforehand.
+
+[docs-dev-img]: https://img.shields.io/badge/docs-dev-blue.svg
+[docs-dev-url]: https://julialang.github.io/Pkg.jl/dev/
+
+[docs-v1-img]: https://img.shields.io/badge/docs-v1-blue.svg
+[docs-v1-url]: https://julialang.github.io/Pkg.jl/v1/
+
+[travis-img]: https://travis-ci.com/JuliaLang/Pkg.jl.svg?branch=master
+[travis-url]: https://travis-ci.com/JuliaLang/Pkg.jl
+
+[appveyor-img]: https://ci.appveyor.com/api/projects/status/cgno2xgwapugpg4t/branch/master?svg=true
+[appveyor-url]: https://ci.appveyor.com/project/JuliaLang/pkg-jl/branch/master
+
+[codecov-img]: https://codecov.io/gh/JuliaLang/Pkg.jl/branch/master/graph/badge.svg
+[codecov-url]: https://codecov.io/gh/JuliaLang/Pkg.jl
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/api.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/api.md
new file mode 100644
index 0000000..29a54c4
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/api.md
@@ -0,0 +1,88 @@
+# [**12.** API Reference](@id API-Reference)
+
+This section describes the function interface, or "API mode",
+for interacting with Pkg.jl. The function API is recommended
+for non-interactive usage, for example in scripts.
+
+## General API Reference
+
+Certain options are generally useful and can be specified in any API call.
+You can specify these options by setting keyword arguments.
+
+### Redirecting output
+
+Use the `io::IOBuffer` keyword argument to redirect Pkg output.
+For example, `Pkg.add("Example"; io=devnull)` will discard any output produced by the `add` call.
+
+## Package API Reference
+
+In the REPL mode, packages (with associated version, UUID, URL etc) are parsed from strings,
+for example `"Package#master"`,`"Package@v0.1"`, `"www.mypkg.com/MyPkg#my/feature"`.
+
+In the API mode, it is possible to use strings as arguments for simple commands (like `Pkg.add(["PackageA", "PackageB"])`),
+but more complicated commands, which e.g. specify URLs or version range, require the use of a more structured format over strings.
+This is done by creating an instance of [`PackageSpec`](@ref) which is passed in to functions.
+
+```@docs
+PackageSpec
+PackageMode
+UpgradeLevel
+Pkg.add
+Pkg.develop
+Pkg.activate
+Pkg.rm
+Pkg.update
+Pkg.test
+Pkg.build
+Pkg.pin
+Pkg.free
+Pkg.instantiate
+Pkg.resolve
+Pkg.gc
+Pkg.status
+Pkg.precompile
+Pkg.offline
+Pkg.setprotocol!
+Pkg.dependencies
+Pkg.project
+Pkg.undo
+Pkg.redo
+```
+
+
+## Registry API Reference
+
+!!! compat "Julia 1.1"
+ Pkg's registry handling requires at least Julia 1.1.
+
+The function API for registries uses [`RegistrySpec`](@ref)s, similar to
+[`PackageSpec`](@ref).
+
+```@docs
+RegistrySpec
+Pkg.Registry.add
+Pkg.Registry.rm
+Pkg.Registry.update
+Pkg.Registry.status
+```
+
+## [Artifacts API Reference](@id Artifacts-Reference)
+
+!!! compat "Julia 1.3"
+ Pkg's artifacts API requires at least Julia 1.3.
+
+```@docs
+Pkg.Artifacts.create_artifact
+Pkg.Artifacts.remove_artifact
+Pkg.Artifacts.verify_artifact
+Pkg.Artifacts.artifact_meta
+Pkg.Artifacts.artifact_hash
+Pkg.Artifacts.bind_artifact!
+Pkg.Artifacts.unbind_artifact!
+Pkg.Artifacts.download_artifact
+Pkg.Artifacts.find_artifacts_toml
+Pkg.Artifacts.ensure_artifact_installed
+Pkg.Artifacts.ensure_all_artifacts_installed
+Pkg.Artifacts.@artifact_str
+Pkg.Artifacts.archive_artifact
+```
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/artifacts.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/artifacts.md
new file mode 100644
index 0000000..7b1ade6
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/artifacts.md
@@ -0,0 +1,229 @@
+# [**8.** Artifacts](@id Artifacts)
+
+!!! compat "Julia 1.3"
+ Pkg's artifacts functionality requires at least Julia 1.3.
+
+`Pkg` can install and manage containers of data that are not Julia packages. These containers can contain platform-specific binaries, datasets, text, or any other kind of data that would be convenient to place within an immutable, life-cycled datastore.
+These containers (called "Artifacts") can be created locally, hosted anywhere, and automatically downloaded and unpacked upon installation of your Julia package.
+This mechanism is also used to provide the binary dependencies for packages built with [`BinaryBuilder.jl`](https://github.com/JuliaPackaging/BinaryBuilder.jl).
+
+## Basic Usage
+
+`Pkg` artifacts are declared in an `Artifacts.toml` file, which can be placed in your current directory or in the root of your package.
+Currently, `Pkg` supports downloading of tarfiles (which can be compressed) from a URL.
+Following is a minimal `Artifacts.toml` file which will permit the downloading of a `socrates.tar.gz` file from `github.com`.
+In this example, a single artifact, given the name `socrates`, is defined.
+
+```TOML
+# a simple Artifacts.toml file
+[socrates]
+git-tree-sha1 = "43563e7631a7eafae1f9f8d9d332e3de44ad7239"
+
+ [[socrates.download]]
+ url = "https://github.com/staticfloat/small_bin/raw/master/socrates.tar.gz"
+ sha256 = "e65d2f13f2085f2c279830e863292312a72930fee5ba3c792b14c33ce5c5cc58"
+```
+
+If this `Artifacts.toml` file is placed in your current directory, then `socrates.tar.gz` can be downloaded, unpacked and used with `artifact"socrates"`.
+Since this tarball contains a folder `bin`, and a text file named `socrates` within that folder, we could access the content of that file as follows.
+
+```julia
+using Pkg.Artifacts
+
+rootpath = artifact"socrates"
+open(joinpath(rootpath, "bin", "socrates")) do file
+ println(read(file, String))
+end
+```
+
+If you have an existing tarball that is accessible via a `url`, it could also be accessed in this manner.
+To create the `Artifacts.toml` you must compute two hashes: the `sha256` hash of the download file, and the `git-tree-sha1` of the unpacked content.
+These can be computed as follows.
+
+```julia
+using Tar, Inflate, SHA
+
+filename = "socrates.tar.gz"
+println("sha256: ", bytes2hex(open(sha256, filename)))
+println("git-tree-sha1: ", Tar.tree_hash(IOBuffer(inflate_gzip(filename))))
+```
+
+To access this artifact from within a package you create, place the `Artifacts.toml` at the root of your package, adjacent to `Project.toml`. Then, make sure to add `Pkg` in your `deps` and set `julia = "1.3"` or higher in your `compat` section.
+
+## `Artifacts.toml` files
+
+`Pkg` provides an API for working with artifacts, as well as a TOML file format for recording artifact usage in your packages, and to automate downloading of artifacts at package install time.
+Artifacts can always be referred to by content hash, but are typically accessed by a name that is bound to a content hash in an `Artifacts.toml` file that lives in a project's source tree.
+
+!!! note
+ It is possible to use the alternate name `JuliaArtifacts.toml`, similar
+ to how it is possible to use `JuliaProject.toml` and `JuliaManifest.toml`
+ instead of `Project.toml` and `Manifest.toml`, respectively.
+
+An example `Artifacts.toml` file is shown here:
+
+```TOML
+# Example Artifacts.toml file
+[socrates]
+git-tree-sha1 = "43563e7631a7eafae1f9f8d9d332e3de44ad7239"
+lazy = true
+
+ [[socrates.download]]
+ url = "https://github.com/staticfloat/small_bin/raw/master/socrates.tar.gz"
+ sha256 = "e65d2f13f2085f2c279830e863292312a72930fee5ba3c792b14c33ce5c5cc58"
+
+ [[socrates.download]]
+ url = "https://github.com/staticfloat/small_bin/raw/master/socrates.tar.bz2"
+ sha256 = "13fc17b97be41763b02cbb80e9d048302cec3bd3d446c2ed6e8210bddcd3ac76"
+
+[[c_simple]]
+arch = "x86_64"
+git-tree-sha1 = "4bdf4556050cb55b67b211d4e78009aaec378cbc"
+libc = "musl"
+os = "linux"
+
+ [[c_simple.download]]
+ sha256 = "411d6befd49942826ea1e59041bddf7dbb72fb871bb03165bf4e164b13ab5130"
+ url = "https://github.com/JuliaBinaryWrappers/c_simple_jll.jl/releases/download/c_simple+v1.2.3+0/c_simple.v1.2.3.x86_64-linux-musl.tar.gz"
+
+[[c_simple]]
+arch = "x86_64"
+git-tree-sha1 = "51264dbc770cd38aeb15f93536c29dc38c727e4c"
+os = "macos"
+
+ [[c_simple.download]]
+ sha256 = "6c17d9e1dc95ba86ec7462637824afe7a25b8509cc51453f0eb86eda03ed4dc3"
+ url = "https://github.com/JuliaBinaryWrappers/c_simple_jll.jl/releases/download/c_simple+v1.2.3+0/c_simple.v1.2.3.x86_64-apple-darwin14.tar.gz"
+
+[processed_output]
+git-tree-sha1 = "1c223e66f1a8e0fae1f9fcb9d3f2e3ce48a82200"
+```
+
+This `Artifacts.toml` binds three artifacts; one named `socrates`, one named `c_simple` and one named `processed_output`.
+The single required piece of information for an artifact is its `git-tree-sha1`.
+Because artifacts are addressed only by their content hash, the purpose of an `Artifacts.toml` file is to provide metadata about these artifacts, such as binding a human-readable name to a content hash, providing information about where an artifact may be downloaded from, or even binding a single name to multiple hashes, keyed by platform-specific constraints such as operating system or libgfortran version.
+
+## Artifact types and properties
+
+In the above example, the `socrates` artifact showcases a platform-independent artifact with multiple download locations.
+When downloading and installing the `socrates` artifact, URLs will be attempted in-order until one succeeds.
+The `socrates` artifact is marked as `lazy`, which means that it will not be automatically downloaded when the containing package is installed, but rather will be downloaded on-demand when the package first attempts to use it.
+
+The `c_simple` artifact showcases a platform-dependent artifact, where each entry in the `c_simple` array contains keys that help the calling package choose the appropriate download based on the particulars of the host machine.
+Note that each artifact contains both a `git-tree-sha1` and a `sha256` for each download entry. This is to ensure that the downloaded tarball is secure before attempting to unpack it, as well as enforcing that all tarballs must expand to the same overall tree hash.
+
+The `processed_output` artifact contains no `download` stanza, and so cannot be installed.
+An artifact such as this would be the result of code that was previously run, generating a new artifact and binding the resultant hash to a name within this project.
+
+## Using Artifacts
+
+Artifacts can be manipulated using convenient APIs exposed from the `Pkg.Artifacts` namespace.
+As a motivating example, let us imagine that we are writing a package that needs to load the [Iris machine learning dataset](https://archive.ics.uci.edu/ml/datasets/iris).
+While we could just download the dataset during a build step into the package directory, and many packages currently do precisely this, that has some significant drawbacks:
+
+* First, it modifies the package directory, making package installation stateful, which we want to avoid.
+ In the future, we would like to reach the point where packages can be installed completely read-only, instead of being able to modify themselves after installation.
+
+* Second, the downloaded data is not shared across different versions of our package.
+ If we have three different versions of the package installed for use by various projects, then we need three different copies of the data, even if it hasn't changed between those versions.
+ Moreover, each time we upgrade or downgrade the package, unless we do something clever (and probably brittle), we have to download the data again.
+
+With artifacts, we will instead check to see if our `iris` artifact already exists on-disk and only if it doesn't will we download and install it, after which we can bind the result into our `Artifacts.toml` file:
+
+```julia
+using Pkg.Artifacts
+
+# This is the path to the Artifacts.toml we will manipulate
+artifact_toml = joinpath(@__DIR__, "Artifacts.toml")
+
+# Query the `Artifacts.toml` file for the hash bound to the name "iris"
+# (returns `nothing` if no such binding exists)
+iris_hash = artifact_hash("iris", artifact_toml)
+
+# If the name was not bound, or the hash it was bound to does not exist, create it!
+if iris_hash == nothing || !artifact_exists(iris_hash)
+ # create_artifact() returns the content-hash of the artifact directory once we're finished creating it
+ iris_hash = create_artifact() do artifact_dir
+ # We create the artifact by simply downloading a few files into the new artifact directory
+ iris_url_base = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris"
+ download("$(iris_url_base)/iris.data", joinpath(artifact_dir, "iris.csv"))
+ download("$(iris_url_base)/bezdekIris.data", joinpath(artifact_dir, "bezdekIris.csv"))
+ download("$(iris_url_base)/iris.names", joinpath(artifact_dir, "iris.names"))
+ end
+
+ # Now bind that hash within our `Artifacts.toml`. `force = true` means that if it already exists,
+ # just overwrite with the new content-hash. Unless the source files change, we do not expect
+ # the content hash to change, so this should not cause unnecessary version control churn.
+ bind_artifact!(artifact_toml, "iris", iris_hash)
+end
+
+# Get the path of the iris dataset, either newly created or previously generated.
+# this should be something like `~/.julia/artifacts/dbd04e28be047a54fbe9bf67e934be5b5e0d357a`
+iris_dataset_path = artifact_path(iris_hash)
+```
+
+For the specific use case of using artifacts that were previously bound, we have the shorthand notation `artifact"name"` which will automatically search for the `Artifacts.toml` file contained within the current package, look up the given artifact by name, install it if it is not yet installed, then return the path to that given artifact.
+An example of this shorthand notation is given below:
+
+```julia
+using Pkg.Artifacts
+
+# For this to work, an `Artifacts.toml` file must be in the current working directory
+# (or in the root of the current package) and must define a mapping for the "iris"
+# artifact. If it does not exist on-disk, it will be downloaded.
+iris_dataset_path = artifact"iris"
+```
+
+## The `Pkg.Artifacts` API
+
+The `Artifacts` API is broken up into three levels: hash-aware functions, name-aware functions and utility functions.
+
+* **Hash-aware** functions deal with content-hashes and essentially nothing else. These methods allow you to query whether an artifact exists, what its path is, to verify that an artifact satisfies its content hash on-disk, etc. Hash-aware functions include: `artifact_exists()`, `artifact_path()`, `remove_artifact()`, `verify_artifact()` and `archive_artifact()`. Note that in general you should not use `remove_artifact()` and should instead use `Pkg.gc()` to cleanup artifact installations.
+
+* **Name-aware** functions deal with bound names within an `Artifacts.toml` file, and as such, typically require both a path to an `Artifacts.toml` file as well as the artifact name. Name-aware functions include: `artifact_meta()`, `artifact_hash()`, `bind_artifact!()`, `unbind_artifact!()`, `download_artifact()` and `ensure_artifact_installed()`.
+
+* **Utility** functions deal with miscellaneous aspects of artifact life, such as `create_artifact()`, `ensure_all_artifacts_installed()`, and even the `@artifact_str` string macro.
+
+For a full listing of docstrings and methods, see the [Artifacts Reference](@ref) section.
+
+## Overriding artifact locations
+
+It is occasionally necessary to be able to override the location and content of an artifact.
+A common use case is a computing environment where certain versions of a binary dependency must be used, regardless of what version of this dependency a package was published with.
+While a typical Julia configuration would download, unpack and link against a generic library, a system administrator may wish to disable this and instead use a library already installed on the local machine.
+To enable this, `Pkg` supports a per-depot `Overrides.toml` file placed within the `artifacts` depot directory (e.g. `~/.julia/artifacts/Overrides.toml` for the default user depot) that can override the location of an artifact either by content-hash or by package UUID and bound artifact name.
+Additionally, the destination location can be either an absolute path, or a replacement artifact content hash.
+This allows sysadmins to create their own artifacts which they can then use by overriding other packages to use the new artifact.
+
+```TOML
+# Override single hash to absolute path
+78f35e74ff113f02274ce60dab6e92b4546ef806 = "/path/to/replacement"
+
+# Override single hash to new artifact content-hash
+683942669b4639019be7631caa28c38f3e1924fe = "d826e316b6c0d29d9ad0875af6ca63bf67ed38c3"
+
+# Override package bindings by specifying the package UUID and bound artifact name
+# For demonstration purposes we assume this package is called `Foo`
+[d57dbccd-ca19-4d82-b9b8-9d660942965b]
+libfoo = "/path/to/libfoo"
+libbar = "683942669b4639019be7631caa28c38f3e1924fe"
+```
+
+Due to the layered nature of `Pkg` depots, multiple `Overrides.toml` files may be in effect at once.
+This allows the "inner" `Overrides.toml` files to override the overrides placed within the "outer" `Overrides.toml` files.
+To remove an override and re-enable default location logic for an artifact, insert an entry mapping to the empty string:
+
+```TOML
+78f35e74ff113f02274ce60dab6e92b4546ef806 = "/path/to/new/replacement"
+683942669b4639019be7631caa28c38f3e1924fe = ""
+
+[d57dbccd-ca19-4d82-b9b8-9d660942965b]
+libfoo = ""
+```
+
+If the two `Overrides.toml` snippets as given above are layered on top of each other, the end result will be mapping the content-hash `78f35e74ff113f02274ce60dab6e92b4546ef806` to `"/path/to/new/replacement"`, and mapping `Foo.libbar` to the artifact identified by the content-hash `683942669b4639019be7631caa28c38f3e1924fe`.
+Note that while that hash was previously overridden, it is no longer, and therefore `Foo.libbar` will look directly at locations such as `~/.julia/artifacts/683942669b4639019be7631caa28c38f3e1924fe`.
+
+Most methods that are affected by overrides have the ability to ignore overrides by setting `honor_overrides=false` as a keyword argument within them.
+For UUID/name based overrides to work, `Artifacts.toml` files must be loaded with the knowledge of the UUID of the loading package.
+This is deduced automatically by the `artifact""` string macro; however, if you are for some reason manually using the `Pkg.Artifacts` API within your package and you wish to honor overrides, you must provide the package UUID to API calls like `artifact_meta()` and `ensure_artifact_installed()` via the `pkg_uuid` keyword argument.
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/basedocs.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/basedocs.md
new file mode 100644
index 0000000..21a12c5
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/basedocs.md
@@ -0,0 +1,25 @@
+```@meta
+EditURL = "https://github.com/JuliaLang/Pkg.jl/blob/master/docs/src/basedocs.md"
+```
+
+# Pkg
+
+Pkg is Julia's builtin package manager, and handles operations
+such as installing, updating and removing packages.
+
+!!! note
+ What follows is a very brief introduction to Pkg. For more
+ information on `Project.toml` files, `Manifest.toml` files, package
+ version compatibility (`[compat]`), environments, registries, etc.,
+ it is highly recommended to read the full manual, which is available here:
+ [https://julialang.github.io/Pkg.jl/v1/](https://julialang.github.io/Pkg.jl/v1/).
+
+```@eval
+import Markdown
+file = joinpath(Sys.STDLIB, "Pkg", "docs", "src", "getting-started.md")
+str = read(file, String)
+str = replace(str, r"^#.*$"m => "")
+str = replace(str, "[API Reference](@ref)" =>
+ "[API Reference](https://julialang.github.io/Pkg.jl/v1/api/)")
+Markdown.parse(str)
+```
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/compatibility.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/compatibility.md
new file mode 100644
index 0000000..94f160e
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/compatibility.md
@@ -0,0 +1,201 @@
+# [**6.** Compatibility](@id Compatibility)
+
+Compatibility refers to the ability to restrict the versions of the dependencies that your project is compatible with.
+If the compatibility for a dependency is not given, the project is assumed to be compatible with all versions of that dependency.
+
+Compatibility for a dependency is entered in the `Project.toml` file as for example:
+
+```toml
+[compat]
+julia = "1.0"
+Example = "0.4.3"
+```
+
+After a compatibility entry is put into the project file, `up` can be used to apply it.
+
+The format of the version specifier is described in detail below.
+
+!!! info
+ There is currently no way to give compatibility from the Pkg REPL mode so for now, one has to manually edit the project file.
+
+## Version specifier format
+
+Similar to other package managers, the Julia package manager respects [semantic versioning](https://semver.org/) (semver).
+As an example, a version specifier given as e.g. `1.2.3` is therefore assumed to be compatible with the versions `[1.2.3 - 2.0.0)` where `)` is a non-inclusive upper bound.
+More specifically, a version specifier is either given as a **caret specifier**, e.g. `^1.2.3` or as a **tilde specifier**, e.g. `~1.2.3`.
+Caret specifiers are the default and hence `1.2.3 == ^1.2.3`. The difference between a caret and tilde is described in the next section.
+The union of multiple version specifiers can be formed by comma separating individual version specifiers, e.g.
+```toml
+[compat]
+Example = "1.2, 2"
+```
+will result in `[1.2.0, 3.0.0)`. Note that leading zeros are treated differently, e.g. `Example = "0.2, 1"` would result in `[0.2.0, 0.3.0) ∪ [1.0.0, 2.0.0)`. See the next section for more information on versions with leading zeros.
+
+### [Behavior of versions with leading zeros (0.0.x and 0.x.y)](@id compat-pre-1.0)
+
+While the semver specification says that all versions with a major version of 0 (versions before 1.0.0) are incompatible
+with each other, we have decided to only apply that for when both the major and minor versions are zero. In other words,
+0.0.1 and 0.0.2 are considered incompatible. A pre-1.0 version with non-zero minor version (`0.a.b` with `a != 0`) is
+considered compatible with versions with the same minor version and smaller or equal patch versions (`0.a.c` with `c <= b`);
+i.e., the versions 0.2.2 and 0.2.3 are compatible with 0.2.1 and 0.2.0. Versions with a major version of 0 and different
+minor versions are not considered compatible, so the version 0.3.0 might have breaking changes from 0.2.0. To that end, the
+`[compat]` entry:
+
+```toml
+[compat]
+Example = "0.0.1"
+```
+
+results in a version bound on `Example` as `[0.0.1, 0.0.2)` (which is equivalent to only the version 0.0.1), while the
+`[compat]` entry:
+
+```toml
+[compat]
+Example = "0.2.1"
+```
+
+results in a version bound on `Example` as `[0.2.1, 0.3.0)`.
+
+In particular, a package may set `version = "0.2.4"` when it has feature additions compared to 0.2.3 as long as it
+remains backward compatible with 0.2.0. See also [The `version` field](@ref).
+
+### Caret specifiers
+
+A caret specifier allows upgrades that would be compatible according to semver.
+An updated dependency is considered compatible if the new version does not modify the left-most non-zero digit in the version specifier.
+
+Some examples are shown below.
+
+```toml
+[compat]
+PkgA = "^1.2.3" # [1.2.3, 2.0.0)
+PkgB = "^1.2" # [1.2.0, 2.0.0)
+PkgC = "^1" # [1.0.0, 2.0.0)
+PkgD = "^0.2.3" # [0.2.3, 0.3.0)
+PkgE = "^0.0.3" # [0.0.3, 0.0.4)
+PkgF = "^0.0" # [0.0.0, 0.1.0)
+PkgG = "^0" # [0.0.0, 1.0.0)
+```
+
+### Tilde specifiers
+
+A tilde specifier provides more limited upgrade possibilities. When specifying major, minor
+and patch versions, or when specifying major and minor versions, only the patch version is
+allowed to change. If you only specify a major version, then both minor and patch versions
+are allowed to be upgraded (`~1` is thus equivalent to `^1`).
+For example:
+
+```toml
+[compat]
+PkgA = "~1.2.3" # [1.2.3, 1.3.0)
+PkgB = "~1.2" # [1.2.0, 1.3.0)
+PkgC = "~1" # [1.0.0, 2.0.0)
+PkgD = "~0.2.3" # [0.2.3, 0.3.0)
+PkgE = "~0.0.3" # [0.0.3, 0.0.4)
+PkgF = "~0.0" # [0.0.0, 0.1.0)
+PkgG = "~0" # [0.0.0, 1.0.0)
+```
+
+For all versions with a major version of 0 the tilde and caret specifiers are equivalent.
+
+### Equality specifier
+
+Equality can be used to specify an exact version:
+
+```toml
+[compat]
+PkgA = "= 1.2.3" # [1.2.3, 1.2.3]
+```
+
+### Inequality specifiers
+
+Inequalities can also be used to specify version ranges:
+
+```toml
+[compat]
+PkgB = ">= 1.2.3" # [1.2.3, ∞)
+PkgC = "≥ 1.2.3" # [1.2.3, ∞)
+PkgD = "< 1.2.3" # [0.0.0, 1.2.3) = [0.0.0, 1.2.2]
+```
+
+### Hyphen specifiers
+
+Hyphen syntax can also be used to specify version ranges. Make sure that you have a space on both sides of the hyphen.
+
+```toml
+[compat]
+PkgA = "1.2.3 - 4.5.6" # [1.2.3, 4.5.6]
+PkgA = "0.2.3 - 4.5.6" # [0.2.3, 4.5.6]
+```
+
+Any unspecified trailing numbers in the first end-point are considered to be zero:
+
+```toml
+[compat]
+PkgA = "1.2 - 4.5.6" # [1.2.0, 4.5.6]
+PkgA = "1 - 4.5.6" # [1.0.0, 4.5.6]
+PkgA = "0.2 - 4.5.6" # [0.2.0, 4.5.6]
+PkgA = "0.2 - 0.5.6" # [0.2.0, 0.5.6]
+```
+
+Any unspecified trailing numbers in the second end-point will be considered to be wildcards:
+
+```toml
+[compat]
+PkgA = "1.2.3 - 4.5" # 1.2.3 - 4.5.* = [1.2.3, 4.6.0)
+PkgA = "1.2.3 - 4" # 1.2.3 - 4.*.* = [1.2.3, 5.0.0)
+PkgA = "1.2 - 4.5" # 1.2.0 - 4.5.* = [1.2.0, 4.6.0)
+PkgA = "1.2 - 4" # 1.2.0 - 4.*.* = [1.2.0, 5.0.0)
+PkgA = "1 - 4.5" # 1.0.0 - 4.5.* = [1.0.0, 4.6.0)
+PkgA = "1 - 4" # 1.0.0 - 4.*.* = [1.0.0, 5.0.0)
+PkgA = "0.2.3 - 4.5" # 0.2.3 - 4.5.* = [0.2.3, 4.6.0)
+PkgA = "0.2.3 - 4" # 0.2.3 - 4.*.* = [0.2.3, 5.0.0)
+PkgA = "0.2 - 4.5" # 0.2.0 - 4.5.* = [0.2.0, 4.6.0)
+PkgA = "0.2 - 4" # 0.2.0 - 4.*.* = [0.2.0, 5.0.0)
+PkgA = "0.2 - 0.5" # 0.2.0 - 0.5.* = [0.2.0, 0.6.0)
+PkgA = "0.2 - 0" # 0.2.0 - 0.*.* = [0.2.0, 1.0.0)
+```
+
+!!! compat "Julia 1.4"
+ Hyphen specifiers require at least Julia 1.4, so it is strongly recommended to also add
+ ```toml
+ [compat]
+ julia = "1.4"
+ ```
+ to the project file when using them.
+
+## Fixing conflicts
+
+Version conflicts were introduced previously with an [example](@ref conflicts)
+of a conflict arising in a package `D` used by two other packages, `B` and `C`.
+Our analysis of the error message revealed that `B` is using an outdated
+version of `D`.
+To fix it, the first thing to try is to `pkg> dev B` so that
+you can modify `B` and its compatibility requirements.
+If you open its `Project.toml` file in an editor, you would probably notice something like
+
+```toml
+[compat]
+D = "0.1"
+```
+
+Usually the first step is to modify this to something like
+```toml
+[compat]
+D = "0.1, 0.2"
+```
+
+This indicates that `B` is compatible with both version 0.1 and version 0.2; if you `pkg> up`
+this would fix the package error.
+However, there is one major concern you need to address first: perhaps there was an incompatible change
+in `v0.2` of `D` that breaks `B`.
+Before proceeding further, you should update all packages and then run `B`'s tests, scanning the
+output of `pkg> test B` to be sure that `v0.2` of `D` is in fact being used.
+(It is possible that an additional dependency of `D` pins it to `v0.1`, and you wouldn't want to be misled into thinking that you had tested `B` on the newer version.)
+If the new version was used and the tests still pass,
+you can assume that `B` didn't need any further updating to accommodate `v0.2` of `D`;
+you can safely submit this change as a pull request to `B` so that a new release is made.
+If instead an error is thrown, it indicates that `B` requires more extensive updates to be
+compatible with the latest version of `D`; those updates will need to be completed before
+it becomes possible to use both `A` and `B` simultaneously.
+You can, though, continue to use them independently of one another.
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/creating-packages.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/creating-packages.md
new file mode 100644
index 0000000..a21887c
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/creating-packages.md
@@ -0,0 +1,290 @@
+# **5.** Creating Packages
+
+A package is a project with a `name`, `uuid` and `version` entry in the `Project.toml` file, and a `src/PackageName.jl` file that defines the module `PackageName`.
+This file is executed when the package is loaded.
+
+### Generating files for a package
+
+!!! note
+ The [PkgTemplates](https://github.com/invenia/PkgTemplates.jl) package offers a very easy, repeatable, and
+ customizable way to generate the files for a new package. We recommend that you use PkgTemplates for creating
+ new packages instead of using the minimal `pkg> generate` functionality described below.
+
+To generate files for a new package, use `pkg> generate`.
+
+```julia-repl
+(v1.0) pkg> generate HelloWorld
+```
+
+This creates a new project `HelloWorld` with the following files (visualized with the external [`tree` command](https://linux.die.net/man/1/tree)):
+
+```julia-repl
+julia> cd("HelloWorld")
+
+shell> tree .
+.
+├── Project.toml
+└── src
+ └── HelloWorld.jl
+
+1 directory, 2 files
+```
+
+The `Project.toml` file contains the name of the package, its unique UUID, its version, the authors and potential dependencies:
+
+```toml
+name = "HelloWorld"
+uuid = "b4cd1eb8-1e24-11e8-3319-93036a3eb9f3"
+version = "0.1.0"
+authors = ["Some One <someone@email.com>"]
+
+[deps]
+```
+
+The content of `src/HelloWorld.jl` is:
+
+```julia
+module HelloWorld
+
+greet() = print("Hello World!")
+
+end # module
+```
+
+We can now activate the project and load the package:
+
+```julia-repl
+pkg> activate .
+
+julia> import HelloWorld
+
+julia> HelloWorld.greet()
+Hello World!
+```
+
+### Adding dependencies to the project
+
+Let’s say we want to use the standard library package `Random` and the registered package `JSON` in our project.
+We simply `add` these packages (note how the prompt now shows the name of the newly generated project,
+since we `activate`d it):
+
+```julia-repl
+(HelloWorld) pkg> add Random JSON
+ Resolving package versions...
+ Updating "~/Documents/HelloWorld/Project.toml"
+ [682c06a0] + JSON v0.17.1
+ [9a3f8284] + Random
+ Updating "~/Documents/HelloWorld/Manifest.toml"
+ [34da2185] + Compat v0.57.0
+ [682c06a0] + JSON v0.17.1
+ [4d1e1d77] + Nullables v0.0.4
+ ...
+```
+
+Both `Random` and `JSON` got added to the project’s `Project.toml` file, and the resulting dependencies got added to the `Manifest.toml` file.
+The resolver has installed each package with the highest possible version, while still respecting the compatibility that each package enforces on its dependencies.
+
+We can now use both `Random` and `JSON` in our project. Changing `src/HelloWorld.jl` to
+
+```julia
+module HelloWorld
+
+import Random
+import JSON
+
+greet() = print("Hello World!")
+greet_alien() = print("Hello ", Random.randstring(8))
+
+end # module
+```
+
+and reloading the package, the new `greet_alien` function that uses `Random` can be called:
+
+```julia-repl
+julia> HelloWorld.greet_alien()
+Hello aT157rHV
+```
+
+### Adding a build step to the package
+
+The build step is executed the first time a package is installed or when explicitly invoked with `build`.
+A package is built by executing the file `deps/build.jl`.
+
+```julia-repl
+julia> print(read("deps/build.jl", String))
+println("I am being built...")
+
+(HelloWorld) pkg> build
+ Building HelloWorld → `deps/build.log`
+ Resolving package versions...
+
+julia> print(read("deps/build.log", String))
+I am being built...
+```
+
+If the build step fails, the output of the build step is printed to the console:
+
+```julia-repl
+julia> print(read("deps/build.jl", String))
+error("Ooops")
+
+(HelloWorld) pkg> build
+ Building HelloWorld → `deps/build.log`
+ Resolving package versions...
+┌ Error: Error building `HelloWorld`:
+│ ERROR: LoadError: Ooops
+│ Stacktrace:
+│ [1] error(::String) at ./error.jl:33
+│ [2] top-level scope at none:0
+│ [3] include at ./boot.jl:317 [inlined]
+│ [4] include_relative(::Module, ::String) at ./loading.jl:1071
+│ [5] include(::Module, ::String) at ./sysimg.jl:29
+│ [6] include(::String) at ./client.jl:393
+│ [7] top-level scope at none:0
+│ in expression starting at /Users/kristoffer/.julia/dev/Pkg/HelloWorld/deps/build.jl:1
+└ @ Pkg.Operations Operations.jl:938
+```
+
+### Adding tests to the package
+
+When a package is tested the file `test/runtests.jl` is executed:
+
+```julia-repl
+julia> print(read("test/runtests.jl", String))
+println("Testing...")
+
+(HelloWorld) pkg> test
+ Testing HelloWorld
+ Resolving package versions...
+Testing...
+ Testing HelloWorld tests passed
+```
+
+Tests are run in a new Julia process, where the package itself, and any
+test-specific dependencies, are available; see below.
+
+#### Test-specific dependencies in Julia 1.2 and above
+
+!!! compat "Julia 1.2"
+ This section only applies to Julia 1.2 and above. For specifying test dependencies
+ on previous Julia versions, see [Test-specific dependencies in Julia 1.0 and 1.1](@ref).
+
+!!! note
+ The exact interaction between `Project.toml`, `test/Project.toml` and their corresponding
+ `Manifest.toml`s is not fully worked out, and may be subject to change in future versions.
+ The old method of adding test-specific dependencies, described in the next section, will
+ therefore be supported throughout all Julia 1.X releases.
+
+In Julia 1.2 and later the test environment is given by `test/Project.toml`. Thus, when running
+tests, this will be the active project, and only dependencies to the `test/Project.toml` project
+can be used. Note that Pkg will add the tested package itself implicitly.
+
+!!! note
+ If no `test/Project.toml` exists Pkg will use the old style test-setup, as
+ described in [Test-specific dependencies in Julia 1.0 and 1.1](@ref).
+
+To add a test-specific dependency, i.e. a dependency that is available only when testing,
+it is thus enough to add this dependency to the `test/Project.toml` project. This can be
+done from the Pkg REPL by activating this environment, and then using `add` as one normally
+does. Let's add the `Test` standard library as a test dependency:
+
+```julia-repl
+(HelloWorld) pkg> activate ./test
+[ Info: activating environment at `~/HelloWorld/test/Project.toml`.
+
+(test) pkg> add Test
+ Resolving package versions...
+ Updating `~/HelloWorld/test/Project.toml`
+ [8dfed614] + Test
+ Updating `~/HelloWorld/test/Manifest.toml`
+ [...]
+```
+
+We can now use `Test` in the test script and we can see that it gets installed when testing:
+
+```julia-repl
+julia> print(read("test/runtests.jl", String))
+using Test
+@test 1 == 1
+
+(HelloWorld) pkg> test
+ Testing HelloWorld
+ Resolving package versions...
+ Updating `/var/folders/64/76tk_g152sg6c6t0b4nkn1vw0000gn/T/tmpPzUPPw/Project.toml`
+ [d8327f2a] + HelloWorld v0.1.0 [`~/.julia/dev/Pkg/HelloWorld`]
+ [8dfed614] + Test
+ Updating `/var/folders/64/76tk_g152sg6c6t0b4nkn1vw0000gn/T/tmpPzUPPw/Manifest.toml`
+ [d8327f2a] + HelloWorld v0.1.0 [`~/.julia/dev/Pkg/HelloWorld`]
+ Testing HelloWorld tests passed
+```
+
+#### Test-specific dependencies in Julia 1.0 and 1.1
+
+!!! note
+ The method of adding test-specific dependencies described in this section will
+ be replaced by the method from the previous section in future Julia versions.
+ The method in this section will, however, be supported throughout all Julia 1.X
+ releases.
+
+In Julia 1.0 and Julia 1.1 test-specific dependencies are added to the main
+`Project.toml`. To add `Markdown` and `Test` as test-dependencies, add the following:
+
+```toml
+[extras]
+Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Markdown", "Test"]
+```
+
+### Package naming guidelines
+
+Package names should be sensible to most Julia users, *even to those who are not domain experts*.
+The following guidelines apply to the `General` registry, but may be useful for other package
+registries as well.
+
+Since the `General` registry belongs to the entire community, people may have opinions about
+your package name when you publish it, especially if it's ambiguous or can be confused with
+something other than what it is. Usually you will then get suggestions for a new name that
+may fit your package better.
+
+1. Avoid jargon. In particular, avoid acronyms unless there is minimal possibility of confusion.
+
+ * It's ok to say `USA` if you're talking about the USA.
+ * It's not ok to say `PMA`, even if you're talking about positive mental attitude.
+2. Avoid using `Julia` in your package name or prefixing it with `Ju`.
+
+ * It is usually clear from context and to your users that the package is a Julia package.
+ * Package names already have a `.jl` extension, which communicates to users that `Package.jl` is a Julia package.
+ * Having Julia in the name can imply that the package is connected to, or endorsed by, contributors
+ to the Julia language itself.
+3. Packages that provide most of their functionality in association with a new type should have pluralized
+ names.
+
+ * `DataFrames` provides the `DataFrame` type.
+ * `BloomFilters` provides the `BloomFilter` type.
+ * In contrast, `JuliaParser` provides no new type, but instead new functionality in the `JuliaParser.parse()`
+ function.
+4. Err on the side of clarity, even if clarity seems long-winded to you.
+
+ * `RandomMatrices` is a less ambiguous name than `RndMat` or `RMT`, even though the latter are shorter.
+5. A less systematic name may suit a package that implements one of several possible approaches to
+ its domain.
+
+ * Julia does not have a single comprehensive plotting package. Instead, `Gadfly`, `PyPlot`, `Winston`
+ and other packages each implement a unique approach based on a particular design philosophy.
+ * In contrast, `SortingAlgorithms` provides a consistent interface to use many well-established
+ sorting algorithms.
+6. Packages that wrap external libraries or programs should be named after those libraries or programs.
+
+ * `CPLEX.jl` wraps the `CPLEX` library, which can be identified easily in a web search.
+ * `MATLAB.jl` provides an interface to call the MATLAB engine from within Julia.
+7. Avoid giving a package a name that is too close to that of an existing package.
+ * `Websocket` is too close to `WebSockets` and can be confusing to users. Rather use a new name such as `SimpleWebsockets`.
+
+### Registering packages
+
+Once a package is ready it can be registered with the [General Registry](https://github.com/JuliaRegistries/General).
+Currently packages are submitted via [`Registrator`](https://juliaregistrator.github.io/).
+In addition to `Registrator`, [`TagBot`](https://github.com/apps/julia-tagbot) helps manage the process of tagging releases.
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/environments.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/environments.md
new file mode 100644
index 0000000..9d5bd46
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/environments.md
@@ -0,0 +1,108 @@
+# **4.** Working with Environments
+
+The following discusses Pkg's interaction with environments. For more on the role environments play in code loading, including the "stack" of environments from which code can be loaded, see [this section in the Julia manual](https://docs.julialang.org/en/v1/manual/code-loading/#Environments-1).
+
+## Creating your own projects
+
+So far we have added packages to the default project at `~/.julia/environments/v1.0`. It is however easy to create other, independent, projects.
+It should be pointed out that when two projects use the same package at the same version, the content of this package is not duplicated.
+In order to create a new project, create a directory for it and then activate that directory to make it the "active project", which package operations manipulate:
+
+```julia-repl
+julia> mkdir("MyProject")
+
+julia> cd("MyProject")
+/Users/kristoffer/MyProject
+
+(v1.0) pkg> activate .
+
+(MyProject) pkg> st
+ Status `Project.toml`
+```
+
+Note that the REPL prompt changed when the new project is activated. Since this is a newly created project, the status command shows that it contains no packages, and in fact, it has no project or manifest file until we add a package to it:
+
+```julia-repl
+julia> readdir()
+0-element Array{String,1}
+
+(MyProject) pkg> add Example
+ Updating registry at `~/.julia/registries/General`
+ Updating git-repo `https://github.com/JuliaRegistries/General.git`
+ Resolving package versions...
+ Updating `Project.toml`
+ [7876af07] + Example v0.5.1
+ Updating `Manifest.toml`
+ [7876af07] + Example v0.5.1
+ [8dfed614] + Test
+Precompiling project...
+ 1 dependency successfully precompiled in 2 seconds
+
+julia> readdir()
+2-element Array{String,1}:
+ "Manifest.toml"
+ "Project.toml"
+
+julia> print(read("Project.toml", String))
+[deps]
+Example = "7876af07-990d-54b4-ab0e-23690620f79a"
+
+julia> print(read("Manifest.toml", String))
+[[Example]]
+deps = ["Test"]
+git-tree-sha1 = "8eb7b4d4ca487caade9ba3e85932e28ce6d6e1f8"
+uuid = "7876af07-990d-54b4-ab0e-23690620f79a"
+version = "0.5.1"
+
+[[Test]]
+uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+```
+
+This new environment is completely separate from the one we used earlier.
+
+
+## Project Precompilation
+
+By default any package that is added to a project or updated in a Pkg action will be automatically precompiled, along
+with its dependencies. The exception is the `develop` command, which neither builds nor precompiles the package; when
+that happens is left up to the user to decide.
+
+If a package that has been updated is already loaded in the session, the precompilation process will go ahead and precompile
+the new version, and any packages that depend on it, but will note that the package cannot be used until session restart.
+
+To disable this auto-precompilation, set `ENV["JULIA_PKG_PRECOMPILE_AUTO"]=0`, after which precompilation can be triggered
+manually either serially via code loading
+
+```julia-repl
+julia> using Example
+[ Info: Precompiling Example [7876af07-990d-54b4-ab0e-23690620f79a]
+```
+
+ or the parallel precompilation, which can be significantly faster when many dependencies are involved, via
+
+```julia-repl
+pkg> precompile
+Precompiling project...
+ 23 dependencies successfully precompiled in 36 seconds
+```
+
+## Using someone else's project
+
+Simply clone their project using e.g. `git clone`, `cd` to the project directory and call
+
+```julia-repl
+(v1.0) pkg> activate .
+
+(SomeProject) pkg> instantiate
+```
+
+If the project contains a manifest, this will install the packages in the same state that is given by that manifest.
+Otherwise, it will resolve the latest versions of the dependencies compatible with the project.
+
+!!! note "Specifying project on startup"
+ Instead of using `activate` from within Julia you can specify the project on startup using
+ the `--project=<path>` flag. For example, to run a script from the command line using the
+ environment in the current directory you can run
+ ```bash
+ $ julia --project=. myscript.jl
+ ```
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/faq.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/faq.md
new file mode 100644
index 0000000..3809353
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/faq.md
@@ -0,0 +1,3 @@
+# **8.** FAQ
+
+### Should `Manifest.toml` be checked in to the package?
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/getting-started.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/getting-started.md
new file mode 100644
index 0000000..acfb0c5
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/getting-started.md
@@ -0,0 +1,216 @@
+# **2.** Getting Started
+
+What follows is a quick overview of Pkg, Julia's package manager.
+It should help new users become familiar with basic Pkg features.
+
+## Basic Usage
+
+Pkg comes with a REPL.
+Enter the Pkg REPL by pressing `]` from the Julia REPL.
+To get back to the Julia REPL, press backspace or ^C.
+
+!!! note
+ This guide relies on the Pkg REPL to execute Pkg commands.
+ For non-interactive use, we recommend the Pkg API.
+ The Pkg API is fully documented in the [API Reference](@ref) section of the Pkg documentation.
+
+Upon entering the Pkg REPL, you should see a similar prompt:
+
+```julia-repl
+(v1.1) pkg>
+```
+
+To add a package, use `add`:
+
+```julia-repl
+(v1.1) pkg> add Example
+```
+
+!!! note
+ Some Pkg output has been omitted in order to keep this guide focused.
+ This will help maintain a good pace and not get bogged down in details.
+ If you require more details, refer to subsequent sections of the Pkg manual.
+
+We can also specify multiple packages at once:
+
+```julia-repl
+(v1.1) pkg> add JSON StaticArrays
+```
+
+To remove packages, use `rm`:
+
+```julia-repl
+(v1.1) pkg> rm JSON StaticArrays
+```
+
+So far, we have referred only to registered packages.
+Pkg also supports working with unregistered packages.
+To add an unregistered package, specify a URL:
+
+```julia-repl
+(v1.1) pkg> add https://github.com/JuliaLang/Example.jl
+```
+
+Use `rm` to remove this package by name:
+
+```julia-repl
+(v1.1) pkg> rm Example
+```
+
+Use `update` to update an installed package:
+
+```julia-repl
+(v1.1) pkg> update Example
+```
+
+To update all installed packages, use `update` without any arguments:
+
+```julia-repl
+(v1.1) pkg> update
+```
+
+## Getting Started with Environments
+
+Up to this point, we have covered basic package management: adding, updating and removing packages.
+This will be familiar if you have used other package managers.
+Pkg offers significant advantages over traditional package managers
+by organizing dependencies into **environments**.
+
+You may have noticed the `(v1.1)` in the REPL prompt.
+This lets us know `v1.1` is the **active environment**.
+The active environment is the environment that will be modified by Pkg commands such as `add`, `rm` and `update`.
+
+Let's set up a new environment so we may experiment.
+To set the active environment, use `activate`:
+
+```julia-repl
+(v1.1) pkg> activate tutorial
+[ Info: activating new environment at `/tmp/tutorial/Project.toml`.
+```
+
+Pkg lets us know we are creating a new environment and that this environment
+will be stored in the `/tmp/tutorial` directory.
+
+Pkg has also updated the REPL prompt in order to reflect the new
+active environment:
+
+```julia-repl
+(tutorial) pkg>
+```
+
+We can ask for information about the active environment by using `status`:
+
+```julia-repl
+(tutorial) pkg> status
+ Status `/tmp/tutorial/Project.toml`
+ (empty environment)
+```
+
+`/tmp/tutorial/Project.toml` is the location of the active environment's **project file**.
+A project file is where Pkg stores metadata for an environment.
+Notice this new environment is empty.
+Let us add a package and observe:
+
+```julia-repl
+(tutorial) pkg> add Example
+...
+
+(tutorial) pkg> status
+ Status `/tmp/tutorial/Project.toml`
+ [7876af07] Example v0.5.1
+```
+
+We can see `tutorial` now contains `Example` as a dependency.
+
+## Modifying A Dependency
+
+Say we are working on `Example` and feel it needs new functionality.
+How can we modify the source code?
+We can use `develop` to set up a git clone of the `Example` package.
+
+```julia-repl
+(tutorial) pkg> develop --local Example
+...
+
+(tutorial) pkg> status
+ Status `/tmp/tutorial/Project.toml`
+ [7876af07] Example v0.5.1+ [`dev/Example`]
+```
+
+Notice the feedback has changed.
+`dev/Example` refers to the location of the newly created clone.
+If we look inside the `/tmp/tutorial` directory, we will notice the following files:
+
+```
+tutorial
+├── dev
+│ └── Example
+├── Manifest.toml
+└── Project.toml
+```
+
+Instead of loading a registered version of `Example`,
+Julia will load the source code contained in `tutorial/dev/Example`.
+
+Let's try it out.
+First we modify the file at `tutorial/dev/Example/src/Example.jl` and add a simple function:
+
+```julia
+plusone(x::Int) = x + 1
+```
+
+Now we can go back to the Julia REPL and load the package:
+
+```julia-repl
+julia> import Example
+```
+
+!!! warn
+ A package can only be loaded once per Julia session.
+ If you have run `import Example` in the current Julia session, you will
+ have to restart Julia and rerun `activate tutorial` in the Pkg REPL.
+ [Revise.jl](https://github.com/timholy/Revise.jl/) can make this process
+ significantly more pleasant, but setting it up is beyond the scope of this guide.
+
+Julia should load our new code. Let's test it:
+
+```julia-repl
+julia> Example.plusone(1)
+2
+```
+
+Say we have a change of heart and decide the world is not ready for such elegant code.
+We can tell Pkg to stop using the local clone and use a registered version instead.
+We do this with `free`:
+
+```julia-repl
+(tutorial) pkg> free Example
+```
+
+When you are done experimenting with `tutorial`, you can return to the **default
+environment** by running `activate` with no arguments:
+
+```julia-repl
+(tutorial) pkg> activate
+
+(v1.1) pkg>
+```
+
+## Asking for Help
+
+If you are ever stuck, you can ask `Pkg` for help:
+
+```julia-repl
+(v1.1) pkg> ?
+```
+
+You should see a list of available commands along with short descriptions.
+You can ask for more detailed help by specifying a command:
+
+```julia-repl
+(v1.1) pkg> ?develop
+```
+
+This guide should help you get started with `Pkg`.
+`Pkg` has much more to offer in terms of powerful package management,
+read the full manual to learn more!
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/glossary.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/glossary.md
new file mode 100644
index 0000000..f632140
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/glossary.md
@@ -0,0 +1,124 @@
+# [**9.** Glossary](@id Glossary)
+
+**Project:** a source tree with a standard layout, including a `src` directory
+for the main body of Julia code, a `test` directory for testing the project,
+a `docs` directory for documentation files, and optionally a `deps` directory for a build
+script and its outputs. A project will typically also have a project file and
+may optionally have a manifest file:
+
+- **Project file:** a file in the root directory of a project, named
+ `Project.toml` (or `JuliaProject.toml`), describing metadata about the project,
+ including its name, UUID (for packages), authors, license, and the names and
+ UUIDs of packages and libraries that it depends on.
+
+- **Manifest file:** a file in the root directory of a project, named
+ `Manifest.toml` (or `JuliaManifest.toml`), describing a complete dependency graph
+ and exact versions of each package and library used by a project.
+
+**Package:** a project which provides reusable functionality that can be used by
+other Julia projects via `import X` or `using X`. A package should have a
+project file with a `uuid` entry giving its package UUID. This UUID is used to
+identify the package in projects that depend on it.
+
+!!! note
+ For legacy reasons it is possible to load a package without a project file or
+ UUID from the REPL or the top-level of a script. It is not possible, however,
+ to load a package without a project file or UUID from a project with them. Once
+ you've loaded from a project file, everything needs a project file and UUID.
+
+**Application:** a project which provides standalone functionality not intended
+to be reused by other Julia projects. For example a web application or a
+command-line utility, or simulation/analytics code accompanying a scientific paper.
+An application may have a UUID but does not need one.
+An application may also provide global configuration options for packages it
+depends on. Packages, on the other hand, may not provide global configuration
+since that could conflict with the configuration of the main application.
+
+!!! note
+ **Projects _vs._ Packages _vs._ Applications:**
+
+ 1. **Project** is an umbrella term: packages and applications are kinds of projects.
+ 2. **Packages** should have UUIDs, applications can have UUIDs but don't need them.
+ 3. **Applications** can provide global configuration, whereas packages cannot.
+
+**Library (future work):** a compiled binary dependency (not written in Julia)
+packaged to be used by a Julia project. These are currently typically built
+in-place by a `deps/build.jl` script in a project’s source tree, but in the future
+we plan to make libraries first-class entities directly installed and upgraded
+by the package manager.
+
+**Environment:** the combination of the top-level name map provided by a project
+file combined with the dependency graph and map from packages to their entry points
+provided by a manifest file. For more detail see the manual section on code loading.
+
+- **Explicit environment:** an environment in the form of an explicit project
+ file and an optional corresponding manifest file together in a directory. If the
+ manifest file is absent then the implied dependency graph and location maps are
+ empty.
+
+- **Implicit environment:** an environment provided as a directory (without a
+ project file or manifest file) containing packages with entry points of the form
+ `X.jl`, `X.jl/src/X.jl` or `X/src/X.jl`. The top-level name map is implied by
+ these entry points. The dependency graph is implied by the existence of project
+ files inside of these package directories, e.g. `X.jl/Project.toml` or
+ `X/Project.toml`. The dependencies of the `X` package are the dependencies in
+ the corresponding project file if there is one. The location map is implied by
+ the entry points themselves.
+
+**Registry:** a source tree with a standard layout recording metadata about a
+registered set of packages, the tagged versions of them which are available, and
+which versions of packages are compatible or incompatible with each other. A
+registry is indexed by package name and UUID, and has a directory for each
+registered package providing the following metadata about it:
+
+- name – e.g. `DataFrames`
+- UUID – e.g. `a93c6f00-e57d-5684-b7b6-d8193f3e46c0`
+- authors – e.g. `Jane Q. Developer <jane@example.com>`
+- license – e.g. MIT, BSD3, or GPLv2
+- repository – e.g. `https://github.com/JuliaData/DataFrames.jl.git`
+- description – a block of text summarizing the functionality of a package
+- keywords – e.g. `data`, `tabular`, `analysis`, `statistics`
+- versions – a list of all registered version tags
+
+For each registered version of a package, the following information is provided:
+
+- its semantic version number – e.g. `v1.2.3`
+- its git tree SHA-1 hash – e.g. `7ffb18ea3245ef98e368b02b81e8a86543a11103`
+- a map from names to UUIDs of dependencies
+- which versions of other packages it is compatible/incompatible with
+
+Dependencies and compatibility are stored in a compressed but human-readable
+format using ranges of package versions.
+
+**Depot:** a directory on a system where various package-related resources live,
+including:
+
+- `environments`: shared named environments (e.g. `v1.0`, `devtools`)
+- `clones`: bare clones of package repositories
+- `compiled`: cached compiled package images (`.ji` files)
+- `config`: global configuration files (e.g. `startup.jl`)
+- `dev`: default directory for package development
+- `logs`: log files (e.g. `manifest_usage.toml`, `repl_history.jl`)
+- `packages`: installed package versions
+- `registries`: clones of registries (e.g. `General`)
+
+**Load path:** a stack of environments where package identities, their
+dependencies, and entry-points are searched for. The load path is controlled in
+Julia by the `LOAD_PATH` global variable which is populated at startup based on
+the value of the `JULIA_LOAD_PATH` environment variable. The first entry is your
+primary environment, often the current project, while later entries provide
+additional packages one may want to use from the REPL or top-level scripts.
+
+**Depot path:** a stack of depot locations where the package manager, as well as
+Julia's code loading mechanisms, look for registries, installed packages, named
+environments, repo clones, cached compiled package images, and configuration
+files. The depot path is controlled by the Julia `DEPOT_PATH` global variable
+which is populated at startup based on the value of the `JULIA_DEPOT_PATH`
+environment variable. The first entry is the “user depot” and should be writable
+by and owned by the current user. The user depot is where: registries are
+cloned, new package versions are installed, named environments are created and
+updated, package repos are cloned, newly compiled package image files are saved,
+log files are written, development packages are checked out by default, and
+global configuration data is saved. Later entries in the depot path are treated
+as read-only and are appropriate for registries, packages, etc. installed and
+managed by system administrators.
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/index.md
new file mode 100644
index 0000000..8cbab92
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/index.md
@@ -0,0 +1,65 @@
+# **1.** Introduction
+
+Welcome to the documentation for Pkg, Julia's package manager.
+The documentation covers many things, for example managing package
+installations, developing packages, working with package registries and more.
+
+Throughout the manual the REPL interface to Pkg is used in the examples.
+There is also a functional API interface, which is preferred when not working
+interactively. This API is documented in the [API Reference](@ref) section.
+
+## Background and Design
+
+Pkg is a complete rewrite of Julia's old package manager[^1] and was released
+together with Julia v1.0. Unlike traditional
+package managers, which install and manage a single global set of packages, Pkg
+is designed around “environments”: independent sets of packages that can be
+local to an individual project or shared and selected by name. The exact set of
+packages and versions in an environment is captured in a _manifest file_ which
+can be checked into a project repository and tracked in version control,
+significantly improving reproducibility of projects. If you’ve ever tried to run
+code you haven’t used in a while only to find that you can’t get anything to
+work because you’ve updated or uninstalled some of the packages your project was
+using, you’ll understand the motivation for this approach. In Pkg, since each
+project maintains its own independent set of package versions, you’ll never have
+this problem again. Moreover, if you check out a project on a new system, you
+can simply materialize the environment described by its manifest file and
+immediately be up and running with a known-good set of dependencies.
+
+Since environments are managed and updated independently from each other,
+“dependency hell” is significantly alleviated in Pkg. If you want to use the
+latest and greatest version of some package in a new project but you’re stuck on
+an older version in a different project, that’s no problem – since they have
+separate environments they can just use different versions, which are both
+installed at the same time in different locations on your system. The location
+of each package version is canonical, so when environments use the same versions
+of packages, they can share installations, avoiding unnecessary duplication of
+the package. Old package versions that are no longer used by any environments
+are periodically “garbage collected” by the package manager.
+
+Pkg’s approach to local environments may be familiar to people who have used
+Python’s `virtualenv` or Ruby’s `bundler`. In Julia, instead of hacking the
+language’s code loading mechanisms to support environments, we have the benefit
+that Julia natively understands them. In addition, Julia environments are
+“stackable”: you can overlay one environment with another and thereby have
+access to additional packages outside of the primary environment. This makes it
+easy to work on a project, which provides the primary environment, while still
+having access from the REPL to all your usual dev tools like profilers,
+debuggers, and so on, just by having an environment including these dev tools
+later in the load path.
+
+Last but not least, Pkg is designed to support federated package registries.
+This means that it allows multiple registries managed by different parties to
+interact seamlessly. In particular, this includes private registries which can
+live behind corporate firewalls. You can install and update your own packages
+from a private registry with exactly the same tools and workflows that you use
+to install and manage official Julia packages. If you urgently need to apply a
+hotfix for a public package that’s critical to your company’s product, you can
+tag a private version of it in your company’s internal registry and get a fix to
+your developers and ops teams quickly and easily without having to wait for an
+upstream patch to be accepted and published. Once an official fix is published,
+however, you can just upgrade your dependencies and you'll be back on an
+official release again.
+
+[^1]: Often denoted `Pkg2`, now archived as `OldPkg` at
+ [`github.com/JuliaAttic/OldPkg.jl`](https://github.com/JuliaAttic/OldPkg.jl).
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/managing-packages.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/managing-packages.md
new file mode 100644
index 0000000..45df972
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/managing-packages.md
@@ -0,0 +1,428 @@
+# **3.** Managing Packages
+
+## Adding packages
+
+There are two ways of adding packages, either using the `add` command or the `dev` command.
+The most frequently used is `add` and its usage is described first.
+
+### Adding registered packages
+
+In the Pkg REPL, packages can be added with the `add` command followed by the name of the package, for example:
+
+```julia-repl
+(v1.0) pkg> add Example
+ Cloning default registries into /Users/kristoffer/.julia/registries
+ Cloning registry General from "https://github.com/JuliaRegistries/General.git"
+ Updating registry at `~/.julia/registries/General`
+ Updating git-repo `https://github.com/JuliaRegistries/General.git`
+ Resolving package versions...
+ Updating `~/.julia/environments/v1.0/Project.toml`
+ [7876af07] + Example v0.5.1
+ Updating `~/.julia/environments/v1.0/Manifest.toml`
+ [7876af07] + Example v0.5.1
+ [8dfed614] + Test
+```
+
+Here we added the package Example to the current project. In this example, we are using a fresh Julia installation,
+and this is our first time adding a package using Pkg. By default, Pkg clones Julia's General registry,
+and uses this registry to look up packages requested for inclusion in the current environment.
+The status update shows a short form of the package UUID to the left, then the package name, and the version.
+Since standard libraries (e.g. `Test`) are shipped with Julia, they do not have a version. The project status contains the packages
+you have added yourself, in this case, `Example`:
+
+```julia-repl
+(v1.0) pkg> st
+ Status `Project.toml`
+ [7876af07] Example v0.5.1
+```
+
+The manifest status shows all the packages in the environment, including recursive dependencies:
+
+```julia-repl
+(v1.0) pkg> st --manifest
+ Status `Manifest.toml`
+ [7876af07] Example v0.5.1
+ [8dfed614] Test
+```
+
+It is possible to add multiple packages in one command as `pkg> add A B C`.
+
+After a package is added to the project, it can be loaded in Julia:
+
+```julia-repl
+julia> using Example
+
+julia> Example.hello("User")
+"Hello, User"
+```
+
+A specific version can be installed by appending a version after a `@` symbol, e.g. `@v0.4`, to the package name:
+
+```julia-repl
+(v1.0) pkg> add Example@0.4
+ Resolving package versions...
+ Updating `~/.julia/environments/v1.0/Project.toml`
+ [7876af07] + Example v0.4.1
+ Updating `~/.julia/environments/v1.0/Manifest.toml`
+ [7876af07] + Example v0.4.1
+```
+
+If a branch (or a certain commit) of `Example` has a hotfix that is not yet included in a registered version,
+we can explicitly track that branch (or commit) by appending `#branchname` (or `#commitSHA1`) to the package name:
+
+```julia-repl
+(v1.0) pkg> add Example#master
+ Updating git-repo `https://github.com/JuliaLang/Example.jl.git`
+ Resolving package versions...
+ Updating `~/.julia/environments/v1.0/Project.toml`
+ [7876af07] ~ Example v0.5.1 ⇒ v0.5.1+ #master (https://github.com/JuliaLang/Example.jl.git)
+ Updating `~/.julia/environments/v1.0/Manifest.toml`
+ [7876af07] ~ Example v0.5.1 ⇒ v0.5.1+ #master (https://github.com/JuliaLang/Example.jl.git)
+```
+
+The status output now shows that we are tracking the `master` branch of `Example`.
+When updating packages, we will pull updates from that branch.
+
+To go back to tracking the registry version of `Example`, the command `free` is used:
+
+```julia-repl
+(v1.0) pkg> free Example
+ Resolving package versions...
+ Updating `~/.julia/environments/v1.0/Project.toml`
+ [7876af07] ~ Example v0.5.1+ #master (https://github.com/JuliaLang/Example.jl.git) ⇒ v0.5.1
+ Updating `~/.julia/environments/v1.0/Manifest.toml`
+ [7876af07] ~ Example v0.5.1+ #master (https://github.com/JuliaLang/Example.jl.git) ⇒ v0.5.1
+```
+
+
+### Adding unregistered packages
+
+If a package is not in a registry, it can be added by specifying a URL to the repository:
+
+```julia-repl
+(v1.0) pkg> add https://github.com/fredrikekre/ImportMacros.jl
+ Updating git-repo `https://github.com/fredrikekre/ImportMacros.jl`
+ Resolving package versions...
+Downloaded MacroTools ─ v0.4.1
+ Updating `~/.julia/environments/v1.0/Project.toml`
+ [e6797606] + ImportMacros v0.0.0 # (https://github.com/fredrikekre/ImportMacros.jl)
+ Updating `~/.julia/environments/v1.0/Manifest.toml`
+ [e6797606] + ImportMacros v0.0.0 # (https://github.com/fredrikekre/ImportMacros.jl)
+ [1914dd2f] + MacroTools v0.4.1
+```
+
+The dependencies of the unregistered package (here `MacroTools`) got installed.
+For unregistered packages we could have given a branch name (or commit SHA1) to track using `#`, just like for registered packages.
+
+If you want to add a package using the SSH-based `git` protocol, you have to use quotes because the URL contains a `@`. For example,
+```julia-repl
+(v1.0) pkg> add "git@github.com:fredrikekre/ImportMacros.jl.git"
+ Cloning git-repo `git@github.com:fredrikekre/ImportMacros.jl.git`
+ Updating git-repo `git@github.com:fredrikekre/ImportMacros.jl.git`
+ Updating registry at `~/.julia/registries/General`
+ Resolving package versions...
+Updating `~/.julia/environments/v1/Project.toml`
+ [92a963f6] + ImportMacros v1.0.0 `git@github.com:fredrikekre/ImportMacros.jl.git#master`
+Updating `~/.julia/environments/v1/Manifest.toml`
+ [92a963f6] + ImportMacros v1.0.0 `git@github.com:fredrikekre/ImportMacros.jl.git#master`
+```
+
+### Adding a local package
+
+Instead of giving a URL of a git repo to `add` we could instead have given a local path to a git repo.
+This works similarly to adding a URL. The local repository will be tracked (at some branch) and updates
+from that local repo are pulled when packages are updated.
+Note tracking a package through `add` is distinct from `develop`:
+changes to files in the local package repository will not immediately be reflected when loading that package.
+The changes would have to be committed and the packages updated in order to pull in the changes.
+
+In addition, it is possible to add packages relative to the `Manifest.toml` file; see [Developing packages](@ref developing) for an example.
+
+### [Developing packages](@id developing)
+
+By only using `add` your Manifest will always have a "reproducible state", in other words, as long as the repositories and registries used are still accessible
+it is possible to retrieve the exact state of all the dependencies in the project. This has the advantage that you can send your project (`Project.toml`
+and `Manifest.toml`) to someone else and they can "instantiate" that project in the same state as you had it locally.
+However, when you are developing a package, it is more convenient to load packages at their current state at some path. For this reason, the `dev` command exists.
+
+Let's try to `dev` a registered package:
+
+```julia-repl
+(v1.0) pkg> dev Example
+ Updating git-repo `https://github.com/JuliaLang/Example.jl.git`
+ Resolving package versions...
+ Updating `~/.julia/environments/v1.0/Project.toml`
+ [7876af07] + Example v0.5.1+ [`~/.julia/dev/Example`]
+ Updating `~/.julia/environments/v1.0/Manifest.toml`
+ [7876af07] + Example v0.5.1+ [`~/.julia/dev/Example`]
+```
+
+The `dev` command fetches a full clone of the package to `~/.julia/dev/` (the path can be changed by setting the environment variable `JULIA_PKG_DEVDIR`).
+When importing `Example` julia will now import it from `~/.julia/dev/Example` and whatever local changes have been made to the files in that path are consequently
+reflected in the code loaded. When we used `add` we said that we tracked the package repository, we here say that we track the path itself.
+Note the package manager will never touch any of the files at a tracked path. It is therefore up to you to pull updates, change branches etc.
+If we try to `dev` a package at some branch that already exists at `~/.julia/dev/` the package manager will simply use the existing path.
+For example:
+
+```julia-repl
+(v1.0) pkg> dev Example
+ Updating git-repo `https://github.com/JuliaLang/Example.jl.git`
+[ Info: Path `/Users/kristoffer/.julia/dev/Example` exists and looks like the correct package, using existing path instead of cloning
+```
+
+Note the info message saying that it is using the existing path.
+When tracking a path, the package manager will never modify the files at that path.
+
+If `dev` is used on a local path, that path to that package is recorded and used when loading that package.
+The path will be recorded relative to the project file, unless it is given as an absolute path.
+
+To stop tracking a path and use the registered version again, use `free`:
+
+```julia-repl
+(v1.0) pkg> free Example
+ Resolving package versions...
+ Updating `~/.julia/environments/v1.0/Project.toml`
+ [7876af07] ↓ Example v0.5.1+ [`~/.julia/dev/Example`] ⇒ v0.5.1
+ Updating `~/.julia/environments/v1.0/Manifest.toml`
+ [7876af07] ↓ Example v0.5.1+ [`~/.julia/dev/Example`] ⇒ v0.5.1
+```
+
+It should be pointed out that by using `dev` your project is now inherently stateful.
+Its state depends on the current content of the files at the path and the manifest cannot be "instantiated" by someone else without
+knowing the exact content of all the packages that are tracking a path.
+
+Note that if you add a dependency to a package that tracks a local path, the Manifest (which contains the whole dependency graph) will become
+out of sync with the actual dependency graph. This means that the package will not be able to load that dependency since it is not recorded
+in the Manifest. To synchronize the Manifest, use the REPL command `resolve`.
+
+In addition to absolute paths, `add` and `dev` can accept relative paths to packages.
+In this case, the relative path from the active project to the package is stored.
+This approach is useful when the relative location of tracked dependencies is more important than their absolute location.
+For example, the tracked dependencies can be stored inside of the active project directory.
+The whole directory can be moved and `Pkg` will still be able to find the dependencies
+because their path relative to the active project is preserved even though their absolute path has changed.
+
+## Removing packages
+
+Packages can be removed from the current project by using `pkg> rm Package`.
+This will only remove packages that exist in the project; to remove a package that only
+exists as a dependency use `pkg> rm --manifest DepPackage`.
+Note that this will remove all packages that depend on `DepPackage`.
+
+## [Updating packages](@id updating)
+
+When new versions of packages that the project is using are released, it is a good idea to update. Simply calling `up` will try to update *all* the dependencies of the project
+to the latest compatible version. Sometimes this is not what you want. You can specify a subset of the dependencies to upgrade by giving them as arguments to `up`, e.g:
+
+```julia-repl
+(v1.0) pkg> up Example
+```
+
+If `Example` has a dependency which is also a dependency for another explicitly added package, that dependency will not be updated. If you only want to update the minor version of packages, to reduce the risk that your project breaks, you can give the `--minor` flag, e.g:
+
+```julia-repl
+(v1.0) pkg> up --minor Example
+```
+
+Packages that track a local repository are not updated when a minor upgrade is done.
+Packages that track a path are never touched by the package manager.
+
+## Pinning a package
+
+A pinned package will never be updated. A package can be pinned using `pin`, for example:
+
+```julia-repl
+(v1.0) pkg> pin Example
+ Resolving package versions...
+ Updating `~/.julia/environments/v1.0/Project.toml`
+ [7876af07] ~ Example v0.5.1 ⇒ v0.5.1 ⚲
+ Updating `~/.julia/environments/v1.0/Manifest.toml`
+ [7876af07] ~ Example v0.5.1 ⇒ v0.5.1 ⚲
+```
+
+Note the pin symbol `⚲` showing that the package is pinned. Removing the pin is done using `free`
+
+```julia-repl
+(v1.0) pkg> free Example
+ Updating `~/.julia/environments/v1.0/Project.toml`
+ [7876af07] ~ Example v0.5.1 ⚲ ⇒ v0.5.1
+ Updating `~/.julia/environments/v1.0/Manifest.toml`
+ [7876af07] ~ Example v0.5.1 ⚲ ⇒ v0.5.1
+```
+
+## Testing packages
+
+The tests for a package can be run using the `test` command:
+
+```julia-repl
+(v1.0) pkg> test Example
+ Testing Example
+ Testing Example tests passed
+```
+
+## Building packages
+
+The build step of a package is automatically run when a package is first installed.
+The output of the build process is directed to a file.
+To explicitly run the build step for a package, the `build` command is used:
+
+```julia-repl
+(v1.0) pkg> build MbedTLS
+ Building MbedTLS → `~/.julia/packages/MbedTLS/h1Vu/deps/build.log`
+
+julia> print(read("~/.julia/packages/MbedTLS/h1Vu/deps/build.log", String))
+┌ Warning: `wait(t::Task)` is deprecated, use `fetch(t)` instead.
+│ caller = macro expansion at OutputCollector.jl:63 [inlined]
+└ @ Core OutputCollector.jl:63
+...
+[ Info: using prebuilt binaries
+```
+
+## [Interpreting and resolving version conflicts](@id conflicts)
+
+An environment consists of a set of mutually-compatible packages.
+Sometimes, you can find yourself in a situation in which two packages you'd like to use simultaneously
+have incompatible requirements.
+In such cases you'll get an "Unsatisfiable requirements" error:
+
+```@setup conflict
+using Pkg
+include(joinpath(pkgdir(Pkg), "test", "resolve_utils.jl"))
+using .ResolveUtils
+deps_data = Any[["A", v"1.0.0", "C", v"0.2"],
+ ["B", v"1.0.0", "D", v"0.1"],
+ ["C", v"0.1.0", "D", v"0.1"],
+ ["C", v"0.1.1", "D", v"0.1"],
+ ["C", v"0.2.0", "D", v"0.2"],
+ ["D", v"0.1.0"],
+ ["D", v"0.2.0"],
+ ["D", v"0.2.1"]]
+reqs_data = Any[["A", "*"],
+ ["B", "*"]]
+```
+
+```@example conflict
+print("pkg> add A\n", try resolve_tst(deps_data, reqs_data) catch e sprint(showerror, e) end) # hide
+```
+
+This message means that a package named `D` has a version conflict.
+Even if you have never `add`ed `D` directly, this kind of error can arise
+if `D` is required by other packages that you are trying to use.
+
+The error message has a lot of crucial information.
+It may be easiest to interpret piecewise:
+
+```
+Unsatisfiable requirements detected for package D [756980fe]:
+ D [756980fe] log:
+ ├─possible versions are: [0.1.0, 0.2.0-0.2.1] or uninstalled
+```
+means that `D` has three released versions, `v0.1.0`, `v0.2.0`, and `v0.2.1`.
+You also have the option of not having it installed at all.
+Each of these options might have different implications for the set of other packages that can be installed.
+
+Crucially, notice the stroke characters (vertical and horizontal lines) and their indentation.
+Together, these connect *messages* to specific *packages*.
+For instance the right stroke of `├─` indicates that the message to its right (`possible versions...`)
+is connected to the package pointed to by its vertical stroke (`D`).
+This same principle applies to the next line:
+
+```
+ ├─restricted by compatibility requirements with B [f4259836] to versions: 0.1.0
+```
+The vertical stroke here is also aligned under `D`, and thus this message
+is in reference to `D`.
+Specifically, there's some other package `B` that depends on version `v0.1.0` of `D`.
+Notice that this is not the newest version of `D`.
+
+Next comes some information about `B`:
+
+```
+ │ └─B [f4259836] log:
+ │ ├─possible versions are: 1.0.0 or uninstalled
+ │ └─restricted to versions * by an explicit requirement, leaving only versions 1.0.0
+```
+The two lines below the first have a vertical stroke that aligns with `B`,
+and thus they provide information about `B`.
+They tell you that `B` has just one release, `v1.0.0`.
+You've not specified a particular version of `B` (`restricted to versions *` means that any version will do),
+but the `explicit requirement` means that you've asked for `B` to be part of your environment,
+for example by `pkg> add B`.
+You might have asked for `B` previously, and the requirement is still active.
+
+The conflict becomes clear with the line
+```
+└─restricted by compatibility requirements with C [c99a7cb2] to versions: 0.2.0 — no versions left
+```
+
+Here again the vertical stroke aligns with `D`: this means that `D` is *also* required by another package, `C`.
+`C` requires `v0.2.0` of `D`, and this conflicts with `B`'s need for `v0.1.0` of `D`.
+This explains the conflict.
+
+But wait, you might ask, what is `C` and why do I need it at all?
+The next few lines introduce the problem:
+
+```
+ └─C [c99a7cb2] log:
+ ├─possible versions are: [0.1.0-0.1.1, 0.2.0] or uninstalled
+ └─restricted by compatibility requirements with A [29c70717] to versions: 0.2.0
+```
+These provide more information about `C`, revealing that it has 3 released versions: `v0.1.0`, `v0.1.1`, and `v0.2.0`.
+Moreover, `C` is required by another package `A`.
+Indeed, `A`'s requirements are such that we need `v0.2.0` of `C`.
+`A`'s origin is revealed on the next lines:
+
+```
+ └─A [29c70717] log:
+ ├─possible versions are: 1.0.0 or uninstalled
+ └─restricted to versions * by an explicit requirement, leaving only versions 1.0.0
+```
+
+So we can see that `A` was `explicitly required`, and in this case it's because we were trying to
+`add` it to our environment.
+
+In summary, we explicitly asked to use `A` and `B`, but this gave a conflict for `D`.
+The reason was that `B` and `C` require conflicting versions of `D`.
+Even though `C` isn't something we asked for explicitly, it was needed by `A`.
+
+To fix such errors, you have a number of options:
+
+- try [updating your packages](@ref updating). It's possible the developers of these packages have recently released new versions that are mutually compatible.
+- remove either `A` or `B` from your environment. Perhaps `B` is left over from something you were previously working on, and you don't need it anymore. If you don't need `A` and `B` at the same time, this is the easiest way to fix the problem.
+- try reporting your conflict. In this case, we were able to deduce that `B` requires an outdated version of `D`. You could thus report an issue in the development repository of `B.jl` asking for an updated version.
+- try fixing the problem yourself.
+ This becomes easier once you understand `Project.toml` files and how they declare their compatibility requirements. We'll return to this example in [Fixing conflicts](@ref).
+
+## Garbage collecting old, unused packages
+
+As packages are updated and projects are deleted, installed package versions and artifacts that were
+once used will inevitably become old and not used from any existing project.
+`Pkg` keeps a log of all projects used so it can go through the log and see exactly which projects still exist
+and what packages/artifacts those projects used.
+If a package or artifact is not marked as used by any project, it is added to a list of orphaned packages.
+Packages and artifacts that are in the orphan list for 30 days without being used again are deleted from the system on the next garbage collection.
+This timing is configurable via the `collect_delay` keyword argument to `Pkg.gc()`.
+A value of `0` will cause anything currently not in use to be collected immediately, skipping the orphans list entirely.
+If you are short on disk space and want to clean out as many unused packages and artifacts as possible, you may want to try this, but if you need these versions again, you will have to download them again.
+To run a typical garbage collection with default arguments, simply use the `gc` command at the `pkg>` REPL:
+
+```julia-repl
+(v1.0) pkg> gc
+ Active manifests at:
+ `~/BinaryProvider/Manifest.toml`
+ ...
+ `~/Compat.jl/Manifest.toml`
+ Active artifacts:
+ `~/src/MyProject/Artifacts.toml`
+
+ Deleted ~/.julia/packages/BenchmarkTools/1cAj: 146.302 KiB
+ Deleted ~/.julia/packages/Cassette/BXVB: 795.557 KiB
+ ...
+ Deleted `~/.julia/artifacts/e44cdf2579a92ad5cbacd1cddb7414c8b9d2e24e` (152.253 KiB)
+ Deleted `~/.julia/artifacts/f2df5266567842bbb8a06acca56bcabf813cd73f` (21.536 MiB)
+
+ Deleted 36 package installations (113.205 MiB)
+ Deleted 15 artifact installations (20.759 GiB)
+```
+
+Note that only packages in `~/.julia/packages` are deleted.
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/registries.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/registries.md
new file mode 100644
index 0000000..be6f8e2
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/registries.md
@@ -0,0 +1,81 @@
+# **7.** Registries
+
+Registries contain information about packages, such as
+available releases and dependencies, and where they can be downloaded.
+The `General` registry (https://github.com/JuliaRegistries/General)
+is the default one, and is installed automatically.
+
+## Managing registries
+
+!!! compat "Julia 1.1"
+ Pkg's registry handling requires at least Julia 1.1.
+
+Registries can be added, removed and updated from either the Pkg REPL
+or by using the function based API. In this section we will describe the
+REPL interface. The registry API is documented in
+the [Registry API Reference](@ref) section.
+
+### Adding registries
+
+A custom registry can be added with the `registry add` command
+from the Pkg REPL. Usually this will be done with a URL to the
+registry. Here we add the `General` registry:
+
+```julia-repl
+pkg> registry add https://github.com/JuliaRegistries/General
+ Cloning registry from "https://github.com/JuliaRegistries/General"
+ Added registry `General` to `~/.julia/registries/General`
+```
+
+and now all the packages registered in `General` are available for e.g. adding.
+To see which registries are currently installed you can use the `registry status`
+(or `registry st`) command
+
+```julia-repl
+pkg> registry st
+Registry Status
+ [23338594] General (https://github.com/JuliaRegistries/General.git)
+```
+
+Registries are always added to the user depot, which is the first entry in `DEPOT_PATH` (cf. the [Glossary](@ref) section).
+
+### Removing registries
+
+Registries can be removed with the `registry remove` (or `registry rm`) command.
+Here we remove the `General` registry
+
+```julia-repl
+pkg> registry rm General
+ Removing registry `General` from ~/.julia/registries/General
+
+pkg> registry st
+Registry Status
+ (no registries found)
+```
+
+In case there are multiple registries named `General` installed you have to
+disambiguate with the `uuid`, just as when manipulating packages, e.g.
+
+```julia-repl
+pkg> registry rm General=23338594-aafe-5451-b93e-139f81909106
+ Removing registry `General` from ~/.julia/registries/General
+```
+
+### Updating registries
+
+The `registry update` (or `registry up`) command is available to update registries.
+Here we update the `General` registry:
+
+```julia-repl
+pkg> registry up General
+ Updating registry at `~/.julia/registries/General`
+ Updating git-repo `https://github.com/JuliaRegistries/General`
+```
+
+and to update all installed registries just do:
+
+```julia-repl
+pkg> registry up
+ Updating registry at `~/.julia/registries/General`
+ Updating git-repo `https://github.com/JuliaRegistries/General`
+```
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/toml-files.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/toml-files.md
new file mode 100644
index 0000000..d79f9ef
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/toml-files.md
@@ -0,0 +1,270 @@
+# [**10.** `Project.toml` and `Manifest.toml`](@id Project-and-Manifest)
+
+Two files that are central to Pkg are `Project.toml` and `Manifest.toml`. `Project.toml`
+and `Manifest.toml` are written in [TOML](https://github.com/toml-lang/toml) (hence the
+`.toml` extension) and include information about dependencies, versions, package names,
+UUIDs etc.
+
+!!! note
+ The `Project.toml` and `Manifest.toml` files are not only used by the package manager;
+ they are also used by Julia's code loading, and determine e.g. what `using Example`
+ should do. For more details see the section about
+ [Code Loading](https://docs.julialang.org/en/v1/manual/code-loading/)
+ in the Julia manual.
+
+
+## `Project.toml`
+
+The project file describes the project on a high level, for example the package/project
+dependencies and compatibility constraints are listed in the project file. The file entries
+are described below.
+
+
+### The `authors` field
+
+For a package, the optional `authors` field is a list of strings describing the
+package authors, in the form `NAME <EMAIL>`. For example:
+```toml
+authors = ["Some One <someone@email.com>",
+ "Foo Bar <foo@bar.com>"]
+```
+
+
+### The `name` field
+
+The name of the package/project is determined by the `name` field, for example:
+```toml
+name = "Example"
+```
+The name can contain word characters `[a-zA-Z0-9_]`, but can not start with a number. For
+packages it is recommended to follow the
+[package naming guidelines](@ref Package-naming-guidelines). The `name` field is mandatory
+for packages.
+
+
+### The `uuid` field
+
+`uuid` is a string with a
+[universally unique identifier](https://en.wikipedia.org/wiki/Universally_unique_identifier) for the package/project, for example:
+```toml
+uuid = "7876af07-990d-54b4-ab0e-23690620f79a"
+```
+The `uuid` field is mandatory for packages.
+
+!!! note
+ It is recommended that `UUIDs.uuid4()` is used to generate random UUIDs.
+
+
+### The `version` field
+
+`version` is a string with the version number for the package/project. It should consist of
+three numbers, major version, minor version and patch number, separated with a `.`, for example:
+```toml
+version = "1.2.5"
+```
+Julia uses [Semantic Versioning](https://semver.org/) (SemVer) and the `version` field
+should follow SemVer. The basic rules are:
+* Before 1.0.0, anything goes, but when you make breaking changes the minor version should
+ be incremented.
+* After 1.0.0 only make breaking changes when incrementing the major version.
+* After 1.0.0 no new public API should be added without incrementing the minor version.
+ This includes, in particular, new types, functions, methods and method overloads, from
+ `Base` or other packages.
+See also the section on [Compatibility](@ref).
+
+Note that Pkg.jl deviates from the SemVer specification when it comes to versions pre-1.0.0. See
+the section on [pre-1.0 behavior](@ref compat-pre-1.0) for more details.
+
+
+### The `[deps]` section
+
+All dependencies of the package/project are listed in the `[deps]` section. Each dependency
+is listed as a name-uuid pair, for example:
+
+```toml
+[deps]
+Example = "7876af07-990d-54b4-ab0e-23690620f79a"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+```
+
+Typically it is not needed to manually add entries to the `[deps]` section; this is instead
+handled by Pkg operations such as `add`.
+
+
+### The `[compat]` section
+
+Compatibility constraints for the dependencies listed under `[deps]` can be listed in the
+`[compat]` section.
+Example:
+
+```toml
+[deps]
+Example = "7876af07-990d-54b4-ab0e-23690620f79a"
+
+[compat]
+Example = "1.2"
+```
+
+The [Compatibility](@ref) section describes the different possible compatibility
+constraints in detail. It is also possible to list constraints on `julia` itself, although
+`julia` is not listed as a dependency in the `[deps]` section:
+
+```toml
+[compat]
+julia = "1.1"
+```
+
+
+## `Manifest.toml`
+
+The manifest file is an absolute record of the state of the packages in the environment.
+It includes exact information about (direct and indirect) dependencies of the project.
+Given a `Project.toml` + `Manifest.toml` pair, it is possible to instantiate the exact same
+package environment, which is very useful for reproducibility.
+For the details, see [`Pkg.instantiate`](@ref).
+
+!!! note
+ The `Manifest.toml` file is generated and maintained by Pkg and, in general, this file
+ should *never* be modified manually.
+
+
+### `Manifest.toml` entries
+
+Each dependency has its own section in the manifest file, and its content varies depending
+on how the dependency was added to the environment. Every
+dependency section includes a combination of the following entries:
+
+* `uuid`: the [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier)
+ for the dependency, for example `uuid = "7876af07-990d-54b4-ab0e-23690620f79a"`.
+* `deps`: a vector listing the dependencies of the dependency, for example
+ `deps = ["Example", "JSON"]`.
+* `version`: a version number, for example `version = "1.2.6"`.
+* `path`: a file path to the source code, for example `path = "/home/user/Example"`.
+* `repo-url`: a URL to the repository where the source code was found,
+ for example `repo-url = "https://github.com/JuliaLang/Example.jl.git"`.
+* `repo-rev`: a git revision, for example a branch `repo-rev = "master"`
+ or a commit `repo-rev = "66607a62a83cb07ab18c0b35c038fcd62987c9b1"`.
+* `git-tree-sha1`: a content hash of the source tree, for example
+ `git-tree-sha1 = "ca3820cc4e66f473467d912c4b2b3ae5dc968444"`.
+
+
+#### Added package
+
+When a package is added from a package registry, for example by invoking `pkg> add Example`
+or with a specific version `pkg> add Example@1.2`, the resulting `Manifest.toml` entry looks
+like:
+
+```toml
+[[Example]]
+deps = ["DependencyA", "DependencyB"]
+git-tree-sha1 = "8eb7b4d4ca487caade9ba3e85932e28ce6d6e1f8"
+uuid = "7876af07-990d-54b4-ab0e-23690620f79a"
+version = "1.2.3"
+```
+
+Note, in particular, that no `repo-url` is present, since that information is included in
+the registry where this package was found.
+
+#### Added package by branch
+
+The resulting dependency section when adding a package specified by a branch, e.g.
+`pkg> add Example#master` or `pkg> add https://github.com/JuliaLang/Example.jl.git`,
+looks like:
+
+```toml
+[[Example]]
+deps = ["DependencyA", "DependencyB"]
+git-tree-sha1 = "54c7a512469a38312a058ec9f429e1db1f074474"
+repo-rev = "master"
+repo-url = "https://github.com/JuliaLang/Example.jl.git"
+uuid = "7876af07-990d-54b4-ab0e-23690620f79a"
+version = "1.2.4"
+```
+
+Note that both the branch we are tracking (`master`) and the remote repository url
+(`"https://github.com/JuliaLang/Example.jl.git"`) are stored in the manifest.
+
+#### Added package by commit
+
+The resulting dependency section when adding a package specified by a commit, e.g.
+`pkg> add Example#cf6ba6cc0be0bb5f56840188563579d67048be34`, looks like:
+
+```toml
+[[Example]]
+deps = ["DependencyA", "DependencyB"]
+git-tree-sha1 = "54c7a512469a38312a058ec9f429e1db1f074474"
+repo-rev = "cf6ba6cc0be0bb5f56840188563579d67048be34"
+repo-url = "https://github.com/JuliaLang/Example.jl.git"
+uuid = "7876af07-990d-54b4-ab0e-23690620f79a"
+version = "1.2.4"
+```
+
+The only difference from tracking a branch is the content of `repo-rev`.
+
+#### Developed package
+
+The resulting dependency section when adding a package with `develop`,
+e.g. `pkg> develop Example` or `pkg> develop /path/to/local/folder/Example`,
+looks like:
+
+```toml
+[[Example]]
+deps = ["DependencyA", "DependencyB"]
+path = "/home/user/.julia/dev/Example/"
+uuid = "7876af07-990d-54b4-ab0e-23690620f79a"
+version = "1.2.4"
+```
+
+Note that the path to the source code is included, and changes made to that
+source tree are directly reflected.
+
+#### Pinned package
+
+Pinned packages are also recorded in the manifest file; the resulting
+dependency section for e.g. `pkg> add Example; pin Example` looks like:
+
+```toml
+[[Example]]
+deps = ["DependencyA", "DependencyB"]
+git-tree-sha1 = "54c7a512469a38312a058ec9f429e1db1f074474"
+pinned = true
+uuid = "7876af07-990d-54b4-ab0e-23690620f79a"
+version = "1.2.4"
+```
+
+The only difference is the addition of the `pinned = true` entry.
+
+#### Multiple package with the same name
+
+Julia differentiates packages based on UUID, which means that the name alone is not enough
+to identify a package. It is possible to have multiple packages in the same environment
+with the same name, but with different UUID. In such a situation the `Manifest.toml` file
+looks a bit different. Consider for example the situation where you have added `A` and `B`
+to your environment, and the `Project.toml` file looks as follows:
+
+```toml
+[deps]
+A = "ead4f63c-334e-11e9-00e6-e7f0a5f21b60"
+B = "edca9bc6-334e-11e9-3554-9595dbb4349c"
+```
+
+If `A` now depends on `B = "f41f7b98-334e-11e9-1257-49272045fb24"`, i.e. *another* package
+named `B` there will be two different `B` packages in the `Manifest.toml` file. In this
+case the full `Manifest.toml` file, with `git-tree-sha1` and `version` fields removed for
+clarity, looks like:
+
+```toml
+[[A]]
+uuid = "ead4f63c-334e-11e9-00e6-e7f0a5f21b60"
+
+ [A.deps]
+ B = "f41f7b98-334e-11e9-1257-49272045fb24"
+
+[[B]]
+uuid = "f41f7b98-334e-11e9-1257-49272045fb24"
+[[B]]
+uuid = "edca9bc6-334e-11e9-3554-9595dbb4349c"
+```
+
+There is now an array of the two `B` packages, and the `[deps]` section for `A` has been
+expanded in order to be explicit about which `B` package `A` depends on.
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/test/test_packages/SameNameDifferentUUID/dev/Unregistered/README.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/test/test_packages/SameNameDifferentUUID/dev/Unregistered/README.md
new file mode 100644
index 0000000..26b9f8d
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/test/test_packages/SameNameDifferentUUID/dev/Unregistered/README.md
@@ -0,0 +1,2 @@
+# Unregistered.jl
+Test repo
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/test/test_packages/SandboxFallback2/dev/Unregistered/README.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/test/test_packages/SandboxFallback2/dev/Unregistered/README.md
new file mode 100644
index 0000000..26b9f8d
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/test/test_packages/SandboxFallback2/dev/Unregistered/README.md
@@ -0,0 +1,2 @@
+# Unregistered.jl
+Test repo
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/test/test_packages/TestDepTrackingPath/test/dev/Unregistered/README.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/test/test_packages/TestDepTrackingPath/test/dev/Unregistered/README.md
new file mode 100644
index 0000000..26b9f8d
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/test/test_packages/TestDepTrackingPath/test/dev/Unregistered/README.md
@@ -0,0 +1,2 @@
+# Unregistered.jl
+Test repo
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/test/test_packages/TransferSubgraph/dev/Unregistered/README.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/test/test_packages/TransferSubgraph/dev/Unregistered/README.md
new file mode 100644
index 0000000..26b9f8d
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/test/test_packages/TransferSubgraph/dev/Unregistered/README.md
@@ -0,0 +1,2 @@
+# Unregistered.jl
+Test repo
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Printf/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Printf/docs/src/index.md
new file mode 100644
index 0000000..828e527
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Printf/docs/src/index.md
@@ -0,0 +1,6 @@
+# Printf
+
+```@docs
+Printf.@printf
+Printf.@sprintf
+```
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Profile/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Profile/docs/src/index.md
new file mode 100644
index 0000000..ac60bb9
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Profile/docs/src/index.md
@@ -0,0 +1,17 @@
+# [Profiling](@id lib-profiling)
+
+```@docs
+Profile.@profile
+```
+
+The methods in `Profile` are not exported and need to be called e.g. as `Profile.print()`.
+
+```@docs
+Profile.clear
+Profile.print
+Profile.init
+Profile.fetch
+Profile.retrieve
+Profile.callers
+Profile.clear_malloc_data
+```
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/REPL/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/REPL/docs/src/index.md
new file mode 100644
index 0000000..168d3e9
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/REPL/docs/src/index.md
@@ -0,0 +1,692 @@
+# The Julia REPL
+
+Julia comes with a full-featured interactive command-line REPL (read-eval-print loop) built into
+the `julia` executable. In addition to allowing quick and easy evaluation of Julia statements,
+it has a searchable history, tab-completion, many helpful keybindings, and dedicated help and
+shell modes. The REPL can be started by simply calling `julia` with no arguments or double-clicking
+on the executable:
+
+```@eval
+io = IOBuffer()
+Base.banner(io)
+banner = String(take!(io))
+import Markdown
+Markdown.parse("```\n\$ julia\n\n$(banner)\njulia>\n```")
+```
+
+To exit the interactive session, type `^D` -- the control key together with the `d` key on a blank
+line -- or type `exit()` followed by the return or enter key. The REPL greets you with a banner
+and a `julia>` prompt.
+
+## The different prompt modes
+
+### The Julian mode
+
+The REPL has five main modes of operation. The first and most common is the Julian prompt. It
+is the default mode of operation; each new line initially starts with `julia>`. It is here that
+you can enter Julia expressions. Hitting return or enter after a complete expression has been
+entered will evaluate the entry and show the result of the last expression.
+
+```jldoctest
+julia> string(1 + 2)
+"3"
+```
+
+There are a number of useful features unique to interactive work. In addition to showing the result,
+the REPL also binds the result to the variable `ans`. A trailing semicolon on the line can be
+used as a flag to suppress showing the result.
+
+```jldoctest
+julia> string(3 * 4);
+
+julia> ans
+"12"
+```
+
+In Julia mode, the REPL supports something called *prompt pasting*. This activates when pasting
+text that starts with `julia> ` into the REPL. In that case, only expressions starting with
+`julia> ` are parsed, others are removed. This makes it possible to paste a chunk of code
+that has been copied from a REPL session without having to scrub away prompts and outputs. This
+feature is enabled by default but can be disabled or enabled at will with `REPL.enable_promptpaste(::Bool)`.
+If it is enabled, you can try it out by pasting the code block above this paragraph straight into
+the REPL. This feature does not work on the standard Windows command prompt due to its limitation
+at detecting when a paste occurs.
+
+Objects are printed at the REPL using the [`show`](@ref) function with a specific [`IOContext`](@ref).
+In particular, the `:limit` attribute is set to `true`.
+Other attributes, like `:compact`, can receive a default value in certain `show` methods
+if they are not already set.
+It's possible, as an experimental feature, to specify the attributes used by the REPL via the
+`Base.active_repl.options.iocontext` dictionary (associating values to attributes). For example:
+
+```julia-repl
+julia> rand(2, 2)
+2×2 Array{Float64,2}:
+ 0.8833 0.329197
+ 0.719708 0.59114
+
+julia> show(IOContext(stdout, :compact => false), "text/plain", rand(2, 2))
+ 0.43540323669187075 0.15759787870609387
+ 0.2540832269192739 0.4597637838786053
+julia> Base.active_repl.options.iocontext[:compact] = false;
+
+julia> rand(2, 2)
+2×2 Array{Float64,2}:
+ 0.2083967319174056 0.13330606013126012
+ 0.6244375177790158 0.9777957560761545
+```
+
+In order to define automatically the values of this dictionary at startup time, one can use the
+[`atreplinit`](@ref) function in the `~/.julia/config/startup.jl` file, for example:
+```julia
+atreplinit() do repl
+ repl.options.iocontext[:compact] = false
+end
+```
+
+### Help mode
+
+When the cursor is at the beginning of the line, the prompt can be changed to a help mode by typing
+`?`. Julia will attempt to print help or documentation for anything entered in help mode:
+
+```julia-repl
+julia> ? # upon typing ?, the prompt changes (in place) to: help?>
+
+help?> string
+search: string String Cstring Cwstring RevString randstring bytestring SubString
+
+ string(xs...)
+
+ Create a string from any values using the print function.
+```
+
+Macros, types and variables can also be queried:
+
+```
+help?> @time
+ @time
+
+ A macro to execute an expression, printing the time it took to execute, the number of allocations,
+ and the total number of bytes its execution caused to be allocated, before returning the value of the
+ expression.
+
+ See also @timev, @timed, @elapsed, and @allocated.
+
+help?> Int32
+search: Int32 UInt32
+
+ Int32 <: Signed
+
+ 32-bit signed integer type.
+```
+
+A string or regex literal searches all docstrings using [`apropos`](@ref):
+
+```
+help?> "aprop"
+REPL.stripmd
+Base.Docs.apropos
+
+help?> r"ap..p"
+Base.:∘
+Base.shell_escape_posixly
+Distributed.CachingPool
+REPL.stripmd
+Base.Docs.apropos
+```
+
+Help mode can be exited by pressing backspace at the beginning of the line.
+
+### [Shell mode](@id man-shell-mode)
+
+Just as help mode is useful for quick access to documentation, another common task is to use the
+system shell to execute system commands. Just as `?` entered help mode when at the beginning
+of the line, a semicolon (`;`) will enter the shell mode. And it can be exited by pressing backspace
+at the beginning of the line.
+
+```julia-repl
+julia> ; # upon typing ;, the prompt changes (in place) to: shell>
+
+shell> echo hello
+hello
+```
+!!! note
+ For Windows users, Julia's shell mode does not expose windows shell commands.
+ Hence, this will fail:
+
+```julia-repl
+julia> ; # upon typing ;, the prompt changes (in place) to: shell>
+
+shell> dir
+ERROR: IOError: could not spawn `dir`: no such file or directory (ENOENT)
+Stacktrace!
+.......
+```
+However, you can get access to `PowerShell` like this:
+```julia-repl
+julia> ; # upon typing ;, the prompt changes (in place) to: shell>
+
+shell> powershell
+Windows PowerShell
+Copyright (C) Microsoft Corporation. All rights reserved.
+PS C:\Users\elm>
+```
+... and to `cmd.exe` like that (see the `dir` command):
+```julia-repl
+julia> ; # upon typing ;, the prompt changes (in place) to: shell>
+
+shell> cmd
+Microsoft Windows [version 10.0.17763.973]
+(c) 2018 Microsoft Corporation. All rights reserved.
+C:\Users\elm>dir
+ Volume in drive C has no label
+ Volume Serial Number is 1643-0CD7
+ Directory of C:\Users\elm
+
+29/01/2020 22:15 <DIR> .
+29/01/2020 22:15 <DIR> ..
+02/02/2020 08:06 <DIR> .atom
+```
+
+### Pkg mode
+
+The Package manager mode accepts specialized commands for loading and updating packages. It is entered
+by pressing the `]` key at the Julian REPL prompt and exited by pressing CTRL-C or pressing the backspace key
+at the beginning of the line. The prompt for this mode is `pkg>`. It supports its own help-mode, which is
+entered by pressing `?` at the beginning of the line of the `pkg>` prompt. The Package manager mode is
+documented in the Pkg manual, available at [https://julialang.github.io/Pkg.jl/v1/](https://julialang.github.io/Pkg.jl/v1/).
+
+### Search modes
+
+In all of the above modes, the executed lines get saved to a history file, which can be searched.
+ To initiate an incremental search through the previous history, type `^R` -- the control key
+together with the `r` key. The prompt will change to ```(reverse-i-search)`':```, and as you
+type the search query will appear in the quotes. The most recent result that matches the query
+will dynamically update to the right of the colon as more is typed. To find an older result using
+the same query, simply type `^R` again.
+
+Just as `^R` is a reverse search, `^S` is a forward search, with the prompt ```(i-search)`':```.
+ The two may be used in conjunction with each other to move through the previous or next matching
+results, respectively.
+
+## Key bindings
+
+The Julia REPL makes great use of key bindings. Several control-key bindings were already introduced
+above (`^D` to exit, `^R` and `^S` for searching), but there are many more. In addition to the
+control-key, there are also meta-key bindings. These vary more by platform, but most terminals
+default to using alt- or option- held down with a key to send the meta-key (or can be configured
+to do so), or pressing Esc and then the key.
+
+| Keybinding | Description |
+|:------------------- |:---------------------------------------------------------------------------------------------------------- |
+| **Program control** |   |
+| `^D` | Exit (when buffer is empty) |
+| `^C` | Interrupt or cancel |
+| `^L` | Clear console screen |
+| Return/Enter, `^J` | New line, executing if it is complete |
+| meta-Return/Enter | Insert new line without executing it |
+| `?` or `;` | Enter help or shell mode (when at start of a line) |
+| `^R`, `^S` | Incremental history search, described above |
+| **Cursor movement** |   |
+| Right arrow, `^F` | Move right one character |
+| Left arrow, `^B` | Move left one character |
+| ctrl-Right, `meta-F`| Move right one word |
+| ctrl-Left, `meta-B` | Move left one word |
+| Home, `^A` | Move to beginning of line |
+| End, `^E` | Move to end of line |
+| Up arrow, `^P` | Move up one line (or change to the previous history entry that matches the text before the cursor) |
+| Down arrow, `^N` | Move down one line (or change to the next history entry that matches the text before the cursor) |
+| Shift-Arrow Key | Move cursor according to the direction of the Arrow key, while activating the region ("shift selection") |
+| Page-up, `meta-P` | Change to the previous history entry |
+| Page-down, `meta-N` | Change to the next history entry |
+| `meta-<` | Change to the first history entry (of the current session if it is before the current position in history) |
+| `meta->` | Change to the last history entry |
+| `^-Space` | Set the "mark" in the editing region (and de-activate the region if it's active) |
+| `^-Space ^-Space` | Set the "mark" in the editing region and make the region "active", i.e. highlighted |
+| `^G` | De-activate the region (i.e. make it not highlighted) |
+| `^X^X` | Exchange the current position with the mark |
+| **Editing** |   |
+| Backspace, `^H` | Delete the previous character, or the whole region when it's active |
+| Delete, `^D` | Forward delete one character (when buffer has text) |
+| meta-Backspace | Delete the previous word |
+| `meta-d` | Forward delete the next word |
+| `^W` | Delete previous text up to the nearest whitespace |
+| `meta-w` | Copy the current region in the kill ring |
+| `meta-W` | "Kill" the current region, placing the text in the kill ring |
+| `^K` | "Kill" to end of line, placing the text in the kill ring |
+| `^Y` | "Yank" insert the text from the kill ring |
+| `meta-y` | Replace a previously yanked text with an older entry from the kill ring |
+| `^T` | Transpose the characters about the cursor |
+| `meta-Up arrow` | Transpose current line with line above |
+| `meta-Down arrow` | Transpose current line with line below |
+| `meta-u` | Change the next word to uppercase |
+| `meta-c` | Change the next word to titlecase |
+| `meta-l` | Change the next word to lowercase |
+| `^/`, `^_` | Undo previous editing action |
+| `^Q` | Write a number in REPL and press `^Q` to open editor at corresponding stackframe or method |
+| `meta-Left Arrow` | indent the current line on the left |
+| `meta-Right Arrow` | indent the current line on the right |
+| `meta-.` | insert last word from previous history entry |
+
+### Customizing keybindings
+
+Julia's REPL keybindings may be fully customized to a user's preferences by passing a dictionary
+to `REPL.setup_interface`. The keys of this dictionary may be characters or strings. The key
+`'*'` refers to the default action. Control plus character `x` bindings are indicated with `"^x"`.
+Meta plus `x` can be written `"\\M-x"` or `"\ex"`, and Control plus `x` can be written
+`"\\C-x"` or `"^x"`.
+The values of the custom keymap must be `nothing` (indicating
+that the input should be ignored) or functions that accept the signature
+`(PromptState, AbstractREPL, Char)`.
+The `REPL.setup_interface` function must be called before the REPL is initialized, by registering
+the operation with [`atreplinit`](@ref). For example, to bind the up and down arrow keys to move through
+history without prefix search, one could put the following code in `~/.julia/config/startup.jl`:
+
+```julia
+import REPL
+import REPL.LineEdit
+
+const mykeys = Dict{Any,Any}(
+ # Up Arrow
+ "\e[A" => (s,o...)->(LineEdit.edit_move_up(s) || LineEdit.history_prev(s, LineEdit.mode(s).hist)),
+ # Down Arrow
+ "\e[B" => (s,o...)->(LineEdit.edit_move_down(s) || LineEdit.history_next(s, LineEdit.mode(s).hist))
+)
+
+function customize_keys(repl)
+ repl.interface = REPL.setup_interface(repl; extra_repl_keymap = mykeys)
+end
+
+atreplinit(customize_keys)
+```
+
+Users should refer to `LineEdit.jl` to discover the available actions on key input.
+
+## Tab completion
+
+In both the Julian and help modes of the REPL, one can enter the first few characters of a function
+or type and then press the tab key to get a list of all matches:
+
+```julia-repl
+julia> stri[TAB]
+stride strides string strip
+
+julia> Stri[TAB]
+StridedArray StridedMatrix StridedVecOrMat StridedVector String
+```
+
+The tab key can also be used to substitute LaTeX math symbols with their Unicode equivalents,
+and get a list of LaTeX matches as well:
+
+```julia-repl
+julia> \pi[TAB]
+julia> π
+π = 3.1415926535897...
+
+julia> e\_1[TAB] = [1,0]
+julia> e₁ = [1,0]
+2-element Array{Int64,1}:
+ 1
+ 0
+
+julia> e\^1[TAB] = [1 0]
+julia> e¹ = [1 0]
+1×2 Array{Int64,2}:
+ 1 0
+
+julia> \sqrt[TAB]2 # √ is equivalent to the sqrt function
+julia> √2
+1.4142135623730951
+
+julia> \hbar[TAB](h) = h / 2\pi[TAB]
+julia> ħ(h) = h / 2π
+ħ (generic function with 1 method)
+
+julia> \h[TAB]
+\hat \hermitconjmatrix \hkswarow \hrectangle
+\hatapprox \hexagon \hookleftarrow \hrectangleblack
+\hbar \hexagonblack \hookrightarrow \hslash
+\heartsuit \hksearow \house \hspace
+
+julia> α="\alpha[TAB]" # LaTeX completion also works in strings
+julia> α="α"
+```
+
+A full list of tab-completions can be found in the [Unicode Input](@ref) section of the manual.
+
+Completion of paths works for strings and julia's shell mode:
+
+```julia-repl
+julia> path="/[TAB]"
+.dockerenv .juliabox/ boot/ etc/ lib/ media/ opt/ root/ sbin/ sys/ usr/
+.dockerinit bin/ dev/ home/ lib64/ mnt/ proc/ run/ srv/ tmp/ var/
+shell> /[TAB]
+.dockerenv .juliabox/ boot/ etc/ lib/ media/ opt/ root/ sbin/ sys/ usr/
+.dockerinit bin/ dev/ home/ lib64/ mnt/ proc/ run/ srv/ tmp/ var/
+```
+
+Tab completion can help with investigation of the available methods matching the input arguments:
+
+```julia-repl
+julia> max([TAB] # All methods are displayed, not shown here due to size of the list
+
+julia> max([1, 2], [TAB] # All methods where `Vector{Int}` matches as first argument
+max(x, y) in Base at operators.jl:215
+max(a, b, c, xs...) in Base at operators.jl:281
+
+julia> max([1, 2], max(1, 2), [TAB] # All methods matching the arguments.
+max(x, y) in Base at operators.jl:215
+max(a, b, c, xs...) in Base at operators.jl:281
+```
+
+Keywords are also displayed in the suggested methods after `;`. In the example below, `limit`
+and `keepempty` are keyword arguments:
+
+```julia-repl
+julia> split("1 1 1", [TAB]
+split(str::AbstractString; limit, keepempty) in Base at strings/util.jl:302
+split(str::T, splitter; limit, keepempty) where T<:AbstractString in Base at strings/util.jl:277
+```
+
+The completion of the methods uses type inference and can therefore see if the arguments match
+even if the arguments are output from functions. The function needs to be type stable for the
+completion to be able to remove non-matching methods.
+
+Tab completion can also help completing fields:
+
+```julia-repl
+julia> import UUIDs
+
+julia> UUIDs.uuid[TAB]
+uuid1 uuid4 uuid_version
+```
+
+Fields for output from functions can also be completed:
+
+```julia-repl
+julia> split("","")[1].[TAB]
+lastindex offset string
+```
+
+The completion of fields for output from functions uses type inference, and it can only suggest
+fields if the function is type stable.
+
+Dictionary keys can also be tab completed:
+
+```julia-repl
+julia> foo = Dict("qwer1"=>1, "qwer2"=>2, "asdf"=>3)
+Dict{String,Int64} with 3 entries:
+ "qwer2" => 2
+ "asdf" => 3
+ "qwer1" => 1
+
+julia> foo["q[TAB]
+
+"qwer1" "qwer2"
+julia> foo["qwer
+```
+
+## Customizing Colors
+
+The colors used by Julia and the REPL can be customized, as well. To change the
+color of the Julia prompt you can add something like the following to your
+`~/.julia/config/startup.jl` file, which is to be placed inside your home directory:
+
+```julia
+function customize_colors(repl)
+ repl.prompt_color = Base.text_colors[:cyan]
+end
+
+atreplinit(customize_colors)
+```
+
+The available color keys can be seen by typing `Base.text_colors` in the help mode of the REPL.
+In addition, the integers 0 to 255 can be used as color keys for terminals
+with 256 color support.
+
+You can also change the colors for the help and shell prompts and
+input and answer text by setting the appropriate field of `repl` in the `customize_colors` function
+above (respectively, `help_color`, `shell_color`, `input_color`, and `answer_color`). For the
+latter two, be sure that the `envcolors` field is also set to false.
+
+It is also possible to apply boldface formatting by using
+`Base.text_colors[:bold]` as a color. For instance, to print answers in
+boldface font, one can use the following as a `~/.julia/config/startup.jl`:
+
+```julia
+function customize_colors(repl)
+ repl.envcolors = false
+ repl.answer_color = Base.text_colors[:bold]
+end
+
+atreplinit(customize_colors)
+```
+
+You can also customize the color used to render warning and informational messages by
+setting the appropriate environment variables. For instance, to render error, warning, and informational
+messages respectively in magenta, yellow, and cyan you can add the following to your
+`~/.julia/config/startup.jl` file:
+
+```julia
+ENV["JULIA_ERROR_COLOR"] = :magenta
+ENV["JULIA_WARN_COLOR"] = :yellow
+ENV["JULIA_INFO_COLOR"] = :cyan
+```
+
+## TerminalMenus
+
+TerminalMenus is a submodule of the Julia REPL and enables small, low-profile interactive menus in the terminal.
+
+### Examples
+
+```julia
+import REPL
+using REPL.TerminalMenus
+
+options = ["apple", "orange", "grape", "strawberry",
+ "blueberry", "peach", "lemon", "lime"]
+
+```
+
+#### RadioMenu
+
+The RadioMenu allows the user to select one option from the list. The `request`
+function displays the interactive menu and returns the index of the selected
+choice. If a user presses 'q' or `ctrl-c`, `request` will return a `-1`.
+
+
+```julia
+# `pagesize` is the number of items to be displayed at a time.
+# The UI will scroll if the number of options is greater
+# than the `pagesize`
+menu = RadioMenu(options, pagesize=4)
+
+# `request` displays the menu and returns the index after the
+# user has selected a choice
+choice = request("Choose your favorite fruit:", menu)
+
+if choice != -1
+ println("Your favorite fruit is ", options[choice], "!")
+else
+ println("Menu canceled.")
+end
+
+```
+
+Output:
+
+```
+Choose your favorite fruit:
+^ grape
+ strawberry
+ > blueberry
+v peach
+Your favorite fruit is blueberry!
+```
+
+#### MultiSelectMenu
+
+The MultiSelectMenu allows users to select many choices from a list.
+
+```julia
+# here we use the default `pagesize` 10
+menu = MultiSelectMenu(options)
+
+# `request` returns a `Set` of selected indices
+# if the menu is canceled (ctrl-c or q), return an empty set
+choices = request("Select the fruits you like:", menu)
+
+if length(choices) > 0
+ println("You like the following fruits:")
+ for i in choices
+ println(" - ", options[i])
+ end
+else
+ println("Menu canceled.")
+end
+```
+
+Output:
+
+```
+Select the fruits you like:
+[press: d=done, a=all, n=none]
+ [ ] apple
+ > [X] orange
+ [X] grape
+ [ ] strawberry
+ [ ] blueberry
+ [X] peach
+ [ ] lemon
+ [ ] lime
+You like the following fruits:
+ - orange
+ - grape
+ - peach
+```
+
+### Customization / Configuration
+
+#### ConfiguredMenu subtypes
+
+Starting with Julia 1.6, the recommended way to configure menus is via the constructor.
+For instance, the default multiple-selection menu
+
+```
+julia> menu = MultiSelectMenu(options, pagesize=5);
+
+julia> request(menu) # ASCII is used by default
+[press: d=done, a=all, n=none]
+ [ ] apple
+ [X] orange
+ [ ] grape
+ > [X] strawberry
+v [ ] blueberry
+```
+
+can instead be rendered with Unicode selection and navigation characters with
+
+```julia
+julia> menu = MultiSelectMenu(options, pagesize=5, charset=:unicode);
+
+julia> request(menu)
+[press: d=done, a=all, n=none]
+ ⬚ apple
+ ✓ orange
+ ⬚ grape
+ → ✓ strawberry
+↓ ⬚ blueberry
+```
+
+More fine-grained configuration is also possible:
+
+```julia
+julia> menu = MultiSelectMenu(options, pagesize=5, charset=:unicode, checked="YEP!", unchecked="NOPE", cursor='⧐');
+
+julia> request(menu)
+julia> request(menu)
+[press: d=done, a=all, n=none]
+ NOPE apple
+ YEP! orange
+ NOPE grape
+ ⧐ YEP! strawberry
+↓ NOPE blueberry
+```
+
+Aside from the overall `charset` option, for `RadioMenu` the configurable options are:
+
+ - `cursor::Char='>'|'→'`: character to use for cursor
+ - `up_arrow::Char='^'|'↑'`: character to use for up arrow
+ - `down_arrow::Char='v'|'↓'`: character to use for down arrow
+ - `updown_arrow::Char='I'|'↕'`: character to use for up/down arrow in one-line page
+ - `scroll_wrap::Bool=false`: optionally wrap-around at the beginning/end of a menu
+ - `ctrl_c_interrupt::Bool=true`: If `false`, return empty on ^C, if `true` throw InterruptException() on ^C
+
+`MultiSelectMenu` adds:
+
+ - `checked::String="[X]"|"✓"`: string to use for checked
+ - `unchecked::String="[ ]"|"⬚"`: string to use for unchecked
+
+You can create new menu types of your own.
+Types that are derived from `TerminalMenus.ConfiguredMenu` configure the menu options at construction time.
+
+#### Legacy interface
+
+Prior to Julia 1.6, and still supported throughout Julia 1.x, one can also configure menus by calling
+`TerminalMenus.config()`.
+
+## References
+
+### REPL
+
+```@docs
+Base.atreplinit
+```
+
+### TerminalMenus
+
+#### Configuration
+
+```@docs
+REPL.TerminalMenus.Config
+REPL.TerminalMenus.MultiSelectConfig
+REPL.TerminalMenus.config
+```
+
+#### User interaction
+
+```@docs
+REPL.TerminalMenus.request
+```
+
+#### AbstractMenu extension interface
+
+Any subtype of `AbstractMenu` must be mutable, and must contain the fields `pagesize::Int` and
+`pageoffset::Int`.
+Any subtype must also implement the following functions:
+
+```@docs
+REPL.TerminalMenus.pick
+REPL.TerminalMenus.cancel
+REPL.TerminalMenus.writeline
+```
+
+It must also implement either `options` or `numoptions`:
+
+```@docs
+REPL.TerminalMenus.options
+REPL.TerminalMenus.numoptions
+```
+
+If the subtype does not have a field named `selected`, it must also implement
+
+```@docs
+REPL.TerminalMenus.selected
+```
+
+The following are optional but can allow additional customization:
+
+```@docs
+REPL.TerminalMenus.header
+REPL.TerminalMenus.keypress
+```
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/REPL/src/TerminalMenus/LICENSE.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/REPL/src/TerminalMenus/LICENSE.md
new file mode 100644
index 0000000..9ca98a4
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/REPL/src/TerminalMenus/LICENSE.md
@@ -0,0 +1,22 @@
+The TerminalMenus.jl package is licensed under the MIT "Expat" License:
+
+> Copyright (c) 2017: Nick Paul.
+>
+> Permission is hereby granted, free of charge, to any person obtaining a copy
+> of this software and associated documentation files (the "Software"), to deal
+> in the Software without restriction, including without limitation the rights
+> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+> copies of the Software, and to permit persons to whom the Software is
+> furnished to do so, subject to the following conditions:
+>
+> The above copyright notice and this permission notice shall be included in all
+> copies or substantial portions of the Software.
+>
+> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+> SOFTWARE.
+>
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Random/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Random/docs/src/index.md
new file mode 100644
index 0000000..ca86de4
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Random/docs/src/index.md
@@ -0,0 +1,358 @@
+# Random Numbers
+
+```@meta
+DocTestSetup = :(using Random)
+```
+
+Random number generation in Julia uses the [Mersenne Twister library](http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/#dSFMT)
+via `MersenneTwister` objects. Julia has a global RNG, which is used by default. Other RNG types
+can be plugged in by inheriting the `AbstractRNG` type; they can then be used to have multiple
+streams of random numbers. Besides `MersenneTwister`, Julia also provides the `RandomDevice` RNG
+type, which is a wrapper over the OS provided entropy.
+
+Most functions related to random generation accept an optional `AbstractRNG` object as first argument,
+which defaults to the global one if not provided. Moreover, some of them accept optionally
+dimension specifications `dims...` (which can be given as a tuple) to generate arrays of random
+values. In a multi-threaded program, you should generally use different RNG objects from different threads
+in order to be thread-safe. However, the default global RNG is thread-safe as of Julia 1.3 (because
+it internally corresponds to a per-thread RNG).
+
+A `MersenneTwister` or `RandomDevice` RNG can generate uniformly random numbers of the following types:
+[`Float16`](@ref), [`Float32`](@ref), [`Float64`](@ref), [`BigFloat`](@ref), [`Bool`](@ref),
+[`Int8`](@ref), [`UInt8`](@ref), [`Int16`](@ref), [`UInt16`](@ref), [`Int32`](@ref),
+[`UInt32`](@ref), [`Int64`](@ref), [`UInt64`](@ref), [`Int128`](@ref), [`UInt128`](@ref),
+[`BigInt`](@ref) (or complex numbers of those types).
+Random floating point numbers are generated uniformly in ``[0, 1)``. As `BigInt` represents
+unbounded integers, the interval must be specified (e.g. `rand(big.(1:6))`).
+
+Additionally, normal and exponential distributions are implemented for some `AbstractFloat` and
+`Complex` types, see [`randn`](@ref) and [`randexp`](@ref) for details.
+
+!!! warning
+ Because the precise way in which random numbers are generated is considered an implementation detail, bug fixes and speed improvements may change the stream of numbers that are generated after a version change. Relying on a specific seed or generated stream of numbers during unit testing is thus discouraged - consider testing properties of the methods in question instead.
+
+## Random numbers module
+```@docs
+Random.Random
+```
+
+## Random generation functions
+
+```@docs
+Random.rand
+Random.rand!
+Random.bitrand
+Random.randn
+Random.randn!
+Random.randexp
+Random.randexp!
+Random.randstring
+```
+
+## Subsequences, permutations and shuffling
+
+```@docs
+Random.randsubseq
+Random.randsubseq!
+Random.randperm
+Random.randperm!
+Random.randcycle
+Random.randcycle!
+Random.shuffle
+Random.shuffle!
+```
+
+## Generators (creation and seeding)
+
+```@docs
+Random.seed!
+Random.AbstractRNG
+Random.MersenneTwister
+Random.RandomDevice
+```
+
+## Hooking into the `Random` API
+
+There are two mostly orthogonal ways to extend `Random` functionalities:
+1) generating random values of custom types
+2) creating new generators
+
+The API for 1) is quite functional, but is relatively recent so it may still have to evolve in subsequent releases of the `Random` module.
+For example, it's typically sufficient to implement one `rand` method in order to have all other usual methods work automatically.
+
+The API for 2) is still rudimentary, and may require more work than strictly necessary from the implementor,
+in order to support usual types of generated values.
+
+### Generating random values of custom types
+
+Generating random values for some distributions may involve various trade-offs. *Pre-computed* values, such as an [alias table](https://en.wikipedia.org/wiki/Alias_method) for discrete distributions, or [“squeezing” functions](https://en.wikipedia.org/wiki/Rejection_sampling) for univariate distributions, can speed up sampling considerably. How much information should be pre-computed can depend on the number of values we plan to draw from a distribution. Also, some random number generators can have certain properties that various algorithms may want to exploit.
+
+The `Random` module defines a customizable framework for obtaining random values that can address these issues. Each invocation of `rand` generates a *sampler* which can be customized with the above trade-offs in mind, by adding methods to `Sampler`, which in turn can dispatch on the random number generator, the object that characterizes the distribution, and a suggestion for the number of repetitions. Currently, for the latter, `Val{1}` (for a single sample) and `Val{Inf}` (for an arbitrary number) are used, with `Random.Repetition` an alias for both.
+
+The object returned by `Sampler` is then used to generate the random values. When implementing the random generation interface for a value `X` that can be sampled from, the implementor should define the method
+
+```julia
+rand(rng, sampler)
+```
+for the particular `sampler` returned by `Sampler(rng, X, repetition)`.
+
+Samplers can be arbitrary values that implement `rand(rng, sampler)`, but for most applications the following predefined samplers may be sufficient:
+
+1. `SamplerType{T}()` can be used for implementing samplers that draw from type `T` (e.g. `rand(Int)`). This is the default returned by `Sampler` for *types*.
+
+2. `SamplerTrivial(self)` is a simple wrapper for `self`, which can be accessed with `[]`. This is the recommended sampler when no pre-computed information is needed (e.g. `rand(1:3)`), and is the default returned by `Sampler` for *values*.
+
+3. `SamplerSimple(self, data)` also contains the additional `data` field, which can be used to store arbitrary pre-computed values, which should be computed in a *custom method* of `Sampler`.
+
+We provide examples for each of these. We assume here that the choice of algorithm is independent of the RNG, so we use `AbstractRNG` in our signatures.
+
+```@docs
+Random.Sampler
+Random.SamplerType
+Random.SamplerTrivial
+Random.SamplerSimple
+```
+
+Decoupling pre-computation from actually generating the values is part of the API, and is also available to the user. As an example, assume that `rand(rng, 1:20)` has to be called repeatedly in a loop: the way to take advantage of this decoupling is as follows:
+
+```julia
+rng = MersenneTwister()
+sp = Random.Sampler(rng, 1:20) # or Random.Sampler(MersenneTwister, 1:20)
+for x in X
+ n = rand(rng, sp) # similar to n = rand(rng, 1:20)
+ # use n
+end
+```
+
+This is the mechanism that is also used in the standard library, e.g. by the default implementation of random array generation (like in `rand(1:20, 10)`).
+
+#### Generating values from a type
+
+Given a type `T`, it's currently assumed that if `rand(T)` is defined, an object of type `T` will be produced. `SamplerType` is the *default sampler for types*. In order to define random generation of values of type `T`, the `rand(rng::AbstractRNG, ::Random.SamplerType{T})` method should be defined, and should return what `rand(rng, T)` is expected to return.
+
+Let's take the following example: we implement a `Die` type, with a variable number `n` of sides, numbered from `1` to `n`. We want `rand(Die)` to produce a `Die` with a random number of up to 20 sides (and at least 4):
+
+```jldoctest Die
+struct Die
+ nsides::Int # number of sides
+end
+
+Random.rand(rng::AbstractRNG, ::Random.SamplerType{Die}) = Die(rand(rng, 4:20))
+
+# output
+
+```
+
+Scalar and array methods for `Die` now work as expected:
+
+```jldoctest Die; setup = :(Random.seed!(1))
+julia> rand(Die)
+Die(15)
+
+julia> rand(MersenneTwister(0), Die)
+Die(11)
+
+julia> rand(Die, 3)
+3-element Vector{Die}:
+ Die(18)
+ Die(5)
+ Die(4)
+
+julia> a = Vector{Die}(undef, 3); rand!(a)
+3-element Vector{Die}:
+ Die(5)
+ Die(20)
+ Die(15)
+```
+
+#### A simple sampler without pre-computed data
+
+Here we define a sampler for a collection. If no pre-computed data is required, it can be implemented with a `SamplerTrivial` sampler, which is in fact the *default fallback for values*.
+
+In order to define random generation out of objects of type `S`, the following method should be defined: `rand(rng::AbstractRNG, sp::Random.SamplerTrivial{S})`. Here, `sp` simply wraps an object of type `S`, which can be accessed via `sp[]`. Continuing the `Die` example, we now want to define `rand(d::Die)` to produce an `Int` corresponding to one of `d`'s sides:
+
+```jldoctest Die; setup = :(Random.seed!(1))
+julia> Random.rand(rng::AbstractRNG, d::Random.SamplerTrivial{Die}) = rand(rng, 1:d[].nsides);
+
+julia> rand(Die(4))
+3
+
+julia> rand(Die(4), 3)
+3-element Vector{Any}:
+ 4
+ 1
+ 1
+```
+
+Given a collection type `S`, it's currently assumed that if `rand(::S)` is defined, an object of type `eltype(S)` will be produced. In the last example, a `Vector{Any}` is produced; the reason is that `eltype(Die) == Any`. The remedy is to define `Base.eltype(::Type{Die}) = Int`.
+
+#### Generating values for an `AbstractFloat` type
+
+`AbstractFloat` types are special-cased, because by default random values are not produced in the whole type domain, but rather in `[0,1)`. The following method should be implemented for `T <: AbstractFloat`: `Random.rand(::AbstractRNG, ::Random.SamplerTrivial{Random.CloseOpen01{T}})`
+
+#### An optimized sampler with pre-computed data
+
+Consider a discrete distribution, where numbers `1:n` are drawn with given probabilities that sum to one. When many values are needed from this distribution, the fastest method is using an [alias table](https://en.wikipedia.org/wiki/Alias_method). We don't provide the algorithm for building such a table here, but suppose it is available in `make_alias_table(probabilities)` instead, and `draw_number(rng, alias_table)` can be used to draw a random number from it.
+
+Suppose that the distribution is described by
+```julia
+struct DiscreteDistribution{V <: AbstractVector}
+ probabilities::V
+end
+```
+and that we *always* want to build an alias table, regardless of the number of values needed (we learn how to customize this below). The methods
+```julia
+Random.eltype(::Type{<:DiscreteDistribution}) = Int
+
+function Random.Sampler(::Type{<:AbstractRNG}, distribution::DiscreteDistribution, ::Repetition)
+ SamplerSimple(distribution, make_alias_table(distribution.probabilities))
+end
+```
+should be defined to return a sampler with pre-computed data, then
+```julia
+function rand(rng::AbstractRNG, sp::SamplerSimple{<:DiscreteDistribution})
+ draw_number(rng, sp.data)
+end
+```
+will be used to draw the values.
+
+#### Custom sampler types
+
+The `SamplerSimple` type is sufficient for most use cases with precomputed data. However, in order to demonstrate how to use custom sampler types, here we implement something similar to `SamplerSimple`.
+
+Going back to our `Die` example: `rand(::Die)` uses random generation from a range, so there is an opportunity for this optimization. We call our custom sampler `SamplerDie`.
+
+```julia
+import Random: Sampler, rand
+
+struct SamplerDie <: Sampler{Int} # generates values of type Int
+ die::Die
+ sp::Sampler{Int} # this is an abstract type, so this could be improved
+end
+
+Sampler(RNG::Type{<:AbstractRNG}, die::Die, r::Random.Repetition) =
+ SamplerDie(die, Sampler(RNG, 1:die.nsides, r))
+# the `r` parameter will be explained later on
+
+rand(rng::AbstractRNG, sp::SamplerDie) = rand(rng, sp.sp)
+```
+
+It's now possible to get a sampler with `sp = Sampler(rng, die)`, and use `sp` instead of `die` in any `rand` call involving `rng`. In the simplistic example above, `die` doesn't need to be stored in `SamplerDie` but this is often the case in practice.
+
+Of course, this pattern is so frequent that the helper type used above, namely `Random.SamplerSimple`, is available,
+saving us the definition of `SamplerDie`: we could have implemented our decoupling with:
+
+```julia
+Sampler(RNG::Type{<:AbstractRNG}, die::Die, r::Random.Repetition) =
+ SamplerSimple(die, Sampler(RNG, 1:die.nsides, r))
+
+rand(rng::AbstractRNG, sp::SamplerSimple{Die}) = rand(rng, sp.data)
+```
+
+Here, `sp.data` refers to the second parameter in the call to the `SamplerSimple` constructor
+(in this case equal to `Sampler(rng, 1:die.nsides, r)`), while the `Die` object can be accessed
+via `sp[]`.
+
+Like `SamplerDie`, any custom sampler must be a subtype of `Sampler{T}` where `T` is the type
+of the generated values. Note that `SamplerSimple(x, data) isa Sampler{eltype(x)}`,
+so this constrains what the first argument to `SamplerSimple` can be
+(it's recommended to use `SamplerSimple` like in the `Die` example, where
+`x` is simply forwarded while defining a `Sampler` method).
+Similarly, `SamplerTrivial(x) isa Sampler{eltype(x)}`.
+
+Another helper type is currently available for other cases, `Random.SamplerTag`, but is
+considered as internal API, and can break at any time without proper deprecations.
+
+
+#### Using distinct algorithms for scalar or array generation
+
+In some cases, whether one wants to generate only a handful of values or a large number of values
+will have an impact on the choice of algorithm. This is handled with the third parameter of the
+`Sampler` constructor. Let's assume we defined two helper types for `Die`, say `SamplerDie1`
+which should be used to generate only few random values, and `SamplerDieMany` for many values.
+We can use those types as follows:
+
+```julia
+Sampler(RNG::Type{<:AbstractRNG}, die::Die, ::Val{1}) = SamplerDie1(...)
+Sampler(RNG::Type{<:AbstractRNG}, die::Die, ::Val{Inf}) = SamplerDieMany(...)
+```
+
+Of course, `rand` must also be defined on those types (i.e. `rand(::AbstractRNG, ::SamplerDie1)` and `rand(::AbstractRNG, ::SamplerDieMany)`). Note that, as usual, `SamplerTrivial` and `SamplerSimple` can be used if custom types are not necessary.
+
+Note: `Sampler(rng, x)` is simply a shorthand for `Sampler(rng, x, Val(Inf))`, and
+`Random.Repetition` is an alias for `Union{Val{1}, Val{Inf}}`.
+
+
+### Creating new generators
+
+The API is not clearly defined yet, but as a rule of thumb:
+1) any `rand` method producing "basic" types (`isbitstype` integer and floating types in `Base`)
+ should be defined for this specific RNG, if they are needed;
+2) other documented `rand` methods accepting an `AbstractRNG` should work out of the box,
+ (provided the methods from 1) that are relied on are implemented),
+ but can of course be specialized for this RNG if there is room for optimization;
+3) `copy` for pseudo-RNGs should return an independent copy that generates the exact same random sequence as the
+ original from that point when called in the same way. When this is not feasible (e.g. hardware-based RNGs),
+ `copy` must not be implemented.
+
+Concerning 1), a `rand` method may happen to work automatically, but it's not officially
+supported and may break without warnings in a subsequent release.
+
+To define a new `rand` method for a hypothetical `MyRNG` generator, and a value specification `s`
+(e.g. `s == Int`, or `s == 1:10`) of type `S==typeof(s)` or `S==Type{s}` if `s` is a type,
+the same two methods as we saw before must be defined:
+
+1) `Sampler(::Type{MyRNG}, ::S, ::Repetition)`, which returns an object of type say `SamplerS`
+2) `rand(rng::MyRNG, sp::SamplerS)`
+
+It can happen that `Sampler(rng::AbstractRNG, ::S, ::Repetition)` is
+already defined in the `Random` module. It would then be possible to
+skip step 1) in practice (if one wants to specialize generation for
+this particular RNG type), but the corresponding `SamplerS` type is
+considered an internal detail, and may be changed without warning.
+
+
+#### Specializing array generation
+
+In some cases, for a given RNG type, generating an array of random
+values can be more efficient with a specialized method than by merely
+using the decoupling technique explained before. This is for example
+the case for `MersenneTwister`, which natively writes random values in
+an array.
+
+To implement this specialization for `MyRNG`
+and for a specification `s`, producing elements of type `S`,
+the following method can be defined:
+`rand!(rng::MyRNG, a::AbstractArray{S}, ::SamplerS)`,
+where `SamplerS` is the type of the sampler returned by `Sampler(MyRNG, s, Val(Inf))`.
+Instead of `AbstractArray`, it's possible to implement the functionality only for a subtype, e.g. `Array{S}`.
+The non-mutating array method of `rand` will automatically call this specialization internally.
+
+```@meta
+DocTestSetup = nothing
+```
+
+# Reproducibility
+
+By using an RNG parameter initialized with a given seed, you can reproduce the same pseudorandom
+number sequence when running your program multiple times. However, a minor release of Julia (e.g.
+1.3 to 1.4) *may change* the sequence of pseudorandom numbers generated from a specific seed, in
+particular if `MersenneTwister` is used. (Even if the sequence produced by a low-level function like
+[`rand`](@ref) does not change, the output of higher-level functions like [`randsubseq`](@ref) may
+change due to algorithm updates.) Rationale: guaranteeing that pseudorandom streams never change
+prohibits many algorithmic improvements.
+
+If you need to guarantee exact reproducibility of random data, it is advisable to simply *save the
+data* (e.g. as a supplementary attachment in a scientific publication). (You can also, of course,
+specify a particular Julia version and package manifest, especially if you require bit
+reproducibility.)
+
+Software tests that rely on *specific* "random" data should also generally either save the data,
+embed it into the test code, or use third-party packages like
+[StableRNGs.jl](https://github.com/JuliaRandom/StableRNGs.jl). On the other hand, tests that should
+pass for *most* random data (e.g. testing `A \ (A*x) ≈ x` for a random matrix `A = randn(n,n)`) can
+use an RNG with a fixed seed to ensure that simply running the test many times does not encounter a
+failure due to very improbable data (e.g. an extremely ill-conditioned matrix).
+
+The statistical *distribution* from which random samples are drawn *is* guaranteed to be the same
+across any minor Julia releases.
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/SHA/LICENSE.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/SHA/LICENSE.md
new file mode 100644
index 0000000..eec075c
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/SHA/LICENSE.md
@@ -0,0 +1,58 @@
+The SHA.jl package is licensed under the MIT "Expat" License:
+
+> Copyright (c) 2014: Elliot Saba.
+>
+> Permission is hereby granted, free of charge, to any person obtaining
+> a copy of this software and associated documentation files (the
+> "Software"), to deal in the Software without restriction, including
+> without limitation the rights to use, copy, modify, merge, publish,
+> distribute, sublicense, and/or sell copies of the Software, and to
+> permit persons to whom the Software is furnished to do so, subject to
+> the following conditions:
+>
+> The above copyright notice and this permission notice shall be
+> included in all copies or substantial portions of the Software.
+>
+> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+> EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+> MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+> IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+> CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+> TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+> SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+This package was inspired by the SHA2 [source code from Minix](https://github.com/minix3/minix/blob/b6cbf7203b080219de306404f8022a65b7884f33/common/lib/libc/hash/sha2/sha2.c), itself released under the BSD license:
+
+> sha2.c
+>
+> Version 1.0.0beta1
+>
+> Written by Aaron D. Gifford <me@aarongifford.com>
+>
+> Copyright 2000 Aaron D. Gifford. All rights reserved.
+>
+> Redistribution and use in source and binary forms, with or without
+> modification, are permitted provided that the following conditions
+> are met:
+>
+> 1. Redistributions of source code must retain the above copyright
+> notice, this list of conditions and the following disclaimer.
+>
+> 2. Redistributions in binary form must reproduce the above copyright
+> notice, this list of conditions and the following disclaimer in the
+> documentation and/or other materials provided with the distribution.
+>
+> 3. Neither the name of the copyright holder nor the names of contributors
+> may be used to endorse or promote products derived from this software
+> without specific prior written permission.
+>
+> THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) AND CONTRIBUTOR(S) ``AS IS'' AND
+> ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+> IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+> ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR(S) OR CONTRIBUTOR(S) BE LIABLE
+> FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+> DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+> OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+> HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+> LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+> OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/SHA/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/SHA/docs/src/index.md
new file mode 100644
index 0000000..30c88e5
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/SHA/docs/src/index.md
@@ -0,0 +1,75 @@
+# SHA
+
+
+Usage is very straightforward:
+```julia
+julia> using SHA
+
+julia> bytes2hex(sha256("test"))
+"9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08"
+```
+
+Each exported function (at the time of this writing, SHA-1, SHA-2 224, 256, 384 and 512, and SHA-3 224, 256, 384 and 512 functions are implemented) takes in either an `AbstractVector{UInt8}`, an `AbstractString` or an `IO` object. This makes it trivial to checksum a file:
+
+```julia
+shell> cat /tmp/test.txt
+test
+julia> using SHA
+
+julia> open("/tmp/test.txt") do f
+ sha2_256(f)
+ end
+32-element Array{UInt8,1}:
+ 0x9f
+ 0x86
+ 0xd0
+ 0x81
+ 0x88
+ 0x4c
+ 0x7d
+ 0x65
+ ⋮
+ 0x5d
+ 0x6c
+ 0x15
+ 0xb0
+ 0xf0
+ 0x0a
+ 0x08
+```
+
+Due to the colloquial usage of `sha256` to refer to `sha2_256`, convenience functions are provided, mapping `shaxxx()` function calls to `sha2_xxx()`. For SHA-3, no such colloquialisms exist and the user must use the full `sha3_xxx()` names.
+
+`shaxxx()` takes `AbstractString` and array-like objects (`NTuple` and `Array`) with elements of type `UInt8`.
+
+To create a hash from multiple items the `SHAX_XXX_CTX()` types can be used to create a stateful hash object that
+is updated with `update!` and finalized with `digest!`:
+
+```julia
+julia> ctx = SHA2_256_CTX()
+SHA2 256-bit hash state
+
+julia> update!(ctx, b"some data")
+0x0000000000000009
+
+julia> update!(ctx, b"some more data")
+0x0000000000000017
+
+julia> digest!(ctx)
+32-element Vector{UInt8}:
+ 0xbe
+ 0xcf
+ 0x23
+ 0xda
+ 0xaf
+ 0x02
+ ⋮
+ 0x25
+ 0x52
+ 0x19
+ 0xa0
+ 0x8b
+ 0xc5
+```
+
+Note that, at the time of this writing, the SHA3 code is not optimized, and as such is roughly an order of magnitude slower than SHA2.
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Serialization/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Serialization/docs/src/index.md
new file mode 100644
index 0000000..c01ead7
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Serialization/docs/src/index.md
@@ -0,0 +1,7 @@
+# Serialization
+
+```@docs
+Serialization.serialize
+Serialization.deserialize
+Serialization.writeheader
+```
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/SharedArrays/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/SharedArrays/docs/src/index.md
new file mode 100644
index 0000000..7b23ec1
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/SharedArrays/docs/src/index.md
@@ -0,0 +1,11 @@
+# Shared Arrays
+
+```@docs
+SharedArrays.SharedArray
+SharedArrays.SharedVector
+SharedArrays.SharedMatrix
+SharedArrays.procs(::SharedArray)
+SharedArrays.sdata
+SharedArrays.indexpids
+SharedArrays.localindices
+```
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Sockets/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Sockets/docs/src/index.md
new file mode 100644
index 0000000..c294461
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Sockets/docs/src/index.md
@@ -0,0 +1,33 @@
+# Sockets
+
+```@docs
+Sockets.Sockets
+Sockets.connect(::TCPSocket, ::Integer)
+Sockets.connect(::AbstractString)
+Sockets.listen(::Any)
+Sockets.listen(::AbstractString)
+Sockets.getaddrinfo
+Sockets.getipaddr
+Sockets.getipaddrs
+Sockets.islinklocaladdr
+Sockets.getalladdrinfo
+Sockets.DNSError
+Sockets.getnameinfo
+Sockets.getsockname
+Sockets.getpeername
+Sockets.IPAddr
+Sockets.IPv4
+Sockets.IPv6
+Sockets.@ip_str
+Sockets.TCPSocket
+Sockets.UDPSocket
+Sockets.accept
+Sockets.listenany
+Sockets.bind
+Sockets.send
+Sockets.recv
+Sockets.recvfrom
+Sockets.setopt
+Sockets.nagle
+Sockets.quickack
+```
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/SparseArrays/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/SparseArrays/docs/src/index.md
new file mode 100644
index 0000000..286c46c
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/SparseArrays/docs/src/index.md
@@ -0,0 +1,232 @@
+# Sparse Arrays
+
+```@meta
+DocTestSetup = :(using SparseArrays, LinearAlgebra)
+```
+
+Julia has support for sparse vectors and [sparse matrices](https://en.wikipedia.org/wiki/Sparse_matrix)
+in the `SparseArrays` stdlib module. Sparse arrays are arrays that contain enough zeros
+that storing them in a special data structure leads to savings in space and execution time,
+compared to dense arrays.
+
+## [Compressed Sparse Column (CSC) Sparse Matrix Storage](@id man-csc)
+
+In Julia, sparse matrices are stored in the [Compressed Sparse Column (CSC) format](https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_column_.28CSC_or_CCS.29).
+Julia sparse matrices have the type [`SparseMatrixCSC{Tv,Ti}`](@ref), where `Tv` is the
+type of the stored values, and `Ti` is the integer type for storing column pointers and
+row indices. The internal representation of `SparseMatrixCSC` is as follows:
+
+```julia
+struct SparseMatrixCSC{Tv,Ti<:Integer} <: AbstractSparseMatrixCSC{Tv,Ti}
+ m::Int # Number of rows
+ n::Int # Number of columns
+ colptr::Vector{Ti} # Column j is in colptr[j]:(colptr[j+1]-1)
+ rowval::Vector{Ti} # Row indices of stored values
+ nzval::Vector{Tv} # Stored values, typically nonzeros
+end
+```
+
+The compressed sparse column storage makes it easy and quick to access the elements in the column
+of a sparse matrix, whereas accessing the sparse matrix by rows is considerably slower. Operations
+such as insertion of previously unstored entries one at a time in the CSC structure tend to be slow. This is
+because all elements of the sparse matrix that are beyond the point of insertion have to be moved
+one place over.
+
+All operations on sparse matrices are carefully implemented to exploit the CSC data structure
+for performance, and to avoid expensive operations.
+
+If you have data in CSC format from a different application or library, and wish to import it
+in Julia, make sure that you use 1-based indexing. The row indices in every column need to be
+sorted. If your `SparseMatrixCSC` object contains unsorted row indices, one quick way to sort
+them is by doing a double transpose.
+
+In some applications, it is convenient to store explicit zero values in a `SparseMatrixCSC`. These
+*are* accepted by functions in `Base` (but there is no guarantee that they will be preserved in
+mutating operations). Such explicitly stored zeros are treated as structural nonzeros by many
+routines. The [`nnz`](@ref) function returns the number of elements explicitly stored in the
+sparse data structure, including non-structural zeros. In order to count the exact number of
+numerical nonzeros, use [`count(!iszero, x)`](@ref), which inspects every stored element of a sparse
+matrix. [`dropzeros`](@ref), and the in-place [`dropzeros!`](@ref), can be used to
+remove stored zeros from the sparse matrix.
+
+```jldoctest
+julia> A = sparse([1, 1, 2, 3], [1, 3, 2, 3], [0, 1, 2, 0])
+3×3 SparseMatrixCSC{Int64, Int64} with 4 stored entries:
+ 0 ⋅ 1
+ ⋅ 2 ⋅
+ ⋅ ⋅ 0
+
+julia> dropzeros(A)
+3×3 SparseMatrixCSC{Int64, Int64} with 2 stored entries:
+ ⋅ ⋅ 1
+ ⋅ 2 ⋅
+ ⋅ ⋅ ⋅
+```
+
+## Sparse Vector Storage
+
+Sparse vectors are stored in a close analog to compressed sparse column format for sparse
+matrices. In Julia, sparse vectors have the type [`SparseVector{Tv,Ti}`](@ref) where `Tv`
+is the type of the stored values and `Ti` the integer type for the indices. The internal
+representation is as follows:
+
+```julia
+struct SparseVector{Tv,Ti<:Integer} <: AbstractSparseVector{Tv,Ti}
+ n::Int # Length of the sparse vector
+ nzind::Vector{Ti} # Indices of stored values
+ nzval::Vector{Tv} # Stored values, typically nonzeros
+end
+```
+
+As for [`SparseMatrixCSC`](@ref), the `SparseVector` type can also contain explicitly
+stored zeros. (See [Sparse Matrix Storage](@ref man-csc).)
+
+## Sparse Vector and Matrix Constructors
+
+The simplest way to create a sparse array is to use a function equivalent to the [`zeros`](@ref)
+function that Julia provides for working with dense arrays. To produce a
+sparse array instead, you can use the same name with an `sp` prefix:
+
+```jldoctest
+julia> spzeros(3)
+3-element SparseVector{Float64, Int64} with 0 stored entries
+```
+
+The [`sparse`](@ref) function is often a handy way to construct sparse arrays. For
+example, to construct a sparse matrix we can input a vector `I` of row indices, a vector
+`J` of column indices, and a vector `V` of stored values (this is also known as the
+[COO (coordinate) format](https://en.wikipedia.org/wiki/Sparse_matrix#Coordinate_list_.28COO.29)).
+`sparse(I,J,V)` then constructs a sparse matrix such that `S[I[k], J[k]] = V[k]`. The
+equivalent sparse vector constructor is [`sparsevec`](@ref), which takes the (row) index
+vector `I` and the vector `V` with the stored values and constructs a sparse vector `R`
+such that `R[I[k]] = V[k]`.
+
+```jldoctest sparse_function
+julia> I = [1, 4, 3, 5]; J = [4, 7, 18, 9]; V = [1, 2, -5, 3];
+
+julia> S = sparse(I,J,V)
+5×18 SparseMatrixCSC{Int64, Int64} with 4 stored entries:
+⠀⠈⠀⡀⠀⠀⠀⠀⠠
+⠀⠀⠀⠀⠁⠀⠀⠀⠀
+
+julia> R = sparsevec(I,V)
+5-element SparseVector{Int64, Int64} with 4 stored entries:
+ [1] = 1
+ [3] = -5
+ [4] = 2
+ [5] = 3
+```
+
+The inverse of the [`sparse`](@ref) and [`sparsevec`](@ref) functions is
+[`findnz`](@ref), which retrieves the inputs used to create the sparse array.
+[`findall(!iszero, x)`](@ref) returns the Cartesian indices of non-zero entries in `x`
+(not including stored entries equal to zero).
+
+```jldoctest sparse_function
+julia> findnz(S)
+([1, 4, 5, 3], [4, 7, 9, 18], [1, 2, 3, -5])
+
+julia> findall(!iszero, S)
+4-element Vector{CartesianIndex{2}}:
+ CartesianIndex(1, 4)
+ CartesianIndex(4, 7)
+ CartesianIndex(5, 9)
+ CartesianIndex(3, 18)
+
+julia> findnz(R)
+([1, 3, 4, 5], [1, -5, 2, 3])
+
+julia> findall(!iszero, R)
+4-element Vector{Int64}:
+ 1
+ 3
+ 4
+ 5
+```
+
+Another way to create a sparse array is to convert a dense array into a sparse array using
+the [`sparse`](@ref) function:
+
+```jldoctest
+julia> sparse(Matrix(1.0I, 5, 5))
+5×5 SparseMatrixCSC{Float64, Int64} with 5 stored entries:
+ 1.0 ⋅ ⋅ ⋅ ⋅
+ ⋅ 1.0 ⋅ ⋅ ⋅
+ ⋅ ⋅ 1.0 ⋅ ⋅
+ ⋅ ⋅ ⋅ 1.0 ⋅
+ ⋅ ⋅ ⋅ ⋅ 1.0
+
+julia> sparse([1.0, 0.0, 1.0])
+3-element SparseVector{Float64, Int64} with 2 stored entries:
+ [1] = 1.0
+ [3] = 1.0
+```
+
+You can go in the other direction using the [`Array`](@ref) constructor. The [`issparse`](@ref)
+function can be used to query if a matrix is sparse.
+
+```jldoctest
+julia> issparse(spzeros(5))
+true
+```
+
+## Sparse matrix operations
+
+Arithmetic operations on sparse matrices also work as they do on dense matrices. Indexing of,
+assignment into, and concatenation of sparse matrices work in the same way as dense matrices.
+Indexing operations, especially assignment, are expensive, when carried out one element at a time.
+In many cases it may be better to convert the sparse matrix into `(I,J,V)` format using [`findnz`](@ref),
+manipulate the values or the structure in the dense vectors `(I,J,V)`, and then reconstruct
+the sparse matrix.
+
+## Correspondence of dense and sparse methods
+
+The following table gives a correspondence between built-in methods on sparse matrices and their
+corresponding methods on dense matrix types. In general, methods that generate sparse matrices
+differ from their dense counterparts in that the resulting matrix follows the same sparsity pattern
+as a given sparse matrix `S`, or that the resulting sparse matrix has density `d`, i.e. each matrix
+element has a probability `d` of being non-zero.
+
+Details can be found in the [Sparse Vectors and Matrices](@ref stdlib-sparse-arrays)
+section of the standard library reference.
+
+| Sparse | Dense | Description |
+|:-------------------------- |:---------------------- |:--------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| [`spzeros(m,n)`](@ref) | [`zeros(m,n)`](@ref) | Creates a *m*-by-*n* matrix of zeros. ([`spzeros(m,n)`](@ref) is empty.) |
+| [`sparse(I,n,n)`](@ref) | [`Matrix(I,n,n)`](@ref)| Creates a *n*-by-*n* identity matrix. |
+| [`sparse(A)`](@ref) | [`Array(S)`](@ref) | Interconverts between dense and sparse formats. |
+| [`sprand(m,n,d)`](@ref) | [`rand(m,n)`](@ref) | Creates a *m*-by-*n* random matrix (of density *d*) with iid non-zero elements distributed uniformly on the half-open interval ``[0, 1)``. |
+| [`sprandn(m,n,d)`](@ref) | [`randn(m,n)`](@ref) | Creates a *m*-by-*n* random matrix (of density *d*) with iid non-zero elements distributed according to the standard normal (Gaussian) distribution. |
+| [`sprandn(rng,m,n,d)`](@ref) | [`randn(rng,m,n)`](@ref) | Creates a *m*-by-*n* random matrix (of density *d*) with iid non-zero elements generated with the `rng` random number generator |
+
+# [Sparse Arrays](@id stdlib-sparse-arrays)
+
+```@docs
+SparseArrays.AbstractSparseArray
+SparseArrays.AbstractSparseVector
+SparseArrays.AbstractSparseMatrix
+SparseArrays.SparseVector
+SparseArrays.SparseMatrixCSC
+SparseArrays.sparse
+SparseArrays.sparsevec
+SparseArrays.issparse
+SparseArrays.nnz
+SparseArrays.findnz
+SparseArrays.spzeros
+SparseArrays.spdiagm
+SparseArrays.blockdiag
+SparseArrays.sprand
+SparseArrays.sprandn
+SparseArrays.nonzeros
+SparseArrays.rowvals
+SparseArrays.nzrange
+SparseArrays.droptol!
+SparseArrays.dropzeros!
+SparseArrays.dropzeros
+SparseArrays.permute
+permute!{Tv, Ti, Tp <: Integer, Tq <: Integer}(::SparseMatrixCSC{Tv,Ti}, ::SparseMatrixCSC{Tv,Ti}, ::AbstractArray{Tp,1}, ::AbstractArray{Tq,1})
+```
+
+```@meta
+DocTestSetup = nothing
+```
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Statistics/LICENSE.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Statistics/LICENSE.md
new file mode 100644
index 0000000..7528792
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Statistics/LICENSE.md
@@ -0,0 +1,24 @@
+Statistics.jl is licensed under the MIT License:
+
+> Copyright (c) 2012-2016: Jeff Bezanson, Stefan Karpinski, Viral B. Shah,
+> Dahua Lin, Simon Byrne, Andreas Noack, Douglas Bates, John Myles White,
+> Simon Kornblith, and other contributors.
+
+> Permission is hereby granted, free of charge, to any person obtaining
+> a copy of this software and associated documentation files (the
+> "Software"), to deal in the Software without restriction, including
+> without limitation the rights to use, copy, modify, merge, publish,
+> distribute, sublicense, and/or sell copies of the Software, and to
+> permit persons to whom the Software is furnished to do so, subject to
+> the following conditions:
+>
+> The above copyright notice and this permission notice shall be
+> included in all copies or substantial portions of the Software.
+>
+> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+> EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+> MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+> NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+> LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+> OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+> WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Statistics/README.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Statistics/README.md
new file mode 100644
index 0000000..db50bb9
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Statistics/README.md
@@ -0,0 +1,19 @@
+# Statistics.jl
+
+[![Travis CI Build Status][travis-img]][travis-url]
+
+Development repository for the Statistics standard library (stdlib) that ships with Julia.
+
+#### Using the development version of Statistics.jl
+
+If you want to develop this package, do the following steps:
+- Clone the repo anywhere.
+- In line 2 of the `Project.toml` file (the line that begins with `uuid = ...`), modify the UUID, e.g. change the `107` to `207`.
+- Change the current directory to the Statistics repo you just cloned and start julia with `julia --project`.
+- `import Statistics` will now load the files in the cloned repo instead of the Statistics stdlib.
+- To test your changes, simply do `include("test/runtests.jl")`.
+
+If you need to build Julia from source with a git checkout of Statistics, then instead use `make DEPS_GIT=Statistics` when building Julia. The `Statistics` repo is in `stdlib/Statistics`, and created initially with a detached `HEAD`. If you're doing this from a pre-existing Julia repository, you may need to `make clean` beforehand.
+
+[travis-img]: https://travis-ci.com/JuliaLang/Statistics.jl.svg?branch=master
+[travis-url]: https://travis-ci.com/JuliaLang/Statistics.jl
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Statistics/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Statistics/docs/src/index.md
new file mode 100644
index 0000000..93f3db5
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Statistics/docs/src/index.md
@@ -0,0 +1,19 @@
+# Statistics
+
+The Statistics standard library module contains basic statistics functionality.
+
+```@docs
+std
+stdm
+var
+varm
+cor
+cov
+mean!
+mean
+median!
+median
+middle
+quantile!
+quantile
+```
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/SuiteSparse/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/SuiteSparse/docs/src/index.md
new file mode 100644
index 0000000..e8654ca
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/SuiteSparse/docs/src/index.md
@@ -0,0 +1,34 @@
+# Sparse Linear Algebra
+
+```@meta
+DocTestSetup = :(using LinearAlgebra, SparseArrays, SuiteSparse)
+```
+
+Sparse matrix solvers call functions from [SuiteSparse](http://suitesparse.com). The following factorizations are available:
+
+| Type | Description |
+|:--------------------------------- |:--------------------------------------------- |
+| `SuiteSparse.CHOLMOD.Factor` | Cholesky factorization |
+| `SuiteSparse.UMFPACK.UmfpackLU` | LU factorization |
+| `SuiteSparse.SPQR.QRSparse` | QR factorization |
+
+Other solvers such as [Pardiso.jl](https://github.com/JuliaSparse/Pardiso.jl/) are available as external packages. [Arpack.jl](https://julialinearalgebra.github.io/Arpack.jl/stable/) provides `eigs` and `svds` for iterative solution of eigensystems and singular value decompositions.
+
+These factorizations are described in the [`Linear Algebra`](@ref man-linalg) section of the manual:
+1. [`cholesky`](@ref)
+2. [`ldlt`](@ref)
+3. [`lu`](@ref)
+4. [`qr`](@ref)
+
+```@docs
+SuiteSparse.CHOLMOD.lowrankupdate
+SuiteSparse.CHOLMOD.lowrankupdate!
+SuiteSparse.CHOLMOD.lowrankdowndate
+SuiteSparse.CHOLMOD.lowrankdowndate!
+SuiteSparse.CHOLMOD.lowrankupdowndate!
+```
+
+
+```@meta
+DocTestSetup = nothing
+```
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/TOML/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/TOML/docs/src/index.md
new file mode 100644
index 0000000..36e8ec6
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/TOML/docs/src/index.md
@@ -0,0 +1,132 @@
+# TOML
+
+TOML.jl is a Julia standard library for parsing and writing [TOML
+v1.0](https://toml.io/en/) files.
+
+## Parsing TOML data
+
+```jldoctest
+julia> using TOML
+
+julia> data = """
+ [database]
+ server = "192.168.1.1"
+ ports = [ 8001, 8001, 8002 ]
+ """;
+
+julia> TOML.parse(data)
+Dict{String, Any} with 1 entry:
+ "database" => Dict{String, Any}("server"=>"192.168.1.1", "ports"=>[8001, 8001…
+```
+
+To parse a file, use [`TOML.parsefile`](@ref). If the file has a syntax error,
+an exception is thrown:
+
+```jldoctest
+julia> using TOML
+
+julia> TOML.parse("""
+ value = 0.0.0
+ """)
+ERROR: TOML Parser error:
+none:1:16 error: failed to parse value
+ value = 0.0.0
+ ^
+[...]
+```
+
+There are other versions of the parse functions ([`TOML.tryparse`](@ref)
+and [`TOML.tryparsefile`](@ref)) that, instead of throwing exceptions on parser error,
+return a [`TOML.ParserError`](@ref) with information:
+
+```jldoctest
+julia> using TOML
+
+julia> err = TOML.tryparse("""
+ value = 0.0.0
+ """);
+
+julia> err.type
+ErrGenericValueError::ErrorType = 14
+
+julia> err.line
+1
+
+julia> err.column
+16
+```
+
+
+## Exporting data to TOML file
+
+The [`TOML.print`](@ref) function is used to print (or serialize) data into TOML
+format.
+
+```jldoctest
+julia> using TOML
+
+julia> data = Dict(
+ "names" => ["Julia", "Julio"],
+ "age" => [10, 20],
+ );
+
+julia> TOML.print(data)
+names = ["Julia", "Julio"]
+age = [10, 20]
+
+julia> fname = tempname();
+
+julia> open(fname, "w") do io
+ TOML.print(io, data)
+ end
+
+julia> TOML.parsefile(fname)
+Dict{String, Any} with 2 entries:
+ "names" => ["Julia", "Julio"]
+ "age" => [10, 20]
+```
+
+Keys can be sorted according to some value
+
+```jldoctest
+julia> using TOML
+
+julia> TOML.print(Dict(
+ "abc" => 1,
+ "ab" => 2,
+ "abcd" => 3,
+ ); sorted=true, by=length)
+ab = 2
+abc = 1
+abcd = 3
+```
+
+For custom structs, pass a function that converts the struct to a supported
+type
+
+```jldoctest
+julia> using TOML
+
+julia> struct MyStruct
+ a::Int
+ b::String
+ end
+
+julia> TOML.print(Dict("foo" => MyStruct(5, "bar"))) do x
+ x isa MyStruct && return [x.a, x.b]
+ error("unhandled type $(typeof(x))")
+ end
+foo = [5, "bar"]
+```
+
+
+## References
+```@docs
+TOML.parse
+TOML.parsefile
+TOML.tryparse
+TOML.tryparsefile
+TOML.print
+TOML.Parser
+TOML.ParserError
+```
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Tar/README.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Tar/README.md
new file mode 100644
index 0000000..d9c2e08
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Tar/README.md
@@ -0,0 +1,467 @@
+# Tar.jl
+
+[![Build Status](https://travis-ci.org/JuliaIO/Tar.jl.svg?branch=master)](https://travis-ci.org/JuliaIO/Tar.jl)
+[![Codecov](https://codecov.io/gh/JuliaIO/Tar.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/JuliaIO/Tar.jl)
+
+The `Tar` package can list, extract and create POSIX TAR archives ("tarballs")
+as specified in [POSIX
+1003.1-2001](https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html).
+It is designed to support using the TAR format as a mechanism for sending trees
+of files from one system to another, rather than for the historical use case of
+backing up files for restoration to the same system. Because of this design
+goal, `Tar` intentionally ignores much of the metadata included in the TAR
+format, which does not make sense for the data transfer use case. The package
+also does not aim to read or create legacy non-POSIX variants of the TAR format,
+although it does support reading GNU long name and long link extensions.
+
+## API & Usage
+
+The public API of `Tar` includes five functions and one type:
+
+* `create` — creates a tarball from an on-disk file tree
+* `extract` — extracts a tarball to an on-disk file tree
+* `list` — lists the contents of a tarball as a vector of `Header` objects
+* `rewrite` — rewrite a tarball to the standard format `create` produces
+* `tree_hash` — compute a tree hash of the content of a tarball (default: git
+ SHA1)
+* `Header` — struct representing metadata that `Tar` considers important in a
+ TAR entry
+
+None of these are exported, however: the recommended usage is to do `import Tar`
+and then access all of these names fully qualified as `Tar.create`,
+`Tar.extract` and so on.
+
+<!-- BEGIN: copied from inline doc strings -->
+
+### Tar.create
+
+```jl
+create([ predicate, ] dir, [ tarball ]; [ skeleton ]) -> tarball
+```
+* `predicate :: String --> Bool`
+* `dir :: AbstractString`
+* `tarball :: Union{AbstractString, AbstractCmd, IO}`
+* `skeleton :: Union{AbstractString, AbstractCmd, IO}`
+
+Create a tar archive ("tarball") of the directory `dir`. The resulting archive
+is written to the path `tarball` or if no path is specified, a temporary path is
+created and returned by the function call. If `tarball` is an IO object then the
+tarball content is written to that handle instead (the handle is left open).
+
+If a `predicate` function is passed, it is called on each system path that is
+encountered while recursively searching `dir` and `path` is only included in the
+tarball if `predicate(path)` is true. If `predicate(path)` returns false for a
+directory, then the directory is excluded entirely: nothing under that directory
+will be included in the archive.
+
+If the `skeleton` keyword is passed then the file or IO handle given is used as
+a "skeleton" to generate the tarball. You create a skeleton file by passing the
+`skeleton` keyword to the `extract` command. If `create` is called with that
+skeleton file and the extracted files haven't changed, an identical tarball is
+recreated. The `skeleton` and `predicate` arguments cannot be used together.
+
+### Tar.extract
+
+```jl
+extract([ predicate, ] tarball, [ dir ];
+ [ skeleton, ] [ copy_symlinks ]) -> dir
+```
+* `predicate :: Header --> Bool`
+* `tarball :: Union{AbstractString, AbstractCmd, IO}`
+* `dir :: AbstractString`
+* `skeleton :: Union{AbstractString, AbstractCmd, IO}`
+* `copy_symlinks :: Bool`
+
+Extract a tar archive ("tarball") located at the path `tarball` into the
+directory `dir`. If `tarball` is an IO object instead of a path, then the
+archive contents will be read from that IO stream. The archive is extracted to
+`dir` which must either be an existing empty directory or a non-existent path
+which can be created as a new directory. If `dir` is not specified, the archive
+is extracted into a temporary directory which is returned by `extract`.
+
+If a `predicate` function is passed, it is called on each `Header` object that
+is encountered while extracting `tarball` and the entry is only extracted if the
+`predicate(hdr)` is true. This can be used to selectively extract only parts of
+an archive, to skip entries that cause `extract` to throw an error, or to record
+what is extracted during the extraction process.
+
+If the `skeleton` keyword is passed then a "skeleton" of the extracted tarball
+is written to the file or IO handle given. This skeleton file can be used to
+recreate an identical tarball by passing the `skeleton` keyword to the `create`
+function. The `skeleton` and `predicate` arguments cannot be used together.
+
+If `copy_symlinks` is `true` then instead of extracting symbolic links as such,
+they will be extracted as copies of what they link to if they are internal to
+the tarball and if it is possible to do so. Non-internal symlinks, such as a
+link to `/etc/passwd` will not be copied. Symlinks which are in any way cyclic
+will also not be copied and will instead be skipped. By default, `extract` will
+detect whether symlinks can be created in `dir` or not and will automatically
+copy symlinks if they cannot be created.
+
+### Tar.list
+
+```jl
+list(tarball; [ strict = true ]) -> Vector{Header}
+list(callback, tarball; [ strict = true ])
+```
+* `callback :: Header --> Bool`
+* `tarball :: Union{AbstractString, AbstractCmd, IO}`
+* `strict :: Bool`
+
+List the contents of a tar archive ("tarball") located at the path `tarball`. If
+`tarball` is an IO handle, read the tar contents from that stream. Returns a
+vector of `Header` structs. See [`Header`](@ref) for details. If a `callback` is
+provided then instead of returning a vector of headers, the callback is called
+on each `Header`. This can be useful if the number of items in the tarball is
+large or if you want to examine items prior to an error in the tarball.
+
+By default `list` will error if it encounters any tarball contents which the
+`extract` function would refuse to extract. With `strict=false` it will skip
+these checks and list all the contents of the tar file whether `extract`
+would extract them or not. Beware that malicious tarballs can do all sorts of
+crafty and unexpected things to try to trick you into doing something bad.
+
+If the `tarball` argument is a skeleton file (see `extract` and `create`) then
+`list` will detect that from the file header and appropriately list or iterate
+the headers of the skeleton file.
+
+### Tar.rewrite
+
+```jl
+rewrite([ predicate, ], old_tarball, [ new_tarball ]) -> new_tarball
+```
+* `predicate :: Header --> Bool`
+* `old_tarball :: Union{AbstractString, AbstractCmd, IO}`
+* `new_tarball :: Union{AbstractString, AbstractCmd, IO}`
+
+Rewrite `old_tarball` to the standard format that `create` generates, while also
+checking that it doesn't contain anything that would cause `extract` to raise an
+error. This is functionally equivalent to doing
+```jl
+Tar.create(Tar.extract(predicate, old_tarball), new_tarball)
+```
+However, it never extracts anything to disk and instead uses the `seek` function
+to navigate the old tarball's data. If no `new_tarball` argument is passed, the
+new tarball is written to a temporary file whose path is returned.
+
+If a `predicate` function is passed, it is called on each `Header` object that
+is encountered while extracting `old_tarball` and the entry is skipped unless
+`predicate(hdr)` is true. This can be used to selectively rewrite only parts of
+an archive, to skip entries that would cause `extract` to throw an error, or to
+record what content is encountered during the rewrite process.
+
+### Tar.tree_hash
+
+```jl
+tree_hash([ predicate, ] tarball;
+ [ algorithm = "git-sha1", ]
+ [ skip_empty = false ]) -> hash::String
+```
+* `predicate :: Header --> Bool`
+* `tarball :: Union{AbstractString, AbstractCmd, IO}`
+* `algorithm :: AbstractString`
+* `skip_empty :: Bool`
+
+Compute a tree hash value for the file tree that the tarball contains. By
+default, this uses git's tree hashing algorithm with the SHA1 secure hash
+function (like current versions of git). This means that for any tarball whose
+file tree git can represent—i.e. one with only files, symlinks and non-empty
+directories—the hash value computed by this function will be the same as the
+hash value git would compute for that file tree. Note that tarballs can
+represent file trees with empty directories, which git cannot store, and this
+function can generate hashes for those, which will, by default (see `skip_empty`
+below for how to change this behavior), differ from the hash of a tarball which
+omits those empty directories. In short, the hash function agrees with git on
+all trees which git can represent, but extends (in a consistent way) the domain
+of hashable trees to other trees which git cannot represent.
+
+If a `predicate` function is passed, it is called on each `Header` object that
+is encountered while processing `tarball` and an entry is only hashed if
+`predicate(hdr)` is true. This can be used to selectively hash only parts of an
+archive, to skip entries that cause `extract` to throw an error, or to record
+what is extracted during the hashing process.
+
+Currently supported values for `algorithm` are `git-sha1` (the default) and
+`git-sha256`, which uses the same basic algorithm as `git-sha1` but replaces the
+SHA1 hash function with SHA2-256, the hash function that git will transition to
+using in the future (due to known attacks on SHA1). Support for other file tree
+hashing algorithms may be added in the future.
+
+The `skip_empty` option controls whether directories in the tarball which
+recursively contain no files or symlinks are included in the hash or ignored.
+In general, if you are hashing the content of a tarball or a file tree, you care
+about all directories, not just non-empty ones, so including these in the
+computed hash is the default. So why does this function even provide the option
+to skip empty directories? Because git refuses to store empty directories and
+will ignore them if you try to add them to a repo. So if you compute a reference
+tree hash by adding files to a git repo and then asking git for the tree
+hash, the hash value that you get will match the hash value computed by
+`tree_hash` with `skip_empty=true`. In other words, this option allows
+`tree_hash` to emulate how git would hash a tree with empty directories. If you
+are hashing trees that may contain empty directories (i.e. do not come from a
+git repo), however, it is recommended that you hash them using a tool (such as
+this one) that does not ignore empty directories.
+
+### Tar.Header
+
+The `Header` type is a struct representing the essential metadata for a single
+record in a tar file with this definition:
+```jl
+struct Header
+ path :: String # path relative to the root
+ type :: Symbol # type indicator (see below)
+ mode :: UInt16 # mode/permissions (best viewed in octal)
+ size :: Int64 # size of record data in bytes
+ link :: String # target path of a symlink
+end
+```
+Types are represented with the following symbols: `file`, `hardlink`, `symlink`,
+`chardev`, `blockdev`, `directory`, `fifo`, or for unknown types, the typeflag
+character as a symbol. Note that [`extract`](#Tarextract) refuses to extract
+record types other than `file`, `symlink` and `directory`; [`list`](#Tarlist)
+will only list other kinds of records if called with `strict=false`.
+
+<!-- END: copied from inline doc strings -->
+
+### Compression
+
+It is typical to compress tarballs when saving or transferring them. In the UNIX
+tradition of doing one thing and doing it well, the `Tar` package does not do
+any kind of compression and instead makes it easy to compose its API with
+external compression tools. The simplest way to read a compressed archive is to
+use a command-line tool to decompress it. For example:
+```jl
+Tar.list(`gzcat $tarball`)
+Tar.extract(`gzcat $tarball`)
+```
+This will spawn the `gzcat $tarball` command, read the uncompressed tarball data
+from the output of that process, and then close the process. Creating a tarball
+with the `gzip` command is nearly as easy:
+```jl
+Tar.create(dir, pipeline(`gzip -9`, tarball))
+```
+This assumes that `dir` is the directory you want to archive and `tarball` is
+the path you want to create as a compressed archive.
+
+If you want to compress or decompress a tarball in the same process, you can
+use various
+[TranscodingStreams](https://github.com/JuliaIO/TranscodingStreams.jl)
+packages:
+```jl
+using CodecZlib
+
+tar_gz = open(tarball, write=true)
+tar = GzipCompressorStream(tar_gz)
+Tar.create(dir, tar)
+close(tar)
+```
+This assumes that `dir` is the directory you want to archive and `tarball` is
+the path you want to create as a compressed archive. You can decompress
+in-process in a similar manner:
+```jl
+using CodecZlib
+
+tar_gz = open(tarball)
+tar = GzipDecompressorStream(tar_gz)
+dir = Tar.extract(tar)
+close(tar)
+```
+This assumes that `tarball` is the path of the compressed archive you want to
+extract.
+
+### API comparison with command-line tar
+
+It might be helpful to compare the `Tar` API with command-line `tar`. Unlike
+`tar -c` the `Tar.create` function does not include any of the path you tell it
+to bundle in the resulting TAR file: the location of the data is not part of the
+data. Doing `Tar.create(dir, tarball)` is roughly equivalent to running the
+following `tar` command:
+```sh
+tar -f $tarball -C $dir -c $(cd $dir; ls -A)
+```
+In other words, `tar` is told to change into the directory `dir` before
+constructing the tarball and then include all the top-level items in that
+directory without any path prefix. Note that the above command does not fully
+emulate the behavior of `Tar.create`: it does not sort entries in the same order
+and it still records user and group information, modification times and exact
+permissions. Coaxing command-line `tar` programs to omit this non-portable
+information and use a portable (and `git`-compatible) sort order is non-trivial.
+
+Another difference from command-line `tar`: non-empty directories are also
+omitted from the tarballs that `Tar` creates since no metadata is recorded about
+directories aside from the fact that they exist and the existence of non-empty
+directories is already implied by the fact that they contain something else. If,
+in the future, the ability to record metadata about directories is added,
+tarballs will record entries for non-empty directories with non-default
+metadata.
+
+On the extraction side of things, doing `Tar.extract(tarball, dir)` is roughly
+equivalent to the following commands:
+```sh
+test -d $dir || mkdir $dir
+tar -f $tarball -C $dir -mx
+```
+Again, `tar` is told to change into the directory `dir` before extracting the
+tarball and to extract each path relative to that directory. The `-m` option
+tells `tar` to ignore the modification times recorded in the tarball and just
+let files and directories have their natural modification times.
+
+If the current user has elevated privileges, the `tar` command will attempt to
+change the owner and group of files to what is recorded in the tarball, whereas
+`Tar.extract` will never do that. The `tar` command may also try to restore
+permissions without respecting the current `umask` if the user is an
+administrator. Again, `Tar.extract` will never do that—it behaves the same way
+for any users: by ignoring any user/group/permission information, aside from
+whether plain files are executable by their owner or not. To suppress these
+behaviors with GNU tar, you can use the `--no-same-owner` and
+`--no-same-permissions` options; these options are not broadly supported by
+other `tar` commands, which may not have options to support these behaviors.
+
+## Design & Features
+
+Unlike the `tar` command line tool, which was originally designed to archive
+data in order to restore it back to the same system or to a replica thereof, the
+`Tar` package is designed for using the TAR format to transfer trees of files
+and directories from one system to another. This design goal means that some
+metadata fields supported by the TAR format and used by default by historical
+`tar` tools are not used or supported by `Tar`. In short, the choice of features
+and defaults for `Tar` are designed to support transfer of data, rather than
+backup and restoration.
+
+The TAR format can, for example, record the name and ID of the user that owns
+each file. Recording this information makes perfect sense when using tarballs
+for backup: the `tar` program should run as root when restoring data, so it can
+restore the original owner of each file and directory. On the other hand, this
+ownership information is of no use when using the TAR format to transfer data
+from one system to another: the user names and IDs will not generally be the
+same on different systems, and the tool should _not_ be run as `root`, so it
+cannot change the owner of anything it extracts. For data transfer, ownership
+metadata should be disregarded and need not be recorded in the first place.
+
+Similarly, it makes little sense, when using tarballs for data transfer, to copy
+the modification time of each file from the source system. Those time stamps are
+unlikely to be relevant on the destination system, and in some cases, clock skew
+between the systems could mean that time stamps from the source appear to be in
+the future at the destination. This can confuse some programs and may even be
+perceived as an attempted security breach; most `tar` command line tools print
+warnings when extracting files with time stamps from the future. When using the
+TAR format for data transfer, it is better to ignore time stamps and just let
+the extracted contents have natural modification times.
+
+The features and defaults of the `Tar` package are guided by the principle that
+it uses the TAR format for transmitting data, not as a tool for backup and
+restoration. If you want to use the TAR format for archival purposes, you are
+likely better off using a traditional command line tool like [GNU
+tar](https://www.gnu.org/software/tar/). If, on the other hand, you want to use
+the TAR format to transmit data from one system to another, then you've come to
+the right place.
+
+### File Types
+
+Since `Tar` is designed for transmission of file and directory trees, it
+supports only the following file types:
+
+* plain files
+* directories
+* symlinks
+
+The `Tar` package does not support other file types that the TAR format can
+represent, including: hard links, character devices, block devices, and FIFOs.
+If you attempt to create or extract an archive that contains any of these kinds
+of entries, `Tar` will raise an error. You can, however, list the contents of a
+tarball containing other kinds of entries by passing the `strict=false` flag to
+the `list` function; without this option, `list` raises the same error as
+`extract` would.
+
+In the future, optional support may be added for using hard links within
+archives to avoid duplicating identical files.
+
+### Time Stamps
+
+Also in accordance with its design goal as a data transfer tool, the `Tar`
+package does not record or set modification times upon tarball creation and
+extraction. When creating a tarball, it sets the time stamp of each entry to
+`0`, representing the UNIX epoch (Jan 1st, 1970). When extracting a tarball, it
+ignores the time stamps of entries and lets all extracted content have "natural"
+modification times based on when each file or directory is extracted.
+
+In the future, optional support may be added for recording and restoring time
+stamps.
+
+### Users & Groups
+
+`Tar` ignores user and group names and IDs when creating and extracting
+tarballs. This is due to two facts:
+
+* names and IDs on source and destination systems will generally not match;
+* names and IDs can only be changed if `Tar` is run with elevated privileges.
+
+The first fact means that it probably doesn't make sense to try to restore
+ownership when transferring data, while the second fact means that it's probably
+not possible. Accordingly, `Tar` disregards user and group names and IDs when
+creating and extracting tarballs. During creation, the ID fields are recorded as
+`0` and names fields are recorded as the empty string. When extracting a
+tarball, the user and group fields are ignored entirely and all extracted
+content is owned by the current user.
+
+It is unlikely that support will be added for recording or restoring ownership
+of files or directories since that functionality only makes sense when using the
+TAR format for backup, a purpose better served by using a command line `tar`
+tool.
+
+### Permissions
+
+Upon tarball extraction, `Tar` respects the permissions recorded for each file.
+When creating a tarball, however, it ignores most permission information and
+normalizes permissions as follows:
+
+* files that are not executable by the owner are archived with mode `0o644`;
+* files that are executable by the owner are archived with mode `0o755`;
+* directories and symlinks are always archived with mode `0o755`.
+
+In other words, `Tar` records only one significant bit of information: whether
+plain files are executable by their owner or not. No permission information for
+directories or symlinks is considered significant. This one bit of information
+is the only one which makes sense across all platforms, so this choice makes
+`Tar`'s behavior as portable as possible. On systems (like Windows) that do not
+use POSIX modes, whatever permission mechanism exists (_e.g._ ACLs) should be
+queried/modified to determine whether each file is executable by its owner or
+not. Unfortunately, this is currently broken on Windows since `libuv` does not
+correctly support querying or changing the user executable "bit"; this is
+actively being worked on, however, and should be fixed in future versions of
+Julia.
+
+In the future, optional support may be added for recording exact permission
+modes on POSIX systems, and possibly for normalizing permissions on extraction
+in the same way that they are normalized upon archive creation.
+
+### Reproducibility
+
+The information that `Tar` records about permissions is the same information
+that `git` considers to be significant when recording and hashing tree contents
+(admittedly not by coincidence). As a result, an important and useful
+consequence of `Tar`'s design is that it has the following properties:
+
+* if you create a tarball from a file tree and extract it, the new tree will
+ have the same `git` tree hash as the original;
+* if you `git checkout` a file tree and archive it using `Tar`, the resulting
+ TAR archive file is always the same.
+
+One important caveat to keep in mind is that `git` ignores directories that
+recursively contain only directories—_i.e._ unless there's a file or a symlink
+somewhere, `git` will not acknowledge the existence of a subdirectory. This
+means that two trees with the same `git` tree hash can produce different
+tarballs if they differ by subdirectories containing no files or symlinks: `git`
+will ignore those subdirectories, while `Tar` will not. Therefore, they will
+have the same `git` tree hash, but produce different tarballs. Two _identical_
+file trees will always produce identical tarballs, however, and that tarball
+should remain stable in future versions of the `Tar` package.
+
+The `tree_hash` function can be used to compute a git-style tree hash of the
+contents of a tarball (without needing to extract it). Moreover, two tarballs
+created by the `Tar` package will have the same hash if and only if they contain
+the same file tree, which is true if and only if they are identical tarballs.
+You can, however, hash tarballs not created by `Tar` this way to see if they
+represent the same file tree, and you can use the `skip_empty=true` option to
+`tree_hash` to compute the hash that `git` would assign the tree, ignoring empty
+directories.
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Test/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Test/docs/src/index.md
new file mode 100644
index 0000000..8e3ba43
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Test/docs/src/index.md
@@ -0,0 +1,293 @@
+# Unit Testing
+
+```@meta
+DocTestSetup = :(using Test)
+```
+
+## Testing Base Julia
+
+Julia is under rapid development and has an extensive test suite to verify functionality across
+multiple platforms. If you build Julia from source, you can run this test suite with `make test`.
+In a binary install, you can run the test suite using `Base.runtests()`.
+
+```@docs
+Base.runtests
+```
+
+## Basic Unit Tests
+
+The `Test` module provides simple *unit testing* functionality. Unit testing is a way to
+see if your code is correct by checking that the results are what you expect. It can be helpful
+to ensure your code still works after you make changes, and can be used when developing as a way
+of specifying the behaviors your code should have when complete.
+
+Simple unit testing can be performed with the `@test` and `@test_throws` macros:
+
+```@docs
+Test.@test
+Test.@test_throws
+```
+
+For example, suppose we want to check our new function `foo(x)` works as expected:
+
+```jldoctest testfoo
+julia> using Test
+
+julia> foo(x) = length(x)^2
+foo (generic function with 1 method)
+```
+
+If the condition is true, a `Pass` is returned:
+
+```jldoctest testfoo
+julia> @test foo("bar") == 9
+Test Passed
+
+julia> @test foo("fizz") >= 10
+Test Passed
+```
+
+If the condition is false, then a `Fail` is returned and an exception is thrown:
+
+```jldoctest testfoo
+julia> @test foo("f") == 20
+Test Failed at none:1
+ Expression: foo("f") == 20
+ Evaluated: 1 == 20
+ERROR: There was an error during testing
+```
+
+If the condition could not be evaluated because an exception was thrown, which occurs in this
+case because `length` is not defined for symbols, an `Error` object is returned and an exception
+is thrown:
+
+```julia-repl
+julia> @test foo(:cat) == 1
+Error During Test
+ Test threw an exception of type MethodError
+ Expression: foo(:cat) == 1
+ MethodError: no method matching length(::Symbol)
+ Closest candidates are:
+ length(::SimpleVector) at essentials.jl:256
+ length(::Base.MethodList) at reflection.jl:521
+ length(::MethodTable) at reflection.jl:597
+ ...
+ Stacktrace:
+ [...]
+ERROR: There was an error during testing
+```
+
+If we expect that evaluating an expression *should* throw an exception, then we can use `@test_throws`
+to check that this occurs:
+
+```jldoctest testfoo
+julia> @test_throws MethodError foo(:cat)
+Test Passed
+ Thrown: MethodError
+```
+
+## Working with Test Sets
+
+Typically a large number of tests are used to make sure functions work correctly over a range
+of inputs. In the event a test fails, the default behavior is to throw an exception immediately.
+However, it is normally preferable to run the rest of the tests first to get a better picture
+of how many errors there are in the code being tested.
+
+!!! note
+ The `@testset` will create a local scope of its own when running the tests in it.
+
+The `@testset` macro can be used to group tests into *sets*. All the tests in a test set will
+be run, and at the end of the test set a summary will be printed. If any of the tests failed,
+or could not be evaluated due to an error, the test set will then throw a `TestSetException`.
+
+```@docs
+Test.@testset
+```
+
+We can put our tests for the `foo(x)` function in a test set:
+
+```jldoctest testfoo
+julia> @testset "Foo Tests" begin
+ @test foo("a") == 1
+ @test foo("ab") == 4
+ @test foo("abc") == 9
+ end;
+Test Summary: | Pass Total
+Foo Tests | 3 3
+```
+
+Test sets can also be nested:
+
+```jldoctest testfoo
+julia> @testset "Foo Tests" begin
+ @testset "Animals" begin
+ @test foo("cat") == 9
+ @test foo("dog") == foo("cat")
+ end
+ @testset "Arrays $i" for i in 1:3
+ @test foo(zeros(i)) == i^2
+ @test foo(fill(1.0, i)) == i^2
+ end
+ end;
+Test Summary: | Pass Total
+Foo Tests | 8 8
+```
+
+In the event that a nested test set has no failures, as happened here, it will be hidden in the
+summary, unless the `verbose=true` option is passed:
+
+```jldoctest testfoo
+julia> @testset verbose = true "Foo Tests" begin
+ @testset "Animals" begin
+ @test foo("cat") == 9
+ @test foo("dog") == foo("cat")
+ end
+ @testset "Arrays $i" for i in 1:3
+ @test foo(zeros(i)) == i^2
+ @test foo(fill(1.0, i)) == i^2
+ end
+ end;
+Test Summary: | Pass Total
+Foo Tests | 8 8
+ Animals | 2 2
+ Arrays 1 | 2 2
+ Arrays 2 | 2 2
+ Arrays 3 | 2 2
+```
+
+If we do have a test failure, only the details for the failed test sets will be shown:
+
+```julia-repl
+julia> @testset "Foo Tests" begin
+ @testset "Animals" begin
+ @testset "Felines" begin
+ @test foo("cat") == 9
+ end
+ @testset "Canines" begin
+ @test foo("dog") == 9
+ end
+ end
+ @testset "Arrays" begin
+ @test foo(zeros(2)) == 4
+ @test foo(fill(1.0, 4)) == 15
+ end
+ end
+
+Arrays: Test Failed
+ Expression: foo(fill(1.0, 4)) == 15
+ Evaluated: 16 == 15
+[...]
+Test Summary: | Pass Fail Total
+Foo Tests | 3 1 4
+ Animals | 2 2
+ Arrays | 1 1 2
+ERROR: Some tests did not pass: 3 passed, 1 failed, 0 errored, 0 broken.
+```
+
+## Other Test Macros
+
+As calculations on floating-point values can be imprecise, you can perform approximate equality
+checks using either `@test a ≈ b` (where `≈`, typed via tab completion of `\approx`, is the
+[`isapprox`](@ref) function) or use [`isapprox`](@ref) directly.
+
+```jldoctest
+julia> @test 1 ≈ 0.999999999
+Test Passed
+
+julia> @test 1 ≈ 0.999999
+Test Failed at none:1
+ Expression: 1 ≈ 0.999999
+ Evaluated: 1 ≈ 0.999999
+ERROR: There was an error during testing
+```
+
+```@docs
+Test.@inferred
+Test.@test_logs
+Test.@test_deprecated
+Test.@test_warn
+Test.@test_nowarn
+```
+
+## Broken Tests
+
+If a test fails consistently it can be changed to use the `@test_broken` macro. This will denote
+the test as `Broken` if the test continues to fail and alert the user via an `Error` if the test
+succeeds.
+
+```@docs
+Test.@test_broken
+```
+
+`@test_skip` is also available to skip a test without evaluation, but counting the skipped test
+in the test set reporting. The test will not run but gives a `Broken` `Result`.
+
+```@docs
+Test.@test_skip
+```
+
+## Creating Custom `AbstractTestSet` Types
+
+Packages can create their own `AbstractTestSet` subtypes by implementing the `record` and `finish`
+methods. The subtype should have a one-argument constructor taking a description string, with
+any options passed in as keyword arguments.
+
+```@docs
+Test.record
+Test.finish
+```
+
+`Test` takes responsibility for maintaining a stack of nested testsets as they are executed,
+but any result accumulation is the responsibility of the `AbstractTestSet` subtype. You can access
+this stack with the `get_testset` and `get_testset_depth` methods. Note that these functions are
+not exported.
+
+```@docs
+Test.get_testset
+Test.get_testset_depth
+```
+
+`Test` also makes sure that nested `@testset` invocations use the same `AbstractTestSet`
+subtype as their parent unless it is set explicitly. It does not propagate any properties of the
+testset. Option inheritance behavior can be implemented by packages using the stack infrastructure
+that `Test` provides.
+
+Defining a basic `AbstractTestSet` subtype might look like:
+
+```julia
+import Test: Test, record, finish
+using Test: AbstractTestSet, Result, Pass, Fail, Error
+using Test: get_testset_depth, get_testset
+struct CustomTestSet <: Test.AbstractTestSet
+ description::AbstractString
+ foo::Int
+ results::Vector
+ # constructor takes a description string and options keyword arguments
+ CustomTestSet(desc; foo=1) = new(desc, foo, [])
+end
+
+record(ts::CustomTestSet, child::AbstractTestSet) = push!(ts.results, child)
+record(ts::CustomTestSet, res::Result) = push!(ts.results, res)
+function finish(ts::CustomTestSet)
+ # just record if we're not the top-level parent
+ if get_testset_depth() > 0
+ record(get_testset(), ts)
+ end
+ ts
+end
+```
+
+And using that testset looks like:
+
+```julia
+@testset CustomTestSet foo=4 "custom testset inner 2" begin
+ # this testset should inherit the type, but not the argument.
+ @testset "custom testset inner" begin
+ @test true
+ end
+end
+```
+
+```@meta
+DocTestSetup = nothing
+```
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/UUIDs/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/UUIDs/docs/src/index.md
new file mode 100644
index 0000000..1e6c950
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/UUIDs/docs/src/index.md
@@ -0,0 +1,8 @@
+# UUIDs
+
+```@docs
+UUIDs.uuid1
+UUIDs.uuid4
+UUIDs.uuid5
+UUIDs.uuid_version
+```
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Unicode/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Unicode/docs/src/index.md
new file mode 100644
index 0000000..aba9d80
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Unicode/docs/src/index.md
@@ -0,0 +1,7 @@
+# Unicode
+
+```@docs
+Unicode.isassigned
+Unicode.normalize
+Unicode.graphemes
+```
diff --git a/external/julia-1.6.7/share/julia/test/clangsa/lit.cfg.py b/external/julia-1.6.7/share/julia/test/clangsa/lit.cfg.py
new file mode 100644
index 0000000..5790eab
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/test/clangsa/lit.cfg.py
@@ -0,0 +1,27 @@
+import os
+import sys
+import re
+import platform
+
+import lit.util
+import lit.formats
+
+config.name = 'Julia-GCChecker'
+config.suffixes = ['.c','.cpp']
+config.test_source_root = os.path.dirname(__file__)
+config.test_format = lit.formats.ShTest(True)
+config.substitutions.append(('%shlibext', '.dylib' if platform.system() == 'Darwin' else '.dll' if
+ platform.system() == 'Windows' else '.so'))
+config.substitutions.append(("%julia_home", os.path.join(os.path.dirname(__file__), "../..")))
+
+path = os.path.pathsep.join((os.path.join(os.path.dirname(__file__),"../../usr/tools"), os.path.join(os.path.dirname(__file__),"../../usr/bin"), config.environment['PATH']))
+config.environment['PATH'] = path
+config.environment['HOME'] = "/tmp"
+config.environment['CLANGSA_FLAGS'] = os.environ.get('CLANGSA_FLAGS', "")
+config.environment['CLANGSA_CXXFLAGS'] = os.environ.get('CLANGSA_CXXFLAGS', "")
+config.environment['CPPFLAGS'] = os.environ.get('CPPFLAGS', "")
+config.environment['CFLAGS'] = os.environ.get('CFLAGS', "")
+config.environment['CXXFLAGS'] = os.environ.get('CXXFLAGS', "")
+
+if platform.machine() == "x86_64":
+ config.available_features.add('x86_64')
diff --git a/external/julia-1.6.7/share/julia/test/llvmpasses/lit.cfg.py b/external/julia-1.6.7/share/julia/test/llvmpasses/lit.cfg.py
new file mode 100644
index 0000000..2054876
--- /dev/null
+++ b/external/julia-1.6.7/share/julia/test/llvmpasses/lit.cfg.py
@@ -0,0 +1,21 @@
+import os
+import sys
+import re
+import platform
+
+import lit.util
+import lit.formats
+
+config.name = 'Julia'
+config.suffixes = ['.ll','.jl']
+config.test_source_root = os.path.dirname(__file__)
+config.test_format = lit.formats.ShTest(True)
+config.substitutions.append(('%shlibext', '.dylib' if platform.system() == 'Darwin' else '.dll' if
+ platform.system() == 'Windows' else '.so'))
+
+path = os.path.pathsep.join((os.path.join(os.path.dirname(__file__),"../../usr/tools"), os.path.join(os.path.dirname(__file__),"../../usr/bin"), config.environment['PATH']))
+config.environment['PATH'] = path
+config.environment['HOME'] = "/tmp"
+
+if platform.machine() == "x86_64":
+ config.available_features.add('x86_64')
diff --git a/external/julia_depot/registries/General/.ci/instantiate.sh b/external/julia_depot/registries/General/.ci/instantiate.sh
new file mode 100755
index 0000000..ce636ec
--- /dev/null
+++ b/external/julia_depot/registries/General/.ci/instantiate.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+
+# This script runs `Pkg.instantiate()` inside a bounded retry loop.
+
+dir="${1:-}"
+
+if [ -z "$dir" ]; then
+ echo "Usage: $0 <julia-project-dir>" >&2
+ exit 2
+fi
+if [ ! -d "$dir" ]; then
+ echo "Error: '$dir' is not a directory." >&2
+ exit 2
+fi
+
+try_count=0
+while [ "$try_count" -lt 9 ]; do
+ julia --color=yes --project="$dir" -e 'import Pkg; Pkg.instantiate()'
+ if [ $? -eq 0 ]; then exit 0; fi
+ try_count=$((try_count + 1))
+ sleep 20
+done
+
+exit 1
diff --git a/external/julia_depot/registries/General/CONTRIBUTING.md b/external/julia_depot/registries/General/CONTRIBUTING.md
new file mode 100644
index 0000000..62e00fe
--- /dev/null
+++ b/external/julia_depot/registries/General/CONTRIBUTING.md
@@ -0,0 +1,185 @@
+# Contribution guidelines
+
+Anyone can help improve the General registry! Here are a few ways.
+
+## As a package author
+
+You can register your package!
+See [Registering a package in General](https://github.com/JuliaRegistries/General#registering-a-package-in-general) in the README for how to do that.
+The "FAQ" section in the README helps answer many more questions, like [do I need to register a package to install it?](https://github.com/JuliaRegistries/General#do-i-need-to-register-a-package-to-install-it), [should I register my package?](https://github.com/JuliaRegistries/General#should-i-register-my-package), and more.
+
+* Please be aware of the [package naming guidelines][naming-guidelines]
+* We strongly encourage authors to follow best practices like having documentation (or a descriptive README), tests, and continuous integration.
+
+## As a Julia community member
+
+You (yes, you!) can help General be the best registry it can be.
+
+### New package registrations
+
+The first step to getting involved with General is to check out new package registrations.
+They are filed under the ["new package" label](https://github.com/JuliaRegistries/General/pulls?q=is%3Apr+is%3Aopen+label%3A%22new+package%22), and an automatic feed posts them in the `#new-packages-feed` channel in the [community Slack](https://julialang.org/slack/) or [Zulip](https://julialang.zulipchat.com/register/).
+
+When registration is triggered for a new package (or new version of a package), [RegistryCI.jl][RegistryCI]-powered AutoMerge automatically runs and performs [some basic checks](https://juliaregistries.github.io/RegistryCI.jl/stable/guidelines/).
+These are merely guidelines, and not all checks must pass --- if a check fails, the registration can still be manually merged.
+There are a few ways to help here:
+
+1. First, whenever you are engaging with a package author, remember to always be polite and kind-- if you are feeling frustrated, it may be better to not comment at all and let someone else respond instead.
+Not everyone understands things from the same explanations, and some folks may need to translate or overcome other barriers to understanding, or may simply disagree with you.
+When we are helping maintain the General registry, we are acting as representatives of the Julia community, and need to be mindful of that.
+2. If an AutoMerge guideline fails and the package author does not seem to know how to address it, you can help guide them through the process.
+Pointing them to the [FAQ][FAQ] can help, as can updating the FAQ and other guidance to make the process more clear.
+Sometimes folks also just need a bit of help to understand the process, and writing a note can help.
+3. If AutoMerge fails but you think the package should be manually merged, comment in the PR to explain why.
+ * One common issue here is the name similarity check.
+ This exists to prevent malicious [typosquatting](https://en.wikipedia.org/wiki/Typosquatting).
+ For example, [Flux](https://github.com/FluxML/Flux.jl) is a popular machine learning package.
+ A malicious actor could try to register FIux (with an uppercase-eye instead of a lowercase-ell), and encourage users to install it by writing a tutorial or such.
+ They could then add malicious code to the package to try to steal secrets.
+ Such an event would be an extreme security violation and the package would be yanked or removed from the registry as soon as possible-- but we try to be a bit safer by proactively screening names to require manual merging if they are "too similar" to an existing package name.
+
+ If a package fails the name similarity check, you can help out by taking a look at the two names as well as the package code itself, and try to make a determination if it looks "too close" (e.g. Websockets vs WebSocket), and if the package code contains anything that would indicate malicious activity.
+ You can make a comment in the PR indicating whether or not you think the name similarity is okay. Include `[noblock]` in the comment if you don't want to block AutoMerge.
+ If you have [triage][permissions]-level access or higher to General, you can additionally override automerge by adding the label _Override AutoMerge: name similarity is okay_.
+
+4. Regardless of AutoMerge's status, if you think perhaps something more should be done before registration, feel free to leave a comment in the PR explaining what you think should be done first.
+Any comment without `[noblock]` included in it will block AutoMerge from automatically merging the pull request (editing `[noblock]` into old comments **will** allow it to resume).
+ * For example, occasionally someone will register a package without any content in order to reserve the package name, with the intent to add content later.
+ We don't allow that in General, and ask authors to add content first before registering.
+ * Sometimes authors register packages without any description of what the package is for in the README or without documentation.
+ Since registration is a mechanism to share code with the whole Julia community, such a description is important for the package to be useful.
+ While we don't strictly require such documentation, it can help to give a polite and gentle nudge in the PR comments, or show folks how to write documentation and/or what is helpful to include in a README.
+ We want to encourage best practices (in an inclusive and friendly way!) even when they are not strict requirements.
+ * Sometimes package names are possibly confusing or don't conform to our [naming guidelines][naming-guidelines], but AutoMerge does not detect this.
+ Feel free to comment, describing what you think is confusing or non-compliant about the current name, and any suggestions you have for a more clear name.
+
+### Other PRs to General
+
+Sometimes, the registry needs to be updated in other ways that involve manual pull requests (PRs) rather than auto-generated ones.
+The most common reason is to update the URL for a repository.
+
+#### Updating the URL for a repository
+
+If someone transfers a GitHub repository, [we ask](https://github.com/JuliaRegistries/General#how-do-i-transfer-a-package-to-an-organization-or-another-user) that they update the URL stored in General.
+This is done by manually making a PR to General to update the URL.
+You can review such a PR by checking that the old URL redirects to the new one.
+* If it does, that's a clear sign that the change is legitimate and the new URL is correct.
+ If you have write permissions to General, you can merge the PR; otherwise you can approve it or comment.
+* If it does not, you can ask the author why. This should be handled on a case-by-case basis. Be sure to check that:
+ 1. The package is not being hijacked; check for example that the person making the PR has registered a version of the package before, indicating they are authorized to do so.
+ 2. All the registered revisions of the package are accessible in the new repository.
+ Specifically, this means checking that all the git-tree-shas can be found in the new repository.
+ See [the appendix](#appendix-checking-if-a-repository-contains-all-registered-versions-of-a-package) below for a script to automate this checking.
+
+### Other ways to help
+
+Besides helping out with PRs to General, you can...
+
+* ...improve [General's README](https://github.com/JuliaRegistries/General#general), the [RegistryCI documentation](https://juliaregistries.github.io/RegistryCI.jl/stable/guidelines/), or these guidelines!
+* ...add new checks to AutoMerge (in [RegistryCI][RegistryCI]) or improve existing ones.
+* ...address open issues in [General](https://github.com/JuliaRegistries/General/issues), [RegistryCI.jl](https://github.com/JuliaRegistries/RegistryCI.jl/issues), or [Registrator.jl](https://github.com/JuliaRegistries/Registrator.jl/issues).
+* ...write blog posts and documentation to help folks get started with writing documentation, tests, and setting up CI for their own packages, and find appropriate places to link to it and help out new package authors.
+
+Additionally, if you have elevated [permissions][permissions] to General, there are a few more things you can do:
+
+* [triage] You can add or remove labels to PRs to help communicate the status and to automatically override AutoMerge for name similarity failures.
+ * Specifically, adding the label `Override AutoMerge: name similarity is okay` will retrigger AutoMerge and cause it to ignore the distance check between the package name and existing package names. This will not override any other guidelines (e.g. name too short).
+* [triage] You can close PRs if the package author requests it or the registration is superseded by another registration request.
+* [write] You can merge PRs that have the _needs to be manually merged in 3 days_ label once the requisite waiting period has passed, assuming there are no outstanding objections in the PR comments.
+* [write] You can choose to facilitate expedited merge requests, after manually reviewing the package.
+You generally should not merge your own registrations or those you are involved with (though you can make requests to another maintainer). See also [this FAQ entry](https://github.com/JuliaRegistries/General/#who-can-approve-an-early-merge).
+* [write] You can merge improvements to the README, these guidelines, or our workflows.
+* [admin] You can give other contributors triage-level access so they can apply labels to PRs, or write-level permissions to merge PRs.
+
+## Expectations and tips for registry maintainers
+
+We ask registry maintainers to refrain from merging their own PRs or PRs that they are closely involved in.
+
+### Is there a checklist for reviewing PRs that change the URL of a repo?
+
+Here's a checklist.
+
+```md
+1. [ ] I have confirmed that the old URL automatically redirects to the new URL in the web browser.
+2. [ ] The PR preserves the trailing `.git` at the end of the URL.
+3. [ ] The Treecheck CI job ran on this PR and is green.
+```
+
+## Appendix: Checking if a repository contains all registered versions of a package
+
+When someone wishes to move a package from one repo to another, it is important that the new repo contains all of the tree hashes corresponding to registered versions of a package. That way these old versions of the package can continue to be installed from the new repository. In order to check if a given repository contains all of the registered versions of a package, the following script can be used:
+
+```julia
+using RegistryInstances, UUIDs, Git
+
+const GENERAL_UUID = UUID("23338594-aafe-5451-b93e-139f81909106")
+
+pretty_print_row(row) = println(row.pkg_name, ": v", row.version, " ", row.found ? "found" : "is missing")
+pretty_print_table(table) = foreach(pretty_print_row, table)
+
+function check_all_found(table)
+ idx = findfirst(row -> !row.found, table)
+ idx === nothing && return nothing
+ row = table[idx]
+ error(string("Repository missing v", row.version, " of package $(row.pkg_name)"))
+end
+
+function check_packages_versions(pkg_names, repo_url; registry_uuid=GENERAL_UUID, verbose=true, throw=true)
+ if isdir(repo_url)
+ dir = repo_url
+ else
+ dir = mktempdir()
+ run(`$(git()) clone $(repo_url) $dir`)
+ end
+
+ registry = only(filter!(r -> r.uuid == registry_uuid, reachable_registries()))
+
+ table = @NamedTuple{pkg_name::String, version::VersionNumber, found::Bool, tree_sha::Base.SHA1}[]
+
+ for pkg_name in pkg_names
+ pkg = registry.pkgs[only(uuids_from_name(registry, pkg_name))]
+ versions = registry_info(pkg).version_info
+ for version in sort(collect(keys(versions)))
+ tree_sha = versions[version].git_tree_sha1
+ found = success(`$(git()) -C $dir rev-parse -q --verify "$(tree_sha)^{tree}"`)
+
+ push!(table, (; pkg_name, version, found, tree_sha))
+ end
+ end
+ verbose && pretty_print_table(table)
+ throw && check_all_found(table)
+ return table
+end
+
+check_package_versions(pkg_name, repo_url; kw...) = check_packages_versions([pkg_name], repo_url; kw...)
+```
+
+For example, in [General#75319](https://github.com/JuliaRegistries/General/pull/75319), a package author wanted to update the URL associated
+with their package "FastParzenWindows". At the time, the package had 1 registered version. We can check that it is present in the new repository via:
+
+```julia
+julia> check_package_versions("FastParzenWindows", "https://github.com/ngiann/FastParzenWindows.jl.git");
+Cloning into '/var/folders/jb/plyyfc_d2bz195_0rc0n_zcw0000gp/T/jl_ke9E8C'...
+...text omitted...
+FastParzenWindows: v0.1.2 found
+```
+
+We see that this version was found in the new repository. This script was based on [this comment from General#35965](https://github.com/JuliaRegistries/General/pull/35965#issuecomment-832721704),
+which involved checking if 4 packages in the same repository all had their versions present in the new repository. That example can be handled as follows:
+
+```julia
+pkg_names = ["ReinforcementLearningBase", "ReinforcementLearningCore",
+ "ReinforcementLearningEnvironments", "ReinforcementLearningZoo"]
+check_packages_versions(pkg_names, "https://github.com/JuliaReinforcementLearning/ReinforcementLearning.jl.git")
+```
+
+You can also use this script against local repositories when modifying Git commit history:
+
+```julia
+check_package_versions("FastParzenWindows", ".")
+```
+
+[FAQ]: https://github.com/JuliaRegistries/General#faq
+[naming-guidelines]: ./NAMING_GUIDELINES.md
+[permissions]: https://docs.github.com/en/organizations/managing-access-to-your-organizations-repositories/repository-permission-levels-for-an-organization#permission-levels-for-repositories-owned-by-an-organization
+[RegistryCI]: https://github.com/JuliaRegistries/RegistryCI.jl/
diff --git a/flossing_suite/README.md b/flossing_suite/README.md
new file mode 100644
index 0000000..6081c97
--- /dev/null
+++ b/flossing_suite/README.md
@@ -0,0 +1,89 @@
+# Flossing Suite
+
+This directory is a reproducible wrapper around the existing flossing code.
+It separates three questions:
+
+1. Does the Engelken algorithm itself reproduce on a vanilla RNN toy task?
+2. Does a faithful pre/interfloss analogue help TRM/HRM when the flossing phase is separate from task loss?
+3. Do our one-sided variants (`top1_cf`, `spectrum_cf`, `volume_cf`) behave differently from Engelken's two-sided L2 target?
+
+## Important Algorithmic Distinctions
+
+- `engelken_python_flossing.py` is the toy RNN faithful port. It keeps the paper-style separate flossing phase, no task/floss mixed objective, flosses only input/recurrent/bias parameters, uses differentiable QR, and optimizes `mean((lambda_i - lambda_star)^2)`.
+- `step7_interfloss.py` is the HRM/TRM analogue. It also uses separate floss-only episodes. Ordinary training steps use only supervised ACT loss.
+- `step7_interfloss.py --floss-mode engelken_l2` is the Rainer-style two-sided target.
+- `top1_cf`, `spectrum_cf`, and `volume_cf` are our one-sided contractive variants, not the paper method.
+- KL preservation is optional and only applies during floss-only episodes.
+
+## Current Known Sanity Check
+
+Existing toy RNN result:
+
+- Baseline no floss: final eval accuracy about `0.777`.
+- Prefloss: final eval accuracy about `0.997`.
+
+This means the Python port can reproduce a positive toy result. Negative HRM/TRM results should therefore be interpreted as model/task transfer issues, not simply "flossing code cannot work."
+
+## Recommended Workflow
+
+Run smoke tests:
+
+```bash
+bash research/flossing/flossing_suite/smoke_test.sh 0
+```
+
+Launch toy RNN paper-style suite:
+
+```bash
+bash research/flossing/flossing_suite/launch_toy_official_suite.sh
+```
+
+Launch TRM faithful Rainer-style suite:
+
+```bash
+GPU_BASE=0 GPU_PREFLOSS=1 GPU_INTER=3 bash research/flossing/flossing_suite/launch_trm_faithful_suite.sh
+```
+
+Launch TRM CF/volume variants:
+
+```bash
+GPU_TOP1=0 GPU_VOLUME=1 GPU_KL=3 bash research/flossing/flossing_suite/launch_trm_variant_suite.sh
+```
+
+Summarize all available flossing logs:
+
+```bash
+/home/yurenh2/miniconda3/envs/rrm/bin/python research/flossing/flossing_suite/summarize_flossing.py
+```
+
+Check active jobs:
+
+```bash
+bash research/flossing/flossing_suite/status.sh
+```
+
+Wait for current TRM faithful jobs and refresh summary:
+
+```bash
+bash research/flossing/flossing_suite/watch_and_summarize.sh
+```
+
+Outputs go to:
+
+- `results/toy_rnn/`
+- `results/trm_faithful/`
+- `results/trm_variants/`
+- `results/smoke/`
+- `results/summary/`
+
+## Existing Historical Results Included By Summarizer
+
+The summarizer also scans:
+
+- `research/flossing/engelken_python/*.json`
+- `research/flossing/engelken_paper_faithful/*.json`
+- `research/flossing/step6_*.json`
+- `research/flossing/step7_*.json`
+- `research/flossing/flossing_suite/results/**/*.json`
+
+This keeps old negative/positive evidence visible without rerunning everything.
diff --git a/flossing_suite/archive_failed_oom_b8/trm_seed123_baseline_nofloss_b8_10000.cmd.sh b/flossing_suite/archive_failed_oom_b8/trm_seed123_baseline_nofloss_b8_10000.cmd.sh
new file mode 100755
index 0000000..98411d8
--- /dev/null
+++ b/flossing_suite/archive_failed_oom_b8/trm_seed123_baseline_nofloss_b8_10000.cmd.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+set -euo pipefail
+cd "/home/yurenh2/rrm"
+export CUDA_VISIBLE_DEVICES="0"
+export PYTHONUNBUFFERED=1
+exec "/home/yurenh2/miniconda3/envs/rrm/bin/python" research/flossing/step7_interfloss.py \
+ --model trm \
+ --ckpt-root "/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro" \
+ --ckpt-name __random__ \
+ --init-seed 123 \
+ --train-steps "10000" \
+ --batch-size 8 \
+ --train-lr 1e-4 \
+ --floss-lr 1e-4 \
+ --floss-mode engelken_l2 \
+ --lambda-star 0 \
+ --k-lyap 4 \
+ --lyap-act-steps 4 \
+ --seed 42 \
+ --eval-every 1000 \
+ --eval-n "1000" \
+ --eval-batch-size 64 \
+ --floss-log-every 10 \
+ --train-puzzle-emb \
+ --puzzle-emb-lr 1e-4 \
+ --puzzle-emb-weight-decay 1.0 \
+ --kl-beta 0 \
+ --floss-steps "0" \
+ --interfloss-at "" \
+ --out "/home/yurenh2/rrm/research/flossing/flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_b8_10000.json"
diff --git a/flossing_suite/archive_failed_oom_b8/trm_seed123_pre_inter_0_500_b8_k4_10000.cmd.sh b/flossing_suite/archive_failed_oom_b8/trm_seed123_pre_inter_0_500_b8_k4_10000.cmd.sh
new file mode 100755
index 0000000..e3f25ab
--- /dev/null
+++ b/flossing_suite/archive_failed_oom_b8/trm_seed123_pre_inter_0_500_b8_k4_10000.cmd.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+set -euo pipefail
+cd "/home/yurenh2/rrm"
+export CUDA_VISIBLE_DEVICES="3"
+export PYTHONUNBUFFERED=1
+exec "/home/yurenh2/miniconda3/envs/rrm/bin/python" research/flossing/step7_interfloss.py \
+ --model trm \
+ --ckpt-root "/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro" \
+ --ckpt-name __random__ \
+ --init-seed 123 \
+ --train-steps "10000" \
+ --batch-size 8 \
+ --train-lr 1e-4 \
+ --floss-lr 1e-4 \
+ --floss-mode engelken_l2 \
+ --lambda-star 0 \
+ --k-lyap 4 \
+ --lyap-act-steps 4 \
+ --seed 42 \
+ --eval-every 1000 \
+ --eval-n "1000" \
+ --eval-batch-size 64 \
+ --floss-log-every 10 \
+ --train-puzzle-emb \
+ --puzzle-emb-lr 1e-4 \
+ --puzzle-emb-weight-decay 1.0 \
+ --kl-beta 0 \
+ --floss-steps "500" \
+ --interfloss-at "0,500" \
+ --out "/home/yurenh2/rrm/research/flossing/flossing_suite/results/trm_faithful/trm_seed123_pre_inter_0_500_b8_k4_10000.json"
diff --git a/flossing_suite/archive_failed_oom_b8/trm_seed123_prefloss_0_b8_k4_10000.cmd.sh b/flossing_suite/archive_failed_oom_b8/trm_seed123_prefloss_0_b8_k4_10000.cmd.sh
new file mode 100755
index 0000000..86c445d
--- /dev/null
+++ b/flossing_suite/archive_failed_oom_b8/trm_seed123_prefloss_0_b8_k4_10000.cmd.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+set -euo pipefail  # stop on any error, unset variable, or failed pipeline stage
+cd "/home/yurenh2/rrm"  # the script path given to python below is relative to this directory
+export CUDA_VISIBLE_DEVICES="1"  # only GPU index 1 is visible to CUDA in this run
+export PYTHONUNBUFFERED=1  # Python writes stdout/stderr unbuffered
+exec "/home/yurenh2/miniconda3/envs/rrm/bin/python" research/flossing/step7_interfloss.py \
+ --model trm \
+ --ckpt-root "/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro" \
+ --ckpt-name __random__ \
+ --init-seed 123 \
+ --train-steps "10000" \
+ --batch-size 8 \
+ --train-lr 1e-4 \
+ --floss-lr 1e-4 \
+ --floss-mode engelken_l2 \
+ --lambda-star 0 \
+ --k-lyap 4 \
+ --lyap-act-steps 4 \
+ --seed 42 \
+ --eval-every 1000 \
+ --eval-n "1000" \
+ --eval-batch-size 64 \
+ --floss-log-every 10 \
+ --train-puzzle-emb \
+ --puzzle-emb-lr 1e-4 \
+ --puzzle-emb-weight-decay 1.0 \
+ --kl-beta 0 \
+ --floss-steps "500" \
+ --interfloss-at "0" \
+ --out "/home/yurenh2/rrm/research/flossing/flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_b8_k4_10000.json"
diff --git a/flossing_suite/launch_toy_official_suite.sh b/flossing_suite/launch_toy_official_suite.sh
new file mode 100755
index 0000000..43a2047
--- /dev/null
+++ b/flossing_suite/launch_toy_official_suite.sh
@@ -0,0 +1,77 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT="/home/yurenh2/rrm"
+PY="/home/yurenh2/miniconda3/envs/rrm/bin/python"
+OUT_DIR="${ROOT}/research/flossing/flossing_suite/results/toy_rnn"
+mkdir -p "${OUT_DIR}"
+
+GPU_BASE="${GPU_BASE:-0}"
+GPU_PREFLOSS="${GPU_PREFLOSS:-1}"
+GPU_INTER="${GPU_INTER:-3}"
+
+write_and_launch() {
+ local gpu="$1"
+ local name="$2"
+ local pre_epochs="$3"
+ local max_inter="$4"
+ local cmd="${OUT_DIR}/${name}.cmd.sh"
+ local log="${OUT_DIR}/${name}.log"
+ local pid="${OUT_DIR}/${name}.pid"
+
+ cat > "${cmd}" <<EOF
+#!/usr/bin/env bash
+set -euo pipefail
+cd "${ROOT}"
+export CUDA_VISIBLE_DEVICES="${gpu}"
+export PYTHONUNBUFFERED=1
+exec "${PY}" research/flossing/engelken_python_flossing.py \\
+ --hidden-size 80 \\
+ --n-lyap 40 \\
+ --train-epochs 1000 \\
+ --inter-period 100 \\
+ --inter-epochs 100 \\
+ --batch-size 16 \\
+ --input-dim 1 \\
+ --train-steps 300 \\
+ --lyap-steps 55 \\
+ --floss-input-steps 300 \\
+ --seed-ic 1 \\
+ --seed-input 1 \\
+ --seed-net 1 \\
+ --seed-ons 1 \\
+ --lr 0.001 \\
+ --beta1 0.9 \\
+ --beta2 0.999 \\
+ --init-type 1 \\
+ --recurrent-gain 1.0 \\
+ --recurrent-mean-gain 0.0 \\
+ --input-scale 1.0 \\
+ --delay 10 \\
+ --ws-std 1.0 \\
+ --ws-mean 0.0 \\
+ --wr-std 1.0 \\
+ --wr-mean 0.0 \\
+ --b-std 0.1 \\
+ --b-mean 0.0 \\
+ --task -1 \\
+ --lyap-target 0.0 \\
+ --eval-every 100 \\
+ --eval-batches 4 \\
+ --log-every-floss 25 \\
+ --device cuda \\
+ --pre-epochs "${pre_epochs}" \\
+ --max-inter-episodes "${max_inter}" \\
+ --out "${OUT_DIR}/${name}.json"
+EOF
+ chmod +x "${cmd}"
+ setsid bash "${cmd}" > "${log}" 2>&1 < /dev/null &
+ echo $! > "${pid}"
+ echo "${name}: pid $(cat "${pid}") on GPU ${gpu}"
+}
+
+write_and_launch "${GPU_BASE}" "toy_baseline_no_floss_N80_k40_E1000" 0 0
+write_and_launch "${GPU_PREFLOSS}" "toy_prefloss_N80_k40_E1000" 100 0
+write_and_launch "${GPU_INTER}" "toy_pre_inter_N80_k40_E1000" 100 2
+
+echo "queued toy RNN official-style suite in ${OUT_DIR}"
diff --git a/flossing_suite/launch_trm_faithful_suite.sh b/flossing_suite/launch_trm_faithful_suite.sh
new file mode 100755
index 0000000..2b3190c
--- /dev/null
+++ b/flossing_suite/launch_trm_faithful_suite.sh
@@ -0,0 +1,88 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT="/home/yurenh2/rrm"
+PY="/home/yurenh2/miniconda3/envs/rrm/bin/python"
+OUT_DIR="${ROOT}/research/flossing/flossing_suite/results/trm_faithful"
+CKPT_ROOT="${ROOT}/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro"
+mkdir -p "${OUT_DIR}"
+
+GPU_BASE="${GPU_BASE:-0}"
+GPU_PREFLOSS="${GPU_PREFLOSS:-1}"
+GPU_INTER="${GPU_INTER:-3}"
+TRAIN_STEPS="${TRAIN_STEPS:-20000}"
+PREFLOSS_STEPS="${PREFLOSS_STEPS:-500}"
+INTERFLOSS_STEPS="${INTERFLOSS_STEPS:-100}"
+INTERFLOSS_EVERY="${INTERFLOSS_EVERY:-2000}"
+INTERFLOSS_START="${INTERFLOSS_START:-2000}"
+INTERFLOSS_STOP="${INTERFLOSS_STOP:-10000}"
+EVAL_N="${EVAL_N:-1000}"
+TASK_BATCH_SIZE="${TASK_BATCH_SIZE:-32}"
+FLOSS_BATCH_SIZE="${FLOSS_BATCH_SIZE:-4}"
+
+write_and_launch() {
+ local gpu="$1"
+ local name="$2"
+ local schedule="$3"
+ local floss_steps="$4"
+ local every="$5"
+ local start="$6"
+ local stop="$7"
+ local cmd="${OUT_DIR}/${name}.cmd.sh"
+ local log="${OUT_DIR}/${name}.log"
+ local pid="${OUT_DIR}/${name}.pid"
+
+ cat > "${cmd}" <<EOF
+#!/usr/bin/env bash
+set -euo pipefail
+cd "${ROOT}"
+export CUDA_VISIBLE_DEVICES="${gpu}"
+export PYTHONUNBUFFERED=1
+exec "${PY}" research/flossing/step7_interfloss.py \\
+ --model trm \\
+ --ckpt-root "${CKPT_ROOT}" \\
+ --ckpt-name __random__ \\
+ --init-seed 123 \\
+ --train-steps "${TRAIN_STEPS}" \\
+ --batch-size "${TASK_BATCH_SIZE}" \\
+ --task-batch-size "${TASK_BATCH_SIZE}" \\
+ --floss-batch-size "${FLOSS_BATCH_SIZE}" \\
+ --train-lr 1e-4 \\
+ --floss-lr 1e-4 \\
+ --floss-mode engelken_l2 \\
+ --lambda-star 0 \\
+ --k-lyap 4 \\
+ --lyap-act-steps 4 \\
+ --seed 42 \\
+ --eval-every 1000 \\
+ --eval-n "${EVAL_N}" \\
+ --eval-batch-size 64 \\
+ --floss-log-every 10 \\
+ --train-puzzle-emb \\
+ --puzzle-emb-lr 1e-4 \\
+ --puzzle-emb-weight-decay 1.0 \\
+ --kl-beta 0 \\
+ --floss-steps "${floss_steps}" \\
+ --interfloss-at "${schedule}" \\
+ --interfloss-every "${every}" \\
+ --interfloss-start "${start}" \\
+ --interfloss-stop "${stop}" \\
+ --out "${OUT_DIR}/${name}.json"
+EOF
+ chmod +x "${cmd}"
+ setsid bash "${cmd}" > "${log}" 2>&1 < /dev/null &
+ echo $! > "${pid}"
+ echo "${name}: pid $(cat "${pid}") on GPU ${gpu}"
+}
+
+write_and_launch "${GPU_BASE}" \
+ "trm_seed123_baseline_nofloss_tb${TASK_BATCH_SIZE}_fb${FLOSS_BATCH_SIZE}_${TRAIN_STEPS}" \
+ "" 0 0 0 -1
+write_and_launch "${GPU_PREFLOSS}" \
+ "trm_seed123_prefloss_0_tb${TASK_BATCH_SIZE}_fb${FLOSS_BATCH_SIZE}_k4_${TRAIN_STEPS}" \
+ "0" "${PREFLOSS_STEPS}" 0 0 -1
+write_and_launch "${GPU_INTER}" \
+ "trm_seed123_pre_inter_periodic${INTERFLOSS_EVERY}_tb${TASK_BATCH_SIZE}_fb${FLOSS_BATCH_SIZE}_k4_${TRAIN_STEPS}" \
+ "0" "${INTERFLOSS_STEPS}" "${INTERFLOSS_EVERY}" "${INTERFLOSS_START}" "${INTERFLOSS_STOP}"
+
+echo "queued TRM Engelken-faithful suite in ${OUT_DIR}"
diff --git a/flossing_suite/launch_trm_variant_suite.sh b/flossing_suite/launch_trm_variant_suite.sh
new file mode 100755
index 0000000..86c2e64
--- /dev/null
+++ b/flossing_suite/launch_trm_variant_suite.sh
@@ -0,0 +1,87 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT="/home/yurenh2/rrm"
+PY="/home/yurenh2/miniconda3/envs/rrm/bin/python"
+OUT_DIR="${ROOT}/research/flossing/flossing_suite/results/trm_variants"
+CKPT_ROOT="${ROOT}/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro"
+mkdir -p "${OUT_DIR}"
+
+GPU_TOP1="${GPU_TOP1:-0}"
+GPU_VOLUME="${GPU_VOLUME:-1}"
+GPU_KL="${GPU_KL:-3}"
+TRAIN_STEPS="${TRAIN_STEPS:-20000}"
+FLOSS_STEPS="${FLOSS_STEPS:-100}"
+INTERFLOSS_EVERY="${INTERFLOSS_EVERY:-2000}"
+INTERFLOSS_START="${INTERFLOSS_START:-2000}"
+INTERFLOSS_STOP="${INTERFLOSS_STOP:-10000}"
+EVAL_N="${EVAL_N:-1000}"
+TASK_BATCH_SIZE="${TASK_BATCH_SIZE:-32}"
+FLOSS_BATCH_SIZE="${FLOSS_BATCH_SIZE:-4}"
+
+write_and_launch() {
+ local gpu="$1"
+ local name="$2"
+ local floss_mode="$3"
+ local kl_beta="$4"
+ local cmd="${OUT_DIR}/${name}.cmd.sh"
+ local log="${OUT_DIR}/${name}.log"
+ local pid="${OUT_DIR}/${name}.pid"
+
+ cat > "${cmd}" <<EOF
+#!/usr/bin/env bash
+set -euo pipefail
+cd "${ROOT}"
+export CUDA_VISIBLE_DEVICES="${gpu}"
+export PYTHONUNBUFFERED=1
+exec "${PY}" research/flossing/step7_interfloss.py \\
+ --model trm \\
+ --ckpt-root "${CKPT_ROOT}" \\
+ --ckpt-name __random__ \\
+ --init-seed 123 \\
+ --train-steps "${TRAIN_STEPS}" \\
+ --batch-size "${TASK_BATCH_SIZE}" \\
+ --task-batch-size "${TASK_BATCH_SIZE}" \\
+ --floss-batch-size "${FLOSS_BATCH_SIZE}" \\
+ --train-lr 1e-4 \\
+ --floss-lr 1e-4 \\
+ --floss-mode "${floss_mode}" \\
+ --lambda-star 0 \\
+ --k-lyap 4 \\
+ --lyap-act-steps 4 \\
+ --seed 42 \\
+ --eval-every 1000 \\
+ --eval-n "${EVAL_N}" \\
+ --eval-batch-size 64 \\
+ --floss-log-every 10 \\
+ --train-puzzle-emb \\
+ --puzzle-emb-lr 1e-4 \\
+ --puzzle-emb-weight-decay 1.0 \\
+ --floss-steps "${FLOSS_STEPS}" \\
+ --interfloss-at "0" \\
+ --interfloss-every "${INTERFLOSS_EVERY}" \\
+ --interfloss-start "${INTERFLOSS_START}" \\
+ --interfloss-stop "${INTERFLOSS_STOP}" \\
+ --kl-beta "${kl_beta}" \\
+ --kl-replay-size 64 \\
+ --kl-batch-size 8 \\
+ --kl-temperature 1.0 \\
+ --out "${OUT_DIR}/${name}.json"
+EOF
+ chmod +x "${cmd}"
+ setsid bash "${cmd}" > "${log}" 2>&1 < /dev/null &
+ echo $! > "${pid}"
+ echo "${name}: pid $(cat "${pid}") on GPU ${gpu}"
+}
+
+write_and_launch "${GPU_TOP1}" \
+ "trm_seed123_top1_cf_periodic${INTERFLOSS_EVERY}_tb${TASK_BATCH_SIZE}_fb${FLOSS_BATCH_SIZE}_k4_${TRAIN_STEPS}" \
+ top1_cf 0
+write_and_launch "${GPU_VOLUME}" \
+ "trm_seed123_volume_cf_periodic${INTERFLOSS_EVERY}_tb${TASK_BATCH_SIZE}_fb${FLOSS_BATCH_SIZE}_k4_${TRAIN_STEPS}" \
+ volume_cf 0
+write_and_launch "${GPU_KL}" \
+ "trm_seed123_volume_cf_kl10_periodic${INTERFLOSS_EVERY}_tb${TASK_BATCH_SIZE}_fb${FLOSS_BATCH_SIZE}_k4_${TRAIN_STEPS}" \
+ volume_cf 10
+
+echo "queued TRM CF/volume variant suite in ${OUT_DIR}"
diff --git a/flossing_suite/results/summary/flossing_eval_curves.csv b/flossing_suite/results/summary/flossing_eval_curves.csv
new file mode 100644
index 0000000..50aad05
--- /dev/null
+++ b/flossing_suite/results/summary/flossing_eval_curves.csv
@@ -0,0 +1,399 @@
+name,family,model,selector,x,acc,loss,kind
+engelken_paper_faithful/smoke,rrm_step7,trm,engelken_l2@0_steps1,0,0.0,,initial
+engelken_paper_faithful/smoke,rrm_step7,trm,engelken_l2@0_steps1,0,0.0,,after_floss
+engelken_paper_faithful/smoke,rrm_step7,trm,engelken_l2@0_steps1,1,0.0,,task
+engelken_paper_faithful/smoke,rrm_step7,trm,engelken_l2@0_steps1,1,0.0,,final
+engelken_python/debug_n80_k40_3epoch,toy_rnn,vanilla_rnn,baseline_no_floss,1,0.5124555160142349,2.1269004344940186,eval
+engelken_python/debug_n80_k40_3epoch,toy_rnn,vanilla_rnn,baseline_no_floss,2,0.5113434163701067,1.894472599029541,eval
+engelken_python/debug_n80_k40_3epoch,toy_rnn,vanilla_rnn,baseline_no_floss,3,0.4853202846975089,1.9043740034103394,eval
+engelken_python/gpu_smoke,toy_rnn,vanilla_rnn,prefloss,1,0.5151515151515151,0.8679313659667969,eval
+engelken_python/gpu_smoke,toy_rnn,vanilla_rnn,prefloss,2,0.49242424242424243,0.9058582782745361,eval
+engelken_python/official_py_baseline_no_floss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,baseline_no_floss,1,0.5063389679715302,2.151065170764923,eval
+engelken_python/official_py_baseline_no_floss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,baseline_no_floss,100,0.49888790035587194,0.6981811821460724,eval
+engelken_python/official_py_baseline_no_floss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,baseline_no_floss,200,0.5025578291814947,0.7067497670650482,eval
+engelken_python/official_py_baseline_no_floss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,baseline_no_floss,300,0.4968861209964413,0.7166876345872879,eval
+engelken_python/official_py_baseline_no_floss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,baseline_no_floss,400,0.5023354092526691,0.702445313334465,eval
+engelken_python/official_py_baseline_no_floss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,baseline_no_floss,500,0.49844306049822057,0.6947141587734222,eval
+engelken_python/official_py_baseline_no_floss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,baseline_no_floss,600,0.49649688612099646,0.6974123269319534,eval
+engelken_python/official_py_baseline_no_floss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,baseline_no_floss,700,0.49827624555160144,0.7033253163099289,eval
+engelken_python/official_py_baseline_no_floss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,baseline_no_floss,800,0.5050600533807829,0.6959485709667206,eval
+engelken_python/official_py_baseline_no_floss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,baseline_no_floss,900,0.5216303380782918,0.6901399791240692,eval
+engelken_python/official_py_baseline_no_floss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,baseline_no_floss,1000,0.7770796263345195,0.47778721898794174,eval
+engelken_python/official_py_prefloss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,prefloss,1,0.5019461743772242,1.6856758892536163,eval
+engelken_python/official_py_prefloss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,prefloss,100,0.9623554270462634,0.11776464246213436,eval
+engelken_python/official_py_prefloss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,prefloss,200,0.9935498220640568,0.027145093772560358,eval
+engelken_python/official_py_prefloss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,prefloss,300,0.9963300711743772,0.014187443535774946,eval
+engelken_python/official_py_prefloss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,prefloss,400,0.9975533807829181,0.009522247593849897,eval
+engelken_python/official_py_prefloss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,prefloss,500,0.9973865658362989,0.009814425837248564,eval
+engelken_python/official_py_prefloss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,prefloss,600,0.9983874555160143,0.006339075975120068,eval
+engelken_python/official_py_prefloss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,prefloss,700,0.9981094306049823,0.008150239707902074,eval
+engelken_python/official_py_prefloss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,prefloss,800,0.9981650355871887,0.006329314899630845,eval
+engelken_python/official_py_prefloss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,prefloss,900,0.9981650355871886,0.0068275314988568425,eval
+engelken_python/official_py_prefloss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,prefloss,1000,0.9972753558718862,0.01047352165915072,eval
+engelken_python/smoke,toy_rnn,vanilla_rnn,prefloss,1,0.49242424242424243,0.9093148708343506,eval
+engelken_python/smoke,toy_rnn,vanilla_rnn,prefloss,2,0.5151515151515151,0.9205472469329834,eval
+engelken_python/smoke,toy_rnn,vanilla_rnn,prefloss,3,0.5303030303030303,0.8646541237831116,eval
+flossing_suite/results/smoke/toy_smoke,toy_rnn,vanilla_rnn,prefloss,1,0.5476190476190477,0.8243135213851929,eval
+flossing_suite/results/smoke/toy_smoke,toy_rnn,vanilla_rnn,prefloss,2,0.5238095238095238,0.9111667275428772,eval
+flossing_suite/results/smoke/toy_smoke,toy_rnn,vanilla_rnn,prefloss,3,0.6190476190476191,0.7443239688873291,eval
+flossing_suite/results/smoke/trm_step7_decoupled_periodic_smoke,rrm_step7,trm,engelken_l2@0_steps1_every1,0,0.0,,initial
+flossing_suite/results/smoke/trm_step7_decoupled_periodic_smoke,rrm_step7,trm,engelken_l2@0_steps1_every1,1,0.0,,task
+flossing_suite/results/smoke/trm_step7_decoupled_periodic_smoke,rrm_step7,trm,engelken_l2@0_steps1_every1,2,0.0,,task
+flossing_suite/results/smoke/trm_step7_decoupled_periodic_smoke,rrm_step7,trm,engelken_l2@0_steps1_every1,2,0.0,,final
+flossing_suite/results/smoke/trm_step7_smoke,rrm_step7,trm,engelken_l2@0_steps1,0,0.0,,initial
+flossing_suite/results/smoke/trm_step7_smoke,rrm_step7,trm,engelken_l2@0_steps1,1,0.0,,task
+flossing_suite/results/smoke/trm_step7_smoke,rrm_step7,trm,engelken_l2@0_steps1,1,0.0,,final
+flossing_suite/results/smoke/trm_task_batch32_smoke,rrm_step7,trm,baseline_no_floss,0,0.0,,initial
+flossing_suite/results/smoke/trm_task_batch32_smoke,rrm_step7,trm,baseline_no_floss,1,0.0,,task
+flossing_suite/results/smoke/trm_task_batch32_smoke,rrm_step7,trm,baseline_no_floss,1,0.0,,final
+flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000,toy_rnn,vanilla_rnn,baseline_no_floss,1,0.5063389679715302,2.151065170764923,eval
+flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000,toy_rnn,vanilla_rnn,baseline_no_floss,100,0.49888790035587194,0.6981811821460724,eval
+flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000,toy_rnn,vanilla_rnn,baseline_no_floss,200,0.5025578291814947,0.7067497670650482,eval
+flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000,toy_rnn,vanilla_rnn,baseline_no_floss,300,0.4968861209964413,0.7166876345872879,eval
+flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000,toy_rnn,vanilla_rnn,baseline_no_floss,400,0.5023354092526691,0.702445313334465,eval
+flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000,toy_rnn,vanilla_rnn,baseline_no_floss,500,0.49844306049822057,0.6947141587734222,eval
+flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000,toy_rnn,vanilla_rnn,baseline_no_floss,600,0.49649688612099646,0.6974123269319534,eval
+flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000,toy_rnn,vanilla_rnn,baseline_no_floss,700,0.49827624555160144,0.7033253163099289,eval
+flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000,toy_rnn,vanilla_rnn,baseline_no_floss,800,0.5050600533807829,0.6959485709667206,eval
+flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000,toy_rnn,vanilla_rnn,baseline_no_floss,900,0.5216303380782918,0.6901399791240692,eval
+flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000,toy_rnn,vanilla_rnn,baseline_no_floss,1000,0.7770796263345195,0.47778721898794174,eval
+flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000,toy_rnn,vanilla_rnn,pre_interfloss,1,0.5019461743772242,1.6856758892536163,eval
+flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000,toy_rnn,vanilla_rnn,pre_interfloss,100,0.564279359430605,1.2137768864631653,eval
+flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000,toy_rnn,vanilla_rnn,pre_interfloss,200,0.5421485765124555,1.4198356568813324,eval
+flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000,toy_rnn,vanilla_rnn,pre_interfloss,300,0.9971085409252668,0.013412912143394351,eval
+flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000,toy_rnn,vanilla_rnn,pre_interfloss,400,0.9976089857651246,0.009532386669889092,eval
+flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000,toy_rnn,vanilla_rnn,pre_interfloss,500,0.9974977758007118,0.00779245572630316,eval
+flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000,toy_rnn,vanilla_rnn,pre_interfloss,600,0.9976645907473309,0.007897141389548779,eval
+flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000,toy_rnn,vanilla_rnn,pre_interfloss,700,0.9977201957295374,0.00889350671786815,eval
+flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000,toy_rnn,vanilla_rnn,pre_interfloss,800,0.9978314056939501,0.006675782264210284,eval
+flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000,toy_rnn,vanilla_rnn,pre_interfloss,900,0.9979982206405694,0.00699911720585078,eval
+flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000,toy_rnn,vanilla_rnn,pre_interfloss,1000,0.9978314056939501,0.008064003428444266,eval
+flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000,toy_rnn,vanilla_rnn,prefloss,1,0.5019461743772242,1.6856758892536163,eval
+flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000,toy_rnn,vanilla_rnn,prefloss,100,0.9623554270462634,0.11776464246213436,eval
+flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000,toy_rnn,vanilla_rnn,prefloss,200,0.9935498220640568,0.027145093772560358,eval
+flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000,toy_rnn,vanilla_rnn,prefloss,300,0.9963300711743772,0.014187443535774946,eval
+flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000,toy_rnn,vanilla_rnn,prefloss,400,0.9975533807829181,0.009522247593849897,eval
+flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000,toy_rnn,vanilla_rnn,prefloss,500,0.9973865658362989,0.009814425837248564,eval
+flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000,toy_rnn,vanilla_rnn,prefloss,600,0.9983874555160143,0.006339075975120068,eval
+flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000,toy_rnn,vanilla_rnn,prefloss,700,0.9981094306049823,0.008150239707902074,eval
+flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000,toy_rnn,vanilla_rnn,prefloss,800,0.9981650355871887,0.006329314899630845,eval
+flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000,toy_rnn,vanilla_rnn,prefloss,900,0.9981650355871886,0.0068275314988568425,eval
+flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000,toy_rnn,vanilla_rnn,prefloss,1000,0.9972753558718862,0.01047352165915072,eval
+flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_b4_10000,rrm_step7,trm,baseline_no_floss,0,0.0,,initial
+flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_b4_10000,rrm_step7,trm,baseline_no_floss,1000,0.0,,task
+flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_b4_10000,rrm_step7,trm,baseline_no_floss,2000,0.0,,task
+flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_b4_10000,rrm_step7,trm,baseline_no_floss,3000,0.0,,task
+flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_b4_10000,rrm_step7,trm,baseline_no_floss,4000,0.0,,task
+flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_b4_10000,rrm_step7,trm,baseline_no_floss,5000,0.001,,task
+flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_b4_10000,rrm_step7,trm,baseline_no_floss,6000,0.002,,task
+flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,0,0.0,,initial
+flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,1000,0.0,,task
+flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,2000,0.0,,task
+flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,3000,0.022,,task
+flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,4000,0.027,,task
+flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,5000,0.028,,task
+flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,6000,0.029,,task
+flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,7000,0.066,,task
+flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,8000,0.077,,task
+flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,9000,0.179,,task
+flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,10000,0.136,,task
+flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,11000,0.164,,task
+flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,12000,0.184,,task
+flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,13000,0.176,,task
+flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,14000,0.153,,task
+flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,15000,0.194,,task
+flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,16000,0.185,,task
+flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,17000,0.127,,task
+flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,18000,0.201,,task
+flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,19000,0.144,,task
+flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,20000,0.204,,task
+flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,20000,0.204,,final
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_0_500_b4_k4_10000,rrm_step7,trm,"engelken_l2@0,500_steps500",0,0.0,,initial
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_0_500_b4_k4_10000,rrm_step7,trm,"engelken_l2@0,500_steps500",0,0.0,,after_floss
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,0,0.0,,initial
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,0,0.0,,after_floss
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,1000,0.0,,task
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,2000,0.001,,task
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,2000,0.0,,after_floss
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,3000,0.022,,task
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,4000,0.027,,task
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,4000,0.0,,after_floss
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,5000,0.027,,task
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,6000,0.061,,task
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,6000,0.0,,after_floss
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,7000,0.11,,task
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,8000,0.066,,task
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,8000,0.0,,after_floss
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,9000,0.097,,task
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,10000,0.169,,task
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,10000,0.0,,after_floss
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,11000,0.192,,task
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,12000,0.128,,task
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,13000,0.181,,task
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,14000,0.179,,task
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,15000,0.195,,task
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,16000,0.179,,task
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,17000,0.191,,task
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,18000,0.167,,task
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,19000,0.192,,task
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,20000,0.207,,task
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,20000,0.207,,final
+flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_b4_k4_10000,rrm_step7,trm,engelken_l2@0_steps500,0,0.0,,initial
+flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_b4_k4_10000,rrm_step7,trm,engelken_l2@0_steps500,0,0.0,,after_floss
+flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_b4_k4_10000,rrm_step7,trm,engelken_l2@0_steps500,1000,0.0,,task
+flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_b4_k4_10000,rrm_step7,trm,engelken_l2@0_steps500,2000,0.0,,task
+flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_b4_k4_10000,rrm_step7,trm,engelken_l2@0_steps500,3000,0.0,,task
+flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,0,0.0,,initial
+flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,0,0.0,,after_floss
+flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,1000,0.0,,task
+flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,2000,0.0,,task
+flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,3000,0.023,,task
+flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,4000,0.026,,task
+flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,5000,0.029,,task
+flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,6000,0.067,,task
+flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,7000,0.11,,task
+flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,8000,0.125,,task
+flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,9000,0.157,,task
+flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,10000,0.153,,task
+flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,11000,0.15,,task
+flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,12000,0.184,,task
+flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,13000,0.208,,task
+flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,14000,0.126,,task
+flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,15000,0.21,,task
+flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,16000,0.166,,task
+flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,17000,0.105,,task
+flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,18000,0.184,,task
+flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,19000,0.172,,task
+flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,20000,0.21,,task
+flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,20000,0.21,,final
+step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,100,0.3125,,phase2
+step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,200,0.3359375,,phase2
+step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,300,0.3203125,,phase2
+step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,400,0.33203125,,phase2
+step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,500,0.318359375,,phase2
+step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,600,0.359375,,phase2
+step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,700,0.33984375,,phase2
+step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,800,0.361328125,,phase2
+step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,900,0.373046875,,phase2
+step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,1000,0.359375,,phase2
+step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,1100,0.349609375,,phase2
+step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,1200,0.36328125,,phase2
+step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,1300,0.36328125,,phase2
+step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,1400,0.37890625,,phase2
+step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,1500,0.404296875,,phase2
+step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,1600,0.388671875,,phase2
+step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,1700,0.380859375,,phase2
+step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,1800,0.384765625,,phase2
+step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,1900,0.380859375,,phase2
+step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,2000,0.400390625,,phase2
+step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,2100,0.4140625,,phase2
+step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,2200,0.376953125,,phase2
+step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,2300,0.400390625,,phase2
+step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,2400,0.392578125,,phase2
+step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,2500,0.3828125,,phase2
+step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,2600,0.3984375,,phase2
+step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,2700,0.384765625,,phase2
+step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,2800,0.423828125,,phase2
+step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,2900,0.419921875,,phase2
+step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,3000,0.392578125,,phase2
+step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,3000,0.392578125,,phase2
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,100,0.0,,phase1
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,200,0.0,,phase1
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,300,0.0,,phase1
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,400,0.0,,phase1
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,500,0.0,,phase1
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,500,0.0,,phase1
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,600,0.0,,phase2
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,700,0.0,,phase2
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,800,0.0,,phase2
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,900,0.0,,phase2
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,1000,0.0,,phase2
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,1100,0.015625,,phase2
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,1200,0.076171875,,phase2
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,1300,0.125,,phase2
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,1400,0.1640625,,phase2
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,1500,0.16796875,,phase2
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,1600,0.2109375,,phase2
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,1700,0.2109375,,phase2
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,1800,0.212890625,,phase2
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,1900,0.2265625,,phase2
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,2000,0.259765625,,phase2
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,2100,0.255859375,,phase2
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,2200,0.296875,,phase2
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,2300,0.2734375,,phase2
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,2400,0.2734375,,phase2
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,2500,0.2890625,,phase2
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,2600,0.318359375,,phase2
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,2700,0.326171875,,phase2
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,2800,0.291015625,,phase2
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,2900,0.330078125,,phase2
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,3000,0.32421875,,phase2
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,3100,0.359375,,phase2
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,3200,0.326171875,,phase2
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,3300,0.357421875,,phase2
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,3400,0.34375,,phase2
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,3500,0.310546875,,phase2
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,3500,0.310546875,,phase2
+step6_C_cf,hrm_step6,hrm,prefloss_cf,100,0.0,,phase1
+step6_C_cf,hrm_step6,hrm,prefloss_cf,200,0.0,,phase1
+step6_C_cf,hrm_step6,hrm,prefloss_cf,300,0.0,,phase1
+step6_C_cf,hrm_step6,hrm,prefloss_cf,400,0.0,,phase1
+step6_C_cf,hrm_step6,hrm,prefloss_cf,500,0.0,,phase1
+step6_C_cf,hrm_step6,hrm,prefloss_cf,500,0.0,,phase1
+step6_C_cf,hrm_step6,hrm,prefloss_cf,600,0.0,,phase2
+step6_C_cf,hrm_step6,hrm,prefloss_cf,700,0.0,,phase2
+step6_C_cf,hrm_step6,hrm,prefloss_cf,800,0.0,,phase2
+step6_C_cf,hrm_step6,hrm,prefloss_cf,900,0.064453125,,phase2
+step6_C_cf,hrm_step6,hrm,prefloss_cf,1000,0.119140625,,phase2
+step6_C_cf,hrm_step6,hrm,prefloss_cf,1100,0.181640625,,phase2
+step6_C_cf,hrm_step6,hrm,prefloss_cf,1200,0.23828125,,phase2
+step6_C_cf,hrm_step6,hrm,prefloss_cf,1300,0.259765625,,phase2
+step6_C_cf,hrm_step6,hrm,prefloss_cf,1400,0.291015625,,phase2
+step6_C_cf,hrm_step6,hrm,prefloss_cf,1500,0.302734375,,phase2
+step6_C_cf,hrm_step6,hrm,prefloss_cf,1600,0.298828125,,phase2
+step6_C_cf,hrm_step6,hrm,prefloss_cf,1700,0.3046875,,phase2
+step6_C_cf,hrm_step6,hrm,prefloss_cf,1800,0.30078125,,phase2
+step6_C_cf,hrm_step6,hrm,prefloss_cf,1900,0.3125,,phase2
+step6_C_cf,hrm_step6,hrm,prefloss_cf,2000,0.33203125,,phase2
+step6_C_cf,hrm_step6,hrm,prefloss_cf,2100,0.31640625,,phase2
+step6_C_cf,hrm_step6,hrm,prefloss_cf,2200,0.3359375,,phase2
+step6_C_cf,hrm_step6,hrm,prefloss_cf,2300,0.341796875,,phase2
+step6_C_cf,hrm_step6,hrm,prefloss_cf,2400,0.33203125,,phase2
+step6_C_cf,hrm_step6,hrm,prefloss_cf,2500,0.353515625,,phase2
+step6_C_cf,hrm_step6,hrm,prefloss_cf,2600,0.35546875,,phase2
+step6_C_cf,hrm_step6,hrm,prefloss_cf,2700,0.361328125,,phase2
+step6_C_cf,hrm_step6,hrm,prefloss_cf,2800,0.35546875,,phase2
+step6_C_cf,hrm_step6,hrm,prefloss_cf,2900,0.3828125,,phase2
+step6_C_cf,hrm_step6,hrm,prefloss_cf,3000,0.3828125,,phase2
+step6_C_cf,hrm_step6,hrm,prefloss_cf,3100,0.361328125,,phase2
+step6_C_cf,hrm_step6,hrm,prefloss_cf,3200,0.390625,,phase2
+step6_C_cf,hrm_step6,hrm,prefloss_cf,3300,0.3984375,,phase2
+step6_C_cf,hrm_step6,hrm,prefloss_cf,3400,0.359375,,phase2
+step6_C_cf,hrm_step6,hrm,prefloss_cf,3500,0.380859375,,phase2
+step6_C_cf,hrm_step6,hrm,prefloss_cf,3500,0.380859375,,phase2
+step7_A_hrm_engelken_interfloss_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500",0,0.517578125,,initial
+step7_A_hrm_engelken_interfloss_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500",0,0.0,,after_floss
+step7_A_hrm_engelken_interfloss_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500",500,0.0,,after_floss
+step7_A_hrm_engelken_interfloss_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500",1000,0.1796875,,task
+step7_A_hrm_engelken_interfloss_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500",2000,0.583984375,,task
+step7_A_hrm_engelken_interfloss_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500",3000,0.580078125,,task
+step7_A_hrm_engelken_interfloss_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500",4000,0.58984375,,task
+step7_A_hrm_engelken_interfloss_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500",5000,0.611328125,,task
+step7_A_hrm_engelken_interfloss_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500",6000,0.634765625,,task
+step7_A_hrm_engelken_interfloss_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500",7000,0.63671875,,task
+step7_A_hrm_engelken_interfloss_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500",8000,0.662109375,,task
+step7_A_hrm_engelken_interfloss_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500",9000,0.642578125,,task
+step7_A_hrm_engelken_interfloss_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500",10000,0.646484375,,task
+step7_A_hrm_engelken_interfloss_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500",10000,0.646484375,,final
+step7_B_trm_engelken_interfloss_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500",0,0.59765625,,initial
+step7_B_trm_engelken_interfloss_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500",0,0.0,,after_floss
+step7_B_trm_engelken_interfloss_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500",500,0.0,,after_floss
+step7_B_trm_engelken_interfloss_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500",1000,0.021484375,,task
+step7_B_trm_engelken_interfloss_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500",2000,0.041015625,,task
+step7_B_trm_engelken_interfloss_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500",3000,0.255859375,,task
+step7_B_trm_engelken_interfloss_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500",4000,0.275390625,,task
+step7_B_trm_engelken_interfloss_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500",5000,0.453125,,task
+step7_B_trm_engelken_interfloss_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500",6000,0.466796875,,task
+step7_B_trm_engelken_interfloss_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500",7000,0.494140625,,task
+step7_B_trm_engelken_interfloss_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500",8000,0.501953125,,task
+step7_B_trm_engelken_interfloss_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500",9000,0.521484375,,task
+step7_B_trm_engelken_interfloss_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500",10000,0.51953125,,task
+step7_B_trm_engelken_interfloss_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500",10000,0.51953125,,final
+step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",0,0.517578125,,initial
+step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",0,0.017578125,,after_floss
+step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",500,0.021484375,,after_floss
+step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",1000,0.177734375,,task
+step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",2000,0.482421875,,task
+step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",3000,0.54296875,,task
+step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",4000,0.5859375,,task
+step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",5000,0.599609375,,task
+step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",6000,0.595703125,,task
+step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",7000,0.62109375,,task
+step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",8000,0.61328125,,task
+step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",9000,0.625,,task
+step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",10000,0.62109375,,task
+step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",10000,0.62109375,,final
+step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",0,0.59765625,,initial
+step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",0,0.025390625,,after_floss
+step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",500,0.013671875,,after_floss
+step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",1000,0.041015625,,task
+step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",2000,0.322265625,,task
+step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",3000,0.439453125,,task
+step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",4000,0.515625,,task
+step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",5000,0.498046875,,task
+step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",6000,0.57421875,,task
+step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",7000,0.556640625,,task
+step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",8000,0.55859375,,task
+step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",9000,0.5546875,,task
+step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",10000,0.595703125,,task
+step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",10000,0.595703125,,final
+step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",0,0.517578125,,initial
+step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",0,0.017578125,,after_floss
+step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",500,0.017578125,,after_floss
+step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",1000,0.193359375,,task
+step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",2000,0.4453125,,task
+step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",3000,0.564453125,,task
+step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",4000,0.564453125,,task
+step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",5000,0.583984375,,task
+step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",6000,0.61328125,,task
+step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",7000,0.63671875,,task
+step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",8000,0.61328125,,task
+step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",9000,0.615234375,,task
+step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",10000,0.619140625,,task
+step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",10000,0.619140625,,final
+step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",0,0.59765625,,initial
+step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",0,0.046875,,after_floss
+step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",500,0.015625,,after_floss
+step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",1000,0.060546875,,task
+step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",2000,0.18359375,,task
+step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",3000,0.408203125,,task
+step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",4000,0.466796875,,task
+step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",5000,0.47265625,,task
+step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",6000,0.5546875,,task
+step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",7000,0.541015625,,task
+step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",8000,0.58203125,,task
+step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",9000,0.55859375,,task
+step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",10000,0.544921875,,task
+step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",10000,0.544921875,,final
+step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k,rrm_step7,hrm,"volume_cf@0,500_steps500_kl10",0,0.517578125,,initial
+step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k,rrm_step7,hrm,"volume_cf@0,500_steps500_kl10",0,0.025390625,,after_floss
+step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k,rrm_step7,hrm,"volume_cf@0,500_steps500_kl10",500,0.015625,,after_floss
+step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k,rrm_step7,hrm,"volume_cf@0,500_steps500_kl10",1000,0.208984375,,task
+step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k,rrm_step7,hrm,"volume_cf@0,500_steps500_kl10",2000,0.478515625,,task
+step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k,rrm_step7,hrm,"volume_cf@0,500_steps500_kl10",3000,0.521484375,,task
+step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k,rrm_step7,hrm,"volume_cf@0,500_steps500_kl10",4000,0.556640625,,task
+step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k,rrm_step7,hrm,"volume_cf@0,500_steps500_kl10",5000,0.58203125,,task
+step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k,rrm_step7,hrm,"volume_cf@0,500_steps500_kl10",6000,0.6171875,,task
+step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k,rrm_step7,hrm,"volume_cf@0,500_steps500_kl10",7000,0.607421875,,task
+step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k,rrm_step7,hrm,"volume_cf@0,500_steps500_kl10",8000,0.62109375,,task
+step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k,rrm_step7,hrm,"volume_cf@0,500_steps500_kl10",9000,0.58984375,,task
+step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k,rrm_step7,hrm,"volume_cf@0,500_steps500_kl10",10000,0.607421875,,task
+step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k,rrm_step7,hrm,"volume_cf@0,500_steps500_kl10",10000,0.607421875,,final
+step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k,rrm_step7,trm,"volume_cf@0,500_steps500_kl10",0,0.59765625,,initial
+step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k,rrm_step7,trm,"volume_cf@0,500_steps500_kl10",0,0.078125,,after_floss
+step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k,rrm_step7,trm,"volume_cf@0,500_steps500_kl10",500,0.01953125,,after_floss
+step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k,rrm_step7,trm,"volume_cf@0,500_steps500_kl10",1000,0.099609375,,task
+step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k,rrm_step7,trm,"volume_cf@0,500_steps500_kl10",2000,0.322265625,,task
+step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k,rrm_step7,trm,"volume_cf@0,500_steps500_kl10",3000,0.41015625,,task
+step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k,rrm_step7,trm,"volume_cf@0,500_steps500_kl10",4000,0.44921875,,task
+step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k,rrm_step7,trm,"volume_cf@0,500_steps500_kl10",5000,0.498046875,,task
+step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k,rrm_step7,trm,"volume_cf@0,500_steps500_kl10",6000,0.537109375,,task
+step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k,rrm_step7,trm,"volume_cf@0,500_steps500_kl10",7000,0.5625,,task
+step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k,rrm_step7,trm,"volume_cf@0,500_steps500_kl10",8000,0.548828125,,task
+step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k,rrm_step7,trm,"volume_cf@0,500_steps500_kl10",9000,0.556640625,,task
+step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k,rrm_step7,trm,"volume_cf@0,500_steps500_kl10",10000,0.55078125,,task
+step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k,rrm_step7,trm,"volume_cf@0,500_steps500_kl10",10000,0.55078125,,final
+step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps100_kl100",0,0.517578125,,initial
+step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps100_kl100",0,0.0,,after_floss
+step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps100_kl100",500,0.017578125,,after_floss
+step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps100_kl100",1000,0.642578125,,task
+step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps100_kl100",2000,0.63671875,,task
+step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps100_kl100",3000,0.65625,,task
+step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps100_kl100",4000,0.66015625,,task
+step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps100_kl100",5000,0.64453125,,task
+step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps100_kl100",6000,0.638671875,,task
+step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps100_kl100",7000,0.677734375,,task
+step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps100_kl100",8000,0.65234375,,task
+step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps100_kl100",9000,0.658203125,,task
+step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps100_kl100",10000,0.63671875,,task
+step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps100_kl100",10000,0.63671875,,final
diff --git a/flossing_suite/results/summary/flossing_runs_summary.csv b/flossing_suite/results/summary/flossing_runs_summary.csv
new file mode 100644
index 0000000..3adb736
--- /dev/null
+++ b/flossing_suite/results/summary/flossing_runs_summary.csv
@@ -0,0 +1,32 @@
+name,path,family,model,selector,floss_mode,schedule,train_steps,floss_steps,k_lyap,kl_beta,initial_acc,final_acc,best_acc,delta_final,n_evals,n_floss_episodes,status,task_batch_size,floss_batch_size,effective_official_gbs768_steps
+step6_A_baseline_no_prefloss,/home/yurenh2/rrm/research/flossing/step6_A_baseline_no_prefloss.json,hrm_step6,hrm,baseline_no_prefloss,volume_cf,pre=0,3000,0,8,0,0.3046875,0.392578125,0.423828125,0.087890625,31,0,complete,,,
+step6_B_engelken,/home/yurenh2/rrm/research/flossing/step6_B_engelken.json,hrm_step6,hrm,prefloss_engelken,engelken,pre=500,3000,500,2,0,0.3046875,0.310546875,0.359375,0.005859375,37,1,complete,,,
+step6_C_cf,/home/yurenh2/rrm/research/flossing/step6_C_cf.json,hrm_step6,hrm,prefloss_cf,cf,pre=500,3000,500,2,0,0.3046875,0.380859375,0.3984375,0.076171875,37,1,complete,,,
+step7_A_hrm_engelken_interfloss_26040_k8_10k,/home/yurenh2/rrm/research/flossing/step7_A_hrm_engelken_interfloss_26040_k8_10k.json,rrm_step7,hrm,"engelken_l2@0,500_steps500",engelken_l2,"0,500",10000,500,8,0,0.517578125,0.646484375,0.662109375,0.12890625,14,2,complete,8,8,104.16666666666667
+step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k,/home/yurenh2/rrm/research/flossing/step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k.json,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",engelken_l2,"0,500",10000,500,8,10.0,0.517578125,0.62109375,0.625,0.103515625,14,2,complete,8,8,104.16666666666667
+step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k,/home/yurenh2/rrm/research/flossing/step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k.json,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",engelken_l2,"0,500",10000,500,8,10.0,0.517578125,0.619140625,0.63671875,0.1015625,14,2,complete,8,8,104.16666666666667
+step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k,/home/yurenh2/rrm/research/flossing/step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k.json,rrm_step7,hrm,"volume_cf@0,500_steps500_kl10",volume_cf,"0,500",10000,500,8,10.0,0.517578125,0.607421875,0.62109375,0.08984375,14,2,complete,8,8,104.16666666666667
+step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k,/home/yurenh2/rrm/research/flossing/step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k.json,rrm_step7,hrm,"engelken_l2@0,500_steps100_kl100",engelken_l2,"0,500",10000,100,8,100.0,0.517578125,0.63671875,0.677734375,0.119140625,14,2,complete,8,8,104.16666666666667
+engelken_paper_faithful/smoke,/home/yurenh2/rrm/research/flossing/engelken_paper_faithful/smoke.json,rrm_step7,trm,engelken_l2@0_steps1,engelken_l2,0,1,1,1,0.0,0.0,0.0,0.0,0.0,4,1,complete,2,2,0.0026041666666666665
+flossing_suite/results/smoke/trm_step7_decoupled_periodic_smoke,/home/yurenh2/rrm/research/flossing/flossing_suite/results/smoke/trm_step7_decoupled_periodic_smoke.json,rrm_step7,trm,engelken_l2@0_steps1_every1,engelken_l2,"0; every=1, start=1, stop=1",2,1,1,0.0,0.0,0.0,0.0,0.0,4,2,complete,8,2,0.020833333333333332
+flossing_suite/results/smoke/trm_step7_smoke,/home/yurenh2/rrm/research/flossing/flossing_suite/results/smoke/trm_step7_smoke.json,rrm_step7,trm,engelken_l2@0_steps1,engelken_l2,0,1,1,1,0.0,0.0,0.0,0.0,0.0,3,1,complete,2,2,0.0026041666666666665
+flossing_suite/results/smoke/trm_task_batch32_smoke,/home/yurenh2/rrm/research/flossing/flossing_suite/results/smoke/trm_task_batch32_smoke.json,rrm_step7,trm,baseline_no_floss,engelken_l2,,1,0,4,0.0,0.0,0.0,0.0,0.0,3,0,complete,32,4,0.041666666666666664
+flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_b4_10000,/home/yurenh2/rrm/research/flossing/flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_b4_10000.json,rrm_step7,trm,baseline_no_floss,engelken_l2,,10000,0,4,0.0,0.0,0.002,0.002,0.002,7,0,running_or_incomplete,4,4,52.083333333333336
+flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,/home/yurenh2/rrm/research/flossing/flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000.json,rrm_step7,trm,baseline_no_floss,engelken_l2,,20000,0,4,0.0,0.0,0.204,0.204,0.204,22,0,complete,32,4,833.3333333333334
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_0_500_b4_k4_10000,/home/yurenh2/rrm/research/flossing/flossing_suite/results/trm_faithful/trm_seed123_pre_inter_0_500_b4_k4_10000.json,rrm_step7,trm,"engelken_l2@0,500_steps500",engelken_l2,"0,500",10000,500,4,0.0,0.0,0.0,0.0,0.0,2,1,running_or_incomplete,4,4,52.083333333333336
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,/home/yurenh2/rrm/research/flossing/flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000.json,rrm_step7,trm,engelken_l2@0_steps100_every2000,engelken_l2,"0; every=2000, start=2000, stop=10000",20000,100,4,0.0,0.0,0.207,0.207,0.207,28,6,complete,32,4,833.3333333333334
+flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_b4_k4_10000,/home/yurenh2/rrm/research/flossing/flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_b4_k4_10000.json,rrm_step7,trm,engelken_l2@0_steps500,engelken_l2,0,10000,500,4,0.0,0.0,0.0,0.0,0.0,5,1,running_or_incomplete,4,4,52.083333333333336
+flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,/home/yurenh2/rrm/research/flossing/flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000.json,rrm_step7,trm,engelken_l2@0_steps500,engelken_l2,0,20000,500,4,0.0,0.0,0.21,0.21,0.21,23,1,complete,32,4,833.3333333333334
+step7_B_trm_engelken_interfloss_26041_k4_batch4_10k,/home/yurenh2/rrm/research/flossing/step7_B_trm_engelken_interfloss_26041_k4_batch4_10k.json,rrm_step7,trm,"engelken_l2@0,500_steps500",engelken_l2,"0,500",10000,500,4,0,0.59765625,0.51953125,0.59765625,-0.078125,14,2,complete,4,4,52.083333333333336
+step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k,/home/yurenh2/rrm/research/flossing/step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k.json,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",engelken_l2,"0,500",10000,500,4,10.0,0.59765625,0.595703125,0.59765625,-0.001953125,14,2,complete,4,4,52.083333333333336
+step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k,/home/yurenh2/rrm/research/flossing/step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k.json,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",engelken_l2,"0,500",10000,500,4,10.0,0.59765625,0.544921875,0.59765625,-0.052734375,14,2,complete,4,4,52.083333333333336
+step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k,/home/yurenh2/rrm/research/flossing/step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k.json,rrm_step7,trm,"volume_cf@0,500_steps500_kl10",volume_cf,"0,500",10000,500,4,10.0,0.59765625,0.55078125,0.59765625,-0.046875,14,2,complete,4,4,52.083333333333336
+engelken_python/debug_n80_k40_3epoch,/home/yurenh2/rrm/research/flossing/engelken_python/debug_n80_k40_3epoch.json,toy_rnn,vanilla_rnn,baseline_no_floss,engelken_l2,"pre=0,inter_period=100,max_inter=0",3,0,40,0,0.5124555160142349,0.4853202846975089,0.5124555160142349,-0.027135231316726016,3,0,complete,,,
+engelken_python/gpu_smoke,/home/yurenh2/rrm/research/flossing/engelken_python/gpu_smoke.json,toy_rnn,vanilla_rnn,prefloss,engelken_l2,"pre=1,inter_period=100,max_inter=0",2,1,4,0,0.5151515151515151,0.49242424242424243,0.5151515151515151,-0.022727272727272707,2,1,complete,,,
+engelken_python/official_py_baseline_no_floss_N80_k40_E1000_setsid,/home/yurenh2/rrm/research/flossing/engelken_python/official_py_baseline_no_floss_N80_k40_E1000_setsid.json,toy_rnn,vanilla_rnn,baseline_no_floss,engelken_l2,"pre=0,inter_period=100,max_inter=0",1000,0,40,0,0.5063389679715302,0.7770796263345195,0.7770796263345195,0.27074065836298933,11,0,complete,,,
+engelken_python/official_py_prefloss_N80_k40_E1000_setsid,/home/yurenh2/rrm/research/flossing/engelken_python/official_py_prefloss_N80_k40_E1000_setsid.json,toy_rnn,vanilla_rnn,prefloss,engelken_l2,"pre=100,inter_period=100,max_inter=0",1000,100,40,0,0.5019461743772242,0.9972753558718862,0.9983874555160143,0.495329181494662,11,1,complete,,,
+engelken_python/smoke,/home/yurenh2/rrm/research/flossing/engelken_python/smoke.json,toy_rnn,vanilla_rnn,prefloss,engelken_l2,"pre=2,inter_period=100,max_inter=0",3,2,4,0,0.49242424242424243,0.5303030303030303,0.5303030303030303,0.037878787878787845,3,1,complete,,,
+flossing_suite/results/smoke/toy_smoke,/home/yurenh2/rrm/research/flossing/flossing_suite/results/smoke/toy_smoke.json,toy_rnn,vanilla_rnn,prefloss,engelken_l2,"pre=2,inter_period=100,max_inter=0",3,2,4,0,0.5476190476190477,0.6190476190476191,0.6190476190476191,0.0714285714285714,3,1,complete,,,
+flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000,/home/yurenh2/rrm/research/flossing/flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000.json,toy_rnn,vanilla_rnn,baseline_no_floss,engelken_l2,"pre=0,inter_period=100,max_inter=0",1000,0,40,0,0.5063389679715302,0.7770796263345195,0.7770796263345195,0.27074065836298933,11,0,complete,,,
+flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000,/home/yurenh2/rrm/research/flossing/flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000.json,toy_rnn,vanilla_rnn,pre_interfloss,engelken_l2,"pre=100,inter_period=100,max_inter=2",1000,300,40,0,0.5019461743772242,0.9978314056939501,0.9979982206405694,0.4958852313167259,11,3,complete,,,
+flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000,/home/yurenh2/rrm/research/flossing/flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000.json,toy_rnn,vanilla_rnn,prefloss,engelken_l2,"pre=100,inter_period=100,max_inter=0",1000,100,40,0,0.5019461743772242,0.9972753558718862,0.9983874555160143,0.495329181494662,11,1,complete,,,
diff --git a/flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000.cmd.sh b/flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000.cmd.sh
new file mode 100755
index 0000000..d7d9526
--- /dev/null
+++ b/flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000.cmd.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+# Recorded launch command for the toy-RNN run
+# "toy_baseline_no_floss_N80_k40_E1000": runs engelken_python_flossing.py on
+# CUDA device 0 with --pre-epochs 0 and --max-inter-episodes 0 and writes the
+# result JSON to the --out path given on the last line.
+set -euo pipefail
+# The script path passed to python below is relative to this directory.
+cd "/home/yurenh2/rrm"
+export CUDA_VISIBLE_DEVICES="0"
+export PYTHONUNBUFFERED=1
+# exec replaces this shell with the Python process (no wrapper shell remains).
+exec "/home/yurenh2/miniconda3/envs/rrm/bin/python" research/flossing/engelken_python_flossing.py \
+ --hidden-size 80 \
+ --n-lyap 40 \
+ --train-epochs 1000 \
+ --inter-period 100 \
+ --inter-epochs 100 \
+ --batch-size 16 \
+ --input-dim 1 \
+ --train-steps 300 \
+ --lyap-steps 55 \
+ --floss-input-steps 300 \
+ --seed-ic 1 \
+ --seed-input 1 \
+ --seed-net 1 \
+ --seed-ons 1 \
+ --lr 0.001 \
+ --beta1 0.9 \
+ --beta2 0.999 \
+ --init-type 1 \
+ --recurrent-gain 1.0 \
+ --recurrent-mean-gain 0.0 \
+ --input-scale 1.0 \
+ --delay 10 \
+ --ws-std 1.0 \
+ --ws-mean 0.0 \
+ --wr-std 1.0 \
+ --wr-mean 0.0 \
+ --b-std 0.1 \
+ --b-mean 0.0 \
+ --task -1 \
+ --lyap-target 0.0 \
+ --eval-every 100 \
+ --eval-batches 4 \
+ --log-every-floss 25 \
+ --device cuda \
+ --pre-epochs "0" \
+ --max-inter-episodes "0" \
+ --out "/home/yurenh2/rrm/research/flossing/flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000.json"
diff --git a/flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000.cmd.sh b/flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000.cmd.sh
new file mode 100755
index 0000000..13383f9
--- /dev/null
+++ b/flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000.cmd.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+# Recorded launch command for the toy-RNN run "toy_pre_inter_N80_k40_E1000":
+# runs engelken_python_flossing.py on CUDA device 3 with --pre-epochs 100 and
+# --max-inter-episodes 2 and writes the result JSON to the --out path given on
+# the last line.
+set -euo pipefail
+# The script path passed to python below is relative to this directory.
+cd "/home/yurenh2/rrm"
+export CUDA_VISIBLE_DEVICES="3"
+export PYTHONUNBUFFERED=1
+# exec replaces this shell with the Python process (no wrapper shell remains).
+exec "/home/yurenh2/miniconda3/envs/rrm/bin/python" research/flossing/engelken_python_flossing.py \
+ --hidden-size 80 \
+ --n-lyap 40 \
+ --train-epochs 1000 \
+ --inter-period 100 \
+ --inter-epochs 100 \
+ --batch-size 16 \
+ --input-dim 1 \
+ --train-steps 300 \
+ --lyap-steps 55 \
+ --floss-input-steps 300 \
+ --seed-ic 1 \
+ --seed-input 1 \
+ --seed-net 1 \
+ --seed-ons 1 \
+ --lr 0.001 \
+ --beta1 0.9 \
+ --beta2 0.999 \
+ --init-type 1 \
+ --recurrent-gain 1.0 \
+ --recurrent-mean-gain 0.0 \
+ --input-scale 1.0 \
+ --delay 10 \
+ --ws-std 1.0 \
+ --ws-mean 0.0 \
+ --wr-std 1.0 \
+ --wr-mean 0.0 \
+ --b-std 0.1 \
+ --b-mean 0.0 \
+ --task -1 \
+ --lyap-target 0.0 \
+ --eval-every 100 \
+ --eval-batches 4 \
+ --log-every-floss 25 \
+ --device cuda \
+ --pre-epochs "100" \
+ --max-inter-episodes "2" \
+ --out "/home/yurenh2/rrm/research/flossing/flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000.json"
diff --git a/flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000.cmd.sh b/flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000.cmd.sh
new file mode 100755
index 0000000..db82a74
--- /dev/null
+++ b/flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000.cmd.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+# Recorded launch command for the toy-RNN run "toy_prefloss_N80_k40_E1000":
+# runs engelken_python_flossing.py on CUDA device 1 with --pre-epochs 100 and
+# --max-inter-episodes 0 and writes the result JSON to the --out path given on
+# the last line.
+set -euo pipefail
+# The script path passed to python below is relative to this directory.
+cd "/home/yurenh2/rrm"
+export CUDA_VISIBLE_DEVICES="1"
+export PYTHONUNBUFFERED=1
+# exec replaces this shell with the Python process (no wrapper shell remains).
+exec "/home/yurenh2/miniconda3/envs/rrm/bin/python" research/flossing/engelken_python_flossing.py \
+ --hidden-size 80 \
+ --n-lyap 40 \
+ --train-epochs 1000 \
+ --inter-period 100 \
+ --inter-epochs 100 \
+ --batch-size 16 \
+ --input-dim 1 \
+ --train-steps 300 \
+ --lyap-steps 55 \
+ --floss-input-steps 300 \
+ --seed-ic 1 \
+ --seed-input 1 \
+ --seed-net 1 \
+ --seed-ons 1 \
+ --lr 0.001 \
+ --beta1 0.9 \
+ --beta2 0.999 \
+ --init-type 1 \
+ --recurrent-gain 1.0 \
+ --recurrent-mean-gain 0.0 \
+ --input-scale 1.0 \
+ --delay 10 \
+ --ws-std 1.0 \
+ --ws-mean 0.0 \
+ --wr-std 1.0 \
+ --wr-mean 0.0 \
+ --b-std 0.1 \
+ --b-mean 0.0 \
+ --task -1 \
+ --lyap-target 0.0 \
+ --eval-every 100 \
+ --eval-batches 4 \
+ --log-every-floss 25 \
+ --device cuda \
+ --pre-epochs "100" \
+ --max-inter-episodes "0" \
+ --out "/home/yurenh2/rrm/research/flossing/flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000.json"
diff --git a/flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_b4_10000.cmd.sh b/flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_b4_10000.cmd.sh
new file mode 100755
index 0000000..db9568c
--- /dev/null
+++ b/flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_b4_10000.cmd.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+# Recorded launch command for the TRM run
+# "trm_seed123_baseline_nofloss_b4_10000": runs step7_interfloss.py on CUDA
+# device 0 for 10000 train steps at batch size 4 with --floss-steps 0 and an
+# empty --interfloss-at, writing the result JSON to the --out path on the last
+# line.
+set -euo pipefail
+# The script path passed to python below is relative to this directory.
+cd "/home/yurenh2/rrm"
+export CUDA_VISIBLE_DEVICES="0"
+export PYTHONUNBUFFERED=1
+# exec replaces this shell with the Python process (no wrapper shell remains).
+exec "/home/yurenh2/miniconda3/envs/rrm/bin/python" research/flossing/step7_interfloss.py \
+ --model trm \
+ --ckpt-root "/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro" \
+ --ckpt-name __random__ \
+ --init-seed 123 \
+ --train-steps "10000" \
+ --batch-size "4" \
+ --train-lr 1e-4 \
+ --floss-lr 1e-4 \
+ --floss-mode engelken_l2 \
+ --lambda-star 0 \
+ --k-lyap 4 \
+ --lyap-act-steps 4 \
+ --seed 42 \
+ --eval-every 1000 \
+ --eval-n "1000" \
+ --eval-batch-size 64 \
+ --floss-log-every 10 \
+ --train-puzzle-emb \
+ --puzzle-emb-lr 1e-4 \
+ --puzzle-emb-weight-decay 1.0 \
+ --kl-beta 0 \
+ --floss-steps "0" \
+ --interfloss-at "" \
+ --out "/home/yurenh2/rrm/research/flossing/flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_b4_10000.json"
diff --git a/flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000.cmd.sh b/flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000.cmd.sh
new file mode 100755
index 0000000..52a9fb4
--- /dev/null
+++ b/flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000.cmd.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+# Recorded launch command for the TRM run
+# "trm_seed123_baseline_nofloss_tb32_fb4_20000": runs step7_interfloss.py on
+# CUDA device 0 for 20000 train steps with task batch size 32 and floss batch
+# size 4, --floss-steps 0, an empty --interfloss-at and --interfloss-every 0,
+# writing the result JSON to the --out path on the last line.
+set -euo pipefail
+# The script path passed to python below is relative to this directory.
+cd "/home/yurenh2/rrm"
+export CUDA_VISIBLE_DEVICES="0"
+export PYTHONUNBUFFERED=1
+# exec replaces this shell with the Python process (no wrapper shell remains).
+exec "/home/yurenh2/miniconda3/envs/rrm/bin/python" research/flossing/step7_interfloss.py \
+ --model trm \
+ --ckpt-root "/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro" \
+ --ckpt-name __random__ \
+ --init-seed 123 \
+ --train-steps "20000" \
+ --batch-size "32" \
+ --task-batch-size "32" \
+ --floss-batch-size "4" \
+ --train-lr 1e-4 \
+ --floss-lr 1e-4 \
+ --floss-mode engelken_l2 \
+ --lambda-star 0 \
+ --k-lyap 4 \
+ --lyap-act-steps 4 \
+ --seed 42 \
+ --eval-every 1000 \
+ --eval-n "1000" \
+ --eval-batch-size 64 \
+ --floss-log-every 10 \
+ --train-puzzle-emb \
+ --puzzle-emb-lr 1e-4 \
+ --puzzle-emb-weight-decay 1.0 \
+ --kl-beta 0 \
+ --floss-steps "0" \
+ --interfloss-at "" \
+ --interfloss-every "0" \
+ --interfloss-start "0" \
+ --interfloss-stop "-1" \
+ --out "/home/yurenh2/rrm/research/flossing/flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000.json"
diff --git a/flossing_suite/results/trm_faithful/trm_seed123_pre_inter_0_500_b4_k4_10000.cmd.sh b/flossing_suite/results/trm_faithful/trm_seed123_pre_inter_0_500_b4_k4_10000.cmd.sh
new file mode 100755
index 0000000..5c3ba64
--- /dev/null
+++ b/flossing_suite/results/trm_faithful/trm_seed123_pre_inter_0_500_b4_k4_10000.cmd.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+# Recorded launch command for the TRM run
+# "trm_seed123_pre_inter_0_500_b4_k4_10000": runs step7_interfloss.py on CUDA
+# device 3 for 10000 train steps at batch size 4 with --floss-steps 500 and
+# --interfloss-at "0,500", writing the result JSON to the --out path on the
+# last line.
+set -euo pipefail
+# The script path passed to python below is relative to this directory.
+cd "/home/yurenh2/rrm"
+export CUDA_VISIBLE_DEVICES="3"
+export PYTHONUNBUFFERED=1
+# exec replaces this shell with the Python process (no wrapper shell remains).
+exec "/home/yurenh2/miniconda3/envs/rrm/bin/python" research/flossing/step7_interfloss.py \
+ --model trm \
+ --ckpt-root "/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro" \
+ --ckpt-name __random__ \
+ --init-seed 123 \
+ --train-steps "10000" \
+ --batch-size "4" \
+ --train-lr 1e-4 \
+ --floss-lr 1e-4 \
+ --floss-mode engelken_l2 \
+ --lambda-star 0 \
+ --k-lyap 4 \
+ --lyap-act-steps 4 \
+ --seed 42 \
+ --eval-every 1000 \
+ --eval-n "1000" \
+ --eval-batch-size 64 \
+ --floss-log-every 10 \
+ --train-puzzle-emb \
+ --puzzle-emb-lr 1e-4 \
+ --puzzle-emb-weight-decay 1.0 \
+ --kl-beta 0 \
+ --floss-steps "500" \
+ --interfloss-at "0,500" \
+ --out "/home/yurenh2/rrm/research/flossing/flossing_suite/results/trm_faithful/trm_seed123_pre_inter_0_500_b4_k4_10000.json"
diff --git a/flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000.cmd.sh b/flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000.cmd.sh
new file mode 100755
index 0000000..482c147
--- /dev/null
+++ b/flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000.cmd.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+# Recorded launch command for the TRM run
+# "trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000": runs
+# step7_interfloss.py on CUDA device 3 for 20000 train steps with task batch
+# size 32 and floss batch size 4, --floss-steps 100, --interfloss-at "0", and
+# a periodic schedule (--interfloss-every 2000, start 2000, stop 10000),
+# writing the result JSON to the --out path on the last line.
+set -euo pipefail
+# The script path passed to python below is relative to this directory.
+cd "/home/yurenh2/rrm"
+export CUDA_VISIBLE_DEVICES="3"
+export PYTHONUNBUFFERED=1
+# exec replaces this shell with the Python process (no wrapper shell remains).
+exec "/home/yurenh2/miniconda3/envs/rrm/bin/python" research/flossing/step7_interfloss.py \
+ --model trm \
+ --ckpt-root "/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro" \
+ --ckpt-name __random__ \
+ --init-seed 123 \
+ --train-steps "20000" \
+ --batch-size "32" \
+ --task-batch-size "32" \
+ --floss-batch-size "4" \
+ --train-lr 1e-4 \
+ --floss-lr 1e-4 \
+ --floss-mode engelken_l2 \
+ --lambda-star 0 \
+ --k-lyap 4 \
+ --lyap-act-steps 4 \
+ --seed 42 \
+ --eval-every 1000 \
+ --eval-n "1000" \
+ --eval-batch-size 64 \
+ --floss-log-every 10 \
+ --train-puzzle-emb \
+ --puzzle-emb-lr 1e-4 \
+ --puzzle-emb-weight-decay 1.0 \
+ --kl-beta 0 \
+ --floss-steps "100" \
+ --interfloss-at "0" \
+ --interfloss-every "2000" \
+ --interfloss-start "2000" \
+ --interfloss-stop "10000" \
+ --out "/home/yurenh2/rrm/research/flossing/flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000.json"
diff --git a/flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_b4_k4_10000.cmd.sh b/flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_b4_k4_10000.cmd.sh
new file mode 100755
index 0000000..7f364d6
--- /dev/null
+++ b/flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_b4_k4_10000.cmd.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+# Recorded launch command for the TRM run
+# "trm_seed123_prefloss_0_b4_k4_10000": runs step7_interfloss.py on CUDA
+# device 1 for 10000 train steps at batch size 4 with --floss-steps 500 and
+# --interfloss-at "0", writing the result JSON to the --out path on the last
+# line.
+set -euo pipefail
+# The script path passed to python below is relative to this directory.
+cd "/home/yurenh2/rrm"
+export CUDA_VISIBLE_DEVICES="1"
+export PYTHONUNBUFFERED=1
+# exec replaces this shell with the Python process (no wrapper shell remains).
+exec "/home/yurenh2/miniconda3/envs/rrm/bin/python" research/flossing/step7_interfloss.py \
+ --model trm \
+ --ckpt-root "/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro" \
+ --ckpt-name __random__ \
+ --init-seed 123 \
+ --train-steps "10000" \
+ --batch-size "4" \
+ --train-lr 1e-4 \
+ --floss-lr 1e-4 \
+ --floss-mode engelken_l2 \
+ --lambda-star 0 \
+ --k-lyap 4 \
+ --lyap-act-steps 4 \
+ --seed 42 \
+ --eval-every 1000 \
+ --eval-n "1000" \
+ --eval-batch-size 64 \
+ --floss-log-every 10 \
+ --train-puzzle-emb \
+ --puzzle-emb-lr 1e-4 \
+ --puzzle-emb-weight-decay 1.0 \
+ --kl-beta 0 \
+ --floss-steps "500" \
+ --interfloss-at "0" \
+ --out "/home/yurenh2/rrm/research/flossing/flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_b4_k4_10000.json"
diff --git a/flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000.cmd.sh b/flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000.cmd.sh
new file mode 100755
index 0000000..0e903bd
--- /dev/null
+++ b/flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000.cmd.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+# Recorded launch command for the TRM run
+# "trm_seed123_prefloss_0_tb32_fb4_k4_20000": runs step7_interfloss.py on CUDA
+# device 1 for 20000 train steps with task batch size 32 and floss batch size
+# 4, --floss-steps 500, --interfloss-at "0" and --interfloss-every 0, writing
+# the result JSON to the --out path on the last line.
+set -euo pipefail
+# The script path passed to python below is relative to this directory.
+cd "/home/yurenh2/rrm"
+export CUDA_VISIBLE_DEVICES="1"
+export PYTHONUNBUFFERED=1
+# exec replaces this shell with the Python process (no wrapper shell remains).
+exec "/home/yurenh2/miniconda3/envs/rrm/bin/python" research/flossing/step7_interfloss.py \
+ --model trm \
+ --ckpt-root "/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro" \
+ --ckpt-name __random__ \
+ --init-seed 123 \
+ --train-steps "20000" \
+ --batch-size "32" \
+ --task-batch-size "32" \
+ --floss-batch-size "4" \
+ --train-lr 1e-4 \
+ --floss-lr 1e-4 \
+ --floss-mode engelken_l2 \
+ --lambda-star 0 \
+ --k-lyap 4 \
+ --lyap-act-steps 4 \
+ --seed 42 \
+ --eval-every 1000 \
+ --eval-n "1000" \
+ --eval-batch-size 64 \
+ --floss-log-every 10 \
+ --train-puzzle-emb \
+ --puzzle-emb-lr 1e-4 \
+ --puzzle-emb-weight-decay 1.0 \
+ --kl-beta 0 \
+ --floss-steps "500" \
+ --interfloss-at "0" \
+ --interfloss-every "0" \
+ --interfloss-start "0" \
+ --interfloss-stop "-1" \
+ --out "/home/yurenh2/rrm/research/flossing/flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000.json"
diff --git a/flossing_suite/smoke_test.sh b/flossing_suite/smoke_test.sh
new file mode 100755
index 0000000..8b766b5
--- /dev/null
+++ b/flossing_suite/smoke_test.sh
@@ -0,0 +1,61 @@
+#!/usr/bin/env bash
+# Smoke test for the flossing suite: runs a tiny toy-RNN job, a one-step TRM
+# interfloss job and the summarizer, capturing each step's output in a log
+# file under results/smoke.
+#
+# Usage: smoke_test.sh [GPU]
+#   GPU  value for CUDA_VISIBLE_DEVICES of the two training steps (default 0)
+set -euo pipefail
+
+GPU="${1:-0}"
+ROOT="/home/yurenh2/rrm"
+PY="/home/yurenh2/miniconda3/envs/rrm/bin/python"
+OUT_DIR="${ROOT}/research/flossing/flossing_suite/results/smoke"
+mkdir -p "${OUT_DIR}"
+cd "${ROOT}"
+
+#######################################
+# Run a command with stdout and stderr captured in a log file. On failure,
+# report the log location, show its tail on stderr, and exit with the
+# command's own status (output is otherwise invisible because it is
+# redirected, so a bare `set -e` exit would be silent).
+# Arguments: $1 - log file path; remaining args - command to run
+#######################################
+run_logged() {
+  local log=$1
+  shift
+  local rc=0
+  "$@" > "${log}" 2>&1 || rc=$?
+  if (( rc != 0 )); then
+    printf '[smoke] FAILED (exit %d), last lines of %s:\n' "${rc}" "${log}" >&2
+    tail -n 20 -- "${log}" >&2 || true
+    exit "${rc}"
+  fi
+}
+
+echo "[smoke] toy RNN faithful port"
+# `env` scopes the two variables to this one command, as the original
+# per-command assignment prefix did.
+run_logged "${OUT_DIR}/toy_smoke.log" \
+  env CUDA_VISIBLE_DEVICES="${GPU}" PYTHONUNBUFFERED=1 \
+  "${PY}" research/flossing/engelken_python_flossing.py \
+  --hidden-size 16 \
+  --n-lyap 4 \
+  --train-epochs 3 \
+  --pre-epochs 2 \
+  --inter-epochs 0 \
+  --max-inter-episodes 0 \
+  --batch-size 4 \
+  --train-steps 40 \
+  --lyap-steps 30 \
+  --floss-input-steps 40 \
+  --eval-every 1 \
+  --eval-batches 1 \
+  --device cuda \
+  --out "${OUT_DIR}/toy_smoke.json"
+
+echo "[smoke] TRM interfloss analogue"
+run_logged "${OUT_DIR}/trm_step7_smoke.log" \
+  env CUDA_VISIBLE_DEVICES="${GPU}" PYTHONUNBUFFERED=1 \
+  "${PY}" research/flossing/step7_interfloss.py \
+  --model trm \
+  --ckpt-root "${ROOT}/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro" \
+  --ckpt-name __random__ \
+  --init-seed 123 \
+  --train-steps 1 \
+  --batch-size 2 \
+  --train-lr 1e-4 \
+  --floss-lr 1e-4 \
+  --floss-steps 1 \
+  --interfloss-at 0 \
+  --floss-mode engelken_l2 \
+  --lambda-star 0 \
+  --k-lyap 1 \
+  --lyap-act-steps 1 \
+  --seed 42 \
+  --eval-every 1 \
+  --eval-n 8 \
+  --eval-batch-size 8 \
+  --floss-log-every 1 \
+  --no-eval-after-floss \
+  --train-puzzle-emb \
+  --puzzle-emb-lr 1e-4 \
+  --puzzle-emb-weight-decay 1.0 \
+  --out "${OUT_DIR}/trm_step7_smoke.json"
+
+echo "[smoke] summarizer"
+run_logged "${OUT_DIR}/summarize_smoke.log" \
+  "${PY}" research/flossing/flossing_suite/summarize_flossing.py
+
+echo "smoke OK: ${OUT_DIR}"
diff --git a/flossing_suite/status.sh b/flossing_suite/status.sh
new file mode 100755
index 0000000..39af2d6
--- /dev/null
+++ b/flossing_suite/status.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+# Status report for the flossing suite: GPU usage, running flossing
+# processes, and the last lines of every run log under the results
+# directories.
+set -euo pipefail
+
+ROOT="/home/yurenh2/rrm"
+cd "${ROOT}"
+
+echo "== GPU =="
+# A missing or failing nvidia-smi must not abort the rest of the report
+# (under `set -e` it previously did).
+if command -v nvidia-smi >/dev/null 2>&1; then
+  nvidia-smi --query-gpu=index,memory.used,memory.total,utilization.gpu --format=csv,noheader,nounits \
+    || echo "(nvidia-smi failed)"
+else
+  echo "(nvidia-smi not found)"
+fi
+
+echo
+echo "== Active flossing processes =="
+# grep is used instead of rg so the listing does not depend on ripgrep being
+# installed. The [x] bracket form keeps the grep process itself out of the
+# matches. `|| true`: no match is not an error.
+ps -eo pid,ppid,stat,etime,cmd \
+  | grep -E '[e]ngelken_python_flossing\.py|[s]tep7_interfloss\.py' \
+  || true
+
+echo
+echo "== Latest logs =="
+for dir in \
+  research/flossing/flossing_suite/results/toy_rnn \
+  research/flossing/flossing_suite/results/trm_faithful \
+  research/flossing/flossing_suite/results/trm_variants \
+  research/flossing/flossing_suite/results/smoke
+do
+  [[ -d "${dir}" ]] || continue
+  for f in "${dir}"/*.log; do
+    # Skips the literal pattern left behind when the glob matches nothing.
+    [[ -f "${f}" ]] || continue
+    echo
+    echo "-- ${f} --"
+    tail -n 8 "${f}"
+  done
+done
+
+echo
+echo "== Summary files =="
+ls -lh research/flossing/flossing_suite/results/summary 2>/dev/null || true
diff --git a/flossing_suite/summarize_flossing.py b/flossing_suite/summarize_flossing.py
new file mode 100644
index 0000000..e753b13
--- /dev/null
+++ b/flossing_suite/summarize_flossing.py
@@ -0,0 +1,311 @@
+from __future__ import annotations
+
+import csv
+import json
+from pathlib import Path
+from typing import Any
+
+import matplotlib.pyplot as plt
+
+
+# Absolute root of the rrm checkout; the paths below are derived from it.
+ROOT = Path("/home/yurenh2/rrm")
+# Directory that is globbed for run JSON files and that run names are made
+# relative to.
+FLOSS = ROOT / "research/flossing"
+SUITE = FLOSS / "flossing_suite"
+# Output directory for the summary CSVs and plots written by main().
+OUT = SUITE / "results/summary"
+
+
+def load_json(path: Path) -> dict[str, Any] | None:
+    """Read ``path`` as JSON and return its top-level object.
+
+    Best-effort loader: if the file cannot be read or parsed, or if the
+    top-level JSON value is not an object (callers use ``.get`` on the
+    result), a ``[skip]`` line is printed and ``None`` is returned.
+    """
+    try:
+        data = json.loads(path.read_text())
+    except Exception as exc:
+        print(f"[skip] {path}: {exc}")
+        return None
+    if not isinstance(data, dict):
+        # A list/scalar would otherwise crash later in the summarizers.
+        print(f"[skip] {path}: top-level JSON is {type(data).__name__}, expected object")
+        return None
+    return data
+
+
+def fnum(value: Any, default: float = float("nan")) -> float:
+    """Coerce ``value`` to ``float``.
+
+    Returns ``default`` when ``value`` is ``None`` or when ``float(value)``
+    raises.
+    """
+    if value is None:
+        return default
+    try:
+        converted = float(value)
+    except Exception:
+        return default
+    return converted
+
+
+def run_name(path: Path) -> str:
+    """Return ``path`` relative to ``FLOSS``, without its suffix, as a POSIX string."""
+    relative = path.relative_to(FLOSS)
+    stem_path = relative.with_suffix("")
+    return stem_path.as_posix()
+
+
+def iter_json_paths() -> list[Path]:
+    """Return the sorted, de-duplicated run JSON paths found under ``FLOSS``."""
+    globs = (
+        "engelken_python/*.json",
+        "engelken_paper_faithful/*.json",
+        "step6_*.json",
+        "step7_*.json",
+        "flossing_suite/results/**/*.json",
+    )
+    # A set removes paths matched by more than one pattern.
+    found = {match for pattern in globs for match in FLOSS.glob(pattern)}
+    return sorted(found)
+
+
+def classify(path: Path, data: dict[str, Any]) -> str:
+    """Name the run family from the keys present in ``data``.
+
+    Checks, in order: ``"config"`` -> ``"toy_rnn"``; ``"phase1_steps"`` ->
+    ``"step6_prefloss_hrm"``; both ``"args"`` and ``"floss_episodes"`` ->
+    ``"step7_interfloss_<model>"``; anything else -> ``"unknown"``.
+    ``path`` is accepted but not used.
+    """
+    if "config" in data:
+        return "toy_rnn"
+    if "phase1_steps" in data:
+        return "step6_prefloss_hrm"
+    if "args" not in data or "floss_episodes" not in data:
+        return "unknown"
+    model_name = data.get("args", {}).get("model", "unknown")
+    return f"step7_interfloss_{model_name}"
+
+
+def selector_from_args(args: dict[str, Any]) -> str:
+    """Derive a short label for the flossing configuration of a step7 run.
+
+    Returns ``"baseline_no_floss"`` when ``floss_steps`` is not positive, or
+    when neither an ``interfloss_at`` schedule nor a positive
+    ``interfloss_every`` is set. Otherwise returns
+    ``"<floss_mode>@<schedule>_steps<N>"`` with ``_every<K>`` appended for a
+    periodic schedule and ``_kl<beta>`` for a positive ``kl_beta``. A
+    periodic-only run (empty ``interfloss_at``) uses ``periodic`` as the
+    schedule part.
+    """
+    schedule = str(args.get("interfloss_at", ""))
+    steps = int(fnum(args.get("floss_steps"), 0))
+    mode = str(args.get("floss_mode", "none"))
+    kl = fnum(args.get("kl_beta"), 0.0)
+    every = int(fnum(args.get("interfloss_every"), 0))
+    # Previously an empty interfloss_at alone forced the baseline label, even
+    # with interfloss_every > 0.
+    # NOTE(review): assumes interfloss_every > 0 triggers flossing
+    # independently of interfloss_at -- confirm against step7_interfloss.py.
+    if steps <= 0 or (not schedule and every <= 0):
+        return "baseline_no_floss"
+    label = f"{mode}@{schedule or 'periodic'}_steps{steps}"
+    if every > 0:
+        label += f"_every{every}"
+    if kl > 0:
+        label += f"_kl{kl:g}"
+    return label
+
+
+def summarize_toy(path: Path, data: dict[str, Any]) -> tuple[dict[str, Any], list[dict[str, Any]]]:
+    """Build the summary row and eval-curve rows for one toy-RNN run file.
+
+    Reads ``data["config"]`` and ``data["evals"]``. The selector is
+    ``pre_interfloss`` when ``max_inter_episodes`` is positive, ``prefloss``
+    when only ``pre_epochs`` is positive, otherwise ``baseline_no_floss``.
+    Returns ``(summary, rows)`` with one row per entry of ``evals``.
+    """
+    cfg = data.get("config", {})
+    evals = data.get("evals", [])
+    head = evals[0] if evals else {}
+    tail = evals[-1] if evals else {}
+    initial_acc = fnum(head.get("eval_accuracy"))
+    final_acc = fnum(tail.get("eval_accuracy"))
+    peak = max((fnum(e.get("eval_accuracy")) for e in evals), default=float("nan"))
+
+    pre_epochs = int(fnum(cfg.get("pre_epochs"), 0))
+    inter_epochs = int(fnum(cfg.get("inter_epochs"), 0))
+    max_inter = int(fnum(cfg.get("max_inter_episodes"), 0))
+    if max_inter > 0:
+        selector = "pre_interfloss"
+    elif pre_epochs > 0:
+        selector = "prefloss"
+    else:
+        selector = "baseline_no_floss"
+
+    name = run_name(path)
+    summary = {
+        "name": name,
+        "path": str(path),
+        "family": "toy_rnn",
+        "model": "vanilla_rnn",
+        "selector": selector,
+        "floss_mode": "engelken_l2",
+        "schedule": f"pre={pre_epochs},inter_period={cfg.get('inter_period')},max_inter={max_inter}",
+        "train_steps": cfg.get("train_epochs"),
+        "floss_steps": pre_epochs + inter_epochs * max_inter,
+        "k_lyap": cfg.get("n_lyap"),
+        "kl_beta": 0,
+        "initial_acc": initial_acc,
+        "final_acc": final_acc,
+        "best_acc": peak,
+        "delta_final": final_acc - initial_acc,
+        "n_evals": len(evals),
+        # One episode for the pre-floss phase (if any) plus the inter-floss cap.
+        "n_floss_episodes": (1 if pre_epochs > 0 else 0) + max_inter,
+        "status": "complete" if evals else "unknown",
+    }
+    rows = [
+        {
+            "name": name,
+            "family": "toy_rnn",
+            "model": "vanilla_rnn",
+            "selector": selector,
+            "x": e.get("epoch"),
+            "acc": e.get("eval_accuracy"),
+            "loss": e.get("eval_loss"),
+            "kind": "eval",
+        }
+        for e in evals
+    ]
+    return summary, rows
+
+
+def summarize_step7(path: Path, data: dict[str, Any]) -> tuple[dict[str, Any], list[dict[str, Any]]]:
+    """Build the summary row and eval-curve rows for one step7 interfloss run.
+
+    ``final_acc`` / ``initial_acc`` come from the top-level keys of ``data``
+    and fall back to the last / first entry of ``data["evals"]``. A run is
+    marked ``complete`` only when ``data`` contains ``"final_acc"``.
+    Returns ``(summary, rows)`` with one row per entry of ``evals``.
+    """
+    args = data.get("args", {})
+    evals = data.get("evals", [])
+    first_eval = evals[0] if evals else {}
+    last_eval_acc = evals[-1].get("acc") if evals else None
+    final_acc = fnum(data.get("final_acc"), fnum(last_eval_acc))
+    best_acc = max((fnum(e.get("acc")) for e in evals), default=float("nan"))
+    model = str(args.get("model", "unknown"))
+    train_steps = int(fnum(args.get("train_steps"), 0))
+    # Task/floss batch sizes fall back to the shared --batch-size value.
+    shared_batch = args.get("batch_size")
+    task_batch = int(fnum(args.get("task_batch_size", shared_batch), 0))
+    floss_batch = int(fnum(args.get("floss_batch_size", shared_batch), 0))
+    every = int(fnum(args.get("interfloss_every"), 0))
+    start = int(fnum(args.get("interfloss_start"), 0))
+    stop = int(fnum(args.get("interfloss_stop"), -1))
+    schedule = str(args.get("interfloss_at", ""))
+    if every > 0:
+        schedule = f"{schedule}; every={every}, start={start}, stop={stop}"
+    initial_acc = fnum(data.get("initial_acc"), fnum(first_eval.get("acc")))
+
+    name = run_name(path)
+    selector = selector_from_args(args)
+    summary = {
+        "name": name,
+        "path": str(path),
+        "family": "rrm_step7",
+        "model": model,
+        "selector": selector,
+        "floss_mode": args.get("floss_mode", "none"),
+        "schedule": schedule,
+        "train_steps": train_steps,
+        "task_batch_size": task_batch,
+        "floss_batch_size": floss_batch,
+        # Train steps rescaled by task batch size relative to a batch of 768.
+        "effective_official_gbs768_steps": train_steps * task_batch / 768 if task_batch else "",
+        "floss_steps": args.get("floss_steps"),
+        "k_lyap": args.get("k_lyap"),
+        "kl_beta": args.get("kl_beta", 0),
+        "initial_acc": initial_acc,
+        "final_acc": final_acc,
+        "best_acc": best_acc,
+        "delta_final": final_acc - initial_acc,
+        "n_evals": len(evals),
+        "n_floss_episodes": len(data.get("floss_episodes", [])),
+        "status": "complete" if "final_acc" in data else "running_or_incomplete",
+    }
+    rows = [
+        {
+            "name": name,
+            "family": "rrm_step7",
+            "model": model,
+            "selector": selector,
+            "x": e.get("train_step"),
+            "acc": e.get("acc"),
+            "loss": "",
+            "kind": e.get("kind", "eval"),
+        }
+        for e in evals
+    ]
+    return summary, rows
+
+
+def summarize_step6(path: Path, data: dict[str, Any]) -> tuple[dict[str, Any], list[dict[str, Any]]]:
+    """Build the summary row and eval-curve rows for one step6 HRM prefloss run.
+
+    Evals are the concatenation of ``phase1_evals`` and ``phase2_evals``,
+    each tagged with a default ``kind`` (a ``kind`` key already present in an
+    eval entry wins). ``final_acc`` falls back to the last phase-2 eval.
+    Returns ``(summary, rows)`` with one row per combined eval entry.
+    """
+    args = data.get("args", {})
+    phase1 = data.get("phase1_evals", [])
+    phase2 = data.get("phase2_evals", [])
+    evals = [{"kind": "phase1", **e} for e in phase1] + [{"kind": "phase2", **e} for e in phase2]
+
+    last_phase2_acc = phase2[-1].get("acc") if phase2 else None
+    final_acc = fnum(data.get("final_acc"), fnum(last_phase2_acc))
+    initial_acc = fnum(data.get("initial_acc"))
+    best_acc = max((fnum(e.get("acc")) for e in evals), default=float("nan"))
+    pre_steps = int(fnum(args.get("prefloss_steps"), 0))
+    if pre_steps <= 0:
+        selector = "baseline_no_prefloss"
+    else:
+        selector = f"prefloss_{args.get('floss_mode')}"
+
+    name = run_name(path)
+    summary = {
+        "name": name,
+        "path": str(path),
+        "family": "hrm_step6",
+        "model": "hrm",
+        "selector": selector,
+        "floss_mode": args.get("floss_mode", "none"),
+        "schedule": f"pre={pre_steps}",
+        "train_steps": args.get("train_steps"),
+        "floss_steps": args.get("prefloss_steps"),
+        "k_lyap": args.get("k_lyap"),
+        "kl_beta": 0,
+        "initial_acc": initial_acc,
+        "final_acc": final_acc,
+        "best_acc": best_acc,
+        "delta_final": final_acc - initial_acc,
+        "n_evals": len(evals),
+        "n_floss_episodes": 1 if pre_steps > 0 else 0,
+        "status": "complete" if "final_acc" in data else "running_or_incomplete",
+    }
+    rows = [
+        {
+            "name": name,
+            "family": "hrm_step6",
+            "model": "hrm",
+            "selector": selector,
+            "x": e.get("step"),
+            "acc": e.get("acc"),
+            "loss": "",
+            "kind": e.get("kind", "eval"),
+        }
+        for e in evals
+    ]
+    return summary, rows
+
+
+def collect() -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
+    """Load every discovered run file and summarize it with its family's handler.
+
+    Files that fail to load, or whose family is not recognized, are skipped.
+    Returns ``(summaries, eval_rows)`` accumulated over all remaining files.
+    """
+    summaries: list[dict[str, Any]] = []
+    eval_rows: list[dict[str, Any]] = []
+    for path in iter_json_paths():
+        data = load_json(path)
+        if data is None:
+            continue
+        family = classify(path, data)
+        if family == "toy_rnn":
+            handler = summarize_toy
+        elif family.startswith("step7"):
+            handler = summarize_step7
+        elif family == "step6_prefloss_hrm":
+            handler = summarize_step6
+        else:
+            continue
+        summary, rows = handler(path, data)
+        summaries.append(summary)
+        eval_rows.extend(rows)
+    return summaries, eval_rows
+
+
+def write_csv(path: Path, rows: list[dict[str, Any]]) -> None:
+    """Write ``rows`` to ``path`` as CSV with a header row.
+
+    Columns are the union of all row keys in first-seen order. An empty
+    ``rows`` list produces an empty file.
+    """
+    if not rows:
+        path.write_text("")
+        return
+    # dict.fromkeys keeps the first occurrence of each key, in order.
+    fieldnames = list(dict.fromkeys(key for row in rows for key in row))
+    with path.open("w", newline="") as handle:
+        writer = csv.DictWriter(handle, fieldnames=fieldnames)
+        writer.writeheader()
+        writer.writerows(rows)
+
+
+def plot_family(eval_rows: list[dict[str, Any]], family: str, out: Path, title: str) -> None:
+    """Plot accuracy against ``x`` for every run of ``family`` and save it to ``out``.
+
+    Rows whose ``x`` is ``""`` or ``None`` are ignored; nothing is written
+    when no rows remain. One line is drawn per run name, labelled with the
+    last path component of the name.
+    """
+    by_name: dict[str, list[dict[str, Any]]] = {}
+    for row in eval_rows:
+        if row["family"] == family and row.get("x") not in ("", None):
+            by_name.setdefault(row["name"], []).append(row)
+    if not by_name:
+        return
+    fig, ax = plt.subplots(figsize=(8.5, 4.6))
+    for name in sorted(by_name):
+        ordered = sorted(by_name[name], key=lambda r: fnum(r["x"]))
+        xs = [fnum(r["x"]) for r in ordered]
+        ys = [fnum(r["acc"]) for r in ordered]
+        ax.plot(xs, ys, marker="o", linewidth=1.8, label=name.split("/")[-1])
+    ax.set_title(title)
+    ax.set_xlabel("epoch / train step")
+    ax.set_ylabel("eval exact accuracy")
+    ax.grid(alpha=0.22)
+    ax.legend(frameon=False, fontsize=7)
+    fig.tight_layout()
+    fig.savefig(out, dpi=180)
+    plt.close(fig)
+
+
+def main() -> None:
+    """Collect all runs, then write the two summary CSVs and three plots under ``OUT``."""
+    OUT.mkdir(parents=True, exist_ok=True)
+    summaries, eval_rows = collect()
+    summaries.sort(key=lambda r: (str(r["family"]), str(r["model"]), str(r["name"])))
+
+    summary_csv = OUT / "flossing_runs_summary.csv"
+    curves_csv = OUT / "flossing_eval_curves.csv"
+    write_csv(summary_csv, summaries)
+    write_csv(curves_csv, eval_rows)
+
+    plots = (
+        ("toy_rnn", "toy_rnn_eval_curves.png", "Toy RNN Engelken-style flossing"),
+        ("rrm_step7", "rrm_step7_eval_curves.png", "HRM/TRM interfloss analogues"),
+        ("hrm_step6", "hrm_step6_eval_curves.png", "HRM prefloss experiments"),
+    )
+    for family, filename, title in plots:
+        plot_family(eval_rows, family, OUT / filename, title)
+
+    print(f"wrote {summary_csv}")
+    print(f"wrote {curves_csv}")
+    print(f"runs: {len(summaries)} eval points: {len(eval_rows)}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/flossing_suite/watch_and_summarize.sh b/flossing_suite/watch_and_summarize.sh
new file mode 100755
index 0000000..a75c250
--- /dev/null
+++ b/flossing_suite/watch_and_summarize.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+# Wait for the processes named by a set of pid files to exit, then run the
+# summarizer and save a status report.
+#
+# Usage: watch_and_summarize.sh [PID_FILE...]
+#   With no arguments, watches the trm_seed123_*.pid files under
+#   results/trm_faithful.
+set -euo pipefail
+
+ROOT="/home/yurenh2/rrm"
+PY="/home/yurenh2/miniconda3/envs/rrm/bin/python"
+cd "${ROOT}"
+
+if [[ "$#" -gt 0 ]]; then
+  pid_files=("$@")
+else
+  # nullglob: an unmatched pattern expands to nothing instead of being kept
+  # as a literal (which would be counted as one pid file below).
+  shopt -s nullglob
+  pid_files=(research/flossing/flossing_suite/results/trm_faithful/trm_seed123_*.pid)
+  shopt -u nullglob
+fi
+
+echo "watching ${#pid_files[@]} pid files"
+# The ${arr[@]+...} form keeps an empty array from tripping `set -u` on
+# older bash versions.
+for pf in ${pid_files[@]+"${pid_files[@]}"}; do
+  [[ -f "${pf}" ]] || continue
+  pid="$(tr -d '[:space:]' < "${pf}")"
+  # Only a positive integer is a usable PID: `kill -0 0` and `kill -0 -1`
+  # address process groups and would make the wait loop spin forever.
+  if [[ ! "${pid}" =~ ^[1-9][0-9]*$ ]]; then
+    echo "skip ${pf}: invalid pid '${pid}'" >&2
+    continue
+  fi
+  echo "watch ${pf}: pid ${pid}"
+  while kill -0 "${pid}" 2>/dev/null; do
+    sleep 60
+  done
+  echo "done ${pf}: pid ${pid}"
+done
+
+"${PY}" research/flossing/flossing_suite/summarize_flossing.py
+bash research/flossing/flossing_suite/status.sh > research/flossing/flossing_suite/results/summary/final_status.txt
+echo "watch complete"
diff --git a/initial_perturb_robustness.py b/initial_perturb_robustness.py
new file mode 100644
index 0000000..e652080
--- /dev/null
+++ b/initial_perturb_robustness.py
@@ -0,0 +1,286 @@
+"""Inference-time robustness to initial recurrent-state perturbations.
+
+This tests whether trajectory-perturbation training enlarged the correct
+attractor basin. Unlike PTRM-style rollout noise, the perturbation is applied
+once after resetting z_H/z_L, matching the training augmentation mechanism.
+"""
+from __future__ import annotations
+
+import argparse
+import csv
+import json
+import math
+import sys
+from dataclasses import replace
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+import torch
+import yaml
+
+
+TRM_DIR = Path("/home/yurenh2/rrm/trm")
+sys.path.insert(0, str(TRM_DIR))
+
+from models.recursive_reasoning.trm import ( # noqa: E402
+ TinyRecursiveReasoningModel_ACTV1,
+ TinyRecursiveReasoningModel_ACTV1InnerCarry,
+)
+
+
+# Label value that correctness() masks out of exact-match and token accuracy.
+IGNORE_LABEL_ID = -100
+
+
+def parse_float_list(text: str) -> list[float]:
+    """Parse a comma-separated string into floats, skipping blank entries."""
+    values: list[float] = []
+    for piece in text.split(","):
+        token = piece.strip()
+        if token:
+            values.append(float(token))
+    return values
+
+
+def load_model(ckpt_root: Path, ckpt_name: str, device: str):
+    """Build the TRM model from a checkpoint directory and load its weights.
+
+    Reads ``all_config.yaml`` from ``ckpt_root`` and ``train/dataset.json``
+    from the configured data path, builds the model, loads ``ckpt_name``
+    non-strictly, prints the missing/unexpected key counts, and returns
+    ``(model, cfg, data_path)`` with the model on ``device`` in eval mode.
+    """
+    cfg = yaml.safe_load((ckpt_root / "all_config.yaml").read_text())
+    data_path = Path(cfg.get("data_path") or cfg["data_paths"][0])
+    meta_file = data_path / "train" / "dataset.json"
+    train_meta = json.loads(meta_file.read_text())
+
+    arch_cfg = {
+        **cfg["arch"],
+        "batch_size": cfg["global_batch_size"],
+        "seq_len": train_meta["seq_len"],
+        "vocab_size": train_meta["vocab_size"],
+        "num_puzzle_identifiers": train_meta["num_puzzle_identifiers"],
+        "causal": False,
+    }
+
+    model = TinyRecursiveReasoningModel_ACTV1(arch_cfg)
+    state = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True)
+    # NOTE(review): str.replace removes these substrings anywhere in a key,
+    # not only as a prefix -- confirm no parameter name contains "model.".
+    stripped = {}
+    for key, value in state.items():
+        stripped[key.replace("_orig_mod.", "").replace("model.", "")] = value
+    missing, unexpected = model.load_state_dict(stripped, strict=False)
+    print(f"[load] {ckpt_root.name}/{ckpt_name} missing={len(missing)} unexpected={len(unexpected)}", flush=True)
+    for tag, keys in (("missing", missing), ("unexpected", unexpected)):
+        if keys[:4]:
+            print(f"[load] sample {tag}: {keys[:4]}", flush=True)
+    model.to(device).eval()
+    return model, cfg, data_path
+
+
+def load_test_samples(data_path: Path, n_samples: int, seed: int):
+    """Draw a seeded random subset of the test split under ``data_path / "test"``.
+
+    Picks ``min(n_samples, available)`` distinct row indices and returns a
+    dict with int32 tensors ``inputs``, ``labels`` and
+    ``puzzle_identifiers`` for those rows, plus the index array as ``idx``.
+    """
+    rng = np.random.default_rng(seed)
+    test_dir = data_path / "test"
+    arrays = {
+        "inputs": np.load(test_dir / "all__inputs.npy"),
+        "labels": np.load(test_dir / "all__labels.npy"),
+        "puzzle_identifiers": np.load(test_dir / "all__puzzle_identifiers.npy"),
+    }
+    total = len(arrays["inputs"])
+    idx = rng.choice(total, size=min(n_samples, total), replace=False)
+    picked = {name: torch.from_numpy(arr[idx].astype(np.int32)) for name, arr in arrays.items()}
+    picked["idx"] = idx
+    return picked
+
+
+def batch_slice(samples: dict[str, Any], start: int, end: int, device: str):
+    """Return rows ``start:end`` of the model-input tensors, moved to ``device``.
+
+    Only ``inputs``, ``labels`` and ``puzzle_identifiers`` are kept; other
+    keys of ``samples`` are dropped.
+    """
+    wanted = ("inputs", "labels", "puzzle_identifiers")
+    batch = {}
+    for key, value in samples.items():
+        if key in wanted:
+            batch[key] = value[start:end].to(device, non_blocking=True)
+    return batch
+
+
+def repeat_batch(batch: dict[str, torch.Tensor], repeats: int):
+    """Repeat every row of every tensor ``repeats`` times, copies adjacent.
+
+    With ``repeats == 1`` the input dict itself is returned.
+    """
+    if repeats == 1:
+        return batch
+    expanded = {}
+    for key, tensor in batch.items():
+        expanded[key] = tensor.repeat_interleave(repeats, dim=0)
+    return expanded
+
+
+def sample_unit_noise_like(tensor: torch.Tensor, generator: torch.Generator, distribution: str):
+    """Sample noise shaped like ``tensor``, drawn in float32 and cast to its dtype.
+
+    ``"uniform"`` draws from ``[-sqrt(3), sqrt(3))`` (a uniform with unit
+    variance); any other value draws a standard normal.
+    """
+    draw_kwargs = dict(device=tensor.device, dtype=torch.float32, generator=generator)
+    if distribution != "uniform":
+        return torch.randn(tensor.shape, **draw_kwargs).to(tensor.dtype)
+    centered = 2.0 * torch.rand(tensor.shape, **draw_kwargs) - 1.0
+    return (centered * math.sqrt(3.0)).to(tensor.dtype)
+
+
+def apply_initial_noise(
+    inner: TinyRecursiveReasoningModel_ACTV1InnerCarry,
+    sigma: float,
+    perturb: str,
+    generator: torch.Generator,
+    distribution: str,
+):
+    """Return a copy of ``inner`` with scaled noise added to ``z_H`` and/or ``z_L``.
+
+    ``perturb`` selects the state: ``"h"``, ``"l"`` or ``"both"``. With
+    ``sigma <= 0`` the carry is returned unchanged. Noise for ``z_H`` is
+    drawn before noise for ``z_L``.
+    """
+    if sigma <= 0:
+        return inner
+
+    def jitter(state):
+        return state + sigma * sample_unit_noise_like(state, generator, distribution)
+
+    touch_h = perturb in ("h", "both")
+    touch_l = perturb in ("l", "both")
+    new_h = jitter(inner.z_H) if touch_h else inner.z_H
+    new_l = jitter(inner.z_L) if touch_l else inner.z_L
+    return replace(inner, z_H=new_h, z_L=new_l)
+
+
+def correctness(logits: torch.Tensor, labels: torch.Tensor):
+    """Score argmax predictions against ``labels``, ignoring masked positions.
+
+    Returns ``(exact, token_acc)``, both reduced over the last dimension:
+    ``exact`` is True where every non-ignored position matches, and
+    ``token_acc`` is the fraction of non-ignored positions that match.
+    Positions equal to ``IGNORE_LABEL_ID`` are ignored.
+    """
+    predicted = logits.argmax(dim=-1)
+    valid = labels != IGNORE_LABEL_ID
+    hits = predicted == labels
+    # An ignored position counts as matching for the exact-match check.
+    exact = (hits | ~valid).all(dim=-1)
+    # clamp_min(1) avoids dividing by zero when every position is ignored.
+    n_valid = valid.sum(-1).clamp_min(1)
+    token_acc = (hits & valid).sum(-1).float() / n_valid.float()
+    return exact, token_acc
+
+
+@torch.inference_mode()
+def eval_sigma(
+    model,
+    batch: dict[str, torch.Tensor],
+    sigma: float,
+    rollouts: int,
+    perturb: str,
+    distribution: str,
+    generator: torch.Generator,
+):
+    """Run ``rollouts`` noisy-start rollouts per problem and score the final logits.
+
+    Each problem is repeated ``rollouts`` times, the carry is reset for all
+    rows, initial noise of scale ``sigma`` is applied once, and the inner
+    model is stepped ``model.config.halt_max_steps`` times. Returns
+    ``(exact, token_acc)`` reshaped to ``(n_problems, rollouts)``.
+    """
+    expanded = repeat_batch(batch, rollouts)
+    device = expanded["inputs"].device
+    n_rows = expanded["inputs"].shape[0]
+    with torch.device(device):
+        carry = model.initial_carry(expanded)
+    reset_all = torch.ones(n_rows, device=device, dtype=torch.bool)
+    state = model.inner.reset_carry(reset_all, carry.inner_carry)
+    state = apply_initial_noise(state, sigma, perturb, generator, distribution)
+
+    logits = None
+    for _ in range(model.config.halt_max_steps):
+        # The third returned value is unused here.
+        state, logits, _q = model.inner(state, expanded)
+
+    assert logits is not None
+    exact, token_acc = correctness(logits, expanded["labels"])
+    n_problems = batch["inputs"].shape[0]
+    # repeat_batch keeps a problem's rollouts adjacent, so rows regroup by problem.
+    shape = (n_problems, rollouts)
+    return exact.view(*shape), token_acc.view(*shape)
+
+
+def summarize_sigma(exact: torch.Tensor, token_acc: torch.Tensor) -> dict[str, float]:
+    """Aggregate per-rollout results into scalar statistics.
+
+    ``exact`` is indexed ``[problem, rollout]`` (dim 1 is reduced per
+    problem). Reports mean rollout accuracy, pass@k / all-k rates, and the
+    mean, population std and quantiles of correct rollouts per problem.
+    """
+    per_problem = exact.float().sum(dim=1)
+    n_rollouts = exact.shape[1]
+
+    def frac(flags: torch.Tensor) -> float:
+        return flags.float().mean().item()
+
+    def quantile(level: float) -> float:
+        return torch.quantile(per_problem, level).item()
+
+    return {
+        "mean_rollout_exact": frac(exact),
+        "mean_rollout_token_acc": token_acc.mean().item(),
+        "pass_at_k": frac(exact.any(dim=1)),
+        "all_k": frac(exact.all(dim=1)),
+        "correct_count_mean": per_problem.mean().item(),
+        "correct_count_std": per_problem.std(unbiased=False).item(),
+        "correct_count_q10": quantile(0.10),
+        "correct_count_q50": quantile(0.50),
+        "correct_count_q90": quantile(0.90),
+        "zero_frac": frac(per_problem == 0),
+        "full_frac": frac(per_problem == n_rollouts),
+    }
+
+
+def write_summary(path: Path, rows: list[dict[str, Any]]) -> None:
+    """Write ``rows`` to ``path`` as a CSV file with a header row.
+
+    The columns are the union of all row keys, ordered by first appearance.
+    A row that lacks a column gets an empty cell (``csv.DictWriter`` default).
+    ``rows`` must contain at least one row.
+    """
+    columns: dict[str, None] = dict.fromkeys(rows[0])
+    for extra in rows[1:]:
+        for name in extra:
+            # Known columns keep their position; unseen ones go to the end.
+            columns.setdefault(name)
+    with path.open("w", newline="") as handle:
+        out = csv.DictWriter(handle, fieldnames=list(columns))
+        out.writeheader()
+        for record in rows:
+            out.writerow(record)
+
+
+def main() -> None:
+    """Sweep initial-state noise scales for one checkpoint and save the results.
+
+    For every sigma in ``--sigmas`` the test samples are evaluated in chunks of
+    ``--batch-size`` problems with ``--rollouts`` rollouts each. Three artifacts
+    are written, each named ``--out-prefix`` plus an extension:
+
+    * ``.summary.csv`` - one row of aggregate metrics per sigma
+    * ``.meta.json``   - the CLI arguments and run metadata
+    * ``.npz``         - ``exact`` and ``token_acc`` stacked as
+      ``[sigma, problem, rollout]``, plus ``idx``, ``sigmas`` and ``meta_json``
+
+    The device is fixed to ``"cuda"``.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--ckpt-root", required=True)
+    parser.add_argument("--ckpt-name", required=True)
+    parser.add_argument("--label", required=True)
+    parser.add_argument("--n-samples", type=int, default=2000)
+    parser.add_argument("--batch-size", type=int, default=32,
+                        help="Number of original problems per batch; expanded batch is batch_size * rollouts.")
+    parser.add_argument("--rollouts", type=int, default=8)
+    parser.add_argument("--sigmas", default="0,3e-5,1e-4,3e-4,1e-3,3e-3,1e-2,3e-2")
+    parser.add_argument("--perturb", choices=["h", "l", "both"], default="both")
+    parser.add_argument("--noise-distribution", choices=["gaussian", "uniform"], default="gaussian")
+    parser.add_argument("--seed", type=int, default=20260605)
+    parser.add_argument("--out-prefix", required=True)
+    args = parser.parse_args()
+
+    device = "cuda"
+    sigmas = parse_float_list(args.sigmas)
+    torch.manual_seed(args.seed)
+    # The noise generator gets its own seed, offset from the global one.
+    generator = torch.Generator(device=device).manual_seed(args.seed + 101)
+
+    model, cfg, data_path = load_model(Path(args.ckpt_root), args.ckpt_name, device)
+    samples = load_test_samples(data_path, args.n_samples, args.seed)
+    n = len(samples["inputs"])
+    print(
+        f"[run] label={args.label} n={n} rollouts={args.rollouts} "
+        f"batch={args.batch_size} sigmas={sigmas}",
+        flush=True,
+    )
+
+    rows: list[dict[str, Any]] = []
+    all_exact = []
+    all_token = []
+    for sigma in sigmas:
+        exact_parts = []
+        token_parts = []
+        for start in range(0, n, args.batch_size):
+            end = min(start + args.batch_size, n)
+            batch = batch_slice(samples, start, end, device)
+            exact, token_acc = eval_sigma(
+                model=model,
+                batch=batch,
+                sigma=sigma,
+                rollouts=args.rollouts,
+                perturb=args.perturb,
+                distribution=args.noise_distribution,
+                generator=generator,
+            )
+            exact_parts.append(exact.cpu())
+            token_parts.append(token_acc.cpu())
+            # Progress line on the last chunk and on every tenth full chunk.
+            if end == n or (end // args.batch_size) % 10 == 0:
+                print(f"  sigma={sigma:g} [{end}/{n}]", flush=True)
+        exact_all = torch.cat(exact_parts, dim=0)
+        token_all = torch.cat(token_parts, dim=0)
+        row: dict[str, Any] = {
+            "label": args.label,
+            "sigma": sigma,
+            "n_samples": n,
+            "rollouts": args.rollouts,
+            "ckpt_root": str(Path(args.ckpt_root)),
+            "ckpt_name": args.ckpt_name,
+            "perturb": args.perturb,
+            "noise_distribution": args.noise_distribution,
+            **summarize_sigma(exact_all, token_all),
+        }
+        rows.append(row)
+        all_exact.append(exact_all.numpy())
+        all_token.append(token_all.numpy())
+        print(
+            f"  sigma={sigma:g} mean={row['mean_rollout_exact']:.4f} "
+            f"pass@K={row['pass_at_k']:.4f} allK={row['all_k']:.4f}",
+            flush=True,
+        )
+
+    out_prefix = Path(args.out_prefix)
+    out_prefix.parent.mkdir(parents=True, exist_ok=True)
+
+    def artifact(ext: str) -> Path:
+        # Append the extension instead of calling Path.with_suffix():
+        # with_suffix() replaces everything after the last dot, so a prefix
+        # such as "run_sigma0.1" would be truncated to "run_sigma0" + ext.
+        return out_prefix.with_name(out_prefix.name + ext)
+
+    write_summary(artifact(".summary.csv"), rows)
+    meta = {
+        "args": vars(args),
+        "data_path": str(data_path),
+        "config_global_batch_size": cfg.get("global_batch_size"),
+        "sigmas": sigmas,
+        "n_samples": n,
+    }
+    artifact(".meta.json").write_text(json.dumps(meta, indent=2, sort_keys=True))
+    np.savez_compressed(
+        artifact(".npz"),
+        idx=samples["idx"],
+        # NOTE(review): float32 cannot hold values like 3e-5 exactly (it reads
+        # back as 2.9999999e-05); the exact values are kept in meta["sigmas"].
+        sigmas=np.asarray(sigmas, dtype=np.float32),
+        exact=np.stack(all_exact, axis=0),
+        token_acc=np.stack(all_token, axis=0),
+        meta_json=np.asarray(json.dumps(meta, sort_keys=True)),
+    )
+    print(f"[done] {out_prefix}.summary.csv", flush=True)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/initial_perturb_robustness/plots/final_gpu_status.txt b/initial_perturb_robustness/plots/final_gpu_status.txt
new file mode 100644
index 0000000..29c0466
--- /dev/null
+++ b/initial_perturb_robustness/plots/final_gpu_status.txt
@@ -0,0 +1,4 @@
+0, 13, 49140, 0
+1, 13, 49140, 0
+2, 45494, 49140, 0
+3, 13, 49140, 0
diff --git a/initial_perturb_robustness/plots/initial_perturb_robustness_combined.csv b/initial_perturb_robustness/plots/initial_perturb_robustness_combined.csv
new file mode 100644
index 0000000..12fdf18
--- /dev/null
+++ b/initial_perturb_robustness/plots/initial_perturb_robustness_combined.csv
@@ -0,0 +1,28 @@
+label,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,correct_count_q10,correct_count_q50,correct_count_q90,zero_frac,full_frac
+trm_baseline_best,0.0,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8658000230789185,0.9502986073493958,0.8658000230789185,0.8658000230789185,6.926400184631348,2.7269365787506104,0.0,8.0,8.0,0.13420000672340393,0.8658000230789185
+trm_baseline_best,3e-05,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.868399977684021,0.9510218501091003,0.9455999732017517,0.769599974155426,6.947199821472168,2.309980869293213,3.0,8.0,8.0,0.0544000007212162,0.769599974155426
+trm_baseline_best,0.0001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8675500154495239,0.9506199955940247,0.9485999941825867,0.769599974155426,6.940400123596191,2.3122386932373047,2.0,8.0,8.0,0.05139999836683273,0.769599974155426
+trm_baseline_best,0.0003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8685500025749207,0.9512001872062683,0.9476000070571899,0.7698000073432922,6.948400020599365,2.310267925262451,3.0,8.0,8.0,0.052400000393390656,0.7698000073432922
+trm_baseline_best,0.001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8694999814033508,0.9514188766479492,0.9488000273704529,0.7712000012397766,6.955999851226807,2.289206027984619,3.0,8.0,8.0,0.05119999870657921,0.7712000012397766
+trm_baseline_best,0.003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8676750063896179,0.950739860534668,0.9448000192642212,0.7666000127792358,6.941400051116943,2.311615467071533,3.0,8.0,8.0,0.0551999993622303,0.7666000127792358
+trm_baseline_best,0.01,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669000267982483,0.9505268335342407,0.9452000260353088,0.7635999917984009,6.935200214385986,2.3034324645996094,3.0,8.0,8.0,0.05480000004172325,0.7635999917984009
+trm_baseline_best,0.03,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8665000200271606,0.9502792954444885,0.9503999948501587,0.7573999762535095,6.932000160217285,2.2814416885375977,3.0,8.0,8.0,0.04960000142455101,0.7573999762535095
+trm_baseline_best,0.1,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8664500117301941,0.9503796696662903,0.9553999900817871,0.7558000087738037,6.931600093841553,2.2636523246765137,3.0,8.0,8.0,0.044599998742341995,0.7558000087738037
+trm_multi4_best,0.0,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8920000195503235,0.9585950970649719,0.8920000195503235,0.8920000195503235,7.136000156402588,2.4830431938171387,0.0,8.0,8.0,0.1080000028014183,0.8920000195503235
+trm_multi4_best,3e-05,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8941249847412109,0.9596672654151917,0.9577999711036682,0.8095999956130981,7.1529998779296875,2.0940847396850586,4.0,8.0,8.0,0.0421999990940094,0.8095999956130981
+trm_multi4_best,0.0001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8919249773025513,0.9588295817375183,0.9574000239372253,0.8040000200271606,7.13539981842041,2.107004165649414,4.0,8.0,8.0,0.04259999841451645,0.8040000200271606
+trm_multi4_best,0.0003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.9595761895179749,0.9570000171661377,0.8118000030517578,7.1519999504089355,2.106584072113037,4.0,8.0,8.0,0.0430000014603138,0.8118000030517578
+trm_multi4_best,0.001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8934500217437744,0.9593700170516968,0.9535999894142151,0.8091999888420105,7.147600173950195,2.1107852458953857,4.0,8.0,8.0,0.04639999940991402,0.8091999888420105
+trm_multi4_best,0.003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8934749960899353,0.9593802690505981,0.9593999981880188,0.8101999759674072,7.147799968719482,2.1013221740722656,4.0,8.0,8.0,0.0406000018119812,0.8101999759674072
+trm_multi4_best,0.01,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8938249945640564,0.9593154191970825,0.9599999785423279,0.7993999719619751,7.150599956512451,2.065894365310669,4.0,8.0,8.0,0.03999999910593033,0.7993999719619751
+trm_multi4_best,0.03,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8929749727249146,0.9591527581214905,0.9606000185012817,0.7986000180244446,7.143799781799316,2.0669593811035156,4.0,8.0,8.0,0.039400000125169754,0.7986000180244446
+trm_multi4_best,0.1,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8917250037193298,0.9587355256080627,0.9639999866485596,0.79339998960495,7.133800029754639,2.062788724899292,4.0,8.0,8.0,0.035999998450279236,0.79339998960495
+trm_multi4_final,0.0,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8452000021934509,0.9396741390228271,0.8452000021934509,0.8452000021934509,6.761600017547607,2.8937113285064697,0.0,8.0,8.0,0.15479999780654907,0.8452000021934509
+trm_multi4_final,3e-05,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.840499997138977,0.9371709823608398,0.9114000201225281,0.7591999769210815,6.723999977111816,2.6249237060546875,1.0,8.0,8.0,0.08860000222921371,0.7591999769210815
+trm_multi4_final,0.0001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8407999873161316,0.9373589754104614,0.9136000275611877,0.7577999830245972,6.726399898529053,2.612344264984131,1.0,8.0,8.0,0.08640000224113464,0.7577999830245972
+trm_multi4_final,0.0003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8399249911308289,0.9371064305305481,0.9092000126838684,0.7572000026702881,6.719399929046631,2.6245501041412354,1.0,8.0,8.0,0.09080000221729279,0.7572000026702881
+trm_multi4_final,0.001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8385249972343445,0.9361668229103088,0.9089999794960022,0.7588000297546387,6.708199977874756,2.6401236057281494,1.0,8.0,8.0,0.09099999815225601,0.7588000297546387
+trm_multi4_final,0.003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8406999707221985,0.9372812509536743,0.9165999889373779,0.753000020980835,6.725599765777588,2.6047465801239014,1.0,8.0,8.0,0.08340000361204147,0.753000020980835
+trm_multi4_final,0.01,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.838100016117096,0.9362147450447083,0.9132000207901001,0.7477999925613403,6.704800128936768,2.612366199493408,1.0,8.0,8.0,0.0868000015616417,0.7477999925613403
+trm_multi4_final,0.03,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8380249738693237,0.9364882707595825,0.9197999835014343,0.7378000020980835,6.70419979095459,2.575946807861328,1.0,8.0,8.0,0.08020000159740448,0.7378000020980835
+trm_multi4_final,0.1,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.841949999332428,0.9376598596572876,0.9305999875068665,0.7305999994277954,6.735599994659424,2.5063304901123047,2.0,8.0,8.0,0.06939999759197235,0.7305999994277954
diff --git a/initial_perturb_robustness/plots/initial_perturb_robustness_conditional.csv b/initial_perturb_robustness/plots/initial_perturb_robustness_conditional.csv
new file mode 100644
index 0000000..b7bf3a1
--- /dev/null
+++ b/initial_perturb_robustness/plots/initial_perturb_robustness_conditional.csv
@@ -0,0 +1,28 @@
+label,sigma,clean_acc,n_clean_success,n_clean_fail,retain_mean_on_clean_success,allK_on_clean_success,rescue_mean_on_clean_fail,passK_on_clean_fail
+baseline best,0.0,0.8658,4329,671,1.0,1.0,0.0,0.0
+baseline best,2.9999999242136255e-05,0.8658,4329,671,0.9607010857010857,0.8833448833448834,0.2729135618479881,0.6140089418777943
+baseline best,9.999999747378752e-05,0.8658,4329,671,0.959026334026334,0.8838068838068838,0.27738450074515647,0.6363636363636364
+baseline best,0.0003000000142492354,0.8658,4329,671,0.9602102102102102,0.8824208824208825,0.27719821162444114,0.6274217585692996
+baseline best,0.0010000000474974513,0.8658,4329,671,0.9605855855855856,0.8854238854238854,0.2818554396423249,0.6348733233979136
+baseline best,0.003000000026077032,0.8658,4329,671,0.9599214599214599,0.8791868791868792,0.2725409836065574,0.6080476900149031
+baseline best,0.009999999776482582,0.8658,4329,671,0.9584488334488335,0.8768768768768769,0.27626676602086436,0.6005961251862891
+baseline best,0.029999999329447746,0.8658,4329,671,0.9548972048972049,0.86994686994687,0.29619970193740686,0.6482861400894188
+baseline best,0.10000000149011612,0.8658,4329,671,0.9538865788865789,0.8655578655578655,0.3023472429210134,0.6795827123695977
+multi4 best,0.0,0.892,4460,540,1.0,1.0,0.0,0.0
+multi4 best,2.9999999242136255e-05,0.892,4460,540,0.9675728699551569,0.9042600896860986,0.2875,0.6314814814814815
+multi4 best,9.999999747378752e-05,0.892,4460,540,0.965695067264574,0.897982062780269,0.2826388888888889,0.6222222222222222
+multi4 best,0.0003000000142492354,0.892,4460,540,0.9668721973094171,0.905829596412556,0.29212962962962963,0.6296296296296297
+multi4 best,0.0010000000474974513,0.892,4460,540,0.9665078475336323,0.9013452914798207,0.2900462962962963,0.5981481481481481
+multi4 best,0.003000000026077032,0.892,4460,540,0.9664798206278027,0.9035874439461884,0.29050925925925924,0.6481481481481481
+multi4 best,0.009999999776482582,0.892,4460,540,0.965695067264574,0.8917040358744395,0.3002314814814815,0.6425925925925926
+multi4 best,0.029999999329447746,0.892,4460,540,0.9631726457399103,0.8905829596412556,0.31319444444444444,0.6555555555555556
+multi4 best,0.10000000149011612,0.892,4460,540,0.9617432735426009,0.8847533632286996,0.31342592592592594,0.6833333333333333
+multi4 final,0.0,0.8452,4226,774,1.0,1.0,0.0,0.0
+multi4 final,2.9999999242136255e-05,0.8452,4226,774,0.9613700899195456,0.8946994794131566,0.18055555555555555,0.4483204134366925
+multi4 final,9.999999747378752e-05,0.8452,4226,774,0.959979886417416,0.8928064363464269,0.19008397932816537,0.4625322997416021
+multi4 final,0.0003000000142492354,0.8452,4226,774,0.9602756743965926,0.8916232844297208,0.18281653746770027,0.4405684754521964
+multi4 final,0.0010000000474974513,0.8452,4226,774,0.9595362044486512,0.8937529578797918,0.17781007751937986,0.4418604651162791
+multi4 final,0.003000000026077032,0.8452,4226,774,0.9595362044486512,0.8868906767628963,0.19186046511627908,0.4844961240310077
+multi4 final,0.009999999776482582,0.8452,4226,774,0.9556613819214387,0.8797917652626597,0.19622093023255813,0.46511627906976744
+multi4 final,0.029999999329447746,0.8452,4226,774,0.9528513961192617,0.8684335068622812,0.21107881136950904,0.5064599483204134
+multi4 final,0.10000000149011612,0.8452,4226,774,0.949420255560814,0.8563653573118788,0.25516795865633074,0.5762273901808785
diff --git a/initial_perturb_robustness/smoke_baseline.summary.csv b/initial_perturb_robustness/smoke_baseline.summary.csv
new file mode 100644
index 0000000..c5fd40c
--- /dev/null
+++ b/initial_perturb_robustness/smoke_baseline.summary.csv
@@ -0,0 +1,3 @@
+label,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,correct_count_q10,correct_count_q50,correct_count_q90,zero_frac,full_frac
+trm_baseline_smoke,0.0,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.875,0.9537037014961243,0.875,0.875,1.75,0.6614378094673157,1.0,2.0,2.0,0.125,0.875
+trm_baseline_smoke,0.001,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8125,0.9309413433074951,0.875,0.75,1.625,0.6959705352783203,0.5,2.0,2.0,0.125,0.75
diff --git a/initial_perturb_robustness/smoke_baseline_b32k8.summary.csv b/initial_perturb_robustness/smoke_baseline_b32k8.summary.csv
new file mode 100644
index 0000000..e7ca899
--- /dev/null
+++ b/initial_perturb_robustness/smoke_baseline_b32k8.summary.csv
@@ -0,0 +1,2 @@
+label,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,correct_count_q10,correct_count_q50,correct_count_q90,zero_frac,full_frac
+trm_baseline_b32k8_smoke,0.0,32,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.90625,0.9637345671653748,0.90625,0.90625,7.25,2.3318448066711426,8.0,8.0,8.0,0.09375,0.90625
diff --git a/initial_perturb_robustness/smoke_plots/initial_perturb_robustness_combined.csv b/initial_perturb_robustness/smoke_plots/initial_perturb_robustness_combined.csv
new file mode 100644
index 0000000..f7c3ccc
--- /dev/null
+++ b/initial_perturb_robustness/smoke_plots/initial_perturb_robustness_combined.csv
@@ -0,0 +1,4 @@
+label,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,correct_count_q10,correct_count_q50,correct_count_q90,zero_frac,full_frac
+trm_baseline_smoke,0.0,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.875,0.9537037014961243,0.875,0.875,1.75,0.6614378094673157,1.0,2.0,2.0,0.125,0.875
+trm_baseline_smoke,0.001,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8125,0.9309413433074951,0.875,0.75,1.625,0.6959705352783203,0.5,2.0,2.0,0.125,0.75
+trm_baseline_b32k8_smoke,0.0,32,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.90625,0.9637345671653748,0.90625,0.90625,7.25,2.3318448066711426,8.0,8.0,8.0,0.09375,0.90625
diff --git a/initial_perturb_robustness/trm_baseline_best_step58590_n5000_k8.summary.csv b/initial_perturb_robustness/trm_baseline_best_step58590_n5000_k8.summary.csv
new file mode 100644
index 0000000..94b9d4a
--- /dev/null
+++ b/initial_perturb_robustness/trm_baseline_best_step58590_n5000_k8.summary.csv
@@ -0,0 +1,10 @@
+label,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,correct_count_q10,correct_count_q50,correct_count_q90,zero_frac,full_frac
+trm_baseline_best,0.0,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8658000230789185,0.9502986073493958,0.8658000230789185,0.8658000230789185,6.926400184631348,2.7269365787506104,0.0,8.0,8.0,0.13420000672340393,0.8658000230789185
+trm_baseline_best,3e-05,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.868399977684021,0.9510218501091003,0.9455999732017517,0.769599974155426,6.947199821472168,2.309980869293213,3.0,8.0,8.0,0.0544000007212162,0.769599974155426
+trm_baseline_best,0.0001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8675500154495239,0.9506199955940247,0.9485999941825867,0.769599974155426,6.940400123596191,2.3122386932373047,2.0,8.0,8.0,0.05139999836683273,0.769599974155426
+trm_baseline_best,0.0003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8685500025749207,0.9512001872062683,0.9476000070571899,0.7698000073432922,6.948400020599365,2.310267925262451,3.0,8.0,8.0,0.052400000393390656,0.7698000073432922
+trm_baseline_best,0.001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8694999814033508,0.9514188766479492,0.9488000273704529,0.7712000012397766,6.955999851226807,2.289206027984619,3.0,8.0,8.0,0.05119999870657921,0.7712000012397766
+trm_baseline_best,0.003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8676750063896179,0.950739860534668,0.9448000192642212,0.7666000127792358,6.941400051116943,2.311615467071533,3.0,8.0,8.0,0.0551999993622303,0.7666000127792358
+trm_baseline_best,0.01,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669000267982483,0.9505268335342407,0.9452000260353088,0.7635999917984009,6.935200214385986,2.3034324645996094,3.0,8.0,8.0,0.05480000004172325,0.7635999917984009
+trm_baseline_best,0.03,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8665000200271606,0.9502792954444885,0.9503999948501587,0.7573999762535095,6.932000160217285,2.2814416885375977,3.0,8.0,8.0,0.04960000142455101,0.7573999762535095
+trm_baseline_best,0.1,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8664500117301941,0.9503796696662903,0.9553999900817871,0.7558000087738037,6.931600093841553,2.2636523246765137,3.0,8.0,8.0,0.044599998742341995,0.7558000087738037
diff --git a/initial_perturb_robustness/trm_multi4_best_step35805_n5000_k8.summary.csv b/initial_perturb_robustness/trm_multi4_best_step35805_n5000_k8.summary.csv
new file mode 100644
index 0000000..0f352ca
--- /dev/null
+++ b/initial_perturb_robustness/trm_multi4_best_step35805_n5000_k8.summary.csv
@@ -0,0 +1,10 @@
+label,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,correct_count_q10,correct_count_q50,correct_count_q90,zero_frac,full_frac
+trm_multi4_best,0.0,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8920000195503235,0.9585950970649719,0.8920000195503235,0.8920000195503235,7.136000156402588,2.4830431938171387,0.0,8.0,8.0,0.1080000028014183,0.8920000195503235
+trm_multi4_best,3e-05,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8941249847412109,0.9596672654151917,0.9577999711036682,0.8095999956130981,7.1529998779296875,2.0940847396850586,4.0,8.0,8.0,0.0421999990940094,0.8095999956130981
+trm_multi4_best,0.0001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8919249773025513,0.9588295817375183,0.9574000239372253,0.8040000200271606,7.13539981842041,2.107004165649414,4.0,8.0,8.0,0.04259999841451645,0.8040000200271606
+trm_multi4_best,0.0003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.9595761895179749,0.9570000171661377,0.8118000030517578,7.1519999504089355,2.106584072113037,4.0,8.0,8.0,0.0430000014603138,0.8118000030517578
+trm_multi4_best,0.001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8934500217437744,0.9593700170516968,0.9535999894142151,0.8091999888420105,7.147600173950195,2.1107852458953857,4.0,8.0,8.0,0.04639999940991402,0.8091999888420105
+trm_multi4_best,0.003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8934749960899353,0.9593802690505981,0.9593999981880188,0.8101999759674072,7.147799968719482,2.1013221740722656,4.0,8.0,8.0,0.0406000018119812,0.8101999759674072
+trm_multi4_best,0.01,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8938249945640564,0.9593154191970825,0.9599999785423279,0.7993999719619751,7.150599956512451,2.065894365310669,4.0,8.0,8.0,0.03999999910593033,0.7993999719619751
+trm_multi4_best,0.03,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8929749727249146,0.9591527581214905,0.9606000185012817,0.7986000180244446,7.143799781799316,2.0669593811035156,4.0,8.0,8.0,0.039400000125169754,0.7986000180244446
+trm_multi4_best,0.1,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8917250037193298,0.9587355256080627,0.9639999866485596,0.79339998960495,7.133800029754639,2.062788724899292,4.0,8.0,8.0,0.035999998450279236,0.79339998960495
diff --git a/initial_perturb_robustness/trm_multi4_final_step65100_n5000_k8.summary.csv b/initial_perturb_robustness/trm_multi4_final_step65100_n5000_k8.summary.csv
new file mode 100644
index 0000000..b4387d7
--- /dev/null
+++ b/initial_perturb_robustness/trm_multi4_final_step65100_n5000_k8.summary.csv
@@ -0,0 +1,10 @@
+label,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,correct_count_q10,correct_count_q50,correct_count_q90,zero_frac,full_frac
+trm_multi4_final,0.0,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8452000021934509,0.9396741390228271,0.8452000021934509,0.8452000021934509,6.761600017547607,2.8937113285064697,0.0,8.0,8.0,0.15479999780654907,0.8452000021934509
+trm_multi4_final,3e-05,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.840499997138977,0.9371709823608398,0.9114000201225281,0.7591999769210815,6.723999977111816,2.6249237060546875,1.0,8.0,8.0,0.08860000222921371,0.7591999769210815
+trm_multi4_final,0.0001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8407999873161316,0.9373589754104614,0.9136000275611877,0.7577999830245972,6.726399898529053,2.612344264984131,1.0,8.0,8.0,0.08640000224113464,0.7577999830245972
+trm_multi4_final,0.0003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8399249911308289,0.9371064305305481,0.9092000126838684,0.7572000026702881,6.719399929046631,2.6245501041412354,1.0,8.0,8.0,0.09080000221729279,0.7572000026702881
+trm_multi4_final,0.001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8385249972343445,0.9361668229103088,0.9089999794960022,0.7588000297546387,6.708199977874756,2.6401236057281494,1.0,8.0,8.0,0.09099999815225601,0.7588000297546387
+trm_multi4_final,0.003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8406999707221985,0.9372812509536743,0.9165999889373779,0.753000020980835,6.725599765777588,2.6047465801239014,1.0,8.0,8.0,0.08340000361204147,0.753000020980835
+trm_multi4_final,0.01,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.838100016117096,0.9362147450447083,0.9132000207901001,0.7477999925613403,6.704800128936768,2.612366199493408,1.0,8.0,8.0,0.0868000015616417,0.7477999925613403
+trm_multi4_final,0.03,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8380249738693237,0.9364882707595825,0.9197999835014343,0.7378000020980835,6.70419979095459,2.575946807861328,1.0,8.0,8.0,0.08020000159740448,0.7378000020980835
+trm_multi4_final,0.1,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.841949999332428,0.9376598596572876,0.9305999875068665,0.7305999994277954,6.735599994659424,2.5063304901123047,2.0,8.0,8.0,0.06939999759197235,0.7305999994277954
diff --git a/initial_perturb_robustness/watch_and_plot.sh b/initial_perturb_robustness/watch_and_plot.sh
new file mode 100755
index 0000000..eb2ba4f
--- /dev/null
+++ b/initial_perturb_robustness/watch_and_plot.sh
@@ -0,0 +1,25 @@
+#!/usr/bin/env bash
+# Wait for the three initial-perturbation robustness jobs listed in PIDS to
+# exit, then plot their summary CSVs and record the GPU state beside the plots.
+# Each entry of PIDS is a file containing the process ID of one job.
+set -euo pipefail
+ROOT=/home/yurenh2/rrm
+PY=/home/yurenh2/miniconda3/envs/rrm/bin/python
+cd "${ROOT}"
+PLOT_DIR=research/flossing/initial_perturb_robustness/plots
+PIDS=(
+  research/flossing/initial_perturb_robustness/logs/trm_baseline_best_step58590_n5000_k8.pid
+  research/flossing/initial_perturb_robustness/logs/trm_multi4_best_step35805_n5000_k8.pid
+  research/flossing/initial_perturb_robustness/logs/trm_multi4_final_step65100_n5000_k8.pid
+)
+for pf in "${PIDS[@]}"; do
+  pid=$(cat "${pf}")
+  # An empty or corrupt PID file would make `kill -0` fail immediately, and its
+  # error is silenced below, so the job would look finished and the plot would
+  # start too early. Stop with a clear message instead.
+  if [[ ! "${pid}" =~ ^[0-9]+$ ]]; then
+    echo "invalid pid in ${pf}: '${pid}'" >&2
+    exit 1
+  fi
+  echo "watch ${pf}: ${pid}"
+  # Signal 0 delivers nothing; it only tests whether the process can be signalled.
+  while kill -0 "${pid}" 2>/dev/null; do
+    sleep 60
+  done
+  echo "done ${pf}: ${pid}"
+done
+"${PY}" research/flossing/plot_initial_perturb_robustness.py \
+  --summaries \
+  research/flossing/initial_perturb_robustness/trm_baseline_best_step58590_n5000_k8.summary.csv \
+  research/flossing/initial_perturb_robustness/trm_multi4_best_step35805_n5000_k8.summary.csv \
+  research/flossing/initial_perturb_robustness/trm_multi4_final_step65100_n5000_k8.summary.csv \
+  --out-dir "${PLOT_DIR}"
+# The shell opens the redirect target before nvidia-smi runs, so the directory
+# has to exist regardless of what the plotting script created.
+mkdir -p "${PLOT_DIR}"
+nvidia-smi --query-gpu=index,memory.used,memory.total,utilization.gpu --format=csv,noheader,nounits > "${PLOT_DIR}/final_gpu_status.txt"
diff --git a/late_perturb_robustness.py b/late_perturb_robustness.py
new file mode 100644
index 0000000..7ad2ba5
--- /dev/null
+++ b/late_perturb_robustness.py
@@ -0,0 +1,323 @@
+"""Inference-time robustness to late recurrent-state perturbations.
+
+For each sample, run the model cleanly for `perturb_after` inner iterations,
+perturb z_H/z_L once, then continue the deterministic recurrent rollout. This
+stress-tests the attractor basin along the inference trajectory rather than
+only at the initial recurrent state.
+"""
+from __future__ import annotations
+
+import argparse
+import csv
+import json
+import math
+import sys
+from dataclasses import replace
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+import torch
+import yaml
+
+
+# Hard-coded checkout directory; it is placed first on sys.path so the
+# `models...` import that follows is searched for there before anywhere else.
+TRM_DIR = Path("/home/yurenh2/rrm/trm")
+sys.path.insert(0, str(TRM_DIR))
+
+from models.recursive_reasoning.trm import ( # noqa: E402
+ TinyRecursiveReasoningModel_ACTV1,
+ TinyRecursiveReasoningModel_ACTV1InnerCarry,
+)
+
+
+# Label value that correctness() masks out of both exact-match and token accuracy.
+IGNORE_LABEL_ID = -100
+
+
+def parse_float_list(text: str) -> list[float]:
+    """Parse a comma-separated string into floats, skipping blank entries."""
+    values: list[float] = []
+    for piece in text.split(","):
+        token = piece.strip()
+        if token:
+            values.append(float(token))
+    return values
+
+
+def parse_int_list(text: str) -> list[int]:
+    """Parse a comma-separated string into ints, skipping blank entries."""
+    values: list[int] = []
+    for piece in text.split(","):
+        token = piece.strip()
+        if token:
+            values.append(int(token))
+    return values
+
+
+def is_zero(value: float) -> bool:
+    """Return True when `value` lies within 1e-12 of zero."""
+    tolerance = 1e-12
+    return -tolerance <= value <= tolerance
+
+
+def load_model(ckpt_root: Path, ckpt_name: str, device: str):
+    """Build the TRM model described by a checkpoint directory and load its weights.
+
+    Reads `all_config.yaml` from `ckpt_root`, takes the dataset directory from
+    its `data_path` entry (or the first of `data_paths`), and fills the
+    architecture config with sizes from that dataset's `train/dataset.json`.
+    Weights are loaded non-strictly; the number of missing and unexpected keys
+    is printed rather than raised.
+
+    Returns:
+        (model, cfg, data_path): the model in eval mode on `device`, the parsed
+        YAML config, and the dataset directory as a Path.
+    """
+    config_text = (ckpt_root / "all_config.yaml").read_text()
+    cfg = yaml.safe_load(config_text)
+    data_path = Path(cfg.get("data_path") or cfg["data_paths"][0])
+    meta_text = (data_path / "train" / "dataset.json").read_text()
+    train_meta = json.loads(meta_text)
+
+    arch_cfg = {
+        **cfg["arch"],
+        "batch_size": cfg["global_batch_size"],
+        "seq_len": train_meta["seq_len"],
+        "vocab_size": train_meta["vocab_size"],
+        "num_puzzle_identifiers": train_meta["num_puzzle_identifiers"],
+        "causal": False,
+    }
+
+    model = TinyRecursiveReasoningModel_ACTV1(arch_cfg)
+    state = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True)
+    # Drop the "_orig_mod." and "model." fragments from checkpoint key names
+    # (presumably added by torch.compile and a training wrapper -- TODO confirm).
+    stripped = {}
+    for key, value in state.items():
+        clean_key = key.replace("_orig_mod.", "").replace("model.", "")
+        stripped[clean_key] = value
+    missing, unexpected = model.load_state_dict(stripped, strict=False)
+    print(f"[load] {ckpt_root.name}/{ckpt_name} missing={len(missing)} unexpected={len(unexpected)}", flush=True)
+    model.to(device).eval()
+    return model, cfg, data_path
+
+
+def load_test_samples(data_path: Path, n_samples: int, seed: int):
+    """Draw a seeded random subset of the test split without replacement.
+
+    Loads `all__inputs.npy`, `all__labels.npy` and
+    `all__puzzle_identifiers.npy` from `data_path / "test"` and keeps at most
+    `n_samples` rows of each, selected with `np.random.default_rng(seed)`.
+
+    Returns:
+        dict with int32 tensors "inputs", "labels", "puzzle_identifiers" and
+        the chosen row indices as the NumPy array "idx".
+    """
+    split_dir = data_path / "test"
+    inputs = np.load(split_dir / "all__inputs.npy")
+    labels = np.load(split_dir / "all__labels.npy")
+    puzzle_ids = np.load(split_dir / "all__puzzle_identifiers.npy")
+
+    total = len(inputs)
+    rng = np.random.default_rng(seed)
+    idx = rng.choice(total, size=min(n_samples, total), replace=False)
+
+    def pick(array):
+        # Select the sampled rows and convert them to an int32 tensor.
+        return torch.from_numpy(array[idx].astype(np.int32))
+
+    return {
+        "inputs": pick(inputs),
+        "labels": pick(labels),
+        "puzzle_identifiers": pick(puzzle_ids),
+        "idx": idx,
+    }
+
+
+def batch_slice(samples: dict[str, Any], start: int, end: int, device: str):
+    """Return rows [start, end) of the model-input tensors, moved to `device`.
+
+    Only "inputs", "labels" and "puzzle_identifiers" are kept; any other entry
+    of `samples` (such as "idx") is left out.
+    """
+    wanted = ("inputs", "labels", "puzzle_identifiers")
+    sliced = {}
+    for name, value in samples.items():
+        if name not in wanted:
+            continue
+        sliced[name] = value[start:end].to(device, non_blocking=True)
+    return sliced
+
+
+def repeat_batch(batch: dict[str, torch.Tensor], repeats: int):
+    """Repeat every row of each tensor `repeats` times consecutively along dim 0.
+
+    With `repeats == 1` the input dict itself is returned, not a copy.
+    """
+    expanded = batch
+    if repeats != 1:
+        expanded = {
+            name: tensor.repeat_interleave(repeats, dim=0)
+            for name, tensor in batch.items()
+        }
+    return expanded
+
+
+def sample_unit_noise_like(tensor: torch.Tensor, generator: torch.Generator, distribution: str):
+    """Draw noise with the shape, device and dtype of `tensor`.
+
+    "uniform" samples from [-1, 1) and scales by sqrt(3), which gives unit
+    variance; every other value of `distribution` samples a standard normal.
+    Sampling is done in float32 and then cast to `tensor.dtype`.
+    """
+    draw_kwargs = dict(device=tensor.device, dtype=torch.float32, generator=generator)
+    if distribution != "uniform":
+        return torch.randn(tensor.shape, **draw_kwargs).to(tensor.dtype)
+    centred = 2.0 * torch.rand(tensor.shape, **draw_kwargs) - 1.0
+    return (centred * math.sqrt(3.0)).to(tensor.dtype)
+
+
+def apply_noise(
+    inner: TinyRecursiveReasoningModel_ACTV1InnerCarry,
+    sigma: float,
+    perturb: str,
+    generator: torch.Generator,
+    distribution: str,
+):
+    """Return a copy of `inner` with `sigma`-scaled noise added to z_H and/or z_L.
+
+    `perturb` chooses the target: "h" for z_H, "l" for z_L, "both" for both.
+    When `sigma <= 0` the carry is returned unchanged and no random numbers
+    are drawn. Noise for z_H is drawn before noise for z_L.
+    """
+    if sigma <= 0:
+        return inner
+    fields = {"z_H": inner.z_H, "z_L": inner.z_L}
+    targets = (("z_H", ("h", "both")), ("z_L", ("l", "both")))
+    for field, selectors in targets:
+        if perturb in selectors:
+            current = fields[field]
+            fields[field] = current + sigma * sample_unit_noise_like(current, generator, distribution)
+    return replace(inner, **fields)
+
+
+def correctness(logits: torch.Tensor, labels: torch.Tensor):
+    """Score argmax predictions against labels, ignoring IGNORE_LABEL_ID positions.
+
+    Returns:
+        (exact, token_acc), both reduced over the last dimension: `exact` is
+        True where every non-ignored position matches, and `token_acc` is the
+        fraction of non-ignored positions that match. The denominator is
+        clamped to at least 1, so a fully ignored row scores 0.
+    """
+    predicted = logits.argmax(dim=-1)
+    valid = labels != IGNORE_LABEL_ID
+    hits = predicted == labels
+    # Ignored positions count as matching for the exact-match reduction.
+    exact = torch.where(valid, hits, True).all(dim=-1)
+    n_valid = valid.sum(-1).clamp_min(1)
+    token_acc = (hits & valid).sum(-1).float() / n_valid.float()
+    return exact, token_acc
+
+
+@torch.inference_mode()
+def eval_condition(
+    model,
+    batch: dict[str, torch.Tensor],
+    sigma: float,
+    rollouts: int,
+    perturb_after: int,
+    perturb: str,
+    distribution: str,
+    generator: torch.Generator,
+):
+    """Roll the model out with a single state perturbation and score the result.
+
+    Each sample is repeated `rollouts` times. The recurrent state is advanced
+    for `perturb_after` inner calls (clamped to [0, halt_max_steps - 1]),
+    perturbed once through apply_noise, and then advanced until
+    `model.config.halt_max_steps` calls have been made in total. The logits of
+    the last call are scored.
+
+    Returns:
+        (exact, token_acc), each reshaped to (batch size, rollouts).
+    """
+    tiled = repeat_batch(batch, rollouts)
+    device = tiled["inputs"].device
+    n_rows = tiled["inputs"].shape[0]
+    max_steps = model.config.halt_max_steps
+    n_clean = max(perturb_after, 0)
+    if n_clean > max_steps - 1:
+        n_clean = max_steps - 1
+
+    with torch.device(device):
+        carry = model.initial_carry(tiled)
+    # Reset every row so the rollout starts from the model's reset state.
+    every_row = torch.ones(n_rows, device=device, dtype=torch.bool)
+    state = model.inner.reset_carry(every_row, carry.inner_carry)
+
+    logits = None
+    for _ in range(n_clean):
+        state, logits, _q = model.inner(state, tiled)
+    state = apply_noise(state, sigma, perturb, generator, distribution)
+    for _ in range(max_steps - n_clean):
+        state, logits, _q = model.inner(state, tiled)
+
+    assert logits is not None
+    exact, token_acc = correctness(logits, tiled["labels"])
+    n_base = batch["inputs"].shape[0]
+    return exact.view(n_base, rollouts), token_acc.view(n_base, rollouts)
+
+
+def summarize(exact: torch.Tensor, token_acc: torch.Tensor, clean_exact: torch.Tensor) -> dict[str, float]:
+    """Aggregate per-rollout results into scalar statistics.
+
+    `exact` and `token_acc` are indexed (sample, rollout); `clean_exact` holds
+    one flag per sample and splits the samples into clean successes and clean
+    failures. The retain/allK figures cover the successes and the rescue/passK
+    figures cover the failures; each pair is NaN when its group is empty.
+    Key order is fixed because it becomes the CSV column order.
+    """
+
+    def frac(flags: torch.Tensor) -> float:
+        # Mean of a boolean (or 0/1) tensor as a Python float.
+        return flags.float().mean().item()
+
+    nan = float("nan")
+    per_sample_hits = exact.float().sum(dim=1)
+    n_rollouts = exact.shape[1]
+    solved_clean = clean_exact.bool()
+    failed_clean = ~solved_clean
+
+    out = {
+        "mean_rollout_exact": frac(exact),
+        "mean_rollout_token_acc": token_acc.mean().item(),
+        "pass_at_k": frac(exact.any(dim=1)),
+        "all_k": frac(exact.all(dim=1)),
+        "correct_count_mean": per_sample_hits.mean().item(),
+        "correct_count_std": per_sample_hits.std(unbiased=False).item(),
+        "zero_frac": frac(per_sample_hits == 0),
+        "full_frac": frac(per_sample_hits == n_rollouts),
+        "clean_acc": frac(solved_clean),
+    }
+
+    has_success = solved_clean.any().item()
+    out["retain_mean_on_clean_success"] = frac(exact[solved_clean]) if has_success else nan
+    out["allK_on_clean_success"] = frac(exact[solved_clean].all(dim=1)) if has_success else nan
+
+    has_failure = failed_clean.any().item()
+    out["rescue_mean_on_clean_fail"] = frac(exact[failed_clean]) if has_failure else nan
+    out["passK_on_clean_fail"] = frac(exact[failed_clean].any(dim=1)) if has_failure else nan
+    return out
+
+
+def write_summary(path: Path, rows: list[dict[str, Any]]) -> None:
+    """Write `rows` to `path` as CSV.
+
+    The header is the union of all row keys in first-seen order, so the first
+    row's keys lead and keys that appear only in later rows are appended.
+    Cells for keys a row lacks are left empty (the DictWriter default).
+    An empty `rows` list writes a file with an empty header line and no data
+    rows instead of raising IndexError.
+    """
+    # dict.fromkeys keeps insertion order and drops duplicates in one pass.
+    keys = list(dict.fromkeys(key for row in rows for key in row))
+    with path.open("w", newline="") as f:
+        writer = csv.DictWriter(f, fieldnames=keys)
+        writer.writeheader()
+        writer.writerows(rows)
+
+
+def main() -> None:
+    """Sweep (perturb_after, sigma) conditions for one checkpoint and save the results.
+
+    For every `perturb_after` value, each sigma is evaluated over the sampled
+    test set in batches. The sigma=0 run defines which samples are "clean"
+    successes; those labels feed the retain/rescue statistics of the other
+    sigmas, so sigma=0 is always evaluated first (it is added if absent and
+    moved to the front if listed later).
+
+    Outputs, all named `<out-prefix><ext>`:
+      * `.summary.csv` -- one row per (perturb_after, sigma) condition.
+      * `.meta.json`   -- arguments and the effective sigma / perturb_after lists.
+      * `.npz`         -- per-rollout `exact` and `token_acc` stacked along a
+        leading axis with one entry per condition, ordered perturb_after-major
+        and sigma-minor, plus the sampled indices and the metadata.
+
+    The device is fixed to "cuda".
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--ckpt-root", required=True)
+    parser.add_argument("--ckpt-name", required=True)
+    parser.add_argument("--label", required=True)
+    parser.add_argument("--n-samples", type=int, default=3000)
+    parser.add_argument("--batch-size", type=int, default=32)
+    parser.add_argument("--rollouts", type=int, default=8)
+    parser.add_argument("--sigmas", default="0,0.001,0.003,0.01,0.03,0.1")
+    parser.add_argument("--perturb-afters", default="0,4,8,12,15")
+    parser.add_argument("--perturb", choices=["h", "l", "both"], default="both")
+    parser.add_argument("--noise-distribution", choices=["gaussian", "uniform"], default="gaussian")
+    parser.add_argument("--seed", type=int, default=20260606)
+    parser.add_argument("--out-prefix", required=True)
+    args = parser.parse_args()
+
+    device = "cuda"
+    sigmas = parse_float_list(args.sigmas)
+    perturb_afters = parse_int_list(args.perturb_afters)
+    torch.manual_seed(args.seed)
+    # Separate generator for the perturbation noise, offset from the global seed.
+    generator = torch.Generator(device=device).manual_seed(args.seed + 101)
+
+    model, cfg, data_path = load_model(Path(args.ckpt_root), args.ckpt_name, device)
+    samples = load_test_samples(data_path, args.n_samples, args.seed)
+    n = len(samples["inputs"])
+    print(
+        f"[run] label={args.label} n={n} rollouts={args.rollouts} batch={args.batch_size} "
+        f"afters={perturb_afters} sigmas={sigmas}",
+        flush=True,
+    )
+
+    rows: list[dict[str, Any]] = []
+    all_exact = []
+    all_token = []
+
+    # Guarantee the clean (sigma=0) condition runs first. Doing this up front
+    # avoids computing a full sweep only to find the order was unusable.
+    first_zero = next((i for i, s in enumerate(sigmas) if is_zero(s)), None)
+    if first_zero is None:
+        sigmas = [0.0] + sigmas
+    elif first_zero > 0:
+        sigmas = [sigmas[first_zero]] + sigmas[:first_zero] + sigmas[first_zero + 1:]
+
+    for after in perturb_afters:
+        clean_exact = None
+        for sigma in sigmas:
+            exact_parts = []
+            token_parts = []
+            for start in range(0, n, args.batch_size):
+                end = min(start + args.batch_size, n)
+                batch = batch_slice(samples, start, end, device)
+                exact, token_acc = eval_condition(
+                    model, batch, sigma, args.rollouts, after, args.perturb, args.noise_distribution, generator
+                )
+                exact_parts.append(exact.cpu())
+                token_parts.append(token_acc.cpu())
+                # Progress line every 10 batches and at the end of the sweep.
+                if end == n or (end // args.batch_size) % 10 == 0:
+                    print(f"  after={after} sigma={sigma:g} [{end}/{n}]", flush=True)
+            exact_all = torch.cat(exact_parts, dim=0)
+            token_all = torch.cat(token_parts, dim=0)
+            if is_zero(sigma):
+                # No noise is added at sigma=0, so rollout 0 is taken as the
+                # clean outcome of each sample.
+                clean_exact = exact_all[:, 0].clone()
+                print(f"  after={after} clean grouping done clean_acc={clean_exact.float().mean().item():.4f}", flush=True)
+            if clean_exact is None:
+                # Unreachable after the reordering above; kept as a guard.
+                raise RuntimeError("sigma=0 must be evaluated before nonzero sigmas")
+            row: dict[str, Any] = {
+                "label": args.label,
+                "perturb_after": after,
+                "sigma": sigma,
+                "n_samples": n,
+                "rollouts": args.rollouts,
+                "ckpt_root": str(Path(args.ckpt_root)),
+                "ckpt_name": args.ckpt_name,
+                "perturb": args.perturb,
+                "noise_distribution": args.noise_distribution,
+                **summarize(exact_all, token_all, clean_exact),
+            }
+            rows.append(row)
+            all_exact.append(exact_all.numpy())
+            all_token.append(token_all.numpy())
+            print(
+                f"  after={after} sigma={sigma:g} mean={row['mean_rollout_exact']:.4f} "
+                f"retain={row['retain_mean_on_clean_success']:.4f} "
+                f"rescue={row['rescue_mean_on_clean_fail']:.4f}",
+                flush=True,
+            )
+
+    out_prefix = Path(args.out_prefix)
+    out_prefix.parent.mkdir(parents=True, exist_ok=True)
+
+    def out_file(extension: str) -> Path:
+        # Append to the file name. Path.with_suffix would replace anything
+        # after the last dot, truncating prefixes such as "run_s0.1".
+        return out_prefix.with_name(out_prefix.name + extension)
+
+    write_summary(out_file(".summary.csv"), rows)
+    meta = {
+        "args": vars(args),
+        "data_path": str(data_path),
+        "config_global_batch_size": cfg.get("global_batch_size"),
+        "sigmas": sigmas,
+        "perturb_afters": perturb_afters,
+        "n_samples": n,
+    }
+    out_file(".meta.json").write_text(json.dumps(meta, indent=2, sort_keys=True))
+    np.savez_compressed(
+        out_file(".npz"),
+        idx=samples["idx"],
+        sigmas=np.asarray(sigmas, dtype=np.float32),
+        perturb_afters=np.asarray(perturb_afters, dtype=np.int32),
+        exact=np.stack(all_exact, axis=0),
+        token_acc=np.stack(all_token, axis=0),
+        meta_json=np.asarray(json.dumps(meta, sort_keys=True)),
+    )
+    print(f"[done] {out_prefix}.summary.csv", flush=True)
+
+
+# Run the sweep only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+ main()
diff --git a/late_perturb_robustness/plots/final_gpu_status.txt b/late_perturb_robustness/plots/final_gpu_status.txt
new file mode 100644
index 0000000..07bebea
--- /dev/null
+++ b/late_perturb_robustness/plots/final_gpu_status.txt
@@ -0,0 +1,4 @@
+0, 13, 49140, 0
+1, 13, 49140, 0
+2, 45494, 49140, 95
+3, 13, 49140, 0
diff --git a/late_perturb_robustness/plots/late_perturb_robustness_combined.csv b/late_perturb_robustness/plots/late_perturb_robustness_combined.csv
new file mode 100644
index 0000000..144b4c5
--- /dev/null
+++ b/late_perturb_robustness/plots/late_perturb_robustness_combined.csv
@@ -0,0 +1,91 @@
+label,perturb_after,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,zero_frac,full_frac,clean_acc,retain_mean_on_clean_success,allK_on_clean_success,rescue_mean_on_clean_fail,passK_on_clean_fail
+trm_baseline_best,0,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.950645923614502,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0
+trm_baseline_best,0,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.9504892826080322,0.9483333230018616,0.7639999985694885,6.935999870300293,2.303454875946045,0.05166666582226753,0.7639999985694885,0.8669999837875366,0.9578046798706055,0.875048041343689,0.2750626504421234,0.6315789222717285
+trm_baseline_best,0,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8684166669845581,0.9509722590446472,0.9456666707992554,0.7683333158493042,6.947333335876465,2.3008460998535156,0.05433333292603493,0.7683333158493042,0.8669999837875366,0.9602556824684143,0.8800461292266846,0.2697368562221527,0.6090225577354431
+trm_baseline_best,0,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.9504145383834839,0.9463333487510681,0.765333354473114,6.935999870300293,2.3080809116363525,0.05366666615009308,0.765333354473114,0.8669999837875366,0.9579008221626282,0.8785082697868347,0.2744360864162445,0.6190476417541504
+trm_baseline_best,0,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8675000071525574,0.9505621790885925,0.9496666789054871,0.7616666555404663,6.940000057220459,2.2861320972442627,0.050333332270383835,0.7616666555404663,0.8669999837875366,0.9567474126815796,0.8723567724227905,0.2857142984867096,0.6390977501869202
+trm_baseline_best,0,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8682083487510681,0.9508457183837891,0.9506666660308838,0.7593333125114441,6.945666790008545,2.265108108520508,0.04933333396911621,0.7593333125114441,0.8669999837875366,0.956122636795044,0.8696655035018921,0.2951127886772156,0.6441102623939514
+trm_baseline_best,4,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.950645923614502,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0
+trm_baseline_best,4,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8666250109672546,0.9501558542251587,0.9350000023841858,0.8009999990463257,6.933000087738037,2.4135406017303467,0.06499999761581421,0.8009999990463257,0.8669999837875366,0.9687620401382446,0.9211841821670532,0.20081453025341034,0.5263158082962036
+trm_baseline_best,4,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8679583072662354,0.950590968132019,0.9359999895095825,0.7996666431427002,6.943666458129883,2.404542922973633,0.06400000303983688,0.7996666431427002,0.8669999837875366,0.970588207244873,0.9200307726860046,0.19893483817577362,0.5338345766067505
+trm_baseline_best,4,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8680416941642761,0.9508261680603027,0.9399999976158142,0.8013333082199097,6.944333553314209,2.3910739421844482,0.05999999865889549,0.8013333082199097,0.8669999837875366,0.9685697555541992,0.9227220416069031,0.21271929144859314,0.5664160251617432
+trm_baseline_best,4,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8660833239555359,0.9499126076698303,0.934333324432373,0.7953333258628845,6.928666591644287,2.413485288619995,0.06566666811704636,0.7953333258628845,0.8669999837875366,0.967608630657196,0.915032684803009,0.20426064729690552,0.523809552192688
+trm_baseline_best,4,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8659999966621399,0.9500890970230103,0.9403333067893982,0.7973333597183228,6.927999973297119,2.399336576461792,0.05966666713356972,0.7973333597183228,0.8669999837875366,0.9667435884475708,0.9161860942840576,0.20927318930625916,0.5664160251617432
+trm_baseline_best,8,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.950645923614502,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0
+trm_baseline_best,8,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669583201408386,0.9507402181625366,0.9226666688919067,0.8306666612625122,6.935666561126709,2.5147950649261475,0.07733333110809326,0.8306666612625122,0.8669999837875366,0.9784217476844788,0.9573240876197815,0.14035087823867798,0.4436090290546417
+trm_baseline_best,8,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8678333163261414,0.9507989287376404,0.9246666431427002,0.831333339214325,6.942666530609131,2.5057361125946045,0.07533333450555801,0.831333339214325,0.8669999837875366,0.9795271158218384,0.9588619470596313,0.13972431421279907,0.451127827167511
+trm_baseline_best,8,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8676666617393494,0.9505447745323181,0.9229999780654907,0.8306666612625122,6.941333293914795,2.51287841796875,0.07699999958276749,0.8306666612625122,0.8669999837875366,0.980199933052063,0.9573240876197815,0.13408520817756653,0.4411027431488037
+trm_baseline_best,8,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8682083487510681,0.950954258441925,0.9236666560173035,0.8286666870117188,6.945666790008545,2.4972081184387207,0.07633333653211594,0.8286666870117188,0.8669999837875366,0.9788542985916138,0.9554017782211304,0.14692983031272888,0.448621541261673
+trm_baseline_best,8,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8687083125114441,0.9511321783065796,0.9286666512489319,0.824999988079071,6.949666500091553,2.46991229057312,0.07133333384990692,0.824999988079071,0.8669999837875366,0.9784698486328125,0.9504036903381348,0.15319548547267914,0.48120301961898804
+trm_baseline_best,12,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.950645923614502,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0
+trm_baseline_best,12,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8673333525657654,0.9506542682647705,0.8926666378974915,0.856333315372467,6.938666820526123,2.641887903213501,0.10733333230018616,0.856333315372467,0.8669999837875366,0.9928392767906189,0.9873125553131104,0.049185462296009064,0.2005012482404709
+trm_baseline_best,12,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8668749928474426,0.9505452513694763,0.8896666765213013,0.8560000061988831,6.934999942779541,2.651435136795044,0.11033333092927933,0.8560000061988831,0.8669999837875366,0.9924548268318176,0.9869281053543091,0.048245612531900406,0.18045112490653992
+trm_baseline_best,12,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669583201408386,0.9506661891937256,0.8913333415985107,0.8556666374206543,6.935666561126709,2.6479289531707764,0.10866666585206985,0.8556666374206543,0.8669999837875366,0.991782009601593,0.9869281053543091,0.05325814709067345,0.19548872113227844
+trm_baseline_best,12,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8671666383743286,0.9506459832191467,0.8989999890327454,0.8539999723434448,6.937333106994629,2.629715919494629,0.10100000351667404,0.8539999723434448,0.8669999837875366,0.991733968257904,0.985005795955658,0.055137842893600464,0.24561403691768646
+trm_baseline_best,12,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8677916526794434,0.950908362865448,0.9043333530426025,0.8516666889190674,6.942333221435547,2.602372169494629,0.09566666930913925,0.8516666889190674,0.8669999837875366,0.9901480078697205,0.9823144674301147,0.07017543911933899,0.2882205545902252
+trm_baseline_best,15,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.950645923614502,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0
+trm_baseline_best,15,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.9506445527076721,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0
+trm_baseline_best,15,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.9506717920303345,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0
+trm_baseline_best,15,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.9506852030754089,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0
+trm_baseline_best,15,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8670833110809326,0.9507201910018921,0.8673333525657654,0.8669999837875366,6.936666488647461,2.7151405811309814,0.1326666623353958,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0006265664123930037,0.002506265649572015
+trm_baseline_best,15,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8670833110809326,0.9508404731750488,0.8686666488647461,0.8656666874885559,6.936666488647461,2.7110862731933594,0.1313333362340927,0.8656666874885559,0.8669999837875366,0.9997597336769104,0.9984621405601501,0.0021929824724793434,0.01253132801502943
+trm_multi4_best,0,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.959773600101471,0.8939999938011169,0.8939999938011169,7.1519999504089355,2.462700843811035,0.10599999874830246,0.8939999938011169,0.8939999938011169,1.0,1.0,0.0,0.0
+trm_multi4_best,0,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8957083225250244,0.9601898789405823,0.9583333134651184,0.8119999766349792,7.165666580200195,2.081879138946533,0.0416666679084301,0.8119999766349792,0.8939999938011169,0.9684004187583923,0.9030573964118958,0.28262579441070557,0.6194968819618225
+trm_multi4_best,0,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8957916498184204,0.9602490067481995,0.9606666564941406,0.8130000233650208,7.166333198547363,2.062037706375122,0.03933333232998848,0.8130000233650208,0.8939999938011169,0.9666293859481812,0.9038031101226807,0.2983490526676178,0.6352201104164124
+trm_multi4_best,0,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8947083353996277,0.9597618579864502,0.9599999785423279,0.8063333630561829,7.1576666831970215,2.0646567344665527,0.03999999910593033,0.8063333630561829,0.8939999938011169,0.9667226076126099,0.8963460326194763,0.28734275698661804,0.6320754885673523
+trm_multi4_best,0,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8953750133514404,0.9600813984870911,0.9603333473205566,0.8009999990463257,7.163000106811523,2.049495220184326,0.03966666758060455,0.8009999990463257,0.8939999938011169,0.9666759967803955,0.8918717503547668,0.29402515292167664,0.6446540951728821
+trm_multi4_best,0,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8950416445732117,0.9601003527641296,0.9629999995231628,0.7983333468437195,7.160333156585693,2.0425376892089844,0.03700000047683716,0.7983333468437195,0.8939999938011169,0.9635066986083984,0.8851603269577026,0.3176100552082062,0.6666666865348816
+trm_multi4_best,4,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.959773600101471,0.8939999938011169,0.8939999938011169,7.1519999504089355,2.462700843811035,0.10599999874830246,0.8939999938011169,0.8939999938011169,1.0,1.0,0.0,0.0
+trm_multi4_best,4,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8946250081062317,0.9599500894546509,0.9473333358764648,0.8433333039283752,7.1570000648498535,2.1906659603118896,0.052666667848825455,0.8433333039283752,0.8939999938011169,0.9753448963165283,0.9392244815826416,0.21383647620677948,0.5251572132110596
+trm_multi4_best,4,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8962500095367432,0.9606265425682068,0.9523333311080933,0.843999981880188,7.170000076293945,2.1701996326446533,0.04766666516661644,0.843999981880188,0.8939999938011169,0.976416826248169,0.9410887360572815,0.22012577950954437,0.5691823959350586
+trm_multi4_best,4,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8963750004768372,0.9604902863502502,0.9483333230018616,0.8446666598320007,7.171000003814697,2.171119213104248,0.05166666582226753,0.8446666598320007,0.8939999938011169,0.9760906100273132,0.9410887360572815,0.22405660152435303,0.5314465165138245
+trm_multi4_best,4,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8968333601951599,0.9606810808181763,0.9506666660308838,0.8403333425521851,7.174666881561279,2.1587400436401367,0.04933333396911621,0.8403333425521851,0.8939999938011169,0.9759973883628845,0.9377330541610718,0.2291666716337204,0.544025182723999
+trm_multi4_best,4,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8944583535194397,0.9597752094268799,0.9526666402816772,0.8326666951179504,7.155666828155518,2.1656641960144043,0.047333333641290665,0.8326666951179504,0.8939999938011169,0.9737602472305298,0.9284116625785828,0.22562892735004425,0.5628930926322937
+trm_multi4_best,8,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.959773600101471,0.8939999938011169,0.8939999938011169,7.1519999504089355,2.462700843811035,0.10599999874830246,0.8939999938011169,0.8939999938011169,1.0,1.0,0.0,0.0
+trm_multi4_best,8,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8962083458900452,0.9605055451393127,0.9399999976158142,0.8696666955947876,7.169666767120361,2.2701423168182373,0.05999999865889549,0.8696666955947876,0.8939999938011169,0.9840604066848755,0.9709172248840332,0.15526729822158813,0.45597484707832336
+trm_multi4_best,8,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8970000147819519,0.9608615636825562,0.9416666626930237,0.8696666955947876,7.176000118255615,2.254556179046631,0.05833333358168602,0.8696666955947876,0.8939999938011169,0.9844798445701599,0.9709172248840332,0.1591981202363968,0.46855345368385315
+trm_multi4_best,8,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8955416679382324,0.9602766633033752,0.9399999976158142,0.8673333525657654,7.164333343505859,2.2744951248168945,0.05999999865889549,0.8673333525657654,0.8939999938011169,0.9835943579673767,0.9690529704093933,0.1529088020324707,0.45597484707832336
+trm_multi4_best,8,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8949999809265137,0.9600179195404053,0.9436666369438171,0.8650000095367432,7.159999847412109,2.2588493824005127,0.056333333253860474,0.8650000095367432,0.8939999938011169,0.9836875200271606,0.9668158292770386,0.14701257646083832,0.47484275698661804
+trm_multi4_best,8,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8958333134651184,0.9602567553520203,0.9449999928474426,0.8643333315849304,7.166666507720947,2.251863479614258,0.054999999701976776,0.8643333315849304,0.8939999938011169,0.9831282496452332,0.9656972289085388,0.1595911979675293,0.49685534834861755
+trm_multi4_best,12,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.959773600101471,0.8939999938011169,0.8939999938011169,7.1519999504089355,2.462700843811035,0.10599999874830246,0.8939999938011169,0.8939999938011169,1.0,1.0,0.0,0.0
+trm_multi4_best,12,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8956666588783264,0.9604059457778931,0.9129999876022339,0.887333333492279,7.165333271026611,2.3898391723632812,0.08699999749660492,0.887333333492279,0.8939999938011169,0.9954324960708618,0.9925428628921509,0.05424528196454048,0.19182389974594116
+trm_multi4_best,12,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8966249823570251,0.9608678221702576,0.9193333387374878,0.887333333492279,7.172999858856201,2.3672215938568115,0.0806666687130928,0.887333333492279,0.8939999938011169,0.995712161064148,0.9925428628921509,0.06092767417430878,0.24528302252292633
+trm_multi4_best,12,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8974583148956299,0.9608935713768005,0.9213333129882812,0.887666642665863,7.179666519165039,2.3568453788757324,0.07866666465997696,0.887666642665863,0.8939999938011169,0.9956655502319336,0.9925428628921509,0.069182388484478,0.26729559898376465
+trm_multi4_best,12,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8966249823570251,0.9606748819351196,0.9223333597183228,0.8870000243186951,7.172999858856201,2.3598878383636475,0.07766667008399963,0.8870000243186951,0.8939999938011169,0.9950596690177917,0.9921700358390808,0.06643081456422806,0.276729553937912
+trm_multi4_best,12,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8972083330154419,0.9610205292701721,0.9259999990463257,0.8849999904632568,7.177666664123535,2.338397264480591,0.07400000095367432,0.8849999904632568,0.8939999938011169,0.9935682415962219,0.9899328947067261,0.08451257646083832,0.31446540355682373
+trm_multi4_best,15,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.959773600101471,0.8939999938011169,0.8939999938011169,7.1519999504089355,2.462700843811035,0.10599999874830246,0.8939999938011169,0.8939999938011169,1.0,1.0,0.0,0.0
+trm_multi4_best,15,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.9598419666290283,0.8939999938011169,0.8939999938011169,7.1519999504089355,2.462700843811035,0.10599999874830246,0.8939999938011169,0.8939999938011169,1.0,1.0,0.0,0.0
+trm_multi4_best,15,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.9597874879837036,0.8939999938011169,0.8939999938011169,7.1519999504089355,2.462700843811035,0.10599999874830246,0.8939999938011169,0.8939999938011169,1.0,1.0,0.0,0.0
+trm_multi4_best,15,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.9598518013954163,0.8939999938011169,0.8939999938011169,7.1519999504089355,2.462700843811035,0.10599999874830246,0.8939999938011169,0.8939999938011169,1.0,1.0,0.0,0.0
+trm_multi4_best,15,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8941249847412109,0.9598509073257446,0.8946666717529297,0.8939999938011169,7.1529998779296875,2.4601335525512695,0.10533333569765091,0.8939999938011169,0.8939999938011169,1.0,1.0,0.001179245300590992,0.006289307959377766
+trm_multi4_best,15,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8942499756813049,0.9599588513374329,0.8956666588783264,0.893666684627533,7.1539998054504395,2.456070899963379,0.10433333367109299,0.893666684627533,0.8939999938011169,0.9999067783355713,0.9996271729469299,0.003144653979688883,0.015723271295428276
+trm_multi4_final,0,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8286666870117188,0.9328684210777283,0.8286666870117188,0.8286666870117188,6.62933349609375,3.014399766921997,0.17133332788944244,0.8286666870117188,0.8286666870117188,1.0,1.0,0.0,0.0
+trm_multi4_final,0,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8319166898727417,0.9341394901275635,0.9086666703224182,0.7463333606719971,6.655333518981934,2.654029369354248,0.09133332967758179,0.7463333606719971,0.8286666870117188,0.9579645991325378,0.8942075371742249,0.22227627038955688,0.4902723729610443
+trm_multi4_final,0,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8315416574478149,0.93404221534729,0.9143333435058594,0.7419999837875366,6.6523332595825195,2.6464054584503174,0.08566666394472122,0.7419999837875366,0.8286666870117188,0.9564561247825623,0.8897827863693237,0.22738327085971832,0.5252918004989624
+trm_multi4_final,0,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8301666378974915,0.9333322644233704,0.9143333435058594,0.7356666922569275,6.641333103179932,2.639954090118408,0.08566666394472122,0.7356666922569275,0.8286666870117188,0.9536906480789185,0.879324197769165,0.23273345828056335,0.5272373557090759
+trm_multi4_final,0,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8319166898727417,0.934055507183075,0.9213333129882812,0.7239999771118164,6.655333518981934,2.594970464706421,0.07866666465997696,0.7239999771118164,0.8286666870117188,0.9528358578681946,0.8688656687736511,0.2470817118883133,0.5661478638648987
+trm_multi4_final,0,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8354583382606506,0.9354007244110107,0.9269999861717224,0.7246666550636292,6.683666706085205,2.5519142150878906,0.0729999989271164,0.7246666550636292,0.8286666870117188,0.9477574229240417,0.861625075340271,0.29231518507003784,0.5992217659950256
+trm_multi4_final,4,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8286666870117188,0.9328684210777283,0.8286666870117188,0.8286666870117188,6.62933349609375,3.014399766921997,0.17133332788944244,0.8286666870117188,0.8286666870117188,1.0,1.0,0.0,0.0
+trm_multi4_final,4,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8335833549499512,0.9348888397216797,0.8993333578109741,0.7756666541099548,6.668666839599609,2.732560157775879,0.10066666454076767,0.7756666541099548,0.8286666870117188,0.970585286617279,0.9336283206939697,0.17096303403377533,0.43968871235847473
+trm_multi4_final,4,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8352500200271606,0.9354547262191772,0.9020000100135803,0.7749999761581421,6.682000160217285,2.7150585651397705,0.09799999743700027,0.7749999761581421,0.8286666870117188,0.971238911151886,0.9328238368034363,0.17752918601036072,0.4494163393974304
+trm_multi4_final,4,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8327916860580444,0.9343785643577576,0.8989999890327454,0.7720000147819519,6.6623334884643555,2.7240254878997803,0.10100000351667404,0.7720000147819519,0.8286666870117188,0.9690265655517578,0.9292035102844238,0.17388132214546204,0.4319066107273102
+trm_multi4_final,4,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8316666483879089,0.9340242743492126,0.8986666798591614,0.7730000019073486,6.6533331871032715,2.7423510551452637,0.10133333504199982,0.7730000019073486,0.8286666870117188,0.9677695035934448,0.9267899990081787,0.17339494824409485,0.4319066107273102
+trm_multi4_final,4,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8324583172798157,0.9344562888145447,0.8989999890327454,0.7680000066757202,6.659666538238525,2.7232038974761963,0.10100000351667404,0.7680000066757202,0.8286666870117188,0.9691271185874939,0.9239742755889893,0.1714494228363037,0.4319066107273102
+trm_multi4_final,8,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8286666870117188,0.9328684210777283,0.8286666870117188,0.8286666870117188,6.62933349609375,3.014399766921997,0.17133332788944244,0.8286666870117188,0.8286666870117188,1.0,1.0,0.0,0.0
+trm_multi4_final,8,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8332499861717224,0.934550940990448,0.8849999904632568,0.8013333082199097,6.665999889373779,2.8155124187469482,0.11500000208616257,0.8013333082199097,0.8286666870117188,0.9811444282531738,0.9662107825279236,0.11794747412204742,0.344357967376709
+trm_multi4_final,8,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8323749899864197,0.9341363906860352,0.8830000162124634,0.8016666769981384,6.658999919891357,2.8219470977783203,0.11699999868869781,0.8016666769981384,0.8286666870117188,0.9805410504341125,0.967015266418457,0.1157587543129921,0.3346303403377533
+trm_multi4_final,8,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8352916836738586,0.9354629516601562,0.8856666684150696,0.8016666769981384,6.682333469390869,2.7959651947021484,0.11433333158493042,0.8016666769981384,0.8286666870117188,0.9822506308555603,0.9666130542755127,0.12451361864805222,0.344357967376709
+trm_multi4_final,8,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8335000276565552,0.9347721338272095,0.890333354473114,0.7993333339691162,6.668000221252441,2.803647994995117,0.10966666787862778,0.7993333339691162,0.8286666870117188,0.9798873662948608,0.9625905156135559,0.1254863739013672,0.3696497976779938
+trm_multi4_final,8,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8331666588783264,0.9347808957099915,0.8916666507720947,0.7960000038146973,6.665333271026611,2.7949953079223633,0.10833333432674408,0.7960000038146973,0.8286666870117188,0.9781777858734131,0.9593724608421326,0.13180933892726898,0.38910505175590515
+trm_multi4_final,12,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8286666870117188,0.9328684210777283,0.8286666870117188,0.8286666870117188,6.62933349609375,3.014399766921997,0.17133332788944244,0.8286666870117188,0.8286666870117188,1.0,1.0,0.0,0.0
+trm_multi4_final,12,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8307916522026062,0.9338425993919373,0.8569999933242798,0.8209999799728394,6.64633321762085,2.936537742614746,0.14300000667572021,0.8209999799728394,0.8286666870117188,0.9936142563819885,0.9907481670379639,0.043287936598062515,0.1750972718000412
+trm_multi4_final,12,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8307499885559082,0.9337176084518433,0.8603333234786987,0.8209999799728394,6.645999908447266,2.9328060150146484,0.13966666162014008,0.8209999799728394,0.8286666870117188,0.9935137033462524,0.9907481670379639,0.04353112727403641,0.18871594965457916
+trm_multi4_final,12,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8307499885559082,0.9339007139205933,0.8583333492279053,0.8203333616256714,6.645999908447266,2.93530535697937,0.14166666567325592,0.8203333616256714,0.8286666870117188,0.993463397026062,0.9899436831474304,0.0437743179500103,0.17898832261562347
+trm_multi4_final,12,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.831166684627533,0.9338744282722473,0.8616666793823242,0.8190000057220459,6.649333477020264,2.921477794647217,0.13833333551883698,0.8190000057220459,0.8286666870117188,0.9930108785629272,0.9883346557617188,0.04839494079351425,0.20428015291690826
+trm_multi4_final,12,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8309583067893982,0.9341686964035034,0.8683333396911621,0.8149999976158142,6.6476664543151855,2.9015274047851562,0.1316666603088379,0.8149999976158142,0.8286666870117188,0.9908487796783447,0.9835076332092285,0.057636186480522156,0.24319066107273102
+trm_multi4_final,15,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8286666870117188,0.9328684210777283,0.8286666870117188,0.8286666870117188,6.62933349609375,3.014399766921997,0.17133332788944244,0.8286666870117188,0.8286666870117188,1.0,1.0,0.0,0.0
+trm_multi4_final,15,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8286666870117188,0.9330503940582275,0.8286666870117188,0.8286666870117188,6.62933349609375,3.014399766921997,0.17133332788944244,0.8286666870117188,0.8286666870117188,1.0,1.0,0.0,0.0
+trm_multi4_final,15,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8287083506584167,0.9331271052360535,0.8289999961853027,0.8286666870117188,6.629666805267334,3.0137219429016113,0.17100000381469727,0.8286666870117188,0.8286666870117188,1.0,1.0,0.00024319066142197698,0.0019455252913758159
+trm_multi4_final,15,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8286250233650208,0.9330225586891174,0.8289999961853027,0.828000009059906,6.629000186920166,3.0135293006896973,0.17100000381469727,0.828000009059906,0.8286666870117188,0.9998994469642639,0.9991955161094666,0.00024319066142197698,0.0019455252913758159
+trm_multi4_final,15,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8287500143051147,0.9331192970275879,0.8306666612625122,0.8276666402816772,6.630000114440918,3.009169340133667,0.1693333387374878,0.8276666402816772,0.8286666870117188,0.999698281288147,0.9987932443618774,0.0019455252913758159,0.011673151515424252
+trm_multi4_final,15,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8292916417121887,0.9331137537956238,0.8330000042915344,0.8276666402816772,6.63433313369751,3.0008811950683594,0.16699999570846558,0.8276666402816772,0.8286666870117188,0.9995977282524109,0.9987932443618774,0.0055933850817382336,0.02529182843863964
diff --git a/late_perturb_robustness/smoke_baseline.summary.csv b/late_perturb_robustness/smoke_baseline.summary.csv
new file mode 100644
index 0000000..ec4de26
--- /dev/null
+++ b/late_perturb_robustness/smoke_baseline.summary.csv
@@ -0,0 +1,5 @@
+label,perturb_after,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,zero_frac,full_frac,clean_acc,retain_mean_on_clean_success,allK_on_clean_success,rescue_mean_on_clean_fail,passK_on_clean_fail
+smoke_baseline,0,0.0,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.875,0.9614197611808777,0.875,0.875,1.75,0.6614378094673157,0.125,0.875,0.875,1.0,1.0,0.0,0.0
+smoke_baseline,0,0.01,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.9375,0.9834104776382446,1.0,0.875,1.875,0.33071890473365784,0.0,0.875,0.875,0.9642857313156128,0.9285714030265808,0.75,1.0
+smoke_baseline,8,0.0,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.875,0.9614197611808777,0.875,0.875,1.75,0.6614378094673157,0.125,0.875,0.875,1.0,1.0,0.0,0.0
+smoke_baseline,8,0.01,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.90625,0.96875,0.9375,0.875,1.8125,0.5266343355178833,0.0625,0.875,0.875,1.0,1.0,0.25,0.5
diff --git a/late_perturb_robustness/smoke_baseline_expandedclean.summary.csv b/late_perturb_robustness/smoke_baseline_expandedclean.summary.csv
new file mode 100644
index 0000000..ffe22cf
--- /dev/null
+++ b/late_perturb_robustness/smoke_baseline_expandedclean.summary.csv
@@ -0,0 +1,5 @@
+label,perturb_after,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,zero_frac,full_frac,clean_acc,retain_mean_on_clean_success,allK_on_clean_success,rescue_mean_on_clean_fail,passK_on_clean_fail
+smoke_baseline_expandedclean,0,0.0,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.875,0.9614197611808777,0.875,0.875,1.75,0.6614378094673157,0.125,0.875,0.875,1.0,1.0,0.0,0.0
+smoke_baseline_expandedclean,0,0.01,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.9375,0.9834104776382446,1.0,0.875,1.875,0.33071890473365784,0.0,0.875,0.875,0.9642857313156128,0.9285714030265808,0.75,1.0
+smoke_baseline_expandedclean,8,0.0,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.875,0.9614197611808777,0.875,0.875,1.75,0.6614378094673157,0.125,0.875,0.875,1.0,1.0,0.0,0.0
+smoke_baseline_expandedclean,8,0.01,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.90625,0.96875,0.9375,0.875,1.8125,0.5266343355178833,0.0625,0.875,0.875,1.0,1.0,0.25,0.5
diff --git a/late_perturb_robustness/smoke_baseline_fastclean.summary.csv b/late_perturb_robustness/smoke_baseline_fastclean.summary.csv
new file mode 100644
index 0000000..fb4e3a6
--- /dev/null
+++ b/late_perturb_robustness/smoke_baseline_fastclean.summary.csv
@@ -0,0 +1,5 @@
+label,perturb_after,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,zero_frac,full_frac,clean_acc,retain_mean_on_clean_success,allK_on_clean_success,rescue_mean_on_clean_fail,passK_on_clean_fail
+smoke_baseline_fastclean,0,0.0,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.875,0.9614197611808777,0.875,0.875,1.75,0.6614378094673157,0.125,0.875,0.9375,0.9333333373069763,0.9333333373069763,0.0,0.0
+smoke_baseline_fastclean,0,0.01,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.9375,0.9834104776382446,1.0,0.875,1.875,0.33071890473365784,0.0,0.875,0.9375,0.9333333373069763,0.8666666746139526,1.0,1.0
+smoke_baseline_fastclean,8,0.0,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.875,0.9614197611808777,0.875,0.875,1.75,0.6614378094673157,0.125,0.875,0.9375,0.9333333373069763,0.9333333373069763,0.0,0.0
+smoke_baseline_fastclean,8,0.01,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.90625,0.96875,0.9375,0.875,1.8125,0.5266343355178833,0.0625,0.875,0.9375,0.9666666388511658,0.9333333373069763,0.0,0.0
diff --git a/late_perturb_robustness/smoke_plots/late_perturb_robustness_combined.csv b/late_perturb_robustness/smoke_plots/late_perturb_robustness_combined.csv
new file mode 100644
index 0000000..ec4de26
--- /dev/null
+++ b/late_perturb_robustness/smoke_plots/late_perturb_robustness_combined.csv
@@ -0,0 +1,5 @@
+label,perturb_after,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,zero_frac,full_frac,clean_acc,retain_mean_on_clean_success,allK_on_clean_success,rescue_mean_on_clean_fail,passK_on_clean_fail
+smoke_baseline,0,0.0,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.875,0.9614197611808777,0.875,0.875,1.75,0.6614378094673157,0.125,0.875,0.875,1.0,1.0,0.0,0.0
+smoke_baseline,0,0.01,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.9375,0.9834104776382446,1.0,0.875,1.875,0.33071890473365784,0.0,0.875,0.875,0.9642857313156128,0.9285714030265808,0.75,1.0
+smoke_baseline,8,0.0,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.875,0.9614197611808777,0.875,0.875,1.75,0.6614378094673157,0.125,0.875,0.875,1.0,1.0,0.0,0.0
+smoke_baseline,8,0.01,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.90625,0.96875,0.9375,0.875,1.8125,0.5266343355178833,0.0625,0.875,0.875,1.0,1.0,0.25,0.5
diff --git a/late_perturb_robustness/trm_baseline_best_step58590_n3000_k8_late.summary.csv b/late_perturb_robustness/trm_baseline_best_step58590_n3000_k8_late.summary.csv
new file mode 100644
index 0000000..9f376be
--- /dev/null
+++ b/late_perturb_robustness/trm_baseline_best_step58590_n3000_k8_late.summary.csv
@@ -0,0 +1,31 @@
+label,perturb_after,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,zero_frac,full_frac,clean_acc,retain_mean_on_clean_success,allK_on_clean_success,rescue_mean_on_clean_fail,passK_on_clean_fail
+trm_baseline_best,0,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.950645923614502,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0
+trm_baseline_best,0,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.9504892826080322,0.9483333230018616,0.7639999985694885,6.935999870300293,2.303454875946045,0.05166666582226753,0.7639999985694885,0.8669999837875366,0.9578046798706055,0.875048041343689,0.2750626504421234,0.6315789222717285
+trm_baseline_best,0,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8684166669845581,0.9509722590446472,0.9456666707992554,0.7683333158493042,6.947333335876465,2.3008460998535156,0.05433333292603493,0.7683333158493042,0.8669999837875366,0.9602556824684143,0.8800461292266846,0.2697368562221527,0.6090225577354431
+trm_baseline_best,0,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.9504145383834839,0.9463333487510681,0.765333354473114,6.935999870300293,2.3080809116363525,0.05366666615009308,0.765333354473114,0.8669999837875366,0.9579008221626282,0.8785082697868347,0.2744360864162445,0.6190476417541504
+trm_baseline_best,0,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8675000071525574,0.9505621790885925,0.9496666789054871,0.7616666555404663,6.940000057220459,2.2861320972442627,0.050333332270383835,0.7616666555404663,0.8669999837875366,0.9567474126815796,0.8723567724227905,0.2857142984867096,0.6390977501869202
+trm_baseline_best,0,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8682083487510681,0.9508457183837891,0.9506666660308838,0.7593333125114441,6.945666790008545,2.265108108520508,0.04933333396911621,0.7593333125114441,0.8669999837875366,0.956122636795044,0.8696655035018921,0.2951127886772156,0.6441102623939514
+trm_baseline_best,4,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.950645923614502,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0
+trm_baseline_best,4,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8666250109672546,0.9501558542251587,0.9350000023841858,0.8009999990463257,6.933000087738037,2.4135406017303467,0.06499999761581421,0.8009999990463257,0.8669999837875366,0.9687620401382446,0.9211841821670532,0.20081453025341034,0.5263158082962036
+trm_baseline_best,4,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8679583072662354,0.950590968132019,0.9359999895095825,0.7996666431427002,6.943666458129883,2.404542922973633,0.06400000303983688,0.7996666431427002,0.8669999837875366,0.970588207244873,0.9200307726860046,0.19893483817577362,0.5338345766067505
+trm_baseline_best,4,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8680416941642761,0.9508261680603027,0.9399999976158142,0.8013333082199097,6.944333553314209,2.3910739421844482,0.05999999865889549,0.8013333082199097,0.8669999837875366,0.9685697555541992,0.9227220416069031,0.21271929144859314,0.5664160251617432
+trm_baseline_best,4,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8660833239555359,0.9499126076698303,0.934333324432373,0.7953333258628845,6.928666591644287,2.413485288619995,0.06566666811704636,0.7953333258628845,0.8669999837875366,0.967608630657196,0.915032684803009,0.20426064729690552,0.523809552192688
+trm_baseline_best,4,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8659999966621399,0.9500890970230103,0.9403333067893982,0.7973333597183228,6.927999973297119,2.399336576461792,0.05966666713356972,0.7973333597183228,0.8669999837875366,0.9667435884475708,0.9161860942840576,0.20927318930625916,0.5664160251617432
+trm_baseline_best,8,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.950645923614502,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0
+trm_baseline_best,8,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669583201408386,0.9507402181625366,0.9226666688919067,0.8306666612625122,6.935666561126709,2.5147950649261475,0.07733333110809326,0.8306666612625122,0.8669999837875366,0.9784217476844788,0.9573240876197815,0.14035087823867798,0.4436090290546417
+trm_baseline_best,8,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8678333163261414,0.9507989287376404,0.9246666431427002,0.831333339214325,6.942666530609131,2.5057361125946045,0.07533333450555801,0.831333339214325,0.8669999837875366,0.9795271158218384,0.9588619470596313,0.13972431421279907,0.451127827167511
+trm_baseline_best,8,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8676666617393494,0.9505447745323181,0.9229999780654907,0.8306666612625122,6.941333293914795,2.51287841796875,0.07699999958276749,0.8306666612625122,0.8669999837875366,0.980199933052063,0.9573240876197815,0.13408520817756653,0.4411027431488037
+trm_baseline_best,8,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8682083487510681,0.950954258441925,0.9236666560173035,0.8286666870117188,6.945666790008545,2.4972081184387207,0.07633333653211594,0.8286666870117188,0.8669999837875366,0.9788542985916138,0.9554017782211304,0.14692983031272888,0.448621541261673
+trm_baseline_best,8,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8687083125114441,0.9511321783065796,0.9286666512489319,0.824999988079071,6.949666500091553,2.46991229057312,0.07133333384990692,0.824999988079071,0.8669999837875366,0.9784698486328125,0.9504036903381348,0.15319548547267914,0.48120301961898804
+trm_baseline_best,12,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.950645923614502,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0
+trm_baseline_best,12,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8673333525657654,0.9506542682647705,0.8926666378974915,0.856333315372467,6.938666820526123,2.641887903213501,0.10733333230018616,0.856333315372467,0.8669999837875366,0.9928392767906189,0.9873125553131104,0.049185462296009064,0.2005012482404709
+trm_baseline_best,12,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8668749928474426,0.9505452513694763,0.8896666765213013,0.8560000061988831,6.934999942779541,2.651435136795044,0.11033333092927933,0.8560000061988831,0.8669999837875366,0.9924548268318176,0.9869281053543091,0.048245612531900406,0.18045112490653992
+trm_baseline_best,12,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669583201408386,0.9506661891937256,0.8913333415985107,0.8556666374206543,6.935666561126709,2.6479289531707764,0.10866666585206985,0.8556666374206543,0.8669999837875366,0.991782009601593,0.9869281053543091,0.05325814709067345,0.19548872113227844
+trm_baseline_best,12,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8671666383743286,0.9506459832191467,0.8989999890327454,0.8539999723434448,6.937333106994629,2.629715919494629,0.10100000351667404,0.8539999723434448,0.8669999837875366,0.991733968257904,0.985005795955658,0.055137842893600464,0.24561403691768646
+trm_baseline_best,12,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8677916526794434,0.950908362865448,0.9043333530426025,0.8516666889190674,6.942333221435547,2.602372169494629,0.09566666930913925,0.8516666889190674,0.8669999837875366,0.9901480078697205,0.9823144674301147,0.07017543911933899,0.2882205545902252
+trm_baseline_best,15,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.950645923614502,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0
+trm_baseline_best,15,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.9506445527076721,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0
+trm_baseline_best,15,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.9506717920303345,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0
+trm_baseline_best,15,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.9506852030754089,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0
+trm_baseline_best,15,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8670833110809326,0.9507201910018921,0.8673333525657654,0.8669999837875366,6.936666488647461,2.7151405811309814,0.1326666623353958,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0006265664123930037,0.002506265649572015
+trm_baseline_best,15,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8670833110809326,0.9508404731750488,0.8686666488647461,0.8656666874885559,6.936666488647461,2.7110862731933594,0.1313333362340927,0.8656666874885559,0.8669999837875366,0.9997597336769104,0.9984621405601501,0.0021929824724793434,0.01253132801502943
diff --git a/late_perturb_robustness/trm_multi4_best_step35805_n3000_k8_late.summary.csv b/late_perturb_robustness/trm_multi4_best_step35805_n3000_k8_late.summary.csv
new file mode 100644
index 0000000..01a8526
--- /dev/null
+++ b/late_perturb_robustness/trm_multi4_best_step35805_n3000_k8_late.summary.csv
@@ -0,0 +1,31 @@
+label,perturb_after,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,zero_frac,full_frac,clean_acc,retain_mean_on_clean_success,allK_on_clean_success,rescue_mean_on_clean_fail,passK_on_clean_fail
+trm_multi4_best,0,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.959773600101471,0.8939999938011169,0.8939999938011169,7.1519999504089355,2.462700843811035,0.10599999874830246,0.8939999938011169,0.8939999938011169,1.0,1.0,0.0,0.0
+trm_multi4_best,0,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8957083225250244,0.9601898789405823,0.9583333134651184,0.8119999766349792,7.165666580200195,2.081879138946533,0.0416666679084301,0.8119999766349792,0.8939999938011169,0.9684004187583923,0.9030573964118958,0.28262579441070557,0.6194968819618225
+trm_multi4_best,0,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8957916498184204,0.9602490067481995,0.9606666564941406,0.8130000233650208,7.166333198547363,2.062037706375122,0.03933333232998848,0.8130000233650208,0.8939999938011169,0.9666293859481812,0.9038031101226807,0.2983490526676178,0.6352201104164124
+trm_multi4_best,0,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8947083353996277,0.9597618579864502,0.9599999785423279,0.8063333630561829,7.1576666831970215,2.0646567344665527,0.03999999910593033,0.8063333630561829,0.8939999938011169,0.9667226076126099,0.8963460326194763,0.28734275698661804,0.6320754885673523
+trm_multi4_best,0,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8953750133514404,0.9600813984870911,0.9603333473205566,0.8009999990463257,7.163000106811523,2.049495220184326,0.03966666758060455,0.8009999990463257,0.8939999938011169,0.9666759967803955,0.8918717503547668,0.29402515292167664,0.6446540951728821
+trm_multi4_best,0,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8950416445732117,0.9601003527641296,0.9629999995231628,0.7983333468437195,7.160333156585693,2.0425376892089844,0.03700000047683716,0.7983333468437195,0.8939999938011169,0.9635066986083984,0.8851603269577026,0.3176100552082062,0.6666666865348816
+trm_multi4_best,4,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.959773600101471,0.8939999938011169,0.8939999938011169,7.1519999504089355,2.462700843811035,0.10599999874830246,0.8939999938011169,0.8939999938011169,1.0,1.0,0.0,0.0
+trm_multi4_best,4,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8946250081062317,0.9599500894546509,0.9473333358764648,0.8433333039283752,7.1570000648498535,2.1906659603118896,0.052666667848825455,0.8433333039283752,0.8939999938011169,0.9753448963165283,0.9392244815826416,0.21383647620677948,0.5251572132110596
+trm_multi4_best,4,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8962500095367432,0.9606265425682068,0.9523333311080933,0.843999981880188,7.170000076293945,2.1701996326446533,0.04766666516661644,0.843999981880188,0.8939999938011169,0.976416826248169,0.9410887360572815,0.22012577950954437,0.5691823959350586
+trm_multi4_best,4,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8963750004768372,0.9604902863502502,0.9483333230018616,0.8446666598320007,7.171000003814697,2.171119213104248,0.05166666582226753,0.8446666598320007,0.8939999938011169,0.9760906100273132,0.9410887360572815,0.22405660152435303,0.5314465165138245
+trm_multi4_best,4,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8968333601951599,0.9606810808181763,0.9506666660308838,0.8403333425521851,7.174666881561279,2.1587400436401367,0.04933333396911621,0.8403333425521851,0.8939999938011169,0.9759973883628845,0.9377330541610718,0.2291666716337204,0.544025182723999
+trm_multi4_best,4,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8944583535194397,0.9597752094268799,0.9526666402816772,0.8326666951179504,7.155666828155518,2.1656641960144043,0.047333333641290665,0.8326666951179504,0.8939999938011169,0.9737602472305298,0.9284116625785828,0.22562892735004425,0.5628930926322937
+trm_multi4_best,8,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.959773600101471,0.8939999938011169,0.8939999938011169,7.1519999504089355,2.462700843811035,0.10599999874830246,0.8939999938011169,0.8939999938011169,1.0,1.0,0.0,0.0
+trm_multi4_best,8,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8962083458900452,0.9605055451393127,0.9399999976158142,0.8696666955947876,7.169666767120361,2.2701423168182373,0.05999999865889549,0.8696666955947876,0.8939999938011169,0.9840604066848755,0.9709172248840332,0.15526729822158813,0.45597484707832336
+trm_multi4_best,8,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8970000147819519,0.9608615636825562,0.9416666626930237,0.8696666955947876,7.176000118255615,2.254556179046631,0.05833333358168602,0.8696666955947876,0.8939999938011169,0.9844798445701599,0.9709172248840332,0.1591981202363968,0.46855345368385315
+trm_multi4_best,8,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8955416679382324,0.9602766633033752,0.9399999976158142,0.8673333525657654,7.164333343505859,2.2744951248168945,0.05999999865889549,0.8673333525657654,0.8939999938011169,0.9835943579673767,0.9690529704093933,0.1529088020324707,0.45597484707832336
+trm_multi4_best,8,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8949999809265137,0.9600179195404053,0.9436666369438171,0.8650000095367432,7.159999847412109,2.2588493824005127,0.056333333253860474,0.8650000095367432,0.8939999938011169,0.9836875200271606,0.9668158292770386,0.14701257646083832,0.47484275698661804
+trm_multi4_best,8,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8958333134651184,0.9602567553520203,0.9449999928474426,0.8643333315849304,7.166666507720947,2.251863479614258,0.054999999701976776,0.8643333315849304,0.8939999938011169,0.9831282496452332,0.9656972289085388,0.1595911979675293,0.49685534834861755
+trm_multi4_best,12,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.959773600101471,0.8939999938011169,0.8939999938011169,7.1519999504089355,2.462700843811035,0.10599999874830246,0.8939999938011169,0.8939999938011169,1.0,1.0,0.0,0.0
+trm_multi4_best,12,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8956666588783264,0.9604059457778931,0.9129999876022339,0.887333333492279,7.165333271026611,2.3898391723632812,0.08699999749660492,0.887333333492279,0.8939999938011169,0.9954324960708618,0.9925428628921509,0.05424528196454048,0.19182389974594116
+trm_multi4_best,12,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8966249823570251,0.9608678221702576,0.9193333387374878,0.887333333492279,7.172999858856201,2.3672215938568115,0.0806666687130928,0.887333333492279,0.8939999938011169,0.995712161064148,0.9925428628921509,0.06092767417430878,0.24528302252292633
+trm_multi4_best,12,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8974583148956299,0.9608935713768005,0.9213333129882812,0.887666642665863,7.179666519165039,2.3568453788757324,0.07866666465997696,0.887666642665863,0.8939999938011169,0.9956655502319336,0.9925428628921509,0.069182388484478,0.26729559898376465
+trm_multi4_best,12,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8966249823570251,0.9606748819351196,0.9223333597183228,0.8870000243186951,7.172999858856201,2.3598878383636475,0.07766667008399963,0.8870000243186951,0.8939999938011169,0.9950596690177917,0.9921700358390808,0.06643081456422806,0.276729553937912
+trm_multi4_best,12,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8972083330154419,0.9610205292701721,0.9259999990463257,0.8849999904632568,7.177666664123535,2.338397264480591,0.07400000095367432,0.8849999904632568,0.8939999938011169,0.9935682415962219,0.9899328947067261,0.08451257646083832,0.31446540355682373
+trm_multi4_best,15,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.959773600101471,0.8939999938011169,0.8939999938011169,7.1519999504089355,2.462700843811035,0.10599999874830246,0.8939999938011169,0.8939999938011169,1.0,1.0,0.0,0.0
+trm_multi4_best,15,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.9598419666290283,0.8939999938011169,0.8939999938011169,7.1519999504089355,2.462700843811035,0.10599999874830246,0.8939999938011169,0.8939999938011169,1.0,1.0,0.0,0.0
+trm_multi4_best,15,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.9597874879837036,0.8939999938011169,0.8939999938011169,7.1519999504089355,2.462700843811035,0.10599999874830246,0.8939999938011169,0.8939999938011169,1.0,1.0,0.0,0.0
+trm_multi4_best,15,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.9598518013954163,0.8939999938011169,0.8939999938011169,7.1519999504089355,2.462700843811035,0.10599999874830246,0.8939999938011169,0.8939999938011169,1.0,1.0,0.0,0.0
+trm_multi4_best,15,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8941249847412109,0.9598509073257446,0.8946666717529297,0.8939999938011169,7.1529998779296875,2.4601335525512695,0.10533333569765091,0.8939999938011169,0.8939999938011169,1.0,1.0,0.001179245300590992,0.006289307959377766
+trm_multi4_best,15,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8942499756813049,0.9599588513374329,0.8956666588783264,0.893666684627533,7.1539998054504395,2.456070899963379,0.10433333367109299,0.893666684627533,0.8939999938011169,0.9999067783355713,0.9996271729469299,0.003144653979688883,0.015723271295428276
diff --git a/late_perturb_robustness/trm_multi4_final_step65100_n3000_k8_late.summary.csv b/late_perturb_robustness/trm_multi4_final_step65100_n3000_k8_late.summary.csv
new file mode 100644
index 0000000..b4d80d6
--- /dev/null
+++ b/late_perturb_robustness/trm_multi4_final_step65100_n3000_k8_late.summary.csv
@@ -0,0 +1,31 @@
+label,perturb_after,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,zero_frac,full_frac,clean_acc,retain_mean_on_clean_success,allK_on_clean_success,rescue_mean_on_clean_fail,passK_on_clean_fail
+trm_multi4_final,0,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8286666870117188,0.9328684210777283,0.8286666870117188,0.8286666870117188,6.62933349609375,3.014399766921997,0.17133332788944244,0.8286666870117188,0.8286666870117188,1.0,1.0,0.0,0.0
+trm_multi4_final,0,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8319166898727417,0.9341394901275635,0.9086666703224182,0.7463333606719971,6.655333518981934,2.654029369354248,0.09133332967758179,0.7463333606719971,0.8286666870117188,0.9579645991325378,0.8942075371742249,0.22227627038955688,0.4902723729610443
+trm_multi4_final,0,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8315416574478149,0.93404221534729,0.9143333435058594,0.7419999837875366,6.6523332595825195,2.6464054584503174,0.08566666394472122,0.7419999837875366,0.8286666870117188,0.9564561247825623,0.8897827863693237,0.22738327085971832,0.5252918004989624
+trm_multi4_final,0,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8301666378974915,0.9333322644233704,0.9143333435058594,0.7356666922569275,6.641333103179932,2.639954090118408,0.08566666394472122,0.7356666922569275,0.8286666870117188,0.9536906480789185,0.879324197769165,0.23273345828056335,0.5272373557090759
+trm_multi4_final,0,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8319166898727417,0.934055507183075,0.9213333129882812,0.7239999771118164,6.655333518981934,2.594970464706421,0.07866666465997696,0.7239999771118164,0.8286666870117188,0.9528358578681946,0.8688656687736511,0.2470817118883133,0.5661478638648987
+trm_multi4_final,0,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8354583382606506,0.9354007244110107,0.9269999861717224,0.7246666550636292,6.683666706085205,2.5519142150878906,0.0729999989271164,0.7246666550636292,0.8286666870117188,0.9477574229240417,0.861625075340271,0.29231518507003784,0.5992217659950256
+trm_multi4_final,4,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8286666870117188,0.9328684210777283,0.8286666870117188,0.8286666870117188,6.62933349609375,3.014399766921997,0.17133332788944244,0.8286666870117188,0.8286666870117188,1.0,1.0,0.0,0.0
+trm_multi4_final,4,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8335833549499512,0.9348888397216797,0.8993333578109741,0.7756666541099548,6.668666839599609,2.732560157775879,0.10066666454076767,0.7756666541099548,0.8286666870117188,0.970585286617279,0.9336283206939697,0.17096303403377533,0.43968871235847473
+trm_multi4_final,4,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8352500200271606,0.9354547262191772,0.9020000100135803,0.7749999761581421,6.682000160217285,2.7150585651397705,0.09799999743700027,0.7749999761581421,0.8286666870117188,0.971238911151886,0.9328238368034363,0.17752918601036072,0.4494163393974304
+trm_multi4_final,4,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8327916860580444,0.9343785643577576,0.8989999890327454,0.7720000147819519,6.6623334884643555,2.7240254878997803,0.10100000351667404,0.7720000147819519,0.8286666870117188,0.9690265655517578,0.9292035102844238,0.17388132214546204,0.4319066107273102
+trm_multi4_final,4,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8316666483879089,0.9340242743492126,0.8986666798591614,0.7730000019073486,6.6533331871032715,2.7423510551452637,0.10133333504199982,0.7730000019073486,0.8286666870117188,0.9677695035934448,0.9267899990081787,0.17339494824409485,0.4319066107273102
+trm_multi4_final,4,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8324583172798157,0.9344562888145447,0.8989999890327454,0.7680000066757202,6.659666538238525,2.7232038974761963,0.10100000351667404,0.7680000066757202,0.8286666870117188,0.9691271185874939,0.9239742755889893,0.1714494228363037,0.4319066107273102
+trm_multi4_final,8,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8286666870117188,0.9328684210777283,0.8286666870117188,0.8286666870117188,6.62933349609375,3.014399766921997,0.17133332788944244,0.8286666870117188,0.8286666870117188,1.0,1.0,0.0,0.0
+trm_multi4_final,8,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8332499861717224,0.934550940990448,0.8849999904632568,0.8013333082199097,6.665999889373779,2.8155124187469482,0.11500000208616257,0.8013333082199097,0.8286666870117188,0.9811444282531738,0.9662107825279236,0.11794747412204742,0.344357967376709
+trm_multi4_final,8,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8323749899864197,0.9341363906860352,0.8830000162124634,0.8016666769981384,6.658999919891357,2.8219470977783203,0.11699999868869781,0.8016666769981384,0.8286666870117188,0.9805410504341125,0.967015266418457,0.1157587543129921,0.3346303403377533
+trm_multi4_final,8,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8352916836738586,0.9354629516601562,0.8856666684150696,0.8016666769981384,6.682333469390869,2.7959651947021484,0.11433333158493042,0.8016666769981384,0.8286666870117188,0.9822506308555603,0.9666130542755127,0.12451361864805222,0.344357967376709
+trm_multi4_final,8,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8335000276565552,0.9347721338272095,0.890333354473114,0.7993333339691162,6.668000221252441,2.803647994995117,0.10966666787862778,0.7993333339691162,0.8286666870117188,0.9798873662948608,0.9625905156135559,0.1254863739013672,0.3696497976779938
+trm_multi4_final,8,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8331666588783264,0.9347808957099915,0.8916666507720947,0.7960000038146973,6.665333271026611,2.7949953079223633,0.10833333432674408,0.7960000038146973,0.8286666870117188,0.9781777858734131,0.9593724608421326,0.13180933892726898,0.38910505175590515
+trm_multi4_final,12,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8286666870117188,0.9328684210777283,0.8286666870117188,0.8286666870117188,6.62933349609375,3.014399766921997,0.17133332788944244,0.8286666870117188,0.8286666870117188,1.0,1.0,0.0,0.0
+trm_multi4_final,12,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8307916522026062,0.9338425993919373,0.8569999933242798,0.8209999799728394,6.64633321762085,2.936537742614746,0.14300000667572021,0.8209999799728394,0.8286666870117188,0.9936142563819885,0.9907481670379639,0.043287936598062515,0.1750972718000412
+trm_multi4_final,12,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8307499885559082,0.9337176084518433,0.8603333234786987,0.8209999799728394,6.645999908447266,2.9328060150146484,0.13966666162014008,0.8209999799728394,0.8286666870117188,0.9935137033462524,0.9907481670379639,0.04353112727403641,0.18871594965457916
+trm_multi4_final,12,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8307499885559082,0.9339007139205933,0.8583333492279053,0.8203333616256714,6.645999908447266,2.93530535697937,0.14166666567325592,0.8203333616256714,0.8286666870117188,0.993463397026062,0.9899436831474304,0.0437743179500103,0.17898832261562347
+trm_multi4_final,12,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.831166684627533,0.9338744282722473,0.8616666793823242,0.8190000057220459,6.649333477020264,2.921477794647217,0.13833333551883698,0.8190000057220459,0.8286666870117188,0.9930108785629272,0.9883346557617188,0.04839494079351425,0.20428015291690826
+trm_multi4_final,12,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8309583067893982,0.9341686964035034,0.8683333396911621,0.8149999976158142,6.6476664543151855,2.9015274047851562,0.1316666603088379,0.8149999976158142,0.8286666870117188,0.9908487796783447,0.9835076332092285,0.057636186480522156,0.24319066107273102
+trm_multi4_final,15,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8286666870117188,0.9328684210777283,0.8286666870117188,0.8286666870117188,6.62933349609375,3.014399766921997,0.17133332788944244,0.8286666870117188,0.8286666870117188,1.0,1.0,0.0,0.0
+trm_multi4_final,15,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8286666870117188,0.9330503940582275,0.8286666870117188,0.8286666870117188,6.62933349609375,3.014399766921997,0.17133332788944244,0.8286666870117188,0.8286666870117188,1.0,1.0,0.0,0.0
+trm_multi4_final,15,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8287083506584167,0.9331271052360535,0.8289999961853027,0.8286666870117188,6.629666805267334,3.0137219429016113,0.17100000381469727,0.8286666870117188,0.8286666870117188,1.0,1.0,0.00024319066142197698,0.0019455252913758159
+trm_multi4_final,15,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8286250233650208,0.9330225586891174,0.8289999961853027,0.828000009059906,6.629000186920166,3.0135293006896973,0.17100000381469727,0.828000009059906,0.8286666870117188,0.9998994469642639,0.9991955161094666,0.00024319066142197698,0.0019455252913758159
+trm_multi4_final,15,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8287500143051147,0.9331192970275879,0.8306666612625122,0.8276666402816772,6.630000114440918,3.009169340133667,0.1693333387374878,0.8276666402816772,0.8286666870117188,0.999698281288147,0.9987932443618774,0.0019455252913758159,0.011673151515424252
+trm_multi4_final,15,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8292916417121887,0.9331137537956238,0.8330000042915344,0.8276666402816772,6.63433313369751,3.0008811950683594,0.16699999570846558,0.8276666402816772,0.8286666870117188,0.9995977282524109,0.9987932443618774,0.0055933850817382336,0.02529182843863964
diff --git a/late_perturb_robustness/watch_and_plot.sh b/late_perturb_robustness/watch_and_plot.sh
new file mode 100755
index 0000000..5bb1d8a
--- /dev/null
+++ b/late_perturb_robustness/watch_and_plot.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+# Wait for the late-perturbation runs to exit, plot their summaries, snapshot GPUs.
+set -euo pipefail
+ROOT=/home/yurenh2/rrm
+PY=/home/yurenh2/miniconda3/envs/rrm/bin/python
+OUT=research/flossing/late_perturb_robustness  # relative to ROOT
+cd "${ROOT}"
+
+RUNS=(
+  trm_baseline_best_step58590_n3000_k8_late
+  trm_multi4_best_step35805_n3000_k8_late
+  trm_multi4_final_step65100_n3000_k8_late
+)
+
+summaries=()
+for run in "${RUNS[@]}"; do
+  pf="${OUT}/logs/${run}.pid"
+  pid=$(<"${pf}")
+  # kill -0 fails at once on an empty/garbled PID (and always succeeds for 0),
+  # so reject anything that is not a positive integer instead of mis-waiting.
+  [[ "${pid}" =~ ^[[:space:]]*0*[1-9][0-9]*[[:space:]]*$ ]] || { echo "invalid pid in ${pf}: '${pid}'" >&2; exit 1; }
+  echo "watch ${pf}: ${pid}"
+  while kill -0 "${pid}" 2>/dev/null; do sleep 60; done
+  echo "done ${pf}: ${pid}"
+  summaries+=("${OUT}/${run}.summary.csv")
+done
+
+"${PY}" research/flossing/plot_late_perturb_robustness.py \
+  --summaries "${summaries[@]}" --out-dir "${OUT}/plots" --slice-sigma 0.1
+mkdir -p "${OUT}/plots"  # no-op if the plot step already created it; the redirect below needs it
+nvidia-smi --query-gpu=index,memory.used,memory.total,utilization.gpu \
+  --format=csv,noheader,nounits > "${OUT}/plots/final_gpu_status.txt"
diff --git a/launch_10k_queue.sh b/launch_10k_queue.sh
new file mode 100755
index 0000000..8376ead
--- /dev/null
+++ b/launch_10k_queue.sh
@@ -0,0 +1,124 @@
+#!/usr/bin/env bash
+# Launcher for the 10k-step runs; the absolute paths below are machine-specific.
+set -e -o pipefail
+FLOSS_DIR='/home/yurenh2/rrm/research/flossing'
+CONDA_SH='/home/yurenh2/miniconda3/etc/profile.d/conda.sh'
+HRM_ROOT='/home/yurenh2/rrm/hrm/checkpoints/Sudoku-extreme-1k-aug-1000 ACT-torch/HierarchicalReasoningModel_ACTV1 righteous-python'
+TRM_ROOT='/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_singleGPU'
+
+wait_for_pid() {
+ local pid="$1"
+ if [[ "${pid}" == "0" ]]; then
+ return 0
+ fi
+ while kill -0 "${pid}" 2>/dev/null; do
+ sleep 60
+ done
+}
+
+run_hrm_baseline() {
+ wait_for_pid "${1:-0}"
+ source "${CONDA_SH}"
+ conda activate rrm
+ cd "${FLOSS_DIR}"
+ CUDA_VISIBLE_DEVICES=0 python step3_train_with_rf.py \
+ --ckpt-root "${HRM_ROOT}" \
+ --ckpt-name step_26040 \
+ --n-steps 10000 \
+ --batch-size 8 \
+ --lr 1e-5 \
+ --alpha-rf 0 \
+ --lambda-star 0 \
+ --rf-mode volume_cf \
+ --k-lyap 0 \
+ --lyap-act-steps 4 \
+ --seed 42 \
+ --eval-every 1000 \
+ --eval-n 512 \
+ --eval-batch-size 32 \
+ --out step3_L_baseline_26040_fast_10k.json \
+ > step3_L_baseline_26040_fast_10k.log 2>&1
+}
+
+run_hrm_volume() {
+ wait_for_pid "${1:-0}"
+ source "${CONDA_SH}"
+ conda activate rrm
+ cd "${FLOSS_DIR}"
+ CUDA_VISIBLE_DEVICES=1 python step3_train_with_rf.py \
+ --ckpt-root "${HRM_ROOT}" \
+ --ckpt-name step_26040 \
+ --n-steps 10000 \
+ --batch-size 8 \
+ --lr 1e-5 \
+ --alpha-rf 10 \
+ --lambda-star -0.15 \
+ --rf-mode volume_cf \
+ --k-lyap 8 \
+ --lyap-act-steps 4 \
+ --seed 42 \
+ --eval-every 1000 \
+ --eval-n 512 \
+ --eval-batch-size 32 \
+ --out step3_M_volume_cf_26040_lstar_neg015_k8_a10_10k.json \
+ > step3_M_volume_cf_26040_lstar_neg015_k8_a10_10k.log 2>&1
+}
+
+run_trm_baseline() {
+ wait_for_pid "${1:-0}"
+ source "${CONDA_SH}"
+ conda activate rrm
+ cd "${FLOSS_DIR}"
+ CUDA_VISIBLE_DEVICES=2 python step5_train_trm_cf.py \
+ --ckpt-root "${TRM_ROOT}" \
+ --ckpt-name step_26041 \
+ --n-steps 10000 \
+ --batch-size 4 \
+ --lr 1e-5 \
+ --alpha-rf 0 \
+ --lambda-star 0.02 \
+ --rf-mode volume_cf \
+ --k-lyap 0 \
+ --lyap-act-steps 4 \
+ --seed 42 \
+ --eval-every 1000 \
+ --eval-n 512 \
+ --eval-batch-size 32 \
+ --out step5_L_trm_baseline_26041_batch4_fast_10k.json \
+ > step5_L_trm_baseline_26041_batch4_fast_10k.log 2>&1
+}
+
+run_trm_volume() {
+ wait_for_pid "${1:-0}"
+ source "${CONDA_SH}"
+ conda activate rrm
+ cd "${FLOSS_DIR}"
+ CUDA_VISIBLE_DEVICES=3 python step5_train_trm_cf.py \
+ --ckpt-root "${TRM_ROOT}" \
+ --ckpt-name step_26041 \
+ --n-steps 10000 \
+ --batch-size 4 \
+ --lr 1e-5 \
+ --alpha-rf 10 \
+ --lambda-star 0.02 \
+ --rf-mode volume_cf \
+ --k-lyap 4 \
+ --lyap-act-steps 4 \
+ --seed 42 \
+ --eval-every 1000 \
+ --eval-n 512 \
+ --eval-batch-size 32 \
+ --out step5_M_trm_volume_cf_26041_lstar002_batch4_k4_a10_10k.json \
+ > step5_M_trm_volume_cf_26041_lstar002_batch4_k4_a10_10k.log 2>&1
+}
+
+cmd="${1:?usage: launch_10k_queue.sh MODE [wait_pid]}"
+wait_pid="${2:-0}"
+
+case "${cmd}" in
+ hrm_baseline) run_hrm_baseline "${wait_pid}" ;;
+ hrm_volume) run_hrm_volume "${wait_pid}" ;;
+ trm_baseline) run_trm_baseline "${wait_pid}" ;;
+ trm_volume) run_trm_volume "${wait_pid}" ;;
+ *) echo "unknown command: ${cmd}" >&2; exit 2 ;;
+esac
diff --git a/launch_dynamics_variants_queue.sh b/launch_dynamics_variants_queue.sh
new file mode 100755
index 0000000..51076b1
--- /dev/null
+++ b/launch_dynamics_variants_queue.sh
@@ -0,0 +1,186 @@
+#!/usr/bin/env bash
+set -eo pipefail
+
+FLOSS_DIR="/home/yurenh2/rrm/research/flossing"
+CONDA_SH="/home/yurenh2/miniconda3/etc/profile.d/conda.sh"
+HRM_ROOT="/home/yurenh2/rrm/hrm/checkpoints/Sudoku-extreme-1k-aug-1000 ACT-torch/HierarchicalReasoningModel_ACTV1 righteous-python"
+TRM_ROOT="/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_singleGPU"
+
+wait_for_pid() {
+ local pid="$1"
+ if [[ "${pid}" == "0" ]]; then
+ return 0
+ fi
+ while kill -0 "${pid}" 2>/dev/null; do
+ sleep 60
+ done
+}
+
+# Prepare the shell for the runs: load conda's shell hook, activate the `rrm`
+# environment, and change into FLOSS_DIR so the relative script and output
+# paths used by the run_* functions resolve there.
+activate_env() {
+ source "${CONDA_SH}"
+ conda activate rrm
+ cd "${FLOSS_DIR}"
+}
+
+run_hrm_queue() {
+ wait_for_pid "${1:-0}"
+ activate_env
+
+ CUDA_VISIBLE_DEVICES=0 python step7_interfloss.py \
+ --model hrm \
+ --ckpt-root "${HRM_ROOT}" \
+ --ckpt-name step_26040 \
+ --train-steps 10000 \
+ --batch-size 8 \
+ --train-lr 1e-5 \
+ --floss-lr 1e-4 \
+ --floss-steps 500 \
+ --interfloss-at 0,500 \
+ --floss-mode engelken_l2 \
+ --lambda-star 0 \
+ --k-lyap 8 \
+ --lyap-act-steps 4 \
+ --lyap-start-act 12 \
+ --kl-beta 10 \
+ --kl-replay-size 64 \
+ --kl-batch-size 4 \
+ --kl-temperature 1 \
+ --seed 42 \
+ --eval-every 1000 \
+ --eval-n 512 \
+ --eval-batch-size 32 \
+ --floss-log-every 10 \
+ --out step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k.json \
+ > step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k.log 2>&1
+
+ CUDA_VISIBLE_DEVICES=0 python step7_interfloss.py \
+ --model hrm \
+ --ckpt-root "${HRM_ROOT}" \
+ --ckpt-name step_26040 \
+ --train-steps 10000 \
+ --batch-size 8 \
+ --train-lr 1e-5 \
+ --floss-lr 1e-4 \
+ --floss-steps 500 \
+ --interfloss-at 0,500 \
+ --floss-mode volume_cf \
+ --lambda-star -0.15 \
+ --k-lyap 8 \
+ --lyap-act-steps 4 \
+ --kl-beta 10 \
+ --kl-replay-size 64 \
+ --kl-batch-size 4 \
+ --kl-temperature 1 \
+ --seed 42 \
+ --eval-every 1000 \
+ --eval-n 512 \
+ --eval-batch-size 32 \
+ --floss-log-every 10 \
+ --out step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k.json \
+ > step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k.log 2>&1
+
+ CUDA_VISIBLE_DEVICES=0 python step8_basin_consistency.py \
+ --model hrm \
+ --ckpt-root "${HRM_ROOT}" \
+ --ckpt-name step_26040 \
+ --train-steps 10000 \
+ --batch-size 8 \
+ --lr 1e-5 \
+ --consistency-beta 1 \
+ --consistency-every 1 \
+ --perturb-after 8 \
+ --noise-std 0.02 \
+ --kl-temperature 1 \
+ --seed 42 \
+ --eval-every 1000 \
+ --eval-n 512 \
+ --eval-batch-size 32 \
+ --out step8_A_hrm_basin_consistency_beta1_noise002_after8_26040_10k.json \
+ > step8_A_hrm_basin_consistency_beta1_noise002_after8_26040_10k.log 2>&1
+}
+
+run_trm_queue() {
+ wait_for_pid "${1:-0}"
+ activate_env
+
+ CUDA_VISIBLE_DEVICES=2 python step7_interfloss.py \
+ --model trm \
+ --ckpt-root "${TRM_ROOT}" \
+ --ckpt-name step_26041 \
+ --train-steps 10000 \
+ --batch-size 4 \
+ --train-lr 1e-5 \
+ --floss-lr 1e-4 \
+ --floss-steps 500 \
+ --interfloss-at 0,500 \
+ --floss-mode engelken_l2 \
+ --lambda-star 0 \
+ --k-lyap 4 \
+ --lyap-act-steps 4 \
+ --lyap-start-act 12 \
+ --kl-beta 10 \
+ --kl-replay-size 64 \
+ --kl-batch-size 4 \
+ --kl-temperature 1 \
+ --seed 42 \
+ --eval-every 1000 \
+ --eval-n 512 \
+ --eval-batch-size 32 \
+ --floss-log-every 10 \
+ --out step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k.json \
+ > step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k.log 2>&1
+
+ CUDA_VISIBLE_DEVICES=2 python step7_interfloss.py \
+ --model trm \
+ --ckpt-root "${TRM_ROOT}" \
+ --ckpt-name step_26041 \
+ --train-steps 10000 \
+ --batch-size 4 \
+ --train-lr 1e-5 \
+ --floss-lr 1e-4 \
+ --floss-steps 500 \
+ --interfloss-at 0,500 \
+ --floss-mode volume_cf \
+ --lambda-star 0.02 \
+ --k-lyap 4 \
+ --lyap-act-steps 4 \
+ --kl-beta 10 \
+ --kl-replay-size 64 \
+ --kl-batch-size 4 \
+ --kl-temperature 1 \
+ --seed 42 \
+ --eval-every 1000 \
+ --eval-n 512 \
+ --eval-batch-size 32 \
+ --floss-log-every 10 \
+ --out step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k.json \
+ > step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k.log 2>&1
+
+ CUDA_VISIBLE_DEVICES=2 python step8_basin_consistency.py \
+ --model trm \
+ --ckpt-root "${TRM_ROOT}" \
+ --ckpt-name step_26041 \
+ --train-steps 10000 \
+ --batch-size 4 \
+ --lr 1e-5 \
+ --consistency-beta 1 \
+ --consistency-every 1 \
+ --perturb-after 8 \
+ --noise-std 0.02 \
+ --kl-temperature 1 \
+ --seed 42 \
+ --eval-every 1000 \
+ --eval-n 512 \
+ --eval-batch-size 32 \
+ --out step8_B_trm_basin_consistency_beta1_noise002_after8_26041_batch4_10k.json \
+ > step8_B_trm_basin_consistency_beta1_noise002_after8_26041_batch4_10k.log 2>&1
+}
+
+cmd="${1:?usage: launch_dynamics_variants_queue.sh MODE [wait_pid]}"
+wait_pid="${2:-0}"
+
+case "${cmd}" in
+ hrm) run_hrm_queue "${wait_pid}" ;;
+ trm) run_trm_queue "${wait_pid}" ;;
+ *) echo "unknown command: ${cmd}" >&2; exit 2 ;;
+esac
diff --git a/launch_engelken_paper_faithful_trm_queue.sh b/launch_engelken_paper_faithful_trm_queue.sh
new file mode 100755
index 0000000..f0c979d
--- /dev/null
+++ b/launch_engelken_paper_faithful_trm_queue.sh
@@ -0,0 +1,78 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT="/home/yurenh2/rrm"
+FLOSS_DIR="${ROOT}/research/flossing"
+PY="/home/yurenh2/miniconda3/envs/rrm/bin/python"
+CKPT_ROOT="${ROOT}/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro"
+OUT_DIR="${FLOSS_DIR}/engelken_paper_faithful"
+mkdir -p "${OUT_DIR}"
+
+wait_for_pid() {
+ local pid="${1:-}"
+ if [[ -z "${pid}" || "${pid}" == "0" ]]; then
+ return 0
+ fi
+ while kill -0 "${pid}" 2>/dev/null; do
+ sleep 60
+ done
+}
+
+# Flags shared by every run_case invocation. run_case appends its own
+# --floss-steps, --interfloss-at and --out after these, so --floss-steps is
+# passed twice.
+# NOTE(review): presumably the later --floss-steps wins in
+# step7_interfloss.py's argument parser - confirm.
+common_args=(
+ --model trm
+ --ckpt-root "${CKPT_ROOT}"
+ --ckpt-name __random__
+ --init-seed 123
+ --train-steps 10000
+ --batch-size 8
+ --train-lr 1e-4
+ --floss-lr 1e-4
+ --floss-steps 500
+ --floss-mode engelken_l2
+ --lambda-star 0
+ --k-lyap 4
+ --lyap-act-steps 4
+ --seed 42
+ --eval-every 1000
+ --eval-n 1000
+ --eval-batch-size 64
+ --floss-log-every 10
+ --train-puzzle-emb
+ --puzzle-emb-lr 1e-4
+ --puzzle-emb-weight-decay 1.0
+ --kl-beta 0
+)
+
+run_case() {
+ local gpu="$1"
+ local name="$2"
+ local schedule="$3"
+ local extra_floss_steps="${4:-500}"
+ cd "${ROOT}"
+ CUDA_VISIBLE_DEVICES="${gpu}" PYTHONUNBUFFERED=1 "${PY}" research/flossing/step7_interfloss.py \
+ "${common_args[@]}" \
+ --floss-steps "${extra_floss_steps}" \
+ --interfloss-at "${schedule}" \
+ --out "${OUT_DIR}/${name}.json" \
+ > "${OUT_DIR}/${name}.log" 2>&1
+}
+
+base_wait_pid="$(cat "${FLOSS_DIR}/ptrm_official_gbs768_base58590_k100_d64_sigma03_Lonly_n1000_seed0.pid" 2>/dev/null || true)"
+multi_wait_pid="$(cat "${FLOSS_DIR}/ptrm_official_gbs768_multi4_35805_k100_d64_sigma03_Lonly_n1000_seed0.pid" 2>/dev/null || true)"
+
+(
+ wait_for_pid "${base_wait_pid}"
+ run_case 2 "trm_sudoku_seed123_baseline_nofloss_b8_10k" "" 0
+ run_case 2 "trm_sudoku_seed123_pre_interfloss_0_500_b8_k4_10k" "0,500" 500
+) &
+echo $! > "${OUT_DIR}/gpu2_queue.pid"
+
+(
+ wait_for_pid "${multi_wait_pid}"
+ run_case 3 "trm_sudoku_seed123_prefloss_0_b8_k4_10k" "0" 500
+) &
+echo $! > "${OUT_DIR}/gpu3_queue.pid"
+
+echo "queued Engelken-faithful TRM runs"
+echo "gpu2 queue pid: $(cat "${OUT_DIR}/gpu2_queue.pid")"
+echo "gpu3 queue pid: $(cat "${OUT_DIR}/gpu3_queue.pid")"
diff --git a/launch_engelken_python_official_queue.sh b/launch_engelken_python_official_queue.sh
new file mode 100755
index 0000000..d1b3faf
--- /dev/null
+++ b/launch_engelken_python_official_queue.sh
@@ -0,0 +1,74 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+cd /home/yurenh2/rrm
+
+PY="/home/yurenh2/miniconda3/envs/rrm/bin/python"
+SCRIPT="research/flossing/engelken_python_flossing.py"
+OUT_DIR="research/flossing/engelken_python"
+mkdir -p "${OUT_DIR}"
+
+# Flags shared by every run_case invocation; run_case appends the per-case
+# flags and --out after them.
+common=(
+ --hidden-size 80
+ --n-lyap 40
+ --train-epochs 1000
+ --inter-period 100
+ --inter-epochs 100
+ --batch-size 16
+ --input-dim 1
+ --train-steps 300
+ --lyap-steps 55
+ --floss-input-steps 300
+ --seed-ic 1
+ --seed-input 1
+ --seed-net 1
+ --seed-ons 1
+ --lr 0.001
+ --beta1 0.9
+ --beta2 0.999
+ --init-type 1
+ --recurrent-gain 1.0
+ --recurrent-mean-gain 0.0
+ --input-scale 1.0
+ --delay 10
+ --ws-std 1.0
+ --ws-mean 0.0
+ --wr-std 1.0
+ --wr-mean 0.0
+ --b-std 0.1
+ --b-mean 0.0
+ --task -1
+ --lyap-target 0.0
+ --eval-every 100
+ --eval-batches 4
+ --log-every-floss 25
+ --device cuda
+)
+
+run_case() {
+ local gpu="$1"
+ local name="$2"
+ shift 2
+ CUDA_VISIBLE_DEVICES="${gpu}" PYTHONUNBUFFERED=1 "${PY}" "${SCRIPT}" \
+ "${common[@]}" "$@" \
+ --out "${OUT_DIR}/${name}.json" \
+ > "${OUT_DIR}/${name}.log" 2>&1
+}
+
+(
+ run_case 2 "official_py_baseline_no_floss_N80_k40_E1000" \
+ --pre-epochs 0 --max-inter-episodes 0
+ run_case 2 "official_py_pre_inter_N80_k40_E1000" \
+ --pre-epochs 100 --max-inter-episodes 2
+) &
+echo $! > "${OUT_DIR}/gpu2_python_queue.pid"
+
+(
+ run_case 3 "official_py_prefloss_N80_k40_E1000" \
+ --pre-epochs 100 --max-inter-episodes 0
+) &
+echo $! > "${OUT_DIR}/gpu3_python_queue.pid"
+
+echo "queued Engelken Python official-analogue runs"
+echo "gpu2 queue pid: $(cat "${OUT_DIR}/gpu2_python_queue.pid")"
+echo "gpu3 queue pid: $(cat "${OUT_DIR}/gpu3_python_queue.pid")"
diff --git a/launch_hrm_multi4_perturb_ablation.sh b/launch_hrm_multi4_perturb_ablation.sh
new file mode 100755
index 0000000..07fba63
--- /dev/null
+++ b/launch_hrm_multi4_perturb_ablation.sh
@@ -0,0 +1,60 @@
+#!/usr/bin/env bash
+set -eo pipefail
+
+CASE="${1:?case required: h | l | joint | all}"
+GPU="${2:-0}"
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+run_case() {
+ local perturb="$1"
+ local suffix="$2"
+
+ source /home/yurenh2/miniconda3/etc/profile.d/conda.sh
+ conda activate rrm
+
+ cd /home/yurenh2/rrm/hrm
+ export WANDB_MODE=offline
+ export CUDA_VISIBLE_DEVICES="${GPU}"
+
+ python pretrain.py \
+ data_path=/home/yurenh2/rrm/data/sudoku-extreme-1k-aug-1000 \
+ +project_name='Sudoku-extreme-1k-aug-1000 ACT-torch' \
+ +run_name="HierarchicalReasoningModel_ACTV1 multi4-loguniform-${suffix}-repro" \
+ +checkpoint_path="checkpoints/Sudoku-extreme-1k-aug-1000 ACT-torch/HierarchicalReasoningModel_ACTV1 multi4-loguniform-${suffix}-repro" \
+ epochs=20000 eval_interval=2000 checkpoint_every_eval=true \
+ global_batch_size=768 \
+ lr=0.0001 lr_min_ratio=1.0 lr_warmup_steps=2000 \
+ beta1=0.9 beta2=0.95 weight_decay=1.0 \
+ puzzle_emb_lr=0.0001 puzzle_emb_weight_decay=1.0 \
+ +trajectory_augment=true \
+ +trajectory_n=4 \
+ +trajectory_noise_std=0.001 \
+ +trajectory_noise_min=0.00003 \
+ +trajectory_noise_max=0.003 \
+ +trajectory_noise_sampling=loguniform \
+ +trajectory_sigma_start=0.0 \
+ +trajectory_sigma_ramp_steps=5000 \
+ +trajectory_perturb="${perturb}"
+}
+
+# Dispatch on CASE. `all` re-invokes this script once per single case, in the
+# order h, l, joint, each as a separate bash process on the same GPU.
+case "${CASE}" in
+  h) run_case h honly ;;
+  l) run_case l lonly ;;
+  joint) run_case joint jointnorm ;;
+  all)
+    for sub_case in h l joint; do
+      bash "${SCRIPT_DIR}/launch_hrm_multi4_perturb_ablation.sh" "${sub_case}" "${GPU}"
+    done
+    ;;
+  *)
+    echo "unknown case: ${CASE}" >&2
+    exit 2
+    ;;
+esac
diff --git a/launch_interfloss_queue.sh b/launch_interfloss_queue.sh
new file mode 100755
index 0000000..1c5807f
--- /dev/null
+++ b/launch_interfloss_queue.sh
@@ -0,0 +1,84 @@
+#!/usr/bin/env bash
+set -eo pipefail
+
+FLOSS_DIR="/home/yurenh2/rrm/research/flossing"
+CONDA_SH="/home/yurenh2/miniconda3/etc/profile.d/conda.sh"
+HRM_ROOT="/home/yurenh2/rrm/hrm/checkpoints/Sudoku-extreme-1k-aug-1000 ACT-torch/HierarchicalReasoningModel_ACTV1 righteous-python"
+TRM_ROOT="/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_singleGPU"
+
+wait_for_pid() {
+ local pid="$1"
+ if [[ "${pid}" == "0" ]]; then
+ return 0
+ fi
+ while kill -0 "${pid}" 2>/dev/null; do
+ sleep 60
+ done
+}
+
+# Prepare the shell for a run: load conda's shell hook, activate the `rrm`
+# environment, and change into FLOSS_DIR so the relative script and output
+# paths used by the run_* functions resolve there.
+activate_env() {
+ source "${CONDA_SH}"
+ conda activate rrm
+ cd "${FLOSS_DIR}"
+}
+
+run_hrm_engelken() {
+ wait_for_pid "${1:-0}"
+ activate_env
+ CUDA_VISIBLE_DEVICES=0 python step7_interfloss.py \
+ --model hrm \
+ --ckpt-root "${HRM_ROOT}" \
+ --ckpt-name step_26040 \
+ --train-steps 10000 \
+ --batch-size 8 \
+ --train-lr 1e-5 \
+ --floss-lr 1e-4 \
+ --floss-steps 500 \
+ --interfloss-at 0,500 \
+ --floss-mode engelken_l2 \
+ --lambda-star 0 \
+ --k-lyap 8 \
+ --lyap-act-steps 4 \
+ --seed 42 \
+ --eval-every 1000 \
+ --eval-n 512 \
+ --eval-batch-size 32 \
+ --floss-log-every 10 \
+ --out step7_A_hrm_engelken_interfloss_26040_k8_10k.json \
+ > step7_A_hrm_engelken_interfloss_26040_k8_10k.log 2>&1
+}
+
+run_trm_engelken() {
+ wait_for_pid "${1:-0}"
+ activate_env
+ CUDA_VISIBLE_DEVICES=2 python step7_interfloss.py \
+ --model trm \
+ --ckpt-root "${TRM_ROOT}" \
+ --ckpt-name step_26041 \
+ --train-steps 10000 \
+ --batch-size 4 \
+ --train-lr 1e-5 \
+ --floss-lr 1e-4 \
+ --floss-steps 500 \
+ --interfloss-at 0,500 \
+ --floss-mode engelken_l2 \
+ --lambda-star 0 \
+ --k-lyap 4 \
+ --lyap-act-steps 4 \
+ --seed 42 \
+ --eval-every 1000 \
+ --eval-n 512 \
+ --eval-batch-size 32 \
+ --floss-log-every 10 \
+ --out step7_B_trm_engelken_interfloss_26041_k4_batch4_10k.json \
+ > step7_B_trm_engelken_interfloss_26041_k4_batch4_10k.log 2>&1
+}
+
+cmd="${1:?usage: launch_interfloss_queue.sh MODE [wait_pid]}"
+wait_pid="${2:-0}"
+
+case "${cmd}" in
+ hrm_engelken) run_hrm_engelken "${wait_pid}" ;;
+ trm_engelken) run_trm_engelken "${wait_pid}" ;;
+ *) echo "unknown command: ${cmd}" >&2; exit 2 ;;
+esac
diff --git a/launch_multi4_parallel_repro_config.sh b/launch_multi4_parallel_repro_config.sh
new file mode 100755
index 0000000..d618cfe
--- /dev/null
+++ b/launch_multi4_parallel_repro_config.sh
@@ -0,0 +1,70 @@
+#!/usr/bin/env bash
+set -eo pipefail
+
+CASE="${1:?case required}"
+GPU="${2:-0}"
+
+source /home/yurenh2/miniconda3/etc/profile.d/conda.sh
+conda activate rrm
+
+case "${CASE}" in
+ hrm)
+ cd /home/yurenh2/rrm/hrm
+ export WANDB_MODE=offline
+ export CUDA_VISIBLE_DEVICES="${GPU}"
+ python pretrain.py \
+ data_path=/home/yurenh2/rrm/data/sudoku-extreme-1k-aug-1000 \
+ +project_name='Sudoku-extreme-1k-aug-1000 ACT-torch' \
+ +run_name='HierarchicalReasoningModel_ACTV1 multi4-loguniform-parallel-repro' \
+ +checkpoint_path='checkpoints/Sudoku-extreme-1k-aug-1000 ACT-torch/HierarchicalReasoningModel_ACTV1 multi4-loguniform-parallel-repro' \
+ epochs=20000 eval_interval=2000 checkpoint_every_eval=true \
+ global_batch_size=768 \
+ lr=0.0001 lr_min_ratio=1.0 lr_warmup_steps=2000 \
+ beta1=0.9 beta2=0.95 weight_decay=1.0 \
+ puzzle_emb_lr=0.0001 puzzle_emb_weight_decay=1.0 \
+ +trajectory_augment=true \
+ +trajectory_parallel=true \
+ +trajectory_n=4 \
+ +trajectory_noise_std=0.001 \
+ +trajectory_noise_min=0.00003 \
+ +trajectory_noise_max=0.003 \
+ +trajectory_noise_sampling=loguniform \
+ +trajectory_sigma_start=0.0 \
+ +trajectory_sigma_ramp_steps=5000 \
+ +trajectory_perturb=both
+ ;;
+ trm)
+ cd /home/yurenh2/rrm/trm
+ export WANDB_MODE=offline
+ export CUDA_VISIBLE_DEVICES="${GPU}"
+ python pretrain.py \
+ data_paths='[/home/yurenh2/rrm/data/sudoku-extreme-1k-aug-1000]' \
+ data_paths_test='[]' \
+ evaluators='[]' \
+ +project_name='Sudoku-extreme-1k-aug-1000-ACT-torch' \
+ +run_name='pretrain_mlp_t_sudoku_multi4_loguniform_parallel_repro' \
+ +checkpoint_path='checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_multi4_loguniform_parallel_repro' \
+ +load_checkpoint=null \
+ epochs=50000 eval_interval=5000 min_eval_interval=0 checkpoint_every_eval=true \
+ global_batch_size=192 \
+ lr=0.0001 lr_min_ratio=1.0 lr_warmup_steps=2000 \
+ beta1=0.9 beta2=0.95 weight_decay=1.0 \
+ puzzle_emb_lr=0.0001 puzzle_emb_weight_decay=1.0 \
+ ema=true ema_rate=0.999 freeze_weights=false \
+ arch.mlp_t=true arch.pos_encodings=none arch.puzzle_emb_len=16 arch.no_ACT_continue=true \
+ +trajectory_augment=true \
+ +trajectory_parallel=true \
+ +trajectory_n=4 \
+ +trajectory_noise_std=0.001 \
+ +trajectory_noise_min=0.00003 \
+ +trajectory_noise_max=0.003 \
+ +trajectory_noise_sampling=loguniform \
+ +trajectory_sigma_start=0.0 \
+ +trajectory_sigma_ramp_steps=5000 \
+ +trajectory_perturb=both
+ ;;
+ *)
+ echo "unknown case: ${CASE}" >&2
+ exit 2
+ ;;
+esac
diff --git a/launch_multi4_repro_config.sh b/launch_multi4_repro_config.sh
new file mode 100755
index 0000000..8cb5b25
--- /dev/null
+++ b/launch_multi4_repro_config.sh
@@ -0,0 +1,68 @@
+#!/usr/bin/env bash
+set -eo pipefail
+
+CASE="${1:?case required}"
+GPU="${2:-0}"
+
+source /home/yurenh2/miniconda3/etc/profile.d/conda.sh
+conda activate rrm
+
+case "${CASE}" in
+ hrm)
+ cd /home/yurenh2/rrm/hrm
+ export WANDB_MODE=offline
+ export CUDA_VISIBLE_DEVICES="${GPU}"
+ python pretrain.py \
+ data_path=/home/yurenh2/rrm/data/sudoku-extreme-1k-aug-1000 \
+ +project_name='Sudoku-extreme-1k-aug-1000 ACT-torch' \
+ +run_name='HierarchicalReasoningModel_ACTV1 multi4-loguniform-repro' \
+ +checkpoint_path='checkpoints/Sudoku-extreme-1k-aug-1000 ACT-torch/HierarchicalReasoningModel_ACTV1 multi4-loguniform-repro' \
+ epochs=20000 eval_interval=2000 checkpoint_every_eval=true \
+ global_batch_size=768 \
+ lr=0.0001 lr_min_ratio=1.0 lr_warmup_steps=2000 \
+ beta1=0.9 beta2=0.95 weight_decay=1.0 \
+ puzzle_emb_lr=0.0001 puzzle_emb_weight_decay=1.0 \
+ +trajectory_augment=true \
+ +trajectory_n=4 \
+ +trajectory_noise_std=0.001 \
+ +trajectory_noise_min=0.00003 \
+ +trajectory_noise_max=0.003 \
+ +trajectory_noise_sampling=loguniform \
+ +trajectory_sigma_start=0.0 \
+ +trajectory_sigma_ramp_steps=5000 \
+ +trajectory_perturb=both
+ ;;
+ trm)
+ cd /home/yurenh2/rrm/trm
+ export WANDB_MODE=offline
+ export CUDA_VISIBLE_DEVICES="${GPU}"
+ python pretrain.py \
+ data_paths='[/home/yurenh2/rrm/data/sudoku-extreme-1k-aug-1000]' \
+ data_paths_test='[]' \
+ evaluators='[]' \
+ +project_name='Sudoku-extreme-1k-aug-1000-ACT-torch' \
+ +run_name='pretrain_mlp_t_sudoku_multi4_loguniform_repro' \
+ +checkpoint_path='checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_multi4_loguniform_repro' \
+ +load_checkpoint=null \
+ epochs=50000 eval_interval=5000 min_eval_interval=0 checkpoint_every_eval=true \
+ global_batch_size=192 \
+ lr=0.0001 lr_min_ratio=1.0 lr_warmup_steps=2000 \
+ beta1=0.9 beta2=0.95 weight_decay=1.0 \
+ puzzle_emb_lr=0.0001 puzzle_emb_weight_decay=1.0 \
+ ema=true ema_rate=0.999 freeze_weights=false \
+ arch.mlp_t=true arch.pos_encodings=none arch.puzzle_emb_len=16 arch.no_ACT_continue=true \
+ +trajectory_augment=true \
+ +trajectory_n=4 \
+ +trajectory_noise_std=0.001 \
+ +trajectory_noise_min=0.00003 \
+ +trajectory_noise_max=0.003 \
+ +trajectory_noise_sampling=loguniform \
+ +trajectory_sigma_start=0.0 \
+ +trajectory_sigma_ramp_steps=5000 \
+ +trajectory_perturb=both
+ ;;
+ *)
+ echo "unknown case: ${CASE}" >&2
+ exit 2
+ ;;
+esac
diff --git a/launch_multi4_repro_config_all.sh b/launch_multi4_repro_config_all.sh
new file mode 100755
index 0000000..9d25d87
--- /dev/null
+++ b/launch_multi4_repro_config_all.sh
@@ -0,0 +1,10 @@
+#!/usr/bin/env bash
+set -eo pipefail
+
+cd /home/yurenh2/rrm/research/flossing
+
+setsid bash launch_multi4_repro_config.sh hrm 2 > multi4_repro_hrm.outer.log 2>&1 < /dev/null &
+echo "hrm pid=$!"
+
+setsid bash launch_multi4_repro_config.sh trm 3 > multi4_repro_trm.outer.log 2>&1 < /dev/null &
+echo "trm pid=$!"
diff --git a/launch_trajectory_perturb_queue.sh b/launch_trajectory_perturb_queue.sh
new file mode 100755
index 0000000..ba681ed
--- /dev/null
+++ b/launch_trajectory_perturb_queue.sh
@@ -0,0 +1,92 @@
+#!/usr/bin/env bash
+set -eo pipefail
+
+MODEL="${1:-hrm}"
+GPU="${2:-0}"
+
+source /home/yurenh2/miniconda3/etc/profile.d/conda.sh
+conda activate rrm
+cd /home/yurenh2/rrm/research/flossing
+
+HRM_ROOT="/home/yurenh2/rrm/hrm/checkpoints/Sudoku-extreme-1k-aug-1000 ACT-torch/HierarchicalReasoningModel_ACTV1 righteous-python"
+TRM_ROOT="/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_singleGPU"
+
+run_one() {
+ local tag="$1"
+ shift
+ echo "[$(date -Is)] START ${tag}"
+ CUDA_VISIBLE_DEVICES="${GPU}" python step9_trajectory_perturb_train.py "$@" \
+ > "${tag}.log" 2>&1
+ echo "[$(date -Is)] DONE ${tag}"
+}
+
+if [[ "${MODEL}" == "hrm" ]]; then
+ run_one step9_A_hrm_single_perturb_sigma1e-3_26040_10k \
+ --model hrm \
+ --ckpt-root "${HRM_ROOT}" \
+ --ckpt-name step_26040 \
+ --mode single_perturbed_ce \
+ --train-steps 10000 \
+ --batch-size 8 \
+ --lr 1e-5 \
+ --noise-std 0.001 \
+ --perturb both \
+ --seed 42 \
+ --eval-every 1000 \
+ --eval-n 512 \
+ --eval-batch-size 32 \
+ --out step9_A_hrm_single_perturb_sigma1e-3_26040_10k.json
+
+ run_one step9_B_hrm_multi4_perturb_sigma1e-3_26040_10k \
+ --model hrm \
+ --ckpt-root "${HRM_ROOT}" \
+ --ckpt-name step_26040 \
+ --mode multi_perturbed_ce \
+ --n-trajectories 4 \
+ --train-steps 10000 \
+ --batch-size 8 \
+ --lr 1e-5 \
+ --noise-std 0.001 \
+ --perturb both \
+ --seed 42 \
+ --eval-every 1000 \
+ --eval-n 512 \
+ --eval-batch-size 32 \
+ --out step9_B_hrm_multi4_perturb_sigma1e-3_26040_10k.json
+elif [[ "${MODEL}" == "trm" ]]; then
+ run_one step9_C_trm_single_perturb_sigma1e-3_26041_batch4_10k \
+ --model trm \
+ --ckpt-root "${TRM_ROOT}" \
+ --ckpt-name step_26041 \
+ --mode single_perturbed_ce \
+ --train-steps 10000 \
+ --batch-size 4 \
+ --lr 1e-5 \
+ --noise-std 0.001 \
+ --perturb both \
+ --seed 42 \
+ --eval-every 1000 \
+ --eval-n 512 \
+ --eval-batch-size 32 \
+ --out step9_C_trm_single_perturb_sigma1e-3_26041_batch4_10k.json
+
+ run_one step9_D_trm_multi4_perturb_sigma1e-3_26041_batch4_10k \
+ --model trm \
+ --ckpt-root "${TRM_ROOT}" \
+ --ckpt-name step_26041 \
+ --mode multi_perturbed_ce \
+ --n-trajectories 4 \
+ --train-steps 10000 \
+ --batch-size 4 \
+ --lr 1e-5 \
+ --noise-std 0.001 \
+ --perturb both \
+ --seed 42 \
+ --eval-every 1000 \
+ --eval-n 512 \
+ --eval-batch-size 32 \
+ --out step9_D_trm_multi4_perturb_sigma1e-3_26041_batch4_10k.json
+else
+ echo "unknown model: ${MODEL}" >&2
+ exit 2
+fi
diff --git a/launch_trajectory_sampling_long.sh b/launch_trajectory_sampling_long.sh
new file mode 100755
index 0000000..4a03650
--- /dev/null
+++ b/launch_trajectory_sampling_long.sh
@@ -0,0 +1,128 @@
+#!/usr/bin/env bash
+set -eo pipefail
+
+CASE="${1:?case required}"
+GPU="${2:-0}"
+
+source /home/yurenh2/miniconda3/etc/profile.d/conda.sh
+conda activate rrm
+cd /home/yurenh2/rrm/research/flossing
+
+HRM_ROOT="/home/yurenh2/rrm/hrm/checkpoints/Sudoku-extreme-1k-aug-1000 ACT-torch/HierarchicalReasoningModel_ACTV1 righteous-python"
+TRM_ROOT="/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_singleGPU"
+
+run_one() {
+ local tag="$1"
+ shift
+ echo "[$(date -Is)] START ${tag} on GPU ${GPU}"
+ CUDA_VISIBLE_DEVICES="${GPU}" python step9_trajectory_perturb_train.py "$@" \
+ > "${tag}.log" 2>&1
+ echo "[$(date -Is)] DONE ${tag}"
+}
+
+case "${CASE}" in
+ hrm_baseline50k)
+ run_one step9_E_hrm_baseline_parallel_fixed_26040_50k \
+ --model hrm \
+ --ckpt-root "${HRM_ROOT}" \
+ --ckpt-name step_26040 \
+ --mode baseline_clean \
+ --rollout-impl parallel_fixed \
+ --train-steps 50000 \
+ --batch-size 8 \
+ --lr 1e-5 \
+ --seed 42 \
+ --eval-every 2500 \
+ --eval-n 1024 \
+ --eval-batch-size 32 \
+ --save-best \
+ --save-final \
+ --save-every-eval \
+ --save-train-state \
+ --out step9_E_hrm_baseline_parallel_fixed_26040_50k.json
+ ;;
+
+ hrm_multi4_loguniform50k)
+ run_one step9_F_hrm_multi4_loguniform_ramp_26040_50k \
+ --model hrm \
+ --ckpt-root "${HRM_ROOT}" \
+ --ckpt-name step_26040 \
+ --mode multi_perturbed_ce \
+ --rollout-impl parallel_fixed \
+ --n-trajectories 4 \
+ --train-steps 50000 \
+ --batch-size 8 \
+ --lr 1e-5 \
+ --noise-std 0.001 \
+ --noise-min 0.00003 \
+ --noise-max 0.003 \
+ --noise-sampling loguniform \
+ --sigma-start 0 \
+ --sigma-ramp-steps 5000 \
+ --perturb both \
+ --seed 42 \
+ --eval-every 2500 \
+ --eval-n 1024 \
+ --eval-batch-size 32 \
+ --save-best \
+ --save-final \
+ --save-every-eval \
+ --save-train-state \
+ --out step9_F_hrm_multi4_loguniform_ramp_26040_50k.json
+ ;;
+
+ trm_baseline50k)
+ run_one step9_G_trm_baseline_parallel_fixed_26041_batch4_50k \
+ --model trm \
+ --ckpt-root "${TRM_ROOT}" \
+ --ckpt-name step_26041 \
+ --mode baseline_clean \
+ --rollout-impl parallel_fixed \
+ --train-steps 50000 \
+ --batch-size 4 \
+ --lr 1e-5 \
+ --seed 42 \
+ --eval-every 2500 \
+ --eval-n 1024 \
+ --eval-batch-size 32 \
+ --save-best \
+ --save-final \
+ --save-every-eval \
+ --save-train-state \
+ --out step9_G_trm_baseline_parallel_fixed_26041_batch4_50k.json
+ ;;
+
+ trm_multi4_loguniform50k)
+ run_one step9_H_trm_multi4_loguniform_ramp_26041_batch4_50k \
+ --model trm \
+ --ckpt-root "${TRM_ROOT}" \
+ --ckpt-name step_26041 \
+ --mode multi_perturbed_ce \
+ --rollout-impl parallel_fixed \
+ --n-trajectories 4 \
+ --train-steps 50000 \
+ --batch-size 4 \
+ --lr 1e-5 \
+ --noise-std 0.001 \
+ --noise-min 0.00003 \
+ --noise-max 0.003 \
+ --noise-sampling loguniform \
+ --sigma-start 0 \
+ --sigma-ramp-steps 5000 \
+ --perturb both \
+ --seed 42 \
+ --eval-every 2500 \
+ --eval-n 1024 \
+ --eval-batch-size 32 \
+ --save-best \
+ --save-final \
+ --save-every-eval \
+ --save-train-state \
+ --out step9_H_trm_multi4_loguniform_ramp_26041_batch4_50k.json
+ ;;
+
+ *)
+ echo "unknown case: ${CASE}" >&2
+ exit 2
+ ;;
+esac
diff --git a/launch_trajectory_sampling_long_all.sh b/launch_trajectory_sampling_long_all.sh
new file mode 100755
index 0000000..c223c6e
--- /dev/null
+++ b/launch_trajectory_sampling_long_all.sh
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+set -eo pipefail
+
+cd /home/yurenh2/rrm/research/flossing
+
+launch_case() {
+ local case_name="$1"
+ local gpu="$2"
+ local outer_log="queue_${case_name}.outer.log"
+ setsid bash launch_trajectory_sampling_long.sh "${case_name}" "${gpu}" \
+ > "${outer_log}" 2>&1 < /dev/null &
+ echo "${case_name} gpu=${gpu} pid=$! outer_log=${outer_log}"
+}
+
+launch_case hrm_baseline50k 0
+launch_case hrm_multi4_loguniform50k 1
+launch_case trm_baseline50k 2
+launch_case trm_multi4_loguniform50k 3
diff --git a/launch_trm_directional_multi4_long.sh b/launch_trm_directional_multi4_long.sh
new file mode 100755
index 0000000..1a56bcc
--- /dev/null
+++ b/launch_trm_directional_multi4_long.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+set -eo pipefail
+
+GPU="${1:-3}"
+
+source /home/yurenh2/miniconda3/etc/profile.d/conda.sh
+conda activate rrm
+
+cd /home/yurenh2/rrm/trm
+export WANDB_MODE=offline
+export CUDA_VISIBLE_DEVICES="${GPU}"
+
+python pretrain.py \
+ arch=trm \
+ data_paths='[/home/yurenh2/rrm/data/sudoku-extreme-1k-aug-1000]' \
+ data_paths_test='[]' \
+ evaluators='[]' \
+ +project_name='Sudoku-extreme-1k-aug-1000-ACT-torch' \
+ +run_name='pretrain_mlp_t_sudoku_directional_multi4_parallel_c4_eps003_sigma003' \
+ +checkpoint_path='checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_directional_multi4_parallel_c4_eps003_sigma003' \
+ +load_checkpoint=null \
+ epochs=12500 eval_interval=1250 min_eval_interval=0 checkpoint_every_eval=true \
+ global_batch_size=192 \
+ lr=0.0001 lr_min_ratio=1.0 lr_warmup_steps=2000 \
+ beta1=0.9 beta2=0.95 weight_decay=1.0 \
+ puzzle_emb_lr=0.0001 puzzle_emb_weight_decay=1.0 \
+ ema=true ema_rate=0.999 freeze_weights=false \
+ arch.mlp_t=true arch.pos_encodings=none arch.puzzle_emb_len=16 arch.no_ACT_continue=true \
+ +trajectory_augment=true \
+ +trajectory_parallel=true \
+ +trajectory_n=4 \
+ +trajectory_noise_std=0.03 \
+ +trajectory_noise_min=0.003 \
+ +trajectory_noise_max=0.03 \
+ +trajectory_noise_sampling=loguniform \
+ +trajectory_sigma_start=0.0 \
+ +trajectory_sigma_ramp_steps=5000 \
+ +trajectory_perturb=both \
+ +trajectory_directional=true \
+ +trajectory_directional_candidates=4 \
+ +trajectory_directional_fd_eps=0.03 \
+ +trajectory_directional_horizon=16 \
+ +trajectory_directional_sign=alternate
diff --git a/launch_trm_official_gbs768_multi4.sh b/launch_trm_official_gbs768_multi4.sh
new file mode 100755
index 0000000..5bd9575
--- /dev/null
+++ b/launch_trm_official_gbs768_multi4.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+set -eo pipefail
+
+GPU="${1:-0}"
+
+source /home/yurenh2/miniconda3/etc/profile.d/conda.sh
+conda activate rrm
+
+cd /home/yurenh2/rrm/trm
+export WANDB_MODE=offline
+export CUDA_VISIBLE_DEVICES="${GPU}"
+
+python pretrain.py \
+ arch=trm \
+ data_paths='[/home/yurenh2/rrm/data/sudoku-extreme-1k-aug-1000]' \
+ data_paths_test='[]' \
+ evaluators='[]' \
+ +project_name='Sudoku-extreme-1k-aug-1000-ACT-torch' \
+ +run_name='pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro' \
+ +checkpoint_path='checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro' \
+ +load_checkpoint=null \
+ epochs=50000 eval_interval=2500 min_eval_interval=0 checkpoint_every_eval=true \
+ global_batch_size=768 \
+ lr=0.0001 lr_min_ratio=1.0 lr_warmup_steps=2000 \
+ beta1=0.9 beta2=0.95 weight_decay=1.0 \
+ puzzle_emb_lr=0.0001 puzzle_emb_weight_decay=1.0 \
+ ema=true ema_rate=0.999 freeze_weights=false \
+ arch.mlp_t=true arch.pos_encodings=none arch.L_layers=2 arch.H_cycles=3 arch.L_cycles=6 \
+ +trajectory_augment=true \
+ +trajectory_n=4 \
+ +trajectory_noise_std=0.001 \
+ +trajectory_noise_min=0.00003 \
+ +trajectory_noise_max=0.003 \
+ +trajectory_noise_sampling=loguniform \
+ +trajectory_sigma_start=0.0 \
+ +trajectory_sigma_ramp_steps=5000 \
+ +trajectory_perturb=both
diff --git a/make_meeting_artifacts_v2.py b/make_meeting_artifacts_v2.py
new file mode 100644
index 0000000..4c88412
--- /dev/null
+++ b/make_meeting_artifacts_v2.py
@@ -0,0 +1,364 @@
+from __future__ import annotations
+
+import csv
+import re
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+ROOT = Path("research/flossing")
+OUT = ROOT / "meeting_artifacts_v2"
+
+
def read_csv(path: Path) -> list[dict[str, str]]:
    """Read a CSV file into a list of row dicts keyed by the header row.

    Args:
        path: location of the CSV file.

    Returns:
        One dict per data row, in file order.
    """
    records: list[dict[str, str]] = []
    with path.open(newline="") as handle:
        for record in csv.DictReader(handle):
            records.append(record)
    return records
+
+
def f(row: dict[str, str], key: str, default: float = float("nan")) -> float:
    """Parse ``row[key]`` as a float, falling back to ``default``.

    The fallback is returned when the key is absent, when the cell is the
    empty string, or when reading/converting the value raises.
    """
    try:
        raw = row.get(key, "")
        if raw == "":
            return default
        return float(raw)
    except Exception:
        return default
+
+
def row_for(rows: list[dict[str, str]], step: int) -> dict[str, str]:
    """Return the first row whose ``step`` column equals ``step``.

    The column is parsed with ``int(float(...))`` so values such as ``"10.0"``
    match the integer 10.

    Raises:
        KeyError: if no row has the requested step.
    """
    not_found = object()
    hit = next((row for row in rows if int(float(row["step"])) == step), not_found)
    if hit is not_found:
        raise KeyError(step)
    return hit
+
+
def spectrum_metrics(npz_path: Path) -> dict[str, float]:
    """Summarise a Lyapunov-spectrum diagnostic archive.

    The archive must hold ``lyap_spec`` (2-D: one row per sample, one column
    per exponent) and ``exact_correct`` (one flag per sample). Column 0 is
    reported as ``lambda1``; ``mean8`` is the per-sample mean over all stored
    exponents; ``pos_count`` is the per-sample count of exponents > 0.

    Args:
        npz_path: path to the ``.npz`` file.

    Returns:
        Averages over all samples (``*_all``), over samples flagged correct
        (``*_success``) and over the rest (``*_fail``), plus ``sample_exact``
        (fraction flagged correct). A subset with no samples yields NaN.
    """
    # Context manager closes the archive's file handle; astype() copies, so
    # the arrays stay valid afterwards.
    with np.load(npz_path) as data:
        spec = data["lyap_spec"].astype(float)
        exact = data["exact_correct"].astype(bool)
    mean8 = spec.mean(axis=1)
    pos_count = (spec > 0).sum(axis=1)

    def _avg(values: np.ndarray) -> float:
        # Same NaN result as ndarray.mean() on an empty slice, minus the
        # "Mean of empty slice" RuntimeWarning.
        return float(values.mean()) if values.size else float("nan")

    return {
        "sample_exact": _avg(exact),
        "lambda1_all": _avg(spec[:, 0]),
        "mean8_all": _avg(mean8),
        "pos_count_all": _avg(pos_count),
        "lambda1_success": _avg(spec[exact, 0]),
        "lambda1_fail": _avg(spec[~exact, 0]),
        "mean8_success": _avg(mean8[exact]),
        "mean8_fail": _avg(mean8[~exact]),
        "pos_count_success": _avg(pos_count[exact]),
        "pos_count_fail": _avg(pos_count[~exact]),
    }
+
+
def spectrum_arrays(npz_path: Path) -> tuple[np.ndarray, np.ndarray]:
    """Load ``lyap_spec`` (as float) and ``exact_correct`` (as bool) from an archive.

    Args:
        npz_path: path to the ``.npz`` diagnostic file.

    Returns:
        ``(lyap_spec, exact_correct)``; both are copies, so they stay valid
        after the archive is closed.
    """
    # Close the archive explicitly instead of leaving the handle to the GC.
    with np.load(npz_path) as data:
        return data["lyap_spec"].astype(float), data["exact_correct"].astype(bool)
+
+
def auc_failure_score(score: np.ndarray, exact: np.ndarray) -> float:
    """AUROC where higher score predicts failure.

    Computed from the Mann-Whitney U statistic. Tied scores receive their
    average rank (midrank), so a failure/success pair with equal scores counts
    as half a win instead of being decided by sort order.

    Args:
        score: one score per sample.
        exact: one flag per sample; truthy means success.

    Returns:
        The AUROC in [0, 1], or NaN when either class is empty.
    """
    score = np.asarray(score, dtype=float).reshape(-1)
    exact = np.asarray(exact, dtype=bool).reshape(-1)
    fail = ~exact
    n_fail = int(fail.sum())
    n_succ = int(exact.sum())
    if n_fail == 0 or n_succ == 0:
        return float("nan")
    # A group of c equal values occupying sorted positions (last-c+1 .. last)
    # gets the average of those 1-based positions: last - (c - 1) / 2.
    _, inverse, counts = np.unique(score, return_inverse=True, return_counts=True)
    last = np.cumsum(counts)
    ranks = (last - (counts - 1) / 2.0)[inverse]
    return float((ranks[fail].sum() - n_fail * (n_fail + 1) / 2) / (n_fail * n_succ))
+
+
def get_data() -> dict[str, object]:
    """Load the evaluation tables and build the per-checkpoint summary points.

    Returns:
        A dict with the four raw evaluation tables (``hrm_base``,
        ``hrm_multi``, ``trm_base``, ``trm_multi``) and two lists of
        checkpoint records (``hrm_points``, ``trm_points``). Each record holds
        ``label``, ``family``, ``step``, ``full_exact`` (read from the
        evaluation table at that step) and every key of ``spectrum_metrics``
        for the matching diagnostic archive.
    """
    eval_dir = ROOT / "multi4_eval_compare"
    spec_dir = ROOT / "official_gbs768_spectrum"

    out: dict[str, object] = {}
    for key, filename in [
        ("hrm_base", "hrm_baseline_eval.csv"),
        ("hrm_multi", "hrm_multi4_complete_eval.csv"),
        ("trm_base", "trm_official_gbs768_eval.csv"),
        ("trm_multi", "trm_official_gbs768_multi4_eval.csv"),
    ]:
        out[key] = read_csv(eval_dir / filename)

    def checkpoint(label: str, family: str, step: int, table: str, metric: str, npz: Path) -> dict[str, object]:
        # Accuracy lookup first, spectrum metrics second (same order as the keys).
        entry: dict[str, object] = {
            "label": label,
            "family": family,
            "step": step,
            "full_exact": f(row_for(out[table], step), metric),  # type: ignore[arg-type]
        }
        entry.update(spectrum_metrics(npz))
        return entry

    # The HRM multi4 table stores accuracy under "exact"; the others use
    # "all/exact_accuracy".
    out["hrm_points"] = [
        checkpoint("baseline best", "baseline", 26040, "hrm_base", "all/exact_accuracy",
                   ROOT / "diag_hrm_step_26040_512.npz"),
        checkpoint("multi4 best", "multi4", 23436, "hrm_multi", "exact",
                   ROOT / "diag_hrm_multi4_step_23436_512.npz"),
        checkpoint("multi4 final", "multi4-final", 26040, "hrm_multi", "exact",
                   ROOT / "diag_hrm_multi4_step_26040_512.npz"),
    ]
    out["trm_points"] = [
        checkpoint("baseline best", "baseline", 58590, "trm_base", "all/exact_accuracy",
                   spec_dir / "trm_gbs768_base_step58590_n512_k8_seed20260602.npz"),
        checkpoint("multi4 best", "multi4", 35805, "trm_multi", "all/exact_accuracy",
                   spec_dir / "trm_gbs768_multi4_step35805_n512_k8_seed20260602.npz"),
        checkpoint("multi4 final", "multi4-final", 65100, "trm_multi", "all/exact_accuracy",
                   spec_dir / "trm_gbs768_multi4_step65100_n512_k8_seed20260602.npz"),
    ]
    return out
+
+
def metric_series(rows: list[dict[str, str]], metric: str) -> tuple[np.ndarray, np.ndarray]:
    """Extract a ``(steps, values)`` series for one metric, sorted by step.

    Rows whose metric is missing, empty or non-finite are dropped; the step
    column is only parsed for rows that are kept.
    """
    kept = [
        (int(float(row["step"])), val)
        for row in rows
        for val in [f(row, metric)]
        if np.isfinite(val)
    ]
    steps = [step for step, _ in kept]
    vals = [val for _, val in kept]
    order = np.argsort(steps)
    return np.asarray(steps)[order], np.asarray(vals)[order]
+
+
def plot_training_curves(data: dict[str, object]) -> None:
    """Plot baseline vs multi4 accuracy over training for HRM and TRM.

    One panel per model. Each panel draws both runs against optimizer step
    (in thousands), marks each run's maximum with a larger dot and a
    "best ..." annotation, and the figure is written to
    ``OUT / "fig1_hrm_trm_training_curves.png"``.

    Args:
        data: mapping produced by ``get_data()``; the ``hrm_base``,
            ``hrm_multi``, ``trm_base`` and ``trm_multi`` tables are read.
    """
    fig, axes = plt.subplots(1, 2, figsize=(11.8, 4.2))
    palette = {"baseline": "#475569", "multi4": "#2563eb"}

    # (title, baseline rows, multi4 rows, baseline column, multi4 column, y-axis top)
    panel_specs = [
        ("HRM", data["hrm_base"], data["hrm_multi"], "all/exact_accuracy", "exact", 0.70),
        ("TRM official GBS768", data["trm_base"], data["trm_multi"], "all/exact_accuracy", "all/exact_accuracy", 0.94),
    ]
    for ax, panel in zip(axes, panel_specs):
        title, base_rows, multi_rows, base_metric, multi_metric, ymax = panel
        base_xy = metric_series(base_rows, base_metric)  # type: ignore[arg-type]
        multi_xy = metric_series(multi_rows, multi_metric)  # type: ignore[arg-type]
        # (run name, (steps, values), annotation offset), in drawing order.
        runs = [
            ("baseline", base_xy, (6, -18)),
            ("multi4", multi_xy, (6, 8)),
        ]
        # Three passes keep the artist creation order: lines, markers, labels.
        for run, (steps, vals), _ in runs:
            ax.plot(steps / 1000, vals, marker="o", color=palette[run], label=run, linewidth=2)
        peaks = [int(np.argmax(vals)) for _, (_, vals), _ in runs]
        for (run, (steps, vals), _), k in zip(runs, peaks):
            ax.scatter([steps[k] / 1000], [vals[k]], s=95, color=palette[run], edgecolor="white", zorder=5)
        for (_, (steps, vals), offset), k in zip(runs, peaks):
            ax.annotate(f"best {vals[k]:.3f}", (steps[k] / 1000, vals[k]), xytext=offset, textcoords="offset points", fontsize=8)
        ax.set_title(title)
        ax.set_xlabel("optimizer step (k)")
        ax.set_ylabel("full test exact accuracy")
        ax.set_ylim(0, ymax)
        ax.grid(alpha=0.22)
        ax.legend(frameon=False, loc="lower right")

    fig.suptitle("Trajectory perturbation helps both HRM and TRM, but late checkpoints can collapse")
    fig.tight_layout()
    fig.savefig(OUT / "fig1_hrm_trm_training_curves.png", dpi=220)
    plt.close(fig)
+
+
def plot_lambda1_motivation() -> None:
    """Plot success vs failure histograms of the first exponent for HRM and TRM.

    For each baseline-best archive, column 0 of the spectrum is split by the
    correctness flag and drawn as two density histograms on shared bins, with
    a vertical line at each group mean. The panel title reports the sample
    accuracy and the AUROC of column 0 as a failure score. The figure is saved
    to ``OUT / "fig0_motivation_lambda1_success_failure_hrm_trm.png"``.
    """
    panels = [
        ("HRM baseline best\nstep 26040", ROOT / "diag_hrm_step_26040_512.npz"),
        (
            "TRM baseline best\nstep 58590",
            ROOT / "official_gbs768_spectrum/trm_gbs768_base_step58590_n512_k8_seed20260602.npz",
        ),
    ]
    fig, axes = plt.subplots(1, 2, figsize=(11.6, 4.2), sharey=False)
    for ax, (title, path) in zip(axes, panels):
        spec, exact = spectrum_arrays(path)
        lam1 = spec[:, 0]
        # (tag, values, histogram alpha, histogram colour, mean-line colour)
        groups = [
            ("success", lam1[exact], 0.62, "#2563eb", "#1d4ed8"),
            ("failure", lam1[~exact], 0.58, "#dc2626", "#b91c1c"),
        ]
        # Bin edges span the 1%-99% quantile range plus 8% padding per side.
        lo, hi = np.quantile(lam1, [0.01, 0.99])
        pad = 0.08 * (hi - lo)
        bins = np.linspace(lo - pad, hi + pad, 34)
        for tag, vals, alpha, fill, _ in groups:
            ax.hist(vals, bins=bins, density=True, alpha=alpha, color=fill, label=f"{tag} (n={len(vals)})")
        for _, vals, _, _, line in groups:
            ax.axvline(vals.mean(), color=line, linewidth=2)
        auc = auc_failure_score(lam1, exact)
        ax.set_title(f"{title}\nacc={exact.mean():.3f}, AUROC={auc:.3f}")
        ax.set_xlabel("top Lyapunov exponent λ1")
        ax.set_ylabel("density")
        ax.grid(alpha=0.2)
        ax.legend(frameon=False, fontsize=8)
    fig.suptitle("Motivation: λ1 is a strong success/failure detector in both HRM and TRM")
    fig.tight_layout()
    fig.savefig(OUT / "fig0_motivation_lambda1_success_failure_hrm_trm.png", dpi=220)
    plt.close(fig)
+
+
def plot_phase(data: dict[str, object]) -> None:
    """Scatter full-test accuracy against ``mean8_all`` for each checkpoint.

    One panel per model; every checkpoint record becomes one labelled marker
    whose colour and shape depend on its ``family``. The figure is saved to
    ``OUT / "fig2_accuracy_vs_chaotic_volume_phase.png"``.

    Args:
        data: mapping produced by ``get_data()``; ``hrm_points`` and
            ``trm_points`` are read.
    """
    fig, axes = plt.subplots(1, 2, figsize=(11.8, 4.4), sharex=False)
    panels = [("HRM", data["hrm_points"]), ("TRM official GBS768", data["trm_points"])]
    # family -> (colour, marker)
    look = {
        "baseline": ("#475569", "o"),
        "multi4": ("#2563eb", "o"),
        "multi4-final": ("#dc2626", "X"),
    }
    for idx, (title, points) in enumerate(panels):
        ax = axes[idx]
        for point in points:  # type: ignore[assignment]
            color, marker = look[point["family"]]
            x, y = point["mean8_all"], point["full_exact"]
            ax.scatter(x, y, s=150, marker=marker, color=color, edgecolor="white", zorder=4)
            ax.annotate(
                f"{point['label']}\nstep {point['step']}",
                (x, y),
                xytext=(8, 5),
                textcoords="offset points",
                fontsize=8,
            )
        ax.set_title(title)
        ax.set_xlabel("mean top-8 Lyapunov exponent (lower = more stable)")
        ax.set_ylabel("full test exact accuracy")
        ax.grid(alpha=0.22)
    fig.suptitle("Accuracy-vs-chaotic-volume phase view: best multi4 moves up-left; collapse moves down-right")
    fig.tight_layout()
    fig.savefig(OUT / "fig2_accuracy_vs_chaotic_volume_phase.png", dpi=220)
    plt.close(fig)
+
+
def plot_spectrum_grid() -> None:
    """Plot mean success and failure spectra for four best checkpoints (2x2 grid).

    Each panel shows, per class, the mean exponent at every spectrum rank with
    a band of plus/minus one standard error (population std divided by the
    square root of the class size). The figure is saved to
    ``OUT / "fig3_hrm_trm_success_failure_spectra.png"``.
    """
    panels = [
        ("HRM baseline best", ROOT / "diag_hrm_step_26040_512.npz"),
        ("HRM multi4 best", ROOT / "diag_hrm_multi4_step_23436_512.npz"),
        ("TRM baseline best", ROOT / "official_gbs768_spectrum/trm_gbs768_base_step58590_n512_k8_seed20260602.npz"),
        ("TRM multi4 best", ROOT / "official_gbs768_spectrum/trm_gbs768_multi4_step35805_n512_k8_seed20260602.npz"),
    ]
    fig, axes = plt.subplots(2, 2, figsize=(10.8, 7.2), sharey=False)
    for ax, (title, path) in zip(axes.flat, panels):
        spec, exact = spectrum_arrays(path)
        ranks = np.arange(1, spec.shape[1] + 1)
        classes = [(exact, "#2563eb", "success"), (~exact, "#dc2626", "failure")]
        for mask, color, label in classes:
            subset = spec[mask]
            center = subset.mean(axis=0)
            # max(..., 1) avoids dividing by zero when a class is empty.
            half = subset.std(axis=0) / max(mask.sum(), 1) ** 0.5
            ax.plot(ranks, center, marker="o", color=color, label=label)
            ax.fill_between(ranks, center - half, center + half, color=color, alpha=0.16, linewidth=0)
        ax.axhline(0, color="black", linewidth=0.8, alpha=0.55)
        ax.set_title(f"{title}\nN={len(exact)}, acc={exact.mean():.3f}")
        ax.set_xlabel("spectrum rank")
        ax.set_ylabel("finite-time exponent")
        ax.grid(alpha=0.22)
    # Only the first panel carries the legend.
    axes.flat[0].legend(frameon=False)
    fig.suptitle("Success/failure separation is present in both HRM and TRM; multi4 mainly stabilizes successful trajectories")
    fig.tight_layout()
    fig.savefig(OUT / "fig3_hrm_trm_success_failure_spectra.png", dpi=220)
    plt.close(fig)
+
+
def plot_ptrm() -> None:
    """Draw the PTRM same-subset bar chart (baseline best vs multi4 best).

    Reads the first row of the paired PTRM summary CSV and plots four
    accuracies per model as grouped bars with value labels. The figure is
    saved to ``OUT / "fig4_ptrm_same_subset_comparison.png"``.
    """
    summary = read_csv(ROOT / "ptrm_same_subset/paired_ptrm_k100_n1000_seed0_summary.csv")[0]
    labels = ["deterministic", "mean rollout", "Q-selected", "oracle"]
    # CSV column suffixes, in the same order as ``labels``.
    suffixes = ["det", "mean_rollout", "qmax", "oracle"]
    base = [float(summary[f"base_{s}"]) for s in suffixes]
    multi = [float(summary[f"multi4_{s}"]) for s in suffixes]

    x = np.arange(len(labels))
    width = 0.36
    fig, ax = plt.subplots(figsize=(8.7, 4.3))
    ax.bar(x - width / 2, base, width, color="#64748b", label="baseline best")
    ax.bar(x + width / 2, multi, width, color="#2563eb", label="multi4 best")
    ax.set_xticks(x, labels)
    ax.set_ylim(0.86, 1.0)
    ax.set_ylabel("exact accuracy")
    ax.set_title("PTRM same-subset comparison (n=1000, K=100, D=64, sigma=0.3, L-only)")
    ax.grid(axis="y", alpha=0.22)
    ax.legend(frameon=False)
    # Value label just above each bar, baseline group first.
    for shift, vals in ((-width / 2, base), (width / 2, multi)):
        for xi, val in zip(x + shift, vals):
            ax.text(xi, val + 0.002, f"{val:.3f}", ha="center", va="bottom", fontsize=8)
    fig.tight_layout()
    fig.savefig(OUT / "fig4_ptrm_same_subset_comparison.png", dpi=220)
    plt.close(fig)
+
+
def write_summary(data: dict[str, object]) -> None:
    """Write the checkpoint summary CSV and the Markdown figure report.

    Outputs (both under ``OUT``):
        * ``hrm_trm_redesigned_summary.csv``: one row per checkpoint record in
          ``data["hrm_points"]`` and ``data["trm_points"]``.
        * ``meeting_figures_v2_report.md``: figure strategy, key numbers and
          caveats.

    Args:
        data: mapping produced by ``get_data()``.

    Raises:
        KeyError: if the "multi4 best" / "multi4 final" records are missing.
    """
    metric_fields = [
        "sample_exact",
        "lambda1_all",
        "mean8_all",
        "pos_count_all",
        "lambda1_success",
        "lambda1_fail",
        "mean8_success",
        "mean8_fail",
        "pos_count_success",
        "pos_count_fail",
    ]
    rows = []
    for model, points in [("HRM", data["hrm_points"]), ("TRM", data["trm_points"])]:
        for point in points:  # type: ignore[assignment]
            row = {
                "model": model,
                "label": point["label"],
                "step": point["step"],
                "full_exact": point["full_exact"],
            }
            row.update((name, point[name]) for name in metric_fields)
            rows.append(row)

    # The handle is not called ``f`` so it does not shadow the module-level
    # ``f`` helper inside this function.
    with (OUT / "hrm_trm_redesigned_summary.csv").open("w", newline="", encoding="utf-8") as fh:
        writer = csv.DictWriter(fh, fieldnames=list(rows[0].keys()))
        writer.writeheader()
        writer.writerows(rows)

    # Look rows up by (model, label) rather than list position, so reordering
    # the checkpoint lists cannot silently report the wrong checkpoint.
    by_key = {(row["model"], row["label"]): row for row in rows}
    hrm_best = by_key[("HRM", "multi4 best")]
    hrm_final = by_key[("HRM", "multi4 final")]
    trm_best = by_key[("TRM", "multi4 best")]
    trm_final = by_key[("TRM", "multi4 final")]

    ptrm = read_csv(ROOT / "ptrm_same_subset/paired_ptrm_k100_n1000_seed0_summary.csv")[0]
    # NOTE(review): the exact-accuracy figures in the first two "Key Numbers"
    # bullets are literals, not read from ``rows``; confirm they still match
    # the evaluation CSVs whenever the checkpoints change.
    report = f"""# Meeting Figures v2

## Figure Strategy

0. `fig0_motivation_lambda1_success_failure_hrm_trm.png`: first-exponent success/failure distribution in HRM and TRM. This motivates chaos as a detector before introducing the method.
1. `fig1_hrm_trm_training_curves.png`: performance over training for HRM and TRM. This answers whether the method improves accuracy and where best/final are.
2. `fig2_accuracy_vs_chaotic_volume_phase.png`: phase view, with accuracy versus mean top-8 Lyapunov exponent. This answers whether better checkpoints are dynamically more stable.
3. `fig3_hrm_trm_success_failure_spectra.png`: full success/failure spectrum separation for HRM and TRM best checkpoints. This extends Fig0 beyond λ1.
4. `fig4_ptrm_same_subset_comparison.png`: PTRM same-subset result. This is a secondary inference-time story.

## Key Numbers

- HRM baseline best: 0.5265 exact. HRM multi4 best: 0.6443 exact. HRM multi4 final: 0.4624 exact.
- TRM baseline best: 0.8686 exact. TRM multi4 best: 0.8965 exact. TRM multi4 final: 0.8351 exact.
- HRM multi4 best dynamics sample: mean top-8 exponent {hrm_best['mean8_all']:+.4f}; final {hrm_final['mean8_all']:+.4f}.
- TRM multi4 best dynamics sample: mean top-8 exponent {trm_best['mean8_all']:+.4f}; final {trm_final['mean8_all']:+.4f}.
- PTRM same subset, K=100: Q-selected {float(ptrm['base_qmax']):.3f} -> {float(ptrm['multi4_qmax']):.3f}; mean rollout {float(ptrm['base_mean_rollout']):.3f} -> {float(ptrm['multi4_mean_rollout']):.3f}.

## Caveats

- Dynamics spectra use N=512 diagnostic samples, not the full test set.
- PTRM numbers use a fixed N=1000 subset; do not mix its deterministic subset accuracy with full-test W&B exact accuracy.
- Final checkpoints are collapse diagnostics, not the method's reported performance.
"""
    # The report contains non-ASCII text ("λ1"); pin UTF-8 so writing does not
    # depend on the platform's locale encoding.
    (OUT / "meeting_figures_v2_report.md").write_text(report, encoding="utf-8")
+
+
def main() -> None:
    """Create the output directory, then render every figure and the summary."""
    OUT.mkdir(parents=True, exist_ok=True)
    data = get_data()
    plot_lambda1_motivation()
    # These two figures consume the loaded tables/points.
    for render in (plot_training_curves, plot_phase):
        render(data)
    # These two read their inputs from disk themselves.
    for render_standalone in (plot_spectrum_grid, plot_ptrm):
        render_standalone()
    write_summary(data)
    print(f"wrote redesigned artifacts to {OUT}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/make_q_lambda_scatter.py b/make_q_lambda_scatter.py
new file mode 100644
index 0000000..54ff8d2
--- /dev/null
+++ b/make_q_lambda_scatter.py
@@ -0,0 +1,263 @@
+from __future__ import annotations
+
+import csv
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+ROOT = Path("research/flossing")
+IN_DIR = ROOT / "q_lambda_scatter"
+OUT = ROOT / "meeting_artifacts_v2"
+
+RUNS = [
+ (
+ "TRM baseline + PTRM rollouts",
+ IN_DIR / "base58590_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.npz",
+ ),
+ (
+ "TRM multi4 + PTRM rollouts",
+ IN_DIR / "multi4_35805_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.npz",
+ ),
+]
+
+
def rank_average(values: np.ndarray) -> np.ndarray:
    """Return 1-based ranks of ``values``, giving tied entries their mean rank.

    Args:
        values: 1-D array of values to rank.

    Returns:
        Float array of the same length; equal values share the average of the
        positions they occupy in sorted order.
    """
    n = len(values)
    order = np.argsort(values, kind="mergesort")
    ranked = values[order]
    out = np.empty(n, dtype=float)
    if n == 0:
        return out
    # A new tie group starts wherever a sorted value differs from its predecessor.
    breaks = np.flatnonzero(ranked[1:] != ranked[:-1]) + 1
    starts = np.concatenate(([0], breaks))
    stops = np.concatenate((breaks, [n]))
    for lo, hi in zip(starts, stops):
        # Positions lo..hi-1 (0-based) have mean 1-based rank (lo + hi - 1) / 2 + 1.
        out[order[lo:hi]] = 0.5 * (lo + hi - 1) + 1.0
    return out
+
+
def corr(x: np.ndarray, y: np.ndarray) -> float:
    """Pearson correlation over the entries where both inputs are finite.

    Returns:
        The correlation coefficient, or NaN when fewer than three finite pairs
        remain or when either input has zero variance.
    """
    keep = np.isfinite(x) & np.isfinite(y)
    xs, ys = x[keep], y[keep]
    if len(xs) < 3:
        return float("nan")
    dx = xs - xs.mean()
    dy = ys - ys.mean()
    scale = np.sqrt(np.square(dx).sum() * np.square(dy).sum())
    if scale <= 0:
        return float("nan")
    return float(np.dot(dx, dy) / scale)
+
+
def spearman(x: np.ndarray, y: np.ndarray) -> float:
    """Spearman rank correlation over the entries where both inputs are finite.

    Non-finite pairs are dropped before ranking. Returns NaN when fewer than
    three pairs remain.
    """
    finite = np.isfinite(x) & np.isfinite(y)
    xs, ys = x[finite], y[finite]
    return corr(rank_average(xs), rank_average(ys)) if len(xs) >= 3 else float("nan")
+
+
def mean_within_problem_corr(stability: np.ndarray, q_halt: np.ndarray, rank: bool) -> float:
    """Average the row-by-row correlation between ``stability`` and ``q_halt``.

    Args:
        stability: 2-D array, iterated row by row.
        q_halt: 2-D array paired row-wise with ``stability``.
        rank: use Spearman when true, Pearson otherwise.

    Returns:
        Mean of the finite per-row correlations, or NaN if none is finite.
    """
    measure = spearman if rank else corr
    scores = [measure(xs, ys) for xs, ys in zip(stability, q_halt)]
    kept = [score for score in scores if np.isfinite(score)]
    return float(np.mean(kept)) if kept else float("nan")
+
+
def summarize(name: str, path: Path) -> dict[str, float | str]:
    """Compute selection-accuracy and correlation statistics for one rollout archive.

    The archive must contain ``exact``, ``q_halt`` and ``lyap``; each is used
    as a 2-D array with one row per problem and one column per rollout.
    "Stability" is ``-lyap``. A problem is "mixed" when some but not all of
    its rollouts are correct.

    Args:
        name: display name stored in the result.
        path: path to the ``.npz`` archive.

    Returns:
        A flat dict of statistics over all rollouts plus ``mixed_*`` entries
        restricted to mixed problems (NaN when there are none).

    Raises:
        ValueError: if ``lyap`` is empty.
    """
    # Close the archive once the arrays are copied out (astype() copies).
    with np.load(path) as data:
        exact = data["exact"].astype(bool)
        q_halt = data["q_halt"].astype(float)
        lyap = data["lyap"].astype(float)
    if lyap.size == 0:
        raise ValueError(f"{path} does not contain lyap")
    stability = -lyap

    arange = np.arange(exact.shape[0])
    q_idx = q_halt.argmax(axis=1)  # rollout with the highest q_halt per problem
    lyap_idx = lyap.argmin(axis=1)  # rollout with the lowest lyap per problem
    correct_count = exact.sum(axis=1)
    mixed = (correct_count > 0) & (correct_count < exact.shape[1])

    mixed_summary: dict[str, float] = {
        "mixed_problem_count": float(mixed.sum()),
        "zero_success_problem_count": float((correct_count == 0).sum()),
        "full_success_problem_count": float((correct_count == exact.shape[1]).sum()),
        "mixed_global_pearson_q_vs_stability": float("nan"),
        "mixed_q_max_exact": float("nan"),
        "mixed_lambda_min_exact": float("nan"),
        "mixed_oracle_exact": float("nan"),
    }
    if mixed.any():
        m_arange = np.arange(int(mixed.sum()))
        m_exact = exact[mixed]
        m_q = q_halt[mixed]
        m_lyap = lyap[mixed]
        mixed_summary.update(
            {
                "mixed_global_pearson_q_vs_stability": corr((-m_lyap).reshape(-1), m_q.reshape(-1)),
                "mixed_q_max_exact": float(m_exact[m_arange, m_q.argmax(axis=1)].mean()),
                "mixed_lambda_min_exact": float(m_exact[m_arange, m_lyap.argmin(axis=1)].mean()),
                "mixed_oracle_exact": float(m_exact.any(axis=1).mean()),
            }
        )

    out: dict[str, float | str] = {
        "name": name,
        "path": str(path),
        "n_samples": float(exact.shape[0]),
        "rollouts": float(exact.shape[1]),
        "mean_rollout_exact": float(exact.mean()),
        "q_max_exact": float(exact[arange, q_idx].mean()),
        "lambda_min_exact": float(exact[arange, lyap_idx].mean()),
        "oracle_pass_exact": float(exact.any(axis=1).mean()),
        "q_lambda_same_argmax_frac": float((q_idx == lyap_idx).mean()),
        "global_pearson_q_vs_stability": corr(stability.reshape(-1), q_halt.reshape(-1)),
        "global_spearman_q_vs_stability": spearman(stability.reshape(-1), q_halt.reshape(-1)),
        "within_problem_pearson_mean": mean_within_problem_corr(stability, q_halt, rank=False),
        "within_problem_spearman_mean": mean_within_problem_corr(stability, q_halt, rank=True),
        "q_success_mean": float(q_halt[exact].mean()) if exact.any() else float("nan"),
        "q_fail_mean": float(q_halt[~exact].mean()) if (~exact).any() else float("nan"),
        "lambda_success_mean": float(lyap[exact].mean()) if exact.any() else float("nan"),
        "lambda_fail_mean": float(lyap[~exact].mean()) if (~exact).any() else float("nan"),
    }
    out.update(mixed_summary)
    return out
+
+
def scatter_panel(
    ax: plt.Axes,
    stability_2d: np.ndarray,
    q_2d: np.ndarray,
    exact_2d: np.ndarray,
    title: str,
) -> None:
    """Scatter q_halt against stability for every rollout, coloured by correctness.

    Inputs are flattened and non-finite pairs dropped. Only points inside the
    0.5%-99.5% quantile box on both axes are drawn, and a least-squares line
    fitted to those points is overlaid when at least three are available.

    Args:
        ax: axes to draw on.
        stability_2d: stability values, flattened internally.
        q_2d: q_halt values, same shape as ``stability_2d``.
        exact_2d: boolean correctness flags, same shape.
        title: panel title; "(no points)" is appended when nothing is finite.
    """
    xs = stability_2d.reshape(-1)
    ys = q_2d.reshape(-1)
    ok = exact_2d.reshape(-1)
    finite = np.isfinite(xs) & np.isfinite(ys)
    ok, ys, xs = ok[finite], ys[finite], xs[finite]
    if len(xs) == 0:
        ax.set_title(title + "\n(no points)")
        return

    xlo, xhi = np.quantile(xs, [0.005, 0.995])
    ylo, yhi = np.quantile(ys, [0.005, 0.995])
    visible = (xs >= xlo) & (xs <= xhi) & (ys >= ylo) & (ys <= yhi)

    # Incorrect rollouts are drawn first, so correct ones sit on top.
    layers = [
        (visible & ~ok, 0.22, "#dc2626", "incorrect rollout"),
        (visible & ok, 0.17, "#2563eb", "correct rollout"),
    ]
    for chosen, alpha, color, label in layers:
        ax.scatter(
            xs[chosen],
            ys[chosen],
            s=8,
            alpha=alpha,
            color=color,
            linewidths=0,
            label=label,
        )

    if int(visible.sum()) >= 3:
        slope, intercept = np.polyfit(xs[visible], ys[visible], 1)
        grid_x = np.linspace(xlo, xhi, 100)
        ax.plot(grid_x, slope * grid_x + intercept, color="black", linewidth=1.8, alpha=0.75)

    ax.set_title(title)
    ax.set_xlim(xlo, xhi)
    ax.set_ylim(ylo, yhi)
    ax.grid(alpha=0.22)
+
+
def plot() -> list[dict[str, float | str]]:
    """Render the Q-head vs stability scatter figure and write its summary CSV.

    For every entry in ``RUNS`` the top row shows all rollouts and the bottom
    row shows only "mixed" problems (some but not all rollouts correct).
    Writes ``fig5_qhead_vs_lambda1_ptrm.png`` and
    ``fig5_qhead_vs_lambda1_ptrm_summary.csv`` under ``OUT``.

    Returns:
        One ``summarize`` dict per run, in ``RUNS`` order.

    Raises:
        SystemExit: if any input archive listed in ``RUNS`` is missing.
    """
    missing = [path for _name, path in RUNS if not path.exists()]
    if missing:
        raise SystemExit("missing input files:\n" + "\n".join(str(p) for p in missing))

    OUT.mkdir(parents=True, exist_ok=True)
    summaries = [summarize(name, path) for name, path in RUNS]

    fig, axes = plt.subplots(2, len(RUNS), figsize=(12.0, 8.2), sharey="row")
    if len(RUNS) == 1:
        # Force a 2-D (2, 1) grid so axes[row, col] indexing below still works.
        axes = np.asarray(axes).reshape(2, 1)

    for col, ((name, path), summary) in enumerate(zip(RUNS, summaries)):
        # Close each archive once its arrays are copied out (astype() copies).
        with np.load(path) as data:
            exact = data["exact"].astype(bool)
            q_halt = data["q_halt"].astype(float)
            stability = -data["lyap"].astype(float)
        correct_count = exact.sum(axis=1)
        mixed = (correct_count > 0) & (correct_count < exact.shape[1])

        scatter_panel(
            axes[0, col],
            stability,
            q_halt,
            exact,
            f"{name}: all rollouts\n"
            f"r={summary['global_pearson_q_vs_stability']:.2f}, "
            f"rho={summary['global_spearman_q_vs_stability']:.2f}, "
            f"Q exact={summary['q_max_exact']:.3f}",
        )
        scatter_panel(
            axes[1, col],
            stability[mixed],
            q_halt[mixed],
            exact[mixed],
            f"mixed problems only (n={int(summary['mixed_problem_count'])})\n"
            f"r={summary['mixed_global_pearson_q_vs_stability']:.2f}, "
            f"Q={summary['mixed_q_max_exact']:.3f}, "
            f"lambda-min={summary['mixed_lambda_min_exact']:.3f}",
        )

    for ax in axes[1, :]:
        ax.set_xlabel("stability proxy = -lambda_1")
    axes[0, 0].set_ylabel("Q-head halt logit")
    axes[1, 0].set_ylabel("Q-head halt logit")
    axes[0, 0].legend(frameon=False, loc="lower right", fontsize=8)
    fig.suptitle("PTRM Q-head score contains a stability signal; mixed problems reveal selector behavior")
    fig.tight_layout()
    fig.savefig(OUT / "fig5_qhead_vs_lambda1_ptrm.png", dpi=240)
    plt.close(fig)

    out_csv = OUT / "fig5_qhead_vs_lambda1_ptrm_summary.csv"
    with out_csv.open("w", newline="") as fh:
        writer = csv.DictWriter(fh, fieldnames=list(summaries[0].keys()))
        writer.writeheader()
        writer.writerows(summaries)
    return summaries
+
+
def main() -> None:
    """Build the figure and CSV, then print one statistics line per run."""
    # (printed tag, key in the summary dict), in output order.
    fields = [
        ("q_exact", "q_max_exact"),
        ("lambda_min_exact", "lambda_min_exact"),
        ("oracle", "oracle_pass_exact"),
        ("pearson", "global_pearson_q_vs_stability"),
        ("spearman", "global_spearman_q_vs_stability"),
        ("within_spearman", "within_problem_spearman_mean"),
        ("mixed_pearson", "mixed_global_pearson_q_vs_stability"),
    ]
    for row in plot():
        stats = " ".join(f"{tag}={row[key]:.4f}" for tag, key in fields)
        print(f"{row['name']}: " + stats)
    print(f"wrote {OUT / 'fig5_qhead_vs_lambda1_ptrm.png'}")
    print(f"wrote {OUT / 'fig5_qhead_vs_lambda1_ptrm_summary.csv'}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/make_trm_multi4_meeting_artifacts.py b/make_trm_multi4_meeting_artifacts.py
new file mode 100644
index 0000000..5f964ed
--- /dev/null
+++ b/make_trm_multi4_meeting_artifacts.py
@@ -0,0 +1,133 @@
+from __future__ import annotations
+
+from pathlib import Path
+import csv
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+ROOT = Path("research/flossing")
+SPECTRUM_DIR = ROOT / "official_gbs768_spectrum"
+PTRM_DIR = ROOT / "ptrm_same_subset"
+PLOT_DIR = ROOT / "meeting_artifacts"
+
+
+def _read_csv_rows(path: Path) -> list[dict[str, str]]:
+ with path.open(newline="") as fh:
+ return list(csv.DictReader(fh))
+
+
def _load_spectrum_rows() -> list[tuple[str, int, np.lib.npyio.NpzFile]]:
    """Open the three TRM spectrum archives used by the meeting plots.

    Returns:
        ``(display name, training step, opened archive)`` tuples, in the order
        baseline best, multi4 best, multi4 final. The archives are returned
        open.
    """
    catalog = [
        ("TRM baseline best", 58590, "trm_gbs768_base_step58590_n512_k8_seed20260602.npz"),
        ("multi4 best", 35805, "trm_gbs768_multi4_step35805_n512_k8_seed20260602.npz"),
        ("multi4 final", 65100, "trm_gbs768_multi4_step65100_n512_k8_seed20260602.npz"),
    ]
    return [(label, step, np.load(SPECTRUM_DIR / filename)) for label, step, filename in catalog]
+
+
def plot_spectra() -> None:
    """Plot mean success and failure spectra for the three TRM checkpoints.

    One panel per checkpoint. For each class the mean exponent at every
    spectrum rank is drawn with a band of plus/minus one standard error
    (population std divided by the square root of the class size). The figure
    is saved to ``PLOT_DIR / "trm_gbs768_spectrum_success_failure_n512.png"``.
    """
    rows = _load_spectrum_rows()
    fig, axes = plt.subplots(1, 3, figsize=(13.5, 4.2), sharey=True)
    for ax, (name, step, data) in zip(axes, rows):
        # The archives arrive open; close each one as soon as its arrays are
        # copied out (astype() copies) so no file handle is leaked.
        with data:
            spec = data["lyap_spec"].astype(float)
            exact = data["exact_correct"].astype(bool)
        x = np.arange(1, spec.shape[1] + 1)
        for mask, color, label in [
            (exact, "#1f77b4", "success"),
            (~exact, "#d62728", "failure"),
        ]:
            mean = spec[mask].mean(axis=0)
            # max(..., 1) avoids dividing by zero when a class is empty.
            se = spec[mask].std(axis=0) / max(mask.sum(), 1) ** 0.5
            ax.plot(x, mean, marker="o", color=color, label=label)
            ax.fill_between(x, mean - se, mean + se, color=color, alpha=0.16, linewidth=0)
        ax.axhline(0, color="black", linewidth=0.8, alpha=0.55)
        ax.set_title(f"{name}\nstep {step}, acc={exact.mean():.3f}")
        ax.set_xlabel("Lyapunov spectrum rank")
        ax.grid(alpha=0.22)
    axes[0].set_ylabel("finite-time exponent")
    axes[0].legend(frameon=False)
    fig.suptitle("TRM official GBS768: spectrum separates success/failure; multi4 best suppresses successful spectrum")
    fig.tight_layout()
    fig.savefig(PLOT_DIR / "trm_gbs768_spectrum_success_failure_n512.png", dpi=220)
    plt.close(fig)
+
+
def plot_headline() -> None:
    """Draw three bar charts from the headline dynamics table.

    Reads ``headline_trm_multi4_dynamics_table.csv`` and plots one panel each
    for ``full_exact``, ``mean8_all`` and ``pos_count_all``, with one bar per
    table row. The figure is saved to
    ``PLOT_DIR / "trm_gbs768_headline_dynamics_bars.png"``.
    """
    rows = _read_csv_rows(SPECTRUM_DIR / "headline_trm_multi4_dynamics_table.csv")
    # NOTE(review): three fixed labels, so the table presumably has exactly
    # three rows in this order; confirm against the CSV.
    labels = ["baseline\nbest", "multi4\nbest", "multi4\nfinal"]
    fig, axes = plt.subplots(1, 3, figsize=(12, 3.8))
    # (CSV column, panel title, bar colour)
    metrics = [
        ("full_exact", "Full test exact acc", "#2f6f4e"),
        ("mean8_all", "Mean top-8 exponent", "#8a4b2a"),
        ("pos_count_all", "Positive exponents / 8", "#4d5f8a"),
    ]
    for ax, (col, title, color) in zip(axes, metrics):
        values = np.asarray([float(row[col]) for row in rows])
        # Counts are labelled with two decimals, everything else with three.
        decimals = 2 if col == "pos_count_all" else 3
        ax.bar(labels, values, color=color, alpha=0.86)
        ax.set_title(title)
        ax.grid(axis="y", alpha=0.22)
        for i, v in enumerate(values):
            ax.text(i, v, f"{v:.{decimals}f}", ha="center", va="bottom", fontsize=9)
    fig.suptitle("Accuracy improves at multi4 best while chaotic volume drops; final collapse restores positive spectrum")
    fig.tight_layout()
    fig.savefig(PLOT_DIR / "trm_gbs768_headline_dynamics_bars.png", dpi=220)
    plt.close(fig)
+
+
def plot_ptrm() -> None:
    """Draw the PTRM same-subset bar chart (baseline best vs multi4 best).

    Reads the first row of the paired PTRM summary CSV and plots four
    accuracies per model as grouped bars with value labels. The figure is
    saved to ``PLOT_DIR / "ptrm_same_subset_k100_n1000_comparison.png"``.
    """
    row = _read_csv_rows(PTRM_DIR / "paired_ptrm_k100_n1000_seed0_summary.csv")[0]
    labels = ["deterministic", "mean rollout", "Q-selected", "oracle"]
    # CSV column suffixes, in the same order as ``labels``.
    suffixes = ("det", "mean_rollout", "qmax", "oracle")
    base = [float(row["base_" + s]) for s in suffixes]
    multi = [float(row["multi4_" + s]) for s in suffixes]

    x = np.arange(len(labels))
    width = 0.36
    fig, ax = plt.subplots(figsize=(8.4, 4.2))
    ax.bar(x - width / 2, base, width, label="baseline best", color="#6b7280")
    ax.bar(x + width / 2, multi, width, label="multi4 best", color="#2563eb")
    ax.set_ylim(0.86, 1.0)
    ax.set_xticks(x, labels)
    ax.set_ylabel("exact accuracy")
    ax.set_title("PTRM same-subset comparison (n=1000, K=100, D=64, sigma=0.3, L-only)")
    ax.grid(axis="y", alpha=0.22)
    ax.legend(frameon=False)
    # Value label just above each bar, baseline group first.
    for shift, vals in ((-width / 2, base), (width / 2, multi)):
        for xi, v in zip(x + shift, vals):
            ax.text(xi, v + 0.002, f"{v:.3f}", ha="center", va="bottom", fontsize=8)
    fig.tight_layout()
    fig.savefig(PLOT_DIR / "ptrm_same_subset_k100_n1000_comparison.png", dpi=220)
    plt.close(fig)
+
+
def main() -> None:
    """Create the plot directory and render the three meeting figures."""
    PLOT_DIR.mkdir(parents=True, exist_ok=True)
    for render in (plot_spectra, plot_headline, plot_ptrm):
        render()
    print(f"wrote plots to {PLOT_DIR}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/maze_package/README.md b/maze_package/README.md
new file mode 100644
index 0000000..45ab740
--- /dev/null
+++ b/maze_package/README.md
@@ -0,0 +1,30 @@
+# Maze-Hard package (E8) — train on dedicated cards, diagnose after
+
+## Contents
+- `launch_maze_trm.sh` — TRM Maze official recipe (att variant, 50k epochs), 1–2 GPU.
+- dataset already at `/home/yurenh2/rrm/data/maze-30x30-hard-1k` (built 2026-06-13;
+ seq_len 900, vocab 6, 1000 puzzles ×8 dihedral augments).
+
+## Run
+```bash
+bash launch_maze_trm.sh 2 384 # 2x A6000
+bash launch_maze_trm.sh 2 192 # 2x A5000 (drop 192 -> 128 if OOM)
+```
+Target: ~75% exact accuracy (official figure). Saves a checkpoint every 5000 epochs
+(10 checkpoints) — needed for the evolution analysis.
+
+## After training: diagnostics
+The 2x2 / FTLE pipeline reads any TRM checkpoint dir (all_config.yaml + step_N). Two caveats
+vs Sudoku, to verify on first run:
+1. ATTENTION arch (not mlp_t): confirm diagnose_trm_joint.py's JVP path runs on att blocks
+ (Sudoku used mlp_t). If the L_level call signature differs, patch the f_L/f_H closures.
+2. seq_len 900 vs 97 → per-sample JVP+QR cost ~9-10x Sudoku. Use n=512 for the headline 2x2
+ and n=256 for the horizon sweep; k_lyap=8 unchanged. Budget ~0.5-1 day on one card, or
+ rsync checkpoints back to the lab box and run via the analysis_2x2 queue.
+
+## What Maze closes
+Maze removes the "Sudoku-only" limitation. Pre-registered prediction (write it down BEFORE
+looking at the results, for the paper's credibility): if the wandering-not-settling
+decomposition is general across architectures and tasks, Maze should show B≈0 (failures don't
+settle) and the same concurrent-not-antecedent horizon profile. A different result (e.g. Maze
+failures do settle) is also publishable, because it bounds the claim's scope. Either way, the
+decomposition gets tested on a second task.
diff --git a/maze_package/TRANSFER_README.md b/maze_package/TRANSFER_README.md
new file mode 100644
index 0000000..6e11076
--- /dev/null
+++ b/maze_package/TRANSFER_README.md
@@ -0,0 +1,36 @@
+# Maze training bundle — transfer to your training machine
+
+## What's in this bundle
+- `maze-30x30-hard-1k/` — the built dataset (seq_len 900, vocab 6, 1000 puzzles ×8 augments).
+- `launch_maze_trm_portable.sh` — path-configurable launcher.
+- `diagnose_trm_joint.py`, `step7_interfloss.py` — diagnostic scripts (only if you run
+ diagnostics on the training machine; otherwise rsync checkpoints back to the lab box).
+
+## On the training machine
+1. Clone the TinyRecursiveModels repo and make sure the `rrm` conda env is available
+ (torch 2.7 cu126, flash-attn 2 for Ampere). If the env doesn't exist, recreate it from the
+ lab box's `env/requirements.txt` / `pip-freeze.txt`.
+2. Put the dataset somewhere, e.g. `~/data/maze-30x30-hard-1k`.
+3. Launch:
+ ```bash
+ TRM_DIR=~/TinyRecursiveModels DATA_DIR=~/data/maze-30x30-hard-1k \
+ bash launch_maze_trm_portable.sh 2 384 # 2x A6000
+ # or: 2 192 # 2x A5000 (drop 192 -> 128 if OOM)
+ ```
+ Target ~75% exact accuracy (official). ~18-28h on 2x A6000, ~24-36h on 2x A5000.
+ Saves one checkpoint per 5000 epochs (10 total) — keep all, the evolution analysis needs them.
+
+## After training
+Preferred: `rsync` the whole run checkpoint dir (checkpoints/maze-.../pretrain_att_maze30x30_*/)
+back to the lab box and run the existing analysis_2x2 queue there. The dir must include
+`all_config.yaml` plus the `step_*` files.
+
+If diagnosing on the training machine, two caveats vs the Sudoku runs:
+1. Maze uses the ATTENTION arch (not mlp_t). Verify diagnose_trm_joint.py's f_L/f_H JVP
+ closures call the attention L_level correctly; patch if the signature differs.
+2. seq_len 900 (vs 97) makes per-sample JVP+QR ~9-10x slower. Use n=512 for the headline 2x2,
+ n=256 for the horizon sweep, k_lyap=8.
+
+## Sanity check before the long run
+A short smoke run (epochs=200 eval_interval=200) should complete in minutes and confirm that
+the attention model, flash-attn and the dataset all load without OOM before you commit to the
+full 50k epochs.
diff --git a/maze_package/diagnose_trm_joint.py b/maze_package/diagnose_trm_joint.py
new file mode 100644
index 0000000..160a7cb
--- /dev/null
+++ b/maze_package/diagnose_trm_joint.py
@@ -0,0 +1,225 @@
+"""TRM Sudoku joint Lyapunov diagnostic — TRM version of diagnose_hrm_joint.py.
+
+Key differences from HRM:
+- TRM has ONE shared L_level (H_layers config is "ignored")
+- z_L update: z_L = L_level(z_L, z_H + input_embeddings)
+- z_H update: z_H = L_level(z_H, z_L) ← same L_level!
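+- H_cycles=3, L_cycles=6 for the Sudoku runs (vs HRM 2,2); the Maze launchers in this package set L_cycles=4. Both are read from the checkpoint config at run time.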
+
+Joint tangent block structure:
+- L step: v_L_new = J · (v_L + v_H), v_H_new = v_H, J at (z_L + z_H + ie)
+- H step: v_H_new = J' · (v_H + v_L), v_L_new = v_L, J' at (z_H + z_L)
+J and J' share weights but evaluated at different points.
+"""
+from __future__ import annotations
+import sys, os, yaml, math, argparse, json, time
+from pathlib import Path
+import numpy as np
+import torch
+
+# Checkout of the TinyRecursiveModels repo, put first on sys.path so `models.*` resolves to it.
+# Defaults to the lab-box path; set the TRM_DIR environment variable (the same variable the
+# portable launcher uses) to run this diagnostic on another machine.
+TRM_DIR = Path(os.environ.get("TRM_DIR", "/home/yurenh2/rrm/trm")).expanduser()
+sys.path.insert(0, str(TRM_DIR))
+
+from models.recursive_reasoning.trm import TinyRecursiveReasoningModel_ACTV1
+
+
+def load_model(ckpt_root: Path, ckpt_name: str, device: str):
+    """Rebuild the TRM described by a run directory and load one checkpoint into it.
+
+    Args:
+        ckpt_root: run directory containing ``all_config.yaml`` and the checkpoint files.
+        ckpt_name: file name of the checkpoint inside ``ckpt_root``.
+        device: device the model is moved to after loading.
+
+    Returns:
+        ``(model, cfg, train_meta)``: the model in eval mode, the parsed run config, and the
+        ``train/dataset.json`` metadata of the first entry in ``cfg["data_paths"]``.
+    """
+    cfg = yaml.safe_load((ckpt_root / "all_config.yaml").read_text())
+    arch_cfg = dict(cfg["arch"])
+    train_meta = json.loads((Path(cfg["data_paths"][0]) / "train" / "dataset.json").read_text())
+    arch_cfg.update(batch_size=cfg["global_batch_size"], seq_len=train_meta["seq_len"],
+                    vocab_size=train_meta["vocab_size"],
+                    num_puzzle_identifiers=train_meta["num_puzzle_identifiers"])
+    model = TinyRecursiveReasoningModel_ACTV1(arch_cfg)
+    sd = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True)
+    # "_orig_mod." is removed wherever it occurs, but the wrapper's "model." is stripped only as
+    # a leading prefix: a blanket replace would also rewrite names that merely contain
+    # "model." (e.g. "...submodel.weight") and silently turn them into missing/unexpected keys.
+    stripped = {k.replace("_orig_mod.", "").removeprefix("model."): v for k, v in sd.items()}
+    # strict=False: mismatches are reported below instead of raising.
+    missing, unexpected = model.load_state_dict(stripped, strict=False)
+    print(f"[load] missing={len(missing)} unexpected={len(unexpected)}")
+    if missing[:3]: print(f" sample missing: {missing[:3]}")
+    if unexpected[:3]: print(f" sample unexpected: {unexpected[:3]}")
+    model.to(device).eval()
+    return model, cfg, train_meta
+
+
+def load_test_samples(data_path, n_total, shard_id, num_shards, seed):
+    """Draw a seeded random subset of the test split and return this shard's slice of it.
+
+    All shards called with the same ``seed`` and ``n_total`` draw the same index permutation,
+    so their slices are disjoint. Shards are ceil-sized, so trailing shards may be short or empty.
+
+    Args:
+        data_path: dataset root containing ``test/all__{inputs,labels,puzzle_identifiers}.npy``.
+        n_total: number of test rows to sample across all shards (clamped to the split size).
+        shard_id: zero-based index of the shard to return.
+        num_shards: total number of shards.
+        seed: seed for the NumPy generator that picks the rows.
+
+    Returns:
+        dict with int32 tensors ``inputs``, ``labels``, ``puzzle_identifiers`` and the NumPy
+        array ``idx`` of the selected row indices.
+    """
+    rng = np.random.default_rng(seed)
+    inputs = np.load(data_path / "test" / "all__inputs.npy")
+    labels = np.load(data_path / "test" / "all__labels.npy")
+    pid = np.load(data_path / "test" / "all__puzzle_identifiers.npy")
+    if n_total > len(inputs):
+        # Sampling without replacement cannot return more rows than exist; use the whole
+        # split rather than failing with a ValueError.
+        print(f"[data] requested {n_total} samples but the test split has {len(inputs)}; using all of them")
+        n_total = len(inputs)
+    all_idx = rng.choice(len(inputs), size=n_total, replace=False)
+    shard_size = (n_total + num_shards - 1) // num_shards
+    s, e = shard_id * shard_size, min((shard_id + 1) * shard_size, n_total)
+    idx = all_idx[s:e]
+    return {
+        "inputs": torch.from_numpy(inputs[idx].astype(np.int32)),
+        "labels": torch.from_numpy(labels[idx].astype(np.int32)),
+        "puzzle_identifiers": torch.from_numpy(pid[idx].astype(np.int32)),
+        "idx": idx,
+    }
+
+
+def jvp_through(f, x, v):
+    """Evaluate ``f`` at ``x`` together with its Jacobian-vector product along ``v``.
+
+    Thin wrapper over ``torch.autograd.functional.jvp`` with ``create_graph=False`` and
+    ``strict=False``; returns the ``(f(x), jvp)`` pair.
+    """
+    value, tangent = torch.autograd.functional.jvp(f, x, v=v, create_graph=False, strict=False)
+    return value, tangent
+
+
+def run_diagnose_batch(model, batch, device, k_lyap, t_ons, seed):
+    """Roll out every ACT step for one batch while tracking a joint (z_H, z_L) Lyapunov spectrum.
+
+    The rollout starts from ``inner.H_init`` / ``inner.L_init`` and always runs all
+    ``cfg.halt_max_steps`` ACT steps; the halting head is only recorded, never used to stop.
+    A ``k_lyap``-column tangent basis over the stacked state (rows ``[:D]`` for z_H, rows
+    ``[D:]`` for z_L) is pushed through each update with JVPs and re-orthonormalised by QR
+    every ``t_ons`` updates.
+
+    Args:
+        model: wrapper exposing the recursive model as ``model.inner``.
+        batch: dict with ``inputs``, ``labels`` and ``puzzle_identifiers`` tensors.
+        device: device used for the tangent basis and the rollout.
+        k_lyap: number of tangent directions (leading exponents) to track.
+        t_ons: number of L/H updates between QR re-orthonormalisations.
+        seed: seed for the random initial tangent basis.
+
+    Returns:
+        dict of NumPy arrays: ``drift_zH`` / ``drift_zL`` (per-ACT-step L2 norm of the state
+        change), ``halted_at`` (1-based ACT step where q_halt first exceeded q_continue, 0 if
+        never), ``q_halt`` / ``q_continue``, ``lyap_spec`` (one row of ``k_lyap`` values per
+        sample), ``exact_correct`` and ``token_acc`` (computed from the last ACT step's logits).
+    """
+    inner = model.inner
+    cfg = inner.config
+    B = batch["inputs"].shape[0]
+    seq_full = cfg.seq_len + inner.puzzle_emb_len
+    hidden = cfg.hidden_size
+    D = seq_full * hidden  # flattened size of one latent (z_H or z_L)
+
+    z_H = inner.H_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
+    z_L = inner.L_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
+    seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None)
+    input_embeddings = inner._input_embeddings(batch["inputs"].to(device),
+                                               batch["puzzle_identifiers"].to(device))
+
+    # Random orthonormal start for the joint tangent basis, shape (B, 2*D, k_lyap).
+    g = torch.Generator(device=device).manual_seed(seed)
+    Q0 = torch.randn(B, 2*D, k_lyap, device=device, dtype=torch.float32, generator=g)
+    Q, _ = torch.linalg.qr(Q0)
+    log_R_sum = torch.zeros(B, k_lyap, device=device, dtype=torch.float32)
+    n_lyap_steps = 0  # number of QR re-orthonormalisations accumulated into log_R_sum
+    step_counter = 0  # number of L/H updates performed so far
+
+    drift_zH_per_step, drift_zL_per_step = [], []
+    halted_at = torch.zeros(B, dtype=torch.long, device=device)  # 0 = has not halted yet
+    q_halt_hist, q_continue_hist = [], []
+
+    for act_step in range(cfg.halt_max_steps):
+        z_H_prev = z_H.detach().clone()
+        z_L_prev = z_L.detach().clone()
+
+        with torch.enable_grad():
+            zH, zL = z_H.detach(), z_L.detach()
+            for _h in range(cfg.H_cycles):
+                # L cycles
+                for _l in range(cfg.L_cycles):
+                    v_H_j = Q[:, :D, :]
+                    v_L_j = Q[:, D:, :]
+                    # L update: the z_L tangent becomes J . (v_L + v_H); the z_H tangent is kept.
+                    v_comb = v_H_j + v_L_j
+                    new_v_L_cols = []
+                    f_L = lambda z: inner.L_level(z, zH + input_embeddings, **seq_info)
+                    for i in range(k_lyap):
+                        v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype)
+                        zL_new, Dv = jvp_through(f_L, zL, v_i)
+                        new_v_L_cols.append(Dv.reshape(B, D).to(torch.float32))
+                    new_v_L = torch.stack(new_v_L_cols, dim=-1)
+                    Q = torch.cat([v_H_j, new_v_L], dim=1)
+                    zL = zL_new
+                    step_counter += 1
+                    if step_counter % t_ons == 0:
+                        Q, R = torch.linalg.qr(Q)
+                        log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
+                        n_lyap_steps += 1
+
+                # H step (uses SAME L_level!)
+                v_H_j = Q[:, :D, :]
+                v_L_j = Q[:, D:, :]
+                # H update: the z_H tangent becomes J' . (v_H + v_L); the z_L tangent is kept.
+                v_comb = v_H_j + v_L_j
+                new_v_H_cols = []
+                f_H = lambda z: inner.L_level(z, zL, **seq_info)
+                for i in range(k_lyap):
+                    v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype)
+                    zH_new, Dv = jvp_through(f_H, zH, v_i)
+                    new_v_H_cols.append(Dv.reshape(B, D).to(torch.float32))
+                new_v_H = torch.stack(new_v_H_cols, dim=-1)
+                Q = torch.cat([new_v_H, v_L_j], dim=1)
+                zH = zH_new
+                step_counter += 1
+                if step_counter % t_ons == 0:
+                    Q, R = torch.linalg.qr(Q)
+                    log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
+                    n_lyap_steps += 1
+
+            z_H, z_L = zH, zL
+
+        drift_zH_per_step.append((z_H - z_H_prev).float().flatten(1).norm(dim=1).cpu())
+        drift_zL_per_step.append((z_L - z_L_prev).float().flatten(1).norm(dim=1).cpu())
+
+        with torch.no_grad():
+            q_logits = inner.q_head(z_H[:, 0]).float()
+            q_halt, q_continue = q_logits[..., 0], q_logits[..., 1]
+            q_halt_hist.append(q_halt.cpu()); q_continue_hist.append(q_continue.cpu())
+            # Record only the first ACT step (1-based) at which each sample would halt.
+            new_halt = (q_halt > q_continue) & (halted_at == 0)
+            halted_at[new_halt] = act_step + 1
+            output = inner.lm_head(z_H)[:, inner.puzzle_emb_len:].float()
+            final_logits = output  # overwritten each step; the last ACT step's logits are scored
+
+    # NOTE(review): the sum is divided by the number of QR calls, so with t_ons > 1 this is the
+    # mean log-growth per re-orthonormalisation interval rather than per update. Confirm that
+    # is the intended normalisation before comparing runs with different t_ons.
+    lyap_spec = (log_R_sum / max(n_lyap_steps, 1)).cpu().numpy()
+
+    with torch.no_grad():
+        preds = final_logits.argmax(dim=-1)
+        labels = batch["labels"].to(device)
+        mask = labels > 0  # only positions with a positive label are scored
+        exact = ((preds == labels) | ~mask).all(dim=-1).cpu().float()
+        token_acc = ((preds == labels) & mask).sum(-1).float() / mask.sum(-1).float().clamp_min(1)
+        token_acc = token_acc.cpu()
+
+    return {
+        "drift_zH": torch.stack(drift_zH_per_step, dim=1).numpy(),
+        "drift_zL": torch.stack(drift_zL_per_step, dim=1).numpy(),
+        "halted_at": halted_at.cpu().numpy(),
+        "q_halt": torch.stack(q_halt_hist, dim=1).numpy(),
+        "q_continue": torch.stack(q_continue_hist, dim=1).numpy(),
+        "lyap_spec": lyap_spec,
+        "exact_correct": exact.numpy(),
+        "token_acc": token_acc.numpy(),
+    }
+
+
+def main():
+    """CLI entry point: diagnose one shard of test samples and save the results as ``.npz``.
+
+    Loads the checkpoint, runs ``run_diagnose_batch`` over the shard in mini-batches, writes
+    the concatenated per-sample arrays to ``--out`` and prints the Lyapunov spectrum split by
+    exact-match success/failure.
+    """
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--ckpt-root", required=True)
+    ap.add_argument("--ckpt-name", default="step_13020")
+    ap.add_argument("--n-samples", type=int, default=512)
+    ap.add_argument("--shard-id", type=int, default=0)
+    ap.add_argument("--num-shards", type=int, default=1)
+    ap.add_argument("--batch-size", type=int, default=16)
+    ap.add_argument("--k-lyap", type=int, default=8)
+    ap.add_argument("--t-ons", type=int, default=1)
+    ap.add_argument("--seed", type=int, default=0)
+    ap.add_argument("--out", default="diag_trm.npz")
+    args = ap.parse_args()
+
+    device = "cuda"
+    model, cfg, train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device)
+    print(f"loaded {args.ckpt_name}: hidden={model.inner.config.hidden_size}, "
+          f"seq_full={train_meta['seq_len'] + model.inner.puzzle_emb_len}, "
+          f"halt_max_steps={model.inner.config.halt_max_steps}, "
+          f"H={model.inner.config.H_cycles} L={model.inner.config.L_cycles}")
+
+    test = load_test_samples(Path(cfg["data_paths"][0]), args.n_samples, args.shard_id, args.num_shards, args.seed)
+    n = len(test["inputs"])
+    print(f"shard {args.shard_id}/{args.num_shards}: {n} samples")
+    if n == 0:
+        # Ceil-sized shards can leave trailing shards empty. Still write the (empty) archive,
+        # as before, but stop here instead of failing on the missing "exact_correct" key.
+        np.savez_compressed(args.out)
+        print("empty shard: nothing to diagnose")
+        return
+
+    res = {k: [] for k in ["drift_zH","drift_zL","halted_at","q_halt","q_continue","lyap_spec","exact_correct","token_acc","idx"]}
+    t0 = time.time()
+    for s in range(0, n, args.batch_size):
+        e = min(s + args.batch_size, n)
+        batch = {k: test[k][s:e].to(device) for k in ["inputs","labels","puzzle_identifiers"]}
+        # A distinct seed per mini-batch gives each one its own initial tangent basis.
+        out = run_diagnose_batch(model, batch, device, args.k_lyap, args.t_ons, args.seed + s)
+        for k, v in out.items():
+            res[k].append(v)
+        res["idx"].append(test["idx"][s:e])
+        ls = out["lyap_spec"]
+        print(f" [{e}/{n}] dt={time.time()-t0:.1f}s exact={out['exact_correct'].mean():.3f} "
+              f"λ_1={ls[:,0].mean():+.4f} λ_{args.k_lyap}={ls[:,-1].mean():+.4f}", flush=True)
+
+    saved = {}
+    for k, v in res.items():
+        if not v: continue
+        try: saved[k] = np.concatenate(v, 0)
+        except ValueError: saved[k] = np.stack(v, 0)
+    np.savez_compressed(args.out, **saved)
+    succ = saved["exact_correct"] > 0.5
+
+    def _group_mean(values):
+        # An all-success or all-failure shard leaves one group empty; report NaN for it
+        # without triggering NumPy's "mean of empty slice" warning.
+        return values.mean() if values.size else float("nan")
+
+    print(f"\nN={len(succ)} acc={succ.mean():.4f}")
+    print(f"{'i':>3} {'all':>10} {'succ':>10} {'fail':>10} {'Δ':>9}")
+    for i in range(saved["lyap_spec"].shape[1]):
+        li = saved["lyap_spec"][:, i]
+        m_succ, m_fail = _group_mean(li[succ]), _group_mean(li[~succ])
+        print(f"{i+1:>3} {li.mean():+10.4f} {m_succ:+10.4f} {m_fail:+10.4f} {m_fail-m_succ:+9.4f}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/maze_package/launch_maze_trm.sh b/maze_package/launch_maze_trm.sh
new file mode 100755
index 0000000..093bb1e
--- /dev/null
+++ b/maze_package/launch_maze_trm.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+# TRM Maze-Hard 30x30 official recipe, adapted for 2 GPUs. Run on dedicated training cards.
+# Lab-box variant: the conda, repo, dataset and log paths below are hard-coded under
+# /home/yurenh2; use launch_maze_trm_portable.sh on any other machine.
+# Usage: bash launch_maze_trm.sh [NGPU] [GBS]
+#   2x A6000 (48G): bash launch_maze_trm.sh 2 384
+#   2x A5000 (24G): bash launch_maze_trm.sh 2 192   (drop to 128 if OOM)
+#   1x card:        bash launch_maze_trm.sh 1 128
+set -eo pipefail
+
+NGPU="${1:-2}"
+GBS="${2:-384}"
+# Reject typos early: inside [[ ... -gt ... ]] a non-numeric word is evaluated as an unset
+# variable (i.e. 0), which would silently select the single-GPU branch.
+if ! [[ "${NGPU}" =~ ^[1-9][0-9]*$ && "${GBS}" =~ ^[1-9][0-9]*$ ]]; then
+  echo "usage: bash launch_maze_trm.sh [NGPU] [GBS]   (both positive integers)" >&2
+  exit 2
+fi
+RUN_NAME="pretrain_att_maze30x30_${NGPU}gpu_gbs${GBS}"
+
+source /home/yurenh2/miniconda3/etc/profile.d/conda.sh
+conda activate rrm
+cd /home/yurenh2/rrm/trm
+export WANDB_MODE=offline
+
+COMMON_ARGS=(
+  arch=trm
+  "data_paths=[/home/yurenh2/rrm/data/maze-30x30-hard-1k]"
+  "evaluators=[]"
+  epochs=50000 eval_interval=5000
+  lr=1e-4 puzzle_emb_lr=1e-4 weight_decay=1.0 puzzle_emb_weight_decay=1.0
+  global_batch_size="${GBS}"
+  arch.L_layers=2 arch.H_cycles=3 arch.L_cycles=4
+  +run_name="${RUN_NAME}" ema=True
+  +checkpoint_every_eval=true
+)
+
+LOG="/home/yurenh2/rrm/research/flossing/maze_${RUN_NAME}.log"
+# The job is backgrounded, so a failed log redirect would not trip `set -e` and the script
+# would still print "launched"; make sure the log directory exists first.
+mkdir -p -- "$(dirname -- "${LOG}")"
+
+if [[ "${NGPU}" -gt 1 ]]; then
+  nohup torchrun --nproc-per-node "${NGPU}" --rdzv_backend=c10d --rdzv_endpoint=localhost:0 \
+    --nnodes=1 pretrain.py "${COMMON_ARGS[@]}" > "${LOG}" 2>&1 &
+else
+  nohup python pretrain.py "${COMMON_ARGS[@]}" > "${LOG}" 2>&1 &
+fi
+echo "launched ${RUN_NAME} (pid $!), log: ${LOG}"
+echo "checkpoints -> trm/checkpoints/maze-30x30-hard-1k.../${RUN_NAME}/ (one per 5000 epochs)"
+echo "monitor: tail -f ${LOG} | grep -E 'accuracy|exact'"
diff --git a/maze_package/launch_maze_trm_portable.sh b/maze_package/launch_maze_trm_portable.sh
new file mode 100755
index 0000000..d801ceb
--- /dev/null
+++ b/maze_package/launch_maze_trm_portable.sh
@@ -0,0 +1,58 @@
+#!/usr/bin/env bash
+# Portable TRM Maze-Hard launcher — run on any machine with the TRM repo + rrm conda env.
+#
+# Required environment (paths on the target machine):
+#   TRM_DIR  = path to the TinyRecursiveModels repo clone (contains pretrain.py)
+#   DATA_DIR = path to the maze-30x30-hard-1k dataset (from this bundle)
+# Optional environment:
+#   CONDA_SH      = path to conda.sh (default: first hit among common install locations)
+#   CONDA_ENV     = conda environment to activate (default: rrm)
+#   EPOCHS        = total training epochs (default: 50000)
+#   EVAL_INTERVAL = epochs between evals/checkpoints (default: 5000)
+#                   e.g. EPOCHS=200 EVAL_INTERVAL=200 for the smoke run in TRANSFER_README.md
+#
+# Usage: TRM_DIR=~/TinyRecursiveModels DATA_DIR=~/maze-30x30-hard-1k bash launch_maze_trm_portable.sh [NGPU] [GBS]
+#   2x A6000 (48G): ... bash launch_maze_trm_portable.sh 2 384
+#   2x A5000 (24G): ... bash launch_maze_trm_portable.sh 2 192   (-> 128 if OOM)
+set -eo pipefail
+
+die() { printf 'error: %s\n' "$*" >&2; exit 1; }
+
+TRM_DIR="${TRM_DIR:?set TRM_DIR to the TinyRecursiveModels repo path}"
+DATA_DIR="${DATA_DIR:?set DATA_DIR to the maze-30x30-hard-1k dataset path}"
+NGPU="${1:-2}"
+GBS="${2:-384}"
+EPOCHS="${EPOCHS:-50000}"
+EVAL_INTERVAL="${EVAL_INTERVAL:-5000}"
+RUN_NAME="pretrain_att_maze30x30_${NGPU}gpu_gbs${GBS}"
+
+# Validate up front: the training job is backgrounded, so mistakes found later would only
+# show up in the log file after this script has already printed "launched".
+[[ "${NGPU}" =~ ^[1-9][0-9]*$ ]] || die "NGPU must be a positive integer, got '${NGPU}'"
+[[ "${GBS}" =~ ^[1-9][0-9]*$ ]] || die "GBS must be a positive integer, got '${GBS}'"
+[[ -f "${TRM_DIR}/pretrain.py" ]] || die "no pretrain.py under TRM_DIR=${TRM_DIR}"
+[[ -d "${DATA_DIR}" ]] || die "DATA_DIR=${DATA_DIR} is not a directory"
+# Make both paths absolute before the `cd` below; a relative DATA_DIR would otherwise be
+# resolved against TRM_DIR instead of the directory the user launched from.
+TRM_DIR="$(cd -- "${TRM_DIR}" && pwd)"
+DATA_DIR="$(cd -- "${DATA_DIR}" && pwd)"
+
+# conda
+CONDA_SH="${CONDA_SH:-}"
+if [[ -z "${CONDA_SH}" ]]; then
+  for p in "$HOME/miniconda3/etc/profile.d/conda.sh" "$HOME/anaconda3/etc/profile.d/conda.sh" \
+           "/opt/conda/etc/profile.d/conda.sh"; do
+    if [[ -f "$p" ]]; then
+      CONDA_SH="$p"
+      break
+    fi
+  done
+fi
+if [[ -f "${CONDA_SH}" ]]; then
+  # shellcheck disable=SC1090  # path is only known at run time
+  source "${CONDA_SH}"
+  conda activate "${CONDA_ENV:-rrm}"
+else
+  # Still best-effort (the caller may already be inside the right env), but no longer silent.
+  echo "warning: conda.sh not found (set CONDA_SH); using the current Python environment" >&2
+fi
+
+cd "${TRM_DIR}"
+export WANDB_MODE=offline
+
+ARGS=(
+  arch=trm
+  "data_paths=[${DATA_DIR}]"
+  "evaluators=[]"
+  epochs="${EPOCHS}" eval_interval="${EVAL_INTERVAL}"
+  lr=1e-4 puzzle_emb_lr=1e-4 weight_decay=1.0 puzzle_emb_weight_decay=1.0
+  global_batch_size="${GBS}"
+  arch.L_layers=2 arch.H_cycles=3 arch.L_cycles=4
+  +run_name="${RUN_NAME}" ema=True
+  +checkpoint_every_eval=true
+)
+LOG="maze_${RUN_NAME}.log"
+
+if [[ "${NGPU}" -gt 1 ]]; then
+  nohup torchrun --nproc-per-node "${NGPU}" --rdzv_backend=c10d --rdzv_endpoint=localhost:0 \
+    --nnodes=1 pretrain.py "${ARGS[@]}" > "${LOG}" 2>&1 &
+else
+  nohup python pretrain.py "${ARGS[@]}" > "${LOG}" 2>&1 &
+fi
+echo "launched ${RUN_NAME} (pid $!)"
+echo "log: ${TRM_DIR}/${LOG}"
+echo "ckpts: ${TRM_DIR}/checkpoints/maze-30x30-hard-1k.../${RUN_NAME}/ (1 per ${EVAL_INTERVAL} epochs)"
+echo "watch: tail -f ${TRM_DIR}/${LOG} | grep -E 'exact|accuracy'"
+echo
+echo "When done: rsync the run's checkpoint dir back to the lab box for the diagnostic pipeline,"
+echo "or run diagnostics here (see TRANSFER_README.md, note the attention-arch + n=512 caveats)."
diff --git a/maze_package/pip-freeze.txt b/maze_package/pip-freeze.txt
new file mode 100644
index 0000000..03c81b2
--- /dev/null
+++ b/maze_package/pip-freeze.txt
@@ -0,0 +1,18 @@
+adam_atan2==0.0.3
+argdantic==1.3.3
+coolname==2.2.0
+einops==0.8.1
+flash_attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu12torch2.7cxx11abiTRUE-cp310-cp310-linux_x86_64.whl#sha256=ce91e246f21d61ad66b1a7555340dbaa28e4aa86edcf00c18f0837422939b529
+huggingface-hub==0.34.4
+hydra-core==1.3.2
+numba==0.61.2
+omegaconf==2.3.0
+pydantic==2.11.7
+pydantic_core==2.33.2
+pydantic-settings==2.14.1
+torch==2.7.0+cu126
+torchaudio==2.7.0+cu126
+torchvision==0.22.0+cu126
+tqdm==4.67.1
+triton==3.3.0
+wandb==0.21.3
diff --git a/maze_package/requirements.txt b/maze_package/requirements.txt
new file mode 100644
index 0000000..d03a446
--- /dev/null
+++ b/maze_package/requirements.txt
@@ -0,0 +1,13 @@
+# Merged from HRM + TRM official; versions pinned per TRM specific_requirements.txt
+# torch handled separately (torch==2.7.0+cu126 via cu126 index)
+einops==0.8.1
+tqdm==4.67.1
+coolname==2.2.0
+pydantic==2.11.7
+pydantic-core==2.33.2
+argdantic==1.3.3
+wandb==0.21.3
+omegaconf==2.3.0
+hydra-core==1.3.2
+huggingface_hub==0.34.4
+numba==0.61.2
diff --git a/maze_package/step7_interfloss.py b/maze_package/step7_interfloss.py
new file mode 100644
index 0000000..3b8e4f0
--- /dev/null
+++ b/maze_package/step7_interfloss.py
@@ -0,0 +1,589 @@
+"""Step 7: Engelken-style interflossing.
+
+This is intentionally not a mixed objective. Ordinary task-training steps use
+only the supervised ACT loss. Flossing episodes use only a Lyapunov-spectrum
+conditioning loss, then task training resumes.
+
+Paper mapping:
+ - preflossing: run a floss-only episode before task training.
+ - interflossing: run short floss-only episodes at selected training steps.
+ - no persistent L_task + alpha * L_floss term is used here.
+"""
+from __future__ import annotations
+
+import argparse
+import importlib
+import json
+import os
+import sys
+import time
+from pathlib import Path
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+import yaml
+
+
+# Checkout locations of the HRM / TRM repos that import_stack() puts on sys.path.
+# The defaults are the lab-box paths; set the HRM_DIR / TRM_DIR environment variables
+# to run on another machine.
+HRM_DIR = Path(os.environ.get("HRM_DIR", "/home/yurenh2/rrm/hrm")).expanduser()
+TRM_DIR = Path(os.environ.get("TRM_DIR", "/home/yurenh2/rrm/trm")).expanduser()
+
+
+def import_stack(model_type: str):
+    """Import the model/loss/optimizer classes for ``model_type`` from its repo checkout.
+
+    The matching repo directory (``HRM_DIR`` for ``"hrm"``, otherwise ``TRM_DIR``) is put at
+    the front of ``sys.path`` before importing.
+
+    Returns:
+        ``(model_cls, ACTLossHead, AdamATan2, CastedSparseEmbeddingSignSGD_Distributed)``.
+    """
+    use_hrm = model_type == "hrm"
+    sys.path.insert(0, str(HRM_DIR if use_hrm else TRM_DIR))
+    if use_hrm:
+        module_name, class_name = "models.hrm.hrm_act_v1", "HierarchicalReasoningModel_ACTV1"
+    else:
+        module_name, class_name = "models.recursive_reasoning.trm", "TinyRecursiveReasoningModel_ACTV1"
+    model_cls = getattr(importlib.import_module(module_name), class_name)
+    loss_head_cls = importlib.import_module("models.losses").ACTLossHead
+    adam_cls = importlib.import_module("adam_atan2").AdamATan2
+    sparse_cls = importlib.import_module("models.sparse_embedding").CastedSparseEmbeddingSignSGD_Distributed
+    return model_cls, loss_head_cls, adam_cls, sparse_cls
+
+
+def parse_step_list(text: str) -> set[int]:
+    """Parse a comma-separated list of integers into a set.
+
+    Surrounding whitespace is ignored and empty entries are skipped, so an empty or blank
+    string yields an empty set. A non-integer entry raises ``ValueError``.
+    """
+    tokens = (piece.strip() for piece in text.split(","))
+    return {int(token) for token in tokens if token}
+
+
+def build_interfloss_steps(args) -> set[int]:
+    """Collect the task-step indices at which a floss episode is scheduled.
+
+    Combines the explicit ``--interfloss-at`` list with, when ``--interfloss-every`` is
+    positive, a periodic schedule from ``interfloss_start`` (floored at 0) up to and including
+    ``interfloss_stop`` (a negative stop means ``train_steps``; capped at ``train_steps``).
+    """
+    scheduled = parse_step_list(args.interfloss_at)
+    period = args.interfloss_every
+    if not period or period <= 0:
+        return scheduled
+    first = max(args.interfloss_start, 0)
+    if args.interfloss_stop < 0:
+        last = args.train_steps
+    else:
+        last = min(args.interfloss_stop, args.train_steps)
+    scheduled.update(range(first, last + 1, period))
+    return scheduled
+
+
+def load_model(model_type: str, ckpt_root: Path, ckpt_name: str, device: str, batch_size_override: int | None = None):
+    """Build the model plus its ACT loss head from a run directory and optionally load weights.
+
+    Args:
+        model_type: ``"hrm"`` or ``"trm"``; selects which repo's classes are imported.
+        ckpt_root: run directory containing ``all_config.yaml`` and the checkpoint files.
+        ckpt_name: checkpoint file name, or ``"__random__"`` to keep the fresh initialization.
+        device: device context under which the model is constructed.
+        batch_size_override: replaces the config's ``global_batch_size`` when truthy.
+
+    Returns:
+        ``(head, base, cfg, adam_cls, sparse_cls)``; ``cfg["data_path"]`` is set to the
+        resolved dataset path.
+    """
+    model_cls, loss_head_cls, adam_cls, sparse_cls = import_stack(model_type)
+    cfg = yaml.safe_load((ckpt_root / "all_config.yaml").read_text())
+    base_arch = cfg["arch"]
+    data_path = Path(cfg.get("data_path") or cfg["data_paths"][0])
+    train_meta = json.loads((data_path / "train" / "dataset.json").read_text())
+    arch_cfg = {
+        **base_arch,
+        "batch_size": batch_size_override or cfg["global_batch_size"],
+        "seq_len": train_meta["seq_len"],
+        "vocab_size": train_meta["vocab_size"],
+        "num_puzzle_identifiers": train_meta["num_puzzle_identifiers"],
+        "causal": False,
+    }
+    cfg["data_path"] = str(data_path)
+    with torch.device(device):
+        base = model_cls(arch_cfg)
+        head = loss_head_cls(base, loss_type=arch_cfg["loss"]["loss_type"])
+    if ckpt_name == "__random__":
+        print("[load __random__] random initialization from config")
+        return head, base, cfg, adam_cls, sparse_cls
+    state = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True)
+    # Checkpoint keys may carry an "_orig_mod." marker; drop it before loading into the head.
+    cleaned = {name.replace("_orig_mod.", ""): tensor for name, tensor in state.items()}
+    missing, unexpected = head.load_state_dict(cleaned, strict=False)
+    print(f"[load {ckpt_name}] missing={len(missing)} unexpected={len(unexpected)}")
+    return head, base, cfg, adam_cls, sparse_cls
+
+
+def jvp_train(f, x, v):
+    """Evaluate ``f`` at ``x`` with its Jacobian-vector product along ``v``, keeping the graph.
+
+    Same as ``torch.autograd.functional.jvp`` with ``create_graph=True`` and ``strict=False``,
+    so the returned ``(f(x), jvp)`` pair can be back-propagated through.
+    """
+    value, tangent = torch.autograd.functional.jvp(f, x, v=v, create_graph=True, strict=False)
+    return value, tangent
+
+
+def compute_joint_lyap_spec(model_type, base, batch, k_lyap, lyap_act_steps, device, seed, lyap_start_act=0):
+    """Differentiable joint (z_H, z_L) Lyapunov-spectrum estimate used as the flossing signal.
+
+    A ``k_lyap``-column tangent basis over the stacked state (rows ``[:dim]`` for z_H, rows
+    ``[dim:]`` for z_L) is pushed through every L/H update with graph-building JVPs and
+    re-orthonormalised by QR after each update. The result stays attached to the autograd
+    graph so a loss on it can be back-propagated into the model.
+
+    Args:
+        model_type: ``"trm"`` (H update reuses ``inner.L_level``) or anything else
+            (H update uses ``inner.H_level``).
+        base: model wrapper exposing the recursive model as ``base.inner``.
+        batch: dict with ``inputs`` and ``puzzle_identifiers`` tensors already on ``device``.
+        k_lyap: number of tangent directions to track.
+        lyap_act_steps: number of ACT steps to measure (capped by the remaining step budget).
+        device: device for the tangent basis.
+        seed: seed for the random initial tangent basis.
+        lyap_start_act: number of ACT steps to run, without gradients, before measuring.
+
+    Returns:
+        Tensor with one row of ``k_lyap`` values per sample: the mean log |diag(R)| per update.
+    """
+    inner = base.inner
+    cfg = inner.config
+    bsz = batch["inputs"].shape[0]
+    seq_full = cfg.seq_len + inner.puzzle_emb_len
+    hidden = cfg.hidden_size
+    dim = seq_full * hidden  # flattened size of one latent (z_h or z_l)
+
+    z_h = inner.H_init.unsqueeze(0).expand(bsz, seq_full, hidden).clone().to(inner.forward_dtype)
+    z_l = inner.L_init.unsqueeze(0).expand(bsz, seq_full, hidden).clone().to(inner.forward_dtype)
+    seq_info = {"cos_sin": inner.rotary_emb() if hasattr(inner, "rotary_emb") else None}
+    input_embeddings = inner._input_embeddings(batch["inputs"], batch["puzzle_identifiers"])
+
+    # Optional late-window measurement: first move to a later recursive state
+    # without differentiating through the warmup trajectory. This regularizes
+    # local late-stage stability instead of penalizing useful early expansion.
+    warmup_acts = min(max(lyap_start_act, 0), cfg.halt_max_steps)
+    if warmup_acts > 0:
+        with torch.no_grad():
+            for _act in range(warmup_acts):
+                for _h in range(cfg.H_cycles):
+                    for _l in range(cfg.L_cycles):
+                        z_l = inner.L_level(z_l, z_h + input_embeddings, **seq_info)
+                    if model_type == "trm":
+                        z_h = inner.L_level(z_h, z_l, **seq_info)
+                    else:
+                        z_h = inner.H_level(z_h, z_l, **seq_info)
+        z_h = z_h.detach()
+        z_l = z_l.detach()
+
+    # Random orthonormal start for the joint tangent basis, shape (bsz, 2*dim, k_lyap).
+    gen = torch.Generator(device=device).manual_seed(seed)
+    q0 = torch.randn(bsz, 2 * dim, k_lyap, device=device, dtype=torch.float32, generator=gen)
+    q, _ = torch.linalg.qr(q0)
+    log_r_sum = torch.zeros(bsz, k_lyap, device=device, dtype=torch.float32)
+    n_steps = 0  # number of QR-normalised updates accumulated into log_r_sum
+
+    # At least one ACT step is always measured, even if the warmup used the whole budget.
+    n_act = min(lyap_act_steps, max(cfg.halt_max_steps - warmup_acts, 1))
+    for _act in range(n_act):
+        for _h in range(cfg.H_cycles):
+            for _l in range(cfg.L_cycles):
+                v_h = q[:, :dim, :]
+                v_l = q[:, dim:, :]
+                # L update: the z_l tangent becomes J . (v_l + v_h); the z_h tangent is kept.
+                v_comb = v_h + v_l
+                new_v_l_cols = []
+                f_l = lambda z: inner.L_level(z, z_h + input_embeddings, **seq_info)
+                for i in range(k_lyap):
+                    v_i = v_comb[:, :, i].reshape(bsz, seq_full, hidden).to(inner.forward_dtype)
+                    z_l_new, d_v = jvp_train(f_l, z_l, v_i)
+                    new_v_l_cols.append(d_v.reshape(bsz, dim).to(torch.float32))
+                q = torch.cat([v_h, torch.stack(new_v_l_cols, dim=-1)], dim=1)
+                z_l = z_l_new
+                q, r = torch.linalg.qr(q)
+                log_r_sum = log_r_sum + r.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
+                n_steps += 1
+
+            v_h = q[:, :dim, :]
+            v_l = q[:, dim:, :]
+            # H update: the z_h tangent becomes J' . (v_h + v_l); the z_l tangent is kept.
+            v_comb = v_h + v_l
+            new_v_h_cols = []
+            if model_type == "trm":
+                f_h = lambda z: inner.L_level(z, z_l, **seq_info)
+            else:
+                f_h = lambda z: inner.H_level(z, z_l, **seq_info)
+            for i in range(k_lyap):
+                v_i = v_comb[:, :, i].reshape(bsz, seq_full, hidden).to(inner.forward_dtype)
+                z_h_new, d_v = jvp_train(f_h, z_h, v_i)
+                new_v_h_cols.append(d_v.reshape(bsz, dim).to(torch.float32))
+            q = torch.cat([torch.stack(new_v_h_cols, dim=-1), v_l], dim=1)
+            z_h = z_h_new
+            q, r = torch.linalg.qr(q)
+            log_r_sum = log_r_sum + r.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
+            n_steps += 1
+
+    return log_r_sum / max(n_steps, 1)
+
+
+def floss_loss_from_spec(spec, mode: str, lambda_star: float):
+    """Turn a Lyapunov spectrum into a flossing loss.
+
+    Modes:
+        ``engelken_l2``: mean squared exponent; returns ``spec`` itself as the second value.
+        ``spectrum_cf``: mean squared excess of every exponent above ``lambda_star``.
+        ``volume_cf``: same, applied to the per-sample mean exponent.
+        ``top1_cf``: same, applied to the first exponent only.
+
+    Returns:
+        ``(loss, excess)`` where ``excess`` is the quantity that was squared.
+
+    Raises:
+        ValueError: for an unknown ``mode``.
+    """
+    if mode == "engelken_l2":
+        return (spec ** 2).mean(), spec
+
+    # Each ceiling-style mode penalises only the part of its statistic above lambda_star.
+    statistic_for = {
+        "spectrum_cf": lambda s: s,
+        "volume_cf": lambda s: s.mean(dim=1),
+        "top1_cf": lambda s: s[:, 0],
+    }
+    if mode not in statistic_for:
+        raise ValueError(f"unknown floss mode: {mode}")
+    excess = (statistic_for[mode](spec) - lambda_star).clamp_min(0.0)
+    return (excess ** 2).mean(), excess
+
+
+def load_train_batches(data_path: Path, batch_size: int, n_iters: int, seed: int = 0):
+    """Yield ``n_iters`` random training batches as dicts of int32 tensors.
+
+    The three ``train/all__*.npy`` arrays are loaded once; each batch picks ``batch_size``
+    distinct rows with a NumPy generator seeded by ``seed``.
+    """
+    rng = np.random.default_rng(seed)
+    train_dir = data_path / "train"
+    arrays = {
+        "inputs": np.load(train_dir / "all__inputs.npy"),
+        "labels": np.load(train_dir / "all__labels.npy"),
+        "puzzle_identifiers": np.load(train_dir / "all__puzzle_identifiers.npy"),
+    }
+    n_rows = len(arrays["inputs"])
+    produced = 0
+    while produced < n_iters:
+        picks = rng.choice(n_rows, size=batch_size, replace=False)
+        yield {name: torch.from_numpy(arr[picks].astype(np.int32)) for name, arr in arrays.items()}
+        produced += 1
+
+
+def sample_replay_batch(data_path: Path, n_samples: int, seed: int):
+    """Return one seeded random sample of ``n_samples`` distinct training rows.
+
+    The result is a dict of int32 tensors keyed ``inputs``, ``labels`` and
+    ``puzzle_identifiers``, all selected with the same row indices.
+    """
+    rng = np.random.default_rng(seed)
+    train_dir = data_path / "train"
+    inputs = np.load(train_dir / "all__inputs.npy")
+    labels = np.load(train_dir / "all__labels.npy")
+    pid = np.load(train_dir / "all__puzzle_identifiers.npy")
+    picks = rng.choice(len(inputs), size=n_samples, replace=False)
+
+    def _as_int32_tensor(array):
+        return torch.from_numpy(array[picks].astype(np.int32))
+
+    return {
+        "inputs": _as_int32_tensor(inputs),
+        "labels": _as_int32_tensor(labels),
+        "puzzle_identifiers": _as_int32_tensor(pid),
+    }
+
+
+def move_batch(batch: dict[str, torch.Tensor], device: str):
+    """Return a new dict with every tensor of ``batch`` moved to ``device``."""
+    moved = {}
+    for name, tensor in batch.items():
+        moved[name] = tensor.to(device)
+    return moved
+
+
+def rollout_logits(base, batch, device):
+    """Run ``base`` for ``base.config.halt_max_steps`` ACT steps and return the final logits.
+
+    The initial carry is created under the ``device`` context; the logits come from the
+    outputs of the last step.
+    """
+    with torch.device(device):
+        carry = base.initial_carry(batch)
+    steps_left = base.config.halt_max_steps
+    while steps_left > 0:
+        carry, outputs = base(carry=carry, batch=batch)
+        steps_left -= 1
+    return outputs["logits"]
+
+
+def build_kl_replay(args, base, data_path, device, episode_idx):
+    """Snapshot teacher logits on a replay sample at the start of a floss episode.
+
+    Returns ``None`` when the KL term is disabled (``kl_beta <= 0`` or ``kl_replay_size <= 0``).
+    Otherwise samples ``kl_replay_size`` training rows, rolls the current model out on them in
+    eval mode without gradients, and returns the sample extended with ``teacher_logits``
+    (float32, on CPU) and ``mask`` (``labels > 0``).
+    """
+    kl_disabled = args.kl_beta <= 0 or args.kl_replay_size <= 0
+    if kl_disabled:
+        return None
+
+    replay = sample_replay_batch(
+        data_path,
+        n_samples=args.kl_replay_size,
+        seed=args.seed + 200000 + episode_idx,
+    )
+    total, chunk = args.kl_replay_size, args.kl_batch_size
+    base.eval()
+    teacher_chunks = []
+    with torch.no_grad():
+        for lo in range(0, total, chunk):
+            hi = min(lo + chunk, total)
+            piece = {name: tensor[lo:hi] for name, tensor in replay.items()}
+            logits = rollout_logits(base, move_batch(piece, device), device)
+            teacher_chunks.append(logits.detach().to(torch.float32).cpu())
+
+    replay["teacher_logits"] = torch.cat(teacher_chunks, dim=0)
+    replay["mask"] = replay["labels"] > 0
+    return replay
+
+
+def kl_preservation_loss(args, base, replay, step, device):
+    """KL divergence between the episode-start teacher logits and the current model.
+
+    Args:
+        args: parsed CLI namespace; reads ``kl_batch_size``, ``kl_temperature`` and
+            ``train_puzzle_emb``.
+        base: the model being flossed; rolled out in eval mode for the student logits.
+        replay: dict produced by ``build_kl_replay`` (or ``None`` when the KL term is off).
+        step: floss step index; selects which window of the replay sample is used.
+        device: device for the rollout and the loss.
+
+    Returns:
+        Scalar tensor: zero when ``replay`` is ``None``, otherwise the temperature-scaled KL
+        averaged over positions with ``labels > 0`` (over all positions if there are none).
+    """
+    if replay is None:
+        return torch.zeros((), device=device)
+
+    n_replay = replay["inputs"].shape[0]
+    batch_size = min(args.kl_batch_size, n_replay)
+    # Walk through the replay sample in consecutive windows, wrapping around at the end.
+    start = (step * batch_size) % n_replay
+    if start + batch_size <= n_replay:
+        idx = torch.arange(start, start + batch_size)
+    else:
+        idx = torch.cat([torch.arange(start, n_replay), torch.arange(0, start + batch_size - n_replay)])
+
+    batch = move_batch(
+        {
+            "inputs": replay["inputs"][idx],
+            "labels": replay["labels"][idx],
+            "puzzle_identifiers": replay["puzzle_identifiers"][idx],
+        },
+        device,
+    )
+    teacher_logits = replay["teacher_logits"][idx].to(device)
+    mask = replay["mask"][idx].to(device)
+    # The student rollout runs in eval mode (the teacher logits were also produced in eval
+    # mode), but gradients stay enabled so the loss can be back-propagated.
+    was_training = base.training
+    base.eval()
+    student_logits = rollout_logits(base, batch, device).to(torch.float32)
+    if was_training:
+        base.train()
+        # NOTE(review): this re-applies args.train_puzzle_emb, whereas the only visible caller
+        # (the floss episode) had set the puzzle embedding to frozen; the caller re-freezes it
+        # at the start of its next step. Confirm this is intended.
+        set_puzzle_embedding_mode(base, args.train_puzzle_emb)
+    temp = args.kl_temperature
+    student_logp = F.log_softmax(student_logits / temp, dim=-1)
+    teacher_p = F.softmax(teacher_logits / temp, dim=-1)
+    # F.kl_div takes log-probabilities as input and probabilities as target; the per-token KL
+    # is multiplied by temp**2.
+    kl_per_token = F.kl_div(student_logp, teacher_p, reduction="none").sum(dim=-1) * (temp ** 2)
+    if mask.any():
+        return kl_per_token[mask].mean()
+    return kl_per_token.mean()
+
+
+def evaluate(head, base, data_path, n_samples, batch_size, device, seed=42):
+    """Measure exact-match and token accuracy on a seeded random subset of the test split.
+
+    Args:
+        head: loss-head module; only switched to eval mode here.
+        base: model rolled out for ``base.config.halt_max_steps`` ACT steps per batch.
+        data_path: dataset root containing ``test/all__{inputs,labels,puzzle_identifiers}.npy``.
+        n_samples: number of test rows to evaluate (clamped to the size of the split).
+        batch_size: evaluation mini-batch size.
+        device: device the batches are moved to.
+        seed: seed for the row selection, so repeated evals score the same rows.
+
+    Returns:
+        ``(exact_acc, token_acc)``; only positions with ``labels > 0`` are scored.
+    """
+    rng = np.random.default_rng(seed)
+    inputs = np.load(data_path / "test" / "all__inputs.npy")
+    labels = np.load(data_path / "test" / "all__labels.npy")
+    pid = np.load(data_path / "test" / "all__puzzle_identifiers.npy")
+    # Sampling without replacement cannot return more rows than the split holds; evaluate on
+    # the whole split instead of raising.
+    n_samples = min(n_samples, len(inputs))
+    idx_all = rng.choice(len(inputs), size=n_samples, replace=False)
+    head.eval()
+    correct = 0
+    token_correct = 0
+    token_total = 0
+    for start in range(0, n_samples, batch_size):
+        end = min(start + batch_size, n_samples)
+        idx = idx_all[start:end]
+        batch = {
+            "inputs": torch.from_numpy(inputs[idx].astype(np.int32)).to(device),
+            "labels": torch.from_numpy(labels[idx].astype(np.int32)).to(device),
+            "puzzle_identifiers": torch.from_numpy(pid[idx].astype(np.int32)).to(device),
+        }
+        with torch.no_grad():
+            with torch.device(device):
+                carry = base.initial_carry(batch)
+            for _ in range(base.config.halt_max_steps):
+                carry, outputs = base(carry=carry, batch=batch)
+            preds = outputs["logits"].argmax(dim=-1)
+            mask = batch["labels"] > 0
+            exact = ((preds == batch["labels"]) | ~mask).all(dim=-1).float()
+            correct += exact.sum().item()
+            token_correct += ((preds == batch["labels"]) & mask).sum().item()
+            token_total += mask.sum().item()
+    # max(..., 1) keeps an empty evaluation (n_samples == 0) from dividing by zero.
+    return correct / max(n_samples, 1), token_correct / max(token_total, 1)
+
+
+def write_log(path: str, log: dict):
+    """Serialize ``log`` as 2-space-indented JSON and overwrite the file at ``path`` with it."""
+    serialized = json.dumps(log, indent=2)
+    Path(path).write_text(serialized)
+
+
+def freeze_puzzle_embedding(base):
+    """Put the puzzle-embedding submodule (``base.inner.puzzle_emb``) into eval mode."""
+    puzzle_emb = base.inner.puzzle_emb
+    puzzle_emb.eval()
+
+
+def set_puzzle_embedding_mode(base, train_puzzle_emb: bool):
+    """Switch the puzzle embedding to train mode if ``train_puzzle_emb``, else freeze it."""
+    if not train_puzzle_emb:
+        freeze_puzzle_embedding(base)
+        return
+    base.inner.puzzle_emb.train()
+
+
+def make_optimizers(args, base, head, adam_cls, sparse_cls, lr: float, weight_decay: float, train_puzzle_emb: bool):
+    """Build the optimizer list for one training phase.
+
+    When ``train_puzzle_emb`` is set and the model config reports ``puzzle_emb_ndim > 0``, a
+    sparse optimizer over the puzzle-embedding buffers comes first (its learning rate falls
+    back to ``lr`` when ``args.puzzle_emb_lr`` is ``None``). The dense optimizer over
+    ``head.parameters()`` is always last.
+    """
+    wants_sparse = train_puzzle_emb and getattr(base.inner.config, "puzzle_emb_ndim", 0) > 0
+    sparse_optimizers = []
+    if wants_sparse:
+        emb_lr = args.puzzle_emb_lr if args.puzzle_emb_lr is not None else lr
+        sparse_optimizers = [
+            sparse_cls(
+                base.inner.puzzle_emb.buffers(),
+                lr=emb_lr,
+                weight_decay=args.puzzle_emb_weight_decay,
+                world_size=1,
+            )
+        ]
+    dense_optimizer = adam_cls(head.parameters(), lr=lr, betas=(0.9, 0.95), weight_decay=weight_decay)
+    return [*sparse_optimizers, dense_optimizer]
+
+
+def optim_zero_grad(optimizers):
+    """Clear gradients on every optimizer in order, with ``set_to_none=True``."""
+    for optimizer in optimizers:
+        optimizer.zero_grad(set_to_none=True)
+
+
+def optim_step(optimizers):
+    """Call ``step()`` on every optimizer, in list order."""
+    for optimizer in optimizers:
+        optimizer.step()
+
+
+def run_floss_episode(args, head, base, adam_cls, data_path, device, log, episode_idx, train_step):
+    """Run one floss-only episode: ``args.floss_steps`` optimizer steps on the Lyapunov loss.
+
+    No supervised task loss is used here. Each step computes the joint Lyapunov spectrum on a
+    fresh training batch, back-propagates the flossing loss (plus, when ``kl_beta > 0``, a
+    KL term against logits captured at the start of the episode), clips gradients and steps
+    freshly created optimizers.
+
+    Args:
+        args: parsed CLI namespace (floss, Lyapunov, KL and eval settings are read from it).
+        head: loss-head module whose parameters are optimized.
+        base: the model wrapped by ``head``.
+        adam_cls: dense optimizer class.
+        data_path: dataset root.
+        device: device for batches and computation.
+        log: run log dict; this episode is appended to ``log["floss_episodes"]`` and the
+            optional evaluation to ``log["evals"]``.
+        episode_idx: index of this episode; offsets the batch, replay and tangent seeds.
+        train_step: task step at which the episode runs (recorded in the log only).
+    """
+    print(
+        f"\n=== Floss episode {episode_idx} at train_step={train_step}: "
+        f"{args.floss_steps} steps, mode={args.floss_mode}, lr={args.floss_lr} ===",
+        flush=True,
+    )
+    # Fresh optimizers per episode; train_puzzle_emb=False means no sparse puzzle-embedding
+    # optimizer is created. NOTE(review): args.sparse_cls is not set in the visible code —
+    # presumably attached to args by main(); confirm.
+    optimizers = make_optimizers(
+        args, base, head, adam_cls, args.sparse_cls,
+        lr=args.floss_lr, weight_decay=0.0, train_puzzle_emb=False,
+    )
+    replay = build_kl_replay(args, base, data_path, device, episode_idx)
+    train_iter = load_train_batches(
+        data_path,
+        args.floss_batch_size,
+        args.floss_steps,
+        seed=args.seed + 100000 + episode_idx * 1000,
+    )
+    episode = {"episode": episode_idx, "train_step": train_step, "steps": []}
+    t0 = time.time()
+
+    for step, batch in enumerate(train_iter):
+        batch = {k: v.to(device) for k, v in batch.items()}
+        head.train()
+        set_puzzle_embedding_mode(base, False)
+        spec = compute_joint_lyap_spec(
+            args.model,
+            base,
+            batch,
+            k_lyap=args.k_lyap,
+            lyap_act_steps=args.lyap_act_steps,
+            device=device,
+            seed=args.seed + episode_idx * 10000 + step,
+            lyap_start_act=args.lyap_start_act,
+        )
+        optim_zero_grad(optimizers)
+        floss_loss, excess = floss_loss_from_spec(spec, args.floss_mode, args.lambda_star)
+        floss_loss.backward()
+        # The KL term is back-propagated separately; its gradients accumulate onto the
+        # flossing gradients before clipping.
+        kl_loss = kl_preservation_loss(args, base, replay, step, device)
+        if args.kl_beta > 0:
+            (args.kl_beta * kl_loss).backward()
+        torch.nn.utils.clip_grad_norm_([p for p in head.parameters() if p.requires_grad], 1.0)
+        optim_step(optimizers)
+
+        detached = spec.detach()
+        total_loss = floss_loss.detach() + args.kl_beta * kl_loss.detach()
+        rec = {
+            "step": step,
+            "loss": float(total_loss.item()),
+            "floss_loss": float(floss_loss.item()),
+            "kl_loss": float(kl_loss.item()),
+            "lyap1_mean": float(detached[:, 0].mean().item()),
+            "lyap1_max": float(detached[:, 0].max().item()),
+            "lyap_mean": float(detached.mean().item()),
+            "volume_mean": float(detached.mean(dim=1).mean().item()),
+            "volume_max": float(detached.mean(dim=1).max().item()),
+            # Fraction of entries with positive "excess"; in engelken_l2 mode the excess is
+            # the spectrum itself, so this is the fraction of positive exponents.
+            "frac_active": float((excess.detach() > 0).float().mean().item()),
+        }
+        episode["steps"].append(rec)
+        if step % args.floss_log_every == 0 or step == args.floss_steps - 1:
+            print(
+                f" F[{step:>4}/{args.floss_steps}] dt={time.time() - t0:.1f}s "
+                f"loss={rec['loss']:.6f} floss={rec['floss_loss']:.6f} "
+                f"kl={rec['kl_loss']:.6f} lyap1={rec['lyap1_mean']:+.4f} "
+                f"vol={rec['volume_mean']:+.4f} active={rec['frac_active']:.2f}",
+                flush=True,
+            )
+
+    log["floss_episodes"].append(episode)
+    if args.eval_after_floss:
+        acc, tok_acc = evaluate(head, base, data_path, args.eval_n, args.eval_batch_size, device)
+        print(f" >> FLOSS EVAL train_step={train_step}: exact_acc={acc:.4f}", flush=True)
+        log["evals"].append(
+            {"kind": "after_floss", "train_step": train_step, "episode": episode_idx, "acc": acc, "tok_acc": tok_acc}
+        )
+    write_log(args.out, log)
+
+
+def run_task_step(args, head, base, batch, optimizers, device):
+    """Run one supervised task-training step and return its loss.
+
+    The ACT loss head is called for up to ``base.config.halt_max_steps`` segments, stopping
+    early once it reports that all sequences finished. The summed loss is divided by the
+    number of segments and by the batch size, back-propagated, gradient-clipped to norm 1.0,
+    and the optimizers are stepped.
+    """
+    batch = {name: tensor.to(device) for name, tensor in batch.items()}
+    head.train()
+    set_puzzle_embedding_mode(base, args.train_puzzle_emb)
+    with torch.device(device):
+        carry = base.initial_carry(batch)
+
+    accumulated = 0.0
+    segments = 0
+    for _segment in range(base.config.halt_max_steps):
+        carry, segment_loss, _metrics, _outputs, finished = head(return_keys=[], carry=carry, batch=batch)
+        accumulated = accumulated + segment_loss
+        segments += 1
+        if finished:
+            break
+
+    n_examples = batch["inputs"].shape[0]
+    sup_loss = accumulated / max(segments, 1) / n_examples
+    optim_zero_grad(optimizers)
+    sup_loss.backward()
+    trainable = [p for p in head.parameters() if p.requires_grad]
+    torch.nn.utils.clip_grad_norm_(trainable, 1.0)
+    optim_step(optimizers)
+    return sup_loss
+
+
+def main():  # CLI entry point: interleave supervised task steps with floss episodes, evaluating and logging along the way.
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--model", choices=["hrm", "trm"], required=True)
+ parser.add_argument("--ckpt-root", required=True)
+ parser.add_argument("--ckpt-name", required=True,
+ help="Checkpoint file name, or __random__ to initialize from config without loading weights.")
+ parser.add_argument("--train-steps", type=int, default=10000)
+ parser.add_argument("--batch-size", type=int, default=8)
+ parser.add_argument("--task-batch-size", type=int, default=None,
+ help="Supervised task microbatch size. Defaults to --batch-size.")
+ parser.add_argument("--floss-batch-size", type=int, default=None,
+ help="Flossing microbatch size. Defaults to --batch-size.")
+ parser.add_argument("--train-lr", type=float, default=1e-5)
+ parser.add_argument("--floss-lr", type=float, default=1e-4)
+ parser.add_argument("--floss-steps", type=int, default=500)
+ parser.add_argument("--interfloss-at", default="0,500")
+ parser.add_argument("--interfloss-every", type=int, default=0,
+ help="If >0, also run floss episodes periodically every N task optimizer steps.")
+ parser.add_argument("--interfloss-start", type=int, default=0,
+ help="First task optimizer step for periodic interfloss.")
+ parser.add_argument("--interfloss-stop", type=int, default=-1,
+ help="Last task optimizer step for periodic interfloss. -1 means train_steps.")
+ parser.add_argument("--floss-mode", choices=["engelken_l2", "spectrum_cf", "volume_cf", "top1_cf"], default="engelken_l2")
+ parser.add_argument("--lambda-star", type=float, default=0.0)
+ parser.add_argument("--k-lyap", type=int, default=8)
+ parser.add_argument("--lyap-act-steps", type=int, default=4)
+ parser.add_argument("--lyap-start-act", type=int, default=0,
+ help="Warm up this many ACT steps before measuring/flossing the Lyapunov window.")
+ parser.add_argument("--seed", type=int, default=42)
+ parser.add_argument("--eval-every", type=int, default=1000)
+ parser.add_argument("--eval-n", type=int, default=512)
+ parser.add_argument("--eval-batch-size", type=int, default=32)
+ parser.add_argument("--floss-log-every", type=int, default=10)
+ parser.add_argument("--eval-after-floss", action=argparse.BooleanOptionalAction, default=True)
+ parser.add_argument("--kl-beta", type=float, default=0.0,
+ help="Episode-start replay-logit KL weight during floss-only steps.")
+ parser.add_argument("--kl-replay-size", type=int, default=64)
+ parser.add_argument("--kl-batch-size", type=int, default=8)
+ parser.add_argument("--kl-temperature", type=float, default=1.0)
+ parser.add_argument("--init-seed", type=int, default=None,
+ help="Torch seed used before model construction. Use this for matched from-scratch runs.")
+ parser.add_argument("--train-puzzle-emb", action=argparse.BooleanOptionalAction, default=False,
+ help="Train sparse puzzle embeddings. Requires --batch-size to match the model local embedding batch.")
+ parser.add_argument("--puzzle-emb-lr", type=float, default=None,
+ help="Sparse puzzle embedding LR. Defaults to current phase LR.")
+ parser.add_argument("--puzzle-emb-weight-decay", type=float, default=1.0)
+ parser.add_argument("--out", default="step7_interfloss_log.json")
+ args = parser.parse_args()
+ if args.task_batch_size is None:  # both microbatch sizes fall back to --batch-size
+ args.task_batch_size = args.batch_size
+ if args.floss_batch_size is None:
+ args.floss_batch_size = args.batch_size
+ args.batch_size = args.task_batch_size  # from here on args.batch_size mirrors the task microbatch size
+
+ device = "cuda"  # hard-coded; there is no CPU fallback
+ if args.init_seed is not None:  # seed torch and numpy before the model is built
+ torch.manual_seed(args.init_seed)
+ np.random.seed(args.init_seed)
+ interfloss_steps = build_interfloss_steps(args)
+ head, base, cfg, adam_cls, sparse_cls = load_model(
+ args.model,
+ Path(args.ckpt_root),
+ args.ckpt_name,
+ device,
+ batch_size_override=args.task_batch_size if args.train_puzzle_emb else None,  # override only when puzzle embeddings are trained
+ )
+ args.sparse_cls = sparse_cls  # stashed on args; filtered out of the logged args below
+ data_path = Path(cfg["data_path"])
+
+ print(f"\n=== Initial eval (loaded {args.ckpt_name}) ===")
+ acc0, tok0 = evaluate(head, base, data_path, args.eval_n, args.eval_batch_size, device)
+ print(f" initial: exact_acc={acc0:.4f} token_acc={tok0:.4f}", flush=True)
+
+ log = {
+ "args": {k: v for k, v in vars(args).items() if k != "sparse_cls"},
+ "initial_acc": acc0,
+ "initial_tok_acc": tok0,
+ "interfloss_steps": sorted(interfloss_steps),
+ "task_steps": [],
+ "floss_episodes": [],
+ "evals": [{"kind": "initial", "train_step": 0, "acc": acc0, "tok_acc": tok0}],
+ }
+ write_log(args.out, log)  # persist the log right after the initial eval
+
+ task_optimizers = make_optimizers(
+ args, base, head, adam_cls, sparse_cls,
+ lr=args.train_lr, weight_decay=cfg["weight_decay"], train_puzzle_emb=args.train_puzzle_emb,
+ )
+ train_iter = load_train_batches(data_path, args.task_batch_size, args.train_steps, seed=args.seed)
+ episode_idx = 0
+ t0 = time.time()
+
+ for train_step, batch in enumerate(train_iter):
+ if train_step in interfloss_steps:  # a scheduled floss episode runs before the task step with the same index
+ run_floss_episode(args, head, base, adam_cls, data_path, device, log, episode_idx, train_step)
+ episode_idx += 1
+
+ sup_loss = run_task_step(args, head, base, batch, task_optimizers, device)
+ rec = {"train_step": train_step + 1, "sup_loss": float(sup_loss.item())}  # logged step numbers are 1-based
+ log["task_steps"].append(rec)
+ if train_step % 50 == 0 or train_step == args.train_steps - 1:  # progress line every 50 steps and on the last step
+ print(
+ f" T[{train_step + 1:>5}/{args.train_steps}] dt={time.time() - t0:.1f}s "
+ f"sup={rec['sup_loss']:.4f}",
+ flush=True,
+ )
+
+ if (train_step + 1) % args.eval_every == 0:
+ acc, tok_acc = evaluate(head, base, data_path, args.eval_n, args.eval_batch_size, device)
+ print(f" >> TASK EVAL @ step {train_step + 1}: exact_acc={acc:.4f} delta={acc - acc0:+.4f}", flush=True)
+ log["evals"].append({"kind": "task", "train_step": train_step + 1, "acc": acc, "tok_acc": tok_acc})
+ write_log(args.out, log)
+
+ if args.train_steps in interfloss_steps:  # handles an episode scheduled at step == train_steps
+ run_floss_episode(args, head, base, adam_cls, data_path, device, log, episode_idx, args.train_steps)
+
+ acc_f, tok_f = evaluate(head, base, data_path, args.eval_n, args.eval_batch_size, device)
+ print("\n=== Final eval ===")
+ print(f" initial={acc0:.4f} final={acc_f:.4f} delta={acc_f - acc0:+.4f}", flush=True)
+ log["final_acc"] = acc_f
+ log["final_tok_acc"] = tok_f
+ log["evals"].append({"kind": "final", "train_step": args.train_steps, "acc": acc_f, "tok_acc": tok_f})
+ write_log(args.out, log)
+ print(f"log -> {args.out}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/maze_pred_dump.py b/maze_pred_dump.py
new file mode 100644
index 0000000..ebbc735
--- /dev/null
+++ b/maze_pred_dump.py
@@ -0,0 +1,85 @@
+"""Dump per-cell predictions for a TRM-Maze checkpoint (plain forward, no JVP) so we can
+analyze WHERE failure errors are (connected detour = coherent stable wrong path vs scattered).
+Saves preds, labels, inputs, exact_correct, idx, and per-ACT-step answer/latent drift arrays for n test puzzles.
+"""
+from __future__ import annotations
+import sys, argparse
+from pathlib import Path
+import numpy as np
+import torch
+
+sys.path.insert(0, "/home/yurenh2/rrm/research/flossing")
+from diagnose_trm_joint_maze import load_model, load_test_samples # att+maze-capable loader
+
+
+def main():  # Run the checkpoint on the test samples and save final predictions plus per-ACT-step drift arrays to --out.
+ ap = argparse.ArgumentParser()
+ ap.add_argument("--ckpt-root", required=True)
+ ap.add_argument("--ckpt-name", default="step_130200")
+ ap.add_argument("--data", required=True)
+ ap.add_argument("--n", type=int, default=512)
+ ap.add_argument("--batch-size", type=int, default=32)
+ ap.add_argument("--seed", type=int, default=0)
+ ap.add_argument("--out", required=True)
+ args = ap.parse_args()
+ device = "cuda"  # hard-coded; there is no CPU fallback
+ model, cfg, train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device)
+ inner = model.inner
+ test = load_test_samples(Path(args.data), args.n, 0, 1, args.seed)
+ n = len(test["inputs"])  # actual number of loaded samples
+ pe = inner.puzzle_emb_len  # leading positions stripped from the logits before decoding (see [:, pe:] below)
+
+ preds_all, labels_all, inputs_all, exact_all, idx_all = [], [], [], [], []
+ ans_drift_full_all, ans_drift_ans_all, ldrift_all = [], [], []
+ for s in range(0, n, args.batch_size):
+ e = min(s + args.batch_size, n)
+ batch = {k: test[k][s:e].to(device) for k in ["inputs", "labels", "puzzle_identifiers"]}
+ B = batch["inputs"].shape[0]
+ seq_full = inner.config.seq_len + pe
+ hidden = inner.config.hidden_size
+ with torch.no_grad():
+ z_H = inner.H_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)  # initial state expanded to (B, seq_full, hidden)
+ z_L = inner.L_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
+ seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None)
+ inp_emb = inner._input_embeddings(batch["inputs"], batch["puzzle_identifiers"])
+ labels = batch["labels"]
+ ans_mask = (labels != batch["inputs"]) # SOLUTION-SPACE cells: where task requires change
+ prev = None; prev_zH = None
+ adrift_full, adrift_ans, ldrift = [], [], [] # answer Hamming drift + LATENT z_H drift
+ for _ in range(inner.config.halt_max_steps):  # always runs halt_max_steps steps; there is no early halting here
+ for _h in range(inner.config.H_cycles):
+ for _l in range(inner.config.L_cycles):
+ z_L = inner.L_level(z_L, z_H + inp_emb, **seq_info)
+ z_H = inner.L_level(z_H, z_L, **seq_info)  # z_H is also updated through L_level
+ p = inner.lm_head(z_H)[:, pe:].float().argmax(-1) # decode answer THIS step
+ if prev is None:  # first step has no predecessor, so all drifts are recorded as zero
+ adrift_full.append(torch.zeros(B, device=device)); adrift_ans.append(torch.zeros(B, device=device))
+ ldrift.append(torch.zeros(B, device=device))
+ else:
+ adrift_full.append((p != prev).float().sum(-1))  # number of cells whose decoded token changed since the previous step
+ adrift_ans.append(((p != prev) & ans_mask).float().sum(-1))  # same count, restricted to solution-space cells
+ ldrift.append((z_H - prev_zH).float().flatten(1).norm(dim=1)) # latent z_H drift
+ prev = p; prev_zH = z_H.detach()
+ preds = prev  # predictions decoded at the last step
+ mask = labels > 0  # only cells with label > 0 count toward exact match
+ exact = ((preds == labels) | ~mask).all(-1)
+ preds_all.append(preds.cpu().numpy()); labels_all.append(labels.cpu().numpy())
+ inputs_all.append(batch["inputs"].cpu().numpy()); exact_all.append(exact.cpu().numpy())
+ idx_all.append(test["idx"][s:e])
+ ans_drift_full_all.append(torch.stack(adrift_full, 1).cpu().numpy())
+ ans_drift_ans_all.append(torch.stack(adrift_ans, 1).cpu().numpy())
+ ldrift_all.append(torch.stack(ldrift, 1).cpu().numpy())
+ print(f" [{e}/{n}] exact={exact.float().mean():.3f}", flush=True)
+
+ np.savez_compressed(args.out,
+ preds=np.concatenate(preds_all), labels=np.concatenate(labels_all),
+ inputs=np.concatenate(inputs_all), exact_correct=np.concatenate(exact_all).astype(np.float32),
+ idx=np.concatenate(idx_all),
+ ans_drift_full=np.concatenate(ans_drift_full_all), # (N, steps) decoded-answer Hamming drift over all cells
+ ans_drift_ans=np.concatenate(ans_drift_ans_all), # (N, steps) decoded-answer Hamming drift over solution-space cells only
+ drift_zH=np.concatenate(ldrift_all)) # (N, steps) L2 norm of the z_H change between consecutive steps
+ print("saved", args.out)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/meeting_artifacts/trm_multi4_dynamics_report.md b/meeting_artifacts/trm_multi4_dynamics_report.md
new file mode 100644
index 0000000..184656c
--- /dev/null
+++ b/meeting_artifacts/trm_multi4_dynamics_report.md
@@ -0,0 +1,57 @@
+# TRM multi4 Dynamics Report
+
+Date: 2026-06-02
+
+## Main Result
+
+Official GBS768 Sudoku-Extreme runs show a real best-checkpoint gain from trajectory perturbation training.
+
+| model | step | full exact | full token acc | dynamics sample exact | lambda1 mean | top-8 mean | positive count / 8 |
+|---|---:|---:|---:|---:|---:|---:|---:|
+| TRM baseline best | 58590 | 0.8686 | 0.9508 | 0.8750 | +0.0282 | +0.0135 | 7.84 |
+| TRM multi4 best | 35805 | 0.8965 | 0.9604 | 0.9004 | +0.0204 | +0.0066 | 3.84 |
+| TRM multi4 final | 65100 | 0.8351 | 0.9350 | 0.8242 | +0.0323 | +0.0185 | 8.00 |
+
+Interpretation: multi4 best improves full-test exact accuracy by +2.78pp over baseline best, while suppressing the Lyapunov spectrum volume on successful trajectories. The final checkpoint collapses below baseline and its spectrum becomes broadly positive again.
+
+## Success vs Failure Separation
+
+The N=512 spectrum sample reproduces the original observation: failures have much larger positive exponents.
+
+| model | success lambda1 | failure lambda1 | success top-8 mean | failure top-8 mean | success positive count | failure positive count |
+|---|---:|---:|---:|---:|---:|---:|
+| TRM baseline best | +0.0176 | +0.1026 | +0.0080 | +0.0516 | 7.82 | 8.00 |
+| TRM multi4 best | +0.0112 | +0.1035 | +0.0020 | +0.0485 | 3.38 | 8.00 |
+| TRM multi4 final | +0.0191 | +0.0942 | +0.0129 | +0.0449 | 8.00 | 8.00 |
+
+Interpretation: multi4 does not make failures stable; it mainly shifts successful solutions into less chaotic basins. This is the right story for stable attractor training: success becomes less volume-expanding, while bad basins remain chaotic.
+
+## PTRM Same-Subset Result
+
+Existing PTRM paired files already use identical `idx` with `n=1000`, `K=100`, `D=64`, `sigma=0.3`, L-only perturbation.
+
+| metric | baseline best | multi4 best | delta |
+|---|---:|---:|---:|
+| deterministic exact on subset | 0.8870 | 0.9110 | +0.0240 |
+| mean rollout exact | 0.9419 | 0.9542 | +0.0123 |
+| Q-selected exact | 0.9840 | 0.9880 | +0.0040 |
+| oracle pass@100 | 0.9850 | 0.9880 | +0.0030 |
+| correct rollout count / 100 | 94.19 | 95.42 | +1.23 |
+
+Interpretation: PTRM is nearly saturated on Sudoku, so Q-selected/oracle improvements are small. The stronger signal is that multi4 increases deterministic accuracy and the density of correct stochastic rollouts on the same test subset.
+
+## Caveats
+
+- Full TRM eval uses all 422,786 Sudoku-Extreme test puzzles. PTRM runs above use a fixed 1,000-sample subset, so deterministic PTRM-subset numbers should not be mixed with full-test W&B exact accuracy.
+- The N=512 Lyapunov spectrum is a diagnostic sample, not full-test evaluation.
+- Final checkpoint is not the result to report as the method's performance; it is an overtraining/collapse diagnostic.
+- A larger paired PTRM run is currently queued: `n=10000`, `K=25`, `D=64`, `sigma=0.3`, L-only, same seed. It is slow and should be treated as a robustness check, not a blocker.
+
+## Artifacts
+
+- Headline CSV: `research/flossing/official_gbs768_spectrum/headline_trm_multi4_dynamics_table.csv`
+- Spectrum summary CSV: `research/flossing/official_gbs768_spectrum/summary_n512_k8_seed20260602.csv`
+- PTRM paired CSV: `research/flossing/ptrm_same_subset/paired_ptrm_k100_n1000_seed0_summary.csv`
+- Spectrum plot: `research/flossing/meeting_artifacts/trm_gbs768_spectrum_success_failure_n512.png`
+- Headline bar plot: `research/flossing/meeting_artifacts/trm_gbs768_headline_dynamics_bars.png`
+- PTRM plot: `research/flossing/meeting_artifacts/ptrm_same_subset_k100_n1000_comparison.png`
diff --git a/meeting_artifacts_v2/fig5_qhead_vs_lambda1_ptrm_summary.csv b/meeting_artifacts_v2/fig5_qhead_vs_lambda1_ptrm_summary.csv
new file mode 100644
index 0000000..bcbf401
--- /dev/null
+++ b/meeting_artifacts_v2/fig5_qhead_vs_lambda1_ptrm_summary.csv
@@ -0,0 +1,3 @@
+name,path,n_samples,rollouts,mean_rollout_exact,q_max_exact,lambda_min_exact,oracle_pass_exact,q_lambda_same_argmax_frac,global_pearson_q_vs_stability,global_spearman_q_vs_stability,within_problem_pearson_mean,within_problem_spearman_mean,q_success_mean,q_fail_mean,lambda_success_mean,lambda_fail_mean,mixed_problem_count,zero_success_problem_count,full_success_problem_count,mixed_global_pearson_q_vs_stability,mixed_q_max_exact,mixed_lambda_min_exact,mixed_oracle_exact
+TRM baseline + PTRM rollouts,research/flossing/q_lambda_scatter/base58590_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.npz,512.0,25.0,0.93890625,0.97265625,0.97265625,0.97265625,0.04296875,0.7524171235289728,-0.025655130770112743,0.12497240516050691,0.09656841990607949,7.796669578964886,-10.659986413043478,5.632336168325687,6.921443493469901,58.0,14.0,440.0,0.7857503437605575,1.0,1.0,1.0
+TRM multi4 + PTRM rollouts,research/flossing/q_lambda_scatter/multi4_35805_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.npz,512.0,25.0,0.944609375,0.974609375,0.974609375,0.974609375,0.0859375,0.7289674235313497,-0.13633997124282113,0.18866459504767938,0.11859038341758327,7.7404597169382185,-10.081937588152327,5.7459801223593345,7.271883726456269,43.0,13.0,456.0,0.7911647653323942,1.0,1.0,1.0
diff --git a/meeting_artifacts_v2/hrm_trm_redesigned_summary.csv b/meeting_artifacts_v2/hrm_trm_redesigned_summary.csv
new file mode 100644
index 0000000..c5a4ad0
--- /dev/null
+++ b/meeting_artifacts_v2/hrm_trm_redesigned_summary.csv
@@ -0,0 +1,7 @@
+model,label,step,full_exact,sample_exact,lambda1_all,mean8_all,pos_count_all,lambda1_success,lambda1_fail,mean8_success,mean8_fail,pos_count_success,pos_count_fail
+HRM,baseline best,26040,0.5265287756919861,0.5,-0.0569394779099639,-0.10732079181130239,0.98046875,-0.14642834789538028,0.03254939207545249,-0.18957092847483636,-0.025070655147768406,0.01953125,1.94140625
+HRM,multi4 best,23436,0.6443189,0.654296875,-0.04733450568929953,-0.112159715874796,1.166015625,-0.10103540500250659,0.054302789621007604,-0.16998952501653278,-0.0027078172167066595,0.05970149253731343,3.2598870056497176
+HRM,multi4 final,26040,0.46235448,0.4296875,0.02874533511322852,-0.040662085491063316,1.626953125,0.03573289099003887,0.02348073821974127,-0.053027883530268646,-0.031345388338237384,1.8181818181818181,1.4828767123287672
+TRM,baseline best,58590,0.8686309456825256,0.875,0.02823458132615997,0.013457294571722192,7.841796875,0.01761167685357837,0.10259491263423115,0.008003413600119422,0.05163446137294159,7.819196428571429,8.0
+TRM,multi4 best,35805,0.8964653611183167,0.900390625,0.020381716455975862,0.0065844104191477015,3.841796875,0.01118387160416574,0.10352301992037717,0.0019524994650864183,0.04845325257252518,3.3817787418655096,8.0
+TRM,multi4 final,65100,0.8350536823272705,0.82421875,0.03232463403946895,0.018508151198432188,8.0,0.01912124014472792,0.09423388096814354,0.012883653437293365,0.04488079625621645,8.0,8.0
diff --git a/meeting_artifacts_v2/meeting_figures_v2_report.md b/meeting_artifacts_v2/meeting_figures_v2_report.md
new file mode 100644
index 0000000..5158c99
--- /dev/null
+++ b/meeting_artifacts_v2/meeting_figures_v2_report.md
@@ -0,0 +1,26 @@
+# Meeting Figures v2
+
+## Figure Strategy
+
+0. `fig0_motivation_lambda1_success_failure_hrm_trm.png`: first-exponent success/failure distribution in HRM and TRM. This motivates chaos as a detector before introducing the method.
+1. `fig1_hrm_trm_training_curves.png`: performance over training for HRM and TRM. This answers whether the method improves accuracy and where best/final are.
+2. `fig2_accuracy_vs_chaotic_volume_phase.png`: phase view, with accuracy versus mean top-8 Lyapunov exponent. This answers whether better checkpoints are dynamically more stable.
+3. `fig3_hrm_trm_success_failure_spectra.png`: full success/failure spectrum separation for HRM and TRM best checkpoints. This extends Fig0 beyond λ1.
+4. `fig4_ptrm_same_subset_comparison.png`: PTRM same-subset result. This is a secondary inference-time story.
+5. `fig5_qhead_vs_lambda1_ptrm.png`: PTRM Q-head halt logit versus finite-difference stability proxy `-lambda_1`. The bottom row isolates mixed problems where trajectory selection actually matters.
+
+## Key Numbers
+
+- HRM baseline best: 0.5265 exact. HRM multi4 best: 0.6443 exact. HRM multi4 final: 0.4624 exact.
+- TRM baseline best: 0.8686 exact. TRM multi4 best: 0.8965 exact. TRM multi4 final: 0.8351 exact.
+- HRM multi4 best dynamics sample: mean top-8 exponent -0.1122; final -0.0407.
+- TRM multi4 best dynamics sample: mean top-8 exponent +0.0066; final +0.0185.
+- PTRM same subset, K=100: Q-selected 0.984 -> 0.988; mean rollout 0.942 -> 0.954.
+- PTRM Q-vs-stability, K=25/N=512: mixed-problem Pearson is 0.786 for baseline and 0.791 for multi4. In both runs, Q-max selection and lambda-min selection reach the same oracle exact accuracy on this subset.
+
+## Caveats
+
+- Dynamics spectra use N=512 diagnostic samples, not the full test set.
+- PTRM numbers use a fixed N=1000 subset; do not mix its deterministic subset accuracy with full-test W&B exact accuracy.
+- Final checkpoints are collapse diagnostics, not the method's reported performance.
+- Q-head is not a pure lambda ranker: global Spearman is weak because most problems are all-success/all-failure across K rollouts. The strongest evidence is the mixed-problem class separation and selector equivalence.
diff --git a/merge_and_analyze.py b/merge_and_analyze.py
new file mode 100644
index 0000000..3aeb28f
--- /dev/null
+++ b/merge_and_analyze.py
@@ -0,0 +1,157 @@
+"""Merge shard npz files and produce top-k Lyapunov analysis + plots."""
+import numpy as np
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+from scipy import stats
+import argparse, glob, os
+
+ap = argparse.ArgumentParser()
+ap.add_argument("--in-glob", default="/home/yurenh2/rrm/research/flossing/diag_8k_shard*.npz")
+ap.add_argument("--out-dir", default="/home/yurenh2/rrm/research/flossing/plots_topk")
+ap.add_argument("--merged-npz", default="/home/yurenh2/rrm/research/flossing/diag_8k.npz")
+args = ap.parse_args()
+
+os.makedirs(args.out_dir, exist_ok=True)
+
+files = sorted(glob.glob(args.in_glob))
+print(f"Merging {len(files)} files:")
+for f in files: print(f" {f}")
+
+merged = {}
+for f in files:
+ d = np.load(f)
+ for k in d.files:
+ merged.setdefault(k, []).append(d[k])
+for k in list(merged.keys()):
+ merged[k] = np.concatenate(merged[k], axis=0)
+np.savez_compressed(args.merged_npz, **merged)
+print(f"Merged → {args.merged_npz}")
+print(f"N={len(merged['exact_correct'])}")
+
+lyap_spec = merged["lyap_spec"] # (N, K)
+exact = merged["exact_correct"].astype(bool)
+token_acc = merged["token_acc"]
+drift_zH = merged["drift_zH"]
+drift_zL = merged["drift_zL"]
+q_halt = merged["q_halt"]; q_continue = merged["q_continue"]
+N, K = lyap_spec.shape
+print(f"K (top-k) = {K}")
+
+succ = lyap_spec[exact]; fail = lyap_spec[~exact]
+print(f"acc = {exact.mean():.4f}")
+
+# --- per-lambda stats ---
+print(f"\n--per-λ stats (Δ = mean_fail - mean_succ; positive ⇒ failures are LESS contractive) --")
+print(f"{'i':>3} {'mean_all':>10} {'mean_succ':>10} {'mean_fail':>10} {'Δ':>8} {'cohen_d':>8} {'auroc':>7} {'p_t':>10}")
+for i in range(K):
+ li = lyap_spec[:, i]; ls = li[exact]; lf = li[~exact]
+ pooled = np.sqrt(((ls.size-1)*ls.var() + (lf.size-1)*lf.var()) / (ls.size+lf.size-2))
+ d_c = (ls.mean() - lf.mean()) / pooled
+ delta = lf.mean() - ls.mean()
+ auc = stats.mannwhitneyu(lf, ls, alternative="greater").statistic / (lf.size * ls.size)
+ t, p = stats.ttest_ind(ls, lf, equal_var=False)
+ print(f"{i+1:>3} {li.mean():+10.4f} {ls.mean():+10.4f} {lf.mean():+10.4f} {delta:+8.4f} {abs(d_c):>8.3f} {auc:>7.3f} {p:>10.2e}")
+
+# --- combined predictor: which λ best separates? sum? mean? λ_1 alone? ---
+print(f"\n--combined predictors--")
+def auc(score, target):
+ # AUC: probability the score is higher for fail than succ
+ return stats.mannwhitneyu(score[~exact], score[exact], alternative="greater").statistic / ((~exact).sum() * exact.sum())
+for label, score in [
+ ("λ_1 alone", lyap_spec[:,0]),
+ ("λ_K alone", lyap_spec[:,-1]),
+ ("mean λ", lyap_spec.mean(axis=1)),
+ ("λ_1 + λ_K", lyap_spec[:,0] + lyap_spec[:,-1]),
+ ("sum λ", lyap_spec.sum(axis=1)),
+ ("max(λ) (= λ_1)", lyap_spec.max(axis=1)),
+]:
+ print(f" {label:18s}: AUROC = {auc(score, ~exact):.4f}")
+
+# --- plot 1: Lyapunov spectrum mean ± std ---
+fig, ax = plt.subplots(1, 1, figsize=(7,5))
+mean_s = succ.mean(0); std_s = succ.std(0)
+mean_f = fail.mean(0); std_f = fail.std(0)
+x = np.arange(1, K+1)
+ax.fill_between(x, mean_s-std_s, mean_s+std_s, color="C0", alpha=0.25)
+ax.plot(x, mean_s, "C0-o", label=f"success (n={succ.shape[0]})", lw=2)
+ax.fill_between(x, mean_f-std_f, mean_f+std_f, color="C3", alpha=0.25)
+ax.plot(x, mean_f, "C3-o", label=f"failure (n={fail.shape[0]})", lw=2)
+ax.axhline(0, color="k", ls=":", lw=0.6)
+ax.set_xlabel(r"Lyapunov index $i$")
+ax.set_ylabel(r"$\lambda_i$ (per inner cycle)")
+ax.set_title(f"HRM Sudoku-Extreme-1k @ step_26040: top-{K} Lyapunov spectrum\n"
+ f"N={N}, acc={exact.mean():.3f}")
+ax.legend()
+ax.grid(alpha=0.3)
+fig.tight_layout()
+fig.savefig(f"{args.out_dir}/lyap_spectrum.png", dpi=130)
+plt.close()
+
+# --- plot 2: λ_1 hist (re-do at 8k) ---
+fig, ax = plt.subplots(1, 1, figsize=(7,4))
+lo = min(lyap_spec[:,0].min(), -1.5); hi = max(lyap_spec[:,0].max(), -0.1)
+bins = np.linspace(lo, hi, 60)
+ax.hist(succ[:,0], bins=bins, alpha=0.55, label=f"success (n={succ.shape[0]})", color="C0", density=True)
+ax.hist(fail[:,0], bins=bins, alpha=0.55, label=f"failure (n={fail.shape[0]})", color="C3", density=True)
+ax.axvline(succ[:,0].mean(), color="C0", ls="--", lw=1)
+ax.axvline(fail[:,0].mean(), color="C3", ls="--", lw=1)
+ax.set_xlabel(r"$\lambda_1$ (top Lyapunov exponent)")
+ax.set_ylabel("density")
+auc1 = auc(lyap_spec[:,0], ~exact)
+ax.set_title(f"$\\lambda_1$ distribution by outcome (N={N}, AUROC={auc1:.3f})")
+ax.legend()
+fig.tight_layout()
+fig.savefig(f"{args.out_dir}/lyap1_hist.png", dpi=130)
+plt.close()
+
+# --- plot 3: 2D histogram of (λ_1, λ_8) ---
+fig, ax = plt.subplots(1, 1, figsize=(6.5,5.5))
+ax.scatter(lyap_spec[exact,0], lyap_spec[exact,-1], s=3, alpha=0.3, color="C0", label="success")
+ax.scatter(lyap_spec[~exact,0], lyap_spec[~exact,-1], s=3, alpha=0.3, color="C3", label="failure")
+ax.set_xlabel(r"$\lambda_1$")
+ax.set_ylabel(r"$\lambda_{%d}$" % K)
+ax.set_title(f"Top vs bottom of the top-{K} spectrum")
+ax.legend()
+ax.plot([-1.5,0], [-1.5,0], "k:", lw=0.6, label="diag")
+fig.tight_layout()
+fig.savefig(f"{args.out_dir}/lyap1_vs_lyapK.png", dpi=130)
+plt.close()
+
+# --- plot 4: spectrum slope and gap analysis ---
+slope = (lyap_spec[:, 0] - lyap_spec[:, -1]) # λ_1 - λ_K = "spread"
+spread_succ = slope[exact]; spread_fail = slope[~exact]
+fig, ax = plt.subplots(1, 1, figsize=(6,4))
+ax.hist(spread_succ, bins=40, alpha=0.55, label=f"success", color="C0", density=True)
+ax.hist(spread_fail, bins=40, alpha=0.55, label=f"failure", color="C3", density=True)
+ax.set_xlabel(r"$\lambda_1 - \lambda_{%d}$ (spectrum spread)" % K)
+ax.set_ylabel("density")
+spread_auc = auc(slope, ~exact)
+ax.set_title(f"Top-bottom Lyapunov spread (AUROC={spread_auc:.3f})\n"
+ f"succ={spread_succ.mean():.4f}±{spread_succ.std():.4f}, "
+ f"fail={spread_fail.mean():.4f}±{spread_fail.std():.4f}")
+ax.legend()
+fig.tight_layout()
+fig.savefig(f"{args.out_dir}/lyap_spread.png", dpi=130)
+plt.close()
+
+# --- plot 5: drift per ACT step (revisited) ---
+fig, axes = plt.subplots(1, 2, figsize=(12,4))
+for ax, drift, name in [(axes[0], drift_zH, "$\\|\\Delta z_H\\|$"),
+ (axes[1], drift_zL, "$\\|\\Delta z_L\\|$")]:
+ mean_s = drift[exact].mean(0); std_s = drift[exact].std(0)
+ mean_f = drift[~exact].mean(0); std_f = drift[~exact].std(0)
+ x = np.arange(drift.shape[1])
+ ax.fill_between(x, mean_s-std_s, mean_s+std_s, color="C0", alpha=0.2)
+ ax.plot(x, mean_s, "C0-o", label="success", lw=2)
+ ax.fill_between(x, mean_f-std_f, mean_f+std_f, color="C3", alpha=0.2)
+ ax.plot(x, mean_f, "C3-o", label="failure", lw=2)
+ ax.set_xlabel("ACT step")
+ ax.set_title(name)
+ ax.legend()
+ ax.set_yscale("log")
+fig.tight_layout()
+fig.savefig(f"{args.out_dir}/drift_per_act.png", dpi=130)
+plt.close()
+
+print(f"\nplots saved to {args.out_dir}/")
diff --git a/monitor_dynamics_experiments.sh b/monitor_dynamics_experiments.sh
new file mode 100755
index 0000000..b7e74b9
--- /dev/null
+++ b/monitor_dynamics_experiments.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+set -eo pipefail  # NOTE(review): -u is not set, presumably because conda's activate scripts read unset variables; confirm
+# Poll for running dynamics experiments, log changes in the process list, and refresh the report until none remain.
+FLOSS_DIR="/home/yurenh2/rrm/research/flossing"
+CONDA_SH="/home/yurenh2/miniconda3/etc/profile.d/conda.sh"
+LOG="${FLOSS_DIR}/monitor_dynamics_experiments.log"
+REPORT="${FLOSS_DIR}/dynamics_experiment_report.md"
+# Extended regex that pgrep -f matches against the full command line of each monitored job.
+PATTERN='step3_M_volume_cf_26040_lstar_neg015|step7_C_hrm_engelken_interfloss_kl10|step7_D_trm_engelken_interfloss_kl10|step7_I_hrm_engelken_interfloss_kl100_short|launch_dynamics_variants_queue|step7_E_hrm_late|step7_F_trm_late|step7_G_hrm_volume|step7_H_trm_volume|step8_A_hrm_basin|step8_B_trm_basin|step9_[A-Z]_|launch_trajectory_perturb_queue|launch_trajectory_sampling_long'
+
+cd "${FLOSS_DIR}"
+source "${CONDA_SH}"
+conda activate rrm
+
+echo "[$(date --iso-8601=seconds)] monitor started" >> "${LOG}"
+echo "[$(date --iso-8601=seconds)] report target: ${REPORT}" >> "${LOG}"
+
+last_snapshot=""
+while true; do
+  active="$(pgrep -af "${PATTERN}" || true)"  # pgrep exits 1 when nothing matches; do not let set -e abort on that
+  if [[ -z "${active}" ]]; then
+    echo "[$(date --iso-8601=seconds)] no active monitored processes; generating final report" >> "${LOG}"
+    python analyze_dynamics_experiments.py > dynamics_experiment_report.stdout 2>&1
+    echo "[$(date --iso-8601=seconds)] final report generated" >> "${LOG}"
+    exit 0
+  fi
+  # Squeeze runs of spaces and clip each line to 260 characters so log entries stay compact.
+  snapshot="$(printf '%s\n' "${active}" | tr -s ' ' | cut -c1-260)"
+  if [[ "${snapshot}" != "${last_snapshot}" ]]; then  # log only when the process list changed
+    echo "[$(date --iso-8601=seconds)] active processes:" >> "${LOG}"
+    printf '%s\n' "${snapshot}" >> "${LOG}"
+    last_snapshot="${snapshot}"
+  fi
+
+  # Keep a rolling partial report useful while long queues are still running.
+  python analyze_dynamics_experiments.py > dynamics_experiment_report.stdout 2>&1 || true  # best-effort: a failed partial report must not stop the monitor
+  sleep 300
+done
diff --git a/multi4_eval_compare/hrm_baseline_eval.csv b/multi4_eval_compare/hrm_baseline_eval.csv
new file mode 100644
index 0000000..afc2b27
--- /dev/null
+++ b/multi4_eval_compare/hrm_baseline_eval.csv
@@ -0,0 +1,11 @@
+run,step,all/exact_accuracy,all/accuracy,all/lm_loss,all/q_halt_accuracy,all/q_halt_loss,all/steps
+hrm_baseline,2604,0.016369983553886414,0.6336106061935425,0.8522627949714661,0.9927079081535339,0.020887982100248337,16
+hrm_baseline,5208,0.06169315055012703,0.6744286417961121,0.7430469989776611,0.9772745370864868,0.07281211763620377,16
+hrm_baseline,7812,0.1358133852481842,0.6985693573951721,0.676047682762146,0.996868371963501,0.0211492907255888,16
+hrm_baseline,10416,0.20248068869113922,0.7259970307350159,0.6480565667152405,0.9983821511268616,0.017285015434026718,16
+hrm_baseline,13020,0.3024248778820038,0.7580602169036865,0.5614010691642761,0.9971805810928345,0.02416178770363331,16
+hrm_baseline,15624,0.36705803871154785,0.7842973470687866,0.49664124846458435,0.9964591860771179,0.02256467007100582,16
+hrm_baseline,18228,0.46287721395492554,0.8077820539474487,0.4641122817993164,0.9953309893608093,0.02509351819753647,16
+hrm_baseline,20832,0.4912721812725067,0.8198283314704895,0.4137057960033417,0.9955745935440063,0.02796635963022709,16
+hrm_baseline,23436,0.5193856954574585,0.8260135054588318,0.41173747181892395,0.9975590705871582,0.024772455915808678,16
+hrm_baseline,26040,0.5265287756919861,0.8270824551582336,0.4161679148674011,0.9960216283798218,0.03298119083046913,16
diff --git a/multi4_eval_compare/hrm_honly_step26040_eval.csv b/multi4_eval_compare/hrm_honly_step26040_eval.csv
new file mode 100644
index 0000000..d3e3ab9
--- /dev/null
+++ b/multi4_eval_compare/hrm_honly_step26040_eval.csv
@@ -0,0 +1,2 @@
+step,accuracy,exact_accuracy,lm_loss,q_halt_accuracy,q_halt_loss,steps
+26040,0.85720146,0.62264127,0.35935268,0.99625105,0.024241636,16.0
diff --git a/multi4_eval_compare/hrm_matched_compare.csv b/multi4_eval_compare/hrm_matched_compare.csv
new file mode 100644
index 0000000..71b8e16
--- /dev/null
+++ b/multi4_eval_compare/hrm_matched_compare.csv
@@ -0,0 +1,6 @@
+family,step,base_exact,multi4_exact,delta_exact,base_acc,multi4_acc,delta_acc,base_loss,multi4_loss,delta_loss,base_steps,multi4_steps
+hrm,2604,0.016369983553886414,0.0123774204403162,-0.003992563113570213,0.6336106061935425,0.6303551197052002,-0.003255486488342285,0.8522627949714661,0.8603715300559998,0.008108735084533691,16,16
+hrm,5208,0.06169315055012703,0.025012653321027756,-0.036680497229099274,0.6744286417961121,0.6685170531272888,-0.005911588668823242,0.7430469989776611,0.7412638068199158,-0.0017831921577453613,16,16
+hrm,7812,0.1358133852481842,0.04651052877306938,-0.08930285647511482,0.6985693573951721,0.687346339225769,-0.011223018169403076,0.676047682762146,0.7044885158538818,0.02844083309173584,16,16
+hrm,10416,0.20248068869113922,0.2006617933511734,-0.0018188953399658203,0.7259970307350159,0.7243355512619019,-0.0016614794731140137,0.6480565667152405,0.6340938210487366,-0.013962745666503906,16,16
+hrm,13020,0.3024248778820038,0.34697696566581726,0.04455208778381348,0.7580602169036865,0.7794705033302307,0.02141028642654419,0.5614010691642761,0.49915269017219543,-0.06224837899208069,16,16
diff --git a/multi4_eval_compare/hrm_multi4_complete_eval.csv b/multi4_eval_compare/hrm_multi4_complete_eval.csv
new file mode 100644
index 0000000..9aac158
--- /dev/null
+++ b/multi4_eval_compare/hrm_multi4_complete_eval.csv
@@ -0,0 +1,11 @@
+run,step,exact,accuracy,loss
+hrm_multi4,2604,0.0123774204403162,0.6303551197052002,0.8603715300559998
+hrm_multi4,5208,0.025012653321027756,0.6685170531272888,0.7412638068199158
+hrm_multi4,7812,0.04651052877306938,0.687346339225769,0.7044885158538818
+hrm_multi4,10416,0.2006617933511734,0.7243355512619019,0.6340938210487366
+hrm_multi4,13020,0.34697696566581726,0.7794705033302307,0.49915269017219543
+hrm_multi4,15624,0.4653252363204956,0.8156101703643799,0.42743897438049316
+hrm_multi4,18228,0.5790873169898987,0.8494690656661987,0.3459467887878418
+hrm_multi4,20832,0.6393187,0.8660642,0.3186217
+hrm_multi4,23436,0.6443189,0.8684137,0.30427682
+hrm_multi4,26040,0.46235448,0.80298746,0.603943
diff --git a/multi4_eval_compare/hrm_multi4_eval.csv b/multi4_eval_compare/hrm_multi4_eval.csv
new file mode 100644
index 0000000..0f7dbc9
--- /dev/null
+++ b/multi4_eval_compare/hrm_multi4_eval.csv
@@ -0,0 +1,6 @@
+run,step,all/exact_accuracy,all/accuracy,all/lm_loss,all/q_halt_accuracy,all/q_halt_loss,all/steps
+hrm_multi4,2604,0.0123774204403162,0.6303551197052002,0.8603715300559998,0.9879773855209351,0.020903315395116806,16
+hrm_multi4,5208,0.025012653321027756,0.6685170531272888,0.7412638068199158,0.9968069195747375,0.019309692084789276,16
+hrm_multi4,7812,0.04651052877306938,0.687346339225769,0.7044885158538818,0.9902976751327515,0.03646523132920265,16
+hrm_multi4,10416,0.2006617933511734,0.7243355512619019,0.6340938210487366,0.9991532564163208,0.011545917019248009,16
+hrm_multi4,13020,0.34697696566581726,0.7794705033302307,0.49915269017219543,0.9987062215805054,0.01581510715186596,16
diff --git a/multi4_eval_compare/hrm_multi4_horizon_sweep_768.csv b/multi4_eval_compare/hrm_multi4_horizon_sweep_768.csv
new file mode 100644
index 0000000..3bdace3
--- /dev/null
+++ b/multi4_eval_compare/hrm_multi4_horizon_sweep_768.csv
@@ -0,0 +1,41 @@
+step,split,horizon,count,exact_accuracy,accuracy,lm_loss,q_halt_loss,steps
+23436,train,2,768.0,0.2955729166666667,0.8364840348561605,0.39225157833573504,0.04459714392820994,2.0
+23436,train,3,768.0,0.5755208333333334,0.8917663097381592,0.2618181909650845,0.01976812755068143,3.0
+23436,train,4,768.0,0.6796875,0.9122781753540039,0.21020127075343642,0.006546831379334132,4.0
+23436,train,5,768.0,0.734375,0.9230967362721761,0.182652537414814,0.00717167928814888,5.0
+23436,train,6,768.0,0.7669270833333334,0.9298482735951742,0.16631383252369117,0.00374875341852506,6.0
+23436,train,8,768.0,0.8033854166666666,0.9389146169026693,0.14464881393209095,0.0037066793690125146,8.0
+23436,train,10,768.0,0.8268229166666666,0.9462127685546875,0.12909535948177878,0.00991838239133358,10.0
+23436,train,12,768.0,0.84375,0.9500546455383301,0.11931335874268405,0.004430865868926048,12.0
+23436,train,14,768.0,0.85546875,0.9517585436503092,0.11357831630187847,0.0033199011037747064,14.0
+23436,train,16,768.0,0.859375,0.9536232948303223,0.11014115689326769,0.003578242535392443,16.0
+23436,test,2,768.0,0.12760416666666666,0.7265946865081787,0.6354224683840087,0.025561923782030743,2.0
+23436,test,3,768.0,0.2526041666666667,0.7604809602101644,0.5506175992783068,0.012921225279569626,3.0
+23436,test,4,768.0,0.3138020833333333,0.7742091019948324,0.515490498860075,0.003827621228992939,4.0
+23436,test,5,768.0,0.3619791666666667,0.7867315610249838,0.48693293612013444,0.00506168728073438,5.0
+23436,test,6,768.0,0.3984375,0.7953317960103353,0.46390732880926117,0.0035611667359868684,6.0
+23436,test,8,768.0,0.4583333333333333,0.8105709552764893,0.4314775246277862,0.005083844686547915,8.0
+23436,test,10,768.0,0.48828125,0.8183674812316895,0.4161860321298086,0.003043775757153829,10.0
+23436,test,12,768.0,0.51171875,0.8260512351989746,0.4016971584101859,0.004879387095570564,12.0
+23436,test,14,768.0,0.5325520833333334,0.831050713857015,0.3878147863194716,0.00292336226751407,14.0
+23436,test,16,768.0,0.5559895833333334,0.8361946741739908,0.37473119346002354,0.0035912382105986276,16.0
+26040,train,2,768.0,0.7825520833333334,0.963814894358317,0.09390118849629236,0.04166571795940399,2.0
+26040,train,3,768.0,0.9127604166666666,0.9810153643290201,0.051538720202730794,0.008062846958637238,3.0
+26040,train,4,768.0,0.9309895833333334,0.9841659863789877,0.04344159450663771,0.012521501630544662,4.0
+26040,train,5,768.0,0.9401041666666666,0.9858539899190267,0.04013312268752094,0.015388640264670054,5.0
+26040,train,6,768.0,0.9401041666666666,0.9870595932006836,0.03853385294570503,0.023091336091359455,6.0
+26040,train,8,768.0,0.94921875,0.9884098370869955,0.03620980748266996,0.01947430024544398,8.0
+26040,train,10,768.0,0.9557291666666666,0.9893261591593424,0.03444665149376691,0.03555429975191752,10.0
+26040,train,12,768.0,0.953125,0.9895029067993164,0.034414704500878884,0.04184401035308838,12.0
+26040,train,14,768.0,0.9557291666666666,0.9893904527028402,0.03586123670240371,0.037144094705581665,14.0
+26040,train,16,768.0,0.9518229166666666,0.9894386132558187,0.03654265702438753,0.041033936043580375,16.0
+26040,test,2,768.0,0.11588541666666667,0.7054398854573568,0.8908620335632751,0.040651207168896995,2.0
+26040,test,3,768.0,0.1875,0.7230902512868246,0.7620792943957264,0.03934991856416067,3.0
+26040,test,4,768.0,0.23567708333333334,0.731706460316976,0.7335753038165317,0.11375004053115845,4.0
+26040,test,5,768.0,0.25,0.7362075646718343,0.7385935210540664,0.160938690106074,5.0
+26040,test,6,768.0,0.2669270833333333,0.7397923469543457,0.743103274276764,0.17507813374201456,6.0
+26040,test,8,768.0,0.296875,0.7452899614969889,0.7468322750276379,0.19044278065363565,8.0
+26040,test,10,768.0,0.3138020833333333,0.7472190856933594,0.7515058945457195,0.21485831340154013,10.0
+26040,test,12,768.0,0.3229166666666667,0.7502892812093099,0.7620030015396181,0.22753063837687174,12.0
+26040,test,14,768.0,0.3307291666666667,0.7518165111541748,0.7553974518406182,0.2158371408780416,14.0
+26040,test,16,768.0,0.3346354166666667,0.7514950434366862,0.7639393310889216,0.23628832896550497,16.0
diff --git a/multi4_eval_compare/trm_baseline_eval.csv b/multi4_eval_compare/trm_baseline_eval.csv
new file mode 100644
index 0000000..0b6bc11
--- /dev/null
+++ b/multi4_eval_compare/trm_baseline_eval.csv
@@ -0,0 +1,11 @@
+run,step,all/exact_accuracy,all/accuracy,all/lm_loss,all/q_halt_accuracy,all/q_halt_loss,all/steps
+trm_baseline,26041,0.5575894117355347,0.8469749093055725,0.35285070538520813,0.9997445344924927,0.003797376062721014,16
+trm_baseline,52082,0.6295099854469299,0.8704391121864319,0.2973524034023285,0.9998486042022705,0.0011324305087327957,16
+trm_baseline,78123,0.6993892788887024,0.8920264840126038,0.2477506548166275,0.9998935461044312,0.0006260558147914708,16
+trm_baseline,104164,0.72928386926651,0.9020143151283264,0.22608688473701477,0.9998770356178284,0.001141014276072383,16
+trm_baseline,130205,0.7653990387916565,0.914251446723938,0.19878524541854858,0.999917209148407,0.0010435190051794052,16
+trm_baseline,156246,0.7596656680107117,0.9119072556495667,0.2041437327861786,0.999862790107727,0.0011596218682825565,16
+trm_baseline,182287,0.7541900873184204,0.9094774723052979,0.2100999504327774,0.9998249411582947,0.0013093978632241488,16
+trm_baseline,208328,0.7732800841331482,0.9166732430458069,0.19410833716392517,0.9998320937156677,0.0033004307188093662,16
+trm_baseline,234369,0.7750374674797058,0.9172152280807495,0.19279460608959198,0.9998533725738525,0.0032994903158396482,16
+trm_baseline,260410,0.7742025256156921,0.9169425964355469,0.19348150491714478,0.9998462796211243,0.002894919365644455,16
diff --git a/multi4_eval_compare/trm_gbs768_multi4_step65100_det_n10000_seed20260602.summary.csv b/multi4_eval_compare/trm_gbs768_multi4_step65100_det_n10000_seed20260602.summary.csv
new file mode 100644
index 0000000..38ca8f5
--- /dev/null
+++ b/multi4_eval_compare/trm_gbs768_multi4_step65100_det_n10000_seed20260602.summary.csv
@@ -0,0 +1,2 @@
+correct_count/det_fail_mean,correct_count/det_success_mean,correct_count/full_frac,correct_count/ge_1_frac,correct_count/mean,correct_count/median,correct_count/q10,correct_count/q25,correct_count/q75,correct_count/q90,correct_count/std,correct_count/zero_frac,deterministic/exact,deterministic/token_acc,fd_lyap,fd_spectrum_k,include_clean,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/det_fail_frac,oracle_pass/det_success_frac,oracle_pass/exact,oracle_pass/token_acc,perturb_both,perturb_h,perturb_l,q_max/det_fail_frac,q_max/det_success_frac,q_max/exact,q_max/token_acc,q_mean,rollout0/exact,rollout0/token_acc,rollouts,steps
+0.0,1.0,0.8312000036239624,0.8312000036239624,0.8312000036239624,1.0,0.0,1.0,1.0,1.0,0.3745751678943634,0.1687999963760376,0.8312000036239624,0.9333752989768982,0.0,0.0,0.0,0.8312000036239624,0.9333752989768982,10000.0,0.0,0.0,1.0,0.8312000036239624,0.9333752989768982,1.0,0.0,0.0,0.0,1.0,0.8312000036239624,0.9333752989768982,3.762489080429077,0.8312000036239624,0.9333752989768982,1.0,16.0
diff --git a/multi4_eval_compare/trm_matched_compare.csv b/multi4_eval_compare/trm_matched_compare.csv
new file mode 100644
index 0000000..32006a5
--- /dev/null
+++ b/multi4_eval_compare/trm_matched_compare.csv
@@ -0,0 +1,2 @@
+family,step,base_exact,multi4_exact,delta_exact,base_acc,multi4_acc,delta_acc,base_loss,multi4_loss,delta_loss,base_steps,multi4_steps
+trm,26041,0.5575894117355347,0.7394047975540161,0.18181538581848145,0.8469749093055725,0.9067130088806152,0.059738099575042725,0.35285070538520813,0.21417337656021118,-0.13867732882499695,16,16
diff --git a/multi4_eval_compare/trm_multi4_eval.csv b/multi4_eval_compare/trm_multi4_eval.csv
new file mode 100644
index 0000000..f3d16b5
--- /dev/null
+++ b/multi4_eval_compare/trm_multi4_eval.csv
@@ -0,0 +1,2 @@
+run,step,all/exact_accuracy,all/accuracy,all/lm_loss,all/q_halt_accuracy,all/q_halt_loss,all/steps
+trm_multi4,26041,0.7394047975540161,0.9067130088806152,0.21417337656021118,0.9997232556343079,0.0024572687689214945,16
diff --git a/multi4_eval_compare/trm_multi4_eval_full.csv b/multi4_eval_compare/trm_multi4_eval_full.csv
new file mode 100644
index 0000000..3411ff5
--- /dev/null
+++ b/multi4_eval_compare/trm_multi4_eval_full.csv
@@ -0,0 +1,11 @@
+run,step,all/exact_accuracy,all/accuracy,all/lm_loss,all/q_halt_accuracy,all/q_halt_loss,all/steps
+trm_multi4_loguniform_repro,26041,0.7394047975540161,0.9067130088806152,0.21417337656021118,0.9997232556343079,0.0024572687689214945,16
+trm_multi4_loguniform_repro,52082,0.8449901342391968,0.9424432516098022,0.1342233270406723,0.9998746514320374,0.0010290677892044187,16
+trm_multi4_loguniform_repro,78123,0.8417639136314392,0.9411031007766724,0.13769488036632538,0.9997493028640747,0.0022251552436500788,16
+trm_multi4_loguniform_repro,104164,0.8547161221504211,0.9456510543823242,0.12779666483402252,0.999834418296814,0.0019055778393521905,16
+trm_multi4_loguniform_repro,130205,0.8536233305931091,0.9453508853912354,0.1282763034105301,0.9998888373374939,0.0018422487191855907,16
+trm_multi4_loguniform_repro,156246,0.8489472270011902,0.9433866739273071,0.13273237645626068,0.999782383441925,0.003051365725696087,16
+trm_multi4_loguniform_repro,182287,0.8558868765830994,0.9459810256958008,0.12728126347064972,0.9997469186782837,0.0024399051908403635,16
+trm_multi4_loguniform_repro,208328,0.8404204249382019,0.9403222799301147,0.13971956074237823,0.9996499419212341,0.0029723909683525562,16
+trm_multi4_loguniform_repro,234369,0.845432460308075,0.9419097304344177,0.13668429851531982,0.9997681975364685,0.0016449446557089686,16
+trm_multi4_loguniform_repro,260410,0.826143741607666,0.9344042539596558,0.15468436479568481,0.999701976776123,0.001810370129533112,16
diff --git a/multi4_eval_compare/trm_official_gbs768_eval.csv b/multi4_eval_compare/trm_official_gbs768_eval.csv
new file mode 100644
index 0000000..66dfe1b
--- /dev/null
+++ b/multi4_eval_compare/trm_official_gbs768_eval.csv
@@ -0,0 +1,11 @@
+run,step,all/exact_accuracy,all/accuracy,all/lm_loss,all/q_halt_accuracy,all/q_halt_loss,all/steps
+trm_official_gbs768,6510,0.20006339251995087,0.7261562347412109,0.6155200004577637,0.9993779063224792,0.0063577014952898026,16
+trm_official_gbs768,13020,0.6248409152030945,0.8679871559143066,0.30110087990760803,0.9997587203979492,0.0018232133006677032,16
+trm_official_gbs768,19530,0.7098508477210999,0.8961462378501892,0.23719573020935059,0.9997185468673706,0.002359110629186034,16
+trm_official_gbs768,26040,0.7558197379112244,0.9113300442695618,0.20295456051826477,0.9997066855430603,0.0026622479781508446,16
+trm_official_gbs768,32550,0.7946407794952393,0.9247151017189026,0.17315243184566498,0.9997208714485168,0.0024344150442630053,16
+trm_official_gbs768,39060,0.8247435092926025,0.9351920485496521,0.14995059370994568,0.9996594190597534,0.002962446305900812,16
+trm_official_gbs768,45570,0.8479396104812622,0.9433117508888245,0.1323014795780182,0.9996806979179382,0.0026382978539913893,16
+trm_official_gbs768,52080,0.8633161783218384,0.9488447904586792,0.12009253352880478,0.9997137784957886,0.002587266732007265,16
+trm_official_gbs768,58590,0.8686309456825256,0.9508475661277771,0.11555595695972443,0.9998438954353333,0.0014915302162989974,16
+trm_official_gbs768,65100,0.86624675989151,0.9500409364700317,0.11748332530260086,0.9996523261070251,0.002605273388326168,16
diff --git a/multi4_eval_compare/trm_official_gbs768_multi4_eval.csv b/multi4_eval_compare/trm_official_gbs768_multi4_eval.csv
new file mode 100644
index 0000000..ec353ce
--- /dev/null
+++ b/multi4_eval_compare/trm_official_gbs768_multi4_eval.csv
@@ -0,0 +1,21 @@
+run,step,all/exact_accuracy,all/accuracy,all/lm_loss,all/q_halt_accuracy,all/q_halt_loss,all/steps,_runtime,_timestamp
+trm_official_gbs768_multi4,3255,0.0175999198108911,0.6504403352737427,0.7941210269927979,0.9824000597000122,0.108424387872219,16.0,12550.718766357,1780192940.452497
+trm_official_gbs768_multi4,6510,0.2277795374393463,0.7379271984100342,0.5934479236602783,0.999262034893036,0.0035974220372736,16.0,25064.702453699,1780205454.4563622
+trm_official_gbs768_multi4,9765,0.6357069611549377,0.8709613084793091,0.2931223213672638,0.9997137784957886,0.0014165197499096,16.0,37579.311725746,1780217969.132149
+trm_official_gbs768_multi4,13020,0.75853031873703,0.910834014415741,0.2033131867647171,0.9997611045837402,0.0021604059729725,16.0,50092.402719112,1780230482.2738986
+trm_official_gbs768_multi4,16275,0.8263778686523438,0.9346956610679626,0.1507271528244018,0.9997800588607788,0.0041020140051841,16.0,62595.579823935,1780242985.398887
+trm_official_gbs768_multi4,19530,0.8654520511627197,0.9488762617111206,0.1198361814022064,0.9997090697288512,0.0052023055031895,16.0,75096.299217356,1780255486.102359
+trm_official_gbs768_multi4,22785,0.8817486763000488,0.9549695253372192,0.1065462753176689,0.9997114539146424,0.0037784865126013,16.0,87607.26667599,1780267997.105077
+trm_official_gbs768_multi4,26040,0.8879267573356628,0.957356870174408,0.1014027148485183,0.999642848968506,0.0053329654037952,16.0,100118.838503384,1780280508.634669
+trm_official_gbs768_multi4,29295,0.8933455944061279,0.959309697151184,0.0971761047840118,0.9996144771575928,0.0041324645280838,16.0,112620.42551711,1780293010.168183
+trm_official_gbs768_multi4,32550,0.8962169885635376,0.960436463356018,0.0948082581162452,0.9996026158332824,0.0144010595977306,16.0,125134.755165262,1780305524.5418072
+trm_official_gbs768_multi4,35805,0.8964653611183167,0.9604493975639344,0.0945562794804573,0.9995790123939514,0.0068304436281323,16.0,137714.797781532,1780318104.6386163
+trm_official_gbs768_multi4,39060,0.8957250118255615,0.9601802825927734,0.0952448472380638,0.999541163444519,0.0185395441949367,16.0,150233.754675447,1780330623.5781178
+trm_official_gbs768_multi4,42315,0.888913094997406,0.9574248790740968,0.1008101180195808,0.9995600581169128,0.0061048995703458,16.0,162739.436977464,1780343129.2559526
+trm_official_gbs768_multi4,45570,0.8858169317245483,0.9561300873756408,0.1036654934287071,0.9994938373565674,0.0054858992807567,16.0,175251.082236918,1780355640.8929477
+trm_official_gbs768_multi4,48825,0.882732629776001,0.9547808170318604,0.1068678125739097,0.9994110465049744,0.0072551541961729,16.0,187778.40637965,1780368168.2221627
+trm_official_gbs768_multi4,52080,0.8782954216003418,0.9530280232429504,0.1104752272367477,0.99934720993042,0.0089566921815276,16.0,200285.373208387,1780380675.1846614
+trm_official_gbs768_multi4,55335,0.8694067597389221,0.94942307472229,0.1194151788949966,0.9987700581550598,0.0272370912134647,16.0,212785.271448664,1780393175.0770009
+trm_official_gbs768_multi4,58590,0.8620365858078003,0.9463443756103516,0.1268824934959411,0.9983466863632202,0.0163928251713514,16.0,225280.516080387,1780405670.3403552
+trm_official_gbs768_multi4,61845,0.850763738155365,0.941650390625,0.1378339380025863,0.9978050589561462,0.0215476993471384,16.0,237778.331426833,1780418168.1822684
+trm_official_gbs768_multi4,65100,0.8350536823272705,0.9350366592407228,0.1547555029392242,0.9962841868400574,0.0310162808746099,16.0,250280.395200839,1780430673.009492
diff --git a/multi4_eval_compare/trm_official_gbs768_multi4_step16275_eval.csv b/multi4_eval_compare/trm_official_gbs768_multi4_step16275_eval.csv
new file mode 100644
index 0000000..a34ae57
--- /dev/null
+++ b/multi4_eval_compare/trm_official_gbs768_multi4_step16275_eval.csv
@@ -0,0 +1,2 @@
+step,accuracy,exact_accuracy,lm_loss,q_halt_accuracy,q_halt_loss,steps
+16275,0.934574,0.82621706,0.15090619,0.99974453,0.00405054,16.0
diff --git a/multi4_eval_compare/wallclock_eval.csv b/multi4_eval_compare/wallclock_eval.csv
new file mode 100644
index 0000000..268a924
--- /dev/null
+++ b/multi4_eval_compare/wallclock_eval.csv
@@ -0,0 +1,30 @@
+run,step,runtime_sec,runtime_hms,timestamp_local,ckpt_exists,ckpt_mtime_local,exact,accuracy,loss
+hrm_baseline,2604,1440.547846523,00:24:00.55,2026-05-22 06:32:03,True,2026-05-22 06:32:04,0.016369983553886414,0.6336106061935425,0.8522627949714661
+hrm_baseline,5208,2915.282126881,00:48:35.28,2026-05-22 06:56:38,True,2026-05-22 06:56:39,0.06169315055012703,0.6744286417961121,0.7430469989776611
+hrm_baseline,7812,3726.920135005,01:02:06.92,2026-05-22 07:10:09,True,2026-05-22 07:10:10,0.1358133852481842,0.6985693573951721,0.676047682762146
+hrm_baseline,10416,4458.114451179,01:14:18.11,2026-05-22 07:22:21,True,2026-05-22 07:22:22,0.20248068869113922,0.7259970307350159,0.6480565667152405
+hrm_baseline,13020,5801.015399971,01:36:41.02,2026-05-22 07:44:43,True,2026-05-22 07:44:45,0.3024248778820038,0.7580602169036865,0.5614010691642761
+hrm_baseline,15624,7219.496414876,02:00:19.50,2026-05-22 08:08:22,True,2026-05-22 08:08:23,0.36705803871154785,0.7842973470687866,0.49664124846458435
+hrm_baseline,18228,8616.18880162,02:23:36.19,2026-05-22 08:31:39,True,2026-05-22 08:31:40,0.46287721395492554,0.8077820539474487,0.4641122817993164
+hrm_baseline,20832,9904.193793478,02:45:04.19,2026-05-22 08:53:07,True,2026-05-22 08:53:08,0.4912721812725067,0.8198283314704895,0.4137057960033417
+hrm_baseline,23436,10634.360398376,02:57:14.36,2026-05-22 09:05:17,True,2026-05-22 09:05:18,0.5193856954574585,0.8260135054588318,0.41173747181892395
+hrm_baseline,26040,11366.743085113,03:09:26.74,2026-05-22 09:17:29,True,2026-05-22 09:17:30,0.5265287756919861,0.8270824551582336,0.4161679148674011
+hrm_multi4,2604,5810.841404196,01:36:50.84,2026-05-27 22:44:06,True,2026-05-27 22:44:07,0.0123774204403162,0.6303551197052002,0.8603715300559998
+hrm_multi4,5208,11587.805059004,03:13:07.81,2026-05-28 00:20:23,True,2026-05-28 00:20:24,0.025012653321027756,0.6685170531272888,0.7412638068199158
+hrm_multi4,7812,17365.375767937,04:49:25.38,2026-05-28 01:56:41,True,2026-05-28 01:56:42,0.04651052877306938,0.687346339225769,0.7044885158538818
+hrm_multi4,10416,23142.813901422,06:25:42.81,2026-05-28 03:32:58,True,2026-05-28 03:32:59,0.2006617933511734,0.7243355512619019,0.6340938210487366
+hrm_multi4,13020,29347.517355686,08:09:07.52,2026-05-28 05:16:23,True,2026-05-28 05:16:24,0.34697696566581726,0.7794705033302307,0.49915269017219543
+hrm_multi4,15624,35123.513458465,09:45:23.51,2026-05-28 06:52:39,True,2026-05-28 06:52:40,0.4653252363204956,0.8156101703643799,0.42743897438049316
+hrm_multi4,18228,40898.005173388,11:21:38.01,2026-05-28 08:28:54,True,2026-05-28 08:28:55,0.5790873169898987,0.8494690656661987,0.3459467887878418
+trm_baseline,26041,8593.415472305,02:23:13.42,2026-05-23 01:55:09,True,2026-05-23 01:55:10,0.5575894117355347,0.8469749093055725,0.35285070538520813
+trm_baseline,52082,17155.876633182,04:45:55.88,2026-05-23 04:17:52,True,2026-05-23 04:17:52,0.6295099854469299,0.8704391121864319,0.2973524034023285
+trm_baseline,78123,25719.395089913,07:08:39.40,2026-05-23 06:40:35,True,2026-05-23 06:40:36,0.6993892788887024,0.8920264840126038,0.2477506548166275
+trm_baseline,104164,34282.065662564,09:31:22.07,2026-05-23 09:03:18,True,2026-05-23 09:03:18,0.72928386926651,0.9020143151283264,0.22608688473701477
+trm_baseline,130205,42852.348430037,11:54:12.35,2026-05-23 11:26:08,True,2026-05-23 11:26:09,0.7653990387916565,0.914251446723938,0.19878524541854858
+trm_baseline,156246,51422.119869545,14:17:02.12,2026-05-23 13:48:58,True,2026-05-23 13:48:58,0.7596656680107117,0.9119072556495667,0.2041437327861786
+trm_baseline,182287,59826.120123505,16:37:06.12,2026-05-23 16:09:02,True,2026-05-23 16:09:02,0.7541900873184204,0.9094774723052979,0.2100999504327774
+trm_baseline,208328,68138.229200214,18:55:38.23,2026-05-23 18:27:34,True,2026-05-23 18:27:34,0.7732800841331482,0.9166732430458069,0.19410833716392517
+trm_baseline,234369,76460.482892161,21:14:20.48,2026-05-23 20:46:17,True,2026-05-23 20:46:17,0.7750374674797058,0.9172152280807495,0.19279460608959198
+trm_baseline,260410,84783.527271934,23:33:03.53,2026-05-23 23:05:00,True,2026-05-23 23:05:00,0.7742025256156921,0.9169425964355469,0.19348150491714478
+trm_multi4,26041,22216.1844709,06:10:16.18,2026-05-28 03:18:26,True,2026-05-28 03:18:26,0.7394047975540161,0.9067130088806152,0.21417337656021118
+trm_multi4,52082,44853.735386924,12:27:33.74,2026-05-28 09:35:44,True,2026-05-28 09:35:44,0.8449901342391968,0.9424432516098022,0.1342233270406723
diff --git a/official_gbs768_spectrum/headline_trm_multi4_dynamics_table.csv b/official_gbs768_spectrum/headline_trm_multi4_dynamics_table.csv
new file mode 100644
index 0000000..742ac68
--- /dev/null
+++ b/official_gbs768_spectrum/headline_trm_multi4_dynamics_table.csv
@@ -0,0 +1,4 @@
+model,step,full_exact,full_token_acc,lm_loss,dyn_sample_exact,lambda1_all,mean8_all,tail4_all,pos_count_all
+TRM baseline best,58590,0.8686309456825256,0.9508475661277772,0.1155559569597244,0.875,0.02823458132615997,0.013457294571722192,0.0075273313675370546,7.841796875
+TRM multi4 best,35805,0.8964653611183167,0.9604493975639344,0.0945562794804573,0.900390625,0.020381716455975862,0.0065844104191477015,0.001402141885882835,3.841796875
+TRM multi4 final,65100,0.8350536823272705,0.9350366592407228,0.1547555029392242,0.82421875,0.03232463403946895,0.018508151198432188,0.013372940185377047,8.0
diff --git a/official_gbs768_spectrum/summary_n512_k8_seed20260602.csv b/official_gbs768_spectrum/summary_n512_k8_seed20260602.csv
new file mode 100644
index 0000000..c255252
--- /dev/null
+++ b/official_gbs768_spectrum/summary_n512_k8_seed20260602.csv
@@ -0,0 +1,4 @@
+run,step,n,sample_exact,sample_token_acc,lambda1_all,lambda1_success,lambda1_fail,lambda1_fail_minus_success,mean8_all,mean8_success,mean8_fail,mean8_fail_minus_success,tail4_all,tail4_success,tail4_fail,pos_count_all,pos_count_success,pos_count_fail,pos_mass_all,pos_mass_success,pos_mass_fail,lambda2_all,lambda2_success,lambda2_fail,lambda3_all,lambda3_success,lambda3_fail,lambda4_all,lambda4_success,lambda4_fail,lambda5_all,lambda5_success,lambda5_fail,lambda6_all,lambda6_success,lambda6_fail,lambda7_all,lambda7_success,lambda7_fail,lambda8_all,lambda8_success,lambda8_fail
+baseline_best,58590,512,0.875,0.9539689430384897,0.02823458132615997,0.01761167685357837,0.10259491263423115,0.08498323578065278,0.013457294571722192,0.008003413600119422,0.05163446137294159,0.04363104777282217,0.0075273313675370546,0.004122858266632009,0.03135864307387237,7.841796875,7.819196428571429,8.0,0.10779670139772268,0.0641854171711784,0.4130756909835327,0.0205107267238418,0.012619587999194794,0.07574869779637083,0.016147883449207256,0.0097831444752176,0.060701056267134845,0.012655839604420294,0.007521466406436568,0.04859645199030638,0.010066905798097991,0.00580994511748096,0.03986563056241721,0.00827578879948021,0.00458740731747704,0.0340944591735024,0.006620997387619565,0.003523945934349396,0.028300357560510747,0.0051456334849504515,0.00257013469722064,0.02317412499905913
+multi4_best,35805,512,0.900390625,0.9625048226444051,0.020381716455975862,0.01118387160416574,0.10352301992037717,0.09233914831621143,0.0065844104191477015,0.0019524994650864183,0.04845325257252518,0.046500753107438765,0.001402141885882835,-0.0014188478887233206,0.02690167690732279,3.841796875,3.3817787418655096,8.0,0.06377898653446445,0.027952091227886174,0.38762602058020146,0.012663036363773195,0.0057181008775463405,0.07543980615103946,0.008348809059507634,0.003078277385450445,0.055990281642652025,0.005673153930393582,0.0013151374084221035,0.04506620523684165,0.0034857525132654388,-9.921478389921372e-05,0.035891045140577296,0.002008319042374751,-0.001002505589948276,0.02922381228749074,0.0006402704918926361,-0.0019078789586364823,0.023673542976087213,-0.0005257745040014861,-0.0026657922224093103,0.018818307225135902
+multi4_final,65100,512,0.82421875,0.9289882326847874,0.03232463403946895,0.01912124014472792,0.09423388096814354,0.07511264082341562,0.018508151198432188,0.012883653437293365,0.04488079625621645,0.03199714281892309,0.013372940185377047,0.010672304166179654,0.026035922408724824,8.0,8.0,8.0,0.1480652095874575,0.10306922749834692,0.3590463700497316,0.02459628393262392,0.015458292881759563,0.0674433086377879,0.020185894951282535,0.013495850063401376,0.05155477209223641,0.017466635922573914,0.01230462774373944,0.041670718716664445,0.015463502108104876,0.011429727322452865,0.034377423880828754,0.013748909759215167,0.010808219788353272,0.027537478289256494,0.012621539073734311,0.010437874606770786,0.022860499129941068,0.011657809800453833,0.010013394947141692,0.019368288334872988
diff --git a/official_gbs768_spectrum/summary_n64_k8_seed20260602.csv b/official_gbs768_spectrum/summary_n64_k8_seed20260602.csv
new file mode 100644
index 0000000..5a12638
--- /dev/null
+++ b/official_gbs768_spectrum/summary_n64_k8_seed20260602.csv
@@ -0,0 +1,5 @@
+name,n,acc,lambda1_all,lambda1_success,lambda1_fail,mean8_all,mean8_success,mean8_fail,tail4_all,pos_count_all,pos_count_success,pos_count_fail,pos_mass_all
+baseline_best,64,0.875,0.027470633,0.015822656,0.109006464,0.013350397,0.0077010575,0.052895777,0.0074741757,7.796875,7.767857142857143,8.0,0.10691354
+multi4_bestish,64,0.953125,0.016227467,0.012409109,0.09386742,0.004405942,0.002358008,0.046047267,-0.00011889404,3.375,3.1475409836065573,8.0,0.048455432
+multi4_late,64,0.84375,0.031355858,0.01815009,0.10266701,0.018412568,0.01310199,0.047089677,0.013622271,8.0,8.0,8.0,0.14730054
+multi4_final,64,0.859375,0.028622076,0.01753769,0.09636,0.01713102,0.01240586,0.046007,0.012961711,8.0,8.0,8.0,0.13704816
diff --git a/ordinary_multi4_eval_n1000_seed20260611.csv b/ordinary_multi4_eval_n1000_seed20260611.csv
new file mode 100644
index 0000000..b587ea1
--- /dev/null
+++ b/ordinary_multi4_eval_n1000_seed20260611.csv
@@ -0,0 +1,6 @@
+step,n,seed,exact_acc,token_acc,elapsed_sec
+6510,1000,20260611,0.231,0.7405925925925926,56.891759634017944
+13020,1000,20260611,0.763,0.912679012345679,55.79828500747681
+19530,1000,20260611,0.869,0.9508641975308642,55.84409546852112
+26040,1000,20260611,0.89,0.9582716049382716,55.78691911697388
+32550,1000,20260611,0.915,0.9673827160493828,55.736825942993164
diff --git a/paper/claims.md b/paper/claims.md
new file mode 100644
index 0000000..590b388
--- /dev/null
+++ b/paper/claims.md
@@ -0,0 +1,37 @@
+# Claim table (write structure FROM this, not from session chronology)
+
+Rule: every claim states its evidence, its strongest counter-reading, and where that
+counter-reading is killed or conceded. A claim that can't fill all four columns gets cut
+or demoted to an observation in the discussion.
+
+| # | Claim | Evidence | Strongest counter-reading | Disposition |
+|---|---|---|---|---|
+| C1 | Per-example full-trajectory FTLE separates success/failure near-perfectly in trained HRM and TRM on Sudoku-Extreme | AUC(−λ₁→correct): HRM 0.984 (n=8192) / 0.987 (n=2048, 2nd estimator); TRM official 87.6% ckpt 0.993 (n=2048). Medians: HRM joint −0.152 vs +0.032; TRM +0.012 vs +0.103 | Estimator artifact | Replicates across two estimator implementations and two architectures; report both scales, never cross-compare |
+| C2 | Failures overwhelmingly never settle; settled-wrong is rare (HRM) to absent (TRM) | TRM: 0/254 failures settled, threshold-free (min wrong-drift > late-drift of 96.5% of correct); HRM: 21/3894 (0.55%) strict-band; replicated 5/971 on 2nd estimator | Threshold choice ("settled" is arbitrary) | State threshold-free version (distribution separation); show full percentile sweep; define settled band by its narrow characteristic velocity |
+| C3 | The rare settled-wrong failures are selector-blind: success-like contraction AND success-like halting confidence | n=21: λ₁ med −0.842 (A: −0.867), q_halt(final) +7.47 (= A), all would halt early (halted_at 4–9), token_acc med 0.62 | Small n | Concede explicitly; the point is existence + boundedness (~0.5%), not precision |
+| C4 | The FTLE-outcome signal is not reducible to non-settling, and not a difficulty artifact | Drift-decile-matched AUC within unsettled stratum: 0.879 (n=8192) / 0.900 (2nd estimator); #givens-bin AUC 0.982 vs 0.984 overall | (a) residual within-decile drift variation; (b) #givens is a weak difficulty proxy | (a) deciles are narrow (table shows ranges); (b) concede openly, name solver-backtracks as the proper proxy, future work |
+| C5 | The chaotic signature is outcome-concurrent, not antecedent: nothing dynamical (λ₁, drift) in the first 4 ACT steps forecasts eventual success among not-yet-solved examples — and on HRM the dynamical signals point the other way; HRM's learned q_halt is the one early signal that does predict | Restricted to not-correct@4: TRM AUC λ₁ 0.543 / drift 0.492 / q_halt 0.521 (n=626); HRM λ₁ 0.448 / drift 0.312 (sign-reversed; +drift→success AUC 0.688) / q_halt 0.734 (n=1342) | Window length (only 4 steps tested) | Concede; horizon sweep = explicit future work; 4 chosen to match training window |
+| C6a | Correction: TRM failures do not "plateau at stable high-loss attractors" in state space | C2 + λ₁(D)=+0.103 + residual velocity ≈56/step ≈0.77× early; their own Fig 5 oscillation consistent | We strawman "attractor" (bounded attracting set can be chaotic) | Quote their exact wording ("stable", "local minima", "stabilize rather than explore"); credit their loss/boundedness data and intervention; correct only the settledness reading |
+| C6b | Refinement: Ren & Liu's four modes confirmed and quantified; wrong-fixed-point mode is real but marginal at trajectory end | Their mode (4) = our strict B (0.5%); mode (3) = our D (~99.5% of failures) | We measure end-of-window only; mid-trajectory lingering invisible | Concede explicitly; their non-trivial-success lingering claim untouched |
+| C7a | Training widens the success/failure gap from the failure side: failures become more expansive while the success regime barely moves | λ₁(D): +0.036→+0.102 over the TRM series, λ₁(A) within ±0.03 of 0 throughout | Estimator-scale / single-run | Holds; HRM series shows the mass-migration version. Robust claim. |
+| C7b | **DEMOTED by matched-objective control (E6).** Under MATCHED objective (step9 fixed-unroll, E-vs-F / G-vs-H), perturbation training's effect on the wandering cell is small and inconsistent at matched step | HRM fD 0.387→0.369 / 0.385→0.379 / 0.381→**0.387**(↑ at best) / 0.410→0.387; TRM 0.344→0.305 / 0.396→0.361 / 0.334→**0.361**(↑ at best) / 0.312→0.270 | The earlier large shrink (D 274→175) was partly a fixed-unroll-vs-ACT-streaming artifact, NOT a pure intervention effect | **Rewrite §3.4**: report the matched effect as small/equivocal; drop the strong "intervention shrinks wandering" reading. The May-28 mismatched comparison is retired. C7a stands; C7b does not support an intervention claim. |
+
+## The spine (one sentence)
+Direct per-example measurement of settling and tangent expansion decomposes recursive-reasoner
+failure: failures are overwhelmingly trajectories that never settle, the chaotic signature is
+concurrent with — not antecedent to — the outcome, and it is not explained by non-convergence
+alone or by problem difficulty.
+
+## What this paper is NOT claiming (write these into Discussion to pre-empt)
+- No mechanism for WHY trajectories fail to find the settled band (explicitly open).
+- No claim that early intervention is impossible in principle — only that λ/drift at 4 steps carry no signal.
+- No claim about tasks beyond Sudoku-Extreme or models beyond HRM/TRM.
+
+## Anti-patterns checklist (apply at style pass)
+- [ ] No chronological narration of the investigation; structure = claims order.
+- [ ] Numbers in topic sentences; every section's first sentence is a finding, not a plan.
+- [ ] No "notably/crucially/interestingly/delve/underscore"; hedges only where the claim table says concede.
+- [ ] Related work = positioning (each paragraph ends with the gap), not annotated bibliography.
+- [ ] Negative result (C5) framed as a finding with its own section, not a limitation apology.
+- [ ] Limitations: specific, short, no re-hedging of already-scoped claims.
+- [ ] Prose paragraphs in the body; tables only for numbers; no bullet lists in Results.
diff --git a/paper/experiment_framework.md b/paper/experiment_framework.md
new file mode 100644
index 0000000..07a69d0
--- /dev/null
+++ b/paper/experiment_framework.md
@@ -0,0 +1,55 @@
+# Experiment framework — locked 2026-06-12
+
+Purpose: every paper claim gets its evidence gap named and the run that closes it specified,
+so all remaining GPU/CPU work is execution against this table. Targets: workshop freeze
+~Jun 25 (COLM deadline Jul 12), ICLR freeze ~Sep 5 (deadline ~Sep 16, unconfirmed).
+
+## Locked measurement protocol (uniformity rules)
+- Estimator: current `diagnose_{hrm,trm}_joint.py` (joint JVP+QR, k=8, t_ons=1); seed 0 primary;
+ n=2048 headline, n=512 series/sweeps; batch 16 (TRM) / 32 (HRM).
+- λ values never compared across estimator implementations; scale ownership stated at first use.
+- Settled = low band of late z_H drift (mean of final 4 segments), Otsu primary + percentile
+ sweep + threshold-free statement; z_L and combined-drift definitions reported as robustness.
+- idx-pairing: same seed + same n across any runs that will be joined per example.
+- All new npz under analysis_2x2/ subdirs with queue logs; nothing hand-run on GPU.
+
+## Claims → gaps → runs
+
+| ID | Closes | Run | Spec | Cost | Priority / target |
+|---|---|---|---|---|---|
+| E1 | C1/C2 formal uncertainty; C7-TRM matched pair | OFFLINE: bootstrap CIs (cells, AUCs, B-fraction rule-of-three bound); z_L/combined settling robustness; 2×2 on existing TRM official multi4 npz (@35805 best, @65100 final) vs baseline @58590 | existing npz | CPU, now | **P0 / workshop** |
+| E2 | C1/C2 run-level replication (HRM has 1 training run) | diagnose step9_E (HRM fixed-unroll baseline 50k) best.pt + final.pt + 2 mid ckpts, n=512–2048 | loader risk: step9 .pt may need key remap; fix on first failure | ~1–2 h shared | **P1 / workshop** |
+| E5 | C5 single-horizon caveat → "when does fate become legible" curve | horizon sweep h∈{2,4,6,8,10,12} segments, both models, idx-paired to full-window retest (seed 0, n=2048) | env-var horizon variant of *_short.py | ~5 h shared | **P1 / workshop headline fig** |
+| E6 | C7 matched-objective intervention probe | (a) verify provenance of diag_hrm_multi4_* run; (b) diagnose step9 pairs E vs F (HRM), G vs H (TRM) at {12500, 25000, best, final}, n=512; compare at matched ACCURACY (not matched step) + best-vs-best | — | ~2–3 h shared | **P1 / ICLR (workshop if cheap)** |
+| E3 | C3 "would have halted" + C-cell fate beyond window | 32-segment unroll restricted to B∪C idx sets (≤300 examples/model) + true-ACT inference on HRM's 21 B-cell idx | — | ~1 h shared | P2 / ICLR |
+| E4 | C4 difficulty proxy weakness | CPU sudoku solver with backtrack counting on the 81-token inputs; redo difficulty control binned by backtracks | — | CPU only | P2 / ICLR |
+| E7 | Sec-5 implication → measured result | restart-and-select on TRM official @58590: K∈{1,2,4,8,16,32} h_init Gaussian restarts; select by q_halt vs late-drift vs λ-proxy vs oracle; n=1024; quantify B-blind-spot empirically | — | ~5–10 h shared | P2 / ICLR |
+| E8 | "Sudoku-only" limitation (biggest generalization win) | build Maze dataset; train TRM official-recipe Maze; full diagnostic suite on it | — | **multi-day dedicated GPU** — needs explicit go | P2-gated / ICLR |
+
+### E8 detail — which model/variant (decided 2026-06-13)
+- **PRIMARY = TRM, attention variant.** Official Maze-Hard recipe = `arch=trm`, mlp_t=False (default),
+ L_layers=2 H_cycles=3 L_cycles=4. NOTE: our Sudoku TRM analysis used mlp_t=TRUE (attention OFF).
+ So TRM-Maze varies BOTH the task (Sudoku→Maze) AND the token mixer (mlp_t→attention) at once.
+ Framing for the paper: this is a STRONGER generality test (decomposition surviving task + mixer),
+ but state the dual change honestly; do not call it a pure same-architecture task transfer.
+- Optional matched control (only if cheap + GPU spare): TRM-Maze with arch.mlp_t=true to isolate
+ task-only transfer. Not the validated recipe → may underperform 75%; treat as secondary.
+- HRM-Maze (completes the {HRM,TRM}×{Sudoku,Maze} grid): deferred. Heavier (27M, HRM Maze recipe
+ costlier). Decide AFTER TRM-Maze lands — nice-to-have if TRM-Maze confirms, important if it surprises.
+- Diagnostic caveat stands: attention arch → verify JVP closures in diagnose_trm_joint.py; seq 900 → n=512/256.
+| E9 | transverse/readout framing | logit-space FTLE (diagnose-script modification) | dev + ~2 h | P3 / ICLR appendix |
+| E10 | sampling-seed robustness (if bootstrap deemed insufficient) | rerun headline diags seeds 1,2 | ~5 h shared | P3 |
+
+## Decision rules
+- Workshop version ships on E1+E2+E5 (+E6 if done): decomposition, controls, horizon curve.
+- ICLR adds E3/E4/E6/E7 (+E8 if approved in time: training must START by ~Jul 1).
+- Any FAILED queue job: fix loader/script, rerun; never substitute hand-run results.
+- New results that contradict current text → claims.md updated first, prose second.
+
+## Status board (update as runs land)
+- [x] E1 offline batch — done 2026-06-12, see offline_followups/phase1_e1.md
+- [x] E2 done — step9_E (2nd HRM run, fixed-unroll): B≈0 replicates (best 1/743=0.13%, final 3/832=0.36%). C2 STRENGTHENED (now 2 estimators × 2 archs × 2 training runs × 2 objectives).
+- [x] E5 done — horizon sweep {2,4,6,8,10,12}×2 models. C5 STRENGTHENED to a curve: dynamics never legible early (TRM AUC≈0.48–0.53 all H; HRM reversed/below 0.5 all H); only HRM q_halt predicts (0.81→0.64). See phase1/fig_E5_horizon_sweep.png. NEW: undecided-pool eventual-success drops to 4%(HRM)/19%(TRM) by H=12 yet dynamics still don't discriminate.
+- [x] E6 done — matched-objective. **C7b demoted** (effect small/inconsistent at matched step); C7a (λ₁(D) rises over training) stands. §3.4 needs rewrite.
+- [ ] E3, E4, E7 — phase-2
+- [ ] E8 Maze — GO (user has dedicated GPUs). Build dataset + package launch/diagnostic; train on user cards.
diff --git a/paper/intro.md b/paper/intro.md
new file mode 100644
index 0000000..85f06e7
--- /dev/null
+++ b/paper/intro.md
@@ -0,0 +1,57 @@
+# Recursive Reasoning Models Fail by Wandering, Not by Settling
+
+## 1 Introduction
+
+Recursive reasoning models such as the Hierarchical Reasoning Model (HRM; Wang et al., 2025)
+and the Tiny Recursive Model (TRM; Jolicoeur-Martineau, 2025) solve constraint-satisfaction
+puzzles that defeat far larger language models, by iterating a small network on a latent state
+for hundreds of updates per puzzle. When such a model fails, what is dynamically different
+about the trajectory it produced? Two recent mechanistic studies answer in attractor language.
+Failed TRM runs "plateau at stable high-loss attractors" (Efstathiou & Balwani, 2026); failed
+HRM runs converge to spurious fixed points that rival the correct one (Ren & Liu, 2026). The
+evidence behind both labels is indirect, resting on loss plateaus and two-dimensional
+projections of 512-dimensional trajectories, and the labels disagree about the basic character
+of failure: premature stability in one account, partly aimless drift in the other. Neither
+measures the trajectory's stability directly. We do, per example, and the measurements support
+a third description: recursive reasoning models fail by wandering, not by settling.
+
+Across 2,048 to 8,192 held-out Sudoku-Extreme puzzles, correct trajectories end inside a
+narrow low-velocity band of the latent dynamics, and failures essentially never do. In an
+official-recipe TRM at 87.6% test accuracy, none of 254 failures settles: the least mobile
+failure still moves faster at the end of inference than 96.5% of successes, a separation of
+distributions that no threshold choice can undo, and failed trajectories remain locally
+expansive throughout (median leading finite-time Lyapunov exponent λ₁ = +0.103, against +0.012
+for successes; AUC 0.993). HRM shows the same structure with one addition. Settled-but-wrong
+trajectories exist, but they account for 0.55% of failures, carry success-like contraction
+(λ₁ = −0.84, against −0.87 for settled successes) and success-like halting confidence, and
+every one of them would have halted early under adaptive computation. The wrong-attractor
+failure mode is real, rare, and the only failure a confidence-based selector cannot catch.
+
+Two controls locate what the Lyapunov signature adds, and a third experiment locates when it
+exists. Matched for displacement level within the unsettled population, λ₁ still separates
+eventual successes from failures (decile-matched AUC 0.88–0.90), so the exponent does more
+than restate non-convergence. Binned by the number of givens, the separation is unchanged
+(within-bin AUC 0.982, against 0.984 unconditioned), so it is not an artifact of problem
+difficulty. It is, however, strictly retrospective. Restricted to puzzles still unsolved after
+four of sixteen segments, neither early-window exponents nor early state velocity predicts
+which trajectories will eventually succeed (AUC ≈ 0.5 in TRM), and in HRM the association
+inverts — among the undecided, the trajectories that move more in the early segments are the
+ones that go on to solve the puzzle (positive-direction AUC 0.69). The chaos of failure
+arrives with the failure; nothing dynamical in the early trajectory anticipates it.
+
+These measurements redraw the intervention map for this model class. Because failure is almost
+never a stable wrong answer, restart-and-select inference strategies have a high ceiling and a
+quantifiable blind spot of roughly half a percent. Because the early trajectory carries no
+dynamical death sentence, compute spent on early failure prediction is compute wasted, and
+restart diversity is the better buy. Our contributions: (i) per-example, outcome-conditioned
+measurement of settling and finite-time Lyapunov spectra in HRM and TRM, at sample sizes up to
+8,192 and replicated across two estimator implementations; (ii) a decomposition of failure
+that corrects the settled-attractor reading and bounds the wrong-attractor mode at ~0.5% of
+failures; (iii) controls showing the signature is not reducible to non-convergence or
+difficulty; (iv) evidence that the signature is concurrent with the outcome and carries no
+early-warning content at the granularity tested.
+
+---
+*[em-dash count: 1. Contrast-template count: title + one echo (end of ¶1). Flourish count:
+1 ("death sentence", ¶4) — cuttable. "essentially never" is the one hedge in ¶2, scoped by
+the 0.55% in the next sentence.]*
diff --git a/paper/outline.md b/paper/outline.md
new file mode 100644
index 0000000..0dde354
--- /dev/null
+++ b/paper/outline.md
@@ -0,0 +1,79 @@
+# Outline — "Recursive Reasoning Models Fail by Wandering, Not by Settling" (title FIXED 2026-06-12)
+
+Status: intro.md ✅ (v2, audited) · setup_results.md ✅ (Secs 2–3) · style_contract.md ✅ ·
+remaining: Sec 4 (relation to prior accounts), Sec 5 (implications), Sec 6 (limitations),
+abstract, tables T1–T3 + figures F3/F4 composition.
+
+Target: ~8 pages main. Every section header below lists [claims served] and [assets].
+
+## 1 Introduction [C1, spine]
+- Para 1: recursive reasoners (HRM/TRM) solve hard puzzles by iterating a latent state; when they
+ fail, what is dynamically different? Existing mechanistic accounts infer dynamics from loss
+ curves and 2-D projections; we measure the dynamics directly, per example.
+- Para 2: the answer, with numbers (settling × correctness decomposition; B≈0; AUC 0.99;
+ concurrent-not-antecedent).
+- Para 3: contributions (4 items, one line each): (i) per-example outcome-conditioned FTLE/settling
+ measurement at n≤8192 across two architectures; (ii) failure-mode decomposition correcting two
+ published labels; (iii) independence controls (drift-matched, difficulty-binned); (iv) the
+ early-window null + sign reversal.
+- NO general AI-reasoning throat-clearing. First sentence is about the object of study.
+
+## 2 Setup [assets: estimator details from diagnose_trm_joint.py; OBSERVATIONS.md provenance table]
+- 2.1 Models & task: HRM 27M @26040 (acc .526), TRM-MLP official recipe @58590 (acc .876),
+ Sudoku-Extreme-1k-aug; fixed 16-step unroll, ACT recorded not applied.
+- 2.2 Measurements: joint (z_H,z_L) tangent dynamics, JVP+QR, k=8, per-sub-update normalization;
+ per-ACT-step state displacement (drift); q_halt; exact/token accuracy. Estimator-scale caveat.
+- 2.3 The 2×2 design: settled band defined by bimodal late-drift split (Otsu primary, full
+ percentile sweep + threshold-free statement in appendix); cells A/B/C/D.
+
+## 3 Results
+- 3.1 Decomposition [C1, C2, C3; assets: cells tables, fig_*_scatter, fig_*_lyap_by_cell,
+ strict-B table + fig_hrm_strictB_profiles]
+ Lead: "Across 2048–8192 held-out puzzles, no TRM failure and 0.55% of HRM failures end in the
+ settled band." Then per-cell λ₁; then the 21 selector-blind examples (their three lowest
+ token-acc are all 17-givens puzzles).
+- 3.2 What the signal is not [C4; assets: decile table, givens table]
+ Drift-matched AUC 0.88–0.90; givens-binned AUC unchanged. One paragraph each, tables carry
+ the numbers.
+- 3.3 When the signal exists [C5; assets: early_pairing_{trm,hrm}.md tables]
+ The early-window null; the HRM sign reversal (drift@4 +direction AUC 0.688); q_halt@4 0.734
+ vs TRM 0.521 (factual note: TRM removed the continue head). Frame as the temporal anatomy of
+ the signature.
+- 3.4 Training evolution [C7; assets: evolution_{trm,hrm}.png/csv; multi4 quick-compare]
+ Gap widens via λ₁(D); multi4 shrinks D-cell mass at matched steps (preliminary, objective
+ caveat); multi4 collapse = λ₁(A) sign flip.
+
+## 4 Relation to prior accounts [C6a, C6b; assets: papers/notes/*]
+- Para 1: network-level Lyapunov–performance work (Vogt 2022; AeLLE 2024; Engelken flossing
+ App. D.3 trains-vs-fails at network level, opposite sign) → none condition per example on outcome.
+- Para 2: the 2026 mechanistic trio. Efstathiou & Balwani: credit loss/boundedness/intervention;
+ quote and correct the settledness reading (C6a). Ren & Liu: confirm + quantify their taxonomy
+ (C6b). Es'kin & Smorkalov (CMM): their endpoint-stability losses + engineered early repeller
+ are consistent, at the design level, with where our measurements localize the signal — cite,
+ don't claim confirmation.
+- Para 3: stability-by-construction line (monDEQ, Jacobian-reg DEQ, REN/Sandwich; TRM's own
+ TorchDEQ negative result; Solve-the-Loop) — what "enforce settling" buys and where it failed;
+ our measurements say which kind of settling is the operative one.
+
+## 5 Implications (restrained, half page)
+- Intervention design space bifurcates: widen/deepen the settled tube at training time
+ (perturbation training, equilibrium losses) vs restart-and-select at inference
+ (q_halt tracks correctness at trajectory end; selector-blind ceiling ≈0.5%).
+- Early pruning/reallocation unsupported at 4-step granularity; on HRM the gradient of usable
+ early signal lives in the learned head, not the generic dynamical quantities.
+
+## 6 Limitations & future
+Sudoku-Extreme only; two models; #givens is a weak difficulty proxy (solver backtracks next);
+single early horizon (sweep queued); end-of-window criterion blind to mid-trajectory lingering;
+no mechanism offered for why settling fails — measurement paper.
+
+## Figures plan (all exist or one rerun away)
+F1: drift–λ₁ scatter, both models (have).
+F2: per-cell λ₁ + strict-B profiles inset (have).
+F3: decile-matched AUC + givens-binned AUC (compose from CSVs).
+F4: early-window pairing summary (compose: 3 signals × 2 models, restricted set).
+F5: checkpoint evolution (have).
+
+## Order of writing
+1. Results 3.1–3.3 (numbers already final) → 2. Setup → 3. Sec 4 (notes ready) → 4. Intro →
+5. Implications/Limitations → 6. style pass against claims.md checklist.
diff --git a/paper/rainer_followup_draft.md b/paper/rainer_followup_draft.md
new file mode 100644
index 0000000..12091de
--- /dev/null
+++ b/paper/rainer_followup_draft.md
@@ -0,0 +1,37 @@
+Subject: Re: Question on gradient flossing vs forward trajectory stability in recursive reasoning models
+
+Hi Rainer,
+
+A short follow-up to my email of June 5 — we have since measured the things I was speculating
+about, and two results seem worth sharing because they sharpen the question I asked you.
+
+First, conditioning per-example finite-time Lyapunov spectra on both outcome and terminal
+settling (n = 2048–8192, two architectures) shows that failure is almost exclusively
+non-settling: in an official-recipe TRM at 87.6% accuracy, none of 254 failed trajectories
+ends in the low-velocity band that nearly all successes occupy, and they remain locally expansive
+to the end (median λ₁ +0.10 vs +0.01). "Converged to the wrong attractor" failures exist in
+HRM but make up only ~0.5% of failures. The chaotic signature also survives two controls: it
+persists after matching trajectories on displacement level (so it is not just re-measuring
+non-convergence), and after binning by puzzle difficulty.
+
+Second — and this is the part that genuinely surprised us — the signature is strictly
+concurrent. Among puzzles still unsolved after a quarter of the inference budget, neither the
+early-window exponents nor early state velocity predicts which trajectories will eventually
+succeed (AUC ≈ 0.5); in HRM the association even inverts, with eventually-successful
+trajectories moving more in the early phase. So the failed trajectories are not "born chaotic":
+chaos at the end and failure appear together.
+
+This makes me think the right framing for my earlier question is reachability of the settled
+region (escape from a long chaotic transient) rather than per-example landscape quality, which
+would be consistent with your view of flossing as a learning-time tool rather than an
+inference-time one. If you know of work that conditions finite-time exponents on trajectory
+fate in this way — in transient-chaos settings or elsewhere — I would be grateful for a
+pointer; we have not found a precedent.
+
+Best,
+Yuren
+
+---
+[Notes, not part of the email: numbers from analysis_2x2/OBSERVATIONS.md addenda 1-2. Send only
+if/after Rainer replies to the June 5 email, or as a gentle bump after ~2 weeks (June 19+).
+The "born chaotic" phrasing mirrors his literature's transient-chaos vocabulary deliberately.]
diff --git a/paper/readiness.md b/paper/readiness.md
new file mode 100644
index 0000000..f07f78e
--- /dev/null
+++ b/paper/readiness.md
@@ -0,0 +1,119 @@
+# Path to a citable, build-on-able preprint — status
+
+Framing (locked 2026-06-19, per user correction): the axis is **expansive vs more-expansive**
+(graded; for TRM both classes have λ₁>0), NOT settled-vs-chaotic. The phenomenon is the
+**cleanness of a graded separation**; the **mechanism is explicitly OPEN** and is the natural seed
+for follow-on projects. This preprint = rigorous phenomenology + precise characterization +
+honest open-mechanism. Do NOT force a fixed-point / suppression-of-chaos framing (rejected).
+
+## Tier 0 — measurement bulletproofing (others build on it)
+- [x] **T0.1 estimator validation** — `paper/validation/`: QR/Benettin core recovers known spectra
+ to <1e-3 (diagonal, symmetric, non-normal asymptotic) and Hénon λ₁ to 8e-5. PASS. Confirms the
+ numerical core (orthonormalization cadence, log|diagR| bookkeeping, ordering, averaging).
+- [ ] **T0.2 robustness reruns (GPU)** — λ stability vs t_ons, tangent-basis seed, k>8. Window
+ dependence already covered offline (Char 3). Small queue; spec below.
+- [ ] **T0.3 language/scope pass** — finite-time vs asymptotic, "expansive not chaotic" for HRM
+ (negative λ), metric/coordinate-dependence caveat (Lohmiller–Slotine). Prose task.
+
+## Characterization (replaces the rejected Tier 2; describes WHAT, not WHY)
+- [x] **Char 1 whole-spectrum** — separation is a ~rigid shift of the ENTIRE k=8 spectrum, not a
+ single mode (per-exponent AUC uniformly 0.98–0.99; HRM gap ≈constant −0.16/exponent). Spectral
+ MEAN separates ≥ λ₁ alone (AUC 0.991–0.995). CAVEAT: KS-proxy Σλ⁺ is the wrong aggregate for HRM
+ (all-negative spectra → 0); use spectral mean for HRM.
+- [x] **Char 2 shape** — two overlapping UNIMODAL classes with well-separated means, NOT two
+ discrete clusters (within-class BC 0.26–0.40). Outcome is a moderately sharp threshold on the
+ λ₁ continuum (25→75% transition spans 12–30% of the λ₁ spread).
+- [x] **Char 3 integration-time scaling (the key descriptor)** — separation BUILDS monotonically
+ with window H: Cohen's d 1.06→4.84 (TRM, H=2→16), 0.03→3.45 (HRM). Near-zero at H=2, near-perfect
+ at the full 16-segment budget. The cleanness is an integration-time phenomenon. COHERENCE with
+ E5: this accumulation tracks the unfolding of outcomes (more trajectories revealed by larger H),
+ NOT anticipation — among undecided@H examples λ₁ still doesn't predict (E5). State both together.
+- [x] **Char 4 effect size** — "clean" quantified: Cohen's d 3.4–4.8, distributional overlap
+ <10% (TRM hist-overlap 0.049). Beyond AUC.
+
+## Tier 1 — causal content (the level-up from correlation)
+- [ ] **T1 inference-side causal probe** — nudge a failing trajectory toward lower expansion (or
+ toward the success-mean manifold) mid-rollout and measure outcome recovery; conversely inject
+ expansion into a settling-correct trajectory. Tests settling⟹correct as causal, not correlational.
+ Spec next. GPU.
+
+## Open-mechanism (NOT this paper; the hook for follow-ons)
+Why a graded (both-expansive) difference separates so cleanly. Char 1–4 bound the description;
+the why is deferred. Candidate angles are the user's to pursue, not asserted here.
+
+## Maze cross-task result + checkpoint evolution (2026-06-20)
+
+**Deflationary finding stands and is now grounded:** the FTLE/CLV separation reduces to
+convergence+confidence (λ1, full k=8 spectrum, AND leading-CLV geometry all reduce; partial-corr
+→0 once drift+q_halt controlled). The dynamical signal is a (redundant) convergence readout.
+
+**Maze (TRM att, friend's run, all 10 ckpts, k=1):** separation WEAK (λ1 Cohen's d 0.2–0.5 vs
+Sudoku 3–5). Failures SETTLE (B/fail 0.81–0.98, D/fail 0.02–0.19) at ALL ckpts and are NEAR-MISSES
+(token_acc ~0.97). Opposite of Sudoku (failures wander, far-from-correct token ~0.63).
+
+**Checkpoint evolution (the key new result, offline):** wandering is a LATE-TRAINING property.
+Sudoku HRM failures SETTLE early (B/fail ~0.9 at acc 2–15%) then flip to WANDER late
+(D/fail ~1.0 at acc 50%), transition ~step 13–18k. So "failures wander" is learned, not intrinsic.
+BUT matched-accuracy contrast cuts the other way: at acc≈0.76, Sudoku-TRM D/fail=1.00 vs
+Maze-TRM D/fail=0.19 — same skill, opposite dynamics → TASK STRUCTURE also matters, not just maturity.
+And early-Sudoku settling (token 0.63, confidently-wrong) ≠ Maze settling (token 0.97, near-miss):
+not the same phenomenon. Fig: analysis_2x2/checkpoint_evolution_wander.png.
+
+**Task structure (offline):** Maze solution path (median 113 cells) passes through ~76 branch
+points (67% of path cells at deg≥3 junctions; 48% of open cells are junctions) → abundant
+locally-coherent alternative paths = many STABLE WRONG ANSWERS available. Sudoku: unique
+globally-coupled solution, a wrong cell violates constraints globally → no local near-miss
+equilibrium. This structurally explains settle-to-near-miss (Maze) vs wander (Sudoku).
+
+**Unresolved confound (queued):** TRM-Maze never develops wandering, but can't tell task-structure
+from TRM-Maze SATURATION (Maze too easy for TRM). Queued before HRM-Maze:
+(1) continue-train TRM-Maze from step_130200 (does acc climb toward ~1.0 = saturation, or plateau?);
+(2) per-cell failure structure (are failure errors a connected detour = coherent stable wrong path,
+or scattered?). Then HRM-Maze (harder model-task fit, more likely to be stressed into wandering).
+
+## Solution-space test (2026-06-20) — refutes the measurement-artifact concern, strengthens task-structure
+User asked: is weak Maze separation an artifact of analyzing the FULL latent (88% trivial copy)
+instead of the SOLUTION space? Tested directly: per-step decoded-ANSWER Hamming drift over
+solution cells (label!=input), Maze vs Sudoku control.
+- MAZE: failures SETTLE in solution space too (late answer-drift median 0.00, 98.4% settled;
+ AUC 0.30). Same conclusion as full-latent. NOT an artifact.
+- SUDOKU control: failures DON'T settle in solution space (late drift median 8.5/step, 0% settled;
+ AUC 0.99). Same as full-latent. Both spaces agree.
+- Per-cell failure STRUCTURE (direct task-structure evidence): MAZE failures = CONNECTED DETOUR
+ (97% have ≤2 error components, median 22 cells one blob) = a coherent stable wrong PATH.
+ SUDOKU failures = SCATTERED (100% have ≥5 components, median 13) = no coherent wrong answer.
+ Fig: analysis_2x2/maze_failure_detour.png. This is the mechanism-grounding for why Maze settles
+ (stable wrong answers exist as detours) and Sudoku wanders (no stable wrong answer).
+
+## CORRECTION (2026-06-20) — Maze exact-match labeling was the artifact; failure=more-chaotic HOLDS
+The earlier "Maze dissociates / completeness≠correctness" reading was largely a LABELING ARTIFACT,
+not a real dynamical dissociation. Maze exact-match marks VALID alternative solutions (incl.
+equal-length valid shortest paths) as "failures"; 100% of exact-match "failures" are valid connected
+paths (complete answers) → they settle, trivially. That is a benchmark-design flaw, not a result.
+**Under the correct criterion (CONNECTIVITY = is it a valid complete path = is it actually solved):**
+genuine failures (broken/disconnected) ARE more chaotic — AUC(-late_drift→connected) = 0.864 @step_13020
+(15 broken), 0.895 pooled (18 broken); bootstrap 95% CI [0.80, 0.96], excludes 0.5. So
+"failure = more chaotic" is TASK-GENERAL (Sudoku + Maze) once failure is defined by validity.
+LIMITATION (now RESOLVED): trained Maze SATURATES before the first saved ckpt (step_13020 already
+97% complete) → only n=18 broken from existing ckpts. FIX DONE: fresh early-save TRM-Maze run
+(maze_earlysave_freshTRM, saved every 250 epochs) captured the broken-rich pre-saturation phase;
+cheap forward dumps (drift_zH + connectivity, no JVP) on 8 early ckpts give **n=4096, 1835 broken**.
+**Pooled: AUC(-latent drift_zH -> connected/complete) = 0.834, bootstrap 95% CI [0.822, 0.846]**
+(broken late-drift median 1.06 vs connected 0.56). Per-ckpt AUC rises with training 0.66->0.88
+(mirrors Sudoku's separation-grows-with-training). So 'genuine failure (incomplete) = more chaotic'
+is now LARGE-N BULLETPROOF on Maze under validity labeling. Fig: maze_broken_morechaotic.png.
+Honest detail: 'more chaotic' is a LATENT-dynamics property (drift_zH AUC 0.834, λ1 AUC 0.86);
+the DECODED-answer drift does NOT separate (ans_drift AUC 0.38) — broken paths commit an incomplete
+decoded answer while churning internally. Consistent with the FTLE/drift (latent) story.
+
+## Synthesis for the paper (current honest thesis, corrected)
+Genuine failures (incomplete/invalid answers) are MORE CHAOTIC — measurable, task-general (Sudoku;
+Maze under validity labeling). Mechanism: the dynamical signal detects answer completeness/convergence
+(FTLE reducible to drift+q_halt). On unique-solution tasks completeness=correctness, so it predicts
+correctness directly. On multi-solution tasks exact-match mislabels valid alternatives as failures;
+use validity labeling. The phenomenon stands; the convergence-detection mechanism is the honest
+interpretation, not a refutation.
+
+## Status
+Offline T0.1 + Char 1–4 + Maze evolution + task structure DONE. Running: TRM CLV (done), HRM CLV (queued on card1), maze-followup queue (continue-train + per-cell, waiting for GPU).
+Remaining: T0.2/T0.3, T1, HRM-Maze (after saturation test).
diff --git a/paper/sample_intro.md b/paper/sample_intro.md
new file mode 100644
index 0000000..183faa4
--- /dev/null
+++ b/paper/sample_intro.md
@@ -0,0 +1,49 @@
+# Sample section: Introduction (taste-calibration draft)
+
+Recursive reasoning models solve constraint-satisfaction problems that defeat much larger
+language models by iterating a small network on a latent state — up to several hundred state
+updates per puzzle in the Hierarchical Reasoning Model (HRM) and the Tiny Recursive Model
+(TRM). When such a model fails, what is dynamically different about the trajectory it
+produced? Recent mechanistic studies have answered with attractor language: failed runs
+"plateau at stable high-loss attractors" (Efstathiou & Balwani, 2026), or converge to spurious
+fixed points that rival the correct one (Ren & Liu, 2026). These accounts rest on indirect
+evidence — loss plateaus, two-dimensional projections of 512-dimensional trajectories — and
+the two papers do not agree: one describes failure as premature stability, the other partly as
+wandering. Neither measures stability itself.
+
+We measure it directly. For every test puzzle we record two per-example quantities along the
+full 16-segment inference trajectory: the finite-time Lyapunov spectrum of the joint latent
+dynamics, and the per-segment state displacement. Conditioning these on outcome over 2,048 to
+8,192 puzzles per model yields a complete decomposition of failure for HRM (52.6% accuracy)
+and an official-recipe TRM (87.6%), and the decomposition contradicts the settled-attractor
+picture. Correct trajectories enter a narrow low-velocity band and stay in it; failed
+trajectories never do. In TRM, not one of 254 failures settles — the least mobile failure still
+moves faster at the end of inference than 96.5% of successes — while remaining locally
+expansive (median λ₁ = +0.103 versus +0.012 for successes; AUC 0.993). In HRM, settled-but-wrong
+trajectories exist but account for 0.55% of failures; the other 99.45% wander. Failure in these
+models is not a wrong attractor. It is the sustained absence of settling.
+
+Two controls sharpen what the Lyapunov signature adds. Matched for displacement level within
+the unsettled population, λ₁ still separates eventual successes from failures (decile-matched
+AUC 0.88–0.90), so the exponent is not merely re-measuring non-convergence; and binning by
+puzzle givens leaves the separation intact (within-bin AUC 0.982 versus 0.984 overall), so it
+is not a difficulty artifact. The signature is, however, strictly retrospective. Restricted to
+puzzles still unsolved after four segments, nothing dynamical about those first four segments
+predicts which will eventually be solved: AUC ≈ 0.5 in TRM for exponent, displacement, and
+halting confidence alike — and in HRM the association inverts, with eventual successes moving
+*more* in the early trajectory than eventual failures (AUC 0.69 in the positive direction).
+The chaos of failure is concurrent with the outcome, not an omen visible at the start.
+
+These measurements reframe both the diagnosis and the levers. Because failure is almost never
+a stable wrong answer, selection-based inference strategies have a high ceiling — final-step
+halting confidence tracks correctness on all but the ~0.5% of failures that settle confidently
+— and because the early trajectory carries no dynamical death sentence, compute is better
+spent on restarts than on early pruning. We quantify both points, correct the published
+attractor labels they depend on, and release the per-example measurement tooling.
+
+---
+*[Style notes for review, not part of the draft: (1) every paragraph opens with a finding or a
+question, none with "In recent years"; (2) the two prior papers are quoted precisely and
+credited for what their data shows before the correction is made; (3) hedges appear only where
+the claim table concedes (e.g., "almost never", "~0.5%"); (4) the one rhetorical flourish —
+"not an omen" — is load-bearing; cut it if it reads as flavor.]*
diff --git a/paper/setup_results.md b/paper/setup_results.md
new file mode 100644
index 0000000..d6976d5
--- /dev/null
+++ b/paper/setup_results.md
@@ -0,0 +1,129 @@
+# 2 Setup
+
+**Models and task.** We study two trained recursive reasoners on Sudoku-Extreme with the
+1k×1000-augmentation training set: HRM (27M parameters; checkpoint at step 26,040; 52.6% exact
+accuracy on our evaluation samples) and TRM-MLP trained with the official recipe (5M; global
+batch size 768; checkpoint at step 58,590, the best of its run; 86.9% on the full test set,
+87.6% on our n=2,048 sample). Inference runs a fixed 16-segment unroll; the adaptive-computation
+halting signal (q_halt) is recorded at every segment but not applied, so every trajectory is
+observed for the full budget. Answers are decoded at segment 16.
+
+**Per-example measurements.** Along each trajectory we record three families of quantities.
+First, the leading k=8 finite-time Lyapunov exponents of the joint latent dynamics: tangent
+vectors in the concatenated (z_H, z_L) space are propagated with Jacobian-vector products
+through every state update and re-orthonormalized by QR at each sub-step; λ_i is the
+time-average of log|R_ii| (the log of the absolute diagonal of R) over the full trajectory
+(336 sub-updates for TRM, 64 for HRM). Second, the per-segment state displacement ‖z^{(t)} − z^{(t−1)}‖ for z_H and z_L
+separately ("drift"). Third, q_halt, exact correctness, and token accuracy. Exponent values
+are comparable only within an estimator implementation; we replicate the HRM analysis under a
+second, earlier implementation (n=8,192) and report its scale separately.
+
+**The settling criterion.** Late drift — the mean z_H displacement over the final four
+segments — is bimodal in log scale for every checkpoint we examine: a narrow low-velocity band
+(characteristic residual velocity 0.96 per segment for HRM, 18.5 for TRM, interquartile width
+under 10%) separated from a high-velocity mode by one to two orders of magnitude. We call a
+trajectory *settled* if its late drift falls in the low band. Thresholds are set by Otsu's
+method on the pooled log distribution; every result below is reported with a full percentile
+sweep, and the headline TRM result is threshold-free. Settled is a band property, not a fixed
+point: both bands have nonzero characteristic velocity.
+
+**Design.** Crossing the settling criterion with answer correctness yields four cells:
+settled-correct (A), settled-wrong (B), unsettled-correct (C), unsettled-wrong (D). The
+analysis asks three questions. How is failure distributed over B versus D? What does λ₁ add
+beyond the settling split? And when along the trajectory does the discriminative signal exist?
+
+# 3 Results
+
+## 3.1 Failure is wandering: the 2×2 decomposition
+
+At the end of inference, success and failure occupy different dynamical regimes almost without
+exception (Table 1). In TRM, 254 of 2,048 puzzles are answered incorrectly and none of them is
+settled: the minimum late drift among failures (log₁₀ = 1.66, ≈46 per segment) exceeds the
+late drift of 96.5% of successes, so no threshold assignment can place a failure in the
+settled band. Failed trajectories also remain locally expansive over the full window (median
+λ₁ = +0.103, IQR +0.094 to +0.111) while settled successes sit at the edge of contraction
+(+0.011). The same decomposition at a mid-training checkpoint, and across a ten-checkpoint
+series, shows the settled-wrong cell empty from 20% of training onward.
+
+HRM adds the one exception, and it is small. At the strict band threshold, 21 of 3,894
+failures (0.55%; n=8,192) end settled; the replication under the second estimator gives
+5 of 971 (0.5%; n=2,048). These settled-wrong trajectories are dynamically indistinguishable
+from successes: λ₁ median −0.842 against −0.867 for settled-correct, drift profiles inside the
+A band from segment ~4 onward (Figure 2), and final halting confidence identical to successes
+(median q_halt +7.47 in both cells, against −9.6 for wandering failures). All 21 crossed the
+halting threshold between segments 4 and 9; under adaptive computation each would have stopped
+early, confident and wrong. Their token accuracy spans 0.41–0.88, and the three least accurate
+are all 17-givens (minimum-clue) puzzles. This cell is the wrong-attractor mode of Ren & Liu
+(2026), measured: it exists, it carries exactly the contraction signature their account
+predicts, and it is two orders of magnitude less common than wandering.
+
+The unsettled-correct cell (C) is the mirror curiosity: 3–7% of successes are still moving at
+segment 16 (70 of 1,794 in TRM; 57 of 1,077 in HRM), with halting confidence as high as
+settled successes. Their existence shows the decode head can read a correct answer off a
+moving state; we do not observe what happens to them past the window.
+
+## 3.2 What the exponent is not measuring
+
+The λ₁ separation is not a restatement of the settling split. Within the unsettled population,
+where every trajectory is still moving, λ₁ ranks eventual successes above failures inside
+narrow displacement bands: splitting unsettled HRM trajectories into late-drift deciles (decile
+width ≤0.04 log units over most of the range) gives within-decile AUC from 0.97 at low drift
+to 0.69 at the highest decile, weighted mean 0.879 (n=8,192); the second estimator gives 0.900
+(n=2,048). A trajectory's expansion rate carries outcome information beyond how fast it is
+moving.
+
+The separation is also not a difficulty artifact, at least not at the resolution of clue
+count. Accuracy varies strongly with the number of givens (Spearman +0.28), and λ₁ is itself
+difficulty-correlated (−0.35 overall, −0.16/−0.18 within outcome classes), yet conditioning
+removes nothing: within givens bins, AUC(−λ₁ → correct) is 0.976–0.987 (weighted 0.982)
+against 0.984 unconditioned. Givens count is a coarse proxy — solver backtrack counts would be
+the sharper control — but at this resolution the dynamical signature is orthogonal to how hard
+the puzzle is.
+
+## 3.3 When the signal exists: concurrent, with no early warning
+
+The discriminative power of the dynamics is a property of the realized trajectory, and it is
+absent at the start. We re-measured both models over only the first four segments (idx-paired
+with the full-window runs, same sampling) and asked whether anything visible by segment 4
+forecasts the final outcome. Unconditioned, early-window λ₁ appears predictive (AUC 0.89 TRM,
+0.73 HRM), but the appearance is inherited from puzzles already solved by segment 4 (69% of
+TRM successes, 34% of HRM's). Restricted to the decision-relevant population — puzzles not yet
+correct at segment 4 — prediction collapses (Table 3). In TRM (n=626, of which 59% eventually
+succeed), AUC is 0.543 for early λ₁, 0.492 for early drift, 0.521 for early halting
+confidence. In HRM (n=1,342, 28% eventually succeed) the dynamical associations invert:
+eventual successes have marginally higher early λ₁ (reverse AUC 0.448) and substantially
+higher early displacement (AUC 0.688 in the positive direction). Among undecided HRM
+trajectories, the ones still moving vigorously are the ones that go on to solve the puzzle.
+
+One early signal does carry information, and it is learned, not dynamical: HRM's q_halt at
+segment 4 predicts eventual success at AUC 0.734. TRM's does not (0.521); TRM's training
+removes HRM's Q-learning continue head in favor of a binary halt loss, a difference we note
+without interpreting. Window length is the untested variable here: four segments matches the
+deep-supervision horizon, and we have not yet swept longer prefixes.
+
+## 3.4 Training widens the gap from the failure side
+
+Over the TRM training series (ten checkpoints, 512 puzzles each), λ₁ of wandering failures
+rises monotonically from +0.036 to +0.102 while λ₁ of settled successes stays within ±0.03 of
+zero; the settled-wrong cell empties by step 52k and stays empty. The outcome separation grows
+over training because the failures become more expansive, while the success regime barely
+moves. HRM's series shows a mass migration instead: at early checkpoints nearly all
+trajectories are low-drift and wrong (the model barely updates state), this cell drains
+through mid-training into high-drift wandering, and accuracy growth then tracks transfer from
+wandering into the settled-correct band.
+
+A preliminary intervention probe is consistent with the decomposition. HRM checkpoints trained
+with multi-rollout initial-state perturbation (K=4, log-uniform noise) shrink the wandering-
+failure cell at matched steps relative to an ordinary baseline (D: 274→175 at step 20,832 and
+247→176 at 23,436, accuracy +0.20 and +0.15), with surviving failures more expansive, and the
+known late-run collapse of this variant coincides with the settled band itself destabilizing
+(λ₁ of settled successes flipping to +0.04). The comparison baseline differs in training
+objective (ACT-streaming versus fixed unroll), so we report this as directional evidence
+pending a matched-objective control.
+
+---
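+*[Section pass notes: em-dash count 6 (§2, §3.2, §3.3: two each, as a paired parenthetical; to rework). Contrast-template count: 2
+("not a fixed point", §2; "learned, not dynamical", §3.3; budget spent in title/intro). Flourish count: 1 ("mirror curiosity", §3.1) — cuttable.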
+Tables referenced: T1 = 2×2 cells both models; T2 = decile + givens AUCs; T3 = early-window
+restricted AUCs. All numbers traceable to analysis_2x2/OBSERVATIONS.md (+ addenda) and
+offline_followups/followups.md.]*
diff --git a/paper/style_contract.md b/paper/style_contract.md
new file mode 100644
index 0000000..4e0f319
--- /dev/null
+++ b/paper/style_contract.md
@@ -0,0 +1,47 @@
+# Style contract (operative checklist for every section pass)
+
+Sources: Shaib et al. 2026 slop taxonomy (density/relevance/coherence are the load-bearing
+dimensions); Buschek's four reviewer complaints; 2026 banned-pattern lists. Applied as a per-
+section pass, not a vibe.
+
+## Hard bans (lexical)
+delve, tapestry, landscape (figurative), testament, pivotal, showcase, intricate, vibrant,
+seamless, elegant, dramatically, novel (self-describing), comprehensive (self-describing),
+notably, crucially, interestingly, importantly, "It is worth noting", "Moreover," as sentence
+opener, "In recent years", "has garnered attention", "paradigm shift".
+Technical terms that overlap ban lists (e.g., "robust" in "threshold-robust") stay.
+
+## Punctuation & rhythm
+- Em dash: ≤2 per section, never two in one sentence, never as paired parenthetical.
+- No exclamation marks. Semicolons fine.
+- Every paragraph contains at least one sentence under ~12 words.
+- No uniform paragraph shapes: don't end three consecutive paragraphs with a summary clause.
+
+## Structure
+- The contrast template ("X, not Y" / "not X, but Y") has a total budget of ONE device:
+ the title and its single echo at the end of paragraph 1. Zero elsewhere. Express other
+ contrasts by stating the positive finding and letting the numbers carry the negation.
+- Rule-of-three closers only when the three items are an exhaustive measured list.
+- No chronological narration of the investigation. Structure follows claims.md.
+- Results topic sentences contain a finding or a number, never a plan ("We then examine...").
+
+## Integrity (Buschek's four, mapped to our risks)
+- Marketing language: zero unevidenced evaluative adjectives. "near-perfect" only with the AUC
+ in the same sentence.
+- Performative related work: every citation paired with the precise claim it supports; check
+ each against papers/notes/*.md before the citation lands.
+- Misrepresentation: prior-work characterizations quote verbatim with page/section; corrections
+ target the quoted words only (claims.md C6a discipline).
+- Stretched summaries: no interpretive metaphors for results. One flourish per section maximum,
+ and it must be cuttable without losing content.
+
+## Density (the actual anti-slop weapon)
+- Every sentence must add a number, a definition, a caveat, or a logical step. Otherwise cut.
+- Modal verbs (could/might/may) confined to Discussion and future work.
+- Hedges appear only where claims.md concedes; one hedge per concession, not a seesaw.
+
+## Honesty rails (project-specific)
+- Never compare λ values across estimator versions; state scale ownership at first use.
+- "Settled" always defined as the measured low-velocity band, with its residual velocity given.
+- No mechanism claims; observations and their direct logical consequences only.
+- No promises (code release, future experiments) that the authors have not decided.
diff --git a/paper/validation/validate_le_estimator.py b/paper/validation/validate_le_estimator.py
new file mode 100644
index 0000000..7ad4fd0
--- /dev/null
+++ b/paper/validation/validate_le_estimator.py
@@ -0,0 +1,107 @@
+"""T0.1 — validate the QR/Benettin FTLE estimator core against systems with KNOWN spectra.
+
+Reimplements the IDENTICAL accumulation used in diagnose_{trm,hrm}_joint.py:
+ Q in R^{n x k} init random-orthonormal; each step apply the (known) Jacobian to Q's columns;
+ every t_ons steps QR-decompose, accumulate sum of log|diag(R)|; LE_i = sum / n_qr_steps.
+
+Test systems (known answers):
+ (a) diagonal linear map LE_i = log|d_i| (exact at all T)
+ (b) symmetric linear map LE_i = log|eig_i| (exact; eig=singular values)
+ (c) non-normal (shear) map LE_i = log|eig_i| asympt. (finite-time transient from singular values)
+ (d) Henon map (a=1.4,b=0.3) LE = {+0.41922, -1.62319} (nonlinear chaotic; literature value)
+
+A passing result = recovered exponents match known to within tolerance, confirming the QR core
+(orthonormalization cadence, log|diag R| bookkeeping, ordering, averaging) is correct.
+No GPU, no model — this isolates the numerical estimator.
+"""
+from __future__ import annotations
+import numpy as np
+
+RNG = np.random.default_rng(0)  # module-wide generator with fixed seed 0; drawn from by qr_le and run
+
+
+def qr_le(jac_fn, x0, n_steps, k, t_ons=1, warmup=0):
+ """Benettin/QR LE estimate. jac_fn(x)->(x_next, J) gives next state and Jacobian at x.
+ Mirrors diagnose_*_joint.py: QR every t_ons steps, accumulate log|diag R|, average over QR steps. Returns the k estimates sorted descending."""
+ x = np.asarray(x0, float)
+ d = x.shape[0]
+ Q, _ = np.linalg.qr(RNG.standard_normal((d, k)))  # random initial tangent basis, orthonormalized by QR
+ log_R_sum = np.zeros(k)
+ n_qr = 0
+ for t in range(n_steps):
+ x, J = jac_fn(x)
+ Q = J @ Q
+ if (t + 1) % t_ons == 0:
+ Q, R = np.linalg.qr(Q)
+ if t >= warmup:  # QR steps with t < warmup still re-orthonormalize Q but are not accumulated
+ log_R_sum += np.log(np.clip(np.abs(np.diag(R)), 1e-30, None))  # clip keeps log finite if a diagonal entry is 0
+ n_qr += 1
+ return np.sort(log_R_sum / max(n_qr, 1))[::-1]  # NOTE: mean per QR step, i.e. per t_ons map steps when t_ons > 1
+
+
+def run():  # runs checks (a)-(d), prints the report, and writes it to validation_results.md beside this file
+ out = ["# T0.1 estimator validation (QR/Benettin core vs known spectra)", ""]
+ tol = 5e-3  # absolute tolerance on max|err| used by checks (a) and (b)
+
+ # (a) diagonal
+ d_vals = np.array([1.5, 0.8, 0.3, 0.05])
+ M = np.diag(d_vals)
+ known = np.sort(np.log(np.abs(d_vals)))[::-1]
+ est = qr_le(lambda x: (x, M), np.ones(4), 4000, k=4) # linear: J state-independent, don't grow x
+ out += [f"(a) diagonal linear: known {np.round(known,4)}",
+ f" recovered {np.round(est,4)} max|err|={np.max(np.abs(est-known)):.2e} "
+ f"{'PASS' if np.max(np.abs(est-known))<tol else 'FAIL'}"]
+
+ # (b) symmetric
+ A = RNG.standard_normal((5, 5)); S = (A + A.T) / 2
+ # rescale so the spectral radius is exactly 0.9 (largest |eig| = 0.9; keep magnitudes spread, finite)
+ S = 0.9 * S / np.max(np.abs(np.linalg.eigvalsh(S)))
+ eig = np.linalg.eigvalsh(S)
+ known = np.sort(np.log(np.abs(eig)))[::-1]
+ est = qr_le(lambda x: (x, S), np.ones(5), 8000, k=5)
+ out += [f"(b) symmetric linear: known {np.round(known,4)}",
+ f" recovered {np.round(est,4)} max|err|={np.max(np.abs(est-known)):.2e} "
+ f"{'PASS' if np.max(np.abs(est-known))<tol else 'FAIL'}"]
+
+ # (c) non-normal shear: LE -> log|eig| asymptotically; finite-time transient from singular values
+ N = np.array([[1.1, 5.0], [0.0, 0.6]]) # eigenvalues 1.1, 0.6 (triangular); highly non-normal
+ known = np.sort(np.log(np.abs(np.linalg.eigvals(N))))[::-1]
+ est_long = qr_le(lambda x: (x, N), np.ones(2), 40000, k=2)
+ sv = np.sort(np.log(np.linalg.svd(N, compute_uv=False)))[::-1]
+ est_short = qr_le(lambda x: (x, N), np.ones(2), 5, k=2)  # reported for reference only; no PASS/FAIL is attached to it
+ out += [f"(c) non-normal shear: known asymptotic log|eig| {np.round(known,4)}",
+ f" recovered (T=40000) {np.round(est_long,4)} "
+ f"max|err|={np.max(np.abs(est_long-known)):.2e} "
+ f"{'PASS' if np.max(np.abs(est_long-known))<1e-2 else 'FAIL'}",  # this check uses 1e-2, not tol
+ f" single-step log singular values {np.round(sv,4)} (finite-time transient ref)",
+ f" recovered (T=5, finite-time) {np.round(est_short,4)} "
+ f"(should sit between sv and asymptotic -> confirms finite-time != asymptotic)"]
+
+ # (d) Henon map
+ a, b = 1.4, 0.3
+ def henon(x):
+ xn = np.array([1 - a * x[0] ** 2 + x[1], b * x[0]])
+ J = np.array([[-2 * a * x[0], 1.0], [b, 0.0]])
+ return xn, J
+ # settle onto attractor first
+ x = np.array([0.1, 0.1])
+ for _ in range(1000):
+ x, _ = henon(x)
+ known = np.array([0.41922, -1.62319]) # literature (Sprott)
+ est = qr_le(henon, x, 200000, k=2, warmup=1000)  # warmup: the first 1000 QR steps are left out of the average
+ out += [f"(d) Henon (a=1.4,b=0.3): literature {np.round(known,4)} (sum={known.sum():.4f})",
+ f" recovered {np.round(est,4)} (sum={est.sum():.4f}) "
+ f"|err λ1|={abs(est[0]-known[0]):.2e} "
+ f"{'PASS' if abs(est[0]-known[0])<5e-3 else 'FAIL'}"]  # PASS/FAIL is judged on λ1 only
+
+ out += ["", "Interpretation: (a)(b) confirm exact recovery for normal maps; (c) confirms the",
+ "estimator converges to log|eig| asymptotically while finite-time windows reflect",
+ "singular-value growth (the regime our paper operates in); (d) confirms correct",
+ "recovery on a known chaotic nonlinear system. The QR core is validated."]
+ print("\n".join(out))
+ from pathlib import Path
+ Path(__file__).resolve().parent.joinpath("validation_results.md").write_text("\n".join(out))  # joined text has no trailing newline
+
+
+if __name__ == "__main__":  # run the validation only when executed as a script, not on import
+ run()
diff --git a/paper/validation/validation_results.md b/paper/validation/validation_results.md
new file mode 100644
index 0000000..008a73a
--- /dev/null
+++ b/paper/validation/validation_results.md
@@ -0,0 +1,17 @@
+# T0.1 estimator validation (QR/Benettin core vs known spectra)
+
+(a) diagonal linear: known [ 0.4055 -0.2231 -1.204 -2.9957]
+ recovered [ 0.4047 -0.2236 -1.2032 -2.9953] max|err|=7.99e-04 PASS
+(b) symmetric linear: known [-0.1054 -0.6391 -0.9467 -1.4414 -3.074 ]
+ recovered [-0.1055 -0.6392 -0.9465 -1.4415 -3.0738] max|err|=1.85e-04 PASS
+(c) non-normal shear: known asymptotic log|eig| [ 0.0953 -0.5108]
+ recovered (T=40000) [ 0.0953 -0.5109] max|err|=3.84e-05 PASS
+ single-step log singular values [ 1.6396 -2.0551] (finite-time transient ref)
+ recovered (T=5, finite-time) [ 0.5468 -0.9623] (should sit between sv and asymptotic -> confirms finite-time != asymptotic)
+(d) Henon (a=1.4,b=0.3): literature [ 0.4192 -1.6232] (sum=-1.2040)
+ recovered [ 0.4193 -1.6233] (sum=-1.2040) |err λ1|=8.44e-05 PASS
+
+Interpretation: (a)(b) confirm exact recovery for normal maps; (c) confirms the
+estimator converges to log|eig| asymptotically while finite-time windows reflect
+singular-value growth (the regime our paper operates in); (d) confirms correct
+recovery on a known chaotic nonlinear system. The QR core is validated. \ No newline at end of file
diff --git a/papers/txt/engelken2023_gradient_flossing.txt b/papers/txt/engelken2023_gradient_flossing.txt
new file mode 100644
index 0000000..6d70608
--- /dev/null
+++ b/papers/txt/engelken2023_gradient_flossing.txt
@@ -0,0 +1,1879 @@
+ Gradient Flossing: Improving Gradient Descent
+ through Dynamic Control of Jacobians
+
+
+ Rainer Engelken
+ Zuckerman Mind Brain Behavior Institute
+ Columbia University
+arXiv:2312.17306v1 [cs.LG] 28 Dec 2023
+
+
+
+
+ New York, USA
+ re2365@columbia.edu
+
+
+
+ Abstract
+ Training recurrent neural networks (RNNs) remains a challenge due to the insta-
+ bility of gradients across long time horizons, which can lead to exploding and
+ vanishing gradients. Recent research has linked these problems to the values
+ of Lyapunov exponents for the forward-dynamics, which describe the growth or
+ shrinkage of infinitesimal perturbations. Here, we propose gradient flossing, a
+ novel approach to tackling gradient instability by pushing Lyapunov exponents
+ of the forward dynamics toward zero during learning. We achieve this by regu-
+ larizing Lyapunov exponents through backpropagation using differentiable linear
+ algebra. This enables us to "floss" the gradients, stabilizing them and thus improv-
+ ing network training. We demonstrate that gradient flossing controls not only the
+ gradient norm but also the condition number of the long-term Jacobian, facilitating
+ multidimensional error feedback propagation. We find that applying gradient
+ flossing prior to training enhances both the success rate and convergence speed for
+ tasks involving long time horizons. For challenging tasks, we show that gradient
+ flossing during training can further increase the time horizon that can be bridged
+ by backpropagation through time. Moreover, we demonstrate the effectiveness of
+ our approach on various RNN architectures and tasks of variable temporal com-
+ plexity. Additionally, we provide a simple implementation of our gradient flossing
+ algorithm that can be used in practice. Our results indicate that gradient flossing
+ via regularizing Lyapunov exponents can significantly enhance the effectiveness of
+ RNN training and mitigate the exploding and vanishing gradients problem.
+
+
+ 1 Introduction
+ Recurrent neural networks are commonly used both in machine learning and computational neu-
+ roscience for tasks that involve input-to-output mappings over sequences and dynamic trajectories.
+ Training is often achieved through gradient descent by the backpropagation of error information
+ across time steps [1, 2, 3, 4]. This amounts to unrolling the network dynamics in time and recursively
+ applying the chain rule to calculate the gradient of the loss with respect to the network parameters.
+ Mathematically, evaluating the product of Jacobians of the recurrent state update describes how error
+ signals travel across time steps. When trained on tasks that have long-range temporal dependencies,
+ recurrent neural networks are prone to exploding and vanishing gradients [5, 6, 7, 8]. These arise from
+ the exponential amplification or attenuation of recursive derivatives of recurrent network states over
+ many time steps. Intuitively, to evaluate how an output error depends on a small parameter change at
+ a much earlier point in time, the error information has to be propagated through the recurrent network
+ states iteratively. Mathematically, this corresponds to a product of Jacobians that describe how
+ changes in one recurrent network state depend on changes in the previous network state. Together,
+ this product forms the long-term Jacobian. The singular value spectrum of the long-term Jacobian reg-
+
+ 37th Conference on Neural Information Processing Systems (NeurIPS 2023).
+ ulates how well error signals can propagate backwards along multiple time steps, allowing temporal
+credit assignment. A close mathematical correspondence of these singular values and the Lyapunov
+exponents of the forward dynamics was established recently [9, 10, 11, 12]. Lyapunov exponents
+characterize the asymptotic average rate of exponential divergence or convergence of nearby initial
+conditions and are a cornerstone of dynamical systems theory [13, 14]. We will use this link to
+improve the trainability of RNNs.
+Previous approaches that tackled the problem of exploding or vanishing gradients have suggested
+solutions at different levels. First, specialized units such as LSTM and GRU were introduced,
+which have additional latent variables that can be decoupled from the recurrent network states via
+multiplicative (gating) interactions. The gating interactions shield the latent memory state, which can
+therefore transport information across multiple time steps [5, 6, 15]. Second, exploding gradients can
+be avoided by gradient clipping, which re-scales the gradient norm [16] or their individual elements
+[17] if they become too large [18]. Third, normalization schemes like batch normalization prevent
+saturated nonlinearities that contribute to vanishing gradients [19]. Third, it was suggested that the
+problem of exploding/vanishing gradients can be ameliorated by specialized network architectures,
+for example, antisymmetric networks [20], orthogonal/unitary initializations [21, 22, 23], coupled
+oscillatory RNNs [24], Lipschitz RNNs [25], linear recurrent units [26], echo state networks [27, 28],
+(recurrent) highway networks [29, 30], and stable limit cycle neural networks [11, 31, 32]. Fourth, for
+large networks, a suitable choice of weights can guarantee a well-conditioned Jacobian at initialization
+[21, 33, 34, 35, 36, 37, 38, 39, 40, 41]. These initializations are based on mean-field methods, which
+become exact only in the large-network limit. Such initialization schemes have also been suggested
+for gated networks [40]. However, even when initializing the network with well-behaved gradients,
+gradients will typically not retain their stability during training once the network parameters have
+changed.
+Here, we propose a novel approach to tackling this challenge by introducing gradient flossing, a
+technique that keeps gradients well-behaved throughout training. Gradient flossing is based on
+a recently described link between the gradients of backpropagation through time and Lyapunov
+exponents, which are the time-averaged logarithms of the singular values of the long-term Jacobian
+[9, 11, 12, 32]. Gradient flossing regularizes one or several Lyapunov exponents to keep them close
+to zero during training. This improves not only the error gradient norm but also the condition number
+of the long-term Jacobian. As a result, error signals can be propagated back over longer time horizons.
+We first demonstrate that the Lyapunov exponents can be controlled during training by including an
+additional loss term. We then demonstrate that gradient flossing improves the gradient norm and
+effective dimension of the gradient signal. We find empirically that gradient flossing improves test
+accuracy and convergence speed on synthetic tasks over a range of temporal complexities. Finally,
+we find that gradient flossing during training further helps to bridge long-time horizons and show
+that it combines well with other approaches to ameliorate exploding and vanishing gradients, such as
+dynamic mean-field theory for initialization, orthogonal initialization and gated units.
+Our contributions include:
+
+ • Gradient flossing, a novel approach to the problem of exploding and vanishing gradients in
+ recurrent neural networks based on regularization of Lyapunov exponents.
+
+ • Analytical estimates of the condition number of the long-term Jacobian based on Lyapunov
+ exponents.
+
+ • Empirical evidence that gradient flossing improves training on tasks that involve bridging
+ long time horizons.
+
+
+2 RNN Gradients and Lyapunov Exponents
+
+We begin by revisiting the established mathematical relationship between the gradients of the loss
+function, computed via backpropagation through time, and Lyapunov exponents [9, 12], and how
+it relates to the problem of vanishing and exploding gradients. In backpropagation through time,
+network parameters θ are iteratively updated by stochastic gradient descent such that a loss $L_t$ is
+locally reduced [1, 2, 3, 4]. For RNN dynamics $h_{s+1} = f_\theta(h_s, x_{s+1})$, with recurrent network state
+h, external input x, and parameters θ, the gradient of the loss $L_t$ with respect to θ is evaluated by
+
+
+ 2
+ unrolling the network dynamics in time. The resulting expression for the gradient is given by:
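+\begin{equation}
+\frac{\partial L_t}{\partial \theta}
+  = \frac{\partial L_t}{\partial h_t} \sum_{\tau=t-l}^{t-1}
+    \left( \prod_{\tau'=\tau}^{t-1} \frac{\partial h_{\tau'+1}}{\partial h_{\tau'}} \right)
+    \frac{\partial h_\tau}{\partial \theta}
+  = \frac{\partial L_t}{\partial h_t} \sum_{\tau} T_t(h_\tau) \frac{\partial h_\tau}{\partial \theta}
+\tag{1}\end{equation}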
+
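+where $T_t(h_\tau)$ is composed of a product of one-step Jacobians $D_s = \frac{\partial h_{s+1}}{\partial h_s}$:
+\begin{equation}
+T_t(h_\tau)
+  = \prod_{\tau'=\tau}^{t-1} \frac{\partial h_{\tau'+1}}{\partial h_{\tau'}}
+  = \prod_{\tau'=\tau}^{t-1} D_{\tau'}
+\tag{2}
+\end{equation}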
+
+Due to the chain of matrix multiplications in Tt , the gradients tend to vanish or explode exponentially
+with time. This complicates training particularly when the task loss at time t dependents on inputs x
+or states h from many time steps prior which creates long temporal dependencies [5, 6, 7, 8]. How
+well error signals can propagate back in time is constrained by the tangent space dynamics along
+trajectory ht , which dictate how local perturbations around each point on the trajectory stretch, rotate,
+shear, or compress as the system evolves.
+The singular values of the Jacobian’s product Tt , which determine how quickly gradients vanish or
+explode during backpropagation through time, are directly related to the Lyapunov exponents of the
+forward dynamics [9, 12]: Lyapunov exponents λ1 ≥ λ2 · · · ≥ λN are defined as the asymptotic
+time-averaged logarithms of the singular values of the long-term Jacobian [13, 42, 43]
+$$\lambda_i = \lim_{t\to\infty} \frac{1}{t-\tau} \log(\sigma_{i,t}) \qquad (3)$$
+
+where σi,t denotes the ith singular value of Tt (hτ ) with σ1,t ≥ σ2,t ≥ · · · ≥ σN,t (see Appendix I
+for details). This means that positive Lyapunov exponents in the forward dynamics correspond to
+exponentially exploding gradient modes, while negative Lyapunov exponents in the forward dynamics
+correspond to exponentially vanishing gradient modes.
+In summary, the Lyapunov exponents give the average asymptotic exponential growth rates of
+infinitesimal perturbations in the tangent space of the forward dynamics, which also constrain the
+signal propagation in backpropagation for long time horizons. Lyapunov exponents close to zero in
+the forward dynamics correspond to tangent space directions along which error signals are neither
+drastically attenuated nor amplified in backpropagation through time. Such close-to-neutral modes in
+the tangent dynamics can propagate information reliably across many time steps.
+
+3 Gradient Flossing: Idea and Algorithm
+We now leverage the mathematical connection established between Lyapunov exponents and the
+prevalent issue of exploding and vanishing gradients for regularizing the singular values of the
+long-term Jacobian. We term this procedure gradient flossing. To prevent exploding and vanishing
+gradients, we constrain Lyapunov exponents to be close to zero. This ensures that the corresponding
+directions in tangent space grow and shrink on average only slowly. This leads to a better-conditioned
+long-term Jacobian Tt (hτ ). We achieve this by using the sum of the squares of the first k largest
+Lyapunov exponents λ1 , λ2 . . . λk as a loss function:
+$$L_{\text{flossing}} = \sum_{i=1}^{k} \lambda_i^2 \qquad (4)$$
+
+and evaluate the gradient obtained from backpropagation through time:
+$$\frac{\partial L_{\text{flossing}}}{\partial \theta} = \sum_{i=1}^{k} \frac{\partial \lambda_i^2}{\partial \theta} \qquad (5)$$
+
+This might seem like an ill-fated enterprise, as the gradient expression in Eq 5 suffers from its
+own problem of exploding and vanishing gradients. However, instead of calculating the Lyapunov
+exponents by directly evaluating the long-term Jacobian Tt (Eq 2), we use an established iterative
+reorthonormalization method involving QR decomposition that avoids directly evaluating the ill-
+conditioned long-term Jacobian [12, 44].
+
+
+ 3
+ First, we evolve an initially orthonormal system Qs = [q1s , q2s , . . . qks ] in the tangent space along the
+trajectory using the Jacobian $D_s = \frac{\partial h_{s+1}}{\partial h_s}$. This means to calculate
+
+$$\tilde{Q}_{s+1} = D_s Q_s \qquad (6)$$
+at every time-step. Second, we extract the exponential growth rates using the QR decomposition,
+$$\tilde{Q}_{s+1} = Q_{s+1} R_{s+1},$$
+which decomposes $\tilde{Q}_{s+1}$ uniquely into the product of an orthonormal matrix $Q_{s+1}$ of size N × k
+so $Q_{s+1}^\top Q_{s+1} = 1_{k \times k}$ and an upper triangular matrix $R_{s+1}$ of size k × k with positive diagonal
+elements. Note that the QR decomposition does not have to be applied at every step, just sufficiently
+often, i.e., once every $t_{\mathrm{ONS}}$ steps such that $\tilde{Q}$ does not become ill-conditioned.
+The Lyapunov exponents are given by time-averaged logarithms of the diagonal entries of Rs [43, 44]:
+$$\lambda_i = \lim_{t\to\infty} \frac{1}{t} \log \prod_{s=1}^{t} R^s_{ii} = \lim_{t\to\infty} \frac{1}{t} \sum_{s=1}^{t} \log R^s_{ii}. \qquad (7)$$
+This way, the Lyapunov exponent can be expressed in terms of a temporal average over the diagonal
+elements of the Rs -matrix of a QR decomposition of the iterated Jacobian. To propagate the gradient
+of the square of the Lyapunov exponents backward through time in gradient flossing, we used an
+analytical expression for the pullback of the QR decomposition [45]: The backward pass of the QR
+decomposition is given by [45, 46, 47, 48]
+$$\overline{\tilde{Q}} = \left[ \overline{Q} + Q\, \mathrm{copyltu}(M) \right] R^{-T}, \qquad (8)$$
+where $M = R \overline{R}^T - \overline{Q}^T Q$ and the copyltu function generates a symmetric matrix by copying
+the lower triangle of the input matrix to its upper triangle, with the element $[\mathrm{copyltu}(M)]_{ij} = M_{\max(i,j),\min(i,j)}$
+[45, 46, 47, 48]. We denote here the adjoint variable as $\overline{T} = \partial L/\partial T$. A simple
+implementation of this algorithm in pseudocode is:
+Algorithm 1 Algorithm for gradient flossing of k tangent space directions
+ initialize h, Q
+ for e = 1 → E do
+ for t = 1 → T do
+ h ← fθ (h, x)
+ D ← dh_t / dh_{t−1}
+ Q←D·Q
+ if t ≡ 0 (mod tONS ) then
+ Q, R ← qr(Q)
+ γi += log(Rii )
+ end if
+ end for
+ λi = γi /T
+ θ_{e+1} ← θ_e − η ∂L_flossing/∂θ
+ end for
+For clarity, we described gradient flossing in terms of stochastic gradient descent, but we actually
+implemented it with the ADAM optimizer using standard hyperparameters η, β1 and β2 . An example
+implementation is available here. Note that this algorithm also works for different recurrent network
+architectures. In this case, the Jacobian D has size n × n, where n is the number of dynamic
+variables of the recurrent network model. For example, in case of a single recurrent network of
+N LSTM units, the Jacobian has size 2N × 2N [9, 12, 41]. The Jacobian matrix D can either be
+calculated analytically or it can be obtained via automatic differentiation.
+
+4 Gradient Flossing: Control of Lyapunov Exponents
+In Fig 1, we demonstrate that gradient flossing can set one or several Lyapunov exponents to a
+target value via gradient descent with the ADAM optimizer in random Vanilla RNNs initialized with
+different weight variances. The N units of the recurrent neural network follow the dynamics
+ hs+1 = f (hs , xs+1 ) = Wϕ(hs ) + Vxs+1 . (9)
+
+
+ 4
+ C
+ t−1
+ ∂hτ ′ +1 10 1
+ number of flossed i
+ Y
+ Tt (hτ ) = k = 32
+ ∂hτ ′ k = 16
+ τ ′ =τ
+ 10 3 k=1
+
+
+
+
+ 2
+ i
+ i=1
+ k
+ 1
+ k
+ 10 5
+
+
+ 0 20 40 60 80 100
+ Epochs
+ B D
+ 0.0 0
+ 0.5 2
+1(1/ )
+
+
+
+
+ i(1/ )
+ 1.0 4 number of flossed i
+ k = 32
+ 1.5 target 1 6 k = 16
+ actual 1 k=1
+ 2.0
+ 0 10 20 30 40 50 60 70 80 0.0 0.2 0.4 0.6 0.8 1.0
+ Epochs i/N
+Figure 1: Gradient flossing controls Lyapunov exponents and gradient signal propagation
+A) Exploding and vanishing gradients in backpropagation through time arise from amplifica-
+tion/attenuation of the product of Jacobians that form the long-term Jacobian $T_t(h_\tau) = \prod_{\tau'=\tau}^{t-1} \frac{\partial h_{\tau'+1}}{\partial h_{\tau'}}$.
+B) First Lyapunov exponent of Vanilla RNN as a function of training epochs. Minimizing the
+mean squared error between estimated first Lyapunov exponent and target Lyapunov exponent
+λ1 = −1, −0.5, 0 by gradient descent. 10 Vanilla RNNs were initialized with Gaussian recurrent
+weights Wij ∼ N (0, g 2 /N ) where values of g were drawn g ∼ Unif(0, 1). C) Gradient flossing
+minimizes the square of Lyapunov exponents over epochs. D) Full Lyapunov spectrum of Vanilla
+RNN after a different number of Lyapunov exponents are pushed to zero via gradient flossing. Note,
+the variability of the Lyapunov exponents that were not flossed. Parameters: network size N = 32
+with 10 network realizations. Error bars in C indicate the 25% and 75% percentiles and solid line
+shows median.
+
+
+The initial entries of W are drawn independently from a Gaussian distribution with zero mean and
+variance g 2 /N , where g is a gain parameter that controls the heterogeneity of weights. We here use
+the transfer function ϕ(x) = tanh(x). (See appendix B for gradient flossing with ReLU and LSTM
+units). xs is a sequence of inputs and V is the input weight. xs is a stream of i.i.d. Gaussian input
+xs ∼ N (0, 1) and the input weights V are N (0, 1). Both W and V are trained during gradient
+flossing.
+In Fig 1B, we show that for randomly initialized RNNs, the Lyapunov exponent can be modified by
+gradient flossing to match a desired target value. The networks were initialized with 10 different values
+of initial weight strength g chosen uniformly between 0 and 1. During gradient flossing, they quickly
+approached three different target values of the first Lyapunov exponent $\lambda_1^{\text{target}} = \{-1, -0.5, 0\}$
+within less than 100 training epochs with batch size B = 1. We note that gradient flossing with
+positive target $\lambda_1^{\text{target}}$ seems not to arrive at a positive Lyapunov exponent λ1 .
+Fig 1C shows gradient flossing for different numbers of Lyapunov exponents k. Here, during gradient-
+descent, the sum of the squares of 1, 16, or 32 Lyapunov exponents is used as loss in gradient flossing
+(see Fig 1A). Fig 1D shows the Lyapunov spectrum after flossing, which now has 1, 16, or 32
+Lyapunov exponents close to zero. We conclude that gradient flossing can selectively manipulate one,
+several, or all Lyapunov exponents before or during network training. Gradient flossing also works for
+RNNs of ReLU and LSTM units (see appendix B). Further, we find that the computational bottleneck
+of gradient flossing is the QR decomposition, which has a computational complexity of $O(N k^2)$,
+both in the forward pass and in the backward pass. Thus, gradient flossing of the entire Lyapunov
+spectrum is computationally expensive. However, as we will show, not all Lyapunov exponents need
+to be flossed and only short episodes of gradient flossing are sufficient for significantly improving the
+training performance.
+
+
+ 5
+ 5 Gradient Flossing: Condition Number of the Long-Term Jacobian
+ A B C
+condition number 2(Tt( ))
+ 1034 1026 initial
+ after flossing 1030
+ 1025 theory
+ 1019 simulations
+
+
+
+
+ 2 (theory)
+ 2(Tt( ))
+ 1020
+ 1016 1012 k
+ initial 1010 5
+ 107 after flossing 105 10
+ theory 15
+ simulations 100
+ 0 100 200 300 5 10 15 100 1010 1020 1030
+ time horizon t tangent space dimension m 2 (numerical)
+Figure 2: Gradient flossing reduces condition number of the long-term Jacobian A) Condition
+number κ2 of long-term Jacobian Tt (hτ ) as a function of time horizon t − τ at initialization (blue)
+and after gradient flossing (orange). Direct numerical simulations are done with arbitrary precision
+floating point arithmetic (transparent lines) with 256 bits per float, asymptotic theory based on
+Lyapunov exponents (dashed lines) (Eq 10). B) Condition number for different number of tangent
+space dimensions m. Simulations (dots) and Lyapunov exponent based theory (dashed lines) at
+initialization (blue) and after gradient flossing (orange). Gradient flossing increases the number of
+tangent space dimensions available for backpropagation for a given condition number (Grey dotted
+line as a guide for the eye for κ2 = 10^5.) First 15 Lyapunov exponents were flossed. C) Comparison of
+condition number obtained via direct numerical simulations vs. Lyapunov exponent-based theory. Colors
+denote the number of flossed Lyapunov exponents k. Parameters: g = 1, batch size b = 1, N = 80,
+epochs = 500, T = 500, gradient flossing for Ef = 500 epochs. Input xs identical to delayed XOR
+task in Fig 3D.
+
+A well-conditioned Jacobian is essential for efficient and fast learning [21, 49, 50]. Gradient
+flossing improves the condition number of the long-term Jacobian which constrains the error signal
+propagation across long time horizons in backpropagation (Fig 2). The condition number κ2 of a
+linear map A measures how close the map is to being singular and is given by the ratio of the largest
+singular value σmax and the smallest singular value σmin , so $\kappa_2(A) = \frac{\sigma_{\max}(A)}{\sigma_{\min}(A)}$. According to the
+rule of thumb given in [51], if $\kappa_2(A) = 10^p$, one can anticipate losing at least p digits of precision
+when solving the equation Ax = b. Note that the long-term Jacobian Tt is composed of a product of
+Jacobians, which generically makes it ill-conditioned. To nevertheless quantify the condition number
+numerically, we use arbitrary-precision arithmetic with 256 bits per float. We find numerically that
+the condition number of Tt exponentially diverges with the number of time steps (Fig 2A). We
+compare the numerically measured condition number κ2 with an asymptotic approximation of the
+condition number based on Lyapunov exponents that are calculated in the forward pass and find a
+good match (Fig 2A).
+Our theoretical estimate of the condition number κ2 of an orthonormal system Q of size N × m that
+is temporally evolved by the long-term Jacobian Tt is:
+
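+$$\kappa_2(\tilde{Q}_{t+\tau}) = \kappa_2\left( T_t(h_\tau) Q_t \right) = \frac{\sigma_1(T_t(h_\tau))}{\sigma_m(T_t(h_\tau))} \approx \exp\left( (\lambda_1 - \lambda_m)(t - \tau) \right). \qquad (10)$$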
+where σ1 (Tt (hτ )) and σm (Tt (hτ )) are the first and mth singular value of the long-term Jacobian.
+We note that this theoretical estimate of the condition number follows from the asymptotic definition
+of Lyapunov exponents and should be exact in the limit of long times. We find that gradient flossing
+reduces the condition number by a factor whose magnitude increases exponentially with time (orange
+in Fig 2A). Thus, we can expect that gradient flossing has a stronger effect on problems with a long
+time horizon to bridge. We will later confirm this numerically.
+Moreover, Lyapunov exponents enable the estimation of the number of gradient dimensions available
+for the backpropagation of error signals. Generally, the long-term Jacobian is ill-conditioned, however,
+the Lyapunov spectrum provides for a given number of tangent space dimensions an estimate of the
+condition number. This indicates how close to singular the gradient signal for a given number of
+tangent space dimensions is. Given a fixed acceptable condition number—determined, for example,
+by noise level or floating-point precision—we observe that gradient flossing increases the number of
+usable tangent space dimensions for backpropagation (Fig 2B).
+
+
+ 6
+ Finally, we show that the asymptotic estimate of the condition number based on Lyapunov exponents
+can even predict differences in condition number that originate from finite network size N (Fig 2C).
+We emphasize that this goes beyond mean-field methods, which become exact only in the large-
+network limit N → ∞ and usually do not capture finite-size effects [52] (see appendix G).
+
+6 Initial Gradient Flossing Improves Trainability
+ A delayed copy task B delayed XOR task
+ 10 2 10 2
+test loss
+
+
+
+
+ test loss
+ no gradient flossing no gradient flossing
+ 10 3 gradient flossing gradient flossing
+ 10 3
+ 10 4
+ 0 2000 4000 6000 8000 10000 0 2000 4000 6000 8000 10000
+ Epochs Epochs
+ C delayed copy task D delayed XOR task
+ 10 2 10 2
+
+ 10 3 10 3
+test loss
+
+
+
+
+ test loss
+
+ 10 4
+ 10 4
+
+ 20 40 60 80 10 20 30 40 50 60 70 80
+ task difficulty (delay d) task difficulty (delay d)
+Figure 3: Gradient flossing improves trainability on tasks that involve long time horizons A) Test
+error for Vanilla RNNs trained on delayed copy task yt = xt−d for d = 40 with and without gradient
+flossing. Solid lines are medians across 5 network realizations. B) Same as A for delayed
+XOR task with yt = |xt−d/2 − xt−d |. C) Mean final test loss as a function of task difficulty (delay d)
+for delayed copy task. D) Mean final test loss as a function of task difficulty (delay d) for delayed
+XOR task. Parameters: g = 1, batch size b = 16, N = 80, epochs = 10^4 , T = 300, gradient flossing
+for Ef = 500 epochs on k = 75 before training. Shaded regions in C and D indicate the 20% and
+80% percentiles and solid line shows mean. Dots are individual runs. Task loss: MSE(y, ŷ).
+
+We next present numerical results on two tasks with variable spatial and temporal complexity,
+demonstrating that gradient flossing before training improves the trainability of Vanilla RNNs. We
+call gradient flossing before training in the following preflossing. For preflossing, we first initialize the
+network randomly, then minimize $L_{\text{flossing}} = \sum_{i=1}^{k} \lambda_i^2$ using the ADAM optimizer and subsequently
+train on the tasks. We deliberately do not use sequential MNIST or similar toy tasks commonly used
+to probe exploding/vanishing gradients, because we want a task where the structure of long-range
+dependencies in the data is transparent and can be varied as desired.
+First, we consider the delayed copy task, where a scalar stream of random input numbers x must be
+reproduced by the output y delayed by d time steps, i.e. yt = xt−d . Although the task itself is trivial
+and can be solved even by a linear network through a delay line (see appendix E), RNNs encounter
+vanishing gradients for large delays d during training even with ’critical’ initialization with g = 1.
+Our experiments show that gradient flossing can substantially improve the performance of RNNs
+on this task (Fig 3A, C). While Vanilla RNNs without gradient flossing fail to train reliably beyond
+d = 20, Vanilla RNNs with gradient flossing can be reliably trained for d = 40 (Fig 3C). Note that
+we flossed here k = 40 Lyapunov exponents before training. We will later investigate the role of the
+number of flossed Lyapunov exponents.
+Second, we consider the temporal XOR task, which requires the RNN to perform a nonlinear input-
+output computation on a sequential stream of scalar inputs, i.e., yt = |xt−d/2 − xt−d |, where d
+denotes a time delay of d time steps (For details see appendix H). Fig 3D demonstrates that gradient
+flossing helps to train networks on a substantially longer delay d. We found similar improvements
+through gradient flossing for RNNs initialized with orthogonal weights (see appendix G).
+
+
+ 7
+ 7 Gradient Flossing During Training
+ A delayed temporal XOR B delayed spatial XOR
+ 100 flossing during training 100
+test accuracy (%) 90 preflossing 90
+
+
+
+
+ test accuracy (%)
+ no flossing
+ 80 80
+ 70 70
+ 60 60
+ 50 50
+ 0 2000 4000 6000 8000 10000 0 2000 4000 6000 8000 10000
+ Epochs Epochs
+ C D
+ 100 100
+ 90 90
+test accuracy (%)
+
+
+
+
+ test accuracy (%)
+ 80 80 flossing during training
+ preflossing
+ 70 70 no flossing
+ 60 60
+ 50 50
+ 10 20
+ 30 40 50 60 70 10 20 30 40 50 60 70
+ complexity (delay d) complexity (delay d)
+Figure 4: Gradient flossing during training further improves trainability
+A) Test accuracy for Vanilla RNNs trained on delayed temporal binary XOR task yt = xt−d/2 ⊕ xt−d
+with gradient flossing during training (green), preflossing (gradient flossing before training) (orange),
+and with no gradient flossing (blue) for d = 70. Solid lines are mean across 20 network realizations,
+individual network realizations shown in transparent fine lines. B) Same as A for delayed spatial
+XOR task with $y_t = x^1_{t-d} \oplus x^2_{t-d} \oplus x^3_{t-d}$. Parameters (g = 1, batch size b = 16). C) Test accuracy
+as a function of task difficulty (delay d) for delayed temporal XOR task. D) Test accuracy as a
+function of task difficulty (delay d) for delayed spatial XOR task. Parameters: g = 1, batch size
+b = 16, N = 80, epochs = 10^4 , T = 300, gradient flossing for Ef = 500 epochs on k = 75 before
+training and during training for green lines, and only before training for orange lines. Same plotting
+conventions as previous figure. Task loss: cross-entropy between y and ŷ.
+We next investigate the effects of gradient flossing during the training and find that gradient flossing
+during training can further improve trainability. We trained RNNs on two more challenging tasks
+with variable temporal complexity and performed gradient flossing either both during and before
+training, only before training, or not at all.
+Fig 4A shows the test accuracy for Vanilla RNNs training on the delayed temporal XOR task
+yt = xt−d/2 ⊕ xt−d with random Bernoulli process x ∈ {0, 1}. The accuracy of Vanilla RNNs
+falls to chance level for d ≥ 40 (Fig 4C). With gradient flossing before training, the trainability
+can be improved, but still goes to chance level for d = 70. In contrast, for networks with gradient
+flossing during training, the accuracy is improved to > 80% at d = 70. In this case, we preflossed
+for 500 epochs before task training and again after 500 epochs of training on the task. In Fig 4B,
+D the networks have to perform the nonlinear XOR operation $y_t = x^1_{t-d} \oplus x^2_{t-d} \oplus x^3_{t-d}$ on a
+three-dimensional binary input signal x1 , x2 , and x3 and generate the correct output with a delay of
+d steps. While the solution of the task itself is not difficult and could even be implemented by hand
+(see appendix), the task is challenging for backpropagation through time because nonlinear temporal
+associations bridging long time horizons have to be formed. Again, we observe that gradient flossing
+before training improves the performance compared to baseline, but starts failing for long delays
+d > 60. In contrast, networks that are also flossed during training can solve even more difficult tasks
+(Fig 4D). We find that after gradient flossing, the norm of the error gradient with respect to initial
+conditions h0 is amplified (appendix C). Interestingly, gradient flossing can also be detrimental to
+task performance if it is continued throughout all training epochs (appendix C).
+We note that merely regularizing the spectral radius of the recurrent weight matrix W or the individual
+one-step Jacobians Ds numerically or based on mean-field theory does not yield such a training
+improvement. This suggests that taking the temporal correlations between Jacobians Ds into account
+is important for improving trainability.
+
+
+ 8
+ 7.1 Gradient Flossing for Different Numbers of Flossed Lyapunov Exponents
+
+We investigated how many Lyapunov exponents k have to be flossed to achieve an improvement in
+training success (Fig 5). We studied this in the binary temporal delayed XOR task with gradient
+flossing during training (same as Fig 3) and varied the task difficulty by changing the delay d.
+We found that as the task becomes more difficult, networks where not enough Lyapunov exponents
+k are flossed begin to fall below 100% test accuracy (Fig 5A). Correspondingly, when measuring
+final test accuracy as a function of the number of flossed Lyapunov exponents, we observed that
+more Lyapunov exponents k have to be flossed to achieve 100% accuracy as the tasks become more
+difficult (Fig 5B). We also show the entire parameter plane of median test accuracy as a function of
+both number of flossed Lyapunov exponents k and task difficulty (delay d), and found the same trend
+(Fig 5B). Overall, we found that tasks with larger delay d require more Lyapunov exponents close to
+zero. We note that this might also partially be caused by the ’streaming’ nature of the task: in our
+tasks, longer delays automatically imply that more values have to be stored as at any moment all the
+values in the ’delay line’ have to be remembered to successfully solve the tasks. This is different from
+tasks where a single variable has to be stored and recalled after a long delay. It would be interesting
+to study tasks where the number of delay steps and the number of items in memory can be varied
+independently.
+Finally, we did the same analysis on networks with only preflossing (gradient flossing before training)
+and found the same trend (supplement Fig 7D), however, in that case even if all N Lyapunov
+exponents were flossed, thus k = N , they were not able to solve the most difficult tasks. This seems
+to indicate that gradient flossing during training cannot be replaced by just gradient flossing more
+Lyapunov exponents before training.
+
+
+
+
+Figure 5: Gradient flossing for different numbers of flossed Lyapunov exponents
+A) Test accuracy for delayed temporal XOR task as a function of delay d with different numbers
+of flossed Lyapunov exponents k. B) Same data as A but here test accuracy as a function of number
+of flossed Lyapunov exponents k. Parameters: g = 1, batch size b = 16, N = 80, epochs = 10^4
+for delayed temporal XOR, epochs = 5000 for delayed spatial XOR, T = 300, gradient flossing
+for Ef = 500 epochs before training and during training for A, B. Shaded areas are 25% and 75%
+percentile, solid lines are means, transparent dots are individual simulations, task loss: cross-entropy
+between y and ŷ.
+8 Limitations
+The mathematical connection between Lyapunov exponents and backpropagation through time
+exploited in gradient flossing is rigorously established only in the infinite-time limit. It would be
+interesting to extend our analysis to finite-time Lyapunov exponents.
+Furthermore, the backpropagation through time gradient involves a sum over products of Jacobians
+of different time periods t − τ , but the Lyapunov exponent only considers the asymptotic longest
+product. Additionally, Lyapunov exponents characterize the asymptotic dynamics on the attractor of
+the dynamics, whereas RNNs often exploit transient dynamics from some initial conditions outside
+or towards the attractor.
+Although our proposed method focuses on exploiting Lyapunov exponents, it neglects the geometry
+of covariant Lyapunov vectors [53], which could be used to improve training performance, speed,
+and reliability. Additionally, it is important to investigate how sensitive the method is to the choice
+of orthonormal basis employed because it is only guaranteed to become unique asymptotically [54].
+
+
+ 9
+ Finally, the computational cost of our method scales with O(N k 2 ), where N is the network size
+and k is the number of Lyapunov exponents calculated. To reduce the computational cost, we
+suggest doing QR decomposition only sufficiently often to ensure that the orthonormal system is
+not ill-conditioned and using gradient flossing only intermittently or as pretraining. One could also
+calculate the Lyapunov spectrum for a shorter time interval or use a cheaper proxy for the Lyapunov
+spectrum and investigate more efficient gradient flossing schedules.
+
+
+9 Discussion
+
+We tackle the problem of gradient signal propagation in recurrent neural networks through a dy-
+namical systems lens. We introduce a novel method called gradient flossing that addresses the
+problem of gradient instability during training. Our approach enhances gradient signal stability both
+before and during training by regularizing Lyapunov exponents. By keeping the long-term Jacobian
+well-conditioned, gradient flossing optimizes both training accuracy and speed. To achieve this,
+we combine established dynamical systems methods for calculating Lyapunov exponents with an
+analytical pullback of the QR factorization. This allows us to establish and maintain gradient stability
+in a manner that is memory-efficient, numerically stable, and exact across long time horizons.
+Our method is applicable to arbitrary RNN architectures, nonlinearities, and also neural ODEs [55].
+Empirically, pre-training with gradient flossing enhances both training speed and accuracy. For
+difficult temporal credit assignment problems, gradient flossing throughout training further enhances
+signal propagation. We also demonstrate the versatility of our method on a set of synthetic tasks
+with controllable time-complexity and show that it can be combined with other approaches to tackle
+exploding and vanishing gradients, such as dynamic mean-field theory for initialization, orthogonal
+initialization and specialized single units, such as LSTMs.
+Prior research on exploding and vanishing gradients mainly focused on selecting network architectures
+that are less prone to exploding/vanishing gradients or finding parameter initializations that provide
+well-conditioned gradients at least at the beginning of training. Our introduced gradient flossing can
+be seen as a complementary approach that can further enhance gradient stability throughout training.
+Compared to the work on picking good parameter initializations based on random matrix theory [41]
+and mean-field heuristics [40], gradient flossing provides several improvements: First, mean-field
+theory only considers the gradient flow at initialization, while gradient flossing can maintain gradient
+flow and well-conditioned Jacobians throughout the training process. Second, random matrix theory
+and mean-field heuristics are usually confined to the limit of large networks [52], while gradient
+flossing can be used for networks of any size. The link between Lyapunov exponents and the gradients
+of backpropagation through time has been described previously [9, 12] and has been spelled out
+analytically and studied numerically [10, 11, 56, 57, 58]. In contrast, we use Lyapunov exponents
+here not only as a diagnostic tool for gradient stability but also to show that they can directly be part
+of the cure for exploding and vanishing gradients.
+Future investigations could delve further into the roles of the second to N th Lyapunov exponents
+in trainability, and how it is related to the task at hand, the rank of the parameter update, the dimen-
+sionality of the solution space, as well as the network dynamics (see also [32, 59, 60]). Our results
+suggest a trade-off between trainability across long time horizons and the nonlinear task demands
+that is worth exploring in more detail (appendix C). Applying gradient flossing to real-time recurrent
+learning and its biologically plausible variants is another avenue [61]. Extending gradient flossing to
+feedforward networks, state-space models and transformers is a promising avenue for future research
+(see also [62, 63]). While Lyapunov exponents are only strictly defined for dynamical systems, such
+as maps or flows that are endomorphisms, the long-term Jacobian of deep feedforward networks
+could be treated similarly. This could also provide a link between the stability of the network against
+adversarial examples and its dynamic stability, as measured by Lyapunov exponents. Given that
+time-varying input can suppress chaos in recurrent networks [9, 12, 64, 65, 66, 67], we anticipate it
+may exacerbate vanishing gradients. Gradient flossing could also be applied in neural architecture
+search, to identify and optimize trainable networks. Finally, gradient flossing is applicable to other
+model parameters, as well. For instance, gradients of Lyapunov exponents with respect to single-
+unit parameters could optimize the activation function and single-neuron biophysics in biologically
+plausible neuron models.
+
+
+
+ 10
+ Acknowledgments and Disclosure of Funding
+I thank E. Izhikevich, F. Wolf, S. Goedeke, J. Lindner, L.F. Abbott, L. Logiaco, M. Schottdorf, G.
+Wayne and P. Sokol for fruitful discussions and J. Stone, L.F. Abbott, M. Ding, O. Marshall, S.
+Goedeke, S. Lippl, M.P. Puelma-Touzel, J. Lindner and the reviewers for feedback on the manuscript.
+I thank Jinguo Liu for the Julia package BackwardsLinalg.jl. Research supported by NSF NeuroNex
+Award (DBI-1707398), the Gatsby Charitable Foundation (GAT3708), the Simons Collaboration for
+the Global Brain (542939SPI), and the Swartz Foundation (2021-6).
+
+References
+ [1] P. WERBOS. Beyond Regression: New Tools for Prediction and Analysis in the Behavioral Sciences. Ph. D. dissertation, Harvard University, 1974.
+ [2] DB Parker. Learning-logic (TR-47). Center for Computational Research in Economics and Management
+ Science. MIT-Press, Cambridge, Mass, 8, 1985.
+ [3] Y. LECUN. Une procedure d’apprentissage pour reseau a seuil asymetrique. Proceedings of Cognitiva 85,
+ pages 599–604, 1985.
+ [4] David E. Rumelhart, Geoffrey E. Hinton, and Ronald J. Williams. Learning representations by back-
+ propagating errors. Nature, 323(6088):533, October 1986.
+ [5] Sepp Hochreiter. Untersuchungen zu dynamischen neuronalen Netzen. PhD thesis, 1991.
+ [6] Sepp Hochreiter and Jürgen Schmidhuber. Long Short-Term Memory. Neural Computation, 9(8):1735–
+ 1780, 1997.
+ [7] Y. Bengio, P. Simard, and P. Frasconi. Learning long-term dependencies with gradient descent is difficult.
+ IEEE Transactions on Neural Networks, 5(2):157–166, March 1994.
+ [8] Razvan Pascanu, Tomas Mikolov, and Yoshua Bengio. On the difficulty of training Recurrent Neural
+ Networks. arXiv:1211.5063 [cs], November 2012. arXiv: 1211.5063.
+ [9] Rainer Engelken, Fred Wolf, and L. F. Abbott. Lyapunov spectra of chaotic recurrent neural networks.
+ arXiv:2006.02427 [nlin, q-bio], June 2020. arXiv: 2006.02427.
+[10] Jonas Mikhaeil, Zahra Monfared, and Daniel Durstewitz. On the difficulty of learning chaotic dynamics
+ with RNNs. Advances in Neural Information Processing Systems, 35:11297–11312, December 2022.
+[11] Il Memming Park, Ábel Ságodi, and Piotr Aleksander Sokół. Persistent learning signals and working
+ memory without continuous attractors. ArXiv, page arXiv:2308.12585v1, August 2023.
+[12] Rainer Engelken, Fred Wolf, and L. F. Abbott. Lyapunov spectra of chaotic recurrent neural networks.
+ Physical Review Research, 5(4):043044, October 2023.
+[13] J. P. Eckmann and D. Ruelle. Ergodic theory of chaos and strange attractors. Reviews of Modern Physics,
+ 57(3):617–656, July 1985.
+[14] Arkady Pikovsky and Antonio Politi. Lyapunov Exponents: A Tool to Explore Complex Dynamics.
+ Cambridge University Press, Cambridge, February 2016.
+[15] Kyunghyun Cho, Bart van Merrienboer, Dzmitry Bahdanau, and Yoshua Bengio. On the Properties of
+ Neural Machine Translation: Encoder-Decoder Approaches. Technical report, September 2014. ADS
+ Bibcode: 2014arXiv1409.1259C Type: article.
+[16] Razvan Pascanu, Tomas Mikolov, and Yoshua Bengio. On the difficulty of training recurrent neural
+ networks. In Proceedings of the 30th International Conference on International Conference on Machine
+ Learning - Volume 28, ICML’13, pages III–1310–III–1318, Atlanta, GA, USA, June 2013. JMLR.org.
+[17] Tomáš Mikolov. Statistical language models based on neural networks. Ph.D. thesis, Brno University of
+ Technology, Faculty of Information Technology, Brno, CZ, 2012.
+[18] Ian Goodfellow, Yoshua Bengio, and Aaron Courville. Deep Learning. MIT Press, November 2016.
+ Google-Books-ID: omivDQAAQBAJ.
+[19] Sergey Ioffe and Christian Szegedy. Batch Normalization: Accelerating Deep Network Training by
+ Reducing Internal Covariate Shift. In Proceedings of the 32nd International Conference on Machine
+ Learning, pages 448–456. PMLR, June 2015.
+
+
+ 11
+ [20] Bo Chang, Minmin Chen, Eldad Haber, and Ed H. Chi. AntisymmetricRNN: A Dynamical System View
+ on Recurrent Neural Networks. International Conference on Learning Representations, December 2018.
+
+[21] Andrew M. Saxe, James L. McClelland, and Surya Ganguli. Exact solutions to the nonlinear dynamics of
+ learning in deep linear neural networks. arXiv:1312.6120 [cond-mat, q-bio, stat], December 2013. arXiv:
+ 1312.6120.
+
+[22] Martin Arjovsky, Amar Shah, and Yoshua Bengio. Unitary Evolution Recurrent Neural Networks. In
+ Proceedings of The 33rd International Conference on Machine Learning, pages 1120–1128. PMLR, June
+ 2016.
+
+[23] Kyle Helfrich, Devin Willmott, and Qiang Ye. Orthogonal Recurrent Neural Networks with Scaled Cayley
+ Transform. In Proceedings of the 35th International Conference on Machine Learning, pages 1969–1978.
+ PMLR, July 2018.
+
+[24] T. Konstantin Rusch and Siddhartha Mishra. Coupled Oscillatory Recurrent Neural Network (coRNN):
+ An accurate and (gradient) stable architecture for learning long time dependencies. arXiv e-prints, page
+ arXiv:2010.00951, October 2020.
+
+[25] N. Benjamin Erichson, Omri Azencot, Alejandro Queiruga, Liam Hodgkinson, and Michael W. Mahoney.
+ Lipschitz Recurrent Neural Networks. International Conference on Learning Representations, January
+ 2021.
+
+[26] Antonio Orvieto, Samuel L Smith, Albert Gu, Anushan Fernando, Caglar Gulcehre, Razvan Pascanu, and
+ Soham De. Resurrecting Recurrent Neural Networks for Long Sequences. Technical report, March 2023.
+ ADS Bibcode: 2023arXiv230306349O Type: article.
+
+[27] Herbert Jaeger. The “echo state” approach to analysing and training recurrent neural networks-with an
+ erratum note. Bonn, Germany: German National Research Center for Information Technology GMD
+ Technical Report, 148:34, 2001.
+
+[28] Mustafa C. Ozturk, Dongming Xu, and José C. Príncipe. Analysis and Design of Echo State Networks.
+ Neural Computation, 19(1):111–138, January 2007.
+
+[29] Rupesh K Srivastava, Klaus Greff, and Jürgen Schmidhuber. Training Very Deep Networks. In Advances
+ in Neural Information Processing Systems, volume 28. Curran Associates, Inc., 2015.
+
+[30] Julian Georg Zilly, Rupesh Kumar Srivastava, Jan Koutnik, and Jürgen Schmidhuber. Recurrent Highway
+ Networks. In Proceedings of the 34th International Conference on Machine Learning, pages 4189–4198.
+ PMLR, July 2017.
+
+[31] Piotr A. Sokół, Ian Jordan, Eben Kadile, and Il Memming Park. Adjoint Dynamics of Stable Limit Cycle
+ Neural Networks. In 2019 53rd Asilomar Conference on Signals, Systems, and Computers, pages 884–887,
+ November 2019. ISSN: 2576-2303.
+
+[32] Piotr A. Sokół. Geometry of Learning and Representations in Neural Networks. PhD thesis, Stony Brook
+ University, May 2023.
+
+[33] Xavier Glorot, Antoine Bordes, and Yoshua Bengio. Deep Sparse Rectifier Neural Networks. volume 15,
+ pages 315–323, April 2011.
+
+[34] Ben Poole, Subhaneil Lahiri, Maithra Raghu, Jascha Sohl-Dickstein, and Surya Ganguli. Exponential
+ expressivity in deep neural networks through transient chaos. arXiv:1606.05340 [cond-mat, stat], June
+ 2016. arXiv: 1606.05340.
+
+[35] Minmin Chen, Jeffrey Pennington, and Samuel S. Schoenholz. Dynamical Isometry and a Mean Field
+ Theory of RNNs: Gating Enables Signal Propagation in Recurrent Neural Networks. arXiv:1806.05394
+ [cs, stat], August 2018. arXiv: 1806.05394.
+
+[36] Boris Hanin and Mihai Nica. Products of Many Large Random Matrices and Gradients in Deep Neural
+ Networks. December 2018.
+
+[37] Samuel S. Schoenholz, Justin Gilmer, Surya Ganguli, and Jascha Sohl-Dickstein. Deep Information
+ Propagation. November 2016.
+
+[38] Boris Hanin and David Rolnick. How to Start Training: The Effect of Initialization and Architecture. In
+ Advances in Neural Information Processing Systems, volume 31. Curran Associates, Inc., 2018.
+
+
+ 12
+ [39] Piotr A. Sokol and Il Memming Park. Information Geometry of Orthogonal Initializations and Training.
+ Technical report, October 2018. ADS Bibcode: 2018arXiv181003785S Type: article.
+[40] Dar Gilboa, Bo Chang, Minmin Chen, Greg Yang, Samuel S. Schoenholz, Ed H. Chi, and Jeffrey
+ Pennington. Dynamical Isometry and a Mean Field Theory of LSTMs and GRUs. January 2019.
+[41] Tankut Can, Kamesh Krishnamurthy, and David J. Schwab. Gating creates slow modes and controls
+ phase-space complexity in GRUs and LSTMs. arXiv:2002.00025 [cond-mat, stat], January 2020. arXiv:
+ 2002.00025.
+[42] Valery Iustinovich Oseledets. A multiplicative ergodic theorem. Characteristic Ljapunov, exponents of
+ dynamical systems. Trudy Moskovskogo Matematicheskogo Obshchestva, 19:179–210, 1968.
+[43] Karlheinz Geist, Ulrich Parlitz, and Werner Lauterborn. Comparison of Different Methods for Computing
+ Lyapunov Exponents. Progress of Theoretical Physics, 83(5):875–893, May 1990.
+[44] Giancarlo Benettin, Luigi Galgani, Antonio Giorgilli, and Jean-Marie Strelcyn. Lyapunov Characteristic
+ Exponents for smooth dynamical systems and for hamiltonian systems; A method for computing all of
+ them. Part 2: Numerical application. Meccanica, 15(1):21–30, March 1980.
+[45] Hai-Jun Liao, Jin-Guo Liu, Lei Wang, and Tao Xiang. Differentiable Programming Tensor Networks.
+ Physical Review X, 9(3):031041, September 2019. arXiv:1903.09650 [cond-mat, physics:quant-ph].
+[46] S. F. Walter and L. Lehmann. Algorithmic Differentiation of Linear Algebra Functions with Application
+ in Optimum Experimental Design (Extended Version). Technical report, January 2010. ADS Bibcode:
+ 2010arXiv1001.1654W Type: article.
+[47] Robert Schreiber and Charles Van Loan. A Storage-Efficient $WY$ Representation for Products of
+ Householder Transformations. SIAM Journal on Scientific and Statistical Computing, 10(1):53–57, January
+ 1989.
+[48] Matthias Seeger, Asmus Hetzel, Zhenwen Dai, Eric Meissner, and Neil D. Lawrence. Auto-Differentiating
+ Linear Algebra. Technical report, October 2017. ADS Bibcode: 2017arXiv171008717S Type: article.
+[49] Jeffrey Pennington, Samuel Schoenholz, and Surya Ganguli. Resurrecting the sigmoid in deep learning
+ through dynamical isometry: theory and practice. In Advances in Neural Information Processing Systems,
+ volume 30. Curran Associates, Inc., 2017.
+[50] Jeffrey Pennington, Samuel S. Schoenholz, and Surya Ganguli. The Emergence of Spectral Universality in
+ Deep Networks. arXiv:1802.09979 [cs, stat], February 2018. arXiv: 1802.09979.
+[51] E. Cheney and David Kincaid. Numerical Mathematics and Computing. Cengage Learning, August 2007.
+ Google-Books-ID: ZUfVZELlrMEC.
+[52] Yasaman Bahri, Jonathan Kadmon, Jeffrey Pennington, Sam S. Schoenholz, Jascha Sohl-Dickstein, and
+ Surya Ganguli. Statistical Mechanics of Deep Learning. Annual Review of Condensed Matter Physics,
+ 11(1):501–528, 2020.
+[53] F. Ginelli, P. Poggi, A. Turchi, H. Chaté, R. Livi, and A. Politi. Characterizing Dynamics with Covariant
+ Lyapunov Vectors. Physical Review Letters, 99(13):130601, September 2007.
+[54] Sergey V. Ershov and Alexei B. Potapov. On the concept of stationary Lyapunov basis. Physica D:
+ Nonlinear Phenomena, 118(3):167–198, July 1998.
+[55] Ricky T. Q. Chen, Yulia Rubanova, Jesse Bettencourt, and David K Duvenaud. Neural Ordinary Differential
+ Equations. In Advances in Neural Information Processing Systems, volume 31. Curran Associates, Inc.,
+ 2018.
+[56] Javed Lindner. Investigating the exploding and vanishing gradients problem with Lyapunov exponents.
+ Master’s thesis, RWTH Aachen, Aachen/Juelich, 2021.
+[57] Ryan Vogt, Maximilian Puelma Touzel, Eli Shlizerman, and Guillaume Lajoie. On Lyapunov Exponents
+ for RNNs: Understanding Information Propagation Using Dynamical Systems Tools. Frontiers in Applied
+ Mathematics and Statistics, 8, 2022.
+[58] Kamesh Krishnamurthy, Tankut Can, and David J. Schwab. Theory of Gating in Recurrent Neural Networks.
+ Physical Review X, 12(1):011011, January 2022.
+[59] L. S. Pontryagin. Mathematical Theory of Optimal Processes. Routledge, New York, 1st edition,
+ March 1987.
+
+
+ 13
+ [60] Daniel Liberzon. Calculus of Variations and Optimal Control Theory: A Concise Introduction. In Calculus
+ of Variations and Optimal Control Theory. Princeton University Press, December 2011.
+
+[61] Owen Marschall, Kyunghyun Cho, and Cristina Savin. A unified framework of online learning algorithms
+ for training recurrent neural networks. The Journal of Machine Learning Research, 21(1):135:5320–
+ 135:5353, January 2020.
+
+[62] Judy Hoffman, Daniel A. Roberts, and Sho Yaida. Robust Learning with Jacobian Regularization. Technical
+ report, August 2019. ADS Bibcode: 2019arXiv190802729H Type: article.
+[63] Hongyi Zhang, Yann N. Dauphin, and Tengyu Ma. Fixup Initialization: Residual Learning Without
+ Normalization. Technical report, January 2019. ADS Bibcode: 2019arXiv190109321Z Type: article.
+
+[64] L. Molgedey, J. Schuchhardt, and H. G. Schuster. Suppressing chaos in neural networks by noise. Physical
+ Review Letters, 69(26):3717–3719, December 1992.
+
+[65] Kanaka Rajan, L. F. Abbott, and Haim Sompolinsky. Stimulus-dependent suppression of chaos in recurrent
+ neural networks. Physical Review E, 82(1):011903, July 2010.
+
+[66] Jannis Schuecker, Sven Goedeke, David Dahmen, and Moritz Helias. Functional methods for disordered
+ neural networks. arXiv:1605.06758 [cond-mat, q-bio], May 2016. arXiv: 1605.06758.
+
+[67] Rainer Engelken, Alessandro Ingrosso, Ramin Khajeh, Sven Goedeke, and L. F. Abbott. Input correlations
+ impede suppression of chaos and learning in balanced firing-rate networks. PLOS Computational Biology,
+ 18(12):e1010590, December 2022.
+
+[68] Edward Ott. Chaos in Dynamical Systems. Cambridge University Press, August 2002. Google-Books-ID:
+ PfXoAwAAQBAJ.
+[69] Angelo Vulpiani, Fabio Cecconi, and Massimo Cencini. Chaos: From Simple Models to Complex Systems.
+ World Scientific Pub Co Inc, Hackensack, NJ, September 2009.
+
+
+
+
+ 14
+ A Backpropagation Through QR Decomposition
+The backward pass of the QR decomposition $A = QR$ is given by [45, 46, 47, 48]
+$$\overline{A} = \left[\overline{Q} + Q\,\mathrm{copyltu}(M)\right] R^{-T}, \qquad (11)$$
+where $M = R\overline{R}^{T} - \overline{Q}^{T} Q$ and the copyltu function generates a symmetric matrix by copying the lower
+triangle of the input matrix to its upper triangle, with the element $[\mathrm{copyltu}(M)]_{ij} = M_{\max(i,j),\min(i,j)}$
+[45, 46, 47, 48]. Adjoint variables are written here as $\overline{T} = \partial L/\partial T$.
+Using an analytical pullback is more memory-efficient and less computationally costly than directly doing
+automatic differentiation through the QR-decomposition. Moreover, from a practical perspective, for QR
+decomposition, often BLAS/LAPACK routines are utilized which are not amenable to common differentiable
+programming frameworks like TensorFlow, PyTorch, JAX and Zygote. In our implementation of gradient
+flossing, we used the Julia package BackwardsLinalg.jl by Jinguo Liu, available here.
+
+B Further Details and Analysis of Gradient Flossing
+An example implementation of gradient flossing in Flux, a machine learning library in Julia is available here.
+We are actively developing implementations for other widely used differentiable programming frameworks.
+
+B.1 Gradient Flossing for recurrent LSTM and ReLU networks
+
+ A LSTM C ReLU
+ 0.2 target 1 6 target 1
+ actual 1 actual 1
+ 4
+ 0.0
+ 2
+ 1
+
+
+
+
+ 1
+
+
+
+
+ 0.2
+ 0
+ 0.4
+ 2
+ 0 20 40 60 80 100 0 20 40 60 80 100
+ Epochs Epochs
+ B D 101
+ 10 1
+ 10 2 10 1
+ 10 3
+ 2
+
+
+
+
+ 2
+ 1
+
+
+
+
+ 1
+
+
+
+
+ 10 4 10 3
+ 10 5
+ 10 6 10 5
+ 0 20 40 60 80 100 0 20 40 60 80 100
+ Epochs Epochs
+
+
+Figure 6: Gradient flossing for recurrent LSTM networks and recurrent ReLU networks A) First
+Lyapunov exponent of LSTM network as a function of training epochs. Minimizing the mean
+squared error between estimated first Lyapunov exponent and target Lyapunov exponent λ1 = 0
+by gradient descent. First Lyapunov exponent of LSTM network (solid lines) converges to target
+value (thick dashed lines) within less than 100 epochs. 10 random LSTM RNNs were initialized with
+Gaussian recurrent weights, where standard deviations of weight scaling were drawn g ∼ Unif(0, 1).
+B) Gradient flossing minimizes the square of the first Lyapunov exponent of random recurrent
+LSTM networks over epochs. C) Same as A for recurrent ReLU network. Here networks were
+initialized with Gaussian recurrent weights $W_{ij} \sim \mathcal{N}(-0.1, g^2/N)$ where values of g were drawn
+$g \sim \mathrm{Unif}(0, 1)$. D) Same as B for recurrent ReLU network. Parameters: network size N = 32 with 10
+network realizations. Shaded regions in B, D are 25% and 75% percentiles, solid line shows median.
+
+We demonstrate that gradient flossing can also be applied to recurrent LSTM and ReLU networks in Fig 6. To
+this end, we generated random LSTM networks where the weights of all the different gates and biases were
+
+
+ 15
+ independently and identically distributed (i.i.d.) and sampled from Gaussian distributions of different variance.
+Our results show that gradient flossing can also constrain the Lyapunov exponent to be close to zero. The
+dynamics of each of the N LSTM units follows the map [6]:
+ ft = σg (Uf ht−1 + Wf xt + bf ) (12)
+ ot = σg (Uo ht−1 + Wo xt + bo ) (13)
+ it = σg (Ui ht−1 + Wi xt + bi ) (14)
+ c̃t = σh (Uc ht−1 + Wc xt + bc ) (15)
+ ct = ft ⊙ ct−1 + it ⊙ c̃t (16)
+ ht = ot ⊙ ϕ(ct ) (17)
+where ⊙ denotes the Hadamard product, $\sigma_g(x) = \frac{1}{1+\exp(-x)}$ is the sigmoid function, $\sigma_h(x) = \tanh(x)$ and
+entries of the matrices $U_x$ are drawn from $U_x \sim \mathcal{N}(0, g_x^2/N)$. For simplicity, the bias terms $b_x$ are scalars.
+Subscripts f, o and i denote respectively the forget gate, the output gate, the input gate, and c is the cell state.
+In each LSTM unit, there are two dynamic variables c and h, and three gates f, o, and i that control the flow
+of signals into and out of the cell c. We set the values $g_{ih}, g_{ix}, g_{fx}, b_f, g_{ch}, g_{cx}, g_{oh}, g_{ox}$ to be uniformly
+distributed between 0 and 1 and initialize $b_i, g_{fh}, b_c, b_o$ as zero.
+During gradient flossing, the actual Lyapunov exponents of different random network realizations converge close
+to the target Lyapunov exponent $\lambda_1^{\mathrm{target}} = 0$ in fewer than 100 epochs as shown in Fig 6A. Fig 6B shows that
+the squared Lyapunov exponents converge towards zero. We note that for LSTM networks, a target Lyapunov
+exponent of $\lambda_1^{\mathrm{target}} = -1$ is achieved after 100 gradient flossing steps only for a subset of random network
+realizations (not shown). We speculate that this behavior is influenced by the gating structure of LSTM units,
+which seems to naturally place the first Lyapunov exponent close to zero for certain initializations (see also
+[9, 12, 41, 58]).
+For the recurrent ReLU networks, we considered the same Vanilla RNN dynamics as in the main manuscript in
+Eq 9:
+$$h_{s+1} = f(h_s, x_{s+1}) = W\phi(h_s) + V x_{s+1}.$$
+The initial entries of W are drawn independently from a Gaussian distribution with a negative mean of −0.1
+and variance $g^2/N$, where g is a gain parameter that controls the heterogeneity of weights. We use the transfer
+function $\phi(x) = \max(x, 0)$. $x_s$ is a sequence of inputs and V is the input weight. $x_s$ is a stream of i.i.d.
+Gaussian inputs $x_s \sim \mathcal{N}(0, 1)$ and the entries of the input weights V are drawn from $\mathcal{N}(0, 1)$. Both W and V are trained during gradient
+flossing. We found that some ReLU networks initially had unstable dynamics with positive Lyapunov exponents
+(Fig 6C). However, during gradient flossing, these unstable networks were quickly stabilized. Fig 6D shows that
+the squared Lyapunov exponents of ReLU networks converge towards zero.
+
+B.2 Additional Results for Different Numbers of Flossed Lyapunov Exponents
+In addition to the analysis in the main Fig 5, we did the same analysis on networks with only preflossing (gradient flossing
+before training) and found that more Lyapunov exponents k have to be flossed to achieve 100% accuracy as the
+tasks become more difficult (Fig 7D). However, in that case, even if all N Lyapunov exponents were flossed, thus
+k = N, the networks were not able to solve the most difficult tasks. This seems to indicate that gradient flossing during
+training cannot be replaced by just gradient flossing more Lyapunov exponents before training.
+
+
+C Additional Results on Gradient Flossing Throughout Training
+We now discuss some additional results on gradient flossing throughout training. First, we analyze how gradient
+flossing affects the gradients and find that during gradient flossing, the norm of gradients that bridge many time
+steps is boosted. Moreover, subordinate singular values of the error norm of the recurrent weights are also
+boosted, indicating that gradient flossing can increase the effective rank of the parameter update. Additionally,
+we show that if gradient flossing is continued throughout training, it can be detrimental to the accuracy. Finally,
+we show that Lyapunov exponents of successfully trained networks after training for the spatial delayed XOR
+task have a simple relationship to the delay d.
+
+
+D Gradient Flossing boosts the Gradient Norm for Long Time Horizons
+In this section, we investigate the impact of gradient flossing on the norm and structure of the gradient. It is
+important to note that the complete error gradient of backpropagation through time is composed of a summation
+of products of one-step Jacobians, reflecting the number of "loops" the error signal traverses through the recurrent
+dynamics before reaching its target. Consequently, when the singular values of the long-term Jacobian are
+smaller than 1, the influence of the shorter loops typically dominates the long-term Jacobian.
+
+
+ 16
+ A B
+ 100 100
+ k
+ test accuracy (%)
+
+
+
+
+ test accuracy (%)
+ 80 1 80 delay
+ 20 30
+ 40 50
+ 60 70
+ 60 80 60
+
+ 20 40 60 0 20 40 60 80
+ complexity (delay d) number of flossed i k
+ C 100 D 100
+ 70 70
+ complexity (delay d)
+
+
+
+
+ complexity (delay d)
+ test accuracy (%)
+
+
+
+
+ test accuracy (%)
+ 60 60
+ 50 50
+ 40 40
+ 30 30
+ 20 20
+ 10 10
+ 1 10 20 30 40 50 60 70 80 50 1 10 20 30 40 50 60 70 80 50
+ number of flossed i k number of flossed i k
+
+
+Figure 7: Gradient flossing for different numbers of flossed Lyapunov exponents
+A) Test accuracy for delayed temporal XOR task as a function of delay d with different numbers of
+flossed Lyapunov exponents k. B) Same data as A but here test accuracy as a function of number of
+flossed Lyapunov exponents k. C) Median test accuracy for delayed temporal XOR task as a function
+of delay d and k for networks with gradient flossing during training (500 steps of gradient flossing at
+epochs e ∈ {0, 100, 200, 300, 400}). D) Same as C for preflossing only. Parameters: g = 1, batch
+size b = 16, N = 80, epochs = $10^4$ for delayed temporal XOR, epochs = 5000 for delayed spatial
+XOR, T = 300, gradient flossing for $E_f = 500$ epochs before training and during training for A, B,
+C, and only before training for D. Shaded areas are 25% and 75% percentiles, solid lines are means,
+transparent dots are individual simulations, task loss: cross-entropy between y and ŷ.
+
+
+In our tasks, we have full control over the correlation structure of the task and thus know exactly which loop
+length of backpropagation through time is necessary for finding the correct association. We were moreover
+careful in our task design not to have any additional signals in our task that might help to bridge the long time
+scale. In the case of vanishing gradients, the gradient norm is predominantly influenced by the shorter loops,
+even though the actual signal in the gradient originates solely from the loop of length d in our task. To mitigate
+the contamination of spurious signals from shorter loops and effectively extract the gradient that spans long time
+horizons, we focus on the gradient with respect to the initial conditions h0 .
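+$$\frac{\partial L_t}{\partial h_0} = \frac{\partial L_t}{\partial h_t} \sum_{\tau=t-l}^{t-1} \left( \prod_{\tau'=\tau}^{t-1} \frac{\partial h_{\tau'+1}}{\partial h_{\tau'}} \right) \frac{\partial h_\tau}{\partial h_0} = \frac{\partial L_t}{\partial h_t} \sum_{\tau=t-l}^{t-1} \left( \prod_{\tau'=\tau}^{t-1} \frac{\partial h_{\tau'+1}}{\partial h_{\tau'}} \right) \delta_{\tau 0} = \frac{\partial L_t}{\partial h_t}\, T_t(h_0) \qquad (18)$$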
+
+We note that the sum conveniently drops out: only the longest ’loop’ contributes, in other words, the only summand that
+contributes is the product of Jacobians going from 0 to t. By considering this gradient, we can therefore ensure
+that no undesired signals stemming from shorter loops interfere with the analysis. Moreover, we note that we
+use the binary cross entropy loss, which makes the derivative $\frac{\partial L_t}{\partial h_t}$ trivial.
+In Fig 8 we show that gradient flossing boosts the gradient with respect to the initial conditions. Specifically, we
+compare two identical networks trained on the binary delayed temporal XOR task with a loop length of d = 70.
+One network is trained with gradient flossing at epochs e ∈ {0, 100, 200, 300, 400}, while the other is trained
+without gradient flossing.
+
+
+ 17
+For the network without gradient flossing, the gradient norm $\left|\frac{dL}{dh_0}\right|$ diminishes to extremely small values
+($< 10^{-6}$) and remains small throughout training. In contrast, for the network trained with gradient flossing, each
+episode of gradient flossing causes the norm $\left|\frac{dL}{dh_0}\right|$ to spike, surpassing $10^{-2}$. These findings
+are direct evidence that gradient flossing boosts the gradient norm, facilitating the bridging of long time horizons in
+challenging temporal credit assignment tasks. We observe that after several episodes of gradient flossing, the
+gradient norm $\left|\frac{dL}{dh_0}\right|$ of the networks stays around $10^{-4}$ and eventually rises to values around $10^{-2}$. Subsequently
+in training, the test accuracy surpasses chance level (Fig 8B). We observed this temporal relationship between
+gradient norm $\left|\frac{dL}{dh_0}\right|$ and training success consistently across numerous network realizations (Fig 8C and D).
+These findings suggest that the gradient norm $\left|\frac{dL}{dh_0}\right|$ can be a good predictor of learning success, sometimes
+hundreds of epochs before the accuracy exceeds the chance level of 50%. Indeed, when depicting the gradient
+norm aligned to the last epoch where accuracy was ≤ 50%, we see for many network realizations a gradual
+growth of gradient norm over epochs before accuracy surpasses chance level (Fig 9A). Analogously, when
+plotting the accuracy as a function of epoch aligned with the last epoch with $\left|\frac{dL}{dh_0}\right| < 0.001$, we observe for this
+task that the increase of gradient norm $\left|\frac{dL}{dh_0}\right|$ reliably precedes the epoch at which the accuracy surpasses the
+chance level (see Fig 9B). We note that when measuring the overlap of the orientation of the gradient vector
+$\frac{dL}{dh_0}$ with the first covariant Lyapunov vector of the forward dynamics, we found a significant increase in overlap
+around the training epoch where the accuracy surpasses the chance level both in networks with and without
+gradient flossing. This does not come as a surprise as the covariant Lyapunov vector measures the most unstable
+(or least stable) direction in the tangent space of a trajectory and perturbations of $h_0$ that have to travel over
+many epochs align with this direction.
+
+D.1 Gradient Flossing Boosts Effective Dimension of Error Gradient
+To further investigate the effect of gradient flossing on training, we investigated the structure of the error gradient
+and how it is changed by gradient flossing. To this end, we decompose the recurrent weight gradient $\frac{dL}{dW}$
+into a weighted sum of outer products using singular value decomposition, with singular values $\sigma_i\left(\frac{dL}{dW}\right)$ (Fig 10).
+As the Lyapunov exponents are the time-averaged logarithms of the singular values of the asymptotic long-term
+Jacobian Tt (hτ ), this allows us to directly link the effect of pushing Lyapunov exponents toward zero during
+gradient flossing to the structure of the error gradient of the recurrent weights, as they are intimately linked:
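+$$\frac{\partial L_t}{\partial W} = \frac{\partial L_t}{\partial h_t} \sum_{\tau=t-l}^{t-1} \left( \prod_{\tau'=\tau}^{t-1} \frac{\partial h_{\tau'+1}}{\partial h_{\tau'}} \right) \frac{\partial h_\tau}{\partial W} = \frac{\partial L_t}{\partial h_t} \sum_{\tau} T_t(h_\tau)\, \frac{\partial h_\tau}{\partial W} \qquad (19)$$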
+
+We again note that different ’loops’ contribute to the total gradient expression and the Lyapunov exponents only
+characterize the longest loop. Further, we note that in our controlled tasks, depending on delay d, only few of the
+summands are relevant for solving the task. We thus expect the relevant gradient summand that carries important
+signals about the task to be contaminated by summands of both shorter and longer chains, which contribute
+irrelevant fluctuations.
+The singular values of the recurrent weight gradient $\sigma_i\left(\frac{dL}{dW}\right)$ as a function of training epoch reveal that the
+subordinate singular values $\sigma_{20}$ and $\sigma_{40}$ exhibit peaks at the times of gradient flossing,
+while the first singular value $\sigma_1$ only shows a slight peak (Fig 10A). This indicates that gradient flossing
+increases the effective rank of the recurrent weight gradient $\frac{dL}{dW}$. In other words, gradient flossing facilitates
+high-dimensional parameter updates. Our interpretation is that, as gradient flossing pushes Lyapunov exponents
+to zero, the different summands in the total gradient contribute more equitably, as long loops have neither a
+dominant contribution (which would happen for exploding gradients) nor a vanishing contribution (which would
+happen for vanishing gradients). This way, the sum of gradient terms has a higher effective rank.
+In contrast, without gradient flossing, the subordinate singular values (in Fig 10A $\sigma_{20}$ and $\sigma_{40}$) rapidly diminish
+to extremely small values over training epochs and remain very small throughout training. Note however that the
+leading singular values $\sigma_1$ are of comparable size irrespective of whether gradient flossing was performed or not.
+We note that similar to the gradient norm of the loss with respect to the initial condition $\left|\frac{dL}{dh_0}\right|$, the subordinate
+singular values seem to predict when the test accuracy of networks with gradient flossing grows beyond chance
+level (Fig 10B). We confirmed this in multiple other network realizations and give here another example where the
+accuracy grows beyond chance only later during training (Fig 11).
+
+D.2 Gradient Flossing Throughout Training Can Be Detrimental
+We find that gradient flossing continued throughout all training epochs can be detrimental for performance
+(Fig 12). We demonstrate this again in the binary delayed temporal XOR task. We compare three different
+conditions: either we floss throughout the training every 100 training epochs for 500 flossing epochs (red), or
+we floss only early during training at training epochs e ∈ {0, 100, 200, 300, 400} (green), or we do not floss at
+all (blue).
+
+
+ 18
+ A C
+ 10 2 0.05
+ 10 4 0.04
+ |dhd 0 | 10 6 0.03
+
+
+
+
+ |dhd 0 |
+ 10 8 0.02
+ 10 10 0.01
+ with flossing during training
+ 10 12 without flossing
+ 0.00
+ 0 500 1000 1500 2000 2500 3000 0 500 1000 1500 2000 2500 3000
+ Epochs Epochs
+ B D
+ 100 100
+ accuracy (%)
+
+
+
+
+ accuracy (%)
+ with flossing during training
+ without flossing
+
+
+ 50 50
+ 0 500 1000 1500 2000 2500 3000 0 500 1000 1500 2000 2500 3000
+ Epochs Epochs
+Figure 8: Gradient flossing boosts norm of long-term Jacobian A) Gradient norm $\left|\frac{dL}{dh_0}\right|$ as
+a function of training epochs for networks without flossing (blue) and networks with flossing
+during training (orange). Error gradient norm is boosted after gradient flossing at epochs e ∈
+{0, 100, 200, 300, 400, 500}. In networks without gradient flossing, the gradient norms $\left|\frac{dL}{dh_0}\right|$ are
+much smaller overall. One out of ten random network realizations is shown with a solid line, the other 9 with
+transparent lines. B) Accuracy as a function of epoch, same depiction and network realizations as in
+A. Note that accuracy of networks with gradient flossing grows beyond chance level approximately
+when the gradient norm $\left|\frac{dL}{dh_0}\right|$ becomes macroscopically large. C) Same as A in linear scale. Mean
+final test loss as a function of task difficulty (delay d) for delayed copy task. Different colors are
+different network realizations with gradient flossing during training. Black lines are without any
+gradient flossing. D) Accuracy as a function of epochs, same colors as in C. Note that for all network
+realizations the moment where the gradient norm $\left|\frac{dL}{dh_0}\right|$ becomes macroscopically large coincides with
+the moment the accuracy is beyond chance level. Parameters: g = 1, batch size b = 16, N = 80,
+epochs = $10^4$, T = 300, flossing for $E_f = 500$ epochs on k = 75 Lyapunov exponents before
+training. Task: binary delayed XOR, delay d = 70, loss: cross entropy(y, ŷ).
+
+
+We observe that after every episode of gradient flossing, the accuracy drops down close to chance level of 50%
+(Fig 12A red line). Between flossing, the accuracy quickly recovers but never reaches 100%. Simultaneously,
+when the accuracy drops the test error jumps up (Fig 12B). We also observed that the gradient norm $\left|\frac{dL}{dh_0}\right|$ is
+initially boosted by gradient flossing, but stays close to indistinguishable once the gradient norm $\left|\frac{dL}{dh_0}\right|$ becomes
+macroscopically large (Fig 12C). This suggests that once gradient flossing facilitates signal propagation across
+long time horizons and the network picks up the relevant gradient signal, further gradient flossing can be harmful
+to the actual task execution. We hypothesize that there might be (at least for the Vanilla networks considered
+here) a trade-off between the ability to bridge long time scales, which seems to require one or several Lyapunov
+exponents of the forward dynamics close to zero, and nonlinear task requirements, which require at least a
+fraction of the units to be in the nonlinear regime of the nonlinearity ϕ, where $\phi'(x) < 1$. It would be an
+interesting future research avenue to further investigate this potential trade-off also in other network architectures.
+
+D.3 Lyapunov Exponents after Training With and Without Gradient Flossing
+In Fig 13, we show the first (Fig 13A, B) and the tenth (Fig 13C, D) Lyapunov exponent after training on
+the spatial delayed XOR task both with and without gradient flossing. We find for successful networks with
+gradient flossing a systematic relationship between the first Lyapunov exponent and the delay, which can be fitted
+approximately by $\lambda_1(d) = -0.2\,\exp(-0.03\,d)$. Unsuccessful networks with accuracy at chance level
+have a much smaller largest Lyapunov exponent. The same seems to hold true for the tenth Lyapunov exponent.
+In a previous study [57], a similar trend was observed, albeit in the context of a task that did not possess an
+
+
+ 19
+ A B 80
+
+ 75
+ 10 2
+ 70
+ 10 3
+
+
+
+
+ accuracy (%)
+ 65
+ |dhd 0 |
+
+
+
+ 10 4 60
+ 55
+ 10 5
+ 50
+ 45
+ 3000 2000 1000 0 1000 0 100 200 300 400
+ Epochs Epochs
+
+
+Figure 9: Increase of gradient norm precedes epoch when accuracy exceeds chance level
+A) Gradient norm $\left|\frac{dL}{dh_0}\right|$ as a function of training epochs for 20 network realizations with flossing
+during training. Epochs are aligned to the last epoch where accuracy is ≤ 50%. B) Same task and
+simulations as in A, but here accuracy as a function of epoch, for 20 network realizations with
+flossing during training. Epochs are aligned to the last epoch with $\left|\frac{dL}{dh_0}\right| < 0.001$. Different colors
+are different network realizations. Parameters: g = 1, batch size b = 16, N = 80, epochs = $10^4$,
+T = 300, flossing for Ef = 500 epochs on k = 75 Lyapunov exponents early during training at
+training epochs e ∈ {0, 100, 200, 300, 400}. Task: binary delayed XOR, delay d = 70, loss: cross
+entropy(y, ŷ).
+
+
+analytically tractable temporal correlation structure, which might partially explain the less conclusive results.
+It is important to note that the numerical evaluation of Lyapunov exponents in recurrent LSTM networks in
+[57] was based solely on the N × N Jacobian of the memory state. From a dynamical systems standpoint, a
+2N × 2N Jacobian matrix that takes the interactions between both memory and cell states into account is
+required [9, 12, 58].
+
+
+E Gradient Flossing for Linear Network
+We provide code for gradient flossing in linear networks here. We find that gradient flossing also helps to train
+linear networks on tasks with many time steps that can be solved by linear networks, for example the copy task,
+but not for tasks that require a nonlinear input-output operation like the temporally delayed XOR task. A full
+analytical description of gradient flossing for linear networks would be a promising avenue for future research, as
+networks with linear dynamics can still have nonlinear learning dynamics [21]. However, this is beyond the scope
+of the presented work.
+
+
+F Computational Complexity of Gradient Flossing
+We present here a more in-depth scaling analysis of the computational cost of gradient flossing. There are three
+main contributors to the computational cost (Table 1): First, the RNN step, which has a computational complexity
+of $O(N^2 b)$ per time step, where N is the dimension of the recurrent network state (which in case of Vanilla
+networks equals the number of units) and b is the batch size, both in the forward and backward pass. Second, the
+Jacobian step, which scales with $O(N^2 k)$ per time step, where k is the number of flossed Lyapunov exponents.
+Third, the QR decomposition, which scales with $O(N k^2)$, where k is the number of Lyapunov exponents
+considered.
+Together, this results in a total amortized cost of $O(N^2 b\,T)$ per training epoch, where T is the number of
+training time steps, and a total amortized cost per flossing epoch of $O\!\left(N^2 T_f\,(1 + k/t_{\mathrm{ONS}} + k)\right)$, where $T_f$ is
+the number of flossing time steps.
+
+
+ 20
+ A
+ 10 2
+ i dW )
+ d
+ (
+
+
+
+ 10 4
+
+
+ 0 500 1000 1500 2000 2500 3000
+ B 100
+ early training flossing
+ without flossing
+ accuracy (%)
+
+
+
+
+ 50
+ 0 500 1000 1500 2000 2500 3000
+ Epochs
+Figure 10: Gradient flossing decreases condition number of recurrent weight error gradient
+A) Singular values of the recurrent weight gradient, $\sigma_i\!\left(\frac{dL}{dW}\right)$, as a function of training epochs for singular
+values i ∈ {1, 20, 40} for networks without gradient flossing (blue) and early training gradient flossing
+(green). At epochs of gradient flossing, the subordinate singular values σ20 and σ40 are peaked, while
+the first singular value σ1 has only a slight peak. This indicates that gradient flossing increases the
+effective rank of the recurrent weight gradient $\frac{dL}{dW}$. B) Accuracy as a function of training epochs.
+Note that the accuracy of networks with gradient flossing grows beyond chance level approximately
+when the subordinate singular values σ20 and σ40 peak, which enables
+high-dimensional parameter updates. Parameters: g = 1, batch size b = 16, N = 80, epochs = $10^3$,
+T = 300, gradient flossing for Ef = 500 epochs on k = 75 Lyapunov exponents before training.
+Task: binary delayed XOR, delay d = 70, loss: cross entropy(y, ŷ).
+
+
+
+
+In the case of preflossing, thus, the total computational cost scales with $O\!\left(N^2\,[E\,b\,T + E_p T_f\,(1 + k/t_{\mathrm{ONS}} + k)]\right)$,
+where E is the number of training epochs and $E_p$ is the number of preflossing epochs.
+For gradient flossing during training (assuming that there is also preflossing done), the amortized cost scales with
+$O\!\left(N^2\,[E\,b\,T + E_p T_p + E_f T_f\,(1 + k/t_{\mathrm{ONS}} + k)]\right)$, where $E_f$ is the total number of flossing epochs during
+training.
+Empirically, we find that both the number of preflossing epochs Ep and the number of flossing episodes Ef necessary for
+training success are much smaller than the total number of training epochs E. For example, the preflossing for
+500 epochs in the numerical experiment of Fig 3 took ∼ 37 seconds, while the overall training on 10000 training
+epochs with batch size b = 16 took ∼ 1680 seconds. Thus only approximately 2.2% of the total training time
+was spent on gradient flossing. Moreover, Tp can be smaller than T , it just has to be long enough such that the
+temporal correlations in the task can be bridged. In case of the tasks discussed in the manuscript, this would be
+the delay d. It remains an important challenge to infer the suitable number of flossing time steps Tf for tasks
+with unknown temporal correlation structure.
+It would also be interesting to investigate how the CPU hours/wall-clock time/flops/Joule/CO2-emission spent
+on gradient flossing vs on training networks with larger N are trading off against each other. For this, we would
+suggest to first find the smallest network that on median successfully trains on a binary temporal XOR task for
+a fixed given delay d and measure the computational resources involved in training it, e.g. in terms of CPU
+hours. Then compare it to a network with gradient flossing. This would be a promising analysis but is beyond
+our current computational budget. We will start such experiments and might be able to provide results during the
+reviewer period.
+
+
+ 21
+ A
+ 10 2
+ i dW )
+ d
+ (
+
+
+
+ 10 4
+
+
+ 0 500 1000 1500 2000 2500 3000
+ B 100
+ early training flossing
+ without flossing
+ accuracy (%)
+
+
+
+
+ 50
+ 0 500 1000 1500 2000 2500 3000
+ Epochs
+Figure 11: Gradient flossing decreases condition number of recurrent weight error gradient
+Same as Fig 10 for different network realization.
+
+
+
+G Additional controls
+
+We also investigate the effects of gradient flossing during the training with orthogonal weight initializations
+and confirm our finding that gradient flossing improves trainability on tasks that have long time horizons to
+bridge. Moreover, we find that gradient flossing during training can further improve trainability. We replicated
+the two more challenging tasks from the main paper (Fig 4) for orthogonal initialization with variable temporal
+complexity and performed gradient flossing either both during and before training, only before training, or not at
+all.
+Fig 14A shows the test accuracy for Vanilla RNNs with orthogonal initialization trained on the delayed temporal
+XOR task yt = xt−d/2 ⊕ xt−d with random Bernoulli process x ∈ {0, 1}. The accuracy of orthogonal Vanilla
+RNNs falls to chance level for d ≥ 40 (Fig 14C). With gradient flossing before training, the trainability can
+be improved, but still falls close to chance level for d = 70. In contrast, for initially orthogonal networks with
+gradient flossing during training, the accuracy is improved to > 80% at d = 70. In this case, we preflossed for
+500 epochs before task training and again after 500 epochs of training on the task. In Fig 14B, D the networks
+have to perform the nonlinear XOR operation yt = x1t−d ⊕ x2t−d ⊕ x3t−d on a three-dimensional binary input
+signal x1 , x2 , and x3 and generate the correct output with a delay of d steps identical to Fig 4 in the main text.
+Again, we observe similar to networks with Gaussian initialization that flossing before training improves the
+performance compared to baseline, but starts failing for long delays d > 60. In contrast, orthogonal networks
+that are also flossed during training can solve even more difficult tasks (Fig 14D). We note that for Fig 14B and
+D, we trained the network only on 5000 epochs, compared to 10000 epochs in networks with random Gaussian
+initialization because for 10000 epochs, both networks with gradient flossing only before training and with
+gradient flossing before and during training were able to bridge d = 70. These results suggest that orthogonal
+initialization does seem to slightly improve performance for tasks with long time horizons to bridge, and that gradient
+flossing can additionally boost the performance. Thus orthogonal initialization and gradient flossing seem to go
+well together. It would be interesting to study if orthogonal initialization also reduces the number of gradient
+flossing steps necessary to improve performance.
+
+
+
+H Additional Details on Training Tasks
+
+In this section, we provide a more rigorous definition of the tasks used for training, as discussed in Section 3:
+
+
+ 22
+ A 100
+
+
+ accuracy (%)
+ with flossing throughout training
+ early training flossing
+ without flossing
+ 50
+ 0 500 1000 1500 2000 2500 3000
+ B
+ 100
+ test error
+
+
+
+
+ 10 1
+ 0 500 1000 1500 2000 2500 3000
+ C
+ 10 3 with flossing throughout training
+ early training flossing
+ without flossing
+ |dhd 0 |
+
+
+
+
+ 10 7
+
+ 0 500 1000 1500 2000 2500 3000
+ Epochs
+Figure 12: Gradient flossing throughout training can be detrimental to learning A) Accuracy as a
+function of training epochs for binary temporal delayed XOR task for gradient flossing throughout
+training every 100 training epochs (red). Accuracy drops down close to chance level every time after
+gradient flossing but recovers quickly in between. Same for only 5 episodes of gradient flossing at
+epochs e ∈ {0, 100, 200, 300, 400} (green) and no flossing at all (blue). B) Test error as a function
+of training epochs. C) Gradient norm $\left|\frac{dL}{dh_0}\right|$ as a function of training epochs for networks without
+gradient flossing (blue) and networks with flossing throughout training (red) and early training
+gradient flossing (green). The error gradient norm is boosted after each gradient flossing episode. In networks
+without gradient flossing, the gradient norms $\left|\frac{dL}{dh_0}\right|$ are much smaller overall. Parameters: g = 1,
+batch size b = 16, N = 80, epochs = $10^4$, T = 300, gradient flossing for Ef = 500 epochs on
+k = 75 Lyapunov exponents before training. Task: binary delayed XOR, delay d = 70, loss: cross
+entropy(y, ŷ).
+
+
+H.1 Copy task
+For the copy task, the target network readout at time t is yt = xt−d , where d denotes the delay. We chose the
+input to be sampled i.i.d. from a uniform distribution between 0 and 1.
+
+H.2 Temporal XOR task
+The temporal XOR task requires the target network readout yt at time t to be computed as follows:
+$$y_t = \left|x_{t-d/2} - x_{t-d}\right| \qquad (20)$$
+where again d denotes a time delay of d time steps. In the case of x ∈ {0, 1} and y ∈ {0, 1}, the output yt
+follows the truth table of the XOR digital logic gate (Table 2). Thus, the function f (xa , xb ) = |xa − xb | can be
+seen as an analytical representation of the XOR gate. It is important to note that f (x, 0) = x only for x ≥ 0,
+and that this task requires a nonlinearity. The implementation can easily be constructed analytically: for example,
+using two rectified linear units $\phi(x) = \max(x, 0)$, the output can be constructed by
+$$f(x_a, x_b) = |x_a - x_b| = \phi(x_a - x_b) + \phi(x_b - x_a). \qquad (21)$$
+Together with a delay line to transmit the signal xt−d over time, this can solve the task.
+
+
+ 23
+ A no gradient flossing B gradient flossing during training
+ 0.1
+ 1
+ 0.1
+
+
+
+
+ 1
+ 0.2
+ 0.2
+ 20 40 60 20 40 60
+ complexity (delay d) complexity (delay d)
+ C D
+ 0.1
+ 0.2 0.1
+ 10
+
+
+
+
+ 10
+ 0.3 0.2
+ 20 40 60 20 40 60
+ complexity (delay d) complexity (delay d)
+Figure 13: Lyapunov exponents of trained networks with and without gradient flossing A) First
+Lyapunov exponents λ1 for Vanilla networks trained on spatial delayed XOR task as a function of
+the delay with no gradient flossing. Color-coded is the test accuracy at the end of training, where red
+corresponds to 100% accuracy and blue to chance level (50%). B) Same as A for networks with
+gradient flossing during training. Black dashed line shows that Lyapunov exponents of successfully
+trained networks can be approximated by the empirical fit $\lambda_1(d) = -0.2\,\exp(-0.03\,d)$. (Protocol
+for gradient flossing during training same as main text Fig 4B). C) Same as A for tenth Lyapunov
+exponents λ10 . D) Same as B for tenth Lyapunov exponents λ10 . Same fit as in B also describes
+λ10 . Parameters: g = 1, batch size b = 16, N = 80, epochs = $10^4$, T = 300, gradient flossing for
+Ef = 500 epochs on k = 75 Lyapunov exponents before training. Task: binary spatial delayed XOR,
+loss: cross entropy(y, ŷ).
+
+
+I Additional Background on Lyapunov Exponents of RNNs
+
+An autonomous dynamical system is usually defined by a set of ordinary differential equations dh/dt =
+F(h), h ∈ RN in the case of continuous-time dynamics, or as a map hs+1 = f (hs ) in the case of discrete-time
+dynamics. In the following, the theory is presented for discrete-time dynamical systems for ease of notation,
+but everything directly extends to continuous-time systems [43]. Together with an initial condition h0 , the
+map forms a trajectory. As a natural extension of linear stability analysis, one can ask how an infinitesimal
+perturbation h′0 = h0 + ϵu0 evolves in time. Chaotic systems are sensitive to initial conditions; almost all
+infinitesimal perturbations $\epsilon u_0$ of the initial condition grow exponentially with time, $|\epsilon u_t| \approx \exp(\lambda_1 t)\,|\epsilon u_0|$.
+Finite-size perturbations, therefore, may lead to a drastically different subsequent behavior. The largest Lyapunov
+exponent λ1 measures the average rate of exponential divergence or convergence of nearby initial conditions:
+
+$$\lambda_1(h_0) = \lim_{t\to\infty} \frac{1}{t} \lim_{\epsilon\to 0} \log \frac{\|\epsilon u_t\|}{\|\epsilon u_0\|} \qquad (22)$$
+
+In dynamical systems that are ergodic on the attractor, the Lyapunov exponents do not depend on the initial
+conditions as long as the initial conditions are in the basin of attraction of the attractor. Note that it is crucial
+to first take the limit ϵ → 0 and then t → ∞, as $\lambda_1(h_0)$ would be trivially zero for a bounded attractor if the
+limits are exchanged, as $\lim_{t\to\infty} \log \frac{\|\epsilon u_t\|}{\|\epsilon u_0\|}$ is bounded for finite perturbations even if the system is chaotic. To
+measure k Lyapunov exponents, one has to study the evolution of k independent infinitesimal perturbations us
+spanning the tangent space:
+
+
+ us+1 = Ds us (23)
+
+
+ 24
+ forward pass backward pass
+ O N2 b
+ 
+ RNN dynamics "
+ O N2 k
+ 
+ Jacobian step "
+ O N k2
+ 
+ QR step "
+ O N2 b T
+ 
+ total amortized costs "
+ per training epoch
+ O N 2 Tf (1 + k/tONS + k)
+ 
+ total amortized costs "
+ per gradient flossing epoch
+ O N 2 [Eb T + Ep Tf (1 + k/tONS + k)]
+ 
+ total amortized costs "
+ of preflossing
+ O N 2 [Eb T + Ep Tp + Ef Tf (1 + k/tONS + k)]
+ 
+ total amortized costs "
+ flossing during training
+
+Table 1: Computational cost for gradient flossing and training of RNNs
+N denotes number of neurons, b is the batch size, T is the number of time steps in forward pass of
+training, Tf is the number of time steps in forward pass of flossing, tONS is the reorthonormalization
+interval, k is the number of flossed Lyapunov exponents, E is the number of training epochs, Ep is
+the number of preflossing epochs, Ef is the number of flossing epochs during training. Empirically,
+we find that the necessary numbers of preflossing epochs Ep and flossing episodes Ef are much smaller
+than the total number of training epochs E. Moreover, Tp can be smaller than T.
+
+ Table 2: XOR
+ input xt−d input xt−2d target output yt
+ 0 0 0
+ 0 1 1
+ 1 0 1
+ 1 1 0
+
+
+where the N × N Jacobian Ds (hs ) = df (hs )/dh characterizes the evolution of generic infinitesimal perturba-
+tions during one step. Note that this Jacobian along the trajectory is equivalent to a stability matrix only at a
+fixed point, i.e., when hs+1 = f (hs ) = hs .
+We are interested in the asymptotic behavior, and therefore we study the long-term Jacobian
+
+ Tt (h0 ) = Dt−1 (ht−1 ) . . . D1 (h1 )D0 (h0 ). (24)
+Note that $T_t(h_0)$ is a product of generally noncommuting matrices. The Lyapunov exponents λ1 ≥ λ2 ≥ · · · ≥ λN
+are defined as the logarithms of the eigenvalues of the Oseledets matrix
+$$\Lambda(h_0) = \lim_{t\to\infty} \left[T_t(h_0)^{\top}\, T_t(h_0)\right]^{\frac{1}{2t}}, \qquad (25)$$
+
+where ⊤ denotes the transpose operation. The expression inside the brackets is the Gram matrix of the long-term
+Jacobian Tt (h0 ). Geometrically, the determinant of the Gram matrix is the squared volume of the parallelotope
+spanned by the columns of Tt (h0 ). Thus, the exponential volume growth rate is given by the sum of the
+logarithms of its first k (sorted) eigenvalues. Oseledets’ multiplicative ergodic theorem guarantees the existence
+of the Oseledets matrix Λ(h0 ) for almost all initial conditions h0 [42]. In ergodic systems, the Lyapunov
+exponents λi do not depend on the initial condition h0 . However, for a numerical calculation of the Lyapunov
+spectrum, Eq 25 cannot be used directly because the long-term Jacobian Tt (h0 ) quickly becomes ill-conditioned,
+i.e., the ratio between its largest and smallest singular value diverges exponentially with time.
+
+
+J Algorithm for Calculating Lyapunov Spectrum of Rate Networks
+For calculating the first k Lyapunov exponents, we exploit the fact that the growth rate of an m-dimensional
+infinitesimal volume element is given by $\lambda^{(m)} = \sum_{i=1}^{m} \lambda_i$. Therefore, $\lambda_1 = \lambda^{(1)}$, $\lambda_2 = \lambda^{(2)} - \lambda_1$,
+$\lambda_3 = \lambda^{(3)} - \lambda_1 - \lambda_2$, … [44]. The volume growth rates can be obtained via QR-decomposition.
+
+
+ 25
+ orthogonal weight initialization
+ A delayed temporal XOR B delayed spatial XOR
+ 100 100
+test accuracy (%) flossing during training
+
+
+
+
+ test accuracy (%)
+ preflossing
+ 80 no flossing 80
+
+ 60 60
+
+ 0 2000 4000 6000 8000 10000 0 1000 2000 3000 4000 5000
+ Epochs Epochs
+ C D
+ 100 100
+test accuracy (%)
+
+
+
+
+ test accuracy (%)
+ 80 80 flossing during training
+ preflossing
+ no flossing
+ 60 60
+
+ 10 20 30 40 50 60 70 10 20 30 40 50 60 70
+ complexity (delay d) complexity (delay d)
+
+Figure 14: Gradient flossing before and during training improves trainability for orthogonal
+nets
+A) Test accuracy for orthogonally initialized vanilla RNNs trained on delayed temporal binary XOR
+task yt = xt−d/2 ⊕ xt−d with gradient flossing during training (green), preflossing (orange), and
+with no gradient flossing (blue) for d = 70. Solid lines are mean, transparent thin lines are individual
+network realizations B) Same as A for delayed spatial XOR task with yt = x1t−d ⊕ x2t−d ⊕ x3t−d .
+C) Test accuracy as a function of task difficulty (delay d) for delayed temporal XOR task. D) Test
+accuracy as a function of task difficulty (delay d) for delayed spatial XOR task. Parameters: g = 1,
+batch size b = 16, N = 80, epochs = 104 for delayed temporal XOR, epochs = 5000 for delayed
+spatial XOR, T = 300, flossing for Ef = 500 epochs on k = 75 Lyapunov exponents before training
+and during training for green lines, and only before training for orange lines. Shaded areas are 25%
+and 75% percentiles, solid lines are means, transparent dots are individual simulations, task loss is
+cross-entropy between y, ŷ.
+
+
+First, we evolve an initially orthonormal system $Q_s = [q_s^1, q_s^2, \ldots, q_s^m]$ in the tangent space along the trajectory
+using the Jacobian $D_s$:
+$$\tilde{Q}_{s+1} = D_s Q_s \qquad (26)$$
+A continuous system can be transformed into a discrete system by considering a stroboscopic representation,
+where the trajectory is only considered at certain discrete time points. We use here the notation of discrete
+dynamical systems where this corresponds to performing the product of Jacobians along the trajectory, $\tilde{Q}_{s+1} = D_s Q_s$.
+We study the discrete network dynamics in the limit of small time step ∆t → 0 and for discrete time
+∆t = 1. The notation can be readily extended to continuous systems [43].
+Second, we extract the exponential growth rates using the QR-decomposition,
+$$\tilde{Q}_{s+1} = Q_{s+1} R^{s+1},$$
+
+which uniquely decomposes $\tilde{Q}_{s+1}$ into an orthonormal matrix $Q_{s+1}$ of size N × k, so that $Q_{s+1}^{\top} Q_{s+1} = \mathbb{1}_{m\times m}$,
+and an upper triangular matrix $R^{s+1}$ of size k × k with positive diagonal elements. Geometrically, $Q_{s+1}$
+describes the rotation of Qs caused by Ds and the diagonal entries of Rs+1 describe the stretching and shrinking
+of the columns of Qs , while the off-diagonal elements represent the shearing. Fig 15 visualizes Ds and the
+QR-decomposition for k = 2.
+The Lyapunov exponents are given by time-averaged logarithms of the diagonal elements of $R^s$:
+$$\lambda_i = \lim_{t\to\infty} \frac{1}{t} \log \prod_{s=1}^{t} R^s_{ii} = \lim_{t\to\infty} \frac{1}{t} \sum_{s=1}^{t} \log R^s_{ii}. \qquad (27)$$
+
+
+
+ 26
+ Ds
+ QR
+
+ s+1
+ R22
+qs2 ~2 s+1
+ 2
+ qs+1
+ qs+1 R11 ~1
+ qs+1
+ s+1
+ qs1 R11 1
+ qs+1
+ s+1
+ R22
+
+Figure 15: Geometric illustration of Lyapunov spectrum calculation. An orthonormal matrix
+$Q_s = [q_s^1, q_s^2, \ldots, q_s^m]$, whose columns are the axes of a k-dimensional cube, is rotated and distorted
+by the Jacobian $D_s$ into a k-dimensional parallelotope $\tilde{Q}_{s+1} = D_s Q_s$ embedded in $\mathbb{R}^N$. The
+figure illustrates this for k = 2, in which case the columns of $\tilde{Q}_{s+1}$ span a parallelogram, which
+can be divided into a right triangle and a trapezoid and rearranged into a rectangle. Thus, the
+area of the gray parallelogram is the same as that of the orange rectangle. The QR-decomposition
+reorthonormalizes $\tilde{Q}_{s+1}$ by decomposing it into the product of an orthonormal matrix $Q_{s+1} =
+[q_{s+1}^1, q_{s+1}^2, \ldots, q_{s+1}^m]$ and the upper-triangular matrix $R^{s+1}$. $Q_{s+1}$ describes the rotation of $Q_s$
+caused by $D_s$. The diagonal entries of $R^{s+1}$ give the stretching/shrinking along the columns of
+$Q_{s+1}$; thus the volume of the parallelotope formed by the first m columns of $\tilde{Q}_{s+1}$ is given by
+$V_m = \prod_{i=1}^{m} R^{s+1}_{ii}$. The time-averaged logarithms of the diagonal elements of $R^s$ give the Lyapunov
+spectrum: $\lambda_i = \lim_{t_{\mathrm{sim}}\to\infty} \frac{1}{t_{\mathrm{sim}}} \log \prod_{s=1}^{t_{\mathrm{sim}}} R^s_{ii} = \lim_{t_{\mathrm{sim}}\to\infty} \frac{1}{t_{\mathrm{sim}}} \sum_{s=1}^{t_{\mathrm{sim}}} \log R^s_{ii}$.
+
+
+Note that the QR-decomposition does not need to be performed at every simulation step, just sufficiently often,
+i.e., once every $s_{\mathrm{ONS}}$ steps such that $\tilde{Q}_{s+s_{\mathrm{ONS}}} = D_{s+s_{\mathrm{ONS}}-1} \cdot D_{s+s_{\mathrm{ONS}}-2} \cdots D_s \cdot Q_s$ remains well-conditioned
+[44]. An appropriate reorthonormalization interval $s_{\mathrm{ONS}} = t_{\mathrm{ONS}}/\Delta t$ thus depends on the condition number, the
+ratio of the largest to the smallest singular value:
+$$\kappa_2(\tilde{Q}_{s+s_{\mathrm{ONS}}}) = \kappa_2(R^{s+s_{\mathrm{ONS}}}) = \frac{\sigma_1(R^{s+s_{\mathrm{ONS}}})}{\sigma_m(R^{s+s_{\mathrm{ONS}}})} = \frac{R^{s+s_{\mathrm{ONS}}}_{11}}{R^{s+s_{\mathrm{ONS}}}_{mm}}. \qquad (28)$$
+
+An initial transient should be disregarded in the calculation of the Lyapunov spectrum because h first has to
+converge towards the attractor and Q has to converge to the unique eigenvectors of the Oseledets matrix (Eq 25)
+[54]. A simple example of this algorithm in pseudocode is:
+
+Algorithm 2 Jacobian-based algorithm for Lyapunov spectrum
+ initialize h, Q
+ evolve h until it is on attractor (avoid initial transient)
+ evolve Q until it converges to the eigenvectors of the backward Oseledets matrix
+ set γi = 0
+ for t = 1 → T do
+ h ← f (h)
+ D ← df/dh
+ Q←D·Q
+ if t ≡ 0 (mod sONS ) then
+ Q, R ← qr(Q)
+ γi += log(Rii )
+ end if
+ end for
+ λi = γi /T
+
+
+It is guaranteed that under general conditions initially random orthonormal systems will exponentially converge
+towards a unique basis that is given by the eigenvectors of the Oseledets matrix Eq 25 [54]. A minimal example
+of this algorithm in pseudocode is shown in appendix 3. A feasible strategy to determine the reorthonormalization
+time interval tONS is to get first a rough estimate of the Lyapunov spectrum using a short simulation time tsim and
+a small tONS and repeat with a longer simulation time and a tONS based on the Lyapunov spectrum of the rough
+estimate of the Lyapunov spectrum. Another strategy is, to first iteratively adapt tONS on a short simulation
+run to get an acceptable condition number. It should be noted that there exists a diversity of other methods to
+estimate the Lyapunov spectrum [14, 43, 68, 69].
+
+
+ 27
+ K Convergence of Lyapunov Exponents of RNNs
+In Fig. 16, we demonstrate the convergence of the Lyapunov exponents. We show the estimate of the Lyapunov
+exponents λi for i = 1, 20, 60, 80 for different initial conditions but identical network realization.
+
+ 0
+ 10
+i(1/steps)
+
+
+
+
+ 20
+ 30
+ 40
+ 100 101 102
+ steps
+
+
+Figure 16: Convergence of Lyapunov exponents Convergence of selected Lyapunov exponents
+λi for ten identical network realizations with different initial conditions with simulation time (i =
+1, 20, 60, 80) for σ = 1 and g = 1. (Other parameters: N = 80, tsim = 100 steps, tONS = 1).
+
+
+
+
+ 28
+ \ No newline at end of file
diff --git a/papers/txt/gram2025_generative_recursive.txt b/papers/txt/gram2025_generative_recursive.txt
new file mode 100644
index 0000000..d5f299d
--- /dev/null
+++ b/papers/txt/gram2025_generative_recursive.txt
@@ -0,0 +1,1532 @@
+ Generative Recursive Reasoning
+
+
+ Junyeob Baek1†∗ Mingyu Jo1†∗ Minsu Kim1,2
+
+ Mengye Ren3 Yoshua Bengio2,4 Sungjin Ahn1,3†
+arXiv:2605.19376v2 [cs.AI] 20 May 2026
+
+
+
+
+ 1
+ KAIST 2 Mila – Québec AI Institute
+ 3
+ New York University 4 Université de Montréal
+
+
+
+ Abstract
+ How should future neural reasoning systems implement extended computation?
+ Recursive Reasoning Models (RRMs) offer a promising alternative to autoregres-
+ sive sequence extension by performing iterative latent-state refinement with shared
+ transition functions. Yet existing RRMs are largely deterministic, following a
+ single latent trajectory and converging to a single prediction. We introduce Gen-
+ erative Recursive reAsoning Models (GRAM), a framework that turns recursive
+ latent reasoning into probabilistic multi-trajectory computation. GRAM models
+ reasoning as a stochastic latent trajectory, enabling multiple hypotheses, alterna-
+ tive solution strategies, and inference-time scaling through both recursive depth
+ and parallel trajectory sampling. This yields a latent-variable generative model
+ supporting conditional reasoning via pθ (y | x) and, with fixed or absent inputs,
+ unconditional generation via pθ (x). Trained with amortized variational inference,
+ GRAM improves over deterministic recurrent and recursive baselines on structured
+ reasoning and multi-solution constraint satisfaction tasks, while demonstrating an
+ unconditional generation capability. https://ahn-ml.github.io/gram-website
+
+
+ 1 Introduction
+ A central question for future neural reasoning systems is how extended computation should be imple-
+ mented. Large autoregressive models typically scale reasoning by extending a sequence-generation
+ process, whether intermediate computation is expressed explicitly as chain-of-thought tokens or im-
+ plicitly in hidden or latent representations [1–6]. A complementary direction is explored by Recursive
+ Reasoning Models (RRMs), which use repeated computation to refine a persistent latent state rather
+ than to append new elements to an output or reasoning sequence [7–9]. This approach is appealing
+ because it decouples reasoning depth from both parameter scale and output length: a compact model
+ can perform many steps of internal computation by repeatedly applying shared transition functions
+ over time.
+ Recent recursive reasoning models such as HRM [8] and TRM [9] provide early evidence for the
+ potential of this approach in structured reasoning. Rather than producing a solution in a single
+ feedforward pass, they perform extended computation through iterative latent-state refinement, deep
+ supervision across refinement steps, and reasoning-oriented recurrent designs such as hierarchical
+ latent dynamics. These features make them well suited to problems requiring constraint propagation,
+ state tracking, iterative correction, and multi-step inference. More broadly, they build on a principle
+ ∗ Equal contribution
+ † Correspondence to: Junyeob Baek (wnsdlqjtm@kaist.ac.kr), Mingyu Jo (mingyu.jo@kaist.ac.kr), Sungjin Ahn
+
+ (sungjin.ahn@kaist.ac.kr)
+
+
+ Preprint.
+ Solution 1,
+
+
+ Input Task,
+
+
+
+ Solution 2,
+
+
+
+
+ (a) Deterministic RRMs (b) GRAM (Ours)
+
+Figure 1: Comparison of Latent Reasoning Trajectories. Left: N-Queens Example with two valid solutions.
+Right: Given three independent runs for latent reasoning (τ1 , τ2 , τ3 ): (a) Prior RRMs (e.g. HRM, TRM) are
+deterministic—all runs collapse to an identical trajectory, converging to a single solution and failing to explore
+alternatives, while (b) GRAM explores diverse trajectories that reach multiple
+valid solutions y1 and y2 , while naturally enabling parallel inference-time scaling.
+
+also explored in recurrent Transformer architectures such as Universal Transformers [10] and Looped
+Transformers [7]: shared Transformer blocks can be repeatedly applied to increase computational
+depth without increasing parameter count. Together, these models suggest that reasoning capability
+can emerge not only from scaling model size or generating longer traces, but also from the organization
+of computation itself.
+While recurrent latent-state refinement provides an appealing mechanism for efficiently increasing
+reasoning depth, depth alone is not sufficient for many reasoning problems. A capable reasoning
+system should also be able to maintain uncertainty, consider alternative hypotheses, and explore
+multiple possible solution strategies [11, 12]. This is especially important in settings where ambiguity
+or multiple valid solutions are intrinsic, and more generally in problems where a single refinement
+path may become trapped in a suboptimal reasoning trajectory. In this sense, future RRMs should be
+not only deep, in the sense of repeated refinement, but also wide, in the sense of maintaining and
+exploring multiple latent trajectories in parallel.
+Existing RRMs [7–10], however, remain fundamentally deterministic: given the same input and
+initialization, they follow a single latent trajectory and converge to a single prediction. This deter-
+ministic recursion collapses the space of plausible reasoning paths into a single attractor, leaving
+probabilistic multi-hypothesis latent reasoning largely unexplored within the RRM paradigm. This
+motivates the central question of our work: can recursive latent computation support probabilistic,
+generative, multi-hypothesis reasoning while preserving the efficiency of compact recurrent models?
+In this paper, we propose Generative Recursive reAsoning Models (GRAM), a framework that turns
+recursive latent reasoning into probabilistic multi-trajectory computation. GRAM treats the reasoning
+process itself as a stochastic latent trajectory: at each recursion step, the model samples a transition
+conditioned on the input and the current reasoning state, rather than deterministically updating to a
+single next state. Repeating this process defines a distribution over possible reasoning trajectories,
+allowing the model to maintain multiple hypotheses, explore alternative solution strategies, and scale
+inference not only by increasing recursive depth but also by sampling trajectories in parallel. From
+a probabilistic perspective, GRAM is a latent-variable generative model: it models pθ (y | x) by
+marginalizing over latent reasoning trajectories, while the same recursive process can also define an
+unconditional generative model pθ (x) when the input is fixed or absent.
+We evaluate GRAM on controlled reasoning and generation tasks that serve as probes of the ar-
+chitectural properties targeted by our formulation: recursive refinement, stochastic exploration,
+multi-solution coverage, and inference-time scaling. Given this goal, our experiments focus on
+comparisons with the most relevant deterministic recurrent and recursive latent reasoning baselines,
+including Looped Transformers, HRM, and TRM, rather than frontier-scale general-purpose LLMs
+whose training data, inference budgets, and external scaffolding are not directly comparable. Sudoku-
+Extreme [8] and ARC-AGI [13, 14] test structured reasoning under hard constraints and abstract
+transformations; N-Queens and Graph Coloring evaluate multi-solution recovery; and binarized
+MNIST [15] probes the unconditional generative interpretation.
+Our main contribution is to establish probabilistic multi-trajectory recursion as a design principle
+for future recurrent and recursive reasoning architectures. Concretely, we make three contributions.
+
+
+ 2
+ First, we formulate recursive reasoning as a latent-variable generative process, where solutions are
+obtained by marginalizing over stochastic reasoning trajectories. Second, we introduce width-based
+inference-time scaling, enabling inference to scale not only with recursive depth but also with the
+number of sampled latent trajectories. Third, we provide empirical evidence that this formulation
+yields the intended architectural advantages over deterministic recurrent and recursive baselines,
+improving structured reasoning, multi-solution constraint satisfaction, and unconditional generation.
+
+2 Generative Recursive Reasoning Models
+In this section, we introduce Generative Recursive reAsoning Models (GRAM), an instantiation
+of probabilistic recursive reasoning. We describe the architecture in Section 2.1 and the training
+procedure in Section 2.2, with an architecture schematic shown in Figure 2.
+
+2.1 Architecture
+
+Overview. GRAM models the conditional distribution pθ (y | x) by marginalizing over stochastic
+latent reasoning trajectories. Given an input x, GRAM first computes an embedding
+    $$e_x = f_{\mathrm{enc}}(x; \theta), \tag{1}$$
+which is reused throughout the entire recursive computation. Starting from a fixed initial latent
+state z0 , the model evolves the latent state through learned stochastic transitions. The recursive
+computation is organized into two nested levels: inner and outer loops.
+At the inner level, a latent transition samples a new latent state conditioned on the previous
+latent state and the input embedding,
+    $$z_t \sim p_\theta(z_t \mid z_{t-1}, e_x), \qquad t = 1, \dots, T. \tag{2}$$
+
+[Figure 2 diagram labels: x; Encoder f_enc; l_{t−1}, f_L (K times), l_t; h_{t−1}, f_H, u_t, h_t;
+Prior p_θ(· | u_t); Posterior q_ϕ(· | u_t, y); (B) KL Div.; ∼ ϵ_t; Decoder f_dec; y; (A) CE loss.]
+Figure 2: GRAM Architecture. A single stochastic latent transition in the hierarchical
+instantiation z = (h, l). After K low-level refinements via fL , the high-level update fH produces
+a deterministic proposal ut , to which stochastic guidance ϵt is added: ht = ut + ϵt .
+
+At the end of the T transitions, the decoder produces a prediction, ŷ = arg max fdec (zT ; θ). We refer
+to the sequence of T transitions from the initial state z0 to the final state zT as a supervision step. A
+supervision step is the unit at which the decoder is invoked, and the training objective is applied, with
+gradients computed as described in Section 2.2.
+At the outer level, Nsup supervision steps are applied recursively, with the final state of one supervision
+step serving as the initial state of the next, thereby forming the full recursive computation:
+    $$z_0^{(1)} \xrightarrow{\,T\ \text{transitions}\,} z_T^{(1)} = z_0^{(2)} \xrightarrow{\,T\ \text{transitions}\,} \cdots \xrightarrow{\,T\ \text{transitions}\,} z_T^{(N_{\mathrm{sup}})}, \tag{3}$$
+where $z_t^{(n)}$ denotes the latent state at the t-th transition of the n-th supervision step, $z_0^{(1)}$ is the
+fixed initial state, and the terminal state of one supervision step serves as the initial state of the next
+($z_0^{(n+1)} := z_T^{(n)}$). This abstract formulation can be instantiated with various recurrent Transformer
+backbones, including flat designs such as Universal Transformers and Looped Transformers [10, 7],
+as well as hierarchical designs such as HRM and TRM [8, 9].
+Stochastic Latent Transitions. Unlike prior recursive reasoning models (RRMs) that update the
+latent state deterministically and follow a single fixed trajectory [8, 9], GRAM defines pθ (zt |
+zt−1 , ex ) as a stochastic transition, so that repeated computation induces a distribution over latent
+reasoning trajectories. Concretely, GRAM realizes this transition as a learned stochastic residual
+perturbation around a deterministic update: at each transition, the model first computes a deterministic
+update ut from zt−1 and ex , then samples a conditional perturbation from a state-dependent Gaussian,
+and adds it to ut :
+    $$\epsilon_t \sim p_\theta(\epsilon_t \mid u_t) := \mathcal{N}\big(\mu_\theta(u_t),\ \sigma_\theta^2(u_t)\, I\big), \tag{4}$$
+    $$z_t = u_t + \epsilon_t. \tag{5}$$
+
+
+ 3
+ We refer to ϵt as the learnable stochastic guidance. The mean µθ (ut ) encodes a state-dependent
+direction in which the trajectory is steered, while the variance σθ2 (ut ) controls the amount of ex-
+ploration. This design allows GRAM to capture uncertainty, prevent convergence to local minima,
+and support robust exploration of the solution space without discarding the deterministic refinement
+performed by ut .
+Hierarchical Instantiation. We instantiate the latent state with two interacting components, z =
+(h, l). The high-level component h is updated once per latent transition and carries abstract reasoning
+state, while the low-level component l is updated K times within a single transition and carries
+fine-grained intermediate computation. This decomposition separates the two roles across time scales,
+with h accumulating slowly across transitions and l refined rapidly within each one.
+With this hierarchical multi-scale structure, a single transition zt−1 → zt is computed as follows.
+The low-level component is first refined for K updates, with the high-level component held fixed:
+ lt,k = fL (ht−1 , lt,k−1 , ex ; θ), k = 1, . . . , K, (6)
+where lt,0 := lt−1 and we write lt := lt,K for the refined low-level component. The high-level
+component is then updated as a stochastic transition conditioned on the refined lt ,
+ ut = fH (ht−1 , lt ; θ), (7)
+    $$\epsilon_t \sim p_\theta(\epsilon_t \mid u_t) := \mathcal{N}\big(\mu_\theta(u_t),\ \sigma_\theta^2(u_t)\, I\big), \tag{8}$$
+    $$h_t = u_t + \epsilon_t, \tag{9}$$
+and we set zt = (ht , lt ). Note that stochasticity is introduced only at the high level: the low-
+level refinement is fully deterministic, while the stochastic guidance signal ϵt acts on the slower,
+more abstract component of the latent state, where it can steer the overall reasoning trajectory
+across transitions3 . Under this instantiation, the decoder reads only the high-level component, i.e.,
+fdec (zT ) = fdec (hT ). Additional architectural details are provided in Appendix B.1.
+Modeling Unconditional Distribution. While the description so far focuses on the conditional
+setting pθ (y | x), the same recursive process can also be defined as an unconditional generative model
+pθ (x) when the input is replaced with an empty conditioning embedding. We use this formulation for
+generation tasks in Section 4.3.
+
+2.2 Training
+
+GRAM is trained to model the conditional distribution pθ (y | x), where each training example
+consists of an input x and its corresponding target y. As a probabilistic model, GRAM adopts a
+latent-variable formulation and is optimized by maximizing an evidence lower bound (ELBO) with
+respect to the generative parameters θ and variational parameters ϕ.
+Latent Variable Modeling. We model GRAM as a latent-variable probabilistic model pθ , where
+the full latent trajectory τ = (z0 → · · · → zTTotal ) consists of a sequence of latent variables, with
+TTotal = T × Nsup . The conditional likelihood is defined as
+    $$p_\theta(y \mid x) = \int p_\theta(y \mid \tau, x)\, p_\theta(\tau \mid x)\, d\tau, \tag{10}$$
+
+where x denotes the input problem and y denotes the corresponding ground-truth output.
+Direct maximum likelihood estimation of log pθ (y | x) is intractable due to the marginalization
+over latent trajectories. We therefore introduce a variational posterior qϕ (τ | x, y) and optimize the
+evidence lower bound (ELBO), jointly training θ and ϕ via variational inference:
+ log pθ (y | x) ≥ Eqϕ (τ |x,y) [log pθ (y | τ, x)] − KL(qϕ (τ | x, y) ∥ pθ (τ | x)) . (11)
+
+During training, latent trajectories are sampled from the variational posterior qϕ (· | x, y), which has
+access to both the input problem x and the target output y. At inference time, where y is unavailable,
+trajectories are instead generated from the learned prior pθ (· | x).
+ 3We also tried injecting noise into the low-level state, but found that it did not improve performance.
+
+
+
+
+ 4
+ Both the prior and the posterior are modeled as conditional Markov processes over latent states:
+    $$p_\theta(\tau \mid x) = p(z_0) \prod_{t=1}^{T_{\mathrm{Total}}} p_\theta(z_t \mid z_{t-1}, x), \qquad q_\phi(\tau \mid x, y) = p(z_0) \prod_{t=1}^{T_{\mathrm{Total}}} q_\phi(z_t \mid z_{t-1}, x, y). \tag{12}$$
+Here, z0 is a fixed initial state shared by the prior and posterior. Both transitions are implemented by
+adding reparameterized Gaussian noise ϵt after a deterministic update ut ; the posterior uses the same
+transition module as the prior, but samples from a target-conditioned noise distribution qϕ (ϵt | ut , y),
+whereas the prior uses pθ (ϵt | ut ).
+Since the two processes share the same Markov structure and all stochasticity is introduced through
+ϵ1:TTotal , their trajectory distributions can be equivalently represented in noise space. Moreover,
+since GRAM decodes the output only from the terminal latent state, the likelihood term satisfies
+pθ (y | τ, x) = pθ (y | zTTotal , x). Therefore, the full trajectory-level ELBO can be written as
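+    $$\mathcal{L}_{\mathrm{ELBO}} = \mathbb{E}_{q_\phi}\big[\log p_\theta(y \mid z_{T_{\mathrm{Total}}}, x)\big] - \sum_{t=1}^{T_{\mathrm{Total}}} \mathbb{E}_{q_\phi(\epsilon_{<t} \mid x, y)}\Big[\mathrm{KL}\big(q_\phi(\epsilon_t \mid u_t, y) \,\|\, p_\theta(\epsilon_t \mid u_t)\big)\Big]. \tag{13}$$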
+Here, ut = fH (ht−1 , lt ) denotes the deterministic high-level update before noise injection, as defined
+in Equation (7). Since ut depends on ht−1 , which is determined by the previously sampled noise
+variables ϵ<t := (ϵ1 , . . . , ϵt−1 ), the expectation averages over these ancestral samples.
+Practical Implementation. In practice, following previous recursive reasoning models [8, 9], we
+train GRAM with deep supervision over Nsup consecutive supervision steps, each consisting of T
+recursive latent transitions. This provides dense learning signals along the full latent trajectory, rather
+than supervising only the final state after TTotal = T × Nsup transitions. The terminal state of each
+step is reused as the initial state of the next step.
+Following standard practice for recurrent models with long computation chains, we apply truncated
+gradient propagation [16, 17], as used in recent recursive reasoning models [8, 9, 18]. In our
+implementation, gradients are propagated only through the final transition of each supervision step,
+$z_{T-1}^{(n)} \to z_T^{(n)}$. This gives the following surrogate objective for each supervision step:
+    $$\mathcal{L}_{\mathrm{GRAM}}^{(n)}(x, y; \theta, \phi) = \mathbb{E}_{q_\phi}\big[\log p_\theta(y \mid z_T^{(n)}, x)\big] - \mathrm{KL}\big(q_\phi(\epsilon_T^{(n)} \mid u_T^{(n)}, y) \,\|\, p_\theta(\epsilon_T^{(n)} \mid u_T^{(n)})\big), \tag{14}$$
+where $z_T^{(n)}$ is the terminal state of the current supervision step n, and gradients are stopped through
+preceding states. Thus, LGRAM should be viewed as a truncated surrogate objective rather than the
+exact ELBO; it introduces a biased but memory-efficient approximation to the full ELBO. Further
+analysis of this approximation is provided in Appendix A.3, and detailed training hyperparameters
+are listed in Appendix B.2.
+
+2.3 Inference-Time Scaling
+
+GRAM supports two complementary axes of inference-time scaling: depth, by varying the number
+of recursive transitions, and width, by sampling multiple latent reasoning trajectories in parallel.
+For depth, we follow prior recursive reasoning models [8, 9] in adopting adaptive computation
+time (ACT) [8–10], which allows each trajectory to terminate at a learned halting depth (details in
+Appendix A.1). For width — the focus of this section — we draw $\{\tau^{(i)}\}_{i=1}^{N} \sim p_\theta(\tau \mid x)$ from the
+learned prior and decode each terminal state into a candidate output $\hat{y}^{(i)} = f_{\mathrm{dec}}(z_T^{(i)})$, exploring
+multiple stochastic reasoning paths simultaneously rather than extending a single trajectory.
+To select among candidates, we use either majority voting or best-of-N with a Latent Process Reward
+Model (LPRM). The LPRM is a value head vψ (zt ) trained to predict the final quality of a trajectory
+from its latent state, using a regression target r ∈ [0, 1] given by the final prediction accuracy. At
+inference time, majority voting selects the most frequent prediction, whereas LPRM-guided selection
+chooses the candidate with the highest predicted terminal value. Details of LPRM training are
+provided in Appendix A.2. Overall, this procedure improves robustness and solution quality through
+parallel exploration, without increasing the sequential recursion length.
+
+3 Related Work
+Latent Reasoning. Latent reasoning aims to reduce the inefficiency and verbosity of explicit
+Chain-of-Thought (CoT) by shifting part or all of the reasoning process into latent or continuous
+
+
+ 5
+ Sudoku ARC-AGI-1 ARC-AGI-2
+ 20
+ 100 97.0% 66.7%
+ 87.4% 16.0%
+ 80 60 52.0% 55.7% 15
+Accuracy (%) 61.3% 44.6% 11.1%
+ 60 55.0% 40.3% 9.7%
+ 40 34.5% 10 7.8%
+ 40 5.0%
+ 20 5 3.0%
+ 20
+ 0 0 0
+ Looped HRM TRM GRAM HRM TRM GRAM o3-mini-GPT 5.2 Grok-4- HRM TRM GRAM o3-mini-GPT 5.2 Grok-4-
+ TF (Ours) (Ours) high (low) thinking (Ours) high (low) thinking
+ Recursive Models GRAM (Ours) LLMs
+
+Figure 3: Performance on puzzle benchmarks. On both Sudoku-Extreme and ARC-AGI, GRAM consistently
+outperforms all deterministic recursive baselines (Looped TF, HRM, TRM), demonstrating that stochastic latent
+transitions yield substantial gains within the recursive-reasoning paradigm. Looped TF results on ARC-AGI are
+omitted due to prohibitive training cost (see Section C.1.1). Note that large reasoning model scores are included
+only as external reference points for benchmark difficulty.
+
+representations [1–6]. By avoiding token-by-token generation of intermediate steps, such representa-
+tions can make reasoning traces more compact and reduce generation overhead. Existing approaches
+instantiate this idea through hidden states, latent or soft tokens, continuous thoughts, internal rea-
+soning traces, and recursive state updates for scaling test-time computation [4, 7, 19–23, 18, 24–26].
+However, many remain organized around autoregressive sequence generation, where additional
+computation is tied to generating more tokens, latent positions, or sequential reasoning states.
+Recursive Architectures. Recursive architectures perform iterative state updates and have evolved
+from RNNs to weight-sharing Transformers with adaptive computation [7, 10, 27–32, 25]. Recent
+recursive reasoning models show that increasing inference-time depth can outperform larger static
+models [8, 9, 18, 24]. GRAM builds on this line but formulates recurrence as a probabilistic process:
+instead of following a single deterministic refinement path, it maintains stochastic latent trajectories,
+enabling multi-path exploration and generative sampling.
+Probabilistic Latent State-Space Models. Probabilistic recurrent models use stochastic latent
+transitions to capture uncertainty and multimodal dynamics, often trained with variational infer-
+ence [33–38]. They have been widely used in sequential generative modeling, video prediction,
+and model-based reinforcement learning. GRAM shares this latent state-space view but reinterprets
+stochastic dynamics as computation rather than temporal observation modeling: latent transitions
+define possible reasoning trajectories, supporting multi-hypothesis exploration and both conditional
+pθ (y | x) and unconditional pθ (x) generation.
+
+
+4 Experiments
+
+GRAM is designed as an architecture for probabilistic recursive reasoning, rather than as a general-
+purpose large language reasoning model whose training data, inference budgets, prompting strategies,
+tool use, and external scaffolding are not directly comparable. Following prior work on recurrent and
+recursive reasoning models [8, 9], we therefore evaluate GRAM on standard structured reasoning
+tasks that probe the computational properties targeted by our formulation: iterative latent refinement,
+stochastic trajectory exploration, multi-solution coverage, and inference-time scaling.
+In the following, we first evaluate structured reasoning performance on Sudoku-Extreme and ARC-
+AGI (Section 4.1). We then assess multi-solution behavior on N-Queens and Graph Coloring
+(Section 4.2). Next, we examine the unconditional generative interpretation of GRAM on binarized
+MNIST (Section 4.3). Finally, we perform ablation studies to evaluate the impact of key design
+choices (Section 4.4).
+
+4.1 Challenging Puzzle Tasks
+
+Setup. We evaluate on Sudoku-Extreme [8], which contains 9×9 puzzles with minimal clues re-
+quiring extensive constraint propagation, and ARC-AGI Challenge [13, 14], which tests abstract
+visual reasoning through few-shot pattern recognition. We compare against direct prediction (Trans-
+
+
+ 6
+ 1.0 N= 1.0
+ 1 5 10 20 50
+ 0.8
+ 0.9
+Accuracy Looped TF
+
+
+
+
+ Accuracy
+ Looped TF 0.6 HRM
+ HRM
+ 0.8 TRM TRM
+ GRAM (Ours) 0.4 GRAM (N=1)
+ 0.2
+ 0.6
+ 0.5 0.0
+ 8 16 32 128 320 2 4 6 8 10 12 14 16 18
+ Iterations Number of Solutions
+ Figure 4: (Left) Inference-time scaling on Sudoku-Extreme. While both TRM and GRAM benefit from
+ longer recursion (x-axis), GRAM additionally scales with parallel sampling (N = number of samples); each
+ iteration corresponds to a supervision step, which amounts to K× more flat iterations for Looped TF. (Right)
+ Accuracy across number of solutions in N-Queens (8 × 8). Conventional deterministic recursive models suffer
+ a sharp performance drop as the number of possible solutions increases, whereas GRAM maintains consistent
+ performance.
+
+
+ former [39]) and recursive baselines (Looped TF [7], HRM [8], TRM [9]). Reported large reasoning
+ model results [40] are included as external reference points for benchmark difficulty, rather than
+ as controlled baselines, since their training and inference settings are not directly comparable to
+ task-specific recursive models. For the scaling analysis, all baselines (Looped TF, HRM, TRM) are
+ reproduced under identical settings following Yang et al. [7] and Jolicoeur-Martineau [9].
+ Stochastic Guidance Improves Reasoning. Figure 3 and Table 8 summarize our main results.
+ GRAM consistently outperforms prior recursive models across all benchmarks. We attribute this
+ improvement to the fundamental difference in how reasoning trajectories are utilized. While Looped
+ TF, HRM, and TRM are restricted to learning from a single deterministic path, GRAM leverages
+ stochastic transitions to explore diverse reasoning trajectories. By training on this richer distribution
+ of solution paths, GRAM acquires more robust reasoning capabilities, allowing it to navigate complex
+ problem spaces more effectively than models constrained to a single sequential refinement process.
+ Detailed experimental results, including more state-of-the-art methods, are provided in Appendix D.1.
+ Parallel Sampling Provides a New Test-time Scaling Axis. Figure 4 (left) shows that increasing the
+ number of parallel samples consistently improves performance across all iteration counts. Notably,
+ GRAM with N = 20 samples at 16 iterations outperforms all deterministic baselines at 320 iterations,
+ including TRM (97.0% vs 90.5%), despite comparable computational budget. While deterministic
+ recursive models scale only through sequential refinement, GRAM leverages stochastic transitions to
+ explore multiple reasoning paths in parallel. To select the best trajectory, we employ a Latent Process
+ Reward Model (LPRM) that predicts output correctness (Section 2.3). This parallel scaling bypasses
+ the latency bottlenecks of depth-based scaling while achieving superior performance. Additional
+ analysis on the ARC-AGI Challenge is provided in Appendix D.2.
+
+ 4.2 Multi-solution Puzzle Tasks
+
+Setup. To evaluate whether GRAM can capture diverse solutions, we test on N-Queens (8 × 8,
+10 × 10) and Graph Coloring (8-vertex, 10-vertex) tasks, where multiple valid solutions exist for each
+input. We compare against direct prediction (Transformer [39]), recursive models (Looped TF [7],
+HRM [8], TRM [9]), and generative models (Autoregressive Transformer (AR), MDLM [41]). For
+N-Queens, we report accuracy (whether the output satisfies all constraints) and coverage (found /
+total valid solutions, with 20 samples). For Graph Coloring, we report conflict edges (number of
+constraint violations; lower is better) instead of accuracy. Detailed configurations are provided in
+Appendix C.2.
+ Deterministic Recursion Fails on Multi-Solution Tasks. Table 1 reveals that deterministic recursive
+ models structurally cannot capture multiple solutions, with coverage at most 36.1% across all tasks.
+ Figure 4 (right) further illustrates this limitation: as the number of valid solutions increases, all three
+ deterministic recursive baselines exhibit sharp accuracy degradation, whereas GRAM maintains
+ consistent performance regardless of solution count. This confirms that deterministic latent updates
+
+
+ 7
+ Table 1: Evaluation on N-Queens and Graph Coloring benchmarks. Rec. and Gen. indicate whether the
+model uses recursive computation and generative sampling, respectively. Values are mean ± standard deviation
+over runs. Accuracy: single-sample (%). Conflict: constraint-violating edges (↓). Coverage: unique valid
+solutions discovered with 20 samples (%).
+ N-Queens Graph Coloring
+ 8×8 10 × 10 8-vertex 10-vertex
+ Method Rec. Gen. # Params Accuracy Coverage Accuracy Coverage Conflict↓ Coverage Conflict↓ Coverage
+ Direct Pred (8 layers) ✗ ✗ 27M 40.4±1.1 13.7±1.1 13.6±0.5 1.6±0.2 179.3±4.0 19.9±0.2 198.7±5.0 6.7±0.1
+ Direct Pred (32 layers) ✗ ✗ 100M 40.2±1.3 13.6±1.1 13.1±0.4 1.6±0.2 174.0±18.0 19.1±1.7 227.7±34.5 6.5±1.9
+ Looped TF ✓ ✗ 7M 68.4±3.7 23.6±1.9 50.0±7.6 6.2±3.2 136.0±16.1 20.5±1.5 157.3±9.0 7.2±0.7
+ HRM ✓ ✗ 27M 78.7±2.9 26.7±1.3 37.4±0.3 4.7±0.1 109.7±1.5 21.8±0.3 164.3±21.6 8.9±1.7
+ TRM ✓ ✗ 7M 66.8±5.7 36.1±22.5 17.5±11.2 2.0±1.3 109.3±3.1 22.3±0.6 170.7±17.9 6.8±0.3
+ AR ✗ ✓ 10.6M 96.3±1.0 84.8±0.8 90.0±2.2 53.2±0.8 19.0±11.3 83.0±0.7 61.3±8.3 40.0±0.3
+ MDLM ✗ ✓ 12.6M 96.1±1.5 87.2±0.6 74.3±6.6 47.4±2.2 2.7±0.6 84.5±4.0 12.0±7.0 48.2±1.4
+ GRAM (Ours) ✓ ✓ 10M 99.7±0.3 90.3±1.9 89.7±2.7 57.5±3.4 2.7±2.1 85.8±0.5 3.3±1.5 51.3±2.8
+
+
+
+
+cause mode collapse when multiple valid outputs exist for the same input. Additional coverage
+analysis is provided in Appendix D.3.
+Recursive Refinement Yields Sharper Constraint Satisfaction. While generative models (AR,
+MDLM) achieve high coverage, GRAM consistently attains higher accuracy with comparable diver-
+sity. On N-Queens, GRAM reaches 99.7% accuracy versus 96.3% (AR) and 96.1% (MDLM). The
+gap is more pronounced on Graph Coloring, where GRAM reduces conflict edges to 2.7 and 3.3 on 8-
+and 10-vertex tasks, compared to 19.0 and 61.3 for AR. This demonstrates that recursive refinement
+enables stricter constraint satisfaction than generative sampling alone.
+
+4.3 Exploring GRAM as an Unconditional Generator
+
+Setup. To investigate GRAM’s unconditional generative capability beyond conditional reasoning, we
+evaluate generation in two domains: structured constraint generation on Sudoku (from empty boards,
+evaluated by the fraction of generated boards satisfying Sudoku constraints) and image generation on
+binarized MNIST [15], where pixel values are thresholded to 0 or 1 (evaluated by Inception Score (IS) [42]
+and FID [43]). In both cases, the input is replaced by an empty conditioning signal and the model samples
+an output from its learned prior. Baselines include D3PM [44], a discrete diffusion model, on both tasks,
+and additionally a VAE [45] trained with binary reconstruction loss on MNIST. To ensure a fair comparison
+with existing literature, FID is calculated using real samples from the original standard MNIST.
+
+Table 2: Unconditional generation results on binarized MNIST. We report IS (↑) and FID (↓).
+For iterative models, a step corresponds to a supervision step for TRM and GRAM, and a denoising
+step for D3PM. FID is calculated using real samples with original pixel values (0–255).
+
+    Method                IS (↑)    FID (↓)
+    VAE                   1.70      86.28
+    D3PM (1000 steps)     1.86      74.03
+    TRM (16 steps)        1.00      303.29
+    GRAM (Ours)
+      8 steps             1.85      84.08
+      16 steps            1.89      77.79
+      32 steps            1.91      76.65
+      64 steps            1.95      75.39
+      128 steps           1.99      74.30
+      256 steps           2.04      73.34
+
+Figure 5: Unconditional Sudoku generation. Validity (%) of generated Sudoku puzzles. GRAM
+achieves higher validity than D3PM with substantially fewer parameters and steps.
+
+Generative Behavior Beyond Reasoning. GRAM extends from conditional reasoning to unconditional
+generation in two different domains. On Sudoku generation (Figure 5), GRAM produces valid boards
+with 99.05% validity using 10.9M parameters and 16 supervision steps, surpassing D3PM baselines
+that use up to 55.1M parameters and 1000 denoising steps. Figure 7 shows qualitative examples,
+illustrating that the model produces diverse, fully valid boards from empty inputs without any explicit
+constraint checker. On MNIST (Table 2), the deterministic baseline TRM exhibits mode collapse (FID
+303.29), whereas GRAM produces recognizable digits with IS and FID comparable to D3PM. Together,
+these results indicate that GRAM’s stochastic latent
+
+ 8
+ D3PM
+ t=0 t=100 t=200 t=300 t=400 t=500 t=600 t=700 t=800 t=900 t=1000
+GRAM TRM
+ t=0 t=1 t=2 t=4 t=6 t=7 t=9 t=11 t=12 t=14 t=16
+
+
+
+ t=0 t=1 t=2 t=4 t=6 t=7 t=9 t=11 t=12 t=14 t=16 Sample 1 Sample 2 Sample 3 Sample 4
+ (a) Generation Process (b) Samples
+ Figure 6: Visualization of the generation process and samples. (a) The generation process over recursion
+ steps. Each row corresponds to a different model. GRAM (bottom) progressively refines the generated image
+ through recursive latent updates, correcting initial errors. (b) Unconditional generated samples from each model.
+
+ Table 3: Ablation study on Sudoku-Extreme and N-Queens (8 × 8). We evaluate with 5 samples. For (a),
+ Components are added cumulatively to the Looped TF baseline (DS = deep supervision, HR = hierarchical
+ recursion, SG = stochastic guidance). For (b), both stochasticity and learned guidance are essential—removing
+ either significantly degrades performance.
+ (a) Architecture Ablation.
+
+ Model variant                  Sudoku           N-Queens
+ base (Looped TF)               61.25            71.30
+ + DS + HR (=HRM, TRM)          55.00 / 87.40    80.70 / 72.90
+ + SG                           65.64            86.30
+ + DS + SG                      73.90            100.00
+ + DS + HR + SG (=GRAM)         93.96            99.69
+
+ (b) Mechanism Ablation.
+
+ Model variant                  Sudoku           N-Queens
+ GRAM (ours)                    93.96            99.69
+ w/o stochastic guidance        82.87            72.91
+ stochasticity only             94.88            50.27
+ guide only                     0.00             0.00
+ w/ direct prediction           63.43            61.44
+ TRM w/ stochastic decoder      82.87            71.66
+ TRM w/ random init.            78.53            71.82
+
+
+ transitions support generative modeling beyond symbolic reasoning, with constraint satisfaction
+ emerging as a natural byproduct of the recursive generative process.
+ Inference-Time Scaling Transfers to Generation. Table 2 further shows that increasing recursion
+ at inference improves generation quality monotonically (IS 1.85 → 2.04, FID 84.08 → 73.34 from 8
+ to 256 steps), even though training uses only 16 steps. This indicates that the iterative-refinement
+ advantage of recursive models carries over into the generative regime. Figure 6 visualizes this process;
+ additional samples are in Section D.4.
+
+ 4.4 Ablation Study
+
+ We ablate key design choices of GRAM on Sudoku-Extreme and N-Queens (8 × 8) using 5 samples.
+ Table 3 summarizes the results.
+ Stochastic Guidance Provides Consistent Gains Across Architectures. Table 3a shows that
+ stochastic guidance (SG) improves performance regardless of the underlying architecture: SG alone
+ lifts the flat Looped TF baseline, and combining SG with deep supervision already reaches 100%
+ on N-Queens. The full GRAM (with hierarchical recursion on top) achieves the best results overall
+ (93.96% / 99.69%). While the effect of hierarchical recursion is task-dependent, SG yields consistent
+ gains in every configuration, supporting our design of stochastic guidance as the core extension
+ introduced by GRAM.
+ Both Stochasticity and Guidance Are Essential. We ablate each component by modifying the
+ learned distribution ϵt ∼ N (µθ , σθ2 I) in Equation (4). Removing guidance (N (0, σθ2 I)) maintains
+ Sudoku performance (94.88%), indicating that stochasticity alone can enable diverse reasoning paths.
+ However, this variant collapses on N-Queens (50.27%), where structured guidance is necessary to
+ navigate multi-solution spaces. Removing stochasticity (N (µθ , 0)) fails completely (0.0% on both
+ tasks), as deterministic guidance conditioned on the target leads to severe overfitting.
+ Naive Stochasticity Does Not Help TRM. We test two simple approaches to add stochasticity to
+ TRM: (1) stochastic decoding, which samples from the output distribution instead of argmax, and
+
+
+ 9
+ 8 9 648 59 1 648 59 1 648259317 648259317 648259317 648259317
+ 7 79 5 79 68 5 79 68 415 79 683415 79 683415 792683415 792683415
+ 9 6 289 3 6 1289 3 6 1289 3 6 71289 356 71289 356471289 356471289
+
+
+
+D3PM
+ 657 4 8 657 4 831657 4 83165734 983165734 983165734 983165734
+ 3 9 4 3 9 5 4 3 9 5 4 389 56 42389 561 423897561 423897561 423897561
+ 7 8 7 8 2 7 8 2 7 648 2 5 73648 2 5 73648 2 5173648 2 517364892
+ 5 5 13 548 139548 2 139548626 139548626 139548626 139548626
+ 59 593 593 1 8 593 1 8 27593 148 275936148 275936148 275936148
+ 2 8 2 3 8 7 2 3 8 7 2 53 8 47 2953 8 4712953 864712953 864712953
+ t=0 t=125 t=250 t=375 t=500 t=625 t=750 t=875 t=1000
+ 717348652 716348952 716348952 716348952 716348952 716348952 716348952 716348952
+ 483927379 493527861 493527861 493527861 493527861 493527861 493527861 493527861
+ 523971734 528996734 528169734 528169734 528169734 528169734 528169734 528169734
+GRAM
+
+
+ 864235917 864251379 864215379 864215379 864215379 864215379 864215379 864215379
+ 359841126 359784126 359874126 359874126 359874126 359874126 359874126 359874126
+ 172496538 172936548 172936548 172936548 172936548 172936548 172936548 172936548
+ 937812445 937812615 937482615 937482615 937482615 937482615 937482615 937482615
+ 645779283 645179283 645791283 645791283 645791283 645791283 645791283 645791283
+ 281623497 281663497 281653497 281653497 281653497 281653497 281653497 281653497
+ iter=0 iter=1 iter=2 iter=4 iter=6 iter=8 iter=10 iter=13 iter=16
+Figure 7: Qualitative examples of unconditional Sudoku generation by GRAM. Each board is independently
+sampled from an empty grid using the learned prior. GRAM produces diverse, complete boards satisfying all
+row, column, and box constraints, without an explicit constraint checker or search procedure. Incorrect digits are
+highlighted in red.
+
+
+(2) random initialization, which samples z0 from a Gaussian N (0, I) at each inference. Neither
+improves performance, demonstrating that GRAM’s gains stem from the variational framework rather
+than mere randomness.
+
+5 Conclusions and Limitations
+We introduced GRAM, a generative framework that transforms deterministic recursive architectures
+into probabilistic generative models capable of modeling both p(y | x) and p(x) via recursive amor-
+tized variational inference. For reasoning problems, introducing stochasticity into latent transitions
+enables diverse solution discovery and improved exploration compared to deterministic counterparts.
+Notably, we demonstrate that GRAM can leverage width-based inference-time scaling as a complement
+to depth: sampling multiple latent trajectories in parallel bypasses the latency bottleneck of
+depth-only scaling. Our ablations further reveal that stochastic guidance is a general-purpose exten-
+sion that consistently improves any recursive architecture, and that the gains stem specifically from
+the variational framework — not from mere randomness, as naive stochastic alternatives applied to
+existing models yield no improvement.
+Beyond solution-seeking, GRAM also demonstrates potential as an unconditional generative model
+through recursion-based generation over inputs, with generation quality improving monotonically
+with recursive depth even beyond training-time steps. This suggests new directions for generative
+modeling via hierarchical recursion. Despite these strengths, the sequential nature of deep supervision
+limits training efficiency compared to Transformers, posing a significant barrier to scaling GRAM
+toward larger foundation models.
+
+
+
+
+ 10
+ Acknowledgment
+This research was supported by the Brain Pool Plus Program (No. 2021H1D3A2A03103645) and
+the GRDC (Global Research Development Center) Cooperative Hub Program (RS-2024-00436165)
+through the National Research Foundation of Korea (NRF) funded by the Ministry of Science and
+ICT (MSIT). This work was also supported by the Institute of Information & Communications
+Technology Planning & Evaluation (IITP) grant funded by the Korea government (MSIT) (No. RS-
+2024-00509279, Global AI Frontier Lab) and by the NYU-KAIST Global Innovation and Research
+Institute. Minsu Kim acknowledges funding from the KAIST Jang Young Sil Fellow Program. We
+are especially grateful to Gyubin and Seungju for their non-trivial contributions, and we thank the
+members of the MLML for valuable discussions and feedback throughout this project.
+
+Broader Impacts
+GRAM studies probabilistic recursive reasoning for structured reasoning and generation. By main-
+taining multiple latent trajectories, it may benefit tasks such as constraint satisfaction and scientific
+problem solving, where uncertainty and multiple valid solutions are common. It also suggests a way
+to improve reasoning through inference-time computation rather than parameter scaling alone. Its
+generality also entails risks: plausible but invalid generations may be mistaken for verified solutions
+in downstream decision-making pipelines, and multi-sample inference may increase computational
+and energy costs at scale. Since our experiments focus on controlled benchmarks, deployment in
+real-world or high-stakes settings would require rigorous validation, uncertainty calibration, and
+domain-specific safeguards.
+
+References
+ [1] Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Fei Xia, Ed Chi, Quoc V Le,
+ Denny Zhou, et al. Chain-of-thought prompting elicits reasoning in large language models.
+ Advances in neural information processing systems, 35:24824–24837, 2022.
+
+ [2] Shunyu Yao, Dian Yu, Jeffrey Zhao, Izhak Shafran, Tom Griffiths, Yuan Cao, and Karthik
+ Narasimhan. Tree of thoughts: Deliberate problem solving with large language models. Ad-
+ vances in neural information processing systems, 36:11809–11822, 2023.
+
+ [3] Maciej Besta, Nils Blach, Ales Kubicek, Robert Gerstenberger, Michal Podstawski, Lukas
+ Gianinazzi, Joanna Gajda, Tomasz Lehmann, Hubert Niewiadomski, Piotr Nyczyk, et al. Graph
+ of thoughts: Solving elaborate problems with large language models. In Proceedings of the
+ AAAI conference on artificial intelligence, volume 38, pages 17682–17690, 2024.
+
+ [4] Shibo Hao, Sainbayar Sukhbaatar, DiJia Su, Xian Li, Zhiting Hu, Jason Weston, and Yuandong
+ Tian. Training large language models to reason in a continuous latent space. arXiv preprint
+ arXiv:2412.06769, 2024.
+
+ [5] Hanlin Zhu, Shibo Hao, Zhiting Hu, Jiantao Jiao, Stuart Russell, and Yuandong Tian. Reasoning
+ by superposition: A theoretical perspective on chain of continuous thought. arXiv preprint
+ arXiv:2505.12514, 2025.
+
+ [6] Halil Alperen Gozeten, M Emrullah Ildiz, Xuechen Zhang, Hrayr Harutyunyan, Ankit Singh
+ Rawat, and Samet Oymak. Continuous chain of thought enables parallel exploration and
+ reasoning. arXiv preprint arXiv:2505.23648, 2025.
+
+ [7] Liu Yang, Kangwook Lee, Robert Nowak, and Dimitris Papailiopoulos. Looped transformers
+ are better at learning learning algorithms. arXiv preprint arXiv:2311.12424, 2023.
+
+ [8] Guan Wang, Jin Li, Yuhao Sun, Xing Chen, Changling Liu, Yue Wu, Meng Lu, Sen Song, and
+ Yasin Abbasi Yadkori. Hierarchical reasoning model. arXiv preprint arXiv:2506.21734, 2025.
+
+ [9] Alexia Jolicoeur-Martineau. Less is more: Recursive reasoning with tiny networks. arXiv
+ preprint arXiv:2510.04871, 2025.
+
+
+ 11
+ [10] Mostafa Dehghani, Stephan Gouws, Oriol Vinyals, Jakob Uszkoreit, and Łukasz Kaiser. Uni-
+ versal transformers. arXiv preprint arXiv:1807.03819, 2018.
+[11] Daniel Kahneman. Thinking, fast and slow. Farrar, Straus and Giroux, 2011.
+[12] Yoshua Bengio. The consciousness prior. arXiv preprint arXiv:1709.08568, 2017.
+[13] François Chollet. On the measure of intelligence. arXiv preprint arXiv:1911.01547, 2019.
+[14] Francois Chollet, Mike Knoop, Gregory Kamradt, Bryan Landers, and Henry Pinkard. Arc-
+ agi-2: A new challenge for frontier ai reasoning systems. arXiv preprint arXiv:2505.11831,
+ 2025.
+[15] Y. Lecun, L. Bottou, Y. Bengio, and P. Haffner. Gradient-based learning applied to document
+ recognition. Proceedings of the IEEE, 86(11):2278–2324, 1998. doi: 10.1109/5.726791.
+[16] Ronald J Williams and Jing Peng. An efficient gradient-based algorithm for on-line training of
+ recurrent network trajectories. Neural computation, 2(4):490–501, 1990.
+[17] Corentin Tallec and Yann Ollivier. Unbiasing truncated backpropagation through time. arXiv
+ preprint arXiv:1705.08209, 2017.
+[18] Jonas Geiping, Sean McLeish, Neel Jain, John Kirchenbauer, Siddharth Singh, Brian R Bartold-
+ son, Bhavya Kailkhura, Abhinav Bhatele, and Tom Goldstein. Scaling up test-time compute
+ with latent reasoning: A recurrent depth approach. arXiv preprint arXiv:2502.05171, 2025.
+[19] Yufan Zhuang, Liyuan Liu, Chandan Singh, Jingbo Shang, and Jianfeng Gao. Text generation
+ beyond discrete token sampling. arXiv preprint arXiv:2505.14827, 2025.
+[20] Zhen Zhang, Xuehai He, Weixiang Yan, Ao Shen, Chenyang Zhao, Shuohang Wang, Yelong
+ Shen, and Xin Eric Wang. Soft thinking: Unlocking the reasoning potential of llms in continuous
+ concept space. arXiv preprint arXiv:2505.15778, 2025.
+[21] Natasha Butt, Ariel Kwiatkowski, Ismail Labiad, Julia Kempe, and Yann Ollivier. Soft tokens,
+ hard truths. arXiv preprint arXiv:2509.19170, 2025.
+[22] Zhenyi Shen, Hanqi Yan, Linhai Zhang, Zhanghao Hu, Yali Du, and Yulan He. Codi:
+ Compressing chain-of-thought into continuous space via self-distillation. arXiv preprint
+ arXiv:2502.21074, 2025.
+[23] Zhenrui Yue, Bowen Jin, Huimin Zeng, Honglei Zhuang, Zhen Qin, Jinsung Yoon, Lanyu
+ Shang, Jiawei Han, and Dong Wang. Hybrid latent reasoning via reinforcement learning. arXiv
+ preprint arXiv:2505.18454, 2025.
+[24] Sangmin Bae, Yujin Kim, Reza Bayat, Sungnyun Kim, Jiyoun Ha, Tal Schuster, Adam Fisch,
+ Hrayr Harutyunyan, Ziwei Ji, Aaron Courville, et al. Mixture-of-recursions: Learning dynamic
+ recursive depths for adaptive token-level computation. arXiv preprint arXiv:2507.10524, 2025.
+[25] Amirkeivan Mohtashami, Matteo Pagliardini, and Martin Jaggi. Cotformer: A chain-of-
+ thought driven architecture with budget-adaptive computation cost at inference. arXiv preprint
+ arXiv:2310.10845, 2023.
+[26] Sangmin Bae, Adam Fisch, Hrayr Harutyunyan, Ziwei Ji, Seungyeon Kim, and Tal Schuster.
+ Relaxed recursive transformers: Effective parameter sharing with layer-wise lora. arXiv preprint
+ arXiv:2410.20672, 2024.
+[27] Jeffrey L Elman. Finding structure in time. Cognitive science, 14(2):179–211, 1990.
+[28] Sepp Hochreiter and Jürgen Schmidhuber. Long short-term memory. Neural computation, 9(8):
+ 1735–1780, 1997.
+[29] Kyunghyun Cho, Bart Van Merriënboer, Caglar Gulcehre, Dzmitry Bahdanau, Fethi Bougares,
+ Holger Schwenk, and Yoshua Bengio. Learning phrase representations using rnn encoder-
+ decoder for statistical machine translation. arXiv preprint arXiv:1406.1078, 2014.
+
+
+ 12
+ [30] Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, and Radu
+ Soricut. Albert: A lite bert for self-supervised learning of language representations. arXiv
+ preprint arXiv:1909.11942, 2019.
+[31] Maha Elbayad, Jiatao Gu, Edouard Grave, and Michael Auli. Depth-adaptive transformer. arXiv
+ preprint arXiv:1910.10073, 2019.
+[32] Alex Graves. Adaptive computation time for recurrent neural networks. arXiv preprint
+ arXiv:1603.08983, 2016.
+[33] Junyoung Chung, Kyle Kastner, Laurent Dinh, Kratarth Goel, Aaron Courville, and Yoshua
+ Bengio. A recurrent latent variable model for sequential data, 2016. URL https://arxiv.
+ org/abs/1506.02216.
+[34] Marco Fraccaro, Søren Kaae Sønderby, Ulrich Paquet, and Ole Winther. Sequential neural
+ models with stochastic layers, 2016. URL https://arxiv.org/abs/1605.07571.
+[35] Rahul G. Krishnan, Uri Shalit, and David Sontag. Deep kalman filters, 2015. URL https:
+ //arxiv.org/abs/1511.05121.
+[36] Danijar Hafner, Timothy Lillicrap, Ian Fischer, Ruben Villegas, David Ha, Honglak Lee, and
+ James Davidson. Learning latent dynamics for planning from pixels, 2019. URL https:
+ //arxiv.org/abs/1811.04551.
+[37] Danijar Hafner, Timothy Lillicrap, Mohammad Norouzi, and Jimmy Ba. Mastering atari with
+ discrete world models. arXiv preprint arXiv:2010.02193, 2020.
+[38] Danijar Hafner, Jurgis Pasukonis, Jimmy Ba, and Timothy Lillicrap. Mastering diverse domains
+ through world models. arXiv preprint arXiv:2301.04104, 2023.
+[39] Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez,
+ Łukasz Kaiser, and Illia Polosukhin. Attention is all you need. Advances in neural information
+ processing systems, 30, 2017.
+[40] ARC-Prize-Foundation. ARC-AGI benchmarking: Leaderboard and dataset for the ARC-AGI
+ benchmark. https://arcprize.org/leaderboard, 2026. Accessed: 2026-1-22.
+[41] Subham Sahoo, Marianne Arriola, Yair Schiff, Aaron Gokaslan, Edgar Marroquin, Justin Chiu,
+ Alexander Rush, and Volodymyr Kuleshov. Simple and effective masked diffusion language
+ models. Advances in Neural Information Processing Systems, 37:130136–130184, 2024.
+[42] Shane Barratt and Rishi Sharma. A note on the inception score. arXiv preprint arXiv:1801.01973,
+ 2018.
+[43] Martin Heusel, Hubert Ramsauer, Thomas Unterthiner, Bernhard Nessler, and Sepp Hochreiter.
+ Gans trained by a two time-scale update rule converge to a local nash equilibrium. Advances in
+ neural information processing systems, 30, 2017.
+[44] Jacob Austin, Daniel D Johnson, Jonathan Ho, Daniel Tarlow, and Rianne Van Den Berg.
+ Structured denoising diffusion models in discrete state-spaces. Advances in neural information
+ processing systems, 34:17981–17993, 2021.
+[45] Diederik P Kingma and Max Welling. Auto-encoding variational bayes. arXiv preprint
+ arXiv:1312.6114, 2013.
+[46] Jianlin Su, Murtadha Ahmed, Yu Lu, Shengfeng Pan, Wen Bo, and Yunfeng Liu. Roformer:
+ Enhanced transformer with rotary position embedding. Neurocomputing, 568:127063, 2024.
+[47] Noam Shazeer. Glu variants improve transformer. arXiv preprint arXiv:2002.05202, 2020.
+[48] Simo Ryu. Minimal implementation of a d3pm (structured denoising diffusion models in
+ discrete state-spaces), in pytorch. https://github.com/cloneofsimo/d3pm, 2024.
+[49] William Peebles and Saining Xie. Scalable diffusion models with transformers. In Proceedings
+ of the IEEE/CVF international conference on computer vision, pages 4195–4205, 2023.
+
+
+ 13
+ [50] Yann LeCun, Léon Bottou, Yoshua Bengio, and Patrick Haffner. Gradient-based learning
+ applied to document recognition. Proceedings of the IEEE, 86(11):2278–2324, 1998.
+[51] Alex Krizhevsky, Ilya Sutskever, and Geoffrey E Hinton. Imagenet classification with deep
+ convolutional neural networks. Advances in neural information processing systems, 25, 2012.
+[52] Stefan Elfwing, Eiji Uchibe, and Kenji Doya. Sigmoid-weighted linear units for neural network
+ function approximation in reinforcement learning. Neural networks, 107:3–11, 2018.
+[53] Yuxin Wu and Kaiming He. Group normalization. In Proceedings of the European conference
+ on computer vision (ECCV), pages 3–19, 2018.
+[54] Ilya Loshchilov and Frank Hutter. Decoupled weight decay regularization. arXiv preprint
+ arXiv:1711.05101, 2017.
+[55] Andrew Brock, Jeff Donahue, and Karen Simonyan. Large scale gan training for high fidelity
+ natural image synthesis. arXiv preprint arXiv:1809.11096, 2018.
+[56] Yang Song and Stefano Ermon. Improved techniques for training score-based generative models.
+ Advances in neural information processing systems, 33:12438–12448, 2020.
+[57] P. Erdős and A. Rényi. On the strength of connectedness of a random graph. Acta Mathematica
+ Academiae Scientiarum Hungarica, 12(1):261–267, Mar 1964. ISSN 1588-2632. doi: 10.1007/
+ BF02066689. URL https://doi.org/10.1007/BF02066689.
+[58] Henrique Lemos, Marcelo Prates, Pedro Avelar, and Luis Lamb. Graph colouring meets deep
+ learning: Effective graph neural network models for combinatorial problems. In 2019 IEEE
+ 31st International Conference on Tools with Artificial Intelligence (ICTAI), pages 879–885.
+ IEEE, 2019.
+[59] Alexey L. Pomerantsev. Principal component analysis (pca). Encyclopedia of Autism Spectrum
+ Disorders, 2014. URL https://api.semanticscholar.org/CorpusID:2534141.
+[60] Jon Louis Bentley. Multidimensional binary search trees used for associative searching. Com-
+ mun. ACM, 18:509–517, 1975. URL https://api.semanticscholar.org/CorpusID:
+ 13091446.
+
+
+
+
+ 14
+ A Additional Method Details
+A.1 Adaptive Computation Time
+
+GRAM optionally adopts adaptive computation time (ACT) [8–10] at inference, allowing each
+trajectory to terminate at a learned halting depth rather than running for a fixed number of supervision
+steps. We follow the Q-learning formulation introduced by HRM [8] and adopted in TRM [9].
+Halt head. The decoder includes an auxiliary head $q_\psi : \mathbb{R}^D \to \mathbb{R}^2$ that maps the high-level state $h$
+to two scalar values, $q_\psi(h) = (q^{\text{halt}}, q^{\text{continue}})$. These are interpreted as estimated Q-values for the
+binary action of halting or continuing computation at the current supervision step.
+Training. The halt head is trained jointly with the main objective via a temporal-difference loss.
+After computing the latent state $z_T^{(n)}$ at the end of supervision step $n$, we form Q-learning targets:
+
+ • $\hat{q}_n^{\text{halt}} = \mathbb{1}[\hat{y}^{(n)} = y]$, indicating whether decoding the current state would yield a correct
+ prediction.
+
+ • $\hat{q}_n^{\text{continue}} = \max\left(q_{n+1}^{\text{halt}},\, q_{n+1}^{\text{continue}}\right)$, the bootstrapped value of running one more supervision
+ step.
+
+The halt head is trained by regression to these targets:
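+ $$\mathcal{L}_{\text{ACT}} = \sum_{n=1}^{N_{\text{sup}}} \left[ \left(q_n^{\text{halt}} - \hat{q}_n^{\text{halt}}\right)^2 + \left(q_n^{\text{continue}} - \hat{q}_n^{\text{continue}}\right)^2 \right]. \tag{15}$$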
+
+This auxiliary loss is added to the main training objective and contributes only through the halt head;
+it does not propagate gradients into the recursive core.
+Inference. At inference, computation proceeds one supervision step at a time. After each step
+$n$, we evaluate $q_\psi(h^{(n)})$ and halt if $q_n^{\text{halt}} > q_n^{\text{continue}}$, returning $\hat{y}^{(n)}$ as the prediction. Otherwise,
+computation continues to the next supervision step, up to a maximum budget of $N_{\text{sup}}^{\max}$ steps. Different
+trajectories sampled in parallel may therefore terminate at different depths, complementing the
+parallel-sampling scheme described in Section 2.3. In practice, we found that using only q halt
+(halting when σ(q halt ) > 0.5, without the continue branch) performs comparably while simplifying
+implementation; our released code uses this variant.
+
+A.2 Latent Process Reward Model (LPRM).
+
+To rank or select among sampled candidates, we train a value head vψ (zt ) to predict the expected
+accuracy of the final output, conditioned on the current latent state zt . The LPRM is trained jointly
+with the main objective via a regression loss:
+ $$\mathcal{L}_{\text{LPRM}} = \sum_{t=1}^{T} \left(v_\psi(z_t) - r\right)^2, \tag{16}$$
+
+where r ∈ [0, 1] denotes the accuracy of the final prediction for a given trajectory.
+
+A.3 Empirical Validation of the Surrogate Objective
+
+We further analyze the approximation introduced by the surrogate training objective LGRAM used in
+Section 2.2, both qualitatively and empirically.
+Truncation as a gradient approximation. We frame LGRAM as a gradient approximation rather than
+a separate variational objective. The full trajectory-level ELBO (Equation (13)) involves a sum of KL
+terms across all TTotal transitions, and computing its exact gradient requires backpropagation through
+the entire trajectory. To enable training with constant memory, we propagate gradients only through
+the final transition of each supervision step. This is a standard practice in recurrent latent variable
+models with long computation chains: ELBOs over truncated sequences are used, for example, in
+VRNN [33] and SRNN [34], while truncated latent imagination is used in Dreamer-family world
+models [37, 38]. Trading a small gradient bias for training stability via local truncation is therefore
+
+
+ 15
+ well-precedented; what is specific to GRAM is applying this approximation at the level of recursive
+reasoning trajectories rather than temporal sequences.
+Empirical validation. To verify that optimizing LGRAM effectively drives improvement in the full
+variational bound, we compute both quantities on the validation set throughout training. The full
+ELBO $\mathcal{L}_{\text{ELBO}}$ is evaluated as in Equation (13), summing the reconstruction term and KL contributions
+across all $T_{\text{Total}}$ transitions; the surrogate objective is evaluated as the average of $\mathcal{L}_{\text{GRAM}}^{(n)}$ over the
+$N_{\text{sup}}$ supervision steps. Figure 8 reports the results on Sudoku-Extreme and N-Queens 8 × 8.
+
+ Sudoku-Extreme N-Queens 8 × 8
+ GRAM (training objective) GRAM (training objective)
+ 0.6 ELBO (full) 103 ELBO (full)
+
+
+ 0.5 102
+ ELBO
+
+
+
+
+ ELBO
+ 0.4 101
+
+ 100
+ 0.3
+ 10 1
+ 0.2
+ 10000 20000 30000 40000 50000 60000 0 10000 20000 30000 40000
+ Training Step Training Step
+Figure 8: Full ELBO LELBO and surrogate objective LGRAM throughout training (plotted as −ELBO,
+smaller is better). On both Sudoku-Extreme (left) and N-Queens 8 × 8 (right), both quantities decrease
+monotonically over training, indicating that gradient updates of LGRAM consistently improve the full variational
+bound. The two curves do not coincide because LELBO sums KL contributions across all TTotal transitions
+while LGRAM evaluates only the final-step KL of each supervision step; their gap reflects the cumulative KL
+across earlier transitions, not a failure of optimization. The N-Queens plot uses a log scale on the y-axis due to
+the large dynamic range.
+
+Both LELBO and LGRAM improve monotonically throughout training on both tasks. This indicates
+that, despite the truncation, gradient updates of LGRAM effectively drive improvement in the full
+variational bound. Since LELBO also serves as an indirect estimate of the negative log-likelihood,
+its consistent improvement provides evidence that GRAM optimizes a well-defined data likelihood,
+even though training relies on the surrogate.
+The gap between the two curves in Figure 8 reflects the structural difference between the two
+quantities — LELBO accumulates KL terms across all transitions while LGRAM evaluates only the
+final-step KL of each supervision step — rather than an optimization failure. This gap is consistent
+with LGRAM being a biased but useful surrogate for LELBO .
+
+B Training and Architecture Details
+B.1 Architecture Details
+
+GRAM consists of three components: Encoder, Recursive Core, and Decoder.
+Encoder. Input tokens are mapped to embeddings via a token embedding layer, optionally con-
+catenated with puzzle embeddings (for ARC [13, 14]), and combined with positional encodings
+(RoPE) [46]. The embeddings are scaled by $\sqrt{D}$ and prepended with 16 puzzle embedding tokens [8].
+Recursive Core. The core maintains two latent states: $h$ (high-level) and $l$ (low-level). For each outer
+step, the low-level state is refined $K$ times via $l \leftarrow f_L(l, h + e_x)$, injecting the input embedding at
+each iteration. The high-level state is then updated via $h \leftarrow f_H(h, l)$. Both $f_L$ and $f_H$ share the same
+architecture: a stack of attention and SwiGLU [47] MLP layers. As an exception, for Sudoku tasks we
+use a [SwiGLU + SwiGLU] network for the Recursive Core module instead of [Attention + SwiGLU],
+following [9]. To initialize $z_0 = (h_0, l_0)$, we sample once from the standard
+Gaussian distribution $\mathcal{N}(0, I)$ and store the sample in the network checkpoint, so that
+$z_0$ takes the same fixed value whenever the checkpoint is loaded.
+Decoder. The decoder extracts content tokens from h (excluding puzzle embedding positions)
+and maps them to logits via a SwiGLU MLP head. An auxiliary head predicts halt decisions and
+correctness values from the first token of h.
+
+
+ 16
+ Table 4: Architecture components.
+ Component Module Description
+ Encoder
+ Token Embedding vocab → D
+ Puzzle Embedding 16 tokens (optional, for ARC)
+ Position Encoding RoPE or learned
+ Recursive Core
+ f L , fH [Attention + SwiGLU] × 2 layers
+ Iterations K low-level, T high-level steps
+ µθ , σ θ , µ ϕ , σ ϕ SwiGLU MLP for each parameter
+ Decoder
+ LM Head Linear(D → vocab)
+ Q Head Linear(D → 2) for halt
+ V Head Linear(D → 1) for value
+
+
+
+Encoder and Decoder for Image Patches. In the MNIST [15] image generation task, we first
+construct a binarized dataset by normalizing the original discrete pixel values (0 ∼ 255) to the
+continuous range [0, 1] and applying a threshold at 0.5. For the network architecture, we employ a
+convolutional patch encoder, following [48, 49].
+The encoding process proceeds in three stages. First, the discrete input tokens x ∈ {0, 1} are
+normalized to the range [−1, 1]. Second, to capture local spatial dependencies before patchification,
+the normalized image passes through a shallow convolutional encoder. This encoder consists of
+two stacked blocks, where each block comprises a 2D convolution [50, 51] with a 5 × 5 kernel and
+padding 2, a SiLU non-linearity [52], and Group Normalization (GN) [53]. Finally, the resulting
+feature map is divided into non-overlapping patches of size P × P and linearly projected to match
+the model’s hidden dimension D. The detailed architectural specifications and dimension transitions
+are summarized in Table 5.
+
+Table 5: Detailed architecture of the Image Patch Encoder for MNIST. H, W denote image resolution, C input
+channels, P patch size, Np the number of patches, and D the hidden dimension.
+ Stage Layer / Operation Output Dim.
+ Input Tokens (B, C, H, W )
+ 1. Norm.
+ Linear Scaling [−1, 1] (B, C, H, W )
+ Conv2d 5 × 5 (p = 2)
+ (B, D/2, H, W )
+ SiLU → GN(32)
+ 2. Conv
+ Conv2d 5 × 5 (p = 2)
+ (B, D/2, H, W )
+ SiLU → GN(32)
+ Flatten Patches (B, Np , P^2 · D/2)
+ 3. Patch
+ Linear Projection (B, Np , D)
+
+
+Hyperparameters. Following Wang et al. [8] and Jolicoeur-Martineau [9], both the input and output are
+represented as sequences of shape [B, L], where B denotes the batch size and L the context length.
+Each input sequence includes 16 fixed puzzle embedding tokens. The latent states $h_t$ and $l_t$, as well
+as the decoder output, have shape [B, L, D], with embedding dimension D. The Transformer [39]
+backbone uses embedding dimension $D = 512$, attention heads $N_{\text{head}} = 8$, and FFN hidden dimension
+$D_h = 512$. Within a recursion step, meaning a latent transition $z_t \to z_{t+1}$, we use low-level (inner)
+steps K = 6 for Sudoku [8] and K = 4 for all other tasks, with high-level (outer) steps T = 3.
+
+B.2 Training Details
+
+Task Configuration. All tasks represent inputs and outputs as discrete token sequences (summarized
+in Table 6).
+
+
+ 17
+ • For Sudoku [8], the 9×9 grid is flattened row-by-row into 81 tokens with vocabulary size 11
+ (0=pad, 1=blank, 2–10=digits).
+ • For ARC-AGI [13, 14], variable-size grids are padded to a fixed 30×30 canvas with EOS
+ markers, yielding 900 tokens and vocabulary size 12 (0=pad, 1=eos, 2–11=colors); task-
+ specific puzzle embeddings are prepended to distinguish different ARC tasks.
+ • N-Queens flattens an N × N board row-by-row into $N^2$ tokens with vocabulary size 3
+ (0=pad, 1=empty, 2=queen).
+ • Graph Coloring encodes the strict upper triangle of the adjacency matrix as n(n−1)/2 tokens,
+ using 0=PAD, 1=no-edge, and 2=edge for inputs and 3 + color_id for output colors.
+ • For image generation on MNIST [15], images are quantized and processed via CNN-based
+ patchification [50, 49], with the encoder applying patchify and the decoder unpatchify. The
+ patchified input then forms a flattened sequence of 14 × 14 = 196 tokens with vocabulary size 3 (0=pad,
+ 1=black, 2=white).
+
+
+ Table 6: Task-specific configurations.
+ Task Seq. Len Vocab Puzzle Emb Encoding
+ Sudoku 81 11 ✗ 9×9 grid, row-major
+ ARC-AGI 900 12 ✓ 30×30 padded canvas
+ N-Queens N^2 3 ✗ N × N board
+ Graph Coloring n(n−1)/2 6 ✗ Strict adjacency upper triangle
+ MNIST 196 3 ✗ 14×14 patches
+
+Training Details. We train all models using AdamW [54] with learning rate $10^{-4}$, weight decay
+1.0, and gradient clipping at 1.0. The global batch size is 768. For stability, we apply exponential
+moving average (EMA) with decay 0.9999, following Brock et al. [55] and Song and Ermon [56].
+To prevent posterior collapse, we use a KL balance [37, 38] coefficient of 0.8. The number of deep
+supervision steps is Nsup = 16 for all tasks. The KL coefficient β is set to 0.1 (Sudoku), 0.04/0.1
+(ARC-AGI-1/2), 0.07/0.045 (N-Queens 8 × 8/10 × 10), 0.5/0.45 (Graph Coloring with 8/10 nodes),
+and 0.07 (MNIST). Task-specific training configurations are summarized in Table 7.
+
+ Table 7: Training configurations on NVIDIA RTX 4090 GPUs.
+ Task Epochs GPUs Time
+ Sudoku 50K 8 2h
+ ARC-AGI 200K 8 5 days
+ N-Queens (8×8) 3K 8 1h
+ N-Queens (10×10) 1K 8 3h
+ Graph Coloring (8 nodes) 5K 8 1.5h
+ Graph Coloring (10 nodes) 5K 8 6h
+ MNIST 1.8K 8 16h
+
+
+
+C Additional Details of Experiment Setup
+C.1 Challenging Puzzle Tasks
+
+C.1.1 Looped TF on ARC-AGI
+We report Looped Transformer [7] results on Sudoku-Extreme but omit them on ARC-AGI due to
+prohibitive training cost. Under the same setup used for our other recursive baselines (200K epochs,
+batch size 768, on 8× NVIDIA RTX Pro 6000 GPUs), training Looped TF on Sudoku-Extreme
+already takes 19 hours, and extrapolating to ARC-AGI — which uses substantially longer sequences
+and a larger training set — suggests approximately 97 days (≈ 776 GPU-days) for a full training run.
+This gap stems from two compounding factors. First, Looped TF lacks deep supervision: HRM,
+TRM, and GRAM perform Nsup gradient updates per trajectory (one per segment), whereas Looped
+TF performs only one update at the end of the full trajectory, slowing convergence. Second, Looped
+
+
+ 18
+ TF lacks adaptive halting such as ACT [8–10], so every input must be processed for the maximum
+recursion depth, increasing per-example sequential compute. Both inefficiencies compound at
+ARC-AGI scale, making a full Looped TF training run impractical.
+
+C.2 Multi-solution Puzzle Tasks
+
+C.2.1 N-Queens Problem
+
+ Input Solution 1 Solution 2 Solution 3
+
+
+
+
+Figure 9: Example of an 8 × 8 N-Queens puzzle instance. In this example, 5 queens are removed from the
+full board, leaving 3 queens. The model must find valid positions for the 5 removed queens. This configuration
+admits exactly 3 valid solutions.
+
+Data Generation Details. The N-Queens problem requires placing N queens on an N × N
+chessboard such that no two queens attack each other—meaning no queens share the same row,
+column, or diagonal. Figure 9 illustrates an example where 5 queens are removed from an 8 × 8
+solution, resulting in a puzzle with 3 distinct valid completions.
+To construct the dataset, we first generated all valid complete N-Queens solutions for N = 8 and
+N = 10. We then created puzzle instances by removing a specific number of queens, treating the
+remaining partial configuration as the input and the original complete board as the target label. To
+generate instances yielding diverse valid completions, we removed k ∈ {5, 6, 7} queens for the 8 × 8
+setting and k ∈ {7, 8, 9} queens for the 10 × 10 setting. The distribution of solution counts for our
+generated dataset is shown in Figure 10.
+For evaluation, we employed an 85:15 train-test split. Crucially, to prevent data leakage and ensure
+the model learns to reason rather than memorize, the split was performed based on unique input
+configurations. This guarantees that no input pattern in the test set appears in the training set. Inputs
+are flattened into discrete 1D sequences $x \in \{0, 1, 2\}^L$, where $L = N^2$, along with zero-padded
+puzzle embedding tokens. Vocabulary mapping follows: padding (0), empty (1), and queen (2).
+
+ 8x8 N-Queens 10x10 N-Queens
+ 3000 20000
+ 15000
+ Counts
+
+
+
+
+ Counts
+
+
+
+
+ 2000
+ 10000
+ 1000
+ 5000
+ 0 0
+ 3 6 9 12 15 18 0 20 40 60 80
+ Number of solutions Number of solutions
+Figure 10: Distribution of the number of valid solutions for generated N-Queens instances. The dataset
+covers a wide range of solution counts, testing the model’s ability to recover multiple valid outputs.
+
+
+C.2.2 Graph Coloring Problem
+Data Generation Details. The Graph Coloring problem requires assigning one of k colors to each
+node in a graph such that no two adjacent nodes share the same color. We consider graphs with
+N ∈ {8, 10} nodes and use k = 3 colors. Figure 11 illustrates an example instance with N = 8
+nodes and k = 3 colors.
+Graphs are generated using the Erdős–Rényi random graph model [57], following the generation
+pipeline from GNN-GCP [58]. Specifically, for each instance, edges are sampled independently
+
+
+ 19
+ with a fixed probability p, producing a symmetric adjacency matrix. We retain only graphs that are
+3-colorable.
+For each graph, we enumerate all valid 3-colorings and retain only canonical forms to eliminate
+redundant solutions under color permutation (e.g., swapping red and blue). This yields a set of
+structurally distinct solutions per input. The distribution of solution counts is shown in Figure 12.
+The final dataset consists of 7,002 training and 255 test instances for N = 8, and 13,465 training and
+192 test instances for N = 10.
+Input and Output Representation. The input graph is represented by extracting the upper triangular
+portion of the adjacency matrix (excluding the diagonal) and flattening it into a 1D sequence. The
+output is a sequence of length N , where each position encodes the assigned color for the corresponding
+node. Vocabulary mapping is as follows: PAD (0), no edge (1), edge (2), and colors (3, 4, 5) for red,
+blue, and green respectively.
+
+ Input Solution 1 Solution 2 Solution 3 Solution 4
+
+
+
+
+ Figure 11: Graph Coloring Example
+
+
+ Vertex 8 Graph Coloring Vertex 10 Graph Coloring
+ 2000
+ 1500
+ 1500
+ Counts
+
+
+
+
+ Counts
+
+
+
+
+ 1000
+ 1000
+ 500 500
+ 0 0
+ 3 6 9 12 15 18 0 20 40 60 80
+ Number of solutions Number of solutions
+Figure 12: Distribution of the number of valid solutions for generated graph coloring instances. The
+dataset covers a wide range of solution counts, testing the model’s ability to recover multiple valid outputs.
+
+
+D Additional Experiment Results
+D.1 Additional Results on Challenging Puzzle Benchmarks
+
+Table 8 reports test accuracy on three challenging puzzle benchmarks. Here we provide additional
+observations complementing the main text.
+
+GRAM Advances the Recursive-Reasoning Line. Across all three benchmarks, GRAM consis-
+tently outperforms prior recursive baselines (Looped TF, HRM, TRM) while using fewer parameters
+than HRM (10M vs. 27M). The complete failure of direct prediction on Sudoku and ARC-AGI-2 (0%
+in both cases) further confirms that recursive computation is essential for these tasks — single-pass
+models, regardless of capacity, cannot solve them. Together, these results indicate that GRAM’s gains
+arise from how recursive computation is organized (probabilistic, multi-trajectory) rather than from
+increased model capacity.
+
+Sudoku-Extreme Resists Parameter Scaling. All tested large reasoning models (LRMs), including
+Deepseek-R1 (671B), score 0% on Sudoku-Extreme. This suggests that pretrained capacity alone
+does not transfer to constraint-propagation reasoning, and that benchmarks like Sudoku-Extreme
+probe a fundamentally different axis from those captured by general-purpose LRMs. On ARC-AGI,
+more recent LRMs such as Gemini 3 Pro (75.0% on ARC-1, 31.1% on ARC-2) remain substantially
+
+
+ 20
+ Table 8: Test accuracy (%) on Challenging Puzzle Benchmarks. GRAM significantly outperforms prior
+recursive models. All recursive model scores were obtained at 16 supervision steps.
+ Method #Params Sudoku ARC-1 ARC-2
+ Large Reasoning Models
+ Deepseek-R1 671B 0.0 15.8 1.3
+ Claude 3.7 16k N/A 0.0 28.6 0.7
+ o3-mini-high N/A 0.0 34.5 3.0
+ GPT 5.2 (low) N/A – 55.7 9.7
+ Grok-4-thinking 1.7T – 66.7 16.0
+ Gemini 3 Pro N/A – 75.0 31.1
+ Recursive Models
+ Direct Pred 27M 0.0 21.0 0.0
+ Looped TF 7M 61.3 - -
+ HRM 27M 55.0 40.3 5.0
+ TRM 7M 87.4 44.6 7.8
+ GRAM (Ours) 10M 97.0 52.0 11.1
+ Human Results
+ Avg. Human – – 60.2 –
+ Best Human – – 98.0 100.0
+
+
+
+ahead of all recursive models, highlighting that abstract few-shot reasoning still benefits from scale;
+we view these numbers as benchmark-difficulty reference points rather than controlled baselines.
+
+D.2 Scales with Parallel Sampling on ARC-AGI Challenge
+
+To investigate the effect of GRAM’s sampling on the ARC-AGI-1 benchmark, we measured perfor-
+mance without relying on external data augmentation. Typically, TRM achieves its reported accuracy
+by generating 1,000 augmentations for a single problem and performing majority voting over the
+results. Because this augmentation process itself creates a wide variety of samples, we isolated
+the specific effect of generative sampling by performing inference solely on the original problem
+instance and conducting majority voting over multiple sampled paths. For a fair comparison, TRM
+was evaluated using the same hyperparameters as GRAM, including the number of epochs, learning
+rate, and the number of layers.
+As illustrated in Figure 13, removing augmentations causes a performance decline for both GRAM
+and TRM compared to the values reported in Table 8. However, in the case of GRAM, we observe
+that accuracy consistently improves as the model generates more parallel samples. This trend mirrors
+observations in Section 4.2, suggesting that increased inference-time compute through width scaling
+allows the model to explore more plausible reasoning trajectories and recover from initial errors,
+eventually leading to more robust solution discovery.
+
+Interaction between Augmentation and Sampling. A natural question arises: why not combine
+higher levels of augmentation with extensive parallel sampling? To address this, we conducted an
+ablation study examining the interaction between data augmentation and inference-time sampling.
+Figure 14 presents the results across varying augmentation levels (Aug=0 to Aug=50). Without
+augmentation (Aug=0), increasing the number of samples yields consistent accuracy improvements,
+demonstrating that stochastic sampling effectively explores diverse reasoning trajectories. However,
+as the level of augmentation increases, the marginal benefit of additional sampling diminishes
+substantially. At Aug=50, performance saturates regardless of sample count—accuracy remains
+nearly constant whether we draw 1 or 50 samples. This observation reveals that augmentation
+and sampling serve complementary rather than additive roles: both mechanisms enable the model
+to capture solution diversity, but through different means. When training data is limited, parallel
+sampling compensates by exploring varied reasoning paths at inference time. When training data
+is abundant through augmentation, the model has already internalized sufficient diversity during
+training, rendering additional inference-time exploration redundant. Consequently, scaling sampling
+beyond augmentation provides diminishing returns, justifying our experimental design choice to
+evaluate these two scaling axes separately.
+
+
+ 21
+ 0.450
+ 0.425
+ 0.400
+ 0.375
+
+
+
+
+ Accuracy
+ 0.350
+ 0.325
+ 0.300 TRM
+ GRAM ( =0.1)
+ 0.275 GRAM ( =0.05)
+ GRAM ( =0.04)
+ 0.250 1 2 5 10 20 50 100 250 500
+ Number of Samples (N)
+Figure 13: Effect of sampling on ARC-AGI-1 without data augmentation. To isolate the internal sampling
+effect, both models are evaluated on original problem instances without 1,000 augmentations. While removing
+augmentations causes an initial performance drop, GRAM exhibits robust scaling through generative sampling
+as the number of parallel samples N increases, outperforming the TRM baseline.
+
+
+ 0.500
+ 0.475
+ 0.450
+ Accuracy
+
+
+
+
+ 0.425
+ 0.400
+ 0.375 Aug=0
+ Aug=5
+ 0.350 Aug=10
+ Aug=50
+ 0.325
+ 1 2 5 10 20 50 100 250 500
+ Number of Samples (N)
+Figure 14: Effect of augmentation on sampling efficiency. With limited augmentation (Aug=0), parallel
+sampling provides consistent gains. As augmentation increases, sampling benefits diminish—at Aug= 50,
+performance saturates regardless of sample count, suggesting augmentation and sampling serve complementary
+roles in capturing solution diversity.
+
+
+
+D.3 Solution Coverage Analysis
+
+We analyze the ability of GRAM to capture the diversity of the solution space compared to determin-
+istic baselines. Figure 15 presents the solution coverage on 8 × 8 and 10 × 10 N-Queens tasks with
+respect to the total number of valid ground-truth solutions.
+As shown in Figure 15, deterministic recursive models (HRM and TRM) exhibit a sharp decline in
+coverage as the number of possible solutions increases. Since these models are constrained to a single
+fixed reasoning trajectory, they structurally fail to explore alternative paths, resulting in severe mode
+collapse in multi-solution landscapes.
+In contrast, GRAM effectively leverages its generative latent transitions to cover a broader range
+of solutions. As the number of parallel samples N increases (from 1 to 20), the solution coverage
+improves monotonically across both 8 × 8 and 10 × 10 settings. This empirical evidence confirms
+that GRAM’s stochastic guidance mechanism is essential for navigating complex problem spaces
+where multiple valid reasoning paths exist.
+
+
+D.4 Additional Generated Image Samples
+
+In this section, we provide further qualitative results demonstrating GRAM’s capability in uncon-
+ditional image generation. Figure 16 presents a diverse set of samples generated on the binarized
+MNIST dataset, visualized across the recursive inference steps t = 0 to t = 16.
+
+
+ 22
+ 1.0 HRM 1.0 HRM
+ TRM TRM
+ GRAM (N=1) GRAM (N=1)
+ GRAM (N=5) GRAM (N=5)
+ 0.8 GRAM (N=10) 0.8 GRAM (N=10)
+ GRAM (N=20) GRAM (N=20)
+
+
+ 0.6 0.6
+Coverage
+
+
+
+
+ Coverage
+ 0.4 0.4
+
+
+ 0.2 0.2
+
+
+ 0.0 0.0
+ 2 4 6 8 10 12 14 16 18 0 15 30 45 60 75 90
+ Number of Solutions Number of Solutions
+ (a) N-Queens 8 × 8 (b) N-Queens 10 × 10
+
+ Figure 15: Solution coverage analysis on N-Queens (8 × 8 and 10 × 10) with respect to the number of
+ ground-truth solutions. While deterministic baselines (HRM, TRM) suffer from mode collapse as the solution
+ space grows, GRAM demonstrates monotonic improvement in coverage as the number of parallel samples N
+ increases.
+
+
+
+ As observed in the main text, GRAM exhibits a distinct progressive refinement behavior. Starting
+ from a black initialization, the model iteratively adds details and sharpens the structure of the digit.
+ A particularly compelling property of this process is the model’s ability to recover from initially
+ ambiguous or incorrect formations.
+ For instance, in the second row (generating the digit ’2’) and the last row (generating the digit ’1’),
+ the early predictions at t = 1 and t = 2 manifest as disjointed artifacts or incorrect shapes. However,
+ as the recursion proceeds, GRAM effectively leverages its feedback loop to correct these initial errors,
+ resolving the ambiguity and converging to a coherent, high-quality digit by t = 16.
+
+
+
+
+ t=0 t=1 t=2 t=4 t=6 t=7 t=9 t=11 t=12 t=14 t=16
+ Figure 16: Additional generated samples from GRAM. We provide 8 additional samples generated uncondi-
+ tionally on binarized MNIST using GRAM. Each row represents a single generated sample, visualized across its
+ recursive refinement process.
+
+
+
+
+ 23
+ D.5 Additional Experiment Results on Unconditional Sudoku Generation
+
+In this section, we provide additional details on unconditional Sudoku generation. Unlike the
+conditional Sudoku-solving setting, where the input board contains given clues, the model receives an
+entirely blank board and samples a complete 9 × 9 Sudoku board from its learned prior. We evaluate
+each generated board using the standard Sudoku validity criterion: every row, column, and 3 × 3 box
+must contain the digits 1 through 9 exactly once. We report the validity rate over 100K generated
+boards. To check whether high validity comes from repeatedly producing the same board, we also
+compute the fraction of unique boards among valid samples.
+For GRAM, we construct the unconditional training set from Sudoku-Extreme [8], the Sudoku
+benchmark used by HRM and TRM. We sample 50K complete solutions from the original training
+split, discard the clue patterns, and use an all-blank board as input with the complete solution as
+the target. No data augmentation is used. We train GRAM on this derived 50K-solution set for 200
+epochs with learning rate 10^{-4}, EMA decay 0.999, and KL coefficient 0.05. The resulting model
+contains 10.9M parameters and uses 16 inference steps.
+For D3PM baselines, we use a DiT-style Transformer backbone and evaluate two model sizes. D3PM-
+Big uses hidden dimension 768, 5 Transformer blocks, and 12 attention heads, yielding 55.1M
+parameters, while D3PM-Small uses hidden dimension 512, 3 Transformer blocks, and 8 attention
+heads, yielding 15.9M parameters. Both variants are trained on the same derived training set and
+generate boards with 1000 denoising steps.
+As shown in Table 9, GRAM achieves 99.05% validity, outperforming all D3PM baselines. The
+strongest D3PM baseline, D3PM-Uniform (Big), reaches 91.33% validity while using 55.1M parame-
+ters and 1000 denoising steps. In contrast, GRAM uses fewer parameters and only 16 inference steps.
+In all cases, the valid samples are unique under exact board matching, indicating that the reported
+validity is not due to simple repetition of a small set of boards. These results show that GRAM can
+generate highly constrained symbolic structures from an empty input, supporting its potential as a
+generator beyond conditional puzzle solving.
+Figure 17 illustrates the unconditional Sudoku generation setup. Starting from an empty board,
+the task is to generate complete boards, and validity is determined by whether the generated board
+satisfies all Sudoku constraints. Figure 7 shows qualitative examples of boards generated by GRAM.
+
+Table 9: Unconditional Sudoku generation. We report the ratio of generated boards satisfying Sudoku
+constraints over 100K samples. All valid boards are unique for all methods in this evaluation.
+ Method #Params Steps Validity(%)
+ D3PM-Uniform (Big) 55.1M 1000 91.33
+ D3PM-Uniform (Small) 15.9M 1000 29.24
+ D3PM-Absorb (Big) 55.1M 1000 79.18
+ D3PM-Absorb (Small) 15.9M 1000 21.88
+ GRAM (Ours) 10.9M 16 99.05
+
+
+ Empty input Valid sample Invalid sample
+ 3 6 5 4 7 8 9 2 1 4 3 8 2 9 1 7 6 5
+ 9 4 1 2 5 6 8 7 3 5 2 1 7 3 6 4 9 8
+ 2 7 8 9 1 3 6 5 4 7 6 9 4 5 8 1 2 3
+ 5 2 9 8 4 7 3 1 6 9 1 3 4 4 7 5 8 6
+ 7 3 6 1 9 2 5 4 8 2 5 4 6 8 9 3 1 7
+ 1 8 4 3 6 5 2 9 7 6 8 7 5 1 3 2 4 9
+ 8 9 7 5 3 4 1 6 2 3 7 2 9 6 4 8 5 1
+ 4 1 3 6 2 9 7 8 5 1 9 5 3 2 8 6 7 4
+ 6 5 2 7 8 1 4 3 9 8 4 6 1 7 5 9 3 2
+
+
+Figure 17: Unconditional Sudoku generation setup. Starting from an empty board, the task is to generate
+complete Sudoku boards. The valid sample satisfies all Sudoku constraints, while red entries in the invalid
+sample indicate cells involved in constraint violations.
+
+
+
+ 24
+ D.6 Visualizing Latent Recursion Process
+
+To understand how stochastic guidance shapes reasoning, we visualize latent trajectories during
+recursive computation. Specifically, we track the high-level state h at each supervision step throughout
+the recursion process. For visualization, we project these latent vectors into 2D using PCA [59] and
+interpolate unobserved states via K-D tree [60] to construct a continuous loss landscape.
+Figures 18 and 19 compare TRM and GRAM on the same Sudoku puzzle. TRM follows a single
+deterministic path from initialization to solution, offering no mechanism to escape if the trajectory
+enters a suboptimal region. In contrast, GRAM samples diverse trajectories that explore different
+regions of latent space before converging. While some trajectories become trapped in local minima
+(bright yellow regions), others successfully navigate toward the global optimum (dark blue regions).
+This diversity enables GRAM to discover valid solutions more reliably through parallel exploration.
+
+
+
+
+Figure 18: Latent reasoning trajectory of TRM. The red dot indicates the initial state h0 and the green dot
+indicates the final state hT . Background color represents the loss landscape: bright yellow corresponds to high
+loss regions, while dark blue indicates low loss (optimal) regions. TRM follows a single deterministic path with
+no ability to escape suboptimal trajectories.
+
+
+
+
+ 25
+ Figure 19: Latent reasoning trajectories of GRAM (50 samples). Using the same visualization scheme as
+Figure 18, we show 50 sampled trajectories from GRAM. The stochastic guidance enables diverse exploration
+of the latent space: while some trajectories converge to local minima (bottom right), others successfully reach
+the global optimum (middle left), demonstrating how parallel sampling improves solution discovery.
+
+
+
+
+ 26
+ Licenses
+Table 10: Existing assets, licenses, and source links. We list the existing datasets, benchmarks, and public
+reference implementations used or cited in our experiments. Synthetic N-Queens and Graph Coloring instances
+are generated by the authors and are therefore not external assets.
+
+ Asset Use in this paper License / terms Source link
+ MNIST Binarized MNIST genera- Creative Com- https://keras.io/api/
+ tion experiments mons Attribution- datasets/mnist/
+ Share Alike 3.0
+ ARC-AGI-1 / origi- ARC-AGI reasoning Apache License https://github.com/fchollet/
+ nal ARC benchmark 2.0 ARC-AGI
+ ARC-AGI-2 ARC-AGI-2 reasoning Apache License https://github.com/arcprize/
+ benchmark / reference 2.0 ARC-AGI-2
+ results
+ HRM repository HRM baseline and Apache License https://github.com/
+ Sudoku-Extreme-related 2.0 sapientinc/HRM
+ reference implementation
+ TinyRecursiveModels TRM baseline and recur- MIT License https://github.com/
+ / TRM repository sive reasoning reference SamsungSAILMontreal/
+ implementation TinyRecursiveModels
+ MDLM repository Masked diffusion base- Apache License https://github.com/
+ line reference implemen- 2.0 kuleshov-group/mdlm
+ tation, if public code is
+ used
+ Google Research D3PM image-generation Apache License https://github.com/
+ D3PM implementa- baseline reference imple- 2.0 google-research/
+ tion mentation, if public code google-research/blob/master/
+ is used d3pm/images/diffusion_
+ categorical.py
+ Looped Trans- Looped Transformer base- MIT License https://github.com/Leiay/
+ former repository line reference implemen- looped_transformer
+ tation, if public code is
+ used
+ N-Queens Synthetic multi-solution Not an external N/A
+ constraint satisfaction asset
+ task generated by the
+ authors
+ Graph Coloring Synthetic multi-solution Not an external N/A
+ constraint satisfaction asset
+ task generated by the
+ authors
+
+
+
+
+ 27
+ \ No newline at end of file
diff --git a/papers/txt/gram2026_generative_recursive.txt b/papers/txt/gram2026_generative_recursive.txt
new file mode 100644
index 0000000..d5f299d
--- /dev/null
+++ b/papers/txt/gram2026_generative_recursive.txt
@@ -0,0 +1,1532 @@
+ Generative Recursive Reasoning
+
+
+ Junyeob Baek1†∗ Mingyu Jo1†∗ Minsu Kim1,2
+
+ Mengye Ren3 Yoshua Bengio2,4 Sungjin Ahn1,3†
+arXiv:2605.19376v2 [cs.AI] 20 May 2026
+
+
+
+
+ 1
+ KAIST 2 Mila – Québec AI Institute
+ 3
+ New York University 4 Université de Montréal
+
+
+
+ Abstract
+ How should future neural reasoning systems implement extended computation?
+ Recursive Reasoning Models (RRMs) offer a promising alternative to autoregres-
+ sive sequence extension by performing iterative latent-state refinement with shared
+ transition functions. Yet existing RRMs are largely deterministic, following a
+ single latent trajectory and converging to a single prediction. We introduce Gen-
+ erative Recursive reAsoning Models (GRAM), a framework that turns recursive
+ latent reasoning into probabilistic multi-trajectory computation. GRAM models
+ reasoning as a stochastic latent trajectory, enabling multiple hypotheses, alterna-
+ tive solution strategies, and inference-time scaling through both recursive depth
+ and parallel trajectory sampling. This yields a latent-variable generative model
+ supporting conditional reasoning via pθ (y | x) and, with fixed or absent inputs,
+ unconditional generation via pθ (x). Trained with amortized variational inference,
+ GRAM improves over deterministic recurrent and recursive baselines on structured
+ reasoning and multi-solution constraint satisfaction tasks, while demonstrating an
+ unconditional generation capability. https://ahn-ml.github.io/gram-website
+
+
+ 1 Introduction
+ A central question for future neural reasoning systems is how extended computation should be imple-
+ mented. Large autoregressive models typically scale reasoning by extending a sequence-generation
+ process, whether intermediate computation is expressed explicitly as chain-of-thought tokens or im-
+ plicitly in hidden or latent representations [1–6]. A complementary direction is explored by Recursive
+ Reasoning Models (RRMs), which use repeated computation to refine a persistent latent state rather
+ than to append new elements to an output or reasoning sequence [7–9]. This approach is appealing
+ because it decouples reasoning depth from both parameter scale and output length: a compact model
+ can perform many steps of internal computation by repeatedly applying shared transition functions
+ over time.
+ Recent recursive reasoning models such as HRM [8] and TRM [9] provide early evidence for the
+ potential of this approach in structured reasoning. Rather than producing a solution in a single
+ feedforward pass, they perform extended computation through iterative latent-state refinement, deep
+ supervision across refinement steps, and reasoning-oriented recurrent designs such as hierarchical
+ latent dynamics. These features make them well suited to problems requiring constraint propagation,
+ state tracking, iterative correction, and multi-step inference. More broadly, they build on a principle
+ ∗ Equal contribution
+ † Correspondence to: Junyeob Baek (wnsdlqjtm@kaist.ac.kr), Mingyu Jo (mingyu.jo@kaist.ac.kr), Sungjin Ahn
+
+ (sungjin.ahn@kaist.ac.kr)
+
+
+ Preprint.
+ Solution 1,
+
+
+ Input Task,
+
+
+
+ Solution 2,
+
+
+
+
+ (a) Deterministic RRMs (b) GRAM (Ours)
+
+Figure 1: Comparison of Latent Reasoning Trajectories. Left: N-Queens Example with two valid solutions.
+Right: Given three independent runs for latent reasoning (τ1 , τ2 , τ3 ): (a) Prior RRMs (e.g. HRM, TRM) are
+deterministic—all runs collapse to an identical trajectory, converging to a single solution and failing to explore
+alternatives, while (b) GRAM explores diverse trajectories that reach multiple
+valid solutions y1 and y2 , while naturally enabling parallel inference-time scaling.
+
+also explored in recurrent Transformer architectures such as Universal Transformers [10] and Looped
+Transformers [7]: shared Transformer blocks can be repeatedly applied to increase computational
+depth without increasing parameter count. Together, these models suggest that reasoning capability
+can emerge not only from scaling model size or generating longer traces, but also from the organization
+of computation itself.
+While recurrent latent-state refinement provides an appealing mechanism for efficiently increasing
+reasoning depth, depth alone is not sufficient for many reasoning problems. A capable reasoning
+system should also be able to maintain uncertainty, consider alternative hypotheses, and explore
+multiple possible solution strategies [11, 12]. This is especially important in settings where ambiguity
+or multiple valid solutions are intrinsic, and more generally in problems where a single refinement
+path may become trapped in a suboptimal reasoning trajectory. In this sense, future RRMs should be
+not only deep, in the sense of repeated refinement, but also wide, in the sense of maintaining and
+exploring multiple latent trajectories in parallel.
+Existing RRMs [7–10], however, remain fundamentally deterministic: given the same input and
+initialization, they follow a single latent trajectory and converge to a single prediction. This deter-
+ministic recursion collapses the space of plausible reasoning paths into a single attractor, leaving
+probabilistic multi-hypothesis latent reasoning largely unexplored within the RRM paradigm. This
+motivates the central question of our work: can recursive latent computation support probabilistic,
+generative, multi-hypothesis reasoning while preserving the efficiency of compact recurrent models?
+In this paper, we propose Generative Recursive reAsoning Models (GRAM), a framework that turns
+recursive latent reasoning into probabilistic multi-trajectory computation. GRAM treats the reasoning
+process itself as a stochastic latent trajectory: at each recursion step, the model samples a transition
+conditioned on the input and the current reasoning state, rather than deterministically updating to a
+single next state. Repeating this process defines a distribution over possible reasoning trajectories,
+allowing the model to maintain multiple hypotheses, explore alternative solution strategies, and scale
+inference not only by increasing recursive depth but also by sampling trajectories in parallel. From
+a probabilistic perspective, GRAM is a latent-variable generative model: it models pθ (y | x) by
+marginalizing over latent reasoning trajectories, while the same recursive process can also define an
+unconditional generative model pθ (x) when the input is fixed or absent.
+We evaluate GRAM on controlled reasoning and generation tasks that serve as probes of the ar-
+chitectural properties targeted by our formulation: recursive refinement, stochastic exploration,
+multi-solution coverage, and inference-time scaling. Given this goal, our experiments focus on
+comparisons with the most relevant deterministic recurrent and recursive latent reasoning baselines,
+including Looped Transformers, HRM, and TRM, rather than frontier-scale general-purpose LLMs
+whose training data, inference budgets, and external scaffolding are not directly comparable. Sudoku-
+Extreme [8] and ARC-AGI [13, 14] test structured reasoning under hard constraints and abstract
+transformations; N-Queens and Graph Coloring evaluate multi-solution recovery; and binarized
+MNIST [15] probes the unconditional generative interpretation.
+Our main contribution is to establish probabilistic multi-trajectory recursion as a design principle
+for future recurrent and recursive reasoning architectures. Concretely, we make three contributions.
+
+
+ 2
+ First, we formulate recursive reasoning as a latent-variable generative process, where solutions are
+obtained by marginalizing over stochastic reasoning trajectories. Second, we introduce width-based
+inference-time scaling, enabling inference to scale not only with recursive depth but also with the
+number of sampled latent trajectories. Third, we provide empirical evidence that this formulation
+yields the intended architectural advantages over deterministic recurrent and recursive baselines,
+improving structured reasoning, multi-solution constraint satisfaction, and unconditional generation.
+
+2 Generative Recursive Reasoning Models
+In this section, we introduce Generative Recursive reAsoning Models (GRAM), an instantiation
+of probabilistic recursive reasoning. We describe the architecture in Section 2.1 and the training
+procedure in Section 2.2, with an architecture schematic shown in Figure 2.
+
+2.1 Architecture
+
+Overview. GRAM models the conditional distribution pθ (y | x) by marginalizing over stochastic
+latent reasoning trajectories. Given an input x, GRAM first computes an embedding
+    e_x = f_{\mathrm{enc}}(x; \theta),    (1)
+which is reused throughout the entire recursive computation. Starting from a fixed initial latent
+state z0 , the model evolves the latent state through learned stochastic transitions. The recursive
+computation is organized into two nested levels: inner and outer loops.
+
+ 𝑦 (A) CE loss
+ Prior (B) KL Div. Posterior Decoder
+ 𝑝𝜃 (⋅ |𝑢𝑡 ) 𝑞𝜙 (⋅ |𝑢𝑡 , 𝑦) ~ 𝜖𝑡 𝑓dec
+ ℎ𝑡−1 𝑓𝐻 𝑢𝑡 ℎ𝑡
+ 𝐾 times
+ 𝑙𝑡−1 𝑓𝐿 𝑓𝐿 𝑙𝑡
+ Encoder 𝑥 𝑓enc
+Figure 2: GRAM Architecture. A single stochastic latent transition in the hierarchical instantiation
+z = (h, l). After K low-level refinements via fL , the high-level update fH produces a deterministic
+proposal ut , to which stochastic guidance ϵt is added: ht = ut + ϵt .
+
+At the inner level, a latent transition samples a new latent state conditioned on the previous
+latent state and the input embedding,
+ zt ∼ pθ (zt | zt−1 , ex ), t = 1, . . . , T. (2)
+At the end of the T transitions, the decoder produces a prediction, ŷ = arg max fdec (zT ; θ). We refer
+to the sequence of T transitions from the initial state z0 to the final state zT as a supervision step. A
+supervision step is the unit at which the decoder is invoked, and the training objective is applied, with
+gradients computed as described in Section 2.2.
+At the outer level, Nsup supervision steps are applied recursively, with the final state of one supervision
+step serving as the initial state of the next, thereby forming the full recursive computation:
+
+    z_0^{(1)} \xrightarrow{T\ \text{transitions}} z_T^{(1)} = z_0^{(2)} \xrightarrow{T\ \text{transitions}} \cdots \xrightarrow{T\ \text{transitions}} z_T^{(N_{\mathrm{sup}})},    (3)
+
+where z_t^{(n)} denotes the latent state at the t-th transition of the n-th supervision step,
+z_0^{(1)} is the fixed initial state, and the terminal state of one supervision step serves as
+the initial state of the next (z_0^{(n+1)} := z_T^{(n)}). This abstract formulation can be
+instantiated with various recurrent Transformer
+backbones, including flat designs such as Universal Transformers and Looped Transformers [10, 7],
+as well as hierarchical designs such as HRM and TRM [8, 9].
+Stochastic Latent Transitions. Unlike prior recursive reasoning models (RRMs) that update the
+latent state deterministically and follow a single fixed trajectory [8, 9], GRAM defines pθ (zt |
+zt−1 , ex ) as a stochastic transition, so that repeated computation induces a distribution over latent
+reasoning trajectories. Concretely, GRAM realizes this transition as a learned stochastic residual
+perturbation around a deterministic update: at each transition, the model first computes a deterministic
+update ut from zt−1 and ex , then samples a conditional perturbation from a state-dependent Gaussian,
+and adds it to ut :
+    \epsilon_t \sim p_\theta(\epsilon_t \mid u_t) := \mathcal{N}\big(\mu_\theta(u_t),\, \sigma_\theta^2(u_t)\, I\big),    (4)
+    z_t = u_t + \epsilon_t.    (5)
+
+
+
+
+ 3
+ We refer to ϵt as the learnable stochastic guidance. The mean µθ (ut ) encodes a state-dependent
+direction in which the trajectory is steered, while the variance σθ2 (ut ) controls the amount of ex-
+ploration. This design allows GRAM to capture uncertainty, prevent convergence to local minima,
+and support robust exploration of the solution space without discarding the deterministic refinement
+performed by ut .
+Hierarchical Instantiation. We instantiate the latent state with two interacting components, z =
+(h, l). The high-level component h is updated once per latent transition and carries abstract reasoning
+state, while the low-level component l is updated K times within a single transition and carries
+fine-grained intermediate computation. This decomposition separates the two roles across time scales,
+with h accumulating slowly across transitions and l refined rapidly within each one.
+With this hierarchical multi-scale structure, a single transition zt−1 → zt is computed as follows.
+The low-level component is first refined for K updates, with the high-level component held fixed:
+ lt,k = fL (ht−1 , lt,k−1 , ex ; θ), k = 1, . . . , K, (6)
+where lt,0 := lt−1 and we write lt := lt,K for the refined low-level component. The high-level
+component is then updated as a stochastic transition conditioned on the refined lt ,
+ ut = fH (ht−1 , lt ; θ), (7)
+    \epsilon_t \sim p_\theta(\epsilon_t \mid u_t) := \mathcal{N}\big(\mu_\theta(u_t),\, \sigma_\theta^2(u_t)\, I\big),    (8)
+    h_t = u_t + \epsilon_t,    (9)
+
+
+and we set zt = (ht , lt ). Note that stochasticity is introduced only at the high level: the low-
+level refinement is fully deterministic, while the stochastic guidance signal ϵt acts on the slower,
+more abstract component of the latent state, where it can steer the overall reasoning trajectory
+across transitions3 . Under this instantiation, the decoder reads only the high-level component, i.e.,
+fdec (zT ) = fdec (hT ). Additional architectural details are provided in Appendix B.1.
+Modeling Unconditional Distribution. While the description so far focuses on the conditional
+setting pθ (y | x), the same recursive process can also be defined as an unconditional generative model
+pθ (x) when the input is replaced with an empty conditioning embedding. We use this formulation for
+generation tasks in Section 4.3.
+
+2.2 Training
+
+GRAM is trained to model the conditional distribution pθ (y | x), where each training example
+consists of an input x and its corresponding target y. As a probabilistic model, GRAM adopts a
+latent-variable formulation and is optimized by maximizing an evidence lower bound (ELBO) with
+respect to the generative parameters θ and variational parameters ϕ.
+Latent Variable Modeling. We model GRAM as a latent-variable probabilistic model pθ , where
+the full latent trajectory τ = (z0 → · · · → zTTotal ) consists of a sequence of latent variables, with
+TTotal = T × Nsup . The conditional likelihood is defined as
+
+    p_\theta(y \mid x) = \int p_\theta(y \mid \tau, x)\, p_\theta(\tau \mid x)\, d\tau,    (10)
+
+where x denotes the input problem and y denotes the corresponding ground-truth output.
+Direct maximum likelihood estimation of log pθ (y | x) is intractable due to the marginalization
+over latent trajectories. We therefore introduce a variational posterior qϕ (τ | x, y) and optimize the
+evidence lower bound (ELBO), jointly training θ and ϕ via variational inference:
+ log pθ (y | x) ≥ Eqϕ (τ |x,y) [log pθ (y | τ, x)] − KL(qϕ (τ | x, y) ∥ pθ (τ | x)) . (11)
+
+During training, latent trajectories are sampled from the variational posterior qϕ (· | x, y), which has
+access to both the input problem x and the target output y. At inference time, where y is unavailable,
+trajectories are instead generated from the learned prior pθ (· | x).
+ 3We also tried injecting noise into the low-level state, but found that it did not improve performance.
+
+
+
+
+ 4
+ Both the prior and the posterior are modeled as conditional Markov processes over latent states:
+\[ p_\theta(\tau \mid x) = p(z_0) \prod_{t=1}^{T_{\text{Total}}} p_\theta(z_t \mid z_{t-1}, x), \qquad q_\phi(\tau \mid x, y) = p(z_0) \prod_{t=1}^{T_{\text{Total}}} q_\phi(z_t \mid z_{t-1}, x, y). \tag{12} \]
+Here, z0 is a fixed initial state shared by the prior and posterior. Both transitions are implemented by
+adding reparameterized Gaussian noise ϵt after a deterministic update ut ; the posterior uses the same
+transition module as the prior, but samples from a target-conditioned noise distribution qϕ (ϵt | ut , y),
+whereas the prior uses pθ (ϵt | ut ).
+Since the two processes share the same Markov structure and all stochasticity is introduced through
+ϵ1:TTotal , their trajectory distributions can be equivalently represented in noise space. Moreover,
+since GRAM decodes the output only from the terminal latent state, the likelihood term satisfies
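+$p_\theta(y \mid \tau, x) = p_\theta(y \mid z_{T_{\text{Total}}}, x)$. Therefore, the full trajectory-level ELBO can be written as
+\[ \mathcal{L}_{\text{ELBO}} = \mathbb{E}_{q_\phi}\big[\log p_\theta(y \mid z_{T_{\text{Total}}}, x)\big] - \sum_{t=1}^{T_{\text{Total}}} \mathbb{E}_{q_\phi(\epsilon_{<t} \mid x, y)}\big[\mathrm{KL}\big(q_\phi(\epsilon_t \mid u_t, y) \,\|\, p_\theta(\epsilon_t \mid u_t)\big)\big]. \tag{13} \]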
+Here, ut = fH (ht−1 , lt ) denotes the deterministic high-level update before noise injection, as defined
+in Equation (7). Since ut depends on ht−1 , which is determined by the previously sampled noise
+variables ϵ<t := (ϵ1 , . . . , ϵt−1 ), the expectation averages over these ancestral samples.
+Practical Implementation. In practice, following previous recursive reasoning models [8, 9], we
+train GRAM with deep supervision over Nsup consecutive supervision steps, each consisting of T
+recursive latent transitions. This provides dense learning signals along the full latent trajectory, rather
+than supervising only the final state after TTotal = T × Nsup transitions. The terminal state of each
+step is reused as the initial state of the next step.
+Following standard practice for recurrent models with long computation chains, we apply truncated
+gradient propagation [16, 17], as used in recent recursive reasoning models [8, 9, 18]. In our
+implementation, gradients are propagated only through the final transition of each supervision step,
+$z_{T-1}^{(n)} \to z_T^{(n)}$. This gives the following surrogate objective for each supervision step:
+\[ \mathcal{L}_{\text{GRAM}}^{(n)}(x, y; \theta, \phi) = \mathbb{E}_{q_\phi}\big[\log p_\theta(y \mid z_T^{(n)}, x)\big] - \mathrm{KL}\big(q_\phi(\epsilon_T^{(n)} \mid u_T^{(n)}, y) \,\|\, p_\theta(\epsilon_T^{(n)} \mid u_T^{(n)})\big), \tag{14} \]
+where $z_T^{(n)}$ is the terminal state of the current supervision step n, and gradients are stopped through
+preceding states. Thus, LGRAM should be viewed as a truncated surrogate objective rather than the
+exact ELBO; it introduces a biased but memory-efficient approximation to the full ELBO. Further
+analysis of this approximation is provided in Appendix A.3, and detailed training hyperparameters
+are listed in Appendix B.2.
+
+2.3 Inference-Time Scaling
+
+GRAM supports two complementary axes of inference-time scaling: depth, by varying the number
+of recursive transitions, and width, by sampling multiple latent reasoning trajectories in parallel.
+For depth, we follow prior recursive reasoning models [8, 9] in adopting adaptive computation
+time (ACT) [8–10], which allows each trajectory to terminate at a learned halting depth (details in
+Appendix A.1). For width — the focus of this section — we draw $\{\tau^{(i)}\}_{i=1}^{N} \sim p_\theta(\tau \mid x)$ from the
+learned prior and decode each terminal state into a candidate output $\hat{y}^{(i)} = f_{\text{dec}}(z_T^{(i)})$, exploring
+multiple stochastic reasoning paths simultaneously rather than extending a single trajectory.
+To select among candidates, we use either majority voting or best-of-N with a Latent Process Reward
+Model (LPRM). The LPRM is a value head vψ (zt ) trained to predict the final quality of a trajectory
+from its latent state, using a regression target r ∈ [0, 1] given by the final prediction accuracy. At
+inference time, majority voting selects the most frequent prediction, whereas LPRM-guided selection
+chooses the candidate with the highest predicted terminal value. Details of LPRM training are
+provided in Appendix A.2. Overall, this procedure improves robustness and solution quality through
+parallel exploration, without increasing the sequential recursion length.
+
+3 Related Work
+Latent Reasoning. Latent reasoning aims to reduce the inefficiency and verbosity of explicit
+Chain-of-Thought (CoT) by shifting part or all of the reasoning process into latent or continuous
+
+
+ 5
+ Sudoku ARC-AGI-1 ARC-AGI-2
+ 20
+ 100 97.0% 66.7%
+ 87.4% 16.0%
+ 80 60 52.0% 55.7% 15
+Accuracy (%) 61.3% 44.6% 11.1%
+ 60 55.0% 40.3% 9.7%
+ 40 34.5% 10 7.8%
+ 40 5.0%
+ 20 5 3.0%
+ 20
+ 0 0 0
+ Looped HRM TRM GRAM HRM TRM GRAM o3-mini-GPT 5.2 Grok-4- HRM TRM GRAM o3-mini-GPT 5.2 Grok-4-
+ TF (Ours) (Ours) high (low) thinking (Ours) high (low) thinking
+ Recursive Models GRAM (Ours) LLMs
+
+Figure 3: Performance on puzzle benchmarks. On both Sudoku-Extreme and ARC-AGI, GRAM consistently
+outperforms all deterministic recursive baselines (Looped TF, HRM, TRM), demonstrating that stochastic latent
+transitions yield substantial gains within the recursive-reasoning paradigm. Looped TF results on ARC-AGI are
+omitted due to prohibitive training cost (see Section C.1.1). Note that large reasoning model scores are included
+only as external reference points for benchmark difficulty.
+
+representations [1–6]. By avoiding token-by-token generation of intermediate steps, such representa-
+tions can make reasoning traces more compact and reduce generation overhead. Existing approaches
+instantiate this idea through hidden states, latent or soft tokens, continuous thoughts, internal rea-
+soning traces, and recursive state updates for scaling test-time computation [4, 7, 19–23, 18, 24–26].
+However, many remain organized around autoregressive sequence generation, where additional
+computation is tied to generating more tokens, latent positions, or sequential reasoning states.
+Recursive Architectures. Recursive architectures perform iterative state updates and have evolved
+from RNNs to weight-sharing Transformers with adaptive computation [7, 10, 27–32, 25]. Recent
+recursive reasoning models show that increasing inference-time depth can outperform larger static
+models [8, 9, 18, 24]. GRAM builds on this line but formulates recurrence as a probabilistic process:
+instead of following a single deterministic refinement path, it maintains stochastic latent trajectories,
+enabling multi-path exploration and generative sampling.
+Probabilistic Latent State-Space Models. Probabilistic recurrent models use stochastic latent
+transitions to capture uncertainty and multimodal dynamics, often trained with variational infer-
+ence [33–38]. They have been widely used in sequential generative modeling, video prediction,
+and model-based reinforcement learning. GRAM shares this latent state-space view but reinterprets
+stochastic dynamics as computation rather than temporal observation modeling: latent transitions
+define possible reasoning trajectories, supporting multi-hypothesis exploration and both conditional
+pθ (y | x) and unconditional pθ (x) generation.
+
+
+4 Experiments
+
+GRAM is designed as an architecture for probabilistic recursive reasoning, rather than as a general-
+purpose large language reasoning model whose training data, inference budgets, prompting strategies,
+tool use, and external scaffolding are not directly comparable. Following prior work on recurrent and
+recursive reasoning models [8, 9], we therefore evaluate GRAM on standard structured reasoning
+tasks that probe the computational properties targeted by our formulation: iterative latent refinement,
+stochastic trajectory exploration, multi-solution coverage, and inference-time scaling.
+In the following, we first evaluate structured reasoning performance on Sudoku-Extreme and ARC-
+AGI (Section 4.1). We then assess multi-solution behavior on N-Queens and Graph Coloring
+(Section 4.2). Next, we examine the unconditional generative interpretation of GRAM on binarized
+MNIST (Section 4.3). Finally, we perform ablation studies to evaluate the impact of key design
+choices (Section 4.4).
+
+4.1 Challenging Puzzle Tasks
+
+Setup. We evaluate on Sudoku-Extreme [8], which contains 9×9 puzzles with minimal clues re-
+quiring extensive constraint propagation, and ARC-AGI Challenge [13, 14], which tests abstract
+visual reasoning through few-shot pattern recognition. We compare against direct prediction (Trans-
+
+
+ 6
+ 1.0 N= 1.0
+ 1 5 10 20 50
+ 0.8
+ 0.9
+Accuracy Looped TF
+
+
+
+
+ Accuracy
+ Looped TF 0.6 HRM
+ HRM
+ 0.8 TRM TRM
+ GRAM (Ours) 0.4 GRAM (N=1)
+ 0.2
+ 0.6
+ 0.5 0.0
+ 8 16 32 128 320 2 4 6 8 10 12 14 16 18
+ Iterations Number of Solutions
+ Figure 4: (Left) Inference-time scaling on Sudoku-Extreme. While both TRM and GRAM benefit from
+ longer recursion (x-axis), GRAM additionally scales with parallel sampling (N = number of samples); each
+ iteration corresponds to a supervision step, which for Looped TF amounts to K× more flat iterations. (Right)
+ Accuracy across number of solutions in N-Queens (8 × 8). Conventional deterministic recursive models suffer
+ a sharp performance drop as the number of possible solutions increases, whereas GRAM maintains consistent
+ performance.
+
+
+ former [39]), a flat recursive baseline (Looped TF [7]), and hierarchical recursive models (HRM [8], TRM [9]). Reported large reasoning
+ model results [40] are included as external reference points for benchmark difficulty, rather than
+ as controlled baselines, since their training and inference settings are not directly comparable to
+ task-specific recursive models. For the scaling analysis, all baselines (Looped TF, HRM, TRM) are
+ reproduced under identical settings following Yang et al. [7] and Jolicoeur-Martineau [9].
+ Stochastic Guidance Improves Reasoning. Figure 3 and Table 8 summarize our main results.
+ GRAM consistently outperforms prior recursive models across all benchmarks. We attribute this
+ improvement to the fundamental difference in how reasoning trajectories are utilized. While Looped
+ TF, HRM, and TRM are restricted to learning from a single deterministic path, GRAM leverages
+ stochastic transitions to explore diverse reasoning trajectories. By training on this richer distribution
+ of solution paths, GRAM acquires more robust reasoning capabilities, allowing it to navigate complex
+ problem spaces more effectively than models constrained to a single sequential refinement process.
+ Detailed experiment results, including more state-of-the-art methods, are provided in Appendix D.1.
+ Parallel Sampling Provides a New Test-time Scaling Axis. Figure 4 (left) shows that increasing the
+ number of parallel samples consistently improves performance across all iteration counts. Notably,
+ GRAM with N = 20 samples at 16 iterations outperforms all deterministic baselines at 320 iterations,
+ including TRM (97.0% vs 90.5%), despite comparable computational budget. While deterministic
+ recursive models scale only through sequential refinement, GRAM leverages stochastic transitions to
+ explore multiple reasoning paths in parallel. To select the best trajectory, we employ a Latent Process
+ Reward Model (LPRM) that predicts output correctness (Section 2.3). This parallel scaling bypasses
+ the latency bottlenecks of depth-based scaling while achieving superior performance. Additional
+ analysis on the ARC-AGI Challenge is provided in Appendix D.2.
+
+ 4.2 Multi-solution Puzzle Tasks
+
+Setup. To evaluate whether GRAM can capture diverse solutions, we test on N-Queens (8 × 8,
+10 × 10) and Graph Coloring (8-vertex, 10-vertex) tasks, where multiple valid solutions exist for each
+input. We compare against direct prediction (Transformer [39]), recursive models (Looped TF [7],
+HRM [8], TRM [9]), and generative models (Autoregressive Transformer (AR), MDLM [41]). For
+N-Queens, we report accuracy (whether the output satisfies all constraints) and coverage (found /
+total valid solutions, with 20 samples). For Graph Coloring, we report conflict edges (number of
+constraint violations; lower is better) instead of accuracy. Detailed configurations are provided in
+Appendix C.2.
+ Deterministic Recursion Fails on Multi-Solution Tasks. Table 1 reveals that deterministic recursive
+ models structurally cannot capture multiple solutions, with coverage at most 36.1% across all tasks.
+ Figure 4 (right) further illustrates this limitation: as the number of valid solutions increases, all three
+ deterministic recursive baselines exhibit sharp accuracy degradation, whereas GRAM maintains
+ consistent performance regardless of solution count. This confirms that deterministic latent updates
+
+
+ 7
+ Table 1: Evaluation on N-Queens and Graph Coloring benchmarks. Rec. and Gen. indicate whether the
+model uses recursive computation and generative sampling, respectively. Values are mean ± standard deviation
+over runs. Accuracy: single-sample (%). Conflict: constraint-violating edges (↓). Coverage: unique valid
+solutions discovered with 20 samples (%).
+ N-Queens Graph Coloring
+ 8×8 10 × 10 8-vertex 10-vertex
+ Method Rec. Gen. # Params Accuracy Coverage Accuracy Coverage Conflict↓ Coverage Conflict↓ Coverage
+ Direct Pred (8 layers) ✗ ✗ 27M 40.4±1.1 13.7±1.1 13.6±0.5 1.6±0.2 179.3±4.0 19.9±0.2 198.7±5.0 6.7±0.1
+ Direct Pred (32 layers) ✗ ✗ 100M 40.2±1.3 13.6±1.1 13.1±0.4 1.6±0.2 174.0±18.0 19.1±1.7 227.7±34.5 6.5±1.9
+ Looped TF ✓ ✗ 7M 68.4±3.7 23.6±1.9 50.0±7.6 6.2±3.2 136.0±16.1 20.5±1.5 157.3±9.0 7.2±0.7
+ HRM ✓ ✗ 27M 78.7±2.9 26.7±1.3 37.4±0.3 4.7±0.1 109.7±1.5 21.8±0.3 164.3±21.6 8.9±1.7
+ TRM ✓ ✗ 7M 66.8±5.7 36.1±22.5 17.5±11.2 2.0±1.3 109.3±3.1 22.3±0.6 170.7±17.9 6.8±0.3
+ AR ✗ ✓ 10.6M 96.3±1.0 84.8±0.8 90.0±2.2 53.2±0.8 19.0±11.3 83.0±0.7 61.3±8.3 40.0±0.3
+ MDLM ✗ ✓ 12.6M 96.1±1.5 87.2±0.6 74.3±6.6 47.4±2.2 2.7±0.6 84.5±4.0 12.0±7.0 48.2±1.4
+ GRAM (Ours) ✓ ✓ 10M 99.7±0.3 90.3±1.9 89.7±2.7 57.5±3.4 2.7±2.1 85.8±0.5 3.3±1.5 51.3±2.8
+
+
+
+
+cause mode collapse when multiple valid outputs exist for the same input. Additional coverage
+analysis is provided in Appendix D.3.
+Recursive Refinement Yields Sharper Constraint Satisfaction. While generative models (AR,
+MDLM) achieve high coverage, GRAM consistently attains higher accuracy with comparable diver-
+sity. On N-Queens, GRAM reaches 99.7% accuracy versus 96.3% (AR) and 96.1% (MDLM). The
+gap is more pronounced on Graph Coloring, where GRAM reduces conflict edges to 2.7 and 3.3 on 8-
+and 10-vertex tasks, compared to 19.0 and 61.3 for AR. This demonstrates that recursive refinement
+enables stricter constraint satisfaction than generative sampling alone.
+
+4.3 Exploring GRAM as an Unconditional Generator
+
+Setup. To investigate GRAM’s unconditional gen- Table 2: Unconditional generation results on
+erative capability beyond conditional reasoning, we binarized MNIST. We report IS (↑) and FID (↓).
+evaluate generation in two domains: structured con- For iterative models, a step corresponds to a super-
+straint generation on Sudoku (from empty boards, vision step for TRM and GRAM, and a denoising
+ step for D3PM. FID is calculated using real sam-
+evaluated by the fraction of generated boards satis- ples with original pixel values (0–255).
+fying Sudoku constraints) and image generation on
+binarized MNIST [15], where pixel values are thresh- Method IS (↑) FID (↓)
+olded to 0 or 1 (evaluated by Inception Score (IS) [42]
+and FID [43]). In both cases, the input is replaced by VAE 1.70 86.28
+ D3PM (1000 steps) 1.86 74.03
+an empty conditioning signal and the model samples TRM (16 steps) 1.00 303.29
+an output from its learned prior. Baselines include
+D3PM [44], a discrete diffusion model, on both tasks, GRAM (Ours)
+and additionally a VAE [45] trained with binary re- 8 steps 1.85 84.08
+ 16 steps 1.89 77.79
+construction loss on MNIST. To ensure a fair compar- 32 steps 1.91 76.65
+ison with existing literature, FID is calculated using 64 steps 1.95 75.39
+real samples from the original standard MNIST. 128 steps 1.99 74.30
+ 256 steps 2.04 73.34
+Generative Behavior Beyond Reasoning. GRAM
+extends from conditional reasoning to unconditional
+generation in two different domains. On Sudoku
+generation (Figure 5), GRAM produces valid boards
+with 99.05% validity using 10.9M parameters and
+16 supervision steps, surpassing D3PM baselines
+that use up to 55.1M parameters and 1000 denois-
+ing steps. Figure 7 shows qualitative examples, illus-
+trating that the model produces diverse, fully valid
+boards from empty inputs without any explicit con-
+straint checker. On MNIST (Table 2), the deter-
+ministic baseline TRM exhibits mode collapse (FID
+303.29), whereas GRAM produces recognizable dig- Figure 5: Unconditional Sudoku generation. Va-
+its with IS and FID comparable to D3PM. Together, lidity (%) of generated Sudoku puzzles. GRAM
+these results indicate that GRAM’s stochastic latent achieves higher validity than D3PM with substan-
+ tially fewer parameters and steps.
+
+ 8
+ D3PM
+ t=0 t=100 t=200 t=300 t=400 t=500 t=600 t=700 t=800 t=900 t=1000
+GRAM TRM
+ t=0 t=1 t=2 t=4 t=6 t=7 t=9 t=11 t=12 t=14 t=16
+
+
+
+ t=0 t=1 t=2 t=4 t=6 t=7 t=9 t=11 t=12 t=14 t=16 Sample 1 Sample 2 Sample 3 Sample 4
+ (a) Generation Process (b) Samples
+ Figure 6: Visualization of the generation process and samples. (a) The generation process over recursion
+ steps. Each row corresponds to a different model. GRAM (bottom) progressively refines the generated image
+ through recursive latent updates, correcting initial errors. (b) Unconditional generated samples from each model.
+
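+ Table 3: Ablation study on Sudoku-Extreme and N-Queens (8 × 8). We evaluate with 5 samples. For (a),
+ components are added cumulatively to the Looped TF baseline (DS = deep supervision, HR = hierarchical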
+ Components are added cumulatively to the Looped TF baseline (DS = deep supervision, HR = hierarchical
+ recursion, SG = stochastic guidance). For (b), both stochasticity and learned guidance are essential—removing
+ either significantly degrades performance.
+ (a) Architecture Ablation. (b) Mechanism Ablation.
+
+ Model variant Sudoku N-Queens Model variant Sudoku N-Queens
+ base (Looped TF) 61.25 71.30 GRAM (ours) 93.96 99.69
+ + DS + HR (=HRM, TRM) 55.00 / 87.40 80.70 / 72.90
+ w/o stochastic guidance 82.87 72.91
+ + SG 65.64 86.30
+ stochasticity only 94.88 50.27
+ + DS + SG 73.90 100.00
+ guide only 0.00 0.00
+ + DS + HR + SG (=GRAM) 93.96 99.69 w/ direct prediction 63.43 61.44
+ TRM w/ stochastic decoder 82.87 71.66
+ TRM w/ random init. 78.53 71.82
+
+
+ transitions support generative modeling beyond symbolic reasoning, with constraint satisfaction
+ emerging as a natural byproduct of the recursive generative process.
+ Inference-Time Scaling Transfers to Generation. Table 2 further shows that increasing recursion
+ at inference improves generation quality monotonically (IS 1.85 → 2.04, FID 84.08 → 73.34 from 8
+ to 256 steps), even though training uses only 16 steps. This indicates that the iterative-refinement
+ advantage of recursive models carries over into the generative regime. Figure 6 visualizes this process;
+ additional samples are in Section D.4.
+
+ 4.4 Ablation Study
+
+ We ablate key design choices of GRAM on Sudoku-Extreme and N-Queens (8 × 8) using 5 samples.
+ Table 3 summarizes the results.
+ Stochastic Guidance Provides Consistent Gains Across Architectures. Table 3a shows that
+ stochastic guidance (SG) improves performance regardless of the underlying architecture: SG alone
+ lifts the flat Looped TF baseline, and combining SG with deep supervision already reaches 100%
+ on N-Queens. The full GRAM (with hierarchical recursion on top) achieves the best results overall
+ (93.96% / 99.69%). While the effect of hierarchical recursion is task-dependent, SG yields consistent
+ gains in every configuration, supporting our design of stochastic guidance as the core extension
+ introduced by GRAM.
+ Both Stochasticity and Guidance Are Essential. We ablate each component by modifying the
+ learned distribution ϵt ∼ N (µθ , σθ2 I) in Equation (4). Removing guidance (N (0, σθ2 I)) maintains
+ Sudoku performance (94.88%), indicating that stochasticity alone can enable diverse reasoning paths.
+ However, this variant collapses on N-Queens (50.27%), where structured guidance is necessary to
+ navigate multi-solution spaces. Removing stochasticity (N (µθ , 0)) fails completely (0.0% on both
+ tasks), as deterministic guidance conditioned on the target leads to severe overfitting.
+ Naive Stochasticity Does Not Help TRM. We test two simple approaches to add stochasticity to
+ TRM: (1) stochastic decoding, which samples from the output distribution instead of argmax, and
+
+
+ 9
+ 8 9 648 59 1 648 59 1 648259317 648259317 648259317 648259317
+ 7 79 5 79 68 5 79 68 415 79 683415 79 683415 792683415 792683415
+ 9 6 289 3 6 1289 3 6 1289 3 6 71289 356 71289 356471289 356471289
+
+
+
+D3PM
+ 657 4 8 657 4 831657 4 83165734 983165734 983165734 983165734
+ 3 9 4 3 9 5 4 3 9 5 4 389 56 42389 561 423897561 423897561 423897561
+ 7 8 7 8 2 7 8 2 7 648 2 5 73648 2 5 73648 2 5173648 2 517364892
+ 5 5 13 548 139548 2 139548626 139548626 139548626 139548626
+ 59 593 593 1 8 593 1 8 27593 148 275936148 275936148 275936148
+ 2 8 2 3 8 7 2 3 8 7 2 53 8 47 2953 8 4712953 864712953 864712953
+ t=0 t=125 t=250 t=375 t=500 t=625 t=750 t=875 t=1000
+ 717348652 716348952 716348952 716348952 716348952 716348952 716348952 716348952
+ 483927379 493527861 493527861 493527861 493527861 493527861 493527861 493527861
+ 523971734 528996734 528169734 528169734 528169734 528169734 528169734 528169734
+GRAM
+
+
+ 864235917 864251379 864215379 864215379 864215379 864215379 864215379 864215379
+ 359841126 359784126 359874126 359874126 359874126 359874126 359874126 359874126
+ 172496538 172936548 172936548 172936548 172936548 172936548 172936548 172936548
+ 937812445 937812615 937482615 937482615 937482615 937482615 937482615 937482615
+ 645779283 645179283 645791283 645791283 645791283 645791283 645791283 645791283
+ 281623497 281663497 281653497 281653497 281653497 281653497 281653497 281653497
+ iter=0 iter=1 iter=2 iter=4 iter=6 iter=8 iter=10 iter=13 iter=16
+Figure 7: Qualitative examples of unconditional Sudoku generation by GRAM. Each board is independently
+sampled from an empty grid using the learned prior. GRAM produces diverse, complete boards satisfying all
+row, column, and box constraints, without an explicit constraint checker or search procedure. Incorrect digits are
+highlighted in red.
+
+
+(2) random initialization, which samples z0 from a Gaussian N (0, I) at each inference. Neither
+improves performance, demonstrating that GRAM’s gains stem from the variational framework rather
+than mere randomness.
+
+5 Conclusions and Limitations
+We introduced GRAM, a generative framework that transforms deterministic recursive architectures
+into probabilistic generative models capable of modeling both p(y | x) and p(x) via recursive amor-
+tized variational inference. For reasoning problems, introducing stochasticity into latent transitions
+enables diverse solution discovery and improved exploration compared to deterministic counterparts.
+Notably, we demonstrate that GRAM can leverage width-based inference-time scaling as a complement
+to depth: by sampling multiple latent trajectories in parallel, it bypasses the latency bottleneck of
+depth-only scaling. Our ablations further reveal that stochastic guidance is a general-purpose exten-
+sion that consistently improves any recursive architecture, and that the gains stem specifically from
+the variational framework — not from mere randomness, as naive stochastic alternatives applied to
+existing models yield no improvement.
+Beyond solution-seeking, GRAM also demonstrates potential as an unconditional generative model
+through recursion-based generation over inputs, with generation quality improving monotonically
+with recursive depth even beyond training-time steps. This suggests new directions for generative
+modeling via hierarchical recursion. Despite these strengths, the sequential nature of deep supervision
+limits training efficiency compared to Transformers, posing a significant barrier to scaling GRAM
+toward larger foundation models.
+
+
+
+
+ 10
+ Acknowledgment
+This research was supported by the Brain Pool Plus Program (No. 2021H1D3A2A03103645) and
+the GRDC (Global Research Development Center) Cooperative Hub Program (RS-2024-00436165)
+through the National Research Foundation of Korea (NRF) funded by the Ministry of Science and
+ICT (MSIT). This work was also supported by the Institute of Information & Communications
+Technology Planning & Evaluation (IITP) grant funded by the Korea government (MSIT) (No. RS-
+2024-00509279, Global AI Frontier Lab) and by the NYU-KAIST Global Innovation and Research
+Institute. Minsu Kim acknowledges funding from the KAIST Jang Young Sil Fellow Program. We
+are especially grateful to Gyubin and Seungju for their non-trivial contributions, and we thank the
+members of the MLML for valuable discussions and feedback throughout this project.
+
+Broader Impacts
+GRAM studies probabilistic recursive reasoning for structured reasoning and generation. By main-
+taining multiple latent trajectories, it may benefit tasks such as constraint satisfaction and scientific
+problem solving, where uncertainty and multiple valid solutions are common. It also suggests a way
+to improve reasoning through inference-time computation rather than parameter scaling alone. Its
+generality also entails risks: plausible but invalid generations may be mistaken for verified solutions
+in downstream decision-making pipelines, and multi-sample inference may increase computational
+and energy costs at scale. Since our experiments focus on controlled benchmarks, deployment in
+real-world or high-stakes settings would require rigorous validation, uncertainty calibration, and
+domain-specific safeguards.
+
+References
+ [1] Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Fei Xia, Ed Chi, Quoc V Le,
+ Denny Zhou, et al. Chain-of-thought prompting elicits reasoning in large language models.
+ Advances in neural information processing systems, 35:24824–24837, 2022.
+
+ [2] Shunyu Yao, Dian Yu, Jeffrey Zhao, Izhak Shafran, Tom Griffiths, Yuan Cao, and Karthik
+ Narasimhan. Tree of thoughts: Deliberate problem solving with large language models. Ad-
+ vances in neural information processing systems, 36:11809–11822, 2023.
+
+ [3] Maciej Besta, Nils Blach, Ales Kubicek, Robert Gerstenberger, Michal Podstawski, Lukas
+ Gianinazzi, Joanna Gajda, Tomasz Lehmann, Hubert Niewiadomski, Piotr Nyczyk, et al. Graph
+ of thoughts: Solving elaborate problems with large language models. In Proceedings of the
+ AAAI conference on artificial intelligence, volume 38, pages 17682–17690, 2024.
+
+ [4] Shibo Hao, Sainbayar Sukhbaatar, DiJia Su, Xian Li, Zhiting Hu, Jason Weston, and Yuandong
+ Tian. Training large language models to reason in a continuous latent space. arXiv preprint
+ arXiv:2412.06769, 2024.
+
+ [5] Hanlin Zhu, Shibo Hao, Zhiting Hu, Jiantao Jiao, Stuart Russell, and Yuandong Tian. Reasoning
+ by superposition: A theoretical perspective on chain of continuous thought. arXiv preprint
+ arXiv:2505.12514, 2025.
+
+ [6] Halil Alperen Gozeten, M Emrullah Ildiz, Xuechen Zhang, Hrayr Harutyunyan, Ankit Singh
+ Rawat, and Samet Oymak. Continuous chain of thought enables parallel exploration and
+ reasoning. arXiv preprint arXiv:2505.23648, 2025.
+
+ [7] Liu Yang, Kangwook Lee, Robert Nowak, and Dimitris Papailiopoulos. Looped transformers
+ are better at learning learning algorithms. arXiv preprint arXiv:2311.12424, 2023.
+
+ [8] Guan Wang, Jin Li, Yuhao Sun, Xing Chen, Changling Liu, Yue Wu, Meng Lu, Sen Song, and
+ Yasin Abbasi Yadkori. Hierarchical reasoning model. arXiv preprint arXiv:2506.21734, 2025.
+
+ [9] Alexia Jolicoeur-Martineau. Less is more: Recursive reasoning with tiny networks. arXiv
+ preprint arXiv:2510.04871, 2025.
+
+
+ 11
+ [10] Mostafa Dehghani, Stephan Gouws, Oriol Vinyals, Jakob Uszkoreit, and Łukasz Kaiser. Uni-
+ versal transformers. arXiv preprint arXiv:1807.03819, 2018.
+[11] Daniel Kahneman. Thinking, fast and slow. Farrar, Straus and Giroux, 2011.
+[12] Yoshua Bengio. The consciousness prior. arXiv preprint arXiv:1709.08568, 2017.
+[13] François Chollet. On the measure of intelligence. arXiv preprint arXiv:1911.01547, 2019.
+[14] Francois Chollet, Mike Knoop, Gregory Kamradt, Bryan Landers, and Henry Pinkard. Arc-
+ agi-2: A new challenge for frontier ai reasoning systems. arXiv preprint arXiv:2505.11831,
+ 2025.
+[15] Y. Lecun, L. Bottou, Y. Bengio, and P. Haffner. Gradient-based learning applied to document
+ recognition. Proceedings of the IEEE, 86(11):2278–2324, 1998. doi: 10.1109/5.726791.
+[16] Ronald J Williams and Jing Peng. An efficient gradient-based algorithm for on-line training of
+ recurrent network trajectories. Neural computation, 2(4):490–501, 1990.
+[17] Corentin Tallec and Yann Ollivier. Unbiasing truncated backpropagation through time. arXiv
+ preprint arXiv:1705.08209, 2017.
+[18] Jonas Geiping, Sean McLeish, Neel Jain, John Kirchenbauer, Siddharth Singh, Brian R Bartold-
+ son, Bhavya Kailkhura, Abhinav Bhatele, and Tom Goldstein. Scaling up test-time compute
+ with latent reasoning: A recurrent depth approach. arXiv preprint arXiv:2502.05171, 2025.
+[19] Yufan Zhuang, Liyuan Liu, Chandan Singh, Jingbo Shang, and Jianfeng Gao. Text generation
+ beyond discrete token sampling. arXiv preprint arXiv:2505.14827, 2025.
+[20] Zhen Zhang, Xuehai He, Weixiang Yan, Ao Shen, Chenyang Zhao, Shuohang Wang, Yelong
+ Shen, and Xin Eric Wang. Soft thinking: Unlocking the reasoning potential of llms in continuous
+ concept space. arXiv preprint arXiv:2505.15778, 2025.
+[21] Natasha Butt, Ariel Kwiatkowski, Ismail Labiad, Julia Kempe, and Yann Ollivier. Soft tokens,
+ hard truths. arXiv preprint arXiv:2509.19170, 2025.
+[22] Zhenyi Shen, Hanqi Yan, Linhai Zhang, Zhanghao Hu, Yali Du, and Yulan He. Codi:
+ Compressing chain-of-thought into continuous space via self-distillation. arXiv preprint
+ arXiv:2502.21074, 2025.
+[23] Zhenrui Yue, Bowen Jin, Huimin Zeng, Honglei Zhuang, Zhen Qin, Jinsung Yoon, Lanyu
+ Shang, Jiawei Han, and Dong Wang. Hybrid latent reasoning via reinforcement learning. arXiv
+ preprint arXiv:2505.18454, 2025.
+[24] Sangmin Bae, Yujin Kim, Reza Bayat, Sungnyun Kim, Jiyoun Ha, Tal Schuster, Adam Fisch,
+ Hrayr Harutyunyan, Ziwei Ji, Aaron Courville, et al. Mixture-of-recursions: Learning dynamic
+ recursive depths for adaptive token-level computation. arXiv preprint arXiv:2507.10524, 2025.
+[25] Amirkeivan Mohtashami, Matteo Pagliardini, and Martin Jaggi. Cotformer: A chain-of-
+ thought driven architecture with budget-adaptive computation cost at inference. arXiv preprint
+ arXiv:2310.10845, 2023.
+[26] Sangmin Bae, Adam Fisch, Hrayr Harutyunyan, Ziwei Ji, Seungyeon Kim, and Tal Schuster.
+ Relaxed recursive transformers: Effective parameter sharing with layer-wise lora. arXiv preprint
+ arXiv:2410.20672, 2024.
+[27] Jeffrey L Elman. Finding structure in time. Cognitive science, 14(2):179–211, 1990.
+[28] Sepp Hochreiter and Jürgen Schmidhuber. Long short-term memory. Neural computation, 9(8):
+ 1735–1780, 1997.
+[29] Kyunghyun Cho, Bart Van Merriënboer, Caglar Gulcehre, Dzmitry Bahdanau, Fethi Bougares,
+ Holger Schwenk, and Yoshua Bengio. Learning phrase representations using rnn encoder-
+ decoder for statistical machine translation. arXiv preprint arXiv:1406.1078, 2014.
+
+
+ 12
+ [30] Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, and Radu
+ Soricut. Albert: A lite bert for self-supervised learning of language representations. arXiv
+ preprint arXiv:1909.11942, 2019.
+[31] Maha Elbayad, Jiatao Gu, Edouard Grave, and Michael Auli. Depth-adaptive transformer. arXiv
+ preprint arXiv:1910.10073, 2019.
+[32] Alex Graves. Adaptive computation time for recurrent neural networks. arXiv preprint
+ arXiv:1603.08983, 2016.
+[33] Junyoung Chung, Kyle Kastner, Laurent Dinh, Kratarth Goel, Aaron Courville, and Yoshua
+ Bengio. A recurrent latent variable model for sequential data, 2016. URL
+ https://arxiv.org/abs/1506.02216.
+[34] Marco Fraccaro, Søren Kaae Sønderby, Ulrich Paquet, and Ole Winther. Sequential neural
+ models with stochastic layers, 2016. URL https://arxiv.org/abs/1605.07571.
+[35] Rahul G. Krishnan, Uri Shalit, and David Sontag. Deep kalman filters, 2015. URL
+ https://arxiv.org/abs/1511.05121.
+[36] Danijar Hafner, Timothy Lillicrap, Ian Fischer, Ruben Villegas, David Ha, Honglak Lee, and
+ James Davidson. Learning latent dynamics for planning from pixels, 2019. URL
+ https://arxiv.org/abs/1811.04551.
+[37] Danijar Hafner, Timothy Lillicrap, Mohammad Norouzi, and Jimmy Ba. Mastering atari with
+ discrete world models. arXiv preprint arXiv:2010.02193, 2020.
+[38] Danijar Hafner, Jurgis Pasukonis, Jimmy Ba, and Timothy Lillicrap. Mastering diverse domains
+ through world models. arXiv preprint arXiv:2301.04104, 2023.
+[39] Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez,
+ Łukasz Kaiser, and Illia Polosukhin. Attention is all you need. Advances in neural information
+ processing systems, 30, 2017.
+[40] ARC-Prize-Foundation. ARC-AGI benchmarking: Leaderboard and dataset for the ARC-AGI
+ benchmark. https://arcprize.org/leaderboard, 2026. Accessed: 2026-1-22.
+[41] Subham Sahoo, Marianne Arriola, Yair Schiff, Aaron Gokaslan, Edgar Marroquin, Justin Chiu,
+ Alexander Rush, and Volodymyr Kuleshov. Simple and effective masked diffusion language
+ models. Advances in Neural Information Processing Systems, 37:130136–130184, 2024.
+[42] Shane Barratt and Rishi Sharma. A note on the inception score. arXiv preprint arXiv:1801.01973,
+ 2018.
+[43] Martin Heusel, Hubert Ramsauer, Thomas Unterthiner, Bernhard Nessler, and Sepp Hochreiter.
+ Gans trained by a two time-scale update rule converge to a local nash equilibrium. Advances in
+ neural information processing systems, 30, 2017.
+[44] Jacob Austin, Daniel D Johnson, Jonathan Ho, Daniel Tarlow, and Rianne Van Den Berg.
+ Structured denoising diffusion models in discrete state-spaces. Advances in neural information
+ processing systems, 34:17981–17993, 2021.
+[45] Diederik P Kingma and Max Welling. Auto-encoding variational bayes. arXiv preprint
+ arXiv:1312.6114, 2013.
+[46] Jianlin Su, Murtadha Ahmed, Yu Lu, Shengfeng Pan, Wen Bo, and Yunfeng Liu. Roformer:
+ Enhanced transformer with rotary position embedding. Neurocomputing, 568:127063, 2024.
+[47] Noam Shazeer. Glu variants improve transformer. arXiv preprint arXiv:2002.05202, 2020.
+[48] Simo Ryu. Minimal implementation of a d3pm (structured denoising diffusion models in
+ discrete state-spaces), in pytorch. https://github.com/cloneofsimo/d3pm, 2024.
+[49] William Peebles and Saining Xie. Scalable diffusion models with transformers. In Proceedings
+ of the IEEE/CVF international conference on computer vision, pages 4195–4205, 2023.
+
+
+ 13
+ [50] Yann LeCun, Léon Bottou, Yoshua Bengio, and Patrick Haffner. Gradient-based learning
+ applied to document recognition. Proceedings of the IEEE, 86(11):2278–2324, 2002.
+[51] Alex Krizhevsky, Ilya Sutskever, and Geoffrey E Hinton. Imagenet classification with deep
+ convolutional neural networks. Advances in neural information processing systems, 25, 2012.
+[52] Stefan Elfwing, Eiji Uchibe, and Kenji Doya. Sigmoid-weighted linear units for neural network
+ function approximation in reinforcement learning. Neural networks, 107:3–11, 2018.
+[53] Yuxin Wu and Kaiming He. Group normalization. In Proceedings of the European conference
+ on computer vision (ECCV), pages 3–19, 2018.
+[54] Ilya Loshchilov and Frank Hutter. Decoupled weight decay regularization. arXiv preprint
+ arXiv:1711.05101, 2017.
+[55] Andrew Brock, Jeff Donahue, and Karen Simonyan. Large scale gan training for high fidelity
+ natural image synthesis. arXiv preprint arXiv:1809.11096, 2018.
+[56] Yang Song and Stefano Ermon. Improved techniques for training score-based generative models.
+ Advances in neural information processing systems, 33:12438–12448, 2020.
+[57] P. Erdős and A. Rényi. On the strength of connectedness of a random graph. Acta Mathematica
+ Academiae Scientiarum Hungarica, 12(1):261–267, Mar 1964. ISSN 1588-2632.
+ doi: 10.1007/BF02066689. URL https://doi.org/10.1007/BF02066689.
+[58] Henrique Lemos, Marcelo Prates, Pedro Avelar, and Luis Lamb. Graph colouring meets deep
+ learning: Effective graph neural network models for combinatorial problems. In 2019 IEEE
+ 31st International Conference on Tools with Artificial Intelligence (ICTAI), pages 879–885.
+ IEEE, 2019.
+[59] Alexey L. Pomerantsev. Principal component analysis (pca). Encyclopedia of Autism Spectrum
+ Disorders, 2014. URL https://api.semanticscholar.org/CorpusID:2534141.
+[60] Jon Louis Bentley. Multidimensional binary search trees used for associative searching.
+ Commun. ACM, 18:509–517, 1975. URL
+ https://api.semanticscholar.org/CorpusID:13091446.
+
+
+
+
+ 14
+ A Additional Method Details
+A.1 Adaptive Computation Time
+
+GRAM optionally adopts adaptive computation time (ACT) [8–10] at inference, allowing each
+trajectory to terminate at a learned halting depth rather than running for a fixed number of supervision
+steps. We follow the Q-learning formulation introduced by HRM [8] and adopted in TRM [9].
+Halt head. The decoder includes an auxiliary head $q_\psi : \mathbb{R}^D \to \mathbb{R}^2$ that maps the high-level state $h$
+to two scalar values, $q_\psi(h) = (q^{\text{halt}}, q^{\text{continue}})$. These are interpreted as estimated Q-values for the
+binary action of halting or continuing computation at the current supervision step.
+Training. The halt head is trained jointly with the main objective via a temporal-difference loss.
+After computing the latent state $z_T^{(n)}$ at the end of supervision step $n$, we form Q-learning targets:
+
+ • $\hat{q}_n^{\text{halt}} = \mathbb{1}[\hat{y}^{(n)} = y]$, indicating whether decoding the current state would yield a correct
+ prediction.
+
+ • $\hat{q}_n^{\text{continue}} = \max\left(q_{n+1}^{\text{halt}},\, q_{n+1}^{\text{continue}}\right)$, the bootstrapped value of running one more supervision
+ step.
+
+The halt head is trained by regression to these targets:
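+ $$\mathcal{L}_{\text{ACT}} = \sum_{n=1}^{N_{\text{sup}}} \left[ \left(q_n^{\text{halt}} - \hat{q}_n^{\text{halt}}\right)^2 + \left(q_n^{\text{continue}} - \hat{q}_n^{\text{continue}}\right)^2 \right]. \tag{15}$$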
+
+This auxiliary loss is added to the main training objective and contributes only through the halt head;
+it does not propagate gradients into the recursive core.
+Inference. At inference, computation proceeds one supervision step at a time. After each step
+$n$, we evaluate $q_\psi(h^{(n)})$ and halt if $q_n^{\text{halt}} > q_n^{\text{continue}}$, returning $\hat{y}^{(n)}$ as the prediction. Otherwise,
+computation continues to the next supervision step, up to a maximum budget of $N_{\text{sup}}^{\max}$ steps. Different
+trajectories sampled in parallel may therefore terminate at different depths, complementing the
+parallel-sampling scheme described in Section 2.3. In practice, we found that using only q halt
+(halting when σ(q halt ) > 0.5, without the continue branch) performs comparably while simplifying
+implementation; our released code uses this variant.
+
+A.2 Latent Process Reward Model (LPRM).
+
+To rank or select among sampled candidates, we train a value head $v_\psi(z_t)$ to predict the expected
+accuracy of the final output, conditioned on the current latent state $z_t$. The LPRM is trained jointly
+with the main objective via a regression loss:
+
+ $$\mathcal{L}_{\text{LPRM}} = \sum_{t=1}^{T} \left(v_\psi(z_t) - r\right)^2, \tag{16}$$
+
+where $r \in [0, 1]$ denotes the accuracy of the final prediction for a given trajectory.
+
+A.3 Empirical Validation of the Surrogate Objective
+
+We further analyze the approximation introduced by the surrogate training objective LGRAM used in
+Section 2.2, both qualitatively and empirically.
+Truncation as a gradient approximation. We frame LGRAM as a gradient approximation rather than
+a separate variational objective. The full trajectory-level ELBO (Equation (13)) involves a sum of KL
+terms across all TTotal transitions, and computing its exact gradient requires backpropagation through
+the entire trajectory. To enable training with constant memory, we propagate gradients only through
+the final transition of each supervision step. This is a standard practice in recurrent latent variable
+models with long computation chains: ELBOs over truncated sequences are used, for example, in
+VRNN [33] and SRNN [34], while truncated latent imagination is used in Dreamer-family world
+models [37, 38]. Trading a small gradient bias for training stability via local truncation is therefore
+
+
+ 15
+ well-precedented; what is specific to GRAM is applying this approximation at the level of recursive
+reasoning trajectories rather than temporal sequences.
+Empirical validation. To verify that optimizing LGRAM effectively drives improvement in the full
+variational bound, we compute both quantities on the validation set throughout training. The full
+ELBO $\mathcal{L}_{\text{ELBO}}$ is evaluated as in Equation (13), summing the reconstruction term and KL contributions
+across all $T_{\text{Total}}$ transitions; the surrogate objective is evaluated as the average of $\mathcal{L}_{\text{GRAM}}^{(n)}$ over the
+$N_{\text{sup}}$ supervision steps. Figure 8 reports the results on Sudoku-Extreme and N-Queens 8 × 8.
+
+ Sudoku-Extreme N-Queens 8 × 8
+ GRAM (training objective) GRAM (training objective)
+ 0.6 ELBO (full) 103 ELBO (full)
+
+
+ 0.5 102
+ ELBO
+
+
+
+
+ ELBO
+ 0.4 101
+
+ 100
+ 0.3
+ 10 1
+ 0.2
+ 10000 20000 30000 40000 50000 60000 0 10000 20000 30000 40000
+ Training Step Training Step
+Figure 8: Full ELBO LELBO and surrogate objective LGRAM throughout training (plotted as −ELBO,
+smaller is better). On both Sudoku-Extreme (left) and N-Queens 8 × 8 (right), both quantities decrease
+monotonically over training, indicating that gradient updates of LGRAM consistently improve the full variational
+bound. The two curves do not coincide because LELBO sums KL contributions across all TTotal transitions
+while LGRAM evaluates only the final-step KL of each supervision step; their gap reflects the cumulative KL
+across earlier transitions, not a failure of optimization. The N-Queens plot uses a log scale on the y-axis due to
+the large dynamic range.
+
+Both LELBO and LGRAM improve monotonically throughout training on both tasks. This indicates
+that, despite the truncation, gradient updates of LGRAM effectively drive improvement in the full
+variational bound. Since LELBO also serves as an indirect estimate of the negative log-likelihood,
+its consistent improvement provides evidence that GRAM optimizes a well-defined data likelihood,
+even though training relies on the surrogate.
+The gap between the two curves in Figure 8 reflects the structural difference between the two
+quantities — LELBO accumulates KL terms across all transitions while LGRAM evaluates only the
+final-step KL of each supervision step — rather than an optimization failure. This gap is consistent
+with LGRAM being a biased but useful surrogate for LELBO .
+
+B Training and Architecture Details
+B.1 Architecture Details
+
+GRAM consists of three components: Encoder, Recursive Core, and Decoder.
+Encoder. Input tokens are mapped to embeddings via a token embedding layer, optionally con-
+catenated with puzzle embeddings (for ARC [13, 14]), and combined with positional encodings
+(RoPE) [46]. The embeddings are scaled by $\sqrt{D}$ and prepended with 16 puzzle embedding tokens [8].
+Recursive Core. The core maintains two latent states: h (high-level) and l (low-level). For each outer
+step, the low-level state is refined K times via l ← fL (l, h + ex ), injecting the input embedding at
+each iteration. The high-level state is then updated via h ← fH (h, l). Both fL and fH share the same
+architecture: a stack of attention and SwiGLU [47] MLP layers. As an exception, for Sudoku tasks
+we use a [SwiGLU + SwiGLU] network for the Recursive Core instead of [Attention + SwiGLU],
+following [9]. To initialize $z_0 = (h_0, l_0)$, we sample once from the standard Gaussian
+distribution $\mathcal{N}(0, I)$ and store the sampled value in the network checkpoint, so that $z_0$ is a
+fixed value that is reloaded with the model rather than resampled.
+Decoder. The decoder extracts content tokens from h (excluding puzzle embedding positions)
+and maps them to logits via a SwiGLU MLP head. An auxiliary head predicts halt decisions and
+correctness values from the first token of h.
+
+
+ 16
+ Table 4: Architecture components.
+ Component Module Description
+ Encoder
+ Token Embedding vocab → D
+ Puzzle Embedding 16 tokens (optional, for ARC)
+ Position Encoding RoPE or learned
+ Recursive Core
+ f L , fH [Attention + SwiGLU] × 2 layers
+ Iterations K low-level, T high-level steps
+ µθ , σ θ , µ ϕ , σ ϕ SwiGLU MLP for each parameter
+ Decoder
+ LM Head Linear(D → vocab)
+ Q Head Linear(D → 2) for halt
+ V Head Linear(D → 1) for value
+
+
+
+Encoder and Decoder for Image Patches. In the MNIST [15] image generation task, we first
+construct a binarized dataset by normalizing the original discrete pixel values (0 ∼ 255) to the
+continuous range [0, 1] and applying a threshold at 0.5. For the network architecture, we employ a
+convolutional patch encoder, following [48, 49].
+The encoding process proceeds in three stages. First, the discrete input tokens x ∈ {0, 1} are
+normalized to the range [−1, 1]. Second, to capture local spatial dependencies before patchification,
+the normalized image passes through a shallow convolutional encoder. This encoder consists of
+two stacked blocks, where each block comprises a 2D convolution [50, 51] with a 5 × 5 kernel and
+padding 2, a SiLU non-linearity [52], and Group Normalization (GN) [53]. Finally, the resulting
+feature map is divided into non-overlapping patches of size P × P and linearly projected to match
+the model’s hidden dimension D. The detailed architectural specifications and dimension transitions
+are summarized in Table 5.
+
+Table 5: Detailed architecture of the Image Patch Encoder for MNIST. H, W denote image resolution, C input
+channels, P patch size, Np the number of patches, and D the hidden dimension.
+ Stage Layer / Operation Output Dim.
+ Input Tokens (B, C, H, W )
+ 1. Norm.
+ Linear Scaling [−1, 1] (B, C, H, W )
+ Conv2d 5 × 5 (p = 2)
+ (B, D/2, H, W )
+ SiLU → GN(32)
+ 2. Conv
+ Conv2d 5 × 5 (p = 2)
+ (B, D/2, H, W )
+ SiLU → GN(32)
+ Flatten Patches (B, N_p, P^2 · D/2)
+ 3. Patch
+ Linear Projection (B, Np , D)
+
+
+Hyperparameters. Following Wang et al. [8] and Jolicoeur-Martineau [9], both the input and output are
+represented as sequences of shape [B, L], where B denotes the batch size and L the context length.
+Each input sequence includes 16 fixed puzzle embedding tokens. The latent states $h_t$ and $l_t$, as well
+as the decoder output, have shape [B, L, D], with embedding dimension D. The Transformer [39]
+backbone uses embedding dimension $D = 512$, $N_{\text{head}} = 8$ attention heads, and FFN hidden dimension
+$D_h = 512$. Within a recursion step, meaning a latent transition $z_t \to z_{t+1}$, we use low-level (inner)
+steps K = 6 for Sudoku [8] and K = 4 for all other tasks, with high-level (outer) steps T = 3.
+
+B.2 Training Details
+
+Task Configuration. All tasks represent inputs and outputs as discrete token sequences (summarized
+in Table 6).
+
+
+ 17
+ • For Sudoku [8], the 9×9 grid is flattened row-by-row into 81 tokens with vocabulary size 11
+ (0=pad, 1=blank, 2–10=digits).
+ • For ARC-AGI [13, 14], variable-size grids are padded to a fixed 30×30 canvas with EOS
+ markers, yielding 900 tokens and vocabulary size 12 (0=pad, 1=eos, 2–11=colors); task-
+ specific puzzle embeddings are prepended to distinguish different ARC tasks.
+ • N-Queens flattens an N × N board row-by-row into $N^2$ tokens with vocabulary size 3
+ (0=pad, 1=empty, 2=queen).
+ • Graph Coloring encodes the strict upper triangle of the adjacency matrix as $n(n-1)/2$ tokens,
+ using 0=PAD, 1=no-edge, and 2=edge for inputs and 3 + color_id for output colors.
+ • For image generation on MNIST [15], images are quantized and processed via CNN-based
+ patchification [50, 49], with the encoder applying patchify and the decoder unpatchify. The
+ patched input then forms a flattened sequence of 14 × 14 = 196 tokens with vocabulary size 3
+ (0=pad, 1=black, 2=white).
+
+
+ Table 6: Task-specific configurations.
+ Task Seq. Len Vocab Puzzle Emb Encoding
+ Sudoku 81 11 ✗ 9×9 grid, row-major
+ ARC-AGI 900 12 ✓ 30×30 padded canvas
+ N-Queens N^2 3 ✗ N × N board
+ Graph Coloring n(n−1)/2 6 ✗ Strict adjacency upper triangle
+ MNIST 196 3 ✗ 14×14 patches
+
+Training Details. We train all models using AdamW [54] with learning rate $10^{-4}$, weight decay
+1.0, and gradient clipping at 1.0. The global batch size is 768. For stability, we apply exponential
+moving average (EMA) with decay 0.9999, following Brock et al. [55] and Song and Ermon [56].
+To prevent posterior collapse, we use a KL balance [37, 38] coefficient of 0.8. The number of deep
+supervision steps is Nsup = 16 for all tasks. The KL coefficient β is set to 0.1 (Sudoku), 0.04/0.1
+(ARC-AGI-1/2), 0.07/0.045 (N-Queens 8 × 8/10 × 10), 0.5/0.45 (Graph Coloring with 8/10 nodes),
+and 0.07 (MNIST). Task-specific training configurations are summarized in Table 7.
+
+ Table 7: Training configurations on NVIDIA RTX 4090 GPUs.
+ Task Epochs GPUs Time
+ Sudoku 50K 8 2h
+ ARC-AGI 200K 8 5 days
+ N-Queens (8×8) 3K 8 1h
+ N-Queens (10×10) 1K 8 3h
+ Graph Coloring (8 nodes) 5K 8 1.5h
+ Graph Coloring (10 nodes) 5K 8 6h
+ MNIST 1.8K 8 16h
+
+
+
+C Additional Details of Experiment Setup
+C.1 Challenging Puzzle Tasks
+
+C.1.1 Looped TF on ARC-AGI
+We report Looped Transformer [7] results on Sudoku-Extreme but omit them on ARC-AGI due to
+prohibitive training cost. Under the same setup used for our other recursive baselines (200K epochs,
+batch size 768, on 8× NVIDIA RTX Pro 6000 GPUs), training Looped TF on Sudoku-Extreme
+already takes 19 hours, and extrapolating to ARC-AGI — which uses substantially longer sequences
+and a larger training set — suggests approximately 97 days (≈ 776 GPU-days) for a full training run.
+This gap stems from two compounding factors. First, Looped TF lacks deep supervision: HRM,
+TRM, and GRAM perform Nsup gradient updates per trajectory (one per segment), whereas Looped
+TF performs only one update at the end of the full trajectory, slowing convergence. Second, Looped
+
+
+ 18
+ TF lacks adaptive halting such as ACT [8–10], so every input must be processed for the maximum
+recursion depth, increasing per-example sequential compute. Both inefficiencies compound at
+ARC-AGI scale, making a full Looped TF training run impractical.
+
+C.2 Multi-solution Puzzle Tasks
+
+C.2.1 N-Queens Problem
+
+ Input Solution 1 Solution 2 Solution 3
+
+
+
+
+Figure 9: Example of an 8 × 8 N-Queens puzzle instance. In this example, 5 queens are removed from the
+full board, leaving 3 queens. The model must find positions for the 5 removed queens. This configuration
+admits exactly 3 valid solutions.
+
+Data Generation Details. The N-Queens problem requires placing N queens on an N × N
+chessboard such that no two queens attack each other—meaning no queens share the same row,
+column, or diagonal. Figure 9 illustrates an example where 5 queens are removed from an 8 × 8
+solution, resulting in a puzzle with 3 distinct valid completions.
+To construct the dataset, we first generated all valid complete N-Queens solutions for N = 8 and
+N = 10. We then created puzzle instances by removing a specific number of queens, treating the
+remaining partial configuration as the input and the original complete board as the target label. To
+generate instances yielding diverse valid completions, we removed k ∈ {5, 6, 7} queens for the 8 × 8
+setting and k ∈ {7, 8, 9} queens for the 10 × 10 setting. The distribution of solution counts for our
+generated dataset is shown in Figure 10.
+For evaluation, we employed an 85:15 train-test split. Crucially, to prevent data leakage and ensure
+the model learns to reason rather than memorize, the split was performed based on unique input
+configurations. This guarantees that no input pattern in the test set appears in the training set. Inputs
+are flattened into discrete 1D sequences $x \in \{0, 1, 2\}^L$, where $L = N^2$, along with zero-padded
+puzzle embedding tokens. Vocabulary mapping follows: padding (0), empty (1), and queen (2).
+
+ 8x8 N-Queens 10x10 N-Queens
+ 3000 20000
+ 15000
+ Counts
+
+
+
+
+ Counts
+
+
+
+
+ 2000
+ 10000
+ 1000
+ 5000
+ 0 0
+ 3 6 9 12 15 18 0 20 40 60 80
+ Number of solutions Number of solutions
+Figure 10: Distribution of the number of valid solutions for generated N-Queens instances. The dataset
+covers a wide range of solution counts, testing the model’s ability to recover multiple valid outputs.
+
+
+C.2.2 Graph Coloring Problem
+Data Generation Details. The Graph Coloring problem requires assigning one of k colors to each
+node in a graph such that no two adjacent nodes share the same color. We consider graphs with
+N ∈ {8, 10} nodes and use k = 3 colors. Figure 11 illustrates an example instance with N = 8
+nodes and k = 3 colors.
+Graphs are generated using the Erdős–Rényi random graph model [57], following the generation
+pipeline from GNN-GCP [58]. Specifically, for each instance, edges are sampled independently
+
+
+ 19
+ with a fixed probability p, producing a symmetric adjacency matrix. We retain only graphs that are
+3-colorable.
+For each graph, we enumerate all valid 3-colorings and retain only canonical forms to eliminate
+redundant solutions under color permutation (e.g., swapping red and blue). This yields a set of
+structurally distinct solutions per input. The distribution of solution counts is shown in Figure 12.
+The final dataset consists of 7,002 training and 255 test instances for N = 8, and 13,465 training and
+192 test instances for N = 10.
+Input and Output Representation. The input graph is represented by extracting the upper triangular
+portion of the adjacency matrix (excluding the diagonal) and flattening it into a 1D sequence. The
+output is a sequence of length N , where each position encodes the assigned color for the corresponding
+node. Vocabulary mapping is as follows: PAD (0), no edge (1), edge (2), and colors (3, 4, 5) for red,
+blue, and green respectively.
+
+ Input Solution 1 Solution 2 Solution 3 Solution 4
+
+
+
+
+ Figure 11: Graph Coloring Example
+
+
+ Vertex 8 Graph Coloring Vertex 10 Graph Coloring
+ 2000
+ 1500
+ 1500
+ Counts
+
+
+
+
+ Counts
+
+
+
+
+ 1000
+ 1000
+ 500 500
+ 0 0
+ 3 6 9 12 15 18 0 20 40 60 80
+ Number of solutions Number of solutions
+Figure 12: Distribution of the number of valid solutions for generated graph coloring instances. The
+dataset covers a wide range of solution counts, testing the model’s ability to recover multiple valid outputs.
+
+
+D Additional Experiment Results
+D.1 Additional Results on Challenging Puzzle Benchmarks
+
+Table 8 reports test accuracy on three challenging puzzle benchmarks. Here we provide additional
+observations complementing the main text.
+
+GRAM Advances the Recursive-Reasoning Line. Across all three benchmarks, GRAM consis-
+tently outperforms prior recursive baselines (Looped TF, HRM, TRM) while using fewer parameters
+than HRM (10M vs. 27M). The complete failure of direct prediction on Sudoku and ARC-AGI-2 (0%
+in both cases) further confirms that recursive computation is essential for these tasks — single-pass
+models, regardless of capacity, cannot solve them. Together, these results indicate that GRAM’s gains
+arise from how recursive computation is organized (probabilistic, multi-trajectory) rather than from
+increased model capacity.
+
+Sudoku-Extreme Resists Parameter Scaling. All tested large reasoning models (LRMs), including
+Deepseek-R1 (671B), score 0% on Sudoku-Extreme. This suggests that pretrained capacity alone
+does not transfer to constraint-propagation reasoning, and that benchmarks like Sudoku-Extreme
+probe a fundamentally different axis from those captured by general-purpose LRMs. On ARC-AGI,
+more recent LRMs such as Gemini 3 Pro (75.0% on ARC-1, 31.1% on ARC-2) remain substantially
+
+
+ 20
+ Table 8: Test accuracy (%) on Challenging Puzzle Benchmarks. GRAM significantly outperforms prior
+recursive models. All recursive model scores were obtained at 16 supervision steps.
+ Method #Params Sudoku ARC-1 ARC-2
+ Large Reasoning Models
+ Deepseek-R1 671B 0.0 15.8 1.3
+ Claude 3.7 16k N/A 0.0 28.6 0.7
+ o3-mini-high N/A 0.0 34.5 3.0
+ GPT 5.2 (low) N/A – 55.7 9.7
+ Grok-4-thinking 1.7T – 66.7 16.0
+ Gemini 3 Pro N/A – 75.0 31.1
+ Recursive Models
+ Direct Pred 27M 0.0 21.0 0.0
+ Looped TF 7M 61.3 - -
+ HRM 27M 55.0 40.3 5.0
+ TRM 7M 87.4 44.6 7.8
+ GRAM (Ours) 10M 97.0 52.0 11.1
+ Human Results
+ Avg. Human – – 60.2 –
+ Best Human – – 98.0 100.0
+
+
+
+ahead of all recursive models, highlighting that abstract few-shot reasoning still benefits from scale;
+we view these numbers as benchmark-difficulty reference points rather than controlled baselines.
+
+D.2 Scaling with Parallel Sampling on the ARC-AGI Challenge
+
+To investigate the effect of GRAM’s sampling on the ARC-AGI-1 benchmark, we measured perfor-
+mance without relying on external data augmentation. Typically, TRM achieves its reported accuracy
+by generating 1,000 augmentations for a single problem and performing majority voting over the
+results. Because this augmentation process itself creates a wide variety of samples, we isolated
+the specific effect of generative sampling by performing inference solely on the original problem
+instance and conducting majority voting over multiple sampled paths. For a fair comparison, TRM
+was evaluated using the same hyperparameters as GRAM, including the number of epochs, learning
+rate, and the number of layers.
+As illustrated in Figure 13, removing augmentations causes a performance decline for both GRAM
+and TRM compared to the values reported in Table 8. However, in the case of GRAM, we observe
+that accuracy consistently improves as the model generates more parallel samples. This trend mirrors
+observations in Section 4.2, suggesting that increased inference-time compute through width scaling
+allows the model to explore more plausible reasoning trajectories and recover from initial errors,
+eventually leading to more robust solution discovery.
+
+Interaction between Augmentation and Sampling. A natural question arises: why not combine
+higher levels of augmentation with extensive parallel sampling? To address this, we conducted an
+ablation study examining the interaction between data augmentation and inference-time sampling.
+Figure 14 presents the results across varying augmentation levels (Aug=0 to Aug=50). Without
+augmentation (Aug=0), increasing the number of samples yields consistent accuracy improvements,
+demonstrating that stochastic sampling effectively explores diverse reasoning trajectories. However,
+as the level of augmentation increases, the marginal benefit of additional sampling diminishes
+substantially. At Aug=50, performance saturates regardless of sample count—accuracy remains
+nearly constant whether we draw 1 or 50 samples. This observation reveals that augmentation
+and sampling serve complementary rather than additive roles: both mechanisms enable the model
+to capture solution diversity, but through different means. When training data is limited, parallel
+sampling compensates by exploring varied reasoning paths at inference time. When training data
+is abundant through augmentation, the model has already internalized sufficient diversity during
+training, rendering additional inference-time exploration redundant. Consequently, scaling sampling
+beyond augmentation provides diminishing returns, justifying our experimental design choice to
+evaluate these two scaling axes separately.
+
+
+ 21
+ 0.450
+ 0.425
+ 0.400
+ 0.375
+
+
+
+
+ Accuracy
+ 0.350
+ 0.325
+ 0.300 TRM
+ GRAM (β=0.1)
+ 0.275 GRAM (β=0.05)
+ GRAM (β=0.04)
+ 0.250 1 2 5 10 20 50 100 250 500
+ Number of Samples (N)
+Figure 13: Effect of sampling on ARC-AGI-1 without data augmentation. To isolate the internal sampling
+effect, both models are evaluated on original problem instances without 1,000 augmentations. While removing
+augmentations causes an initial performance drop, GRAM exhibits robust scaling through generative sampling
+as the number of parallel samples N increases, outperforming the TRM baseline.
+
+
+ 0.500
+ 0.475
+ 0.450
+ Accuracy
+
+
+
+
+ 0.425
+ 0.400
+ 0.375 Aug=0
+ Aug=5
+ 0.350 Aug=10
+ Aug=50
+ 0.325
+ 1 2 5 10 20 50 100 250 500
+ Number of Samples (N)
+Figure 14: Effect of augmentation on sampling efficiency. With limited augmentation (Aug=0), parallel
+sampling provides consistent gains. As augmentation increases, sampling benefits diminish—at Aug= 50,
+performance saturates regardless of sample count, suggesting augmentation and sampling serve complementary
+roles in capturing solution diversity.
+
+
+
+D.3 Solution Coverage Analysis
+
+We analyze the ability of GRAM to capture the diversity of the solution space compared to determin-
+istic baselines. Figure 15 presents the solution coverage on 8 × 8 and 10 × 10 N-Queens tasks with
+respect to the total number of valid ground-truth solutions.
+As shown in Figure 15, deterministic recursive models (HRM and TRM) exhibit a sharp decline in
+coverage as the number of possible solutions increases. Since these models are constrained to a single
+fixed reasoning trajectory, they structurally fail to explore alternative paths, resulting in severe mode
+collapse in multi-solution landscapes.
+In contrast, GRAM effectively leverages its generative latent transitions to cover a broader range
+of solutions. As the number of parallel samples N increases (from 1 to 20), the solution coverage
+improves monotonically across both 8 × 8 and 10 × 10 settings. This empirical evidence confirms
+that GRAM’s stochastic guidance mechanism is essential for navigating complex problem spaces
+where multiple valid reasoning paths exist.
+
+
+D.4 Additional Generated Image Samples
+
+In this section, we provide further qualitative results demonstrating GRAM’s capability in uncon-
+ditional image generation. Figure 16 presents a diverse set of samples generated on the binarized
+MNIST dataset, visualized across the recursive inference steps t = 0 to t = 16.
+
+
+ 22
+ 1.0 HRM 1.0 HRM
+ TRM TRM
+ GRAM (N=1) GRAM (N=1)
+ GRAM (N=5) GRAM (N=5)
+ 0.8 GRAM (N=10) 0.8 GRAM (N=10)
+ GRAM (N=20) GRAM (N=20)
+
+
+ 0.6 0.6
+Coverage
+
+
+
+
+ Coverage
+ 0.4 0.4
+
+
+ 0.2 0.2
+
+
+ 0.0 0.0
+ 2 4 6 8 10 12 14 16 18 0 15 30 45 60 75 90
+ Number of Solutions Number of Solutions
+ (a) N-Queens 8 × 8 (b) N-Queens 10 × 10
+
+ Figure 15: Solution coverage analysis on N-Queens (8 × 8 and 10 × 10) with respect to the number of
+ ground-truth solutions. While deterministic baselines (HRM, TRM) suffer from mode collapse as the solution
+ space grows, GRAM demonstrates monotonic improvement in coverage as the number of parallel samples N
+ increases.
+
+
+
+ As observed in the main text, GRAM exhibits a distinct progressive refinement behavior. Starting
+ from a black initialization, the model iteratively adds details and sharpens the structure of the digit.
+ A particularly compelling property of this process is the model’s ability to recover from initially
+ ambiguous or incorrect formations.
+ For instance, in the second row (generating the digit ’2’) and the last row (generating the digit ’1’),
+ the early predictions at t = 1 and t = 2 manifest as disjointed artifacts or incorrect shapes. However,
+ as the recursion proceeds, GRAM effectively leverages its feedback loop to correct these initial errors,
+ resolving the ambiguity and converging to a coherent, high-quality digit by t = 16.
+
+
+
+
+ t=0 t=1 t=2 t=4 t=6 t=7 t=9 t=11 t=12 t=14 t=16
+ Figure 16: Additional generated samples from GRAM. We provide 8 additional samples generated uncondi-
+ tionally on binarized MNIST using GRAM. Each row represents a single generated sample, visualized across its
+ recursive refinement process.
+
+
+
+
+ 23
+ D.5 Additional Experiment Results on Unconditional Sudoku Generation
+
+In this section, we provide additional details on unconditional Sudoku generation. Unlike the
+conditional Sudoku-solving setting, where the input board contains given clues, the model receives an
+entirely blank board and samples a complete 9 × 9 Sudoku board from its learned prior. We evaluate
+each generated board using the standard Sudoku validity criterion: every row, column, and 3 × 3 box
+must contain the digits 1 through 9 exactly once. We report the validity rate over 100K generated
+boards. To check whether high validity comes from repeatedly producing the same board, we also
+compute the fraction of unique boards among valid samples.
+For GRAM, we construct the unconditional training set from Sudoku-Extreme [8], the Sudoku
+benchmark used by HRM and TRM. We sample 50K complete solutions from the original training
+split, discard the clue patterns, and use an all-blank board as input with the complete solution as
+the target. No data augmentation is used. We train GRAM on this derived 50K-solution set for 200
+epochs with learning rate $10^{-4}$, EMA decay 0.999, and KL coefficient 0.05. The resulting model
+contains 10.9M parameters and uses 16 inference steps.
+For D3PM baselines, we use a DiT-style Transformer backbone and evaluate two model sizes. D3PM-
+Big uses hidden dimension 768, 5 Transformer blocks, and 12 attention heads, yielding 55.1M
+parameters, while D3PM-Small uses hidden dimension 512, 3 Transformer blocks, and 8 attention
+heads, yielding 15.9M parameters. Both variants are trained on the same derived training set and
+generate boards with 1000 denoising steps.
+As shown in Table 9, GRAM achieves 99.05% validity, outperforming all D3PM baselines. The
+strongest D3PM baseline, D3PM-Uniform (Big), reaches 91.33% validity while using 55.1M parame-
+ters and 1000 denoising steps. In contrast, GRAM uses fewer parameters and only 16 inference steps.
+In all cases, the valid samples are unique under exact board matching, indicating that the reported
+validity is not due to simple repetition of a small set of boards. These results show that GRAM can
+generate highly constrained symbolic structures from an empty input, supporting its potential as a
+generator beyond conditional puzzle solving.
+Figure 17 illustrates the unconditional Sudoku generation setup. Starting from an empty board,
+the task is to generate complete boards, and validity is determined by whether the generated board
+satisfies all Sudoku constraints. Figure 7 shows qualitative examples of boards generated by GRAM.
+
+Table 9: Unconditional Sudoku generation. We report the ratio of generated boards satisfying Sudoku
+constraints over 100K samples. All valid boards are unique for all methods in this evaluation.
+ Method #Params Steps Validity(%)
+ D3PM-Uniform (Big) 55.1M 1000 91.33
+ D3PM-Uniform (Small) 15.9M 1000 29.24
+ D3PM-Absorb (Big) 55.1M 1000 79.18
+ D3PM-Absorb (Small) 15.9M 1000 21.88
+ GRAM (Ours) 10.9M 16 99.05
+
+
+ Empty input Valid sample Invalid sample
+ 3 6 5 4 7 8 9 2 1 4 3 8 2 9 1 7 6 5
+ 9 4 1 2 5 6 8 7 3 5 2 1 7 3 6 4 9 8
+ 2 7 8 9 1 3 6 5 4 7 6 9 4 5 8 1 2 3
+ 5 2 9 8 4 7 3 1 6 9 1 3 4 4 7 5 8 6
+ 7 3 6 1 9 2 5 4 8 2 5 4 6 8 9 3 1 7
+ 1 8 4 3 6 5 2 9 7 6 8 7 5 1 3 2 4 9
+ 8 9 7 5 3 4 1 6 2 3 7 2 9 6 4 8 5 1
+ 4 1 3 6 2 9 7 8 5 1 9 5 3 2 8 6 7 4
+ 6 5 2 7 8 1 4 3 9 8 4 6 1 7 5 9 3 2
+
+
+Figure 17: Unconditional Sudoku generation setup. Starting from an empty board, the task is to generate
+complete Sudoku boards. The valid sample satisfies all Sudoku constraints, while red entries in the invalid
+sample indicate cells involved in constraint violations.
+
+
+
+ 24
+ D.6 Visualizing Latent Recursion Process
+
+To understand how stochastic guidance shapes reasoning, we visualize latent trajectories during
+recursive computation. Specifically, we track the high-level state h at each supervision step throughout
+the recursion process. For visualization, we project these latent vectors into 2D using PCA [59] and
+interpolate unobserved states via K-D tree [60] to construct a continuous loss landscape.
+Figures 18 and 19 compare TRM and GRAM on the same Sudoku puzzle. TRM follows a single
+deterministic path from initialization to solution, offering no mechanism to escape if the trajectory
+enters a suboptimal region. In contrast, GRAM samples diverse trajectories that explore different
+regions of latent space before converging. While some trajectories become trapped in local minima
+(bright yellow regions), others successfully navigate toward the global optimum (dark blue regions).
+This diversity enables GRAM to discover valid solutions more reliably through parallel exploration.
+
+
+
+
+Figure 18: Latent reasoning trajectory of TRM. The red dot indicates the initial state h0 and the green dot
+indicates the final state hT . Background color represents the loss landscape: bright yellow corresponds to high
+loss regions, while dark blue indicates low loss (optimal) regions. TRM follows a single deterministic path with
+no ability to escape suboptimal trajectories.
+
+
+
+
+ 25
+ Figure 19: Latent reasoning trajectories of GRAM (50 samples). Using the same visualization scheme as
+Figure 18, we show 50 sampled trajectories from GRAM. The stochastic guidance enables diverse exploration
+of the latent space: while some trajectories converge to local minima (bottom right), others successfully reach
+the global optimum (middle left), demonstrating how parallel sampling improves solution discovery.
+
+
+
+
+ 26
+ Licenses
+Table 10: Existing assets, licenses, and source links. We list the existing datasets, benchmarks, and public
+reference implementations used or cited in our experiments. Synthetic N-Queens and Graph Coloring instances
+are generated by the authors and are therefore not external assets.
+
+ Asset Use in this paper License / terms Source link
+ MNIST Binarized MNIST genera- Creative Com- https://keras.io/api/
+ tion experiments mons Attribution- datasets/mnist/
+ Share Alike 3.0
+ ARC-AGI-1 / origi- ARC-AGI reasoning Apache License https://github.com/fchollet/
+ nal ARC benchmark 2.0 ARC-AGI
+ ARC-AGI-2 ARC-AGI-2 reasoning Apache License https://github.com/arcprize/
+ benchmark / reference 2.0 ARC-AGI-2
+ results
+ HRM repository HRM baseline and Apache License https://github.com/
+ Sudoku-Extreme-related 2.0 sapientinc/HRM
+ reference implementation
+ TinyRecursiveModels TRM baseline and recur- MIT License https://github.com/
+ / TRM repository sive reasoning reference SamsungSAILMontreal/
+ implementation TinyRecursiveModels
+ MDLM repository Masked diffusion base- Apache License https://github.com/
+ line reference implemen- 2.0 kuleshov-group/mdlm
+ tation, if public code is
+ used
+ Google Research D3PM image-generation Apache License https://github.com/
+ D3PM implementa- baseline reference imple- 2.0 google-research/
+ tion mentation, if public code google-research/blob/master/
+ is used d3pm/images/diffusion_
+ categorical.py
+ Looped Trans- Looped Transformer base- MIT License https://github.com/Leiay/
+ former repository line reference implemen- looped_transformer
+ tation, if public code is
+ used
+ N-Queens Synthetic multi-solution Not an external N/A
+ constraint satisfaction asset
+ task generated by the
+ authors
+ Graph Coloring Synthetic multi-solution Not an external N/A
+ constraint satisfaction asset
+ task generated by the
+ authors
+
+
+
+
+ 27
+ \ No newline at end of file
diff --git a/papers/txt/hrm2025_hierarchical_reasoning.txt b/papers/txt/hrm2025_hierarchical_reasoning.txt
new file mode 100644
index 0000000..3cc1f2e
--- /dev/null
+++ b/papers/txt/hrm2025_hierarchical_reasoning.txt
@@ -0,0 +1,1302 @@
+ Hierarchical Reasoning Model
+ Guan Wang1,† , Jin Li1 , Yuhao Sun1 , Xing Chen1 , Changling Liu1 ,
+ Yue Wu1 , Meng Lu1,† , Sen Song2,† , Yasin Abbasi Yadkori1,†
+ 1
+ Sapient Intelligence, Singapore
+
+
+
+ Abstract
+
+ Reasoning, the process of devising and executing complex goal-oriented action sequences,
+arXiv:2506.21734v3 [cs.AI] 4 Aug 2025
+
+
+
+
+ remains a critical challenge in AI. Current large language models (LLMs) primarily employ
+ Chain-of-Thought (CoT) techniques, which suffer from brittle task decomposition, extensive
+ data requirements, and high latency. Inspired by the hierarchical and multi-timescale pro-
+ cessing in the human brain, we propose the Hierarchical Reasoning Model (HRM), a novel
+ recurrent architecture that attains significant computational depth while maintaining both train-
+ ing stability and efficiency. HRM executes sequential reasoning tasks in a single forward pass
+ without explicit supervision of the intermediate process, through two interdependent recurrent
+ modules: a high-level module responsible for slow, abstract planning, and a low-level mod-
+ ule handling rapid, detailed computations. With only 27 million parameters, HRM achieves
+ exceptional performance on complex reasoning tasks using only 1000 training samples. The
+ model operates without pre-training or CoT data, yet achieves nearly perfect performance on
+ challenging tasks including complex Sudoku puzzles and optimal path finding in large mazes.
+ Furthermore, HRM outperforms much larger models with significantly longer context windows
+ on the Abstraction and Reasoning Corpus (ARC), a key benchmark for measuring artificial
+ general intelligence capabilities. These results underscore HRM’s potential as a transformative
+ advancement toward universal computation and general-purpose reasoning systems.
+
+
+ ARC-AGI-1 ARC-AGI-2 Sudoku-Extreme (9x9) Maze-Hard (30x30)
+ 960 training examples 1120 training examples 1000 training examples 1000 training examples
+ 40.3 5.0 60 55.0 80 74.5
+ 40 5
+ 34.5
+ 60
+ Deepseek R1
+
+
+
+
+ 4
+ Claude 3.7 8K
+
+
+
+
+ 30 40
+ Accuracy %
+
+
+
+
+ 3.0
+ 3
+ Claude 3.7 8K
+
+
+
+
+ Claude 3.7 8K
+ 21.0 21.2
+ Deepseek R1
+
+
+
+
+ Deepseek R1
+ 40
+ o3-mini-high
+
+
+
+
+ o3-mini-high
+
+
+ 20 15.8
+ Direct pred
+
+
+
+
+ Direct pred
+
+
+
+
+ Direct pred
+ o3-mini-high
+
+
+
+
+ o3-mini-high
+ Claude 3.7 8K
+
+
+
+
+ 2 20
+ Deepseek R1
+
+
+
+
+ 1.3
+ Direct pred
+
+
+
+
+ 10 0.9 20
+ 1
+ HRM
+
+
+
+
+ HRM
+
+
+
+
+ HRM
+
+
+
+
+ HRM
+
+ 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
+ 0 0 0 0
+ Chain-of-thought, pretrained Direct prediction, small-sample learning
+
+
+
+ Figure 1: Left: HRM is inspired by hierarchical processing and temporal separation in the brain. It
+ has two recurrent networks operating at different timescales to collaboratively solve tasks. Right:
+ With only about 1000 training examples, the HRM (~27M parameters) surpasses state-of-the-art
+ CoT models on inductive benchmarks (ARC-AGI) and challenging symbolic tree-search puzzles
+ (Sudoku-Extreme, Maze-Hard) where CoT models failed completely. The HRM was randomly
+ initialized, and it solved the tasks directly from inputs without chain of thoughts.
+ 2
+ Tsinghua University † Corresponding author. Contact: research@sapient.inc.
+ Code available at: github.com/sapientinc/HRM
+
+ 1
+ 1 Introduction
+Deep learning, as its name suggests, emerged from the idea of stacking more layers to achieve
+increased representation power and improved performance 1,2 . However, despite the remarkable
+success of large language models, their core architecture is paradoxically shallow 3 . This imposes
+a fundamental constraint on their most sought-after capability: reasoning. The fixed depth of stan-
+dard Transformers places them in computational complexity classes such as $AC^0$ or $TC^0$ 4 , prevent-
+ing them from solving problems that require polynomial time 5,6 . LLMs are not Turing-complete
+and thus they cannot, at least in a purely end-to-end manner, execute complex algorithmic rea-
+soning that is necessary for deliberate planning or symbolic manipulation tasks 7,8 . For example,
+our results on the Sudoku task show that increasing Transformer model depth can improve per-
+formance,1 but performance remains far from optimal even with very deep models (see Figure 2),
+which supports the conjectured limitations of the LLM scaling paradigm 9 .
+The LLMs literature has relied largely on Chain-of-Thought (CoT) prompting for reasoning 10 .
+CoT externalizes reasoning into token-level language by breaking down complex tasks into sim-
+pler intermediate steps, sequentially generating text using a shallow model 11 . However, CoT for
+reasoning is a crutch, not a satisfactory solution. It relies on brittle, human-defined decompositions
+where a single misstep or a misorder of the steps can derail the reasoning process entirely 12,13 . This
+dependency on explicit linguistic steps tethers reasoning to patterns at the token level. As a result,
+CoT reasoning often requires a significant amount of training data and generates a large number of
+tokens for complex reasoning tasks, resulting in slow response times. A more efficient approach is
+needed to minimize these data requirements 14 .
+Towards this goal, we explore “latent reasoning”, where the model conducts computations within
+its internal hidden state space 15,16 . This aligns with the understanding that language is a tool for
+human communication, not the substrate of thought itself 17 ; the brain sustains lengthy, coherent
+chains of reasoning with remarkable efficiency in a latent space, without constant translation back
+to language. However, the power of latent reasoning is still fundamentally constrained by a model’s
+effective computational depth. Naively stacking layers is notoriously difficult due to vanishing gra-
+dients, which plague training stability and effectiveness 1,18 . Recurrent architectures, a natural al-
+ternative for sequential tasks, often suffer from early convergence, rendering subsequent computa-
+tional steps inert, and rely on the biologically implausible, computationally expensive and memory
+intensive Backpropagation Through Time (BPTT) for training 19 .
+The human brain provides a compelling blueprint for achieving the effective computational depth
+that contemporary artificial models lack. It organizes computation hierarchically across corti-
+cal regions operating at different timescales, enabling deep, multi-stage reasoning 20,21,22 . Recur-
+rent feedback loops iteratively refine internal representations, allowing slow, higher-level areas to
+guide, and fast, lower-level circuits to execute—subordinate processing while preserving global
+coherence 23,24,25 . Notably, the brain achieves such depth without incurring the prohibitive credit-
+assignment costs that typically hamper recurrent networks from backpropagation through time 19,26 .
+Inspired by this hierarchical and multi-timescale biological architecture, we propose the Hierar-
+chical Reasoning Model (HRM). HRM is designed to significantly increase the effective compu-
+tational depth. It features two coupled recurrent modules: a high-level (H) module for abstract,
+deliberate reasoning, and a low-level (L) module for fast, detailed computations. This structure
+ 1
+ Simply increasing the model width does not improve performance here.
+
+ 2
+ 100 100
+ Scaling Width - 8 layers fixed Transformer
+ Scaling Depth - 512 hidden size fixed Recurrent Transformer
+ 80 80 HRM
+ Accuracy %
+ 60 60
+
+ 40 40
+
+ 20 20
+ 27M 54M 109M 218M 436M 872M 8 16 32 64 128 256 512
+ Parameters Depth / Transformer layers computed
+
+Figure 2: The necessity of depth for complex reasoning. Left: On Sudoku-Extreme Full, which
+requires extensive tree-search and backtracking, increasing a Transformer’s width yields no perfor-
+mance gain, while increasing depth is critical. Right: Standard architectures saturate, failing to
+benefit from increased depth. HRM overcomes this fundamental limitation, effectively using its
+computational depth to achieve near-perfect accuracy.
+
+avoids the rapid convergence of standard recurrent models through a process we term “hierarchi-
+cal convergence.” The slow-updating H-module advances only after the fast-updating L-module
+has completed multiple computational steps and reached a local equilibrium, at which point the
+L-module is reset to begin a new computational phase.
+Furthermore, we propose a one-step gradient approximation for training HRM, which offers im-
+proved efficiency and eliminates the requirement for BPTT. This design maintains a constant mem-
+ory footprint (O(1) compared to BPTT’s O(T ) for T timesteps) throughout the backpropagation
+process, making it scalable and more biologically plausible.
+Leveraging its enhanced effective depth, HRM excels at tasks that demand extensive search and
+backtracking. Using only 1,000 input-output examples, without pre-training or CoT supervi-
+sion, HRM learns to solve problems that are intractable for even the most advanced LLMs. For
+example, it achieves near-perfect accuracy in complex Sudoku puzzles (Sudoku-Extreme Full) and
+optimal pathfinding in 30x30 mazes, where state-of-the-art CoT methods completely fail (0% ac-
+curacy). In the Abstraction and Reasoning Corpus (ARC) AGI Challenge 27,28,29 - a benchmark
+of inductive reasoning - HRM, trained from scratch with only the official dataset (~1000 exam-
+ples), with only 27M parameters and a 30x30 grid context (900 tokens), achieves a performance
+of 40.3%, which substantially surpasses leading CoT-based models like o3-mini-high (34.5%)
+and Claude 3.7 8K context (21.2%), despite their considerably larger parameter sizes and con-
+text lengths, as shown in Figure 1. This represents a promising direction toward the development
+of next-generation AI reasoning systems with universal computational capabilities.
+
+
+2 Hierarchical Reasoning Model
+We present the HRM, inspired by three fundamental principles of neural computation observed in
+the brain:
+• Hierarchical processing: The brain processes information across a hierarchy of cortical ar-
+ eas. Higher-level areas integrate information over longer timescales and form abstract repre-
+ sentations, while lower-level areas handle more immediate, detailed sensory and motor process-
+ ing 20,22,21 .
+
+ 3
+ • Temporal Separation: These hierarchical levels in the brain operate at distinct intrinsic timescales,
+ reflected in neural rhythms (e.g., slow theta waves, 4–8 Hz and fast gamma waves, 30–100
+ Hz) 30,31 . This separation allows for stable, high-level guidance of rapid, low-level computa-
+ tions 32,33 .
+• Recurrent Connectivity: The brain features extensive recurrent connections. These feedback
+ loops enable iterative refinement, yielding more accurate and context-sensitive representations
+ at the cost of additional processing time. Additionally, the brain largely avoids the problematic
+ deep credit assignment problem associated with BPTT 19 .
+The HRM model consists of four learnable components: an input network fI (·; θI ), a low-level re-
+current module fL (·; θL ), a high-level recurrent module fH (·; θH ), and an output network fO (·; θO ).
+The model’s dynamics unfold over N high-level cycles of T low-level timesteps each2 . We index
+the total timesteps of one forward pass by i = 1, . . . , N × T . The modules fL and fH each keep a
+hidden state—$z_L^i$ for $f_L$ and $z_H^i$ for $f_H$—which are initialized with the vectors $z_L^0$ and
+$z_H^0$, respectively.
+The HRM maps an input vector x to an output prediction vector ŷ as follows. First, the input x is
+projected into a working representation x̃ by the input network:
+
+ x̃ = fI (x; θI ) .
+At each timestep i, the L-module updates its state conditioned on its own previous state, the H-
+module’s current state (which remains fixed throughout the cycle), and the input representation.
+The H-module only updates once per cycle (i.e., every T timesteps) using the L-module’s final
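+state at the end of that cycle:
+
+    $$z_L^i = f_L\left(z_L^{i-1}, z_H^{i-1}, \tilde{x}; \theta_L\right),$$
+
+    $$z_H^i = \begin{cases} f_H\left(z_H^{i-1}, z_L^{i-1}; \theta_H\right) & \text{if } i \equiv 0 \pmod{T}, \\ z_H^{i-1} & \text{otherwise}. \end{cases}$$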
+
+Finally, after N full cycles, a prediction ŷ is extracted from the hidden state of the H-module:
+
+    $$\hat{y} = f_O\left(z_H^{NT}; \theta_O\right).$$
+
+This entire N T -timestep process represents a single forward pass of the HRM. A halting mecha-
+nism (detailed later in this section) determines whether the model should terminate, in which case
+ŷ will be used as the final prediction, or continue with an additional forward pass.
+Hierarchical convergence Although convergence is crucial for recurrent networks, standard RNNs
+are fundamentally limited by their tendency to converge too early. As the hidden state settles toward
+a fixed point, update magnitudes shrink, effectively stalling subsequent computation and capping
+the network’s effective depth. To preserve computational power, we actually want convergence to
+proceed very slowly–but engineering that gradual approach is difficult, since pushing convergence
+too far edges the system toward instability.
+ 2
+ While inspired by temporal separation in the brain, our model’s “high-level” and “low-level” modules are concep-
+tual abstractions and do not map directly to specific neural oscillation frequencies.
+
+
+ 4
+ 250 250 250
+ HRM H Recurrent Neural Net Deep Neural Net
+ 200 200 200
+Forward residual
+ HRM L
+ 150 150 150
+ 100 100 100
+ 50 50 50
+ 0 0 0
+ 0 20 40 60 0 20 40 60 0 100 200
+ Step Index # Step Index # Layer Index #
+
+ 60 60
+
+
+
+
+ Layer Index #
+ Step Index #
+
+
+
+
+ Step Index #
+ 200
+ 30 30 100
+
+
+ Principal Components Principal Components Principal Components
+
+Figure 3: Comparison of forward residuals and PCA trajectories. HRM shows hierarchical conver-
+gence: the H-module steadily converges, while the L-module repeatedly converges within cycles
+before being reset by H, resulting in residual spikes. The recurrent neural network exhibits rapid
+convergence with residuals quickly approaching zero. In contrast, the deep neural network experi-
+ences vanishing gradients, with significant residuals primarily in the initial (input) and final layers.
+
+HRM is explicitly designed to counteract this premature convergence through a process we term
+hierarchical convergence. During each cycle, the L-module (an RNN) exhibits stable convergence
+to a local equilibrium. This equilibrium, however, depends on the high-level state zH supplied
+during that cycle. After completing the T steps, the H-module incorporates the sub-computation’s
+outcome (the final state zL ) and performs its own update. This zH update establishes a fresh context
+for the L-module, essentially “restarting” its computational path and initiating a new convergence
+phase toward a different local equilibrium.
+This process allows the HRM to perform a sequence of distinct, stable, nested computations, where
+the H-module directs the overall problem-solving strategy and the L-module executes the intensive
+search or refinement required for each step. Although a standard RNN may approach convergence
+within T iterations, the hierarchical convergence benefits from an enhanced effective depth of N T
+steps. As empirically shown in Figure 3, this mechanism allows HRM both to maintain high
+computational activity (forward residual) over many steps (in contrast to a standard RNN, whose
+activity rapidly decays) and to enjoy stable convergence. This translates into better performance at
+any computation depth, as illustrated in Figure 2.
+Approximate gradient Recurrent models typically use BPTT to compute gradients. However,
+BPTT requires storing the hidden states from the forward pass and then combining them with
+gradients during the backward pass, which demands O(T ) memory for T timesteps. This heavy
+memory burden forces smaller batch sizes and leads to poor GPU utilization, especially for large-
+scale networks. Additionally, because retaining the full history trace through time is biologically
+implausible, it is unlikely that the brain implements BPTT 19 .
+Fortunately, if a recurrent neural network converges to a fixed point, we can avoid unrolling its state
+sequence by applying backpropagation in a single step at that equilibrium point. Moreover, such a
+mechanism could plausibly be implemented in the brain using only local learning rules 34,35 . Based
+
+ 5
+ on this finding, we propose a one-step approximation of the HRM gradient–using the gradient of
+the last state of each module and treating other states as constant. The gradient path is, therefore,
+
+ Output head → final state of the H-module → final state of the L-module → input embedding
+
+The above method needs O(1) memory, does not require unrolling through time, and can be easily
+implemented with an autograd framework such as PyTorch, as shown in Figure 4. Given that
+each module only needs to back-propagate errors through its most recent local synaptic activity,
+this approach aligns well with the perspective that cortical credit assignment relies on short-range,
+temporally local mechanisms rather than on a global replay of activity patterns.
+The one-step gradient approximation is theoretically grounded in the mathematics of Deep Equilib-
+rium Models (DEQ) 36 which employs the Implicit Function Theorem (IFT) to bypass BPTT, as
+detailed next. Consider an idealized HRM behavior where, during high-level cycle k, the L-module
+repeatedly updates until its state $z_L$ converges to a local fixed point $z_L^\star$. This fixed point, given
+the current high-level state $z_H^{k-1}$, can be expressed as
+
+    $$z_L^\star = f_L\left(z_L^\star, z_H^{k-1}, \tilde{x}; \theta_L\right).$$
+
+The H-module then performs a single update using this converged L-state:
+
+    $$z_H^k = f_H\left(z_H^{k-1}, z_L^\star; \theta_H\right).$$
+
+With a proper mapping $F$, the updates to the high-level state can be written in a more compact
+form as $z_H^k = F(z_H^{k-1}; \tilde{x}, \theta)$, where θ = (θI , θL ), and the fixed-point can be written as
+$z_H^\star = F(z_H^\star; \tilde{x}, \theta)$. Let $J_F = \partial F / \partial z_H$ be the Jacobian of $F$, and assume that the
+matrix $I - J_F$ is invertible at $z_H^\star$ and that the mapping $F$ is continuously differentiable. The
+Implicit Function Theorem then allows us to calculate the exact gradient of fixed point $z_H^\star$ with
+respect to the parameters θ without explicit backpropagation:
+
+    $$\frac{\partial z_H^\star}{\partial \theta} = \left(I - J_F\big|_{z_H^\star}\right)^{-1} \frac{\partial F}{\partial \theta}\bigg|_{z_H^\star}. \qquad (1)$$
+
+    def hrm(z, x, N=2, T=2):
+        x = input_embedding(x)
+        zH, zL = z
+
+        with torch.no_grad():
+            for _i in range(N * T - 1):
+                zL = L_net(zL, zH, x)
+                if (_i + 1) % T == 0:
+                    zH = H_net(zH, zL)
+
+        # 1-step grad
+        zL = L_net(zL, zH, x)
+        zH = H_net(zH, zL)
+        return (zH, zL), output_head(zH)
+
+    # Deep Supervision
+    for x, y_true in train_dataloader:
+        z = z_init
+        for step in range(N_supervision):
+            z, y_hat = hrm(z, x)
+
+            loss = softmax_cross_entropy(y_hat, y_true)
+            z = z.detach()
+
+            loss.backward()
+            opt.step()
+            opt.zero_grad()
+
+Figure 4: Top: Diagram of HRM with approximate gradient. Bottom: Pseudocode of HRM with
+deep supervision training in PyTorch.
+
+
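+Calculating the above gradient requires evaluating and inverting the matrix $(I - J_F)$, which can be
+computationally expensive. Given the Neumann series expansion,
+
+    $$(I - J_F)^{-1} = I + J_F + J_F^2 + J_F^3 + \dots,$$
+
+the so-called 1-step gradient 37 approximates the series by considering only its first term, i.e.
+$(I - J_F)^{-1} \approx I$, and leads to the following approximation of Equation (1):
+
+    $$\frac{\partial z_H^\star}{\partial \theta_H} \approx \frac{\partial f_H}{\partial \theta_H}, \quad \frac{\partial z_H^\star}{\partial \theta_L} \approx \frac{\partial f_H}{\partial z_L^\star} \cdot \frac{\partial z_L^\star}{\partial \theta_L}, \quad \frac{\partial z_H^\star}{\partial \theta_I} \approx \frac{\partial f_H}{\partial z_L^\star} \cdot \frac{\partial z_L^\star}{\partial \theta_I}. \qquad (2)$$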
+ 6
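+The gradients of the low-level fixed point, $\partial z_L^\star / \partial \theta_L$ and $\partial z_L^\star / \partial \theta_I$, can also be approximated
+using another application of the 1-step gradient:
+
+    $$\frac{\partial z_L^\star}{\partial \theta_L} \approx \frac{\partial f_L}{\partial \theta_L}, \quad \frac{\partial z_L^\star}{\partial \theta_I} \approx \frac{\partial f_L}{\partial \theta_I}. \qquad (3)$$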
+By substituting Equation (3) back into Equation (2), we arrive at the final simplified gradients.
+Before defining our loss function, we must first introduce two key elements of our proposed
+method: deep supervision and adaptive computational time.
+Deep supervision Inspired by the principle that periodic neural oscillations regulate when learning
+occurs in the brain 38 , we incorporate a deep supervision mechanism into HRM, as detailed next.
+Given a data sample (x, y), we run multiple forward passes of the HRM model, each of which we
+refer to as a segment. Let M denote the total number of segments executed before termination.
+For each segment m ∈ {1, . . . , M }, let $z^m = (z_H^{mNT}, z_L^{mNT})$ represent the hidden state at the
+conclusion of segment m, encompassing both high-level and low-level state components.
+At each segment m, we apply a deep supervision step as follows:
+ 1. Given the state $z^{m-1}$ from the previous segment, compute the next state $z^m$ and its associated
+    output $\hat{y}^m$ through a forward pass in the HRM model:
+
+        $$(z^m, \hat{y}^m) \leftarrow \mathrm{HRM}(z^{m-1}, x; \theta)$$
+
+ 2. Compute the loss for the current segment:
+
+        $$L^m \leftarrow \textsc{Loss}(\hat{y}^m, y)$$
+
+ 3. Update parameters:
+
+        $$\theta \leftarrow \textsc{OptimizerStep}(\theta, \nabla_\theta L^m)$$
+
+The crucial aspect of this procedure is that the hidden state z m is “detached” from the computa-
+tion graph before being used as the input state for the next segment. Consequently, gradients from
+segment m + 1 do not propagate back through segment m, effectively creating a 1-step approxi-
+mation of the gradient of the recursive deep supervision process 39,40 . This approach provides more
+frequent feedback to the H-module and serves as a regularization mechanism, demonstrating supe-
+rior empirical performance and enhanced stability in deep equilibrium models when compared to
+more complex, Jacobian-based regularization techniques 39,41 . Figure 4 shows pseudocode of deep
+supervision training.
+Adaptive computational time (ACT) The brain dynamically alternates between automatic think-
+ing (“System 1”) and deliberate reasoning (“System 2”) 42 . Neuroscientific evidence shows that
+these cognitive modes share overlapping neural circuits, particularly within regions such as the
+prefrontal cortex and the default mode network 43,44 . This indicates that the brain dynamically mod-
+ulates the “runtime” of these circuits according to task complexity and potential rewards 45,46 .
+Inspired by the above mechanism, we incorporate an adaptive halting strategy into HRM that en-
+ables “thinking, fast and slow”. This integration leverages deep supervision and uses the Q-learning
+
+
+
+ 7
+ algorithm 47 to adaptively determine the number of segments. A Q-head uses the final state of the
+H-module to predict the Q-values $\hat{Q}^m = (\hat{Q}^m_{\text{halt}}, \hat{Q}^m_{\text{continue}})$ of the “halt” and “continue” actions:
+
+    $$\hat{Q}^m = \sigma\left(\theta_Q^\top z_H^{mNT}\right),$$
+where σ denotes the sigmoid function applied element-wise. The halt or continue action is chosen
+using a randomized strategy as detailed next. Let Mmax denote the maximum number of segments
+(a fixed hyperparameter) and Mmin denote the minimum number of segments (a random variable).
+The value of Mmin is determined stochastically: with probability ε, it is sampled uniformly from the
+set {2, · · · , Mmax } (to encourage longer thinking), and with probability 1 − ε, it is set to 1. The halt
+action is selected under two conditions: when the segment count surpasses the maximum threshold
+Mmax , or when the estimated halt value Q̂halt exceeds the estimated continue value Q̂continue and the
+segment count has reached at least the minimum threshold Mmin .
+The Q-head is updated through a Q-learning algorithm, which is defined on the following episodic
+Markov Decision Process (MDP). The state of the MDP at segment $m$ is $z^m$, and the action space
+is {halt, continue}. Choosing the action “halt” terminates the episode and returns a binary reward
+indicating prediction correctness, i.e., $\mathbf{1}\{\hat{y}^m = y\}$. Choosing “continue” yields a reward of 0 and
+the state transitions to $z^{m+1}$. Thus, the Q-learning targets for the two actions
+$\hat{G}^m = (\hat{G}^m_{\text{halt}}, \hat{G}^m_{\text{continue}})$ are given by
+
+$$\hat{G}^m_{\text{halt}} = \mathbf{1}\{\hat{y}^m = y\} ,$$
+
+$$\hat{G}^m_{\text{continue}} = \begin{cases} \hat{Q}^{m+1}_{\text{halt}} , & \text{if } m \ge N_{\max} , \\ \max(\hat{Q}^{m+1}_{\text{halt}}, \hat{Q}^{m+1}_{\text{continue}}) , & \text{otherwise} . \end{cases}$$
+
+We can now define the loss function of our learning procedure. The overall loss for each supervision
+segment combines both the Q-head loss and the sequence-to-sequence loss:
+
+$$L^m_{\text{ACT}} = \text{LOSS}(\hat{y}^m, y) + \text{BINARYCROSSENTROPY}(\hat{Q}^m, \hat{G}^m) .$$
+
+Minimizing the above loss enables both accurate predictions and nearly optimal stopping decisions.
+Selecting the “halt” action ends the supervision loop. In practice, sequences are processed in
+batches, which can be easily handled by substituting any halted sample in the batch with a fresh
+sample from the dataloader.
+Figure 5 presents a performance comparison between two HRM variants: one incorporating ACT
+and another employing a fixed computational step count equivalent to ACT’s Mmax parameter. It
+shows that ACT effectively adapts its computational resources based on task complexity, achieving
+significant computational savings with minimal impact on performance.
+Inference-time scaling An effective neural model should exploit additional computational re-
+sources during inference to enhance performance. As illustrated in Figure 5-(c), HRM seamlessly
+achieves inference-time scaling by simply increasing the computational limit parameter, Mmax ,
+without requiring further training or architectural modifications.
+Additional compute is especially effective for tasks that demand deeper reasoning. On Sudoku—
+a problem that often requires long-term planning—HRM exhibits strong inference-time scaling.
+On the other hand, we find that extra computational resources yield minimal gains on the ARC-AGI
+challenge, as solutions generally require only a few transformations.
+
+ 8
+ (a) ACT Compute Spent (b) ACT Performance (c) Inference-time scaling
+ 8 100.0 100.0
+ Fixed M Fixed M
+ 7
+Mean Compute Steps
+
+ ACT (Mmax limit) 97.5 ACT (Mmax limit) 97.5
+ 6 95.0 95.0
+
+
+
+
+ Accuracy %
+
+
+
+
+ Accuracy %
+ 5 92.5 92.5
+ 4 90.0 90.0
+ 3 87.5 87.5 Train Mmax = 2
+ 85.0 85.0 Train Mmax = 4
+ 2 Train Mmax = 8
+ 1 82.5 82.5
+ 2 4 8 2 4 8 2 4 8 16
+ M (Fixed) or Mmax (ACT) M (Fixed) or Mmax (ACT) Inference Mmax
+
+
+Figure 5: Effectiveness of Adaptive Computation Time (ACT) on the Sudoku-Extreme-Full. (a)
+Mean compute steps used by models with ACT versus models with a fixed number of compute steps
+(M ). ACT maintains a low and stable number of average compute steps even as the maximum limit
+(Mmax ) increases. (b) Accuracy comparison. The ACT model achieves performance comparable
+to the fixed-compute model while utilizing substantially fewer computational steps on average. (c)
+Inference-time scalability. Models trained with a specific Mmax can generalize to higher compu-
+tational limits during inference, leading to improved accuracy. For example, a model trained with
+Mmax = 8 continues to see accuracy gains when run with Mmax = 16 during inference.
+
+Stability of Q-learning in ACT The deep Q-learning that underpins our ACT mechanism is
+known to be prone to instability, often requiring stabilization techniques such as replay buffers
+and target networks 48 , which are absent in our design. Our approach, however, achieves stability
+through the intrinsic properties of our model and training procedure. Recent theoretical work by
+Gallici et al. 49 shows that Q-learning can achieve convergence if network parameters are bounded,
+weight decay is incorporated during training, and post-normalization layers are implemented. Our
+model satisfies these conditions through its Post-Norm architecture that employs RMSNorm (a
+layer normalization variant) and the AdamW optimizer. AdamW has been shown to solve an L∞ -
+constrained optimization problem, ensuring that model parameters remain bounded by 1/λ 50 .
+Architectural details We employ a sequence-to-sequence architecture for HRM. Both input and
+output are represented as token sequences: x = (x1 , . . . , xl ) and y = (y1 , . . . , yl′ ) respectively.
+The model includes an embedding layer fI that converts discrete tokens into vector representa-
+tions, and an output head fO (z; θO ) = softmax(θO z) that transforms hidden states into token prob-
+ability distributions ŷ. For small-sample experiments, we replace softmax with stablemax 51 to
+improve generalization performance. The sequence-to-sequence loss is averaged over all tokens,
+$\text{LOSS}(\hat{y}, y) = \frac{1}{l'} \sum_{i=1}^{l'} \log p(y_i)$, where $p(y_i)$ is the probability that distribution $\hat{y}_i$ assigns to token
+$y_i$. The initial hidden states $z^0$ are initialized by sampling from a truncated normal distribution with
+standard deviation of 1, truncation of 2, and kept fixed throughout training.
+Both the low-level and high-level recurrent modules fL and fH are implemented using encoder-
+only Transformer 52 blocks with identical architectures and dimensions. These modules take mul-
+tiple inputs, and we use straightforward element-wise addition to combine them, though more
+sophisticated merging techniques such as gating mechanisms could potentially improve perfor-
+mance and are left for future work. For all Transformer blocks in this work—including those in
+the baseline models—we incorporate the enhancements found in modern LLMs (based on Llama 53
+architectures). These improvements include Rotary Positional Encoding 54 , Gated Linear Units 55 ,
+RMSNorm 56 , and the removal of bias terms from linear layers.
+Furthermore, both HRM and recurrent Transformer models implement a Post-Norm architecture
+
+ 9
+ 8 4 5 6
+ 8 7
+ 3 4
+ 3 8 4 2
+ 6 3 8
+ 9 6
+ 5
+ 2 1
+ 2 5 3 8
+
+
+
+
+ 7 8 4 1 2 5 9 6 3
+ 2 6 1 3 8 9 7 4 5
+ 3 5 9 6 4 7 8 1 2
+ 5 3 8 4 9 6 1 2 7
+ 4 1 6 2 7 3 5 9 8
+ 9 7 2 8 5 1 4 3 6
+ 6 9 3 5 1 8 2 7 4
+ 8 4 7 9 6 2 3 5 1
+ 1 2 5 7 3 4 6 8 9
+
+
+
+
+ (a) ARC-AGI (b) Sudoku-Hard (c) Maze navigation (d) Sudoku-Extreme subset difficulty
+
+Figure 6: Left: Visualization of benchmark tasks. Right: Difficulty of Sudoku-Extreme examples.
+
+with weights initialized via truncated LeCun Normal initialization 57,58,59 , while the scale and bias
+parameters are excluded from RMSNorm. All parameters are optimized using the Adam-atan2 op-
+timizer 60 , a scale-invariant variant of Adam 61 , combined with a constant learning rate that includes
+linear warm-up.
+
+
+3 Results
+This section begins by describing the ARC-AGI, Sudoku, and Maze benchmarks, followed by an
+overview of the baseline models and their results. Figure 6-(a,b,c) presents a visual representa-
+tion of the three benchmark tasks, which are selected to evaluate various reasoning abilities in AI
+models.
+
+3.1 Benchmarks
+ARC-AGI Challenge The ARC-AGI benchmark evaluates general fluid intelligence through IQ-
+test-like puzzles that require inductive reasoning 27 . The initial version, ARC-AGI-1, presents chal-
+lenges as input-label grid pairs that force AI systems to extract and generalize abstract rules from
+just a few examples. Each task provides a few input–output demonstration pairs (usually 2–3) and
+a test input. An AI model has two attempts to produce the correct output grid. Although some be-
+lieve that mastering ARC-AGI would signal true artificial general intelligence, its primary purpose
+is to expose the current roadblocks in AGI progress. In fact, both conventional deep learning meth-
+ods and CoT techniques have faced significant challenges with ARC-AGI-1, primarily because it
+requires the ability to generalize to entirely new tasks 28 .
+Addressing the limitations identified in ARC-AGI-1, ARC-AGI-2 significantly expands the bench-
+mark by providing a more comprehensive and carefully refined collection of tasks. These new
+tasks emphasize deeper compositional reasoning, multi-step logic, contextual rule application, and
+symbolic abstraction. Human calibration studies show these tasks are challenging but doable for
+people, while being much harder for current AI systems, offering a clearer measure of general
+reasoning abilities 29 .
+
+
+ 10
+ Sudoku-Extreme Sudoku is a 9×9 logic puzzle, requiring each row, column, and 3×3 block to
+contain the digits 1–9 exactly once. A prediction is considered correct if it exactly matches the
+puzzle’s unique solution. Sudoku’s complex logical structure makes it a popular benchmark for
+evaluating logical reasoning in machine learning 62,63,64 .
+The most frequently used Sudoku dataset in research, namely the Kaggle dataset 65 , can be fully
+solved using elementary single-digit techniques 66 . The minimal 17-clue puzzles 62 , another widely-
+used collection, might seem more challenging due to its small number of clues. However, this
+perception is misleading—since 17 represents the minimum number of clues required to guarantee
+a unique Sudoku solution, these hints need to be highly orthogonal to each other. This orthogonal
+arrangement leads to many direct, easily-resolved solution paths 67 .
+We introduce Sudoku-Extreme, a more challenging dataset that is compiled from the aforemen-
+tioned easy datasets as well as puzzles recognized by the Sudoku community as exceptionally
+difficult for human players:
+• Easy puzzles compiled from Kaggle, 17-clue, plus unbiased samples from the Sudoku puzzle
+ distribution 67 : totaling 1 149 158 puzzles.
+• Challenging puzzles compiled from Magictour 1465, Forum-Hard and Forum-Extreme subsets:
+ totaling 3 104 157 puzzles.
+The compiled data then undergo a strict 90/10 train-test split, ensuring that the test set puzzles
+cannot be derived through equivalent transformations of any training samples. Sudoku-Extreme is
+a down-sampled subset of this data containing 1000 training examples. We use Sudoku-Extreme in
+our main experiments (Figure 1), which focuses on small-sample learning scenarios. To guarantee
+convergence and control overfitting effects in our analysis experiments (Figures 2, 3 and 5), we use
+the complete training data, Sudoku-Extreme-Full, containing 3 831 994 examples.
+We measure puzzle difficulty by counting the number of search backtracks (“guesses”) required
+by a smart Sudoku solver program tdoku, which uses propositional logic to reduce the number of
+guesses 67 . Our Sudoku-Extreme dataset exhibits a mean difficulty of 22 backtracks per puzzle, sig-
+nificantly higher than existing datasets, including recent handmade puzzles Sudoku-Bench 68 which
+average just 0.45 backtracks per puzzle. These subset complexity levels are shown in Figure 6-(d).
+Maze-Hard This task involves finding the optimal path in a 30×30 maze, making it interpretable
+and frequently used for training LLMs in search tasks 69,70,71 . We adopt the instance generation
+procedure of Lehnert et al. 71 , but introduce an additional filter to retain only those instances whose
+difficulty exceeds 110. Here, “difficulty” is defined as the length of the shortest path, which aligns
+with the linear time complexity of the wavefront breadth-first search algorithm on GPUs 72 . A path
+is considered correct if it is valid and optimal—that is, the shortest route from the start to the goal.
+The training and test set both include 1000 examples.
+
+3.2 Evaluation Details
+For all benchmarks, HRM models were initialized with random weights and trained in the sequence-
+to-sequence setup using the input-output pairs. The two-dimensional input and output grids were
+flattened and then padded to the maximum sequence length. The resulting performance is shown in
+Figure 1. Remarkably, HRM attains these results with just ~1000 training examples per task—and
+without pretraining or CoT labels.
+
+
+ 11
+For the ARC-AGI challenge, we start with (1) all demonstration and test input-label pairs from the
+training set, and (2) all demonstration pairs along with test inputs from the evaluation set. The
+dataset is augmented by applying translations, rotations, flips, and color permutations to the puz-
+zles. Each task example is prepended with a learnable special token that represents the puzzle it
+belongs to. At test time, we proceed as follows for each test input in the evaluation set: (1) Gener-
+ate and solve 1000 augmented variants and, for each, apply the inverse-augmentation transform to
+obtain a prediction. (2) Choose the two most popular predictions as the final outputs.3 All reported
+results are obtained by comparing the outputs with the withheld test labels from the evaluation set.
+We augment Sudoku puzzles by applying band and digit permutations, while data augmentation is
+disabled for Maze tasks. Both tasks undergo only a single inference pass.
+For ARC-AGI, the scores of the CoT models are taken from the official leaderboard 29 , while for
+Sudoku and Maze, the scores are obtained by evaluating through the corresponding API.
+In Figure 1, the baselines are grouped based on whether they are pre-trained and use CoT, or neither.
+The “Direct pred” baseline means using “direct prediction without CoT and pre-training”, which
+retains the exact training setup of HRM but swaps in a Transformer architecture. Interestingly, on
+ARC-AGI-1, “Direct pred” matches the performance of Liao and Gu 73 , who built a carefully de-
+signed, domain-specific equivariant network for learning the ARC-AGI task from scratch, without
+pre-training. By substituting the Transformer architecture with HRM’s hierarchical framework and
+implementing ACT, we achieve more than a twofold performance improvement.
+On the Sudoku-Extreme and Maze-Hard benchmarks, the performance gap between HRM and the
+baseline methods is significant, as the baselines almost never manage to solve the tasks. These
+benchmarks that demand lengthy reasoning traces are particularly difficult for CoT-based methods.
+With only 1000 training examples, the “Direct pred” baseline—which employs an 8-layer Trans-
+former identical in size to HRM—fails entirely on these challenging reasoning problems. When
+trained on the larger Sudoku-Extreme-Full dataset, however, “Direct pred” can solve some easy
+Sudoku puzzles and reaches 16.9% accuracy (see Figure 2). Lehnert et al. 71 showed that a large
+vanilla Transformer model with 175M parameters, trained on 1 million examples across multiple
+trials, achieved only marginal success on 30x30 Maze tasks, with accuracy below 20% using the
+pass@64 evaluation metric.
+
+3.3 Visualization of intermediate timesteps
+Although HRM demonstrates strong performance on complex reasoning tasks, it raises an intrigu-
+ing question: what underlying reasoning algorithms does the HRM neural network actually imple-
+ment? Addressing this question is important for enhancing model interpretability and developing a
+deeper understanding of the HRM solution space.
+While a definitive answer lies beyond our current scope, we begin our investigation by analyzing
+state trajectories and their corresponding solution evolution. More specifically, at each timestep
+$i$ and given the low-level and high-level state pair ($z_L^i$ and $z_H^i$), we perform a preliminary forward
+pass through the H-module to obtain $\bar{z}^i = f_H(z_H^i, z_L^i; \theta_H)$ and its corresponding decoded prediction
+$\bar{y}^i = f_O(\bar{z}^i; \theta_O)$. The prediction $\bar{y}^i$ is then visualized in Figure 7.
+In the Maze task, HRM appears to initially explore several potential paths simultaneously, subse-
+quently eliminating blocked or inefficient routes, then constructing a preliminary solution outline
+ 3
+ The ARC-AGI allows two attempts for each test input.
+
+ 12
+ Timestep i = 0 Timestep i = 1 Timestep i = 2 Timestep i = 3 Timestep i = 4 Timestep i = 5 Timestep i = 6
+
+
+
+
+ Initial Timestep i = 0 Timestep i = 1 Timestep i = 2 Timestep i = 3 Timestep i = 4 Timestep i = 5 Timestep i = 6 Timestep i = 7
+ 4 89
+ 2 4 3 5 7 1 8 9 6 2 4 3 5 7 1 8 9 6 2 4 3 5 7 1 8 9 3 2 4 3 5 7 1 8 9 3 2 4 1 6 7 5 8 9 3 2 4 1 6 7 5 8 9 3 2 4 1 6 7 5 8 9 3 2 4 1 6 7 5 8 9 3
+ 7 3 1
+ 6 7 8 6 3 4 1 5 4 6 7 8 6 3 4 1 5 4 8 7 9 6 3 4 1 5 2 8 7 9 6 3 4 1 5 2 6 7 9 6 3 4 1 5 2 6 7 9 9 3 4 1 5 2 6 7 9 8 3 4 1 5 2 6 7 9 8 3 4 1 5 2
+ 2 6 5 1 2 7 9 7 3 4 6 5 1 2 8 9 7 3 4 6 5 1 2 8 8 7 3 4 6 5 1 2 8 9 7 3 4 6 5 3 2 1 8 7 6 4 9 5 1 2 8 8 7 3 4 8 5 3 2 1 9 7 6 4 8 5 3 2 1 9 7 6 4
+ 67 8 3 4 8 6 7 2 1 2 5 3 4 8 6 7 2 1 9 5 3 4 8 6 7 2 1 5 5 3 4 8 6 7 2 1 5 5 3 4 9 6 7 2 1 5 5 3 4 8 6 7 2 1 8 5 3 4 9 6 7 2 1 8 5 3 4 9 6 7 2 1 8
+ 3 4 7 2 8 3 1 8 6 4 8 7 2 5 3 1 5 6 4 8 7 2 5 3 1 5 6 4 9 7 2 5 3 1 1 6 4 6 7 2 5 3 8 1 6 4 6 7 2 5 3 1 1 6 4 6 7 2 8 3 8 1 6 4 6 7 2 8 3 5 1 6 4 9
+1 64 23 15649237 9 19645237 7 19645237 7 19645237 7 19645238 7 19645237 7 19648237 5 19648237 5
+ 27 3 8 1 2 7 9 3 4 6 6 9 1 2 7 4 3 9 8 6 9 1 2 7 4 3 5 6 8 9 1 2 7 4 3 5 6 8 9 1 2 7 4 3 5 6 8 9 1 2 7 4 3 5 8 6 9 1 2 7 4 3 5 8 6 9 1 2 7 4 3 5 8 6
+46 12 468128 7 3 7 465128 9 7 3 465128 9 7 3 468128 9 7 3 468128 9 7 3 468128 9 3 7 465128 9 3 7 465128 9 3 7
+3 7 6 1 3979 964 21 3875 564 21 3879 564 21 3879 564 21 3875 864 21 3875 864 21 3875 964 21 3875 964 21
+[7666fa5d] Example Input [7666fa5d] Example Output [7666fa5d] Test Input Timestep i = 0 Timestep i = 1 Timestep i = 2 Timestep i = 3 Timestep i = 4
+
+
+
+
+ [7b80bb43] Test Input Timestep i = 0 Timestep i = 1 Timestep i = 2 Timestep i = 3 Timestep i = 4 Timestep i = 5 Timestep i = 6
+ [7b80bb43] Example Input [7b80bb43] Example Output
+
+
+
+
+ Figure 7: Visualization of intermediate predictions by HRM on benchmark tasks. Top: Maze-
+ Hard—blue cells indicate the predicted path. Middle: Sudoku-Extreme—bold cells represent ini-
+ tial givens; red highlights cells violating Sudoku constraints; grey shading indicates changes from
+ the previous timestep. Bottom: ARC-AGI-2 Task—left: provided example input-output pair; right:
+ intermediate steps solving the test input.
+
+ followed by multiple refinement iterations. In Sudoku, the strategy resembles a depth-first search
+ approach, where the model appears to explore potential solutions and backtracks when it hits dead
+ ends. HRM uses a different approach for ARC tasks, making incremental adjustments to the board
+ and iteratively improving it until reaching a solution. Unlike Sudoku, which involves frequent
+ backtracking, the ARC solution path follows a more consistent progression similar to hill-climbing
+ optimization.
+ Importantly, the model shows that it can adapt to different reasoning approaches, likely choosing an
+ effective strategy for each particular task. Further research is needed to gain more comprehensive
+ insights into these solution strategies.
+
+
+ 4 Brain Correspondence
+ A key principle from systems neuroscience is that a brain region’s functional repertoire—its ability
+ to handle diverse and complex tasks—is closely linked to the dimensionality of its neural represen-
+ tations 75,76 . Higher-order cortical areas, responsible for complex reasoning and decision-making,
+ must handle a wide variety of tasks, demanding more flexible and context-dependent processing 77 .
+ In dynamical systems, this flexibility is often realized through higher-dimensional state-space tra-
+ jectories, which allow for a richer repertoire of potential computations 78 . This principle gives rise
+ to an observable dimensionality hierarchy, where a region’s position in the processing hierarchy
+
+ 13
+ (a) (c) (e)
+
+
+
+
+ (b) (d) (f)
+ 5.0
+
+ 4.5
+ Participation Ratio (PR)
+
+
+
+
+ 4.0
+
+ 3.5
+
+ 3.0
+
+ 2.5
+
+ 2.0
+
+ 0 20 40
+ Position in the hierarchy
+
+
+Figure 8: Hierarchical Dimensionality Organization in the HRM and Mouse Cortex. (a,b) are
+adapted from Posani et al. 74 . (a) Anatomical illustration of mouse cortical areas, color-coded by
+functional modules. (b) Correlation between Participation Ratio (PR), a measure of effective neural
+dimensionality, and hierarchical position across different mouse cortical areas. Higher positions in
+the hierarchy (e.g., MOs, ACAd) exhibit significantly higher PR values compared to lower sensory
+areas (e.g., SSp-n), with a Spearman correlation coefficient of ρ = 0.79 (P = 0.0003). (c,d) Trained
+HRM. (c) PR scaling of the trained HRM with task diversity. The dimensionality of the high-
+level module (zH ) scales with the number of unique tasks (trajectories) included in the analysis,
+indicating an adaptive expansion of its representational capacity. In contrast, the low-level module’s
+(zL ) dimensionality remains stable. (d) PR values for the low-level (zL , PR = 30.22) and high-
+level (zH , PR = 89.95) modules of the trained HRM, computed from neural activity during 100
+unique Sudoku-solving trajectories. A clear dimensionality hierarchy is observed, with the high-
+level module operating in a substantially higher-dimensional space. (e,f) Analysis of Untrained
+Network. To verify that the dimensionality hierarchy is an emergent property of training, the same
+analyses were performed on an untrained HRM with random weights. (e) In contrast to the trained
+model’s scaling in (c), the dimensionality of both modules in the untrained model remains low and
+stable, failing to scale with the number of tasks. (f) Similarly, contrasting with the clear separation
+in (d), the PR values for the untrained model’s modules (zL , PR = 42.09; zH , PR = 40.75) are
+low and nearly identical, showing no evidence of hierarchical separation. This confirms that the
+observed hierarchical organization of dimensionality is a learned property that emerges through
+training, not an artifact of the model’s architecture.
+
+
+
+
+ 14
+ correlates with its effective dimensionality. To quantify this phenomenon, we can examine the
+Participation Ratio (PR), which serves as a standard measure of the effective dimensionality of a
+high-dimensional representation 79 . The PR is calculated using the formula
+$$\mathrm{PR} = \frac{\left(\sum_i \lambda_i\right)^2}{\sum_i \lambda_i^2} ,$$
+
+where {λi } are the eigenvalues of the covariance matrix of neural trajectories. Intuitively, a higher
+PR value signifies that variance is distributed more evenly across many dimensions, corresponding
+to a higher-dimensional representation. Conversely, a lower PR value indicates that variance is
+concentrated in only a few principal components, reflecting a more compact, lower-dimensional
+structure.
+The dimensionality hierarchy can be observed, for example, in the mouse cortex, where the PR of
+population activity increases monotonically from low-level sensory areas to high-level associative
+areas, supporting this link between dimensionality and functional complexity 74 (Figure 8 (a,b)).
+We evaluated whether HRM reproduces this neuroscientific principle by calculating the PR for
+both recurrent modules after training on the Sudoku-Extreme Full dataset. The PR computation
+used the covariance matrix derived from neural states gathered across multiple Sudoku-solving
+trajectories. The results show a striking parallel to the biological findings. The low-level module’s
+state (zL ) occupies a relatively small subspace with a participation ratio of 30.22, whereas the high-
+level module’s state (zH ) operates in a substantially larger subspace with a participation ratio of
+89.95, as shown in Figure 8(d). Furthermore, Figure 8(c) shows that increasing the number of
+unique tasks (trajectories) from 10 to 100 causes zH dimensionality to scale up accordingly, while
+zL dimensionality remains stable. These results suggest an emergent separation of representational
+capacity between the modules that parallels their functional roles.
+To confirm that this hierarchical organization is an emergent property of training, and not an artifact
+of the network’s architecture, we performed a control analysis using an identical but untrained
+network with random weights.
+We initialized an identical HRM architecture with random weights and, without any training, mea-
+sured the PR of its modules as the network processed the same task-specific inputs given to the
+trained model.
+The results, shown in Figure 8(e,f), reveal a stark contrast: the high-level and low-level modules of
+the untrained network exhibit no hierarchical separation, with their PR values remaining low and
+nearly indistinguishable from each other. This control analysis validates that the dimensionality
+hierarchy is an emergent property that arises as the model learns to perform complex reasoning.
+The high-to-low PR ratio in HRM (zH /zL ≈ 2.98) closely matches that measured in the mouse
+cortex (≈ 2.25). In contrast, conventional deep networks often exhibit neural collapse, where
+last-layer features converge to a low-dimensional subspace 80,81,82 . HRM therefore departs from the
+collapse pattern and instead fosters a high-dimensional representation in its higher module. This
+is significant because such representations are considered crucial for cognitive flexibility and are a
+hallmark of higher-order brain regions like the prefrontal cortex (PFC), which is central to complex
+reasoning.
+This structural parallel suggests the model has discovered a fundamental organizational principle.
+By learning to partition its representations into a high-capacity, high-dimensional subspace (zH )
+
+ 15
+ and a more specialized, low-dimensional one (zL ), HRM autonomously discovers an organizational
+principle that is thought to be fundamental for achieving robust and flexible reasoning in biological
+systems. This provides a potential mechanistic explanation for the model’s success on complex,
+long-horizon tasks that are intractable for models lacking such a differentiated internal structure.
+We emphasize, however, that this evidence is correlational. While a causal link could be tested
+via intervention (e.g., by constraining the H-module’s dimensionality), such methods are difficult
+to interpret in deep learning due to potential confounding effects on the training process itself.
+Thus, the causal necessity of this emergent hierarchy remains an important question for future
+investigation.
+
+
+5 Related Work
+Reasoning and algorithm learning Given the central role of reasoning problems and their close
+relation to algorithms, researchers have long explored neural architectures that enable algorithm
+learning from training instances. This line of work includes Neural Turing Machines (NTM) 83 ,
+the Differentiable Neural Computer (DNC) 84 , and Neural GPUs 85 –all of which construct iterative
+neural architectures that mimic computational hardware for algorithm execution, and are trained to
+learn algorithms from data. Another notable work in this area is Recurrent Relational Networks
+(RRN) 62 , which executes algorithms on graph representations through graph neural networks.
+Recent studies have integrated algorithm learning approaches with Transformer-based architec-
+tures. Universal Transformers extend the standard Transformer model by introducing a recurrent
+loop over the layers and implementing an adaptive halting mechanism. Geiping et al. 86 demonstrate
+that looped Transformers can generalize to a larger number of recurrent steps during inference than
+what they were trained on. Shen et al. 16 propose adding continuous recurrent reasoning tokens
+to the Transformer. Finally, TransNAR 8 combines recurrent graph neural networks with language
+models.
+Building on the success of CoT-based reasoning, a line of work has introduced fine-tuning meth-
+ods that use reasoning paths from search algorithms (like A*) as SFT targets 87,71,70 .
+We also mention adaptive halting mechanisms designed to allocate additional computational re-
+sources to more challenging problems. This includes the Adaptive Computation Time (ACT) for
+RNNs 88 and follow-up research like PonderNet 89 , which aims to improve the stability of this allo-
+cation process.
+HRM further pushes the boundary of algorithm learning through a brain-inspired computational
+architecture that achieves exceptional data efficiency and model expressiveness, successfully dis-
+covering complex and diverse algorithms from just 1000 training examples.
+Brain-inspired reasoning architectures Developing a model with the reasoning power of the
+brain has long been a goal in brain-inspired computing. Spaun 90 is one notable example, which uses
+spiking neural networks to create distinct modules corresponding to brain regions like the visual
+cortex and prefrontal cortex. This design enables an architecture to perform a range of cognitive
+tasks, from memory recall to simple reasoning puzzles. However, its reasoning relies on hand-
+designed algorithms, which may limit its ability to learn new tasks. Another significant model is the
+Tolman-Eichenbaum Machine (TEM) 91 , which is inspired by the hippocampal-entorhinal system’s
+role in spatial and relational memory tasks. TEM proposes that medial entorhinal cells create a
+basis for structural knowledge, while hippocampal cells link this basis to sensory information. This
+
+ 16
+ allows TEM to generalize and explains the emergence of various cell types like grid, border, and
+place cells. Another approach involves neural sampling models 92 , which view the neural signaling
+process as inference over a distribution, functioning similarly to a Boltzmann machine. These
+models often require hand-made rules to be set up for solving a specific reasoning task. In essence,
+while prior models are restricted to simple reasoning problems, HRM is designed to solve complex
+tasks that are hard for even advanced LLMs, without pre-training or task-specific manual design.
+Hierarchical memory The hierarchical multi-timescale structure also plays an important role in
+how the brain processes memory. Models such as Hierarchical Sequential Models 93 and Clockwork
+RNN 94 use multiple recurrent modules that operate at varying time scales to more effectively cap-
+ture long-range dependencies within sequences, thereby mitigating the forgetting issue in RNNs.
+Similar mechanisms have also been adopted in linear attention methods for memorizing long con-
+texts (see the Discussions section). Since HRM focuses on reasoning, full attention is applied for
+simplicity. Incorporating hierarchical memory into HRM could be a promising future direction.
+
+
+6 Discussions
+Turing-completeness of HRM Like earlier neural reasoning algorithms including the Universal
+Transformer 95 , HRM is computationally universal when given sufficient memory and time.
+In other words, it falls into the category of models that can simulate any Turing machine,
+overcoming the computational limitations of standard Transformers discussed previously in the in-
+troduction. Given that earlier neural algorithm reasoners were trained as recurrent neural networks,
+they suffer from premature convergence and memory intensive BPTT. Therefore, in practice, their
+effective computational depth remains limited, though still deeper than that of a standard Trans-
+former. By resolving these two challenges and being equipped with adaptive computation, HRM
+could be trained on long reasoning processes, solve complex puzzles requiring intensive depth-first
+search and backtracking, and move closer to practical Turing-completeness.
+Reinforcement learning with chain-of-thought Beyond fine-tuning using human-annotated CoT,
+reinforcement learning (RL) represents another widely adopted training methodology. However,
+recent evidence suggests that RL primarily unlocks existing CoT-like capabilities rather than dis-
+covering fundamentally new reasoning mechanisms 96,97,98,99 . Additionally, CoT-training with RL
+is known for its instability and data inefficiency, often requiring extensive exploration and careful
+reward design. In contrast, HRM takes feedback from dense gradient-based supervision rather than
+relying on a sparse reward signal. Moreover, HRM operates naturally in a continuous space, which
+is biologically plausible and avoids allocating the same computational resources to each token, even
+though tokens vary in their reasoning and planning complexity 16 .
+Linear attention Recurrence has been explored not only for its capability in universal computa-
+tion, but also as a means to replace the attention mechanism in Transformers, which suffers from
+quadratic time and memory complexity 100 . Recurrent alternatives offer a more efficient design by
+processing input tokens sequentially and predicting the next token at each time step, similar to early
+RNN-based language models.
+Some linear-attention variants, such as Log-linear Attention 101 , share an RNN-like state-update that
+can be interpreted as propagating multi-timescale summary statistics, thereby retaining long-range
+context without the quadratic memory growth of standard self-attention. However, substituting the
+attention mechanism alone does not change the fact that Transformers are still fixed-depth, and
+
+ 17
+ require CoT as a compensatory mechanism. Notably, linear attention can operate with a reduced
+key-value cache over extended contexts, making them more suitable for deployment on resource-
+constrained edge devices.
+
+
+7 Conclusion
+This work introduces the Hierarchical Reasoning Model, a brain-inspired architecture that lever-
+ages hierarchical structure and multi-timescale processing to achieve substantial computational
+depth without sacrificing training stability or efficiency. With only 27M parameters and train-
+ing on just 1000 examples, HRM effectively solves challenging reasoning problems such as ARC,
+Sudoku, and complex maze navigation–tasks that typically pose significant difficulties for contem-
+porary LLM and chain-of-thought models.
+Although the brain relies heavily on hierarchical structures to enable most cognitive processes,
+these concepts have largely remained confined to academic literature rather than being translated
+into practical applications. The prevailing AI approach continues to favor non-hierarchical models.
+Our results challenge this established paradigm and suggest that the Hierarchical Reasoning Model
+represents a viable alternative to the currently dominant chain-of-thought reasoning methods, ad-
+vancing toward a foundational framework capable of Turing-complete universal computation.
+Acknowledgements We thank Mingli Yuan, Ahmed Murtadha Hasan Mahyoub and Hengshuai
+Yao for their insightful discussions and valuable feedback throughout the course of this work.
+
+
+
+
+ 18
+ References
+ 1. Ian Goodfellow, Yoshua Bengio, and Aaron Courville. Deep Learning. MIT Press, 2016.
+ http://www.deeplearningbook.org.
+ 2. Kaiming He, X. Zhang, Shaoqing Ren, and Jian Sun. Deep residual learning for image
+ recognition. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR),
+ pages 770–778, 2015.
+ 3. Lena Strobl. Average-hard attention transformers are constant-depth uniform threshold
+ circuits, 2023.
+ 4. Tom Bylander. Complexity results for planning. In Proceedings of the 12th International Joint
+ Conference on Artificial Intelligence - Volume 1, IJCAI’91, page 274–279, San Francisco,
+ CA, USA, 1991. Morgan Kaufmann Publishers Inc. ISBN 1558601600.
+ 5. William Merrill and Ashish Sabharwal. A logic for expressing log-precision transformers. In
+ Neural Information Processing Systems, 2023.
+ 6. David Chiang. Transformers in DLOGTIME-uniform TC^0. Transactions on Machine
+ Learning Research, 2025.
+ 7. Lucas Lehnert, Sainbayar Sukhbaatar, DiJia Su, Qinqing Zheng, Paul McVay, Michael
+ Rabbat, and Yuandong Tian. Beyond a*: Better planning with transformers via search
+ dynamics bootstrapping. In First Conference on Language Modeling, 2024.
+ 8. Wilfried Bounsi, Borja Ibarz, Andrew Dudzik, Jessica B. Hamrick, Larisa Markeeva, Alex
+ Vitvitskyi, Razvan Pascanu, and Petar Veličković. Transformers meet neural algorithmic
+ reasoners. ArXiv, abs/2406.09308, 2024.
+ 9. William Merrill and Ashish Sabharwal. The parallelism tradeoff: Limitations of log-precision
+ transformers. Transactions of the Association for Computational Linguistics, 11:531–545,
+ 2023. doi: 10.1162/tacl_a_00562.
+10. Jason Wei, Yi Tay, et al. Chain-of-thought prompting elicits reasoning in large language
+ models, 2022. arXiv preprint arXiv:2201.11903.
+11. William Merrill and Ashish Sabharwal. The expressive power of transformers with chain of
+ thought. In ICLR, 2024.
+12. Xinyun Chen, Ryan A. Chi, Xuezhi Wang, and Denny Zhou. Premise order matters in
+ reasoning with large language models. ArXiv, abs/2402.08939, 2024.
+13. Rongwu Xu, Zehan Qi, and Wei Xu. Preemptive answer "attacks" on chain-of-thought
+ reasoning. In Annual Meeting of the Association for Computational Linguistics, 2024.
+14. Pablo Villalobos, Anson Ho, Jaime Sevilla, Tamay Besiroglu, Lennart Heim, and Marius
+ Hobbhahn. Will we run out of data? limits of llm scaling based on human-generated data.
+ arXiv preprint arXiv:2211.04325, 2022.
+15. Xinghao Chen, Anhao Zhao, Heming Xia, Xuan Lu, Hanlin Wang, Yanjun Chen, Wei Zhang,
+ Jian Wang, Wenjie Li, and Xiaoyu Shen. Reasoning beyond language: A comprehensive
+ survey on latent chain-of-thought reasoning, 2025.
+16. Xuan Shen, Yizhou Wang, Xiangxi Shi, Yanzhi Wang, Pu Zhao, and Jiuxiang Gu.
+ Training large language models to reason in a continuous latent space. arXiv preprint
+ arXiv:2412.07423, 2024.
+
+
+ 19
+ 17. Evelina Fedorenko, Steven T Piantadosi, and Edward AF Gibson. Language is primarily a
+ tool for communication rather than thought. Nature, 630(8017):575–586, 2024.
+18. Hongyu Wang, Shuming Ma, Li Dong, Shaohan Huang, Dongdong Zhang, and Furu Wei.
+ Deepnet: Scaling transformers to 1,000 layers. IEEE Transactions on Pattern Analysis and
+ Machine Intelligence, 2024.
+19. Timothy P Lillicrap and Adam Santoro. Backpropagation through time and the brain. Current
+ Opinion in Neurobiology, 55:82–89, 2019. ISSN 0959-4388. doi: https://doi.org/10.1016/j.
+ conb.2019.01.011.
+20. John D Murray, Alberto Bernacchia, David J Freedman, Ranulfo Romo, Jonathan D Wallis,
+ Xinying Cai, Camillo Padoa-Schioppa, Tatiana Pasternak, Hyojung Seo, Daeyeol Lee, et al.
+ A hierarchy of intrinsic timescales across primate cortex. Nature neuroscience, 17(12):1661–
+ 1663, 2014.
+21. Roxana Zeraati, Yan-Liang Shi, Nicholas A Steinmetz, Marc A Gieselmann, Alexander
+ Thiele, Tirin Moore, Anna Levina, and Tatiana A Engel. Intrinsic timescales in the
+ visual cortex change with selective attention and reflect spatial connectivity. Nature
+ communications, 14(1):1858, 2023.
+22. Julia M Huntenburg, Pierre-Louis Bazin, and Daniel S Margulies. Large-scale gradients in
+ human cortical organization. Trends in cognitive sciences, 22(1):21–31, 2018.
+23. Victor AF Lamme and Pieter R Roelfsema. The distinct modes of vision offered by
+ feedforward and recurrent processing. Trends in neurosciences, 23(11):571–579, 2000.
+24. Andre M Bastos, W Martin Usrey, Rick A Adams, George R Mangun, Pascal Fries, and Karl J
+ Friston. Canonical microcircuits for predictive coding. Neuron, 76(4):695–711, 2012.
+25. Klara Kaleb, Barbara Feulner, Juan Gallego, and Claudia Clopath. Feedback control guides
+ credit assignment in recurrent neural networks. Advances in Neural Information Processing
+ Systems, 37:5122–5144, 2024.
+26. Timothy P Lillicrap, Adam Santoro, Luke Marris, Colin J Akerman, and Geoffrey Hinton.
+ Backpropagation and the brain. Nature Reviews Neuroscience, 21(6):335–346, 2020.
+27. François Chollet. On the measure of intelligence (abstraction and reasoning corpus), 2019.
+ arXiv preprint arXiv:1911.01547.
+28. Francois Chollet, Mike Knoop, Gregory Kamradt, and Bryan Landers. Arc prize 2024:
+ Technical report. ArXiv, abs/2412.04604, 2024.
+29. Francois Chollet, Mike Knoop, Gregory Kamradt, Bryan Landers, and Henry Pinkard. Arc-
+ agi-2: A new challenge for frontier ai reasoning systems. arXiv preprint arXiv:2505.11831,
+ 2025.
+30. György Buzsáki. Gamma, alpha, delta, and theta oscillations govern cognitive processes.
+ International Journal of Psychophysiology, 39:241–248, 2000.
+31. György Buzsáki. Rhythms of the Brain. Oxford university press, 2006.
+32. Anja Pahor and Norbert Jaušovec. Theta–gamma cross-frequency coupling relates to the level
+ of human intelligence. Intelligence, 46:283–290, 2014.
+33. Adriano BL Tort, Robert W Komorowski, Joseph R Manns, Nancy J Kopell, and Howard
+ Eichenbaum. Theta–gamma coupling increases during the learning of item–context
+ associations. Proceedings of the National Academy of Sciences, 106(49):20942–20947, 2009.
+
+
+ 20
+ 34. Benjamin Scellier and Yoshua Bengio. Equilibrium propagation: Bridging the gap between
+ energy-based models and backpropagation. Frontiers in Computational Neuroscience, 11,
+ 2016.
+35. Guillaume Bellec, Franz Scherr, Anand Subramoney, Elias Hajek, Darjan Salaj, Robert
+ Legenstein, and Wolfgang Maass. A solution to the learning dilemma for recurrent
+ networks of spiking neurons. Nature Communications, 11, 07 2020. doi: 10.1038/
+ s41467-020-17236-y.
+36. Shaojie Bai, J Zico Kolter, and Vladlen Koltun. Deep equilibrium models. In Advances in
+ Neural Information Processing Systems, pages 690–701, 2019.
+37. Zhengyang Geng, Xinyu Zhang, Shaojie Bai, Yisen Wang, and Zhouchen Lin. On training
+ implicit models. ArXiv, abs/2111.05177, 2021.
+38. Katarina Begus and Elizabeth Bonawitz. The rhythm of learning: Theta oscillations as an
+ index of active learning in infancy. Developmental Cognitive Neuroscience, 45:100810, 2020.
+ ISSN 1878-9293. doi: https://doi.org/10.1016/j.dcn.2020.100810.
+39. Shaojie Bai, Zhengyang Geng, Yash Savani, and J. Zico Kolter. Deep Equilibrium
+ Optical Flow Estimation . In 2022 IEEE/CVF Conference on Computer Vision and Pattern
+ Recognition (CVPR), pages 610–620, 2022.
+40. Zaccharie Ramzi, Florian Mannel, Shaojie Bai, Jean-Luc Starck, Philippe Ciuciu, and
+ Thomas Moreau. Shine: Sharing the inverse estimate from the forward pass for bi-level
+ optimization and implicit models. ArXiv, abs/2106.00553, 2021.
+41. Shaojie Bai, Vladlen Koltun, and J. Zico Kolter. Stabilizing equilibrium models by jacobian
+ regularization. In International Conference on Machine Learning, 2021.
+42. Daniel Kahneman and P Egan. Thinking, fast and slow (farrar, straus and giroux, new york),
+ 2011.
+43. Matthew D Lieberman. Social cognitive neuroscience: a review of core processes. Annu. Rev.
+ Psychol., 58(1):259–289, 2007.
+44. Randy L Buckner, Jessica R Andrews-Hanna, and Daniel L Schacter. The brain’s default
+ network: anatomy, function, and relevance to disease. Annals of the new York Academy of
+ Sciences, 1124(1):1–38, 2008.
+45. Marcus E Raichle. The brain’s default mode network. Annual review of neuroscience, 38(1):
+ 433–447, 2015.
+46. Andrew Westbrook and Todd S Braver. Cognitive effort: A neuroeconomic approach.
+ Cognitive, Affective, & Behavioral Neuroscience, 15:395–415, 2015.
+47. Richard S. Sutton and Andrew G. Barto. Reinforcement Learning: An Introduction. MIT
+ Press, Cambridge, MA, 2018.
+48. Volodymyr Mnih, Koray Kavukcuoglu, David Silver, Alex Graves, Ioannis Antonoglou, Daan
+ Wierstra, and Martin A. Riedmiller. Playing atari with deep reinforcement learning. ArXiv,
+ abs/1312.5602, 2013.
+49. Matteo Gallici, Mattie Fellows, Benjamin Ellis, Bartomeu Pou, Ivan Masmitja,
+ Jakob Nicolaus Foerster, and Mario Martin. Simplifying deep temporal difference learning,
+ 2025.
+
+
+
+ 21
+ 50. Shuo Xie and Zhiyuan Li. Implicit bias of adamw: ℓ∞ norm constrained optimization.
+ ArXiv, abs/2404.04454, 2024.
+51. Lucas Prieto, Melih Barsbey, Pedro A. M. Mediano, and Tolga Birdal. Grokking at the edge of
+ numerical stability. In The Thirteenth International Conference on Learning Representations,
+ 2025.
+52. Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez,
+ Łukasz Kaiser, and Illia Polosukhin. Attention is all you need. In Advances in neural
+ information processing systems, pages 5998–6008, 2017.
+53. Meta AI. Llama 3: State-of-the-art open weight language models. Technical report, Meta,
+ 2024. URL https://ai.meta.com/llama/.
+54. Jianlin Su, Murtadha Ahmed, Yu Lu, Shengfeng Pan, Wen Bo, and Yunfeng Liu. Roformer:
+ Enhanced transformer with rotary position embedding. Neurocomputing, 568:127063, 2024.
+55. Noam M. Shazeer. Glu variants improve transformer. ArXiv, abs/2002.05202, 2020.
+56. Biao Zhang and Rico Sennrich. Root mean square layer normalization. ArXiv,
+ abs/1910.07467, 2019.
+57. Günter Klambauer, Thomas Unterthiner, Andreas Mayr, and Sepp Hochreiter. Self-
+ normalizing neural networks. In Neural Information Processing Systems, 2017.
+58. JAX Developers. jax.nn.initializers.lecun_normal. Google Research, 2025. URL
+ https://docs.jax.dev/en/latest/_autosummary/jax.nn.initializers.lecun_
+ normal.html. Accessed June 22, 2025.
+59. Yann LeCun, Léon Bottou, Genevieve B Orr, and Klaus-Robert Müller. Efficient backprop.
+ In Neural networks: Tricks of the trade, pages 9–50. Springer, 2002.
+60. Katie E Everett, Lechao Xiao, Mitchell Wortsman, Alexander A Alemi, Roman Novak,
+ Peter J Liu, Izzeddin Gur, Jascha Sohl-Dickstein, Leslie Pack Kaelbling, Jaehoon Lee, and
+ Jeffrey Pennington. Scaling exponents across parameterizations and optimizers. In Forty-first
+ International Conference on Machine Learning, 2024.
+61. Diederik P. Kingma and Jimmy Ba. Adam: A method for stochastic optimization, 2017.
+62. Rasmus Berg Palm, Ulrich Paquet, and Ole Winther. Recurrent relational networks. In Neural
+ Information Processing Systems, 2017.
+63. Jieyi Long. Large language model guided tree-of-thought. ArXiv, abs/2305.08291, 2023.
+64. Yilun Du, Jiayuan Mao, and Josh Tenenbaum. Learning iterative reasoning through energy
+ diffusion. ArXiv, abs/2406.11179, 2024.
+65. Kyubyong Park. Can convolutional neural networks crack sudoku puzzles? https:
+ //github.com/Kyubyong/sudoku, 2018.
+66. Single-digit techniques. https://hodoku.sourceforge.net/en/tech_singles.php.
+ Accessed: 2025-06-16.
+67. Tom Dillion. Tdoku: A fast sudoku solver and generator. https://t-dillon.github.io/
+ tdoku/, 2025.
+68. Jeffrey Seely, Yuki Imajuku, Tianyu Zhao, Edoardo Cetin, and Llion Jones. Sudoku-bench:
+ Evaluating creative reasoning with sudoku variants. arXiv preprint arXiv:2505.16135, 2025.
+69. Luke Darlow, Ciaran Regan, Sebastian Risi, Jeffrey Seely, and Llion Jones. Continuous
+ thought machines. arXiv preprint arXiv:2505.05522, 2025.
+
+ 22
+ 70. DiJia Su, Sainbayar Sukhbaatar, Michael Rabbat, Yuandong Tian, and Qinqing Zheng.
+ Dualformer: Controllable fast and slow thinking by learning with randomized reasoning
+ traces, 2025.
+71. Lucas Lehnert, Sainbayar Sukhbaatar, DiJia Su, Qinqing Zheng, Paul McVay, Michael
+ Rabbat, and Yuandong Tian. Beyond a*: Better planning with transformers via search
+ dynamics bootstrapping. In First Conference on Language Modeling, 2024.
+72. Mubbasir Kapadia, Francisco Garcia, Cory D. Boatright, and Norman I. Badler. Dynamic
+ search on the gpu. In 2013 IEEE/RSJ International Conference on Intelligent Robots and
+ Systems, pages 3332–3337, 2013. doi: 10.1109/IROS.2013.6696830.
+73. Isaac Liao and Albert Gu. Arc-agi without pretraining, 2025. URL https:
+ //iliao2345.github.io/blog_posts/arc_agi_without_pretraining/arc_agi_
+ without_pretraining.html.
+74. Lorenzo Posani, Shuqi Wang, Samuel P Muscinelli, Liam Paninski, and Stefano Fusi.
+ Rarely categorical, always high-dimensional: how the neural code changes along the cortical
+ hierarchy. bioRxiv, pages 2024–11, 2025.
+75. Mattia Rigotti, Omri Barak, Melissa R. Warden, Xiao-Jing Wang, Nathaniel D. Daw, Earl K.
+ Miller, and Stefano Fusi. The importance of mixed selectivity in complex cognitive tasks.
+ Nature, 497:585–590, 2013. doi: 10.1038/nature12160.
+76. Valerio Mante, David Sussillo, Krishna V. Shenoy, and William T. Newsome. Context-
+ dependent computation by recurrent dynamics in prefrontal cortex. Nature, 503(7474):78–84,
+ 2013. doi: 10.1038/nature12742.
+77. Earl K. Miller and Jonathan D. Cohen. An integrative theory of prefrontal cortex function.
+ Annual Review of Neuroscience, 24(1):167–202, 2001. doi: 10.1146/annurev.neuro.24.1.167.
+78. Wolfgang Maass. Real-time computing without stable states: a new framework for neural
+ computation based on perturbations. Neural Computation, 14(11):2531–2560, 2002. doi:
+ 10.1162/089976602760407955.
+79. Ege Altan, Sara A. Solla, Lee E. Miller, and Eric J. Perreault. Estimating the dimensionality
+ of the manifold underlying multi-electrode neural recordings. PLoS Computational Biology,
+ 17(11):e1008591, 2021. doi: 10.1371/journal.pcbi.1008591.
+80. Vardan Papyan, X. Y. Han, and David L. Donoho. Prevalence of neural collapse during the
+ terminal phase of deep learning training. Proceedings of the National Academy of Sciences,
+ 117(40):24652–24663, 2020. doi: 10.1073/pnas.2015509117.
+81. Cong Fang, Hangfeng He, Qi Long, and Weijie J. Su. Exploring deep neural networks via
+ layer–peeled model: Minority collapse in imbalanced training. Proceedings of the National
+ Academy of Sciences, 118(43):e2103091118, 2021. doi: 10.1073/pnas.2103091118.
+82. Zhihui Zhu, Tianyu Ding, Jinxin Zhou, Xiao Li, Chong You, Jeremias Sulam, and Qing Qu.
+ A geometric analysis of neural collapse with unconstrained features. In Advances in Neural
+ Information Processing Systems, volume 34 of NeurIPS, pages 29820–29834, 2021.
+83. Alex Graves, Greg Wayne, and Ivo Danihelka. Neural turing machines, 2014.
+84. Alex Graves, Greg Wayne, Malcolm Reynolds, Tim Harley, Ivo Danihelka, Agnieszka
+ Grabska-Barwińska, Sergio Gómez Colmenarejo, Edward Grefenstette, Tiago Ramalho, John
+ Agapiou, et al. Hybrid computing using a neural network with dynamic external memory.
+ Nature, 538(7626):471–476, 2016.
+
+ 23
+ 85. Lukasz Kaiser and Ilya Sutskever. Neural GPUs learn algorithms. In ICLR, 2016.
+ 86. Jonas Geiping, Sean McLeish, Neel Jain, John Kirchenbauer, Siddharth Singh, Brian R.
+ Bartoldson, Bhavya Kailkhura, Abhinav Bhatele, and Tom Goldstein. Scaling up test-time
+ compute with latent reasoning: A recurrent depth approach, 2025.
+ 87. Tiedong Liu and Kian Hsiang Low. Goat: Fine-tuned llama outperforms gpt-4 on arithmetic
+ tasks. ArXiv, abs/2305.14201, 2023.
+ 88. Alex Graves. Adaptive computation time for recurrent neural networks. ArXiv,
+ abs/1603.08983, 2016.
+ 89. Andrea Banino, Jan Balaguer, and Charles Blundell. Pondernet: Learning to ponder. ArXiv,
+ abs/2107.05407, 2021.
+ 90. Chris Eliasmith, Terrence C Stewart, Xuan Choo, Trevor Bekolay, Travis DeWolf, Yichuan
+ Tang, and Daniel Rasmussen. A large-scale model of the functioning brain. science, 338
+ (6111):1202–1205, 2012.
+ 91. James CR Whittington, Timothy H Muller, Shirley Mark, Guifen Chen, Caswell Barry, Neil
+ Burgess, and Timothy EJ Behrens. The tolman-eichenbaum machine: unifying space and
+ relational memory through generalization in the hippocampal formation. Cell, 183(5):1249–
+ 1263, 2020.
+ 92. Lars Buesing, Johannes Bill, Bernhard Nessler, and Wolfgang Maass. Neural dynamics as
+ sampling: a model for stochastic computation in recurrent networks of spiking neurons. PLoS
+ computational biology, 7(11):e1002211, 2011.
+ 93. Salah Hihi and Yoshua Bengio. Hierarchical recurrent neural networks for long-term
+ dependencies. In D. Touretzky, M.C. Mozer, and M. Hasselmo, editors, Advances in Neural
+ Information Processing Systems, volume 8. MIT Press, 1995.
+ 94. Jan Koutník, Klaus Greff, Faustino J. Gomez, and Jürgen Schmidhuber. A clockwork rnn. In
+ International Conference on Machine Learning, 2014.
+ 95. Mostafa Dehghani, Stephan Gouws, Oriol Vinyals, Jakob Uszkoreit, and Lukasz Kaiser.
+ Universal transformers, 2018. arXiv preprint arXiv:1807.03819.
+ 96. Yiping Wang, Qing Yang, Zhiyuan Zeng, Liliang Ren, Lucas Liu, Baolin Peng, Hao Cheng,
+ Xuehai He, Kuan Wang, Jianfeng Gao, Weizhu Chen, Shuohang Wang, Simon Shaolei Du,
+ and Yelong Shen. Reinforcement learning for reasoning in large language models with one
+ training example, 2025. URL https://arxiv.org/abs/2504.20571.
+ 97. Niklas Muennighoff. s1: Simple test-time scaling. arXiv preprint arXiv:2502.23456, 2025.
+ 98. Liang Wen, Yunke Cai, Fenrui Xiao, Xin He, Qi An, Zhenyu Duan, Yimin Du, Junchen Liu,
+ Lifu Tang, Xiaowei Lv, Haosheng Zou, Yongchao Deng, Shousheng Jia, and Xiangzheng
+ Zhang. Light-r1: Curriculum sft, dpo and rl for long cot from scratch and beyond, 2025.
+ 99. Xuefeng Li, Haoyang Zou, and Pengfei Liu. Limr: Less is more for rl scaling, 2025.
+100. Tri Dao and Albert Gu. Transformers are ssms: Generalized models and efficient algorithms
+ through structured state space duality. ArXiv, abs/2405.21060, 2024.
+101. Han Guo, Songlin Yang, Tarushii Goel, Eric P Xing, Tri Dao, and Yoon Kim. Log-linear
+ attention. arXiv preprint arXiv:2506.04761, 2025.
+
+
+
+
+ 24
+ \ No newline at end of file
diff --git a/papers/txt/ptrm2025_probabilistic_trm.txt b/papers/txt/ptrm2025_probabilistic_trm.txt
new file mode 100644
index 0000000..a00f31c
--- /dev/null
+++ b/papers/txt/ptrm2025_probabilistic_trm.txt
@@ -0,0 +1,906 @@
+ Probabilistic Tiny Recursive Model
+
+
+ Amin Sghaier Ali Parviz Alexia Jolicoeur-Martineau
+ Mila – Quebec AI Institute Mila – Quebec AI Institute Independent
+ ILLS & ETS Montreal
+
+ {amin.sghaier, ali.parviz}@mila.quebec
+arXiv:2605.19943v1 [cs.AI] 19 May 2026
+
+
+
+
+ alexia.jolicoeur-martineau@mail.mcgill.ca
+
+
+
+ Abstract
+ Tiny Recursive Models (TRM) solve complex reasoning tasks with a fraction of
+ the parameters of modern large language models (LLMs) by iteratively refining a
+ latent state and final answer. While powerful, their deterministic recursion can lead
+ to convergence at suboptimal solutions, without an escape mechanism. A common
+ workaround relies on task-specific input perturbations at test time combined with
+ answer aggregation via voting. We introduce Probabilistic TRM (PTRM), a task-
+ agnostic framework for test-time compute scaling that addresses this limitation
+ through stochastic exploration. PTRM injects Gaussian noise at each deep recursion
+ step, enabling parallel trajectories to explore diverse solution basins, and selects
+ among them using the model’s existing Q head (used for early stopping in the
+ original TRM). Without requiring retraining or task-specific augmentations, PTRM
+ enables substantial accuracy gains across benchmarks, including Sudoku-Extreme
+ (87.4% to 98.75%) and on various puzzles from Pencil Puzzle Bench (62.6% to
+ 91.2%). On the latter, PTRM achieves nearly double the accuracy of frontier LLMs
+ (91.2% vs. 55.1%) at less than 0.0001x the cost, using only 7M parameters.
+
+
+
+ PPBench Puzzles
+ sudoku, lightup, nurikabe, heyawake, and tapa Sudoku-Extreme
+ 100 100 98.75
+ 91.2 87.4
+ 80 80
+ Accuracy (%)
+
+
+
+
+ 62.6
+ 60 55.1 60 55
+ 40 34.7 40
+ 24.5 24.5
+ 20 20
+ 2 0 0 0 0
+ 0 0
+ Direct pred
+
+
+
+
+ Direct pred
+ gemini-3.1-pro
+
+ claude-opus-4-6
+
+
+
+ TRM
+
+
+
+
+ Deepseek R1
+
+
+
+ o3-mini-high
+
+
+
+ HRM
+
+ TRM
+ gpt-5.2@xhigh
+
+
+
+
+ PTRM (ours)
+
+
+
+
+ Claude 3.7 8K
+
+
+
+
+ PTRM (ours)
+ LLM ensemble
+
+
+
+
+ Chain-of-thought, pretrained Direct prediction Probabilistic recursive prediction (ours)
+ LLM ensemble Deterministic recursive prediction
+ Best of 7 strongest LLMs. Assumes access to a perfect verifier.
+
+ Figure 1: PTRM performance comparison. On various PPBench puzzles, PTRM boosts TRM
+ performance by 28.6 points without any retraining. It outperforms the strongest single frontier LLMs
+ by 56.5 points and an ensemble of the seven strongest LLMs (assuming a perfect verifier) by 36
+ points. On Sudoku-Extreme, PTRM reaches a state-of-the-art 98.75%.
+ 1 Introduction
+Tiny Recursive Models (TRM) [1] achieve strong performance on complex reasoning puzzles with
+orders of magnitude fewer parameters than the large language models (LLMs) they outperform on
+tasks like Sudoku-Extreme [2] and ARC-AGI [3, 4]. TRM and its predecessor Hierarchical Reasoning
+Model (HRM) [2] represent an emerging architectural alternative to standard autoregressive reasoning
+models. Rather than autoregressively generating chains of token-level reasoning, they recursively
+refine a latent state. This approach produces a single deterministic answer per input, fitting well with
+tasks where the answer is unique.
+Despite their strong performance, their deterministic inference does not make full use of their
+capabilities. We show that many of TRM’s incorrect answers are from rollouts trapped in bad latent
+space basins (i.e., regions of the latent space which decode to incorrect answers and from which the
+deterministic recursions cannot escape). This observation, which aligns with recent mechanistic work
+on related models [5], suggests that TRM has the capabilities to solve significantly more problems
+but is limited by its standard inference procedure.
+Although each puzzle has a unique correct answer, many distinct latent trajectories can reach it. This
+is analogous to reasoning LLMs, where many reasoning trajectories can lead to the same unique
+answer. However, being non-deterministic, LLMs can be randomly sampled in order to form different
+trajectories (including Chains of Thought and actual answer). By then selecting a trajectory using
+a voting mechanism or based on the answer’s projected value (via a verifier), LLMs can leverage
+test-time compute to achieve very high accuracy [6]. We propose a way to achieve similar test-time
+scaling performance gains by sampling stochastic latent trajectories, each producing a deterministic
+decoded answer, and selecting among the answers using the model’s own Q head.
+TRM’s Q head is trained jointly (as a correctness classifier) with the rest of the network and is
+conventionally used only at training time for adaptive computation (ACT) [7]. It carries valuable
+information that the standard inference procedure discards.
+We propose Probabilistic TRM (PTRM), a test-time compute scaling framework that introduces a
+new width scaling axis. At inference we run K parallel rollouts per puzzle, each receiving Gaussian
+noise injected into the latent at every deep recursion step. The noise causes rollouts to follow different
+latent trajectories and settle in different basins. Among the resulting candidate answers, the Q head
+is used to select the one most likely to be correct. PTRM requires no training changes and no
+task-specific test-time augmentation, yet, as illustrated in Figure 1, delivers substantial accuracy
+gains across diverse reasoning benchmarks.
+
+2 Background: Tiny Recursive Model
+Tiny Recursive Model (TRM) is a single network that iteratively refines a predicted answer y to a
+question x through recursive updates of a reasoning latent z. Specifically, a single latent recursion
+consists of n updates to the latent state z followed by one update to the predicted answer y, all using
+the same two-layer network fθ : z ← fθ (x + y + z) n times, then y ← fθ (y + z).
+fθ distinguishes the two update types by whether the input includes x. A deep recursion runs T
+latent recursions in sequence, with only the final one retaining gradients, allowing the model to
+leverage a large effective depth while keeping training efficient.
+Rather than doing one optimization step per sample, TRM is trained via deep supervision, which
+consists in keeping the previous latent state z and answer y as initialization (after being detached from
+the computational graph) for the next supervision step. This is done for up to Nsup supervision steps.
+The loss at each step is calculated using cross entropy between the predicted answer logits fO (y)
+(where fO is a linear output head) and the ground truth ytrue . This trains the network to progressively
+refine its prediction across reasoning steps. At inference, the recurrence can be unrolled for more
+steps than during training, providing a depth axis for test-time compute scaling (additional steps may
+correct otherwise-incorrect answers).
+Without a halting mechanism during training, each puzzle stays in the mini-batch for Nsup supervision
+steps rather than being replaced after each one. To avoid wasting compute on already-solved samples,
+an Adaptive Computational Time (ACT) halting mechanism is used. This is done by adding a binary
+cross entropy loss between a halting logit q̂ = fQ (y) (where fQ is a linear Q head) and the binary exact
+
+
+ 2
+ Correct answer Incorrect answer Cell accuracy Start End
+ Quick success Delayed success Failure
+
+ PC 2 (15% var)
+
+
+
+
+ PC 2 (36% var)
+
+
+
+
+ PC 2 (8% var)
+ PC 1 (84% var) PC 1 (58% var) PC 1 (85% var)
+
+
+ 1.0 1.0 1.0
+ 5.0 5.0 5
+
+
+
+
+ Cell accuracy
+ 2.5 0.9 2.5 0.9 0.8
+Q value
+
+
+
+
+ 0.0 0
+ 0.0 0.8 2.5 0.8
+ 0.6
+ 2.5 5
+ 5.0
+ 0.7 0.7
+ 5.0
+ 0 5 10 15 0 5 10 15 0 5 10 15
+ Supervision step Supervision step Supervision step
+ Figure 2: TRM Trajectory Modes. PCA projection of y (top) and Q value (solid, left axis) with cell
+ accuracy (dashed, right axis) across supervision steps (bottom) for three PPBench puzzles, illustrating
+ three trajectory modes (left to right): quick success, delayed success, and failure (Sec. 3). Latents are
+ projected into the principal plane per puzzle, so PC axes are not comparable across plots. Trajectories
+ fade from light (early steps) to dark (later steps). Circle marks the start and square marks end.
+
+
+ correctness of the predicted answer $\hat{y} = \arg\max f_O(y)$: $L_{\text{step}} = \mathrm{CE}(f_O(y), y_{\text{true}}) + \mathrm{BCE}(\hat{q}, \mathbb{1}[\hat{y} = y_{\text{true}}])$.
+ The Q head thus allows the supervision loop to halt early on samples where $\mathrm{sigmoid}(\hat{q}) > 0.5$,
+ improving data efficiency. During inference, the Q head is not used, and the model performs Nsup
+ supervision steps to maximize answer correctness.
+ While TRM is powerful, it sometimes gets stuck in incorrect solutions. In the next section, we will
+ investigate such failure cases in order to determine a way to remedy them.
+
+
+ 3 Problem: When Does TRM Fail?
+
+ 3.1 Analysis of failures and successes
+
+ We present observations about TRM that motivate our method. In this section, we train a TRM on
+ multiple Pencil Puzzle Bench (PPBench) [8] puzzles and inspect the latent dynamics and Q head
+ behavior across supervision steps on a held-out validation set. For each puzzle, we record the latent
+ yt and the Q logit q̂t = fQ (yt ) at every supervision step t = 1, . . . , Nsup , project the latents into
+ the principal plane (PCA per puzzle), and jointly plot the Q value alongside cell accuracy (fraction
+ of correct cells in the predicted answer) over supervision steps. Figure 2 shows paired PCA and
+ Q/cell-accuracy plots for three representative puzzles, illustrating three trajectory modes we observe:
+ Quick success: the trajectory transitions in a few steps from its starting location to a convergence
+ region and remains there. Cell accuracy and the Q value rise together and saturate near their maxima
+ within the same few steps.
+ Delayed success: the trajectory initially oscillates around one region and remains there for multiple
+ supervision steps before sharply escaping to a different region where it converges. During the initial
+
+
+ 3
+ phase, the Q value is negative, and at the step where the trajectory escapes, both Q value and cell
+accuracy spike together.
+Failure: the trajectory oscillates in a bounded region without converging. Cell accuracy never reaches
+near 100%, and the Q value stays negative for all supervision steps.
+We refer to latent space regions that trajectories remain in across multiple supervision steps and
+exhibit similar cell accuracy throughout as basins. Basins where cell accuracy is near-maximal are
+good basins and basins where it is not are bad basins. Initially, failures and delayed successes behave
+similarly (both are caught in bad basins with negative Q). They diverge only later in their trajectories,
+when delayed successes find an escape to a good basin while failures remain stuck.
+
+3.2 The Q head tracks trajectory quality
+
+[Figure 3 plot. x-axis: Supervision step (0, 2, 4, 6, 8, 10, 12, 14). Left axis: Q value (−6, −4,
+−2, 0, 2, 4, 6). Right axis: Cell accuracy (0.60, 0.65, 0.70, 0.75, 0.80, 0.85, 0.90, 0.95, 1.00).
+Legend: Incorrect (28); Correct (69); Cell accuracy (right axis).]
+
+Figure 3: Q value follows cell accuracy across reasoning. Mean Q value (solid, left axis) and mean
+cell accuracy (dashed, right axis) over supervision steps, aggregated over 100 PPBench validation
+puzzles, separated by final correctness (green: correct, red: incorrect).
+
+Across all three modes (failures, delayed successes, and quick successes), we find that the Q head’s
+value closely tracks cell accuracy at every supervision step. To further confirm this, Figure 3
+aggregates trajectories from 100 PPBench validation puzzles, separating them by final-answer
+correctness. The aggregate view corroborates the per-puzzle observation: mean Q and mean cell
+accuracy rise together on correct trajectories and remain mostly flat on incorrect ones. Moreover, at
+convergence, the Q logit sharply separates the two populations where q̂ ≈ +6 (sigmoid ≈ 1) for
+correct trajectories and q̂ ≈ −6 (sigmoid ≈ 0) for incorrect ones. The Q head is therefore a reliable
+learned indicator of whether a trajectory has reached a good basin.
+Given the Q head’s ability to distinguish good from bad trajectories, a natural question follows:
+can we leverage the Q head to identify better trajectories? The main challenge is that the standard
+TRM is inherently deterministic, and thus cannot be used to sample different trajectories for a given
+problem. In the next section, we will show that by simply adding Gaussian noise to the latent state,
+we can sample different parallel trajectories and leverage the Q head to pick the best one.
+
+4 Method: Test-Time Compute Scaling via Stochastic Rollouts
+We propose Probabilistic TRM (PTRM), an inference-time procedure that makes the TRM recursion
+stochastic and selects the best of K resulting trajectories. PTRM requires no special training and
+can be readily applied to any pretrained TRM model. Furthermore, it requires no task-specific
+augmentations. PTRM works as follows: at each supervision step, we add Gaussian noise (scaled by
+σ) to the latent state input. The Q head fQ scores each candidate latent output, and the one with the
+highest Q value is selected and then decoded using the model’s output head fO . The algorithm in
+Figure 4 (left) states this formally. PTRM offers two complementary benefits: 1) it enables trajectories
+to escape bad basins where deterministic TRM remains stuck, and 2) it introduces width as a new
+axis for test-time scaling.
+
+4.1 Escaping bad basins
+
+In Sec. 3, we found that some failed deterministic trajectories are caught in bad solution basins in
+latent space, with no way to escape. PTRM lets us test whether stochastic perturbations are enough
+for some of the rollouts of a previously failed puzzle to reach a good solution basin. Figure 5 shows
+K=100 independent rollouts, from the same failed puzzle used in Figure 2 (which fails at K=1),
+
+
+ 4
+PTRM Inference
+
+ 1: Input: puzzle x, rollouts K,
+ 2:        supervision steps D, noise scale σ
+ 3: for k = 1, . . . , K in parallel do
+ 4:     Initialize z_0^{(k)}, y_0^{(k)}
+ 5:     for t = 1, . . . , D do
+ 6:         z_{t−1}^{(k)} += ϵ,   ϵ ∼ N(0, σ^2 I)
+ 7:         z_t^{(k)}, y_t^{(k)} ← rec(x, z_{t−1}^{(k)}, y_{t−1}^{(k)})
+ 8:     end for
+ 9:     ŷ^{(k)} ← arg max f_O(y_D^{(k)})
+10:     q̂^{(k)} ← f_Q(y_D^{(k)})
+11: end for
+12: return ŷ^{(k^∗)},   k^∗ = arg max_k q̂^{(k)}
+
+[Diagram, right panel]
+(a) Standard TRM (deterministic): puzzle → ··· → answer. Depth axis: D deep recursion steps.
+(b) PTRM (ours): K stochastic rollouts + Q-head selection. Width axis: K rollouts (k = 1, k = 2,
+    . . . , k = K), each running puzzle → +ϵ → +ϵ → +ϵ → ···, followed by arg max_k Q_k → final answer.
+Legend: deep recursion step; +ϵ Gaussian noise injection.
+
+
+
+
+Figure 4: Left: PTRM inference procedure (the rec() function refers to a deep recursion step). Right:
+PTRM mechanism. (a) Standard TRM: a single deterministic rollout. (b) PTRM: K stochastic latent
+rollouts with Gaussian noise ϵ at each deep recursion step, with the Q head selecting the final answer.
+
+
+projected into the principal plane. Most rollouts (92%) remain stuck in the same bad basin, while
+a minority (8%) escape to a distinct region in latent space and produce correct answers. We also
+observe that recurrent noise creates a per-rollout probability of escape: at K = 5 no rollouts escape,
+at K = 25 one does, and at K = 100 eight do. This confirms that noise provides the stochasticity
+needed to occasionally find an escape trajectory.
+
+4.2 Width scaling
+
+Since more rollouts per puzzle compound the chance that at least one reaches a good basin, the
+number of rollouts K is a natural quantity to scale. Given K independent rollouts, pass@K (any
+rollout correct) is the oracle upper bound and best-Q@K (the rollout with highest q̂ is correct) is a
+metric available at inference without a correctness oracle. The choice of Q as selector is motivated by
+Sec. 3’s observation that Q accurately separates correct from incorrect trajectories (Figure 3).
+Figure 6 shows pass@K and best-Q@K as K grows, averaged over 3 seeds on the held-out PPBench
+validation set (sudoku, nurikabe, tapa, lightup, and heyawake). Both metrics rise from 76.4% at
+K = 1 to 89.5% at K = 100, a gain of 13 percentage points. Across all tested K, the gap between
+pass@K and best-Q@K stays under 1pp, making the Q head a strong verifier on this validation set.
+By contrast, mode@K (most frequent answer across rollouts) rises by only 1.3pp over the same
+range, showing that the width-scaling gains come mostly from the Q head’s ability to identify correct
+solutions even when they are rare.
+Interaction with depth scaling. Depth is another scaling axis already supported by TRM, which
+consists of running more deep recursions (supervision steps) at inference than the Nsup the model
+was trained on. On the deterministic baseline (K=1), tripling the depth from 16 to 48 steps raises
+PPBench validation accuracy from 76.4% to 79.5% (+3.1pp). At higher K, depth scaling only
+provides additional gains on specific puzzle types such as sudoku (+4pp at K = 100). Both depth
+and width scaling can be seen as ways to explore the model’s solution space. Since rollouts are
+independent and parallelizable while extra depth is sequential, width is the more practical scaling
+axis.
+PTRM unlocks a simple and task-agnostic recipe for scaling TRM test-time compute. The next
+section evaluates the method across multiple benchmarks and against several baselines, including
+frontier LLMs.
+
+5 Experiments
+This section evaluates PTRM’s performance on diverse reasoning benchmarks. We compare against
+the deterministic TRM baseline, a non-recursive direct-prediction baseline, and frontier LLMs.
+Across several PPBench puzzles [8], Sudoku-Extreme [2], Maze-Hard [2], and ARC-AGI-2 [4],
+PTRM substantially boosts the performance of each pretrained TRM using only inference compute.
+
+
+ 5
+ Correct (8)
+ 10 Incorrect (92)
+ Start
+ 8 End 92.5 pass@K
+ 90.0 best-Q@K
+ 6 mode@K
+ 87.5
+PC 2 (34% var)
+
+
+
+
+ PPBench accuracy (%)
+ 4 85.0
+
+ 2
+ 82.5
+ 80.0
+ 0
+ 77.5
+ 2 75.0
+ 72.5
+ 4 1 5 10 25 100
+ 2.5 0.0 2.5 5.0 7.5 10.0 12.5 Rollouts per puzzle K (log scale)
+ PC 1 (53% var)
+
+Figure 5: Stochastic rollouts escape bad basins. Principal plane projection of K = 100 independent
+rollouts of the same failed puzzle as in Figure 2 (right). 92 rollouts remain caught in the bad
+basin (red). 8 escape to a good basin and produce correct answers (green).
+
+Figure 6: Width scaling. pass@K, best-Q@K, and mode@K as K grows, averaged over 3 seeds on a
+held-out PPBench validation set. The Q head is a strong verifier on the tested puzzles,
+consistently outperforming selection of the most frequent answer.
+
+
+ 5.1 Setup
+
+Datasets. Pencil Puzzle Bench (PPBench) [8] consists of 62,231 constraint-satisfaction pencil puzzles
+(from 94 puzzle types). From the full PPBench dataset, 300 puzzles (15 puzzles from 20 types)
+selected by Waugh [8] are held out to form the golden set. From the remainder we hold out a
+fixed-size validation set of 100 puzzles per puzzle type (50 for tapa, due to its smaller base size),
+and the rest forms the training set. We filter all three sets to puzzles of six types (sudoku, lightup,
+nurikabe, shakashaka, heyawake, and tapa) of grid size 9×9 for sudoku, and 10×10 for the rest.
+We use the validation set to track performance during training and select the final checkpoint. We
+report per-puzzle accuracy on five of these types on the golden set (TRM already reaches 100% on
+shakashaka, so we omit it from the reported results), with aggregate scores sample-weighted across
+types. We also report results on the Sudoku-Extreme, Maze-Hard, and ARC-AGI-2 datasets.
+ Models and inference. For each benchmark we use a standard TRM checkpoint. For Sudoku-
+ Extreme we use the TRM-MLP variant (which the TRM paper showed to be stronger on Sudoku),
+ and for the other datasets, we use TRM-Att. PTRM inference uses K parallel rollouts each running
+ D supervision steps with Gaussian noise of scale σ added to the latent state at each supervision step.
+ The selected configuration (K, D, σ) varies by benchmark and is given alongside each result. Metrics
+ are averaged across three seeds.
+ Baselines. To isolate the contribution of PTRM’s stochastic rollouts from the underlying backbone,
+ we report standard TRM performance (the same checkpoint as PTRM, run deterministically). For
+ each dataset, we report the performance of frontier LLMs. For Sudoku-Extreme, Maze-Hard, and
+ ARC-AGI-2 we additionally report the published direct prediction and TRM baselines from [1].
+ Cost estimation. PPBench provides the dollar cost per attempt for each LLM. We convert PTRM’s
+ wall-clock to a comparable dollar figure using a single H100 at $2.50/hr (standard cloud pricing [9])
+ so that cost = $2.50 · tpuzzle /3600, where tpuzzle is the time (in seconds) to complete a puzzle.
+
+
+ 5.2 Pencil Puzzle Bench
+
+ 5.2.1 Per-puzzle accuracy
+
+ Table 1 reports per-puzzle accuracy on the PPBench golden set. PTRM at K=100, D=48, σ=0.2
+ raises aggregate best-Q@K from 62.6% to 91.2%. Increasing supervision depth alone (K=1, D=48)
+ gives a small boost over the standard TRM baseline (K=1, D=16). Most of the gain comes
+ from scaling width (stochastic rollouts). The largest improvements are on puzzle types where
+
+
+ 6
+ the deterministic baseline performed the worst (most headroom): sudoku improves from 46.7% to
+97.8% and tapa from 40.0% to 80.0%.
+
+ % accuracy # Params sudoku lightup nurikabe heyawake tapa agg.
+ Direct prediction 27M 0.0 0.0 0.0 14.3 0.0 2.0
+ TRM (K=1, D=16) 7M 46.7 87.5 74.1 85.7 40.0 62.6
+ TRM (K=1, D=48) 7M 57.8 87.5 74.1 85.7 40.0 66.0
+ PTRM, best-Q@K (K=100, D=16) 7M 93.3 100 88.9 85.7 80.0 89.8
+ PTRM, best-Q@K (K=100, D=48) 7M 97.8 100 88.9 85.7 80.0 91.2
+Table 1: PPBench per-puzzle accuracy on the golden set. PTRM uses the same backbone as
+the deterministic TRM. Scaling depth alone (K=1, D=48) lifts aggregate accuracy by 3.4 points
+over the standard D=16 baseline. Combining depth with K=100 stochastic (σ=0.2) rollouts raises
+accuracy by 28.6 percentage points overall. The direct-prediction baseline is a larger transformer
+trained on the same data.
+
+
+
+5.2.2 Comparison with frontier LLMs on golden set
+PPBench reported per-puzzle results for several frontier LLMs using two strategies: 1) direct response
+from a single prompt, and 2) multi-turn agentic strategy with verification. We report results for direct
+and any (best of any strategy attempted, including agentic). The agentic strategy gives the LLM
+substantially more resources than PTRM has access to. It provides the LLM the ability to iteratively
+verify each move with a perfect verifier. The direct strategy is the fairer comparison since, while
+it may use the model provider’s reasoning harness, it does not have direct access to a multi-turn
+verifier (the LLM could still self-verify by writing verification code within the same response). We
+additionally observe that the agentic strategy was applied selectively in the published PPBench data:
+across the LLMs we compare against, only 9.6% of direct failures on the golden set were retried
+with agentic. We restrict the comparison to the 7 strongest LLMs that attempted every puzzle in our
+golden set: claude-opus-4-6@thinking, gpt-5.2@xhigh, gemini-3.1-pro, gpt-5.2@high,
+claude-sonnet-4-6@thinking, gpt-5.2@medium, and kimi-k2.5. Table 2 lists the top 3 in
+each strategy block.
+We additionally report an ensemble score formed from these 7 LLMs where a puzzle counts as solved
+if at least one of them solved it via any strategy. This ensemble setup is deliberately stacked against
+PTRM. It assumes a perfect verifier since, if any of the 7 LLMs produced a correct answer under
+any strategy, the ensemble counts it as solved, even though in practice we would not have access
+to an oracle verifier. Although it is not deployable, we include the ensemble to demonstrate that
+even under these heavily favorable conditions, frontier LLMs fall well short of PTRM. Ensemble
+cost-per-attempt averages over the attempts of all 7 models on each puzzle, and cost-per-correct
+divides total cost by the number of puzzles the ensemble solved.
+Table 2 reports the comparison. PTRM exceeds the strongest single LLM (direct strategy) by 57
+points aggregate (91.2% vs. 34.7%), and exceeds the LLM ensemble by 36 points (91.2% vs. 55.1%)
+despite the ensemble’s stacked advantages. Cost per attempt is several orders of magnitude higher for
+LLMs than PTRM.
+
+5.3 Sudoku-Extreme, Maze-Hard, and ARC-AGI-2
+
+For each benchmark we use the standard TRM checkpoint trained as described in [1] without
+modification (TRM-MLP for Sudoku-Extreme and TRM-Att for Maze-Hard and ARC-AGI-2).
+Table 3 summarizes results on all three.
+On Sudoku-Extreme, PTRM at K=100, D=64, σ=0.3 raises the deterministic baseline of 87.3% to
+99.06% pass@K and 98.75% best-Q@K, achieving state of the art.
+On Maze-Hard, PTRM at K=100, D=16, σ=1.0 reaches 95.63% pass@K, an 11.83 point gain
+over the 83.8% deterministic baseline. mode@K gives the best PTRM accuracy here at 86.73%
+(+2.93 points), with best-Q@K slightly behind at 85.17% (+1.37 points). While pass@K shows
+that PTRM is able to unlock several correct answers, the Q head identifies them less reliably than on
+the previous benchmarks.
+
+
+ 7
+ % accuracy sudoku lightup nurikabe heyawake tapa agg. $/att. $/corr.
+ Direct
+ gemini-3.1-pro 6.7 75.0 22.2 0.0 30.0 24.5 $0.40 $1.62
+ gpt-5.2@xhigh 20.0 50.0 0.0 0.0 50.0 24.5 $1.79 $7.29
+ claude-opus-4-6@thinking 0.0 87.5 44.4 0.0 60.0 34.7 $2.91 $8.40
+ Any strategy (direct or agentic)†
+ gemini-3.1-pro 6.7 87.5 33.3 0.0 40.0 30.6 $10.38 $33.91
+ gpt-5.2@xhigh 33.3 75.0 0.0 0.0 60.0 34.7 $3.09 $8.90
+ claude-opus-4-6@thinking 0.0 87.5 44.4 0.0 70.0 36.7 $4.38 $11.92
+ LLM ensemble†
+ Any strategy (direct or agentic) 46.7 100 44.4 0.0 80.0 55.1 $2.66 $38.51
+ Ours, trained from scratch, 7M parameters
+ PTRM, best-Q@K 97.8 100 88.9 85.7 80.0 91.2 $0.001 $0.001
+Table 2: PTRM vs. frontier LLMs on PPBench golden. Per-puzzle accuracy and per-attempt /
+per-correct cost on the golden set. LLM costs are from PPBench. PTRM cost is estimated from H100
+wall-clock (Sec. 5.1). The direct and agentic blocks list the 3 highest scoring LLMs on aggregate,
+and the ensemble row uses all 7 listed in Sec. 5.2.2. † Assumes access to a perfect verifier.
+
+
+
+
+On ARC-AGI-2, the standard inference pipeline applies data augmentations and votes across them.
+PTRM adds K stochastic rollouts per augmentation. For selection, we pick the rollout with the
+highest Q value within each augmentation, then vote across augmentations as in the standard pipeline.
+With K=25 and σ=0.2, PTRM lifts pass@1 from 7.36% to 8.47% and pass@100 from 14.31% to
+15.97% over our deterministic TRM baseline, while matching it at pass@2.
+
+
+ Sudoku-Extreme Maze-Hard ARC-AGI-2
+ Method # Params Acc. (%) Acc. (%) pass@1 pass@2 pass@100
+ HRM 27M 55.0 74.5 – 5.0 –
+ TRM 5M / 7M† 87.4 85.3 – 7.8 –
+ Ours
+ Standard TRM, our reproduction 5M / 7M† 87.28 83.80 7.36 9.72 14.31
+ PTRM 5M / 7M† 98.75 86.73 8.47 9.72 15.97
+Table 3: Sudoku-Extreme, Maze-Hard, and ARC-AGI-2 results. For Sudoku-Extreme, K=100,
+D=64, σ=0.3. For Maze-Hard, K=100, D=16, σ=1.0. For ARC-AGI-2, K=25, D=16, σ=0.2.
+pass@k for ARC-AGI-2 reports the top-k predictions from the augmentation-voting pipeline. PTRM
+shows an accuracy improvement over standard TRM across all 3 benchmarks. † Following [1], 5M
+for Sudoku-Extreme (TRM-MLP), 7M for Maze-Hard and ARC-AGI-2 (TRM-Att).
+
+
+5.4 Q head selection as σ grows
+
+With a higher σ value, PTRM finds many correct solutions that the deterministic inference misses.
+For instance, on Maze-Hard, the deterministic model solves 83.8% of puzzles, but PTRM raises
+pass@K to nearly 96%. The extent to which PTRM helps depends on the task, but on every dataset
+we tested, it unlocks correct solutions well beyond the deterministic model’s reach.
+TRM’s jointly trained Q head serves as a strong verifier on most tasks. On PPBench and Sudoku-
+Extreme, best-Q@K reaches values within a point of the saturated pass@K, so PTRM’s exploration
+translates directly into accuracy gains. On Maze-Hard, more exploration (higher σ) produces
+significantly more correct rollouts, but the existing Q head is not able to identify them, leaving
+performance on the table. The gap between best-Q@K and pass@K represents headroom for a
+stronger verifier which is left for future work. Appendix B reports the full σ sweep.
+
+
+ 8
+ 6 Related Work
+
+A long line of work explores recursive computation for iterative reasoning and representation re-
+finement. Early examples include Universal Transformers [10], Mixture-of-Recursions [11], Deep
+Thinking models [12, 13, 14], and HRM [2], all of which investigate the use of repeated computation
+steps to improve reasoning performance. More recent work has introduced methods to substantially
+accelerate TRM training [15], while TRM-style recursive architectures have also been extended to
+language modeling tasks [16].
+Building on this broader perspective of recursive computation, a growing body of work studies
+latent-space reasoning through the reuse of hidden states. Hao et al. [17] propose continuous
+“thinking tokens” derived from Chain-of-Thought (CoT) traces [18], which are autoregressively
+generated and appended to the model context, enabling reasoning directly in latent space without
+producing intermediate textual outputs. Similarly, Zhu et al. [19] formalize learning by superposition
+and demonstrate improvements on tasks such as graph reachability. By avoiding explicit token
+sampling and implicitly representing multiple reasoning trajectories, these approaches may mitigate
+the unfaithfulness and backtracking often observed in standard autoregressive reasoning [20, 21].
+Related to our work, Baek et al. [22] propose a generative version of TRM where the hidden state
+z is sampled rather than computed deterministically. This improves performance on multiple tasks, but requires
+retraining. Efstathiou and Balwani [23] (concurrent work) propose a similar test-time compute
+method where they only apply noise in the initial hidden state z, while we apply noise at every
+supervision step. Furthermore, they test their method on a small subset of the Sudoku-Extreme
+dataset, and treat it as a proof-of-concept that needs to be developed and tested further. Note that
+Baek et al. [22] also tested applying noise to the initial z with TRM and obtained negative results (no
+improvement in accuracy on two datasets).
+Our observations in Sec. 3 are consistent with the mechanistic analysis of Ren and Liu [5], who
+identify spurious fixed points in HRM’s latent dynamics on Sudoku-Extreme. Their method mitigates
+these attractors through a combination of task-specific training data augmentation, inference-time
+input perturbations, and model bootstrapping across training checkpoints, thereby effectively in-
+creasing test-time compute. However, these interventions are comparatively less general and less
+computationally efficient. In contrast, we observe analogous basin structure in TRM across multiple
+puzzle types and achieve attractor escape using a substantially simpler, task-agnostic mechanism:
+injecting Gaussian noise into the latent state at each supervision step while using a single deterministic
+checkpoint.
+
+
+7 Conclusion
+
+In this work, we introduced Probabilistic TRM (PTRM), a novel test-time scaling paradigm for
+Tiny Recursive Models (TRM) through parallel exploration and selection. This approach scales
+test-time compute using width (K parallel rollouts), yielding substantially larger gains than depth
+scaling (increasing deep recursion steps) alone. PTRM requires no retraining and does not rely on
+task-specific data augmentations, making it extremely easy to use and versatile.
+By scaling both width and depth, PTRM obtains significant gains in accuracy when tested on a wide
+selection of puzzles. On PPBench (Sudoku, Lightup, Nurikabe, Heyawake, Tapa puzzles), PTRM
+obtains nearly twice the accuracy (91.2%; $0.001 cost) of an ensemble of SOTA LLMs (55.1%; $38.51
+cost) at less than 0.0001x the cost. Furthermore, PTRM improves accuracy on Sudoku-Extreme (from
+87.28% to 98.75%), Maze-Hard (from 83.80% to 86.73%), and ARC-AGI-2 (from 7.36% to 8.47% pass@1).
+Limitations. Our experiments focus on reasoning puzzles rather than general tasks. We only test
+on a subset of PPBench puzzles. We are limited to puzzles with a small grid-size due to limited
+computational resources. It is not guaranteed that the method works as well for all types of problems
+(e.g., accuracy gains on ARC-AGI-2 and Heyawake are smaller).
+Future work. It would be interesting to understand why some puzzles benefit from test-time scaling
+more than others. We suspect that problems that are harder to verify (e.g., ARC-AGI-2) benefit less
+from PTRM because the Q head may struggle to distinguish correct solutions from incorrect ones.
+Developing stronger verifiers than the existing Q head is an interesting direction for future work.
+
+
+ 9
+ References
+ [1] Alexia Jolicoeur-Martineau. Less is more: Recursive reasoning with tiny networks. arXiv
+ preprint arXiv:2510.04871, 2025.
+ [2] Guan Wang, Jin Li, Yuhao Sun, Xing Chen, Changling Liu, Yue Wu, Meng Lu, Sen Song, and
+ Yasin Abbasi Yadkori. Hierarchical reasoning model. arXiv preprint arXiv:2506.21734, 2025.
+ [3] François Chollet. On the measure of intelligence. arXiv preprint arXiv:1911.01547, 2019.
+ [4] Francois Chollet, Mike Knoop, Gregory Kamradt, Bryan Landers, and Henry Pinkard. Arc-
+ agi-2: A new challenge for frontier ai reasoning systems. arXiv preprint arXiv:2505.11831,
+ 2025.
+ [5] Zirui Ren and Ziming Liu. Are your reasoning models reasoning or guessing? a mechanistic
+ analysis of hierarchical reasoning models. arXiv preprint arXiv:2601.10679, 2026.
+ [6] Charlie Snell, Jaehoon Lee, Kelvin Xu, and Aviral Kumar. Scaling llm test-time compute opti-
+ mally can be more effective than scaling model parameters. arXiv preprint arXiv:2408.03314,
+ 2024.
+ [7] Alex Graves. Adaptive computation time for recurrent neural networks. arXiv preprint
+ arXiv:1603.08983, 2016.
+ [8] Justin Waugh. Pencil puzzle bench: A benchmark for multi-step verifiable reasoning. arXiv
+ preprint arXiv:2603.02119, 2026.
+ [9] Vast.ai. Rent h100 pcie gpus on vast.ai. https://vast.ai/pricing/gpu/H100-PCIE, 2026.
+ Accessed: 2026-05-01.
+[10] Mostafa Dehghani, Stephan Gouws, Oriol Vinyals, Jakob Uszkoreit, and Łukasz Kaiser. Uni-
+ versal transformers. arXiv preprint arXiv:1807.03819, 2018.
+[11] Sangmin Bae, Yujin Kim, Reza Bayat, Sungnyun Kim, Jiyoun Ha, Tal Schuster, Adam Fisch,
+ Hrayr Harutyunyan, Ziwei Ji, Aaron Courville, et al. Mixture-of-recursions: Learning dynamic
+ recursive depths for adaptive token-level computation. arXiv preprint arXiv:2507.10524, 2025.
+[12] Avi Schwarzschild, Eitan Borgnia, Arjun Gupta, Furong Huang, Uzi Vishkin, Micah Goldblum,
+ and Tom Goldstein. Can you learn an algorithm? generalizing from easy to hard problems with
+ recurrent networks. Advances in Neural Information Processing Systems, 34:6695–6706, 2021.
+[13] Arpit Bansal, Avi Schwarzschild, Eitan Borgnia, Zeyad Emam, Furong Huang, Micah Goldblum,
+ and Tom Goldstein. End-to-end algorithm synthesis with recurrent networks: Extrapolation
+ without overthinking. Advances in Neural Information Processing Systems, 35:20232–20242,
+ 2022.
+[14] Jay Bear, Adam Prugel-Bennett, and Jonathon Hare. Rethinking deep thinking: Stable learning
+ of algorithms using lipschitz constraints. Advances in Neural Information Processing Systems,
+ 37:97027–97052, 2024.
+[15] Navid Hakimi. Form follows function: Recursive stem model. arXiv preprint arXiv:2603.15641,
+ 2026.
+[16] Yinxi Li, Jiaao Chen, Fang Wu, Jiakai Yu, Heli Qi, Weihao Xuan, Haokai Zhao, Pengyu Nie,
+ Di Jin, and Xiangru Tang. Learning multi-step reasoning via persistent latent state propagation.
+ In Workshop on Latent & Implicit Thinking – Going Beyond CoT Reasoning,
+ 2026.
+[17] Shibo Hao, Sainbayar Sukhbaatar, DiJia Su, Xian Li, Zhiting Hu, Jason Weston, and Yuandong
+ Tian. Training large language models to reason in a continuous latent space. arXiv preprint
+ arXiv:2412.06769, 2024.
+[18] Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Fei Xia, Ed Chi, Quoc V Le,
+ Denny Zhou, et al. Chain-of-thought prompting elicits reasoning in large language models.
+ Advances in neural information processing systems, 35:24824–24837, 2022.
+
+
+ 10
+ [19] Hanlin Zhu, Shibo Hao, Zhiting Hu, Jiantao Jiao, Stuart Russell, and Yuandong Tian. Reasoning
+ by superposition: A theoretical perspective on chain of continuous thought. arXiv preprint
+ arXiv:2505.12514, 2025.
+[20] Tamera Lanham, Anna Chen, Ansh Radhakrishnan, Benoit Steiner, Carson Denison, Danny
+ Hernandez, Dustin Li, Esin Durmus, Evan Hubinger, Jackson Kernion, et al. Measuring
+ faithfulness in chain-of-thought reasoning. arXiv preprint arXiv:2307.13702, 2023.
+[21] Yanda Chen, Joe Benton, Ansh Radhakrishnan, Jonathan Uesato, Carson Denison, John Schul-
+ man, Arushi Somani, Peter Hase, Misha Wagner, Fabien Roger, et al. Reasoning models don’t
+ always say what they think. arXiv preprint arXiv:2505.05410, 2025.
+[22] Junyeob Baek, Mingyu Jo, Minsu Kim, Yoshua Bengio, and Sungjin Ahn. Generative recursive
+ reasoning models. ICLR 2026 Workshop on AI with Recursive Self-Improvement, 2026.
+[23] Andreas Efstathiou and Aishwarya Balwani. Recursive reasoning as attractor landscape search:
+ Mechanistic dynamics of the tiny recursive model. Workshop on Latent & Implicit Think-
+ ing – Going Beyond CoT Reasoning, 2026. URL https://openreview.net/forum?id=
+ kKps9W1K7n.
+
+
+
+
+ 11
+ A Implementation Details
+
+A.1 Compute
+
+We train and evaluate all models on a single NVIDIA H100 80GB GPU. PTRM introduces no
+additional training cost over standard TRM since it operates entirely at inference time.
+
+A.2 Models
+
+All experiments use the standard TRM backbone [1] with the released architecture and training recipes.
+Following the TRM paper, we use the MLP variant (TRM-MLP, 5M parameters) for Sudoku-Extreme
+and the attention variant (TRM-Att, 7M parameters) for Maze-Hard, ARC-AGI-2, and PPBench.
+Layout and hyperparameters are unchanged from TRM.
+
+A.3 PPBench dataset construction
+
+Sudoku-Extreme, Maze-Hard, and ARC-AGI-2 use the same checkpoints and data splits as TRM.
+The PPBench dataset is more recent and has previously been used only with frontier LLMs, so we
+detail how we built our training, validation, and golden splits.
+
+Source. PPBench contains 62,231 constraint-satisfaction pencil puzzles spanning 94 puzzle types.
+Of these, 300 puzzles (15 puzzles × 20 types) are held out as the golden benchmark set by Waugh [8].
+
+Filtering. From the remaining 61,931 puzzles we hold out a validation set by sampling 100 puzzles
+from each puzzle type (50 for tapa, due to its smaller base size), and the rest forms the training
+set. We then filter all three sets (training, validation, golden) to retain only puzzles of six types
+(sudoku, lightup, nurikabe, shakashaka, heyawake, tapa) at fixed grid sizes: 9×9 for sudoku
+and 10×10 for the others. Sudoku grids are padded with a pad token to 10×10, giving a uniform
+sequence length of seq_len = 100 across all six puzzle types. The deterministic TRM baseline
+reaches 100% accuracy on shakashaka, so we exclude it from per-puzzle accuracy reporting (no
+headroom to compare against PTRM).
+
+Augmentation. Each training puzzle is expanded into 10 examples using two augmentations: 1)
+trajectory sampling, where the input is set to a random intermediate solve state along the puzzle’s
+solution trajectory rather than always the empty initial grid, while the label is always the fully solved
+grid; and 2) dihedral transformation, where a random dihedral transformation of a square grid, among
+the 8 possibilities given by 4 rotations × 2 {identity, reflection}, is applied to both the input and the
+label. For each puzzle, the first example is the unaugmented (initial state, solved) pair. The remaining
+9 are randomly sampled (trajectory and dihedral transform). Validation and golden splits are not
+augmented.
+
+Resulting splits. The merged multi-type splits use a unified vocabulary of 294 tokens and seq_len =
+100. Per-type sample counts are reported in Table 4.
+
+ puzzle type train val golden
+ sudoku 7,810 97 15
+ lightup 9,504 65 8
+ nurikabe 15,180 55 9
+ heyawake 42,108 70 7
+ tapa 3,663 26 10
+ shakashaka∗ 20,702 62 12
+ total 98,967 375 61
+Table 4: Per-puzzle-type sample counts in the PPBench splits used in training and evaluation.
+∗ Shakashaka is included in training but excluded from per-puzzle accuracy reporting because deter-
+ministic TRM already solves all evaluated shakashaka puzzles.
+
+
+
+ 12
+ B Noise Ablation
+
+ We ablate the inference noise level σ on three benchmarks at K=25 (K=100 for Maze-Hard) and
+ D=16 to keep the sweep tractable. For Sudoku-Extreme we randomly sample 1000 puzzles from the
+ test set for the same reason. Figure 7 shows pass@K, best-Q@K, and mode@K as a function of σ,
+ averaged over three random seeds.
+
+ pass@K best-Q@K mode@K K = 1 baseline
+
+ Sudoku-Extreme Maze-Hard ARC-AGI-2 (within-aug)
+ 100
+ 96 5.5
+ 90
+ 94
+ 80 5.0
+ 92
+accuracy (%)
+
+
+
+
+ 70 90 4.5
+ 60 88
+ 50 86 4.0
+ 40 84
+ 3.5
+ 30 82
+ 0.0 0.2 0.4 0.6 0.8 1.0 0.0 0.2 0.4 0.6 0.8 1.0 0.0 0.2 0.4 0.6 0.8 1.0
+
+
+ Figure 7: pass@K, best-Q@K, and mode@K across σ per rollout batch. On every task,
+ increasing the inference noise consistently produces more correct rollouts (pass@K, blue) up to
+ a task-dependent σ value. The Q head (best-Q@K, orange) tracks the pass@K ceiling closely
+ on Sudoku-Extreme and leaves a larger gap on Maze-Hard and ARC-AGI-2. The shaded region
+ represents the verifier headroom (accuracy that a better verifier could extract). mode@K (green) has
+ the edge over the Q head only on Maze-Hard. For ARC-AGI-2, metrics are per puzzle/augmentation
+ to isolate the Q head’s verification abilities from the augmentation pipeline.
+
+ On Maze-Hard pass@K climbs from 83.8% (deterministic) to nearly 96% by σ≈1.0 and then
+ plateaus. On Sudoku-Extreme it is already near its ceiling at σ=0.1 and stays roughly flat across the
+ sweep. On ARC-AGI-2 it peaks near σ=0.6 before declining. Q head selection nearly matches the
+ ceiling (maximum pass@K) on Sudoku-Extreme, where best-Q@K peaks at 98.5% (within a point of
+ pass@K’s peak of 99.3%). On the other hand, the gap between best-Q@K and maximum pass@K
+ is more pronounced on Maze-Hard and ARC-AGI-2 (headroom a stronger verifier could close).
+
+
+ C Q-guided Langevin sampling
+
+ We initially explored Langevin sampling (using the Q head gradient) as a more principled exploration
+ mechanism than the Gaussian noise injection used in PTRM. The idea is to better guide the stochastic
+ search by additionally steering each rollout (using the Q head gradient) toward regions of high Q
+ value. We ultimately found that the gain from this approach was entirely attributable to the Langevin
+ noise term, with the gradient component contributing nothing measurable on top of the equivalent
+ recurrent noise of Sec. 4. We document the approach here as a negative result.
+
+ Motivation. The Q head is trained as a correctness predictor over latent states. Let fQ (z) denote
+ the head’s scalar output. We treated E(z) = − log sigmoid(fQ (z)) as an energy function over latent
+ space. Empirical observations during early experiments suggested that regions of low E correspond
+ to good basins from which the decoded answer is likely correct. PCA visualizations of the latent
+ dynamics showed that ∇z fQ points toward the good-basin region from both good-basin (correct) and
+ bad-basin (incorrect) latents (Figure 8). This made ∇z fQ look like a valuable direction along which
+ to push latents.
+
+ Method. We sample from the target distribution p(z) ∝ e^{−E(z)} = sigmoid(fQ (z)) via Langevin
+ dynamics, where at the end of each deep recursion step t = 1, . . . , D we apply N Langevin steps to
+ the latent,
+
+        $$z \leftarrow z - \eta\, \nabla_z E(z) + \sqrt{2\eta}\, \xi, \qquad \xi \sim \mathcal{N}(0, I).$$
+ The number of Langevin steps N is the additional scaling axis under this scheme.
+
+
+ 13
+ t=0 t=5 t = 10 t = 15
+ Correct (21)
+ Incorrect (4)
+ Q
+
+
+
+
+Figure 8: y latents and their ∇z fQ gradients projected into the principal plane at several recur-
+sive/supervision steps, for multiple rollouts (using recurrent noise) of a single puzzle (correct rollouts
+in green, incorrect in red). Arrows are drawn at each latent in the direction of ∇z fQ . From both
+good-basin and bad-basin latents, gradients point toward the good-basin region. This visualization
+motivated the Langevin sampling experiment described below.
+
+
+Tractable gradient computation. TRM’s original Q head is a linear projection on a single token,
+fQ (y) = w⊤ y[:, 0]+b, so its gradient with respect to this head’s input is a constant vector independent
+of z. For ∇z fQ to be input-dependent, the gradient must flow back through the last latent recursion.
+This works but requires backpropagating through a full latent recursion at every Langevin step, which
+scales poorly with N . To make guidance tractable for large N , we replaced the linear Q head with
+an attention-pooled variant that reads the full latent and produces a scalar through a small nonlinear
+network. With this head, ∇z fQ can be computed by backpropagating through the head alone, which
+is ∼8× faster per step and does not sacrifice accuracy.
+
+The gain came from the noise, not the gradient. Comparing Langevin sampling against a noise-
+only ablation (with the same $\sqrt{2\eta}\,\xi$ noise term, but with the −η ∇z E(z) term zeroed out) produced essentially
+identical accuracy at matched N . The gradient component contributed nothing measurable on
+top of the equivalent recurrent noise. This prompted us to focus on the noise-only formulation in
+Sec. 4, which is much more impactful since it is: 1) significantly simpler (no retraining, no test-time
+backpropagation), 2) applicable to any TRM checkpoint out of the box, and 3) equally effective.
+
+D Per-puzzle accuracy on the PPBench validation set
+The main paper reports per-puzzle accuracy on the PPBench golden set (Table 1) for direct compara-
+bility with the LLM evaluations from Waugh [8] who used that set. For a lower-variance complement,
+Table 5 reports results on our validation set (313 puzzles across the five reported types vs. 49 for
+golden). Trends match the golden-set results: depth scaling alone (K=1, D=48) provides a small lift,
+and combining depth with stochastic rollouts (K=100, D=48, σ=0.2) raises aggregate best-Q@K
+from 76.4% to 90.4%, a 14.0 percentage-point improvement. The biggest gains again are on puzzles
+where the deterministic baseline has the most headroom (tapa ∼ 40% to 71.8%, sudoku ∼ 69%
+to 93.3%). Types where the baseline is already near ceiling (heyawake at 96.7%) increase only
+marginally.
+
+ % accuracy # Params sudoku lightup nurikabe heyawake tapa agg.
+ Direct prediction 27M 0.0 10.0 4.0 14.0 0.0 6.2
+ TRM (K=1, D=16) 7M 68.7 83.3 76.0 96.7 39.7 76.4
+ TRM (K=1, D=48) 7M 74.0 84.0 76.7 98.0 41.0 78.3
+ PTRM, best-Q@K (K=100, D=48) 7M 93.3 93.3 84.7 100 71.8 90.4
+Table 5: PPBench per-puzzle accuracy on the validation set. PTRM uses the same backbone as the
+deterministic TRM. Results on the larger validation set follow the same trends as on the golden set.
+
+
+
+
+ 14
+ \ No newline at end of file
diff --git a/papers/txt/trm2025_tiny_recursive.txt b/papers/txt/trm2025_tiny_recursive.txt
new file mode 100644
index 0000000..55cf994
--- /dev/null
+++ b/papers/txt/trm2025_tiny_recursive.txt
@@ -0,0 +1,796 @@
+ Less is More: Recursive Reasoning with Tiny Networks
+
+
+
+ Alexia Jolicoeur-Martineau
+ Samsung SAIL Montréal
+ alexia.j@samsung.com
+
+
+ Abstract
+arXiv:2510.04871v1 [cs.LG] 6 Oct 2025
+
+
+
+
+ Hierarchical Reasoning Model (HRM) is a
+ novel approach using two small neural net-
+ works recursing at different frequencies. This
+ biologically inspired method beats Large Lan-
+ guage models (LLMs) on hard puzzle tasks
+ such as Sudoku, Maze, and ARC-AGI while
+ trained with small models (27M parameters)
+ on small data (∼ 1000 examples). HRM holds
+ great promise for solving hard problems with
+ small networks, but it is not yet well un-
+ derstood and may be suboptimal. We pro-
+ pose Tiny Recursive Model (TRM), a much
+ simpler recursive reasoning approach that
+ achieves significantly higher generalization
+ than HRM, while using a single tiny network
+ with only 2 layers. With only 7M parameters,
+ TRM obtains 45% test-accuracy on ARC-AGI-
+ 1 and 8% on ARC-AGI-2, higher than most
+ LLMs (e.g., Deepseek R1, o3-mini, Gemini 2.5
+ Pro) with less than 0.01% of the parameters.
+
+
+
+ 1. Introduction
+ While powerful, Large Language models (LLMs) can
+ struggle on hard question-answer problems. Given
+ that they generate their answer auto-regressively, there
+ is a high risk of error since a single incorrect token can
+ render an answer invalid. To improve their reliabil- Figure 1. Tiny Recursion Model (TRM) recursively improves
+ ity, LLMs rely on Chain-of-thoughts (CoT) (Wei et al., its predicted answer y with a tiny network. It starts with the
+ 2022) and Test-Time Compute (TTC) (Snell et al., 2024). embedded input question x and initial embedded answer
+ y, and latent z. For up to Nsup = 16 improvements steps,
+ CoTs seek to emulate human reasoning by having the
+ it tries to improve its answer y. It does so by i) recursively
+ LLM to sample step-by-step reasoning traces prior to
+ updating n times its latent z given the question x, current
+ giving their answer. Doing so can improve accuracy, answer y, and current latent z (recursive reasoning), and
+ but CoT is expensive, requires high-quality reasoning then ii) updating its answer y given the current answer y
+ data (which may not be available), and can be brittle and current latent z. This recursive process allows the model
+ since the generated reasoning may be wrong. To fur- to progressively improve its answer (potentially address-
+ ther improve reliability, test-time compute can be used ing any errors from its previous answer) in an extremely
+ by reporting the most common answer out of K or the parameter-efficient manner while minimizing overfitting.
+ highest-reward answer (Snell et al., 2024).
+
+ 1
+ Recursive Reasoning with Tiny Networks
+
+However, this may not be enough. LLMs with CoT ers that achieves significantly higher generalization
+and TTC are not enough to beat every problem. While than HRM on a variety of problems. In doing so, we
+LLMs have made significant progress on ARC-AGI improve the state-of-the-art test accuracy on Sudoku-
+(Chollet, 2019) since 2019, human-level accuracy still Extreme from 55% to 87%, Maze-Hard from 75% to
+has not been reached (6 years later, as of writing of 85%, ARC-AGI-1 from 40% to 45%, and ARC-AGI-2
+this paper). Furthermore, LLMs struggle on the newer from 5% to 8%.
+ARC-AGI-2 (e.g., Gemini 2.5 Pro only obtains 4.9% test
+accuracy with a high amount of TTC) (Chollet et al., 2. Background
+2025; ARC Prize Foundation, 2025b).
+ HRM is described in Algorithm 2. We discuss the
+An alternative direction has recently been proposed by
+ details of the algorithm further below.
+Wang et al. (2025). They propose a new way forward
+through their novel Hierarchical Reasoning Model
+(HRM), which obtains high accuracy on puzzle tasks 2.1. Structure and goal
+where LLMs struggle to make a dent (e.g., Sudoku The focus of HRM is supervised learning. Given an
+solving, Maze pathfinding, and ARC-AGI). HRM is a input, produce an output. Both input and output are
+supervised learning model with two main novelties: 1) assumed to have shape [ B, L] (when the shape differs,
+recursive hierarchical reasoning, and 2) deep supervision. padding tokens can be added), where B is the batch-
+Recursive hierarchical reasoning consists of recurs- size and L is the context-length.
+ing multiple times through two small networks ( f L at HRM contains four learnable components: the in-
+high frequency and f H at low frequency) to predict the put embedding f I (·; θ I ), low-level recurrent network
+answer. Each network generates a different latent fea- f L (·; θ L ), high-level recurrent network f H (·; θ H ), and
+ture: f L outputs z L and f H outputs z H . Both features   the output head fO (·; θO ). Once the input is embedded,
+(z L , z H ) are used as input to the two networks. The the shape becomes [ B, L, D ] where D is the embedding
+authors provide some biological arguments in favor of     size. Each network is a 4-layer Transformer architec-
+recursing at different hierarchies based on the different ture (Vaswani et al., 2017), with RMSNorm (Zhang
+temporal frequencies at which the brains operate and & Sennrich, 2019), no bias (Chowdhery et al., 2023),
+hierarchical processing of sensory inputs. rotary embeddings (Su et al., 2024), and SwiGLU acti-
+Deep supervision consists of improving the answer vation function (Hendrycks & Gimpel, 2016; Shazeer,
+through multiple supervision steps while carrying the 2020).
+two latent features as initialization for the improve-
+ment steps (after detaching them from the computa- 2.2. Recursion at two different frequencies
+tional graph so that their gradients do not propagate). Given the hyperparameters used by Wang et al. (2025)
+This provides residual connections, which emulate         (n = 2 f L steps, 1 f H steps; done T = 2 times), a
+very deep neural networks that are too memory ex- forward pass of HRM is done as follows:
+pensive to apply in one forward pass.
+An independent analysis on the ARC-AGI benchmark x ← f I ( x̃ )
+showed that deep supervision seems to be the primary z L ← f L (z L + z H + x ) # without gradients
+driver of the performance gains (ARC Prize Founda- z L ← f L (z L + z H + x ) # without gradients
+tion, 2025a). Using deep supervision doubled accuracy
+ z H ← f H (z L + z H ) # without gradients
+over single-step supervision (going from 19% to 39%
+accuracy), while recursive hierarchical reasoning only z L ← f L (z L + z H + x ) # without gradients
+slightly improved accuracy over a regular model with z L ← z L .detach()
+a single forward pass (going from 35.7% to 39.0% ac- z H ← z H .detach()
+curacy). This suggests that reasoning across different
+ z L ← f L (z L + z H + x ) # with gradients
+supervision steps is worth it, but the recursion done
+in each supervision step is not particularly important. z H ← f H (z L + z H ) # with gradients
+ ŷ ← argmax( fO (z H ))
+In this work, we show that the benefit from recursive
+reasoning can be massively improved, making it much
+more than incremental. We propose Tiny Recursive where ŷ is the predicted output answer, z L and z H are
+Model (TRM), an improved and simplified approach either initialized embeddings or the embeddings of
+using a much smaller tiny network with only 2 lay- the previous deep supervision step (after detaching
+ them from the computational graph). As can be seen,
+
+ 2
+ Recursive Reasoning with Tiny Networks
+
+def hrm(z, x, n=2, T=2): # hierarchical reasoning 2.4. Deep supervision
+ zH, zL = z
+ with torch.no grad(): To improve effective depth, deep supervision is used.
+ for i in range(nT − 2):
+ zL = L net(zL, zH, x) This consists of reusing the previous latent features
+ if (i + 1) % T == 0:
+ zH = H net(zH, zL)
+ (z H and z L ) as initialization for the next forward pass.
+ # 1−step grad This allows the model to reason over many iterations
+ zL = L net(zL, zH, x)
+ zH = H net(zH, zL) and improve its latent features (z L and z H ) until it
+ return (zH, zL), output head(zH), Q head(zH) (hopefully) converges to the correct solution. At most
+def ACT halt(q, y hat, y true): Nsup = 16 supervision steps are used.
+ target halt = (y hat == y true)
+ loss = 0.5∗binary cross entropy(q[0], target halt)
+ return loss 2.5. Adaptive computational time (ACT)
+def ACT continue(q, last step):
+ if last step: With deep supervision, each mini-batch of data sam-
+ target continue = sigmoid(q[0]) ples must be used for Nsup = 16 supervision steps
+ else:
+ target continue = sigmoid(max(q[0], q[1]))) before moving to the next mini-batch. This is expen-
+ loss = 0.5∗binary cross entropy(q[1], target continue)
+ return loss
+ sive, and there is a balance to be reached between
+ optimizing a few data examples for many supervision
+# Deep Supervision
+for x input, y true in train dataloader: steps versus optimizing many data examples with less
+ z = z init supervision steps. To reach a better balance, a halting
+ for step in range(N sup): # deep supervision
+ x = input embedding(x input) mechanism is incorporated to determine whether the
+ z, y pred, q = hrm(z, x)
+ loss = softmax cross entropy(y pred, y true)
+ model should terminate early. It is learned through
+ # Adaptive computational time (ACT) using Q−learning a Q-learning objective that requires passing the z H
+ loss += ACT halt(q, y pred, y true)
+ , , q next = hrm(z, x) # extra forward pass through an additional head and running an additional
+ loss += ACT continue(q next, step == N sup − 1) forward pass (to determine if halting now rather than
+ z = z.detach()
+ loss.backward() later would have been preferable). They call this
+ opt.step()
+ opt.zero grad()
+ method Adaptive computational time (ACT). It is only
+ if q[0] > q[1]: # early−stopping used during training, while the full Nsup = 16 super-
+ break
+ vision steps are done at test time to maximize down-
+ stream performance. ACT greatly diminishes the time
+Figure 2. Pseudocode of Hierarchical Reasoning Models spent per example (on average spending less than 2
+(HRMs). steps on the Sudoku-Extreme dataset rather than the
+ full Nsup = 16 steps), allowing more coverage of the
+a forward pass of HRM consists of applying 6 function dataset given a fixed number of training iterations.
+evaluations, where the first 4 function evaluations are
+detached from the computational graph and are not 2.6. Deep supervision and 1-step gradient
+back-propagated through. The authors use n = 2                  approximation replace BPTT
+with T = 2 in all experiments, but HRM can be gener-
+alized by allowing for an arbitrary number of L steps Deep supervision and the 1-step gradient approxima-
+(n) and recursions (T) as shown in Algorithm 2. tion provide a more biologically plausible and less
+                                                             computationally-expensive alternative to Backpropa-
+2.3. Fixed-point recursion with 1-step gradient gation Through Time (BPTT) (Werbos, 1974; Rumel-
+ approximation hart et al., 1985; LeCun, 1985) for solving the temporal
+ credit assignment (TCA) (Rumelhart et al., 1985; Wer-
+Assuming that (z L , z H ) reaches a fixed-point (z∗L , z∗H ) bos, 1988; Elman, 1990) problem (Lillicrap & Santoro,
+through recursing from both f L and f H , 2019). The implication is that HRM can learn what
+ would normally require an extremely large network
+ z∗L ≈ f L (z∗L + z H + x ) without having to back-propagate through its entire
+              z∗H ≈ f H (z L + z∗H ) ,                       depth. Given the hyperparameters used by Wang et al.
+                                                             (2025) in all their experiments, HRM effectively rea-
+the Implicit Function Theorem (Krantz & Parks, 2002)
+ sons over nlayers (n + 1) TNsup = 4 ∗ (2 + 1) ∗ 2 ∗ 16 =
+with the 1-step gradient approximation (Bai et al.,
+ 384 layers of effective depth.
+2019) is used to approximate the gradient by back-
+propagating only the last f L and f H steps. This theo-
+rem is used to justify only tracking the gradients of
+the last two steps (out of 6), which greatly reduces
+memory demands.
+
+ 3
+ Recursive Reasoning with Tiny Networks
+
+2.7. Summary of HRM different from the much smaller n = 2 and T = 2 used
+ in every experiment of their paper, we observe the
+HRM leverages recursion from two networks at dif-
+ following:
+ferent frequencies (high frequency versus low fre-
+quency) and deep supervision to learn to improve
+its answer over multiple supervision steps (with ACT 1. the residual for z H is clearly well above 0 at every
+to reduce time spent per data example). This enables step
+the model to imitate extremely large depth without
+requiring backpropagation through all layers. This
+approach obtains significantly higher performance on 2. the residual for z L only becomes closer to 0 after
+hard question-answer tasks that regular supervised many cycles, but it remains significantly above 0
+models struggle with. However, this method is quite
+complicated, relying a bit too heavily on uncertain
+biological arguments and fixed-point theorems that 3. z L is very far from converged after one f L evalu-
+are not guaranteed to be applicable. In the next sec- ation at T cycles, which is when the fixed-point
+tion, we discuss those issues and potential targets for is assumed to be reached and the 1-step gradient
+improvements in HRM. approximation is used
+
+3. Target for improvements in Hierarchical Thus, while the application of the IFT theorem and
+ Reasoning Models 1-step gradient approximation to HRM has some basis
+ since the residuals do generally reduce over time, a
+In this section, we identify key targets for improve-
+ fixed point is unlikely to be reached when the theorem
+ments in HRM, which will be addressed by our pro-
+ is actually applied.
+posed method, Tiny Recursion Models (TRMs).
+ In the next section, we show that we can bypass the
+3.1. Implicit Function Theorem (IFT) with 1-step need for the IFT theorem and 1-step gradient approxi-
+ gradient approximation mation, thus bypassing the issue entirely.
+
+HRM only back-propagates through the last 2 of the 6 3.2. Twice the forward passes with Adaptive
+recursions. The authors justify this by leveraging the computational time (ACT)
+Implicit Function Theorem (IFT) and one-step approx-
+imation, which states that when a recurrent function HRM uses Adaptive computational time (ACT) during
+converges to a fixed point, backpropagation can be training to optimize the time spent of each data sam-
+applied in a single step at that equilibrium point. ple. Without ACT, Nsup = 16 supervision steps would
+ be spent on the same data sample, which is highly in-
+There are concerns about applying this theorem to
+ efficient. They implement ACT through an additional
+HRM. Most importantly, there is no guarantee that
+ Q-learning objective, which decides when to halt and
+a fixed-point is reached. Deep equilibrium models
+ move to a new data sample rather than keep iterating
+normally do fixed-point iteration to solve for the fixed
+ on the same data. This allows much more efficient
+point z∗ = f (z∗ ) (Bai et al., 2019). However, in the case
+ use of time especially since the average number of su-
+of HRM, it is not iterating to the fixed-point but simply
+ pervision steps during training is quite low with ACT
+doing forward passes of f L and f H . To make matters
+ (less than 2 steps on the Sudoku-Extreme dataset as
+worse, HRM is only doing 4 recursions before stopping
+ per their reported numbers).
+to apply the one-step approximation. After its first
+loop of two f L and 1 f H evaluations, it only applies a    However, ACT comes at a cost. This cost is not directly
+single f L evaluation before assuming that a fixed-point shown in the HRM’s paper, but it is shown in their of-
+is reached for both z L and z H (z∗L = f L (z∗L + z H + x ) ficial code. The Q-learning objective relies on a halting
+and z∗H = f H (z∗L + z∗H )). Then, the one-step gradient loss and a continue loss. The continue loss requires an
+approximation is applied to both latent variables in extra forward pass through HRM (with all 6 function
+succession. evaluations). This means that while ACT optimizes
+ time more efficiently per sample, it requires 2 forward
+The authors justify that a fixed-point is reached by
+ passes per optimization step. The exact formulation is
+depicting an example with n = 7 and T = 7 where
+ shown in Algorithm 2.
+the forward residuals are reduced over time (Figure 3
+in Wang et al. (2025)). Even in this setting, which is In the next section, we show that we can bypass the
+ need for two forward passes in ACT.
+
+ 4
+ Recursive Reasoning with Tiny Networks
+
+3.3. Hierarchical interpretation based on complex of 2 passes). Our approach is described in Algorithm 3
+ biological arguments and illustrated in Figure 1. We also provide an ablation
+ in Table 1 on the Sudoku-Extreme dataset (a dataset
+The HRM’s authors justify the two latent variables
+ of difficult Sudokus with only 1K training examples,
+and two networks operating at different hierarchies
+ but 423K test examples). Below, we explain the key
+based on biological arguments, which are very far
+ components of TRMs.
+from artificial neural networks. They even try to match
+HRM to actual brain experiments on mice. While in-
+teresting, this sort of explanation makes it incredibly Table 1. Ablation of TRM on Sudoku-Extreme comparing %
+hard to parse out why HRM is designed the way it Test accuracy, effective depth per supervision step ( T (n +
+is. Given the lack of ablation table in their paper, the 1)nlayers ), number of Forward Passes (NFP) per optimization
+over-reliance on biological arguments and fixed-point step, and number of parameters
+theorems (that are not perfectly applicable), it is hard Method Acc (%) Depth NFP # Params
+to determine what parts of HRM is helping what and HRM 55.0 24 2 27M
+why. Furthermore, it is not clear why they use two TRM (T = 3, n = 6) 87.4 42 1 5M
+latent features rather than other combinations of fea- w/ ACT 86.1 42 2 5M
+tures. w/ separate f H , f L 82.4 42 1 10M
+ no EMA 79.9 42 1 5M
+In the next section, we show that the recursive process w/ 4-layers, n = 3 79.5 48 1 10M
+can be greatly simplified and understood in a much w/ self-attention 74.7 42 1 7M
+simpler manner that does not require any biological w/ T = 2, n = 2 73.7 12 1 5M
+argument, fixed-point theorem, hierarchical interpre- w/ 1-step gradient 56.5 42 1 5M
+tation, nor using two networks. It also explains why 2
+is the optimal number of features (z L and z H ).
+ 4.1. No fixed-point theorem required
+
+def latent recursion(x, y, z, n=6): HRM assumes that the recursions converge to a fixed-
+ for i in range(n): # latent reasoning point for both z L and z H in order to leverage the 1-step
+ z = net(x, y, z)
+ y = net(y, z) # refine output answer gradient approximation (Bai et al., 2019). This allows
+ return y, z
+ the authors to justify only back-propagating through
+def deep recursion(x, y, z, n=6, T=3): the last two function evaluations (1 f L and 1 f H ). To
+ # recursing T−1 times to improve y and z (no gradients needed)
+ with torch.no grad(): bypass this theoretical requirement, we define a full
+ for j in range(T−1): recursion process as containing n evaluations of f L
+ y, z = latent recursion(x, y, z, n)
+ # recursing once to improve y and z and 1 evaluation of f H :
+ y, z = latent recursion(x, y, z, n)
+ return (y.detach(), z.detach()), output head(y), Q head(y) z L ← f L (z L + z H + x )
+# Deep Supervision ...
+for x input, y true in train dataloader:
+ y, z = y init, z init z L ← f L (z L + z H + x )
+ for step in range(N supervision):
+ x = input embedding(x input) z H ← f H (z L + z H ) .
+ (y, z), y hat, q hat = deep recursion(x, y, z)
+ loss = softmax cross entropy(y hat, y true)
+ loss += binary cross entropy(q hat, (y hat == y true)) Then, we simply back-propagate through the full re-
+ loss.backward() cursion process.
+ opt.step()
+ opt.zero grad()
+ if q hat > 0: # early−stopping Through deep supervision, the models learns to take
+ break any (z L , z H ) and improve it through a full recursion
+ process, hopefully making z H closer to the solution.
+Figure 3. Pseudocode of Tiny Recursion Models (TRMs). This means that by the design of the deep supervi-
+ sion goal, running a few full recursion processes (even
+ without gradients) is expected to bring us closer to the
+4. Tiny Recursion Models solution. We propose to run T − 1 recursion processes
+ without gradient to improve (z L , z H ) before running
+In this section, we present Tiny Recursion Models
+ one recursion process with backpropagation.
+(TRMs). Contrary to HRM, TRM requires no com-
+plex mathematical theorem, hierarchy, nor biological Thus, instead of using the 1-step gradient approxi-
+arguments. It generalizes better while requiring only mation, we apply a full recursion process containing
+a single tiny network (instead of two medium-size net- n evaluations of f L and 1 evaluation of f H . This re-
+works) and a single forward pass for the ACT (instead moves entirely the need to assume that a fixed-point
+
+ 5
+ Recursive Reasoning with Tiny Networks
+
+is reached and the use of the IFT theorem with 1-step While this is intuitive, we wanted to verify whether
+gradient approximation. Yet, we can still leverage using more or less features could be helpful. Results
+multiple backpropagation-free recursion processes to are shown in Table 2.
+improve (z L , z H ). With this approach, we obtain a
+ More features (> 2): We tested splitting z into dif-
+massive boost in generalization on Sudoku-Extreme
+ ferent features by treating each of the n recursions as
+(improving TRM from 56.5% to 87.4%; see Table 1).
+ producing a different zi for i = 1, ..., n. Then, each
+ zi is carried across supervision steps. The approach
+4.2. Simpler reinterpretation of z H and z L is described in Algorithm 5. In doing so, we found
+HRM is interpreted as doing hierarchical reasoning performance to drop. This is expected because, as dis-
+over two latent features of different hierarchies due to cussed, there is no apparent need for splitting z into
+arguments from biology. However, one might wonder multiple parts. It does not have to be hierarchical.
+why use two latent features instead of 1, 3, or more? Single feature: Similarly, we tested the idea of taking
+And do we really need to justify these so-called ”hier- a single feature by only carrying z H across supervi-
+archical” features based on biology to make sense of sion steps. The approach is described in Algorithm 4.
+them? We propose a simple non-biological explana- In doing so, we found performance to drop. This is
+tion, which is more natural, and directly answers the expected because, as discussed, it forces the model to
+question of why there are 2 features. store the solution y within z.
+The fact of the matter is: z H is simply the current Thus, we explored using more or less latent variables
+(embedded) solution. The embedding is reversed by on Sudoku-Extreme, but found that having only y and
+applying the output head and rounding to the nearest        z leads to better test accuracy in addition to being the
+token using the argmax operation. On the other hand,        simplest, most natural approach.
+z L is a latent feature that does not directly correspond
+to a solution, but it can be transformed into a solution
+by applying z H ← f H ( x, z L , z H ). We show an example
+on Sudoku-Extreme in Figure 6 to highlight the fact Table 2. TRM on Sudoku-Extreme comparing % Test accu-
+that z H does correspond to the solution, but z L does racy when using more or less latent features
+not. Method # of features Acc (%)
+ TRM y, z (Ours) 2 87.4
+Once this is understood, hierarchy is not needed; there TRM multi-scale z n+1 = 7 77.6
+is simply an input x, a proposed solution y (previously TRM single z 1 71.9
+called z H ), and a latent reasoning feature z (previously
+called z L ). Given the input question x, current solution
+y, and current latent reasoning z, the model recursively
+improves its latent z. Then, given the current latent z 4.3. Single network
+and the previous solution y, the model proposes a new HRM uses two networks, one applied frequently as a
+solution y (or stays at the current solution if it is already   low-level module f L and one applied rarely as a high-
+good). level module ( f H ). This requires twice the number of
+Although this has no direct influence on the algorithm, parameters compared to regular supervised learning
+this re-interpretation is much simpler and natural. It with a single network.
+answers the question about why two features: remem- As mentioned previously, while f L iterates on the la-
+bering in context the question x, previous reasoning tent reasoning feature z (z L in HRM), the goal of f H
+z, and previous answer y helps the model iterate on is to update the solution y (z H in HRM) given the la-
+the next reasoning z and then the next answer y. If tent reasoning and current solution. Importantly, since
+we were not passing the previous reasoning z, the z ← f L ( x + y + z) contains x but y ← f H (y + z) does
+model would forget how it got to the previous solu-         not contain x, the task to achieve (iterating on z versus
+tion y (since z acts similarly as a chain-of-thought). If using z to update y) is directly specified by the inclu-
+we were not passing the previous solution y, then the sion or lack of x in the inputs. Thus, we considered
+model would forget what solution it had and would the possibility that both networks could be replaced
+be forced to store the solution y within z instead of by a single network doing both tasks. In doing so, we
+using it for latent reasoning. Thus, we need both y and obtain better generalization on Sudoku-Extreme (im-
+z separately, and there is no apparent reason why one proving TRM from 82.4% to 87.4%; see Table 1) while
+would need to split z into multiple features. reducing the number of parameters by half. It turns
+ out that a single network is enough.
+
+ 6
+ Recursive Reasoning with Tiny Networks
+
+4.4. Less is more 4.6. No additional forward pass needed with ACT
+We attempted to increase capacity by increasing the As previously mentioned, the implementation of ACT
+number of layers in order to scale the model. Sur- in HRM through Q-learning requires two forward
+prisingly, we found that adding layers decreased gen- passes, which slows down training. We propose a
+eralization due to overfitting. In doing the oppo- simple solution, which is to get rid of the continue loss
+site, decreasing the number of layers while scaling (from the Q-learning) and only learn a halting proba-
+the number of recursions (n) proportionally (to keep bility through a Binary-Cross-Entropy loss of having
+the amount of compute and emulated depth approxi- reached the correct solution. By removing the continue
+mately the same), we found that using 2 layers (instead loss, we remove the need for the expensive second for-
+of 4 layers) maximized generalization. In doing so, we ward pass, while still being able to determine when to
+obtain better generalization on Sudoku-Extreme (im- halt with relatively good accuracy. We found no sig-
+proving TRM from 79.5% to 87.4%; see Table 1) while nificant difference in generalization from this change
+reducing the number of parameters by half (again). (going from 86.1% to 87.4%; see Table 1).
+It is quite surprising that smaller networks are bet-
+ter, but 2 layers seems to be the optimal choice. Bai 4.7. Exponential Moving Average (EMA)
+& Melas-Kyriazi (2024) also observed optimal perfor- On small data (such as Sudoku-Extreme and Maze-
+mance for 2-layers in the context of deep equilibrium Hard), HRM tends to overfit quickly and then diverge.
+diffusion models; however, they had similar perfor- To reduce this problem and improve stability, we
+mance to the bigger networks, while we instead ob- integrate Exponential Moving Average (EMA) of the
+serve better performance with 2 layers. This may ap- weights, a common technique in GANs and diffusion
+pear unusual, as with modern neural networks, gener- models to improve stability (Brock et al., 2018; Song &
+alization tends to directly correlate with model sizes. Ermon, 2020). We find that it prevents sharp collapse
+However, when data is too scarce and model size is and leads to higher generalization (going from 79.9%
+large, there can be an overfitting penalty (Kaplan et al., to 87.4%; see Table 1).
+2020). This is likely an indication that there is too little
+data. Thus, using tiny networks with deep recursion 4.8. Optimal number of recursions
+and deep supervision appears to allow us to bypass a
+lot of the overfitting. We experimented with different numbers of recursions
+ by varying T and n and found that T = 3, n = 3
+4.5. Attention-free architecture for tasks with small (equivalent to 48 recursions) in HRM and T = 3, n = 6
+ fixed context length in TRM (equivalent to 42 recursions) to lead to optimal
+ generalization on Sudoku-Extreme. More recursions
+Self-attention is particularly good for long-context could be helpful for harder problems (we have not
+lengths when L ≫ D since it only requires a matrix of tested it, given our limited resources); however, in-
+[ D, 3D ] parameters, even though it can account for the creasing either T or n incurs massive slowdowns. We
+whole sequence. However, when focusing on tasks show results at different n and T for HRM and TRM
+where L ≤ D, a linear layer is cheap, requiring only a in Table 3. Note that TRM requires backpropagation
+matrix of [ L, L] parameters. Taking inspiration from through a full recursion process, thus increasing n too
+the MLP-Mixer (Tolstikhin et al., 2021), we can replace much leads to Out Of Memory (OOM) errors. How-
+the self-attention layer with a multilayer perceptron ever, this memory cost is well worth its price in gold.
+(MLP) applied on the sequence length. Using an MLP
+instead of self-attention, we obtain better generaliza- In the following section, we show our main results on
+tion on Sudoku-Extreme (improving from 74.7% to multiple datasets comparing HRM, TRM, and LLMs.
+87.4%; see Table 1). This worked well on Sudoku 9x9
+grids, given the small and fixed context length; how- 5. Results
+ever, we found this architecture to be suboptimal for
+tasks with large context length, such as Maze-Hard Following Wang et al. (2025), we test our approach
+and ARC-AGI (both using 30x30 grids). We show on the following datasets: Sudoku-Extreme (Wang
+results with and without self-attention for all experi- et al., 2025), Maze-Hard (Wang et al., 2025), ARC-AGI-
+ments. 1 (Chollet, 2019), and ARC-AGI-2 (Chollet et al., 2025).
+ Results are presented in Tables 4 and 5. Hyperparame-
+ ters are detailed in Section 6. Datasets are discussed
+ below.
+
+
+ 7
+ Recursive Reasoning with Tiny Networks
+
+ ity of the MLP on large 30x30 grids). TRM with self-
+Table 3. % Test accuracy on Sudoku-Extreme dataset. HRM
+ attention obtains 85.3% accuracy on Maze-Hard, 44.6%
+versus TRM matched at a similar effective depth per super-
+vision step ( T (n + 1) n_layers )
+ accuracy on ARC-AGI-1, and 7.8% accuracy on ARC-
+ AGI-2 with 7M parameters. This is significantly higher
+ HRM TRM
+ than the 74.5%, 40.3%, and 5.0% obtained by HRM us-
+ n = k, 4 layers n = 2k, 2 layers
+ ing 4 times the number of parameters (27M).
+ k T Depth Acc (%) Depth Acc (%)
+ 1 1 9 46.4 7 63.2
+ 2 2 24 55.0 20 81.9 Table 4. % Test accuracy on Puzzle Benchmarks (Sudoku-
+ 3 3 48 61.6 42 87.4 Extreme and Maze-Hard)
+ 4 4 80 59.5 72 84.2 Method # Params Sudoku Maze
+ 6 3 84 62.3 78 OOM Chain-of-thought, pretrained
+ 3 6 96 58.8 84 85.8 Deepseek R1 671B 0.0 0.0
+ 6 6 168 57.5 156 OOM Claude 3.7 8K ? 0.0 0.0
+ O3-mini-high ? 0.0 0.0
+ Direct prediction, small-sample training
+Sudoku-Extreme consists of extremely difficult Su- Direct pred 27M 0.0 0.0
+doku puzzles (Dillion, 2025; Palm et al., 2018; Park, HRM 27M 55.0 74.5
+2018) (9x9 grid), for which only 1K training samples TRM-Att (Ours) 7M 74.7 85.3
+are used to test small-sample learning. Testing is done TRM-MLP (Ours) 5M/19M 1 87.4 0.0
+on 423K samples. Maze-Hard consists of 30x30 mazes
+generated by the procedure by Lehnert et al. (2024)
+whose shortest path is of length above 110; both the
+training set and test set include 1000 mazes. Table 5. % Test accuracy on ARC-AGI Benchmarks (2 tries)
+ Method # Params ARC-1 ARC-2
+ARC-AGI-1 and ARC-AGI-2 are geometric puzzles in- Chain-of-thought, pretrained
+volving monetary prizes. Each puzzle is designed to Deepseek R1 671B 15.8 1.3
+be easy for a human, yet hard for current AI models. Claude 3.7 16K ? 28.6 0.7
+Each puzzle task consists of 2-3 input–output demon- o3-mini-high ? 34.5 3.0
+stration pairs and 1-2 test inputs to be solved. The final Gemini 2.5 Pro 32K ? 37.0 4.9
+score is computed as the accuracy over all test inputs Grok-4-thinking 1.7T 66.7 16.0
+from two attempts to produce the correct output grid. Bespoke (Grok-4) 1.7T 79.6 29.4
+The maximum grid size is 30x30. ARC-AGI-1 con- Direct prediction, small-sample training
+tains 800 tasks, while ARC-AGI-2 contains 1120 tasks.
+ Direct pred 27M 21.0 0.0
+We also augment our data with the 160 tasks from
+ HRM 27M 40.3 5.0
+the closely related ConceptARC dataset (Moskvichev
+et al., 2023). We provide results on the public evalua- TRM-Att (Ours) 7M 44.6 7.8
+tion set for both ARC-AGI-1 and ARC-AGI-2. TRM-MLP (Ours) 19M 29.6 2.4
+
+While these datasets are small, heavy data-
+augmentation is used in order to improve gen- 6. Conclusion
+eralization. Sudoku-Extreme uses 1000 shuffling
+(done without breaking the Sudoku rules) augmenta- We propose Tiny Recursion Models (TRM), a simple
+tions per data example. Maze-Hard uses 8 dihedral recursive reasoning approach that achieves strong gen-
+transformations per data example. ARC-AGI uses eralization on hard tasks using a single tiny network
+1000 data augmentations (color permutation, dihedral- recursing on its latent reasoning feature and progres-
+group, and translations transformations) per data sively improving its final answer. Contrary to the
+example. The dihedral-group transformations consist Hierarchical Reasoning Model (HRM), TRM requires
+of random 90-degree rotations, horizontal/vertical no fixed-point theorem, no complex biological justi-
+flips, and reflections. fications, and no hierarchy. It significantly reduces
+ the number of parameters by halving the number of
+From the results, we see that TRM without self- layers and replacing the two networks with a single
+attention obtains the best generalization on Sudoku- tiny network. It also simplifies the halting process,
+Extreme (87.4% test accuracy). Meanwhile, TRM with removing the need for the extra forward pass. Over-
+self-attention generalizes better on the other tasks
+(probably due to inductive biases and the overcapac- 1 5M on Sudoku and 19M on Maze
+
+
+
+ 8
+ Recursive Reasoning with Tiny Networks
+
+all, TRM is much simpler than HRM, while achieving gan training for high fidelity natural image synthe-
+better generalization. sis. arXiv preprint arXiv:1809.11096, 2018.
+While our approach led to better generalization on 4 Chollet, F. On the measure of intelligence. arXiv
+benchmarks, every choice made is not guaranteed to preprint arXiv:1911.01547, 2019.
+be optimal on every dataset. For example, we found
+that replacing the self-attention with an MLP worked Chollet, F., Knoop, M., Kamradt, G., Landers, B.,
+extremely well on Sudoku-Extreme (improving test ac- and Pinkard, H. Arc-agi-2: A new challenge
+curacy by 10%), but poorly on other datasets. Different for frontier ai reasoning systems. arXiv preprint
+problem settings may require different architectures arXiv:2505.11831, 2025.
+or number of parameters. Scaling laws are needed
+to parametrize these networks optimally. Although Chowdhery, A., Narang, S., Devlin, J., Bosma, M.,
+we simplified and improved on deep recursion, the Mishra, G., Roberts, A., Barham, P., Chung, H. W.,
+question of why recursion helps so much compared Sutton, C., Gehrmann, S., et al. Palm: Scaling lan-
+to using a larger and deeper network remains to be guage modeling with pathways. Journal of Machine
+explained; we suspect it has to do with overfitting, but Learning Research, 24(240):1–113, 2023.
+we have no theory to back this explanation. Not all
+our ideas made the cut; we briefly discuss some of the Dillion, T. Tdoku: A fast sudoku solver and gener-
+failed ideas that we tried but did not work in Section 6. ator. https://t-dillon.github.io/tdoku/,
+Currently, recursive reasoning models such as HRM 2025.
+and TRM are supervised learning methods rather than
+ Elman, J. L. Finding structure in time. Cognitive science,
+generative models. This means that given an input
+ 14(2):179–211, 1990.
+question, they can only provide a single deterministic
+answer. In many settings, multiple answers exist for a Fedus, W., Zoph, B., and Shazeer, N. Switch transform-
+question. Thus, it would be interesting to extend TRM ers: Scaling to trillion parameter models with simple
+to generative tasks. and efficient sparsity. Journal of Machine Learning Re-
+ search, 23(120):1–39, 2022.
+Acknowledgements
+ Geng, Z. and Kolter, J. Z. Torchdeq: A library for deep
+Thank you Emy Gervais for your invaluable support equilibrium models. arXiv preprint arXiv:2310.18605,
+and extra push. This research was enabled in part 2023.
+by computing resources, software, and technical as-
+sistance provided by Mila and the Digital Research Hendrycks, D. and Gimpel, K. Gaussian error linear
+Alliance of Canada. units (gelus). arXiv preprint arXiv:1606.08415, 2016.
+
+ Jang, Y., Kim, D., and Ahn, S. Hierarchical graph
+References generation with k2-trees. In ICML 2023 Workshop on
+ARC Prize Foundation. The Hidden Drivers of HRM’s Structured Probabilistic Inference Generative Modeling,
+ Performance on ARC-AGI. https://arcprize. 2023.
+ org/blog/hrm-analysis, 2025a. [Online; ac-
+ Kaplan, J., McCandlish, S., Henighan, T., Brown, T. B.,
+ cessed 2025-09-15].
+ Chess, B., Child, R., Gray, S., Radford, A., Wu, J.,
+ARC Prize Foundation. ARC-AGI Leaderboard. and Amodei, D. Scaling laws for neural language
+ https://arcprize.org/leaderboard, 2025b. models. arXiv preprint arXiv:2001.08361, 2020.
+ [Online; accessed 2025-09-24].
+ Kingma, D. P. and Ba, J. Adam: A method for stochas-
+Bai, S., Kolter, J. Z., and Koltun, V. Deep equilibrium tic optimization. arXiv preprint arXiv:1412.6980,
+ models. Advances in neural information processing 2014.
+ systems, 32, 2019.
+ Krantz, S. G. and Parks, H. R. The implicit function
+Bai, X. and Melas-Kyriazi, L. Fixed point diffusion theorem: history, theory, and applications. Springer
+ models. In Proceedings of the IEEE/CVF Conference Science & Business Media, 2002.
+ on Computer Vision and Pattern Recognition, pp. 9430–
+ 9440, 2024. LeCun, Y. Une procedure d’apprentissage pour reseau
+ a seuil asymetrique. Proceedings of cognitiva 85, pp.
+Brock, A., Donahue, J., and Simonyan, K. Large scale 599–604, 1985.
+
+ 9
+ Recursive Reasoning with Tiny Networks
+
+Lehnert, L., Sukhbaatar, S., Su, D., Zheng, Q., Mcvay, P., all-mlp architecture for vision. Advances in neural
+ Rabbat, M., and Tian, Y. Beyond a*: Better planning information processing systems, 34:24261–24272, 2021.
+ with transformers via search dynamics bootstrap-
+ ping. arXiv preprint arXiv:2402.14083, 2024. Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J.,
+ Jones, L., Gomez, A. N., Kaiser, Ł., and Polosukhin,
+Lillicrap, T. P. and Santoro, A. Backpropagation I. Attention is all you need. Advances in neural
+ through time and the brain. Current opinion in neuro- information processing systems, 30, 2017.
+ biology, 55:82–89, 2019.
+ Wang, G., Li, J., Sun, Y., Chen, X., Liu, C., Wu, Y.,
+Loshchilov, I. and Hutter, F. Decoupled weight decay Lu, M., Song, S., and Yadkori, Y. A. Hierarchical
+ regularization. arXiv preprint arXiv:1711.05101, 2017. reasoning model. arXiv preprint arXiv:2506.21734,
+ 2025.
+Moskvichev, A., Odouard, V. V., and Mitchell, M. The
+ conceptarc benchmark: Evaluating understanding Wei, J., Wang, X., Schuurmans, D., Bosma, M., Xia, F.,
+ and generalization in the arc domain. arXiv preprint Chi, E., Le, Q. V., Zhou, D., et al. Chain-of-thought
+ arXiv:2305.07141, 2023. prompting elicits reasoning in large language mod-
+ els. Advances in neural information processing systems,
+Palm, R., Paquet, U., and Winther, O. Recurrent re-
+ 35:24824–24837, 2022.
+ lational networks. Advances in neural information
+ processing systems, 31, 2018. Werbos, P. Beyond regression: New tools for predic-
+ tion and analysis in the behavioral sciences. PhD
+Park, K. Can convolutional neural networks
+ thesis, Committee on Applied Mathematics, Harvard
+ crack sudoku puzzles? https://github.com/
+ University, Cambridge, MA, 1974.
+ Kyubyong/sudoku, 2018.
+ Werbos, P. J. Generalization of backpropagation with
+Prieto, L., Barsbey, M., Mediano, P. A., and Birdal, T.
+ application to a recurrent gas market model. Neural
+ Grokking at the edge of numerical stability. arXiv
+ networks, 1(4):339–356, 1988.
+ preprint arXiv:2501.04697, 2025.
+ Zhang, B. and Sennrich, R. Root mean square layer
+Rumelhart, D. E., Hinton, G. E., and Williams, R. J.
+ normalization. Advances in Neural Information Pro-
+ Learning internal representations by error propaga-
+ cessing Systems, 32, 2019.
+ tion. Technical report, 1985.
+Shazeer, N. Glu variants improve transformer. arXiv
+ preprint arXiv:2002.05202, 2020.
+Shazeer, N., Mirhoseini, A., Maziarz, K., Davis, A., Le,
+ Q., Hinton, G., and Dean, J. Outrageously large neu-
+ ral networks: The sparsely-gated mixture-of-experts
+ layer. arXiv preprint arXiv:1701.06538, 2017.
+Snell, C., Lee, J., Xu, K., and Kumar, A. Scaling
+ llm test-time compute optimally can be more effec-
+ tive than scaling model parameters. arXiv preprint
+ arXiv:2408.03314, 2024.
+Song, Y. and Ermon, S. Improved techniques for train-
+ ing score-based generative models. Advances in
+ neural information processing systems, 33:12438–12448,
+ 2020.
+Su, J., Ahmed, M., Lu, Y., Pan, S., Bo, W., and Liu,
+ Y. Roformer: Enhanced transformer with rotary
+ position embedding. Neurocomputing, 568:127063,
+ 2024.
+Tolstikhin, I. O., Houlsby, N., Kolesnikov, A., Beyer,
+ L., Zhai, X., Unterthiner, T., Yung, J., Steiner, A.,
+ Keysers, D., Uszkoreit, J., et al. Mlp-mixer: An
+
+ 10
+ Recursive Reasoning with Tiny Networks
+
+Hyper-parameters and setup propagating through the whole n + 1 recursions makes
+ the most sense and works best.
+All models are trained with the AdamW opti-
+mizer (Loshchilov & Hutter, 2017; Kingma & Ba, 2014) We tried removing ACT with the option of stopping
+with β_1 = 0.9, β_2 = 0.95, small learning rate warm- when the solution is reached, but we found that gen-
+up (2K iterations), batch-size 768, hidden-size of 512, eralization dropped significantly. This can probably
+N_sup = 16 max supervision steps, and stable-max loss be attributed to the fact that the model is spending
+(Prieto et al., 2025) for improved stability. TRM uses an too much time on the same data samples rather than
+Exponential Moving Average (EMA) of 0.999. HRM focusing on learning on a wide range of data samples.
+uses n = 2, T = 2 with two 4-layers networks, while We tried weight tying the input embedding and out-
+we use n = 6, T = 3 with one 2-layer network. put head, but this was too constraining and led to a
+For Sudoku-Extreme and Maze-Hard, we train for 60k massive generalization drop.
+epochs with learning rate 1e-4 and weight decay 1.0. We tried using TorchDEQ (Geng & Kolter, 2023) to
+For ARC-AGI, we train for 100K epochs with learning replace the recursion steps by fixed-point iteration as
+rate 1e-4 (with 1e-2 learning rate for the embeddings) done by Deep Equilibrium Models (Bai et al., 2019).
+and weight decay 0.1. The numbers for Deepseek R1, This would provide a better justification for the 1-step
+Claude 3.7 8K, O3-mini-high, Direct prediction, and gradient approximation. However, this slowed down
+HRM from Tables 4 and 5 are taken from Wang et al. training due to the fixed-point iteration and led to
+(2025). Both HRM and TRM add an embedding of worse generalization. This highlights the fact that
+shape [0, 1, D ] on Sudoku-Extreme and Maze-Hard to converging to a fixed-point is not essential.
+the input. For ARC-AGI, each puzzle (containing 2-3
+training examples and 1-2 test examples) at each data-
+augmentation is given a specific embedding of shape
+[0, 1, D ] and, at test-time, the most common answer
+out of the 1000 data augmentations is given as answer.
+Experiments on Sudoku-Extreme were run with 1 L40S
+with 40Gb of RAM for generally less than 36 hours.
+Experiments on Maze-Hard were run with 4 L40S with
+40Gb of RAM for less than 24 hours. Experiments on
+ARC-AGI were run for around 3 days with 4 H100
+with 80Gb of RAM.
+
+Ideas that failed
+In this section, we quickly mention a few ideas that
+did not work to prevent others from making the same
+mistake.
+We tried replacing the SwiGLU MLPs by SwiGLU
+Mixture-of-Experts (MoEs) (Shazeer et al., 2017; Fedus
+et al., 2022), but we found generalization to decrease
+massively. MoEs clearly add too much unnecessary
+capacity, just like increasing the number of layers does.
+Instead of back-propagating through the whole n + 1
+recursions, we tried a compromise between HRM 1-
+step gradient approximation, which back-propagates
+through the last 2 recursions. We did so by decou-
+pling n from the number of last recursions k that we
+back-propagate through. For example, while n = 6
+requires 7 steps with gradients in TRM, we can use
+gradients for only the k = 4 last steps. However, we
+found that this did not help generalization in any way,
+and it made the approach more complicated. Back-
+
+
+ 11
+ Recursive Reasoning with Tiny Networks
+
+Algorithms with different number of latent Example on Sudoku-Extreme
+features
+ 8 3 1
+ 9 6 8 7
+def latent recursion(x, z, n=6): 3 5
+ for i in range(n+1): # latent recursion
+ z = net(x, z) 6 8
+ return z 6 2
+def deep recursion(x, z, n=6, T=3):
+ 7 4 3
+ # recursing T−1 times to improve z (no gradients needed) 9 4
+ with torch.no grad():
+ for j in range(T−1):
+ 2 4 1
+ z = latent recursion(x, z, n) 6 2 5 7
+ # recursing once to improve z
+ z = latent recursion(x, z, n) Input x
+ return z.detach(), output head(y), Q head(y)
+ 5 2 6 7 9 4 8 3 1
+# Deep Supervision
+for x input, y true in train dataloader: 3 9 1 2 6 8 4 7 5
+ z = z init 4 8 7 3 1 5 2 9 6
+ for step in range(N supervision):
+ x = input embedding(x input)
+ 1 6 8 5 3 2 7 4 9
+ z, y hat, q hat = deep recursion(x, z) 9 3 5 4 7 6 1 8 2
+ loss = softmax cross entropy(y hat, y true) 7 4 2 9 8 1 5 6 3
+ loss += binary cross entropy(q hat, (y hat == y true))
+ z = z.detach() 8 7 3 1 5 9 6 2 4
+ loss.backward() 2 5 9 6 4 7 3 1 8
+ opt.step()
+ opt.zero grad() 6 1 4 8 5 3 9 5 7
+ if q[0] > 0: # early−stopping
+ break Output y
+ 5 2 6 7 9 4 8 3 1
+Figure 4. Pseudocode of TRM using a single-z with deep 3 9 1 2 6 8 4 7 5
+supervision training in PyTorch. 4 8 7 3 1 5 2 9 6
+ 1 6 8 5 3 2 7 4 9
+ 9 3 5 4 7 6 1 8 2
+ 7 4 2 9 8 1 5 6 3
+ 8 7 3 1 5 9 6 2 4
+def latent recursion(x, y, z, n=6):
+ for i in range(n): # latent recursion 2 5 9 6 4 7 3 1 8
+ z[i] = net(x, y, z[0], ... , z[n−1]) 6 1 4 8 5 3 9 5 7
+ y = net(y, z[0], ... , z[n−1]) # refine output answer
+ return y, z Tokenized z H (denoted y in TRM)
+def deep recursion(x, y, z, n=6, T=3): 5 5 4 9 4 6 3
+ # recursing T−1 times to improve y and z (no gradients needed)
+ with torch.no grad(): 4 3 1 4 6 5
+ for j in range(T−1): 4 8 4 3 6 6 4
+ y, z = latent recursion(x, y, z, n)
+ # recursing once to improve y and z 9 6 5 3 5 4
+ y, z = latent recursion(x, y, z, n) 3 5 4 3 5 4 4
+ return (y.detach(), z.detach()), output head(y), Q head(y)
+ 6 3 3 3 5 8 8
+# Deep Supervision 3 3 3 6 5 6 6 4
+for x input, y true in train dataloader: 7 5 6 3 3 6 6
+ y, z = y init, z init
+ for step in range(N supervision): 4 3 4 8 3 6 6 4
+ x = input embedding(x input)
+ (y, z), y hat, q hat = deep recursion(x, y, z) Tokenized z L (denoted z in TRM)
+ loss = softmax cross entropy(y hat, y true)
+ loss += binary cross entropy(q hat, (y hat == y true))
+ loss.backward()
+ opt.step()
+ Figure 6. This Sudoku-Extreme example shows an input, ex-
+ opt.zero grad() pected output, and the tokenized z H and z L (after reversing
+ if q[0] > 0: # early−stopping the embedding and using argmax) for a pretrained model.
+ break
+ This highlights the fact that z H corresponds to the predicted
+ response, while z L is a latent feature that cannot be decoded
+Figure 5. Pseudocode of TRM using multi-scale z with deep to a sensible output unless transformed into z H by f H .
+supervision training in PyTorch.
+
+
+
+
+ 12
+ \ No newline at end of file
diff --git a/plot_ckpt_evolution.py b/plot_ckpt_evolution.py
new file mode 100644
index 0000000..1bde7b4
--- /dev/null
+++ b/plot_ckpt_evolution.py
@@ -0,0 +1,105 @@
+"""Plot λ-spectrum and accuracy evolution across training checkpoints."""
+import numpy as np
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+
+# Training steps whose dumps are analysed; one `step_<s>.npz` archive is read per entry.
+CKPT_STEPS = [2604, 7812, 13020, 18228, 20832, 26040]
+# Step -> epoch conversion using the ratio 20000 / 26041 (presumably total epochs over
+# total steps of the run -- confirm). int() truncates toward zero rather than rounding,
+# e.g. step 2604 maps to 1999, not 2000.
+CKPT_EPOCHS = [int(s * 20000 / 26041) for s in CKPT_STEPS]  # actual epoch (truncated, not rounded)
+ROOT = "/home/yurenh2/rrm/research/flossing/ckpt_evolution"
+OUT_DIR = "/home/yurenh2/rrm/research/flossing/plots_evolution"
+# Make sure the figure directory exists before any savefig() call below.
+import os; os.makedirs(OUT_DIR, exist_ok=True)
+
+# Load every checkpoint archive once, keyed by training step; later sections only read `datas`.
+datas = {}
+for s in CKPT_STEPS:
+    d = np.load(f"{ROOT}/step_{s}.npz")
+    datas[s] = d
+
+# --- plot 1: λ_1 vs training epoch, split by success/failure ---
+# Left panel: per-checkpoint group statistics of the exponents.
+# Right panel: fail-minus-success gap of λ_1 with exact accuracy on a twin y-axis.
+fig, axes = plt.subplots(1, 2, figsize=(13, 5))
+
+ax = axes[0]
+# Accumulators; each receives exactly one value per entry of CKPT_STEPS, in order.
+lam1_s_mean, lam1_f_mean = [], []
+lam1_s_std, lam1_f_std = [], []
+mean_s, mean_f = [], []
+mean_s_std, mean_f_std = [], []
+accs = []
+for s in CKPT_STEPS:
+    d = datas[s]
+    # Boolean mask over samples: True where the sample counts as a success.
+    succ = d["exact_correct"] > 0.5
+    # Indexed below as [sample, exponent index]; column 0 is plotted as λ_1.
+    L = d["lyap_spec"]
+    # λ_1 mean/std within the success group, then within the failure group.
+    lam1_s_mean.append(L[succ,0].mean()); lam1_s_std.append(L[succ,0].std())
+    lam1_f_mean.append(L[~succ,0].mean()); lam1_f_std.append(L[~succ,0].std())
+    # Per-sample mean over all exponents first, then mean/std of that across the group.
+    mean_s.append(L[succ].mean(axis=1).mean()); mean_s_std.append(L[succ].mean(axis=1).std())
+    mean_f.append(L[~succ].mean(axis=1).mean()); mean_f_std.append(L[~succ].mean(axis=1).std())
+    # Fraction of successful samples at this checkpoint.
+    accs.append(succ.mean())
+    # NOTE(review): a checkpoint with an empty success or failure group would yield NaN
+    # statistics here -- confirm every archive contains both groups.
+
+# The x-axis uses epochs (CKPT_EPOCHS), not raw training steps.
+x = CKPT_EPOCHS
+ax.errorbar(x, lam1_s_mean, yerr=lam1_s_std, fmt="C0-o", capsize=4, label=r"$\lambda_1$ success")
+ax.errorbar(x, lam1_f_mean, yerr=lam1_f_std, fmt="C3-o", capsize=4, label=r"$\lambda_1$ failure")
+# The "1:8" in the labels presumably reflects 8 exponents per sample -- the code itself
+# averages over however many columns `lyap_spec` has.
+ax.errorbar(x, mean_s, yerr=mean_s_std, fmt="C0--s", alpha=0.6, capsize=3, label=r"mean$_{1:8}$ success")
+ax.errorbar(x, mean_f, yerr=mean_f_std, fmt="C3--s", alpha=0.6, capsize=3, label=r"mean$_{1:8}$ failure")
+ax.axhline(0, color="k", ls=":", lw=0.6)  # zero reference line
+ax.set_xlabel("training epoch")
+ax.set_ylabel(r"Lyapunov exponent (per inner cycle)")
+ax.set_title("λ evolution during training\n(error bars = within-group std)")
+ax.legend(loc="best", fontsize=9)
+ax.grid(alpha=0.3)
+
+ax2 = axes[1]
+# Positive gap means the failure group has the larger mean λ_1.
+gap = np.array(lam1_f_mean) - np.array(lam1_s_mean)
+ax2.plot(x, gap, "C2-o", label=r"$\lambda_1^{fail} - \lambda_1^{succ}$", lw=2)
+ax2.axhline(0, color="k", ls=":", lw=0.6)
+# Accuracy lives on a second y-axis sharing the same x-axis.
+ax2b = ax2.twinx()
+ax2b.plot(x, accs, "C4-^", label="exact accuracy")
+ax2b.set_ylabel("test exact accuracy", color="C4")
+ax2b.tick_params(axis="y", labelcolor="C4")
+ax2.set_xlabel("training epoch")
+ax2.set_ylabel("λ gap (fail − succ)", color="C2")
+ax2.tick_params(axis="y", labelcolor="C2")
+ax2.set_title("succ/fail λ gap vs accuracy")
+# Each axis draws its own legend, placed in opposite corners.
+ax2.legend(loc="upper left")
+ax2b.legend(loc="lower right")
+ax2.grid(alpha=0.3)
+
+fig.tight_layout()
+fig.savefig(f"{OUT_DIR}/lyap_vs_training.png", dpi=130)
+plt.close()
+
+# --- plot 2: per-checkpoint λ spectrum (all 8) shape ---
+# One panel per checkpoint on a 2x3 grid (six panels for the six entries of CKPT_STEPS),
+# all sharing the y-axis so spectra are directly comparable.
+fig, axes = plt.subplots(2, 3, figsize=(15, 7), sharey=True)
+for ax, s in zip(axes.flat, CKPT_STEPS):
+    d = datas[s]
+    succ = d["exact_correct"] > 0.5  # success mask over samples
+    L = d["lyap_spec"]
+    # 1-based exponent index for the x-axis, one tick per column of `lyap_spec`.
+    idx = np.arange(1, L.shape[1]+1)
+    # Mean and std across samples for each exponent, per group.
+    mean_s_arr = L[succ].mean(0); std_s_arr = L[succ].std(0)
+    mean_f_arr = L[~succ].mean(0); std_f_arr = L[~succ].std(0)
+    # Shaded band = mean ± one std; the line on top is the group mean.
+    ax.fill_between(idx, mean_s_arr-std_s_arr, mean_s_arr+std_s_arr, color="C0", alpha=0.2)
+    ax.plot(idx, mean_s_arr, "C0-o", label=f"succ n={succ.sum()}")
+    ax.fill_between(idx, mean_f_arr-std_f_arr, mean_f_arr+std_f_arr, color="C3", alpha=0.2)
+    ax.plot(idx, mean_f_arr, "C3-o", label=f"fail n={(~succ).sum()}")
+    ax.axhline(0, color="k", ls=":", lw=0.6)
+    # The epoch in the title repeats the truncating step->epoch formula used for CKPT_EPOCHS.
+    ax.set_title(f"step_{s} (epoch≈{int(s*20000/26041)}, acc={succ.mean():.2%})")
+    ax.set_xlabel("λ index")
+    ax.legend(fontsize=8, loc="best")
+    ax.grid(alpha=0.3)
+# Only the left-most panel of each row gets a y-label.
+axes[0,0].set_ylabel(r"$\lambda_i$")
+axes[1,0].set_ylabel(r"$\lambda_i$")
+fig.tight_layout()
+fig.savefig(f"{OUT_DIR}/spectrum_per_ckpt.png", dpi=130)
+plt.close()
+
+# --- summary table ---
+# One stdout row per checkpoint: epoch, step, accuracy, group means of λ_1 and their gap,
+# then group means over the whole spectrum and their gap (gap = failure minus success).
+print(f"{'epoch':>6} {'step':>6} {'acc':>7} {'λ_1(s)':>9} {'λ_1(f)':>9} {'gap':>7} "
+      f"{'mean(s)':>9} {'mean(f)':>9} {'mean_gap':>9}")
+for s, ep in zip(CKPT_STEPS, CKPT_EPOCHS):
+    d = datas[s]
+    succ = d["exact_correct"] > 0.5
+    L = d["lyap_spec"]
+    # Mean leading exponent per group.
+    l1s, l1f = L[succ,0].mean(), L[~succ,0].mean()
+    # Mean over every sample and every exponent in the group.
+    ms, mf = L[succ].mean(), L[~succ].mean()
+    print(f"{ep:>6} {s:>6} {succ.mean():>7.2%} {l1s:+9.3f} {l1f:+9.3f} {l1f-l1s:+7.3f} "
+          f"{ms:+9.3f} {mf:+9.3f} {mf-ms:+9.3f}")
+
+print(f"\nplots → {OUT_DIR}/")
diff --git a/plot_directional_lyap_perturb.py b/plot_directional_lyap_perturb.py
new file mode 100644
index 0000000..218c912
--- /dev/null
+++ b/plot_directional_lyap_perturb.py
@@ -0,0 +1,136 @@
+"""Plot directional finite-difference Lyapunov perturbation robustness."""
+from __future__ import annotations
+
+import argparse
+import csv
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+
+
+# Line colour per run label (the `label` column of the summary CSVs).
+# Labels missing from this table fall back to matplotlib's default colour cycle,
+# because the plotting code passes `COLORS.get(label)` (None when absent).
+COLORS = {
+    "trm_baseline_best": "#334155",
+    "trm_multi4_best": "#0f766e",
+    "trm_multi4_final": "#dc2626",
+}
+# Marker per run label; the plotting code falls back to "o" for unknown labels.
+MARKERS = {
+    "trm_baseline_best": "o",
+    "trm_multi4_best": "s",
+    "trm_multi4_final": "X",
+}
+
+
+def read_rows(paths: list[Path]) -> list[dict[str, str]]:
+    """Read several CSV files and return all their data rows as one list.
+
+    Args:
+        paths: CSV files to read, each with its own header line.
+
+    Returns:
+        One dict per data row (header name -> cell text), in file order and
+        then row order. Files are not required to share the same columns.
+
+    Raises:
+        OSError: if a file cannot be opened.
+    """
+    rows: list[dict[str, str]] = []
+    for path in paths:
+        # newline="" is what the csv module asks for: it leaves line-ending
+        # handling to the reader, so quoted cells containing line breaks come
+        # back unchanged. The handle is deliberately not called `f`, which is
+        # the name of the float-parsing helper in this module.
+        with path.open(newline="") as handle:
+            rows.extend(csv.DictReader(handle))
+    return rows
+
+
+def f(row: dict[str, str], key: str) -> float:
+    """Return the cell stored under ``key`` in a CSV row, parsed as a float.
+
+    Raises KeyError when the column is absent and ValueError when the text is
+    not a valid float literal.
+    """
+    cell = row[key]
+    return float(cell)
+
+
+def write_combined(path: Path, rows: list[dict[str, str]]) -> None:
+    """Write ``rows`` to ``path`` as a single CSV file.
+
+    The header is the union of all row keys in first-seen order; a row that
+    lacks a column gets an empty cell there (DictWriter's default fill).
+    """
+    # A dict keeps insertion order, so it doubles as an ordered set of columns.
+    columns: dict[str, None] = {}
+    for record in rows:
+        for name in record:
+            columns.setdefault(name, None)
+    with path.open("w", newline="") as sink:
+        table = csv.DictWriter(sink, fieldnames=list(columns))
+        table.writeheader()
+        table.writerows(rows)
+
+
+def plot_metric_grid(rows: list[dict[str, str]], metric: str, ylabel: str, out: Path) -> None:
+    """Draw ``metric`` against sigma, one panel per ``perturb_after`` value, and save to ``out``.
+
+    Args:
+        rows: summary rows; the columns read are ``label``, ``perturb_after``,
+            ``sigma`` and ``metric``.
+        metric: name of the column plotted on the y-axis.
+        ylabel: y-axis label, also appended to the figure title.
+        out: destination image path.
+
+    Every panel uses a symlog x-axis and a fixed y-range of [-0.02, 1.02].
+    NOTE(review): main() also sends ``selected_growth_mean`` through this
+    function; confirm that metric really lies in [0, 1], otherwise the fixed
+    range hides part of the curve.
+    """
+
+    def step_of(row: dict[str, str]) -> int:
+        # `perturb_after` may be written as "3" or "3.0"; normalise to int.
+        return int(float(row["perturb_after"]))
+
+    series_names = sorted({row["label"] for row in rows})
+    steps = sorted({step_of(row) for row in rows})
+    n_panels = len(steps)
+    fig, axes = plt.subplots(1, n_panels, figsize=(4.1 * n_panels, 4.25), sharey=True)
+    if n_panels == 1:
+        # subplots() hands back a bare Axes for a single panel; wrap it so the
+        # indexing and zip below work uniformly.
+        axes = [axes]
+    for panel, step in zip(axes, steps):
+        for name in series_names:
+            curve = sorted(
+                (row for row in rows if row["label"] == name and step_of(row) == step),
+                key=lambda row: f(row, "sigma"),
+            )
+            sigmas = [f(row, "sigma") for row in curve]
+            values = [f(row, metric) for row in curve]
+            panel.plot(
+                sigmas,
+                values,
+                marker=MARKERS.get(name, "o"),
+                linewidth=2.0,
+                markersize=5,
+                color=COLORS.get(name),
+                label=name.replace("trm_", "").replace("_", " "),
+            )
+        panel.set_xscale("symlog", linthresh=1e-3)
+        panel.set_title(f"after {step}")
+        panel.set_xlabel("σ along selected direction")
+        panel.set_ylim(-0.02, 1.02)
+        panel.grid(alpha=0.23)
+    # Shared y-axis: label only the first panel, put the legend on the last.
+    axes[0].set_ylabel(ylabel)
+    axes[-1].legend(frameon=False, fontsize=8, loc="best")
+    fig.suptitle(f"Finite-difference Lyapunov-direction perturbation: {ylabel}")
+    fig.tight_layout()
+    fig.savefig(out, dpi=220, bbox_inches="tight")
+    plt.close(fig)
+
+
+def plot_sigma_slice(rows: list[dict[str, str]], sigma: float, out: Path) -> None:
+    """Plot retention and rescue against ``perturb_after`` at one sigma and save to ``out``.
+
+    Args:
+        rows: summary rows; the columns read are ``label``, ``sigma``,
+            ``perturb_after``, ``retain_worst_on_clean_success`` and
+            ``rescue_best_on_clean_fail``.
+        sigma: the perturbation size to slice at; rows match when their sigma
+            is within ``max(1e-12, sigma * 1e-6)`` of it.
+        out: destination image path.
+    """
+
+    def step_of(row: dict[str, str]) -> int:
+        return int(float(row["perturb_after"]))
+
+    # Absolute floor plus a relative term, so the match tolerates float noise
+    # from CSV round-tripping.
+    tolerance = max(1e-12, sigma * 1e-6)
+    series_names = sorted({row["label"] for row in rows})
+    panels = [
+        ("retain_worst_on_clean_success", "Clean-success robust retention"),
+        ("rescue_best_on_clean_fail", "Clean-fail best-sign rescue"),
+    ]
+    fig, axes = plt.subplots(1, 2, figsize=(11.2, 4.6), sharex=True)
+    for panel, (metric, title) in zip(axes, panels):
+        for name in series_names:
+            curve = sorted(
+                (
+                    row
+                    for row in rows
+                    if row["label"] == name and abs(float(row["sigma"]) - sigma) <= tolerance
+                ),
+                key=step_of,
+            )
+            panel.plot(
+                [step_of(row) for row in curve],
+                [f(row, metric) for row in curve],
+                marker=MARKERS.get(name, "o"),
+                linewidth=2.1,
+                markersize=6,
+                color=COLORS.get(name),
+                label=name.replace("trm_", "").replace("_", " "),
+            )
+        panel.set_title(title)
+        panel.set_xlabel("Perturb after ACT step")
+        panel.set_ylim(-0.02, 1.02)
+        panel.grid(alpha=0.23)
+    axes[0].set_ylabel("Conditional probability")
+    axes[1].legend(frameon=False, loc="best")
+    fig.suptitle(f"Lyapunov-direction conditional behavior at σ={sigma:g}")
+    fig.tight_layout()
+    fig.savefig(out, dpi=220, bbox_inches="tight")
+    plt.close(fig)
+
+
+def main() -> None:
+    """CLI entry point: merge the summary CSVs and render every figure.
+
+    Flags: ``--summaries`` (one or more CSV paths, required), ``--out-dir``
+    (required, created if missing) and ``--slice-sigma`` (default 0.03).
+    """
+    cli = argparse.ArgumentParser()
+    cli.add_argument("--summaries", nargs="+", required=True)
+    cli.add_argument("--out-dir", required=True)
+    cli.add_argument("--slice-sigma", type=float, default=0.03)
+    opts = cli.parse_args()
+
+    target = Path(opts.out_dir)
+    target.mkdir(parents=True, exist_ok=True)
+    rows = read_rows([Path(name) for name in opts.summaries])
+    write_combined(target / "directional_lyap_perturb_combined.csv", rows)
+
+    # (metric column, y-axis label, output file name), rendered in this order.
+    grid_specs = (
+        ("mean_sign_exact", "Mean ± sign exact", "directional_mean_sign_exact_grid.png"),
+        ("worst_sign_exact", "Worst-sign exact", "directional_worst_sign_exact_grid.png"),
+        ("retain_worst_on_clean_success", "Clean-success worst-sign retention", "directional_retention_worst_grid.png"),
+        ("rescue_best_on_clean_fail", "Clean-fail best-sign rescue", "directional_rescue_best_grid.png"),
+        ("selected_growth_mean", "Selected direction finite-time growth", "directional_selected_growth_grid.png"),
+    )
+    for metric, ylabel, filename in grid_specs:
+        plot_metric_grid(rows, metric, ylabel, target / filename)
+    plot_sigma_slice(rows, opts.slice_sigma, target / f"directional_retention_rescue_sigma{opts.slice_sigma:g}.png")
+    print(f"wrote {target}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/plot_initial_perturb_robustness.py b/plot_initial_perturb_robustness.py
new file mode 100644
index 0000000..0c8e8c8
--- /dev/null
+++ b/plot_initial_perturb_robustness.py
@@ -0,0 +1,123 @@
+"""Plot initial recurrent-state perturbation robustness curves."""
+from __future__ import annotations
+
+import argparse
+import csv
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+
+
+def read_rows(paths: list[Path]) -> list[dict[str, str]]:
+    """Load every CSV in ``paths`` and return all of their data rows as one list.
+
+    Args:
+        paths: CSV files to read, in order. Each file's first line is used as
+            its header by ``csv.DictReader``.
+
+    Returns:
+        One dict per data row (column name -> cell text), files concatenated
+        in the order given. An empty ``paths`` yields an empty list.
+
+    Raises:
+        OSError: if a path cannot be opened.
+    """
+    rows: list[dict[str, str]] = []
+    for path in paths:
+        # The csv module asks for files opened with newline="" so that line
+        # endings inside quoted fields reach the parser untranslated. The
+        # handle is deliberately not called `f`, which is a helper in this file.
+        with path.open(newline="") as handle:
+            rows.extend(csv.DictReader(handle))
+    return rows
+
+
+def f(row: dict[str, str], key: str) -> float:
+    """Return the cell stored under ``key`` in ``row``, parsed as a ``float``.
+
+    Raises ``KeyError`` when the column is absent and ``ValueError`` when the
+    text is not a valid float literal.
+    """
+    raw = row[key]
+    return float(raw)
+
+
+def write_combined(path: Path, rows: list[dict[str, str]]) -> None:
+    """Write ``rows`` to ``path`` as one CSV whose header is the union of all row keys.
+
+    Args:
+        path: destination file; it is overwritten if it already exists.
+        rows: row dicts, possibly with differing key sets.
+
+    Columns appear in the order their keys are first seen while scanning
+    ``rows``. A row that lacks a column gets an empty cell there
+    (``csv.DictWriter``'s default ``restval``).
+    """
+    # A dict keeps insertion order, so this is an ordered de-duplication
+    # without rescanning a list for every key of every row.
+    ordered_keys: dict[str, None] = {}
+    for row in rows:
+        ordered_keys.update(dict.fromkeys(row))
+    with path.open("w", newline="") as out:
+        writer = csv.DictWriter(out, fieldnames=list(ordered_keys))
+        writer.writeheader()
+        writer.writerows(rows)
+
+
+def plot_metric(rows: list[dict[str, str]], metric: str, ylabel: str, out: Path) -> None:
+    """Draw one ``metric``-versus-sigma curve per label and save the figure to ``out``.
+
+    Args:
+        rows: summary rows; each needs ``label``, ``sigma`` and ``metric`` columns.
+        metric: column plotted on the y axis (parsed with ``f``).
+        ylabel: y-axis label text.
+        out: path of the image file to write.
+
+    Curves are drawn in order of each label's first appearance in ``rows``,
+    with points sorted by sigma. The x axis is symlog and the y range is
+    fixed to [-0.02, 1.02].
+    """
+    # Styling for the known runs; any other label falls back to marker "o"
+    # and color=None.
+    palette = {
+        "trm_baseline_best": "#334155",
+        "trm_multi4_best": "#0f766e",
+        "trm_multi4_final": "#dc2626",
+    }
+    glyphs = {
+        "trm_baseline_best": "o",
+        "trm_multi4_best": "s",
+        "trm_multi4_final": "X",
+    }
+
+    # Distinct labels, kept in first-seen order.
+    series_names = list(dict.fromkeys(row["label"] for row in rows))
+
+    fig, ax = plt.subplots(figsize=(8.2, 5.0))
+    for name in series_names:
+        series = sorted(
+            (row for row in rows if row["label"] == name),
+            key=lambda row: f(row, "sigma"),
+        )
+        sigmas = [f(row, "sigma") for row in series]
+        values = [f(row, metric) for row in series]
+        pretty = name.replace("trm_", "").replace("_", " ")
+        ax.plot(
+            sigmas,
+            values,
+            marker=glyphs.get(name, "o"),
+            linewidth=2.2,
+            markersize=6,
+            color=palette.get(name),
+            label=pretty,
+        )
+
+    ax.set_xscale("symlog", linthresh=3e-5)
+    ax.set_xlabel("Initial recurrent-state perturbation σ")
+    ax.set_ylabel(ylabel)
+    ax.set_ylim(-0.02, 1.02)
+    ax.grid(alpha=0.24)
+    ax.legend(frameon=False, loc="best")
+    ax.set_title("TRM robustness to initial latent trajectory perturbations")
+
+    # Footnote placed below the axes (y = -0.20 in axes coordinates).
+    caption = (
+        "Perturbation is applied once to z_H/z_L after reset, then the model unrolls deterministically. "
+        "Mean rollout exact is per-trajectory accuracy over K=8 perturbed rollouts."
+    )
+    ax.text(
+        0.0,
+        -0.20,
+        caption,
+        transform=ax.transAxes,
+        ha="left",
+        va="top",
+        fontsize=9.2,
+        color="#475569",
+    )
+
+    fig.tight_layout()
+    fig.savefig(out, dpi=220, bbox_inches="tight")
+    plt.close(fig)
+
+
+def main() -> None:
+    """Merge the given summary CSVs and render the initial-perturbation figures.
+
+    Command-line flags:
+      --summaries  one or more summary CSV paths (required)
+      --out-dir    output directory, created when missing (required)
+
+    Writes ``initial_perturb_robustness_combined.csv`` plus one PNG per metric
+    into the output directory, then prints that directory.
+    """
+    cli = argparse.ArgumentParser()
+    cli.add_argument("--summaries", nargs="+", required=True)
+    cli.add_argument("--out-dir", required=True)
+    opts = cli.parse_args()
+
+    target = Path(opts.out_dir)
+    target.mkdir(parents=True, exist_ok=True)
+    summary_paths = [Path(item) for item in opts.summaries]
+    rows = read_rows(summary_paths)
+    write_combined(target / "initial_perturb_robustness_combined.csv", rows)
+
+    # (metric column, y-axis label, output file name); rendered in this order.
+    figure_specs = (
+        (
+            "mean_rollout_exact",
+            "Mean perturbed-rollout exact accuracy",
+            "initial_perturb_robustness_mean_rollout_exact.png",
+        ),
+        (
+            "pass_at_k",
+            "Pass@K exact accuracy",
+            "initial_perturb_robustness_pass_at_k.png",
+        ),
+        (
+            "all_k",
+            "All-K exact accuracy",
+            "initial_perturb_robustness_all_k.png",
+        ),
+    )
+    for metric, ylabel, filename in figure_specs:
+        plot_metric(rows, metric, ylabel, target / filename)
+    print(f"wrote {target}")
+
+
+# Run the CLI only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
diff --git a/plot_late_perturb_robustness.py b/plot_late_perturb_robustness.py
new file mode 100644
index 0000000..d28b3f0
--- /dev/null
+++ b/plot_late_perturb_robustness.py
@@ -0,0 +1,140 @@
+"""Plot late recurrent-state perturbation robustness curves."""
+from __future__ import annotations
+
+import argparse
+import csv
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+
+
+def read_rows(paths: list[Path]) -> list[dict[str, str]]:
+    """Load every CSV in ``paths`` and return all of their data rows as one list.
+
+    Args:
+        paths: CSV files to read, in order. Each file's first line is used as
+            its header by ``csv.DictReader``.
+
+    Returns:
+        One dict per data row (column name -> cell text), files concatenated
+        in the order given. An empty ``paths`` yields an empty list.
+
+    Raises:
+        OSError: if a path cannot be opened.
+    """
+    rows: list[dict[str, str]] = []
+    for path in paths:
+        # The csv module asks for files opened with newline="" so that line
+        # endings inside quoted fields reach the parser untranslated. The
+        # handle is deliberately not called `f`, which is a helper in this file.
+        with path.open(newline="") as handle:
+            rows.extend(csv.DictReader(handle))
+    return rows
+
+
+def f(row: dict[str, str], key: str) -> float:
+    """Return the cell stored under ``key`` in ``row``, parsed as a ``float``.
+
+    Raises ``KeyError`` when the column is absent and ``ValueError`` when the
+    text is not a valid float literal.
+    """
+    raw = row[key]
+    return float(raw)
+
+
+def write_combined(path: Path, rows: list[dict[str, str]]) -> None:
+    """Write ``rows`` to ``path`` as one CSV whose header is the union of all row keys.
+
+    Args:
+        path: destination file; it is overwritten if it already exists.
+        rows: row dicts, possibly with differing key sets.
+
+    Columns appear in the order their keys are first seen while scanning
+    ``rows``. A row that lacks a column gets an empty cell there
+    (``csv.DictWriter``'s default ``restval``).
+    """
+    # A dict keeps insertion order, so this is an ordered de-duplication
+    # without rescanning a list for every key of every row.
+    ordered_keys: dict[str, None] = {}
+    for row in rows:
+        ordered_keys.update(dict.fromkeys(row))
+    with path.open("w", newline="") as out:
+        writer = csv.DictWriter(out, fieldnames=list(ordered_keys))
+        writer.writeheader()
+        writer.writerows(rows)
+
+
+# Per-run plot styling as (label, line color, marker). Plot functions look
+# labels up with .get(), so a label missing here still plots with defaults.
+_SERIES_STYLE = (
+    ("trm_baseline_best", "#334155", "o"),
+    ("trm_multi4_best", "#0f766e", "s"),
+    ("trm_multi4_final", "#dc2626", "X"),
+)
+COLORS = {name: color for name, color, _marker in _SERIES_STYLE}
+MARKERS = {name: marker for name, _color, marker in _SERIES_STYLE}
+
+
+def plot_metric_grid(rows: list[dict[str, str]], metric: str, ylabel: str, out: Path) -> None:
+    """Draw a row of panels, one per ``perturb_after`` value, each showing ``metric`` versus sigma.
+
+    Args:
+        rows: summary rows; each needs ``label``, ``sigma``, ``perturb_after``
+            and ``metric`` columns.
+        metric: column plotted on the y axis (parsed with ``f``).
+        ylabel: y-axis label, also appended to the figure title.
+        out: path of the image file to write.
+
+    Labels and ``perturb_after`` values are each taken in sorted order. Every
+    panel uses a symlog x axis and a y range fixed to [-0.02, 1.02]; the
+    legend is attached to the last panel only.
+    """
+
+    def step_of(row: dict[str, str]) -> int:
+        # Goes through float() first, so a cell such as "4.0" parses too.
+        return int(float(row["perturb_after"]))
+
+    names = sorted({row["label"] for row in rows})
+    steps = sorted({step_of(row) for row in rows})
+    fig, axes = plt.subplots(1, len(steps), figsize=(4.0 * len(steps), 4.2), sharey=True)
+    # For a single panel plt.subplots returns a bare Axes, not a sequence.
+    panels = [axes] if len(steps) == 1 else axes
+
+    for ax, step in zip(panels, steps):
+        for name in names:
+            series = sorted(
+                (row for row in rows if row["label"] == name and step_of(row) == step),
+                key=lambda row: f(row, "sigma"),
+            )
+            sigmas = [f(row, "sigma") for row in series]
+            values = [f(row, metric) for row in series]
+            ax.plot(
+                sigmas,
+                values,
+                marker=MARKERS.get(name, "o"),
+                linewidth=2.0,
+                markersize=5,
+                color=COLORS.get(name),
+                label=name.replace("trm_", "").replace("_", " "),
+            )
+        ax.set_xscale("symlog", linthresh=1e-3)
+        ax.set_title(f"after {step}")
+        ax.set_xlabel("σ")
+        ax.grid(alpha=0.23)
+        ax.set_ylim(-0.02, 1.02)
+
+    panels[0].set_ylabel(ylabel)
+    panels[-1].legend(frameon=False, fontsize=8, loc="best")
+    fig.suptitle(f"Late-state perturbation robustness: {ylabel}")
+    fig.tight_layout()
+    fig.savefig(out, dpi=220, bbox_inches="tight")
+    plt.close(fig)
+
+
+def plot_sigma_slice(rows: list[dict[str, str]], sigma: float, out: Path) -> None:
+    """Plot retention and rescue against the perturbation step for one sigma value.
+
+    Args:
+        rows: summary rows; each needs ``label``, ``sigma``, ``perturb_after``,
+            ``retain_mean_on_clean_success`` and ``rescue_mean_on_clean_fail``.
+        sigma: the sigma to slice at; rows are kept when their sigma is within
+            ``max(1e-12, sigma * 1e-6)`` of it.
+        out: path of the image file to write.
+
+    The figure has two panels, the left for retention and the right for
+    rescue, with one curve per label in sorted order. The legend is attached
+    to the right panel.
+    """
+    names = sorted({row["label"] for row in rows})
+    panels_spec = [
+        ("retain_mean_on_clean_success", "Clean-success retention"),
+        ("rescue_mean_on_clean_fail", "Clean-fail rescue"),
+    ]
+    # Float-comparison slack for matching the sigma column against `sigma`.
+    tolerance = max(1e-12, sigma * 1e-6)
+
+    def step_of(row: dict[str, str]) -> int:
+        return int(float(row["perturb_after"]))
+
+    fig, axes = plt.subplots(1, 2, figsize=(11.2, 4.6), sharex=True)
+    for ax, (metric, title) in zip(axes, panels_spec):
+        for name in names:
+            series = sorted(
+                (
+                    row
+                    for row in rows
+                    if row["label"] == name and abs(float(row["sigma"]) - sigma) <= tolerance
+                ),
+                key=step_of,
+            )
+            steps = [step_of(row) for row in series]
+            values = [f(row, metric) for row in series]
+            ax.plot(
+                steps,
+                values,
+                marker=MARKERS.get(name, "o"),
+                linewidth=2.1,
+                markersize=6,
+                color=COLORS.get(name),
+                label=name.replace("trm_", "").replace("_", " "),
+            )
+        ax.set_title(title)
+        ax.set_xlabel("Perturb after ACT step")
+        ax.set_ylim(-0.02, 1.02)
+        ax.grid(alpha=0.23)
+
+    axes[0].set_ylabel("Conditional probability")
+    axes[1].legend(frameon=False, loc="best")
+    fig.suptitle(f"Late perturbation conditional behavior at σ={sigma:g}")
+    fig.tight_layout()
+    fig.savefig(out, dpi=220, bbox_inches="tight")
+    plt.close(fig)
+
+
+def main() -> None:
+    """Merge the given summary CSVs and render every late-perturbation figure.
+
+    Command-line flags:
+      --summaries    one or more summary CSV paths (required)
+      --out-dir      output directory, created when missing (required)
+      --slice-sigma  sigma used for the retention/rescue slice figure (default 0.1)
+    """
+    cli = argparse.ArgumentParser()
+    cli.add_argument("--summaries", nargs="+", required=True)
+    cli.add_argument("--out-dir", required=True)
+    cli.add_argument("--slice-sigma", type=float, default=0.1)
+    opts = cli.parse_args()
+
+    target = Path(opts.out_dir)
+    target.mkdir(parents=True, exist_ok=True)
+    summary_paths = [Path(item) for item in opts.summaries]
+    rows = read_rows(summary_paths)
+    write_combined(target / "late_perturb_robustness_combined.csv", rows)
+
+    # (metric column, y-axis label, output file name); figures are rendered in this order.
+    grid_specs = (
+        ("mean_rollout_exact", "Mean perturbed-rollout exact", "late_perturb_mean_rollout_exact_grid.png"),
+        ("pass_at_k", "Pass@K exact", "late_perturb_pass_at_k_grid.png"),
+        ("all_k", "All-K exact", "late_perturb_all_k_grid.png"),
+        ("retain_mean_on_clean_success", "Clean-success retention", "late_perturb_retention_grid.png"),
+        ("rescue_mean_on_clean_fail", "Clean-fail rescue", "late_perturb_rescue_grid.png"),
+    )
+    for metric, ylabel, filename in grid_specs:
+        plot_metric_grid(rows, metric, ylabel, target / filename)
+
+    slice_name = f"late_perturb_retention_rescue_sigma{opts.slice_sigma:g}.png"
+    plot_sigma_slice(rows, opts.slice_sigma, target / slice_name)
+    print(f"wrote {target}")
+
+
+# Run the CLI only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
diff --git a/plot_trm_chaos_trajectory.py b/plot_trm_chaos_trajectory.py
new file mode 100644
index 0000000..da909c1
--- /dev/null
+++ b/plot_trm_chaos_trajectory.py
@@ -0,0 +1,67 @@
+"""Plot TRM Lyapunov trajectory across training checkpoints."""
+import numpy as np
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+
+# Directory that holds the per-checkpoint diagnostic .npz files; the figure is
+# written here as well.
+ROOT = "/home/yurenh2/rrm/research/flossing"
+# (training step, epoch) of every checkpoint to plot, in training order.
+CKPTS = [
+    (26041, 5000),
+    (52082, 10000),
+    (78123, 15000),
+    (104164, 20000),
+    (130205, 25000),
+    (156246, 30000),
+    (182287, 35000),
+    (208328, 40000),
+    (234369, 45000),
+    (260410, 50000),
+]
+
+# One entry per checkpoint: accuracy, mean λ_1 over successes, mean λ_1 over
+# failures, and the fail-minus-success gap.
+acc_list, succ_lam, fail_lam, dlam = [], [], [], []
+for step, epoch in CKPTS:
+    # The with-block closes the .npz archive once both arrays have been read.
+    with np.load(f"{ROOT}/diag_trm_singleGPU_step{step}_512.npz") as diag:
+        succ = diag["exact_correct"] > 0.5
+        lam_1 = diag["lyap_spec"][:, 0]
+    # NOTE: a checkpoint with no successes (or no failures) gives NaN for the
+    # corresponding mean, and numpy emits a RuntimeWarning.
+    succ_mean = lam_1[succ].mean()
+    fail_mean = lam_1[~succ].mean()
+    acc_list.append(succ.mean())
+    succ_lam.append(succ_mean)
+    fail_lam.append(fail_mean)
+    dlam.append(fail_mean - succ_mean)
+
+epochs = [e for _, e in CKPTS]
+
+fig, axes = plt.subplots(1, 3, figsize=(15, 4.5))
+
+# Panel 1: accuracy per checkpoint, each point annotated with its value.
+ax = axes[0]
+ax.plot(epochs, acc_list, "ko-", lw=2, ms=8)
+ax.set_xlabel("epoch")
+ax.set_ylabel("exact_acc on 512 test")
+ax.set_title("TRM accuracy vs training")
+ax.grid(alpha=0.3)
+for e, a in zip(epochs, acc_list):
+    ax.text(e, a + 0.01, f"{a:.3f}", ha="center", fontsize=9)
+
+# Panel 2: mean λ_1 of successful vs failed samples, with a zero reference line.
+ax = axes[1]
+ax.plot(epochs, succ_lam, "C2o-", lw=2, ms=8, label="succ λ_1")
+ax.plot(epochs, fail_lam, "C3o-", lw=2, ms=8, label="fail λ_1")
+ax.axhline(0, color="k", lw=0.5, ls=":")
+ax.set_xlabel("epoch")
+ax.set_ylabel(r"$\lambda_1$ (top joint Lyap)")
+ax.set_title("TRM Lyapunov drift: succ → criticality, fail → chaos")
+ax.legend()
+ax.grid(alpha=0.3)
+
+# Panel 3: the fail-minus-success gap, each point annotated with its value.
+ax = axes[2]
+ax.plot(epochs, dlam, "C0o-", lw=2, ms=8)
+ax.set_xlabel("epoch")
+ax.set_ylabel(r"$\Delta\lambda$ = fail - succ")
+ax.set_title("Discrimination gap (non-monotonic)")
+ax.grid(alpha=0.3)
+for e, gap in zip(epochs, dlam):
+    ax.text(e, gap + 0.003, f"{gap:+.3f}", ha="center", fontsize=9)
+
+# The epoch range and checkpoint count are derived from CKPTS so the title
+# cannot drift out of date when the list changes (it used to read
+# "epoch 5K-25K, 5 checkpoints" while ten checkpoints up to 50K were plotted).
+fig.suptitle(
+    f"TRM chaos onset trajectory (single-GPU, epoch {min(epochs) // 1000}K-{max(epochs) // 1000}K, "
+    f"{len(CKPTS)} checkpoints)",
+    fontsize=12,
+)
+fig.tight_layout()
+out = f"{ROOT}/plots_trm_chaos_onset.png"
+fig.savefig(out, dpi=130)
+print(f"→ {out}")
+
+# Same numbers as a text table on stdout.
+print(f"\n{'epoch':>6} {'acc':>7} {'succ_λ':>9} {'fail_λ':>9} {'Δλ':>8}")
+for e, a, sl, fl, dl in zip(epochs, acc_list, succ_lam, fail_lam, dlam):
+    print(f"{e:>6} {a:>7.3f} {sl:>+9.4f} {fl:>+9.4f} {dl:>+8.4f}")
diff --git a/plot_trm_lyap_hist.py b/plot_trm_lyap_hist.py
new file mode 100644
index 0000000..ffa14b2
--- /dev/null
+++ b/plot_trm_lyap_hist.py
@@ -0,0 +1,69 @@
+"""Per-sample λ_1 histogram: success vs failure on TRM checkpoints."""
+import numpy as np
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+def roc_auc_score(y_true, y_score):
+    """Simple AUROC by exhaustive pair counting.
+
+    Args:
+        y_true: 1-D array-like of labels; entries equal to 1 are positives and
+            entries equal to 0 are negatives (any other value is ignored).
+        y_score: 1-D array-like of scores, same length as ``y_true``.
+
+    Returns:
+        The fraction of (positive, negative) pairs in which the positive has
+        the higher score, counting ties as one half. ``nan`` when either class
+        is empty.
+
+    Every positive is compared against every negative, so time and memory
+    grow with ``n_pos * n_neg``.
+    """
+    # Coerce first so plain lists/tuples work with the boolean-mask indexing below.
+    y_true = np.asarray(y_true)
+    y_score = np.asarray(y_score)
+    pos = y_score[y_true == 1]
+    neg = y_score[y_true == 0]
+    if len(pos) == 0 or len(neg) == 0:
+        return float("nan")
+    # Probability that pos > neg (with 0.5 for ties)
+    wins = np.mean(pos[:, None] > neg[None, :])
+    ties = np.mean(pos[:, None] == neg[None, :])
+    return float(wins + 0.5 * ties)
+
+# Directory that holds the per-checkpoint diagnostic .npz files; the figure is
+# written here as well.
+ROOT = "/home/yurenh2/rrm/research/flossing"
+# (training step, epoch) of every checkpoint to plot, in training order.
+CKPTS = [
+    (26041, 5000),
+    (78123, 15000),
+    (130205, 25000),
+    (182287, 35000),
+    (234369, 45000),
+]
+
+# Load each checkpoint exactly once; the histograms and the summary table
+# below both read from this list.
+loaded = []
+for step, epoch in CKPTS:
+    # The with-block closes the .npz archive once both arrays have been read.
+    with np.load(f"{ROOT}/diag_trm_singleGPU_step{step}_512.npz") as diag:
+        lam1 = diag["lyap_spec"][:, 0]
+        succ = diag["exact_correct"] > 0.5
+    # AUROC: predict fail (label=1) from λ_1. roc_auc_score itself returns
+    # NaN when either class is empty, so no extra guard is needed here.
+    fail_label = (~succ).astype(int)
+    auroc = roc_auc_score(fail_label, lam1)
+    loaded.append((epoch, lam1, succ, auroc))
+
+fig, axes = plt.subplots(2, 3, figsize=(16, 8), sharex=True)
+axes = axes.flatten()
+
+# Shared histogram bins; samples outside [-0.2, 0.3] are not drawn.
+bins = np.linspace(-0.2, 0.3, 60)
+
+# zip stops at the shorter input, so at most len(axes) checkpoints are drawn.
+for ax, (epoch, lam1, succ, auroc) in zip(axes, loaded):
+    ax.hist(lam1[succ], bins=bins, alpha=0.55, color="C2", density=False,
+            label=f"succ (n={succ.sum()}, μ={lam1[succ].mean():+.3f})")
+    ax.hist(lam1[~succ], bins=bins, alpha=0.55, color="C3", density=False,
+            label=f"fail (n={(~succ).sum()}, μ={lam1[~succ].mean():+.3f})")
+    ax.axvline(0, color="k", lw=0.5, ls=":")
+    ax.set_title(f"epoch {epoch:>5} | acc={succ.mean():.3f} AUROC={auroc:.3f}")
+    ax.set_xlabel(r"$\lambda_1$")
+    ax.set_ylabel("count")
+    ax.legend(fontsize=8, loc="upper left")
+    ax.grid(alpha=0.3)
+
+# Hide every panel that received no checkpoint (only the last one with the
+# current five-entry CKPTS).
+for spare in axes[len(loaded):]:
+    spare.axis("off")
+
+fig.suptitle("TRM per-sample $\\lambda_1$ distribution: success (green) vs failure (red)\n"
+             "across training checkpoints. As acc rises, both distributions drift right; gap stays large.",
+             fontsize=11)
+fig.tight_layout()
+out = f"{ROOT}/plots_trm_lyap_hist.png"
+fig.savefig(out, dpi=130)
+print(f"→ {out}")
+
+# Print AUROC summary
+print(f"\n{'epoch':>6} {'acc':>7} {'AUROC':>7}")
+for epoch, _lam1, succ, auroc in loaded:
+    print(f"{epoch:>6} {succ.mean():>7.3f} {auroc:>7.3f}")
diff --git a/problem_tracks/hrm_learning_events.csv b/problem_tracks/hrm_learning_events.csv
new file mode 100644
index 0000000..b136eff
--- /dev/null
+++ b/problem_tracks/hrm_learning_events.csv
@@ -0,0 +1,37 @@
+kind,from_step,to_step,from_epoch,to_epoch,event,n,from_token_acc_mean,to_token_acc_mean,lambda_max_before,lambda_max_after,lambda_max_delta,mean8_before,mean8_after,mean8_delta,tail_mean_5_8_before,tail_mean_5_8_after,tail_mean_5_8_delta,positive_count_before,positive_count_after,positive_count_delta,positive_sum_before,positive_sum_after,positive_sum_delta
+HRM,2604,5208,2000,4000,learned_fail_to_success,18,0.7578875223795573,1.0,-0.07657270464632246,-0.17219636092583337,-0.09562365627951092,-0.11285677997188436,-0.21286184340715408,-0.10000506343526973,-0.12777486008902392,-0.23019556059605545,-0.10242070050703155,0.0,0.0,0.0,0.0,0.0,0.0
+HRM,2604,5208,2000,4000,lost_success_to_fail,1,1.0,0.9876543283462524,-0.13450603187084198,-0.07122424244880676,0.06328178942203522,-0.22800411842763424,-0.1996746975928545,0.02832942083477974,-0.2574129141867161,-0.25476812198758125,0.0026447921991348267,0.0,0.0,0.0,0.0,0.0,0.0
+HRM,2604,5208,2000,4000,stayed_failure,486,0.6186302904606846,0.650408982494731,-0.09566140517500447,-0.12388100091046021,-0.028219595735455738,-0.11546308205869248,-0.16834527142863306,-0.05288218936994059,-0.12404842582366471,-0.18757295895199227,-0.06352453312832755,0.00205761316872428,0.0,-0.00205761316872428,7.382040621076592e-05,0.0,-7.382040621076592e-05
+HRM,2604,5208,2000,4000,stayed_success,7,1.0,1.0,-0.11943858010428292,-0.2040215368781771,-0.08458295677389417,-0.1954429488895195,-0.2456168457865715,-0.05017389689705202,-0.23143203716192925,-0.26664925685950686,-0.035217219697577615,0.0,0.0,0.0,0.0,0.0,0.0
+HRM,5208,7812,4000,6000,learned_fail_to_success,61,0.7771706102324314,1.0,-0.12015377101106722,-0.19384214617922657,-0.07368837516815936,-0.16908120575406757,-0.25768693831947737,-0.08860573256540982,-0.19075811123017405,-0.28217270231393515,-0.09141459108376113,0.0,0.0,0.0,0.0,0.0,0.0
+HRM,5208,7812,4000,6000,lost_success_to_fail,8,1.0,0.7608024701476097,-0.17022934928536415,-0.08782250410877168,0.08240684517659247,-0.20539564150385559,-0.15151164782582782,0.053883993678027764,-0.22217216342687607,-0.1734354724176228,0.048736691009253263,0.0,0.0,0.0,0.0,0.0,0.0
+HRM,5208,7812,4000,6000,stayed_failure,426,0.6330493253441484,0.6487857195693003,-0.12429110481985298,-0.09598997456555587,0.028301130254297112,-0.16831343441528263,-0.1516454847050923,0.016667949710190356,-0.1872746041962798,-0.17530284127211768,0.011971762924162155,0.0,0.028169014084507043,0.028169014084507043,0.0,0.0003333682517636952,0.0003333682517636952
+HRM,5208,7812,4000,6000,stayed_success,17,1.0,1.0,-0.18622649767819574,-0.2054964128662558,-0.019269915188060087,-0.22986270410611348,-0.2803904714391512,-0.05052776733303771,-0.2489816224312081,-0.31024084481246333,-0.0612592223812552,0.0,0.0,0.0,0.0,0.0,0.0
+HRM,7812,10416,6000,8000,learned_fail_to_success,37,0.7113780484006211,1.0,-0.08760806538693204,-0.16297455137041775,-0.0753664859834857,-0.14557246566589646,-0.2145561670343316,-0.06898370136843517,-0.16746616232636813,-0.23543579715329246,-0.06796963482692435,0.08108108108108109,0.0,-0.08108108108108109,0.0010173672847953196,0.0,-0.0010173672847953196
+HRM,7812,10416,6000,8000,lost_success_to_fail,23,1.0,0.7423510473707448,-0.1990003112865531,-0.0631436346017796,0.1358566766847735,-0.25483094001917733,-0.1147064414664941,0.14012449855268325,-0.2755120544005995,-0.1364581169479567,0.1390539374526428,0.0,0.08695652173913043,0.08695652173913043,0.0,0.0012514755615721579,0.0012514755615721579
+HRM,7812,10416,6000,8000,stayed_failure,397,0.6452094420828207,0.6558447610821472,-0.09660657626821281,-0.057608067364460065,0.03899850890375275,-0.152208786995813,-0.11895045726565188,0.033258329730161136,-0.17599558225488032,-0.14325271154312835,0.03274287071175198,0.022670025188916875,0.21158690176322417,0.1889168765743073,0.00026290248290656757,0.005988442640891181,0.0057255401579846136
+HRM,7812,10416,6000,8000,stayed_success,55,1.0,1.0,-0.19528732320124453,-0.1790378307754343,0.016249492425810207,-0.2658987205729566,-0.23670350800861012,0.029195212564346464,-0.293633671849966,-0.2604374277320775,0.03319624411788854,0.0,0.0,0.0,0.0,0.0,0.0
+HRM,10416,13020,8000,10000,learned_fail_to_success,89,0.7322790950871585,1.0,-0.04101799929441277,-0.15271086389528524,-0.11169286460087248,-0.09962495407328308,-0.1932698722248256,-0.09364491815154252,-0.12410837905795387,-0.2099194038408191,-0.08581102478286523,0.24719101123595505,0.0,-0.24719101123595505,0.006849302283718429,0.0,-0.006849302283718429
+HRM,10416,13020,8000,10000,lost_success_to_fail,27,1.0,0.7261088269728201,-0.17367485462239496,0.021513728808646125,0.19518858343104106,-0.21645684844676266,-0.03794527022020904,0.17851157822655364,-0.23550193473972655,-0.06345155982503288,0.1720503749146937,0.0,1.5925925925925926,1.5925925925925926,0.0,0.05347730467932437,0.05347730467932437
+HRM,10416,13020,8000,10000,stayed_failure,331,0.6413039419226055,0.646301892949015,-0.062453487620328825,-0.012852591411288687,0.04960089620904014,-0.12385183920142286,-0.06788353043882003,0.05596830876260283,-0.14792814935428117,-0.0904394651569504,0.05748868419733076,0.1933534743202417,0.8580060422960725,0.6646525679758308,0.00542781801540489,0.02899685861604139,0.023569040600636502
+HRM,10416,13020,8000,10000,stayed_success,65,1.0,1.0,-0.1721218156699951,-0.17306380122900009,-0.0009419855590050037,-0.23250671096432668,-0.21392998699671947,0.018576723967607206,-0.2565635504917457,-0.2321118678037937,0.02445168268795197,0.0,0.0,0.0,0.0,0.0,0.0
+HRM,13020,15624,10000,12000,learned_fail_to_success,70,0.7045855411461421,1.0,0.006014376534481666,-0.16225147018475192,-0.16826584671923359,-0.050570006326675604,-0.21387550181825646,-0.16330549549158085,-0.07363272376174532,-0.23519662864107108,-0.16156390487932576,1.2714285714285714,0.04285714285714286,-1.2285714285714286,0.05178038399067841,0.0010190683816160475,-0.05076131560906236
+HRM,13020,15624,10000,12000,lost_success_to_fail,53,1.0,0.7137200095743503,-0.15761817691909186,0.027278018394871702,0.18489619531396354,-0.1940195961591221,-0.025757582971462672,0.16826201318765943,-0.20938205160200596,-0.04790710619533268,0.1614749454066733,0.0,1.9811320754716981,1.9811320754716981,0.0,0.06513750078134625,0.06513750078134625
+HRM,13020,15624,10000,12000,stayed_failure,288,0.6396176285213895,0.650462964653141,-0.01421647026637787,0.01285882030697394,0.02707529057335181,-0.06928496676502702,-0.0433290175348999,0.025955949230127118,-0.09199432089897325,-0.06722141937012162,0.024772901528851636,0.8263888888888888,1.4201388888888888,0.59375,0.025754238017027673,0.05246546392016979,0.026711225903142122
+HRM,13020,15624,10000,12000,stayed_success,101,1.0,1.0,-0.1632341642559755,-0.18694999514350502,-0.02371583088752952,-0.20617256620161162,-0.23764922537283031,-0.031476659171218696,-0.22448365954626906,-0.2576010467226405,-0.03311738717637145,0.0,0.009900990099009901,0.009900990099009901,0.0,0.0006469214464178179,0.0006469214464178179
+HRM,15624,18228,12000,14000,learned_fail_to_success,103,0.7352271398294319,1.0,0.030490925259214544,-0.07534685567048496,-0.1058377809296995,-0.023994025667350587,-0.13732833610717615,-0.11333431043982556,-0.04735410741648469,-0.161848950434874,-0.11449484301838929,2.0679611650485437,0.1650485436893204,-1.9029126213592233,0.07311466875773158,0.0056452531665925265,-0.06746941559113906
+HRM,15624,18228,12000,14000,lost_success_to_fail,31,1.0,0.6945440077012585,-0.15140148384436483,0.02598349899879747,0.17738498284316231,-0.19938284069141973,-0.022327345878165355,0.1770554948132544,-0.2194306131362194,-0.04107577972659313,0.17835483340962627,0.0967741935483871,2.193548387096774,2.096774193548387,0.002301122152036236,0.05643840949614382,0.05413728734410758
+HRM,15624,18228,12000,14000,stayed_failure,238,0.6278659618702256,0.6326382416887444,0.008439117317805028,0.016986168880395183,0.008547051562590157,-0.04778371556219153,-0.03500017360435649,0.012783541957835038,-0.0715183711892846,-0.05606890030182948,0.015449470887455116,1.2647058823529411,1.4075630252100841,0.14285714285714285,0.04635096751417605,0.050277759948382766,0.003926792434206718
+HRM,15624,18228,12000,14000,stayed_success,140,1.0,1.0,-0.1824721887375095,-0.07673018319390913,0.10574200554360037,-0.2342356344892843,-0.15195893105800287,0.0822767034312814,-0.25485086226170617,-0.17965281875804068,0.0751980435036655,0.007142857142857143,0.11428571428571428,0.10714285714285714,0.0004667076149157115,0.004967132555404013,0.004500424940488301
+HRM,18228,20832,14000,16000,learned_fail_to_success,53,0.6801770340721562,1.0,0.029857781990255527,-0.09033776694065276,-0.1201955489309083,-0.02323552310622719,-0.1405046565759101,-0.11726913346968293,-0.044821613074504364,-0.1601320755017816,-0.11531046242727723,1.9433962264150944,0.03773584905660377,-1.9056603773584906,0.07517024736526369,0.001606761987479228,-0.07356348537778447
+HRM,18228,20832,14000,16000,lost_success_to_fail,62,1.0,0.6993229821805031,-0.07700098222572237,0.01894234193699254,0.09594332416271491,-0.1369828649574981,-0.034258763347444925,0.10272410161005316,-0.16047281825975065,-0.05768175114210481,0.10279106711764584,0.12903225806451613,1.5161290322580645,1.3870967741935485,0.004257798758185198,0.055054116856134046,0.05079631809794885
+HRM,18228,20832,14000,16000,stayed_failure,216,0.6298582544205366,0.6420324661389545,0.01511913989357978,0.01082697688838784,-0.0042921630051919405,-0.03606808479365708,-0.04700597469852457,-0.010937889904867491,-0.05667686088153274,-0.07056957299265479,-0.013892712111122044,1.3888888888888888,1.1851851851851851,-0.2037037037037037,0.045054048387669354,0.04524669718880937,0.0001926488011400175
+HRM,18228,20832,14000,16000,stayed_success,181,1.0,1.0,-0.0758502258741019,-0.08096245426597146,-0.005112228391869559,-0.14876315657345118,-0.14105833845662638,0.007704818116824814,-0.17609128060117776,-0.1643015043295467,0.011789776271631046,0.13812154696132597,0.09392265193370165,-0.04419889502762431,0.0055960006127519875,0.002948011963715705,-0.0026479886490362826
+HRM,20832,23436,16000,18000,learned_fail_to_success,81,0.6985215691872585,1.0,0.019365609987828606,-0.1429522323111693,-0.1623178422989979,-0.03549199819308005,-0.1934352585099358,-0.15794326031685574,-0.059009656096428824,-0.2128879784380573,-0.15387832234162846,1.5925925925925926,0.04938271604938271,-1.5432098765432098,0.054454889279800286,0.0016134955264903881,-0.052841393753309894
+HRM,20832,23436,16000,18000,lost_success_to_fail,56,1.0,0.7001763715275696,-0.0889871238863894,0.010244773570559116,0.0992318974569485,-0.13914207175756538,-0.04201006224095256,0.09713200951661283,-0.15911803399545274,-0.06400883004399864,0.09510920395145409,0.05357142857142857,1.25,1.1964285714285714,0.0015605158571686064,0.03895758586165928,0.037397070004490676
+HRM,20832,23436,16000,18000,stayed_failure,197,0.6368364998834387,0.6430406727766628,0.009870242634371543,-0.004182763141465431,-0.014053005775836973,-0.04772833506996654,-0.055244518491247334,-0.007516183421280795,-0.07126656950971166,-0.07650410321300027,-0.005237533703288607,1.1218274111675126,0.7918781725888325,-0.3299492385786802,0.04454718683349905,0.02350813897478682,-0.021039047858712228
+HRM,20832,23436,16000,18000,stayed_success,178,1.0,1.0,-0.08122936478852598,-0.1364729007381653,-0.05524353594963931,-0.1414963485435334,-0.2042526071021344,-0.06275625855860097,-0.1646907998960507,-0.2284994021351083,-0.06380860223905759,0.0898876404494382,0.0056179775280898875,-0.08426966292134831,0.002985166644761234,2.8162001726332675e-05,-0.0029570046430349014
+HRM,23436,26040,18000,20000,learned_fail_to_success,56,0.7129629680088588,1.0,0.005080638605813093,-0.1394322982648321,-0.1445129368706452,-0.044478569124012636,-0.17752792286825883,-0.13304935374424617,-0.06530492417940488,-0.19560894649475813,-0.13030402231535326,1.1964285714285714,0.0,-1.1964285714285714,0.036289493061866906,0.0,-0.036289493061866906
+HRM,23436,26040,18000,20000,lost_success_to_fail,59,1.0,0.6947060075856871,-0.13478506021833014,0.03853947392485294,0.1733245341431831,-0.18903239647791498,-0.018911945740879767,0.17012045073703522,-0.2095468487580439,-0.042722457550738356,0.16682439120730555,0.06779661016949153,2.1864406779661016,2.1186440677966103,0.0022151379261986684,0.08461770553129994,0.08240256760510127
+HRM,23436,26040,18000,20000,stayed_failure,197,0.6394059042337582,0.6293789582506655,-0.002714785689557932,0.03312437648933409,0.03583916217889203,-0.05454281095088508,-0.026915141721405106,0.027627669229479977,-0.07613567036232642,-0.05206237327705461,0.024073297085271804,0.8071065989847716,1.868020304568528,1.0609137055837563,0.024266581598078055,0.07105908346029122,0.04679250186221316
+HRM,23436,26040,18000,20000,stayed_success,200,1.0,1.0,-0.13959494297858327,-0.14599876768887043,-0.0064038247102871535,-0.20436154305643867,-0.19294297004467809,0.0114185730117606,-0.22776777878403665,-0.21553697974421085,0.012230799039825798,0.005,0.025,0.02,2.506418153643608e-05,0.0005806464888155461,0.0005555823072791099
diff --git a/problem_tracks/hrm_learning_groups.csv b/problem_tracks/hrm_learning_groups.csv
new file mode 100644
index 0000000..862a54a
--- /dev/null
+++ b/problem_tracks/hrm_learning_groups.csv
@@ -0,0 +1,6 @@
+kind,group,n,fraction,givens_mean,final_success_rate,final_token_acc_mean,initial_lambda_max,final_lambda_max,initial_mean8,final_mean8,initial_tail_mean_5_8,final_tail_mean_5_8,initial_positive_count,final_positive_count,initial_positive_sum,final_positive_sum
+HRM,always,7,0.013671875,33.857142857142854,1.0,1.0,-0.11943858010428292,-0.14200945092099054,-0.1954429488895195,-0.1990994273551873,-0.23143203716192925,-0.2316632675273078,0.0,0.0,0.0,0.0
+HRM,flaky_then_stable,136,0.265625,25.264705882352942,1.0,1.0,-0.09145989122471827,-0.14467768998891936,-0.11213886570562061,-0.18874008171084877,-0.12149259587749839,-0.20950002090402825,0.007352941176470588,0.029411764705882353,0.00026379939278259,0.0007114800186279942
+HRM,never,145,0.283203125,24.613793103448277,0.0,0.6184759497642517,-0.09740061664889599,0.030114806785874453,-0.11696181520819664,-0.029440750938995515,-0.12532571852207183,-0.05453352611592232,0.0,1.7310344827586206,0.0,0.06679639958993307
+HRM,stable_learned,113,0.220703125,25.911504424778762,1.0,1.0,-0.09373763412962445,-0.14458168358995324,-0.1166969095294949,-0.18998062464952534,-0.126381192636569,-0.2119278870522976,0.0,0.008849557522123894,0.0,0.00017139836486461944
+HRM,transient_or_regressed,111,0.216796875,24.26126126126126,0.0,0.6783450135776589,-0.09775018336268158,0.03993409140552562,-0.11691337225639874,-0.019361971456525788,-0.1249423275055649,-0.04386983102941067,0.0,2.2162162162162162,0.0,0.08383428943679072
diff --git a/problem_tracks/hrm_problem_tracks.csv b/problem_tracks/hrm_problem_tracks.csv
new file mode 100644
index 0000000..5abd9f4
--- /dev/null
+++ b/problem_tracks/hrm_problem_tracks.csv
@@ -0,0 +1,513 @@
+kind,panel_row,test_idx,group,n_success_checkpoints,transitions,first_success_step,first_success_epoch,stable_success_step,stable_success_epoch,final_success,final_token_acc,final_lambda_max,final_mean8,final_tail_mean_5_8,final_positive_count,final_positive_sum,success@2604,lambda_max@2604,mean8@2604,positive_count@2604,success@5208,lambda_max@5208,mean8@5208,positive_count@5208,success@7812,lambda_max@7812,mean8@7812,positive_count@7812,success@10416,lambda_max@10416,mean8@10416,positive_count@10416,success@13020,lambda_max@13020,mean8@13020,positive_count@13020,success@15624,lambda_max@15624,mean8@15624,positive_count@15624,success@18228,lambda_max@18228,mean8@18228,positive_count@18228,success@20832,lambda_max@20832,mean8@20832,positive_count@20832,success@23436,lambda_max@23436,mean8@23436,positive_count@23436,success@26040,lambda_max@26040,mean8@26040,positive_count@26040,givens,blanks,label_nonzero
+HRM,0,1156,flaky_then_stable,6,5,7812,6000,23436,18000,True,1.0,-0.11386578530073166,-0.1791773745790124,-0.20598649606108665,0.0,0.0,0,-0.08105741441249847,-0.10682921390980482,0.0,0,-0.1109667718410492,-0.17875822447240353,0.0,1,-0.20275813341140747,-0.22745217755436897,0.0,1,-0.19170622527599335,-0.23354909010231495,0.0,0,0.02433290146291256,-0.048782898811623454,1.0,1,-0.19548740983009338,-0.2606247067451477,0.0,1,-0.1103445515036583,-0.1598678445443511,0.0,0,0.0516466461122036,-0.0007332310196943581,4.0,1,-0.12459072470664978,-0.19830370135605335,0.0,1,-0.11386578530073166,-0.1791773745790124,0.0,26,55,81
+HRM,1,154226,flaky_then_stable,3,3,15624,12000,26040,20000,True,1.0,-0.17432032525539398,-0.19012608006596565,-0.20044921711087227,0.0,0.0,0,-0.0893569216132164,-0.10534530971199274,0.0,0,-0.11911436915397644,-0.18362042121589184,0.0,0,-0.13624557852745056,-0.16201233491301537,0.0,0,-0.06467679142951965,-0.12560781743377447,0.0,0,-0.035421449691057205,-0.07132356753572822,0.0,1,-0.12764427065849304,-0.1966280471533537,0.0,1,-0.12396441400051117,-0.16271582059562206,0.0,0,-0.003133973805233836,-0.026825598411960527,0.0,0,0.017508843913674355,-0.05640754266642034,1.0,1,-0.17432032525539398,-0.19012608006596565,0.0,25,56,81
+HRM,2,303891,never,0,0,,,,,False,0.5925925970077515,0.04334963485598564,-0.011775422201026231,-0.04489142680540681,3.0,0.09005289152264595,0,-0.08744083344936371,-0.11618569679558277,0.0,0,-0.031185444444417953,-0.15935212699696422,0.0,0,-0.0968359038233757,-0.14773352164775133,0.0,0,-0.014268256723880768,-0.10559696471318603,0.0,0,0.035230062901973724,-0.017916359880473465,3.0,0,0.07626649737358093,-0.020894282031804323,2.0,0,0.03003750555217266,-0.021227389224804938,2.0,0,0.035500667989254,-0.02550932369194925,1.0,0,-0.060438062995672226,-0.07342828949913383,0.0,0,0.04334963485598564,-0.011775422201026231,3.0,26,55,81
+HRM,3,222053,stable_learned,7,1,10416,8000,10416,8000,True,1.0,-0.2071765512228012,-0.24858969077467918,-0.26772093772888184,0.0,0.0,0,-0.09200228750705719,-0.10960910841822624,0.0,0,-0.09892820566892624,-0.14713604841381311,0.0,0,-0.03267478570342064,-0.09102069539949298,0.0,1,-0.10943122208118439,-0.21460826694965363,0.0,1,-0.17655587196350098,-0.20325032249093056,0.0,1,-0.012276182882487774,-0.05177123111207038,0.0,1,-0.08942705392837524,-0.16556559316813946,0.0,1,-0.10458061099052429,-0.1448260424658656,0.0,1,-0.1347821056842804,-0.2204152476042509,0.0,1,-0.2071765512228012,-0.24858969077467918,0.0,26,55,81
+HRM,4,363763,transient_or_regressed,1,2,23436,18000,,,False,0.9135802388191223,0.12078392505645752,0.019058187725022435,-0.02996430778875947,4.0,0.27232273295521736,0,-0.0956607311964035,-0.11826338898390532,0.0,0,-0.10222484916448593,-0.1463157655671239,0.0,0,-0.09935523569583893,-0.1558392345905304,0.0,0,-0.04948348551988602,-0.10907729156315327,0.0,0,0.02367616817355156,-0.04444574937224388,1.0,0,-0.006066928617656231,-0.04709025507327169,0.0,0,0.12511900067329407,-0.01771824376191944,1.0,0,0.08418536186218262,-0.023028862662613392,1.0,1,-0.15769964456558228,-0.17340322397649288,0.0,0,0.12078392505645752,0.019058187725022435,4.0,26,55,81
+HRM,5,179627,flaky_then_stable,6,3,7812,6000,18228,14000,True,1.0,-0.15184073150157928,-0.20697208866477013,-0.23023990541696548,0.0,0.0,0,-0.0831434428691864,-0.09786599408835173,0.0,0,-0.1521506905555725,-0.1728367581963539,0.0,1,-0.23640328645706177,-0.28269679844379425,0.0,1,-0.20472881197929382,-0.23640788905322552,0.0,0,0.05762114375829697,-0.03646425495389849,2.0,0,-0.012093840166926384,-0.040888887364417315,0.0,1,-0.10510289669036865,-0.15735683776438236,0.0,1,0.021617604419589043,-0.12414371152408421,1.0,1,-0.09578731656074524,-0.1900615580379963,0.0,1,-0.15184073150157928,-0.20697208866477013,0.0,25,56,81
+HRM,6,37589,stable_learned,6,1,13020,10000,13020,10000,True,1.0,-0.1684323251247406,-0.22049731947481632,-0.23834837973117828,0.0,0.0,0,-0.1239120289683342,-0.12881583534181118,0.0,0,-0.12672606110572815,-0.17071283608675003,0.0,0,-0.02478225901722908,-0.10847366834059358,0.0,0,-0.060695864260196686,-0.10035469476133585,0.0,1,-0.024248842149972916,-0.14628783473744988,0.0,1,-0.19895927608013153,-0.2523462176322937,0.0,1,-0.1855764389038086,-0.1984721515327692,0.0,1,-0.14784270524978638,-0.1645858120173216,0.0,1,-0.0734117329120636,-0.2032670211046934,0.0,1,-0.1684323251247406,-0.22049731947481632,0.0,27,54,81
+HRM,7,200316,transient_or_regressed,1,2,23436,18000,,,False,0.5802469253540039,0.07623837888240814,0.020627096440875903,-0.013689942366909236,5.0,0.23528186045587063,0,-0.10401137173175812,-0.11836320627480745,0.0,0,-0.17743319272994995,-0.1877981685101986,0.0,0,-0.10754406452178955,-0.14825919084250927,0.0,0,-0.04224729165434837,-0.12005956983193755,0.0,0,-0.03196986764669418,-0.08772195177152753,0.0,0,0.027935191988945007,-0.016914149280637503,2.0,0,0.07758761942386627,-0.02075550565496087,3.0,0,0.022579794749617577,-0.022846951498650014,3.0,1,-0.11384594440460205,-0.15906048938632011,0.0,0,0.07623837888240814,0.020627096440875903,5.0,26,55,81
+HRM,8,308059,transient_or_regressed,2,2,20832,16000,,,False,0.7160493731498718,-0.025471238419413567,-0.05381519463844597,-0.06943106465041637,0.0,0.0,0,-0.08827747404575348,-0.11831961572170258,0.0,0,-0.17799988389015198,-0.20639502443373203,0.0,0,-0.185085266828537,-0.20577313005924225,0.0,0,-0.020675936713814735,-0.11489771050401032,0.0,0,0.019663896411657333,-0.05113722616806626,1.0,0,-0.014644691720604897,-0.04146513412706554,0.0,0,0.07971490919589996,-0.033767752815037966,1.0,1,-0.11342085897922516,-0.15803005546331406,0.0,1,-0.0678880512714386,-0.18402507714927197,0.0,0,-0.025471238419413567,-0.05381519463844597,0.0,26,55,81
+HRM,9,83380,transient_or_regressed,1,2,23436,18000,,,False,0.5185185074806213,0.043796565383672714,-0.028576654382050037,-0.06477387808263302,2.0,0.07921162247657776,0,-0.06389918178319931,-0.09480574913322926,0.0,0,-0.12462137639522552,-0.17808099649846554,0.0,0,-0.14670979976654053,-0.19752134010195732,0.0,0,-0.12155409902334213,-0.17251552920788527,0.0,0,-0.04205986112356186,-0.1160407206043601,0.0,0,-0.050926052033901215,-0.0691212653182447,0.0,0,0.03615952283143997,-0.06071752915158868,1.0,0,-0.008771390654146671,-0.09717646439094096,0.0,1,-0.12687948346138,-0.19934801384806633,0.0,0,0.043796565383672714,-0.028576654382050037,2.0,24,57,81
+HRM,10,420254,transient_or_regressed,2,2,20832,16000,,,False,0.5555555820465088,-0.019189149141311646,-0.04978713905438781,-0.0634863143786788,0.0,0.0,0,-0.11247843503952026,-0.1202986016869545,0.0,0,-0.11566999554634094,-0.15669980458915234,0.0,0,-0.16508938372135162,-0.1893762368708849,0.0,0,-0.14036844670772552,-0.18333986774086952,0.0,0,-0.10684739053249359,-0.11775143723934889,0.0,0,-0.08686476200819016,-0.1029046755284071,0.0,0,-0.025273632258176804,-0.06229214137420058,0.0,1,-0.11132612079381943,-0.14773611165583134,0.0,1,-0.15090186893939972,-0.21543188393115997,0.0,0,-0.019189149141311646,-0.04978713905438781,0.0,24,57,81
+HRM,11,259891,never,0,0,,,,,False,0.5185185074806213,0.004739468451589346,-0.043685309879947454,-0.06040284689515829,1.0,0.004739468451589346,0,-0.10621242225170135,-0.12412736658006907,0.0,0,-0.09989108890295029,-0.1610138127580285,0.0,0,-0.07764464616775513,-0.14612294360995293,0.0,0,-0.1325894594192505,-0.15602816827595234,0.0,0,-0.04567009210586548,-0.11755132675170898,0.0,0,-0.04163777828216553,-0.09943865891546011,0.0,0,-0.04915861040353775,-0.07754568383097649,0.0,0,-0.056535668671131134,-0.08312755823135376,0.0,0,-0.011990513652563095,-0.05390846775844693,0.0,0,0.004739468451589346,-0.043685309879947454,1.0,24,57,81
+HRM,12,194338,never,0,0,,,,,False,0.5061728358268738,-0.02900182269513607,-0.05846175434999168,-0.07226954214274883,0.0,0.0,0,-0.1128225028514862,-0.125034942291677,0.0,0,-0.10998368263244629,-0.1651178114116192,0.0,0,-0.028161337599158287,-0.1232878181617707,0.0,0,-0.05795951187610626,-0.12270857859402895,0.0,0,0.05036511644721031,-0.03937802789732814,2.0,0,-0.018938547000288963,-0.050313004525378346,0.0,0,0.0029688223730772734,-0.029301978327566758,1.0,0,0.06491696834564209,-0.030313938856124878,1.0,0,0.031156431883573532,-0.06243638275191188,1.0,0,-0.02900182269513607,-0.05846175434999168,0.0,24,57,81
+HRM,13,362081,flaky_then_stable,5,3,13020,10000,26040,20000,True,1.0,-0.19864889979362488,-0.2134827021509409,-0.22491351887583733,0.0,0.0,0,-0.11833583563566208,-0.12770420964807272,0.0,0,-0.07866618782281876,-0.15092948824167252,0.0,0,-0.03755491226911545,-0.10978963691741228,0.0,0,-0.007456590421497822,-0.04996244760695845,0.0,1,-0.1682467758655548,-0.1950498316437006,0.0,1,-0.21181261539459229,-0.2455605696886778,0.0,1,0.08854904770851135,-0.12526044715195894,1.0,1,-0.08795183897018433,-0.15541246347129345,0.0,0,0.037649355828762054,-0.03314198151929304,1.0,1,-0.19864889979362488,-0.2134827021509409,0.0,27,54,81
+HRM,14,18267,always,10,0,2604,2000,2604,2000,True,1.0,-0.16348740458488464,-0.23423065058887005,-0.265121653676033,0.0,0.0,1,-0.08480454981327057,-0.17136348132044077,0.0,1,-0.1566430628299713,-0.20689905434846878,0.0,1,-0.27683499455451965,-0.29659897834062576,0.0,1,-0.11046811193227768,-0.2866243114694953,0.0,1,-0.20042376220226288,-0.24629518948495388,0.0,1,-0.26040735840797424,-0.29995203763246536,0.0,1,-0.17318940162658691,-0.22591937892138958,0.0,1,-0.016672465950250626,-0.1202231883071363,0.0,1,-0.14861562848091125,-0.2285411451011896,0.0,1,-0.16348740458488464,-0.23423065058887005,0.0,34,47,81
+HRM,15,119070,flaky_then_stable,7,3,7812,6000,13020,10000,True,1.0,-0.221805140376091,-0.2531065810471773,-0.26999012380838394,0.0,0.0,0,-0.09080832451581955,-0.10254616383463144,0.0,0,-0.13271638751029968,-0.15287203155457973,0.0,1,-0.19181323051452637,-0.2737619783729315,0.0,0,-0.08303822576999664,-0.11964304372668266,0.0,1,-0.1400378793478012,-0.17657064087688923,0.0,1,-0.24969522655010223,-0.2682602610439062,0.0,1,-0.06799595057964325,-0.13848207145929337,0.0,1,-0.12085722386837006,-0.14349190890789032,0.0,1,-0.11736245453357697,-0.17226053774356842,0.0,1,-0.221805140376091,-0.2531065810471773,0.0,25,56,81
+HRM,16,152975,never,0,0,,,,,False,0.5061728358268738,0.055018313229084015,-0.015288913389667869,-0.05784852243959904,3.0,0.12128844670951366,0,-0.10896807163953781,-0.11584682203829288,0.0,0,-0.12756267189979553,-0.16612873412668705,0.0,0,-0.10798763483762741,-0.1594917243346572,0.0,0,-0.050545137375593185,-0.14368581166490912,0.0,0,-0.04915986210107803,-0.06116308271884918,0.0,0,-0.013953926041722298,-0.04260884108953178,0.0,0,0.052421681582927704,-0.047442372888326645,1.0,0,-0.001067058416083455,-0.06646850719698705,0.0,0,-0.02797466143965721,-0.05872841738164425,0.0,0,0.055018313229084015,-0.015288913389667869,3.0,23,58,81
+HRM,17,172350,never,0,0,,,,,False,0.5802469253540039,0.17967337369918823,0.00436968170106411,-0.057625954039394855,2.0,0.3197377622127533,0,-0.10635079443454742,-0.1205888818949461,0.0,0,-0.14766661822795868,-0.1895786989480257,0.0,0,-0.17816829681396484,-0.19320157915353775,0.0,0,-0.16307875514030457,-0.18129798397421837,0.0,0,-0.034269239753484726,-0.0826706257648766,0.0,0,-0.027016913518309593,-0.09562730533070862,0.0,0,0.04754097759723663,-0.038888664450496435,1.0,0,-0.022090647369623184,-0.06377348490059376,0.0,0,-0.045584216713905334,-0.08662750199437141,0.0,0,0.17967337369918823,0.00436968170106411,2.0,25,56,81
+HRM,18,370209,never,0,0,,,,,False,0.4197530746459961,0.040444210171699524,-0.0285088410746539,-0.06228168308734894,3.0,0.050025682779960334,0,-0.1125393807888031,-0.12183179520070553,0.0,0,-0.14945277571678162,-0.17726413533091545,0.0,0,-0.08338265866041183,-0.16759356576949358,0.0,0,-0.030232243239879608,-0.10619758442044258,0.0,0,-0.04615113139152527,-0.11306843534111977,0.0,0,-0.008708708919584751,-0.05749970523174852,0.0,0,-0.013824855908751488,-0.034735935856588185,0.0,0,-0.019790388643741608,-0.05598206724971533,0.0,0,-0.05073218792676926,-0.07944504823535681,0.0,0,0.040444210171699524,-0.0285088410746539,3.0,23,58,81
+HRM,19,222327,transient_or_regressed,1,2,23436,18000,,,False,0.6296296119689941,0.03938264772295952,-0.034480547066777945,-0.058069732040166855,1.0,0.03938264772295952,0,-0.09552529454231262,-0.11621803045272827,0.0,0,-0.10875481367111206,-0.16988379880785942,0.0,0,-0.15804845094680786,-0.17253168858587742,0.0,0,0.028578411787748337,-0.08966566808521748,1.0,0,0.011596119031310081,-0.03028300593723543,2.0,0,0.07822664082050323,0.009378084359923378,3.0,0,0.03916004300117493,-0.03966142679564655,2.0,0,0.09189581871032715,0.0007137608481571078,4.0,1,-0.13091430068016052,-0.1847879197448492,0.0,0,0.03938264772295952,-0.034480547066777945,1.0,26,55,81
+HRM,20,263924,flaky_then_stable,3,5,13020,10000,26040,20000,True,1.0,-0.13712269067764282,-0.21246863529086113,-0.2510906457901001,0.0,0.0,0,-0.09840273857116699,-0.107863062992692,0.0,0,-0.11295482516288757,-0.15084492601454258,0.0,0,-0.06888436526060104,-0.14798007626086473,0.0,0,-0.04392886906862259,-0.15392471197992563,0.0,1,-0.15771366655826569,-0.20193818397819996,0.0,0,-0.008731599897146225,-0.07342194160446525,0.0,1,-0.06564664840698242,-0.08083161152899265,0.0,0,0.027525698766112328,-0.06580567290075123,1.0,0,-0.06353254616260529,-0.08304870873689651,0.0,1,-0.13712269067764282,-0.21246863529086113,0.0,24,57,81
+HRM,21,27024,never,0,0,,,,,False,0.5061728358268738,0.0010409265523776412,-0.025121498401858844,-0.04545409698039293,1.0,0.0010409265523776412,0,-0.08857142180204391,-0.11268060002475977,0.0,0,-0.13035564124584198,-0.15609588474035263,0.0,0,-0.12752017378807068,-0.18764889799058437,0.0,0,-0.14038711786270142,-0.16001477278769016,0.0,0,-0.007685316726565361,-0.06606742390431464,0.0,0,-0.012095706537365913,-0.06616893527098,0.0,0,-0.018788859248161316,-0.043070667423307896,0.0,0,-0.039641864597797394,-0.054449445102363825,0.0,0,-0.04294518753886223,-0.0652588102966547,0.0,0,0.0010409265523776412,-0.025121498401858844,1.0,22,59,81
+HRM,22,309268,never,0,0,,,,,False,0.6790123581886292,0.10945045948028564,0.00308007572311908,-0.034662656020373106,3.0,0.17365067452192307,0,-0.07909658551216125,-0.10371243860572577,0.0,0,-0.1267048716545105,-0.15484633669257164,0.0,0,-0.09151666611433029,-0.14461539406329393,0.0,0,0.07367819547653198,-0.046177340438589454,1.0,0,-0.027268078178167343,-0.06349068321287632,0.0,0,0.11319075524806976,0.0004632720956578851,4.0,0,0.018553629517555237,-0.027992765069939196,3.0,0,-0.0521879605948925,-0.07639155676588416,0.0,0,0.04771382361650467,-0.02159983740421012,1.0,0,0.10945045948028564,0.00308007572311908,3.0,26,55,81
+HRM,23,320288,stable_learned,2,1,23436,18000,23436,18000,True,1.0,-0.06234512850642204,-0.1540177478455007,-0.18079258129000664,0.0,0.0,0,-0.10391557216644287,-0.1190100759267807,0.0,0,-0.15939556062221527,-0.18440141715109348,0.0,0,-0.11168777942657471,-0.15165503416210413,0.0,0,0.035828106105327606,-0.11317651765421033,1.0,0,0.004227354656904936,-0.06838282250100747,1.0,0,-0.020567767322063446,-0.06093196477741003,0.0,0,0.03306933864951134,-0.011508563407915062,2.0,0,0.049855537712574005,-0.028003745013847947,2.0,1,-0.1767846941947937,-0.21426494047045708,0.0,1,-0.06234512850642204,-0.1540177478455007,0.0,24,57,81
+HRM,24,407124,stable_learned,9,1,5208,4000,5208,4000,True,1.0,-0.13275136053562164,-0.19022557884454727,-0.22108694165945053,0.0,0.0,0,-0.1408068984746933,-0.17659533396363258,0.0,1,-0.210317462682724,-0.26534375362098217,0.0,1,-0.10988695919513702,-0.26211460493505,0.0,1,-0.26431453227996826,-0.3139732889831066,0.0,1,-0.2043449878692627,-0.25291983783245087,0.0,1,-0.28749001026153564,-0.31058090925216675,0.0,1,-0.15242929756641388,-0.19355717860162258,0.0,1,-0.09217049181461334,-0.1259266883134842,0.0,1,-0.2065289318561554,-0.22659245878458023,0.0,1,-0.13275136053562164,-0.19022557884454727,0.0,33,48,81
+HRM,25,61471,flaky_then_stable,3,5,13020,10000,26040,20000,True,1.0,-0.10137206315994263,-0.14451754372566938,-0.16892528906464577,0.0,0.0,0,-0.07295535504817963,-0.11198519915342331,0.0,0,-0.1520610749721527,-0.19586916454136372,0.0,0,-0.15449796617031097,-0.17815839126706123,0.0,0,-0.08029519766569138,-0.13414807710796595,0.0,1,-0.1350322663784027,-0.16435758583247662,0.0,0,0.07944535464048386,-0.0028689385653706267,4.0,0,0.12466727197170258,0.04309645981993526,6.0,1,-0.05573032796382904,-0.12511733174324036,0.0,0,-0.034250713884830475,-0.049384926445782185,0.0,1,-0.10137206315994263,-0.14451754372566938,0.0,27,54,81
+HRM,26,315244,never,0,0,,,,,False,0.4691357910633087,0.0184895321726799,-0.023492409149184823,-0.04099574405699968,1.0,0.0184895321726799,0,-0.09548439085483551,-0.11676525790244341,0.0,0,-0.1297392100095749,-0.18947160243988037,0.0,0,-0.1166195422410965,-0.14651704765856266,0.0,0,-0.12598583102226257,-0.1535976193845272,0.0,0,0.022953523322939873,-0.06569734882214107,2.0,0,0.057052724063396454,-0.038119963370263577,2.0,0,0.04617641493678093,-0.021289305295795202,2.0,0,-0.0002951681090053171,-0.06721207759619574,0.0,0,-0.0008038398809731007,-0.09124159213388339,0.0,0,0.0184895321726799,-0.023492409149184823,1.0,22,59,81
+HRM,27,175529,transient_or_regressed,6,4,5208,4000,,,False,0.7777777910232544,0.032243307679891586,-0.008234312506829156,-0.03668481484055519,3.0,0.08108155429363251,0,-0.06429305672645569,-0.11715532653033733,0.0,1,-0.21041452884674072,-0.24830660037696362,0.0,1,-0.19868254661560059,-0.22545412741601467,0.0,1,-0.16130119562149048,-0.21250495873391628,0.0,0,-0.009298698976635933,-0.06485629570670426,0.0,1,-0.2582467198371887,-0.2767813876271248,0.0,1,-0.12507152557373047,-0.18173273652791977,0.0,1,-0.114522784948349,-0.16012751311063766,0.0,0,0.0778331533074379,0.004617098384187557,3.0,0,0.032243307679891586,-0.008234312506829156,3.0,24,57,81
+HRM,28,114564,flaky_then_stable,7,3,7812,6000,13020,10000,True,1.0,-0.14082352817058563,-0.17955897189676762,-0.1960381455719471,0.0,0.0,0,-0.07562214136123657,-0.10023593623191118,0.0,0,-0.11431284993886948,-0.1839986639097333,0.0,1,-0.2668617367744446,-0.30299627780914307,0.0,0,-0.043397702276706696,-0.10335363913327456,0.0,1,-0.16521930694580078,-0.1978563517332077,0.0,1,-0.2418835163116455,-0.2819014899432659,0.0,1,-0.12410382926464081,-0.1583070456981659,0.0,1,-0.08658182621002197,-0.1478037890046835,0.0,1,-0.06806806474924088,-0.16590104904025793,0.0,1,-0.14082352817058563,-0.17955897189676762,0.0,27,54,81
+HRM,29,141970,transient_or_regressed,1,2,13020,10000,,,False,0.5802469253540039,0.0019758567214012146,-0.03594463085755706,-0.05422841012477875,1.0,0.0019758567214012146,0,-0.12402302026748657,-0.12979137431830168,0.0,0,-0.0933777317404747,-0.1610100669786334,0.0,0,-0.05767769366502762,-0.13437839783728123,0.0,0,-0.1065659373998642,-0.149288447573781,0.0,1,-0.18426166474819183,-0.2112691942602396,0.0,0,0.01957218162715435,-0.06127346120774746,1.0,0,-0.020711898803710938,-0.03982161544263363,0.0,0,-0.06364628672599792,-0.1303761014714837,0.0,0,-0.023592999204993248,-0.046610036166384816,0.0,0,0.0019758567214012146,-0.03594463085755706,1.0,24,57,81
+HRM,30,138609,transient_or_regressed,2,2,18228,14000,,,False,0.790123462677002,0.04834447056055069,-0.060740213841199875,-0.0847369134426117,1.0,0.04834447056055069,0,-0.11932717263698578,-0.13575006555765867,0.0,0,-0.17435824871063232,-0.19820700399577618,0.0,0,-0.1452445685863495,-0.17144746892154217,0.0,0,-0.1558118313550949,-0.17436571419239044,0.0,0,-0.013432899489998817,-0.09919752390123904,0.0,0,-0.06495583802461624,-0.09036845993250608,0.0,1,-0.12537212669849396,-0.15712442621588707,0.0,1,-0.12930503487586975,-0.15732655115425587,0.0,0,-0.02944444678723812,-0.07790761790238321,0.0,0,0.04834447056055069,-0.060740213841199875,1.0,23,58,81
+HRM,31,256175,never,0,0,,,,,False,0.7654321193695068,-0.011767792515456676,-0.07463992375414819,-0.09481466189026833,0.0,0.0,0,-0.10305187106132507,-0.11352668143808842,0.0,0,-0.14816050231456757,-0.18363601714372635,0.0,0,-0.12014082074165344,-0.15461034700274467,0.0,0,-0.04617945849895477,-0.13017528597265482,0.0,0,-0.04311029613018036,-0.08403747156262398,0.0,0,-0.0117974653840065,-0.04243318387307227,0.0,0,0.022835545241832733,-0.024318202398717403,1.0,0,0.09579571336507797,-0.009491208242252469,2.0,0,0.03535272181034088,-0.04186463775113225,2.0,0,-0.011767792515456676,-0.07463992375414819,0.0,26,55,81
+HRM,32,420358,transient_or_regressed,1,2,20832,16000,,,False,0.8148148059844971,-0.003655943786725402,-0.049439629045082256,-0.0709108579903841,0.0,0.0,0,-0.10507400333881378,-0.11727395001798868,0.0,0,-0.17358806729316711,-0.1845140289515257,0.0,0,-0.15846125781536102,-0.17846184596419334,0.0,0,-0.042351700365543365,-0.11959797516465187,0.0,0,-0.05211572349071503,-0.07909520715475082,0.0,0,0.01665734127163887,-0.006205882993526757,3.0,0,0.06609850376844406,-0.010481345656444319,3.0,1,-0.14066101610660553,-0.1739635355770588,0.0,0,-0.00840146653354168,-0.042836137814447284,0.0,0,-0.003655943786725402,-0.049439629045082256,0.0,25,56,81
+HRM,33,175168,flaky_then_stable,9,2,2604,2000,7812,6000,True,1.0,-0.1665867418050766,-0.22541602328419685,-0.24837910383939743,0.0,0.0,1,-0.13450603187084198,-0.22800411842763424,0.0,0,-0.07122424244880676,-0.1996746975928545,0.0,1,-0.08253804594278336,-0.22024978790432215,0.0,1,-0.2533324360847473,-0.3142636716365814,0.0,1,-0.19597694277763367,-0.23878281377255917,0.0,1,-0.17401039600372314,-0.31337161734700203,0.0,1,-0.03600281476974487,-0.18809830583631992,0.0,1,-0.08287748694419861,-0.15253890305757523,0.0,1,-0.11349005997180939,-0.2193777859210968,0.0,1,-0.1665867418050766,-0.22541602328419685,0.0,34,47,81
+HRM,34,181621,flaky_then_stable,2,3,18228,14000,26040,20000,True,1.0,-0.15024800598621368,-0.19101960211992264,-0.21175885573029518,0.0,0.0,0,-0.11369182169437408,-0.11868236400187016,0.0,0,-0.15570062398910522,-0.16681717708706856,0.0,0,-0.03403685986995697,-0.16137185506522655,0.0,0,-0.0608392171561718,-0.13530919468030334,0.0,0,-0.035597071051597595,-0.08769796323031187,0.0,0,-0.038817595690488815,-0.08286053547635674,0.0,1,-0.04481222853064537,-0.1361990044824779,0.0,0,0.0008891843026503921,-0.06377688185602892,1.0,0,-0.05787087231874466,-0.07815438089892268,0.0,1,-0.15024800598621368,-0.19101960211992264,0.0,23,58,81
+HRM,35,391825,transient_or_regressed,5,2,13020,10000,,,False,0.7530864477157593,0.12704923748970032,0.05778194451704621,0.029062497429549694,8.0,0.4622555561363697,0,-0.10753694176673889,-0.12086903676390648,0.0,0,-0.1151936799287796,-0.17091068997979164,0.0,0,-0.06583940982818604,-0.10714899841696024,0.0,0,-0.07929041981697083,-0.1008538231253624,0.0,1,-0.10361742973327637,-0.14341271482408047,0.0,1,-0.19410620629787445,-0.24481770396232605,0.0,1,-0.06393851339817047,-0.09506503213196993,0.0,1,-0.07276217639446259,-0.098085748963058,0.0,1,-0.16046491265296936,-0.2130246665328741,0.0,0,0.12704923748970032,0.05778194451704621,8.0,27,54,81
+HRM,36,302029,transient_or_regressed,1,2,23436,18000,,,False,0.6172839403152466,0.0050135040655732155,-0.03279489924898371,-0.05290093831717968,2.0,0.009716779459267855,0,-0.11489268392324448,-0.12282780092209578,0.0,0,-0.1508019119501114,-0.1808254737406969,0.0,0,-0.12742599844932556,-0.16141762025654316,0.0,0,-0.15709877014160156,-0.1856546849012375,0.0,0,-0.07663728296756744,-0.11306761763989925,0.0,0,-0.03566466271877289,-0.07422378705814481,0.0,0,-0.054249852895736694,-0.08044442208483815,0.0,0,-0.03390160948038101,-0.09127283887937665,0.0,1,-0.07155365496873856,-0.11543521285057068,0.0,0,0.0050135040655732155,-0.03279489924898371,2.0,26,55,81
+HRM,37,393748,flaky_then_stable,5,3,13020,10000,20832,16000,True,1.0,-0.12021763622760773,-0.15834645740687847,-0.1777048297226429,0.0,0.0,0,-0.10418527573347092,-0.10856868978589773,0.0,0,-0.13696929812431335,-0.18567239865660667,0.0,0,-0.1249074637889862,-0.15084140561521053,0.0,0,-0.0504322350025177,-0.10502054262906313,0.0,1,-0.12825113534927368,-0.20351621136069298,0.0,1,-0.20912973582744598,-0.22708359733223915,0.0,0,-0.024831410497426987,-0.04951932793483138,0.0,1,-0.11688636243343353,-0.15107431262731552,0.0,1,-0.08803142607212067,-0.16078372299671173,0.0,1,-0.12021763622760773,-0.15834645740687847,0.0,25,56,81
+HRM,38,47424,transient_or_regressed,1,2,15624,12000,,,False,0.4444444477558136,-0.03562118113040924,-0.08595130313187838,-0.099178247153759,0.0,0.0,0,-0.10186590999364853,-0.12060953211039305,0.0,0,-0.11284506320953369,-0.1574572492390871,0.0,0,-0.06217420846223831,-0.11049684509634972,0.0,0,-0.1521277278661728,-0.15930414386093616,0.0,0,-0.08675593137741089,-0.11125319171696901,0.0,1,-0.16189727187156677,-0.24484550952911377,0.0,0,-0.015672920271754265,-0.07032235222868621,0.0,0,-0.05612635239958763,-0.09270093077793717,0.0,0,0.023527730256319046,-0.05946270143613219,1.0,0,-0.03562118113040924,-0.08595130313187838,0.0,23,58,81
+HRM,39,239376,flaky_then_stable,4,5,13020,10000,23436,18000,True,1.0,-0.15730294585227966,-0.20367240719497204,-0.2183753028512001,0.0,0.0,0,-0.0894140750169754,-0.10393067356199026,0.0,0,-0.15371064841747284,-0.1670465674251318,0.0,0,-0.1107216626405716,-0.12772524449974298,0.0,0,-0.04314578324556351,-0.11236922489479184,0.0,1,-0.143083393573761,-0.1909378319978714,0.0,0,0.05012030154466629,0.001938328961841762,3.0,1,-0.0739908218383789,-0.13413155637681484,0.0,0,0.0068004075437784195,-0.033390412805601954,2.0,1,-0.17706583440303802,-0.20227940380573273,0.0,1,-0.15730294585227966,-0.20367240719497204,0.0,26,55,81
+HRM,40,377585,flaky_then_stable,6,5,5208,4000,18228,14000,True,1.0,-0.1885644644498825,-0.22663160786032677,-0.2500862143933773,0.0,0.0,0,-0.018842045217752457,-0.08999251900240779,0.0,1,-0.19724735617637634,-0.2134365513920784,0.0,0,-0.08788061141967773,-0.15246665477752686,0.0,1,-0.19320768117904663,-0.24525247514247894,0.0,0,0.04470159858465195,-0.0054908625315874815,3.0,0,0.1290965974330902,0.03630903565499466,6.0,1,-0.11366662383079529,-0.1267473828047514,0.0,1,-0.06908126175403595,-0.10755722224712372,0.0,1,-0.09442661702632904,-0.2300373613834381,0.0,1,-0.1885644644498825,-0.22663160786032677,0.0,29,52,81
+HRM,41,57082,transient_or_regressed,3,4,15624,12000,,,False,0.8148148059844971,0.01519741676747799,-0.026699264883063734,-0.04877570457756519,2.0,0.025558187626302242,0,-0.11607958376407623,-0.1271662199869752,0.0,0,-0.10482919216156006,-0.14918703213334084,0.0,0,-0.03139419108629227,-0.11511425394564867,0.0,0,-0.04769296199083328,-0.09436270454898477,0.0,0,-0.041893281042575836,-0.06619896739721298,0.0,1,-0.09131751954555511,-0.16970854438841343,0.0,1,-0.004722417797893286,-0.02594325685640797,0.0,0,0.06314264982938766,0.018296758993528783,6.0,1,-0.14902850985527039,-0.193726921454072,0.0,0,0.01519741676747799,-0.026699264883063734,2.0,17,64,81
+HRM,42,401651,flaky_then_stable,4,3,15624,12000,23436,18000,True,1.0,-0.15132591128349304,-0.1931328270584345,-0.22105104103684425,0.0,0.0,0,-0.1010332852602005,-0.11515139508992434,0.0,0,-0.1531202346086502,-0.19696124270558357,0.0,0,-0.08326109498739243,-0.1177400192245841,0.0,0,0.056875161826610565,-0.02938440308207646,1.0,0,0.02743823640048504,-0.03416996350279078,1.0,1,-0.1692737191915512,-0.24249800108373165,0.0,1,-0.14972111582756042,-0.18100756965577602,0.0,0,0.007309155538678169,-0.04505196923855692,2.0,1,-0.17209963500499725,-0.2108390126377344,0.0,1,-0.15132591128349304,-0.1931328270584345,0.0,29,52,81
+HRM,43,414519,transient_or_regressed,2,2,20832,16000,,,False,0.7407407164573669,0.06540856510400772,-0.016345877782441676,-0.054054238833487034,2.0,0.11258906871080399,0,-0.10790689289569855,-0.11979179922491312,0.0,0,-0.10218599438667297,-0.17611240409314632,0.0,0,-0.053971659392118454,-0.10940587380900979,0.0,0,-0.1477566659450531,-0.1700829491019249,0.0,0,-0.04524587094783783,-0.07865988556295633,0.0,0,0.11503288149833679,0.010628473130054772,3.0,0,0.04928430914878845,-0.016339028836227953,3.0,1,-0.09336214512586594,-0.13272842671722174,0.0,1,-0.20527921617031097,-0.22134756110608578,0.0,0,0.06540856510400772,-0.016345877782441676,2.0,25,56,81
+HRM,44,18198,never,0,0,,,,,False,0.6419752836227417,0.016234908252954483,-0.047916961601004004,-0.07104330882430077,1.0,0.016234908252954483,0,-0.11262478679418564,-0.12072780914604664,0.0,0,-0.12465169280767441,-0.16655346285551786,0.0,0,-0.11523475497961044,-0.16366322617977858,0.0,0,-0.032260529696941376,-0.12725541461259127,0.0,0,-0.00829196348786354,-0.06069606263190508,0.0,0,-0.04083418846130371,-0.061916714534163475,0.0,0,0.01586374267935753,-0.04475804651156068,1.0,0,0.018096186220645905,-0.0499706503469497,1.0,0,0.07020419090986252,-0.012884903699159622,3.0,0,0.016234908252954483,-0.047916961601004004,1.0,26,55,81
+HRM,45,273322,flaky_then_stable,5,3,13020,10000,26040,20000,True,1.0,-0.23042401671409607,-0.2548005022108555,-0.27316083014011383,0.0,0.0,0,-0.09165729582309723,-0.10871345363557339,0.0,0,-0.05470428988337517,-0.1534811588935554,0.0,0,-0.11101587116718292,-0.15851191338151693,0.0,0,-0.12224595993757248,-0.14160047750920057,0.0,1,-0.15392255783081055,-0.19679834134876728,0.0,1,-0.17096614837646484,-0.26135599985718727,0.0,1,-0.15168538689613342,-0.18850693851709366,0.0,1,-0.14738763868808746,-0.15940989181399345,0.0,0,-0.000522698275744915,-0.05772980221081525,0.0,1,-0.23042401671409607,-0.2548005022108555,0.0,25,56,81
+HRM,46,308136,never,0,0,,,,,False,0.5802469253540039,0.06609658896923065,-0.013299722457304597,-0.04814209323376417,2.0,0.12633198499679565,0,-0.10115837305784225,-0.10855703055858612,0.0,0,-0.06649649888277054,-0.11969687882810831,0.0,0,-0.12415221333503723,-0.16829665005207062,0.0,0,0.052855413407087326,-0.07308309804648161,1.0,0,-0.05200894922018051,-0.06402593804523349,0.0,0,-0.009578708559274673,-0.04711902793496847,0.0,0,-0.006147616542875767,-0.043172132573090494,0.0,0,-0.06595639884471893,-0.0731706228107214,0.0,0,-0.04535192996263504,-0.05884601455181837,0.0,0,0.06609658896923065,-0.013299722457304597,2.0,26,55,81
+HRM,47,186015,flaky_then_stable,7,5,5208,4000,20832,16000,True,1.0,-0.18855838477611542,-0.22415492497384548,-0.23936858028173447,0.0,0.0,0,-0.08886495232582092,-0.10095743089914322,0.0,1,-0.2122034728527069,-0.23478780314326286,0.0,0,-0.11589555442333221,-0.1496059698984027,0.0,1,-0.16028094291687012,-0.25092738680541515,0.0,1,-0.18479326367378235,-0.21524171717464924,0.0,1,-0.20324242115020752,-0.2406210396438837,0.0,0,0.042976051568984985,-0.020120785804465413,2.0,1,-0.06397520750761032,-0.1336366105824709,0.0,1,-0.14453516900539398,-0.21125951781868935,0.0,1,-0.18855838477611542,-0.22415492497384548,0.0,26,55,81
+HRM,48,221508,stable_learned,8,1,7812,6000,7812,6000,True,1.0,-0.15657763183116913,-0.1808254774659872,-0.1909174770116806,0.0,0.0,0,-0.0843835324048996,-0.10528142936527729,0.0,0,-0.18995267152786255,-0.21291128918528557,0.0,1,-0.19743287563323975,-0.23351897485554218,0.0,1,-0.19875383377075195,-0.22120092995464802,0.0,1,-0.24024248123168945,-0.26890288293361664,0.0,1,-0.12022670358419418,-0.2082744287326932,0.0,1,-0.17581701278686523,-0.2059245239943266,0.0,1,-0.09893756359815598,-0.16699033323675394,0.0,1,-0.20890413224697113,-0.26797126792371273,0.0,1,-0.15657763183116913,-0.1808254774659872,0.0,27,54,81
+HRM,49,414006,transient_or_regressed,1,2,18228,14000,,,False,0.7654321193695068,0.009977901354432106,-0.06836370308883488,-0.09826958924531937,1.0,0.009977901354432106,0,-0.08667013049125671,-0.10853902250528336,0.0,0,-0.07985217869281769,-0.15171733126044273,0.0,0,-0.12157904356718063,-0.1542314076796174,0.0,0,-0.16834817826747894,-0.18346075154840946,0.0,0,-0.04796099662780762,-0.10831537377089262,0.0,0,-0.07143959403038025,-0.09460378810763359,0.0,1,-0.1376708745956421,-0.15731373243033886,0.0,0,-0.04779120907187462,-0.09170553972944617,0.0,0,-0.03292907774448395,-0.0617291359230876,0.0,0,0.009977901354432106,-0.06836370308883488,1.0,25,56,81
+HRM,50,210130,never,0,0,,,,,False,0.5555555820465088,0.0060703568160533905,-0.040714167058467865,-0.06365192029625177,1.0,0.0060703568160533905,0,-0.09921177476644516,-0.12798712123185396,0.0,0,-0.13403964042663574,-0.17106143198907375,0.0,0,-0.050841398537158966,-0.12227499112486839,0.0,0,-0.11832043528556824,-0.1445432361215353,0.0,0,-0.00012460185098461807,-0.09120123067623354,0.0,0,0.014515193179249763,-0.03955268987920135,2.0,0,0.06395789235830307,0.005135840328875929,5.0,0,-0.004897018428891897,-0.02688774431589991,0.0,0,0.08369122445583344,-0.004539337707683444,3.0,0,0.0060703568160533905,-0.040714167058467865,1.0,25,56,81
+HRM,51,158555,never,0,0,,,,,False,0.7037037014961243,0.0246457289904356,-0.03752725524827838,-0.0677984356880188,2.0,0.046830618754029274,0,-0.06161827594041824,-0.11156867165118456,0.0,0,-0.15902435779571533,-0.1735316775739193,0.0,0,-0.12311866879463196,-0.1809122934937477,0.0,0,-0.05506949871778488,-0.11275857966393232,0.0,0,-0.021672992035746574,-0.07205195375718176,0.0,0,0.03407658636569977,-0.029685518704354763,1.0,0,0.04017072916030884,-0.028020698809996247,2.0,0,0.06111929938197136,0.004496073117479682,4.0,0,0.06167502701282501,-0.040556489024311304,2.0,0,0.0246457289904356,-0.03752725524827838,2.0,26,55,81
+HRM,52,372450,never,0,0,,,,,False,0.604938268661499,0.09787671267986298,-0.01156012156570796,-0.04013026412576437,1.0,0.09787671267986298,0,-0.10520179569721222,-0.12293535377830267,0.0,0,-0.1569126397371292,-0.19414063729345798,0.0,0,-0.10227622091770172,-0.1584289651364088,0.0,0,0.012064633890986443,-0.11241288692690432,1.0,0,0.02491665445268154,-0.05693170998711139,1.0,0,0.024158401414752007,-0.044426034728530794,2.0,0,0.04921865463256836,-0.03257214976474643,1.0,0,-0.005600960459560156,-0.047875242482405156,0.0,0,-0.00044380532926879823,-0.04124957996464218,0.0,0,0.09787671267986298,-0.01156012156570796,1.0,26,55,81
+HRM,53,222678,transient_or_regressed,1,2,20832,16000,,,False,0.7160493731498718,0.049877338111400604,-0.03834532597102225,-0.07232430577278137,1.0,0.049877338111400604,0,-0.1075349673628807,-0.12140470556914806,0.0,0,-0.13927637040615082,-0.17716909758746624,0.0,0,-0.09453603625297546,-0.1560121327638626,0.0,0,-0.07915645837783813,-0.14427324384450912,0.0,0,-0.026500802487134933,-0.08538103895261884,0.0,0,-0.11089155077934265,-0.11771781090646982,0.0,0,-0.006166668608784676,-0.024777927668765187,0.0,1,-0.010106218047440052,-0.13065115420613438,0.0,0,-0.044871196150779724,-0.07071761740371585,0.0,0,0.049877338111400604,-0.03834532597102225,1.0,24,57,81
+HRM,54,354892,stable_learned,5,1,15624,12000,15624,12000,True,1.0,-0.21496820449829102,-0.23454685881733894,-0.24618512392044067,0.0,0.0,0,-0.1212925910949707,-0.12723589409142733,0.0,0,-0.16924236714839935,-0.18309115059673786,0.0,0,-0.039184316992759705,-0.09060181258246303,0.0,0,-0.01914903149008751,-0.04903936665505171,0.0,0,0.03282104432582855,-0.021519244881346822,2.0,1,-0.16282951831817627,-0.23148411512374878,0.0,1,-0.06576642394065857,-0.14289151038974524,0.0,1,-0.12527939677238464,-0.1517292559146881,0.0,1,-0.07732941955327988,-0.17818669509142637,0.0,1,-0.21496820449829102,-0.23454685881733894,0.0,26,55,81
+HRM,55,375675,never,0,0,,,,,False,0.6790123581886292,0.06044427677989006,0.011979118222370744,-0.019185049924999475,4.0,0.17257314547896385,0,-0.09545765817165375,-0.12303825281560421,0.0,0,-0.007334903813898563,-0.14821830403525382,0.0,0,-0.12105832993984222,-0.1538536101579666,0.0,0,-0.16381818056106567,-0.1819623038172722,0.0,0,-0.041047386825084686,-0.0878369128331542,0.0,0,-0.023332899436354637,-0.06011031079106033,0.0,0,0.021460294723510742,-0.023794416687451303,1.0,0,-0.01096675731241703,-0.027069888077676296,0.0,0,-0.07020939886569977,-0.08080397732555866,0.0,0,0.06044427677989006,0.011979118222370744,4.0,24,57,81
+HRM,56,320237,flaky_then_stable,3,5,13020,10000,26040,20000,True,1.0,-0.025996146723628044,-0.0694695224519819,-0.0844543557614088,0.0,0.0,0,-0.07513535022735596,-0.10713670682162046,0.0,0,-0.13233204185962677,-0.16318566538393497,0.0,0,-0.1501556932926178,-0.1649302951991558,0.0,0,-0.00018625939264893532,-0.11394236172782257,0.0,1,-0.02609112672507763,-0.09093799139373004,0.0,0,-0.013295969925820827,-0.041359489667229354,0.0,0,0.031069932505488396,-0.01465619046939537,4.0,1,-0.03360595926642418,-0.05964365182444453,0.0,0,0.009603197686374187,-0.03777688246918842,1.0,1,-0.025996146723628044,-0.0694695224519819,0.0,25,56,81
+HRM,57,69134,transient_or_regressed,3,4,15624,12000,,,False,0.8148148059844971,0.01897527277469635,-0.020526633481495082,-0.04195819655433297,2.0,0.02617732807993889,0,-0.10028977692127228,-0.10589350387454033,0.0,0,-0.15474966168403625,-0.17472624592483044,0.0,0,-0.1600470393896103,-0.19226985983550549,0.0,0,-0.07802964746952057,-0.12131576053798199,0.0,0,0.018257878720760345,-0.06147167831659317,1.0,1,-0.21024808287620544,-0.26777537912130356,0.0,1,-0.0037691453471779823,-0.07128253346309066,0.0,0,-0.012917617335915565,-0.04856008384376764,0.0,1,-0.05716407299041748,-0.09968962986022234,0.0,0,0.01897527277469635,-0.020526633481495082,2.0,26,55,81
+HRM,58,97421,never,0,0,,,,,False,0.6913580298423767,0.07237684726715088,-0.033830913715064526,-0.053738780319690704,1.0,0.07237684726715088,0,-0.1173408254981041,-0.13037600461393595,0.0,0,-0.08580341935157776,-0.15347086545079947,0.0,0,-0.09086237847805023,-0.14109651651233435,0.0,0,-0.025177812203764915,-0.08516571507789195,0.0,0,0.01469729095697403,-0.028958754963241518,1.0,0,0.0862128883600235,-0.02096082374919206,1.0,0,-0.03684147447347641,-0.06151485210284591,0.0,0,0.11590488255023956,-0.00735072555107763,2.0,0,-0.006562014110386372,-0.041837776196189225,0.0,0,0.07237684726715088,-0.033830913715064526,1.0,25,56,81
+HRM,59,330273,flaky_then_stable,6,5,7812,6000,20832,16000,True,1.0,-0.14447817206382751,-0.22292372025549412,-0.26092487946152687,0.0,0.0,0,-0.09574782848358154,-0.11814787518233061,0.0,0,-0.14695708453655243,-0.1687717568129301,0.0,1,-0.22859971225261688,-0.2737988103181124,0.0,0,-0.09173586219549179,-0.13613286893814802,0.0,1,-0.19183388352394104,-0.23355633206665516,0.0,1,-0.12742072343826294,-0.1902554016560316,0.0,0,0.0689731016755104,-0.03263261076062918,1.0,1,-0.16551895439624786,-0.1949943359941244,0.0,1,-0.1786089390516281,-0.2067596074193716,0.0,1,-0.14447817206382751,-0.22292372025549412,0.0,28,53,81
+HRM,60,254864,never,0,0,,,,,False,0.5555555820465088,0.0702439546585083,-0.01065533037763089,-0.026877406053245068,1.0,0.0702439546585083,0,-0.11245755851268768,-0.1231121327728033,0.0,0,-0.07774825394153595,-0.1719502341002226,0.0,0,-0.15252900123596191,-0.1745251566171646,0.0,0,-0.010417068377137184,-0.05505322874523699,0.0,0,0.008516037836670876,-0.03067472588736564,1.0,0,-0.009788112714886665,-0.05837133317254484,0.0,0,-0.014803024008870125,-0.04397499142214656,0.0,0,-0.016803419217467308,-0.053793280152603984,0.0,0,-0.014094429090619087,-0.06801686459220946,0.0,0,0.0702439546585083,-0.01065533037763089,1.0,27,54,81
+HRM,61,69477,stable_learned,5,1,15624,12000,15624,12000,True,1.0,-0.1204625591635704,-0.18113520462065935,-0.21507825702428818,0.0,0.0,0,-0.10878153890371323,-0.12494845874607563,0.0,0,-0.1436888873577118,-0.17998898774385452,0.0,0,-0.05552127957344055,-0.14508849382400513,0.0,0,0.01434895396232605,-0.08059815596789122,1.0,0,-0.00621369993314147,-0.05066612939117476,0.0,1,-0.14671581983566284,-0.22466827929019928,0.0,1,-0.0051890588365495205,-0.05920296552358195,0.0,1,0.03300897777080536,-0.09699751622974873,2.0,1,-0.14145232737064362,-0.1752503663301468,0.0,1,-0.1204625591635704,-0.18113520462065935,0.0,25,56,81
+HRM,62,21973,transient_or_regressed,4,2,15624,12000,,,False,0.6666666865348816,0.056236632168293,-0.018862557015381753,-0.04509689752012491,2.0,0.06130699906498194,0,-0.07293032854795456,-0.1081262081861496,0.0,0,-0.12480445206165314,-0.17200267687439919,0.0,0,-0.0898868590593338,-0.123998430557549,0.0,0,-0.05724659189581871,-0.10467419307678938,0.0,0,-0.02613621950149536,-0.0649869404733181,0.0,1,-0.22533726692199707,-0.2701130621135235,0.0,1,-0.06734388321638107,-0.12874582782387733,0.0,1,-0.024979252368211746,-0.1308642285875976,0.0,1,-0.19693517684936523,-0.23955047130584717,0.0,0,0.056236632168293,-0.018862557015381753,2.0,25,56,81
+HRM,63,203458,stable_learned,1,1,26040,20000,26040,20000,True,1.0,-0.15206308662891388,-0.1917310692369938,-0.22439974546432495,0.0,0.0,0,-0.09753730148077011,-0.10421207547187805,0.0,0,-0.12378185987472534,-0.1787186563014984,0.0,0,-0.052622269839048386,-0.11676319921389222,0.0,0,-0.012828687205910683,-0.07312459568493068,0.0,0,0.07164709270000458,-0.020792423514649272,1.0,0,0.05289141833782196,-0.01853896153625101,2.0,0,0.03493685647845268,-0.00389753336639842,3.0,0,0.0021564490161836147,-0.04009626738115912,1.0,0,0.05566359683871269,-0.0336300702765584,1.0,1,-0.15206308662891388,-0.1917310692369938,0.0,27,54,81
+HRM,64,174467,flaky_then_stable,5,3,13020,10000,18228,14000,True,1.0,-0.13239604234695435,-0.16992891766130924,-0.18540839105844498,0.0,0.0,0,-0.08548949658870697,-0.10758928023278713,0.0,0,-0.12990422546863556,-0.14337888918817043,0.0,0,-0.11766879260540009,-0.15531804040074348,0.0,0,-0.06383852660655975,-0.10489582736045122,0.0,1,-0.1502472162246704,-0.1719845738261938,0.0,0,0.0418587252497673,-0.0024107879726216197,2.0,1,-0.1302223801612854,-0.16516155563294888,0.0,1,-0.07361232489347458,-0.13582374062389135,0.0,1,-0.0850546658039093,-0.20179999060928822,0.0,1,-0.13239604234695435,-0.16992891766130924,0.0,26,55,81
+HRM,65,44347,stable_learned,1,1,26040,20000,26040,20000,True,1.0,-0.05137450248003006,-0.08427289687097073,-0.0962938480079174,0.0,0.0,0,-0.09533029794692993,-0.1082623777911067,0.0,0,-0.09249173849821091,-0.17171963211148977,0.0,0,-0.08017567545175552,-0.1806363882496953,0.0,0,-0.1371016502380371,-0.15950841270387173,0.0,0,-0.0641121044754982,-0.09824024420231581,0.0,0,0.018490973860025406,-0.039421986788511276,1.0,0,0.004101573955267668,-0.03696399257751182,1.0,0,0.005094541702419519,-0.0400511352927424,1.0,0,0.08646073192358017,-0.026537652302067727,1.0,1,-0.05137450248003006,-0.08427289687097073,0.0,26,55,81
+HRM,66,352714,transient_or_regressed,6,2,7812,6000,,,False,0.5802469253540039,0.05378980562090874,0.006985474785324186,-0.01701565587427467,5.0,0.12843364709988236,0,-0.09041395783424377,-0.10722232144325972,0.0,0,-0.12151768803596497,-0.18163775093853474,0.0,1,-0.19649504125118256,-0.2292026299983263,0.0,1,-0.16107302904129028,-0.2692473344504833,0.0,1,-0.21267643570899963,-0.25001212768256664,0.0,1,-0.2480267584323883,-0.29653872177004814,0.0,1,-0.0783798098564148,-0.1896719466894865,0.0,1,-0.10431613028049469,-0.15876659005880356,0.0,0,0.05546954274177551,-0.0185197259997949,1.0,0,0.05378980562090874,0.006985474785324186,5.0,27,54,81
+HRM,67,293982,flaky_then_stable,5,3,13020,10000,23436,18000,True,1.0,-0.1628555804491043,-0.2055546473711729,-0.2430240362882614,0.0,0.0,0,-0.11611655354499817,-0.12131046410650015,0.0,0,-0.15822722017765045,-0.1856842301785946,0.0,0,-0.10952799767255783,-0.14647315442562103,0.0,0,-0.01286296546459198,-0.09034749772399664,0.0,1,-0.07013614475727081,-0.12025865726172924,0.0,1,-0.2106354832649231,-0.2263799123466015,0.0,1,-0.14366689324378967,-0.16764303296804428,0.0,0,0.028247162699699402,-0.01584658323554322,2.0,1,-0.1655392199754715,-0.2260780856013298,0.0,1,-0.1628555804491043,-0.2055546473711729,0.0,25,56,81
+HRM,68,3396,never,0,0,,,,,False,0.48148149251937866,-0.03499455377459526,-0.056213518138974905,-0.07056169770658016,0.0,0.0,0,-0.07025796175003052,-0.12202704418450594,0.0,0,-0.09222247451543808,-0.1457802029326558,0.0,0,-0.0676996037364006,-0.12759554479271173,0.0,0,-0.03811568021774292,-0.08571697399020195,0.0,0,0.05852213129401207,0.004014157064375468,5.0,0,0.051188886165618896,0.006427596556022763,5.0,0,0.058795392513275146,-0.01806895345544035,3.0,0,0.060008496046066284,-0.002214959662524052,4.0,0,-0.013023993000388145,-0.06563451210968196,0.0,0,-0.03499455377459526,-0.056213518138974905,0.0,25,56,81
+HRM,69,16908,flaky_then_stable,8,3,5208,4000,10416,8000,True,1.0,-0.12794345617294312,-0.19092708081007004,-0.22050897777080536,0.0,0.0,0,-0.05812311917543411,-0.10200207773596048,0.0,1,-0.15649670362472534,-0.1655881255865097,0.0,0,-0.030796557664871216,-0.1514659859240055,0.0,1,-0.22675207257270813,-0.2815363369882107,0.0,1,-0.17859528958797455,-0.23069622553884983,0.0,1,-0.16334208846092224,-0.24833497405052185,0.0,1,-0.03796682506799698,-0.17991058435291052,0.0,1,-0.09015589207410812,-0.13112569320946932,0.0,1,-0.22254078090190887,-0.25166905112564564,0.0,1,-0.12794345617294312,-0.19092708081007004,0.0,26,55,81
+HRM,70,20527,stable_learned,2,1,23436,18000,23436,18000,True,1.0,-0.2251078188419342,-0.23532993905246258,-0.24225867167115211,0.0,0.0,0,-0.08166171610355377,-0.1110903499647975,0.0,0,-0.12015607208013535,-0.14283294510096312,0.0,0,-0.12082371115684509,-0.16387461870908737,0.0,0,-0.1060250997543335,-0.15465099550783634,0.0,0,-0.016017192974686623,-0.06443910021334887,0.0,0,0.01834646239876747,-0.04423019150272012,1.0,0,-0.016902748495340347,-0.06338506797328591,0.0,0,-0.06390049308538437,-0.07453863322734833,0.0,1,-0.08227287977933884,-0.20706056337803602,0.0,1,-0.2251078188419342,-0.23532993905246258,0.0,23,58,81
+HRM,71,384517,transient_or_regressed,1,2,18228,14000,,,False,0.6666666865348816,0.023445945233106613,0.0055934472038643435,-0.004664855863666162,6.0,0.06707225239370018,0,-0.10190968215465546,-0.11576327309012413,0.0,0,-0.15367907285690308,-0.1752918567508459,0.0,0,-0.11451651901006699,-0.18129203002899885,0.0,0,-0.07835979014635086,-0.1370390309020877,0.0,0,-0.050747282803058624,-0.08704893151298165,0.0,0,-0.010242318734526634,-0.052114992518909276,0.0,1,-0.09218861162662506,-0.16483227536082268,0.0,0,0.05779339000582695,-0.030136798712192103,2.0,0,0.0022349099162966013,-0.06976295073400252,1.0,0,0.023445945233106613,0.0055934472038643435,6.0,26,55,81
+HRM,72,215514,flaky_then_stable,5,5,10416,8000,23436,18000,True,1.0,-0.18717466294765472,-0.20732050016522408,-0.21756966784596443,0.0,0.0,0,-0.09690603613853455,-0.12047691363841295,0.0,0,-0.040353938937187195,-0.16491549275815487,0.0,0,-0.12382633984088898,-0.14481466636061668,0.0,1,-0.02131059393286705,-0.04308576416224241,0.0,0,0.12021924555301666,0.02078354940749705,4.0,1,-0.17053215205669403,-0.24986673146486282,0.0,1,-0.10568724572658539,-0.13118094019591808,0.0,0,0.1162983775138855,0.007643289129191544,4.0,1,-0.20333963632583618,-0.21645880676805973,0.0,1,-0.18717466294765472,-0.20732050016522408,0.0,27,54,81
+HRM,73,339970,always,10,0,2604,2000,2604,2000,True,1.0,-0.1688511222600937,-0.21791015937924385,-0.2350173220038414,0.0,0.0,1,-0.13894087076187134,-0.22223976626992226,0.0,1,-0.2235153168439865,-0.26462721079587936,0.0,1,-0.14454472064971924,-0.2419634945690632,0.0,1,-0.24621491134166718,-0.30923383869230747,0.0,1,-0.2277621328830719,-0.2541695889085531,0.0,1,-0.30825090408325195,-0.3289545476436615,0.0,1,-0.0970049723982811,-0.17974962573498487,0.0,1,0.019008098170161247,-0.08370182395447046,1.0,1,-0.05353675037622452,-0.21517594438046217,0.0,1,-0.1688511222600937,-0.21791015937924385,0.0,34,47,81
+HRM,74,261995,stable_learned,6,1,13020,10000,13020,10000,True,1.0,-0.10175606608390808,-0.1583832437172532,-0.18737824261188507,0.0,0.0,0,-0.12153434753417969,-0.13008894212543964,0.0,0,-0.06060854345560074,-0.1611060881987214,0.0,0,-0.07820869982242584,-0.11677187588065863,0.0,0,0.007718152832239866,-0.05071772896917537,1.0,1,-0.12789368629455566,-0.21879497542977333,0.0,1,-0.18441087007522583,-0.2249552421271801,0.0,1,-0.053949639201164246,-0.11739380098879337,0.0,1,-0.09931956231594086,-0.14408675022423267,0.0,1,-0.16257533431053162,-0.2008624840527773,0.0,1,-0.10175606608390808,-0.1583832437172532,0.0,26,55,81
+HRM,75,52628,stable_learned,4,1,18228,14000,18228,14000,True,1.0,-0.13874110579490662,-0.19667387194931507,-0.23633918911218643,0.0,0.0,0,-0.10898439586162567,-0.1246098754927516,0.0,0,-0.08507703244686127,-0.15872198715806007,0.0,0,-0.100139319896698,-0.1297505907714367,0.0,0,0.04404421150684357,-0.09714841190725565,1.0,0,0.037700653076171875,-0.017136561044026166,1.0,0,0.000842068693600595,-0.01881025359398336,2.0,1,-0.10849840939044952,-0.1873207688331604,0.0,1,-0.04788167402148247,-0.11033224733546376,0.0,1,-0.17876046895980835,-0.2125169076025486,0.0,1,-0.13874110579490662,-0.19667387194931507,0.0,26,55,81
+HRM,76,409934,stable_learned,8,1,7812,6000,7812,6000,True,1.0,-0.1461394727230072,-0.19881482236087322,-0.21963705122470856,0.0,0.0,0,-0.1076035425066948,-0.11742637772113085,0.0,0,-0.10179399698972702,-0.16400444600731134,0.0,1,-0.2520173192024231,-0.2807216979563236,0.0,1,-0.18581527471542358,-0.22404972463846207,0.0,1,-0.19624987244606018,-0.24144703522324562,0.0,1,-0.0925653800368309,-0.19390494842082262,0.0,1,-0.08922481536865234,-0.14166832249611616,0.0,1,-0.12955597043037415,-0.14312156103551388,0.0,1,-0.23073139786720276,-0.23423185013234615,0.0,1,-0.1461394727230072,-0.19881482236087322,0.0,25,56,81
+HRM,77,412654,never,0,0,,,,,False,0.7407407164573669,0.013321003876626492,-0.025454669987084344,-0.04932592902332544,2.0,0.023358753882348537,0,-0.09637199342250824,-0.1209989758208394,0.0,0,-0.15256991982460022,-0.16206654533743858,0.0,0,-0.13490791618824005,-0.18308980762958527,0.0,0,-0.01451355405151844,-0.13994680368341506,0.0,0,-0.03676346689462662,-0.06429988704621792,0.0,0,0.29950064420700073,-0.0032962560653686523,1.0,0,-0.015197228640317917,-0.050633082166314125,0.0,0,-0.010375483892858028,-0.05452435545157641,0.0,0,-0.004130590707063675,-0.07079463079571724,0.0,0,0.013321003876626492,-0.025454669987084344,2.0,24,57,81
+HRM,78,167007,flaky_then_stable,3,5,13020,10000,26040,20000,True,1.0,-0.14865854382514954,-0.1630553975701332,-0.1719917729496956,0.0,0.0,0,-0.10838057845830917,-0.11996885761618614,0.0,0,-0.10367973893880844,-0.15196441020816565,0.0,0,-0.12364670634269714,-0.1447877138853073,0.0,0,-0.0672464519739151,-0.0847141481935978,0.0,1,-0.1743450164794922,-0.2222774364054203,0.0,0,0.05124282464385033,0.011373260815162212,6.0,1,0.0009899111464619637,-0.07591525826137513,1.0,0,0.07000786066055298,-0.03154705744236708,1.0,0,0.02558794990181923,-0.009258214617148042,4.0,1,-0.14865854382514954,-0.1630553975701332,0.0,25,56,81
+HRM,79,287561,never,0,0,,,,,False,0.5308641791343689,-0.006097759120166302,-0.022626504418440163,-0.0353393005207181,0.0,0.0,0,-0.10110758990049362,-0.12043929193168879,0.0,0,-0.09140848368406296,-0.12841508071869612,0.0,0,-0.07894784212112427,-0.1541511695832014,0.0,0,0.015631146728992462,-0.10161985736340284,1.0,0,0.012466642074286938,-0.026497843602555804,3.0,0,0.06905756890773773,0.01351450348738581,6.0,0,0.047208111733198166,0.013022979255765676,5.0,0,0.027557333931326866,-0.006242328614462167,2.0,0,0.028765041381120682,-0.015157101181102917,2.0,0,-0.006097759120166302,-0.022626504418440163,0.0,25,56,81
+HRM,80,385776,stable_learned,6,1,13020,10000,13020,10000,True,1.0,-0.1266283541917801,-0.16892217099666595,-0.18008419126272202,0.0,0.0,0,-0.10753427445888519,-0.12235865369439125,0.0,0,-0.13483139872550964,-0.16638988628983498,0.0,0,-0.1038869321346283,-0.13669436797499657,0.0,0,-0.06594051420688629,-0.14565834775567055,0.0,1,-0.19645468890666962,-0.21667773462831974,0.0,1,-0.15499047935009003,-0.22145649045705795,0.0,1,-0.11977706849575043,-0.1854443084448576,0.0,1,0.00973413698375225,-0.14868867420591414,1.0,1,-0.149461030960083,-0.17201309464871883,0.0,1,-0.1266283541917801,-0.16892217099666595,0.0,26,55,81
+HRM,81,215458,flaky_then_stable,2,3,10416,8000,26040,20000,True,1.0,-0.10166774690151215,-0.13816008158028126,-0.15438005328178406,0.0,0.0,0,-0.10137133300304413,-0.11158208176493645,0.0,0,-0.11938758194446564,-0.14476736821234226,0.0,0,-0.09888924658298492,-0.16724861226975918,0.0,1,-0.17587772011756897,-0.23108833841979504,0.0,0,-0.04220571368932724,-0.0688941078260541,0.0,0,0.009074798785150051,-0.030041917343623936,2.0,0,0.01664038375020027,-0.027118313359096646,1.0,0,0.026004288345575333,-0.03341063903644681,1.0,0,0.04801099747419357,-0.02283513406291604,2.0,1,-0.10166774690151215,-0.13816008158028126,0.0,25,56,81
+HRM,82,268094,flaky_then_stable,5,3,13020,10000,18228,14000,True,1.0,-0.1975395530462265,-0.2094011139124632,-0.21842292696237564,0.0,0.0,0,-0.10464023798704147,-0.12124027218669653,0.0,0,-0.09312215447425842,-0.18272371962666512,0.0,0,-0.14162196218967438,-0.1756109930574894,0.0,0,0.05744488164782524,-0.08065328327938914,1.0,1,-0.1332908272743225,-0.18268979154527187,0.0,0,0.00421142065897584,-0.03118549770442769,2.0,1,-0.020717579871416092,-0.1691389330662787,0.0,1,-0.10124551504850388,-0.1484381491318345,0.0,1,-0.17334723472595215,-0.20649681612849236,0.0,1,-0.1975395530462265,-0.2094011139124632,0.0,27,54,81
+HRM,83,209914,flaky_then_stable,5,5,10416,8000,23436,18000,True,1.0,-0.13650116324424744,-0.1690891906619072,-0.1899857297539711,0.0,0.0,0,-0.08536144345998764,-0.10360536444932222,0.0,0,-0.1208895668387413,-0.18023486342281103,0.0,0,-0.06441247463226318,-0.1441410444676876,0.0,1,-0.038433440029621124,-0.07331196218729019,0.0,1,-0.17171452939510345,-0.19920833222568035,0.0,0,0.03195942938327789,-0.024822675331961364,2.0,1,-0.12354369461536407,-0.15266717411577702,0.0,0,0.014324597083032131,-0.02382014598697424,2.0,1,-0.0856877863407135,-0.1831379346549511,0.0,1,-0.13650116324424744,-0.1690891906619072,0.0,24,57,81
+HRM,84,21050,transient_or_regressed,2,4,10416,8000,,,False,0.6419752836227417,0.00838937796652317,-0.023495512607041746,-0.04478210769593716,3.0,0.01964237866923213,0,-0.044277504086494446,-0.09663587249815464,0.0,0,-0.1396823525428772,-0.17744995653629303,0.0,0,-0.12175723165273666,-0.14940922614187002,0.0,1,-0.2438763827085495,-0.251328581944108,0.0,0,0.02230096608400345,-0.044571670703589916,2.0,0,0.04078914597630501,-0.0032454947067890316,4.0,0,0.028232555836439133,-0.03456449694931507,1.0,1,-0.09268753230571747,-0.12214596848934889,0.0,0,0.043816082179546356,-0.03010126599110663,1.0,0,0.00838937796652317,-0.023495512607041746,3.0,28,53,81
+HRM,85,212675,flaky_then_stable,6,5,7812,6000,18228,14000,True,1.0,-0.17270416021347046,-0.21733968891203403,-0.23801613971590996,0.0,0.0,0,-0.09946343302726746,-0.12139117997139692,0.0,0,-0.10046467185020447,-0.16872882097959518,0.0,1,-0.24866017699241638,-0.261909369379282,0.0,0,-0.08158718049526215,-0.1424483461305499,0.0,1,-0.1168135404586792,-0.20052232220768929,0.0,0,0.01551643293350935,-0.020419653083081357,2.0,1,-0.08376267552375793,-0.18198201525956392,0.0,1,-0.08464043587446213,-0.16489313077181578,0.0,1,-0.1316813975572586,-0.23831717297434807,0.0,1,-0.17270416021347046,-0.21733968891203403,0.0,26,55,81
+HRM,86,49529,flaky_then_stable,5,3,13020,10000,23436,18000,True,1.0,-0.1682024896144867,-0.20126814022660255,-0.22077791392803192,0.0,0.0,0,-0.109476238489151,-0.11924502160400152,0.0,0,-0.12745092809200287,-0.15273123234510422,0.0,0,-0.06597045809030533,-0.128727906383574,0.0,0,-0.055004604160785675,-0.0924994507804513,0.0,1,-0.17970439791679382,-0.2124821301549673,0.0,1,-0.23813758790493011,-0.26247395388782024,0.0,1,-0.08536534756422043,-0.16141190845519304,0.0,0,0.020627591758966446,-0.05493051279336214,1.0,1,-0.10943794250488281,-0.1884098146110773,0.0,1,-0.1682024896144867,-0.20126814022660255,0.0,24,57,81
+HRM,87,213046,never,0,0,,,,,False,0.604938268661499,0.08406361937522888,0.031643181762774475,-0.00317335277213715,6.0,0.29604098352137953,0,-0.0842461809515953,-0.1152022946625948,0.0,0,-0.14722461998462677,-0.18199089728295803,0.0,0,-0.12557613849639893,-0.15791724994778633,0.0,0,-0.07645583152770996,-0.1626636702567339,0.0,0,-0.07290153950452805,-0.09602661617100239,0.0,0,-0.011576402932405472,-0.06144758313894272,0.0,0,0.03660346940159798,-0.037388483295217156,2.0,0,-0.06590640544891357,-0.10590603947639465,0.0,0,-0.0582808256149292,-0.0786764589138329,0.0,0,0.08406361937522888,0.031643181762774475,6.0,22,59,81
+HRM,88,43199,never,0,0,,,,,False,0.654321014881134,-0.010590290650725365,-0.05510204122401774,-0.06971409823745489,0.0,0.0,0,-0.10470564663410187,-0.11867236066609621,0.0,0,-0.1270609050989151,-0.18827624432742596,0.0,0,-0.10880934447050095,-0.15713228192180395,0.0,0,-0.1624673455953598,-0.1710633486509323,0.0,0,-0.007393011823296547,-0.06961931218393147,0.0,0,-0.00818178616464138,-0.053385831182822585,0.0,0,-0.04514508694410324,-0.08028050139546394,0.0,0,-0.008183536119759083,-0.05269589030649513,0.0,0,-0.026822715997695923,-0.05086867278441787,0.0,0,-0.010590290650725365,-0.05510204122401774,0.0,26,55,81
+HRM,89,33953,flaky_then_stable,2,3,20832,16000,26040,20000,True,1.0,-0.0034764527808874846,-0.08683538305922411,-0.11478960514068604,0.0,0.0,0,-0.07132087647914886,-0.11203898582607508,0.0,0,-0.14287813007831573,-0.17425881139934063,0.0,0,-0.11037494242191315,-0.15423190779983997,0.0,0,-0.034775521606206894,-0.13934533903375268,0.0,0,-0.025637339800596237,-0.08665754785761237,0.0,0,0.004271009471267462,-0.04554880078649148,1.0,0,0.014210289344191551,-0.020328281680122018,1.0,1,-0.12564708292484283,-0.1803058423101902,0.0,0,-0.015482686460018158,-0.06158867361955345,0.0,1,-0.0034764527808874846,-0.08683538305922411,0.0,23,58,81
+HRM,90,375972,transient_or_regressed,2,4,18228,14000,,,False,0.6666666865348816,0.019948579370975494,-0.004797628498636186,-0.016925638541579247,2.0,0.035263948142528534,0,-0.09104552865028381,-0.11398504301905632,0.0,0,-0.14700953662395477,-0.15683000907301903,0.0,0,-0.08999692648649216,-0.13982718531042337,0.0,0,-0.17337852716445923,-0.1802201345562935,0.0,0,-0.023146867752075195,-0.07360834488645196,0.0,0,-0.014713883399963379,-0.06574301328510046,0.0,1,-0.07589646428823471,-0.1454897029325366,0.0,0,0.021065082401037216,-0.04397852229885757,2.0,1,-0.20304875075817108,-0.22345281019806862,0.0,0,0.019948579370975494,-0.004797628498636186,2.0,23,58,81
+HRM,91,401258,flaky_then_stable,6,5,7812,6000,23436,18000,True,1.0,-0.11461317539215088,-0.20301844365894794,-0.23617121204733849,0.0,0.0,0,-0.07705952972173691,-0.11125023011118174,0.0,0,-0.08731736987829208,-0.12630854081362486,0.0,1,-0.19703368842601776,-0.2719753608107567,0.0,1,-0.17716729640960693,-0.2517898064106703,0.0,1,-0.19851091504096985,-0.21863912418484688,0.0,0,-0.0011732022976502776,-0.02964963721751701,0.0,1,-0.10220591723918915,-0.17283744178712368,0.0,0,0.028265206143260002,-0.030492036137729883,2.0,1,-0.11968769133090973,-0.15537462010979652,0.0,1,-0.11461317539215088,-0.20301844365894794,0.0,25,56,81
+HRM,92,393345,stable_learned,3,1,20832,16000,20832,16000,True,1.0,-0.1799735277891159,-0.21129877865314484,-0.22658110037446022,0.0,0.0,0,-0.110551618039608,-0.1258931178599596,0.0,0,-0.12487460672855377,-0.15443424880504608,0.0,0,-0.15612953901290894,-0.1916033774614334,0.0,0,-0.06690897792577744,-0.1115234512835741,0.0,0,-0.018010519444942474,-0.0677008933853358,0.0,0,0.054714418947696686,0.011558877245988697,3.0,0,0.03503866493701935,-0.037273469410138205,2.0,1,-0.009620919823646545,-0.1249165590852499,0.0,1,-0.15239448845386505,-0.1943833902478218,0.0,1,-0.1799735277891159,-0.21129877865314484,0.0,24,57,81
+HRM,93,234599,flaky_then_stable,2,3,20832,16000,26040,20000,True,1.0,-0.1602064073085785,-0.19778655096888542,-0.21351898834109306,0.0,0.0,0,-0.0888938307762146,-0.11143441125750542,0.0,0,-0.11047853529453278,-0.15994272753596306,0.0,0,-0.1028064638376236,-0.13657853472977877,0.0,0,-0.15981802344322205,-0.17403140477836132,0.0,0,-0.11000792682170868,-0.12973921280354261,0.0,0,-0.11292576789855957,-0.12970687448978424,0.0,0,-0.050509076565504074,-0.0683673731982708,0.0,1,-0.13301339745521545,-0.16011410020291805,0.0,0,-0.0217635165899992,-0.07338983588851988,0.0,1,-0.1602064073085785,-0.19778655096888542,0.0,24,57,81
+HRM,94,303593,flaky_then_stable,3,3,18228,14000,26040,20000,True,1.0,-0.11090092360973358,-0.16151301749050617,-0.190197192132473,0.0,0.0,0,-0.08878928422927856,-0.10503529012203217,0.0,0,-0.12223745882511139,-0.1830582246184349,0.0,0,-0.01639503613114357,-0.1784214531071484,0.0,0,-0.13754242658615112,-0.16445999220013618,0.0,0,-0.10822766274213791,-0.12627002596855164,0.0,0,-0.0913829430937767,-0.10221351869404316,0.0,1,-0.15275748074054718,-0.18020538426935673,0.0,1,-0.05406114459037781,-0.1618475429713726,0.0,0,-0.012788208201527596,-0.04460559715516865,0.0,1,-0.11090092360973358,-0.16151301749050617,0.0,26,55,81
+HRM,95,215065,flaky_then_stable,4,5,13020,10000,23436,18000,True,1.0,-0.18222805857658386,-0.23360453732311726,-0.2539188526570797,0.0,0.0,0,-0.11468885838985443,-0.12339197471737862,0.0,0,-0.09481021016836166,-0.14057997148483992,0.0,0,0.003430446609854698,-0.13772302330471575,1.0,0,-0.01365627720952034,-0.1117947013117373,0.0,1,-0.18596914410591125,-0.22889775969088078,0.0,0,0.08761300146579742,-0.03674849966773763,2.0,1,-0.12471955269575119,-0.20687429327517748,0.0,0,-0.01810361258685589,-0.06197009584866464,0.0,1,-0.15464085340499878,-0.21474852599203587,0.0,1,-0.18222805857658386,-0.23360453732311726,0.0,24,57,81
+HRM,96,24738,stable_learned,3,1,20832,16000,20832,16000,True,1.0,-0.10522373020648956,-0.17896906472742558,-0.20139915496110916,0.0,0.0,0,-0.09813150018453598,-0.1194430673494935,0.0,0,-0.1345806121826172,-0.17708951979875565,0.0,0,-0.08710560202598572,-0.15227385610342026,0.0,0,-0.08523666113615036,-0.134278510697186,0.0,0,0.0726279765367508,-0.01602130150422454,2.0,0,0.03966524079442024,-0.011215629958314821,2.0,0,-0.0035970243625342846,-0.0344269554479979,0.0,1,-0.09472396224737167,-0.1388195250183344,0.0,1,-0.06159213185310364,-0.08873886428773403,0.0,1,-0.10522373020648956,-0.17896906472742558,0.0,24,57,81
+HRM,97,328928,never,0,0,,,,,False,0.6666666865348816,-0.03319307416677475,-0.06119898334145546,-0.07457034289836884,0.0,0.0,0,-0.08866097778081894,-0.11382185574620962,0.0,0,-0.09055261313915253,-0.15436174534261227,0.0,0,-0.09403038024902344,-0.1550994422286749,0.0,0,-0.10963743180036545,-0.16235331539064646,0.0,0,-0.07186725735664368,-0.11102005187422037,0.0,0,-0.00922243483364582,-0.0634629672858864,0.0,0,0.04282399266958237,-0.06192085053771734,1.0,0,-0.03994087874889374,-0.09076444711536169,0.0,0,0.024573523551225662,-0.04625486792065203,1.0,0,-0.03319307416677475,-0.06119898334145546,0.0,25,56,81
+HRM,98,241509,never,0,0,,,,,False,0.7654321193695068,0.022637462243437767,-0.028871755581349134,-0.046701584942638874,1.0,0.022637462243437767,0,-0.1310053914785385,-0.13718230091035366,0.0,0,-0.13285426795482635,-0.15668808482587337,0.0,0,-0.08173416554927826,-0.1393538387492299,0.0,0,-0.08542899787425995,-0.1378931775689125,0.0,0,0.05605560541152954,-0.0479481749352999,2.0,0,0.029044896364212036,-0.04775336477905512,1.0,0,0.052572499960660934,-0.00926357816933887,2.0,0,-0.012604855000972748,-0.05755344917997718,0.0,0,0.017466984689235687,-0.04986605222802609,1.0,0,0.022637462243437767,-0.028871755581349134,1.0,24,57,81
+HRM,99,186024,flaky_then_stable,5,5,10416,8000,23436,18000,True,1.0,-0.06846453994512558,-0.10047802608460188,-0.11685517802834511,0.0,0.0,0,-0.04796747490763664,-0.09693707199767232,0.0,0,-0.10262145102024078,-0.15517476201057434,0.0,0,-0.08941973745822906,-0.1707031186670065,0.0,1,-0.2055378556251526,-0.26434714905917645,0.0,1,-0.19252543151378632,-0.2062663808465004,0.0,0,0.0452783964574337,0.006077458267100155,5.0,1,-0.004718210082501173,-0.04499434883473441,0.0,0,0.00095124397194013,-0.02764059142646147,1.0,1,-0.09632702171802521,-0.1724874060600996,0.0,1,-0.06846453994512558,-0.10047802608460188,0.0,25,56,81
+HRM,100,323763,never,0,0,,,,,False,0.6296296119689941,0.014819055795669556,-0.03787806138279848,-0.07006692234426737,1.0,0.014819055795669556,0,-0.08417387306690216,-0.10002304054796696,0.0,0,-0.13363054394721985,-0.16051568277180195,0.0,0,-0.053681522607803345,-0.18555020727217197,0.0,0,-0.13380299508571625,-0.19459308683872223,0.0,0,-0.023171722888946533,-0.12072127219289541,0.0,0,-0.10983826220035553,-0.12792530469596386,0.0,0,-0.03883786126971245,-0.074597277212888,0.0,0,0.014734074473381042,-0.02810417249565944,1.0,0,-0.06590304523706436,-0.08112007845193148,0.0,0,0.014819055795669556,-0.03787806138279848,1.0,24,57,81
+HRM,101,299776,never,0,0,,,,,False,0.6790123581886292,0.06737080216407776,0.02025125788350124,-0.0024572045949753374,7.0,0.18906888423953205,0,-0.07198084145784378,-0.12417325377464294,0.0,0,-0.14300310611724854,-0.19209407456219196,0.0,0,-0.13978366553783417,-0.16520243510603905,0.0,0,0.022022239863872528,-0.08065965492278337,1.0,0,0.0013722957810387015,-0.04923071696248371,1.0,0,0.007494743913412094,-0.052694708574563265,1.0,0,-0.022878732532262802,-0.05177684500813484,0.0,0,0.052504122257232666,-0.03295153228100389,2.0,0,-0.021065816283226013,-0.06522902566939592,0.0,0,0.06737080216407776,0.02025125788350124,7.0,24,57,81
+HRM,102,353011,flaky_then_stable,6,5,7812,6000,23436,18000,True,1.0,0.01386100985109806,-0.07370613212697208,-0.099849758669734,1.0,0.01386100985109806,0,-0.07824067771434784,-0.09159113094210625,0.0,0,-0.04720664024353027,-0.13050498254597187,0.0,1,-0.16765537858009338,-0.25949224829673767,0.0,1,-0.19891956448554993,-0.23768315464258194,0.0,0,0.008338830433785915,-0.0313505056547001,2.0,1,-0.2247963547706604,-0.2535901293158531,0.0,1,-0.15174295008182526,-0.18198686838150024,0.0,0,0.060941144824028015,0.019642870451207273,6.0,1,-0.05223337933421135,-0.19606406008824706,0.0,1,0.01386100985109806,-0.07370613212697208,1.0,26,55,81
+HRM,103,274707,flaky_then_stable,3,3,18228,14000,26040,20000,True,1.0,-0.16444936394691467,-0.21937327086925507,-0.24631951749324799,0.0,0.0,0,-0.11423653364181519,-0.12004128005355597,0.0,0,-0.09800373762845993,-0.15606053080409765,0.0,0,-0.17290888726711273,-0.1960100643336773,0.0,0,-0.07421648502349854,-0.14776075445115566,0.0,0,0.04682917892932892,-0.032152044237591326,2.0,0,0.07792419195175171,-0.05844520963728428,1.0,1,-0.0025566311087459326,-0.10556047988939099,0.0,1,-0.06562010943889618,-0.13887231331318617,0.0,0,-0.07154647260904312,-0.0939935427159071,0.0,1,-0.16444936394691467,-0.21937327086925507,0.0,23,58,81
+HRM,104,63096,transient_or_regressed,1,2,18228,14000,,,False,0.5308641791343689,-2.1544285118579865e-05,-0.06466713605914265,-0.09137691743671894,0.0,0.0,0,-0.11063554883003235,-0.13006150256842375,0.0,0,-0.14497563242912292,-0.16473953612148762,0.0,0,-0.010112248361110687,-0.13297548796981573,0.0,0,-0.1668534129858017,-0.1792683806270361,0.0,0,-0.08643998950719833,-0.1292272573336959,0.0,0,-0.023329665884375572,-0.07284386409446597,0.0,1,-0.14296329021453857,-0.16431903839111328,0.0,0,0.0006913800607435405,-0.04429886063007871,1.0,0,0.025303814560174942,-0.05376843037083745,1.0,0,-2.1544285118579865e-05,-0.06466713605914265,0.0,24,57,81
+HRM,105,276964,never,0,0,,,,,False,0.6296296119689941,-0.006283950060606003,-0.01716360542923212,-0.02367093600332737,0.0,0.0,0,-0.09428232908248901,-0.11679614428430796,0.0,0,-0.07425931096076965,-0.1607155278325081,0.0,0,-0.11915221810340881,-0.15305515751242638,0.0,0,-0.1556764543056488,-0.16206618025898933,0.0,0,-0.07818225026130676,-0.09454652573913336,0.0,0,-0.008902468718588352,-0.049460397684015334,0.0,0,0.009478027932345867,-0.03413918806472793,1.0,0,0.019606567919254303,-0.048103258246555924,1.0,0,0.034189820289611816,-0.0407434194094094,2.0,0,-0.006283950060606003,-0.01716360542923212,0.0,24,57,81
+HRM,106,406565,transient_or_regressed,1,2,23436,18000,,,False,0.7160493731498718,0.06944755464792252,-0.00901299697579816,-0.03679931489750743,3.0,0.08027476305142045,0,-0.10033504664897919,-0.11521307099610567,0.0,0,-0.15823686122894287,-0.18404642306268215,0.0,0,-0.0978255569934845,-0.14145452994853258,0.0,0,-0.0452958419919014,-0.09519790206104517,0.0,0,0.03678115829825401,-0.008332075405633077,3.0,0,0.07498190551996231,-0.0007399613969027996,2.0,0,0.031603507697582245,0.005481702566612512,4.0,0,0.04406603425741196,0.016715264375307015,7.0,1,-0.22788894176483154,-0.2420729659497738,0.0,0,0.06944755464792252,-0.00901299697579816,3.0,26,55,81
+HRM,107,249322,never,0,0,,,,,False,0.5802469253540039,-0.011186732910573483,-0.055927400826476514,-0.07497474364936352,0.0,0.0,0,-0.08297692239284515,-0.1098587280139327,0.0,0,-0.14630553126335144,-0.17403527535498142,0.0,0,-0.10893452167510986,-0.16056997515261173,0.0,0,-0.08246736228466034,-0.13821174297481775,0.0,0,-0.10421209782361984,-0.12925998214632273,0.0,0,-0.06530041992664337,-0.09008628968149424,0.0,0,0.04566517099738121,-0.04004433489171788,1.0,0,-0.007001407444477081,-0.05243051692377776,0.0,0,-0.08578502386808395,-0.10182576719671488,0.0,0,-0.011186732910573483,-0.055927400826476514,0.0,23,58,81
+HRM,108,207819,flaky_then_stable,5,3,5208,4000,23436,18000,True,1.0,-0.17077229917049408,-0.1976468749344349,-0.21169985458254814,0.0,0.0,0,-0.08491533249616623,-0.09900439064949751,0.0,1,-0.21341437101364136,-0.2229254562407732,0.0,1,-0.17464280128479004,-0.2993176244199276,0.0,1,-0.20048007369041443,-0.23222148790955544,0.0,0,0.02891102246940136,-0.055489915888756514,1.0,0,0.06339944154024124,-0.024557018477935344,2.0,0,-0.0066777667962014675,-0.02417464292375371,0.0,0,0.0004317739512771368,-0.03916393654071726,1.0,1,-0.18484598398208618,-0.20805158838629723,0.0,1,-0.17077229917049408,-0.1976468749344349,0.0,26,55,81
+HRM,109,199646,transient_or_regressed,3,2,15624,12000,,,False,0.7283950448036194,0.08204969763755798,-0.016189820365980268,-0.03937240969389677,1.0,0.08204969763755798,0,-0.1028270348906517,-0.11466396972537041,0.0,0,-0.11451438069343567,-0.16059358790516853,0.0,0,-0.12014584988355637,-0.1517141880467534,0.0,0,0.0006357248639687896,-0.07744602621824015,1.0,0,0.0932072252035141,0.027360960375517607,5.0,1,-0.14924295246601105,-0.18292567133903503,0.0,1,-0.08870181441307068,-0.17684596590697765,0.0,1,-0.021103432402014732,-0.10224020876921713,0.0,0,0.08072451502084732,0.0203458072792273,6.0,0,0.08204969763755798,-0.016189820365980268,1.0,24,57,81
+HRM,110,177526,flaky_then_stable,4,3,15624,12000,20832,16000,True,1.0,0.031576983630657196,-0.036349253728985786,-0.07534550502896309,2.0,0.0630323775112629,0,-0.11064153909683228,-0.12164824083447456,0.0,0,-0.13354742527008057,-0.1664550956338644,0.0,0,-0.0794915184378624,-0.10476494114845991,0.0,0,-0.015416210517287254,-0.05815244046971202,0.0,0,0.04337112605571747,-0.030010636430233717,2.0,1,0.03235407918691635,-0.015268758579622954,2.0,0,0.022475872188806534,-0.015000874700490385,2.0,1,-0.08532999455928802,-0.15928307734429836,0.0,1,-0.13736820220947266,-0.1791331935673952,0.0,1,0.031576983630657196,-0.036349253728985786,2.0,26,55,81
+HRM,111,178681,always,10,0,2604,2000,2604,2000,True,1.0,-0.17463193833827972,-0.21342511661350727,-0.23779062554240227,0.0,0.0,1,-0.21345564723014832,-0.24433917179703712,0.0,1,-0.24837201833724976,-0.2929852157831192,0.0,1,-0.18978027999401093,-0.2713599819689989,0.0,1,-0.138997882604599,-0.24807055667042732,0.0,1,-0.17434541881084442,-0.24124252796173096,0.0,1,-0.25219470262527466,-0.3164190277457237,0.0,1,-0.15624167025089264,-0.19973894208669662,0.0,1,0.030455445870757103,-0.09111304674297571,1.0,1,-0.03174764662981033,-0.19120036531239748,0.0,1,-0.17463193833827972,-0.21342511661350727,0.0,32,49,81
+HRM,112,381040,stable_learned,8,1,7812,6000,7812,6000,True,1.0,-0.1220601499080658,-0.16999728418886662,-0.19016583263874054,0.0,0.0,0,-0.028553826734423637,-0.08975395723246038,0.0,0,-0.10662934184074402,-0.17167688347399235,0.0,1,-0.22335200011730194,-0.2494911104440689,0.0,1,-0.15226006507873535,-0.18623486533761024,0.0,1,-0.1482825130224228,-0.18890414386987686,0.0,1,-0.18249911069869995,-0.23301209695637226,0.0,1,-0.008962308056652546,-0.1225419488037005,0.0,1,-0.061142697930336,-0.15573942754417658,0.0,1,-0.1769033521413803,-0.21168553456664085,0.0,1,-0.1220601499080658,-0.16999728418886662,0.0,25,56,81
+HRM,113,392992,flaky_then_stable,5,3,13020,10000,18228,14000,True,1.0,-0.16334125399589539,-0.22355299815535545,-0.24192402139306068,0.0,0.0,0,-0.09014612436294556,-0.11687249597162008,0.0,0,-0.12681064009666443,-0.17103561386466026,0.0,0,-0.12919984757900238,-0.17768552154302597,0.0,0,0.010598421096801758,-0.06068156368564814,1.0,1,-0.17831097543239594,-0.22021919675171375,0.0,0,0.06222493201494217,0.0001017265603877604,4.0,1,-0.17562603950500488,-0.23639371432363987,0.0,1,-0.17574197053909302,-0.19543814286589622,0.0,1,-0.14093096554279327,-0.2150931879878044,0.0,1,-0.16334125399589539,-0.22355299815535545,0.0,26,55,81
+HRM,114,347650,stable_learned,5,1,15624,12000,15624,12000,True,1.0,-0.14490534365177155,-0.18442637100815773,-0.20499322935938835,0.0,0.0,0,-0.10107796639204025,-0.11848967336118221,0.0,0,-0.12178786098957062,-0.15085054747760296,0.0,0,-0.025954946875572205,-0.14347569458186626,0.0,0,-0.029575616121292114,-0.1053710924461484,0.0,0,-0.035851046442985535,-0.07916089799255133,0.0,1,-0.17126336693763733,-0.24067062139511108,0.0,1,-0.03462513908743858,-0.1546018342487514,0.0,1,-0.05537479743361473,-0.1178039857186377,0.0,1,-0.18311059474945068,-0.2282789871096611,0.0,1,-0.14490534365177155,-0.18442637100815773,0.0,28,53,81
+HRM,115,349536,never,0,0,,,,,False,0.654321014881134,-0.06075481325387955,-0.08345984388142824,-0.09389353170990944,0.0,0.0,0,-0.11931248009204865,-0.127719865180552,0.0,0,-0.15573637187480927,-0.16993181221187115,0.0,0,-0.10859397053718567,-0.15978784300386906,0.0,0,0.021112512797117233,-0.14485007477924228,1.0,0,0.10018815100193024,-0.03892093105241656,2.0,0,0.10692080110311508,-0.020838956581428647,3.0,0,0.016728803515434265,-0.0408703472930938,2.0,0,-0.0362163670361042,-0.07213028287515044,0.0,0,-0.08223512768745422,-0.11555200535804033,0.0,0,-0.06075481325387955,-0.08345984388142824,0.0,23,58,81
+HRM,116,331963,never,0,0,,,,,False,0.6419752836227417,-0.005975511856377125,-0.04740163835231215,-0.07249695993959904,0.0,0.0,0,-0.1086696982383728,-0.12536645215004683,0.0,0,-0.13250891864299774,-0.17629667930305004,0.0,0,-0.09182329475879669,-0.15193427447229624,0.0,0,-0.029346084222197533,-0.08987952838651836,0.0,0,0.0642690360546112,-0.0021665437088813633,3.0,0,-0.020998090505599976,-0.042752210050821304,0.0,0,0.044320058077573776,-0.03195719770155847,1.0,0,0.0016368500655516982,-0.041868695625453256,1.0,0,0.01095468457788229,-0.034248948155436665,1.0,0,-0.005975511856377125,-0.04740163835231215,0.0,25,56,81
+HRM,117,92373,flaky_then_stable,7,3,7812,6000,18228,14000,True,1.0,-0.17615270614624023,-0.21752890199422836,-0.23365341126918793,0.0,0.0,0,-0.10875194519758224,-0.11719468049705029,0.0,0,-0.11388954520225525,-0.1637101825326681,0.0,1,-0.12578430771827698,-0.26955827325582504,0.0,1,-0.11851237714290619,-0.20431074127554893,0.0,1,-0.1864924430847168,-0.2176555022597313,0.0,0,0.10929971188306808,0.01782199908484472,5.0,1,-0.15063464641571045,-0.16611197032034397,0.0,1,-0.09934058040380478,-0.1288492688909173,0.0,1,-0.14919564127922058,-0.18940441496670246,0.0,1,-0.17615270614624023,-0.21752890199422836,0.0,25,56,81
+HRM,118,284134,flaky_then_stable,5,3,13020,10000,18228,14000,True,1.0,-0.12427160143852234,-0.16481486894190311,-0.1927507221698761,0.0,0.0,0,-0.10775234550237656,-0.11851826962083578,0.0,0,-0.18996191024780273,-0.2053517960011959,0.0,0,-0.05884343013167381,-0.13518449431285262,0.0,0,-0.01068517193198204,-0.07434869976714253,0.0,1,-0.17078033089637756,-0.18368766270577908,0.0,0,0.07727766036987305,-0.003406325413379818,3.0,1,-0.13072113692760468,-0.1627716477960348,0.0,1,-0.10052009671926498,-0.17940523009747267,0.0,1,-0.13204346597194672,-0.22916006855666637,0.0,1,-0.12427160143852234,-0.16481486894190311,0.0,25,56,81
+HRM,119,133524,stable_learned,4,1,18228,14000,18228,14000,True,1.0,-0.17805318534374237,-0.22335813753306866,-0.24547086656093597,0.0,0.0,0,-0.09922903031110764,-0.1252477364614606,0.0,0,-0.005805238150060177,-0.16669980611186475,0.0,0,-0.0791204571723938,-0.11296170111745596,0.0,0,-0.043551843613386154,-0.07041528588160872,0.0,0,0.02165723219513893,-0.030873628798872232,1.0,0,0.032453443855047226,-0.0011169796925969422,5.0,1,-0.04985684156417847,-0.1644924134016037,0.0,1,-0.13971778750419617,-0.1641644984483719,0.0,1,-0.0924849808216095,-0.23505500704050064,0.0,1,-0.17805318534374237,-0.22335813753306866,0.0,25,56,81
+HRM,120,15615,transient_or_regressed,1,2,20832,16000,,,False,0.5679012537002563,0.019828088581562042,-0.03749887221783865,-0.05841586831957102,2.0,0.020961989066563547,0,-0.0830891877412796,-0.10273745004087687,0.0,0,-0.13370564579963684,-0.16752892918884754,0.0,0,-0.15159723162651062,-0.18529123067855835,0.0,0,-0.1585938185453415,-0.18184033036231995,0.0,0,-0.083518847823143,-0.12624309118837118,0.0,0,-0.05933307856321335,-0.08130346378311515,0.0,0,-0.026880284771323204,-0.07966155256144702,0.0,1,-0.14216265082359314,-0.16561255604028702,0.0,0,-0.014454393647611141,-0.04939245444256812,0.0,0,0.019828088581562042,-0.03749887221783865,2.0,25,56,81
+HRM,121,417974,stable_learned,4,1,18228,14000,18228,14000,True,1.0,-0.12234537303447723,-0.17912468872964382,-0.21682938933372498,0.0,0.0,0,-0.07846608757972717,-0.09764999058097601,0.0,0,-0.12423911690711975,-0.14079230278730392,0.0,0,-0.13228784501552582,-0.15462314151227474,0.0,0,0.030948763713240623,-0.049702628049999475,1.0,0,0.017871707677841187,-0.06298307282850146,1.0,0,0.03755604475736618,-0.004160646436503157,3.0,1,-0.07120474427938461,-0.11926707345992327,0.0,1,0.08974732458591461,-0.13095714151859283,1.0,1,-0.015253170393407345,-0.14796576730441302,0.0,1,-0.12234537303447723,-0.17912468872964382,0.0,24,57,81
+HRM,122,88569,flaky_then_stable,2,3,18228,14000,26040,20000,True,1.0,-0.18926632404327393,-0.20619083382189274,-0.2149607576429844,0.0,0.0,0,-0.10241498798131943,-0.11268776375800371,0.0,0,-0.14399868249893188,-0.18633925542235374,0.0,0,-0.08837930858135223,-0.1539631001651287,0.0,0,-0.048567719757556915,-0.08852148335427046,0.0,0,-0.016419488936662674,-0.06436560349538922,0.0,0,0.04446494206786156,-0.010909111006185412,2.0,1,-0.06499742716550827,-0.1359361046925187,0.0,0,-0.034067779779434204,-0.06543523259460926,0.0,0,-0.0019766585901379585,-0.046111770265270025,0.0,1,-0.18926632404327393,-0.20619083382189274,0.0,26,55,81
+HRM,123,2265,transient_or_regressed,1,2,13020,10000,,,False,0.5802469253540039,0.08145808428525925,0.004451647720998153,-0.020722885616123676,4.0,0.11850472423247993,0,-0.09704240411520004,-0.11231734976172447,0.0,0,-0.13474446535110474,-0.1503829676657915,0.0,0,-0.07689816504716873,-0.1386263882741332,0.0,0,-0.09591074287891388,-0.12971112970262766,0.0,1,-0.19085294008255005,-0.2377997748553753,0.0,0,0.014646968804299831,-0.0432873546378687,2.0,0,0.04417237266898155,-0.01906970515847206,2.0,0,0.08836661279201508,-0.02569350879639387,2.0,0,0.0006325250724330544,-0.05141170688148122,1.0,0,0.08145808428525925,0.004451647720998153,4.0,24,57,81
+HRM,124,27594,transient_or_regressed,2,2,20832,16000,,,False,0.7407407164573669,0.010600751265883446,-0.0211264227546053,-0.042011721059679985,2.0,0.011430701590143144,0,-0.10482992231845856,-0.11785483825951815,0.0,0,-0.13047575950622559,-0.18028255365788937,0.0,0,-0.0877859815955162,-0.14464274421334267,0.0,0,-0.06800922751426697,-0.11991625651717186,0.0,0,0.014695808291435242,-0.03776803478831425,2.0,0,-0.031144149601459503,-0.06200557993724942,0.0,0,0.013290132395923138,-0.05281138198915869,1.0,1,-0.1367323398590088,-0.15188316069543362,0.0,1,-0.1415012925863266,-0.20858697406947613,0.0,0,0.010600751265883446,-0.0211264227546053,2.0,24,57,81
+HRM,125,409917,stable_learned,5,1,15624,12000,15624,12000,True,1.0,-0.11628228425979614,-0.17203381843864918,-0.2051367536187172,0.0,0.0,0,-0.12305819243192673,-0.13686503376811743,0.0,0,-0.1656280755996704,-0.18415760807693005,0.0,0,-0.11409297585487366,-0.15585895534604788,0.0,0,0.013644812628626823,-0.055441528325900435,2.0,0,0.0015532446559518576,-0.028646359831327572,1.0,1,-0.1445530652999878,-0.1871376484632492,0.0,1,-0.014247916638851166,-0.10963919758796692,0.0,1,-0.09613814204931259,-0.1462985137477517,0.0,1,-0.12176623195409775,-0.19954667706042528,0.0,1,-0.11628228425979614,-0.17203381843864918,0.0,26,55,81
+HRM,126,345695,transient_or_regressed,4,4,7812,6000,,,False,0.8024691343307495,0.03963196650147438,-0.0027131146052852273,-0.029427896719425917,3.0,0.10488653928041458,0,-0.08651512861251831,-0.10840064939111471,0.0,0,-0.08646456897258759,-0.16280507296323776,0.0,1,-0.11547329276800156,-0.15684500243514776,0.0,0,-0.025167129933834076,-0.12234360026195645,0.0,0,-0.014921607449650764,-0.08047446585260332,0.0,0,0.07731002569198608,-0.010406905726995319,4.0,1,-0.005425227340310812,-0.05401634209556505,0.0,1,-0.06836944073438644,-0.14621199388056993,0.0,1,-0.06426669657230377,-0.18440533056855202,0.0,0,0.03963196650147438,-0.0027131146052852273,3.0,26,55,81
+HRM,127,59696,never,0,0,,,,,False,0.6790123581886292,0.09190850704908371,0.007003140461165458,-0.039812203496694565,3.0,0.22273558378219604,0,-0.09975177049636841,-0.11424319539219141,0.0,0,-0.13060259819030762,-0.16205560602247715,0.0,0,-0.13376006484031677,-0.17377577349543571,0.0,0,-0.13775061070919037,-0.16099754720926285,0.0,0,-0.06177122890949249,-0.10555691877380013,0.0,0,0.03487851470708847,-0.03176852373871952,3.0,0,0.0019966447725892067,-0.06036833196412772,1.0,0,-0.005072804167866707,-0.04708685097284615,0.0,0,0.05910990759730339,-0.07818027259781957,1.0,0,0.09190850704908371,0.007003140461165458,3.0,26,55,81
+HRM,128,290755,transient_or_regressed,3,2,15624,12000,,,False,0.6172839403152466,0.0007787460926920176,-0.033297380152362166,-0.056108647026121616,1.0,0.0007787460926920176,0,-0.05877365916967392,-0.1163525115698576,0.0,0,-0.14222054183483124,-0.1666463389992714,0.0,0,-0.11198394745588303,-0.1645485693588853,0.0,0,-0.13174527883529663,-0.17073271796107292,0.0,0,-0.01846567541360855,-0.06110474141314626,0.0,1,-0.22217914462089539,-0.2561031673103571,0.0,1,-0.10415910929441452,-0.14791916776448488,0.0,1,-0.09619574248790741,-0.16194721683859825,0.0,0,-0.011960121802985668,-0.050188336870633066,0.0,0,0.0007787460926920176,-0.033297380152362166,1.0,24,57,81
+HRM,129,313419,stable_learned,6,1,13020,10000,13020,10000,True,1.0,-0.14323680102825165,-0.16864370554685593,-0.18402015790343285,0.0,0.0,0,-0.1079181358218193,-0.127550327219069,0.0,0,-0.10831299424171448,-0.1569741815328598,0.0,0,-0.0891316756606102,-0.1257680393755436,0.0,0,-0.03614044189453125,-0.11006407625973225,0.0,1,-0.1713843047618866,-0.2024767193943262,0.0,1,-0.14688405394554138,-0.2185414433479309,0.0,1,-0.14701740443706512,-0.1778020765632391,0.0,1,-0.11720197647809982,-0.13757079001516104,0.0,1,-0.18591831624507904,-0.23116306774318218,0.0,1,-0.14323680102825165,-0.16864370554685593,0.0,23,58,81
+HRM,130,253578,flaky_then_stable,3,3,18228,14000,23436,18000,True,1.0,-0.1412535011768341,-0.19086570665240288,-0.2133588306605816,0.0,0.0,0,-0.11033064126968384,-0.1217549042776227,0.0,0,-0.16598229110240936,-0.1828974075615406,0.0,0,-0.15336860716342926,-0.17662286944687366,0.0,0,-0.11981652677059174,-0.1394887138158083,0.0,0,0.008964885957539082,-0.0717900627059862,1.0,0,-0.00988202728331089,-0.042358856182545424,0.0,1,-0.09247586131095886,-0.1326903784647584,0.0,0,-0.015498350374400616,-0.051794241764582694,0.0,1,-0.17931944131851196,-0.19747735932469368,0.0,1,-0.1412535011768341,-0.19086570665240288,0.0,22,59,81
+HRM,131,336359,never,0,0,,,,,False,0.5555555820465088,-0.06774222105741501,-0.0864538112655282,-0.09782695025205612,0.0,0.0,0,-0.09598588943481445,-0.11504268739372492,0.0,0,-0.11475664377212524,-0.1550504844635725,0.0,0,-0.11394994705915451,-0.1502608247101307,0.0,0,-0.11816900223493576,-0.1839268570765853,0.0,0,-0.10902589559555054,-0.11739295907318592,0.0,0,-0.07447823882102966,-0.08995869010686874,0.0,0,-0.022799313068389893,-0.0834163511171937,0.0,0,0.015412082895636559,-0.07895978004671633,1.0,0,-0.07294364273548126,-0.11151415295898914,0.0,0,-0.06774222105741501,-0.0864538112655282,0.0,24,57,81
+HRM,132,373221,transient_or_regressed,4,6,7812,6000,,,False,0.6913580298423767,0.06097620353102684,0.012697897989710327,-0.008285289120976813,5.0,0.13511113886488602,0,-0.0832199975848198,-0.10658643115311861,0.0,0,-0.07452255487442017,-0.1609131395816803,0.0,1,-0.2415417730808258,-0.2712455913424492,0.0,0,-0.07062964141368866,-0.1246066689491272,0.0,1,-0.03399081528186798,-0.10446814075112343,0.0,1,-0.13857132196426392,-0.1812868546694517,0.0,0,0.061996109783649445,0.02396463230252266,7.0,0,0.035524651408195496,-0.037769024493172765,1.0,1,-0.10664957016706467,-0.16255261469632387,0.0,0,0.06097620353102684,0.012697897989710327,5.0,23,58,81
+HRM,133,371950,never,0,0,,,,,False,0.604938268661499,0.07513071596622467,-0.01495280908420682,-0.04544609785079956,2.0,0.10607311502099037,0,-0.07637259364128113,-0.11493309214711189,0.0,0,-0.09065045416355133,-0.16254067793488503,0.0,0,-0.14273524284362793,-0.16883510537445545,0.0,0,-0.0239183958619833,-0.11487036268226802,0.0,0,0.017161179333925247,-0.023868307966040447,2.0,0,0.0003024396428372711,-0.04057206352081266,1.0,0,0.03496093302965164,-0.021901563923165668,2.0,0,0.1223238930106163,0.02460686006816104,5.0,0,-0.0625656470656395,-0.07708250638097525,0.0,0,0.07513071596622467,-0.01495280908420682,2.0,26,55,81
+HRM,134,265574,flaky_then_stable,4,5,7812,6000,26040,20000,True,1.0,-0.15687598288059235,-0.18923508375883102,-0.2072373367846012,0.0,0.0,0,-0.08799004554748535,-0.11348282732069492,0.0,0,-0.09026854485273361,-0.1840457497164607,0.0,1,-0.2533627152442932,-0.2765609249472618,0.0,1,-0.17121772468090057,-0.2244085744023323,0.0,0,0.0055252304300665855,-0.03314418390436913,1.0,0,0.03695860132575035,-0.02040347270667553,2.0,1,0.029650971293449402,-0.10244343942031264,1.0,0,0.0801728367805481,0.00978895416483283,4.0,0,0.07387412339448929,0.011814676399808377,4.0,1,-0.15687598288059235,-0.18923508375883102,0.0,26,55,81
+HRM,135,327810,never,0,0,,,,,False,0.5432098507881165,-0.032848525792360306,-0.05883320653811097,-0.07682271301746368,0.0,0.0,0,-0.10067768394947052,-0.10987508669495583,0.0,0,-0.06598373502492905,-0.13380760420113802,0.0,0,-0.0032926234416663647,-0.1371951200417243,0.0,0,-0.10228729248046875,-0.13747812435030937,0.0,0,-0.06522711366415024,-0.11231045424938202,0.0,0,-0.057945068925619125,-0.09745452273637056,0.0,0,-0.02509681135416031,-0.06447476148605347,0.0,0,-0.042680345475673676,-0.06479577254503965,0.0,0,-0.031218796968460083,-0.08120516268536448,0.0,0,-0.032848525792360306,-0.05883320653811097,0.0,26,55,81
+HRM,136,26585,never,0,0,,,,,False,0.6419752836227417,-0.00513819232583046,-0.050371992168948054,-0.070247333496809,0.0,0.0,0,-0.10255849361419678,-0.11485217418521643,0.0,0,-0.12076320499181747,-0.16133439261466265,0.0,0,-0.11114215850830078,-0.15566261857748032,0.0,0,-0.07967232167720795,-0.17585340328514576,0.0,0,-0.0528443306684494,-0.09740656986832619,0.0,0,-0.07183780521154404,-0.09605278726667166,0.0,0,0.08716895431280136,-0.029356356244534254,2.0,0,-0.06392118334770203,-0.09600597340613604,0.0,0,-0.04054589942097664,-0.09830342466011643,0.0,0,-0.00513819232583046,-0.050371992168948054,0.0,25,56,81
+HRM,137,113924,never,0,0,,,,,False,0.6296296119689941,0.014822597615420818,-0.03977650206070393,-0.06133430637419224,1.0,0.014822597615420818,0,-0.10116401314735413,-0.12299583945423365,0.0,0,-0.15428611636161804,-0.1863313913345337,0.0,0,-0.05089973658323288,-0.14442261960357428,0.0,0,-0.1172432228922844,-0.14425968658179045,0.0,0,-0.03670276701450348,-0.07437720661982894,0.0,0,0.05109836161136627,-0.002491498424205929,3.0,0,-0.012580346316099167,-0.02710193651728332,0.0,0,0.14007394015789032,-0.0031021706527099013,1.0,0,0.0694504976272583,-0.020322363212471828,2.0,0,0.014822597615420818,-0.03977650206070393,1.0,24,57,81
+HRM,138,300328,always,10,0,2604,2000,2604,2000,True,1.0,-0.09635825455188751,-0.19936455879360437,-0.25190845876932144,0.0,0.0,1,-0.10530703514814377,-0.17616378236562014,0.0,1,-0.22608190774917603,-0.2658533714711666,0.0,1,-0.18922194838523865,-0.2970279324799776,0.0,1,-0.22500786185264587,-0.2866746000945568,0.0,1,-0.2084844410419464,-0.2373969964683056,0.0,1,-0.21251307427883148,-0.2853763774037361,0.0,1,-0.03106638416647911,-0.16832518903538585,0.0,1,-0.08860354870557785,-0.18862508703023195,0.0,1,-0.16552703082561493,-0.21053452789783478,0.0,1,-0.09635825455188751,-0.19936455879360437,0.0,34,47,81
+HRM,139,237277,stable_learned,8,1,7812,6000,7812,6000,True,1.0,-0.17630422115325928,-0.20903681218624115,-0.22355971112847328,0.0,0.0,0,-0.09008689969778061,-0.10990865435451269,0.0,0,-0.16715511679649353,-0.1884547770023346,0.0,1,-0.10774704813957214,-0.216326504945755,0.0,1,-0.14705413579940796,-0.20940391346812248,0.0,1,-0.15050655603408813,-0.2071323450654745,0.0,1,0.06533906608819962,-0.2182555040344596,1.0,1,-0.06945495307445526,-0.13192932587116957,0.0,1,0.0049231029115617275,-0.1382245126296766,1.0,1,-0.09801250696182251,-0.21279056556522846,0.0,1,-0.17630422115325928,-0.20903681218624115,0.0,25,56,81
+HRM,140,14183,transient_or_regressed,1,2,18228,14000,,,False,0.604938268661499,-0.031589608639478683,-0.05349958036094904,-0.06339177768677473,0.0,0.0,0,-0.07159161567687988,-0.09817969799041748,0.0,0,-0.13825085759162903,-0.17873761802911758,0.0,0,-0.04092204570770264,-0.16841231286525726,0.0,0,-0.07024580985307693,-0.1099577285349369,0.0,0,-0.08974204957485199,-0.10741293895989656,0.0,0,-0.06497873365879059,-0.0854879803955555,0.0,1,-0.1696815937757492,-0.182070204988122,0.0,0,-0.04685056954622269,-0.0691476734355092,0.0,0,0.0026350878179073334,-0.05202381935669109,1.0,0,-0.031589608639478683,-0.05349958036094904,0.0,26,55,81
+HRM,141,10350,never,0,0,,,,,False,0.5555555820465088,0.022049035876989365,-0.04857182479463518,-0.07077275216579437,1.0,0.022049035876989365,0,-0.09048228710889816,-0.11319645494222641,0.0,0,-0.13520236313343048,-0.16563314013183117,0.0,0,-0.13467176258563995,-0.16729496233165264,0.0,0,-0.10904832929372787,-0.16111768316477537,0.0,0,-0.061692267656326294,-0.12485918216407299,0.0,0,0.006406179629266262,-0.08807648264337331,1.0,0,-0.043145254254341125,-0.0637609213590622,0.0,0,-0.009873772040009499,-0.052222042926587164,0.0,0,-0.008786162361502647,-0.06868192437104881,0.0,0,0.022049035876989365,-0.04857182479463518,1.0,24,57,81
+HRM,142,121829,never,0,0,,,,,False,0.7160493731498718,-0.01372222788631916,-0.07035879581235349,-0.09515228681266308,0.0,0.0,0,-0.10988351702690125,-0.13021148927509785,0.0,0,-0.11755304038524628,-0.16484123561531305,0.0,0,-0.09507688134908676,-0.14516770746558905,0.0,0,-0.08045892417430878,-0.11070073489099741,0.0,0,0.004205934703350067,-0.060881510842591524,1.0,0,0.12180061638355255,-0.0008428615401498973,3.0,0,-0.013076037168502808,-0.030390871805138886,0.0,0,-0.03021937608718872,-0.07918424159288406,0.0,0,-0.03066360019147396,-0.06939225294627249,0.0,0,-0.01372222788631916,-0.07035879581235349,0.0,25,56,81
+HRM,143,147685,stable_learned,5,1,15624,12000,15624,12000,True,1.0,-0.16514959931373596,-0.19138234481215477,-0.20334899425506592,0.0,0.0,0,-0.1052079051733017,-0.11499612871557474,0.0,0,-0.08357935398817062,-0.1507503893226385,0.0,0,-0.06807274371385574,-0.14545468147844076,0.0,0,0.009656239300966263,-0.08381743950303644,2.0,0,-0.035180240869522095,-0.06025393446907401,0.0,1,-0.2051813304424286,-0.2262669149786234,0.0,1,-0.11330636590719223,-0.20977637451142073,0.0,1,-0.03762807324528694,-0.12641269294545054,0.0,1,-0.18491588532924652,-0.20035257376730442,0.0,1,-0.16514959931373596,-0.19138234481215477,0.0,25,56,81
+HRM,144,114035,never,0,0,,,,,False,0.5185185074806213,0.0324646420776844,-0.01619418489281088,-0.04292636830359697,3.0,0.06706096138805151,0,-0.09058474004268646,-0.10244579706341028,0.0,0,-0.10721709579229355,-0.143841203302145,0.0,0,-0.11769895255565643,-0.19371298514306545,0.0,0,-0.08984342217445374,-0.13520072400569916,0.0,0,-0.07755216956138611,-0.09796091821044683,0.0,0,0.07835834473371506,-0.0469896006397903,1.0,0,0.03870260342955589,-0.028758260887116194,1.0,0,0.058945298194885254,-0.007059456387651153,4.0,0,-0.02616780810058117,-0.07844244479201734,0.0,0,0.0324646420776844,-0.01619418489281088,3.0,25,56,81
+HRM,145,129570,never,0,0,,,,,False,0.654321014881134,0.046107660979032516,-0.00647722405847162,-0.04178883461281657,4.0,0.11533754598349333,0,-0.09007212519645691,-0.10917545761913061,0.0,0,-0.14526987075805664,-0.18639101460576057,0.0,0,-0.1585851013660431,-0.2124778926372528,0.0,0,-0.1497412919998169,-0.1870898101478815,0.0,0,-0.030410699546337128,-0.07710424857214093,0.0,0,-0.02475619688630104,-0.06355632795020938,0.0,0,0.06681501865386963,-0.0012255583424121141,4.0,0,0.04261917993426323,-0.02559729451604653,3.0,0,-0.03705621510744095,-0.07670936733484268,0.0,0,0.046107660979032516,-0.00647722405847162,4.0,26,55,81
+HRM,146,82468,transient_or_regressed,2,2,18228,14000,,,False,0.7160493731498718,0.016217313706874847,-0.0032967252045636997,-0.014736815239302814,3.0,0.03387520834803581,0,-0.11022227257490158,-0.12053469568490982,0.0,0,-0.1497931182384491,-0.1632749531418085,0.0,0,0.0012507936917245388,-0.09725953341694549,1.0,0,-0.0827171728014946,-0.12147027999162674,0.0,0,0.10328441113233566,-0.03715667984215543,1.0,0,-0.03822394832968712,-0.07695444067940116,0.0,1,-0.07171450555324554,-0.1040650773793459,0.0,1,-0.09038075804710388,-0.1301648272201419,0.0,0,0.04938765615224838,0.0071674357750453055,4.0,0,0.016217313706874847,-0.0032967252045636997,3.0,17,64,81
+HRM,147,343314,never,0,0,,,,,False,0.6666666865348816,0.008198770694434643,-0.024756215105298907,-0.045969292521476746,1.0,0.008198770694434643,0,-0.1041407510638237,-0.12056197039783001,0.0,0,-0.12980300188064575,-0.1555993091315031,0.0,0,-0.10404843091964722,-0.13671695347875357,0.0,0,-0.04334910959005356,-0.12168324273079634,0.0,0,-0.04985394328832626,-0.08428062032908201,0.0,0,0.04021870344877243,-0.04883127997163683,1.0,0,0.03094259463250637,-0.029480106430128217,1.0,0,0.014144934713840485,-0.05839225742965937,1.0,0,-0.05174386501312256,-0.08575717872008681,0.0,0,0.008198770694434643,-0.024756215105298907,1.0,26,55,81
+HRM,148,2260,transient_or_regressed,1,2,18228,14000,,,False,0.7037037014961243,-0.010037705302238464,-0.04656806401908398,-0.06453586649149656,0.0,0.0,0,-0.11864939332008362,-0.12375078070908785,0.0,0,-0.15380394458770752,-0.1950475424528122,0.0,0,-0.11187966167926788,-0.17401626706123352,0.0,0,-0.13773766160011292,-0.1651411224156618,0.0,0,-0.0593671053647995,-0.09629664290696383,0.0,0,0.004526432137936354,-0.06260995735647157,1.0,1,0.010064034722745419,-0.10976068919990212,1.0,0,-0.052939362823963165,-0.07563601341098547,0.0,0,-0.06619112193584442,-0.09368626028299332,0.0,0,-0.010037705302238464,-0.04656806401908398,0.0,23,58,81
+HRM,149,255045,transient_or_regressed,1,2,13020,10000,,,False,0.7160493731498718,0.0141904903575778,-0.01623228914104402,-0.02554823597893119,1.0,0.0141904903575778,0,-0.09296634793281555,-0.12931525707244873,0.0,0,-0.11894010007381439,-0.18148428201675415,0.0,0,-0.11610209196805954,-0.15242419485002756,0.0,0,-0.08364132046699524,-0.12935337331146002,0.0,1,-0.16907262802124023,-0.1851340401917696,0.0,0,0.1339949667453766,0.04432842833921313,6.0,0,0.03558478504419327,-0.013501892506610602,2.0,0,0.09993933141231537,0.0075391901191323996,4.0,0,0.08588922023773193,0.011435986962169409,5.0,0,0.0141904903575778,-0.01623228914104402,1.0,24,57,81
+HRM,150,303870,never,0,0,,,,,False,0.6666666865348816,0.026003718376159668,-0.010269915801472962,-0.028850815258920193,3.0,0.04312443360686302,0,-0.11574995517730713,-0.12296128924936056,0.0,0,-0.1343689262866974,-0.19023394212126732,0.0,0,-0.10247626900672913,-0.17804044298827648,0.0,0,0.030314382165670395,-0.06938156671822071,2.0,0,0.1682283878326416,0.0514125888585113,7.0,0,0.0368429496884346,0.006035818951204419,5.0,0,0.024693172425031662,-0.02758966595865786,1.0,0,0.0069541907869279385,-0.022695821768138558,1.0,0,-0.02591051161289215,-0.05925475945696235,0.0,0,0.026003718376159668,-0.010269915801472962,3.0,26,55,81
+HRM,151,321274,flaky_then_stable,6,3,5208,4000,15624,12000,True,1.0,-0.12296779453754425,-0.16467799805104733,-0.19394228607416153,0.0,0.0,0,-0.11078009009361267,-0.12502211146056652,0.0,1,-0.16734860837459564,-0.19727176241576672,0.0,0,-0.026588520035147667,-0.14943625475279987,0.0,0,-0.1257363259792328,-0.144097700715065,0.0,0,-0.02663150243461132,-0.06268830946646631,0.0,1,-0.19289615750312805,-0.2351247165352106,0.0,1,-0.13286620378494263,-0.18329854682087898,0.0,1,-0.062467169016599655,-0.15089620975777507,0.0,1,-0.15304335951805115,-0.1972825787961483,0.0,1,-0.12296779453754425,-0.16467799805104733,0.0,25,56,81
+HRM,152,74913,never,0,0,,,,,False,0.7037037014961243,0.04288921877741814,-0.0020360726630315185,-0.031520980410277843,4.0,0.10979534033685923,0,-0.12362091988325119,-0.14101327303797007,0.0,0,-0.13586024940013885,-0.16221204213798046,0.0,0,-0.08034581691026688,-0.1599314520135522,0.0,0,-0.03887461870908737,-0.08415931835770607,0.0,0,-0.013866979628801346,-0.03779665706679225,0.0,0,0.021158121526241302,-0.03550464892759919,1.0,0,0.06622909754514694,-0.01256560574984178,2.0,0,0.081409752368927,0.009498033439740539,2.0,0,0.08949299901723862,-0.002623449545353651,3.0,0,0.04288921877741814,-0.0020360726630315185,4.0,26,55,81
+HRM,153,53855,stable_learned,9,1,5208,4000,5208,4000,True,1.0,-0.19944745302200317,-0.23249896056950092,-0.2585940808057785,0.0,0.0,0,-0.0091448575258255,-0.15701821632683277,0.0,1,-0.13895457983016968,-0.20268727652728558,0.0,1,-0.2674733102321625,-0.2946256659924984,0.0,1,-0.08811739087104797,-0.25817866809666157,0.0,1,-0.1660790741443634,-0.23250535875558853,0.0,1,-0.29738354682922363,-0.3329457566142082,0.0,1,-0.17074166238307953,-0.23714887350797653,0.0,1,-0.0665309876203537,-0.11024187318980694,0.0,1,-0.06628942489624023,-0.17677127942442894,0.0,1,-0.19944745302200317,-0.23249896056950092,0.0,34,47,81
+HRM,154,6756,transient_or_regressed,1,2,18228,14000,,,False,0.6172839403152466,0.062354475259780884,0.0007474973681382835,-0.026133297535125166,5.0,0.11350975511595607,0,-0.10199719667434692,-0.12180046830326319,0.0,0,-0.15072210133075714,-0.18158291839063168,0.0,0,-0.1339542418718338,-0.16216637939214706,0.0,0,-0.03740222379565239,-0.10551002295687795,0.0,0,-0.05172058567404747,-0.08332117088139057,0.0,0,-0.00547999469563365,-0.05428097763797268,0.0,1,-0.1252920776605606,-0.1762381661683321,0.0,0,-0.011252026073634624,-0.049159726477228105,0.0,0,-0.0036083830054849386,-0.04459106081048958,0.0,0,0.062354475259780884,0.0007474973681382835,5.0,22,59,81
+HRM,155,367285,stable_learned,8,1,7812,6000,7812,6000,True,1.0,-0.14764171838760376,-0.2022122349590063,-0.22498881444334984,0.0,0.0,0,-0.08581000566482544,-0.10262942686676979,0.0,0,-0.09496166557073593,-0.17496232595294714,0.0,1,-0.20532292127609253,-0.2493259310722351,0.0,1,-0.23492282629013062,-0.25075821205973625,0.0,1,-0.11351050436496735,-0.19171835482120514,0.0,1,-0.16627509891986847,-0.2203085869550705,0.0,1,-0.07583299279212952,-0.17945352382957935,0.0,1,-0.06521531939506531,-0.12511935736984015,0.0,1,-0.1327899992465973,-0.22428016737103462,0.0,1,-0.14764171838760376,-0.2022122349590063,0.0,24,57,81
+HRM,156,214527,never,0,0,,,,,False,0.6172839403152466,0.07907739281654358,-0.025526589946821332,-0.04904045816510916,1.0,0.07907739281654358,0,-0.09286066144704819,-0.10701680555939674,0.0,0,-0.10524691641330719,-0.17315620370209217,0.0,0,-0.04831717163324356,-0.13104492984712124,0.0,0,-0.12585312128067017,-0.15661893039941788,0.0,0,0.008063195273280144,-0.07859151693992317,1.0,0,-0.03611624240875244,-0.05751034617424011,0.0,0,-0.01219379436224699,-0.03621410147752613,0.0,0,0.10957615077495575,-0.02157799038104713,3.0,0,0.11017106473445892,-0.03631232772022486,2.0,0,0.07907739281654358,-0.025526589946821332,1.0,22,59,81
+HRM,157,338862,transient_or_regressed,3,4,15624,12000,,,False,0.5925925970077515,0.0038523059338331223,-0.03770303609780967,-0.05602709110826254,1.0,0.0038523059338331223,0,-0.10699462890625,-0.12267223000526428,0.0,0,-0.11506594717502594,-0.15585724264383316,0.0,0,-0.12286573648452759,-0.15444108843803406,0.0,0,-0.053930025547742844,-0.1324616759084165,0.0,0,-0.035235695540905,-0.10615047113969922,0.0,1,-0.22435949742794037,-0.2308325655758381,0.0,1,-0.10535861551761627,-0.14423386473208666,0.0,0,-0.011609746143221855,-0.07099175243638456,0.0,1,-0.17636196315288544,-0.21118508279323578,0.0,0,0.0038523059338331223,-0.03770303609780967,1.0,23,58,81
+HRM,158,34686,flaky_then_stable,3,3,15624,12000,23436,18000,True,1.0,-0.09061243385076523,-0.1542222024872899,-0.18450530618429184,0.0,0.0,0,-0.10872264206409454,-0.12093247845768929,0.0,0,-0.1344163417816162,-0.16170045547187328,0.0,0,0.030022254213690758,-0.09842365444637835,1.0,0,-0.028351498767733574,-0.07868959358893335,0.0,0,0.10634192079305649,0.025186367332935333,5.0,1,-0.1279141753911972,-0.16607310064136982,0.0,0,0.04266796261072159,-0.0024852334172464907,4.0,0,0.11278533190488815,0.03975005226675421,7.0,1,-0.15858373045921326,-0.1993445847183466,0.0,1,-0.09061243385076523,-0.1542222024872899,0.0,24,57,81
+HRM,159,206660,never,0,0,,,,,False,0.7777777910232544,0.06211359426379204,-0.009700000286102295,-0.04753344878554344,3.0,0.13988716155290604,0,-0.10372267663478851,-0.10908009018748999,0.0,0,-0.14851972460746765,-0.1844094730913639,0.0,0,-0.13346362113952637,-0.16660810820758343,0.0,0,-0.10441000759601593,-0.15560484491288662,0.0,0,-0.0178053118288517,-0.09721905505284667,0.0,0,-0.05067949742078781,-0.09220463503152132,0.0,0,-0.011561821214854717,-0.054571381653659046,0.0,0,-0.007889891974627972,-0.04734514805022627,0.0,0,0.0066136326640844345,-0.05746819730848074,1.0,0,0.06211359426379204,-0.009700000286102295,3.0,26,55,81
+HRM,160,115204,flaky_then_stable,8,3,5208,4000,26040,20000,True,1.0,-0.11968665570020676,-0.18080475088208914,-0.22349385172128677,0.0,0.0,0,-0.1013481542468071,-0.1097720917314291,0.0,1,-0.17778414487838745,-0.20440813712775707,0.0,1,-0.19441786408424377,-0.23600624315440655,0.0,1,-0.19001436233520508,-0.2173102367669344,0.0,1,-0.17951063811779022,-0.22504084184765816,0.0,1,-0.19080357253551483,-0.21544016525149345,0.0,1,0.061768218874931335,-0.07982449838891625,1.0,1,-0.09711950272321701,-0.13616249430924654,0.0,0,0.14345122873783112,0.002381013357080519,2.0,1,-0.11968665570020676,-0.18080475088208914,0.0,25,56,81
+HRM,161,160284,flaky_then_stable,5,3,13020,10000,18228,14000,True,1.0,-0.1953149288892746,-0.22572658397257328,-0.24353985860943794,0.0,0.0,0,-0.09156212210655212,-0.10384531412273645,0.0,0,-0.1490195393562317,-0.1730167530477047,0.0,0,-0.12335212528705597,-0.14814858324825764,0.0,0,-0.020445365458726883,-0.12465146230533719,0.0,1,-0.16269128024578094,-0.21070010587573051,0.0,0,-0.01321372389793396,-0.034374166280031204,0.0,1,-0.01682918146252632,-0.10186932235956192,0.0,1,-0.0063748969696462154,-0.1334064697730355,0.0,1,-0.21092405915260315,-0.22400924749672413,0.0,1,-0.1953149288892746,-0.22572658397257328,0.0,26,55,81
+HRM,162,384719,never,0,0,,,,,False,0.5925925970077515,0.022591469809412956,-0.024340145580936223,-0.047649018466472626,2.0,0.030741172842681408,0,-0.0872664749622345,-0.1123163653537631,0.0,0,-0.14469753205776215,-0.17125227488577366,0.0,0,-0.14204345643520355,-0.18275362439453602,0.0,0,-0.06513682007789612,-0.13051350507885218,0.0,0,0.029463332146406174,-0.04981791484169662,2.0,0,0.03796043246984482,0.003252570459153503,5.0,0,0.07866519689559937,-0.025225046185369138,3.0,0,0.08076406270265579,-0.008325441973283887,4.0,0,-0.05686284601688385,-0.08896406879648566,0.0,0,0.022591469809412956,-0.024340145580936223,2.0,24,57,81
+HRM,163,323015,stable_learned,1,1,26040,20000,26040,20000,True,1.0,-0.15104326605796814,-0.17597567662596703,-0.18857759982347488,0.0,0.0,0,-0.0889572948217392,-0.12369430065155029,0.0,0,-0.10113650560379028,-0.1590097639709711,0.0,0,-0.10397665202617645,-0.13803807832300663,0.0,0,-0.03509428724646568,-0.07835850538685918,0.0,0,0.11644022166728973,0.007289222441613674,3.0,0,0.07901924848556519,0.03926541202235967,7.0,0,0.11561280488967896,0.0424451909493655,8.0,0,0.04325879365205765,-0.014822897152043879,1.0,0,-0.042424511164426804,-0.06892737047746778,0.0,1,-0.15104326605796814,-0.17597567662596703,0.0,25,56,81
+HRM,164,101104,flaky_then_stable,2,3,18228,14000,26040,20000,True,1.0,-0.1513117551803589,-0.1809991281479597,-0.20375853776931763,0.0,0.0,0,-0.09673275798559189,-0.11256120074540377,0.0,0,-0.14332661032676697,-0.18890835717320442,0.0,0,-0.12133720517158508,-0.14498436450958252,0.0,0,0.021455539390444756,-0.0924281997140497,1.0,0,0.046216629445552826,-0.026470963028259575,2.0,0,0.008720447309315205,-0.046234095585532486,1.0,1,-0.0710117444396019,-0.17354521062225103,0.0,0,-0.01342424564063549,-0.05262891505844891,0.0,0,0.002043298911303282,-0.02693894415278919,1.0,1,-0.1513117551803589,-0.1809991281479597,0.0,24,57,81
+HRM,165,391795,stable_learned,4,1,18228,14000,18228,14000,True,1.0,-0.17522156238555908,-0.21856706216931343,-0.2406838908791542,0.0,0.0,0,-0.09422378242015839,-0.11266920063644648,0.0,0,-0.10831281542778015,-0.16777562908828259,0.0,0,-0.16308827698230743,-0.20081272721290588,0.0,0,-0.04423508793115616,-0.10227453522384167,0.0,0,-0.021840404719114304,-0.08447552099823952,0.0,0,-0.043109118938446045,-0.07010592427104712,0.0,1,-0.15938863158226013,-0.20366472750902176,0.0,1,-0.06206084415316582,-0.16163360653445125,0.0,1,-0.09238487482070923,-0.20991350151598454,0.0,1,-0.17522156238555908,-0.21856706216931343,0.0,27,54,81
+HRM,166,236449,stable_learned,1,1,26040,20000,26040,20000,True,1.0,-0.01559746079146862,-0.07202705345116556,-0.08666547946631908,0.0,0.0,0,-0.06587046384811401,-0.11581744626164436,0.0,0,-0.06836508214473724,-0.1452953740954399,0.0,0,-0.10748063772916794,-0.14462503511458635,0.0,0,0.005675876047462225,-0.09084458247525617,1.0,0,-0.026639509946107864,-0.06758609157986939,0.0,0,0.0197758786380291,-0.0492051875917241,1.0,0,0.03336944058537483,-0.007622552628163248,3.0,0,0.02725134789943695,-0.02512452832888812,2.0,0,-0.03744038939476013,-0.06786365993320942,0.0,1,-0.01559746079146862,-0.07202705345116556,0.0,28,53,81
+HRM,167,55915,flaky_then_stable,6,3,7812,6000,18228,14000,True,1.0,-0.08497180789709091,-0.11538662854582071,-0.13607633486390114,0.0,0.0,0,-0.1080443412065506,-0.11844640038907528,0.0,0,-0.07428376376628876,-0.17495747841894627,0.0,1,-0.1208217591047287,-0.28387567959725857,0.0,1,-0.1877131313085556,-0.23892553709447384,0.0,0,0.028879916295409203,-0.007232191972434521,3.0,0,-0.027879318222403526,-0.03968113707378507,0.0,1,-0.05849113315343857,-0.07517097797244787,0.0,1,-0.06103384494781494,-0.10231717582792044,0.0,1,-0.16098237037658691,-0.19075630232691765,0.0,1,-0.08497180789709091,-0.11538662854582071,0.0,25,56,81
+HRM,168,207723,transient_or_regressed,2,4,13020,10000,,,False,0.5432098507881165,0.023649318143725395,-0.015578766935504973,-0.03141076536849141,1.0,0.023649318143725395,0,-0.09370028972625732,-0.11815992835909128,0.0,0,-0.11866597831249237,-0.16026821173727512,0.0,0,-0.06686466932296753,-0.11894210334867239,0.0,0,-0.020424259826540947,-0.060929936822503805,0.0,1,-0.17744076251983643,-0.20479796081781387,0.0,0,0.14239934086799622,-0.03633459750562906,1.0,1,-0.06343075633049011,-0.10029384959489107,0.0,0,0.002940785838291049,-0.044726094260113314,2.0,0,-0.03459249436855316,-0.05239279242232442,0.0,0,0.023649318143725395,-0.015578766935504973,1.0,23,58,81
+HRM,169,138085,flaky_then_stable,3,3,13020,10000,23436,18000,True,1.0,-0.21221277117729187,-0.22830367274582386,-0.2394486926496029,0.0,0.0,0,-0.0851907879114151,-0.10077533777803183,0.0,0,-0.05075683444738388,-0.15349312406033278,0.0,0,-0.1294165700674057,-0.17421728745102882,0.0,0,0.017714209854602814,-0.07959141861647367,1.0,1,-0.09021928906440735,-0.1731648538261652,0.0,0,0.0013246277812868357,-0.051060720783425495,1.0,0,0.03424555063247681,-0.012880450114607811,2.0,0,-0.016246240586042404,-0.06394204450771213,0.0,1,-0.1398128718137741,-0.17460961639881134,0.0,1,-0.21221277117729187,-0.22830367274582386,0.0,26,55,81
+HRM,170,264730,stable_learned,4,1,18228,14000,18228,14000,True,1.0,-0.11594745516777039,-0.1920813862234354,-0.21793341264128685,0.0,0.0,0,-0.11615413427352905,-0.12171183247119188,0.0,0,-0.16051539778709412,-0.18337951228022575,0.0,0,-0.0834311991930008,-0.1351517355069518,0.0,0,0.07565510272979736,-0.0628777836682275,1.0,0,0.03119100257754326,0.004761513904668391,4.0,0,-0.0006821934948675334,-0.03234047881414881,0.0,1,-0.10922972857952118,-0.12605583015829325,0.0,1,-0.1136593148112297,-0.1637117015197873,0.0,1,-0.0869806706905365,-0.19149351306259632,0.0,1,-0.11594745516777039,-0.1920813862234354,0.0,27,54,81
+HRM,171,421139,transient_or_regressed,3,2,18228,14000,,,False,0.7530864477157593,0.006265226285904646,-0.027790736930910498,-0.04331798851490021,1.0,0.006265226285904646,0,-0.11592018604278564,-0.12213956564664841,0.0,0,-0.14458370208740234,-0.1727084256708622,0.0,0,-0.06456223130226135,-0.15168133657425642,0.0,0,0.05185914784669876,-0.15172589477151632,1.0,0,0.028963884338736534,-0.0476083024404943,1.0,0,-0.015061596408486366,-0.07867602608166635,0.0,1,-0.10915293544530869,-0.18882239423692226,0.0,1,-0.17053459584712982,-0.19214134849607944,0.0,1,-0.17770259082317352,-0.23857036232948303,0.0,0,0.006265226285904646,-0.027790736930910498,1.0,26,55,81
+HRM,172,111474,transient_or_regressed,6,2,10416,8000,,,False,0.8024691343307495,0.12372609972953796,0.06633682595565915,0.04956693481653929,8.0,0.5306946076452732,0,-0.0908614844083786,-0.1153758903965354,0.0,0,-0.15938496589660645,-0.18117924965918064,0.0,0,-0.13935574889183044,-0.16575983725488186,0.0,1,-0.18098150193691254,-0.2404512893408537,0.0,1,-0.11511670053005219,-0.18420489318668842,0.0,1,-0.20250442624092102,-0.24832054413855076,0.0,1,-0.028899777680635452,-0.15672519290819764,0.0,1,-0.06076236069202423,-0.12022964470088482,0.0,1,-0.20154349505901337,-0.23398763500154018,0.0,0,0.12372609972953796,0.06633682595565915,8.0,26,55,81
+HRM,173,11984,flaky_then_stable,4,3,13020,10000,20832,16000,True,1.0,-0.18430648744106293,-0.21822229772806168,-0.24059801921248436,0.0,0.0,0,-0.12360318005084991,-0.12825415469706059,0.0,0,-0.106353759765625,-0.2051597312092781,0.0,0,-0.15662077069282532,-0.1791807096451521,0.0,0,-0.060497693717479706,-0.08168801944702864,0.0,1,-0.00870540738105774,-0.11626305244863033,0.0,0,0.059777360409498215,-0.007445001800078899,2.0,0,0.0307391919195652,-0.0022484810469904914,4.0,1,-0.0029650763608515263,-0.13187751901568845,0.0,1,-0.23745588958263397,-0.25022014044225216,0.0,1,-0.18430648744106293,-0.21822229772806168,0.0,26,55,81
+HRM,174,332451,stable_learned,4,1,18228,14000,18228,14000,True,1.0,-0.09939232468605042,-0.1704623457044363,-0.20097576081752777,0.0,0.0,0,-0.0841856449842453,-0.10729130823165178,0.0,0,-0.10403011739253998,-0.16506048757582903,0.0,0,-0.03833752125501633,-0.11388711165636778,0.0,0,0.014722473919391632,-0.05258953711017966,1.0,0,0.00835125520825386,-0.03960766736418009,1.0,0,0.09762182086706161,0.01634180871769786,4.0,1,-0.03697390481829643,-0.064274737611413,0.0,1,0.025410085916519165,-0.11946268565952778,1.0,1,-0.032381005585193634,-0.2072358811274171,0.0,1,-0.09939232468605042,-0.1704623457044363,0.0,24,57,81
+HRM,175,126571,never,0,0,,,,,False,0.5555555820465088,-0.011598850600421429,-0.034819337888620794,-0.04993773624300957,0.0,0.0,0,-0.10020540654659271,-0.11173977609723806,0.0,0,-0.1302051842212677,-0.16138584539294243,0.0,0,-0.04692213237285614,-0.15758333541452885,0.0,0,-0.07757395505905151,-0.12334066443145275,0.0,0,-0.1164126843214035,-0.1305359285324812,0.0,0,0.02885681763291359,-0.03972566337324679,1.0,0,-0.03567331284284592,-0.044606456998735666,0.0,0,-0.051118507981300354,-0.08183654956519604,0.0,0,-0.0036134282127022743,-0.054436786216683686,0.0,0,-0.011598850600421429,-0.034819337888620794,0.0,24,57,81
+HRM,176,10862,never,0,0,,,,,False,0.5679012537002563,-0.0415135994553566,-0.08848319668322802,-0.11072160676121712,0.0,0.0,0,-0.0920334979891777,-0.11784578207880259,0.0,0,-0.1401510089635849,-0.17978794500231743,0.0,0,-0.1265382021665573,-0.16585683077573776,0.0,0,-0.15000121295452118,-0.1860175896435976,0.0,0,-0.09666708111763,-0.1243421183899045,0.0,0,-0.035354726016521454,-0.0581790404394269,0.0,0,-0.009177151136100292,-0.04992141469847411,0.0,0,0.020806360989809036,-0.062359370989724994,1.0,0,-0.03931157663464546,-0.0796417105011642,0.0,0,-0.0415135994553566,-0.08848319668322802,0.0,24,57,81
+HRM,177,364527,transient_or_regressed,1,2,15624,12000,,,False,0.6296296119689941,0.06357182562351227,-0.0012135475408285856,-0.027137036435306072,3.0,0.10521981213241816,0,-0.09746517241001129,-0.11844585184007883,0.0,0,-0.08891160786151886,-0.17697131074965,0.0,0,-0.11568416655063629,-0.16235249117016792,0.0,0,-0.04315907508134842,-0.12513559591025114,0.0,0,0.015699580311775208,-0.047911452827975154,1.0,1,-0.06065429374575615,-0.1112747541628778,0.0,0,0.06389332562685013,-0.018671130063012242,3.0,0,-0.019367266446352005,-0.03801990579813719,0.0,0,0.014833865687251091,-0.022098285688116448,2.0,0,0.06357182562351227,-0.0012135475408285856,3.0,26,55,81
+HRM,178,44562,never,0,0,,,,,False,0.8395061492919922,0.04571892321109772,-0.016482299426570535,-0.045856308192014694,3.0,0.0890518631786108,0,-0.06489585340023041,-0.09405361767858267,0.0,0,-0.1320507973432541,-0.18214455991983414,0.0,0,-0.12768319249153137,-0.19588563404977322,0.0,0,-0.06288792937994003,-0.1087588807567954,0.0,0,0.07114186137914658,-0.06346773449331522,1.0,0,0.0982406958937645,-0.03222416166681796,2.0,0,0.06787183880805969,-0.0029272885876707733,2.0,0,0.02094028890132904,-0.06139612477272749,1.0,0,-0.0388757660984993,-0.08035494294017553,0.0,0,0.04571892321109772,-0.016482299426570535,3.0,26,55,81
+HRM,179,201394,transient_or_regressed,2,4,13020,10000,,,False,0.7530864477157593,0.07990282773971558,0.01127977657597512,-0.02558851637877524,3.0,0.20737014710903168,0,-0.079738549888134,-0.10572031699120998,0.0,0,-0.1599431037902832,-0.17984394170343876,0.0,0,-0.13485002517700195,-0.1931100431829691,0.0,0,-0.09863847494125366,-0.14629457518458366,0.0,1,-0.17533406615257263,-0.1961357779800892,0.0,0,-0.01013709232211113,-0.04480547783896327,0.0,0,-0.020714091137051582,-0.043394700856879354,0.0,0,0.03350343555212021,-0.031320367474108934,1.0,1,-0.19488506019115448,-0.24797693453729153,0.0,0,0.07990282773971558,0.01127977657597512,3.0,26,55,81
+HRM,180,170293,flaky_then_stable,6,5,7812,6000,18228,14000,True,1.0,-0.1728852391242981,-0.21082371845841408,-0.23543338477611542,0.0,0.0,0,-0.05294474586844444,-0.0913232215680182,0.0,0,-0.07288660109043121,-0.12689485494047403,0.0,1,-0.21482187509536743,-0.2984164133667946,0.0,0,-0.11212693154811859,-0.1260416191071272,0.0,1,-0.16891400516033173,-0.22203581035137177,0.0,0,0.0007014750735834241,-0.0263850139308488,1.0,1,-0.1390598714351654,-0.2028380073606968,0.0,1,-0.029090341180562973,-0.15473458217456937,0.0,1,-0.10846315324306488,-0.2278386540710926,0.0,1,-0.1728852391242981,-0.21082371845841408,0.0,26,55,81
+HRM,181,205276,stable_learned,5,1,15624,12000,15624,12000,True,1.0,-0.17312407493591309,-0.1991148665547371,-0.21637537702918053,0.0,0.0,0,-0.07866156846284866,-0.1066579045727849,0.0,0,-0.15303708612918854,-0.17852363549172878,0.0,0,-0.0669032633304596,-0.15956815518438816,0.0,0,-0.04983071982860565,-0.14842034690082073,0.0,0,-0.08416211605072021,-0.1068340977653861,0.0,1,-0.19997578859329224,-0.24545115418732166,0.0,1,-0.05299460515379906,-0.1678336071781814,0.0,1,-0.10590063780546188,-0.1453264970332384,0.0,1,-0.17064301669597626,-0.18299731239676476,0.0,1,-0.17312407493591309,-0.1991148665547371,0.0,25,56,81
+HRM,182,104453,stable_learned,4,1,18228,14000,18228,14000,True,1.0,-0.12450814247131348,-0.16178669221699238,-0.17430537939071655,0.0,0.0,0,-0.11437220871448517,-0.1322017451748252,0.0,0,-0.12225549668073654,-0.14611992705613375,0.0,0,-0.045300692319869995,-0.10121566895395517,0.0,0,-0.013119327835738659,-0.056110141216777265,0.0,0,0.03415445610880852,-0.02362125040963292,2.0,0,0.06314865499734879,0.009949691913789138,4.0,1,0.011428100988268852,-0.11326785082928836,2.0,1,-0.07970966398715973,-0.11952304560691118,0.0,1,-0.1530984342098236,-0.22058663703501225,0.0,1,-0.12450814247131348,-0.16178669221699238,0.0,26,55,81
+HRM,183,229568,never,0,0,,,,,False,0.7283950448036194,-0.004357452504336834,-0.055520477588288486,-0.07119819521903992,0.0,0.0,0,-0.09091369807720184,-0.10904592461884022,0.0,0,-0.1400148570537567,-0.1634688712656498,0.0,0,-0.08596312999725342,-0.15736930258572102,0.0,0,-0.13185936212539673,-0.17229377664625645,0.0,0,-0.09253676235675812,-0.12056539487093687,0.0,0,-0.020427893847227097,-0.056501845829188824,0.0,0,-0.01716674119234085,-0.056784728076308966,0.0,0,0.03958773985505104,-0.04364197119139135,1.0,0,-0.06611645966768265,-0.09066090453416109,0.0,0,-0.004357452504336834,-0.055520477588288486,0.0,26,55,81
+HRM,184,346939,never,0,0,,,,,False,0.8641975522041321,0.08217284828424454,0.014048149576410651,-0.02753277448937297,4.0,0.22251629456877708,0,-0.06880050152540207,-0.0917904069647193,0.0,0,-0.16072767972946167,-0.19450311921536922,0.0,0,-0.16855311393737793,-0.1886917855590582,0.0,0,-0.12551189959049225,-0.14814413897693157,0.0,0,-0.039064228534698486,-0.06265170918777585,0.0,0,0.004360439255833626,-0.023867854615673423,2.0,0,0.018966974690556526,-0.02495274561078986,2.0,0,0.003390427678823471,-0.022042994698495022,3.0,0,-0.021635008975863457,-0.04996671946719289,0.0,0,0.08217284828424454,0.014048149576410651,4.0,26,55,81
+HRM,185,234087,flaky_then_stable,7,3,7812,6000,20832,16000,True,1.0,-0.1482323259115219,-0.2040234636515379,-0.22260429710149765,0.0,0.0,0,-0.09413602948188782,-0.10382255353033543,0.0,0,-0.12725482881069183,-0.1486308742314577,0.0,1,-0.22728735208511353,-0.2710073534399271,0.0,1,-0.20898154377937317,-0.25495194643735886,0.0,1,-0.16345137357711792,-0.19355914555490017,0.0,1,-0.22999268770217896,-0.2695341818034649,0.0,0,-0.006937983445823193,-0.023020317894406617,0.0,1,-0.10630705952644348,-0.15140025038272142,0.0,1,-0.07389234006404877,-0.20219090022146702,0.0,1,-0.1482323259115219,-0.2040234636515379,0.0,26,55,81
+HRM,186,323979,transient_or_regressed,1,2,20832,16000,,,False,0.6419752836227417,0.17330187559127808,0.03573188913287595,-0.011620799195952713,5.0,0.336869397200644,0,-0.11973109841346741,-0.13213435746729374,0.0,0,-0.08129694312810898,-0.14644628390669823,0.0,0,-0.11879806220531464,-0.15502885915338993,0.0,0,0.03011438623070717,-0.09765096986666322,1.0,0,-0.018638506531715393,-0.042023558635264635,0.0,0,-0.01680292934179306,-0.029873012797906995,0.0,0,0.004211197607219219,-0.021999718621373177,1.0,1,-0.07932465523481369,-0.11792378686368465,0.0,0,0.0409536138176918,-0.02990708240395179,1.0,0,0.17330187559127808,0.03573188913287595,5.0,24,57,81
+HRM,187,164256,flaky_then_stable,4,3,15624,12000,26040,20000,True,1.0,-0.12400086969137192,-0.17084590811282396,-0.18918225914239883,0.0,0.0,0,-0.10019183158874512,-0.11434285808354616,0.0,0,-0.12880629301071167,-0.1521625742316246,0.0,0,-0.10505320876836777,-0.1707219621166587,0.0,0,-0.13691960275173187,-0.15738226659595966,0.0,0,-0.08536341786384583,-0.1160125881433487,0.0,1,-0.2340897172689438,-0.25671240501105785,0.0,1,-0.10397063940763474,-0.18032481893897057,0.0,1,-0.14514751732349396,-0.16483163833618164,0.0,0,-0.0769418478012085,-0.09207967109978199,0.0,1,-0.12400086969137192,-0.17084590811282396,0.0,24,57,81
+HRM,188,331948,never,0,0,,,,,False,0.5802469253540039,0.010045958682894707,-0.01534153742250055,-0.032044604420661926,2.0,0.014471492730081081,0,-0.09311169385910034,-0.12630764581263065,0.0,0,-0.1263648122549057,-0.16367122158408165,0.0,0,-0.09635819494724274,-0.14962440729141235,0.0,0,0.08021800220012665,-0.05108214006759226,1.0,0,0.06789219379425049,0.014116373844444752,5.0,0,0.02515881136059761,-0.03738573007285595,1.0,0,0.044594667851924896,-0.021088123321533203,2.0,0,0.13170498609542847,-0.027029102842789143,1.0,0,0.0535428449511528,-0.026868028449825943,2.0,0,0.010045958682894707,-0.01534153742250055,2.0,25,56,81
+HRM,189,250737,flaky_then_stable,6,3,10416,8000,18228,14000,True,1.0,-0.19206134974956512,-0.21555672958493233,-0.22621388360857964,0.0,0.0,0,-0.09508391469717026,-0.11347064469009638,0.0,0,-0.16816136240959167,-0.19508690759539604,0.0,0,-0.11645572632551193,-0.16880165319889784,0.0,1,-0.24807551503181458,-0.2706482894718647,0.0,1,-0.23135855793952942,-0.2509997561573982,0.0,0,0.022667691111564636,-0.018768396752420813,3.0,1,-0.06777819991111755,-0.16833041794598103,0.0,1,-0.041244834661483765,-0.13351775985211134,0.0,1,-0.12458490580320358,-0.18181970063596964,0.0,1,-0.19206134974956512,-0.21555672958493233,0.0,25,56,81
+HRM,190,82690,never,0,0,,,,,False,0.7777777910232544,-0.025566188618540764,-0.046084248227998614,-0.06097535230219364,0.0,0.0,0,-0.08449015021324158,-0.10497305262833834,0.0,0,-0.1698114275932312,-0.1909757163375616,0.0,0,-0.11531095951795578,-0.14888800214976072,0.0,0,-0.024146512150764465,-0.061487069353461266,0.0,0,0.14799553155899048,0.07251855681533925,6.0,0,0.10015510022640228,0.032769616082077846,7.0,0,0.005780469626188278,-0.03979689232073724,1.0,0,0.13101953268051147,-0.022183627355843782,1.0,0,0.06035258620977402,-0.045373853761702776,1.0,0,-0.025566188618540764,-0.046084248227998614,0.0,25,56,81
+HRM,191,23822,always,10,0,2604,2000,2604,2000,True,1.0,-0.11930739134550095,-0.17982810456305742,-0.22951463237404823,0.0,0.0,1,-0.09994520992040634,-0.1866961009800434,0.0,1,-0.18374989926815033,-0.19784855283796787,0.0,1,-0.20379027724266052,-0.3128388077020645,0.0,1,-0.13687889277935028,-0.25592612475156784,0.0,1,-0.1116228699684143,-0.23605242744088173,0.0,1,-0.2090590000152588,-0.27362578734755516,0.0,1,-0.10238392651081085,-0.21089817211031914,0.0,1,-0.08828455209732056,-0.166483411565423,0.0,1,-0.14343929290771484,-0.19759582355618477,0.0,1,-0.11930739134550095,-0.17982810456305742,0.0,35,46,81
+HRM,192,35695,never,0,0,,,,,False,0.6419752836227417,0.09558531641960144,0.018258162424899638,-0.027098213089630008,4.0,0.25445815175771713,0,-0.08610032498836517,-0.11587607394903898,0.0,0,-0.15376341342926025,-0.17310957051813602,0.0,0,-0.09002169221639633,-0.1997231924906373,0.0,0,-0.10878237336874008,-0.16992625314742327,0.0,0,-0.10734368860721588,-0.1194630665704608,0.0,0,-0.09116490930318832,-0.11329765897244215,0.0,0,-0.05607964098453522,-0.07674691267311573,0.0,0,-0.04986118525266647,-0.1013959376141429,0.0,0,-0.04311947152018547,-0.070018220692873,0.0,0,0.09558531641960144,0.018258162424899638,4.0,25,56,81
+HRM,193,275783,never,0,0,,,,,False,0.4691357910633087,0.07946502417325974,-0.014291270985268056,-0.05245622433722019,3.0,0.10046897269785404,0,-0.06296781450510025,-0.11220771074295044,0.0,0,-0.1208924651145935,-0.1871270388364792,0.0,0,-0.059449922293424606,-0.16337826801463962,0.0,0,-0.12150930613279343,-0.1346642803400755,0.0,0,-0.048038631677627563,-0.1122618168592453,0.0,0,-0.06517532467842102,-0.09447084553539753,0.0,0,-0.026063766330480576,-0.04774903948418796,0.0,0,0.043097030371427536,-0.03511598752811551,1.0,0,-0.03971725329756737,-0.08603507792577147,0.0,0,0.07946502417325974,-0.014291270985268056,3.0,24,57,81
+HRM,194,404388,flaky_then_stable,7,3,7812,6000,15624,12000,True,1.0,-0.14980335533618927,-0.20497536659240723,-0.22569596767425537,0.0,0.0,0,-0.049613773822784424,-0.0886590713635087,0.0,0,-0.10067698359489441,-0.17357679456472397,0.0,1,-0.07042418420314789,-0.21730465441942215,0.0,1,-0.1818668246269226,-0.2428712137043476,0.0,0,0.0714399591088295,0.03928172588348389,8.0,1,-0.1848113238811493,-0.24721986800432205,0.0,1,-0.09864851832389832,-0.15852334164083004,0.0,1,-0.06114855408668518,-0.1301915030926466,0.0,1,-0.17468541860580444,-0.24064870364964008,0.0,1,-0.14980335533618927,-0.20497536659240723,0.0,25,56,81
+HRM,195,27920,stable_learned,9,1,5208,4000,5208,4000,True,1.0,-0.15460056066513062,-0.17763156816363335,-0.18766851350665092,0.0,0.0,0,-0.07595494389533997,-0.09114549495279789,0.0,1,-0.13649699091911316,-0.19908851571381092,0.0,1,-0.20963367819786072,-0.2849270161241293,0.0,1,-0.08556681871414185,-0.18721903860569,0.0,1,-0.19744902849197388,-0.21854975074529648,0.0,1,-0.2625392973423004,-0.2847689539194107,0.0,1,-0.13548967242240906,-0.18295821361243725,0.0,1,-0.12096729129552841,-0.15935670118778944,0.0,1,-0.060747843235731125,-0.1604302260093391,0.0,1,-0.15460056066513062,-0.17763156816363335,0.0,25,56,81
+HRM,196,310696,never,0,0,,,,,False,0.7037037014961243,-0.0009177908068522811,-0.04174174640502315,-0.058157630264759064,0.0,0.0,0,-0.1268344521522522,-0.13030785135924816,0.0,0,-0.13511145114898682,-0.18486826494336128,0.0,0,-0.14554943144321442,-0.16515114530920982,0.0,0,-0.1451030820608139,-0.18507548421621323,0.0,0,-0.09075042605400085,-0.11300296615809202,0.0,0,-0.030979756265878677,-0.07650613645091653,0.0,0,-0.03382347524166107,-0.05855458090081811,0.0,0,-0.02131369709968567,-0.08680040761828423,0.0,0,-0.05057881772518158,-0.06783316051587462,0.0,0,-0.0009177908068522811,-0.04174174640502315,0.0,25,56,81
+HRM,197,33494,never,0,0,,,,,False,0.5925925970077515,0.10189899057149887,-0.010478858137503266,-0.03801242681220174,1.0,0.10189899057149887,0,-0.11622209846973419,-0.12516444083303213,0.0,0,-0.14292773604393005,-0.17698576115071774,0.0,0,-0.1042480617761612,-0.1440787110477686,0.0,0,-0.10007929801940918,-0.17688005790114403,0.0,0,-0.09879644960165024,-0.11661586351692677,0.0,0,-0.027839025482535362,-0.08720170869491994,0.0,0,-0.02670908533036709,-0.04935981682501733,0.0,0,-0.011907447129487991,-0.05242471117526293,0.0,0,-0.04182637482881546,-0.06552507728338242,0.0,0,0.10189899057149887,-0.010478858137503266,1.0,25,56,81
+HRM,198,410906,never,0,0,,,,,False,0.5061728358268738,0.026934418827295303,-0.07609957316890359,-0.09403224103152752,1.0,0.026934418827295303,0,-0.07314040511846542,-0.11043815780431032,0.0,0,-0.15597230195999146,-0.19095703214406967,0.0,0,-0.0755276158452034,-0.17792331520467997,0.0,0,-0.09859734773635864,-0.1545091513544321,0.0,0,-0.03407026454806328,-0.1198471118696034,0.0,0,-0.08982524275779724,-0.10785479750484228,0.0,0,-0.05853353068232536,-0.07034991355612874,0.0,0,-0.07246488332748413,-0.10422022920101881,0.0,0,-0.03035527467727661,-0.07069143606349826,0.0,0,0.026934418827295303,-0.07609957316890359,1.0,25,56,81
+HRM,199,145476,stable_learned,4,1,18228,14000,18228,14000,True,1.0,-0.189981147646904,-0.23069869354367256,-0.24639302119612694,0.0,0.0,0,-0.1006317064166069,-0.11752456147223711,0.0,0,-0.11285006999969482,-0.1343475878238678,0.0,0,-0.11223842203617096,-0.1409128662198782,0.0,0,-0.09912638366222382,-0.1332598803564906,0.0,0,0.07319638133049011,-0.0533026996999979,1.0,0,-0.002463705837726593,-0.050936374987941235,0.0,1,-0.17272250354290009,-0.1900652777403593,0.0,1,-0.11018886417150497,-0.15566694736480713,0.0,1,-0.18034689128398895,-0.24566230736672878,0.0,1,-0.189981147646904,-0.23069869354367256,0.0,25,56,81
+HRM,200,333824,stable_learned,7,1,10416,8000,10416,8000,True,1.0,-0.1794012486934662,-0.21649944968521595,-0.23973802477121353,0.0,0.0,0,-0.10647876560688019,-0.11440763808786869,0.0,0,-0.13492116332054138,-0.1669558472931385,0.0,0,-0.03933314234018326,-0.1676346594467759,0.0,1,-0.1893991082906723,-0.2105167843401432,0.0,1,-0.1863744854927063,-0.2145744040608406,0.0,1,-0.1990235447883606,-0.24226926267147064,0.0,1,-0.16213631629943848,-0.19440246373414993,0.0,1,-0.13298553228378296,-0.14995984360575676,0.0,1,-0.2319619506597519,-0.2543411888182163,0.0,1,-0.1794012486934662,-0.21649944968521595,0.0,22,59,81
+HRM,201,82050,never,0,0,,,,,False,0.5679012537002563,0.11828157305717468,0.015981808479409665,-0.0317619132110849,5.0,0.25796508556231856,0,-0.12339252233505249,-0.13159438967704773,0.0,0,-0.10783930867910385,-0.1598616885021329,0.0,0,-0.14321443438529968,-0.17700434103608131,0.0,0,-0.06801190972328186,-0.09628321137279272,0.0,0,0.09284385293722153,-0.029019049718044698,1.0,0,0.006650381721556187,-0.02678933914285153,1.0,0,0.02834743820130825,0.005273305861919653,6.0,0,0.07442766427993774,0.026878277472860646,7.0,0,-0.001708800671622157,-0.043650540144881234,0.0,0,0.11828157305717468,0.015981808479409665,5.0,25,56,81
+HRM,202,114962,flaky_then_stable,7,3,7812,6000,26040,20000,True,1.0,-0.10850811004638672,-0.1681048534810543,-0.19648455828428268,0.0,0.0,0,-0.10270418971776962,-0.11545736156404018,0.0,0,-0.09739480912685394,-0.1752065233886242,0.0,1,-0.20774662494659424,-0.2649378590285778,0.0,1,-0.16878604888916016,-0.22247789055109024,0.0,1,-0.20236746966838837,-0.23874418064951897,0.0,1,-0.14589324593544006,-0.18932639993727207,0.0,1,-0.004695854149758816,-0.12201970058958977,0.0,1,-0.1252216100692749,-0.15528935752809048,0.0,0,0.12066476047039032,-0.0047394438879564404,2.0,1,-0.10850811004638672,-0.1681048534810543,0.0,25,56,81
+HRM,203,6899,stable_learned,6,1,13020,10000,13020,10000,True,1.0,-0.1339377760887146,-0.16070245765149593,-0.17054979130625725,0.0,0.0,0,-0.09162430465221405,-0.10765458643436432,0.0,0,-0.12195296585559845,-0.16583658568561077,0.0,0,-0.14962969720363617,-0.1672759484499693,0.0,0,-0.05331172049045563,-0.09499515779316425,0.0,1,-0.16185259819030762,-0.2079038191586733,0.0,1,-0.1303543895483017,-0.19169924035668373,0.0,1,0.04377192258834839,-0.11668294295668602,1.0,1,-0.09224921464920044,-0.12947272695600986,0.0,1,-0.10236494988203049,-0.22537683974951506,0.0,1,-0.1339377760887146,-0.16070245765149593,0.0,26,55,81
+HRM,204,237748,transient_or_regressed,1,2,18228,14000,,,False,0.6790123581886292,0.08677016198635101,0.010167380445636809,-0.02292786492034793,4.0,0.1730505032464862,0,-0.09762149304151535,-0.10882292129099369,0.0,0,-0.15453365445137024,-0.17629091441631317,0.0,0,-0.1680869311094284,-0.18575390428304672,0.0,0,-0.12912647426128387,-0.14842474460601807,0.0,0,0.05727067217230797,-0.017205787706188858,3.0,0,0.007310774177312851,-0.022340735769830644,2.0,1,-0.03257312998175621,-0.11642209114506841,0.0,0,0.031125478446483612,-0.03858421044424176,1.0,0,0.001288055907934904,-0.043050692009273916,1.0,0,0.08677016198635101,0.010167380445636809,4.0,26,55,81
+HRM,205,350653,stable_learned,4,1,18228,14000,18228,14000,True,1.0,-0.13493631780147552,-0.16598410159349442,-0.1866576410830021,0.0,0.0,0,-0.14130404591560364,-0.14648639410734177,0.0,0,-0.1481606364250183,-0.17156987078487873,0.0,0,-0.0820455253124237,-0.13394066132605076,0.0,0,0.02541496232151985,-0.06756189791485667,1.0,0,0.02755586802959442,-0.049352770671248436,1.0,0,0.024185776710510254,-0.012961489788722247,1.0,1,-0.08005372434854507,-0.13948646560311317,0.0,1,-0.07157253473997116,-0.11165957059711218,0.0,1,-0.19833707809448242,-0.24123808555305004,0.0,1,-0.13493631780147552,-0.16598410159349442,0.0,26,55,81
+HRM,206,228664,never,0,0,,,,,False,0.654321014881134,0.08339475095272064,-0.04712798143737018,-0.07916798442602158,1.0,0.08339475095272064,0,-0.06719226390123367,-0.09943375643342733,0.0,0,-0.137693852186203,-0.1858236137777567,0.0,0,-0.1448410600423813,-0.1873540636152029,0.0,0,-0.04915118217468262,-0.09441810194402933,0.0,0,0.014918582513928413,-0.011477384017780423,2.0,0,-0.0011185731273144484,-0.04197533041588031,0.0,0,0.0260721817612648,-0.044662884436547756,1.0,0,0.01503931637853384,-0.019582156382966787,3.0,0,0.031028976663947105,-0.0389218598138541,1.0,0,0.08339475095272064,-0.04712798143737018,1.0,26,55,81
+HRM,207,108128,stable_learned,4,1,18228,14000,18228,14000,True,1.0,-0.16501203179359436,-0.19600357487797737,-0.21393823251128197,0.0,0.0,0,-0.10614348948001862,-0.11909490451216698,0.0,0,-0.1028948649764061,-0.18002466391772032,0.0,0,-0.09101417660713196,-0.13978907745331526,0.0,0,0.0019080732017755508,-0.046565725991968066,1.0,0,0.09654897451400757,0.03349778847768903,7.0,0,0.060253746807575226,0.018191152019426227,5.0,1,-0.0906023383140564,-0.1627746783196926,0.0,1,-0.09159097075462341,-0.12918226514011621,0.0,1,-0.013397402130067348,-0.20393821119796485,0.0,1,-0.16501203179359436,-0.19600357487797737,0.0,26,55,81
+HRM,208,237555,never,0,0,,,,,False,0.5555555820465088,0.007612365763634443,-0.044177901989314705,-0.06409547384828329,1.0,0.007612365763634443,0,-0.07043327391147614,-0.10339026898145676,0.0,0,-0.10417153686285019,-0.15620617661625147,0.0,0,-0.1531202495098114,-0.18673867546021938,0.0,0,0.0018802920822054148,-0.1151758921041619,1.0,0,-0.011705229990184307,-0.059360083541832864,0.0,0,0.044572778046131134,-0.016030388069339097,2.0,0,0.030044175684452057,0.0015343111008405685,5.0,0,-0.04893241450190544,-0.06258944747969508,0.0,0,0.00970270112156868,-0.0474946980830282,1.0,0,0.007612365763634443,-0.044177901989314705,1.0,24,57,81
+HRM,209,415031,transient_or_regressed,1,2,13020,10000,,,False,0.790123462677002,-0.011238008737564087,-0.045954025350511074,-0.06606322154402733,0.0,0.0,0,-0.09855695068836212,-0.11407413706183434,0.0,0,-0.18077817559242249,-0.19626306183636189,0.0,0,-0.10050997138023376,-0.171446081250906,0.0,0,-0.03377485275268555,-0.12912815902382135,0.0,1,-0.18000337481498718,-0.22468610666692257,0.0,0,-0.021148741245269775,-0.057655069045722485,0.0,0,0.02801898494362831,-0.028036867268383503,2.0,0,-0.060162704437971115,-0.07615422504022717,0.0,0,-0.04784339666366577,-0.08226949023082852,0.0,0,-0.011238008737564087,-0.045954025350511074,0.0,26,55,81
+HRM,210,346143,stable_learned,4,1,18228,14000,18228,14000,True,1.0,-0.17223934829235077,-0.21097721345722675,-0.2280496470630169,0.0,0.0,0,-0.09281596541404724,-0.10699704568833113,0.0,0,-0.12036047875881195,-0.16221360489726067,0.0,0,-0.09159984439611435,-0.14172197226434946,0.0,0,-0.07112683355808258,-0.11683815810829401,0.0,0,0.017658857628703117,-0.013219729055890639,4.0,0,0.1190657764673233,0.01898943813284859,4.0,1,-0.14372549951076508,-0.20850301906466484,0.0,1,-0.06180737167596817,-0.12366718612611294,0.0,1,-0.07414146512746811,-0.17166088428348303,0.0,1,-0.17223934829235077,-0.21097721345722675,0.0,26,55,81
+HRM,211,348811,flaky_then_stable,7,3,7812,6000,26040,20000,True,1.0,-0.18410712480545044,-0.19431923888623714,-0.20333118364214897,0.0,0.0,0,-0.08700527995824814,-0.11817478016018867,0.0,0,-0.15089499950408936,-0.1754827369004488,0.0,1,-0.2635757029056549,-0.30457618460059166,0.0,1,-0.1615934818983078,-0.19762343727052212,0.0,1,-0.1994624137878418,-0.22261936590075493,0.0,1,-0.20076709985733032,-0.22779029607772827,0.0,1,-0.05426889285445213,-0.12205173587426543,0.0,1,-0.12279098480939865,-0.16735476907342672,0.0,0,0.042435284703969955,0.005488383278134279,5.0,1,-0.18410712480545044,-0.19431923888623714,0.0,26,55,81
+HRM,212,227692,never,0,0,,,,,False,0.5925925970077515,0.09743288159370422,-0.016740572173148394,-0.06473333016037941,3.0,0.1449727900326252,0,-0.11337151378393173,-0.11785734724253416,0.0,0,-0.10066443681716919,-0.15870381146669388,0.0,0,-0.09792395681142807,-0.1304362751543522,0.0,0,-0.10474273562431335,-0.15173695236444473,0.0,0,-0.029487047344446182,-0.049312752671539783,0.0,0,0.023732323199510574,-0.017874662473332137,2.0,0,0.09250816702842712,-0.013856010511517525,2.0,0,0.0840623527765274,0.02809983107727021,8.0,0,0.06880778074264526,-0.03904690663330257,1.0,0,0.09743288159370422,-0.016740572173148394,3.0,21,60,81
+HRM,213,29728,flaky_then_stable,3,3,15624,12000,26040,20000,True,1.0,-0.0180442463606596,-0.07509582652710378,-0.08890468627214432,0.0,0.0,0,-0.09737974405288696,-0.10542266350239515,0.0,0,-0.11116410791873932,-0.1416052682325244,0.0,0,-0.09327512979507446,-0.12952921073883772,0.0,0,-0.08908715099096298,-0.1379631171002984,0.0,0,-0.02695394679903984,-0.07930179545655847,0.0,1,-0.19455179572105408,-0.2400695774704218,0.0,1,-0.07936348766088486,-0.1590019604191184,0.0,0,0.009671850129961967,-0.03799389532650821,1.0,0,-0.026156581938266754,-0.06526494305580854,0.0,1,-0.0180442463606596,-0.07509582652710378,0.0,24,57,81
+HRM,214,221880,flaky_then_stable,7,3,7812,6000,13020,10000,True,1.0,-0.1576349139213562,-0.2007406409829855,-0.22511296719312668,0.0,0.0,0,-0.08933086693286896,-0.1068813381716609,0.0,0,-0.1528424471616745,-0.18208311311900616,0.0,1,-0.14176392555236816,-0.20779522135853767,0.0,0,-0.09494201838970184,-0.12403484061360359,0.0,1,-0.08291961252689362,-0.15093356184661388,0.0,1,-0.15334287285804749,-0.18849403969943523,0.0,1,-0.05497422069311142,-0.1238362705335021,0.0,1,-0.08890955150127411,-0.1326342700049281,0.0,1,-0.13664022088050842,-0.16817590594291687,0.0,1,-0.1576349139213562,-0.2007406409829855,0.0,26,55,81
+HRM,215,202783,stable_learned,7,1,10416,8000,10416,8000,True,1.0,-0.1660497486591339,-0.20248823426663876,-0.22236012294888496,0.0,0.0,0,-0.09276615083217621,-0.10575955174863338,0.0,0,-0.13674139976501465,-0.18115700595080853,0.0,0,-0.11707819998264313,-0.18894661776721478,0.0,1,-0.2152186632156372,-0.2410553339868784,0.0,1,-0.2041991800069809,-0.23219319991767406,0.0,1,-0.20047710835933685,-0.23588314466178417,0.0,1,0.10571852326393127,-0.020104989293031394,1.0,1,0.020002473145723343,-0.1289809043519199,1.0,1,-0.15838292241096497,-0.21591633558273315,0.0,1,-0.1660497486591339,-0.20248823426663876,0.0,25,56,81
+HRM,216,39207,never,0,0,,,,,False,0.604938268661499,0.048097625374794006,-0.01413808812503703,-0.047010380774736404,2.0,0.08484005182981491,0,-0.06804661452770233,-0.10787758696824312,0.0,0,-0.10870663821697235,-0.16962964832782745,0.0,0,-0.05807700753211975,-0.15621371008455753,0.0,0,-0.12684769928455353,-0.1644620280712843,0.0,0,-0.021382397040724754,-0.10012026294134557,0.0,0,-0.00710125919431448,-0.06691553082782775,0.0,0,-0.01624995656311512,-0.03948318143375218,0.0,0,-0.003602872369810939,-0.07105929011595435,0.0,0,2.418706935713999e-05,-0.06473925282853088,1.0,0,0.048097625374794006,-0.01413808812503703,2.0,24,57,81
+HRM,217,74180,never,0,0,,,,,False,0.5308641791343689,0.014378267340362072,-0.03965394932311028,-0.06254274304956198,1.0,0.014378267340362072,0,-0.10344809293746948,-0.11125221475958824,0.0,0,-0.14568112790584564,-0.1691600289195776,0.0,0,-0.09222682565450668,-0.16768492572009563,0.0,0,-0.05179779231548309,-0.15353061817586422,0.0,0,-0.06363127380609512,-0.12250720523297787,0.0,0,-0.005924951285123825,-0.05887153139337897,0.0,0,-0.04352279007434845,-0.06238211411982775,0.0,0,0.024050407111644745,-0.057676803320646286,1.0,0,-0.05620887875556946,-0.07826659921556711,0.0,0,0.014378267340362072,-0.03965394932311028,1.0,26,55,81
+HRM,218,117122,never,0,0,,,,,False,0.5555555820465088,0.09561511874198914,-0.03480727272108197,-0.06945107504725456,1.0,0.09561511874198914,0,-0.10814142227172852,-0.11546940729022026,0.0,0,-0.09022317826747894,-0.16188322566449642,0.0,0,-0.0858263298869133,-0.15854021161794662,0.0,0,-0.08774672448635101,-0.1685229018330574,0.0,0,-0.09125232696533203,-0.11500284913927317,0.0,0,-0.06964094191789627,-0.10303815267980099,0.0,0,-0.03688628971576691,-0.10789653426036239,0.0,0,0.02332233637571335,-0.0748808654025197,1.0,0,0.09483440965414047,-0.02928965084720403,2.0,0,0.09561511874198914,-0.03480727272108197,1.0,22,59,81
+HRM,219,37017,flaky_then_stable,4,3,15624,12000,23436,18000,True,1.0,-0.13403478264808655,-0.18622020073235035,-0.21139803528785706,0.0,0.0,0,-0.10574042052030563,-0.13091248646378517,0.0,0,-0.12703803181648254,-0.16716214083135128,0.0,0,-0.08400150388479233,-0.14972775150090456,0.0,0,-0.06557256728410721,-0.11360779590904713,0.0,0,-0.019727272912859917,-0.07002014550380409,0.0,1,-0.21924442052841187,-0.2559056095778942,0.0,1,-0.05203891545534134,-0.12524176575243473,0.0,0,-0.011943748220801353,-0.05071761761792004,0.0,1,-0.19533169269561768,-0.22377870231866837,0.0,1,-0.13403478264808655,-0.18622020073235035,0.0,24,57,81
+HRM,220,300132,stable_learned,5,1,15624,12000,15624,12000,True,1.0,-0.1559721827507019,-0.20536543987691402,-0.22909849137067795,0.0,0.0,0,-0.09051340818405151,-0.1320209987461567,0.0,0,-0.08660053461790085,-0.1647039009258151,0.0,0,-0.0771249383687973,-0.1355450339615345,0.0,0,0.013077078387141228,-0.06084198574535549,1.0,0,0.09095209091901779,0.024493115255609155,5.0,1,-0.1697869747877121,-0.20843740180134773,0.0,1,-0.07225259393453598,-0.153579399921,0.0,1,-0.09850418567657471,-0.13548859301954508,0.0,1,-0.1501835137605667,-0.2020343691110611,0.0,1,-0.1559721827507019,-0.20536543987691402,0.0,28,53,81
+HRM,221,212419,flaky_then_stable,5,3,13020,10000,26040,20000,True,1.0,-0.17344078421592712,-0.20465830527245998,-0.2146916799247265,0.0,0.0,0,-0.05966741964221001,-0.10992879094555974,0.0,0,-0.10253896564245224,-0.14231826178729534,0.0,0,-0.06986531615257263,-0.13004956021904945,0.0,0,-0.010354475118219852,-0.08203757042065263,0.0,1,-0.1917427033185959,-0.21958515793085098,0.0,1,-0.14528104662895203,-0.24723536893725395,0.0,1,-0.1359390765428543,-0.18658258579671383,0.0,1,-0.056628551334142685,-0.16633940814062953,0.0,0,0.037031568586826324,-0.007483185967430472,2.0,1,-0.17344078421592712,-0.20465830527245998,0.0,27,54,81
+HRM,222,283218,flaky_then_stable,6,3,10416,8000,15624,12000,True,1.0,-0.17425541579723358,-0.2150886505842209,-0.23318714275956154,0.0,0.0,0,-0.11102068424224854,-0.12624312471598387,0.0,0,-0.06757184863090515,-0.14822345413267612,0.0,0,-0.1000094935297966,-0.13983731344342232,0.0,1,-0.18029353022575378,-0.20682593807578087,0.0,0,0.19247999787330627,-0.017469346174038947,2.0,1,-0.23884151875972748,-0.264403996989131,0.0,1,-0.037102315574884415,-0.1360361333936453,0.0,1,-0.05430225282907486,-0.16752142179757357,0.0,1,-0.18730700016021729,-0.2254333719611168,0.0,1,-0.17425541579723358,-0.2150886505842209,0.0,26,55,81
+HRM,223,28177,flaky_then_stable,5,3,13020,10000,18228,14000,True,1.0,-0.12469634413719177,-0.18908443115651608,-0.22089985758066177,0.0,0.0,0,-0.12266513705253601,-0.136327238753438,0.0,0,-0.16814804077148438,-0.19049014896154404,0.0,0,-0.10871422290802002,-0.18134526535868645,0.0,0,-0.056180551648139954,-0.11043403018265963,0.0,1,-0.14045920968055725,-0.21845915354788303,0.0,0,-0.003120861481875181,-0.03718173335073516,0.0,1,-0.056571703404188156,-0.12131601804867387,0.0,1,-0.07222075760364532,-0.10469828825443983,0.0,1,-0.12451797723770142,-0.19245790503919125,0.0,1,-0.12469634413719177,-0.18908443115651608,0.0,25,56,81
+HRM,224,4206,stable_learned,2,1,23436,18000,23436,18000,True,1.0,-0.14195170998573303,-0.18850757740437984,-0.20180845633149147,0.0,0.0,0,-0.07580150663852692,-0.11509564705193043,0.0,0,-0.1612311452627182,-0.19461726397275925,0.0,0,-0.16917747259140015,-0.17754491232335567,0.0,0,-0.05065559968352318,-0.1290820143185556,0.0,0,0.0005942952702753246,-0.05176957493677037,1.0,0,0.047591738402843475,-0.042949752300046384,2.0,0,-0.0009128667297773063,-0.05118304947245633,0.0,0,0.01004410907626152,-0.06810749840224162,1.0,1,-0.12536466121673584,-0.23040425032377243,0.0,1,-0.14195170998573303,-0.18850757740437984,0.0,25,56,81
+HRM,225,62849,transient_or_regressed,3,2,18228,14000,,,False,0.7160493731498718,0.006593815982341766,-0.05635866615921259,-0.08004133589565754,1.0,0.006593815982341766,0,-0.09993651509284973,-0.12454732786864042,0.0,0,-0.10950720310211182,-0.1549777863547206,0.0,0,-0.06393376737833023,-0.14849846623837948,0.0,0,-0.0342324823141098,-0.08282398665323853,0.0,0,0.03848709911108017,-0.047894759103655815,2.0,0,-0.0034943120554089546,-0.037847575382329524,0.0,1,-0.18179230391979218,-0.20289369486272335,0.0,1,-0.09130053222179413,-0.1414635991677642,0.0,1,-0.06324481219053268,-0.2195883272215724,0.0,0,0.006593815982341766,-0.05635866615921259,1.0,26,55,81
+HRM,226,20504,stable_learned,4,1,18228,14000,18228,14000,True,1.0,-0.15365126729011536,-0.18964270688593388,-0.20555392652750015,0.0,0.0,0,-0.11116956174373627,-0.12020908016711473,0.0,0,-0.09516247361898422,-0.13846848905086517,0.0,0,-0.05986465513706207,-0.13942377362400293,0.0,0,-0.13646337389945984,-0.16555228643119335,0.0,0,-0.07720266282558441,-0.11239214241504669,0.0,0,0.0141669400036335,-0.05774004713748582,1.0,1,-0.1267877221107483,-0.1435103453695774,0.0,1,0.10096763074398041,-0.06895686639472842,1.0,1,-0.14372135698795319,-0.2257357705384493,0.0,1,-0.15365126729011536,-0.18964270688593388,0.0,23,58,81
+HRM,227,102114,transient_or_regressed,1,2,20832,16000,,,False,0.5555555820465088,0.11331330984830856,0.002953248331323266,-0.054590728133916855,3.0,0.2478269822895527,0,-0.07882952690124512,-0.1176359374076128,0.0,0,-0.15767739713191986,-0.18140659108757973,0.0,0,-0.12584590911865234,-0.1542378719896078,0.0,0,-0.13332131505012512,-0.17657445557415485,0.0,0,-0.03922625631093979,-0.09424645872786641,0.0,0,0.0005954645457677543,-0.08217224488180364,1.0,0,-0.027821920812129974,-0.04884683433920145,0.0,1,-0.12023039907217026,-0.17697339970618486,0.0,0,-0.0044072768650949,-0.08093594916863367,0.0,0,0.11331330984830856,0.002953248331323266,3.0,23,58,81
+HRM,228,394875,never,0,0,,,,,False,0.6172839403152466,-0.028658617287874222,-0.0678517553023994,-0.09112328477203846,0.0,0.0,0,-0.11712637543678284,-0.1271855877712369,0.0,0,-0.08757104724645615,-0.13716319669038057,0.0,0,-0.056903645396232605,-0.12894056178629398,0.0,0,-0.022817812860012054,-0.08470625104382634,0.0,0,0.023981263861060143,-0.05572234315332025,1.0,0,0.033171653747558594,-0.01494785671820864,2.0,0,0.004346497822552919,-0.02874814992537722,2.0,0,0.06701937317848206,-0.047119594644755125,1.0,0,0.052204690873622894,-0.02762566739693284,1.0,0,-0.028658617287874222,-0.0678517553023994,0.0,26,55,81
+HRM,229,389068,flaky_then_stable,7,3,7812,6000,18228,14000,True,1.0,-0.11744418740272522,-0.19265897944569588,-0.2216796539723873,0.0,0.0,0,-0.06641635298728943,-0.09284999128431082,0.0,0,-0.19633018970489502,-0.20518033392727375,0.0,1,-0.19464123249053955,-0.22277973033487797,0.0,1,-0.20278489589691162,-0.22939402796328068,0.0,1,-0.1516304761171341,-0.16493872925639153,0.0,0,0.04152420908212662,-0.041060465620830655,1.0,1,-0.050690725445747375,-0.07885933946818113,0.0,1,-0.09418540447950363,-0.1427624700590968,0.0,1,-0.0900409072637558,-0.11634466052055359,0.0,1,-0.11744418740272522,-0.19265897944569588,0.0,25,56,81
+HRM,230,236824,transient_or_regressed,5,2,10416,8000,,,False,0.8765432238578796,0.17368638515472412,0.014628190023358911,-0.02744967071339488,2.0,0.23671980947256088,0,-0.11857235431671143,-0.1256159981712699,0.0,0,-0.10093627870082855,-0.1656249389052391,0.0,0,0.010110633447766304,-0.059120599646121264,1.0,1,-0.21901211142539978,-0.22728298045694828,0.0,1,-0.17098525166511536,-0.18942129239439964,0.0,1,-0.13183751702308655,-0.16641064174473286,0.0,1,0.01264173910021782,-0.11295449500903487,1.0,1,-0.0988592654466629,-0.13353570457547903,0.0,0,0.08873026818037033,-0.003154526901198551,3.0,0,0.17368638515472412,0.014628190023358911,2.0,17,64,81
+HRM,231,306831,flaky_then_stable,4,3,13020,10000,20832,16000,True,1.0,-0.15496456623077393,-0.24098528549075127,-0.26648732274770737,0.0,0.0,0,-0.07632023096084595,-0.0923275463283062,0.0,0,-0.13719168305397034,-0.16287346743047237,0.0,0,-0.06902842223644257,-0.11728816665709019,0.0,0,-0.012000310234725475,-0.08797980390954763,0.0,1,-0.15418373048305511,-0.17939471267163754,0.0,0,0.03665394335985184,-0.0419061086140573,1.0,0,-0.018514184281229973,-0.055244172690436244,0.0,1,-0.14702080190181732,-0.17426768317818642,0.0,1,-0.09183044731616974,-0.1984181459993124,0.0,1,-0.15496456623077393,-0.24098528549075127,0.0,28,53,81
+HRM,232,53003,stable_learned,5,1,15624,12000,15624,12000,True,1.0,-0.181192547082901,-0.20390158519148827,-0.21736717596650124,0.0,0.0,0,-0.07622843980789185,-0.09960311837494373,0.0,0,-0.13326908648014069,-0.1583220437169075,0.0,0,-0.09689708054065704,-0.17446342296898365,0.0,0,-0.008221856318414211,-0.08568810194265097,0.0,0,-0.03587796539068222,-0.08480397239327431,0.0,1,-0.12879455089569092,-0.14722834341228008,0.0,1,-0.0626436248421669,-0.19011918921023607,0.0,1,0.039931684732437134,-0.12913027219474316,1.0,1,-0.25485914945602417,-0.26998817175626755,0.0,1,-0.181192547082901,-0.20390158519148827,0.0,26,55,81
+HRM,233,317778,stable_learned,6,1,13020,10000,13020,10000,True,1.0,-0.1884777545928955,-0.21059718541800976,-0.22329474985599518,0.0,0.0,0,-0.11614161729812622,-0.13463828526437283,0.0,0,-0.15031583607196808,-0.1808877605944872,0.0,0,-0.024349620565772057,-0.14026617887429893,0.0,0,-0.028045453131198883,-0.09466972202062607,0.0,1,-0.16342434287071228,-0.1890337560325861,0.0,1,-0.18421714007854462,-0.2456967644393444,0.0,1,-0.1257319450378418,-0.15728226117789745,0.0,1,-0.05428747460246086,-0.11305393138900399,0.0,1,-0.12216547131538391,-0.21144742332398891,0.0,1,-0.1884777545928955,-0.21059718541800976,0.0,28,53,81
+HRM,234,62848,transient_or_regressed,3,2,15624,12000,,,False,0.7037037014961243,0.029148705303668976,-0.009959557326510549,-0.03611787362024188,4.0,0.06479503586888313,0,-0.09186704456806183,-0.10793764889240265,0.0,0,-0.13574282824993134,-0.16354255750775337,0.0,0,-0.10089068114757538,-0.18758398666977882,0.0,0,-0.05595148354768753,-0.10016499739140272,0.0,0,-0.05670907720923424,-0.11277009779587388,0.0,1,-0.2044503390789032,-0.2667585965245962,0.0,1,-0.10556550323963165,-0.17192343901842833,0.0,1,-0.12227629125118256,-0.14859733171761036,0.0,0,-0.03285561129450798,-0.06891112122684717,0.0,0,0.029148705303668976,-0.009959557326510549,4.0,26,55,81
+HRM,235,358453,never,0,0,,,,,False,0.5555555820465088,-0.02608739212155342,-0.05671756295487285,-0.0734241921454668,0.0,0.0,0,-0.1017083078622818,-0.12329492811113596,0.0,0,-0.11756660789251328,-0.16907890420407057,0.0,0,-0.05331701785326004,-0.16175230499356985,0.0,0,-0.1126260980963707,-0.17251746449619532,0.0,0,-0.09748288244009018,-0.12499399669468403,0.0,0,-0.02014888823032379,-0.07545124809257686,0.0,0,-0.021255305036902428,-0.05786052974872291,0.0,0,-0.06189631670713425,-0.09289398416876793,0.0,0,-0.06496880948543549,-0.10105719696730375,0.0,0,-0.02608739212155342,-0.05671756295487285,0.0,26,55,81
+HRM,236,330583,stable_learned,1,1,26040,20000,26040,20000,True,1.0,-0.11890506744384766,-0.18006020411849022,-0.20435893908143044,0.0,0.0,0,-0.08895216882228851,-0.11564082466065884,0.0,0,-0.07977046817541122,-0.13198301289230585,0.0,0,-0.15133355557918549,-0.18281421065330505,0.0,0,-0.07717962563037872,-0.1410870086401701,0.0,0,0.07565977424383163,-0.07927220407873392,1.0,0,0.14540529251098633,-0.004951055394485593,3.0,0,0.13845284283161163,0.010312311700545251,3.0,0,0.08409249037504196,-0.06878194771707058,1.0,0,-0.030324822291731834,-0.0594903954770416,0.0,1,-0.11890506744384766,-0.18006020411849022,0.0,26,55,81
+HRM,237,95944,transient_or_regressed,1,2,15624,12000,,,False,0.5308641791343689,0.07343986630439758,-0.014950867858715355,-0.04271007934585214,2.0,0.07507150853052735,0,-0.0552365779876709,-0.11270241439342499,0.0,0,-0.13808001577854156,-0.18144110403954983,0.0,0,-0.19089022278785706,-0.21507089771330357,0.0,0,-0.11633458733558655,-0.15186305157840252,0.0,0,-0.1014510989189148,-0.12286214716732502,0.0,1,-0.21116702258586884,-0.23661919683218002,0.0,0,-0.046319544315338135,-0.07898170687258244,0.0,0,-0.003785282140597701,-0.07546291683684103,0.0,0,-0.03765903413295746,-0.0638240510597825,0.0,0,0.07343986630439758,-0.014950867858715355,2.0,26,55,81
+HRM,238,223602,never,0,0,,,,,False,0.7407407164573669,0.07533316314220428,-0.008872227277606726,-0.06020869221538305,3.0,0.17737190052866936,0,-0.10062997788190842,-0.12671955861151218,0.0,0,-0.15164944529533386,-0.17924410291016102,0.0,0,-0.10183218121528625,-0.15766188502311707,0.0,0,-0.1415986716747284,-0.16970326378941536,0.0,0,-0.11580660939216614,-0.13777372986078262,0.0,0,-0.00998532772064209,-0.07866472424939275,0.0,0,0.007694747298955917,-0.046528749633580446,1.0,0,0.085454061627388,-0.05038193234940991,1.0,0,-0.0012221249053254724,-0.06414019207295496,0.0,0,0.07533316314220428,-0.008872227277606726,3.0,23,58,81
+HRM,239,206587,stable_learned,1,1,26040,20000,26040,20000,True,1.0,-0.1963922679424286,-0.21481991186738014,-0.22501517459750175,0.0,0.0,0,-0.10947119444608688,-0.12912119179964066,0.0,0,-0.13257117569446564,-0.16293167509138584,0.0,0,-0.033424898982048035,-0.13993315026164055,0.0,0,-0.052823614329099655,-0.15301161212846637,0.0,0,-0.10229983180761337,-0.12745796982198954,0.0,0,-0.011975571513175964,-0.08327480312436819,0.0,0,0.09230123460292816,-0.03363121394068003,1.0,0,-0.06715935468673706,-0.0797644667327404,0.0,0,-0.05053693801164627,-0.08616372384130955,0.0,1,-0.1963922679424286,-0.21481991186738014,0.0,24,57,81
+HRM,240,282303,never,0,0,,,,,False,0.6913580298423767,0.06028735265135765,-0.036524567170999944,-0.07729283161461353,2.0,0.07274726033210754,0,-0.08296491205692291,-0.10526157636195421,0.0,0,-0.06189974769949913,-0.15177947049960494,0.0,0,-0.06281460076570511,-0.15559654030948877,0.0,0,-0.08652931451797485,-0.1341294227167964,0.0,0,-0.014625802636146545,-0.06847976706922054,0.0,0,-0.03839808702468872,-0.07407506881281734,0.0,0,0.023852845653891563,-0.060213727876544,1.0,0,0.09644734859466553,0.014872483210638165,4.0,0,0.007617817260324955,-0.01847264519892633,2.0,0,0.06028735265135765,-0.036524567170999944,2.0,26,55,81
+HRM,241,310279,flaky_then_stable,3,5,15624,12000,26040,20000,True,1.0,-0.10404469817876816,-0.16431509982794523,-0.18478363379836082,0.0,0.0,0,-0.1089392825961113,-0.11646378692239523,0.0,0,-0.11929731070995331,-0.14464967511594296,0.0,0,-0.12384301424026489,-0.18304836936295033,0.0,0,-0.09972630441188812,-0.1456934493035078,0.0,0,-0.03379423916339874,-0.07585595641285181,0.0,1,-0.17765258252620697,-0.24163498356938362,0.0,0,0.0026115195360034704,-0.03544291201978922,1.0,1,-0.11285550892353058,-0.1509010586887598,0.0,0,-0.044296734035015106,-0.07769468892365694,0.0,1,-0.10404469817876816,-0.16431509982794523,0.0,24,57,81
+HRM,242,59598,stable_learned,1,1,26040,20000,26040,20000,True,1.0,-0.16626207530498505,-0.19820497557520866,-0.2148134894669056,0.0,0.0,0,-0.12520772218704224,-0.1293038222938776,0.0,0,-0.10940449684858322,-0.1396984113380313,0.0,0,-0.09791838377714157,-0.1514408802613616,0.0,0,-0.039157524704933167,-0.12214004714041948,0.0,0,-0.009032394737005234,-0.062263234285637736,0.0,0,-0.007905096746981144,-0.0495994808152318,0.0,0,-0.006752500310540199,-0.05548229138366878,0.0,0,-0.05061524733901024,-0.07893396401777864,0.0,0,-0.04961632937192917,-0.07915810169652104,0.0,1,-0.16626207530498505,-0.19820497557520866,0.0,25,56,81
+HRM,243,397957,never,0,0,,,,,False,0.5802469253540039,0.009224265813827515,-0.01637764845509082,-0.027236116118729115,1.0,0.009224265813827515,0,-0.09872391819953918,-0.11790389474481344,0.0,0,-0.1038203090429306,-0.1599287511780858,0.0,0,-0.10658504068851471,-0.16486057080328465,0.0,0,-0.05646032840013504,-0.09579132124781609,0.0,0,-0.0694759339094162,-0.10029623005539179,0.0,0,-0.050382595509290695,-0.10184657806530595,0.0,0,-0.03528796508908272,-0.05668484419584274,0.0,0,-0.08469098806381226,-0.09665513597428799,0.0,0,-0.02208273485302925,-0.06376611813902855,0.0,0,0.009224265813827515,-0.01637764845509082,1.0,23,58,81
+HRM,244,97265,stable_learned,6,1,13020,10000,13020,10000,True,1.0,-0.19278153777122498,-0.21719308756291866,-0.2285659834742546,0.0,0.0,0,-0.11104333400726318,-0.1212769104167819,0.0,0,-0.14398187398910522,-0.18722500279545784,0.0,0,-0.08519311249256134,-0.1563026700168848,0.0,0,0.03544388711452484,-0.020086590040591545,4.0,1,-0.21355275809764862,-0.22871770709753036,0.0,1,-0.2479502409696579,-0.26474413834512234,0.0,1,-0.07144833356142044,-0.16488474141806364,0.0,1,-0.13826920092105865,-0.17457001470029354,0.0,1,-0.09996865689754486,-0.18969987519085407,0.0,1,-0.19278153777122498,-0.21719308756291866,0.0,27,54,81
+HRM,245,203158,never,0,0,,,,,False,0.5061728358268738,-0.043591972440481186,-0.06684946501627564,-0.07779359072446823,0.0,0.0,0,-0.0975874811410904,-0.11694183759391308,0.0,0,-0.12061046063899994,-0.1547933742403984,0.0,0,-0.1520390510559082,-0.1683314200490713,0.0,0,-0.06694114208221436,-0.16113154776394367,0.0,0,-0.09849253296852112,-0.11712112557142973,0.0,0,-0.09714534878730774,-0.10561736393719912,0.0,0,-0.0388847216963768,-0.06084999768063426,0.0,0,0.0049958741292357445,-0.027486942224641098,1.0,0,-0.05179747939109802,-0.0710010975599289,0.0,0,-0.043591972440481186,-0.06684946501627564,0.0,23,58,81
+HRM,246,130524,transient_or_regressed,2,4,18228,14000,,,False,0.6419752836227417,-0.004990352317690849,-0.035490788985043764,-0.05547603778541088,0.0,0.0,0,-0.06510833650827408,-0.11598506290465593,0.0,0,-0.11549653857946396,-0.1580311544239521,0.0,0,-0.11293409764766693,-0.1622934453189373,0.0,0,-0.10487478226423264,-0.13808585796505213,0.0,0,0.0028211462777107954,-0.06197114955284633,1.0,0,0.12497782707214355,0.026166780124185607,5.0,1,-0.06807678192853928,-0.1417427370324731,0.0,0,0.10871762037277222,-0.01345633971504867,2.0,1,-0.0799868181347847,-0.1775472043082118,0.0,0,-0.004990352317690849,-0.035490788985043764,0.0,24,57,81
+HRM,247,117111,stable_learned,3,1,20832,16000,20832,16000,True,1.0,-0.11881309002637863,-0.15686153713613749,-0.1696493737399578,0.0,0.0,0,-0.10056652873754501,-0.12327141687273979,0.0,0,-0.05797262117266655,-0.14297229098156095,0.0,0,-0.07982100546360016,-0.14993510954082012,0.0,0,-0.0033703416120260954,-0.07440120363025926,0.0,0,0.1026889830827713,0.03830263181589544,8.0,0,0.0904201865196228,0.05462504830211401,8.0,0,0.11351659148931503,0.03195773169863969,6.0,1,-0.1335289031267166,-0.153052045032382,0.0,1,-0.13030651211738586,-0.2125022727996111,0.0,1,-0.11881309002637863,-0.15686153713613749,0.0,26,55,81
+HRM,248,297930,stable_learned,5,1,15624,12000,15624,12000,True,1.0,-0.13639754056930542,-0.17814315110445023,-0.21175549551844597,0.0,0.0,0,-0.10664702951908112,-0.11520716175436974,0.0,0,-0.09781040996313095,-0.15857695695012808,0.0,0,-0.057762619107961655,-0.08763106446713209,0.0,0,0.019293973222374916,-0.020601893695129547,3.0,0,0.06175637245178223,-0.013835910358466208,2.0,1,-0.1996692270040512,-0.24653707444667816,0.0,1,-0.08338768780231476,-0.18675567023456097,0.0,1,-0.1410241723060608,-0.15786871500313282,0.0,1,-0.14536920189857483,-0.20609768852591515,0.0,1,-0.13639754056930542,-0.17814315110445023,0.0,26,55,81
+HRM,249,362706,transient_or_regressed,3,4,7812,6000,,,False,0.7777777910232544,0.11626183241605759,0.032474103616550565,0.002494592685252428,6.0,0.2671926845796406,0,-0.09268971532583237,-0.11729269940406084,0.0,0,-0.18642061948776245,-0.21003118716180325,0.0,1,-0.18617314100265503,-0.2607010528445244,0.0,1,-0.20983318984508514,-0.25059183314442635,0.0,0,-0.05725998803973198,-0.07733804965391755,0.0,0,0.019625771790742874,-0.04101095476653427,1.0,0,-0.0032148673199117184,-0.031986917660105973,0.0,1,-0.08155185729265213,-0.10796012915670872,0.0,0,0.054750896990299225,-0.008527684025466442,4.0,0,0.11626183241605759,0.032474103616550565,6.0,24,57,81
+HRM,250,159293,stable_learned,2,1,23436,18000,23436,18000,True,1.0,-0.09929895401000977,-0.19062826223671436,-0.2226349152624607,0.0,0.0,0,-0.09349765628576279,-0.10409473814070225,0.0,0,-0.1337977647781372,-0.147879334166646,0.0,0,-0.03639587014913559,-0.11687024775892496,0.0,0,-0.09517838805913925,-0.13998514972627163,0.0,0,-0.050667114555835724,-0.10778444167226553,0.0,0,-0.012857114896178246,-0.0659436120186001,0.0,0,-0.009624190628528595,-0.0544164190068841,0.0,0,-0.05090567469596863,-0.07117532519623637,0.0,1,-0.17035941779613495,-0.2103243451565504,0.0,1,-0.09929895401000977,-0.19062826223671436,0.0,24,57,81
+HRM,251,155140,transient_or_regressed,2,4,15624,12000,,,False,0.7777777910232544,-0.03817397356033325,-0.07403292134404182,-0.09231505915522575,0.0,0.0,0,-0.09428238123655319,-0.10691407788544893,0.0,0,-0.11379194259643555,-0.16911405697464943,0.0,0,-0.04331266134977341,-0.10261863470077515,0.0,0,-0.09842904657125473,-0.1250978671014309,0.0,0,-0.0209929421544075,-0.0600918757263571,0.0,1,-0.1185247153043747,-0.14771065302193165,0.0,0,0.0323362834751606,-0.02553763729520142,2.0,1,-0.08103477209806442,-0.13400665763765574,0.0,0,0.016226312145590782,-0.04571152770449771,2.0,0,-0.03817397356033325,-0.07403292134404182,0.0,27,54,81
+HRM,252,410219,never,0,0,,,,,False,0.4691357910633087,0.03420727699995041,-0.02360879397019744,-0.05793485790491104,3.0,0.06261359341442585,0,-0.10549084842205048,-0.12449946720153093,0.0,0,-0.12887772917747498,-0.14991354197263718,0.0,0,-0.11351978778839111,-0.15205484814941883,0.0,0,-0.0029863507952541113,-0.10936870353179984,0.0,0,-0.05467228591442108,-0.09562173392623663,0.0,0,0.0024092153180390596,-0.052658445172710344,1.0,0,0.005857790820300579,-0.0395376698166956,2.0,0,-0.020946448668837547,-0.060396878281608224,0.0,0,-0.02279059961438179,-0.052220312878489494,0.0,0,0.03420727699995041,-0.02360879397019744,3.0,20,61,81
+HRM,253,215841,never,0,0,,,,,False,0.6913580298423767,0.14964617788791656,0.05048173782415688,0.0016320203430950642,6.0,0.42373219318687916,0,-0.12603896856307983,-0.1357364784926176,0.0,0,-0.16883710026741028,-0.18685601651668549,0.0,0,-0.06459960341453552,-0.14441021159291267,0.0,0,-0.05454691872000694,-0.08767947414889932,0.0,0,0.007599952630698681,-0.020388836914207786,2.0,0,0.09429150819778442,-0.0041322760516777635,3.0,0,0.03622609004378319,-0.0018407129973638803,4.0,0,0.12343304604291916,0.01238232757896185,5.0,0,0.041865598410367966,-0.019125381018966436,2.0,0,0.14964617788791656,0.05048173782415688,6.0,26,55,81
+HRM,254,269879,stable_learned,5,1,15624,12000,15624,12000,True,1.0,-0.15324288606643677,-0.19187645241618156,-0.21766024082899094,0.0,0.0,0,-0.05921418219804764,-0.12422112096101046,0.0,0,-0.13934744894504547,-0.17102296650409698,0.0,0,-0.041270337998867035,-0.12949887104332447,0.0,0,-0.025710690766572952,-0.06662897672504187,0.0,0,0.09391271322965622,-0.004187375423498452,3.0,1,-0.18846361339092255,-0.19790512509644032,0.0,1,-0.08980754017829895,-0.13214272167533636,0.0,1,-0.06155950576066971,-0.14944199938327074,0.0,1,-0.15738892555236816,-0.1867921892553568,0.0,1,-0.15324288606643677,-0.19187645241618156,0.0,26,55,81
+HRM,255,109346,transient_or_regressed,1,2,23436,18000,,,False,0.604938268661499,0.05139399319887161,-0.03314109379425645,-0.058068172074854374,1.0,0.05139399319887161,0,-0.07876065373420715,-0.10252790711820126,0.0,0,-0.14814049005508423,-0.17645920999348164,0.0,0,-0.06465448439121246,-0.14018629398196936,0.0,0,0.05331791564822197,-0.10746084665879607,1.0,0,0.026921628043055534,-0.06007105368189514,1.0,0,0.004933221265673637,-0.046062999303103425,2.0,0,-0.04943456873297691,-0.06807411415502429,0.0,0,0.019444871693849564,-0.026244427310302854,1.0,1,-0.044827189296483994,-0.0929996776394546,0.0,0,0.05139399319887161,-0.03314109379425645,1.0,26,55,81
+HRM,256,251014,flaky_then_stable,3,3,10416,8000,23436,18000,True,1.0,-0.14505651593208313,-0.16593581438064575,-0.17960026487708092,0.0,0.0,0,-0.0638812929391861,-0.09611320775002241,0.0,0,-0.11728653311729431,-0.16791380941867828,0.0,0,-0.09380045533180237,-0.14597928617149591,0.0,1,-0.2007993757724762,-0.24257543869316578,0.0,0,0.017823461443185806,-0.04912916198372841,1.0,0,0.0381334088742733,0.00907463880139403,6.0,0,0.14927443861961365,0.034898817306384444,6.0,0,0.05046606808900833,0.0001749419461702928,2.0,1,-0.16929131746292114,-0.18851356767117977,0.0,1,-0.14505651593208313,-0.16593581438064575,0.0,26,55,81
+HRM,257,9333,transient_or_regressed,2,4,15624,12000,,,False,0.6666666865348816,0.02047993242740631,-0.012025107047520578,-0.03479074756614864,3.0,0.048255945555865765,0,-0.10960911959409714,-0.12505285069346428,0.0,0,-0.14689767360687256,-0.16238634660840034,0.0,0,-0.11754654347896576,-0.15866242349147797,0.0,0,-0.08260755240917206,-0.1514430046081543,0.0,0,-0.04882943257689476,-0.08469816111028194,0.0,1,-0.04564421623945236,-0.09550436027348042,0.0,0,0.06319262832403183,0.010477494448423386,6.0,1,0.016762517392635345,-0.02510936209000647,1.0,0,0.058723755180835724,0.0032282689353451133,4.0,0,0.02047993242740631,-0.012025107047520578,3.0,24,57,81
+HRM,258,368982,flaky_then_stable,2,3,18228,14000,26040,20000,True,1.0,-0.10983821749687195,-0.15646705031394958,-0.17505304142832756,0.0,0.0,0,-0.112098328769207,-0.12460900470614433,0.0,0,-0.11049781739711761,-0.1452929386869073,0.0,0,-0.08464831113815308,-0.1378295812755823,0.0,0,0.0005396595224738121,-0.06830613606143743,1.0,0,-0.017841942608356476,-0.07687125913798809,0.0,0,0.02375986985862255,-0.024763040681136772,1.0,1,-0.07019595801830292,-0.10800165496766567,0.0,0,-0.01797349564731121,-0.05303217307664454,0.0,0,0.09233582019805908,0.01616693544201553,5.0,1,-0.10983821749687195,-0.15646705031394958,0.0,26,55,81
+HRM,259,351687,stable_learned,9,1,5208,4000,5208,4000,True,1.0,-0.2156800925731659,-0.2534996420145035,-0.2712399810552597,0.0,0.0,0,-0.05953453481197357,-0.1352533930912614,0.0,1,-0.21257063746452332,-0.25198310799896717,0.0,1,-0.19228123128414154,-0.3364370781928301,0.0,1,-0.1303110122680664,-0.2696842271834612,0.0,1,-0.18715298175811768,-0.2649746835231781,0.0,1,-0.2941432595252991,-0.3081941120326519,0.0,1,-0.10860394686460495,-0.19713099766522646,0.0,1,-0.13323424756526947,-0.17830674909055233,0.0,1,-0.06064697727560997,-0.18622351391240954,0.0,1,-0.2156800925731659,-0.2534996420145035,0.0,34,47,81
+HRM,260,188980,never,0,0,,,,,False,0.5802469253540039,-0.03114645555615425,-0.06179117877036333,-0.07931910641491413,0.0,0.0,0,-0.11220413446426392,-0.12446589767932892,0.0,0,-0.08839735388755798,-0.1604345729574561,0.0,0,-0.05875806137919426,-0.10250801360234618,0.0,0,-0.08872529119253159,-0.14402787666767836,0.0,0,-0.05163484811782837,-0.09987561265006661,0.0,0,0.08325513452291489,-0.04006338398903608,2.0,0,-0.018518909811973572,-0.04576408304274082,0.0,0,0.002600874286144972,-0.03644681122386828,1.0,0,-0.033416274935007095,-0.06272141775116324,0.0,0,-0.03114645555615425,-0.06179117877036333,0.0,24,57,81
+HRM,261,137633,flaky_then_stable,5,3,13020,10000,18228,14000,True,1.0,-0.15810228884220123,-0.1963670216500759,-0.2097870223224163,0.0,0.0,0,-0.0598602294921875,-0.10959549620747566,0.0,0,-0.17013119161128998,-0.19215162470936775,0.0,0,0.010765976272523403,-0.07814328314270824,1.0,0,-0.0038397405296564102,-0.05585348210297525,0.0,1,-0.20916181802749634,-0.22781315073370934,0.0,0,0.023124178871512413,-0.02848596789408475,1.0,1,-0.11218225955963135,-0.1901193354278803,0.0,1,-0.11144842952489853,-0.15674517769366503,0.0,1,-0.013849849812686443,-0.17294677288737148,0.0,1,-0.15810228884220123,-0.1963670216500759,0.0,26,55,81
+HRM,262,274238,stable_learned,6,1,13020,10000,13020,10000,True,1.0,-0.14829173684120178,-0.18911901116371155,-0.21227257698774338,0.0,0.0,0,-0.08716224879026413,-0.1216577049344778,0.0,0,-0.12208987772464752,-0.166671235114336,0.0,0,-0.11784204840660095,-0.15083577297627926,0.0,0,0.015891410410404205,-0.06033862894400954,1.0,1,-0.15598291158676147,-0.1867839265614748,0.0,1,-0.19619250297546387,-0.23460950329899788,0.0,1,-0.05050606653094292,-0.19183900905773044,0.0,1,-0.008001744747161865,-0.12110908515751362,0.0,1,-0.1344549059867859,-0.18070418387651443,0.0,1,-0.14829173684120178,-0.18911901116371155,0.0,26,55,81
+HRM,263,53591,transient_or_regressed,2,4,18228,14000,,,False,0.654321014881134,-0.03649265691637993,-0.057890879455953836,-0.06658667605370283,0.0,0.0,0,-0.1020490825176239,-0.11550942528992891,0.0,0,-0.18787071108818054,-0.20058155432343483,0.0,0,-0.09204648435115814,-0.14689329359680414,0.0,0,-0.05832836776971817,-0.10856038797646761,0.0,0,0.013721397146582603,-0.008223884098697454,3.0,0,0.008995311334729195,-0.025881048117298633,2.0,1,-0.07218178361654282,-0.16923848446458578,0.0,0,0.040462758392095566,-0.02275049011223018,1.0,1,-0.15302152931690216,-0.2179806511849165,0.0,0,-0.03649265691637993,-0.057890879455953836,0.0,26,55,81
+HRM,264,244993,transient_or_regressed,1,2,23436,18000,,,False,0.5185185074806213,-0.009763742797076702,-0.06180823210161179,-0.08162686694413424,0.0,0.0,0,-0.10358743369579315,-0.1188527811318636,0.0,0,-0.1083860695362091,-0.16700993292033672,0.0,0,-0.0874117910861969,-0.1518225446343422,0.0,0,-0.15154680609703064,-0.1907612606883049,0.0,0,-0.002189099323004484,-0.1078517054556869,0.0,0,-0.0486045777797699,-0.10682876408100128,0.0,0,-0.04987502843141556,-0.07380847446620464,0.0,0,-0.005843230988830328,-0.06390265555819497,0.0,1,-0.17882607877254486,-0.20149914361536503,0.0,0,-0.009763742797076702,-0.06180823210161179,0.0,23,58,81
+HRM,265,111621,flaky_then_stable,4,5,7812,6000,26040,20000,True,1.0,-0.14194315671920776,-0.20577803626656532,-0.2288670390844345,0.0,0.0,0,-0.07105323672294617,-0.10715353861451149,0.0,0,-0.13908472657203674,-0.16687368601560593,0.0,1,-0.19369158148765564,-0.2317418921738863,0.0,0,-0.028031660243868828,-0.1250145377125591,0.0,0,0.06160750612616539,-0.045242589389090426,2.0,1,-0.12409182637929916,-0.18642992805689573,0.0,1,-0.034458227455616,-0.060622530058026314,0.0,0,-0.019109860062599182,-0.05593499168753624,0.0,0,0.00994967296719551,-0.01337655566749163,2.0,1,-0.14194315671920776,-0.20577803626656532,0.0,26,55,81
+HRM,266,215831,never,0,0,,,,,False,0.6296296119689941,-0.06983167678117752,-0.0916182454675436,-0.10288725234568119,0.0,0.0,0,-0.10149561613798141,-0.11950844246894121,0.0,0,-0.1098102331161499,-0.1419685184955597,0.0,0,-0.07955586165189743,-0.11704917345196009,0.0,0,-0.012347709387540817,-0.06725516030564904,0.0,0,0.03578531742095947,-0.002396688796579838,3.0,0,0.11277716606855392,0.031125336419790983,5.0,0,0.09252184629440308,0.003039713221369311,3.0,0,0.08232945203781128,0.02880590909626335,6.0,0,0.051147349178791046,0.013619044795632362,5.0,0,-0.06983167678117752,-0.0916182454675436,0.0,25,56,81
+HRM,267,316537,transient_or_regressed,1,2,23436,18000,,,False,0.6172839403152466,0.011859316378831863,-0.001628192672797013,-0.008788170191110112,5.0,0.022470234835054725,0,-0.08275403082370758,-0.10692797135561705,0.0,0,-0.1451309323310852,-0.1739622876048088,0.0,0,0.01688430830836296,-0.12349293613806367,1.0,0,-0.0890883356332779,-0.1381465345621109,0.0,0,-0.06557665765285492,-0.0930204689502716,0.0,0,0.07916636765003204,-0.016313277184963226,3.0,0,-0.0010666230227798223,-0.04103243208373897,0.0,0,0.0315256305038929,-0.023771994980052114,1.0,1,-0.16652216017246246,-0.19623131677508354,0.0,0,0.011859316378831863,-0.001628192672797013,5.0,24,57,81
+HRM,268,301644,flaky_then_stable,4,5,13020,10000,26040,20000,True,1.0,-0.15189997851848602,-0.17337525263428688,-0.18795952945947647,0.0,0.0,0,-0.09579069912433624,-0.10971513856202364,0.0,0,-0.12218500673770905,-0.17274324223399162,0.0,0,-0.10628732293844223,-0.16657802555710077,0.0,0,-0.08592313528060913,-0.12688892427831888,0.0,1,-0.17885027825832367,-0.21443281136453152,0.0,0,-0.04053780063986778,-0.05937739834189415,0.0,1,-0.07269219309091568,-0.1309366300702095,0.0,1,-0.10685985535383224,-0.13582678139209747,0.0,0,0.006936194375157356,-0.033558193827047944,1.0,1,-0.15189997851848602,-0.17337525263428688,0.0,25,56,81
+HRM,269,151116,transient_or_regressed,4,4,13020,10000,,,False,0.7160493731498718,0.08884911239147186,-0.001638945541344583,-0.031117087695747614,2.0,0.13326863199472427,0,-0.10374283790588379,-0.1289075231179595,0.0,0,-0.11185154318809509,-0.16713609918951988,0.0,0,-0.059467609971761703,-0.15469412552192807,0.0,0,-0.093570277094841,-0.11811305675655603,0.0,1,-0.20947431027889252,-0.23377534188330173,0.0,0,-0.010497352108359337,-0.04858922562561929,0.0,1,-0.050234317779541016,-0.1161344014108181,0.0,1,-0.1005990132689476,-0.1336135808378458,0.0,1,-0.13943271338939667,-0.17503076419234276,0.0,0,0.08884911239147186,-0.001638945541344583,2.0,23,58,81
+HRM,270,244667,transient_or_regressed,2,2,18228,14000,,,False,0.45679011940956116,0.1662749946117401,0.03000259282271145,-0.0006512911204481497,7.0,0.25539916375419125,0,-0.11944516003131866,-0.12565023079514503,0.0,0,-0.13971677422523499,-0.18000421486794949,0.0,0,-0.13808956742286682,-0.16521888598799706,0.0,0,-0.10344075411558151,-0.13664115965366364,0.0,0,-0.037897273898124695,-0.07266581244766712,0.0,0,-0.012899982742965221,-0.048071267432533205,0.0,1,-0.1572304368019104,-0.1980451550334692,0.0,1,-0.12509360909461975,-0.1537772100418806,0.0,0,-0.049015045166015625,-0.0620318204164505,0.0,0,0.1662749946117401,0.03000259282271145,7.0,21,60,81
+HRM,271,107534,transient_or_regressed,3,2,18228,14000,,,False,0.7407407164573669,0.03681459650397301,-0.006078572710976005,-0.0372450016438961,4.0,0.10035142488777637,0,-0.10267876088619232,-0.12326103076338768,0.0,0,-0.13022860884666443,-0.18872315995395184,0.0,0,-0.010794873349368572,-0.1226882686605677,0.0,0,-0.10378894209861755,-0.1499876044690609,0.0,0,-0.07724011689424515,-0.09374449122697115,0.0,0,0.030059218406677246,-0.03830412841125508,2.0,1,-0.11089448630809784,-0.1221829392015934,0.0,1,-0.018511861562728882,-0.13914996571838856,0.0,1,-0.15625451505184174,-0.1898876428604126,0.0,0,0.03681459650397301,-0.006078572710976005,4.0,24,57,81
+HRM,272,391236,stable_learned,6,1,13020,10000,13020,10000,True,1.0,-0.17697106301784515,-0.2127636820077896,-0.23816990107297897,0.0,0.0,0,-0.10324801504611969,-0.12945163995027542,0.0,0,-0.15759830176830292,-0.19955316931009293,0.0,0,-0.07633935660123825,-0.12580632604658604,0.0,0,0.03917349874973297,-0.10126848518848419,1.0,1,-0.1728726029396057,-0.22039163298904896,0.0,1,-0.21545694768428802,-0.2831729371100664,0.0,1,-0.15253940224647522,-0.19183645397424698,0.0,1,-0.10322874039411545,-0.1507518384605646,0.0,1,-0.23073840141296387,-0.2580492328852415,0.0,1,-0.17697106301784515,-0.2127636820077896,0.0,28,53,81
+HRM,273,244083,transient_or_regressed,1,2,23436,18000,,,False,0.7037037014961243,0.00856078788638115,-0.04223248874768615,-0.058504143729805946,1.0,0.00856078788638115,0,-0.11232888698577881,-0.12164593674242496,0.0,0,-0.1193663477897644,-0.1524054128676653,0.0,0,-0.06361065059900284,-0.13228462170809507,0.0,0,0.046689219772815704,-0.07360728876665235,1.0,0,0.006749460007995367,-0.06729377555893734,1.0,0,0.053852107375860214,-0.03130577702540904,1.0,0,0.029868824407458305,-0.029380872379988432,2.0,0,0.03719685971736908,-0.03991385965491645,1.0,1,-0.16150638461112976,-0.1818549782037735,0.0,0,0.00856078788638115,-0.04223248874768615,1.0,24,57,81
+HRM,274,48553,flaky_then_stable,4,5,7812,6000,23436,18000,True,1.0,-0.10359065234661102,-0.1864038873463869,-0.2109411470592022,0.0,0.0,0,-0.09240491688251495,-0.10533439554274082,0.0,0,-0.15606725215911865,-0.1975680384784937,0.0,1,-0.22222141921520233,-0.27945212833583355,0.0,0,-0.12596777081489563,-0.15205287374556065,0.0,0,-0.05662647634744644,-0.08249044045805931,0.0,1,-0.21269625425338745,-0.25447311252355576,0.0,0,0.02009107731282711,-0.03853341075591743,1.0,0,-0.015620226040482521,-0.03185122716240585,0.0,1,-0.16857171058654785,-0.21296712569892406,0.0,1,-0.10359065234661102,-0.1864038873463869,0.0,27,54,81
+HRM,275,25129,flaky_then_stable,6,3,10416,8000,15624,12000,True,1.0,-0.09742903709411621,-0.15374451130628586,-0.1885913759469986,0.0,0.0,0,-0.09581729024648666,-0.11769845802336931,0.0,0,-0.053074829280376434,-0.1467577638104558,0.0,0,-0.11154128611087799,-0.1736617125570774,0.0,1,-0.018453845754265785,-0.0734529655892402,0.0,0,0.07089753448963165,0.010973383919917978,4.0,1,-0.17362651228904724,-0.2375468909740448,0.0,1,-0.0429203175008297,-0.09968086006119847,0.0,1,-0.04497409984469414,-0.15768122812733054,0.0,1,-0.19836579263210297,-0.24889435805380344,0.0,1,-0.09742903709411621,-0.15374451130628586,0.0,26,55,81
+HRM,276,390174,flaky_then_stable,4,3,15624,12000,23436,18000,True,1.0,-0.14310403168201447,-0.19370186515152454,-0.21111401543021202,0.0,0.0,0,-0.10645060986280441,-0.12445647828280926,0.0,0,-0.14823594689369202,-0.18192709051072598,0.0,0,-0.10326775163412094,-0.1655847867950797,0.0,0,-0.0375196672976017,-0.1283371183089912,0.0,0,-0.028028883039951324,-0.08979744976386428,0.0,1,-0.17065081000328064,-0.21061589382588863,0.0,1,-0.14413252472877502,-0.18383575789630413,0.0,0,0.009642986580729485,-0.05006899603176862,2.0,1,-0.14137548208236694,-0.211851229891181,0.0,1,-0.14310403168201447,-0.19370186515152454,0.0,23,58,81
+HRM,277,172139,never,0,0,,,,,False,0.5679012537002563,0.05843503773212433,0.008827171055600047,-0.015115302056074142,6.0,0.15972593240439892,0,-0.11429043114185333,-0.1199762998148799,0.0,0,-0.14810574054718018,-0.1877471972256899,0.0,0,-0.13553479313850403,-0.1906007956713438,0.0,0,-0.009964006952941418,-0.0881244057090953,0.0,0,-0.01134231686592102,-0.07810050947591662,0.0,0,-0.05565270781517029,-0.08169360551983118,0.0,0,0.06807725131511688,-0.022466233931481838,2.0,0,0.05147623270750046,-0.042261635593604296,1.0,0,-0.04015730693936348,-0.0812241486273706,0.0,0,0.05843503773212433,0.008827171055600047,6.0,23,58,81
+HRM,278,369612,never,0,0,,,,,False,0.7037037014961243,0.07800719887018204,-0.013752914499491453,-0.06646730192005634,3.0,0.1905882991850376,0,-0.10936632007360458,-0.12386610917747021,0.0,0,-0.1896573305130005,-0.20755851082503796,0.0,0,-0.1369033008813858,-0.17525585740804672,0.0,0,-0.09391719102859497,-0.1273148050531745,0.0,0,0.04423165321350098,-0.009193703241180629,4.0,0,0.0958157628774643,-0.0027107030218758155,3.0,0,-0.014016221277415752,-0.05069319170434028,0.0,0,0.012840671464800835,-0.0398772950284183,1.0,0,-0.05631568655371666,-0.07612203294411302,0.0,0,0.07800719887018204,-0.013752914499491453,3.0,25,56,81
+HRM,279,134436,transient_or_regressed,1,2,20832,16000,,,False,0.5802469253540039,0.05610259994864464,-0.017666121246293187,-0.054341334849596024,3.0,0.10927441529929638,0,-0.10296782106161118,-0.12033856753259897,0.0,0,-0.12236808985471725,-0.16567054111510515,0.0,0,-0.08638161420822144,-0.1223958870396018,0.0,0,-0.0451013520359993,-0.09635611344128847,0.0,0,0.039273008704185486,-0.06072113569825888,1.0,0,0.07714051753282547,-0.0130595238879323,2.0,0,0.05966657027602196,-0.023186694917967543,1.0,1,-0.034724246710538864,-0.07342498609796166,0.0,0,-0.012584788724780083,-0.06378450407646596,0.0,0,0.05610259994864464,-0.017666121246293187,3.0,24,57,81
+HRM,280,242300,transient_or_regressed,3,2,18228,14000,,,False,0.6666666865348816,0.020345166325569153,-0.020377232460305095,-0.03716626390814781,2.0,0.037560710683465004,0,-0.11428837478160858,-0.1244181701913476,0.0,0,-0.1496926099061966,-0.18257209472358227,0.0,0,-0.0845978856086731,-0.147960364818573,0.0,0,0.014363843016326427,-0.10147685778792948,1.0,0,0.038483891636133194,-0.024040276672167238,1.0,0,0.026153942570090294,-0.022742692730389535,3.0,1,-0.11640723794698715,-0.15589513536542654,0.0,1,-0.0035714253317564726,-0.09247248954488896,0.0,1,-0.02943814918398857,-0.07303053652867675,0.0,0,0.020345166325569153,-0.020377232460305095,2.0,25,56,81
+HRM,281,283854,flaky_then_stable,6,3,10416,8000,26040,20000,True,1.0,-0.13533489406108856,-0.18728636391460896,-0.21489501371979713,0.0,0.0,0,-0.07999913394451141,-0.10785297024995089,0.0,0,-0.11797778308391571,-0.1639271229505539,0.0,0,-0.11173884570598602,-0.141778115183115,0.0,1,-0.12959134578704834,-0.23265478014945984,0.0,1,-0.17547473311424255,-0.22229114919900894,0.0,1,-0.1615004539489746,-0.22882594354450703,0.0,1,-0.05497640371322632,-0.13860259763896465,0.0,1,-0.10502077639102936,-0.18281625770032406,0.0,0,0.08089922368526459,-0.02167302370071411,1.0,1,-0.13533489406108856,-0.18728636391460896,0.0,26,55,81
+HRM,282,171776,never,0,0,,,,,False,0.8024691343307495,-0.01903664506971836,-0.054161514388397336,-0.07499588653445244,0.0,0.0,0,-0.10518248379230499,-0.12036873120814562,0.0,0,-0.13475105166435242,-0.17834609933197498,0.0,0,-0.04983724653720856,-0.16137700155377388,0.0,0,-0.12918972969055176,-0.1758724097162485,0.0,0,-0.04695224016904831,-0.1036895327270031,0.0,0,0.029465055093169212,-0.06619984493590891,1.0,0,-0.017947997897863388,-0.049881183076649904,0.0,0,-0.017999570816755295,-0.0774795743636787,0.0,0,-0.04136132076382637,-0.06454926077276468,0.0,0,-0.01903664506971836,-0.054161514388397336,0.0,25,56,81
+HRM,283,187057,flaky_then_stable,4,5,13020,10000,26040,20000,True,1.0,-0.15409475564956665,-0.1639392115175724,-0.17249581590294838,0.0,0.0,0,-0.09997522085905075,-0.1176330940797925,0.0,0,-0.11677050590515137,-0.18019472621381283,0.0,0,-0.02932245284318924,-0.08459186647087336,0.0,0,0.03838028386235237,-0.044263551593758166,1.0,1,-0.09286194294691086,-0.11639734543859959,0.0,0,0.07118850201368332,0.024575928400736302,5.0,1,-0.007146700285375118,-0.16307057428639382,0.0,1,-0.07995274662971497,-0.1537617277354002,0.0,0,-0.03273700177669525,-0.0628963983617723,0.0,1,-0.15409475564956665,-0.1639392115175724,0.0,26,55,81
+HRM,284,357512,transient_or_regressed,1,2,23436,18000,,,False,0.5925925970077515,0.043812401592731476,-0.03780749998986721,-0.06951723527163267,1.0,0.043812401592731476,0,-0.12840968370437622,-0.1401983592659235,0.0,0,-0.17798775434494019,-0.19428386352956295,0.0,0,-0.1363019496202469,-0.18582075275480747,0.0,0,-0.017043162137269974,-0.10913274763152003,0.0,0,-0.06725190579891205,-0.09148567635565996,0.0,0,0.13941264152526855,0.005488478306688194,3.0,0,0.05082528293132782,-0.03343410789966583,1.0,0,0.09068220853805542,0.03069118200801313,6.0,1,-0.0663156658411026,-0.0900841262191534,0.0,0,0.043812401592731476,-0.03780749998986721,1.0,24,57,81
+HRM,285,233509,never,0,0,,,,,False,0.6790123581886292,-0.05144181102514267,-0.07031922042369843,-0.08143999241292477,0.0,0.0,0,-0.09839038550853729,-0.11288122739642859,0.0,0,-0.14449813961982727,-0.16170991957187653,0.0,0,-0.13688595592975616,-0.1660632360726595,0.0,0,-0.09036619961261749,-0.1802367027848959,0.0,0,-0.02511025220155716,-0.10553450137376785,0.0,0,-0.02845124527812004,-0.10082823084667325,0.0,0,-0.022291041910648346,-0.05351162562146783,0.0,0,-0.006421692669391632,-0.0919094393029809,0.0,0,-0.024006254971027374,-0.07409954536706209,0.0,0,-0.05144181102514267,-0.07031922042369843,0.0,24,57,81
+HRM,286,156093,never,0,0,,,,,False,0.6790123581886292,0.09017663449048996,0.017210194026120007,-0.013578164158388972,5.0,0.20011465158313513,0,-0.0980108305811882,-0.11192677449434996,0.0,0,-0.15928982198238373,-0.1904127150774002,0.0,0,-0.08428603410720825,-0.12820499017834663,0.0,0,-0.07788313180208206,-0.13953014183789492,0.0,0,-0.04993482679128647,-0.07955474127084017,0.0,0,0.022584814578294754,-0.02256000810302794,2.0,0,-0.0008970568305812776,-0.02578911533782957,0.0,0,0.040424786508083344,-0.028528463910333812,2.0,0,0.018361011520028114,-0.013110877756844275,3.0,0,0.09017663449048996,0.017210194026120007,5.0,24,57,81
+HRM,287,403240,transient_or_regressed,3,4,15624,12000,,,False,0.6790123581886292,0.07545623183250427,0.01710401631135028,-0.011318375502014533,5.0,0.18371946772094816,0,-0.07528268545866013,-0.09578035119920969,0.0,0,-0.1247323602437973,-0.1606555711477995,0.0,0,-0.14740994572639465,-0.17595721781253815,0.0,0,-0.013838670216500759,-0.1269147143466398,0.0,0,0.0568658784031868,-0.01362539827823639,2.0,1,-0.10088641941547394,-0.14836973417550325,0.0,1,-0.064900703728199,-0.17691929172724485,0.0,0,0.0077922046184539795,-0.022442814602982253,1.0,1,-0.10430805385112762,-0.22236592695116997,0.0,0,0.07545623183250427,0.01710401631135028,5.0,27,54,81
+HRM,288,352372,flaky_then_stable,6,5,7812,6000,26040,20000,True,1.0,-0.1681002974510193,-0.18228924460709095,-0.18945590779185295,0.0,0.0,0,-0.11114858835935593,-0.11743558011949062,0.0,0,-0.18958985805511475,-0.2097310610115528,0.0,1,-0.1386304348707199,-0.2457073424011469,0.0,1,-0.14433543384075165,-0.16541482508182526,0.0,1,-0.14241346716880798,-0.1995602548122406,0.0,0,-0.03803583234548569,-0.06329111708328128,0.0,1,-0.01076615508645773,-0.1363315376220271,0.0,1,0.002230502665042877,-0.02996728103607893,1.0,0,0.04507105052471161,-0.003099843394011259,3.0,1,-0.1681002974510193,-0.18228924460709095,0.0,26,55,81
+HRM,289,115724,flaky_then_stable,7,3,7812,6000,13020,10000,True,1.0,-0.19116298854351044,-0.2090867329388857,-0.21881196647882462,0.0,0.0,0,-0.09439709037542343,-0.11229718662798405,0.0,0,-0.04266197606921196,-0.13564229616895318,0.0,1,-0.2140587419271469,-0.2640792056918144,0.0,0,-0.09607453644275665,-0.12529527582228184,0.0,1,-0.0875227153301239,-0.16241919249296188,0.0,1,-0.2391362190246582,-0.2599355895072222,0.0,1,-0.12935860455036163,-0.17253669165074825,0.0,1,-0.09540899842977524,-0.1411536606028676,0.0,1,-0.08615806698799133,-0.1984619740396738,0.0,1,-0.19116298854351044,-0.2090867329388857,0.0,25,56,81
+HRM,290,32827,never,0,0,,,,,False,0.7530864477157593,0.050847411155700684,-0.016007397207431495,-0.043118443340063095,3.0,0.06887348834425211,0,-0.0867040753364563,-0.11703921481966972,0.0,0,-0.11420100927352905,-0.14311864040791988,0.0,0,-0.0645616427063942,-0.14992300514131784,0.0,0,-0.06399080902338028,-0.1321777580305934,0.0,0,-0.0016466337256133556,-0.06097362219588831,0.0,0,0.030360301956534386,-0.02824665722437203,1.0,0,0.11129264533519745,0.03972695767879486,6.0,0,-0.06649665534496307,-0.09418793115764856,0.0,0,0.03863118588924408,-0.03434300806839019,2.0,0,0.050847411155700684,-0.016007397207431495,3.0,24,57,81
+HRM,291,162037,transient_or_regressed,1,2,23436,18000,,,False,0.604938268661499,-0.027006803080439568,-0.05743943969719112,-0.07392190955579281,0.0,0.0,0,-0.11087633669376373,-0.12083284184336662,0.0,0,-0.14442434906959534,-0.17778888531029224,0.0,0,-0.07362421602010727,-0.11158656235784292,0.0,0,-0.1025720164179802,-0.1525770826265216,0.0,0,-0.0909782201051712,-0.11503045167773962,0.0,0,0.003721520071849227,-0.10409066596184857,1.0,0,-0.0802903026342392,-0.0948701798915863,0.0,0,-0.016763882711529732,-0.07681493484415114,0.0,1,-0.15819209814071655,-0.20041325874626637,0.0,0,-0.027006803080439568,-0.05743943969719112,0.0,24,57,81
+HRM,292,17302,never,0,0,,,,,False,0.654321014881134,0.010036211460828781,-0.027804602752439678,-0.047769753728061914,1.0,0.010036211460828781,0,-0.10266278684139252,-0.11273373663425446,0.0,0,-0.13155464828014374,-0.15255341865122318,0.0,0,-0.15525835752487183,-0.17305221408605576,0.0,0,-0.14573730528354645,-0.1742907501757145,0.0,0,-0.08685697615146637,-0.11471949890255928,0.0,0,-0.06872298568487167,-0.09744468703866005,0.0,0,0.02903015911579132,-0.047022522427141666,1.0,0,-0.04033838212490082,-0.06721223052591085,0.0,0,0.024841977283358574,-0.08980745659209788,1.0,0,0.010036211460828781,-0.027804602752439678,1.0,23,58,81
+HRM,293,6212,transient_or_regressed,2,4,10416,8000,,,False,0.5308641791343689,0.07253585755825043,0.013173768878914416,-0.021986535051837564,4.0,0.19333629123866558,0,-0.1119825541973114,-0.12251309957355261,0.0,0,-0.11058762669563293,-0.15062913112342358,0.0,0,0.025189755484461784,-0.08682839735411108,1.0,1,-0.16868744790554047,-0.21032866090536118,0.0,0,-0.009470680728554726,-0.05499695963226259,0.0,0,0.021185021847486496,-0.04498341353610158,1.0,0,0.059950605034828186,0.034967931103892624,8.0,1,-0.09479562938213348,-0.12987417168915272,0.0,0,0.00877153966575861,-0.017766758377547376,2.0,0,0.07253585755825043,0.013173768878914416,4.0,17,64,81
+HRM,294,160912,never,0,0,,,,,False,0.6666666865348816,0.017616840079426765,-0.04721428989432752,-0.0735603366047144,1.0,0.017616840079426765,0,-0.07703018933534622,-0.0916237635537982,0.0,0,-0.12121516466140747,-0.16138234548270702,0.0,0,-0.14582714438438416,-0.18349513597786427,0.0,0,-0.11087685078382492,-0.15448118653148413,0.0,0,-0.04063640907406807,-0.09382060961797833,0.0,0,-0.014361987821757793,-0.05111503764055669,0.0,0,0.025222111493349075,-0.04814887745305896,1.0,0,-0.045794859528541565,-0.086201305501163,0.0,0,0.007415388710796833,-0.0599954224890098,1.0,0,0.017616840079426765,-0.04721428989432752,1.0,24,57,81
+HRM,295,220932,stable_learned,2,1,23436,18000,23436,18000,True,1.0,-0.09793977439403534,-0.15673508774489164,-0.19997262954711914,0.0,0.0,0,-0.1011694073677063,-0.11557457596063614,0.0,0,-0.14337752759456635,-0.1816475000232458,0.0,0,-0.12904289364814758,-0.18545055575668812,0.0,0,0.026616742834448814,-0.053266716888174415,1.0,0,0.04136672988533974,-0.008024610520806164,4.0,0,0.020042674615979195,-0.003762401989661157,4.0,0,0.09013737738132477,-0.0298883025534451,1.0,0,0.036643728613853455,-0.01859109802171588,2.0,1,-0.17121708393096924,-0.20109590701758862,0.0,1,-0.09793977439403534,-0.15673508774489164,0.0,24,57,81
+HRM,296,30809,transient_or_regressed,1,2,20832,16000,,,False,0.43209877610206604,0.06512238085269928,-0.0168319315998815,-0.04099632194265723,1.0,0.06512238085269928,0,-0.09396945685148239,-0.1074230745434761,0.0,0,-0.13885995745658875,-0.16459016129374504,0.0,0,-0.12806208431720734,-0.1761685609817505,0.0,0,-0.093186154961586,-0.16955821961164474,0.0,0,-0.012850895524024963,-0.08515184745192528,0.0,0,-0.0025708237662911415,-0.05726029351353645,0.0,0,0.01665806770324707,-0.054324057418853045,1.0,1,-0.10339179635047913,-0.14633488468825817,0.0,0,-0.021578732877969742,-0.0829233038239181,0.0,0,0.06512238085269928,-0.0168319315998815,1.0,23,58,81
+HRM,297,192110,stable_learned,6,1,13020,10000,13020,10000,True,1.0,-0.14841808378696442,-0.18252938985824585,-0.19370774552226067,0.0,0.0,0,-0.08970287442207336,-0.10789080709218979,0.0,0,-0.1284763216972351,-0.14884648472070694,0.0,0,-0.05094410106539726,-0.12074342323467135,0.0,0,-0.009960425086319447,-0.08227543777320534,0.0,1,-0.17785292863845825,-0.2193194441497326,0.0,1,-0.1775510162115097,-0.2499491237103939,0.0,1,-0.06331854313611984,-0.19141100998967886,0.0,1,-0.10834640264511108,-0.14422573801130056,0.0,1,-0.21237754821777344,-0.23246445320546627,0.0,1,-0.14841808378696442,-0.18252938985824585,0.0,27,54,81
+HRM,298,341639,flaky_then_stable,5,3,5208,4000,20832,16000,True,1.0,-0.11659245938062668,-0.14640820678323507,-0.16900233551859856,0.0,0.0,0,-0.07791762053966522,-0.10558705870062113,0.0,1,-0.1430564522743225,-0.19059646502137184,0.0,1,-0.17978760600090027,-0.2153207752853632,0.0,0,-0.03148296847939491,-0.1029741638340056,0.0,0,-0.035258468240499496,-0.0719202239997685,0.0,0,0.06631532311439514,-0.02616411109920591,1.0,0,0.06357515603303909,-0.014020514150615782,2.0,1,-0.11851683259010315,-0.14007698744535446,0.0,1,-0.10781140625476837,-0.1865217424929142,0.0,1,-0.11659245938062668,-0.14640820678323507,0.0,27,54,81
+HRM,299,234124,flaky_then_stable,6,5,7812,6000,18228,14000,True,1.0,-0.17132753133773804,-0.233146820217371,-0.26174093410372734,0.0,0.0,0,-0.08442488312721252,-0.1002544928342104,0.0,0,-0.09321071207523346,-0.13759025372564793,0.0,1,-0.19680386781692505,-0.25166231021285057,0.0,0,-0.0187641941010952,-0.0783357392065227,0.0,1,-0.1953040212392807,-0.2098090536892414,0.0,0,0.04530986398458481,-0.0031240772223100066,3.0,1,-0.1329992413520813,-0.18596524000167847,0.0,1,-0.10917504131793976,-0.1489247940480709,0.0,1,-0.09428179264068604,-0.20346027053892612,0.0,1,-0.17132753133773804,-0.233146820217371,0.0,27,54,81
+HRM,300,78005,never,0,0,,,,,False,0.48148149251937866,0.01129234116524458,-0.04752399877179414,-0.0678262859582901,1.0,0.01129234116524458,0,-0.07667509466409683,-0.11307137832045555,0.0,0,-0.13922514021396637,-0.16773114912211895,0.0,0,-0.1284509003162384,-0.1888100728392601,0.0,0,-0.07611662149429321,-0.15642879530787468,0.0,0,-0.038381561636924744,-0.07471664156764746,0.0,0,-0.0036772899329662323,-0.07226868532598019,0.0,0,0.012472884729504585,-0.044984672451391816,1.0,0,0.011408902704715729,-0.045653874141862616,1.0,0,-0.004934551194310188,-0.05114056041929871,0.0,0,0.01129234116524458,-0.04752399877179414,1.0,24,57,81
+HRM,301,165405,stable_learned,6,1,13020,10000,13020,10000,True,1.0,-0.12130259722471237,-0.1850585611537099,-0.22290796786546707,0.0,0.0,0,-0.07186810672283173,-0.11729002930223942,0.0,0,-0.1089145690202713,-0.17231342196464539,0.0,0,-0.08151531219482422,-0.10203573852777481,0.0,0,0.0733129009604454,-0.02208529075141996,2.0,1,-0.21319256722927094,-0.22855687700212002,0.0,1,-0.1730436384677887,-0.19334685243666172,0.0,1,-0.06867121160030365,-0.14536665566265583,0.0,1,-0.13090813159942627,-0.15356281399726868,0.0,1,-0.2341098040342331,-0.24472913146018982,0.0,1,-0.12130259722471237,-0.1850585611537099,0.0,26,55,81
+HRM,302,403476,flaky_then_stable,5,3,13020,10000,23436,18000,True,1.0,-0.15038971602916718,-0.1907939687371254,-0.21354450657963753,0.0,0.0,0,-0.06115426868200302,-0.08537599723786116,0.0,0,-0.08353878557682037,-0.1684593465179205,0.0,0,-0.13248461484909058,-0.15522927232086658,0.0,0,-0.04760579764842987,-0.11682495661079884,0.0,1,-0.19397573173046112,-0.2358309831470251,0.0,1,-0.19588534533977509,-0.23352773115038872,0.0,1,-0.08777561038732529,-0.14649501256644726,0.0,0,0.04180914908647537,-0.02323232125490904,1.0,1,-0.10939528793096542,-0.1584083503112197,0.0,1,-0.15038971602916718,-0.1907939687371254,0.0,26,55,81
+HRM,303,54951,stable_learned,4,1,18228,14000,18228,14000,True,1.0,-0.11532007157802582,-0.14717126451432705,-0.16263465210795403,0.0,0.0,0,-0.07385881245136261,-0.106690788641572,0.0,0,-0.08444580435752869,-0.1454552300274372,0.0,0,-0.1128922700881958,-0.157455213367939,0.0,0,-0.0069000753574073315,-0.12016929354285821,0.0,0,0.07634763419628143,-0.028614283073693514,2.0,0,0.05596993863582611,-0.02961075643543154,2.0,1,-0.1526096612215042,-0.16827701777219772,0.0,1,-0.11578260362148285,-0.14513140264898539,0.0,1,-0.11023253202438354,-0.1946568638086319,0.0,1,-0.11532007157802582,-0.14717126451432705,0.0,24,57,81
+HRM,304,178635,transient_or_regressed,3,2,15624,12000,,,False,0.7777777910232544,0.04207847639918327,-0.0033968561328947544,-0.02793277334421873,3.0,0.09477370977401733,0,-0.08979781717061996,-0.11034760717302561,0.0,0,-0.09859248250722885,-0.14837970305234194,0.0,0,-0.08447980135679245,-0.15297355223447084,0.0,0,-0.06974528729915619,-0.11480041593313217,0.0,0,-0.0536608025431633,-0.083708799444139,0.0,1,-0.05802934989333153,-0.09702691668644547,0.0,1,-0.09481896460056305,-0.14489738084375858,0.0,1,-0.10423821210861206,-0.15769642032682896,0.0,0,-0.0056880987249314785,-0.025483981880825013,0.0,0,0.04207847639918327,-0.0033968561328947544,3.0,24,57,81
+HRM,305,376021,never,0,0,,,,,False,0.8765432238578796,-0.0027860652189701796,-0.061379769671475515,-0.08788455836474895,0.0,0.0,0,-0.1163988932967186,-0.12700926885008812,0.0,0,-0.15968209505081177,-0.17867829278111458,0.0,0,-0.050451360642910004,-0.12285582069307566,0.0,0,-0.09631676971912384,-0.12592878378927708,0.0,0,0.027028784155845642,-0.05330059630796313,1.0,0,-0.005094428081065416,-0.05315506103215739,0.0,0,0.03909362852573395,-0.04974001180380583,1.0,0,-0.07361242175102234,-0.09097425267100334,0.0,0,0.08181092143058777,-0.01634242432191968,2.0,0,-0.0027860652189701796,-0.061379769671475515,0.0,26,55,81
+HRM,306,212950,never,0,0,,,,,False,0.5555555820465088,0.016819056123495102,-0.04795565828680992,-0.0710913110524416,1.0,0.016819056123495102,0,-0.04953588545322418,-0.11099988874047995,0.0,0,-0.10582458972930908,-0.14604149665683508,0.0,0,-0.15278217196464539,-0.17494571395218372,0.0,0,-0.0915357917547226,-0.1521664336323738,0.0,0,-0.09992355108261108,-0.1274090800434351,0.0,0,-0.025364426895976067,-0.06570212985388935,0.0,0,-0.05364825949072838,-0.07855986943468451,0.0,0,-0.06396369636058807,-0.07476157788187265,0.0,0,-0.0016064858064055443,-0.02090390946250409,0.0,0,0.016819056123495102,-0.04795565828680992,1.0,23,58,81
+HRM,307,290435,transient_or_regressed,2,4,18228,14000,,,False,0.7530864477157593,0.15622290968894958,0.009930624451953918,-0.026561218313872814,4.0,0.1856898688711226,0,-0.11904361099004745,-0.12593098171055317,0.0,0,-0.11062958836555481,-0.14419104531407356,0.0,0,-0.13840025663375854,-0.15814367681741714,0.0,0,-0.0041598547250032425,-0.05284534324891865,0.0,0,0.00622232910245657,-0.06435329199302942,1.0,0,0.0833229124546051,0.010941467306111008,4.0,1,-0.0801134780049324,-0.133710453286767,0.0,0,0.03803274780511856,0.006979587720707059,5.0,1,-0.1330854892730713,-0.1683710217475891,0.0,0,0.15622290968894958,0.009930624451953918,4.0,17,64,81
+HRM,308,269127,stable_learned,8,1,7812,6000,7812,6000,True,1.0,-0.14351901412010193,-0.1826701369136572,-0.2087603472173214,0.0,0.0,0,-0.09225194901227951,-0.1083313450217247,0.0,0,-0.20474356412887573,-0.21606340073049068,0.0,1,-0.2530059516429901,-0.28180257976055145,0.0,1,-0.13288390636444092,-0.1749155167490244,0.0,1,-0.12481285631656647,-0.19342554174363613,0.0,1,-0.15633979439735413,-0.22829796187579632,0.0,1,-0.05229693651199341,-0.14629816357046366,0.0,1,-0.13201799988746643,-0.15182624571025372,0.0,1,-0.15106089413166046,-0.20663228258490562,0.0,1,-0.14351901412010193,-0.1826701369136572,0.0,28,53,81
+HRM,309,245133,transient_or_regressed,1,2,23436,18000,,,False,0.6296296119689941,-0.00751439668238163,-0.030691877007484436,-0.043036567978560925,0.0,0.0,0,-0.11458103358745575,-0.12651335075497627,0.0,0,-0.15334492921829224,-0.1791689358651638,0.0,0,-0.09436289966106415,-0.16149275191128254,0.0,0,-0.11303442716598511,-0.16775738075375557,0.0,0,-0.09115608781576157,-0.11241720244288445,0.0,0,-0.02800016477704048,-0.10151961678639054,0.0,0,0.011901555582880974,-0.0446267903316766,1.0,0,-0.02775082364678383,-0.07548280665650964,0.0,1,-0.19712398946285248,-0.22846000455319881,0.0,0,-0.00751439668238163,-0.030691877007484436,0.0,26,55,81
+HRM,310,282268,flaky_then_stable,3,5,15624,12000,26040,20000,True,1.0,-0.19565407931804657,-0.20700865983963013,-0.21143247187137604,0.0,0.0,0,-0.096830353140831,-0.10586857795715332,0.0,0,-0.08138289302587509,-0.14726436976343393,0.0,0,-0.07720615714788437,-0.13423546869307756,0.0,0,-0.16210198402404785,-0.17513914965093136,0.0,0,-0.1033492237329483,-0.12010921724140644,0.0,1,0.02832929790019989,-0.08544808533042669,1.0,0,0.08965511620044708,0.0057877969375113025,3.0,1,-0.10179846733808517,-0.16267722006887197,0.0,0,-0.06192507594823837,-0.08899631164968014,0.0,1,-0.19565407931804657,-0.20700865983963013,0.0,23,58,81
+HRM,311,134521,flaky_then_stable,7,3,7812,6000,13020,10000,True,1.0,-0.16377978026866913,-0.18748822808265686,-0.19979466497898102,0.0,0.0,0,-0.09558553993701935,-0.10809405520558357,0.0,0,-0.19151781499385834,-0.20636233314871788,0.0,1,-0.18843448162078857,-0.23499847576022148,0.0,0,-0.08608546108007431,-0.10602062288671732,0.0,1,-0.1812768578529358,-0.22319618985056877,0.0,1,-0.20132222771644592,-0.23422215320169926,0.0,1,-0.15094536542892456,-0.17102566920220852,0.0,1,-0.1324724555015564,-0.1631535291671753,0.0,1,-0.11741562932729721,-0.19887820538133383,0.0,1,-0.16377978026866913,-0.18748822808265686,0.0,24,57,81
+HRM,312,35485,never,0,0,,,,,False,0.6419752836227417,0.10410581529140472,0.015459765796549618,-0.02413515211082995,3.0,0.2236293964087963,0,-0.12513402104377747,-0.1340118795633316,0.0,0,-0.09232347458600998,-0.1611730894073844,0.0,0,-0.14192795753479004,-0.1753640566021204,0.0,0,-0.003440852975472808,-0.11563560969079845,0.0,0,0.06854028254747391,-0.025046521535841748,2.0,0,0.049688901752233505,-0.00021202291827648878,3.0,0,0.03595668822526932,0.004430779255926609,4.0,0,0.02479347586631775,-0.012583601754158735,3.0,0,-0.020230498164892197,-0.062197744846343994,0.0,0,0.10410581529140472,0.015459765796549618,3.0,26,55,81
+HRM,313,421149,stable_learned,1,1,26040,20000,26040,20000,True,1.0,-0.16325944662094116,-0.20904186367988586,-0.2338038645684719,0.0,0.0,0,-0.09460881352424622,-0.11233377363532782,0.0,0,-0.15280033648014069,-0.17934593558311462,0.0,0,-0.085608571767807,-0.1418740088120103,0.0,0,-0.0748913586139679,-0.11481434293091297,0.0,0,-0.0006079025333747268,-0.04262403892062139,0.0,0,-0.011155076324939728,-0.05646898690611124,0.0,0,-0.031093500554561615,-0.0575138577260077,0.0,0,0.030779827386140823,-0.01057897214195691,4.0,0,-0.022815030068159103,-0.051425968296825886,0.0,1,-0.16325944662094116,-0.20904186367988586,0.0,26,55,81
+HRM,314,52487,never,0,0,,,,,False,0.5061728358268738,0.030679380521178246,-0.028635887429118156,-0.05718073341995478,2.0,0.05403308942914009,0,-0.0939868688583374,-0.11499896459281445,0.0,0,-0.09904871881008148,-0.14682870171964169,0.0,0,-0.12432446330785751,-0.16653672140091658,0.0,0,-0.12449797242879868,-0.15818906109780073,0.0,0,-0.0009811725467443466,-0.07515415293164551,0.0,0,-0.009903297759592533,-0.07804209331516176,0.0,0,-0.00125465530436486,-0.04348719351401087,0.0,0,0.0467066615819931,-0.0538217700086534,1.0,0,0.0011112091597169638,-0.060886241524713114,1.0,0,0.030679380521178246,-0.028635887429118156,2.0,25,56,81
+HRM,315,399134,never,0,0,,,,,False,0.6913580298423767,0.018832897767424583,-0.02709220553515479,-0.04564409703016281,2.0,0.01900234119966626,0,-0.0878344401717186,-0.11708068568259478,0.0,0,-0.10166709125041962,-0.16908368282020092,0.0,0,-0.18033990263938904,-0.21101806871592999,0.0,0,-0.15209883451461792,-0.16879702918231487,0.0,0,-0.06841915845870972,-0.09785959497094154,0.0,0,-0.057993512600660324,-0.09056435385718942,0.0,0,-0.050111837685108185,-0.06932628247886896,0.0,0,-0.04214608296751976,-0.07985126273706555,0.0,0,0.046743787825107574,-0.02691275579854846,4.0,0,0.018832897767424583,-0.02709220553515479,2.0,23,58,81
+HRM,316,77431,never,0,0,,,,,False,0.5679012537002563,0.07179681956768036,-0.050001843832433224,-0.07428471930325031,1.0,0.07179681956768036,0,-0.10261975228786469,-0.11673360224813223,0.0,0,-0.11660974472761154,-0.16935215424746275,0.0,0,-0.0808592215180397,-0.16467806603759527,0.0,0,-0.08030184358358383,-0.14469850901514292,0.0,0,-0.04688028246164322,-0.07280537579208612,0.0,0,-0.03313950449228287,-0.051083002239465714,0.0,0,0.03351370245218277,-0.052078822162002325,1.0,0,-0.0540354922413826,-0.07195692835375667,0.0,0,-0.11338676512241364,-0.181673776358366,0.0,0,0.07179681956768036,-0.050001843832433224,1.0,26,55,81
+HRM,317,259570,transient_or_regressed,1,2,23436,18000,,,False,0.7530864477157593,0.03486631438136101,-0.0023804300581105053,-0.02664742013439536,3.0,0.08849904127418995,0,-0.12133723497390747,-0.12617191765457392,0.0,0,-0.13083243370056152,-0.17454246804118156,0.0,0,-0.037106551229953766,-0.10964490100741386,0.0,0,-0.06730004400014877,-0.12329924199730158,0.0,0,0.001662148511968553,-0.03981530128413624,2.0,0,0.038360271602869034,-0.017544768925290555,2.0,0,0.03822246193885803,-0.0007656477391719818,4.0,0,0.10398419946432114,-0.004654322248825338,2.0,1,-0.1692916750907898,-0.18426890298724174,0.0,0,0.03486631438136101,-0.0023804300581105053,3.0,26,55,81
+HRM,318,254558,flaky_then_stable,5,5,10416,8000,23436,18000,True,1.0,-0.14000281691551208,-0.16440114378929138,-0.18547940626740456,0.0,0.0,0,-0.0961325466632843,-0.1280135726556182,0.0,0,-0.10391619801521301,-0.1559124616906047,0.0,0,-0.11156363785266876,-0.15210894867777824,0.0,1,-0.1911488175392151,-0.2197110503911972,0.0,0,-0.032764386385679245,-0.09663816774263978,0.0,1,-0.20081698894500732,-0.24712306074798107,0.0,1,-0.11964046955108643,-0.18298090621829033,0.0,0,0.049812011420726776,-0.057490121107548475,2.0,1,-0.12984594702720642,-0.15903466939926147,0.0,1,-0.14000281691551208,-0.16440114378929138,0.0,24,57,81
+HRM,319,378035,flaky_then_stable,7,3,7812,6000,13020,10000,True,1.0,-0.18233820796012878,-0.21830786392092705,-0.2334720902144909,0.0,0.0,0,-0.09430156648159027,-0.11266178917139769,0.0,0,-0.10970160365104675,-0.15631752647459507,0.0,1,-0.1174786314368248,-0.28340600710362196,0.0,0,-0.04677562415599823,-0.08843632787466049,0.0,1,-0.12696024775505066,-0.195516312494874,0.0,1,-0.18650935590267181,-0.24405218474566936,0.0,1,-0.04639032483100891,-0.08266811817884445,0.0,1,-0.08127661794424057,-0.1355813443660736,0.0,1,-0.14586950838565826,-0.1854462493211031,0.0,1,-0.18233820796012878,-0.21830786392092705,0.0,24,57,81
+HRM,320,406804,never,0,0,,,,,False,0.6172839403152466,0.10459311306476593,0.028978026879485697,-0.005462395143695176,6.0,0.274987421464175,0,-0.10356585681438446,-0.1278136158362031,0.0,0,-0.16696295142173767,-0.19209474883973598,0.0,0,-0.10300372540950775,-0.16157041490077972,0.0,0,-0.08025779575109482,-0.12584983091801405,0.0,0,0.0017966855084523559,-0.06947438647330273,1.0,0,0.08149288594722748,0.004914396209642291,4.0,0,0.020933181047439575,-0.01791386745753698,1.0,0,0.021874114871025085,-0.06030875869328156,2.0,0,0.004793872591108084,-0.047091873188037425,1.0,0,0.10459311306476593,0.028978026879485697,6.0,24,57,81
+HRM,321,344531,never,0,0,,,,,False,0.5802469253540039,0.14109812676906586,0.013922335056122392,-0.02264010882936418,3.0,0.20772827230393887,0,-0.11387377977371216,-0.12324570212513208,0.0,0,-0.11521338671445847,-0.15018379408866167,0.0,0,-0.06547338515520096,-0.16355376783758402,0.0,0,-0.09451058506965637,-0.14252922404557467,0.0,0,0.0412745326757431,-0.028200355358421803,2.0,0,0.05464870482683182,-0.017202075105160475,2.0,0,0.03930767998099327,-0.026062886230647564,2.0,0,0.08951017260551453,0.01562764449045062,4.0,0,-0.04772483557462692,-0.074321907479316,0.0,0,0.14109812676906586,0.013922335056122392,3.0,26,55,81
+HRM,322,363377,stable_learned,7,1,10416,8000,10416,8000,True,1.0,-0.07123517245054245,-0.13229513727128506,-0.18174599669873714,0.0,0.0,0,-0.08857202529907227,-0.11694619245827198,0.0,0,-0.10497382283210754,-0.14786675944924355,0.0,0,-0.10387854278087616,-0.14071045257151127,0.0,1,-0.19568370282649994,-0.23344633169472218,0.0,1,-0.1595497578382492,-0.21120562590658665,0.0,1,-0.17003636062145233,-0.20756536535918713,0.0,1,-0.034745726734399796,-0.06618896452710032,0.0,1,-0.10979777574539185,-0.140905337408185,0.0,1,-0.1344776749610901,-0.20970529317855835,0.0,1,-0.07123517245054245,-0.13229513727128506,0.0,25,56,81
+HRM,323,133292,transient_or_regressed,3,4,15624,12000,,,False,0.7037037014961243,-0.01745031401515007,-0.06993716163560748,-0.09404214285314083,0.0,0.0,0,-0.12039847671985626,-0.13017595186829567,0.0,0,-0.06597413122653961,-0.16260144673287868,0.0,0,-0.07126259803771973,-0.1345505155622959,0.0,0,0.05358497425913811,-0.009116418892517686,2.0,0,0.16744431853294373,0.02099861353963206,4.0,1,-0.19493341445922852,-0.23834428191184998,0.0,1,-0.11412975192070007,-0.16582907363772392,0.0,0,-0.007370129227638245,-0.05020519648678601,0.0,1,-0.19014506042003632,-0.23333091475069523,0.0,0,-0.01745031401515007,-0.06993716163560748,0.0,26,55,81
+HRM,324,182177,stable_learned,2,1,23436,18000,23436,18000,True,1.0,-0.19446393847465515,-0.21584567427635193,-0.22573303058743477,0.0,0.0,0,-0.06316927075386047,-0.10401067696511745,0.0,0,-0.13263042271137238,-0.14160804636776447,0.0,0,-0.06077040359377861,-0.12699761940166354,0.0,0,0.01783914305269718,-0.11187015264295042,1.0,0,-0.01652548834681511,-0.07034960575401783,0.0,0,0.01238955743610859,-0.052782750921323895,1.0,0,0.011722998693585396,-0.019428059342317283,1.0,0,-0.0075462269596755505,-0.04112723463913426,0.0,1,-0.11554860323667526,-0.14474110770970583,0.0,1,-0.19446393847465515,-0.21584567427635193,0.0,27,54,81
+HRM,325,150937,never,0,0,,,,,False,0.6419752836227417,0.028195399791002274,-0.019907687208615243,-0.044628931209445,3.0,0.03957872558385134,0,-0.09859645366668701,-0.11536000110208988,0.0,0,-0.1058526486158371,-0.14122647605836391,0.0,0,-0.058859534561634064,-0.13569868821650743,0.0,0,-0.0738067775964737,-0.1276566395536065,0.0,0,0.028835652396082878,-0.051873578457161784,1.0,0,0.02204519510269165,-0.014395404374226928,3.0,0,0.051638439297676086,0.009939863113686442,5.0,0,0.04777045175433159,-0.0060323376674205065,3.0,0,-0.038104090839624405,-0.05422482592985034,0.0,0,0.028195399791002274,-0.019907687208615243,3.0,24,57,81
+HRM,326,379717,never,0,0,,,,,False,0.6172839403152466,0.0015654953895136714,-0.05586091826262418,-0.07497669570147991,1.0,0.0015654953895136714,0,-0.10624431818723679,-0.11202609166502953,0.0,0,-0.07430335879325867,-0.14437270164489746,0.0,0,-0.1411687433719635,-0.17229284532368183,0.0,0,-0.0732751339673996,-0.11659219861030579,0.0,0,2.5954409466066863e-06,-0.05384663997875805,1.0,0,0.03177209943532944,-0.012765914667397738,3.0,0,0.07327528297901154,0.0007304947357624769,3.0,0,0.008364949375391006,-0.041831092443317175,1.0,0,0.008728114888072014,-0.02282011198985856,3.0,0,0.0015654953895136714,-0.05586091826262418,1.0,25,56,81
+HRM,327,96991,flaky_then_stable,5,3,13020,10000,18228,14000,True,1.0,-0.11992952227592468,-0.2048711311072111,-0.2364223711192608,0.0,0.0,0,-0.10674935579299927,-0.12794051878154278,0.0,0,-0.13426177203655243,-0.16399439238011837,0.0,0,-0.07140950858592987,-0.11025978811085224,0.0,0,-0.04250357672572136,-0.08981785224750638,0.0,1,-0.09140696376562119,-0.1413623159751296,0.0,0,0.03764104098081589,-0.014484356041066349,2.0,1,-0.091415636241436,-0.1499525299295783,0.0,1,-0.14554159343242645,-0.16988927125930786,0.0,1,-0.08299285173416138,-0.1708355937153101,0.0,1,-0.11992952227592468,-0.2048711311072111,0.0,27,54,81
+HRM,328,34793,never,0,0,,,,,False,0.9876543283462524,0.13318893313407898,0.03608835279010236,-0.003844145219773054,6.0,0.3368312492966652,0,-0.10677316039800644,-0.12313007842749357,0.0,0,-0.15571802854537964,-0.18893230147659779,0.0,0,-0.1194080039858818,-0.16294658463448286,0.0,0,-0.07313178479671478,-0.1290052281692624,0.0,0,0.08307263255119324,-0.0034952061250805855,3.0,0,0.047736071050167084,0.012603922048583627,5.0,0,0.19734351336956024,0.02739658090285957,4.0,0,0.13499486446380615,0.022019932337570935,4.0,0,0.07916829735040665,-0.04470367752946913,2.0,0,0.13318893313407898,0.03608835279010236,6.0,26,55,81
+HRM,329,178506,never,0,0,,,,,False,0.5555555820465088,0.013318339362740517,-0.049026996828615665,-0.07265760563313961,1.0,0.013318339362740517,0,-0.09997507184743881,-0.10728038009256124,0.0,0,-0.13818037509918213,-0.17262399941682816,0.0,0,-0.0922059714794159,-0.1558772837743163,0.0,0,-0.16800346970558167,-0.18805600702762604,0.0,0,-0.11711782217025757,-0.12732205167412758,0.0,0,0.009443992748856544,-0.0975852666888386,1.0,0,-0.015845097601413727,-0.07728286506608129,0.0,0,-0.054919905960559845,-0.08419983927160501,0.0,0,-0.024402417242527008,-0.06938878493383527,0.0,0,0.013318339362740517,-0.049026996828615665,1.0,24,57,81
+HRM,330,241389,flaky_then_stable,3,3,18228,14000,26040,20000,True,1.0,-0.18950605392456055,-0.22191964648663998,-0.236445102840662,0.0,0.0,0,-0.11262845247983932,-0.1240997277200222,0.0,0,-0.09098182618618011,-0.15483283624053001,0.0,0,-0.11872658133506775,-0.15280033648014069,0.0,0,-0.07974319159984589,-0.14799907617270947,0.0,0,0.024323720484972,-0.03427442780230194,1.0,0,0.043815918266773224,-0.008684686734341085,2.0,1,-0.1469995677471161,-0.19516126066446304,0.0,1,-0.1178053766489029,-0.15930171217769384,0.0,0,-0.05585582181811333,-0.08296570414677262,0.0,1,-0.18950605392456055,-0.22191964648663998,0.0,25,56,81
+HRM,331,98172,never,0,0,,,,,False,0.654321014881134,-0.031345583498477936,-0.0650672847405076,-0.0750463530421257,0.0,0.0,0,-0.09807492792606354,-0.10409830138087273,0.0,0,-0.14492249488830566,-0.1773186568170786,0.0,0,-0.14374512434005737,-0.17537224292755127,0.0,0,-0.12996484339237213,-0.18373097479343414,0.0,0,-0.12771648168563843,-0.14181060530245304,0.0,0,-0.0468406043946743,-0.10289497440680861,0.0,0,0.012169154360890388,-0.046341265784576535,1.0,0,-0.07576534897089005,-0.08530408795922995,0.0,0,-0.024546964094042778,-0.07667759037576616,0.0,0,-0.031345583498477936,-0.0650672847405076,0.0,26,55,81
+HRM,332,166448,stable_learned,8,1,7812,6000,7812,6000,True,1.0,-0.18856850266456604,-0.2182258702814579,-0.22858699783682823,0.0,0.0,0,-0.09466602653265,-0.10537939518690109,0.0,0,-0.1212511882185936,-0.16419360507279634,0.0,1,-0.20277076959609985,-0.2567667979747057,0.0,1,-0.16029128432273865,-0.1834970135241747,0.0,1,-0.2018122375011444,-0.2301772516220808,0.0,1,-0.21789656579494476,-0.2727798167616129,0.0,1,-0.19846510887145996,-0.21699807047843933,0.0,1,-0.07907042652368546,-0.1560832830145955,0.0,1,-0.006040151230990887,-0.18608405569102615,0.0,1,-0.18856850266456604,-0.2182258702814579,0.0,26,55,81
+HRM,333,169307,flaky_then_stable,5,3,13020,10000,18228,14000,True,1.0,-0.11307262629270554,-0.17736914101988077,-0.22456561028957367,0.0,0.0,0,-0.12019936740398407,-0.1256689354777336,0.0,0,-0.11266728490591049,-0.15210664179176092,0.0,0,-0.045867957174777985,-0.09707704558968544,0.0,0,-0.07879677414894104,-0.10619601514190435,0.0,1,-0.19920924305915833,-0.21761051751673222,0.0,0,0.004975182469934225,-0.016848391096573323,1.0,1,0.028226833790540695,-0.05186634452547878,2.0,1,-0.10739278793334961,-0.1458673682063818,0.0,1,-0.06838661432266235,-0.17970003746449947,0.0,1,-0.11307262629270554,-0.17736914101988077,0.0,17,64,81
+HRM,334,386281,flaky_then_stable,6,3,10416,8000,18228,14000,True,1.0,-0.044196587055921555,-0.1349417599849403,-0.17884549871087074,0.0,0.0,0,-0.07150633633136749,-0.0994182750582695,0.0,0,-0.09915824234485626,-0.15857010055333376,0.0,0,-0.04152005910873413,-0.09148390730842948,0.0,1,-0.16252675652503967,-0.24732743948698044,0.0,1,-0.20686426758766174,-0.22403069399297237,0.0,0,0.06066916137933731,0.021700031065847725,5.0,1,-0.020604023709893227,-0.14041256182827055,0.0,1,-0.07984084635972977,-0.13610416743904352,0.0,1,-0.12993213534355164,-0.18930588848888874,0.0,1,-0.044196587055921555,-0.1349417599849403,0.0,26,55,81
+HRM,335,20092,flaky_then_stable,7,3,7812,6000,20832,16000,True,1.0,-0.1779763400554657,-0.20806479826569557,-0.2255323939025402,0.0,0.0,0,-0.10389991849660873,-0.11206637695431709,0.0,0,-0.09055619686841965,-0.13032987900078297,0.0,1,-0.24528221786022186,-0.2915553506463766,0.0,1,-0.18947701156139374,-0.24363711290061474,0.0,1,-0.15700462460517883,-0.1926779057830572,0.0,1,-0.20941638946533203,-0.2503124810755253,0.0,0,0.04605887085199356,-0.0013060334604233503,3.0,1,-0.10921913385391235,-0.1454474963247776,0.0,1,-0.113816998898983,-0.1718346355482936,0.0,1,-0.1779763400554657,-0.20806479826569557,0.0,26,55,81
+HRM,336,70604,flaky_then_stable,6,3,10416,8000,15624,12000,True,1.0,-0.10116560757160187,-0.1698470078408718,-0.19704461842775345,0.0,0.0,0,-0.06445181369781494,-0.08749955613166094,0.0,0,-0.09586720913648605,-0.1837041126564145,0.0,0,-0.17424052953720093,-0.19465591199696064,0.0,1,-0.2092871367931366,-0.2513564135879278,0.0,0,0.0005378186469897628,-0.048906640222412534,1.0,1,-0.13012301921844482,-0.2343808300793171,0.0,1,-0.0871393084526062,-0.19484101608395576,0.0,1,-0.044745661318302155,-0.1524051371961832,0.0,1,-0.18146638572216034,-0.194833405315876,0.0,1,-0.10116560757160187,-0.1698470078408718,0.0,25,56,81
+HRM,337,79255,never,0,0,,,,,False,0.5925925970077515,0.0439305379986763,-0.024823879124596715,-0.0557399271056056,2.0,0.06407542154192924,0,-0.12230232357978821,-0.13098798412829638,0.0,0,-0.17178678512573242,-0.19071313552558422,0.0,0,-0.14379385113716125,-0.18251368030905724,0.0,0,-0.06578470021486282,-0.1680454807356,0.0,0,-0.044326864182949066,-0.09301829384639859,0.0,0,-0.05911551043391228,-0.09317742986604571,0.0,0,-0.007921716198325157,-0.06609960296191275,0.0,0,-0.039557479321956635,-0.08638572972267866,0.0,0,-0.03930868208408356,-0.07296222820878029,0.0,0,0.0439305379986763,-0.024823879124596715,2.0,25,56,81
+HRM,338,95579,never,0,0,,,,,False,0.5308641791343689,0.008613662794232368,-0.014561554446117952,-0.028694853419438004,1.0,0.008613662794232368,0,-0.08711755275726318,-0.10973381623625755,0.0,0,-0.10994845628738403,-0.1749365795403719,0.0,0,-0.1461096554994583,-0.1861142609268427,0.0,0,-0.07035445421934128,-0.1546362517401576,0.0,0,0.12096645683050156,-0.044141857884824276,1.0,0,-0.05985681712627411,-0.07366457022726536,0.0,0,-0.02972969226539135,-0.06380279525183141,0.0,0,-0.030693022534251213,-0.07351703592576087,0.0,0,-0.004495379980653524,-0.04764455900294706,0.0,0,0.008613662794232368,-0.014561554446117952,1.0,26,55,81
+HRM,339,268871,flaky_then_stable,7,3,7812,6000,20832,16000,True,1.0,-0.18498411774635315,-0.2323385439813137,-0.2612582817673683,0.0,0.0,0,-0.08580024540424347,-0.09932596143335104,0.0,0,-0.10863153636455536,-0.14898139610886574,0.0,1,-0.26403355598449707,-0.2783903554081917,0.0,1,-0.17613175511360168,-0.24086140096187592,0.0,1,-0.19612595438957214,-0.20983349345624447,0.0,1,-0.17550614476203918,-0.2230954673141241,0.0,0,-0.030497126281261444,-0.06344496225938201,0.0,1,-0.03872833028435707,-0.1423501749522984,0.0,1,-0.15767869353294373,-0.19237076677381992,0.0,1,-0.18498411774635315,-0.2323385439813137,0.0,26,55,81
+HRM,340,326575,stable_learned,5,1,15624,12000,15624,12000,True,1.0,-0.1608864963054657,-0.18101705610752106,-0.1985219269990921,0.0,0.0,0,-0.0718468651175499,-0.11048533860594034,0.0,0,-0.0897284671664238,-0.15136484894901514,0.0,0,-0.07409736514091492,-0.12994520086795092,0.0,0,-0.015350684523582458,-0.07824121508747339,0.0,0,0.07345808297395706,-0.01650184136815369,2.0,1,-0.16323134303092957,-0.20270938612520695,0.0,1,-0.16185906529426575,-0.1697203889489174,0.0,1,-0.12355582416057587,-0.14463676512241364,0.0,1,-0.17746442556381226,-0.22668559476733208,0.0,1,-0.1608864963054657,-0.18101705610752106,0.0,27,54,81
+HRM,341,221979,flaky_then_stable,5,3,13020,10000,18228,14000,True,1.0,-0.22076931595802307,-0.24263912998139858,-0.25314003974199295,0.0,0.0,0,-0.10590308904647827,-0.12395819742232561,0.0,0,-0.09218402951955795,-0.18444226775318384,0.0,0,-0.04935259371995926,-0.08931860094889998,0.0,0,-0.024845048785209656,-0.08202454075217247,0.0,1,-0.16059458255767822,-0.20534244924783707,0.0,0,-0.002819143235683441,-0.04607830662280321,0.0,1,0.021854903548955917,-0.043987233424559236,1.0,1,-0.08398353308439255,-0.1322898454964161,0.0,1,-0.1748933047056198,-0.21383417770266533,0.0,1,-0.22076931595802307,-0.24263912998139858,0.0,26,55,81
+HRM,342,129989,never,0,0,,,,,False,0.6790123581886292,0.010346079245209694,-0.018264659665874206,-0.03426239965483546,1.0,0.010346079245209694,0,-0.13385802507400513,-0.13829457014799118,0.0,0,-0.12647736072540283,-0.18441196717321873,0.0,0,-0.08052864670753479,-0.16225680150091648,0.0,0,-0.024770749732851982,-0.09275388228707016,0.0,0,-0.0005434815539047122,-0.08071118673251476,0.0,0,0.0020048010628670454,-0.05756122004822828,1.0,0,-0.003924266435205936,-0.04428590286988765,0.0,0,0.01389559917151928,-0.024055127578321844,3.0,0,-0.0077129206620156765,-0.039617237460333854,0.0,0,0.010346079245209694,-0.018264659665874206,1.0,27,54,81
+HRM,343,391646,stable_learned,5,1,15624,12000,15624,12000,True,1.0,-0.20137058198451996,-0.24132917076349258,-0.256786584854126,0.0,0.0,0,-0.10565070807933807,-0.11493469122797251,0.0,0,-0.14625413715839386,-0.16923190467059612,0.0,0,-0.052346181124448776,-0.19104764191433787,0.0,0,-0.09801213443279266,-0.1319094207137823,0.0,0,-0.09974251687526703,-0.13350320421159267,0.0,1,-0.249647319316864,-0.2653876915574074,0.0,1,-0.15128375589847565,-0.18632850609719753,0.0,1,-0.10250890254974365,-0.14475691318511963,0.0,1,-0.18149825930595398,-0.21657200902700424,0.0,1,-0.20137058198451996,-0.24132917076349258,0.0,28,53,81
+HRM,344,194609,flaky_then_stable,5,3,13020,10000,20832,16000,True,1.0,-0.1628635674715042,-0.20208540372550488,-0.2270870991051197,0.0,0.0,0,-0.11017704010009766,-0.1279874797910452,0.0,0,-0.11810370534658432,-0.1723814057186246,0.0,0,-0.12562525272369385,-0.15960044972598553,0.0,0,-0.013915007933974266,-0.052377024898305535,0.0,1,-0.12028130143880844,-0.17372523713856936,0.0,1,-0.2538556754589081,-0.27928997576236725,0.0,0,-0.015870440751314163,-0.04784521693363786,0.0,1,-0.1579870730638504,-0.1724449936300516,0.0,1,-0.1654599905014038,-0.19510171748697758,0.0,1,-0.1628635674715042,-0.20208540372550488,0.0,27,54,81
+HRM,345,191177,flaky_then_stable,5,3,13020,10000,23436,18000,True,1.0,-0.1522839367389679,-0.20875541493296623,-0.24045348539948463,0.0,0.0,0,-0.09425663948059082,-0.12297146581113338,0.0,0,-0.11777684092521667,-0.15307840146124363,0.0,0,0.0008493409841321409,-0.11357306072750362,1.0,0,0.015583954751491547,-0.08634675852954388,1.0,1,-0.09681472182273865,-0.13852596282958984,0.0,1,-0.20237812399864197,-0.25500163808465004,0.0,1,-0.07472221553325653,-0.15635946579277515,0.0,0,0.023975737392902374,-0.017052549170330167,3.0,1,-0.16286958754062653,-0.17809605412185192,0.0,1,-0.1522839367389679,-0.20875541493296623,0.0,17,64,81
+HRM,346,167181,never,0,0,,,,,False,0.6790123581886292,0.012545440346002579,-0.040665374021045864,-0.06760816834867,2.0,0.019558110274374485,0,-0.08465299010276794,-0.1225141603499651,0.0,0,-0.15806345641613007,-0.18681748397648335,0.0,0,-0.16169430315494537,-0.18348823674023151,0.0,0,-0.07466377317905426,-0.1501700859516859,0.0,0,0.006822293624281883,-0.07167899352498353,1.0,0,0.07243990898132324,-0.0280026912514586,2.0,0,0.002099917270243168,-0.030267272552009672,1.0,0,0.009355975314974785,-0.030210447148419917,2.0,0,-0.02998272329568863,-0.06728852679952979,0.0,0,0.012545440346002579,-0.040665374021045864,2.0,25,56,81
+HRM,347,31096,never,0,0,,,,,False,0.6172839403152466,0.004304629750549793,-0.03969529030564445,-0.0690178694203496,1.0,0.004304629750549793,0,-0.1047353446483612,-0.11830421909689903,0.0,0,-0.14104898273944855,-0.17417233996093273,0.0,0,-0.0730607882142067,-0.167453876696527,0.0,0,-0.14800328016281128,-0.18227531388401985,0.0,0,-0.05162562429904938,-0.1151427524164319,0.0,0,-0.007395739667117596,-0.0746179063571617,0.0,0,0.001770093571394682,-0.0616411185474135,1.0,0,-0.00707338098436594,-0.04580141662154347,0.0,0,0.0023535718210041523,-0.037782665342092514,1.0,0,0.004304629750549793,-0.03969529030564445,1.0,25,56,81
+HRM,348,218359,flaky_then_stable,5,3,13020,10000,20832,16000,True,1.0,-0.10945118963718414,-0.2005867250263691,-0.23557908460497856,0.0,0.0,0,-0.10647408664226532,-0.11801979225128889,0.0,0,-0.13579165935516357,-0.17556661367416382,0.0,0,-0.07195109874010086,-0.14385895617306232,0.0,0,0.01293222140520811,-0.06470470468048006,1.0,1,-0.16485606133937836,-0.2065274976193905,0.0,1,-0.14142760634422302,-0.1776354517787695,0.0,0,0.001717597246170044,-0.043781996704638004,1.0,1,-0.017906412482261658,-0.1103372061625123,0.0,1,-0.11923035234212875,-0.20529334899038076,0.0,1,-0.10945118963718414,-0.2005867250263691,0.0,27,54,81
+HRM,349,385443,transient_or_regressed,3,6,5208,4000,,,False,0.5679012537002563,-0.011398345232009888,-0.025523579446598887,-0.03480474790558219,0.0,0.0,0,-0.08323834836483002,-0.09987937659025192,0.0,1,-0.1542903333902359,-0.1918155513703823,0.0,0,-0.13735085725784302,-0.15731041319668293,0.0,0,-0.11865820735692978,-0.1367980418726802,0.0,0,-0.029979050159454346,-0.06329013733193278,0.0,1,-0.16611279547214508,-0.2238083854317665,0.0,0,0.05306600034236908,-0.017895285855047405,2.0,1,0.06839586794376373,-0.12238321267068386,1.0,0,0.01750876009464264,-0.044856292894110084,2.0,0,-0.011398345232009888,-0.025523579446598887,0.0,26,55,81
+HRM,350,405260,never,0,0,,,,,False,0.6666666865348816,-0.03764612227678299,-0.05762583436444402,-0.06959260255098343,0.0,0.0,0,-0.0869797021150589,-0.10673968680202961,0.0,0,-0.12241169065237045,-0.1409463668242097,0.0,0,-0.06946790218353271,-0.15598865412175655,0.0,0,-0.038214199244976044,-0.1371233994141221,0.0,0,-0.09232637286186218,-0.11094011925160885,0.0,0,-0.06543688476085663,-0.10167789366096258,0.0,0,-0.02334357611835003,-0.09152249875478446,0.0,0,-0.04625602066516876,-0.07933506648987532,0.0,0,-0.041155628859996796,-0.10163870267570019,0.0,0,-0.03764612227678299,-0.05762583436444402,0.0,25,56,81
+HRM,351,337508,transient_or_regressed,3,6,13020,10000,,,False,0.7777777910232544,0.035832688212394714,0.006834604631876573,-0.0062313785892911255,4.0,0.07960235141217709,0,-0.09800876677036285,-0.11775700561702251,0.0,0,-0.07949750125408173,-0.14484457671642303,0.0,0,-0.060513418167829514,-0.11354376701638103,0.0,0,-0.0018565913196653128,-0.07747510678018443,0.0,1,-0.14333969354629517,-0.178084596991539,0.0,0,0.05852499231696129,0.013230595737695694,4.0,1,-0.024681132286787033,-0.07743402337655425,0.0,0,0.06890809535980225,0.0020812161965295672,3.0,1,-0.14116983115673065,-0.19446200504899025,0.0,0,0.035832688212394714,0.006834604631876573,4.0,23,58,81
+HRM,352,296930,flaky_then_stable,4,3,13020,10000,26040,20000,True,1.0,-0.17902550101280212,-0.22438077069818974,-0.24157903715968132,0.0,0.0,0,-0.08698704838752747,-0.11337719485163689,0.0,0,-0.114020437002182,-0.18279364705085754,0.0,0,-0.10832410305738449,-0.16753752250224352,0.0,0,-0.11696009337902069,-0.13211736269295216,0.0,1,-0.12982606887817383,-0.17299878410995007,0.0,1,-0.12174289673566818,-0.2168125445023179,0.0,1,-0.14685247838497162,-0.17384845204651356,0.0,0,0.027939340099692345,-0.03211439726874232,1.0,0,-0.0008013877086341381,-0.061692472954746336,0.0,1,-0.17902550101280212,-0.22438077069818974,0.0,25,56,81
+HRM,353,211955,flaky_then_stable,6,5,7812,6000,18228,14000,True,1.0,-0.11950626969337463,-0.18625640869140625,-0.22007109969854355,0.0,0.0,0,-0.10699837654829025,-0.11899911891669035,0.0,0,-0.1386553794145584,-0.19663585349917412,0.0,1,-0.1783466935157776,-0.2845154721289873,0.0,0,-0.08757318556308746,-0.13681369181722403,0.0,1,-0.23640725016593933,-0.24533107317984104,0.0,0,0.11244579404592514,-0.011511425953358412,2.0,1,0.043795522302389145,-0.1345083094201982,1.0,1,-0.13551190495491028,-0.14812077023088932,0.0,1,-0.1671469509601593,-0.24418422393500805,0.0,1,-0.11950626969337463,-0.18625640869140625,0.0,26,55,81
+HRM,354,289526,stable_learned,2,1,23436,18000,23436,18000,True,1.0,-0.22043538093566895,-0.24010609276592731,-0.25088273361325264,0.0,0.0,0,-0.10464175790548325,-0.12051764968782663,0.0,0,-0.175154447555542,-0.20866704173386097,0.0,0,-0.06297549605369568,-0.12285875342786312,0.0,0,-0.016143251210451126,-0.06041430402547121,0.0,0,0.01694856584072113,-0.038262939007836394,1.0,0,0.06395593285560608,-0.009262772742658854,4.0,0,-0.030015958473086357,-0.08095858129672706,0.0,0,0.02547157183289528,-0.04467326425947249,1.0,1,-0.13078242540359497,-0.1888360884040594,0.0,1,-0.22043538093566895,-0.24010609276592731,0.0,26,55,81
+HRM,355,38565,flaky_then_stable,4,3,15624,12000,26040,20000,True,1.0,-0.17101836204528809,-0.18892658315598965,-0.19770464673638344,0.0,0.0,0,-0.08366040885448456,-0.1017997432500124,0.0,0,-0.09280373901128769,-0.14378841314464808,0.0,0,-0.06816976517438889,-0.15336722880601883,0.0,0,-0.11574099957942963,-0.16847308166325092,0.0,0,-0.09227531403303146,-0.11987783573567867,0.0,1,-0.05179564282298088,-0.20932525163516402,0.0,1,-0.006643069442361593,-0.12938910507364199,0.0,1,-0.06703518331050873,-0.13073991239070892,0.0,0,0.005225353874266148,-0.046070527168922126,1.0,1,-0.17101836204528809,-0.18892658315598965,0.0,23,58,81
+HRM,356,74012,flaky_then_stable,3,3,18228,14000,26040,20000,True,1.0,-0.1961296647787094,-0.22947779297828674,-0.24916979297995567,0.0,0.0,0,-0.08467568457126617,-0.09903108049184084,0.0,0,-0.144419863820076,-0.19189339876174927,0.0,0,-0.04550544545054436,-0.12544320756569505,0.0,0,0.04856761544942856,-0.09037585090845823,1.0,0,0.0718168318271637,0.008903203532099724,4.0,0,0.08958390355110168,0.04669261910021305,7.0,1,-0.11280791461467743,-0.13363162893801928,0.0,1,-0.12669774889945984,-0.1467667669057846,0.0,0,1.7058725632068672e-07,-0.027146066868832364,1.0,1,-0.1961296647787094,-0.22947779297828674,0.0,26,55,81
+HRM,357,95070,stable_learned,6,1,13020,10000,13020,10000,True,1.0,-0.07924547791481018,-0.17028113082051277,-0.21299885585904121,0.0,0.0,0,-0.10462584346532822,-0.13076338917016983,0.0,0,-0.11298713088035583,-0.1705004069954157,0.0,0,-0.1879999041557312,-0.22183272801339626,0.0,0,-0.0934525653719902,-0.15620657708495855,0.0,1,-0.1795649230480194,-0.211312560364604,0.0,1,-0.15566739439964294,-0.24165054224431515,0.0,1,-0.01863308623433113,-0.10642200894653797,0.0,1,-0.06531673669815063,-0.13964969851076603,0.0,1,-0.025712493807077408,-0.1873948727734387,0.0,1,-0.07924547791481018,-0.17028113082051277,0.0,25,56,81
+HRM,358,408908,stable_learned,6,1,13020,10000,13020,10000,True,1.0,-0.14297209680080414,-0.22041616775095463,-0.25400279089808464,0.0,0.0,0,-0.1247316524386406,-0.13873689156025648,0.0,0,-0.103652223944664,-0.16858458891510963,0.0,0,-0.07686866819858551,-0.12968081422150135,0.0,0,-0.003959250636398792,-0.09383939590770751,0.0,1,-0.15831393003463745,-0.189424442127347,0.0,1,-0.17030112445354462,-0.2367932628840208,0.0,1,-0.04823605716228485,-0.17918651178479195,0.0,1,-0.1001628190279007,-0.1574141588062048,0.0,1,-0.14347270131111145,-0.22791886888444424,0.0,1,-0.14297209680080414,-0.22041616775095463,0.0,26,55,81
+HRM,359,133668,flaky_then_stable,3,3,18228,14000,23436,18000,True,1.0,-0.13442577421665192,-0.17955259047448635,-0.20710934326052666,0.0,0.0,0,-0.08538758754730225,-0.11369147803634405,0.0,0,-0.12898817658424377,-0.18622029945254326,0.0,0,-0.08252902328968048,-0.12604035716503859,0.0,0,-0.01140558160841465,-0.06061105174012482,0.0,0,-0.04464322328567505,-0.06159334909170866,0.0,0,0.03051033429801464,-0.04321431438438594,1.0,1,-0.1787385195493698,-0.18801180087029934,0.0,0,-0.008780776523053646,-0.040364923886954784,0.0,1,-0.19125452637672424,-0.22808830812573433,0.0,1,-0.13442577421665192,-0.17955259047448635,0.0,25,56,81
+HRM,360,131029,transient_or_regressed,2,4,18228,14000,,,False,0.7777777910232544,0.012521747499704361,-0.03840784961357713,-0.06709954980760813,2.0,0.01893898844718933,0,-0.1165521964430809,-0.13818880263715982,0.0,0,-0.13862784206867218,-0.17212927527725697,0.0,0,-0.10293234884738922,-0.13512277323752642,0.0,0,0.005595048423856497,-0.0380624021490803,1.0,0,-0.039368875324726105,-0.07020452944561839,0.0,0,0.013702770695090294,-0.039379103342071176,1.0,1,0.06677376478910446,-0.04074327478883788,1.0,0,0.06886239349842072,-0.008899512002244592,2.0,1,-0.1162705272436142,-0.18998264893889427,0.0,0,0.012521747499704361,-0.03840784961357713,2.0,26,55,81
+HRM,361,369245,never,0,0,,,,,False,0.5555555820465088,0.019100982695817947,-0.04021872785233427,-0.07633885741233826,3.0,0.03669534472282976,0,-0.1056065708398819,-0.12033254280686378,0.0,0,-0.10784424841403961,-0.16814281791448593,0.0,0,-0.14921045303344727,-0.17254221066832542,0.0,0,-0.1583724319934845,-0.190534345805645,0.0,0,-0.1002333015203476,-0.12076276820152998,0.0,0,-0.0816444382071495,-0.12057886458933353,0.0,0,-0.08031675219535828,-0.0958917299285531,0.0,0,-0.0760234147310257,-0.10446155443787575,0.0,0,-0.006796961184591055,-0.06648468755884096,0.0,0,0.019100982695817947,-0.04021872785233427,3.0,24,57,81
+HRM,362,242661,transient_or_regressed,1,2,20832,16000,,,False,0.6913580298423767,0.03885303810238838,-0.04281667014583945,-0.06369820702821016,1.0,0.03885303810238838,0,-0.1247396469116211,-0.1315384805202484,0.0,0,-0.16593745350837708,-0.190822409465909,0.0,0,-0.08564412593841553,-0.16831986606121063,0.0,0,-0.09702585637569427,-0.1556952651590109,0.0,0,-0.04158415645360947,-0.10435657575726509,0.0,0,-0.0049453782849013805,-0.054121460823807865,0.0,0,0.013406498357653618,-0.04094731909572147,2.0,1,-0.1704227328300476,-0.19065876118838787,0.0,0,0.018707286566495895,-0.046255226247012615,1.0,0,0.03885303810238838,-0.04281667014583945,1.0,23,58,81
+HRM,363,131068,stable_learned,2,1,23436,18000,23436,18000,True,1.0,-0.05494636297225952,-0.12542147561907768,-0.14166942611336708,0.0,0.0,0,-0.09902694076299667,-0.11203158274292946,0.0,0,-0.12338148802518845,-0.16474350728094578,0.0,0,-0.013109855353832245,-0.12224376853555441,0.0,0,-0.04866740107536316,-0.11605616845190525,0.0,0,-0.06686359643936157,-0.09002353250980377,0.0,0,0.06637982279062271,-0.004316786420531571,2.0,0,0.0076469737105071545,-0.0647571692825295,1.0,0,-0.036800283938646317,-0.06551175005733967,0.0,1,-0.2284754365682602,-0.25595430471003056,0.0,1,-0.05494636297225952,-0.12542147561907768,0.0,26,55,81
+HRM,364,240805,transient_or_regressed,2,2,7812,6000,,,False,0.7530864477157593,0.05441007763147354,-0.0006763068959116936,-0.027562650269828737,4.0,0.1048401459120214,0,-0.09270550310611725,-0.11471773590892553,0.0,0,-0.08080989122390747,-0.15205992944538593,0.0,1,-0.26576346158981323,-0.302419763058424,0.0,1,-0.1874554455280304,-0.22662930563092232,0.0,0,0.03885406628251076,-0.04068633515998954,2.0,0,0.03262597322463989,-0.044101114734075963,1.0,0,-0.022580157965421677,-0.06400905922055244,0.0,0,-0.027737101539969444,-0.05158241139724851,0.0,0,-0.04196903854608536,-0.0715849450789392,0.0,0,0.05441007763147354,-0.0006763068959116936,4.0,26,55,81
+HRM,365,355466,never,0,0,,,,,False,0.6419752836227417,0.006134814582765102,-0.02823452203301713,-0.051249011885374784,2.0,0.010274501983076334,0,-0.115679070353508,-0.12318922579288483,0.0,0,-0.15156438946723938,-0.16817376762628555,0.0,0,-0.11413805186748505,-0.15975511819124222,0.0,0,-0.14946423470973969,-0.17062274180352688,0.0,0,-0.07923642545938492,-0.10387787502259016,0.0,0,-0.071115642786026,-0.09739155229181051,0.0,0,0.06364784389734268,-0.026130675221793354,3.0,0,-0.023758884519338608,-0.06615497497841716,0.0,0,0.000583491288125515,-0.07367772946599871,1.0,0,0.006134814582765102,-0.02823452203301713,2.0,26,55,81
+HRM,366,84276,transient_or_regressed,2,2,20832,16000,,,False,0.6913580298423767,0.043629616498947144,-0.040274203987792134,-0.07138228043913841,2.0,0.054083460941910744,0,-0.09065033495426178,-0.11858131270855665,0.0,0,-0.11490048468112946,-0.1653314959257841,0.0,0,-0.12706533074378967,-0.17604718543589115,0.0,0,-0.06512714177370071,-0.14803983364254236,0.0,0,0.009038358926773071,-0.07530623022466898,1.0,0,0.11047494411468506,-0.0251687690615654,1.0,0,-0.014819120988249779,-0.06679015909321606,0.0,1,-0.024294918403029442,-0.1409307464491576,0.0,1,-0.16353839635849,-0.22164444625377655,0.0,0,0.043629616498947144,-0.040274203987792134,2.0,25,56,81
+HRM,367,135944,stable_learned,5,1,15624,12000,15624,12000,True,1.0,-0.1749560832977295,-0.22586717642843723,-0.2466472126543522,0.0,0.0,0,-0.11091829836368561,-0.12710003647953272,0.0,0,-0.14716480672359467,-0.17036020755767822,0.0,0,-0.0951675996184349,-0.14848139509558678,0.0,0,0.05197770148515701,-0.030250933741626795,2.0,0,0.04457524046301842,-0.009547899710014462,3.0,1,-0.07609876245260239,-0.16074582003057003,0.0,1,-0.0909416675567627,-0.16621763072907925,0.0,1,-0.14317600429058075,-0.17271143198013306,0.0,1,-0.19145485758781433,-0.22458223439753056,0.0,1,-0.1749560832977295,-0.22586717642843723,0.0,26,55,81
+HRM,368,109376,flaky_then_stable,7,3,7812,6000,26040,20000,True,1.0,-0.15311375260353088,-0.18307297118008137,-0.19685481488704681,0.0,0.0,0,-0.06329379975795746,-0.10780230816453695,0.0,0,-0.18433547019958496,-0.21614428982138634,0.0,1,-0.21291528642177582,-0.27886918745934963,0.0,1,-0.14724504947662354,-0.2001679204404354,0.0,1,-0.161392480134964,-0.2079829592257738,0.0,1,-0.2269361913204193,-0.24967344477772713,0.0,1,0.03154480457305908,-0.022027709346730262,2.0,1,-0.06288617849349976,-0.1285892715677619,0.0,0,0.01731153205037117,-0.030297319724923,2.0,1,-0.15311375260353088,-0.18307297118008137,0.0,27,54,81
+HRM,369,283196,stable_learned,7,1,10416,8000,10416,8000,True,1.0,-0.15294283628463745,-0.2013245988637209,-0.23454803228378296,0.0,0.0,0,-0.06390789151191711,-0.10445753205567598,0.0,0,-0.14464879035949707,-0.18069804459810257,0.0,0,-0.11343307793140411,-0.15370296873152256,0.0,1,-0.17517364025115967,-0.21305476687848568,0.0,1,-0.1720397174358368,-0.22125626727938652,0.0,1,-0.13266843557357788,-0.24449078179895878,0.0,1,-0.048677682876586914,-0.12776552140712738,0.0,1,0.04244854301214218,-0.16173467505723238,1.0,1,-0.18832610547542572,-0.21025601215660572,0.0,1,-0.15294283628463745,-0.2013245988637209,0.0,26,55,81
+HRM,370,57056,never,0,0,,,,,False,0.7654321193695068,0.08603653311729431,0.03157876373734325,0.008196010021492839,6.0,0.27241959050297737,0,-0.08862116187810898,-0.1094778049737215,0.0,0,-0.10022479295730591,-0.17202168330550194,0.0,0,-0.11940085887908936,-0.16930654272437096,0.0,0,-0.12752242386341095,-0.16496689058840275,0.0,0,-0.03188318759202957,-0.1069354796782136,0.0,0,-0.0642230212688446,-0.10475627426058054,0.0,0,-0.008005211129784584,-0.04457521322183311,0.0,0,-0.05556420609354973,-0.07580565521493554,0.0,0,-0.03983011096715927,-0.06289406726136804,0.0,0,0.08603653311729431,0.03157876373734325,6.0,26,55,81
+HRM,371,110033,transient_or_regressed,1,2,18228,14000,,,False,0.8271604776382446,-0.003117914777249098,-0.03422925964696333,-0.047897580079734325,0.0,0.0,0,-0.08983634412288666,-0.1062761815264821,0.0,0,-0.11660387367010117,-0.1642666133120656,0.0,0,-0.11481152474880219,-0.16161783784627914,0.0,0,0.0034368890337646008,-0.09592419542605057,1.0,0,0.008605828508734703,-0.03354116145055741,3.0,0,0.08874448388814926,-0.013143920485163108,3.0,1,-0.08542953431606293,-0.10592973977327347,0.0,0,0.08805631846189499,0.04001570970285684,8.0,0,0.02997491881251335,-0.018262558849528432,3.0,0,-0.003117914777249098,-0.03422925964696333,0.0,26,55,81
+HRM,372,420979,transient_or_regressed,1,2,23436,18000,,,False,0.7037037014961243,0.12062828242778778,0.019741204701858805,-0.01543542486615479,3.0,0.21994487568736076,0,-0.12061764299869537,-0.13218918070197105,0.0,0,-0.16802042722702026,-0.20403001457452774,0.0,0,-0.14594893157482147,-0.15644176490604877,0.0,0,-0.05159192904829979,-0.09534264774993062,0.0,0,0.022889606654644012,-0.059963579988107085,2.0,0,0.05235612019896507,-0.012924164191645104,3.0,0,0.09100314974784851,0.005988695600535721,4.0,0,0.024416696280241013,-0.02465373386075953,2.0,1,-0.12317831814289093,-0.2393339965492487,0.0,0,0.12062828242778778,0.019741204701858805,3.0,26,55,81
+HRM,373,394495,transient_or_regressed,1,2,23436,18000,,,False,0.5061728358268738,-0.046890661120414734,-0.06446083448827267,-0.07261553034186363,0.0,0.0,0,-0.09326489269733429,-0.11187572684139013,0.0,0,-0.17498263716697693,-0.19028173200786114,0.0,0,-0.1018797904253006,-0.1719906609505415,0.0,0,-0.0774896889925003,-0.13534764386713505,0.0,0,-0.08195547759532928,-0.10876276809722185,0.0,0,-0.023445388302206993,-0.08391246269457042,0.0,0,-0.020214512944221497,-0.048711242619901896,0.0,0,-0.056842222809791565,-0.09961424395442009,0.0,1,-0.15166525542736053,-0.20419257134199142,0.0,0,-0.046890661120414734,-0.06446083448827267,0.0,24,57,81
+HRM,374,247658,flaky_then_stable,5,3,13020,10000,23436,18000,True,1.0,-0.14863580465316772,-0.23021895065903664,-0.2582625225186348,0.0,0.0,0,-0.07076717168092728,-0.11305323708802462,0.0,0,-0.13689327239990234,-0.1621426735073328,0.0,0,-0.12945935130119324,-0.15167497098445892,0.0,0,-0.06950942426919937,-0.10832428000867367,0.0,1,-0.18318168818950653,-0.20028476603329182,0.0,1,-0.2477329671382904,-0.27519072592258453,0.0,1,-0.09484496712684631,-0.17302775010466576,0.0,0,-0.011965066194534302,-0.06978638749569654,0.0,1,-0.12733444571495056,-0.17843466438353062,0.0,1,-0.14863580465316772,-0.23021895065903664,0.0,25,56,81
+HRM,375,304714,transient_or_regressed,6,2,10416,8000,,,False,0.6913580298423767,0.04328858479857445,-0.021945481770671904,-0.04327596817165613,1.0,0.04328858479857445,0,-0.06548050045967102,-0.10622013732790947,0.0,0,-0.08378284424543381,-0.1471362691372633,0.0,0,0.002342200605198741,-0.09613756570615806,1.0,1,-0.1256817728281021,-0.21713553555309772,0.0,1,-0.19374918937683105,-0.21441261284053326,0.0,1,-0.17968815565109253,-0.2329577375203371,0.0,1,-0.05975010618567467,-0.09651705855503678,0.0,1,-0.05844835191965103,-0.09673785418272018,0.0,1,-0.12400989979505539,-0.21204508002847433,0.0,0,0.04328858479857445,-0.021945481770671904,1.0,23,58,81
+HRM,376,391114,stable_learned,4,1,18228,14000,18228,14000,True,1.0,-0.14496786892414093,-0.17474482581019402,-0.19044207409024239,0.0,0.0,0,-0.08840671181678772,-0.10252165701240301,0.0,0,-0.1427438110113144,-0.18510222993791103,0.0,0,-0.10215985029935837,-0.15893940906971693,0.0,0,-0.011171977035701275,-0.059038374456577,0.0,0,0.03876052796840668,-0.009036419913172722,4.0,0,0.18302473425865173,0.009458569897105917,4.0,1,-0.05614931136369705,-0.18910603132098913,0.0,1,-0.09012800455093384,-0.13255998492240906,0.0,1,-0.18826472759246826,-0.21493091993033886,0.0,1,-0.14496786892414093,-0.17474482581019402,0.0,28,53,81
+HRM,377,115729,transient_or_regressed,1,2,18228,14000,,,False,0.6913580298423767,0.005529975518584251,-0.022697937907651067,-0.036960989236831665,1.0,0.005529975518584251,0,-0.10407738387584686,-0.11752323433756828,0.0,0,-0.1353195160627365,-0.1638388503342867,0.0,0,-0.09227785468101501,-0.15781897958368063,0.0,0,-0.08542338013648987,-0.13628459628671408,0.0,0,0.031209809705615044,-0.019188114907592535,3.0,0,0.05411342903971672,0.003069355560000986,3.0,1,-0.05308299511671066,-0.10267433151602745,0.0,0,0.06050906702876091,-0.04461952578276396,1.0,0,-0.019272800534963608,-0.03784603672102094,0.0,0,0.005529975518584251,-0.022697937907651067,1.0,26,55,81
+HRM,378,268614,flaky_then_stable,5,3,13020,10000,18228,14000,True,1.0,-0.16916625201702118,-0.21249456889927387,-0.2321738861501217,0.0,0.0,0,-0.09441053122282028,-0.1181355994194746,0.0,0,-0.14654476940631866,-0.19643100164830685,0.0,0,-0.10039900243282318,-0.12768675200641155,0.0,0,-0.04462435096502304,-0.10132661834359169,0.0,1,-0.0017773056169971824,-0.033363439186359756,0.0,0,0.023244213312864304,-0.0033231148845516145,3.0,1,-0.12386937439441681,-0.15347955748438835,0.0,1,-0.06346672028303146,-0.09891705960035324,0.0,1,-0.22443924844264984,-0.2639387510716915,0.0,1,-0.16916625201702118,-0.21249456889927387,0.0,26,55,81
+HRM,379,216643,stable_learned,4,1,18228,14000,18228,14000,True,1.0,-0.15270741283893585,-0.19515208341181278,-0.21241170912981033,0.0,0.0,0,-0.08997619152069092,-0.11427733302116394,0.0,0,-0.11390921473503113,-0.16570864990353584,0.0,0,-0.0553329661488533,-0.13759399112313986,0.0,0,-0.0509209930896759,-0.09640135057270527,0.0,0,-0.007557518780231476,-0.059835346415638924,0.0,0,-0.017278339713811874,-0.033352412981912494,0.0,1,-0.01822424679994583,-0.10707037313841283,0.0,1,-0.18105606734752655,-0.1972534842789173,0.0,1,-0.08178360015153885,-0.18950282502919436,0.0,1,-0.15270741283893585,-0.19515208341181278,0.0,27,54,81
+HRM,380,97682,flaky_then_stable,8,3,5208,4000,23436,18000,True,1.0,-0.10910111665725708,-0.1656905822455883,-0.1888892650604248,0.0,0.0,0,-0.0999847799539566,-0.11541778966784477,0.0,1,-0.17876404523849487,-0.2001467291265726,0.0,1,-0.24907079339027405,-0.2888238839805126,0.0,1,-0.1173337921500206,-0.17979102674871683,0.0,1,-0.15918323397636414,-0.18784174509346485,0.0,1,-0.15162044763565063,-0.24799784272909164,0.0,1,-0.04800928756594658,-0.15178163489326835,0.0,0,0.057581719011068344,-0.020497386634815484,1.0,1,-0.13821566104888916,-0.19393138587474823,0.0,1,-0.10910111665725708,-0.1656905822455883,0.0,23,58,81
+HRM,381,37099,flaky_then_stable,6,3,10416,8000,15624,12000,True,1.0,-0.1355104148387909,-0.16885553114116192,-0.1934223249554634,0.0,0.0,0,-0.10086050629615784,-0.13018856197595596,0.0,0,-0.09846564382314682,-0.13554780092090368,0.0,0,-0.09828504174947739,-0.17001343239098787,0.0,1,-0.16356681287288666,-0.22337941266596317,0.0,0,0.08152545243501663,-0.023657389552681707,2.0,1,-0.2309756875038147,-0.2684537135064602,0.0,1,-0.04798024520277977,-0.13300763396546245,0.0,1,-0.0445672832429409,-0.12578756036236882,0.0,1,-0.19394123554229736,-0.22528240829706192,0.0,1,-0.1355104148387909,-0.16885553114116192,0.0,24,57,81
+HRM,382,292375,never,0,0,,,,,False,0.5925925970077515,0.07987572252750397,-0.049299874692223966,-0.08536835573613644,1.0,0.07987572252750397,0,-0.08970604836940765,-0.10969392396509647,0.0,0,-0.12876154482364655,-0.1862070094794035,0.0,0,-0.027696888893842697,-0.13089439226314425,0.0,0,-0.004752699285745621,-0.06374225299805403,0.0,0,0.012030050158500671,-0.01550931365636643,2.0,0,0.09468948096036911,0.056830975227057934,8.0,0,0.01725776121020317,-0.004250595447956584,4.0,0,0.09205611795186996,0.047595877200365067,8.0,0,0.0515395924448967,-0.004751376574859023,3.0,0,0.07987572252750397,-0.049299874692223966,1.0,26,55,81
+HRM,383,274711,transient_or_regressed,2,4,18228,14000,,,False,0.8518518805503845,0.06590614467859268,0.02959361841203645,0.014722048887051642,8.0,0.2367489472962916,0,-0.08548086881637573,-0.11852424312382936,0.0,0,-0.12368114292621613,-0.17012177221477032,0.0,0,-0.05737074837088585,-0.11600615130737424,0.0,0,-0.0001653609797358513,-0.09523723216261715,0.0,0,-0.01704595610499382,-0.07677377061918378,0.0,0,0.03841274976730347,-0.02950523281469941,2.0,1,-0.059732772409915924,-0.15276734065264463,0.0,0,0.00480201281607151,-0.03609298635274172,1.0,1,-0.17351847887039185,-0.20172196254134178,0.0,0,0.06590614467859268,0.02959361841203645,8.0,24,57,81
+HRM,384,278703,flaky_then_stable,6,3,7812,6000,15624,12000,True,1.0,-0.15790681540966034,-0.22977151162922382,-0.2515872046351433,0.0,0.0,0,-0.09025779366493225,-0.099193193949759,0.0,0,-0.10391831398010254,-0.1502984557300806,0.0,1,-0.18971765041351318,-0.24459644593298435,0.0,0,0.023635730147361755,-0.06020798557437956,2.0,0,0.03796916455030441,-0.02015260956250131,2.0,1,-0.2578667402267456,-0.2859068475663662,0.0,1,-0.15884816646575928,-0.20946337282657623,0.0,1,-0.06808197498321533,-0.12907910346984863,0.0,1,-0.12459876388311386,-0.17623193468898535,0.0,1,-0.15790681540966034,-0.22977151162922382,0.0,26,55,81
+HRM,385,221234,never,0,0,,,,,False,0.5555555820465088,-0.03895607590675354,-0.08598070964217186,-0.097750224173069,0.0,0.0,0,-0.0978023037314415,-0.11086442414671183,0.0,0,-0.12013638019561768,-0.15645506232976913,0.0,0,-0.057368066161870956,-0.1462964708916843,0.0,0,-0.02092500403523445,-0.12501567648723722,0.0,0,-0.019443778321146965,-0.08322261436842382,0.0,0,-0.009514197707176208,-0.07594075938686728,0.0,0,-0.05448644980788231,-0.07693071058019996,0.0,0,-0.033451326191425323,-0.06800515437498689,0.0,0,-0.04882323369383812,-0.09853816824033856,0.0,0,-0.03895607590675354,-0.08598070964217186,0.0,26,55,81
+HRM,386,205191,stable_learned,1,1,26040,20000,26040,20000,True,1.0,-0.15123818814754486,-0.1959385722875595,-0.2193547897040844,0.0,0.0,0,-0.07895052433013916,-0.0993748027831316,0.0,0,-0.12955237925052643,-0.15817666985094547,0.0,0,-0.0996527299284935,-0.14691224321722984,0.0,0,-0.11169617623090744,-0.15115851163864136,0.0,0,-0.05522863194346428,-0.09969849837943912,0.0,0,-0.0150518249720335,-0.05604379600845277,0.0,0,-0.02344820648431778,-0.05566814495250583,0.0,0,0.037438225001096725,-0.0616078854072839,1.0,0,0.02070191502571106,-0.032527318195207044,2.0,1,-0.15123818814754486,-0.1959385722875595,0.0,24,57,81
+HRM,387,287482,never,0,0,,,,,False,0.4444444477558136,0.12683016061782837,0.047988632519263774,0.0009715646738186479,7.0,0.3961522704921663,0,-0.09348442405462265,-0.11188058741390705,0.0,0,-0.11467021703720093,-0.1495907697826624,0.0,0,-0.1261364221572876,-0.17376290075480938,0.0,0,-0.11840619146823883,-0.152982072904706,0.0,0,-0.034810397773981094,-0.07515424117445946,0.0,0,-0.03296433389186859,-0.0707625662907958,0.0,0,0.12006144970655441,0.01803326705703512,3.0,0,-0.04200948774814606,-0.06979944836348295,0.0,0,0.0008018052903935313,-0.050844413475715555,1.0,0,0.12683016061782837,0.047988632519263774,7.0,24,57,81
+HRM,388,283530,stable_learned,6,1,13020,10000,13020,10000,True,1.0,-0.1680375635623932,-0.20145517587661743,-0.21830536052584648,0.0,0.0,0,-0.10943824052810669,-0.1282729236409068,0.0,0,-0.1650780737400055,-0.20900760404765606,0.0,0,-0.13498084247112274,-0.14793927408754826,0.0,0,0.03219807893037796,-0.04173184555838816,2.0,1,-0.1824406236410141,-0.20979839749634266,0.0,1,-0.10720005631446838,-0.18389282189309597,0.0,1,0.07079249620437622,-0.06845792953390628,2.0,1,-0.1080622598528862,-0.13631952833384275,0.0,1,-0.008892337791621685,-0.11831356061156839,0.0,1,-0.1680375635623932,-0.20145517587661743,0.0,26,55,81
+HRM,389,224414,flaky_then_stable,5,5,7812,6000,20832,16000,True,1.0,-0.17693230509757996,-0.2027637604624033,-0.21610531583428383,0.0,0.0,0,-0.03079763613641262,-0.08585311169736087,0.0,0,-0.10752289742231369,-0.1339777074754238,0.0,1,-0.168776735663414,-0.2569559719413519,0.0,0,-0.09440439939498901,-0.11160691175609827,0.0,1,-0.14784151315689087,-0.18178064189851284,0.0,0,0.049718596041202545,0.006566992873558775,5.0,0,0.1606879085302353,0.02361186221241951,3.0,1,-0.12319132685661316,-0.16162469051778316,0.0,1,-0.08992120623588562,-0.17932629399001598,0.0,1,-0.17693230509757996,-0.2027637604624033,0.0,25,56,81
+HRM,390,343932,stable_learned,5,1,15624,12000,15624,12000,True,1.0,-0.12158158421516418,-0.18029064685106277,-0.21580560505390167,0.0,0.0,0,-0.1118820533156395,-0.12195133138448,0.0,0,-0.17409223318099976,-0.2051201481372118,0.0,0,-0.10361398756504059,-0.12370016239583492,0.0,0,-0.015568872913718224,-0.05534862191416323,0.0,0,0.11777546256780624,0.013578845537267625,5.0,1,-0.06944922357797623,-0.16829740535467863,0.0,1,0.030832137912511826,-0.06856307201087475,1.0,1,-0.12211649864912033,-0.17698181327432394,0.0,1,-0.1611444056034088,-0.21069122850894928,0.0,1,-0.12158158421516418,-0.18029064685106277,0.0,25,56,81
+HRM,391,399811,always,10,0,2604,2000,2604,2000,True,1.0,-0.1318514347076416,-0.15694018080830574,-0.17331304401159286,0.0,0.0,1,-0.07428634911775589,-0.16918626800179482,0.0,1,-0.21311980485916138,-0.2449585199356079,0.0,1,-0.129270538687706,-0.25381410494446754,0.0,1,-0.2702552080154419,-0.30223462730646133,0.0,1,-0.1634787917137146,-0.21058614924550056,0.0,1,-0.30596017837524414,-0.3182668462395668,0.0,1,0.06060303747653961,-0.1611201437190175,1.0,1,0.017555158585309982,-0.050103276618756354,2.0,1,0.005012836307287216,-0.20433801459148526,1.0,1,-0.1318514347076416,-0.15694018080830574,0.0,34,47,81
+HRM,392,211334,stable_learned,4,1,18228,14000,18228,14000,True,1.0,-0.13620460033416748,-0.17993752844631672,-0.20047005638480186,0.0,0.0,0,-0.06540165841579437,-0.11830820515751839,0.0,0,-0.12254836410284042,-0.18072752375155687,0.0,0,-0.046450354158878326,-0.12084212992340326,0.0,0,-0.07629571110010147,-0.1300967838615179,0.0,0,0.011474557220935822,-0.04160685068927705,1.0,0,0.12069976329803467,0.0035661842266563326,2.0,1,-0.09344439953565598,-0.13211937807500362,0.0,1,-0.1053917333483696,-0.14199547097086906,0.0,1,-0.17630557715892792,-0.21120324172079563,0.0,1,-0.13620460033416748,-0.17993752844631672,0.0,27,54,81
+HRM,393,46351,transient_or_regressed,1,2,23436,18000,,,False,0.6296296119689941,0.1535540223121643,0.005936720495810732,-0.025808371137827635,2.0,0.15825376193970442,0,-0.11444565653800964,-0.12206534575670958,0.0,0,-0.1118188351392746,-0.17988629825413227,0.0,0,-0.12289377301931381,-0.1555198086425662,0.0,0,0.017724743112921715,-0.07368759531527758,1.0,0,-0.04509437829256058,-0.08063628990203142,0.0,0,-0.016288254410028458,-0.05398821528069675,0.0,0,0.018967900425195694,-0.036942093865945935,2.0,0,0.029310738667845726,-0.04171642542860354,1.0,1,-0.11659331619739532,-0.18191090784966946,0.0,0,0.1535540223121643,0.005936720495810732,2.0,26,55,81
+HRM,394,300514,never,0,0,,,,,False,0.7160493731498718,0.03255656361579895,-0.010620403685607016,-0.027427980676293373,2.0,0.03740041609853506,0,-0.10453853011131287,-0.11513512581586838,0.0,0,-0.11895262449979782,-0.152322961948812,0.0,0,-0.09172816574573517,-0.1682016309350729,0.0,0,-0.04996999353170395,-0.10545758623629808,0.0,0,0.05759765952825546,-0.0455174365779385,1.0,0,0.015597380697727203,-0.028017761884257197,1.0,0,0.12523645162582397,0.01602031197398901,5.0,0,0.043595727533102036,-0.023760281852446496,1.0,0,0.023954791948199272,-0.026456202380359173,1.0,0,0.03255656361579895,-0.010620403685607016,2.0,24,57,81
+HRM,395,181808,flaky_then_stable,6,3,7812,6000,20832,16000,True,1.0,-0.15203645825386047,-0.17527037113904953,-0.18864458799362183,0.0,0.0,0,-0.1337461769580841,-0.14172889851033688,0.0,0,-0.14946922659873962,-0.17590525560081005,0.0,1,-0.1419259011745453,-0.22580545395612717,0.0,1,-0.1573001742362976,-0.21417663991451263,0.0,1,-0.17204180359840393,-0.20995678193867207,0.0,0,-0.13593846559524536,-0.17749685049057007,0.0,0,0.17646372318267822,0.10014295112341642,8.0,1,-0.11002372205257416,-0.14077665098011494,0.0,1,-0.11956679821014404,-0.16511645540595055,0.0,1,-0.15203645825386047,-0.17527037113904953,0.0,17,64,81
+HRM,396,390540,transient_or_regressed,1,2,10416,8000,,,False,0.6419752836227417,0.02272019535303116,-0.016844416910316795,-0.036004170309752226,2.0,0.0401422455906868,0,-0.07949496060609818,-0.11710360832512379,0.0,0,-0.14807994663715363,-0.17957809939980507,0.0,0,-0.13138321042060852,-0.17313816398382187,0.0,1,-0.1575542539358139,-0.18294212967157364,0.0,0,-0.0041991290636360645,-0.06082028109813109,0.0,0,-0.04195808619260788,-0.06300471723079681,0.0,0,0.035062871873378754,-0.036630648450227454,2.0,0,0.12224382162094116,-0.031915494706481695,1.0,0,-0.01221004780381918,-0.06603767431806773,0.0,0,0.02272019535303116,-0.016844416910316795,2.0,24,57,81
+HRM,397,398839,transient_or_regressed,1,2,20832,16000,,,False,0.6419752836227417,0.06462421268224716,-0.013818231644108891,-0.06298978254199028,3.0,0.16158244013786316,0,-0.0931503102183342,-0.11405617836862803,0.0,0,-0.15023788809776306,-0.17274588532745838,0.0,0,-0.11252480745315552,-0.16288181394338608,0.0,0,-0.08973927795886993,-0.1672074981033802,0.0,0,-0.0022791679948568344,-0.09954469534568489,0.0,0,0.0202653706073761,-0.05853243079036474,1.0,0,-0.010243847966194153,-0.05049872910603881,0.0,1,-0.10670047253370285,-0.15704359859228134,0.0,0,-0.03768445551395416,-0.0666585243307054,0.0,0,0.06462421268224716,-0.013818231644108891,3.0,26,55,81
+HRM,398,47904,transient_or_regressed,2,4,13020,10000,,,False,0.5802469253540039,0.12572290003299713,-0.018434974714182317,-0.05495555931702256,1.0,0.12572290003299713,0,-0.09154841303825378,-0.11192364059388638,0.0,0,-0.14381441473960876,-0.16715765185654163,0.0,0,-0.11642777919769287,-0.1594451628625393,0.0,0,-0.08564936369657516,-0.12353658583015203,0.0,1,-0.19034729897975922,-0.20973264425992966,0.0,0,0.014563430100679398,-0.06848951848223805,1.0,0,-0.04856356605887413,-0.07597866095602512,0.0,0,0.0407775416970253,-0.03859109644690761,2.0,1,-0.12225332856178284,-0.16958810202777386,0.0,0,0.12572290003299713,-0.018434974714182317,1.0,22,59,81
+HRM,399,17613,never,0,0,,,,,False,0.6172839403152466,-0.03157331049442291,-0.04734554700553417,-0.05893746390938759,0.0,0.0,0,-0.1030028685927391,-0.1188607094809413,0.0,0,-0.16875678300857544,-0.19584945961833,0.0,0,-0.1428082138299942,-0.18154113553464413,0.0,0,-0.17319419980049133,-0.18610241077840328,0.0,0,-0.08924507349729538,-0.12553690653294325,0.0,0,-0.03130171075463295,-0.09138968354091048,0.0,0,0.0100284144282341,-0.03947301767766476,1.0,0,-0.009828481823205948,-0.07186910975724459,0.0,0,0.012078090570867062,-0.08763877593446523,1.0,0,-0.03157331049442291,-0.04734554700553417,0.0,25,56,81
+HRM,400,400864,never,0,0,,,,,False,0.654321014881134,0.00860588625073433,-0.03661047061905265,-0.053534689359366894,1.0,0.00860588625073433,0,-0.08920137584209442,-0.10789359267801046,0.0,0,-0.10254354774951935,-0.1349825533106923,0.0,0,-0.08100979030132294,-0.15138966403901577,0.0,0,-0.13442184031009674,-0.1502330917865038,0.0,0,-0.01727975159883499,-0.0665606688708067,0.0,0,0.03711897134780884,-0.02642703684978187,2.0,0,0.028172533959150314,-0.02602164668496698,3.0,0,0.040419407188892365,-0.06400118162855506,1.0,0,0.1123448833823204,-0.0047544611152261496,3.0,0,0.00860588625073433,-0.03661047061905265,1.0,26,55,81
+HRM,401,126170,stable_learned,6,1,13020,10000,13020,10000,True,1.0,-0.2044917643070221,-0.23753300495445728,-0.2557198256254196,0.0,0.0,0,-0.08185022324323654,-0.11332026682794094,0.0,0,-0.15919329226016998,-0.19057826697826385,0.0,0,0.009093395434319973,-0.15537746727932245,1.0,0,-0.06470809876918793,-0.10950881894677877,0.0,1,-0.1589917689561844,-0.1894843503832817,0.0,1,-0.26441168785095215,-0.2820378690958023,0.0,1,-0.08687485754489899,-0.1860483642667532,0.0,1,-0.072946697473526,-0.13794361613690853,0.0,1,-0.10666708648204803,-0.21283475868403912,0.0,1,-0.2044917643070221,-0.23753300495445728,0.0,27,54,81
+HRM,402,357082,never,0,0,,,,,False,0.5432098507881165,-0.01406888198107481,-0.05603055178653449,-0.06565320398658514,0.0,0.0,0,-0.09947415441274643,-0.11239764280617237,0.0,0,-0.1158081591129303,-0.16176424734294415,0.0,0,-0.11842774599790573,-0.1636614641174674,0.0,0,0.027406077831983566,-0.09856382058933377,1.0,0,-0.044155389070510864,-0.0938781350851059,0.0,0,-0.004391882102936506,-0.05519020609790459,0.0,0,0.010148881003260612,-0.03874786850064993,1.0,0,0.05504216253757477,-0.03200619947165251,2.0,0,-0.06571318954229355,-0.09222626034170389,0.0,0,-0.01406888198107481,-0.05603055178653449,0.0,22,59,81
+HRM,403,132759,stable_learned,5,1,15624,12000,15624,12000,True,1.0,-0.12986087799072266,-0.171750508248806,-0.19503666833043098,0.0,0.0,0,-0.0802205428481102,-0.10317427478730679,0.0,0,-0.09287227690219879,-0.16678701899945736,0.0,0,-0.07649659365415573,-0.12519904226064682,0.0,0,-0.06499959528446198,-0.11598832719027996,0.0,0,-0.008293552324175835,-0.04845313774421811,0.0,1,-0.19208240509033203,-0.2124851755797863,0.0,1,-0.14335665106773376,-0.18327107280492783,0.0,1,-0.14593832194805145,-0.17032517865300179,0.0,1,-0.1885952651500702,-0.21846203692257404,0.0,1,-0.12986087799072266,-0.171750508248806,0.0,27,54,81
+HRM,404,265745,flaky_then_stable,5,5,7812,6000,23436,18000,True,1.0,-0.16482169926166534,-0.2183353416621685,-0.24215612560510635,0.0,0.0,0,-0.08039821684360504,-0.10555974394083023,0.0,0,-0.10337358713150024,-0.15970090590417385,0.0,1,-0.2375560998916626,-0.2650385033339262,0.0,0,-0.06131231412291527,-0.12131234863772988,0.0,1,-0.1298193335533142,-0.20843337289988995,0.0,1,-0.24211005866527557,-0.2658024840056896,0.0,0,0.03353075683116913,-0.0028184149996377528,2.0,0,0.004468944389373064,-0.029617667620186694,2.0,1,-0.21956925094127655,-0.23845125176012516,0.0,1,-0.16482169926166534,-0.2183353416621685,0.0,26,55,81
+HRM,405,190217,flaky_then_stable,5,3,10416,8000,20832,16000,True,1.0,-0.1111285611987114,-0.1725108316168189,-0.2033100612461567,0.0,0.0,0,-0.09173550456762314,-0.11514661833643913,0.0,0,-0.10967305302619934,-0.14671464916318655,0.0,0,-0.07050955295562744,-0.1725226379930973,0.0,1,-0.08428233116865158,-0.19037790689617395,0.0,1,-0.20610716938972473,-0.22664914280176163,0.0,0,0.03621342033147812,-0.004734677349915728,4.0,0,-0.0018803456332534552,-0.020923473726725206,0.0,1,-0.14361247420310974,-0.16607792861759663,0.0,1,-0.12033498287200928,-0.20669237710535526,0.0,1,-0.1111285611987114,-0.1725108316168189,0.0,24,57,81
+HRM,406,386922,flaky_then_stable,6,5,7812,6000,18228,14000,True,1.0,-0.10621023178100586,-0.1752220131456852,-0.19766614958643913,0.0,0.0,0,-0.07045474648475647,-0.10417008399963379,0.0,0,-0.13382381200790405,-0.14978460781276226,0.0,1,-0.23407600820064545,-0.2501941341906786,0.0,0,-0.05228462070226669,-0.09176628943532705,0.0,1,-0.19475552439689636,-0.22036872804164886,0.0,0,0.0025616134516894817,-0.03785236761905253,1.0,1,-0.004339936655014753,-0.056779663485940546,0.0,1,-0.12020017951726913,-0.15870218817144632,0.0,1,-0.08219893276691437,-0.12535526044666767,0.0,1,-0.10621023178100586,-0.1752220131456852,0.0,25,56,81
+HRM,407,277613,transient_or_regressed,4,4,10416,8000,,,False,0.7283950448036194,0.1645480990409851,0.052619826441514306,-0.004644127475330606,5.0,0.43993228452745825,0,-0.10327580571174622,-0.11164536327123642,0.0,0,-0.1345408856868744,-0.18694400414824486,0.0,0,-0.022228654474020004,-0.12938263593241572,0.0,1,-0.150808185338974,-0.20564118772745132,0.0,1,-0.10579369962215424,-0.16653019934892654,0.0,1,-0.07744150608778,-0.11750833783298731,0.0,0,0.019198164343833923,-1.765233446349157e-05,4.0,0,-0.07632289826869965,-0.13661333825439215,0.0,1,-0.13073164224624634,-0.19043251685798168,0.0,0,0.1645480990409851,0.052619826441514306,5.0,26,55,81
+HRM,408,415720,stable_learned,5,1,15624,12000,15624,12000,True,1.0,-0.15727491676807404,-0.2209740076214075,-0.24328049272298813,0.0,0.0,0,-0.1066618412733078,-0.12058562226593494,0.0,0,-0.07780023664236069,-0.14058003574609756,0.0,0,0.017931625247001648,-0.13577806763350964,1.0,0,0.05595875531435013,-0.04193279007449746,2.0,0,0.028235455974936485,-0.06908709392882884,1.0,1,-0.10012713074684143,-0.20713252574205399,0.0,1,-0.12004929035902023,-0.15423554088920355,0.0,1,-0.0961848646402359,-0.13962081540375948,0.0,1,-0.1741901934146881,-0.24278109148144722,0.0,1,-0.15727491676807404,-0.2209740076214075,0.0,22,59,81
+HRM,409,334392,stable_learned,1,1,26040,20000,26040,20000,True,1.0,-0.06431989371776581,-0.1292191483080387,-0.14831678569316864,0.0,0.0,0,-0.09247429668903351,-0.11828925460577011,0.0,0,-0.1514844298362732,-0.1651869472116232,0.0,0,-0.11101354658603668,-0.1289243856444955,0.0,0,-0.0009520892053842545,-0.12273581163026392,0.0,0,0.02675778791308403,-0.06977907801046968,1.0,0,0.05529334768652916,0.0022981385700404644,4.0,0,0.02902194857597351,-0.02499810140579939,2.0,0,0.0755547434091568,-0.01690834912005812,2.0,0,0.02117840200662613,-0.006828608573414385,4.0,1,-0.06431989371776581,-0.1292191483080387,0.0,25,56,81
+HRM,410,344038,never,0,0,,,,,False,0.604938268661499,0.05657897889614105,-0.01009375078137964,-0.03381209075450897,3.0,0.06430066004395485,0,-0.10718530416488647,-0.1176418662071228,0.0,0,-0.07846581935882568,-0.1617109701037407,0.0,0,-0.0657486617565155,-0.13147437386214733,0.0,0,-0.11307155340909958,-0.1386381732299924,0.0,0,-0.08184490352869034,-0.11248490680009127,0.0,0,0.02467264235019684,-0.04055281705223024,1.0,0,0.05785227566957474,-0.007334168069064617,4.0,0,0.21946606040000916,0.003639346919953823,2.0,0,0.0014788595726713538,-0.03720285436429549,1.0,0,0.05657897889614105,-0.01009375078137964,3.0,24,57,81
+HRM,411,292752,never,0,0,,,,,False,0.6790123581886292,0.08816877007484436,-0.008201815537177026,-0.04003847669810057,2.0,0.12130483612418175,0,-0.07731189578771591,-0.10747639741748571,0.0,0,-0.0673067718744278,-0.14082816615700722,0.0,0,-0.09042732417583466,-0.13843317329883575,0.0,0,-0.10973259061574936,-0.15986242797225714,0.0,0,0.03141888603568077,-0.049986131489276886,1.0,0,0.11665614694356918,-0.008717040065675974,2.0,0,0.03502675145864487,0.018341935880016536,7.0,0,0.061097122728824615,-0.01272511261049658,3.0,0,-0.008150143548846245,-0.056898694252595305,0.0,0,0.08816877007484436,-0.008201815537177026,2.0,26,55,81
+HRM,412,145504,never,0,0,,,,,False,0.5679012537002563,0.058146312832832336,0.003814102499745786,-0.010755898838397115,3.0,0.07531773392111063,0,-0.10295789688825607,-0.11889857240021229,0.0,0,-0.11838546395301819,-0.15258690901100636,0.0,0,-0.12312491983175278,-0.14763109292834997,0.0,0,-0.05882060527801514,-0.16213699989020824,0.0,0,-0.027393890544772148,-0.09936128905974329,0.0,0,-0.0400548130273819,-0.09043694101274014,0.0,0,-0.047978129237890244,-0.07023179298266768,0.0,0,-0.05821498855948448,-0.09304196899756789,0.0,0,0.013982567004859447,-0.05523758812341839,1.0,0,0.058146312832832336,0.003814102499745786,3.0,24,57,81
+HRM,413,108663,never,0,0,,,,,False,0.6913580298423767,0.09884848445653915,-0.006127875996753573,-0.048420706763863564,3.0,0.16190171986818314,0,-0.10939803719520569,-0.12170981895178556,0.0,0,-0.17444777488708496,-0.21069361083209515,0.0,0,0.01414414495229721,-0.12451362330466509,1.0,0,-0.025564108043909073,-0.05927033070474863,0.0,0,0.016513880342245102,-0.03028642083518207,2.0,0,0.06325822323560715,0.020481650943111163,6.0,0,0.03845322132110596,-0.0013415529683697969,3.0,0,-0.0720718502998352,-0.08053120225667953,0.0,0,-0.0015292956959456205,-0.05633627748466097,0.0,0,0.09884848445653915,-0.006127875996753573,3.0,25,56,81
+HRM,414,209089,stable_learned,5,1,15624,12000,15624,12000,True,1.0,-0.1973147839307785,-0.21570210345089436,-0.22405202686786652,0.0,0.0,0,-0.10085656493902206,-0.11451325099915266,0.0,0,-0.07344099879264832,-0.16185553465038538,0.0,0,-0.0019411134999245405,-0.10157174229971133,0.0,0,-0.0033221449702978134,-0.05876437353435904,0.0,0,0.016923561692237854,-0.020964290044503286,3.0,1,-0.24355262517929077,-0.25879195891320705,0.0,1,-0.08415612578392029,-0.155741348862648,0.0,1,-0.0382826030254364,-0.11336873658001423,0.0,1,-0.13446025550365448,-0.21055172383785248,0.0,1,-0.1973147839307785,-0.21570210345089436,0.0,26,55,81
+HRM,415,25789,flaky_then_stable,4,3,15624,12000,26040,20000,True,1.0,-0.1474311500787735,-0.16864372044801712,-0.18390515819191933,0.0,0.0,0,-0.10552486777305603,-0.12193734850734472,0.0,0,-0.12508413195610046,-0.1571802869439125,0.0,0,-0.07515189051628113,-0.1427327636629343,0.0,0,-0.037822313606739044,-0.10182211175560951,0.0,0,0.048590850085020065,-0.024591611116193235,1.0,1,-0.07253780961036682,-0.1625760281458497,0.0,1,-0.030358649790287018,-0.09242758341133595,0.0,1,-0.0166697409003973,-0.07845817669294775,0.0,0,0.0650055930018425,-0.003266621097282041,3.0,1,-0.1474311500787735,-0.16864372044801712,0.0,17,64,81
+HRM,416,333768,never,0,0,,,,,False,0.5308641791343689,-0.026485612615942955,-0.0537659740075469,-0.07326725404709578,0.0,0.0,0,-0.09683986008167267,-0.1189177418127656,0.0,0,-0.08865711092948914,-0.15272292867302895,0.0,0,-0.15237832069396973,-0.18266975693404675,0.0,0,-0.13868585228919983,-0.15447021648287773,0.0,0,0.04980473220348358,-0.046792361594270915,1.0,0,-0.024400025606155396,-0.06517852563410997,0.0,0,-0.05715777724981308,-0.07470979169011116,0.0,0,-0.046415023505687714,-0.07606619084253907,0.0,0,-0.02097763679921627,-0.06396044162102044,0.0,0,-0.026485612615942955,-0.0537659740075469,0.0,23,58,81
+HRM,417,322679,flaky_then_stable,4,3,15624,12000,23436,18000,True,1.0,-0.14257657527923584,-0.20659055560827255,-0.223227821290493,0.0,0.0,0,-0.07651003450155258,-0.09217868745326996,0.0,0,-0.17156395316123962,-0.18959055840969086,0.0,0,-0.14555807411670685,-0.16628104448318481,0.0,0,-0.1022539734840393,-0.11698205210268497,0.0,0,-0.002886350266635418,-0.07436432724352926,0.0,1,-0.2319175899028778,-0.26372474804520607,0.0,1,-0.07242822647094727,-0.166950561106205,0.0,0,-0.015385748818516731,-0.05656963330693543,0.0,1,-0.20711278915405273,-0.24239851161837578,0.0,1,-0.14257657527923584,-0.20659055560827255,0.0,27,54,81
+HRM,418,267002,stable_learned,4,1,18228,14000,18228,14000,True,1.0,-0.14335155487060547,-0.16736031137406826,-0.18419058993458748,0.0,0.0,0,-0.07529153674840927,-0.12156555149704218,0.0,0,-0.12628409266471863,-0.16156352683901787,0.0,0,-0.13042159378528595,-0.16640350222587585,0.0,0,-0.035365499556064606,-0.08072769129648805,0.0,0,0.034834884107112885,-0.003773662494495511,4.0,0,0.08982464671134949,0.03592183848377317,7.0,1,-0.18700391054153442,-0.19955904595553875,0.0,1,-0.16943520307540894,-0.1932381484657526,0.0,1,-0.1813865303993225,-0.21950732357800007,0.0,1,-0.14335155487060547,-0.16736031137406826,0.0,28,53,81
+HRM,419,322867,stable_learned,8,1,7812,6000,7812,6000,True,1.0,-0.1883360743522644,-0.20928213745355606,-0.22374069318175316,0.0,0.0,0,-0.06214449927210808,-0.09937799395993352,0.0,0,-0.1268336921930313,-0.1644986942410469,0.0,1,-0.1792360246181488,-0.2774624414741993,0.0,1,-0.2531540095806122,-0.2727324776351452,0.0,1,-0.179852694272995,-0.2110664416104555,0.0,1,-0.09719597548246384,-0.22650154773145914,0.0,1,0.0653456300497055,-0.12259311694651842,1.0,1,-0.1001172885298729,-0.17012471798807383,0.0,1,-0.16038820147514343,-0.23544049076735973,0.0,1,-0.1883360743522644,-0.20928213745355606,0.0,26,55,81
+HRM,420,408180,stable_learned,8,1,7812,6000,7812,6000,True,1.0,-0.0699964314699173,-0.16530825104564428,-0.20746106281876564,0.0,0.0,0,-0.07248856872320175,-0.09658881463110447,0.0,0,-0.1154017299413681,-0.1910073645412922,0.0,1,-0.18117380142211914,-0.2509420271962881,0.0,1,-0.15885108709335327,-0.22302442975342274,0.0,1,-0.12090589851140976,-0.1798314517363906,0.0,1,-0.12125362455844879,-0.21916868351399899,0.0,1,-0.10654481500387192,-0.18990921135991812,0.0,1,-0.16050739586353302,-0.176343010738492,0.0,1,-0.07906777411699295,-0.19638859760016203,0.0,1,-0.0699964314699173,-0.16530825104564428,0.0,27,54,81
+HRM,421,121870,never,0,0,,,,,False,0.6419752836227417,-0.014938939362764359,-0.041192201897501945,-0.05676343012601137,0.0,0.0,0,-0.10969033092260361,-0.11940221674740314,0.0,0,-0.1181393563747406,-0.16779882833361626,0.0,0,-0.048302434384822845,-0.16228908766061068,0.0,0,0.02790091745555401,-0.12909540557302535,1.0,0,-0.022674478590488434,-0.10753254313021898,0.0,0,-0.06453077495098114,-0.08925217483192682,0.0,0,-0.03632686659693718,-0.07359373662620783,0.0,0,-0.016578253358602524,-0.07348434207960963,0.0,0,-0.04160819202661514,-0.07495161332190037,0.0,0,-0.014938939362764359,-0.041192201897501945,0.0,24,57,81
+HRM,422,244435,transient_or_regressed,1,2,20832,16000,,,False,0.6172839403152466,-0.022181451320648193,-0.05883970996364951,-0.07904395088553429,0.0,0.0,0,-0.11735491454601288,-0.12067853286862373,0.0,0,-0.08226579427719116,-0.1420938167721033,0.0,0,-0.11085691303014755,-0.18637951742857695,0.0,0,-0.1153441071510315,-0.17229991406202316,0.0,0,-0.12086260318756104,-0.14651568047702312,0.0,0,-0.056897684931755066,-0.08546551037579775,0.0,0,-0.01852283626794815,-0.06521634384989738,0.0,1,-0.13546404242515564,-0.15369869023561478,0.0,0,0.018465936183929443,-0.052068420802243054,1.0,0,-0.022181451320648193,-0.05883970996364951,0.0,23,58,81
+HRM,423,321750,transient_or_regressed,1,2,15624,12000,,,False,0.5679012537002563,-0.0173735860735178,-0.057289464166387916,-0.07839283905923367,0.0,0.0,0,-0.10103558003902435,-0.10880614910274744,0.0,0,-0.12070332467556,-0.1423686444759369,0.0,0,-0.12237678468227386,-0.18176905252039433,0.0,0,-0.014962846413254738,-0.1150098831858486,0.0,0,-0.08835159242153168,-0.1201626667752862,0.0,1,-0.08738793432712555,-0.147896034643054,0.0,0,-0.027620933949947357,-0.04676491487771273,0.0,0,-0.005005762912333012,-0.06798375549260527,0.0,0,-0.017875241115689278,-0.06178992218337953,0.0,0,-0.0173735860735178,-0.057289464166387916,0.0,24,57,81
+HRM,424,114521,stable_learned,8,1,7812,6000,7812,6000,True,1.0,-0.07856400310993195,-0.1659859074279666,-0.21808180212974548,0.0,0.0,0,-0.0861053615808487,-0.09948641806840897,0.0,0,-0.09753494709730148,-0.15586285386234522,0.0,1,-0.1944189965724945,-0.266515601426363,0.0,1,-0.24664098024368286,-0.27309365570545197,0.0,1,-0.1845552772283554,-0.20865214616060257,0.0,1,-0.20417587459087372,-0.2270128894597292,0.0,1,-0.05381642282009125,-0.14549635909497738,0.0,1,-0.08358385413885117,-0.13028354942798615,0.0,1,-0.12497678399085999,-0.21126355603337288,0.0,1,-0.07856400310993195,-0.1659859074279666,0.0,23,58,81
+HRM,425,211167,never,0,0,,,,,False,0.5802469253540039,0.09887456893920898,-0.0006974267307668924,-0.025727159809321165,3.0,0.11657306924462318,0,-0.10631512850522995,-0.12504335306584835,0.0,0,-0.12151636928319931,-0.1636946266517043,0.0,0,-0.13012593984603882,-0.14324609749019146,0.0,0,0.04069766029715538,-0.060456333914771676,1.0,0,-0.06753125786781311,-0.08513048943132162,0.0,0,0.007833880372345448,-0.04584238410461694,1.0,0,-0.014806769788265228,-0.04890914307907224,0.0,0,-0.004665175452828407,-0.05255624884739518,0.0,0,0.021680360659956932,-0.02688267435223679,2.0,0,0.09887456893920898,-0.0006974267307668924,3.0,23,58,81
+HRM,426,397598,flaky_then_stable,3,3,18228,14000,23436,18000,True,1.0,-0.10035146772861481,-0.19155693612992764,-0.22424331307411194,0.0,0.0,0,-0.10671626776456833,-0.12053693644702435,0.0,0,-0.0772789716720581,-0.1684722937643528,0.0,0,-0.07495655864477158,-0.13895149435847998,0.0,0,-0.048914458602666855,-0.07694047316908836,0.0,0,0.0536918118596077,0.013138777925632894,5.0,0,0.058220233768224716,0.029983837041072547,7.0,1,-0.10679753869771957,-0.15347640495747328,0.0,0,0.13020312786102295,0.04043176626510103,4.0,1,-0.170219287276268,-0.18605910055339336,0.0,1,-0.10035146772861481,-0.19155693612992764,0.0,25,56,81
+HRM,427,263228,flaky_then_stable,5,3,13020,10000,18228,14000,True,1.0,-0.19128406047821045,-0.2187320739030838,-0.23261279240250587,0.0,0.0,0,-0.08068358898162842,-0.1038253465667367,0.0,0,-0.0975586399435997,-0.1587997553870082,0.0,0,-0.11904909461736679,-0.1389502054080367,0.0,0,0.049955256283283234,-0.023754660389386117,2.0,1,-0.15181103348731995,-0.16992049291729927,0.0,0,0.03982957452535629,-0.006319519481621683,2.0,1,-0.16589400172233582,-0.18685991689562798,0.0,1,-0.10445788502693176,-0.1480342112481594,0.0,1,-0.19685503840446472,-0.23306789249181747,0.0,1,-0.19128406047821045,-0.2187320739030838,0.0,26,55,81
+HRM,428,408225,flaky_then_stable,5,3,10416,8000,18228,14000,True,1.0,-0.1569431573152542,-0.2008580807596445,-0.21869618445634842,0.0,0.0,0,-0.09658782929182053,-0.1041166577488184,0.0,0,-0.05333133786916733,-0.14651607815176249,0.0,0,-0.09752916544675827,-0.12700978573411703,0.0,1,-0.15031777322292328,-0.21549112163484097,0.0,0,0.0028777241241186857,-0.026123106625163928,1.0,0,0.007154203485697508,-0.00449949452013243,2.0,1,-0.03822924196720123,-0.08879662118852139,0.0,1,-0.04472731798887253,-0.12785448599606752,0.0,1,-0.2073475420475006,-0.22515736520290375,0.0,1,-0.1569431573152542,-0.2008580807596445,0.0,26,55,81
+HRM,429,83851,stable_learned,5,1,15624,12000,15624,12000,True,1.0,-0.18105679750442505,-0.2245146706700325,-0.2397538274526596,0.0,0.0,0,-0.12036123871803284,-0.1301093641668558,0.0,0,-0.1670093536376953,-0.18922831490635872,0.0,0,-0.05801840126514435,-0.1124919792637229,0.0,0,-0.014932624064385891,-0.06931042799260467,0.0,0,-0.00757803488522768,-0.023690902395173907,0.0,1,-0.11746596544981003,-0.13834015559405088,0.0,1,-0.07426463812589645,-0.13499930407851934,0.0,1,-0.0905415266752243,-0.14397629536688328,0.0,1,-0.13274633884429932,-0.181831369176507,0.0,1,-0.18105679750442505,-0.2245146706700325,0.0,25,56,81
+HRM,430,194659,stable_learned,4,1,18228,14000,18228,14000,True,1.0,-0.1210545152425766,-0.19158299826085567,-0.21617960184812546,0.0,0.0,0,-0.1119145080447197,-0.11825613211840391,0.0,0,-0.13015276193618774,-0.1946481540799141,0.0,0,-0.08402960747480392,-0.1372205587103963,0.0,0,-0.07004494965076447,-0.09416528604924679,0.0,0,0.07385607063770294,-0.02093979762867093,2.0,0,0.03061886690557003,-0.0101226605111151,4.0,1,0.08357098698616028,-0.008837603032588959,3.0,1,-0.0665055513381958,-0.14263250771909952,0.0,1,-0.17478567361831665,-0.2141879666596651,0.0,1,-0.1210545152425766,-0.19158299826085567,0.0,27,54,81
+HRM,431,164035,stable_learned,1,1,26040,20000,26040,20000,True,1.0,-0.1704486459493637,-0.19557552598416805,-0.20989041030406952,0.0,0.0,0,-0.09092959016561508,-0.10130669362843037,0.0,0,-0.1255655288696289,-0.17101730778813362,0.0,0,-0.15394923090934753,-0.18430795706808567,0.0,0,-0.14086894690990448,-0.17322522401809692,0.0,0,-0.009215276688337326,-0.07617015717551112,0.0,0,-0.008828705176711082,-0.08779788785614073,0.0,0,-0.02979028970003128,-0.05387641116976738,0.0,0,-0.012477243319153786,-0.0661124624311924,0.0,0,-0.005152182653546333,-0.06539855361916125,0.0,1,-0.1704486459493637,-0.19557552598416805,0.0,24,57,81
+HRM,432,375254,never,0,0,,,,,False,0.6296296119689941,0.03145203739404678,-0.03710077953292057,-0.07268951460719109,2.0,0.04410659521818161,0,-0.09800729155540466,-0.10943535063415766,0.0,0,-0.12590456008911133,-0.17711882293224335,0.0,0,-0.11496075242757797,-0.18587390054017305,0.0,0,-0.1029333621263504,-0.15161123778671026,0.0,0,0.006349853239953518,-0.04920463764574379,1.0,0,-0.03194421902298927,-0.06070264056324959,0.0,0,-0.002646153327077627,-0.04804623470408842,0.0,0,-0.03114037774503231,-0.08278586599044502,0.0,0,-0.030298609286546707,-0.061670363415032625,0.0,0,0.03145203739404678,-0.03710077953292057,2.0,25,56,81
+HRM,433,284352,flaky_then_stable,4,3,13020,10000,23436,18000,True,1.0,-0.15879938006401062,-0.18190930597484112,-0.19029400125145912,0.0,0.0,0,-0.08935078233480453,-0.1122749662026763,0.0,0,-0.12061060965061188,-0.1625448688864708,0.0,0,-0.07206625491380692,-0.11369591299444437,0.0,0,-0.0367995947599411,-0.09648068342357874,0.0,1,-0.17109403014183044,-0.2039567492902279,0.0,1,-0.19171784818172455,-0.20958010107278824,0.0,0,-0.004122225567698479,-0.03457264369353652,0.0,0,0.01630566269159317,-0.03172011760761961,1.0,1,-0.16169671714305878,-0.21872309781610966,0.0,1,-0.15879938006401062,-0.18190930597484112,0.0,24,57,81
+HRM,434,368073,flaky_then_stable,7,3,7812,6000,13020,10000,True,1.0,-0.2052636593580246,-0.23631297796964645,-0.24991688877344131,0.0,0.0,0,-0.09869619458913803,-0.11929588485509157,0.0,0,-0.08722907304763794,-0.1370441084727645,0.0,1,-0.20650941133499146,-0.2391135934740305,0.0,0,-0.03553677350282669,-0.10088932048529387,0.0,1,-0.2017362117767334,-0.23487415350973606,0.0,1,-0.1961309164762497,-0.26058436185121536,0.0,1,-0.0706377699971199,-0.11963296215981245,0.0,1,-0.04640015587210655,-0.09854526119306684,0.0,1,-0.16271242499351501,-0.2213498018682003,0.0,1,-0.2052636593580246,-0.23631297796964645,0.0,24,57,81
+HRM,435,97808,flaky_then_stable,5,5,5208,4000,20832,16000,True,1.0,-0.18190324306488037,-0.20275185257196426,-0.219171442091465,0.0,0.0,0,-0.08826418966054916,-0.10903617925941944,0.0,1,-0.2171095460653305,-0.23948237858712673,0.0,0,-0.11235102266073227,-0.14024790097028017,0.0,0,-0.021985789760947227,-0.08673551096580923,0.0,0,-0.046549584716558456,-0.06357050081714988,0.0,1,-0.1331353634595871,-0.210956247523427,0.0,0,-0.0005625550402328372,-0.03365886780375149,0.0,1,-0.12216506898403168,-0.15584728308022022,0.0,1,-0.07316708564758301,-0.19474951550364494,0.0,1,-0.18190324306488037,-0.20275185257196426,0.0,25,56,81
+HRM,436,364855,transient_or_regressed,1,2,13020,10000,,,False,0.7037037014961243,-0.05175529420375824,-0.07010388281196356,-0.0811469778418541,0.0,0.0,0,-0.11451374739408493,-0.13079689629375935,0.0,0,-0.10878494381904602,-0.15761296451091766,0.0,0,-0.062015634030103683,-0.12753535388037562,0.0,0,-0.09803497046232224,-0.15601680893450975,0.0,1,-0.13710111379623413,-0.19122095219790936,0.0,0,-0.008223715238273144,-0.05493423261214048,0.0,0,-0.0088114058598876,-0.03196326887700707,0.0,0,-0.03388698771595955,-0.047685069497674704,0.0,0,0.005503896623849869,-0.05684148508589715,1.0,0,-0.05175529420375824,-0.07010388281196356,0.0,22,59,81
+HRM,437,31772,stable_learned,4,1,18228,14000,18228,14000,True,1.0,-0.20185302197933197,-0.22234883718192577,-0.23320380970835686,0.0,0.0,0,-0.08761884272098541,-0.11508525721728802,0.0,0,-0.1309889554977417,-0.18411530181765556,0.0,0,-0.03749910742044449,-0.1346330652013421,0.0,0,0.05762426182627678,-0.044211587170138955,2.0,0,0.015314145013689995,-0.04290138534270227,1.0,0,0.07375743240118027,0.008156815194524825,3.0,1,-0.038739513605833054,-0.14548082603141665,0.0,1,-0.03842667490243912,-0.08827489148825407,0.0,1,-0.20497477054595947,-0.22158895432949066,0.0,1,-0.20185302197933197,-0.22234883718192577,0.0,27,54,81
+HRM,438,112435,stable_learned,1,1,26040,20000,26040,20000,True,1.0,-0.10750677436590195,-0.16477028001099825,-0.19382517784833908,0.0,0.0,0,-0.12483149021863937,-0.1336486665531993,0.0,0,-0.13307803869247437,-0.21184257417917252,0.0,0,-0.11169343441724777,-0.18533228989690542,0.0,0,-0.017931774258613586,-0.11750446073710918,0.0,0,-0.04169885814189911,-0.11194261256605387,0.0,0,-0.014439474791288376,-0.04607996065169573,0.0,0,0.07609964907169342,0.0255249305992038,6.0,0,0.035303547978401184,-0.05071621714159846,1.0,0,0.024569809436798096,-0.03178166225552559,2.0,1,-0.10750677436590195,-0.16477028001099825,0.0,26,55,81
+HRM,439,347431,stable_learned,7,1,10416,8000,10416,8000,True,1.0,-0.1657891869544983,-0.21133543737232685,-0.24254880845546722,0.0,0.0,0,-0.07334955036640167,-0.12127205263823271,0.0,0,-0.13338343799114227,-0.15539861470460892,0.0,0,-0.03502707928419113,-0.1045949961990118,0.0,1,-0.1556457281112671,-0.23799614049494267,0.0,1,-0.14947810769081116,-0.1877366304397583,0.0,1,-0.20401149988174438,-0.2459904421120882,0.0,1,-0.026906674727797508,-0.1471269268076867,0.0,1,-0.139674112200737,-0.1655886322259903,0.0,1,-0.15996122360229492,-0.20941311493515968,0.0,1,-0.1657891869544983,-0.21133543737232685,0.0,24,57,81
+HRM,440,37706,flaky_then_stable,4,5,10416,8000,23436,18000,True,1.0,0.019867895171046257,-0.07250169944018126,-0.10011317022144794,1.0,0.019867895171046257,0,-0.10458489507436752,-0.11590166576206684,0.0,0,-0.1154886931180954,-0.16780762933194637,0.0,0,-0.1559034287929535,-0.17473097145557404,0.0,1,-0.21089142560958862,-0.2556034307926893,0.0,0,0.014188416302204132,-0.015697705202910583,3.0,0,0.09968331456184387,0.0274625712481793,6.0,1,0.035137854516506195,-0.06376831512898207,2.0,0,0.1355423629283905,0.07469055661931634,8.0,1,-0.12576346099376678,-0.21304198168218136,0.0,1,0.019867895171046257,-0.07250169944018126,1.0,25,56,81
+HRM,441,284263,stable_learned,8,1,7812,6000,7812,6000,True,1.0,-0.09951841086149216,-0.19750258419662714,-0.23918679356575012,0.0,0.0,0,-0.09846937656402588,-0.10952997952699661,0.0,0,-0.11304007470607758,-0.15674679353833199,0.0,1,-0.21007493138313293,-0.26778164505958557,0.0,1,-0.19675730168819427,-0.22671680711209774,0.0,1,-0.1635356843471527,-0.2009633146226406,0.0,1,-0.24193213880062103,-0.2766025383025408,0.0,1,-0.18386349081993103,-0.21321445144712925,0.0,1,-0.07083439081907272,-0.14150721859186888,0.0,1,-0.16980445384979248,-0.21829666197299957,0.0,1,-0.09951841086149216,-0.19750258419662714,0.0,27,54,81
+HRM,442,300420,stable_learned,4,1,18228,14000,18228,14000,True,1.0,-0.12984628975391388,-0.18001840822398663,-0.20928974822163582,0.0,0.0,0,-0.1224714145064354,-0.1297028111293912,0.0,0,-0.15929922461509705,-0.18174929916858673,0.0,0,-0.11839258670806885,-0.16403055377304554,0.0,0,0.0023848682176321745,-0.09949117284850217,1.0,0,-0.0650140792131424,-0.10938435979187489,0.0,0,0.0954139232635498,-0.020679338253103197,3.0,1,-0.07162606716156006,-0.09942267369478941,0.0,1,-0.042731016874313354,-0.10919756907969713,0.0,1,-0.19715631008148193,-0.22825725749135017,0.0,1,-0.12984628975391388,-0.18001840822398663,0.0,23,58,81
+HRM,443,170881,transient_or_regressed,5,2,13020,10000,,,False,0.8024691343307495,0.015545204281806946,-0.04969067871570587,-0.06898937746882439,1.0,0.015545204281806946,0,-0.09321175515651703,-0.10630960017442703,0.0,0,-0.06129550188779831,-0.16738963779062033,0.0,0,-0.13326498866081238,-0.15872923098504543,0.0,0,-0.08632530272006989,-0.11567115597426891,0.0,1,-0.13690660893917084,-0.17014572210609913,0.0,1,-0.2188250869512558,-0.2462692130357027,0.0,1,-0.11557027697563171,-0.18386618234217167,0.0,1,-0.13580182194709778,-0.15268408134579659,0.0,1,-0.12765683233737946,-0.206096563488245,0.0,0,0.015545204281806946,-0.04969067871570587,1.0,25,56,81
+HRM,444,11959,flaky_then_stable,3,5,7812,6000,26040,20000,True,1.0,-0.14703434705734253,-0.1777003463357687,-0.189967330545187,0.0,0.0,0,-0.0747319757938385,-0.09784877020865679,0.0,0,-0.14560699462890625,-0.16128805093467236,0.0,1,-0.21802249550819397,-0.24195612967014313,0.0,0,-0.08184917271137238,-0.12556229531764984,0.0,0,-0.014115124940872192,-0.06371324486099184,0.0,0,0.00413081469014287,-0.04142075002891943,1.0,1,-0.07611607015132904,-0.11147284135222435,0.0,0,-0.005735342390835285,-0.043450227356515825,0.0,0,0.0552988201379776,-0.0013229920914454851,3.0,1,-0.14703434705734253,-0.1777003463357687,0.0,26,55,81
+HRM,445,229749,transient_or_regressed,2,2,13020,10000,,,False,0.6666666865348816,0.20098333060741425,0.041544112958945334,-0.009950939333066344,5.0,0.3862368892878294,0,-0.10643669962882996,-0.12313777673989534,0.0,0,-0.09911841154098511,-0.1598612368106842,0.0,0,-0.08157636225223541,-0.11578508839011192,0.0,0,-0.053091950714588165,-0.07571078464388847,0.0,1,-0.16949562728405,-0.1904337741434574,0.0,1,-0.19585025310516357,-0.2395910732448101,0.0,0,0.06743431091308594,-0.0006330834585241973,3.0,0,0.01918400637805462,-0.029445513617247343,1.0,0,0.12362059950828552,-0.006176070775836706,2.0,0,0.20098333060741425,0.041544112958945334,5.0,24,57,81
+HRM,446,95436,stable_learned,8,1,7812,6000,7812,6000,True,1.0,-0.14169955253601074,-0.17393985204398632,-0.18890142813324928,0.0,0.0,0,-0.0916425883769989,-0.10603985749185085,0.0,0,-0.13979440927505493,-0.17489181831479073,0.0,1,-0.18384437263011932,-0.25773583352565765,0.0,1,-0.17727279663085938,-0.24266633577644825,0.0,1,-0.16418543457984924,-0.2077191136777401,0.0,1,-0.1911689043045044,-0.2517211399972439,0.0,1,-0.1584322154521942,-0.18988991156220436,0.0,1,-0.05812612920999527,-0.11202166602015495,0.0,1,-0.18540984392166138,-0.2371428795158863,0.0,1,-0.14169955253601074,-0.17393985204398632,0.0,27,54,81
+HRM,447,322889,flaky_then_stable,7,3,7812,6000,15624,12000,True,1.0,-0.1472194641828537,-0.18369744904339314,-0.1954377107322216,0.0,0.0,0,-0.09376165270805359,-0.1092583304271102,0.0,0,-0.12862423062324524,-0.16230148635804653,0.0,1,-0.22644352912902832,-0.2676487937569618,0.0,1,-0.1590709388256073,-0.2227593082934618,0.0,0,-0.04255664348602295,-0.0662226933054626,0.0,1,-0.16806715726852417,-0.25864607840776443,0.0,1,-0.1023397371172905,-0.14183285646140575,0.0,1,-0.009626219049096107,-0.1344029710162431,0.0,1,-0.17631694674491882,-0.20683735236525536,0.0,1,-0.1472194641828537,-0.18369744904339314,0.0,29,52,81
+HRM,448,311286,transient_or_regressed,3,4,7812,6000,,,False,0.6790123581886292,0.11756578087806702,0.04001697344938293,0.00674268847797066,7.0,0.3277060203254223,0,-0.10237499326467514,-0.116673419252038,0.0,0,-0.1637573540210724,-0.18897810019552708,0.0,1,-0.20798622071743011,-0.23679350689053535,0.0,1,-0.16457021236419678,-0.22189807519316673,0.0,0,-0.005857301410287619,-0.055113000504206866,0.0,1,-0.13475210964679718,-0.1980103738605976,0.0,0,0.05583857744932175,0.01685517982696183,7.0,0,0.053183164447546005,0.002232391503639519,3.0,0,0.01912534609436989,-0.02258479925512802,1.0,0,0.11756578087806702,0.04001697344938293,7.0,24,57,81
+HRM,449,38334,flaky_then_stable,7,3,5208,4000,13020,10000,True,1.0,-0.19293725490570068,-0.21838374808430672,-0.23241567984223366,0.0,0.0,0,-0.08487152308225632,-0.10213864780962467,0.0,1,-0.1695384681224823,-0.20120809972286224,0.0,0,-0.07711875438690186,-0.13006535358726978,0.0,0,0.04986470565199852,-0.03146674652816728,2.0,1,-0.21838276088237762,-0.2419672477990389,0.0,1,-0.07119423896074295,-0.08466914389282465,0.0,1,-0.12697896361351013,-0.1722822729498148,0.0,1,-0.07943063974380493,-0.131717624142766,0.0,1,-0.1465545892715454,-0.2027247902005911,0.0,1,-0.19293725490570068,-0.21838374808430672,0.0,26,55,81
+HRM,450,376109,stable_learned,4,1,18228,14000,18228,14000,True,1.0,-0.1731739491224289,-0.21284561231732368,-0.22827404364943504,0.0,0.0,0,-0.10312249511480331,-0.12225742638111115,0.0,0,-0.08873098343610764,-0.1464832704514265,0.0,0,-0.09546094387769699,-0.1323210746049881,0.0,0,-0.03952452912926674,-0.1081095184199512,0.0,0,0.012131668627262115,-0.023257125380041543,2.0,0,0.08011408150196075,0.025213797212927602,6.0,1,-0.04497959464788437,-0.0831063836812973,0.0,1,-0.08382144570350647,-0.1436777412891388,0.0,1,-0.09850545227527618,-0.19172324612736702,0.0,1,-0.1731739491224289,-0.21284561231732368,0.0,23,58,81
+HRM,451,373815,transient_or_regressed,1,2,23436,18000,,,False,0.7654321193695068,0.05446624755859375,-0.033113711047917604,-0.054961320012807846,1.0,0.05446624755859375,0,-0.08279091119766235,-0.10995966102927923,0.0,0,-0.11499471962451935,-0.17337927408516407,0.0,0,-0.13388574123382568,-0.1682972889393568,0.0,0,-0.16207973659038544,-0.18306110613048077,0.0,0,-0.03864797204732895,-0.10308450739830732,0.0,0,-0.023956473916769028,-0.0783042786642909,0.0,0,-0.013574203476309776,-0.06045251595787704,0.0,0,-0.07219342887401581,-0.08601411525160074,0.0,1,-0.09540551155805588,-0.17742884438484907,0.0,0,0.05446624755859375,-0.033113711047917604,1.0,25,56,81
+HRM,452,365448,transient_or_regressed,1,2,23436,18000,,,False,0.7037037014961243,-0.000309446215396747,-0.03540382293795119,-0.057262545451521873,0.0,0.0,0,-0.10907142609357834,-0.11701671499758959,0.0,0,-0.16165491938591003,-0.1722674984484911,0.0,0,-0.02164953202009201,-0.1429783832281828,0.0,0,-0.10180162638425827,-0.1490884618833661,0.0,0,-0.013126197271049023,-0.09719090478029102,0.0,0,-0.009462102316319942,-0.04738247219938785,0.0,0,-0.014196844771504402,-0.06626536161638796,0.0,0,-0.040791455656290054,-0.06853996869176626,0.0,1,-0.22521589696407318,-0.23934303224086761,0.0,0,-0.000309446215396747,-0.03540382293795119,0.0,23,58,81
+HRM,453,297325,never,0,0,,,,,False,0.604938268661499,-0.05137219652533531,-0.06579908030107617,-0.07485110592097044,0.0,0.0,0,-0.09032680094242096,-0.11232253070920706,0.0,0,-0.15882091224193573,-0.182364946231246,0.0,0,-0.049399107694625854,-0.10449580289423466,0.0,0,-0.06929438561201096,-0.1307235797867179,0.0,0,-0.05839700251817703,-0.10183912375941873,0.0,0,-0.03588797152042389,-0.06724630575627089,0.0,0,-0.005198296159505844,-0.05400925502181053,0.0,0,0.005365391261875629,-0.04596106271492317,2.0,0,-0.04051819443702698,-0.08108251728117466,0.0,0,-0.05137219652533531,-0.06579908030107617,0.0,23,58,81
+HRM,454,2708,never,0,0,,,,,False,0.5555555820465088,0.07848410308361053,-0.021252433769404888,-0.04233995359390974,1.0,0.07848410308361053,0,-0.11210035532712936,-0.12370707094669342,0.0,0,-0.08652803301811218,-0.15223367139697075,0.0,0,-0.12103800475597382,-0.16210651583969593,0.0,0,-0.10133084654808044,-0.1498905997723341,0.0,0,0.09409483522176743,-0.0387880391208455,2.0,0,-0.019741658121347427,-0.05413003149442375,0.0,0,0.08456697314977646,-0.004755062749609351,2.0,0,0.12820017337799072,-0.01754557475214824,1.0,0,-0.0484907403588295,-0.06826095003634691,0.0,0,0.07848410308361053,-0.021252433769404888,1.0,26,55,81
+HRM,455,363270,never,0,0,,,,,False,0.6172839403152466,0.10195408761501312,0.00939213897800073,-0.02324746234808117,5.0,0.1700831181369722,0,-0.06594761461019516,-0.12216635514050722,0.0,0,-0.10832276195287704,-0.14555139001458883,0.0,0,-0.07902240753173828,-0.1390017569065094,0.0,0,-0.09528714418411255,-0.158915588632226,0.0,0,-0.04611523449420929,-0.08558299951255322,0.0,0,0.02469874918460846,-0.012467379230656661,3.0,0,-0.002681782003492117,-0.02373389611602761,0.0,0,0.002596699632704258,-0.03236117819324136,1.0,0,0.007238985039293766,-0.03789344074903056,1.0,0,0.10195408761501312,0.00939213897800073,5.0,26,55,81
+HRM,456,17115,always,10,0,2604,2000,2604,2000,True,1.0,-0.13957861065864563,-0.19199722073972225,-0.22897713631391525,0.0,0.0,1,-0.11933039873838425,-0.1981120714917779,0.0,1,-0.17666874825954437,-0.24614599533379078,0.0,1,-0.3124152421951294,-0.34799981862306595,0.0,1,-0.2587098479270935,-0.31856903433799744,0.0,1,-0.15298613905906677,-0.22802118957042694,0.0,1,-0.27357664704322815,-0.3017561212182045,0.0,1,-0.09614226222038269,-0.21845338307321072,0.0,1,-0.07052458822727203,-0.12688247673213482,0.0,1,-0.17737820744514465,-0.20791134610772133,0.0,1,-0.13957861065864563,-0.19199722073972225,0.0,34,47,81
+HRM,457,142720,flaky_then_stable,3,5,13020,10000,26040,20000,True,1.0,-0.029423192143440247,-0.11072034295648336,-0.13585609570145607,0.0,0.0,0,-0.11056725680828094,-0.12592520006000996,0.0,0,-0.11526939272880554,-0.15903345681726933,0.0,0,-0.0677945613861084,-0.11508639622479677,0.0,0,-0.0815429612994194,-0.11226705554872751,0.0,1,-0.12413924187421799,-0.16045479755848646,0.0,0,0.0652109682559967,0.01599327102303505,5.0,0,0.07151950895786285,0.012205795384943485,4.0,1,-0.08016563951969147,-0.1350603336468339,0.0,0,0.04928082227706909,-0.006660502782324329,4.0,1,-0.029423192143440247,-0.11072034295648336,0.0,23,58,81
+HRM,458,314525,never,0,0,,,,,False,0.5432098507881165,-0.0483870655298233,-0.0737529220059514,-0.08520659245550632,0.0,0.0,0,-0.11807267367839813,-0.12831636145710945,0.0,0,-0.15845339000225067,-0.17217013984918594,0.0,0,-0.14119620621204376,-0.16473538242280483,0.0,0,-0.17157381772994995,-0.19774862192571163,0.0,0,-0.10146021842956543,-0.1397136878222227,0.0,0,-0.09107765555381775,-0.10494327545166016,0.0,0,-0.04633212089538574,-0.06851305346935987,0.0,0,-0.071221724152565,-0.09064629394561052,0.0,0,-0.011138959787786007,-0.06344721361529082,0.0,0,-0.0483870655298233,-0.0737529220059514,0.0,24,57,81
+HRM,459,113202,transient_or_regressed,6,4,7812,6000,,,False,0.8148148059844971,-0.0024340408854186535,-0.017484950076323003,-0.028911495581269264,0.0,0.0,0,-0.10235857963562012,-0.1209979746490717,0.0,0,-0.07792928069829941,-0.16049356665462255,0.0,1,-0.1721605509519577,-0.2172605786472559,0.0,1,-0.1837140917778015,-0.23244865238666534,0.0,1,-0.17255809903144836,-0.203876793384552,0.0,1,-0.18602101504802704,-0.22288965620100498,0.0,1,-0.1232287809252739,-0.1617951737716794,0.0,0,-0.0195746012032032,-0.03122584125958383,0.0,1,-0.11176325380802155,-0.2264129612594843,0.0,0,-0.0024340408854186535,-0.017484950076323003,0.0,25,56,81
+HRM,460,161422,transient_or_regressed,3,2,18228,14000,,,False,0.5925925970077515,0.04353730008006096,-0.02729915117379278,-0.0530523331835866,1.0,0.04353730008006096,0,-0.08513045310974121,-0.1100678090006113,0.0,0,-0.16800211369991302,-0.19917619414627552,0.0,0,-0.10137473046779633,-0.134330365806818,0.0,0,-0.059744708240032196,-0.0932952337898314,0.0,0,-0.027266262099146843,-0.05560493702068925,0.0,0,0.07250124216079712,-0.02668716770131141,2.0,1,-0.019786763936281204,-0.13502696668729186,0.0,1,-0.11205010116100311,-0.1606956049799919,0.0,1,-0.17078563570976257,-0.20015637390315533,0.0,0,0.04353730008006096,-0.02729915117379278,1.0,24,57,81
+HRM,461,379709,stable_learned,8,1,7812,6000,7812,6000,True,1.0,-0.14610248804092407,-0.19820364750921726,-0.22514642775058746,0.0,0.0,0,-0.10105063766241074,-0.1299783755093813,0.0,0,-0.13003036379814148,-0.1652177032083273,0.0,1,-0.06077193841338158,-0.2259829598478973,0.0,1,-0.1792203038930893,-0.22587933205068111,0.0,1,-0.07341539859771729,-0.1690439935773611,0.0,1,-0.22891834378242493,-0.24932080507278442,0.0,1,0.0005650667590089142,-0.1360489800790674,1.0,1,-0.1648918241262436,-0.18103151209652424,0.0,1,-0.15424351394176483,-0.1955326534807682,0.0,1,-0.14610248804092407,-0.19820364750921726,0.0,25,56,81
+HRM,462,392553,never,0,0,,,,,False,0.654321014881134,0.08207283914089203,0.01606410223757848,-0.002149579697288573,7.0,0.1565517201088369,0,-0.030606742948293686,-0.083210164681077,0.0,0,-0.13243025541305542,-0.1551196575164795,0.0,0,-0.04744917154312134,-0.16774233616888523,0.0,0,-0.01591840386390686,-0.08092186320573092,0.0,0,0.031788840889930725,-0.04119202680885792,1.0,0,-0.01752295345067978,-0.05450862948782742,0.0,0,-0.030391279608011246,-0.05976481502875686,0.0,0,0.01650937646627426,-0.056704006157815456,1.0,0,0.05786387622356415,-0.04884895170107484,1.0,0,0.08207283914089203,0.01606410223757848,7.0,26,55,81
+HRM,463,152224,stable_learned,6,1,13020,10000,13020,10000,True,1.0,-0.1325080245733261,-0.158989192917943,-0.17790478095412254,0.0,0.0,0,-0.09283250570297241,-0.11513460520654917,0.0,0,-0.1401781141757965,-0.20246277935802937,0.0,0,-0.09956642985343933,-0.12892800569534302,0.0,0,-0.045710109174251556,-0.12330951821058989,0.0,1,-0.1704661101102829,-0.22429948300123215,0.0,1,-0.10761525481939316,-0.14066787995398045,0.0,1,-0.11059360951185226,-0.15511934831738472,0.0,1,-0.1887357234954834,-0.19618609175086021,0.0,1,-0.1617981344461441,-0.22651788033545017,0.0,1,-0.1325080245733261,-0.158989192917943,0.0,26,55,81
+HRM,464,416555,stable_learned,9,1,5208,4000,5208,4000,True,1.0,-0.13876616954803467,-0.19795207493007183,-0.22322246804833412,0.0,0.0,0,-0.05738667771220207,-0.09785085311159492,0.0,1,-0.11592648923397064,-0.20286200754344463,0.0,1,-0.27170422673225403,-0.30200787633657455,0.0,1,-0.1459045708179474,-0.21352773904800415,0.0,1,-0.17148850858211517,-0.19510062411427498,0.0,1,-0.2010394185781479,-0.2521599493920803,0.0,1,-0.007236751727759838,-0.15056219173129648,0.0,1,-0.11496672034263611,-0.1714398618787527,0.0,1,-0.19382868707180023,-0.21980048716068268,0.0,1,-0.13876616954803467,-0.19795207493007183,0.0,27,54,81
+HRM,465,33227,never,0,0,,,,,False,0.604938268661499,-0.00744275189936161,-0.05277957348152995,-0.07288972847163677,0.0,0.0,0,-0.1125030666589737,-0.12693730928003788,0.0,0,-0.1484287679195404,-0.17325189895927906,0.0,0,-0.05978424847126007,-0.13238004595041275,0.0,0,-0.055273473262786865,-0.1642252877354622,0.0,0,-0.034603677690029144,-0.10039506293833256,0.0,0,-0.012710893526673317,-0.08842249982990324,0.0,0,-0.06464768946170807,-0.08692845795303583,0.0,0,-0.014918453060090542,-0.07971850119065493,0.0,0,-0.07473921775817871,-0.10277923289686441,0.0,0,-0.00744275189936161,-0.05277957348152995,0.0,24,57,81
+HRM,466,284808,transient_or_regressed,1,2,18228,14000,,,False,0.6913580298423767,0.05381803959608078,-0.02288976334966719,-0.059280055575072765,2.0,0.08610750734806061,0,-0.09204494953155518,-0.11558660492300987,0.0,0,-0.14041772484779358,-0.167351134121418,0.0,0,-0.08949950337409973,-0.17617597617208958,0.0,0,-0.0812426507472992,-0.13218100741505623,0.0,0,-0.07278688251972198,-0.10202606860548258,0.0,0,-0.08139877766370773,-0.09773620683699846,0.0,1,-0.19269630312919617,-0.21125915460288525,0.0,0,-0.044905297458171844,-0.08969113044440746,0.0,0,-0.035587288439273834,-0.07407036097720265,0.0,0,0.05381803959608078,-0.02288976334966719,2.0,24,57,81
+HRM,467,173117,flaky_then_stable,2,3,20832,16000,26040,20000,True,1.0,-0.13809411227703094,-0.18028605356812477,-0.21562479808926582,0.0,0.0,0,-0.11767417192459106,-0.12836897000670433,0.0,0,-0.16247305274009705,-0.19384107738733292,0.0,0,-0.12437533587217331,-0.18404327984899282,0.0,0,-0.042241208255290985,-0.12307193409651518,0.0,0,-0.00554207619279623,-0.07605814456474036,0.0,0,-0.03410671651363373,-0.055194699205458164,0.0,0,0.0072106472216546535,-0.02573735552141443,1.0,1,-0.09890913218259811,-0.13096911180764437,0.0,0,0.027625098824501038,-0.06729814130812883,1.0,1,-0.13809411227703094,-0.18028605356812477,0.0,24,57,81
+HRM,468,273737,stable_learned,6,1,13020,10000,13020,10000,True,1.0,0.019368015229701996,-0.1420118547976017,-0.19571903347969055,1.0,0.019368015229701996,0,-0.10007964819669724,-0.12662744708359241,0.0,0,-0.14375101029872894,-0.16544498316943645,0.0,0,-0.06472624838352203,-0.10667784418910742,0.0,0,-0.04916907846927643,-0.10258536506444216,0.0,1,-0.16980068385601044,-0.1891763098537922,0.0,1,-0.14120206236839294,-0.18881831504404545,0.0,1,-0.0818033367395401,-0.1712517961859703,0.0,1,-0.033218882977962494,-0.13146228343248367,0.0,1,-0.19545714557170868,-0.2295446079224348,0.0,1,0.019368015229701996,-0.1420118547976017,1.0,28,53,81
+HRM,469,268973,never,0,0,,,,,False,0.40740740299224854,-0.01983005367219448,-0.05906763579696417,-0.08420239202678204,0.0,0.0,0,-0.09867829084396362,-0.11741090565919876,0.0,0,-0.12920773029327393,-0.15542346239089966,0.0,0,-0.11706694960594177,-0.15574384853243828,0.0,0,-0.11442369222640991,-0.16705795377492905,0.0,0,-0.10296472907066345,-0.11609475966542959,0.0,0,-0.024436822161078453,-0.07409549481235445,0.0,0,-0.021830610930919647,-0.0451726692263037,0.0,0,0.0091804638504982,-0.02470065571833402,1.0,0,0.021961208432912827,-0.046127449575578794,1.0,0,-0.01983005367219448,-0.05906763579696417,0.0,21,60,81
+HRM,470,231302,stable_learned,6,1,13020,10000,13020,10000,True,1.0,-0.16004182398319244,-0.1843466553837061,-0.19435089454054832,0.0,0.0,0,-0.09012579172849655,-0.12260696291923523,0.0,0,-0.19291703402996063,-0.21590713039040565,0.0,0,-0.06649993360042572,-0.13071583211421967,0.0,0,0.051622193306684494,-0.06939796498045325,1.0,1,-0.13866949081420898,-0.17725419625639915,0.0,1,-0.12154106795787811,-0.17847948148846626,0.0,1,-0.0071311756037175655,-0.09415305888978764,0.0,1,-0.03405264765024185,-0.1487556928768754,0.0,1,-0.1419527530670166,-0.21454457007348537,0.0,1,-0.16004182398319244,-0.1843466553837061,0.0,26,55,81
+HRM,471,283309,stable_learned,6,1,13020,10000,13020,10000,True,1.0,-0.11993443965911865,-0.18718086928129196,-0.20772604271769524,0.0,0.0,0,-0.11211692541837692,-0.12650785129517317,0.0,0,-0.11267630755901337,-0.1748627070337534,0.0,0,-0.049157168716192245,-0.14458253281190991,0.0,0,-0.017467686906456947,-0.0917843768838793,0.0,1,-0.22119402885437012,-0.23535848967731,0.0,1,-0.21149688959121704,-0.23463112115859985,0.0,1,-0.03880209103226662,-0.07187345577403903,0.0,1,-0.1059192419052124,-0.13484421372413635,0.0,1,-0.0916975662112236,-0.2053821822628379,0.0,1,-0.11993443965911865,-0.18718086928129196,0.0,26,55,81
+HRM,472,314697,stable_learned,5,1,15624,12000,15624,12000,True,1.0,-0.09503080695867538,-0.14854280930012465,-0.188844982534647,0.0,0.0,0,-0.09799744188785553,-0.11539168655872345,0.0,0,-0.13407206535339355,-0.17590170539915562,0.0,0,-0.06362436711788177,-0.13053727243095636,0.0,0,0.05164116248488426,-0.0969679276458919,1.0,0,0.0028045717626810074,-0.023706347012193874,2.0,1,-0.19055674970149994,-0.21816511079669,0.0,1,-0.14220255613327026,-0.1923715379089117,0.0,1,-0.10261484980583191,-0.17405995726585388,0.0,1,-0.05922902747988701,-0.19740992737933993,0.0,1,-0.09503080695867538,-0.14854280930012465,0.0,27,54,81
+HRM,473,343431,never,0,0,,,,,False,0.5432098507881165,0.05569987744092941,-0.019368373788893223,-0.04787747096270323,2.0,0.09146558120846748,0,-0.09002536535263062,-0.11689076386392117,0.0,0,-0.13000410795211792,-0.1724604219198227,0.0,0,-0.12805692851543427,-0.1474247332662344,0.0,0,-0.07107292860746384,-0.15280374605208635,0.0,0,-0.022095901891589165,-0.07811510073952377,0.0,0,0.01383991539478302,-0.039312756387516856,1.0,0,0.03161591663956642,-0.01776347355917096,3.0,0,0.1246480941772461,-0.033290714491158724,2.0,0,0.0014681195607408881,-0.06669635542493779,1.0,0,0.05569987744092941,-0.019368373788893223,2.0,25,56,81
+HRM,474,15145,transient_or_regressed,2,4,18228,14000,,,False,0.790123462677002,0.027311554178595543,-0.012075541773810983,-0.03660632576793432,3.0,0.05765002593398094,0,-0.07007186114788055,-0.0955340201035142,0.0,0,-0.11295636743307114,-0.16594329942017794,0.0,0,-0.12796220183372498,-0.16029204986989498,0.0,0,-0.06979884207248688,-0.12094396352767944,0.0,0,-0.033144693821668625,-0.06277430290356278,0.0,0,0.03636964410543442,-0.007861702237278223,3.0,1,-0.08806616067886353,-0.14660228788852692,0.0,0,-0.026843128725886345,-0.053969798842445016,0.0,1,-0.10599923133850098,-0.20386032946407795,0.0,0,0.027311554178595543,-0.012075541773810983,3.0,26,55,81
+HRM,475,77340,stable_learned,2,1,23436,18000,23436,18000,True,1.0,-0.1716030091047287,-0.2241663634777069,-0.2520177513360977,0.0,0.0,0,-0.06785670667886734,-0.08892253879457712,0.0,0,-0.1409132182598114,-0.18618100509047508,0.0,0,-0.16676655411720276,-0.21116033382713795,0.0,0,-0.044075459241867065,-0.13298676442354918,0.0,0,0.037259940057992935,-0.013531373348087072,3.0,0,-0.0337822325527668,-0.06651845714077353,0.0,0,-0.011075146496295929,-0.034098338801413774,0.0,0,0.043555133044719696,-0.005971067643258721,4.0,1,-0.16660796105861664,-0.20638668723404408,0.0,1,-0.1716030091047287,-0.2241663634777069,0.0,25,56,81
+HRM,476,364763,flaky_then_stable,4,3,15624,12000,20832,16000,True,1.0,-0.1613057404756546,-0.1929642464965582,-0.21082305163145065,0.0,0.0,0,-0.11098167300224304,-0.12689475622028112,0.0,0,-0.10581965744495392,-0.15322310850024223,0.0,0,-0.07875625789165497,-0.15970134921371937,0.0,0,0.0290312971919775,-0.04376558505464345,2.0,0,0.12065690010786057,0.0353271986823529,7.0,1,-0.13552966713905334,-0.14598547667264938,0.0,0,0.12192326039075851,0.02016185875982046,7.0,1,-0.047175973653793335,-0.0911042895168066,0.0,1,-0.22950923442840576,-0.23989027179777622,0.0,1,-0.1613057404756546,-0.1929642464965582,0.0,26,55,81
+HRM,477,50372,flaky_then_stable,3,5,7812,6000,26040,20000,True,1.0,-0.20184916257858276,-0.21707617491483688,-0.22634930536150932,0.0,0.0,0,-0.10651011765003204,-0.11511252634227276,0.0,0,-0.07935000956058502,-0.15056521445512772,0.0,1,-0.18199001252651215,-0.27131191082298756,0.0,0,-0.02717195264995098,-0.1133551427628845,0.0,0,-0.028782835230231285,-0.059025007765740156,0.0,0,0.03356169909238815,-0.02397061826195568,2.0,0,0.0061087217181921005,-0.03348684182856232,1.0,1,-0.044583018869161606,-0.11589060304686427,0.0,0,-0.028265690430998802,-0.048324289033189416,0.0,1,-0.20184916257858276,-0.21707617491483688,0.0,29,52,81
+HRM,478,244494,transient_or_regressed,1,2,15624,12000,,,False,0.8024691343307495,0.004446628037840128,-0.030437298275501234,-0.05276797339320183,1.0,0.004446628037840128,0,-0.09769164770841599,-0.119770641438663,0.0,0,-0.1562507003545761,-0.175814189016819,0.0,0,-0.11708283424377441,-0.1602193582803011,0.0,0,-0.008562097325921059,-0.10196267371065915,0.0,0,-0.025272715836763382,-0.06984734209254384,0.0,1,-0.22158530354499817,-0.24288037419319153,0.0,0,0.04134643077850342,-0.03897758247330785,1.0,0,-9.035987022798508e-05,-0.046063047271672986,0.0,0,0.0585966631770134,-0.015361772246251348,2.0,0,0.004446628037840128,-0.030437298275501234,1.0,24,57,81
+HRM,479,263215,stable_learned,6,1,13020,10000,13020,10000,True,1.0,-0.19433194398880005,-0.211366206407547,-0.2203390635550022,0.0,0.0,0,-0.10811275243759155,-0.1266051260754466,0.0,0,-0.15996681153774261,-0.17688553221523762,0.0,0,-0.1018441915512085,-0.14180567301809788,0.0,0,-0.046376898884773254,-0.09461431438103318,0.0,1,-0.11947721242904663,-0.1801200434565544,0.0,1,-0.1564617156982422,-0.19531381130218506,0.0,1,-0.015926837921142578,-0.12505595944821835,0.0,1,-0.1335471272468567,-0.16511239483952522,0.0,1,-0.2285635769367218,-0.24441982991993427,0.0,1,-0.19433194398880005,-0.211366206407547,0.0,26,55,81
+HRM,480,244179,stable_learned,1,1,26040,20000,26040,20000,True,1.0,-0.14392954111099243,-0.17211600579321384,-0.18017056211829185,0.0,0.0,0,-0.09416845440864563,-0.1097155399620533,0.0,0,-0.08918469399213791,-0.14138579089194536,0.0,0,-0.07737578451633453,-0.12517052702605724,0.0,0,0.01504267007112503,-0.07559907482936978,1.0,0,0.019564189016819,-0.050458881829399616,2.0,0,0.031881652772426605,-0.035474764299578965,2.0,0,0.1688615381717682,0.051564275772307155,8.0,0,-0.002702452940866351,-0.031917306274408475,0.0,0,-0.07660360634326935,-0.09535452350974083,0.0,1,-0.14392954111099243,-0.17211600579321384,0.0,24,57,81
+HRM,481,322721,never,0,0,,,,,False,0.5061728358268738,-0.016609620302915573,-0.04048834275454283,-0.052349056117236614,0.0,0.0,0,-0.10931115597486496,-0.13044328335672617,0.0,0,-0.13681820034980774,-0.16341733559966087,0.0,0,-0.12480337917804718,-0.1677234247326851,0.0,0,-0.09220653772354126,-0.11261052545160055,0.0,0,-0.002232864499092102,-0.047053643269464374,0.0,0,0.10976704955101013,-0.058151084929704666,1.0,0,0.061063505709171295,-0.028847452078480273,1.0,0,0.016232069581747055,-0.026045067701488733,2.0,0,-0.0606716088950634,-0.07315736124292016,0.0,0,-0.016609620302915573,-0.04048834275454283,0.0,26,55,81
+HRM,482,153835,flaky_then_stable,6,5,7812,6000,26040,20000,True,1.0,-0.21614563465118408,-0.2248272504657507,-0.22989724576473236,0.0,0.0,0,-0.08413737267255783,-0.10373266786336899,0.0,0,-0.07971005141735077,-0.15301927737891674,0.0,1,-0.09027370065450668,-0.24456446152180433,0.0,1,-0.18342366814613342,-0.22997772321105003,0.0,1,-0.18903444707393646,-0.23618879914283752,0.0,1,-0.17737972736358643,-0.28010106086730957,0.0,0,0.041060175746679306,0.0113811717601493,6.0,1,-0.00984515156596899,-0.10643935564439744,0.0,0,0.04662206396460533,-0.00409773834689986,3.0,1,-0.21614563465118408,-0.2248272504657507,0.0,26,55,81
+HRM,483,180158,stable_learned,6,1,13020,10000,13020,10000,True,1.0,-0.17106842994689941,-0.20970353297889233,-0.2222943902015686,0.0,0.0,0,-0.06046755611896515,-0.09091734234243631,0.0,0,-0.15390443801879883,-0.1984542179852724,0.0,0,-0.16030310094356537,-0.18818379938602448,0.0,0,-0.039738573133945465,-0.1175576401874423,0.0,1,-0.17054814100265503,-0.2111212369054556,0.0,1,-0.2456662356853485,-0.28117062523961067,0.0,1,-0.16701644659042358,-0.2046159729361534,0.0,1,-0.1614232212305069,-0.17197255790233612,0.0,1,-0.1850793957710266,-0.23267626203596592,0.0,1,-0.17106842994689941,-0.20970353297889233,0.0,26,55,81
+HRM,484,359197,never,0,0,,,,,False,0.6419752836227417,0.008077445439994335,-0.03159329004120082,-0.04931687097996473,1.0,0.008077445439994335,0,-0.08021710067987442,-0.10672859568148851,0.0,0,-0.1340274214744568,-0.19331839866936207,0.0,0,-0.10684039443731308,-0.14429728779941797,0.0,0,-0.014492760412395,-0.09097100456710905,0.0,0,-0.022911526262760162,-0.06637474801391363,0.0,0,0.019215382635593414,-0.019889965769834816,2.0,0,-0.002207073150202632,-0.04558424264541827,0.0,0,0.08327504247426987,0.0418009123968659,8.0,0,0.056071385741233826,-0.018791829352267087,1.0,0,0.008077445439994335,-0.03159329004120082,1.0,25,56,81
+HRM,485,251008,never,0,0,,,,,False,0.654321014881134,0.06978140771389008,-0.016864201228599995,-0.048314839601516724,2.0,0.0931775402277708,0,-0.07710368186235428,-0.11376541759818792,0.0,0,-0.09687840193510056,-0.16614786256104708,0.0,0,-0.14182141423225403,-0.18541653454303741,0.0,0,-0.02609062008559704,-0.09728029719553888,0.0,0,0.11037774384021759,-0.02100980513205286,3.0,0,0.06450261175632477,0.017223646049387753,6.0,0,0.08716960996389389,0.004938583141665731,2.0,0,0.12716297805309296,0.021448684958158992,3.0,0,0.08749322593212128,-0.008821476774755865,3.0,0,0.06978140771389008,-0.016864201228599995,2.0,26,55,81
+HRM,486,162158,stable_learned,2,1,23436,18000,23436,18000,True,1.0,-0.08913254737854004,-0.15302850492298603,-0.19408570230007172,0.0,0.0,0,-0.09592791646718979,-0.10901432111859322,0.0,0,-0.10340508818626404,-0.15851133596152067,0.0,0,-0.10889711230993271,-0.1862173257395625,0.0,0,-0.1263185441493988,-0.15629343874752522,0.0,0,-0.08042732626199722,-0.10959711950272322,0.0,0,-0.04325646162033081,-0.07792218029499054,0.0,0,-0.02325337380170822,-0.061753301648423076,0.0,0,0.015659568831324577,-0.07818877534009516,1.0,1,-0.1680339276790619,-0.18349117785692215,0.0,1,-0.08913254737854004,-0.15302850492298603,0.0,25,56,81
+HRM,487,268037,flaky_then_stable,3,3,15624,12000,23436,18000,True,1.0,-0.18777963519096375,-0.2125306986272335,-0.22827157005667686,0.0,0.0,0,-0.10196882486343384,-0.11366553604602814,0.0,0,-0.10574596375226974,-0.1698898123577237,0.0,0,-0.11935238540172577,-0.14937768131494522,0.0,0,0.060978539288043976,-0.0592821235768497,1.0,0,0.030330438166856766,-0.06444665556773543,1.0,1,-0.18641357123851776,-0.2607811465859413,0.0,0,-0.014119584113359451,-0.0387909016571939,0.0,0,-0.031066585332155228,-0.0487329657189548,0.0,1,-0.11316971480846405,-0.2148267775774002,0.0,1,-0.18777963519096375,-0.2125306986272335,0.0,27,54,81
+HRM,488,414236,flaky_then_stable,6,3,10416,8000,18228,14000,True,1.0,-0.1547347605228424,-0.19544532895088196,-0.21081970259547234,0.0,0.0,0,-0.09798109531402588,-0.11949600651860237,0.0,0,-0.053554095327854156,-0.14653443451970816,0.0,0,-0.10264291614294052,-0.1452030511572957,0.0,1,-0.014371078461408615,-0.12310080789029598,0.0,1,-0.16002613306045532,-0.19497712887823582,0.0,0,-0.019628793001174927,-0.03310484508983791,0.0,1,-0.06674352288246155,-0.12586961314082146,0.0,1,-0.11718320101499557,-0.1540768826380372,0.0,1,-0.06192338466644287,-0.0975642055273056,0.0,1,-0.1547347605228424,-0.19544532895088196,0.0,26,55,81
+HRM,489,250967,never,0,0,,,,,False,0.4938271641731262,0.03590772673487663,-0.030916859628632665,-0.05010387022048235,1.0,0.03590772673487663,0,-0.10204876959323883,-0.11258568987250328,0.0,0,-0.11727458238601685,-0.15445894934237003,0.0,0,-0.11441393196582794,-0.15520991757512093,0.0,0,-0.14819982647895813,-0.16930934228003025,0.0,0,-0.07043065875768661,-0.09480075724422932,0.0,0,-0.00841323658823967,-0.05624264385551214,0.0,0,0.020929209887981415,-0.028530764626339078,2.0,0,-0.016342855989933014,-0.08382143313065171,0.0,0,-0.03395548462867737,-0.07025983789935708,0.0,0,0.03590772673487663,-0.030916859628632665,1.0,24,57,81
+HRM,490,342135,transient_or_regressed,2,4,18228,14000,,,False,0.7160493731498718,-0.01935645565390587,-0.06353442976251245,-0.0807412676513195,0.0,0.0,0,-0.09812852740287781,-0.11759866587817669,0.0,0,-0.1640346348285675,-0.1831640526652336,0.0,0,-0.1005222350358963,-0.1668719109147787,0.0,0,-0.09747909009456635,-0.14402462542057037,0.0,0,-0.014275513589382172,-0.06128190434537828,0.0,0,0.022685449570417404,-0.035651944403070956,1.0,1,0.08413258194923401,-0.08960977289825678,1.0,0,-0.010866283439099789,-0.060340722906403244,0.0,1,-0.15446016192436218,-0.21901071071624756,0.0,0,-0.01935645565390587,-0.06353442976251245,0.0,23,58,81
+HRM,491,59002,never,0,0,,,,,False,0.6913580298423767,0.02095910534262657,-0.07333416631445289,-0.09937203489243984,1.0,0.02095910534262657,0,-0.07618856430053711,-0.10833028145134449,0.0,0,-0.1494702398777008,-0.16413938254117966,0.0,0,-0.14462052285671234,-0.15273374877870083,0.0,0,-0.08177454024553299,-0.13954031188040972,0.0,0,-0.010530862025916576,-0.11661590484436601,0.0,0,0.005914328154176474,-0.0798253255779855,1.0,0,-0.01968865841627121,-0.04656722955405712,0.0,0,-0.0514029785990715,-0.0902306754142046,0.0,0,-0.04648084565997124,-0.09466994414106011,0.0,0,0.02095910534262657,-0.07333416631445289,1.0,23,58,81
+HRM,492,338778,flaky_then_stable,5,5,7812,6000,26040,20000,True,1.0,-0.17810975015163422,-0.21767060086131096,-0.233274906873703,0.0,0.0,0,0.035876717418432236,-0.06716783624142408,1.0,0,-0.12615516781806946,-0.1682580467313528,0.0,1,-0.2778669595718384,-0.30735277757048607,0.0,1,-0.16238605976104736,-0.20630884915590286,0.0,1,-0.19682443141937256,-0.2286079339683056,0.0,0,-0.04574422538280487,-0.07323742937296629,0.0,0,0.034550927579402924,-0.023523283540271223,2.0,1,-0.14926768839359283,-0.17040595039725304,0.0,0,0.05770416557788849,-0.01199582009576261,2.0,1,-0.17810975015163422,-0.21767060086131096,0.0,24,57,81
+HRM,493,420469,never,0,0,,,,,False,0.5061728358268738,0.00022660475224256516,-0.0359823607141152,-0.05283741559833288,1.0,0.00022660475224256516,0,-0.06936149299144745,-0.10684109851717949,0.0,0,-0.15219126641750336,-0.18470164015889168,0.0,0,-0.06303159892559052,-0.13736452255398035,0.0,0,0.02930121123790741,-0.11087844334542751,1.0,0,0.05562549829483032,0.00384103087708354,3.0,0,0.06459084898233414,-0.012828531296690926,2.0,0,0.007063733413815498,-0.026463219546712935,1.0,0,0.11417654156684875,0.047102566924877465,7.0,0,0.0398235023021698,0.00022877228911966085,5.0,0,0.00022660475224256516,-0.0359823607141152,1.0,26,55,81
+HRM,494,307975,never,0,0,,,,,False,0.7037037014961243,-0.051151759922504425,-0.06962311640381813,-0.07939979992806911,0.0,0.0,0,-0.08524724096059799,-0.11256251391023397,0.0,0,-0.14448219537734985,-0.17762290500104427,0.0,0,-0.13527733087539673,-0.15806627459824085,0.0,0,-0.1323910355567932,-0.17148155346512794,0.0,0,-0.06333450973033905,-0.09887820202857256,0.0,0,-0.007004893850535154,-0.05986661365022883,0.0,0,0.014939330518245697,-0.061111521907150745,1.0,0,-0.03390566632151604,-0.06416956521570683,0.0,0,0.011067423038184643,-0.04693123267497867,2.0,0,-0.051151759922504425,-0.06962311640381813,0.0,23,58,81
+HRM,495,377141,transient_or_regressed,1,2,23436,18000,,,False,0.7037037014961243,0.02184641733765602,-0.059245015028864145,-0.08529553934931755,1.0,0.02184641733765602,0,-0.08635195344686508,-0.10883380845189095,0.0,0,-0.15087813138961792,-0.1687791310250759,0.0,0,-0.14230935275554657,-0.19364411383867264,0.0,0,-0.00907920766621828,-0.09444983897265047,0.0,0,0.03606801480054855,-0.07531856372952461,1.0,0,0.10665774345397949,0.011237298924243078,5.0,0,0.07752428948879242,0.020750219409819692,5.0,0,0.001992996782064438,-0.04523258167319,1.0,1,0.055351611226797104,-0.010691349551052554,4.0,0,0.02184641733765602,-0.059245015028864145,1.0,25,56,81
+HRM,496,63475,never,0,0,,,,,False,0.5432098507881165,0.10824154317378998,0.0125378766679205,-0.019172575674019754,4.0,0.17699331603944302,0,-0.09860989451408386,-0.11355622671544552,0.0,0,-0.11690413951873779,-0.168419374153018,0.0,0,-0.05948277562856674,-0.1417320230975747,0.0,0,0.03534524887800217,-0.09824256971478462,1.0,0,-0.04264377802610397,-0.10884437896311283,0.0,0,0.0538792759180069,-0.01978370815049857,3.0,0,0.09942569583654404,0.003544655453879386,5.0,0,0.02274017594754696,-0.06971218506805599,1.0,0,-0.04783233627676964,-0.06155086634680629,0.0,0,0.10824154317378998,0.0125378766679205,4.0,26,55,81
+HRM,497,405054,transient_or_regressed,6,2,10416,8000,,,False,0.7530864477157593,0.045843131840229034,0.00037655047344742343,-0.021539534907788038,4.0,0.08917054341873154,0,-0.09706346690654755,-0.11432321928441525,0.0,0,-0.13380873203277588,-0.17578350938856602,0.0,0,-0.08403223007917404,-0.1406553601846099,0.0,1,-0.2162047028541565,-0.23577753640711308,0.0,1,-0.1634335219860077,-0.1808729860931635,0.0,1,-0.2502889037132263,-0.27181072533130646,0.0,1,-0.1427784413099289,-0.17730609141290188,0.0,1,-0.11972349882125854,-0.1402463372796774,0.0,1,-0.17638954520225525,-0.20459843426942825,0.0,0,0.045843131840229034,0.00037655047344742343,4.0,25,56,81
+HRM,498,413485,never,0,0,,,,,False,0.6419752836227417,0.04346686229109764,-0.04269930312875658,-0.077152905985713,1.0,0.04346686229109764,0,-0.11933266371488571,-0.1268499195575714,0.0,0,-0.12310458719730377,-0.1559494249522686,0.0,0,-0.10173164308071136,-0.16946199536323547,0.0,0,-0.008985605090856552,-0.07346580224111676,0.0,0,0.056249432265758514,-0.052620056783780456,1.0,0,-0.003987458534538746,-0.04410915414337069,0.0,0,0.016503840684890747,-0.02841712487861514,1.0,0,-0.007351515349000692,-0.06581569270929322,0.0,0,-0.025389481335878372,-0.05410643154755235,0.0,0,0.04346686229109764,-0.04269930312875658,1.0,25,56,81
+HRM,499,154260,stable_learned,6,1,13020,10000,13020,10000,True,1.0,-0.10092122852802277,-0.18217395432293415,-0.22118183597922325,0.0,0.0,0,-0.07039298862218857,-0.09659343864768744,0.0,0,-0.05131688341498375,-0.18044494604691863,0.0,0,-0.1000865027308464,-0.13354728184640408,0.0,0,-0.07338738441467285,-0.12642989866435528,0.0,1,-0.18313613533973694,-0.2103094421327114,0.0,1,-0.21381041407585144,-0.25229855813086033,0.0,1,-0.06616924703121185,-0.147238502278924,0.0,1,-0.05174446851015091,-0.12348886206746101,0.0,1,-0.11140229552984238,-0.17534588743001223,0.0,1,-0.10092122852802277,-0.18217395432293415,0.0,28,53,81
+HRM,500,320091,transient_or_regressed,6,2,10416,8000,,,False,0.7654321193695068,0.02071388252079487,-0.021625153152854182,-0.044418989680707455,3.0,0.0328401931328699,0,-0.08948750793933868,-0.11582124698907137,0.0,0,-0.07001768052577972,-0.11283621843904257,0.0,0,-0.12778577208518982,-0.13964240811765194,0.0,1,-0.12697437405586243,-0.19789172522723675,0.0,1,-0.18121427297592163,-0.21965575218200684,0.0,1,-0.05538561940193176,-0.09203711524605751,0.0,1,0.034868817776441574,-0.05087925266707316,2.0,1,-0.03751923888921738,-0.0721620349213481,0.0,1,-0.04562965780496597,-0.07647599373012781,0.0,0,0.02071388252079487,-0.021625153152854182,3.0,17,64,81
+HRM,501,77218,stable_learned,5,1,15624,12000,15624,12000,True,1.0,-0.16132943332195282,-0.17849175818264484,-0.187473826110363,0.0,0.0,0,-0.06469953060150146,-0.1044229632243514,0.0,0,-0.15782958269119263,-0.17137881740927696,0.0,0,-0.1254902333021164,-0.16139857284724712,0.0,0,-0.03307043015956879,-0.07807084126397967,0.0,0,0.053403131663799286,-0.015364357852376997,2.0,1,-0.21390490233898163,-0.2821602988988161,0.0,1,0.06954841315746307,-0.08224427746608853,1.0,1,-0.07417292892932892,-0.12611354794353247,0.0,1,-0.12110134959220886,-0.1677328385412693,0.0,1,-0.16132943332195282,-0.17849175818264484,0.0,26,55,81
+HRM,502,135121,flaky_then_stable,3,5,10416,8000,26040,20000,True,1.0,-0.11534428596496582,-0.15978710539638996,-0.1790497563779354,0.0,0.0,0,-0.10722170770168304,-0.11355260107666254,0.0,0,-0.12657351791858673,-0.16600759141147137,0.0,0,-0.09971451759338379,-0.139769216068089,0.0,1,-0.18508493900299072,-0.21146275848150253,0.0,0,-0.046972066164016724,-0.06648523034527898,0.0,0,0.09228010475635529,0.02723345441336278,5.0,0,0.022687409073114395,-0.04733815378858708,1.0,1,-0.14084066450595856,-0.15073926374316216,0.0,0,-0.11955656111240387,-0.18519687838852406,0.0,1,-0.11534428596496582,-0.15978710539638996,0.0,23,58,81
+HRM,503,338595,never,0,0,,,,,False,0.7283950448036194,0.06656619161367416,-0.01519803240080364,-0.03783449390903115,2.0,0.06953549548052251,0,-0.10425515472888947,-0.11318362317979336,0.0,0,-0.10892923921346664,-0.15489273611456156,0.0,0,-0.12093661725521088,-0.16424579173326492,0.0,0,-0.1819339096546173,-0.19671879336237907,0.0,0,-0.11108314990997314,-0.13632484432309866,0.0,0,-0.08781084418296814,-0.10995653457939625,0.0,0,0.0036048544570803642,-0.05122899610432796,1.0,0,-0.06365340948104858,-0.10075615718960762,0.0,0,-0.059611983597278595,-0.08540149591863155,0.0,0,0.06656619161367416,-0.01519803240080364,2.0,24,57,81
+HRM,504,223626,flaky_then_stable,7,5,5208,4000,18228,14000,True,1.0,-0.18137191236019135,-0.20019322261214256,-0.21192951500415802,0.0,0.0,0,-0.07403755933046341,-0.09759374801069498,0.0,1,-0.08760030567646027,-0.19957485981285572,0.0,0,-0.11459815502166748,-0.18149464949965477,0.0,1,-0.2296346127986908,-0.26560376212000847,0.0,1,-0.09905475378036499,-0.1464576032012701,0.0,0,0.0434500053524971,-0.009746083756908774,2.0,1,-0.0947001576423645,-0.16643235180526972,0.0,1,0.050129540264606476,-0.13807897735387087,1.0,1,-0.022709723562002182,-0.21259698225185275,0.0,1,-0.18137191236019135,-0.20019322261214256,0.0,27,54,81
+HRM,505,399325,transient_or_regressed,1,2,18228,14000,,,False,0.654321014881134,0.051410041749477386,-0.022303637582808733,-0.05148208513855934,2.0,0.07088364288210869,0,-0.09159218519926071,-0.10600242670625448,0.0,0,-0.09336872398853302,-0.14257463160902262,0.0,0,-0.034283582121133804,-0.12945155007764697,0.0,0,-0.036588188260793686,-0.08486884878948331,0.0,0,0.051544189453125,0.0005002142424928024,4.0,0,0.0993444174528122,0.001444258145056665,3.0,1,0.0037862597964704037,-0.05872646189527586,1.0,0,-0.020620211958885193,-0.04299960285425186,0.0,0,0.03532106801867485,-0.05419323453679681,1.0,0,0.051410041749477386,-0.022303637582808733,2.0,25,56,81
+HRM,506,316235,never,0,0,,,,,False,0.40740740299224854,0.000406712933909148,-0.028804221459722612,-0.04844156978651881,1.0,0.000406712933909148,0,-0.10285036265850067,-0.12169438041746616,0.0,0,-0.14723145961761475,-0.16341466642916203,0.0,0,-0.12058626115322113,-0.15744062699377537,0.0,0,-0.16411451995372772,-0.17436087131500244,0.0,0,-0.07877010852098465,-0.1199066024273634,0.0,0,-0.03872311860322952,-0.09291246440261602,0.0,0,-0.04819793254137039,-0.07170997885987163,0.0,0,-0.036431364715099335,-0.06977341929450631,0.0,0,0.058381348848342896,-0.045532298274338245,2.0,0,0.000406712933909148,-0.028804221459722612,1.0,24,57,81
+HRM,507,30545,never,0,0,,,,,False,0.6296296119689941,0.04296673461794853,-0.036106481682509184,-0.07610301673412323,2.0,0.08506597951054573,0,-0.09974735230207443,-0.1197953075170517,0.0,0,-0.13289298117160797,-0.16007442213594913,0.0,0,-0.12349866330623627,-0.17258016485720873,0.0,0,-0.181593120098114,-0.19118044339120388,0.0,0,-0.038620129227638245,-0.10459765139967203,0.0,0,-0.046879302710294724,-0.07786989258602262,0.0,0,-0.009983161464333534,-0.045233121840283275,0.0,0,0.015801675617694855,-0.06264863256365061,1.0,0,-0.03956390917301178,-0.06674491195008159,0.0,0,0.04296673461794853,-0.036106481682509184,2.0,25,56,81
+HRM,508,3498,never,0,0,,,,,False,0.5679012537002563,-0.00945576187223196,-0.04398374573793262,-0.05530452262610197,0.0,0.0,0,-0.10951343178749084,-0.12031061667948961,0.0,0,-0.09015395492315292,-0.15785311628133059,0.0,0,-0.11939899623394012,-0.1550031714141369,0.0,0,-0.10106180608272552,-0.14507259242236614,0.0,0,-0.03963102027773857,-0.10457599675282836,0.0,0,-0.059414975345134735,-0.09397546481341124,0.0,0,-0.029369262978434563,-0.06302823848091066,0.0,0,-0.04784463346004486,-0.07225900841876864,0.0,0,0.013578067533671856,-0.05344315443653613,1.0,0,-0.00945576187223196,-0.04398374573793262,0.0,22,59,81
+HRM,509,205267,never,0,0,,,,,False,0.6172839403152466,0.052028998732566833,-0.027059479849413037,-0.05953727010637522,2.0,0.056767482310533524,0,-0.10411148518323898,-0.12177819758653641,0.0,0,-0.1688426434993744,-0.19115813821554184,0.0,0,-0.1558113843202591,-0.18460077233612537,0.0,0,-0.17959468066692352,-0.18809138610959053,0.0,0,-0.025268156081438065,-0.09524528542533517,0.0,0,-0.04022834822535515,-0.07350675296038389,0.0,0,-0.001405079266987741,-0.06134609463333618,0.0,0,0.028859712183475494,-0.06274450267665088,1.0,0,-0.015530123375356197,-0.0598998119821772,0.0,0,0.052028998732566833,-0.027059479849413037,2.0,24,57,81
+HRM,510,89149,transient_or_regressed,1,2,23436,18000,,,False,0.5061728358268738,-0.021405911073088646,-0.04279369115829468,-0.060500260442495346,0.0,0.0,0,-0.11336987465620041,-0.13140498101711273,0.0,0,-0.028146963566541672,-0.18814088916406035,0.0,0,-0.007500344887375832,-0.13681469787843525,0.0,0,-0.06373114138841629,-0.10870589967817068,0.0,0,0.013390859588980675,-0.08093815133906901,1.0,0,0.09015162289142609,-0.023751536791678518,1.0,0,0.08435212075710297,-0.02105990145355463,2.0,0,0.038201428949832916,-0.04220791766420007,1.0,1,-0.15750986337661743,-0.17895638197660446,0.0,0,-0.021405911073088646,-0.04279369115829468,0.0,24,57,81
+HRM,511,6979,never,0,0,,,,,False,0.6666666865348816,0.006250386592000723,-0.03172801557229832,-0.05869694706052542,2.0,0.012284933123737574,0,-0.09556803107261658,-0.12902694568037987,0.0,0,-0.10373257100582123,-0.15324321389198303,0.0,0,-0.14030563831329346,-0.1682114116847515,0.0,0,-0.04028314724564552,-0.1488286596722901,0.0,0,-0.03599575534462929,-0.1237199273891747,0.0,0,-0.0022613590117543936,-0.060907555947778746,0.0,0,0.005064963363111019,-0.0357861464144662,1.0,0,-0.035023316740989685,-0.08527762722223997,0.0,0,0.029115600511431694,-0.06177597143687308,1.0,0,0.006250386592000723,-0.03172801557229832,2.0,24,57,81
diff --git a/problem_tracks/trm_learning_events.csv b/problem_tracks/trm_learning_events.csv
new file mode 100644
index 0000000..717f939
--- /dev/null
+++ b/problem_tracks/trm_learning_events.csv
@@ -0,0 +1,37 @@
+kind,from_step,to_step,from_epoch,to_epoch,event,n,from_token_acc_mean,to_token_acc_mean,lambda_max_before,lambda_max_after,lambda_max_delta,mean8_before,mean8_after,mean8_delta,tail_mean_5_8_before,tail_mean_5_8_after,tail_mean_5_8_delta,positive_count_before,positive_count_after,positive_count_delta,positive_sum_before,positive_sum_after,positive_sum_delta
+TRM,26041,52082,5000,10000,learned_fail_to_success,61,0.6844768270117337,1.0,0.04095346362856751,0.01130250356299421,-0.0296509600655733,0.015569461502346697,-0.0039388805430984365,-0.019508342045445135,0.00450889594701978,-0.009700431184056902,-0.014209327131076683,5.934426229508197,2.0163934426229506,-3.918032786885246,0.14043893001830018,0.027653619372805954,-0.11278531064549423
+TRM,26041,52082,5000,10000,lost_success_to_fail,24,1.0,0.7021604975064596,-0.017263541842112318,0.06876462713504831,0.08602816897716063,-0.03177595781517084,0.03872489225962757,0.07050085007479841,-0.03782351667450712,0.025608245707492944,0.06343176238200006,0.7916666666666666,7.875,7.083333333333333,0.00850467175647888,0.31008046285569435,0.30157579109921545
+TRM,26041,52082,5000,10000,stayed_failure,156,0.6374643859572899,0.6431623934171139,0.031975116116555936,0.05863720388151705,0.02666208776496112,0.010370061944551404,0.03239625788568773,0.022026195941136324,0.0011294046405276813,0.02098523936449335,0.01985583472396567,5.384615384615385,7.923076923076923,2.5384615384615383,0.10369389380041237,0.2593554582230214,0.15566156442260903
+TRM,26041,52082,5000,10000,stayed_success,271,1.0,1.0,-0.03639817160659047,-0.0026813522725212696,0.0337168193340692,-0.04572985614410462,-0.010540279390967392,0.03518957675313723,-0.0497526878224062,-0.01376034084844453,0.03599234697396168,0.11439114391143912,0.7564575645756457,0.6420664206642066,0.0016417544531873067,0.007957042507742226,0.006315288054554919
+TRM,52082,78123,10000,15000,learned_fail_to_success,45,0.6886145393053691,1.0,0.06373828107284175,0.01469134650590907,-0.04904693456693268,0.036088030263393495,-0.01778541501991842,-0.053873445283311915,0.0238580148266111,-0.026259723155009043,-0.05011773798162014,8.0,1.7333333333333334,-6.266666666666667,0.28870424210714796,0.0387454634739293,-0.24995877863321866
+TRM,52082,78123,10000,15000,lost_success_to_fail,25,1.0,0.6765432119369507,0.013729429183294997,0.08733505010604858,0.07360562092275359,-0.0023064174062164965,0.051225730990990995,0.05353214839720749,-0.008355203102109954,0.036106420699507,0.04446162380161695,2.24,8.0,5.76,0.04125833526602946,0.40980584792792796,0.3685475126618985
+TRM,52082,78123,10000,15000,stayed_failure,135,0.6385002299591347,0.63950617313385,0.05873727561836993,0.08107733505743521,0.02234005943906528,0.03229075764848622,0.04857104088231507,0.01628028323382885,0.020849515338098475,0.03474471943832382,0.013895204100225345,7.888888888888889,8.0,0.1111111111111111,0.25859030886300993,0.38856832705852057,0.12997801819551064
+TRM,52082,78123,10000,15000,stayed_success,307,1.0,1.0,-0.0012391839677296163,-0.0045661997550452684,-0.003327015787315652,-0.009899110726142527,-0.032767963419847765,-0.022868852693705238,-0.013393806497078794,-0.03904287366627904,-0.025649067169200245,0.8859934853420195,0.504885993485342,-0.3811074918566775,0.00915886293058166,0.007548392093377316,-0.001610470837204344
+TRM,78123,104164,15000,20000,learned_fail_to_success,42,0.6748971215316227,1.0,0.08431322872638702,0.032027611919190915,-0.05228561680719611,0.05053495965521073,0.006543990983897986,-0.04399096867131275,0.03605126524122343,-0.0033311979622383313,-0.03938246320346176,8.0,3.6904761904761907,-4.309523809523809,0.40427967724168584,0.10858904316375542,-0.29569063407793045
+TRM,78123,104164,15000,20000,lost_success_to_fail,30,1.0,0.6847736616929372,0.011231523806297142,0.09582423393925031,0.08459271013295316,-0.020423608111771803,0.05470764508160452,0.07513125319337632,-0.028769394504100394,0.03790804650634527,0.06667744101044566,1.4333333333333333,8.0,6.566666666666666,0.028053055804533263,0.43766116065283617,0.4096081048483029
+TRM,78123,104164,15000,20000,stayed_failure,118,0.6347562251454693,0.6352793450577784,0.08125136338048063,0.0946442298591137,0.013392866478633072,0.048434453291258124,0.05644692583463423,0.008012472543376102,0.034568173741779715,0.04056167824912059,0.00599350450734087,8.0,8.0,0.0,0.387475626330065,0.45157540667707385,0.06409978034700882
+TRM,78123,104164,15000,20000,stayed_success,322,1.0,1.0,-0.0033467653609375882,-0.0028673611494872695,0.000479404211450319,-0.03182422423116906,-0.01583997491176961,0.01598424931939945,-0.038213564976397704,-0.020403333244893535,0.017810231731504166,0.5900621118012422,0.7204968944099379,0.13043478260869565,0.009997858866017569,0.013265862348969405,0.0032680034829518376
+TRM,104164,130205,20000,25000,learned_fail_to_success,50,0.6804938280582428,1.0,0.09401120468974114,0.03770299475596403,-0.05630820993377711,0.055930792740546166,0.012810704448684191,-0.04312008829186197,0.03986294572241604,0.0028754143451988055,-0.03698753137721724,8.0,5.06,-2.94,0.44744634192436933,0.1199192828458763,-0.32752705907849305
+TRM,104164,130205,20000,25000,lost_success_to_fail,27,1.0,0.6575217269085072,0.0314528225765874,0.10427529282040066,0.07282247024381326,0.0068112800104750525,0.05774478058123754,0.050933500570762485,-0.002804141968630979,0.039058988379245554,0.04186313034787653,3.5185185185185186,8.0,4.481481481481482,0.11630381328804004,0.4619582446499003,0.3456544313618603
+TRM,104164,130205,20000,25000,stayed_failure,98,0.6273620527009575,0.6210632409368243,0.0953284276231211,0.10753870945499867,0.012210281831877572,0.05617782616232313,0.0605833601475009,0.004405533985177778,0.040105838188426376,0.04148375476967087,0.001377916581244493,8.0,8.0,0.0,0.449422609298585,0.4846668811800072,0.03524427188142222
+TRM,104164,130205,20000,25000,stayed_success,337,1.0,1.0,-0.0012681210655689679,0.010519292579277294,0.011787413644846262,-0.014865070802815804,-0.0011317419686626743,0.01373332883415313,-0.01968567295583588,-0.00581690880209499,0.013868764153740892,0.8664688427299704,2.0178041543026706,1.1513353115727003,0.016890636588928174,0.028340769676497318,0.011450133087569142
+TRM,130205,156246,25000,30000,learned_fail_to_success,36,0.6615226318438848,1.0,0.10779489700992902,0.0496451583728938,-0.05814973863703522,0.060211614615076944,0.0020279816988590937,-0.05818363291621785,0.040751232791485056,-0.014924997131149413,-0.05567622992263447,8.0,3.9166666666666665,-4.083333333333333,0.48169291692061555,0.11456112468239452,-0.36713179223822107
+TRM,130205,156246,25000,30000,lost_success_to_fail,35,1.0,0.6684303368840899,0.04063011546220098,0.11092394015618733,0.07029382469398635,0.013878692329477157,0.06066181986326618,0.04678312753378902,0.0034880245831248303,0.04024732473439404,0.03675930015126921,4.771428571428571,8.0,3.2285714285714286,0.13045002815852058,0.48529455890612944,0.35484453074760886
+TRM,130205,156246,25000,30000,stayed_failure,89,0.6157580841793103,0.6254681650172459,0.1064450577571151,0.10696759694412852,0.0005225391870134332,0.05987258701130036,0.059057195764427396,-0.0008153912468729645,0.04104445250757206,0.03980186048848031,-0.0012425920190917475,8.0,8.0,0.0,0.4789806960904029,0.47245756611541917,-0.006523129974983716
+TRM,130205,156246,25000,30000,stayed_success,352,1.0,1.0,0.011386640044993224,0.03174139531593441,0.020354755270941185,-0.0006437956037977619,-0.019161262890415157,-0.018517467286617396,-0.005507410254134774,-0.03439320480515045,-0.028885794551015677,2.1761363636363638,1.5028409090909092,-0.6732954545454546,0.031196171982173837,0.04352239211735051,0.012326220135176673
+TRM,156246,182287,30000,35000,learned_fail_to_success,34,0.691721136079115,1.0,0.10662735976717051,0.040389080076505816,-0.0662382796906647,0.05807228958206799,-0.005224973876813558,-0.06329726345888155,0.03843013702563065,-0.01886841586255126,-0.05729855288818191,8.0,2.3529411764705883,-5.647058823529412,0.4645783166565439,0.0783735490986146,-0.3862047675579293
+TRM,156246,182287,30000,35000,lost_success_to_fail,42,1.0,0.6590241066047123,0.05099954224472943,0.10153728688047045,0.050537744635741025,0.001467981777669333,0.04981639054208894,0.048348408764419606,-0.015194870930394072,0.02946084769584039,0.044655718626234464,3.619047619047619,7.976190476190476,4.357142857142857,0.10997597763036451,0.3985460633134824,0.2885700856831179
+TRM,156246,182287,30000,35000,stayed_failure,90,0.617146776119868,0.6224965714746051,0.10863470890455776,0.1012288083632787,-0.007405900541279051,0.06005329191620048,0.049981245352602045,-0.010072046563598431,0.040493303225634415,0.029898996461027613,-0.010594306764606801,8.0,8.0,0.0,0.4804263353296038,0.39984996282081636,-0.08057637250878745
+TRM,156246,182287,30000,35000,stayed_success,346,1.0,1.0,0.03126652045767183,0.005291063447186988,-0.025975457010484843,-0.019460729569159885,-0.020014630322294655,-0.0005539007531347702,-0.03469804453484909,-0.027679566077947228,0.007018478456901866,1.4971098265895955,0.7543352601156069,-0.7427745664739884,0.042847085125428536,0.022412010842762845,-0.020435074282665687
+TRM,182287,208328,35000,40000,learned_fail_to_success,38,0.6591942835795251,1.0,0.1029400615707824,0.0570142870082667,-0.04592577456251571,0.050415613402851465,0.011271269248601655,-0.03914434415424981,0.02967732811904218,-0.004436708284342067,-0.03411403640338425,8.0,4.657894736842105,-3.3421052631578947,0.4033249072228117,0.13436546987328543,-0.26895943734952626
+TRM,182287,208328,35000,40000,lost_success_to_fail,34,1.0,0.670297747149187,0.04045343530039164,0.10570072382688522,0.06524728852649359,-0.0032343704180963118,0.05597326002658948,0.059207630444685785,-0.017199626846430713,0.035191281082327754,0.052390907928758466,2.6176470588235294,8.0,5.382352941176471,0.08064539896464767,0.4477860802127158,0.36714068124806815
+TRM,182287,208328,35000,40000,stayed_failure,94,0.6239821397243662,0.6187286605226233,0.10067485597856501,0.10716580845257069,0.006490952474005679,0.04973199122546345,0.05547681558775676,0.005744824362293312,0.029792838470150837,0.03494465546162324,0.005151816991472399,7.98936170212766,8.0,0.010638297872340425,0.39786260466566903,0.44381452470205407,0.045951920036385034
+TRM,182287,208328,35000,40000,stayed_success,346,1.0,1.0,0.005284739523452543,0.03552449149677848,0.030239751973325932,-0.020210238754654154,-0.004939072638686333,0.01527116611596782,-0.027843551125774104,-0.016626303564166032,0.011217247561608072,0.7283236994219653,2.0838150289017343,1.3554913294797688,0.022188765480204684,0.050417376088078246,0.02822861060787356
+TRM,208328,234369,40000,45000,learned_fail_to_success,45,0.6496570653385586,1.0,0.10920602844821083,0.04854822551779863,-0.060657802930412195,0.05672489060088992,0.008596817297721928,-0.04812807330316799,0.03611175337185462,-0.004522915925513694,-0.040634669297368316,8.0,4.177777777777778,-3.8222222222222224,0.45379912480711937,0.12477201784753965,-0.3290271069595797
+TRM,208328,234369,40000,45000,lost_success_to_fail,33,1.0,0.6352413098017374,0.062286600188323944,0.10571520346583742,0.043428603277513474,0.01137912593394779,0.052789654194687806,0.041410528260740015,-0.005373023553560911,0.031694172147774334,0.037067195701335244,4.212121212121212,8.0,3.787878787878788,0.14217090340124824,0.42231723355750245,0.2801463301562542
+TRM,208328,234369,40000,45000,stayed_failure,83,0.6230849343610098,0.6302246002547712,0.10545950993356934,0.10543236639126237,-2.7143542306968965e-05,0.05500351119413411,0.052311619395861424,-0.002691891798272684,0.034412918897087585,0.0311619305242616,-0.003250988372825984,8.0,8.0,0.0,0.4400280895530729,0.4184929551668914,-0.021535134386181473
+TRM,208328,234369,40000,45000,stayed_success,351,1.0,1.0,0.03533492637602506,0.014196671783756859,-0.021138254592268202,-0.004718296459711924,-0.014446210490412838,-0.009727914030700913,-0.016364632965068193,-0.022218270485360785,-0.005853637520292594,2.1623931623931623,1.1111111111111112,-1.0512820512820513,0.050879373702047655,0.02393511410796923,-0.026944259594078424
+TRM,234369,260410,45000,50000,learned_fail_to_success,35,0.6613756656646729,1.0,0.10479770622083119,0.029873117019035687,-0.0749245892017955,0.05238420652957367,0.006787718061972165,-0.045596488467601504,0.03146511843827154,-0.001710809776568567,-0.033175928214840104,8.0,4.057142857142857,-3.942857142857143,0.4190736522365894,0.07165596114105678,-0.3474176910955326
+TRM,234369,260410,45000,50000,lost_success_to_fail,37,1.0,0.6593259943498148,0.04475835560997193,0.10352358201871047,0.05876522640873854,0.0072115822668648265,0.05163027802671029,0.044418695759845465,-0.0057618086022950986,0.030947812927282742,0.03670962152957784,4.081081081081081,7.972972972972973,3.891891891891892,0.11042228619555465,0.4130832626408822,0.3026609764453275
+TRM,234369,260410,45000,50000,stayed_failure,81,0.6188081079795037,0.6168267027831372,0.10582183218664593,0.10270964078329227,-0.003112191403353656,0.05247500950303848,0.050216694190713035,-0.0022583153123254484,0.031247762580873605,0.030064451486665617,-0.001183311094207988,8.0,8.0,0.0,0.41980007602430786,0.4017335535257043,-0.018066522498603587
+TRM,234369,260410,45000,50000,stayed_success,359,1.0,1.0,0.01535276542292656,0.011521664883778544,-0.003831100539148015,-0.01378994887914044,0.0011695116181482481,0.014959460497288688,-0.021696259717896474,-0.002636141539739297,0.019060118178157174,1.1894150417827298,3.097493036211699,1.9080779944289694,0.027661117732036104,0.025961193830398188,-0.0016999239016379185
diff --git a/problem_tracks/trm_learning_groups.csv b/problem_tracks/trm_learning_groups.csv
new file mode 100644
index 0000000..a83c198
--- /dev/null
+++ b/problem_tracks/trm_learning_groups.csv
@@ -0,0 +1,6 @@
+kind,group,n,fraction,givens_mean,final_success_rate,final_token_acc_mean,initial_lambda_max,final_lambda_max,initial_mean8,final_mean8,initial_tail_mean_5_8,final_tail_mean_5_8,initial_positive_count,final_positive_count,initial_positive_sum,final_positive_sum
+TRM,always,250,0.48828125,25.692,1.0,1.0,-0.037265183098614214,0.006809890952470596,-0.04636771480855532,-0.0004276458734017012,-0.05030216554750223,-0.0029380860021610716,0.116,2.804,0.001749304014723748,0.012296678611861353
+TRM,flaky_then_stable,108,0.2109375,24.537037037037038,1.0,1.0,0.012910711238402183,0.02768854924549184,-0.006070692985447211,0.006526719410433973,-0.014126331651604469,-0.0014988291085926532,3.3981481481481484,4.074074074074074,0.06504391207292631,0.07025124873246534
+TRM,never,38,0.07421875,24.526315789473685,0.0,0.5925925891650351,0.03881909165188278,0.10224098241642902,0.014017523774256054,0.04867523749463113,0.0033364049057509923,0.028709458999066174,5.894736842105263,8.0,0.12778331804943982,0.38940189995704905
+TRM,stable_learned,36,0.0703125,25.22222222222222,1.0,1.0,0.04277012761708142,0.013583353675332747,0.0174149208698648,0.0016514048641056434,0.0066068671405522155,-0.003051614185500916,6.388888888888889,3.138888888888889,0.14709951728016474,0.03240896413828725
+TRM,transient_or_regressed,80,0.15625,24.375,0.0,0.6479938291013241,0.0264392346231034,0.10330870132893324,0.006250166277587255,0.05160266864550067,-0.002536980269991318,0.031116627584560776,5.025,7.9875,0.0972329065928534,0.41284032943658533
diff --git a/problem_tracks/trm_problem_tracks.csv b/problem_tracks/trm_problem_tracks.csv
new file mode 100644
index 0000000..0322a8b
--- /dev/null
+++ b/problem_tracks/trm_problem_tracks.csv
@@ -0,0 +1,513 @@
+kind,panel_row,test_idx,group,n_success_checkpoints,transitions,first_success_step,first_success_epoch,stable_success_step,stable_success_epoch,final_success,final_token_acc,final_lambda_max,final_mean8,final_tail_mean_5_8,final_positive_count,final_positive_sum,success@26041,lambda_max@26041,mean8@26041,positive_count@26041,success@52082,lambda_max@52082,mean8@52082,positive_count@52082,success@78123,lambda_max@78123,mean8@78123,positive_count@78123,success@104164,lambda_max@104164,mean8@104164,positive_count@104164,success@130205,lambda_max@130205,mean8@130205,positive_count@130205,success@156246,lambda_max@156246,mean8@156246,positive_count@156246,success@182287,lambda_max@182287,mean8@182287,positive_count@182287,success@208328,lambda_max@208328,mean8@208328,positive_count@208328,success@234369,lambda_max@234369,mean8@234369,positive_count@234369,success@260410,lambda_max@260410,mean8@260410,positive_count@260410,givens,blanks,label_nonzero
+TRM,0,1156,always,10,0,26041,5000,26041,5000,True,1.0,0.006261719390749931,-0.00019785021504503675,-0.0021421986166387796,2.0,0.007990005775354803,1,-0.007432084064930677,-0.021371259645093232,0.0,1,-0.006981610786169767,-0.014650021039415151,0.0,1,0.008165371604263783,-0.015432298532687128,1.0,1,-0.019545702263712883,-0.022597349947318435,0.0,1,-0.0016457129968330264,-0.005993720129481517,0.0,1,0.03514038026332855,-0.025738748139701784,1.0,1,-0.022833263501524925,-0.027487563667818904,0.0,1,0.05641128495335579,-0.0017058413359336555,2.0,1,0.02671116031706333,-0.01637521106749773,1.0,1,0.006261719390749931,-0.00019785021504503675,2.0,26,55,81
+TRM,1,154226,flaky_then_stable,9,2,26041,5000,130205,25000,True,1.0,-0.003129795892164111,-0.005875314614968374,-0.007357077091000974,0.0,0.0,1,-0.04258984699845314,-0.04651858797296882,0.0,1,0.017818568274378777,-0.003897864546161145,2.0,1,-0.0076691466383636,-0.03812172677135095,0.0,0,0.10470711439847946,0.05623563611879945,8.0,1,0.03186999261379242,0.010755908122519031,6.0,1,0.05626671016216278,-0.006548094796016812,2.0,1,0.02044833078980446,-0.013509721524314955,1.0,1,0.003520330647006631,-0.013241372929769568,1.0,1,0.015369764529168606,-0.007299313321709633,2.0,1,-0.003129795892164111,-0.005875314614968374,0.0,25,56,81
+TRM,2,303891,never,0,0,,,,,False,0.5432098507881165,0.09316887706518173,0.04491599043831229,0.02652275189757347,8.0,0.35932792350649834,0,0.05498528108000755,0.014848684761091135,5.0,0,0.09076666086912155,0.05310910474509001,8.0,0,0.08551380038261414,0.04787771753035486,8.0,0,0.09942676872015,0.05359577643685043,8.0,0,0.09060579538345337,0.06244437349960208,8.0,0,0.07361156493425369,0.042246295837685466,8.0,0,0.09583936631679535,0.03698386927135289,8.0,0,0.0801393985748291,0.04533419094514102,8.0,0,0.09757068008184433,0.044286479242146015,8.0,0,0.09316887706518173,0.04491599043831229,8.0,26,55,81
+TRM,3,222053,always,10,0,26041,5000,26041,5000,True,1.0,0.007307944819331169,0.0015346816107921768,-6.513459084089845e-06,6.0,0.013604309759102762,1,-0.010886727832257748,-0.02834562410134822,0.0,1,0.002137027680873871,-0.012261236784979701,1.0,1,-0.0012447952758520842,-0.03669428694411181,0.0,1,-0.018220817670226097,-0.0232099078129977,0.0,1,0.0015099969459697604,-0.006017143852659501,1.0,1,0.003501614322885871,-0.03362909590941854,1.0,1,-0.01329417061060667,-0.025814907625317574,0.0,1,0.015296523459255695,-0.010488703730516136,1.0,1,0.024668574333190918,-0.01691429945640266,1.0,1,0.007307944819331169,0.0015346816107921768,6.0,26,55,81
+TRM,4,363763,transient_or_regressed,4,2,130205,25000,,,False,0.654321014881134,0.11888653039932251,0.06039979285560548,0.037642091047018766,8.0,0.48319834284484386,0,0.05615077167749405,0.0288021833403036,8.0,0,0.09579379111528397,0.05860836571082473,8.0,0,0.10158418864011765,0.05414091609418392,8.0,0,0.09205789119005203,0.058108913246542215,8.0,1,0.10870009660720825,0.05845570261590183,8.0,1,0.11822271347045898,0.05059026228263974,8.0,1,0.05990901589393616,0.01882391844992526,6.0,1,0.037786759436130524,0.00885379989631474,5.0,0,0.09718281030654907,0.04559142654761672,8.0,0,0.11888653039932251,0.06039979285560548,8.0,26,55,81
+TRM,5,179627,always,10,0,26041,5000,26041,5000,True,1.0,0.0036104507744312286,-0.0026662608943297528,-0.0047467858530581,1.0,0.0036104507744312286,1,-0.03550131618976593,-0.04685069574043155,0.0,1,-0.012081276625394821,-0.017143636709079146,0.0,1,-0.019132744520902634,-0.042592484038323164,0.0,1,-0.005833679344505072,-0.022204091248568147,0.0,1,-0.003034139983355999,-0.007082004187395796,0.0,1,0.02881353348493576,-0.03680282412096858,1.0,1,-0.018587470054626465,-0.028278885642066598,0.0,1,0.04161381348967552,-0.008778129471465945,1.0,1,-0.002511553466320038,-0.019693084061145782,0.0,1,0.0036104507744312286,-0.0026662608943297528,1.0,25,56,81
+TRM,6,37589,always,10,0,26041,5000,26041,5000,True,1.0,0.014025895856320858,0.0024545961496187374,7.042373181320727e-05,7.0,0.02084470132831484,1,-0.024549104273319244,-0.037975293351337314,0.0,1,-0.0005101070855744183,-0.00781063119211467,0.0,1,-0.006555062253028154,-0.040147141146007925,0.0,1,-0.00439834501594305,-0.01622978155501187,0.0,1,6.176624447107315e-05,-0.004186107469649869,1.0,1,-0.005535021889954805,-0.031469191948417574,0.0,1,-0.0226890966296196,-0.030005630338564515,0.0,1,0.020611053332686424,-0.009529400820611045,3.0,1,0.0010629260214045644,-0.021735565256676637,1.0,1,0.014025895856320858,0.0024545961496187374,7.0,27,54,81
+TRM,7,200316,transient_or_regressed,2,4,104164,20000,,,False,0.654321014881134,0.12140269577503204,0.04932845290750265,0.024845009669661522,8.0,0.3946276232600212,0,0.01633533276617527,-0.004121059784665704,2.0,0,0.051447056233882904,0.022998742701020092,8.0,0,0.08013887703418732,0.04984955117106438,8.0,1,0.03076753206551075,0.00027653957658912987,3.0,0,0.11281942576169968,0.06445710267871618,8.0,1,0.08425885438919067,0.01790530885773478,4.0,0,0.10451790690422058,0.04628661088645458,8.0,0,0.09872143715620041,0.04775250772945583,8.0,0,0.09753701090812683,0.03915295621845871,8.0,0,0.12140269577503204,0.04932845290750265,8.0,26,55,81
+TRM,8,308059,flaky_then_stable,6,3,52082,10000,208328,40000,True,1.0,0.0033923655282706022,-0.0017585570749361068,-0.0036204314092174172,2.0,0.004422951838932931,0,0.06318797171115875,0.01435880440112669,5.0,1,-0.0013545802794396877,-0.007839117286494002,0.0,1,0.01908615045249462,-0.012402364809531718,1.0,1,-0.010291533544659615,-0.020134266349487007,0.0,0,0.09863433241844177,0.0633489079773426,8.0,0,0.10864094644784927,0.05900883628055453,8.0,0,0.12950918078422546,0.056678681867197156,8.0,1,0.03008337877690792,-0.014012786909006536,1.0,1,0.020343100652098656,-0.017462440649978817,1.0,1,0.0033923655282706022,-0.0017585570749361068,2.0,26,55,81
+TRM,9,83380,transient_or_regressed,3,4,130205,25000,,,False,0.654321014881134,0.12334837764501572,0.06765767792239785,0.04721271712332964,8.0,0.5412614233791828,0,0.025207746773958206,0.009430714111658745,7.0,0,0.033056993037462234,0.023118843557313085,8.0,0,0.050779201090335846,0.03913303930312395,8.0,0,0.09000235050916672,0.0688552986830473,8.0,1,0.009308685548603535,-0.0029880520014557987,2.0,1,0.03857949376106262,0.0025996715121436864,4.0,0,0.10085798799991608,0.05828439397737384,8.0,0,0.11706221103668213,0.06426165672019124,8.0,1,0.020331721752882004,-0.003383302624570206,2.0,0,0.12334837764501572,0.06765767792239785,8.0,24,57,81
+TRM,10,420254,transient_or_regressed,6,4,78123,15000,,,False,0.654321014881134,0.08357255160808563,0.04321214579977095,0.02702968055382371,8.0,0.3456971663981676,0,0.03855982795357704,0.025818412424996495,8.0,0,0.05877897888422012,0.03043198585510254,8.0,1,0.03474907949566841,-0.01225949649233371,1.0,1,0.0008194061811082065,-0.015842911474464927,1.0,1,0.08406517654657364,0.03960558888502419,8.0,0,0.09807900339365005,0.05523704062215984,8.0,1,-0.014896349050104618,-0.026173203601501882,0.0,1,0.04695337638258934,0.00987856279425614,4.0,1,0.09338291734457016,0.03654939332045615,8.0,0,0.08357255160808563,0.04321214579977095,8.0,24,57,81
+TRM,11,259891,flaky_then_stable,6,3,104164,20000,208328,40000,True,1.0,0.05110100284218788,0.014857629626931157,-0.0008129261696012691,6.0,0.12443357345182449,0,-0.015488697215914726,-0.020970238256268203,0.0,0,0.03567352145910263,0.0168415370862931,8.0,0,0.07142165303230286,0.03899579239077866,8.0,1,0.03157246485352516,0.005153424135642126,5.0,1,0.01921178586781025,0.0013786217677989043,3.0,1,0.08187287300825119,0.017969658219954,5.0,0,0.10155796259641647,0.04574026761110872,8.0,1,0.07528190314769745,0.01478674085228704,6.0,1,0.0008820609655231237,-0.015014216915005818,1.0,1,0.05110100284218788,0.014857629626931157,6.0,24,57,81
+TRM,12,194338,never,0,0,,,,,False,0.6419752836227417,0.10120008140802383,0.040023627458140254,0.017260119784623384,8.0,0.32018901966512203,0,-0.006921092048287392,-0.014255001093260944,0.0,0,0.031349994242191315,0.012222979756188579,7.0,0,0.09119066596031189,0.04937388235703111,8.0,0,0.11712923645973206,0.05751275643706322,8.0,0,0.1256898045539856,0.06297703995369375,8.0,0,0.10211918503046036,0.05468951165676117,8.0,0,0.09628421813249588,0.04114029195625335,8.0,0,0.11508936434984207,0.05340053327381611,8.0,0,0.1090230941772461,0.046853036154061556,8.0,0,0.10120008140802383,0.040023627458140254,8.0,24,57,81
+TRM,13,362081,always,10,0,26041,5000,26041,5000,True,1.0,-0.0023033057805150747,-0.004131321067688987,-0.005097253946587443,0.0,0.0,1,-0.03836342692375183,-0.049174942541867495,0.0,1,0.0013441088376566768,-0.004935988967190497,1.0,1,-0.02091839909553528,-0.03542912914417684,0.0,1,-0.019339343532919884,-0.022638587281107903,0.0,1,-0.004426061175763607,-0.00999411876546219,0.0,1,0.027841052040457726,-0.027820345014333725,1.0,1,-0.015576225705444813,-0.026839818223379552,0.0,1,0.07379674166440964,0.0023571341298520565,2.0,1,0.022374199703335762,-0.018796640913933516,1.0,1,-0.0023033057805150747,-0.004131321067688987,0.0,27,54,81
+TRM,14,18267,always,10,0,26041,5000,26041,5000,True,1.0,4.540309600997716e-05,-0.0008999938436318189,-0.0014848704158794135,1.0,4.540309600997716e-05,1,-0.059877652674913406,-0.06382079888135195,0.0,1,-0.012831909582018852,-0.01594528672285378,0.0,1,-0.029064297676086426,-0.039815871976315975,0.0,1,-0.023557476699352264,-0.028779699932783842,0.0,1,-0.0009060092270374298,-0.004522258415818214,0.0,1,-0.01936774142086506,-0.03689275379292667,0.0,1,-0.013386666774749756,-0.024643493117764592,0.0,1,0.0008958085090853274,-0.010119142876646947,1.0,1,-0.023058859631419182,-0.027245872654020786,0.0,1,4.540309600997716e-05,-0.0008999938436318189,1.0,34,47,81
+TRM,15,119070,always,10,0,26041,5000,26041,5000,True,1.0,0.03330904617905617,0.0014537751558236778,-0.0037428303039632738,1.0,0.03330904617905617,1,-0.04782940819859505,-0.0559598901309073,0.0,1,-0.01199688296765089,-0.014203674043528736,0.0,1,-0.02126593142747879,-0.04319651238620281,0.0,1,-0.003373011015355587,-0.018898227252066135,0.0,1,-0.004313125275075436,-0.007913499255664647,0.0,1,0.03372712433338165,-0.016638731329294387,2.0,1,-0.01422568317502737,-0.02622829412575811,0.0,1,0.0889613926410675,0.0017203285242430866,2.0,1,0.037715356796979904,-0.017942859325557947,1.0,1,0.03330904617905617,0.0014537751558236778,1.0,25,56,81
+TRM,16,152975,transient_or_regressed,1,2,234369,45000,,,False,0.6419752836227417,0.09565338492393494,0.04326039587613195,0.023427118314430118,8.0,0.3460831670090556,0,0.02175532467663288,0.006484614743385464,5.0,0,0.0520818792283535,0.0319441455649212,8.0,0,0.08536006510257721,0.05807051155716181,8.0,0,0.08455812186002731,0.05681967316195369,8.0,0,0.12751951813697815,0.06377243483439088,8.0,0,0.09545081108808517,0.051109621999785304,8.0,0,0.10050468146800995,0.0554558418225497,8.0,0,0.1050829142332077,0.05073009501211345,8.0,1,0.09530101716518402,0.024524553504306823,7.0,0,0.09565338492393494,0.04326039587613195,8.0,23,58,81
+TRM,17,172350,transient_or_regressed,2,4,104164,20000,,,False,0.7037037014961243,0.1256837397813797,0.05857978551648557,0.03760806890204549,8.0,0.4686382841318846,0,0.009947114624083042,-0.005293910272484936,2.0,0,0.03369177132844925,0.018499728525057435,8.0,0,0.07062448561191559,0.048060381319373846,8.0,1,0.02509929984807968,0.006088777081458829,4.0,0,0.09117764979600906,0.06262086844071746,8.0,0,0.1252734512090683,0.06925643235445023,8.0,0,0.12224840372800827,0.06545593729242682,8.0,1,0.045738134533166885,-1.2008240446448326e-06,3.0,0,0.11431754380464554,0.06354063376784325,8.0,0,0.1256837397813797,0.05857978551648557,8.0,25,56,81
+TRM,18,370209,never,0,0,,,,,False,0.5185185074806213,0.1068333238363266,0.05273477197624743,0.03360715275630355,8.0,0.42187817580997944,0,0.04840861260890961,0.02341597032500431,8.0,0,0.036973293870687485,0.016961806124527357,8.0,0,0.10986050963401794,0.06207602936774492,8.0,0,0.07967216521501541,0.04715713392943144,8.0,0,0.09606034308671951,0.06111013190820813,8.0,0,0.089079350233078,0.05407243291847408,8.0,0,0.09790723770856857,0.045337704475969076,8.0,0,0.09715190529823303,0.051234310027211905,8.0,0,0.1151273176074028,0.04672593716531992,8.0,0,0.1068333238363266,0.05273477197624743,8.0,23,58,81
+TRM,19,222327,always,10,0,26041,5000,26041,5000,True,1.0,-2.0088551536900923e-05,-0.0009132941190728161,-0.0014660543820355088,0.0,0.0,1,-0.04799820855259895,-0.05286828428506851,0.0,1,-0.011844887398183346,-0.015838365186937153,0.0,1,-0.01486145332455635,-0.03960488038137555,0.0,1,-0.013958911411464214,-0.020928880549035966,0.0,1,-0.0034097700845450163,-0.008590606827056035,0.0,1,0.015893906354904175,-0.033357299165800214,1.0,1,0.006465907208621502,-0.024233640753664076,1.0,1,0.033794108778238297,-0.012366141541860998,2.0,1,0.005689592566341162,-0.01820871134987101,1.0,1,-2.0088551536900923e-05,-0.0009132941190728161,0.0,26,55,81
+TRM,20,263924,transient_or_regressed,2,2,104164,20000,,,False,0.6666666865348816,0.09295758605003357,0.04956150287762284,0.03068060614168644,8.0,0.39649202302098274,0,0.034157514572143555,0.015115216607227921,7.0,0,0.05733582004904747,0.03984995395876467,8.0,0,0.08293323218822479,0.05687741609290242,8.0,1,0.07871007174253464,0.0422046713065356,8.0,1,0.009792117401957512,-0.0027612845860858215,1.0,0,0.12599724531173706,0.0706640393473208,8.0,0,0.10398203879594803,0.05544533999636769,8.0,0,0.10333681106567383,0.053818560438230634,8.0,0,0.11015965044498444,0.054350086487829685,8.0,0,0.09295758605003357,0.04956150287762284,8.0,24,57,81
+TRM,21,27024,never,0,0,,,,,False,0.604938268661499,0.08638563007116318,0.03455789090367034,0.015345313237048686,8.0,0.2764631272293627,0,0.05556526035070419,0.028341090393951163,8.0,0,0.06678444892168045,0.034479558700695634,8.0,0,0.09674238413572311,0.05398234585300088,8.0,0,0.11458904296159744,0.05965495714917779,8.0,0,0.09092935919761658,0.05046767997555435,8.0,0,0.10297633707523346,0.053264118963852525,8.0,0,0.11269724369049072,0.04654954862780869,8.0,0,0.09772659838199615,0.05012173834256828,8.0,0,0.10214363038539886,0.04754222463816404,8.0,0,0.08638563007116318,0.03455789090367034,8.0,22,59,81
+TRM,22,309268,transient_or_regressed,9,1,26041,5000,,,False,0.6419752836227417,0.1023574024438858,0.04625401948578656,0.023155274335294962,8.0,0.37003215588629246,1,-0.0173800028860569,-0.03472296288236976,0.0,1,0.015551986172795296,-0.002612924850836862,2.0,1,0.03213735297322273,-0.008577327069360763,2.0,1,-0.016573341563344002,-0.021254900377243757,0.0,1,0.0437554270029068,0.014062965754419565,6.0,1,0.08210181444883347,0.015182952396571636,5.0,1,0.10457192361354828,0.04524546000175178,8.0,1,0.011749908328056335,-0.008249142629210837,1.0,1,0.011133940890431404,-0.01159863974316977,1.0,0,0.1023574024438858,0.04625401948578656,8.0,26,55,81
+TRM,23,320288,flaky_then_stable,6,5,78123,15000,260410,50000,True,1.0,0.026434164494276047,0.00474702903375146,-0.0024226728928624652,5.0,0.04856455570552498,0,0.04190463200211525,0.02359144005458802,8.0,0,0.07404482364654541,0.03200974129140377,8.0,1,0.0037540248595178127,-0.028233441815245897,1.0,1,0.09392571449279785,0.053578601218760014,8.0,1,0.0034194684121757746,-0.005925026765908115,1.0,1,0.02902514673769474,-0.027590614277869463,1.0,0,0.11431149393320084,0.0616195322945714,8.0,1,0.02334466204047203,-0.010007919510826468,1.0,0,0.12649574875831604,0.0636917648371309,8.0,1,0.026434164494276047,0.00474702903375146,5.0,24,57,81
+TRM,24,407124,always,10,0,26041,5000,26041,5000,True,1.0,0.003840940771624446,0.0016106319380924106,0.00026300258468836546,6.0,0.01514623872935772,1,-0.06393436342477798,-0.06708991061896086,0.0,1,-0.013699650764465332,-0.015298411715775728,0.0,1,-0.02472015842795372,-0.04164103791117668,0.0,1,-0.029053963720798492,-0.03342657792381942,0.0,1,-0.0019579220097512007,-0.0041514594631735235,0.0,1,-0.010035520419478416,-0.03440697328187525,0.0,1,-0.017375154420733452,-0.026137752691283822,0.0,1,0.027739832177758217,-0.00940551758685615,1.0,1,-0.010084436275064945,-0.024530373862944543,0.0,1,0.003840940771624446,0.0016106319380924106,6.0,33,48,81
+TRM,25,61471,always,10,0,26041,5000,26041,5000,True,1.0,0.012670169584453106,0.00087924439139897,-0.001801287304260768,3.0,0.014388470153789967,1,-0.039549168199300766,-0.047866484615951777,0.0,1,0.002492823638021946,-0.007332482724450529,1.0,1,-0.002067897468805313,-0.03030853858217597,0.0,1,-0.012822914868593216,-0.020432316232472658,0.0,1,0.014298267662525177,-0.0007757673083688132,2.0,1,0.0030297688208520412,-0.017384680788381957,1.0,1,-0.0060186670161783695,-0.025600595690775663,0.0,1,0.06391679495573044,-0.0036396048963069916,2.0,1,0.0576988086104393,-0.008993474242743105,1.0,1,0.012670169584453106,0.00087924439139897,3.0,27,54,81
+TRM,26,315244,never,0,0,,,,,False,0.5432098507881165,0.11102332919836044,0.05970550375059247,0.03719591349363327,8.0,0.47764403000473976,0,0.07942711561918259,0.02960869303205982,8.0,0,0.07099616527557373,0.03468414186500013,8.0,0,0.06784316897392273,0.04330622171983123,8.0,0,0.08471877872943878,0.048252178356051445,8.0,0,0.10184779018163681,0.06579442135989666,8.0,0,0.12465453147888184,0.07283013872802258,8.0,0,0.11166422814130783,0.05948836822062731,8.0,0,0.10289649665355682,0.060068110935389996,8.0,0,0.0976049154996872,0.051906846230849624,8.0,0,0.11102332919836044,0.05970550375059247,8.0,22,59,81
+TRM,27,175529,always,10,0,26041,5000,26041,5000,True,1.0,0.004258653614670038,7.252512568811653e-05,-0.0015737463545519859,3.0,0.00688329542754218,1,-0.01598750799894333,-0.02833057497628033,0.0,1,-0.014668182469904423,-0.017582261352799833,0.0,1,0.0011688745580613613,-0.030664287565741688,1.0,1,-0.021340763196349144,-0.0237712524831295,0.0,1,-0.0016535709146410227,-0.0072165018355008215,0.0,1,0.057712946087121964,-0.018135484191589057,1.0,1,0.010437006130814552,-0.017216732929227874,1.0,1,0.08570417016744614,-0.00333721237257123,2.0,1,-0.00745236175134778,-0.019168108061421663,0.0,1,0.004258653614670038,7.252512568811653e-05,3.0,24,57,81
+TRM,28,114564,always,10,0,26041,5000,26041,5000,True,1.0,0.0072044129483401775,-8.673192314745393e-05,-0.0025271887534472626,4.0,0.009414899628609419,1,-0.04406457394361496,-0.051273975521326065,0.0,1,-0.011025325395166874,-0.017685405793599784,0.0,1,-0.006266339216381311,-0.04028390330495313,0.0,1,-0.016327913850545883,-0.020520236575976014,0.0,1,-0.0030192523263394833,-0.00852351589128375,0.0,1,0.006734627299010754,-0.031551590538583696,1.0,1,-0.02206995338201523,-0.030242094304412603,0.0,1,0.027034323662519455,-0.006540249509271234,2.0,1,0.019334610551595688,-0.01839829352684319,2.0,1,0.0072044129483401775,-8.673192314745393e-05,4.0,27,54,81
+TRM,29,141970,always,10,0,26041,5000,26041,5000,True,1.0,0.002108609536662698,-0.0011489759999676608,-0.0027373161865398288,2.0,0.0024741539382375777,1,-0.04726424068212509,-0.05219613015651703,0.0,1,-0.01044586580246687,-0.01383635203819722,0.0,1,0.0025393611285835505,-0.03241082557360642,1.0,1,-0.009404308162629604,-0.02102791320066899,0.0,1,0.010522894561290741,-0.0052106383081991225,1.0,1,0.049257200211286545,-0.03224837640300393,1.0,1,-0.023927075788378716,-0.030021899146959186,0.0,1,0.02011517621576786,-0.014130476163700223,1.0,1,-0.0005957981338724494,-0.016373270700569265,0.0,1,0.002108609536662698,-0.0011489759999676608,2.0,24,57,81
+TRM,30,138609,transient_or_regressed,3,5,26041,5000,,,False,0.6666666865348816,0.10120400786399841,0.04419468517880887,0.019200057489797473,8.0,0.35355748143047094,1,-0.03299885615706444,-0.04108579596504569,0.0,0,0.062356412410736084,0.03595641837455332,8.0,0,0.08562343567609787,0.04772266815416515,8.0,0,0.0781993493437767,0.04771148134022951,8.0,1,0.0023546144366264343,-0.005737346229579998,1.0,0,0.10548162460327148,0.04949050163850188,8.0,0,0.07076416909694672,0.03601185081060976,8.0,0,0.06901093572378159,0.034735873457975686,8.0,1,0.009265610948204994,-0.015785526833496988,1.0,0,0.10120400786399841,0.04419468517880887,8.0,23,58,81
+TRM,31,256175,stable_learned,3,1,208328,40000,208328,40000,True,1.0,0.029406124725937843,0.006652040479821153,-0.0038655358948744833,4.0,0.06867846741806716,0,0.013148889876902103,-0.0014934991486370564,3.0,0,0.035257790237665176,0.017751593491993845,8.0,0,0.0787709653377533,0.04363875975832343,8.0,0,0.09915914386510849,0.05111538455821574,8.0,0,0.10819508135318756,0.06065094843506813,8.0,0,0.10750609636306763,0.061804505065083504,8.0,0,0.10884405672550201,0.056297208648175,8.0,1,0.07847143709659576,0.03163953748298809,8.0,1,0.026178549975156784,-0.005030861328123137,2.0,1,0.029406124725937843,0.006652040479821153,4.0,26,55,81
+TRM,32,420358,stable_learned,9,1,52082,10000,52082,10000,True,1.0,0.012964152731001377,-0.003873448760714382,-0.008399720303714275,1.0,0.012964152731001377,0,0.07169795781373978,0.022534565770911286,7.0,1,-0.002114243805408478,-0.010695333243347704,0.0,1,-0.005300628952682018,-0.03190539206843823,0.0,1,-0.005194996017962694,-0.020797777047846466,0.0,1,0.018250487744808197,-0.0025876648323901463,2.0,1,0.0030586407519876957,-0.029112056537996978,1.0,1,-0.007016698829829693,-0.02258968388196081,0.0,1,0.007872262969613075,-0.014004938711877912,2.0,1,0.0017288774251937866,-0.017039840458892286,1.0,1,0.012964152731001377,-0.003873448760714382,1.0,25,56,81
+TRM,33,175168,always,10,0,26041,5000,26041,5000,True,1.0,0.007429452147334814,0.004556036481517367,0.002615887060528621,8.0,0.036448291852138937,1,-0.06185147911310196,-0.06590405851602554,0.0,1,-0.011411992833018303,-0.014532045926898718,0.0,1,-0.02719208598136902,-0.04041793290525675,0.0,1,-0.025662235915660858,-0.029050827492028475,0.0,1,-0.0022146061528474092,-0.006805114011513069,0.0,1,-0.009026418440043926,-0.03956297819968313,0.0,1,-0.011934402398765087,-0.02609948313329369,0.0,1,0.0035695573315024376,-0.01407478388864547,1.0,1,-0.022341033443808556,-0.029655226971954107,0.0,1,0.007429452147334814,0.004556036481517367,8.0,34,47,81
+TRM,34,181621,flaky_then_stable,3,3,156246,30000,260410,50000,True,1.0,0.001267184386961162,-0.004826532342121936,-0.006540142581798136,1.0,0.001267184386961162,0,0.005555230658501387,-0.006121564540080726,2.0,0,0.053419362753629684,0.037158948834985495,8.0,0,0.08000104129314423,0.049032176146283746,8.0,0,0.09002313762903214,0.060700912959873676,8.0,0,0.11336788535118103,0.06435664417222142,8.0,1,0.07435989379882812,0.013382789678871632,4.0,1,0.07910968363285065,0.034685695136431605,8.0,0,0.11026336997747421,0.05740652862004936,8.0,0,0.10047663003206253,0.056632340187206864,8.0,1,0.001267184386961162,-0.004826532342121936,1.0,23,58,81
+TRM,35,391825,always,10,0,26041,5000,26041,5000,True,1.0,0.005310053937137127,0.002108180953655392,-3.70971392840147e-05,6.0,0.020552427624352276,1,-0.0374230332672596,-0.04549784446135163,0.0,1,-0.0007807715446688235,-0.00957566838042112,0.0,1,-0.010149280540645123,-0.034246145631186664,0.0,1,-0.0050306362099945545,-0.019958738062996417,0.0,1,0.0029359739273786545,-0.006036343809682876,1.0,1,0.003973464481532574,-0.029495129245333374,1.0,1,-0.0033128438517451286,-0.02104179165326059,0.0,1,0.012046846561133862,-0.008788127175648697,3.0,1,0.05202462524175644,-0.011528056114912033,1.0,1,0.005310053937137127,0.002108180953655392,6.0,27,54,81
+TRM,36,302029,flaky_then_stable,7,3,78123,15000,234369,45000,True,1.0,0.016370657831430435,-0.001094518491299823,-0.00661092100199312,3.0,0.021267606876790524,0,0.0029136263765394688,-0.006950667724595405,2.0,0,0.0533880814909935,0.03663867642171681,8.0,1,0.004933574702590704,-0.025891400000546128,1.0,1,0.08351153880357742,0.04919528914615512,8.0,1,0.02976829744875431,0.009554193733492866,6.0,1,0.005607255734503269,-0.03082879527937621,1.0,1,0.013917697593569756,-0.009065494901733473,2.0,0,0.10859924554824829,0.05391609203070402,8.0,1,0.049969810992479324,-0.01031015522312373,1.0,1,0.016370657831430435,-0.001094518491299823,3.0,26,55,81
+TRM,37,393748,always,10,0,26041,5000,26041,5000,True,1.0,0.011438971385359764,-0.002149710911908187,-0.006059209583327174,2.0,0.012642669607885182,1,-0.042220067232847214,-0.05071902647614479,0.0,1,-0.008368859067559242,-0.015315925818867981,0.0,1,-0.037181414663791656,-0.04266724130138755,0.0,1,-0.01288349274545908,-0.021606322028674185,0.0,1,0.008971885778009892,-0.001579562966071535,3.0,1,0.04277375712990761,-0.023242410272359848,1.0,1,0.0014713868731632829,-0.024357299946132116,1.0,1,0.042960453778505325,-0.005078268412034959,2.0,1,0.0067208390682935715,-0.017371578054735437,1.0,1,0.011438971385359764,-0.002149710911908187,2.0,25,56,81
+TRM,38,47424,transient_or_regressed,5,3,26041,5000,,,False,0.6296296119689941,0.09716323763132095,0.05230006016790867,0.034473286010324955,8.0,0.41840048134326935,1,-0.03483996167778969,-0.044014810118824244,0.0,0,0.05052480846643448,0.026404740405268967,8.0,0,0.08206464350223541,0.047903179191052914,8.0,0,0.08395294100046158,0.06163737131282687,8.0,1,0.0493648499250412,0.019915478042094037,8.0,1,0.08972842246294022,0.0217537080461625,6.0,1,0.08278796821832657,-0.012147535220719874,1.0,1,0.028363389894366264,-0.004172609304077923,2.0,0,0.10533473640680313,0.05560024012811482,8.0,0,0.09716323763132095,0.05230006016790867,8.0,23,58,81
+TRM,39,239376,always,10,0,26041,5000,26041,5000,True,1.0,-0.0009640224161557853,-0.002923930522229057,-0.0038667215267196298,0.0,0.0,1,-0.023161595687270164,-0.03524667164310813,0.0,1,-0.003939019050449133,-0.014243226672988385,0.0,1,-0.0095213633030653,-0.036706246668472886,0.0,1,0.019512496888637543,-0.0034380423021502793,2.0,1,-0.006234393455088139,-0.009369814419187605,0.0,1,0.03255358710885048,-0.026152771431952715,1.0,1,0.028285551816225052,-0.017406376893632114,2.0,1,0.02650885097682476,-0.001302869786741212,3.0,1,0.03126267343759537,-0.017889442620798945,1.0,1,-0.0009640224161557853,-0.002923930522229057,0.0,26,55,81
+TRM,40,377585,always,10,0,26041,5000,26041,5000,True,1.0,-0.002500612987205386,-0.0040588052361272275,-0.005104279262013733,0.0,0.0,1,-0.007215660531073809,-0.025065042136702687,0.0,1,-0.002265887800604105,-0.008872352162143216,0.0,1,-0.03541497141122818,-0.04245291091501713,0.0,1,-0.01960708014667034,-0.021850206656381488,0.0,1,0.003702840767800808,-0.0023802913929102942,2.0,1,0.027520447969436646,-0.005385189739172347,2.0,1,0.1032874807715416,-0.008359066443517804,1.0,1,0.02089080587029457,-0.009702732859295793,2.0,1,0.014161814004182816,-0.010669442926882766,1.0,1,-0.002500612987205386,-0.0040588052361272275,0.0,29,52,81
+TRM,41,57082,always,10,0,26041,5000,26041,5000,True,1.0,0.011200892738997936,0.0004901092179352418,-0.0038022436783649027,2.0,0.021411003544926643,1,-0.035764846950769424,-0.04375373385846615,0.0,1,0.043062590062618256,0.010845018492545933,5.0,1,-0.004609006457030773,-0.036970161250792444,0.0,1,-0.0012369166361168027,-0.010737410411820747,0.0,1,0.006748992018401623,-0.0032910056615946814,1.0,1,-0.0073382798582315445,-0.03185410029254854,0.0,1,0.1713283360004425,0.004945756867527962,1.0,1,0.007881494238972664,-0.01270452473545447,1.0,1,-0.004153944086283445,-0.015002218598965555,0.0,1,0.011200892738997936,0.0004901092179352418,2.0,17,64,81
+TRM,42,401651,always,10,0,26041,5000,26041,5000,True,1.0,0.008526957593858242,0.00029207587499513465,-0.0025384910077264067,3.0,0.012577231877003214,1,-0.040880560874938965,-0.049219381995499134,0.0,1,0.017446909099817276,0.0008763656805967912,4.0,1,-0.017537694424390793,-0.03425324079580605,0.0,1,-0.0007348871440626681,-0.01482340903748991,0.0,1,0.0036039960104972124,-0.00471050222949998,1.0,1,-0.0057042608968913555,-0.02967744244961068,0.0,1,0.016167202964425087,-0.018349167425185442,1.0,1,0.032648637890815735,-6.650862633250654e-05,3.0,1,-0.000752809050027281,-0.022894367175467778,0.0,1,0.008526957593858242,0.00029207587499513465,3.0,29,52,81
+TRM,43,414519,always,10,0,26041,5000,26041,5000,True,1.0,0.053451668471097946,0.01213401707354933,-0.003870928892865777,5.0,0.11762940743938088,1,-0.027614647522568703,-0.03573848376981914,0.0,1,-0.009915953502058983,-0.012984225410036743,0.0,1,-0.01893557421863079,-0.03836345300078392,0.0,1,-0.012906301766633987,-0.023188486695289612,0.0,1,0.008907596580684185,-0.004432859612279572,2.0,1,0.049701862037181854,-0.013149662176147103,2.0,1,-0.010403970256447792,-0.026445527095347643,0.0,1,0.07054625451564789,0.004120421857805923,3.0,1,-0.0016635372303426266,-0.02008092700270936,0.0,1,0.053451668471097946,0.01213401707354933,5.0,25,56,81
+TRM,44,18198,transient_or_regressed,1,2,234369,45000,,,False,0.7037037014961243,0.11341231316328049,0.05376759287901223,0.030769409146159887,8.0,0.4301407430320978,0,0.06596972048282623,0.035816228250041604,8.0,0,0.0708942711353302,0.04401976568624377,8.0,0,0.0843534916639328,0.056520954705774784,8.0,0,0.11161459237337112,0.06675976794213057,8.0,0,0.12162543833255768,0.06705422000959516,8.0,0,0.1333499252796173,0.07206616271287203,8.0,0,0.09360162168741226,0.04690751840826124,8.0,0,0.10109543800354004,0.05308156460523605,8.0,1,-0.0062327636405825615,-0.019716958166100085,0.0,0,0.11341231316328049,0.05376759287901223,8.0,26,55,81
+TRM,45,273322,always,10,0,26041,5000,26041,5000,True,1.0,0.0017387683037668467,-0.0032665490289218724,-0.005745585076510906,2.0,0.0031162602826952934,1,-0.0013402809854596853,-0.020619072922272608,0.0,1,-0.014316573739051819,-0.01791757787577808,0.0,1,-0.0002544124436099082,-0.04060793124153861,0.0,1,0.007054033689200878,-0.007400354414130561,1.0,1,0.007400344125926495,-0.0033381680914317258,2.0,1,0.044253405183553696,-0.014579697628505528,2.0,1,-0.007654212415218353,-0.02642846293747425,0.0,1,0.054955266416072845,0.008214452282118145,5.0,1,0.00289098615758121,-0.014848149643512443,1.0,1,0.0017387683037668467,-0.0032665490289218724,2.0,25,56,81
+TRM,46,308136,flaky_then_stable,5,5,104164,20000,234369,45000,True,1.0,0.009052409790456295,-0.0018559055461082608,-0.005685164942406118,2.0,0.011221801163628697,0,0.033452361822128296,0.004797024928848259,4.0,0,0.0666641891002655,0.0322093985741958,8.0,0,0.08346537500619888,0.04937193216755986,8.0,1,-0.004997124895453453,-0.014272452506702393,0.0,0,0.11025875061750412,0.05642199399881065,8.0,1,0.047040633857250214,0.014740112077561207,6.0,1,0.10727038979530334,0.004947461828123778,3.0,0,0.11301656067371368,0.05269916635006666,8.0,1,0.04949108883738518,-0.009180456749163568,1.0,1,0.009052409790456295,-0.0018559055461082608,2.0,26,55,81
+TRM,47,186015,always,10,0,26041,5000,26041,5000,True,1.0,0.0032113688066601753,0.001166833965726255,0.0001646265318413498,6.0,0.01110851566772908,1,-0.042889732867479324,-0.050748545210808516,0.0,1,-0.009807687252759933,-0.014992943848483264,0.0,1,0.001995714148506522,-0.038482155912788585,1.0,1,-0.020100807771086693,-0.02428230899386108,0.0,1,-0.00199463265016675,-0.005201895197387785,0.0,1,0.02940160408616066,-0.02430989342974499,1.0,1,-0.015876997262239456,-0.02821142878383398,0.0,1,0.039064593613147736,-0.004660563783545513,2.0,1,0.04733450710773468,-0.013170681428164244,1.0,1,0.0032113688066601753,0.001166833965726255,6.0,26,55,81
+TRM,48,221508,always,10,0,26041,5000,26041,5000,True,1.0,0.001829015789553523,-0.0001430694646842312,-0.0013055599047220312,3.0,0.004225491196848452,1,-0.04551337659358978,-0.05160550167784095,0.0,1,-0.010685035958886147,-0.015293072909116745,0.0,1,-0.0065574184991419315,-0.041057586029637605,0.0,1,-0.01525016501545906,-0.02064368291758001,0.0,1,-0.005332751665264368,-0.007940051087643951,0.0,1,-0.00592279015108943,-0.02753272809786722,0.0,1,-0.018337298184633255,-0.028909629909321666,0.0,1,0.042542241513729095,-0.0038514535553986207,2.0,1,-0.020815201103687286,-0.025440427707508206,0.0,1,0.001829015789553523,-0.0001430694646842312,3.0,27,54,81
+TRM,49,414006,stable_learned,2,1,234369,45000,234369,45000,True,1.0,0.010532768443226814,-0.002463299926603213,-0.0068202173570171,2.0,0.013009322574362159,0,0.040055450052022934,0.015793511985975783,7.0,0,0.05176067352294922,0.02970619103871286,8.0,0,0.07906454056501389,0.04973601480014622,8.0,0,0.0875350832939148,0.05281579960137606,8.0,0,0.10275684297084808,0.06299306871369481,8.0,0,0.10151881724596024,0.06459330301731825,8.0,0,0.11483664065599442,0.05580337648279965,8.0,0,0.09589719027280807,0.04748765320982784,8.0,1,0.021463140845298767,-0.002426935185212642,3.0,1,0.010532768443226814,-0.002463299926603213,2.0,25,56,81
+TRM,50,210130,never,0,0,,,,,False,0.6172839403152466,0.08964993804693222,0.03729473671410233,0.0200174858327955,8.0,0.2983578937128186,0,0.041755251586437225,0.018320750546990894,8.0,0,0.07469210773706436,0.05179934296756983,8.0,0,0.09332922101020813,0.061471405904740095,8.0,0,0.0981285572052002,0.06381776789203286,8.0,0,0.11118289828300476,0.07104260148480535,8.0,0,0.10324238985776901,0.05097209452651441,8.0,0,0.09650111198425293,0.05102777271531522,8.0,0,0.10281454026699066,0.055239110719412565,8.0,0,0.09506338834762573,0.050308587262406945,8.0,0,0.08964993804693222,0.03729473671410233,8.0,25,56,81
+TRM,51,158555,transient_or_regressed,3,6,52082,10000,,,False,0.5308641791343689,0.09021701663732529,0.045054154470562935,0.02438062522560358,8.0,0.3604332357645035,0,0.07821813970804214,0.05132025061175227,8.0,1,0.011450463905930519,-0.003838686126982793,2.0,0,0.10384290665388107,0.06677944771945477,8.0,0,0.0918896347284317,0.05403029453009367,8.0,0,0.10060201585292816,0.06678786035627127,8.0,1,0.029246415942907333,-0.019410644192248583,1.0,0,0.11658885329961777,0.06324978778138757,8.0,1,0.0526483915746212,0.00020203564781695604,3.0,0,0.09416231513023376,0.049724842654541135,8.0,0,0.09021701663732529,0.045054154470562935,8.0,26,55,81
+TRM,52,372450,transient_or_regressed,1,2,156246,30000,,,False,0.6419752836227417,0.10228436440229416,0.05133962561376393,0.03226034203544259,8.0,0.4107170049101114,0,0.028466593474149704,0.010137261095223948,6.0,0,0.06855978816747665,0.04508143733255565,8.0,0,0.07900676131248474,0.045114132575690746,8.0,0,0.11371377855539322,0.05975690600462258,8.0,0,0.10308439284563065,0.05875416495837271,8.0,1,0.009421556256711483,-0.02776171884033829,1.0,0,0.10819811373949051,0.05219156830571592,8.0,0,0.11204086244106293,0.06637358758598566,8.0,0,0.10123950242996216,0.05528841121122241,8.0,0,0.10228436440229416,0.05133962561376393,8.0,26,55,81
+TRM,53,222678,flaky_then_stable,7,3,78123,15000,130205,25000,True,1.0,0.029772143810987473,0.009457272448344156,-0.002476639172527939,5.0,0.08732130448333919,0,0.032810892909765244,0.009719954978208989,6.0,0,0.06178142875432968,0.029323698137886822,8.0,1,-0.010205995291471481,-0.032493385253474116,0.0,0,0.08536376059055328,0.045084313256666064,8.0,1,0.006880044937133789,-0.005280034150928259,2.0,1,0.03741069138050079,-0.024844120256602764,1.0,1,0.04072961583733559,-0.0015174217987805605,3.0,1,0.019223909825086594,-0.007240620732773095,3.0,1,0.02341780625283718,-0.009175663290079683,2.0,1,0.029772143810987473,0.009457272448344156,5.0,24,57,81
+TRM,54,354892,always,10,0,26041,5000,26041,5000,True,1.0,0.013682622462511063,0.0045762813242617995,-0.00047522515524178743,6.0,0.03868404793320224,1,-0.041296835988759995,-0.049744116608053446,0.0,1,-0.001593064866028726,-0.01082446011423599,0.0,1,-0.0018602318596094847,-0.037313340784749016,0.0,1,-0.015210852958261967,-0.023114624549634755,0.0,1,-0.0016846497310325503,-0.008427085980656557,0.0,1,-0.0002339108323212713,-0.023683608898863895,0.0,1,0.015271001495420933,-0.01616179643315263,1.0,1,0.025959592312574387,-0.01180358525016345,1.0,1,0.03312390670180321,-0.014986121677793562,1.0,1,0.013682622462511063,0.0045762813242617995,6.0,26,55,81
+TRM,55,375675,flaky_then_stable,6,5,78123,15000,208328,40000,True,1.0,0.0031702055130153894,-0.0033523819292895496,-0.005269291461445391,1.0,0.0031702055130153894,0,0.034637875854969025,0.009466795658227056,5.0,0,0.05280807986855507,0.020489387679845095,8.0,1,0.04968750476837158,0.008927458227844909,5.0,0,0.09277018159627914,0.05210425681434572,8.0,1,0.012863656505942345,-0.00018376621301285923,2.0,1,0.058993663638830185,-0.008112388546578586,2.0,0,0.08696074038743973,0.03457088093273342,8.0,1,0.04817478358745575,0.012585780588779016,5.0,1,0.010624210350215435,-0.01502375808195211,1.0,1,0.0031702055130153894,-0.0033523819292895496,1.0,24,57,81
+TRM,56,320237,stable_learned,9,1,52082,10000,52082,10000,True,1.0,0.006494130473583937,-0.0004095154727110639,-0.00478165753884241,3.0,0.017038158606737852,0,0.06208376958966255,0.03879459318704903,8.0,1,0.013151725754141808,0.0022133115326141706,4.0,1,0.0023571480996906757,-0.023083358246367425,1.0,1,0.011549429036676884,-0.009048905667441431,2.0,1,-0.0046251146122813225,-0.006183563498780131,0.0,1,0.045879121869802475,-0.02042383886873722,1.0,1,-0.026600878685712814,-0.03154015610925853,0.0,1,0.044329166412353516,-0.006305104834609665,3.0,1,0.051373664289712906,-0.008916489081457257,1.0,1,0.006494130473583937,-0.0004095154727110639,3.0,25,56,81
+TRM,57,69134,flaky_then_stable,8,4,26041,5000,130205,25000,True,1.0,0.00568232499063015,-0.0027316155028529465,-0.0049256523489020765,1.0,0.00568232499063015,1,-0.04490320011973381,-0.04821395594626665,0.0,0,0.07393120229244232,0.04284203075803816,8.0,1,-0.005630874540656805,-0.0340282439137809,0.0,0,0.09902309626340866,0.05597585113719106,8.0,1,0.021876029670238495,-0.0016285754136333708,3.0,1,0.05766592174768448,-0.006625439855270088,2.0,1,-0.002642798237502575,-0.027208040817640722,0.0,1,0.03991800174117088,-0.0010397451696917415,2.0,1,0.03304155543446541,-0.018215581076219678,1.0,1,0.00568232499063015,-0.0027316155028529465,1.0,26,55,81
+TRM,58,97421,flaky_then_stable,5,5,52082,10000,260410,50000,True,1.0,-0.002859578002244234,-0.004401394835440442,-0.005328317522071302,0.0,0.0,0,0.015754161402583122,-0.0028422033647075295,2.0,1,0.06377241015434265,0.020132934391767776,7.0,1,0.05525261163711548,0.013040020159678534,6.0,1,-0.0069015538319945335,-0.016397491446696222,0.0,0,0.10967282950878143,0.05581835424527526,8.0,0,0.12220876663923264,0.06982435751706362,8.0,1,0.013012099079787731,-0.017434105277061462,1.0,0,0.10110481828451157,0.05187281174585223,8.0,0,0.12039220333099365,0.05482341023162007,8.0,1,-0.002859578002244234,-0.004401394835440442,0.0,25,56,81
+TRM,59,330273,always,10,0,26041,5000,26041,5000,True,1.0,0.0003904178738594055,-0.0018561382780717395,-0.0033540026051923633,1.0,0.0003904178738594055,1,-0.04838380962610245,-0.05350808519870043,0.0,1,-0.014856690540909767,-0.017215807223692536,0.0,1,-0.003627605503425002,-0.03888240686501376,0.0,1,0.002179374685510993,-0.017497837106930092,1.0,1,0.003160635707899928,-0.0037517358432523906,1.0,1,0.0026060682721436024,-0.03272197436308488,1.0,1,-0.004609786439687014,-0.02505279384786263,0.0,1,0.043519046157598495,-0.009394616063218564,1.0,1,-0.021903259679675102,-0.025395093020051718,0.0,1,0.0003904178738594055,-0.0018561382780717395,1.0,28,53,81
+TRM,60,254864,never,0,0,,,,,False,0.5925925970077515,0.07359656691551208,0.04452108545228839,0.029825636185705662,8.0,0.3561686836183071,0,0.061777353286743164,0.02401387644931674,8.0,0,0.08577846735715866,0.05166397080756724,8.0,0,0.09177936613559723,0.04775612079538405,8.0,0,0.09546543657779694,0.04927126714028418,8.0,0,0.08724319934844971,0.049241031520068645,8.0,0,0.07880809158086777,0.045728132128715515,8.0,0,0.07741398364305496,0.0448508569970727,8.0,0,0.08234921097755432,0.04171773581765592,8.0,0,0.10083508491516113,0.048985564382746816,8.0,0,0.07359656691551208,0.04452108545228839,8.0,27,54,81
+TRM,61,69477,stable_learned,9,1,52082,10000,52082,10000,True,1.0,0.0032330250833183527,0.0018058921050396748,0.0009390319319209084,8.0,0.014447136840317398,0,0.05226004868745804,0.013870571587176528,5.0,1,-0.00743769621476531,-0.012997824407648295,0.0,1,0.028096478432416916,-0.019439933355897665,1.0,1,-0.01560826413333416,-0.021185355028137565,0.0,1,-0.003580569289624691,-0.007901168544776738,0.0,1,0.016443489119410515,-0.030509623931720853,1.0,1,-0.02962145209312439,-0.032599433325231075,0.0,1,0.011266103945672512,-0.013428200385533273,1.0,1,0.0005120631540194154,-0.022594231922994368,1.0,1,0.0032330250833183527,0.0018058921050396748,8.0,25,56,81
+TRM,62,21973,always,10,0,26041,5000,26041,5000,True,1.0,0.008508843369781971,0.0007648130231245887,-0.00195569361676462,4.0,0.013941278652055189,1,-0.030117543414235115,-0.03950458322651684,0.0,1,-0.001931163016706705,-0.008440464007435367,0.0,1,-0.002159800147637725,-0.031025348900584504,0.0,1,0.005467218346893787,-0.009629728563595563,2.0,1,0.003028974635526538,-0.006095671938965097,1.0,1,0.02461649477481842,-0.01533601843402721,2.0,1,0.001713586738333106,-0.0209484739752952,1.0,1,0.007843172177672386,-0.012046086601912975,2.0,1,-0.018884265795350075,-0.0254668602719903,0.0,1,0.008508843369781971,0.0007648130231245887,4.0,25,56,81
+TRM,63,203458,flaky_then_stable,5,3,52082,10000,182287,35000,True,1.0,0.007512807846069336,0.004070930521038463,0.002250386728974263,8.0,0.0325674441683077,0,0.042726751416921616,0.00993438923615031,7.0,1,-0.002117346040904522,-0.01054324273718521,0.0,0,0.09614014625549316,0.05004083807580173,8.0,0,0.07808869332075119,0.046768815722316504,8.0,0,0.09192243218421936,0.051212456077337265,8.0,0,0.08913952112197876,0.04333923361264169,8.0,1,-0.009081489406526089,-0.027155962656252086,0.0,1,0.02231515571475029,-0.005622491764370352,2.0,1,0.06469298899173737,-0.00868140475358814,1.0,1,0.007512807846069336,0.004070930521038463,8.0,27,54,81
+TRM,64,174467,flaky_then_stable,9,2,26041,5000,182287,35000,True,1.0,0.005014951340854168,0.0009096149005927145,-0.001237908232724294,6.0,0.013659279327839613,1,-0.04717700183391571,-0.056069396901875734,0.0,1,0.003665581811219454,-0.006152402493171394,1.0,1,-0.01703735999763012,-0.0423653528559953,0.0,1,-0.0176103338599205,-0.020679634297266603,0.0,1,0.002198202768340707,-0.007359491341048852,1.0,0,0.08685597032308578,0.05161187774501741,8.0,1,0.14636419713497162,-0.0030671335698571056,1.0,1,0.058280207216739655,-0.00023266213247552514,2.0,1,0.024923745542764664,-0.013560102670453489,1.0,1,0.005014951340854168,0.0009096149005927145,6.0,26,55,81
+TRM,65,44347,flaky_then_stable,5,3,130205,25000,260410,50000,True,1.0,0.04525323212146759,0.013624885912577156,0.0004115916817681864,6.0,0.11547713197069243,0,0.022694390267133713,-0.0007908264524303377,3.0,0,0.07573706656694412,0.028803459368646145,8.0,0,0.07100721448659897,0.044741004006937146,8.0,0,0.09280118346214294,0.04442490451037884,8.0,1,0.04404296353459358,0.020226128239301033,8.0,1,0.06646931916475296,-0.016498913057148457,1.0,1,0.08405780047178268,0.02907382627017796,8.0,1,0.07617714256048203,0.00646210162085481,4.0,0,0.09405148029327393,0.05048083676956594,8.0,1,0.04525323212146759,0.013624885912577156,6.0,26,55,81
+TRM,66,352714,always,10,0,26041,5000,26041,5000,True,1.0,0.034301385283470154,0.014275197725510225,0.002508873527403921,7.0,0.11899798992089927,1,-0.023865079507231712,-0.0343877668492496,0.0,1,-0.004580390173941851,-0.008443528728093952,0.0,1,-0.009266745299100876,-0.032719685696065426,0.0,1,-0.003416513092815876,-0.01885888900142163,0.0,1,0.006331162061542273,-0.0036954722199880052,2.0,1,0.05144596844911575,-0.02486957993824035,1.0,1,-0.02146255411207676,-0.03013360220938921,0.0,1,0.04701415076851845,-0.00017179391579702497,3.0,1,0.02463512495160103,-0.014525855425745249,1.0,1,0.034301385283470154,0.014275197725510225,7.0,27,54,81
+TRM,67,293982,always,10,0,26041,5000,26041,5000,True,1.0,0.01891370490193367,0.002718432047913666,-0.00048470430556335486,6.0,0.024472258970490657,1,-0.04887240380048752,-0.05512660229578614,0.0,1,-0.00945866759866476,-0.0142881068168208,0.0,1,-0.009094023145735264,-0.038163389661349356,0.0,1,-0.014874888584017754,-0.02304110210388899,0.0,1,-0.002864482579752803,-0.00649812541087158,0.0,1,0.04220415651798248,-0.02984647499397397,1.0,1,-0.008741402067244053,-0.02617479267064482,0.0,1,0.00026145417359657586,-0.01117644612895674,1.0,1,0.043321408331394196,-0.017115476308390498,1.0,1,0.01891370490193367,0.002718432047913666,6.0,25,56,81
+TRM,68,3396,never,0,0,,,,,False,0.5308641791343689,0.10996713489294052,0.05497277062386274,0.03481545299291611,8.0,0.43978216499090195,0,0.04106643795967102,0.011517781938891858,6.0,0,0.06632253527641296,0.03091362683335319,8.0,0,0.08152452111244202,0.0466219971422106,8.0,0,0.08369068801403046,0.05513703217729926,8.0,0,0.07417560368776321,0.05426963185891509,8.0,0,0.13927294313907623,0.06325919274240732,8.0,0,0.10898752510547638,0.050066050491295755,8.0,0,0.1232287660241127,0.058416633401066065,8.0,0,0.12721405923366547,0.05108454544097185,8.0,0,0.10996713489294052,0.05497277062386274,8.0,25,56,81
+TRM,69,16908,always,10,0,26041,5000,26041,5000,True,1.0,0.03854050114750862,0.003912154130375711,-0.0021824175637448207,3.0,0.04053124584606849,1,-0.04322049021720886,-0.05370942736044526,0.0,1,-0.011233308352530003,-0.01592369470745325,0.0,1,-0.006410825066268444,-0.04021361644845456,0.0,1,-0.022563111037015915,-0.0238795205950737,0.0,1,-0.00211136182770133,-0.0048769236309453845,0.0,1,0.010805339552462101,-0.0389175129821524,1.0,1,-0.019986238330602646,-0.026574883377179503,0.0,1,-0.0014853095635771751,-0.013406396901700646,0.0,1,0.04644791781902313,-0.013757487176917493,1.0,1,0.03854050114750862,0.003912154130375711,3.0,26,55,81
+TRM,70,20527,flaky_then_stable,9,2,26041,5000,78123,15000,True,1.0,0.021327964961528778,0.0016627993900328875,-0.004790291306562722,4.0,0.03246356034651399,1,-0.04186438396573067,-0.04921188112348318,0.0,0,0.06061437726020813,0.04404729790985584,8.0,1,0.008456078357994556,-0.028690356179140508,1.0,1,-0.003426962299272418,-0.012119624443585053,0.0,1,0.019516833126544952,0.0024823704370646738,4.0,1,0.052694372832775116,-0.0019287928007543087,2.0,1,0.004412603098899126,-0.014870563114527613,1.0,1,0.032996512949466705,-0.003733713669049621,2.0,1,0.1058109849691391,0.05509472405537963,8.0,1,0.021327964961528778,0.0016627993900328875,4.0,23,58,81
+TRM,71,384517,stable_learned,9,1,52082,10000,52082,10000,True,1.0,0.029628805816173553,0.005156962448381819,-0.004251030826708302,4.0,0.05825982289388776,0,0.03319035470485687,0.01142612739931792,6.0,1,-0.006158301141113043,-0.01422450743848458,0.0,1,0.0061112940311431885,-0.026746557326987386,1.0,1,0.027730019763112068,-0.005289395980071276,2.0,1,0.017095131799578667,-0.005401445727329701,1.0,1,-0.00421931641176343,-0.027313214086461812,0.0,1,0.09258759766817093,0.02381881766632432,6.0,1,0.04512196406722069,0.0026747560186777264,4.0,1,0.026400171220302582,-0.003725771326571703,3.0,1,0.029628805816173553,0.005156962448381819,4.0,26,55,81
+TRM,72,215514,always,10,0,26041,5000,26041,5000,True,1.0,0.015155179426074028,0.005732874182285741,0.002930551127064973,8.0,0.04586299345828593,1,-0.04309047386050224,-0.04852205188944936,0.0,1,-0.002045587170869112,-0.009898251621052623,0.0,1,-0.01458529382944107,-0.02627874189056456,0.0,1,0.007088697515428066,-0.006824653304647654,1.0,1,0.0028308986220508814,-0.00608747178921476,1.0,1,0.014286590740084648,-0.018682452267967165,1.0,1,-0.010878154076635838,-0.023032941739074886,0.0,1,0.01852840557694435,-0.0016530388675164431,3.0,1,0.01447109505534172,-0.014114441757556051,1.0,1,0.015155179426074028,0.005732874182285741,8.0,27,54,81
+TRM,73,339970,always,10,0,26041,5000,26041,5000,True,1.0,0.004744160454720259,-7.634657049493399e-05,-0.001774218944774475,4.0,0.006486103215138428,1,-0.06758081167936325,-0.07101952098309994,0.0,1,-0.013321689330041409,-0.014626032556407154,0.0,1,-0.03114965558052063,-0.03955911798402667,0.0,1,-0.020674938336014748,-0.030266360379755497,0.0,1,-0.00041494471952319145,-0.006179888267070055,0.0,1,-0.01883789710700512,-0.03995231445878744,0.0,1,-0.027163319289684296,-0.028801463777199388,0.0,1,0.016338622197508812,-0.01672670105472207,1.0,1,-0.0059638903476297855,-0.027638976054731756,0.0,1,0.004744160454720259,-7.634657049493399e-05,4.0,34,47,81
+TRM,74,261995,always,10,0,26041,5000,26041,5000,True,1.0,0.005964071489870548,-0.001002343818981899,-0.003665185533463955,4.0,0.006641991582000628,1,-0.04398180544376373,-0.04911998054012656,0.0,1,-0.010886823758482933,-0.01345194864552468,0.0,1,-0.02097303234040737,-0.03530041268095374,0.0,1,-0.018981575965881348,-0.023109396686777472,0.0,1,0.0036300315987318754,-0.003191364705344313,1.0,1,0.03327842429280281,-0.026978990994393826,1.0,1,-0.018779447302222252,-0.026845575543120503,0.0,1,0.043681103736162186,-0.008504753466695547,1.0,1,-0.0215305108577013,-0.023885349743068218,0.0,1,0.005964071489870548,-0.001002343818981899,4.0,26,55,81
+TRM,75,52628,flaky_then_stable,8,3,52082,10000,130205,25000,True,1.0,0.0033935531973838806,-0.0022274545626714826,-0.004154581984039396,2.0,0.0038323880871757865,0,0.05828529968857765,0.0267481371993199,8.0,1,0.002474128734320402,-0.0058764161440194584,3.0,1,-0.0019789086654782295,-0.035164964501746,0.0,0,0.12442079186439514,0.05956225236877799,8.0,1,0.008559959009289742,-0.007983786810655147,1.0,1,0.00882664229720831,-0.027662867098115385,1.0,1,-0.018970487639307976,-0.028280447702854872,0.0,1,0.03521743789315224,-0.0045965141034685075,3.0,1,-6.650499562965706e-05,-0.021160148688977642,0.0,1,0.0033935531973838806,-0.0022274545626714826,2.0,26,55,81
+TRM,76,409934,always,10,0,26041,5000,26041,5000,True,1.0,0.0024283018428832293,-0.000539859874152171,-0.002021642227191478,3.0,0.004060789207869675,1,-0.05215219035744667,-0.05594609258696437,0.0,1,-0.010646836832165718,-0.014882612857036293,0.0,1,-0.03938687965273857,-0.04439387423917651,0.0,1,-0.010589546523988247,-0.022559958160854876,0.0,1,-0.0007987208664417267,-0.0035930855374317616,0.0,1,0.03749938681721687,-0.02618488820735365,1.0,1,-0.015292173251509666,-0.025817553978413343,0.0,1,0.06943108886480331,-0.005477039492689073,1.0,1,0.033239688724279404,-0.011771868506912142,2.0,1,0.0024283018428832293,-0.000539859874152171,3.0,25,56,81
+TRM,77,412654,stable_learned,7,1,104164,20000,104164,20000,True,1.0,0.010606574825942516,-0.0003874684916809201,-0.006048486859072,3.0,0.023211120394989848,0,0.03024710901081562,0.007864741986850277,6.0,0,0.05162932723760605,0.024871981237083673,8.0,0,0.1039038822054863,0.05210094293579459,8.0,1,0.01958700641989708,-0.004732330882688984,2.0,1,0.006645877379924059,-0.0004354742559371516,3.0,1,0.06791031360626221,0.002421018056338653,3.0,1,-0.025111133232712746,-0.031003492884337902,0.0,1,0.05167195945978165,-0.0034635716874618083,3.0,1,-0.009133298881351948,-0.01782769651617855,0.0,1,0.010606574825942516,-0.0003874684916809201,3.0,24,57,81
+TRM,78,167007,always,10,0,26041,5000,26041,5000,True,1.0,-0.0015290639130398631,-0.0038194733642740175,-0.0052103548077866435,0.0,0.0,1,-0.052255257964134216,-0.056594294495880604,0.0,1,-0.011631819419562817,-0.014712722972035408,0.0,1,-0.0023057644721120596,-0.03770173990051262,0.0,1,-0.021057726815342903,-0.02382798003964126,0.0,1,-0.005084685981273651,-0.008019518689252436,0.0,1,0.027809960767626762,-0.028944319812580943,1.0,1,0.00348165025934577,-0.022804971144068986,1.0,1,0.001987181371077895,-0.014358750981045887,1.0,1,0.02874426729977131,-0.015115703921765089,1.0,1,-0.0015290639130398631,-0.0038194733642740175,0.0,25,56,81
+TRM,79,287561,flaky_then_stable,5,3,52082,10000,234369,45000,True,1.0,0.009634876623749733,-0.003027466154890135,-0.0071838569128885865,2.0,0.012050009332597256,0,0.012366922572255135,-0.0006445043545681983,4.0,1,-0.004756314679980278,-0.01423240383155644,0.0,1,0.03763478621840477,0.013395285524893552,7.0,1,-0.01067011896520853,-0.01807817630469799,0.0,0,0.10091543197631836,0.049586425768211484,8.0,0,0.10859839618206024,0.05962962773628533,8.0,0,0.08538555353879929,0.03531484003178775,8.0,0,0.09702973067760468,0.04726866679266095,8.0,1,0.03606226295232773,-0.009526327819912694,2.0,1,0.009634876623749733,-0.003027466154890135,2.0,25,56,81
+TRM,80,385776,always,10,0,26041,5000,26041,5000,True,1.0,0.028444260358810425,0.001161380103440024,-0.003534068411681801,2.0,0.028719603433273733,1,-0.05337578058242798,-0.058294827584177256,0.0,1,-0.013523862697184086,-0.01613825059030205,0.0,1,0.00518436636775732,-0.03844994318205863,1.0,1,-0.010201114229857922,-0.018848999519832432,0.0,1,0.001542935031466186,-0.005584430837188847,1.0,1,0.01722271926701069,-0.0256979672703892,1.0,1,0.04459402337670326,-0.01434500835603103,1.0,1,0.038171473890542984,-0.006135702278697863,1.0,1,0.030329816043376923,-0.006407494292943738,2.0,1,0.028444260358810425,0.001161380103440024,2.0,26,55,81
+TRM,81,215458,always,10,0,26041,5000,26041,5000,True,1.0,0.002372502814978361,-0.00526801566593349,-0.007534230011515319,1.0,0.002372502814978361,1,-0.025279000401496887,-0.03647366678342223,0.0,1,-0.004259522072970867,-0.011297752964310348,0.0,1,0.0222418662160635,-0.007240057864692062,1.0,1,-0.017310572788119316,-0.02358324872329831,0.0,1,0.005968959536403418,-0.003887528189807199,2.0,1,0.06535451114177704,-0.00178351609793026,3.0,1,0.01518767699599266,-0.010507949482416734,2.0,1,0.023664690554142,-0.0008261524199042469,3.0,1,0.034913320094347,-2.894019416999072e-05,3.0,1,0.002372502814978361,-0.00526801566593349,1.0,25,56,81
+TRM,82,268094,always,10,0,26041,5000,26041,5000,True,1.0,0.00511393416672945,0.0004164860802120529,-0.0017049780872184783,3.0,0.010445175226777792,1,-0.05325597524642944,-0.05832808231934905,0.0,1,-0.008619495667517185,-0.013888030895031989,0.0,1,-0.014202508144080639,-0.02954689913894981,0.0,1,-0.012470385059714317,-0.02349852886982262,0.0,1,-0.0008659072918817401,-0.006573975886567496,0.0,1,0.030510054901242256,-0.029149430338293314,1.0,1,-0.0037225252017378807,-0.02610348432790488,0.0,1,0.03565947338938713,-0.008485330123221502,1.0,1,-0.008747555315494537,-0.022056124755181372,0.0,1,0.00511393416672945,0.0004164860802120529,3.0,27,54,81
+TRM,83,209914,always,10,0,26041,5000,26041,5000,True,1.0,0.007665717508643866,-0.0010879105248022825,-0.002822725975420326,1.0,0.007665717508643866,1,-0.03174733370542526,-0.04622159944847226,0.0,1,-0.005058246199041605,-0.012300950067583472,0.0,1,-0.0005178921273909509,-0.03504713233996881,0.0,1,-0.006534065585583448,-0.018010263156611472,0.0,1,0.006158912088721991,-0.003453106928645866,1.0,1,0.02732120268046856,-0.018704519316088408,1.0,1,-0.011422712355852127,-0.02698169299401343,0.0,1,0.017169443890452385,-0.011245612804486882,1.0,1,0.047795142978429794,-0.011929496540687978,1.0,1,0.007665717508643866,-0.0010879105248022825,1.0,24,57,81
+TRM,84,21050,always,10,0,26041,5000,26041,5000,True,1.0,0.010362285189330578,-0.0020997743704356253,-0.006002842797897756,2.0,0.0127359121106565,1,-0.034345440566539764,-0.04365929961204529,0.0,1,-0.004156942013651133,-0.008736210758797824,0.0,1,0.019298462197184563,-0.0055099017336033285,1.0,1,-0.01357590314000845,-0.020894357818178833,0.0,1,0.007576208095997572,-0.0048524083103984594,1.0,1,-0.004269885830581188,-0.026478867046535015,0.0,1,0.01100101787596941,-0.024030559579841793,1.0,1,0.028431685641407967,-0.0076577747240662575,1.0,1,-0.014408747665584087,-0.023766940808854997,0.0,1,0.010362285189330578,-0.0020997743704356253,2.0,28,53,81
+TRM,85,212675,always,10,0,26041,5000,26041,5000,True,1.0,0.010571295395493507,-0.00253619127033744,-0.00674821762368083,2.0,0.012102997512556612,1,0.031081149354577065,-0.005065517470939085,2.0,1,-0.012106829322874546,-0.015606272500008345,0.0,1,-0.0041049993596971035,-0.03441080002812669,0.0,1,-0.01941915601491928,-0.030758013017475605,0.0,1,0.009424833580851555,-0.00383557678287616,1.0,1,0.020418353378772736,-0.026320667238906026,1.0,1,0.012603471986949444,-0.02320451964624226,1.0,1,0.02682632766664028,-0.011249380273511633,1.0,1,0.01901652105152607,-0.013933721696957946,1.0,1,0.010571295395493507,-0.00253619127033744,2.0,26,55,81
+TRM,86,49529,always,10,0,26041,5000,26041,5000,True,1.0,-0.001224438427016139,-0.0033537381677888334,-0.004896429367363453,0.0,0.0,1,-0.03782275691628456,-0.04978909296914935,0.0,1,-0.005695614498108625,-0.011733196035493165,0.0,1,-0.0010303895687684417,-0.0398656959150685,0.0,1,-0.0005861861282028258,-0.018732975302555133,0.0,1,0.0015005654422566295,-0.007337664908845909,2.0,1,0.06311360746622086,-0.01451850711600855,2.0,1,-0.02016526274383068,-0.03025465039536357,0.0,1,0.03729911148548126,-0.010974822915159166,2.0,1,-0.010649671778082848,-0.022534444462507963,0.0,1,-0.001224438427016139,-0.0033537381677888334,0.0,24,57,81
+TRM,87,213046,transient_or_regressed,1,2,208328,40000,,,False,0.5555555820465088,0.11483027040958405,0.053384904051199555,0.03160887444391847,8.0,0.42707923240959644,0,0.00691604008898139,-0.004075211210874841,1.0,0,0.04929168522357941,0.03120036143809557,8.0,0,0.09001829475164413,0.05197333171963692,8.0,0,0.08896192908287048,0.06485689664259553,8.0,0,0.10087814927101135,0.07301743142306805,8.0,0,0.12517812848091125,0.06886069197207689,8.0,0,0.11256831139326096,0.05842132307589054,8.0,1,0.06365761905908585,0.03426911274436861,8.0,0,0.1302051991224289,0.06262073968537152,8.0,0,0.11483027040958405,0.053384904051199555,8.0,22,59,81
+TRM,88,43199,never,0,0,,,,,False,0.6419752836227417,0.09541399031877518,0.05205491208471358,0.032757742796093225,8.0,0.4164392966777086,0,0.06923618912696838,0.030687891878187656,8.0,0,0.07459069043397903,0.0436143244151026,8.0,0,0.08997716754674911,0.05246129259467125,8.0,0,0.0985296443104744,0.05989782139658928,8.0,0,0.09583991020917892,0.06064692372456193,8.0,0,0.09300033748149872,0.05564435082487762,8.0,0,0.10331911593675613,0.05362485535442829,8.0,0,0.09499314427375793,0.04548232874367386,8.0,0,0.09717201441526413,0.04645823081955314,8.0,0,0.09541399031877518,0.05205491208471358,8.0,26,55,81
+TRM,89,33953,flaky_then_stable,7,3,52082,10000,130205,25000,True,1.0,0.03550432249903679,0.005029272841056809,-0.003172945638652891,3.0,0.05386637942865491,0,0.04628125950694084,0.013494249869836494,5.0,1,-0.0031952736899256706,-0.012344360467977822,0.0,0,0.0752018466591835,0.04740504268556833,8.0,0,0.13039202988147736,0.05654251924715936,8.0,1,0.00848484132438898,-0.002493282168870792,2.0,1,0.10644136369228363,0.04701561667025089,8.0,1,-0.006390073802322149,-0.023080890241544694,0.0,1,0.04128987342119217,0.0011462335824035108,3.0,1,0.029196109622716904,-0.004113979239264154,2.0,1,0.03550432249903679,0.005029272841056809,3.0,23,58,81
+TRM,90,375972,flaky_then_stable,5,3,78123,15000,182287,35000,True,1.0,-0.0011487934971228242,-0.005411855658167042,-0.007047865190543234,0.0,0.0,0,0.003461414948105812,-0.012533387722214684,1.0,0,0.046402785927057266,0.02319617453031242,8.0,1,0.019491970539093018,-0.013229857024271041,1.0,0,0.08384817838668823,0.056558152195066214,8.0,0,0.09595293551683426,0.05962554784491658,8.0,0,0.11980169266462326,0.0684811556711793,8.0,1,0.12834374606609344,0.008456458104774356,3.0,1,0.10930854827165604,0.039482378226239234,8.0,1,0.015953240916132927,-0.0003568160464055836,4.0,1,-0.0011487934971228242,-0.005411855658167042,0.0,23,58,81
+TRM,91,401258,always,10,0,26041,5000,26041,5000,True,1.0,0.0020586869213730097,0.0002081355960399378,-0.0007885884479037486,4.0,0.004819438559934497,1,-0.044508032500743866,-0.04842871753498912,0.0,1,-0.013848435133695602,-0.017183688702061772,0.0,1,-0.0034879695158451796,-0.0280986471043434,0.0,1,-0.01424423884600401,-0.022334134788252413,0.0,1,0.0003331048064865172,-0.006029092648532242,1.0,1,0.05149884149432182,-0.027042152360081673,1.0,1,-0.009319962002336979,-0.025661705643869936,0.0,1,-0.0044862860813736916,-0.016551656764931977,0.0,1,0.02941271848976612,-0.014698885890538804,1.0,1,0.0020586869213730097,0.0002081355960399378,4.0,25,56,81
+TRM,92,393345,flaky_then_stable,7,5,52082,10000,234369,45000,True,1.0,0.0535949245095253,0.009045039314514725,-0.007221762301924173,4.0,0.10124736372381449,0,0.05586465820670128,0.016821019060444087,6.0,1,-0.008617248386144638,-0.015455899061635137,0.0,1,-0.010447564534842968,-0.04241391678806394,0.0,1,-0.003318113274872303,-0.010922046581981704,0.0,0,0.09952236711978912,0.05829300684854388,8.0,1,0.08033081144094467,0.03289566107559949,8.0,1,0.09680960327386856,0.025329324475023896,5.0,0,0.12450432032346725,0.06511721760034561,8.0,1,-0.004404083825647831,-0.017333351424895227,0.0,1,0.0535949245095253,0.009045039314514725,4.0,24,57,81
+TRM,93,234599,flaky_then_stable,4,3,78123,15000,208328,40000,True,1.0,0.046294257044792175,0.01016221109603066,-0.004110559239052236,4.0,0.09773992572445422,0,0.021968595683574677,0.010782427954836749,8.0,0,0.04393097385764122,0.02117866836488247,8.0,1,0.003508448600769043,-0.027100769337266684,1.0,0,0.07952556759119034,0.04464322188869119,8.0,0,0.10191342979669571,0.06268782960250974,8.0,0,0.12181214243173599,0.06736720353364944,8.0,0,0.09637119621038437,0.04555576224811375,8.0,1,0.1090032085776329,0.03443504637107253,8.0,1,0.034727249294519424,-0.008553936990210786,2.0,1,0.046294257044792175,0.01016221109603066,4.0,24,57,81
+TRM,94,303593,stable_learned,8,1,78123,15000,78123,15000,True,1.0,0.01345769688487053,-0.0013053304137429222,-0.006644105655141175,2.0,0.019592590164393187,0,0.020223520696163177,0.004517835801379988,5.0,0,0.05398011952638626,0.03688373533077538,8.0,1,0.010603707283735275,-0.01306620742980158,1.0,1,0.07475355267524719,0.03404032252728939,8.0,1,0.023886723443865776,0.002944533458503429,4.0,1,0.06785248965024948,0.011892989452462643,4.0,1,0.003383297473192215,-0.0192379557993263,1.0,1,0.025035077705979347,-0.009470743534620851,1.0,1,0.05911574140191078,0.009892681264318526,4.0,1,0.01345769688487053,-0.0013053304137429222,2.0,26,55,81
+TRM,95,215065,always,10,0,26041,5000,26041,5000,True,1.0,0.03121633268892765,0.0025140829966403544,-0.0029273018590174615,3.0,0.03253256011521444,1,-0.034355152398347855,-0.044279730413109064,0.0,1,0.0011395683977752924,-0.010544958029640839,1.0,1,-0.0014171780785545707,-0.040445637350785546,0.0,1,-0.018546244129538536,-0.023375394521281123,0.0,1,0.0023944182321429253,-0.0060215455159777775,1.0,1,0.040232859551906586,-0.016900649206945673,2.0,1,0.019362537190318108,-0.02103907463606447,1.0,1,0.005838131066411734,-0.012118416285375133,2.0,1,0.021726982668042183,-0.014776724856346846,1.0,1,0.03121633268892765,0.0025140829966403544,3.0,24,57,81
+TRM,96,24738,flaky_then_stable,5,5,78123,15000,208328,40000,True,1.0,0.010975110344588757,-0.00018635359447216615,-0.00387580442475155,3.0,0.01439893408678472,0,0.06482422351837158,0.01977668042673031,6.0,0,0.06716082990169525,0.030219342035707086,8.0,1,0.0032344472128897905,-0.027951757510891184,1.0,0,0.08573616296052933,0.05287062516435981,8.0,0,0.09290646016597748,0.0517635743599385,8.0,1,0.06589390337467194,0.013712230091186939,4.0,0,0.06579168140888214,0.03381257178261876,8.0,1,0.06715645641088486,0.02467366289056372,7.0,1,0.06507674604654312,0.018863482859160285,7.0,1,0.010975110344588757,-0.00018635359447216615,3.0,24,57,81
+TRM,97,328928,transient_or_regressed,4,4,52082,10000,,,False,0.6419752836227417,0.07814294099807739,0.04238794220145792,0.02611261955462396,8.0,0.33910353761166334,0,-0.013930894434452057,-0.01903627859428525,0.0,1,-0.008799215778708458,-0.012983069871552289,0.0,0,0.09155578166246414,0.047670017927885056,8.0,0,0.0981263592839241,0.04912946722470224,8.0,1,0.011565839871764183,-0.0033122524473583326,2.0,1,0.040538374334573746,-0.010081689921207726,2.0,1,0.042481549084186554,-0.0013598116638604552,3.0,0,0.10112985223531723,0.055683094542473555,8.0,0,0.09228113293647766,0.04396537446882576,8.0,0,0.07814294099807739,0.04238794220145792,8.0,25,56,81
+TRM,98,241509,never,0,0,,,,,False,0.5555555820465088,0.11017504334449768,0.05196797242388129,0.0332398135215044,8.0,0.41574377939105034,0,0.03025590069591999,0.004195595334749669,4.0,0,0.054637063294649124,0.031314114574342966,8.0,0,0.08073589950799942,0.04873109050095081,8.0,0,0.08027437329292297,0.053447028156369925,8.0,0,0.10879397392272949,0.05320803611539304,8.0,0,0.09728161990642548,0.05426347511820495,8.0,0,0.11486048251390457,0.05530085088685155,8.0,0,0.09052739292383194,0.052561775548383594,8.0,0,0.10448186099529266,0.0437243435299024,8.0,0,0.11017504334449768,0.05196797242388129,8.0,24,57,81
+TRM,99,186024,always,10,0,26041,5000,26041,5000,True,1.0,0.0007154803024604917,-0.002203070422183373,-0.0035806693485938013,2.0,0.0008774942689342424,1,-0.035642109811306,-0.04575248574838042,0.0,1,0.019917616620659828,0.0024543474355596118,5.0,1,-0.017714275047183037,-0.03588660876266658,0.0,1,0.0383574441075325,0.006086920462621492,5.0,1,0.0009329774184152484,-0.008076597718172707,1.0,1,0.0030809545423835516,-0.02970883113448508,1.0,1,-0.027372445911169052,-0.029555077897384763,0.0,1,0.04343428462743759,0.004418305215949658,5.0,1,0.01310175284743309,-0.01036793494131416,2.0,1,0.0007154803024604917,-0.002203070422183373,2.0,25,56,81
+TRM,100,323763,transient_or_regressed,1,2,104164,20000,,,False,0.5679012537002563,0.10995465517044067,0.055122181540355086,0.03352289414033294,8.0,0.4409774523228407,0,0.00838541891425848,0.0005190630326978862,4.0,0,0.03595571592450142,0.022693505277857184,8.0,0,0.06670815497636795,0.03646385087631643,8.0,1,0.08450296521186829,0.04837752878665924,8.0,0,0.10906567424535751,0.06391844991594553,8.0,0,0.0929703637957573,0.05519978818483651,8.0,0,0.09668456017971039,0.05005559301935136,8.0,0,0.1004776656627655,0.05973865790292621,8.0,0,0.11633478105068207,0.06658112583681941,8.0,0,0.10995465517044067,0.055122181540355086,8.0,24,57,81
+TRM,101,299776,always,10,0,26041,5000,26041,5000,True,1.0,0.0011548744514584541,-0.002070046517474111,-0.0036808084114454687,1.0,0.0011548744514584541,1,-0.03445933014154434,-0.043074491899460554,0.0,1,-0.007707083597779274,-0.014822232536971569,0.0,1,-0.029622256755828857,-0.03816012432798743,0.0,1,0.018937787041068077,-0.007169778575189412,2.0,1,0.010188073851168156,-0.002859201429600944,2.0,1,0.019038300961256027,-0.022611742606386542,1.0,1,-0.012821630574762821,-0.025653436430729926,0.0,1,0.01629071682691574,-0.011744278483092785,2.0,1,-0.003153432859107852,-0.016014042863389477,0.0,1,0.0011548744514584541,-0.002070046517474111,1.0,24,57,81
+TRM,102,353011,always,10,0,26041,5000,26041,5000,True,1.0,0.004511181730777025,0.0012137218045609188,-0.0007225393983389949,7.0,0.014154344251437578,1,-0.047680292278528214,-0.053155028726905584,0.0,1,-0.013456059619784355,-0.01768126874230802,0.0,1,-0.012777195312082767,-0.040298264124430716,0.0,1,-0.020274119451642036,-0.023471417371183634,0.0,1,-0.0024193916469812393,-0.00661028700415045,0.0,1,0.029676657170057297,-0.030129902763292193,1.0,1,-0.010456304997205734,-0.02393845224287361,0.0,1,0.04328817501664162,-0.002873995341360569,2.0,1,0.02313035912811756,-0.016922286478802562,1.0,1,0.004511181730777025,0.0012137218045609188,7.0,26,55,81
+TRM,103,274707,always,10,0,26041,5000,26041,5000,True,1.0,0.0017688822699710727,-0.005201783802476712,-0.007357456721365452,1.0,0.0017688822699710727,1,-0.037799328565597534,-0.04643213748931885,0.0,1,0.005172539968043566,-0.005404394454672001,1.0,1,0.0062016649171710014,-0.03777059691492468,1.0,1,0.02487921714782715,-0.0053088194981683046,1.0,1,0.019649887457489967,0.0024154254024324473,4.0,1,0.05798782780766487,-0.023054081248119473,1.0,1,-0.010600205510854721,-0.026532325893640518,0.0,1,0.01306421123445034,-0.010040452922112308,1.0,1,0.022125817835330963,-0.012010523307253607,1.0,1,0.0017688822699710727,-0.005201783802476712,1.0,23,58,81
+TRM,104,63096,always,10,0,26041,5000,26041,5000,True,1.0,0.047143708914518356,0.011839100941870129,-0.002550351266108919,5.0,0.11132513312622905,1,-0.03583928197622299,-0.0449415878392756,0.0,1,-0.00601142318919301,-0.0114822878385894,0.0,1,-0.005176282022148371,-0.02988340990850702,0.0,1,0.008333636447787285,-0.008852095634210855,2.0,1,0.029752720147371292,0.008012212172616273,5.0,1,0.038047656416893005,-0.016331805294612423,2.0,1,0.01447590533643961,-0.013636992254760116,1.0,1,0.08109357953071594,-0.00011551406350918114,2.0,1,0.0031794998794794083,-0.016217203519772738,1.0,1,0.047143708914518356,0.011839100941870129,5.0,24,57,81
+TRM,105,276964,always,10,0,26041,5000,26041,5000,True,1.0,0.000641145627014339,-0.003955048869102029,-0.006149348919279873,1.0,0.000641145627014339,1,-0.03016994521021843,-0.04231826774775982,0.0,1,-0.015770457684993744,-0.018080739304423332,0.0,1,0.003996677231043577,-0.030163761985022575,1.0,1,-0.005744604393839836,-0.020025878679007292,0.0,1,-0.0011709177633747458,-0.004408989814692177,0.0,1,0.03705144673585892,-0.01305777148809284,2.0,1,0.014026423916220665,-0.014554430788848549,2.0,1,0.024344731122255325,-0.005065014353021979,2.0,1,0.01731182262301445,-0.014642856200225651,1.0,1,0.000641145627014339,-0.003955048869102029,1.0,24,57,81
+TRM,106,406565,always,10,0,26041,5000,26041,5000,True,1.0,0.006229056511074305,-0.002378382025199244,-0.00465475203236565,2.0,0.006361189734889194,1,-0.02802051603794098,-0.03992278594523668,0.0,1,-0.0016373511170968413,-0.006944745939108543,0.0,1,-0.0019318348495289683,-0.03691629225795623,0.0,1,0.0014243486803025007,-0.01588472371804528,1.0,1,0.020358962938189507,0.00042444575956324115,3.0,1,0.07927165180444717,0.02282572354306467,7.0,1,0.049647413194179535,0.0037382791051641107,3.0,1,0.04887136071920395,-0.007693622435908765,1.0,1,-0.011467913165688515,-0.02127712732180953,0.0,1,0.006229056511074305,-0.002378382025199244,2.0,26,55,81
+TRM,107,249322,transient_or_regressed,1,2,234369,45000,,,False,0.6172839403152466,0.11127977073192596,0.06426302436739206,0.0445083063095808,8.0,0.5141041949391365,0,0.01569853350520134,0.006395930584403686,8.0,0,0.052504777908325195,0.035889548715204,8.0,0,0.07405299693346024,0.04908741358667612,8.0,0,0.09065327048301697,0.06397410714998841,8.0,0,0.11670511215925217,0.07158201467245817,8.0,0,0.1150636225938797,0.07238512672483921,8.0,0,0.10061057657003403,0.06067189504392445,8.0,0,0.10373147577047348,0.06655138824135065,8.0,1,0.09674607962369919,0.041585710481740534,8.0,0,0.11127977073192596,0.06426302436739206,8.0,23,58,81
+TRM,108,207819,always,10,0,26041,5000,26041,5000,True,1.0,0.00891510583460331,0.002329914685105905,-0.0016468690591864288,5.0,0.0259580931160599,1,-0.0443265326321125,-0.051201362162828445,0.0,1,-0.014883240684866905,-0.016813227208331227,0.0,1,0.008762815035879612,-0.038517106673680246,1.0,1,-0.016872279345989227,-0.02158390125259757,0.0,1,0.005973885767161846,-0.004246671363944188,2.0,1,0.03928181901574135,-0.02796502597630024,1.0,1,-0.004544244147837162,-0.027656884747557342,0.0,1,0.05731779709458351,-0.0007378947921097279,2.0,1,0.016431430354714394,-0.015921838697977364,1.0,1,0.00891510583460331,0.002329914685105905,5.0,26,55,81
+TRM,109,199646,always,10,0,26041,5000,26041,5000,True,1.0,0.014861353673040867,-0.0027563707408262417,-0.009109755046665668,2.0,0.016430412768386304,1,-0.04378592222929001,-0.05049554072320461,0.0,1,0.011728092096745968,-0.007662745570996776,1.0,1,-0.001716748927719891,-0.03518461216299329,0.0,1,0.0129326693713665,-0.014003966003656387,1.0,1,0.006766879465430975,-0.004309619565901812,1.0,1,0.04730205237865448,-0.016045034280978143,1.0,1,-0.023031074553728104,-0.03065384435467422,0.0,1,0.019119733944535255,-0.009800323867239058,2.0,1,-0.0004233418148942292,-0.019739641320484225,0.0,1,0.014861353673040867,-0.0027563707408262417,2.0,24,57,81
+TRM,110,177526,stable_learned,9,1,52082,10000,52082,10000,True,1.0,0.0023162439465522766,-0.0002770476639852859,-0.0016013339045457542,4.0,0.0041889543063007295,0,0.053996723145246506,0.02039948251331225,7.0,1,0.0031830084044486284,-0.006773816130589694,1.0,1,-0.001965567236766219,-0.028538725309772417,0.0,1,-0.012094399891793728,-0.018150458461605012,0.0,1,-0.0009584911167621613,-0.008840363647323102,0.0,1,0.04511352628469467,-0.031466983957216144,1.0,1,0.01090196892619133,-0.018599756294861436,1.0,1,0.018817711621522903,-0.011974691180512309,1.0,1,0.03526598960161209,-0.012605476018507034,1.0,1,0.0023162439465522766,-0.0002770476639852859,4.0,26,55,81
+TRM,111,178681,always,10,0,26041,5000,26041,5000,True,1.0,0.0055397613905370235,0.003067539104449679,0.0014702217922604177,8.0,0.024540312835597433,1,-0.0575530044734478,-0.06197805143892765,0.0,1,-0.01071644015610218,-0.01484444341622293,0.0,1,-0.04478118196129799,-0.04588480945676565,0.0,1,-0.018994005396962166,-0.030245779315009713,0.0,1,-0.0004992526373825967,-0.004514037129411008,0.0,1,-0.013667033053934574,-0.0336401843233034,0.0,1,-0.023862838745117188,-0.028592007234692574,0.0,1,0.031544581055641174,-0.009940149553585798,2.0,1,-0.02564154379069805,-0.0307974168099463,0.0,1,0.0055397613905370235,0.003067539104449679,8.0,32,49,81
+TRM,112,381040,always,10,0,26041,5000,26041,5000,True,1.0,0.0010340166045352817,-0.000839800844914862,-0.001680900895735249,2.0,0.001117256048019044,1,-0.04509539529681206,-0.05229116231203079,0.0,1,-0.011222109198570251,-0.016916609136387706,0.0,1,-0.02638068050146103,-0.04335541417822242,0.0,1,-0.0115848733112216,-0.020265308558009565,0.0,1,0.0013510260032489896,-0.0035187568137189373,1.0,1,-0.005584193393588066,-0.03491743584163487,0.0,1,0.08097154647111893,-0.009864870342426002,1.0,1,0.03140146657824516,-0.010028996912296861,1.0,1,0.0055804201401770115,-0.016847948951181024,1.0,1,0.0010340166045352817,-0.000839800844914862,2.0,25,56,81
+TRM,113,392992,always,10,0,26041,5000,26041,5000,True,1.0,0.003779800608754158,0.0003550605833879672,-0.0017801490175770596,5.0,0.01052046794211492,1,-0.03460134193301201,-0.04582205880433321,0.0,1,0.0033745139371603727,-0.007730852084932849,1.0,1,0.0027189827524125576,-0.0384060496580787,1.0,1,-0.0015562891494482756,-0.012459028861485422,0.0,1,-0.0014245883794501424,-0.007191999917267822,0.0,1,0.015228685922920704,-0.03249407990369946,1.0,1,-0.01121597085148096,-0.026203349814750254,0.0,1,0.07040110975503922,0.002654558455105871,2.0,1,-0.002916423138231039,-0.022407400130759925,0.0,1,0.003779800608754158,0.0003550605833879672,5.0,26,55,81
+TRM,114,347650,always,10,0,26041,5000,26041,5000,True,1.0,0.00739780068397522,-0.0010483801452210173,-0.003137201420031488,1.0,0.00739780068397522,1,-0.02924751490354538,-0.041998423635959625,0.0,1,0.02151607908308506,0.004893105343398929,4.0,1,-0.013696635141968727,-0.02850093669258058,0.0,1,5.3825169743504375e-05,-0.011445494795225386,1.0,1,0.0001154093406512402,-0.008725326643798326,1.0,1,0.02289671078324318,-0.019061246581259184,1.0,1,-0.01346130482852459,-0.029499384108930826,0.0,1,0.05270252004265785,-0.006291121244430542,2.0,1,-0.02047848328948021,-0.025386034976691008,0.0,1,0.00739780068397522,-0.0010483801452210173,1.0,28,53,81
+TRM,115,349536,transient_or_regressed,2,4,52082,10000,,,False,0.6172839403152466,0.10717465728521347,0.05562473274767399,0.035518843680620193,8.0,0.4449978619813919,0,0.008791611529886723,-0.0043781003332696855,3.0,1,0.02677231840789318,0.0022695258376188576,3.0,0,0.06911492347717285,0.04471539007499814,8.0,0,0.10628199577331543,0.05798842990770936,8.0,0,0.10438463091850281,0.05839716596528888,8.0,0,0.10072064399719238,0.06138862855732441,8.0,0,0.09009309113025665,0.0462415327783674,8.0,0,0.08948639035224915,0.053532255813479424,8.0,1,0.07908443361520767,0.021257305445033126,7.0,0,0.10717465728521347,0.05562473274767399,8.0,23,58,81
+TRM,116,331963,never,0,0,,,,,False,0.654321014881134,0.10483328998088837,0.04626844637095928,0.026936128735542297,8.0,0.37014757096767426,0,0.03829101100564003,0.019014588673599064,8.0,0,0.08293429017066956,0.04944717604666948,8.0,0,0.12021632492542267,0.06741831358522177,8.0,0,0.10096177458763123,0.060204660054296255,8.0,0,0.11052413284778595,0.06311263609677553,8.0,0,0.12637759745121002,0.0674008415080607,8.0,0,0.09386485069990158,0.04583482525777072,8.0,0,0.10092567652463913,0.054991395911201835,8.0,0,0.09301888197660446,0.0484865284524858,8.0,0,0.10483328998088837,0.04626844637095928,8.0,25,56,81
+TRM,117,92373,always,10,0,26041,5000,26041,5000,True,1.0,-0.0029779942706227303,-0.005353371845558286,-0.006382608436979353,0.0,0.0,1,-0.04858062416315079,-0.05524981580674648,0.0,1,-0.015022527426481247,-0.01697918609715998,0.0,1,-9.044197213370353e-05,-0.03873434468732739,0.0,1,-0.016662321984767914,-0.022211082745343447,0.0,1,-0.003454037709161639,-0.006415939657017589,0.0,1,0.031621940433979034,-0.03045082651078701,1.0,1,-0.013093739748001099,-0.027084378991276026,0.0,1,0.0376988910138607,-0.0033630874822847545,2.0,1,0.04156379774212837,-0.013404785422608256,1.0,1,-0.0029779942706227303,-0.005353371845558286,0.0,25,56,81
+TRM,118,284134,stable_learned,9,1,52082,10000,52082,10000,True,1.0,-0.0007171890465542674,-0.004252016413374804,-0.00608674599789083,0.0,0.0,0,0.03508434072136879,0.013367188308620825,7.0,1,-0.006965799257159233,-0.010622243338730186,0.0,1,-0.005469281692057848,-0.028122416289988905,0.0,1,-0.01064715813845396,-0.021282224799506366,0.0,1,-0.005638458300381899,-0.010607470292598009,0.0,1,0.04873413220047951,-0.018856662907637656,1.0,1,0.00927831046283245,-0.02391524752601981,1.0,1,0.02521566115319729,-0.004851030244026333,2.0,1,0.005963676143437624,-0.013965315476525575,1.0,1,-0.0007171890465542674,-0.004252016413374804,0.0,25,56,81
+TRM,119,133524,always,10,0,26041,5000,26041,5000,True,1.0,0.0034045400097966194,0.0016916705535550136,0.000504234958498273,7.0,0.01378939495771192,1,-0.046199869364500046,-0.0514186080545187,0.0,1,-0.0015660899225622416,-0.008956197561929002,0.0,1,-0.0038592529017478228,-0.033316526940325275,0.0,1,-0.0182478204369545,-0.021308822091668844,0.0,1,0.0012727626599371433,-0.0031725657026981935,2.0,1,0.01470460370182991,-0.02515879413112998,1.0,1,0.01288430206477642,-0.020393294747918844,1.0,1,0.05426916852593422,-0.0073568532534409314,1.0,1,0.010072380304336548,-0.019253497128374875,1.0,1,0.0034045400097966194,0.0016916705535550136,7.0,25,56,81
+TRM,120,15615,transient_or_regressed,2,2,208328,40000,,,False,0.6790123581886292,0.1260794848203659,0.06242147507146001,0.03870482835918665,8.0,0.49937180057168007,0,0.01864974945783615,0.003311580279842019,4.0,0,0.06427783519029617,0.03209485195111483,8.0,0,0.08810622245073318,0.04730585543438792,8.0,0,0.10231257975101471,0.05785682424902916,8.0,0,0.14603078365325928,0.06989096524193883,8.0,0,0.12512123584747314,0.06699558533728123,8.0,0,0.10529552400112152,0.05066573491785675,8.0,1,0.03374642878770828,-0.004981143632903695,3.0,1,0.035456832498311996,0.0027613988640950993,3.0,0,0.1260794848203659,0.06242147507146001,8.0,25,56,81
+TRM,121,417974,always,10,0,26041,5000,26041,5000,True,1.0,-0.004765220917761326,-0.006154264905489981,-0.007153489044867456,0.0,0.0,1,-0.044493481516838074,-0.049195755738765,0.0,1,-0.012928890995681286,-0.015607148292474449,0.0,1,-0.005194664932787418,-0.03567401983309537,0.0,1,-0.01895022578537464,-0.02263000002130866,0.0,1,0.01584254391491413,-0.0018187631212640554,2.0,1,0.03175123408436775,-0.015600684564560652,1.0,1,-0.013257814571261406,-0.024939398979768157,0.0,1,-0.010975000448524952,-0.01757417607586831,0.0,1,-0.012042822316288948,-0.019633069052360952,0.0,1,-0.004765220917761326,-0.006154264905489981,0.0,24,57,81
+TRM,122,88569,always,10,0,26041,5000,26041,5000,True,1.0,0.02351696416735649,0.0041891446453519166,-0.004994090413674712,4.0,0.05348951881751418,1,-0.039917316287755966,-0.048301405273377895,0.0,1,0.0004120785742998123,-0.008337334787938744,1.0,1,-0.004571534693241119,-0.034946118015795946,0.0,1,-0.005832991562783718,-0.017142875585705042,0.0,1,0.0038436264730989933,-0.006050197465810925,1.0,1,0.047228556126356125,-0.02018758631311357,1.0,1,0.004699704237282276,-0.019266299321316183,1.0,1,0.052614688873291016,-0.005370732280425727,1.0,1,-0.016831103712320328,-0.0245725701097399,0.0,1,0.02351696416735649,0.0041891446453519166,4.0,26,55,81
+TRM,123,2265,never,0,0,,,,,False,0.604938268661499,0.09891806542873383,0.04485063604079187,0.022703229915350676,8.0,0.35880508832633495,0,0.0225073229521513,0.001205308988573961,4.0,0,0.05820346623659134,0.030097349546849728,8.0,0,0.08323821425437927,0.051301995757967234,8.0,0,0.08193131536245346,0.04666431457735598,8.0,0,0.10615812242031097,0.05696936114691198,8.0,0,0.0919622853398323,0.04836882487870753,8.0,0,0.07583648711442947,0.0400160028366372,8.0,0,0.09144528210163116,0.03800164902349934,8.0,0,0.110883928835392,0.05562350363470614,8.0,0,0.09891806542873383,0.04485063604079187,8.0,24,57,81
+TRM,124,27594,flaky_then_stable,9,2,26041,5000,78123,15000,True,1.0,-0.0013420305913314223,-0.0027142133039887995,-0.0035416167811490595,0.0,0.0,1,-0.04039712995290756,-0.047695322427898645,0.0,0,0.07349681854248047,0.039432625751942396,8.0,1,0.03783904016017914,-0.018517078598961234,1.0,1,0.006972400471568108,-0.008384578482946381,2.0,1,0.022870870307087898,0.0071092160360421985,6.0,1,0.04610325023531914,-0.01594014116562903,1.0,1,-0.010982696898281574,-0.027294192812405527,0.0,1,0.026384389027953148,-0.0011330550187267363,3.0,1,-0.0032920062076300383,-0.02005034693866037,0.0,1,-0.0013420305913314223,-0.0027142133039887995,0.0,24,57,81
+TRM,125,409917,always,10,0,26041,5000,26041,5000,True,1.0,-0.0009271354647353292,-0.002790541766444221,-0.004174403904471546,0.0,0.0,1,-0.028080062940716743,-0.034674711525440216,0.0,1,0.01860179752111435,0.0017133814180851914,3.0,1,-0.0015680174110457301,-0.03359153562632855,0.0,1,-0.013585279695689678,-0.02517773292493075,0.0,1,0.006029936019331217,-0.0014474983254331164,3.0,1,0.049227986484766006,-0.02642527688294649,1.0,1,0.011014693416655064,-0.019669751753099263,1.0,1,0.016954468563199043,-0.0074198918882757425,2.0,1,0.0020954038482159376,-0.017633819516049698,1.0,1,-0.0009271354647353292,-0.002790541766444221,0.0,26,55,81
+TRM,126,345695,always,10,0,26041,5000,26041,5000,True,1.0,0.006028168369084597,-0.002451572028803639,-0.005425852024927735,2.0,0.006621453387197107,1,-0.04973936080932617,-0.05595262302085757,0.0,1,-0.005087235011160374,-0.010494908667169511,0.0,1,-0.024752771481871605,-0.035370505414903164,0.0,1,-0.008347369730472565,-0.017163546639494598,0.0,1,-0.0013253299985080957,-0.0080840798036661,0.0,1,0.0397258959710598,-0.023317159386351705,1.0,1,-0.013806473463773727,-0.025645112502388656,0.0,1,0.061243075877428055,-0.0046855880646035075,1.0,1,0.027949418872594833,-0.019577391678467393,1.0,1,0.006028168369084597,-0.002451572028803639,2.0,26,55,81
+TRM,127,59696,flaky_then_stable,6,5,78123,15000,260410,50000,True,1.0,0.07158119976520538,0.03510003548581153,0.013479169690981507,8.0,0.28080028388649225,0,0.006280785426497459,-0.004173689696472138,3.0,0,0.06994735449552536,0.034450552659109235,8.0,1,0.08337762951850891,0.035784360487014055,8.0,1,0.09211387485265732,0.05774590466171503,8.0,0,0.1047968789935112,0.06032880721613765,8.0,1,0.0692853331565857,0.030983917240519077,8.0,1,0.09017740190029144,-0.0013138714712113142,2.0,1,0.10823613405227661,0.040027830022154376,8.0,0,0.12413245439529419,0.054349818266928196,8.0,1,0.07158119976520538,0.03510003548581153,8.0,26,55,81
+TRM,128,290755,flaky_then_stable,6,3,78123,15000,182287,35000,True,1.0,0.0009090612293221056,-0.0058548544548102655,-0.008955468074418604,1.0,0.0009090612293221056,0,-0.0016840628813952208,-0.011777641164371744,0.0,0,0.03201410546898842,0.01671546249417588,8.0,1,0.0020551797933876514,-0.03278136200970039,1.0,1,-0.013577600941061974,-0.02067844499833882,0.0,0,0.09396573901176453,0.044857297325506806,8.0,0,0.12046613544225693,0.06073514395393431,8.0,1,-0.0007922134827822447,-0.026620096381520852,0.0,1,0.0028984390664845705,-0.015006224828539416,1.0,1,0.000690947228576988,-0.01850687794649275,1.0,1,0.0009090612293221056,-0.0058548544548102655,1.0,24,57,81
+TRM,129,313419,always,10,0,26041,5000,26041,5000,True,1.0,0.010253047570586205,-0.0018178217142121866,-0.006510662846267223,3.0,0.014504220918752253,1,-0.014988544397056103,-0.028164590592496097,0.0,1,0.0013434106949716806,-0.007174994338129181,2.0,1,0.00012108320515835658,-0.032522103271730884,1.0,1,0.00013956108887214214,-0.01069786988045962,1.0,1,0.01685062237083912,-0.001077600783901289,3.0,1,0.051432058215141296,-0.010773342568427324,2.0,1,-0.01940140500664711,-0.029239075491204858,0.0,1,0.04033096879720688,0.0009781799744814634,4.0,1,0.02729007601737976,-0.015236180508509278,1.0,1,0.010253047570586205,-0.0018178217142121866,3.0,23,58,81
+TRM,130,253578,always,10,0,26041,5000,26041,5000,True,1.0,-0.001380143454298377,-0.002926078886957839,-0.003811477159615606,0.0,0.0,1,-0.02351001463830471,-0.04079999611712992,0.0,1,-0.012748122215270996,-0.014952840749174356,0.0,1,0.007183244451880455,-0.029987530782818794,1.0,1,-0.01856335625052452,-0.022460670676082373,0.0,1,-0.0018999463645741343,-0.006715808049193583,0.0,1,0.050710223615169525,-0.026390023063868284,1.0,1,0.04324904456734657,-0.013132356136338785,1.0,1,0.006321001797914505,-0.01678367331624031,1.0,1,0.006979995872825384,-0.01386795862345025,1.0,1,-0.001380143454298377,-0.002926078886957839,0.0,22,59,81
+TRM,131,336359,transient_or_regressed,1,2,156246,30000,,,False,0.7160493731498718,0.10150620341300964,0.045253923744894564,0.022746229777112603,8.0,0.3620313899591565,0,0.020142581313848495,0.002624376727908384,4.0,0,0.06356685608625412,0.03191658086143434,8.0,0,0.07711233198642731,0.05304299807175994,8.0,0,0.09917744249105453,0.05802335310727358,8.0,0,0.10298487544059753,0.0579685855191201,8.0,1,0.04739919677376747,-0.01016623160103336,2.0,0,0.11273778229951859,0.05122074787504971,8.0,0,0.1250048279762268,0.0623356937430799,8.0,0,0.11308664828538895,0.060451415134593844,8.0,0,0.10150620341300964,0.045253923744894564,8.0,24,57,81
+TRM,132,373221,always,10,0,26041,5000,26041,5000,True,1.0,0.04265332967042923,0.004810472517419839,-0.0023492701293434948,3.0,0.04834554181434214,1,-0.05012763291597366,-0.052738013211637735,0.0,1,-0.01431208848953247,-0.01691013970412314,0.0,1,-0.008660034276545048,-0.03932257660198957,0.0,1,-0.0016842859331518412,-0.017589572380529717,0.0,1,0.0034054582938551903,-0.0023851459336583503,2.0,1,0.03255864232778549,-0.024492297554388642,1.0,1,-0.018960319459438324,-0.028335053706541657,0.0,1,0.040048230439424515,-0.006959038379136473,2.0,1,-0.015848610550165176,-0.018730845768004656,0.0,1,0.04265332967042923,0.004810472517419839,3.0,23,58,81
+TRM,133,371950,flaky_then_stable,3,3,182287,35000,260410,50000,True,1.0,0.03230714425444603,0.0018168632523156703,-0.00775500014424324,3.0,0.05003966297954321,0,0.014352088794112206,-0.0006115697760833427,4.0,0,0.06679246574640274,0.03059394215233624,8.0,0,0.10127194970846176,0.05214410042390227,8.0,0,0.09394752979278564,0.05347426934167743,8.0,0,0.1088203489780426,0.05068833683617413,8.0,0,0.12352492660284042,0.05971326865255833,8.0,1,0.04451809823513031,0.006786396857933141,5.0,1,0.04534900560975075,0.00728501015692018,4.0,0,0.0975838303565979,0.04598438669927418,8.0,1,0.03230714425444603,0.0018168632523156703,3.0,26,55,81
+TRM,134,265574,always,10,0,26041,5000,26041,5000,True,1.0,0.0024508212227374315,0.0009872053633444011,2.0529143512248993e-05,6.0,0.010512512177228928,1,-0.03776078671216965,-0.0467130308970809,0.0,1,-0.013853808864951134,-0.016053273575380445,0.0,1,-0.018324827775359154,-0.04379966645501554,0.0,1,-0.016830716282129288,-0.022192419739440084,0.0,1,0.0014390635769814253,-0.004569280121359043,1.0,1,0.041592441499233246,-0.030813195277005434,1.0,1,-0.019068248569965363,-0.026011874666437507,0.0,1,-0.004349350463598967,-0.017053980787750334,0.0,1,0.047144815325737,-0.012194466078653932,1.0,1,0.0024508212227374315,0.0009872053633444011,6.0,26,55,81
+TRM,135,327810,transient_or_regressed,1,2,52082,10000,,,False,0.6419752836227417,0.09120148420333862,0.0360234675463289,0.010942790191620588,8.0,0.2881877403706312,0,0.03938184306025505,0.023483132128603756,8.0,1,0.008572632446885109,-0.003748222294234438,2.0,0,0.09395420551300049,0.054012746550142765,8.0,0,0.10044316202402115,0.05276850564405322,8.0,0,0.11748627573251724,0.05897788261063397,8.0,0,0.12173524498939514,0.06232033553533256,8.0,0,0.08981219679117203,0.04481499292887747,8.0,0,0.10936617106199265,0.055053543066605926,8.0,0,0.1277179718017578,0.06230647652409971,8.0,0,0.09120148420333862,0.0360234675463289,8.0,26,55,81
+TRM,136,26585,flaky_then_stable,9,2,26041,5000,78123,15000,True,1.0,0.024321196600794792,0.004197899688733742,-0.0035480846418067813,4.0,0.04777553607709706,1,-0.008959232829511166,-0.023856074665673077,0.0,0,0.047108206897974014,0.032410164596512914,8.0,1,0.032004646956920624,-0.0035152108466718346,2.0,1,-0.00691380025818944,-0.01686177431838587,0.0,1,0.07018066942691803,0.03170508914627135,8.0,1,0.08095109462738037,0.022427670075558126,7.0,1,-0.007384622003883123,-0.026527028472628444,0.0,1,0.037369489669799805,0.01098218432161957,4.0,1,0.035045526921749115,-0.0066787012765416875,1.0,1,0.024321196600794792,0.004197899688733742,4.0,25,56,81
+TRM,137,113924,flaky_then_stable,8,2,26041,5000,234369,45000,True,1.0,0.007588387466967106,-0.0006803460273658857,-0.005628618411719799,3.0,0.018364455550909042,1,-0.029536252841353416,-0.0404441321734339,0.0,1,0.00366386491805315,-0.007091570754710119,1.0,1,0.026518868282437325,-0.0013033829691266874,3.0,1,-0.005103199742734432,-0.01751491951290518,0.0,1,-9.572781891620252e-06,-0.007047633925253649,0.0,1,0.039019808173179626,-0.03174499049782753,1.0,0,0.09800052642822266,0.05091494764201343,8.0,0,0.11965543776750565,0.04895240836776793,8.0,1,0.012557112611830235,-0.01026529454975389,2.0,1,0.007588387466967106,-0.0006803460273658857,3.0,24,57,81
+TRM,138,300328,always,10,0,26041,5000,26041,5000,True,1.0,0.0011586735490709543,-0.0012097018952772487,-0.002775997738353908,3.0,0.0017962973797693849,1,-0.050854407250881195,-0.05638012150302529,0.0,1,-0.010449630208313465,-0.012284915312193334,0.0,1,-0.027096113190054893,-0.038789901649579406,0.0,1,-0.02934160828590393,-0.03305968386121094,0.0,1,-0.005270808935165405,-0.006160803604871035,0.0,1,-0.006308914627879858,-0.03342331520980224,0.0,1,-0.018739240244030952,-0.026172577869147062,0.0,1,0.04805169999599457,-0.004630462935892865,1.0,1,-0.014167330227792263,-0.026990589569322765,0.0,1,0.0011586735490709543,-0.0012097018952772487,3.0,34,47,81
+TRM,139,237277,always,10,0,26041,5000,26041,5000,True,1.0,0.006157425232231617,-0.0014995168821769767,-0.004219184222165495,3.0,0.0076647401438094676,1,-0.052259452641010284,-0.05571508128196001,0.0,1,-0.01302847545593977,-0.01593287638388574,0.0,1,0.007848042994737625,-0.03673897450789809,1.0,1,-0.004619264509528875,-0.020008951367344707,0.0,1,0.0023948827292770147,-0.005128229910042137,1.0,1,0.05698747560381889,-0.028133750893175602,1.0,1,-0.017807690426707268,-0.029019219567999244,0.0,1,0.0501125268638134,-0.001897021778859198,2.0,1,-0.0025604446418583393,-0.02245107147609815,0.0,1,0.006157425232231617,-0.0014995168821769767,3.0,25,56,81
+TRM,140,14183,flaky_then_stable,5,5,52082,10000,234369,45000,True,1.0,0.003145458409562707,0.0017926785803865641,0.0010272158542647958,8.0,0.014341428643092513,0,0.031147550791502,0.003971430873207282,3.0,1,0.06512722373008728,0.021253066312056035,8.0,0,0.10903379321098328,0.04901291779242456,8.0,0,0.11449190974235535,0.04518651496618986,8.0,0,0.10992329567670822,0.05258541158400476,8.0,1,0.037634141743183136,-0.012844150303862989,1.0,1,0.028737960383296013,0.0009885968884191243,3.0,0,0.08930324763059616,0.04823684808798134,8.0,1,0.00044879212509840727,-0.01825628431106452,1.0,1,0.003145458409562707,0.0017926785803865641,8.0,26,55,81
+TRM,141,10350,stable_learned,9,1,52082,10000,52082,10000,True,1.0,0.001495033735409379,-0.0031586887344019488,-0.005014935624785721,1.0,0.001495033735409379,0,0.01986159197986126,0.012861135648563504,8.0,1,-0.006320128217339516,-0.01245653914520517,0.0,1,0.00897250697016716,-0.02271628496237099,1.0,1,-0.004150744061917067,-0.01754053490003571,0.0,1,0.01306296419352293,0.0006299357992247678,3.0,1,0.04610796272754669,-0.013688720995560288,2.0,1,-0.00485547911375761,-0.021851356141269207,0.0,1,0.032712172716856,-0.01017066795611754,2.0,1,-0.007871701382100582,-0.02015673858113587,0.0,1,0.001495033735409379,-0.0031586887344019488,1.0,24,57,81
+TRM,142,121829,flaky_then_stable,4,3,130205,25000,260410,50000,True,1.0,0.01805562898516655,0.00203438731841743,-0.005610085732769221,5.0,0.04012863663956523,0,0.057758912444114685,0.026803406479302794,8.0,0,0.09721634536981583,0.04904248262755573,8.0,0,0.07657671719789505,0.03842324763536453,8.0,0,0.09741627424955368,0.0539051101077348,8.0,1,0.0326831191778183,0.009533481410471722,6.0,1,0.043303217738866806,0.00020649912767112255,4.0,1,0.03382158651947975,-0.0011490960023365915,2.0,0,0.09952086955308914,0.04438569210469723,8.0,0,0.09427396208047867,0.0461229991633445,8.0,1,0.01805562898516655,0.00203438731841743,5.0,25,56,81
+TRM,143,147685,stable_learned,9,1,52082,10000,52082,10000,True,1.0,0.003260717960074544,0.0005995071587676648,-0.0009769353418960236,6.0,0.009914353810017928,0,0.0329718217253685,0.009563513478497043,5.0,1,0.0004726012994069606,-0.005384552157011058,2.0,1,0.003763933666050434,-0.028084538993425667,1.0,1,-0.010833720676600933,-0.019424444646574557,0.0,1,0.0028878534212708473,-0.005353127387934364,1.0,1,0.0462980754673481,-0.025513279251754284,1.0,1,0.004251402337104082,-0.019266497634816915,1.0,1,0.0466068834066391,-0.0029348528041737154,2.0,1,0.03890671208500862,-0.015498494962230325,1.0,1,0.003260717960074544,0.0005995071587676648,6.0,25,56,81
+TRM,144,114035,transient_or_regressed,2,2,104164,20000,,,False,0.6172839403152466,0.09379343688488007,0.05149454087950289,0.032124274875968695,8.0,0.41195632703602314,0,0.04915237054228783,0.02067575987894088,8.0,0,0.06856178492307663,0.03700320748612285,8.0,0,0.09207069128751755,0.058415199629962444,8.0,1,0.1062312051653862,0.04433150589466095,8.0,1,0.0854194313287735,0.04620236228220165,8.0,0,0.11295933276414871,0.05944214342162013,8.0,0,0.07867436110973358,0.046219728887081146,8.0,0,0.10135598480701447,0.05347092519514263,8.0,0,0.10154716670513153,0.057941512670367956,8.0,0,0.09379343688488007,0.05149454087950289,8.0,25,56,81
+TRM,145,129570,transient_or_regressed,3,6,78123,15000,,,False,0.6419752836227417,0.09700321406126022,0.046446121064946055,0.025223843287676573,8.0,0.37156896851956844,0,0.007132421247661114,-0.007215100002213148,2.0,0,0.04832817241549492,0.028453520499169827,8.0,1,-0.007063543424010277,-0.023223714902997017,0.0,0,0.10403997451066971,0.06600657384842634,8.0,1,0.14093145728111267,0.0597374071367085,8.0,0,0.10633522272109985,0.06321576610207558,8.0,0,0.10469797998666763,0.052351633086800575,8.0,0,0.13520373404026031,0.07076595723628998,8.0,1,0.051072247326374054,0.0020157121180091053,3.0,0,0.09700321406126022,0.046446121064946055,8.0,26,55,81
+TRM,146,82468,flaky_then_stable,8,3,52082,10000,234369,45000,True,1.0,0.010210240259766579,0.00038419158227043226,-0.0030214724829420447,4.0,0.015159422589931637,0,0.06026391685009003,0.021191320469370112,7.0,1,0.010708744637668133,-0.0055436124675907195,2.0,1,-0.00624329736456275,-0.03772643505362794,0.0,1,0.002492798026651144,-0.016435553843621165,1.0,1,0.005429049488157034,-0.003017008406459354,2.0,1,-0.0008620662265457213,-0.02580282343114959,0.0,1,0.09232759475708008,9.619281627237797e-05,3.0,0,0.08609595149755478,0.04719993332400918,8.0,1,0.011475227773189545,-0.006395454271114431,1.0,1,0.010210240259766579,0.00038419158227043226,4.0,17,64,81
+TRM,147,343314,flaky_then_stable,6,5,52082,10000,260410,50000,True,1.0,0.043253690004348755,0.01471539496560581,0.004990445973817259,8.0,0.11772315972484648,0,0.01780637539923191,-0.0012399120605550706,3.0,1,-0.0011858961079269648,-0.007692631916143,0.0,0,0.08016564697027206,0.052682746201753616,8.0,0,0.08256639540195465,0.050273383036255836,8.0,1,0.012233898974955082,0.0005496345329447649,4.0,1,0.02185097709298134,-0.022491051466204226,1.0,1,0.08263935148715973,0.006639030645601451,4.0,1,0.04814326763153076,-0.006934174918569624,2.0,0,0.10532068461179733,0.05327557818964124,8.0,1,0.043253690004348755,0.01471539496560581,8.0,26,55,81
+TRM,148,2260,transient_or_regressed,5,4,78123,15000,,,False,0.6666666865348816,0.0933169573545456,0.051847660914063454,0.033355772495269775,8.0,0.41478128731250763,0,0.03326151892542839,0.0071674573118798435,5.0,0,0.04353611171245575,0.019533834885805845,8.0,1,0.009586822241544724,-0.02898183697834611,1.0,0,0.09122628718614578,0.05745167192071676,8.0,1,0.020643014460802078,0.002142540748536703,4.0,1,0.05554977059364319,0.005529207119252533,4.0,1,0.06759298592805862,0.014637251537351403,5.0,1,0.048681918531656265,-0.008535492263035849,1.0,0,0.09751349687576294,0.04885423113591969,8.0,0,0.0933169573545456,0.051847660914063454,8.0,23,58,81
+TRM,149,255045,stable_learned,1,1,260410,50000,260410,50000,True,1.0,-0.0026498991064727306,-0.004344224144006148,-0.005394530948251486,0.0,0.0,0,0.009562646970152855,-0.003125969145912677,3.0,0,0.07731955498456955,0.04311706218868494,8.0,0,0.061982832849025726,0.04141822946257889,8.0,0,0.09469109028577805,0.05768749164417386,8.0,0,0.11401217430830002,0.0628412957303226,8.0,0,0.11437048763036728,0.05472955130971968,8.0,0,0.09289000928401947,0.047132206382229924,8.0,0,0.10786610841751099,0.05826830258592963,8.0,0,0.10119173675775528,0.05554999178275466,8.0,1,-0.0026498991064727306,-0.004344224144006148,0.0,24,57,81
+TRM,150,303870,never,0,0,,,,,False,0.6419752836227417,0.08871836215257645,0.04719284793827683,0.028218155959621072,8.0,0.3775427835062146,0,0.05172356590628624,0.020517035241937265,8.0,0,0.08641204237937927,0.05493770190514624,8.0,0,0.0955955907702446,0.05408199620433152,8.0,0,0.11379185318946838,0.0582313290797174,8.0,0,0.09209369868040085,0.059215429704636335,8.0,0,0.10255929827690125,0.05541014042682946,8.0,0,0.08987502753734589,0.03967930795624852,8.0,0,0.07988147437572479,0.04925217083655298,8.0,0,0.08834253996610641,0.04235593369230628,8.0,0,0.08871836215257645,0.04719284793827683,8.0,26,55,81
+TRM,151,321274,always,10,0,26041,5000,26041,5000,True,1.0,0.0017479206435382366,0.0005169820724404417,-0.0003508463123580441,6.0,0.00645442382665351,1,-0.048775333911180496,-0.05432925885543227,0.0,1,-0.013390407897531986,-0.017759965732693672,0.0,1,-0.044681891798973083,-0.04713428718969226,0.0,1,-0.016651572659611702,-0.022567998617887497,0.0,1,0.0013558523496612906,-0.005073871070635505,1.0,1,0.037671707570552826,-0.03688129736110568,1.0,1,-0.010345226153731346,-0.027200921205803752,0.0,1,0.052184972912073135,-0.008439023746177554,1.0,1,0.03136782348155975,-0.014084178423217963,2.0,1,0.0017479206435382366,0.0005169820724404417,6.0,25,56,81
+TRM,152,74913,flaky_then_stable,5,5,52082,10000,260410,50000,True,1.0,0.0109106982126832,0.0011804260548160528,-0.0014712935080751777,3.0,0.015372633817605674,0,0.03561357036232948,0.007236075500259176,4.0,1,-0.00042880335240624845,-0.006950309394596843,0.0,1,0.012605071067810059,-0.028744940413162112,1.0,1,0.041454967111349106,0.006919504434335977,5.0,0,0.10266181826591492,0.057930491864681244,8.0,0,0.10785800218582153,0.060213361866772175,8.0,1,0.0012122815242037177,-0.021955348624032922,1.0,0,0.08986766636371613,0.05688530905172229,8.0,0,0.08254539221525192,0.05009292042814195,8.0,1,0.0109106982126832,0.0011804260548160528,3.0,26,55,81
+TRM,153,53855,always,10,0,26041,5000,26041,5000,True,1.0,0.00041987912845797837,-0.002200586135586491,-0.003556750772986561,1.0,0.00041987912845797837,1,-0.0497492216527462,-0.05675992835313082,0.0,1,-0.012479206547141075,-0.014312254963442683,0.0,1,-0.038456398993730545,-0.04274928383529186,0.0,1,-0.015914492309093475,-0.026920261792838573,0.0,1,-0.004737157840281725,-0.007488883915357292,0.0,1,-0.013556750491261482,-0.0373910260386765,0.0,1,-0.023844875395298004,-0.028695423156023026,0.0,1,0.03913140296936035,-0.006698863988276571,1.0,1,-0.01595248468220234,-0.026907497085630894,0.0,1,0.00041987912845797837,-0.002200586135586491,1.0,34,47,81
+TRM,154,6756,transient_or_regressed,1,2,208328,40000,,,False,0.6666666865348816,0.10378434509038925,0.05570123600773513,0.03557519009336829,8.0,0.44560988806188107,0,0.02053832821547985,0.007805553010257427,6.0,0,0.07206875830888748,0.040948218666017056,8.0,0,0.08003036677837372,0.0493567893281579,8.0,0,0.08917731791734695,0.05776424193754792,8.0,0,0.10827376693487167,0.06270974269136786,8.0,0,0.10894276201725006,0.06113066431134939,8.0,0,0.0964801236987114,0.04789370275102556,8.0,1,0.03382635861635208,-0.004741608892800286,1.0,0,0.09516756236553192,0.050473149167373776,8.0,0,0.10378434509038925,0.05570123600773513,8.0,22,59,81
+TRM,155,367285,always,10,0,26041,5000,26041,5000,True,1.0,-0.0006533596315421164,-0.002381926831731107,-0.00332428261754103,0.0,0.0,1,-0.03773682937026024,-0.048384987749159336,0.0,1,-0.01571313664317131,-0.017955121351405978,0.0,1,-0.03951624408364296,-0.046849307138472795,0.0,1,-0.01626165769994259,-0.023701810277998447,0.0,1,-0.0007739851134829223,-0.007032642744889017,0.0,1,0.0469181202352047,-0.02606475492939353,1.0,1,-0.012686311267316341,-0.023582296329550445,0.0,1,0.047444723546504974,-0.008673497650306672,2.0,1,-0.0196650642901659,-0.02722291275858879,0.0,1,-0.0006533596315421164,-0.002381926831731107,0.0,24,57,81
+TRM,156,214527,transient_or_regressed,2,2,182287,35000,,,False,0.6172839403152466,0.10920289903879166,0.0572044316213578,0.03782093105837703,8.0,0.4576354529708624,0,0.013717166148126125,-0.004700989607954398,2.0,0,0.05696810036897659,0.024257226614281535,8.0,0,0.09722456336021423,0.05302232364192605,8.0,0,0.10009665787220001,0.06689672591164708,8.0,0,0.11068575084209442,0.053256837418302894,8.0,0,0.10509198158979416,0.05992945609614253,8.0,1,0.1059495285153389,0.06035960512235761,8.0,1,0.12362007051706314,0.05761873256415129,8.0,0,0.10081784427165985,0.05507189850322902,8.0,0,0.10920289903879166,0.0572044316213578,8.0,22,59,81
+TRM,157,338862,flaky_then_stable,8,4,26041,5000,182287,35000,True,1.0,0.010142686776816845,-0.0017086821317207068,-0.007553735165856779,3.0,0.01845142198726535,1,-0.03684594854712486,-0.04372202046215534,0.0,1,-0.009347072802484035,-0.014877772773616016,0.0,0,0.09134018421173096,0.05335148796439171,8.0,1,0.01575123704969883,-0.007136359286960214,1.0,1,0.0034269276075065136,-0.003996070721768774,1.0,0,0.09499137848615646,0.051123612094670534,8.0,1,0.0005241686012595892,-0.018530507833929732,1.0,1,0.05973884090781212,0.021769834216684103,8.0,1,0.06739770621061325,0.012697498459601775,4.0,1,0.010142686776816845,-0.0017086821317207068,3.0,23,58,81
+TRM,158,34686,always,10,0,26041,5000,26041,5000,True,1.0,0.027979198843240738,-0.0011994995657005347,-0.007019485114142299,1.0,0.027979198843240738,1,-0.03217178210616112,-0.040034178644418716,0.0,1,-0.003572870744392276,-0.013313608797034249,0.0,1,0.006107395049184561,-0.03324962343322113,1.0,1,-0.0069928765296936035,-0.018890750128775835,0.0,1,-0.0032108472660183907,-0.006997807475272566,0.0,1,0.05535033345222473,-0.017408681451343,1.0,1,0.003669851226732135,-0.022370592138031498,1.0,1,0.0562455989420414,-0.009116209403146058,1.0,1,-0.004530292470008135,-0.019820485322270542,0.0,1,0.027979198843240738,-0.0011994995657005347,1.0,24,57,81
+TRM,159,206660,flaky_then_stable,5,5,104164,20000,260410,50000,True,1.0,0.029931506142020226,0.0023316281876759604,-0.008001284819329157,4.0,0.05065816477872431,0,0.040491607040166855,0.019805334333796054,8.0,0,0.032489463686943054,0.016367093659937382,8.0,0,0.08353576064109802,0.050014231353998184,8.0,1,0.003175109624862671,-0.01128891357802786,1.0,1,0.02361350692808628,0.004369678674265742,5.0,1,0.04307270050048828,-0.02271667937748134,1.0,0,0.0953749269247055,0.03849510781583376,8.0,1,0.08900703489780426,0.0235816985823476,6.0,0,0.12152651697397232,0.05361765599809587,8.0,1,0.029931506142020226,0.0023316281876759604,4.0,26,55,81
+TRM,160,115204,always,10,0,26041,5000,26041,5000,True,1.0,0.01341544184833765,0.0003846014187729452,-0.003121848392765969,2.0,0.016311039216816425,1,-0.041645172983407974,-0.05026312544941902,0.0,1,-0.004889785777777433,-0.013274482393171638,0.0,1,-0.004746885038912296,-0.03828397544566542,0.0,1,-0.004974651150405407,-0.0157418082235381,0.0,1,0.0012111979303881526,-0.007361207346548326,1.0,1,0.05871466174721718,-0.013786170893581584,2.0,1,-0.009596970863640308,-0.023109666421078146,0.0,1,0.016983510926365852,-0.008944613160565495,2.0,1,0.00930027011781931,-0.015658802818506956,1.0,1,0.01341544184833765,0.0003846014187729452,2.0,25,56,81
+TRM,161,160284,always,10,0,26041,5000,26041,5000,True,1.0,-0.0007756477571092546,-0.002078086879919283,-0.003168834693497047,0.0,0.0,1,-0.0515998937189579,-0.0550419557839632,0.0,1,-0.011824311688542366,-0.01529054471757263,0.0,1,-0.008768425323069096,-0.04178303212393075,0.0,1,-0.015979669988155365,-0.023046123795211315,0.0,1,0.007696255575865507,-0.003798550278588664,2.0,1,0.03592464700341225,-0.028727865777909756,1.0,1,-0.023948542773723602,-0.0288851170334965,0.0,1,0.06963405758142471,-0.002659374848008156,2.0,1,-0.01466398872435093,-0.024630818516016006,0.0,1,-0.0007756477571092546,-0.002078086879919283,0.0,26,55,81
+TRM,162,384719,transient_or_regressed,2,4,104164,20000,,,False,0.5432098507881165,0.09436492621898651,0.04738153680227697,0.026006778236478567,8.0,0.37905229441821575,0,0.0505271777510643,0.025546011282131076,8.0,0,0.05838657543063164,0.03577312035486102,8.0,0,0.08320755511522293,0.05579865165054798,8.0,1,0.05483144521713257,0.011092701723100618,6.0,0,0.09414124488830566,0.052209494169801474,8.0,0,0.1282433271408081,0.07546595623716712,8.0,0,0.10213038325309753,0.04492124787066132,8.0,1,0.09413447976112366,0.04013349045999348,8.0,0,0.09090166538953781,0.04439944657497108,8.0,0,0.09436492621898651,0.04738153680227697,8.0,24,57,81
+TRM,163,323015,flaky_then_stable,8,3,52082,10000,104164,20000,True,1.0,0.00638391962274909,0.0016831545799504966,-0.0016783110331743956,4.0,0.020178480772301555,0,0.045475028455257416,0.018484500193153508,8.0,1,-0.010794701986014843,-0.014549829415045679,0.0,0,0.09013015776872635,0.0516565868165344,8.0,1,-0.002654590643942356,-0.014541521319188178,0.0,1,0.0005742327193729579,-0.008615253471361939,1.0,1,0.0507395900785923,-0.020043761702254415,1.0,1,-0.022637248039245605,-0.030207955511286855,0.0,1,0.015748774632811546,-0.012120076571591198,1.0,1,-0.01054334081709385,-0.021480827359482646,0.0,1,0.00638391962274909,0.0016831545799504966,4.0,25,56,81
+TRM,164,101104,always,10,0,26041,5000,26041,5000,True,1.0,0.008217668160796165,-0.0007785826892359182,-0.0027384976274333894,1.0,0.008217668160796165,1,-0.0400465726852417,-0.04553118301555514,0.0,1,-0.009149063378572464,-0.014319067122414708,0.0,1,-0.009153502061963081,-0.041445157723501325,0.0,1,-0.013459938578307629,-0.020143097382970154,0.0,1,0.00020199790014885366,-0.005840473681018921,1.0,1,0.04874948784708977,-0.014862106181681156,1.0,1,-0.020676614716649055,-0.02934607956558466,0.0,1,0.02317902259528637,-0.008982142546301475,1.0,1,-0.010127297602593899,-0.0229284391971305,0.0,1,0.008217668160796165,-0.0007785826892359182,1.0,24,57,81
+TRM,165,391795,always,10,0,26041,5000,26041,5000,True,1.0,0.016174400225281715,0.0015113445188035257,-0.0017989735933952034,3.0,0.019619247410446405,1,-0.043216463178396225,-0.05301598971709609,0.0,1,-0.009718624874949455,-0.014237681636586785,0.0,1,-0.004742998629808426,-0.0395757625810802,0.0,1,0.005964064970612526,-0.01196102419635281,1.0,1,0.007256380282342434,-0.006122914521256462,2.0,1,0.026717601343989372,-0.027692258823662996,1.0,1,0.08740942925214767,0.007283192171598785,3.0,1,0.021214153617620468,-0.006548867095261812,2.0,1,0.04938620701432228,-0.00729349534958601,1.0,1,0.016174400225281715,0.0015113445188035257,3.0,27,54,81
+TRM,166,236449,stable_learned,9,1,52082,10000,52082,10000,True,1.0,0.009021843783557415,-0.00032872857991605997,-0.0036695069284178317,2.0,0.015695280861109495,0,0.05336541309952736,0.013655864902830217,7.0,1,0.013863432221114635,-0.005223578817094676,1.0,1,-0.0015567326918244362,-0.028294362244196236,0.0,1,0.007075524888932705,-0.014103064662776887,1.0,1,-0.00249579269438982,-0.0068280814157333225,0.0,1,0.03902513161301613,-0.02771090087480843,1.0,1,-0.005293115042150021,-0.024011555244214833,0.0,1,0.03391437605023384,-0.010168640990741551,2.0,1,-0.007294910494238138,-0.022139969456475228,0.0,1,0.009021843783557415,-0.00032872857991605997,2.0,28,53,81
+TRM,167,55915,always,10,0,26041,5000,26041,5000,True,1.0,0.009299405850470066,-0.0001693986206632303,-0.003295799484476447,3.0,0.011828900314867496,1,-0.045266591012477875,-0.049056860618293285,0.0,1,-0.011080899275839329,-0.016112821409478784,0.0,1,-0.017892975360155106,-0.039997189771384,0.0,1,-0.001495680189691484,-0.02117481552704703,0.0,1,0.006472165696322918,-0.0005643206131935585,3.0,1,0.04151998087763786,-0.02882742159999907,1.0,1,-0.02619054540991783,-0.028785642702132463,0.0,1,0.03591744974255562,-0.008812856744043529,2.0,1,0.024454478174448013,-0.01855295989662409,1.0,1,0.009299405850470066,-0.0001693986206632303,3.0,25,56,81
+TRM,168,207723,never,0,0,,,,,False,0.5308641791343689,0.1045580580830574,0.05468553421087563,0.03355410834774375,8.0,0.43748427368700504,0,0.05919178947806358,0.0347729055210948,8.0,0,0.08775575459003448,0.05343754403293133,8.0,0,0.0712289810180664,0.045723630813881755,8.0,0,0.09135426580905914,0.055246198549866676,8.0,0,0.09759406745433807,0.06144919293001294,8.0,0,0.11607223749160767,0.06364039168693125,8.0,0,0.1268039345741272,0.06508523691445589,8.0,0,0.10619588196277618,0.05155473935883492,8.0,0,0.12574481964111328,0.05940475361421704,8.0,0,0.1045580580830574,0.05468553421087563,8.0,23,58,81
+TRM,169,138085,always,10,0,26041,5000,26041,5000,True,1.0,0.0010966300033032894,-0.002500920119928196,-0.004012154357042164,1.0,0.0010966300033032894,1,-0.015039286576211452,-0.029316369560547173,0.0,1,-0.0038089731242507696,-0.009524808003334329,0.0,1,0.012668300420045853,-0.03315747482702136,1.0,1,-0.0025353855453431606,-0.022105402254965156,0.0,1,0.004750798922032118,-0.002167467459003092,2.0,1,0.056267473846673965,-0.005522297549759969,2.0,1,-0.013855833560228348,-0.0215741015272215,0.0,1,0.05614590644836426,-0.0021850094926776364,3.0,1,0.019914668053388596,-0.010439659177791327,1.0,1,0.0010966300033032894,-0.002500920119928196,1.0,26,55,81
+TRM,170,264730,stable_learned,9,1,52082,10000,52082,10000,True,1.0,0.002407622989267111,0.00040593649828224443,-0.0009634060406824574,5.0,0.007202363311080262,0,0.0534244105219841,0.02066507702693343,7.0,1,-0.003945430275052786,-0.010199775104410946,0.0,1,-0.008069107308983803,-0.032963264966383576,0.0,1,0.004077228717505932,-0.013490883451595437,2.0,1,0.015350067988038063,0.0031069756660144776,5.0,1,0.019547423347830772,-0.032631245674565434,1.0,1,0.00019424395577516407,-0.019166476109603536,1.0,1,0.025981886312365532,-0.00924236691207625,1.0,1,0.039831217378377914,-0.011644621408777311,1.0,1,0.002407622989267111,0.00040593649828224443,5.0,27,54,81
+TRM,171,421139,flaky_then_stable,6,4,26041,5000,208328,40000,True,1.0,0.05601981654763222,0.016598077650996856,-0.00019964799867011607,6.0,0.13992935232818127,1,-0.01630616933107376,-0.03473426471464336,0.0,0,0.06179842725396156,0.024249633657746017,8.0,0,0.08627616614103317,0.04806558438576758,8.0,0,0.07480110973119736,0.04148511029779911,8.0,1,0.04476796090602875,0.015379650831164327,7.0,1,0.041581302881240845,-0.01046750895329751,1.0,0,0.08794771134853363,0.03730706451460719,8.0,1,0.0686555728316307,0.014280073210102273,5.0,1,0.04490303620696068,0.008801316434983164,5.0,1,0.05601981654763222,0.016598077650996856,6.0,26,55,81
+TRM,172,111474,always,10,0,26041,5000,26041,5000,True,1.0,0.0012683782260864973,-0.003936366963898763,-0.005440412554889917,1.0,0.0012683782260864973,1,-0.0551949068903923,-0.059081848710775375,0.0,1,-0.012981601059436798,-0.016731072799302638,0.0,1,0.011238766834139824,-0.034308518981561065,1.0,1,-0.020164869725704193,-0.024366687517613173,0.0,1,0.0010970082366839051,-0.007345444042584859,1.0,1,0.02681410126388073,-0.029178300639614463,1.0,1,-0.01439402811229229,-0.02844000910408795,0.0,1,0.03442676365375519,-0.012917537991597783,2.0,1,-0.004293455742299557,-0.022519579739309847,0.0,1,0.0012683782260864973,-0.003936366963898763,1.0,26,55,81
+TRM,173,11984,always,10,0,26041,5000,26041,5000,True,1.0,0.004469249397516251,-0.0007145419622247573,-0.002257772197481245,2.0,0.004923605272779241,1,-0.024092640727758408,-0.04046785319224,0.0,1,0.012895307503640652,0.0021940814185654745,5.0,1,-0.0022942519281059504,-0.03920549133908935,0.0,1,-0.018079673871397972,-0.023324450012296438,0.0,1,0.017596079036593437,2.5941970307030715e-05,3.0,1,0.008951753377914429,-0.019678250420838594,1.0,1,-0.027031728997826576,-0.029527838109061122,0.0,1,0.071267269551754,-0.0062837582372594625,1.0,1,0.010646095499396324,-0.01669027260504663,1.0,1,0.004469249397516251,-0.0007145419622247573,2.0,26,55,81
+TRM,174,332451,stable_learned,8,1,78123,15000,78123,15000,True,1.0,0.011483287438750267,0.0016993614444800187,-0.0008484232894261368,5.0,0.017255822283914313,0,0.05319972336292267,0.023002278234343976,8.0,0,0.10390260070562363,0.05863192304968834,8.0,1,-0.004108858294785023,-0.035295534995384514,0.0,1,-0.0003022486052941531,-0.014274016833951464,0.0,1,-6.456863775383681e-05,-0.0060513453263411066,0.0,1,0.061732158064842224,-0.011518248182255775,1.0,1,-0.011257041245698929,-0.028452386613935232,0.0,1,0.08011972904205322,0.0010117030760738999,2.0,1,0.01745487004518509,-0.010722891369368881,1.0,1,0.011483287438750267,0.0016993614444800187,5.0,24,57,81
+TRM,175,126571,transient_or_regressed,7,3,26041,5000,,,False,0.6790123581886292,0.09073368459939957,0.0467852724250406,0.029968665447086096,8.0,0.3742821794003248,1,0.021863460540771484,0.0023859051943873055,3.0,0,0.06545688956975937,0.03629665612243116,8.0,0,0.07270628958940506,0.05411424534395337,8.0,1,0.033186063170433044,0.00966912264630082,6.0,1,0.09010711312294006,0.040539860958233476,8.0,1,0.048830125480890274,-0.0056342257594224066,2.0,1,0.015382860787212849,-0.01721870934125036,1.0,1,0.07132358103990555,0.027782201679656282,8.0,1,0.03416503965854645,-0.005509780836291611,2.0,0,0.09073368459939957,0.0467852724250406,8.0,24,57,81
+TRM,176,10862,flaky_then_stable,6,7,52082,10000,260410,50000,True,1.0,0.03079306334257126,0.005971417325781658,-0.002605058718472719,5.0,0.05942137539386749,0,0.02560344897210598,0.00594067363999784,4.0,1,0.0368255078792572,0.007950006147439126,3.0,1,0.005555071402341127,-0.029726361331995577,1.0,0,0.10926733911037445,0.06930772867053747,8.0,1,0.11218170076608658,0.06704024039208889,8.0,1,0.057120297104120255,0.0052511056419461966,3.0,0,0.10436815768480301,0.055661550955846906,8.0,1,0.05655515938997269,-0.0009573786373948678,2.0,0,0.10387486964464188,0.045597712276503444,8.0,1,0.03079306334257126,0.005971417325781658,5.0,24,57,81
+TRM,177,364527,always,10,0,26041,5000,26041,5000,True,1.0,0.03188053146004677,0.007842166101909243,-0.0014379880449268967,5.0,0.07026877289172262,1,-0.04316011071205139,-0.05026930198073387,0.0,1,0.005245307460427284,-0.0017928408924490213,3.0,1,-0.024684997275471687,-0.036464127944782376,0.0,1,-0.005762641783803701,-0.015345483145210892,0.0,1,0.004357549361884594,-0.0028443818373489194,2.0,1,0.020782848820090294,-0.02150514372624457,1.0,1,-0.02062203921377659,-0.02855832618661225,0.0,1,0.05228665471076965,-0.004128766886424273,2.0,1,0.03537370264530182,-0.00769569506883272,1.0,1,0.03188053146004677,0.007842166101909243,5.0,26,55,81
+TRM,178,44562,always,10,0,26041,5000,26041,5000,True,1.0,0.005267338361591101,-0.0008371855219593272,-0.0030159142916090786,2.0,0.007969839498400688,1,-0.041671138256788254,-0.04957967158406973,0.0,1,0.05140006169676781,0.030474206432700157,8.0,1,0.006835809908807278,-0.03242556203622371,1.0,1,0.06428852677345276,0.03650739369913936,8.0,1,0.01625947281718254,0.005845895126185496,5.0,1,0.0039701214991509914,-0.018399282183963805,1.0,1,0.030742118135094643,-0.008730691275559366,2.0,1,0.01681762933731079,-0.009635620634071529,2.0,1,0.012903603725135326,-0.011203641930478625,1.0,1,0.005267338361591101,-0.0008371855219593272,2.0,26,55,81
+TRM,179,201394,always,10,0,26041,5000,26041,5000,True,1.0,-0.0029329254757612944,-0.004829373734537512,-0.006011351360939443,0.0,0.0,1,-0.016465552151203156,-0.02947558951564133,0.0,1,0.00209172279573977,-0.00761237287952099,1.0,1,-0.010346092283725739,-0.037663566414266825,0.0,1,-0.016261925920844078,-0.020569770131260157,0.0,1,0.0006980824400670826,-0.005057049791503232,1.0,1,0.027523791417479515,-0.01906856006826274,1.0,1,-0.00429152138531208,-0.025201555574312806,0.0,1,0.027693098410964012,-0.0064708025311119854,2.0,1,0.03609683737158775,-0.009045868238899857,2.0,1,-0.0029329254757612944,-0.004829373734537512,0.0,26,55,81
+TRM,180,170293,always,10,0,26041,5000,26041,5000,True,1.0,0.0031024611089378595,-0.0006225622855708934,-0.002373299648752436,3.0,0.004988487111404538,1,-0.045388054102659225,-0.05379599053412676,0.0,1,-0.010732481256127357,-0.016333347419276834,0.0,1,0.0006958520971238613,-0.04126353916944936,1.0,1,-0.009777083992958069,-0.019521228969097137,0.0,1,0.000500681169796735,-0.00660274399251648,1.0,1,0.02532087452709675,-0.0256292128469795,1.0,1,-0.001753709395416081,-0.025654900950030424,0.0,1,0.027996456250548363,-0.011525741225341335,1.0,1,0.017778780311346054,-0.010818517301231623,1.0,1,0.0031024611089378595,-0.0006225622855708934,3.0,26,55,81
+TRM,181,205276,flaky_then_stable,9,2,26041,5000,78123,15000,True,1.0,0.002776492154225707,-0.0017096569572458975,-0.0034111112472601235,1.0,0.002776492154225707,1,-0.03970722854137421,-0.05062286136671901,0.0,0,0.06552192568778992,0.037702319328673184,8.0,1,0.0012423308799043298,-0.03448406203824561,1.0,1,0.0036932220682501793,-0.012105015339329839,1.0,1,-0.0005628091748803854,-0.007043721969239414,0.0,1,0.04030348360538483,-0.020945632131770253,1.0,1,-0.0035776407457888126,-0.025744673737790436,0.0,1,0.046772491186857224,-0.0013803894398733974,3.0,1,-0.015218612737953663,-0.023491405067034066,0.0,1,0.002776492154225707,-0.0017096569572458975,1.0,25,56,81
+TRM,182,104453,always,10,0,26041,5000,26041,5000,True,1.0,-0.0003868525964207947,-0.0018918479800049681,-0.0030685405945405364,0.0,0.0,1,0.0027959360741078854,-0.015263379027601331,1.0,1,0.014136490412056446,-0.003137407671601977,2.0,1,0.0008539946866221726,-0.03343212187610334,1.0,1,-0.004275121726095676,-0.01510434306692332,0.0,1,0.0009534715791232884,-0.007560372141597327,1.0,1,0.04668913781642914,-0.018153546610847116,1.0,1,-0.010472620837390423,-0.028359699645079672,0.0,1,0.023820117115974426,-0.00992273073643446,1.0,1,0.004015862476080656,-0.01862507819896564,1.0,1,-0.0003868525964207947,-0.0018918479800049681,0.0,26,55,81
+TRM,183,229568,flaky_then_stable,8,3,52082,10000,208328,40000,True,1.0,0.0022582372184842825,-0.003255256211559754,-0.005534777534194291,2.0,0.0031922199414111674,0,0.055219970643520355,0.02572491648606956,8.0,1,0.0043618399649858475,-0.007967022422235459,1.0,1,0.01140676997601986,-0.03112178691662848,1.0,1,0.005493173375725746,-0.007076639980368782,1.0,1,0.03317255526781082,0.005126985837705433,4.0,1,0.08298920094966888,0.016834527836181223,5.0,0,0.10779106616973877,0.04989130003377795,8.0,1,0.08769012242555618,0.032959739182842895,8.0,1,0.00772856455296278,-0.014514127979055047,1.0,1,0.0022582372184842825,-0.003255256211559754,2.0,26,55,81
+TRM,184,346939,flaky_then_stable,4,5,52082,10000,234369,45000,True,1.0,0.005247209686785936,-0.001809102521292516,-0.004631707502994686,3.0,0.006248514095204882,0,0.015505852177739143,-0.00421263744647149,2.0,1,-0.00024218801991082728,-0.012005696120468201,0.0,0,0.08194631338119507,0.042732382426038384,8.0,0,0.06772217154502869,0.03807669598609209,8.0,0,0.09252673387527466,0.05785564985126257,8.0,0,0.0956137403845787,0.05594849423505366,8.0,1,0.004367609042674303,-0.020455819147173315,1.0,0,0.09523152559995651,0.0526040461845696,8.0,1,0.06005767360329628,0.019787532030022703,8.0,1,0.005247209686785936,-0.001809102521292516,3.0,26,55,81
+TRM,185,234087,always,10,0,26041,5000,26041,5000,True,1.0,-0.0006641661166213453,-0.0025356470941915177,-0.003278142074123025,0.0,0.0,1,-0.05379829183220863,-0.055460809264332056,0.0,1,-0.003954695072025061,-0.013668889703694731,0.0,1,-0.008641443215310574,-0.037503677303902805,0.0,1,-0.013967735692858696,-0.025241506285965443,0.0,1,-0.0009740566601976752,-0.004638708269339986,0.0,1,0.03520576283335686,-0.023465901147574186,1.0,1,-0.019796598702669144,-0.02735076774843037,0.0,1,0.03463420271873474,-0.004686433530878276,2.0,1,0.05424449220299721,-0.012059499858878553,1.0,1,-0.0006641661166213453,-0.0025356470941915177,0.0,26,55,81
+TRM,186,323979,always,10,0,26041,5000,26041,5000,True,1.0,-0.004543024115264416,-0.006613475037738681,-0.007904103142209351,0.0,0.0,1,-0.03865868225693703,-0.0464299856685102,0.0,1,0.004511887673288584,-0.009768319578142837,1.0,1,-0.0076932660304009914,-0.031093625992070884,0.0,1,-0.00973974447697401,-0.021399644785560668,0.0,1,0.036879051476716995,0.007313082802284043,5.0,1,0.0321074016392231,-0.01595629856456071,1.0,1,-0.0076558394357562065,-0.028941006283275783,0.0,1,0.028713339939713478,-0.00963745784247294,2.0,1,-0.011496303603053093,-0.021366887725889683,0.0,1,-0.004543024115264416,-0.006613475037738681,0.0,24,57,81
+TRM,187,164256,stable_learned,9,1,52082,10000,52082,10000,True,1.0,-0.002857011742889881,-0.005771412455942482,-0.007093164604157209,0.0,0.0,0,0.07229534536600113,0.03598294348921627,8.0,1,-0.003375592641532421,-0.011878335382789373,0.0,1,0.00784036424010992,-0.029153403476811945,1.0,1,-0.01762760989367962,-0.020473432494327426,0.0,1,0.0046361456625163555,-0.004243768693413585,1.0,1,0.03086964413523674,-0.024004069156944752,1.0,1,0.009137706831097603,-0.01461189251858741,1.0,1,0.021149538457393646,-0.010664045490557328,1.0,1,-0.004561736714094877,-0.016243680438492447,0.0,1,-0.002857011742889881,-0.005771412455942482,0.0,24,57,81
+TRM,188,331948,never,0,0,,,,,False,0.6913580298423767,0.10389910638332367,0.046881236135959625,0.027366695925593376,8.0,0.375049889087677,0,0.04921078681945801,0.015431208892550785,7.0,0,0.0975819006562233,0.05104151018895209,8.0,0,0.08843766152858734,0.05420202622190118,8.0,0,0.10662958025932312,0.06036465452052653,8.0,0,0.0882524698972702,0.051708612591028214,8.0,0,0.08743227273225784,0.049301177030429244,8.0,0,0.1065344512462616,0.04723674128763378,8.0,0,0.0882740467786789,0.054364528274163604,8.0,0,0.0852314904332161,0.044658265775069594,8.0,0,0.10389910638332367,0.046881236135959625,8.0,25,56,81
+TRM,189,250737,always,10,0,26041,5000,26041,5000,True,1.0,0.0001171151379821822,-0.002376046491917805,-0.003989635792095214,1.0,0.0001171151379821822,1,-0.03611503168940544,-0.047715031541883945,0.0,1,-0.004645490553230047,-0.010830251267179847,0.0,1,0.006001525092869997,-0.03579212579643354,1.0,1,-0.004604048561304808,-0.015340239682700485,0.0,1,0.004012345802038908,-0.004241910035489127,1.0,1,0.03802584111690521,-0.026182690227869898,1.0,1,-0.017397847026586533,-0.026312696281820536,0.0,1,-0.0013236816739663482,-0.012668730443692766,0.0,1,0.002990599488839507,-0.016907883720705286,1.0,1,0.0001171151379821822,-0.002376046491917805,1.0,25,56,81
+TRM,190,82690,transient_or_regressed,3,2,104164,20000,,,False,0.7777777910232544,0.10092316567897797,0.05344907334074378,0.033120705746114254,8.0,0.42759258672595024,0,0.038856618106365204,0.019350299902725965,8.0,0,0.0954301580786705,0.05426794430240989,8.0,0,0.11136677116155624,0.0565974498167634,8.0,1,-0.001964452676475048,-0.01627898821607232,0.0,1,0.011784957721829414,-0.00519352886476554,1.0,1,0.05286392197012901,0.0024129900702973828,3.0,0,0.09947839379310608,0.04529081471264362,8.0,0,0.11331503093242645,0.052009733160957694,8.0,0,0.1042296439409256,0.044482596684247255,8.0,0,0.10092316567897797,0.05344907334074378,8.0,25,56,81
+TRM,191,23822,always,10,0,26041,5000,26041,5000,True,1.0,0.0032575568184256554,0.0010059588985313894,-0.0002025677495112177,6.0,0.00978819812007714,1,-0.06426969915628433,-0.0685627143830061,0.0,1,-0.012551196850836277,-0.015307389432564378,0.0,1,-0.032012939453125,-0.03892155131325126,0.0,1,-0.015147145837545395,-0.026486489223316312,0.0,1,-0.0028649859596043825,-0.004606441070791334,0.0,1,-0.02609308809041977,-0.04061063239350915,0.0,1,-0.0263792984187603,-0.028013148112222552,0.0,1,0.026600306853652,-0.009798879269510508,1.0,1,-0.025284413248300552,-0.02936880709603429,0.0,1,0.0032575568184256554,0.0010059588985313894,6.0,35,46,81
+TRM,192,35695,never,0,0,,,,,False,0.5308641791343689,0.1315692663192749,0.06278911931440234,0.04125541355460882,8.0,0.5023129545152187,0,0.01997191645205021,0.004871715673289145,7.0,0,0.03088071011006832,0.017817739280872047,8.0,0,0.07021072506904602,0.04528797627426684,8.0,0,0.09346048533916473,0.069016776047647,8.0,0,0.12627243995666504,0.07984189223498106,8.0,0,0.12725749611854553,0.06903386395424604,8.0,0,0.12136668711900711,0.05839524744078517,8.0,0,0.12574291229248047,0.05831782752647996,8.0,0,0.12572579085826874,0.06766848615370691,8.0,0,0.1315692663192749,0.06278911931440234,8.0,25,56,81
+TRM,193,275783,never,0,0,,,,,False,0.5925925970077515,0.09138352423906326,0.042690374655649066,0.02096835384145379,8.0,0.34152299724519253,0,0.010561698116362095,-0.0015448314079549164,3.0,0,0.057402338832616806,0.022711000172421336,8.0,0,0.08206252753734589,0.03967103082686663,8.0,0,0.08602993935346603,0.04142870754003525,8.0,0,0.104702427983284,0.04579309723339975,8.0,0,0.12137541174888611,0.04650760401273146,8.0,0,0.11261856555938721,0.04725617042277008,8.0,0,0.09727513045072556,0.04489094112068415,8.0,0,0.10249410569667816,0.05324923759326339,8.0,0,0.09138352423906326,0.042690374655649066,8.0,24,57,81
+TRM,194,404388,always,10,0,26041,5000,26041,5000,True,1.0,-0.0016481241909787059,-0.0033460480481153354,-0.0041937067289836705,0.0,0.0,1,-0.04554912447929382,-0.05481495102867484,0.0,1,-0.013254913501441479,-0.017018194193951786,0.0,1,-0.00606007594615221,-0.040331276250071824,0.0,1,-0.0012192189460620284,-0.017005031040753238,0.0,1,-0.003947529476135969,-0.007501828018575907,0.0,1,0.05003964900970459,-0.01987845846451819,1.0,1,-0.0035638604313135147,-0.025334964971989393,0.0,1,0.013939299620687962,-0.00763769808690995,2.0,1,-0.002156574511900544,-0.022576116054551676,0.0,1,-0.0016481241909787059,-0.0033460480481153354,0.0,25,56,81
+TRM,195,27920,always,10,0,26041,5000,26041,5000,True,1.0,0.0013881073100492358,-0.0004002270507044159,-0.0012779417374986224,3.0,0.00211116460559424,1,-0.0510491319000721,-0.0549886217340827,0.0,1,0.00013366807252168655,-0.012613395811058581,1.0,1,-0.04537174478173256,-0.04689130187034607,0.0,1,-0.018180465325713158,-0.025025016628205776,0.0,1,0.00032874627504497766,-0.008130621907184832,1.0,1,0.026069851592183113,-0.029643157264217734,1.0,1,-0.01076733972877264,-0.0242924498161301,0.0,1,0.03413648158311844,-0.0018293227622052655,3.0,1,0.04839461296796799,-0.013838810846209526,1.0,1,0.0013881073100492358,-0.0004002270507044159,3.0,25,56,81
+TRM,196,310696,transient_or_regressed,2,4,156246,30000,,,False,0.6296296119689941,0.10428792983293533,0.04673021216876805,0.02751001948490739,8.0,0.3738416973501444,0,0.04999667778611183,0.02212152539868839,8.0,0,0.06674455106258392,0.03637471585534513,8.0,0,0.07951848208904266,0.047057434218004346,8.0,0,0.09116580337285995,0.05600008135661483,8.0,0,0.09938373416662216,0.05137205542996526,8.0,1,0.06750503182411194,0.032661488105077296,8.0,0,0.08297695219516754,0.03916028025560081,8.0,0,0.11018695682287216,0.04900375986471772,8.0,1,0.0772402361035347,0.028766963252564892,8.0,0,0.10428792983293533,0.04673021216876805,8.0,25,56,81
+TRM,197,33494,never,0,0,,,,,False,0.654321014881134,0.10722068697214127,0.04499820456840098,0.02443372691050172,8.0,0.35998563654720783,0,0.04612818732857704,0.01858588722825516,8.0,0,0.0635518878698349,0.0369351499248296,8.0,0,0.0800853967666626,0.049215053441002965,8.0,0,0.09315712004899979,0.05166026344522834,8.0,0,0.12894371151924133,0.06011811597272754,8.0,0,0.10354451835155487,0.05670561990700662,8.0,0,0.10761329531669617,0.0518789601046592,8.0,0,0.1068757176399231,0.04602559085469693,8.0,0,0.10400598496198654,0.050047788536176085,8.0,0,0.10722068697214127,0.04499820456840098,8.0,25,56,81
+TRM,198,410906,transient_or_regressed,2,4,104164,20000,,,False,0.5679012537002563,0.11282525956630707,0.061744543723762035,0.04369486216455698,8.0,0.4939563497900963,0,0.015383537858724594,0.007245285996759776,8.0,0,0.05326296016573906,0.02945011225529015,8.0,0,0.06857220828533173,0.05176272615790367,8.0,1,0.01808767579495907,0.004306326794903725,5.0,0,0.11356927454471588,0.06646274868398905,8.0,0,0.11172714829444885,0.07257798546925187,8.0,0,0.1063690185546875,0.05182715691626072,8.0,0,0.12032341212034225,0.07055556820705533,8.0,1,0.06425193697214127,0.020916943598422222,7.0,0,0.11282525956630707,0.061744543723762035,8.0,25,56,81
+TRM,199,145476,stable_learned,8,1,78123,15000,78123,15000,True,1.0,0.011323942802846432,-0.002546398015510931,-0.0076706522377207875,2.0,0.015755548607558012,0,0.03068050928413868,0.007279427372850478,5.0,0,0.0432252436876297,0.020788028545212,8.0,1,0.021357981488108635,-0.0002994704700540751,4.0,1,-0.0008265356882475317,-0.01501883609307697,0.0,1,0.024760859087109566,0.010482813129783608,6.0,1,0.05870414525270462,0.01135669722862076,5.0,1,-0.01529764011502266,-0.026986358920112252,0.0,1,0.014449283480644226,-0.007196703867521137,3.0,1,0.02082674391567707,-0.007080905779730529,2.0,1,0.011323942802846432,-0.002546398015510931,2.0,25,56,81
+TRM,200,333824,always,10,0,26041,5000,26041,5000,True,1.0,-0.00048497598618268967,-0.0023632653173990548,-0.0035850967979058623,0.0,0.0,1,-0.02753741666674614,-0.04034123755991459,0.0,1,-0.010623713955283165,-0.013644658029079437,0.0,1,-0.0025451781693845987,-0.03838407437433489,0.0,1,-0.012887950986623764,-0.020941448747180402,0.0,1,0.005281518679112196,-0.0032649852432768967,1.0,1,0.04783658683300018,-0.026113864150829613,1.0,1,0.005308342166244984,-0.021324461442418396,1.0,1,-0.004930068738758564,-0.017496811342425644,0.0,1,-0.016454540193080902,-0.021267125848680735,0.0,1,-0.00048497598618268967,-0.0023632653173990548,0.0,22,59,81
+TRM,201,82050,flaky_then_stable,5,5,104164,20000,208328,40000,True,1.0,0.029758939519524574,0.007637346279807389,-0.0033639913890510798,6.0,0.07749828277155757,0,0.025589844211935997,0.007386452085484052,5.0,0,0.06179454177618027,0.0302244839258492,8.0,0,0.08605613559484482,0.04784350236877799,8.0,1,0.0008532172068953514,-0.013579191378084943,1.0,0,0.1270463466644287,0.0598030025139451,8.0,1,0.08990828692913055,0.03320482448907569,8.0,0,0.11798419803380966,0.04267530975630507,8.0,1,0.07093720883131027,0.015334867675846908,4.0,1,-0.00844425056129694,-0.020520833204500377,0.0,1,0.029758939519524574,0.007637346279807389,6.0,25,56,81
+TRM,202,114962,always,10,0,26041,5000,26041,5000,True,1.0,0.001819728990085423,4.117495171840346e-05,-0.0010807355210999958,4.0,0.004652341698147211,1,-0.03898010030388832,-0.04952500481158495,0.0,1,-0.016388151794672012,-0.01865190570242703,0.0,1,-0.00952416006475687,-0.03686302818823606,0.0,1,0.00753294350579381,-0.012441863247659057,1.0,1,0.005316366907209158,-0.0020350557169876993,1.0,1,0.025612173601984978,-0.03186190431006253,1.0,1,0.0004999120719730854,-0.02211611991515383,1.0,1,0.0040643769316375256,-0.017634313611779362,1.0,1,0.04560348019003868,-0.016479832818731666,1.0,1,0.001819728990085423,4.117495171840346e-05,4.0,25,56,81
+TRM,203,6899,always,10,0,26041,5000,26041,5000,True,1.0,0.0025781244039535522,-0.0012555071939459594,-0.003080603579292074,2.0,0.0029599067056551576,1,-0.04770688712596893,-0.05294690188020468,0.0,1,0.0005853358306922019,-0.007838242239813553,1.0,1,-0.030385535210371017,-0.04155877511948347,0.0,1,0.009076062589883804,-0.0042227464728057384,3.0,1,0.0034276272635906935,-0.0046842743849992985,2.0,1,0.049799028784036636,-0.025204505654983222,1.0,1,-0.01460667047649622,-0.026382078998722136,0.0,1,0.011711414903402328,-0.0073185242945328355,1.0,1,0.034073419868946075,-0.008649769937619567,1.0,1,0.0025781244039535522,-0.0012555071939459594,2.0,26,55,81
+TRM,204,237748,flaky_then_stable,8,3,52082,10000,182287,35000,True,1.0,0.0322447195649147,0.0090825529769063,-0.0012423926964402199,6.0,0.08257834333926439,0,0.027644280344247818,0.006798907008487731,6.0,1,0.021780071780085564,0.0019488382749841549,4.0,1,0.006759550888091326,-0.029927065304946154,1.0,1,0.03898206725716591,0.009902728343149647,6.0,1,0.012324606068432331,-0.0027672782380250283,2.0,0,0.12375655025243759,0.059934572549536824,8.0,1,0.036382488906383514,-0.010058165702503175,2.0,1,0.0188920721411705,-0.008224911536672153,2.0,1,0.030400991439819336,-0.009717246575746685,2.0,1,0.0322447195649147,0.0090825529769063,6.0,26,55,81
+TRM,205,350653,stable_learned,9,1,52082,10000,52082,10000,True,1.0,0.003112028818577528,0.0004153294903517235,-0.0013927280888310634,4.0,0.008893548278138041,0,0.06814505159854889,0.04238415812142193,8.0,1,0.010663583874702454,0.00010070259668282233,4.0,1,-0.022021165117621422,-0.04102650540880859,0.0,1,-0.01354423351585865,-0.020996963139623404,0.0,1,0.011409131810069084,-0.0019158531067660078,2.0,1,0.017513887956738472,-0.03357133222743869,1.0,1,-0.006856455933302641,-0.02416264050407335,0.0,1,0.00828241091221571,-0.010424477164633572,2.0,1,-0.006001225672662258,-0.023955262382514775,0.0,1,0.003112028818577528,0.0004153294903517235,4.0,26,55,81
+TRM,206,228664,flaky_then_stable,9,2,26041,5000,208328,40000,True,1.0,0.011789807118475437,-0.0018260757206007838,-0.00698359205853194,2.0,0.018089935183525085,1,-0.02299191989004612,-0.03573501855134964,0.0,1,0.018433116376399994,-0.003449624899076298,2.0,1,0.0638011172413826,-0.0004079106729477644,2.0,1,0.007116887718439102,-0.013767889817245305,1.0,1,0.013743743300437927,0.0009524891502223909,5.0,1,0.031422313302755356,-0.0024434556835331023,2.0,0,0.10934887826442719,0.046389109338633716,8.0,1,0.038566704839468,0.0023720994795439765,4.0,1,0.08130498230457306,0.02430822773021646,8.0,1,0.011789807118475437,-0.0018260757206007838,2.0,26,55,81
+TRM,207,108128,always,10,0,26041,5000,26041,5000,True,1.0,0.034758780151605606,0.004885898939392064,-0.0005100806447444484,6.0,0.041826540080364794,1,-0.040823835879564285,-0.04790768725797534,0.0,1,-0.0019418619340285659,-0.008193379690055735,0.0,1,-0.01816900260746479,-0.02500191447325051,0.0,1,0.009577694348990917,-0.011283573989203433,1.0,1,0.010985903441905975,-0.00100813993049087,3.0,1,0.05444793030619621,-0.030056620482355356,1.0,1,-0.01121042761951685,-0.028838319121859968,0.0,1,0.00760452589020133,-0.009945295110810548,1.0,1,-0.0010594523046165705,-0.018267279403517023,0.0,1,0.034758780151605606,0.004885898939392064,6.0,26,55,81
+TRM,208,237555,always,10,0,26041,5000,26041,5000,True,1.0,0.013637176714837551,0.00294490414307802,-0.0024570860623498447,4.0,0.03338757739402354,1,-0.030454449355602264,-0.045354644767940044,0.0,1,0.03470722213387489,0.015532585384789854,8.0,1,-0.0048825484700500965,-0.03422117751324549,0.0,1,0.014032869599759579,-0.004161077813478187,1.0,1,0.0773664265871048,0.03726324834860861,8.0,1,0.04568411409854889,-0.01263380009913817,2.0,1,-0.0068901474587619305,-0.024350031453650445,0.0,1,0.06402990221977234,-0.0001697186380624771,3.0,1,0.04445214197039604,-0.014390815980732441,1.0,1,0.013637176714837551,0.00294490414307802,4.0,24,57,81
+TRM,209,415031,transient_or_regressed,5,4,104164,20000,,,False,0.7160493731498718,0.10370030999183655,0.05558732873760164,0.03425172856077552,8.0,0.4446986299008131,0,0.011994773522019386,2.531860809540376e-05,4.0,0,0.05988362431526184,0.03195865580346435,8.0,0,0.08163787424564362,0.054200551472604275,8.0,1,0.06330606341362,0.020761353924172,7.0,1,0.11316297203302383,0.047181275207549334,8.0,1,0.03781143203377724,-0.003734095662366599,2.0,0,0.09174256026744843,0.05601806519553065,8.0,1,0.06449168920516968,0.003804352616498363,3.0,1,0.0033584877382963896,-0.011762980284402147,1.0,0,0.10370030999183655,0.05558732873760164,8.0,26,55,81
+TRM,210,346143,always,10,0,26041,5000,26041,5000,True,1.0,0.0037488287780433893,-0.002150979344150983,-0.003695431281812489,1.0,0.0037488287780433893,1,-0.033349208533763885,-0.04066271148622036,0.0,1,0.002198129426687956,-0.011715339496731758,1.0,1,0.04719354957342148,-0.01620304910466075,1.0,1,-0.004501831717789173,-0.015648823813535273,0.0,1,0.0077662342227995396,-0.006959516846109182,1.0,1,0.04087122157216072,-0.018843306694179773,1.0,1,-0.022302132099866867,-0.03089424828067422,0.0,1,0.022383710369467735,-0.005415228399215266,2.0,1,0.01010959967970848,-0.020306192105636,1.0,1,0.0037488287780433893,-0.002150979344150983,1.0,26,55,81
+TRM,211,348811,always,10,0,26041,5000,26041,5000,True,1.0,0.0036364204715937376,0.0011615331050052191,-0.0002018029281316558,7.0,0.01201585717353737,1,-0.04689823091030121,-0.05147686926648021,0.0,1,-0.013019614852964878,-0.01604439038783312,0.0,1,-0.005301423370838165,-0.0413379892706871,0.0,1,-0.022232316434383392,-0.025457960553467274,0.0,1,0.0008591336663812399,-0.0047080847434699535,1.0,1,0.03471574932336807,-0.03413012903183699,1.0,1,-0.004892326425760984,-0.020425072929356247,0.0,1,0.01778576895594597,-0.009433769155293703,1.0,1,0.011695638298988342,-0.013724053162150085,1.0,1,0.0036364204715937376,0.0011615331050052191,7.0,26,55,81
+TRM,212,227692,transient_or_regressed,1,2,182287,35000,,,False,0.6296296119689941,0.11797458678483963,0.05941451666876674,0.03575267270207405,8.0,0.4753161333501339,0,0.06684021651744843,0.027380049810744822,8.0,0,0.09430423378944397,0.04523526877164841,8.0,0,0.07394885271787643,0.04914484592154622,8.0,0,0.0969853401184082,0.06396831991150975,8.0,0,0.1127384752035141,0.06332401093095541,8.0,0,0.12215587496757507,0.061320788227021694,8.0,1,0.007627119775861502,-0.021896567253861576,1.0,0,0.11910481005907059,0.05343004106543958,8.0,0,0.10044301301240921,0.05094023304991424,8.0,0,0.11797458678483963,0.05941451666876674,8.0,21,60,81
+TRM,213,29728,flaky_then_stable,5,3,104164,20000,234369,45000,True,1.0,0.026516132056713104,0.004179411800578237,-0.004940139420796186,4.0,0.053195852087810636,0,0.004790307953953743,-0.007932017819257453,1.0,0,0.05289449542760849,0.02732641319744289,8.0,0,0.06741923838853836,0.04429500736296177,8.0,1,0.02472749538719654,-6.367775495164096e-05,3.0,1,0.033759526908397675,0.00941147469347925,6.0,1,0.038698457181453705,0.005277029940771172,3.0,0,0.09014707803726196,0.04470185306854546,8.0,0,0.1220305785536766,0.050695547834038734,8.0,1,0.009811477735638618,-0.017942425096407533,1.0,1,0.026516132056713104,0.004179411800578237,4.0,24,57,81
+TRM,214,221880,always,10,0,26041,5000,26041,5000,True,1.0,-0.0018896892433986068,-0.003651705614174716,-0.004866233153734356,0.0,0.0,1,-0.05120520293712616,-0.05734963295981288,0.0,1,-0.010788598097860813,-0.014037572429515421,0.0,1,0.00533174118027091,-0.036324827291537076,1.0,1,0.004678554367274046,-0.02570857579121366,1.0,1,0.004892056342214346,-0.0019964476668974385,3.0,1,0.03719862550497055,-0.018077747576171532,1.0,1,-0.017254814505577087,-0.028984907316043973,0.0,1,0.06870878487825394,0.0012774575443472713,3.0,1,0.033503469079732895,-0.018621174385771155,1.0,1,-0.0018896892433986068,-0.003651705614174716,0.0,26,55,81
+TRM,215,202783,always,10,0,26041,5000,26041,5000,True,1.0,-0.00027919455897063017,-0.001681699384789681,-0.0026693661347962916,0.0,0.0,1,-0.04971752315759659,-0.055543248541653156,0.0,1,-0.006837507244199514,-0.014344908704515547,0.0,1,-0.013398487120866776,-0.04162553604692221,0.0,1,-0.018629716709256172,-0.029252706794068217,0.0,1,0.0008736798190511763,-0.004612850570993032,1.0,1,0.04967186227440834,-0.031257722061127424,1.0,1,-0.017519133165478706,-0.02620942471548915,0.0,1,0.03327939286828041,-0.01101844827644527,1.0,1,0.0007432312704622746,-0.020032036292832345,1.0,1,-0.00027919455897063017,-0.001681699384789681,0.0,25,56,81
+TRM,216,39207,flaky_then_stable,4,3,156246,30000,234369,45000,True,1.0,0.002220967784523964,-0.002485873206751421,-0.004883449641056359,2.0,0.004182151518762112,0,0.054993342608213425,0.01962263991299551,7.0,0,0.045834366232156754,0.03109127702191472,8.0,0,0.07119276374578476,0.052423374727368355,8.0,0,0.10190854966640472,0.05943611264228821,8.0,0,0.11684732139110565,0.06424585357308388,8.0,1,0.09419362246990204,0.03881715645547956,8.0,1,0.005118200089782476,-0.018379703105892986,1.0,0,0.1014389917254448,0.05599244777113199,8.0,1,0.01105805579572916,-0.015016795368865132,1.0,1,0.002220967784523964,-0.002485873206751421,2.0,24,57,81
+TRM,217,74180,transient_or_regressed,2,4,52082,10000,,,False,0.604938268661499,0.09866464883089066,0.03997316432651132,0.01750598452053964,8.0,0.3197853146120906,0,0.043808575719594955,0.02178542106412351,8.0,1,-0.0065943216904997826,-0.011317282216623425,0.0,0,0.08168653398752213,0.05035918112844229,8.0,0,0.11574117839336395,0.0646073673851788,8.0,0,0.11253903061151505,0.0640963192563504,8.0,1,0.022581515833735466,-0.01601977675454691,2.0,0,0.09975122660398483,0.05214291554875672,8.0,0,0.1205405667424202,0.0563267117831856,8.0,0,0.1312665045261383,0.06222180183976889,8.0,0,0.09866464883089066,0.03997316432651132,8.0,26,55,81
+TRM,218,117122,never,0,0,,,,,False,0.48148149251937866,0.10690686106681824,0.04423210595268756,0.020874822745099664,8.0,0.3538568476215005,0,0.004627763759344816,-0.009723829047288746,1.0,0,0.05797513946890831,0.04039745801128447,8.0,0,0.058315210044384,0.03727643215097487,8.0,0,0.0816626250743866,0.05178477428853512,8.0,0,0.09716343134641647,0.05696573620662093,8.0,0,0.10025221109390259,0.0594132449477911,8.0,0,0.09634234756231308,0.05044856132008135,8.0,0,0.08302014321088791,0.043574536219239235,8.0,0,0.10111479461193085,0.05484886304475367,8.0,0,0.10690686106681824,0.04423210595268756,8.0,22,59,81
+TRM,219,37017,flaky_then_stable,9,2,26041,5000,260410,50000,True,1.0,-4.243468356435187e-05,-0.0018324255092920794,-0.002658858080394566,0.0,0.0,1,0.004782132804393768,-0.01397044799523428,1.0,1,-0.007029992062598467,-0.012779337528627366,0.0,1,0.03505445271730423,0.012054391278070398,5.0,1,-0.0165664441883564,-0.02131174993701279,0.0,1,0.007660758215934038,-0.0013680287374882028,3.0,1,0.04680091142654419,-0.030232166405767202,1.0,1,0.009399973787367344,-0.012316186795942485,2.0,1,0.03414475917816162,-0.0027572199978749268,3.0,0,0.08096427470445633,0.037125972332432866,8.0,1,-4.243468356435187e-05,-0.0018324255092920794,0.0,24,57,81
+TRM,220,300132,always,10,0,26041,5000,26041,5000,True,1.0,0.000508293800521642,-0.004113534196221735,-0.005536970100365579,1.0,0.000508293800521642,1,-0.031115764752030373,-0.04222251079045236,0.0,1,0.035054147243499756,0.007369604289124254,6.0,1,0.023261627182364464,-0.015218398766592145,1.0,1,0.04482908546924591,0.010927953728241846,5.0,1,0.010245822370052338,0.0006799787370255217,4.0,1,0.009316425770521164,-0.019767086720094085,1.0,1,-0.03011917881667614,-0.03143389499746263,0.0,1,0.012932855635881424,-0.0055819471017457545,2.0,1,0.009453986771404743,-0.021692302194423974,1.0,1,0.000508293800521642,-0.004113534196221735,1.0,28,53,81
+TRM,221,212419,always,10,0,26041,5000,26041,5000,True,1.0,0.00191064877435565,-0.00205567225930281,-0.0037942551425658166,2.0,0.002860540756955743,1,-0.04085663706064224,-0.04770403727889061,0.0,1,-0.006243064999580383,-0.011417015455663204,0.0,1,-0.014095909893512726,-0.03930140659213066,0.0,1,0.0012904303148388863,-0.012438459205441177,1.0,1,0.0064023700542747974,-0.0024309997534146532,2.0,1,0.00011420035298215225,-0.0288323420454617,1.0,1,-0.0003252728492952883,-0.023485973601054866,0.0,1,0.07122845202684402,-0.0017281815671594813,2.0,1,0.040671948343515396,-0.013255833880975842,1.0,1,0.00191064877435565,-0.00205567225930281,2.0,27,54,81
+TRM,222,283218,always,10,0,26041,5000,26041,5000,True,1.0,0.001252444344572723,-0.0011357194798620185,-0.0022323344310279936,2.0,0.00133518832444679,1,-0.04991106688976288,-0.054529072251170874,0.0,1,0.006053210236132145,-0.006839526642579585,1.0,1,-0.004487451631575823,-0.03591663675615564,0.0,1,-0.015317877754569054,-0.02227642689831555,0.0,1,0.0049282764084637165,-0.004846586176427081,1.0,1,0.04078971967101097,-0.020869304149528034,2.0,1,-0.019879397004842758,-0.02933895797468722,0.0,1,0.060552116483449936,-0.0006091323448345065,2.0,1,0.017031360417604446,-0.01817786763422191,1.0,1,0.001252444344572723,-0.0011357194798620185,2.0,26,55,81
+TRM,223,28177,always,10,0,26041,5000,26041,5000,True,1.0,0.00016010551189538091,-0.003555968376531382,-0.004460494965314865,1.0,0.00016010551189538091,1,-0.03861000016331673,-0.04556959122419357,0.0,1,-0.01227063499391079,-0.014187267981469631,0.0,1,-0.011579284444451332,-0.03598316083662212,0.0,1,-0.006824948359280825,-0.01812874764436856,0.0,1,-0.0006469578947871923,-0.005754802041337825,0.0,1,-0.0003411551588214934,-0.025298931235738564,0.0,1,0.0021624863147735596,-0.022401787573471665,1.0,1,0.059919245541095734,-0.005035351889091544,1.0,1,-0.008669761940836906,-0.020254274597391486,0.0,1,0.00016010551189538091,-0.003555968376531382,1.0,25,56,81
+TRM,224,4206,flaky_then_stable,6,3,104164,20000,234369,45000,True,1.0,0.0389530323445797,0.007782056258292869,-0.0029455334588419646,4.0,0.07403858390171081,0,0.04966237023472786,0.029122381238266826,8.0,0,0.06064504384994507,0.042111005168408155,8.0,0,0.07097543030977249,0.05498504498973489,8.0,1,0.0068425326608121395,-0.007135268944693962,2.0,1,0.08201475441455841,0.036735506379045546,8.0,1,0.06930192559957504,0.026285720690793823,7.0,1,0.08546420186758041,0.01798856377718039,5.0,0,0.11436864733695984,0.055100006284192204,8.0,1,0.10256509482860565,0.04715490504167974,8.0,1,0.0389530323445797,0.007782056258292869,4.0,25,56,81
+TRM,225,62849,stable_learned,9,1,52082,10000,52082,10000,True,1.0,0.039501599967479706,0.002771464154648129,-0.003823225444648415,2.0,0.04000542493304238,0,0.0733887106180191,0.03619923139922321,8.0,1,0.021456683054566383,0.0057713236747076735,6.0,1,0.011895332485437393,-0.03340472746640444,1.0,1,0.008797820657491684,-0.00562167342286557,1.0,1,0.008921733126044273,-0.002281039400259033,3.0,1,0.037620674818754196,-0.0197126719285734,1.0,1,0.14215661585330963,-0.0027389208553358912,1.0,1,0.06389182060956955,-0.005798477155622095,1.0,1,0.03299522399902344,-0.007769756644847803,2.0,1,0.039501599967479706,0.002771464154648129,2.0,26,55,81
+TRM,226,20504,transient_or_regressed,2,2,130205,25000,,,False,0.6913580298423767,0.1122712790966034,0.05240916437469423,0.030366031918674707,8.0,0.4192733149975538,0,0.025021867826581,-0.000663190963678062,4.0,0,0.036656927317380905,0.01664057740708813,8.0,0,0.08849618583917618,0.04343777662143111,8.0,0,0.08565597981214523,0.0477998664136976,8.0,1,0.03333253413438797,0.01157869967573788,6.0,1,0.07967555522918701,0.025402189639862627,7.0,0,0.08897045999765396,0.04216114082373679,8.0,0,0.1113073080778122,0.05428541777655482,8.0,0,0.10230519622564316,0.05051741609349847,8.0,0,0.1122712790966034,0.05240916437469423,8.0,23,58,81
+TRM,227,102114,transient_or_regressed,2,4,130205,25000,,,False,0.6913580298423767,0.09746669977903366,0.0498108227038756,0.0302131709177047,8.0,0.3984865816310048,0,0.016978764906525612,0.005218856182182208,6.0,0,0.05411911755800247,0.03137611737474799,8.0,0,0.07384537905454636,0.04648684058338404,8.0,0,0.08374615013599396,0.052968569565564394,8.0,1,0.07083135098218918,0.02283606039645747,8.0,0,0.10263346880674362,0.054544170619919896,8.0,0,0.09167753159999847,0.04176671837922186,8.0,0,0.08721235394477844,0.05013207159936428,8.0,1,0.02167597971856594,-0.006274699931964278,2.0,0,0.09746669977903366,0.0498108227038756,8.0,23,58,81
+TRM,228,394875,flaky_then_stable,8,3,52082,10000,104164,20000,True,1.0,0.00603833794593811,-0.004537226865068078,-0.008641517139039934,2.0,0.008663325104862452,0,-0.0009605166269466281,-0.012251395135535859,0.0,1,0.005396576132625341,-0.007386535668047145,1.0,0,0.08447159826755524,0.044482265366241336,8.0,1,-0.008197777904570103,-0.02086537389550358,0.0,1,0.02162846550345421,0.005434118367702467,4.0,1,0.035619981586933136,-0.017045481479726732,1.0,1,0.18568143248558044,0.006039337662514299,2.0,1,0.0442868210375309,0.0021015400998294353,3.0,1,0.0133866798132658,-0.015506645780988038,1.0,1,0.00603833794593811,-0.004537226865068078,2.0,26,55,81
+TRM,229,389068,always,10,0,26041,5000,26041,5000,True,1.0,0.004082643426954746,0.0008934290744946338,-0.0010945629474008456,5.0,0.012421355524566025,1,-0.04916318878531456,-0.057116405572742224,0.0,1,-0.014521243050694466,-0.016832389635965228,0.0,1,-0.01153744850307703,-0.04140789725352079,0.0,1,-0.010495726019144058,-0.016829701839014888,0.0,1,-0.0010585711570456624,-0.004555243169306777,0.0,1,0.013788541778922081,-0.03308072313666344,1.0,1,-0.01959385722875595,-0.030351200606673956,0.0,1,0.035524629056453705,-0.010217403818387538,1.0,1,0.018072186037898064,-0.0222227293998003,1.0,1,0.004082643426954746,0.0008934290744946338,5.0,25,56,81
+TRM,230,236824,always,10,0,26041,5000,26041,5000,True,1.0,-0.0009212989243678749,-0.0030108399587334134,-0.004065274377353489,0.0,0.0,1,-0.020934073254466057,-0.036280480213463306,0.0,1,0.004921238869428635,-0.01201312500052154,1.0,1,-0.01107977144420147,-0.03657220606692135,0.0,1,-0.01712377741932869,-0.02124238316901028,0.0,1,0.003837717929854989,-0.0058560879097058205,1.0,1,0.010303722694516182,-0.023739645548630506,1.0,1,-0.006087972316890955,-0.019795799569692463,0.0,1,-0.0033139123115688562,-0.01595648578950204,0.0,1,0.012318581342697144,-0.013708597281947732,1.0,1,-0.0009212989243678749,-0.0030108399587334134,0.0,17,64,81
+TRM,231,306831,always,10,0,26041,5000,26041,5000,True,1.0,0.0013866653898730874,-0.00046584822121076286,-0.0020097485539736226,5.0,0.00511273875599727,1,-0.030482899397611618,-0.04324819101020694,0.0,1,0.014286141842603683,-0.0003810212219832465,4.0,1,-0.010288897901773453,-0.03567403648048639,0.0,1,-0.009040785022079945,-0.017085436615161598,0.0,1,0.00620441697537899,-0.004054837685544044,2.0,1,0.025621263310313225,-0.020811877911910415,1.0,1,-0.007634069304913282,-0.02658812253503129,0.0,1,0.03218556568026543,-0.009020398007123731,2.0,1,-0.004747780971229076,-0.021990093286149204,0.0,1,0.0013866653898730874,-0.00046584822121076286,5.0,28,53,81
+TRM,232,53003,always,10,0,26041,5000,26041,5000,True,1.0,0.0035126167349517345,0.000460483439383097,-0.0011369857238605618,5.0,0.009383851196616888,1,-0.036780018359422684,-0.04755908437073231,0.0,1,-0.010981891304254532,-0.016076477360911667,0.0,1,-0.003648222191259265,-0.037993688689311966,0.0,1,-0.01581602916121483,-0.022366474848240614,0.0,1,-0.0008248994708992541,-0.006061376181605738,0.0,1,0.02918414957821369,-0.01836323522729799,1.0,1,-0.015026899985969067,-0.02999123011250049,0.0,1,0.06656180322170258,-0.00024285900872200727,3.0,1,0.010660833679139614,-0.014663946916698478,1.0,1,0.0035126167349517345,0.000460483439383097,5.0,26,55,81
+TRM,233,317778,always,10,0,26041,5000,26041,5000,True,1.0,0.014273518696427345,0.001311078915023245,-0.0033091945806518197,2.0,0.02733954694122076,1,-0.0434391126036644,-0.05281008640304208,0.0,1,-0.006513523869216442,-0.011495508020743728,0.0,1,-0.011456477455794811,-0.037244262523017824,0.0,1,0.0022645266726613045,-0.011611249785346445,2.0,1,-0.00320923188701272,-0.005479293584357947,0.0,1,0.005743637681007385,-0.026663263328373432,1.0,1,-0.02399265207350254,-0.027711281552910805,0.0,1,0.04080621153116226,-0.006895579979754984,2.0,1,-0.009873397648334503,-0.02634123293682933,0.0,1,0.014273518696427345,0.001311078915023245,2.0,28,53,81
+TRM,234,62848,flaky_then_stable,7,6,26041,5000,182287,35000,True,1.0,0.05851393938064575,0.019614402175648138,0.005106093769427389,7.0,0.15870172809809446,1,-0.0127262556925416,-0.031804491649381816,0.0,0,0.07789362967014313,0.051835038932040334,8.0,1,-0.004458288662135601,-0.038155993330292404,0.0,0,0.08443799614906311,0.05926699168048799,8.0,1,0.017514668405056,3.968432156398194e-05,3.0,0,0.09917646646499634,0.05659095221199095,8.0,1,0.004529933445155621,-0.021515992819331586,1.0,1,0.05813097953796387,0.018497138386010192,6.0,1,-0.0076797110959887505,-0.02267754205968231,0.0,1,0.05851393938064575,0.019614402175648138,7.0,26,55,81
+TRM,235,358453,flaky_then_stable,5,5,104164,20000,234369,45000,True,1.0,0.060322105884552,0.014444024222029839,0.000890185430762358,7.0,0.11791403853567317,0,0.007752910256385803,-0.005486189329531044,2.0,0,0.044170960783958435,0.028722514864057302,8.0,0,0.07555417716503143,0.04350872151553631,8.0,1,0.11833016574382782,0.06507948040962219,8.0,0,0.11042984575033188,0.07075667334720492,8.0,1,0.026569873094558716,-0.012846817437093705,1.0,1,0.0529964417219162,0.017723352604662068,6.0,0,0.11326132714748383,0.059483819641172886,8.0,1,0.021423926576972008,-0.00654017407214269,2.0,1,0.060322105884552,0.014444024222029839,7.0,26,55,81
+TRM,236,330583,transient_or_regressed,4,6,104164,20000,,,False,0.7283950448036194,0.1179669052362442,0.050704695750027895,0.027432451955974102,8.0,0.40563756600022316,0,0.0558493435382843,0.031019905698485672,8.0,0,0.07183355838060379,0.04784472589381039,8.0,0,0.10504776239395142,0.05676551582291722,8.0,1,-0.0020639472641050816,-0.018599099072162062,0.0,0,0.10264892131090164,0.05374545347876847,8.0,1,0.00017687170475255698,-0.03164164482768683,1.0,0,0.11831028014421463,0.046759607968851924,8.0,1,0.029574979096651077,-0.00847998890094459,2.0,1,0.04459366574883461,0.013223047717474401,5.0,0,0.1179669052362442,0.050704695750027895,8.0,26,55,81
+TRM,237,95944,transient_or_regressed,5,3,26041,5000,,,False,0.6666666865348816,0.09490608423948288,0.048063904512673616,0.03131009638309479,8.0,0.38451123610138893,1,0.0028073203284293413,-0.012771485548000783,1.0,1,0.010889335535466671,-0.004449827290955,1.0,0,0.08737904578447342,0.05122835957445204,8.0,1,0.010380500927567482,-0.011054991511628032,1.0,1,0.0663195475935936,0.026061467884574085,8.0,1,0.04775351658463478,0.014402965217414021,7.0,0,0.07113282382488251,0.04823108878917992,8.0,0,0.11646014451980591,0.06164488662034273,8.0,0,0.10717137902975082,0.06209244020283222,8.0,0,0.09490608423948288,0.048063904512673616,8.0,26,55,81
+TRM,238,223602,flaky_then_stable,4,5,104164,20000,260410,50000,True,1.0,0.009927822276949883,-0.003643897653091699,-0.0073101428570225835,1.0,0.009927822276949883,0,0.02292434684932232,-0.004122390586417168,2.0,0,0.059657249599695206,0.028163101174868643,8.0,0,0.07667452096939087,0.04212799412198365,8.0,1,0.002021724358201027,-0.014281785697676241,1.0,1,0.017532164230942726,0.0005187791766729788,3.0,0,0.09282877296209335,0.056521004531532526,8.0,0,0.09022220969200134,0.039448289666324854,8.0,1,0.02502642199397087,-0.008253865787992254,2.0,0,0.08505398780107498,0.045969055499881506,8.0,1,0.009927822276949883,-0.003643897653091699,1.0,23,58,81
+TRM,239,206587,stable_learned,9,1,52082,10000,52082,10000,True,1.0,0.0011507895542308688,-0.0030228061659727246,-0.0047204336151480675,1.0,0.0011507895542308688,0,0.023760119453072548,0.009174148668535054,5.0,1,-0.005433317739516497,-0.01430129149230197,0.0,1,0.01086964551359415,-0.025607575778849423,1.0,1,0.0030785370618104935,-0.014705497247632593,1.0,1,0.059364110231399536,0.030701534124091268,8.0,1,0.05538275092840195,-0.01976932305842638,1.0,1,-0.011579695157706738,-0.026803003274835646,0.0,1,0.002371745416894555,-0.014168924273690209,1.0,1,-0.007594943046569824,-0.020899450755678117,0.0,1,0.0011507895542308688,-0.0030228061659727246,1.0,24,57,81
+TRM,240,282303,flaky_then_stable,8,4,26041,5000,208328,40000,True,1.0,0.06303148716688156,0.014804053673287854,-0.0012935192207805812,5.0,0.12782266922295094,1,-0.02649240382015705,-0.04305993369780481,0.0,1,0.012612090446054935,-0.009437774308025837,1.0,1,0.0016022800700739026,-0.03638005499669816,1.0,0,0.0797521099448204,0.03768180089537054,8.0,1,0.0257274117320776,0.006778597818993148,5.0,1,0.09652485698461533,0.03632166492752731,8.0,0,0.09159673005342484,0.04760506120510399,8.0,1,0.02299213409423828,-0.01051533524878323,2.0,1,0.0031194747425615788,-0.017887028341647238,1.0,1,0.06303148716688156,0.014804053673287854,5.0,26,55,81
+TRM,241,310279,always,10,0,26041,5000,26041,5000,True,1.0,0.002388342982158065,-0.0017341693492198829,-0.0033607204677537084,1.0,0.002388342982158065,1,-0.04643630608916283,-0.05036990623921156,0.0,1,-0.005045983009040356,-0.010064818314276636,0.0,1,-0.009138845838606358,-0.037766190827824175,0.0,1,-0.011315660551190376,-0.019615043769590557,0.0,1,0.006502822041511536,-0.0029122013656888157,2.0,1,0.030877508223056793,-0.015636875446944032,1.0,1,0.01763164810836315,-0.010893294398556463,1.0,1,0.026981689035892487,-0.009666753991041332,2.0,1,0.0045678396709263325,-0.015443414216861129,1.0,1,0.002388342982158065,-0.0017341693492198829,1.0,24,57,81
+TRM,242,59598,transient_or_regressed,3,2,156246,30000,,,False,0.6913580298423767,0.10141207277774811,0.053321026265621185,0.03594268672168255,8.0,0.4265682101249695,0,0.07343029975891113,0.03063841606490314,8.0,0,0.08804802596569061,0.0413991961395368,8.0,0,0.0813058614730835,0.04248048993758857,8.0,0,0.09918847680091858,0.05328639619983733,8.0,0,0.1008404940366745,0.05796309467405081,8.0,1,0.0759953111410141,0.03532121842727065,8.0,1,0.04316898062825203,-0.001405163638992235,2.0,1,0.02608853578567505,-0.004663786763558164,3.0,0,0.1140391156077385,0.06154860137030482,8.0,0,0.10141207277774811,0.053321026265621185,8.0,25,56,81
+TRM,243,397957,flaky_then_stable,8,3,52082,10000,234369,45000,True,1.0,-0.0016730602364987135,-0.0030908352928236127,-0.004047861904837191,0.0,0.0,0,0.03745630010962486,0.006407558306818828,5.0,1,0.006937870290130377,-0.005687886558007449,1.0,1,-0.019883405417203903,-0.0376305659301579,0.0,1,-0.001313579035922885,-0.014053300226805732,0.0,1,0.002676252042874694,-0.0038066397537477314,1.0,1,0.05048872157931328,-0.01820881786989048,1.0,1,-0.02074532024562359,-0.028923654463142157,0.0,0,0.08741099387407303,0.054573606699705124,8.0,1,0.01347858551889658,-0.011124713986646384,1.0,1,-0.0016730602364987135,-0.0030908352928236127,0.0,23,58,81
+TRM,244,97265,always,10,0,26041,5000,26041,5000,True,1.0,0.03521812707185745,0.0058190721008486435,0.0006614559056288272,6.0,0.04732326738303527,1,-0.03704124689102173,-0.04652364645153284,0.0,1,0.006672311574220657,-0.006934197328519076,1.0,1,-0.014663148671388626,-0.04008801048621535,0.0,1,-0.022002778947353363,-0.025053767953068018,0.0,1,-0.0008899794775061309,-0.007159944063459989,0.0,1,-0.010731999762356281,-0.023458278854377568,0.0,1,-0.020330926403403282,-0.028315612580627203,0.0,1,0.028661858290433884,-0.0076754435572183866,1.0,1,-0.023138808086514473,-0.026267095003277063,0.0,1,0.03521812707185745,0.0058190721008486435,6.0,27,54,81
+TRM,245,203158,transient_or_regressed,2,4,104164,20000,,,False,0.48148149251937866,0.08221682906150818,0.04346321779303253,0.025638351682573557,8.0,0.3477057423442602,0,0.04776197299361229,0.0219735712162219,8.0,0,0.06132233887910843,0.03063899092376232,8.0,0,0.08358059823513031,0.04403053876012564,8.0,1,0.017278151586651802,-0.006302021072770003,3.0,0,0.09410917013883591,0.05100649851374328,8.0,0,0.09295886754989624,0.05174601962789893,8.0,0,0.08054827898740768,0.03860128461383283,8.0,1,0.10586103796958923,0.04515297070611268,8.0,0,0.08271819353103638,0.0459196837618947,8.0,0,0.08221682906150818,0.04346321779303253,8.0,23,58,81
+TRM,246,130524,always,10,0,26041,5000,26041,5000,True,1.0,0.03907737508416176,0.01348451612284407,0.0026704802876338363,7.0,0.11025516060180962,1,-0.030496016144752502,-0.03978192387148738,0.0,1,-0.0018235386814922094,-0.007780615385854617,0.0,1,0.05641937255859375,0.016135035582919954,6.0,1,0.00027760796365328133,-0.013952870587672805,1.0,1,0.03744666278362274,0.013852485293412542,7.0,1,0.03246525302529335,-0.021360193437431008,1.0,1,-0.002221985487267375,-0.02254360492224805,0.0,1,0.041874442249536514,-0.005885592312552035,2.0,1,-0.005860614124685526,-0.0174689621780999,0.0,1,0.03907737508416176,0.01348451612284407,7.0,24,57,81
+TRM,247,117111,always,10,0,26041,5000,26041,5000,True,1.0,0.00034159107599407434,-0.002465964076691307,-0.0042248081881552935,2.0,0.00037707985029555857,1,-0.04594675078988075,-0.05163075588643551,0.0,1,0.0022139116190373898,-0.006540839669469278,2.0,1,0.010273805819451809,-0.019225794123485684,1.0,1,-0.005077542271465063,-0.01578483876073733,0.0,1,0.008166303858160973,-0.0096616160008125,1.0,1,0.008804400451481342,-0.017630236216064077,1.0,1,-0.01613049954175949,-0.02415024978108704,0.0,1,0.04362618178129196,-0.006313318997854367,1.0,1,0.005583143327385187,-0.018735599180217832,1.0,1,0.00034159107599407434,-0.002465964076691307,2.0,26,55,81
+TRM,248,297930,always,10,0,26041,5000,26041,5000,True,1.0,0.001696663093753159,-0.00012203282676637173,-0.001411664707120508,4.0,0.004670396214351058,1,-0.043008215725421906,-0.05096411006525159,0.0,1,8.376714686164632e-05,-0.008264151135335851,1.0,1,0.020373329520225525,-0.022107144817709923,1.0,1,-0.01847737841308117,-0.023033670848235488,0.0,1,0.0004587899020407349,-0.0026734993698482867,1.0,1,0.03390338271856308,-0.0251456318073906,1.0,1,-0.019861016422510147,-0.027870505582541227,0.0,1,0.03491662070155144,-0.00896812486462295,1.0,1,0.007713905535638332,-0.01803446130361408,1.0,1,0.001696663093753159,-0.00012203282676637173,4.0,26,55,81
+TRM,249,362706,always,10,0,26041,5000,26041,5000,True,1.0,-0.002260262379422784,-0.003985005838330835,-0.005112986662425101,0.0,0.0,1,0.012352349236607552,-0.009704400203190744,2.0,1,-0.013394400477409363,-0.016998483799397945,0.0,1,-0.0011335985036566854,-0.038213724285014905,0.0,1,0.010926279239356518,-0.01519405678845942,1.0,1,0.02056112512946129,0.002557259358582087,5.0,1,0.014733419753611088,-0.02680126577615738,1.0,1,-0.01644315756857395,-0.025352274999022484,0.0,1,0.03828107938170433,-0.009833375548623735,1.0,1,0.029570909217000008,-0.005382279981859028,2.0,1,-0.002260262379422784,-0.003985005838330835,0.0,24,57,81
+TRM,250,159293,flaky_then_stable,5,5,78123,15000,260410,50000,True,1.0,0.02524685114622116,0.0028992771767661907,-0.002532650702050887,3.0,0.0339493463980034,0,0.04946037381887436,0.024131854763254523,8.0,0,0.0512346476316452,0.02846594259608537,8.0,1,0.023839613422751427,-0.003054215165320784,3.0,1,0.041702866554260254,0.006585595350770745,4.0,1,0.048587821424007416,0.02232063128030859,8.0,0,0.13315600156784058,0.062410006299614906,8.0,0,0.11019372195005417,0.048712703166529536,8.0,1,0.048433706164360046,0.005703033179088379,6.0,0,0.12444985657930374,0.058387431781738997,8.0,1,0.02524685114622116,0.0028992771767661907,3.0,24,57,81
+TRM,251,155140,transient_or_regressed,8,2,52082,10000,,,False,0.7037037014961243,0.08800864964723587,0.028410544153302908,0.008337533101439476,7.0,0.22880277503281832,0,0.05473196133971214,0.02926313620992005,8.0,1,0.005202332511544228,-0.009801014442928135,1.0,1,0.014887627214193344,-0.01954192901030183,1.0,1,0.001104663941077888,-0.011422040508477949,1.0,1,-0.0036678381729871035,-0.00656788595370017,0.0,1,0.0423210971057415,-0.0187850120710209,2.0,1,-0.014894748106598854,-0.02784643112681806,0.0,1,0.021206166595220566,-0.0033844620920717716,2.0,1,0.007503403816372156,-0.018253154878038913,1.0,0,0.08800864964723587,0.028410544153302908,7.0,27,54,81
+TRM,252,410219,flaky_then_stable,7,3,78123,15000,234369,45000,True,1.0,0.05607355758547783,0.018288351726369,0.004778052942128852,7.0,0.14656818122603,0,0.055042341351509094,0.01958745474985335,7.0,0,0.0742662325501442,0.03543168702162802,8.0,1,0.02018258348107338,-0.011425581498770043,1.0,1,0.04237860441207886,0.008063326080446132,6.0,1,0.01841960847377777,0.0035235406685387716,5.0,1,0.036901965737342834,-0.02051914221374318,1.0,1,0.12681354582309723,0.0014560554991476238,2.0,0,0.116451196372509,0.06195967551320791,8.0,1,0.11820812523365021,0.05618634633719921,8.0,1,0.05607355758547783,0.018288351726369,7.0,20,61,81
+TRM,253,215841,transient_or_regressed,6,2,52082,10000,,,False,0.654321014881134,0.10395675897598267,0.05974557530134916,0.04267695173621178,8.0,0.4779646024107933,0,0.012995148077607155,-0.003779973485507071,3.0,1,0.005536424927413464,-0.004548266937490553,2.0,1,0.08283443003892899,0.03866277274210006,8.0,1,0.03932537883520126,0.00903823572298279,5.0,1,0.027429671958088875,0.003133439027806162,5.0,1,0.045961614698171616,-0.008895929946447723,2.0,1,-0.01591082103550434,-0.028498107567429543,0.0,0,0.08781146258115768,0.05362106114625931,8.0,0,0.09050481021404266,0.049710380379110575,8.0,0,0.10395675897598267,0.05974557530134916,8.0,26,55,81
+TRM,254,269879,always,10,0,26041,5000,26041,5000,True,1.0,0.006744096986949444,3.6789300793316215e-05,-0.0015992792905308306,2.0,0.008138747536577284,1,-0.026449881494045258,-0.0439171832986176,0.0,1,-0.004735897295176983,-0.00944427988724783,0.0,1,-0.030204832553863525,-0.03682149155065417,0.0,1,-0.015408563427627087,-0.02010149194393307,0.0,1,-0.001644935691729188,-0.006033211684552953,0.0,1,0.0059553575702011585,-0.022174558136612177,1.0,1,-0.007228408008813858,-0.02357474807649851,0.0,1,0.011528689414262772,-0.010991275077685714,2.0,1,0.045111995190382004,-0.009932179294992238,1.0,1,0.006744096986949444,3.6789300793316215e-05,2.0,26,55,81
+TRM,255,109346,transient_or_regressed,3,3,26041,5000,,,False,0.6913580298423767,0.10989250987768173,0.061567299999296665,0.0421019122004509,8.0,0.4925383999943733,1,-0.033651359379291534,-0.04072002042084932,0.0,1,-0.007745619397610426,-0.014516609429847449,0.0,0,0.0744471549987793,0.03937782603316009,8.0,0,0.0840701162815094,0.04849455179646611,8.0,1,0.011610441841185093,-0.003946593824366573,2.0,0,0.10599164664745331,0.06433330127038062,8.0,0,0.10598421096801758,0.05616140877828002,8.0,0,0.09445098042488098,0.05513498745858669,8.0,0,0.10090453177690506,0.061504637356847525,8.0,0,0.10989250987768173,0.061567299999296665,8.0,26,55,81
+TRM,256,251014,flaky_then_stable,9,2,26041,5000,130205,25000,True,1.0,0.054015837609767914,0.021708722561015747,0.00786015737685375,8.0,0.17366978048812598,1,-0.008876068517565727,-0.024787072092294693,0.0,1,0.004831400234252214,-0.004277746431398555,2.0,1,0.005195440258830786,-0.02357346360804513,1.0,0,0.10671093314886093,0.07309418451040983,8.0,1,0.007167039904743433,-7.498338527511805e-05,4.0,1,0.04784048721194267,-0.014494292307063006,2.0,1,0.0968310534954071,-0.005924721830524504,1.0,1,0.0776989683508873,0.028246169327758253,8.0,1,0.0012945213820785284,-0.017124492005677894,1.0,1,0.054015837609767914,0.021708722561015747,8.0,26,55,81
+TRM,257,9333,always,10,0,26041,5000,26041,5000,True,1.0,0.005231932271271944,-0.00303688389249146,-0.004928272683173418,1.0,0.005231932271271944,1,-0.04764903709292412,-0.0533109144307673,0.0,1,-0.0026918435469269753,-0.011303012899588794,0.0,1,0.0009193325531668961,-0.035406240516749676,1.0,1,-0.012665961869060993,-0.019950688700191677,0.0,1,0.0034393630921840668,-0.005627226481010439,1.0,1,0.055625926703214645,-0.011603944934904575,2.0,1,0.00606091134250164,-0.012474675313569605,1.0,1,0.05460894852876663,0.014075679428060539,7.0,1,0.016853688284754753,-0.010476613009814173,1.0,1,0.005231932271271944,-0.00303688389249146,1.0,24,57,81
+TRM,258,368982,always,10,0,26041,5000,26041,5000,True,1.0,0.01489926502108574,-5.773064913228154e-05,-0.005002646474167705,3.0,0.023191379383206367,1,-0.04283829778432846,-0.051388949155807495,0.0,1,0.03174535557627678,0.014606943790568039,8.0,1,-0.011179519817233086,-0.03648816444911063,0.0,1,0.002681867917999625,-0.015327445493312553,1.0,1,0.016538893803954124,0.0017044564010575414,4.0,1,0.007422146387398243,-0.030613669543527067,1.0,1,-0.013975468464195728,-0.02494359260890633,0.0,1,0.042862989008426666,-0.0015078082215040922,3.0,1,0.01578991487622261,-0.017156454036012292,1.0,1,0.01489926502108574,-5.773064913228154e-05,3.0,26,55,81
+TRM,259,351687,always,10,0,26041,5000,26041,5000,True,1.0,0.000813047809060663,-0.0011080111853516428,-0.002738021925324574,4.0,0.002087998218485154,1,-0.058603424578905106,-0.06276770029217005,0.0,1,-0.012857216410338879,-0.014788092928938568,0.0,1,-0.03124021366238594,-0.0391297172755003,0.0,1,-0.02845918945968151,-0.029103344306349754,0.0,1,0.001505006686784327,-0.0034376303519820794,2.0,1,0.003160341875627637,-0.028446472337236628,1.0,1,-0.010677509009838104,-0.02459212811663747,0.0,1,0.013777663931250572,-0.010421157348901033,1.0,1,-0.023940782994031906,-0.027947437716647983,0.0,1,0.000813047809060663,-0.0011080111853516428,4.0,34,47,81
+TRM,260,188980,never,0,0,,,,,False,0.5802469253540039,0.09719252586364746,0.044397269608452916,0.021856725681573153,8.0,0.35517815686762333,0,0.011603021994233131,0.001958534456207417,4.0,0,0.05216160789132118,0.028008436667732894,8.0,0,0.10342137515544891,0.051397241186350584,8.0,0,0.08938001096248627,0.05317619815468788,8.0,0,0.10529744625091553,0.060110494028776884,8.0,0,0.11122148483991623,0.051547679817304015,8.0,0,0.09857168048620224,0.042628569644875824,8.0,0,0.12307111918926239,0.0595133900642395,8.0,0,0.11012918502092361,0.05128697329200804,8.0,0,0.09719252586364746,0.044397269608452916,8.0,24,57,81
+TRM,261,137633,always,10,0,26041,5000,26041,5000,True,1.0,0.0004368781519588083,-0.003673855376064239,-0.0059659352991729975,1.0,0.0004368781519588083,1,-0.04502272605895996,-0.05161651596426964,0.0,1,0.004416708368808031,-0.004282825946575031,2.0,1,-0.009211203083395958,-0.028232551412656903,0.0,1,-0.01640894263982773,-0.02120307134464383,0.0,1,0.006006511393934488,-0.004833353450521827,2.0,1,0.03276260569691658,-0.012034248597046826,3.0,1,0.0013079344062134624,-0.02187947202764917,1.0,1,0.029931940138339996,-0.00022375390108209103,2.0,1,0.012633765116333961,-0.01800045045092702,1.0,1,0.0004368781519588083,-0.003673855376064239,1.0,26,55,81
+TRM,262,274238,always,10,0,26041,5000,26041,5000,True,1.0,0.011019091121852398,0.0033007633392116986,0.0007174688944360241,6.0,0.02749665593728423,1,-0.04182525724172592,-0.048385532572865486,0.0,1,-0.009289124049246311,-0.012336170300841331,0.0,1,0.012660353444516659,-0.029168873908929527,1.0,1,-0.013311746530234814,-0.023811690625734627,0.0,1,0.0014963457360863686,-0.0040080527105601504,1.0,1,0.031159639358520508,-0.02668194123543799,1.0,1,-0.01690402254462242,-0.025626042624935508,0.0,1,0.03540343791246414,-0.009763098554685712,1.0,1,-0.008148975670337677,-0.022432214813306928,0.0,1,0.011019091121852398,0.0033007633392116986,6.0,26,55,81
+TRM,263,53591,always,10,0,26041,5000,26041,5000,True,1.0,0.01432004477828741,0.0025107309884333517,-0.0012079579319106415,4.0,0.02491767963510938,1,-0.03189104050397873,-0.03992840740829706,0.0,1,0.0032167357858270407,-0.006903903355123475,1.0,1,-0.007960137911140919,-0.035218183998949826,0.0,1,0.0074292514473199844,-0.011393306776881218,1.0,1,0.009323546662926674,-0.00048481890280527296,3.0,1,-0.010385047644376755,-0.03422664455138147,0.0,1,0.15515154600143433,-0.0004136359493713826,1.0,1,0.04114387929439545,-0.0015022462903289124,2.0,1,0.05704790726304054,-0.013472833263222128,1.0,1,0.01432004477828741,0.0025107309884333517,4.0,26,55,81
+TRM,264,244993,flaky_then_stable,4,5,130205,25000,234369,45000,True,1.0,0.04704735800623894,0.010682047839509323,-0.001751790929120034,6.0,0.0995477030519396,0,0.04023426026105881,0.022091051854658872,8.0,0,0.056736528873443604,0.041185063077136874,8.0,0,0.09871301054954529,0.057381377555429935,8.0,0,0.09194166958332062,0.06336543057113886,8.0,1,0.0042620329186320305,-0.005423037684522569,1.0,0,0.11554619669914246,0.06594815431162715,8.0,1,-0.026182319968938828,-0.031527104089036584,0.0,0,0.11547999083995819,0.059853802900761366,8.0,1,0.09729301184415817,0.047135776840150356,8.0,1,0.04704735800623894,0.010682047839509323,6.0,23,58,81
+TRM,265,111621,always,10,0,26041,5000,26041,5000,True,1.0,-0.0003835902316495776,-0.0024249261477962136,-0.004158845636993647,0.0,0.0,1,-0.056633077561855316,-0.05976660316810012,0.0,1,-0.007833083160221577,-0.014264036086387932,0.0,1,0.005164025351405144,-0.02964565414004028,1.0,1,0.008790974505245686,-0.009150075842626393,1.0,1,0.005601809825748205,-0.0006212362277437933,5.0,1,0.042379457503557205,-0.019922878360375762,1.0,1,0.00918265525251627,-0.014438134821830317,1.0,1,0.035825375467538834,-0.00889997064950876,1.0,1,0.05618766322731972,-0.016710330732166767,1.0,1,-0.0003835902316495776,-0.0024249261477962136,0.0,26,55,81
+TRM,266,215831,flaky_then_stable,6,2,26041,5000,260410,50000,True,1.0,0.003537698183208704,0.00031666715460687556,-0.001357559037160172,5.0,0.008453923335764557,1,-0.020330894738435745,-0.03461199440062046,0.0,1,0.0009890686487779021,-0.008122177256154828,1.0,1,-0.009213052690029144,-0.034094431437551975,0.0,1,0.0014766589738428593,-0.009575796982971951,1.0,1,0.019080162048339844,0.0034190934820799157,4.0,0,0.11613205820322037,0.06935809226706624,8.0,0,0.1091407984495163,0.062240778002887964,8.0,0,0.10580602288246155,0.05938316648826003,8.0,0,0.10880324244499207,0.05734210927039385,8.0,1,0.003537698183208704,0.00031666715460687556,5.0,25,56,81
+TRM,267,316537,always,10,0,26041,5000,26041,5000,True,1.0,0.013234950602054596,-0.0002856360515579581,-0.004695249837823212,2.0,0.019581425469368696,1,-0.013087421655654907,-0.033994513330981135,0.0,1,-0.011459629982709885,-0.0169216669164598,0.0,1,-0.005853015463799238,-0.03697876672958955,0.0,1,-0.009576801210641861,-0.022693354170769453,0.0,1,0.003195448312908411,-0.003716143753990764,1.0,1,0.015123292803764343,-0.022797916550189257,2.0,1,-0.01318353321403265,-0.02453083184082061,0.0,1,0.05273286625742912,0.0005378587375162169,4.0,1,-0.0024730272125452757,-0.017084404331399128,0.0,1,0.013234950602054596,-0.0002856360515579581,2.0,24,57,81
+TRM,268,301644,always,10,0,26041,5000,26041,5000,True,1.0,0.0034428327344357967,0.00046217079216148704,-0.0008532097999705002,4.0,0.007110205537173897,1,-0.05040064454078674,-0.05567827634513378,0.0,1,-0.009779201820492744,-0.013756529660895467,0.0,1,0.0012026728363707662,-0.027398063321015798,1.0,1,0.005543154198676348,-0.009873048576992005,1.0,1,0.004580491688102484,-0.0030939631105866283,3.0,1,0.041682060807943344,-0.007611316163092852,2.0,1,0.0006657132762484252,-0.02029436545126373,1.0,1,0.044228445738554,0.012143455933255609,6.0,1,0.020941851660609245,-0.008519499679096043,1.0,1,0.0034428327344357967,0.00046217079216148704,4.0,25,56,81
+TRM,269,151116,always,10,0,26041,5000,26041,5000,True,1.0,0.014432809315621853,0.0013126860467309598,-0.0021022259243181907,4.0,0.01891039207112044,1,-0.047946300357580185,-0.05234906543046236,0.0,1,-0.009390092454850674,-0.014966718037612736,0.0,1,0.01091807521879673,-0.03547284915111959,1.0,1,-0.0044829342514276505,-0.01998392422683537,0.0,1,0.005331155378371477,-0.002122178859281121,3.0,1,0.04797618091106415,-0.026752782054245472,1.0,1,-0.005682673770934343,-0.02269376494223252,0.0,1,0.02540251798927784,-0.0144000822911039,1.0,1,-0.008962450549006462,-0.018987277406267822,0.0,1,0.014432809315621853,0.0013126860467309598,4.0,23,58,81
+TRM,270,244667,flaky_then_stable,7,5,52082,10000,260410,50000,True,1.0,0.03633161634206772,0.006700652214931324,-0.002790150116197765,5.0,0.06544592889258638,0,0.04899459332227707,0.02585787035059184,8.0,1,0.005759438965469599,-0.0068419490999076515,2.0,1,-0.0013323284219950438,-0.03040824885829352,0.0,1,0.01639559119939804,-0.003511173592414707,2.0,1,0.004312093835324049,-0.007976086460985243,1.0,0,0.12223560363054276,0.062051859218627214,8.0,1,0.07044684141874313,0.021684728337277193,7.0,1,0.12339597940444946,0.02096728514879942,4.0,0,0.11709854006767273,0.05262904171831906,8.0,1,0.03633161634206772,0.006700652214931324,5.0,21,60,81
+TRM,271,107534,never,0,0,,,,,False,0.5185185074806213,0.0862712636590004,0.04628258920274675,0.02968098083510995,8.0,0.370260713621974,0,0.04575483128428459,0.018248428255901672,6.0,0,0.06826038658618927,0.039379124995321035,8.0,0,0.08176537603139877,0.055907587986439466,8.0,0,0.0860944464802742,0.05862707132473588,8.0,0,0.10307732224464417,0.0654884958639741,8.0,0,0.10810264945030212,0.06507821613922715,8.0,0,0.11248274147510529,0.06537929503247142,8.0,0,0.097648486495018,0.0575919474940747,8.0,0,0.1304580122232437,0.060060865012928843,8.0,0,0.0862712636590004,0.04628258920274675,8.0,24,57,81
+TRM,272,391236,always,10,0,26041,5000,26041,5000,True,1.0,0.002497156383469701,-0.00022408361837733537,-0.0017171999788843095,3.0,0.0057934707729145885,1,-0.027160154655575752,-0.04382979986257851,0.0,1,-0.007711475249379873,-0.01246733934385702,0.0,1,-0.0005909667816013098,-0.03971594732138328,0.0,1,-0.0009813066571950912,-0.018547721905633807,0.0,1,0.004885340575128794,-0.007423796283546835,1.0,1,0.017792733386158943,-0.026886069681495428,1.0,1,-0.024952851235866547,-0.029663834488019347,0.0,1,0.014319073408842087,-0.010293359624483855,1.0,1,0.06501119583845139,-0.008553855470381677,1.0,1,0.002497156383469701,-0.00022408361837733537,3.0,28,53,81
+TRM,273,244083,flaky_then_stable,5,4,26041,5000,260410,50000,True,1.0,0.06523073464632034,0.0163800495211035,0.0023569875629618764,7.0,0.13813564460724592,1,0.01018210593611002,-0.010567191406153142,2.0,0,0.06041806936264038,0.03301570378243923,8.0,0,0.0866229236125946,0.05256603192538023,8.0,0,0.08193227648735046,0.046233689645305276,8.0,1,0.06673038005828857,0.029077271232381463,8.0,1,0.05314592644572258,-0.022585678263567388,1.0,1,0.005755829159170389,-0.019435739202890545,1.0,0,0.10766632109880447,0.056221603183075786,8.0,0,0.11652486026287079,0.05932387872599065,8.0,1,0.06523073464632034,0.0163800495211035,7.0,24,57,81
+TRM,274,48553,always,10,0,26041,5000,26041,5000,True,1.0,0.0031897618900984526,-0.0003762219512282172,-0.0031201561978377867,5.0,0.00978791870875284,1,-0.043340157717466354,-0.04940625838935375,0.0,1,-0.011731005273759365,-0.017835351987741888,0.0,1,-0.030678115785121918,-0.042060979176312685,0.0,1,0.010217307135462761,-0.011556806042790413,1.0,1,0.005704823881387711,-0.00453757875948213,1.0,1,0.016642270609736443,-0.01777302916161716,1.0,1,0.0034612715244293213,-0.023229113314300776,1.0,1,0.02628798969089985,-0.007294605456991121,3.0,1,0.008637229911983013,-0.01184842427028343,2.0,1,0.0031897618900984526,-0.0003762219512282172,5.0,27,54,81
+TRM,275,25129,always,10,0,26041,5000,26041,5000,True,1.0,0.04280903562903404,0.006450410801335238,0.0002776671026367694,6.0,0.05304790160153061,1,-0.04714920371770859,-0.052699748426675797,0.0,1,-0.0034028617665171623,-0.011052574496716261,0.0,1,-0.02479136548936367,-0.035468922229483724,0.0,1,-0.02466605417430401,-0.02918062685057521,0.0,1,0.011510522104799747,-0.003109752884483896,2.0,1,0.025606706738471985,-0.02082637883722782,1.0,1,-0.013171693310141563,-0.02466241642832756,0.0,1,0.036162275820970535,-0.008967419526015874,1.0,1,-0.01408090628683567,-0.02151508699171245,0.0,1,0.04280903562903404,0.006450410801335238,6.0,26,55,81
+TRM,276,390174,always,10,0,26041,5000,26041,5000,True,1.0,0.013296278193593025,-0.002032260410487652,-0.008220984833315015,2.0,0.020176314748823643,1,-0.03956082835793495,-0.051314452197402716,0.0,1,0.01458408311009407,-0.003935905930120498,1.0,1,-0.0057924771681427956,-0.03857871808577329,0.0,1,0.003207927104085684,-0.01558356877649203,1.0,1,0.01406068168580532,-0.0025520799099467695,2.0,1,0.029659118503332138,-0.03261552727781236,1.0,1,-0.009515325538814068,-0.027133135474286973,0.0,1,0.0007796556456014514,-0.020363205941976048,1.0,1,0.06358420103788376,0.01436937279504491,5.0,1,0.013296278193593025,-0.002032260410487652,2.0,23,58,81
+TRM,277,172139,never,0,0,,,,,False,0.5555555820465088,0.1023760735988617,0.05644163163378835,0.04091198369860649,8.0,0.4515330530703068,0,0.055504702031612396,0.03705254360102117,8.0,0,0.04014013707637787,0.029429644346237183,8.0,0,0.08996552973985672,0.04969130177050829,8.0,0,0.10058199614286423,0.058692801743745804,8.0,0,0.10409538447856903,0.06999425776302814,8.0,0,0.11359860748052597,0.06749845482409,8.0,0,0.0873647928237915,0.05485021369531751,8.0,0,0.1270219087600708,0.07240197621285915,8.0,0,0.1121387630701065,0.056603545090183616,8.0,0,0.1023760735988617,0.05644163163378835,8.0,23,58,81
+TRM,278,369612,transient_or_regressed,1,2,130205,25000,,,False,0.6790123581886292,0.09980745613574982,0.045457843225449324,0.027723421342670918,8.0,0.3636627458035946,0,0.05245056748390198,0.016888049489352852,7.0,0,0.05916685238480568,0.0349542701151222,8.0,0,0.08178334683179855,0.04404837777838111,8.0,0,0.09986276924610138,0.059311507269740105,8.0,1,0.06555041670799255,0.02810925297671929,8.0,0,0.10691327601671219,0.06446752976626158,8.0,0,0.12210431694984436,0.05130841245409101,8.0,0,0.11020064353942871,0.05479666916653514,8.0,0,0.10688135772943497,0.050835757283493876,8.0,0,0.09980745613574982,0.045457843225449324,8.0,25,56,81
+TRM,279,134436,never,0,0,,,,,False,0.6172839403152466,0.0813683569431305,0.04034750210121274,0.023290447890758514,8.0,0.3227800168097019,0,0.04505884274840355,0.011467849253676832,5.0,0,0.06740253418684006,0.03346948092803359,8.0,0,0.06792417168617249,0.038361424347385764,8.0,0,0.1038985475897789,0.044390935567207634,8.0,0,0.09062401950359344,0.040302755078300834,8.0,0,0.09559478610754013,0.04046710568945855,8.0,0,0.1136150062084198,0.04348066996317357,8.0,0,0.10992821305990219,0.04832610534504056,8.0,0,0.096576988697052,0.04157291818410158,8.0,0,0.0813683569431305,0.04034750210121274,8.0,24,57,81
+TRM,280,242300,always,10,0,26041,5000,26041,5000,True,1.0,0.0033795631024986506,-0.0018383704300504178,-0.003370526828803122,1.0,0.0033795631024986506,1,0.022433431819081306,0.001088861157768406,4.0,1,0.026473993435502052,0.004826461739867227,6.0,1,0.06552477180957794,0.034745394019410014,8.0,1,0.012157442048192024,-0.007504672046707128,2.0,1,0.07732328772544861,0.039507963228970766,8.0,1,0.048968344926834106,0.001240159384906292,3.0,1,0.018029194325208664,-0.005430558347143233,3.0,1,0.06139814853668213,0.0017963946447707713,3.0,1,0.0479232594370842,-0.015227404655888677,1.0,1,0.0033795631024986506,-0.0018383704300504178,1.0,25,56,81
+TRM,281,283854,always,10,0,26041,5000,26041,5000,True,1.0,0.0012182812206447124,-0.0002924640357377939,-0.0012458127530408092,4.0,0.0026435387262608856,1,-0.047620102763175964,-0.05585108743980527,0.0,1,-0.016347061842679977,-0.01981441699899733,0.0,1,-0.005906243342906237,-0.04000569420168176,0.0,1,-0.01428714394569397,-0.02098461624700576,0.0,1,0.0031564992386847734,-0.005498776168678887,2.0,1,0.004257116932421923,-0.03097706421976909,1.0,1,-0.0005433217156678438,-0.020263459504349157,0.0,1,0.052769482135772705,-0.008777564973570406,1.0,1,0.03185083717107773,-0.010907626070547849,1.0,1,0.0012182812206447124,-0.0002924640357377939,4.0,26,55,81
+TRM,282,171776,flaky_then_stable,6,3,78123,15000,234369,45000,True,1.0,0.05815820395946503,0.018904677184764296,-0.00024152908008545637,5.0,0.16473479568958282,0,0.03565813973546028,0.009695046785054728,7.0,0,0.04843536391854286,0.03224770678207278,8.0,1,-0.009366287849843502,-0.03687229554634541,0.0,1,0.038266830146312714,0.003973146041971631,4.0,1,0.038864992558956146,0.013092676832457073,7.0,1,0.10283084213733673,0.047597273252904415,8.0,0,0.0968344658613205,0.04726371169090271,8.0,0,0.12440475076436996,0.05940372287295759,8.0,1,0.07843814790248871,0.024752723198616877,8.0,1,0.05815820395946503,0.018904677184764296,5.0,25,56,81
+TRM,283,187057,always,10,0,26041,5000,26041,5000,True,1.0,0.001998868305236101,2.4620682779641356e-05,-0.0014305029635579558,5.0,0.0065399655140936375,1,-0.03773735463619232,-0.046011334750801325,0.0,1,-0.0069198403507471085,-0.012741124024614692,0.0,1,-0.012387004680931568,-0.02367831498850137,0.0,1,0.00916693639010191,-0.012737740180455148,1.0,1,0.01170711312443018,-0.003374164836714044,2.0,1,0.011394359171390533,-0.020901719282846898,1.0,1,-0.022053511813282967,-0.029923652997240424,0.0,1,0.023588528856635094,-0.009803975757677108,1.0,1,0.01827021688222885,-0.01869481522589922,1.0,1,0.001998868305236101,2.4620682779641356e-05,5.0,26,55,81
+TRM,284,357512,always,10,0,26041,5000,26041,5000,True,1.0,0.001465712790377438,-0.0016091648358269595,-0.002567912742961198,1.0,0.001465712790377438,1,-0.05026194080710411,-0.05547224683687091,0.0,1,0.008228923194110394,-0.006321789638604969,1.0,1,-0.00468874629586935,-0.02964009332936257,0.0,1,-0.0055330372415483,-0.015531773387920111,0.0,1,0.005442507099360228,-0.0036649495232268237,1.0,1,0.048347827047109604,-0.013825995803927071,2.0,1,-0.02431534044444561,-0.029638685286045074,0.0,1,0.012076964601874352,-0.009131102997343987,2.0,1,0.020836403593420982,-0.013089671614579856,1.0,1,0.001465712790377438,-0.0016091648358269595,1.0,24,57,81
+TRM,285,233509,transient_or_regressed,4,4,130205,25000,,,False,0.6172839403152466,0.12988685071468353,0.07035251474007964,0.0466662896797061,8.0,0.5628201179206371,0,0.0013564762193709612,-0.008099680620944127,1.0,0,0.015123816207051277,0.009157399705145508,8.0,0,0.056870460510253906,0.03724062815308571,8.0,0,0.10630633682012558,0.06565348990261555,8.0,1,0.029641102999448776,0.014662634173873812,8.0,1,0.05548330396413803,-0.019793264043983072,1.0,1,0.017340468242764473,-0.013319168123416603,1.0,0,0.13131803274154663,0.07609281921759248,8.0,1,0.023415323346853256,-0.0038633696385659277,2.0,0,0.12988685071468353,0.07035251474007964,8.0,24,57,81
+TRM,286,156093,transient_or_regressed,4,3,26041,5000,,,False,0.7283950448036194,0.08001794666051865,0.038199628004804254,0.02032317267730832,8.0,0.30559702403843403,1,-0.0451398603618145,-0.05316335102543235,0.0,0,0.07533767074346542,0.03148290095850825,8.0,0,0.08278503268957138,0.034972736379131675,8.0,0,0.0840449407696724,0.03929072490427643,8.0,0,0.08611354231834412,0.04243990941904485,8.0,0,0.10225607454776764,0.04612080962397158,8.0,1,0.12163188308477402,0.0006534790154546499,1.0,1,0.04523594304919243,-0.005353614760679193,2.0,1,0.03792919963598251,-0.011887308326549828,1.0,0,0.08001794666051865,0.038199628004804254,8.0,24,57,81
+TRM,287,403240,always,10,0,26041,5000,26041,5000,True,1.0,0.0023366014938801527,-0.0005806663775729248,-0.0015379152609966695,1.0,0.0023366014938801527,1,-0.031102841719985008,-0.043893997790291905,0.0,1,-0.012302642688155174,-0.015445455443114042,0.0,1,0.003779852297157049,-0.03640628216089681,1.0,1,-0.02246837504208088,-0.02421555924229324,0.0,1,-0.004404644947499037,-0.0065405540517531335,0.0,1,-0.010814272798597813,-0.034359577926807106,0.0,1,-0.027491547167301178,-0.029038090957328677,0.0,1,0.0036957948468625546,-0.013394385983701795,1.0,1,0.02257768251001835,-0.011812144541181624,1.0,1,0.0023366014938801527,-0.0005806663775729248,1.0,27,54,81
+TRM,288,352372,always,10,0,26041,5000,26041,5000,True,1.0,-0.0013821031898260117,-0.002690280831302516,-0.0036566524649970233,0.0,0.0,1,-0.048082493245601654,-0.057253921404480934,0.0,1,-0.005637733731418848,-0.012830400082748383,0.0,1,0.003351972671225667,-0.02987751955515705,1.0,1,0.008041652850806713,-0.016219267970882356,1.0,1,0.00352527410723269,-0.005447552859550342,1.0,1,-0.010020342655479908,-0.03846071369480342,0.0,1,-0.014942889101803303,-0.027543231029994786,0.0,1,0.018187616020441055,-0.013562245178036392,1.0,1,0.0182644035667181,-0.016667226562276483,1.0,1,-0.0013821031898260117,-0.002690280831302516,0.0,26,55,81
+TRM,289,115724,always,10,0,26041,5000,26041,5000,True,1.0,-0.0035256417468190193,-0.004714913899078965,-0.005577208707109094,0.0,0.0,1,-0.04729641228914261,-0.05483360541984439,0.0,1,-0.014458125457167625,-0.016211753361858428,0.0,1,-0.021275140345096588,-0.0401421282440424,0.0,1,-0.020500173792243004,-0.02432150929234922,0.0,1,-0.0013774429680779576,-0.005908862876822241,0.0,1,0.028971470892429352,-0.03340172627940774,1.0,1,-0.017930615693330765,-0.027370249619707465,0.0,1,-0.004430635832250118,-0.017644783249124885,0.0,1,0.0005508744507096708,-0.02284997504466446,1.0,1,-0.0035256417468190193,-0.004714913899078965,0.0,25,56,81
+TRM,290,32827,transient_or_regressed,3,4,104164,20000,,,False,0.5925925970077515,0.10415306687355042,0.059328670147806406,0.03924683853983879,8.0,0.47462936118245125,0,0.04747107997536659,0.005027550185332075,4.0,0,0.08891503512859344,0.037176064448431134,8.0,0,0.08727800101041794,0.04881151742301881,8.0,1,0.09487981349229813,0.054872621316462755,8.0,0,0.10777227580547333,0.056962966453284025,8.0,0,0.09151085466146469,0.05236651934683323,8.0,1,0.03700045123696327,-0.002855300670489669,3.0,1,0.06983226537704468,0.012618109612958506,5.0,0,0.08877428621053696,0.05180124146863818,8.0,0,0.10415306687355042,0.059328670147806406,8.0,24,57,81
+TRM,291,162037,flaky_then_stable,7,6,26041,5000,182287,35000,True,1.0,0.012644785456359386,-0.0035092845355393365,-0.008858000743202865,2.0,0.014507823972962797,1,0.009263030253350735,-0.017519686720333993,1.0,0,0.07798122614622116,0.04816740658134222,8.0,1,-0.012176786549389362,-0.028507517534308136,0.0,0,0.10045904666185379,0.060461345594376326,8.0,1,0.027703044936060905,0.005434261503978632,6.0,0,0.08818051218986511,0.05683378456160426,8.0,1,-0.005797808989882469,-0.024042375502176583,0.0,1,0.0493919812142849,0.00439219449981465,4.0,1,0.001987480092793703,-0.009623948513763025,1.0,1,0.012644785456359386,-0.0035092845355393365,2.0,24,57,81
+TRM,292,17302,flaky_then_stable,7,5,52082,10000,182287,35000,True,1.0,0.05110366642475128,0.014479365367151331,0.003145931157632731,7.0,0.11658636888023466,0,0.058985695242881775,0.04461397044360638,8.0,1,0.010263175703585148,-0.006788914150092751,1.0,0,0.09508910775184631,0.06415097508579493,8.0,1,0.07614325731992722,0.028250293689779937,8.0,1,0.09560521692037582,0.045325490878894925,8.0,0,0.12406914681196213,0.06411500228568912,8.0,1,0.09818758815526962,0.05254369066096842,8.0,1,-0.006469040643423796,-0.015676359413191676,0.0,1,4.451855056686327e-05,-0.007890803827649506,1.0,1,0.05110366642475128,0.014479365367151331,7.0,23,58,81
+TRM,293,6212,flaky_then_stable,9,2,26041,5000,260410,50000,True,1.0,0.02253061905503273,0.005420127450634027,-0.001998462299525272,5.0,0.05344698182307184,1,-0.03701945021748543,-0.04293791390955448,0.0,1,0.010651390999555588,-0.005638712216750719,2.0,1,-0.003697920823469758,-0.03634648633305915,0.0,1,0.024582281708717346,0.0009253888274542987,4.0,1,0.01728481613099575,-0.0018135164573322982,2.0,1,0.007864776067435741,-0.026176081970334053,1.0,1,0.11171261966228485,6.217030750121921e-05,2.0,1,0.044061847031116486,-0.010425971180666238,1.0,0,0.11105527728796005,0.055607660906389356,8.0,1,0.02253061905503273,0.005420127450634027,5.0,17,64,81
+TRM,294,160912,flaky_then_stable,4,5,130205,25000,234369,45000,True,1.0,0.10010123252868652,0.04707140405662358,0.02732324181124568,8.0,0.3765712324529886,0,0.026659518480300903,0.0012092593242414296,4.0,0,0.06860493868589401,0.028740973910316825,8.0,0,0.07517305761575699,0.05140496278181672,8.0,0,0.09851730614900589,0.06658027414232492,8.0,1,0.09902693331241608,0.047598232980817556,8.0,0,0.11886131763458252,0.07422114629298449,8.0,1,0.06649960577487946,0.011794523132266477,5.0,0,0.11970283091068268,0.05554954940453172,8.0,1,0.08219947665929794,0.032441504357848316,8.0,1,0.10010123252868652,0.04707140405662358,8.0,24,57,81
+TRM,295,220932,stable_learned,9,1,52082,10000,52082,10000,True,1.0,0.009007292799651623,-0.002426587583613582,-0.006743804784491658,3.0,0.011838926351629198,0,0.0576804094016552,0.019771937979385257,7.0,1,0.03838670626282692,0.009081091324333102,5.0,1,-0.004297223407775164,-0.03475257667014375,0.0,1,0.033838607370853424,0.004502413503360003,4.0,1,0.01772436313331127,-0.0008220220724979299,2.0,1,0.07421279698610306,0.006032623408827931,3.0,1,0.007068326231092215,-0.019466287129034754,1.0,1,0.06321172416210175,-0.006746840284904465,1.0,1,0.01711694709956646,-0.007992159808054566,2.0,1,0.009007292799651623,-0.002426587583613582,3.0,24,57,81
+TRM,296,30809,transient_or_regressed,1,2,208328,40000,,,False,0.5555555820465088,0.11739752441644669,0.06381900794804096,0.04237164556980133,8.0,0.5105520635843277,0,0.035852354019880295,0.0233844950562343,8.0,0,0.06286492198705673,0.04368845862336457,8.0,0,0.07218800485134125,0.05662237014621496,8.0,0,0.10272920876741409,0.06280942540615797,8.0,0,0.12002184242010117,0.0738823777064681,8.0,0,0.10615527629852295,0.06133510451763868,8.0,0,0.12011277675628662,0.05520194536074996,8.0,1,0.0888507291674614,0.01764900638954714,4.0,0,0.11444453150033951,0.061707905028015375,8.0,0,0.11739752441644669,0.06381900794804096,8.0,23,58,81
+TRM,297,192110,stable_learned,9,1,52082,10000,52082,10000,True,1.0,0.0028238475788384676,-8.35981009004172e-05,-0.0014060090470593423,3.0,0.005033417488448322,0,0.035979002714157104,0.016043794006691314,6.0,1,0.026801079511642456,0.0024774927860562457,3.0,1,-0.0022876840084791183,-0.03480209852568805,0.0,1,-0.01312370877712965,-0.021075154305435717,0.0,1,0.0036182531621307135,-0.007305574865313247,1.0,1,-0.0107000432908535,-0.02390057872980833,0.0,1,-0.0009369709296151996,-0.024485854533850215,0.0,1,0.0390826091170311,-0.007355358742643148,2.0,1,-0.007312692701816559,-0.015528390416875482,0.0,1,0.0028238475788384676,-8.35981009004172e-05,3.0,27,54,81
+TRM,298,341639,always,10,0,26041,5000,26041,5000,True,1.0,-0.0005696217995136976,-0.003742373184650205,-0.005633562803268433,0.0,0.0,1,-0.04129253700375557,-0.051087545696645975,0.0,1,-0.008224676363170147,-0.015903946245089173,0.0,1,0.0051146503537893295,-0.034086194122210145,1.0,1,-0.0038294477853924036,-0.016440825565950945,0.0,1,0.0021098123397678137,-0.004061275802087039,1.0,1,0.0547034852206707,-0.005814355128677562,3.0,1,0.010013492777943611,-0.015241166751366109,1.0,1,0.044600959867239,-0.0033227177045773715,2.0,1,0.03330070525407791,-0.009459766230065725,1.0,1,-0.0005696217995136976,-0.003742373184650205,0.0,27,54,81
+TRM,299,234124,always,10,0,26041,5000,26041,5000,True,1.0,0.0027222938369959593,-0.002907304296968505,-0.004853222751989961,1.0,0.0027222938369959593,1,-0.04394788667559624,-0.04965401254594326,0.0,1,-0.007585824932903051,-0.012454011535737664,0.0,1,0.014375799335539341,-0.02720425312872976,1.0,1,-0.00407802127301693,-0.01354116655420512,0.0,1,-0.0030374545603990555,-0.008364137262105942,0.0,1,0.027656661346554756,-0.022454957361333072,1.0,1,-0.0232962928712368,-0.029725778615102172,0.0,1,0.05280968174338341,-0.0021759523078799248,2.0,1,0.0030456760432571173,-0.016468575544422492,2.0,1,0.0027222938369959593,-0.002907304296968505,1.0,27,54,81
+TRM,300,78005,flaky_then_stable,4,3,130205,25000,208328,40000,True,1.0,0.01996181346476078,-0.0003508280642563477,-0.005318882176652551,2.0,0.020956838503479958,0,0.019306281581521034,0.0039665130389039405,4.0,0,0.05297987908124924,0.019987321851658635,8.0,0,0.07653359323740005,0.045413392363116145,8.0,0,0.08970049023628235,0.05925530847162008,8.0,1,0.08497878909111023,0.03812693222425878,8.0,0,0.12493369728326797,0.06791514251381159,8.0,0,0.10746478289365768,0.05809704144485295,8.0,1,0.04712781682610512,0.00016541369404876605,3.0,1,0.010570194572210312,-0.004682888396018825,3.0,1,0.01996181346476078,-0.0003508280642563477,2.0,24,57,81
+TRM,301,165405,always,10,0,26041,5000,26041,5000,True,1.0,-0.00047059074859134853,-0.0020594273482856806,-0.0028309037443250418,0.0,0.0,1,-0.038940899074077606,-0.046550939325243235,0.0,1,-0.00580311706289649,-0.00965986488154158,0.0,1,-0.014817762188613415,-0.0377437345450744,0.0,1,-0.008761142380535603,-0.018516419571824372,0.0,1,-0.00230930233374238,-0.00784613360883668,0.0,1,0.03977914899587631,-0.019764001190196723,2.0,1,-0.024822574108839035,-0.030772848520427942,0.0,1,0.005597894079983234,-0.012989046983420849,1.0,1,0.028849082067608833,-0.015441809431649745,1.0,1,-0.00047059074859134853,-0.0020594273482856806,0.0,26,55,81
+TRM,302,403476,always,10,0,26041,5000,26041,5000,True,1.0,0.006882680580019951,0.0003825676035376091,-0.001685355840891134,3.0,0.009842937579378486,1,-0.04378989338874817,-0.05189696233719587,0.0,1,-0.00914381630718708,-0.01533461210783571,0.0,1,-0.007370340172201395,-0.025685838365461677,0.0,1,-0.020516913384199142,-0.024253829615190625,0.0,1,0.016645286232233047,0.0012062506866641343,4.0,1,0.01895572990179062,-0.03603687137365341,1.0,1,-0.020627425983548164,-0.029018760891631246,0.0,1,0.011818338185548782,-0.01172144066367764,1.0,1,-0.005841004196554422,-0.021843968948815018,0.0,1,0.006882680580019951,0.0003825676035376091,3.0,26,55,81
+TRM,303,54951,always,10,0,26041,5000,26041,5000,True,1.0,0.0015141154872253537,-0.0021362453553592786,-0.003988664480857551,2.0,0.0019828156800940633,1,-0.05078159272670746,-0.05337169207632542,0.0,1,-0.006179938092827797,-0.01280455500818789,0.0,1,0.0031862563919276,-0.034421692515024915,1.0,1,-0.02075892500579357,-0.024510767310857773,0.0,1,0.016278479248285294,0.0009395545566803776,3.0,1,0.03633291274309158,-0.019871149270329624,1.0,1,-0.0025784550234675407,-0.023452102090232074,0.0,1,0.06401488184928894,-0.005093819301691838,1.0,1,0.004147524945437908,-0.01603066353709437,1.0,1,0.0015141154872253537,-0.0021362453553592786,2.0,24,57,81
+TRM,304,178635,always,10,0,26041,5000,26041,5000,True,1.0,0.018125616014003754,-7.62641939218156e-05,-0.005055877845734358,2.0,0.02064996282570064,1,-0.04294716566801071,-0.0490078623406589,0.0,1,-0.000545956427231431,-0.010628711519530043,0.0,1,-0.006680050399154425,-0.04134767659706995,0.0,1,-0.009754507802426815,-0.022182465181685984,0.0,1,0.013971615582704544,-0.002327304784557782,2.0,1,0.04825746640563011,-0.02496576146222651,1.0,1,-0.020011093467473984,-0.027031044708564878,0.0,1,0.024010971188545227,-0.010789898049551994,1.0,1,-0.0018306885613128543,-0.018266485756612383,0.0,1,0.018125616014003754,-7.62641939218156e-05,2.0,24,57,81
+TRM,305,376021,never,0,0,,,,,False,0.6419752836227417,0.10957793891429901,0.04664918559137732,0.02324540470726788,8.0,0.37319348473101854,0,0.056820012629032135,0.014225377490220126,6.0,0,0.06608034670352936,0.032950883731245995,8.0,0,0.08752100169658661,0.05210430268198252,8.0,0,0.10636866092681885,0.05939064687117934,8.0,0,0.10669758170843124,0.059460755437612534,8.0,0,0.10953156650066376,0.05860001244582236,8.0,0,0.08939092606306076,0.04311314760707319,8.0,0,0.11075463891029358,0.06548671517521143,8.0,0,0.08810462802648544,0.04508500243537128,8.0,0,0.10957793891429901,0.04664918559137732,8.0,26,55,81
+TRM,306,212950,flaky_then_stable,5,3,130205,25000,182287,35000,True,1.0,0.015168499201536179,0.00027681571373250335,-0.005814705858938396,4.0,0.02547334914561361,0,0.03179674968123436,0.012723906707833521,8.0,0,0.05657150223851204,0.032698812428861856,8.0,0,0.06313025206327438,0.04025393445044756,8.0,0,0.07457559555768967,0.0548378131352365,8.0,1,0.10340048372745514,0.05494161322712898,8.0,0,0.09795745462179184,0.06415181746706367,8.0,1,0.0664401724934578,0.021618706290610135,7.0,1,0.06130822002887726,0.021514907088203472,7.0,1,0.057096533477306366,0.014862492029351415,7.0,1,0.015168499201536179,0.00027681571373250335,4.0,23,58,81
+TRM,307,290435,stable_learned,9,1,52082,10000,52082,10000,True,1.0,0.004294127691537142,-0.0010677552563720383,-0.002495636756066233,1.0,0.004294127691537142,0,0.06118955463171005,0.01728983107386739,7.0,1,0.048967860639095306,0.009664543613325804,5.0,1,0.002688614185899496,-0.033229635388124734,1.0,1,-0.010043846443295479,-0.018007966922596097,0.0,1,0.08083781599998474,0.03285789510118775,8.0,1,0.04611825942993164,0.008569204021114274,4.0,1,0.03659467026591301,-0.004889123665634543,3.0,1,0.0489223413169384,0.008861100390276988,5.0,1,-0.006828560959547758,-0.0178080722107552,0.0,1,0.004294127691537142,-0.0010677552563720383,1.0,17,64,81
+TRM,308,269127,always,10,0,26041,5000,26041,5000,True,1.0,-0.0022516322787851095,-0.003972039703512564,-0.005173297133296728,0.0,0.0,1,-0.05075797811150551,-0.05775544559583068,0.0,1,-0.011543494649231434,-0.01559730083681643,0.0,1,-0.01966247707605362,-0.04312727926298976,0.0,1,0.002022496424615383,-0.02212376252282411,1.0,1,0.002313839504495263,-0.005925915378611535,1.0,1,0.0011635135160759091,-0.034581995438202284,1.0,1,-0.018673300743103027,-0.0288213558960706,0.0,1,0.014950386248528957,-0.015389107575174421,1.0,1,0.012856594286859035,-0.02147849660832435,1.0,1,-0.0022516322787851095,-0.003972039703512564,0.0,28,53,81
+TRM,309,245133,transient_or_regressed,5,4,78123,15000,,,False,0.6172839403152466,0.1087406575679779,0.051597694866359234,0.029591001570224762,8.0,0.41278155893087387,0,0.024820856750011444,0.01312669861363247,8.0,0,0.060663387179374695,0.037484061205759645,8.0,1,0.07569892704486847,0.03697509132325649,8.0,1,0.033798981457948685,0.0054896722140256315,5.0,1,0.09800686687231064,0.05420742300339043,8.0,1,0.05948188900947571,0.0076610965334111825,4.0,0,0.10129911452531815,0.046202652622014284,8.0,1,0.026155151426792145,-0.0010215175861958414,3.0,0,0.14038313925266266,0.059530173894017935,8.0,0,0.1087406575679779,0.051597694866359234,8.0,26,55,81
+TRM,310,282268,flaky_then_stable,6,3,104164,20000,156246,30000,True,1.0,0.009486171416938305,-0.0025522357609588653,-0.00762961246073246,3.0,0.015143538592383265,0,0.03836689889431,0.008360785199329257,5.0,0,0.05226585641503334,0.016186433749680873,7.0,0,0.08233053982257843,0.04502021335065365,8.0,1,0.053997546434402466,0.02451515141001437,8.0,0,0.10302313417196274,0.05387084442190826,8.0,1,0.07032472640275955,0.000655524869216606,4.0,1,0.008911862038075924,-0.023504575830884278,1.0,1,0.024051915854215622,-0.006493970693554729,3.0,1,0.0620514377951622,0.013582900779510965,5.0,1,0.009486171416938305,-0.0025522357609588653,3.0,23,58,81
+TRM,311,134521,always,10,0,26041,5000,26041,5000,True,1.0,0.0043226201087236404,0.0016467463865410537,0.0003114859573543072,7.0,0.01692208764143288,1,-0.049747906625270844,-0.05607616575434804,0.0,1,-0.015629995614290237,-0.017680708784610033,0.0,1,-0.020868919789791107,-0.04050672147423029,0.0,1,-0.016135266050696373,-0.023528077639639378,0.0,1,-0.002050727605819702,-0.004720751952845603,0.0,1,0.027749942615628242,-0.022330083302222192,1.0,1,0.009885713458061218,-0.021367493085563183,1.0,1,0.053523581475019455,-0.004601231384185667,3.0,1,-0.0029413881711661816,-0.023015298356767744,0.0,1,0.0043226201087236404,0.0016467463865410537,7.0,24,57,81
+TRM,312,35485,flaky_then_stable,7,6,26041,5000,260410,50000,True,1.0,0.10215932130813599,0.05135372141376138,0.030793197453022003,8.0,0.410829771310091,1,0.025761082768440247,0.0025604262336855754,4.0,0,0.08838338404893875,0.04863709886558354,8.0,1,-0.008772979490458965,-0.03643551317509264,0.0,0,0.09482228755950928,0.05222522746771574,8.0,1,0.01121759507805109,0.0025111950799328042,4.0,1,0.012981980107724667,-0.01886509170071804,1.0,1,-0.0013297745026648045,-0.017521329515147954,0.0,1,0.046368733048439026,-0.005890961969271302,1.0,0,0.10560284554958344,0.052264753030613065,8.0,1,0.10215932130813599,0.05135372141376138,8.0,26,55,81
+TRM,313,421149,transient_or_regressed,3,4,52082,10000,,,False,0.5679012537002563,0.1015518382191658,0.05006296839565039,0.027536782436072826,8.0,0.4005037471652031,0,0.0019539815839380026,-0.0049430540966568515,2.0,1,0.013888214714825153,-0.0086840255826246,1.0,1,0.05640062317252159,0.00915191980311647,4.0,0,0.09359437227249146,0.039493527030572295,8.0,0,0.08755488693714142,0.04594968003220856,8.0,1,0.039708517491817474,-0.0016880357143236324,3.0,0,0.09172613173723221,0.03873336233664304,8.0,0,0.11041457206010818,0.054463202599436045,8.0,0,0.0878957211971283,0.04390990221872926,8.0,0,0.1015518382191658,0.05006296839565039,8.0,26,55,81
+TRM,314,52487,never,0,0,,,,,False,0.654321014881134,0.11245185881853104,0.05196948582306504,0.03309060912579298,8.0,0.41575588658452034,0,0.03086334839463234,0.018348261481150985,8.0,0,0.06168058514595032,0.04179806564934552,8.0,0,0.09063098579645157,0.05433082417584956,8.0,0,0.0888141542673111,0.05492187291383743,8.0,0,0.1057659462094307,0.06119160680100322,8.0,0,0.11594448983669281,0.056994637940078974,8.0,0,0.09510648995637894,0.0441788419848308,8.0,0,0.11288943886756897,0.05767156812362373,8.0,0,0.10395493358373642,0.043160419911146164,8.0,0,0.11245185881853104,0.05196948582306504,8.0,25,56,81
+TRM,315,399134,transient_or_regressed,1,2,156246,30000,,,False,0.4938271641731262,0.09504705667495728,0.043661841889843345,0.022792589385062456,8.0,0.34929473511874676,0,0.010497982613742352,-0.005120682413689792,2.0,0,0.05921134725213051,0.02388276776764542,8.0,0,0.07099338620901108,0.051374326925724745,8.0,0,0.08385702222585678,0.050016090739518404,8.0,0,0.12363409996032715,0.06850038142874837,8.0,1,0.04459301754832268,-0.024294053204357624,1.0,0,0.09960828721523285,0.04799136891961098,8.0,0,0.13704419136047363,0.06867391848936677,8.0,0,0.1148909330368042,0.06596251367591321,8.0,0,0.09504705667495728,0.043661841889843345,8.0,23,58,81
+TRM,316,77431,never,0,0,,,,,False,0.5308641791343689,0.11746518313884735,0.056983269518241286,0.034050536807626486,8.0,0.4558661561459303,0,0.01982293650507927,0.006102520143031143,5.0,0,0.04251086711883545,0.03198885009624064,8.0,0,0.07626156508922577,0.04840138601139188,8.0,0,0.11171665042638779,0.0672927601262927,8.0,0,0.11210842430591583,0.06891360599547625,8.0,0,0.13051922619342804,0.07540574809536338,8.0,0,0.1018352285027504,0.05245656776241958,8.0,0,0.11622285097837448,0.06446186965331435,8.0,0,0.10696631669998169,0.05387582373805344,8.0,0,0.11746518313884735,0.056983269518241286,8.0,26,55,81
+TRM,317,259570,flaky_then_stable,5,3,130205,25000,182287,35000,True,1.0,0.012089409865438938,-0.0006821637362008914,-0.005183526547625661,3.0,0.017303189379163086,0,0.021656857803463936,0.003882431279635057,5.0,0,0.057534992694854736,0.02470832911785692,8.0,0,0.05446814373135567,0.026243766653351486,8.0,0,0.09781914949417114,0.053633346455171704,8.0,1,0.012228589504957199,0.0002949576883111149,4.0,0,0.12828496098518372,0.054319159826263785,8.0,1,0.018804263323545456,-0.012315473984926939,1.0,1,0.03476373851299286,0.004452468536328524,4.0,1,0.07198790460824966,0.013938226213213056,5.0,1,0.012089409865438938,-0.0006821637362008914,3.0,26,55,81
+TRM,318,254558,stable_learned,7,1,104164,20000,104164,20000,True,1.0,0.09082536399364471,0.04042565112467855,0.019089447101578116,8.0,0.3234052089974284,0,0.00813287403434515,-0.0020158638290013187,4.0,0,0.036267735064029694,0.022037041024304926,8.0,0,0.07973510026931763,0.0499174224678427,8.0,1,0.02465679496526718,0.0061553683335660025,5.0,1,0.058824002742767334,0.0213365640520351,8.0,1,0.06066381558775902,0.003588364299503155,3.0,1,0.08791843056678772,-0.008846976328641176,1.0,1,0.03748849034309387,-0.006273950159084052,2.0,1,-0.002143059391528368,-0.021631999465171248,0.0,1,0.09082536399364471,0.04042565112467855,8.0,24,57,81
+TRM,319,378035,always,10,0,26041,5000,26041,5000,True,1.0,0.010566718876361847,0.0029845772114640567,0.00031226024293573573,6.0,0.02454163460060954,1,-0.05131689831614494,-0.05618990212678909,0.0,1,-0.011445744894444942,-0.015975369256921113,0.0,1,-0.04347633197903633,-0.04631497012451291,0.0,1,-0.019416991621255875,-0.022323196986690164,0.0,1,-0.001302140997722745,-0.0059642665728461,0.0,1,0.034673258662223816,-0.02967564621940255,1.0,1,-0.02329348772764206,-0.028032728703692555,0.0,1,0.010297783650457859,-0.014419772778637707,1.0,1,0.01193520613014698,-0.01921613235026598,1.0,1,0.010566718876361847,0.0029845772114640567,6.0,24,57,81
+TRM,320,406804,transient_or_regressed,1,2,78123,15000,,,False,0.6913580298423767,0.10724902153015137,0.05158593994565308,0.03208874398842454,8.0,0.41268751956522465,0,0.07268457859754562,0.023752047287416644,7.0,0,0.07784959673881531,0.03151899663498625,8.0,1,0.012101788073778152,-0.026823955355212092,1.0,0,0.11108629405498505,0.05540907243266702,8.0,0,0.07171735167503357,0.03081420448143035,8.0,0,0.08505405485630035,0.046243019169196486,8.0,0,0.10551980882883072,0.04737975122407079,8.0,0,0.07110268622636795,0.03842642286326736,8.0,0,0.09849625825881958,0.040687593165785074,8.0,0,0.10724902153015137,0.05158593994565308,8.0,24,57,81
+TRM,321,344531,always,10,0,26041,5000,26041,5000,True,1.0,-0.0017385371029376984,-0.0037518643657676876,-0.004865649156272411,0.0,0.0,1,-0.03379456326365471,-0.045452195685356855,0.0,1,0.006348158698529005,-0.009674864195403643,2.0,1,0.0008966124732978642,-0.0350434567881166,1.0,1,0.012004147283732891,-0.006262894894462079,2.0,1,-0.0009721154347062111,-0.005493447155458853,0.0,1,0.007717652712017298,-0.018532885296735913,1.0,1,-0.007595566101372242,-0.024024947197176516,0.0,1,0.06038425862789154,-0.007563079474493861,1.0,1,0.008501823991537094,-0.01643503416562453,1.0,1,-0.0017385371029376984,-0.0037518643657676876,0.0,26,55,81
+TRM,322,363377,always,10,0,26041,5000,26041,5000,True,1.0,0.002622471423819661,-0.0019642078077595215,-0.003790776652749628,2.0,0.0030015438387636095,1,-0.04495055601000786,-0.053201752714812756,0.0,1,-0.00669039785861969,-0.012966631562449038,0.0,1,-0.004736219998449087,-0.03386519261403009,0.0,1,-0.008488128893077374,-0.02026468131225556,0.0,1,-0.00024973470135591924,-0.008673985150380759,0.0,1,0.04603160172700882,-0.014822784636635333,1.0,1,-0.014415105804800987,-0.024964936776086688,0.0,1,0.10202044993638992,0.0036703120058518834,2.0,1,0.0008241779287345707,-0.02043131188111147,1.0,1,0.002622471423819661,-0.0019642078077595215,2.0,25,56,81
+TRM,323,133292,always,10,0,26041,5000,26041,5000,True,1.0,0.006520980503410101,0.0027438464458100498,0.0004452792927622795,6.0,0.025138818193227053,1,-0.001634996267966926,-0.013380561940721236,0.0,1,-0.005853843409568071,-0.014703554275911301,0.0,1,0.0007466587121598423,-0.03195660716301063,1.0,1,-0.018243804574012756,-0.023940877057611942,0.0,1,0.007051485124975443,-0.003740973726962693,2.0,1,-0.01964527741074562,-0.03688772092573345,0.0,1,-0.02024289034307003,-0.02646592538803816,0.0,1,0.03204459697008133,-0.010600099456496537,1.0,1,0.02939377911388874,-0.01388904987834394,1.0,1,0.006520980503410101,0.0027438464458100498,6.0,26,55,81
+TRM,324,182177,stable_learned,9,1,52082,10000,52082,10000,True,1.0,0.0023185675963759422,0.0002622031606733799,-0.0006941346146049909,4.0,0.004874163743807003,0,0.05978824943304062,0.025528938975185156,8.0,1,-0.00480638537555933,-0.012959296349436045,0.0,1,-0.03074977546930313,-0.03822897607460618,0.0,1,-0.008541369810700417,-0.018060415983200073,0.0,1,-0.0011268045054748654,-0.00531853835855145,0.0,1,-0.00013301025319378823,-0.026047173150800518,0.0,1,-0.009426401928067207,-0.027062120847404003,0.0,1,0.048625390976667404,-0.0017999510309891775,3.0,1,-0.018373463302850723,-0.02501515089534223,0.0,1,0.0023185675963759422,0.0002622031606733799,4.0,27,54,81
+TRM,325,150937,transient_or_regressed,3,6,104164,20000,,,False,0.604938268661499,0.09953086823225021,0.05257914657704532,0.03287668200209737,8.0,0.42063317261636257,0,0.0552726611495018,0.025302771886345,8.0,0,0.07381429523229599,0.03428589378017932,8.0,0,0.09581384062767029,0.06100268615409732,8.0,1,-0.0035211429931223392,-0.0150606261449866,0.0,0,0.11197762191295624,0.06486398028209805,8.0,0,0.10854902118444443,0.0630032210610807,8.0,1,0.08111918717622757,0.010529691164265387,5.0,0,0.12383098155260086,0.05581827973946929,8.0,1,0.04649903625249863,0.00958819134393707,4.0,0,0.09953086823225021,0.05257914657704532,8.0,24,57,81
+TRM,326,379717,transient_or_regressed,1,2,234369,45000,,,False,0.6913580298423767,0.11834660172462463,0.04675879410933703,0.023804166121408343,8.0,0.37407035287469625,0,0.03782345727086067,0.012403918224663357,5.0,0,0.060077082365751266,0.020891027117613703,8.0,0,0.08134092390537262,0.04396730847656727,8.0,0,0.08556005358695984,0.03808212233707309,8.0,0,0.1148657351732254,0.05118578765541315,8.0,0,0.11171144992113113,0.05000579264014959,8.0,0,0.10669296234846115,0.04409587266854942,8.0,0,0.1127268522977829,0.04944310989230871,8.0,1,-0.003006577957421541,-0.017098489101044834,0.0,0,0.11834660172462463,0.04675879410933703,8.0,25,56,81
+TRM,327,96991,always,10,0,26041,5000,26041,5000,True,1.0,-0.0013811023673042655,-0.0025406620843568817,-0.003393768856767565,0.0,0.0,1,-0.02645706571638584,-0.040056393248960376,0.0,1,0.012168661691248417,-0.006080300110625103,1.0,1,-0.005673208739608526,-0.028132859093602747,0.0,1,-0.02100733108818531,-0.025575072970241308,0.0,1,-0.0016309083439409733,-0.008158009441103786,0.0,1,0.004677392542362213,-0.032665589125826955,1.0,1,-0.024584395810961723,-0.03025665576569736,0.0,1,-0.005563518963754177,-0.018263343605212867,0.0,1,-0.01662452705204487,-0.024857051903381944,0.0,1,-0.0013811023673042655,-0.0025406620843568817,0.0,27,54,81
+TRM,328,34793,flaky_then_stable,2,3,52082,10000,260410,50000,True,1.0,0.025119904428720474,0.0012271600135136396,-0.004828855278901756,3.0,0.0314178264234215,0,0.036926254630088806,0.015599959238898009,7.0,1,0.007434862665832043,-0.0047137230867519975,2.0,0,0.06887649744749069,0.0328918588347733,8.0,0,0.10213374346494675,0.05921272630803287,8.0,0,0.0897846668958664,0.05731496191583574,8.0,0,0.09000308066606522,0.04520249692723155,8.0,0,0.1010221466422081,0.0456348821753636,8.0,0,0.11643340438604355,0.05912166810594499,8.0,0,0.10347288101911545,0.05686450935900211,8.0,1,0.025119904428720474,0.0012271600135136396,3.0,26,55,81
+TRM,329,178506,never,0,0,,,,,False,0.5185185074806213,0.11850716918706894,0.05851146834902465,0.038545341696590185,8.0,0.4680917467921972,0,-0.001915840432047844,-0.007167307747295126,0.0,0,0.022527964785695076,0.009450252044189256,7.0,0,0.08472569286823273,0.052680530585348606,8.0,0,0.09511785209178925,0.0632243980653584,8.0,0,0.10892143100500107,0.060015578754246235,8.0,0,0.10794170945882797,0.06161618139594793,8.0,0,0.09288252145051956,0.043456367682665586,8.0,0,0.1105753481388092,0.060929997358471155,8.0,0,0.09446657449007034,0.05006959498859942,8.0,0,0.11850716918706894,0.05851146834902465,8.0,24,57,81
+TRM,330,241389,flaky_then_stable,8,2,26041,5000,104164,20000,True,1.0,5.2558956667780876e-05,-0.004959690733812749,-0.006403264123946428,1.0,5.2558956667780876e-05,1,0.006690066773444414,-0.025655002857092768,1.0,0,0.04602455347776413,0.011984961420239415,6.0,0,0.07946861535310745,0.03197523590642959,8.0,1,0.018000906333327293,-0.010274036321789026,1.0,1,0.002752908505499363,-0.004037111677462235,1.0,1,0.03494887426495552,-0.004317934828577563,3.0,1,0.08140058070421219,0.029410826435196213,8.0,1,0.06936147809028625,-0.0011288244859315455,3.0,1,0.014405789785087109,-0.011122027557576075,1.0,1,5.2558956667780876e-05,-0.004959690733812749,1.0,25,56,81
+TRM,331,98172,stable_learned,1,1,260410,50000,260410,50000,True,1.0,0.090518057346344,0.03194463599356823,0.01085049839457497,8.0,0.2555570879485458,0,0.023090608417987823,0.010269992053508759,8.0,0,0.06646592170000076,0.03800386958755553,8.0,0,0.09619870781898499,0.05347411287948489,8.0,0,0.10737758874893188,0.06732479250058532,8.0,0,0.13028649985790253,0.06704817386344075,8.0,0,0.100799560546875,0.05921399360522628,8.0,0,0.09691932052373886,0.046984739135950804,8.0,0,0.11070393770933151,0.05736309033818543,8.0,0,0.1149364784359932,0.06325410073623061,8.0,1,0.090518057346344,0.03194463599356823,8.0,26,55,81
+TRM,332,166448,always,10,0,26041,5000,26041,5000,True,1.0,0.0178337674587965,0.0019619189554305194,-0.0015954634432091552,4.0,0.022077205416280776,1,-0.0490417554974556,-0.054586872924119234,0.0,1,-0.012563087977468967,-0.017254954087547958,0.0,1,-0.0016126381233334541,-0.03934767015744001,0.0,1,-0.013047242537140846,-0.022333540255203843,0.0,1,-0.003526080399751663,-0.00781351572368294,0.0,1,0.032800957560539246,-0.022018354677129537,1.0,1,-0.002883076900616288,-0.021420893404865637,0.0,1,0.057644255459308624,-0.0006641671934630722,3.0,1,-0.010634070262312889,-0.023248874815180898,0.0,1,0.0178337674587965,0.0019619189554305194,4.0,26,55,81
+TRM,333,169307,always,10,0,26041,5000,26041,5000,True,1.0,0.025400083512067795,0.003113876184215769,-0.0034130967687815428,3.0,0.03881487506441772,1,-0.03617578372359276,-0.04149579629302025,0.0,1,0.0016460232436656952,-0.009520445251837373,1.0,1,0.019754964858293533,-0.026845748303458095,1.0,1,0.011927829124033451,-0.012803003948647529,1.0,1,0.010129416361451149,0.0006213199885678478,4.0,1,-0.016964273527264595,-0.04084261227399111,0.0,1,-0.0013802278554067016,-0.022686070107738487,0.0,1,0.07764940708875656,0.0016772816998127382,3.0,1,0.022825395688414574,0.00438653098535724,5.0,1,0.025400083512067795,0.003113876184215769,3.0,17,64,81
+TRM,334,386281,always,10,0,26041,5000,26041,5000,True,1.0,0.017917262390255928,0.0019697481038747355,-0.0028032163972966373,5.0,0.02715111745055765,1,-0.034038666635751724,-0.04734588460996747,0.0,1,0.009263794869184494,-0.004277487882973219,1.0,1,0.024879785254597664,0.0015361552577815019,3.0,1,0.023197239264845848,0.003025761296157725,4.0,1,0.025036310777068138,0.01004354516044259,7.0,1,0.04418583959341049,0.007408299527014606,5.0,1,-0.007101162802428007,-0.020296970673371106,0.0,1,0.06356988847255707,0.008176111616194248,4.0,1,0.038379207253456116,-0.0027726481202989817,2.0,1,0.017917262390255928,0.0019697481038747355,5.0,26,55,81
+TRM,335,20092,always,10,0,26041,5000,26041,5000,True,1.0,-0.0016024846117943525,-0.003093428022111766,-0.00418935262132436,0.0,0.0,1,-0.040001530200242996,-0.04842031002044678,0.0,1,-0.010935070924460888,-0.0155808663694188,0.0,1,0.005235830321907997,-0.038499711314216256,1.0,1,-0.010756611824035645,-0.020366279408335686,0.0,1,0.011305062100291252,-0.0006169688422232866,3.0,1,0.00923218671232462,-0.02166229282738641,1.0,1,-0.023471539840102196,-0.02899015531875193,0.0,1,0.05296717956662178,-0.007406381308101118,1.0,1,0.02778617851436138,-0.018741637002676725,1.0,1,-0.0016024846117943525,-0.003093428022111766,0.0,26,55,81
+TRM,336,70604,always,10,0,26041,5000,26041,5000,True,1.0,0.001800980418920517,-0.0013937882922618883,-0.0029394359444268048,2.0,0.0022811945527791977,1,-0.05308573693037033,-0.056477710604667664,0.0,1,-0.012269572354853153,-0.01593818247783929,0.0,1,-0.008227040059864521,-0.040484647382982075,0.0,1,-0.004321731626987457,-0.021941066719591618,0.0,1,-0.001624195254407823,-0.006027036244631745,0.0,1,0.011072646826505661,-0.032779736910015345,1.0,1,-0.029964646324515343,-0.030742493690922856,0.0,1,0.020602412521839142,-0.013961207820102572,1.0,1,-0.01138351857662201,-0.023913421668112278,0.0,1,0.001800980418920517,-0.0013937882922618883,2.0,25,56,81
+TRM,337,79255,flaky_then_stable,3,5,52082,10000,260410,50000,True,1.0,0.028806187212467194,0.006798232876462862,-0.001328934042248875,5.0,0.06349916406907141,0,0.03224727511405945,0.02203588536940515,8.0,1,0.04008908197283745,0.01525353838223964,8.0,0,0.09099999070167542,0.052489979192614555,8.0,0,0.09349366277456284,0.06486957240849733,8.0,1,0.044642623513936996,0.01662341648261645,7.0,0,0.10543972253799438,0.06002827687188983,8.0,0,0.10560879111289978,0.04927955917082727,8.0,0,0.11528989672660828,0.05908309482038021,8.0,0,0.11912733316421509,0.06207287032157183,8.0,1,0.028806187212467194,0.006798232876462862,5.0,25,56,81
+TRM,338,95579,never,0,0,,,,,False,0.5679012537002563,0.08857614547014236,0.04833721579052508,0.026124509517103434,8.0,0.38669772632420063,0,0.0433424673974514,0.024810710980091244,8.0,0,0.05848773568868637,0.03331500664353371,8.0,0,0.07631447911262512,0.05421700607985258,8.0,0,0.10512058436870575,0.06469826446846128,8.0,0,0.11469029635190964,0.06019376823678613,8.0,0,0.11307480186223984,0.05825348314829171,8.0,0,0.08025152236223221,0.04519496997818351,8.0,0,0.10112586617469788,0.04806601372547448,8.0,0,0.09929660707712173,0.04526524827815592,8.0,0,0.08857614547014236,0.04833721579052508,8.0,26,55,81
+TRM,339,268871,always,10,0,26041,5000,26041,5000,True,1.0,0.005506931804120541,-0.000649735789920669,-0.0023055931087583303,1.0,0.005506931804120541,1,-0.013051114045083523,-0.034554101643152535,0.0,1,-0.010595124214887619,-0.013923505903221667,0.0,1,-0.016103241592645645,-0.03829544549807906,0.0,1,-0.01874392479658127,-0.022432262310758233,0.0,1,0.014973897486925125,-0.0026811092684511095,2.0,1,0.04309788718819618,0.0012637899999390356,3.0,1,0.03477736562490463,-0.012206629384309053,2.0,1,0.0848628431558609,-0.000141882715979591,3.0,1,0.003733202116563916,-0.018441309890476987,1.0,1,0.005506931804120541,-0.000649735789920669,1.0,26,55,81
+TRM,340,326575,always,10,0,26041,5000,26041,5000,True,1.0,0.009964631870388985,0.0018629467008395295,-0.0008072855862337747,6.0,0.018298730774404248,1,-0.030009012669324875,-0.0434107156470418,0.0,1,-0.0036096088588237762,-0.012165924068540335,0.0,1,0.012427943758666515,-0.018463731568772346,1.0,1,0.006393629126250744,-0.012817271519452333,1.0,1,0.024155961349606514,0.00521769211627543,4.0,1,0.04349703714251518,-0.020271105517167598,1.0,1,4.452391658560373e-05,-0.02273836694757847,1.0,1,0.052120573818683624,-0.0077524068765342236,1.0,1,-0.013813884928822517,-0.022367794532328844,0.0,1,0.009964631870388985,0.0018629467008395295,6.0,27,54,81
+TRM,341,221979,always,10,0,26041,5000,26041,5000,True,1.0,0.0057755568996071815,0.002679143515706528,0.0004281480942154303,7.0,0.025451076275203377,1,0.0005879394011572003,-0.02319576973968651,1.0,1,-0.006205323152244091,-0.011817513790447265,0.0,1,-0.0070790438912808895,-0.03487279935507104,0.0,1,-0.006618969142436981,-0.017639176221564412,0.0,1,0.014856617897748947,0.0005937259847996756,3.0,1,0.057343609631061554,-0.017875418299809098,1.0,1,-0.009882571175694466,-0.021105147781781852,0.0,1,0.018823016434907913,0.001791275863070041,3.0,1,0.016520829871296883,-0.015803057060111314,1.0,1,0.0057755568996071815,0.002679143515706528,7.0,26,55,81
+TRM,342,129989,never,0,0,,,,,False,0.7037037014961243,0.08201338350772858,0.038272623205557466,0.022911937441676855,8.0,0.3061809856444597,0,0.032649215310811996,0.0011968861072091386,2.0,0,0.08462546020746231,0.05143325193785131,8.0,0,0.11691150069236755,0.0567859522998333,8.0,0,0.07369551807641983,0.04834829526953399,8.0,0,0.09974979609251022,0.05539208231493831,8.0,0,0.09020650386810303,0.04961665440350771,8.0,0,0.08656012266874313,0.04302552645094693,8.0,0,0.09443887323141098,0.043642343021929264,8.0,0,0.10734106600284576,0.05064966366626322,8.0,0,0.08201338350772858,0.038272623205557466,8.0,27,54,81
+TRM,343,391646,always,10,0,26041,5000,26041,5000,True,1.0,0.004474247805774212,0.0024558492586947978,0.0008096914389170706,7.0,0.021820543333888054,1,-0.038420651108026505,-0.04646911285817623,0.0,1,-0.011081356555223465,-0.014147837646305561,0.0,1,-0.0038042180240154266,-0.040428224951028824,0.0,1,-0.015799470245838165,-0.021271103993058205,0.0,1,0.0007913049776107073,-0.004920017097902019,2.0,1,0.055193834006786346,-0.010211184082891123,2.0,1,-0.002290316391736269,-0.021722110512200743,0.0,1,0.03999920189380646,-0.009979580790968612,1.0,1,0.050212763249874115,-0.010709589194448199,2.0,1,0.004474247805774212,0.0024558492586947978,7.0,28,53,81
+TRM,344,194609,always,10,0,26041,5000,26041,5000,True,1.0,0.0027242565993219614,0.001175593126390595,-6.849579222034663e-05,7.0,0.012597465014550835,1,-0.04845055192708969,-0.05369759630411863,0.0,1,-0.0013378571020439267,-0.01029367845330853,0.0,1,-0.014512785710394382,-0.03742876194883138,0.0,1,-0.0019694005604833364,-0.018935664294986054,0.0,1,0.003386275377124548,-0.005498485959833488,1.0,1,0.0035710586234927177,-0.027759708813391626,1.0,1,-0.023856785148382187,-0.028623469173908234,0.0,1,0.019246546551585197,-0.010046061368484516,1.0,1,-0.002616000361740589,-0.021686798776499927,0.0,1,0.0027242565993219614,0.001175593126390595,7.0,27,54,81
+TRM,345,191177,transient_or_regressed,7,4,52082,10000,,,False,0.790123462677002,0.09839274734258652,0.0516589533071965,0.031662747729569674,8.0,0.413271626457572,0,0.07182195782661438,0.029573795618489385,8.0,1,-0.006401121616363525,-0.013084354111924767,0.0,1,0.005104153882712126,-0.012954435602296144,1.0,1,-0.00853029452264309,-0.016434550285339355,0.0,1,0.015023228712379932,-0.0014157868718029931,2.0,1,0.014491166919469833,-0.0213974128710106,1.0,1,-0.0010147731518372893,-0.02042931148025673,0.0,0,0.10521458834409714,0.04922838322818279,8.0,1,-0.005617092829197645,-0.01573130948236212,0.0,0,0.09839274734258652,0.0516589533071965,8.0,17,64,81
+TRM,346,167181,transient_or_regressed,3,4,78123,15000,,,False,0.654321014881134,0.09925412386655807,0.04770515183918178,0.02688995422795415,8.0,0.38164121471345425,0,0.043327443301677704,0.017317496043915526,7.0,0,0.07308898866176605,0.040549846831709146,8.0,1,0.05716080963611603,0.010964809407596476,5.0,0,0.09656529128551483,0.05318515840917826,8.0,0,0.12056202441453934,0.059067023918032646,8.0,1,0.057032108306884766,-0.004926324589177966,3.0,1,0.07975687831640244,0.03814865235472098,8.0,0,0.09877408295869827,0.051677141804248095,8.0,0,0.08755470812320709,0.04304761532694101,8.0,0,0.09925412386655807,0.04770515183918178,8.0,25,56,81
+TRM,347,31096,stable_learned,1,1,260410,50000,260410,50000,True,1.0,0.013833948411047459,0.00015740918752271682,-0.004820669943001121,2.0,0.021726086735725403,0,0.0037759325932711363,-0.004679892903368454,1.0,0,0.044080302119255066,0.02729897096287459,8.0,0,0.06927372515201569,0.041802563704550266,8.0,0,0.09202007949352264,0.06132129114121199,8.0,0,0.12287107110023499,0.0663654450327158,8.0,0,0.0902162492275238,0.05976478522643447,8.0,0,0.10384999215602875,0.05950347566977143,8.0,0,0.09451095759868622,0.053745291428640485,8.0,0,0.1114143654704094,0.05545883346349001,8.0,1,0.013833948411047459,0.00015740918752271682,2.0,25,56,81
+TRM,348,218359,always,10,0,26041,5000,26041,5000,True,1.0,0.0013873556163161993,-0.0018423147339490242,-0.003957604843890294,2.0,0.002283455745782703,1,-0.024443678557872772,-0.03955187276005745,0.0,1,-0.007207806687802076,-0.015458917536307126,0.0,1,0.0004088652494829148,-0.033691147375066066,1.0,1,-0.011597206816077232,-0.022138047264888883,0.0,1,0.0013140306109562516,-0.005414362582087051,2.0,1,0.021513178944587708,-0.019954276736825705,1.0,1,-0.007389248814433813,-0.0265001860097982,0.0,1,0.018966643139719963,-0.010491013061255217,1.0,1,-0.011946117505431175,-0.025971784023568034,0.0,1,0.0013873556163161993,-0.0018423147339490242,2.0,27,54,81
+TRM,349,385443,always,10,0,26041,5000,26041,5000,True,1.0,0.0050588068552315235,-0.001953390776179731,-0.004089264723006636,2.0,0.005584198981523514,1,-0.030151735991239548,-0.0409613698720932,0.0,1,0.004968288354575634,-0.005948477759375237,2.0,1,-0.025008786469697952,-0.04226211225613952,0.0,1,0.009244772605597973,-0.009430063015315682,2.0,1,0.02043623849749565,0.004257688007783145,5.0,1,0.03830669820308685,-0.0197421470656991,1.0,1,-0.011225253343582153,-0.024262173334136605,0.0,1,0.053008805960416794,0.004888595314696431,3.0,1,0.03877915441989899,-0.01075430607306771,2.0,1,0.0050588068552315235,-0.001953390776179731,2.0,26,55,81
+TRM,350,405260,transient_or_regressed,3,6,78123,15000,,,False,0.6172839403152466,0.12195510417222977,0.05435616453178227,0.02978215692564845,8.0,0.43484931625425816,0,0.016584627330303192,0.010297721659298986,8.0,0,0.05688314884901047,0.025525044300593436,8.0,1,0.012069839984178543,-0.017773082188796252,1.0,0,0.09635923057794571,0.059155785478651524,8.0,0,0.11579710990190506,0.06282731937244534,8.0,1,0.020962940528988838,-0.015948286978527904,1.0,0,0.11294041574001312,0.054537874879315495,8.0,1,0.10713290423154831,0.049404525430873036,8.0,0,0.12506741285324097,0.059155256021767855,8.0,0,0.12195510417222977,0.05435616453178227,8.0,25,56,81
+TRM,351,337508,always,10,0,26041,5000,26041,5000,True,1.0,-0.00097763875965029,-0.004658026140532456,-0.006830017897300422,0.0,0.0,1,-0.05128152668476105,-0.05441103735938668,0.0,1,-0.007615022826939821,-0.012704329041298479,0.0,1,-0.010904069058597088,-0.038540737121365964,0.0,1,-0.009236009791493416,-0.020863548386842012,0.0,1,0.009706353768706322,-0.003920891731468146,1.0,1,0.052401017397642136,-0.00782580312807113,2.0,1,-0.005157649517059326,-0.025763426208868623,0.0,1,0.005501072388142347,-0.011889939611137379,2.0,1,-0.0003603739896789193,-0.01681594616093207,0.0,1,-0.00097763875965029,-0.004658026140532456,0.0,23,58,81
+TRM,352,296930,flaky_then_stable,9,2,26041,5000,182287,35000,True,1.0,0.01683926023542881,-0.0002886708243750036,-0.007196396356448531,3.0,0.031013573054224253,1,-0.044413916766643524,-0.052584707736968994,0.0,1,-0.0010136355413123965,-0.0101384397567017,0.0,1,-0.006441502366214991,-0.035834252659697086,0.0,1,-0.01546313427388668,-0.024299517506733537,0.0,1,0.002058985410258174,-0.004912669246550649,1.0,0,0.10178802162408829,0.047044685343280435,8.0,1,0.00956468190997839,-0.0164350348059088,1.0,1,0.04031837731599808,-0.007687714911298826,2.0,1,0.002086338121443987,-0.017090359411668032,1.0,1,0.01683926023542881,-0.0002886708243750036,3.0,25,56,81
+TRM,353,211955,always,10,0,26041,5000,26041,5000,True,1.0,0.028430042788386345,0.0015201742244244087,-0.0037666087155230343,2.0,0.0295610879547894,1,-0.05294577777385712,-0.056044137105345726,0.0,1,-0.01003353577107191,-0.014300506911240518,0.0,1,-0.03748830407857895,-0.04332495480775833,0.0,1,0.004383642692118883,-0.019158909271936864,1.0,1,-0.0002710534317884594,-0.0033073448030336294,0.0,1,0.027520952746272087,-0.036004609894007444,1.0,1,-0.016368817538022995,-0.028785516740754247,0.0,1,0.007222768850624561,-0.01471410266822204,2.0,1,0.04012931138277054,-0.017051211092621088,1.0,1,0.028430042788386345,0.0015201742244244087,2.0,26,55,81
+TRM,354,289526,always,10,0,26041,5000,26041,5000,True,1.0,0.0028404267504811287,-0.0008666614885441959,-0.0034639892110135406,4.0,0.006922664935700595,1,-0.028356697410345078,-0.040772638749331236,0.0,1,0.010837643407285213,-0.006921224092366174,1.0,1,-0.010183736681938171,-0.03827306255698204,0.0,1,-0.0026905047707259655,-0.013940698932856321,0.0,1,0.026725441217422485,0.0032049137516878545,4.0,1,0.03524228185415268,-0.02711638156324625,1.0,1,-0.015146222896873951,-0.02562124456744641,0.0,1,0.023725325241684914,-0.009963763382984325,1.0,1,0.0191247146576643,-0.009071593941371248,2.0,1,0.0028404267504811287,-0.0008666614885441959,4.0,26,55,81
+TRM,355,38565,always,10,0,26041,5000,26041,5000,True,1.0,0.024856869131326675,0.005381172250963573,-0.0018061800710711395,5.0,0.05129990971181542,1,-0.047225043177604675,-0.051239386666566133,0.0,1,-0.002358247060328722,-0.010015529056545347,0.0,1,-0.0028354076202958822,-0.03455156131531112,0.0,1,-0.0023837608750909567,-0.015016927703982219,0.0,1,-0.0001163270280812867,-0.004396663674015144,0.0,1,0.02115166373550892,-0.021115746398891133,2.0,1,-0.003461291082203388,-0.024222875596024096,0.0,1,0.022524580359458923,-0.0033466557215433568,2.0,1,0.0069043636322021484,-0.019299815758131444,1.0,1,0.024856869131326675,0.005381172250963573,5.0,23,58,81
+TRM,356,74012,flaky_then_stable,8,3,52082,10000,130205,25000,True,1.0,0.017247945070266724,-0.0005907870800001547,-0.00607033830601722,3.0,0.02078062586952001,0,0.023752320557832718,0.005388035446230788,5.0,1,0.06598102301359177,0.01065313366416376,5.0,1,0.008137100376188755,-0.01645446044858545,1.0,0,0.10934081673622131,0.04618797975126654,8.0,1,-9.087195940082893e-05,-0.008559587006857328,0.0,1,0.034289173781871796,-0.007364362070802599,2.0,1,-0.021645035594701767,-0.03171801846474409,0.0,1,0.039817314594984055,0.003154013160383329,4.0,1,0.018398120999336243,-0.008621915825642645,2.0,1,0.017247945070266724,-0.0005907870800001547,3.0,26,55,81
+TRM,357,95070,always,10,0,26041,5000,26041,5000,True,1.0,-0.0030188574455678463,-0.004881716944510117,-0.006168888765387237,0.0,0.0,1,-0.04873763397336006,-0.054411145858466625,0.0,1,-0.010081645101308823,-0.014625221258029342,0.0,1,-0.0002874550991691649,-0.03129226720920997,0.0,1,-0.0011541317217051983,-0.018451373965945095,0.0,1,0.003849540837109089,-0.0046479096345137805,1.0,1,-0.0035267500206828117,-0.024098642054013908,0.0,1,0.006854616571217775,-0.026735000254120678,1.0,1,0.029067397117614746,-0.004352365736849606,3.0,1,0.048118364065885544,-0.012047084630466998,1.0,1,-0.0030188574455678463,-0.004881716944510117,0.0,25,56,81
+TRM,358,408908,always,10,0,26041,5000,26041,5000,True,1.0,0.000398939911974594,-0.0012722149940600502,-0.002435507223708555,1.0,0.000398939911974594,1,-0.046675555408000946,-0.05375969735905528,0.0,1,-0.00232417369261384,-0.008238781127147377,0.0,1,-0.017710160464048386,-0.038383294362574816,0.0,1,-0.009533176198601723,-0.01637902797665447,0.0,1,-0.0020202333107590675,-0.006453777081333101,0.0,1,0.013244335539638996,-0.025158982374705374,1.0,1,-0.014612652361392975,-0.024961770162917674,0.0,1,0.03845525532960892,-0.008464468875899911,1.0,1,-0.018646935001015663,-0.027085822308436036,0.0,1,0.000398939911974594,-0.0012722149940600502,1.0,26,55,81
+TRM,359,133668,always,10,0,26041,5000,26041,5000,True,1.0,0.003630927996709943,4.842781345359981e-05,-0.002950214606244117,4.0,0.012188280932605267,1,-0.03898213058710098,-0.04945932328701019,0.0,1,-0.006412035785615444,-0.010560228023678064,0.0,1,0.0035822675563395023,-0.032471837650518864,1.0,1,-0.013285248540341854,-0.02155350346583873,0.0,1,0.0014770834241062403,-0.006921075015270617,1.0,1,0.06325604766607285,-0.02530956408008933,1.0,1,0.004943218547850847,-0.018836570263374597,1.0,1,0.00664443289861083,-0.010073212659335695,1.0,1,-0.004358141217380762,-0.018833169771824032,0.0,1,0.003630927996709943,4.842781345359981e-05,4.0,25,56,81
+TRM,360,131029,always,10,0,26041,5000,26041,5000,True,1.0,0.0026510809548199177,0.0005946217770542717,-0.00016508426051586866,5.0,0.005515993470908143,1,-0.04010288044810295,-0.049232042860239744,0.0,1,-0.006070164032280445,-0.009378227114211768,0.0,1,-0.0021727231796830893,-0.03598765467177145,0.0,1,-0.007696240674704313,-0.021870583470445126,0.0,1,0.002558655571192503,-0.002887175200157799,3.0,1,0.052950967103242874,-0.01564300083555281,1.0,1,-0.011374992318451405,-0.02615622349549085,0.0,1,0.0333411730825901,-0.005587261985056102,2.0,1,0.03898799791932106,-0.013920853089075536,1.0,1,0.0026510809548199177,0.0005946217770542717,5.0,26,55,81
+TRM,361,369245,never,0,0,,,,,False,0.6296296119689941,0.11996498703956604,0.05922922072932124,0.03798631206154823,8.0,0.47383376583456993,0,0.01599932461977005,-0.0017872433818411082,2.0,0,0.014624809846282005,0.004615522462700028,5.0,0,0.047815073281526566,0.03157957666553557,8.0,0,0.08644465357065201,0.05554646719247103,8.0,0,0.10266148298978806,0.06906038010492921,8.0,0,0.11994946002960205,0.07087221695110202,8.0,0,0.10826238244771957,0.06811381923034787,8.0,0,0.11530084162950516,0.06947493460029364,8.0,0,0.127641960978508,0.06884481338784099,8.0,0,0.11996498703956604,0.05922922072932124,8.0,24,57,81
+TRM,362,242661,always,10,0,26041,5000,26041,5000,True,1.0,0.002380446996539831,-0.0017365094208798837,-0.003976615029387176,2.0,0.0038270712830126286,1,-0.046559806913137436,-0.05346601502969861,0.0,1,-0.0038240801077336073,-0.010902268724748865,0.0,1,0.00015511459787376225,-0.03549551346441149,1.0,1,-0.0010728465858846903,-0.020182100735837594,0.0,1,0.0029078125953674316,-0.0020937886874889955,2.0,1,0.03688779100775719,-0.028344727121293545,1.0,1,0.002457371912896633,-0.02386419742833823,1.0,1,0.03215605020523071,-0.0027474508970044553,2.0,1,0.001339426962658763,-0.019437002862105146,1.0,1,0.002380446996539831,-0.0017365094208798837,2.0,23,58,81
+TRM,363,131068,always,10,0,26041,5000,26041,5000,True,1.0,0.006922096945345402,0.0013377120903896866,-0.001189897800941253,7.0,0.01615687027515378,1,-0.01003734115511179,-0.029113199096173048,0.0,1,-0.0040230765007436275,-0.011527877359185368,0.0,1,-0.005112167913466692,-0.03786273399600759,0.0,1,-0.01203127857297659,-0.01827107951976359,0.0,1,0.013600289821624756,-0.003553174261469394,2.0,1,0.033294785767793655,-0.029779282165691257,1.0,1,-0.019633276388049126,-0.027864675037562847,0.0,1,0.025983719155192375,-0.012616194551810622,1.0,1,0.03301209956407547,-0.012676294427365065,1.0,1,0.006922096945345402,0.0013377120903896866,7.0,26,55,81
+TRM,364,240805,always,10,0,26041,5000,26041,5000,True,1.0,0.010145938023924828,-0.0015305341148632579,-0.00536405760794878,2.0,0.013431744882836938,1,-0.043511953204870224,-0.05041365651413798,0.0,1,-0.003544445149600506,-0.012438565492630005,0.0,1,0.005505911074578762,-0.02488239819649607,1.0,1,0.004130507353693247,-0.01225280313519761,1.0,1,0.006482685916125774,-0.0027109950588055653,2.0,1,0.048865363001823425,-0.011372018896508962,2.0,1,-0.0059327734634280205,-0.016041574126575142,0.0,1,0.032888684421777725,-0.008812033658614382,2.0,1,0.02409942075610161,-0.014187883934937418,1.0,1,0.010145938023924828,-0.0015305341148632579,2.0,26,55,81
+TRM,365,355466,flaky_then_stable,3,3,130205,25000,234369,45000,True,1.0,0.10485989600419998,0.030456134933046997,0.0029812490101903677,6.0,0.254462162964046,0,0.0014202322345227003,-0.005445716189569794,1.0,0,0.04161188751459122,0.019757405383643345,7.0,0,0.07607153058052063,0.044411624083295465,8.0,0,0.09811212122440338,0.061599822249263525,8.0,1,0.06304611265659332,0.018182252999395132,7.0,0,0.12031479179859161,0.052652452141046524,8.0,0,0.12397768348455429,0.0447802753187716,8.0,0,0.10856928676366806,0.05357300932519138,8.0,1,0.12172627449035645,0.03740663221105933,8.0,1,0.10485989600419998,0.030456134933046997,6.0,26,55,81
+TRM,366,84276,flaky_then_stable,6,3,104164,20000,234369,45000,True,1.0,0.006904762703925371,-0.0022511139832204208,-0.006025041802786291,3.0,0.010473911301232874,0,0.032764408737421036,0.002862856912543066,3.0,0,0.06606923043727875,0.024590052940766327,8.0,0,0.06289467215538025,0.034900388098321855,8.0,1,-0.011738910339772701,-0.018865085090510547,0.0,1,0.03514513745903969,0.010671825395547785,6.0,1,0.039833854883909225,-0.019898962054867297,1.0,1,5.130489626026247e-06,-0.017861660998164552,1.0,0,0.0962805300951004,0.05042587500065565,8.0,1,0.026048118248581886,-0.0030835742072667927,2.0,1,0.006904762703925371,-0.0022511139832204208,3.0,25,56,81
+TRM,367,135944,always,10,0,26041,5000,26041,5000,True,1.0,0.007230115123093128,0.0025307470641564578,0.0007451229612343013,7.0,0.021687175380066037,1,-0.04467832297086716,-0.05265587288886309,0.0,1,-0.0048243701457977295,-0.01043881825171411,0.0,1,-0.006994187831878662,-0.03539694333449006,0.0,1,-0.0041405451484024525,-0.015785278344992548,0.0,1,0.010209093801677227,0.0014481817634077743,3.0,1,0.0019934421870857477,-0.027011711237719283,1.0,1,-0.012969468720257282,-0.02335408132057637,0.0,1,0.10618235170841217,0.004403225248097442,2.0,1,0.019748015329241753,-0.018353157327510417,1.0,1,0.007230115123093128,0.0025307470641564578,7.0,26,55,81
+TRM,368,109376,flaky_then_stable,8,4,26041,5000,208328,40000,True,1.0,0.022008506581187248,0.00823365114047192,0.000812716840300709,7.0,0.0724008793476969,1,-0.04515222832560539,-0.05313797201961279,0.0,0,0.08656439185142517,0.06830050563439727,8.0,1,0.0012699023354798555,-0.028995301661780104,1.0,1,0.010264856740832329,-0.009121821669396013,1.0,1,0.029367011040449142,0.012187951011583209,8.0,1,-0.018077507615089417,-0.03555663791485131,0.0,0,0.0998578891158104,0.05878593446686864,8.0,1,0.04638991132378578,0.024547544424422085,8.0,1,0.030040280893445015,-0.00031645686249248683,3.0,1,0.022008506581187248,0.00823365114047192,7.0,27,54,81
+TRM,369,283196,always,10,0,26041,5000,26041,5000,True,1.0,-0.0012661012588068843,-0.002671780894161202,-0.003712535777594894,0.0,0.0,1,-0.051554691046476364,-0.05664969841018319,0.0,1,-0.013395216315984726,-0.015028833877295256,0.0,1,0.009129267185926437,-0.03497640881687403,1.0,1,-0.015230592340230942,-0.019962929654866457,0.0,1,0.0005596763803623617,-0.004418377277033869,1.0,1,0.034881412982940674,-0.02418457460589707,1.0,1,-0.028173642233014107,-0.029743858380243182,0.0,1,0.058453623205423355,-0.003275257200584747,1.0,1,0.018401645123958588,-0.016926896292716265,1.0,1,-0.0012661012588068843,-0.002671780894161202,0.0,26,55,81
+TRM,370,57056,transient_or_regressed,3,2,182287,35000,,,False,0.6172839403152466,0.09857205301523209,0.05308002373203635,0.03310020826756954,8.0,0.4246401898562908,0,0.041122302412986755,0.028088811319321394,8.0,0,0.06285123527050018,0.03853617329150438,8.0,0,0.09709455817937851,0.051885996013879776,8.0,0,0.1072065606713295,0.06903152680024505,8.0,0,0.12061528861522675,0.0641373940743506,8.0,0,0.10092990845441818,0.0611847210675478,8.0,1,0.04640258476138115,0.006897095445310697,4.0,1,0.028149837628006935,-0.0015339364908868447,3.0,1,0.039178743958473206,-0.0003178477636538446,3.0,0,0.09857205301523209,0.05308002373203635,8.0,26,55,81
+TRM,371,110033,transient_or_regressed,1,2,182287,35000,,,False,0.7283950448036194,0.11229291558265686,0.04964763473253697,0.028917493065819144,8.0,0.3971810778602958,0,0.00849168561398983,-0.0046950814430601895,2.0,0,0.05277601256966591,0.021569567266851664,8.0,0,0.07185190171003342,0.042018847074359655,8.0,0,0.0905870869755745,0.04895136482082307,8.0,0,0.10951878130435944,0.0606453069485724,8.0,0,0.10803848505020142,0.059522501192986965,8.0,1,0.048144806176424026,0.0019221867578380625,3.0,0,0.11372817307710648,0.0520485972519964,8.0,0,0.1255759447813034,0.058049304876476526,8.0,0,0.11229291558265686,0.04964763473253697,8.0,26,55,81
+TRM,372,420979,always,10,0,26041,5000,26041,5000,True,1.0,0.012325746938586235,-0.000740222523745615,-0.003969299723394215,2.0,0.013251957891043276,1,-0.01139871496707201,-0.028745246003381908,0.0,1,-0.0030475312378257513,-0.009727872296934947,0.0,1,0.009798542596399784,-0.025693995063193142,1.0,1,0.016743920743465424,-0.011185160554305185,1.0,1,0.013959147967398167,-0.001963964314199984,2.0,1,0.0026341741904616356,-0.02955170819768682,1.0,1,-0.013307815417647362,-0.030285736545920372,0.0,1,0.02362210862338543,-0.011825055145891383,1.0,1,-0.01903848722577095,-0.027472108136862516,0.0,1,0.012325746938586235,-0.000740222523745615,2.0,26,55,81
+TRM,373,394495,never,0,0,,,,,False,0.5061728358268738,0.08463024348020554,0.046097295824438334,0.030756006948649883,8.0,0.36877836659550667,0,0.01705896109342575,-0.002999325930431951,4.0,0,0.06252797693014145,0.03292360925115645,8.0,0,0.08108843117952347,0.05036365264095366,8.0,0,0.10664358735084534,0.05644568894058466,8.0,0,0.10248790681362152,0.05248423735611141,8.0,0,0.11709845066070557,0.05883250990882516,8.0,0,0.10417603701353073,0.04832935822196305,8.0,0,0.11266498267650604,0.05281772348098457,8.0,0,0.09685758501291275,0.04381847055628896,8.0,0,0.08463024348020554,0.046097295824438334,8.0,24,57,81
+TRM,374,247658,always,10,0,26041,5000,26041,5000,True,1.0,0.00019134397734887898,-0.003243854906031629,-0.004461143515072763,1.0,0.00019134397734887898,1,0.033430375158786774,0.008582154347095639,5.0,1,0.007811215240508318,-0.0033013146021403372,2.0,1,0.011533587239682674,-0.010563584335613996,1.0,1,0.013777485117316246,-0.0012057593630743213,3.0,1,0.004503200296312571,-0.005817126133479178,1.0,1,0.05519276112318039,0.021694598195608705,8.0,1,0.05641023442149162,0.012403064989484847,5.0,1,0.08026604354381561,0.03369992517400533,8.0,1,-0.005353183951228857,-0.012996092264074832,0.0,1,0.00019134397734887898,-0.003243854906031629,1.0,25,56,81
+TRM,375,304714,always,10,0,26041,5000,26041,5000,True,1.0,0.006082173902541399,-0.0027869549521710724,-0.004916943144053221,1.0,0.006082173902541399,1,-0.0316610150039196,-0.042492876295000315,0.0,1,-0.011748523451387882,-0.017980461241677403,0.0,1,-0.011900773271918297,-0.03818235988728702,0.0,1,-0.004026893991976976,-0.01599958271253854,0.0,1,0.007766983471810818,-0.0005891457767575048,4.0,1,0.05305495113134384,-0.01852132671047002,1.0,1,-0.0025659191887825727,-0.027322967624058947,0.0,1,0.04521484673023224,-0.009629408945329487,1.0,1,-0.0006511085666716099,-0.018052763014566153,0.0,1,0.006082173902541399,-0.0027869549521710724,1.0,23,58,81
+TRM,376,391114,always,10,0,26041,5000,26041,5000,True,1.0,0.02036079578101635,0.0025893800557241775,-0.002126973879057914,4.0,0.029222935962025076,1,-0.04593819007277489,-0.05348871508613229,0.0,1,0.005331783555448055,-0.0035597099631559104,3.0,1,-0.02407258190214634,-0.03353701904416084,0.0,1,-0.01564003899693489,-0.021360640646889806,0.0,1,0.015234383754432201,-0.00040055919089354575,2.0,1,0.02802262455224991,-0.02021413373586256,3.0,1,-0.018691619858145714,-0.02687249146401882,0.0,1,0.05090904235839844,0.0001371419166389387,3.0,1,0.004792102612555027,-0.013308543074344925,2.0,1,0.02036079578101635,0.0025893800557241775,4.0,28,53,81
+TRM,377,115729,flaky_then_stable,7,3,52082,10000,156246,30000,True,1.0,0.004724876955151558,-0.005227519606705755,-0.010650370153598487,2.0,0.008907724171876907,0,0.0326266773045063,0.014708500471897423,7.0,1,0.007897566072642803,-0.0011177435308127315,4.0,1,0.04804347828030586,0.018379351182375103,8.0,0,0.09894727915525436,0.05272971373051405,8.0,0,0.11117498576641083,0.0511684634257108,8.0,1,0.03110678121447563,-0.018068442936055362,1.0,1,0.01420327927917242,-0.02517824003007263,1.0,1,0.07022334635257721,0.013162715142243542,4.0,1,0.024185096845030785,9.782100096344948e-05,3.0,1,0.004724876955151558,-0.005227519606705755,2.0,26,55,81
+TRM,378,268614,stable_learned,7,1,104164,20000,104164,20000,True,1.0,-0.002215307205915451,-0.004254193365341052,-0.005552666843868792,0.0,0.0,0,0.03741505369544029,0.02037273911992088,8.0,0,0.09074684977531433,0.05063354526646435,8.0,0,0.08808126300573349,0.05234988545998931,8.0,1,0.008907287381589413,-0.004831107973586768,2.0,1,0.0019983723759651184,-0.005198441329412162,1.0,1,0.051525190472602844,0.004142891964875162,4.0,1,-0.01023014634847641,-0.02496106189209968,0.0,1,0.003541128244251013,-0.01633784972364083,1.0,1,0.005102204624563456,-0.023555797000881284,1.0,1,-0.002215307205915451,-0.004254193365341052,0.0,26,55,81
+TRM,379,216643,always,10,0,26041,5000,26041,5000,True,1.0,0.016243932768702507,0.0011192783422302455,-0.0038721426972188056,4.0,0.024442797526717186,1,-0.03990278020501137,-0.050369507633149624,0.0,1,0.05080312862992287,0.019657467790239025,8.0,1,0.029890041798353195,-0.008421679434832186,2.0,1,-0.013233858160674572,-0.021697860560379922,0.0,1,0.03771910071372986,0.0039936869870871305,4.0,1,0.04111529141664505,-0.022898476803675294,1.0,1,-0.012694597244262695,-0.024825980770401657,0.0,1,0.07485548406839371,0.0021343866683309898,2.0,1,0.05043555423617363,-0.010109393682796508,2.0,1,0.016243932768702507,0.0011192783422302455,4.0,27,54,81
+TRM,380,97682,always,10,0,26041,5000,26041,5000,True,1.0,0.009047883562743664,0.0006189130290294997,-0.0017567823233548552,4.0,0.011978433525655419,1,-0.048519521951675415,-0.05417622160166502,0.0,1,0.0015476399566978216,-0.007822451036190614,1.0,1,-0.027611656114459038,-0.04288007575087249,0.0,1,-0.01564248651266098,-0.02237662603147328,0.0,1,0.006855861749500036,-0.004582491790642962,2.0,1,0.03095289133489132,-0.03360836172942072,1.0,1,-0.016435472294688225,-0.02512141759507358,0.0,1,0.012501935474574566,-0.010044427413959056,1.0,1,0.02199522592127323,-0.012504449114203453,1.0,1,0.009047883562743664,0.0006189130290294997,4.0,23,58,81
+TRM,381,37099,always,10,0,26041,5000,26041,5000,True,1.0,-0.00018579274183139205,-0.0011260981518717017,-0.001897801470477134,0.0,0.0,1,-0.04275020956993103,-0.04990233154967427,0.0,1,-0.013983421958982944,-0.01763628574553877,0.0,1,-0.0234567578881979,-0.04367873468436301,0.0,1,-0.016333138570189476,-0.022740390617400408,0.0,1,-0.0014277296140789986,-0.004588330804836005,0.0,1,0.04485800489783287,-0.029404882807284594,1.0,1,-0.022212188690900803,-0.02876447979360819,0.0,1,0.02963244915008545,-0.010090986383147538,1.0,1,0.004538435954600573,-0.018667677126359195,1.0,1,-0.00018579274183139205,-0.0011260981518717017,0.0,24,57,81
+TRM,382,292375,transient_or_regressed,1,2,234369,45000,,,False,0.7777777910232544,0.10774195939302444,0.052234097849577665,0.032225907780230045,8.0,0.4178727827966213,0,0.04625576362013817,0.008744317688979208,5.0,0,0.07359811663627625,0.03118387731956318,8.0,0,0.08762574195861816,0.04169728420674801,8.0,0,0.10241451859474182,0.05373354535549879,8.0,0,0.11611879616975784,0.051723062293604016,8.0,0,0.10256817191839218,0.05274340067990124,8.0,0,0.12396564334630966,0.05084179528057575,8.0,0,0.12755683064460754,0.05991198029369116,8.0,1,0.039550505578517914,0.011859222406201297,6.0,0,0.10774195939302444,0.052234097849577665,8.0,26,55,81
+TRM,383,274711,always,10,0,26041,5000,26041,5000,True,1.0,0.004192299675196409,-0.002980632627441082,-0.005685248761437833,1.0,0.004192299675196409,1,-0.03636663034558296,-0.045549807604402304,0.0,1,-0.006977544631808996,-0.013201876368839294,0.0,1,-0.012708279304206371,-0.03856930637266487,0.0,1,0.03446441888809204,0.00036471123894443735,4.0,1,0.09034059196710587,0.04142994910944253,8.0,1,0.0494266003370285,0.002892357762902975,3.0,1,0.008970641531050205,-0.0220535023836419,1.0,1,0.042437873780727386,-0.0020526105945464224,2.0,1,-0.0002922595012933016,-0.016901742113986984,0.0,1,0.004192299675196409,-0.002980632627441082,1.0,24,57,81
+TRM,384,278703,always,10,0,26041,5000,26041,5000,True,1.0,0.039323534816503525,0.0032123065684572794,-0.0028988296398893,2.0,0.03959794534603134,1,-0.03745759278535843,-0.04342878796160221,0.0,1,-0.008550950326025486,-0.015213661245070398,0.0,1,-0.011682039126753807,-0.04142142669297755,0.0,1,-0.006436891388148069,-0.018667446391191334,0.0,1,0.0010515989270061255,-0.004140641971389414,1.0,1,0.042831968516111374,-0.017118054442107677,1.0,1,-0.011326628737151623,-0.026046958402730525,0.0,1,0.011202462948858738,-0.01475734676932916,1.0,1,0.03071361780166626,-0.017607805551961064,1.0,1,0.039323534816503525,0.0032123065684572794,2.0,26,55,81
+TRM,385,221234,transient_or_regressed,1,2,234369,45000,,,False,0.6296296119689941,0.09890231490135193,0.057150254026055336,0.036377038806676865,8.0,0.4572020322084427,0,0.009639676660299301,-0.002594796911580488,3.0,0,0.041601572185754776,0.022854155744425952,8.0,0,0.05509207397699356,0.03751296689733863,8.0,0,0.10699434578418732,0.0733167720027268,8.0,0,0.13990992307662964,0.08018873585388064,8.0,0,0.10769861191511154,0.06736795976758003,8.0,0,0.10050870478153229,0.04698395449668169,8.0,0,0.1221030130982399,0.06223374838009477,8.0,1,0.07414983958005905,0.020328005237388425,7.0,0,0.09890231490135193,0.057150254026055336,8.0,26,55,81
+TRM,386,205191,never,0,0,,,,,False,0.5308641791343689,0.10167215019464493,0.05227706511504948,0.033106735441833735,8.0,0.41821652092039585,0,0.04864378273487091,0.026937811402603984,8.0,0,0.06484045088291168,0.04767118766903877,8.0,0,0.0676201656460762,0.04950716905295849,8.0,0,0.10203825682401657,0.06431059818714857,8.0,0,0.1125824898481369,0.05632446054369211,8.0,0,0.10837244242429733,0.06800803961232305,8.0,0,0.11181221902370453,0.053796344669535756,8.0,0,0.11937826126813889,0.05348311131820083,8.0,0,0.11468055099248886,0.053135350812226534,8.0,0,0.10167215019464493,0.05227706511504948,8.0,24,57,81
+TRM,387,287482,transient_or_regressed,2,4,78123,15000,,,False,0.7160493731498718,0.10865354537963867,0.05131639936007559,0.03055271366611123,8.0,0.41053119488060474,0,0.05194846913218498,0.010958021599435597,5.0,0,0.062368810176849365,0.04076603660359979,8.0,1,0.014747056178748608,-0.012956733698956668,1.0,0,0.09522151201963425,0.05672797071747482,8.0,0,0.10456501692533493,0.06203811755403876,8.0,1,0.032300714403390884,-0.021877139864955097,2.0,0,0.12405747920274734,0.05917838728055358,8.0,0,0.11493778228759766,0.05410340637899935,8.0,0,0.1056893914937973,0.050390256801620126,8.0,0,0.10865354537963867,0.05131639936007559,8.0,24,57,81
+TRM,388,283530,always,10,0,26041,5000,26041,5000,True,1.0,0.0014884874690324068,-0.00025061165251827333,-0.0014022703326190822,3.0,0.003717484592925757,1,-0.03936467319726944,-0.04503878206014633,0.0,1,-0.012326367199420929,-0.01599561225157231,0.0,1,-0.0020304848439991474,-0.032394083973485976,0.0,1,-0.0057683587074279785,-0.020772668533027172,0.0,1,0.006825032643973827,-0.004690936591941863,1.0,1,0.04210982099175453,-0.03113480913452804,1.0,1,-0.018964074552059174,-0.027514309855178,0.0,1,0.028195299208164215,-0.007607793202623725,2.0,1,0.021269330754876137,-0.01891502714715898,1.0,1,0.0014884874690324068,-0.00025061165251827333,3.0,26,55,81
+TRM,389,224414,always,10,0,26041,5000,26041,5000,True,1.0,-0.003051399253308773,-0.005358554073609412,-0.0072384970262646675,0.0,0.0,1,-0.04685307294130325,-0.05356735596433282,0.0,1,-0.010789887979626656,-0.015194163075648248,0.0,1,-0.026910578832030296,-0.040700621670112014,0.0,1,-0.008206167258322239,-0.02017489296849817,0.0,1,0.004725809209048748,-0.0026923031982732937,3.0,1,0.019892405718564987,-0.028636779461521655,1.0,1,-0.007422300986945629,-0.027715425821952522,0.0,1,0.09438174962997437,-0.0028525032103061676,1.0,1,-0.007012251764535904,-0.020205535227432847,0.0,1,-0.003051399253308773,-0.005358554073609412,0.0,25,56,81
+TRM,390,343932,always,10,0,26041,5000,26041,5000,True,1.0,0.007562926970422268,0.001107565731217619,-0.001139529820648022,5.0,0.013700421346584335,1,-0.0337439589202404,-0.04075655620545149,0.0,1,-0.00870517361909151,-0.014219940872862935,0.0,1,-0.01282989140599966,-0.028859496931545436,0.0,1,-0.011873992159962654,-0.02076798933558166,0.0,1,-0.00025396488490514457,-0.007027378273050999,0.0,1,0.03628089278936386,-0.02607017895206809,1.0,1,-0.02034876123070717,-0.02544035529717803,0.0,1,0.05113302171230316,0.0010837540467036888,3.0,1,0.003995303064584732,-0.02079782309010625,1.0,1,0.007562926970422268,0.001107565731217619,5.0,25,56,81
+TRM,391,399811,always,10,0,26041,5000,26041,5000,True,1.0,-0.0002196305722463876,-0.0023938037229527254,-0.0037887810613028705,0.0,0.0,1,-0.06398112326860428,-0.0686207264661789,0.0,1,-0.011156314983963966,-0.013863342232070863,0.0,1,-0.023204781115055084,-0.04135027388110757,0.0,1,-0.02279507927596569,-0.029548774240538478,0.0,1,-0.001271237269975245,-0.00428345387626905,0.0,1,-0.009760201908648014,-0.03263469960074872,0.0,1,-0.022838134318590164,-0.02754549402743578,0.0,1,0.029243480414152145,-0.003110601450316608,2.0,1,-0.023935377597808838,-0.02915902412496507,0.0,1,-0.0002196305722463876,-0.0023938037229527254,0.0,34,47,81
+TRM,392,211334,always,10,0,26041,5000,26041,5000,True,1.0,0.00547268521040678,0.003595571659388952,0.0022299384872894734,7.0,0.029357501305639744,1,-0.050536904484033585,-0.0538688818924129,0.0,1,0.009367519058287144,-0.004082669169292785,1.0,1,-0.02457493357360363,-0.03485458274371922,0.0,1,-0.018824245780706406,-0.022548818727955222,0.0,1,0.0009686367120593786,-0.007657920272322372,1.0,1,0.019701730459928513,-0.025170290609821677,1.0,1,0.00487089715898037,-0.020547079446259886,1.0,1,0.02111043594777584,-0.011862899671541527,1.0,1,0.05413473770022392,-0.013861517305485904,1.0,1,0.00547268521040678,0.003595571659388952,7.0,27,54,81
+TRM,393,46351,stable_learned,8,1,78123,15000,78123,15000,True,1.0,0.003895816160365939,-0.0042215543508064,-0.007288664812222123,1.0,0.003895816160365939,0,0.06504472345113754,0.04337833356112242,8.0,0,0.06444663554430008,0.05003677401691675,8.0,1,0.047610294073820114,0.008930781128583476,6.0,1,0.0029227992054075003,-0.013569573377026245,1.0,1,0.0033393846824765205,-0.009660034629632719,2.0,1,-0.00021498615387827158,-0.01818066391570028,0.0,1,-0.014046111144125462,-0.02665615116711706,0.0,1,0.016662107780575752,-0.009200261381920427,1.0,1,0.007865991443395615,-0.01683095609769225,1.0,1,0.003895816160365939,-0.0042215543508064,1.0,26,55,81
+TRM,394,300514,flaky_then_stable,8,3,52082,10000,156246,30000,True,1.0,-0.0031422998290508986,-0.0050958200299646705,-0.006423426675610244,0.0,0.0,0,-0.009157323278486729,-0.017003874527290463,0.0,1,0.011769240722060204,-0.008171691035386175,1.0,1,-0.014119702391326427,-0.03521757305134088,0.0,1,-0.0022740873973816633,-0.01181076347711496,0.0,0,0.10328283160924911,0.055727827828377485,8.0,1,0.03546106442809105,-0.0267022243278916,2.0,1,-0.023505929857492447,-0.03153211669996381,0.0,1,0.04294154793024063,-0.01278358851413941,2.0,1,0.016378657892346382,-0.016611362458206713,1.0,1,-0.0031422998290508986,-0.0050958200299646705,0.0,24,57,81
+TRM,395,181808,always,10,0,26041,5000,26041,5000,True,1.0,0.006158755626529455,-0.0012781439800164662,-0.0037454747362062335,2.0,0.006832153012510389,1,-0.035708341747522354,-0.043712944723665714,0.0,1,-0.009817590937018394,-0.013868515496142209,0.0,1,0.004262077622115612,-0.03330851544160396,1.0,1,-0.0016520179342478514,-0.014562294905772433,0.0,1,0.004956488963216543,-0.005038794348365627,1.0,1,-0.01945856772363186,-0.03803494316525757,0.0,1,-0.00972592644393444,-0.023295988095924258,0.0,1,-0.00208565522916615,-0.015300031314836815,0.0,1,-0.021119974553585052,-0.02228672173805535,0.0,1,0.006158755626529455,-0.0012781439800164662,2.0,17,64,81
+TRM,396,390540,flaky_then_stable,6,3,78123,15000,208328,40000,True,1.0,0.12340833246707916,0.06358740106225014,0.0420707818120718,8.0,0.5086992084980011,0,0.037646982818841934,0.02306959591805935,8.0,0,0.04508918896317482,0.029758788645267487,8.0,1,0.047060467302799225,0.031650955555960536,8.0,1,0.028562381863594055,0.005397462227847427,5.0,1,0.02215166576206684,-0.005016350944060832,2.0,0,0.11594800651073456,0.0693359961733222,8.0,0,0.09941603243350983,0.057492227759212255,8.0,1,0.05063654109835625,-0.0057608227944001555,2.0,1,-0.005847069900482893,-0.01677779108285904,0.0,1,0.12340833246707916,0.06358740106225014,8.0,24,57,81
+TRM,397,398839,stable_learned,9,1,52082,10000,52082,10000,True,1.0,0.03431669622659683,0.011039638746296987,-0.0037605525576509535,5.0,0.10597048653289676,0,0.054169490933418274,0.029793501016683877,8.0,1,0.03147253766655922,0.004025584610644728,5.0,1,0.010244461707770824,-0.01608446950558573,1.0,1,0.02178671397268772,0.001510973403128446,5.0,1,0.012006720528006554,-0.002931697054009419,1.0,1,0.03891809284687042,-0.013272252748720348,1.0,1,0.022079233080148697,-0.016105092130601406,1.0,1,0.0336872898042202,-0.006973666982958093,2.0,1,-0.0010324480244889855,-0.016232343114097603,0.0,1,0.03431669622659683,0.011039638746296987,5.0,26,55,81
+TRM,398,47904,transient_or_regressed,3,6,52082,10000,,,False,0.5185185074806213,0.12767115235328674,0.06586818303912878,0.043368954211473465,8.0,0.5269454643130302,0,0.0587012954056263,0.028298662626184523,8.0,1,0.007020162884145975,-0.004340934414358344,2.0,0,0.1036766991019249,0.0650734780356288,8.0,0,0.07927054911851883,0.05313262389972806,8.0,1,0.042178597301244736,0.01315817586146295,7.0,0,0.1272345930337906,0.06932174228131771,8.0,0,0.0940844789147377,0.06199824856594205,8.0,1,0.040682338178157806,0.005177723651286215,4.0,0,0.1162257120013237,0.06467032060027122,8.0,0,0.12767115235328674,0.06586818303912878,8.0,22,59,81
+TRM,399,17613,transient_or_regressed,3,2,130205,25000,,,False,0.6296296119689941,0.11016923189163208,0.05822343844920397,0.040405046194791794,8.0,0.46578750759363174,0,0.03728289157152176,0.0165768020960968,8.0,0,0.03576836362481117,0.026107966201379895,8.0,0,0.04481049254536629,0.032288249814882874,8.0,0,0.06814678013324738,0.051899841986596584,8.0,1,0.05472905561327934,0.023145070881582797,8.0,1,0.04903196170926094,0.010447315871715546,5.0,1,0.03194598853588104,-0.008930245057854336,1.0,0,0.1052747368812561,0.0669787242077291,8.0,0,0.1198372170329094,0.058790712151676416,8.0,0,0.11016923189163208,0.05822343844920397,8.0,25,56,81
+TRM,400,400864,flaky_then_stable,5,5,104164,20000,260410,50000,True,1.0,0.0404498390853405,0.010837051930138841,-0.0029985408182255924,5.0,0.10253512067720294,0,0.00813415925949812,0.001258466436411254,4.0,0,0.04427628591656685,0.025189817883074284,8.0,0,0.09094314277172089,0.04187570232897997,8.0,1,0.041890621185302734,0.0038381094054784626,4.0,0,0.09662952274084091,0.0512461659964174,8.0,1,0.057673074305057526,0.017262340959860012,6.0,1,0.10254064202308655,0.04373024636879563,8.0,1,0.0875818058848381,0.03714291797950864,8.0,0,0.09816288948059082,0.05684442538768053,8.0,1,0.0404498390853405,0.010837051930138841,5.0,26,55,81
+TRM,401,126170,always,10,0,26041,5000,26041,5000,True,1.0,0.0022893818095326424,-0.0010319038683519466,-0.003036276058992371,4.0,0.003889873289153911,1,-0.02916899509727955,-0.043332444271072745,0.0,1,-0.007162491790950298,-0.011435822932980955,0.0,1,0.018846813589334488,-0.016985392197966576,1.0,1,-0.006449212785810232,-0.01969194848788902,0.0,1,4.1623068682383746e-05,-0.009368839390845096,1.0,1,-0.002304269000887871,-0.02844920929055661,0.0,1,-0.0014296373119577765,-0.02456874532799702,0.0,1,0.061372071504592896,-0.001363478833809495,2.0,1,0.031519629061222076,-0.011652700384729542,1.0,1,0.0022893818095326424,-0.0010319038683519466,4.0,27,54,81
+TRM,402,357082,flaky_then_stable,5,8,26041,5000,260410,50000,True,1.0,0.03493952751159668,0.008556901797419414,-0.0020001932862214744,5.0,0.08284653630107641,1,-0.017824111506342888,-0.028877340257167816,0.0,0,0.07524578273296356,0.036285045789554715,8.0,1,0.07884527742862701,0.03634352097287774,8.0,0,0.084469273686409,0.06491689151152968,8.0,1,0.017466090619564056,0.006207678183272947,5.0,0,0.13469205796718597,0.07391692558303475,8.0,1,0.04841070622205734,0.001230938796652481,3.0,0,0.10384892672300339,0.053621910978108644,8.0,0,0.1175660789012909,0.053911223309114575,8.0,1,0.03493952751159668,0.008556901797419414,5.0,22,59,81
+TRM,403,132759,always,10,0,26041,5000,26041,5000,True,1.0,0.004864614922553301,-0.0013673897774424404,-0.0039465481240767986,2.0,0.006905074696987867,1,-0.022746315225958824,-0.03933796868659556,0.0,1,0.011588537134230137,-0.004021066197310574,2.0,1,-0.009410503320395947,-0.034396415925584733,0.0,1,0.0049478113651275635,-0.013024619547650218,1.0,1,-0.001775814569555223,-0.006686796943540685,0.0,1,0.03081773966550827,-0.025480616837739944,1.0,1,-0.00962724070996046,-0.023626132286153734,0.0,1,0.02036934904754162,-0.008442767662927508,1.0,1,-0.020750543102622032,-0.023311086231842637,0.0,1,0.004864614922553301,-0.0013673897774424404,2.0,27,54,81
+TRM,404,265745,always,10,0,26041,5000,26041,5000,True,1.0,0.006490866653621197,-0.0015159450049395673,-0.003847792453598231,1.0,0.006490866653621197,1,-0.04769952595233917,-0.05589998885989189,0.0,1,-0.010954218916594982,-0.01455717789940536,0.0,1,-0.012827311642467976,-0.0379124820465222,0.0,1,-0.011103253811597824,-0.01975798886269331,0.0,1,0.0012860724236816168,-0.004076042532688007,1.0,1,0.006310597062110901,-0.02704663504846394,1.0,1,0.0025421339087188244,-0.024694053048733622,1.0,1,0.05063632130622864,-0.0012051041703671217,2.0,1,0.03223343938589096,-0.016256008646450937,1.0,1,0.006490866653621197,-0.0015159450049395673,1.0,26,55,81
+TRM,405,190217,always,10,0,26041,5000,26041,5000,True,1.0,0.0002446625439915806,-0.0022830895286460873,-0.003770421724766493,1.0,0.0002446625439915806,1,-0.028508910909295082,-0.04641469963826239,0.0,1,-0.0033787882421165705,-0.011037154035875574,0.0,1,-0.01101800799369812,-0.034495120868086815,0.0,1,-0.014443140476942062,-0.02119521819986403,0.0,1,0.006761351600289345,-0.004518173884207499,1.0,1,0.03790849447250366,-0.019463162636384368,1.0,1,-0.0014151496579870582,-0.021943570973235182,0.0,1,0.016342077404260635,-0.010282849980285391,1.0,1,-0.00354631245136261,-0.0187025701161474,0.0,1,0.0002446625439915806,-0.0022830895286460873,1.0,24,57,81
+TRM,406,386922,always,10,0,26041,5000,26041,5000,True,1.0,0.018035035580396652,0.002516471687727062,-0.0010600151399557944,3.0,0.024379100534133613,1,-0.03202909231185913,-0.03770214691758156,0.0,1,-0.0013129088329151273,-0.01191013383504469,0.0,1,-0.04534851014614105,-0.04735260782763362,0.0,1,-0.020499972626566887,-0.022722739726305008,0.0,1,-0.0012341270921751857,-0.0051655754941748455,0.0,1,-0.00569000793620944,-0.02764892455888912,0.0,1,0.08341449499130249,-0.013080793782137334,1.0,1,0.0029282881878316402,-0.014222166879335418,1.0,1,0.02446807362139225,-0.013425682263914496,1.0,1,0.018035035580396652,0.002516471687727062,3.0,25,56,81
+TRM,407,277613,always,10,0,26041,5000,26041,5000,True,1.0,0.011135675944387913,-0.0005356744513846934,-0.004473933367989957,3.0,0.016994106816127896,1,-0.0481913723051548,-0.05328355822712183,0.0,1,-0.005659707821905613,-0.012445133761502802,0.0,1,-0.013275382108986378,-0.03874153469223529,0.0,1,-0.0037462066393345594,-0.018829538574209437,0.0,1,0.0016240127151831985,-0.004194214852759615,1.0,1,0.029634414240717888,-0.02524574240669608,1.0,1,-0.0025583747774362564,-0.022837148164398968,0.0,1,0.04991207271814346,-0.003134292899630964,2.0,1,-0.0003412562364246696,-0.020862128480075626,0.0,1,0.011135675944387913,-0.0005356744513846934,3.0,26,55,81
+TRM,408,415720,always,10,0,26041,5000,26041,5000,True,1.0,0.014188474044203758,-0.0012049035431118682,-0.004641678533516824,1.0,0.014188474044203758,1,-0.042697008699178696,-0.04976815078407526,0.0,1,0.0036864590365439653,-0.0099177407391835,1.0,1,-0.007702504750341177,-0.03789754695026204,0.0,1,-0.005567245185375214,-0.01894556451588869,0.0,1,0.0188288614153862,0.005048975897807395,6.0,1,0.04576761648058891,-0.018955880834255368,1.0,1,0.01029137521982193,-0.013982722826767713,1.0,1,0.04620247706770897,-0.00840399379376322,1.0,1,-0.005979947280138731,-0.01628009887645021,0.0,1,0.014188474044203758,-0.0012049035431118682,1.0,22,59,81
+TRM,409,334392,never,0,0,,,,,False,0.6419752836227417,0.13424469530582428,0.05339943338185549,0.02778287511318922,8.0,0.4271954670548439,0,0.015576427802443504,0.003881901313434355,6.0,0,0.06158433109521866,0.033514106064103544,8.0,0,0.0997847244143486,0.054008938604965806,8.0,0,0.11124664545059204,0.048965099500492215,8.0,0,0.12479177117347717,0.06688062008470297,8.0,0,0.09863089770078659,0.0562297641299665,8.0,0,0.0970277488231659,0.047064258018508554,8.0,0,0.10431770235300064,0.05227793287485838,8.0,0,0.11347365379333496,0.061590660363435745,8.0,0,0.13424469530582428,0.05339943338185549,8.0,25,56,81
+TRM,410,344038,flaky_then_stable,2,3,182287,35000,260410,50000,True,1.0,0.05052182450890541,0.013013022349696257,-0.0004901034226350021,5.0,0.10752706055063754,0,0.026444081217050552,-0.011407437385059893,1.0,0,0.06744774430990219,0.025702385406475514,8.0,0,0.07394332438707352,0.03674767701886594,8.0,0,0.0831422507762909,0.048496544593945146,8.0,0,0.10719160735607147,0.05619050725363195,8.0,0,0.07050759345293045,0.04431454767473042,8.0,1,-0.01400283444672823,-0.025885378126986325,0.0,0,0.11744861304759979,0.05933787766844034,8.0,0,0.09951896220445633,0.05068833427503705,8.0,1,0.05052182450890541,0.013013022349696257,5.0,24,57,81
+TRM,411,292752,transient_or_regressed,2,2,156246,30000,,,False,0.6666666865348816,0.08964370936155319,0.05209352285601199,0.033332068007439375,8.0,0.4167481828480959,0,0.009474028833210468,-0.0024670280108693987,2.0,0,0.07401176542043686,0.032582315383479,8.0,0,0.08421656489372253,0.044789092848077416,8.0,0,0.08682724088430405,0.05456880014389753,8.0,0,0.10702066123485565,0.06041434942744672,8.0,1,0.06419912725687027,0.028162414440885186,8.0,1,0.10908491164445877,0.049704010714776814,8.0,0,0.09430143237113953,0.05835541244596243,8.0,0,0.08602865040302277,0.045734424609690905,8.0,0,0.08964370936155319,0.05209352285601199,8.0,26,55,81
+TRM,412,145504,never,0,0,,,,,False,0.5555555820465088,0.12080098688602448,0.06698654964566231,0.045189591124653816,8.0,0.5358923971652985,0,0.03901272639632225,0.03020672337152064,8.0,0,0.04961839318275452,0.04203607141971588,8.0,0,0.06153019517660141,0.05185092333704233,8.0,0,0.09763730317354202,0.07197399158030748,8.0,0,0.14129836857318878,0.08392053097486496,8.0,0,0.12382413446903229,0.07616287562996149,8.0,0,0.118044413626194,0.06708278320729733,8.0,0,0.12569363415241241,0.07658988842740655,8.0,0,0.13130392134189606,0.07380581134930253,8.0,0,0.12080098688602448,0.06698654964566231,8.0,24,57,81
+TRM,413,108663,flaky_then_stable,7,6,26041,5000,208328,40000,True,1.0,0.055917900055646896,0.017156876507215202,0.0037037839647382498,7.0,0.14000760368071496,1,-0.04455239325761795,-0.050544303841888905,0.0,0,0.09086906164884567,0.04207007470540702,8.0,1,-7.938425551401451e-05,-0.03709420168252109,0.0,0,0.074045829474926,0.0388926996383816,8.0,1,0.01794462651014328,-0.003697638458106667,1.0,1,0.04453272372484207,-0.003764786204556003,2.0,0,0.10341605544090271,0.0562678100541234,8.0,1,0.01963738165795803,-0.005959161906503141,2.0,1,0.06413967907428741,0.024731469457037747,8.0,1,0.055917900055646896,0.017156876507215202,7.0,25,56,81
+TRM,414,209089,always,10,0,26041,5000,26041,5000,True,1.0,-0.00017009014845825732,-0.003410181470826501,-0.00464075175113976,0.0,0.0,1,-0.04341111704707146,-0.05087922653183341,0.0,1,0.003205238375812769,-0.004188567458186299,2.0,1,0.014730025082826614,-0.022733164252713323,1.0,1,-0.021020740270614624,-0.02391926501877606,0.0,1,0.0181389469653368,-0.004126491563511081,2.0,1,0.017545802518725395,-0.024946060497313738,1.0,1,0.10683377832174301,-0.0017709158128127456,2.0,1,0.03183221071958542,-0.009332954126875848,1.0,1,0.05066972225904465,-0.004626081659807824,1.0,1,-0.00017009014845825732,-0.003410181470826501,0.0,26,55,81
+TRM,415,25789,always,10,0,26041,5000,26041,5000,True,1.0,0.0036919936537742615,-0.001962329327398038,-0.004441462486283854,1.0,0.0036919936537742615,1,-0.04232199117541313,-0.0456671086139977,0.0,1,-0.009834794327616692,-0.016181266400963068,0.0,1,-0.0147643331438303,-0.042399483965709805,0.0,1,-0.0013273413060232997,-0.016625377218588255,0.0,1,0.001325296121649444,-0.003089313628152013,1.0,1,-0.0008128734189085662,-0.03695088114909595,0.0,1,0.002888655522838235,-0.023846630909247324,1.0,1,0.0015993154374882579,-0.015221607711282559,1.0,1,-0.013742879033088684,-0.01846919988747686,0.0,1,0.0036919936537742615,-0.001962329327398038,1.0,17,64,81
+TRM,416,333768,flaky_then_stable,7,3,78123,15000,130205,25000,True,1.0,0.0050173052586615086,-0.004308671748731285,-0.007953361491672695,2.0,0.009374541696161032,0,0.040213778614997864,0.006993204879108816,5.0,0,0.025865918025374413,0.012703255546512082,8.0,1,-0.0015951544046401978,-0.0306264937389642,0.0,0,0.09683377295732498,0.051899004727602005,8.0,1,0.050413426011800766,0.01968027578550391,8.0,1,0.0685608759522438,0.011374286725185812,5.0,1,0.03578443080186844,-0.012762506274157204,2.0,1,0.04149904102087021,0.0015349660607171245,3.0,1,0.027725892141461372,-0.005551359296077862,1.0,1,0.0050173052586615086,-0.004308671748731285,2.0,23,58,81
+TRM,417,322679,stable_learned,7,1,104164,20000,104164,20000,True,1.0,0.008387092500925064,-0.001181892686872743,-0.005666141747497022,3.0,0.017681346856988966,0,0.05334976688027382,0.037826207699254155,8.0,0,0.07658109068870544,0.06484587350860238,8.0,0,0.08804932236671448,0.06967670284211636,8.0,1,-0.004027810413390398,-0.013686119869817048,0.0,1,0.02968485653400421,0.002301413500390481,3.0,1,0.05437181144952774,0.01411796102183871,6.0,1,0.09824840724468231,0.013514058315195143,4.0,1,0.01065098587423563,-0.010817843372933567,2.0,1,0.009085053578019142,-0.012262438656762242,1.0,1,0.008387092500925064,-0.001181892686872743,3.0,27,54,81
+TRM,418,267002,always,10,0,26041,5000,26041,5000,True,1.0,0.000280523207038641,-0.003263990416598972,-0.004757443210110068,1.0,0.000280523207038641,1,-0.024233290925621986,-0.03774623409844935,0.0,1,0.03270658850669861,0.011918829353817273,6.0,1,0.005875065457075834,-0.030582967738155276,1.0,1,0.015019983053207397,-0.005332921340595931,2.0,1,0.044505525380373,0.01911007473245263,8.0,1,0.010026400908827782,-0.025004799565067515,1.0,1,-0.008778968825936317,-0.02449183794669807,0.0,1,0.02691308781504631,-0.007228398229926825,2.0,1,0.047848306596279144,-0.013104000012390316,1.0,1,0.000280523207038641,-0.003263990416598972,1.0,28,53,81
+TRM,419,322867,always,10,0,26041,5000,26041,5000,True,1.0,0.0012026495533064008,-0.0005909697692914051,-0.0018885432145907544,3.0,0.0029187489999458194,1,-0.03826618567109108,-0.042916569858789444,0.0,1,-0.011190659366548061,-0.015972718130797148,0.0,1,-0.04077507182955742,-0.044468414038419724,0.0,1,-0.02351282350718975,-0.025248411344364285,0.0,1,-0.0042569502256810665,-0.0068382457830011845,0.0,1,0.019613614305853844,-0.0333082708530128,1.0,1,-0.010288366116583347,-0.023619952029548585,0.0,1,0.016566215083003044,-0.009622466313885525,1.0,1,-0.004997252486646175,-0.0220630835974589,0.0,1,0.0012026495533064008,-0.0005909697692914051,3.0,26,55,81
+TRM,420,408180,always,10,0,26041,5000,26041,5000,True,1.0,0.0037664626725018024,0.0004880594751739409,-0.00119196481682593,4.0,0.008672335068695247,1,-0.03862100467085838,-0.04743335768580437,0.0,1,-0.012762538157403469,-0.017284801113419235,0.0,1,-0.010236705653369427,-0.041114947642199695,0.0,1,-0.0008992573129944503,-0.018843667370674666,0.0,1,-0.0013107130071148276,-0.005857430791365914,0.0,1,0.05900915712118149,-0.019213883439078927,1.0,1,-0.01847924292087555,-0.02745813736692071,0.0,1,0.03495139256119728,-0.0032855334866326302,3.0,1,0.04790031909942627,-0.012856529443524778,1.0,1,0.0037664626725018024,0.0004880594751739409,4.0,27,54,81
+TRM,421,121870,flaky_then_stable,2,3,130205,25000,260410,50000,True,1.0,0.05773346871137619,0.011392602893465664,-0.005560671983403154,6.0,0.11549971409840509,0,0.0486067496240139,0.03171141538769007,8.0,0,0.06540089845657349,0.04002740443684161,8.0,0,0.10593108832836151,0.06569254957139492,8.0,0,0.09553054720163345,0.05842835549265146,8.0,1,0.020591866225004196,0.0013101201766403392,3.0,0,0.11140143871307373,0.05731394700706005,8.0,0,0.08939991891384125,0.04796660109423101,8.0,0,0.11120665073394775,0.055048329988494515,8.0,0,0.11528124660253525,0.058831952745094895,8.0,1,0.05773346871137619,0.011392602893465664,6.0,24,57,81
+TRM,422,244435,always,10,0,26041,5000,26041,5000,True,1.0,-0.0005296141025610268,-0.005877005496586207,-0.007481390843167901,0.0,0.0,1,-0.02214839868247509,-0.035937648033723235,0.0,1,-0.0077863154001533985,-0.014097937557380646,0.0,1,0.024442292749881744,-0.016209422959946096,1.0,1,-0.017085937783122063,-0.02090263250283897,0.0,1,0.04333122819662094,0.023660761071369052,8.0,1,0.04659963399171829,-0.0033974938560277224,2.0,1,-0.016426829621195793,-0.028873597038909793,0.0,1,0.03001204878091812,-0.0007764110341668129,4.0,1,1.8634567823028192e-05,-0.01885649460427885,1.0,1,-0.0005296141025610268,-0.005877005496586207,0.0,23,58,81
+TRM,423,321750,transient_or_regressed,4,6,52082,10000,,,False,0.6913580298423767,0.09342893958091736,0.04512800555676222,0.023097975179553032,8.0,0.36102404445409775,0,0.02939586155116558,0.010323816473828629,6.0,1,0.0417589396238327,0.011301331527647562,6.0,0,0.06958328932523727,0.04077350231818855,8.0,1,0.012374186888337135,-0.006347582624584902,2.0,1,0.00801317673176527,-0.00422240883926861,1.0,0,0.0962306559085846,0.04785404051654041,8.0,1,0.022567834705114365,-0.012607157812453806,1.0,0,0.09152065217494965,0.05309247435070574,8.0,0,0.08737066388130188,0.04419369576498866,8.0,0,0.09342893958091736,0.04512800555676222,8.0,24,57,81
+TRM,424,114521,always,10,0,26041,5000,26041,5000,True,1.0,0.002476905006915331,-0.0011874472475028597,-0.0024607505183666945,1.0,0.002476905006915331,1,-0.045357268303632736,-0.05074856989085674,0.0,1,-0.006916808895766735,-0.011969249229878187,0.0,1,-0.04607751592993736,-0.047501382417976856,0.0,1,-0.019415421411395073,-0.023503182688727975,0.0,1,0.0019352167146280408,-0.006255408094148152,1.0,1,0.053314924240112305,-0.03069913899526,1.0,1,-0.020584361627697945,-0.027894729748368263,0.0,1,0.015805426985025406,-0.01550731435418129,1.0,1,-0.004318491090089083,-0.020259545010048896,0.0,1,0.002476905006915331,-0.0011874472475028597,1.0,23,58,81
+TRM,425,211167,transient_or_regressed,5,4,104164,20000,,,False,0.5555555820465088,0.10151073336601257,0.052613944513723254,0.0288676586933434,8.0,0.42091155610978603,0,0.05330842733383179,0.02700229937909171,8.0,0,0.06586896628141403,0.03623551200143993,8.0,0,0.09006136655807495,0.05447962507605553,8.0,1,0.005806535016745329,-0.012598965608049184,1.0,1,0.023137371987104416,0.009849441170445061,8.0,1,0.05038418248295784,0.006776451744372025,3.0,0,0.09766901284456253,0.050452252151444554,8.0,1,0.03183133155107498,-0.00419589041848667,3.0,1,0.003396535525098443,-0.011018290853826329,1.0,0,0.10151073336601257,0.052613944513723254,8.0,23,58,81
+TRM,426,397598,flaky_then_stable,7,2,26041,5000,156246,30000,True,1.0,0.002071661874651909,0.0005457675874822598,-0.00023136637082643574,6.0,0.00571310581653961,1,-0.01808922551572323,-0.031707344111055136,0.0,1,0.07953998446464539,0.028227379109011963,8.0,0,0.10768430680036545,0.061665375251322985,8.0,0,0.11126594245433807,0.06738598505035043,8.0,0,0.11232475936412811,0.055756804533302784,8.0,1,0.03726606070995331,-0.0063224927289411426,3.0,1,-0.022246981039643288,-0.03089535585604608,0.0,1,0.02387712150812149,0.006429746761568822,5.0,1,0.06908947229385376,0.019711490676854737,6.0,1,0.002071661874651909,0.0005457675874822598,6.0,25,56,81
+TRM,427,263228,flaky_then_stable,9,2,26041,5000,104164,20000,True,1.0,0.0025855100248008966,0.0005473696928675054,-0.0007373098669631872,6.0,0.008109679009066895,1,-0.04891803115606308,-0.05765760038048029,0.0,1,-0.008612371981143951,-0.012016821769066155,0.0,0,0.09555866569280624,0.05457618273794651,8.0,1,0.09297400712966919,0.046596693340688944,8.0,1,0.0014854611363261938,-0.005750130483647808,1.0,1,-0.002691043308004737,-0.029866316093830392,0.0,1,-0.010926851071417332,-0.026281784405000508,0.0,1,-0.004903231281787157,-0.016613097221124917,0.0,1,-0.010008826851844788,-0.023899362655356526,0.0,1,0.0025855100248008966,0.0005473696928675054,6.0,26,55,81
+TRM,428,408225,always,10,0,26041,5000,26041,5000,True,1.0,-0.0001697334519121796,-0.0032539297571929637,-0.004973843868356198,0.0,0.0,1,-0.049967799335718155,-0.05943189514800906,0.0,1,-0.010573127306997776,-0.01517125137615949,0.0,1,-0.01823418028652668,-0.03913901117630303,0.0,1,-0.022503649815917015,-0.030722251860424876,0.0,1,0.0025239726528525352,-0.0033392093027941883,2.0,1,0.019138392060995102,-0.03459054557606578,1.0,1,-0.0035274596884846687,-0.024732104619033635,0.0,1,0.01780106872320175,-0.009487057162914425,2.0,1,0.026262888684868813,-0.015410931431688368,1.0,1,-0.0001697334519121796,-0.0032539297571929637,0.0,26,55,81
+TRM,429,83851,always,10,0,26041,5000,26041,5000,True,1.0,0.00757557712495327,0.0008180353615898639,-0.0007574295887025073,5.0,0.009843876585364342,1,0.007254665717482567,-0.015852789510972798,1.0,1,0.000572166929487139,-0.008488480460073333,1.0,1,0.002709936583414674,-0.028811552067054436,1.0,1,-0.009968167170882225,-0.018557682167738676,0.0,1,0.005176571663469076,-0.006360583356581628,1.0,1,0.029880987480282784,-0.027609161012151162,2.0,1,-0.005193975288420916,-0.02470539609203115,0.0,1,0.024554111063480377,-0.009833996431552805,2.0,1,0.018910804763436317,-0.01856629876419902,1.0,1,0.00757557712495327,0.0008180353615898639,5.0,25,56,81
+TRM,430,194659,always,10,0,26041,5000,26041,5000,True,1.0,0.005548396613448858,0.0032425984973087907,0.002007143455557525,8.0,0.025940787978470325,1,0.009174790233373642,-0.014504992883303203,1.0,1,0.006223828997462988,-0.002921501567470841,3.0,1,-0.0038035952020436525,-0.035277733724797145,0.0,1,-0.01950429379940033,-0.023366103181615472,0.0,1,0.0014241687022149563,-0.005404979718150571,1.0,1,-0.0038073109462857246,-0.027893255813978612,0.0,1,-0.016059041023254395,-0.02487903437577188,0.0,1,0.02600812166929245,-0.012809786479920149,1.0,1,-0.024196628481149673,-0.026858732104301453,0.0,1,0.005548396613448858,0.0032425984973087907,8.0,27,54,81
+TRM,431,164035,flaky_then_stable,2,3,156246,30000,260410,50000,True,1.0,0.03872266411781311,0.0032817630417412147,-0.008968346868641675,3.0,0.06401432491838932,0,0.04048341140151024,0.022158396895974874,8.0,0,0.0680064782500267,0.04519075294956565,8.0,0,0.07516320049762726,0.057761083357036114,8.0,0,0.09672942012548447,0.059018710162490606,8.0,0,0.1283632069826126,0.06569681875407696,8.0,1,0.04064159840345383,0.012004694905044744,5.0,0,0.09067696332931519,0.03704991920676548,7.0,0,0.0989600345492363,0.04273096041288227,8.0,0,0.09594154357910156,0.04143481072969735,8.0,1,0.03872266411781311,0.0032817630417412147,3.0,24,57,81
+TRM,432,375254,transient_or_regressed,1,2,234369,45000,,,False,0.654321014881134,0.09896039962768555,0.052928615594282746,0.031327873934060335,8.0,0.42342892475426197,0,0.023432033136487007,-0.0011649000807665288,3.0,0,0.042142242193222046,0.026851769187487662,8.0,0,0.06864365935325623,0.04121951013803482,8.0,0,0.09010055661201477,0.06117228325456381,8.0,0,0.11255968362092972,0.06442233640700579,8.0,0,0.10668780654668808,0.06336134532466531,8.0,0,0.11284062266349792,0.053460417315363884,8.0,0,0.11500080674886703,0.0629667560569942,8.0,1,0.12260353565216064,0.037167572329053655,8.0,0,0.09896039962768555,0.052928615594282746,8.0,25,56,81
+TRM,433,284352,always,10,0,26041,5000,26041,5000,True,1.0,0.002214609645307064,-0.002749715175013989,-0.004405732324812561,1.0,0.002214609645307064,1,-0.04422974959015846,-0.050698216538876295,0.0,1,-0.009078293107450008,-0.013534310623072088,0.0,1,-0.006971996743232012,-0.03682735614711419,0.0,1,-0.013949946500360966,-0.024021183024160564,0.0,1,-0.0004256218671798706,-0.004906794230919331,0.0,1,0.06046198680996895,-0.029555869521573186,1.0,1,0.004194838460534811,-0.013986414618557319,2.0,1,0.022515051066875458,0.0009141657792497426,3.0,1,0.005615255795419216,-0.018161331652663648,1.0,1,0.002214609645307064,-0.002749715175013989,1.0,24,57,81
+TRM,434,368073,flaky_then_stable,9,2,26041,5000,260410,50000,True,1.0,0.004864344839006662,0.0004462085125851445,-0.0015657104813726619,5.0,0.010377251834142953,1,-0.04777252674102783,-0.05633418681100011,0.0,1,-0.0077603524550795555,-0.012852729414589703,0.0,1,-0.044431835412979126,-0.04830798972398043,0.0,1,-0.016928698867559433,-0.021839549066498876,0.0,1,-0.002453952794894576,-0.007663093652809039,0.0,1,0.04066003859043121,-0.029436422628350556,1.0,1,-0.0101936599239707,-0.02893002994824201,0.0,1,0.055335938930511475,-0.0052733854681719095,2.0,0,0.07105819135904312,0.045644074911251664,8.0,1,0.004864344839006662,0.0004462085125851445,5.0,24,57,81
+TRM,435,97808,flaky_then_stable,8,3,52082,10000,104164,20000,True,1.0,0.02492140419781208,0.0037850367025384912,-0.0041001561330631375,3.0,0.04681666358374059,0,0.060929350554943085,0.04638623958453536,8.0,1,0.001467366237193346,-0.006522558232973097,2.0,0,0.0934993177652359,0.06752467202022672,8.0,1,0.07085561007261276,0.03771418321412057,8.0,1,0.015761923044919968,0.0009827286266954616,3.0,1,0.04689996317028999,-0.020017681934405118,1.0,1,-0.0029767039231956005,-0.025415304058697075,0.0,1,0.02585025690495968,-0.01029512006243749,2.0,1,-0.0038811287377029657,-0.014349989738548175,0.0,1,0.02492140419781208,0.0037850367025384912,3.0,25,56,81
+TRM,436,364855,transient_or_regressed,1,2,234369,45000,,,False,0.654321014881134,0.11534734070301056,0.06379283405840397,0.04187346249818802,8.0,0.5103426724672318,0,0.07572776824235916,0.03351255669258535,8.0,0,0.05796675384044647,0.04247249010950327,8.0,0,0.08706925064325333,0.05773823568597436,8.0,0,0.0843699723482132,0.06361912284046412,8.0,0,0.11196844279766083,0.07079103915020823,8.0,0,0.1179790198802948,0.07243664748966694,8.0,0,0.09353204816579819,0.06070237047970295,8.0,0,0.12721313536167145,0.0695677655749023,8.0,1,0.07057271152734756,0.03722287155687809,8.0,0,0.11534734070301056,0.06379283405840397,8.0,22,59,81
+TRM,437,31772,stable_learned,9,1,52082,10000,52082,10000,True,1.0,0.01032335590571165,0.001266826688151923,-0.0010870377554965671,6.0,0.014805521015659906,0,0.05117577686905861,0.01906087969064174,6.0,1,-0.0027024918235838413,-0.009135927713941783,0.0,1,-0.004852385260164738,-0.03486575942952186,0.0,1,-0.0005581975565291941,-0.015207251541141886,0.0,1,0.006684368010610342,-0.0014565036126441555,2.0,1,0.051048170775175095,0.0027337624542269623,4.0,1,-0.016808228567242622,-0.027823040261864662,0.0,1,0.04087373986840248,-0.003769377013668418,2.0,1,0.004684756509959698,-0.02402539539616555,1.0,1,0.01032335590571165,0.001266826688151923,6.0,27,54,81
+TRM,438,112435,transient_or_regressed,3,6,130205,25000,,,False,0.654321014881134,0.10857926309108734,0.05545796500518918,0.03698432445526123,8.0,0.44366372004151344,0,0.00900996569544077,-0.006043546811270062,2.0,0,0.05493808537721634,0.0180554947728524,7.0,0,0.09138871729373932,0.04990687733516097,8.0,0,0.1101585328578949,0.059369307244196534,8.0,1,0.06297218799591064,0.03187073569279164,8.0,0,0.10359037667512894,0.059983350802212954,8.0,1,0.043856844305992126,-0.0008585311006754637,3.0,0,0.11701049655675888,0.06511833053082228,8.0,1,0.023377997800707817,-0.006335129728540778,2.0,0,0.10857926309108734,0.05545796500518918,8.0,26,55,81
+TRM,439,347431,always,10,0,26041,5000,26041,5000,True,1.0,0.0002916528610512614,-0.0018252162935823435,-0.0029112910851836205,1.0,0.0002916528610512614,1,-0.03818740323185921,-0.04612612444907427,0.0,1,-0.01303604245185852,-0.0179492081515491,0.0,1,-0.007938023656606674,-0.04110815143212676,0.0,1,0.0041743735782802105,-0.01244745886651799,1.0,1,0.004427288193255663,-0.0016176159988390282,3.0,1,0.04728216305375099,-0.020747622358612716,1.0,1,-0.015549136325716972,-0.026963302632793784,0.0,1,0.03374895453453064,-0.007646675279829651,2.0,1,0.01108516938984394,-0.01714024183456786,1.0,1,0.0002916528610512614,-0.0018252162935823435,1.0,24,57,81
+TRM,440,37706,always,10,0,26041,5000,26041,5000,True,1.0,0.013311759568750858,0.002433266410662327,-0.002820056746713817,4.0,0.030746358272153884,1,-0.040704067796468735,-0.047427186742424965,0.0,1,0.006392221432179213,-0.0029365290247369558,2.0,1,-0.025876211002469063,-0.04083052999339998,0.0,1,0.0159732885658741,-0.004433164867805317,2.0,1,0.018061840906739235,0.006177904462674633,6.0,1,0.03499453887343407,-0.022451122873462737,1.0,1,-0.007598654367029667,-0.026918636984191835,0.0,1,0.05898992717266083,0.006001873480272479,5.0,1,0.008508422411978245,-0.011924450896913186,2.0,1,0.013311759568750858,0.002433266410662327,4.0,25,56,81
+TRM,441,284263,always,10,0,26041,5000,26041,5000,True,1.0,0.00024316564667969942,-0.002853831378160976,-0.004209697479382157,1.0,0.00024316564667969942,1,-0.04089169204235077,-0.05102221528068185,0.0,1,-0.011840221472084522,-0.01655702164862305,0.0,1,-0.018518568947911263,-0.04151966073550284,0.0,1,0.0044288658536970615,-0.02065864490577951,1.0,1,-0.000916853139642626,-0.00714444800541969,0.0,1,0.018992189317941666,-0.03221888211555779,1.0,1,-0.02889212965965271,-0.030812626471742988,0.0,1,0.05864061787724495,-0.007667552737984806,1.0,1,0.014003964141011238,-0.016494850890012458,1.0,1,0.00024316564667969942,-0.002853831378160976,1.0,27,54,81
+TRM,442,300420,transient_or_regressed,8,3,26041,5000,,,False,0.6296296119689941,0.0863022580742836,0.04664188204333186,0.027973270043730736,8.0,0.3731350563466549,1,0.003910372499376535,-0.02330624993192032,1.0,0,0.07271801680326462,0.03580474015325308,8.0,1,-0.009703537449240685,-0.03910956927575171,0.0,1,-0.01643860712647438,-0.023686781292781234,0.0,1,0.002751694992184639,-0.003779497666982934,1.0,1,0.04226242005825043,-0.02849594410508871,1.0,1,0.01033511757850647,-0.02153844735585153,1.0,1,0.030357569456100464,-3.949696838390082e-05,3.0,1,0.08778879046440125,0.02836139258579351,8.0,0,0.0863022580742836,0.04664188204333186,8.0,23,58,81
+TRM,443,170881,always,10,0,26041,5000,26041,5000,True,1.0,0.0018191068666055799,-0.00034426889033056796,-0.0013638185628224164,2.0,0.0035899627255275846,1,-0.04245319217443466,-0.05024907225742936,0.0,1,-0.012995867989957333,-0.016966137452982366,0.0,1,0.01212850771844387,-0.03413176699541509,1.0,1,-0.014008543454110622,-0.021134088863618672,0.0,1,0.0006604851805604994,-0.006670360358839389,1.0,1,-0.017964709550142288,-0.02769254590384662,0.0,1,-0.02563159354031086,-0.030156415421515703,0.0,1,0.04779393598437309,-1.6469566617161036e-05,3.0,1,-0.010141623206436634,-0.020619046059437096,0.0,1,0.0018191068666055799,-0.00034426889033056796,2.0,25,56,81
+TRM,444,11959,always,10,0,26041,5000,26041,5000,True,1.0,0.0010244321310892701,-0.0019508847274209984,-0.0038781428011134267,1.0,0.0010244321310892701,1,0.008521963842213154,-0.009441151283681393,1.0,1,0.00720157939940691,-0.0019780973125307355,2.0,1,0.0016119565116241574,-0.024973163483082317,1.0,1,-0.01913520134985447,-0.022091788705438375,0.0,1,0.006736247334629297,-0.003978813896537758,1.0,1,0.027263207361102104,-0.03017514874227345,1.0,1,-0.009570073336362839,-0.021095350151881576,0.0,1,0.03008856065571308,-0.009947117825504392,2.0,1,-0.009009575471282005,-0.02137369755655527,0.0,1,0.0010244321310892701,-0.0019508847274209984,1.0,26,55,81
+TRM,445,229749,always,10,0,26041,5000,26041,5000,True,1.0,0.00480071734637022,-0.0025553706946084276,-0.005261368351057172,2.0,0.006702156621031463,1,-0.016250794753432274,-0.03459157096222043,0.0,1,-0.0007703551673330367,-0.008989071364339907,0.0,1,-0.003435707651078701,-0.03515703452285379,0.0,1,0.005135051440447569,-0.01047847411246039,1.0,1,0.0046736556105315685,-0.005977644228551071,2.0,1,0.044482357800006866,-0.014301189250545576,1.0,1,-0.016863273456692696,-0.02743109967559576,0.0,1,0.025380056351423264,-0.0005411607271526009,4.0,1,-0.013740369118750095,-0.020861750235781074,0.0,1,0.00480071734637022,-0.0025553706946084276,2.0,24,57,81
+TRM,446,95436,always,10,0,26041,5000,26041,5000,True,1.0,0.0005353681044653058,-0.0014993540571595076,-0.0027296480257064104,1.0,0.0005353681044653058,1,-0.051890622824430466,-0.055238939356058836,0.0,1,-0.013010785914957523,-0.016721861204132438,0.0,1,0.006235913373529911,-0.03943645895924419,1.0,1,0.013988344930112362,-0.01603530754800886,1.0,1,-0.0014113583602011204,-0.004790177888935432,0.0,1,0.00383996800519526,-0.03484281964483671,1.0,1,-0.01886100135743618,-0.026918309275060892,0.0,1,0.010074792429804802,-0.011113065120298415,2.0,1,0.022905923426151276,-0.017670514876954257,1.0,1,0.0005353681044653058,-0.0014993540571595076,1.0,27,54,81
+TRM,447,322889,always,10,0,26041,5000,26041,5000,True,1.0,0.0024633347056806087,0.000260648612481873,-0.0009451621749576589,3.0,0.005869595683179796,1,-0.03361108899116516,-0.044639091938734055,0.0,1,0.0041771079413592815,-0.004722540158581978,2.0,1,-0.008335067890584469,-0.02861573768313974,0.0,1,-0.007409415207803249,-0.016963395522907376,0.0,1,-0.0019347142660990357,-0.007062266187858768,0.0,1,0.007740243803709745,-0.020544306258670986,1.0,1,-9.438369306735694e-05,-0.02134157597538433,0.0,1,0.056559156626462936,0.0017066305445041507,4.0,1,0.020100168883800507,-0.01831796974875033,1.0,1,0.0024633347056806087,0.000260648612481873,3.0,29,52,81
+TRM,448,311286,always,10,0,26041,5000,26041,5000,True,1.0,0.00649089552462101,0.0001712462071736809,-0.002220955204393249,4.0,0.010253790474962443,1,-0.041934795677661896,-0.04745742678642273,0.0,1,-0.003896120237186551,-0.01270906362333335,0.0,1,-0.015512113459408283,-0.028912732028402388,0.0,1,-0.013892321847379208,-0.01921568892430514,0.0,1,0.015869565308094025,-0.000833104488265235,3.0,1,0.0375516340136528,-0.024561122176237404,1.0,1,0.007907571271061897,-0.018732953700236976,1.0,1,0.025954728946089745,-0.009137997607467696,1.0,1,0.05140658840537071,-0.011867606517625973,1.0,1,0.00649089552462101,0.0001712462071736809,4.0,24,57,81
+TRM,449,38334,always,10,0,26041,5000,26041,5000,True,1.0,0.0054345703683793545,0.0012113689408579376,-0.0009929193256539293,6.0,0.014378540887264535,1,-0.04965601861476898,-0.05288400501012802,0.0,1,-0.007144644856452942,-0.011476137675344944,0.0,1,-0.006912181153893471,-0.03973203501664102,0.0,1,0.0012121063191443682,-0.021880428394069895,1.0,1,-0.0019186132121831179,-0.005698976980056614,0.0,1,0.005548843648284674,-0.030966631544288248,1.0,1,-0.015696149319410324,-0.025244980584830046,0.0,1,0.0519501194357872,-0.002045568253379315,2.0,1,0.03003663942217827,-0.013552108022850007,1.0,1,0.0054345703683793545,0.0012113689408579376,6.0,26,55,81
+TRM,450,376109,always,10,0,26041,5000,26041,5000,True,1.0,0.005186494905501604,-0.0028837561258114874,-0.0057872062316164374,2.0,0.007473399862647057,1,-0.04376434162259102,-0.05184360919520259,0.0,1,-0.005943217780441046,-0.014018424495588988,0.0,1,-0.013037928380072117,-0.0364323538960889,0.0,1,-0.009355997666716576,-0.020750155672430992,0.0,1,0.008464992977678776,-0.002862158573407214,1.0,1,0.04341311752796173,-0.017035632394254208,1.0,1,0.006118869408965111,-0.02443798049353063,1.0,1,0.05884834751486778,-0.005758154962677509,2.0,1,0.02920180931687355,-0.01356255286373198,1.0,1,0.005186494905501604,-0.0028837561258114874,2.0,23,58,81
+TRM,451,373815,flaky_then_stable,8,4,26041,5000,260410,50000,True,1.0,0.04792865365743637,0.007537254755334288,-0.005400924215791747,3.0,0.08198079280555248,1,-0.023142067715525627,-0.040622194996103644,0.0,1,-0.006332521792501211,-0.013442453753668815,0.0,1,0.006276363041251898,-0.03032966546015814,1.0,0,0.11535511910915375,0.059384374879300594,8.0,1,0.0037590344436466694,-0.0028827978821936995,2.0,1,0.019185315817594528,-0.016150574840139598,1.0,1,0.1397288739681244,-0.006011061952449381,1.0,1,0.05377964675426483,-0.0025213677436113358,3.0,0,0.09581220895051956,0.03797477507032454,8.0,1,0.04792865365743637,0.007537254755334288,3.0,25,56,81
+TRM,452,365448,transient_or_regressed,1,2,156246,30000,,,False,0.7037037014961243,0.09172391146421432,0.05491004581563175,0.03657789761200547,8.0,0.439280366525054,0,0.007588749285787344,-0.00627495878143236,2.0,0,0.025053294375538826,0.011678237468004227,8.0,0,0.06607572734355927,0.0453305272385478,8.0,0,0.11439204216003418,0.06021668575704098,8.0,0,0.0988086387515068,0.06370983039960265,8.0,1,0.05819503962993622,0.029546294186729938,8.0,0,0.1356113851070404,0.063063885550946,8.0,0,0.10821457207202911,0.061972938710823655,8.0,0,0.11889052391052246,0.05288138147443533,8.0,0,0.09172391146421432,0.05491004581563175,8.0,23,58,81
+TRM,453,297325,never,0,0,,,,,False,0.6666666865348816,0.08939763158559799,0.05197731568478048,0.036390415858477354,8.0,0.41581852547824383,0,0.05189334973692894,0.013245520645796205,7.0,0,0.04932128265500069,0.0365708745084703,8.0,0,0.0971471518278122,0.0496173573192209,8.0,0,0.09686686843633652,0.062420050613582134,8.0,0,0.09727141261100769,0.05938897281885147,8.0,0,0.11000960320234299,0.06258479226380587,8.0,0,0.1072666347026825,0.05454318458214402,8.0,0,0.1082429364323616,0.06545522576197982,8.0,0,0.10572340339422226,0.05157346813939512,8.0,0,0.08939763158559799,0.05197731568478048,8.0,23,58,81
+TRM,454,2708,flaky_then_stable,9,2,26041,5000,156246,30000,True,1.0,0.03487630933523178,0.01008282536349725,-0.000714597903424874,5.0,0.0883904742076993,1,-0.010377652011811733,-0.029629931435920298,0.0,1,-0.006595180369913578,-0.012092158780433238,0.0,1,-0.011921106837689877,-0.03769251506309956,0.0,1,0.030166028067469597,-0.007429331890307367,1.0,0,0.10529311746358871,0.057877044193446636,8.0,1,0.011203276924788952,-0.0248045846237801,1.0,1,0.10188309848308563,-0.00664476677775383,1.0,1,0.06813007593154907,0.0299526798480656,8.0,1,0.0950067937374115,0.03755559881392401,8.0,1,0.03487630933523178,0.01008282536349725,5.0,26,55,81
+TRM,455,363270,transient_or_regressed,4,2,156246,30000,,,False,0.6790123581886292,0.07390633970499039,0.04237792990170419,0.020872787106782198,8.0,0.33902343921363354,0,0.025226255878806114,0.004082713654497638,4.0,0,0.050257083028554916,0.01940724899759516,7.0,0,0.07112648338079453,0.044920315500348806,8.0,0,0.07707773894071579,0.03584970848169178,8.0,0,0.1045147255063057,0.0648058820515871,8.0,1,0.05192837864160538,0.0061956145509611815,4.0,1,-0.018228817731142044,-0.03065771539695561,0.0,1,0.005501298233866692,-0.013177907298086211,1.0,1,0.008452491834759712,-0.01328965459833853,1.0,0,0.07390633970499039,0.04237792990170419,8.0,26,55,81
+TRM,456,17115,always,10,0,26041,5000,26041,5000,True,1.0,0.0014882978284731507,0.0003412590654079395,-0.0002649506377565558,4.0,0.003789875074289739,1,-0.0521753765642643,-0.05785448243841529,0.0,1,-0.009800001047551632,-0.014570640749298036,0.0,1,-0.0283488892018795,-0.03860798059031367,0.0,1,-0.022844862192869186,-0.028640897013247013,0.0,1,-0.0029414850287139416,-0.00500296094105579,0.0,1,-0.02343028597533703,-0.038627538830041885,0.0,1,-0.016120735555887222,-0.025398359866812825,0.0,1,0.007420929614454508,-0.013303394836839288,1.0,1,-0.01130466628819704,-0.02664989826735109,0.0,1,0.0014882978284731507,0.0003412590654079395,4.0,34,47,81
+TRM,457,142720,always,10,0,26041,5000,26041,5000,True,1.0,0.0047073704190552235,-0.001167133275885135,-0.0038019472849555314,3.0,0.006485476216766983,1,-0.030626993626356125,-0.04247822519391775,0.0,1,-0.008101747371256351,-0.013575066346675158,0.0,1,-0.009981905110180378,-0.0388139613205567,0.0,1,-0.010021624155342579,-0.020412038429640234,0.0,1,0.00010703218140406534,-0.008195610666916764,1.0,1,0.02493579126894474,-0.020753278164193034,1.0,1,-0.01863572746515274,-0.02861268841661513,0.0,1,0.009519322775304317,-0.011815883975941688,2.0,1,0.0026147770695388317,-0.014246446429751813,1.0,1,0.0047073704190552235,-0.001167133275885135,3.0,23,58,81
+TRM,458,314525,transient_or_regressed,5,6,52082,10000,,,False,0.5308641791343689,0.09290392696857452,0.049920098623260856,0.03229232179000974,8.0,0.39936078898608685,0,0.041205115616321564,0.01210037745477166,7.0,1,0.076031394302845,0.03415142570156604,8.0,0,0.07799220830202103,0.05058931699022651,8.0,1,0.005924959667026997,-0.009320159180788323,1.0,1,0.04807381331920624,0.023015051032416523,8.0,1,0.07996352761983871,0.019189338836440584,7.0,0,0.09822650253772736,0.04769981559365988,8.0,0,0.10054406523704529,0.050521114841103554,8.0,1,0.04960668832063675,0.0029063495458103716,3.0,0,0.09290392696857452,0.049920098623260856,8.0,24,57,81
+TRM,459,113202,always,10,0,26041,5000,26041,5000,True,1.0,0.0011679617455229163,-0.0009859159545158036,-0.0026584512525005266,3.0,0.002923567604739219,1,-0.0409981943666935,-0.05019959295168519,0.0,1,-0.004113034810870886,-0.011473953782115132,0.0,1,-0.004389447625726461,-0.0388904886203818,0.0,1,-0.008036427199840546,-0.02139694499783218,0.0,1,0.00424214918166399,-0.0027574216364882886,1.0,1,0.045190855860710144,-0.015846066526137292,1.0,1,-0.014371381141245365,-0.026876970776356757,0.0,1,0.04672712832689285,-0.003875569673255086,2.0,1,0.038671188056468964,-0.01358744956087321,1.0,1,0.0011679617455229163,-0.0009859159545158036,3.0,25,56,81
+TRM,460,161422,flaky_then_stable,7,3,52082,10000,182287,35000,True,1.0,0.007768354378640652,0.001278222198379808,-0.0004946102380927186,6.0,0.012498883952503093,0,0.04624471440911293,0.011968412552960217,6.0,1,0.0181821770966053,0.0008093183350865729,3.0,1,0.010527473874390125,-0.02809008432086557,1.0,1,0.0577923022210598,0.015327419961977284,6.0,0,0.11129328608512878,0.06278319796547294,8.0,0,0.10433343797922134,0.05333388992585242,8.0,1,-0.0039141373708844185,-0.02436721755657345,0.0,1,0.05881441757082939,0.0013369939988479018,2.0,1,0.01738322153687477,-0.002610030001960695,2.0,1,0.007768354378640652,0.001278222198379808,6.0,24,57,81
+TRM,461,379709,always,10,0,26041,5000,26041,5000,True,1.0,-6.714008486596867e-05,-0.0019347891911820625,-0.003268301719799638,0.0,0.0,1,-0.05006014183163643,-0.056740273255854845,0.0,1,-0.016791081055998802,-0.017881533596664667,0.0,1,-0.03753746300935745,-0.044845372438430786,0.0,1,-0.01669718511402607,-0.021955983713269234,0.0,1,-0.003806013148277998,-0.007407395401969552,0.0,1,0.024683300405740738,-0.03154684882611036,1.0,1,-0.019444763660430908,-0.029173634480684996,0.0,1,-0.003918591421097517,-0.018597072863485664,0.0,1,0.004270578734576702,-0.020052746403962374,1.0,1,-6.714008486596867e-05,-0.0019347891911820625,0.0,25,56,81
+TRM,462,392553,always,10,0,26041,5000,26041,5000,True,1.0,0.01125576812773943,-0.00036348114736028947,-0.0031972015858627856,3.0,0.01241107631358318,1,-0.035833317786455154,-0.045573749113827944,0.0,1,0.0020690809469670057,-0.007085604302119464,1.0,1,0.0034475908614695072,-0.03285007429076359,1.0,1,-0.015832284465432167,-0.019586520502343774,0.0,1,-0.0019398838048800826,-0.005634450921206735,0.0,1,-0.003540146630257368,-0.025046171562280506,0.0,1,0.00044332368997856975,-0.017408336549124215,1.0,1,0.020447472110390663,-0.006291886485996656,2.0,1,0.01573798432946205,-0.011932763358345255,2.0,1,0.01125576812773943,-0.00036348114736028947,3.0,26,55,81
+TRM,463,152224,always,10,0,26041,5000,26041,5000,True,1.0,0.00455563934519887,0.0014389879816008033,0.00044951559175387956,7.0,0.011803868066635914,1,-0.03947409242391586,-0.0539911394007504,0.0,1,-0.00406574085354805,-0.011071860091760755,0.0,1,-0.006036698818206787,-0.030592906987294555,0.0,1,-0.011151615530252457,-0.019553798250854015,0.0,1,0.00973132811486721,-0.00239686481654644,2.0,1,0.0402781218290329,-0.03223680052906275,1.0,1,-0.02308518812060356,-0.028914472786709666,0.0,1,0.013442537747323513,-0.010370947464252822,1.0,1,0.03419121354818344,-0.01669906359165907,1.0,1,0.00455563934519887,0.0014389879816008033,7.0,26,55,81
+TRM,464,416555,always,10,0,26041,5000,26041,5000,True,1.0,0.006219181232154369,0.0025760090466064867,0.0012805575315724127,8.0,0.020608072372851893,1,-0.05623546242713928,-0.06136343954131007,0.0,1,-0.011122730560600758,-0.014394161757081747,0.0,1,0.005621963180601597,-0.038472276530228555,1.0,1,-0.023074928671121597,-0.025792139815166593,0.0,1,0.0014954761136323214,-0.005562224130699178,1.0,1,0.04373835027217865,-0.02487818757072091,1.0,1,0.00650679599493742,-0.02252783824224025,1.0,1,0.012044848874211311,-0.011263590451562777,2.0,1,0.019497521221637726,-0.017818770720623434,1.0,1,0.006219181232154369,0.0025760090466064867,8.0,27,54,81
+TRM,465,33227,transient_or_regressed,4,2,78123,15000,,,False,0.7160493731498718,0.08467309176921844,0.03935854975134134,0.023864069022238255,8.0,0.31486839801073074,0,0.036065906286239624,0.010407663343357854,6.0,0,0.06785129010677338,0.042462193639948964,8.0,1,0.0005462511908262968,-0.0264886544609908,1.0,1,0.0403493270277977,0.01439010028843768,6.0,1,0.042605847120285034,0.008239695627707988,5.0,1,0.08847393095493317,0.03383721093996428,8.0,0,0.08254484832286835,0.04411512240767479,8.0,0,0.08883217722177505,0.04884590255096555,8.0,0,0.08791101723909378,0.04738900950178504,8.0,0,0.08467309176921844,0.03935854975134134,8.0,24,57,81
+TRM,466,284808,flaky_then_stable,9,2,26041,5000,78123,15000,True,1.0,0.006604431662708521,-0.0018752859614323825,-0.004961867758538574,2.0,0.008494560606777668,1,-0.012613613158464432,-0.029223339399322867,0.0,0,0.06288323551416397,0.04463634034618735,8.0,1,-0.010814585722982883,-0.036783069488592446,0.0,1,0.022184936329722404,-0.0040269716118928045,2.0,1,0.058509621769189835,0.02226814581081271,8.0,1,0.035064857453107834,-0.02417488396167755,1.0,1,0.004516165237873793,-0.02472983911866322,1.0,1,0.032030925154685974,-0.0003825786698143929,3.0,1,0.025506488978862762,-0.002711621200433001,3.0,1,0.006604431662708521,-0.0018752859614323825,2.0,24,57,81
+TRM,467,173117,always,10,0,26041,5000,26041,5000,True,1.0,0.0024945347104221582,-0.0046704426349606365,-0.006882040877826512,1.0,0.0024945347104221582,1,-0.021571995690464973,-0.03393005975522101,0.0,1,0.004927814472466707,-0.00655117975838948,2.0,1,0.020251277834177017,-0.022346297511830926,1.0,1,-0.015729330480098724,-0.019190689316019416,0.0,1,0.02872418239712715,0.005397315515438095,5.0,1,0.029123295098543167,-0.005314915790222585,2.0,1,-0.0012592837447300553,-0.021466263409820385,0.0,1,0.039085693657398224,0.006569939178007189,4.0,1,-0.008013310842216015,-0.01890625071246177,0.0,1,0.0024945347104221582,-0.0046704426349606365,1.0,24,57,81
+TRM,468,273737,always,10,0,26041,5000,26041,5000,True,1.0,0.0010841622715815902,-0.00037252201764204074,-0.0014801644283579662,4.0,0.002940481572295539,1,-0.0438021756708622,-0.05329418648034334,0.0,1,-0.003721417160704732,-0.011492354242363945,0.0,1,0.009459033608436584,-0.03555966168642044,1.0,1,-0.012705577537417412,-0.019439043942838907,0.0,1,0.003162912791594863,-0.006001156260026619,1.0,1,-0.0062436293810606,-0.03549563675187528,0.0,1,-0.012757114134728909,-0.02550855476874858,0.0,1,0.00048478631651960313,-0.017796809745050268,1.0,1,-0.008169104345142841,-0.024849834968335927,0.0,1,0.0010841622715815902,-0.00037252201764204074,4.0,28,53,81
+TRM,469,268973,flaky_then_stable,3,3,156246,30000,234369,45000,True,1.0,0.051789067685604095,0.023577963118441403,0.012191059300675988,8.0,0.18862370494753122,0,0.02182438224554062,0.006213856202521129,5.0,0,0.045476339757442474,0.03355131042189896,8.0,0,0.07928180694580078,0.05478570470586419,8.0,0,0.09182452410459518,0.06900768540799618,8.0,0,0.11252716928720474,0.06654205452650785,8.0,1,0.04301775246858597,-0.028684384655207396,1.0,0,0.11320974677801132,0.06525760283693671,8.0,0,0.10139621049165726,0.06398616265505552,8.0,1,0.04834384843707085,0.014843185024801642,7.0,1,0.051789067685604095,0.023577963118441403,8.0,21,60,81
+TRM,470,231302,always,10,0,26041,5000,26041,5000,True,1.0,0.005574757233262062,0.001190303075418342,-0.0008111384668154642,6.0,0.014873301901388913,1,-0.04345804080367088,-0.051747848745435476,0.0,1,-0.006668957881629467,-0.011611492838710546,0.0,1,-0.015269579365849495,-0.03868628875352442,0.0,1,-0.023330722004175186,-0.0253320992924273,0.0,1,0.006602107081562281,-0.0032935397612163797,1.0,1,-0.006003767251968384,-0.027222166769206524,0.0,1,-0.016527533531188965,-0.028157387394458055,0.0,1,0.03405698761343956,-0.011877942481078207,1.0,1,-0.01152564212679863,-0.023433399852365255,0.0,1,0.005574757233262062,0.001190303075418342,6.0,26,55,81
+TRM,471,283309,always,10,0,26041,5000,26041,5000,True,1.0,0.006938990671187639,0.0033742786217771936,0.0015984282727004029,8.0,0.02699422897421755,1,-0.049085333943367004,-0.05632653646171093,0.0,1,-0.010283208452165127,-0.012103328597731888,0.0,1,0.0013198226224631071,-0.03696030462742783,1.0,1,-0.004571885336190462,-0.014562667172867805,0.0,1,0.007017657160758972,-0.005974225525278598,1.0,1,0.05619971454143524,-0.01646038913168013,1.0,1,0.031430378556251526,-0.015943478560075164,1.0,1,0.017224382609128952,-0.007832249568309635,2.0,1,-0.014230942353606224,-0.018339881440624595,0.0,1,0.006938990671187639,0.0033742786217771936,8.0,26,55,81
+TRM,472,314697,always,10,0,26041,5000,26041,5000,True,1.0,0.0009692724561318755,-0.00010277143519488163,-0.0008355467434739694,4.0,0.0025200154923368245,1,-0.0479317270219326,-0.053292674012482166,0.0,1,0.002519897185266018,-0.0076181706972420216,1.0,1,-0.0018165226792916656,-0.03474273862957489,0.0,1,-0.00971985049545765,-0.017759318347088993,0.0,1,-0.003475119825452566,-0.007620528631377965,0.0,1,-0.020571622997522354,-0.034649570705369115,0.0,1,-0.009164893999695778,-0.02657059533521533,0.0,1,0.06495463103055954,-0.005955629050731659,1.0,1,-0.0019892905838787556,-0.021193821274209768,0.0,1,0.0009692724561318755,-0.00010277143519488163,4.0,27,54,81
+TRM,473,343431,transient_or_regressed,6,4,78123,15000,,,False,0.7037037014961243,0.1193062961101532,0.050829126266762614,0.028973497916013002,8.0,0.4066330101341009,0,0.008776395581662655,-0.005242784565780312,2.0,0,0.0359828807413578,0.01976770395413041,8.0,1,0.007724560797214508,-0.023985949577763677,1.0,0,0.09637284278869629,0.05641550663858652,8.0,1,0.04130624979734421,0.010307114018360153,6.0,1,0.07967421412467957,0.02342521885293536,7.0,1,0.016536608338356018,-0.02177044248674065,1.0,1,0.05756388604640961,0.006165645754663274,3.0,1,0.030839525163173676,1.804772182367742e-05,3.0,0,0.1193062961101532,0.050829126266762614,8.0,25,56,81
+TRM,474,15145,always,10,0,26041,5000,26041,5000,True,1.0,0.004577792249619961,-0.001811627960705664,-0.004646043409593403,3.0,0.005391239479649812,1,-0.0017448123544454575,-0.015339216217398643,0.0,1,0.02638697624206543,0.011034289258532226,8.0,1,-0.000980433658696711,-0.015312529882066883,0.0,1,0.01726856641471386,-0.004783150157891214,2.0,1,0.01964207924902439,0.0033250854758080095,5.0,1,0.0221081729978323,-0.0267889789538458,1.0,1,-0.008623594418168068,-0.022857665782794356,0.0,1,0.029385728761553764,-0.00028225033020135015,3.0,1,0.0577823743224144,0.001528946973849088,3.0,1,0.004577792249619961,-0.001811627960705664,3.0,26,55,81
+TRM,475,77340,always,10,0,26041,5000,26041,5000,True,1.0,-0.002219460206106305,-0.0034498944296501577,-0.004352317948359996,0.0,0.0,1,0.05379185453057289,0.024950292252469808,8.0,1,0.025773977860808372,0.0034204482562927296,5.0,1,-0.017417993396520615,-0.034738068003207445,0.0,1,-0.014227191917598248,-0.022305667982436717,0.0,1,0.004223444499075413,-0.005537925084354356,1.0,1,0.04791168496012688,-0.01976746169384569,1.0,1,0.0004011734272353351,-0.02412234797520796,1.0,1,0.02589544840157032,-0.00725040229735896,1.0,1,0.0030157754663378,-0.018528484361013398,1.0,1,-0.002219460206106305,-0.0034498944296501577,0.0,25,56,81
+TRM,476,364763,always,10,0,26041,5000,26041,5000,True,1.0,0.0061709461733698845,0.0002631318729982013,-0.0019331702933413908,4.0,0.009837736157351173,1,-0.034079670906066895,-0.043676900677382946,0.0,1,0.0034454285632818937,-0.006787536985939369,1.0,1,-0.013137810863554478,-0.032963998964987695,0.0,1,0.0028262611012905836,-0.017787316726753488,1.0,1,0.010014263913035393,-0.00361265245010145,1.0,1,0.034520119428634644,-0.026177294086664915,1.0,1,-0.015724509954452515,-0.026263318955898285,0.0,1,0.03455487638711929,-0.007274867675732821,1.0,1,-0.012159603647887707,-0.024769911193288863,0.0,1,0.0061709461733698845,0.0002631318729982013,4.0,26,55,81
+TRM,477,50372,flaky_then_stable,9,2,26041,5000,78123,15000,True,1.0,0.006361488252878189,0.000670909350446891,-0.0016953813174040988,5.0,0.013057573407422751,1,0.023131929337978363,0.006003938713547541,6.0,0,0.08954158425331116,0.06059651914983988,8.0,1,0.00933857448399067,-0.030450470512732863,1.0,1,-0.014146042987704277,-0.017457804176956415,0.0,1,0.010431554168462753,-0.0024438390537397936,2.0,1,0.021952219307422638,-0.009466700226766989,1.0,1,-0.0038526626303792,-0.02339256403502077,0.0,1,0.02358846925199032,-0.013886720524169505,1.0,1,0.019929829984903336,-0.007402864983305335,2.0,1,0.006361488252878189,0.000670909350446891,5.0,29,52,81
+TRM,478,244494,always,10,0,26041,5000,26041,5000,True,1.0,0.038373298943042755,0.001252154092071578,-0.00742816599085927,3.0,0.04405994270928204,1,0.013809561729431152,-0.004500906652538106,3.0,1,-0.010935126803815365,-0.014149329508654773,0.0,1,0.008844511583447456,-0.030943469842895865,1.0,1,-0.009900997392833233,-0.01777708251029253,0.0,1,0.019896309822797775,0.005214829841861501,4.0,1,0.015403985977172852,-0.02312558062840253,1.0,1,0.013423834927380085,-0.013066839252132922,1.0,1,0.06585477292537689,0.01250013399112504,5.0,1,-0.006413077469915152,-0.01751851406879723,0.0,1,0.038373298943042755,0.001252154092071578,3.0,24,57,81
+TRM,479,263215,always,10,0,26041,5000,26041,5000,True,1.0,0.004368945490568876,0.002181379371904768,0.00010105458204634488,6.0,0.02059272979386151,1,-0.034497540444135666,-0.04357111779972911,0.0,1,0.004746122285723686,-0.010277803987264633,1.0,1,0.002987830201163888,-0.032521201035706326,1.0,1,0.001784458407200873,-0.01550545338250231,1.0,1,0.010070890188217163,-0.004162234850809909,2.0,1,0.00769222155213356,-0.01955917727900669,1.0,1,-0.016768835484981537,-0.0252099868375808,0.0,1,0.004191887564957142,-0.012028559307509568,1.0,1,0.023705115541815758,-0.020985129987820983,1.0,1,0.004368945490568876,0.002181379371904768,6.0,26,55,81
+TRM,480,244179,always,10,0,26041,5000,26041,5000,True,1.0,0.007014677859842777,-0.0005230155184108298,-0.0037254374765325338,4.0,0.010717625758843496,1,-0.04048796743154526,-0.04731117282062769,0.0,1,0.02812650054693222,0.0071320119823212735,6.0,1,0.023860838264226913,-0.02256913436576724,1.0,1,0.02535279653966427,-0.0009656801412347704,3.0,1,0.038328781723976135,0.0103179209982045,6.0,1,0.045669205486774445,-0.016527554136700928,1.0,1,0.02950638346374035,-0.002792961400700733,3.0,1,0.02486964873969555,-0.007827380992239341,3.0,1,0.0020070490427315235,-0.016549319901969284,1.0,1,0.007014677859842777,-0.0005230155184108298,4.0,24,57,81
+TRM,481,322721,never,0,0,,,,,False,0.6913580298423767,0.09101496636867523,0.04778529843315482,0.029996884986758232,8.0,0.38228238746523857,0,0.05521472543478012,0.03102178464177996,8.0,0,0.07455668598413467,0.04503428260795772,8.0,0,0.07694581896066666,0.05260100495070219,8.0,0,0.10328400880098343,0.06257005129009485,8.0,0,0.0881517305970192,0.06082749832421541,8.0,0,0.09604080766439438,0.058564677368849516,8.0,0,0.0852913111448288,0.04675864428281784,8.0,0,0.08926977962255478,0.05264259874820709,8.0,0,0.09235787391662598,0.05323586915619671,8.0,0,0.09101496636867523,0.04778529843315482,8.0,26,55,81
+TRM,482,153835,always,10,0,26041,5000,26041,5000,True,1.0,0.010344092734158039,-0.0018305493576917797,-0.0060853330651298165,2.0,0.016952288802713156,1,-0.04415018483996391,-0.052047706209123135,0.0,1,-0.006181615404784679,-0.01304897980298847,0.0,1,-0.0028897819574922323,-0.03574537017266266,0.0,1,-0.009260877035558224,-0.015631545218639076,0.0,1,0.007418051362037659,-0.003867178747896105,2.0,1,0.03303636983036995,-0.018182098341640085,1.0,1,-0.019978394731879234,-0.02848920808173716,0.0,1,0.009333649650216103,-0.013269875198602676,1.0,1,-0.002999362302944064,-0.02022731301258318,0.0,1,0.010344092734158039,-0.0018305493576917797,2.0,26,55,81
+TRM,483,180158,always,10,0,26041,5000,26041,5000,True,1.0,0.0015000997809693217,-0.0005355015673558228,-0.0020668521901825443,4.0,0.003983396221883595,1,-0.05625559017062187,-0.061219198163598776,0.0,1,-0.006949665490537882,-0.013548929418902844,0.0,1,-0.007763419765979052,-0.04001656227046624,0.0,1,-0.020821062847971916,-0.024332925211638212,0.0,1,-0.006119043566286564,-0.009040338452905416,0.0,1,0.030984103679656982,-0.034833747893571854,1.0,1,0.003206721507012844,-0.021814160398207605,1.0,1,0.03561671823263168,-0.010367663693614304,1.0,1,0.05592885985970497,-0.013052912661805749,1.0,1,0.0015000997809693217,-0.0005355015673558228,4.0,26,55,81
+TRM,484,359197,stable_learned,2,1,234369,45000,234369,45000,True,1.0,0.010808789171278477,0.0013247043170849793,-0.004358029822469689,4.0,0.02802975382655859,0,0.05381006374955177,0.01112469521467574,6.0,0,0.056688342243433,0.0381123055703938,8.0,0,0.07354415208101273,0.04730395320802927,8.0,0,0.09928083419799805,0.054080446949228644,8.0,0,0.11158983409404755,0.06580772483721375,8.0,0,0.09896364063024521,0.06305207731202245,8.0,0,0.09689902514219284,0.05380334402434528,8.0,0,0.10391002893447876,0.05826687486842275,8.0,1,0.0783921629190445,0.00999627445708029,4.0,1,0.010808789171278477,0.0013247043170849793,4.0,25,56,81
+TRM,485,251008,never,0,0,,,,,False,0.6419752836227417,0.11941582709550858,0.04253455129219219,0.018755186698399484,8.0,0.3402764103375375,0,0.05154622346162796,0.004758205497637391,4.0,0,0.07217729091644287,0.037844630191102624,8.0,0,0.10171724855899811,0.04239038843661547,8.0,0,0.09583091735839844,0.04304013168439269,8.0,0,0.10740122199058533,0.059054602868855,8.0,0,0.09222583472728729,0.042338059982284904,8.0,0,0.10706383734941483,0.04749180795624852,8.0,0,0.09868504106998444,0.04444837290793657,8.0,0,0.11560356616973877,0.04008273337967694,8.0,0,0.11941582709550858,0.04253455129219219,8.0,26,55,81
+TRM,486,162158,always,10,0,26041,5000,26041,5000,True,1.0,0.015340541489422321,-0.002200453498517163,-0.006666725152172148,2.0,0.016709488234482706,1,-0.04038750007748604,-0.04692934546619654,0.0,1,-0.008668131195008755,-0.014248066348955035,0.0,1,-0.002574140438809991,-0.03769912643474527,0.0,1,-0.007120998110622168,-0.014362178917508572,0.0,1,0.005582502577453852,-0.006921041174791753,1.0,1,0.06633278727531433,-0.01164004672318697,1.0,1,-0.0021438063122332096,-0.026880233141127974,0.0,1,0.010831018909811974,-0.01405084496946074,1.0,1,0.0051970393396914005,-0.01433926698518917,1.0,1,0.015340541489422321,-0.002200453498517163,2.0,25,56,81
+TRM,487,268037,stable_learned,8,1,78123,15000,78123,15000,True,1.0,0.004175924696028233,-1.6513107766513713e-05,-0.0022041567899577785,4.0,0.008684522297699004,0,0.0648832693696022,0.02862039441242814,8.0,0,0.09048161655664444,0.05790000921115279,8.0,1,-0.006049808580428362,-0.03338220372097567,0.0,1,0.1151004508137703,0.05257176165468991,8.0,1,-0.0006733923801220953,-0.010019508488767315,0.0,1,0.031563930213451385,-0.020806401502341032,1.0,1,0.026476452127099037,-0.022541496204212308,1.0,1,0.0029409138951450586,-0.011005463864421472,1.0,1,-0.01825009472668171,-0.024121364578604698,0.0,1,0.004175924696028233,-1.6513107766513713e-05,4.0,27,54,81
+TRM,488,414236,always,10,0,26041,5000,26041,5000,True,1.0,0.003006597748026252,-0.0011259312159381807,-0.0033838107774499804,3.0,0.005619822186417878,1,-0.034028198570013046,-0.04479916766285896,0.0,1,-0.008795595727860928,-0.014344685478135943,0.0,1,-0.03627592697739601,-0.04325662134215236,0.0,1,-0.010945137590169907,-0.019087783293798566,0.0,1,-0.004100518301129341,-0.008021431334782392,0.0,1,-0.0003989792603533715,-0.027999410489428556,0.0,1,0.13412445783615112,-0.0025933844735845923,1.0,1,0.07238174229860306,0.0005380421062000096,3.0,1,-0.005355730652809143,-0.01959722605533898,0.0,1,0.003006597748026252,-0.0011259312159381807,3.0,26,55,81
+TRM,489,250967,never,0,0,,,,,False,0.5925925970077515,0.11279480904340744,0.03384228685172275,0.010388937895186245,8.0,0.270738294813782,0,0.06290607154369354,0.027331398508977145,8.0,0,0.06620709598064423,0.045964693650603294,8.0,0,0.08133847266435623,0.059282911475747824,8.0,0,0.08727527409791946,0.06052873143926263,8.0,0,0.1165764331817627,0.05459740245714784,8.0,0,0.09950552880764008,0.05598694738000631,8.0,0,0.09805479645729065,0.04026087449165061,8.0,0,0.12169718742370605,0.050225167302414775,8.0,0,0.11068753898143768,0.048950561322271824,8.0,0,0.11279480904340744,0.03384228685172275,8.0,24,57,81
+TRM,490,342135,flaky_then_stable,9,2,26041,5000,182287,35000,True,1.0,0.029281556606292725,0.00428073820512509,-0.003855167466099374,4.0,0.049666575505398214,1,-0.0017410103464499116,-0.015413443339639343,0.0,1,0.023674815893173218,0.005086032393592177,6.0,1,0.009998642839491367,-0.01885876606684178,1.0,1,0.03193454444408417,0.0016704718873370439,3.0,1,0.03850783407688141,0.011546555608219933,6.0,0,0.11002673953771591,0.06564380740746856,8.0,1,0.06829003989696503,-0.009828298818320036,1.0,1,0.02726493589580059,-0.004250214318744838,2.0,1,0.0054685077629983425,-0.01465915417065844,1.0,1,0.029281556606292725,0.00428073820512509,4.0,23,58,81
+TRM,491,59002,transient_or_regressed,2,2,208328,40000,,,False,0.654321014881134,0.10300592333078384,0.04941460047848523,0.028231581207364798,8.0,0.3953168038278818,0,0.026967855170369148,0.007610827364260331,5.0,0,0.05912793427705765,0.039014271926134825,8.0,0,0.08890599757432938,0.05520414933562279,8.0,0,0.08846702426671982,0.05715789832174778,8.0,0,0.0946776270866394,0.05814557895064354,8.0,0,0.11127281188964844,0.07354941638186574,8.0,0,0.0976722240447998,0.055380382807925344,8.0,1,0.04938139766454697,0.015330973714299034,8.0,1,0.05719976872205734,0.016100346081657335,7.0,0,0.10300592333078384,0.04941460047848523,8.0,23,58,81
+TRM,492,338778,always,10,0,26041,5000,26041,5000,True,1.0,0.00026338742463849485,-0.0013323408206815657,-0.0026603648148011416,3.0,0.00042762490556924604,1,-0.048210714012384415,-0.05265455739572644,0.0,1,-0.012428388930857182,-0.01599185843952,0.0,1,-0.010953483171761036,-0.04160660330671817,0.0,1,-0.006421331781893969,-0.021551352052483708,0.0,1,0.002523205941542983,-0.0029089865420246497,1.0,1,0.03969716280698776,-0.020669668447226286,1.0,1,-0.0008163060410879552,-0.024218835278588813,0.0,1,-0.0018809579778462648,-0.015784346469445154,0.0,1,-0.017609892413020134,-0.02443573158234358,0.0,1,0.00026338742463849485,-0.0013323408206815657,3.0,24,57,81
+TRM,493,420469,flaky_then_stable,4,6,26041,5000,260410,50000,True,1.0,0.004470453597605228,-0.0016707468021195382,-0.004347121401224285,2.0,0.008722423110157251,1,-0.036880429834127426,-0.04987143212929368,0.0,0,0.04018985852599144,0.014361564830323914,7.0,0,0.09560562670230865,0.046114277094602585,8.0,1,0.08611240983009338,0.043309652828611434,8.0,0,0.09738998115062714,0.050136503065004945,8.0,0,0.08902180194854736,0.04452458722516894,8.0,1,0.10769711434841156,-0.006541836162796244,2.0,0,0.09393936395645142,0.05950266122817993,8.0,0,0.08564276248216629,0.04796285764314234,8.0,1,0.004470453597605228,-0.0016707468021195382,2.0,26,55,81
+TRM,494,307975,flaky_then_stable,7,4,26041,5000,156246,30000,True,1.0,0.09982780367136002,0.05300452234223485,0.030071466229856014,8.0,0.4240361787378788,1,-0.04407041519880295,-0.05038309656083584,0.0,0,0.06350431591272354,0.037542745703831315,8.0,0,0.09824986755847931,0.06183205312117934,8.0,1,0.046559594571590424,0.02336977343657054,8.0,0,0.10250163078308105,0.06088842498138547,8.0,1,0.03280675783753395,-0.02516582189127803,1.0,1,0.009983801282942295,-0.022901668795384467,1.0,1,-0.005493888631463051,-0.015182747622020543,0.0,1,-0.002052517142146826,-0.016319743648637086,0.0,1,0.09982780367136002,0.05300452234223485,8.0,23,58,81
+TRM,495,377141,always,10,0,26041,5000,26041,5000,True,1.0,0.002784288953989744,-0.0009536357611068524,-0.002022376866079867,1.0,0.002784288953989744,1,-0.05086081847548485,-0.054795388132333755,0.0,1,-0.0022917662281543016,-0.012684010929660872,0.0,1,0.006164990831166506,-0.031330016849096864,1.0,1,-0.009643043391406536,-0.02088868257123977,0.0,1,-0.0005225521745160222,-0.00594595649454277,0.0,1,0.026391418650746346,-0.022889806277817115,1.0,1,0.00027566487551666796,-0.024310475808306364,1.0,1,0.055689625442028046,0.001897490641567856,2.0,1,0.004642258398234844,-0.017290487652644515,1.0,1,0.002784288953989744,-0.0009536357611068524,1.0,25,56,81
+TRM,496,63475,flaky_then_stable,3,3,78123,15000,260410,50000,True,1.0,0.02016642689704895,0.0017060208920156583,-0.005334227374987677,4.0,0.034985076636075974,0,0.030471688136458397,-0.0030551676245522685,2.0,0,0.0557452067732811,0.02229649410583079,8.0,1,0.01771186850965023,-0.035132332937791944,1.0,1,0.01212382223457098,-0.007951020321343094,1.0,0,0.10083380341529846,0.053186543518677354,8.0,0,0.10636328905820847,0.04880914045497775,8.0,0,0.07590638846158981,0.03820945485495031,8.0,0,0.11242757737636566,0.05745100253261626,8.0,0,0.09442195296287537,0.04027324577327818,8.0,1,0.02016642689704895,0.0017060208920156583,4.0,26,55,81
+TRM,497,405054,always,10,0,26041,5000,26041,5000,True,1.0,-0.0007717959815636277,-0.002568144213000778,-0.003957754641305655,0.0,0.0,1,-0.04346076771616936,-0.050300687085837126,0.0,1,-0.013608316890895367,-0.01619759015738964,0.0,1,-0.045756541192531586,-0.0468636117875576,0.0,1,-0.00847890879958868,-0.022818102384917438,0.0,1,0.016336975619196892,-0.0011945345686399378,1.0,1,0.004896942060440779,-0.03599190787645057,1.0,1,-0.019215408712625504,-0.027977736201137304,0.0,1,0.02524149976670742,-0.012104407302103937,1.0,1,-0.00096030009444803,-0.017151210966403596,0.0,1,-0.0007717959815636277,-0.002568144213000778,0.0,25,56,81
+TRM,498,413485,transient_or_regressed,1,2,234369,45000,,,False,0.6666666865348816,0.09640975296497345,0.04104899114463478,0.020443646470084786,8.0,0.32839192915707827,0,0.03406860679388046,0.013997862879477907,7.0,0,0.06388293951749802,0.026937215705402195,8.0,0,0.07850410044193268,0.04342737281695008,8.0,0,0.07596229761838913,0.04351435718126595,8.0,0,0.10601796954870224,0.04711763048544526,8.0,0,0.09018966555595398,0.04508455377072096,8.0,0,0.07730361819267273,0.03396518784575164,8.0,0,0.09895096719264984,0.04359021130949259,8.0,1,0.0827859416604042,0.03653362131444737,8.0,0,0.09640975296497345,0.04104899114463478,8.0,25,56,81
+TRM,499,154260,always,10,0,26041,5000,26041,5000,True,1.0,0.003391074016690254,0.0013328791028470732,0.00045525871973950416,7.0,0.01130887703038752,1,-0.039754096418619156,-0.048790226224809885,0.0,1,-0.008489749394357204,-0.013007326982915401,0.0,1,-0.018451552838087082,-0.03937719948589802,0.0,1,-0.011540839448571205,-0.02224881504662335,0.0,1,-0.001912343897856772,-0.004749233819893561,0.0,1,-0.003388076787814498,-0.03453215767513029,0.0,1,-0.01997007243335247,-0.026978616137057543,0.0,1,0.030942749232053757,-0.008214039233280346,2.0,1,0.021064074710011482,-0.014198383752955124,1.0,1,0.003391074016690254,0.0013328791028470732,7.0,28,53,81
+TRM,500,320091,always,10,0,26041,5000,26041,5000,True,1.0,0.0039006315637379885,-0.0035233708331361413,-0.006885038455948234,2.0,0.006258235080167651,1,-0.018807290121912956,-0.03685919591225684,0.0,1,0.0015478156274184585,-0.00524109773687087,3.0,1,-0.01029868982732296,-0.033158798003569245,0.0,1,0.00407696608453989,-0.013914232142269611,1.0,1,0.013138114474713802,-0.0035522719990694895,2.0,1,-0.00471331225708127,-0.025333174795378,0.0,1,-0.021492859348654747,-0.029064158210530877,0.0,1,-0.01412171870470047,-0.018654562416486442,0.0,1,-0.008064205758273602,-0.01837571884971112,0.0,1,0.0039006315637379885,-0.0035233708331361413,2.0,17,64,81
+TRM,501,77218,always,10,0,26041,5000,26041,5000,True,1.0,0.02510039322078228,0.0010353960387874395,-0.006215571658685803,4.0,0.03314545494504273,1,-0.030787764117121696,-0.0396597960498184,0.0,1,-0.00410970114171505,-0.011222947621718049,0.0,1,0.014962023124098778,-0.028121797600761056,1.0,1,-0.0030098820570856333,-0.01719512339332141,0.0,1,-0.006723834201693535,-0.010980640538036823,0.0,1,0.03933965042233467,-0.022060182644054294,1.0,1,-0.018928037956357002,-0.028749049408361316,0.0,1,0.08755988627672195,0.0066016729979310185,2.0,1,0.011375844478607178,-0.015647594584152102,1.0,1,0.02510039322078228,0.0010353960387874395,4.0,26,55,81
+TRM,502,135121,always,10,0,26041,5000,26041,5000,True,1.0,0.002264054026454687,-0.0035209155721531715,-0.005540212267078459,2.0,0.002391574700595811,1,-0.04436060041189194,-0.04854131955653429,0.0,1,-0.014798326417803764,-0.01665181654971093,0.0,1,0.000544271373655647,-0.03538270167337032,1.0,1,0.0004994375631213188,-0.016433106269687414,1.0,1,0.0036075920797884464,-0.003937956935260445,1.0,1,0.023381870239973068,-0.022190447780303657,1.0,1,-0.013398888520896435,-0.02428496757056564,0.0,1,0.012626790441572666,-0.008825067081488669,2.0,1,-0.012692851945757866,-0.02196434955112636,0.0,1,0.002264054026454687,-0.0035209155721531715,2.0,23,58,81
+TRM,503,338595,flaky_then_stable,3,3,156246,30000,234369,45000,True,1.0,0.017013149335980415,-0.0007197200757218525,-0.004581973305903375,2.0,0.018573153181932867,0,0.003677265951409936,-0.0028944655059603974,2.0,0,0.024675367400050163,0.015463832882232964,8.0,0,0.04344142600893974,0.03395181428641081,8.0,0,0.09629151225090027,0.05993780354037881,8.0,0,0.11353182047605515,0.08422110415995121,8.0,1,0.05295740067958832,0.004303728185959699,4.0,0,0.12568816542625427,0.06985079497098923,8.0,0,0.1204335168004036,0.07426035962998867,8.0,1,0.015273351222276688,-0.009946074686013162,2.0,1,0.017013149335980415,-0.0007197200757218525,2.0,24,57,81
+TRM,504,223626,always,10,0,26041,5000,26041,5000,True,1.0,0.004548605997115374,0.002417385503576952,0.0012821876116504427,7.0,0.01945825864095241,1,-0.044835563749074936,-0.053007543087005615,0.0,1,-0.013814040459692478,-0.01677195227239281,0.0,1,-0.03335157036781311,-0.0405858363956213,0.0,1,-0.01660507172346115,-0.02360890037380159,0.0,1,-0.0005011611501686275,-0.00392037662822986,0.0,1,0.033989228308200836,-0.023691783077083528,1.0,1,0.0011264487402513623,-0.022582591263926588,1.0,1,0.03885851055383682,-0.0055624323431402445,2.0,1,0.012492140755057335,-0.0189550316426903,1.0,1,0.004548605997115374,0.002417385503576952,7.0,27,54,81
+TRM,505,399325,flaky_then_stable,7,6,26041,5000,208328,40000,True,1.0,0.019192544743418694,-0.0007967020646901801,-0.005720265326090157,2.0,0.021035300218500197,1,0.0038084175903350115,-0.010084833578730468,1.0,0,0.08198720216751099,0.0453348804730922,8.0,1,-0.0006536035216413438,-0.01582433265139116,0.0,0,0.09160551428794861,0.06168452510610223,8.0,1,0.026295023038983345,0.009500965672486927,7.0,1,0.04457192122936249,-0.020018232753500342,1.0,0,0.09772138297557831,0.054192403331398964,8.0,1,0.035156723111867905,-0.004795977962203324,2.0,1,0.016369571909308434,-0.0019274768310424406,3.0,1,0.019192544743418694,-0.0007967020646901801,2.0,25,56,81
+TRM,506,316235,transient_or_regressed,2,4,156246,30000,,,False,0.6296296119689941,0.10283684730529785,0.05770305497571826,0.040812538005411625,8.0,0.4616244398057461,0,0.016031472012400627,0.00890950977918692,8.0,0,0.024581285193562508,0.013466166681610048,8.0,0,0.07115685194730759,0.04729582974687219,8.0,0,0.0983378142118454,0.06258463300764561,8.0,0,0.10950490832328796,0.06361038517206907,8.0,1,0.08404611051082611,0.040424798033200204,8.0,0,0.12703371047973633,0.06126392539590597,8.0,0,0.1199335977435112,0.0681708948686719,8.0,1,0.09061505645513535,0.03801567922346294,8.0,0,0.10283684730529785,0.05770305497571826,8.0,24,57,81
+TRM,507,30545,flaky_then_stable,4,3,130205,25000,208328,40000,True,1.0,0.11057506501674652,0.0402694531949237,0.014140337007120252,8.0,0.3221556255593896,0,-0.0040850453078746796,-0.010266376077197492,0.0,0,0.01541012804955244,0.0028440053647500463,5.0,0,0.061146363615989685,0.04120453493669629,8.0,0,0.11064200103282928,0.06994136981666088,8.0,1,0.05363970994949341,0.02225823054322973,7.0,0,0.13128280639648438,0.07031682040542364,8.0,0,0.13005582988262177,0.05636669183149934,8.0,1,0.07572892308235168,0.029768679582048208,8.0,1,0.019560985267162323,-0.006240257935132831,2.0,1,0.11057506501674652,0.0402694531949237,8.0,25,56,81
+TRM,508,3498,flaky_then_stable,6,5,52082,10000,260410,50000,True,1.0,0.007768663112074137,-0.002116965057211928,-0.0067469177301973104,2.0,0.015459613874554634,0,0.041977472603321075,0.014243191268178634,6.0,1,0.010056219063699245,-0.005479984043631703,2.0,0,0.07000593096017838,0.04540069797076285,8.0,0,0.09691918641328812,0.056635212153196335,8.0,1,0.011972017586231232,-0.007447981217410415,1.0,1,0.06774210184812546,0.01854715206718538,6.0,1,0.09087501466274261,0.03253762460371945,8.0,1,0.09745647758245468,0.03942023287527263,8.0,0,0.11910249292850494,0.05240385443903506,8.0,1,0.007768663112074137,-0.002116965057211928,2.0,22,59,81
+TRM,509,205267,transient_or_regressed,9,1,26041,5000,,,False,0.6913580298423767,0.12383750826120377,0.05870254198089242,0.03322139289230108,8.0,0.46962033584713936,1,-0.03785260394215584,-0.04684191709384322,0.0,1,0.029701968654990196,0.010758980126411188,7.0,1,-0.0008571745129302144,-0.0257719981163973,0.0,1,0.0005440515815280378,-0.015375606548332144,1.0,1,-0.0029430401045829058,-0.0064180717745330185,0.0,1,0.05740875378251076,-0.015559811639832333,2.0,1,0.06490273028612137,0.018587004451546818,7.0,1,0.06666387617588043,0.0006111902475822717,2.0,1,0.038390349596738815,0.008952659016358666,5.0,0,0.12383750826120377,0.05870254198089242,8.0,24,57,81
+TRM,510,89149,flaky_then_stable,6,5,52082,10000,208328,40000,True,1.0,0.0915708988904953,0.038108041568193585,0.013907799613662064,8.0,0.3048643325455487,0,0.026808708906173706,0.007638961639713671,5.0,1,-0.007737511768937111,-0.016418478451669216,0.0,1,-0.0021385024301707745,-0.03153126797406003,0.0,0,0.08881904184818268,0.04661730886436999,8.0,1,0.024048447608947754,0.004205335659207776,3.0,0,0.09303178638219833,0.05125097371637821,8.0,0,0.08592571318149567,0.038370750728063285,8.0,1,0.0847727432847023,0.03403472190257162,8.0,1,-0.01145052257925272,-0.01871529978234321,0.0,1,0.0915708988904953,0.038108041568193585,8.0,24,57,81
+TRM,511,6979,stable_learned,1,1,260410,50000,260410,50000,True,1.0,0.01051486935466528,-0.001084508199710399,-0.004592806129949167,2.0,0.012168352026492357,0,0.007596309762448072,-0.00016526534432159679,4.0,0,0.05649362877011299,0.030043136444874108,8.0,0,0.0642768070101738,0.045555445831269026,8.0,0,0.0841195210814476,0.053423349279910326,8.0,0,0.10984981805086136,0.06423144927248359,8.0,0,0.13199053704738617,0.07899909792467952,8.0,0,0.10829892009496689,0.06202394515275955,8.0,0,0.13202497363090515,0.06969117932021618,8.0,0,0.10504193603992462,0.06095804227516055,8.0,1,0.01051486935466528,-0.001084508199710399,2.0,24,57,81
diff --git a/ptrm_gbs768_base_vs_multi4_k100_d64_sigma03_Lonly_n1000_seed0.hist.csv b/ptrm_gbs768_base_vs_multi4_k100_d64_sigma03_Lonly_n1000_seed0.hist.csv
new file mode 100644
index 0000000..ef95a86
--- /dev/null
+++ b/ptrm_gbs768_base_vs_multi4_k100_d64_sigma03_Lonly_n1000_seed0.hist.csv
@@ -0,0 +1,102 @@
+correct_count,base_frac,multi4_frac
+0.0,0.015,0.012
+1.0,0.002,0.007
+2.0,0.0,0.001
+3.0,0.001,0.001
+4.0,0.001,0.0
+5.0,0.003,0.002
+6.0,0.001,0.001
+7.0,0.001,0.001
+8.0,0.0,0.001
+9.0,0.0,0.0
+10.0,0.002,0.001
+11.0,0.0,0.0
+12.0,0.001,0.001
+13.0,0.001,0.001
+14.0,0.0,0.0
+15.0,0.002,0.001
+16.0,0.0,0.0
+17.0,0.0,0.001
+18.0,0.0,0.002
+19.0,0.001,0.0
+20.0,0.002,0.0
+21.0,0.001,0.001
+22.0,0.002,0.0
+23.0,0.001,0.0
+24.0,0.002,0.002
+25.0,0.0,0.001
+26.0,0.0,0.0
+27.0,0.002,0.0
+28.0,0.0,0.0
+29.0,0.001,0.0
+30.0,0.0,0.0
+31.0,0.0,0.001
+32.0,0.0,0.0
+33.0,0.0,0.0
+34.0,0.001,0.0
+35.0,0.0,0.001
+36.0,0.002,0.001
+37.0,0.001,0.0
+38.0,0.0,0.001
+39.0,0.0,0.0
+40.0,0.0,0.0
+41.0,0.0,0.0
+42.0,0.0,0.001
+43.0,0.001,0.0
+44.0,0.001,0.0
+45.0,0.0,0.0
+46.0,0.0,0.001
+47.0,0.001,0.001
+48.0,0.001,0.0
+49.0,0.001,0.0
+50.0,0.0,0.001
+51.0,0.001,0.0
+52.0,0.0,0.001
+53.0,0.002,0.0
+54.0,0.001,0.0
+55.0,0.003,0.0
+56.0,0.0,0.0
+57.0,0.0,0.001
+58.0,0.001,0.001
+59.0,0.002,0.0
+60.0,0.003,0.001
+61.0,0.0,0.0
+62.0,0.0,0.0
+63.0,0.002,0.0
+64.0,0.0,0.0
+65.0,0.001,0.002
+66.0,0.001,0.0
+67.0,0.001,0.001
+68.0,0.0,0.0
+69.0,0.003,0.0
+70.0,0.001,0.0
+71.0,0.001,0.0
+72.0,0.002,0.001
+73.0,0.0,0.0
+74.0,0.0,0.0
+75.0,0.001,0.001
+76.0,0.001,0.001
+77.0,0.0,0.0
+78.0,0.0,0.0
+79.0,0.0,0.001
+80.0,0.002,0.0
+81.0,0.001,0.001
+82.0,0.001,0.0
+83.0,0.003,0.0
+84.0,0.003,0.0
+85.0,0.002,0.001
+86.0,0.001,0.001
+87.0,0.001,0.001
+88.0,0.0,0.004
+89.0,0.003,0.001
+90.0,0.002,0.001
+91.0,0.005,0.001
+92.0,0.001,0.001
+93.0,0.001,0.003
+94.0,0.005,0.002
+95.0,0.007,0.004
+96.0,0.005,0.007
+97.0,0.004,0.006
+98.0,0.007,0.007
+99.0,0.018,0.014
+100.0,0.85,0.889
diff --git a/ptrm_gbs768_base_vs_multi4_k100_d64_sigma03_Lonly_n1000_seed0.kcurve.csv b/ptrm_gbs768_base_vs_multi4_k100_d64_sigma03_Lonly_n1000_seed0.kcurve.csv
new file mode 100644
index 0000000..82b43c1
--- /dev/null
+++ b/ptrm_gbs768_base_vs_multi4_k100_d64_sigma03_Lonly_n1000_seed0.kcurve.csv
@@ -0,0 +1,101 @@
+K,base/oracle_pass,base/q_max_exact,base/correct_count_mean,base/zero_correct_frac,base/correct_count_ge_5_frac,base/correct_count_ge_half_frac,multi4/oracle_pass,multi4/q_max_exact,multi4/correct_count_mean,multi4/zero_correct_frac,multi4/correct_count_ge_5_frac,multi4/correct_count_ge_half_frac,delta/oracle_pass,delta/q_max_exact,delta/correct_count_mean
+1.0,0.942,0.942,0.9419999718666077,0.058,0.942,0.942,0.955,0.955,0.9549999833106995,0.045,0.955,0.955,0.013000000000000012,0.013000000000000012,0.013000011444091797
+2.0,0.961,0.961,1.8899999856948853,0.039,0.929,0.961,0.965,0.965,1.909000039100647,0.035,0.944,0.965,0.0040000000000000036,0.0040000000000000036,0.01900005340576172
+3.0,0.971,0.971,2.8440001010894775,0.029,0.921,0.952,0.968,0.968,2.864000082015991,0.032,0.94,0.956,-0.0030000000000000027,-0.0030000000000000027,0.019999980926513672
+4.0,0.972,0.972,3.7839999198913574,0.028,0.914,0.96,0.969,0.969,3.815000057220459,0.031,0.934,0.96,-0.0030000000000000027,-0.0030000000000000027,0.031000137329101562
+5.0,0.973,0.972,4.730999946594238,0.027,0.91,0.951,0.973,0.973,4.769000053405762,0.027,0.93,0.954,0.0,0.0010000000000000009,0.03800010681152344
+6.0,0.978,0.977,5.672999858856201,0.022,0.928,0.957,0.974,0.974,5.723999977111816,0.026,0.949,0.956,-0.0040000000000000036,-0.0030000000000000027,0.051000118255615234
+7.0,0.978,0.977,6.614999771118164,0.022,0.939,0.951,0.974,0.974,6.675000190734863,0.026,0.952,0.954,-0.0040000000000000036,-0.0030000000000000027,0.06000041961669922
+8.0,0.98,0.979,7.557000160217285,0.02,0.942,0.952,0.974,0.974,7.632999897003174,0.026,0.954,0.956,-0.006000000000000005,-0.0050000000000000044,0.07599973678588867
+9.0,0.982,0.981,8.498000144958496,0.018,0.945,0.945,0.974,0.974,8.59000015258789,0.026,0.955,0.955,-0.008000000000000007,-0.007000000000000006,0.09200000762939453
+10.0,0.982,0.981,9.435999870300293,0.018,0.946,0.946,0.976,0.976,9.543999671936035,0.024,0.956,0.956,-0.006000000000000005,-0.0050000000000000044,0.10799980163574219
+11.0,0.982,0.981,10.380000114440918,0.018,0.951,0.945,0.977,0.977,10.498000144958496,0.023,0.957,0.956,-0.0050000000000000044,-0.0040000000000000036,0.11800003051757812
+12.0,0.982,0.981,11.321999549865723,0.018,0.953,0.951,0.977,0.977,11.45199966430664,0.023,0.958,0.957,-0.0050000000000000044,-0.0040000000000000036,0.13000011444091797
+13.0,0.982,0.981,12.258999824523926,0.018,0.955,0.947,0.977,0.977,12.402999877929688,0.023,0.959,0.955,-0.0050000000000000044,-0.0040000000000000036,0.14400005340576172
+14.0,0.983,0.982,13.20199966430664,0.017,0.961,0.947,0.978,0.978,13.362000465393066,0.022,0.961,0.957,-0.0050000000000000044,-0.0040000000000000036,0.16000080108642578
+15.0,0.983,0.982,14.147000312805176,0.017,0.961,0.946,0.978,0.978,14.319999694824219,0.022,0.961,0.955,-0.0050000000000000044,-0.0040000000000000036,0.17299938201904297
+16.0,0.983,0.982,15.088000297546387,0.017,0.963,0.946,0.978,0.978,15.27400016784668,0.022,0.962,0.955,-0.0050000000000000044,-0.0040000000000000036,0.18599987030029297
+17.0,0.983,0.982,16.038000106811523,0.017,0.963,0.946,0.978,0.978,16.226999282836914,0.022,0.962,0.954,-0.0050000000000000044,-0.0040000000000000036,0.18899917602539062
+18.0,0.983,0.982,16.97800064086914,0.017,0.963,0.946,0.978,0.978,17.18600082397461,0.022,0.965,0.956,-0.0050000000000000044,-0.0040000000000000036,0.20800018310546875
+19.0,0.983,0.982,17.923999786376953,0.017,0.964,0.946,0.979,0.979,18.142000198364258,0.021,0.965,0.956,-0.0040000000000000036,-0.0030000000000000027,0.2180004119873047
+20.0,0.983,0.982,18.868000030517578,0.017,0.964,0.948,0.979,0.979,19.094999313354492,0.021,0.966,0.956,-0.0040000000000000036,-0.0030000000000000027,0.22699928283691406
+21.0,0.983,0.982,19.80299949645996,0.017,0.965,0.948,0.979,0.979,20.051000595092773,0.021,0.966,0.955,-0.0040000000000000036,-0.0030000000000000027,0.2480010986328125
+22.0,0.983,0.982,20.7450008392334,0.017,0.965,0.948,0.979,0.979,21.003999710083008,0.021,0.966,0.956,-0.0040000000000000036,-0.0030000000000000027,0.2589988708496094
+23.0,0.983,0.982,21.691999435424805,0.017,0.966,0.948,0.979,0.979,21.95599937438965,0.021,0.966,0.956,-0.0040000000000000036,-0.0030000000000000027,0.26399993896484375
+24.0,0.983,0.982,22.631000518798828,0.017,0.966,0.95,0.979,0.979,22.913999557495117,0.021,0.966,0.956,-0.0040000000000000036,-0.0030000000000000027,0.28299903869628906
+25.0,0.983,0.982,23.572999954223633,0.017,0.967,0.949,0.979,0.979,23.86199951171875,0.021,0.966,0.956,-0.0040000000000000036,-0.0030000000000000027,0.2889995574951172
+26.0,0.983,0.982,24.518999099731445,0.017,0.968,0.95,0.981,0.981,24.81800079345703,0.019,0.966,0.957,-0.0020000000000000018,-0.0010000000000000009,0.29900169372558594
+27.0,0.983,0.982,25.459999084472656,0.017,0.968,0.949,0.981,0.981,25.768999099731445,0.019,0.966,0.956,-0.0020000000000000018,-0.0010000000000000009,0.30900001525878906
+28.0,0.983,0.982,26.399999618530273,0.017,0.968,0.951,0.981,0.981,26.726999282836914,0.019,0.966,0.957,-0.0020000000000000018,-0.0010000000000000009,0.3269996643066406
+29.0,0.983,0.982,27.33799934387207,0.017,0.968,0.949,0.981,0.981,27.679000854492188,0.019,0.966,0.956,-0.0020000000000000018,-0.0010000000000000009,0.3410015106201172
+30.0,0.983,0.982,28.27899932861328,0.017,0.968,0.951,0.981,0.981,28.636999130249023,0.019,0.966,0.957,-0.0020000000000000018,-0.0010000000000000009,0.3579998016357422
+31.0,0.983,0.982,29.2189998626709,0.017,0.969,0.95,0.981,0.981,29.591999053955078,0.019,0.967,0.957,-0.0020000000000000018,-0.0010000000000000009,0.3729991912841797
+32.0,0.983,0.982,30.163000106811523,0.017,0.969,0.95,0.982,0.982,30.547000885009766,0.018,0.967,0.957,-0.0010000000000000009,0.0,0.3840007781982422
+33.0,0.983,0.982,31.104999542236328,0.017,0.97,0.95,0.982,0.982,31.500999450683594,0.018,0.967,0.957,-0.0010000000000000009,0.0,0.3959999084472656
+34.0,0.983,0.982,32.04600143432617,0.017,0.97,0.951,0.982,0.982,32.45100021362305,0.018,0.967,0.957,-0.0010000000000000009,0.0,0.404998779296875
+35.0,0.983,0.982,32.992000579833984,0.017,0.97,0.95,0.982,0.982,33.4010009765625,0.018,0.967,0.957,-0.0010000000000000009,0.0,0.4090003967285156
+36.0,0.983,0.982,33.928001403808594,0.017,0.971,0.95,0.982,0.982,34.36000061035156,0.018,0.967,0.957,-0.0010000000000000009,0.0,0.43199920654296875
+37.0,0.983,0.982,34.86399841308594,0.017,0.971,0.95,0.982,0.982,35.3129997253418,0.018,0.967,0.957,-0.0010000000000000009,0.0,0.4490013122558594
+38.0,0.984,0.983,35.80799865722656,0.016,0.972,0.951,0.982,0.982,36.26599884033203,0.018,0.968,0.957,-0.0020000000000000018,-0.0010000000000000009,0.45800018310546875
+39.0,0.984,0.983,36.75299835205078,0.016,0.972,0.95,0.982,0.982,37.220001220703125,0.018,0.97,0.957,-0.0020000000000000018,-0.0010000000000000009,0.46700286865234375
+40.0,0.984,0.983,37.70399856567383,0.016,0.973,0.951,0.982,0.982,38.176998138427734,0.018,0.971,0.957,-0.0020000000000000018,-0.0010000000000000009,0.47299957275390625
+41.0,0.984,0.983,38.650001525878906,0.016,0.973,0.951,0.983,0.983,39.132999420166016,0.017,0.972,0.957,-0.0010000000000000009,0.0,0.4829978942871094
+42.0,0.984,0.983,39.590999603271484,0.016,0.973,0.951,0.984,0.984,40.08599853515625,0.016,0.972,0.957,0.0,0.0010000000000000009,0.4949989318847656
+43.0,0.984,0.983,40.53499984741211,0.016,0.973,0.95,0.984,0.984,41.04499816894531,0.016,0.973,0.957,0.0,0.0010000000000000009,0.5099983215332031
+44.0,0.984,0.983,41.47200012207031,0.016,0.973,0.951,0.984,0.984,42.00199890136719,0.016,0.973,0.957,0.0,0.0010000000000000009,0.529998779296875
+45.0,0.984,0.983,42.40800094604492,0.016,0.973,0.95,0.984,0.984,42.96099853515625,0.016,0.973,0.957,0.0,0.0010000000000000009,0.5529975891113281
+46.0,0.984,0.983,43.35100173950195,0.016,0.973,0.951,0.984,0.984,43.91400146484375,0.016,0.973,0.957,0.0,0.0010000000000000009,0.5629997253417969
+47.0,0.984,0.983,44.28900146484375,0.016,0.973,0.949,0.984,0.984,44.86800003051758,0.016,0.974,0.956,0.0,0.0010000000000000009,0.5789985656738281
+48.0,0.984,0.983,45.231998443603516,0.016,0.973,0.95,0.984,0.984,45.823001861572266,0.016,0.974,0.956,0.0,0.0010000000000000009,0.59100341796875
+49.0,0.984,0.983,46.16400146484375,0.016,0.973,0.95,0.984,0.984,46.7760009765625,0.016,0.974,0.956,0.0,0.0010000000000000009,0.61199951171875
+50.0,0.984,0.983,47.10599899291992,0.016,0.973,0.95,0.984,0.984,47.733001708984375,0.016,0.974,0.956,0.0,0.0010000000000000009,0.6270027160644531
+51.0,0.984,0.983,48.053001403808594,0.016,0.975,0.949,0.984,0.984,48.6879997253418,0.016,0.974,0.956,0.0,0.0010000000000000009,0.6349983215332031
+52.0,0.985,0.984,48.99800109863281,0.015,0.975,0.951,0.984,0.984,49.643001556396484,0.016,0.974,0.956,-0.0010000000000000009,0.0,0.6450004577636719
+53.0,0.985,0.984,49.93600082397461,0.015,0.976,0.949,0.985,0.985,50.59600067138672,0.015,0.974,0.956,0.0,0.0010000000000000009,0.6599998474121094
+54.0,0.985,0.984,50.875999450683594,0.015,0.976,0.951,0.985,0.985,51.54499816894531,0.015,0.975,0.956,0.0,0.0010000000000000009,0.6689987182617188
+55.0,0.985,0.984,51.808998107910156,0.015,0.976,0.949,0.985,0.985,52.500999450683594,0.015,0.975,0.956,0.0,0.0010000000000000009,0.6920013427734375
+56.0,0.985,0.984,52.74700164794922,0.015,0.976,0.95,0.985,0.985,53.45899963378906,0.015,0.975,0.956,0.0,0.0010000000000000009,0.7119979858398438
+57.0,0.985,0.984,53.691001892089844,0.015,0.976,0.949,0.985,0.985,54.40999984741211,0.015,0.975,0.956,0.0,0.0010000000000000009,0.7189979553222656
+58.0,0.985,0.984,54.63399887084961,0.015,0.976,0.95,0.985,0.985,55.362998962402344,0.015,0.975,0.956,0.0,0.0010000000000000009,0.7290000915527344
+59.0,0.985,0.984,55.58300018310547,0.015,0.976,0.95,0.985,0.985,56.3120002746582,0.015,0.975,0.956,0.0,0.0010000000000000009,0.7290000915527344
+60.0,0.985,0.984,56.52899932861328,0.015,0.976,0.95,0.985,0.985,57.268001556396484,0.015,0.975,0.956,0.0,0.0010000000000000009,0.7390022277832031
+61.0,0.985,0.984,57.474998474121094,0.015,0.978,0.95,0.985,0.985,58.2239990234375,0.015,0.975,0.956,0.0,0.0010000000000000009,0.7490005493164062
+62.0,0.985,0.984,58.415000915527344,0.015,0.978,0.95,0.985,0.985,59.17599868774414,0.015,0.975,0.956,0.0,0.0010000000000000009,0.7609977722167969
+63.0,0.985,0.984,59.358001708984375,0.015,0.978,0.95,0.985,0.985,60.13199996948242,0.015,0.975,0.956,0.0,0.0010000000000000009,0.7739982604980469
+64.0,0.985,0.984,60.29600143432617,0.015,0.978,0.95,0.986,0.986,61.0880012512207,0.014,0.975,0.956,0.0010000000000000009,0.0020000000000000018,0.7919998168945312
+65.0,0.985,0.984,61.237998962402344,0.015,0.978,0.95,0.986,0.986,62.03499984741211,0.014,0.975,0.956,0.0010000000000000009,0.0020000000000000018,0.7970008850097656
+66.0,0.985,0.984,62.178001403808594,0.015,0.978,0.95,0.986,0.986,62.98400115966797,0.014,0.975,0.956,0.0010000000000000009,0.0020000000000000018,0.805999755859375
+67.0,0.985,0.984,63.11899948120117,0.015,0.978,0.949,0.986,0.986,63.93600082397461,0.014,0.975,0.956,0.0010000000000000009,0.0020000000000000018,0.8170013427734375
+68.0,0.985,0.984,64.05899810791016,0.015,0.978,0.949,0.987,0.987,64.89299774169922,0.013,0.975,0.956,0.0020000000000000018,0.0030000000000000027,0.8339996337890625
+69.0,0.985,0.984,64.99600219726562,0.015,0.978,0.949,0.987,0.987,65.84600067138672,0.013,0.975,0.955,0.0020000000000000018,0.0030000000000000027,0.8499984741210938
+70.0,0.985,0.984,65.93699645996094,0.015,0.978,0.949,0.987,0.987,66.80000305175781,0.013,0.975,0.955,0.0020000000000000018,0.0030000000000000027,0.863006591796875
+71.0,0.985,0.984,66.87899780273438,0.015,0.978,0.949,0.987,0.987,67.75499725341797,0.013,0.975,0.955,0.0020000000000000018,0.0030000000000000027,0.8759994506835938
+72.0,0.985,0.984,67.82499694824219,0.015,0.978,0.949,0.987,0.987,68.70800018310547,0.013,0.975,0.955,0.0020000000000000018,0.0030000000000000027,0.8830032348632812
+73.0,0.985,0.984,68.76899719238281,0.015,0.978,0.949,0.987,0.987,69.66300201416016,0.013,0.976,0.955,0.0020000000000000018,0.0030000000000000027,0.8940048217773438
+74.0,0.985,0.984,69.70500183105469,0.015,0.978,0.95,0.987,0.987,70.61100006103516,0.013,0.976,0.955,0.0020000000000000018,0.0030000000000000027,0.9059982299804688
+75.0,0.985,0.984,70.64600372314453,0.015,0.979,0.949,0.987,0.987,71.56800079345703,0.013,0.976,0.954,0.0020000000000000018,0.0030000000000000027,0.9219970703125
+76.0,0.985,0.984,71.58799743652344,0.015,0.979,0.949,0.987,0.987,72.52100372314453,0.013,0.976,0.954,0.0020000000000000018,0.0030000000000000027,0.9330062866210938
+77.0,0.985,0.984,72.5270004272461,0.015,0.979,0.949,0.987,0.987,73.47799682617188,0.013,0.976,0.954,0.0020000000000000018,0.0030000000000000027,0.9509963989257812
+78.0,0.985,0.984,73.4729995727539,0.015,0.979,0.949,0.987,0.987,74.43399810791016,0.013,0.976,0.955,0.0020000000000000018,0.0030000000000000027,0.96099853515625
+79.0,0.985,0.984,74.41500091552734,0.015,0.979,0.949,0.987,0.987,75.38800048828125,0.013,0.976,0.955,0.0020000000000000018,0.0030000000000000027,0.9729995727539062
+80.0,0.985,0.984,75.35900115966797,0.015,0.979,0.95,0.987,0.987,76.34200286865234,0.013,0.976,0.955,0.0020000000000000018,0.0030000000000000027,0.983001708984375
+81.0,0.985,0.984,76.2959976196289,0.015,0.979,0.949,0.987,0.987,77.29499816894531,0.013,0.976,0.954,0.0020000000000000018,0.0030000000000000027,0.9990005493164062
+82.0,0.985,0.984,77.24099731445312,0.015,0.979,0.95,0.987,0.987,78.2490005493164,0.013,0.976,0.955,0.0020000000000000018,0.0030000000000000027,1.0080032348632812
+83.0,0.985,0.984,78.1729965209961,0.015,0.979,0.949,0.987,0.987,79.20500183105469,0.013,0.976,0.954,0.0020000000000000018,0.0030000000000000027,1.0320053100585938
+84.0,0.985,0.984,79.11799621582031,0.015,0.979,0.949,0.988,0.988,80.16000366210938,0.012,0.976,0.954,0.0030000000000000027,0.0040000000000000036,1.0420074462890625
+85.0,0.985,0.984,80.06800079345703,0.015,0.979,0.949,0.988,0.988,81.10900115966797,0.012,0.976,0.954,0.0030000000000000027,0.0040000000000000036,1.0410003662109375
+86.0,0.985,0.984,81.00800323486328,0.015,0.979,0.95,0.988,0.988,82.06300354003906,0.012,0.976,0.955,0.0030000000000000027,0.0040000000000000036,1.0550003051757812
+87.0,0.985,0.984,81.9489974975586,0.015,0.979,0.949,0.988,0.988,83.01899719238281,0.012,0.976,0.955,0.0030000000000000027,0.0040000000000000036,1.0699996948242188
+88.0,0.985,0.984,82.88400268554688,0.015,0.979,0.949,0.988,0.988,83.97899627685547,0.012,0.977,0.955,0.0030000000000000027,0.0040000000000000036,1.0949935913085938
+89.0,0.985,0.984,83.82599639892578,0.015,0.979,0.949,0.988,0.988,84.93299865722656,0.012,0.978,0.955,0.0030000000000000027,0.0040000000000000036,1.1070022583007812
+90.0,0.985,0.984,84.7699966430664,0.015,0.98,0.95,0.988,0.988,85.88700103759766,0.012,0.978,0.956,0.0030000000000000027,0.0040000000000000036,1.11700439453125
+91.0,0.985,0.984,85.70700073242188,0.015,0.98,0.949,0.988,0.988,86.83999633789062,0.012,0.978,0.955,0.0030000000000000027,0.0040000000000000036,1.13299560546875
+92.0,0.985,0.984,86.64399719238281,0.015,0.98,0.949,0.988,0.988,87.802001953125,0.012,0.978,0.955,0.0030000000000000027,0.0040000000000000036,1.1580047607421875
+93.0,0.985,0.984,87.58699798583984,0.015,0.98,0.949,0.988,0.988,88.75599670410156,0.012,0.978,0.955,0.0030000000000000027,0.0040000000000000036,1.1689987182617188
+94.0,0.985,0.984,88.53199768066406,0.015,0.98,0.949,0.988,0.988,89.70700073242188,0.012,0.978,0.956,0.0030000000000000027,0.0040000000000000036,1.1750030517578125
+95.0,0.985,0.984,89.48300170898438,0.015,0.981,0.949,0.988,0.988,90.66100311279297,0.012,0.978,0.956,0.0030000000000000027,0.0040000000000000036,1.1780014038085938
+96.0,0.985,0.984,90.41999816894531,0.015,0.981,0.95,0.988,0.988,91.61100006103516,0.012,0.979,0.956,0.0030000000000000027,0.0040000000000000036,1.1910018920898438
+97.0,0.985,0.984,91.36399841308594,0.015,0.981,0.949,0.988,0.988,92.56500244140625,0.012,0.979,0.956,0.0030000000000000027,0.0040000000000000036,1.2010040283203125
+98.0,0.985,0.984,92.30400085449219,0.015,0.981,0.949,0.988,0.988,93.51300048828125,0.012,0.979,0.956,0.0030000000000000027,0.0040000000000000036,1.2089996337890625
+99.0,0.985,0.984,93.24600219726562,0.015,0.981,0.949,0.988,0.988,94.46900177001953,0.012,0.979,0.955,0.0030000000000000027,0.0040000000000000036,1.2229995727539062
+100.0,0.985,0.984,94.18800354003906,0.015,0.981,0.949,0.988,0.988,95.41699981689453,0.012,0.979,0.956,0.0030000000000000027,0.0040000000000000036,1.2289962768554688
diff --git a/ptrm_gbs768_base_vs_multi4_k100_d64_sigma03_Lonly_n1000_seed0.summary.csv b/ptrm_gbs768_base_vs_multi4_k100_d64_sigma03_Lonly_n1000_seed0.summary.csv
new file mode 100644
index 0000000..8afa74e
--- /dev/null
+++ b/ptrm_gbs768_base_vs_multi4_k100_d64_sigma03_Lonly_n1000_seed0.summary.csv
@@ -0,0 +1,2 @@
+base/K_for_oracle_pass_0.9,base/K_for_oracle_pass_0.95,base/K_for_oracle_pass_0.975,base/K_for_oracle_pass_0.99,base/K_for_q_max_exact_0.9,base/K_for_q_max_exact_0.95,base/K_for_q_max_exact_0.975,base/K_for_q_max_exact_0.99,base/all_correct_frac,base/correct_count_det_fail_mean,base/correct_count_det_success_mean,base/correct_count_ge_10_frac,base/correct_count_ge_1_frac,base/correct_count_ge_25_frac,base/correct_count_ge_50_frac,base/correct_count_ge_5_frac,base/correct_count_ge_75_frac,base/correct_count_ge_90_frac,base/correct_count_mean,base/correct_count_median,base/correct_count_q10,base/correct_count_q25,base/correct_count_q75,base/correct_count_q90,base/det_exact,base/mean_rollout_exact,base/oracle_det_fail_frac,base/oracle_det_success_frac,base/oracle_pass,base/q_max_det_fail_frac,base/q_max_det_success_frac,base/q_max_exact,base/zero_correct_frac,base_file,base_nonzero_test_zero_frac,base_zero_test_nonzero_frac,both_oracle_success_test_more_frac,delta_correct_count_mean,delta_correct_count_median,delta_correct_count_q10,delta_correct_count_q25,delta_correct_count_q75,delta_correct_count_q90,multi4/K_for_oracle_pass_0.9,multi4/K_for_oracle_pass_0.95,multi4/K_for_oracle_pass_0.975,multi4/K_for_oracle_pass_0.99,multi4/K_for_q_max_exact_0.9,multi4/K_for_q_max_exact_0.95,multi4/K_for_q_max_exact_0.975,multi4/K_for_q_max_exact_0.99,multi4/all_correct_frac,multi4/correct_count_det_fail_mean,multi4/correct_count_det_success_mean,multi4/correct_count_ge_10_frac,multi4/correct_count_ge_1_frac,multi4/correct_count_ge_25_frac,multi4/correct_count_ge_50_frac,multi4/correct_count_ge_5_frac,multi4/correct_count_ge_75_frac,multi4/correct_count_ge_90_frac,multi4/correct_count_mean,multi4/correct_count_median,multi4/correct_count_q10,multi4/correct_count_q25,multi4/correct_count_q75,multi4/correct_count_q90,multi4/det_exact,multi4/mean_rollout_exact,multi4/oracle_det_fail_frac,multi4/oracle_det_success_frac,multi4/oracle_pass,multi4/q_max_det_fail_frac,multi4/q_max_det_suc
cess_frac,multi4/q_max_exact,multi4/zero_correct_frac,n_samples,oracle_base_only_frac,oracle_both_fail_frac,oracle_both_success_frac,oracle_test_only_frac,q_max_base_only_frac,q_max_both_fail_frac,q_max_both_success_frac,q_max_test_only_frac,rollouts,test_equal_correct_frac,test_fewer_correct_frac,test_file,test_more_correct_frac
+1.0,2.0,6.0,nan,1.0,2.0,6.0,nan,0.85,54.380531311035156,99.2593002319336,0.976,0.985,0.961,0.949,0.981,0.924,0.905,94.18800354003906,100.0,91.0,100.0,100.0,100.0,0.887,0.94188,0.8672566371681416,1.0,0.985,0.8584070796460177,1.0,0.984,0.015,research/flossing/ptrm_official_gbs768_base58590_k100_d64_sigma03_Lonly_n1000_seed0.npz,0.001,0.004,0.107,1.2289999723434448,0.0,0.0,0.0,0.0,1.0,1.0,1.0,10.0,nan,1.0,1.0,10.0,nan,0.889,53.08988952636719,99.55213928222656,0.974,0.988,0.964,0.956,0.979,0.947,0.935,95.41699981689453,100.0,99.0,100.0,100.0,100.0,0.911,0.95417,0.8651685393258427,1.0,0.988,0.8651685393258427,1.0,0.988,0.012,1000.0,0.001,0.011,0.984,0.004,0.001,0.011,0.983,0.005,100.0,0.831,0.058,research/flossing/ptrm_official_gbs768_multi4_35805_k100_d64_sigma03_Lonly_n1000_seed0.npz,0.111
diff --git a/ptrm_lambda_main_trm_base260410_k100_d64_sigma03_Lonly_n256.summary.csv b/ptrm_lambda_main_trm_base260410_k100_d64_sigma03_Lonly_n256.summary.csv
new file mode 100644
index 0000000..da911d6
--- /dev/null
+++ b/ptrm_lambda_main_trm_base260410_k100_d64_sigma03_Lonly_n256.summary.csv
@@ -0,0 +1,2 @@
+deterministic/exact,deterministic/token_acc,fd_lyap,include_clean,lambda_fail_mean,lambda_mean,lambda_success_mean,lyap_min/exact,lyap_min/token_acc,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/exact,oracle_pass/token_acc,q_fail_mean,q_max/exact,q_max/token_acc,q_mean,q_minus_0.25lambda/exact,q_minus_0.25lambda/token_acc,q_minus_0.5lambda/exact,q_minus_0.5lambda/token_acc,q_minus_1lambda/exact,q_minus_1lambda/token_acc,q_minus_2lambda/exact,q_minus_2lambda/token_acc,q_success_mean,rollout0/exact,rollout0/token_acc,rollouts
+0.80078125,0.9281443357467651,1.0,0.0,7.016035556793213,5.942806243896484,5.796727180480957,0.89453125,0.9583333134651184,0.8801953196525574,0.9530397057533264,256.0,0.3,0.95703125,0.9853395223617554,-9.493144989013672,0.953125,0.9805652499198914,5.361981391906738,0.921875,0.9693769812583923,0.921875,0.9696663022041321,0.90625,0.9643614888191223,0.90234375,0.9642168283462524,7.383934020996094,0.8828125,0.9552469253540039,100.0
diff --git a/ptrm_lambda_main_trm_multi4_104164_k100_d64_sigma03_Lonly_n256.summary.csv b/ptrm_lambda_main_trm_multi4_104164_k100_d64_sigma03_Lonly_n256.summary.csv
new file mode 100644
index 0000000..4827c95
--- /dev/null
+++ b/ptrm_lambda_main_trm_multi4_104164_k100_d64_sigma03_Lonly_n256.summary.csv
@@ -0,0 +1,2 @@
+deterministic/exact,deterministic/token_acc,fd_lyap,include_clean,lambda_fail_mean,lambda_mean,lambda_success_mean,lyap_min/exact,lyap_min/token_acc,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/exact,oracle_pass/token_acc,q_fail_mean,q_max/exact,q_max/token_acc,q_mean,q_minus_0.25lambda/exact,q_minus_0.25lambda/token_acc,q_minus_0.5lambda/exact,q_minus_0.5lambda/token_acc,q_minus_1lambda/exact,q_minus_1lambda/token_acc,q_minus_2lambda/exact,q_minus_2lambda/token_acc,q_success_mean,rollout0/exact,rollout0/token_acc,rollouts
+0.85546875,0.943913996219635,1.0,0.0,7.061921119689941,5.787074089050293,5.67285680770874,0.95703125,0.9817708730697632,0.917773425579071,0.9664150476455688,256.0,0.3,0.96484375,0.9870274066925049,-10.648188591003418,0.96484375,0.9840856194496155,6.301214694976807,0.96484375,0.9841821193695068,0.9609375,0.9824942350387573,0.9609375,0.9828318357467651,0.9609375,0.9830729961395264,7.8197712898254395,0.9140625,0.9647955298423767,100.0
diff --git a/ptrm_lambda_quick_trm_base260410_k100_d64_sigma03_Lonly_n64.summary.csv b/ptrm_lambda_quick_trm_base260410_k100_d64_sigma03_Lonly_n64.summary.csv
new file mode 100644
index 0000000..6705290
--- /dev/null
+++ b/ptrm_lambda_quick_trm_base260410_k100_d64_sigma03_Lonly_n64.summary.csv
@@ -0,0 +1,2 @@
+deterministic/exact,deterministic/token_acc,fd_lyap,include_clean,lambda_fail_mean,lambda_mean,lambda_success_mean,lyap_min/exact,lyap_min/token_acc,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/exact,oracle_pass/token_acc,q_fail_mean,q_max/exact,q_max/token_acc,q_mean,q_minus_0.25lambda/exact,q_minus_0.25lambda/token_acc,q_minus_0.5lambda/exact,q_minus_0.5lambda/token_acc,q_minus_1lambda/exact,q_minus_1lambda/token_acc,q_minus_2lambda/exact,q_minus_2lambda/token_acc,q_success_mean,rollout0/exact,rollout0/token_acc,rollouts
+0.734375,0.9054783582687378,1.0,0.0,7.130214214324951,6.115701675415039,5.848303318023682,0.8125,0.919946014881134,0.7914062738418579,0.9237442016601562,64.0,0.3,0.96875,0.9909335970878601,-10.082420349121094,0.953125,0.982446014881134,3.739248037338257,0.890625,0.9511959552764893,0.859375,0.9392361044883728,0.84375,0.9338349103927612,0.828125,0.9286265969276428,7.382274150848389,0.796875,0.9274691343307495,100.0
diff --git a/ptrm_lambda_quick_trm_multi4_104164_k100_d64_sigma03_Lonly_n64.summary.csv b/ptrm_lambda_quick_trm_multi4_104164_k100_d64_sigma03_Lonly_n64.summary.csv
new file mode 100644
index 0000000..8a06595
--- /dev/null
+++ b/ptrm_lambda_quick_trm_multi4_104164_k100_d64_sigma03_Lonly_n64.summary.csv
@@ -0,0 +1,2 @@
+deterministic/exact,deterministic/token_acc,fd_lyap,include_clean,lambda_fail_mean,lambda_mean,lambda_success_mean,lyap_min/exact,lyap_min/token_acc,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/exact,oracle_pass/token_acc,q_fail_mean,q_max/exact,q_max/token_acc,q_mean,q_minus_0.25lambda/exact,q_minus_0.25lambda/token_acc,q_minus_0.5lambda/exact,q_minus_0.5lambda/token_acc,q_minus_1lambda/exact,q_minus_1lambda/token_acc,q_minus_2lambda/exact,q_minus_2lambda/token_acc,q_success_mean,rollout0/exact,rollout0/token_acc,rollouts
+0.828125,0.9336420297622681,1.0,0.0,7.080788612365723,5.867445468902588,5.692871570587158,0.953125,0.9812886118888855,0.874218761920929,0.9500945806503296,64.0,0.3,0.953125,0.9841821193695068,-10.732453346252441,0.953125,0.9789737462997437,5.486577033996582,0.953125,0.9789737462997437,0.953125,0.9793595671653748,0.953125,0.9793595671653748,0.953125,0.9793595671653748,7.820146560668945,0.859375,0.9432870745658875,100.0
diff --git a/ptrm_official_gbs768_base58590_k100_d64_sigma03_Lonly_n1000_seed0.summary.csv b/ptrm_official_gbs768_base58590_k100_d64_sigma03_Lonly_n1000_seed0.summary.csv
new file mode 100644
index 0000000..4c97143
--- /dev/null
+++ b/ptrm_official_gbs768_base58590_k100_d64_sigma03_Lonly_n1000_seed0.summary.csv
@@ -0,0 +1,2 @@
+deterministic/exact,deterministic/token_acc,fd_lyap,fd_spectrum_k,include_clean,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/exact,oracle_pass/token_acc,perturb_both,perturb_h,perturb_l,q_max/exact,q_max/token_acc,q_mean,rollout0/exact,rollout0/token_acc,rollouts,steps
+0.8870000243186951,0.9580987691879272,0.0,0.0,0.0,0.9418799877166748,0.9775508046150208,1000.0,0.3,0.9850000143051147,0.9952592849731445,0.0,0.0,1.0,0.984000027179718,0.9932839870452881,6.73584508895874,0.9419999718666077,0.9780740141868591,100.0,64.0
diff --git a/ptrm_official_gbs768_multi4_35805_k100_d64_sigma03_Lonly_n1000_seed0.summary.csv b/ptrm_official_gbs768_multi4_35805_k100_d64_sigma03_Lonly_n1000_seed0.summary.csv
new file mode 100644
index 0000000..32c31d1
--- /dev/null
+++ b/ptrm_official_gbs768_multi4_35805_k100_d64_sigma03_Lonly_n1000_seed0.summary.csv
@@ -0,0 +1,2 @@
+deterministic/exact,deterministic/token_acc,fd_lyap,fd_spectrum_k,include_clean,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/exact,oracle_pass/token_acc,perturb_both,perturb_h,perturb_l,q_max/exact,q_max/token_acc,q_mean,rollout0/exact,rollout0/token_acc,rollouts,steps
+0.9110000133514404,0.9657777547836304,0.0,0.0,0.0,0.9541699886322021,0.9822410941123962,1000.0,0.3,0.9879999756813049,0.9961728453636169,0.0,0.0,1.0,0.9879999756813049,0.9951233863830566,6.835533142089844,0.9549999833106995,0.982234537601471,100.0,64.0
diff --git a/ptrm_official_main_trm_base260410_k100_d64_sigma03_Lonly_n1000.summary.csv b/ptrm_official_main_trm_base260410_k100_d64_sigma03_Lonly_n1000.summary.csv
new file mode 100644
index 0000000..5de14e9
--- /dev/null
+++ b/ptrm_official_main_trm_base260410_k100_d64_sigma03_Lonly_n1000.summary.csv
@@ -0,0 +1,2 @@
+deterministic/exact,deterministic/token_acc,fd_lyap,include_clean,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/exact,oracle_pass/token_acc,q_max/exact,q_max/token_acc,q_mean,rollout0/exact,rollout0/token_acc,rollouts
+0.7789999842643738,0.9208394289016724,0.0,0.0,0.8560100197792053,0.9456298351287842,1000.0,0.3,0.972000002861023,0.9905309081077576,0.9509999752044678,0.9796666502952576,4.9160590171813965,0.8650000095367432,0.9488394856452942,100.0
diff --git a/ptrm_official_main_trm_multi4_104164_k100_d64_sigma03_Lonly_n1000.summary.csv b/ptrm_official_main_trm_multi4_104164_k100_d64_sigma03_Lonly_n1000.summary.csv
new file mode 100644
index 0000000..1b2bcf5
--- /dev/null
+++ b/ptrm_official_main_trm_multi4_104164_k100_d64_sigma03_Lonly_n1000.summary.csv
@@ -0,0 +1,2 @@
+deterministic/exact,deterministic/token_acc,fd_lyap,include_clean,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/exact,oracle_pass/token_acc,q_max/exact,q_max/token_acc,q_mean,rollout0/exact,rollout0/token_acc,rollouts
+0.8700000047683716,0.9520000219345093,0.0,0.0,0.9305700063705444,0.9737791419029236,1000.0,0.3,0.9850000143051147,0.99558025598526,0.9850000143051147,0.9944815039634705,6.522988796234131,0.9359999895095825,0.9762221574783325,100.0
diff --git a/ptrm_official_sweep_trm_base260410_k25_d16_sigma03_Lonly_n1000.summary.csv b/ptrm_official_sweep_trm_base260410_k25_d16_sigma03_Lonly_n1000.summary.csv
new file mode 100644
index 0000000..42bc4ce
--- /dev/null
+++ b/ptrm_official_sweep_trm_base260410_k25_d16_sigma03_Lonly_n1000.summary.csv
@@ -0,0 +1,2 @@
+deterministic/exact,deterministic/token_acc,fd_lyap,include_clean,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/exact,oracle_pass/token_acc,q_max/exact,q_max/token_acc,q_mean,rollout0/exact,rollout0/token_acc,rollouts
+0.8009999990463257,0.9269383549690247,0.0,0.0,0.7800800204277039,0.9197090864181519,1000.0,0.3,0.9369999766349792,0.9782345294952393,0.9240000247955322,0.969728410243988,3.55741286277771,0.7990000247955322,0.9265802502632141,25.0
diff --git a/ptrm_official_sweep_trm_multi4_104164_k25_d16_sigma03_Lonly_n1000.summary.csv b/ptrm_official_sweep_trm_multi4_104164_k25_d16_sigma03_Lonly_n1000.summary.csv
new file mode 100644
index 0000000..2ebca3d
--- /dev/null
+++ b/ptrm_official_sweep_trm_multi4_104164_k25_d16_sigma03_Lonly_n1000.summary.csv
@@ -0,0 +1,2 @@
+deterministic/exact,deterministic/token_acc,fd_lyap,include_clean,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/exact,oracle_pass/token_acc,q_max/exact,q_max/token_acc,q_mean,rollout0/exact,rollout0/token_acc,rollouts
+0.875,0.9545185565948486,0.0,0.0,0.8641200065612793,0.9500499367713928,1000.0,0.3,0.9679999947547913,0.9895431995391846,0.9679999947547913,0.9874691367149353,5.327029228210449,0.8730000257492065,0.9539506435394287,25.0
diff --git a/ptrm_rollout_selection.py b/ptrm_rollout_selection.py
new file mode 100644
index 0000000..4fb1dd9
--- /dev/null
+++ b/ptrm_rollout_selection.py
@@ -0,0 +1,643 @@
+"""PTRM-style stochastic rollout evaluation with Q and stability selection.
+
+This is an inference-time experiment: no training, no weight updates.
+
+For each input, run K stochastic recursive trajectories by injecting Gaussian
+noise into the latent state before every ACT step. Select a trajectory by:
+ - Q head score (PTRM)
+ - finite-difference top Lyapunov proxy (lowest lambda)
+ - finite-difference low-rank Lyapunov spectrum proxies
+ - simple Q/lambda hybrid scores
+
+The Lyapunov proxy is computed by pairing each rollout with a tiny shadow
+trajectory that receives the same stochastic noise and is renormalized after
+each ACT step. This is much cheaper than JVP-based exact spectrum estimation
+and is enough to test whether stability can act as a free selector.
+
+The optional spectrum proxy generalizes the shadow trajectory to k orthogonal
+shadows and uses QR re-orthogonalization after every ACT step. This estimates
+the top-k finite-time spectrum in a random tangent subspace. It is much more
+expensive than top-1 because the model batch is multiplied by k + 1.
+"""
+from __future__ import annotations
+
+import argparse
+import csv
+import json
+import math
+import sys
+from dataclasses import replace
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+import torch
+
+# Hard-coded location of the TRM checkout. It is pushed to the front of sys.path
+# so that the `models.recursive_reasoning.trm` import just below can be resolved.
+TRM_DIR = Path("/home/yurenh2/rrm/trm")
+sys.path.insert(0, str(TRM_DIR))
+
+from models.recursive_reasoning.trm import ( # noqa: E402
+ TinyRecursiveReasoningModel_ACTV1,
+ TinyRecursiveReasoningModel_ACTV1InnerCarry,
+)
+
+
+# Label id that `correctness` masks out: positions carrying this label never
+# count against exact-match and are excluded from token accuracy.
+IGNORE_LABEL_ID = -100
+
+
+def load_model(ckpt_root: Path, ckpt_name: str, device: str):
+ cfg = json.loads(json.dumps(__import__("yaml").safe_load((ckpt_root / "all_config.yaml").read_text())))
+ train_meta = json.loads((Path(cfg["data_paths"][0]) / "train" / "dataset.json").read_text())
+
+ arch_cfg = dict(cfg["arch"])
+ arch_cfg.update(
+ batch_size=cfg["global_batch_size"],
+ seq_len=train_meta["seq_len"],
+ vocab_size=train_meta["vocab_size"],
+ num_puzzle_identifiers=train_meta["num_puzzle_identifiers"],
+ )
+
+ model = TinyRecursiveReasoningModel_ACTV1(arch_cfg)
+ state = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True)
+ stripped = {k.replace("_orig_mod.", "").replace("model.", ""): v for k, v in state.items()}
+ missing, unexpected = model.load_state_dict(stripped, strict=False)
+ print(f"[load] {ckpt_root.name}/{ckpt_name} missing={len(missing)} unexpected={len(unexpected)}")
+ if missing[:3]:
+ print(f"[load] sample missing: {missing[:3]}")
+ if unexpected[:3]:
+ print(f"[load] sample unexpected: {unexpected[:3]}")
+ model.to(device).eval()
+ return model, cfg
+
+
+def load_test_samples(data_path: Path, n_samples: int, seed: int):
+ rng = np.random.default_rng(seed)
+ inputs = np.load(data_path / "test" / "all__inputs.npy")
+ labels = np.load(data_path / "test" / "all__labels.npy")
+ puzzle_ids = np.load(data_path / "test" / "all__puzzle_identifiers.npy")
+
+ n = min(n_samples, len(inputs))
+ idx = rng.choice(len(inputs), size=n, replace=False)
+ return {
+ "inputs": torch.from_numpy(inputs[idx].astype(np.int32)),
+ "labels": torch.from_numpy(labels[idx].astype(np.int32)),
+ "puzzle_identifiers": torch.from_numpy(puzzle_ids[idx].astype(np.int32)),
+ "idx": idx,
+ }
+
+
+def batch_slice(samples: dict[str, Any], start: int, end: int, device: str):
+ return {
+ k: v[start:end].to(device, non_blocking=True)
+ for k, v in samples.items()
+ if k in ("inputs", "labels", "puzzle_identifiers")
+ }
+
+
+def repeat_batch(batch: dict[str, torch.Tensor], repeats: int):
+ if repeats == 1:
+ return batch
+ return {k: v.repeat_interleave(repeats, dim=0) for k, v in batch.items()}
+
+
+def cat_batches(a: dict[str, torch.Tensor], b: dict[str, torch.Tensor]):
+ return {k: torch.cat([a[k], b[k]], dim=0) for k in a}
+
+
+def _rand_unit_like(inner: TinyRecursiveReasoningModel_ACTV1InnerCarry, generator: torch.Generator):
+ dh = torch.randn(inner.z_H.shape, device=inner.z_H.device, dtype=torch.float32, generator=generator)
+ dl = torch.randn(inner.z_L.shape, device=inner.z_L.device, dtype=torch.float32, generator=generator)
+ norm = torch.sqrt(dh.flatten(1).square().sum(-1) + dl.flatten(1).square().sum(-1)).clamp_min(1e-30)
+ view_h = (dh.shape[0],) + (1,) * (dh.ndim - 1)
+ view_l = (dl.shape[0],) + (1,) * (dl.ndim - 1)
+ return (dh / norm.view(view_h)).to(inner.z_H.dtype), (dl / norm.view(view_l)).to(inner.z_L.dtype)
+
+
+def _q_to_dirs(
+ q: torch.Tensor,
+ z_h_shape: torch.Size,
+ z_l_shape: torch.Size,
+ h_dtype: torch.dtype,
+ l_dtype: torch.dtype,
+):
+ total, _dim, spec_k = q.shape
+ h_numel = math.prod(z_h_shape)
+ q_t = q.transpose(1, 2).contiguous()
+ h_dirs = q_t[:, :, :h_numel].reshape((total, spec_k) + tuple(z_h_shape)).to(h_dtype)
+ l_dirs = q_t[:, :, h_numel:].reshape((total, spec_k) + tuple(z_l_shape)).to(l_dtype)
+ return h_dirs, l_dirs
+
+
+def _dirs_to_q(h_dirs: torch.Tensor, l_dirs: torch.Tensor):
+ q_t = torch.cat([h_dirs.float().flatten(2), l_dirs.float().flatten(2)], dim=2)
+ return q_t.transpose(1, 2).contiguous()
+
+
+def _rand_orthonormal_dirs_like(
+ inner: TinyRecursiveReasoningModel_ACTV1InnerCarry,
+ spec_k: int,
+ generator: torch.Generator,
+):
+ total = inner.z_H.shape[0]
+ h_dirs = torch.randn(
+ (total, spec_k) + tuple(inner.z_H.shape[1:]),
+ device=inner.z_H.device,
+ dtype=torch.float32,
+ generator=generator,
+ )
+ l_dirs = torch.randn(
+ (total, spec_k) + tuple(inner.z_L.shape[1:]),
+ device=inner.z_L.device,
+ dtype=torch.float32,
+ generator=generator,
+ )
+ q, _ = torch.linalg.qr(_dirs_to_q(h_dirs, l_dirs), mode="reduced")
+ return _q_to_dirs(q, inner.z_H.shape[1:], inner.z_L.shape[1:], inner.z_H.dtype, inner.z_L.dtype)
+
+
+def _make_spectrum_shadows(
+ main: TinyRecursiveReasoningModel_ACTV1InnerCarry,
+ h_dirs: torch.Tensor,
+ l_dirs: torch.Tensor,
+ eps: float,
+):
+ total, spec_k = h_dirs.shape[:2]
+ z_h = main.z_H[:, None] + eps * h_dirs.to(main.z_H.dtype)
+ z_l = main.z_L[:, None] + eps * l_dirs.to(main.z_L.dtype)
+ return TinyRecursiveReasoningModel_ACTV1InnerCarry(
+ z_H=z_h.reshape((total * spec_k,) + tuple(main.z_H.shape[1:])).detach(),
+ z_L=z_l.reshape((total * spec_k,) + tuple(main.z_L.shape[1:])).detach(),
+ )
+
+
+def _repeat_inner_batch(batch: dict[str, torch.Tensor], repeats: int):
+ return {k: v.repeat_interleave(repeats, dim=0) for k, v in batch.items()}
+
+
+def _cat_many_batches(batches: list[dict[str, torch.Tensor]]):
+ return {k: torch.cat([b[k] for b in batches], dim=0) for k in batches[0]}
+
+
+def _split_inner(inner: TinyRecursiveReasoningModel_ACTV1InnerCarry, n: int):
+ return (
+ TinyRecursiveReasoningModel_ACTV1InnerCarry(
+ z_H=inner.z_H[:n].contiguous(),
+ z_L=inner.z_L[:n].contiguous(),
+ ),
+ TinyRecursiveReasoningModel_ACTV1InnerCarry(
+ z_H=inner.z_H[n:].contiguous(),
+ z_L=inner.z_L[n:].contiguous(),
+ ),
+ )
+
+
+def _split_spectrum_inner(inner: TinyRecursiveReasoningModel_ACTV1InnerCarry, n_main: int):
+ return (
+ TinyRecursiveReasoningModel_ACTV1InnerCarry(
+ z_H=inner.z_H[:n_main].contiguous(),
+ z_L=inner.z_L[:n_main].contiguous(),
+ ),
+ TinyRecursiveReasoningModel_ACTV1InnerCarry(
+ z_H=inner.z_H[n_main:].contiguous(),
+ z_L=inner.z_L[n_main:].contiguous(),
+ ),
+ )
+
+
+def _cat_inner(a: TinyRecursiveReasoningModel_ACTV1InnerCarry, b: TinyRecursiveReasoningModel_ACTV1InnerCarry):
+ return TinyRecursiveReasoningModel_ACTV1InnerCarry(
+ z_H=torch.cat([a.z_H, b.z_H], dim=0),
+ z_L=torch.cat([a.z_L, b.z_L], dim=0),
+ )
+
+
+def _sample_noise(
+ shape: torch.Size,
+ std: float,
+ generator: torch.Generator,
+ dtype: torch.dtype,
+ device: torch.device,
+):
+ if std <= 0:
+ return torch.zeros(shape, device=device, dtype=dtype)
+ return (std * torch.randn(shape, device=device, dtype=torch.float32, generator=generator)).to(dtype)
+
+
+def _apply_step_noise(
+ inner: TinyRecursiveReasoningModel_ACTV1InnerCarry,
+ noise_h: torch.Tensor,
+ noise_l: torch.Tensor,
+ perturb: str,
+):
+ z_h, z_l = inner.z_H, inner.z_L
+ if perturb in ("h", "both"):
+ z_h = z_h + noise_h
+ if perturb in ("l", "both"):
+ z_l = z_l + noise_l
+ return replace(inner, z_H=z_h, z_L=z_l)
+
+
+def _separation(
+ main: TinyRecursiveReasoningModel_ACTV1InnerCarry,
+ shadow: TinyRecursiveReasoningModel_ACTV1InnerCarry,
+):
+ dh = (shadow.z_H.float() - main.z_H.float()).flatten(1)
+ dl = (shadow.z_L.float() - main.z_L.float()).flatten(1)
+ return torch.sqrt(dh.square().sum(-1) + dl.square().sum(-1)).clamp_min(1e-30)
+
+
+def _renormalize_shadow(
+ main: TinyRecursiveReasoningModel_ACTV1InnerCarry,
+ shadow: TinyRecursiveReasoningModel_ACTV1InnerCarry,
+ eps: float,
+):
+ sep = _separation(main, shadow)
+ view_h = (sep.shape[0],) + (1,) * (main.z_H.ndim - 1)
+ view_l = (sep.shape[0],) + (1,) * (main.z_L.ndim - 1)
+ scale_h = (eps / sep).view(view_h).to(main.z_H.dtype)
+ scale_l = (eps / sep).view(view_l).to(main.z_L.dtype)
+ return TinyRecursiveReasoningModel_ACTV1InnerCarry(
+ z_H=(main.z_H + (shadow.z_H - main.z_H) * scale_h).detach(),
+ z_L=(main.z_L + (shadow.z_L - main.z_L) * scale_l).detach(),
+ )
+
+
+@torch.inference_mode()
+def deterministic_eval(model, batch: dict[str, torch.Tensor]):
+ with torch.device(batch["inputs"].device):
+ carry = model.initial_carry(batch)
+ logits = None
+ q_halt = None
+ steps = 0
+ while True:
+ carry, outputs = model(carry=carry, batch=batch)
+ logits = outputs["logits"]
+ q_halt = outputs["q_halt_logits"]
+ steps += 1
+ if bool(carry.halted.all()):
+ break
+ exact, token_acc = correctness(logits, batch["labels"])
+ return exact, token_acc, q_halt, steps
+
+
+def correctness(logits: torch.Tensor, labels: torch.Tensor):
+ preds = logits.argmax(dim=-1)
+ mask = labels != IGNORE_LABEL_ID
+ exact = torch.where(mask, preds == labels, True).all(dim=-1)
+ denom = mask.sum(-1).clamp_min(1)
+ token_acc = ((preds == labels) & mask).sum(-1).float() / denom.float()
+ return exact, token_acc
+
+
+@torch.inference_mode()
+def ptrm_rollouts(
+ model,
+ batch: dict[str, torch.Tensor],
+ rollouts: int,
+ steps: int,
+ noise_std: float,
+ include_clean: bool,
+ perturb: str,
+ fd_lyap: bool,
+ fd_spectrum_k: int,
+ fd_eps: float,
+ generator: torch.Generator,
+):
+ device = batch["inputs"].device
+ base_batch_size = batch["inputs"].shape[0]
+ expanded = repeat_batch(batch, rollouts)
+ total = expanded["inputs"].shape[0]
+ rollout_id = torch.arange(total, device=device) % rollouts
+
+ with torch.device(device):
+ carry = model.initial_carry(expanded)
+ reset = torch.ones_like(carry.halted)
+ main = model.inner.reset_carry(reset, carry.inner_carry)
+
+ shadow = None
+ lyap_sum = None
+ spec_shadows = None
+ spec_h_dirs = None
+ spec_l_dirs = None
+ lyap_spec_sum = None
+ if fd_spectrum_k > 0:
+ spec_h_dirs, spec_l_dirs = _rand_orthonormal_dirs_like(main, fd_spectrum_k, generator)
+ spec_shadows = _make_spectrum_shadows(main, spec_h_dirs, spec_l_dirs, fd_eps)
+ lyap_spec_sum = torch.zeros(total, fd_spectrum_k, device=device, dtype=torch.float32)
+ elif fd_lyap:
+ dh, dl = _rand_unit_like(main, generator)
+ shadow = TinyRecursiveReasoningModel_ACTV1InnerCarry(
+ z_H=(main.z_H + fd_eps * dh).detach(),
+ z_L=(main.z_L + fd_eps * dl).detach(),
+ )
+ lyap_sum = torch.zeros(total, device=device, dtype=torch.float32)
+
+ logits = None
+ q_halt = None
+ q_continue = None
+ for _ in range(steps):
+ noise_h = _sample_noise(main.z_H.shape, noise_std, generator, main.z_H.dtype, device)
+ noise_l = _sample_noise(main.z_L.shape, noise_std, generator, main.z_L.dtype, device)
+ if include_clean and rollouts > 1:
+ clean_mask = (rollout_id == 0).view((-1,) + (1,) * (main.z_H.ndim - 1))
+ noise_h = torch.where(clean_mask, torch.zeros_like(noise_h), noise_h)
+ noise_l = torch.where(clean_mask, torch.zeros_like(noise_l), noise_l)
+
+ main = _apply_step_noise(main, noise_h, noise_l, perturb)
+ if fd_spectrum_k > 0:
+ assert spec_shadows is not None and lyap_spec_sum is not None
+ shadow_noise_h = noise_h.repeat_interleave(fd_spectrum_k, dim=0)
+ shadow_noise_l = noise_l.repeat_interleave(fd_spectrum_k, dim=0)
+ spec_shadows = _apply_step_noise(spec_shadows, shadow_noise_h, shadow_noise_l, perturb)
+ combined_inner = _cat_inner(main, spec_shadows)
+ combined_batch = _cat_many_batches([expanded, _repeat_inner_batch(expanded, fd_spectrum_k)])
+ combined_inner, combined_logits, (combined_q_halt, combined_q_continue) = model.inner(combined_inner, combined_batch)
+ main, spec_shadows = _split_spectrum_inner(combined_inner, total)
+ logits = combined_logits[:total]
+ q_halt = combined_q_halt[:total]
+ q_continue = combined_q_continue[:total]
+
+ delta_h = (
+ spec_shadows.z_H.reshape((total, fd_spectrum_k) + tuple(main.z_H.shape[1:])).float()
+ - main.z_H[:, None].float()
+ ) / fd_eps
+ delta_l = (
+ spec_shadows.z_L.reshape((total, fd_spectrum_k) + tuple(main.z_L.shape[1:])).float()
+ - main.z_L[:, None].float()
+ ) / fd_eps
+ q, r = torch.linalg.qr(_dirs_to_q(delta_h, delta_l), mode="reduced")
+ diag = torch.diagonal(r, dim1=-2, dim2=-1).abs().clamp_min(1e-30)
+ lyap_spec_sum = lyap_spec_sum + torch.log(diag).float()
+ spec_h_dirs, spec_l_dirs = _q_to_dirs(
+ q, main.z_H.shape[1:], main.z_L.shape[1:], main.z_H.dtype, main.z_L.dtype
+ )
+ spec_shadows = _make_spectrum_shadows(main, spec_h_dirs, spec_l_dirs, fd_eps)
+ elif fd_lyap:
+ assert shadow is not None
+ shadow = _apply_step_noise(shadow, noise_h, noise_l, perturb)
+ combined_inner = _cat_inner(main, shadow)
+ combined_batch = cat_batches(expanded, expanded)
+ combined_inner, combined_logits, (combined_q_halt, combined_q_continue) = model.inner(combined_inner, combined_batch)
+ main, shadow = _split_inner(combined_inner, total)
+ logits = combined_logits[:total]
+ q_halt = combined_q_halt[:total]
+ q_continue = combined_q_continue[:total]
+ sep = _separation(main, shadow)
+ lyap_sum = lyap_sum + torch.log(sep / fd_eps).float() # type: ignore[operator]
+ shadow = _renormalize_shadow(main, shadow, fd_eps)
+ else:
+ main, logits, (q_halt, q_continue) = model.inner(main, expanded)
+
+ assert logits is not None and q_halt is not None
+ exact, token_acc = correctness(logits, expanded["labels"])
+ exact = exact.view(base_batch_size, rollouts)
+ token_acc = token_acc.view(base_batch_size, rollouts)
+ q_halt = q_halt.float().view(base_batch_size, rollouts)
+ q_continue = q_continue.float().view(base_batch_size, rollouts) if q_continue is not None else torch.zeros_like(q_halt)
+ lyap = None
+ lyap_spec = None
+ if fd_spectrum_k > 0:
+ assert lyap_spec_sum is not None
+ lyap_spec = (lyap_spec_sum / max(steps, 1)).view(base_batch_size, rollouts, fd_spectrum_k)
+ lyap_spec = torch.sort(lyap_spec, dim=-1, descending=True).values
+ lyap = lyap_spec[..., 0]
+ elif fd_lyap:
+ assert lyap_sum is not None
+ lyap = (lyap_sum / max(steps, 1)).view(base_batch_size, rollouts)
+ return exact, token_acc, q_halt, q_continue, lyap, lyap_spec
+
+
+def _take_by_idx(values: torch.Tensor, idx: torch.Tensor):
+ return values.gather(1, idx[:, None]).squeeze(1)
+
+
+def _zscore_per_row(values: torch.Tensor):
+ return (values - values.mean(dim=1, keepdim=True)) / values.std(dim=1, keepdim=True).clamp_min(1e-6)
+
+
+def summarize_selectors(exact, token_acc, q_halt, lyap, lyap_spec=None):
+ out: dict[str, float] = {}
+ bsz, rollouts = exact.shape
+ arange = torch.arange(bsz, device=exact.device)
+ correct_counts = exact.float().sum(dim=1)
+
+ selectors = {
+ "rollout0": torch.zeros(bsz, device=exact.device, dtype=torch.long),
+ "q_max": q_halt.argmax(dim=1),
+ "oracle_pass": None,
+ }
+ if lyap is not None:
+ selectors["lyap_min"] = lyap.argmin(dim=1)
+ qz = _zscore_per_row(q_halt)
+ lz = _zscore_per_row(lyap)
+ for alpha in (0.25, 0.5, 1.0, 2.0):
+ selectors[f"q_minus_{alpha:g}lambda"] = (qz - alpha * lz).argmax(dim=1)
+ if lyap_spec is not None:
+ spec_pos_mass = lyap_spec.clamp_min(0).sum(dim=-1)
+ spec_pos_l2 = lyap_spec.clamp_min(0).square().mean(dim=-1).sqrt()
+ spec_mean = lyap_spec.mean(dim=-1)
+ spec_count_pos = (lyap_spec > 0).float().sum(dim=-1)
+ spec_spread = lyap_spec[..., 0] - lyap_spec[..., -1]
+
+ selectors["spec_pos_mass_min"] = spec_pos_mass.argmin(dim=1)
+ selectors["spec_pos_l2_min"] = spec_pos_l2.argmin(dim=1)
+ selectors["spec_mean_min"] = spec_mean.argmin(dim=1)
+ selectors["spec_count_pos_min"] = spec_count_pos.argmin(dim=1)
+ selectors["spec_spread_min"] = spec_spread.argmin(dim=1)
+
+ for name, idx in selectors.items():
+ if idx is None:
+ out[f"{name}/exact"] = exact.any(dim=1).float().mean().item()
+ out[f"{name}/token_acc"] = token_acc.max(dim=1).values.mean().item()
+ else:
+ out[f"{name}/exact"] = exact[arange, idx].float().mean().item()
+ out[f"{name}/token_acc"] = token_acc[arange, idx].mean().item()
+
+ out["mean_rollout/exact"] = exact.float().mean().item()
+ out["mean_rollout/token_acc"] = token_acc.mean().item()
+ out["correct_count/mean"] = correct_counts.mean().item()
+ out["correct_count/std"] = correct_counts.std(unbiased=False).item()
+ out["correct_count/median"] = correct_counts.median().item()
+ out["correct_count/q10"] = torch.quantile(correct_counts, 0.10).item()
+ out["correct_count/q25"] = torch.quantile(correct_counts, 0.25).item()
+ out["correct_count/q75"] = torch.quantile(correct_counts, 0.75).item()
+ out["correct_count/q90"] = torch.quantile(correct_counts, 0.90).item()
+ out["correct_count/zero_frac"] = (correct_counts == 0).float().mean().item()
+ out["correct_count/full_frac"] = (correct_counts == rollouts).float().mean().item()
+ for threshold in (1, 5, 10, 25, 50, 75, 90):
+ if threshold <= rollouts:
+ out[f"correct_count/ge_{threshold}_frac"] = (correct_counts >= threshold).float().mean().item()
+ out["q_mean"] = q_halt.mean().item()
+ if lyap is not None:
+ out["lambda_mean"] = lyap.mean().item()
+ if exact.any().item() and (~exact).any().item():
+ out["lambda_success_mean"] = lyap[exact].mean().item()
+ out["lambda_fail_mean"] = lyap[~exact].mean().item()
+ out["q_success_mean"] = q_halt[exact].mean().item()
+ out["q_fail_mean"] = q_halt[~exact].mean().item()
+ if lyap_spec is not None:
+ spec_pos_mass = lyap_spec.clamp_min(0).sum(dim=-1)
+ spec_pos_l2 = lyap_spec.clamp_min(0).square().mean(dim=-1).sqrt()
+ spec_mean = lyap_spec.mean(dim=-1)
+ spec_count_pos = (lyap_spec > 0).float().sum(dim=-1)
+ spec_spread = lyap_spec[..., 0] - lyap_spec[..., -1]
+ out["spec_k"] = float(lyap_spec.shape[-1])
+ out["spec_pos_mass_mean"] = spec_pos_mass.mean().item()
+ out["spec_pos_l2_mean"] = spec_pos_l2.mean().item()
+ out["spec_mean_mean"] = spec_mean.mean().item()
+ out["spec_count_pos_mean"] = spec_count_pos.mean().item()
+ out["spec_spread_mean"] = spec_spread.mean().item()
+ if exact.any().item() and (~exact).any().item():
+ out["spec_pos_mass_success_mean"] = spec_pos_mass[exact].mean().item()
+ out["spec_pos_mass_fail_mean"] = spec_pos_mass[~exact].mean().item()
+ out["spec_mean_success_mean"] = spec_mean[exact].mean().item()
+ out["spec_mean_fail_mean"] = spec_mean[~exact].mean().item()
+ out["spec_count_pos_success_mean"] = spec_count_pos[exact].mean().item()
+ out["spec_count_pos_fail_mean"] = spec_count_pos[~exact].mean().item()
+ return out
+
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--ckpt-root", required=True)
+ parser.add_argument("--ckpt-name", default="step_260410")
+ parser.add_argument("--n-samples", type=int, default=512)
+ parser.add_argument("--batch-size", type=int, default=32)
+ parser.add_argument("--rollouts", type=int, default=8)
+ parser.add_argument("--steps", type=int, default=16)
+ parser.add_argument("--noise-std", type=float, default=1e-3)
+ parser.add_argument("--include-clean", action="store_true")
+ parser.add_argument("--perturb", choices=["h", "l", "both"], default="both")
+ parser.add_argument("--fd-lyap", action="store_true")
+ parser.add_argument("--fd-spectrum-k", type=int, default=0)
+ parser.add_argument("--fd-eps", type=float, default=1e-2)
+ parser.add_argument("--seed", type=int, default=0)
+ parser.add_argument("--out-prefix", default="research/flossing/ptrm_selection")
+ args = parser.parse_args()
+
+ device = "cuda"
+ torch.manual_seed(args.seed)
+ generator = torch.Generator(device=device).manual_seed(args.seed + 12345)
+
+ ckpt_root = Path(args.ckpt_root)
+ model, cfg = load_model(ckpt_root, args.ckpt_name, device)
+ samples = load_test_samples(Path(cfg["data_paths"][0]), args.n_samples, args.seed)
+ n = len(samples["inputs"])
+
+ all_det_exact, all_det_token = [], []
+ all_exact, all_token, all_q, all_q_continue, all_lam, all_spec = [], [], [], [], [], []
+
+ for start in range(0, n, args.batch_size):
+ end = min(start + args.batch_size, n)
+ batch = batch_slice(samples, start, end, device)
+ det_exact, det_token, _det_q, det_steps = deterministic_eval(model, batch)
+ exact, token_acc, q_halt, q_continue, lyap, lyap_spec = ptrm_rollouts(
+ model=model,
+ batch=batch,
+ rollouts=args.rollouts,
+ steps=args.steps,
+ noise_std=args.noise_std,
+ include_clean=args.include_clean,
+ perturb=args.perturb,
+ fd_lyap=args.fd_lyap,
+ fd_spectrum_k=args.fd_spectrum_k,
+ fd_eps=args.fd_eps,
+ generator=generator,
+ )
+ all_det_exact.append(det_exact.cpu())
+ all_det_token.append(det_token.cpu())
+ all_exact.append(exact.cpu())
+ all_token.append(token_acc.cpu())
+ all_q.append(q_halt.cpu())
+ all_q_continue.append(q_continue.cpu())
+ if lyap is not None:
+ all_lam.append(lyap.cpu())
+ if lyap_spec is not None:
+ all_spec.append(lyap_spec.cpu())
+ print(
+ f"[{end}/{n}] det={det_exact.float().mean().item():.4f} "
+ f"q_sel={_take_by_idx(exact, q_halt.argmax(1)).float().mean().item():.4f} "
+ f"pass@K={exact.any(1).float().mean().item():.4f} steps={det_steps}",
+ flush=True,
+ )
+
+ det_exact = torch.cat(all_det_exact)
+ det_token = torch.cat(all_det_token)
+ exact = torch.cat(all_exact)
+ token_acc = torch.cat(all_token)
+ q_halt = torch.cat(all_q)
+ q_continue = torch.cat(all_q_continue)
+ lyap = torch.cat(all_lam) if all_lam else None
+ lyap_spec = torch.cat(all_spec) if all_spec else None
+ summary = summarize_selectors(exact, token_acc, q_halt, lyap, lyap_spec)
+ summary["deterministic/exact"] = det_exact.float().mean().item()
+ summary["deterministic/token_acc"] = det_token.mean().item()
+ correct_counts = exact.float().sum(dim=1)
+ oracle_success = exact.any(dim=1)
+ q_selected = exact[torch.arange(exact.shape[0]), q_halt.argmax(dim=1)]
+ det_success = det_exact.bool()
+ det_fail = ~det_success
+ if det_success.any().item():
+ summary["correct_count/det_success_mean"] = correct_counts[det_success].mean().item()
+ summary["oracle_pass/det_success_frac"] = oracle_success[det_success].float().mean().item()
+ summary["q_max/det_success_frac"] = q_selected[det_success].float().mean().item()
+ if det_fail.any().item():
+ summary["correct_count/det_fail_mean"] = correct_counts[det_fail].mean().item()
+ summary["oracle_pass/det_fail_frac"] = oracle_success[det_fail].float().mean().item()
+ summary["q_max/det_fail_frac"] = q_selected[det_fail].float().mean().item()
+ summary["n_samples"] = float(n)
+ summary["rollouts"] = float(args.rollouts)
+ summary["noise_std"] = float(args.noise_std)
+ summary["include_clean"] = float(args.include_clean)
+ summary["fd_lyap"] = float(args.fd_lyap)
+ summary["fd_spectrum_k"] = float(args.fd_spectrum_k)
+ summary["steps"] = float(args.steps)
+ summary["perturb_l"] = float(args.perturb == "l")
+ summary["perturb_h"] = float(args.perturb == "h")
+ summary["perturb_both"] = float(args.perturb == "both")
+
+ out_prefix = Path(args.out_prefix)
+ out_prefix.parent.mkdir(parents=True, exist_ok=True)
+ meta = {
+ "ckpt_root": str(ckpt_root),
+ "ckpt_name": args.ckpt_name,
+ "n_samples": n,
+ "batch_size": args.batch_size,
+ "rollouts": args.rollouts,
+ "steps": args.steps,
+ "noise_std": args.noise_std,
+ "include_clean": args.include_clean,
+ "perturb": args.perturb,
+ "fd_lyap": args.fd_lyap,
+ "fd_spectrum_k": args.fd_spectrum_k,
+ "fd_eps": args.fd_eps,
+ "seed": args.seed,
+ }
+ np.savez_compressed(
+ f"{out_prefix}.npz",
+ idx=samples["idx"],
+ det_exact=det_exact.numpy(),
+ det_token_acc=det_token.numpy(),
+ exact=exact.numpy(),
+ token_acc=token_acc.numpy(),
+ q_halt=q_halt.numpy(),
+ q_continue=q_continue.numpy(),
+ lyap=np.asarray([]) if lyap is None else lyap.numpy(),
+ lyap_spec=np.asarray([]) if lyap_spec is None else lyap_spec.numpy(),
+ meta_json=np.asarray(json.dumps(meta, sort_keys=True)),
+ )
+ with open(f"{out_prefix}.meta.json", "w") as f:
+ json.dump(meta, f, indent=2, sort_keys=True)
+ with open(f"{out_prefix}.summary.csv", "w", newline="") as f:
+ writer = csv.DictWriter(f, fieldnames=sorted(summary))
+ writer.writeheader()
+ writer.writerow(summary)
+
+ print("\nsummary")
+ for key in sorted(summary):
+ print(f"{key}: {summary[key]}")
+ print(f"\nsaved {out_prefix}.npz and {out_prefix}.summary.csv")
+
+
+# Run the CLI only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+ main()
diff --git a/ptrm_same_subset/base58590_k25_d64_sigma03_Lonly_n10000_seed20260602.summary.csv b/ptrm_same_subset/base58590_k25_d64_sigma03_Lonly_n10000_seed20260602.summary.csv
new file mode 100644
index 0000000..16c5a1b
--- /dev/null
+++ b/ptrm_same_subset/base58590_k25_d64_sigma03_Lonly_n10000_seed20260602.summary.csv
@@ -0,0 +1,2 @@
+correct_count/det_fail_mean,correct_count/det_success_mean,correct_count/full_frac,correct_count/ge_10_frac,correct_count/ge_1_frac,correct_count/ge_25_frac,correct_count/ge_5_frac,correct_count/mean,correct_count/median,correct_count/q10,correct_count/q25,correct_count/q75,correct_count/q90,correct_count/std,correct_count/zero_frac,deterministic/exact,deterministic/token_acc,fd_lyap,fd_spectrum_k,include_clean,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/det_fail_frac,oracle_pass/det_success_frac,oracle_pass/exact,oracle_pass/token_acc,perturb_both,perturb_h,perturb_l,q_max/det_fail_frac,q_max/det_success_frac,q_max/exact,q_max/token_acc,q_mean,rollout0/exact,rollout0/token_acc,rollouts,steps
+13.544709205627441,24.74855613708496,0.847599983215332,0.944599986076355,0.9776999950408936,0.847599983215332,0.9599999785423279,23.2450008392334,25.0,20.0,25.0,25.0,25.0,5.4259724617004395,0.022299999371170998,0.8658000230789185,0.9498122930526733,0.0,0.0,0.0,0.9297999739646912,0.9728542566299438,10000.0,0.3,0.8338301181793213,1.0,0.9776999950408936,0.9922346472740173,0.0,0.0,1.0,0.8338301181793213,0.9998844861984253,0.9775999784469604,0.9908208847045898,6.504925727844238,0.9284999966621399,0.9722283482551575,25.0,64.0
diff --git a/ptrm_same_subset/multi4_35805_k25_d64_sigma03_Lonly_n10000_seed20260602.summary.csv b/ptrm_same_subset/multi4_35805_k25_d64_sigma03_Lonly_n10000_seed20260602.summary.csv
new file mode 100644
index 0000000..f991531
--- /dev/null
+++ b/ptrm_same_subset/multi4_35805_k25_d64_sigma03_Lonly_n10000_seed20260602.summary.csv
@@ -0,0 +1,2 @@
+correct_count/det_fail_mean,correct_count/det_success_mean,correct_count/full_frac,correct_count/ge_10_frac,correct_count/ge_1_frac,correct_count/ge_25_frac,correct_count/ge_5_frac,correct_count/mean,correct_count/median,correct_count/q10,correct_count/q25,correct_count/q75,correct_count/q90,correct_count/std,correct_count/zero_frac,deterministic/exact,deterministic/token_acc,fd_lyap,fd_spectrum_k,include_clean,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/det_fail_frac,oracle_pass/det_success_frac,oracle_pass/exact,oracle_pass/token_acc,perturb_both,perturb_h,perturb_l,q_max/det_fail_frac,q_max/det_success_frac,q_max/exact,q_max/token_acc,q_mean,rollout0/exact,rollout0/token_acc,rollouts,steps
+13.846529960632324,24.8140811920166,0.8842999935150146,0.9599000215530396,0.9828000068664551,0.8842999935150146,0.9696999788284302,23.692100524902344,25.0,24.0,25.0,25.0,25.0,4.753871917724609,0.01720000058412552,0.8977000117301941,0.9607678651809692,0.0,0.0,0.0,0.9476839900016785,0.9795129895210266,10000.0,0.3,0.8328445553779602,0.9998885989189148,0.9828000068664551,0.9940740466117859,0.0,0.0,1.0,0.829912006855011,0.9995543956756592,0.982200026512146,0.9926013946533203,6.862946510314941,0.9480999708175659,0.979781448841095,25.0,64.0
diff --git a/ptrm_same_subset/paired_ptrm_k100_n1000_seed0_summary.csv b/ptrm_same_subset/paired_ptrm_k100_n1000_seed0_summary.csv
new file mode 100644
index 0000000..8122604
--- /dev/null
+++ b/ptrm_same_subset/paired_ptrm_k100_n1000_seed0_summary.csv
@@ -0,0 +1,2 @@
+n,rollouts,base_det,multi4_det,delta_det,base_mean_rollout,multi4_mean_rollout,delta_mean_rollout,base_qmax,multi4_qmax,delta_qmax,base_oracle,multi4_oracle,delta_oracle,base_correct_count_mean,multi4_correct_count_mean,delta_correct_count_mean,det_base_only_frac,det_multi4_only_frac,oracle_base_only_frac,oracle_multi4_only_frac
+1000,100,0.887,0.911,0.02400000000000002,0.94188,0.95417,0.012289999999999912,0.984,0.988,0.0040000000000000036,0.985,0.988,0.0030000000000000027,94.188,95.417,1.2289999999999992,0.034,0.058,0.001,0.004
diff --git a/ptrm_selection_smoke_trm_base260410_k4_n64.summary.csv b/ptrm_selection_smoke_trm_base260410_k4_n64.summary.csv
new file mode 100644
index 0000000..4ca4100
--- /dev/null
+++ b/ptrm_selection_smoke_trm_base260410_k4_n64.summary.csv
@@ -0,0 +1,2 @@
+deterministic/exact,deterministic/token_acc,fd_lyap,include_clean,lambda_fail_mean,lambda_mean,lambda_success_mean,lyap_min/exact,lyap_min/token_acc,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/exact,oracle_pass/token_acc,q_fail_mean,q_max/exact,q_max/token_acc,q_mean,q_minus_0.25lambda/exact,q_minus_0.25lambda/token_acc,q_minus_0.5lambda/exact,q_minus_0.5lambda/token_acc,q_minus_1lambda/exact,q_minus_1lambda/token_acc,q_minus_2lambda/exact,q_minus_2lambda/token_acc,q_success_mean,rollout0/exact,rollout0/token_acc,rollouts
+0.703125,0.8964120745658875,1.0,1.0,7.387371063232422,6.39274787902832,5.995985507965088,0.796875,0.9274691343307495,0.71484375,0.8985822200775146,64.0,0.001,0.796875,0.9330633282661438,-11.017979621887207,0.796875,0.9301697015762329,2.173583984375,0.796875,0.9301697015762329,0.796875,0.930748462677002,0.796875,0.9295910596847534,0.796875,0.9292052388191223,7.435792446136475,0.703125,0.8964120745658875,4.0
diff --git a/ptrm_selection_trm_base260410_k8_n512.summary.csv b/ptrm_selection_trm_base260410_k8_n512.summary.csv
new file mode 100644
index 0000000..12a5a4b
--- /dev/null
+++ b/ptrm_selection_trm_base260410_k8_n512.summary.csv
@@ -0,0 +1,2 @@
+deterministic/exact,deterministic/token_acc,fd_lyap,include_clean,lambda_fail_mean,lambda_mean,lambda_success_mean,lyap_min/exact,lyap_min/token_acc,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/exact,oracle_pass/token_acc,q_fail_mean,q_max/exact,q_max/token_acc,q_mean,q_minus_0.25lambda/exact,q_minus_0.25lambda/token_acc,q_minus_0.5lambda/exact,q_minus_0.5lambda/token_acc,q_minus_1lambda/exact,q_minus_1lambda/token_acc,q_minus_2lambda/exact,q_minus_2lambda/token_acc,q_success_mean,rollout0/exact,rollout0/token_acc,rollouts
+0.79296875,0.9237557649612427,1.0,1.0,7.364749908447266,6.310528755187988,6.000424385070801,0.8828125,0.955053985118866,0.772705078125,0.9167057871818542,512.0,0.001,0.888671875,0.9605998992919922,-10.958671569824219,0.888671875,0.9566453695297241,3.255891799926758,0.888671875,0.9565731287002563,0.888671875,0.9565731287002563,0.888671875,0.9566213488578796,0.884765625,0.9557292461395264,7.437174320220947,0.76953125,0.9146653413772583,8.0
diff --git a/ptrm_selection_trm_multi4_104164_k8_n512.summary.csv b/ptrm_selection_trm_multi4_104164_k8_n512.summary.csv
new file mode 100644
index 0000000..4be4cf7
--- /dev/null
+++ b/ptrm_selection_trm_multi4_104164_k8_n512.summary.csv
@@ -0,0 +1,2 @@
+deterministic/exact,deterministic/token_acc,fd_lyap,include_clean,lambda_fail_mean,lambda_mean,lambda_success_mean,lyap_min/exact,lyap_min/token_acc,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/exact,oracle_pass/token_acc,q_fail_mean,q_max/exact,q_max/token_acc,q_mean,q_minus_0.25lambda/exact,q_minus_0.25lambda/token_acc,q_minus_0.5lambda/exact,q_minus_0.5lambda/token_acc,q_minus_1lambda/exact,q_minus_1lambda/token_acc,q_minus_2lambda/exact,q_minus_2lambda/token_acc,q_success_mean,rollout0/exact,rollout0/token_acc,rollouts
+0.841796875,0.9395254254341125,1.0,1.0,7.1426215171813965,6.046740531921387,5.835097789764404,0.9140625,0.9662663340568542,0.838134765625,0.9396581649780273,512.0,0.001,0.91796875,0.9709925055503845,-10.85094928741455,0.91796875,0.9674961566925049,4.857690811157227,0.91796875,0.9673996567726135,0.91796875,0.9672791957855225,0.916015625,0.967013955116272,0.916015625,0.9671344757080078,7.891430377960205,0.826171875,0.9349681735038757,8.0
diff --git a/ptrm_smoke_official_gbs768_base58590.summary.csv b/ptrm_smoke_official_gbs768_base58590.summary.csv
new file mode 100644
index 0000000..a2db928
--- /dev/null
+++ b/ptrm_smoke_official_gbs768_base58590.summary.csv
@@ -0,0 +1,2 @@
+deterministic/exact,deterministic/token_acc,fd_lyap,fd_spectrum_k,include_clean,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/exact,oracle_pass/token_acc,perturb_both,perturb_h,perturb_l,q_max/exact,q_max/token_acc,q_mean,rollout0/exact,rollout0/token_acc,rollouts,steps
+0.75,0.8873456716537476,0.0,0.0,0.0,0.53125,0.8152006268501282,8.0,0.3,0.75,0.9058641791343689,0.0,0.0,1.0,0.75,0.895061731338501,-1.0615234375,0.625,0.8611111044883728,4.0,4.0
diff --git a/ptrm_spectrum_k4_cache_analysis.csv b/ptrm_spectrum_k4_cache_analysis.csv
new file mode 100644
index 0000000..12753c4
--- /dev/null
+++ b/ptrm_spectrum_k4_cache_analysis.csv
@@ -0,0 +1,3 @@
+corr_neg_lambda1_correct,corr_neg_spec_count_pos_correct,corr_neg_spec_lambda1_correct,corr_neg_spec_mean_correct,corr_neg_spec_pos_l2_correct,corr_neg_spec_pos_mass_correct,corr_neg_spec_spread_correct,corr_q_correct,corr_q_lambda1,corr_q_spec_count_pos,corr_q_spec_lambda1,corr_q_spec_mean,corr_q_spec_pos_l2,corr_q_spec_pos_mass,corr_q_spec_spread,deterministic/exact,file,lambda1_both_fail,lambda1_feature_wins,lambda1_min/exact,lambda1_min/token_acc,lambda1_q_wins,mean_rollout/exact,n_samples,oracle_pass/exact,q_max/exact,q_max/token_acc,rollouts,spec_count_pos_both_fail,spec_count_pos_feature_wins,spec_count_pos_min/exact,spec_count_pos_min/token_acc,spec_count_pos_q_wins,spec_lambda1_both_fail,spec_lambda1_feature_wins,spec_lambda1_min/exact,spec_lambda1_min/token_acc,spec_lambda1_q_wins,spec_mean_both_fail,spec_mean_feature_wins,spec_mean_min/exact,spec_mean_min/token_acc,spec_mean_q_wins,spec_pos_l2_both_fail,spec_pos_l2_feature_wins,spec_pos_l2_min/exact,spec_pos_l2_min/token_acc,spec_pos_l2_q_wins,spec_pos_mass_both_fail,spec_pos_mass_feature_wins,spec_pos_mass_min/exact,spec_pos_mass_min/token_acc,spec_pos_mass_q_wins,spec_spread_both_fail,spec_spread_feature_wins,spec_spread_min/exact,spec_spread_min/token_acc,spec_spread_q_wins
+0.8814938732175305,nan,0.8814938732175305,0.881548727791444,0.8816329740738039,0.881548727791444,0.7016889487515001,0.9656375405401326,-0.9166162782344888,nan,-0.9166162782344888,-0.916666132347328,-0.9167544021264978,-0.916666132347328,-0.7300307437150374,0.734375,research/flossing/ptrm_spectrum_k4_trm_base260410_k100_d64_sigma03_Lonly_n64.npz,3.0,2.0,0.890625,0.9523533582687378,4.0,0.78296875,64.0,0.953125,0.921875,0.9720293283462524,100.0,3.0,2.0,0.71875,0.899498462677002,15.0,3.0,2.0,0.890625,0.9523533582687378,4.0,3.0,2.0,0.890625,0.9511959552764893,4.0,3.0,2.0,0.890625,0.9511959552764893,4.0,3.0,2.0,0.890625,0.9511959552764893,4.0,3.0,2.0,0.890625,0.953125,4.0
+0.9158759787150939,nan,0.9158759787150939,0.9174927024166323,0.9174867637523325,0.9174927024166323,0.8654349435118633,0.9987222810222618,-0.9132736884879366,nan,-0.9132736884879366,-0.9147807220489171,-0.9147768468622256,-0.9147807220489171,-0.8639983425079254,0.828125,research/flossing/ptrm_spectrum_k4_trm_multi4_104164_k100_d64_sigma03_Lonly_n64.npz,3.0,0.0,0.953125,0.9826388955116272,0.0,0.87265625,64.0,0.953125,0.953125,0.9814814925193787,100.0,3.0,0.0,0.84375,0.9394290447235107,7.0,3.0,0.0,0.953125,0.9826388955116272,0.0,3.0,0.0,0.953125,0.9814814925193787,0.0,3.0,0.0,0.953125,0.9814814925193787,0.0,3.0,0.0,0.953125,0.9814814925193787,0.0,3.0,0.0,0.953125,0.9814814925193787,0.0
diff --git a/ptrm_spectrum_k4_trm_base260410_k100_d64_sigma03_Lonly_n64.summary.csv b/ptrm_spectrum_k4_trm_base260410_k100_d64_sigma03_Lonly_n64.summary.csv
new file mode 100644
index 0000000..2bf448a
--- /dev/null
+++ b/ptrm_spectrum_k4_trm_base260410_k100_d64_sigma03_Lonly_n64.summary.csv
@@ -0,0 +1,2 @@
+deterministic/exact,deterministic/token_acc,fd_lyap,fd_spectrum_k,include_clean,lambda_fail_mean,lambda_mean,lambda_success_mean,lyap_min/exact,lyap_min/token_acc,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/exact,oracle_pass/token_acc,q_fail_mean,q_max/exact,q_max/token_acc,q_mean,q_minus_0.25lambda/exact,q_minus_0.25lambda/token_acc,q_minus_0.5lambda/exact,q_minus_0.5lambda/token_acc,q_minus_1lambda/exact,q_minus_1lambda/token_acc,q_minus_2lambda/exact,q_minus_2lambda/token_acc,q_success_mean,rollout0/exact,rollout0/token_acc,rollouts,spec_count_pos_fail_mean,spec_count_pos_mean,spec_count_pos_min/exact,spec_count_pos_min/token_acc,spec_count_pos_success_mean,spec_k,spec_mean_fail_mean,spec_mean_mean,spec_mean_min/exact,spec_mean_min/token_acc,spec_mean_success_mean,spec_pos_l2_mean,spec_pos_l2_min/exact,spec_pos_l2_min/token_acc,spec_pos_mass_fail_mean,spec_pos_mass_mean,spec_pos_mass_min/exact,spec_pos_mass_min/token_acc,spec_pos_mass_success_mean,spec_spread_mean,spec_spread_min/exact,spec_spread_min/token_acc
+0.734375,0.9054783582687378,0.0,4.0,0.0,7.2596235275268555,6.1520586013793945,5.845053195953369,0.890625,0.9523533582687378,0.7829687595367432,0.921329140663147,64.0,0.3,0.953125,0.9855324029922485,-10.004590034484863,0.921875,0.9720293283462524,3.6044678688049316,0.875,0.944251537322998,0.875,0.944251537322998,0.890625,0.9513888955116272,0.890625,0.9511959552764893,7.37676477432251,0.71875,0.8994985222816467,100.0,4.0,4.0,0.71875,0.8994985222816467,4.0,4.0,6.985388278961182,5.968386173248291,0.890625,0.9511959552764893,5.6864824295043945,5.969665050506592,0.890625,0.9511959552764893,27.941553115844727,23.873544692993164,0.890625,0.9511959552764893,22.745929718017578,0.30672529339790344,0.890625,0.953125
diff --git a/ptrm_spectrum_k4_trm_multi4_104164_k100_d64_sigma03_Lonly_n64.summary.csv b/ptrm_spectrum_k4_trm_multi4_104164_k100_d64_sigma03_Lonly_n64.summary.csv
new file mode 100644
index 0000000..e421497
--- /dev/null
+++ b/ptrm_spectrum_k4_trm_multi4_104164_k100_d64_sigma03_Lonly_n64.summary.csv
@@ -0,0 +1,2 @@
+deterministic/exact,deterministic/token_acc,fd_lyap,fd_spectrum_k,include_clean,lambda_fail_mean,lambda_mean,lambda_success_mean,lyap_min/exact,lyap_min/token_acc,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/exact,oracle_pass/token_acc,q_fail_mean,q_max/exact,q_max/token_acc,q_mean,q_minus_0.25lambda/exact,q_minus_0.25lambda/token_acc,q_minus_0.5lambda/exact,q_minus_0.5lambda/token_acc,q_minus_1lambda/exact,q_minus_1lambda/token_acc,q_minus_2lambda/exact,q_minus_2lambda/token_acc,q_success_mean,rollout0/exact,rollout0/token_acc,rollouts,spec_count_pos_fail_mean,spec_count_pos_mean,spec_count_pos_min/exact,spec_count_pos_min/token_acc,spec_count_pos_success_mean,spec_k,spec_mean_fail_mean,spec_mean_mean,spec_mean_min/exact,spec_mean_min/token_acc,spec_mean_success_mean,spec_pos_l2_mean,spec_pos_l2_min/exact,spec_pos_l2_min/token_acc,spec_pos_mass_fail_mean,spec_pos_mass_mean,spec_pos_mass_min/exact,spec_pos_mass_min/token_acc,spec_pos_mass_success_mean,spec_spread_mean,spec_spread_min/exact,spec_spread_min/token_acc
+0.828125,0.9336420297622681,0.0,4.0,0.0,7.07891321182251,5.869143009185791,5.692605972290039,0.953125,0.9826388955116272,0.8726562261581421,0.9494907259941101,64.0,0.3,0.953125,0.9841821193695068,-10.772948265075684,0.953125,0.9814814925193787,5.447077751159668,0.953125,0.9814814925193787,0.953125,0.9814814925193787,0.953125,0.9814814925193787,0.953125,0.9814814925193787,7.8140106201171875,0.84375,0.9394290447235107,100.0,4.0,4.0,0.84375,0.9394290447235107,4.0,4.0,6.798189640045166,5.697390556335449,0.953125,0.9814814925193787,5.536754608154297,5.698462963104248,0.953125,0.9814814925193787,27.192758560180664,22.789562225341797,0.953125,0.9814814925193787,22.147018432617188,0.28217047452926636,0.953125,0.9814814925193787
diff --git a/ptrm_spectrum_smoke.summary.csv b/ptrm_spectrum_smoke.summary.csv
new file mode 100644
index 0000000..cd04f44
--- /dev/null
+++ b/ptrm_spectrum_smoke.summary.csv
@@ -0,0 +1,2 @@
+deterministic/exact,deterministic/token_acc,fd_lyap,fd_spectrum_k,include_clean,lambda_fail_mean,lambda_mean,lambda_success_mean,lyap_min/exact,lyap_min/token_acc,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/exact,oracle_pass/token_acc,q_fail_mean,q_max/exact,q_max/token_acc,q_mean,q_minus_0.25lambda/exact,q_minus_0.25lambda/token_acc,q_minus_0.5lambda/exact,q_minus_0.5lambda/token_acc,q_minus_1lambda/exact,q_minus_1lambda/token_acc,q_minus_2lambda/exact,q_minus_2lambda/token_acc,q_success_mean,rollout0/exact,rollout0/token_acc,rollouts,spec_count_pos_fail_mean,spec_count_pos_mean,spec_count_pos_min/exact,spec_count_pos_min/token_acc,spec_count_pos_success_mean,spec_k,spec_mean_fail_mean,spec_mean_mean,spec_mean_min/exact,spec_mean_min/token_acc,spec_mean_success_mean,spec_pos_l2_mean,spec_pos_l2_min/exact,spec_pos_l2_min/token_acc,spec_pos_mass_fail_mean,spec_pos_mass_mean,spec_pos_mass_min/exact,spec_pos_mass_min/token_acc,spec_pos_mass_success_mean,spec_spread_mean,spec_spread_min/exact,spec_spread_min/token_acc
+1.0,1.0,0.0,2.0,0.0,7.168448448181152,6.184332847595215,6.118724346160889,1.0,1.0,0.9375,0.9791666865348816,4.0,0.3,1.0,1.0,-11.25,1.0,1.0,6.9375,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8.149999618530273,1.0,1.0,4.0,2.0,2.0,1.0,1.0,2.0,2.0,7.030957221984863,6.077180862426758,1.0,1.0,6.013596057891846,6.078494548797607,1.0,1.0,14.061914443969727,12.154361724853516,1.0,1.0,12.027192115783691,0.21430405974388123,1.0,1.0
diff --git a/q_lambda_scatter/base58590_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.summary.csv b/q_lambda_scatter/base58590_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.summary.csv
new file mode 100644
index 0000000..1078ef9
--- /dev/null
+++ b/q_lambda_scatter/base58590_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.summary.csv
@@ -0,0 +1,2 @@
+correct_count/det_fail_mean,correct_count/det_success_mean,correct_count/full_frac,correct_count/ge_10_frac,correct_count/ge_1_frac,correct_count/ge_25_frac,correct_count/ge_5_frac,correct_count/mean,correct_count/median,correct_count/q10,correct_count/q25,correct_count/q75,correct_count/q90,correct_count/std,correct_count/zero_frac,deterministic/exact,deterministic/token_acc,fd_lyap,fd_spectrum_k,include_clean,lambda_fail_mean,lambda_mean,lambda_success_mean,lyap_min/exact,lyap_min/token_acc,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/det_fail_frac,oracle_pass/det_success_frac,oracle_pass/exact,oracle_pass/token_acc,perturb_both,perturb_h,perturb_l,q_fail_mean,q_max/det_fail_frac,q_max/det_success_frac,q_max/exact,q_max/token_acc,q_mean,q_minus_0.25lambda/exact,q_minus_0.25lambda/token_acc,q_minus_0.5lambda/exact,q_minus_0.5lambda/token_acc,q_minus_1lambda/exact,q_minus_1lambda/token_acc,q_minus_2lambda/exact,q_minus_2lambda/token_acc,q_success_mean,rollout0/exact,rollout0/token_acc,rollouts,steps
+15.185714721679688,24.78506851196289,0.859375,0.94921875,0.97265625,0.859375,0.96875,23.47265625,25.0,22.10000228881836,25.0,25.0,25.0,5.083601474761963,0.02734375,0.86328125,0.9498456716537476,1.0,0.0,0.0,6.921443462371826,5.711092472076416,5.632336139678955,0.97265625,0.989703893661499,0.9389062523841858,0.9769077897071838,512.0,0.3,0.800000011920929,1.0,0.97265625,0.9911988973617554,0.0,0.0,1.0,-10.65998649597168,0.800000011920929,1.0,0.97265625,0.989366352558136,6.669083118438721,0.97265625,0.9893181324005127,0.97265625,0.9891493320465088,0.97265625,0.989149272441864,0.97265625,0.9892698526382446,7.7966694831848145,0.94140625,0.9774305820465088,25.0,64.0
diff --git a/q_lambda_scatter/multi4_35805_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.summary.csv b/q_lambda_scatter/multi4_35805_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.summary.csv
new file mode 100644
index 0000000..7d15a10
--- /dev/null
+++ b/q_lambda_scatter/multi4_35805_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.summary.csv
@@ -0,0 +1,2 @@
+correct_count/det_fail_mean,correct_count/det_success_mean,correct_count/full_frac,correct_count/ge_10_frac,correct_count/ge_1_frac,correct_count/ge_25_frac,correct_count/ge_5_frac,correct_count/mean,correct_count/median,correct_count/q10,correct_count/q25,correct_count/q75,correct_count/q90,correct_count/std,correct_count/zero_frac,deterministic/exact,deterministic/token_acc,fd_lyap,fd_spectrum_k,include_clean,lambda_fail_mean,lambda_mean,lambda_success_mean,lyap_min/exact,lyap_min/token_acc,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/det_fail_frac,oracle_pass/det_success_frac,oracle_pass/exact,oracle_pass/token_acc,perturb_both,perturb_h,perturb_l,q_fail_mean,q_max/det_fail_frac,q_max/det_success_frac,q_max/exact,q_max/token_acc,q_mean,q_minus_0.25lambda/exact,q_minus_0.25lambda/token_acc,q_minus_0.5lambda/exact,q_minus_0.5lambda/token_acc,q_minus_1lambda/exact,q_minus_1lambda/token_acc,q_minus_2lambda/exact,q_minus_2lambda/token_acc,q_success_mean,rollout0/exact,rollout0/token_acc,rollouts,steps
+13.054545402526855,24.886215209960938,0.890625,0.953125,0.974609375,0.890625,0.966796875,23.615234375,25.0,24.0,25.0,25.0,25.0,5.043018341064453,0.025390625,0.892578125,0.9596836566925049,1.0,0.0,0.0,7.271883964538574,5.830501556396484,5.7459797859191895,0.974609375,0.9897279739379883,0.9446094036102295,0.9784027934074402,512.0,0.3,0.7636363506317139,1.0,0.974609375,0.9913435578346252,0.0,0.0,1.0,-10.081937789916992,0.7636363506317139,1.0,0.974609375,0.9889563918113708,6.753265857696533,0.974609375,0.9889563918113708,0.974609375,0.9890046119689941,0.974609375,0.9889804720878601,0.974609375,0.9889563918113708,7.74045991897583,0.9375,0.9766348004341125,25.0,64.0
diff --git a/rainer_email_bundle_20260605/README.md b/rainer_email_bundle_20260605/README.md
new file mode 100644
index 0000000..c02097b
--- /dev/null
+++ b/rainer_email_bundle_20260605/README.md
@@ -0,0 +1,17 @@
+# Rainer Email Figure Bundle
+
+Prepared on 2026-06-05.
+
+Contents:
+
+- `email_draft.md`: short email draft with `(Fig. n)` references.
+- `figure_captions.md`: figure captions and caveats.
+- `figures/Fig1_lambda1_success_failure_HRM_TRM.png`
+- `figures/Fig2_full_spectrum_success_failure_shift.png`
+- `figures/Fig3_trajectory_perturbation_improves_peak_accuracy.png`
+- `figures/Fig4_optional_PTRM_Q_head_vs_stability.png`
+
+Suggested attachment strategy:
+
+- Attach Fig. 1, Fig. 2, and Fig. 3 by default.
+- Attach Fig. 4 only if you want to include the PTRM/Q-head side observation.
diff --git a/rainer_email_bundle_20260605/email_draft.md b/rainer_email_bundle_20260605/email_draft.md
new file mode 100644
index 0000000..cd92465
--- /dev/null
+++ b/rainer_email_bundle_20260605/email_draft.md
@@ -0,0 +1,18 @@
+Subject: Question on gradient flossing vs forward trajectory stability in recursive reasoning models
+
+Hi Rainer,
+
+I hope you are doing well. I have been studying recursive reasoning models: small recurrent/iterative models that solve a problem by repeatedly refining a latent reasoning state before emitting an answer, for example HRM/TRM-style models on Sudoku-like reasoning tasks.
+
+We found a strong dynamical signal during inference. If we measure finite-time Lyapunov exponents along the model's recurrent inference trajectory, failed examples are much more chaotic than successful examples. This is already visible in the first exponent (Fig. 1). After measuring more of the spectrum, the effect looks less like one isolated bad mode and more like a broad shift of the spectrum toward expansion on failing examples (Fig. 2).
+
+This made me revisit your gradient flossing work. My current interpretation is that gradient flossing is mainly about improving the stability/conditioning of Jacobian products for learning through recurrent dynamics, while our signal may be more about forward inference stability: whether the correct answer lies in a sufficiently stable attractor basin. We tried some analogues of your pre/interflossing procedures; they reproduce the toy RNN effect, but in our recursive reasoning model (RRM) setting the effect is weak so far. In contrast, a simple forward perturbation training objective looks more promising: for the same supervised pair `(x, y)`, we run additional trajectories with small perturbations to the recurrent latent state and require them to reach the same target. This raises the peak accuracy (ceiling) in our current runs (Fig. 3).
+
+My question is whether this distinction between gradient-propagation stability and forward-inference attractor stability makes sense from your perspective. If so, would you expect spectrum flossing to need to be local, task-conditioned, or late-trajectory-only to help here? Or is there a more standard dynamical-systems/control framing, such as transverse stability, basin enlargement, shadowing, or stochastic stabilization, that you think is more appropriate?
+
+I attached a small figure pack. Fig. 4 is optional context: in a probabilistic TRM variant with multiple noisy rollouts and a learned Q-head selector, the learned Q score is correlated with a finite-difference stability proxy, suggesting that the selector may be learning a low-dimensional projection of trajectory stability.
+
+I would be grateful for any pointer or sanity check, especially if we are misusing the gradient flossing intuition.
+
+Best,
+Yuren
diff --git a/rainer_email_bundle_20260605/figure_captions.md b/rainer_email_bundle_20260605/figure_captions.md
new file mode 100644
index 0000000..04872f6
--- /dev/null
+++ b/rainer_email_bundle_20260605/figure_captions.md
@@ -0,0 +1,23 @@
+# Figure Captions
+
+## Fig. 1: `figures/Fig1_lambda1_success_failure_HRM_TRM.png`
+
+First finite-time Lyapunov exponent distribution for successful versus failed inference trajectories in HRM and TRM checkpoints. Failures are shifted toward larger/more positive exponents, motivating chaos as a failure detector.
+
+## Fig. 2: `figures/Fig2_full_spectrum_success_failure_shift.png`
+
+Leading finite-time Lyapunov exponents (the top of the spectrum) for successful versus failed examples. The separation is not only a top-exponent effect; many leading modes shift toward expansion on failed examples.
+
+## Fig. 3: `figures/Fig3_trajectory_perturbation_improves_peak_accuracy.png`
+
+Peak exact accuracy for baseline training versus trajectory perturbation training. The perturbation method keeps the same supervised input/target pair but trains additional recurrent rollouts with small latent-state perturbations to reach the same answer. This is a ceiling/peak result; HRM later shows final-checkpoint collapse.
+
+## Fig. 4: `figures/Fig4_optional_PTRM_Q_head_vs_stability.png`
+
+Optional context. In PTRM-style stochastic multi-rollout inference, the learned Q-head score is correlated with a finite-difference stability proxy on mixed-success problems, suggesting that learned rollout selection may partly approximate a low-dimensional stability score.
+
+# Caveats
+
+- Lyapunov measurements are finite-time diagnostic estimates on sampled subsets, not full asymptotic exponents.
+- Fig. 3 reports best-checkpoint/peak accuracy, not necessarily final-checkpoint accuracy.
+- The gradient flossing analogue is still preliminary; the main question for Rainer is conceptual, not a claim that flossing does or does not work in RRMs.
diff --git a/report_bundle_20260603/MANIFEST.txt b/report_bundle_20260603/MANIFEST.txt
new file mode 100644
index 0000000..8c34669
--- /dev/null
+++ b/report_bundle_20260603/MANIFEST.txt
@@ -0,0 +1,96 @@
+MANIFEST.txt 0 bytes
+README.md 1656 bytes
+figures/fig0_motivation_lambda1_success_failure_hrm_trm.png 118939 bytes
+figures/fig1_hrm_trm_training_curves.png 170270 bytes
+figures/fig2_accuracy_vs_chaotic_volume_phase.png 155777 bytes
+figures/fig3_hrm_trm_success_failure_spectra.png 288918 bytes
+figures/fig4_ptrm_same_subset_comparison.png 92922 bytes
+figures/fig5_qhead_vs_lambda1_ptrm.png 575919 bytes
+raw_npz/base58590_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.npz 52783 bytes
+raw_npz/base58590_k25_d64_sigma03_Lonly_n10000_seed20260602.npz 206666 bytes
+raw_npz/diag_hrm_multi4_step_23436_512.npz 95379 bytes
+raw_npz/diag_hrm_multi4_step_26040_512.npz 103275 bytes
+raw_npz/diag_hrm_step_26040_512.npz 98020 bytes
+raw_npz/multi4_35805_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.npz 55553 bytes
+raw_npz/multi4_35805_k25_d64_sigma03_Lonly_n10000_seed20260602.npz 241334 bytes
+raw_npz/ptrm_official_gbs768_base58590_k100_d64_sigma03_Lonly_n1000_seed0.npz 64878 bytes
+raw_npz/ptrm_official_gbs768_multi4_35805_k100_d64_sigma03_Lonly_n1000_seed0.npz 75860 bytes
+raw_npz/trm_gbs768_base_step58590_n512_k8_seed20260602.npz 85005 bytes
+raw_npz/trm_gbs768_multi4_step35805_n512_k8_seed20260602.npz 86399 bytes
+raw_npz/trm_gbs768_multi4_step65100_n512_k8_seed20260602.npz 88137 bytes
+scripts/make_meeting_artifacts_v2.py 15829 bytes
+scripts/make_q_lambda_scatter.py 9177 bytes
+supplemental_figures/research/flossing/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/assets/logo.png 133246 bytes
+supplemental_figures/research/flossing/meeting_artifacts/ptrm_same_subset_k100_n1000_comparison.png 92130 bytes
+supplemental_figures/research/flossing/meeting_artifacts/trm_gbs768_headline_dynamics_bars.png 115479 bytes
+supplemental_figures/research/flossing/meeting_artifacts/trm_gbs768_spectrum_success_failure_n512.png 207582 bytes
+supplemental_figures/research/flossing/plots/drift_per_act.png 83159 bytes
+supplemental_figures/research/flossing/plots/lyap_hist.png 42655 bytes
+supplemental_figures/research/flossing/plots/lyap_vs_tokenacc.png 48427 bytes
+supplemental_figures/research/flossing/plots/q_gating.png 54640 bytes
+supplemental_figures/research/flossing/plots/trm_multi4_lambda1_success_failure.png 56142 bytes
+supplemental_figures/research/flossing/plots/trm_multi4_spectrum_pca_success_failure.png 144049 bytes
+supplemental_figures/research/flossing/plots/trm_multi4_spectrum_success_failure.png 70822 bytes
+supplemental_figures/research/flossing/plots_evolution/lyap_vs_training.png 149807 bytes
+supplemental_figures/research/flossing/plots_evolution/spectrum_per_ckpt.png 176309 bytes
+supplemental_figures/research/flossing/plots_halt_bucket.png 318150 bytes
+supplemental_figures/research/flossing/plots_joint/lyap1_hist_joint.png 27564 bytes
+supplemental_figures/research/flossing/plots_joint/lyap_spectrum_joint.png 84674 bytes
+supplemental_figures/research/flossing/plots_joint/n_positive_modes.png 42938 bytes
+supplemental_figures/research/flossing/plots_separate/lyap_H_hist.png 35507 bytes
+supplemental_figures/research/flossing/plots_separate/lyap_L_hist.png 32845 bytes
+supplemental_figures/research/flossing/plots_separate/lyap_L_vs_H_scatter.png 85845 bytes
+supplemental_figures/research/flossing/plots_separate/three_spectra.png 137772 bytes
+supplemental_figures/research/flossing/plots_step3/step3_trajectories.png 356753 bytes
+supplemental_figures/research/flossing/plots_step3_final/abcdef_deltas.png 31948 bytes
+supplemental_figures/research/flossing/plots_step3_final/abcdef_full.png 290935 bytes
+supplemental_figures/research/flossing/plots_tangent/hidden_activity.png 427468 bytes
+supplemental_figures/research/flossing/plots_tangent/position_activity.png 440610 bytes
+supplemental_figures/research/flossing/plots_tangent/sudoku_grid_activity.png 40018 bytes
+supplemental_figures/research/flossing/plots_topk/drift_per_act.png 82295 bytes
+supplemental_figures/research/flossing/plots_topk/lyap1_hist.png 33377 bytes
+supplemental_figures/research/flossing/plots_topk/lyap1_vs_lyapK.png 68811 bytes
+supplemental_figures/research/flossing/plots_topk/lyap_spectrum.png 64412 bytes
+supplemental_figures/research/flossing/plots_topk/lyap_spread.png 31964 bytes
+supplemental_figures/research/flossing/plots_trm_chaos_onset.png 156952 bytes
+supplemental_figures/research/flossing/plots_trm_lyap_hist.png 125598 bytes
+supplemental_figures/research/flossing/problem_tracks/hrm_learn_event_deltas.png 77097 bytes
+supplemental_figures/research/flossing/problem_tracks/hrm_learning_group_dynamics.png 286193 bytes
+supplemental_figures/research/flossing/problem_tracks/hrm_problem_track_heatmap.png 188824 bytes
+supplemental_figures/research/flossing/problem_tracks/trm_learn_event_deltas.png 68376 bytes
+supplemental_figures/research/flossing/problem_tracks/trm_learning_group_dynamics.png 282546 bytes
+supplemental_figures/research/flossing/problem_tracks/trm_problem_track_heatmap.png 182902 bytes
+supplemental_figures/research/flossing/spectrum_microscope/hrm_checkpoint_spectrum_features.png 207071 bytes
+supplemental_figures/research/flossing/spectrum_microscope/hrm_feature_auc.png 139047 bytes
+supplemental_figures/research/flossing/spectrum_microscope/hrm_mean_sorted_spectra_grid.png 198843 bytes
+supplemental_figures/research/flossing/spectrum_microscope/trm_checkpoint_spectrum_features.png 203400 bytes
+supplemental_figures/research/flossing/spectrum_microscope/trm_feature_auc.png 129305 bytes
+supplemental_figures/research/flossing/spectrum_microscope/trm_mean_sorted_spectra_grid.png 218715 bytes
+supplemental_figures/research/flossing/surrogate_flossing/figures/bf_fig02a.png 1884952 bytes
+supplemental_figures/research/flossing/surrogate_flossing/figures/bf_fig03a.png 1435145 bytes
+tables/base58590_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.summary.csv 1713 bytes
+tables/base58590_k25_d64_sigma03_Lonly_n10000_seed20260602.summary.csv 1260 bytes
+tables/fig5_qhead_vs_lambda1_ptrm_summary.csv 1267 bytes
+tables/headline_trm_multi4_dynamics_table.csv 598 bytes
+tables/hrm_baseline_eval.csv 1295 bytes
+tables/hrm_honly_step26040_eval.csv 138 bytes
+tables/hrm_matched_compare.csv 1119 bytes
+tables/hrm_multi4_complete_eval.csv 692 bytes
+tables/hrm_multi4_eval.csv 689 bytes
+tables/hrm_multi4_horizon_sweep_768.csv 4082 bytes
+tables/hrm_trm_redesigned_summary.csv 1422 bytes
+tables/meeting_figures_v2_report.md 2197 bytes
+tables/multi4_35805_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.summary.csv 1715 bytes
+tables/multi4_35805_k25_d64_sigma03_Lonly_n10000_seed20260602.summary.csv 1274 bytes
+tables/paired_ptrm_k100_n1000_seed0_summary.csv 534 bytes
+tables/summary_n512_k8_seed20260602.csv 2994 bytes
+tables/summary_n64_k8_seed20260602.csv 716 bytes
+tables/trm_baseline_eval.csv 1315 bytes
+tables/trm_gbs768_multi4_step65100_det_n10000_seed20260602.summary.csv 1052 bytes
+tables/trm_matched_compare.csv 331 bytes
+tables/trm_multi4_eval.csv 220 bytes
+tables/trm_multi4_eval_full.csv 1468 bytes
+tables/trm_official_gbs768_eval.csv 1382 bytes
+tables/trm_official_gbs768_multi4_eval.csv 3457 bytes
+tables/trm_official_gbs768_multi4_step16275_eval.csv 135 bytes
+tables/wallclock_eval.csv 4435 bytes
diff --git a/report_bundle_20260603/README.md b/report_bundle_20260603/README.md
new file mode 100644
index 0000000..ea92ac5
--- /dev/null
+++ b/report_bundle_20260603/README.md
@@ -0,0 +1,34 @@
+# RRM Report Bundle 2026-06-03
+
+This bundle contains report/PPT-ready artifacts for the recursive reasoning dynamics work.
+
+## Recommended Figures
+
+- `figures/fig0_motivation_lambda1_success_failure_hrm_trm.png`: motivation figure; the first Lyapunov exponent (λ1) separates successful from failed samples for both HRM and TRM.
+- `figures/fig1_hrm_trm_training_curves.png`: HRM/TRM baseline versus multi4 training curves.
+- `figures/fig2_accuracy_vs_chaotic_volume_phase.png`: phase view of full-test exact accuracy versus the mean top-8 Lyapunov exponent ("chaotic volume").
+- `figures/fig3_hrm_trm_success_failure_spectra.png`: success/failure full-spectrum separation.
+- `figures/fig4_ptrm_same_subset_comparison.png`: PTRM same-subset comparison.
+- `figures/fig5_qhead_vs_lambda1_ptrm.png`: PTRM Q-head score versus finite-difference stability proxy.
+
+## Tables
+
+- `tables/meeting_figures_v2_report.md`: concise figure strategy and caveats.
+- `tables/hrm_trm_redesigned_summary.csv`: HRM/TRM headline accuracy and spectrum metrics.
+- `tables/fig5_qhead_vs_lambda1_ptrm_summary.csv`: Q-head versus stability statistics.
+- `tables/*eval*.csv`: eval curves and checkpoint comparisons used to generate the figures.
+- `tables/*summary*.csv`: PTRM selection and Lyapunov diagnostic summaries.
+
+## Raw Data
+
+- `raw_npz/`: compact diagnostic arrays backing the main figures.
+- This bundle excludes model checkpoints, W&B full history dumps, and very large tangent-mode dumps.
+
+## Scripts
+
+- `scripts/make_meeting_artifacts_v2.py`: regenerates the main HRM/TRM/PTRM figure set.
+- `scripts/make_q_lambda_scatter.py`: regenerates the Q-head versus stability figure.
+
+## Supplemental
+
+- `supplemental_figures/`: earlier exploratory PNGs that may be useful as backup slides.
diff --git a/report_bundle_20260603/scripts/make_meeting_artifacts_v2.py b/report_bundle_20260603/scripts/make_meeting_artifacts_v2.py
new file mode 100644
index 0000000..4c88412
--- /dev/null
+++ b/report_bundle_20260603/scripts/make_meeting_artifacts_v2.py
@@ -0,0 +1,364 @@
+from __future__ import annotations
+
+import csv
+import re
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+# Root directory of the analysis inputs. The path is relative, so it is
+# resolved against the current working directory when the script runs.
+ROOT = Path("research/flossing")
+# Directory that receives every figure, CSV and report written by this script.
+OUT = ROOT / "meeting_artifacts_v2"
+
+
+def read_csv(path: Path) -> list[dict[str, str]]:
+ with path.open(newline="") as f:
+ return list(csv.DictReader(f))
+
+
+def f(row: dict[str, str], key: str, default: float = float("nan")) -> float:
+ try:
+ val = row.get(key, "")
+ return float(val) if val != "" else default
+ except Exception:
+ return default
+
+
+def row_for(rows: list[dict[str, str]], step: int) -> dict[str, str]:
+ for row in rows:
+ if int(float(row["step"])) == step:
+ return row
+ raise KeyError(step)
+
+
+def spectrum_metrics(npz_path: Path) -> dict[str, float]:
+ data = np.load(npz_path)
+ spec = data["lyap_spec"].astype(float)
+ exact = data["exact_correct"].astype(bool)
+ mean8 = spec.mean(axis=1)
+ pos_count = (spec > 0).sum(axis=1)
+ return {
+ "sample_exact": float(exact.mean()),
+ "lambda1_all": float(spec[:, 0].mean()),
+ "mean8_all": float(mean8.mean()),
+ "pos_count_all": float(pos_count.mean()),
+ "lambda1_success": float(spec[exact, 0].mean()),
+ "lambda1_fail": float(spec[~exact, 0].mean()),
+ "mean8_success": float(mean8[exact].mean()),
+ "mean8_fail": float(mean8[~exact].mean()),
+ "pos_count_success": float(pos_count[exact].mean()),
+ "pos_count_fail": float(pos_count[~exact].mean()),
+ }
+
+
+def spectrum_arrays(npz_path: Path) -> tuple[np.ndarray, np.ndarray]:
+ data = np.load(npz_path)
+ return data["lyap_spec"].astype(float), data["exact_correct"].astype(bool)
+
+
+def auc_failure_score(score: np.ndarray, exact: np.ndarray) -> float:
+ """AUROC where higher score predicts failure."""
+ fail = ~exact
+ n_fail = int(fail.sum())
+ n_succ = int(exact.sum())
+ if n_fail == 0 or n_succ == 0:
+ return float("nan")
+ order = np.argsort(score)
+ ranks = np.empty_like(order, dtype=float)
+ ranks[order] = np.arange(1, len(score) + 1)
+ return float((ranks[fail].sum() - n_fail * (n_fail + 1) / 2) / (n_fail * n_succ))
+
+
+def get_data() -> dict[str, object]:
+ eval_dir = ROOT / "multi4_eval_compare"
+ spec_dir = ROOT / "official_gbs768_spectrum"
+
+ hrm_base = read_csv(eval_dir / "hrm_baseline_eval.csv")
+ hrm_multi = read_csv(eval_dir / "hrm_multi4_complete_eval.csv")
+ trm_base = read_csv(eval_dir / "trm_official_gbs768_eval.csv")
+ trm_multi = read_csv(eval_dir / "trm_official_gbs768_multi4_eval.csv")
+
+ hrm_points = [
+ {
+ "label": "baseline best",
+ "family": "baseline",
+ "step": 26040,
+ "full_exact": f(row_for(hrm_base, 26040), "all/exact_accuracy"),
+ **spectrum_metrics(ROOT / "diag_hrm_step_26040_512.npz"),
+ },
+ {
+ "label": "multi4 best",
+ "family": "multi4",
+ "step": 23436,
+ "full_exact": f(row_for(hrm_multi, 23436), "exact"),
+ **spectrum_metrics(ROOT / "diag_hrm_multi4_step_23436_512.npz"),
+ },
+ {
+ "label": "multi4 final",
+ "family": "multi4-final",
+ "step": 26040,
+ "full_exact": f(row_for(hrm_multi, 26040), "exact"),
+ **spectrum_metrics(ROOT / "diag_hrm_multi4_step_26040_512.npz"),
+ },
+ ]
+
+ trm_points = [
+ {
+ "label": "baseline best",
+ "family": "baseline",
+ "step": 58590,
+ "full_exact": f(row_for(trm_base, 58590), "all/exact_accuracy"),
+ **spectrum_metrics(spec_dir / "trm_gbs768_base_step58590_n512_k8_seed20260602.npz"),
+ },
+ {
+ "label": "multi4 best",
+ "family": "multi4",
+ "step": 35805,
+ "full_exact": f(row_for(trm_multi, 35805), "all/exact_accuracy"),
+ **spectrum_metrics(spec_dir / "trm_gbs768_multi4_step35805_n512_k8_seed20260602.npz"),
+ },
+ {
+ "label": "multi4 final",
+ "family": "multi4-final",
+ "step": 65100,
+ "full_exact": f(row_for(trm_multi, 65100), "all/exact_accuracy"),
+ **spectrum_metrics(spec_dir / "trm_gbs768_multi4_step65100_n512_k8_seed20260602.npz"),
+ },
+ ]
+
+ return {
+ "hrm_base": hrm_base,
+ "hrm_multi": hrm_multi,
+ "trm_base": trm_base,
+ "trm_multi": trm_multi,
+ "hrm_points": hrm_points,
+ "trm_points": trm_points,
+ }
+
+
+def metric_series(rows: list[dict[str, str]], metric: str) -> tuple[np.ndarray, np.ndarray]:
+ xs, ys = [], []
+ for row in rows:
+ val = f(row, metric)
+ if np.isfinite(val):
+ xs.append(int(float(row["step"])))
+ ys.append(val)
+ order = np.argsort(xs)
+ return np.asarray(xs)[order], np.asarray(ys)[order]
+
+
+def plot_training_curves(data: dict[str, object]) -> None:
+ fig, axes = plt.subplots(1, 2, figsize=(11.8, 4.2))
+ colors = {"baseline": "#475569", "multi4": "#2563eb"}
+
+ panels = [
+ ("HRM", data["hrm_base"], data["hrm_multi"], "all/exact_accuracy", "exact", 0.70),
+ ("TRM official GBS768", data["trm_base"], data["trm_multi"], "all/exact_accuracy", "all/exact_accuracy", 0.94),
+ ]
+ for ax, (title, base_rows, multi_rows, base_metric, multi_metric, ymax) in zip(axes, panels):
+ bx, by = metric_series(base_rows, base_metric) # type: ignore[arg-type]
+ mx, my = metric_series(multi_rows, multi_metric) # type: ignore[arg-type]
+ ax.plot(bx / 1000, by, marker="o", color=colors["baseline"], label="baseline", linewidth=2)
+ ax.plot(mx / 1000, my, marker="o", color=colors["multi4"], label="multi4", linewidth=2)
+ bi, mi = int(np.argmax(by)), int(np.argmax(my))
+ ax.scatter([bx[bi] / 1000], [by[bi]], s=95, color=colors["baseline"], edgecolor="white", zorder=5)
+ ax.scatter([mx[mi] / 1000], [my[mi]], s=95, color=colors["multi4"], edgecolor="white", zorder=5)
+ ax.annotate(f"best {by[bi]:.3f}", (bx[bi] / 1000, by[bi]), xytext=(6, -18), textcoords="offset points", fontsize=8)
+ ax.annotate(f"best {my[mi]:.3f}", (mx[mi] / 1000, my[mi]), xytext=(6, 8), textcoords="offset points", fontsize=8)
+ ax.set_title(title)
+ ax.set_xlabel("optimizer step (k)")
+ ax.set_ylabel("full test exact accuracy")
+ ax.set_ylim(0, ymax)
+ ax.grid(alpha=0.22)
+ ax.legend(frameon=False, loc="lower right")
+
+ fig.suptitle("Trajectory perturbation helps both HRM and TRM, but late checkpoints can collapse")
+ fig.tight_layout()
+ fig.savefig(OUT / "fig1_hrm_trm_training_curves.png", dpi=220)
+ plt.close(fig)
+
+
+def plot_lambda1_motivation() -> None:
+ entries = [
+ ("HRM baseline best\nstep 26040", ROOT / "diag_hrm_step_26040_512.npz"),
+ (
+ "TRM baseline best\nstep 58590",
+ ROOT / "official_gbs768_spectrum/trm_gbs768_base_step58590_n512_k8_seed20260602.npz",
+ ),
+ ]
+ fig, axes = plt.subplots(1, 2, figsize=(11.6, 4.2), sharey=False)
+ for ax, (title, path) in zip(axes, entries):
+ spec, exact = spectrum_arrays(path)
+ lam1 = spec[:, 0]
+ succ = lam1[exact]
+ fail = lam1[~exact]
+ lo, hi = np.quantile(lam1, [0.01, 0.99])
+ pad = 0.08 * (hi - lo)
+ bins = np.linspace(lo - pad, hi + pad, 34)
+ ax.hist(succ, bins=bins, density=True, alpha=0.62, color="#2563eb", label=f"success (n={len(succ)})")
+ ax.hist(fail, bins=bins, density=True, alpha=0.58, color="#dc2626", label=f"failure (n={len(fail)})")
+ ax.axvline(succ.mean(), color="#1d4ed8", linewidth=2)
+ ax.axvline(fail.mean(), color="#b91c1c", linewidth=2)
+ auc = auc_failure_score(lam1, exact)
+ ax.set_title(f"{title}\nacc={exact.mean():.3f}, AUROC={auc:.3f}")
+ ax.set_xlabel("top Lyapunov exponent λ1")
+ ax.set_ylabel("density")
+ ax.grid(alpha=0.2)
+ ax.legend(frameon=False, fontsize=8)
+ fig.suptitle("Motivation: λ1 is a strong success/failure detector in both HRM and TRM")
+ fig.tight_layout()
+ fig.savefig(OUT / "fig0_motivation_lambda1_success_failure_hrm_trm.png", dpi=220)
+ plt.close(fig)
+
+
+def plot_phase(data: dict[str, object]) -> None:
+ fig, axes = plt.subplots(1, 2, figsize=(11.8, 4.4), sharex=False)
+ panels = [("HRM", data["hrm_points"]), ("TRM official GBS768", data["trm_points"])]
+ style = {
+ "baseline": ("#475569", "o"),
+ "multi4": ("#2563eb", "o"),
+ "multi4-final": ("#dc2626", "X"),
+ }
+ for ax, (title, points) in zip(axes, panels):
+ for point in points: # type: ignore[assignment]
+ color, marker = style[point["family"]]
+ ax.scatter(point["mean8_all"], point["full_exact"], s=150, marker=marker, color=color, edgecolor="white", zorder=4)
+ ax.annotate(
+ f"{point['label']}\nstep {point['step']}",
+ (point["mean8_all"], point["full_exact"]),
+ xytext=(8, 5),
+ textcoords="offset points",
+ fontsize=8,
+ )
+ ax.set_title(title)
+ ax.set_xlabel("mean top-8 Lyapunov exponent (lower = more stable)")
+ ax.set_ylabel("full test exact accuracy")
+ ax.grid(alpha=0.22)
+ fig.suptitle("Accuracy-vs-chaotic-volume phase view: best multi4 moves up-left; collapse moves down-right")
+ fig.tight_layout()
+ fig.savefig(OUT / "fig2_accuracy_vs_chaotic_volume_phase.png", dpi=220)
+ plt.close(fig)
+
+
+def plot_spectrum_grid() -> None:
+ entries = [
+ ("HRM baseline best", ROOT / "diag_hrm_step_26040_512.npz"),
+ ("HRM multi4 best", ROOT / "diag_hrm_multi4_step_23436_512.npz"),
+ ("TRM baseline best", ROOT / "official_gbs768_spectrum/trm_gbs768_base_step58590_n512_k8_seed20260602.npz"),
+ ("TRM multi4 best", ROOT / "official_gbs768_spectrum/trm_gbs768_multi4_step35805_n512_k8_seed20260602.npz"),
+ ]
+ fig, axes = plt.subplots(2, 2, figsize=(10.8, 7.2), sharey=False)
+ for ax, (title, path) in zip(axes.flat, entries):
+ spec, exact = spectrum_arrays(path)
+ x = np.arange(1, spec.shape[1] + 1)
+ for mask, color, label in [(exact, "#2563eb", "success"), (~exact, "#dc2626", "failure")]:
+ mean = spec[mask].mean(axis=0)
+ se = spec[mask].std(axis=0) / max(mask.sum(), 1) ** 0.5
+ ax.plot(x, mean, marker="o", color=color, label=label)
+ ax.fill_between(x, mean - se, mean + se, color=color, alpha=0.16, linewidth=0)
+ ax.axhline(0, color="black", linewidth=0.8, alpha=0.55)
+ ax.set_title(f"{title}\nN={len(exact)}, acc={exact.mean():.3f}")
+ ax.set_xlabel("spectrum rank")
+ ax.set_ylabel("finite-time exponent")
+ ax.grid(alpha=0.22)
+ axes.flat[0].legend(frameon=False)
+ fig.suptitle("Success/failure separation is present in both HRM and TRM; multi4 mainly stabilizes successful trajectories")
+ fig.tight_layout()
+ fig.savefig(OUT / "fig3_hrm_trm_success_failure_spectra.png", dpi=220)
+ plt.close(fig)
+
+
+def plot_ptrm() -> None:
+ summary = read_csv(ROOT / "ptrm_same_subset/paired_ptrm_k100_n1000_seed0_summary.csv")[0]
+ labels = ["deterministic", "mean rollout", "Q-selected", "oracle"]
+ base = [float(summary[k]) for k in ["base_det", "base_mean_rollout", "base_qmax", "base_oracle"]]
+ multi = [float(summary[k]) for k in ["multi4_det", "multi4_mean_rollout", "multi4_qmax", "multi4_oracle"]]
+ x = np.arange(len(labels))
+ width = 0.36
+ fig, ax = plt.subplots(figsize=(8.7, 4.3))
+ ax.bar(x - width / 2, base, width, color="#64748b", label="baseline best")
+ ax.bar(x + width / 2, multi, width, color="#2563eb", label="multi4 best")
+ ax.set_xticks(x, labels)
+ ax.set_ylim(0.86, 1.0)
+ ax.set_ylabel("exact accuracy")
+ ax.set_title("PTRM same-subset comparison (n=1000, K=100, D=64, sigma=0.3, L-only)")
+ ax.grid(axis="y", alpha=0.22)
+ ax.legend(frameon=False)
+ for xpos, vals in [(x - width / 2, base), (x + width / 2, multi)]:
+ for xi, val in zip(xpos, vals):
+ ax.text(xi, val + 0.002, f"{val:.3f}", ha="center", va="bottom", fontsize=8)
+ fig.tight_layout()
+ fig.savefig(OUT / "fig4_ptrm_same_subset_comparison.png", dpi=220)
+ plt.close(fig)
+
+
+def write_summary(data: dict[str, object]) -> None:
+ rows = []
+ for model, points in [("HRM", data["hrm_points"]), ("TRM", data["trm_points"])]:
+ for point in points: # type: ignore[assignment]
+ rows.append(
+ {
+ "model": model,
+ "label": point["label"],
+ "step": point["step"],
+ "full_exact": point["full_exact"],
+ "sample_exact": point["sample_exact"],
+ "lambda1_all": point["lambda1_all"],
+ "mean8_all": point["mean8_all"],
+ "pos_count_all": point["pos_count_all"],
+ "lambda1_success": point["lambda1_success"],
+ "lambda1_fail": point["lambda1_fail"],
+ "mean8_success": point["mean8_success"],
+ "mean8_fail": point["mean8_fail"],
+ "pos_count_success": point["pos_count_success"],
+ "pos_count_fail": point["pos_count_fail"],
+ }
+ )
+ with (OUT / "hrm_trm_redesigned_summary.csv").open("w", newline="") as f:
+ writer = csv.DictWriter(f, fieldnames=list(rows[0].keys()))
+ writer.writeheader()
+ writer.writerows(rows)
+
+ ptrm = read_csv(ROOT / "ptrm_same_subset/paired_ptrm_k100_n1000_seed0_summary.csv")[0]
+ report = f"""# Meeting Figures v2
+
+## Figure Strategy
+
+0. `fig0_motivation_lambda1_success_failure_hrm_trm.png`: first-exponent success/failure distribution in HRM and TRM. This motivates chaos as a detector before introducing the method.
+1. `fig1_hrm_trm_training_curves.png`: performance over training for HRM and TRM. This answers whether the method improves accuracy and where best/final are.
+2. `fig2_accuracy_vs_chaotic_volume_phase.png`: phase view, with accuracy versus mean top-8 Lyapunov exponent. This answers whether better checkpoints are dynamically more stable.
+3. `fig3_hrm_trm_success_failure_spectra.png`: full success/failure spectrum separation for HRM and TRM best checkpoints. This extends Fig0 beyond λ1.
+4. `fig4_ptrm_same_subset_comparison.png`: PTRM same-subset result. This is a secondary inference-time story.
+
+## Key Numbers
+
+- HRM baseline best: 0.5265 exact. HRM multi4 best: 0.6443 exact. HRM multi4 final: 0.4624 exact.
+- TRM baseline best: 0.8686 exact. TRM multi4 best: 0.8965 exact. TRM multi4 final: 0.8351 exact.
+- HRM multi4 best dynamics sample: mean top-8 exponent {rows[1]['mean8_all']:+.4f}; final {rows[2]['mean8_all']:+.4f}.
+- TRM multi4 best dynamics sample: mean top-8 exponent {rows[4]['mean8_all']:+.4f}; final {rows[5]['mean8_all']:+.4f}.
+- PTRM same subset, K=100: Q-selected {float(ptrm['base_qmax']):.3f} -> {float(ptrm['multi4_qmax']):.3f}; mean rollout {float(ptrm['base_mean_rollout']):.3f} -> {float(ptrm['multi4_mean_rollout']):.3f}.
+
+## Caveats
+
+- Dynamics spectra use N=512 diagnostic samples, not the full test set.
+- PTRM numbers use a fixed N=1000 subset; do not mix its deterministic subset accuracy with full-test W&B exact accuracy.
+- Final checkpoints are collapse diagnostics, not the method's reported performance.
+"""
+ (OUT / "meeting_figures_v2_report.md").write_text(report)
+
+
+def main() -> None:
+ OUT.mkdir(parents=True, exist_ok=True)
+ data = get_data()
+ plot_lambda1_motivation()
+ plot_training_curves(data)
+ plot_phase(data)
+ plot_spectrum_grid()
+ plot_ptrm()
+ write_summary(data)
+ print(f"wrote redesigned artifacts to {OUT}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/report_bundle_20260603/scripts/make_q_lambda_scatter.py b/report_bundle_20260603/scripts/make_q_lambda_scatter.py
new file mode 100644
index 0000000..54ff8d2
--- /dev/null
+++ b/report_bundle_20260603/scripts/make_q_lambda_scatter.py
@@ -0,0 +1,263 @@
+from __future__ import annotations
+
+import csv
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+# Root directory of the analysis inputs. The path is relative, so it is
+# resolved against the current working directory when the script runs.
+ROOT = Path("research/flossing")
+# Directory holding the rollout diagnostic archives read by this script.
+IN_DIR = ROOT / "q_lambda_scatter"
+# Directory that receives the figure and summary CSV written by this script.
+OUT = ROOT / "meeting_artifacts_v2"
+
+# (display name, archive path) pairs; each pair becomes one figure column and
+# one row of the summary CSV.
+RUNS = [
+ (
+ "TRM baseline + PTRM rollouts",
+ IN_DIR / "base58590_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.npz",
+ ),
+ (
+ "TRM multi4 + PTRM rollouts",
+ IN_DIR / "multi4_35805_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.npz",
+ ),
+]
+
+
+def rank_average(values: np.ndarray) -> np.ndarray:
+ order = np.argsort(values, kind="mergesort")
+ sorted_values = values[order]
+ ranks = np.empty(len(values), dtype=float)
+ i = 0
+ while i < len(values):
+ j = i + 1
+ while j < len(values) and sorted_values[j] == sorted_values[i]:
+ j += 1
+ ranks[order[i:j]] = 0.5 * (i + j - 1) + 1.0
+ i = j
+ return ranks
+
+
+def corr(x: np.ndarray, y: np.ndarray) -> float:
+ mask = np.isfinite(x) & np.isfinite(y)
+ x = x[mask]
+ y = y[mask]
+ if len(x) < 3:
+ return float("nan")
+ x = x - x.mean()
+ y = y - y.mean()
+ denom = np.sqrt(np.square(x).sum() * np.square(y).sum())
+ if denom <= 0:
+ return float("nan")
+ return float(np.dot(x, y) / denom)
+
+
+def spearman(x: np.ndarray, y: np.ndarray) -> float:
+ mask = np.isfinite(x) & np.isfinite(y)
+ x = x[mask]
+ y = y[mask]
+ if len(x) < 3:
+ return float("nan")
+ return corr(rank_average(x), rank_average(y))
+
+
+def mean_within_problem_corr(stability: np.ndarray, q_halt: np.ndarray, rank: bool) -> float:
+ vals: list[float] = []
+ for x, y in zip(stability, q_halt):
+ val = spearman(x, y) if rank else corr(x, y)
+ if np.isfinite(val):
+ vals.append(val)
+ return float(np.mean(vals)) if vals else float("nan")
+
+
+def summarize(name: str, path: Path) -> dict[str, float | str]:
+ data = np.load(path)
+ exact = data["exact"].astype(bool)
+ q_halt = data["q_halt"].astype(float)
+ lyap = data["lyap"].astype(float)
+ if lyap.size == 0:
+ raise ValueError(f"{path} does not contain lyap")
+ stability = -lyap
+
+ arange = np.arange(exact.shape[0])
+ q_idx = q_halt.argmax(axis=1)
+ lyap_idx = lyap.argmin(axis=1)
+ correct_count = exact.sum(axis=1)
+ mixed = (correct_count > 0) & (correct_count < exact.shape[1])
+
+ mixed_summary: dict[str, float] = {
+ "mixed_problem_count": float(mixed.sum()),
+ "zero_success_problem_count": float((correct_count == 0).sum()),
+ "full_success_problem_count": float((correct_count == exact.shape[1]).sum()),
+ "mixed_global_pearson_q_vs_stability": float("nan"),
+ "mixed_q_max_exact": float("nan"),
+ "mixed_lambda_min_exact": float("nan"),
+ "mixed_oracle_exact": float("nan"),
+ }
+ if mixed.any():
+ m_arange = np.arange(int(mixed.sum()))
+ m_exact = exact[mixed]
+ m_q = q_halt[mixed]
+ m_lyap = lyap[mixed]
+ mixed_summary.update(
+ {
+ "mixed_global_pearson_q_vs_stability": corr((-m_lyap).reshape(-1), m_q.reshape(-1)),
+ "mixed_q_max_exact": float(m_exact[m_arange, m_q.argmax(axis=1)].mean()),
+ "mixed_lambda_min_exact": float(m_exact[m_arange, m_lyap.argmin(axis=1)].mean()),
+ "mixed_oracle_exact": float(m_exact.any(axis=1).mean()),
+ }
+ )
+
+ out: dict[str, float | str] = {
+ "name": name,
+ "path": str(path),
+ "n_samples": float(exact.shape[0]),
+ "rollouts": float(exact.shape[1]),
+ "mean_rollout_exact": float(exact.mean()),
+ "q_max_exact": float(exact[arange, q_idx].mean()),
+ "lambda_min_exact": float(exact[arange, lyap_idx].mean()),
+ "oracle_pass_exact": float(exact.any(axis=1).mean()),
+ "q_lambda_same_argmax_frac": float((q_idx == lyap_idx).mean()),
+ "global_pearson_q_vs_stability": corr(stability.reshape(-1), q_halt.reshape(-1)),
+ "global_spearman_q_vs_stability": spearman(stability.reshape(-1), q_halt.reshape(-1)),
+ "within_problem_pearson_mean": mean_within_problem_corr(stability, q_halt, rank=False),
+ "within_problem_spearman_mean": mean_within_problem_corr(stability, q_halt, rank=True),
+ "q_success_mean": float(q_halt[exact].mean()) if exact.any() else float("nan"),
+ "q_fail_mean": float(q_halt[~exact].mean()) if (~exact).any() else float("nan"),
+ "lambda_success_mean": float(lyap[exact].mean()) if exact.any() else float("nan"),
+ "lambda_fail_mean": float(lyap[~exact].mean()) if (~exact).any() else float("nan"),
+ }
+ out.update(mixed_summary)
+ return out
+
+
+def scatter_panel(
+ ax: plt.Axes,
+ stability_2d: np.ndarray,
+ q_2d: np.ndarray,
+ exact_2d: np.ndarray,
+ title: str,
+) -> None:
+ stability = stability_2d.reshape(-1)
+ q_halt = q_2d.reshape(-1)
+ exact = exact_2d.reshape(-1)
+ finite = np.isfinite(stability) & np.isfinite(q_halt)
+ exact = exact[finite]
+ q_halt = q_halt[finite]
+ stability = stability[finite]
+ if len(stability) == 0:
+ ax.set_title(title + "\n(no points)")
+ return
+
+ xlo, xhi = np.quantile(stability, [0.005, 0.995])
+ ylo, yhi = np.quantile(q_halt, [0.005, 0.995])
+ visible = (stability >= xlo) & (stability <= xhi) & (q_halt >= ylo) & (q_halt <= yhi)
+
+ ax.scatter(
+ stability[visible & ~exact],
+ q_halt[visible & ~exact],
+ s=8,
+ alpha=0.22,
+ color="#dc2626",
+ linewidths=0,
+ label="incorrect rollout",
+ )
+ ax.scatter(
+ stability[visible & exact],
+ q_halt[visible & exact],
+ s=8,
+ alpha=0.17,
+ color="#2563eb",
+ linewidths=0,
+ label="correct rollout",
+ )
+
+ fit = visible
+ if int(fit.sum()) >= 3:
+ slope, intercept = np.polyfit(stability[fit], q_halt[fit], 1)
+ xs = np.linspace(xlo, xhi, 100)
+ ax.plot(xs, slope * xs + intercept, color="black", linewidth=1.8, alpha=0.75)
+
+ ax.set_title(title)
+ ax.set_xlim(xlo, xhi)
+ ax.set_ylim(ylo, yhi)
+ ax.grid(alpha=0.22)
+
+
+def plot() -> list[dict[str, float | str]]:
+ missing = [path for _name, path in RUNS if not path.exists()]
+ if missing:
+ raise SystemExit("missing input files:\n" + "\n".join(str(p) for p in missing))
+
+ OUT.mkdir(parents=True, exist_ok=True)
+ summaries = [summarize(name, path) for name, path in RUNS]
+
+ fig, axes = plt.subplots(2, len(RUNS), figsize=(12.0, 8.2), sharey="row")
+ if len(RUNS) == 1:
+ axes = np.asarray(axes).reshape(2, 1)
+
+ for col, ((name, path), summary) in enumerate(zip(RUNS, summaries)):
+ data = np.load(path)
+ exact = data["exact"].astype(bool)
+ q_halt = data["q_halt"].astype(float)
+ stability = -data["lyap"].astype(float)
+ correct_count = exact.sum(axis=1)
+ mixed = (correct_count > 0) & (correct_count < exact.shape[1])
+
+ scatter_panel(
+ axes[0, col],
+ stability,
+ q_halt,
+ exact,
+ f"{name}: all rollouts\n"
+ f"r={summary['global_pearson_q_vs_stability']:.2f}, "
+ f"rho={summary['global_spearman_q_vs_stability']:.2f}, "
+ f"Q exact={summary['q_max_exact']:.3f}",
+ )
+ scatter_panel(
+ axes[1, col],
+ stability[mixed],
+ q_halt[mixed],
+ exact[mixed],
+ f"mixed problems only (n={int(summary['mixed_problem_count'])})\n"
+ f"r={summary['mixed_global_pearson_q_vs_stability']:.2f}, "
+ f"Q={summary['mixed_q_max_exact']:.3f}, "
+ f"lambda-min={summary['mixed_lambda_min_exact']:.3f}",
+ )
+
+ for ax in axes[1, :]:
+ ax.set_xlabel("stability proxy = -lambda_1")
+ axes[0, 0].set_ylabel("Q-head halt logit")
+ axes[1, 0].set_ylabel("Q-head halt logit")
+ axes[0, 0].legend(frameon=False, loc="lower right", fontsize=8)
+ fig.suptitle("PTRM Q-head score contains a stability signal; mixed problems reveal selector behavior")
+ fig.tight_layout()
+ fig.savefig(OUT / "fig5_qhead_vs_lambda1_ptrm.png", dpi=240)
+ plt.close(fig)
+
+ out_csv = OUT / "fig5_qhead_vs_lambda1_ptrm_summary.csv"
+ with out_csv.open("w", newline="") as f:
+ writer = csv.DictWriter(f, fieldnames=list(summaries[0].keys()))
+ writer.writeheader()
+ writer.writerows(summaries)
+ return summaries
+
+
+def main() -> None:
+ summaries = plot()
+ for row in summaries:
+ print(
+ f"{row['name']}: "
+ f"q_exact={row['q_max_exact']:.4f} "
+ f"lambda_min_exact={row['lambda_min_exact']:.4f} "
+ f"oracle={row['oracle_pass_exact']:.4f} "
+ f"pearson={row['global_pearson_q_vs_stability']:.4f} "
+ f"spearman={row['global_spearman_q_vs_stability']:.4f} "
+ f"within_spearman={row['within_problem_spearman_mean']:.4f} "
+ f"mixed_pearson={row['mixed_global_pearson_q_vs_stability']:.4f}"
+ )
+ print(f"wrote {OUT / 'fig5_qhead_vs_lambda1_ptrm.png'}")
+ print(f"wrote {OUT / 'fig5_qhead_vs_lambda1_ptrm_summary.csv'}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/report_bundle_20260603/tables/base58590_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.summary.csv b/report_bundle_20260603/tables/base58590_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.summary.csv
new file mode 100644
index 0000000..1078ef9
--- /dev/null
+++ b/report_bundle_20260603/tables/base58590_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.summary.csv
@@ -0,0 +1,2 @@
+correct_count/det_fail_mean,correct_count/det_success_mean,correct_count/full_frac,correct_count/ge_10_frac,correct_count/ge_1_frac,correct_count/ge_25_frac,correct_count/ge_5_frac,correct_count/mean,correct_count/median,correct_count/q10,correct_count/q25,correct_count/q75,correct_count/q90,correct_count/std,correct_count/zero_frac,deterministic/exact,deterministic/token_acc,fd_lyap,fd_spectrum_k,include_clean,lambda_fail_mean,lambda_mean,lambda_success_mean,lyap_min/exact,lyap_min/token_acc,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/det_fail_frac,oracle_pass/det_success_frac,oracle_pass/exact,oracle_pass/token_acc,perturb_both,perturb_h,perturb_l,q_fail_mean,q_max/det_fail_frac,q_max/det_success_frac,q_max/exact,q_max/token_acc,q_mean,q_minus_0.25lambda/exact,q_minus_0.25lambda/token_acc,q_minus_0.5lambda/exact,q_minus_0.5lambda/token_acc,q_minus_1lambda/exact,q_minus_1lambda/token_acc,q_minus_2lambda/exact,q_minus_2lambda/token_acc,q_success_mean,rollout0/exact,rollout0/token_acc,rollouts,steps
+15.185714721679688,24.78506851196289,0.859375,0.94921875,0.97265625,0.859375,0.96875,23.47265625,25.0,22.10000228881836,25.0,25.0,25.0,5.083601474761963,0.02734375,0.86328125,0.9498456716537476,1.0,0.0,0.0,6.921443462371826,5.711092472076416,5.632336139678955,0.97265625,0.989703893661499,0.9389062523841858,0.9769077897071838,512.0,0.3,0.800000011920929,1.0,0.97265625,0.9911988973617554,0.0,0.0,1.0,-10.65998649597168,0.800000011920929,1.0,0.97265625,0.989366352558136,6.669083118438721,0.97265625,0.9893181324005127,0.97265625,0.9891493320465088,0.97265625,0.989149272441864,0.97265625,0.9892698526382446,7.7966694831848145,0.94140625,0.9774305820465088,25.0,64.0
diff --git a/report_bundle_20260603/tables/base58590_k25_d64_sigma03_Lonly_n10000_seed20260602.summary.csv b/report_bundle_20260603/tables/base58590_k25_d64_sigma03_Lonly_n10000_seed20260602.summary.csv
new file mode 100644
index 0000000..16c5a1b
--- /dev/null
+++ b/report_bundle_20260603/tables/base58590_k25_d64_sigma03_Lonly_n10000_seed20260602.summary.csv
@@ -0,0 +1,2 @@
+correct_count/det_fail_mean,correct_count/det_success_mean,correct_count/full_frac,correct_count/ge_10_frac,correct_count/ge_1_frac,correct_count/ge_25_frac,correct_count/ge_5_frac,correct_count/mean,correct_count/median,correct_count/q10,correct_count/q25,correct_count/q75,correct_count/q90,correct_count/std,correct_count/zero_frac,deterministic/exact,deterministic/token_acc,fd_lyap,fd_spectrum_k,include_clean,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/det_fail_frac,oracle_pass/det_success_frac,oracle_pass/exact,oracle_pass/token_acc,perturb_both,perturb_h,perturb_l,q_max/det_fail_frac,q_max/det_success_frac,q_max/exact,q_max/token_acc,q_mean,rollout0/exact,rollout0/token_acc,rollouts,steps
+13.544709205627441,24.74855613708496,0.847599983215332,0.944599986076355,0.9776999950408936,0.847599983215332,0.9599999785423279,23.2450008392334,25.0,20.0,25.0,25.0,25.0,5.4259724617004395,0.022299999371170998,0.8658000230789185,0.9498122930526733,0.0,0.0,0.0,0.9297999739646912,0.9728542566299438,10000.0,0.3,0.8338301181793213,1.0,0.9776999950408936,0.9922346472740173,0.0,0.0,1.0,0.8338301181793213,0.9998844861984253,0.9775999784469604,0.9908208847045898,6.504925727844238,0.9284999966621399,0.9722283482551575,25.0,64.0
diff --git a/report_bundle_20260603/tables/fig5_qhead_vs_lambda1_ptrm_summary.csv b/report_bundle_20260603/tables/fig5_qhead_vs_lambda1_ptrm_summary.csv
new file mode 100644
index 0000000..bcbf401
--- /dev/null
+++ b/report_bundle_20260603/tables/fig5_qhead_vs_lambda1_ptrm_summary.csv
@@ -0,0 +1,3 @@
+name,path,n_samples,rollouts,mean_rollout_exact,q_max_exact,lambda_min_exact,oracle_pass_exact,q_lambda_same_argmax_frac,global_pearson_q_vs_stability,global_spearman_q_vs_stability,within_problem_pearson_mean,within_problem_spearman_mean,q_success_mean,q_fail_mean,lambda_success_mean,lambda_fail_mean,mixed_problem_count,zero_success_problem_count,full_success_problem_count,mixed_global_pearson_q_vs_stability,mixed_q_max_exact,mixed_lambda_min_exact,mixed_oracle_exact
+TRM baseline + PTRM rollouts,research/flossing/q_lambda_scatter/base58590_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.npz,512.0,25.0,0.93890625,0.97265625,0.97265625,0.97265625,0.04296875,0.7524171235289728,-0.025655130770112743,0.12497240516050691,0.09656841990607949,7.796669578964886,-10.659986413043478,5.632336168325687,6.921443493469901,58.0,14.0,440.0,0.7857503437605575,1.0,1.0,1.0
+TRM multi4 + PTRM rollouts,research/flossing/q_lambda_scatter/multi4_35805_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.npz,512.0,25.0,0.944609375,0.974609375,0.974609375,0.974609375,0.0859375,0.7289674235313497,-0.13633997124282113,0.18866459504767938,0.11859038341758327,7.7404597169382185,-10.081937588152327,5.7459801223593345,7.271883726456269,43.0,13.0,456.0,0.7911647653323942,1.0,1.0,1.0
diff --git a/report_bundle_20260603/tables/headline_trm_multi4_dynamics_table.csv b/report_bundle_20260603/tables/headline_trm_multi4_dynamics_table.csv
new file mode 100644
index 0000000..742ac68
--- /dev/null
+++ b/report_bundle_20260603/tables/headline_trm_multi4_dynamics_table.csv
@@ -0,0 +1,4 @@
+model,step,full_exact,full_token_acc,lm_loss,dyn_sample_exact,lambda1_all,mean8_all,tail4_all,pos_count_all
+TRM baseline best,58590,0.8686309456825256,0.9508475661277772,0.1155559569597244,0.875,0.02823458132615997,0.013457294571722192,0.0075273313675370546,7.841796875
+TRM multi4 best,35805,0.8964653611183167,0.9604493975639344,0.0945562794804573,0.900390625,0.020381716455975862,0.0065844104191477015,0.001402141885882835,3.841796875
+TRM multi4 final,65100,0.8350536823272705,0.9350366592407228,0.1547555029392242,0.82421875,0.03232463403946895,0.018508151198432188,0.013372940185377047,8.0
diff --git a/report_bundle_20260603/tables/hrm_baseline_eval.csv b/report_bundle_20260603/tables/hrm_baseline_eval.csv
new file mode 100644
index 0000000..afc2b27
--- /dev/null
+++ b/report_bundle_20260603/tables/hrm_baseline_eval.csv
@@ -0,0 +1,11 @@
+run,step,all/exact_accuracy,all/accuracy,all/lm_loss,all/q_halt_accuracy,all/q_halt_loss,all/steps
+hrm_baseline,2604,0.016369983553886414,0.6336106061935425,0.8522627949714661,0.9927079081535339,0.020887982100248337,16
+hrm_baseline,5208,0.06169315055012703,0.6744286417961121,0.7430469989776611,0.9772745370864868,0.07281211763620377,16
+hrm_baseline,7812,0.1358133852481842,0.6985693573951721,0.676047682762146,0.996868371963501,0.0211492907255888,16
+hrm_baseline,10416,0.20248068869113922,0.7259970307350159,0.6480565667152405,0.9983821511268616,0.017285015434026718,16
+hrm_baseline,13020,0.3024248778820038,0.7580602169036865,0.5614010691642761,0.9971805810928345,0.02416178770363331,16
+hrm_baseline,15624,0.36705803871154785,0.7842973470687866,0.49664124846458435,0.9964591860771179,0.02256467007100582,16
+hrm_baseline,18228,0.46287721395492554,0.8077820539474487,0.4641122817993164,0.9953309893608093,0.02509351819753647,16
+hrm_baseline,20832,0.4912721812725067,0.8198283314704895,0.4137057960033417,0.9955745935440063,0.02796635963022709,16
+hrm_baseline,23436,0.5193856954574585,0.8260135054588318,0.41173747181892395,0.9975590705871582,0.024772455915808678,16
+hrm_baseline,26040,0.5265287756919861,0.8270824551582336,0.4161679148674011,0.9960216283798218,0.03298119083046913,16
diff --git a/report_bundle_20260603/tables/hrm_honly_step26040_eval.csv b/report_bundle_20260603/tables/hrm_honly_step26040_eval.csv
new file mode 100644
index 0000000..d3e3ab9
--- /dev/null
+++ b/report_bundle_20260603/tables/hrm_honly_step26040_eval.csv
@@ -0,0 +1,2 @@
+step,accuracy,exact_accuracy,lm_loss,q_halt_accuracy,q_halt_loss,steps
+26040,0.85720146,0.62264127,0.35935268,0.99625105,0.024241636,16.0
diff --git a/report_bundle_20260603/tables/hrm_matched_compare.csv b/report_bundle_20260603/tables/hrm_matched_compare.csv
new file mode 100644
index 0000000..71b8e16
--- /dev/null
+++ b/report_bundle_20260603/tables/hrm_matched_compare.csv
@@ -0,0 +1,6 @@
+family,step,base_exact,multi4_exact,delta_exact,base_acc,multi4_acc,delta_acc,base_loss,multi4_loss,delta_loss,base_steps,multi4_steps
+hrm,2604,0.016369983553886414,0.0123774204403162,-0.003992563113570213,0.6336106061935425,0.6303551197052002,-0.003255486488342285,0.8522627949714661,0.8603715300559998,0.008108735084533691,16,16
+hrm,5208,0.06169315055012703,0.025012653321027756,-0.036680497229099274,0.6744286417961121,0.6685170531272888,-0.005911588668823242,0.7430469989776611,0.7412638068199158,-0.0017831921577453613,16,16
+hrm,7812,0.1358133852481842,0.04651052877306938,-0.08930285647511482,0.6985693573951721,0.687346339225769,-0.011223018169403076,0.676047682762146,0.7044885158538818,0.02844083309173584,16,16
+hrm,10416,0.20248068869113922,0.2006617933511734,-0.0018188953399658203,0.7259970307350159,0.7243355512619019,-0.0016614794731140137,0.6480565667152405,0.6340938210487366,-0.013962745666503906,16,16
+hrm,13020,0.3024248778820038,0.34697696566581726,0.04455208778381348,0.7580602169036865,0.7794705033302307,0.02141028642654419,0.5614010691642761,0.49915269017219543,-0.06224837899208069,16,16
diff --git a/report_bundle_20260603/tables/hrm_multi4_complete_eval.csv b/report_bundle_20260603/tables/hrm_multi4_complete_eval.csv
new file mode 100644
index 0000000..9aac158
--- /dev/null
+++ b/report_bundle_20260603/tables/hrm_multi4_complete_eval.csv
@@ -0,0 +1,11 @@
+run,step,exact,accuracy,loss
+hrm_multi4,2604,0.0123774204403162,0.6303551197052002,0.8603715300559998
+hrm_multi4,5208,0.025012653321027756,0.6685170531272888,0.7412638068199158
+hrm_multi4,7812,0.04651052877306938,0.687346339225769,0.7044885158538818
+hrm_multi4,10416,0.2006617933511734,0.7243355512619019,0.6340938210487366
+hrm_multi4,13020,0.34697696566581726,0.7794705033302307,0.49915269017219543
+hrm_multi4,15624,0.4653252363204956,0.8156101703643799,0.42743897438049316
+hrm_multi4,18228,0.5790873169898987,0.8494690656661987,0.3459467887878418
+hrm_multi4,20832,0.6393187,0.8660642,0.3186217
+hrm_multi4,23436,0.6443189,0.8684137,0.30427682
+hrm_multi4,26040,0.46235448,0.80298746,0.603943
diff --git a/report_bundle_20260603/tables/hrm_multi4_eval.csv b/report_bundle_20260603/tables/hrm_multi4_eval.csv
new file mode 100644
index 0000000..0f7dbc9
--- /dev/null
+++ b/report_bundle_20260603/tables/hrm_multi4_eval.csv
@@ -0,0 +1,6 @@
+run,step,all/exact_accuracy,all/accuracy,all/lm_loss,all/q_halt_accuracy,all/q_halt_loss,all/steps
+hrm_multi4,2604,0.0123774204403162,0.6303551197052002,0.8603715300559998,0.9879773855209351,0.020903315395116806,16
+hrm_multi4,5208,0.025012653321027756,0.6685170531272888,0.7412638068199158,0.9968069195747375,0.019309692084789276,16
+hrm_multi4,7812,0.04651052877306938,0.687346339225769,0.7044885158538818,0.9902976751327515,0.03646523132920265,16
+hrm_multi4,10416,0.2006617933511734,0.7243355512619019,0.6340938210487366,0.9991532564163208,0.011545917019248009,16
+hrm_multi4,13020,0.34697696566581726,0.7794705033302307,0.49915269017219543,0.9987062215805054,0.01581510715186596,16
diff --git a/report_bundle_20260603/tables/hrm_multi4_horizon_sweep_768.csv b/report_bundle_20260603/tables/hrm_multi4_horizon_sweep_768.csv
new file mode 100644
index 0000000..3bdace3
--- /dev/null
+++ b/report_bundle_20260603/tables/hrm_multi4_horizon_sweep_768.csv
@@ -0,0 +1,41 @@
+step,split,horizon,count,exact_accuracy,accuracy,lm_loss,q_halt_loss,steps
+23436,train,2,768.0,0.2955729166666667,0.8364840348561605,0.39225157833573504,0.04459714392820994,2.0
+23436,train,3,768.0,0.5755208333333334,0.8917663097381592,0.2618181909650845,0.01976812755068143,3.0
+23436,train,4,768.0,0.6796875,0.9122781753540039,0.21020127075343642,0.006546831379334132,4.0
+23436,train,5,768.0,0.734375,0.9230967362721761,0.182652537414814,0.00717167928814888,5.0
+23436,train,6,768.0,0.7669270833333334,0.9298482735951742,0.16631383252369117,0.00374875341852506,6.0
+23436,train,8,768.0,0.8033854166666666,0.9389146169026693,0.14464881393209095,0.0037066793690125146,8.0
+23436,train,10,768.0,0.8268229166666666,0.9462127685546875,0.12909535948177878,0.00991838239133358,10.0
+23436,train,12,768.0,0.84375,0.9500546455383301,0.11931335874268405,0.004430865868926048,12.0
+23436,train,14,768.0,0.85546875,0.9517585436503092,0.11357831630187847,0.0033199011037747064,14.0
+23436,train,16,768.0,0.859375,0.9536232948303223,0.11014115689326769,0.003578242535392443,16.0
+23436,test,2,768.0,0.12760416666666666,0.7265946865081787,0.6354224683840087,0.025561923782030743,2.0
+23436,test,3,768.0,0.2526041666666667,0.7604809602101644,0.5506175992783068,0.012921225279569626,3.0
+23436,test,4,768.0,0.3138020833333333,0.7742091019948324,0.515490498860075,0.003827621228992939,4.0
+23436,test,5,768.0,0.3619791666666667,0.7867315610249838,0.48693293612013444,0.00506168728073438,5.0
+23436,test,6,768.0,0.3984375,0.7953317960103353,0.46390732880926117,0.0035611667359868684,6.0
+23436,test,8,768.0,0.4583333333333333,0.8105709552764893,0.4314775246277862,0.005083844686547915,8.0
+23436,test,10,768.0,0.48828125,0.8183674812316895,0.4161860321298086,0.003043775757153829,10.0
+23436,test,12,768.0,0.51171875,0.8260512351989746,0.4016971584101859,0.004879387095570564,12.0
+23436,test,14,768.0,0.5325520833333334,0.831050713857015,0.3878147863194716,0.00292336226751407,14.0
+23436,test,16,768.0,0.5559895833333334,0.8361946741739908,0.37473119346002354,0.0035912382105986276,16.0
+26040,train,2,768.0,0.7825520833333334,0.963814894358317,0.09390118849629236,0.04166571795940399,2.0
+26040,train,3,768.0,0.9127604166666666,0.9810153643290201,0.051538720202730794,0.008062846958637238,3.0
+26040,train,4,768.0,0.9309895833333334,0.9841659863789877,0.04344159450663771,0.012521501630544662,4.0
+26040,train,5,768.0,0.9401041666666666,0.9858539899190267,0.04013312268752094,0.015388640264670054,5.0
+26040,train,6,768.0,0.9401041666666666,0.9870595932006836,0.03853385294570503,0.023091336091359455,6.0
+26040,train,8,768.0,0.94921875,0.9884098370869955,0.03620980748266996,0.01947430024544398,8.0
+26040,train,10,768.0,0.9557291666666666,0.9893261591593424,0.03444665149376691,0.03555429975191752,10.0
+26040,train,12,768.0,0.953125,0.9895029067993164,0.034414704500878884,0.04184401035308838,12.0
+26040,train,14,768.0,0.9557291666666666,0.9893904527028402,0.03586123670240371,0.037144094705581665,14.0
+26040,train,16,768.0,0.9518229166666666,0.9894386132558187,0.03654265702438753,0.041033936043580375,16.0
+26040,test,2,768.0,0.11588541666666667,0.7054398854573568,0.8908620335632751,0.040651207168896995,2.0
+26040,test,3,768.0,0.1875,0.7230902512868246,0.7620792943957264,0.03934991856416067,3.0
+26040,test,4,768.0,0.23567708333333334,0.731706460316976,0.7335753038165317,0.11375004053115845,4.0
+26040,test,5,768.0,0.25,0.7362075646718343,0.7385935210540664,0.160938690106074,5.0
+26040,test,6,768.0,0.2669270833333333,0.7397923469543457,0.743103274276764,0.17507813374201456,6.0
+26040,test,8,768.0,0.296875,0.7452899614969889,0.7468322750276379,0.19044278065363565,8.0
+26040,test,10,768.0,0.3138020833333333,0.7472190856933594,0.7515058945457195,0.21485831340154013,10.0
+26040,test,12,768.0,0.3229166666666667,0.7502892812093099,0.7620030015396181,0.22753063837687174,12.0
+26040,test,14,768.0,0.3307291666666667,0.7518165111541748,0.7553974518406182,0.2158371408780416,14.0
+26040,test,16,768.0,0.3346354166666667,0.7514950434366862,0.7639393310889216,0.23628832896550497,16.0
diff --git a/report_bundle_20260603/tables/hrm_trm_redesigned_summary.csv b/report_bundle_20260603/tables/hrm_trm_redesigned_summary.csv
new file mode 100644
index 0000000..c5a4ad0
--- /dev/null
+++ b/report_bundle_20260603/tables/hrm_trm_redesigned_summary.csv
@@ -0,0 +1,7 @@
+model,label,step,full_exact,sample_exact,lambda1_all,mean8_all,pos_count_all,lambda1_success,lambda1_fail,mean8_success,mean8_fail,pos_count_success,pos_count_fail
+HRM,baseline best,26040,0.5265287756919861,0.5,-0.0569394779099639,-0.10732079181130239,0.98046875,-0.14642834789538028,0.03254939207545249,-0.18957092847483636,-0.025070655147768406,0.01953125,1.94140625
+HRM,multi4 best,23436,0.6443189,0.654296875,-0.04733450568929953,-0.112159715874796,1.166015625,-0.10103540500250659,0.054302789621007604,-0.16998952501653278,-0.0027078172167066595,0.05970149253731343,3.2598870056497176
+HRM,multi4 final,26040,0.46235448,0.4296875,0.02874533511322852,-0.040662085491063316,1.626953125,0.03573289099003887,0.02348073821974127,-0.053027883530268646,-0.031345388338237384,1.8181818181818181,1.4828767123287672
+TRM,baseline best,58590,0.8686309456825256,0.875,0.02823458132615997,0.013457294571722192,7.841796875,0.01761167685357837,0.10259491263423115,0.008003413600119422,0.05163446137294159,7.819196428571429,8.0
+TRM,multi4 best,35805,0.8964653611183167,0.900390625,0.020381716455975862,0.0065844104191477015,3.841796875,0.01118387160416574,0.10352301992037717,0.0019524994650864183,0.04845325257252518,3.3817787418655096,8.0
+TRM,multi4 final,65100,0.8350536823272705,0.82421875,0.03232463403946895,0.018508151198432188,8.0,0.01912124014472792,0.09423388096814354,0.012883653437293365,0.04488079625621645,8.0,8.0
diff --git a/report_bundle_20260603/tables/meeting_figures_v2_report.md b/report_bundle_20260603/tables/meeting_figures_v2_report.md
new file mode 100644
index 0000000..5158c99
--- /dev/null
+++ b/report_bundle_20260603/tables/meeting_figures_v2_report.md
@@ -0,0 +1,26 @@
+# Meeting Figures v2
+
+## Figure Strategy
+
+0. `fig0_motivation_lambda1_success_failure_hrm_trm.png`: first-exponent (λ1) success/failure distribution in HRM and TRM. This motivates using chaos as a failure detector before introducing the method.
+1. `fig1_hrm_trm_training_curves.png`: performance over training for HRM and TRM. This answers whether the method improves accuracy and where best/final are.
+2. `fig2_accuracy_vs_chaotic_volume_phase.png`: phase view, with accuracy versus mean top-8 Lyapunov exponent. This answers whether better checkpoints are dynamically more stable.
+3. `fig3_hrm_trm_success_failure_spectra.png`: full success/failure spectrum separation for HRM and TRM best checkpoints. This extends Fig0 beyond λ1.
+4. `fig4_ptrm_same_subset_comparison.png`: PTRM same-subset result. This is a secondary inference-time story.
+5. `fig5_qhead_vs_lambda1_ptrm.png`: PTRM Q-head halt logit versus finite-difference stability proxy `-lambda_1`. The bottom row isolates mixed problems where trajectory selection actually matters.
+
+## Key Numbers
+
+- HRM baseline best: 0.5265 exact. HRM multi4 best: 0.6443 exact. HRM multi4 final: 0.4624 exact.
+- TRM baseline best: 0.8686 exact. TRM multi4 best: 0.8965 exact. TRM multi4 final: 0.8351 exact.
+- HRM multi4 best dynamics sample: mean top-8 exponent -0.1122; final -0.0407.
+- TRM multi4 best dynamics sample: mean top-8 exponent +0.0066; final +0.0185.
+- PTRM same subset, K=100: Q-selected 0.984 -> 0.988; mean rollout 0.942 -> 0.954.
+- PTRM Q-vs-stability, K=25/N=512: mixed-problem Pearson is 0.786 for baseline and 0.791 for multi4. In both runs, Q-max selection and lambda-min selection each match the oracle exact accuracy on this subset (0.973 for baseline, 0.975 for multi4).
+
+## Caveats
+
+- Dynamics spectra use N=512 diagnostic samples, not the full test set.
+- PTRM same-subset (K=100) numbers use a fixed N=1000 subset, and the PTRM Q-vs-stability (K=25) numbers use N=512; do not mix their deterministic subset accuracies with full-test W&B exact accuracy.
+- Final checkpoints are collapse diagnostics, not the method's reported performance.
+- Q-head is not a pure lambda ranker: global Spearman is weak because most problems are all-success/all-failure across K rollouts. The strongest evidence is the mixed-problem class separation and selector equivalence.
diff --git a/report_bundle_20260603/tables/multi4_35805_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.summary.csv b/report_bundle_20260603/tables/multi4_35805_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.summary.csv
new file mode 100644
index 0000000..7d15a10
--- /dev/null
+++ b/report_bundle_20260603/tables/multi4_35805_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.summary.csv
@@ -0,0 +1,2 @@
+correct_count/det_fail_mean,correct_count/det_success_mean,correct_count/full_frac,correct_count/ge_10_frac,correct_count/ge_1_frac,correct_count/ge_25_frac,correct_count/ge_5_frac,correct_count/mean,correct_count/median,correct_count/q10,correct_count/q25,correct_count/q75,correct_count/q90,correct_count/std,correct_count/zero_frac,deterministic/exact,deterministic/token_acc,fd_lyap,fd_spectrum_k,include_clean,lambda_fail_mean,lambda_mean,lambda_success_mean,lyap_min/exact,lyap_min/token_acc,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/det_fail_frac,oracle_pass/det_success_frac,oracle_pass/exact,oracle_pass/token_acc,perturb_both,perturb_h,perturb_l,q_fail_mean,q_max/det_fail_frac,q_max/det_success_frac,q_max/exact,q_max/token_acc,q_mean,q_minus_0.25lambda/exact,q_minus_0.25lambda/token_acc,q_minus_0.5lambda/exact,q_minus_0.5lambda/token_acc,q_minus_1lambda/exact,q_minus_1lambda/token_acc,q_minus_2lambda/exact,q_minus_2lambda/token_acc,q_success_mean,rollout0/exact,rollout0/token_acc,rollouts,steps
+13.054545402526855,24.886215209960938,0.890625,0.953125,0.974609375,0.890625,0.966796875,23.615234375,25.0,24.0,25.0,25.0,25.0,5.043018341064453,0.025390625,0.892578125,0.9596836566925049,1.0,0.0,0.0,7.271883964538574,5.830501556396484,5.7459797859191895,0.974609375,0.9897279739379883,0.9446094036102295,0.9784027934074402,512.0,0.3,0.7636363506317139,1.0,0.974609375,0.9913435578346252,0.0,0.0,1.0,-10.081937789916992,0.7636363506317139,1.0,0.974609375,0.9889563918113708,6.753265857696533,0.974609375,0.9889563918113708,0.974609375,0.9890046119689941,0.974609375,0.9889804720878601,0.974609375,0.9889563918113708,7.74045991897583,0.9375,0.9766348004341125,25.0,64.0
diff --git a/report_bundle_20260603/tables/multi4_35805_k25_d64_sigma03_Lonly_n10000_seed20260602.summary.csv b/report_bundle_20260603/tables/multi4_35805_k25_d64_sigma03_Lonly_n10000_seed20260602.summary.csv
new file mode 100644
index 0000000..f991531
--- /dev/null
+++ b/report_bundle_20260603/tables/multi4_35805_k25_d64_sigma03_Lonly_n10000_seed20260602.summary.csv
@@ -0,0 +1,2 @@
+correct_count/det_fail_mean,correct_count/det_success_mean,correct_count/full_frac,correct_count/ge_10_frac,correct_count/ge_1_frac,correct_count/ge_25_frac,correct_count/ge_5_frac,correct_count/mean,correct_count/median,correct_count/q10,correct_count/q25,correct_count/q75,correct_count/q90,correct_count/std,correct_count/zero_frac,deterministic/exact,deterministic/token_acc,fd_lyap,fd_spectrum_k,include_clean,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/det_fail_frac,oracle_pass/det_success_frac,oracle_pass/exact,oracle_pass/token_acc,perturb_both,perturb_h,perturb_l,q_max/det_fail_frac,q_max/det_success_frac,q_max/exact,q_max/token_acc,q_mean,rollout0/exact,rollout0/token_acc,rollouts,steps
+13.846529960632324,24.8140811920166,0.8842999935150146,0.9599000215530396,0.9828000068664551,0.8842999935150146,0.9696999788284302,23.692100524902344,25.0,24.0,25.0,25.0,25.0,4.753871917724609,0.01720000058412552,0.8977000117301941,0.9607678651809692,0.0,0.0,0.0,0.9476839900016785,0.9795129895210266,10000.0,0.3,0.8328445553779602,0.9998885989189148,0.9828000068664551,0.9940740466117859,0.0,0.0,1.0,0.829912006855011,0.9995543956756592,0.982200026512146,0.9926013946533203,6.862946510314941,0.9480999708175659,0.979781448841095,25.0,64.0
diff --git a/report_bundle_20260603/tables/paired_ptrm_k100_n1000_seed0_summary.csv b/report_bundle_20260603/tables/paired_ptrm_k100_n1000_seed0_summary.csv
new file mode 100644
index 0000000..8122604
--- /dev/null
+++ b/report_bundle_20260603/tables/paired_ptrm_k100_n1000_seed0_summary.csv
@@ -0,0 +1,2 @@
+n,rollouts,base_det,multi4_det,delta_det,base_mean_rollout,multi4_mean_rollout,delta_mean_rollout,base_qmax,multi4_qmax,delta_qmax,base_oracle,multi4_oracle,delta_oracle,base_correct_count_mean,multi4_correct_count_mean,delta_correct_count_mean,det_base_only_frac,det_multi4_only_frac,oracle_base_only_frac,oracle_multi4_only_frac
+1000,100,0.887,0.911,0.02400000000000002,0.94188,0.95417,0.012289999999999912,0.984,0.988,0.0040000000000000036,0.985,0.988,0.0030000000000000027,94.188,95.417,1.2289999999999992,0.034,0.058,0.001,0.004
diff --git a/report_bundle_20260603/tables/summary_n512_k8_seed20260602.csv b/report_bundle_20260603/tables/summary_n512_k8_seed20260602.csv
new file mode 100644
index 0000000..c255252
--- /dev/null
+++ b/report_bundle_20260603/tables/summary_n512_k8_seed20260602.csv
@@ -0,0 +1,4 @@
+run,step,n,sample_exact,sample_token_acc,lambda1_all,lambda1_success,lambda1_fail,lambda1_fail_minus_success,mean8_all,mean8_success,mean8_fail,mean8_fail_minus_success,tail4_all,tail4_success,tail4_fail,pos_count_all,pos_count_success,pos_count_fail,pos_mass_all,pos_mass_success,pos_mass_fail,lambda2_all,lambda2_success,lambda2_fail,lambda3_all,lambda3_success,lambda3_fail,lambda4_all,lambda4_success,lambda4_fail,lambda5_all,lambda5_success,lambda5_fail,lambda6_all,lambda6_success,lambda6_fail,lambda7_all,lambda7_success,lambda7_fail,lambda8_all,lambda8_success,lambda8_fail
+baseline_best,58590,512,0.875,0.9539689430384897,0.02823458132615997,0.01761167685357837,0.10259491263423115,0.08498323578065278,0.013457294571722192,0.008003413600119422,0.05163446137294159,0.04363104777282217,0.0075273313675370546,0.004122858266632009,0.03135864307387237,7.841796875,7.819196428571429,8.0,0.10779670139772268,0.0641854171711784,0.4130756909835327,0.0205107267238418,0.012619587999194794,0.07574869779637083,0.016147883449207256,0.0097831444752176,0.060701056267134845,0.012655839604420294,0.007521466406436568,0.04859645199030638,0.010066905798097991,0.00580994511748096,0.03986563056241721,0.00827578879948021,0.00458740731747704,0.0340944591735024,0.006620997387619565,0.003523945934349396,0.028300357560510747,0.0051456334849504515,0.00257013469722064,0.02317412499905913
+multi4_best,35805,512,0.900390625,0.9625048226444051,0.020381716455975862,0.01118387160416574,0.10352301992037717,0.09233914831621143,0.0065844104191477015,0.0019524994650864183,0.04845325257252518,0.046500753107438765,0.001402141885882835,-0.0014188478887233206,0.02690167690732279,3.841796875,3.3817787418655096,8.0,0.06377898653446445,0.027952091227886174,0.38762602058020146,0.012663036363773195,0.0057181008775463405,0.07543980615103946,0.008348809059507634,0.003078277385450445,0.055990281642652025,0.005673153930393582,0.0013151374084221035,0.04506620523684165,0.0034857525132654388,-9.921478389921372e-05,0.035891045140577296,0.002008319042374751,-0.001002505589948276,0.02922381228749074,0.0006402704918926361,-0.0019078789586364823,0.023673542976087213,-0.0005257745040014861,-0.0026657922224093103,0.018818307225135902
+multi4_final,65100,512,0.82421875,0.9289882326847874,0.03232463403946895,0.01912124014472792,0.09423388096814354,0.07511264082341562,0.018508151198432188,0.012883653437293365,0.04488079625621645,0.03199714281892309,0.013372940185377047,0.010672304166179654,0.026035922408724824,8.0,8.0,8.0,0.1480652095874575,0.10306922749834692,0.3590463700497316,0.02459628393262392,0.015458292881759563,0.0674433086377879,0.020185894951282535,0.013495850063401376,0.05155477209223641,0.017466635922573914,0.01230462774373944,0.041670718716664445,0.015463502108104876,0.011429727322452865,0.034377423880828754,0.013748909759215167,0.010808219788353272,0.027537478289256494,0.012621539073734311,0.010437874606770786,0.022860499129941068,0.011657809800453833,0.010013394947141692,0.019368288334872988
diff --git a/report_bundle_20260603/tables/summary_n64_k8_seed20260602.csv b/report_bundle_20260603/tables/summary_n64_k8_seed20260602.csv
new file mode 100644
index 0000000..5a12638
--- /dev/null
+++ b/report_bundle_20260603/tables/summary_n64_k8_seed20260602.csv
@@ -0,0 +1,5 @@
+name,n,acc,lambda1_all,lambda1_success,lambda1_fail,mean8_all,mean8_success,mean8_fail,tail4_all,pos_count_all,pos_count_success,pos_count_fail,pos_mass_all
+baseline_best,64,0.875,0.027470633,0.015822656,0.109006464,0.013350397,0.0077010575,0.052895777,0.0074741757,7.796875,7.767857142857143,8.0,0.10691354
+multi4_bestish,64,0.953125,0.016227467,0.012409109,0.09386742,0.004405942,0.002358008,0.046047267,-0.00011889404,3.375,3.1475409836065573,8.0,0.048455432
+multi4_late,64,0.84375,0.031355858,0.01815009,0.10266701,0.018412568,0.01310199,0.047089677,0.013622271,8.0,8.0,8.0,0.14730054
+multi4_final,64,0.859375,0.028622076,0.01753769,0.09636,0.01713102,0.01240586,0.046007,0.012961711,8.0,8.0,8.0,0.13704816
diff --git a/report_bundle_20260603/tables/trm_baseline_eval.csv b/report_bundle_20260603/tables/trm_baseline_eval.csv
new file mode 100644
index 0000000..0b6bc11
--- /dev/null
+++ b/report_bundle_20260603/tables/trm_baseline_eval.csv
@@ -0,0 +1,11 @@
+run,step,all/exact_accuracy,all/accuracy,all/lm_loss,all/q_halt_accuracy,all/q_halt_loss,all/steps
+trm_baseline,26041,0.5575894117355347,0.8469749093055725,0.35285070538520813,0.9997445344924927,0.003797376062721014,16
+trm_baseline,52082,0.6295099854469299,0.8704391121864319,0.2973524034023285,0.9998486042022705,0.0011324305087327957,16
+trm_baseline,78123,0.6993892788887024,0.8920264840126038,0.2477506548166275,0.9998935461044312,0.0006260558147914708,16
+trm_baseline,104164,0.72928386926651,0.9020143151283264,0.22608688473701477,0.9998770356178284,0.001141014276072383,16
+trm_baseline,130205,0.7653990387916565,0.914251446723938,0.19878524541854858,0.999917209148407,0.0010435190051794052,16
+trm_baseline,156246,0.7596656680107117,0.9119072556495667,0.2041437327861786,0.999862790107727,0.0011596218682825565,16
+trm_baseline,182287,0.7541900873184204,0.9094774723052979,0.2100999504327774,0.9998249411582947,0.0013093978632241488,16
+trm_baseline,208328,0.7732800841331482,0.9166732430458069,0.19410833716392517,0.9998320937156677,0.0033004307188093662,16
+trm_baseline,234369,0.7750374674797058,0.9172152280807495,0.19279460608959198,0.9998533725738525,0.0032994903158396482,16
+trm_baseline,260410,0.7742025256156921,0.9169425964355469,0.19348150491714478,0.9998462796211243,0.002894919365644455,16
diff --git a/report_bundle_20260603/tables/trm_gbs768_multi4_step65100_det_n10000_seed20260602.summary.csv b/report_bundle_20260603/tables/trm_gbs768_multi4_step65100_det_n10000_seed20260602.summary.csv
new file mode 100644
index 0000000..38ca8f5
--- /dev/null
+++ b/report_bundle_20260603/tables/trm_gbs768_multi4_step65100_det_n10000_seed20260602.summary.csv
@@ -0,0 +1,2 @@
+correct_count/det_fail_mean,correct_count/det_success_mean,correct_count/full_frac,correct_count/ge_1_frac,correct_count/mean,correct_count/median,correct_count/q10,correct_count/q25,correct_count/q75,correct_count/q90,correct_count/std,correct_count/zero_frac,deterministic/exact,deterministic/token_acc,fd_lyap,fd_spectrum_k,include_clean,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/det_fail_frac,oracle_pass/det_success_frac,oracle_pass/exact,oracle_pass/token_acc,perturb_both,perturb_h,perturb_l,q_max/det_fail_frac,q_max/det_success_frac,q_max/exact,q_max/token_acc,q_mean,rollout0/exact,rollout0/token_acc,rollouts,steps
+0.0,1.0,0.8312000036239624,0.8312000036239624,0.8312000036239624,1.0,0.0,1.0,1.0,1.0,0.3745751678943634,0.1687999963760376,0.8312000036239624,0.9333752989768982,0.0,0.0,0.0,0.8312000036239624,0.9333752989768982,10000.0,0.0,0.0,1.0,0.8312000036239624,0.9333752989768982,1.0,0.0,0.0,0.0,1.0,0.8312000036239624,0.9333752989768982,3.762489080429077,0.8312000036239624,0.9333752989768982,1.0,16.0
diff --git a/report_bundle_20260603/tables/trm_matched_compare.csv b/report_bundle_20260603/tables/trm_matched_compare.csv
new file mode 100644
index 0000000..32006a5
--- /dev/null
+++ b/report_bundle_20260603/tables/trm_matched_compare.csv
@@ -0,0 +1,2 @@
+family,step,base_exact,multi4_exact,delta_exact,base_acc,multi4_acc,delta_acc,base_loss,multi4_loss,delta_loss,base_steps,multi4_steps
+trm,26041,0.5575894117355347,0.7394047975540161,0.18181538581848145,0.8469749093055725,0.9067130088806152,0.059738099575042725,0.35285070538520813,0.21417337656021118,-0.13867732882499695,16,16
diff --git a/report_bundle_20260603/tables/trm_multi4_eval.csv b/report_bundle_20260603/tables/trm_multi4_eval.csv
new file mode 100644
index 0000000..f3d16b5
--- /dev/null
+++ b/report_bundle_20260603/tables/trm_multi4_eval.csv
@@ -0,0 +1,2 @@
+run,step,all/exact_accuracy,all/accuracy,all/lm_loss,all/q_halt_accuracy,all/q_halt_loss,all/steps
+trm_multi4,26041,0.7394047975540161,0.9067130088806152,0.21417337656021118,0.9997232556343079,0.0024572687689214945,16
diff --git a/report_bundle_20260603/tables/trm_multi4_eval_full.csv b/report_bundle_20260603/tables/trm_multi4_eval_full.csv
new file mode 100644
index 0000000..3411ff5
--- /dev/null
+++ b/report_bundle_20260603/tables/trm_multi4_eval_full.csv
@@ -0,0 +1,11 @@
+run,step,all/exact_accuracy,all/accuracy,all/lm_loss,all/q_halt_accuracy,all/q_halt_loss,all/steps
+trm_multi4_loguniform_repro,26041,0.7394047975540161,0.9067130088806152,0.21417337656021118,0.9997232556343079,0.0024572687689214945,16
+trm_multi4_loguniform_repro,52082,0.8449901342391968,0.9424432516098022,0.1342233270406723,0.9998746514320374,0.0010290677892044187,16
+trm_multi4_loguniform_repro,78123,0.8417639136314392,0.9411031007766724,0.13769488036632538,0.9997493028640747,0.0022251552436500788,16
+trm_multi4_loguniform_repro,104164,0.8547161221504211,0.9456510543823242,0.12779666483402252,0.999834418296814,0.0019055778393521905,16
+trm_multi4_loguniform_repro,130205,0.8536233305931091,0.9453508853912354,0.1282763034105301,0.9998888373374939,0.0018422487191855907,16
+trm_multi4_loguniform_repro,156246,0.8489472270011902,0.9433866739273071,0.13273237645626068,0.999782383441925,0.003051365725696087,16
+trm_multi4_loguniform_repro,182287,0.8558868765830994,0.9459810256958008,0.12728126347064972,0.9997469186782837,0.0024399051908403635,16
+trm_multi4_loguniform_repro,208328,0.8404204249382019,0.9403222799301147,0.13971956074237823,0.9996499419212341,0.0029723909683525562,16
+trm_multi4_loguniform_repro,234369,0.845432460308075,0.9419097304344177,0.13668429851531982,0.9997681975364685,0.0016449446557089686,16
+trm_multi4_loguniform_repro,260410,0.826143741607666,0.9344042539596558,0.15468436479568481,0.999701976776123,0.001810370129533112,16
diff --git a/report_bundle_20260603/tables/trm_official_gbs768_eval.csv b/report_bundle_20260603/tables/trm_official_gbs768_eval.csv
new file mode 100644
index 0000000..66dfe1b
--- /dev/null
+++ b/report_bundle_20260603/tables/trm_official_gbs768_eval.csv
@@ -0,0 +1,11 @@
+run,step,all/exact_accuracy,all/accuracy,all/lm_loss,all/q_halt_accuracy,all/q_halt_loss,all/steps
+trm_official_gbs768,6510,0.20006339251995087,0.7261562347412109,0.6155200004577637,0.9993779063224792,0.0063577014952898026,16
+trm_official_gbs768,13020,0.6248409152030945,0.8679871559143066,0.30110087990760803,0.9997587203979492,0.0018232133006677032,16
+trm_official_gbs768,19530,0.7098508477210999,0.8961462378501892,0.23719573020935059,0.9997185468673706,0.002359110629186034,16
+trm_official_gbs768,26040,0.7558197379112244,0.9113300442695618,0.20295456051826477,0.9997066855430603,0.0026622479781508446,16
+trm_official_gbs768,32550,0.7946407794952393,0.9247151017189026,0.17315243184566498,0.9997208714485168,0.0024344150442630053,16
+trm_official_gbs768,39060,0.8247435092926025,0.9351920485496521,0.14995059370994568,0.9996594190597534,0.002962446305900812,16
+trm_official_gbs768,45570,0.8479396104812622,0.9433117508888245,0.1323014795780182,0.9996806979179382,0.0026382978539913893,16
+trm_official_gbs768,52080,0.8633161783218384,0.9488447904586792,0.12009253352880478,0.9997137784957886,0.002587266732007265,16
+trm_official_gbs768,58590,0.8686309456825256,0.9508475661277771,0.11555595695972443,0.9998438954353333,0.0014915302162989974,16
+trm_official_gbs768,65100,0.86624675989151,0.9500409364700317,0.11748332530260086,0.9996523261070251,0.002605273388326168,16
diff --git a/report_bundle_20260603/tables/trm_official_gbs768_multi4_eval.csv b/report_bundle_20260603/tables/trm_official_gbs768_multi4_eval.csv
new file mode 100644
index 0000000..ec353ce
--- /dev/null
+++ b/report_bundle_20260603/tables/trm_official_gbs768_multi4_eval.csv
@@ -0,0 +1,21 @@
+run,step,all/exact_accuracy,all/accuracy,all/lm_loss,all/q_halt_accuracy,all/q_halt_loss,all/steps,_runtime,_timestamp
+trm_official_gbs768_multi4,3255,0.0175999198108911,0.6504403352737427,0.7941210269927979,0.9824000597000122,0.108424387872219,16.0,12550.718766357,1780192940.452497
+trm_official_gbs768_multi4,6510,0.2277795374393463,0.7379271984100342,0.5934479236602783,0.999262034893036,0.0035974220372736,16.0,25064.702453699,1780205454.4563622
+trm_official_gbs768_multi4,9765,0.6357069611549377,0.8709613084793091,0.2931223213672638,0.9997137784957886,0.0014165197499096,16.0,37579.311725746,1780217969.132149
+trm_official_gbs768_multi4,13020,0.75853031873703,0.910834014415741,0.2033131867647171,0.9997611045837402,0.0021604059729725,16.0,50092.402719112,1780230482.2738986
+trm_official_gbs768_multi4,16275,0.8263778686523438,0.9346956610679626,0.1507271528244018,0.9997800588607788,0.0041020140051841,16.0,62595.579823935,1780242985.398887
+trm_official_gbs768_multi4,19530,0.8654520511627197,0.9488762617111206,0.1198361814022064,0.9997090697288512,0.0052023055031895,16.0,75096.299217356,1780255486.102359
+trm_official_gbs768_multi4,22785,0.8817486763000488,0.9549695253372192,0.1065462753176689,0.9997114539146424,0.0037784865126013,16.0,87607.26667599,1780267997.105077
+trm_official_gbs768_multi4,26040,0.8879267573356628,0.957356870174408,0.1014027148485183,0.999642848968506,0.0053329654037952,16.0,100118.838503384,1780280508.634669
+trm_official_gbs768_multi4,29295,0.8933455944061279,0.959309697151184,0.0971761047840118,0.9996144771575928,0.0041324645280838,16.0,112620.42551711,1780293010.168183
+trm_official_gbs768_multi4,32550,0.8962169885635376,0.960436463356018,0.0948082581162452,0.9996026158332824,0.0144010595977306,16.0,125134.755165262,1780305524.5418072
+trm_official_gbs768_multi4,35805,0.8964653611183167,0.9604493975639344,0.0945562794804573,0.9995790123939514,0.0068304436281323,16.0,137714.797781532,1780318104.6386163
+trm_official_gbs768_multi4,39060,0.8957250118255615,0.9601802825927734,0.0952448472380638,0.999541163444519,0.0185395441949367,16.0,150233.754675447,1780330623.5781178
+trm_official_gbs768_multi4,42315,0.888913094997406,0.9574248790740968,0.1008101180195808,0.9995600581169128,0.0061048995703458,16.0,162739.436977464,1780343129.2559526
+trm_official_gbs768_multi4,45570,0.8858169317245483,0.9561300873756408,0.1036654934287071,0.9994938373565674,0.0054858992807567,16.0,175251.082236918,1780355640.8929477
+trm_official_gbs768_multi4,48825,0.882732629776001,0.9547808170318604,0.1068678125739097,0.9994110465049744,0.0072551541961729,16.0,187778.40637965,1780368168.2221627
+trm_official_gbs768_multi4,52080,0.8782954216003418,0.9530280232429504,0.1104752272367477,0.99934720993042,0.0089566921815276,16.0,200285.373208387,1780380675.1846614
+trm_official_gbs768_multi4,55335,0.8694067597389221,0.94942307472229,0.1194151788949966,0.9987700581550598,0.0272370912134647,16.0,212785.271448664,1780393175.0770009
+trm_official_gbs768_multi4,58590,0.8620365858078003,0.9463443756103516,0.1268824934959411,0.9983466863632202,0.0163928251713514,16.0,225280.516080387,1780405670.3403552
+trm_official_gbs768_multi4,61845,0.850763738155365,0.941650390625,0.1378339380025863,0.9978050589561462,0.0215476993471384,16.0,237778.331426833,1780418168.1822684
+trm_official_gbs768_multi4,65100,0.8350536823272705,0.9350366592407228,0.1547555029392242,0.9962841868400574,0.0310162808746099,16.0,250280.395200839,1780430673.009492
diff --git a/report_bundle_20260603/tables/trm_official_gbs768_multi4_step16275_eval.csv b/report_bundle_20260603/tables/trm_official_gbs768_multi4_step16275_eval.csv
new file mode 100644
index 0000000..a34ae57
--- /dev/null
+++ b/report_bundle_20260603/tables/trm_official_gbs768_multi4_step16275_eval.csv
@@ -0,0 +1,2 @@
+step,accuracy,exact_accuracy,lm_loss,q_halt_accuracy,q_halt_loss,steps
+16275,0.934574,0.82621706,0.15090619,0.99974453,0.00405054,16.0
diff --git a/report_bundle_20260603/tables/wallclock_eval.csv b/report_bundle_20260603/tables/wallclock_eval.csv
new file mode 100644
index 0000000..268a924
--- /dev/null
+++ b/report_bundle_20260603/tables/wallclock_eval.csv
@@ -0,0 +1,30 @@
+run,step,runtime_sec,runtime_hms,timestamp_local,ckpt_exists,ckpt_mtime_local,exact,accuracy,loss
+hrm_baseline,2604,1440.547846523,00:24:00.55,2026-05-22 06:32:03,True,2026-05-22 06:32:04,0.016369983553886414,0.6336106061935425,0.8522627949714661
+hrm_baseline,5208,2915.282126881,00:48:35.28,2026-05-22 06:56:38,True,2026-05-22 06:56:39,0.06169315055012703,0.6744286417961121,0.7430469989776611
+hrm_baseline,7812,3726.920135005,01:02:06.92,2026-05-22 07:10:09,True,2026-05-22 07:10:10,0.1358133852481842,0.6985693573951721,0.676047682762146
+hrm_baseline,10416,4458.114451179,01:14:18.11,2026-05-22 07:22:21,True,2026-05-22 07:22:22,0.20248068869113922,0.7259970307350159,0.6480565667152405
+hrm_baseline,13020,5801.015399971,01:36:41.02,2026-05-22 07:44:43,True,2026-05-22 07:44:45,0.3024248778820038,0.7580602169036865,0.5614010691642761
+hrm_baseline,15624,7219.496414876,02:00:19.50,2026-05-22 08:08:22,True,2026-05-22 08:08:23,0.36705803871154785,0.7842973470687866,0.49664124846458435
+hrm_baseline,18228,8616.18880162,02:23:36.19,2026-05-22 08:31:39,True,2026-05-22 08:31:40,0.46287721395492554,0.8077820539474487,0.4641122817993164
+hrm_baseline,20832,9904.193793478,02:45:04.19,2026-05-22 08:53:07,True,2026-05-22 08:53:08,0.4912721812725067,0.8198283314704895,0.4137057960033417
+hrm_baseline,23436,10634.360398376,02:57:14.36,2026-05-22 09:05:17,True,2026-05-22 09:05:18,0.5193856954574585,0.8260135054588318,0.41173747181892395
+hrm_baseline,26040,11366.743085113,03:09:26.74,2026-05-22 09:17:29,True,2026-05-22 09:17:30,0.5265287756919861,0.8270824551582336,0.4161679148674011
+hrm_multi4,2604,5810.841404196,01:36:50.84,2026-05-27 22:44:06,True,2026-05-27 22:44:07,0.0123774204403162,0.6303551197052002,0.8603715300559998
+hrm_multi4,5208,11587.805059004,03:13:07.81,2026-05-28 00:20:23,True,2026-05-28 00:20:24,0.025012653321027756,0.6685170531272888,0.7412638068199158
+hrm_multi4,7812,17365.375767937,04:49:25.38,2026-05-28 01:56:41,True,2026-05-28 01:56:42,0.04651052877306938,0.687346339225769,0.7044885158538818
+hrm_multi4,10416,23142.813901422,06:25:42.81,2026-05-28 03:32:58,True,2026-05-28 03:32:59,0.2006617933511734,0.7243355512619019,0.6340938210487366
+hrm_multi4,13020,29347.517355686,08:09:07.52,2026-05-28 05:16:23,True,2026-05-28 05:16:24,0.34697696566581726,0.7794705033302307,0.49915269017219543
+hrm_multi4,15624,35123.513458465,09:45:23.51,2026-05-28 06:52:39,True,2026-05-28 06:52:40,0.4653252363204956,0.8156101703643799,0.42743897438049316
+hrm_multi4,18228,40898.005173388,11:21:38.01,2026-05-28 08:28:54,True,2026-05-28 08:28:55,0.5790873169898987,0.8494690656661987,0.3459467887878418
+trm_baseline,26041,8593.415472305,02:23:13.42,2026-05-23 01:55:09,True,2026-05-23 01:55:10,0.5575894117355347,0.8469749093055725,0.35285070538520813
+trm_baseline,52082,17155.876633182,04:45:55.88,2026-05-23 04:17:52,True,2026-05-23 04:17:52,0.6295099854469299,0.8704391121864319,0.2973524034023285
+trm_baseline,78123,25719.395089913,07:08:39.40,2026-05-23 06:40:35,True,2026-05-23 06:40:36,0.6993892788887024,0.8920264840126038,0.2477506548166275
+trm_baseline,104164,34282.065662564,09:31:22.07,2026-05-23 09:03:18,True,2026-05-23 09:03:18,0.72928386926651,0.9020143151283264,0.22608688473701477
+trm_baseline,130205,42852.348430037,11:54:12.35,2026-05-23 11:26:08,True,2026-05-23 11:26:09,0.7653990387916565,0.914251446723938,0.19878524541854858
+trm_baseline,156246,51422.119869545,14:17:02.12,2026-05-23 13:48:58,True,2026-05-23 13:48:58,0.7596656680107117,0.9119072556495667,0.2041437327861786
+trm_baseline,182287,59826.120123505,16:37:06.12,2026-05-23 16:09:02,True,2026-05-23 16:09:02,0.7541900873184204,0.9094774723052979,0.2100999504327774
+trm_baseline,208328,68138.229200214,18:55:38.23,2026-05-23 18:27:34,True,2026-05-23 18:27:34,0.7732800841331482,0.9166732430458069,0.19410833716392517
+trm_baseline,234369,76460.482892161,21:14:20.48,2026-05-23 20:46:17,True,2026-05-23 20:46:17,0.7750374674797058,0.9172152280807495,0.19279460608959198
+trm_baseline,260410,84783.527271934,23:33:03.53,2026-05-23 23:05:00,True,2026-05-23 23:05:00,0.7742025256156921,0.9169425964355469,0.19348150491714478
+trm_multi4,26041,22216.1844709,06:10:16.18,2026-05-28 03:18:26,True,2026-05-28 03:18:26,0.7394047975540161,0.9067130088806152,0.21417337656021118
+trm_multi4,52082,44853.735386924,12:27:33.74,2026-05-28 09:35:44,True,2026-05-28 09:35:44,0.8449901342391968,0.9424432516098022,0.1342233270406723
diff --git a/reverse_floss_finetune.py b/reverse_floss_finetune.py
new file mode 100644
index 0000000..0e767ea
--- /dev/null
+++ b/reverse_floss_finetune.py
@@ -0,0 +1,289 @@
+"""Reverse-flossing fine-tune of a trained HRM checkpoint.
+
+L_total = L_HRM_ACT (existing supervised loss) + alpha * L_RF
+L_RF = mean over batch of max(0, mean_lambda - lambda_star) ** 2
+
+Computes finite-time top-k Lyapunov spectrum during the forward (with grad enabled
+so that L_RF is differentiable wrt model params), then averages and applies a hinge.
+"""
+from __future__ import annotations
+import sys, os, yaml, json, math, time, argparse
+from pathlib import Path
+import numpy as np
+import torch
+import torch.nn.functional as F
+
+HRM_DIR = Path("/home/yurenh2/rrm/hrm")
+sys.path.insert(0, str(HRM_DIR))
+
+from models.hrm.hrm_act_v1 import (
+ HierarchicalReasoningModel_ACTV1,
+ HierarchicalReasoningModel_ACTV1InnerCarry,
+)
+from models.losses import ACTLossHead
+from adam_atan2 import AdamATan2
+
+# ----------------- helpers -----------------
+
+def load_model_for_finetune(ckpt_root: Path, ckpt_name: str, device: str):
+ cfg = yaml.safe_load((ckpt_root / "all_config.yaml").read_text())
+ arch_cfg = dict(cfg["arch"])
+ train_meta = json.loads((Path(cfg["data_path"]) / "train" / "dataset.json").read_text())
+ arch_cfg.update(
+ batch_size=cfg["global_batch_size"],
+ seq_len=train_meta["seq_len"],
+ vocab_size=train_meta["vocab_size"],
+ num_puzzle_identifiers=train_meta["num_puzzle_identifiers"],
+ causal=False,
+ )
+ base = HierarchicalReasoningModel_ACTV1(arch_cfg)
+ head = ACTLossHead(base, loss_type=arch_cfg["loss"]["loss_type"])
+
+ sd = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True)
+ stripped = {}
+ for k, v in sd.items():
+ nk = k
+ for prefix in ("_orig_mod.",):
+ if nk.startswith(prefix): nk = nk[len(prefix):]
+ stripped[nk] = v
+ missing, unexpected = head.load_state_dict(stripped, strict=False)
+ print(f"[load] missing={len(missing)} unexpected={len(unexpected)}")
+ if missing[:3]: print(f" sample missing: {missing[:3]}")
+ if unexpected[:3]: print(f" sample unexpected: {unexpected[:3]}")
+ head.to(device)
+ return head, base, cfg, train_meta
+
+
+def jvp_apply(f, x, V):
+ """D_f(x) @ V where V is (B, state_dim, k). Uses create_graph=True so RF loss is
+ differentiable wrt model params."""
+ B, state_dim, k = V.shape
+ out = []
+ fx_last = None
+ for i in range(k):
+ v_i = V[..., i].view_as(x)
+ fx, Dv = torch.autograd.functional.jvp(f, x, v=v_i, create_graph=True, strict=False)
+ out.append(Dv.reshape(B, state_dim).to(torch.float32))
+ fx_last = fx
+ return fx_last, torch.stack(out, dim=-1)
+
+
+def compute_lyap_mean(model, inner, batch, k_lyap, device, seed, lyap_act_steps=4):
+ """Run forward with grad and accumulate top-k Lyapunov estimate per sample.
+ Returns mean_lambda per sample (B,) — DIFFERENTIABLE wrt params.
+
+ `lyap_act_steps`: how many ACT steps to unroll for the Lyapunov estimate.
+ Fewer steps → lower memory but noisier estimate. T = lyap_act_steps * (H*L + H).
+ """
+ # NOTE(review): `model` is not referenced anywhere in this body; only `inner` is used.
+ cfg = inner.config
+ B = batch["inputs"].shape[0]
+ seq_full = inner.config.seq_len + inner.puzzle_emb_len
+ hidden = cfg.hidden_size
+ # Each of z_H / z_L is flattened to a vector of this length for the tangent basis.
+ state_dim = seq_full * hidden
+
+ # Init
+ z_H = inner.H_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
+ z_L = inner.L_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
+ seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None)
+ input_embeddings = inner._input_embeddings(batch["inputs"], batch["puzzle_identifiers"])
+
+ # Seeded random tangent basis, orthonormalised by QR (k_lyap columns per sample).
+ g = torch.Generator(device=device).manual_seed(seed)
+ Q0 = torch.randn(B, state_dim, k_lyap, device=device, dtype=torch.float32, generator=g)
+ Q, _ = torch.linalg.qr(Q0)
+ # Running sum of log|diag(R)| over all update steps; n_steps counts those steps.
+ log_R_sum = torch.zeros(B, k_lyap, device=device, dtype=torch.float32)
+ n_steps = 0
+
+ # Never unroll more ACT steps than the model's configured maximum.
+ n_act = min(lyap_act_steps, cfg.halt_max_steps)
+ for _act in range(n_act):
+ for _h in range(cfg.H_cycles):
+ for _l in range(cfg.L_cycles):
+ # L-level update as a function of z_L only (z_H + input_embeddings held fixed).
+ f = lambda x: inner.L_level(x, z_H + input_embeddings, **seq_info)
+ z_L_new, DQ = jvp_apply(f, z_L, Q)
+ Q = DQ; z_L = z_L_new
+ # Re-orthonormalise; clamp_min keeps log() finite when a diagonal entry of R is 0.
+ Q, R = torch.linalg.qr(Q)
+ log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
+ n_steps += 1
+ # H-level update as a function of z_H only (z_L held fixed).
+ # NOTE(review): the same Q is pushed through the Jacobian w.r.t. z_L above and
+ # w.r.t. z_H here, i.e. block-wise rather than through a joint (z_H, z_L)
+ # Jacobian — confirm this is the intended estimator.
+ f = lambda x: inner.H_level(x, z_L, **seq_info)
+ z_H_new, DQ = jvp_apply(f, z_H, Q)
+ Q = DQ; z_H = z_H_new
+ Q, R = torch.linalg.qr(Q)
+ log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
+ n_steps += 1
+
+ # Average log-stretch per update step; max(..., 1) guards the zero-step case.
+ lyap_spec = log_R_sum / max(n_steps, 1) # (B, k)
+ return lyap_spec.mean(dim=-1) # (B,) mean over top-k
+
+
+# ----------------- data -----------------
+
+def load_train_batches(data_path: Path, batch_size: int, n_iters: int, seed: int = 0):
+ """Tiny iterator over the augmented train set."""
+ rng = np.random.default_rng(seed)
+ inputs = np.load(data_path / "train" / "all__inputs.npy")
+ labels = np.load(data_path / "train" / "all__labels.npy")
+ pid = np.load(data_path / "train" / "all__puzzle_identifiers.npy")
+ N = len(inputs)
+ for _ in range(n_iters):
+ idx = rng.choice(N, size=batch_size, replace=False)
+ yield {
+ "inputs": torch.from_numpy(inputs[idx].astype(np.int32)),
+ "labels": torch.from_numpy(labels[idx].astype(np.int32)),
+ "puzzle_identifiers": torch.from_numpy(pid[idx].astype(np.int32)),
+ }
+
+
+def evaluate(head, base, data_path: Path, n_samples: int, batch_size: int, device: str, seed: int = 42):
+ """Quick exact-accuracy eval on test set."""
+ rng = np.random.default_rng(seed)
+ inputs = np.load(data_path / "test" / "all__inputs.npy")
+ labels = np.load(data_path / "test" / "all__labels.npy")
+ pid = np.load(data_path / "test" / "all__puzzle_identifiers.npy")
+ idx_all = rng.choice(len(inputs), size=n_samples, replace=False)
+ head.eval()
+ correct = 0; token_correct = 0; token_total = 0
+ for s in range(0, n_samples, batch_size):
+ e = min(s + batch_size, n_samples)
+ idx = idx_all[s:e]
+ batch = {
+ "inputs": torch.from_numpy(inputs[idx].astype(np.int32)).to(device),
+ "labels": torch.from_numpy(labels[idx].astype(np.int32)).to(device),
+ "puzzle_identifiers": torch.from_numpy(pid[idx].astype(np.int32)).to(device),
+ }
+ with torch.no_grad():
+ with torch.device(device):
+ carry = base.initial_carry(batch)
+ for _ in range(base.config.halt_max_steps):
+ carry, outputs = base(carry=carry, batch=batch)
+ preds = outputs["logits"].argmax(dim=-1)
+ mask = batch["labels"] > 0
+ exact = ((preds == batch["labels"]) | ~mask).all(dim=-1).float()
+ correct += exact.sum().item()
+ token_correct += ((preds == batch["labels"]) & mask).sum().item()
+ token_total += mask.sum().item()
+ return correct / n_samples, token_correct / max(token_total, 1)
+
+
+# ----------------- main -----------------
+
+def main():
+ """CLI entry point for the reverse-flossing fine-tune.
+
+ Loads the checkpoint, runs an initial eval, then for --n-steps iterations
+ minimises sup_loss + alpha_rf * rf_loss (supervised ACT loss plus the hinge
+ penalty on the mean finite-time Lyapunov estimate), evaluating every
+ --eval-every steps. After a final eval the per-step log is written as JSON
+ to --out (the log is only written once, at the very end).
+ """
+ ap = argparse.ArgumentParser()
+ ap.add_argument("--ckpt-root", required=True)
+ ap.add_argument("--ckpt-name", default="step_26040")
+ ap.add_argument("--n-steps", type=int, default=200)
+ ap.add_argument("--batch-size", type=int, default=16, help="kept small because RF JVPs use a lot of memory")
+ ap.add_argument("--lr", type=float, default=2e-5)
+ ap.add_argument("--alpha-rf", type=float, default=1.0)
+ ap.add_argument("--lambda-star", type=float, default=-0.85)
+ ap.add_argument("--k-lyap", type=int, default=2)
+ ap.add_argument("--lyap-act-steps", type=int, default=4, help="ACT steps to unroll for Lyapunov measurement")
+ ap.add_argument("--rf-mode", choices=["fixed","horizon"], default="fixed",
+ help="fixed: use --lambda-star as target. horizon: λ*=(1/T)log(eps/loss)")
+ ap.add_argument("--rf-eps", type=float, default=1e-6, help="task tolerance for horizon mode")
+ ap.add_argument("--seed", type=int, default=0)
+ ap.add_argument("--eval-every", type=int, default=50)
+ ap.add_argument("--eval-n", type=int, default=512)
+ ap.add_argument("--eval-batch-size", type=int, default=64)
+ ap.add_argument("--out", default="rf_finetune_log.json")
+ args = ap.parse_args()
+
+ # The device is hard-coded; there is no CPU fallback.
+ device = "cuda"
+ head, base, cfg, train_meta = load_model_for_finetune(Path(args.ckpt_root), args.ckpt_name, device)
+
+ # Optimizer matching HRM training
+ optim = AdamATan2(head.parameters(), lr=args.lr, betas=(0.9, 0.95), weight_decay=cfg["weight_decay"])
+
+ # Initial eval
+ print(f"\n=== Initial eval (no fine-tune) ===")
+ acc0, tacc0 = evaluate(head, base, Path(cfg["data_path"]), args.eval_n, args.eval_batch_size, device)
+ print(f" initial exact_acc = {acc0:.4f} token_acc = {tacc0:.4f}")
+
+ log = {"args": vars(args), "initial_acc": acc0, "initial_tok_acc": tacc0, "steps": []}
+ t0 = time.time()
+
+ # Generator that yields exactly args.n_steps batches, so it also bounds the loop below.
+ train_iter = load_train_batches(Path(cfg["data_path"]), args.batch_size, args.n_steps, seed=args.seed)
+
+ head.train()
+ # Keep sparse puzzle embedding in eval mode (its local_weights buffer is sized for
+ # the original training batch_size of 768; here we use a smaller batch). The
+ # puzzle_emb table is still in the model, just not updated during fine-tune.
+ base.inner.puzzle_emb.eval()
+ for p in base.inner.puzzle_emb.parameters():
+ p.requires_grad_(False)
+
+ for step, batch in enumerate(train_iter):
+ batch = {k: v.to(device) for k, v in batch.items()}
+ # Make sure model is in train mode but puzzle_emb stays in eval mode (its
+ # local_weights buffer is sized for the original 768 batch_size; we use a smaller batch).
+ head.train()
+ base.inner.puzzle_emb.eval()
+
+ # ---- Supervised ACT loss accumulated over all halt_max_steps ACT steps ----
+ # We use a fresh carry per fine-tune step; run ACT loop fully and sum losses.
+ with torch.device(device):
+ carry = base.initial_carry(batch)
+ sup_loss_sum = 0.0
+ n_loss = 0
+ for _ in range(base.config.halt_max_steps):
+ carry, l, metrics, _, all_finish = head(return_keys=[], carry=carry, batch=batch)
+ sup_loss_sum = sup_loss_sum + l
+ n_loss += 1
+ if all_finish: break
+ # Average across ACT steps, then normalize by batch_size
+ sup_loss = sup_loss_sum / max(n_loss, 1) / args.batch_size
+
+ # ---- Reverse-flossing penalty ----
+ # New forward pass dedicated to computing finite-time mean λ (differentiable).
+ # The tangent-basis seed changes every step (args.seed + step).
+ mean_lyap = compute_lyap_mean(head, base.inner, batch, args.k_lyap, device,
+ seed=args.seed + step, lyap_act_steps=args.lyap_act_steps)
+ # λ̄ is in nats per inner-cycle.
+ if args.rf_mode == "fixed":
+ lam_star = torch.tensor(args.lambda_star, device=device, dtype=torch.float32)
+ else:
+ # finite-horizon: λ* = (1/T) log(eps / r); r = current per-sample sup loss proxy (use total loss)
+ # NOTE(review): T is built from halt_max_steps, whereas compute_lyap_mean normalises
+ # over min(lyap_act_steps, halt_max_steps) ACT steps — confirm which horizon is intended.
+ T = base.config.halt_max_steps * (base.config.H_cycles * base.config.L_cycles + base.config.H_cycles)
+ r = sup_loss.detach().clamp_min(1e-9)
+ lam_star = (1.0 / T) * torch.log(torch.tensor(args.rf_eps, device=device) / r)
+ # Cap at small negative so we don't ask for impossible contraction
+ # (the target is restricted to the interval [-2.0, 0.0]).
+ lam_star = lam_star.clamp(min=-2.0, max=0.0)
+
+ excess = (mean_lyap - lam_star).clamp_min(0.0) # 0 if lyap < star (already contractive enough)
+ rf_loss = (excess ** 2).mean()
+
+ total_loss = sup_loss + args.alpha_rf * rf_loss
+
+ optim.zero_grad(set_to_none=True)
+ total_loss.backward()
+ # Global gradient-norm clipping at 1.0 before the optimizer step.
+ torch.nn.utils.clip_grad_norm_(head.parameters(), 1.0)
+ optim.step()
+
+ lam_mean_val = mean_lyap.detach().mean().item()
+ rec = {"step": step, "sup_loss": float(sup_loss.item()), "rf_loss": float(rf_loss.item()),
+ "total_loss": float(total_loss.item()), "mean_lyap": lam_mean_val,
+ "lam_star": float(lam_star.mean().item() if lam_star.dim() else lam_star.item()),
+ "excess_frac_nonzero": float((excess > 0).float().mean().item())}
+ log["steps"].append(rec)
+
+ # Progress line every 5 steps and on the last step.
+ if step % 5 == 0 or step == args.n_steps - 1:
+ print(f" [{step:>4}/{args.n_steps}] dt={time.time()-t0:.1f}s "
+ f"sup={rec['sup_loss']:.4f} rf={rec['rf_loss']:.4f} "
+ f"lyap={lam_mean_val:+.4f} λ*={rec['lam_star']:+.4f} "
+ f"nz={rec['excess_frac_nonzero']:.2f}", flush=True)
+
+ # Periodic eval; results are attached to the record of the step that triggered it.
+ if (step + 1) % args.eval_every == 0:
+ acc, tacc = evaluate(head, base, Path(cfg["data_path"]), args.eval_n, args.eval_batch_size, device)
+ print(f" >> EVAL step {step+1}: exact_acc={acc:.4f} (Δ from init: {acc-acc0:+.4f})")
+ log["steps"][-1]["eval_acc"] = acc
+ log["steps"][-1]["eval_tok_acc"] = tacc
+ # evaluate() leaves the head in eval mode; switch back for training.
+ head.train()
+
+ print(f"\n=== Final eval ===")
+ acc_f, tacc_f = evaluate(head, base, Path(cfg["data_path"]), args.eval_n, args.eval_batch_size, device)
+ print(f" initial: {acc0:.4f} → final: {acc_f:.4f} (Δ {acc_f-acc0:+.4f})")
+ log["final_acc"] = acc_f; log["final_tok_acc"] = tacc_f
+
+ # Single write at the end: nothing is persisted if the run stops before this point.
+ Path(args.out).write_text(json.dumps(log, indent=2))
+ print(f"log saved → {args.out}")
+
+
+# Run the fine-tune only when executed as a script.
+if __name__ == "__main__":
+ main()
diff --git a/run_HRM256_after_H.sh b/run_HRM256_after_H.sh
new file mode 100755
index 0000000..e901c30
--- /dev/null
+++ b/run_HRM256_after_H.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Wait for H-runner (Engelken on converged HRM) to finish, then run HRM hidden=256 baseline
+# as capacity-matched control for SRM v1 (hidden=256/512 plateau diagnosis).
+#
+# Usage: run_HRM256_after_H.sh <H_RUNNER_PID>
+set -e
+
+# Fail fast on a missing or non-numeric PID. With an empty/invalid PID `kill -0` simply
+# fails, which would end the wait loop at once and start training while the previous
+# runner is still going.
+H_RUNNER_PID=${1:?usage: ${0##*/} <H_RUNNER_PID>}
+if ! [[ "$H_RUNNER_PID" =~ ^[0-9]+$ ]]; then
+  echo "error: H_RUNNER_PID must be a numeric PID, got '$H_RUNNER_PID'" >&2
+  exit 2
+fi
+
+cd /home/yurenh2/rrm/research/flossing
+source /home/yurenh2/miniconda3/etc/profile.d/conda.sh
+conda activate rrm
+export CUDA_VISIBLE_DEVICES=2
+
+echo "[$(date '+%H:%M:%S')] HRM256-runner waiting for H-runner PID $H_RUNNER_PID..." >> step4_runner.log
+# Poll every 30 s until the watched process no longer exists.
+while kill -0 "$H_RUNNER_PID" 2>/dev/null; do
+  sleep 30
+done
+echo "[$(date '+%H:%M:%S')] H done. Starting HRM hidden=256 baseline (capacity-matched control for SRM v1)" >> step4_runner.log
+
+# HRM hidden=256 from scratch, 3000 steps, no CF (baseline for capacity diagnosis)
+python step4_from_scratch.py \
+  --n-steps 3000 --batch-size 8 \
+  --hidden-size 256 --num-heads 4 \
+  --alpha-rf 0.0 \
+  --warmup-steps 200 \
+  --eval-every 300 --eval-n 512 --eval-batch-size 32 \
+  --out step4_HRM256_baseline_fromscratch.json \
+  > step4_HRM256.log 2>&1
+
+echo "[$(date '+%H:%M:%S')] HRM hidden=256 baseline complete" >> step4_runner.log
diff --git a/run_HRMOrth_after_SRM7M.sh b/run_HRMOrth_after_SRM7M.sh
new file mode 100755
index 0000000..65df8e6
--- /dev/null
+++ b/run_HRMOrth_after_SRM7M.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+set -e
+cd /home/yurenh2/rrm/srm
+source /home/yurenh2/miniconda3/etc/profile.d/conda.sh
+conda activate rrm
+export CUDA_VISIBLE_DEVICES=2
+
+SRM7M_RUNNER_PID=$1
+LOG_DIR=/home/yurenh2/rrm/research/flossing
+echo "[$(date '+%H:%M:%S')] HRMOrth-runner waiting for SRM7M-runner PID $SRM7M_RUNNER_PID..." >> $LOG_DIR/step4_runner.log
+while kill -0 "$SRM7M_RUNNER_PID" 2>/dev/null; do
+ sleep 30
+done
+echo "[$(date '+%H:%M:%S')] SRM7M done. Starting HRM-Orth (codex Q6 patched HRM, hidden=256, s_min=0.95)" >> $LOG_DIR/step4_runner.log
+
+mkdir -p runs ckpts
+
+python scripts/train_hrm_orth.py \
+ --n-steps 3000 --batch-size 8 \
+ --hidden-size 256 --num-heads 4 \
+ --H-cycles 2 --L-cycles 2 --H-layers 4 --L-layers 4 \
+ --orth-s-min 0.95 --cosine-attn-tau 8.0 \
+ --warmup-steps 200 \
+ --eval-every 300 --eval-n 512 --eval-batch-size 32 \
+ --out runs/hrm_orth_v1_sudoku1k_3k.json \
+ --save-ckpt ckpts/hrm_orth_v1_3k.pt \
+ > $LOG_DIR/hrm_orth_run.log 2>&1
+
+echo "[$(date '+%H:%M:%S')] HRM-Orth training complete" >> $LOG_DIR/step4_runner.log
diff --git a/run_H_after_SRM.sh b/run_H_after_SRM.sh
new file mode 100755
index 0000000..a1750e2
--- /dev/null
+++ b/run_H_after_SRM.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# Wait for SRM-runner to finish, then run H: Engelken L2 on converged HRM step_26040.
+# Tests whether Engelken hurts in refinement regime (CF hinge gave +9.8% on this ckpt).
+#
+# Usage: run_H_after_SRM.sh <SRM_RUNNER_PID>
+set -e
+
+# Fail fast on a missing or non-numeric PID. With an empty/invalid PID `kill -0` simply
+# fails, which would end the wait loop at once and start the run while the previous
+# runner is still going.
+SRM_RUNNER_PID=${1:?usage: ${0##*/} <SRM_RUNNER_PID>}
+if ! [[ "$SRM_RUNNER_PID" =~ ^[0-9]+$ ]]; then
+  echo "error: SRM_RUNNER_PID must be a numeric PID, got '$SRM_RUNNER_PID'" >&2
+  exit 2
+fi
+
+cd /home/yurenh2/rrm/research/flossing
+source /home/yurenh2/miniconda3/etc/profile.d/conda.sh
+conda activate rrm
+export CUDA_VISIBLE_DEVICES=2
+
+echo "[$(date '+%H:%M:%S')] H-runner waiting for SRM-runner PID $SRM_RUNNER_PID..." >> step4_runner.log
+# Poll every 30 s until the watched process no longer exists.
+while kill -0 "$SRM_RUNNER_PID" 2>/dev/null; do
+  sleep 30
+done
+echo "[$(date '+%H:%M:%S')] SRM done. Starting Phase H: Engelken L2 (α=10, k=4) on converged step_26040" >> step4_runner.log
+
+# The checkpoint directory name contains spaces, so it must stay quoted everywhere.
+CKPT_ROOT="/home/yurenh2/rrm/hrm/checkpoints/Sudoku-extreme-1k-aug-1000 ACT-torch/HierarchicalReasoningModel_ACTV1 righteous-python"
+
+python step3_train_with_rf.py \
+  --ckpt-root "$CKPT_ROOT" \
+  --ckpt-name step_26040 \
+  --n-steps 500 --batch-size 8 \
+  --alpha-rf 10.0 --rf-mode engelken_l2 \
+  --k-lyap 4 --lyap-act-steps 4 \
+  --eval-every 100 --eval-n 512 --eval-batch-size 32 \
+  --out step3_H_engelken_l2_26040.json \
+  > step3_H.log 2>&1
+
+echo "[$(date '+%H:%M:%S')] Phase H complete (Engelken on converged HRM)" >> step4_runner.log
diff --git a/run_SRM7M_after_HRM256.sh b/run_SRM7M_after_HRM256.sh
new file mode 100755
index 0000000..1f79ea7
--- /dev/null
+++ b/run_SRM7M_after_HRM256.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# Wait for HRM256 baseline to finish, then run SRM v1 scaled to 7M (TRM-equiv size).
+#
+# Usage: run_SRM7M_after_HRM256.sh <HRM256_RUNNER_PID>
+set -e
+
+# Fail fast on a missing or non-numeric PID. With an empty/invalid PID `kill -0` simply
+# fails, which would end the wait loop at once and start training while the previous
+# runner is still going.
+HRM256_RUNNER_PID=${1:?usage: ${0##*/} <HRM256_RUNNER_PID>}
+if ! [[ "$HRM256_RUNNER_PID" =~ ^[0-9]+$ ]]; then
+  echo "error: HRM256_RUNNER_PID must be a numeric PID, got '$HRM256_RUNNER_PID'" >&2
+  exit 2
+fi
+
+cd /home/yurenh2/rrm/srm
+source /home/yurenh2/miniconda3/etc/profile.d/conda.sh
+conda activate rrm
+export CUDA_VISIBLE_DEVICES=2
+
+LOG_DIR=/home/yurenh2/rrm/research/flossing
+echo "[$(date '+%H:%M:%S')] SRM7M-runner waiting for HRM256-runner PID $HRM256_RUNNER_PID..." >> "$LOG_DIR/step4_runner.log"
+# Poll every 30 s until the watched process no longer exists.
+while kill -0 "$HRM256_RUNNER_PID" 2>/dev/null; do
+  sleep 30
+done
+echo "[$(date '+%H:%M:%S')] HRM256 done. Starting SRM v1 scaled to 7M (n_aol_layers=5, hidden=512)" >> "$LOG_DIR/step4_runner.log"
+
+mkdir -p runs ckpts
+
+# SRM v1 at TRM-equivalent param count (~7M)
+python scripts/train_srm.py \
+  --n-steps 3000 --batch-size 8 \
+  --hidden-size 512 --n-iters 12 --n-aol-layers 5 \
+  --kappa 0.9 --eta 1.0 --alpha 1.0 \
+  --warmup-steps 200 \
+  --k-lyap 2 --lyap-iters 8 --lyap-every 50 \
+  --eval-every 300 --eval-n 512 --eval-batch-size 32 \
+  --out runs/srm_v1_7M_sudoku1k_3k.json \
+  --save-ckpt ckpts/srm_v1_7M_3k.pt \
+  > "$LOG_DIR/srm_v1_7M_run.log" 2>&1
+
+echo "[$(date '+%H:%M:%S')] SRM v1 7M training complete" >> "$LOG_DIR/step4_runner.log"
diff --git a/run_checkpoint_evolution.sh b/run_checkpoint_evolution.sh
new file mode 100755
index 0000000..c5949dd
--- /dev/null
+++ b/run_checkpoint_evolution.sh
@@ -0,0 +1,59 @@
+#!/usr/bin/env bash
+# (b) For each early checkpoint, run the diagnostic with same sample pool & seed.
+# Checkpoints are processed one after another; the 3 shards of a checkpoint run in
+# parallel, one per GPU (0, 1, 2). Shards whose output file already exists are skipped.
+set -euo pipefail
+REPO=/home/yurenh2/rrm/research/flossing
+CKPT_ROOT="/home/yurenh2/rrm/hrm/checkpoints/Sudoku-extreme-1k-aug-1000 ACT-torch/HierarchicalReasoningModel_ACTV1 righteous-python"
+# conda's shell hooks conflict with `set -u`, so nounset is relaxed only while
+# sourcing and activating the environment.
+set +u
+source "$(conda info --base)/etc/profile.d/conda.sh"
+conda activate rrm
+set -u
+cd "$REPO"
+
+N=1024
+K=8
+mkdir -p ckpt_evolution
+
+# Skip 26040 since we already have it. The training did eval at steps:
+# 2604, 5208, 7812, 10416, 13020, 15624, 18228, 20832, 23436, 26040
+# pick a representative subset
+CKPTS=(step_2604 step_7812 step_13020 step_18228 step_20832)
+
+for ckpt in "${CKPTS[@]}"; do
+  echo "==> $ckpt"
+  pids=()
+  for shard in 0 1 2; do
+    LOG=ckpt_evolution/${ckpt}_shard${shard}.log
+    OUT=ckpt_evolution/${ckpt}_shard${shard}.npz
+    if [[ -f "$OUT" ]]; then echo "skip $OUT"; continue; fi
+    nohup env CUDA_VISIBLE_DEVICES="$shard" python diagnose_hrm.py \
+      --ckpt-root "$CKPT_ROOT" --ckpt-name "$ckpt" \
+      --n-samples "$N" --num-shards 3 --shard-id "$shard" \
+      --batch-size 64 --k-lyap "$K" \
+      --out "$OUT" > "$LOG" 2>&1 &
+    pids+=("$!")
+  done
+  # Wait for all shards of THIS checkpoint to finish before moving to next.
+  # Each PID is waited on individually so a failed shard is noticed (a bare `wait`
+  # always returns 0); all shards are reaped before the script gives up.
+  failed=0
+  if (( ${#pids[@]} > 0 )); then
+    for pid in "${pids[@]}"; do
+      wait "$pid" || failed=1
+    done
+  fi
+  if (( failed )); then
+    echo "error: at least one shard of $ckpt failed; see ckpt_evolution/${ckpt}_shard*.log" >&2
+    exit 1
+  fi
+  echo "<== $ckpt done"
+done
+
+# Final merge per checkpoint
+python - <<'PY'
+import numpy as np, glob, os
+out_dir = "/home/yurenh2/rrm/research/flossing/ckpt_evolution"
+ckpts = ["step_2604","step_7812","step_13020","step_18228","step_20832","step_26040"]
+for c in ckpts:
+    if c == "step_26040":
+        # use existing merged 8k as proxy (subsample to 1024 with same seed for fair comparison?)
+        # Actually run on same 1024 to compare apples-to-apples; we'll do this separately
+        continue
+    files = sorted(glob.glob(f"{out_dir}/{c}_shard*.npz"))
+    if not files:
+        print(f"missing {c}"); continue
+    # Concatenate every array of every shard along axis 0, keyed by array name.
+    m = {}
+    for f in files:
+        d = np.load(f)
+        for k in d.files: m.setdefault(k, []).append(d[k])
+    for k in list(m.keys()): m[k] = np.concatenate(m[k], 0)
+    out = f"{out_dir}/{c}.npz"
+    np.savez_compressed(out, **m)
+    print(f"{c}: N={len(m['exact_correct'])} acc={m['exact_correct'].mean():.4f} "
+          f"λ_max(s)={m['lyap_spec'][m['exact_correct']>0.5,0].mean():.3f} "
+          f"λ_max(f)={m['lyap_spec'][m['exact_correct']<0.5,0].mean():.3f}")
+PY
diff --git a/run_ckpt_evolution2.sh b/run_ckpt_evolution2.sh
new file mode 100755
index 0000000..8be9ded
--- /dev/null
+++ b/run_ckpt_evolution2.sh
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+# Sequential per-checkpoint, 3-shard parallel per checkpoint.
+set -e # no -u to avoid conda conflict
+REPO=/home/yurenh2/rrm/research/flossing
+CKPT_ROOT="/home/yurenh2/rrm/hrm/checkpoints/Sudoku-extreme-1k-aug-1000 ACT-torch/HierarchicalReasoningModel_ACTV1 righteous-python"
+source "$(conda info --base)/etc/profile.d/conda.sh"
+conda activate rrm
+cd "$REPO"
+
+N=1024
+K=8
+mkdir -p ckpt_evolution
+
+CKPTS=(step_2604 step_7812 step_13020 step_18228 step_20832)
+
+for ckpt in "${CKPTS[@]}"; do
+ if [[ -f "ckpt_evolution/${ckpt}.npz" ]]; then echo "skip $ckpt (merged exists)"; continue; fi
+ echo "==> $ckpt"
+ pids=()
+ for shard in 0 1 2; do
+ LOG=ckpt_evolution/${ckpt}_shard${shard}.log
+ OUT=ckpt_evolution/${ckpt}_shard${shard}.npz
+ if [[ -f "$OUT" ]]; then echo " skip $OUT"; continue; fi
+ nohup env CUDA_VISIBLE_DEVICES=$shard python diagnose_hrm.py \
+ --ckpt-root "$CKPT_ROOT" --ckpt-name $ckpt \
+ --n-samples $N --num-shards 3 --shard-id $shard \
+ --batch-size 64 --k-lyap $K \
+ --out "$OUT" > "$LOG" 2>&1 &
+ pids+=($!)
+ done
+ for pid in "${pids[@]}"; do wait $pid; done
+ echo "<== $ckpt done"
+done
+
+# Final merge per checkpoint
+python - <<'PY'
+import numpy as np, glob, os
+out_dir = "/home/yurenh2/rrm/research/flossing/ckpt_evolution"
+ckpts = ["step_2604","step_7812","step_13020","step_18228","step_20832","step_26040"]
+for c in ckpts:
+ files = sorted(glob.glob(f"{out_dir}/{c}_shard*.npz"))
+ if not files:
+ print(f"missing {c}"); continue
+ m = {}
+ for f in files:
+ d = np.load(f)
+ for k in d.files: m.setdefault(k, []).append(d[k])
+ for k in list(m.keys()): m[k] = np.concatenate(m[k], 0)
+ out = f"{out_dir}/{c}.npz"
+ np.savez_compressed(out, **m)
+ s = m['exact_correct']>0.5
+ print(f"{c}: N={len(m['exact_correct'])} acc={m['exact_correct'].mean():.4f} "
+ f"λ_1(s)={m['lyap_spec'][s,0].mean():+.3f} "
+ f"λ_1(f)={m['lyap_spec'][~s,0].mean():+.3f}")
+PY
diff --git a/run_hrm_diag_all_ckpts.sh b/run_hrm_diag_all_ckpts.sh
new file mode 100755
index 0000000..e887a5e
--- /dev/null
+++ b/run_hrm_diag_all_ckpts.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# Diagnose all 10 HRM checkpoints with joint Lyapunov (512 samples each)
+set -e
+cd /home/yurenh2/rrm/research/flossing
+source /home/yurenh2/miniconda3/etc/profile.d/conda.sh
+conda activate rrm
+export CUDA_VISIBLE_DEVICES=0
+
+CKPT_ROOT="/home/yurenh2/rrm/hrm/checkpoints/Sudoku-extreme-1k-aug-1000 ACT-torch/HierarchicalReasoningModel_ACTV1 righteous-python"
+
+for STEP in step_2604 step_5208 step_7812 step_10416 step_13020 step_15624 step_18228 step_20832 step_23436 step_26040; do
+ OUT="diag_hrm_${STEP}_512.npz"
+ LOG="diag_hrm_${STEP}.log"
+ if [ -f "$OUT" ]; then
+ echo "skip $STEP (exists)"
+ continue
+ fi
+ echo "[$(date '+%H:%M:%S')] Starting HRM diagnostic $STEP"
+ python diagnose_hrm_joint.py \
+ --ckpt-root "$CKPT_ROOT" --ckpt-name "$STEP" \
+ --n-samples 512 --batch-size 32 --k-lyap 8 --t-ons 1 --seed 0 \
+ --out "$OUT" > "$LOG" 2>&1
+ echo "[$(date '+%H:%M:%S')] $STEP done"
+done
+
+echo "[$(date '+%H:%M:%S')] All HRM diagnostics complete"
diff --git a/run_srm_after_W.sh b/run_srm_after_W.sh
new file mode 100755
index 0000000..5ecf4e6
--- /dev/null
+++ b/run_srm_after_W.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# Wait for the W-runner (X→Y→Z→W pipeline) to finish, then launch SRM-AOL training.
+#
+# Usage: run_srm_after_W.sh <W_RUNNER_PID>
+# Progress lines are appended to $LOG_DIR/step4_runner.log; stdout/stderr of
+# the training job go to $LOG_DIR/srm_aol_run.log.
+set -e
+
+# Fail fast when the PID is missing. With an empty PID `kill -0` fails at once,
+# the wait loop below never runs, and training would start without waiting.
+W_RUNNER_PID="${1:?usage: $0 <W_RUNNER_PID>}"
+
+cd /home/yurenh2/rrm/srm
+source /home/yurenh2/miniconda3/etc/profile.d/conda.sh
+conda activate rrm
+export CUDA_VISIBLE_DEVICES=2
+
+LOG_DIR=/home/yurenh2/rrm/research/flossing
+echo "[$(date '+%H:%M:%S')] SRM-runner waiting for W-runner PID $W_RUNNER_PID..." >> "$LOG_DIR/step4_runner.log"
+# Poll every 30 s; `kill -0` sends no signal, it only tests whether the PID can be signalled.
+while kill -0 "$W_RUNNER_PID" 2>/dev/null; do
+  sleep 30
+done
+echo "[$(date '+%H:%M:%S')] W done. Starting SRM-AOL from-scratch training (Sudoku 1k, hidden=512, 3000 steps)" >> "$LOG_DIR/step4_runner.log"
+
+# Make sure log/ckpt dirs exist
+mkdir -p runs ckpts
+
+# SRM-AOL training: same setup as XYZ baselines but with SRM model + Lyapunov diag every 50 steps (--lyap-every)
+python scripts/train_srm.py \
+  --n-steps 3000 --batch-size 8 \
+  --hidden-size 512 --n-iters 12 --n-aol-layers 2 \
+  --kappa 0.9 --eta 1.0 --alpha 1.0 \
+  --warmup-steps 200 \
+  --k-lyap 2 --lyap-iters 8 --lyap-every 50 \
+  --eval-every 300 --eval-n 512 --eval-batch-size 32 \
+  --out runs/srm_aol_v1_sudoku1k_3k.json \
+  --save-ckpt ckpts/srm_aol_v1_3k.pt \
+  > "$LOG_DIR/srm_aol_run.log" 2>&1
+
+echo "[$(date '+%H:%M:%S')] SRM-AOL training complete" >> "$LOG_DIR/step4_runner.log"
diff --git a/run_step4_W.sh b/run_step4_W.sh
new file mode 100755
index 0000000..16c0369
--- /dev/null
+++ b/run_step4_W.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Waits for the existing X→Y→Z pipeline to finish, then runs W (Engelken L2).
+#
+# Usage: run_step4_W.sh <PIPELINE_PID>
+# Progress lines are appended to step4_runner.log; stdout/stderr of the
+# training job go to step4_W.log (both in the flossing directory).
+set -e
+
+# Fail fast when the PID is missing. With an empty PID `kill -0` fails at once,
+# the wait loop below never runs, and Phase W would start without waiting.
+PIPELINE_PID="${1:?usage: $0 <PIPELINE_PID>}"
+
+cd /home/yurenh2/rrm/research/flossing
+source /home/yurenh2/miniconda3/etc/profile.d/conda.sh
+conda activate rrm
+export CUDA_VISIBLE_DEVICES=2
+
+echo "[$(date '+%H:%M:%S')] W-runner waiting for pipeline PID $PIPELINE_PID..." >> step4_runner.log
+# Poll every 30 s; `kill -0` sends no signal, it only tests whether the PID can be signalled.
+while kill -0 "$PIPELINE_PID" 2>/dev/null; do
+  sleep 30
+done
+echo "[$(date '+%H:%M:%S')] X→Y→Z done. Starting Phase W: Engelken L2 (k=4, α=1)" >> step4_runner.log
+
+# Phase W: Engelken L2 — push ALL top-4 λ_i toward 0
+python step4_from_scratch.py \
+  --n-steps 3000 --batch-size 8 \
+  --alpha-rf 1.0 --rf-mode engelken_l2 \
+  --k-lyap 4 --lyap-act-steps 4 \
+  --warmup-steps 200 \
+  --eval-every 300 --eval-n 512 --eval-batch-size 32 \
+  --out step4_W_engelken_l2_fromscratch.json \
+  --save-ckpt ckpts/step4_W_final.pt \
+  > step4_W.log 2>&1
+
+echo "[$(date '+%H:%M:%S')] Phase W (Engelken L2) complete" >> step4_runner.log
diff --git a/run_step4_pipeline.sh b/run_step4_pipeline.sh
new file mode 100755
index 0000000..dc2d86d
--- /dev/null
+++ b/run_step4_pipeline.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# Waits for Phase 1 to finish, then runs Phase 2 (CF λ*=0) and Phase 3
+# (CF λ*=-0.15) back to back on GPU 2.
+#
+# Usage: run_step4_pipeline.sh <PHASE1_PID>
+# Progress lines are appended to step4_runner.log; stdout/stderr of each
+# phase go to step4_Y.log and step4_Z.log respectively.
+set -e
+
+# Fail fast when the PID is missing. With an empty PID `kill -0` fails at once,
+# the wait loop below never runs, and Phase 2 would start without waiting.
+P1_PID="${1:?usage: $0 <PHASE1_PID>}"
+
+cd /home/yurenh2/rrm/research/flossing
+source /home/yurenh2/miniconda3/etc/profile.d/conda.sh
+conda activate rrm
+export CUDA_VISIBLE_DEVICES=2
+
+# Wait for Phase 1 (PID $1) to complete
+echo "[$(date '+%H:%M:%S')] runner waiting for Phase 1 PID $P1_PID..." >> step4_runner.log
+# Poll every 30 s; `kill -0` sends no signal, it only tests whether the PID can be signalled.
+while kill -0 "$P1_PID" 2>/dev/null; do
+  sleep 30
+done
+echo "[$(date '+%H:%M:%S')] Phase 1 done. Starting Phase 2: CF λ*=0" >> step4_runner.log
+
+# Phase 2: CF λ*=0
+python step4_from_scratch.py \
+  --n-steps 3000 --batch-size 8 \
+  --alpha-rf 10.0 --lambda-star 0.0 \
+  --warmup-steps 200 \
+  --eval-every 300 --eval-n 512 --eval-batch-size 32 \
+  --out step4_Y_cf_lstar0_fromscratch.json \
+  > step4_Y.log 2>&1
+
+echo "[$(date '+%H:%M:%S')] Phase 2 done. Starting Phase 3: CF λ*=-0.15" >> step4_runner.log
+
+# Phase 3: CF λ*=-0.15 (natural attractor enforced)
+python step4_from_scratch.py \
+  --n-steps 3000 --batch-size 8 \
+  --alpha-rf 10.0 --lambda-star -0.15 \
+  --warmup-steps 200 \
+  --eval-every 300 --eval-n 512 --eval-batch-size 32 \
+  --out step4_Z_cf_lstar_neg15_fromscratch.json \
+  > step4_Z.log 2>&1
+
+echo "[$(date '+%H:%M:%S')] Pipeline complete (X + Y + Z all done)" >> step4_runner.log
diff --git a/run_step6_prefloss.sh b/run_step6_prefloss.sh
new file mode 100755
index 0000000..172bd3e
--- /dev/null
+++ b/run_step6_prefloss.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+# Step 6: Preflossing experiments from step_13020 (epoch 10K, chaos onset)
+# Baseline = original HRM training curve (already have 10 ckpts)
+# Two conditions, run one after the other:
+# B: Engelken prefloss (500 steps Σλ_i²) → task training 3000 steps
+# C: CF prefloss (500 steps Σmax(0,λ_i)²) → task training 3000 steps
+set -e
+cd /home/yurenh2/rrm/research/flossing
+source /home/yurenh2/miniconda3/etc/profile.d/conda.sh
+conda activate rrm
+export CUDA_VISIBLE_DEVICES=0
+
+CKPT_ROOT="/home/yurenh2/rrm/hrm/checkpoints/Sudoku-extreme-1k-aug-1000 ACT-torch/HierarchicalReasoningModel_ACTV1 righteous-python"
+SRC_CKPT="step_13020"
+
+# Appends one timestamped line to the shared runner log.
+note() {
+  echo "[$(date '+%H:%M:%S')] $1" >> step6_runner.log
+}
+
+# Runs one prefloss condition.
+#   $1 - condition tag (used in log lines and in the step6_<tag>.log name)
+#   $2 - value passed to --floss-mode
+#   $3 - human-readable label for the "Starting" log line
+#   $4 - JSON output path passed to --out
+run_condition() {
+  local tag=$1 floss_mode=$2 label=$3 out_json=$4
+  note "Starting $tag ($label)"
+  python step6_prefloss.py \
+    --ckpt-root "$CKPT_ROOT" --ckpt-name "$SRC_CKPT" \
+    --prefloss-steps 500 --train-steps 3000 \
+    --floss-mode "$floss_mode" --floss-lr 1e-4 --train-lr 1e-5 \
+    --batch-size 8 --k-lyap 2 --lyap-act-steps 16 \
+    --eval-every 100 --eval-n 512 --eval-batch-size 32 \
+    --out "$out_json" > "step6_${tag}.log" 2>&1
+  note "$tag done"
+}
+
+note "Starting step6 prefloss experiments"
+
+# B: Engelken prefloss (Σλ_i², two-sided)
+run_condition B engelken "engelken prefloss" step6_B_engelken.json
+
+# C: CF prefloss (Σmax(0,λ_i)², one-sided hinge)
+run_condition C cf "CF prefloss" step6_C_cf.json
+
+note "All step6 prefloss experiments complete"
diff --git a/run_trm_cf_abcd.sh b/run_trm_cf_abcd.sh
new file mode 100755
index 0000000..3086f67
--- /dev/null
+++ b/run_trm_cf_abcd.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+# TRM CF experiments, 500 steps each, starting from SRC_CKPT (step_26041, see below).
+# A: α=10 λ*=0 (universal default, like ABCDEF on HRM)
+# B: α=10 λ*=+0.03 (protects the TRM succ range)
+# C: α=1 λ*=0 (gentler α)
+# D: α=0 baseline (just continue training, no CF)
+# A condition whose output JSON already exists is skipped, so the script can be re-run.
+set -e
+cd /home/yurenh2/rrm/research/flossing
+source /home/yurenh2/miniconda3/etc/profile.d/conda.sh
+conda activate rrm
+export CUDA_VISIBLE_DEVICES=0
+
+CKPT_ROOT="/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_singleGPU"
+# Start from step_26041 (epoch 5K, acc 0.576, succ λ=-0.036 HRM-like margin) — the 8/8 attractor is not yet structured.
+# This is the only TRM ckpt where succ has a -0.04 margin, so CF λ*=0 is completely vacuous on succ.
+SRC_CKPT="step_26041"
+
+# Runs one CF condition.
+#   $1 - condition tag (used in the output/log file names and log lines)
+#   $2 - value passed to --alpha-rf
+#   $3 - value passed to --lambda-star
+run_cf() {
+  local TAG=$1; local ALPHA=$2; local LSTAR=$3
+  local OUT="step5_${TAG}_trm_${SRC_CKPT}_alpha${ALPHA}_lstar${LSTAR}.json"
+  local LOG="step5_${TAG}.log"
+  if [ -f "$OUT" ]; then echo "skip $TAG"; return; fi
+  echo "[$(date '+%H:%M:%S')] Starting $TAG (α=$ALPHA λ*=$LSTAR) from $SRC_CKPT" >> step5_runner.log
+  python step5_train_trm_cf.py \
+    --ckpt-root "$CKPT_ROOT" --ckpt-name "$SRC_CKPT" \
+    --n-steps 500 --batch-size 8 \
+    --alpha-rf "$ALPHA" --rf-mode fixed --lambda-star "$LSTAR" \
+    --k-lyap 2 --lyap-act-steps 4 \
+    --eval-every 100 --eval-n 512 --eval-batch-size 32 \
+    --out "$OUT" > "$LOG" 2>&1
+  echo "[$(date '+%H:%M:%S')] $TAG done" >> step5_runner.log
+}
+
+# The baseline (D) runs first, then the CF conditions.
+run_cf D 0 0
+run_cf A 10 0
+run_cf B 10 0.03
+run_cf C 1 0
+
+echo "[$(date '+%H:%M:%S')] All TRM CF conditions complete" >> step5_runner.log
diff --git a/run_trm_cf_ef.sh b/run_trm_cf_ef.sh
new file mode 100755
index 0000000..222a300
--- /dev/null
+++ b/run_trm_cf_ef.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+# Waits for a previous runner to exit, then runs two more TRM CF conditions:
+# E: α=3 λ*=0 (calculated TRM-equivalent of HRM α=10)
+# F: α=2 λ*=0 (slightly weaker)
+#
+# Usage: run_trm_cf_ef.sh <PREV_RUNNER_PID>
+# A condition whose output JSON already exists is skipped.
+set -e
+
+# Fail fast when the PID is missing. With an empty PID `kill -0` fails at once,
+# the wait loop below never runs, and E/F would start without waiting.
+PREV_RUNNER_PID="${1:?usage: $0 <PREV_RUNNER_PID>}"
+
+cd /home/yurenh2/rrm/research/flossing
+source /home/yurenh2/miniconda3/etc/profile.d/conda.sh
+conda activate rrm
+export CUDA_VISIBLE_DEVICES=0
+
+echo "[$(date '+%H:%M:%S')] EF-runner waiting for prev runner PID $PREV_RUNNER_PID..." >> step5_runner.log
+# Poll every 30 s; `kill -0` sends no signal, it only tests whether the PID can be signalled.
+while kill -0 "$PREV_RUNNER_PID" 2>/dev/null; do
+  sleep 30
+done
+echo "[$(date '+%H:%M:%S')] Previous runner done. Starting E/F (α=3/2 calculated equivalents)" >> step5_runner.log
+
+CKPT_ROOT="/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_singleGPU"
+SRC_CKPT="step_26041"
+
+# Runs one CF condition.
+#   $1 - condition tag (used in the output/log file names and log lines)
+#   $2 - value passed to --alpha-rf
+#   $3 - value passed to --lambda-star
+run_cf() {
+  local TAG=$1; local ALPHA=$2; local LSTAR=$3
+  local OUT="step5_${TAG}_trm_${SRC_CKPT}_alpha${ALPHA}_lstar${LSTAR}.json"
+  local LOG="step5_${TAG}.log"
+  if [ -f "$OUT" ]; then echo "skip $TAG"; return; fi
+  echo "[$(date '+%H:%M:%S')] Starting $TAG (α=$ALPHA λ*=$LSTAR) from $SRC_CKPT" >> step5_runner.log
+  python step5_train_trm_cf.py \
+    --ckpt-root "$CKPT_ROOT" --ckpt-name "$SRC_CKPT" \
+    --n-steps 500 --batch-size 8 \
+    --alpha-rf "$ALPHA" --rf-mode fixed --lambda-star "$LSTAR" \
+    --k-lyap 2 --lyap-act-steps 4 \
+    --eval-every 100 --eval-n 512 --eval-batch-size 32 \
+    --out "$OUT" > "$LOG" 2>&1
+  echo "[$(date '+%H:%M:%S')] $TAG done" >> step5_runner.log
+}
+
+run_cf E 3 0
+run_cf F 2 0
+
+echo "[$(date '+%H:%M:%S')] EF complete" >> step5_runner.log
diff --git a/run_trm_cf_long.sh b/run_trm_cf_long.sh
new file mode 100755
index 0000000..187298e
--- /dev/null
+++ b/run_trm_cf_long.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# Long-run TRM CF: 3000 steps each (6× current 500-step runs).
+# Same 6 conditions: D/A/B/C/E/F from step_26041
+# A condition whose output JSON already exists is skipped, so the script can be re-run.
+set -e
+cd /home/yurenh2/rrm/research/flossing
+source /home/yurenh2/miniconda3/etc/profile.d/conda.sh
+conda activate rrm
+export CUDA_VISIBLE_DEVICES=0
+
+CKPT_ROOT="/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_singleGPU"
+SRC_CKPT="step_26041"
+
+# Runs one 3000-step CF condition.
+#   $1 - condition tag (used in the output/log file names and log lines)
+#   $2 - value passed to --alpha-rf
+#   $3 - value passed to --lambda-star
+run_cf() {
+  local TAG=$1; local ALPHA=$2; local LSTAR=$3
+  local OUT="step5long_${TAG}_${SRC_CKPT}_alpha${ALPHA}_lstar${LSTAR}.json"
+  local LOG="step5long_${TAG}.log"
+  if [ -f "$OUT" ]; then echo "skip $TAG"; return; fi
+  echo "[$(date '+%H:%M:%S')] Starting $TAG (α=$ALPHA λ*=$LSTAR) 3000 steps" >> step5_runner.log
+  python step5_train_trm_cf.py \
+    --ckpt-root "$CKPT_ROOT" --ckpt-name "$SRC_CKPT" \
+    --n-steps 3000 --batch-size 8 \
+    --alpha-rf "$ALPHA" --rf-mode fixed --lambda-star "$LSTAR" \
+    --k-lyap 2 --lyap-act-steps 4 \
+    --eval-every 200 --eval-n 512 --eval-batch-size 32 \
+    --out "$OUT" > "$LOG" 2>&1
+  echo "[$(date '+%H:%M:%S')] $TAG done" >> step5_runner.log
+}
+
+run_cf D 0 0
+run_cf A 10 0
+run_cf B 10 0.03
+run_cf C 1 0
+run_cf E 3 0
+run_cf F 2 0
+
+echo "[$(date '+%H:%M:%S')] All long TRM CF complete" >> step5_runner.log
diff --git a/run_trm_chaos_onset_diag.sh b/run_trm_chaos_onset_diag.sh
new file mode 100755
index 0000000..b50d221
--- /dev/null
+++ b/run_trm_chaos_onset_diag.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# Diagnose TRM joint Lyapunov on the single-GPU checkpoints listed in the loop below.
+# This script only writes one .npz per checkpoint; the Δλ (succ-fail) trajectory
+# used to find chaos onset is presumably built from those files by a separate
+# analysis step (not shown here).
+# Progress lines go to stdout and are appended to trm_chaos_diag.log via tee.
+set -e
+cd /home/yurenh2/rrm/research/flossing
+source /home/yurenh2/miniconda3/etc/profile.d/conda.sh
+conda activate rrm
+export CUDA_VISIBLE_DEVICES=2
+
+CKPT_ROOT="/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_singleGPU"
+
+# Diagnose each saved ckpt (skip if output exists)
+for STEP in 26041 52082 78123 104164 130205; do
+  OUT="diag_trm_singleGPU_step${STEP}_512.npz"
+  LOG="diag_trm_singleGPU_step${STEP}.log"
+  if [ -f "$OUT" ]; then
+    echo "[$(date '+%H:%M:%S')] step_${STEP} already diagnosed, skipping" | tee -a trm_chaos_diag.log
+    continue
+  fi
+  echo "[$(date '+%H:%M:%S')] diagnose step_${STEP}..." | tee -a trm_chaos_diag.log
+  # stdout and stderr of the diagnostic both go to the per-checkpoint log.
+  python diagnose_trm_joint.py \
+    --ckpt-root "$CKPT_ROOT" \
+    --ckpt-name "step_${STEP}" \
+    --n-samples 512 \
+    --batch-size 16 --k-lyap 8 --t-ons 1 --seed 0 \
+    --out "$OUT" \
+    > "$LOG" 2>&1
+  echo "[$(date '+%H:%M:%S')] step_${STEP} done" | tee -a trm_chaos_diag.log
+done
+
+echo "[$(date '+%H:%M:%S')] All TRM chaos onset diagnostics complete" | tee -a trm_chaos_diag.log
diff --git a/sanity_lipschitz_check.py b/sanity_lipschitz_check.py
new file mode 100644
index 0000000..91fffee
--- /dev/null
+++ b/sanity_lipschitz_check.py
@@ -0,0 +1,184 @@
+"""Empirical Lipschitz sanity check: perturb init state by small noise,
+measure how OUTPUT and final z_H change. Independent of our JVP code.
+
+If TRM succ samples truly have λ > 0, perturbations should diverge through dynamics.
+If they're actually stable in output subspace, perturbations decay or stay bounded.
+"""
+import sys, yaml, json, math
+from pathlib import Path
+import numpy as np
+import torch
+
+HRM_DIR = Path("/home/yurenh2/rrm/hrm")
+TRM_DIR = Path("/home/yurenh2/rrm/trm")
+
+CKPT_TRM_ROOT = "/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_singleGPU"
+CKPT_TRM_NAME = "step_104164"
+CKPT_HRM_ROOT = "/home/yurenh2/rrm/hrm/checkpoints/Sudoku-extreme-1k-aug-1000 ACT-torch/HierarchicalReasoningModel_ACTV1 righteous-python"
+CKPT_HRM_NAME = "step_26040"
+
+DEVICE = "cuda"
+
+
+def load_model(repo_dir, ckpt_root, ckpt_name, model_cls_path):
+ """Import a model class from ``repo_dir``, build it, and load checkpoint weights.
+
+ Args:
+ repo_dir: repository root that is inserted at the front of ``sys.path``.
+ ckpt_root: directory holding ``all_config.yaml`` and the checkpoint file.
+ ckpt_name: checkpoint file name inside ``ckpt_root``.
+ model_cls_path: ``"module.path@ClassName"`` locating the model class.
+
+ Returns:
+ ``(model, cfg, train_meta, data_path)`` — the model on ``DEVICE`` in eval
+ mode, the parsed YAML config, the parsed ``train/dataset.json`` metadata,
+ and the dataset root as a ``Path``.
+
+ Side effects: mutates ``sys.modules`` and ``sys.path`` and prints the number
+ of missing/unexpected state-dict keys.
+ """
+ # Clear cached modules from other repo to avoid conflicts (HRM/TRM both have models.*)
+ for mod in list(sys.modules.keys()):
+ if mod.startswith("models"):
+ del sys.modules[mod]
+ # Drop any earlier /hrm or /trm entry so only repo_dir's packages are importable.
+ sys.path[:] = [p for p in sys.path if not (p.endswith("/hrm") or p.endswith("/trm"))]
+ sys.path.insert(0, str(repo_dir))
+ import importlib
+ mod_path, cls_name = model_cls_path.split("@")
+ cls = getattr(importlib.import_module(mod_path), cls_name)
+ cfg = yaml.safe_load((Path(ckpt_root) / "all_config.yaml").read_text())
+ arch_cfg = dict(cfg["arch"])
+ # Configs carry either a single "data_path" or a "data_paths" list; the first entry is used.
+ data_path = Path(cfg.get("data_path") or cfg["data_paths"][0])
+ train_meta = json.loads((data_path / "train" / "dataset.json").read_text())
+ # Fill in the fields the architecture config takes from the run config and dataset metadata.
+ arch_cfg.update(batch_size=cfg["global_batch_size"], seq_len=train_meta["seq_len"],
+ vocab_size=train_meta["vocab_size"],
+ num_puzzle_identifiers=train_meta["num_puzzle_identifiers"], causal=False)
+ model = cls(arch_cfg)
+ sd = torch.load(Path(ckpt_root) / ckpt_name, map_location="cpu", weights_only=True)
+ # NOTE(review): str.replace removes every occurrence of "_orig_mod." / "model." in a key,
+ # not only a leading prefix — confirm no parameter name contains "model." elsewhere.
+ sd = {k.replace("_orig_mod.", "").replace("model.", ""): v for k, v in sd.items()}
+ # strict=False: key mismatches do not raise; only their counts are printed below.
+ missing, unexpected = model.load_state_dict(sd, strict=False)
+ print(f" [load] missing={len(missing)} unexpected={len(unexpected)}")
+ model.to(DEVICE).eval()
+ return model, cfg, train_meta, data_path
+
+
+def load_test_samples(data_path, n_total, seed=0):
+ """Load a random subset of the test split as int32 tensors on ``DEVICE``.
+
+ Args:
+ data_path: dataset root (a ``Path``); arrays are read from ``data_path / "test"``.
+ n_total: number of distinct samples to draw.
+ seed: seed for ``np.random.default_rng``; the same seed selects the same indices.
+
+ Returns:
+ Dict with keys ``"inputs"``, ``"labels"`` and ``"puzzle_identifiers"``,
+ each indexed by the same sampled positions.
+ """
+ rng = np.random.default_rng(seed)
+ inputs = np.load(data_path / "test" / "all__inputs.npy")
+ labels = np.load(data_path / "test" / "all__labels.npy")
+ pid = np.load(data_path / "test" / "all__puzzle_identifiers.npy")
+ # Sampling without replacement: numpy raises ValueError if n_total exceeds len(inputs).
+ idx = rng.choice(len(inputs), size=n_total, replace=False)
+ return {
+ "inputs": torch.from_numpy(inputs[idx].astype(np.int32)).to(DEVICE),
+ "labels": torch.from_numpy(labels[idx].astype(np.int32)).to(DEVICE),
+ "puzzle_identifiers": torch.from_numpy(pid[idx].astype(np.int32)).to(DEVICE),
+ }
+
+
+@torch.no_grad()
+def measure_pert_stability(model, batch, eps=1e-2, n_act_steps=8):
+ """For each sample, run UNPERTURBED + PERTURBED full forward and compare them.
+
+ Both runs start from the model's H_init / L_init state; the perturbed run adds
+ Gaussian noise scaled by ``eps`` to both z_H and z_L. The same L_level / H-step
+ updates are then applied to both runs for ``n_act_steps`` outer steps.
+
+ Args:
+ model: object exposing ``model.inner`` (config, H_init, L_init, L_level, lm_head, ...).
+ batch: dict with ``"inputs"``, ``"puzzle_identifiers"`` and ``"labels"`` tensors.
+ eps: scale applied to the random perturbation.
+ n_act_steps: number of outer steps to unroll.
+
+ Returns a dict of per-sample CPU tensors:
+ - init_norm: ||δz_H|| + ||δz_L|| of the injected perturbation
+ - final_norm: the same sum of norms after all updates
+ - ratio: final_norm / init_norm
+ - lam_emp: log(ratio) divided by the number of update applications counted in n_total
+ - succ_a / succ_b: exact match of the unperturbed / perturbed prediction
+ - pred_flip: whether the argmax prediction changed at any position
+
+ The "growth rate" inferred from δz_final / δz_init can be compared to JVP λ.
+ If λ_JVP > 0, δz_final >> δz_init (expansion). If λ_JVP < 0, δz_final < δz_init.
+ """
+ inner = model.inner
+ cfg = inner.config
+ B = batch["inputs"].shape[0]
+ seq_full = cfg.seq_len + inner.puzzle_emb_len
+ hidden = cfg.hidden_size
+ dt = inner.forward_dtype
+
+ # Initial z_H, z_L (identical for both runs initially)
+ z_H_0 = inner.H_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(dt)
+ z_L_0 = inner.L_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(dt)
+
+ # Perturbation
+ # Fixed generator seed: every call draws the same noise for a given shape.
+ # Noise is sampled in float32 and cast to the model dtype before scaling by eps.
+ g = torch.Generator(device=DEVICE).manual_seed(42)
+ delta_H = torch.randn(B, seq_full, hidden, generator=g, dtype=torch.float32, device=DEVICE).to(dt) * eps
+ delta_L = torch.randn(B, seq_full, hidden, generator=g, dtype=torch.float32, device=DEVICE).to(dt) * eps
+
+ input_emb = inner._input_embeddings(batch["inputs"], batch["puzzle_identifiers"])
+ # cos_sin is None when the model has no rotary_emb attribute.
+ seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None)
+ # Sum of the two norms, not the norm of the concatenated (z_H, z_L) vector.
+ init_delta_norm = (delta_H.float().flatten(1).norm(dim=1) +
+ delta_L.float().flatten(1).norm(dim=1)) # (B,) sum of init pert norms
+
+ # Run unperturbed and perturbed in parallel
+ z_H_a, z_L_a = z_H_0.clone(), z_L_0.clone()
+ z_H_b, z_L_b = z_H_0 + delta_H, z_L_0 + delta_L
+
+ has_H_level = hasattr(inner, "H_level") # HRM has separate, TRM uses L_level for both
+
+ # n_total counts update applications; it is incremented after each L update and each H update.
+ n_total = 0
+ for _act in range(n_act_steps):
+ for _h in range(cfg.H_cycles):
+ for _l in range(cfg.L_cycles):
+ z_L_a = inner.L_level(z_L_a, z_H_a + input_emb, **seq_info)
+ z_L_b = inner.L_level(z_L_b, z_H_b + input_emb, **seq_info)
+ n_total += 1
+ # H step: use H_level (HRM) or L_level (TRM)
+ h_mod = inner.H_level if has_H_level else inner.L_level
+ z_H_a = h_mod(z_H_a, z_L_a, **seq_info)
+ z_H_b = h_mod(z_H_b, z_L_b, **seq_info)
+ n_total += 1
+
+ final_delta_norm = ((z_H_b - z_H_a).float().flatten(1).norm(dim=1) +
+ (z_L_b - z_L_a).float().flatten(1).norm(dim=1))
+
+ # Per-sample growth rate per micro-step
+ # δ_final ≈ δ_init * exp(λ * n_total) → λ ≈ log(δ_final/δ_init) / n_total
+ # clamp_min guards the division against a zero initial norm.
+ ratio = final_delta_norm / init_delta_norm.clamp_min(1e-12)
+ lam_emp = ratio.log() / n_total
+
+ # Read out predictions for both runs
+ # The first puzzle_emb_len positions are dropped before taking the argmax.
+ out_a = inner.lm_head(z_H_a)[:, inner.puzzle_emb_len:].float()
+ out_b = inner.lm_head(z_H_b)[:, inner.puzzle_emb_len:].float()
+ pred_a = out_a.argmax(dim=-1)
+ pred_b = out_b.argmax(dim=-1)
+ labels = batch["labels"]
+ # Positions with label <= 0 are excluded from the exact-match test
+ # (presumably padding/ignore labels — TODO confirm against the dataset).
+ mask = labels > 0
+ exact_a = ((pred_a == labels) | ~mask).all(dim=-1)
+ exact_b = ((pred_b == labels) | ~mask).all(dim=-1)
+ # Unlike exact_a/exact_b, the flip test compares every position, masked or not.
+ pred_flip = (pred_a != pred_b).any(dim=-1) # any token changed
+
+ return {
+ "init_norm": init_delta_norm.cpu(),
+ "final_norm": final_delta_norm.cpu(),
+ "ratio": ratio.cpu(),
+ "lam_emp": lam_emp.cpu(),
+ "succ_a": exact_a.cpu(),
+ "succ_b": exact_b.cpu(),
+ "pred_flip": pred_flip.cpu(),
+ }
+
+
+def main():
+ """Run the perturbation check on the HRM and TRM checkpoints and print summaries.
+
+ For each model: load it, draw 64 test samples (seed 0), evaluate them in
+ mini-batches of 16 with ``measure_pert_stability``, then print accuracy, the
+ finite-difference growth rate (overall, and split by success/failure of the
+ unperturbed run) and the final/init perturbation ratio.
+ """
+ for name, repo, ckpt_root, ckpt_name, mod_path in [
+ ("HRM step_26040", HRM_DIR, CKPT_HRM_ROOT, CKPT_HRM_NAME,
+ "models.hrm.hrm_act_v1@HierarchicalReasoningModel_ACTV1"),
+ ("TRM step_104164", TRM_DIR, CKPT_TRM_ROOT, CKPT_TRM_NAME,
+ "models.recursive_reasoning.trm@TinyRecursiveReasoningModel_ACTV1"),
+ ]:
+ print(f"\n=== {name} ===")
+ model, cfg, train_meta, data_path = load_model(repo, ckpt_root, ckpt_name, mod_path)
+ batch = load_test_samples(data_path, n_total=64, seed=0)
+ # Limit batch size to model's training batch (puzzle_emb buffer)
+ # Re-batch
+ # The sample count (64) is hard-coded here, in the slicing loop, and in the printed "N=64".
+ B = 16
+ # Only these three of the returned statistics are aggregated and reported.
+ results = {"lam_emp": [], "succ_a": [], "ratio": []}
+ for s in range(0, 64, B):
+ e = min(s + B, 64)
+ mb = {k: v[s:e] for k, v in batch.items()}
+ # Rebuild model puzzle_emb buffer if needed — easier: ensure model's batch_size matches
+ r = measure_pert_stability(model, mb, eps=1e-2, n_act_steps=8)
+ results["lam_emp"].append(r["lam_emp"])
+ results["succ_a"].append(r["succ_a"])
+ results["ratio"].append(r["ratio"])
+ lam = torch.cat(results["lam_emp"]).numpy()
+ # succ_a is produced by boolean comparisons, so succ is a bool array and ~succ is logical NOT.
+ succ = torch.cat(results["succ_a"]).numpy()
+ ratio = torch.cat(results["ratio"]).numpy()
+ print(f" N=64 acc={succ.mean():.3f}")
+ print(f" finite-diff λ_emp (per micro-step):")
+ print(f" all mean={lam.mean():+.4f} med={np.median(lam):+.4f} range=[{lam.min():+.4f}, {lam.max():+.4f}]")
+ # Each group is reported only when it is non-empty, avoiding a mean over zero samples.
+ if succ.sum() > 0:
+ print(f" succ mean={lam[succ.astype(bool)].mean():+.4f} med={np.median(lam[succ.astype(bool)]):+.4f}")
+ if (~succ).sum() > 0:
+ print(f" fail mean={lam[~succ.astype(bool)].mean():+.4f} med={np.median(lam[~succ.astype(bool)]):+.4f}")
+ print(f" final/init perturbation ratio:")
+ print(f" all mean={ratio.mean():.3f} med={np.median(ratio):.3f} range=[{ratio.min():.3e}, {ratio.max():.3e}]")
+ # cleanup
+ # Release the model and cached GPU memory before the next model is loaded.
+ del model
+ torch.cuda.empty_cache()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/spectrum_microscope/checkpoint_summary.csv b/spectrum_microscope/checkpoint_summary.csv
new file mode 100644
index 0000000..8d5087f
--- /dev/null
+++ b/spectrum_microscope/checkpoint_summary.csv
@@ -0,0 +1,21 @@
+kind,file,step,n,k,acc,n_success,n_failure,raw_monotone_adjacent_fraction,raw_col0_is_sample_max_fraction,raw_col0_success_mean,raw_col0_failure_mean,raw_col0_delta_failure_minus_success,raw_col0_auc_failure,lambda_max_success_mean,lambda_max_failure_mean,lambda_max_delta_failure_minus_success,lambda_max_auc_failure,mean8_success_mean,mean8_failure_mean,mean8_delta_failure_minus_success,mean8_auc_failure,tail_mean_5_8_success_mean,tail_mean_5_8_failure_mean,tail_mean_5_8_delta_failure_minus_success,tail_mean_5_8_auc_failure,positive_sum_success_mean,positive_sum_failure_mean,positive_sum_delta_failure_minus_success,positive_sum_auc_failure,positive_count_success_mean,positive_count_failure_mean,positive_count_delta_failure_minus_success,positive_count_auc_failure,spread_success_mean,spread_failure_mean,spread_delta_failure_minus_success,spread_auc_failure,gap12_success_mean,gap12_failure_mean,gap12_delta_failure_minus_success,gap12_auc_failure,lambda_max_corr_token_acc,mean8_corr_token_acc,tail_mean_5_8_corr_token_acc,positive_sum_corr_token_acc,positive_count_corr_token_acc
+HRM,/home/yurenh2/rrm/research/flossing/diag_hrm_step_2604_512.npz,2604,512,8,0.015625,8,504,0.6551339285714286,0.62890625,-0.12265677284449339,-0.0977591768882814,0.024897595956211993,0.6889880952380952,-0.1213220115751028,-0.0949796658704087,0.026342345704694112,0.6932043650793651,-0.19951309508178383,-0.11536999984130648,0.08414309524047735,0.9992559523809523,-0.23467964679002762,-0.12418151276171326,0.11049813402831436,1.0,0.0,7.118396313181e-05,7.118396313181e-05,0.5009920634920635,0.0,0.001984126984126984,0.001984126984126984,0.5009920634920635,0.13017353788018227,0.034761710632001126,-0.09541182724818115,0.019345238095238096,0.03395136073231697,0.01004286463712416,-0.023908496095192813,0.12177579365079365,0.11864046173536506,-0.24317640561416,-0.364745516214904,0.01654115981917942,0.01654115981917943
+HRM,/home/yurenh2/rrm/research/flossing/diag_hrm_step_5208_512.npz,5208,512,8,0.048828125,25,487,0.7368861607142857,0.798828125,-0.18248083293437958,-0.1252742203170514,0.05720661261732818,0.8788501026694046,-0.18110741019248963,-0.12377287614975867,0.057334534042730964,0.8758110882956879,-0.22203324407339095,-0.16840960289919615,0.0536236411741948,0.9536755646817249,-0.24040259554982185,-0.1877109366994986,0.05269165885032326,0.9394661190965092,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.5,0.06972571730613708,0.07497160217019078,0.005245884864053696,0.5605749486652978,0.01896007537841797,0.0215270076581952,0.00256693227977723,0.5172073921971253,-0.19170281345239037,-0.3935742986571875,-0.44039154437692224,nan,nan
+HRM,/home/yurenh2/rrm/research/flossing/diag_hrm_step_7812_512.npz,7812,512,8,0.15234375,78,434,0.7536272321428571,0.826171875,-0.19744304562799442,-0.09758298332957993,0.09986006229841449,0.9227519792035921,-0.19638217866229704,-0.09583942211473957,0.10054275654755747,0.9273307337823467,-0.2626351442558166,-0.15164301766584318,0.1109921265899734,0.9941214699279215,-0.2882901179866913,-0.1752684197264127,0.11302169826027861,0.9917582417582418,0.0,0.0003272232148648253,0.0003272232148648253,0.5138248847926268,0.0,0.027649769585253458,0.027649769585253458,0.5138248847926268,0.10413952138370429,0.09246484074762525,-0.011674680636079043,0.4651719248493442,0.03829955409925718,0.028275593728860324,-0.010023960370396854,0.4361928394186459,-0.538963951651198,-0.6951799773061379,-0.6975383575513661,-0.06688266082458418,-0.07401140024719328
+HRM,/home/yurenh2/rrm/research/flossing/diag_hrm_step_10416_512.npz,10416,512,8,0.1796875,92,420,0.7572544642857143,0.841796875,-0.1734262867020848,-0.0592695716998562,0.1141567150022286,0.9278985507246377,-0.1725775988408081,-0.05791120557031328,0.1146663932704948,0.9305900621118013,-0.22779642522547636,-0.11871804687665038,0.10907837834882597,0.9611024844720497,-0.25038242412974004,-0.1428806266010118,0.10750179752872824,0.9642857142857143,0.0,0.005729037300833235,0.005729037300833235,0.580952380952381,0.0,0.20476190476190476,0.20476190476190476,0.580952380952381,0.09030715536083216,0.09668695457983717,0.006379799219005014,0.555667701863354,0.028749073491148327,0.03182062698594693,0.003071553494798606,0.5320393374741201,-0.4191290312385547,-0.47253422425178293,-0.4888621923983516,-0.0452883997405569,-0.059274922302819555
+HRM,/home/yurenh2/rrm/research/flossing/diag_hrm_step_13020_512.npz,13020,512,8,0.30078125,154,358,0.7329799107142857,0.779296875,-0.16317386656331334,-0.012345420176165918,0.15082844638714743,0.9784879924544729,-0.16130138939328179,-0.010260718098612039,0.15104067129466975,0.9784698541681782,-0.20199005053763794,-0.06562561695864545,0.13636443357899247,0.9897337299571937,-0.21928635291610057,-0.08840406447549293,0.13088228844060765,0.9910578248567076,0.0,0.03084314924092586,0.03084314924092586,0.7067039106145251,0.0,0.9134078212290503,0.9134078212290503,0.7067039106145251,0.06808332314294485,0.08949387786655578,0.021410554723610933,0.6710440397591235,0.020334010348842756,0.02894391759471739,0.008609907245874633,0.5919248349415948,-0.6439558752057271,-0.6897349633120721,-0.6989600499840098,-0.14942778693878858,-0.20197511884575872
+HRM,/home/yurenh2/rrm/research/flossing/diag_hrm_step_15624_512.npz,15624,512,8,0.333984375,171,341,0.7368861607142857,0.78515625,-0.17926316590140962,0.013238977896144526,0.19250214379755415,0.9800209222959647,-0.1768394878504482,0.015099927341163328,0.19193941519161153,0.9809298417108264,-0.2279172917539989,-0.04059797345319265,0.18731931830080625,0.9898475416302241,-0.24842964750796295,-0.06421948799691395,0.18421015951104902,0.9895731508634734,0.0007992622970837598,0.054435018036423026,0.053635755739339264,0.7755826516437722,0.023391812865497075,1.5073313782991202,1.4839395654336232,0.7767059388451578,0.08114021638005275,0.0924473905291844,0.01130717414913164,0.5924782631064465,0.026074302491693818,0.027355855661720895,0.001281553170027077,0.5126305499819931,-0.7050637096452648,-0.7549696612898014,-0.758163657007401,-0.24676472967716626,-0.2994831724010142
+HRM,/home/yurenh2/rrm/research/flossing/diag_hrm_step_18228_512.npz,18228,512,8,0.474609375,243,269,0.7474888392857143,0.818359375,-0.07771555354618448,0.016638744400673956,0.09435429794685843,0.8841617329845335,-0.07614383449056474,0.018023035920062363,0.0941668704106271,0.8879250998210106,-0.14575748546156192,-0.03353973620840138,0.11221774925316054,0.9716523628130402,-0.1721063231313486,-0.05434106856267585,0.11776525456867276,0.9799899031621461,0.00525456639471437,0.050987723279165634,0.045733156884451266,0.749567824743372,0.13580246913580246,1.4981412639405205,1.362338794804718,0.7569645233833586,0.10785400053824455,0.0830327885870487,-0.024821211951195854,0.35938623464439245,0.03951045055418571,0.027345439433070017,-0.01216501112111569,0.4258111891321309,-0.6086543875688578,-0.7665403099919734,-0.786002777751629,-0.3211841461864595,-0.4146342203458471
+HRM,/home/yurenh2/rrm/research/flossing/diag_hrm_step_20832_512.npz,20832,512,8,0.45703125,234,278,0.7497209821428571,0.80078125,-0.0847591630423951,0.011107698112080065,0.09586686115447517,0.9046762589928058,-0.08308592252134799,0.012636878445990327,0.09572280096733832,0.9064287031912931,-0.14093293187680603,-0.044163071447564366,0.09676986042924166,0.9825831642378405,-0.16335714651812983,-0.06769531056555371,0.09566183595257612,0.9836438541474513,0.002644224575935648,0.04743396344555085,0.0447897388696152,0.7440432269568961,0.0811965811965812,1.2589928057553956,1.1777962245588145,0.747501998401279,0.0904661258792059,0.09196829576401849,0.0015021698848125958,0.5366168603578676,0.03158770251569426,0.029151334776909496,-0.0024363677387847643,0.49451208264157903,-0.5961668167415688,-0.7490505096842796,-0.7670744302809093,-0.25827536960316083,-0.3352751212871484
+HRM,/home/yurenh2/rrm/research/flossing/diag_hrm_step_23436_512.npz,23436,512,8,0.505859375,259,253,0.7444196428571429,0.8125,-0.1396155291305258,-0.002783365376173506,0.13683216375435228,0.9710501014848841,-0.1384992553999928,-0.0009893162803058476,0.13750993911968695,0.9724541028888855,-0.2008695753030298,-0.05231515268090541,0.14855442262212437,0.9922932531628184,-0.22361706499433173,-0.07373835104911058,0.14987871394522115,0.9920032963511224,0.0005239612893938558,0.02692777939243448,0.026403818103040624,0.7254032688815297,0.019305019305019305,0.8932806324110671,0.8739756131060479,0.7251133120698338,0.09569484527981534,0.08370462378894082,-0.011990221490874517,0.4474338822164909,0.0377779275387468,0.025563243349588635,-0.012214684189158165,0.42294016207059687,-0.735475615818423,-0.8331365987869843,-0.8388368318549599,-0.2844985120608491,-0.35566859763323194
+HRM,/home/yurenh2/rrm/research/flossing/diag_hrm_step_26040_512.npz,26040,512,8,0.5,256,256,0.748046875,0.787109375,-0.14642834789538028,0.03254939207545249,0.17897773997083277,0.9890289306640625,-0.14456235250236205,0.034372387226426326,0.17893473972878837,0.9896240234375,-0.18957092847483636,-0.025070655147768406,0.16450027332706796,0.997589111328125,-0.21117772247089306,-0.04990981457450516,0.1612679078963879,0.9984893798828125,0.00045363006938714534,0.074183922140719,0.07373029207133186,0.865997314453125,0.01953125,1.94140625,1.921875,0.8662796020507812,0.07883575186588132,0.09741729416225553,0.01858154229637421,0.62774658203125,0.01608559737815085,0.02983991140987996,0.013754314031729109,0.631378173828125,-0.8022434699246608,-0.8493972056528871,-0.8567110926525265,-0.37718525164073874,-0.4879767533856408
+TRM,/home/yurenh2/rrm/research/flossing/diag_trm_singleGPU_step26041_512.npz,26041,512,8,0.576171875,295,217,0.7438616071428571,0.791015625,-0.03554810737259686,0.034024751214321254,0.06957285858691811,0.9836132156525814,-0.03484145596473462,0.034498983389517714,0.06934043935425233,0.9838475357338123,-0.044594623737682894,0.011831644308724274,0.05642626804640717,0.9900960712333047,-0.04878217898325509,0.002079399892583064,0.050861578875838157,0.9916582051081778,0.002200093488031367,0.11402314361281402,0.11182305012478265,0.9693353120362415,0.1694915254237288,5.539170506912442,5.369678981488713,0.9706006404748887,0.01622369096148759,0.03719716329264318,0.02097347233115559,0.8946653128173084,0.004580460641320037,0.010477446547843793,0.005896985906523756,0.7136608607357651,-0.8221535026414836,-0.8532299277645251,-0.8618572814599218,-0.6425677667973041,-0.7985750358664757
+TRM,/home/yurenh2/rrm/research/flossing/diag_trm_singleGPU_step52082_512.npz,52082,512,8,0.6484375,332,180,0.7435825892857143,0.78125,-0.0007075830156292839,0.05957649097674423,0.06028407399237352,0.981425702811245,-0.00011202936298378704,0.059987526981987886,0.060099556344971675,0.9824129852744311,-0.00932737177132882,0.03324007580221304,0.04256744757354186,0.993591030789826,-0.01301439359083114,0.02160164021022663,0.034616033801057766,0.9948627844712182,0.011575991871503936,0.26611879217404444,0.2545428003025405,0.9932730923694779,0.9879518072289156,7.916666666666667,6.9287148594377514,0.9850485274431058,0.014868822472832896,0.04419851013889355,0.029329687666060658,0.9461680053547523,0.0048919110970939116,0.01239234626862324,0.007500435171529329,0.7928547523427042,-0.822862756235417,-0.8543835399263785,-0.8502975145711263,-0.83615142839377,-0.8756439805499547
+TRM,/home/yurenh2/rrm/research/flossing/diag_trm_singleGPU_step78123_512.npz,78123,512,8,0.6875,352,160,0.7522321428571429,0.94921875,-0.0022565727597132286,0.08176288979593664,0.08401946255564988,0.9937144886363637,-0.002104297534184628,0.08205510303378105,0.08415940056796568,0.99375,-0.03085258081190225,0.04898583621179568,0.07983841702369793,0.9991299715909091,-0.03740866408387236,0.03495748526038369,0.07236614934425606,0.9993607954545455,0.011536654059641065,0.39188668969436546,0.3803500356347244,0.9991299715909091,0.6619318181818182,8.0,7.338068181818182,0.9900568181818182,0.037154979946411586,0.05358391968184151,0.016428939735429922,0.8060191761363636,0.026502143868418152,0.015829400252550842,-0.01067274361586731,0.2559303977272727,-0.8695160653352085,-0.9167732333138554,-0.9212713093750954,-0.9333667142066921,-0.916788261375625
+TRM,/home/yurenh2/rrm/research/flossing/diag_trm_singleGPU_step104164_512.npz,104164,512,8,0.7109375,364,148,0.7659040178571429,0.873046875,0.0007209903823832564,0.09478743761979244,0.09406644723740919,0.9850942975942976,0.0011589818968986748,0.0948834198753576,0.09372443797845893,0.9852242352242352,-0.013257209616115657,0.05609436892523632,0.06935157854135197,0.9912013662013662,-0.018433471481510245,0.04002377992288235,0.05845725140439259,0.9914241164241164,0.024264690904521637,0.44875495140189053,0.4244902604973689,0.9912013662013662,1.0631868131868132,8.0,6.936813186813187,0.9766483516483516,0.02201517710918679,0.06216927877048383,0.04015410166129704,0.9606660231660231,0.008753806905887968,0.020252479617861478,0.01149867271197351,0.8213171963171964,-0.8714207860201784,-0.8978503025935065,-0.9018662195261188,-0.9164290683745636,-0.8552081309221397
+TRM,/home/yurenh2/rrm/research/flossing/diag_trm_singleGPU_step130205_512.npz,130205,512,8,0.755859375,387,125,0.7661830357142857,0.83203125,0.013444696512909148,0.10679112273454666,0.09334642622163751,0.988857881136951,0.014031398803655426,0.10683381146192551,0.09280241265827009,0.9889198966408269,0.0006696076976612102,0.05997022696118802,0.05930061926352681,0.9935503875968992,-0.004693869635777962,0.04096000522933901,0.04565387486511697,0.9943152454780362,0.04017261892318711,0.47976181568950416,0.439589196766317,0.9935503875968992,2.4108527131782944,8.0,5.589147286821706,0.9521963824289406,0.021535266655820777,0.07427249775826932,0.05273723110244854,0.9726511627906976,0.007057571094765433,0.02309795269370079,0.016040381598935356,0.8932093023255814,-0.868494410453853,-0.8995673015890842,-0.9101679267135595,-0.9091943845695494,-0.7232326485538275
+TRM,/home/yurenh2/rrm/research/flossing/diag_trm_singleGPU_step156246_512.npz,156246,512,8,0.7578125,388,124,0.8465401785714286,0.984375,0.033355438992845106,0.1080104663006721,0.074655027307827,0.9934527768540073,0.03340256920781724,0.10808430672172577,0.07468173751390852,0.9935359161955437,-0.01719525050584332,0.059510113856841566,0.07670536436268488,0.9990646824077153,-0.03258687625807819,0.03992759636434306,0.07251447262242125,0.9987944795477219,0.0501136147264783,0.47608091085473253,0.42596729612825424,0.9990646824077153,1.7268041237113403,8.0,6.27319587628866,0.9806701030927835,0.07044461364565689,0.07696695457167563,0.006522340926018735,0.5740563684735617,0.03767048126359648,0.024864713691415324,-0.012805767572181152,0.30377036913867644,-0.8015386582153512,-0.8887892447157182,-0.8908349686685592,-0.9311284896491179,-0.8411922875634292
+TRM,/home/yurenh2/rrm/research/flossing/diag_trm_singleGPU_step182287_512.npz,182287,512,8,0.7421875,380,132,0.7837611607142857,0.9296875,0.00821161610291902,0.10131858266664273,0.0931069665637237,0.9476076555023923,0.008431412303494461,0.10132696061874881,0.09289554831525434,0.9476275917065391,-0.018691345271909503,0.049928791549256966,0.06862013682116647,0.993122009569378,-0.026891200006043378,0.029759585490286223,0.0566507854963296,0.993421052631579,0.027419095844602212,0.3994350857048465,0.37201598986024426,0.993122009569378,0.8973684210526316,7.992424242424242,7.095055821371611,0.9867125199362041,0.03768573965873513,0.07997999362255954,0.04229425396382441,0.8919258373205742,0.019669575117656057,0.027337144778081864,0.007667569660425807,0.7587121212121212,-0.7443356021278564,-0.8907098916862968,-0.9075392398447194,-0.9063416990478439,-0.8813795117907826
+TRM,/home/yurenh2/rrm/research/flossing/diag_trm_singleGPU_step208328_512.npz,208328,512,8,0.75,384,128,0.8138950892857143,0.970703125,0.03753090985757505,0.10676543856970966,0.06923452871213462,0.9824422200520834,0.037651085844269495,0.10677664534887299,0.0691255595046035,0.9825439453125,-0.003334924222756793,0.0556086836418217,0.058943607864578494,0.997314453125,-0.015420041531266785,0.03501016539212287,0.050430206923389655,0.9977620442708334,0.0587247395355727,0.4448694691345736,0.38614472959900087,0.997314453125,2.3385416666666665,8.0,5.661458333333334,0.97265625,0.05609253943835787,0.08039409609409631,0.024301556655738445,0.8315836588541666,0.029487703329541166,0.026957579655572772,-0.0025301236739683937,0.4844767252604167,-0.7847679828439686,-0.9025430240366568,-0.9116696180830379,-0.918178990916473,-0.8241555473543875
+TRM,/home/yurenh2/rrm/research/flossing/diag_trm_singleGPU_step234369_512.npz,234369,512,8,0.7734375,396,116,0.7837611607142857,0.943359375,0.017852102687761037,0.10551282866247769,0.08766072597471665,0.9871125043538836,0.018100257435352516,0.10551282866247769,0.08741257122712517,0.9871125043538836,-0.011827684605397524,0.05244761205414824,0.06427529665954576,0.9948624172762104,-0.020207434739923615,0.03131334408957126,0.05152077882949488,0.995276036224312,0.03539385316928405,0.4195808964331859,0.38418704326390185,0.9948624172762104,1.4595959595959596,8.0,6.540404040404041,0.976010101010101,0.04073868896878162,0.08256442746115399,0.041825738492372366,0.9344740508533612,0.022704324246853612,0.026842519994182832,0.00413819574732922,0.6134839777081157,-0.8095679410757006,-0.8841140003061905,-0.8897475061573502,-0.9111627943782784,-0.8264057387186906
+TRM,/home/yurenh2/rrm/research/flossing/diag_trm_singleGPU_step260410_512.npz,260410,512,8,0.76953125,394,118,0.7299107142857143,0.767578125,0.012635910749513134,0.10288177159125522,0.09024586084174209,0.9893530069689409,0.013151870022697326,0.10296485964524543,0.0898129896225481,0.9894175341994321,0.0016685908707722002,0.05065993657996641,0.04899134570919421,0.9931170954142648,-0.002553942017630222,0.030341437701096412,0.032895379718726636,0.9938484040264992,0.030020373667639433,0.40529236062114143,0.375271986953502,0.9931170954142648,3.182741116751269,7.991525423728813,4.808784306977544,0.9728770541168373,0.01786690075157378,0.08103766449001003,0.06317076373843625,0.9816742665404801,0.006782306742966769,0.028344342895483567,0.021562036152516798,0.9238363589434742,-0.8797240829367476,-0.9146835924338959,-0.9119410149316228,-0.9168566073790785,-0.6899483781164985
diff --git a/spectrum_microscope/feature_and_rank_summary.csv b/spectrum_microscope/feature_and_rank_summary.csv
new file mode 100644
index 0000000..9c083af
--- /dev/null
+++ b/spectrum_microscope/feature_and_rank_summary.csv
@@ -0,0 +1,421 @@
+kind,step,feature,success_mean,failure_mean,success_std,failure_std,delta_failure_minus_success,cohen_d_failure_minus_success,auc_failure,rank
+HRM,2604,raw_col0,-0.12265677284449339,-0.0977591768882814,0.04030181725141351,0.019887727948888698,0.024897595956211993,1.2201966484398439,0.6889880952380952,
+HRM,2604,lambda_max,-0.1213220115751028,-0.0949796658704087,0.0406757630631841,0.01893813405072957,0.026342345704694112,1.3506467767615933,0.6932043650793651,
+HRM,2604,lambda_2,-0.15527337230741978,-0.10502253050753285,0.051366999069597484,0.013746949065550082,0.050250841799886925,3.326888182860868,0.7646329365079365,
+HRM,2604,lambda_min8,-0.2514955494552851,-0.12974137650240983,0.018187644069172424,0.011628171608972693,0.12175417295287524,10.33402690742346,1.0,
+HRM,2604,mean8,-0.19951309508178383,-0.11536999984130648,0.026835524387327246,0.011164146251842607,0.08414309524047735,7.256189941307792,0.9992559523809523,
+HRM,2604,sum8,-1.5961047606542706,-0.9229599987304519,0.21468419509861797,0.08931317001474086,0.6731447619238188,7.256189941307792,0.9992559523809523,
+HRM,2604,tail_mean_5_8,-0.23467964679002762,-0.12418151276171326,0.016120822780361817,0.011009548132980279,0.11049813402831436,9.928603291055337,1.0,
+HRM,2604,positive_sum,0.0,7.118396313181e-05,0.0,0.0015964898287796556,7.118396313181e-05,0.04485241453114739,0.5009920634920635,
+HRM,2604,positive_count,0.0,0.001984126984126984,0.0,0.04449932835715438,0.001984126984126984,0.044852414531147376,0.5009920634920635,
+HRM,2604,spread,0.13017353788018227,0.034761710632001126,0.03566614788119447,0.018501418471145396,-0.09541182724818115,-5.041056199469806,0.019345238095238096,
+HRM,2604,gap12,0.03395136073231697,0.01004286463712416,0.023112847948348347,0.011899667159719966,-0.023908496095192813,-1.9631726429348173,0.12177579365079365,
+HRM,2604,std8,0.043645552140722035,0.011200390642073368,0.014068430859614333,0.005834456424973157,-0.032445161498648664,-5.352424847111728,0.027529761904761904,
+HRM,2604,linear_slope,-0.017908510718760748,-0.00457486676892374,0.006085403645943525,0.0022796695575639184,0.013333643949837007,5.576710060722784,0.9737103174603174,
+HRM,2604,,-0.1213220115751028,-0.0949796658704087,,,0.026342345704694112,,0.6932043650793651,1
+HRM,2604,,-0.15527337230741978,-0.10502253050753285,,,0.050250841799886925,,0.7646329365079365,2
+HRM,2604,,-0.1786546530202031,-0.11098820168436282,,,0.06766645133584029,,0.9010416666666666,3
+HRM,2604,,-0.20213613659143448,-0.11524354962129442,,,0.08689258697014006,,0.9992559523809523,4
+HRM,2604,,-0.21668588370084763,-0.11894965987829935,,,0.09773622382254828,,0.9995039682539683,5
+HRM,2604,,-0.2309022806584835,-0.12242354591569257,,,0.10847873474279093,,1.0,6
+HRM,2604,,-0.23963487334549427,-0.1256114687504513,,,0.11402340459504298,,1.0,7
+HRM,2604,,-0.2514955494552851,-0.12974137650240983,,,0.12175417295287524,,1.0,8
+HRM,5208,raw_col0,-0.18248083293437958,-0.1252742203170514,0.0376326585075004,0.031553078155205715,0.05720661261732818,1.7911034452903252,0.8788501026694046,
+HRM,5208,lambda_max,-0.18110741019248963,-0.12377287614975867,0.03801923261968088,0.031017394544143387,0.057334534042730964,1.822628261680259,0.8758110882956879,
+HRM,5208,lambda_2,-0.2000674855709076,-0.14529988380795386,0.03250797776745435,0.024522774937466737,0.05476760176295373,2.1888689490432336,0.9136755646817248,
+HRM,5208,lambda_min8,-0.2508331274986267,-0.19874447831994943,0.035090540719927454,0.018256776558606847,0.052088649178677254,2.676891600977245,0.9262422997946612,
+HRM,5208,mean8,-0.22203324407339095,-0.16840960289919615,0.030588270494116828,0.017776815061563554,0.0536236411741948,2.87606523753305,0.9536755646817249,
+HRM,5208,sum8,-1.7762659525871276,-1.3472768231935692,0.24470616395293462,0.14221452049250843,0.4289891293935584,2.87606523753305,0.9536755646817249,
+HRM,5208,tail_mean_5_8,-0.24040259554982185,-0.1877109366994986,0.03304058105962134,0.01774764962834564,0.05269165885032326,2.7993921425509725,0.9394661190965092,
+HRM,5208,positive_sum,0.0,0.0,0.0,0.0,0.0,nan,0.5,
+HRM,5208,positive_count,0.0,0.0,0.0,0.0,0.0,nan,0.5,
+HRM,5208,spread,0.06972571730613708,0.07497160217019078,0.0343015088066134,0.03140782858298907,0.005245884864053696,0.16591924597040866,0.5605749486652978,
+HRM,5208,gap12,0.01896007537841797,0.0215270076581952,0.017179519937665654,0.022200656425419553,0.00256693227977723,0.1165452757509607,0.5172073921971253,
+HRM,5208,std8,0.0228393995275196,0.024492692286121307,0.011319899039806042,0.010120307326619346,0.0016532927586017064,0.1620540172637433,0.5596714579055442,
+HRM,5208,linear_slope,-0.009450555088974182,-0.009984845640594028,0.004582812477001151,0.003951880106398978,-0.0005342905516198468,-0.1338130383338356,0.44837782340862425,
+HRM,5208,,-0.18110741019248963,-0.12377287614975867,,,0.057334534042730964,,0.8758110882956879,1
+HRM,5208,,-0.2000674855709076,-0.14529988380795386,,,0.05476760176295373,,0.9136755646817248,2
+HRM,5208,,-0.21122154653072356,-0.1588130125764459,,,0.052408533954277664,,0.9373305954825462,3
+HRM,5208,,-0.22225912809371948,-0.16854730386141634,,,0.05371182423230314,,0.9410266940451746,4
+HRM,5208,,-0.22932057976722717,-0.17662544126260943,,,0.05269513850461774,,0.9349486652977412,5
+HRM,5208,,-0.23728807628154755,-0.18429149904535047,,,0.05299657723619708,,0.9393839835728953,6
+HRM,5208,,-0.24416859865188598,-0.1911823281700851,,,0.05298627048180088,,0.93782340862423,7
+HRM,5208,,-0.2508331274986267,-0.19874447831994943,,,0.052088649178677254,,0.9262422997946612,8
+HRM,7812,raw_col0,-0.19744304562799442,-0.09758298332957993,0.0522901368127734,0.04209234969615372,0.09986006229841449,2.2754791112485506,0.9227519792035921,
+HRM,7812,lambda_max,-0.19638217866229704,-0.09583942211473957,0.051300154923270176,0.04104791121220356,0.10054275654755747,2.3462501082043694,0.9273307337823467,
+HRM,7812,lambda_2,-0.23468173276155424,-0.1241150158435999,0.034332387746725104,0.031863509295347085,0.11056671691795435,3.421528207383091,0.9929103154909606,
+HRM,7812,lambda_min8,-0.3005217000460013,-0.18830426286236482,0.035273835636870095,0.023540145721896902,0.1122174371836365,4.361925605701938,0.9916400803497578,
+HRM,7812,mean8,-0.2626351442558166,-0.15164301766584318,0.03120620075782802,0.02527422505918056,0.1109921265899734,4.217663623434578,0.9941214699279215,
+HRM,7812,sum8,-2.1010811540465326,-1.2131441413267454,0.24964960606262415,0.20219380047344448,0.8879370127197872,4.217663623434578,0.9941214699279215,
+HRM,7812,tail_mean_5_8,-0.2882901179866913,-0.1752684197264127,0.034146731685619464,0.024467123501210327,0.11302169826027861,4.309662608089653,0.9917582417582418,
+HRM,7812,positive_sum,0.0,0.0003272232148648253,0.0,0.002455328813990128,0.0003272232148648253,0.14446900111263877,0.5138248847926268,
+HRM,7812,positive_count,0.0,0.027649769585253458,0.0,0.163967252300988,0.027649769585253458,0.18279933893187883,0.5138248847926268,
+HRM,7812,spread,0.10413952138370429,0.09246484074762525,0.05367465058244875,0.03750037325415418,-0.011674680636079043,-0.28852032665012917,0.4651719248493442,
+HRM,7812,gap12,0.03829955409925718,0.028275593728860324,0.039255726129822074,0.02773318010645003,-0.010023960370396854,-0.335966521242734,0.4361928394186459,
+HRM,7812,std8,0.033543718777644664,0.030105340437515705,0.01699979386761588,0.01199243527848641,-0.0034383783401289586,-0.2664003934957475,0.4700460829493088,
+HRM,7812,linear_slope,-0.013432247076929525,-0.012232394099241935,0.006418967572469651,0.004683884578682541,0.0011998529776875901,0.24010930082876245,0.5298357556422073,
+HRM,7812,,-0.19638217866229704,-0.09583942211473957,,,0.10054275654755747,,0.9273307337823467,1
+HRM,7812,,-0.23468173276155424,-0.1241150158435999,,,0.11056671691795435,,0.9929103154909606,2
+HRM,7812,,-0.251154831968821,-0.13946109571095025,,,0.11169373625787074,,0.9948008980267045,3
+HRM,7812,,-0.2657019387070949,-0.15265492875180486,,,0.11304700995529005,,0.9943577927448896,4
+HRM,7812,,-0.27556606038258624,-0.1625548328191454,,,0.11301122756344084,,0.9925558312655087,5
+HRM,7812,,-0.28451120738799757,-0.17087361736712367,,,0.1136375900208739,,0.9920536452794517,6
+HRM,7812,,-0.29256150413018006,-0.17934096585701687,,,0.1132205382731632,,0.9906947890818859,7
+HRM,7812,,-0.3005217000460013,-0.18830426286236482,,,0.1122174371836365,,0.9916400803497578,8
+HRM,10416,raw_col0,-0.1734262867020848,-0.0592695716998562,0.05142884541858161,0.05837937165239414,0.1141567150022286,1.9920979882043344,0.9278985507246377,
+HRM,10416,lambda_max,-0.1725775988408081,-0.05791120557031328,0.05097113037108893,0.0576897804622528,0.1146663932704948,2.0240431138230965,0.9305900621118013,
+HRM,10416,lambda_2,-0.20132667233195642,-0.08973183255626022,0.05157430227345105,0.049230049747897166,0.1115948397756962,2.2428096158571673,0.9430383022774327,
+HRM,10416,lambda_min8,-0.26288475420164026,-0.15459816015015046,0.04626252379914644,0.03216863980667409,0.1082865940514898,3.0772614530337052,0.9655538302277433,
+HRM,10416,mean8,-0.22779642522547636,-0.11871804687665038,0.04481725431285532,0.03950978913365911,0.10907837834882597,2.68704925673509,0.9611024844720497,
+HRM,10416,sum8,-1.8223714018038109,-0.9497443750132031,0.35853803450284255,0.31607831306927286,0.8726270267906078,2.68704925673509,0.9611024844720497,
+HRM,10416,tail_mean_5_8,-0.25038242412974004,-0.1428806266010118,0.04547986314236225,0.035373315895853914,0.10750179752872824,2.86943459442746,0.9642857142857143,
+HRM,10416,positive_sum,0.0,0.005729037300833235,0.0,0.016580685880679416,0.005729037300833235,0.3807497273477808,0.580952380952381,
+HRM,10416,positive_count,0.0,0.20476190476190476,0.0,0.5172971794646537,0.20476190476190476,0.4361837613683104,0.580952380952381,
+HRM,10416,spread,0.09030715536083216,0.09668695457983717,0.04339138724697264,0.04303169324857104,0.006379799219005014,0.14774562979723496,0.555667701863354,
+HRM,10416,gap12,0.028749073491148327,0.03182062698594693,0.02982211658228237,0.03009338143747547,0.003071553494798606,0.10203251659009754,0.5320393374741201,
+HRM,10416,std8,0.029224929521376775,0.03159564285761782,0.013503150282924078,0.013896013215465845,0.002370713336241042,0.17112952807393322,0.5624223602484472,
+HRM,10416,linear_slope,-0.011799236704348575,-0.012641789825999967,0.0051687887683380404,0.0054043881195896815,-0.0008425531216513914,-0.15680302745477487,0.44280538302277433,
+HRM,10416,,-0.1725775988408081,-0.05791120557031328,,,0.1146663932704948,,0.9305900621118013,1
+HRM,10416,,-0.20132667233195642,-0.08973183255626022,,,0.1115948397756962,,0.9430383022774327,2
+HRM,10416,,-0.21822281260772244,-0.10843677614105954,,,0.1097860364666629,,0.9561076604554866,3
+HRM,10416,,-0.2287146215043638,-0.1221420543415228,,,0.10657256716284101,,0.9570393374741201,4
+HRM,10416,,-0.23784773683418398,-0.1312608235626526,,,0.10658691327153139,,0.9609730848861283,5
+HRM,10416,,-0.24641577381154764,-0.1391826140738669,,,0.10723315973768074,,0.96425983436853,6
+HRM,10416,,-0.25438143167158833,-0.14648090861737728,,,0.10790052305421105,,0.9645703933747412,7
+HRM,10416,,-0.26288475420164026,-0.15459816015015046,,,0.1082865940514898,,0.9655538302277433,8
+HRM,13020,raw_col0,-0.16317386656331334,-0.012345420176165918,0.042856889768654864,0.05753293294627191,0.15082844638714743,2.8114338599603244,0.9784879924544729,
+HRM,13020,lambda_max,-0.16130138939328179,-0.010260718098612039,0.04246364854330817,0.056766761485305045,0.15104067129466975,2.8510786506513477,0.9784698541681782,
+HRM,13020,lambda_2,-0.18163539974212453,-0.03920463569332943,0.03594937083533303,0.04924217904541589,0.1424307640487951,3.1137705865515857,0.9878654864688384,
+HRM,13020,lambda_min8,-0.2293847125362266,-0.09975459596516784,0.03337085685005013,0.03399613242800522,0.12963011657105877,3.826662246171393,0.9925633026191686,
+HRM,13020,mean8,-0.20199005053763794,-0.06562561695864545,0.03340950589770624,0.040423465527332884,0.13636443357899247,3.5397360643892095,0.9897337299571937,
+HRM,13020,sum8,-1.6159204043011035,-0.5250049356691636,0.26727604718164993,0.3233877242186631,1.0909154686319398,3.5397360643892095,0.9897337299571937,
+HRM,13020,tail_mean_5_8,-0.21928635291610057,-0.08840406447549293,0.03310220883476704,0.03609605320353908,0.13088228844060765,3.7086248240398825,0.9910578248567076,
+HRM,13020,positive_sum,0.0,0.03084314924092586,0.0,0.06861793815120783,0.03084314924092586,0.5364936993178259,0.7067039106145251,
+HRM,13020,positive_count,0.0,0.9134078212290503,0.0,1.4668741158227152,0.9134078212290503,0.7432166930372746,0.7067039106145251,
+HRM,13020,spread,0.06808332314294485,0.08949387786655578,0.027504229514443392,0.03908998548619218,0.021410554723610933,0.5935838521022724,0.6710440397591235,
+HRM,13020,gap12,0.020334010348842756,0.02894391759471739,0.021983497110638385,0.02841058153819357,0.008609907245874633,0.3225509499014192,0.5919248349415948,
+HRM,13020,std8,0.02211762934618332,0.029280053002001014,0.008527554659269684,0.012685046355487147,0.007162423655817693,0.6166462697146877,0.6737829209896249,
+HRM,13020,linear_slope,-0.008974770849857386,-0.011782226313356571,0.003290494032488962,0.004987203958465754,-0.0028074554634991856,-0.6166324175830334,0.3271602698977001,
+HRM,13020,,-0.16130138939328179,-0.010260718098612039,,,0.15104067129466975,,0.9784698541681782,1
+HRM,13020,,-0.18163539974212453,-0.03920463569332943,,,0.1424307640487951,,0.9878654864688384,2
+HRM,13020,,-0.19381305298393037,-0.05536385079066649,,,0.13844920219326387,,0.9889719219328158,3
+HRM,13020,,-0.2020251505173646,-0.066559473184584,,,0.13546567733278062,,0.9900420808242038,4
+HRM,13020,,-0.20940989824374776,-0.07677722861677344,,,0.13263266962697431,,0.9897518682434884,5
+HRM,13020,,-0.21611255254935136,-0.08513766296881684,,,0.13097488958053452,,0.990150910541972,6
+HRM,13020,,-0.2222382483350766,-0.09194677035121361,,,0.13029147798386298,,0.9904773996952768,7
+HRM,13020,,-0.2293847125362266,-0.09975459596516784,,,0.12963011657105877,,0.9925633026191686,8
+HRM,15624,raw_col0,-0.17926316590140962,0.013238977896144526,0.06333936702112142,0.05639223105928966,0.19250214379755415,3.267233281666343,0.9800209222959647,
+HRM,15624,lambda_max,-0.1768394878504482,0.015099927341163328,0.06255652597926131,0.05560474698647669,0.19193941519161153,3.301734795638855,0.9809298417108264,
+HRM,15624,lambda_2,-0.20291379034214202,-0.012255928320557568,0.055857221131106714,0.044443027774491516,0.19065786202158444,3.918997308822964,0.9888014268319871,
+HRM,15624,lambda_min8,-0.25797970423050093,-0.07734746318802106,0.05115013271288909,0.03412271586873159,0.18063224104247988,4.439110171642694,0.9891615647133474,
+HRM,15624,mean8,-0.2279172917539989,-0.04059797345319265,0.051764153215948086,0.03785782733320574,0.18731931830080625,4.347201163711293,0.9898475416302241,
+HRM,15624,sum8,-1.8233383340319913,-0.3247837876255412,0.4141132257275847,0.3028626186656459,1.49855454640645,4.347201163711293,0.9898475416302241,
+HRM,15624,tail_mean_5_8,-0.24842964750796295,-0.06421948799691395,0.05133688086396063,0.03522427208678674,0.18421015951104902,4.450431670658403,0.9895731508634734,
+HRM,15624,positive_sum,0.0007992622970837598,0.054435018036423026,0.006311586156326671,0.0829369660710654,0.053635755739339264,0.7897405527146792,0.7755826516437722,
+HRM,15624,positive_count,0.023391812865497075,1.5073313782991202,0.1858508606090141,1.8389972975174267,1.4839395654336232,0.9843141108510152,0.7767059388451578,
+HRM,15624,spread,0.08114021638005275,0.0924473905291844,0.04134252842986076,0.04135575287029946,0.01130717414913164,0.2729069758771571,0.5924782631064465,
+HRM,15624,gap12,0.026074302491693818,0.027355855661720895,0.030170106340969154,0.03224791282031372,0.001281553170027077,0.04051571914395107,0.5126305499819931,
+HRM,15624,std8,0.02674667842261408,0.03010560222499207,0.013449828344422855,0.013216767545140453,0.00335892380237799,0.25215056056737484,0.5836463103016584,
+HRM,15624,linear_slope,-0.010672528676104359,-0.01223319513153774,0.004956322835982859,0.004962746226333024,-0.0015606664554333807,-0.3139972419939842,0.40145084117919433,
+HRM,15624,,-0.1768394878504482,0.015099927341163328,,,0.19193941519161153,,0.9809298417108264,1
+HRM,15624,,-0.20291379034214202,-0.012255928320557568,,,0.19065786202158444,,0.9888014268319871,2
+HRM,15624,,-0.21947924458361856,-0.02918174847592791,,,0.19029749610769064,,0.9899847370135995,3
+HRM,15624,,-0.23038722122393068,-0.041568086182563246,,,0.18881913504136744,,0.9895903002863954,4
+HRM,15624,,-0.23906979466957307,-0.05152652893011264,,,0.18754326573946042,,0.9891444152904255,5
+HRM,15624,,-0.24525522475528438,-0.06034834885985462,,,0.18490687589542976,,0.9893845072113323,6
+HRM,15624,,-0.25141386637649343,-0.06765561100966748,,,0.18375825536682594,,0.9899332887448338,7
+HRM,15624,,-0.25797970423050093,-0.07734746318802106,,,0.18063224104247988,,0.9891615647133474,8
+HRM,18228,raw_col0,-0.07771555354618448,0.016638744400673956,0.06263255583133231,0.04825023331100934,0.09435429794685843,1.695455412874137,0.8841617329845335,
+HRM,18228,lambda_max,-0.07614383449056474,0.018023035920062363,0.06191955650099354,0.04721766951272925,0.0941668704106271,1.7184537831776516,0.8879250998210106,
+HRM,18228,lambda_2,-0.11565428504475044,-0.009322403513007655,0.0531641742419873,0.03970590030902003,0.10633188153174278,2.278284000583591,0.93969434118133,
+HRM,18228,lambda_min8,-0.1839978350288093,-0.06500975266698635,0.04408979332796084,0.024890830302457746,0.11898808236182294,3.361459932446762,0.9805253415331895,
+HRM,18228,mean8,-0.14575748546156192,-0.03353973620840138,0.044649355323357655,0.029756507043572245,0.11221774925316054,2.981199068698324,0.9716523628130402,
+HRM,18228,sum8,-1.1660598836924954,-0.26831788966721104,0.35719484258686124,0.23805205634857796,0.8977419940252843,2.981199068698324,0.9716523628130402,
+HRM,18228,tail_mean_5_8,-0.1721063231313486,-0.05434106856267585,0.04388947466770493,0.02615494238115857,0.11776525456867276,3.293391821754294,0.9799899031621461,
+HRM,18228,positive_sum,0.00525456639471437,0.050987723279165634,0.020732995890211044,0.08954318231022877,0.045733156884451266,0.6868120669084579,0.749567824743372,
+HRM,18228,positive_count,0.13580246913580246,1.4981412639405205,0.4375319789301074,1.874549830104913,1.362338794804718,0.976934071589174,0.7569645233833586,
+HRM,18228,spread,0.10785400053824455,0.0830327885870487,0.05276702488910829,0.037727700400044376,-0.024821211951195854,-0.5445778774761374,0.35938623464439245,
+HRM,18228,gap12,0.03951045055418571,0.027345439433070017,0.03941031702384997,0.025811774229372243,-0.01216501112111569,-0.3682220011353267,0.4258111891321309,
+HRM,18228,std8,0.03507160425503202,0.026979541520148902,0.016995061939562444,0.01236093737005585,-0.00809206273488312,-0.547798964638616,0.3555769730903973,
+HRM,18228,linear_slope,-0.013843636505214217,-0.010839807197199463,0.006474786335485576,0.004877381796879802,0.003003829308014754,0.526724521868034,0.643994676212768,
+HRM,18228,,-0.07614383449056474,0.018023035920062363,,,0.0941668704106271,,0.8879250998210106,1
+HRM,18228,,-0.11565428504475044,-0.009322403513007655,,,0.10633188153174278,,0.93969434118133,2
+HRM,18228,,-0.13658436145940067,-0.024826405557658183,,,0.11175795590174249,,0.9587865436688238,3
+HRM,18228,,-0.14925211017238504,-0.034827842265904144,,,0.1144242679064809,,0.9710710297244787,4
+HRM,18228,,-0.15991492039014282,-0.04385568872452813,,,0.11605923166561469,,0.9757216944329707,5
+HRM,18228,,-0.16865676173886646,-0.050558246030570894,,,0.11809851570829556,,0.9784294827665335,6
+HRM,18228,,-0.1758557753675759,-0.05794058682861804,,,0.11791518853895787,,0.9808924992733337,7
+HRM,18228,,-0.1839978350288093,-0.06500975266698635,,,0.11898808236182294,,0.9805253415331895,8
+HRM,20832,raw_col0,-0.0847591630423951,0.011107698112080065,0.05179253970030646,0.05301531095216162,0.09586686115447517,1.823854994389812,0.9046762589928058,
+HRM,20832,lambda_max,-0.08308592252134799,0.012636878445990327,0.05106450626510651,0.052742167253325714,0.09572280096733832,1.8378553724999471,0.9064287031912931,
+HRM,20832,lambda_2,-0.11467362503704225,-0.01651445633091917,0.0367971718364536,0.04245192890687587,0.09815916870612307,2.451208639805274,0.9615538338559921,
+HRM,20832,lambda_min8,-0.17355204840055388,-0.07933141731802816,0.025333389875238074,0.029643685675570172,0.09422063108252572,3.387855311282286,0.9847045440570621,
+HRM,20832,mean8,-0.14093293187680603,-0.044163071447564366,0.02730668520011817,0.034216690813552435,0.09676986042924166,3.0907030906378394,0.9825831642378405,
+HRM,20832,sum8,-1.1274634550144482,-0.3533045715805149,0.21845348160094535,0.2737335265084195,0.7741588834339332,3.0907030906378394,0.9825831642378405,
+HRM,20832,tail_mean_5_8,-0.16335714651812983,-0.06769531056555371,0.025771193349847434,0.03095169550840937,0.09566183595257612,3.3266133632294994,0.9836438541474513,
+HRM,20832,positive_sum,0.002644224575935648,0.04743396344555085,0.01201348001701435,0.08564483779837649,0.0447897388696152,0.7025433805106365,0.7440432269568961,
+HRM,20832,positive_count,0.0811965811965812,1.2589928057553956,0.3028163032146475,1.764306901214996,1.1777962245588145,0.8931820943664754,0.747501998401279,
+HRM,20832,spread,0.0904661258792059,0.09196829576401849,0.048359900172521494,0.041470557600432875,0.0015021698848125958,0.03350167214206436,0.5366168603578676,
+HRM,20832,gap12,0.03158770251569426,0.029151334776909496,0.038531531642676545,0.031379419072147044,-0.0024363677387847643,-0.06981179252939107,0.49451208264157903,
+HRM,20832,std8,0.029635493468769536,0.030242812565419993,0.01548691425709729,0.013328973304563521,0.000607319096650457,0.042222891088829616,0.5350642562872778,
+HRM,20832,linear_slope,-0.011730722677212124,-0.012155433061707922,0.005628131019498303,0.005137997356258274,-0.00042471038449579807,-0.07897071928397438,0.46315255487917356,
+HRM,20832,,-0.08308592252134799,0.012636878445990327,,,0.09572280096733832,,0.9064287031912931,1
+HRM,20832,,-0.11467362503704225,-0.01651445633091917,,,0.09815916870612307,,0.9615538338559921,2
+HRM,20832,,-0.13199459516610473,-0.03367419474508627,,,0.09832040042101847,,0.971407489393101,3
+HRM,20832,,-0.14428072621743393,-0.04497155668828494,,,0.09930916952914899,,0.9813072618828015,4
+HRM,20832,,-0.1533819133272538,-0.055780962670180886,,,0.0976009506570729,,0.9837207157351042,5
+HRM,20832,,-0.16005228720923775,-0.06414027231336712,,,0.09591201489587063,,0.9815378466457603,6
+HRM,20832,,-0.16644233713547388,-0.07152858996063868,,,0.0949137471748352,,0.982075877759331,7
+HRM,20832,,-0.17355204840055388,-0.07933141731802816,,,0.09422063108252572,,0.9847045440570621,8
+HRM,23436,raw_col0,-0.1396155291305258,-0.002783365376173506,0.05301860295912526,0.04699237400077002,0.13683216375435228,2.724133010730825,0.9710501014848841,
+HRM,23436,lambda_max,-0.1384992553999928,-0.0009893162803058476,0.05250447224033658,0.04620166848766205,0.13750993911968695,2.773083936382069,0.9724541028888855,
+HRM,23436,lambda_2,-0.1762771829387396,-0.02655255962989448,0.040469956144014434,0.038098297215635166,0.14972462330884512,3.800800082859685,0.9906298167167732,
+HRM,23436,lambda_min8,-0.23419410067980814,-0.08469394006924667,0.03386640709764444,0.026086687664204734,0.14950016061056148,4.928736997411976,0.9933615151006455,
+HRM,23436,mean8,-0.2008695753030298,-0.05231515268090541,0.033947683672789115,0.030225828139368114,0.14855442262212437,4.609849292220812,0.9922932531628184,
+HRM,23436,sum8,-1.6069566024242383,-0.4185212214472433,0.2715814693823129,0.2418066251149449,1.188435380976995,4.609849292220812,0.9922932531628184,
+HRM,23436,tail_mean_5_8,-0.22361706499433173,-0.07373835104911058,0.03411302963953862,0.027447186606000325,0.14987871394522115,4.82552624339931,0.9920032963511224,
+HRM,23436,positive_sum,0.0005239612893938558,0.02692777939243448,0.008109939580167196,0.04720688801212081,0.026403818103040624,0.7823886293627679,0.7254032688815297,
+HRM,23436,positive_count,0.019305019305019305,0.8932806324110671,0.255468945014259,1.260714674466594,0.8739756131060479,0.96419741522471,0.7251133120698338,
+HRM,23436,spread,0.09569484527981534,0.08370462378894082,0.04968529497306399,0.03564798046277715,-0.011990221490874517,-0.2762341196411612,0.4474338822164909,
+HRM,23436,gap12,0.0377779275387468,0.025563243349588635,0.03953470269719528,0.02631355107366511,-0.012214684189158165,-0.3622062420071065,0.42294016207059687,
+HRM,23436,std8,0.030851516711156227,0.027460611780833973,0.015623967647626968,0.0115487028962521,-0.0033909049303222534,-0.24591669197947877,0.45445388923649793,
+HRM,23436,linear_slope,-0.012061568754814359,-0.011072201341443112,0.005670060084631647,0.004511003105097667,0.0009893674133712466,0.1924762168548974,0.537320493842233,
+HRM,23436,,-0.1384992553999928,-0.0009893162803058476,,,0.13750993911968695,,0.9724541028888855,1
+HRM,23436,,-0.1762771829387396,-0.02655255962989448,,,0.14972462330884512,,0.9906298167167732,2
+HRM,23436,,-0.19297555731213922,-0.04273562421060532,,,0.1502399331015339,,0.9918354266180354,3
+HRM,23436,,-0.20473634679603991,-0.0532903171299953,,,0.15144602966604462,,0.9897599462816854,4
+HRM,23436,,-0.2130848042226197,-0.06302425350576064,,,0.15006055071685903,,0.9888442931921193,5
+HRM,23436,,-0.2203373938626304,-0.07040388627150813,,,0.14993350759112228,,0.9920490790056008,6
+HRM,23436,,-0.22685196121226867,-0.07683132434992687,,,0.1500206368623418,,0.993285210676515,7
+HRM,23436,,-0.23419410067980814,-0.08469394006924667,,,0.14950016061056148,,0.9933615151006455,8
+HRM,26040,raw_col0,-0.14642834789538028,0.03254939207545249,0.04527756528529629,0.05188324244695014,0.17897773997083277,3.668483857418232,0.9890289306640625,
+HRM,26040,lambda_max,-0.14456235250236205,0.034372387226426326,0.0445438913427663,0.051164910218980536,0.17893473972878837,3.7229411155533625,0.9896240234375,
+HRM,26040,lambda_2,-0.1606479498805129,0.004532475816546366,0.040059015398639194,0.04094857600302106,0.16518042569705926,4.069928514978557,0.994598388671875,
+HRM,26040,lambda_min8,-0.22339810436824337,-0.0630449069358292,0.03390565181671889,0.02569246522937591,0.16035319743241416,5.320346491354966,0.99884033203125,
+HRM,26040,mean8,-0.18957092847483636,-0.025070655147768406,0.03406363794439551,0.029948472745946975,0.16450027332706796,5.119045511636326,0.997589111328125,
+HRM,26040,sum8,-1.516567427798691,-0.20056524118214725,0.27250910355516406,0.2395877819675758,1.3160021866165437,5.119045511636326,0.997589111328125,
+HRM,26040,tail_mean_5_8,-0.21117772247089306,-0.04990981457450516,0.03356353507006993,0.02610197526735439,0.1612679078963879,5.353465196471137,0.9984893798828125,
+HRM,26040,positive_sum,0.00045363006938714534,0.074183922140719,0.004367127622522474,0.09775337039526304,0.07373029207133186,1.0635217051296169,0.865997314453125,
+HRM,26040,positive_count,0.01953125,1.94140625,0.16420194966393517,1.8751261350802344,1.921875,1.441122738975407,0.8662796020507812,
+HRM,26040,spread,0.07883575186588132,0.09741729416225553,0.031928444128407715,0.0416918901108217,0.01858154229637421,0.4994340386095273,0.62774658203125,
+HRM,26040,gap12,0.01608559737815085,0.02983991140987996,0.016253595794805675,0.03062822903705536,0.013754314031729109,0.5598909337106562,0.631378173828125,
+HRM,26040,std8,0.02668870749338241,0.03191496910462624,0.011608993080174152,0.013576770037277246,0.005226261611243829,0.412947284563429,0.614227294921875,
+HRM,26040,linear_slope,-0.010981691455645842,-0.012889701412468924,0.004710611895246819,0.005288854534176942,-0.0019080099568230813,-0.3802410352955103,0.392730712890625,
+HRM,26040,,-0.14456235250236205,0.034372387226426326,,,0.17893473972878837,,0.9896240234375,1
+HRM,26040,,-0.1606479498805129,0.004532475816546366,,,0.16518042569705926,,0.994598388671875,2
+HRM,26040,,-0.17728650986100547,-0.012887344006685453,,,0.16439916585432002,,0.99725341796875,3
+HRM,26040,,-0.18935972567123827,-0.026943501920413837,,,0.16241622375082443,,0.99755859375,4
+HRM,26040,,-0.19926558929728344,-0.03700471520960491,,,0.16226087408767853,,0.997161865234375,5
+HRM,26040,,-0.2072153732588049,-0.04559794080159918,,,0.16161743245720572,,0.9979248046875,6
+HRM,26040,,-0.21483182295924053,-0.053991695350987357,,,0.16084012760825317,,0.998291015625,7
+HRM,26040,,-0.22339810436824337,-0.0630449069358292,,,0.16035319743241416,,0.99884033203125,8
+TRM,26041,raw_col0,-0.03554810737259686,0.034024751214321254,0.01870363225091222,0.020897832583637133,0.06957285858691811,3.531252888915257,0.9836132156525814,
+TRM,26041,lambda_max,-0.03484145596473462,0.034498983389517714,0.018473432249889475,0.020792802192351568,0.06934043935425233,3.550756844011336,0.9838475357338123,
+TRM,26041,lambda_2,-0.03942191660605466,0.02402153684167392,0.016310749242639744,0.017259447813114788,0.06344345344772859,3.787180598359109,0.9867218620635788,
+TRM,26041,lambda_min8,-0.05106514692622221,-0.002698179903125467,0.011085119064177916,0.011362071825839244,0.048366967023096744,4.308753251199119,0.9924392720456143,
+TRM,26041,mean8,-0.044594623737682894,0.011831644308724274,0.013821989430236327,0.013794295206096182,0.05642626804640717,4.077834760786163,0.9900960712333047,
+TRM,26041,sum8,-0.35675698990146315,0.09465315446979419,0.11057591544189062,0.11035436164876945,0.45141014437125737,4.077834760786163,0.9900960712333047,
+TRM,26041,tail_mean_5_8,-0.04878217898325509,0.002079399892583064,0.01196631791196348,0.012024172013885363,0.050861578875838157,4.233398692773437,0.9916582051081778,
+TRM,26041,positive_sum,0.002200093488031367,0.11402314361281402,0.01421645695526589,0.08935805524637302,0.11182305012478265,1.8862819501561976,0.9693353120362415,
+TRM,26041,positive_count,0.1694915254237288,5.539170506912442,0.8015354411924052,2.473748858175321,5.369678981488713,3.112986897737863,0.9706006404748887,
+TRM,26041,spread,0.01622369096148759,0.03719716329264318,0.008406592236301944,0.015014304908682169,0.02097347233115559,1.7932190660758047,0.8946653128173084,
+TRM,26041,gap12,0.004580460641320037,0.010477446547843793,0.0038493887410157913,0.008925523880230393,0.005896985906523756,0.9048998129688157,0.7136608607357651,
+TRM,26041,std8,0.005236287183152611,0.012124661491602866,0.0026599948573616194,0.004708582051673959,0.006888374308450255,1.8729636673455754,0.9025697102241662,
+TRM,26041,linear_slope,-0.0021641803871718748,-0.00502138642230916,0.00110464240925107,0.0019075807064323422,-0.002857206035137285,-1.9030611346523516,0.09438412871983129,
+TRM,26041,,-0.03484145596473462,0.034498983389517714,,,0.06934043935425233,,0.9838475357338123,1
+TRM,26041,,-0.03942191660605466,0.02402153684167392,,,0.06344345344772859,,0.9867218620635788,2
+TRM,26041,,-0.04260402849670184,0.01650647970261417,,,0.059110508199316014,,0.9886745294071702,3
+TRM,26041,,-0.044760872900951654,0.011308554965656139,,,0.05606942786660779,,0.9888932281496524,4
+TRM,26041,,-0.04656553652011237,0.007080294789044223,,,0.053645831309156594,,0.9900648285558072,5
+TRM,26041,,-0.04807392087009325,0.003619178177355381,,,0.05169309904744863,,0.9914395063656956,6
+TRM,26041,,-0.04942411161659254,0.00031630650705811867,,,0.04974041812365066,,0.9920018745606498,7
+TRM,26041,,-0.05106514692622221,-0.002698179903125467,,,0.048366967023096744,,0.9924392720456143,8
+TRM,52082,raw_col0,-0.0007075830156292839,0.05957649097674423,0.01563302124862064,0.01786285175301579,0.06028407399237352,3.6572025021620704,0.981425702811245,
+TRM,52082,lambda_max,-0.00011202936298378704,0.059987526981987886,0.015374456518724242,0.01745859818259001,0.060099556344971675,3.7168507731245786,0.9824129852744311,
+TRM,52082,lambda_2,-0.005003940460077699,0.047595180713364646,0.01119476035297736,0.014550648902540746,0.05259912117344234,4.207151771106897,0.9913487282463186,
+TRM,52082,lambda_min8,-0.014980851835816683,0.015789016843094337,0.005100876039652212,0.010478502750931777,0.03076986867891102,4.123209717568259,0.9948795180722891,
+TRM,52082,mean8,-0.00932737177132882,0.03324007580221304,0.008255353581811876,0.011864955050428574,0.04256744757354186,4.389308032172127,0.993591030789826,
+TRM,52082,sum8,-0.07461897417063056,0.2659206064177043,0.06604282865449501,0.0949196404034286,0.3405395805883349,4.389308032172127,0.993591030789826,
+TRM,52082,tail_mean_5_8,-0.01301439359083114,0.02160164021022663,0.005987197259798179,0.010959990028742617,0.034616033801057766,4.269643585923531,0.9948627844712182,
+TRM,52082,positive_sum,0.011575991871503936,0.26611879217404444,0.03415248125828137,0.09451889482634411,0.2545428003025405,4.069476447187658,0.9932730923694779,
+TRM,52082,positive_count,0.9879518072289156,7.916666666666667,1.8707898976237152,0.3926406329796582,6.9287148594377514,4.536477031738777,0.9850485274431058,
+TRM,52082,spread,0.014868822472832896,0.04419851013889355,0.011296400995867149,0.014227594177523417,0.029329687666060658,2.3595141544089744,0.9461680053547523,
+TRM,52082,gap12,0.0048919110970939116,0.01239234626862324,0.005964070520138895,0.008551042068988442,0.007500435171529329,1.0718995191983391,0.7928547523427042,
+TRM,52082,std8,0.004749769571362592,0.014341179623073927,0.0035482484720922645,0.004466278932342464,0.009591410051711335,2.4572178106315405,0.9492469879518072,
+TRM,52082,linear_slope,-0.0019327436490592746,-0.005970016727121975,0.0014092267435198555,0.0018084067105169593,-0.0040372730780627005,-2.58088353368577,0.04687081659973226,
+TRM,52082,,-0.00011202936298378704,0.059987526981987886,,,0.060099556344971675,,0.9824129852744311,1
+TRM,52082,,-0.005003940460077699,0.047595180713364646,,,0.05259912117344234,,0.9913487282463186,2
+TRM,52082,,-0.0077386240891099585,0.03943772178091523,,,0.047176345870025185,,0.992570281124498,3
+TRM,52082,,-0.009706805895134549,0.03249361610053004,,,0.04220042199566459,,0.9940595716198126,4
+TRM,52082,,-0.011094541287443955,0.027378346368398827,,,0.03847288765584278,,0.9942603748326639,5
+TRM,52082,,-0.012397178906880926,0.023441741805678856,,,0.035838920712559785,,0.9943775100401606,6
+TRM,52082,,-0.013585002333182997,0.019797455823734506,,,0.033382458156917504,,0.9944444444444445,7
+TRM,52082,,-0.014980851835816683,0.015789016843094337,,,0.03076986867891102,,0.9948795180722891,8
+TRM,78123,raw_col0,-0.0022565727597132286,0.08176288979593664,0.02047556742566754,0.013821423689026198,0.08401946255564988,4.495556236204706,0.9937144886363637,
+TRM,78123,lambda_max,-0.002104297534184628,0.08205510303378105,0.0204009983589534,0.013646912543064034,0.08415940056796568,4.526480066902049,0.99375,
+TRM,78123,lambda_2,-0.02860644140260278,0.06622570278123022,0.018516204266289533,0.01042912237357108,0.094832144183833,5.763232776326466,0.9966086647727272,
+TRM,78123,lambda_min8,-0.03925927748059621,0.028471183351939544,0.011339595505391147,0.00872261044820221,0.06773046083253575,6.382305480508571,0.9984907670454546,
+TRM,78123,mean8,-0.03085258081190225,0.04898583621179568,0.014342485713197255,0.007687185353955472,0.07983841702369793,6.30160791137825,0.9991299715909091,
+TRM,78123,sum8,-0.246820646495218,0.39188668969436546,0.11473988570557804,0.061497482831643774,0.6387073361895834,6.30160791137825,0.9991299715909091,
+TRM,78123,tail_mean_5_8,-0.03740866408387236,0.03495748526038369,0.012325993647523768,0.008018631134703627,0.07236614934425606,6.471754546174604,0.9993607954545455,
+TRM,78123,positive_sum,0.011536654059641065,0.39188668969436546,0.040902795702990095,0.061497482831643774,0.3803500356347244,7.860727356127535,0.9991299715909091,
+TRM,78123,positive_count,0.6619318181818182,8.0,1.4229948875160074,0.0,7.338068181818182,6.207148838044777,0.9900568181818182,
+TRM,78123,spread,0.037154979946411586,0.05358391968184151,0.013486949127438155,0.013609179041450824,0.016428939735429922,1.212310517198905,0.8060191761363636,
+TRM,78123,gap12,0.026502143868418152,0.015829400252550842,0.013069863024547872,0.009715015355796698,-0.01067274361586731,-0.8787497331994156,0.2559303977272727,
+TRM,78123,std8,0.011604602271056778,0.01739541406807769,0.004204073782533862,0.0043062553304626895,0.005790811797020913,1.3642874890994128,0.8348188920454546,
+TRM,78123,linear_slope,-0.003830520190287694,-0.007214672225485888,0.0014339527246988522,0.001739567164392657,-0.003384152035198194,-2.1989119917619617,0.06878551136363636,
+TRM,78123,,-0.002104297534184628,0.08205510303378105,,,0.08415940056796568,,0.99375,1
+TRM,78123,,-0.02860644140260278,0.06622570278123022,,,0.094832144183833,,0.9966086647727272,2
+TRM,78123,,-0.03227364238645433,0.05576641424559057,,,0.0880400566320449,,0.9984375,3
+TRM,78123,,-0.03420160883648681,0.04800952859222889,,,0.08221113742871569,,0.9993607954545455,4
+TRM,78123,,-0.035610721931086366,0.04194192207651213,,,0.0775526440075985,,0.9989346590909091,5
+TRM,78123,,-0.0368502135909859,0.03676692203735001,,,0.07361713562833591,,0.99921875,6
+TRM,78123,,-0.037914443332820985,0.03264991357573308,,,0.07056435690855406,,0.9991654829545454,7
+TRM,78123,,-0.03925927748059621,0.028471183351939544,,,0.06773046083253575,,0.9984907670454546,8
+TRM,104164,raw_col0,0.0007209903823832564,0.09478743761979244,0.024787018479211907,0.011434654814441576,0.09406644723740919,4.309474726616314,0.9850942975942976,
+TRM,104164,lambda_max,0.0011589818968986748,0.0948834198753576,0.024561092809033697,0.011365623886545098,0.09372443797845893,4.332229759698265,0.9852242352242352,
+TRM,104164,lambda_2,-0.007594825008989294,0.07463094025749613,0.019864379269071985,0.010009002998617744,0.08222576526648542,4.664820565515803,0.9868762993762994,
+TRM,104164,lambda_min8,-0.020856195212288115,0.03271414110487377,0.010975698248774427,0.009883630334534472,0.05357033631716189,5.010124632836926,0.990588803088803,
+TRM,104164,mean8,-0.013257209616115657,0.05609436892523632,0.015520729699937329,0.007938198592961644,0.06935157854135197,5.028405574333048,0.9912013662013662,
+TRM,104164,sum8,-0.10605767692892526,0.44875495140189053,0.12416583759949863,0.06350558874369315,0.5548126283308158,5.028405574333048,0.9912013662013662,
+TRM,104164,tail_mean_5_8,-0.018433471481510245,0.04002377992288235,0.012223276001797756,0.009319343509472649,0.05845725140439259,5.091132975488244,0.9914241164241164,
+TRM,104164,positive_sum,0.024264690904521637,0.44875495140189053,0.07754420741442453,0.06350558874369315,0.4244902604973689,5.743672965835712,0.9912013662013662,
+TRM,104164,positive_count,1.0631868131868132,8.0,2.049890578221982,0.0,6.936813186813187,4.005564373676197,0.9766483516483516,
+TRM,104164,spread,0.02201517710918679,0.06216927877048383,0.01498024832021951,0.013810725966467343,0.04015410166129704,2.735203685416702,0.9606660231660231,
+TRM,104164,gap12,0.008753806905887968,0.020252479617861478,0.00819564281587485,0.010340605250850503,0.01149867271197351,1.2939471817521953,0.8213171963171964,
+TRM,104164,std8,0.0069570651147322455,0.019906376019402126,0.004729527195838003,0.004316665113535784,0.01294931090466988,2.8010501626618898,0.9623552123552124,
+TRM,104164,linear_slope,-0.0027577501392071387,-0.008257400098767975,0.0019130262867203052,0.001740025312128966,-0.005499649959560836,-2.943632546232543,0.034377784377784375,
+TRM,104164,,0.0011589818968986748,0.0948834198753576,,,0.09372443797845893,,0.9852242352242352,1
+TRM,104164,,-0.007594825008989294,0.07463094025749613,,,0.08222576526648542,,0.9868762993762994,2
+TRM,104164,,-0.0117505402660834,0.06371483053206592,,,0.07546537079814931,,0.9927049302049302,3
+TRM,104164,,-0.014137407624710269,0.0554306410454415,,,0.06956804867015177,,0.9899205524205524,4
+TRM,104164,,-0.016032042398944544,0.04766768324063034,,,0.06369972563957488,,0.9908115533115534,5
+TRM,104164,,-0.017676179287993542,0.04247049147237998,,,0.06014667076037353,,0.9912384912384913,6
+TRM,104164,,-0.019169469026814774,0.03724280387364529,,,0.05641227290046007,,0.9909229284229284,7
+TRM,104164,,-0.020856195212288115,0.03271414110487377,,,0.05357033631716189,,0.990588803088803,8
+TRM,130205,raw_col0,0.013444696512909148,0.10679112273454666,0.023486427996414334,0.01219455059799566,0.09334642622163751,4.376031576635131,0.988857881136951,
+TRM,130205,lambda_max,0.014031398803655426,0.10683381146192551,0.023192331918855984,0.012146058143345401,0.09280241265827009,4.402632530913402,0.9889198966408269,
+TRM,130205,lambda_2,0.006973827708889992,0.08373585876822472,0.018134555439068344,0.010719602982694268,0.07676203105933473,4.606263227387459,0.9911524547803617,
+TRM,130205,lambda_min8,-0.007503867852165352,0.0325613137036562,0.007452319675082162,0.009150933868884997,0.04006518155582155,5.061122135273741,0.9935297157622739,
+TRM,130205,mean8,0.0006696076976612102,0.05997022696118802,0.01255632080391271,0.008090347220741633,0.05930061926352681,5.090986565200385,0.9935503875968992,
+TRM,130205,sum8,0.005356861581289681,0.47976181568950416,0.10045056643130168,0.06472277776593306,0.4744049541082145,5.090986565200385,0.9935503875968992,
+TRM,130205,tail_mean_5_8,-0.004693869635777962,0.04096000522933901,0.008552052484366562,0.008566517277332476,0.04565387486511697,5.325715726578896,0.9943152454780362,
+TRM,130205,positive_sum,0.04017261892318711,0.47976181568950416,0.08580075368147339,0.06472277776593306,0.439589196766317,5.405644251996457,0.9935503875968992,
+TRM,130205,positive_count,2.4108527131782944,8.0,2.517984109505581,0.0,5.589147286821706,2.5481334848480044,0.9521963824289406,
+TRM,130205,spread,0.021535266655820777,0.07427249775826932,0.017082368998095492,0.012493415811398059,0.05273723110244854,3.2725978035379804,0.9726511627906976,
+TRM,130205,gap12,0.007057571094765433,0.02309795269370079,0.0074636242070143,0.01141200015366041,0.016040381598935356,1.8622569574630734,0.8932093023255814,
+TRM,130205,std8,0.0068694509154112035,0.023878361316685696,0.00555242153561603,0.003764861145987127,0.01700891040127449,3.2813896443255404,0.9723410852713178,
+TRM,130205,linear_slope,-0.0028045126760829825,-0.009864693831208917,0.0022784013985253245,0.0014839402866256757,-0.007060181155125934,-3.336045116152337,0.02736950904392765,
+TRM,130205,,0.014031398803655426,0.10683381146192551,,,0.09280241265827009,,0.9889198966408269,1
+TRM,130205,,0.006973827708889992,0.08373585876822472,,,0.07676203105933473,,0.9911524547803617,2
+TRM,130205,,0.0029902539692735794,0.06770775321125984,,,0.06471749924198626,,0.990594315245478,3
+TRM,130205,,0.00013685964258253522,0.05764437133073807,,,0.05750751168815554,,0.9937364341085271,4
+TRM,130205,,-0.0019758053857788626,0.04966452667117119,,,0.05164033205695005,,0.9935297157622739,5
+TRM,130205,,-0.003799696903122596,0.043507651045918465,,,0.04730734794904106,,0.995390180878553,6
+TRM,130205,,-0.005496108402045039,0.03810652949661016,,,0.043602637898655205,,0.9936950904392765,7
+TRM,130205,,-0.007503867852165352,0.0325613137036562,,,0.04006518155582155,,0.9935297157622739,8
+TRM,156246,raw_col0,0.033355438992845106,0.1080104663006721,0.025002590002442805,0.013864813007084868,0.074655027307827,3.2665364943777395,0.9934527768540073,
+TRM,156246,lambda_max,0.03340256920781724,0.10808430672172577,0.02494915155343601,0.013766377311272416,0.07468173751390852,3.2761588495146574,0.9935359161955437,
+TRM,156246,lambda_2,-0.004267912055779233,0.08321959303031044,0.02088632892931947,0.011074430393721442,0.08748750508608967,4.600130722912644,0.9983164283338876,
+TRM,156246,lambda_min8,-0.03704204443783965,0.031117352150050143,0.014879494486086477,0.009113322544810976,0.06815939658788979,4.962722044957253,0.9984411373461922,
+TRM,156246,mean8,-0.01719525050584332,0.059510113856841566,0.017247035290909478,0.0086521040888674,0.07670536436268488,4.905493416653001,0.9990646824077153,
+TRM,156246,sum8,-0.13756200404674657,0.47608091085473253,0.13797628232727582,0.0692168327109392,0.613642914901479,4.905493416653001,0.9990646824077153,
+TRM,156246,tail_mean_5_8,-0.03258687625807819,0.03992759636434306,0.01601823727211162,0.008781581807866947,0.07251447262242125,4.95751039483532,0.9987944795477219,
+TRM,156246,positive_sum,0.0501136147264783,0.47608091085473253,0.061302980660750975,0.0692168327109392,0.42596729612825424,6.715075374905016,0.9990646824077153,
+TRM,156246,positive_count,1.7268041237113403,8.0,1.846591480445282,0.0,6.27319587628866,3.894818695658879,0.9806701030927835,
+TRM,156246,spread,0.07044461364565689,0.07696695457167563,0.019699323135686134,0.01206255980918436,0.006522340926018735,0.35871152290874336,0.5740563684735617,
+TRM,156246,gap12,0.03767048126359648,0.024864713691415324,0.019721474052516917,0.011792263299464622,-0.012805767572181152,-0.7052475995679723,0.30377036913867644,
+TRM,156246,std8,0.02217449738755968,0.024624693724198295,0.005789314256542303,0.0033191908154004772,0.0024501963366386145,0.46158514936634953,0.6084344861988693,
+TRM,156246,linear_slope,-0.008340449245677553,-0.010171585197668857,0.0019240129409615878,0.001295440367378807,-0.0018311359519913046,-1.019769025442712,0.225764881942135,
+TRM,156246,,0.03340256920781724,0.10808430672172577,,,0.07468173751390852,,0.9935359161955437,1
+TRM,156246,,-0.004267912055779233,0.08321959303031044,,,0.08748750508608967,,0.9983164283338876,2
+TRM,156246,,-0.014483049475154648,0.06765439048890144,,,0.08213743996405609,,0.9997297971400066,3
+TRM,156246,,-0.02186610669131716,0.05741223515642266,,,0.07927834184773982,,0.9996258729630861,4
+TRM,156246,,-0.02740773299896337,0.04914349331069858,,,0.07655122630966195,,0.9986489857000332,5
+TRM,156246,,-0.03137805723175935,0.04278114806079576,,,0.07415920529255511,,0.9987944795477219,6
+TRM,156246,,-0.03451967036375039,0.03666839193582775,,,0.07118806229957814,,0.9981709344861989,7
+TRM,156246,,-0.03704204443783965,0.031117352150050143,,,0.06815939658788979,,0.9984411373461922,8
+TRM,182287,raw_col0,0.00821161610291902,0.10131858266664273,0.039662384266621574,0.013299851172299102,0.0931069665637237,2.6679423968269282,0.9476076555023923,
+TRM,182287,lambda_max,0.008431412303494461,0.10132696061874881,0.03951537194488716,0.013286222498512332,0.09289554831525434,2.671516841845828,0.9476275917065391,
+TRM,182287,lambda_2,-0.011238162814161594,0.07398981584066694,0.02055571236362548,0.010772236845608298,0.08522797865482853,4.589412252052702,0.9876395534290271,
+TRM,182287,lambda_min8,-0.029254327355240667,0.021346966996189265,0.008724291266967808,0.009348488329494035,0.05060129435142993,5.681182344322112,0.994377990430622,
+TRM,182287,mean8,-0.018691345271909503,0.049928791549256966,0.015008910662491892,0.008008984399241801,0.06862013682116647,5.052593201220387,0.993122009569378,
+TRM,182287,sum8,-0.14953076217527603,0.39943033239405573,0.12007128529993513,0.06407187519393441,0.5489610945693317,5.052593201220387,0.993122009569378,
+TRM,182287,tail_mean_5_8,-0.026891200006043378,0.029759585490286223,0.01036377701042864,0.008415861434409682,0.0566507854963296,5.712081912112207,0.993421052631579,
+TRM,182287,positive_sum,0.027419095844602212,0.3994350857048465,0.06669793946046551,0.0640642542620319,0.37201598986024426,5.623115921422678,0.993122009569378,
+TRM,182287,positive_count,0.8973684210526316,7.992424242424242,1.7114147490346392,0.08670850865348179,7.095055821371611,4.800652817505138,0.9867125199362041,
+TRM,182287,spread,0.03768573965873513,0.07997999362255954,0.03600937566579265,0.012801000384146902,0.04229425396382441,1.3317723290514567,0.8919258373205742,
+TRM,182287,gap12,0.019669575117656057,0.027337144778081864,0.02859263092954253,0.01272177737475195,0.007667569660425807,0.30050760054036296,0.7587121212121212,
+TRM,182287,std8,0.012072957547342389,0.025573930217646406,0.011206633007638274,0.0037628280017395497,0.013500972670304017,1.3691199403814454,0.8932416267942583,
+TRM,182287,linear_slope,-0.004465965486998445,-0.010477472793735133,0.00375925152452931,0.0014782377742576461,-0.006011507306736688,-1.8047381477774833,0.09675039872408293,
+TRM,182287,,0.008431412303494461,0.10132696061874881,,,0.09289554831525434,,0.9476275917065391,1
+TRM,182287,,-0.011238162814161594,0.07398981584066694,,,0.08522797865482853,,0.9876395534290271,2
+TRM,182287,,-0.017501985224110908,0.05780721207459768,,,0.07530919729870858,,0.99481658692185,3
+TRM,182287,,-0.02165722641632448,0.04726800189889742,,,0.0689252283152219,,0.9936403508771929,4
+TRM,182287,,-0.02433873612210032,0.03828959356090336,,,0.06262832968300368,,0.9937998405103668,5
+TRM,182287,,-0.026251249700037175,0.032453771285486946,,,0.05870502098552412,,0.9919856459330143,6
+TRM,182287,,-0.027720486846795355,0.026948010118565326,,,0.05466849696536068,,0.9926834130781499,7
+TRM,182287,,-0.029254327355240667,0.021346966996189265,,,0.05060129435142993,,0.994377990430622,8
+TRM,208328,raw_col0,0.03753090985757505,0.10676543856970966,0.025288246364197544,0.013259965039384697,0.06923452871213462,3.0198269813394845,0.9824422200520834,
+TRM,208328,lambda_max,0.037651085844269495,0.10677664534887299,0.02512397313308367,0.013229177688388983,0.0691255595046035,3.0337159572727512,0.9825439453125,
+TRM,208328,lambda_2,0.008163382514728331,0.07981906569330022,0.017306379991784124,0.011006946848836352,0.07165568317857189,4.479171352569411,0.9950358072916666,
+TRM,208328,lambda_min8,-0.01844145359408837,0.026382549254776677,0.007653696247770853,0.008303996637685445,0.04482400284886505,5.719781931771222,0.9974161783854166,
+TRM,208328,mean8,-0.003334924222756793,0.0556086836418217,0.01190007171326394,0.007874842223581683,0.058943607864578494,5.332366691079738,0.997314453125,
+TRM,208328,sum8,-0.026679393782054344,0.4448694691345736,0.09520057370611151,0.06299873778865346,0.47154886291662795,5.332366691079738,0.997314453125,
+TRM,208328,tail_mean_5_8,-0.015420041531266785,0.03501016539212287,0.009092084028274655,0.007884764308623952,0.050430206923389655,5.71574334935714,0.9977620442708334,
+TRM,208328,positive_sum,0.0587247395355727,0.4448694691345736,0.06458070830305747,0.06299873778865346,0.38614472959900087,6.003997219261945,0.997314453125,
+TRM,208328,positive_count,2.3385416666666665,8.0,1.834272664190693,0.0,5.661458333333334,3.5570002633923843,0.97265625,
+TRM,208328,spread,0.05609253943835787,0.08039409609409631,0.022109344723255545,0.012045527148538135,0.024301556655738445,1.2083434857858202,0.8315836588541666,
+TRM,208328,gap12,0.029487703329541166,0.026957579655572772,0.018874471888808943,0.011721880967665603,-0.0025301236739683937,-0.1454196424021657,0.4844767252604167,
+TRM,208328,std8,0.017963765622030784,0.025791017951462082,0.006840239440973541,0.0034387390527845256,0.007827252329431298,1.2664678318988167,0.8479817708333334,
+TRM,208328,linear_slope,-0.006622254649920248,-0.0106407870835028,0.0024252301281276825,0.0013270841640566898,-0.004018532433582553,-1.820854993098152,0.08083089192708333,
+TRM,208328,,0.037651085844269495,0.10677664534887299,,,0.0691255595046035,,0.9825439453125,1
+TRM,208328,,0.008163382514728331,0.07981906569330022,,,0.07165568317857189,,0.9950358072916666,2
+TRM,208328,,-0.0025121802484762887,0.06513736300985329,,,0.06764954325832957,,0.997802734375,3
+TRM,208328,,-0.008301515767508741,0.05309573351405561,,,0.06139724928156435,,0.9969482421875,4
+TRM,208328,,-0.012082870785585934,0.04413282019959297,,,0.0562156909851789,,0.9967244466145834,5
+TRM,208328,,-0.014609660524229184,0.03761178605782334,,,0.05222144658205252,,0.9982096354166666,6
+TRM,208328,,-0.016546181221163653,0.0319135060562985,,,0.04845968727746215,,0.99761962890625,7
+TRM,208328,,-0.01844145359408837,0.026382549254776677,,,0.04482400284886505,,0.9974161783854166,8
+TRM,234369,raw_col0,0.017852102687761037,0.10551282866247769,0.028232357301802357,0.013317820753930341,0.08766072597471665,3.4141554224023953,0.9871125043538836,
+TRM,234369,lambda_max,0.018100257435352516,0.10551282866247769,0.027988536327830896,0.013317820753930341,0.08741257122712517,3.4323113101662233,0.9871125043538836,
+TRM,234369,lambda_2,-0.004604066811501098,0.07867030866829486,0.019719553365150716,0.011658536708474037,0.08327437547979595,4.564405392945322,0.9933603274120516,
+TRM,234369,lambda_min8,-0.022638431533429103,0.0229484012013237,0.009298859423757973,0.007111412103997715,0.04558683273475281,5.140542287562098,0.9958420411006618,
+TRM,234369,mean8,-0.011827684605397524,0.05244761205414824,0.014332789786644273,0.007407756691502786,0.06427529665954576,4.9010763491549,0.9948624172762104,
+TRM,234369,sum8,-0.09462147684318019,0.4195808964331859,0.11466231829315418,0.05926205353202229,0.5142023732763661,4.9010763491549,0.9948624172762104,
+TRM,234369,tail_mean_5_8,-0.020207434739923615,0.03131334408957126,0.010792544128259897,0.006986440509582705,0.05152077882949488,5.11274632681911,0.995276036224312,
+TRM,234369,positive_sum,0.03539385316928405,0.4195808964331859,0.06747779646400791,0.05926205353202229,0.38418704326390185,5.835587574554392,0.9948624172762104,
+TRM,234369,positive_count,1.4595959595959596,8.0,1.9837429774009827,0.0,6.540404040404041,3.7415940899671596,0.976010101010101,
+TRM,234369,spread,0.04073868896878162,0.08256442746115399,0.02241355328473139,0.012417076383348144,0.041825738492372366,2.0285057054921545,0.9344740508533612,
+TRM,234369,gap12,0.022704324246853612,0.026842519994182832,0.01763881355441319,0.012612625297507308,0.00413819574732922,0.2482978753359048,0.6134839777081157,
+TRM,234369,std8,0.012928286975100262,0.026692670537360658,0.007005813445186261,0.0036572331056805014,0.013764383562260396,2.145648411526841,0.9478187042842215,
+TRM,234369,linear_slope,-0.004672796752504103,-0.010966535271122464,0.002483784411233101,0.001408622707803367,-0.006293738518618361,-2.749038204201973,0.025535527690700104,
+TRM,234369,,0.018100257435352516,0.10551282866247769,,,0.08741257122712517,,0.9871125043538836,1
+TRM,234369,,-0.004604066811501098,0.07867030866829486,,,0.08327437547979595,,0.9933603274120516,2
+TRM,234369,,-0.011769322974795054,0.06106864564634602,,,0.07283796862114107,,0.9923807035876001,3
+TRM,234369,,-0.015518605532542088,0.049075737097782304,,,0.06459434263032439,,0.9955372692441657,4
+TRM,234369,,-0.01773060891924854,0.04033861649704391,,,0.05806922541629245,,0.9950365726227796,5
+TRM,234369,,-0.019421544255513283,0.03388758148227272,,,0.053309125737786005,,0.9939916405433646,6
+TRM,234369,,-0.02103915425150354,0.0280787771776447,,,0.04911793142914824,,0.9948406478578893,7
+TRM,234369,,-0.022638431533429103,0.0229484012013237,,,0.04558683273475281,,0.9958420411006618,8
+TRM,260410,raw_col0,0.012635910749513134,0.10288177159125522,0.020353665740514393,0.012908135409945213,0.09024586084174209,4.765673402851837,0.9893530069689409,
+TRM,260410,lambda_max,0.013151870022697326,0.10296485964524543,0.020082030885045895,0.012799508784327467,0.0898129896225481,4.804380304771112,0.9894175341994321,
+TRM,260410,lambda_2,0.006369563279730556,0.07462051674976188,0.01397962220127846,0.010195166272820246,0.06825095347003132,5.1588697359447435,0.9895035705067539,
+TRM,260410,lambda_min8,-0.004715030728876456,0.02192719515523542,0.004227385419965169,0.008447152395468038,0.026642225884111873,4.838804799533285,0.9934612406435516,
+TRM,260410,mean8,0.0016685908707722002,0.05065993657996641,0.00833847799504414,0.007612387111020628,0.04899134570919421,5.979751217496621,0.9931170954142648,
+TRM,260410,sum8,0.013348726966177601,0.4052794926397313,0.06670782396035312,0.06089909688816503,0.39193076567355367,5.979751217496621,0.9931170954142648,
+TRM,260410,tail_mean_5_8,-0.002553942017630222,0.030341437701096412,0.004942638713582513,0.0076420050339071404,0.032895379718726636,5.780437375588359,0.9938484040264992,
+TRM,260410,positive_sum,0.030020373667639433,0.40529236062114143,0.06182877199665501,0.06086163404758876,0.375271986953502,6.079454596031284,0.9931170954142648,
+TRM,260410,positive_count,3.182741116751269,7.991525423728813,2.331010673590027,0.09166655785077943,4.808784306977544,2.346537314939504,0.9728770541168373,
+TRM,260410,spread,0.01786690075157378,0.08103766449001003,0.01876103350784552,0.012014923835799683,0.06317076373843625,3.6152538930748515,0.9816742665404801,
+TRM,260410,gap12,0.006782306742966769,0.028344342895483567,0.009279681044121564,0.011687724788651759,0.021562036152516798,2.176626109133382,0.9238363589434742,
+TRM,260410,std8,0.005704458291781968,0.025874699960892623,0.006055896688542684,0.003367481082971138,0.020170241669110654,3.6252488841650736,0.9808999397745849,
+TRM,260410,linear_slope,-0.0022456290199019484,-0.01060911729771913,0.002425449827958501,0.0012733048666694231,-0.008363488277817181,-3.7705968445654303,0.01903553299492386,
+TRM,260410,,0.013151870022697326,0.10296485964524543,,,0.0898129896225481,,0.9894175341994321,1
+TRM,260410,,0.006369563279730556,0.07462051674976188,,,0.06825095347003132,,0.9895035705067539,2
+TRM,260410,,0.0031100270082322196,0.058491040147462134,,,0.05538101313922991,,0.9918265508044395,3
+TRM,260410,,0.0009330347260383872,0.04783732529287621,,,0.04690429056683782,,0.9915684418824744,4
+TRM,260410,,-0.0006271256301962761,0.03984021845321029,,,0.04046734408340657,,0.9943431127935989,5
+TRM,260410,,-0.0017707919630388484,0.03266053826358738,,,0.03443133022662623,,0.9924503140325217,6
+TRM,260410,,-0.0031028197484093065,0.026937798932352562,,,0.03004061868076187,,0.993482749720382,7
+TRM,260410,,-0.004715030728876456,0.02192719515523542,,,0.026642225884111873,,0.9934612406435516,8
diff --git a/srm_design_codex.md b/srm_design_codex.md
new file mode 100644
index 0000000..6d5a167
--- /dev/null
+++ b/srm_design_codex.md
@@ -0,0 +1,69 @@
+# SRM (Stable Recursion Model) — Codex Design Synthesis
+
+Returned 2026-05-22 by codex-rescue.
+
+## Core insight: target mild contraction, not aggressive
+
+Empirical λ_1(success) ≈ -0.15 → effective gain ≈ exp(-0.15) ≈ **0.86**.
+Empirical λ_1(failure) ≈ +0.04 → gain ≈ **1.04**.
+
+**Target κ ∈ (0.85, 0.95)**, NOT 0.3-0.5. Over-contraction kills constraint propagation in Sudoku.
+
+## Architectural sketch
+
+State: `z = (h, ℓ) ∈ R^{d_H + d_L}`, weighted norm `‖z‖_P² = ‖h‖² + η·‖ℓ‖²`.
+
+Joint feature map `ψ_θ(z, x)` via **Sandwich Layers** (Wang & Manchester 2023) constrained `Lip_P(ψ) ≤ 1`.
+
+Block gain operator:
+```
+A = [[a_HH·I, a_HL·U_HL],
+ [a_LH·U_LH, a_LL·I]]
+```
+where `U_HL, U_LH` orthogonal, and gains satisfy block-row-sum under weighted metric:
+- `a_HH + √η · a_HL ≤ κ`
+- `a_LL + η^{-1/2} · a_LH ≤ κ`
+
+with `κ ∈ (0.85, 0.95)`.
+
+Update rule:
+```
+z_{t+1} = (1-α) z_t + α · A · ψ_θ(z_t, x) + b(x)
+```
+
+⇒ `Lip_P(T) ≤ (1-α) + α·κ < 1` by construction.
+
+With `α=1, κ=0.86`: λ_1 ≤ log(0.86) ≈ -0.15 — exactly matches empirical success regime.
+
+## Key methodological corrections vs my initial sketch
+
+1. **Constrain JOINT operator, not individual blocks**. HRM got this wrong: stable H and stable L don't imply stable joint due to cross-coupling J_HL, J_LH. Block-row-sum bound under weighted metric is the right translation of CF's empirical signal.
+
+2. **Use a time-tied, single joint operator**: TRM's weight-tying across iterations is good (it turns the model into an iterative solver). But fold H and L into one joint operator (unlike HRM's separate modules) to enforce a shared contraction metric.
+
+3. **Damping alone isn't sufficient**: `z + β·f(z)` only contracts if f is already Lipschitz-bounded. Damping is for margin, not the main guarantee.
+
+## Failure modes to watch
+
+1. **Over-contraction**: κ too low → constraint propagation collapses → underperforms TRM
+2. **Fake certification**: approximate spectral norm leaves hidden expansion directions; use exact Sandwich parameterization
+3. **Cross-coupling starvation**: `a_HL, a_LH → 0` → decoupled two-state system loses reasoning capacity (need lower bounds on coupling gains too?)
+
+## Literature to anchor
+
+Primary:
+- **Sandwich Layers** (Wang & Manchester 2023) — exact Lipschitz parameterization
+- **Deep Equilibrium Models** (Bai et al.) — for the fixed-point formulation
+
+Secondary (conceptual):
+- Lipschitz RNN (Erichson 2021)
+- AntisymmetricRNN (Chang 2019)
+- (CoRNN less relevant; oscillatory not the right inductive bias for Sudoku)
+
+## Implementation path
+
+1. Replace HRM/TRM's L_level/H_level with a **single tied joint operator** on (z_H, z_L)
+2. Implement Sandwich layer ψ with `Lip ≤ 1`
+3. Parameterize block gain matrix A with constraint `a_HH + √η·a_HL ≤ κ`, `a_LL + η^{-1/2}·a_LH ≤ κ`
+4. α as learnable sigmoid (margin), κ as hyperparameter or learnable bounded < 1
+5. Sweep κ over {0.85, 0.90, 0.95} to find expressivity sweet spot
diff --git a/srm_design_round2_claude.md b/srm_design_round2_claude.md
new file mode 100644
index 0000000..b63b260
--- /dev/null
+++ b/srm_design_round2_claude.md
@@ -0,0 +1,155 @@
+# SRM Design Round 2 — Claude's analysis (codex placeholder failed twice)
+
+Building on `srm_design_codex.md`. Codex round 1 produced solid joint-operator design; rounds 2-3 hit routing issues. This is my analysis of the 6 implementation questions.
+
+## (1) Attention compatibility
+
+**Recommendation: start with `mlp_t=True` for Sudoku, defer Lipschitz-attention to ARC phase.**
+
+Reasoning: TRM's reported 87% Sudoku 1k uses `mlp_t=True` (MLP over tokens, no self-attention). Their `pretrain_att_sudoku` (with attention) is reportedly ~75%. So for Sudoku specifically, attention isn't even helping — replacing it with an MLP gives **+12 percentage points of accuracy**. We can use MLP-over-tokens for both L and H updates; MLPs admit cheap Lipschitz bounds (Sandwich / AOL trivially compose with ReLU/GELU).
+
+For ARC where attention likely matters: use **LipsFormer-style attention** (Qi et al. 2023) — normalizes via L_∞ / cosine to bound Lipschitz constant of softmax(QK^T)V. Or skip self-attention and use **cross-attention only** with bounded-norm keys.
+
+## (2) Single vs two-state
+
+**Keep two-state (h, ℓ), with single tied operator.**
+
+Why: even though TRM ties weights, the two states play **functionally distinct roles**:
+- ℓ (z_L) receives `z_H + input_embedding` injection — "deliberation conditioned on current answer + question"
+- h (z_H) receives `z_L` injection — "answer update from deliberation"
+- The hierarchy of update frequencies (H every T steps vs L every step) is preserved
+- Empirical: TRM with H_cycles=3 L_cycles=6 outperforms vanilla recursion (H=1, L=18) on Sudoku
+
+Single state z would force the same vector to serve as both "answer" and "scratchpad", losing the inductive bias that's empirically validated.
+
+## (3) κ fixed vs learnable
+
+**Fixed κ=0.9 to start, learnable with anchor penalty as v2.**
+
+Phase 1 (validation): κ=0.9 fixed. Provably contractive, easy to analyze, no risk of κ-collapse.
+
+Phase 2 (refinement): `κ = sigmoid(c) * (0.99 - 0.5) + 0.5` learnable. Add anchor penalty `λ_anchor · (κ - 0.9)²` to keep near empirical sweet spot.
+
+Why not learnable from start: the empirical observation `success λ_1 ≈ -0.15 ⇔ κ ≈ 0.86` is from a trained HRM. SRM might find a different sweet spot but starting at the empirically-validated value avoids early-training pathology.
+
+## (4) Power iteration cost
+
+**Recommendation: AOL (Almost-Orthogonal Layers, Prach & Lampert 2022).**
+
+Compared to alternatives:
+- **Sandwich layers (Wang & Manchester 2023)**: tightest Lipschitz bound, but the direct (Cayley-transform) parameterization needs a matrix solve per layer on every forward pass. For 40h training cost-prohibitive on 4× A6000.
+- **AOL**: rescales each weight matrix by a diagonal factor computed from `|WᵀW|`, which gives a **guaranteed 1-Lipschitz layer with no iteration** at training time. Slight expressivity loss (the rescaled weights are only *almost* orthogonal, so the bound is conservative). For our recursive setting where we want a guaranteed bound, this trade-off is favorable.
+- **Householder reflections**: also exact orthogonal but slower per forward pass than AOL.
+- **Cached spectral estimates**: cheapest but introduces unstable training when stale estimates miss real growth.
+
+AOL is the right balance for our compute budget.
+
+## (5) Multi-modal capacity
+
+**Phase 1: strict-contraction SRM (unique fixed point). Phase 2: small Gaussian noise injection if ARC plateaus.**
+
+Sudoku puzzles have unique solutions (at least in Sudoku-Extreme, which filters for uniqueness). A unique-fixed-point SRM should therefore be able to match GRAM's 97% — GRAM gets there through stable dynamics + multi-sample voting at inference, not through architectural multi-modality during training.
+
+For ARC where multiple valid completions could exist: add `z_{t+1} = T(z_t) + σ·ε` with small σ. Mathematically:
+- Strict contraction: T is K-Lipschitz with K < 1, σ=0
+- Stochastic contraction: T is K-Lipschitz, σ small → stationary distribution exists, concentrated near fixed point but with Gaussian fluctuation
+- As σ increases: distribution spreads, eventually multi-modal if T has multiple "near-fixed-points"
+
+The σ value should be tuned per task. For Sudoku σ=0; for ARC σ=0.05-0.2.
+
+Mixture of K operators (alternative): too many extra parameters, harder to analyze, defer.
+
+## (6) Pseudocode for one SRM forward step
+
+```python
+def srm_step(h, l, x, theta, kappa=0.9, eta=1.0, alpha=1.0):
+ """
+ One SRM update step.
+ h, l: (B, seq, hidden) — high-level and low-level states
+ x: input embedding, (B, seq, hidden)
+ theta: parameters (Sandwich/AOL block + gain params + biases)
+ kappa, eta, alpha: SRM hyperparameters
+ """
+ # 1. Joint feature map via AOL block (1-Lipschitz by construction)
+ # Concatenate h with sqrt(eta)*l so the operator naturally acts under
+ # the weighted norm ‖z‖²_P = ‖h‖² + η‖l‖²
+ z = concat(h, sqrt(eta) * l) # (B, seq, 2*hidden)
+ psi = aol_block(z + bias_in(x), theta.aol) # (B, seq, 2*hidden), Lip≤1
+ psi_h, psi_l_scaled = split(psi)
+ psi_l = psi_l_scaled / sqrt(eta) # unscale back
+
+ # 2. Block-gain matrix A — row-sum bounded by kappa under weighted metric
+ # Parameterize a_HH, a_HL (with a_HH + sqrt(eta)*a_HL ≤ kappa) via softmax
+ gain_H = kappa * softmax(theta.gain_H_logits) # (2,) — [a_HH, sqrt(eta)*a_HL]
+ a_HH, a_HL_scaled = gain_H[0], gain_H[1]
+ a_HL = a_HL_scaled / sqrt(eta)
+
+ gain_L = kappa * softmax(theta.gain_L_logits) # similarly
+ a_LL, a_LH_scaled = gain_L[0], gain_L[1]
+ a_LH = a_LH_scaled * sqrt(eta)
+
+ # Apply block gain (U_HL, U_LH are orthogonal mixing matrices)
+ Az_h = a_HH * psi_h + a_HL * (U_HL @ psi_l)
+ Az_l = a_LH * (U_LH @ psi_h) + a_LL * psi_l
+
+ # 3. Damped update — α only adds margin: Lip ≤ (1-α) + α·κ, so κ < 1 is still required
+ h_new = (1 - alpha) * h + alpha * Az_h + bias_h(x)
+ l_new = (1 - alpha) * l + alpha * Az_l + bias_l(x)
+
+ return h_new, l_new
+```
+
+## A. Candidate architectures ranked
+
+1. **SRM-Joint-AOL** (top): the above pseudocode. AOL block + block-gain row-sum bound + light damping. Strict contraction, deterministic, ~7-10M params (TRM-scale).
+
+2. **SRM-Joint-Sandwich**: identical structure but Sandwich layers instead of AOL. Tighter Lipschitz bound, slower training. Use if AOL is too restrictive.
+
+3. **SRM-Stochastic** (Phase 2): SRM-Joint-AOL + Gaussian noise injection `σε` per step. Trades strict contraction for multi-modal coverage. Use for ARC if Phase 1 plateaus.
+
+## B. Full math for top candidate
+
+State: `z = (h, ℓ) ∈ R^(2D)`, weighted norm `‖z‖²_P = ‖h‖² + η‖ℓ‖²`.
+
+Per-step map: `T(z, x) = (1-α)z + α·A·ψ(z + b_in(x)) + b_out(x)`.
+
+Where:
+- `ψ: R^(2D) → R^(2D)` is AOL-parameterized, `Lip_P(ψ) ≤ 1`.
+- `A` is block matrix with rows `[a_HH·I, a_HL·U_HL]` and `[a_LH·U_LH, a_LL·I]`, `U_HL, U_LH` orthogonal.
+- Block-row-sum bound: `a_HH + √η·a_HL ≤ κ` and `√η·a_LH + a_LL ≤ κ` with `κ ∈ (0.85, 0.95)`.
+
+Lipschitz of T under P-norm: `Lip_P(T) ≤ (1-α) + α·Lip_P(A·ψ) ≤ (1-α) + α·κ < 1` by construction.
+
+⇒ `log Lip_P(T) ≤ log((1-α) + α·κ) < 0` — by construction non-chaotic.
+
+For α=1, κ=0.9: `λ_1 ≤ log 0.9 ≈ -0.105`. Consistent with the empirical success regime (λ_1 ≈ -0.15 lies below this bound).
+
+## C. Risks (what could prevent ≥87% Sudoku)
+
+1. **AOL expressivity ceiling**: orthogonal weights restrict the function class. Sandwich layers are less restrictive — if AOL caps below 87%, swap in Sandwich (slower training cost).
+
+2. **Block-gain matrix too restrictive**: forcing row-sum ≤ κ might not preserve TRM's specific information flow pattern (e.g., L→H amplification). Solution: lower bounds on `a_HL` and `a_LH` (e.g., ≥ 0.1·κ) to prevent decoupling.
+
+3. **Damping over-smooths**: α=1 means full step. If unstable, drop α. But α<1 reduces effective compute per step (anti-iterative).
+
+4. **Weight tying may not transfer**: TRM ties across H and L (J_L = J_H = J via same module). SRM-Joint already does this. But our `A` and `ψ` blocks are also tied across recursion steps (Banach iteration). If this hurts representational depth, untie A while keeping ψ tied.
+
+5. **Bias terms `b_in(x), b_out(x)`**: must be the only place where input x enters. If we naively use input-dependent ψ (`ψ(z, x)` instead of `ψ(z + b_in(x))`), Lipschitz analysis breaks (Lipschitz w.r.t. z still holds, but coupling with x is opaque).
+
+## D. Smallest validation experiment
+
+**Replace HRM's L_level and H_level with the AOL-Sandwich block (1-Lipschitz) + block-gain A, train Sudoku 1k for 5000 steps (1/4 the full recipe). Compare λ_joint_1 trajectory and final acc to baseline HRM at same step count.**
+
+Pass criteria:
+- λ_joint_1 stays strictly < 0 throughout (validates by-construction stability)
+- Accuracy at 5K steps ≥ HRM's accuracy at 5K steps (baseline ~15% per our checkpoint evolution)
+- If both pass, scale to full 20K-step recipe.
+
+This is ~2-4 hours on 4× A6000 (1/10 the full HRM training). Quick gate on whether SRM-Joint-AOL is viable.
+
+## Notes for synthesis (when codex eventually responds)
+
+- Codex's round 1 framing (joint operator, block-gain, κ=0.85-0.95) is solid. I largely agree.
+- Implementation-wise I lean towards AOL not Sandwich for speed.
+- Two-state I keep; codex left it ambiguous.
+- Noise injection deferred to Phase 2.
diff --git a/step3_train_with_rf.py b/step3_train_with_rf.py
new file mode 100644
index 0000000..6c2e4f9
--- /dev/null
+++ b/step3_train_with_rf.py
@@ -0,0 +1,302 @@
+"""Step 3: Continue HRM training with joint-Lyapunov reverse-flossing regularizer.
+
+L_total = L_HRM_ACT(supervised) + alpha * L_RF
+L_RF = mean over batch of max(0, lambda_joint_1 - lambda_star) ** 2
+
+FORWARD direction (what we measure):
+ - Joint tangent Q in R^{B x 2D x k} evolved via block-matrix Jacobian along
+ z trajectory (using JVP with create_graph=True so RF loss is diff'ble in theta).
+ - Lyapunov spectrum = (1/T) * sum_t log|R_ii(t)| from QR re-orthogonalization.
+BACKWARD direction (what flows to theta):
+ - Standard autograd of L_total through the entire forward graph (including the
+ JVP chain), as in Engelken's flossing. The QR decomposition's backward is
+ handled by PyTorch autograd; no manual pullback needed.
+
+Loaded from intermediate checkpoint (e.g. step_18228, before the success/failure
+contraction gap fully forms in the original training).
+"""
+from __future__ import annotations
+import sys, os, yaml, json, math, time, argparse
+from pathlib import Path
+import numpy as np
+import torch
+import torch.nn.functional as F
+
+HRM_DIR = Path("/home/yurenh2/rrm/hrm")
+sys.path.insert(0, str(HRM_DIR))
+
+from models.hrm.hrm_act_v1 import HierarchicalReasoningModel_ACTV1
+from models.losses import ACTLossHead
+from adam_atan2 import AdamATan2
+
+
+def load_model(ckpt_root: Path, ckpt_name: str, device: str):
+ cfg = yaml.safe_load((ckpt_root / "all_config.yaml").read_text())
+ arch_cfg = dict(cfg["arch"])
+ train_meta = json.loads((Path(cfg["data_path"]) / "train" / "dataset.json").read_text())
+ arch_cfg.update(batch_size=cfg["global_batch_size"], seq_len=train_meta["seq_len"],
+ vocab_size=train_meta["vocab_size"],
+ num_puzzle_identifiers=train_meta["num_puzzle_identifiers"], causal=False)
+ base = HierarchicalReasoningModel_ACTV1(arch_cfg)
+ head = ACTLossHead(base, loss_type=arch_cfg["loss"]["loss_type"])
+ sd = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True)
+ stripped = {k.replace("_orig_mod.", ""): v for k, v in sd.items()}
+ missing, unexpected = head.load_state_dict(stripped, strict=False)
+ print(f"[load {ckpt_name}] missing={len(missing)} unexpected={len(unexpected)}")
+ head.to(device)
+ return head, base, cfg, train_meta
+
+
+def jvp_train(f, x, v):
+ """JVP that participates in the autograd graph (create_graph=True)."""
+ return torch.autograd.functional.jvp(f, x, v=v, create_graph=True, strict=False)
+
+
+def compute_joint_lyap_spec(base, batch, k_lyap, lyap_act_steps, device, seed):
+ """Returns per-sample top-k Lyapunov spectrum of joint (z_H, z_L) dynamics,
+ differentiable wrt the model parameters. Shape (B, k)."""
+ inner = base.inner
+ cfg = inner.config
+ B = batch["inputs"].shape[0]
+ seq_full = cfg.seq_len + inner.puzzle_emb_len
+ hidden = cfg.hidden_size
+ D = seq_full * hidden
+
+ z_H = inner.H_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
+ z_L = inner.L_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
+ seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None)
+ input_embeddings = inner._input_embeddings(batch["inputs"], batch["puzzle_identifiers"])
+
+ g = torch.Generator(device=device).manual_seed(seed)
+ Q0 = torch.randn(B, 2*D, k_lyap, device=device, dtype=torch.float32, generator=g)
+ Q, _ = torch.linalg.qr(Q0)
+ log_R_sum = torch.zeros(B, k_lyap, device=device, dtype=torch.float32)
+ n_steps = 0
+
+ n_act = min(lyap_act_steps, cfg.halt_max_steps)
+ for _act in range(n_act):
+ for _h in range(cfg.H_cycles):
+ for _l in range(cfg.L_cycles):
+ v_H_j = Q[:, :D, :]
+ v_L_j = Q[:, D:, :]
+ v_comb = v_H_j + v_L_j
+ new_v_L_cols = []
+ f_L = lambda z: inner.L_level(z, z_H + input_embeddings, **seq_info)
+ for i in range(k_lyap):
+ v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype)
+ z_L_new, Dv = jvp_train(f_L, z_L, v_i)
+ new_v_L_cols.append(Dv.reshape(B, D).to(torch.float32))
+ new_v_L = torch.stack(new_v_L_cols, dim=-1)
+ Q = torch.cat([v_H_j, new_v_L], dim=1)
+ z_L = z_L_new
+ Q, R = torch.linalg.qr(Q)
+ log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
+ n_steps += 1
+ v_H_j = Q[:, :D, :]
+ v_L_j = Q[:, D:, :]
+ v_comb = v_H_j + v_L_j
+ new_v_H_cols = []
+ f_H = lambda z: inner.H_level(z, z_L, **seq_info)
+ for i in range(k_lyap):
+ v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype)
+ z_H_new, Dv = jvp_train(f_H, z_H, v_i)
+ new_v_H_cols.append(Dv.reshape(B, D).to(torch.float32))
+ new_v_H = torch.stack(new_v_H_cols, dim=-1)
+ Q = torch.cat([new_v_H, v_L_j], dim=1)
+ z_H = z_H_new
+ Q, R = torch.linalg.qr(Q)
+ log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
+ n_steps += 1
+
+ lyap_spec = log_R_sum / max(n_steps, 1) # (B, k)
+ return lyap_spec # full spectrum (B, k)
+
+
+def load_train_batches(data_path: Path, batch_size: int, n_iters: int, seed: int = 0):
+ rng = np.random.default_rng(seed)
+ inputs = np.load(data_path / "train" / "all__inputs.npy")
+ labels = np.load(data_path / "train" / "all__labels.npy")
+ pid = np.load(data_path / "train" / "all__puzzle_identifiers.npy")
+ N = len(inputs)
+ for _ in range(n_iters):
+ idx = rng.choice(N, size=batch_size, replace=False)
+ yield {
+ "inputs": torch.from_numpy(inputs[idx].astype(np.int32)),
+ "labels": torch.from_numpy(labels[idx].astype(np.int32)),
+ "puzzle_identifiers": torch.from_numpy(pid[idx].astype(np.int32)),
+ }
+
+
+def evaluate(head, base, data_path, n_samples, batch_size, device, seed=42):
+ rng = np.random.default_rng(seed)
+ inputs = np.load(data_path / "test" / "all__inputs.npy")
+ labels = np.load(data_path / "test" / "all__labels.npy")
+ pid = np.load(data_path / "test" / "all__puzzle_identifiers.npy")
+ idx_all = rng.choice(len(inputs), size=n_samples, replace=False)
+ head.eval()
+ correct = 0; token_correct = 0; token_total = 0
+ for s in range(0, n_samples, batch_size):
+ e = min(s + batch_size, n_samples)
+ idx = idx_all[s:e]
+ batch = {
+ "inputs": torch.from_numpy(inputs[idx].astype(np.int32)).to(device),
+ "labels": torch.from_numpy(labels[idx].astype(np.int32)).to(device),
+ "puzzle_identifiers": torch.from_numpy(pid[idx].astype(np.int32)).to(device),
+ }
+ with torch.no_grad():
+ with torch.device(device):
+ carry = base.initial_carry(batch)
+ for _ in range(base.config.halt_max_steps):
+ carry, outputs = base(carry=carry, batch=batch)
+ preds = outputs["logits"].argmax(dim=-1)
+ mask = batch["labels"] > 0
+ exact = ((preds == batch["labels"]) | ~mask).all(dim=-1).float()
+ correct += exact.sum().item()
+ token_correct += ((preds == batch["labels"]) & mask).sum().item()
+ token_total += mask.sum().item()
+ return correct / n_samples, token_correct / max(token_total, 1)
+
+
+def main():
+ ap = argparse.ArgumentParser()
+ ap.add_argument("--ckpt-root", required=True)
+ ap.add_argument("--ckpt-name", default="step_18228",
+ help="start from this checkpoint and continue training")
+ ap.add_argument("--n-steps", type=int, default=2000)
+ ap.add_argument("--batch-size", type=int, default=8)
+ ap.add_argument("--lr", type=float, default=1e-5)
+ ap.add_argument("--alpha-rf", type=float, default=0.0, help="RF weight; 0 = baseline")
+ ap.add_argument("--lambda-star", type=float, default=-0.05,
+ help="joint Lyapunov target. λ_joint_1 should be < λ_star for stable joint dynamics.")
+ ap.add_argument("--rf-mode", choices=["fixed","volume_cf","gelu","engelken_l2"], default="fixed",
+ help="fixed: max(0,λ-λ*)² hinge (one-sided). "
+ "volume_cf: max(0, mean_i λ_i - λ*)² over the measured top-k spectrum. "
+ "gelu: GeLU(λ) attractor at -0.75 (deprecated, kills success). "
+ "engelken_l2: (1/k) Σ λ_i² across full top-k (Engelken 2023, two-sided to 0).")
+ ap.add_argument("--k-lyap", type=int, default=2)
+ ap.add_argument("--lyap-act-steps", type=int, default=4)
+ ap.add_argument("--seed", type=int, default=42)
+ ap.add_argument("--eval-every", type=int, default=200)
+ ap.add_argument("--eval-n", type=int, default=512)
+ ap.add_argument("--eval-batch-size", type=int, default=32)
+ ap.add_argument("--out", default="step3_log.json")
+ args = ap.parse_args()
+
+ device = "cuda"
+ head, base, cfg, train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device)
+ optim = AdamATan2(head.parameters(), lr=args.lr, betas=(0.9, 0.95), weight_decay=cfg["weight_decay"])
+
+ print(f"\n=== Initial eval (loaded {args.ckpt_name}) ===")
+ acc0, tacc0 = evaluate(head, base, Path(cfg["data_path"]), args.eval_n, args.eval_batch_size, device)
+ print(f" initial: exact_acc = {acc0:.4f} token_acc = {tacc0:.4f}")
+
+ log = {"args": vars(args), "initial_acc": acc0, "initial_tok_acc": tacc0, "steps": [], "evals": []}
+ log["evals"].append({"step": 0, "acc": acc0, "tok_acc": tacc0})
+ t0 = time.time()
+ train_iter = load_train_batches(Path(cfg["data_path"]), args.batch_size, args.n_steps, seed=args.seed)
+
+ for step, batch in enumerate(train_iter):
+ batch = {k: v.to(device) for k, v in batch.items()}
+ head.train()
+ # Sparse puzzle embedding has fixed local_weights buffer (training batch=768);
+ # keep it in eval mode (still uses the weights table, just not the local buffer).
+ base.inner.puzzle_emb.eval()
+ for p in base.inner.puzzle_emb.parameters():
+ p.requires_grad_(False)
+
+ # ---- Supervised ACT loss (accumulated over all halt_max_steps ACT steps) ----
+ with torch.device(device):
+ carry = base.initial_carry(batch)
+ sup_loss_sum = 0.0
+ n_loss = 0
+ for _ in range(base.config.halt_max_steps):
+ carry, l, metrics, _, all_finish = head(return_keys=[], carry=carry, batch=batch)
+ sup_loss_sum = sup_loss_sum + l
+ n_loss += 1
+ if all_finish: break
+ sup_loss = sup_loss_sum / max(n_loss, 1) / args.batch_size
+
+ # ---- Reverse-flossing penalty (only if alpha > 0) ----
+ if args.alpha_rf > 0:
+ lyap_spec = compute_joint_lyap_spec(
+ base, batch, k_lyap=args.k_lyap,
+ lyap_act_steps=args.lyap_act_steps, device=device,
+ seed=args.seed + step,
+ ) # (B, k)
+ lyap1 = lyap_spec[:, 0]
+ if args.rf_mode == "engelken_l2":
+ # Engelken 2023: push all top-k λ_i² → 0 (two-sided)
+ rf_loss = (lyap_spec ** 2).mean()
+ excess = lyap1 # log signed λ_1
+ elif args.rf_mode == "volume_cf":
+ # One-sided cap on local phase-space volume expansion.
+ # Allows λ_1 > 0 when compensated by enough contraction in other modes.
+ lyap_volume = lyap_spec.mean(dim=1)
+ excess = (lyap_volume - args.lambda_star).clamp_min(0.0)
+ rf_loss = (excess ** 2).mean()
+ elif args.rf_mode == "gelu":
+ rf_loss = torch.nn.functional.gelu(lyap1).mean()
+ excess = lyap1 - (-0.751)
+ else: # fixed (hinge on top-1)
+ excess = (lyap1 - args.lambda_star).clamp_min(0.0)
+ rf_loss = (excess ** 2).mean()
+ else:
+ if args.k_lyap > 0:
+ with torch.no_grad():
+ lyap_spec = compute_joint_lyap_spec(
+ base, batch, k_lyap=args.k_lyap,
+ lyap_act_steps=args.lyap_act_steps, device=device,
+ seed=args.seed + step,
+ )
+ lyap1 = lyap_spec[:, 0]
+ else:
+ # Fast alpha=0 baseline path: skip diagnostic-only Lyapunov work.
+ lyap1 = torch.zeros(batch["inputs"].shape[0], device=device)
+ lyap_spec = lyap1[:, None]
+ rf_loss = torch.zeros((), device=device)
+ excess = (lyap1 - args.lambda_star).clamp_min(0.0)
+
+ total_loss = sup_loss + args.alpha_rf * rf_loss
+
+ optim.zero_grad(set_to_none=True)
+ total_loss.backward()
+ torch.nn.utils.clip_grad_norm_([p for p in head.parameters() if p.requires_grad], 1.0)
+ optim.step()
+
+ rec = {
+ "step": step, "sup_loss": float(sup_loss.item()),
+ "rf_loss": float(rf_loss.item()),
+ "total_loss": float(total_loss.item()),
+ "lyap1_mean": float(lyap1.detach().mean().item()),
+ "lyap1_max": float(lyap1.detach().max().item()),
+ "lyap_volume_mean": float(lyap_spec.detach().mean(dim=1).mean().item()),
+ "lyap_volume_max": float(lyap_spec.detach().mean(dim=1).max().item()),
+ "frac_above_star": float((excess > 0).float().mean().item()),
+ }
+ log["steps"].append(rec)
+ if step % 10 == 0 or step == args.n_steps - 1:
+ print(f" [{step:>4}/{args.n_steps}] dt={time.time()-t0:.1f}s "
+ f"sup={rec['sup_loss']:.4f} rf={rec['rf_loss']:.4f} "
+ f"λj1_mean={rec['lyap1_mean']:+.4f} max={rec['lyap1_max']:+.4f} "
+ f"frac>λ*={rec['frac_above_star']:.2f}", flush=True)
+
+ if (step + 1) % args.eval_every == 0:
+ acc, tacc = evaluate(head, base, Path(cfg["data_path"]),
+ args.eval_n, args.eval_batch_size, device)
+ print(f" >> EVAL @ step {step+1}: exact_acc={acc:.4f} (Δ from init: {acc-acc0:+.4f})", flush=True)
+ log["evals"].append({"step": step + 1, "acc": acc, "tok_acc": tacc})
+
+ acc_f, tacc_f = evaluate(head, base, Path(cfg["data_path"]),
+ args.eval_n, args.eval_batch_size, device)
+ print(f"\n=== Final eval ===")
+ print(f" initial: {acc0:.4f} final: {acc_f:.4f} (Δ {acc_f-acc0:+.4f})")
+ log["final_acc"] = acc_f
+ log["final_tok_acc"] = tacc_f
+ log["evals"].append({"step": args.n_steps, "acc": acc_f, "tok_acc": tacc_f})
+
+ Path(args.out).write_text(json.dumps(log, indent=2))
+ print(f"log → {args.out}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/step4_from_scratch.py b/step4_from_scratch.py
new file mode 100644
index 0000000..e1dd738
--- /dev/null
+++ b/step4_from_scratch.py
@@ -0,0 +1,334 @@
+"""Step 4: From-scratch HRM training with CF regularizer (no checkpoint load).
+
+Tests whether forcing λ_joint_1 → λ* from step 0 affects learning trajectory.
+
+Hypothesis test:
+ - Baseline (α=0): λ naturally drifts toward HRM's attractor (~-0.15)
+ - CF λ*=0: forces λ to stay near 0 (edge of chaos)
+ - CF λ*=-0.15: enforces natural attractor from start (should be neutral)
+
+For each condition we track λ trajectory + acc + halt distribution at fixed steps.
+"""
+from __future__ import annotations
+import sys, os, yaml, json, math, time, argparse
+from pathlib import Path
+import numpy as np
+import torch
+import torch.nn.functional as F
+
+HRM_DIR = Path("/home/yurenh2/rrm/hrm")
+sys.path.insert(0, str(HRM_DIR))
+
+from models.hrm.hrm_act_v1 import HierarchicalReasoningModel_ACTV1
+from models.losses import ACTLossHead
+from models.sparse_embedding import CastedSparseEmbeddingSignSGD_Distributed
+from adam_atan2 import AdamATan2
+
+
+def build_model_from_scratch(data_path: Path, batch_size: int, device: str,
+ hidden_size: int = 512, num_heads: int = 8):
+ """Build HRM with the official Sudoku-1k arch config but at our batch size + arbitrary hidden."""
+ train_meta = json.loads((data_path / "train" / "dataset.json").read_text())
+ arch_cfg = dict(
+ H_cycles=2, H_layers=4,
+ L_cycles=2, L_layers=4,
+ expansion=4,
+ halt_exploration_prob=0.1,
+ halt_max_steps=16,
+ hidden_size=hidden_size,
+ num_heads=num_heads,
+ pos_encodings="rope",
+ puzzle_emb_ndim=hidden_size,
+ loss=dict(loss_type="stablemax_cross_entropy", name="losses@ACTLossHead"),
+ batch_size=batch_size,
+ vocab_size=train_meta["vocab_size"],
+ seq_len=train_meta["seq_len"],
+ num_puzzle_identifiers=train_meta["num_puzzle_identifiers"],
+ causal=False,
+ )
+ with torch.device(device):
+ base = HierarchicalReasoningModel_ACTV1(arch_cfg)
+ head = ACTLossHead(base, loss_type=arch_cfg["loss"]["loss_type"])
+ return head, base, train_meta
+
+
+def jvp_train(f, x, v):
+ return torch.autograd.functional.jvp(f, x, v=v, create_graph=True, strict=False)
+
+
+def compute_joint_lyap_spec(base, batch, k_lyap, lyap_act_steps, device, seed, with_grad=True):
+ """Returns FULL top-k Lyapunov spectrum (B, k), differentiable wrt theta."""
+ inner = base.inner
+ cfg = inner.config
+ B = batch["inputs"].shape[0]
+ seq_full = cfg.seq_len + inner.puzzle_emb_len
+ hidden = cfg.hidden_size
+ D = seq_full * hidden
+
+ z_H = inner.H_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
+ z_L = inner.L_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
+ seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None)
+ input_embeddings = inner._input_embeddings(batch["inputs"], batch["puzzle_identifiers"])
+
+ g = torch.Generator(device=device).manual_seed(seed)
+ Q0 = torch.randn(B, 2*D, k_lyap, device=device, dtype=torch.float32, generator=g)
+ Q, _ = torch.linalg.qr(Q0)
+ log_R_sum = torch.zeros(B, k_lyap, device=device, dtype=torch.float32)
+ n_steps = 0
+
+ jvp_fn = (lambda f, x, v: torch.autograd.functional.jvp(f, x, v=v, create_graph=with_grad, strict=False))
+
+ n_act = min(lyap_act_steps, cfg.halt_max_steps)
+ for _act in range(n_act):
+ for _h in range(cfg.H_cycles):
+ for _l in range(cfg.L_cycles):
+ v_H_j = Q[:, :D, :]; v_L_j = Q[:, D:, :]
+ v_comb = v_H_j + v_L_j
+ new_v_L_cols = []
+ f_L = lambda z: inner.L_level(z, z_H + input_embeddings, **seq_info)
+ for i in range(k_lyap):
+ v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype)
+ z_L_new, Dv = jvp_fn(f_L, z_L, v_i)
+ new_v_L_cols.append(Dv.reshape(B, D).to(torch.float32))
+ new_v_L = torch.stack(new_v_L_cols, dim=-1)
+ Q = torch.cat([v_H_j, new_v_L], dim=1)
+ z_L = z_L_new
+ Q, R = torch.linalg.qr(Q)
+ log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
+ n_steps += 1
+ v_H_j = Q[:, :D, :]; v_L_j = Q[:, D:, :]
+ v_comb = v_H_j + v_L_j
+ new_v_H_cols = []
+ f_H = lambda z: inner.H_level(z, z_L, **seq_info)
+ for i in range(k_lyap):
+ v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype)
+ z_H_new, Dv = jvp_fn(f_H, z_H, v_i)
+ new_v_H_cols.append(Dv.reshape(B, D).to(torch.float32))
+ new_v_H = torch.stack(new_v_H_cols, dim=-1)
+ Q = torch.cat([new_v_H, v_L_j], dim=1)
+ z_H = z_H_new
+ Q, R = torch.linalg.qr(Q)
+ log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
+ n_steps += 1
+
+ lyap_spec = log_R_sum / max(n_steps, 1)
+ return lyap_spec # (B, k_lyap)
+
+
+def load_train_batches(data_path: Path, batch_size: int, n_iters: int, seed: int = 0):
+ rng = np.random.default_rng(seed)
+ inputs = np.load(data_path / "train" / "all__inputs.npy")
+ labels = np.load(data_path / "train" / "all__labels.npy")
+ pid = np.load(data_path / "train" / "all__puzzle_identifiers.npy")
+ N = len(inputs)
+ for _ in range(n_iters):
+ idx = rng.choice(N, size=batch_size, replace=False)
+ yield {
+ "inputs": torch.from_numpy(inputs[idx].astype(np.int32)),
+ "labels": torch.from_numpy(labels[idx].astype(np.int32)),
+ "puzzle_identifiers": torch.from_numpy(pid[idx].astype(np.int32)),
+ }
+
+
+def evaluate(head, base, data_path, n_samples, batch_size, device, seed=42):
+ rng = np.random.default_rng(seed)
+ inputs = np.load(data_path / "test" / "all__inputs.npy")
+ labels = np.load(data_path / "test" / "all__labels.npy")
+ pid = np.load(data_path / "test" / "all__puzzle_identifiers.npy")
+ idx_all = rng.choice(len(inputs), size=n_samples, replace=False)
+ head.eval()
+ correct = 0; token_correct = 0; token_total = 0
+ for s in range(0, n_samples, batch_size):
+ e = min(s + batch_size, n_samples)
+ idx = idx_all[s:e]
+ batch = {
+ "inputs": torch.from_numpy(inputs[idx].astype(np.int32)).to(device),
+ "labels": torch.from_numpy(labels[idx].astype(np.int32)).to(device),
+ "puzzle_identifiers": torch.from_numpy(pid[idx].astype(np.int32)).to(device),
+ }
+ with torch.no_grad():
+ with torch.device(device):
+ carry = base.initial_carry(batch)
+ for _ in range(base.config.halt_max_steps):
+ carry, outputs = base(carry=carry, batch=batch)
+ preds = outputs["logits"].argmax(dim=-1)
+ mask = batch["labels"] > 0
+ exact = ((preds == batch["labels"]) | ~mask).all(dim=-1).float()
+ correct += exact.sum().item()
+ token_correct += ((preds == batch["labels"]) & mask).sum().item()
+ token_total += mask.sum().item()
+ return correct / n_samples, token_correct / max(token_total, 1)
+
+
+def warmup_constant_lr(step, base_lr, warmup):
+    """Learning rate at `step`: linear ramp from 0 over `warmup` steps, then `base_lr`."""
+    still_warming = step < warmup
+    return base_lr * step / max(1, warmup) if still_warming else base_lr
+
+
+def main():
+    """Train an HRM from random init with an optional Lyapunov regulariser.
+
+    Per step: L_total = supervised ACT loss + alpha_rf * rf_loss, where rf_loss
+    depends on --rf-mode (see the flag's help text). With --alpha-rf 0 the
+    Lyapunov spectrum is still measured (without gradient) for logging only.
+
+    Outputs:
+        --out: JSON log with CLI args, per-step records and periodic evals.
+        --save-ckpt (optional): torch checkpoint with the final state_dict.
+    """
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--data-path", default="/home/yurenh2/rrm/data/sudoku-extreme-1k-aug-1000")
+    ap.add_argument("--n-steps", type=int, default=2500)
+    ap.add_argument("--batch-size", type=int, default=8)
+    ap.add_argument("--lr", type=float, default=1e-4)
+    ap.add_argument("--puzzle-emb-lr", type=float, default=1e-4)
+    ap.add_argument("--warmup-steps", type=int, default=200)
+    ap.add_argument("--weight-decay", type=float, default=1.0)
+    ap.add_argument("--hidden-size", type=int, default=512, help="HRM hidden dim (default official 512)")
+    ap.add_argument("--num-heads", type=int, default=8, help="must divide hidden_size; default 8")
+    ap.add_argument("--alpha-rf", type=float, default=0.0, help="0 = baseline; >0 = CF/Engelken")
+    ap.add_argument("--rf-mode", choices=["fixed", "volume_cf", "engelken_l2"], default="fixed",
+                    help="fixed: hinge max(0, λ_1-λ*)² on top-1; "
+                         "volume_cf: hinge max(0, mean_i λ_i-λ*)² over top-k; "
+                         "engelken_l2: (1/k) Σ λ_i² across full top-k spectrum")
+    ap.add_argument("--lambda-star", type=float, default=0.0, help="used in fixed and volume_cf modes")
+    ap.add_argument("--k-lyap", type=int, default=2)
+    ap.add_argument("--lyap-act-steps", type=int, default=4)
+    ap.add_argument("--seed", type=int, default=42)
+    ap.add_argument("--eval-every", type=int, default=250)
+    ap.add_argument("--eval-n", type=int, default=512)
+    ap.add_argument("--eval-batch-size", type=int, default=32)
+    ap.add_argument("--out", required=True)
+    ap.add_argument("--save-ckpt", default="", help="path to save final model state_dict (empty = skip)")
+    args = ap.parse_args()
+
+    # Enforce the constraint the --num-heads help text documents, before building anything.
+    if args.num_heads <= 0 or args.hidden_size % args.num_heads != 0:
+        ap.error(f"--num-heads ({args.num_heads}) must be positive and divide "
+                 f"--hidden-size ({args.hidden_size})")
+
+    # Make sure the log destination is writable up front, so a long run cannot
+    # lose its results at the final write because the directory is missing.
+    out_path = Path(args.out)
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+
+    device = "cuda"
+    torch.manual_seed(args.seed); np.random.seed(args.seed)
+    data_path = Path(args.data_path)
+    head, base, train_meta = build_model_from_scratch(data_path, args.batch_size, device,
+                                                      hidden_size=args.hidden_size,
+                                                      num_heads=args.num_heads)
+    print(f"Built HRM from scratch | params={sum(p.numel() for p in head.parameters()):,} | "
+          f"vocab={train_meta['vocab_size']} seq={train_meta['seq_len']} "
+          f"num_pids={train_meta['num_puzzle_identifiers']}")
+
+    # Two optimizers: SignSGD for puzzle_emb (sparse), AdamATan2 for rest.
+    # Both start at lr=0; the actual LR is written into param_groups every step.
+    puzzle_emb_opt = CastedSparseEmbeddingSignSGD_Distributed(
+        base.inner.puzzle_emb.buffers(),
+        lr=0,
+        weight_decay=args.weight_decay,
+        world_size=1,
+    )
+    main_opt = AdamATan2(head.parameters(), lr=0, betas=(0.9, 0.95), weight_decay=args.weight_decay)
+
+    # Baseline eval (random init)
+    acc0, tacc0 = evaluate(head, base, data_path, args.eval_n, args.eval_batch_size, device)
+    print(f"=== step 0 (random init): exact_acc = {acc0:.4f} token_acc = {tacc0:.4f} ===")
+
+    log = {"args": vars(args), "initial_acc": acc0, "initial_tok_acc": tacc0, "steps": [], "evals": []}
+    log["evals"].append({"step": 0, "acc": acc0, "tok_acc": tacc0})
+    t0 = time.time()
+    train_iter = load_train_batches(data_path, args.batch_size, args.n_steps, seed=args.seed)
+
+    for step, batch in enumerate(train_iter):
+        batch = {k: v.to(device) for k, v in batch.items()}
+
+        # Update LR
+        cur_lr = warmup_constant_lr(step, args.lr, args.warmup_steps)
+        cur_pe_lr = warmup_constant_lr(step, args.puzzle_emb_lr, args.warmup_steps)
+        for pg in main_opt.param_groups: pg["lr"] = cur_lr
+        for pg in puzzle_emb_opt.param_groups: pg["lr"] = cur_pe_lr
+
+        # evaluate() leaves the head in eval mode, so re-enter train mode every step.
+        head.train()
+
+        # ACT loss: averaged over the ACT steps actually taken, then over the batch.
+        with torch.device(device):
+            carry = base.initial_carry(batch)
+        sup_loss_sum = 0.0; n_loss = 0
+        for _ in range(base.config.halt_max_steps):
+            carry, l, metrics, _, all_finish = head(return_keys=[], carry=carry, batch=batch)
+            sup_loss_sum = sup_loss_sum + l
+            n_loss += 1
+            if all_finish: break
+        sup_loss = sup_loss_sum / max(n_loss, 1) / args.batch_size
+
+        # CF / Engelken loss (skip if alpha=0; still measure λ for logging in baseline)
+        if args.alpha_rf > 0:
+            lyap_spec = compute_joint_lyap_spec(base, batch, args.k_lyap, args.lyap_act_steps,
+                                                device, args.seed + step, with_grad=True)
+            lyap1 = lyap_spec[:, 0]
+            if args.rf_mode == "engelken_l2":
+                # Engelken: L = (1/k) Σ_i λ_i² over batch → push all spectrum toward 0
+                rf_loss = (lyap_spec ** 2).mean()
+                excess = lyap1  # for logging (no hinge here)
+            elif args.rf_mode == "volume_cf":
+                # One-sided cap on local phase-space volume expansion.
+                # Allows λ_1 > 0 when compensated by contraction in other measured modes.
+                lyap_volume = lyap_spec.mean(dim=1)
+                excess = (lyap_volume - args.lambda_star).clamp_min(0.0)
+                rf_loss = (excess ** 2).mean()
+            else:  # fixed (hinge on top-1)
+                excess = (lyap1 - args.lambda_star).clamp_min(0.0)
+                rf_loss = (excess ** 2).mean()
+        else:
+            with torch.no_grad():
+                lyap_spec = compute_joint_lyap_spec(base, batch, args.k_lyap, args.lyap_act_steps,
+                                                    device, args.seed + step, with_grad=False)
+            lyap1 = lyap_spec[:, 0]
+            rf_loss = torch.zeros((), device=device)
+            excess = (lyap1 - args.lambda_star).clamp_min(0.0)
+
+        total_loss = sup_loss + args.alpha_rf * rf_loss
+
+        puzzle_emb_opt.zero_grad(set_to_none=True)
+        main_opt.zero_grad(set_to_none=True)
+        total_loss.backward()
+        torch.nn.utils.clip_grad_norm_([p for p in head.parameters() if p.requires_grad], 1.0)
+        main_opt.step()
+        puzzle_emb_opt.step()
+
+        with torch.no_grad():
+            lyap_mean_per_i = lyap_spec.detach().mean(dim=0).cpu().tolist()
+        rec = {
+            "step": step, "lr": cur_lr,
+            "sup_loss": float(sup_loss.item()),
+            "rf_loss": float(rf_loss.item()),
+            "total_loss": float(total_loss.item()),
+            "lyap1_mean": float(lyap1.detach().mean().item()),
+            "lyap1_max": float(lyap1.detach().max().item()),
+            "lyap1_min": float(lyap1.detach().min().item()),
+            "lyap_volume_mean": float(lyap_spec.detach().mean(dim=1).mean().item()),
+            "lyap_volume_max": float(lyap_spec.detach().mean(dim=1).max().item()),
+            "lyap_spec_mean": lyap_mean_per_i,
+            "frac_above_star": float((excess > 0).float().mean().item()),
+        }
+        log["steps"].append(rec)
+        if step % 25 == 0 or step == args.n_steps - 1:
+            print(f" [{step:>4}/{args.n_steps}] dt={time.time()-t0:.0f}s lr={cur_lr:.1e} "
+                  f"sup={rec['sup_loss']:.4f} rf={rec['rf_loss']:.4f} "
+                  f"λ_mean={rec['lyap1_mean']:+.4f} [{rec['lyap1_min']:+.3f},{rec['lyap1_max']:+.3f}] "
+                  f"frac>λ*={rec['frac_above_star']:.2f}", flush=True)
+
+        # --eval-every <= 0 disables periodic evals (instead of dividing by zero);
+        # the last step is always evaluated so final_acc reflects the final weights.
+        periodic_eval = args.eval_every > 0 and (step + 1) % args.eval_every == 0
+        if periodic_eval or step == args.n_steps - 1:
+            acc, tacc = evaluate(head, base, data_path, args.eval_n, args.eval_batch_size, device)
+            print(f" >> EVAL @ {step+1}: exact_acc={acc:.4f} tok_acc={tacc:.4f} "
+                  f"(Δ from init: {acc-acc0:+.4f})", flush=True)
+            log["evals"].append({"step": step + 1, "acc": acc, "tok_acc": tacc})
+
+    log["final_acc"] = log["evals"][-1]["acc"]
+    log["final_tok_acc"] = log["evals"][-1]["tok_acc"]
+    out_path.write_text(json.dumps(log, indent=2))
+    print(f"\n=== DONE === init {acc0:.4f} → final {log['final_acc']:.4f} log → {args.out}")
+
+    if args.save_ckpt:
+        save_path = Path(args.save_ckpt)
+        save_path.parent.mkdir(parents=True, exist_ok=True)
+        torch.save({
+            "state_dict": head.state_dict(),
+            "args": vars(args),
+            "n_steps_trained": args.n_steps,
+            "final_acc": log["final_acc"],
+            "final_tok_acc": log["final_tok_acc"],
+            "arch_cfg_signature": {
+                "vocab_size": train_meta["vocab_size"],
+                "seq_len": train_meta["seq_len"],
+                "num_puzzle_identifiers": train_meta["num_puzzle_identifiers"],
+                "batch_size": args.batch_size,
+            },
+        }, save_path)
+        print(f"checkpoint saved → {save_path}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/step5_train_trm_cf.py b/step5_train_trm_cf.py
new file mode 100644
index 0000000..b36bcf8
--- /dev/null
+++ b/step5_train_trm_cf.py
@@ -0,0 +1,307 @@
+"""Step 5 (TRM port of step 3): Continue TRM training with joint-Lyapunov reverse-flossing regularizer.
+
+L_total = L_HRM_ACT(supervised) + alpha * L_RF
+L_RF = mean over batch of max(0, lambda_joint_1 - lambda_star) ** 2
+
+FORWARD direction (what we measure):
+ - Joint tangent Q in R^{B x 2D x k} evolved via block-matrix Jacobian along
+ z trajectory (using JVP with create_graph=True so RF loss is diff'ble in theta).
+ - Lyapunov spectrum = (1/T) * sum_t log|R_ii(t)| from QR re-orthogonalization.
+BACKWARD direction (what flows to theta):
+ - Standard autograd of L_total through the entire forward graph (including the
+ JVP chain), as in Engelken's flossing. The QR decomposition's backward is
+ handled by PyTorch autograd; no manual pullback needed.
+
+Loaded from intermediate checkpoint (e.g. step_18228, before the success/failure
+contraction gap fully forms in the original training).
+"""
+from __future__ import annotations
+import sys, os, yaml, json, math, time, argparse
+from pathlib import Path
+import numpy as np
+import torch
+import torch.nn.functional as F
+
+TRM_DIR = Path("/home/yurenh2/rrm/trm")
+sys.path.insert(0, str(TRM_DIR))
+
+# Alias TRM model class to HRM name so we can reuse step3 code (TRM is a fork)
+from models.recursive_reasoning.trm import TinyRecursiveReasoningModel_ACTV1 as HierarchicalReasoningModel_ACTV1
+from models.losses import ACTLossHead
+from adam_atan2 import AdamATan2
+
+
+def load_model(ckpt_root: Path, ckpt_name: str, device: str):
+    """Rebuild the TRM + ACT loss head from a checkpoint directory.
+
+    Reads `all_config.yaml` in `ckpt_root`, builds the architecture with the
+    dataset metadata from `<data_path>/train/dataset.json`, loads the weights
+    in `ckpt_root / ckpt_name` non-strictly and moves the head to `device`.
+
+    Returns:
+        (head, base, cfg, train_meta); `cfg["data_path"]` is always set on return.
+    """
+    cfg = yaml.safe_load((ckpt_root / "all_config.yaml").read_text())
+    arch_cfg = dict(cfg["arch"])
+    # TRM configs carry a list under "data_paths"; fall back to its first entry
+    # when the single "data_path" key is absent or empty.
+    dataset_dir = Path(cfg.get("data_path") or cfg["data_paths"][0])
+    meta_file = dataset_dir / "train" / "dataset.json"
+    train_meta = json.loads(meta_file.read_text())
+    arch_cfg.update(
+        batch_size=cfg["global_batch_size"],
+        seq_len=train_meta["seq_len"],
+        vocab_size=train_meta["vocab_size"],
+        num_puzzle_identifiers=train_meta["num_puzzle_identifiers"],
+        causal=False,
+    )
+    cfg["data_path"] = str(dataset_dir)  # normalize for downstream
+    base = HierarchicalReasoningModel_ACTV1(arch_cfg)
+    head = ACTLossHead(base, loss_type=arch_cfg["loss"]["loss_type"])
+    raw_state = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True)
+    # Drop the "_orig_mod." fragment from checkpoint keys before loading.
+    cleaned_state = {}
+    for key, tensor in raw_state.items():
+        cleaned_state[key.replace("_orig_mod.", "")] = tensor
+    missing, unexpected = head.load_state_dict(cleaned_state, strict=False)
+    print(f"[load {ckpt_name}] missing={len(missing)} unexpected={len(unexpected)}")
+    head.to(device)
+    return head, base, cfg, train_meta
+
+
+def jvp_train(f, x, v):
+    """Jacobian-vector product of `f` at `x` along `v`, kept in the autograd graph.
+
+    Returns the `(f(x), J_f(x) @ v)` pair from `torch.autograd.functional.jvp`
+    called with create_graph=True, so the result can be backpropagated through.
+    """
+    primal_and_tangent = torch.autograd.functional.jvp(
+        f, x, v=v, create_graph=True, strict=False
+    )
+    return primal_and_tangent
+
+
+def compute_joint_lyap_spec(base, batch, k_lyap, lyap_act_steps, device, seed):
+    """Returns per-sample top-k Lyapunov spectrum of joint (z_H, z_L) dynamics
+    for TRM (single shared L_level used for both H and L steps). Shape (B, k).
+
+    k tangent columns of size 2*D (H half stacked on L half) are pushed through
+    every L and H half-step with JVPs and re-orthonormalised by QR after each
+    half-step; the result is the mean of log|R_ii| over all half-steps.
+
+    Differentiable wrt the model parameters when autograd is enabled. When the
+    caller disables autograd (torch.no_grad(), diagnostic-only use) the JVPs
+    are taken with create_graph=False, so no double-backward graph is kept
+    alive; the returned values are the same.
+
+    Args:
+        base: model whose `inner` exposes config, H_init, L_init, L_level,
+            _input_embeddings and puzzle_emb_len.
+        batch: dict with "inputs" and "puzzle_identifiers".
+        k_lyap: number of exponents to estimate (>= 1).
+        lyap_act_steps: ACT steps to unroll, capped at config.halt_max_steps.
+        device: device for the tangent basis and accumulators.
+        seed: seed of the random initial tangent basis.
+
+    Raises:
+        ValueError: if k_lyap < 1.
+    """
+    if k_lyap < 1:
+        raise ValueError(f"k_lyap must be >= 1, got {k_lyap}")
+
+    inner = base.inner
+    cfg = inner.config
+    B = batch["inputs"].shape[0]
+    seq_full = cfg.seq_len + inner.puzzle_emb_len
+    hidden = cfg.hidden_size
+    D = seq_full * hidden
+
+    z_H = inner.H_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
+    z_L = inner.L_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
+    seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None)
+    input_embeddings = inner._input_embeddings(batch["inputs"], batch["puzzle_identifiers"])
+
+    g = torch.Generator(device=device).manual_seed(seed)
+    Q0 = torch.randn(B, 2*D, k_lyap, device=device, dtype=torch.float32, generator=g)
+    Q, _ = torch.linalg.qr(Q0)
+    log_R_sum = torch.zeros(B, k_lyap, device=device, dtype=torch.float32)
+    n_steps = 0
+
+    # Only pay for a differentiable JVP chain when the caller can backprop through it.
+    track_graph = torch.is_grad_enabled()
+
+    def _jvp(f, x, v):
+        if track_graph:
+            return jvp_train(f, x, v)
+        return torch.autograd.functional.jvp(f, x, v=v, create_graph=False, strict=False)
+
+    def _push(f, z, tangents):
+        """Advance state `z` through `f` and map each tangent column through its Jacobian."""
+        z_next = z
+        cols = []
+        for i in range(k_lyap):
+            v_i = tangents[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype)
+            # The primal output is the same for every column; the last one is kept.
+            z_next, Dv = _jvp(f, z, v_i)
+            cols.append(Dv.reshape(B, D).to(torch.float32))
+        return z_next, torch.stack(cols, dim=-1)
+
+    def _reorthonormalise(Q_new, acc):
+        """QR the stacked tangents and accumulate log|diag(R)| (floored to keep log finite)."""
+        Q_out, R = torch.linalg.qr(Q_new)
+        return Q_out, acc + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
+
+    n_act = min(lyap_act_steps, cfg.halt_max_steps)
+    for _act in range(n_act):
+        for _h in range(cfg.H_cycles):
+            for _l in range(cfg.L_cycles):
+                # L step (TRM uses L_level, identical to HRM): only the L half of
+                # the tangent is updated, driven by the sum of both halves.
+                v_H_j = Q[:, :D, :]; v_L_j = Q[:, D:, :]
+                injection = z_H + input_embeddings  # invariant across the k columns
+                z_L, new_v_L = _push(lambda z: inner.L_level(z, injection, **seq_info),
+                                     z_L, v_H_j + v_L_j)
+                Q, log_R_sum = _reorthonormalise(torch.cat([v_H_j, new_v_L], dim=1), log_R_sum)
+                n_steps += 1
+            # H step — TRM uses SAME L_level (this is the key TRM difference)
+            v_H_j = Q[:, :D, :]; v_L_j = Q[:, D:, :]
+            z_H, new_v_H = _push(lambda z: inner.L_level(z, z_L, **seq_info),
+                                 z_H, v_H_j + v_L_j)
+            Q, log_R_sum = _reorthonormalise(torch.cat([new_v_H, v_L_j], dim=1), log_R_sum)
+            n_steps += 1
+
+    lyap_spec = log_R_sum / max(n_steps, 1)  # (B, k)
+    return lyap_spec  # full spectrum (B, k)
+
+
+def load_train_batches(data_path: Path, batch_size: int, n_iters: int, seed: int = 0):
+    """Generate `n_iters` randomly sampled training batches.
+
+    Reads the three `all__*.npy` arrays under `data_path / "train"` on first
+    iteration, then repeatedly draws `batch_size` distinct row indices with a
+    NumPy generator seeded by `seed`.
+
+    Yields:
+        dict with keys "inputs", "labels", "puzzle_identifiers"; each value is
+        an int32 CPU tensor holding the selected rows.
+    """
+    train_dir = data_path / "train"
+    sampler = np.random.default_rng(seed)
+    arrays = {
+        "inputs": np.load(train_dir / "all__inputs.npy"),
+        "labels": np.load(train_dir / "all__labels.npy"),
+        "puzzle_identifiers": np.load(train_dir / "all__puzzle_identifiers.npy"),
+    }
+    n_examples = len(arrays["inputs"])
+    remaining = n_iters
+    while remaining > 0:
+        # Sampling is without replacement inside a batch, independent across batches.
+        rows = sampler.choice(n_examples, size=batch_size, replace=False)
+        yield {key: torch.from_numpy(arr[rows].astype(np.int32)) for key, arr in arrays.items()}
+        remaining -= 1
+
+
+def evaluate(head, base, data_path, n_samples, batch_size, device, seed=42):
+    """Evaluate exact-match and token accuracy on a random subset of the test split.
+
+    Args:
+        head: module switched to eval mode via `head.eval()` (not restored here).
+        base: model exposing `initial_carry(batch)`, `config.halt_max_steps` and
+            `base(carry=..., batch=...) -> (carry, outputs)` with `outputs["logits"]`.
+        data_path: directory containing `test/all__{inputs,labels,puzzle_identifiers}.npy`.
+        n_samples: number of test examples to draw (without replacement). Clamped to
+            the size of the test split.
+        batch_size: evaluation batch size.
+        device: device the batch tensors are moved to.
+        seed: seed of the subset sampler, so repeated calls score the same examples.
+
+    Returns:
+        (exact_acc, token_acc): fraction of evaluated examples whose every scored
+        position is predicted correctly, and fraction of scored positions predicted
+        correctly. Positions with label <= 0 are not scored.
+    """
+    rng = np.random.default_rng(seed)
+    test_dir = data_path / "test"
+    inputs = np.load(test_dir / "all__inputs.npy")
+    labels = np.load(test_dir / "all__labels.npy")
+    pid = np.load(test_dir / "all__puzzle_identifiers.npy")
+    # rng.choice(replace=False) raises if asked for more rows than exist; evaluate the
+    # whole split instead. For n_samples <= len(inputs) the drawn subset is unchanged.
+    n_eval = min(n_samples, len(inputs))
+    idx_all = rng.choice(len(inputs), size=n_eval, replace=False)
+    head.eval()
+    correct = 0
+    token_correct = 0
+    token_total = 0
+    for s in range(0, n_eval, batch_size):
+        e = min(s + batch_size, n_eval)
+        idx = idx_all[s:e]
+        batch = {
+            "inputs": torch.from_numpy(inputs[idx].astype(np.int32)).to(device),
+            "labels": torch.from_numpy(labels[idx].astype(np.int32)).to(device),
+            "puzzle_identifiers": torch.from_numpy(pid[idx].astype(np.int32)).to(device),
+        }
+        with torch.no_grad():
+            with torch.device(device):
+                carry = base.initial_carry(batch)
+            # Always run the full halt_max_steps budget; only the last step's logits are scored.
+            for _ in range(base.config.halt_max_steps):
+                carry, outputs = base(carry=carry, batch=batch)
+            preds = outputs["logits"].argmax(dim=-1)
+            mask = batch["labels"] > 0
+            hit = preds == batch["labels"]
+            exact = (hit | ~mask).all(dim=-1).float()
+            correct += exact.sum().item()
+            token_correct += (hit & mask).sum().item()
+            token_total += mask.sum().item()
+    # Normalise by the number of examples actually evaluated (guards n_samples == 0 too).
+    return correct / max(n_eval, 1), token_correct / max(token_total, 1)
+
+
+def main():
+    """Continue training a TRM checkpoint with an optional Lyapunov regulariser.
+
+    Per step: L_total = supervised ACT loss + alpha_rf * rf_loss, with rf_loss
+    chosen by --rf-mode (see the flag's help text). With --alpha-rf 0 the
+    spectrum is only measured for logging, and not at all when --k-lyap is 0.
+
+    Writes a JSON log (CLI args, per-step records, periodic and final evals)
+    to --out.
+    """
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--ckpt-root", required=True)
+    ap.add_argument("--ckpt-name", default="step_18228",
+                    help="start from this checkpoint and continue training")
+    ap.add_argument("--n-steps", type=int, default=2000)
+    ap.add_argument("--batch-size", type=int, default=8)
+    ap.add_argument("--lr", type=float, default=1e-5)
+    ap.add_argument("--alpha-rf", type=float, default=0.0, help="RF weight; 0 = baseline")
+    ap.add_argument("--lambda-star", type=float, default=-0.05,
+                    help="joint Lyapunov target. λ_joint_1 should be < λ_star for stable joint dynamics.")
+    ap.add_argument("--rf-mode", choices=["fixed","volume_cf","gelu","engelken_l2"], default="fixed",
+                    help="fixed: max(0,λ-λ*)² hinge (one-sided). "
+                         "volume_cf: max(0, mean_i λ_i - λ*)² over the measured top-k spectrum. "
+                         "gelu: GeLU(λ) attractor at -0.75 (deprecated, kills success). "
+                         "engelken_l2: (1/k) Σ λ_i² across full top-k (Engelken 2023, two-sided to 0).")
+    ap.add_argument("--k-lyap", type=int, default=2)
+    ap.add_argument("--lyap-act-steps", type=int, default=4)
+    ap.add_argument("--seed", type=int, default=42)
+    ap.add_argument("--eval-every", type=int, default=200)
+    ap.add_argument("--eval-n", type=int, default=512)
+    ap.add_argument("--eval-batch-size", type=int, default=32)
+    ap.add_argument("--out", default="step3_log.json")
+    args = ap.parse_args()
+
+    device = "cuda"
+    head, base, cfg, train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device)
+    optim = AdamATan2(head.parameters(), lr=args.lr, betas=(0.9, 0.95), weight_decay=cfg["weight_decay"])
+
+    print(f"\n=== Initial eval (loaded {args.ckpt_name}) ===")
+    acc0, tacc0 = evaluate(head, base, Path(cfg["data_path"]), args.eval_n, args.eval_batch_size, device)
+    print(f" initial: exact_acc = {acc0:.4f} token_acc = {tacc0:.4f}")
+
+    log = {"args": vars(args), "initial_acc": acc0, "initial_tok_acc": tacc0, "steps": [], "evals": []}
+    log["evals"].append({"step": 0, "acc": acc0, "tok_acc": tacc0})
+    t0 = time.time()
+    train_iter = load_train_batches(Path(cfg["data_path"]), args.batch_size, args.n_steps, seed=args.seed)
+
+    for step, batch in enumerate(train_iter):
+        batch = {k: v.to(device) for k, v in batch.items()}
+        head.train()
+        # Sparse puzzle embedding has fixed local_weights buffer (training batch=768);
+        # keep it in eval mode (still uses the weights table, just not the local buffer).
+        # Re-applied every step because head.train() above resets the mode.
+        base.inner.puzzle_emb.eval()
+        for p in base.inner.puzzle_emb.parameters():
+            p.requires_grad_(False)
+
+        # ---- Supervised ACT loss (accumulated over all halt_max_steps ACT steps) ----
+        with torch.device(device):
+            carry = base.initial_carry(batch)
+        sup_loss_sum = 0.0
+        n_loss = 0
+        for _ in range(base.config.halt_max_steps):
+            carry, l, metrics, _, all_finish = head(return_keys=[], carry=carry, batch=batch)
+            sup_loss_sum = sup_loss_sum + l
+            n_loss += 1
+            if all_finish: break
+        sup_loss = sup_loss_sum / max(n_loss, 1) / args.batch_size
+
+        # ---- Reverse-flossing penalty (only if alpha > 0) ----
+        if args.alpha_rf > 0:
+            lyap_spec = compute_joint_lyap_spec(
+                base, batch, k_lyap=args.k_lyap,
+                lyap_act_steps=args.lyap_act_steps, device=device,
+                seed=args.seed + step,
+            )  # (B, k)
+            lyap1 = lyap_spec[:, 0]
+            if args.rf_mode == "engelken_l2":
+                # Engelken 2023: push all top-k λ_i² → 0 (two-sided)
+                rf_loss = (lyap_spec ** 2).mean()
+                excess = lyap1  # log signed λ_1
+            elif args.rf_mode == "volume_cf":
+                # One-sided cap on local phase-space volume expansion.
+                # Allows λ_1 > 0 when compensated by enough contraction in other modes.
+                lyap_volume = lyap_spec.mean(dim=1)
+                excess = (lyap_volume - args.lambda_star).clamp_min(0.0)
+                rf_loss = (excess ** 2).mean()
+            elif args.rf_mode == "gelu":
+                rf_loss = torch.nn.functional.gelu(lyap1).mean()
+                excess = lyap1 - (-0.751)
+            else:  # fixed (hinge on top-1)
+                excess = (lyap1 - args.lambda_star).clamp_min(0.0)
+                rf_loss = (excess ** 2).mean()
+        else:
+            if args.k_lyap > 0:
+                with torch.no_grad():
+                    lyap_spec = compute_joint_lyap_spec(
+                        base, batch, k_lyap=args.k_lyap,
+                        lyap_act_steps=args.lyap_act_steps, device=device,
+                        seed=args.seed + step,
+                    )
+                lyap1 = lyap_spec[:, 0]
+            else:
+                # Fast alpha=0 baseline path: skip diagnostic-only Lyapunov work.
+                lyap1 = torch.zeros(batch["inputs"].shape[0], device=device)
+                lyap_spec = lyap1[:, None]
+            rf_loss = torch.zeros((), device=device)
+            excess = (lyap1 - args.lambda_star).clamp_min(0.0)
+
+        total_loss = sup_loss + args.alpha_rf * rf_loss
+
+        optim.zero_grad(set_to_none=True)
+        total_loss.backward()
+        torch.nn.utils.clip_grad_norm_([p for p in head.parameters() if p.requires_grad], 1.0)
+        optim.step()
+
+        rec = {
+            "step": step, "sup_loss": float(sup_loss.item()),
+            "rf_loss": float(rf_loss.item()),
+            "total_loss": float(total_loss.item()),
+            "lyap1_mean": float(lyap1.detach().mean().item()),
+            "lyap1_max": float(lyap1.detach().max().item()),
+            "lyap_volume_mean": float(lyap_spec.detach().mean(dim=1).mean().item()),
+            "lyap_volume_max": float(lyap_spec.detach().mean(dim=1).max().item()),
+            "frac_above_star": float((excess > 0).float().mean().item()),
+        }
+        log["steps"].append(rec)
+        if step % 10 == 0 or step == args.n_steps - 1:
+            print(f" [{step:>4}/{args.n_steps}] dt={time.time()-t0:.1f}s "
+                  f"sup={rec['sup_loss']:.4f} rf={rec['rf_loss']:.4f} "
+                  f"λj1_mean={rec['lyap1_mean']:+.4f} max={rec['lyap1_max']:+.4f} "
+                  f"frac>λ*={rec['frac_above_star']:.2f}", flush=True)
+
+        # --eval-every <= 0 disables periodic evals instead of dividing by zero.
+        if args.eval_every > 0 and (step + 1) % args.eval_every == 0:
+            acc, tacc = evaluate(head, base, Path(cfg["data_path"]),
+                                 args.eval_n, args.eval_batch_size, device)
+            print(f" >> EVAL @ step {step+1}: exact_acc={acc:.4f} (Δ from init: {acc-acc0:+.4f})", flush=True)
+            log["evals"].append({"step": step + 1, "acc": acc, "tok_acc": tacc})
+
+    # If the last recorded eval was already taken at the final step, the weights
+    # have not changed since: reuse it rather than re-running the same seeded
+    # evaluation and logging a duplicate entry.
+    last_eval = log["evals"][-1]
+    if last_eval["step"] == args.n_steps:
+        acc_f, tacc_f = last_eval["acc"], last_eval["tok_acc"]
+    else:
+        acc_f, tacc_f = evaluate(head, base, Path(cfg["data_path"]),
+                                 args.eval_n, args.eval_batch_size, device)
+        log["evals"].append({"step": args.n_steps, "acc": acc_f, "tok_acc": tacc_f})
+    print(f"\n=== Final eval ===")
+    print(f" initial: {acc0:.4f} final: {acc_f:.4f} (Δ {acc_f-acc0:+.4f})")
+    log["final_acc"] = acc_f
+    log["final_tok_acc"] = tacc_f
+
+    Path(args.out).write_text(json.dumps(log, indent=2))
+    print(f"log → {args.out}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/step6_prefloss.py b/step6_prefloss.py
new file mode 100644
index 0000000..af84de0
--- /dev/null
+++ b/step6_prefloss.py
@@ -0,0 +1,310 @@
+"""Step 6: Preflossing experiment (Engelken-style separate phases).
+
+Phase 1 — Pure flossing: only optimize L_floss, no task loss.
+ engelken: L = (1/k) Σ λ_i² (push all top-k toward 0, two-sided)
+ cf: L = (1/k) Σ max(0, λ_i)² (only push positive λ toward 0)
+
+Phase 2 — Pure task training: standard HRM ACT loss, no flossing.
+
+Baseline mode (--prefloss-steps 0): skip phase 1, go straight to task training.
+
+Key fix vs step3: lyap_act_steps defaults to halt_max_steps (16 for HRM).
+"""
+from __future__ import annotations
+import sys, os, yaml, json, math, time, argparse
+from pathlib import Path
+import numpy as np
+import torch
+import torch.nn.functional as F
+
+HRM_DIR = Path("/home/yurenh2/rrm/hrm")
+sys.path.insert(0, str(HRM_DIR))
+
+from models.hrm.hrm_act_v1 import HierarchicalReasoningModel_ACTV1
+from models.losses import ACTLossHead
+from adam_atan2 import AdamATan2
+
+
+def load_model(ckpt_root: Path, ckpt_name: str, device: str):
+    """Rebuild the HRM + ACT loss head from a checkpoint directory.
+
+    Reads `all_config.yaml` in `ckpt_root`, builds the architecture with the
+    dataset metadata from `<cfg["data_path"]>/train/dataset.json`, loads the
+    weights in `ckpt_root / ckpt_name` non-strictly and moves the head to `device`.
+
+    Returns:
+        (head, base, cfg, train_meta).
+    """
+    cfg = yaml.safe_load((ckpt_root / "all_config.yaml").read_text())
+    arch_cfg = dict(cfg["arch"])
+    meta_file = Path(cfg["data_path"]) / "train" / "dataset.json"
+    train_meta = json.loads(meta_file.read_text())
+    arch_cfg.update(
+        batch_size=cfg["global_batch_size"],
+        seq_len=train_meta["seq_len"],
+        vocab_size=train_meta["vocab_size"],
+        num_puzzle_identifiers=train_meta["num_puzzle_identifiers"],
+        causal=False,
+    )
+    base = HierarchicalReasoningModel_ACTV1(arch_cfg)
+    head = ACTLossHead(base, loss_type=arch_cfg["loss"]["loss_type"])
+    raw_state = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True)
+    # Drop the "_orig_mod." fragment from checkpoint keys before loading.
+    cleaned_state = {}
+    for key, tensor in raw_state.items():
+        cleaned_state[key.replace("_orig_mod.", "")] = tensor
+    missing, unexpected = head.load_state_dict(cleaned_state, strict=False)
+    print(f"[load {ckpt_name}] missing={len(missing)} unexpected={len(unexpected)}")
+    head.to(device)
+    return head, base, cfg, train_meta
+
+
+def jvp_train(f, x, v):
+    """Jacobian-vector product of `f` at `x` along `v`, kept in the autograd graph.
+
+    Returns the `(f(x), J_f(x) @ v)` pair from `torch.autograd.functional.jvp`
+    called with create_graph=True, so the result can be backpropagated through.
+    """
+    primal_and_tangent = torch.autograd.functional.jvp(
+        f, x, v=v, create_graph=True, strict=False
+    )
+    return primal_and_tangent
+
+
+def compute_joint_lyap_spec(base, batch, k_lyap, lyap_act_steps, device, seed):
+    """Per-sample top-k Lyapunov spectrum of the joint (z_H, z_L) HRM dynamics. Shape (B, k).
+
+    k tangent columns of size 2*D (H half stacked on L half) are pushed through
+    every L_level and H_level half-step with JVPs and re-orthonormalised by QR
+    after each half-step; the result is the mean of log|R_ii| over all half-steps.
+
+    Differentiable wrt the model parameters when autograd is enabled. When the
+    caller disables autograd (torch.no_grad()) the JVPs are taken with
+    create_graph=False, so no double-backward graph is kept alive; the returned
+    values are the same.
+
+    Args:
+        base: model whose `inner` exposes config, H_init, L_init, L_level,
+            H_level, _input_embeddings and puzzle_emb_len.
+        batch: dict with "inputs" and "puzzle_identifiers".
+        k_lyap: number of exponents to estimate (>= 1).
+        lyap_act_steps: ACT steps to unroll, capped at config.halt_max_steps.
+        device: device for the tangent basis and accumulators.
+        seed: seed of the random initial tangent basis.
+
+    Raises:
+        ValueError: if k_lyap < 1.
+    """
+    if k_lyap < 1:
+        raise ValueError(f"k_lyap must be >= 1, got {k_lyap}")
+
+    inner = base.inner
+    cfg = inner.config
+    B = batch["inputs"].shape[0]
+    seq_full = cfg.seq_len + inner.puzzle_emb_len
+    hidden = cfg.hidden_size
+    D = seq_full * hidden
+
+    z_H = inner.H_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
+    z_L = inner.L_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
+    seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None)
+    input_embeddings = inner._input_embeddings(batch["inputs"], batch["puzzle_identifiers"])
+
+    g = torch.Generator(device=device).manual_seed(seed)
+    Q0 = torch.randn(B, 2*D, k_lyap, device=device, dtype=torch.float32, generator=g)
+    Q, _ = torch.linalg.qr(Q0)
+    log_R_sum = torch.zeros(B, k_lyap, device=device, dtype=torch.float32)
+    n_steps = 0
+
+    # Only pay for a differentiable JVP chain when the caller can backprop through it.
+    track_graph = torch.is_grad_enabled()
+
+    def _jvp(f, x, v):
+        if track_graph:
+            return jvp_train(f, x, v)
+        return torch.autograd.functional.jvp(f, x, v=v, create_graph=False, strict=False)
+
+    def _push(f, z, tangents):
+        """Advance state `z` through `f` and map each tangent column through its Jacobian."""
+        z_next = z
+        cols = []
+        for i in range(k_lyap):
+            v_i = tangents[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype)
+            # The primal output is the same for every column; the last one is kept.
+            z_next, Dv = _jvp(f, z, v_i)
+            cols.append(Dv.reshape(B, D).to(torch.float32))
+        return z_next, torch.stack(cols, dim=-1)
+
+    def _reorthonormalise(Q_new, acc):
+        """QR the stacked tangents and accumulate log|diag(R)| (floored to keep log finite)."""
+        Q_out, R = torch.linalg.qr(Q_new)
+        return Q_out, acc + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
+
+    n_act = min(lyap_act_steps, cfg.halt_max_steps)
+    for _act in range(n_act):
+        for _h in range(cfg.H_cycles):
+            for _l in range(cfg.L_cycles):
+                # L step: only the L half of the tangent is updated, driven by
+                # the sum of both halves.
+                v_H_j = Q[:, :D, :]
+                v_L_j = Q[:, D:, :]
+                injection = z_H + input_embeddings  # invariant across the k columns
+                z_L, new_v_L = _push(lambda z: inner.L_level(z, injection, **seq_info),
+                                     z_L, v_H_j + v_L_j)
+                Q, log_R_sum = _reorthonormalise(torch.cat([v_H_j, new_v_L], dim=1), log_R_sum)
+                n_steps += 1
+            # H step: only the H half of the tangent is updated.
+            v_H_j = Q[:, :D, :]
+            v_L_j = Q[:, D:, :]
+            z_H, new_v_H = _push(lambda z: inner.H_level(z, z_L, **seq_info),
+                                 z_H, v_H_j + v_L_j)
+            Q, log_R_sum = _reorthonormalise(torch.cat([new_v_H, v_L_j], dim=1), log_R_sum)
+            n_steps += 1
+
+    return log_R_sum / max(n_steps, 1)
+
+
+def load_train_batches(data_path: Path, batch_size: int, n_iters: int, seed: int = 0):
+    """Generate `n_iters` randomly sampled training batches.
+
+    Reads the three `all__*.npy` arrays under `data_path / "train"` on first
+    iteration, then repeatedly draws `batch_size` distinct row indices with a
+    NumPy generator seeded by `seed`.
+
+    Yields:
+        dict with keys "inputs", "labels", "puzzle_identifiers"; each value is
+        an int32 CPU tensor holding the selected rows.
+    """
+    train_dir = data_path / "train"
+    sampler = np.random.default_rng(seed)
+    arrays = {
+        "inputs": np.load(train_dir / "all__inputs.npy"),
+        "labels": np.load(train_dir / "all__labels.npy"),
+        "puzzle_identifiers": np.load(train_dir / "all__puzzle_identifiers.npy"),
+    }
+    n_examples = len(arrays["inputs"])
+    remaining = n_iters
+    while remaining > 0:
+        # Sampling is without replacement inside a batch, independent across batches.
+        rows = sampler.choice(n_examples, size=batch_size, replace=False)
+        yield {key: torch.from_numpy(arr[rows].astype(np.int32)) for key, arr in arrays.items()}
+        remaining -= 1
+
+
+def evaluate(head, base, data_path, n_samples, batch_size, device, seed=42):
+    """Evaluate exact-match and token accuracy on a random subset of the test split.
+
+    Args:
+        head: module switched to eval mode via `head.eval()` (not restored here).
+        base: model exposing `initial_carry(batch)`, `config.halt_max_steps` and
+            `base(carry=..., batch=...) -> (carry, outputs)` with `outputs["logits"]`.
+        data_path: directory containing `test/all__{inputs,labels,puzzle_identifiers}.npy`.
+        n_samples: number of test examples to draw (without replacement). Clamped to
+            the size of the test split.
+        batch_size: evaluation batch size.
+        device: device the batch tensors are moved to.
+        seed: seed of the subset sampler, so repeated calls score the same examples.
+
+    Returns:
+        (exact_acc, token_acc): fraction of evaluated examples whose every scored
+        position is predicted correctly, and fraction of scored positions predicted
+        correctly. Positions with label <= 0 are not scored.
+    """
+    rng = np.random.default_rng(seed)
+    test_dir = data_path / "test"
+    inputs = np.load(test_dir / "all__inputs.npy")
+    labels = np.load(test_dir / "all__labels.npy")
+    pid = np.load(test_dir / "all__puzzle_identifiers.npy")
+    # rng.choice(replace=False) raises if asked for more rows than exist; evaluate the
+    # whole split instead. For n_samples <= len(inputs) the drawn subset is unchanged.
+    n_eval = min(n_samples, len(inputs))
+    idx_all = rng.choice(len(inputs), size=n_eval, replace=False)
+    head.eval()
+    correct = 0
+    token_correct = 0
+    token_total = 0
+    for s in range(0, n_eval, batch_size):
+        e = min(s + batch_size, n_eval)
+        idx = idx_all[s:e]
+        batch = {
+            "inputs": torch.from_numpy(inputs[idx].astype(np.int32)).to(device),
+            "labels": torch.from_numpy(labels[idx].astype(np.int32)).to(device),
+            "puzzle_identifiers": torch.from_numpy(pid[idx].astype(np.int32)).to(device),
+        }
+        with torch.no_grad():
+            with torch.device(device):
+                carry = base.initial_carry(batch)
+            # Always run the full halt_max_steps budget; only the last step's logits are scored.
+            for _ in range(base.config.halt_max_steps):
+                carry, outputs = base(carry=carry, batch=batch)
+            preds = outputs["logits"].argmax(dim=-1)
+            mask = batch["labels"] > 0
+            hit = preds == batch["labels"]
+            exact = (hit | ~mask).all(dim=-1).float()
+            correct += exact.sum().item()
+            token_correct += (hit & mask).sum().item()
+            token_total += mask.sum().item()
+    # Normalise by the number of examples actually evaluated (guards n_samples == 0 too).
+    return correct / max(n_eval, 1), token_correct / max(token_total, 1)
+
+
+def main():
+    """Preflossing experiment: a floss-only phase followed by a task-only phase.
+
+    Phase 1 (skipped when --prefloss-steps is 0) optimises only the Lyapunov
+    loss selected by --floss-mode. Phase 2 optimises only the supervised ACT
+    loss with a fresh optimizer. A JSON log with per-step records and evals
+    for both phases is written to --out.
+    """
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--ckpt-root", required=True)
+    ap.add_argument("--ckpt-name", default="step_13020")
+    ap.add_argument("--prefloss-steps", type=int, default=500,
+                    help="Phase 1: pure flossing steps. 0 = skip (baseline).")
+    ap.add_argument("--train-steps", type=int, default=3000,
+                    help="Phase 2: pure task training steps.")
+    ap.add_argument("--floss-mode", choices=["engelken", "cf", "volume_cf"], default="engelken",
+                    help="engelken: Σλ_i² (two-sided). cf: Σmax(0,λ_i)² (one-sided hinge). "
+                         "volume_cf: max(0, mean_i λ_i)² over the measured top-k spectrum.")
+    ap.add_argument("--batch-size", type=int, default=8)
+    ap.add_argument("--floss-lr", type=float, default=1e-4,
+                    help="LR for flossing phase (Engelken uses higher LR)")
+    ap.add_argument("--train-lr", type=float, default=1e-5,
+                    help="LR for task training phase")
+    ap.add_argument("--k-lyap", type=int, default=2)
+    ap.add_argument("--lyap-act-steps", type=int, default=16,
+                    help="ACT steps for Lyapunov computation (default=halt_max_steps)")
+    ap.add_argument("--seed", type=int, default=42)
+    ap.add_argument("--eval-every", type=int, default=100)
+    ap.add_argument("--eval-n", type=int, default=512)
+    ap.add_argument("--eval-batch-size", type=int, default=32)
+    ap.add_argument("--out", default="step6_log.json")
+    args = ap.parse_args()
+
+    device = "cuda"
+    head, base, cfg, train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device)
+    data_path = Path(cfg["data_path"])
+
+    print(f"\n=== Initial eval (loaded {args.ckpt_name}) ===")
+    acc0, tacc0 = evaluate(head, base, data_path, args.eval_n, args.eval_batch_size, device)
+    print(f" initial: exact_acc = {acc0:.4f} token_acc = {tacc0:.4f}")
+
+    log = {"args": vars(args), "initial_acc": acc0, "initial_tok_acc": tacc0,
+           "phase1_steps": [], "phase1_evals": [], "phase2_steps": [], "phase2_evals": []}
+
+    # Offset added to phase-2 eval steps so they continue the phase-1 step count.
+    global_step = 0
+
+    # ========== PHASE 1: Pure flossing ==========
+    if args.prefloss_steps > 0:
+        print(f"\n=== Phase 1: Pure {args.floss_mode} flossing ({args.prefloss_steps} steps, lr={args.floss_lr}) ===")
+        floss_optim = AdamATan2(head.parameters(), lr=args.floss_lr, betas=(0.9, 0.95), weight_decay=0.0)
+        floss_iter = load_train_batches(data_path, args.batch_size, args.prefloss_steps, seed=args.seed)
+        t0 = time.time()
+
+        for step, batch in enumerate(floss_iter):
+            batch = {k: v.to(device) for k, v in batch.items()}
+            head.train()
+            # head.train() resets the mode, so the puzzle embedding is put back
+            # into eval mode and frozen on every step.
+            base.inner.puzzle_emb.eval()
+            for p in base.inner.puzzle_emb.parameters():
+                p.requires_grad_(False)
+
+            lyap_spec = compute_joint_lyap_spec(
+                base, batch, k_lyap=args.k_lyap,
+                lyap_act_steps=args.lyap_act_steps, device=device,
+                seed=args.seed + step,
+            )
+
+            if args.floss_mode == "engelken":
+                floss_loss = (lyap_spec ** 2).mean()
+            elif args.floss_mode == "volume_cf":
+                floss_loss = (lyap_spec.mean(dim=1).clamp_min(0.0) ** 2).mean()
+            else:  # cf: one-sided hinge on every measured exponent
+                floss_loss = (lyap_spec.clamp_min(0.0) ** 2).mean()
+
+            floss_optim.zero_grad(set_to_none=True)
+            floss_loss.backward()
+            torch.nn.utils.clip_grad_norm_([p for p in head.parameters() if p.requires_grad], 1.0)
+            floss_optim.step()
+
+            lyap1 = lyap_spec[:, 0].detach()
+            rec = {
+                "step": step, "floss_loss": float(floss_loss.item()),
+                "lyap1_mean": float(lyap1.mean().item()),
+                "lyap1_max": float(lyap1.max().item()),
+                "lyap_all_mean": float(lyap_spec.detach().mean().item()),
+                "lyap_volume_mean": float(lyap_spec.detach().mean(dim=1).mean().item()),
+                "lyap_volume_max": float(lyap_spec.detach().mean(dim=1).max().item()),
+            }
+            log["phase1_steps"].append(rec)
+            if step % 10 == 0 or step == args.prefloss_steps - 1:
+                print(f" P1[{step:>4}/{args.prefloss_steps}] dt={time.time()-t0:.1f}s "
+                      f"floss={rec['floss_loss']:.6f} "
+                      f"λ1={rec['lyap1_mean']:+.4f} max={rec['lyap1_max']:+.4f} "
+                      f"λ_all={rec['lyap_all_mean']:+.4f}", flush=True)
+
+            # --eval-every <= 0 disables periodic evals instead of dividing by zero.
+            if args.eval_every > 0 and (step + 1) % args.eval_every == 0:
+                acc, tacc = evaluate(head, base, data_path, args.eval_n, args.eval_batch_size, device)
+                print(f" >> P1 EVAL @ step {step+1}: exact_acc={acc:.4f} (Δ={acc-acc0:+.4f})", flush=True)
+                log["phase1_evals"].append({"step": step + 1, "acc": acc, "tok_acc": tacc})
+
+        # Reuse the in-loop eval when it was already taken on the final phase-1
+        # step (weights unchanged since), instead of re-evaluating and logging twice.
+        p1_evals = log["phase1_evals"]
+        if p1_evals and p1_evals[-1]["step"] == args.prefloss_steps:
+            acc_p1, tacc_p1 = p1_evals[-1]["acc"], p1_evals[-1]["tok_acc"]
+        else:
+            acc_p1, tacc_p1 = evaluate(head, base, data_path, args.eval_n, args.eval_batch_size, device)
+            p1_evals.append({"step": args.prefloss_steps, "acc": acc_p1, "tok_acc": tacc_p1})
+        print(f" Phase 1 final: exact_acc={acc_p1:.4f} (Δ from init: {acc_p1-acc0:+.4f})")
+        log["phase1_final_acc"] = acc_p1
+        global_step = args.prefloss_steps
+    else:
+        print("\n=== Phase 1 skipped (baseline mode) ===")
+        log["phase1_final_acc"] = acc0
+
+    # ========== PHASE 2: Pure task training ==========
+    print(f"\n=== Phase 2: Pure task training ({args.train_steps} steps, lr={args.train_lr}) ===")
+    train_optim = AdamATan2(head.parameters(), lr=args.train_lr, betas=(0.9, 0.95),
+                            weight_decay=cfg["weight_decay"])
+    # Different seed offset so phase 2 does not replay the phase-1 batch sequence.
+    train_iter = load_train_batches(data_path, args.batch_size, args.train_steps,
+                                    seed=args.seed + 10000)
+    t0 = time.time()
+    acc_ref = log.get("phase1_final_acc", acc0)
+
+    for step, batch in enumerate(train_iter):
+        batch = {k: v.to(device) for k, v in batch.items()}
+        head.train()
+        base.inner.puzzle_emb.eval()
+        for p in base.inner.puzzle_emb.parameters():
+            p.requires_grad_(False)
+
+        with torch.device(device):
+            carry = base.initial_carry(batch)
+        sup_loss_sum = 0.0
+        n_loss = 0
+        for _ in range(base.config.halt_max_steps):
+            carry, l, metrics, _, all_finish = head(return_keys=[], carry=carry, batch=batch)
+            sup_loss_sum = sup_loss_sum + l
+            n_loss += 1
+            if all_finish:
+                break
+        sup_loss = sup_loss_sum / max(n_loss, 1) / args.batch_size
+
+        train_optim.zero_grad(set_to_none=True)
+        sup_loss.backward()
+        torch.nn.utils.clip_grad_norm_([p for p in head.parameters() if p.requires_grad], 1.0)
+        train_optim.step()
+
+        rec = {"step": step, "sup_loss": float(sup_loss.item())}
+        log["phase2_steps"].append(rec)
+        if step % 50 == 0 or step == args.train_steps - 1:
+            print(f" P2[{step:>4}/{args.train_steps}] dt={time.time()-t0:.1f}s "
+                  f"sup={rec['sup_loss']:.4f}", flush=True)
+
+        if args.eval_every > 0 and (step + 1) % args.eval_every == 0:
+            acc, tacc = evaluate(head, base, data_path, args.eval_n, args.eval_batch_size, device)
+            print(f" >> P2 EVAL @ step {step+1}: exact_acc={acc:.4f} "
+                  f"(Δ from init: {acc-acc0:+.4f}, Δ from P1: {acc-acc_ref:+.4f})", flush=True)
+            log["phase2_evals"].append({"step": global_step + step + 1, "acc": acc, "tok_acc": tacc})
+
+    # Same reuse rule as phase 1: skip the redundant final evaluation when the
+    # last in-loop eval already covers the final step.
+    final_step = global_step + args.train_steps
+    p2_evals = log["phase2_evals"]
+    if p2_evals and p2_evals[-1]["step"] == final_step:
+        acc_f, tacc_f = p2_evals[-1]["acc"], p2_evals[-1]["tok_acc"]
+    else:
+        acc_f, tacc_f = evaluate(head, base, data_path, args.eval_n, args.eval_batch_size, device)
+        p2_evals.append({"step": final_step, "acc": acc_f, "tok_acc": tacc_f})
+    print(f"\n=== Final eval ===")
+    print(f" initial: {acc0:.4f} phase1_end: {log.get('phase1_final_acc', acc0):.4f} "
+          f"final: {acc_f:.4f} (total Δ: {acc_f-acc0:+.4f})")
+    log["final_acc"] = acc_f
+    log["final_tok_acc"] = tacc_f
+
+    Path(args.out).write_text(json.dumps(log, indent=2))
+    print(f"log → {args.out}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/step7_interfloss.py b/step7_interfloss.py
new file mode 100644
index 0000000..3b8e4f0
--- /dev/null
+++ b/step7_interfloss.py
@@ -0,0 +1,589 @@
+"""Step 7: Engelken-style interflossing.
+
+This is intentionally not a mixed objective. Ordinary task-training steps use
+only the supervised ACT loss. Flossing episodes use only a Lyapunov-spectrum
+conditioning loss, then task training resumes.
+
+Paper mapping:
+ - preflossing: run a floss-only episode before task training.
+ - interflossing: run short floss-only episodes at selected training steps.
+ - no persistent L_task + alpha * L_floss term is used here.
+"""
+from __future__ import annotations
+
+import argparse
+import importlib
+import json
+import sys
+import time
+from pathlib import Path
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+import yaml
+
+
+HRM_DIR = Path("/home/yurenh2/rrm/hrm")
+TRM_DIR = Path("/home/yurenh2/rrm/trm")
+
+
+def import_stack(model_type: str):
+    """Import the model, loss-head and optimizer classes for ``model_type``.
+
+    The matching repository checkout (``HRM_DIR`` for ``"hrm"``, ``TRM_DIR`` for
+    any other value) is pushed onto the front of ``sys.path`` before the project
+    modules are imported.
+
+    Returns:
+        ``(model_cls, ACTLossHead, AdamATan2, CastedSparseEmbeddingSignSGD_Distributed)``.
+    """
+    is_hrm = model_type == "hrm"
+    sys.path.insert(0, str(HRM_DIR if is_hrm else TRM_DIR))
+
+    if is_hrm:
+        model_cls = importlib.import_module("models.hrm.hrm_act_v1").HierarchicalReasoningModel_ACTV1
+    else:
+        model_cls = importlib.import_module("models.recursive_reasoning.trm").TinyRecursiveReasoningModel_ACTV1
+
+    # Import every support module first, then pull the classes out of them.
+    losses_mod, optim_mod, sparse_mod = (
+        importlib.import_module(module_name)
+        for module_name in ("models.losses", "adam_atan2", "models.sparse_embedding")
+    )
+    loss_head_cls = losses_mod.ACTLossHead
+    adam_cls = optim_mod.AdamATan2
+    sparse_cls = sparse_mod.CastedSparseEmbeddingSignSGD_Distributed
+    return model_cls, loss_head_cls, adam_cls, sparse_cls
+
+
+def parse_step_list(text: str) -> set[int]:
+    """Parse a comma-separated list of integers into a set.
+
+    Surrounding whitespace is stripped from every entry and empty entries are
+    ignored, so ``""``, ``" "`` and ``"1,,2,"`` are all accepted. A non-empty
+    entry that is not an integer raises ``ValueError`` (from ``int``).
+    """
+    entries = (piece.strip() for piece in text.split(","))
+    return {int(entry) for entry in entries if entry}
+
+
+def build_interfloss_steps(args) -> set[int]:
+    """Return the set of task-step indices at which a floss episode runs.
+
+    The explicit ``--interfloss-at`` list is always included. When
+    ``--interfloss-every`` is positive, the periodic steps
+    ``start, start + every, ...`` up to and including ``stop`` are added, where
+    ``start`` is clamped to be non-negative and ``stop`` is
+    ``--interfloss-stop`` (``train_steps`` when negative) capped at
+    ``train_steps``.
+    """
+    schedule = parse_step_list(args.interfloss_at)
+    period = args.interfloss_every
+    if not (period and period > 0):
+        return schedule
+
+    first = max(args.interfloss_start, 0)
+    if args.interfloss_stop >= 0:
+        last = min(args.interfloss_stop, args.train_steps)
+    else:
+        last = args.train_steps
+    schedule.update(range(first, last + 1, period))
+    return schedule
+
+
+def load_model(model_type: str, ckpt_root: Path, ckpt_name: str, device: str, batch_size_override: int | None = None):
+    """Build the model and its ACT loss head from a checkpoint directory.
+
+    ``all_config.yaml`` in ``ckpt_root`` supplies the architecture config and
+    the dataset path; ``train/dataset.json`` under that dataset path supplies
+    ``seq_len``, ``vocab_size`` and ``num_puzzle_identifiers``.
+
+    Args:
+        model_type: Forwarded to ``import_stack`` to select the model class.
+        ckpt_root: Directory holding ``all_config.yaml`` and the checkpoint file.
+        ckpt_name: Checkpoint file name, or ``"__random__"`` to keep the freshly
+            constructed weights.
+        device: Device used as the default while constructing the modules.
+        batch_size_override: Replaces the config's ``global_batch_size`` when truthy.
+
+    Returns:
+        ``(head, base, cfg, adam_cls, sparse_cls)``; ``cfg["data_path"]`` is set
+        to the resolved dataset path as a string.
+    """
+    model_cls, loss_head_cls, adam_cls, sparse_cls = import_stack(model_type)
+
+    config_text = (ckpt_root / "all_config.yaml").read_text()
+    cfg = yaml.safe_load(config_text)
+    arch_cfg = dict(cfg["arch"])
+    data_path = Path(cfg.get("data_path") or cfg["data_paths"][0])
+    meta_text = (data_path / "train" / "dataset.json").read_text()
+    train_meta = json.loads(meta_text)
+
+    # Fill in the fields the model constructor needs but the arch section lacks.
+    arch_cfg.update(
+        {
+            "batch_size": batch_size_override or cfg["global_batch_size"],
+            "seq_len": train_meta["seq_len"],
+            "vocab_size": train_meta["vocab_size"],
+            "num_puzzle_identifiers": train_meta["num_puzzle_identifiers"],
+            "causal": False,
+        }
+    )
+    cfg["data_path"] = str(data_path)
+
+    with torch.device(device):
+        base = model_cls(arch_cfg)
+        head = loss_head_cls(base, loss_type=arch_cfg["loss"]["loss_type"])
+
+    if ckpt_name == "__random__":
+        print("[load __random__] random initialization from config")
+    else:
+        state = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True)
+        # Checkpoints saved from a torch.compile'd module carry this key prefix.
+        cleaned = {key.replace("_orig_mod.", ""): value for key, value in state.items()}
+        missing, unexpected = head.load_state_dict(cleaned, strict=False)
+        print(f"[load {ckpt_name}] missing={len(missing)} unexpected={len(unexpected)}")
+    return head, base, cfg, adam_cls, sparse_cls
+
+
+def jvp_train(f, x, v):
+    """Return ``(f(x), J_f(x) @ v)`` with the result kept differentiable.
+
+    Thin wrapper around ``torch.autograd.functional.jvp`` that requests
+    ``create_graph=True`` so the Jacobian-vector product can itself be
+    back-propagated through, and ``strict=False``.
+    """
+    value_and_tangent = torch.autograd.functional.jvp(
+        f,
+        x,
+        v=v,
+        create_graph=True,
+        strict=False,
+    )
+    return value_and_tangent
+
+
+def compute_joint_lyap_spec(model_type, base, batch, k_lyap, lyap_act_steps, device, seed, lyap_start_act=0):
+ """Estimate ``k_lyap`` per-sample growth rates of the joint (z_H, z_L) recursion.
+
+ A ``k_lyap``-column orthonormal frame over the concatenated (z_H, z_L) state
+ is pushed through Jacobian-vector products of every L-level and H-level
+ update, re-orthonormalized with QR after each update, and the log of
+ ``|diag(R)|`` is accumulated and averaged over the number of updates.
+ The JVPs go through ``jvp_train``, so the result stays differentiable.
+
+ Args:
+ model_type: ``"trm"`` reuses ``inner.L_level`` for the z_H update; any
+ other value uses ``inner.H_level``.
+ base: Model wrapper; only ``base.inner`` is used here.
+ batch: Dict providing ``"inputs"`` and ``"puzzle_identifiers"``.
+ k_lyap: Number of frame columns (exponents) to track.
+ lyap_act_steps: Requested number of measured ACT steps; capped at
+ ``max(halt_max_steps - warmup, 1)``.
+ device: Device for the random generator, the frame and the accumulator.
+ seed: Seed of the generator that draws the initial random frame.
+ lyap_start_act: ACT steps to run under ``no_grad`` before measuring,
+ clamped to ``[0, halt_max_steps]``.
+
+ Returns:
+ Float32 tensor of shape ``(bsz, k_lyap)``: the accumulated log
+ ``|diag(R)|`` divided by the number of QR steps.
+ """
+ inner = base.inner
+ cfg = inner.config
+ bsz = batch["inputs"].shape[0]
+ seq_full = cfg.seq_len + inner.puzzle_emb_len
+ hidden = cfg.hidden_size
+ # Size of one flattened state (z_H or z_L); the joint tangent space is 2 * dim.
+ dim = seq_full * hidden
+
+ # Broadcast the model's H_init / L_init to (bsz, seq_full, hidden) in the forward dtype.
+ z_h = inner.H_init.unsqueeze(0).expand(bsz, seq_full, hidden).clone().to(inner.forward_dtype)
+ z_l = inner.L_init.unsqueeze(0).expand(bsz, seq_full, hidden).clone().to(inner.forward_dtype)
+ # Rotary embedding is passed only when the inner model defines one.
+ seq_info = {"cos_sin": inner.rotary_emb() if hasattr(inner, "rotary_emb") else None}
+ input_embeddings = inner._input_embeddings(batch["inputs"], batch["puzzle_identifiers"])
+
+ # Optional late-window measurement: first move to a later recursive state
+ # without differentiating through the warmup trajectory. This regularizes
+ # local late-stage stability instead of penalizing useful early expansion.
+ warmup_acts = min(max(lyap_start_act, 0), cfg.halt_max_steps)
+ if warmup_acts > 0:
+ with torch.no_grad():
+ for _act in range(warmup_acts):
+ for _h in range(cfg.H_cycles):
+ for _l in range(cfg.L_cycles):
+ z_l = inner.L_level(z_l, z_h + input_embeddings, **seq_info)
+ if model_type == "trm":
+ z_h = inner.L_level(z_h, z_l, **seq_info)
+ else:
+ z_h = inner.H_level(z_h, z_l, **seq_info)
+ z_h = z_h.detach()
+ z_l = z_l.detach()
+
+ # Seeded random frame of shape (bsz, 2 * dim, k_lyap), orthonormalized by QR.
+ gen = torch.Generator(device=device).manual_seed(seed)
+ q0 = torch.randn(bsz, 2 * dim, k_lyap, device=device, dtype=torch.float32, generator=gen)
+ q, _ = torch.linalg.qr(q0)
+ log_r_sum = torch.zeros(bsz, k_lyap, device=device, dtype=torch.float32)
+ n_steps = 0
+
+ # Always measure at least one ACT step, even if the warmup used the whole budget.
+ n_act = min(lyap_act_steps, max(cfg.halt_max_steps - warmup_acts, 1))
+ for _act in range(n_act):
+ for _h in range(cfg.H_cycles):
+ for _l in range(cfg.L_cycles):
+ # Rows [:dim] of the frame belong to z_H, rows [dim:] to z_L.
+ v_h = q[:, :dim, :]
+ v_l = q[:, dim:, :]
+ # NOTE(review): the tangent fed to the JVP is the SUM of the two halves, and only
+ # the Jacobian w.r.t. the state being updated is applied. Confirm this is the
+ # intended approximation of the joint Jacobian.
+ v_comb = v_h + v_l
+ new_v_l_cols = []
+ f_l = lambda z: inner.L_level(z, z_h + input_embeddings, **seq_info)
+ for i in range(k_lyap):
+ v_i = v_comb[:, :, i].reshape(bsz, seq_full, hidden).to(inner.forward_dtype)
+ z_l_new, d_v = jvp_train(f_l, z_l, v_i)
+ new_v_l_cols.append(d_v.reshape(bsz, dim).to(torch.float32))
+ # The z_H half of the frame is carried over unchanged for an L update.
+ q = torch.cat([v_h, torch.stack(new_v_l_cols, dim=-1)], dim=1)
+ # z_l_new comes from the last JVP call, so this requires k_lyap >= 1.
+ z_l = z_l_new
+ q, r = torch.linalg.qr(q)
+ # clamp_min keeps log() finite when a diagonal entry of R is zero.
+ log_r_sum = log_r_sum + r.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
+ n_steps += 1
+
+ # H update: same procedure, with the z_L half of the frame carried over unchanged.
+ v_h = q[:, :dim, :]
+ v_l = q[:, dim:, :]
+ v_comb = v_h + v_l
+ new_v_h_cols = []
+ if model_type == "trm":
+ f_h = lambda z: inner.L_level(z, z_l, **seq_info)
+ else:
+ f_h = lambda z: inner.H_level(z, z_l, **seq_info)
+ for i in range(k_lyap):
+ v_i = v_comb[:, :, i].reshape(bsz, seq_full, hidden).to(inner.forward_dtype)
+ z_h_new, d_v = jvp_train(f_h, z_h, v_i)
+ new_v_h_cols.append(d_v.reshape(bsz, dim).to(torch.float32))
+ q = torch.cat([torch.stack(new_v_h_cols, dim=-1), v_l], dim=1)
+ z_h = z_h_new
+ q, r = torch.linalg.qr(q)
+ log_r_sum = log_r_sum + r.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
+ n_steps += 1
+
+ # Average log-growth per QR step; max() guards against zero measured steps.
+ return log_r_sum / max(n_steps, 1)
+
+
+def floss_loss_from_spec(spec, mode: str, lambda_star: float):
+    """Turn a ``(batch, k)`` exponent spectrum into a scalar flossing loss.
+
+    Modes:
+        ``engelken_l2``: mean squared exponent (``lambda_star`` is ignored).
+        ``spectrum_cf``: mean squared positive excess of every exponent over ``lambda_star``.
+        ``volume_cf``:   same, applied to the per-sample mean exponent.
+        ``top1_cf``:     same, applied to the first exponent of each sample.
+
+    Returns:
+        ``(loss, penalized)``, where ``penalized`` is the tensor that was squared
+        (the spectrum itself for ``engelken_l2``, the clamped excess otherwise).
+
+    Raises:
+        ValueError: if ``mode`` is not one of the four names above.
+    """
+    if mode == "engelken_l2":
+        penalized = spec
+    elif mode == "spectrum_cf":
+        penalized = torch.clamp(spec - lambda_star, min=0.0)
+    elif mode == "volume_cf":
+        penalized = torch.clamp(spec.mean(dim=1) - lambda_star, min=0.0)
+    elif mode == "top1_cf":
+        penalized = torch.clamp(spec[:, 0] - lambda_star, min=0.0)
+    else:
+        raise ValueError(f"unknown floss mode: {mode}")
+    return penalized.pow(2).mean(), penalized
+
+
+def load_train_batches(data_path: Path, batch_size: int, n_iters: int, seed: int = 0):
+    """Yield ``n_iters`` random training batches as dicts of int32 CPU tensors.
+
+    The three ``train/all__*.npy`` arrays are loaded when iteration starts.
+    Every batch draws ``batch_size`` distinct row indices from a NumPy generator
+    seeded with ``seed``; rows may repeat across batches.
+    """
+    rng = np.random.default_rng(seed)
+    train_dir = data_path / "train"
+    arrays = {
+        "inputs": np.load(train_dir / "all__inputs.npy"),
+        "labels": np.load(train_dir / "all__labels.npy"),
+        "puzzle_identifiers": np.load(train_dir / "all__puzzle_identifiers.npy"),
+    }
+    n_examples = len(arrays["inputs"])
+
+    produced = 0
+    while produced < n_iters:
+        rows = rng.choice(n_examples, size=batch_size, replace=False)
+        yield {name: torch.from_numpy(values[rows].astype(np.int32)) for name, values in arrays.items()}
+        produced += 1
+
+
+def sample_replay_batch(data_path: Path, n_samples: int, seed: int):
+    """Draw one fixed set of ``n_samples`` distinct training rows.
+
+    Returns a dict with ``"inputs"``, ``"labels"`` and ``"puzzle_identifiers"``
+    as int32 CPU tensors, selected with a NumPy generator seeded by ``seed``.
+    """
+    rng = np.random.default_rng(seed)
+    train_dir = data_path / "train"
+    inputs = np.load(train_dir / "all__inputs.npy")
+    labels = np.load(train_dir / "all__labels.npy")
+    pid = np.load(train_dir / "all__puzzle_identifiers.npy")
+
+    rows = rng.choice(len(inputs), size=n_samples, replace=False)
+
+    def _take(values):
+        return torch.from_numpy(values[rows].astype(np.int32))
+
+    return {"inputs": _take(inputs), "labels": _take(labels), "puzzle_identifiers": _take(pid)}
+
+
+def move_batch(batch: dict[str, torch.Tensor], device: str):
+    """Return a new dict with every tensor of ``batch`` moved to ``device``."""
+    moved = {}
+    for name, tensor in batch.items():
+        moved[name] = tensor.to(device)
+    return moved
+
+
+def rollout_logits(base, batch, device):
+    """Run ``base`` for ``halt_max_steps`` steps and return the last step's logits.
+
+    The initial carry is created with ``device`` as the default device. The
+    model's current train/eval mode and grad mode are left as the caller set them.
+    """
+    with torch.device(device):
+        state = base.initial_carry(batch)
+    for _step in range(base.config.halt_max_steps):
+        state, step_outputs = base(carry=state, batch=batch)
+    return step_outputs["logits"]
+
+
+def build_kl_replay(args, base, data_path, device, episode_idx):
+    """Snapshot the model's logits on a replay set at the start of a floss episode.
+
+    Returns ``None`` when the KL term is disabled (``kl_beta <= 0`` or
+    ``kl_replay_size <= 0``). Otherwise returns the sampled replay dict extended
+    with ``"teacher_logits"`` (float32, on CPU) and ``"mask"`` (``labels > 0``).
+    ``base`` is switched to eval mode and is not switched back here.
+    """
+    if args.kl_beta <= 0 or args.kl_replay_size <= 0:
+        return None
+
+    replay = sample_replay_batch(data_path, n_samples=args.kl_replay_size, seed=args.seed + 200000 + episode_idx)
+
+    base.eval()
+    teacher_chunks = []
+    with torch.no_grad():
+        lo = 0
+        while lo < args.kl_replay_size:
+            hi = min(lo + args.kl_batch_size, args.kl_replay_size)
+            chunk = {name: tensor[lo:hi] for name, tensor in replay.items()}
+            chunk_logits = rollout_logits(base, move_batch(chunk, device), device)
+            # Keep the teacher targets on CPU so the replay set does not occupy device memory.
+            teacher_chunks.append(chunk_logits.detach().to(torch.float32).cpu())
+            lo += args.kl_batch_size
+
+    replay["teacher_logits"] = torch.cat(teacher_chunks, dim=0)
+    replay["mask"] = replay["labels"] > 0
+    return replay
+
+
+def kl_preservation_loss(args, base, replay, step, device):
+    """KL(teacher || student) on a rotating slice of the replay set.
+
+    With ``replay is None`` a scalar zero on ``device`` is returned. Otherwise
+    ``min(kl_batch_size, n_replay)`` consecutive replay rows (wrapping around
+    the end) are selected from ``step``, the model is rolled out in eval mode
+    with gradients enabled, and the temperature-scaled KL to the stored teacher
+    logits is averaged over masked tokens (over all tokens if the mask is empty).
+    If ``base`` was in train mode it is put back, and the puzzle-embedding mode
+    is re-applied from ``args.train_puzzle_emb``.
+    """
+    if replay is None:
+        return torch.zeros((), device=device)
+
+    n_replay = replay["inputs"].shape[0]
+    batch_size = min(args.kl_batch_size, n_replay)
+    start = (step * batch_size) % n_replay
+    # batch_size <= n_replay, so the window wraps around the end at most once.
+    idx = (torch.arange(batch_size) + start) % n_replay
+
+    batch = move_batch({name: replay[name][idx] for name in ("inputs", "labels", "puzzle_identifiers")}, device)
+    teacher_logits = replay["teacher_logits"][idx].to(device)
+    mask = replay["mask"][idx].to(device)
+
+    restore_train = base.training
+    base.eval()
+    student_logits = rollout_logits(base, batch, device).to(torch.float32)
+    if restore_train:
+        base.train()
+        set_puzzle_embedding_mode(base, args.train_puzzle_emb)
+
+    temp = args.kl_temperature
+    log_q = F.log_softmax(student_logits / temp, dim=-1)
+    p = F.softmax(teacher_logits / temp, dim=-1)
+    # The temp**2 factor rescales the loss after dividing both logits by temp.
+    kl_per_token = F.kl_div(log_q, p, reduction="none").sum(dim=-1) * (temp ** 2)
+    selected = kl_per_token[mask] if mask.any() else kl_per_token
+    return selected.mean()
+
+
+def evaluate(head, base, data_path, n_samples, batch_size, device, seed=42):
+    """Measure exact-match and token accuracy on a random subset of the test split.
+
+    Distinct test rows are drawn with a NumPy generator seeded by ``seed``, so
+    repeated calls with the same arguments score the same examples. Each batch
+    is rolled out for ``base.config.halt_max_steps`` steps under ``no_grad`` and
+    the arg-max of the final logits is compared with the labels. Positions whose
+    label is ``<= 0`` are excluded from both metrics.
+
+    Args:
+        head: Loss-head module; only switched to eval mode here.
+        base: Model called as ``base(carry=..., batch=...)``.
+        data_path: Dataset root containing ``test/all__*.npy``.
+        n_samples: Number of test examples to score. Values larger than the test
+            set are clamped to its size instead of raising.
+        batch_size: Evaluation micro-batch size.
+        device: Device the batches are moved to.
+        seed: Seed for the subset selection.
+
+    Returns:
+        ``(exact_acc, token_acc)`` as Python floats; ``(0.0, 0.0)`` when there is
+        nothing to evaluate.
+    """
+    rng = np.random.default_rng(seed)
+    test_dir = data_path / "test"
+    inputs = np.load(test_dir / "all__inputs.npy")
+    labels = np.load(test_dir / "all__labels.npy")
+    pid = np.load(test_dir / "all__puzzle_identifiers.npy")
+    head.eval()
+
+    # rng.choice(..., replace=False) raises ValueError when asked for more rows
+    # than exist, and the final division fails for zero rows; guard both.
+    n_samples = min(n_samples, len(inputs))
+    if n_samples <= 0:
+        return 0.0, 0.0
+    idx_all = rng.choice(len(inputs), size=n_samples, replace=False)
+
+    correct = 0
+    token_correct = 0
+    token_total = 0
+    for start in range(0, n_samples, batch_size):
+        idx = idx_all[start:min(start + batch_size, n_samples)]
+        batch = {
+            "inputs": torch.from_numpy(inputs[idx].astype(np.int32)).to(device),
+            "labels": torch.from_numpy(labels[idx].astype(np.int32)).to(device),
+            "puzzle_identifiers": torch.from_numpy(pid[idx].astype(np.int32)).to(device),
+        }
+        with torch.no_grad():
+            with torch.device(device):
+                carry = base.initial_carry(batch)
+            for _ in range(base.config.halt_max_steps):
+                carry, outputs = base(carry=carry, batch=batch)
+        preds = outputs["logits"].argmax(dim=-1)
+        mask = batch["labels"] > 0
+        hits = preds == batch["labels"]
+        # A sequence is exact when every labelled position is predicted correctly.
+        correct += (hits | ~mask).all(dim=-1).float().sum().item()
+        token_correct += (hits & mask).sum().item()
+        token_total += mask.sum().item()
+    return correct / n_samples, token_correct / max(token_total, 1)
+
+
+def write_log(path: str, log: dict):
+    """Overwrite ``path`` with ``log`` serialized as 2-space-indented JSON."""
+    serialized = json.dumps(log, indent=2)
+    Path(path).write_text(serialized)
+
+
+def freeze_puzzle_embedding(base):
+    """Put the puzzle-embedding sub-module of ``base.inner`` into eval mode.
+
+    Only the module mode is changed; ``requires_grad`` flags are not touched.
+    """
+    puzzle_emb = base.inner.puzzle_emb
+    puzzle_emb.eval()
+
+
+def set_puzzle_embedding_mode(base, train_puzzle_emb: bool):
+    """Switch the puzzle embedding to train mode, or to eval mode when it is not trained."""
+    if not train_puzzle_emb:
+        freeze_puzzle_embedding(base)
+        return
+    base.inner.puzzle_emb.train()
+
+
+def make_optimizers(args, base, head, adam_cls, sparse_cls, lr: float, weight_decay: float, train_puzzle_emb: bool):
+    """Build the optimizer list for one training phase.
+
+    A sparse optimizer over ``base.inner.puzzle_emb.buffers()`` comes first when
+    ``train_puzzle_emb`` is set and the config reports ``puzzle_emb_ndim > 0``;
+    its learning rate is ``args.puzzle_emb_lr`` if given, else ``lr``. The dense
+    optimizer over ``head.parameters()`` is always last.
+
+    Returns:
+        List of optimizer instances, in the order they should be stepped.
+    """
+    sparse_part = []
+    if train_puzzle_emb and getattr(base.inner.config, "puzzle_emb_ndim", 0) > 0:
+        emb_lr = args.puzzle_emb_lr if args.puzzle_emb_lr is not None else lr
+        sparse_optim = sparse_cls(
+            base.inner.puzzle_emb.buffers(),
+            lr=emb_lr,
+            weight_decay=args.puzzle_emb_weight_decay,
+            world_size=1,
+        )
+        sparse_part.append(sparse_optim)
+    dense_optim = adam_cls(head.parameters(), lr=lr, betas=(0.9, 0.95), weight_decay=weight_decay)
+    return [*sparse_part, dense_optim]
+
+
+def optim_zero_grad(optimizers):
+    """Clear gradients on every optimizer, releasing them (``set_to_none=True``)."""
+    for optimizer in optimizers:
+        optimizer.zero_grad(set_to_none=True)
+
+
+def optim_step(optimizers):
+    """Apply one update step on every optimizer, in list order."""
+    for optimizer in optimizers:
+        optimizer.step()
+
+
+def run_floss_episode(args, head, base, adam_cls, data_path, device, log, episode_idx, train_step):
+ """Run one floss-only episode of ``args.floss_steps`` optimizer steps.
+
+ Each step computes the Lyapunov spectrum on a fresh training batch,
+ back-propagates the flossing loss and, when ``args.kl_beta > 0``, a
+ replay-logit KL term, clips the gradient norm to 1.0 and steps the
+ episode's own optimizers. No supervised task loss is used here.
+
+ Side effects: updates the weights of ``head``; appends the per-step
+ records to ``log["floss_episodes"]``; optionally appends an
+ ``"after_floss"`` evaluation to ``log["evals"]``; rewrites ``args.out``.
+
+ Args:
+ args: Parsed CLI namespace; also carries ``sparse_cls`` set by ``main``.
+ head, base: Loss head and underlying model.
+ adam_cls: Dense optimizer class used for the episode.
+ data_path: Dataset root.
+ device: Device for batches and the spectrum computation.
+ log: Run log dict, mutated in place.
+ episode_idx: Index of this episode; offsets the data, replay and frame seeds.
+ train_step: Task step at which the episode runs (recorded in the log only).
+ """
+ print(
+ f"\n=== Floss episode {episode_idx} at train_step={train_step}: "
+ f"{args.floss_steps} steps, mode={args.floss_mode}, lr={args.floss_lr} ===",
+ flush=True,
+ )
+ # Fresh optimizers per episode: floss LR, no weight decay, puzzle embedding not trained.
+ optimizers = make_optimizers(
+ args, base, head, adam_cls, args.sparse_cls,
+ lr=args.floss_lr, weight_decay=0.0, train_puzzle_emb=False,
+ )
+ # Teacher logits are captured once, before any floss update of this episode.
+ replay = build_kl_replay(args, base, data_path, device, episode_idx)
+ train_iter = load_train_batches(
+ data_path,
+ args.floss_batch_size,
+ args.floss_steps,
+ seed=args.seed + 100000 + episode_idx * 1000,
+ )
+ episode = {"episode": episode_idx, "train_step": train_step, "steps": []}
+ t0 = time.time()
+
+ for step, batch in enumerate(train_iter):
+ batch = {k: v.to(device) for k, v in batch.items()}
+ head.train()
+ set_puzzle_embedding_mode(base, False)
+ spec = compute_joint_lyap_spec(
+ args.model,
+ base,
+ batch,
+ k_lyap=args.k_lyap,
+ lyap_act_steps=args.lyap_act_steps,
+ device=device,
+ seed=args.seed + episode_idx * 10000 + step,
+ lyap_start_act=args.lyap_start_act,
+ )
+ optim_zero_grad(optimizers)
+ floss_loss, excess = floss_loss_from_spec(spec, args.floss_mode, args.lambda_star)
+ # Two separate backward passes accumulate into the same .grad buffers.
+ floss_loss.backward()
+ kl_loss = kl_preservation_loss(args, base, replay, step, device)
+ if args.kl_beta > 0:
+ (args.kl_beta * kl_loss).backward()
+ torch.nn.utils.clip_grad_norm_([p for p in head.parameters() if p.requires_grad], 1.0)
+ optim_step(optimizers)
+
+ detached = spec.detach()
+ total_loss = floss_loss.detach() + args.kl_beta * kl_loss.detach()
+ rec = {
+ "step": step,
+ "loss": float(total_loss.item()),
+ "floss_loss": float(floss_loss.item()),
+ "kl_loss": float(kl_loss.item()),
+ "lyap1_mean": float(detached[:, 0].mean().item()),
+ "lyap1_max": float(detached[:, 0].max().item()),
+ "lyap_mean": float(detached.mean().item()),
+ "volume_mean": float(detached.mean(dim=1).mean().item()),
+ "volume_max": float(detached.mean(dim=1).max().item()),
+ # Fraction of entries of the second value returned by floss_loss_from_spec that are > 0.
+ "frac_active": float((excess.detach() > 0).float().mean().item()),
+ }
+ episode["steps"].append(rec)
+ if step % args.floss_log_every == 0 or step == args.floss_steps - 1:
+ print(
+ f" F[{step:>4}/{args.floss_steps}] dt={time.time() - t0:.1f}s "
+ f"loss={rec['loss']:.6f} floss={rec['floss_loss']:.6f} "
+ f"kl={rec['kl_loss']:.6f} lyap1={rec['lyap1_mean']:+.4f} "
+ f"vol={rec['volume_mean']:+.4f} active={rec['frac_active']:.2f}",
+ flush=True,
+ )
+
+ log["floss_episodes"].append(episode)
+ if args.eval_after_floss:
+ acc, tok_acc = evaluate(head, base, data_path, args.eval_n, args.eval_batch_size, device)
+ print(f" >> FLOSS EVAL train_step={train_step}: exact_acc={acc:.4f}", flush=True)
+ log["evals"].append(
+ {"kind": "after_floss", "train_step": train_step, "episode": episode_idx, "acc": acc, "tok_acc": tok_acc}
+ )
+ # Persist after every episode so a crash later in training keeps this record.
+ write_log(args.out, log)
+
+
+def run_task_step(args, head, base, batch, optimizers, device):
+    """Perform one supervised ACT update and return its loss tensor.
+
+    The loss head is called for up to ``base.config.halt_max_steps`` segments
+    (stopping early once it reports all sequences finished). The segment losses
+    are averaged over segments and divided by the batch size, back-propagated,
+    the gradient norm is clipped to 1.0, and every optimizer is stepped.
+    """
+    batch = {name: tensor.to(device) for name, tensor in batch.items()}
+    head.train()
+    set_puzzle_embedding_mode(base, args.train_puzzle_emb)
+    with torch.device(device):
+        carry = base.initial_carry(batch)
+
+    accumulated = 0.0
+    segments = 0
+    for _segment in range(base.config.halt_max_steps):
+        carry, segment_loss, _metrics, _outputs, finished = head(return_keys=[], carry=carry, batch=batch)
+        accumulated = accumulated + segment_loss
+        segments += 1
+        if finished:
+            break
+
+    n_examples = batch["inputs"].shape[0]
+    sup_loss = accumulated / max(segments, 1) / n_examples
+
+    optim_zero_grad(optimizers)
+    sup_loss.backward()
+    trainable = [p for p in head.parameters() if p.requires_grad]
+    torch.nn.utils.clip_grad_norm_(trainable, 1.0)
+    optim_step(optimizers)
+    return sup_loss
+
+
+def main():
+ """CLI entry point: supervised task training interleaved with floss-only episodes.
+
+ Parses arguments, loads the model, runs an initial evaluation, then performs
+ ``--train-steps`` supervised updates. Before the task update at every step
+ listed in the interfloss schedule, a floss episode is run. Evaluations and
+ per-step losses are collected in a JSON log written to ``--out``.
+ """
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--model", choices=["hrm", "trm"], required=True)
+ parser.add_argument("--ckpt-root", required=True)
+ parser.add_argument("--ckpt-name", required=True,
+ help="Checkpoint file name, or __random__ to initialize from config without loading weights.")
+ parser.add_argument("--train-steps", type=int, default=10000)
+ parser.add_argument("--batch-size", type=int, default=8)
+ parser.add_argument("--task-batch-size", type=int, default=None,
+ help="Supervised task microbatch size. Defaults to --batch-size.")
+ parser.add_argument("--floss-batch-size", type=int, default=None,
+ help="Flossing microbatch size. Defaults to --batch-size.")
+ parser.add_argument("--train-lr", type=float, default=1e-5)
+ parser.add_argument("--floss-lr", type=float, default=1e-4)
+ parser.add_argument("--floss-steps", type=int, default=500)
+ parser.add_argument("--interfloss-at", default="0,500")
+ parser.add_argument("--interfloss-every", type=int, default=0,
+ help="If >0, also run floss episodes periodically every N task optimizer steps.")
+ parser.add_argument("--interfloss-start", type=int, default=0,
+ help="First task optimizer step for periodic interfloss.")
+ parser.add_argument("--interfloss-stop", type=int, default=-1,
+ help="Last task optimizer step for periodic interfloss. -1 means train_steps.")
+ parser.add_argument("--floss-mode", choices=["engelken_l2", "spectrum_cf", "volume_cf", "top1_cf"], default="engelken_l2")
+ parser.add_argument("--lambda-star", type=float, default=0.0)
+ parser.add_argument("--k-lyap", type=int, default=8)
+ parser.add_argument("--lyap-act-steps", type=int, default=4)
+ parser.add_argument("--lyap-start-act", type=int, default=0,
+ help="Warm up this many ACT steps before measuring/flossing the Lyapunov window.")
+ parser.add_argument("--seed", type=int, default=42)
+ parser.add_argument("--eval-every", type=int, default=1000)
+ parser.add_argument("--eval-n", type=int, default=512)
+ parser.add_argument("--eval-batch-size", type=int, default=32)
+ parser.add_argument("--floss-log-every", type=int, default=10)
+ parser.add_argument("--eval-after-floss", action=argparse.BooleanOptionalAction, default=True)
+ parser.add_argument("--kl-beta", type=float, default=0.0,
+ help="Episode-start replay-logit KL weight during floss-only steps.")
+ parser.add_argument("--kl-replay-size", type=int, default=64)
+ parser.add_argument("--kl-batch-size", type=int, default=8)
+ parser.add_argument("--kl-temperature", type=float, default=1.0)
+ parser.add_argument("--init-seed", type=int, default=None,
+ help="Torch seed used before model construction. Use this for matched from-scratch runs.")
+ parser.add_argument("--train-puzzle-emb", action=argparse.BooleanOptionalAction, default=False,
+ help="Train sparse puzzle embeddings. Requires --batch-size to match the model local embedding batch.")
+ parser.add_argument("--puzzle-emb-lr", type=float, default=None,
+ help="Sparse puzzle embedding LR. Defaults to current phase LR.")
+ parser.add_argument("--puzzle-emb-weight-decay", type=float, default=1.0)
+ parser.add_argument("--out", default="step7_interfloss_log.json")
+ args = parser.parse_args()
+ # Both microbatch sizes fall back to --batch-size; afterwards args.batch_size
+ # is overwritten with the task batch size, so the log records that value.
+ if args.task_batch_size is None:
+ args.task_batch_size = args.batch_size
+ if args.floss_batch_size is None:
+ args.floss_batch_size = args.batch_size
+ args.batch_size = args.task_batch_size
+
+ # NOTE(review): the device is hard-coded to CUDA; there is no CPU fallback.
+ device = "cuda"
+ if args.init_seed is not None:
+ torch.manual_seed(args.init_seed)
+ np.random.seed(args.init_seed)
+ interfloss_steps = build_interfloss_steps(args)
+ # The model's batch size is overridden only when the sparse puzzle embedding is trained.
+ head, base, cfg, adam_cls, sparse_cls = load_model(
+ args.model,
+ Path(args.ckpt_root),
+ args.ckpt_name,
+ device,
+ batch_size_override=args.task_batch_size if args.train_puzzle_emb else None,
+ )
+ # Stashed on args so run_floss_episode can build its optimizers.
+ args.sparse_cls = sparse_cls
+ data_path = Path(cfg["data_path"])
+
+ print(f"\n=== Initial eval (loaded {args.ckpt_name}) ===")
+ acc0, tok0 = evaluate(head, base, data_path, args.eval_n, args.eval_batch_size, device)
+ print(f" initial: exact_acc={acc0:.4f} token_acc={tok0:.4f}", flush=True)
+
+ log = {
+ # sparse_cls is a class object, which json.dumps cannot serialize, so it is left out.
+ "args": {k: v for k, v in vars(args).items() if k != "sparse_cls"},
+ "initial_acc": acc0,
+ "initial_tok_acc": tok0,
+ "interfloss_steps": sorted(interfloss_steps),
+ "task_steps": [],
+ "floss_episodes": [],
+ "evals": [{"kind": "initial", "train_step": 0, "acc": acc0, "tok_acc": tok0}],
+ }
+ write_log(args.out, log)
+
+ # One set of task optimizers is kept for the whole run; floss episodes build their own.
+ task_optimizers = make_optimizers(
+ args, base, head, adam_cls, sparse_cls,
+ lr=args.train_lr, weight_decay=cfg["weight_decay"], train_puzzle_emb=args.train_puzzle_emb,
+ )
+ train_iter = load_train_batches(data_path, args.task_batch_size, args.train_steps, seed=args.seed)
+ episode_idx = 0
+ t0 = time.time()
+
+ for train_step, batch in enumerate(train_iter):
+ # An episode scheduled at step t runs before the t-th (0-based) task update.
+ if train_step in interfloss_steps:
+ run_floss_episode(args, head, base, adam_cls, data_path, device, log, episode_idx, train_step)
+ episode_idx += 1
+
+ sup_loss = run_task_step(args, head, base, batch, task_optimizers, device)
+ # Logged step numbers are 1-based (number of task updates completed).
+ rec = {"train_step": train_step + 1, "sup_loss": float(sup_loss.item())}
+ log["task_steps"].append(rec)
+ if train_step % 50 == 0 or train_step == args.train_steps - 1:
+ print(
+ f" T[{train_step + 1:>5}/{args.train_steps}] dt={time.time() - t0:.1f}s "
+ f"sup={rec['sup_loss']:.4f}",
+ flush=True,
+ )
+
+ if (train_step + 1) % args.eval_every == 0:
+ acc, tok_acc = evaluate(head, base, data_path, args.eval_n, args.eval_batch_size, device)
+ print(f" >> TASK EVAL @ step {train_step + 1}: exact_acc={acc:.4f} delta={acc - acc0:+.4f}", flush=True)
+ log["evals"].append({"kind": "task", "train_step": train_step + 1, "acc": acc, "tok_acc": tok_acc})
+ write_log(args.out, log)
+
+ # An episode scheduled exactly at train_steps runs after the last task update.
+ if args.train_steps in interfloss_steps:
+ run_floss_episode(args, head, base, adam_cls, data_path, device, log, episode_idx, args.train_steps)
+
+ acc_f, tok_f = evaluate(head, base, data_path, args.eval_n, args.eval_batch_size, device)
+ print("\n=== Final eval ===")
+ print(f" initial={acc0:.4f} final={acc_f:.4f} delta={acc_f - acc0:+.4f}", flush=True)
+ log["final_acc"] = acc_f
+ log["final_tok_acc"] = tok_f
+ log["evals"].append({"kind": "final", "train_step": args.train_steps, "acc": acc_f, "tok_acc": tok_f})
+ write_log(args.out, log)
+ print(f"log -> {args.out}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/step8_basin_consistency.py b/step8_basin_consistency.py
new file mode 100644
index 0000000..8bde719
--- /dev/null
+++ b/step8_basin_consistency.py
@@ -0,0 +1,199 @@
+"""Step 8: Hidden-state basin consistency regularization.
+
+This tests the "stabilize the correct basin, not the whole Lyapunov spectrum"
+hypothesis. For each task update, we optionally:
+ 1. roll out a clean teacher trajectory to final logits;
+ 2. roll out to an intermediate recursive state;
+ 3. perturb z_H/z_L there;
+ 4. continue the rollout and penalize final-logit KL to the clean teacher.
+
+The supervised loss remains the primary task objective. The consistency term
+directly asks nearby hidden states to land in the same answer basin.
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+import time
+from dataclasses import replace
+from pathlib import Path
+
+import torch
+import torch.nn.functional as F
+
+FLOSS_DIR = Path(__file__).resolve().parent
+sys.path.insert(0, str(FLOSS_DIR))
+
+from step7_interfloss import ( # noqa: E402
+ evaluate,
+ freeze_puzzle_embedding,
+ load_model,
+ load_train_batches,
+ move_batch,
+)
+
+
+def rollout_logits_eval(base, batch, device):
+    """Roll ``base`` out in eval mode for ``halt_max_steps`` steps; return the final logits.
+
+    Grad mode is left to the caller. ``base`` stays in eval mode afterwards.
+    """
+    base.eval()
+    freeze_puzzle_embedding(base)
+    with torch.device(device):
+        state = base.initial_carry(batch)
+    last_outputs = None
+    for _step in range(base.config.halt_max_steps):
+        state, last_outputs = base(carry=state, batch=batch)
+    return last_outputs["logits"]
+
+
+def perturb_inner_carry(carry, noise_std: float):
+    """Return a copy of ``carry`` with Gaussian noise added to ``z_H`` and ``z_L``.
+
+    With ``noise_std <= 0`` the same ``carry`` object is returned untouched.
+    Otherwise noise of standard deviation ``noise_std`` is drawn from the global
+    torch RNG (first for ``z_H``, then for ``z_L``) and new dataclass instances
+    are built with ``dataclasses.replace``; the input carry is not modified.
+    """
+    if noise_std <= 0:
+        return carry
+    state = carry.inner_carry
+    noisy_h = state.z_H + noise_std * torch.randn_like(state.z_H)
+    noisy_l = state.z_L + noise_std * torch.randn_like(state.z_L)
+    noisy_state = replace(state, z_H=noisy_h, z_L=noisy_l)
+    return replace(carry, inner_carry=noisy_state)
+
+
+def perturbed_rollout_logits(base, batch, device, perturb_after: int, noise_std: float):
+    """Roll out in eval mode, injecting hidden-state noise partway through.
+
+    The first ``min(max(perturb_after, 1), halt_max_steps - 1)`` steps run under
+    ``no_grad``; the carry is then perturbed and the remaining steps run with
+    the caller's grad mode, so gradients flow only through the post-perturbation
+    part of the trajectory. Returns the final step's logits.
+    """
+    base.eval()
+    freeze_puzzle_embedding(base)
+    with torch.device(device):
+        state = base.initial_carry(batch)
+    last_outputs = None
+
+    n_warm = min(max(perturb_after, 1), base.config.halt_max_steps - 1)
+    with torch.no_grad():
+        for _step in range(n_warm):
+            state, last_outputs = base(carry=state, batch=batch)
+
+    state = perturb_inner_carry(state, noise_std)
+    for _step in range(n_warm, base.config.halt_max_steps):
+        state, last_outputs = base(carry=state, batch=batch)
+    return last_outputs["logits"]
+
+
+def consistency_loss(args, base, batch, device):
+    """KL between a clean rollout (teacher) and a perturbed rollout (student).
+
+    The teacher logits are computed under ``no_grad`` and detached. The student
+    comes from ``perturbed_rollout_logits`` using ``args.perturb_after`` and
+    ``args.noise_std``. The temperature-scaled per-token KL is averaged over
+    positions with ``labels > 0``, or over all positions if there are none.
+    """
+    with torch.no_grad():
+        clean = rollout_logits_eval(base, batch, device).detach().to(torch.float32)
+    noisy = perturbed_rollout_logits(
+        base, batch, device, perturb_after=args.perturb_after, noise_std=args.noise_std
+    ).to(torch.float32)
+
+    temp = args.kl_temperature
+    target_p = F.softmax(clean / temp, dim=-1)
+    student_logp = F.log_softmax(noisy / temp, dim=-1)
+    kl_per_token = F.kl_div(student_logp, target_p, reduction="none").sum(dim=-1) * (temp ** 2)
+
+    labelled = batch["labels"] > 0
+    chosen = kl_per_token[labelled] if labelled.any() else kl_per_token
+    return chosen.mean()
+
+
+def supervised_backward(head, base, batch, device):
+    """Compute the supervised ACT loss, back-propagate it, and return it detached.
+
+    The loss head is called for up to ``halt_max_steps`` segments, stopping
+    early when it reports all sequences finished. The segment losses are
+    averaged over segments and divided by the batch size. Gradients are
+    accumulated; zeroing them and stepping the optimizer is the caller's job.
+    """
+    head.train()
+    freeze_puzzle_embedding(base)
+    with torch.device(device):
+        carry = base.initial_carry(batch)
+
+    accumulated = 0.0
+    segments = 0
+    for _segment in range(base.config.halt_max_steps):
+        carry, segment_loss, _metrics, _outputs, finished = head(return_keys=[], carry=carry, batch=batch)
+        accumulated = accumulated + segment_loss
+        segments += 1
+        if finished:
+            break
+
+    n_examples = batch["inputs"].shape[0]
+    sup_loss = accumulated / max(segments, 1) / n_examples
+    sup_loss.backward()
+    return sup_loss.detach()
+
+
+def write_log(path: str, log: dict):
+    """Overwrite ``path`` with ``log`` serialized as 2-space-indented JSON."""
+    serialized = json.dumps(log, indent=2)
+    Path(path).write_text(serialized)
+
+
+def main():
+    """CLI entry point: supervised fine-tuning with an optional basin-consistency term.
+
+    Each step back-propagates the supervised ACT loss and, every
+    ``--consistency-every`` steps when ``--consistency-beta > 0``, additionally
+    back-propagates ``beta`` times the KL between a clean rollout and a rollout
+    whose hidden state was perturbed after ``--perturb-after`` steps. Both
+    gradients accumulate before a single clipped optimizer step. Per-step losses
+    and periodic evaluations are written to ``--out`` as JSON.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model", choices=["hrm", "trm"], required=True)
+    parser.add_argument("--ckpt-root", required=True)
+    parser.add_argument("--ckpt-name", required=True)
+    parser.add_argument("--train-steps", type=int, default=10000)
+    parser.add_argument("--batch-size", type=int, default=8)
+    parser.add_argument("--lr", type=float, default=1e-5)
+    parser.add_argument("--consistency-beta", type=float, default=1.0)
+    parser.add_argument("--consistency-every", type=int, default=1)
+    parser.add_argument("--perturb-after", type=int, default=8)
+    parser.add_argument("--noise-std", type=float, default=0.02)
+    parser.add_argument("--kl-temperature", type=float, default=1.0)
+    parser.add_argument("--seed", type=int, default=42)
+    parser.add_argument("--eval-every", type=int, default=1000)
+    parser.add_argument("--eval-n", type=int, default=512)
+    parser.add_argument("--eval-batch-size", type=int, default=32)
+    parser.add_argument("--out", default="step8_basin_consistency_log.json")
+    args = parser.parse_args()
+
+    device = "cuda"
+    # load_model returns five values (head, base, cfg, adam_cls, sparse_cls).
+    # Unpacking only four raised "too many values to unpack" before training
+    # could start. The sparse puzzle-embedding optimizer class is unused here.
+    head, base, cfg, adam_cls, _sparse_cls = load_model(args.model, Path(args.ckpt_root), args.ckpt_name, device)
+    data_path = Path(cfg["data_path"])
+    optim = adam_cls(head.parameters(), lr=args.lr, betas=(0.9, 0.95), weight_decay=cfg["weight_decay"])
+
+    print(f"\n=== Initial eval (loaded {args.ckpt_name}) ===")
+    acc0, tok0 = evaluate(head, base, data_path, args.eval_n, args.eval_batch_size, device)
+    print(f" initial: exact_acc={acc0:.4f} token_acc={tok0:.4f}", flush=True)
+
+    log = {
+        "args": vars(args),
+        "initial_acc": acc0,
+        "initial_tok_acc": tok0,
+        "steps": [],
+        "evals": [{"step": 0, "acc": acc0, "tok_acc": tok0}],
+    }
+    write_log(args.out, log)
+
+    train_iter = load_train_batches(data_path, args.batch_size, args.train_steps, seed=args.seed)
+    t0 = time.time()
+    for step, batch_cpu in enumerate(train_iter):
+        batch = move_batch(batch_cpu, device)
+        optim.zero_grad(set_to_none=True)
+        # The supervised gradient is accumulated first ...
+        sup_loss = supervised_backward(head, base, batch, device)
+
+        # ... then the consistency gradient is added on top before the single step.
+        cons_loss = torch.zeros((), device=device)
+        if args.consistency_beta > 0 and step % args.consistency_every == 0:
+            cons_loss = consistency_loss(args, base, batch, device)
+            (args.consistency_beta * cons_loss).backward()
+
+        torch.nn.utils.clip_grad_norm_([p for p in head.parameters() if p.requires_grad], 1.0)
+        optim.step()
+
+        rec = {
+            "step": step + 1,
+            "sup_loss": float(sup_loss.item()),
+            "consistency_loss": float(cons_loss.detach().item()),
+            "total_loss": float(sup_loss.item() + args.consistency_beta * cons_loss.detach().item()),
+        }
+        log["steps"].append(rec)
+        if step % 50 == 0 or step == args.train_steps - 1:
+            print(
+                f" [{step + 1:>5}/{args.train_steps}] dt={time.time() - t0:.1f}s "
+                f"sup={rec['sup_loss']:.4f} cons={rec['consistency_loss']:.6f}",
+                flush=True,
+            )
+
+        if (step + 1) % args.eval_every == 0:
+            acc, tok_acc = evaluate(head, base, data_path, args.eval_n, args.eval_batch_size, device)
+            print(f" >> EVAL @ step {step + 1}: exact_acc={acc:.4f} delta={acc - acc0:+.4f}", flush=True)
+            log["evals"].append({"step": step + 1, "acc": acc, "tok_acc": tok_acc})
+            write_log(args.out, log)
+
+    acc_f, tok_f = evaluate(head, base, data_path, args.eval_n, args.eval_batch_size, device)
+    print("\n=== Final eval ===")
+    print(f" initial={acc0:.4f} final={acc_f:.4f} delta={acc_f - acc0:+.4f}", flush=True)
+    log["final_acc"] = acc_f
+    log["final_tok_acc"] = tok_f
+    log["evals"].append({"step": args.train_steps, "acc": acc_f, "tok_acc": tok_f})
+    write_log(args.out, log)
+    print(f"log -> {args.out}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/step9_trajectory_perturb_train.py b/step9_trajectory_perturb_train.py
new file mode 100644
index 0000000..61cbf04
--- /dev/null
+++ b/step9_trajectory_perturb_train.py
@@ -0,0 +1,595 @@
+"""Step 9: Supervised training with Lyapunov-style initial trajectory perturbations.
+
+This is hidden-trajectory augmentation, not a Lyapunov/flossing objective:
+
+ single_perturbed_ce:
+ sample one tiny perturbation of the initial recursive state and train with
+ the original supervised ACT loss on the same (x, y).
+
+ multi_perturbed_ce:
+ run one clean trajectory plus K-1 independently perturbed trajectories on
+ the same (x, y), average the original supervised ACT losses, and update.
+
+The goal is to enlarge the correct answer basin around the model's nominal
+initial latent state without directly optimizing Lyapunov exponents or KL
+consistency to the model's current answer.
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import math
+import sys
+import time
+from dataclasses import replace
+from pathlib import Path
+
+import torch
+import torch.nn.functional as F
+
+FLOSS_DIR = Path(__file__).resolve().parent
+sys.path.insert(0, str(FLOSS_DIR))
+
+from step7_interfloss import ( # noqa: E402
+ evaluate,
+ freeze_puzzle_embedding,
+ load_model,
+ load_train_batches,
+ move_batch,
+ write_log,
+)
+
+IGNORE_LABEL_ID = -100
+
+
+def _randn_like(tensor: torch.Tensor, generator: torch.Generator) -> torch.Tensor:
+    """Draw standard-normal noise with the shape, device and dtype of ``tensor``.
+
+    The sample is drawn in float32 from ``generator`` and only afterwards cast
+    to ``tensor.dtype``.
+    """
+    sample_kwargs = {"device": tensor.device, "dtype": torch.float32, "generator": generator}
+    return torch.randn(tensor.shape, **sample_kwargs).to(tensor.dtype)
+
+
+def _unit_noise_like(tensor: torch.Tensor, generator: torch.Generator, sampling: str) -> torch.Tensor:
+    """Draw unit-variance noise shaped like ``tensor``.
+
+    ``sampling == "uniform"`` draws from U[-sqrt(3), sqrt(3)); every other
+    value of ``sampling`` draws from N(0, 1). Sampling happens in float32 on
+    ``tensor.device`` and the result is cast to ``tensor.dtype``.
+    """
+    draw_kwargs = {"device": tensor.device, "dtype": torch.float32, "generator": generator}
+    if sampling != "uniform":
+        return torch.randn(tensor.shape, **draw_kwargs).to(tensor.dtype)
+    # Rescale U[0, 1) so its standard deviation is 1, matching Normal(0, 1).
+    unit_uniform = torch.rand(tensor.shape, **draw_kwargs)
+    centred = 2.0 * unit_uniform - 1.0
+    return (centred * math.sqrt(3.0)).to(tensor.dtype)
+
+
+def _expand_first_dim(batch: dict[str, torch.Tensor], n: int) -> dict[str, torch.Tensor]:
+    """Repeat every row of every tensor in ``batch`` ``n`` times along dim 0.
+
+    Copies of a row are adjacent (``repeat_interleave``), so row ``i`` of the
+    input occupies rows ``i * n`` .. ``i * n + n - 1`` of the output. With
+    ``n == 1`` the input dict itself is returned, not a copy.
+    """
+    if n != 1:
+        expanded = {}
+        for key, value in batch.items():
+            expanded[key] = value.repeat_interleave(n, dim=0)
+        return expanded
+    return batch
+
+
+def _noise_target_std(args, train_step: int | None) -> float:
+    """Return the perturbation std scheduled for ``train_step``.
+
+    Without a step, or when ``args.sigma_ramp_steps`` is not positive,
+    ``args.noise_std`` is returned as-is. Otherwise the std is interpolated
+    linearly from ``args.sigma_start`` (falling back to ``args.noise_std`` when
+    unset) to ``args.noise_std`` over ``args.sigma_ramp_steps`` steps, with the
+    progress clamped to [0, 1].
+    """
+    ramp_active = train_step is not None and args.sigma_ramp_steps > 0
+    if not ramp_active:
+        return args.noise_std
+    final_std = args.noise_std
+    initial_std = final_std if args.sigma_start is None else args.sigma_start
+    progress = train_step / args.sigma_ramp_steps
+    progress = min(max(progress, 0.0), 1.0)
+    return float(initial_std + progress * (final_std - initial_std))
+
+
+def _sample_noise_stds(
+ args,
+ batch_size: int,
+ n_trajectories: int,
+ device: str,
+ generator: torch.Generator,
+ train_step: int | None,
+) -> tuple[torch.Tensor, torch.Tensor, float]:
+ """Return per-row perturbation stds and a clean/noisy mask for expanded rows.
+
+ The expanded batch has ``batch_size * n_trajectories`` rows. The result is
+ ``(stds, mask, std_target)``: ``stds`` is a float32 tensor holding one std
+ per row, ``mask`` is a bool tensor of the same length, and ``std_target``
+ is ``_noise_target_std(args, train_step)``.
+
+ On the early-return paths ``mask`` marks the rows that sit in a noisy slot
+ (all False for ``baseline_clean``). On the final return it is ``stds > 0``,
+ i.e. the rows that really received a non-zero std.
+ """
+ total = batch_size * n_trajectories
+ std_target = _noise_target_std(args, train_step)
+ stds = torch.zeros(total, device=device, dtype=torch.float32)
+
+ # baseline_clean never perturbs: all stds stay zero and no row is noisy.
+ if args.mode == "baseline_clean":
+ noisy_mask = torch.zeros(total, device=device, dtype=torch.bool)
+ return stds, noisy_mask, std_target
+
+ # Position of each row inside its group of n_trajectories copies.
+ rollout_id = torch.arange(total, device=device) % n_trajectories
+ noisy_mask = torch.ones(total, device=device, dtype=torch.bool)
+ # multi_perturbed_ce keeps slot 0 of every group clean; other modes perturb all rows.
+ if args.mode == "multi_perturbed_ce":
+ noisy_mask = rollout_id > 0
+
+ if std_target <= 0:
+ return stds, noisy_mask, std_target
+
+ active_count = int(noisy_mask.sum().item())
+ if active_count == 0:
+ return stds, noisy_mask, std_target
+
+ if args.noise_sampling == "loguniform":
+ # Scale the [lo, hi] range by the same ramp factor that was applied to noise_std.
+ ramp_scale = 1.0 if args.noise_std <= 0 else std_target / args.noise_std
+ # Defaults: hi = noise_std, lo = hi / 10 (floored at 1e-8).
+ final_hi = args.noise_max if args.noise_max is not None else args.noise_std
+ final_lo = args.noise_min if args.noise_min is not None else max(final_hi / 10.0, 1e-8)
+ lo = float(final_lo) * ramp_scale
+ hi = float(final_hi) * ramp_scale
+ if hi <= 0:
+ return stds, noisy_mask, std_target
+ # Keep both bounds positive and ordered so the logarithms below are defined.
+ lo = max(float(lo), 1e-12)
+ hi = max(float(hi), lo)
+ # One std per active row, uniform in log-space between lo and hi.
+ u = torch.rand(active_count, device=device, dtype=torch.float32, generator=generator)
+ sampled = torch.exp(math.log(lo) + u * (math.log(hi) - math.log(lo)))
+ else:
+ # Every other sampling scheme uses the same std for all active rows.
+ sampled = torch.full((active_count,), std_target, device=device, dtype=torch.float32)
+
+ # mixture_normal: each active row is left clean (std 0) with probability clean_prob.
+ if args.noise_sampling == "mixture_normal" and args.clean_prob > 0:
+ keep_noisy = torch.rand(active_count, device=device, dtype=torch.float32, generator=generator) >= args.clean_prob
+ sampled = torch.where(keep_noisy, sampled, torch.zeros_like(sampled))
+
+ stds[noisy_mask] = sampled
+ return stds, stds > 0, std_target
+
+
+def _add_state_noise(
+    inner,
+    noise_stds: torch.Tensor,
+    generator: torch.Generator | None,
+    perturb: str,
+    sampling: str,
+):
+    """Return ``inner`` with per-row noise added to ``z_H`` and/or ``z_L``.
+
+    ``noise_stds`` holds one std per row; it is reshaped to broadcast over the
+    trailing dimensions of ``inner.z_H``. ``perturb`` selects the states to
+    perturb (``"h"``, ``"l"`` or ``"both"``); ``sampling`` is forwarded to
+    ``_unit_noise_like``. When ``noise_stds`` is empty or its maximum is not
+    positive, ``inner`` is returned untouched.
+
+    Raises:
+        ValueError: if noise is active but ``generator`` is ``None``.
+    """
+    nothing_to_add = noise_stds.numel() == 0 or float(noise_stds.max().item()) <= 0
+    if nothing_to_add:
+        return inner
+    if generator is None:
+        raise ValueError("generator is required when noise is active")
+
+    trailing_ones = (1,) * (inner.z_H.ndim - 1)
+    per_row_std = noise_stds.view((noise_stds.shape[0],) + trailing_ones)
+
+    # z_H is handled before z_L so the generator is consumed in a fixed order.
+    new_fields = {"z_H": inner.z_H, "z_L": inner.z_L}
+    for field, selectors in (("z_H", ("h", "both")), ("z_L", ("l", "both"))):
+        if perturb in selectors:
+            state = new_fields[field]
+            new_fields[field] = state + per_row_std.to(state.dtype) * _unit_noise_like(state, generator, sampling)
+    return replace(inner, **new_fields)
+
+
+def make_loaded_initial_carry(
+    base,
+    batch: dict[str, torch.Tensor],
+    device: str,
+    noise_std: float,
+    generator: torch.Generator | None,
+    perturb: str,
+):
+    """Build a first-step carry that already holds ``batch`` as its current data.
+
+    The ACT wrappers reset every sample whose ``halted`` flag is set before the
+    inner forward pass runs. To perturb the real initial recurrent state, the
+    same reset is applied here to all rows first, optional noise is added, and
+    the rows are then flagged as not halted so the first model call does not
+    overwrite the perturbation.
+
+    The noise uses one shared std (``noise_std``) for all rows and is always
+    drawn with the ``"normal"`` scheme.
+
+    Raises:
+        ValueError: if ``noise_std > 0`` and ``generator`` is ``None``.
+    """
+    with torch.device(device):
+        fresh_carry = base.initial_carry(batch)
+
+    every_row = torch.ones_like(fresh_carry.halted)
+    inner_state = base.inner.reset_carry(every_row, fresh_carry.inner_carry)
+
+    wants_noise = noise_std > 0
+    if wants_noise and generator is None:
+        raise ValueError("generator is required when noise_std > 0")
+    if wants_noise:
+        n_rows = batch["inputs"].shape[0]
+        row_stds = torch.full((n_rows,), noise_std, device=device, dtype=torch.float32)
+        inner_state = _add_state_noise(inner_state, row_stds, generator, perturb, "normal")
+
+    return replace(
+        fresh_carry,
+        inner_carry=inner_state,
+        steps=torch.zeros_like(fresh_carry.steps),
+        halted=torch.zeros_like(fresh_carry.halted),
+        current_data=dict(batch.items()),
+    )
+
+
+def branch_supervised_loss(head, base, batch, carry):
+    """Roll one trajectory through ``head`` and return its normalised loss.
+
+    ``head`` is called up to ``base.config.halt_max_steps`` times, stopping
+    early once it reports that all samples have finished. The per-step losses
+    are averaged over the steps actually taken and then divided by the number
+    of rows in ``batch["inputs"]``.
+    """
+    step_losses = []
+    for _ in range(base.config.halt_max_steps):
+        carry, step_loss, _, _, finished = head(return_keys=[], carry=carry, batch=batch)
+        step_losses.append(step_loss)
+        if finished:
+            break
+    # Left fold starting from 0.0, the same accumulation order as a running sum.
+    accumulated = sum(step_losses, 0.0)
+    return accumulated / max(len(step_losses), 1) / batch["inputs"].shape[0]
+
+
+def _token_loss(loss_fn, logits, labels, mask):
+    """Call ``loss_fn(logits, labels, ignore_index=IGNORE_LABEL_ID, ...)``.
+
+    ``valid_mask=mask`` is passed as well, but only when ``loss_fn`` exposes a
+    ``__code__`` object whose named parameters (positional or keyword-only)
+    include ``valid_mask``. Callables without ``__code__`` never receive it.
+    """
+    code = getattr(loss_fn, "__code__", None)
+    if code is None:
+        accepted = ()
+    else:
+        n_named = code.co_argcount + code.co_kwonlyargcount
+        accepted = code.co_varnames[:n_named]
+    if "valid_mask" in accepted:
+        return loss_fn(logits, labels, ignore_index=IGNORE_LABEL_ID, valid_mask=mask)
+    return loss_fn(logits, labels, ignore_index=IGNORE_LABEL_ID)
+
+
+def _step_supervised_loss_vec(head, base, inner_carry, batch, act_step: int):
+ """Run one recurrent step and return ``(new_inner, per_row_loss)``.
+
+ The per-row loss is ``lm_loss + 0.5 * (q_halt_loss + q_continue_loss)``:
+
+ * ``lm_loss``: ``head.loss_fn`` output divided by the row's number of
+ non-ignored labels (at least 1) and summed over the last dimension.
+ * ``q_halt_loss``: BCE-with-logits against whether every non-ignored token
+ of the row is predicted correctly.
+ * ``q_continue_loss``: BCE-with-logits against a bootstrapped target from
+ the next step's Q logits; zero when ``base.config.no_ACT_continue`` is set.
+
+ ``act_step`` is the 0-based index of this step and is only used to detect
+ the last step of the unroll.
+ """
+ new_inner, logits, (q_halt_logits, q_continue_logits) = base.inner(inner_carry, batch)
+ labels = batch["labels"]
+ with torch.no_grad():
+ # Positions that carry a real label.
+ mask = labels != IGNORE_LABEL_ID
+ loss_counts = mask.sum(-1)
+ # clamp_min(1) avoids dividing by zero for rows without any label.
+ loss_divisor = loss_counts.clamp_min(1).unsqueeze(-1)
+ is_correct = mask & (torch.argmax(logits, dim=-1) == labels)
+ # A row counts as solved only when all of its labelled positions are correct.
+ seq_is_correct = is_correct.sum(-1) == loss_counts
+
+ # assumes head.loss_fn returns one loss value per token position (TODO confirm)
+ lm_loss_vec = (_token_loss(head.loss_fn, logits, labels, mask) / loss_divisor).sum(-1)
+ q_halt_loss_vec = F.binary_cross_entropy_with_logits(
+ q_halt_logits,
+ seq_is_correct.to(q_halt_logits.dtype),
+ reduction="none",
+ )
+
+ q_continue_loss_vec = torch.zeros_like(q_halt_loss_vec)
+ if not getattr(base.config, "no_ACT_continue", False):
+ with torch.no_grad():
+ # Second forward pass from the new state, used only for its Q logits.
+ next_q_halt_logits, next_q_continue_logits = base.inner(new_inner, batch)[-1]
+ is_last_step = act_step + 1 >= base.config.halt_max_steps
+ # Target: sigmoid(next halt logit) on the last step, otherwise
+ # sigmoid(max(next halt logit, next continue logit)).
+ target_q_continue = torch.sigmoid(
+ torch.where(
+ torch.full_like(next_q_halt_logits, is_last_step, dtype=torch.bool),
+ next_q_halt_logits,
+ torch.maximum(next_q_halt_logits, next_q_continue_logits),
+ )
+ )
+ q_continue_loss_vec = F.binary_cross_entropy_with_logits(
+ q_continue_logits,
+ target_q_continue,
+ reduction="none",
+ )
+
+ return new_inner, lm_loss_vec + 0.5 * (q_halt_loss_vec + q_continue_loss_vec)
+
+
+def fixed_unroll_supervised_loss(args, head, base, batch, device, generator, train_step: int | None):
+ """Supervised loss over a fixed-length unroll with all trajectories batched together.
+
+ Every row is unrolled for exactly ``base.config.halt_max_steps`` steps; the
+ loop below has no early exit. In ``multi_perturbed_ce`` mode each input row
+ is repeated ``args.n_trajectories`` times and slot 0 of every group is the
+ clean trajectory; in the other modes there is one trajectory per row.
+
+ Returns ``(loss, parts)``: ``loss`` is the mean over expanded rows of the
+ step-averaged per-row loss, and ``parts`` is a dict of plain Python numbers
+ for logging.
+ """
+ batch_size = batch["inputs"].shape[0]
+ n_trajectories = args.n_trajectories if args.mode == "multi_perturbed_ce" else 1
+ expanded_batch = _expand_first_dim(batch, n_trajectories)
+ total = expanded_batch["inputs"].shape[0]
+
+ noise_stds, actual_noisy_mask, std_target = _sample_noise_stds(
+ args,
+ batch_size=batch_size,
+ n_trajectories=n_trajectories,
+ device=device,
+ generator=generator,
+ train_step=train_step,
+ )
+
+ # Build the initial carry, reset every row to the model's initial state,
+ # then add the sampled per-row noise.
+ with torch.device(device):
+ carry = base.initial_carry(expanded_batch)
+ reset_flag = torch.ones_like(carry.halted)
+ inner = base.inner.reset_carry(reset_flag, carry.inner_carry)
+ inner = _add_state_noise(inner, noise_stds, generator, args.perturb, args.noise_sampling)
+
+ loss_vec_sum = torch.zeros(total, device=device, dtype=torch.float32)
+ inner_carry = inner
+ for act_step in range(base.config.halt_max_steps):
+ inner_carry, step_loss_vec = _step_supervised_loss_vec(head, base, inner_carry, expanded_batch, act_step)
+ # Accumulate in float32 regardless of the model's compute dtype.
+ loss_vec_sum = loss_vec_sum + step_loss_vec.to(torch.float32)
+
+ per_seq = loss_vec_sum / max(base.config.halt_max_steps, 1)
+ loss = per_seq.mean()
+
+ # Slot masks used only for the logged clean/noisy loss split below.
+ if args.mode == "multi_perturbed_ce":
+ rollout_id = torch.arange(total, device=device) % n_trajectories
+ clean_mask = rollout_id == 0
+ noisy_slot_mask = rollout_id > 0
+ elif args.mode == "baseline_clean":
+ clean_mask = torch.ones(total, device=device, dtype=torch.bool)
+ noisy_slot_mask = torch.zeros(total, device=device, dtype=torch.bool)
+ else:
+ clean_mask = torch.zeros(total, device=device, dtype=torch.bool)
+ noisy_slot_mask = torch.ones(total, device=device, dtype=torch.bool)
+
+ # An empty group is reported as 0.0 instead of the NaN an empty mean would give.
+ clean_loss = per_seq.detach()[clean_mask].mean().item() if bool(clean_mask.any()) else 0.0
+ noisy_loss = per_seq.detach()[noisy_slot_mask].mean().item() if bool(noisy_slot_mask.any()) else 0.0
+ # Std statistics are taken over the rows selected by the mask from _sample_noise_stds.
+ active_noise = noise_stds[actual_noisy_mask]
+ return loss, {
+ "clean_loss": float(clean_loss),
+ "noisy_loss_mean": float(noisy_loss),
+ "noise_std_target": float(std_target),
+ "noise_std_mean": float(active_noise.mean().item()) if active_noise.numel() else 0.0,
+ "noise_std_max": float(active_noise.max().item()) if active_noise.numel() else 0.0,
+ "effective_batch": int(total),
+ }
+
+
+def train_loss(args, head, base, batch, device, generator, train_step: int | None):
+    """Compute the training loss for one batch under ``args.mode``.
+
+    Returns ``(loss, parts)`` where ``loss`` is the tensor to back-propagate
+    and ``parts`` is a dict of plain Python numbers for logging
+    (``clean_loss``, ``noisy_loss_mean``, ``noise_std_target``,
+    ``noise_std_mean``, ``noise_std_max``, ``effective_batch``).
+
+    ``args.rollout_impl == "parallel_fixed"`` delegates to
+    ``fixed_unroll_supervised_loss``. Otherwise every trajectory is rolled out
+    one after another through ``branch_supervised_loss``. On that serial path
+    the perturbation always uses the single std from ``_noise_target_std``
+    with normal noise, because ``make_loaded_initial_carry`` takes no sampling
+    scheme.
+
+    Raises:
+        ValueError: if ``args.mode`` is ``multi_perturbed_ce`` with fewer than
+            two trajectories (checked for both rollout implementations), or if
+            the mode is unknown on the serial path.
+    """
+    # Validate before dispatching: the parallel path would otherwise accept
+    # n_trajectories == 1 and silently train on clean trajectories only.
+    if args.mode == "multi_perturbed_ce" and args.n_trajectories < 2:
+        raise ValueError("multi_perturbed_ce requires --n-trajectories >= 2")
+
+    if args.rollout_impl == "parallel_fixed":
+        return fixed_unroll_supervised_loss(args, head, base, batch, device, generator, train_step)
+
+    n_rows = int(batch["inputs"].shape[0])
+
+    if args.mode == "baseline_clean":
+        carry = make_loaded_initial_carry(
+            base,
+            batch,
+            device,
+            noise_std=0.0,
+            generator=None,
+            perturb=args.perturb,
+        )
+        loss = branch_supervised_loss(head, base, batch, carry)
+        return loss, {
+            "clean_loss": float(loss.detach().item()),
+            "noisy_loss_mean": 0.0,
+            "noise_std_target": 0.0,
+            "noise_std_mean": 0.0,
+            "noise_std_max": 0.0,
+            "effective_batch": n_rows,
+        }
+
+    if args.mode == "single_perturbed_ce":
+        std_target = _noise_target_std(args, train_step)
+        carry = make_loaded_initial_carry(
+            base,
+            batch,
+            device,
+            noise_std=std_target,
+            generator=generator,
+            perturb=args.perturb,
+        )
+        loss = branch_supervised_loss(head, base, batch, carry)
+        return loss, {
+            "clean_loss": 0.0,
+            "noisy_loss_mean": float(loss.detach().item()),
+            "noise_std_target": float(std_target),
+            "noise_std_mean": float(std_target),
+            "noise_std_max": float(std_target),
+            "effective_batch": n_rows,
+        }
+
+    if args.mode == "multi_perturbed_ce":
+        std_target = _noise_target_std(args, train_step)
+
+        # One clean trajectory ...
+        clean_carry = make_loaded_initial_carry(
+            base,
+            batch,
+            device,
+            noise_std=0.0,
+            generator=None,
+            perturb=args.perturb,
+        )
+        clean_loss = branch_supervised_loss(head, base, batch, clean_carry)
+        losses = [clean_loss]
+        noisy_vals = []
+        # ... plus n_trajectories - 1 independently perturbed ones on the same batch.
+        for _ in range(args.n_trajectories - 1):
+            noisy_carry = make_loaded_initial_carry(
+                base,
+                batch,
+                device,
+                noise_std=std_target,
+                generator=generator,
+                perturb=args.perturb,
+            )
+            noisy_loss = branch_supervised_loss(head, base, batch, noisy_carry)
+            losses.append(noisy_loss)
+            noisy_vals.append(float(noisy_loss.detach().item()))
+
+        total = torch.stack(losses).mean()
+        return total, {
+            "clean_loss": float(clean_loss.detach().item()),
+            "noisy_loss_mean": sum(noisy_vals) / max(len(noisy_vals), 1),
+            "noise_std_target": float(std_target),
+            "noise_std_mean": float(std_target),
+            "noise_std_max": float(std_target),
+            "effective_batch": int(n_rows * args.n_trajectories),
+        }
+
+    raise ValueError(f"unknown mode: {args.mode}")
+
+
+def save_checkpoint(head, save_dir: Path, name: str):
+    """Write ``head.state_dict()`` to ``save_dir / name`` and return the path as a string.
+
+    ``save_dir`` (including missing parents) is created when necessary.
+    """
+    target = save_dir / name
+    save_dir.mkdir(parents=True, exist_ok=True)
+    torch.save(head.state_dict(), target)
+    return str(target)
+
+
+def save_training_state(
+    head,
+    optim,
+    generator: torch.Generator,
+    args,
+    save_dir: Path,
+    name: str,
+    train_step: int,
+    best_acc: float,
+    best_step: int,
+):
+    """Save a resumable training snapshot to ``save_dir / name`` and return its path.
+
+    The snapshot holds the model and optimizer state dicts, the step counters,
+    the best accuracy seen so far, ``vars(args)``, the global torch RNG state,
+    the state of the noise ``generator`` and, when CUDA is available, the
+    current CUDA RNG state. ``save_dir`` is created when missing.
+    """
+    checkpoint_path = save_dir / name
+    save_dir.mkdir(parents=True, exist_ok=True)
+
+    rng_states = {
+        "torch_rng_state": torch.get_rng_state(),
+        "noise_generator_state": generator.get_state(),
+    }
+    if torch.cuda.is_available():
+        rng_states["cuda_rng_state"] = torch.cuda.get_rng_state()
+
+    payload = {
+        "format": "step9_training_state_v1",
+        "model_state_dict": head.state_dict(),
+        "optimizer_state_dict": optim.state_dict(),
+        "train_step": int(train_step),
+        "best_acc": float(best_acc),
+        "best_step": int(best_step),
+        "args": vars(args),
+        **rng_states,
+    }
+    torch.save(payload, checkpoint_path)
+    return str(checkpoint_path)
+
+
+def load_training_state(path: Path, head, optim, generator: torch.Generator, device: str):
+    """Restore a snapshot written by ``save_training_state`` and return resume info.
+
+    The model weights are loaded non-strictly (the missing/unexpected key
+    counts are printed). Optimizer state, the global torch RNG state, the CUDA
+    RNG state (only when CUDA is available) and the noise generator state are
+    restored when present in the file.
+
+    The file is read with ``weights_only=False``, i.e. it is fully unpickled,
+    so only snapshots from a trusted source should be loaded.
+
+    Returns:
+        dict with ``train_step`` (int, default 0), ``best_acc`` (stored value
+        or ``None``) and ``best_step`` (int, default 0).
+
+    Raises:
+        ValueError: if the file has no ``model_state_dict`` entry.
+    """
+    checkpoint = torch.load(path, map_location=device, weights_only=False)
+    if "model_state_dict" not in checkpoint:
+        raise ValueError(f"{path} is not a step9 training-state checkpoint")
+
+    missing_keys, unexpected_keys = head.load_state_dict(checkpoint["model_state_dict"], strict=False)
+    print(f"[resume {path}] missing={len(missing_keys)} unexpected={len(unexpected_keys)}")
+
+    if "optimizer_state_dict" in checkpoint:
+        optim.load_state_dict(checkpoint["optimizer_state_dict"])
+
+    def on_cpu(key):
+        # map_location may have moved the RNG state tensors; they are moved back to CPU here.
+        return checkpoint[key].cpu()
+
+    if "torch_rng_state" in checkpoint:
+        torch.set_rng_state(on_cpu("torch_rng_state"))
+    if "cuda_rng_state" in checkpoint and torch.cuda.is_available():
+        torch.cuda.set_rng_state(on_cpu("cuda_rng_state"))
+    if "noise_generator_state" in checkpoint:
+        generator.set_state(on_cpu("noise_generator_state"))
+
+    resume_info = {"train_step": int(checkpoint.get("train_step", 0))}
+    resume_info["best_acc"] = checkpoint.get("best_acc")
+    resume_info["best_step"] = int(checkpoint.get("best_step", 0))
+    return resume_info
+
+
+def main():
+ """Command-line entry point: fine-tune a loaded checkpoint with trajectory perturbations.
+
+ Parses the CLI, loads the model, optionally restores a saved training
+ state, runs an initial evaluation, trains for ``--train-steps`` steps with
+ periodic evaluation and optional checkpointing, then runs a final
+ evaluation. Progress is collected in a dict that is written to ``--out``
+ through ``write_log``.
+ """
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--model", choices=["hrm", "trm"], required=True)
+ parser.add_argument("--ckpt-root", required=True)
+ parser.add_argument("--ckpt-name", required=True)
+ parser.add_argument(
+ "--mode",
+ choices=["baseline_clean", "single_perturbed_ce", "multi_perturbed_ce"],
+ required=True,
+ )
+ parser.add_argument("--train-steps", type=int, default=10000)
+ parser.add_argument("--batch-size", type=int, default=8)
+ parser.add_argument("--lr", type=float, default=1e-5)
+ # Noise controls: final std, optional log-uniform range, sampling scheme,
+ # clean probability for mixture_normal, and an optional std ramp.
+ parser.add_argument("--noise-std", type=float, default=1e-3)
+ parser.add_argument("--noise-min", type=float, default=None)
+ parser.add_argument("--noise-max", type=float, default=None)
+ parser.add_argument(
+ "--noise-sampling",
+ choices=["normal", "uniform", "loguniform", "mixture_normal"],
+ default="normal",
+ )
+ parser.add_argument("--clean-prob", type=float, default=0.0)
+ parser.add_argument("--sigma-start", type=float, default=None)
+ parser.add_argument("--sigma-ramp-steps", type=int, default=0)
+ parser.add_argument("--n-trajectories", type=int, default=4)
+ parser.add_argument("--rollout-impl", choices=["serial_act", "parallel_fixed"], default="parallel_fixed")
+ parser.add_argument("--perturb", choices=["h", "l", "both"], default="both")
+ parser.add_argument("--seed", type=int, default=42)
+ parser.add_argument("--eval-every", type=int, default=1000)
+ parser.add_argument("--eval-n", type=int, default=512)
+ parser.add_argument("--eval-batch-size", type=int, default=32)
+ parser.add_argument("--out", default="step9_trajectory_perturb_log.json")
+ # Checkpointing and resume options.
+ parser.add_argument("--save-dir", default=None)
+ parser.add_argument("--save-best", action="store_true")
+ parser.add_argument("--save-final", action="store_true")
+ parser.add_argument("--save-every-eval", action="store_true")
+ parser.add_argument("--save-train-state", action="store_true")
+ parser.add_argument("--resume-state", default=None)
+ args = parser.parse_args()
+
+ torch.manual_seed(args.seed)
+ # The device is hard-coded; there is no CPU fallback in this script.
+ device = "cuda"
+ head, base, cfg, adam_cls = load_model(args.model, Path(args.ckpt_root), args.ckpt_name, device)
+ data_path = Path(cfg["data_path"])
+ optim = adam_cls(head.parameters(), lr=args.lr, betas=(0.9, 0.95), weight_decay=cfg["weight_decay"])
+ # Dedicated generator for the perturbation noise, seeded at a fixed offset from --seed.
+ generator = torch.Generator(device=device).manual_seed(args.seed + 900000)
+ resume_step = 0
+ resume_best_acc = None
+ resume_best_step = 0
+ # Restore model, optimizer and RNG state before the initial evaluation below.
+ if args.resume_state:
+ resume_info = load_training_state(Path(args.resume_state), head, optim, generator, device)
+ resume_step = resume_info["train_step"]
+ resume_best_acc = resume_info["best_acc"]
+ resume_best_step = resume_info["best_step"]
+ print(f"[resume] train_step={resume_step} best_acc={resume_best_acc} best_step={resume_best_step}", flush=True)
+
+ print(f"\n=== Initial eval (loaded {args.ckpt_name}) ===")
+ acc0, tok0 = evaluate(head, base, data_path, args.eval_n, args.eval_batch_size, device)
+ print(f" initial: exact_acc={acc0:.4f} token_acc={tok0:.4f}", flush=True)
+
+ # NOTE(review): the "initial" eval entry is recorded with train_step 0 even
+ # when training resumes from a later step; resume_step is stored separately.
+ log = {
+ "args": vars(args),
+ "initial_acc": acc0,
+ "initial_tok_acc": tok0,
+ "steps": [],
+ "evals": [{"kind": "initial", "train_step": 0, "acc": acc0, "tok_acc": tok0}],
+ "checkpoints": [],
+ "resume_state": args.resume_state,
+ "resume_step": resume_step,
+ }
+ write_log(args.out, log)
+ # Default checkpoint directory: --out without its suffix, plus "_ckpts".
+ save_dir = Path(args.save_dir) if args.save_dir else Path(str(args.out)).with_suffix("").with_name(Path(str(args.out)).with_suffix("").name + "_ckpts")
+ # Without a stored best accuracy, the initial accuracy is the baseline to beat.
+ best_acc = float(resume_best_acc) if resume_best_acc is not None else acc0
+ best_step = resume_best_step if resume_best_acc is not None else 0
+
+ train_iter = load_train_batches(data_path, args.batch_size, args.train_steps, seed=args.seed)
+ # Advance the batch iterator past the steps that were already trained.
+ for _ in range(min(resume_step, args.train_steps)):
+ next(train_iter)
+ t0 = time.time()
+ # train_step is 0-based here; logs and the noise schedule use train_step + 1.
+ for train_step, batch_cpu in enumerate(train_iter, start=resume_step):
+ batch = move_batch(batch_cpu, device)
+ head.train()
+ freeze_puzzle_embedding(base)
+ optim.zero_grad(set_to_none=True)
+ loss, parts = train_loss(args, head, base, batch, device, generator, train_step + 1)
+ loss.backward()
+ # Clip the global gradient norm of the trainable parameters to 1.0.
+ torch.nn.utils.clip_grad_norm_([p for p in head.parameters() if p.requires_grad], 1.0)
+ optim.step()
+
+ rec = {
+ "train_step": train_step + 1,
+ "loss": float(loss.detach().item()),
+ **parts,
+ }
+ log["steps"].append(rec)
+ if train_step % 50 == 0 or train_step == args.train_steps - 1:
+ print(
+ f" T[{train_step + 1:>5}/{args.train_steps}] dt={time.time() - t0:.1f}s "
+ f"loss={rec['loss']:.4f} clean={rec['clean_loss']:.4f} "
+ f"noisy={rec['noisy_loss_mean']:.4f} "
+ f"sigma={rec['noise_std_mean']:.2e}/{rec['noise_std_max']:.2e} "
+ f"effB={rec['effective_batch']}",
+ flush=True,
+ )
+
+ # Periodic evaluation, with optional checkpoints.
+ if (train_step + 1) % args.eval_every == 0:
+ acc, tok_acc = evaluate(head, base, data_path, args.eval_n, args.eval_batch_size, device)
+ print(f" >> EVAL @ step {train_step + 1}: exact_acc={acc:.4f} delta={acc - acc0:+.4f}", flush=True)
+ log["evals"].append({"kind": "task", "train_step": train_step + 1, "acc": acc, "tok_acc": tok_acc})
+ if args.save_every_eval:
+ ckpt_path = save_checkpoint(head, save_dir, f"step_{train_step + 1}.pt")
+ log["checkpoints"].append({"kind": "eval", "train_step": train_step + 1, "acc": acc, "path": ckpt_path})
+ # ">=" means a tie with the current best also overwrites best.pt.
+ if args.save_best and acc >= best_acc:
+ best_acc = acc
+ best_step = train_step + 1
+ ckpt_path = save_checkpoint(head, save_dir, "best.pt")
+ log["best_acc"] = best_acc
+ log["best_step"] = best_step
+ log["best_checkpoint"] = ckpt_path
+ log["checkpoints"].append({"kind": "best", "train_step": train_step + 1, "acc": acc, "path": ckpt_path})
+ if args.save_train_state:
+ state_path = save_training_state(head, optim, generator, args, save_dir, "best_state.pt", train_step + 1, best_acc, best_step)
+ log["best_state_checkpoint"] = state_path
+ log["checkpoints"].append({"kind": "best_state", "train_step": train_step + 1, "acc": acc, "path": state_path})
+ if args.save_train_state:
+ state_path = save_training_state(head, optim, generator, args, save_dir, "latest_state.pt", train_step + 1, best_acc, best_step)
+ log["latest_state_checkpoint"] = state_path
+ log["checkpoints"].append({"kind": "latest_state", "train_step": train_step + 1, "acc": acc, "path": state_path})
+ write_log(args.out, log)
+
+ # Final evaluation; its log entries are stamped with --train-steps.
+ acc_f, tok_f = evaluate(head, base, data_path, args.eval_n, args.eval_batch_size, device)
+ print("\n=== Final eval ===")
+ print(f" initial={acc0:.4f} final={acc_f:.4f} delta={acc_f - acc0:+.4f}", flush=True)
+ log["final_acc"] = acc_f
+ log["final_tok_acc"] = tok_f
+ log["evals"].append({"kind": "final", "train_step": args.train_steps, "acc": acc_f, "tok_acc": tok_f})
+ if args.save_final:
+ ckpt_path = save_checkpoint(head, save_dir, "final.pt")
+ log["final_checkpoint"] = ckpt_path
+ log["checkpoints"].append({"kind": "final", "train_step": args.train_steps, "acc": acc_f, "path": ckpt_path})
+ if args.save_train_state:
+ state_path = save_training_state(head, optim, generator, args, save_dir, "final_state.pt", args.train_steps, best_acc, best_step)
+ log["final_state_checkpoint"] = state_path
+ log["checkpoints"].append({"kind": "final_state", "train_step": args.train_steps, "acc": acc_f, "path": state_path})
+ write_log(args.out, log)
+ print(f"log -> {args.out}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/surrogate_flossing/MinimalGradientFlossinExample.py b/surrogate_flossing/MinimalGradientFlossinExample.py
new file mode 100644
index 0000000..7c8314e
--- /dev/null
+++ b/surrogate_flossing/MinimalGradientFlossinExample.py
@@ -0,0 +1,135 @@
+import torch
+import torch.optim as optim
+import numpy as np
+import matplotlib.pyplot as plt
+
+class VanillaRNN(torch.nn.Module):
+    """Single-layer tanh RNN with a linear readout, operating on 1-D vectors.
+
+    One step computes ``h = tanh(W_in @ x + W_h @ h_prev + b_h)`` and
+    ``y = W_out @ h + b_out``. Weight matrices are initialised from a standard
+    normal scaled by ``1 / sqrt(fan_in)``; biases start at zero.
+    """
+
+    def __init__(self, Nin, N, Nout):
+        super().__init__()
+        self.N = N
+
+        def scaled_normal(rows, cols):
+            # Standard-normal entries divided by sqrt(fan_in).
+            return torch.nn.Parameter(torch.randn(rows, cols) / np.sqrt(cols))
+
+        # Creation order fixes both the RNG consumption and the parameter order.
+        self.W_in = scaled_normal(N, Nin)
+        self.W_h = scaled_normal(N, N)
+        self.b_h = torch.nn.Parameter(torch.zeros(N))
+        self.W_out = scaled_normal(Nout, N)
+        self.b_out = torch.nn.Parameter(torch.zeros(Nout))
+
+    def forward(self, x, h_prev):
+        """Advance one step; returns ``(h, y)``, the new hidden state and the readout."""
+        pre_activation = self.W_in @ x + self.W_h @ h_prev + self.b_h
+        h = torch.tanh(pre_activation)
+        return h, self.W_out @ h + self.b_out
+
+def calculate_jacobian_analytical(vanilla_rnn, h, x=None):
+    """Analytical one-step Jacobian of the hidden-state update.
+
+    With ``x`` given, ``h`` is the state *before* the update and the result is
+    ``d h_next / d h`` for ``h_next = tanh(W_in @ x + W_h @ h + b_h)``, i.e.
+    ``diag(1 - h_next**2) @ W_h``.
+
+    With ``x=None`` the previous expression ``W_h @ diag(1 / cosh(h)**2)`` is
+    returned unchanged for backward compatibility. That is the Jacobian of the
+    map ``u -> W_h @ tanh(u)`` at ``u = h``; it is not the Jacobian of
+    ``VanillaRNN.forward``, whose state is the post-tanh activation.
+    """
+    if x is None:
+        tanh_prime = 1 / torch.cosh(h)**2
+        return vanilla_rnn.W_h @ torch.diag(tanh_prime)
+    pre_activation = vanilla_rnn.W_in @ x + vanilla_rnn.W_h @ h + vanilla_rnn.b_h
+    # tanh'(a) = 1 - tanh(a)^2, evaluated at the pre-activation of this step.
+    tanh_prime = 1 - torch.tanh(pre_activation) ** 2
+    return torch.diag(tanh_prime) @ vanilla_rnn.W_h
+
+def calculate_lyapunov_spectrum(vanilla_rnn, x_data, nle, seedIC=1, seedONS=1):
+    """Accumulate the first ``nle`` Lyapunov exponents along an input sequence.
+
+    The hidden state starts at zero. At every step the Jacobian of the update
+    is applied to an orthonormal basis ``Q`` (initialised from a QR
+    decomposition of a random ``n x nle`` matrix drawn after
+    ``torch.manual_seed(seedONS)``), the result is re-orthonormalised by QR and
+    ``log|diag(R)|`` is added to the running total.
+
+    Returns the accumulated sum over all steps; it is not divided by the
+    number of steps, so the per-step exponents are ``result / len(x_data)``.
+    The result stays attached to the autograd graph of ``vanilla_rnn``.
+    """
+    n = vanilla_rnn.N
+    ONSstep = 1  # re-orthonormalise after every step
+
+    torch.manual_seed(seedIC)
+    h = torch.zeros(n, dtype=torch.float32, requires_grad=True)
+
+    torch.manual_seed(seedONS)
+    Q, R = torch.linalg.qr(torch.randn(n, nle))
+    ls = torch.zeros(nle, dtype=torch.float32)
+
+    for step, x in enumerate(x_data):
+        # The Jacobian must be evaluated at the state before it is advanced.
+        D = calculate_jacobian_analytical(vanilla_rnn, h, x)
+        h, _ = vanilla_rnn(x, h)
+        Q = D @ Q
+
+        if step % ONSstep == 0 and nle > 0:
+            Q, R = torch.linalg.qr(Q)
+            ls += torch.log(torch.abs(torch.diag(R))) / ONSstep
+
+    return ls
+
+# Model parameters
+Nin = 1
+nb_steps = 1024
+nb_hidden = 64
+Nout = 1
+nle = 16 # number of Lyapunov exponents to floss
+Ef = 41 # number of flossing epochs
+
+# Initialize the RNN
+vanilla_rnn = VanillaRNN(Nin, nb_hidden, Nout)
+# Adam with its default hyper-parameters over all RNN parameters.
+optimizer = optim.Adam(vanilla_rnn.parameters())
+
+# Generate input data
+# Each step's input is a 0/1 vector; an entry is 1 with probability pIn.
+# NOTE(review): drawn from NumPy's global RNG, which this script never seeds.
+pIn = 0.5 # input probability
+x_data = [torch.tensor(np.random.rand(Nin) < pIn, dtype=torch.float32) for _ in range(nb_steps)]
+
+# Optimization setup
+losses = []
+lyapunov_spectra = []
+
+# Set up the initial plot
+# Left axis: Lyapunov spectrum; right axis: loss on a log scale.
+fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))
+
+lyapunov_line, = ax1.plot([], [], "k", label="Lyapunov spectrum after flossing")
+ax1.set_xlabel(r"Index $i$")
+ax1.set_ylabel(r"Lyapunov Exponent $\lambda_i$ (1/step)")
+ax1.legend()
+
+loss_line, = ax2.semilogy([], [], "r", label="Loss")
+ax2.set_xlabel("Epoch")
+ax2.set_ylabel("Loss")
+ax2.set_title("Loss over Epochs")
+ax2.legend()
+
+plt.ion() # Turn on interactive mode
+plt.show()
+
+# Spectrum of the untrained network, computed on the second half of the input
+# sequence and drawn in red as the "before" reference.
+lyapunov_spectrum_initial = calculate_lyapunov_spectrum(vanilla_rnn, x_data[len(x_data)//2:], nle).detach().cpu().numpy()
+ax1.plot(lyapunov_spectrum_initial, "r", label="Lyapunov spectrum before flossing")
+ax1.legend()
+
+# Training loop
+# Each epoch draws a fresh input sequence, computes the Lyapunov values on its
+# second half and takes one Adam step on their mean square.
+for epoch in range(Ef):
+ # NOTE(review): this seeds torch only. x_data below comes from np.random, and
+ # calculate_lyapunov_spectrum re-seeds torch itself, so the inputs are not
+ # made reproducible by this call; np.random.seed(epoch) may have been intended.
+ torch.manual_seed(epoch)
+ x_data = [torch.tensor(np.random.rand(Nin) < pIn, dtype=torch.float32) for _ in range(nb_steps)]
+
+ optimizer.zero_grad()
+
+ lyapunov_spectrum = calculate_lyapunov_spectrum(vanilla_rnn, x_data[len(x_data) // 2:], nle)
+
+ # Calculate the loss (mean square of the first nle Lyapunov exponents)
+ loss = torch.mean(lyapunov_spectrum**2)
+ print(f"Epoch {epoch}: Loss = {loss.item()}")
+
+ # Backward pass: compute gradients
+ loss.backward()
+
+ # Optimization step
+ optimizer.step()
+
+ # Store the loss and Lyapunov spectrum
+ losses.append(loss.item())
+ lyapunov_spectra.append(lyapunov_spectrum.detach().cpu().numpy())
+
+ # On every 10th epoch the loss is printed a second time (same message as above).
+ if epoch % 10 == 0:
+ print(f"Epoch {epoch}: Loss = {loss.item()}")
+
+ # Update the Lyapunov spectrum plot
+ lyapunov_line.set_ydata(lyapunov_spectrum.detach().cpu().numpy())
+ lyapunov_line.set_xdata(range(len(lyapunov_spectrum)))
+ ax1.relim()
+ ax1.autoscale_view()
+ ax1.legend()
+
+ # Update the loss plot
+ loss_line.set_ydata(losses)
+ loss_line.set_xdata(range(len(losses)))
+ ax2.relim()
+ ax2.autoscale_view()
+
+ # Draw the updated plots
+ fig.canvas.draw()
+ fig.canvas.flush_events()
+
+print("Flossing complete.")
+
diff --git a/surrogate_flossing/MinimalSurrogateGradientFlossinExample.py b/surrogate_flossing/MinimalSurrogateGradientFlossinExample.py
new file mode 100644
index 0000000..678112c
--- /dev/null
+++ b/surrogate_flossing/MinimalSurrogateGradientFlossinExample.py
@@ -0,0 +1,228 @@
+import torch
+import torch.optim as optim
+import numpy as np
+
+
+class SurrGradSpike(torch.autograd.Function):
+    """Spiking nonlinearity with a surrogate gradient.
+
+    The forward pass is a step function of the input. The backward pass
+    replaces its derivative by the normalized negative part of a fast
+    sigmoid, as in Zenke & Ganguli (2018). Subclassing
+    ``torch.autograd.Function`` keeps it usable with the rest of autograd.
+    """
+
+    scale = 10.0  # controls steepness of surrogate gradient
+
+    @staticmethod
+    def forward(ctx, input):
+        """Return 1 where ``input > 0`` and 0 elsewhere, in ``input``'s dtype.
+
+        The input is stashed on ``ctx`` for use in the backward pass.
+        """
+        ctx.save_for_backward(input)
+        return torch.where(input > 0, torch.ones_like(input), torch.zeros_like(input))
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        """Scale the incoming gradient by ``1 / (scale * |input| + 1) ** 2``."""
+        (saved_input,) = ctx.saved_tensors
+        damping = (SurrGradSpike.scale * torch.abs(saved_input) + 1.0) ** 2
+        return grad_output / damping
+
+# here we overwrite our naive spike function by the "SurrGradSpike" nonlinearity which implements a surrogate gradient
+spike_fn = SurrGradSpike.apply
+
+
+
+def sgprime(x, g):
+    """Surrogate-gradient slope ``1 / (g * |x| + 1) ** 2`` evaluated element-wise."""
+    denominator = g * torch.abs(x) + 1
+    return 1 / denominator**2
+
+def manual_jacobian_clif15NoReset(mem, syn, x, alpha, beta, v1, iext, gj):
+    """Hand-written Jacobian of the neuron update without the reset term.
+
+    The result is a ``2n x 2n`` matrix assembled from four ``n x n`` blocks,
+    where ``n = len(mem)``; the surrogate slope ``sgprime(mem - 1, g=gj)``
+    stands in for the derivative of the spike function. ``syn``, ``x`` and
+    ``iext`` are accepted for signature compatibility but not used.
+    """
+    n = len(mem)
+    identity = torch.eye(n, dtype=torch.float32, device=mem.device)
+    spike_slope = sgprime(mem - 1, g=gj)
+
+    # Full Jacobian as a 2x2 block matrix: first the top block row, then the bottom one.
+    top_row = torch.cat(
+        [
+            beta * identity + v1 @ torch.diag(spike_slope * (1 - beta)),
+            (1 - beta) * alpha * identity,
+        ],
+        dim=1,
+    )
+    bottom_row = torch.cat(
+        [
+            v1 @ torch.diag(spike_slope),
+            alpha * identity,
+        ],
+        dim=1,
+    )
+    return torch.cat([top_row, bottom_row], dim=0)
+
+def neuron_update7hardOut(mem, syn, x, gu):
+    """Advance membrane and synaptic state by one step; returns ``(mem, syn)``.
+
+    Spikes are emitted where ``mem - 1 > 0`` (via ``spike_fn``). The synaptic
+    state decays by ``alpha`` and receives the input plus the recurrent spikes
+    through ``v1``; the membrane decays by ``beta``, integrates the new
+    synaptic state and is multiplied by zero wherever a spike occurred. The
+    reset factor is detached from the graph, like in the SpyTorch tutorial:
+    https://github.com/fzenke/spytorch/blob/main/notebooks/SpyTorchTutorial5.ipynb
+
+    ``alpha``, ``beta`` and ``v1`` are read from module scope; ``gu`` is not used.
+    """
+    spikes = spike_fn(mem - 1)
+    keep = 1 - spikes.detach()
+    new_syn = alpha * syn + x + v1 @ spikes
+    new_mem = (beta * mem + (1 - beta) * new_syn) * keep
+    return new_mem, new_syn
+
+
+
+def calculate_lyapunov_spectrum(gForward, gBackward, x_data, nle, Win, v1, g):
+    """Accumulate the first ``nle`` surrogate Lyapunov values along ``x_data``.
+
+    Membrane and synaptic state start at zero. At every step the hand-written
+    Jacobian (surrogate steepness ``gBackward``) is applied to an orthonormal
+    basis of the ``2 * nb_hidden``-dimensional state space, the basis is
+    re-orthonormalised by QR and ``log|diag(R)|`` is added to the total.
+
+    The returned vector is the sum over all steps, not an average. ``g`` is
+    not used. ``nb_hidden``, ``nb_inputs``, ``seedIC``, ``seedONS``, ``alpha``,
+    ``beta`` and ``iext`` are read from module scope. The ``v1`` argument is
+    only used for the Jacobian; ``neuron_update7hardOut`` reads the
+    module-level ``v1``.
+    """
+    n = nb_hidden
+    nx = nb_inputs
+    ONSstep = 1
+
+    torch.manual_seed(seedIC)
+    mem = torch.zeros(n, dtype=torch.float32)
+    syn = torch.zeros(n, dtype=torch.float32)
+
+    torch.manual_seed(seedONS)
+    Q, R = torch.linalg.qr(torch.randn(2*n, nle))
+    ls = torch.zeros(nle, dtype=torch.float32)
+
+    for step, x in enumerate(x_data):
+        # The Jacobian is evaluated at the state before it is advanced.
+        D = manual_jacobian_clif15NoReset(mem, syn, Win @ x, alpha, beta, v1, iext, gBackward)
+        mem, syn = neuron_update7hardOut(mem, syn, Win @ x, gForward)
+        Q = D @ Q
+
+        if step % ONSstep == 0 and nle > 0:
+            Q, R = torch.linalg.qr(Q)
+            ls += torch.log(torch.abs(torch.diag(R))) / ONSstep
+
+    return ls
+
+# Model parameters
+tau_mem = 10e-3
+tau_syn = 5e-3
+time_step = 1e-3
+# Per-step decay factors of the synaptic (alpha) and membrane (beta) state.
+alpha, beta = torch.exp(torch.tensor(-time_step / tau_syn)), torch.exp(torch.tensor(-time_step / tau_mem))
+
+nb_inputs = 100
+nb_steps = 1000
+nb_hidden = 64
+iext = 0
+
+# Initialize parameters to optimize as leaf tensors
+# Both matrices are scaled by 1/sqrt(fan_in) and have their row means removed.
+Win = torch.randn(nb_hidden, nb_inputs, dtype=torch.float32, requires_grad=True) / torch.sqrt(torch.tensor(nb_inputs, dtype=torch.float32))
+Win = Win - torch.mean(Win, dim=1, keepdim=True)
+Win = Win.clone().detach().requires_grad_(True) # Ensure it is a leaf tensor
+
+v1 = torch.randn(nb_hidden, nb_hidden, dtype=torch.float32, requires_grad=True) / torch.sqrt(torch.tensor(nb_hidden, dtype=torch.float32))
+v1 = v1 - torch.mean(v1, dim=1, keepdim=True)
+v1 = v1.clone().detach().requires_grad_(True) # Ensure it is a leaf tensor
+
+# Surrogate steepness used for the Jacobian; it is not optimized.
+g = torch.tensor(5.0, dtype=torch.float32, requires_grad=False)
+
+# Generate input data
+# NOTE(review): drawn from NumPy's global RNG, which this script never seeds.
+pSpike = 0.1 # for dt=1e-3 this is 10 Hz input
+x_data = [torch.tensor(np.random.rand(nb_inputs) < pSpike, dtype=torch.float32) for _ in range(nb_steps)]
+
+nle = 50#2 * nb_hidden
+seedIC = seedONS = 1
+# NOTE(review): subDir and resetSwitch are not referenced again in this script.
+subDir = "cLIF_spectrum"
+
+resetSwitch = False
+
+# Optimization setup
+#optimizer = optim.SGD([Win, v1, g], lr=1e-3)
+optimizer = optim.Adam([Win, v1])#, g])
+import matplotlib.pyplot as plt
+
+
+
+# NOTE(review): matplotlib is imported a second time here, and clear_output is
+# not used anywhere in this script although it requires IPython to be installed.
+import matplotlib.pyplot as plt
+from IPython.display import clear_output
+
+# Spectrum before training, computed on the second half of the input sequence.
+# The first argument (1e9) is forwarded to neuron_update7hardOut, which ignores it.
+lyapunov_spectrum = calculate_lyapunov_spectrum(1e9, g, x_data[len(x_data)//2:], nle, Win, v1, g)
+# Initialize lists to store the loss and Lyapunov spectrum for plotting
+losses = []
+lyapunov_spectra = []
+
+# Set up the initial plot
+fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))
+
+lyapunov_line, = ax1.plot([], [], "k", label="Lyapunov Spectrum")
+ax1.set_xlabel("Index")
+ax1.set_ylabel("Lyapunov Exponent")
+ax1.set_title("Lyapunov Spectrum")
+ax1.legend()
+
+loss_line, = ax2.plot([], [], "r", label="Loss")
+ax2.set_xlabel("Epoch")
+ax2.set_ylabel("Loss")
+ax2.set_title("Loss over Epochs")
+ax2.legend()
+
+plt.ion() # Turn on interactive mode
+plt.show()
+# Draw the initial spectrum in red on the left subplot as the "before" reference.
+lyapunov_spectrumInitial=lyapunov_spectrum.detach().cpu().numpy()
+plt.subplot(121)
+plt.plot(lyapunov_spectrumInitial,"r")
+# NOTE(review): the bare 0 below is an expression statement with no effect.
+0
+# Training loop
+#Main training loop with profiling
+# Each epoch draws a fresh input sequence, computes the surrogate Lyapunov
+# values on its second half and takes one Adam step on their mean square.
+
+for epoch in range(100):
+ # NOTE(review): this seeds torch only. x_data below comes from np.random, and
+ # calculate_lyapunov_spectrum re-seeds torch itself, so the inputs are not
+ # made reproducible by this call; np.random.seed(epoch) may have been intended.
+ torch.manual_seed(epoch)
+ x_data = [torch.tensor(np.random.rand(nb_inputs) < pSpike, dtype=torch.float32) for _ in range(nb_steps)]
+
+ optimizer.zero_grad()
+
+ lyapunov_spectrum = calculate_lyapunov_spectrum(1e9, g, x_data[len(x_data) // 2:], nle, Win, v1, g)
+
+ # Calculate the loss (mean square of the first nle Lyapunov exponents)
+ loss = torch.mean(lyapunov_spectrum**2)
+ print(f"Epoch {epoch}: Loss = {loss.item()}, Loss dtype = {loss.dtype}")
+
+ # Backward pass: compute gradients
+ loss.backward()
+
+ # Optimization step
+ optimizer.step()
+
+
+
+ # Store the loss and Lyapunov spectrum
+ losses.append(loss.item())
+ lyapunov_spectra.append(lyapunov_spectrum.detach().cpu().numpy())
+
+ # On every 10th epoch the loss is printed again, in a shorter format.
+ if epoch % 10 == 0:
+ print(f"Epoch {epoch}: Loss = {loss.item()}")
+
+ # Update the Lyapunov spectrum plot
+ lyapunov_line.set_ydata(lyapunov_spectrum.detach().cpu().numpy())
+ lyapunov_line.set_xdata(range(len(lyapunov_spectrum)))
+ ax1.relim()
+ ax1.autoscale_view()
+
+ # Update the loss plot
+ loss_line.set_ydata(losses)
+ loss_line.set_xdata(range(len(losses)))
+ ax2.relim()
+ ax2.autoscale_view()
+
+ # Draw the updated plots
+ fig.canvas.draw()
+ fig.canvas.flush_events()
+
+print("Training complete.")
diff --git a/surrogate_flossing/README.md b/surrogate_flossing/README.md
new file mode 100644
index 0000000..40f97aa
--- /dev/null
+++ b/surrogate_flossing/README.md
@@ -0,0 +1,106 @@
+# SurrogateGradientFlossing
+This repository contains the implementation code for the manuscript: <br>
+ __Using Dynamical Systems Theory to Improve Surrogate Gradient Learning in Spiking Neural Networks__ <br>
+
+
+## Overview
+We analyze and optimize gradients of binary and spiking recurrent neural networks using concepts from dynamical systems theory. Specifically, we show that surrogate gradient training can be improved by pushing surrogate Lyapunov exponents to zero during or before training.
+
+## Installation
+
+#### Prerequisites
+- Download [Julia](https://julialang.org/downloads/)
+
+#### Dependencies
+- Julia (1.6)
+- Flux, BackwardsLinalg
+
+## Getting started
+To install the required packages, run the following in the Julia REPL after installing Julia:
+
+```
+using Pkg
+
+for pkg in ["Flux", "BackwardsLinalg"]
+ Pkg.add(pkg)
+end
+```
+
+For example, to train a spiking neural network on the delayed XOR task, run:
+```
+include("SurrogateGradientFlossing_ExampleCode.jl")
+# setting parameters:
+N, E, Ef, Ei, Ep, Ni, B, S, T, Tp, Ti, sIC, sIn, sNet, sONS, lr, b1, b2, IC, g, gbar, I1, delay, wsS, wsM, wrS, wrM, bS, bM, nLE, task, intype, Lwnt=
+80, 3001, 100, 500, 500, 2, 16, 1, 300, 55, 300, 1,1,1,1, 0.001f0, 0.9, 0.999, 1, 1.0, 0.0, 1.0,10, 1.0f0, 0.0f0, 1.0f0, 0.0f0, 0.1f0, 0.0f0,75, -1, 3, 0.0
+
+trainSRNNflossing(N, E, Ef, Ei, Ep, Ni, B, S, T, Tp, Ti, sIC, sIn, sNet, sONS, lr, b1, b2, IC, g, gbar, I1, delay, wsS, wsM, wrS, wrM, bS, bM, nLE, task, intype, Lwnt)
+```
+
+## Repository Overview
+_GradientFlossing_ExampleCode.jl_:\
+Example scripts for training networks with gradient flossing before training, with gradient flossing before and during training and without gradient flossing.
+
+
+_GradientFlossing_XOR.jl_:\
+Generates input and target output for copy task and delayed XOR task.
+
+<!---
+
+runOneStimulus.jl trains an RNN on tracking one OU-signal showing that the network becomes more tightly balanced over training epochs.\
+runTwoStimuli.jl trains an RNN on two OU-signal stimuli showing that the network becomes more tightly balanced over training epochs and breaks up into two weakly-connected subnetworks.\
+runTheeStimuli.jl trains an RNN on two OU-signal stimuli showing that the network becomes more tightly balanced over training epochs and breaks up into three weakly-connected subnetworks.\
+![Training RNN on two signals leads to balanced subpopulations](/figures/S=2.svg?raw=true "balanced subnetworks emerge after runTheeStimuli.jl")
+-->
+
+
+<!---
+
+### Training dynamics of eigenvalues:
+Here is a visualization of the recurrent weight matrix and its eigenvalues across training epochs.
+![Training dynamics of networks trained on multiple signals shows first tracking of global mean input](eigenvalue_movie_2D_task.gif)
+-->
+
+
+### Implementation details
+A full specification of packages used and their versions can be found in _packages.txt_ .\
+For learning rates, the default ADAM parameters were used to avoid any impression of fine-tuning.\
+All simulations were run on a single CPU and took on the order of minutes to a few hours.
+
+## Additional results:
+Here we provide additional results on surrogate gradient flossing in binary RNNs. The following figure shows that we can manipulate one or several surrogate Lyapunov exponents in binary networks:
+
+**Figure 1: Surrogate gradient flossing** regularizes *surrogate Lyapunov exponents* and facilitates gradient signal propagation in binary neural networks.
+
+![**Surrogate gradient flossing** regularizes *surrogate Lyapunov exponents* and facilitates gradient signal propagation in binary neural networks. **A)** The first *surrogate Lyapunov exponent* of a recurrent binary network plotted as a function of training epochs for different surrogate sharpness $g$. The square of the first *surrogate Lyapunov exponent* is minimized using gradient descent. **B)** *Surrogate Lyapunov spectrum* of a recurrent binary network after different numbers of Lyapunov exponents $k$ have been driven towards zero via *surrogate gradient flossing* for $k\in\{1,16,32\}$. The gray lines show the *surrogate Lyapunov spectra* before *surrogate gradient flossing*. Parameters: network size $N=80$, $g=1$ for **B**. Input as in Fig. 3. The thin semitransparent lines in **A** and **B** indicate nine network realizations; the full lines are their average.](./figures/bf_fig02a.png)
+
+
+**A)** The first *surrogate Lyapunov exponent* of a recurrent binary network plotted as a function of training epochs for different surrogate sharpness $g$. The square of the first *surrogate Lyapunov exponent* is minimized using gradient descent.
+
+**B)** *Surrogate Lyapunov spectrum* of a recurrent binary network after different numbers of Lyapunov exponents $k$ have been driven towards zero via *surrogate gradient flossing* for $k\in\{1,16,32\}$. The gray lines show the *surrogate Lyapunov spectra* before *surrogate gradient flossing*. Parameters: network size $N=80$, $g=1$ for **B**. Input as in Fig. 3. The thin semitransparent lines in **A** and **B** indicate nine network realizations; the full lines are their average.
+
+The following figure shows that surrogate gradient flossing improves training in binary RNNs:
+
+**Figure 2: Surrogate gradient flossing improves binary RNN training.**
+
+![**Gradient flossing** improves binary RNN training. **A)** Test accuracy for binary RNNs trained on the delayed temporal binary XOR task $y_t=x_{t-d/2} \oplus x_{t-d}$ with *adaptive gradient flossing* during training (orange) and without *gradient flossing* (blue) for $d=18$. Solid lines are the median across 9 network realizations, and individual network realizations are shown in transparent fine lines. **B)** Mean final test accuracy as a function of task difficulty (delay $d$) for delayed XOR task. **C)** Gradient norm with respect to initial network state $\mathbf{h}_0$. **D)** Gradient norm with respect to initial network state as a function of temporal task complexity $T$ averaged over training epochs.](./figures/bf_fig03a.png)
+
+
+**A)** Test accuracy for binary RNNs trained on the delayed temporal binary XOR task $y_t=x_{t-d/2} \oplus x_{t-d}$ with *adaptive gradient flossing* during training (orange) and without *gradient flossing* (blue) for $d=18$. Solid lines are the median across 9 network realizations, and individual network realizations are shown in transparent fine lines.
+
+**B)** Mean final test accuracy as a function of task difficulty (delay $d$) for delayed XOR task.
+
+**C)** Gradient norm with respect to initial network state $\mathbf{h}_0$.
+
+**D)** Gradient norm with respect to initial network state as a function of temporal task complexity averaged over training epochs.
+
+
+<!---
+### figures/
+Contains all figures of the main text and the supplement.
+-->
+
+
+<!---
+### tex/
+Contains the raw text of the main text and the supplement.
+-->
diff --git a/surrogate_flossing/figures/binary_surrogate_gradient_flossing.md b/surrogate_flossing/figures/binary_surrogate_gradient_flossing.md
new file mode 100644
index 0000000..abc0c63
--- /dev/null
+++ b/surrogate_flossing/figures/binary_surrogate_gradient_flossing.md
@@ -0,0 +1,2 @@
+
+**Figure:** Surrogate gradient flossing regularizes surrogate Lyapunov exponents and facilitates gradient signal propagation in binary neural networks.
diff --git a/toy/toy_transient_chaos.py b/toy/toy_transient_chaos.py
new file mode 100644
index 0000000..7a6c6bd
--- /dev/null
+++ b/toy/toy_transient_chaos.py
@@ -0,0 +1,112 @@
+"""Minimal analytically-grounded toy reproducing 'failure trajectories are more chaotic'.
+
+Framing: recursive reasoning = chaotic SEARCH of latent space until the trajectory lands in the
+solution basin, then it converges (the answer is a stable fixed point). At a fixed readout time T:
+ - captured by T => SUCCESS, finite-time Lyapunov exponent (FTLE) is low (chaotic search for a
+ while, then contraction).
+ - not captured => FAILURE, FTLE stays at the chaotic-saddle value.
+
+Map on [0,1] (input s = solution location, drawn per 'puzzle'):
+ search phase: x <- 4 x (1-x) (fully chaotic, lambda = ln 2 ~ +0.693)
+ capture: if |x - s| < eps -> converging phase: x <- s + 0.5 (x - s) (lambda = ln 0.5 < 0)
+
+This is a textbook chaotic-saddle / transient-chaos system: escape (capture) times are ~geometric,
+and FTLE over [0,T] = (ln2 * t_search + ln0.5 * (T - t_search)) / T. So FTLE separates outcome
+PURELY as a finite-time artifact of capture time -- matching our real-model findings:
+ (i) failure more chaotic, (ii) separation is concurrent/finite-time (vanishes as T->inf since
+ everyone eventually captures), (iii) it is convergence detection, not a deep correctness signal.
+
+Run: python toy_transient_chaos.py (CPU, seconds). Produces toy_transient_chaos.png + stats.
+"""
+from __future__ import annotations
+from pathlib import Path
+import numpy as np
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+
+HERE = Path(__file__).resolve().parent
+rng = np.random.default_rng(0)
+
+
+def run(n=20000, T=16, eps=0.04, seed=0):
+ rg = np.random.default_rng(seed)
+ s = rg.uniform(0.15, 0.85, n) # 'puzzle': solution location
+ x = rg.uniform(0, 1, n) # initial latent
+ captured = np.zeros(n, bool)
+ cap_time = np.full(n, T, int)
+ log_deriv_sum = np.zeros(n)
+ drift = np.zeros((n, T))
+ for t in range(T):
+ x_prev = x.copy()
+ search = ~captured
+ # log|f'| this step
+ ld = np.empty(n)
+ ld[search] = np.log(np.abs(4 * (1 - 2 * x[search])) + 1e-12)
+ ld[~search] = np.log(0.5)
+ # update
+ x_new = x.copy()
+ x_new[search] = 4 * x[search] * (1 - x[search])
+ x_new[~search] = s[~search] + 0.5 * (x[~search] - s[~search])
+ # capture check (enter basin during search)
+ newly = search & (np.abs(x_new - s) < eps)
+ captured |= newly
+ cap_time[newly] = t + 1
+ x = x_new
+ log_deriv_sum += ld
+ drift[:, t] = np.abs(x - x_prev)
+ ftle = log_deriv_sum / T
+ success = captured & (np.abs(x - s) < 0.05) # near solution at readout
+ return dict(ftle=ftle, success=success, cap_time=cap_time, drift=drift, T=T, eps=eps)
+
+
+def auc(score, y):
+ p, n = score[y == 1], score[y == 0]
+ if len(p) == 0 or len(n) == 0:
+ return float("nan")
+ a = np.concatenate([p, n]); o = np.argsort(a); r = np.empty(len(a)); r[o] = np.arange(1, len(a) + 1)
+ return float((r[:len(p)].sum() - len(p) * (len(p) + 1) / 2) / (len(p) * len(n)))
+
+
+def main():
+ d = run(T=16)
+ y = d["success"].astype(int); ftle = d["ftle"]
+ print(f"=== Toy transient-chaos (T=16, eps={d['eps']}) ===")
+ print(f"success rate = {y.mean():.3f}")
+ print(f"FTLE: success median={np.median(ftle[y==1]):+.3f} failure median={np.median(ftle[y==0]):+.3f}")
+ print(f"AUC(-FTLE -> success) = {auc(-ftle, y):.3f} (>0.9 => reproduces 'failure more chaotic')")
+ late_drift = d["drift"][:, -4:].mean(1)
+ print(f"late drift: success median={np.median(late_drift[y==1]):.3f} failure median={np.median(late_drift[y==0]):.3f}")
+ print(f"AUC(-late_drift -> success) = {auc(-late_drift, y):.3f}")
+
+ # FTLE separation vs readout time T (the finite-time / concurrent-not-antecedent prediction)
+ Ts = [4, 8, 16, 32, 64, 128, 256]
+ seps, accs = [], []
+ for T in Ts:
+ dd = run(T=T)
+ yy = dd["success"].astype(int)
+ seps.append(auc(-dd["ftle"], yy)); accs.append(yy.mean())
+ print("\nFTLE-AUC and success-rate vs readout time T (separation is a finite-time effect):")
+ for T, a, ac in zip(Ts, seps, accs):
+ print(f" T={T:>3}: AUC(-FTLE->success)={a:.3f} success_rate={ac:.3f}")
+
+ fig, ax = plt.subplots(1, 3, figsize=(15, 4.2))
+ bins = np.linspace(-0.5, 0.75, 50)
+ ax[0].hist(ftle[y == 1], bins=bins, alpha=0.6, color="tab:green", density=True, label=f"success (n={int(y.sum())})")
+ ax[0].hist(ftle[y == 0], bins=bins, alpha=0.6, color="tab:red", density=True, label=f"failure (n={int((1-y).sum())})")
+ ax[0].axvline(0, color="gray", lw=0.6); ax[0].set_xlabel("finite-time Lyapunov exponent (T=16)")
+ ax[0].set_title(f"Toy: failure more chaotic\nAUC={auc(-ftle,y):.3f}"); ax[0].legend(fontsize=8)
+ ax[1].hist(d["cap_time"][d["cap_time"] < 16], bins=range(0, 17), color="tab:blue", alpha=0.8)
+ ax[1].set_xlabel("capture (escape) time"); ax[1].set_ylabel("count")
+ ax[1].set_title("escape-time distribution (~geometric:\nchaotic-saddle signature)")
+ ax[2].plot(Ts, seps, "o-", label="AUC(-FTLE->success)")
+ ax[2].plot(Ts, accs, "s--", color="tab:purple", label="success rate")
+ ax[2].set_xscale("log"); ax[2].axhline(0.5, color="gray", lw=0.5)
+ ax[2].set_xlabel("readout time T"); ax[2].set_title("separation is FINITE-TIME:\nvanishes as T->inf (all escape)")
+ ax[2].legend(fontsize=8)
+ fig.tight_layout(); fig.savefig(HERE / "toy_transient_chaos.png", dpi=140)
+ print("\nsaved toy_transient_chaos.png")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/track_problem_learning.py b/track_problem_learning.py
new file mode 100644
index 0000000..d811bfd
--- /dev/null
+++ b/track_problem_learning.py
@@ -0,0 +1,375 @@
+"""Track individual test problems across HRM/TRM checkpoint diagnostics.
+
+The HRM and TRM diagnostic runs in this workspace reuse the same 512 test
+indices across checkpoints. This script turns those panel diagnostics into a
+per-problem learning-order view: first success, stable success, regressions,
+and spectrum changes around fail->success / success->fail transitions.
+"""
+from __future__ import annotations
+
+import argparse
+import csv
+import math
+import re
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+ROOT = Path("/home/yurenh2/rrm/research/flossing")
+DATA_ROOT = Path("/home/yurenh2/rrm/data/sudoku-extreme-1k-aug-1000/test")
+
+
+def parse_step(path: Path, kind: str) -> int:
+ if kind == "HRM":
+ return int(re.search(r"step_(\d+)_512", path.name).group(1))
+ return int(re.search(r"step(\d+)_512", path.name).group(1))
+
+
+def step_to_epoch(step: int, kind: str) -> int:
+ if kind == "HRM":
+ return int(round(step * 20000 / 26040))
+ # TRM file names correspond to 5k, 10k, ... 50k epochs.
+ return int(round(step * 5000 / 26041))
+
+
+def discover(kind: str) -> list[Path]:
+ if kind == "HRM":
+ files = sorted(ROOT.glob("diag_hrm_step_*_512.npz"), key=lambda p: parse_step(p, kind))
+ else:
+ files = sorted(ROOT.glob("diag_trm_singleGPU_step*_512.npz"), key=lambda p: parse_step(p, kind))
+ if not files:
+ raise FileNotFoundError(f"No {kind} diagnostics found")
+ return files
+
+
+def sorted_features(lyap: np.ndarray) -> dict[str, np.ndarray]:
+ s = np.sort(lyap, axis=1)[:, ::-1]
+ pos = np.clip(s, 0, None)
+ return {
+ "lambda_max": s[:, 0],
+ "lambda_2": s[:, 1],
+ "lambda_min8": s[:, -1],
+ "mean8": s.mean(axis=1),
+ "tail_mean_5_8": s[:, 4:].mean(axis=1),
+ "positive_sum": pos.sum(axis=1),
+ "positive_count": (s > 0).sum(axis=1).astype(float),
+ "spread": s[:, 0] - s[:, -1],
+ }
+
+
+def write_csv(path: Path, rows: list[dict]) -> None:
+ if not rows:
+ return
+ keys: list[str] = []
+ for row in rows:
+ for key in row:
+ if key not in keys:
+ keys.append(key)
+ with path.open("w", newline="") as f:
+ writer = csv.DictWriter(f, fieldnames=keys)
+ writer.writeheader()
+ writer.writerows(rows)
+
+
+def classify(success: np.ndarray) -> tuple[str, int | None, int | None, int, int]:
+ """Return group, first_success_idx, stable_success_idx, n_success, transitions."""
+ success = np.asarray(success, dtype=bool)
+ n_success = int(success.sum())
+ transitions = int(np.abs(np.diff(success.astype(int))).sum())
+ first_success_idx = int(np.argmax(success)) if success.any() else None
+
+ stable_success_idx = None
+ for i in range(len(success)):
+ if success[i:].all():
+ stable_success_idx = i
+ break
+
+ if n_success == 0:
+ group = "never"
+ elif n_success == len(success):
+ group = "always"
+ elif stable_success_idx is not None:
+ if transitions <= 1:
+ group = "stable_learned"
+ else:
+ group = "flaky_then_stable"
+ else:
+ group = "transient_or_regressed"
+ return group, first_success_idx, stable_success_idx, n_success, transitions
+
+
+def load_panel(kind: str):
+ files = discover(kind)
+ steps = np.array([parse_step(p, kind) for p in files], dtype=int)
+ epochs = np.array([step_to_epoch(s, kind) for s in steps], dtype=int)
+
+ idx0 = np.load(files[0])["idx"]
+ exact_list = []
+ token_list = []
+ feature_time: dict[str, list[np.ndarray]] = {}
+ spectra = []
+
+ for path in files:
+ data = np.load(path)
+ idx = data["idx"]
+ if not np.array_equal(idx0, idx):
+ raise ValueError(f"{kind} diagnostics do not share the same idx array: {path}")
+ exact_list.append(data["exact_correct"].astype(bool))
+ token_list.append(data["token_acc"].astype(float))
+ lyap = data["lyap_spec"].astype(float)
+ spectra.append(np.sort(lyap, axis=1)[:, ::-1])
+ for name, arr in sorted_features(lyap).items():
+ feature_time.setdefault(name, []).append(arr)
+
+ exact = np.stack(exact_list, axis=1) # (N, T)
+ token_acc = np.stack(token_list, axis=1) # (N, T)
+ spectrum = np.stack(spectra, axis=1) # (N, T, K)
+ features = {k: np.stack(v, axis=1) for k, v in feature_time.items()}
+ return files, idx0.astype(int), steps, epochs, exact, token_acc, spectrum, features
+
+
+def add_problem_features(rows: list[dict], idx: np.ndarray) -> None:
+ inputs = np.load(DATA_ROOT / "all__inputs.npy", mmap_mode="r")
+ labels = np.load(DATA_ROOT / "all__labels.npy", mmap_mode="r")
+ sample_inputs = inputs[idx]
+ sample_labels = labels[idx]
+ # In these Sudoku tensors, 0 is padding, 1 is the blank-cell token, and
+ # digits are represented by 2..10.
+ givens = (sample_inputs > 1).sum(axis=1)
+ blanks = (sample_inputs == 1).sum(axis=1)
+ label_nonzero = (sample_labels > 0).sum(axis=1)
+ for row, g, b, lnz in zip(rows, givens, blanks, label_nonzero):
+ row["givens"] = int(g)
+ row["blanks"] = int(b)
+ row["label_nonzero"] = int(lnz)
+
+
+def analyze_kind(kind: str, out_dir: Path) -> dict:
+    """Build the per-problem, per-transition and per-group tables for ``kind``.
+
+    Writes three CSVs into ``out_dir`` (``<kind>_problem_tracks.csv``,
+    ``<kind>_learning_events.csv``, ``<kind>_learning_groups.csv``, with the
+    kind lower-cased), calls the three plotting helpers, and returns a summary
+    dict holding the steps, epochs, panel size and the group/event rows.
+    """
+    _, idx, steps, epochs, exact, token_acc, spectrum, features = load_panel(kind)
+    n, t = exact.shape
+
+    # --- one row per test problem: learning class plus per-checkpoint values ---
+    problem_rows: list[dict] = []
+    for i in range(n):
+        group, first_i, stable_i, n_success, transitions = classify(exact[i])
+        row = {
+            "kind": kind,
+            "panel_row": i,
+            "test_idx": int(idx[i]),
+            "group": group,
+            "n_success_checkpoints": n_success,
+            "transitions": transitions,
+            # Empty string marks "never happened"; sort_order relies on that.
+            "first_success_step": int(steps[first_i]) if first_i is not None else "",
+            "first_success_epoch": int(epochs[first_i]) if first_i is not None else "",
+            "stable_success_step": int(steps[stable_i]) if stable_i is not None else "",
+            "stable_success_epoch": int(epochs[stable_i]) if stable_i is not None else "",
+            "final_success": bool(exact[i, -1]),
+            "final_token_acc": float(token_acc[i, -1]),
+            "final_lambda_max": float(features["lambda_max"][i, -1]),
+            "final_mean8": float(features["mean8"][i, -1]),
+            "final_tail_mean_5_8": float(features["tail_mean_5_8"][i, -1]),
+            "final_positive_count": float(features["positive_count"][i, -1]),
+            "final_positive_sum": float(features["positive_sum"][i, -1]),
+        }
+        # Wide-format columns, one set per checkpoint step.
+        for j, step in enumerate(steps):
+            row[f"success@{step}"] = int(exact[i, j])
+            row[f"lambda_max@{step}"] = float(features["lambda_max"][i, j])
+            row[f"mean8@{step}"] = float(features["mean8"][i, j])
+            row[f"positive_count@{step}"] = float(features["positive_count"][i, j])
+        problem_rows.append(row)
+    # Adds givens/blanks/label_nonzero to every row (needed by group_rows below).
+    add_problem_features(problem_rows, idx)
+    write_csv(out_dir / f"{kind.lower()}_problem_tracks.csv", problem_rows)
+
+    # --- one row per (consecutive checkpoint pair, transition type) ---
+    event_rows: list[dict] = []
+    for j in range(1, t):
+        prev = exact[:, j - 1]
+        cur = exact[:, j]
+        for event_name, mask in [
+            ("learned_fail_to_success", (~prev) & cur),
+            ("lost_success_to_fail", prev & (~cur)),
+            ("stayed_failure", (~prev) & (~cur)),
+            ("stayed_success", prev & cur),
+        ]:
+            # Skip empty categories so the means below are never taken over nothing.
+            if not mask.any():
+                continue
+            row = {
+                "kind": kind,
+                "from_step": int(steps[j - 1]),
+                "to_step": int(steps[j]),
+                "from_epoch": int(epochs[j - 1]),
+                "to_epoch": int(epochs[j]),
+                "event": event_name,
+                "n": int(mask.sum()),
+                "from_token_acc_mean": float(token_acc[mask, j - 1].mean()),
+                "to_token_acc_mean": float(token_acc[mask, j].mean()),
+            }
+            for feat_name in ["lambda_max", "mean8", "tail_mean_5_8", "positive_count", "positive_sum"]:
+                before = features[feat_name][mask, j - 1]
+                after = features[feat_name][mask, j]
+                row[f"{feat_name}_before"] = float(before.mean())
+                row[f"{feat_name}_after"] = float(after.mean())
+                # Mean of the paired per-problem differences.
+                row[f"{feat_name}_delta"] = float((after - before).mean())
+            event_rows.append(row)
+    write_csv(out_dir / f"{kind.lower()}_learning_events.csv", event_rows)
+
+    # --- one row per learning class present in this panel ---
+    group_rows: list[dict] = []
+    groups = sorted(set(r["group"] for r in problem_rows))
+    for group in groups:
+        mask = np.array([r["group"] == group for r in problem_rows])
+        row = {
+            "kind": kind,
+            "group": group,
+            "n": int(mask.sum()),
+            "fraction": float(mask.mean()),
+            "givens_mean": float(np.mean([r["givens"] for r, m in zip(problem_rows, mask) if m])),
+            "final_success_rate": float(exact[mask, -1].mean()),
+            "final_token_acc_mean": float(token_acc[mask, -1].mean()),
+        }
+        # Feature means at the first and last checkpoint of the panel.
+        for feat_name in ["lambda_max", "mean8", "tail_mean_5_8", "positive_count", "positive_sum"]:
+            row[f"initial_{feat_name}"] = float(features[feat_name][mask, 0].mean())
+            row[f"final_{feat_name}"] = float(features[feat_name][mask, -1].mean())
+        group_rows.append(row)
+    write_csv(out_dir / f"{kind.lower()}_learning_groups.csv", group_rows)
+
+    plot_problem_heatmap(kind, steps, epochs, exact, features, problem_rows, out_dir)
+    plot_group_dynamics(kind, steps, epochs, exact, features, problem_rows, out_dir)
+    plot_event_deltas(kind, event_rows, out_dir)
+
+    return {
+        "kind": kind,
+        "steps": steps.tolist(),
+        "epochs": epochs.tolist(),
+        "n": n,
+        "group_rows": group_rows,
+        "event_rows": event_rows,
+    }
+
+
+def sort_order(problem_rows: list[dict], exact: np.ndarray) -> np.ndarray:
+ def key(i: int):
+ r = problem_rows[i]
+ first = r["first_success_epoch"]
+ stable = r["stable_success_epoch"]
+ first_val = int(first) if first != "" else 10**9
+ stable_val = int(stable) if stable != "" else 10**9
+ return (stable_val, first_val, -r["n_success_checkpoints"], r["transitions"], r["test_idx"])
+
+ return np.array(sorted(range(len(problem_rows)), key=key), dtype=int)
+
+
+def plot_problem_heatmap(
+ kind: str,
+ steps: np.ndarray,
+ epochs: np.ndarray,
+ exact: np.ndarray,
+ features: dict[str, np.ndarray],
+ problem_rows: list[dict],
+ out_dir: Path,
+) -> None:
+ order = sort_order(problem_rows, exact)
+ fig, axes = plt.subplots(1, 4, figsize=(14, 8), sharey=True)
+ matrices = [
+ ("success", exact.astype(float), "Greens", 0, 1),
+ ("λmax", features["lambda_max"], "coolwarm", -0.12 if kind == "TRM" else -0.25, 0.12),
+ ("mean8", features["mean8"], "coolwarm", -0.16 if kind == "TRM" else -0.28, 0.08),
+ ("# positive", features["positive_count"], "magma", 0, 8),
+ ]
+ for ax, (title, mat, cmap, vmin, vmax) in zip(axes, matrices):
+ im = ax.imshow(mat[order], aspect="auto", interpolation="nearest", cmap=cmap, vmin=vmin, vmax=vmax)
+ ax.set_title(title)
+ ax.set_xticks(range(len(epochs)))
+ ax.set_xticklabels(epochs, rotation=45, ha="right")
+ ax.set_xlabel("epoch")
+ fig.colorbar(im, ax=ax, fraction=0.045, pad=0.02)
+ axes[0].set_ylabel("test problems sorted by stable learning time")
+ fig.suptitle(f"{kind}: individual problem learning tracks on matched 512-test panel")
+ fig.tight_layout(rect=[0, 0, 1, 0.96])
+ fig.savefig(out_dir / f"{kind.lower()}_problem_track_heatmap.png", dpi=150)
+ plt.close(fig)
+
+
+def plot_group_dynamics(
+ kind: str,
+ steps: np.ndarray,
+ epochs: np.ndarray,
+ exact: np.ndarray,
+ features: dict[str, np.ndarray],
+ problem_rows: list[dict],
+ out_dir: Path,
+) -> None:
+ groups = ["always", "stable_learned", "flaky_then_stable", "transient_or_regressed", "never"]
+ colors = {
+ "always": "C2",
+ "stable_learned": "C0",
+ "flaky_then_stable": "C4",
+ "transient_or_regressed": "C1",
+ "never": "C3",
+ }
+ fig, axes = plt.subplots(2, 2, figsize=(12, 8), sharex=True)
+ for group in groups:
+ mask = np.array([r["group"] == group for r in problem_rows])
+ if not mask.any():
+ continue
+ label = f"{group} n={mask.sum()}"
+ axes[0, 0].plot(epochs, exact[mask].mean(axis=0), "-o", color=colors[group], label=label)
+ axes[0, 1].plot(epochs, features["lambda_max"][mask].mean(axis=0), "-o", color=colors[group])
+ axes[1, 0].plot(epochs, features["mean8"][mask].mean(axis=0), "-o", color=colors[group])
+ axes[1, 1].plot(epochs, features["positive_count"][mask].mean(axis=0), "-o", color=colors[group])
+ axes[0, 0].set_title("success rate")
+ axes[0, 1].set_title("λmax")
+ axes[1, 0].set_title("mean top-8")
+ axes[1, 1].set_title("# positive among top 8")
+ for ax in axes.flat:
+ ax.axhline(0, color="k", lw=0.8, alpha=0.25)
+ ax.grid(alpha=0.3)
+ ax.set_xlabel("epoch")
+ axes[0, 0].legend(fontsize=8, loc="best")
+ fig.suptitle(f"{kind}: dynamics by per-problem learning class")
+ fig.tight_layout(rect=[0, 0, 1, 0.96])
+ fig.savefig(out_dir / f"{kind.lower()}_learning_group_dynamics.png", dpi=150)
+ plt.close(fig)
+
+
+def plot_event_deltas(kind: str, event_rows: list[dict], out_dir: Path) -> None:
+ learn = [r for r in event_rows if r["event"] == "learned_fail_to_success"]
+ if not learn:
+ return
+ epochs = [r["to_epoch"] for r in learn]
+ fig, ax = plt.subplots(figsize=(10, 4))
+ for feat in ["lambda_max", "mean8", "tail_mean_5_8", "positive_count"]:
+ ax.plot(epochs, [r[f"{feat}_delta"] for r in learn], "-o", label=feat)
+ ax.axhline(0, color="k", lw=0.8, alpha=0.35)
+ ax.set_title(f"{kind}: mean spectrum change on fail→success transitions")
+ ax.set_xlabel("to epoch")
+ ax.set_ylabel("after - before")
+ ax.legend(ncol=2, fontsize=8)
+ ax.grid(alpha=0.3)
+ fig.tight_layout()
+ fig.savefig(out_dir / f"{kind.lower()}_learn_event_deltas.png", dpi=150)
+ plt.close(fig)
+
+
+def main() -> None:
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--out-dir", default=str(ROOT / "problem_tracks"))
+ args = parser.parse_args()
+ out_dir = Path(args.out_dir)
+ out_dir.mkdir(parents=True, exist_ok=True)
+
+ for kind in ["HRM", "TRM"]:
+ result = analyze_kind(kind, out_dir)
+ print(f"\n{kind}: N={result['n']} checkpoints={len(result['steps'])}")
+ for row in result["group_rows"]:
+ print(
+ f" {row['group']:<22s} n={row['n']:>3d} "
+ f"final_acc={row['final_success_rate']:.3f} "
+ f"final_mean8={row['final_mean8']:+.4f} "
+ f"final_poscnt={row['final_positive_count']:.2f}"
+ )
+
+ print(f"\nWrote {out_dir}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/trajectory_augmentation_notes.md b/trajectory_augmentation_notes.md
new file mode 100644
index 0000000..0460bf7
--- /dev/null
+++ b/trajectory_augmentation_notes.md
@@ -0,0 +1,190 @@
+# Trajectory Augmentation Notes
+
+## Current Hypothesis
+
+Use Lyapunov-style tiny perturbations of the initial recurrent latent state as
+hidden-trajectory augmentation:
+
+```text
+x, y unchanged
+z0 -> z0 + epsilon
+loss = original supervised ACT/QA loss against y
+```
+
+This tests task-level attractor stability directly: if the dynamics are
+chaotic, a tiny perturbation of the initial trajectory should miss the correct
+answer basin; training forces perturbed trajectories for the same ground-truth
+pair to still reach `y`.
+
+## Running First
+
+Initial queued runs use:
+
+```text
+sigma = 1e-3
+perturb = z_H and z_L
+single_perturbed_ce: one perturbed trajectory, no clean branch
+multi_perturbed_ce: clean plus three perturbed trajectories, averaged CE
+```
+
+No KL, no Lyapunov loss, no JVP/flossing, no data/input augmentation.
+
+## Backup Experiments
+
+Do not conclude from one noise scale. If the current runs fail or are
+ambiguous, test a noise curriculum:
+
+```text
+clean CE warmup for N steps
+then enable trajectory augmentation
+sigma ramp: 0 -> target_sigma
+```
+
+Candidate fixed/ramp target scales:
+
+```text
+1e-5, 3e-5, 1e-4, 3e-4, 1e-3, 3e-3, 1e-2
+```
+
+Instead of treating `sigma` as one fixed value, also test centered noise
+distributions around the no-perturbation trajectory:
+
+```text
+epsilon ~ Normal(0, sigma^2 I)
+sigma sampled per trajectory from LogUniform(sigma_min, sigma_max)
+sigma ramped over training, then sampled in a band around the target
+mixture: p(clean) delta_0 + (1 - p(clean)) Normal(0, sigma^2 I)
+```
+
+The distribution should remain centered at zero so the clean trajectory is the
+mean trajectory. The goal is to cover a small ball/shell around `z0`, not to
+move the model to a new deterministic offset.
+
+Also compare perturb locations:
+
+```text
+z_H only
+z_L only
+z_H and z_L
+```
+
+The expected success signature is not necessarily all `lambda1 < 0`; better
+signals are improved perturbed-rollout success, fewer broad positive modes,
+and deterministic clean accuracy not regressing.
+
+## Long-Train Engineering Note
+
+`step9_trajectory_perturb_train.py` now supports two rollout implementations:
+
+```text
+serial_act:
+ old path; K trajectories are rolled out one by one through the ACT wrapper
+
+parallel_fixed:
+ B -> B*K
+ first rollout is clean for multi_perturbed_ce
+ remaining rollouts sample centered perturbations
+ run exactly halt_max_steps without ACT streaming reset
+ average supervised loss across B*K trajectories
+```
+
+`parallel_fixed` is the default for new runs. It deliberately avoids the ACT
+wrapper's halted-sample reset, because reset would make early-halted copies of
+the same `(x,y)` repeat inside one optimizer step.
+
+Preferred options:
+
+```text
+1. Fixed-unroll multiK:
+ B -> B*K
+ repeat x,y K times
+ initialize clean/noisy z0 variants
+ run exactly halt_max_steps for all trajectories
+ compute supervised loss on fixed rollout outputs
+```
+
+This is simplest and matches the stability question: all initial-neighborhood
+trajectories should reach `y` after the same reasoning budget.
+
+```text
+2. ACT-mask multiK:
+ B -> B*K
+ repeat x,y K times
+ run ACT in parallel
+ maintain active_mask
+ after a trajectory halts, zero/mask later loss contributions
+ normalize per trajectory or by valid trajectory-steps
+```
+
+Do not naively concatenate `B*K` and use the unmodified ACT streaming semantics
+without masking. The wrapper resets halted samples and reloads data, which is
+correct for ordinary streaming training but would make early-halted copies of
+the same `(x,y)` repeat inside one optimizer step.
+
+## Current Long Runs
+
+Started 2026-05-27:
+
+```text
+step9_E_hrm_baseline_parallel_fixed_26040_50k
+step9_F_hrm_multi4_loguniform_ramp_26040_50k
+step9_G_trm_baseline_parallel_fixed_26041_batch4_50k
+step9_H_trm_multi4_loguniform_ramp_26041_batch4_50k
+```
+
+Perturb runs use:
+
+```text
+K = 4
+noise_sampling = loguniform
+sigma interval final = [3e-5, 3e-3]
+sigma ramp = 0 -> final interval over 5000 steps
+perturb = z_H and z_L
+eval_every = 2500
+eval_n = 1024
+save_every_eval + save_best + save_final
+```
+
+These runs include fixed-unroll baselines because the fixed-unroll objective is
+not identical to the old ACT-streaming baseline.
+
+## Long-Run Result Snapshot
+
+Completed 2026-05-27:
+
+```text
+HRM fixed baseline:
+ initial 0.5176, best 0.6328 @ 5000, final 0.5801
+
+HRM multi4 loguniform:
+ initial 0.5176, best 0.6250 @ 7500, final 0.5889
+
+TRM fixed baseline:
+ initial 0.5615, best 0.5947 @ 22500, final 0.4971
+
+TRM multi4 loguniform:
+ initial 0.5615, best 0.6084 @ 42500, final 0.5508
+```
+
+Interpretation: HRM did not get a best-accuracy gain from multi4, though its
+final accuracy decayed slightly less. TRM got both a higher best accuracy and a
+much smaller final decay under multi4, matching the earlier 10k signal that
+trajectory augmentation may raise the ceiling or reduce regression, but is
+still unstable.
+
+## Resume Support
+
+Model-only resume has always worked if `--ckpt-root` points to the original
+config directory and `--ckpt-name` is an absolute path to a saved `best.pt` or
+`final.pt`.
+
+Exact training-state resume is now supported:
+
+```text
+--save-train-state
+ writes latest_state.pt, best_state.pt, final_state.pt
+
+--resume-state path/to/latest_state.pt
+ restores model weights, optimizer state, current train_step, best_acc, and RNG
+```
+
+The launcher now enables `--save-train-state` for future long runs.
diff --git a/watch_ptrm_gbs768_compare.sh b/watch_ptrm_gbs768_compare.sh
new file mode 100755
index 0000000..b4809a2
--- /dev/null
+++ b/watch_ptrm_gbs768_compare.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+cd /home/yurenh2/rrm
+
+BASE="research/flossing/ptrm_official_gbs768_base58590_k100_d64_sigma03_Lonly_n1000_seed0.npz"
+TEST="research/flossing/ptrm_official_gbs768_multi4_35805_k100_d64_sigma03_Lonly_n1000_seed0.npz"
+OUT="research/flossing/ptrm_gbs768_base_vs_multi4_k100_d64_sigma03_Lonly_n1000_seed0"
+PY="/home/yurenh2/miniconda3/envs/rrm/bin/python"
+
+echo "[watch] waiting for PTRM caches"
+while [[ ! -s "${BASE}" || ! -s "${TEST}" ]]; do
+ date "+[watch] %F %T base=$(test -s "${BASE}" && echo yes || echo no) test=$(test -s "${TEST}" && echo yes || echo no)"
+ sleep 120
+done
+
+echo "[watch] both caches exist; running paired comparison"
+"${PY}" research/flossing/compare_ptrm_rollout_counts.py \
+ --base "${BASE}" \
+ --test "${TEST}" \
+ --base-label base \
+ --test-label multi4 \
+ --out-prefix "${OUT}"
+
+echo "[watch] done"
+ls -lh "${OUT}".summary.csv "${OUT}".hist.csv "${OUT}".kcurve.csv
diff --git a/watch_trm_directional_multi4_long.sh b/watch_trm_directional_multi4_long.sh
new file mode 100755
index 0000000..67531b7
--- /dev/null
+++ b/watch_trm_directional_multi4_long.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+RUN_NAME="pretrain_mlp_t_sudoku_directional_multi4_parallel_c4_eps003_sigma003"
+CKPT_DIR="/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/${RUN_NAME}"
+TRAIN_LOG="/home/yurenh2/rrm/research/flossing/trm_directional_multi4_long_gpu3.log"
+STATUS_LOG="/home/yurenh2/rrm/research/flossing/trm_directional_multi4_long_status.log"
+
+last_step=""
+while true; do
+ ts="$(date '+%Y-%m-%d %H:%M:%S')"
+ if pgrep -af "${RUN_NAME}" >/tmp/trm_directional_pgrep.txt; then
+ proc="running"
+ else
+ proc="stopped"
+ fi
+
+ latest_step="$(find "${CKPT_DIR}" -maxdepth 1 -type f -name 'step_*' -printf '%f\n' 2>/dev/null | sort -V | tail -1 || true)"
+ gpu_line="$(nvidia-smi --query-gpu=index,memory.used,utilization.gpu --format=csv,noheader,nounits | awk -F', ' '$1==3 {print "gpu3_mem_mb="$2" gpu3_util="$3"%"}')"
+ progress="$(grep -ao '[0-9]\\+/65104' "${TRAIN_LOG}" 2>/dev/null | tail -1 || true)"
+
+ if [[ "${latest_step}" != "${last_step}" ]]; then
+ last_step="${latest_step}"
+ {
+ echo "${ts} proc=${proc} latest_ckpt=${latest_step:-none} progress=${progress:-unknown} ${gpu_line:-gpu3_unknown}"
+ tail -20 "${TRAIN_LOG}" 2>/dev/null || true
+ echo
+ } >> "${STATUS_LOG}"
+ else
+ echo "${ts} proc=${proc} latest_ckpt=${latest_step:-none} progress=${progress:-unknown} ${gpu_line:-gpu3_unknown}" >> "${STATUS_LOG}"
+ fi
+
+ [[ "${proc}" == "running" ]] || break
+ sleep 300
+done