From a6ec4288a2232988b130b2f00bb2565f81706966 Mon Sep 17 00:00:00 2001 From: YurenHao0426 Date: Mon, 29 Jun 2026 12:15:51 -0500 Subject: Recursive reasoning dynamics: analysis pipeline, paper drafts, toy models Failure=more-chaotic (task-general under validity labeling) reduces to convergence/completeness detection; mechanism (transient chaos vs multistability vs input-induced) under investigation. Co-Authored-By: Claude Fable 5 --- .gitignore | 12 + README.md | 32 + analysis_2x2/OBSERVATIONS.md | 155 ++ analysis_2x2/analyze_2x2.py | 350 ++++ analysis_2x2/analyze_clv.py | 72 + analysis_2x2/analyze_early_connectivity.py | 77 + analysis_2x2/analyze_maze_connectivity.py | 87 + analysis_2x2/analyze_phase1.py | 126 ++ analysis_2x2/analyze_solution_space.py | 95 + analysis_2x2/cells_hrm26040_joint_n2048.csv | 5 + analysis_2x2/cells_hrm26040_n8192.csv | 5 + analysis_2x2/cells_trm_official58590_n2048.csv | 5 + .../cells_trm_official_gbs768_step58590_n512.csv | 5 + .../cells_trm_singleGPU_step130205_n512.csv | 5 + .../cells_trm_singleGPU_step260410_n512.csv | 5 + analysis_2x2/cells_trm_step13020_n512.csv | 5 + .../characterization/characterization_results.md | 70 + analysis_2x2/characterize_separation.py | 182 ++ analysis_2x2/dump_early_ckpts.sh | 20 + analysis_2x2/early_pairing_hrm26040_joint.md | 17 + analysis_2x2/early_pairing_trm_official58590.md | 17 + analysis_2x2/early_window_pairing.py | 85 + analysis_2x2/evolution_hrm.csv | 11 + analysis_2x2/evolution_trm.csv | 11 + analysis_2x2/offline_followups.py | 229 +++ analysis_2x2/offline_followups/followups.md | 88 + analysis_2x2/offline_followups/phase1_e1.md | 27 + analysis_2x2/offline_phase1_e1.py | 123 ++ analysis_2x2/phase1/phase1_results.md | 48 + analysis_2x2/queue_maze_followups.sh | 50 + analysis_2x2/results.md | 69 + analysis_2x2/run_clv_geom.sh | 23 + analysis_2x2/run_maze_diag_queue.sh | 40 + analysis_2x2/run_phase1_queue.sh | 92 + analysis_2x2/run_retest_2x2.sh | 75 + 
analysis_2x2/sweep_hrm26040_joint_n2048.csv | 20 + analysis_2x2/sweep_hrm26040_n8192.csv | 20 + analysis_2x2/sweep_trm_official58590_n2048.csv | 20 + .../sweep_trm_official_gbs768_step58590_n512.csv | 20 + .../sweep_trm_singleGPU_step130205_n512.csv | 20 + .../sweep_trm_singleGPU_step260410_n512.csv | 20 + analysis_2x2/sweep_trm_step13020_n512.csv | 20 + analyze_diag.py | 115 ++ analyze_dynamics_experiments.py | 289 +++ analyze_halt_bucket.py | 122 ++ analyze_joint.py | 103 ++ analyze_ptrm_rollout_cache.py | 135 ++ analyze_separate.py | 124 ++ analyze_spectrum_microscope.py | 360 ++++ analyze_step3.py | 76 + analyze_step3_all.py | 94 + analyze_tangent.py | 149 ++ analyze_tangent_modes.py | 143 ++ ...rm_multi4_step182287_clean_k4_n1000.summary.csv | 2 + ...rm_multi4_step260410_clean_k4_n1000.summary.csv | 2 + chaos_trm_multi4_success_failure_summary.csv | 11 + compare_ptrm_rollout_counts.py | 203 +++ diagnose_hrm.py | 306 ++++ diagnose_hrm_joint.py | 240 +++ diagnose_hrm_joint_clv.py | 310 ++++ diagnose_hrm_joint_geom.py | 264 +++ diagnose_hrm_joint_horizon.py | 240 +++ diagnose_hrm_joint_short.py | 240 +++ diagnose_hrm_separate.py | 242 +++ diagnose_trm_joint.py | 225 +++ diagnose_trm_joint_clv.py | 304 ++++ diagnose_trm_joint_geom.py | 251 +++ diagnose_trm_joint_horizon.py | 225 +++ diagnose_trm_joint_maze.py | 227 +++ diagnose_trm_joint_short.py | 225 +++ .../plots/directional_lyap_perturb_combined.csv | 73 + .../plots/final_gpu_status.txt | 4 + .../smoke_baseline.summary.csv | 5 + .../smoke_baseline_fixedclean.summary.csv | 5 + ...ine_best_step58590_n1000_c8_dirlyap.summary.csv | 25 + ...ti4_best_step35805_n1000_c8_dirlyap.summary.csv | 25 + ...i4_final_step65100_n1000_c8_dirlyap.summary.csv | 25 + directional_lyap_perturb/watch_and_plot.sh | 32 + .../plots/directional_lyap_perturb_combined.csv | 97 + .../plots/final_gpu_status.txt | 4 + ...ne_best_step58590_n1000_c8_fdeps001.summary.csv | 25 + ...ne_best_step58590_n1000_c8_fdeps003.summary.csv | 25 + 
...i4_best_step35805_n1000_c8_fdeps001.summary.csv | 25 + ...i4_best_step35805_n1000_c8_fdeps003.summary.csv | 25 + .../watch_queue_and_plot.sh | 55 + directional_lyap_perturb_robustness.py | 428 +++++ directional_multi4_eval_n1000.csv | 6 + dynamics_experiment_report.md | 86 + engelken_python_flossing.py | 336 ++++ eval_directional_ckpts.py | 86 + eval_trm_gbs192_step260410_n1000.summary.csv | 2 + eval_trm_gbs192_step52082_n1000.summary.csv | 2 + eval_trm_official768_step13020_n1000.summary.csv | 2 + extend_rollout.py | 68 + external/GradientFlossing/README.md | 79 + external/GradientFlossing/packages.txt | 6 + external/julia-1.6.7/LICENSE.md | 26 + .../julia-1.6.7/share/julia/base/ryu/LICENSE.md | 25 + .../share/julia/stdlib/v1.6/ArgTools/README.md | 431 +++++ .../julia/stdlib/v1.6/Artifacts/docs/src/index.md | 21 + .../julia/stdlib/v1.6/Base64/docs/src/index.md | 10 + .../julia/stdlib/v1.6/CRC32c/docs/src/index.md | 6 + .../julia/stdlib/v1.6/Dates/docs/src/index.md | 826 +++++++++ .../stdlib/v1.6/DelimitedFiles/docs/src/index.md | 11 + .../stdlib/v1.6/Distributed/docs/src/index.md | 69 + .../share/julia/stdlib/v1.6/Downloads/README.md | 186 ++ .../julia/stdlib/v1.6/Downloads/docs/src/index.md | 9 + .../stdlib/v1.6/FileWatching/docs/src/index.md | 9 + .../julia/stdlib/v1.6/Future/docs/src/index.md | 9 + .../stdlib/v1.6/InteractiveUtils/docs/src/index.md | 28 + .../stdlib/v1.6/LazyArtifacts/docs/src/index.md | 10 + .../share/julia/stdlib/v1.6/LibCURL/LICENSE.md | 23 + .../share/julia/stdlib/v1.6/LibCURL/README.md | 72 + .../julia/stdlib/v1.6/LibGit2/docs/src/index.md | 161 ++ .../julia/stdlib/v1.6/Libdl/docs/src/index.md | 15 + .../stdlib/v1.6/LinearAlgebra/docs/src/index.md | 681 +++++++ .../julia/stdlib/v1.6/Logging/docs/src/index.md | 311 ++++ .../julia/stdlib/v1.6/Markdown/docs/src/index.md | 398 +++++ .../share/julia/stdlib/v1.6/Mmap/docs/src/index.md | 7 + .../julia/stdlib/v1.6/NetworkOptions/README.md | 270 +++ .../share/julia/stdlib/v1.6/Pkg/LICENSE.md | 
47 + .../share/julia/stdlib/v1.6/Pkg/README.md | 34 + .../share/julia/stdlib/v1.6/Pkg/docs/src/api.md | 88 + .../julia/stdlib/v1.6/Pkg/docs/src/artifacts.md | 229 +++ .../julia/stdlib/v1.6/Pkg/docs/src/basedocs.md | 25 + .../stdlib/v1.6/Pkg/docs/src/compatibility.md | 201 +++ .../stdlib/v1.6/Pkg/docs/src/creating-packages.md | 290 +++ .../julia/stdlib/v1.6/Pkg/docs/src/environments.md | 108 ++ .../share/julia/stdlib/v1.6/Pkg/docs/src/faq.md | 3 + .../stdlib/v1.6/Pkg/docs/src/getting-started.md | 216 +++ .../julia/stdlib/v1.6/Pkg/docs/src/glossary.md | 124 ++ .../share/julia/stdlib/v1.6/Pkg/docs/src/index.md | 65 + .../stdlib/v1.6/Pkg/docs/src/managing-packages.md | 428 +++++ .../julia/stdlib/v1.6/Pkg/docs/src/registries.md | 81 + .../julia/stdlib/v1.6/Pkg/docs/src/toml-files.md | 270 +++ .../dev/Unregistered/README.md | 2 + .../SandboxFallback2/dev/Unregistered/README.md | 2 + .../test/dev/Unregistered/README.md | 2 + .../TransferSubgraph/dev/Unregistered/README.md | 2 + .../julia/stdlib/v1.6/Printf/docs/src/index.md | 6 + .../julia/stdlib/v1.6/Profile/docs/src/index.md | 17 + .../share/julia/stdlib/v1.6/REPL/docs/src/index.md | 692 +++++++ .../stdlib/v1.6/REPL/src/TerminalMenus/LICENSE.md | 22 + .../julia/stdlib/v1.6/Random/docs/src/index.md | 358 ++++ .../share/julia/stdlib/v1.6/SHA/LICENSE.md | 58 + .../share/julia/stdlib/v1.6/SHA/docs/src/index.md | 75 + .../stdlib/v1.6/Serialization/docs/src/index.md | 7 + .../stdlib/v1.6/SharedArrays/docs/src/index.md | 11 + .../julia/stdlib/v1.6/Sockets/docs/src/index.md | 33 + .../stdlib/v1.6/SparseArrays/docs/src/index.md | 232 +++ .../share/julia/stdlib/v1.6/Statistics/LICENSE.md | 24 + .../share/julia/stdlib/v1.6/Statistics/README.md | 19 + .../julia/stdlib/v1.6/Statistics/docs/src/index.md | 19 + .../stdlib/v1.6/SuiteSparse/docs/src/index.md | 34 + .../share/julia/stdlib/v1.6/TOML/docs/src/index.md | 132 ++ .../share/julia/stdlib/v1.6/Tar/README.md | 467 +++++ .../share/julia/stdlib/v1.6/Test/docs/src/index.md | 293 +++ 
.../julia/stdlib/v1.6/UUIDs/docs/src/index.md | 8 + .../julia/stdlib/v1.6/Unicode/docs/src/index.md | 7 + .../share/julia/test/clangsa/lit.cfg.py | 27 + .../share/julia/test/llvmpasses/lit.cfg.py | 21 + .../registries/General/.ci/instantiate.sh | 24 + .../julia_depot/registries/General/CONTRIBUTING.md | 185 ++ flossing_suite/README.md | 89 + .../trm_seed123_baseline_nofloss_b8_10000.cmd.sh | 30 + .../trm_seed123_pre_inter_0_500_b8_k4_10000.cmd.sh | 30 + .../trm_seed123_prefloss_0_b8_k4_10000.cmd.sh | 30 + flossing_suite/launch_toy_official_suite.sh | 77 + flossing_suite/launch_trm_faithful_suite.sh | 88 + flossing_suite/launch_trm_variant_suite.sh | 87 + .../results/summary/flossing_eval_curves.csv | 399 +++++ .../results/summary/flossing_runs_summary.csv | 32 + .../toy_baseline_no_floss_N80_k40_E1000.cmd.sh | 43 + .../toy_rnn/toy_pre_inter_N80_k40_E1000.cmd.sh | 43 + .../toy_rnn/toy_prefloss_N80_k40_E1000.cmd.sh | 43 + .../trm_seed123_baseline_nofloss_b4_10000.cmd.sh | 30 + ..._seed123_baseline_nofloss_tb32_fb4_20000.cmd.sh | 35 + .../trm_seed123_pre_inter_0_500_b4_k4_10000.cmd.sh | 30 + ...pre_inter_periodic2000_tb32_fb4_k4_20000.cmd.sh | 35 + .../trm_seed123_prefloss_0_b4_k4_10000.cmd.sh | 30 + ...trm_seed123_prefloss_0_tb32_fb4_k4_20000.cmd.sh | 35 + flossing_suite/smoke_test.sh | 61 + flossing_suite/status.sh | 33 + flossing_suite/summarize_flossing.py | 311 ++++ flossing_suite/watch_and_summarize.sh | 27 + initial_perturb_robustness.py | 286 +++ .../plots/final_gpu_status.txt | 4 + .../plots/initial_perturb_robustness_combined.csv | 28 + .../initial_perturb_robustness_conditional.csv | 28 + .../smoke_baseline.summary.csv | 3 + .../smoke_baseline_b32k8.summary.csv | 2 + .../initial_perturb_robustness_combined.csv | 4 + ...rm_baseline_best_step58590_n5000_k8.summary.csv | 10 + .../trm_multi4_best_step35805_n5000_k8.summary.csv | 10 + ...trm_multi4_final_step65100_n5000_k8.summary.csv | 10 + initial_perturb_robustness/watch_and_plot.sh | 25 + 
late_perturb_robustness.py | 323 ++++ late_perturb_robustness/plots/final_gpu_status.txt | 4 + .../plots/late_perturb_robustness_combined.csv | 91 + late_perturb_robustness/smoke_baseline.summary.csv | 5 + .../smoke_baseline_expandedclean.summary.csv | 5 + .../smoke_baseline_fastclean.summary.csv | 5 + .../late_perturb_robustness_combined.csv | 5 + ...seline_best_step58590_n3000_k8_late.summary.csv | 31 + ...multi4_best_step35805_n3000_k8_late.summary.csv | 31 + ...ulti4_final_step65100_n3000_k8_late.summary.csv | 31 + late_perturb_robustness/watch_and_plot.sh | 32 + launch_10k_queue.sh | 124 ++ launch_dynamics_variants_queue.sh | 186 ++ launch_engelken_paper_faithful_trm_queue.sh | 78 + launch_engelken_python_official_queue.sh | 74 + launch_hrm_multi4_perturb_ablation.sh | 60 + launch_interfloss_queue.sh | 84 + launch_multi4_parallel_repro_config.sh | 70 + launch_multi4_repro_config.sh | 68 + launch_multi4_repro_config_all.sh | 10 + launch_trajectory_perturb_queue.sh | 92 + launch_trajectory_sampling_long.sh | 128 ++ launch_trajectory_sampling_long_all.sh | 18 + launch_trm_directional_multi4_long.sh | 43 + launch_trm_official_gbs768_multi4.sh | 37 + make_meeting_artifacts_v2.py | 364 ++++ make_q_lambda_scatter.py | 263 +++ make_trm_multi4_meeting_artifacts.py | 133 ++ maze_package/README.md | 30 + maze_package/TRANSFER_README.md | 36 + maze_package/diagnose_trm_joint.py | 225 +++ maze_package/launch_maze_trm.sh | 40 + maze_package/launch_maze_trm_portable.sh | 58 + maze_package/pip-freeze.txt | 18 + maze_package/requirements.txt | 13 + maze_package/step7_interfloss.py | 589 ++++++ maze_pred_dump.py | 85 + meeting_artifacts/trm_multi4_dynamics_report.md | 57 + .../fig5_qhead_vs_lambda1_ptrm_summary.csv | 3 + .../hrm_trm_redesigned_summary.csv | 7 + meeting_artifacts_v2/meeting_figures_v2_report.md | 26 + merge_and_analyze.py | 157 ++ monitor_dynamics_experiments.sh | 38 + multi4_eval_compare/hrm_baseline_eval.csv | 11 + 
multi4_eval_compare/hrm_honly_step26040_eval.csv | 2 + multi4_eval_compare/hrm_matched_compare.csv | 6 + multi4_eval_compare/hrm_multi4_complete_eval.csv | 11 + multi4_eval_compare/hrm_multi4_eval.csv | 6 + .../hrm_multi4_horizon_sweep_768.csv | 41 + multi4_eval_compare/trm_baseline_eval.csv | 11 + ...4_step65100_det_n10000_seed20260602.summary.csv | 2 + multi4_eval_compare/trm_matched_compare.csv | 2 + multi4_eval_compare/trm_multi4_eval.csv | 2 + multi4_eval_compare/trm_multi4_eval_full.csv | 11 + multi4_eval_compare/trm_official_gbs768_eval.csv | 11 + .../trm_official_gbs768_multi4_eval.csv | 21 + .../trm_official_gbs768_multi4_step16275_eval.csv | 2 + multi4_eval_compare/wallclock_eval.csv | 30 + .../headline_trm_multi4_dynamics_table.csv | 4 + .../summary_n512_k8_seed20260602.csv | 4 + .../summary_n64_k8_seed20260602.csv | 5 + ordinary_multi4_eval_n1000_seed20260611.csv | 6 + paper/claims.md | 37 + paper/experiment_framework.md | 55 + paper/intro.md | 57 + paper/outline.md | 79 + paper/rainer_followup_draft.md | 37 + paper/readiness.md | 119 ++ paper/sample_intro.md | 49 + paper/setup_results.md | 129 ++ paper/style_contract.md | 47 + paper/validation/validate_le_estimator.py | 107 ++ paper/validation/validation_results.md | 17 + papers/txt/engelken2023_gradient_flossing.txt | 1879 ++++++++++++++++++++ papers/txt/gram2025_generative_recursive.txt | 1532 ++++++++++++++++ papers/txt/gram2026_generative_recursive.txt | 1532 ++++++++++++++++ papers/txt/hrm2025_hierarchical_reasoning.txt | 1302 ++++++++++++++ papers/txt/ptrm2025_probabilistic_trm.txt | 906 ++++++++++ papers/txt/trm2025_tiny_recursive.txt | 796 +++++++++ plot_ckpt_evolution.py | 105 ++ plot_directional_lyap_perturb.py | 136 ++ plot_initial_perturb_robustness.py | 123 ++ plot_late_perturb_robustness.py | 140 ++ plot_trm_chaos_trajectory.py | 67 + plot_trm_lyap_hist.py | 69 + problem_tracks/hrm_learning_events.csv | 37 + problem_tracks/hrm_learning_groups.csv | 6 + 
problem_tracks/hrm_problem_tracks.csv | 513 ++++++ problem_tracks/trm_learning_events.csv | 37 + problem_tracks/trm_learning_groups.csv | 6 + problem_tracks/trm_problem_tracks.csv | 513 ++++++ ...ti4_k100_d64_sigma03_Lonly_n1000_seed0.hist.csv | 102 ++ ...4_k100_d64_sigma03_Lonly_n1000_seed0.kcurve.csv | 101 ++ ..._k100_d64_sigma03_Lonly_n1000_seed0.summary.csv | 2 + ...e260410_k100_d64_sigma03_Lonly_n256.summary.csv | 2 + ..._104164_k100_d64_sigma03_Lonly_n256.summary.csv | 2 + ...se260410_k100_d64_sigma03_Lonly_n64.summary.csv | 2 + ...4_104164_k100_d64_sigma03_Lonly_n64.summary.csv | 2 + ..._k100_d64_sigma03_Lonly_n1000_seed0.summary.csv | 2 + ..._k100_d64_sigma03_Lonly_n1000_seed0.summary.csv | 2 + ...260410_k100_d64_sigma03_Lonly_n1000.summary.csv | 2 + ...104164_k100_d64_sigma03_Lonly_n1000.summary.csv | 2 + ...e260410_k25_d16_sigma03_Lonly_n1000.summary.csv | 2 + ..._104164_k25_d16_sigma03_Lonly_n1000.summary.csv | 2 + ptrm_rollout_selection.py | 643 +++++++ ...4_sigma03_Lonly_n10000_seed20260602.summary.csv | 2 + ...4_sigma03_Lonly_n10000_seed20260602.summary.csv | 2 + .../paired_ptrm_k100_n1000_seed0_summary.csv | 2 + ...lection_smoke_trm_base260410_k4_n64.summary.csv | 2 + ptrm_selection_trm_base260410_k8_n512.summary.csv | 2 + ...selection_trm_multi4_104164_k8_n512.summary.csv | 2 + ptrm_smoke_official_gbs768_base58590.summary.csv | 2 + ptrm_spectrum_k4_cache_analysis.csv | 3 + ...se260410_k100_d64_sigma03_Lonly_n64.summary.csv | 2 + ...4_104164_k100_d64_sigma03_Lonly_n64.summary.csv | 2 + ptrm_spectrum_smoke.summary.csv | 2 + ...ma03_Lonly_fdlyap_n512_seed20260602.summary.csv | 2 + ...ma03_Lonly_fdlyap_n512_seed20260602.summary.csv | 2 + rainer_email_bundle_20260605/README.md | 17 + rainer_email_bundle_20260605/email_draft.md | 18 + rainer_email_bundle_20260605/figure_captions.md | 23 + report_bundle_20260603/MANIFEST.txt | 96 + report_bundle_20260603/README.md | 34 + .../scripts/make_meeting_artifacts_v2.py | 364 ++++ 
.../scripts/make_q_lambda_scatter.py | 263 +++ ...ma03_Lonly_fdlyap_n512_seed20260602.summary.csv | 2 + ...4_sigma03_Lonly_n10000_seed20260602.summary.csv | 2 + .../tables/fig5_qhead_vs_lambda1_ptrm_summary.csv | 3 + .../tables/headline_trm_multi4_dynamics_table.csv | 4 + .../tables/hrm_baseline_eval.csv | 11 + .../tables/hrm_honly_step26040_eval.csv | 2 + .../tables/hrm_matched_compare.csv | 6 + .../tables/hrm_multi4_complete_eval.csv | 11 + report_bundle_20260603/tables/hrm_multi4_eval.csv | 6 + .../tables/hrm_multi4_horizon_sweep_768.csv | 41 + .../tables/hrm_trm_redesigned_summary.csv | 7 + .../tables/meeting_figures_v2_report.md | 26 + ...ma03_Lonly_fdlyap_n512_seed20260602.summary.csv | 2 + ...4_sigma03_Lonly_n10000_seed20260602.summary.csv | 2 + .../paired_ptrm_k100_n1000_seed0_summary.csv | 2 + .../tables/summary_n512_k8_seed20260602.csv | 4 + .../tables/summary_n64_k8_seed20260602.csv | 5 + .../tables/trm_baseline_eval.csv | 11 + ...4_step65100_det_n10000_seed20260602.summary.csv | 2 + .../tables/trm_matched_compare.csv | 2 + report_bundle_20260603/tables/trm_multi4_eval.csv | 2 + .../tables/trm_multi4_eval_full.csv | 11 + .../tables/trm_official_gbs768_eval.csv | 11 + .../tables/trm_official_gbs768_multi4_eval.csv | 21 + .../trm_official_gbs768_multi4_step16275_eval.csv | 2 + report_bundle_20260603/tables/wallclock_eval.csv | 30 + reverse_floss_finetune.py | 289 +++ run_HRM256_after_H.sh | 27 + run_HRMOrth_after_SRM7M.sh | 29 + run_H_after_SRM.sh | 29 + run_SRM7M_after_HRM256.sh | 31 + run_checkpoint_evolution.sh | 59 + run_ckpt_evolution2.sh | 55 + run_hrm_diag_all_ckpts.sh | 26 + run_srm_after_W.sh | 32 + run_step4_W.sh | 27 + run_step4_pipeline.sh | 36 + run_step6_prefloss.sh | 40 + run_trm_cf_abcd.sh | 39 + run_trm_cf_ef.sh | 39 + run_trm_cf_long.sh | 36 + run_trm_chaos_onset_diag.sh | 31 + sanity_lipschitz_check.py | 184 ++ spectrum_microscope/checkpoint_summary.csv | 21 + spectrum_microscope/feature_and_rank_summary.csv | 421 +++++ 
srm_design_codex.md | 69 + srm_design_round2_claude.md | 155 ++ step3_train_with_rf.py | 302 ++++ step4_from_scratch.py | 334 ++++ step5_train_trm_cf.py | 307 ++++ step6_prefloss.py | 310 ++++ step7_interfloss.py | 589 ++++++ step8_basin_consistency.py | 199 +++ step9_trajectory_perturb_train.py | 595 +++++++ .../MinimalGradientFlossinExample.py | 135 ++ .../MinimalSurrogateGradientFlossinExample.py | 228 +++ surrogate_flossing/README.md | 106 ++ .../figures/binary_surrogate_gradient_flossing.md | 2 + toy/toy_transient_chaos.py | 112 ++ track_problem_learning.py | 375 ++++ trajectory_augmentation_notes.md | 190 ++ watch_ptrm_gbs768_compare.sh | 26 + watch_trm_directional_multi4_long.sh | 35 + 384 files changed, 41721 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 analysis_2x2/OBSERVATIONS.md create mode 100644 analysis_2x2/analyze_2x2.py create mode 100644 analysis_2x2/analyze_clv.py create mode 100644 analysis_2x2/analyze_early_connectivity.py create mode 100644 analysis_2x2/analyze_maze_connectivity.py create mode 100644 analysis_2x2/analyze_phase1.py create mode 100644 analysis_2x2/analyze_solution_space.py create mode 100644 analysis_2x2/cells_hrm26040_joint_n2048.csv create mode 100644 analysis_2x2/cells_hrm26040_n8192.csv create mode 100644 analysis_2x2/cells_trm_official58590_n2048.csv create mode 100644 analysis_2x2/cells_trm_official_gbs768_step58590_n512.csv create mode 100644 analysis_2x2/cells_trm_singleGPU_step130205_n512.csv create mode 100644 analysis_2x2/cells_trm_singleGPU_step260410_n512.csv create mode 100644 analysis_2x2/cells_trm_step13020_n512.csv create mode 100644 analysis_2x2/characterization/characterization_results.md create mode 100644 analysis_2x2/characterize_separation.py create mode 100755 analysis_2x2/dump_early_ckpts.sh create mode 100644 analysis_2x2/early_pairing_hrm26040_joint.md create mode 100644 analysis_2x2/early_pairing_trm_official58590.md create mode 100644 
analysis_2x2/early_window_pairing.py create mode 100644 analysis_2x2/evolution_hrm.csv create mode 100644 analysis_2x2/evolution_trm.csv create mode 100644 analysis_2x2/offline_followups.py create mode 100644 analysis_2x2/offline_followups/followups.md create mode 100644 analysis_2x2/offline_followups/phase1_e1.md create mode 100644 analysis_2x2/offline_phase1_e1.py create mode 100644 analysis_2x2/phase1/phase1_results.md create mode 100755 analysis_2x2/queue_maze_followups.sh create mode 100644 analysis_2x2/results.md create mode 100755 analysis_2x2/run_clv_geom.sh create mode 100755 analysis_2x2/run_maze_diag_queue.sh create mode 100755 analysis_2x2/run_phase1_queue.sh create mode 100755 analysis_2x2/run_retest_2x2.sh create mode 100644 analysis_2x2/sweep_hrm26040_joint_n2048.csv create mode 100644 analysis_2x2/sweep_hrm26040_n8192.csv create mode 100644 analysis_2x2/sweep_trm_official58590_n2048.csv create mode 100644 analysis_2x2/sweep_trm_official_gbs768_step58590_n512.csv create mode 100644 analysis_2x2/sweep_trm_singleGPU_step130205_n512.csv create mode 100644 analysis_2x2/sweep_trm_singleGPU_step260410_n512.csv create mode 100644 analysis_2x2/sweep_trm_step13020_n512.csv create mode 100644 analyze_diag.py create mode 100644 analyze_dynamics_experiments.py create mode 100644 analyze_halt_bucket.py create mode 100644 analyze_joint.py create mode 100644 analyze_ptrm_rollout_cache.py create mode 100644 analyze_separate.py create mode 100644 analyze_spectrum_microscope.py create mode 100644 analyze_step3.py create mode 100644 analyze_step3_all.py create mode 100644 analyze_tangent.py create mode 100644 analyze_tangent_modes.py create mode 100644 chaos_trm_multi4_step182287_clean_k4_n1000.summary.csv create mode 100644 chaos_trm_multi4_step260410_clean_k4_n1000.summary.csv create mode 100644 chaos_trm_multi4_success_failure_summary.csv create mode 100644 compare_ptrm_rollout_counts.py create mode 100644 diagnose_hrm.py create mode 100644 diagnose_hrm_joint.py 
create mode 100644 diagnose_hrm_joint_clv.py create mode 100644 diagnose_hrm_joint_geom.py create mode 100644 diagnose_hrm_joint_horizon.py create mode 100644 diagnose_hrm_joint_short.py create mode 100644 diagnose_hrm_separate.py create mode 100644 diagnose_trm_joint.py create mode 100644 diagnose_trm_joint_clv.py create mode 100644 diagnose_trm_joint_geom.py create mode 100644 diagnose_trm_joint_horizon.py create mode 100644 diagnose_trm_joint_maze.py create mode 100644 diagnose_trm_joint_short.py create mode 100644 directional_lyap_perturb/plots/directional_lyap_perturb_combined.csv create mode 100644 directional_lyap_perturb/plots/final_gpu_status.txt create mode 100644 directional_lyap_perturb/smoke_baseline.summary.csv create mode 100644 directional_lyap_perturb/smoke_baseline_fixedclean.summary.csv create mode 100644 directional_lyap_perturb/trm_baseline_best_step58590_n1000_c8_dirlyap.summary.csv create mode 100644 directional_lyap_perturb/trm_multi4_best_step35805_n1000_c8_dirlyap.summary.csv create mode 100644 directional_lyap_perturb/trm_multi4_final_step65100_n1000_c8_dirlyap.summary.csv create mode 100644 directional_lyap_perturb/watch_and_plot.sh create mode 100644 directional_lyap_perturb_eps_sweep/plots/directional_lyap_perturb_combined.csv create mode 100644 directional_lyap_perturb_eps_sweep/plots/final_gpu_status.txt create mode 100644 directional_lyap_perturb_eps_sweep/trm_baseline_best_step58590_n1000_c8_fdeps001.summary.csv create mode 100644 directional_lyap_perturb_eps_sweep/trm_baseline_best_step58590_n1000_c8_fdeps003.summary.csv create mode 100644 directional_lyap_perturb_eps_sweep/trm_multi4_best_step35805_n1000_c8_fdeps001.summary.csv create mode 100644 directional_lyap_perturb_eps_sweep/trm_multi4_best_step35805_n1000_c8_fdeps003.summary.csv create mode 100755 directional_lyap_perturb_eps_sweep/watch_queue_and_plot.sh create mode 100644 directional_lyap_perturb_robustness.py create mode 100644 directional_multi4_eval_n1000.csv create 
mode 100644 dynamics_experiment_report.md create mode 100644 engelken_python_flossing.py create mode 100644 eval_directional_ckpts.py create mode 100644 eval_trm_gbs192_step260410_n1000.summary.csv create mode 100644 eval_trm_gbs192_step52082_n1000.summary.csv create mode 100644 eval_trm_official768_step13020_n1000.summary.csv create mode 100644 extend_rollout.py create mode 100644 external/GradientFlossing/README.md create mode 100644 external/GradientFlossing/packages.txt create mode 100644 external/julia-1.6.7/LICENSE.md create mode 100644 external/julia-1.6.7/share/julia/base/ryu/LICENSE.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/ArgTools/README.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Artifacts/docs/src/index.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Base64/docs/src/index.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/CRC32c/docs/src/index.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Dates/docs/src/index.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/DelimitedFiles/docs/src/index.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Distributed/docs/src/index.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Downloads/README.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Downloads/docs/src/index.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/FileWatching/docs/src/index.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Future/docs/src/index.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/InteractiveUtils/docs/src/index.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/LazyArtifacts/docs/src/index.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/LibCURL/LICENSE.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/LibCURL/README.md create mode 100644 
external/julia-1.6.7/share/julia/stdlib/v1.6/LibGit2/docs/src/index.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Libdl/docs/src/index.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/LinearAlgebra/docs/src/index.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Logging/docs/src/index.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Markdown/docs/src/index.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Mmap/docs/src/index.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/NetworkOptions/README.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/LICENSE.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/README.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/api.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/artifacts.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/basedocs.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/compatibility.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/creating-packages.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/environments.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/faq.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/getting-started.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/glossary.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/index.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/managing-packages.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/registries.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/toml-files.md create mode 100644 
external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/test/test_packages/SameNameDifferentUUID/dev/Unregistered/README.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/test/test_packages/SandboxFallback2/dev/Unregistered/README.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/test/test_packages/TestDepTrackingPath/test/dev/Unregistered/README.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/test/test_packages/TransferSubgraph/dev/Unregistered/README.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Printf/docs/src/index.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Profile/docs/src/index.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/REPL/docs/src/index.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/REPL/src/TerminalMenus/LICENSE.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Random/docs/src/index.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/SHA/LICENSE.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/SHA/docs/src/index.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Serialization/docs/src/index.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/SharedArrays/docs/src/index.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Sockets/docs/src/index.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/SparseArrays/docs/src/index.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Statistics/LICENSE.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Statistics/README.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Statistics/docs/src/index.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/SuiteSparse/docs/src/index.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/TOML/docs/src/index.md create mode 100644 
external/julia-1.6.7/share/julia/stdlib/v1.6/Tar/README.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Test/docs/src/index.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/UUIDs/docs/src/index.md create mode 100644 external/julia-1.6.7/share/julia/stdlib/v1.6/Unicode/docs/src/index.md create mode 100644 external/julia-1.6.7/share/julia/test/clangsa/lit.cfg.py create mode 100644 external/julia-1.6.7/share/julia/test/llvmpasses/lit.cfg.py create mode 100755 external/julia_depot/registries/General/.ci/instantiate.sh create mode 100644 external/julia_depot/registries/General/CONTRIBUTING.md create mode 100644 flossing_suite/README.md create mode 100755 flossing_suite/archive_failed_oom_b8/trm_seed123_baseline_nofloss_b8_10000.cmd.sh create mode 100755 flossing_suite/archive_failed_oom_b8/trm_seed123_pre_inter_0_500_b8_k4_10000.cmd.sh create mode 100755 flossing_suite/archive_failed_oom_b8/trm_seed123_prefloss_0_b8_k4_10000.cmd.sh create mode 100755 flossing_suite/launch_toy_official_suite.sh create mode 100755 flossing_suite/launch_trm_faithful_suite.sh create mode 100755 flossing_suite/launch_trm_variant_suite.sh create mode 100644 flossing_suite/results/summary/flossing_eval_curves.csv create mode 100644 flossing_suite/results/summary/flossing_runs_summary.csv create mode 100755 flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000.cmd.sh create mode 100755 flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000.cmd.sh create mode 100755 flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000.cmd.sh create mode 100755 flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_b4_10000.cmd.sh create mode 100755 flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000.cmd.sh create mode 100755 flossing_suite/results/trm_faithful/trm_seed123_pre_inter_0_500_b4_k4_10000.cmd.sh create mode 100755 
flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000.cmd.sh create mode 100755 flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_b4_k4_10000.cmd.sh create mode 100755 flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000.cmd.sh create mode 100755 flossing_suite/smoke_test.sh create mode 100755 flossing_suite/status.sh create mode 100644 flossing_suite/summarize_flossing.py create mode 100755 flossing_suite/watch_and_summarize.sh create mode 100644 initial_perturb_robustness.py create mode 100644 initial_perturb_robustness/plots/final_gpu_status.txt create mode 100644 initial_perturb_robustness/plots/initial_perturb_robustness_combined.csv create mode 100644 initial_perturb_robustness/plots/initial_perturb_robustness_conditional.csv create mode 100644 initial_perturb_robustness/smoke_baseline.summary.csv create mode 100644 initial_perturb_robustness/smoke_baseline_b32k8.summary.csv create mode 100644 initial_perturb_robustness/smoke_plots/initial_perturb_robustness_combined.csv create mode 100644 initial_perturb_robustness/trm_baseline_best_step58590_n5000_k8.summary.csv create mode 100644 initial_perturb_robustness/trm_multi4_best_step35805_n5000_k8.summary.csv create mode 100644 initial_perturb_robustness/trm_multi4_final_step65100_n5000_k8.summary.csv create mode 100755 initial_perturb_robustness/watch_and_plot.sh create mode 100644 late_perturb_robustness.py create mode 100644 late_perturb_robustness/plots/final_gpu_status.txt create mode 100644 late_perturb_robustness/plots/late_perturb_robustness_combined.csv create mode 100644 late_perturb_robustness/smoke_baseline.summary.csv create mode 100644 late_perturb_robustness/smoke_baseline_expandedclean.summary.csv create mode 100644 late_perturb_robustness/smoke_baseline_fastclean.summary.csv create mode 100644 late_perturb_robustness/smoke_plots/late_perturb_robustness_combined.csv create mode 100644 
late_perturb_robustness/trm_baseline_best_step58590_n3000_k8_late.summary.csv create mode 100644 late_perturb_robustness/trm_multi4_best_step35805_n3000_k8_late.summary.csv create mode 100644 late_perturb_robustness/trm_multi4_final_step65100_n3000_k8_late.summary.csv create mode 100755 late_perturb_robustness/watch_and_plot.sh create mode 100755 launch_10k_queue.sh create mode 100755 launch_dynamics_variants_queue.sh create mode 100755 launch_engelken_paper_faithful_trm_queue.sh create mode 100755 launch_engelken_python_official_queue.sh create mode 100755 launch_hrm_multi4_perturb_ablation.sh create mode 100755 launch_interfloss_queue.sh create mode 100755 launch_multi4_parallel_repro_config.sh create mode 100755 launch_multi4_repro_config.sh create mode 100755 launch_multi4_repro_config_all.sh create mode 100755 launch_trajectory_perturb_queue.sh create mode 100755 launch_trajectory_sampling_long.sh create mode 100755 launch_trajectory_sampling_long_all.sh create mode 100755 launch_trm_directional_multi4_long.sh create mode 100755 launch_trm_official_gbs768_multi4.sh create mode 100644 make_meeting_artifacts_v2.py create mode 100644 make_q_lambda_scatter.py create mode 100644 make_trm_multi4_meeting_artifacts.py create mode 100644 maze_package/README.md create mode 100644 maze_package/TRANSFER_README.md create mode 100644 maze_package/diagnose_trm_joint.py create mode 100755 maze_package/launch_maze_trm.sh create mode 100755 maze_package/launch_maze_trm_portable.sh create mode 100644 maze_package/pip-freeze.txt create mode 100644 maze_package/requirements.txt create mode 100644 maze_package/step7_interfloss.py create mode 100644 maze_pred_dump.py create mode 100644 meeting_artifacts/trm_multi4_dynamics_report.md create mode 100644 meeting_artifacts_v2/fig5_qhead_vs_lambda1_ptrm_summary.csv create mode 100644 meeting_artifacts_v2/hrm_trm_redesigned_summary.csv create mode 100644 meeting_artifacts_v2/meeting_figures_v2_report.md create mode 100644 
merge_and_analyze.py create mode 100755 monitor_dynamics_experiments.sh create mode 100644 multi4_eval_compare/hrm_baseline_eval.csv create mode 100644 multi4_eval_compare/hrm_honly_step26040_eval.csv create mode 100644 multi4_eval_compare/hrm_matched_compare.csv create mode 100644 multi4_eval_compare/hrm_multi4_complete_eval.csv create mode 100644 multi4_eval_compare/hrm_multi4_eval.csv create mode 100644 multi4_eval_compare/hrm_multi4_horizon_sweep_768.csv create mode 100644 multi4_eval_compare/trm_baseline_eval.csv create mode 100644 multi4_eval_compare/trm_gbs768_multi4_step65100_det_n10000_seed20260602.summary.csv create mode 100644 multi4_eval_compare/trm_matched_compare.csv create mode 100644 multi4_eval_compare/trm_multi4_eval.csv create mode 100644 multi4_eval_compare/trm_multi4_eval_full.csv create mode 100644 multi4_eval_compare/trm_official_gbs768_eval.csv create mode 100644 multi4_eval_compare/trm_official_gbs768_multi4_eval.csv create mode 100644 multi4_eval_compare/trm_official_gbs768_multi4_step16275_eval.csv create mode 100644 multi4_eval_compare/wallclock_eval.csv create mode 100644 official_gbs768_spectrum/headline_trm_multi4_dynamics_table.csv create mode 100644 official_gbs768_spectrum/summary_n512_k8_seed20260602.csv create mode 100644 official_gbs768_spectrum/summary_n64_k8_seed20260602.csv create mode 100644 ordinary_multi4_eval_n1000_seed20260611.csv create mode 100644 paper/claims.md create mode 100644 paper/experiment_framework.md create mode 100644 paper/intro.md create mode 100644 paper/outline.md create mode 100644 paper/rainer_followup_draft.md create mode 100644 paper/readiness.md create mode 100644 paper/sample_intro.md create mode 100644 paper/setup_results.md create mode 100644 paper/style_contract.md create mode 100644 paper/validation/validate_le_estimator.py create mode 100644 paper/validation/validation_results.md create mode 100644 papers/txt/engelken2023_gradient_flossing.txt create mode 100644 
papers/txt/gram2025_generative_recursive.txt create mode 100644 papers/txt/gram2026_generative_recursive.txt create mode 100644 papers/txt/hrm2025_hierarchical_reasoning.txt create mode 100644 papers/txt/ptrm2025_probabilistic_trm.txt create mode 100644 papers/txt/trm2025_tiny_recursive.txt create mode 100644 plot_ckpt_evolution.py create mode 100644 plot_directional_lyap_perturb.py create mode 100644 plot_initial_perturb_robustness.py create mode 100644 plot_late_perturb_robustness.py create mode 100644 plot_trm_chaos_trajectory.py create mode 100644 plot_trm_lyap_hist.py create mode 100644 problem_tracks/hrm_learning_events.csv create mode 100644 problem_tracks/hrm_learning_groups.csv create mode 100644 problem_tracks/hrm_problem_tracks.csv create mode 100644 problem_tracks/trm_learning_events.csv create mode 100644 problem_tracks/trm_learning_groups.csv create mode 100644 problem_tracks/trm_problem_tracks.csv create mode 100644 ptrm_gbs768_base_vs_multi4_k100_d64_sigma03_Lonly_n1000_seed0.hist.csv create mode 100644 ptrm_gbs768_base_vs_multi4_k100_d64_sigma03_Lonly_n1000_seed0.kcurve.csv create mode 100644 ptrm_gbs768_base_vs_multi4_k100_d64_sigma03_Lonly_n1000_seed0.summary.csv create mode 100644 ptrm_lambda_main_trm_base260410_k100_d64_sigma03_Lonly_n256.summary.csv create mode 100644 ptrm_lambda_main_trm_multi4_104164_k100_d64_sigma03_Lonly_n256.summary.csv create mode 100644 ptrm_lambda_quick_trm_base260410_k100_d64_sigma03_Lonly_n64.summary.csv create mode 100644 ptrm_lambda_quick_trm_multi4_104164_k100_d64_sigma03_Lonly_n64.summary.csv create mode 100644 ptrm_official_gbs768_base58590_k100_d64_sigma03_Lonly_n1000_seed0.summary.csv create mode 100644 ptrm_official_gbs768_multi4_35805_k100_d64_sigma03_Lonly_n1000_seed0.summary.csv create mode 100644 ptrm_official_main_trm_base260410_k100_d64_sigma03_Lonly_n1000.summary.csv create mode 100644 ptrm_official_main_trm_multi4_104164_k100_d64_sigma03_Lonly_n1000.summary.csv create mode 100644 
ptrm_official_sweep_trm_base260410_k25_d16_sigma03_Lonly_n1000.summary.csv create mode 100644 ptrm_official_sweep_trm_multi4_104164_k25_d16_sigma03_Lonly_n1000.summary.csv create mode 100644 ptrm_rollout_selection.py create mode 100644 ptrm_same_subset/base58590_k25_d64_sigma03_Lonly_n10000_seed20260602.summary.csv create mode 100644 ptrm_same_subset/multi4_35805_k25_d64_sigma03_Lonly_n10000_seed20260602.summary.csv create mode 100644 ptrm_same_subset/paired_ptrm_k100_n1000_seed0_summary.csv create mode 100644 ptrm_selection_smoke_trm_base260410_k4_n64.summary.csv create mode 100644 ptrm_selection_trm_base260410_k8_n512.summary.csv create mode 100644 ptrm_selection_trm_multi4_104164_k8_n512.summary.csv create mode 100644 ptrm_smoke_official_gbs768_base58590.summary.csv create mode 100644 ptrm_spectrum_k4_cache_analysis.csv create mode 100644 ptrm_spectrum_k4_trm_base260410_k100_d64_sigma03_Lonly_n64.summary.csv create mode 100644 ptrm_spectrum_k4_trm_multi4_104164_k100_d64_sigma03_Lonly_n64.summary.csv create mode 100644 ptrm_spectrum_smoke.summary.csv create mode 100644 q_lambda_scatter/base58590_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.summary.csv create mode 100644 q_lambda_scatter/multi4_35805_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.summary.csv create mode 100644 rainer_email_bundle_20260605/README.md create mode 100644 rainer_email_bundle_20260605/email_draft.md create mode 100644 rainer_email_bundle_20260605/figure_captions.md create mode 100644 report_bundle_20260603/MANIFEST.txt create mode 100644 report_bundle_20260603/README.md create mode 100644 report_bundle_20260603/scripts/make_meeting_artifacts_v2.py create mode 100644 report_bundle_20260603/scripts/make_q_lambda_scatter.py create mode 100644 report_bundle_20260603/tables/base58590_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.summary.csv create mode 100644 report_bundle_20260603/tables/base58590_k25_d64_sigma03_Lonly_n10000_seed20260602.summary.csv create mode 100644 
report_bundle_20260603/tables/fig5_qhead_vs_lambda1_ptrm_summary.csv create mode 100644 report_bundle_20260603/tables/headline_trm_multi4_dynamics_table.csv create mode 100644 report_bundle_20260603/tables/hrm_baseline_eval.csv create mode 100644 report_bundle_20260603/tables/hrm_honly_step26040_eval.csv create mode 100644 report_bundle_20260603/tables/hrm_matched_compare.csv create mode 100644 report_bundle_20260603/tables/hrm_multi4_complete_eval.csv create mode 100644 report_bundle_20260603/tables/hrm_multi4_eval.csv create mode 100644 report_bundle_20260603/tables/hrm_multi4_horizon_sweep_768.csv create mode 100644 report_bundle_20260603/tables/hrm_trm_redesigned_summary.csv create mode 100644 report_bundle_20260603/tables/meeting_figures_v2_report.md create mode 100644 report_bundle_20260603/tables/multi4_35805_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.summary.csv create mode 100644 report_bundle_20260603/tables/multi4_35805_k25_d64_sigma03_Lonly_n10000_seed20260602.summary.csv create mode 100644 report_bundle_20260603/tables/paired_ptrm_k100_n1000_seed0_summary.csv create mode 100644 report_bundle_20260603/tables/summary_n512_k8_seed20260602.csv create mode 100644 report_bundle_20260603/tables/summary_n64_k8_seed20260602.csv create mode 100644 report_bundle_20260603/tables/trm_baseline_eval.csv create mode 100644 report_bundle_20260603/tables/trm_gbs768_multi4_step65100_det_n10000_seed20260602.summary.csv create mode 100644 report_bundle_20260603/tables/trm_matched_compare.csv create mode 100644 report_bundle_20260603/tables/trm_multi4_eval.csv create mode 100644 report_bundle_20260603/tables/trm_multi4_eval_full.csv create mode 100644 report_bundle_20260603/tables/trm_official_gbs768_eval.csv create mode 100644 report_bundle_20260603/tables/trm_official_gbs768_multi4_eval.csv create mode 100644 report_bundle_20260603/tables/trm_official_gbs768_multi4_step16275_eval.csv create mode 100644 report_bundle_20260603/tables/wallclock_eval.csv create mode 
100644 reverse_floss_finetune.py create mode 100755 run_HRM256_after_H.sh create mode 100755 run_HRMOrth_after_SRM7M.sh create mode 100755 run_H_after_SRM.sh create mode 100755 run_SRM7M_after_HRM256.sh create mode 100755 run_checkpoint_evolution.sh create mode 100755 run_ckpt_evolution2.sh create mode 100755 run_hrm_diag_all_ckpts.sh create mode 100755 run_srm_after_W.sh create mode 100755 run_step4_W.sh create mode 100755 run_step4_pipeline.sh create mode 100755 run_step6_prefloss.sh create mode 100755 run_trm_cf_abcd.sh create mode 100755 run_trm_cf_ef.sh create mode 100755 run_trm_cf_long.sh create mode 100755 run_trm_chaos_onset_diag.sh create mode 100644 sanity_lipschitz_check.py create mode 100644 spectrum_microscope/checkpoint_summary.csv create mode 100644 spectrum_microscope/feature_and_rank_summary.csv create mode 100644 srm_design_codex.md create mode 100644 srm_design_round2_claude.md create mode 100644 step3_train_with_rf.py create mode 100644 step4_from_scratch.py create mode 100644 step5_train_trm_cf.py create mode 100644 step6_prefloss.py create mode 100644 step7_interfloss.py create mode 100644 step8_basin_consistency.py create mode 100644 step9_trajectory_perturb_train.py create mode 100644 surrogate_flossing/MinimalGradientFlossinExample.py create mode 100644 surrogate_flossing/MinimalSurrogateGradientFlossinExample.py create mode 100644 surrogate_flossing/README.md create mode 100644 surrogate_flossing/figures/binary_surrogate_gradient_flossing.md create mode 100644 toy/toy_transient_chaos.py create mode 100644 track_problem_learning.py create mode 100644 trajectory_augmentation_notes.md create mode 100755 watch_ptrm_gbs768_compare.sh create mode 100755 watch_trm_directional_multi4_long.sh diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0e73031 --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +*.npz +*.png +*.pdf +*.pt +*.zip +*.tar.gz +*.log +*.json +*wandb_history*.csv +__pycache__/ +wandb/ +checkpoints/ diff --git 
a/README.md b/README.md new file mode 100644 index 0000000..9b9d3cb --- /dev/null +++ b/README.md @@ -0,0 +1,32 @@ +# Recursive Reasoning Dynamics + +Analysis of the inference-trajectory dynamics of recursive reasoning models (HRM, TRM) on +Sudoku-Extreme and Maze-Hard. Central finding and its honest deflation: + +- Along the recurrent inference trajectory, **genuine failures are more chaotic** (higher + finite-time Lyapunov exponents / latent drift) than successes, in the *same* trained network — + task-general (Sudoku; Maze under a validity/connectivity criterion, not exact-match). +- This signal **reduces to convergence / answer-completeness detection** (the FTLE separation is + recoverable from late drift + the model's own halting confidence; leading + full Ginelli CLV + geometry add nothing beyond it). On unique-solution tasks completeness aliases onto correctness. +- Open: the *mechanism* of the non-convergence (multistability vs transient chaos vs input-induced) — + the current line of work (see `paper/readiness.md`, `extend_rollout.py`, `toy/`). + +This repo is the **analysis half** (diagnostics, experiments, paper drafts, toy models). The model +code (TRM/HRM forks with trajectory-augmentation training) lives in the companion `rrm` repo; +heavy artifacts (checkpoints, npz, wandb, figures) are not committed. + +## Entry points +- `paper/` — `readiness.md` (live status + all results), `claims.md` (claim/evidence/counter table), + `intro.md`, `setup_results.md`, `style_contract.md`, `experiment_framework.md`. +- `analysis_2x2/` — the 2×2 (settling × correctness) decomposition, characterization, CLV reducibility, + checkpoint-evolution, connectivity-labeled Maze analysis. Result `.md`/`.csv` kept; npz/png ignored. +- `diagnose_{trm,hrm}_joint*.py` — per-example FTLE (JVP+QR), drift, CLV estimators. `_maze` variant + forces the math SDP backend (FlashAttention lacks the JVP double-backward); `_clv` adds Ginelli CLVs. 
+- `extend_rollout.py` — long-rollout probe (transient vs multistable vs persistent non-convergence). +- `toy/` — minimal analytically-grounded toy models reproducing "failure = more chaotic". +- `maze_pred_dump.py` — cheap forward dump (connectivity + drift, no JVP) for Maze validity labeling. + +## Estimator validation +`paper/validation/validate_le_estimator.py` — the QR/Benettin FTLE core recovers known spectra +(diagonal/symmetric/non-normal linear maps, Hénon λ₁) to <1e-3. diff --git a/analysis_2x2/OBSERVATIONS.md b/analysis_2x2/OBSERVATIONS.md new file mode 100644 index 0000000..d0b0670 --- /dev/null +++ b/analysis_2x2/OBSERVATIONS.md @@ -0,0 +1,155 @@ +# 2×2 analysis: (late-trajectory settling) × (answer correctness) — observations + +Date: 2026-06-11. Code: `analyze_2x2.py` (+ ad-hoc strict-threshold checks logged in session). +All statements below are measurements on existing diagnostic npz files; no new GPU runs. + +## Data provenance (CRITICAL: λ scales are NOT comparable across estimator versions) + +| dataset | model / ckpt | n | estimator | window | +|---|---|---|---|---| +| `diag_8k.npz` | HRM righteous-python @ step_26040 (H=2,L=2) | 8192 | `diagnose_hrm.py` (May 22 version) | full 16 ACT steps | +| `diag_trm_singleGPU_step*_512.npz` ×10 | TRM mlp_t singleGPU @ 26041…260410 | 512 each | `diagnose_trm_joint.py` (joint JVP+QR, per-sub-update norm) | full 16 ACT steps | +| `diag_hrm_step_*_512.npz` ×10 | HRM @ 2604…26040 | 512 each | joint estimator (−0.15-scale, matches step7 reports) | full 16 ACT steps | + +Within-dataset comparisons only. "Converged"/"settled" here = late z_H drift (mean over ACT steps 13–16) +falls in the low-drift band; threshold from Otsu on pooled log10 drift, robustness via percentile sweep. +Settled ≠ literal fixed point: the settled band has a narrow characteristic residual velocity +(HRM ≈0.96/step, IQR-width <0.03 in log10; TRM ≈18.5/step, q10–q90 = 15.8–21.4). 
+ +## Headline numbers + +### HRM @26040, n=8192, exact_acc 0.525 (full-window λ, May-22 estimator scale) +Otsu τ → cells: A(settled,correct)=4103, B(settled,wrong)=63, C(unsettled,correct)=195, D(unsettled,wrong)=3831. + +- Failure mass is overwhelmingly unsettled: wrong-that-settled = 1.6% at Otsu τ; at a STRICT in-band + threshold (45th pct, drift<0.97) it is 21/3894 = **0.55% of failures**. +- The strict-band B examples (n=21) have λ₁ median **−0.842** vs A −0.867 (success-like contraction), + q_halt(final) median **+7.47 — identical to A** (ACT-confident), token_acc 0.41–0.88 (median 0.62, + substantially wrong, not near-misses). These are confidently-wrong settled answers — invisible to + both a stability-based and an ACT-confidence-based selector. +- λ₁ as a predictor: AUC(−λ₁→correct) = 0.984 ≈ AUC(−λ₁→settled) = 0.986. At the Otsu split the + within-settled outcome gap (Δλ₁ −0.27, AUC 0.852) is a THRESHOLD-MIXTURE ARTIFACT: the Otsu τ sits + above the band gap (drift distribution has a near-empty region between pct≈45 and pct≈55, + τ jumps −0.013→1.39), so Otsu-"settled" includes mid-drift examples. At the strict in-band τ the + within-settled gap nearly closes (−0.867 vs −0.842). +- Residual outcome signal in the unsettled stratum: C vs D Δλ₁(median) = −0.094, AUC 0.818 + (C n=195). Note C drifts MORE than D (median 55.4 vs 40.8) while being more contracting in λ₁. +- AUC(−log late-drift→correct) = 0.964. + +### TRM singleGPU @260410 (final), n=512, exact_acc 0.770 (joint estimator) +Cells: A=383, B=**0**, C=11, D=118. + +- **B = 0 at every threshold**: drift distributions of settled-correct (q90 = 21.4) and wrong + (q10 = 47.8) are completely separated — no wrong example reaches the settled band. Robust across + the entire percentile sweep (nB=0, nD=118 constant). Same at step 130205. Across the 10-ckpt series, + fB>0 only at step 26041 (1.2%). 
+- λ₁ medians: A +0.0047 (≈0, consistent with "TRM success short-window λ₁>0 is acceptable"), + C +0.0998, D +0.1023. Within-unsettled outcome gap ≈ 0 (Δ −0.0025; AUC 0.619, C n=11). + AUC(−λ₁→correct) = 0.989 ≈ AUC(−λ₁→settled) = 0.996: at the final checkpoint, λ₁'s outcome + signal in TRM is (almost) entirely settling-regime detection. +- q_halt(final) median: A +7.44, C +7.41, D −11.1. + +## Checkpoint evolution (512/ckpt; within-series comparisons) + +- TRM (26041→260410): fB≈0 from 52082 on; fD shrinks 0.41→0.23; λ₁(D) rises monotonically + +0.036→+0.102 while λ₁(A) stays ≈0 — the success/failure λ₁ gap widens over training via the + failure cell becoming more expansive. +- HRM (2604→26040, joint-estimator series): mass migration over training: + fB 0.89→0.008 (early ckpts: nearly all examples low-drift & wrong), fD rises to ~0.52 mid-training, + fA grows with accuracy; λ₁(D) rises from −0.087 to +0.023 (sign flip ~step 15–18k), λ₁(A) stays + −0.10…−0.20. + +## Direct answers to the motivating questions + +1. **Is the failure FTLE cluster a mixture of wandering + wrong-fixed-point modes?** + Measured: yes, but extremely lopsided. Wandering dominates (HRM ≥98.4% of failures at Otsu τ, + 99.45% at strict τ; TRM final 100%). The wrong-fixed-point mode exists in HRM (21 examples) with + exactly the predicted signature (success-like λ₁), and is absent in TRM at late checkpoints. +2. **Does conditioning on settling absorb the success/failure FTLE gap?** + TRM final: essentially yes (within-stratum AUC 0.619 with n=11; regime AUC 0.996). + HRM: mostly yes after threshold correction (strict-band within-settled gap ≈0.025), with one + genuine residual: the unsettled stratum retains an outcome gap (AUC 0.818, C n=195). +3. 
**Relation to published taxonomies** (factual cross-references): + Ren & Liu's four HRM modes map onto our cells; their "non-trivial failure (converged to wrong + fixed point)" = our strict-band B (rare: 0.55% of failures); their "trivial failure (wanders or + oscillates)" = our D (dominant). Efstathiou & Balwani's "failed runs plateau at stable high-loss + attractors" (TRM): by our state-drift criterion, TRM failures are NOT settled (B=0, drift ≥~48/step, + ~0.77× the early-trajectory velocity); their "stable" refers to loss plateaus/bounded regions, + not state convergence. Our drift+λ₁ measurement distinguishes these. + +## Addendum (same day, offline follow-ups — see offline_followups/followups.md) + +**REVISION of headline point 2.** "λ₁'s outcome signal is (almost) entirely settling-regime detection" +was based on comparing raw AUCs and is too strong as a mediation claim. The proper control — +AUC(−λ₁→correct) **within matched late-drift deciles** of the unsettled stratum — shows substantial +independent signal (HRM: per-decile 0.97→0.69 from low to high drift, weighted mean 0.879; +unconditioned within-unsettled 0.933). TRM official @58590 with a strict band τ shows the same +qualitatively: unsettled-correct λ₁ ≈ +0.017 (n=141) vs unsettled-wrong +0.103 (n=64) at overlapping +drift levels. Corrected statement: **λ₁ correlates strongly with settledness, but at matched drift +level it still separates outcome** — drift and λ₁ are not redundant observables. + +**Difficulty control (#givens, crude proxy).** HRM n=8192: Spearman(correct, givens)=+0.28; +Spearman(λ₁, givens)=−0.35 overall but −0.16/−0.18 within outcome. Within-givens-bin +AUC(−λ₁→correct) = 0.976–0.987 (weighted 0.982, vs overall 0.984): at the #givens level, the +FTLE-outcome separation is NOT a difficulty artifact. (Solver-backtrack difficulty not available +offline; #givens is a weak proxy — flag for the writeup.) 
+ +**Strict-B per-example (n=21).** All 21 have halted_at ∈ [4,9] (median 6) — under real ACT inference +every one would have halted early, confidently (q_halt +7.4–7.5), and wrong (token_acc 0.41–0.88). +The three lowest-token-acc cases are all 17-givens (minimum-clue) puzzles. Per-example table in +followups.md; drift profiles indistinguishable from the A band (fig_hrm_strictB_profiles.png). + +**TRM official @58590 note.** 90% of its settled-correct examples are still descending at window end +(slope median −0.147) — unlike singleGPU @260410 whose A-band is flat (~18.5/step). The "settled band" +criterion is checkpoint-specific; cross-checkpoint comparisons must re-derive τ per dataset. + +## Addendum 2 (2026-06-12, n=2048 retest + early-window pairing; npz in retest/) + +Retest ran on GPU 0 (shared, 12h-fallback claim). Four diagnostics, seed 0, idx-paired. + +**1. TRM official @58590 (87.6%), full window, n=2048: B = 0 confirmed.** +254 failures, none settled; the MINIMUM late-drift among wrong examples (log10 1.664 ≈ 46/step) +exceeds the late-drift of 96.5% of correct examples — near-complete distribution separation, +threshold-free. λ₁: wrong +0.103 / correct +0.012, AUC 0.993. Within-unsettled outcome AUC 0.848 +(C n=70) — residual signal confirmed at usable n. + +**2. HRM @26040, full window, joint estimator, n=2048: replicates diag_8k on a second estimator.** +acc 0.526; A/B/C/D = 1020/14/57/957; λ₁(A) −0.152 vs λ₁(D) +0.032 (the email's −0.15/+0.04 scale); +strict-band B n=5 (0.5% of failures) with λ₁ −0.141 ≈ A and q_halt +7.47 (selector-blind, replicated); +unsettled within-decile AUC weighted 0.900 (was 0.879 on diag_8k). + +**3. Early-window (first 4 ACT steps) does NOT forecast eventual success among still-unsolved +examples — and on HRM the dynamical signals point the OTHER way.** +Unconditioned early AUCs are inflated by already-solved-at-4 examples (TRM 69.4% solved@4, +HRM 34.5%). 
Restricted to not-yet-correct@4: + +| signal @ step 4 | TRM (n=626, 59.4% eventually correct) | HRM (n=1342, 27.6% eventually correct) | +|---|---|---| +| AUC(−λ₁_early → eventual correct) | 0.543 | **0.448** (reversed) | +| AUC(−drift@4 → eventual correct) | 0.492 | **0.312** (reversed: MORE early movement ↔ eventual success, +dir AUC 0.688) | +| AUC(q_halt@4 → eventual correct) | 0.521 | **0.734** | + +Observations: (i) the λ₁/outcome association is concurrent with the trajectory's fate (final window), +not antecedent — early-window λ₁ has no forward predictive power at this granularity; (ii) on HRM the +sign of the early association is inverted: among undecided examples, higher early drift (and +marginally higher early λ₁) accompany eventual success; (iii) the one early signal with real forecast +power is HRM's learned q_halt (0.734) — absent in TRM (0.521); factual architecture note: TRM removed +HRM's Q-learning continue-head (BCE halt only). Window length 4 was chosen to match train-time; other +horizons untested. + +**Consequences for writeup/claims:** "failure ↔ chaos" should be stated as an outcome-concurrent +dynamical signature, not an early predictor; early-exit/reallocation applications are unsupported at +this granularity (and sign-reversed on HRM); the within-stratum independence result (drift-matched +AUC 0.88-0.90) plus the difficulty control (Addendum 1) remain the strongest positive claims. + +## Caveats + +- n(B)=21 and n(C)=11/195 are small; per-cell statements about those cells are low-precision. +- "Settled" is a relative (band) criterion; both A-bands have nonzero characteristic residual velocity. +- exact_correct is evaluated at ACT step 16 under fixed unroll; post-window corruption or recovery + (Ren & Liu's fixed-point violation) is not observable in these arrays. 
+- λ window = full trajectory (336 sub-updates TRM / 16 segments HRM); the early-window + (length-matched) version of this analysis is NOT covered by existing npz files — the `_short` + diagnose scripts exist (4 ACT steps) but produced no saved npz we could find. Open follow-up. +- diag_8k (HRM) and the HRM evolution series use different estimator normalizations; only + within-file comparisons are reported above. diff --git a/analysis_2x2/analyze_2x2.py b/analysis_2x2/analyze_2x2.py new file mode 100644 index 0000000..315ffc6 --- /dev/null +++ b/analysis_2x2/analyze_2x2.py @@ -0,0 +1,350 @@ +"""2x2 analysis: (terminal convergence) x (answer correctness), per-example FTLE per cell. + +Inputs: existing diagnostic npz files produced by diagnose_hrm*.py / diagnose_trm_joint.py: + drift_zH/drift_zL (N,16) per-ACT-step state displacement norms, + lyap_spec (N,8) full-window joint FTLE spectrum, exact_correct (N,), + token_acc, halted_at, q_halt/q_continue (N,16), idx. + +Convergence metric (measurement choice, reported alongside robustness sweep): + d_late = mean(drift_zH[:, -4:]) (late-trajectory z_H velocity, ACT steps 13-16) + primary threshold tau = Otsu on log10(d_late) pooled per dataset; + sensitivity: tau swept over pooled percentiles 5..95. + +Cells: A = converged & correct, B = converged & wrong, + C = non-converged & correct, D = non-converged & wrong. + +Outputs (in this directory): results_{tag}.json, cells_{tag}.csv, sweep_{tag}.csv, + fig_{tag}_{drift_hist,lyap_by_cell,scatter,spectrum}.png, evolution_{hrm,trm}.{csv,png}, + results.md (combined human-readable summary). + +Observational only: this script reports counts, distributions and rank statistics; it does +not test mechanisms. 
HERE = Path(__file__).resolve().parent
FLOSS = HERE.parent

LATE_K = 4  # ACT steps used for late drift


def otsu_threshold(x: np.ndarray, nbins: int = 256) -> float:
    """Return Otsu's threshold for the 1-D sample *x*.

    The sample is histogrammed into ``nbins`` equal-width bins and the bin
    whose cumulative split maximizes the between-class variance is selected;
    the returned value is that bin's center.

    Args:
        x: Sample values (any array-like accepted by ``np.histogram``).
        nbins: Number of histogram bins.

    Returns:
        The threshold as a Python float. If the histogram is empty, or if the
        sample is degenerate (all mass in a single bin, so no split produces
        two non-empty classes), the median of *x* is returned instead.
    """
    x = np.asarray(x)
    h, edges = np.histogram(x, bins=nbins)
    h = h.astype(np.float64)
    centers = 0.5 * (edges[:-1] + edges[1:])
    w = h.sum()
    if w == 0:
        return float(np.median(x))
    p = h / w
    omega = np.cumsum(p)          # class-0 probability for a split after each bin
    mu = np.cumsum(p * centers)   # cumulative first moment
    mu_t = mu[-1]
    denom = omega * (1.0 - omega)
    # Splits that leave one class empty have zero denominator; mark them NaN so
    # they are ignored by the arg-max instead of producing inf / warnings.
    denom[denom <= 0] = np.nan
    sigma_b2 = (mu_t * omega - mu) ** 2 / denom
    if np.isnan(sigma_b2).all():
        # Degenerate sample (e.g. constant input): np.nanargmax would raise
        # ValueError on an all-NaN array, so use the same fallback as above.
        return float(np.median(x))
    k = np.nanargmax(sigma_b2)
    return float(centers[k])


def auc_rank(score: np.ndarray, label: np.ndarray) -> float:
    """AUC of `score` for predicting label==1 (rank-based, ties averaged).

    Args:
        score: Per-example scores (array-like); higher should mean "more
            likely label 1".
        label: Per-example labels (array-like); entries equal to 1 are
            positives, entries equal to 0 are negatives, anything else is
            ignored.

    Returns:
        The Mann-Whitney AUC as a float, or NaN when either class is empty.
    """
    score = np.asarray(score)
    label = np.asarray(label)
    pos = score[label == 1]
    neg = score[label == 0]
    n_pos, n_neg = len(pos), len(neg)
    if n_pos == 0 or n_neg == 0:
        return float("nan")

    allv = np.concatenate([pos, neg])
    order = np.argsort(allv, kind="mergesort")
    sv = allv[order]

    # Runs of equal sorted values form tie groups. A group occupying 1-based
    # ranks start+1 .. start+size receives their mean, start + (size + 1) / 2.
    is_start = np.concatenate(([True], sv[1:] != sv[:-1]))
    starts = np.flatnonzero(is_start)
    sizes = np.diff(np.concatenate((starts, [len(sv)])))
    avg_rank = starts + (sizes + 1) / 2.0

    ranks = np.empty(len(sv), dtype=np.float64)
    ranks[order] = np.repeat(avg_rank, sizes)

    # Positives were concatenated first, so they own the first n_pos slots.
    r_pos = ranks[:n_pos].sum()
    return float((r_pos - n_pos * (n_pos + 1) / 2) / (n_pos * n_neg))


def cell_stats(lyap: np.ndarray, tok: np.ndarray, halted: np.ndarray, mask: np.ndarray) -> dict:
    """Summarize the examples selected by *mask* (one cell of the 2x2 table).

    Args:
        lyap: 2-D array of per-example exponents; column 0 is reported as
            lambda_1 and the last column as "lam8".
        tok: Per-example token accuracy.
        halted: Per-example halting step.
        mask: Boolean selector over examples.

    Returns:
        ``{"n": 0}`` for an empty cell; otherwise a dict with the cell size,
        median / mean / inter-quartile range of lambda_1, the median of the
        last exponent, the per-column median spectrum, and the medians of
        ``tok`` and ``halted``.
    """
    n = int(mask.sum())
    if n == 0:
        return {"n": 0}
    spec = lyap[mask]
    l1 = spec[:, 0]
    spectrum = [float(np.median(spec[:, i])) for i in range(lyap.shape[1])]
    return {
        "n": n,
        "lam1_median": float(np.median(l1)),
        "lam1_mean": float(l1.mean()),
        "lam1_iqr": [float(np.percentile(l1, 25)), float(np.percentile(l1, 75))],
        "lam8_median": float(np.median(spec[:, -1])),
        "spectrum_median": spectrum,
        "token_acc_median": float(np.median(tok[mask])),
        "halted_at_median": float(np.median(halted[mask])),
    }
range(lyap.shape[1])], + "token_acc_median": float(np.median(tok[mask])), + "halted_at_median": float(np.median(halted[mask])), + } + + +def analyze(npz_path: Path, tag: str, make_figs: bool = True) -> dict: + d = np.load(npz_path) + lyap = d["lyap_spec"].astype(np.float64) + correct = d["exact_correct"].astype(int) + tok = d["token_acc"].astype(np.float64) + halted = d["halted_at"].astype(np.float64) + drift_h = d["drift_zH"].astype(np.float64) + drift_l = d["drift_zL"].astype(np.float64) + + d_late = drift_h[:, -LATE_K:].mean(axis=1) + d_late_l = drift_l[:, -LATE_K:].mean(axis=1) + logd = np.log10(np.clip(d_late, 1e-12, None)) + + tau = otsu_threshold(logd) + conv = logd < tau + + cells = { + "A_conv_correct": (conv) & (correct == 1), + "B_conv_wrong": (conv) & (correct == 0), + "C_nonconv_correct": (~conv) & (correct == 1), + "D_nonconv_wrong": (~conv) & (correct == 0), + } + + res = { + "npz": str(npz_path), + "n": int(len(correct)), + "exact_acc": float(correct.mean()), + "late_drift_def": f"mean(drift_zH[:, -{LATE_K}:])", + "otsu_tau_log10": tau, + "frac_converged": float(conv.mean()), + "cells": {k: cell_stats(lyap, tok, halted, m) for k, m in cells.items()}, + "mixture": { + "wrong_that_converged": float( + cells["B_conv_wrong"].sum() / max((correct == 0).sum(), 1) + ), + "correct_that_nonconverged": float( + cells["C_nonconv_correct"].sum() / max((correct == 1).sum(), 1) + ), + }, + "contrasts": { + "dlam1_correct_minus_wrong_overall": float( + np.median(lyap[correct == 1, 0]) - np.median(lyap[correct == 0, 0]) + ), + "dlam1_within_converged": float( + np.median(lyap[cells["A_conv_correct"], 0]) - np.median(lyap[cells["B_conv_wrong"], 0]) + ) + if cells["B_conv_wrong"].sum() > 0 and cells["A_conv_correct"].sum() > 0 + else float("nan"), + "dlam1_within_nonconverged": float( + np.median(lyap[cells["C_nonconv_correct"], 0]) - np.median(lyap[cells["D_nonconv_wrong"], 0]) + ) + if cells["C_nonconv_correct"].sum() > 0 and cells["D_nonconv_wrong"].sum() > 0 + 
else float("nan"), + "dlam1_wrong_conv_minus_wrong_nonconv": float( + np.median(lyap[cells["B_conv_wrong"], 0]) - np.median(lyap[cells["D_nonconv_wrong"], 0]) + ) + if cells["B_conv_wrong"].sum() > 0 and cells["D_nonconv_wrong"].sum() > 0 + else float("nan"), + }, + "auc": { + "neg_lam1_predicts_correct_overall": auc_rank(-lyap[:, 0], correct), + "neg_lam1_predicts_correct_within_conv": auc_rank(-lyap[conv, 0], correct[conv]), + "neg_lam1_predicts_correct_within_nonconv": auc_rank(-lyap[~conv, 0], correct[~conv]), + "neg_logdrift_predicts_correct": auc_rank(-logd, correct), + "neg_lam1_predicts_converged": auc_rank(-lyap[:, 0], conv.astype(int)), + }, + } + + # threshold sensitivity sweep + sweep_rows = [] + for pct in range(5, 96, 5): + t = np.percentile(logd, pct) + c = logd < t + row = { + "pct": pct, + "tau": float(t), + "nA": int((c & (correct == 1)).sum()), + "nB": int((c & (correct == 0)).sum()), + "nC": int((~c & (correct == 1)).sum()), + "nD": int((~c & (correct == 0)).sum()), + } + for nm, m in [ + ("lam1_med_B", c & (correct == 0)), + ("lam1_med_D", ~c & (correct == 0)), + ]: + row[nm] = float(np.median(lyap[m, 0])) if m.sum() > 0 else float("nan") + sweep_rows.append(row) + sweep_csv = HERE / f"sweep_{tag}.csv" + with sweep_csv.open("w") as f: + keys = list(sweep_rows[0].keys()) + f.write(",".join(keys) + "\n") + for r in sweep_rows: + f.write(",".join(str(r[k]) for k in keys) + "\n") + + # per-cell csv + with (HERE / f"cells_{tag}.csv").open("w") as f: + f.write("cell,n,lam1_median,lam1_mean,lam1_q25,lam1_q75,lam8_median,token_acc_median,halted_at_median\n") + for k, m in cells.items(): + s = res["cells"][k] + if s["n"] == 0: + f.write(f"{k},0,,,,,,,\n") + continue + f.write( + f"{k},{s['n']},{s['lam1_median']:.6f},{s['lam1_mean']:.6f}," + f"{s['lam1_iqr'][0]:.6f},{s['lam1_iqr'][1]:.6f},{s['lam8_median']:.6f}," + f"{s['token_acc_median']:.4f},{s['halted_at_median']:.1f}\n" + ) + + if make_figs: + colors = {"A_conv_correct": "tab:green", "B_conv_wrong": 
"tab:orange", + "C_nonconv_correct": "tab:blue", "D_nonconv_wrong": "tab:red"} + + fig, ax = plt.subplots(figsize=(6, 4)) + bins = np.linspace(logd.min(), logd.max(), 60) + ax.hist(logd[correct == 1], bins=bins, alpha=0.55, label=f"correct (n={int(correct.sum())})", color="tab:green") + ax.hist(logd[correct == 0], bins=bins, alpha=0.55, label=f"wrong (n={int((1-correct).sum())})", color="tab:red") + ax.axvline(tau, color="k", ls="--", lw=1, label=f"Otsu tau={tau:.2f}") + ax.set_xlabel("log10 late drift_zH (steps -4:)"); ax.set_ylabel("count") + ax.set_title(f"{tag}: late-drift distribution by correctness"); ax.legend(fontsize=8) + fig.tight_layout(); fig.savefig(HERE / f"fig_{tag}_drift_hist.png", dpi=150); plt.close(fig) + + fig, ax = plt.subplots(figsize=(6.5, 4)) + for i, (k, m) in enumerate(cells.items()): + if m.sum() == 0: + continue + y = lyap[m, 0] + x = np.full(y.shape, i) + (np.random.default_rng(0).uniform(-0.18, 0.18, y.shape)) + ax.plot(x, y, ".", ms=3, alpha=0.35, color=colors[k]) + ax.hlines(np.median(y), i - 0.28, i + 0.28, color=colors[k], lw=2.5) + ax.set_xticks(range(4)); ax.set_xticklabels([f"{k}\n(n={int(m.sum())})" for k, m in cells.items()], fontsize=7) + ax.set_ylabel("lambda_1 (full-window FTLE)"); ax.axhline(0, color="gray", lw=0.6) + ax.set_title(f"{tag}: lambda_1 by 2x2 cell") + fig.tight_layout(); fig.savefig(HERE / f"fig_{tag}_lyap_by_cell.png", dpi=150); plt.close(fig) + + fig, ax = plt.subplots(figsize=(6, 4.5)) + ax.scatter(logd[correct == 1], lyap[correct == 1, 0], s=5, alpha=0.4, c="tab:green", label="correct") + ax.scatter(logd[correct == 0], lyap[correct == 0, 0], s=5, alpha=0.4, c="tab:red", label="wrong") + ax.axvline(tau, color="k", ls="--", lw=1); ax.axhline(0, color="gray", lw=0.6) + ax.set_xlabel("log10 late drift_zH"); ax.set_ylabel("lambda_1") + ax.set_title(f"{tag}: drift vs lambda_1"); ax.legend(fontsize=8) + fig.tight_layout(); fig.savefig(HERE / f"fig_{tag}_scatter.png", dpi=150); plt.close(fig) + + fig, ax = 
def evolution(series: list[tuple[str, Path]], out_tag: str) -> None:
    """Track the 2x2 cell fractions and per-cell lambda_1 across checkpoints.

    Each ``(label, npz_path)`` in *series* whose file exists contributes one
    row: samples are split by convergence (log10 of the mean of the last
    ``LATE_K`` ``drift_zH`` steps, thresholded with ``otsu_threshold``) and by
    ``exact_correct``.  The rows are written to ``evolution_<out_tag>.csv`` and
    plotted to ``evolution_<out_tag>.png`` in ``HERE``.  Nothing is written
    when none of the files exist.
    """
    cell_colors = [("A", "tab:green"), ("B", "tab:orange"), ("C", "tab:blue"), ("D", "tab:red")]
    rows = []
    for label, p in series:
        if not p.exists():
            continue
        # NpzFile keeps a file handle open; release it once the arrays are read.
        with np.load(p) as d:
            lyap = d["lyap_spec"].astype(np.float64)
            correct = d["exact_correct"].astype(int)
            late = d["drift_zH"][:, -LATE_K:].mean(axis=1)
        logd = np.log10(np.clip(late, 1e-12, None))
        tau = otsu_threshold(logd)
        conv = logd < tau
        masks = {
            "A": conv & (correct == 1),
            "B": conv & (correct == 0),
            "C": (~conv) & (correct == 1),
            "D": (~conv) & (correct == 0),
        }
        row = dict(step=label, acc=float(correct.mean()), tau=tau)
        for cell, m in masks.items():
            row[f"f{cell}"] = float(m.mean())
        for cell, m in masks.items():
            # Medians over fewer than three samples are reported as NaN.
            row[f"l1{cell}"] = float(np.median(lyap[m, 0])) if m.sum() > 2 else float("nan")
        rows.append(row)
    if not rows:
        return

    keys = list(rows[0].keys())
    with (HERE / f"evolution_{out_tag}.csv").open("w") as f:
        f.write(",".join(keys) + "\n")
        for r in rows:
            f.write(",".join(str(r[k]) for k in keys) + "\n")

    fig, axes = plt.subplots(1, 2, figsize=(11, 4))
    xs = range(len(rows))
    step_labels = [r["step"] for r in rows]
    for cell, color in cell_colors:
        nm = f"f{cell}"
        axes[0].plot(xs, [r[nm] for r in rows], "o-", label=nm, color=color)
    axes[0].set_xticks(list(xs))
    axes[0].set_xticklabels(step_labels, rotation=45, fontsize=7)
    axes[0].set_ylabel("cell fraction")
    axes[0].legend(fontsize=8)
    axes[0].set_title(f"{out_tag}: cell fractions")
    for cell, color in cell_colors:
        nm = f"l1{cell}"
        axes[1].plot(xs, [r[nm] for r in rows], "o-", label=nm, color=color)
    axes[1].axhline(0, color="gray", lw=0.6)
    axes[1].set_xticks(list(xs))
    axes[1].set_xticklabels(step_labels, rotation=45, fontsize=7)
    axes[1].set_ylabel("median lambda_1")
    axes[1].legend(fontsize=8)
    axes[1].set_title(f"{out_tag}: per-cell lambda_1")
    fig.tight_layout()
    fig.savefig(HERE / f"evolution_{out_tag}.png", dpi=150)
    plt.close(fig)


def main() -> None:
    """Run the 2x2 analysis on the primary dumps, both evolution series, and write ``results.md``."""
    import datetime  # local import: only needed for the report date stamp

    results = {}
    primary = [
        ("hrm26040_n8192", FLOSS / "diag_8k.npz"),
        ("trm_singleGPU_step260410_n512", FLOSS / "diag_trm_singleGPU_step260410_512.npz"),
        ("trm_singleGPU_step130205_n512", FLOSS / "diag_trm_singleGPU_step130205_512.npz"),
        ("trm_step13020_n512", FLOSS / "diag_trm_step13020_512.npz"),
    ]
    for tag, p in primary:
        if p.exists():
            results[tag] = analyze(p, tag)
            print(f"[done] {tag}")

    evolution(
        [(f"{s}", FLOSS / f"diag_hrm_step_{s}_512.npz") for s in
         [2604, 5208, 7812, 10416, 13020, 15624, 18228, 20832, 23436, 26040]],
        "hrm",
    )
    evolution(
        [(f"{s}", FLOSS / f"diag_trm_singleGPU_step{s}_512.npz") for s in
         [26041, 52082, 78123, 104164, 130205, 156246, 182287, 208328, 234369, 260410]],
        "trm",
    )

    # combined human-readable summary
    lines = ["# 2x2 analysis (convergence x correctness) — generated " + datetime.date.today().isoformat(), ""]
    for tag, r in results.items():
        lines += [f"## {tag}", f"- npz: `{r['npz']}`, n={r['n']}, exact_acc={r['exact_acc']:.3f}",
                  f"- late-drift def: {r['late_drift_def']}, Otsu tau(log10)={r['otsu_tau_log10']:.3f}, frac_converged={r['frac_converged']:.3f}", ""]
        lines.append("| cell | n | lam1 median | lam1 IQR | token_acc med |")
        lines.append("|---|---|---|---|---|")
        for k, s in r["cells"].items():
            if s["n"] == 0:
                lines.append(f"| {k} | 0 | - | - | - |")
            else:
                lines.append(f"| {k} | {s['n']} | {s['lam1_median']:+.4f} | [{s['lam1_iqr'][0]:+.4f}, {s['lam1_iqr'][1]:+.4f}] | {s['token_acc_median']:.3f} |")
        c = r["contrasts"]; a = r["auc"]; m = r["mixture"]
        lines += ["",
                  f"- mixture: wrong-that-converged = {m['wrong_that_converged']:.3f}; correct-that-nonconverged = {m['correct_that_nonconverged']:.3f}",
                  f"- dlam1(correct-wrong): overall {c['dlam1_correct_minus_wrong_overall']:+.4f}; within-conv {c['dlam1_within_converged']:+.4f}; within-nonconv {c['dlam1_within_nonconverged']:+.4f}",
                  f"- dlam1(wrong: conv - nonconv) = {c['dlam1_wrong_conv_minus_wrong_nonconv']:+.4f}",
                  f"- AUC(-lam1 -> correct): overall {a['neg_lam1_predicts_correct_overall']:.3f}; within-conv {a['neg_lam1_predicts_correct_within_conv']:.3f}; within-nonconv {a['neg_lam1_predicts_correct_within_nonconv']:.3f}",
                  f"- AUC(-log d_late -> correct) = {a['neg_logdrift_predicts_correct']:.3f}; AUC(-lam1 -> converged) = {a['neg_lam1_predicts_converged']:.3f}",
                  ""]
    # The report contains a non-ASCII dash, so pin the encoding instead of
    # relying on the platform locale.
    (HERE / "results.md").write_text("\n".join(lines), encoding="utf-8")
    print("wrote", HERE / "results.md")


if __name__ == "__main__":
    main()
# Module-level generator used by ridge_cv_auc for the fold permutation.
rng = np.random.default_rng(0)


def resid(y, X):
    """Return the residuals of *y* after a least-squares fit on *X* plus an intercept column."""
    A = np.concatenate([X, np.ones((len(y), 1))], 1)
    coef, *_ = np.linalg.lstsq(A, y, rcond=None)
    return y - A @ coef


def pcorr(a, b, Z):
    """Return the correlation between *a* and *b* after regressing both on *Z*."""
    ra = resid(a.astype(float), Z)
    rb = resid(b.astype(float), Z)
    return float(np.corrcoef(ra, rb)[0, 1])


def _midranks(values):
    """Return 1-based ranks of a 1-D array, tied values sharing their average rank."""
    _, inverse, counts = np.unique(values, return_inverse=True, return_counts=True)
    last = np.cumsum(counts)  # highest rank inside each tie group
    return (last - (counts - 1) / 2.0)[inverse]


def auc(score, y):
    """Return the rank-based AUC of *score* for binary labels *y* (1 = positive).

    Ties receive their average rank, so equal scores count as half a win.
    Returns NaN when either class is absent.
    """
    pos, neg = score[y == 1], score[y == 0]
    if len(pos) == 0 or len(neg) == 0:
        return float("nan")
    ranks = _midranks(np.concatenate([pos, neg]))
    u = ranks[:len(pos)].sum() - len(pos) * (len(pos) + 1) / 2
    return float(u / (len(pos) * len(neg)))


def ridge_cv_auc(X, y, folds=5, lam=1.0):
    """Return the mean held-out AUC of a ridge regression of *y* on standardized *X*.

    Rows are permuted with the module-level ``rng`` and fold ``f`` tests on
    every ``folds``-th permuted row starting at ``f``.  Folds whose AUC is NaN
    are ignored by the mean.
    """
    n = len(y)
    idx = rng.permutation(n)
    Xs = (X - X.mean(0)) / (X.std(0) + 1e-8)
    aucs = []
    for f in range(folds):
        te = idx[f::folds]
        tr = np.setdiff1d(idx, te)
        A = np.concatenate([Xs[tr], np.ones((len(tr), 1))], 1)
        # The ridge penalty covers every column of A, the intercept included.
        w = np.linalg.solve(A.T @ A + lam * np.eye(A.shape[1]), A.T @ y[tr])
        pr = np.concatenate([Xs[te], np.ones((len(te), 1))], 1) @ w
        aucs.append(auc(pr, y[te]))
    return float(np.nanmean(aucs))


def go(tag, npz):
    """Print raw AUC, partial correlation and held-out ridge-AUC gain of the CLV features in *npz*."""
    clv_names = ["clv_hfrac", "clv_pr", "clv_pr_ans", "clv_readout"]
    # Close the archive once every array has been materialised.
    with np.load(npz) as d:
        y = d["exact_correct"].astype(float)
        dr = np.concatenate([d["drift_zH"], d["drift_zL"]], 1).astype(float)
        qh = np.concatenate([d["q_halt"], d["q_continue"]], 1).astype(float)
        feats = {nm: d[nm][:, 0].astype(float) for nm in clv_names}
        clv_all = np.concatenate([d[nm].astype(float) for nm in clv_names], 1)
    ctrl = np.concatenate([dr, qh], 1)
    print(f"\n=== {tag} (n={len(y)}, acc={y.mean():.3f}) ===")
    print(f"{'feature':14s} {'rawAUC':>7} {'partial|drift+qhalt':>20}")
    for nm, f in feats.items():
        print(f"{nm:14s} {auc(f, y):>7.3f} {pcorr(f, y, ctrl):>20.3f}")
    # Call order is kept: each call draws a fresh permutation from the shared rng.
    base = ridge_cv_auc(ctrl, y)
    plus = ridge_cv_auc(np.concatenate([ctrl, clv_all], 1), y)
    clv_alone = ridge_cv_auc(clv_all, y)
    print(f"held-out ridge AUC: CLV-alone={clv_alone:.3f} | drift+qhalt={base:.3f} | +CLV-geom={plus:.3f} GAIN={plus-base:+.3f}")
    print(" -> GAIN ~0 means geometry reduces to convergence+confidence; >0 means non-reducible signal")


if __name__ == "__main__":
    CLV = "/home/yurenh2/rrm/research/flossing/analysis_2x2/clv"
    for tag, pat in [("Sudoku TRM", f"{CLV}/trm_geom_step58590_n512.npz"),
                     ("Sudoku HRM", f"{CLV}/hrm_geom_step26040_n512.npz")]:
        g = glob.glob(pat)
        if g:
            go(tag, g[0])
        else:
            print(f"[pending] {tag}: {pat} not found yet")
HERE = Path(__file__).resolve().parent
DUMPS = HERE / "maze_followup"
rng = np.random.default_rng(0)  # drives the bootstrap resampling in main()


def is_connected(inp, pred, grid=(30, 30)):
    """Return True when the predicted path joins the two endpoint cells of the maze.

    *inp* and *pred* are flat arrays reshaped to *grid*.  Endpoints are the
    first two cells of *inp* equal to 3 or 4; path cells are those where *pred*
    equals 5, plus the endpoints.  A path touching a cell where *inp* is 1 is
    invalid.  With fewer than two endpoints the maze cannot be judged and is
    reported as connected.
    """
    rows, cols = grid
    g = np.asarray(inp).reshape(rows, cols)
    pr = np.asarray(pred).reshape(rows, cols)
    ends = np.argwhere((g == 3) | (g == 4))
    if len(ends) < 2:
        return True
    start, goal = tuple(ends[0]), tuple(ends[1])
    path = set(map(tuple, np.argwhere(pr == 5))) | {start, goal}
    if any(g[r, c] == 1 for r, c in path):
        return False
    # Breadth-first search restricted to path cells (4-neighbourhood).
    seen = {start}
    queue = deque([start])
    while queue:
        r, c = queue.popleft()
        if (r, c) == goal:
            return True
        for dr, dc in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
            nxt = (r + dr, c + dc)
            if 0 <= nxt[0] < rows and 0 <= nxt[1] < cols and nxt in path and nxt not in seen:
                seen.add(nxt)
                queue.append(nxt)
    return False


def _midranks(values):
    """Return 1-based ranks of a 1-D array, tied values sharing their average rank."""
    _, inverse, counts = np.unique(values, return_inverse=True, return_counts=True)
    last = np.cumsum(counts)  # highest rank inside each tie group
    return (last - (counts - 1) / 2.0)[inverse]


def auc(score, y):
    """Return the rank-based AUC of *score* for binary labels *y* (1 = positive).

    Ties receive their average rank; this matters for the answer-drift score,
    which takes few distinct values.  Returns NaN when either class is absent.
    """
    p, n = score[y == 1], score[y == 0]
    if len(p) == 0 or len(n) == 0:
        return float("nan")
    ranks = _midranks(np.concatenate([p, n]))
    return float((ranks[:len(p)].sum() - len(p) * (len(p) + 1) / 2) / (len(p) * len(n)))


def main():
    """Print per-checkpoint and pooled AUCs of drift versus path connectivity for the early dumps."""
    files = sorted(glob.glob(str(DUMPS / "earlydump_step_*.npz")),
                   key=lambda f: int(f.split("step_")[1].split("_")[0].split(".")[0]))
    if not files:
        print("[no earlydump files yet]")
        return
    pool_ld, pool_ad, pool_conn = [], [], []
    print(f"{'ckpt':>10} {'connAcc':>8} {'broken':>7} {'AUC(ldrift)':>11} {'AUC(ansdrift)':>13}")
    for f in files:
        step = f.split("step_")[1].split(".")[0]
        with np.load(f) as d:
            inputs, preds = d["inputs"], d["preds"]
            ld = np.log10(np.clip(d["drift_zH"][:, -4:].mean(1), 1e-12, None))
            ad = d["ans_drift_ans"][:, -4:].mean(1).astype(float)
        conn = np.array([is_connected(inputs[k], preds[k]) for k in range(len(preds))]).astype(int)
        nb = int((conn == 0).sum())
        # Per-checkpoint AUCs need at least three samples in each class.
        enough = 3 <= nb <= len(conn) - 3
        a_ld = auc(-ld, conn) if enough else float("nan")
        a_ad = auc(-ad, conn) if enough else float("nan")
        print(f"{step:>10} {conn.mean():>8.3f} {nb:>7} {a_ld:>11.3f} {a_ad:>13.3f}")
        pool_ld.append(ld)
        pool_ad.append(ad)
        pool_conn.append(conn)

    ld = np.concatenate(pool_ld)
    ad = np.concatenate(pool_ad)
    conn = np.concatenate(pool_conn)
    nb = int((conn == 0).sum())
    a_ld = auc(-ld, conn)
    a_ad = auc(-ad, conn)
    boot = [auc(-ld[i], conn[i]) for i in (rng.integers(0, len(conn), len(conn)) for _ in range(5000))]
    boot = [b for b in boot if not np.isnan(b)]
    # Every resample is NaN when one class is absent; report a NaN interval then.
    if boot:
        lo, hi = np.percentile(boot, 2.5), np.percentile(boot, 97.5)
    else:
        lo = hi = float("nan")
    print(f"\nPOOLED: n={len(conn)}, broken={nb}, connectivity-acc={conn.mean():.3f}")
    print(f" latent-drift: broken median={np.median(ld[conn==0]):.2f} vs connected={np.median(ld[conn==1]):.2f}")
    print(f" AUC(-latent_drift -> connected/complete) = {a_ld:.3f} bootstrap95%CI=[{lo:.3f}, {hi:.3f}]")
    print(f" AUC(-answer_drift -> connected) = {a_ad:.3f}")
    print(f" => robust 'broken/incomplete = more chaotic'? CI excludes 0.5: {lo > 0.5}")


if __name__ == "__main__":
    main()
HERE = Path(__file__).resolve().parent
FU = HERE / "maze_followup"
FRIEND = "/tmp/friend_maze/maze_all_ckpts_lyap"


def is_connected(inp, pred, grid=(30, 30)):
    """Return True when the predicted path joins the two endpoint cells of the maze.

    *inp* and *pred* are flat arrays reshaped to *grid*.  Endpoints are the
    first two cells of *inp* equal to 3 or 4; path cells are those where *pred*
    equals 5, plus the endpoints.  A path touching a cell where *inp* is 1
    crosses a wall and is invalid.
    """
    rows, cols = grid
    g = np.asarray(inp).reshape(rows, cols)
    pr = np.asarray(pred).reshape(rows, cols)
    ends = np.argwhere((g == 3) | (g == 4))
    if len(ends) < 2:
        return True  # can't judge -> treat as connected
    start, goal = tuple(ends[0]), tuple(ends[1])
    pathset = set(map(tuple, np.argwhere(pr == 5))) | {start, goal}
    if any(g[r, c] == 1 for r, c in pathset):
        return False  # crosses wall = invalid
    # Breadth-first search restricted to path cells (4-neighbourhood).
    seen = {start}
    queue = deque([start])
    while queue:
        r, c = queue.popleft()
        if (r, c) == goal:
            return True
        for dr, dc in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
            nxt = (r + dr, c + dc)
            if 0 <= nxt[0] < rows and 0 <= nxt[1] < cols and nxt in pathset and nxt not in seen:
                seen.add(nxt)
                queue.append(nxt)
    return False


def _midranks(values):
    """Return 1-based ranks of a 1-D array, tied values sharing their average rank."""
    _, inverse, counts = np.unique(values, return_inverse=True, return_counts=True)
    last = np.cumsum(counts)  # highest rank inside each tie group
    return (last - (counts - 1) / 2.0)[inverse]


def auc(score, y):
    """Return the rank-based AUC of *score* for binary labels *y* (1 = positive).

    Ties receive their average rank.  Returns NaN when either class is absent.
    """
    p, n = score[y == 1], score[y == 0]
    if len(p) == 0 or len(n) == 0:
        return float("nan")
    ranks = _midranks(np.concatenate([p, n]))
    return float((ranks[:len(p)].sum() - len(p) * (len(p) + 1) / 2) / (len(p) * len(n)))


def cohend(a, b):
    """Return Cohen's d of *a* minus *b* with pooled variance; NaN for tiny or constant groups."""
    if len(a) < 2 or len(b) < 2:
        return float("nan")
    pooled = ((len(a) - 1) * a.var(ddof=1) + (len(b) - 1) * b.var(ddof=1)) / (len(a) + len(b) - 2)
    s = np.sqrt(pooled)
    return (a.mean() - b.mean()) / s if s > 0 else float("nan")


# The analysis runs at import time, one report per checkpoint step.
for step in [13020, 52080, 130200]:
    pred_f = FU / f"mazepreds_step_{step}_seed20260616.npz"
    # Sorted so the same file is chosen on every run when several match.
    fr = sorted(glob.glob(f"{FRIEND}/maze_step_{step}_*.npz"))
    if not pred_f.exists() or not fr:
        print(f"[pending] step {step}")
        continue
    with np.load(pred_f) as P, np.load(fr[0]) as F:
        # Join the two dumps on their shared sample indices.
        common, pi, fi = np.intersect1d(P["idx"], F["idx"], return_indices=True)
        preds = P["preds"][pi]
        inputs = P["inputs"][pi]
        exact = P["exact_correct"][pi].astype(int)
        l1 = F["lyap_spec"][fi, 0].astype(float)
        late_drift = np.log10(np.clip(F["drift_zH"][fi, -4:].mean(1), 1e-12, None))
    conn = np.array([is_connected(inputs[k], preds[k]) for k in range(len(common))]).astype(int)
    nb = int((conn == 0).sum())
    print(f"\n=== step {step} (joined n={len(common)}) ===")
    print(f" exact-match acc={exact.mean():.3f} | CONNECTIVITY acc (valid complete path)={conn.mean():.3f} | broken={nb}")
    if nb < 3 or nb > len(common) - 3:
        print(f" too few broken/connected to condition dynamics (broken={nb})")
        continue
    # dynamics conditioned on CONNECTIVITY (broken=0 vs connected=1)
    print(f" late-drift (settling): connected median={np.median(late_drift[conn==1]):.2f} broken median={np.median(late_drift[conn==0]):.2f}")
    print(f" AUC(-late-drift -> connected) = {auc(-late_drift, conn):.3f} Cohen d(broken-conn)={cohend(late_drift[conn==0], late_drift[conn==1]):+.2f}")
    print(f" lambda1: connected median={np.median(l1[conn==1]):+.4f} broken median={np.median(l1[conn==0]):+.4f}")
    print(f" AUC(-lambda1 -> connected) = {auc(-l1, conn):.3f}")
    # compare: does connectivity separate dynamics BETTER than exact-match?
    print(f" [vs exact-match] AUC(-late-drift -> exact_correct) = {auc(-late_drift, exact):.3f}, "
          f"AUC(-lambda1 -> exact) = {auc(-l1, exact):.3f}")
    # within CONNECTED, does exact-match still separate? (should NOT, if dynamics track completeness)
    m = conn == 1
    if 0 < exact[m].mean() < 1:
        print(f" within CONNECTED (n={m.sum()}): AUC(-late-drift -> exact) = {auc(-late_drift[m], exact[m]):.3f} "
              f"(near 0.5 => dynamics track completeness, not correctness)")
HERE = Path(__file__).resolve().parent
P1 = HERE / "phase1"
RETEST = HERE / "retest"


def auc_rank(score, label):
    """Return the rank-based AUC of *score* for binary *label* (1 = positive).

    Tied scores share their average rank.  Returns NaN when either class is
    absent.
    """
    score = np.asarray(score, float)
    label = np.asarray(label, int)
    pos, neg = score[label == 1], score[label == 0]
    if len(pos) == 0 or len(neg) == 0:
        return float("nan")
    pooled = np.concatenate([pos, neg])
    # Vectorised midranks: each tie group gets the mean of the ranks it spans.
    _, inverse, counts = np.unique(pooled, return_inverse=True, return_counts=True)
    last = np.cumsum(counts)
    ranks = (last - (counts - 1) / 2.0)[inverse]
    u = ranks[:len(pos)].sum() - len(pos) * (len(pos) + 1) / 2
    return float(u / (len(pos) * len(neg)))


def otsu(x, nbins=256):
    """Return the Otsu threshold of *x*: the bin centre maximising between-class variance.

    When no bin yields a valid two-class split (constant or empty input) the
    histogram mean is returned instead of raising; for empty input that mean
    is NaN.
    """
    h, e = np.histogram(x, bins=nbins)
    h = h.astype(float)
    c = (e[:-1] + e[1:]) / 2
    p = h / h.sum()
    om = np.cumsum(p)
    mu = np.cumsum(p * c)
    mt = mu[-1]
    den = om * (1 - om)
    den[den <= 0] = np.nan  # splits that leave one class empty are not candidates
    between = (mt * om - mu) ** 2 / den
    if np.all(np.isnan(between)):
        # np.nanargmax raises on an all-NaN array.
        return float(mt)
    return float(c[np.nanargmax(between)])


def cells_of(d):
    """Summarise the convergence x correctness cells of one diagnostic dump *d*.

    *d* is indexed by ``"drift_zH"``, ``"exact_correct"`` and ``"lyap_spec"``.
    A sample is converged when the log10 of its mean drift over the last four
    steps is below the Otsu threshold.  Returns accuracy, the threshold, the
    four cell counts, the fraction in cell D and the median first exponent of
    cells A and D (NaN when a cell is empty).
    """
    ld = np.log10(np.clip(d["drift_zH"][:, -4:].mean(1), 1e-12, None))
    c = d["exact_correct"].astype(int)
    tau = otsu(ld)
    conv = ld < tau
    A = conv & (c == 1)
    B = conv & (c == 0)
    C = (~conv) & (c == 1)
    D = (~conv) & (c == 0)
    l1 = d["lyap_spec"][:, 0]
    return dict(acc=float(c.mean()), tau=tau,
                nA=int(A.sum()), nB=int(B.sum()), nC=int(C.sum()), nD=int(D.sum()),
                fD=float(D.mean()),
                l1A=float(np.median(l1[A])) if A.sum() else float("nan"),
                l1D=float(np.median(l1[D])) if D.sum() else float("nan"))
AUC(early signal -> FINAL correct), restricted to not-yet-correct@H", ""] +finals = {"trm": np.load(RETEST / "trm_gbs768_step58590_full_n2048.npz"), + "hrm": np.load(RETEST / "hrm_righteous_step26040_full_n2048.npz")} +# include h=4 from retest short runs +short4 = {"trm": np.load(RETEST / "trm_gbs768_step58590_short_n2048.npz"), + "hrm": np.load(RETEST / "hrm_righteous_step26040_short_n2048.npz")} +sweep = {} +for model in ["trm", "hrm"]: + fin = finals[model]; fi = fin["idx"]; y_final_all = fin["exact_correct"].astype(int) + rows = [] + for H in [2, 4, 6, 8, 10, 12]: + if H == 4: + s = short4[model] + else: + tag = f"{'trm_official58590' if model=='trm' else 'hrm26040'}_h{H}_n2048" + s = np.load(P1 / f"{tag}.npz") + si = s["idx"]; common, fp, sp = np.intersect1d(fi, si, return_indices=True) + yf = y_final_all[fp]; ye = s["exact_correct"].astype(int)[sp] + l1 = s["lyap_spec"][sp, 0]; dr = np.log10(np.clip(s["drift_zH"][sp, -1], 1e-12, None)); q = s["q_halt"][sp, -1] + m = ye == 0 # not yet correct at H + rows.append(dict(H=H, n_und=int(m.sum()), frac_eventual=float(yf[m].mean()) if m.sum() else float("nan"), + auc_l1=auc_rank(-l1[m], yf[m]), auc_drift=auc_rank(-dr[m], yf[m]), auc_q=auc_rank(q[m], yf[m]), + solved_at_H=float(ye.mean()))) + sweep[model] = rows + lines.append(f"### {model.upper()}") + lines.append("| H | solved@H | undecided n | of which eventual✓ | AUC(-λ₁) | AUC(-drift) | AUC(q_halt) |") + lines.append("|---|---|---|---|---|---|---|") + for r in rows: + lines.append(f"| {r['H']} | {r['solved_at_H']:.3f} | {r['n_und']} | {r['frac_eventual']:.3f} | " + f"{r['auc_l1']:.3f} | {r['auc_drift']:.3f} | {r['auc_q']:.3f} |") + lines.append("") + +# E5 figure +fig, axes = plt.subplots(1, 2, figsize=(11, 4)) +for ax, model in zip(axes, ["trm", "hrm"]): + rows = sweep[model]; H = [r["H"] for r in rows] + ax.plot(H, [r["auc_l1"] for r in rows], "o-", label="−λ₁") + ax.plot(H, [r["auc_drift"] for r in rows], "s-", label="−drift") + ax.plot(H, [r["auc_q"] for r in 
rows], "^-", label="q_halt") + ax.axhline(0.5, color="gray", lw=0.6, ls="--") + ax.set_xlabel("prefix length H (ACT segments)"); ax.set_ylabel("AUC → final correct (undecided@H only)") + ax.set_ylim(0.3, 1.0); ax.set_title(f"{model.upper()}: legibility of fate vs prefix"); ax.legend(fontsize=8) +fig.tight_layout(); fig.savefig(P1 / "fig_E5_horizon_sweep.png", dpi=150); plt.close(fig) + +# ---------- E6 matched-objective ---------- +lines += ["## E6: matched-objective intervention (step9 fixed-unroll runs, n=512)", ""] +for fam, base, mult in [("HRM (E base vs F multi4)", "step9E_hrm", "step9F_hrm"), + ("TRM (G base vs H multi4)", "step9G_trm", "step9H_trm")]: + lines.append(f"### {fam}") + lines.append("| ckpt | base acc | base fD | base λ₁(D) | multi4 acc | multi4 fD | multi4 λ₁(D) |") + lines.append("|---|---|---|---|---|---|---|") + for ck in ["step_12500", "step_25000", "best", "final"]: + bp = P1 / f"{base}_{ck}_n512.npz"; mp = P1 / f"{mult}_{ck}_n512.npz" + if not bp.exists() or not mp.exists(): + lines.append(f"| {ck} | missing | | | | | |"); continue + b = cells_of(np.load(bp)); m = cells_of(np.load(mp)) + lines.append(f"| {ck} | {b['acc']:.3f} | {b['fD']:.3f} | {b['l1D']:+.4f} | " + f"{m['acc']:.3f} | {m['fD']:.3f} | {m['l1D']:+.4f} |") + lines.append("") + +# ---------- E2 second-run replication ---------- +lines += ["## E2: decomposition on a SECOND HRM training run (step9_E fixed-unroll, n=2048)", ""] +lines.append("| ckpt | acc | A | B | C | D | λ₁(A) | λ₁(D) | settled-wrong frac of failures |") +lines.append("|---|---|---|---|---|---|---|---|---|") +for ck in ["best", "final"]: + d = np.load(P1 / f"step9E_hrm_{ck}_full_n2048.npz"); cc = cells_of(d) + nfail = cc["nB"] + cc["nD"] + lines.append(f"| {ck} | {cc['acc']:.3f} | {cc['nA']} | {cc['nB']} | {cc['nC']} | {cc['nD']} | " + f"{cc['l1A']:+.4f} | {cc['l1D']:+.4f} | {cc['nB']/max(nfail,1):.4f} |") + +(P1 / "phase1_results.md").write_text("\n".join(lines)) +print("\n".join(lines)) +print("\nwrote", P1 / 
HERE = Path(__file__).resolve().parent
FU = HERE / "maze_followup"


def _midranks(values):
    """Return 1-based ranks of a 1-D array, tied values sharing their average rank."""
    _, inverse, counts = np.unique(values, return_inverse=True, return_counts=True)
    last = np.cumsum(counts)  # highest rank inside each tie group
    return (last - (counts - 1) / 2.0)[inverse]


def auc(s, y):
    """Return the rank-based AUC of score *s* for binary labels *y* (1 = positive).

    Ties receive their average rank; the answer-drift scores used here take
    few distinct values, so ties are common.  Returns NaN when either class
    is absent.
    """
    p, n = s[y == 1], s[y == 0]
    if len(p) == 0 or len(n) == 0:
        return float("nan")
    ranks = _midranks(np.concatenate([p, n]))
    return float((ranks[:len(p)].sum() - len(p) * (len(p) + 1) / 2) / (len(p) * len(n)))


def cohend(a, b):
    """Return Cohen's d of *a* minus *b* with pooled variance; NaN for tiny or constant groups."""
    if len(a) < 2 or len(b) < 2:
        # The sample variance is undefined for fewer than two points.
        return float("nan")
    pooled = ((len(a) - 1) * a.var(ddof=1) + (len(b) - 1) * b.var(ddof=1)) / max(len(a) + len(b) - 2, 1)
    s = np.sqrt(pooled)
    return (a.mean() - b.mean()) / s if s > 0 else float("nan")


def connected_components(mask2d):
    """Return ``(count, sizes)`` of the 4-connected components of True cells in *mask2d*."""
    seen = np.zeros_like(mask2d, bool)
    sizes = []
    H, W = mask2d.shape
    for i in range(H):
        for j in range(W):
            if not mask2d[i, j] or seen[i, j]:
                continue
            # Iterative flood fill from an unvisited True cell.
            stack = [(i, j)]
            seen[i, j] = True
            sz = 0
            while stack:
                r, c = stack.pop()
                sz += 1
                for dr, dc in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
                    rr, cc = r + dr, c + dc
                    if 0 <= rr < H and 0 <= cc < W and mask2d[rr, cc] and not seen[rr, cc]:
                        seen[rr, cc] = True
                        stack.append((rr, cc))
            sizes.append(sz)
    return len(sizes), sizes


def analyze(tag, npz, grid=None):
    """Print solution-space settling statistics for the dump *npz*, labelled *tag*.

    Late answer-drift is the mean of the last four columns of
    ``ans_drift_ans``.  When *grid* is given, the error cells of every failed
    sample (``preds != labels`` reshaped to *grid*) are also grouped into
    4-connected components, and the component count and the share of error
    cells in the largest component are reported.
    """
    with np.load(npz) as d:
        y = d["exact_correct"].astype(int)
        ad = d["ans_drift_ans"].astype(float)
        adf = d["ans_drift_full"][:, -4:].mean(1).astype(float)
        if grid is not None:
            preds = d["preds"]
            labels = d["labels"]
    late = ad[:, -4:].mean(1)  # late answer-drift = solution-space "still changing"
    print(f"\n=== {tag} (n={len(y)}, acc={y.mean():.3f}) — SOLUTION-SPACE settling ===")
    print(f" late answer-drift (cells/step changing) | success median={np.median(late[y==1]):.2f} "
          f"failure median={np.median(late[y==0]):.2f}")
    print(f" AUC(-late answer-drift -> correct) = {auc(-late, y):.3f} Cohen d(fail-succ)={cohend(late[y==0],late[y==1]):+.2f}")
    settled = late < 0.5
    print(f" fraction with SETTLED answer (drift<0.5 cells/step): success={settled[y==1].mean():.3f} failure={settled[y==0].mean():.3f}")
    # full-grid for reference
    print(f" [ref] AUC(-late FULL-grid answer-drift -> correct) = {auc(-adf, y):.3f}")

    if grid is None:
        return

    # per-cell failure structure: are error cells connected (detour) or scattered?
    comps, errs, largest = [], [], []
    for k in np.where(y == 0)[0]:
        err = (preds[k] != labels[k]).reshape(grid)
        ne = int(err.sum())
        if ne == 0:
            continue
        c, sizes = connected_components(err)
        comps.append(c)
        errs.append(ne)
        largest.append(max(sizes) / ne)  # share of error cells in the biggest component
    comps = np.array(comps)
    errs = np.array(errs)
    if len(comps):
        print(f" per-failure error cells: median={int(np.median(errs))}; connected components: median={int(np.median(comps))}; "
              f"largest-component fraction median={np.median(largest):.2f}")
        frac_few = (comps <= 2).mean()
        print(f" -> {frac_few:.2f} of failures have <=2 error components (connected detour); "
              f"{(comps>=5).mean():.2f} have >=5 (scattered)")


if __name__ == "__main__":
    mz = FU / "maze_preds_step130200.npz"
    sd = FU / "sudoku_preds_step58590.npz"
    if mz.exists():
        analyze("MAZE", mz, grid=(30, 30))
    else:
        print("[pending] maze dump")
    if sd.exists():
        analyze("SUDOKU (control)", sd, grid=(9, 9))
    else:
        print("[pending] sudoku dump")
+A_conv_correct,4103,-0.861743,-0.856474,-0.902015,-0.815340,-0.974624,1.0000,4.0 +B_conv_wrong,63,-0.591228,-0.631761,-0.809065,-0.506116,-0.737965,0.6296,0.0 +C_nonconv_correct,195,-0.694297,-0.685632,-0.735417,-0.646644,-0.788653,1.0000,14.0 +D_nonconv_wrong,3831,-0.599770,-0.597011,-0.647486,-0.548848,-0.714284,0.6296,0.0 diff --git a/analysis_2x2/cells_trm_official58590_n2048.csv b/analysis_2x2/cells_trm_official58590_n2048.csv new file mode 100644 index 0000000..d805cf9 --- /dev/null +++ b/analysis_2x2/cells_trm_official58590_n2048.csv @@ -0,0 +1,5 @@ +cell,n,lam1_median,lam1_mean,lam1_q25,lam1_q75,lam8_median,token_acc_median,halted_at_median +A_conv_correct,1724,0.011282,0.015547,0.008467,0.016524,0.002090,1.0000,2.0 +B_conv_wrong,0,,,,,,, +C_nonconv_correct,70,0.081975,0.077830,0.068631,0.090418,0.008649,1.0000,14.0 +D_nonconv_wrong,254,0.102872,0.102872,0.092413,0.112425,0.018793,0.6296,0.0 diff --git a/analysis_2x2/cells_trm_official_gbs768_step58590_n512.csv b/analysis_2x2/cells_trm_official_gbs768_step58590_n512.csv new file mode 100644 index 0000000..2ff9465 --- /dev/null +++ b/analysis_2x2/cells_trm_official_gbs768_step58590_n512.csv @@ -0,0 +1,5 @@ +cell,n,lam1_median,lam1_mean,lam1_q25,lam1_q75,lam8_median,token_acc_median,halted_at_median +A_conv_correct,434,0.011053,0.015590,0.008054,0.017474,0.002147,1.0000,2.0 +B_conv_wrong,0,,,,,,, +C_nonconv_correct,14,0.082749,0.080288,0.070048,0.095862,0.006181,1.0000,14.0 +D_nonconv_wrong,64,0.103398,0.102595,0.092417,0.111892,0.021354,0.6296,0.0 diff --git a/analysis_2x2/cells_trm_singleGPU_step130205_n512.csv b/analysis_2x2/cells_trm_singleGPU_step130205_n512.csv new file mode 100644 index 0000000..1e9aec5 --- /dev/null +++ b/analysis_2x2/cells_trm_singleGPU_step130205_n512.csv @@ -0,0 +1,5 @@ +cell,n,lam1_median,lam1_mean,lam1_q25,lam1_q75,lam8_median,token_acc_median,halted_at_median +A_conv_correct,365,0.004312,0.009192,-0.000673,0.014061,-0.008585,1.0000,3.0 +B_conv_wrong,0,,,,,,, 
+C_nonconv_correct,22,0.085199,0.084005,0.069379,0.098772,0.015529,1.0000,15.0 +D_nonconv_wrong,125,0.106698,0.106791,0.099522,0.112582,0.032808,0.6296,0.0 diff --git a/analysis_2x2/cells_trm_singleGPU_step260410_n512.csv b/analysis_2x2/cells_trm_singleGPU_step260410_n512.csv new file mode 100644 index 0000000..4d11774 --- /dev/null +++ b/analysis_2x2/cells_trm_singleGPU_step260410_n512.csv @@ -0,0 +1,5 @@ +cell,n,lam1_median,lam1_mean,lam1_q25,lam1_q75,lam8_median,token_acc_median,halted_at_median +A_conv_correct,383,0.004744,0.010418,0.000744,0.013660,-0.004220,1.0000,2.0 +B_conv_wrong,0,,,,,,, +C_nonconv_correct,11,0.099828,0.089867,0.090672,0.103510,0.013320,1.0000,15.0 +D_nonconv_wrong,118,0.102321,0.102882,0.093520,0.111216,0.021455,0.6420,0.0 diff --git a/analysis_2x2/cells_trm_step13020_n512.csv b/analysis_2x2/cells_trm_step13020_n512.csv new file mode 100644 index 0000000..d3d1ff0 --- /dev/null +++ b/analysis_2x2/cells_trm_step13020_n512.csv @@ -0,0 +1,5 @@ +cell,n,lam1_median,lam1_mean,lam1_q25,lam1_q75,lam8_median,token_acc_median,halted_at_median +A_conv_correct,296,-0.004330,-0.002231,-0.011287,0.005197,-0.032494,1.0000,3.0 +B_conv_wrong,10,-0.002707,-0.003217,-0.008984,0.000914,-0.024164,0.5556,0.0 +C_nonconv_correct,9,0.041901,0.041647,0.032285,0.047971,-0.018014,1.0000,14.0 +D_nonconv_wrong,197,0.031663,0.030858,0.014350,0.046172,-0.012672,0.6420,0.0 diff --git a/analysis_2x2/characterization/characterization_results.md b/analysis_2x2/characterization/characterization_results.md new file mode 100644 index 0000000..2e604aa --- /dev/null +++ b/analysis_2x2/characterization/characterization_results.md @@ -0,0 +1,70 @@ +# Characterization of the FTLE success/failure separation + +Describes the separation (shape, spectrum, time-scaling, effect size). Mechanism = open. + +## Char 1 — full spectrum vs lambda1 (one mode or whole-spectrum shift?) 
+ +### TRM official @58590 (n=2048) +- per-exponent AUC(-λ_i→correct), i=1..8: 0.993, 0.994, 0.994, 0.993, 0.992, 0.987, 0.984, 0.977 +- median spectrum | success: +0.011, +0.009, +0.007, +0.006, +0.005, +0.004, +0.003, +0.002 +- median spectrum | failure: +0.103, +0.074, +0.057, +0.046, +0.036, +0.029, +0.024, +0.019 +- AUC λ1=0.993 | AUC spectral-mean=0.995 | AUC KS-proxy(Σλ⁺)=0.995 +- shift uniformity: success−failure median gap per exponent: -0.091, -0.065, -0.050, -0.040, -0.031, -0.026, -0.021, -0.017 + +### HRM joint @26040 (n=2048) +- per-exponent AUC(-λ_i→correct), i=1..8: 0.987, 0.990, 0.989, 0.990, 0.991, 0.992, 0.992, 0.993 +- median spectrum | success: -0.150, -0.163, -0.182, -0.191, -0.203, -0.212, -0.220, -0.227 +- median spectrum | failure: +0.031, +0.002, -0.018, -0.030, -0.041, -0.050, -0.056, -0.064 +- AUC λ1=0.987 | AUC spectral-mean=0.992 | AUC KS-proxy(Σλ⁺)=0.877 +- shift uniformity: success−failure median gap per exponent: -0.181, -0.165, -0.164, -0.161, -0.161, -0.162, -0.163, -0.163 + +### HRM @26040 (n=8192, est-2) +- per-exponent AUC(-λ_i→correct), i=1..8: 0.984, 0.989, 0.990, 0.991, 0.991, 0.991, 0.991, 0.991 +- median spectrum | success: -0.858, -0.887, -0.908, -0.927, -0.941, -0.952, -0.964, -0.972 +- median spectrum | failure: -0.600, -0.635, -0.656, -0.671, -0.684, -0.696, -0.706, -0.714 +- AUC λ1=0.984 | AUC spectral-mean=0.991 | AUC KS-proxy(Σλ⁺)=0.500 +- shift uniformity: success−failure median gap per exponent: -0.259, -0.252, -0.252, -0.255, -0.256, -0.257, -0.258, -0.258 + +## Char 2 — bimodality vs threshold-on-continuum + +BC>0.555 hints bimodal. outcome-rate-vs-λ1 sharpness: width of 25→75% outcome transition, +in λ1 units, divided by the 10–90 inter-percentile spread of λ1 (small = sharp threshold). 
+ +- TRM official @58590 (n=2048): BC(all)=0.855, BC(succ)=0.780, BC(fail)=0.303; outcome transition width/spread=0.298 (λ1@75%=+0.068, @25%=+0.094) +- HRM joint @26040 (n=2048): BC(all)=0.555, BC(succ)=0.331, BC(fail)=0.287; outcome transition width/spread=0.124 (λ1@75%=-0.069, @25%=-0.037) +- HRM @26040 (n=8192, est-2): BC(all)=0.554, BC(succ)=0.401, BC(fail)=0.258; outcome transition width/spread=0.150 (λ1@75%=-0.749, @25%=-0.693) + +## Char 3 — separation magnitude vs integration window H (all examples, final outcome) + +Distinct from E5 (which restricted to undecided@H for *prediction*). Here: how does the +two-class λ1 separation BUILD as the integration window grows? + +### TRM: H, AUC(-λ1→final correct), Cohen's d(fail−succ) +- H= 2: AUC=0.782, d=+1.059 +- H= 4: AUC=0.891, d=+1.759 +- H= 6: AUC=0.929, d=+2.250 +- H= 8: AUC=0.947, d=+2.680 +- H=10: AUC=0.964, d=+3.118 +- H=12: AUC=0.975, d=+3.577 +- H=16: AUC=0.993, d=+4.840 + +### HRM: H, AUC(-λ1→final correct), Cohen's d(fail−succ) +- H= 2: AUC=0.428, d=+0.028 +- H= 4: AUC=0.728, d=+0.932 +- H= 6: AUC=0.871, d=+1.691 +- H= 8: AUC=0.918, d=+2.154 +- H=10: AUC=0.940, d=+2.497 +- H=12: AUC=0.960, d=+2.821 +- H=16: AUC=0.987, d=+3.448 + +## Char 4 — effect size / overlap beyond AUC (full-window λ1 and KS-proxy) + +- TRM official @58590 (n=2048): + λ1: AUC=0.993, Cohen's d(fail−succ)=+4.84, Bhattacharyya(gauss)=0.042, hist-overlap=0.049 + KS-proxy: AUC=0.995, Cohen's d=+5.55, hist-overlap=0.039 +- HRM joint @26040 (n=2048): + λ1: AUC=0.987, Cohen's d(fail−succ)=+3.45, Bhattacharyya(gauss)=0.227, hist-overlap=0.074 + KS-proxy: AUC=0.877, Cohen's d=+1.14, hist-overlap=0.330 +- HRM @26040 (n=8192, est-2): + λ1: AUC=0.984, Cohen's d(fail−succ)=+3.34, Bhattacharyya(gauss)=0.248, hist-overlap=0.101 + KS-proxy: AUC=0.500, Cohen's d=+nan, hist-overlap=1.000 diff --git a/analysis_2x2/characterize_separation.py b/analysis_2x2/characterize_separation.py new file mode 100644 index 0000000..3677cf8 --- /dev/null +++ 
def auc(score, label):
    """Rank-based ROC AUC: Mann-Whitney U divided by n_pos * n_neg.

    Tied scores receive the average of the ranks they span, so a tie between a
    positive and a negative counts as half a win.

    score: 1-D array-like of real-valued scores (larger should mean label 1).
    label: 1-D array-like of 0/1 labels aligned with ``score``.
    Returns nan when either class has no members.
    """
    values = np.asarray(score, float)
    classes = np.asarray(label, int)
    positives = values[classes == 1]
    negatives = values[classes == 0]
    n_pos, n_neg = len(positives), len(negatives)
    if n_pos == 0 or n_neg == 0:
        return float("nan")

    # Positives are placed first so their ranks are the leading n_pos entries.
    pooled = np.concatenate([positives, negatives])
    perm = np.argsort(pooled, kind="mergesort")
    ordered = pooled[perm]

    # A tie group is a maximal run of equal sorted values; every member gets the
    # mean of the 1-based ranks covered by the run: start + (length + 1) / 2.
    run_start = np.flatnonzero(np.concatenate(([True], ordered[1:] != ordered[:-1])))
    run_len = np.diff(np.append(run_start, len(ordered)))
    ranks = np.empty(len(pooled))
    ranks[perm] = np.repeat(run_start + (run_len + 1) / 2.0, run_len)

    u_stat = ranks[:n_pos].sum() - n_pos * (n_pos + 1) / 2
    return float(u_stat / (n_pos * n_neg))
def bimodality_coeff(x):
    """Sarle's bimodality coefficient of a 1-D sample.

    BC = (g^2 + 1) / (k + 3 (n-1)^2 / ((n-2)(n-3))), where g is the skewness and
    k the excess kurtosis, both computed here from population (ddof=0) moments.
    Values above ~0.555 (the value for a uniform distribution) hint at bimodality.

    x: 1-D array-like of real values.
    Returns nan when the coefficient is undefined: fewer than 4 samples (the
    finite-sample term divides by (n-2)(n-3), which is zero for n = 2 or 3) or a
    sample with zero / non-finite spread.
    """
    x = np.asarray(x, float)
    n = len(x)
    if n < 4:
        # Previously n in {2, 3} raised ZeroDivisionError; n in {0, 1} already gave nan.
        return float("nan")
    m = x.mean()
    s = x.std()
    if not s > 0:  # also catches nan spread from non-finite inputs
        return float("nan")
    z = (x - m) / s
    g = np.mean(z ** 3)        # skewness
    k = np.mean(z ** 4) - 3.0  # excess kurtosis
    bc = (g ** 2 + 1) / (k + 3 * (n - 1) ** 2 / ((n - 2) * (n - 3)))
    return float(bc)  # >0.555 suggests bimodality (uniform=0.555)
range(k)), ""] + # spectrum figure + fig, ax = plt.subplots(figsize=(6, 4)) + xs = np.arange(1, k + 1) + ax.plot(xs, [np.median(ly[c == 1, i]) for i in range(k)], "o-", color="tab:green", label="success (median)") + ax.plot(xs, [np.median(ly[c == 0, i]) for i in range(k)], "s-", color="tab:red", label="failure (median)") + ax.fill_between(xs, [np.percentile(ly[c==1,i],25) for i in range(k)], [np.percentile(ly[c==1,i],75) for i in range(k)], color="tab:green", alpha=0.15) + ax.fill_between(xs, [np.percentile(ly[c==0,i],25) for i in range(k)], [np.percentile(ly[c==0,i],75) for i in range(k)], color="tab:red", alpha=0.15) + ax.axhline(0, color="gray", lw=0.6); ax.set_xlabel("exponent index"); ax.set_ylabel("λ_i") + ax.set_title(f"{name}: spectrum by outcome"); ax.legend(fontsize=8) + fig.tight_layout(); fig.savefig(OUT / f"spectrum_{name.split()[0]}_{'n8192' if '8192' in name else 'n2048'}.png", dpi=140); plt.close(fig) + +# ---------- Char 2: bimodality vs threshold ---------- +lines += ["## Char 2 — bimodality vs threshold-on-continuum", "", + "BC>0.555 hints bimodal. 
outcome-rate-vs-λ1 sharpness: width of 25→75% outcome transition,", + "in λ1 units, divided by the 10–90 inter-percentile spread of λ1 (small = sharp threshold).", ""] +for name, p in HEAD.items(): + d = np.load(p); l1 = d["lyap_spec"][:, 0].astype(float); c = d["exact_correct"].astype(int) + bc_all = bimodality_coeff(l1); bc_s = bimodality_coeff(l1[c == 1]); bc_f = bimodality_coeff(l1[c == 0]) + # outcome rate vs l1 (sorted, sliding) + order = np.argsort(l1); ls = l1[order]; cs = c[order] + win = max(50, len(l1) // 40); rate = np.convolve(cs, np.ones(win) / win, mode="valid") + lcent = ls[win // 2: win // 2 + len(rate)] + # find l1 where rate crosses 0.25 and 0.75 (rate decreases with l1) + def cross(target): + idx = np.where(np.diff((rate >= target).astype(int)) != 0)[0] + return float(lcent[idx[0]]) if len(idx) else float("nan") + l25, l75 = cross(0.25), cross(0.75) + spread = np.percentile(l1, 90) - np.percentile(l1, 10) + sharp = abs(l25 - l75) / spread if spread > 0 and np.isfinite(l25) and np.isfinite(l75) else float("nan") + lines += [f"- {name}: BC(all)={bc_all:.3f}, BC(succ)={bc_s:.3f}, BC(fail)={bc_f:.3f}; " + f"outcome transition width/spread={sharp:.3f} (λ1@75%={l75:+.3f}, @25%={l25:+.3f})"] + fig, ax = plt.subplots(1, 2, figsize=(10, 3.6)) + ax[0].hist(l1[c == 1], bins=50, alpha=0.55, color="tab:green", label="success", density=True) + ax[0].hist(l1[c == 0], bins=50, alpha=0.55, color="tab:red", label="failure", density=True) + ax[0].set_xlabel("λ1"); ax[0].set_ylabel("density"); ax[0].legend(fontsize=8); ax[0].set_title(f"{name}: λ1 by outcome") + ax[1].plot(lcent, rate, "-", color="tab:blue"); ax[1].axhline(0.5, color="gray", lw=0.5) + ax[1].set_xlabel("λ1"); ax[1].set_ylabel("P(correct)"); ax[1].set_title("outcome rate vs λ1") + fig.tight_layout(); fig.savefig(OUT / f"bimodal_{name.split()[0]}_{'n8192' if '8192' in name else 'n2048'}.png", dpi=140); plt.close(fig) +lines.append("") + +# ---------- Char 3: separation vs integration window ---------- 
+lines += ["## Char 3 — separation magnitude vs integration window H (all examples, final outcome)", "", + "Distinct from E5 (which restricted to undecided@H for *prediction*). Here: how does the", + "two-class λ1 separation BUILD as the integration window grows?", ""] +finals = {"trm": np.load(RETEST / "trm_gbs768_step58590_full_n2048.npz"), + "hrm": np.load(RETEST / "hrm_righteous_step26040_full_n2048.npz")} +short4 = {"trm": RETEST / "trm_gbs768_step58590_short_n2048.npz", + "hrm": RETEST / "hrm_righteous_step26040_short_n2048.npz"} +for model in ["trm", "hrm"]: + fin = finals[model]; fi = fin["idx"]; yf_all = fin["exact_correct"].astype(int) + rows = [] + for H in [2, 4, 6, 8, 10, 12, 16]: + if H == 16: + s = fin + elif H == 4: + s = np.load(short4[model]) + else: + s = np.load(P1 / f"{'trm_official58590' if model=='trm' else 'hrm26040'}_h{H}_n2048.npz") + si = s["idx"]; common, fp, sp = np.intersect1d(fi, si, return_indices=True) + yf = yf_all[fp]; l1 = s["lyap_spec"][sp, 0].astype(float) + rows.append((H, auc(-l1, yf), cohens_d(l1[yf == 0], l1[yf == 1]))) + lines.append(f"### {model.upper()}: H, AUC(-λ1→final correct), Cohen's d(fail−succ)") + for H, a, dd in rows: + lines.append(f"- H={H:2d}: AUC={a:.3f}, d={dd:+.3f}") + lines.append("") + fig, ax = plt.subplots(figsize=(6, 4)); H = [r[0] for r in rows] + ax.plot(H, [r[1] for r in rows], "o-", label="AUC(-λ1→correct)") + ax2 = ax.twinx(); ax2.plot(H, [abs(r[2]) for r in rows], "s--", color="tab:purple", label="|Cohen's d|") + ax.axhline(0.5, color="gray", lw=0.5); ax.set_xlabel("integration window H (segments)") + ax.set_ylabel("AUC"); ax2.set_ylabel("|Cohen's d|"); ax.set_title(f"{model.upper()}: λ1 separation vs window") + ax.set_ylim(0.4, 1.0); fig.tight_layout(); fig.savefig(OUT / f"sep_vs_window_{model}.png", dpi=140); plt.close(fig) + +# ---------- Char 4: effect size ---------- +lines += ["## Char 4 — effect size / overlap beyond AUC (full-window λ1 and KS-proxy)", ""] +for name, p in HEAD.items(): + 
#!/usr/bin/env bash
# Run after the early-save training: cheap forward dumps (connectivity + drift_zH + ans_drift,
# NO JVP) on all saved early checkpoints, then pooled broken-vs-complete analysis.
# Usage: dump_early_ckpts.sh [GPU_INDEX]   (GPU index defaults to 1)
set -o pipefail
# Without nullglob an empty checkpoint directory makes the loop run once on the literal "step_*".
shopt -s nullglob
# Abort instead of dumping/analyzing from whatever directory we happened to start in.
cd /home/yurenh2/rrm/research/flossing || exit 1
source /home/yurenh2/miniconda3/etc/profile.d/conda.sh
conda activate rrm
export CUDA_VISIBLE_DEVICES="${1:-1}"
CK=/home/yurenh2/rrm/trm/checkpoints/Maze-30x30-hard-1k-ACT-torch/maze_earlysave_freshTRM
DATA=/home/yurenh2/rrm/data/maze-30x30-hard-1k
OUT=analysis_2x2/maze_followup
mkdir -p "$OUT"
for f in "$CK"/step_*; do
  s=$(basename "$f")
  # Dumps are resumable: skip checkpoints that already have an output file.
  [[ -f "$OUT/earlydump_${s}.npz" ]] && { echo "skip $s"; continue; }
  echo "=== dump $s ==="
  # Best-effort per checkpoint: a failed dump prints its last line and the loop continues.
  python maze_pred_dump.py --ckpt-root "$CK" --ckpt-name "$s" --data "$DATA" \
    --n 512 --seed 20260616 --out "$OUT/earlydump_${s}.npz" 2>&1 | tail -1
done
echo "=== pooled analysis ==="
python analysis_2x2/analyze_early_connectivity.py
b/analysis_2x2/early_pairing_trm_official58590.md @@ -0,0 +1,17 @@ +# Early-window pairing — trm_official58590 +- paired n=2048; final acc=0.8760; already-correct@step4=0.6943 +- of final-correct, fraction already correct@4: 0.7926 +- early-window lam1: final-correct med +0.0178, final-wrong med +0.1075 + +## Forecasting FINAL outcome from the first 4 ACT steps +- AUC(-lam1_early -> final correct) = 0.891 +- AUC(-drift@4 -> final correct) = 0.800 +- AUC(q_halt@4 -> final correct) = 0.901 +- reference: AUC(-lam1_full -> final correct) = 0.993 + +## Restricted to examples NOT yet correct at step 4 (the decision-relevant set) +- n=626, of which eventually correct: 372 (0.594) +- AUC(-lam1_early -> eventually correct) = 0.543 +- AUC(-drift@4 -> eventually correct) = 0.492 +- AUC(q_halt@4 -> eventually correct) = 0.521 +- early lam1 med: eventually-correct +0.1060 vs never-correct +0.1075 \ No newline at end of file diff --git a/analysis_2x2/early_window_pairing.py b/analysis_2x2/early_window_pairing.py new file mode 100644 index 0000000..6277df2 --- /dev/null +++ b/analysis_2x2/early_window_pairing.py @@ -0,0 +1,85 @@ +"""Early-window (first 4 ACT steps) vs full-window pairing. + +Question: does the early-window FTLE forecast FINAL failure before convergence? +- join full/short npz per example via idx (same seed/n => same sampling) +- target label = final exact_correct from the FULL-window run +- report: AUC(-lam1_early -> final correct), same for early drift and q_halt@4, + and the conditional version restricted to examples NOT yet correct at step 4 + (short run's own exact_correct == 0) — the practically relevant population for + early-exit / compute-reallocation decisions. + +Observational only. 
def auc_rank(score: np.ndarray, label: np.ndarray) -> float:
    """Rank-based ROC AUC (Mann-Whitney U / (n_pos * n_neg)) with tie-averaged ranks.

    Inputs are coerced with ``np.asarray`` so plain lists and boolean labels work,
    matching the ``auc`` helper in characterize_separation.py.

    score: 1-D array-like of real scores (larger should mean label 1).
    label: 1-D array-like of 0/1 (or bool) labels aligned with ``score``.
    Returns nan when either class is empty.
    """
    score = np.asarray(score, dtype=float)
    label = np.asarray(label, dtype=int)
    pos, neg = score[label == 1], score[label == 0]
    n_pos, n_neg = len(pos), len(neg)
    if n_pos == 0 or n_neg == 0:
        return float("nan")

    # Positives go first so their ranks are ranks[:n_pos].
    allv = np.concatenate([pos, neg])
    order = np.argsort(allv, kind="mergesort")
    ranks = np.empty(len(allv))
    ranks[order] = np.arange(1, len(allv) + 1)

    # Replace the ranks inside every run of equal values by the run's mean rank.
    sv = allv[order]
    i = 0
    while i < len(sv):
        j = i
        while j + 1 < len(sv) and sv[j + 1] == sv[i]:
            j += 1
        if j > i:
            tied = order[i:j + 1]
            ranks[tied] = ranks[tied].mean()
        i = j + 1

    u_stat = ranks[:n_pos].sum() - n_pos * (n_pos + 1) / 2
    return float(u_stat / (n_pos * n_neg))
y_final):.3f}", + f"- AUC(-drift@4 -> final correct) = {auc_rank(-drift4, y_final):.3f}", + f"- AUC(q_halt@4 -> final correct) = {auc_rank(q4, y_final):.3f}", + f"- reference: AUC(-lam1_full -> final correct) = {auc_rank(-l1_full, y_final):.3f}", + "", + "## Restricted to examples NOT yet correct at step 4 (the decision-relevant set)"] + m = y_early == 0 + n_m = int(m.sum()); n_pos = int(y_final[m].sum()) + lines += [f"- n={n_m}, of which eventually correct: {n_pos} ({n_pos/max(n_m,1):.3f})", + f"- AUC(-lam1_early -> eventually correct) = {auc_rank(-l1_early[m], y_final[m]):.3f}", + f"- AUC(-drift@4 -> eventually correct) = {auc_rank(-drift4[m], y_final[m]):.3f}", + f"- AUC(q_halt@4 -> eventually correct) = {auc_rank(q4[m], y_final[m]):.3f}", + f"- early lam1 med: eventually-correct {np.median(l1_early[m & (y_final==1)]):+.4f} vs " + f"never-correct {np.median(l1_early[m & (y_final==0)]):+.4f}"] + + out = HERE / f"early_pairing_{tag}.md" + out.write_text("\n".join(lines)) + print("\n".join(lines)) + print("wrote", out) + + +if __name__ == "__main__": + main(sys.argv[1], sys.argv[2], sys.argv[3]) diff --git a/analysis_2x2/evolution_hrm.csv b/analysis_2x2/evolution_hrm.csv new file mode 100644 index 0000000..c6a6b10 --- /dev/null +++ b/analysis_2x2/evolution_hrm.csv @@ -0,0 +1,11 @@ +step,acc,tau,fA,fB,fC,fD,l1A,l1B,l1C,l1D +2604,0.015625,0.32445240020751953,0.015625,0.892578125,0.0,0.091796875,-0.11765776202082634,-0.10138508677482605,nan,-0.08651512861251831 +5208,0.048828125,0.40048032999038696,0.029296875,0.900390625,0.01953125,0.05078125,-0.17876404523849487,-0.12725482881069183,-0.18487446755170822,-0.11079395189881325 +7812,0.15234375,0.6775624752044678,0.119140625,0.7734375,0.033203125,0.07421875,-0.20277076959609985,-0.1001129113137722,-0.19649504125118256,-0.12255467846989632 +10416,0.1796875,0.7741286754608154,0.166015625,0.599609375,0.013671875,0.220703125,-0.18029353022575378,-0.06513682007789612,-0.038433440029621124,-0.0504322350025177 
+13020,0.30078125,0.8470979928970337,0.283203125,0.2421875,0.017578125,0.45703125,-0.17255809903144836,-0.024668633937835693,-0.07013614475727081,-0.010587955359369516 +15624,0.333984375,0.8144785761833191,0.3125,0.1484375,0.021484375,0.517578125,-0.19566883146762848,0.0031415667617693543,-0.05802934989333153,0.009074798785150051 +18228,0.474609375,0.8437007665634155,0.439453125,0.001953125,0.03515625,0.5234375,-0.08657147735357285,nan,-0.0343703031539917,0.012321019545197487 +20832,0.45703125,0.6549708247184753,0.423828125,0.0078125,0.033203125,0.53515625,-0.09613814204931259,-0.022231140173971653,-0.04714604467153549,0.001996797218453139 +23436,0.505859375,0.7171876430511475,0.484375,0.01171875,0.021484375,0.482421875,-0.14968957751989365,-0.04964173026382923,-0.06192338466644287,-0.0044072768650949 +26040,0.5,0.7611898183822632,0.47265625,0.0078125,0.02734375,0.4921875,-0.1546676605939865,0.0052672732854261994,-0.03868532460182905,0.023184756748378277 diff --git a/analysis_2x2/evolution_trm.csv b/analysis_2x2/evolution_trm.csv new file mode 100644 index 0000000..a0b19f5 --- /dev/null +++ b/analysis_2x2/evolution_trm.csv @@ -0,0 +1,11 @@ +step,acc,tau,fA,fB,fC,fD,l1A,l1B,l1C,l1D +26041,0.576171875,0.7283560037612915,0.560546875,0.01171875,0.015625,0.412109375,-0.04085663706064224,-0.0017999516567215323,0.015885045286267996,0.035852354019880295 +52082,0.6484375,0.8146334886550903,0.626953125,0.0,0.021484375,0.3515625,-0.005659707821905613,nan,0.05140006169676781,0.060247575864195824 +78123,0.6875,0.9091860055923462,0.66015625,0.0,0.02734375,0.3125,-0.004423868143931031,nan,0.05615279637277126,0.08166220411658287 +104164,0.7109375,1.0002410411834717,0.681640625,0.0,0.029296875,0.2890625,-0.006436891388148069,nan,0.08611240983009338,0.09516968205571175 +130205,0.755859375,1.2130658626556396,0.712890625,0.0,0.04296875,0.244140625,0.004312093835324049,nan,0.08519911020994186,0.10669758170843124 
+156246,0.7578125,1.2462413311004639,0.73046875,0.0,0.02734375,0.2421875,0.03479858115315437,nan,0.08626002073287964,0.10777830705046654 +182287,0.7421875,1.3035683631896973,0.712890625,0.0,0.029296875,0.2578125,-0.006856455933302641,nan,0.09087501466274261,0.10055964067578316 +208328,0.75,1.3776456117630005,0.720703125,0.0,0.029296875,0.25,0.03374895453453064,nan,0.08769012242555618,0.10804034024477005 +234369,0.7734375,1.36112380027771,0.7421875,0.0,0.03125,0.2265625,0.011585433036088943,nan,0.09419485554099083,0.10411781445145607 +260410,0.76953125,1.406412124633789,0.748046875,0.0,0.021484375,0.23046875,0.004744160454720259,nan,0.09982780367136002,0.10232088342308998 diff --git a/analysis_2x2/offline_followups.py b/analysis_2x2/offline_followups.py new file mode 100644 index 0000000..8101acb --- /dev/null +++ b/analysis_2x2/offline_followups.py @@ -0,0 +1,229 @@ +"""Offline follow-ups to the 2x2 analysis (no GPU): + +1. Residual outcome signal within the unsettled stratum (HRM diag_8k primary, + TRM official @58590 secondary): per-cell drift profiles over the 16 ACT steps, + end-of-window drift slope, q_halt trajectories, halted_at, lambda spectra with a + STRICT in-band threshold, and per-drift-decile AUC(lambda1 -> correct) within the + unsettled stratum (does lambda1 add signal beyond drift level?). +2. Per-example profile of the strict-band settled-but-wrong examples (HRM, n~21). +3. Difficulty control: #givens per puzzle (input tokens != 1) joined via idx; + lambda1 ~ givens rank correlation overall/within outcome, and per-givens-bin + AUC(-lambda1 -> correct). + +Observational only. Outputs to analysis_2x2/offline_followups/. 
def spearman(a: np.ndarray, b: np.ndarray) -> float:
    """Spearman rank correlation using tie-averaged (fractional) ranks.

    The previous ``argsort(argsort(x))`` ranking gave tied values distinct,
    order-dependent ranks. This function is called on heavily tied data (integer
    #givens, 0/1 correctness), where that makes the coefficient depend on the
    arbitrary order of the examples. Tied values now share the mean of the ranks
    they span, which is the standard definition.

    a, b: 1-D array-likes of equal length.
    Returns the Pearson correlation of the two rank vectors (nan if either input
    is constant).
    """
    def _avg_ranks(v) -> np.ndarray:
        # 1-based ranks; each run of equal values gets start + (length + 1) / 2.
        v = np.asarray(v, dtype=float)
        order = np.argsort(v, kind="mergesort")
        sv = v[order]
        starts = np.flatnonzero(np.concatenate(([True], sv[1:] != sv[:-1])))
        counts = np.diff(np.append(starts, len(sv)))
        ranks = np.empty(len(sv))
        ranks[order] = np.repeat(starts + (counts + 1) / 2.0, counts)
        return ranks

    return float(np.corrcoef(_avg_ranks(a), _avg_ranks(b))[0, 1])
plt.subplots(1, 2, figsize=(11, 4)) + steps = np.arange(1, ds["drift_zH"].shape[1] + 1) + for nm, m in ds["cells"].items(): + if m.sum() < 3: + continue + med = np.median(ds["drift_zH"][m], axis=0) + q1 = np.percentile(ds["drift_zH"][m], 25, axis=0) + q3 = np.percentile(ds["drift_zH"][m], 75, axis=0) + axes[0].plot(steps, med, "o-", ms=3, color=CELL_COLORS[nm], label=f"{nm} (n={int(m.sum())})") + axes[0].fill_between(steps, q1, q3, color=CELL_COLORS[nm], alpha=0.15) + qm = np.median(ds["q_halt"][m], axis=0) + axes[1].plot(steps, qm, "o-", ms=3, color=CELL_COLORS[nm], label=nm) + axes[0].set_yscale("log"); axes[0].set_xlabel("ACT step"); axes[0].set_ylabel("drift_zH (median, IQR)") + axes[0].legend(fontsize=8); axes[0].set_title(f"{tag}: drift profiles per cell") + axes[1].set_xlabel("ACT step"); axes[1].set_ylabel("q_halt (median)"); axes[1].axhline(0, color="gray", lw=0.6) + axes[1].set_title(f"{tag}: q_halt per cell"); axes[1].legend(fontsize=8) + fig.tight_layout(); fig.savefig(OUT / f"fig_{tag}_profiles.png", dpi=150); plt.close(fig) + + # end-of-window slope: log10 mean(drift[13:16]) - log10 mean(drift[9:12]) + slope = (np.log10(np.clip(ds["drift_zH"][:, 12:16].mean(1), 1e-12, None)) + - np.log10(np.clip(ds["drift_zH"][:, 8:12].mean(1), 1e-12, None))) + lines.append(f"\n### {tag}: end-of-window drift slope (log10 steps13-16 vs 9-12; <0 = still descending)") + for nm, m in ds["cells"].items(): + if m.sum() == 0: + lines.append(f"- {nm}: n=0") + continue + lines.append(f"- {nm}: n={int(m.sum())}, slope median {np.median(slope[m]):+.4f}, " + f"IQR [{np.percentile(slope[m],25):+.4f}, {np.percentile(slope[m],75):+.4f}], " + f"frac still descending (<-0.01): {float((slope[m] < -0.01).mean()):.2f}") + return slope + + +def main() -> None: + lines = ["# Offline follow-ups (no GPU) — 2026-06-11", + "", + "Strict in-band thresholds: HRM pct45 of pooled log10 late-drift; TRM pct60 (band edge; B=0 regardless).", + "All numbers observational; within-dataset comparisons 
only."] + + # ---------- HRM diag_8k ---------- + hrm = load(FLOSS / "diag_8k.npz", strict_pct=45) + tag = "hrm26040_n8192_strict" + lines.append(f"\n## HRM @26040 (n=8192), strict tau(log10)={hrm['tau_strict']:.4f}") + lines.append("| cell | n | lam1 med | lam8 med | token_acc med | halted_at med | q_halt_final med | givens med |") + lines.append("|---|---|---|---|---|---|---|---|") + g_hrm = givens_for(hrm["idx"]) + for nm, m in hrm["cells"].items(): + if m.sum() == 0: + lines.append(f"| {nm} | 0 | | | | | | |") + continue + lines.append( + f"| {nm} | {int(m.sum())} | {np.median(hrm['lyap_spec'][m,0]):+.4f} | {np.median(hrm['lyap_spec'][m,-1]):+.4f} " + f"| {np.median(hrm['token_acc'][m]):.3f} | {np.median(hrm['halted_at'][m]):.0f} " + f"| {np.median(hrm['q_halt'][m,-1]):+.2f} | {np.median(g_hrm[m]):.0f} |") + + slope = drift_profiles_fig(hrm, tag, lines) + + # residual signal within unsettled stratum: per-drift-decile AUC + uns = ~(hrm["cells"]["A"] | hrm["cells"]["B"]) + c = hrm["exact_correct"].astype(int) + lines.append("\n### HRM unsettled stratum: AUC(-lam1 -> correct) per log-drift decile") + lines.append("| decile | drift range (log10) | n | n_correct | AUC |") + lines.append("|---|---|---|---|---|") + ld_u, l1_u, c_u = hrm["logd_late"][uns], hrm["lyap_spec"][uns, 0], c[uns] + qs = np.percentile(ld_u, np.arange(0, 101, 10)) + aucs, ws = [], [] + for i in range(10): + m = (ld_u >= qs[i]) & (ld_u <= qs[i + 1] if i == 9 else ld_u < qs[i + 1]) + a = auc_rank(-l1_u[m], c_u[m]) + if not np.isnan(a) and c_u[m].sum() >= 5: + aucs.append(a); ws.append(m.sum()) + lines.append(f"| {i+1} | [{qs[i]:.2f}, {qs[i+1]:.2f}] | {int(m.sum())} | {int(c_u[m].sum())} | " + f"{a:.3f} |" if not np.isnan(a) else f"| {i+1} | [{qs[i]:.2f}, {qs[i+1]:.2f}] | {int(m.sum())} | {int(c_u[m].sum())} | n/a |") + if aucs: + lines.append(f"- weighted mean within-decile AUC = {np.average(aucs, weights=ws):.3f} " + f"(vs unconditioned within-unsettled AUC {auc_rank(-l1_u, c_u):.3f})") + + # 
also: does end-slope separate C from D? + lines.append(f"- AUC(-end_slope -> correct | unsettled) = {auc_rank(-slope[uns], c_u):.3f} " + f"(C still-descending fraction vs D, see slope table above)") + + # ---------- strict-B per-example table ---------- + B = hrm["cells"]["B"] + lines.append(f"\n## HRM strict-band settled-but-wrong examples (n={int(B.sum())})") + lines.append("| idx | givens | token_acc | lam1 | drift_final | halted_at | q_halt_final |") + lines.append("|---|---|---|---|---|---|---|") + bi = np.where(B)[0] + order = np.argsort(hrm["token_acc"][bi]) + for j in bi[order]: + lines.append( + f"| {int(hrm['idx'][j])} | {int(g_hrm[j])} | {hrm['token_acc'][j]:.3f} | {hrm['lyap_spec'][j,0]:+.3f} " + f"| {hrm['drift_zH'][j,-1]:.3f} | {int(hrm['halted_at'][j])} | {hrm['q_halt'][j,-1]:+.2f} |") + # B drift profiles vs A band + fig, ax = plt.subplots(figsize=(6.5, 4)) + steps = np.arange(1, 17) + A = hrm["cells"]["A"] + ax.fill_between(steps, np.percentile(hrm["drift_zH"][A], 10, axis=0), + np.percentile(hrm["drift_zH"][A], 90, axis=0), color="tab:green", alpha=0.2, + label=f"A q10-q90 (n={int(A.sum())})") + for j in bi: + ax.plot(steps, hrm["drift_zH"][j], "-", lw=1, alpha=0.8, color="tab:orange") + ax.set_yscale("log"); ax.set_xlabel("ACT step"); ax.set_ylabel("drift_zH") + ax.set_title("HRM: strict-B drift profiles vs A band"); ax.legend(fontsize=8) + fig.tight_layout(); fig.savefig(OUT / "fig_hrm_strictB_profiles.png", dpi=150); plt.close(fig) + + # ---------- difficulty control (HRM) ---------- + l1 = hrm["lyap_spec"][:, 0] + lines.append("\n## HRM difficulty control (#givens, input tokens != 1)") + lines.append(f"- givens: min {g_hrm.min()}, median {np.median(g_hrm):.0f}, max {g_hrm.max()}") + lines.append(f"- Spearman(lam1, givens): overall {spearman(l1, g_hrm):+.3f}; " + f"correct-only {spearman(l1[c==1], g_hrm[c==1]):+.3f}; " + f"wrong-only {spearman(l1[c==0], g_hrm[c==0]):+.3f}") + lines.append(f"- Spearman(correct, givens) = 
{spearman(c.astype(float), g_hrm):+.3f}") + lines.append("\n| givens bin | n | acc | AUC(-lam1 -> correct) |") + lines.append("|---|---|---|---|") + edges = np.unique(np.percentile(g_hrm, [0, 20, 40, 60, 80, 100])) + bin_aucs, bin_ws = [], [] + for i in range(len(edges) - 1): + m = (g_hrm >= edges[i]) & (g_hrm <= edges[i + 1] if i == len(edges) - 2 else g_hrm < edges[i + 1]) + a = auc_rank(-l1[m], c[m]) + lines.append(f"| [{edges[i]:.0f}, {edges[i+1]:.0f}] | {int(m.sum())} | {c[m].mean():.3f} | {a:.3f} |") + if not np.isnan(a): + bin_aucs.append(a); bin_ws.append(m.sum()) + lines.append(f"- weighted mean within-bin AUC = {np.average(bin_aucs, weights=bin_ws):.3f} (overall 0.984)") + + # ---------- TRM official @58590 (secondary, n=512) ---------- + trm = load(FLOSS / "official_gbs768_spectrum/trm_gbs768_base_step58590_n512_k8_seed20260602.npz", strict_pct=60) + g_trm = givens_for(trm["idx"]) + ct = trm["exact_correct"].astype(int) + lines.append(f"\n## TRM official @58590 (n=512), strict tau(log10)={trm['tau_strict']:.4f}") + lines.append("| cell | n | lam1 med | token_acc med | q_halt_final med | givens med |") + lines.append("|---|---|---|---|---|---|") + for nm, m in trm["cells"].items(): + if m.sum() == 0: + lines.append(f"| {nm} | 0 | | | | |") + continue + lines.append(f"| {nm} | {int(m.sum())} | {np.median(trm['lyap_spec'][m,0]):+.4f} " + f"| {np.median(trm['token_acc'][m]):.3f} | {np.median(trm['q_halt'][m,-1]):+.2f} " + f"| {np.median(g_trm[m]):.0f} |") + drift_profiles_fig(trm, "trm_official58590_n512_strict", lines) + l1t = trm["lyap_spec"][:, 0] + lines.append(f"- Spearman(lam1, givens): overall {spearman(l1t, g_trm):+.3f}; " + f"wrong-only {spearman(l1t[ct==0], g_trm[ct==0]):+.3f}") + lines.append(f"- Spearman(correct, givens) = {spearman(ct.astype(float), g_trm):+.3f}") + + (OUT / "followups.md").write_text("\n".join(lines)) + print("\n".join(lines[:6])) + print("wrote", OUT / "followups.md") + + +if __name__ == "__main__": + main() diff --git 
a/analysis_2x2/offline_followups/followups.md b/analysis_2x2/offline_followups/followups.md new file mode 100644 index 0000000..be5f519 --- /dev/null +++ b/analysis_2x2/offline_followups/followups.md @@ -0,0 +1,88 @@ +# Offline follow-ups (no GPU) — 2026-06-11 + +Strict in-band thresholds: HRM pct45 of pooled log10 late-drift; TRM pct60 (band edge; B=0 regardless). +All numbers observational; within-dataset comparisons only. + +## HRM @26040 (n=8192), strict tau(log10)=-0.0129 +| cell | n | lam1 med | lam8 med | token_acc med | halted_at med | q_halt_final med | givens med | +|---|---|---|---|---|---|---|---| +| A | 3665 | -0.8670 | -0.9787 | 1.000 | 4 | +7.47 | 26 | +| B | 21 | -0.8421 | -0.9495 | 0.617 | 6 | +7.47 | 25 | +| C | 633 | -0.7796 | -0.8815 | 1.000 | 10 | +7.47 | 25 | +| D | 3873 | -0.5991 | -0.7140 | 0.630 | 0 | -9.62 | 25 | + +### hrm26040_n8192_strict: end-of-window drift slope (log10 steps13-16 vs 9-12; <0 = still descending) +- A: n=3665, slope median -0.0023, IQR [-0.0073, +0.0012], frac still descending (<-0.01): 0.20 +- B: n=21, slope median -0.0063, IQR [-0.4005, -0.0009], frac still descending (<-0.01): 0.48 +- C: n=633, slope median -0.0006, IQR [-1.4084, +0.0088], frac still descending (<-0.01): 0.44 +- D: n=3873, slope median -0.0031, IQR [-0.0556, +0.0459], frac still descending (<-0.01): 0.46 + +### HRM unsettled stratum: AUC(-lam1 -> correct) per log-drift decile +| decile | drift range (log10) | n | n_correct | AUC | +|---|---|---|---|---| +| 1 | [-0.01, 0.66] | 451 | 422 | 0.966 | +| 2 | [0.66, 1.42] | 450 | 50 | 0.972 | +| 3 | [1.42, 1.52] | 451 | 4 | 0.988 | +| 4 | [1.52, 1.56] | 450 | 6 | 0.964 | +| 5 | [1.56, 1.60] | 451 | 1 | 0.984 | +| 6 | [1.60, 1.62] | 450 | 7 | 0.949 | +| 7 | [1.62, 1.65] | 451 | 5 | 0.837 | +| 8 | [1.65, 1.68] | 450 | 5 | 0.851 | +| 9 | [1.68, 1.71] | 451 | 16 | 0.804 | +| 10 | [1.71, 1.93] | 451 | 117 | 0.685 | +- weighted mean within-decile AUC = 0.879 (vs unconditioned within-unsettled AUC 0.933) +- 
AUC(-end_slope -> correct | unsettled) = 0.605 (C still-descending fraction vs D, see slope table above) + +## HRM strict-band settled-but-wrong examples (n=21) +| idx | givens | token_acc | lam1 | drift_final | halted_at | q_halt_final | +|---|---|---|---|---|---|---| +| 342267 | 17 | 0.407 | -0.867 | 0.976 | 5 | +7.41 | +| 212705 | 17 | 0.469 | -0.838 | 0.964 | 8 | +7.44 | +| 329832 | 17 | 0.481 | -0.703 | 0.970 | 8 | +7.41 | +| 20075 | 27 | 0.519 | -0.812 | 0.966 | 5 | +7.50 | +| 198242 | 25 | 0.568 | -0.843 | 0.980 | 7 | +7.47 | +| 223591 | 24 | 0.580 | -0.939 | 0.951 | 4 | +7.47 | +| 238704 | 27 | 0.593 | -0.931 | 0.953 | 5 | +7.47 | +| 364431 | 25 | 0.593 | -0.806 | 0.956 | 6 | +7.44 | +| 274637 | 26 | 0.593 | -0.859 | 0.979 | 6 | +7.47 | +| 182424 | 24 | 0.605 | -0.985 | 0.949 | 6 | +7.47 | +| 351919 | 25 | 0.617 | -0.742 | 0.965 | 5 | +7.47 | +| 123022 | 27 | 0.617 | -0.826 | 0.951 | 7 | +7.50 | +| 150426 | 25 | 0.630 | -0.767 | 0.963 | 9 | +7.47 | +| 175427 | 26 | 0.630 | -0.843 | 0.946 | 8 | +7.50 | +| 422185 | 26 | 0.642 | -0.841 | 0.946 | 7 | +7.47 | +| 344032 | 24 | 0.654 | -0.903 | 0.965 | 4 | +7.53 | +| 30703 | 25 | 0.691 | -0.732 | 0.972 | 6 | +7.53 | +| 386549 | 23 | 0.691 | -0.842 | 0.966 | 4 | +7.47 | +| 3370 | 26 | 0.716 | -0.861 | 0.955 | 6 | +7.47 | +| 243909 | 24 | 0.753 | -0.786 | 0.969 | 8 | +7.50 | +| 258307 | 25 | 0.877 | -0.918 | 0.952 | 5 | +7.47 | + +## HRM difficulty control (#givens, input tokens != 1) +- givens: min 17, median 25, max 36 +- Spearman(lam1, givens): overall -0.350; correct-only -0.155; wrong-only -0.180 +- Spearman(correct, givens) = +0.276 + +| givens bin | n | acc | AUC(-lam1 -> correct) | +|---|---|---|---| +| [17, 24] | 1152 | 0.321 | 0.976 | +| [24, 25] | 1795 | 0.373 | 0.980 | +| [25, 26] | 1764 | 0.503 | 0.987 | +| [26, 36] | 3481 | 0.681 | 0.983 | +- weighted mean within-bin AUC = 0.982 (overall 0.984) + +## TRM official @58590 (n=512), strict tau(log10)=1.0240 +| cell | n | lam1 med | token_acc med | 
q_halt_final med | givens med | +|---|---|---|---|---|---| +| A | 307 | +0.0105 | 1.000 | +7.78 | 26 | +| B | 0 | | | | | +| C | 141 | +0.0174 | 1.000 | +7.81 | 25 | +| D | 64 | +0.1034 | 0.630 | -11.12 | 25 | + +### trm_official58590_n512_strict: end-of-window drift slope (log10 steps13-16 vs 9-12; <0 = still descending) +- A: n=307, slope median -0.1471, IQR [-0.2267, -0.0641], frac still descending (<-0.01): 0.90 +- B: n=0 +- C: n=141, slope median -0.0080, IQR [-0.2603, +0.0808], frac still descending (<-0.01): 0.49 +- D: n=64, slope median -0.0125, IQR [-0.0525, +0.0276], frac still descending (<-0.01): 0.53 +- Spearman(lam1, givens): overall -0.240; wrong-only -0.238 +- Spearman(correct, givens) = +0.148 \ No newline at end of file diff --git a/analysis_2x2/offline_followups/phase1_e1.md b/analysis_2x2/offline_followups/phase1_e1.md new file mode 100644 index 0000000..b3e3f40 --- /dev/null +++ b/analysis_2x2/offline_followups/phase1_e1.md @@ -0,0 +1,27 @@ +# E1 offline batch — bootstrap CIs, settling robustness, TRM multi4 pair + +## Bootstrap / exact CIs (TRM official @58590, n=2048) +- settled-wrong fraction: observed 0/254; exact 95% upper bound 0.0117 (1.17% of failures) +- AUC(-lam1->correct) = 0.9935, bootstrap 95% CI (0.9908244697676584, 0.9957330475628791) +- lam1(wrong) median 95% CI (0.10100110620260239, 0.10556983947753906) +- lam1(correct) median 95% CI (0.011215815320611, 0.011744528077542782) + +## Bootstrap CIs (HRM @26040, n=8192, strict band) +- strict settled-wrong fraction of failures: observed 0.0054, bootstrap 95% CI (0.0032613427182413084, 0.007798538095694945) +- AUC(-lam1->correct) = 0.9841, bootstrap 95% CI (0.9815470536412456, 0.9865145187475995) + +## Settling-criterion robustness (B-cell counts under alternative drift definitions) +- TRM official n=2048 | zH: B=0/A=1724 (tau=1.36) | zL: B=0/A=1728 (tau=1.42) | combined: B=0/A=1727 (tau=1.54) +- HRM n=8192 | zH: B=63/A=4103 (tau=0.77) | zL: B=59/A=4083 (tau=1.01) | combined: 
B=60/A=4087 (tau=1.07) + +## TRM official-pipeline multi4 vs baseline (matched objective, n=512 each) +- baseline @58590: acc=0.875; A/B/C/D=434/0/14/64; fD=0.125; lam1(D)=+0.1034; lam1(A)=+0.0111 +- multi4 @35805 (best): acc=0.900; A/B/C/D=452/0/9/51; fD=0.100; lam1(D)=+0.1019; lam1(A)=+0.0039 +- multi4 @65100 (final): acc=0.824; A/B/C/D=408/1/14/89; fD=0.174; lam1(D)=+0.0946; lam1(A)=+0.0133 + +## hrm_multi4 provenance (E6a) +- diag_hrm_multi4_step_{20832,23436,26040}_512.npz step grid matches HRM pretrain numbering; + multi4_eval_compare/logs should contain the eval invocations — checked manually below. +- ACTION: if the hrm_multi4 run is pretrain-pipeline (ACT-streaming + perturbation), then the + May-28 multi4 vs righteous baseline comparison IS matched-pipeline and Sec 3.4's caveat is + narrower than written; step9 E-vs-F pair (queued) covers the fixed-unroll objective regardless. \ No newline at end of file diff --git a/analysis_2x2/offline_phase1_e1.py b/analysis_2x2/offline_phase1_e1.py new file mode 100644 index 0000000..7c42825 --- /dev/null +++ b/analysis_2x2/offline_phase1_e1.py @@ -0,0 +1,123 @@ +"""E1 offline batch (experiment_framework.md): +(1) bootstrap CIs for headline quantities; (2) settling-criterion robustness (z_L / combined); +(3) TRM official multi4 vs baseline matched-pipeline 2x2; (4) provenance note for hrm_multi4. 
+Outputs: offline_followups/phase1_e1.md +""" +from __future__ import annotations + +from pathlib import Path + +import numpy as np + +HERE = Path(__file__).resolve().parent +FLOSS = HERE.parent +OUT = HERE / "offline_followups" +RNG = np.random.default_rng(0) + + +def auc_rank(score, label): + pos, neg = score[label == 1], score[label == 0] + if len(pos) == 0 or len(neg) == 0: + return float("nan") + allv = np.concatenate([pos, neg]) + order = np.argsort(allv, kind="mergesort") + ranks = np.empty(len(allv)); ranks[order] = np.arange(1, len(allv) + 1) + sv = allv[order]; i = 0 + while i < len(sv): + j = i + while j + 1 < len(sv) and sv[j + 1] == sv[i]: + j += 1 + if j > i: + ranks[order[i:j + 1]] = ranks[order[i:j + 1]].mean() + i = j + 1 + return float((ranks[:len(pos)].sum() - len(pos) * (len(pos) + 1) / 2) / (len(pos) * len(neg))) + + +def boot_ci(stat_fn, n, B=10000): + vals = [] + for _ in range(B): + idx = RNG.integers(0, n, n) + v = stat_fn(idx) + if not np.isnan(v): + vals.append(v) + return float(np.percentile(vals, 2.5)), float(np.percentile(vals, 97.5)) + + +def late(d, key="drift_zH"): + return np.log10(np.clip(d[key][:, -4:].mean(1), 1e-12, None)) + + +def otsu(x, nbins=256): + h, e = np.histogram(x, bins=nbins); h = h.astype(float) + c = (e[:-1] + e[1:]) / 2 + p = h / h.sum(); om = np.cumsum(p); mu = np.cumsum(p * c); mt = mu[-1] + den = om * (1 - om); den[den <= 0] = np.nan + return float(c[np.nanargmax((mt * om - mu) ** 2 / den)]) + + +lines = ["# E1 offline batch — bootstrap CIs, settling robustness, TRM multi4 pair", ""] + +# ---------- (1) bootstrap CIs ---------- +trm = np.load(FLOSS / "analysis_2x2/retest/trm_gbs768_step58590_full_n2048.npz") +c_t = trm["exact_correct"].astype(int); l1_t = trm["lyap_spec"][:, 0]; ld_t = late(trm) +n_wrong = int((c_t == 0).sum()) +cp_upper = 1 - 0.05 ** (1 / n_wrong) # Clopper-Pearson upper for 0 events +lines += ["## Bootstrap / exact CIs (TRM official @58590, n=2048)", + f"- settled-wrong fraction: observed 
0/{n_wrong}; exact 95% upper bound {cp_upper:.4f} " + f"({cp_upper*100:.2f}% of failures)", + f"- AUC(-lam1->correct) = {auc_rank(-l1_t, c_t):.4f}, bootstrap 95% CI " + f"{boot_ci(lambda i: auc_rank(-l1_t[i], c_t[i]), len(c_t))}", + f"- lam1(wrong) median 95% CI {boot_ci(lambda i: float(np.median(l1_t[i][c_t[i]==0])), len(c_t))}", + f"- lam1(correct) median 95% CI {boot_ci(lambda i: float(np.median(l1_t[i][c_t[i]==1])), len(c_t))}"] + +hrm = np.load(FLOSS / "diag_8k.npz") +c_h = hrm["exact_correct"].astype(int); l1_h = hrm["lyap_spec"][:, 0]; ld_h = late(hrm) +tau_strict = float(np.percentile(ld_h, 45)) +def strictB_frac(idx): + c, ld = c_h[idx], ld_h[idx] + w = (c == 0).sum() + return float(((ld < tau_strict) & (c == 0)).sum() / max(w, 1)) +lines += ["", "## Bootstrap CIs (HRM @26040, n=8192, strict band)", + f"- strict settled-wrong fraction of failures: observed {21/3894:.4f}, bootstrap 95% CI " + f"{boot_ci(strictB_frac, len(c_h))}", + f"- AUC(-lam1->correct) = {auc_rank(-l1_h, c_h):.4f}, bootstrap 95% CI " + f"{boot_ci(lambda i: auc_rank(-l1_h[i], c_h[i]), len(c_h))}"] + +# ---------- (2) settling-criterion robustness ---------- +lines += ["", "## Settling-criterion robustness (B-cell counts under alternative drift definitions)"] +for tag, d, c in [("TRM official n=2048", trm, c_t), ("HRM n=8192", hrm, c_h)]: + row = [tag] + for key, nm in [("drift_zH", "zH"), ("drift_zL", "zL")]: + ld = late(d, key); tau = otsu(ld); conv = ld < tau + row.append(f"{nm}: B={int((conv & (c==0)).sum())}/A={int((conv & (c==1)).sum())} (tau={tau:.2f})") + comb = np.log10(np.clip(np.sqrt(d["drift_zH"][:, -4:].mean(1) ** 2 + d["drift_zL"][:, -4:].mean(1) ** 2), 1e-12, None)) + tau = otsu(comb); conv = comb < tau + row.append(f"combined: B={int((conv & (c==0)).sum())}/A={int((conv & (c==1)).sum())} (tau={tau:.2f})") + lines.append("- " + " | ".join(row)) + +# ---------- (3) TRM official multi4 vs baseline (matched pipeline) ---------- +lines += ["", "## TRM official-pipeline multi4 vs 
baseline (matched objective, n=512 each)"] +spec = FLOSS / "official_gbs768_spectrum" +for nm, f in [("baseline @58590", spec / "trm_gbs768_base_step58590_n512_k8_seed20260602.npz"), + ("multi4 @35805 (best)", spec / "trm_gbs768_multi4_step35805_n512_k8_seed20260602.npz"), + ("multi4 @65100 (final)", spec / "trm_gbs768_multi4_step65100_n512_k8_seed20260602.npz")]: + if not f.exists(): + lines.append(f"- {nm}: npz missing"); continue + d = np.load(f); c = d["exact_correct"].astype(int); l1 = d["lyap_spec"][:, 0] + ld = late(d); tau = otsu(ld); conv = ld < tau + A = conv & (c == 1); B = conv & (c == 0); C = (~conv) & (c == 1); D = (~conv) & (c == 0) + lines.append(f"- {nm}: acc={c.mean():.3f}; A/B/C/D={int(A.sum())}/{int(B.sum())}/{int(C.sum())}/{int(D.sum())}; " + f"fD={float(D.mean()):.3f}; lam1(D)={np.median(l1[D]) if D.sum()>0 else float('nan'):+.4f}; " + f"lam1(A)={np.median(l1[A]) if A.sum()>0 else float('nan'):+.4f}") + +# ---------- (4) provenance note ---------- +lines += ["", "## hrm_multi4 provenance (E6a)", + "- diag_hrm_multi4_step_{20832,23436,26040}_512.npz step grid matches HRM pretrain numbering;", + " multi4_eval_compare/logs should contain the eval invocations — checked manually below.", + "- ACTION: if the hrm_multi4 run is pretrain-pipeline (ACT-streaming + perturbation), then the", + " May-28 multi4 vs righteous baseline comparison IS matched-pipeline and Sec 3.4's caveat is", + " narrower than written; step9 E-vs-F pair (queued) covers the fixed-unroll objective regardless."] + +OUT.mkdir(exist_ok=True) +(OUT / "phase1_e1.md").write_text("\n".join(lines)) +print("\n".join(lines)) diff --git a/analysis_2x2/phase1/phase1_results.md b/analysis_2x2/phase1/phase1_results.md new file mode 100644 index 0000000..5dde25d --- /dev/null +++ b/analysis_2x2/phase1/phase1_results.md @@ -0,0 +1,48 @@ +# Phase-1 analysis (E5 horizon sweep, E6 matched-objective, E2 replication) + +## E5: when does fate become legible? 
AUC(early signal -> FINAL correct), restricted to not-yet-correct@H + +### TRM +| H | solved@H | undecided n | of which eventual✓ | AUC(-λ₁) | AUC(-drift) | AUC(q_halt) | +|---|---|---|---|---|---|---| +| 2 | 0.542 | 939 | 0.729 | 0.526 | 0.411 | 0.608 | +| 4 | 0.694 | 626 | 0.594 | 0.543 | 0.492 | 0.521 | +| 6 | 0.766 | 479 | 0.470 | 0.523 | 0.441 | 0.531 | +| 8 | 0.801 | 407 | 0.376 | 0.483 | 0.495 | 0.514 | +| 10 | 0.831 | 347 | 0.268 | 0.477 | 0.536 | 0.476 | +| 12 | 0.846 | 315 | 0.194 | 0.483 | 0.509 | 0.509 | + +### HRM +| H | solved@H | undecided n | of which eventual✓ | AUC(-λ₁) | AUC(-drift) | AUC(q_halt) | +|---|---|---|---|---|---|---| +| 2 | 0.071 | 1903 | 0.490 | 0.375 | 0.193 | 0.810 | +| 4 | 0.345 | 1342 | 0.276 | 0.448 | 0.312 | 0.734 | +| 6 | 0.436 | 1155 | 0.159 | 0.438 | 0.349 | 0.676 | +| 8 | 0.471 | 1083 | 0.103 | 0.432 | 0.371 | 0.705 | +| 10 | 0.489 | 1046 | 0.072 | 0.478 | 0.407 | 0.705 | +| 12 | 0.506 | 1012 | 0.041 | 0.503 | 0.348 | 0.635 | + +## E6: matched-objective intervention (step9 fixed-unroll runs, n=512) + +### HRM (E base vs F multi4) +| ckpt | base acc | base fD | base λ₁(D) | multi4 acc | multi4 fD | multi4 λ₁(D) | +|---|---|---|---|---|---|---| +| step_12500 | 0.613 | 0.387 | -0.0157 | 0.631 | 0.369 | -0.0215 | +| step_25000 | 0.615 | 0.385 | +0.0138 | 0.617 | 0.379 | +0.0101 | +| best | 0.619 | 0.381 | -0.0214 | 0.613 | 0.387 | -0.0181 | +| final | 0.588 | 0.410 | +0.0456 | 0.609 | 0.387 | +0.0335 | + +### TRM (G base vs H multi4) +| ckpt | base acc | base fD | base λ₁(D) | multi4 acc | multi4 fD | multi4 λ₁(D) | +|---|---|---|---|---|---|---| +| step_12500 | 0.553 | 0.344 | +0.0219 | 0.600 | 0.305 | +0.0284 | +| step_25000 | 0.525 | 0.396 | +0.0209 | 0.545 | 0.361 | +0.0222 | +| best | 0.596 | 0.334 | +0.0233 | 0.580 | 0.361 | +0.0366 | +| final | 0.477 | 0.312 | +0.0164 | 0.537 | 0.270 | +0.0249 | + +## E2: decomposition on a SECOND HRM training run (step9_E fixed-unroll, n=2048) + +| ckpt | acc | A | B | C | D | λ₁(A) | 
λ₁(D) | settled-wrong frac of failures | +|---|---|---|---|---|---|---|---|---| +| best | 0.637 | 1244 | 1 | 61 | 742 | -0.1685 | -0.0308 | 0.0013 | +| final | 0.594 | 1124 | 3 | 92 | 829 | -0.0144 | +0.0444 | 0.0036 | \ No newline at end of file diff --git a/analysis_2x2/queue_maze_followups.sh b/analysis_2x2/queue_maze_followups.sh new file mode 100755 index 0000000..2257013 --- /dev/null +++ b/analysis_2x2/queue_maze_followups.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +# Pre-HRM-Maze queue (user request): (1) per-cell failure structure of TRM-Maze (cheap inference), +# (2) continue-train TRM-Maze from step_130200 to test saturation (does acc climb toward ~1.0?). +# Waits for a genuinely free GPU; good-neighbor on the shared box. +set -o pipefail +cd /home/yurenh2/rrm/research/flossing +source /home/yurenh2/miniconda3/etc/profile.d/conda.sh +conda activate rrm +OUTDIR=analysis_2x2/maze_followup; mkdir -p "$OUTDIR" +ST="$OUTDIR/queue.log"; log(){ echo "[$(date '+%m-%d %H:%M:%S')] $*" >> "$ST"; } +CKDIR=/home/yurenh2/rrm/trm/checkpoints/maze-30x30-hard-1k-ACT-torch/pretrain_att_maze30x30_2gpu_gbs384 +DATA=/home/yurenh2/rrm/data/maze-30x30-hard-1k +free_gpu(){ nvidia-smi --query-gpu=index,utilization.gpu,memory.used --format=csv,noheader,nounits \ + | awk -F', ' '$2<20 && $3<4000 {print $1; exit}'; } + +log "queue started; waiting for a free GPU (util<20, mem<4G; 12h fallback)" +DEADLINE=$(( $(date +%s) + 12*3600 )); GPU="" +while true; do + g1="$(free_gpu)"; if [[ -n "$g1" ]]; then sleep 60; g2="$(free_gpu)"; [[ "$g2" == "$g1" ]] && GPU="$g1" && break; fi + if (( $(date +%s) > DEADLINE )); then GPU="$(nvidia-smi --query-gpu=index,memory.used --format=csv,noheader,nounits|sort -t, -k2 -n|head -1|cut -d, -f1)"; log "fallback GPU $GPU"; break; fi + sleep 300 +done +log "claimed GPU $GPU"; export CUDA_VISIBLE_DEVICES="$GPU" + +# (1) Solution-space dynamics + per-cell structure: TRM-Maze final ckpt (saves preds, inputs, +# labels, AND per-step decoded-answer Hamming 
drift over solution-space cells). +log "start MAZE solution-space dump (inference, no JVP)" +if python maze_pred_dump.py --ckpt-root "$CKDIR" --ckpt-name step_130200 --data "$DATA" \ + --n 512 --out "$OUTDIR/maze_preds_step130200.npz" > "$OUTDIR/preds_maze.log" 2>&1; then + log "done maze dump"; else log "FAILED maze dump"; fi +# (1b) SUDOKU control: solution-space dynamics where full-space DID separate. +SUD_CK=/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro +SUD_DATA=/home/yurenh2/rrm/data/sudoku-extreme-1k-aug-1000 +log "start SUDOKU solution-space control dump" +if python maze_pred_dump.py --ckpt-root "$SUD_CK" --ckpt-name step_58590 --data "$SUD_DATA" \ + --n 512 --out "$OUTDIR/sudoku_preds_step58590.npz" > "$OUTDIR/preds_sudoku.log" 2>&1; then + log "done sudoku dump"; else log "FAILED sudoku dump"; fi + +# (2) continue-train TRM-Maze from step_130200 (saturation test). gbs 192 on 1 GPU; constant LR. +log "start continue-train TRM-Maze from step_130200 (+25000 epochs, eval every 2500)" +cd /home/yurenh2/rrm/trm +export WANDB_MODE=offline +RUN=pretrain_att_maze30x30_CONTINUE_from130200 +nohup python pretrain.py arch=trm "data_paths=[$DATA]" "evaluators=[]" \ + epochs=12500 eval_interval=2500 \ + lr=1e-4 puzzle_emb_lr=1e-4 weight_decay=1.0 puzzle_emb_weight_decay=1.0 global_batch_size=192 \ + arch.L_layers=2 arch.H_cycles=3 arch.L_cycles=4 +run_name="$RUN" ema=True \ + +checkpoint_every_eval=true "+load_checkpoint=$CKDIR/step_130200" \ + > /home/yurenh2/rrm/research/flossing/$OUTDIR/continue_train.log 2>&1 +echo "continue-train exited" >> "/home/yurenh2/rrm/research/flossing/$ST" diff --git a/analysis_2x2/results.md b/analysis_2x2/results.md new file mode 100644 index 0000000..a62bba2 --- /dev/null +++ b/analysis_2x2/results.md @@ -0,0 +1,69 @@ +# 2x2 analysis (convergence x correctness) — generated 2026-06-11 + +## hrm26040_n8192 +- npz: `/home/yurenh2/rrm/research/flossing/diag_8k.npz`, 
n=8192, exact_acc=0.525 +- late-drift def: mean(drift_zH[:, -4:]), Otsu tau(log10)=0.766, frac_converged=0.509 + +| cell | n | lam1 median | lam1 IQR | token_acc med | +|---|---|---|---|---| +| A_conv_correct | 4103 | -0.8617 | [-0.9020, -0.8153] | 1.000 | +| B_conv_wrong | 63 | -0.5912 | [-0.8091, -0.5061] | 0.630 | +| C_nonconv_correct | 195 | -0.6943 | [-0.7354, -0.6466] | 1.000 | +| D_nonconv_wrong | 3831 | -0.5998 | [-0.6475, -0.5488] | 0.630 | + +- mixture: wrong-that-converged = 0.016; correct-that-nonconverged = 0.045 +- dlam1(correct-wrong): overall -0.2586; within-conv -0.2705; within-nonconv -0.0945 +- dlam1(wrong: conv - nonconv) = +0.0085 +- AUC(-lam1 -> correct): overall 0.984; within-conv 0.852; within-nonconv 0.818 +- AUC(-log d_late -> correct) = 0.964; AUC(-lam1 -> converged) = 0.986 + +## trm_singleGPU_step260410_n512 +- npz: `/home/yurenh2/rrm/research/flossing/diag_trm_singleGPU_step260410_512.npz`, n=512, exact_acc=0.770 +- late-drift def: mean(drift_zH[:, -4:]), Otsu tau(log10)=1.406, frac_converged=0.748 + +| cell | n | lam1 median | lam1 IQR | token_acc med | +|---|---|---|---|---| +| A_conv_correct | 383 | +0.0047 | [+0.0007, +0.0137] | 1.000 | +| B_conv_wrong | 0 | - | - | - | +| C_nonconv_correct | 11 | +0.0998 | [+0.0907, +0.1035] | 1.000 | +| D_nonconv_wrong | 118 | +0.1023 | [+0.0935, +0.1112] | 0.642 | + +- mixture: wrong-that-converged = 0.000; correct-that-nonconverged = 0.028 +- dlam1(correct-wrong): overall -0.0973; within-conv +nan; within-nonconv -0.0025 +- dlam1(wrong: conv - nonconv) = +nan +- AUC(-lam1 -> correct): overall 0.989; within-conv nan; within-nonconv 0.619 +- AUC(-log d_late -> correct) = 0.975; AUC(-lam1 -> converged) = 0.996 + +## trm_singleGPU_step130205_n512 +- npz: `/home/yurenh2/rrm/research/flossing/diag_trm_singleGPU_step130205_512.npz`, n=512, exact_acc=0.756 +- late-drift def: mean(drift_zH[:, -4:]), Otsu tau(log10)=1.213, frac_converged=0.713 + +| cell | n | lam1 median | lam1 IQR | token_acc med | 
+|---|---|---|---|---| +| A_conv_correct | 365 | +0.0043 | [-0.0007, +0.0141] | 1.000 | +| B_conv_wrong | 0 | - | - | - | +| C_nonconv_correct | 22 | +0.0852 | [+0.0694, +0.0988] | 1.000 | +| D_nonconv_wrong | 125 | +0.1067 | [+0.0995, +0.1126] | 0.630 | + +- mixture: wrong-that-converged = 0.000; correct-that-nonconverged = 0.057 +- dlam1(correct-wrong): overall -0.1017; within-conv +nan; within-nonconv -0.0215 +- dlam1(wrong: conv - nonconv) = +nan +- AUC(-lam1 -> correct): overall 0.989; within-conv nan; within-nonconv 0.805 +- AUC(-log d_late -> correct) = 0.957; AUC(-lam1 -> converged) = 0.996 + +## trm_step13020_n512 +- npz: `/home/yurenh2/rrm/research/flossing/diag_trm_step13020_512.npz`, n=512, exact_acc=0.596 +- late-drift def: mean(drift_zH[:, -4:]), Otsu tau(log10)=0.730, frac_converged=0.598 + +| cell | n | lam1 median | lam1 IQR | token_acc med | +|---|---|---|---|---| +| A_conv_correct | 296 | -0.0043 | [-0.0113, +0.0052] | 1.000 | +| B_conv_wrong | 10 | -0.0027 | [-0.0090, +0.0009] | 0.556 | +| C_nonconv_correct | 9 | +0.0419 | [+0.0323, +0.0480] | 1.000 | +| D_nonconv_wrong | 197 | +0.0317 | [+0.0143, +0.0462] | 0.642 | + +- mixture: wrong-that-converged = 0.048; correct-that-nonconverged = 0.030 +- dlam1(correct-wrong): overall -0.0339; within-conv -0.0016; within-nonconv +0.0102 +- dlam1(wrong: conv - nonconv) = -0.0344 +- AUC(-lam1 -> correct): overall 0.849; within-conv 0.515; within-nonconv 0.360 +- AUC(-log d_late -> correct) = 0.976; AUC(-lam1 -> converged) = 0.887 diff --git a/analysis_2x2/run_clv_geom.sh b/analysis_2x2/run_clv_geom.sh new file mode 100755 index 0000000..d271370 --- /dev/null +++ b/analysis_2x2/run_clv_geom.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +# CLV leading-vector geometry probe (b): does the EXPANSION DIRECTION carry outcome signal +# beyond drift+q_halt? Sudoku TRM + HRM (mlp_t, no flash issue), n=512, on the free GPU. 
+set -o pipefail +cd /home/yurenh2/rrm/research/flossing +source /home/yurenh2/miniconda3/etc/profile.d/conda.sh +conda activate rrm +export CUDA_VISIBLE_DEVICES="${1:-1}" +OUTDIR=analysis_2x2/clv; mkdir -p "$OUTDIR" +ST="$OUTDIR/status.log"; log(){ echo "[$(date '+%H:%M:%S')] $*" >> "$ST"; } +TRM=/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro +HRM="/home/yurenh2/rrm/hrm/checkpoints/Sudoku-extreme-1k-aug-1000 ACT-torch/HierarchicalReasoningModel_ACTV1 righteous-python" + +log "start TRM geom n=512 (GPU $CUDA_VISIBLE_DEVICES)" +if python diagnose_trm_joint_geom.py --ckpt-root "$TRM" --ckpt-name step_58590 \ + --n-samples 512 --batch-size 8 --k-lyap 8 --t-ons 1 --seed 0 \ + --out "$OUTDIR/trm_geom_step58590_n512.npz" > "$OUTDIR/trm.log" 2>&1; then log "done TRM"; else log "FAILED TRM"; fi + +log "start HRM geom n=512" +if python diagnose_hrm_joint_geom.py --ckpt-root "$HRM" --ckpt-name step_26040 \ + --n-samples 512 --batch-size 16 --k-lyap 8 --t-ons 1 --seed 0 \ + --out "$OUTDIR/hrm_geom_step26040_n512.npz" > "$OUTDIR/hrm.log" 2>&1; then log "done HRM"; else log "FAILED HRM"; fi +log "clv geom finished" diff --git a/analysis_2x2/run_maze_diag_queue.sh b/analysis_2x2/run_maze_diag_queue.sh new file mode 100755 index 0000000..7dfc757 --- /dev/null +++ b/analysis_2x2/run_maze_diag_queue.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# Maze FTLE diagnostics (att+rope, math SDP backend). Final checkpoint first (answers the +# headline separation question), then two more for an evolution view. Waits for a GPU. 
+set -o pipefail +cd /home/yurenh2/rrm/research/flossing +source /home/yurenh2/miniconda3/etc/profile.d/conda.sh +conda activate rrm + +CKPT_ROOT=/home/yurenh2/rrm/trm/checkpoints/maze-30x30-hard-1k-ACT-torch/pretrain_att_maze30x30_2gpu_gbs384 +OUTDIR=analysis_2x2/maze; mkdir -p "$OUTDIR" +STATUS="$OUTDIR/queue_status.log" +log(){ echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" >> "$STATUS"; } +free_gpu(){ nvidia-smi --query-gpu=index,utilization.gpu,memory.used --format=csv,noheader,nounits \ + | awk -F', ' '$2<25 && $3<6000 {print $1; exit}'; } + +log "maze diag queue started (n=512, batch 2, math SDP; final ckpt first)" +DEADLINE=$(( $(date +%s) + 12*3600 )); GPU="" +while true; do + g1="$(free_gpu)"; if [[ -n "$g1" ]]; then sleep 60; g2="$(free_gpu)"; [[ "$g2" == "$g1" ]] && GPU="$g1" && break; fi + if (( $(date +%s) > DEADLINE )); then + GPU="$(nvidia-smi --query-gpu=index,memory.used --format=csv,noheader,nounits | sort -t, -k2 -n | head -1 | cut -d, -f1)" + log "12h fallback: GPU $GPU"; break; fi + sleep 300 +done +log "claimed GPU $GPU"; export CUDA_VISIBLE_DEVICES="$GPU" + +for CK in step_130200 step_65100 step_26040; do + OUT="$OUTDIR/maze_${CK}_n512.npz" + [[ -f "$OUT" ]] && { log "skip $CK"; continue; } + log "start $CK" + if python diagnose_trm_joint_maze.py --ckpt-root "$CKPT_ROOT" --ckpt-name "$CK" \ + --n-samples 512 --batch-size 2 --k-lyap 8 --t-ons 1 --seed 0 \ + --out "$OUT" > "$OUTDIR/${CK}.log" 2>&1; then + acc=$(grep -oE "acc=[0-9.]+" "$OUTDIR/${CK}.log" | tail -1) + log "done $CK ($acc)" + else + log "FAILED $CK (see $OUTDIR/${CK}.log)" + fi +done +log "maze diag queue finished" diff --git a/analysis_2x2/run_phase1_queue.sh b/analysis_2x2/run_phase1_queue.sh new file mode 100755 index 0000000..989c7ee --- /dev/null +++ b/analysis_2x2/run_phase1_queue.sh @@ -0,0 +1,92 @@ +#!/usr/bin/env bash +# Phase-1 queue (experiment_framework.md): E5 horizon sweeps, E2 run-level replication, +# E6 matched-objective step9 pairs. 
Waits for a free GPU (12h fallback), runs sequentially. +set -o pipefail + +cd /home/yurenh2/rrm/research/flossing +source /home/yurenh2/miniconda3/etc/profile.d/conda.sh +conda activate rrm + +OUTDIR=analysis_2x2/phase1 +mkdir -p "$OUTDIR" +STATUS="$OUTDIR/queue_status.log" +TRM_OFF="/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro" +TRM_SGL="/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_singleGPU" +HRM_ROOT="/home/yurenh2/rrm/hrm/checkpoints/Sudoku-extreme-1k-aug-1000 ACT-torch/HierarchicalReasoningModel_ACTV1 righteous-python" +S9=/home/yurenh2/rrm/research/flossing + +log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" >> "$STATUS"; } +free_gpu() { + nvidia-smi --query-gpu=index,utilization.gpu,memory.used --format=csv,noheader,nounits \ + | awk -F', ' '$2<30 && $3<8000 {print $1; exit}' +} + +log "phase-1 queue started (E5 horizon sweeps, E2 step9_E replication, E6 step9 pairs)" +DEADLINE=$(( $(date +%s) + 12*3600 )) +GPU="" +while true; do + g1="$(free_gpu)" + if [[ -n "$g1" ]]; then + sleep 60; g2="$(free_gpu)" + if [[ "$g2" == "$g1" ]]; then GPU="$g1"; break; fi + fi + if (( $(date +%s) > DEADLINE )); then + GPU="$(nvidia-smi --query-gpu=index,memory.used --format=csv,noheader,nounits | sort -t, -k2 -n | head -1 | cut -d, -f1)" + log "12h fallback: taking GPU $GPU" + break + fi + sleep 300 +done +log "claimed GPU $GPU" +export CUDA_VISIBLE_DEVICES="$GPU" + +run_job() { # name horizon script args... 
+ local name="$1" hor="$2"; shift 2 + if [[ -f "$OUTDIR/${name}.npz" ]]; then log "skip $name"; return 0; fi + log "start $name" + if DIAG_HORIZON="$hor" python "$@" --out "$OUTDIR/${name}.npz" > "$OUTDIR/${name}.log" 2>&1; then + log "done $name" + else + log "FAILED $name" + fi +} + +# --- E5: TRM horizon sweep (h=4 already exists in retest/) --- +for H in 2 6 8 10 12; do + run_job "trm_official58590_h${H}_n2048" "$H" diagnose_trm_joint_horizon.py \ + --ckpt-root "$TRM_OFF" --ckpt-name step_58590 --n-samples 2048 --batch-size 16 \ + --k-lyap 8 --t-ons 1 --seed 0 +done + +# --- E5: HRM horizon sweep --- +for H in 2 6 8 10 12; do + run_job "hrm26040_h${H}_n2048" "$H" diagnose_hrm_joint_horizon.py \ + --ckpt-root "$HRM_ROOT" --ckpt-name step_26040 --n-samples 2048 --batch-size 32 \ + --k-lyap 8 --t-ons 1 --seed 0 +done + +# --- E2: HRM second training run (step9_E fixed-unroll baseline), full window --- +run_job "step9E_hrm_best_full_n2048" 16 diagnose_hrm_joint.py \ + --ckpt-root "$HRM_ROOT" --ckpt-name "$S9/step9_E_hrm_baseline_parallel_fixed_26040_50k_ckpts/best.pt" \ + --n-samples 2048 --batch-size 32 --k-lyap 8 --t-ons 1 --seed 0 +run_job "step9E_hrm_final_full_n2048" 16 diagnose_hrm_joint.py \ + --ckpt-root "$HRM_ROOT" --ckpt-name "$S9/step9_E_hrm_baseline_parallel_fixed_26040_50k_ckpts/final.pt" \ + --n-samples 2048 --batch-size 32 --k-lyap 8 --t-ons 1 --seed 0 + +# --- E6: matched-objective pairs (n=512): HRM E vs F, TRM G vs H --- +for CK in step_12500 step_25000 best final; do + run_job "step9E_hrm_${CK}_n512" 16 diagnose_hrm_joint.py \ + --ckpt-root "$HRM_ROOT" --ckpt-name "$S9/step9_E_hrm_baseline_parallel_fixed_26040_50k_ckpts/${CK}.pt" \ + --n-samples 512 --batch-size 32 --k-lyap 8 --t-ons 1 --seed 0 + run_job "step9F_hrm_${CK}_n512" 16 diagnose_hrm_joint.py \ + --ckpt-root "$HRM_ROOT" --ckpt-name "$S9/step9_F_hrm_multi4_loguniform_ramp_26040_50k_ckpts/${CK}.pt" \ + --n-samples 512 --batch-size 32 --k-lyap 8 --t-ons 1 --seed 0 + run_job 
"step9G_trm_${CK}_n512" 16 diagnose_trm_joint.py \ + --ckpt-root "$TRM_SGL" --ckpt-name "$S9/step9_G_trm_baseline_parallel_fixed_26041_batch4_50k_ckpts/${CK}.pt" \ + --n-samples 512 --batch-size 16 --k-lyap 8 --t-ons 1 --seed 0 + run_job "step9H_trm_${CK}_n512" 16 diagnose_trm_joint.py \ + --ckpt-root "$TRM_SGL" --ckpt-name "$S9/step9_H_trm_multi4_loguniform_ramp_26041_batch4_50k_ckpts/${CK}.pt" \ + --n-samples 512 --batch-size 16 --k-lyap 8 --t-ons 1 --seed 0 +done + +log "phase-1 queue finished" diff --git a/analysis_2x2/run_retest_2x2.sh b/analysis_2x2/run_retest_2x2.sh new file mode 100755 index 0000000..9cdcc13 --- /dev/null +++ b/analysis_2x2/run_retest_2x2.sh @@ -0,0 +1,75 @@ +#!/usr/bin/env bash +# Queue: wait for a free GPU, then run the 2x2 re-test diagnostics: +# 1. TRM official_gbs768 @ step_58590 (86.9% ckpt) full-window n=2048 +# 2. TRM official_gbs768 @ step_58590 early-window n=2048 (first 4 ACT steps) +# 3. HRM righteous-python @ step_26040 (joint est.) full-window n=2048 +# 4. HRM righteous-python @ step_26040 early-window n=2048 +# Same --seed 0 and same n across full/short pairs so idx fields pair up. 
+set -o pipefail + +cd /home/yurenh2/rrm/research/flossing +source /home/yurenh2/miniconda3/etc/profile.d/conda.sh +conda activate rrm + +OUTDIR=analysis_2x2/retest +mkdir -p "$OUTDIR" +STATUS="$OUTDIR/queue_status.log" +TRM_ROOT="/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro" +HRM_ROOT="/home/yurenh2/rrm/hrm/checkpoints/Sudoku-extreme-1k-aug-1000 ACT-torch/HierarchicalReasoningModel_ACTV1 righteous-python" + +log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" >> "$STATUS"; } + +free_gpu() { + # print index of a GPU with util<30% and mem<8GB, else empty + nvidia-smi --query-gpu=index,utilization.gpu,memory.used --format=csv,noheader,nounits \ + | awk -F', ' '$2<30 && $3<8000 {print $1; exit}' +} + +log "queue started, waiting for a free GPU (util<30%, mem<8GB, two checks 60s apart; 12h fallback)" +DEADLINE=$(( $(date +%s) + 12*3600 )) +GPU="" +while true; do + g1="$(free_gpu)" + if [[ -n "$g1" ]]; then + sleep 60 + g2="$(free_gpu)" + if [[ "$g2" == "$g1" ]]; then GPU="$g1"; break; fi + fi + if (( $(date +%s) > DEADLINE )); then + GPU="$(nvidia-smi --query-gpu=index,memory.used --format=csv,noheader,nounits | sort -t, -k2 -n | head -1 | cut -d, -f1)" + log "12h fallback: taking GPU $GPU (most free memory) despite utilization" + break + fi + sleep 300 +done +log "claimed GPU $GPU" +export CUDA_VISIBLE_DEVICES="$GPU" + +run_job() { + local name="$1"; shift + if [[ -f "$OUTDIR/${name}.npz" ]]; then log "skip $name (output exists)"; return 0; fi + log "start $name" + if python "$@" --out "$OUTDIR/${name}.npz" > "$OUTDIR/${name}.log" 2>&1; then + log "done $name" + else + log "FAILED $name (see $OUTDIR/${name}.log)" + fi +} + +run_job trm_gbs768_step58590_full_n2048 \ + diagnose_trm_joint.py --ckpt-root "$TRM_ROOT" --ckpt-name step_58590 \ + --n-samples 2048 --batch-size 16 --k-lyap 8 --t-ons 1 --seed 0 + +run_job trm_gbs768_step58590_short_n2048 \ + diagnose_trm_joint_short.py --ckpt-root "$TRM_ROOT" 
--ckpt-name step_58590 \ + --n-samples 2048 --batch-size 16 --k-lyap 8 --t-ons 1 --seed 0 + +run_job hrm_righteous_step26040_full_n2048 \ + diagnose_hrm_joint.py --ckpt-root "$HRM_ROOT" --ckpt-name step_26040 \ + --n-samples 2048 --batch-size 32 --k-lyap 8 --t-ons 1 --seed 0 + +run_job hrm_righteous_step26040_short_n2048 \ + diagnose_hrm_joint_short.py --ckpt-root "$HRM_ROOT" --ckpt-name step_26040 \ + --n-samples 2048 --batch-size 32 --k-lyap 8 --t-ons 1 --seed 0 + +log "all retest diagnostics finished" diff --git a/analysis_2x2/sweep_hrm26040_joint_n2048.csv b/analysis_2x2/sweep_hrm26040_joint_n2048.csv new file mode 100644 index 0000000..c4a8b6c --- /dev/null +++ b/analysis_2x2/sweep_hrm26040_joint_n2048.csv @@ -0,0 +1,20 @@ +pct,tau,nA,nB,nC,nD,lam1_med_B,lam1_med_D +5,-0.011992306107950053,102,1,975,970,-0.22750887274742126,0.03138776309788227 +10,-0.01058249634236916,203,2,874,969,-0.15784957632422447,0.0314599983394146 +15,-0.009196568905867631,306,2,771,969,-0.15784957632422447,0.0314599983394146 +20,-0.008030915298460381,408,2,669,969,-0.15784957632422447,0.0314599983394146 +25,-0.006957811036151525,509,3,568,968,-0.10465513914823532,0.03146406076848507 +30,-0.006023327926233211,612,3,465,968,-0.10465513914823532,0.03146406076848507 +35,-0.004937302122343026,714,3,363,968,-0.10465513914823532,0.03146406076848507 +40,-0.0033157474065108714,814,5,263,966,-0.14139553904533386,0.03153638541698456 +45,-0.0012688460769353994,917,5,160,966,-0.14139553904533386,0.03153638541698456 +50,0.5973517463361264,1013,11,64,960,-0.0452079139649868,0.03175541199743748 +55,1.3993508404545523,1034,92,43,879,0.033553484827280045,0.031315527856349945 +60,1.4994241881984267,1036,193,41,778,0.029089782387018204,0.03191038407385349 +65,1.547490873090265,1038,293,39,678,0.02823900803923607,0.032731860876083374 +70,1.5799635318056726,1040,393,37,578,0.029996931552886963,0.03191038407385349 +75,1.6038922156413973,1040,496,37,475,0.029781382530927658,0.03269977867603302 
+80,1.6256149248592253,1040,598,37,373,0.03138776309788227,0.030956093221902847 +85,1.6518692868467673,1042,698,35,273,0.02991918846964836,0.03507234901189804 +90,1.677233478220475,1044,799,33,172,0.028961632400751114,0.044597823172807693 +95,1.708463281897988,1049,896,28,75,0.029092321172356606,0.052752863615751266 diff --git a/analysis_2x2/sweep_hrm26040_n8192.csv b/analysis_2x2/sweep_hrm26040_n8192.csv new file mode 100644 index 0000000..b2f9f82 --- /dev/null +++ b/analysis_2x2/sweep_hrm26040_n8192.csv @@ -0,0 +1,20 @@ +pct,tau,nA,nB,nC,nD,lam1_med_B,lam1_med_D +5,-0.024403165931412795,410,0,3888,3894,nan,-0.5996554493904114 +10,-0.02256924601928628,818,2,3480,3892,-0.8345716893672943,-0.5995031297206879 +15,-0.021242109610698556,1225,4,3073,3890,-0.8419434428215027,-0.5994212925434113 +20,-0.020075217197184483,1634,5,2664,3889,-0.8429288864135742,-0.599414587020874 +25,-0.019023493082334777,2039,9,2259,3885,-0.8409579992294312,-0.5993443727493286 +30,-0.017916215347934226,2444,14,1854,3880,-0.8415209650993347,-0.599242627620697 +35,-0.016770285088033448,2852,15,1446,3879,-0.8420839309692383,-0.5992319583892822 +40,-0.015257325632230992,3260,17,1038,3877,-0.8420839309692383,-0.599224328994751 +45,-0.012884453933432984,3665,21,633,3873,-0.8420839309692383,-0.5990880727767944 +50,0.371726245629762,4067,29,231,3865,-0.8250086307525635,-0.5989847183227539 +55,1.3871392863925978,4136,370,162,3524,-0.5518185496330261,-0.6034163236618042 +60,1.4972477780890994,4138,777,160,3117,-0.5671209096908569,-0.6080851554870605 +65,1.5481401201578837,4144,1181,154,2713,-0.5731313228607178,-0.6117637753486633 +70,1.5835620683527638,4147,1587,151,2307,-0.5808747410774231,-0.6136600971221924 +75,1.6110709254128324,4153,1991,145,1903,-0.584848165512085,-0.616129994392395 +80,1.6353553466380342,4156,2397,142,1497,-0.5879771709442139,-0.6185622215270996 +85,1.6574195825233073,4161,2802,137,1092,-0.5930990278720856,-0.6169184744358063 
+90,1.6817109395003784,4167,3205,131,689,-0.5954653024673462,-0.619819164276123 +95,1.7160308725239535,4184,3598,114,296,-0.5972362160682678,-0.6263471245765686 diff --git a/analysis_2x2/sweep_trm_official58590_n2048.csv b/analysis_2x2/sweep_trm_official58590_n2048.csv new file mode 100644 index 0000000..747eb40 --- /dev/null +++ b/analysis_2x2/sweep_trm_official58590_n2048.csv @@ -0,0 +1,20 @@ +pct,tau,nA,nB,nC,nD,lam1_med_B,lam1_med_D +5,0.6425822518044539,103,0,1691,254,nan,0.10287182405591011 +10,0.7090247691558889,205,0,1589,254,nan,0.10287182405591011 +15,0.7579128424918095,308,0,1486,254,nan,0.10287182405591011 +20,0.7959777945095498,410,0,1384,254,nan,0.10287182405591011 +25,0.8365441996415908,512,0,1282,254,nan,0.10287182405591011 +30,0.8629687925201839,615,0,1179,254,nan,0.10287182405591011 +35,0.8888206300392528,717,0,1077,254,nan,0.10287182405591011 +40,0.9161186566022281,819,0,975,254,nan,0.10287182405591011 +45,0.9434356854634222,922,0,872,254,nan,0.10287182405591011 +50,0.9675603217529236,1024,0,770,254,nan,0.10287182405591011 +55,0.9912876432316691,1126,0,668,254,nan,0.10287182405591011 +60,1.01543024505144,1229,0,565,254,nan,0.10287182405591011 +65,1.0446947959840196,1331,0,463,254,nan,0.10287182405591011 +70,1.0695176837117564,1433,0,361,254,nan,0.10287182405591011 +75,1.100648956309921,1536,0,258,254,nan,0.10287182405591011 +80,1.2184501775276642,1638,0,156,254,nan,0.10287182405591011 +85,1.696239099229016,1733,7,61,247,0.1096716970205307,0.10244401544332504 +90,1.794792940908479,1740,103,54,151,0.10650952905416489,0.10119745135307312 +95,1.8549837815942356,1749,196,45,58,0.10514867305755615,0.09396017715334892 diff --git a/analysis_2x2/sweep_trm_official_gbs768_step58590_n512.csv b/analysis_2x2/sweep_trm_official_gbs768_step58590_n512.csv new file mode 100644 index 0000000..011c7cc --- /dev/null +++ b/analysis_2x2/sweep_trm_official_gbs768_step58590_n512.csv @@ -0,0 +1,20 @@ +pct,tau,nA,nB,nC,nD,lam1_med_B,lam1_med_D 
+5,0.6520149305579591,26,0,422,64,nan,0.10339764878153801 +10,0.7055634417495923,52,0,396,64,nan,0.10339764878153801 +15,0.7503615854928217,77,0,371,64,nan,0.10339764878153801 +20,0.8005556182318934,103,0,345,64,nan,0.10339764878153801 +25,0.840565017355408,128,0,320,64,nan,0.10339764878153801 +30,0.8627818507451074,154,0,294,64,nan,0.10339764878153801 +35,0.8956748981953134,179,0,269,64,nan,0.10339764878153801 +40,0.923274365661878,205,0,243,64,nan,0.10339764878153801 +45,0.9484961694413702,230,0,218,64,nan,0.10339764878153801 +50,0.9733951084873498,256,0,192,64,nan,0.10339764878153801 +55,1.0041568660376061,282,0,166,64,nan,0.10339764878153801 +60,1.02397895505604,307,0,141,64,nan,0.10339764878153801 +65,1.04513267210118,333,0,115,64,nan,0.10339764878153801 +70,1.0684270138688392,358,0,90,64,nan,0.10339764878153801 +75,1.097585285269502,384,0,64,64,nan,0.10339764878153801 +80,1.1958246561577712,409,0,39,64,nan,0.10339764878153801 +85,1.3747578433518455,435,0,13,64,nan,0.10339764878153801 +90,1.785234311475947,437,23,11,41,0.10155700892210007,0.10475098341703415 +95,1.8427352264688373,438,48,10,16,0.10240473970770836,0.10433581843972206 diff --git a/analysis_2x2/sweep_trm_singleGPU_step130205_n512.csv b/analysis_2x2/sweep_trm_singleGPU_step130205_n512.csv new file mode 100644 index 0000000..2c44154 --- /dev/null +++ b/analysis_2x2/sweep_trm_singleGPU_step130205_n512.csv @@ -0,0 +1,20 @@ +pct,tau,nA,nB,nC,nD,lam1_med_B,lam1_med_D +5,0.3525141817739674,26,0,361,125,nan,0.10669758170843124 +10,0.5997049779303102,52,0,335,125,nan,0.10669758170843124 +15,0.7276586924161174,77,0,310,125,nan,0.10669758170843124 +20,0.745895549719167,103,0,284,125,nan,0.10669758170843124 +25,0.7574238256855987,128,0,259,125,nan,0.10669758170843124 +30,0.7756026094327713,154,0,233,125,nan,0.10669758170843124 +35,0.7948198738771284,179,0,208,125,nan,0.10669758170843124 +40,0.8150679269326413,205,0,182,125,nan,0.10669758170843124 +45,0.8328527737072317,230,0,157,125,nan,0.10669758170843124 
+50,0.8680717056251492,256,0,131,125,nan,0.10669758170843124 +55,0.8878173590916981,282,0,105,125,nan,0.10669758170843124 +60,0.9114474129601072,307,0,80,125,nan,0.10669758170843124 +65,0.9357973777374493,333,0,54,125,nan,0.10669758170843124 +70,0.9914630324419641,358,0,29,125,nan,0.10669758170843124 +75,1.6265775245886065,370,14,17,111,0.10315298289060593,0.10740122199058533 +80,1.6638789305051203,370,39,17,86,0.10702066123485565,0.10642785206437111 +85,1.690579189299017,370,65,17,60,0.10719160735607147,0.10596203431487083 +90,1.722529342618924,370,90,17,35,0.10658939182758331,0.10669758170843124 +95,1.7704888842696966,372,114,15,11,0.10589195787906647,0.10992329567670822 diff --git a/analysis_2x2/sweep_trm_singleGPU_step260410_n512.csv b/analysis_2x2/sweep_trm_singleGPU_step260410_n512.csv new file mode 100644 index 0000000..fac3a6d --- /dev/null +++ b/analysis_2x2/sweep_trm_singleGPU_step260410_n512.csv @@ -0,0 +1,20 @@ +pct,tau,nA,nB,nC,nD,lam1_med_B,lam1_med_D +5,1.1904157471082346,26,0,368,118,nan,0.10232088342308998 +10,1.205761365098233,52,0,342,118,nan,0.10232088342308998 +15,1.220964285676207,77,0,317,118,nan,0.10232088342308998 +20,1.2340382280630668,103,0,291,118,nan,0.10232088342308998 +25,1.2434017565460773,128,0,266,118,nan,0.10232088342308998 +30,1.253923051119596,154,0,240,118,nan,0.10232088342308998 +35,1.260020693170751,179,0,215,118,nan,0.10232088342308998 +40,1.2729663625028254,205,0,189,118,nan,0.10232088342308998 +45,1.286432864193943,230,0,164,118,nan,0.10232088342308998 +50,1.29425001787462,256,0,138,118,nan,0.10232088342308998 +55,1.3019951688280547,282,0,112,118,nan,0.10232088342308998 +60,1.3106991625583733,307,0,87,118,nan,0.10232088342308998 +65,1.323434760415417,333,0,61,118,nan,0.10232088342308998 +70,1.3423083391290647,358,0,36,118,nan,0.10232088342308998 +75,1.4595699782590752,384,0,10,118,nan,0.10232088342308998 +80,1.6965097149553376,384,25,10,93,0.10151073336601257,0.10370030999183655 
+85,1.7416635003129175,384,51,10,67,0.1015518382191658,0.10389910638332367 +90,1.7702691318062869,384,76,10,42,0.10161199420690536,0.10456060990691185 +95,1.7933802257282094,384,102,10,16,0.10197825729846954,0.10976629704236984 diff --git a/analysis_2x2/sweep_trm_step13020_n512.csv b/analysis_2x2/sweep_trm_step13020_n512.csv new file mode 100644 index 0000000..4dba5eb --- /dev/null +++ b/analysis_2x2/sweep_trm_step13020_n512.csv @@ -0,0 +1,20 @@ +pct,tau,nA,nB,nC,nD,lam1_med_B,lam1_med_D +5,0.1461886368890553,26,0,279,207,nan,0.02986210025846958 +10,0.1484773952154017,52,0,253,207,nan,0.02986210025846958 +15,0.14987173414316576,77,0,228,207,nan,0.02986210025846958 +20,0.15185786893932013,103,0,202,207,nan,0.02986210025846958 +25,0.15372304028625772,128,0,177,207,nan,0.02986210025846958 +30,0.15513756643739401,154,0,151,207,nan,0.02986210025846958 +35,0.15748307719056248,179,0,126,207,nan,0.02986210025846958 +40,0.16089218583944756,205,0,100,207,nan,0.02986210025846958 +45,0.16811494142658162,230,0,75,207,nan,0.02986210025846958 +50,0.21289853477178294,256,0,49,207,nan,0.02986210025846958 +55,0.31298600561046025,281,1,24,206,-0.014628570526838303,0.030006153509020805 +60,0.8658403204807424,296,11,9,196,-0.003207409055903554,0.03166834078729153 +65,1.1762548477525878,298,35,7,172,-0.0013167093275114894,0.03500013053417206 +70,1.3359059506661692,298,60,7,147,0.008530525024980307,0.03881623595952988 +75,1.4369319380372483,298,86,7,121,0.01115282904356718,0.043723251670598984 +80,1.5107383605809237,298,111,7,96,0.014448095113039017,0.045386193320155144 +85,1.5691906481172995,298,137,7,70,0.020133845508098602,0.04726334474980831 +90,1.6297611879974812,298,162,7,45,0.022078031674027443,0.04918263107538223 +95,1.7017159174665506,298,188,7,19,0.0257729422301054,0.061704523861408234 diff --git a/analyze_diag.py b/analyze_diag.py new file mode 100644 index 0000000..0382d3c --- /dev/null +++ b/analyze_diag.py @@ -0,0 +1,115 @@ +"""Analyze the HRM diagnostic output: stats + 
plots.""" +import numpy as np +import matplotlib +matplotlib.use("Agg") +import matplotlib.pyplot as plt +from scipy import stats +import argparse + +ap = argparse.ArgumentParser() +ap.add_argument("--in", dest="inp", required=True) +ap.add_argument("--out-dir", default="/home/yurenh2/rrm/research/flossing/plots") +args = ap.parse_args() + +import os +os.makedirs(args.out_dir, exist_ok=True) + +d = np.load(args.inp) +print("keys:", list(d.keys())) +lyap = d["lyap_max"] # (N,) +exact = d["exact_correct"].astype(bool) # (N,) +drift_zH = d["drift_zH"] # (N, T) ACT-step level drift +drift_zL = d["drift_zL"] # (N, T) +q_halt = d["q_halt"] # (N, T) +q_continue = d["q_continue"] # (N, T) +halted_at = d["halted_at"] # (N,) — ACT step at which q_halt>q_continue first + +N = len(lyap) +print(f"N={N}, exact_acc={exact.mean():.3f}") + +succ = lyap[exact]; fail = lyap[~exact] +print(f"\n=== Lyapunov max ===") +print(f" success: mean={succ.mean():+.4f} std={succ.std():.4f} n={succ.size}") +print(f" failure: mean={fail.mean():+.4f} std={fail.std():.4f} n={fail.size}") + +# Stats tests +t, p = stats.ttest_ind(succ, fail, equal_var=False) +print(f" Welch t-test: t={t:.3f} p={p:.2e}") +u, p_u = stats.mannwhitneyu(succ, fail, alternative="two-sided") +print(f" Mann-Whitney U: U={u:.0f} p={p_u:.2e}") +# Effect size (Cohen's d) +pooled_std = np.sqrt(((succ.size-1)*succ.var() + (fail.size-1)*fail.var()) / (succ.size+fail.size-2)) +d_cohen = (succ.mean() - fail.mean()) / pooled_std +print(f" Cohen's d: {d_cohen:.3f}") +# AUC: lyap predicts failure +# Failure tends to have higher (less negative) lyap. 
+auc = stats.mannwhitneyu(fail, succ, alternative="greater").statistic / (fail.size * succ.size) +print(f" AUROC (lyap predicting failure): {auc:.3f}") + +# ----- plot 1: histogram of lyap by success/failure ----- +fig, ax = plt.subplots(1, 1, figsize=(6,4)) +bins = np.linspace(min(lyap.min(),-1.5), max(lyap.max(), -0.1), 40) +ax.hist(succ, bins=bins, alpha=0.55, label=f"success (n={succ.size})", color="C0", density=True) +ax.hist(fail, bins=bins, alpha=0.55, label=f"failure (n={fail.size})", color="C3", density=True) +ax.axvline(succ.mean(), color="C0", ls="--", lw=1) +ax.axvline(fail.mean(), color="C3", ls="--", lw=1) +ax.set_xlabel(r"$\lambda_{max}$ (top Lyapunov exponent, per inner cycle)") +ax.set_ylabel("density") +ax.set_title(f"HRM Sudoku 1k @ step_26040: λ_max distribution\nCohen's d={d_cohen:.2f}, Welch p={p:.1e}, AUROC={auc:.2f}") +ax.legend() +fig.tight_layout() +fig.savefig(f"{args.out_dir}/lyap_hist.png", dpi=130) +plt.close() + +# ----- plot 2: drift trajectory means ----- +fig, axes = plt.subplots(1, 2, figsize=(11,4), sharey=False) +for ax, drift, name in [(axes[0], drift_zH, r"$\|z_H^{(t+1)} - z_H^{(t)}\|$"), + (axes[1], drift_zL, r"$\|z_L^{(t+1)} - z_L^{(t)}\|$")]: + mean_s = drift[exact].mean(0); std_s = drift[exact].std(0) + mean_f = drift[~exact].mean(0); std_f = drift[~exact].std(0) + x = np.arange(drift.shape[1]) + ax.fill_between(x, mean_s-std_s, mean_s+std_s, color="C0", alpha=0.25) + ax.plot(x, mean_s, "C0", label="success") + ax.fill_between(x, mean_f-std_f, mean_f+std_f, color="C3", alpha=0.25) + ax.plot(x, mean_f, "C3", label="failure") + ax.set_xlabel("ACT step") + ax.set_title(f"drift per ACT step: {name}") + ax.legend() + ax.set_yscale("log") +fig.tight_layout() +fig.savefig(f"{args.out_dir}/drift_per_act.png", dpi=130) +plt.close() + +# ----- plot 3: Q-halt gating dynamics ----- +fig, ax = plt.subplots(1, 1, figsize=(6,4)) +diff = q_halt - q_continue +mean_s = diff[exact].mean(0); std_s = diff[exact].std(0) +mean_f = 
diff[~exact].mean(0); std_f = diff[~exact].std(0) +x = np.arange(diff.shape[1]) +ax.fill_between(x, mean_s-std_s, mean_s+std_s, color="C0", alpha=0.25) +ax.plot(x, mean_s, "C0", label="success") +ax.fill_between(x, mean_f-std_f, mean_f+std_f, color="C3", alpha=0.25) +ax.plot(x, mean_f, "C3", label="failure") +ax.axhline(0, color="k", ls=":", lw=0.6) +ax.set_xlabel("ACT step") +ax.set_ylabel("q_halt − q_continue") +ax.set_title("ACT Q-head gating signal over ACT steps") +ax.legend() +fig.tight_layout() +fig.savefig(f"{args.out_dir}/q_gating.png", dpi=130) +plt.close() + +# ----- plot 4: lyap vs token_acc scatter ----- +token_acc = d["token_acc"] +fig, ax = plt.subplots(1, 1, figsize=(6,4)) +ax.scatter(lyap[exact], token_acc[exact], s=8, alpha=0.3, color="C0", label="exact-correct") +ax.scatter(lyap[~exact], token_acc[~exact], s=8, alpha=0.3, color="C3", label="exact-fail") +ax.set_xlabel(r"$\lambda_{max}$") +ax.set_ylabel("token-level accuracy") +ax.legend() +ax.set_title("λ_max vs token accuracy") +fig.tight_layout() +fig.savefig(f"{args.out_dir}/lyap_vs_tokenacc.png", dpi=130) +plt.close() + +print(f"\nplots saved to {args.out_dir}/") diff --git a/analyze_dynamics_experiments.py b/analyze_dynamics_experiments.py new file mode 100644 index 0000000..22491d0 --- /dev/null +++ b/analyze_dynamics_experiments.py @@ -0,0 +1,289 @@ +"""Summarize dynamics-control experiments into a markdown report.""" +from __future__ import annotations + +import json +import re +from dataclasses import dataclass +from datetime import datetime +from pathlib import Path + + +ROOT = Path(__file__).resolve().parent + + +@dataclass(frozen=True) +class RunSpec: + name: str + model: str + family: str + json_name: str + log_name: str | None = None + + +RUNS = [ + RunSpec("HRM baseline 10k", "HRM", "baseline", "step3_L_baseline_26040_fast_10k.json"), + RunSpec("HRM mixed volume-CF", "HRM", "mixed_loss", "step3_M_volume_cf_26040_lstar_neg015_k8_a10_10k.json"), + RunSpec("HRM Engelken interfloss", 
"HRM", "interfloss", "step7_A_hrm_engelken_interfloss_26040_k8_10k.json"), + RunSpec("HRM Engelken+KL interfloss", "HRM", "interfloss_kl", "step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k.json"), + RunSpec("HRM conservative Engelken+KL", "HRM", "interfloss_kl_conservative", "step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k.json"), + RunSpec("HRM late Engelken+KL", "HRM", "late_interfloss_kl", "step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k.json"), + RunSpec("HRM volume-envelope+KL", "HRM", "volume_interfloss_kl", "step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k.json"), + RunSpec("HRM basin consistency", "HRM", "basin", "step8_A_hrm_basin_consistency_beta1_noise002_after8_26040_10k.json"), + RunSpec("HRM single perturbed CE", "HRM", "trajectory_augment_single", "step9_A_hrm_single_perturb_sigma1e-3_26040_10k.json"), + RunSpec("HRM clean+multi perturbed CE", "HRM", "trajectory_augment_multi", "step9_B_hrm_multi4_perturb_sigma1e-3_26040_10k.json"), + RunSpec("HRM fixed-unroll baseline 50k", "HRM", "trajectory_fixed_baseline", "step9_E_hrm_baseline_parallel_fixed_26040_50k.json"), + RunSpec("HRM multi4 loguniform 50k", "HRM", "trajectory_augment_loguniform", "step9_F_hrm_multi4_loguniform_ramp_26040_50k.json"), + RunSpec("TRM baseline 10k", "TRM", "baseline", "step5_L_trm_baseline_26041_batch4_fast_10k.json"), + RunSpec("TRM mixed volume-CF", "TRM", "mixed_loss", "step5_M_trm_volume_cf_26041_lstar002_batch4_k4_a10_10k.json"), + RunSpec("TRM Engelken interfloss", "TRM", "interfloss", "step7_B_trm_engelken_interfloss_26041_k4_batch4_10k.json"), + RunSpec("TRM Engelken+KL interfloss", "TRM", "interfloss_kl", "step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k.json"), + RunSpec("TRM late Engelken+KL", "TRM", "late_interfloss_kl", "step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k.json"), + RunSpec("TRM volume-envelope+KL", "TRM", "volume_interfloss_kl", 
"step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k.json"), + RunSpec("TRM basin consistency", "TRM", "basin", "step8_B_trm_basin_consistency_beta1_noise002_after8_26041_batch4_10k.json"), + RunSpec("TRM single perturbed CE", "TRM", "trajectory_augment_single", "step9_C_trm_single_perturb_sigma1e-3_26041_batch4_10k.json"), + RunSpec("TRM clean+multi perturbed CE", "TRM", "trajectory_augment_multi", "step9_D_trm_multi4_perturb_sigma1e-3_26041_batch4_10k.json"), + RunSpec("TRM fixed-unroll baseline 50k", "TRM", "trajectory_fixed_baseline", "step9_G_trm_baseline_parallel_fixed_26041_batch4_50k.json"), + RunSpec("TRM multi4 loguniform 50k", "TRM", "trajectory_augment_loguniform", "step9_H_trm_multi4_loguniform_ramp_26041_batch4_50k.json"), +] + + +def load_json(path: Path): + if not path.exists(): + return None + try: + return json.loads(path.read_text()) + except Exception as exc: # noqa: BLE001 + return {"_bad_json": str(exc)} + + +def step_of(ev: dict): + return ev.get("step", ev.get("train_step")) + + +def acc_of(ev: dict): + acc = ev.get("acc") + return None if acc is None else float(acc) + + +def evals_of(data: dict): + return [ev for ev in data.get("evals", []) if acc_of(ev) is not None] + + +def best_eval(evals: list[dict]): + if not evals: + return None + return max(evals, key=lambda ev: acc_of(ev)) + + +def last_eval(evals: list[dict]): + if not evals: + return None + return evals[-1] + + +def fmt(x, digits=4): + if x is None: + return "NA" + return f"{float(x):.{digits}f}" + + +def completion_status(data): + if data is None: + return "missing" + if "_bad_json" in data: + return "bad_json" + if data.get("final_acc") is not None: + return "complete" + evs = evals_of(data) + if evs: + return "partial" + return "started" + + +def run_summary(spec: RunSpec): + path = ROOT / spec.json_name + data = load_json(path) + status = completion_status(data) + if data is None or "_bad_json" in data: + return { + "spec": spec, + "status": status, + 
"initial": None, + "final": None, + "last": None, + "best": None, + "n_evals": 0, + "data": data, + } + evs = evals_of(data) + last = last_eval(evs) + best = best_eval(evs) + final = data.get("final_acc") + if final is None and last is not None: + final = acc_of(last) + return { + "spec": spec, + "status": status, + "initial": data.get("initial_acc"), + "final": final, + "last": last, + "best": best, + "n_evals": len(evs), + "data": data, + } + + +def collect_process_snapshot(): + # Avoid importing psutil. This is a best-effort snapshot from /proc. + patterns = [ + "step3_M_volume_cf_26040_lstar_neg015", + "step7_C_hrm_engelken_interfloss_kl10", + "step7_D_trm_engelken_interfloss_kl10", + "launch_dynamics_variants_queue", + "step7_E_hrm_late", + "step7_F_trm_late", + "step7_G_hrm_volume", + "step7_H_trm_volume", + "step8_A_hrm_basin", + "step8_B_trm_basin", + "step9_A_hrm_single", + "step9_B_hrm_multi4", + "step9_C_trm_single", + "step9_D_trm_multi4", + "step9_E_hrm_baseline", + "step9_F_hrm_multi4", + "step9_G_trm_baseline", + "step9_H_trm_multi4", + "launch_trajectory_perturb_queue", + "launch_trajectory_sampling_long", + ] + rows = [] + for proc in Path("/proc").iterdir(): + if not proc.name.isdigit(): + continue + try: + cmd = (proc / "cmdline").read_bytes().replace(b"\x00", b" ").decode("utf-8", "ignore") + except Exception: # noqa: BLE001 + continue + if any(p in cmd for p in patterns): + rows.append((int(proc.name), cmd.strip())) + return sorted(rows) + + +def log_tail_metrics(log_name: str): + path = ROOT / log_name + if not path.exists(): + return {} + text = path.read_text(errors="ignore") + evals = [] + for step, acc in re.findall(r"EVAL @ step\s+(\d+): exact_acc=([0-9.]+)", text): + evals.append((int(step), float(acc))) + progress = re.findall(r"\[\s*(\d+)/10000\]", text) + return { + "log_step": int(progress[-1]) if progress else None, + "log_last_eval": evals[-1] if evals else None, + "log_best_eval": max(evals, key=lambda x: x[1]) if evals else None, 
+ } + + +def floss_episode_summary(data: dict): + lines = [] + episodes = data.get("floss_episodes", []) if data else [] + for ep in episodes: + steps = ep.get("steps", []) + if not steps: + continue + first = steps[0] + last = steps[-1] + max_kl = max((s.get("kl_loss", 0.0) for s in steps), default=0.0) + mean_kl = sum((s.get("kl_loss", 0.0) for s in steps)) / max(len(steps), 1) + lines.append( + f"episode {ep.get('episode')} @ train_step {ep.get('train_step')}: " + f"floss {fmt(first.get('floss_loss', first.get('loss')), 6)} -> " + f"{fmt(last.get('floss_loss', last.get('loss')), 6)}, " + f"lyap1 {fmt(first.get('lyap1_mean'))} -> {fmt(last.get('lyap1_mean'))}, " + f"volume {fmt(first.get('volume_mean'))} -> {fmt(last.get('volume_mean'))}, " + f"KL mean/max {fmt(mean_kl, 6)}/{fmt(max_kl, 6)}" + ) + return lines + + +def markdown_report(): + rows = [run_summary(spec) for spec in RUNS] + baseline_final = { + row["spec"].model: row["final"] + for row in rows + if row["spec"].family == "baseline" and row["final"] is not None + } + + out = [] + out.append("# Dynamics Control Experiment Report") + out.append("") + out.append(f"Generated: {datetime.now().isoformat(timespec='seconds')}") + out.append("") + out.append("## Summary Table") + out.append("") + out.append("| Model | Run | Status | Init | Final/Last | Delta | Best | Best Step | Vs Baseline | Evals |") + out.append("|---|---|---:|---:|---:|---:|---:|---:|---:|---:|") + for row in rows: + spec = row["spec"] + init = row["initial"] + final = row["final"] + best = row["best"] + best_acc = acc_of(best) if best else None + best_step = step_of(best) if best else None + delta = None if init is None or final is None else final - init + base = baseline_final.get(spec.model) + vs_base = None if base is None or final is None or spec.family == "baseline" else final - base + out.append( + f"| {spec.model} | {spec.name} | {row['status']} | {fmt(init)} | {fmt(final)} | " + f"{fmt(delta)} | {fmt(best_acc)} | {best_step if 
best_step is not None else 'NA'} | " + f"{fmt(vs_base)} | {row['n_evals']} |" + ) + + out.append("") + out.append("## Floss Episode Diagnostics") + out.append("") + any_floss = False + for row in rows: + lines = floss_episode_summary(row.get("data") or {}) + if not lines: + continue + any_floss = True + out.append(f"### {row['spec'].name}") + for line in lines: + out.append(f"- {line}") + out.append("") + if not any_floss: + out.append("No completed floss episode diagnostics found yet.") + out.append("") + + out.append("## Incomplete Runs / Process Snapshot") + out.append("") + active = collect_process_snapshot() + if active: + for pid, cmd in active: + out.append(f"- PID {pid}: `{cmd[:220]}`") + else: + out.append("- No monitored experiment processes are active.") + out.append("") + + out.append("## Notes") + out.append("") + out.append("- `Final/Last` is `final_acc` when present, otherwise the latest eval accuracy.") + out.append("- `Vs Baseline` compares against the matching HRM/TRM 10k no-floss baseline.") + out.append("- A complete report may still show partial rows if an experiment crashed or was interrupted.") + out.append("") + return "\n".join(out) + + +def main(): + report = markdown_report() + out_path = ROOT / "dynamics_experiment_report.md" + out_path.write_text(report) + print(report) + print(f"\nWrote {out_path}") + + +if __name__ == "__main__": + main() diff --git a/analyze_halt_bucket.py b/analyze_halt_bucket.py new file mode 100644 index 0000000..2ad80fc --- /dev/null +++ b/analyze_halt_bucket.py @@ -0,0 +1,122 @@ +"""Validate codex's λ*(h, β) = (1/h) log β framework using halted_at buckets. + +If the framework is right, then across halt buckets: +- success samples should all satisfy λ_1 × h_micro × halted_at ≈ log(β_target) (a constant ≪ 0) +- equivalently: success λ_1 should scale as ~1/halted_at + +We use halted_at as a proxy for "effective computation budget" (h in codex's notation). 
"""Validate codex's λ*(h, β) = (1/h) log β framework using halted_at buckets.

If the framework is right, then across halt buckets:
- success samples should all satisfy λ_1 × h_micro ≈ log(β_target) (a constant ≪ 0),
  where h_micro = halted_at × cycles_per_act
- equivalently: success λ_1 should scale as ~1/halted_at

We use halted_at as a proxy for "effective computation budget" (h in codex's notation).
For each ACT step we have cycles_per_act micro-Lyapunov-steps:
    HRM (H=2, L=2): 2*(2+1) = 6
    TRM (H=3, L=6): 3*(6+1) = 21
"""
import numpy as np
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt

ROOT = "/home/yurenh2/rrm/research/flossing"

# halted_at == 0 marks a sample that never halted, i.e. it used every ACT step.
MAX_ACT_STEPS = 16
# Seeded generator so the scatter jitter, and therefore the saved PNG, is reproducible.
jitter_rng = np.random.default_rng(0)

cases = {
    "HRM_step26040": dict(npz=f"{ROOT}/diag_joint_1k.npz", cycles_per_act=6),
    "TRM_step13020": dict(npz=f"{ROOT}/diag_trm_step13020_512.npz", cycles_per_act=21),
}

fig, axes = plt.subplots(2, 3, figsize=(15, 9))

for row, (name, meta) in enumerate(cases.items()):
    cpa = meta["cycles_per_act"]
    # Read the arrays inside a context manager so the archive is closed again.
    with np.load(meta["npz"]) as d:
        lam = d["lyap_spec"][:, 0]          # top-1 Lyap per sample (per micro step)
        succ = d["exact_correct"] > 0.5
        halted = d["halted_at"].copy()
    halted[halted == 0] = MAX_ACT_STEPS     # never-halt → used full 16 ACT steps

    h_micro = halted * cpa                      # effective micro-step horizon
    logbeta = lam * h_micro                     # log of cumulative decay over h_micro
    beta = np.exp(np.clip(logbeta, -20, 20))    # implied β (clipped to avoid overflow)

    print(f"\n=== {name} ===")
    print(f" N={len(lam)} acc={succ.mean():.3f} cycles/ACT={cpa}")
    print(f" λ_1 mean: succ={lam[succ].mean():+.4f} fail={lam[~succ].mean():+.4f}")
    print(f" halted_at: succ={halted[succ].mean():.2f} fail={halted[~succ].mean():.2f}")
    print(f" log β implied succ={logbeta[succ].mean():+.3f} fail={logbeta[~succ].mean():+.3f}")
    print(f" β implied: succ={beta[succ].mean():.3f} fail={beta[~succ].mean():.3f}")

    # By halt bucket: one pass produces the printed table and the per-bucket
    # means that panels A and B plot.
    print(f" bucket n_succ n_fail λ_succ λ_fail logβ_succ logβ_fail")
    buckets = sorted(set(halted.tolist()))
    means_s, means_f = [], []
    for b in buckets:
        ms = (halted == b) & succ
        mf = (halted == b) & ~succ
        n_s, n_f = ms.sum(), mf.sum()
        # Plotted means need at least 3 samples in the group.
        if n_s >= 3:
            means_s.append((b, lam[ms].mean()))
        if n_f >= 3:
            means_f.append((b, lam[mf].mean()))
        # The printed table skips buckets holding fewer than 5 samples in total.
        if n_s + n_f < 5:
            continue
        lam_s = lam[ms].mean() if n_s > 0 else np.nan
        lam_f = lam[mf].mean() if n_f > 0 else np.nan
        lb_s = lam_s * b * cpa
        lb_f = lam_f * b * cpa
        print(f" h={b:>3} {n_s:>4} {n_f:>4} "
              f"{lam_s:+7.4f} {lam_f:+7.4f} {lb_s:+8.3f} {lb_f:+8.3f}")

    # Horizontal jitter de-overlaps points that share an integer halt step;
    # panels A and B draw independent offsets.
    n_succ, n_fail = succ.sum(), (~succ).sum()

    # ----- Panel A: λ vs halted_at, succ vs fail -----
    ax = axes[row, 0]
    ax.scatter(halted[succ] + jitter_rng.uniform(-0.15, 0.15, n_succ), lam[succ],
               s=5, alpha=0.35, c="C2", label=f"succ (n={n_succ})")
    ax.scatter(halted[~succ] + jitter_rng.uniform(-0.15, 0.15, n_fail), lam[~succ],
               s=5, alpha=0.35, c="C3", label=f"fail (n={n_fail})")
    if means_s:
        xs, ys = zip(*means_s)
        ax.plot(xs, ys, "C2o-", lw=2, ms=8, label="succ mean")
    if means_f:
        xf, yf = zip(*means_f)
        ax.plot(xf, yf, "C3o-", lw=2, ms=8, label="fail mean")
    # 1/h reference: codex prediction λ ∝ 1/h
    if means_s:
        h_ref = np.array([b for b, _ in means_s])
        # fit constant = mean of λ*h*cpa over succ
        c = (lam[succ] * halted[succ] * cpa).mean()
        ref = c / (h_ref * cpa)
        ax.plot(h_ref, ref, "k--", lw=1, alpha=0.5, label=f"λ=1/h·log β (log β={c:.2f})")
    ax.axhline(0, color="k", lw=0.5, ls=":")
    ax.set_xlabel("halted_at (ACT steps)")
    ax.set_ylabel("λ_1 (per micro-step)")
    ax.set_title(f"{name}: λ_1 vs halt step")
    ax.legend(fontsize=7)
    ax.grid(alpha=0.3)

    # ----- Panel B: log β = λ × h_micro by halt bucket -----
    ax = axes[row, 1]
    ax.scatter(halted[succ] + jitter_rng.uniform(-0.15, 0.15, n_succ), logbeta[succ],
               s=5, alpha=0.35, c="C2", label="succ")
    ax.scatter(halted[~succ] + jitter_rng.uniform(-0.15, 0.15, n_fail), logbeta[~succ],
               s=5, alpha=0.35, c="C3", label="fail")
    if means_s:
        xs = [b for b, _ in means_s]
        ys = [mean * b * cpa for b, mean in means_s]
        ax.plot(xs, ys, "C2o-", lw=2, ms=8, label="succ mean")
    if means_f:
        xf = [b for b, _ in means_f]
        yf = [mean * b * cpa for b, mean in means_f]
        ax.plot(xf, yf, "C3o-", lw=2, ms=8, label="fail mean")
    ax.axhline(0, color="k", lw=0.5, ls=":")
    ax.set_xlabel("halted_at")
    ax.set_ylabel("log β = λ_1 × halt × cycles/ACT")
    ax.set_title(f"{name}: implied log β (constant ⇒ 1/h scaling holds)")
    ax.legend(fontsize=8)
    ax.grid(alpha=0.3)

    # ----- Panel C: histogram of implied β by succ/fail -----
    ax = axes[row, 2]
    bins = np.linspace(-6, 4, 50)
    ax.hist(logbeta[succ], bins=bins, alpha=0.6, color="C2", label=f"succ μ={logbeta[succ].mean():+.2f}")
    ax.hist(logbeta[~succ], bins=bins, alpha=0.6, color="C3", label=f"fail μ={logbeta[~succ].mean():+.2f}")
    ax.axvline(0, color="k", lw=0.5, ls=":")
    ax.set_xlabel("log β implied")
    ax.set_ylabel("count")
    ax.set_title(f"{name}: log β distribution")
    ax.legend(fontsize=8)
    ax.grid(alpha=0.3)

fig.suptitle("Codex's λ*(h, β) = (1/h) log β prediction: across halt buckets, "
             "λ should scale as 1/h with constant log β", fontsize=11)
fig.tight_layout()
out = f"{ROOT}/plots_halt_bucket.png"
fig.savefig(out, dpi=130)
print(f"\n→ {out}")
"""Analyze the JOINT-tangent diagnostic (1024 samples)."""
import numpy as np
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from scipy import stats
import glob, os

ROOT = "/home/yurenh2/rrm/research/flossing"
OUT = f"{ROOT}/plots_joint"
os.makedirs(OUT, exist_ok=True)

# Merge the per-shard archives into one dict of concatenated arrays.
files = sorted(glob.glob(f"{ROOT}/diag_joint_1k_shard*.npz"))
if not files:
    # Fail before the merged archive is overwritten with an empty one.
    raise FileNotFoundError(f"No diag_joint_1k_shard*.npz files found under {ROOT}")
m = {}
for f in files:
    with np.load(f) as d:
        for k in d.files:
            m.setdefault(k, []).append(d[k])
for k in list(m.keys()):
    m[k] = np.concatenate(m[k], 0)
np.savez_compressed(f"{ROOT}/diag_joint_1k.npz", **m)
print(f"merged: N={len(m['exact_correct'])} acc={m['exact_correct'].mean():.4f}")

ls = m["lyap_spec"]     # (N, K)
exact = m["exact_correct"].astype(bool)
N, K = ls.shape
succ = ls[exact]
fail = ls[~exact]


def auc(score, target_pos):
    """AUROC of `score` for predicting `target_pos`, via the Mann-Whitney U statistic."""
    u = stats.mannwhitneyu(score[target_pos], score[~target_pos], alternative="greater").statistic
    return u / (target_pos.sum() * (~target_pos).sum())


print(f"\n--per-λ stats (joint tangent, k={K}) --")
print(f"{'i':>3} {'mean_all':>10} {'mean_succ':>10} {'mean_fail':>10} {'Δ':>8} {'d':>7} {'auroc':>7} {'p_t':>9}")
for i in range(K):
    li = ls[:, i]
    s_ = li[exact]
    f_ = li[~exact]
    # Pooled SD for Cohen's d: the (n-1) weights require sample variances (ddof=1).
    pooled = np.sqrt(((s_.size - 1) * s_.var(ddof=1) + (f_.size - 1) * f_.var(ddof=1))
                     / (s_.size + f_.size - 2))
    dc = (s_.mean() - f_.mean()) / pooled
    delta = f_.mean() - s_.mean()
    auroc_i = auc(li, ~exact)
    t, p = stats.ttest_ind(s_, f_, equal_var=False)     # Welch's t-test
    print(f"{i+1:>3} {li.mean():+10.4f} {s_.mean():+10.4f} {f_.mean():+10.4f} {delta:+8.4f} {abs(dc):>7.3f} {auroc_i:>7.3f} {p:>9.2e}")

print(f"\n--combined predictors AUROC (failure)--")
for label, score in [
    ("λ_1", ls[:, 0]),
    ("λ_K (most-neg)", ls[:, -1]),
    ("mean λ", ls.mean(1)),
    ("max λ", ls.max(1)),
    ("# of positive λ", (ls > 0).sum(1).astype(float)),
]:
    print(f" {label:18s}: {auc(score, ~exact):.4f}")

# --- plot 1: spectrum mean ± std ---
fig, ax = plt.subplots(1, 1, figsize=(7, 5))
mean_s = succ.mean(0); std_s = succ.std(0)
mean_f = fail.mean(0); std_f = fail.std(0)
x = np.arange(1, K + 1)
ax.fill_between(x, mean_s - std_s, mean_s + std_s, color="C0", alpha=0.25)
ax.plot(x, mean_s, "C0-o", label=f"success n={succ.shape[0]}", lw=2)
ax.fill_between(x, mean_f - std_f, mean_f + std_f, color="C3", alpha=0.25)
ax.plot(x, mean_f, "C3-o", label=f"failure n={fail.shape[0]}", lw=2)
ax.axhline(0, color="k", ls="--", lw=1, label="critical λ=0")
ax.set_xlabel(r"Lyapunov index $i$")
ax.set_ylabel(r"$\lambda_i$ (per inner cycle, joint $(z_H,z_L)$ tangent)")
ax.set_title(f"Joint-tangent top-{K} Lyapunov spectrum @ step_26040 (N={N}, acc={exact.mean():.3f})")
ax.legend()
ax.grid(alpha=0.3)
fig.tight_layout()
fig.savefig(f"{OUT}/lyap_spectrum_joint.png", dpi=130)
plt.close(fig)

# --- plot 2: λ_1 histogram with critical line ---
fig, ax = plt.subplots(1, 1, figsize=(7, 4))
lo, hi = ls[:, 0].min(), ls[:, 0].max()
bins = np.linspace(lo, hi, 60)
ax.hist(succ[:, 0], bins=bins, alpha=0.55, label=f"succ", color="C0", density=True)
ax.hist(fail[:, 0], bins=bins, alpha=0.55, label=f"fail", color="C3", density=True)
ax.axvline(0, color="k", ls="--", lw=1, label="λ=0")
ax.axvline(succ[:, 0].mean(), color="C0", ls=":", lw=1)
ax.axvline(fail[:, 0].mean(), color="C3", ls=":", lw=1)
ax.set_xlabel(r"$\lambda_1$")
ax.set_ylabel("density")
ax.set_title(f"Joint $\\lambda_1$ distribution (AUROC={auc(ls[:,0], ~exact):.3f})")
ax.legend()
fig.tight_layout()
fig.savefig(f"{OUT}/lyap1_hist_joint.png", dpi=130)
plt.close(fig)

# --- plot 3: # positive Lyapunov per sample (how many of the top-K are positive?) ---
n_pos_per_sample = (ls > 0).sum(1)
fig, ax = plt.subplots(1, 1, figsize=(6.5, 4))
bins = np.arange(K + 2) - 0.5      # one bin centred on each integer count 0..K
ax.hist(n_pos_per_sample[exact], bins=bins, alpha=0.55, label="success", color="C0", density=True, align="mid")
ax.hist(n_pos_per_sample[~exact], bins=bins, alpha=0.55, label="failure", color="C3", density=True, align="mid")
ax.set_xticks(range(K + 1))
# K comes from the data, so the label no longer hard-codes 8.
ax.set_xlabel(f"# of positive (expansive) Lyapunov exponents in top-{K}")
ax.set_ylabel("fraction of samples in group")
ax.set_title("Counts of expansive modes per sample\n(joint dynamics has unstable manifold → no fixed point)")
ax.legend()
fig.tight_layout()
fig.savefig(f"{OUT}/n_positive_modes.png", dpi=130)
plt.close(fig)

print(f"\nplots → {OUT}/")
def _safe_corr(a: np.ndarray, b: np.ndarray) -> float:
    """Pearson correlation of the flattened inputs, or NaN when it is undefined.

    NaN is returned when either input is empty or has zero standard deviation.
    """
    a = np.asarray(a, dtype=np.float64).reshape(-1)
    b = np.asarray(b, dtype=np.float64).reshape(-1)
    if a.size == 0 or b.size == 0 or np.std(a) == 0 or np.std(b) == 0:
        return float("nan")
    return float(np.corrcoef(a, b)[0, 1])


def _take(values: np.ndarray, idx: np.ndarray) -> np.ndarray:
    """Pick ``values[i, idx[i]]`` for every row ``i``."""
    return values[np.arange(values.shape[0]), idx]


def _selector_metrics(exact: np.ndarray, token_acc: np.ndarray, idx: np.ndarray, prefix: str) -> dict[str, float]:
    """Mean exact-match and token accuracy of the per-row entries chosen by ``idx``."""
    return {
        f"{prefix}/exact": float(_take(exact, idx).mean()),
        f"{prefix}/token_acc": float(_take(token_acc, idx).mean()),
    }


def _feature_table(data: np.lib.npyio.NpzFile) -> dict[str, np.ndarray]:
    """Collect the selection features available in ``data``.

    ``"lyap"`` (if present and non-empty) is exposed as ``lambda1``;
    ``"lyap_spec"`` (if present and non-empty) yields several summaries taken
    over its last axis.
    """
    features: dict[str, np.ndarray] = {}
    lyap = data["lyap"] if "lyap" in data.files else np.asarray([])
    if lyap.size:
        features["lambda1"] = lyap

    spec = data["lyap_spec"] if "lyap_spec" in data.files else np.asarray([])
    if spec.size:
        positive = np.maximum(spec, 0)
        features["spec_lambda1"] = spec[..., 0]
        features["spec_pos_mass"] = positive.sum(axis=-1)
        features["spec_pos_l2"] = np.sqrt((positive ** 2).mean(axis=-1))
        features["spec_mean"] = spec.mean(axis=-1)
        features["spec_count_pos"] = (spec > 0).sum(axis=-1).astype(np.float32)
        features["spec_spread"] = spec[..., 0] - spec[..., -1]
    return features


def analyze(path: Path) -> dict[str, float | str]:
    """Summarize one cached rollout ``.npz`` file.

    The archive must contain ``exact``, ``token_acc`` and ``q_halt``;
    ``det_exact``, ``lyap`` and ``lyap_spec`` are optional. ``exact`` is
    indexed as (sample, rollout).

    Args:
        path: Location of the ``.npz`` cache.

    Returns:
        A flat mapping of metric name to value: rollout-count statistics,
        accuracy of the rollout picked by the largest ``q_halt``, and for each
        available feature the accuracy of the rollout picked by its minimum,
        together with win/loss counts against the ``q_halt`` selector.
    """
    # Read everything inside the context manager so the archive is closed on return.
    with np.load(path) as data:
        exact = data["exact"].astype(bool)
        token_acc = data["token_acc"].astype(np.float32)
        q_halt = data["q_halt"].astype(np.float32)
        det_exact = data["det_exact"].astype(bool) if "det_exact" in data.files else np.asarray([])
        features = _feature_table(data)

    n_rollouts = exact.shape[1]
    correct_count = exact.sum(axis=1).astype(np.float32)
    any_correct = exact.any(axis=1)
    exact_f32 = exact.astype(np.float32)

    out: dict[str, float | str] = {
        "file": str(path),
        "n_samples": float(exact.shape[0]),
        "rollouts": float(n_rollouts),
        "deterministic/exact": float(det_exact.mean()) if det_exact.size else float("nan"),
        "mean_rollout/exact": float(exact.mean()),
        "oracle_pass/exact": float(any_correct.mean()),
        "correct_count/mean": float(correct_count.mean()),
        "correct_count/std": float(correct_count.std()),
        "correct_count/median": float(np.median(correct_count)),
        "correct_count/q10": float(np.quantile(correct_count, 0.10)),
        "correct_count/q25": float(np.quantile(correct_count, 0.25)),
        "correct_count/q75": float(np.quantile(correct_count, 0.75)),
        "correct_count/q90": float(np.quantile(correct_count, 0.90)),
        "correct_count/zero_frac": float((correct_count == 0).mean()),
        "correct_count/full_frac": float((correct_count == n_rollouts).mean()),
    }
    # Thresholds above the number of rollouts can never be met and are omitted.
    for threshold in (1, 5, 10, 25, 50, 75, 90):
        if threshold <= n_rollouts:
            out[f"correct_count/ge_{threshold}_frac"] = float((correct_count >= threshold).mean())

    q_idx = q_halt.argmax(axis=1)
    out.update(_selector_metrics(exact, token_acc, q_idx, "q_max"))
    out["corr_q_correct"] = _safe_corr(q_halt, exact_f32)
    q_selected = _take(exact, q_idx).astype(bool)

    if det_exact.size:
        det_success = det_exact.astype(bool)
        det_fail = ~det_success
        if det_success.any():
            out["correct_count/det_success_mean"] = float(correct_count[det_success].mean())
            out["oracle_pass/det_success_frac"] = float(any_correct[det_success].mean())
            out["q_max/det_success_frac"] = float(q_selected[det_success].mean())
        if det_fail.any():
            out["correct_count/det_fail_mean"] = float(correct_count[det_fail].mean())
            out["oracle_pass/det_fail_frac"] = float(any_correct[det_fail].mean())
            out["q_max/det_fail_frac"] = float(q_selected[det_fail].mean())

    for name, feature in features.items():
        # The feature selector picks the rollout with the smallest feature value.
        idx = feature.argmin(axis=1)
        out.update(_selector_metrics(exact, token_acc, idx, f"{name}_min"))
        out[f"corr_neg_{name}_correct"] = _safe_corr(-feature, exact_f32)
        out[f"corr_q_{name}"] = _safe_corr(q_halt, feature)

        # Head-to-head against the q_halt selector (q_selected is loop-invariant).
        f_sel = _take(exact, idx).astype(bool)
        out[f"{name}_q_wins"] = float((q_selected & ~f_sel).sum())
        out[f"{name}_feature_wins"] = float((~q_selected & f_sel).sum())
        out[f"{name}_both_fail"] = float((~q_selected & ~f_sel).sum())

    return out


def main() -> None:
    """CLI entry point: analyze each given cache, optionally save a CSV, print all metrics."""
    parser = argparse.ArgumentParser()
    parser.add_argument("npz", nargs="+", help="Cached .npz files from ptrm_rollout_selection.py")
    parser.add_argument("--out-csv", default=None)
    args = parser.parse_args()

    rows = [analyze(Path(p)) for p in args.npz]
    # Files may expose different metric sets; the CSV header is their sorted union.
    fieldnames = sorted({k for row in rows for k in row})

    if args.out_csv:
        out = Path(args.out_csv)
        out.parent.mkdir(parents=True, exist_ok=True)
        with out.open("w", newline="") as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(rows)
        print(f"saved {out}")

    for row in rows:
        print(f"\n{row['file']}")
        for key in fieldnames:
            if key == "file" or key not in row:
                continue
            print(f"{key}: {row[key]}")


if __name__ == "__main__":
    main()
"""Analyze separated λ_L and λ_H, and their predictive power for failure."""
import numpy as np
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from scipy import stats
import glob, os

ROOT = "/home/yurenh2/rrm/research/flossing"
OUT = f"{ROOT}/plots_separate"
os.makedirs(OUT, exist_ok=True)

# Merge the per-shard archives into one dict of concatenated arrays.
files = sorted(glob.glob(f"{ROOT}/diag_sep_1k_shard*.npz"))
if not files:
    # Fail before the merged archive is overwritten with an empty one.
    raise FileNotFoundError(f"No diag_sep_1k_shard*.npz files found under {ROOT}")
m = {}
for f in files:
    with np.load(f) as d:
        for k in d.files:
            m.setdefault(k, []).append(d[k])
for k in list(m.keys()):
    m[k] = np.concatenate(m[k], 0)
np.savez_compressed(f"{ROOT}/diag_sep_1k.npz", **m)

N = len(m["exact_correct"])
exact = m["exact_correct"].astype(bool)
acc = exact.mean()
print(f"N={N} acc={acc:.4f}\n")

SPECTRA = ["lyap_joint", "lyap_L", "lyap_H"]


def auc(score, target):
    """AUROC of `score` for predicting `target`, via the Mann-Whitney U statistic."""
    u = stats.mannwhitneyu(score[target], score[~target], alternative="greater").statistic
    return u / (target.sum() * (~target).sum())


results = {}
for name in SPECTRA:
    ls = m[name]
    print(f"== {name} ==")
    print(f"{'i':>3} {'mean_all':>10} {'mean_succ':>10} {'mean_fail':>10} {'Δ':>8} {'AUROC':>7} {'cohen_d':>8}")
    for i in range(ls.shape[1]):
        li = ls[:, i]
        s_ = li[exact]
        f_ = li[~exact]
        # Pooled SD for Cohen's d: the (n-1) weights require sample variances (ddof=1).
        pooled = np.sqrt(((s_.size - 1) * s_.var(ddof=1) + (f_.size - 1) * f_.var(ddof=1))
                         / (s_.size + f_.size - 2))
        dc = (s_.mean() - f_.mean()) / pooled
        delta = f_.mean() - s_.mean()
        au = auc(li, ~exact)
        print(f"{i+1:>3} {li.mean():+10.4f} {s_.mean():+10.4f} {f_.mean():+10.4f} {delta:+8.4f} {au:>7.3f} {abs(dc):>8.3f}")
    results[name] = ls
    print()

# AUROC comparison
print("=== Single-feature AUROC predicting failure ===")
for name in SPECTRA:
    ls = m[name]
    for label, score in [("λ_1", ls[:, 0]), ("mean", ls.mean(1)), ("λ_K", ls[:, -1])]:
        print(f" {name}.{label:6s}: AUROC = {auc(score, ~exact):.4f}")

# --- plot 1: three spectra overlay ---
fig, axes = plt.subplots(1, 3, figsize=(15, 4.5), sharex=True)
for ax, name in zip(axes, SPECTRA):
    ls = m[name]
    K = ls.shape[1]
    x = np.arange(1, K + 1)
    s_ = ls[exact]
    f_ = ls[~exact]
    ax.fill_between(x, s_.mean(0) - s_.std(0), s_.mean(0) + s_.std(0), color="C0", alpha=0.25)
    ax.plot(x, s_.mean(0), "C0-o", label="succ", lw=2)
    ax.fill_between(x, f_.mean(0) - f_.std(0), f_.mean(0) + f_.std(0), color="C3", alpha=0.25)
    ax.plot(x, f_.mean(0), "C3-o", label="fail", lw=2)
    ax.axhline(0, color="k", ls="--", lw=1, label="critical")
    ax.set_xlabel("λ index")
    ax.set_ylabel(r"$\lambda_i$ (per cycle)")
    ax.set_title(name)
    ax.legend()
    ax.grid(alpha=0.3)
fig.suptitle(f"HRM step_26040 — separated Lyapunov spectra (N={N}, acc={acc:.2%})")
fig.tight_layout()
fig.savefig(f"{OUT}/three_spectra.png", dpi=130)
plt.close(fig)


def _lyap1_hist(values, xlabel, title_prefix, filename, zero_line):
    """Save a success/failure density histogram of one top-exponent array.

    `zero_line` adds the dashed λ=0 marker; the group means are always drawn.
    """
    fig, ax = plt.subplots(1, 1, figsize=(7, 4))
    bins = np.linspace(values.min(), values.max(), 60)
    ax.hist(values[exact], bins=bins, alpha=0.55, label=f"succ", color="C0", density=True)
    ax.hist(values[~exact], bins=bins, alpha=0.55, label=f"fail", color="C3", density=True)
    if zero_line:
        ax.axvline(0, color="k", ls="--", lw=1)
    ax.axvline(values[exact].mean(), color="C0", ls=":", lw=1)
    ax.axvline(values[~exact].mean(), color="C3", ls=":", lw=1)
    ax.set_xlabel(xlabel)
    ax.set_ylabel("density")
    ax.set_title(f"{title_prefix} (AUROC={auc(values, ~exact):.3f})")
    ax.legend()
    fig.tight_layout()
    fig.savefig(f"{OUT}/{filename}", dpi=130)
    plt.close(fig)


# --- plot 2: λ_H_1 histogram (the differentiator) ---
_lyap1_hist(
    m["lyap_H"][:, 0],
    r"$\lambda_{H,1}$ — H subsystem top Lyapunov",
    "H-module Lyapunov is the differentiator",
    "lyap_H_hist.png",
    zero_line=True,
)

# --- plot 3: λ_L_1 histogram (NOT a differentiator) ---
_lyap1_hist(
    m["lyap_L"][:, 0],
    r"$\lambda_{L,1}$ — L subsystem top Lyapunov",
    "L-module Lyapunov is NOT a differentiator",
    "lyap_L_hist.png",
    zero_line=False,
)

# Keep `ls` bound to the last histogram's data, as the original script did.
ls = m["lyap_L"][:, 0]

# --- plot 4: scatter λ_L vs λ_H, colored by success/failure ---
fig, ax = plt.subplots(1, 1, figsize=(7, 6))
xL = m["lyap_L"][:, 0]
xH = m["lyap_H"][:, 0]
ax.scatter(xL[exact], xH[exact], s=5, alpha=0.4, color="C0", label="succ")
ax.scatter(xL[~exact], xH[~exact], s=5, alpha=0.4, color="C3", label="fail")
ax.axhline(0, color="k", ls="--", lw=0.5)
ax.set_xlabel(r"$\lambda_{L,1}$ (L subsystem)")
ax.set_ylabel(r"$\lambda_{H,1}$ (H subsystem)")
ax.set_title("Success/failure separates along the λ_H axis")
ax.legend()
ax.grid(alpha=0.3)
fig.tight_layout()
fig.savefig(f"{OUT}/lyap_L_vs_H_scatter.png", dpi=130)
plt.close(fig)

print(f"\nplots → {OUT}/")
"""Full-spectrum microscope for HRM/TRM joint Lyapunov diagnostics.

This script intentionally treats the finite-time QR columns as an unordered
top-k estimate per sample and analyzes both the raw column-0 value and the
sorted spectrum. The sorted features are safer for per-sample questions like
"how many unstable directions does this trajectory have?".
"""

ROOT = Path("/home/yurenh2/rrm/research/flossing")


def auc_score(score: np.ndarray, label: np.ndarray) -> float:
    """AUROC for score predicting label=True, with average ranks for ties.

    Returns NaN when either class is empty.
    """
    score = np.asarray(score, dtype=np.float64)
    label = np.asarray(label, dtype=bool)
    n_pos = int(label.sum())
    n_neg = int((~label).sum())
    if n_pos == 0 or n_neg == 0:
        return float("nan")

    order = np.argsort(score)
    ranks = np.empty_like(order, dtype=np.float64)
    ranks[order] = np.arange(1, len(score) + 1, dtype=np.float64)

    # Replace the ranks inside every run of equal scores by their average.
    sorted_score = score[order]
    i = 0
    while i < len(sorted_score):
        j = i + 1
        while j < len(sorted_score) and sorted_score[j] == sorted_score[i]:
            j += 1
        if j - i > 1:
            ranks[order[i:j]] = (i + 1 + j) / 2.0
        i = j

    # Rank-sum form of the Mann-Whitney U statistic, normalized to [0, 1].
    rank_sum = ranks[label].sum()
    return float((rank_sum - n_pos * (n_pos + 1) / 2.0) / (n_pos * n_neg))


def cohen_d(success: np.ndarray, failure: np.ndarray) -> float:
    """Cohen's d of failure minus success, using the pooled sample variance.

    Returns NaN when either group has fewer than two values or the pooled
    variance is not positive.
    """
    success = np.asarray(success, dtype=np.float64)
    failure = np.asarray(failure, dtype=np.float64)
    if len(success) < 2 or len(failure) < 2:
        return float("nan")
    pooled = (
        (len(success) - 1) * success.var(ddof=1)
        + (len(failure) - 1) * failure.var(ddof=1)
    ) / (len(success) + len(failure) - 2)
    if pooled <= 0:
        return float("nan")
    return float((failure.mean() - success.mean()) / math.sqrt(pooled))


def safe_mean(x: np.ndarray) -> float:
    """Mean of ``x``, or NaN for an empty input."""
    return float(np.mean(x)) if len(x) else float("nan")


def safe_std(x: np.ndarray) -> float:
    """Population standard deviation of ``x``, or NaN for an empty input."""
    return float(np.std(x)) if len(x) else float("nan")


def parse_step(path: Path, kind: str) -> int:
    """Extract the checkpoint step from a diagnostic file name.

    HRM names carry ``step_<N>_512``; every other kind carries ``step<N>_512``.

    Raises:
        ValueError: If the file name does not contain the expected pattern.
    """
    pattern = r"step_(\d+)_512" if kind == "HRM" else r"step(\d+)_512"
    match = re.search(pattern, path.name)
    if match is None:
        raise ValueError(f"Cannot parse {kind} checkpoint step from file name {path.name!r}")
    return int(match.group(1))


def discover(kind: str) -> list[Path]:
    """List the diagnostic ``.npz`` files of ``kind`` under ``ROOT``, ordered by step.

    Raises:
        FileNotFoundError: If no matching file exists.
    """
    pattern = "diag_hrm_step_*_512.npz" if kind == "HRM" else "diag_trm_singleGPU_step*_512.npz"
    files = sorted(ROOT.glob(pattern), key=lambda p: parse_step(p, kind))
    if not files:
        raise FileNotFoundError(f"No {kind} diagnostic files found under {ROOT}")
    return files


def feature_dict(raw_lam: np.ndarray) -> dict[str, np.ndarray]:
    """Compute per-sample spectrum features from a (samples, k) exponent array.

    Every feature except ``raw_col0`` is taken from the row-wise descending
    sort of ``raw_lam``. The ``*8`` / ``5_8`` names are fixed strings; the code
    itself works on however many columns are present, but ``lambda_2`` needs at
    least two columns and ``tail_mean_5_8`` averages columns from index 4 on.
    """
    sorted_lam = np.sort(raw_lam, axis=1)[:, ::-1]
    positive = np.clip(sorted_lam, 0.0, None)
    k = sorted_lam.shape[1]
    # Least-squares slope of λ against the centred rank index.
    x = np.arange(k, dtype=np.float64)
    x = x - x.mean()
    denom = float((x**2).sum())
    slope = (sorted_lam @ x) / denom
    return {
        "raw_col0": raw_lam[:, 0],
        "lambda_max": sorted_lam[:, 0],
        "lambda_2": sorted_lam[:, 1],
        "lambda_min8": sorted_lam[:, -1],
        "mean8": sorted_lam.mean(axis=1),
        "sum8": sorted_lam.sum(axis=1),
        "tail_mean_5_8": sorted_lam[:, 4:].mean(axis=1),
        "positive_sum": positive.sum(axis=1),
        "positive_count": (sorted_lam > 0).sum(axis=1),
        "spread": sorted_lam[:, 0] - sorted_lam[:, -1],
        "gap12": sorted_lam[:, 0] - sorted_lam[:, 1],
        "std8": sorted_lam.std(axis=1),
        "linear_slope": slope,
    }


def summarize_file(kind: str, path: Path) -> tuple[dict, list[dict]]:
    """Summarize one diagnostic ``.npz`` file.

    The archive must provide ``lyap_spec``, ``exact_correct`` and ``token_acc``.

    Returns:
        ``(summary, rows)``: ``summary`` is a flat dict of checkpoint-level
        statistics; ``rows`` holds one dict per feature (keyed by
        ``"feature"``) followed by one dict per sorted-spectrum rank (keyed by
        ``"rank"``).
    """
    # Read the arrays inside a context manager so the archive is closed again.
    with np.load(path) as data:
        raw_lam = np.asarray(data["lyap_spec"], dtype=np.float64)
        exact = data["exact_correct"].astype(bool)
        token_acc = np.asarray(data["token_acc"], dtype=np.float64)
    sorted_lam = np.sort(raw_lam, axis=1)[:, ::-1]
    fail = ~exact
    features = feature_dict(raw_lam)
    step = parse_step(path, kind)

    # Share of adjacent raw column pairs that are non-increasing (1e-5 tolerance).
    monotone_frac = float((np.diff(raw_lam, axis=1) <= 1e-5).mean())
    summary = {
        "kind": kind,
        "file": str(path),
        "step": step,
        "n": int(len(exact)),
        "k": int(raw_lam.shape[1]),
        "acc": float(exact.mean()),
        "n_success": int(exact.sum()),
        "n_failure": int(fail.sum()),
        "raw_monotone_adjacent_fraction": monotone_frac,
        "raw_col0_is_sample_max_fraction": float((raw_lam[:, 0] >= sorted_lam[:, 0] - 1e-7).mean()),
    }

    # Group means for the most interpretable features.
    for name in [
        "raw_col0",
        "lambda_max",
        "mean8",
        "tail_mean_5_8",
        "positive_sum",
        "positive_count",
        "spread",
        "gap12",
    ]:
        arr = features[name]
        success_mean = safe_mean(arr[exact])
        failure_mean = safe_mean(arr[fail])
        summary[f"{name}_success_mean"] = success_mean
        summary[f"{name}_failure_mean"] = failure_mean
        summary[f"{name}_delta_failure_minus_success"] = failure_mean - success_mean
        summary[f"{name}_auc_failure"] = auc_score(arr, fail)

    # Continuous token-accuracy correlations catch near-misses, not only exact success.
    for name in ["lambda_max", "mean8", "tail_mean_5_8", "positive_sum", "positive_count"]:
        arr = features[name]
        if arr.std() > 0 and token_acc.std() > 0:
            summary[f"{name}_corr_token_acc"] = float(np.corrcoef(arr, token_acc)[0, 1])
        else:
            summary[f"{name}_corr_token_acc"] = float("nan")

    feature_rows = [
        {
            "kind": kind,
            "step": step,
            "feature": name,
            "success_mean": safe_mean(arr[exact]),
            "failure_mean": safe_mean(arr[fail]),
            "success_std": safe_std(arr[exact]),
            "failure_std": safe_std(arr[fail]),
            "delta_failure_minus_success": safe_mean(arr[fail]) - safe_mean(arr[exact]),
            "cohen_d_failure_minus_success": cohen_d(arr[exact], arr[fail]),
            "auc_failure": auc_score(arr, fail),
        }
        for name, arr in features.items()
    ]

    spectrum_rows = [
        {
            "kind": kind,
            "step": step,
            "rank": i + 1,
            "success_mean": safe_mean(sorted_lam[exact, i]),
            "failure_mean": safe_mean(sorted_lam[fail, i]),
            "delta_failure_minus_success": safe_mean(sorted_lam[fail, i])
            - safe_mean(sorted_lam[exact, i]),
            "auc_failure": auc_score(sorted_lam[:, i], fail),
        }
        for i in range(raw_lam.shape[1])
    ]

    return summary, feature_rows + spectrum_rows


def write_csv(path: Path, rows: list[dict]) -> None:
    """Write ``rows`` as CSV; the header is the union of keys in first-seen order.

    Nothing is written (and no file is created) when ``rows`` is empty.
    """
    if not rows:
        return
    # dict.fromkeys de-duplicates while keeping first-seen order.
    keys = list(dict.fromkeys(key for row in rows for key in row))
    with path.open("w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=keys)
        writer.writeheader()
        writer.writerows(rows)
def plot_series(kind: str, summaries: list[dict], out_dir: Path) -> None:
    """Plot accuracy and three success/failure spectrum features against checkpoint step.

    Saves ``<kind>_checkpoint_spectrum_features.png`` (lower-cased kind) in ``out_dir``.
    """
    steps = np.array([r["step"] for r in summaries])
    acc = np.array([r["acc"] for r in summaries])

    fig, axes = plt.subplots(2, 2, figsize=(12, 8))
    ax = axes[0, 0]
    ax.plot(steps, acc, "ko-", label="exact acc")
    ax.set_title(f"{kind}: accuracy")
    ax.set_xlabel("checkpoint step")
    ax.set_ylabel("accuracy")
    ax.grid(alpha=0.3)

    # (axis, summary-key prefix, success label, failure label, title, y label, zero line)
    panels = [
        (axes[0, 1], "lambda_max", "success λmax", "failure λmax",
         "Most unstable measured mode", "sorted λmax", True),
        (axes[1, 0], "mean8", "success mean top-8", "failure mean top-8",
         "Top-8 volume proxy", "mean sorted λ1..λ8", True),
        (axes[1, 1], "positive_count", "success", "failure",
         "Dimensionality of expansion", "# positive exponents among top 8", False),
    ]
    for ax, prefix, succ_label, fail_label, title, ylabel, zero_line in panels:
        ax.plot(steps, [r[f"{prefix}_success_mean"] for r in summaries], "C0-o", label=succ_label)
        ax.plot(steps, [r[f"{prefix}_failure_mean"] for r in summaries], "C3-o", label=fail_label)
        if zero_line:
            ax.axhline(0, color="k", lw=1, alpha=0.35)
        ax.set_title(title)
        ax.set_xlabel("checkpoint step")
        ax.set_ylabel(ylabel)
        ax.legend()
        ax.grid(alpha=0.3)

    fig.tight_layout()
    fig.savefig(out_dir / f"{kind.lower()}_checkpoint_spectrum_features.png", dpi=150)
    plt.close(fig)


def plot_spectra(kind: str, files: list[Path], out_dir: Path) -> None:
    """Draw a grid with the mean sorted spectrum of successes and failures per checkpoint.

    Saves ``<kind>_mean_sorted_spectra_grid.png`` (lower-cased kind) in ``out_dir``.
    """
    n = len(files)
    cols = min(5, n)
    rows = int(math.ceil(n / cols))
    fig, axes = plt.subplots(rows, cols, figsize=(3.4 * cols, 2.7 * rows), squeeze=False)
    for ax, path in zip(axes.flat, files):
        # Close each archive as soon as its arrays are read.
        with np.load(path) as data:
            raw_lam = np.asarray(data["lyap_spec"], dtype=np.float64)
            exact = data["exact_correct"].astype(bool)
        sorted_lam = np.sort(raw_lam, axis=1)[:, ::-1]
        fail = ~exact
        x = np.arange(1, sorted_lam.shape[1] + 1)
        if exact.any():
            ax.plot(x, sorted_lam[exact].mean(axis=0), "C0-o", lw=1.6, ms=3, label="success")
        if fail.any():
            ax.plot(x, sorted_lam[fail].mean(axis=0), "C3-o", lw=1.6, ms=3, label="failure")
        ax.axhline(0, color="k", lw=0.8, alpha=0.35)
        ax.set_title(f"step {parse_step(path, kind)} acc={exact.mean():.2f}")
        ax.set_xlabel("sorted rank")
        ax.set_ylabel("λ")
        ax.grid(alpha=0.25)
    # Hide grid cells that have no checkpoint.
    for ax in axes.flat[len(files) :]:
        ax.axis("off")
    # One shared legend, taken from the first panel.
    handles, labels = axes.flat[0].get_legend_handles_labels()
    if handles:
        fig.legend(handles, labels, loc="upper center", ncol=2)
    fig.tight_layout(rect=[0, 0, 1, 0.96])
    fig.savefig(out_dir / f"{kind.lower()}_mean_sorted_spectra_grid.png", dpi=150)
    plt.close(fig)


def plot_feature_auc(kind: str, feature_rows: list[dict], out_dir: Path) -> None:
    """Plot failure-prediction AUROC per checkpoint for a fixed set of features.

    Saves ``<kind>_feature_auc.png`` (lower-cased kind) in ``out_dir``.
    """
    selected = [
        "raw_col0",
        "lambda_max",
        "mean8",
        "tail_mean_5_8",
        "positive_sum",
        "positive_count",
        "spread",
        "gap12",
    ]
    fig, ax = plt.subplots(figsize=(12, 5))
    for feature in selected:
        # `.get` is needed because per-rank rows carry no "feature" key.
        rows = [r for r in feature_rows if r.get("feature") == feature and r["kind"] == kind]
        rows = sorted(rows, key=lambda r: r["step"])
        ax.plot([r["step"] for r in rows], [r["auc_failure"] for r in rows], marker="o", label=feature)
    ax.axhline(0.5, color="k", lw=1, alpha=0.35)
    ax.set_title(f"{kind}: feature AUROC for predicting exact failure")
    ax.set_xlabel("checkpoint step")
    ax.set_ylabel("AUROC")
    ax.set_ylim(0.0, 1.02)
    ax.legend(ncol=4, fontsize=8)
    ax.grid(alpha=0.3)
    fig.tight_layout()
    fig.savefig(out_dir / f"{kind.lower()}_feature_auc.png", dpi=150)
    plt.close(fig)


def main() -> None:
    """Summarize every HRM and TRM checkpoint, then write the plots, CSVs and JSON."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--out-dir", default=str(ROOT / "spectrum_microscope"))
    args = parser.parse_args()

    out_dir = Path(args.out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    all_summaries: list[dict] = []
    all_feature_rows: list[dict] = []
    for kind in ["HRM", "TRM"]:
        files = discover(kind)
        summaries = []
        feature_rows = []
        for path in files:
            summary, rows = summarize_file(kind, path)
            summaries.append(summary)
            feature_rows.extend(rows)
        summaries = sorted(summaries, key=lambda r: r["step"])
        all_summaries.extend(summaries)
        all_feature_rows.extend(feature_rows)

        plot_series(kind, summaries, out_dir)
        plot_spectra(kind, files, out_dir)
        plot_feature_auc(kind, feature_rows, out_dir)

    write_csv(out_dir / "checkpoint_summary.csv", all_summaries)
    write_csv(out_dir / "feature_and_rank_summary.csv", all_feature_rows)
    (out_dir / "checkpoint_summary.json").write_text(json.dumps(all_summaries, indent=2))

    print(f"Wrote {out_dir}")
    print(f" {out_dir / 'checkpoint_summary.csv'}")
    print(f" {out_dir / 'feature_and_rank_summary.csv'}")


if __name__ == "__main__":
    main()
+ eval_steps = [e["step"] for e in d["evals"]] + eval_accs = [e["acc"] for e in d["evals"]] + + axes[0,0].plot(eval_steps, eval_accs, f"{colors[key]}-o", label=label) + axes[0,1].plot(steps, smooth(sup), f"{colors[key]}-", label=label, alpha=0.8) + axes[1,0].plot(steps, smooth(lyap_mean), f"{colors[key]}-", label=f"{key} mean", alpha=0.8) + axes[1,0].plot(steps, smooth(lyap_max), f"{colors[key]}--", label=f"{key} max", alpha=0.4) + axes[1,1].plot(steps, smooth(frac), f"{colors[key]}-", label=label, alpha=0.8) + +axes[0,0].set_title("Test exact accuracy vs training step") +axes[0,0].set_xlabel("step"); axes[0,0].set_ylabel("exact_acc"); axes[0,0].legend(fontsize=8, loc="best"); axes[0,0].grid(alpha=0.3) + +axes[0,1].set_title("Supervised loss (smoothed, w=20)") +axes[0,1].set_xlabel("step"); axes[0,1].set_ylabel("sup_loss"); axes[0,1].legend(fontsize=8); axes[0,1].grid(alpha=0.3) + +axes[1,0].set_title("λ_joint_1 trajectory (smoothed)") +axes[1,0].axhline(0, color="k", ls=":", lw=0.6) +axes[1,0].axhline(-0.05, color="C2", ls="--", lw=0.5, label="C λ*") +axes[1,0].axhline(-0.15, color="C3", ls="--", lw=0.5, label="B λ*") +axes[1,0].set_xlabel("step"); axes[1,0].set_ylabel(r"$\lambda_{joint,1}$"); axes[1,0].legend(fontsize=7); axes[1,0].grid(alpha=0.3) + +axes[1,1].set_title(r"fraction of batch samples with $\lambda > \lambda^*$") +axes[1,1].set_xlabel("step"); axes[1,1].set_ylabel("fraction"); axes[1,1].legend(fontsize=8); axes[1,1].grid(alpha=0.3) + +fig.suptitle("Step 3: contractive flossing as training-time regularizer on HRM") +fig.tight_layout() +fig.savefig(f"{OUT}/step3_trajectories.png", dpi=130) +plt.close() + +print(f"\n== summary ==") +for label, fn in runs.items(): + d = json.loads(open(f"{ROOT}/{fn}").read()) + init = d["initial_acc"]; fin = d["final_acc"] + last_lyap = d["steps"][-1] + print(f" {label.split(':')[0]}: acc {init:.3f} → {fin:.3f} (Δ {fin-init:+.4f}) " + f"final λ_mean={last_lyap['lyap1_mean']:+.3f} max={last_lyap['lyap1_max']:+.3f} " + 
"""Compile ABCDEF Step 3 final analysis."""
import json
import os

import numpy as np

ROOT = "/home/yurenh2/rrm/research/flossing"
OUT = f"{ROOT}/plots_step3_final"

# Legend label -> (result JSON relative to ROOT, matplotlib colour, line style).
# The text before the first ":" in the label is the short run key.
runs = {
    "A: baseline α=0 from 18228": ("step3_A_baseline_18228.json", "C0", "-"),
    "B: CF α=10 λ*=-0.15 from 18228": ("step3_B_rf_18228.json", "C3", "-"),
    "C: CF α=10 λ*=-0.05 from 26040": ("step3_C_rf_26040.json", "C2", "-"),
    "D: CF α=10 λ*=0 from 26040": ("step3_D_rf_26040_lstar0.json", "C4", "-"),
    "E: CF α=10 λ*=0 from 18228": ("step3_E_rf_18228_lstar0.json", "C1", "-"),
    "F: extended D (1500 step)": ("step3_F_rf_26040_lstar0_1500.json", "C2", "--"),
}


def smooth(x, w=20):
    """Return a centred moving average of *x* with window *w*.

    Inputs shorter than the window are returned unchanged.  The result is
    renormalised by the kernel mass that overlapped real samples, because
    ``np.convolve(..., mode="same")`` zero-pads and would otherwise bias the
    first and last ``w/2`` points toward zero.
    """
    x = np.asarray(x, dtype=float)
    if len(x) < w:
        return x
    kernel = np.ones(w) / w
    covered = np.convolve(np.ones(len(x)), kernel, mode="same")
    return np.convolve(x, kernel, mode="same") / covered


def _load_run(filename):
    """Parse one result JSON under ROOT, closing the file afterwards."""
    with open(f"{ROOT}/{filename}", encoding="utf-8") as fh:
        return json.load(fh)


def main():
    """Draw the trajectory figure and the Δ-accuracy bar chart, print a table."""
    # Imported here so the helpers above stay importable without a plotting
    # backend; the Agg backend must be selected before pyplot is imported.
    import matplotlib
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt

    os.makedirs(OUT, exist_ok=True)

    fig, axes = plt.subplots(2, 2, figsize=(15, 9))

    summary = []
    for label, (fn, color, ls) in runs.items():
        d = _load_run(fn)
        key = label.split(":")[0]

        eval_steps = [e["step"] for e in d["evals"]]
        eval_accs = [e["acc"] for e in d["evals"]]
        steps = [r["step"] for r in d["steps"]]
        sup = np.array([r["sup_loss"] for r in d["steps"]])
        lyap_mean = np.array([r["lyap1_mean"] for r in d["steps"]])
        frac = np.array([r["frac_above_star"] for r in d["steps"]])

        axes[0, 0].plot(eval_steps, eval_accs, f"{color}{ls}", marker="o", label=label, lw=1.5, alpha=0.85)
        axes[0, 1].plot(steps, smooth(sup), f"{color}{ls}", label=key, alpha=0.7)
        axes[1, 0].plot(steps, smooth(lyap_mean), f"{color}{ls}", label=key, alpha=0.7)
        axes[1, 1].plot(steps, smooth(frac), f"{color}{ls}", label=key, alpha=0.7)

        summary.append({
            "key": key, "label": label,
            "init_acc": d["initial_acc"],
            "final_acc": d["final_acc"],
            "delta": d["final_acc"] - d["initial_acc"],
            "final_lyap_mean": d["steps"][-1]["lyap1_mean"],
            "final_frac_above": d["steps"][-1]["frac_above_star"],
            "n_steps": len(d["steps"]),
        })

    axes[0, 0].set_title("Test exact accuracy vs training step")
    axes[0, 0].set_xlabel("step")
    axes[0, 0].set_ylabel("exact_acc")
    axes[0, 0].legend(fontsize=8, loc="best")
    axes[0, 0].grid(alpha=0.3)

    axes[0, 1].set_title("Supervised loss (smoothed)")
    axes[0, 1].set_xlabel("step")
    axes[0, 1].set_ylabel("sup_loss")
    axes[0, 1].legend(fontsize=8)
    axes[0, 1].grid(alpha=0.3)

    axes[1, 0].set_title(r"$\lambda_{joint,1}$ mean trajectory (smoothed)")
    axes[1, 0].axhline(0, color="k", ls=":", lw=0.6, alpha=0.6)
    axes[1, 0].set_xlabel("step")
    axes[1, 0].set_ylabel(r"$\lambda_{1,joint}$")
    axes[1, 0].legend(fontsize=8)
    axes[1, 0].grid(alpha=0.3)

    axes[1, 1].set_title(r"Fraction of batch with $\lambda > \lambda^*$ (smoothed)")
    axes[1, 1].set_xlabel("step")
    axes[1, 1].set_ylabel("frac > λ*")
    axes[1, 1].legend(fontsize=8)
    axes[1, 1].grid(alpha=0.3)

    fig.suptitle("Step 3 — Contractive Flossing (CF) as training-time regularizer on HRM Sudoku-Extreme-1k", fontsize=11)
    fig.tight_layout()
    fig.savefig(f"{OUT}/abcdef_full.png", dpi=130)
    plt.close(fig)

    # Bar chart of final Δ acc
    fig, ax = plt.subplots(1, 1, figsize=(9, 5))
    keys = [s["key"] for s in summary]
    deltas = [s["delta"]*100 for s in summary]
    # Take bar colours from `runs` (same order as `summary`) so they cannot
    # drift out of sync with the line colours when a run is added or removed.
    bar_colors = [color for (_fn, color, _ls) in runs.values()]
    bars = ax.bar(keys, deltas, color=bar_colors)
    ax.axhline(0, color="k", lw=0.6)
    for bar, delta in zip(bars, deltas):
        # Label above positive bars, below negative ones.
        ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + (0.3 if delta > 0 else -1),
                f"{delta:+.1f}%", ha="center", fontsize=9, fontweight="bold")
    ax.set_ylabel("Δ test exact_accuracy (pp)")
    ax.set_title("CF intervention: final accuracy change vs baseline")
    ax.grid(alpha=0.3, axis="y")
    fig.tight_layout()
    fig.savefig(f"{OUT}/abcdef_deltas.png", dpi=130)
    plt.close(fig)

    print(f"{'key':>3} {'init':>7} {'final':>7} {'Δ':>7} {'n_steps':>8} {'final_λ':>9} {'frac>λ*':>9}")
    for s in summary:
        print(f" {s['key']:>3} {s['init_acc']:>7.3f} {s['final_acc']:>7.3f} {s['delta']*100:>6.1f}% {s['n_steps']:>8} "
              f"{s['final_lyap_mean']:>+9.3f} {s['final_frac_above']:>9.2f}")

    print(f"\nplots → {OUT}/")


if __name__ == "__main__":
    main()
"""Analyze the saved tangent basis Q (final, after all ACT steps).

Q shape: (N, seq_full=82, hidden=512, k=4).
Splits by success/failure and computes:
  - position activity per mode: ||Q[:, s, :, i]||_2 averaged
  - hidden-dim activity per mode
  - cosine similarity between succ and fail mode subspaces
"""
import os

import numpy as np

NPZ_PATH = "/home/yurenh2/rrm/research/flossing/tangent_modes_512.npz"
OUT = "/home/yurenh2/rrm/research/flossing/plots_tangent"

# Upper bound on the number of random sample pairs used per overlap estimate.
MAX_PAIRS = 5000


def pos_activity(Q_, exact_mask):
    """Per-position activity of each mode for the samples selected by the mask.

    Takes the L2 norm over axis 2 of ``Q_[exact_mask]`` and returns its
    (mean, std) over the sample axis.
    """
    sub = Q_[exact_mask]                      # (n, seq, hidden, k)
    acts = np.linalg.norm(sub, axis=2)        # (n, seq, k) — L2 norm over hidden dim
    return acts.mean(axis=0), acts.std(axis=0)


def hid_activity(Q_, exact_mask):
    """Per-hidden-dim activity of each mode for the samples selected by the mask.

    Takes the L2 norm over axis 1 of ``Q_[exact_mask]`` and returns its
    (mean, std) over the sample axis.
    """
    sub = Q_[exact_mask]                      # (n, seq, hidden, k)
    acts = np.linalg.norm(sub, axis=1)        # (n, hidden, k) — L2 norm over positions
    return acts.mean(axis=0), acts.std(axis=0)


def projector_sample(qf):
    """Identity placeholder kept for compatibility; returns *qf* unchanged."""
    return qf


def _pair_overlap(qa, qb):
    """Subspace overlap ||qa^T qb||_F^2 / k of two (state_dim, k) bases."""
    M = qa.T @ qb                             # (k, k)
    return (M ** 2).sum() / qa.shape[-1]


def avg_pos_outer(qflat_subset):
    """Mean/std subspace overlap over random sample pairs within one group.

    Averaging the projectors q q^T directly would need a state_dim x state_dim
    matrix, so instead up to MAX_PAIRS distinct-index pairs are drawn with a
    fixed seed and their overlap is averaged.  Returns ``(nan, nan)`` when the
    group has fewer than two samples.
    """
    n = qflat_subset.shape[0]
    if n < 2:
        return np.nan, np.nan
    rng = np.random.default_rng(0)
    pairs = min(n * (n - 1) // 2, MAX_PAIRS)
    overlaps = []
    for _ in range(pairs):
        a, b = rng.choice(n, size=2, replace=False)
        overlaps.append(_pair_overlap(qflat_subset[a], qflat_subset[b]))
    return float(np.mean(overlaps)), float(np.std(overlaps))


def cross_overlap(A, B):
    """Mean/std subspace overlap over random (A-sample, B-sample) pairs.

    Returns ``(nan, nan)`` when either group is empty (for example when every
    sample succeeded), matching avg_pos_outer instead of failing inside the
    random index draw.
    """
    nA, nB = A.shape[0], B.shape[0]
    if nA == 0 or nB == 0:
        return np.nan, np.nan
    rng = np.random.default_rng(0)
    pairs = min(nA * nB, MAX_PAIRS)
    overlaps = []
    for _ in range(pairs):
        a = rng.integers(nA)
        b = rng.integers(nB)
        overlaps.append(_pair_overlap(A[a], B[b]))
    return float(np.mean(overlaps)), float(np.std(overlaps))


def main():
    """Load the saved tangent modes, write the three figures, print overlaps."""
    # Imported here so the helpers above stay importable without a plotting
    # backend; the Agg backend must be selected before pyplot is imported.
    import matplotlib
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt

    # Read both arrays inside the context manager so the archive gets closed.
    with np.load(NPZ_PATH) as d:
        Q = d["Q_final"]                              # (N, seq, hidden, k)
        exact = d["exact_correct"].astype(bool)       # (N,)
    N, seq, hidden, K = Q.shape
    print(f"N={N}, seq={seq}, hidden={hidden}, K={K}, acc={exact.mean():.3f}")

    os.makedirs(OUT, exist_ok=True)

    pa_s_mean, pa_s_std = pos_activity(Q, exact)
    pa_f_mean, pa_f_std = pos_activity(Q, ~exact)
    ha_s_mean, _ = hid_activity(Q, exact)
    ha_f_mean, _ = hid_activity(Q, ~exact)

    # ---- Plot 1: position activity per mode ----
    fig, axes = plt.subplots(2, 2, figsize=(13, 7), sharex=True, sharey=True)
    x = np.arange(seq)
    for i, ax in enumerate(axes.flat):
        if i >= K:
            ax.set_visible(False)
            continue
        ax.plot(x, pa_s_mean[:, i], "C0-", label="succ", lw=1.5)
        ax.fill_between(x, pa_s_mean[:, i]-pa_s_std[:, i], pa_s_mean[:, i]+pa_s_std[:, i], color="C0", alpha=0.2)
        ax.plot(x, pa_f_mean[:, i], "C3-", label="fail", lw=1.5)
        ax.fill_between(x, pa_f_mean[:, i]-pa_f_std[:, i], pa_f_mean[:, i]+pa_f_std[:, i], color="C3", alpha=0.2)
        ax.axvline(0.5, color="k", ls=":", lw=0.6)  # mark puzzle_emb boundary
        ax.set_title(f"mode {i+1}: position activity")
        ax.set_xlabel("seq position (0=puzzle_emb, 1-81=Sudoku cells)")
        ax.set_ylabel(r"$\|Q[\cdot,:,i]\|_2$")
        ax.legend()
        ax.grid(alpha=0.3)
    fig.suptitle(f"Per-mode position activity (N={N}, acc={exact.mean():.2%})")
    fig.tight_layout()
    fig.savefig(f"{OUT}/position_activity.png", dpi=130)
    plt.close(fig)

    # ---- Plot 2: position activity as 9x9 grid (positions 1-81 = Sudoku cells) ----
    # squeeze=False keeps `axes` two-dimensional even when K == 1.
    fig, axes = plt.subplots(2, K, figsize=(K*3, 6), squeeze=False)
    for i in range(K):
        s_grid = pa_s_mean[1:82, i].reshape(9, 9)
        f_grid = pa_f_mean[1:82, i].reshape(9, 9)
        vmax = max(s_grid.max(), f_grid.max())
        axes[0, i].imshow(s_grid, vmin=0, vmax=vmax, cmap="viridis")
        axes[0, i].set_title(f"succ mode {i+1}")
        axes[0, i].set_xticks([])
        axes[0, i].set_yticks([])
        im1 = axes[1, i].imshow(f_grid, vmin=0, vmax=vmax, cmap="viridis")
        axes[1, i].set_title(f"fail mode {i+1}")
        axes[1, i].set_xticks([])
        axes[1, i].set_yticks([])
        plt.colorbar(im1, ax=axes[:, i], fraction=0.046)
    fig.suptitle("Tangent mode activity over the 9x9 Sudoku board")
    fig.savefig(f"{OUT}/sudoku_grid_activity.png", dpi=130, bbox_inches="tight")
    plt.close(fig)

    # ---- Plot 3: hidden activity per mode ----
    fig, axes = plt.subplots(2, 2, figsize=(13, 7), sharex=True)
    for i, ax in enumerate(axes.flat):
        if i >= K:
            ax.set_visible(False)
            continue
        # sort for cleaner viewing
        order = np.argsort(-(ha_s_mean[:, i] + ha_f_mean[:, i]))
        ax.plot(ha_s_mean[order, i], "C0-", label="succ (sorted)", lw=1)
        ax.plot(ha_f_mean[order, i], "C3-", label="fail (sorted)", lw=1)
        ax.set_title(f"mode {i+1}: hidden-dim activity (sorted by sum)")
        ax.set_xlabel("hidden dim (sorted)")
        ax.set_ylabel(r"$\|Q[:,h,i]\|_2$")
        ax.legend()
        ax.grid(alpha=0.3)
    fig.tight_layout()
    fig.savefig(f"{OUT}/hidden_activity.png", dpi=130)
    plt.close(fig)

    # ---- Subspace cosine analysis ----
    # Flatten Q to (N, seq*hidden, k). The k columns span a k-d subspace per sample.
    Qflat = Q.reshape(N, seq*hidden, K)

    print("\nSubspace overlap (squared cosine, averaged over random pairs):")
    ovl_ss, std_ss = avg_pos_outer(Qflat[exact])
    ovl_ff, std_ff = avg_pos_outer(Qflat[~exact])
    ovl_sf, std_sf = cross_overlap(Qflat[exact], Qflat[~exact])

    # If succ-succ and fail-fail >> succ-fail, the two groups live in DIFFERENT
    # subspaces. If they're similar, the unstable subspace is shared.
    print(f" succ-succ: {ovl_ss:.4f} ± {std_ss:.4f}")
    print(f" fail-fail: {ovl_ff:.4f} ± {std_ff:.4f}")
    print(f" succ-fail: {ovl_sf:.4f} ± {std_sf:.4f}")

    print("\nplots saved to", OUT)


if __name__ == "__main__":
    main()
def load_model(ckpt_root, ckpt_name, device):
    """Build the HRM model described by a checkpoint directory and load weights.

    Reads ``all_config.yaml`` under *ckpt_root* and the training
    ``dataset.json`` under the configured data path, instantiates
    ``HierarchicalReasoningModel_ACTV1`` from the merged architecture config,
    loads ``ckpt_root / ckpt_name`` non-strictly, and returns
    ``(model, cfg, train_meta)`` with the model on *device* in eval mode.
    """
    config_text = (ckpt_root / "all_config.yaml").read_text()
    cfg = yaml.safe_load(config_text)
    arch_cfg = dict(cfg["arch"])

    meta_file = Path(cfg["data_path"]) / "train" / "dataset.json"
    train_meta = json.loads(meta_file.read_text())

    # Fields the architecture config needs but the YAML "arch" section lacks.
    arch_cfg["batch_size"] = cfg["global_batch_size"]
    for field in ("seq_len", "vocab_size", "num_puzzle_identifiers"):
        arch_cfg[field] = train_meta[field]
    arch_cfg["causal"] = False

    model = HierarchicalReasoningModel_ACTV1(arch_cfg)
    raw_state = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True)

    def _clean(name):
        # Drop a leading "_orig_mod." and then a leading "model.", in that order.
        for prefix in ("_orig_mod.", "model."):
            if name.startswith(prefix):
                name = name[len(prefix):]
        return name

    state = {_clean(name): tensor for name, tensor in raw_state.items()}
    model.load_state_dict(state, strict=False)
    model.to(device).eval()
    return model, cfg, train_meta


def jvp_one(f, x, v):
    """Return ``(f(x), J_f(x) @ v)`` via ``torch.autograd.functional.jvp``."""
    jvp = torch.autograd.functional.jvp
    return jvp(f, x, v=v, create_graph=False, strict=False)
def run_save_final_Q(model, batch, k_lyap, device, seed):
    """Run inference with QR-iteration on top-k tangents; return the final basis.

    A random orthonormal basis of ``k_lyap`` tangent vectors is pushed through
    every L-level and H-level update with JVPs and re-orthonormalised by QR
    after each update.

    Args:
        model: wrapper exposing ``model.inner`` (config, H_init/L_init,
            L_level/H_level, lm_head, ...).
        batch: dict with "inputs", "puzzle_identifiers" and "labels" tensors.
        k_lyap: number of tangent directions tracked.
        device: device the batch tensors and the tangent basis are placed on.
        seed: seed for the random initial basis.

    Returns:
        ``(Q_final, exact)``: ``Q_final`` is a float32 numpy array of shape
        (B, seq_full, hidden, k_lyap) after all ACT steps; ``exact`` is a
        float numpy array of shape (B,), 1.0 where every labelled position was
        predicted correctly.
    """
    inner = model.inner
    cfg = inner.config
    B = batch["inputs"].shape[0]
    # Full state length = task sequence plus the puzzle-embedding prefix.
    seq_full = cfg.seq_len + inner.puzzle_emb_len
    hidden = cfg.hidden_size
    state_dim = seq_full * hidden

    z_H = inner.H_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
    z_L = inner.L_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
    # Rotary tables are passed only when the inner model defines them.
    seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None)
    input_embeddings = inner._input_embeddings(batch["inputs"].to(device),
                                               batch["puzzle_identifiers"].to(device))

    # Seeded random start basis, orthonormalised per sample.
    g = torch.Generator(device=device).manual_seed(seed)
    Q0 = torch.randn(B, state_dim, k_lyap, device=device, dtype=torch.float32, generator=g)
    Q, _ = torch.linalg.qr(Q0)

    # JVPs need autograd even if the caller disabled it.
    with torch.enable_grad():
        for _act in range(cfg.halt_max_steps):
            zH = z_H.detach(); zL = z_L.detach()
            for _h in range(cfg.H_cycles):
                for _l in range(cfg.L_cycles):
                    out = []
                    fx_last = None
                    # L-level map with zH held fixed; tangents are taken w.r.t. zL only.
                    f = lambda x: inner.L_level(x, zH + input_embeddings, **seq_info)
                    for i in range(k_lyap):
                        # NOTE(review): Q is float32 while zL is in forward_dtype — confirm jvp accepts the mix.
                        v_i = Q[..., i].view_as(zL)
                        fx, Dv = jvp_one(f, zL, v_i)
                        out.append(Dv.reshape(B, state_dim).to(torch.float32))
                        # f(zL) is the same for every i; keep one copy as the next state.
                        fx_last = fx
                    Q = torch.stack(out, dim=-1)
                    zL = fx_last
                    # Re-orthonormalise; R is computed but not used.
                    Q, R = torch.linalg.qr(Q)
                out = []
                # H-level map with the updated zL held fixed; tangents are taken w.r.t. zH only.
                f = lambda x: inner.H_level(x, zL, **seq_info)
                for i in range(k_lyap):
                    v_i = Q[..., i].view_as(zH)
                    fx, Dv = jvp_one(f, zH, v_i)
                    out.append(Dv.reshape(B, state_dim).to(torch.float32))
                    fx_last = fx
                Q = torch.stack(out, dim=-1)
                zH = fx_last
                Q, R = torch.linalg.qr(Q)
            # Carry the states into the next ACT step (detached at its start).
            z_H, z_L = zH, zL

    with torch.no_grad():
        # Decode from z_H, dropping the puzzle-embedding prefix positions.
        output = inner.lm_head(z_H)[:, inner.puzzle_emb_len:].float()
        preds = output.argmax(dim=-1)
        labels = batch["labels"].to(device)
        # Positions with label <= 0 are excluded from the exact-match check.
        mask = labels > 0
        exact = ((preds == labels) | ~mask).all(dim=-1).cpu().float().numpy()

    Q_final = Q.reshape(B, seq_full, hidden, k_lyap).cpu().float().numpy()
    return Q_final, exact
def main():
    """Sample test puzzles, compute their final tangent bases and save an npz.

    Parses the command line, loads the checkpoint, draws ``--n-samples`` test
    indices without replacement, processes them in batches through
    run_save_final_Q, and writes ``Q_final``, ``exact_correct`` and
    ``sample_idx`` to ``--out``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--ckpt-root", required=True)
    parser.add_argument("--ckpt-name", default="step_26040")
    for flag, default in (("--n-samples", 256), ("--batch-size", 32),
                          ("--k-lyap", 4), ("--seed", 0)):
        parser.add_argument(flag, type=int, default=default)
    parser.add_argument("--out", default="tangent_modes.npz")
    args = parser.parse_args()
    device = "cuda"

    model, cfg, train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device)

    rng = np.random.default_rng(args.seed)
    test_dir = Path(cfg["data_path"]) / "test"
    inputs = np.load(test_dir / "all__inputs.npy")
    labels = np.load(test_dir / "all__labels.npy")
    pid = np.load(test_dir / "all__puzzle_identifiers.npy")
    idx = rng.choice(len(inputs), size=args.n_samples, replace=False)

    total = args.n_samples
    q_chunks = []
    exact_chunks = []
    started = time.time()
    for start in range(0, total, args.batch_size):
        stop = min(start + args.batch_size, total)
        chosen = idx[start:stop]
        batch = {
            name: torch.from_numpy(array[chosen].astype(np.int32))
            for name, array in (("inputs", inputs), ("labels", labels),
                                ("puzzle_identifiers", pid))
        }
        # Each batch gets its own seed so the random start bases differ.
        Q_final, exact = run_save_final_Q(model, batch, args.k_lyap, device, seed=args.seed + start)
        q_chunks.append(Q_final)
        exact_chunks.append(exact)
        print(f" [{stop}/{args.n_samples}] dt={time.time()-started:.1f}s exact={exact.mean():.3f}", flush=True)

    Q_all = np.concatenate(q_chunks, axis=0)           # (N, seq, hidden, k)
    exact_all = np.concatenate(exact_chunks, axis=0)   # (N,)
    print(f"saved shape Q={Q_all.shape}, exact={exact_all.shape}, acc={exact_all.mean():.3f}")
    np.savez_compressed(args.out, Q_final=Q_all, exact_correct=exact_all, sample_idx=idx)
    print(f"saved → {args.out}")


if __name__ == "__main__":
    main()
/dev/null +++ b/chaos_trm_multi4_step182287_clean_k4_n1000.summary.csv @@ -0,0 +1,2 @@ +deterministic/exact,deterministic/token_acc,fd_lyap,fd_spectrum_k,include_clean,lambda_fail_mean,lambda_mean,lambda_success_mean,lyap_min/exact,lyap_min/token_acc,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/exact,oracle_pass/token_acc,perturb_both,perturb_h,perturb_l,q_fail_mean,q_max/exact,q_max/token_acc,q_mean,q_minus_0.25lambda/exact,q_minus_0.25lambda/token_acc,q_minus_0.5lambda/exact,q_minus_0.5lambda/token_acc,q_minus_1lambda/exact,q_minus_1lambda/token_acc,q_minus_2lambda/exact,q_minus_2lambda/token_acc,q_success_mean,rollout0/exact,rollout0/token_acc,rollouts,spec_count_pos_fail_mean,spec_count_pos_mean,spec_count_pos_min/exact,spec_count_pos_min/token_acc,spec_count_pos_success_mean,spec_k,spec_mean_fail_mean,spec_mean_mean,spec_mean_min/exact,spec_mean_min/token_acc,spec_mean_success_mean,spec_pos_l2_mean,spec_pos_l2_min/exact,spec_pos_l2_min/token_acc,spec_pos_mass_fail_mean,spec_pos_mass_mean,spec_pos_mass_min/exact,spec_pos_mass_min/token_acc,spec_pos_mass_success_mean,spec_spread_mean,spec_spread_min/exact,spec_spread_min/token_acc,steps 
+0.8640000224113464,0.9498025178909302,0.0,4.0,0.0,7.309304714202881,6.157678127288818,5.977945327758789,0.8650000095367432,0.9495925307273865,0.8650000095367432,0.9495925307273865,1000.0,0.0,0.8650000095367432,0.9495925307273865,1.0,0.0,0.0,-10.708564758300781,0.8650000095367432,0.9495925307273865,5.329593658447266,0.8650000095367432,0.9495925307273865,0.8650000095367432,0.9495925307273865,0.8650000095367432,0.9495925307273865,0.8650000095367432,0.9495925307273865,7.832658767700195,0.8650000095367432,0.9495925307273865,1.0,4.0,4.0,0.8650000095367432,0.9495925307273865,4.0,4.0,7.008894443511963,5.972970008850098,0.8650000095367432,0.9495925307273865,5.811293601989746,5.974251747131348,0.8650000095367432,0.9495925307273865,28.03557777404785,23.89188003540039,0.8650000095367432,0.9495925307273865,23.245174407958984,0.30898988246917725,0.8650000095367432,0.9495925307273865,16.0 diff --git a/chaos_trm_multi4_step260410_clean_k4_n1000.summary.csv b/chaos_trm_multi4_step260410_clean_k4_n1000.summary.csv new file mode 100644 index 0000000..e640bc4 --- /dev/null +++ b/chaos_trm_multi4_step260410_clean_k4_n1000.summary.csv @@ -0,0 +1,2 @@ 
+deterministic/exact,deterministic/token_acc,fd_lyap,fd_spectrum_k,include_clean,lambda_fail_mean,lambda_mean,lambda_success_mean,lyap_min/exact,lyap_min/token_acc,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/exact,oracle_pass/token_acc,perturb_both,perturb_h,perturb_l,q_fail_mean,q_max/exact,q_max/token_acc,q_mean,q_minus_0.25lambda/exact,q_minus_0.25lambda/token_acc,q_minus_0.5lambda/exact,q_minus_0.5lambda/token_acc,q_minus_1lambda/exact,q_minus_1lambda/token_acc,q_minus_2lambda/exact,q_minus_2lambda/token_acc,q_success_mean,rollout0/exact,rollout0/token_acc,rollouts,spec_count_pos_fail_mean,spec_count_pos_mean,spec_count_pos_min/exact,spec_count_pos_min/token_acc,spec_count_pos_success_mean,spec_k,spec_mean_fail_mean,spec_mean_mean,spec_mean_min/exact,spec_mean_min/token_acc,spec_mean_success_mean,spec_pos_l2_mean,spec_pos_l2_min/exact,spec_pos_l2_min/token_acc,spec_pos_mass_fail_mean,spec_pos_mass_mean,spec_pos_mass_min/exact,spec_pos_mass_min/token_acc,spec_pos_mass_success_mean,spec_spread_mean,spec_spread_min/exact,spec_spread_min/token_acc,steps +0.8320000171661377,0.9365678429603577,0.0,4.0,0.0,7.217503547668457,6.245500564575195,6.071272850036621,0.8479999899864197,0.9428519010543823,0.8479999899864197,0.9428519010543823,1000.0,0.0,0.8479999899864197,0.9428519010543823,1.0,0.0,0.0,-10.5178861618042,0.8479999899864197,0.9428519010543823,5.794531345367432,0.8479999899864197,0.9428519010543823,0.8479999899864197,0.9428519010543823,0.8479999899864197,0.9428519010543823,0.8479999899864197,0.9428519010543823,8.71845531463623,0.8479999899864197,0.9428519010543823,1.0,4.0,4.0,0.8479999899864197,0.9428519010543823,4.0,4.0,6.9086737632751465,6.057910919189453,0.8479999899864197,0.9428519010543823,5.905416011810303,6.059232711791992,0.8479999899864197,0.9428519010543823,27.634695053100586,24.231643676757812,0.8479999899864197,0.9428519010543823,23.62166404724121,0.3121836483478546,0.8479999899864197,0.9428519010543823,16.0 diff --git 
a/chaos_trm_multi4_success_failure_summary.csv b/chaos_trm_multi4_success_failure_summary.csv new file mode 100644 index 0000000..ff2d00f --- /dev/null +++ b/chaos_trm_multi4_success_failure_summary.csv @@ -0,0 +1,11 @@ +checkpoint,feature,n,det_acc,rollout_acc,disagree,succ_n,fail_n,succ_mean,fail_mean,delta_fail_minus_succ,succ_median,fail_median,succ_q25,succ_q75,fail_q25,fail_q75,auc_failure,corr_token_acc +step182287_best,lambda1,1000,0.864,0.865,0.065,864,136,6.0026164054870605,7.142782688140869,1.1401662826538086,5.860308647155762,7.266811370849609,5.824850082397461,6.004127025604248,7.107278347015381,7.365792274475098,0.9588524645969498,-0.7372595375732337 +step182287_best,spec_mean,1000,0.864,0.865,0.065,864,136,5.8336968421936035,6.857765197753906,1.0240683555603027,5.705381870269775,6.9673967361450195,5.6780009269714355,5.8299102783203125,6.840461730957031,7.037558078765869,0.9598481753812637,-0.7403875443857514 +step182287_best,pos_mass,1000,0.864,0.865,0.065,864,136,23.334787368774414,27.431060791015625,4.096273422241211,22.8215274810791,27.869586944580078,22.712003707885742,23.31964111328125,27.361846923828125,28.150232315063477,0.9598481753812637,-0.7403875443857514 +step182287_best,pos_l2,1000,0.864,0.865,0.065,864,136,5.834743499755859,6.8605427742004395,1.02579927444458,5.706200122833252,6.97038459777832,5.678751468658447,5.831027984619141,6.842685699462891,7.040294170379639,0.9598396650326797,-0.7403654596306658 +step182287_best,spread,1000,0.864,0.865,0.065,864,136,0.2796080410480499,0.49565085768699646,0.21604281663894653,0.2521345615386963,0.49982571601867676,0.23640167713165283,0.2870197296142578,0.38921797275543213,0.5801196098327637,0.9201473992374728,-0.6308994031700919 +step260410_final,lambda1,1000,0.832,0.848,0.052,832,168,6.075295448303223,7.088419437408447,1.0131239891052246,5.967571258544922,7.155555725097656,5.938466548919678,6.082776069641113,7.029343128204346,7.297847270965576,0.9660957532051282,-0.7909342942372484 
+step260410_final,spec_mean,1000,0.832,0.848,0.052,832,168,5.9091668128967285,6.794551849365234,0.8853850364685059,5.8149518966674805,6.87446928024292,5.792088031768799,5.9152512550354,6.788240432739258,6.948907375335693,0.9685997596153846,-0.7938247800283409 +step260410_final,pos_mass,1000,0.832,0.848,0.052,832,168,23.636667251586914,27.178207397460938,3.5415401458740234,23.259807586669922,27.49787712097168,23.168352127075195,23.6610050201416,27.15296173095703,27.795629501342773,0.9685997596153846,-0.7938247800283409 +step260410_final,pos_l2,1000,0.832,0.848,0.052,832,168,5.910159587860107,6.7974982261657715,0.8873386383056641,5.815634250640869,6.875983238220215,5.792854309082031,5.915988445281982,6.789846897125244,6.953376770019531,0.968585451007326,-0.793837702081877 +step260410_final,spread,1000,0.832,0.848,0.052,832,168,0.273774653673172,0.5023994445800781,0.22862479090690613,0.2473444938659668,0.49890947341918945,0.23368358612060547,0.27810585498809814,0.39853525161743164,0.595841646194458,0.9267900068681318,-0.6832077001617288 diff --git a/compare_ptrm_rollout_counts.py b/compare_ptrm_rollout_counts.py new file mode 100644 index 0000000..0528820 --- /dev/null +++ b/compare_ptrm_rollout_counts.py @@ -0,0 +1,203 @@ +"""Paired comparison of PTRM rollout caches. + +This focuses on per-problem correct rollout counts, not only best-Q accuracy. +Use it when two caches were generated with the same sample seed so `idx` aligns. 
+""" +from __future__ import annotations + +import argparse +import csv +from pathlib import Path + +import numpy as np + + +def _load(path: Path) -> dict[str, np.ndarray]: + data = np.load(path) + return {name: data[name] for name in data.files} + + +def _take(values: np.ndarray, idx: np.ndarray) -> np.ndarray: + return values[np.arange(values.shape[0]), idx] + + +def _summary(prefix: str, exact: np.ndarray, q_halt: np.ndarray, det_exact: np.ndarray) -> dict[str, float]: + correct_count = exact.sum(axis=1).astype(np.float32) + q_sel = _take(exact, q_halt.argmax(axis=1)).astype(bool) + oracle = exact.any(axis=1) + out = { + f"{prefix}/det_exact": float(det_exact.mean()), + f"{prefix}/q_max_exact": float(q_sel.mean()), + f"{prefix}/oracle_pass": float(oracle.mean()), + f"{prefix}/mean_rollout_exact": float(exact.mean()), + f"{prefix}/correct_count_mean": float(correct_count.mean()), + f"{prefix}/correct_count_median": float(np.median(correct_count)), + f"{prefix}/correct_count_q10": float(np.quantile(correct_count, 0.10)), + f"{prefix}/correct_count_q25": float(np.quantile(correct_count, 0.25)), + f"{prefix}/correct_count_q75": float(np.quantile(correct_count, 0.75)), + f"{prefix}/correct_count_q90": float(np.quantile(correct_count, 0.90)), + f"{prefix}/zero_correct_frac": float((correct_count == 0).mean()), + f"{prefix}/all_correct_frac": float((correct_count == exact.shape[1]).mean()), + } + for threshold in (1, 5, 10, 25, 50, 75, 90): + if threshold <= exact.shape[1]: + out[f"{prefix}/correct_count_ge_{threshold}_frac"] = float((correct_count >= threshold).mean()) + for name, mask in { + "det_success": det_exact.astype(bool), + "det_fail": ~det_exact.astype(bool), + }.items(): + if mask.any(): + out[f"{prefix}/correct_count_{name}_mean"] = float(correct_count[mask].mean()) + out[f"{prefix}/oracle_{name}_frac"] = float(oracle[mask].mean()) + out[f"{prefix}/q_max_{name}_frac"] = float(q_sel[mask].mean()) + return out + + +def _prefix_curves(prefix: str, exact: 
def compare(
    base_path: Path,
    test_path: Path,
    base_label: str,
    test_label: str,
) -> tuple[dict[str, float | str], list[dict[str, float]], list[dict[str, float]]]:
    """Compare two paired rollout caches and summarize per-sample differences.

    Args:
        base_path: Cache file for the baseline run (read with ``_load``).
        test_path: Cache file for the run under test (read with ``_load``).
        base_label: Key prefix used for baseline metrics.
        test_label: Key prefix used for test metrics.

    Returns:
        ``(summary, hist_rows, curve_rows)`` where ``summary`` is a flat dict of
        scalar metrics, ``hist_rows`` is the histogram of per-sample correct
        counts, and ``curve_rows`` holds the per-K prefix curves of both runs
        plus their ``delta/*`` differences.

    Raises:
        ValueError: If both caches carry ``idx`` and the arrays differ, or if
            the ``exact`` arrays do not have the same shape.
    """
    base = _load(base_path)
    test = _load(test_path)
    if "idx" in base and "idx" in test and not np.array_equal(base["idx"], test["idx"]):
        raise ValueError("Caches are not paired: idx arrays differ.")

    base_exact = base["exact"].astype(bool)
    test_exact = test["exact"].astype(bool)
    # Fail early: every paired statistic below assumes the same
    # (n_samples, rollouts) layout in both caches.
    if base_exact.shape != test_exact.shape:
        raise ValueError(
            f"Caches are not paired: exact shapes differ "
            f"({base_exact.shape} vs {test_exact.shape})."
        )
    base_q = base["q_halt"].astype(np.float32)
    test_q = test["q_halt"].astype(np.float32)
    base_det = base["det_exact"].astype(bool)
    test_det = test["det_exact"].astype(bool)

    # Per-sample number of correct rollouts and the paired difference.
    base_count = base_exact.sum(axis=1).astype(np.float32)
    test_count = test_exact.sum(axis=1).astype(np.float32)
    delta = test_count - base_count
    base_oracle = base_exact.any(axis=1)
    test_oracle = test_exact.any(axis=1)
    # Correctness of the rollout with the highest q_halt value.
    base_q_sel = _take(base_exact, base_q.argmax(axis=1)).astype(bool)
    test_q_sel = _take(test_exact, test_q.argmax(axis=1)).astype(bool)

    out: dict[str, float | str] = {
        "base_file": str(base_path),
        "test_file": str(test_path),
        "n_samples": float(base_exact.shape[0]),
        "rollouts": float(base_exact.shape[1]),
    }
    out.update(_summary(base_label, base_exact, base_q, base_det))
    out.update(_summary(test_label, test_exact, test_q, test_det))
    base_curve_summary, base_curve_rows = _prefix_curves(base_label, base_exact, base_q)
    test_curve_summary, test_curve_rows = _prefix_curves(test_label, test_exact, test_q)
    out.update(base_curve_summary)
    out.update(test_curve_summary)

    base_prefix = f"{base_label}/"
    test_prefix = f"{test_label}/"
    curve_rows: list[dict[str, float]] = []
    for base_row, test_row in zip(base_curve_rows, test_curve_rows, strict=True):
        # Merge both runs into one row; "K" is shared, so take it from base only.
        row = {**base_row, **{k: v for k, v in test_row.items() if k != "K"}}
        for metric in ("oracle_pass", "q_max_exact", "correct_count_mean"):
            row[f"delta/{metric}"] = row[f"{test_prefix}{metric}"] - row[f"{base_prefix}{metric}"]
        curve_rows.append(row)

    out.update(
        {
            "delta_correct_count_mean": float(delta.mean()),
            "delta_correct_count_median": float(np.median(delta)),
            "delta_correct_count_q10": float(np.quantile(delta, 0.10)),
            "delta_correct_count_q25": float(np.quantile(delta, 0.25)),
            "delta_correct_count_q75": float(np.quantile(delta, 0.75)),
            "delta_correct_count_q90": float(np.quantile(delta, 0.90)),
            "test_more_correct_frac": float((delta > 0).mean()),
            "test_equal_correct_frac": float((delta == 0).mean()),
            "test_fewer_correct_frac": float((delta < 0).mean()),
            "base_zero_test_nonzero_frac": float(((base_count == 0) & (test_count > 0)).mean()),
            "base_nonzero_test_zero_frac": float(((base_count > 0) & (test_count == 0)).mean()),
            "both_oracle_success_test_more_frac": float((base_oracle & test_oracle & (delta > 0)).mean()),
            "oracle_test_only_frac": float((~base_oracle & test_oracle).mean()),
            "oracle_base_only_frac": float((base_oracle & ~test_oracle).mean()),
            "oracle_both_success_frac": float((base_oracle & test_oracle).mean()),
            "oracle_both_fail_frac": float((~base_oracle & ~test_oracle).mean()),
            "q_max_test_only_frac": float((~base_q_sel & test_q_sel).mean()),
            "q_max_base_only_frac": float((base_q_sel & ~test_q_sel).mean()),
            "q_max_both_success_frac": float((base_q_sel & test_q_sel).mean()),
            "q_max_both_fail_frac": float((~base_q_sel & ~test_q_sel).mean()),
        }
    )

    # Histogram of correct-rollout counts, from 0 up to the largest count seen.
    max_count = int(max(base_count.max(), test_count.max()))
    hist_rows: list[dict[str, float]] = [
        {
            "correct_count": float(count),
            f"{base_label}_frac": float((base_count == count).mean()),
            f"{test_label}_frac": float((test_count == count).mean()),
        }
        for count in range(max_count + 1)
    ]
    return out, hist_rows, curve_rows
def load_model(ckpt_root: Path, ckpt_name: str, device: str = "cuda"):
    """Build the HRM model from a checkpoint directory and load its weights.

    Args:
        ckpt_root: Directory containing ``all_config.yaml`` and the checkpoint file.
        ckpt_name: File name of the checkpoint inside ``ckpt_root``.
        device: Device the model is moved to after loading.

    Returns:
        ``(model, cfg, train_meta)``: the model in eval mode, the parsed
        ``all_config.yaml``, and the parsed training ``dataset.json``.
    """
    cfg = yaml.safe_load((ckpt_root / "all_config.yaml").read_text())
    # batch_size, seq_len, vocab_size and num_puzzle_identifiers are not part of
    # cfg["arch"]; they come from the global config and the train metadata.
    train_meta = json.loads((Path(cfg["data_path"]) / "train" / "dataset.json").read_text())
    arch_cfg = dict(cfg["arch"])
    arch_cfg["batch_size"] = cfg["global_batch_size"]
    arch_cfg["seq_len"] = train_meta["seq_len"]
    arch_cfg["vocab_size"] = train_meta["vocab_size"]
    arch_cfg["num_puzzle_identifiers"] = train_meta["num_puzzle_identifiers"]
    arch_cfg["causal"] = False
    model = HierarchicalReasoningModel_ACTV1(arch_cfg)

    sd = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True)
    # Strip torch.compile (`_orig_mod.`) and ACTLossHead wrapper (`model.`)
    # prefixes. Repeat until no prefix matches so the result does not depend on
    # the order in which the wrappers were applied.
    wrapper_prefixes = ("_orig_mod.", "model.")
    stripped = {}
    for key, value in sd.items():
        name = key
        while True:
            for prefix in wrapper_prefixes:
                if name.startswith(prefix):
                    name = name[len(prefix):]
                    break
            else:
                break
        stripped[name] = value

    missing, unexpected = model.load_state_dict(stripped, strict=False)
    if missing or unexpected:
        print(f"[load] missing={len(missing)} unexpected={len(unexpected)}; "
              f"sample missing={missing[:3]}, sample unexpected={unexpected[:3]}")
    model.to(device).eval()
    return model, cfg, train_meta
def _flatten(z):
    """(B, seq, hidden) -> (B, seq*hidden)."""
    return z.reshape(z.shape[0], -1)


def _unflatten(v_flat, B, seq, hidden):
    """(B, seq*hidden) -> (B, seq, hidden)."""
    return v_flat.reshape(B, seq, hidden)


def jvp_apply_D(f, x, V):
    """Compute D_f(x) @ V where V has shape (B, state_dim, k).

    One JVP is run per tangent column.

    Args:
        f: Function mapping a tensor shaped like ``x`` to a tensor.
        x: Point at which the Jacobian is evaluated.
        V: Tangent columns, shape ``(B, state_dim, k)``; each column is
            reshaped to ``x.shape``.

    Returns:
        ``(f(x), DV)`` where ``DV`` is float32 with the same shape as ``V``.
        ``f(x)`` is taken from the last JVP call (or computed directly when
        ``k == 0``).
    """
    B, state_dim, k = V.shape
    if k == 0:
        # Nothing to propagate; still return f(x) so callers can advance the state.
        with torch.no_grad():
            fx = f(x)
        return fx, torch.zeros(B, state_dim, 0, device=V.device, dtype=torch.float32)

    out_list = []
    fx_last = None
    for i in range(k):
        # Match the tangent to the state's shape and dtype before the JVP.
        v_i = V[..., i].reshape(x.shape).to(x.dtype)
        fx, Dv = torch.autograd.functional.jvp(f, x, v=v_i, create_graph=False, strict=False)
        out_list.append(_flatten(Dv).to(torch.float32))
        fx_last = fx
    DV = torch.stack(out_list, dim=-1)  # (B, state_dim, k)
    return fx_last, DV


def _qr_accumulate(Q, log_R_sum):
    """QR-reorthonormalize Q and add log|R_ii| to the running sum."""
    Q, R = torch.linalg.qr(Q)  # Q (B, state_dim, k), R (B, k, k)
    log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
    return Q, log_R_sum


def run_diagnose_batch(model, batch, device, halt_max_steps, compute_lyap=True, k_lyap=8, t_ons=1, seed=0):
    """Run inference and collect drift, halting signals and top-k Lyapunov estimates.

    For Lyapunov: an orthonormal basis Q of shape (B, state_dim, k_lyap) is
    pushed through the Jacobian of every L_level / H_level update via JVP,
    QR-reorthonormalized every ``t_ons`` updates, and log|R_ii| is accumulated.
    lambda_i = (1/T) * sum_t log|R_ii(t)|, with T the number of QR steps.

    Args:
        model: Model exposing ``model.inner`` (config, H_init/L_init, L_level,
            H_level, q_head, lm_head).
        batch: Dict with ``inputs``, ``labels`` and ``puzzle_identifiers`` tensors.
        device: Device used for the tangent basis and bookkeeping tensors.
        halt_max_steps: Number of outer ACT steps to run (must be >= 1).
        compute_lyap: If False, only the forward recursion is run.
        k_lyap: Number of tangent directions tracked.
        t_ons: QR reorthonormalization interval, in update steps (must be >= 1).
        seed: Seed passed to ``torch.manual_seed`` before drawing the basis.

    Returns:
        Dict of numpy arrays: ``drift_zH``, ``drift_zL``, ``halted_at``,
        ``q_halt``, ``q_continue``, ``lyap_spec`` (None when Lyapunov tracking
        is off), ``exact_correct`` and ``token_acc``.

    Raises:
        ValueError: If ``halt_max_steps`` < 1, or ``t_ons`` < 1 while tracking.
    """
    if halt_max_steps < 1:
        raise ValueError("halt_max_steps must be >= 1")
    track_lyap = bool(compute_lyap and k_lyap > 0)
    if track_lyap and t_ons < 1:
        raise ValueError("t_ons must be >= 1")

    inner = model.inner
    B = batch["inputs"].shape[0]
    # Derived from the model itself instead of a module-level global, so the
    # function also works when it is not called through main().
    seq_full = inner.config.seq_len + inner.puzzle_emb_len
    hidden = inner.config.hidden_size
    state_dim = seq_full * hidden

    # Initialize carry
    z_H = inner.H_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
    z_L = inner.L_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)

    seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None)
    input_embeddings = inner._input_embeddings(batch["inputs"].to(device),
                                               batch["puzzle_identifiers"].to(device))

    # Initialize orthonormal Q basis for top-k Lyapunov
    Q = None
    log_R_sum = None
    n_lyap_steps = 0
    step_counter = 0
    if track_lyap:
        torch.manual_seed(seed)
        Q0 = torch.randn(B, state_dim, k_lyap, device=device, dtype=torch.float32)
        Q, _ = torch.linalg.qr(Q0)  # orthonormal columns
        log_R_sum = torch.zeros(B, k_lyap, device=device, dtype=torch.float32)

    drift_zH_per_step = []
    drift_zL_per_step = []
    halted_at = torch.zeros(B, dtype=torch.long, device=device)
    q_halt_history = []

    final_logits = None
    for act_step in range(halt_max_steps):
        z_H_prev = z_H.detach().clone()
        z_L_prev = z_L.detach().clone()

        if track_lyap:
            with torch.enable_grad():
                zH = z_H.detach()
                zL = z_L.detach()
                for _h in range(inner.config.H_cycles):
                    for _l in range(inner.config.L_cycles):
                        zL, Q = jvp_apply_D(
                            lambda x: inner.L_level(x, zH + input_embeddings, **seq_info), zL, Q)
                        step_counter += 1
                        if step_counter % t_ons == 0:
                            Q, log_R_sum = _qr_accumulate(Q, log_R_sum)
                            n_lyap_steps += 1
                    zH, Q = jvp_apply_D(lambda x: inner.H_level(x, zL, **seq_info), zH, Q)
                    step_counter += 1
                    if step_counter % t_ons == 0:
                        Q, log_R_sum = _qr_accumulate(Q, log_R_sum)
                        n_lyap_steps += 1
            z_H = zH
            z_L = zL
        else:
            with torch.no_grad():
                for _h in range(inner.config.H_cycles):
                    for _l in range(inner.config.L_cycles):
                        z_L = inner.L_level(z_L, z_H + input_embeddings, **seq_info)
                    z_H = inner.H_level(z_H, z_L, **seq_info)

        # Per-sample L2 norm of the state change over this ACT step.
        drift_zH_per_step.append((z_H - z_H_prev).float().flatten(1).norm(dim=1).cpu())
        drift_zL_per_step.append((z_L - z_L_prev).float().flatten(1).norm(dim=1).cpu())

        with torch.no_grad():
            q_logits = inner.q_head(z_H[:, 0]).float()
            q_halt = q_logits[..., 0]
            q_continue = q_logits[..., 1]
            q_halt_history.append((q_halt.cpu(), q_continue.cpu()))
            # Record only the first step at which halting is preferred (0 = never).
            newly = (q_halt > q_continue) & (halted_at == 0)
            halted_at[newly] = act_step + 1
            final_logits = inner.lm_head(z_H)[:, inner.puzzle_emb_len:].float()

    lyap_spec = (log_R_sum / max(n_lyap_steps, 1)).cpu().numpy() if track_lyap else None

    with torch.no_grad():
        preds = final_logits.argmax(dim=-1)
        labels = batch["labels"].to(device)
        mask = labels > 0  # positions with label <= 0 are ignored
        exact = ((preds == labels) | ~mask).all(dim=-1).cpu().float()
        token_acc = ((preds == labels) & mask).sum(-1).float() / mask.sum(-1).float().clamp_min(1)
        token_acc = token_acc.cpu()

    return {
        "drift_zH": torch.stack(drift_zH_per_step, dim=1).numpy(),
        "drift_zL": torch.stack(drift_zL_per_step, dim=1).numpy(),
        "halted_at": halted_at.cpu().numpy(),
        "q_halt": torch.stack([h[0] for h in q_halt_history], dim=1).numpy(),
        "q_continue": torch.stack([h[1] for h in q_halt_history], dim=1).numpy(),
        "lyap_spec": lyap_spec,  # (B, k_lyap)
        "exact_correct": exact.numpy(),
        "token_acc": token_acc.numpy(),
    }
help="total sample pool") + ap.add_argument("--shard-id", type=int, default=0) + ap.add_argument("--num-shards", type=int, default=1) + ap.add_argument("--batch-size", type=int, default=64) + ap.add_argument("--out", default="diagnose_out.npz") + ap.add_argument("--seed", type=int, default=0) + ap.add_argument("--no-lyap", action="store_true") + ap.add_argument("--k-lyap", type=int, default=8, help="top-k Lyapunov exponents to compute") + ap.add_argument("--t-ons", type=int, default=1, help="QR reorthonormalization interval") + args = ap.parse_args() + + device = "cuda" + print(f"Loading model from {args.ckpt_root}/{args.ckpt_name} ...") + model, cfg, train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device) + global train_meta_seq_full + train_meta_seq_full = train_meta["seq_len"] + model.inner.puzzle_emb_len + print(f" hidden={model.inner.config.hidden_size}, seq_full={train_meta_seq_full}, " + f"halt_max_steps={model.inner.config.halt_max_steps}, " + f"H_cycles={model.inner.config.H_cycles}, L_cycles={model.inner.config.L_cycles}") + + test_samples = load_test_samples(Path(cfg["data_path"]), args.n_samples, + shard_id=args.shard_id, num_shards=args.num_shards, + seed=args.seed) + n_this_shard = len(test_samples['inputs']) + print(f"Loaded shard {args.shard_id}/{args.num_shards}: {n_this_shard} samples") + + results = {k: [] for k in ["drift_zH","drift_zL","halted_at","q_halt","q_continue", + "lyap_spec","exact_correct","token_acc","idx"]} + t0 = time.time() + for s in range(0, n_this_shard, args.batch_size): + e = min(s + args.batch_size, n_this_shard) + batch = {k: test_samples[k][s:e].to(device) + for k in ["inputs","labels","puzzle_identifiers"]} + out = run_diagnose_batch( + model, batch, device, + halt_max_steps=model.inner.config.halt_max_steps, + compute_lyap=not args.no_lyap, k_lyap=args.k_lyap, t_ons=args.t_ons, + seed=args.seed + s, + ) + for k, v in out.items(): + if v is not None: + results[k].append(v) + 
results["idx"].append(test_samples["idx"][s:e]) + lyap_str = (f" lyap_max={out['lyap_spec'][:,0].mean():.4f} " + f"lyap_min={out['lyap_spec'][:,-1].mean():.4f}" + if out["lyap_spec"] is not None else "") + print(f" [{e}/{n_this_shard}] dt={time.time()-t0:.1f}s " + f"exact={out['exact_correct'].mean():.3f}{lyap_str}", flush=True) + + # Stack + saved = {} + for k, v in results.items(): + if not v: continue + try: + saved[k] = np.concatenate(v, axis=0) + except ValueError: + saved[k] = np.stack(v, axis=0) + np.savez_compressed(args.out, **saved) + print(f"saved to {args.out}") + print(f"summary:") + print(f" N={len(saved['exact_correct'])} acc={saved['exact_correct'].mean():.3f}") + if "lyap_spec" in saved: + ls = saved["lyap_spec"] # (N, k) + succ = saved["exact_correct"] > 0.5 + print(f" lyap_spec shape: {ls.shape}") + for i in range(ls.shape[1]): + li = ls[:, i] + print(f" λ_{i+1}: overall={li.mean():+.4f}±{li.std():.4f} " + f"succ={li[succ].mean():+.4f} fail={li[~succ].mean():+.4f} " + f"Δ={li[~succ].mean()-li[succ].mean():+.4f}") + + +if __name__ == "__main__": + main() diff --git a/diagnose_hrm_joint.py b/diagnose_hrm_joint.py new file mode 100644 index 0000000..3ba39cf --- /dev/null +++ b/diagnose_hrm_joint.py @@ -0,0 +1,240 @@ +"""HRM Sudoku Lyapunov diagnostic with CORRECTED joint (z_H, z_L) tangent tracking. + +Key fix over diagnose_hrm.py: + - State is conceptually (z_H, z_L) ∈ R^{2D} where D = seq_full * hidden. + - L_level update: z_L_new = layers_L(z_L + z_H + input_embeddings), so + v_L_new = J_L · (v_H + v_L), v_H_new = v_H + - H_level update: z_H_new = layers_H(z_H + z_L), so + v_H_new = J_H · (v_H + v_L), v_L_new = v_L + - Each L or H cycle = ONE JVP per tangent column (same cost as before), + but operating on the combined tangent v_H + v_L. + - Q is (B, 2D, k); QR over the 2D dimension keeps an orthonormal basis. 
def load_model(ckpt_root: Path, ckpt_name: str, device: str):
    """Build the HRM model from a checkpoint directory and load its weights.

    Args:
        ckpt_root: Directory containing ``all_config.yaml`` and the checkpoint file.
        ckpt_name: File name of the checkpoint inside ``ckpt_root``.
        device: Device the model is moved to after loading.

    Returns:
        ``(model, cfg, train_meta)``: the model in eval mode, the parsed
        ``all_config.yaml``, and the parsed training ``dataset.json``.
    """
    cfg = yaml.safe_load((ckpt_root / "all_config.yaml").read_text())
    arch_cfg = dict(cfg["arch"])
    train_meta = json.loads((Path(cfg["data_path"]) / "train" / "dataset.json").read_text())
    arch_cfg.update(batch_size=cfg["global_batch_size"], seq_len=train_meta["seq_len"],
                    vocab_size=train_meta["vocab_size"],
                    num_puzzle_identifiers=train_meta["num_puzzle_identifiers"], causal=False)
    model = HierarchicalReasoningModel_ACTV1(arch_cfg)
    sd = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True)

    # Strip wrapper prefixes only at the start of each key. A plain
    # str.replace would also rewrite names that merely contain "model."
    # (e.g. "sub_model.weight" -> "sub_weight").
    wrapper_prefixes = ("_orig_mod.", "model.")
    stripped = {}
    for key, value in sd.items():
        name = key
        while True:
            for prefix in wrapper_prefixes:
                if name.startswith(prefix):
                    name = name[len(prefix):]
                    break
            else:
                break
        stripped[name] = value

    # strict=False tolerates mismatches, so report them instead of silently
    # running the diagnostic on partially initialized weights.
    missing, unexpected = model.load_state_dict(stripped, strict=False)
    if missing or unexpected:
        print(f"[load] missing={len(missing)} unexpected={len(unexpected)}; "
              f"sample missing={missing[:3]}, sample unexpected={unexpected[:3]}")
    model.to(device).eval()
    return model, cfg, train_meta
def _push_tangents(f, z, tangents, forward_dtype):
    """Apply the Jacobian of ``f`` at ``z`` to each column of ``tangents`` (one JVP per column).

    Returns ``(f(z), D)`` with ``D`` float32 of shape ``(B, state_dim, k)``.
    """
    B, state_dim, k = tangents.shape
    z_next = None
    cols = []
    for i in range(k):
        v_i = tangents[:, :, i].reshape(z.shape).to(forward_dtype)
        z_next, Dv = jvp_through(f, z, v_i)
        cols.append(Dv.reshape(B, state_dim).to(torch.float32))
    return z_next, torch.stack(cols, dim=-1)


def _reorthonormalize(Q, log_R_sum):
    """QR-reorthonormalize the joint basis and add log|R_ii| to the running sum."""
    Q, R = torch.linalg.qr(Q)
    return Q, log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()


def run_diagnose_batch(model, batch, device, k_lyap, t_ons, seed):
    """Compute joint top-k Lyapunov spectrum over (z_H, z_L) joint tangent.

    Per L_level step:
        v_L_new = J_L · (v_H + v_L),   v_H_new = v_H
    Per H_level step:
        v_H_new = J_H · (v_H + v_L),   v_L_new = v_L

    Args:
        model: Model exposing ``model.inner`` (config, H_init/L_init, L_level,
            H_level, q_head, lm_head).
        batch: Dict with ``inputs``, ``labels`` and ``puzzle_identifiers`` tensors.
        device: Device used for the tangent basis and bookkeeping tensors.
        k_lyap: Number of tangent directions tracked (must be >= 1).
        t_ons: QR reorthonormalization interval, in update steps (must be >= 1).
        seed: Seed of the generator used to draw the initial basis.

    Returns:
        Dict of numpy arrays: ``drift_zH``, ``drift_zL``, ``halted_at``,
        ``q_halt``, ``q_continue``, ``lyap_spec`` (B, k_lyap),
        ``exact_correct`` and ``token_acc``.

    Raises:
        ValueError: If ``k_lyap`` < 1, ``t_ons`` < 1, or the model's
            ``halt_max_steps`` < 1.
    """
    if k_lyap < 1:
        raise ValueError("k_lyap must be >= 1")
    if t_ons < 1:
        raise ValueError("t_ons must be >= 1")

    inner = model.inner
    cfg = inner.config
    if cfg.halt_max_steps < 1:
        raise ValueError("halt_max_steps must be >= 1")

    B = batch["inputs"].shape[0]
    seq_full = cfg.seq_len + inner.puzzle_emb_len
    hidden = cfg.hidden_size
    state_dim = seq_full * hidden          # one of (z_H or z_L)
    total_dim = 2 * state_dim              # joint (v_H, v_L)

    # Carry init
    z_H = inner.H_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
    z_L = inner.L_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
    seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None)
    input_embeddings = inner._input_embeddings(batch["inputs"].to(device),
                                               batch["puzzle_identifiers"].to(device))

    # Joint orthonormal tangent basis; rows [:state_dim] are v_H, the rest v_L.
    g = torch.Generator(device=device).manual_seed(seed)
    Q0 = torch.randn(B, total_dim, k_lyap, device=device, dtype=torch.float32, generator=g)
    Q, _ = torch.linalg.qr(Q0)                                  # (B, 2D, k)
    log_R_sum = torch.zeros(B, k_lyap, device=device, dtype=torch.float32)
    n_lyap_steps = 0
    step_counter = 0

    drift_zH_per_step, drift_zL_per_step = [], []
    halted_at = torch.zeros(B, dtype=torch.long, device=device)
    q_halt_hist, q_continue_hist = [], []
    final_logits = None

    for act_step in range(cfg.halt_max_steps):
        z_H_prev = z_H.detach().clone()
        z_L_prev = z_L.detach().clone()

        with torch.enable_grad():
            zH, zL = z_H.detach(), z_L.detach()
            for _h in range(cfg.H_cycles):
                for _l in range(cfg.L_cycles):
                    # L step: v_L is replaced by J_L (v_H + v_L); v_H is unchanged.
                    v_H_all = Q[:, :state_dim, :]               # (B, D, k)
                    v_L_all = Q[:, state_dim:, :]
                    zL, new_v_L = _push_tangents(
                        lambda z: inner.L_level(z, zH + input_embeddings, **seq_info),
                        zL, v_H_all + v_L_all, inner.forward_dtype)
                    Q = torch.cat([v_H_all, new_v_L], dim=1)
                    step_counter += 1
                    if step_counter % t_ons == 0:
                        Q, log_R_sum = _reorthonormalize(Q, log_R_sum)
                        n_lyap_steps += 1

                # H step: v_H is replaced by J_H (v_H + v_L); v_L is unchanged.
                v_H_all = Q[:, :state_dim, :]
                v_L_all = Q[:, state_dim:, :]
                zH, new_v_H = _push_tangents(
                    lambda z: inner.H_level(z, zL, **seq_info),
                    zH, v_H_all + v_L_all, inner.forward_dtype)
                Q = torch.cat([new_v_H, v_L_all], dim=1)
                step_counter += 1
                if step_counter % t_ons == 0:
                    Q, log_R_sum = _reorthonormalize(Q, log_R_sum)
                    n_lyap_steps += 1

        z_H, z_L = zH, zL

        # Per-sample L2 norm of the state change over this ACT step.
        drift_zH_per_step.append((z_H - z_H_prev).float().flatten(1).norm(dim=1).cpu())
        drift_zL_per_step.append((z_L - z_L_prev).float().flatten(1).norm(dim=1).cpu())

        with torch.no_grad():
            q_logits = inner.q_head(z_H[:, 0]).float()
            q_halt, q_continue = q_logits[..., 0], q_logits[..., 1]
            q_halt_hist.append(q_halt.cpu())
            q_continue_hist.append(q_continue.cpu())
            # Record only the first step at which halting is preferred (0 = never).
            new_halt = (q_halt > q_continue) & (halted_at == 0)
            halted_at[new_halt] = act_step + 1
            final_logits = inner.lm_head(z_H)[:, inner.puzzle_emb_len:].float()

    lyap_spec = (log_R_sum / max(n_lyap_steps, 1)).cpu().numpy()    # (B, k)

    with torch.no_grad():
        preds = final_logits.argmax(dim=-1)
        labels = batch["labels"].to(device)
        mask = labels > 0  # positions with label <= 0 are ignored
        exact = ((preds == labels) | ~mask).all(dim=-1).cpu().float()
        token_acc = ((preds == labels) & mask).sum(-1).float() / mask.sum(-1).float().clamp_min(1)
        token_acc = token_acc.cpu()

    return {
        "drift_zH": torch.stack(drift_zH_per_step, dim=1).numpy(),
        "drift_zL": torch.stack(drift_zL_per_step, dim=1).numpy(),
        "halted_at": halted_at.cpu().numpy(),
        "q_halt": torch.stack(q_halt_hist, dim=1).numpy(),
        "q_continue": torch.stack(q_continue_hist, dim=1).numpy(),
        "lyap_spec": lyap_spec,
        "exact_correct": exact.numpy(),
        "token_acc": token_acc.numpy(),
    }
["drift_zH","drift_zL","halted_at","q_halt","q_continue", + "lyap_spec","exact_correct","token_acc","idx"]} + t0 = time.time() + for s in range(0, n_this, args.batch_size): + e = min(s + args.batch_size, n_this) + batch = {k: test_samples[k][s:e].to(device) for k in ["inputs","labels","puzzle_identifiers"]} + out = run_diagnose_batch(model, batch, device, args.k_lyap, args.t_ons, args.seed + s) + for k, v in out.items(): + if v is not None: results[k].append(v) + results["idx"].append(test_samples["idx"][s:e]) + ls = out["lyap_spec"] + print(f" [{e}/{n_this}] dt={time.time()-t0:.1f}s exact={out['exact_correct'].mean():.3f} " + f"λ_1={ls[:,0].mean():.4f} λ_{args.k_lyap}={ls[:,-1].mean():.4f}", flush=True) + + saved = {} + for k, v in results.items(): + if not v: continue + try: saved[k] = np.concatenate(v, 0) + except ValueError: saved[k] = np.stack(v, 0) + np.savez_compressed(args.out, **saved) + + ls = saved["lyap_spec"] + succ = saved["exact_correct"] > 0.5 + print(f"\nN={len(saved['exact_correct'])} acc={succ.mean():.4f}") + print(f"{'i':>3} {'mean':>10} {'succ':>10} {'fail':>10} {'Δ(f-s)':>10}") + for i in range(ls.shape[1]): + li = ls[:, i] + print(f"{i+1:>3} {li.mean():+10.4f} {li[succ].mean():+10.4f} {li[~succ].mean():+10.4f} " + f"{li[~succ].mean()-li[succ].mean():+10.4f}") + print(f"\nsaved → {args.out}") + + +if __name__ == "__main__": + main() diff --git a/diagnose_hrm_joint_clv.py b/diagnose_hrm_joint_clv.py new file mode 100644 index 0000000..227ce67 --- /dev/null +++ b/diagnose_hrm_joint_clv.py @@ -0,0 +1,310 @@ +"""HRM Sudoku Lyapunov diagnostic with CORRECTED joint (z_H, z_L) tangent tracking. + +Key fix over diagnose_hrm.py: + - State is conceptually (z_H, z_L) ∈ R^{2D} where D = seq_full * hidden. 
def load_test_samples(data_path: Path, n_total: int, shard_id: int, num_shards: int, seed: int):
    """Pick a deterministic pool of test samples and return one shard of it.

    ``n_total`` distinct row indices are drawn with ``seed``; the pool is cut
    into ``num_shards`` contiguous pieces and piece ``shard_id`` is returned.

    Args:
        data_path: Dataset root; arrays are read from ``data_path / "test"``.
        n_total: Size of the whole sample pool (across all shards).
        shard_id: Index of the shard to return.
        num_shards: Number of shards the pool is split into.
        seed: Seed for the index draw.

    Returns:
        Dict with int32 tensors ``inputs``, ``labels``, ``puzzle_identifiers``
        and the numpy array ``idx`` of selected dataset rows.

    Raises:
        ValueError: If ``num_shards`` < 1 or ``n_total`` exceeds the number of
            test samples.
    """
    if num_shards < 1:
        raise ValueError(f"num_shards must be >= 1, got {num_shards}")

    test_dir = data_path / "test"
    # Memory-map the arrays: only the selected rows are copied into memory.
    inputs = np.load(test_dir / "all__inputs.npy", mmap_mode="r")
    labels = np.load(test_dir / "all__labels.npy", mmap_mode="r")
    pid = np.load(test_dir / "all__puzzle_identifiers.npy", mmap_mode="r")
    if n_total > len(inputs):
        raise ValueError(
            f"n_total={n_total} exceeds the {len(inputs)} available test samples"
        )

    rng = np.random.default_rng(seed)
    all_idx = rng.choice(len(inputs), size=n_total, replace=False)
    shard_size = (n_total + num_shards - 1) // num_shards  # ceil division
    s, e = shard_id * shard_size, min((shard_id + 1) * shard_size, n_total)
    idx = all_idx[s:e]
    return {
        "inputs": torch.from_numpy(np.ascontiguousarray(inputs[idx], dtype=np.int32)),
        "labels": torch.from_numpy(np.ascontiguousarray(labels[idx], dtype=np.int32)),
        "puzzle_identifiers": torch.from_numpy(np.ascontiguousarray(pid[idx], dtype=np.int32)),
        "idx": idx,
    }
// t_ons) // 2 + + for act_step in range(cfg.halt_max_steps): + z_H_prev = z_H.detach().clone() + z_L_prev = z_L.detach().clone() + + with torch.enable_grad(): + zH, zL = z_H.detach(), z_L.detach() + for _h in range(cfg.H_cycles): + for _l in range(cfg.L_cycles): + # --- joint tangent: prep v_combined = v_H + v_L --- + v_H_all = Q[:, :state_dim, :] # (B, D, k) + v_L_all = Q[:, state_dim:, :] + v_comb = v_H_all + v_L_all + # --- k JVPs through L_level --- + new_v_L_cols = [] + f_L = lambda z: inner.L_level(z, zH + input_embeddings, **seq_info) + for i in range(k_lyap): + v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype) + zL_new, Dv = jvp_through(f_L, zL, v_i) + new_v_L_cols.append(Dv.reshape(B, state_dim).to(torch.float32)) + new_v_L = torch.stack(new_v_L_cols, dim=-1) # (B, D, k) + # Reassemble Q (v_H unchanged, v_L updated) + Q = torch.cat([v_H_all, new_v_L], dim=1) + zL = zL_new + step_counter += 1 + if step_counter % t_ons == 0: + Q, R = torch.linalg.qr(Q) + log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log() + n_lyap_steps += 1 + R_list.append(R.detach()) + if n_lyap_steps == target_qr: + Q_mid = Q.detach().clone() + + # --- H step: v_comb = v_H + v_L, JVP through H_level --- + v_H_all = Q[:, :state_dim, :] + v_L_all = Q[:, state_dim:, :] + v_comb = v_H_all + v_L_all + new_v_H_cols = [] + f_H = lambda z: inner.H_level(z, zL, **seq_info) + for i in range(k_lyap): + v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype) + zH_new, Dv = jvp_through(f_H, zH, v_i) + new_v_H_cols.append(Dv.reshape(B, state_dim).to(torch.float32)) + new_v_H = torch.stack(new_v_H_cols, dim=-1) + Q = torch.cat([new_v_H, v_L_all], dim=1) + zH = zH_new + step_counter += 1 + if step_counter % t_ons == 0: + Q, R = torch.linalg.qr(Q) + log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log() + n_lyap_steps += 1 + R_list.append(R.detach()) + if n_lyap_steps == target_qr: + Q_mid = 
Q.detach().clone() + + z_H, z_L = zH, zL + + drift_zH_per_step.append((z_H - z_H_prev).float().flatten(1).norm(dim=1).cpu()) + drift_zL_per_step.append((z_L - z_L_prev).float().flatten(1).norm(dim=1).cpu()) + + with torch.no_grad(): + q_logits = inner.q_head(z_H[:, 0]).float() + q_halt, q_continue = q_logits[..., 0], q_logits[..., 1] + q_halt_hist.append(q_halt.cpu()); q_continue_hist.append(q_continue.cpu()) + new_halt = (q_halt > q_continue) & (halted_at == 0) + halted_at[new_halt] = act_step + 1 + output = inner.lm_head(z_H)[:, inner.puzzle_emb_len:].float() + final_logits = output + + lyap_spec = (log_R_sum / max(n_lyap_steps, 1)).cpu().numpy() # (B, k) + + # Ginelli CLV backward pass -> covariant Lyapunov vectors at the midpoint; pairwise CLV angles. + with torch.no_grad(): + if Q_mid is None: + Q_mid = Q + g2 = torch.Generator(device=device).manual_seed(seed + 9973) + C = torch.triu(torch.randn(B, k_lyap, k_lyap, device=device, dtype=torch.float32, generator=g2)) + C = C / C.norm(dim=1, keepdim=True).clamp_min(1e-20) + for R in reversed(R_list[target_qr:]): + Rf = R.float() + diag = Rf.diagonal(dim1=-2, dim2=-1) + Rf = Rf + torch.diag_embed(torch.where(diag.abs() < 1e-8, torch.full_like(diag, 1e-8), torch.zeros_like(diag))) + C = torch.linalg.solve_triangular(Rf, C, upper=True) + C = C / C.norm(dim=1, keepdim=True).clamp_min(1e-20) + V = torch.matmul(Q_mid.float(), C) + V = V / V.norm(dim=1, keepdim=True).clamp_min(1e-20) + gram = torch.matmul(V.transpose(1, 2), V).abs() + eye = torch.eye(k_lyap, device=device).unsqueeze(0) + off = gram * (1 - eye) + clv_maxcos = off.amax(dim=(1, 2)).cpu().numpy() + clv_meancos = (off.sum(dim=(1, 2)) / (k_lyap * (k_lyap - 1))).cpu().numpy() + clv_minangle = torch.acos(off.amax(dim=(1, 2)).clamp(max=1 - 1e-7)).cpu().numpy() + Dh0 = seq_full * hidden; pe0 = inner.puzzle_emb_len + vH = V[:, :Dh0, 0].reshape(B, seq_full, hidden) + eH = (vH ** 2).sum(-1); eL = (V[:, Dh0:, 0].reshape(B, seq_full, hidden) ** 2).sum(-1) + e = eH + eL 
+ clv_true_hfrac = (eH.sum(-1) / e.sum(-1).clamp_min(1e-20)).cpu().numpy() + clv_true_pr_ans = (e[:, pe0:].sum(-1) ** 2 / (e[:, pe0:] ** 2).sum(-1).clamp_min(1e-20) / max(seq_full - pe0, 1)).cpu().numpy() + ro = torch.matmul(vH[:, pe0:], inner.lm_head.weight.float().t()) + clv_true_readout = (ro.flatten(1).norm(dim=1) / vH[:, pe0:].flatten(1).norm(dim=1).clamp_min(1e-20)).cpu().numpy() + + with torch.no_grad(): + preds = final_logits.argmax(dim=-1) + labels = batch["labels"].to(device) + mask = labels > 0 + exact = ((preds == labels) | ~mask).all(dim=-1).cpu().float() + token_acc = ((preds == labels) & mask).sum(-1).float() / mask.sum(-1).float().clamp_min(1) + token_acc = token_acc.cpu() + + # CLV geometry: leading columns of final Q are the leading covariant Lyapunov vectors. + with torch.no_grad(): + Dh = seq_full * hidden + n_top = min(3, k_lyap) + pe = inner.puzzle_emb_len + W = inner.lm_head.weight.float() + hfrac_c, pr_c, prans_c, ro_c = [], [], [], [] + for j in range(n_top): + col = Q[:, :, j].float() + qH = col[:, :Dh].reshape(B, seq_full, hidden) + qL = col[:, Dh:].reshape(B, seq_full, hidden) + eH = (qH ** 2).sum(-1); eL = (qL ** 2).sum(-1) + e = eH + eL + hfrac_c.append((eH.sum(-1) / e.sum(-1).clamp_min(1e-20)).cpu()) + pr_c.append((e.sum(-1) ** 2 / (e ** 2).sum(-1).clamp_min(1e-20) / seq_full).cpu()) + ea = e[:, pe:] + prans_c.append((ea.sum(-1) ** 2 / (ea ** 2).sum(-1).clamp_min(1e-20) / max(seq_full - pe, 1)).cpu()) + ro = torch.matmul(qH[:, pe:], W.t()) + ro_c.append((ro.flatten(1).norm(dim=1) / qH[:, pe:].flatten(1).norm(dim=1).clamp_min(1e-20)).cpu()) + + return { + "drift_zH": torch.stack(drift_zH_per_step, dim=1).numpy(), + "drift_zL": torch.stack(drift_zL_per_step, dim=1).numpy(), + "halted_at": halted_at.cpu().numpy(), + "q_halt": torch.stack(q_halt_hist, dim=1).numpy(), + "q_continue": torch.stack(q_continue_hist, dim=1).numpy(), + "lyap_spec": lyap_spec, + "exact_correct": exact.numpy(), + "token_acc": token_acc.numpy(), + "clv_hfrac": 
torch.stack(hfrac_c, 1).numpy(), + "clv_pr": torch.stack(pr_c, 1).numpy(), + "clv_pr_ans": torch.stack(prans_c, 1).numpy(), + "clv_readout": torch.stack(ro_c, 1).numpy(), + "clv_maxcos": clv_maxcos, + "clv_meancos": clv_meancos, + "clv_minangle": clv_minangle, + "clv_true_hfrac": clv_true_hfrac, + "clv_true_pr_ans": clv_true_pr_ans, + "clv_true_readout": clv_true_readout, + } + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--ckpt-root", required=True) + ap.add_argument("--ckpt-name", default="step_26040") + ap.add_argument("--n-samples", type=int, default=1024) + ap.add_argument("--shard-id", type=int, default=0) + ap.add_argument("--num-shards", type=int, default=1) + ap.add_argument("--batch-size", type=int, default=32) + ap.add_argument("--k-lyap", type=int, default=8) + ap.add_argument("--t-ons", type=int, default=1) + ap.add_argument("--seed", type=int, default=0) + ap.add_argument("--out", default="diag_joint.npz") + args = ap.parse_args() + + device = "cuda" + model, cfg, train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device) + print(f"loaded {args.ckpt_name}: hidden={model.inner.config.hidden_size}, " + f"seq_full={train_meta['seq_len'] + model.inner.puzzle_emb_len}, " + f"halt_max_steps={model.inner.config.halt_max_steps}, " + f"H={model.inner.config.H_cycles} L={model.inner.config.L_cycles}") + + test_samples = load_test_samples(Path(cfg["data_path"]), args.n_samples, + args.shard_id, args.num_shards, args.seed) + n_this = len(test_samples["inputs"]) + print(f"shard {args.shard_id}/{args.num_shards}: {n_this} samples") + + results = {k: [] for k in ["drift_zH","drift_zL","halted_at","q_halt","q_continue", + "lyap_spec","exact_correct","token_acc","idx"]} + t0 = time.time() + for s in range(0, n_this, args.batch_size): + e = min(s + args.batch_size, n_this) + batch = {k: test_samples[k][s:e].to(device) for k in ["inputs","labels","puzzle_identifiers"]} + out = run_diagnose_batch(model, batch, device, args.k_lyap, 
args.t_ons, args.seed + s) + for k, v in out.items(): + if v is not None: results.setdefault(k, []).append(v) + results.setdefault("idx", []).append(test_samples["idx"][s:e]) + ls = out["lyap_spec"] + print(f" [{e}/{n_this}] dt={time.time()-t0:.1f}s exact={out['exact_correct'].mean():.3f} " + f"λ_1={ls[:,0].mean():.4f} λ_{args.k_lyap}={ls[:,-1].mean():.4f}", flush=True) + + saved = {} + for k, v in results.items(): + if not v: continue + try: saved[k] = np.concatenate(v, 0) + except ValueError: saved[k] = np.stack(v, 0) + np.savez_compressed(args.out, **saved) + + ls = saved["lyap_spec"] + succ = saved["exact_correct"] > 0.5 + print(f"\nN={len(saved['exact_correct'])} acc={succ.mean():.4f}") + print(f"{'i':>3} {'mean':>10} {'succ':>10} {'fail':>10} {'Δ(f-s)':>10}") + for i in range(ls.shape[1]): + li = ls[:, i] + print(f"{i+1:>3} {li.mean():+10.4f} {li[succ].mean():+10.4f} {li[~succ].mean():+10.4f} " + f"{li[~succ].mean()-li[succ].mean():+10.4f}") + print(f"\nsaved → {args.out}") + + +if __name__ == "__main__": + main() diff --git a/diagnose_hrm_joint_geom.py b/diagnose_hrm_joint_geom.py new file mode 100644 index 0000000..41c8629 --- /dev/null +++ b/diagnose_hrm_joint_geom.py @@ -0,0 +1,264 @@ +"""HRM Sudoku Lyapunov diagnostic with CORRECTED joint (z_H, z_L) tangent tracking. + +Key fix over diagnose_hrm.py: + - State is conceptually (z_H, z_L) ∈ R^{2D} where D = seq_full * hidden. + - L_level update: z_L_new = layers_L(z_L + z_H + input_embeddings), so + v_L_new = J_L · (v_H + v_L), v_H_new = v_H + - H_level update: z_H_new = layers_H(z_H + z_L), so + v_H_new = J_H · (v_H + v_L), v_L_new = v_L + - Each L or H cycle = ONE JVP per tangent column (same cost as before), + but operating on the combined tangent v_H + v_L. + - Q is (B, 2D, k); QR over the 2D dimension keeps an orthonormal basis. 
def jvp_through(f, x, v):
    """Evaluate ``f`` at ``x`` together with its Jacobian-vector product along ``v``.

    Thin wrapper around ``torch.autograd.functional.jvp``. No graph is built for
    the results (``create_graph=False``) because they are used for diagnostics only.

    Returns:
        The pair ``(f(x), D_f(x) @ v)``.
    """
    primal_out, tangent_out = torch.autograd.functional.jvp(
        f, x, v=v, create_graph=False, strict=False
    )
    return primal_out, tangent_out
L_level --- + new_v_L_cols = [] + f_L = lambda z: inner.L_level(z, zH + input_embeddings, **seq_info) + for i in range(k_lyap): + v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype) + zL_new, Dv = jvp_through(f_L, zL, v_i) + new_v_L_cols.append(Dv.reshape(B, state_dim).to(torch.float32)) + new_v_L = torch.stack(new_v_L_cols, dim=-1) # (B, D, k) + # Reassemble Q (v_H unchanged, v_L updated) + Q = torch.cat([v_H_all, new_v_L], dim=1) + zL = zL_new + step_counter += 1 + if step_counter % t_ons == 0: + Q, R = torch.linalg.qr(Q) + log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log() + n_lyap_steps += 1 + + # --- H step: v_comb = v_H + v_L, JVP through H_level --- + v_H_all = Q[:, :state_dim, :] + v_L_all = Q[:, state_dim:, :] + v_comb = v_H_all + v_L_all + new_v_H_cols = [] + f_H = lambda z: inner.H_level(z, zL, **seq_info) + for i in range(k_lyap): + v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype) + zH_new, Dv = jvp_through(f_H, zH, v_i) + new_v_H_cols.append(Dv.reshape(B, state_dim).to(torch.float32)) + new_v_H = torch.stack(new_v_H_cols, dim=-1) + Q = torch.cat([new_v_H, v_L_all], dim=1) + zH = zH_new + step_counter += 1 + if step_counter % t_ons == 0: + Q, R = torch.linalg.qr(Q) + log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log() + n_lyap_steps += 1 + + z_H, z_L = zH, zL + + drift_zH_per_step.append((z_H - z_H_prev).float().flatten(1).norm(dim=1).cpu()) + drift_zL_per_step.append((z_L - z_L_prev).float().flatten(1).norm(dim=1).cpu()) + + with torch.no_grad(): + q_logits = inner.q_head(z_H[:, 0]).float() + q_halt, q_continue = q_logits[..., 0], q_logits[..., 1] + q_halt_hist.append(q_halt.cpu()); q_continue_hist.append(q_continue.cpu()) + new_halt = (q_halt > q_continue) & (halted_at == 0) + halted_at[new_halt] = act_step + 1 + output = inner.lm_head(z_H)[:, inner.puzzle_emb_len:].float() + final_logits = output + + lyap_spec = (log_R_sum / max(n_lyap_steps, 
def main():
    """CLI entry point: run the joint Lyapunov diagnostic on one shard and save an ``.npz``.

    Loads the checkpoint, draws this shard's test samples, runs
    ``run_diagnose_batch`` batch by batch, concatenates every returned array
    along the sample axis, writes them to ``--out`` and prints a per-exponent
    table comparing exactly-solved and failed samples.

    An empty shard still writes the (empty) archive and returns without printing
    the table; a success/failure group with no members is reported as ``nan``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--ckpt-root", required=True)
    ap.add_argument("--ckpt-name", default="step_26040")
    ap.add_argument("--n-samples", type=int, default=1024)
    ap.add_argument("--shard-id", type=int, default=0)
    ap.add_argument("--num-shards", type=int, default=1)
    ap.add_argument("--batch-size", type=int, default=32)
    ap.add_argument("--k-lyap", type=int, default=8)
    ap.add_argument("--t-ons", type=int, default=1)
    ap.add_argument("--seed", type=int, default=0)
    ap.add_argument("--out", default="diag_joint.npz")
    args = ap.parse_args()

    device = "cuda"
    model, cfg, train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device)
    print(f"loaded {args.ckpt_name}: hidden={model.inner.config.hidden_size}, "
          f"seq_full={train_meta['seq_len'] + model.inner.puzzle_emb_len}, "
          f"halt_max_steps={model.inner.config.halt_max_steps}, "
          f"H={model.inner.config.H_cycles} L={model.inner.config.L_cycles}")

    test_samples = load_test_samples(Path(cfg["data_path"]), args.n_samples,
                                     args.shard_id, args.num_shards, args.seed)
    n_this = len(test_samples["inputs"])
    print(f"shard {args.shard_id}/{args.num_shards}: {n_this} samples")

    results = {k: [] for k in ["drift_zH","drift_zL","halted_at","q_halt","q_continue",
                               "lyap_spec","exact_correct","token_acc","idx"]}
    t0 = time.time()
    for s in range(0, n_this, args.batch_size):
        e = min(s + args.batch_size, n_this)
        batch = {k: test_samples[k][s:e].to(device) for k in ["inputs","labels","puzzle_identifiers"]}
        # The batch offset is folded into the seed so each batch gets its own tangent basis.
        out = run_diagnose_batch(model, batch, device, args.k_lyap, args.t_ons, args.seed + s)
        for k, v in out.items():
            if v is not None:
                results.setdefault(k, []).append(v)
        results.setdefault("idx", []).append(test_samples["idx"][s:e])
        ls = out["lyap_spec"]
        print(f" [{e}/{n_this}] dt={time.time()-t0:.1f}s exact={out['exact_correct'].mean():.3f} "
              f"λ_1={ls[:,0].mean():.4f} λ_{args.k_lyap}={ls[:,-1].mean():.4f}", flush=True)

    saved = {}
    for k, v in results.items():
        if not v:
            continue
        try:
            saved[k] = np.concatenate(v, 0)
        except ValueError:
            saved[k] = np.stack(v, 0)
    np.savez_compressed(args.out, **saved)

    if "lyap_spec" not in saved:
        # Empty shard (e.g. more shards than samples): nothing was diagnosed, so
        # there is no spectrum to tabulate. Previously this path raised KeyError.
        print("\nN=0 (empty shard): no Lyapunov spectrum to summarise")
        print(f"\nsaved → {args.out}")
        return

    def group_mean(values):
        # Mean of an empty group is reported as nan without NumPy's
        # "Mean of empty slice" RuntimeWarning.
        return values.mean() if values.size else float("nan")

    ls = saved["lyap_spec"]
    succ = saved["exact_correct"] > 0.5
    print(f"\nN={len(saved['exact_correct'])} acc={succ.mean():.4f}")
    print(f"{'i':>3} {'mean':>10} {'succ':>10} {'fail':>10} {'Δ(f-s)':>10}")
    for i in range(ls.shape[1]):
        li = ls[:, i]
        mean_succ, mean_fail = group_mean(li[succ]), group_mean(li[~succ])
        print(f"{i+1:>3} {li.mean():+10.4f} {mean_succ:+10.4f} {mean_fail:+10.4f} "
              f"{mean_fail-mean_succ:+10.4f}")
    print(f"\nsaved → {args.out}")
+""" +from __future__ import annotations +import sys, os, yaml, math, argparse, json, time +from pathlib import Path +import numpy as np +import torch + +HRM_DIR = Path("/home/yurenh2/rrm/hrm") +sys.path.insert(0, str(HRM_DIR)) + +from models.hrm.hrm_act_v1 import HierarchicalReasoningModel_ACTV1 + + +def load_model(ckpt_root: Path, ckpt_name: str, device: str): + cfg = yaml.safe_load((ckpt_root / "all_config.yaml").read_text()) + arch_cfg = dict(cfg["arch"]) + train_meta = json.loads((Path(cfg["data_path"]) / "train" / "dataset.json").read_text()) + arch_cfg.update(batch_size=cfg["global_batch_size"], seq_len=train_meta["seq_len"], + vocab_size=train_meta["vocab_size"], + num_puzzle_identifiers=train_meta["num_puzzle_identifiers"], causal=False) + model = HierarchicalReasoningModel_ACTV1(arch_cfg) + sd = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True) + stripped = {k.replace("_orig_mod.", "").replace("model.", ""): v for k, v in sd.items()} + model.load_state_dict(stripped, strict=False) + model.to(device).eval() + return model, cfg, train_meta + + +def load_test_samples(data_path: Path, n_total: int, shard_id: int, num_shards: int, seed: int): + rng = np.random.default_rng(seed) + inputs = np.load(data_path / "test" / "all__inputs.npy") + labels = np.load(data_path / "test" / "all__labels.npy") + pid = np.load(data_path / "test" / "all__puzzle_identifiers.npy") + all_idx = rng.choice(len(inputs), size=n_total, replace=False) + shard_size = (n_total + num_shards - 1) // num_shards + s, e = shard_id * shard_size, min((shard_id + 1) * shard_size, n_total) + idx = all_idx[s:e] + return { + "inputs": torch.from_numpy(inputs[idx].astype(np.int32)), + "labels": torch.from_numpy(labels[idx].astype(np.int32)), + "puzzle_identifiers": torch.from_numpy(pid[idx].astype(np.int32)), + "idx": idx, + } + + +def jvp_through(f, x, v): + """One JVP. Returns (f(x), D_f(x) @ v). 
def run_diagnose_batch(model, batch, device, k_lyap, t_ons, seed):
    """Compute joint top-k Lyapunov spectrum over (z_H, z_L) joint tangent.

    Horizon variant: the number of outer ACT steps is read from the
    ``DIAG_HORIZON`` environment variable (default ``4``) instead of
    ``cfg.halt_max_steps``.

    Per L_level step:
        v_L_new = J_L · (v_H + v_L), v_H_new = v_H
    Per H_level step:
        v_H_new = J_H · (v_H + v_L), v_L_new = v_L

    Args:
        model: object exposing ``model.inner`` with ``config``, ``H_init``,
            ``L_init``, ``L_level``, ``H_level``, ``q_head``, ``lm_head``,
            ``puzzle_emb_len``, ``forward_dtype`` and ``_input_embeddings``.
        batch: dict with ``"inputs"``, ``"labels"`` and ``"puzzle_identifiers"``
            tensors; the batch size is taken from ``batch["inputs"].shape[0]``.
        device: device on which the tangent basis and accumulators are allocated.
        k_lyap: number of tangent directions (columns of Q) that are tracked.
        t_ons: re-orthonormalise (QR) every ``t_ons`` inner L/H steps.
        seed: seed for the random initial tangent basis.

    Returns:
        dict of numpy arrays: ``drift_zH``/``drift_zL``/``q_halt``/``q_continue``
        with one column per ACT step, ``lyap_spec`` of shape ``(B, k_lyap)``, and
        per-sample ``halted_at``, ``exact_correct`` and ``token_acc``.

    Raises:
        ValueError: if ``DIAG_HORIZON`` is not an integer >= 1.
    """
    # Read the horizon once, up front. A horizon of 0 used to fall through to a
    # NameError on `final_logits` after the (empty) loop; fail early and clearly.
    horizon = int(os.environ.get("DIAG_HORIZON", "4"))
    if horizon < 1:
        raise ValueError(f"DIAG_HORIZON must be an integer >= 1, got {horizon}")

    inner = model.inner
    cfg = inner.config
    B = batch["inputs"].shape[0]
    seq_full = cfg.seq_len + inner.puzzle_emb_len
    hidden = cfg.hidden_size
    state_dim = seq_full * hidden  # one of (z_H or z_L)
    total_dim = 2 * state_dim  # joint (v_H, v_L)

    # Carry init
    z_H = inner.H_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
    z_L = inner.L_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
    seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None)
    input_embeddings = inner._input_embeddings(batch["inputs"].to(device),
                                               batch["puzzle_identifiers"].to(device))

    # Joint orthonormal tangent basis
    g = torch.Generator(device=device).manual_seed(seed)
    Q0 = torch.randn(B, total_dim, k_lyap, device=device, dtype=torch.float32, generator=g)
    Q, _ = torch.linalg.qr(Q0)  # (B, 2D, k)
    log_R_sum = torch.zeros(B, k_lyap, device=device, dtype=torch.float32)
    n_lyap_steps = 0  # number of QR re-orthonormalisations performed
    step_counter = 0  # number of inner (L or H) steps performed

    drift_zH_per_step, drift_zL_per_step = [], []
    # 1-based ACT step at which q_halt first exceeded q_continue; 0 = never.
    halted_at = torch.zeros(B, dtype=torch.long, device=device)
    q_halt_hist, q_continue_hist = [], []

    for act_step in range(horizon):
        z_H_prev = z_H.detach().clone()
        z_L_prev = z_L.detach().clone()

        with torch.enable_grad():
            zH, zL = z_H.detach(), z_L.detach()
            for _h in range(cfg.H_cycles):
                for _l in range(cfg.L_cycles):
                    # --- joint tangent: prep v_combined = v_H + v_L ---
                    v_H_all = Q[:, :state_dim, :]  # (B, D, k)
                    v_L_all = Q[:, state_dim:, :]
                    v_comb = v_H_all + v_L_all
                    # --- k JVPs through L_level ---
                    new_v_L_cols = []
                    # The lambda reads the current zH; it is only called inside
                    # this iteration, before zH is reassigned.
                    f_L = lambda z: inner.L_level(z, zH + input_embeddings, **seq_info)
                    for i in range(k_lyap):
                        v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype)
                        zL_new, Dv = jvp_through(f_L, zL, v_i)
                        new_v_L_cols.append(Dv.reshape(B, state_dim).to(torch.float32))
                    new_v_L = torch.stack(new_v_L_cols, dim=-1)  # (B, D, k)
                    # Reassemble Q (v_H unchanged, v_L updated)
                    Q = torch.cat([v_H_all, new_v_L], dim=1)
                    zL = zL_new
                    step_counter += 1
                    if step_counter % t_ons == 0:
                        Q, R = torch.linalg.qr(Q)
                        # log|R_ii| accumulates the growth of the i-th direction.
                        log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
                        n_lyap_steps += 1

                # --- H step: v_comb = v_H + v_L, JVP through H_level ---
                v_H_all = Q[:, :state_dim, :]
                v_L_all = Q[:, state_dim:, :]
                v_comb = v_H_all + v_L_all
                new_v_H_cols = []
                f_H = lambda z: inner.H_level(z, zL, **seq_info)
                for i in range(k_lyap):
                    v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype)
                    zH_new, Dv = jvp_through(f_H, zH, v_i)
                    new_v_H_cols.append(Dv.reshape(B, state_dim).to(torch.float32))
                new_v_H = torch.stack(new_v_H_cols, dim=-1)
                Q = torch.cat([new_v_H, v_L_all], dim=1)
                zH = zH_new
                step_counter += 1
                if step_counter % t_ons == 0:
                    Q, R = torch.linalg.qr(Q)
                    log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
                    n_lyap_steps += 1

        z_H, z_L = zH, zL

        # Per-sample L2 distance travelled by each state during this ACT step.
        drift_zH_per_step.append((z_H - z_H_prev).float().flatten(1).norm(dim=1).cpu())
        drift_zL_per_step.append((z_L - z_L_prev).float().flatten(1).norm(dim=1).cpu())

        with torch.no_grad():
            q_logits = inner.q_head(z_H[:, 0]).float()
            q_halt, q_continue = q_logits[..., 0], q_logits[..., 1]
            q_halt_hist.append(q_halt.cpu())
            q_continue_hist.append(q_continue.cpu())
            # Halting is only recorded; the rollout always runs the full horizon.
            new_halt = (q_halt > q_continue) & (halted_at == 0)
            halted_at[new_halt] = act_step + 1
            output = inner.lm_head(z_H)[:, inner.puzzle_emb_len:].float()
            # Overwritten every step, so the logits of the last ACT step are scored.
            final_logits = output

    # Mean log|R_ii| per QR step (i.e. per t_ons inner steps, not per inner step).
    lyap_spec = (log_R_sum / max(n_lyap_steps, 1)).cpu().numpy()  # (B, k)

    with torch.no_grad():
        preds = final_logits.argmax(dim=-1)
        labels = batch["labels"].to(device)
        # Positions with label <= 0 are excluded from both accuracy measures.
        mask = labels > 0
        exact = ((preds == labels) | ~mask).all(dim=-1).cpu().float()
        token_acc = ((preds == labels) & mask).sum(-1).float() / mask.sum(-1).float().clamp_min(1)
        token_acc = token_acc.cpu()

    return {
        "drift_zH": torch.stack(drift_zH_per_step, dim=1).numpy(),
        "drift_zL": torch.stack(drift_zL_per_step, dim=1).numpy(),
        "halted_at": halted_at.cpu().numpy(),
        "q_halt": torch.stack(q_halt_hist, dim=1).numpy(),
        "q_continue": torch.stack(q_continue_hist, dim=1).numpy(),
        "lyap_spec": lyap_spec,
        "exact_correct": exact.numpy(),
        "token_acc": token_acc.numpy(),
    }
results = {k: [] for k in ["drift_zH","drift_zL","halted_at","q_halt","q_continue", + "lyap_spec","exact_correct","token_acc","idx"]} + t0 = time.time() + for s in range(0, n_this, args.batch_size): + e = min(s + args.batch_size, n_this) + batch = {k: test_samples[k][s:e].to(device) for k in ["inputs","labels","puzzle_identifiers"]} + out = run_diagnose_batch(model, batch, device, args.k_lyap, args.t_ons, args.seed + s) + for k, v in out.items(): + if v is not None: results[k].append(v) + results["idx"].append(test_samples["idx"][s:e]) + ls = out["lyap_spec"] + print(f" [{e}/{n_this}] dt={time.time()-t0:.1f}s exact={out['exact_correct'].mean():.3f} " + f"λ_1={ls[:,0].mean():.4f} λ_{args.k_lyap}={ls[:,-1].mean():.4f}", flush=True) + + saved = {} + for k, v in results.items(): + if not v: continue + try: saved[k] = np.concatenate(v, 0) + except ValueError: saved[k] = np.stack(v, 0) + np.savez_compressed(args.out, **saved) + + ls = saved["lyap_spec"] + succ = saved["exact_correct"] > 0.5 + print(f"\nN={len(saved['exact_correct'])} acc={succ.mean():.4f}") + print(f"{'i':>3} {'mean':>10} {'succ':>10} {'fail':>10} {'Δ(f-s)':>10}") + for i in range(ls.shape[1]): + li = ls[:, i] + print(f"{i+1:>3} {li.mean():+10.4f} {li[succ].mean():+10.4f} {li[~succ].mean():+10.4f} " + f"{li[~succ].mean()-li[succ].mean():+10.4f}") + print(f"\nsaved → {args.out}") + + +if __name__ == "__main__": + main() diff --git a/diagnose_hrm_joint_short.py b/diagnose_hrm_joint_short.py new file mode 100644 index 0000000..2160470 --- /dev/null +++ b/diagnose_hrm_joint_short.py @@ -0,0 +1,240 @@ +"""HRM Sudoku Lyapunov diagnostic with CORRECTED joint (z_H, z_L) tangent tracking. + +Key fix over diagnose_hrm.py: + - State is conceptually (z_H, z_L) ∈ R^{2D} where D = seq_full * hidden. 
+ - L_level update: z_L_new = layers_L(z_L + z_H + input_embeddings), so + v_L_new = J_L · (v_H + v_L), v_H_new = v_H + - H_level update: z_H_new = layers_H(z_H + z_L), so + v_H_new = J_H · (v_H + v_L), v_L_new = v_L + - Each L or H cycle = ONE JVP per tangent column (same cost as before), + but operating on the combined tangent v_H + v_L. + - Q is (B, 2D, k); QR over the 2D dimension keeps an orthonormal basis. +""" +from __future__ import annotations +import sys, os, yaml, math, argparse, json, time +from pathlib import Path +import numpy as np +import torch + +HRM_DIR = Path("/home/yurenh2/rrm/hrm") +sys.path.insert(0, str(HRM_DIR)) + +from models.hrm.hrm_act_v1 import HierarchicalReasoningModel_ACTV1 + + +def load_model(ckpt_root: Path, ckpt_name: str, device: str): + cfg = yaml.safe_load((ckpt_root / "all_config.yaml").read_text()) + arch_cfg = dict(cfg["arch"]) + train_meta = json.loads((Path(cfg["data_path"]) / "train" / "dataset.json").read_text()) + arch_cfg.update(batch_size=cfg["global_batch_size"], seq_len=train_meta["seq_len"], + vocab_size=train_meta["vocab_size"], + num_puzzle_identifiers=train_meta["num_puzzle_identifiers"], causal=False) + model = HierarchicalReasoningModel_ACTV1(arch_cfg) + sd = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True) + stripped = {k.replace("_orig_mod.", "").replace("model.", ""): v for k, v in sd.items()} + model.load_state_dict(stripped, strict=False) + model.to(device).eval() + return model, cfg, train_meta + + +def load_test_samples(data_path: Path, n_total: int, shard_id: int, num_shards: int, seed: int): + rng = np.random.default_rng(seed) + inputs = np.load(data_path / "test" / "all__inputs.npy") + labels = np.load(data_path / "test" / "all__labels.npy") + pid = np.load(data_path / "test" / "all__puzzle_identifiers.npy") + all_idx = rng.choice(len(inputs), size=n_total, replace=False) + shard_size = (n_total + num_shards - 1) // num_shards + s, e = shard_id * shard_size, min((shard_id + 1) 
def jvp_through(f, x, v):
    """Evaluate ``f`` at ``x`` together with one Jacobian-vector product.

    Returns the pair ``(f(x), J_f(x) @ v)``. No graph is built for the result
    (``create_graph=False``) because the value is only used diagnostically.
    """
    primal_out, tangent_out = torch.autograd.functional.jvp(
        f, x, v=v, create_graph=False, strict=False
    )
    return primal_out, tangent_out
z_L.detach().clone() + + with torch.enable_grad(): + zH, zL = z_H.detach(), z_L.detach() + for _h in range(cfg.H_cycles): + for _l in range(cfg.L_cycles): + # --- joint tangent: prep v_combined = v_H + v_L --- + v_H_all = Q[:, :state_dim, :] # (B, D, k) + v_L_all = Q[:, state_dim:, :] + v_comb = v_H_all + v_L_all + # --- k JVPs through L_level --- + new_v_L_cols = [] + f_L = lambda z: inner.L_level(z, zH + input_embeddings, **seq_info) + for i in range(k_lyap): + v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype) + zL_new, Dv = jvp_through(f_L, zL, v_i) + new_v_L_cols.append(Dv.reshape(B, state_dim).to(torch.float32)) + new_v_L = torch.stack(new_v_L_cols, dim=-1) # (B, D, k) + # Reassemble Q (v_H unchanged, v_L updated) + Q = torch.cat([v_H_all, new_v_L], dim=1) + zL = zL_new + step_counter += 1 + if step_counter % t_ons == 0: + Q, R = torch.linalg.qr(Q) + log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log() + n_lyap_steps += 1 + + # --- H step: v_comb = v_H + v_L, JVP through H_level --- + v_H_all = Q[:, :state_dim, :] + v_L_all = Q[:, state_dim:, :] + v_comb = v_H_all + v_L_all + new_v_H_cols = [] + f_H = lambda z: inner.H_level(z, zL, **seq_info) + for i in range(k_lyap): + v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype) + zH_new, Dv = jvp_through(f_H, zH, v_i) + new_v_H_cols.append(Dv.reshape(B, state_dim).to(torch.float32)) + new_v_H = torch.stack(new_v_H_cols, dim=-1) + Q = torch.cat([new_v_H, v_L_all], dim=1) + zH = zH_new + step_counter += 1 + if step_counter % t_ons == 0: + Q, R = torch.linalg.qr(Q) + log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log() + n_lyap_steps += 1 + + z_H, z_L = zH, zL + + drift_zH_per_step.append((z_H - z_H_prev).float().flatten(1).norm(dim=1).cpu()) + drift_zL_per_step.append((z_L - z_L_prev).float().flatten(1).norm(dim=1).cpu()) + + with torch.no_grad(): + q_logits = inner.q_head(z_H[:, 0]).float() + q_halt, q_continue = 
def main():
    """CLI entry point: run the joint Lyapunov diagnostic on one shard and save it.

    Loads the checkpoint, draws this shard's test samples, runs
    ``run_diagnose_batch`` batch by batch, writes all per-sample arrays to a
    compressed ``.npz`` (``--out``) and prints a per-exponent table comparing
    exactly-solved samples with the rest.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--ckpt-root", required=True)
    ap.add_argument("--ckpt-name", default="step_26040")
    ap.add_argument("--n-samples", type=int, default=1024)
    ap.add_argument("--shard-id", type=int, default=0)
    ap.add_argument("--num-shards", type=int, default=1)
    ap.add_argument("--batch-size", type=int, default=32)
    ap.add_argument("--k-lyap", type=int, default=8)
    ap.add_argument("--t-ons", type=int, default=1)
    ap.add_argument("--seed", type=int, default=0)
    ap.add_argument("--out", default="diag_joint.npz")
    args = ap.parse_args()

    device = "cuda"
    model, cfg, train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device)
    print(f"loaded {args.ckpt_name}: hidden={model.inner.config.hidden_size}, "
          f"seq_full={train_meta['seq_len'] + model.inner.puzzle_emb_len}, "
          f"halt_max_steps={model.inner.config.halt_max_steps}, "
          f"H={model.inner.config.H_cycles} L={model.inner.config.L_cycles}")

    test_samples = load_test_samples(Path(cfg["data_path"]), args.n_samples,
                                     args.shard_id, args.num_shards, args.seed)
    n_this = len(test_samples["inputs"])
    print(f"shard {args.shard_id}/{args.num_shards}: {n_this} samples")
    if n_this == 0:
        # With more shards than samples a shard can be empty; without this
        # guard the summary below dies with KeyError on saved["lyap_spec"].
        print("empty shard: nothing to diagnose, no output written")
        return

    results = {k: [] for k in ["drift_zH", "drift_zL", "halted_at", "q_halt", "q_continue",
                               "lyap_spec", "exact_correct", "token_acc", "idx"]}
    t0 = time.time()
    for s in range(0, n_this, args.batch_size):
        e = min(s + args.batch_size, n_this)
        batch = {k: test_samples[k][s:e].to(device) for k in ["inputs", "labels", "puzzle_identifiers"]}
        # Offset the seed by the batch start so every batch gets its own tangent basis.
        out = run_diagnose_batch(model, batch, device, args.k_lyap, args.t_ons, args.seed + s)
        for k, v in out.items():
            if v is not None:
                results[k].append(v)
        results["idx"].append(test_samples["idx"][s:e])
        ls = out["lyap_spec"]
        print(f" [{e}/{n_this}] dt={time.time()-t0:.1f}s exact={out['exact_correct'].mean():.3f} "
              f"λ_1={ls[:,0].mean():.4f} λ_{args.k_lyap}={ls[:,-1].mean():.4f}", flush=True)

    saved = {}
    for k, v in results.items():
        if not v:
            continue
        try:
            saved[k] = np.concatenate(v, 0)
        except ValueError:
            saved[k] = np.stack(v, 0)
    np.savez_compressed(args.out, **saved)

    def group_mean(values):
        # An all-success or all-failure shard leaves one group empty; report
        # NaN for it without numpy's "Mean of empty slice" warning.
        return float(values.mean()) if values.size else float("nan")

    ls = saved["lyap_spec"]
    succ = saved["exact_correct"] > 0.5
    print(f"\nN={len(saved['exact_correct'])} acc={succ.mean():.4f}")
    print(f"{'i':>3} {'mean':>10} {'succ':>10} {'fail':>10} {'Δ(f-s)':>10}")
    for i in range(ls.shape[1]):
        li = ls[:, i]
        m_succ, m_fail = group_mean(li[succ]), group_mean(li[~succ])
        print(f"{i+1:>3} {li.mean():+10.4f} {m_succ:+10.4f} {m_fail:+10.4f} "
              f"{m_fail-m_succ:+10.4f}")
    print(f"\nsaved → {args.out}")
def load_model(ckpt_root: Path, ckpt_name: str, device: str):
    """Build the HRM model described by a checkpoint directory and load its weights.

    Args:
        ckpt_root: Directory holding ``all_config.yaml`` and the checkpoint file.
        ckpt_name: File name of the checkpoint inside ``ckpt_root``.
        device: Torch device string the model is moved to.

    Returns:
        ``(model, cfg, train_meta)``: the model in eval mode, the parsed
        ``all_config.yaml``, and the parsed ``train/dataset.json`` of the
        dataset referenced by ``cfg["data_path"]``.
    """
    cfg = yaml.safe_load((ckpt_root / "all_config.yaml").read_text())
    arch_cfg = dict(cfg["arch"])
    train_meta = json.loads((Path(cfg["data_path"]) / "train" / "dataset.json").read_text())
    arch_cfg.update(batch_size=cfg["global_batch_size"], seq_len=train_meta["seq_len"],
                    vocab_size=train_meta["vocab_size"],
                    num_puzzle_identifiers=train_meta["num_puzzle_identifiers"], causal=False)
    model = HierarchicalReasoningModel_ACTV1(arch_cfg)
    sd = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True)
    # Drop wrapper prefixes from the saved parameter names
    # (presumably left by torch.compile / an outer training wrapper -- confirm).
    stripped = {k.replace("_orig_mod.", "").replace("model.", ""): v for k, v in sd.items()}
    # strict=False tolerates key mismatches, so report them: a silently
    # half-initialised model would still run and produce meaningless exponents.
    missing, unexpected = model.load_state_dict(stripped, strict=False)
    print(f"[load] missing={len(missing)} unexpected={len(unexpected)}")
    if missing[:3]:
        print(f" sample missing: {missing[:3]}")
    if unexpected[:3]:
        print(f" sample unexpected: {unexpected[:3]}")
    model.to(device).eval()
    return model, cfg, train_meta
def _push_tangents(f, z, tangents, state_shape, dtype):
    """Push every column of ``tangents`` through the Jacobian of ``f`` at ``z``.

    ``tangents`` is (B, D, k) float32. Each column is reshaped to
    ``state_shape`` and cast to ``dtype`` for the JVP; the product is flattened
    back to (B, D) float32. Returns ``(f(z), pushed)`` with ``pushed`` (B, D, k).
    """
    n_batch, flat_dim, n_cols = tangents.shape
    primal, cols = None, []
    for i in range(n_cols):
        v_i = tangents[:, :, i].reshape(state_shape).to(dtype)
        # Same call as the module-level ``jvp`` wrapper: one JVP per tangent column.
        primal, Dv = torch.autograd.functional.jvp(f, z, v=v_i, create_graph=False, strict=False)
        cols.append(Dv.reshape(n_batch, flat_dim).to(torch.float32))
    return primal, torch.stack(cols, dim=-1)


def _qr_accumulate(Q, log_R):
    """Re-orthonormalise ``Q`` and add log|diag(R)| to the running sum ``log_R``."""
    Q, R = torch.linalg.qr(Q)
    return Q, log_R + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()


def run_diagnose_batch(model, batch, device, k_lyap, t_ons, seed):
    """Estimate joint, L-only and H-only top-k Lyapunov spectra for one batch.

    Three tangent bases are evolved along the same (z_H, z_L) trajectory:
      * ``Q_joint`` (B, 2D, k): rows [:D] are the z_H part, rows [D:] the z_L
        part. An L step replaces the z_L part by J_L (v_H + v_L); an H step
        replaces the z_H part by J_H (v_H + v_L).
      * ``Q_L`` (B, D, k): pushed through J_L on L steps only.
      * ``Q_H`` (B, D, k): pushed through J_H on H steps only.
    Each basis is QR-re-orthonormalised every ``t_ons`` of its own steps and
    log|diag R| is averaged over the number of QR steps taken.

    Args:
        model: Object whose ``.inner`` exposes ``config``, ``H_init``,
            ``L_init``, ``forward_dtype``, ``puzzle_emb_len``,
            ``_input_embeddings``, ``L_level``, ``H_level``, ``lm_head`` and
            optionally ``rotary_emb``.
        batch: Dict with tensors ``"inputs"``, ``"labels"``, ``"puzzle_identifiers"``.
        device: Device for the tangent bases and the random generator.
        k_lyap: Number of tangent directions (exponents) to track.
        t_ons: Re-orthonormalisation period, in steps.
        seed: Seed of the generator that draws the initial bases.

    Returns:
        Dict of numpy arrays: ``lyap_joint``, ``lyap_L``, ``lyap_H`` (each
        (B, k)), ``exact_correct`` and ``token_acc`` (each (B,)).
    """
    inner = model.inner
    cfg = inner.config
    B = batch["inputs"].shape[0]
    seq_full = cfg.seq_len + inner.puzzle_emb_len
    hidden = cfg.hidden_size
    D = seq_full * hidden
    state_shape = (B, seq_full, hidden)
    dtype = inner.forward_dtype

    z_H = inner.H_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(dtype)
    z_L = inner.L_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(dtype)
    seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None)
    input_embeddings = inner._input_embeddings(batch["inputs"].to(device),
                                               batch["puzzle_identifiers"].to(device))

    # Three independent orthonormal bases, drawn in a fixed order from one generator.
    g = torch.Generator(device=device).manual_seed(seed)
    Q_joint = torch.linalg.qr(torch.randn(B, 2*D, k_lyap, device=device, dtype=torch.float32, generator=g))[0]
    Q_L = torch.linalg.qr(torch.randn(B, D, k_lyap, device=device, dtype=torch.float32, generator=g))[0]
    Q_H = torch.linalg.qr(torch.randn(B, D, k_lyap, device=device, dtype=torch.float32, generator=g))[0]

    log_R_joint = torch.zeros(B, k_lyap, device=device, dtype=torch.float32)
    log_R_L = torch.zeros(B, k_lyap, device=device, dtype=torch.float32)
    log_R_H = torch.zeros(B, k_lyap, device=device, dtype=torch.float32)
    n_joint_steps = n_L_steps = n_H_steps = 0
    step_counter_joint = step_counter_L = step_counter_H = 0

    for _act_step in range(cfg.halt_max_steps):
        with torch.enable_grad():
            zH, zL = z_H.detach(), z_L.detach()
            for _h in range(cfg.H_cycles):
                for _l in range(cfg.L_cycles):
                    # The lambda is consumed before zH/zL are rebound, so the
                    # late-binding closure sees the intended state.
                    f_L = lambda z: inner.L_level(z, zH + input_embeddings, **seq_info)

                    # Joint basis, L step: z_H part kept, z_L part <- J_L (v_H + v_L).
                    v_H_j, v_L_j = Q_joint[:, :D, :], Q_joint[:, D:, :]
                    zL_new, new_v_L_j = _push_tangents(f_L, zL, v_H_j + v_L_j, state_shape, dtype)
                    Q_joint = torch.cat([v_H_j, new_v_L_j], dim=1)

                    # L-only basis (Q_H is untouched during an L step).
                    _, Q_L = _push_tangents(f_L, zL, Q_L, state_shape, dtype)
                    zL = zL_new

                    step_counter_joint += 1
                    step_counter_L += 1
                    if step_counter_joint % t_ons == 0:
                        Q_joint, log_R_joint = _qr_accumulate(Q_joint, log_R_joint)
                        n_joint_steps += 1
                    if step_counter_L % t_ons == 0:
                        Q_L, log_R_L = _qr_accumulate(Q_L, log_R_L)
                        n_L_steps += 1

                f_H = lambda z: inner.H_level(z, zL, **seq_info)

                # Joint basis, H step: z_L part kept, z_H part <- J_H (v_H + v_L).
                v_H_j, v_L_j = Q_joint[:, :D, :], Q_joint[:, D:, :]
                zH_new, new_v_H_j = _push_tangents(f_H, zH, v_H_j + v_L_j, state_shape, dtype)
                Q_joint = torch.cat([new_v_H_j, v_L_j], dim=1)

                # H-only basis (Q_L is untouched during an H step).
                _, Q_H = _push_tangents(f_H, zH, Q_H, state_shape, dtype)
                zH = zH_new

                step_counter_joint += 1
                step_counter_H += 1
                if step_counter_joint % t_ons == 0:
                    Q_joint, log_R_joint = _qr_accumulate(Q_joint, log_R_joint)
                    n_joint_steps += 1
                if step_counter_H % t_ons == 0:
                    Q_H, log_R_H = _qr_accumulate(Q_H, log_R_H)
                    n_H_steps += 1

        z_H, z_L = zH, zL

    lyap_joint = (log_R_joint / max(n_joint_steps, 1)).cpu().numpy()
    lyap_L = (log_R_L / max(n_L_steps, 1)).cpu().numpy()
    lyap_H = (log_R_H / max(n_H_steps, 1)).cpu().numpy()

    with torch.no_grad():
        # Only the logits after the last ACT step are scored, so decode once
        # here instead of once per ACT step.
        final_logits = inner.lm_head(z_H)[:, inner.puzzle_emb_len:].float()
        preds = final_logits.argmax(dim=-1)
        labels = batch["labels"].to(device)
        mask = labels > 0  # positions with label <= 0 are excluded from scoring
        exact = ((preds == labels) | ~mask).all(dim=-1).cpu().float()
        token_acc = ((preds == labels) & mask).sum(-1).float() / mask.sum(-1).float().clamp_min(1)
        token_acc = token_acc.cpu()

    return {
        "lyap_joint": lyap_joint,
        "lyap_L": lyap_L,
        "lyap_H": lyap_H,
        "exact_correct": exact.numpy(),
        "token_acc": token_acc.numpy(),
    }
ap.parse_args() + + device = "cuda" + model, cfg, train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device) + test = load_test_samples(Path(cfg["data_path"]), args.n_samples, + args.shard_id, args.num_shards, args.seed) + n = len(test["inputs"]) + print(f"shard {args.shard_id}/{args.num_shards}: {n} samples") + print(f"H_cycles={model.inner.config.H_cycles} L_cycles={model.inner.config.L_cycles} halt={model.inner.config.halt_max_steps}") + + res = {k: [] for k in ["lyap_joint","lyap_L","lyap_H","exact_correct","token_acc","idx"]} + t0 = time.time() + for s in range(0, n, args.batch_size): + e = min(s + args.batch_size, n) + batch = {k: test[k][s:e].to(device) for k in ["inputs","labels","puzzle_identifiers"]} + out = run_diagnose_batch(model, batch, device, args.k_lyap, args.t_ons, args.seed + s) + for k, v in out.items(): + res[k].append(v) + res["idx"].append(test["idx"][s:e]) + print(f" [{e}/{n}] dt={time.time()-t0:.1f}s exact={out['exact_correct'].mean():.3f} " + f"λj1={out['lyap_joint'][:,0].mean():+.3f} " + f"λL1={out['lyap_L'][:,0].mean():+.3f} " + f"λH1={out['lyap_H'][:,0].mean():+.3f}", flush=True) + + saved = {} + for k, v in res.items(): + if not v: continue + try: saved[k] = np.concatenate(v, 0) + except ValueError: saved[k] = np.stack(v, 0) + np.savez_compressed(args.out, **saved) + + succ = saved["exact_correct"] > 0.5 + print(f"\nN={len(succ)} acc={succ.mean():.4f}") + for name in ["lyap_joint", "lyap_L", "lyap_H"]: + ls = saved[name] + print(f"\n{name}:") + print(f" i mean_succ mean_fail Δ") + for i in range(ls.shape[1]): + ms, mf = ls[succ,i].mean(), ls[~succ,i].mean() + print(f" {i+1} {ms:+8.4f} {mf:+8.4f} {mf-ms:+8.4f}") + print(f"\nsaved → {args.out}") + + +if __name__ == "__main__": + main() diff --git a/diagnose_trm_joint.py b/diagnose_trm_joint.py new file mode 100644 index 0000000..160a7cb --- /dev/null +++ b/diagnose_trm_joint.py @@ -0,0 +1,225 @@ +"""TRM Sudoku joint Lyapunov diagnostic — TRM version of diagnose_hrm_joint.py. 
def load_test_samples(data_path, n_total, shard_id, num_shards, seed):
    """Draw a seeded random subset of the test split and return one shard of it.

    ``n_total`` distinct indices are sampled without replacement from the test
    set (the same indices for every shard given the same ``seed``), split into
    ``num_shards`` contiguous chunks, and chunk ``shard_id`` is returned.

    Args:
        data_path: Dataset root (``pathlib.Path``) containing
            ``test/all__inputs.npy``, ``test/all__labels.npy`` and
            ``test/all__puzzle_identifiers.npy``.
        n_total: Number of samples to draw across all shards; clamped to the
            size of the test set.
        shard_id: Zero-based index of the shard to return.
        num_shards: Total number of shards.
        seed: Seed for the index sampling.

    Returns:
        Dict with int32 tensors ``"inputs"``, ``"labels"``,
        ``"puzzle_identifiers"`` and the numpy index array ``"idx"``. The
        shard may be empty when there are more shards than samples.

    Raises:
        ValueError: If ``num_shards < 1`` or ``shard_id`` is outside
            ``[0, num_shards)``.
    """
    if num_shards < 1:
        raise ValueError(f"num_shards must be >= 1, got {num_shards}")
    if not 0 <= shard_id < num_shards:
        # A negative shard_id would otherwise slice from the end and silently
        # return samples that belong to another shard.
        raise ValueError(f"shard_id must be in [0, {num_shards}), got {shard_id}")

    rng = np.random.default_rng(seed)
    test_dir = data_path / "test"
    inputs = np.load(test_dir / "all__inputs.npy")
    labels = np.load(test_dir / "all__labels.npy")
    pid = np.load(test_dir / "all__puzzle_identifiers.npy")

    # choice(..., replace=False) raises when asked for more items than exist,
    # so cap the request at the size of the test set.
    n_total = min(n_total, len(inputs))
    all_idx = rng.choice(len(inputs), size=n_total, replace=False)
    shard_size = (n_total + num_shards - 1) // num_shards  # ceil division
    s, e = shard_id * shard_size, min((shard_id + 1) * shard_size, n_total)
    idx = all_idx[s:e]
    return {
        "inputs": torch.from_numpy(inputs[idx].astype(np.int32)),
        "labels": torch.from_numpy(labels[idx].astype(np.int32)),
        "puzzle_identifiers": torch.from_numpy(pid[idx].astype(np.int32)),
        "idx": idx,
    }
v_L_j = Q[:, D:, :] + v_comb = v_H_j + v_L_j + new_v_L_cols = [] + f_L = lambda z: inner.L_level(z, zH + input_embeddings, **seq_info) + for i in range(k_lyap): + v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype) + zL_new, Dv = jvp_through(f_L, zL, v_i) + new_v_L_cols.append(Dv.reshape(B, D).to(torch.float32)) + new_v_L = torch.stack(new_v_L_cols, dim=-1) + Q = torch.cat([v_H_j, new_v_L], dim=1) + zL = zL_new + step_counter += 1 + if step_counter % t_ons == 0: + Q, R = torch.linalg.qr(Q) + log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log() + n_lyap_steps += 1 + + # H step (uses SAME L_level!) + v_H_j = Q[:, :D, :] + v_L_j = Q[:, D:, :] + v_comb = v_H_j + v_L_j + new_v_H_cols = [] + f_H = lambda z: inner.L_level(z, zL, **seq_info) + for i in range(k_lyap): + v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype) + zH_new, Dv = jvp_through(f_H, zH, v_i) + new_v_H_cols.append(Dv.reshape(B, D).to(torch.float32)) + new_v_H = torch.stack(new_v_H_cols, dim=-1) + Q = torch.cat([new_v_H, v_L_j], dim=1) + zH = zH_new + step_counter += 1 + if step_counter % t_ons == 0: + Q, R = torch.linalg.qr(Q) + log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log() + n_lyap_steps += 1 + + z_H, z_L = zH, zL + + drift_zH_per_step.append((z_H - z_H_prev).float().flatten(1).norm(dim=1).cpu()) + drift_zL_per_step.append((z_L - z_L_prev).float().flatten(1).norm(dim=1).cpu()) + + with torch.no_grad(): + q_logits = inner.q_head(z_H[:, 0]).float() + q_halt, q_continue = q_logits[..., 0], q_logits[..., 1] + q_halt_hist.append(q_halt.cpu()); q_continue_hist.append(q_continue.cpu()) + new_halt = (q_halt > q_continue) & (halted_at == 0) + halted_at[new_halt] = act_step + 1 + output = inner.lm_head(z_H)[:, inner.puzzle_emb_len:].float() + final_logits = output + + lyap_spec = (log_R_sum / max(n_lyap_steps, 1)).cpu().numpy() + + with torch.no_grad(): + preds = final_logits.argmax(dim=-1) + labels = 
def main():
    """CLI entry point: run the TRM joint Lyapunov diagnostic on one shard.

    Loads the checkpoint, draws this shard's test samples from the first entry
    of ``cfg["data_paths"]``, runs ``run_diagnose_batch`` batch by batch,
    writes all per-sample arrays to a compressed ``.npz`` (``--out``) and
    prints a per-exponent table comparing exactly-solved samples with the rest.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--ckpt-root", required=True)
    ap.add_argument("--ckpt-name", default="step_13020")
    ap.add_argument("--n-samples", type=int, default=512)
    ap.add_argument("--shard-id", type=int, default=0)
    ap.add_argument("--num-shards", type=int, default=1)
    ap.add_argument("--batch-size", type=int, default=16)
    ap.add_argument("--k-lyap", type=int, default=8)
    ap.add_argument("--t-ons", type=int, default=1)
    ap.add_argument("--seed", type=int, default=0)
    ap.add_argument("--out", default="diag_trm.npz")
    args = ap.parse_args()

    device = "cuda"
    model, cfg, train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device)
    print(f"loaded {args.ckpt_name}: hidden={model.inner.config.hidden_size}, "
          f"seq_full={train_meta['seq_len'] + model.inner.puzzle_emb_len}, "
          f"halt_max_steps={model.inner.config.halt_max_steps}, "
          f"H={model.inner.config.H_cycles} L={model.inner.config.L_cycles}")

    test = load_test_samples(Path(cfg["data_paths"][0]), args.n_samples, args.shard_id, args.num_shards, args.seed)
    n = len(test["inputs"])
    print(f"shard {args.shard_id}/{args.num_shards}: {n} samples")
    if n == 0:
        # With more shards than samples a shard can be empty; without this
        # guard the summary below dies with KeyError on saved["exact_correct"].
        print("empty shard: nothing to diagnose, no output written")
        return

    res = {k: [] for k in ["drift_zH", "drift_zL", "halted_at", "q_halt", "q_continue",
                           "lyap_spec", "exact_correct", "token_acc", "idx"]}
    t0 = time.time()
    for s in range(0, n, args.batch_size):
        e = min(s + args.batch_size, n)
        batch = {k: test[k][s:e].to(device) for k in ["inputs", "labels", "puzzle_identifiers"]}
        # Offset the seed by the batch start so every batch gets its own tangent basis.
        out = run_diagnose_batch(model, batch, device, args.k_lyap, args.t_ons, args.seed + s)
        for k, v in out.items():
            res[k].append(v)
        res["idx"].append(test["idx"][s:e])
        ls = out["lyap_spec"]
        print(f" [{e}/{n}] dt={time.time()-t0:.1f}s exact={out['exact_correct'].mean():.3f} "
              f"λ_1={ls[:,0].mean():+.4f} λ_{args.k_lyap}={ls[:,-1].mean():+.4f}", flush=True)

    saved = {}
    for k, v in res.items():
        if not v:
            continue
        try:
            saved[k] = np.concatenate(v, 0)
        except ValueError:
            saved[k] = np.stack(v, 0)
    np.savez_compressed(args.out, **saved)

    def group_mean(values):
        # An all-success or all-failure shard leaves one group empty; report
        # NaN for it without numpy's "Mean of empty slice" warning.
        return float(values.mean()) if values.size else float("nan")

    succ = saved["exact_correct"] > 0.5
    print(f"\nN={len(succ)} acc={succ.mean():.4f}")
    print(f"{'i':>3} {'all':>10} {'succ':>10} {'fail':>10} {'Δ':>9}")
    for i in range(saved["lyap_spec"].shape[1]):
        li = saved["lyap_spec"][:, i]
        m_succ, m_fail = group_mean(li[succ]), group_mean(li[~succ])
        print(f"{i+1:>3} {li.mean():+10.4f} {m_succ:+10.4f} {m_fail:+10.4f} {m_fail-m_succ:+9.4f}")
def load_model(ckpt_root: Path, ckpt_name: str, device: str):
    """Instantiate the TRM model for a checkpoint directory and load its weights.

    Reads ``all_config.yaml`` from ``ckpt_root`` and ``train/dataset.json``
    from the first entry of ``cfg["data_paths"]``, builds the model from the
    merged architecture config, loads ``ckpt_name`` non-strictly (printing how
    many keys were missing or unexpected), and returns
    ``(model, cfg, train_meta)`` with the model on ``device`` in eval mode.
    """
    cfg = yaml.safe_load((ckpt_root / "all_config.yaml").read_text())
    arch_cfg = dict(cfg["arch"])

    dataset_json = Path(cfg["data_paths"][0]) / "train" / "dataset.json"
    train_meta = json.loads(dataset_json.read_text())

    # Fill in the fields the architecture takes from the run config / dataset.
    arch_cfg["batch_size"] = cfg["global_batch_size"]
    for field in ("seq_len", "vocab_size", "num_puzzle_identifiers"):
        arch_cfg[field] = train_meta[field]
    model = TinyRecursiveReasoningModel_ACTV1(arch_cfg)

    raw_state = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True)
    cleaned = {}
    for name, tensor in raw_state.items():
        # Remove wrapper prefixes from the stored parameter names.
        cleaned[name.replace("_orig_mod.", "").replace("model.", "")] = tensor

    missing, unexpected = model.load_state_dict(cleaned, strict=False)
    print(f"[load] missing={len(missing)} unexpected={len(unexpected)}")
    head_missing, head_unexpected = missing[:3], unexpected[:3]
    if head_missing:
        print(f" sample missing: {head_missing}")
    if head_unexpected:
        print(f" sample unexpected: {head_unexpected}")

    model.to(device).eval()
    return model, cfg, train_meta
def jvp_through(f, x, v):
    """Return ``(f(x), J_f(x) @ v)``: the value of ``f`` at ``x`` and one JVP.

    The product is computed without building a graph (``create_graph=False``)
    and non-strictly (``strict=False``).
    """
    value, directional = torch.autograd.functional.jvp(
        f, x, v=v, create_graph=False, strict=False
    )
    return value, directional
+ R_list = [] + Q_mid = None + target_qr = (cfg.halt_max_steps * cfg.H_cycles * (cfg.L_cycles + 1) // t_ons) // 2 + + for act_step in range(cfg.halt_max_steps): + z_H_prev = z_H.detach().clone() + z_L_prev = z_L.detach().clone() + + with torch.enable_grad(): + zH, zL = z_H.detach(), z_L.detach() + for _h in range(cfg.H_cycles): + # L cycles + for _l in range(cfg.L_cycles): + v_H_j = Q[:, :D, :] + v_L_j = Q[:, D:, :] + v_comb = v_H_j + v_L_j + new_v_L_cols = [] + f_L = lambda z: inner.L_level(z, zH + input_embeddings, **seq_info) + for i in range(k_lyap): + v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype) + zL_new, Dv = jvp_through(f_L, zL, v_i) + new_v_L_cols.append(Dv.reshape(B, D).to(torch.float32)) + new_v_L = torch.stack(new_v_L_cols, dim=-1) + Q = torch.cat([v_H_j, new_v_L], dim=1) + zL = zL_new + step_counter += 1 + if step_counter % t_ons == 0: + Q, R = torch.linalg.qr(Q) + log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log() + n_lyap_steps += 1 + R_list.append(R.detach()) + if n_lyap_steps == target_qr: + Q_mid = Q.detach().clone() + + # H step (uses SAME L_level!) 
+ v_H_j = Q[:, :D, :] + v_L_j = Q[:, D:, :] + v_comb = v_H_j + v_L_j + new_v_H_cols = [] + f_H = lambda z: inner.L_level(z, zL, **seq_info) + for i in range(k_lyap): + v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype) + zH_new, Dv = jvp_through(f_H, zH, v_i) + new_v_H_cols.append(Dv.reshape(B, D).to(torch.float32)) + new_v_H = torch.stack(new_v_H_cols, dim=-1) + Q = torch.cat([new_v_H, v_L_j], dim=1) + zH = zH_new + step_counter += 1 + if step_counter % t_ons == 0: + Q, R = torch.linalg.qr(Q) + log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log() + n_lyap_steps += 1 + R_list.append(R.detach()) + if n_lyap_steps == target_qr: + Q_mid = Q.detach().clone() + + z_H, z_L = zH, zL + + drift_zH_per_step.append((z_H - z_H_prev).float().flatten(1).norm(dim=1).cpu()) + drift_zL_per_step.append((z_L - z_L_prev).float().flatten(1).norm(dim=1).cpu()) + + with torch.no_grad(): + q_logits = inner.q_head(z_H[:, 0]).float() + q_halt, q_continue = q_logits[..., 0], q_logits[..., 1] + q_halt_hist.append(q_halt.cpu()); q_continue_hist.append(q_continue.cpu()) + new_halt = (q_halt > q_continue) & (halted_at == 0) + halted_at[new_halt] = act_step + 1 + output = inner.lm_head(z_H)[:, inner.puzzle_emb_len:].float() + final_logits = output + + lyap_spec = (log_R_sum / max(n_lyap_steps, 1)).cpu().numpy() + + # Ginelli CLV backward pass: iterate C = normalize(R^{-1} C) from the end back to the + # midpoint analysis time, then CLVs = Q_mid @ C. Geometry NOT captured by exponents: + # pairwise CLV angles (Oseledets-splitting tangency). 
+ with torch.no_grad(): + if Q_mid is None: + Q_mid = Q # fallback (shouldn't happen with no early halt) + g2 = torch.Generator(device=device).manual_seed(seed + 9973) + C = torch.triu(torch.randn(B, k_lyap, k_lyap, device=device, dtype=torch.float32, generator=g2)) + C = C / C.norm(dim=1, keepdim=True).clamp_min(1e-20) + n_used = 0 + for R in reversed(R_list[target_qr:]): + Rf = R.float() + # guard against tiny diagonal (rank-deficient tangent) before triangular solve + diag = Rf.diagonal(dim1=-2, dim2=-1) + Rf = Rf + torch.diag_embed(torch.where(diag.abs() < 1e-8, torch.full_like(diag, 1e-8), torch.zeros_like(diag))) + C = torch.linalg.solve_triangular(Rf, C, upper=True) + C = C / C.norm(dim=1, keepdim=True).clamp_min(1e-20) + n_used += 1 + V = torch.matmul(Q_mid.float(), C) # (B, 2D, k) covariant LVs + V = V / V.norm(dim=1, keepdim=True).clamp_min(1e-20) # unit columns (non-orthogonal) + gram = torch.matmul(V.transpose(1, 2), V).abs() # (B, k, k) |cos| between CLVs + eye = torch.eye(k_lyap, device=device).unsqueeze(0) + off = gram * (1 - eye) + clv_maxcos = off.amax(dim=(1, 2)).cpu().numpy() # tangency: max |cos| (1=degenerate) + clv_meancos = (off.sum(dim=(1, 2)) / (k_lyap * (k_lyap - 1))).cpu().numpy() + clv_minangle = torch.acos(off.amax(dim=(1, 2)).clamp(max=1 - 1e-7)).cpu().numpy() + # leading TRUE CLV localization/readout (vs the leading-GS-vector version below) + Dh0 = seq_full * hidden; pe0 = inner.puzzle_emb_len + vH = V[:, :Dh0, 0].reshape(B, seq_full, hidden) + eH = (vH ** 2).sum(-1); eL = (V[:, Dh0:, 0].reshape(B, seq_full, hidden) ** 2).sum(-1) + e = eH + eL + clv_true_hfrac = (eH.sum(-1) / e.sum(-1).clamp_min(1e-20)).cpu().numpy() + clv_true_pr_ans = (e[:, pe0:].sum(-1) ** 2 / (e[:, pe0:] ** 2).sum(-1).clamp_min(1e-20) / max(seq_full - pe0, 1)).cpu().numpy() + ro = torch.matmul(vH[:, pe0:], inner.lm_head.weight.float().t()) + clv_true_readout = (ro.flatten(1).norm(dim=1) / vH[:, pe0:].flatten(1).norm(dim=1).clamp_min(1e-20)).cpu().numpy() + + with 
torch.no_grad(): + preds = final_logits.argmax(dim=-1) + labels = batch["labels"].to(device) + mask = labels > 0 + exact = ((preds == labels) | ~mask).all(dim=-1).cpu().float() + token_acc = ((preds == labels) & mask).sum(-1).float() / mask.sum(-1).float().clamp_min(1) + token_acc = token_acc.cpu() + + # CLV geometry: leading columns of final Q are the leading covariant Lyapunov vectors. + # Features are DIRECTION-based (not scalar growth rates): H/L energy split, token + # participation ratio (localization), and alignment with the decode/readout direction. + with torch.no_grad(): + Dh = seq_full * hidden + n_top = min(3, k_lyap) + pe = inner.puzzle_emb_len + W = inner.lm_head.weight.float() # (vocab, hidden), bias-free + hfrac_c, pr_c, prans_c, ro_c = [], [], [], [] + for j in range(n_top): + col = Q[:, :, j].float() # (B, 2Dh), unit norm + qH = col[:, :Dh].reshape(B, seq_full, hidden) + qL = col[:, Dh:].reshape(B, seq_full, hidden) + eH = (qH ** 2).sum(-1); eL = (qL ** 2).sum(-1) # (B, seq_full) per-token energy + e = eH + eL + hfrac_c.append((eH.sum(-1) / e.sum(-1).clamp_min(1e-20)).cpu()) + pr_c.append((e.sum(-1) ** 2 / (e ** 2).sum(-1).clamp_min(1e-20) / seq_full).cpu()) + ea = e[:, pe:] + prans_c.append((ea.sum(-1) ** 2 / (ea ** 2).sum(-1).clamp_min(1e-20) / max(seq_full - pe, 1)).cpu()) + ro = torch.matmul(qH[:, pe:], W.t()) # (B, ans, vocab) readout projection + ro_c.append((ro.flatten(1).norm(dim=1) / qH[:, pe:].flatten(1).norm(dim=1).clamp_min(1e-20)).cpu()) + + return { + "drift_zH": torch.stack(drift_zH_per_step, dim=1).numpy(), + "drift_zL": torch.stack(drift_zL_per_step, dim=1).numpy(), + "halted_at": halted_at.cpu().numpy(), + "q_halt": torch.stack(q_halt_hist, dim=1).numpy(), + "q_continue": torch.stack(q_continue_hist, dim=1).numpy(), + "lyap_spec": lyap_spec, + "exact_correct": exact.numpy(), + "token_acc": token_acc.numpy(), + "clv_hfrac": torch.stack(hfrac_c, 1).numpy(), + "clv_pr": torch.stack(pr_c, 1).numpy(), + "clv_pr_ans": 
torch.stack(prans_c, 1).numpy(), + "clv_readout": torch.stack(ro_c, 1).numpy(), + "clv_maxcos": clv_maxcos, + "clv_meancos": clv_meancos, + "clv_minangle": clv_minangle, + "clv_true_hfrac": clv_true_hfrac, + "clv_true_pr_ans": clv_true_pr_ans, + "clv_true_readout": clv_true_readout, + } + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--ckpt-root", required=True) + ap.add_argument("--ckpt-name", default="step_13020") + ap.add_argument("--n-samples", type=int, default=512) + ap.add_argument("--shard-id", type=int, default=0) + ap.add_argument("--num-shards", type=int, default=1) + ap.add_argument("--batch-size", type=int, default=16) + ap.add_argument("--k-lyap", type=int, default=8) + ap.add_argument("--t-ons", type=int, default=1) + ap.add_argument("--seed", type=int, default=0) + ap.add_argument("--out", default="diag_trm.npz") + args = ap.parse_args() + + device = "cuda" + model, cfg, train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device) + print(f"loaded {args.ckpt_name}: hidden={model.inner.config.hidden_size}, " + f"seq_full={train_meta['seq_len'] + model.inner.puzzle_emb_len}, " + f"halt_max_steps={model.inner.config.halt_max_steps}, " + f"H={model.inner.config.H_cycles} L={model.inner.config.L_cycles}") + + test = load_test_samples(Path(cfg["data_paths"][0]), args.n_samples, args.shard_id, args.num_shards, args.seed) + n = len(test["inputs"]) + print(f"shard {args.shard_id}/{args.num_shards}: {n} samples") + + res = {k: [] for k in ["drift_zH","drift_zL","halted_at","q_halt","q_continue","lyap_spec","exact_correct","token_acc","idx"]} + t0 = time.time() + for s in range(0, n, args.batch_size): + e = min(s + args.batch_size, n) + batch = {k: test[k][s:e].to(device) for k in ["inputs","labels","puzzle_identifiers"]} + out = run_diagnose_batch(model, batch, device, args.k_lyap, args.t_ons, args.seed + s) + for k, v in out.items(): + res.setdefault(k, []).append(v) + res.setdefault("idx", []).append(test["idx"][s:e]) + ls = 
out["lyap_spec"] + print(f" [{e}/{n}] dt={time.time()-t0:.1f}s exact={out['exact_correct'].mean():.3f} " + f"λ_1={ls[:,0].mean():+.4f} λ_{args.k_lyap}={ls[:,-1].mean():+.4f}", flush=True) + + saved = {} + for k, v in res.items(): + if not v: continue + try: saved[k] = np.concatenate(v, 0) + except ValueError: saved[k] = np.stack(v, 0) + np.savez_compressed(args.out, **saved) + succ = saved["exact_correct"] > 0.5 + print(f"\nN={len(succ)} acc={succ.mean():.4f}") + print(f"{'i':>3} {'all':>10} {'succ':>10} {'fail':>10} {'Δ':>9}") + for i in range(saved["lyap_spec"].shape[1]): + li = saved["lyap_spec"][:, i] + print(f"{i+1:>3} {li.mean():+10.4f} {li[succ].mean():+10.4f} {li[~succ].mean():+10.4f} {li[~succ].mean()-li[succ].mean():+9.4f}") + + +if __name__ == "__main__": + main() diff --git a/diagnose_trm_joint_geom.py b/diagnose_trm_joint_geom.py new file mode 100644 index 0000000..f130305 --- /dev/null +++ b/diagnose_trm_joint_geom.py @@ -0,0 +1,251 @@ +"""TRM Sudoku joint Lyapunov diagnostic — TRM version of diagnose_hrm_joint.py. + +Key differences from HRM: +- TRM has ONE shared L_level (H_layers config is "ignored") +- z_L update: z_L = L_level(z_L, z_H + input_embeddings) +- z_H update: z_H = L_level(z_H, z_L) ← same L_level! +- H_cycles=3, L_cycles=6 (vs HRM 2,2) + +Joint tangent block structure: +- L step: v_L_new = J · (v_L + v_H), v_H_new = v_H, J at (z_L + z_H + ie) +- H step: v_H_new = J' · (v_H + v_L), v_L_new = v_L, J' at (z_H + z_L) +J and J' share weights but evaluated at different points. 
def _strip_wrapper_prefixes(key):
    """Map a checkpoint parameter name onto the bare model's parameter name.

    ``_orig_mod.`` is removed wherever it appears, while ``model.`` is only
    removed as a leading prefix, so a parameter whose own name merely contains
    ``model.`` is left intact.
    """
    key = key.replace("_orig_mod.", "")
    while key.startswith("model."):
        key = key[len("model."):]
    return key


def load_model(ckpt_root: Path, ckpt_name: str, device: str):
    """Build the TRM model described by a checkpoint directory and load its weights.

    Args:
        ckpt_root: directory containing ``all_config.yaml`` and the checkpoint file.
        ckpt_name: file name of the checkpoint inside ``ckpt_root``.
        device: device the model is moved to after loading.

    Returns:
        ``(model, cfg, train_meta)``: the model in eval mode, the parsed
        ``all_config.yaml`` and the parsed ``train/dataset.json`` of the first
        data path listed in the config.
    """
    cfg = yaml.safe_load((ckpt_root / "all_config.yaml").read_text())
    arch_cfg = dict(cfg["arch"])
    train_meta = json.loads((Path(cfg["data_paths"][0]) / "train" / "dataset.json").read_text())
    arch_cfg.update(batch_size=cfg["global_batch_size"], seq_len=train_meta["seq_len"],
                    vocab_size=train_meta["vocab_size"],
                    num_puzzle_identifiers=train_meta["num_puzzle_identifiers"])
    model = TinyRecursiveReasoningModel_ACTV1(arch_cfg)
    sd = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True)
    stripped = {_strip_wrapper_prefixes(k): v for k, v in sd.items()}
    # strict=False: mismatches are reported below rather than raised, so check the
    # printed counts before trusting any diagnostics computed with this model.
    missing, unexpected = model.load_state_dict(stripped, strict=False)
    print(f"[load] missing={len(missing)} unexpected={len(unexpected)}")
    if missing[:3]: print(f"  sample missing: {missing[:3]}")
    if unexpected[:3]: print(f"  sample unexpected: {unexpected[:3]}")
    model.to(device).eval()
    return model, cfg, train_meta
def jvp_through(f, x, v):
    """Return ``(f(x), J_f(x) @ v)`` using ``torch.autograd.functional.jvp``."""
    return torch.autograd.functional.jvp(f, x, v=v, create_graph=False, strict=False)


def run_diagnose_batch(model, batch, device, k_lyap, t_ons, seed):
    """Run the recursion on one batch and estimate its leading Lyapunov exponents.

    ``k_lyap`` tangent vectors are pushed through every L/H sub-step with JVPs
    and re-orthonormalized by QR every ``t_ons`` sub-steps; the log of the R
    diagonal is accumulated into the spectrum.  Direction-based features of the
    leading columns of the final tangent basis are also returned.

    Args:
        model: object exposing ``.inner`` (config, inits, ``L_level``, heads).
        batch: dict with ``"inputs"``, ``"labels"``, ``"puzzle_identifiers"``.
        device: torch device for the tangent basis and RNG.
        k_lyap: number of tangent vectors to track.
        t_ons: QR period, in sub-steps.
        seed: seed of the random initial tangent basis.

    Returns:
        dict of numpy arrays (drifts, halting logits, first halting step,
        ``lyap_spec``, accuracy, and the ``clv_*`` direction features).
    """
    inner = model.inner
    cfg = inner.config
    B = batch["inputs"].shape[0]
    seq_full = cfg.seq_len + inner.puzzle_emb_len
    hidden = cfg.hidden_size
    D = seq_full * hidden

    z_H = inner.H_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
    z_L = inner.L_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
    seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None)
    input_embeddings = inner._input_embeddings(batch["inputs"].to(device),
                                               batch["puzzle_identifiers"].to(device))

    # Random orthonormal tangent basis; rows [:D] are the H part, rows [D:] the L part.
    g = torch.Generator(device=device).manual_seed(seed)
    Q0 = torch.randn(B, 2*D, k_lyap, device=device, dtype=torch.float32, generator=g)
    Q, _ = torch.linalg.qr(Q0)
    log_R_sum = torch.zeros(B, k_lyap, device=device, dtype=torch.float32)
    n_lyap_steps = 0
    step_counter = 0

    drift_zH_per_step, drift_zL_per_step = [], []
    halted_at = torch.zeros(B, dtype=torch.long, device=device)
    q_halt_hist, q_continue_hist = [], []

    for act_step in range(cfg.halt_max_steps):
        z_H_prev = z_H.detach().clone()
        z_L_prev = z_L.detach().clone()

        with torch.enable_grad():
            zH, zL = z_H.detach(), z_L.detach()
            for _h in range(cfg.H_cycles):
                # L cycles
                for _l in range(cfg.L_cycles):
                    v_H_j = Q[:, :D, :]
                    v_L_j = Q[:, D:, :]
                    # NOTE(review): feeding v_H + v_L assumes L_level depends on its two
                    # arguments only through their sum -- confirm against the model code.
                    v_comb = v_H_j + v_L_j
                    new_v_L_cols = []
                    f_L = lambda z: inner.L_level(z, zH + input_embeddings, **seq_info)
                    for i in range(k_lyap):
                        v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype)
                        zL_new, Dv = jvp_through(f_L, zL, v_i)
                        new_v_L_cols.append(Dv.reshape(B, D).to(torch.float32))
                    new_v_L = torch.stack(new_v_L_cols, dim=-1)
                    Q = torch.cat([v_H_j, new_v_L], dim=1)
                    zL = zL_new
                    step_counter += 1
                    if step_counter % t_ons == 0:
                        Q, R = torch.linalg.qr(Q)
                        log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
                        n_lyap_steps += 1

                # H step (uses SAME L_level!)
                v_H_j = Q[:, :D, :]
                v_L_j = Q[:, D:, :]
                v_comb = v_H_j + v_L_j
                new_v_H_cols = []
                f_H = lambda z: inner.L_level(z, zL, **seq_info)
                for i in range(k_lyap):
                    v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype)
                    zH_new, Dv = jvp_through(f_H, zH, v_i)
                    new_v_H_cols.append(Dv.reshape(B, D).to(torch.float32))
                new_v_H = torch.stack(new_v_H_cols, dim=-1)
                Q = torch.cat([new_v_H, v_L_j], dim=1)
                zH = zH_new
                step_counter += 1
                if step_counter % t_ons == 0:
                    Q, R = torch.linalg.qr(Q)
                    log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
                    n_lyap_steps += 1

            z_H, z_L = zH, zL

        drift_zH_per_step.append((z_H - z_H_prev).float().flatten(1).norm(dim=1).cpu())
        drift_zL_per_step.append((z_L - z_L_prev).float().flatten(1).norm(dim=1).cpu())

        with torch.no_grad():
            q_logits = inner.q_head(z_H[:, 0]).float()
            q_halt, q_continue = q_logits[..., 0], q_logits[..., 1]
            q_halt_hist.append(q_halt.cpu()); q_continue_hist.append(q_continue.cpu())
            # The loop never stops early; only the first halting step (1-based) is recorded.
            new_halt = (q_halt > q_continue) & (halted_at == 0)
            halted_at[new_halt] = act_step + 1
            output = inner.lm_head(z_H)[:, inner.puzzle_emb_len:].float()
            final_logits = output

    # NOTE(review): mean log-growth per QR interval; for t_ons > 1 this is t_ons times
    # the per-sub-step exponent -- confirm which unit downstream analysis expects.
    lyap_spec = (log_R_sum / max(n_lyap_steps, 1)).cpu().numpy()

    with torch.no_grad():
        preds = final_logits.argmax(dim=-1)
        labels = batch["labels"].to(device)
        mask = labels > 0                      # positions with label <= 0 are not scored
        exact = ((preds == labels) | ~mask).all(dim=-1).cpu().float()
        token_acc = ((preds == labels) & mask).sum(-1).float() / mask.sum(-1).float().clamp_min(1)
        token_acc = token_acc.cpu()

    # Tangent-direction geometry from the leading columns of the final basis Q.
    # NOTE(review): these columns come from QR, i.e. they are Gram-Schmidt vectors, not
    # covariant Lyapunov vectors; only column 0 shares its direction with the leading
    # CLV. The "clv_*" key names are kept for compatibility with downstream readers.
    # Features are DIRECTION-based (not scalar growth rates): H/L energy split, token
    # participation ratio (localization), and alignment with the decode/readout direction.
    with torch.no_grad():
        Dh = seq_full * hidden
        n_top = min(3, k_lyap)
        pe = inner.puzzle_emb_len
        W = inner.lm_head.weight.float()                          # used as (vocab, hidden)
        hfrac_c, pr_c, prans_c, ro_c = [], [], [], []
        for j in range(n_top):
            col = Q[:, :, j].float()                              # (B, 2Dh); unit norm when the last sub-step ended with a QR
            qH = col[:, :Dh].reshape(B, seq_full, hidden)
            qL = col[:, Dh:].reshape(B, seq_full, hidden)
            eH = (qH ** 2).sum(-1); eL = (qL ** 2).sum(-1)        # (B, seq_full) per-token energy
            e = eH + eL
            hfrac_c.append((eH.sum(-1) / e.sum(-1).clamp_min(1e-20)).cpu())
            pr_c.append((e.sum(-1) ** 2 / (e ** 2).sum(-1).clamp_min(1e-20) / seq_full).cpu())
            ea = e[:, pe:]
            prans_c.append((ea.sum(-1) ** 2 / (ea ** 2).sum(-1).clamp_min(1e-20) / max(seq_full - pe, 1)).cpu())
            ro = torch.matmul(qH[:, pe:], W.t())                  # (B, ans, vocab) readout projection
            ro_c.append((ro.flatten(1).norm(dim=1) / qH[:, pe:].flatten(1).norm(dim=1).clamp_min(1e-20)).cpu())

    return {
        "drift_zH": torch.stack(drift_zH_per_step, dim=1).numpy(),
        "drift_zL": torch.stack(drift_zL_per_step, dim=1).numpy(),
        "halted_at": halted_at.cpu().numpy(),
        "q_halt": torch.stack(q_halt_hist, dim=1).numpy(),
        "q_continue": torch.stack(q_continue_hist, dim=1).numpy(),
        "lyap_spec": lyap_spec,
        "exact_correct": exact.numpy(),
        "token_acc": token_acc.numpy(),
        "clv_hfrac": torch.stack(hfrac_c, 1).numpy(),
        "clv_pr": torch.stack(pr_c, 1).numpy(),
        "clv_pr_ans": torch.stack(prans_c, 1).numpy(),
        "clv_readout": torch.stack(ro_c, 1).numpy(),
    }
ap.add_argument("--batch-size", type=int, default=16) + ap.add_argument("--k-lyap", type=int, default=8) + ap.add_argument("--t-ons", type=int, default=1) + ap.add_argument("--seed", type=int, default=0) + ap.add_argument("--out", default="diag_trm.npz") + args = ap.parse_args() + + device = "cuda" + model, cfg, train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device) + print(f"loaded {args.ckpt_name}: hidden={model.inner.config.hidden_size}, " + f"seq_full={train_meta['seq_len'] + model.inner.puzzle_emb_len}, " + f"halt_max_steps={model.inner.config.halt_max_steps}, " + f"H={model.inner.config.H_cycles} L={model.inner.config.L_cycles}") + + test = load_test_samples(Path(cfg["data_paths"][0]), args.n_samples, args.shard_id, args.num_shards, args.seed) + n = len(test["inputs"]) + print(f"shard {args.shard_id}/{args.num_shards}: {n} samples") + + res = {k: [] for k in ["drift_zH","drift_zL","halted_at","q_halt","q_continue","lyap_spec","exact_correct","token_acc","idx"]} + t0 = time.time() + for s in range(0, n, args.batch_size): + e = min(s + args.batch_size, n) + batch = {k: test[k][s:e].to(device) for k in ["inputs","labels","puzzle_identifiers"]} + out = run_diagnose_batch(model, batch, device, args.k_lyap, args.t_ons, args.seed + s) + for k, v in out.items(): + res.setdefault(k, []).append(v) + res.setdefault("idx", []).append(test["idx"][s:e]) + ls = out["lyap_spec"] + print(f" [{e}/{n}] dt={time.time()-t0:.1f}s exact={out['exact_correct'].mean():.3f} " + f"λ_1={ls[:,0].mean():+.4f} λ_{args.k_lyap}={ls[:,-1].mean():+.4f}", flush=True) + + saved = {} + for k, v in res.items(): + if not v: continue + try: saved[k] = np.concatenate(v, 0) + except ValueError: saved[k] = np.stack(v, 0) + np.savez_compressed(args.out, **saved) + succ = saved["exact_correct"] > 0.5 + print(f"\nN={len(succ)} acc={succ.mean():.4f}") + print(f"{'i':>3} {'all':>10} {'succ':>10} {'fail':>10} {'Δ':>9}") + for i in range(saved["lyap_spec"].shape[1]): + li = saved["lyap_spec"][:, 
i] + print(f"{i+1:>3} {li.mean():+10.4f} {li[succ].mean():+10.4f} {li[~succ].mean():+10.4f} {li[~succ].mean()-li[succ].mean():+9.4f}") + + +if __name__ == "__main__": + main() diff --git a/diagnose_trm_joint_horizon.py b/diagnose_trm_joint_horizon.py new file mode 100644 index 0000000..0ba5d71 --- /dev/null +++ b/diagnose_trm_joint_horizon.py @@ -0,0 +1,225 @@ +"""TRM Sudoku joint Lyapunov diagnostic — TRM version of diagnose_hrm_joint.py. + +Key differences from HRM: +- TRM has ONE shared L_level (H_layers config is "ignored") +- z_L update: z_L = L_level(z_L, z_H + input_embeddings) +- z_H update: z_H = L_level(z_H, z_L) ← same L_level! +- H_cycles=3, L_cycles=6 (vs HRM 2,2) + +Joint tangent block structure: +- L step: v_L_new = J · (v_L + v_H), v_H_new = v_H, J at (z_L + z_H + ie) +- H step: v_H_new = J' · (v_H + v_L), v_L_new = v_L, J' at (z_H + z_L) +J and J' share weights but evaluated at different points. +""" +from __future__ import annotations +import sys, os, yaml, math, argparse, json, time +from pathlib import Path +import numpy as np +import torch + +TRM_DIR = Path("/home/yurenh2/rrm/trm") +sys.path.insert(0, str(TRM_DIR)) + +from models.recursive_reasoning.trm import TinyRecursiveReasoningModel_ACTV1 + + +def load_model(ckpt_root: Path, ckpt_name: str, device: str): + cfg = yaml.safe_load((ckpt_root / "all_config.yaml").read_text()) + arch_cfg = dict(cfg["arch"]) + train_meta = json.loads((Path(cfg["data_paths"][0]) / "train" / "dataset.json").read_text()) + arch_cfg.update(batch_size=cfg["global_batch_size"], seq_len=train_meta["seq_len"], + vocab_size=train_meta["vocab_size"], + num_puzzle_identifiers=train_meta["num_puzzle_identifiers"]) + model = TinyRecursiveReasoningModel_ACTV1(arch_cfg) + sd = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True) + stripped = {k.replace("_orig_mod.", "").replace("model.", ""): v for k, v in sd.items()} + missing, unexpected = model.load_state_dict(stripped, strict=False) + print(f"[load] 
def load_test_samples(data_path, n_total, shard_id, num_shards, seed):
    """Draw a seeded random subset of the test split and return one shard of it.

    ``n_total`` distinct indices are drawn without replacement from the test
    set (capped at the test-set size), then cut into ``num_shards`` contiguous
    shards of ``ceil(n_total / num_shards)`` indices; shard ``shard_id`` is
    returned.  Trailing shards may be shorter or empty.

    Args:
        data_path: dataset root (``str`` or ``Path``) containing
            ``test/all__inputs.npy``, ``test/all__labels.npy`` and
            ``test/all__puzzle_identifiers.npy``.
        n_total: number of samples drawn across all shards.
        shard_id: index of the shard to return, ``0 <= shard_id < num_shards``.
        num_shards: number of shards, at least 1.
        seed: seed of the sampling RNG; all shards must use the same seed.

    Returns:
        dict with int32 tensors ``"inputs"``, ``"labels"``,
        ``"puzzle_identifiers"`` and the numpy index array ``"idx"``.

    Raises:
        ValueError: if ``num_shards < 1`` or ``shard_id`` is out of range.
    """
    if num_shards < 1:
        raise ValueError(f"num_shards must be >= 1, got {num_shards}")
    if not 0 <= shard_id < num_shards:
        raise ValueError(f"shard_id must be in [0, {num_shards}), got {shard_id}")

    test_dir = Path(data_path) / "test"
    inputs = np.load(test_dir / "all__inputs.npy")
    labels = np.load(test_dir / "all__labels.npy")
    pid = np.load(test_dir / "all__puzzle_identifiers.npy")

    # Sampling without replacement cannot return more items than exist.
    n_total = min(n_total, len(inputs))
    rng = np.random.default_rng(seed)
    all_idx = rng.choice(len(inputs), size=n_total, replace=False)
    shard_size = (n_total + num_shards - 1) // num_shards
    s, e = shard_id * shard_size, min((shard_id + 1) * shard_size, n_total)
    idx = all_idx[s:e]
    return {
        "inputs": torch.from_numpy(inputs[idx].astype(np.int32)),
        "labels": torch.from_numpy(labels[idx].astype(np.int32)),
        "puzzle_identifiers": torch.from_numpy(pid[idx].astype(np.int32)),
        "idx": idx,
    }
step_counter = 0 + + drift_zH_per_step, drift_zL_per_step = [], [] + halted_at = torch.zeros(B, dtype=torch.long, device=device) + q_halt_hist, q_continue_hist = [], [] + + for act_step in range(int(__import__("os").environ.get("DIAG_HORIZON", "4"))): # horizon via env + z_H_prev = z_H.detach().clone() + z_L_prev = z_L.detach().clone() + + with torch.enable_grad(): + zH, zL = z_H.detach(), z_L.detach() + for _h in range(cfg.H_cycles): + # L cycles + for _l in range(cfg.L_cycles): + v_H_j = Q[:, :D, :] + v_L_j = Q[:, D:, :] + v_comb = v_H_j + v_L_j + new_v_L_cols = [] + f_L = lambda z: inner.L_level(z, zH + input_embeddings, **seq_info) + for i in range(k_lyap): + v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype) + zL_new, Dv = jvp_through(f_L, zL, v_i) + new_v_L_cols.append(Dv.reshape(B, D).to(torch.float32)) + new_v_L = torch.stack(new_v_L_cols, dim=-1) + Q = torch.cat([v_H_j, new_v_L], dim=1) + zL = zL_new + step_counter += 1 + if step_counter % t_ons == 0: + Q, R = torch.linalg.qr(Q) + log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log() + n_lyap_steps += 1 + + # H step (uses SAME L_level!) 
def main():
    """Command-line entry point: diagnose one shard of test samples and save the results.

    Loads the checkpoint, draws this shard's samples, runs
    ``run_diagnose_batch`` batch by batch, writes all per-sample arrays to the
    ``--out`` ``.npz`` file and prints the Lyapunov spectrum split by
    exact-match success versus failure.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--ckpt-root", required=True)
    ap.add_argument("--ckpt-name", default="step_13020")
    ap.add_argument("--n-samples", type=int, default=512)
    ap.add_argument("--shard-id", type=int, default=0)
    ap.add_argument("--num-shards", type=int, default=1)
    ap.add_argument("--batch-size", type=int, default=16)
    ap.add_argument("--k-lyap", type=int, default=8)
    ap.add_argument("--t-ons", type=int, default=1)
    ap.add_argument("--seed", type=int, default=0)
    ap.add_argument("--out", default="diag_trm.npz")
    args = ap.parse_args()

    device = "cuda"
    model, cfg, train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device)
    # NOTE(review): halt_max_steps printed here is the config value; the number of
    # outer steps actually run is decided inside run_diagnose_batch.
    print(f"loaded {args.ckpt_name}: hidden={model.inner.config.hidden_size}, "
          f"seq_full={train_meta['seq_len'] + model.inner.puzzle_emb_len}, "
          f"halt_max_steps={model.inner.config.halt_max_steps}, "
          f"H={model.inner.config.H_cycles} L={model.inner.config.L_cycles}")

    test = load_test_samples(Path(cfg["data_paths"][0]), args.n_samples, args.shard_id, args.num_shards, args.seed)
    n = len(test["inputs"])
    print(f"shard {args.shard_id}/{args.num_shards}: {n} samples")

    res = {k: [] for k in ["drift_zH","drift_zL","halted_at","q_halt","q_continue","lyap_spec","exact_correct","token_acc","idx"]}
    t0 = time.time()
    for s in range(0, n, args.batch_size):
        e = min(s + args.batch_size, n)
        batch = {k: test[k][s:e].to(device) for k in ["inputs","labels","puzzle_identifiers"]}
        out = run_diagnose_batch(model, batch, device, args.k_lyap, args.t_ons, args.seed + s)
        for k, v in out.items():
            # setdefault keeps this loop working if the diagnostic returns extra keys.
            res.setdefault(k, []).append(v)
        res["idx"].append(test["idx"][s:e])
        ls = out["lyap_spec"]
        print(f"  [{e}/{n}] dt={time.time()-t0:.1f}s exact={out['exact_correct'].mean():.3f} "
              f"λ_1={ls[:,0].mean():+.4f} λ_{args.k_lyap}={ls[:,-1].mean():+.4f}", flush=True)

    saved = {}
    for k, v in res.items():
        if not v: continue
        try: saved[k] = np.concatenate(v, 0)
        except ValueError: saved[k] = np.stack(v, 0)
    np.savez_compressed(args.out, **saved)
    if "exact_correct" not in saved:
        # Empty shard (possible when num_shards does not divide the sample count evenly).
        print("no samples in this shard; skipping summary")
        return

    def group_mean(values):
        # An all-success or all-failure shard leaves one group empty; report NaN quietly.
        return values.mean() if values.size else np.nan

    succ = saved["exact_correct"] > 0.5
    print(f"\nN={len(succ)} acc={succ.mean():.4f}")
    print(f"{'i':>3} {'all':>10} {'succ':>10} {'fail':>10} {'Δ':>9}")
    for i in range(saved["lyap_spec"].shape[1]):
        li = saved["lyap_spec"][:, i]
        m_succ, m_fail = group_mean(li[succ]), group_mean(li[~succ])
        print(f"{i+1:>3} {li.mean():+10.4f} {m_succ:+10.4f} {m_fail:+10.4f} {m_fail-m_succ:+9.4f}")
def _strip_wrapper_prefixes(key):
    """Map a checkpoint parameter name onto the bare model's parameter name.

    ``_orig_mod.`` is removed wherever it appears, while ``model.`` is only
    removed as a leading prefix, so a parameter whose own name merely contains
    ``model.`` is left intact.
    """
    key = key.replace("_orig_mod.", "")
    while key.startswith("model."):
        key = key[len("model."):]
    return key


def load_model(ckpt_root: Path, ckpt_name: str, device: str):
    """Build the TRM model described by a checkpoint directory and load its weights.

    Args:
        ckpt_root: directory containing ``all_config.yaml`` and the checkpoint file.
        ckpt_name: file name of the checkpoint inside ``ckpt_root``.
        device: device the model is moved to after loading.

    Returns:
        ``(model, cfg, train_meta)``: the model in eval mode, the parsed
        ``all_config.yaml`` and the parsed ``train/dataset.json`` of the first
        data path listed in the config.
    """
    cfg = yaml.safe_load((ckpt_root / "all_config.yaml").read_text())
    arch_cfg = dict(cfg["arch"])
    train_meta = json.loads((Path(cfg["data_paths"][0]) / "train" / "dataset.json").read_text())
    arch_cfg.update(batch_size=cfg["global_batch_size"], seq_len=train_meta["seq_len"],
                    vocab_size=train_meta["vocab_size"],
                    num_puzzle_identifiers=train_meta["num_puzzle_identifiers"])
    model = TinyRecursiveReasoningModel_ACTV1(arch_cfg)
    sd = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True)
    stripped = {_strip_wrapper_prefixes(k): v for k, v in sd.items()}
    # strict=False: mismatches are reported below rather than raised, so check the
    # printed counts before trusting any diagnostics computed with this model.
    missing, unexpected = model.load_state_dict(stripped, strict=False)
    print(f"[load] missing={len(missing)} unexpected={len(unexpected)}")
    if missing[:3]: print(f"  sample missing: {missing[:3]}")
    if unexpected[:3]: print(f"  sample unexpected: {unexpected[:3]}")
    model.to(device).eval()
    return model, cfg, train_meta
def jvp_through(f, x, v):
    """Return ``(f(x), J_f(x) @ v)`` using ``torch.autograd.functional.jvp``."""
    return torch.autograd.functional.jvp(f, x, v=v, create_graph=False, strict=False)


def run_diagnose_batch(model, batch, device, k_lyap, t_ons, seed):
    """Run the recursion on one batch and estimate its leading Lyapunov exponents.

    ``k_lyap`` tangent vectors are pushed through every L/H sub-step with JVPs
    and re-orthonormalized by QR every ``t_ons`` sub-steps; the log of the R
    diagonal is accumulated into the spectrum.

    Args:
        model: object exposing ``.inner`` (config, inits, ``L_level``, heads).
        batch: dict with ``"inputs"``, ``"labels"``, ``"puzzle_identifiers"``.
        device: torch device for the tangent basis and RNG.
        k_lyap: number of tangent vectors to track.
        t_ons: QR period, in sub-steps.
        seed: seed of the random initial tangent basis.

    Returns:
        dict of numpy arrays: per-step drifts of ``z_H``/``z_L``, halting
        logits per step, first halting step (1-based, 0 = never),
        ``lyap_spec``, ``exact_correct`` and ``token_acc``.
    """
    inner = model.inner
    cfg = inner.config
    B = batch["inputs"].shape[0]
    seq_full = cfg.seq_len + inner.puzzle_emb_len
    hidden = cfg.hidden_size
    D = seq_full * hidden

    z_H = inner.H_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
    z_L = inner.L_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
    seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None)
    input_embeddings = inner._input_embeddings(batch["inputs"].to(device),
                                               batch["puzzle_identifiers"].to(device))

    # Random orthonormal tangent basis; rows [:D] are the H part, rows [D:] the L part.
    g = torch.Generator(device=device).manual_seed(seed)
    Q0 = torch.randn(B, 2*D, k_lyap, device=device, dtype=torch.float32, generator=g)
    Q, _ = torch.linalg.qr(Q0)
    log_R_sum = torch.zeros(B, k_lyap, device=device, dtype=torch.float32)
    n_lyap_steps = 0
    step_counter = 0

    drift_zH_per_step, drift_zL_per_step = [], []
    halted_at = torch.zeros(B, dtype=torch.long, device=device)
    q_halt_hist, q_continue_hist = [], []

    for act_step in range(cfg.halt_max_steps):
        z_H_prev = z_H.detach().clone()
        z_L_prev = z_L.detach().clone()

        with torch.enable_grad():
            zH, zL = z_H.detach(), z_L.detach()
            for _h in range(cfg.H_cycles):
                # L cycles
                for _l in range(cfg.L_cycles):
                    v_H_j = Q[:, :D, :]
                    v_L_j = Q[:, D:, :]
                    # NOTE(review): feeding v_H + v_L assumes L_level depends on its two
                    # arguments only through their sum -- confirm against the model code.
                    v_comb = v_H_j + v_L_j
                    new_v_L_cols = []
                    f_L = lambda z: inner.L_level(z, zH + input_embeddings, **seq_info)
                    for i in range(k_lyap):
                        v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype)
                        zL_new, Dv = jvp_through(f_L, zL, v_i)
                        new_v_L_cols.append(Dv.reshape(B, D).to(torch.float32))
                    new_v_L = torch.stack(new_v_L_cols, dim=-1)
                    Q = torch.cat([v_H_j, new_v_L], dim=1)
                    zL = zL_new
                    step_counter += 1
                    if step_counter % t_ons == 0:
                        Q, R = torch.linalg.qr(Q)
                        log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
                        n_lyap_steps += 1

                # H step (uses SAME L_level!)
                v_H_j = Q[:, :D, :]
                v_L_j = Q[:, D:, :]
                v_comb = v_H_j + v_L_j
                new_v_H_cols = []
                f_H = lambda z: inner.L_level(z, zL, **seq_info)
                for i in range(k_lyap):
                    v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype)
                    zH_new, Dv = jvp_through(f_H, zH, v_i)
                    new_v_H_cols.append(Dv.reshape(B, D).to(torch.float32))
                new_v_H = torch.stack(new_v_H_cols, dim=-1)
                Q = torch.cat([new_v_H, v_L_j], dim=1)
                zH = zH_new
                step_counter += 1
                if step_counter % t_ons == 0:
                    Q, R = torch.linalg.qr(Q)
                    log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
                    n_lyap_steps += 1

            z_H, z_L = zH, zL

        drift_zH_per_step.append((z_H - z_H_prev).float().flatten(1).norm(dim=1).cpu())
        drift_zL_per_step.append((z_L - z_L_prev).float().flatten(1).norm(dim=1).cpu())

        with torch.no_grad():
            q_logits = inner.q_head(z_H[:, 0]).float()
            q_halt, q_continue = q_logits[..., 0], q_logits[..., 1]
            q_halt_hist.append(q_halt.cpu()); q_continue_hist.append(q_continue.cpu())
            # The loop never stops early; only the first halting step (1-based) is recorded.
            new_halt = (q_halt > q_continue) & (halted_at == 0)
            halted_at[new_halt] = act_step + 1
            output = inner.lm_head(z_H)[:, inner.puzzle_emb_len:].float()
            final_logits = output

    # NOTE(review): mean log-growth per QR interval; for t_ons > 1 this is t_ons times
    # the per-sub-step exponent -- confirm which unit downstream analysis expects.
    lyap_spec = (log_R_sum / max(n_lyap_steps, 1)).cpu().numpy()

    with torch.no_grad():
        preds = final_logits.argmax(dim=-1)
        labels = batch["labels"].to(device)
        mask = labels > 0                      # positions with label <= 0 are not scored
        exact = ((preds == labels) | ~mask).all(dim=-1).cpu().float()
        token_acc = ((preds == labels) & mask).sum(-1).float() / mask.sum(-1).float().clamp_min(1)
        token_acc = token_acc.cpu()

    return {
        "drift_zH": torch.stack(drift_zH_per_step, dim=1).numpy(),
        "drift_zL": torch.stack(drift_zL_per_step, dim=1).numpy(),
        "halted_at": halted_at.cpu().numpy(),
        "q_halt": torch.stack(q_halt_hist, dim=1).numpy(),
        "q_continue": torch.stack(q_continue_hist, dim=1).numpy(),
        "lyap_spec": lyap_spec,
        "exact_correct": exact.numpy(),
        "token_acc": token_acc.numpy(),
    }
+ ls = out["lyap_spec"] + print(f" [{e}/{n}] dt={time.time()-t0:.1f}s exact={out['exact_correct'].mean():.3f} " + f"λ_1={ls[:,0].mean():+.4f} λ_{args.k_lyap}={ls[:,-1].mean():+.4f}", flush=True) + + saved = {} + for k, v in res.items(): + if not v: continue + try: saved[k] = np.concatenate(v, 0) + except ValueError: saved[k] = np.stack(v, 0) + np.savez_compressed(args.out, **saved) + succ = saved["exact_correct"] > 0.5 + print(f"\nN={len(succ)} acc={succ.mean():.4f}") + print(f"{'i':>3} {'all':>10} {'succ':>10} {'fail':>10} {'Δ':>9}") + for i in range(saved["lyap_spec"].shape[1]): + li = saved["lyap_spec"][:, i] + print(f"{i+1:>3} {li.mean():+10.4f} {li[succ].mean():+10.4f} {li[~succ].mean():+10.4f} {li[~succ].mean()-li[succ].mean():+9.4f}") + + +if __name__ == "__main__": + main() diff --git a/diagnose_trm_joint_short.py b/diagnose_trm_joint_short.py new file mode 100644 index 0000000..956afe7 --- /dev/null +++ b/diagnose_trm_joint_short.py @@ -0,0 +1,225 @@ +"""TRM Sudoku joint Lyapunov diagnostic — TRM version of diagnose_hrm_joint.py. + +Key differences from HRM: +- TRM has ONE shared L_level (H_layers config is "ignored") +- z_L update: z_L = L_level(z_L, z_H + input_embeddings) +- z_H update: z_H = L_level(z_H, z_L) ← same L_level! +- H_cycles=3, L_cycles=6 (vs HRM 2,2) + +Joint tangent block structure: +- L step: v_L_new = J · (v_L + v_H), v_H_new = v_H, J at (z_L + z_H + ie) +- H step: v_H_new = J' · (v_H + v_L), v_L_new = v_L, J' at (z_H + z_L) +J and J' share weights but evaluated at different points. 
def _strip_wrapper_prefixes(key):
    """Normalise a checkpoint key to the bare model's parameter name.

    Every ``"_orig_mod."`` segment is removed and a single leading ``"model."``
    is dropped.  A ``"model."`` substring in the middle of a key is kept, so a
    name such as ``"inner.submodel.weight"`` is not mangled.
    NOTE(review): presumably the prefixes come from a compiled wrapper module
    holding the network as ``.model`` -- confirm against the training script.
    """
    key = key.replace("_orig_mod.", "")
    if key.startswith("model."):
        key = key[len("model."):]
    return key


def load_model(ckpt_root: Path, ckpt_name: str, device: str):
    """Build the TRM model from ``all_config.yaml`` and load checkpoint weights.

    Args:
        ckpt_root: directory holding ``all_config.yaml`` and the checkpoint file.
        ckpt_name: checkpoint file name inside ``ckpt_root``.
        device: device the model is moved to.

    Returns:
        ``(model, cfg, train_meta)``: the model in eval mode, the parsed YAML
        config, and the parsed ``train/dataset.json`` of the first data path.

    The state dict is loaded with ``strict=False``; the number of missing and
    unexpected keys (and up to three examples of each) is printed.
    """
    cfg = yaml.safe_load((ckpt_root / "all_config.yaml").read_text())
    arch_cfg = dict(cfg["arch"])
    train_meta = json.loads((Path(cfg["data_paths"][0]) / "train" / "dataset.json").read_text())
    arch_cfg.update(batch_size=cfg["global_batch_size"], seq_len=train_meta["seq_len"],
                    vocab_size=train_meta["vocab_size"],
                    num_puzzle_identifiers=train_meta["num_puzzle_identifiers"])
    model = TinyRecursiveReasoningModel_ACTV1(arch_cfg)
    sd = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True)
    stripped = {_strip_wrapper_prefixes(k): v for k, v in sd.items()}
    missing, unexpected = model.load_state_dict(stripped, strict=False)
    print(f"[load] missing={len(missing)} unexpected={len(unexpected)}")
    if missing[:3]:
        print(f" sample missing: {missing[:3]}")
    if unexpected[:3]:
        print(f" sample unexpected: {unexpected[:3]}")
    model.to(device).eval()
    return model, cfg, train_meta


def load_test_samples(data_path, n_total, shard_id, num_shards, seed):
    """Draw ``n_total`` random test samples and return this shard's slice.

    The same seeded permutation is drawn for every shard, then cut into
    ``num_shards`` contiguous pieces of ``ceil(n_total / num_shards)`` samples
    (the last pieces may be shorter or empty).  ``n_total`` is clamped to the
    number of available test samples.

    Args:
        data_path: dataset root containing ``test/all__*.npy``.
        n_total: number of samples drawn across all shards.
        shard_id: index of the shard to return (``0 <= shard_id < num_shards``).
        num_shards: total number of shards (``>= 1``).
        seed: seed of the sampling RNG.

    Returns:
        dict with int32 tensors ``"inputs"``, ``"labels"``,
        ``"puzzle_identifiers"`` and the numpy index array ``"idx"``.

    Raises:
        ValueError: if ``num_shards < 1`` or ``shard_id`` is out of range.
    """
    if num_shards < 1:
        raise ValueError(f"num_shards must be >= 1, got {num_shards}")
    if not 0 <= shard_id < num_shards:
        raise ValueError(f"shard_id must be in [0, {num_shards}), got {shard_id}")

    rng = np.random.default_rng(seed)
    test_dir = data_path / "test"
    inputs = np.load(test_dir / "all__inputs.npy")
    labels = np.load(test_dir / "all__labels.npy")
    pid = np.load(test_dir / "all__puzzle_identifiers.npy")

    # Sampling without replacement cannot return more rows than exist.
    n_total = min(n_total, len(inputs))
    all_idx = rng.choice(len(inputs), size=n_total, replace=False)
    shard_size = (n_total + num_shards - 1) // num_shards
    s, e = shard_id * shard_size, min((shard_id + 1) * shard_size, n_total)
    idx = all_idx[s:e]
    return {
        "inputs": torch.from_numpy(inputs[idx].astype(np.int32)),
        "labels": torch.from_numpy(labels[idx].astype(np.int32)),
        "puzzle_identifiers": torch.from_numpy(pid[idx].astype(np.int32)),
        "idx": idx,
    }


def jvp_through(f, x, v):
    """Return ``(f(x), J_f(x) @ v)`` using ``torch.autograd.functional.jvp``.

    The call is non-strict and does not build a graph for higher-order
    derivatives (``create_graph=False``).
    """
    return torch.autograd.functional.jvp(f, x, v=v, create_graph=False, strict=False)


def _advance_with_tangents(level, state, injection, tangents, seq_info, dtype):
    """Apply ``state -> level(state, injection, **seq_info)`` and push tangents through it.

    Args:
        level: callable taking ``(state, injection, **seq_info)``.
        state: current state tensor, shaped ``(B, seq, hidden)``.
        injection: second positional argument of ``level``; held fixed during the step.
        tangents: float32 tensor ``(B, D, k)``; each column is one tangent direction.
        seq_info: keyword arguments forwarded to ``level``.
        dtype: dtype the tangents are cast to before differentiation.

    Returns:
        ``(new_state, new_tangents)`` where ``new_tangents`` is float32 ``(B, D, k)``.
    """
    batch_size = tangents.shape[0]

    def step(z):
        return level(z, injection, **seq_info)

    new_state = state
    pushed = []
    # One JVP per tangent column; the primal output is identical every time.
    for column in tangents.unbind(dim=-1):
        v = column.reshape(state.shape).to(dtype)
        new_state, jv = jvp_through(step, state, v)
        pushed.append(jv.reshape(batch_size, -1).to(torch.float32))
    return new_state, torch.stack(pushed, dim=-1)


def _qr_accumulate(Q, log_R_sum):
    """Re-orthonormalise ``Q`` and add ``log|diag(R)|`` to the running sum."""
    Q, R = torch.linalg.qr(Q)
    growth = R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log()
    return Q, log_R_sum + growth


def _mean_or_nan(values):
    """Mean of a numpy array, or NaN (without a numpy warning) when it is empty."""
    return float(values.mean()) if values.size else float("nan")


def run_diagnose_batch(model, batch, device, k_lyap, t_ons, seed, n_act_steps=4):
    """Run a SHORT joint (z_H, z_L) recursion on one batch and estimate Lyapunov exponents.

    The state is advanced for ``n_act_steps`` ACT steps (default 4, chosen in the
    original script "to match train-time"); each ACT step runs ``cfg.H_cycles``
    rounds of ``cfg.L_cycles`` z_L updates followed by one z_H update, all
    through ``inner.L_level``.  A ``(B, 2*D, k_lyap)`` tangent frame (rows
    ``[:D]`` = z_H part, rows ``[D:]`` = z_L part) is carried along: on every
    update the sum of both parts is pushed through the Jacobian of the updated
    state while the other part is kept unchanged.
    NOTE(review): using ``v_H + v_L`` as the tangent presumes ``L_level`` adds
    its second argument to the state before processing -- confirm against the model.

    The recursion is NOT stopped when the halting head fires; ``halted_at`` only
    records the first ACT step (1-based) at which ``q_halt > q_continue``
    (0 = never).

    Args:
        model: object exposing ``model.inner`` (config, ``H_init``/``L_init``,
            ``L_level``, ``_input_embeddings``, ``q_head``, ``lm_head``, ...).
        batch: dict with ``"inputs"``, ``"labels"``, ``"puzzle_identifiers"`` tensors.
        device: device used for the tangent frame and the batch tensors.
        k_lyap: number of tangent directions / exponents (``1 <= k_lyap <= 2*D``).
        t_ons: QR re-orthonormalisation interval, in update steps (``>= 1``).
        seed: seed for the random initial tangent frame.
        n_act_steps: number of ACT steps to run (``>= 1``).

    Returns:
        dict of numpy arrays: ``drift_zH``/``drift_zL`` ``(B, n_act_steps)``,
        ``halted_at`` ``(B,)``, ``q_halt``/``q_continue`` ``(B, n_act_steps)``,
        ``lyap_spec`` ``(B, k_lyap)`` (per update step), ``exact_correct`` and
        ``token_acc`` ``(B,)``.

    Raises:
        ValueError: on non-positive ``t_ons``/``n_act_steps`` or ``k_lyap``
            outside ``[1, 2*D]``.
    """
    inner = model.inner
    cfg = inner.config
    B = batch["inputs"].shape[0]
    seq_full = cfg.seq_len + inner.puzzle_emb_len
    hidden = cfg.hidden_size
    D = seq_full * hidden
    dtype = inner.forward_dtype

    if t_ons < 1:
        raise ValueError(f"t_ons must be >= 1, got {t_ons}")
    if n_act_steps < 1:
        raise ValueError(f"n_act_steps must be >= 1, got {n_act_steps}")
    if not 1 <= k_lyap <= 2 * D:
        # Reduced QR of a (2D x k) frame only keeps k columns when k <= 2D.
        raise ValueError(f"k_lyap must be in [1, {2 * D}], got {k_lyap}")

    z_H = inner.H_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(dtype)
    z_L = inner.L_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(dtype)
    seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None)
    # Only derivatives w.r.t. the state are needed, so drop the embedding graph.
    input_embeddings = inner._input_embeddings(
        batch["inputs"].to(device), batch["puzzle_identifiers"].to(device)
    ).detach()

    g = torch.Generator(device=device).manual_seed(seed)
    Q0 = torch.randn(B, 2 * D, k_lyap, device=device, dtype=torch.float32, generator=g)
    Q, _ = torch.linalg.qr(Q0)
    log_R_sum = torch.zeros(B, k_lyap, device=device, dtype=torch.float32)
    n_lyap_steps = 0
    step_counter = 0

    drift_zH_per_step, drift_zL_per_step = [], []
    halted_at = torch.zeros(B, dtype=torch.long, device=device)
    q_halt_hist, q_continue_hist = [], []

    for act_step in range(n_act_steps):
        z_H_prev = z_H.detach().clone()
        z_L_prev = z_L.detach().clone()

        with torch.enable_grad():
            zH, zL = z_H.detach(), z_L.detach()
            for _h in range(cfg.H_cycles):
                # zH is frozen during the L cycles, so the injection is loop-invariant.
                injection = zH + input_embeddings
                for _l in range(cfg.L_cycles):
                    v_H, v_L = Q[:, :D, :], Q[:, D:, :]
                    zL, new_v_L = _advance_with_tangents(
                        inner.L_level, zL, injection, v_H + v_L, seq_info, dtype
                    )
                    Q = torch.cat([v_H, new_v_L], dim=1)
                    step_counter += 1
                    if step_counter % t_ons == 0:
                        Q, log_R_sum = _qr_accumulate(Q, log_R_sum)
                        n_lyap_steps += 1

                # H step (uses the SAME L_level, with z_L as the injection).
                v_H, v_L = Q[:, :D, :], Q[:, D:, :]
                zH, new_v_H = _advance_with_tangents(
                    inner.L_level, zH, zL, v_H + v_L, seq_info, dtype
                )
                Q = torch.cat([new_v_H, v_L], dim=1)
                step_counter += 1
                if step_counter % t_ons == 0:
                    Q, log_R_sum = _qr_accumulate(Q, log_R_sum)
                    n_lyap_steps += 1

            z_H, z_L = zH, zL

        drift_zH_per_step.append((z_H - z_H_prev).float().flatten(1).norm(dim=1).cpu())
        drift_zL_per_step.append((z_L - z_L_prev).float().flatten(1).norm(dim=1).cpu())

        with torch.no_grad():
            q_logits = inner.q_head(z_H[:, 0]).float()
            q_halt, q_continue = q_logits[..., 0], q_logits[..., 1]
            q_halt_hist.append(q_halt.cpu())
            q_continue_hist.append(q_continue.cpu())
            new_halt = (q_halt > q_continue) & (halted_at == 0)
            halted_at[new_halt] = act_step + 1

    # Each QR event accumulates the growth of t_ons update steps, so normalise by
    # the number of update steps covered rather than by the number of QR events.
    lyap_spec = (log_R_sum / max(n_lyap_steps * t_ons, 1)).cpu().numpy()

    with torch.no_grad():
        # Only the logits of the final state are scored.
        final_logits = inner.lm_head(z_H)[:, inner.puzzle_emb_len:].float()
        preds = final_logits.argmax(dim=-1)
        labels = batch["labels"].to(device)
        mask = labels > 0
        exact = ((preds == labels) | ~mask).all(dim=-1).cpu().float()
        token_acc = ((preds == labels) & mask).sum(-1).float() / mask.sum(-1).float().clamp_min(1)
        token_acc = token_acc.cpu()

    return {
        "drift_zH": torch.stack(drift_zH_per_step, dim=1).numpy(),
        "drift_zL": torch.stack(drift_zL_per_step, dim=1).numpy(),
        "halted_at": halted_at.cpu().numpy(),
        "q_halt": torch.stack(q_halt_hist, dim=1).numpy(),
        "q_continue": torch.stack(q_continue_hist, dim=1).numpy(),
        "lyap_spec": lyap_spec,
        "exact_correct": exact.numpy(),
        "token_acc": token_acc.numpy(),
    }


def main():
    """CLI entry point: diagnose one shard of test samples and save the results.

    Loads the checkpoint, runs ``run_diagnose_batch`` batch by batch, writes all
    per-sample arrays to ``--out`` with ``np.savez_compressed`` and prints a
    success/failure summary of the Lyapunov spectrum.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--ckpt-root", required=True)
    ap.add_argument("--ckpt-name", default="step_13020")
    ap.add_argument("--n-samples", type=int, default=512)
    ap.add_argument("--shard-id", type=int, default=0)
    ap.add_argument("--num-shards", type=int, default=1)
    ap.add_argument("--batch-size", type=int, default=16)
    ap.add_argument("--k-lyap", type=int, default=8)
    ap.add_argument("--t-ons", type=int, default=1)
    ap.add_argument("--seed", type=int, default=0)
    ap.add_argument("--out", default="diag_trm.npz")
    ap.add_argument("--device", default="cuda")
    ap.add_argument("--act-steps", type=int, default=4,
                    help="number of ACT steps to run (SHORT variant default: 4)")
    args = ap.parse_args()

    device = args.device
    model, cfg, train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device)
    print(f"loaded {args.ckpt_name}: hidden={model.inner.config.hidden_size}, "
          f"seq_full={train_meta['seq_len'] + model.inner.puzzle_emb_len}, "
          f"halt_max_steps={model.inner.config.halt_max_steps}, "
          f"H={model.inner.config.H_cycles} L={model.inner.config.L_cycles}")

    test = load_test_samples(Path(cfg["data_paths"][0]), args.n_samples, args.shard_id, args.num_shards, args.seed)
    n = len(test["inputs"])
    print(f"shard {args.shard_id}/{args.num_shards}: {n} samples")
    if n == 0:
        # Nothing to diagnose; the summary below would fail on missing keys.
        raise SystemExit("no samples in this shard; nothing to do")

    res = {k: [] for k in ["drift_zH","drift_zL","halted_at","q_halt","q_continue","lyap_spec","exact_correct","token_acc","idx"]}
    t0 = time.time()
    for s in range(0, n, args.batch_size):
        e = min(s + args.batch_size, n)
        batch = {k: test[k][s:e].to(device) for k in ["inputs","labels","puzzle_identifiers"]}
        out = run_diagnose_batch(model, batch, device, args.k_lyap, args.t_ons,
                                 args.seed + s, n_act_steps=args.act_steps)
        for k, v in out.items():
            res[k].append(v)
        res["idx"].append(test["idx"][s:e])
        ls = out["lyap_spec"]
        print(f" [{e}/{n}] dt={time.time()-t0:.1f}s exact={out['exact_correct'].mean():.3f} "
              f"λ_1={ls[:,0].mean():+.4f} λ_{args.k_lyap}={ls[:,-1].mean():+.4f}", flush=True)

    saved = {}
    for k, v in res.items():
        if not v:
            continue
        try:
            saved[k] = np.concatenate(v, 0)
        except ValueError:
            saved[k] = np.stack(v, 0)
    np.savez_compressed(args.out, **saved)
    succ = saved["exact_correct"] > 0.5
    print(f"\nN={len(succ)} acc={succ.mean():.4f}")
    print(f"{'i':>3} {'all':>10} {'succ':>10} {'fail':>10} {'Δ':>9}")
    for i in range(saved["lyap_spec"].shape[1]):
        li = saved["lyap_spec"][:, i]
        # Either group may be empty (all solved / none solved) -> report NaN.
        m_succ, m_fail = _mean_or_nan(li[succ]), _mean_or_nan(li[~succ])
        print(f"{i+1:>3} {li.mean():+10.4f} {m_succ:+10.4f} {m_fail:+10.4f} {m_fail-m_succ:+9.4f}")


if __name__ == "__main__":
    main()
+trm_baseline_best,0,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8579999804496765,0.8535000085830688,0.9457222819328308,0.8289999961853027,0.878000020980835,-3.885612726211548,-3.8856122493743896,0.9825174808502197,0.9650349617004395,0.07394365966320038,0.14084507524967194 +trm_baseline_best,0,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8579999804496765,0.8615000247955322,0.9481049180030823,0.8240000009536743,0.8989999890327454,-3.885612726211548,-3.8856122493743896,0.9603729844093323,0.9382284283638,0.26408451795578003,0.39436620473861694 +trm_baseline_best,0,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8579999804496765,0.8560000061988831,0.9467840790748596,0.8119999766349792,0.8999999761581421,-3.885612726211548,-3.8856122493743896,0.9545454382896423,0.9219114184379578,0.26056337356567383,0.3732394278049469 +trm_baseline_best,0,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8579999804496765,0.8619999885559082,0.9479444622993469,0.8240000009536743,0.8999999761581421,-3.885612726211548,-3.8856122493743896,0.9586247205734253,0.939393937587738,0.27816900610923767,0.42957746982574463 +trm_baseline_best,4,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8489999771118164,0.8489999771118164,0.9435185194015503,0.8489999771118164,0.8489999771118164,-1.2879749536514282,0.8905364871025085,1.0,1.0,0.0,0.0 
+trm_baseline_best,4,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8489999771118164,0.8510000109672546,0.944135844707489,0.8379999995231628,0.8640000224113464,-1.2879749536514282,0.8905364871025085,0.9929328560829163,0.9858657121658325,0.05298013240098953,0.09933774918317795 +trm_baseline_best,4,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8489999771118164,0.8554999828338623,0.9452839493751526,0.8270000219345093,0.8840000033378601,-1.2879749536514282,0.8905364871025085,0.9734982252120972,0.9611307382583618,0.19205297529697418,0.3112582862377167 +trm_baseline_best,4,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8489999771118164,0.8565000295639038,0.9460123181343079,0.828000009059906,0.8849999904632568,-1.2879749536514282,0.8905364871025085,0.9723203778266907,0.9587750434875488,0.20529800653457642,0.3178808093070984 +trm_baseline_best,4,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8489999771118164,0.8560000061988831,0.9458826184272766,0.828000009059906,0.8840000033378601,-1.2879749536514282,0.8905364871025085,0.9699646830558777,0.9564192891120911,0.215231791138649,0.3245033025741577 +trm_baseline_best,4,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8489999771118164,0.8550000190734863,0.9454135894775391,0.8289999961853027,0.8809999823570251,-1.2879749536514282,0.8905364871025085,0.9699646830558777,0.9552414417266846,0.20860926806926727,0.29801324009895325 
+trm_baseline_best,8,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8539999723434448,0.8539999723434448,0.9449505805969238,0.8539999723434448,0.8539999723434448,-1.811623215675354,1.4589354991912842,1.0,1.0,0.0,0.0 +trm_baseline_best,8,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8539999723434448,0.8544999957084656,0.9452963471412659,0.8429999947547913,0.8659999966621399,-1.811623215675354,1.4589354991912842,0.993559718132019,0.9871194362640381,0.04109589010477066,0.08219178020954132 +trm_baseline_best,8,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8539999723434448,0.8600000143051147,0.9476419687271118,0.8420000076293945,0.878000020980835,-1.811623215675354,1.4589354991912842,0.983021080493927,0.976580798625946,0.14041095972061157,0.22602739930152893 +trm_baseline_best,8,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8539999723434448,0.8544999957084656,0.9457098245620728,0.8320000171661377,0.8769999742507935,-1.811623215675354,1.4589354991912842,0.9783372282981873,0.966042160987854,0.13013698160648346,0.21232876181602478 +trm_baseline_best,8,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8539999723434448,0.8529999852180481,0.9448826909065247,0.8360000252723694,0.8700000047683716,-1.811623215675354,1.4589354991912842,0.9783372282981873,0.9695550203323364,0.11986301094293594,0.18493150174617767 
+trm_baseline_best,8,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8539999723434448,0.8544999957084656,0.9452345371246338,0.8379999995231628,0.8709999918937683,-1.811623215675354,1.4589354991912842,0.9800936579704285,0.9730679392814636,0.11986301094293594,0.19178082048892975 +trm_baseline_best,12,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8550000190734863,0.8550000190734863,0.9453826546669006,0.8550000190734863,0.8550000190734863,-3.6991007328033447,2.8621342182159424,1.0,1.0,0.0,0.0 +trm_baseline_best,12,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8550000190734863,0.8525000214576721,0.944302499294281,0.847000002861023,0.8579999804496765,-3.6991007328033447,2.8621342182159424,0.9947368502616882,0.9906432628631592,0.013793103396892548,0.027586206793785095 +trm_baseline_best,12,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8550000190734863,0.8519999980926514,0.9443457126617432,0.8429999947547913,0.8610000014305115,-3.6991007328033447,2.8621342182159424,0.988304078578949,0.984795331954956,0.04827586188912392,0.08965517580509186 +trm_baseline_best,12,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8550000190734863,0.8489999771118164,0.9430555105209351,0.8460000157356262,0.8519999980926514,-3.6991007328033447,2.8621342182159424,0.988304078578949,0.9871345162391663,0.027586206793785095,0.04137931019067764 
+trm_baseline_best,12,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8550000190734863,0.8554999828338623,0.945648193359375,0.8479999899864197,0.8629999756813049,-3.6991007328033447,2.8621342182159424,0.9888888597488403,0.9859648942947388,0.06896551698446274,0.1034482792019844 +trm_baseline_best,12,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8550000190734863,0.8500000238418579,0.9435864090919495,0.8460000157356262,0.8539999723434448,-3.6991007328033447,2.8621342182159424,0.988304078578949,0.9871345162391663,0.03448275849223137,0.05517241358757019 +trm_multi4_best,0,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8930000066757202,0.8930000066757202,0.9589999914169312,0.8930000066757202,0.8930000066757202,-3.885612726211548,-3.8856122493743896,1.0,1.0,0.0,0.0 +trm_multi4_best,0,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8930000066757202,0.8930000066757202,0.9589999914169312,0.8930000066757202,0.8930000066757202,-3.885612726211548,-3.8856122493743896,1.0,1.0,0.0,0.0 +trm_multi4_best,0,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8930000066757202,0.8970000147819519,0.960277795791626,0.8840000033378601,0.9100000262260437,-3.885612726211548,-3.8856122493743896,0.9938409924507141,0.9876819849014282,0.08878504484891891,0.15887850522994995 
+trm_multi4_best,0,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8930000066757202,0.8884999752044678,0.9585925340652466,0.8600000143051147,0.9169999957084656,-3.885612726211548,-3.8856122493743896,0.9591265320777893,0.9406495094299316,0.29906541109085083,0.4112149477005005 +trm_multi4_best,0,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8930000066757202,0.8865000009536743,0.9574320316314697,0.8519999980926514,0.9210000038146973,-3.885612726211548,-3.8856122493743896,0.9596864581108093,0.9372900128364563,0.2757009267807007,0.4112149477005005 +trm_multi4_best,0,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8930000066757202,0.8989999890327454,0.9610432386398315,0.871999979019165,0.9259999990463257,-3.885612726211548,-3.8856122493743896,0.9664053916931152,0.9462485909461975,0.336448609828949,0.420560747385025 +trm_multi4_best,4,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8840000033378601,0.9556913375854492,0.8840000033378601,0.8840000033378601,-1.7770261764526367,0.8977539539337158,1.0,1.0,0.0,0.0 +trm_multi4_best,4,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.887499988079071,0.9567221403121948,0.8769999742507935,0.8980000019073486,-1.7770261764526367,0.8977539539337158,0.9954751133918762,0.9909502267837524,0.06465516984462738,0.12068965286016464 
+trm_multi4_best,4,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8884999752044678,0.9570987820625305,0.8659999966621399,0.9110000133514404,-1.7770261764526367,0.8977539539337158,0.9796379804611206,0.9683257937431335,0.19396552443504333,0.3017241358757019 +trm_multi4_best,4,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8865000009536743,0.9563456773757935,0.8610000014305115,0.9120000004768372,-1.7770261764526367,0.8977539539337158,0.9717194437980652,0.959276020526886,0.23706896603107452,0.36206895112991333 +trm_multi4_best,4,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8899999856948853,0.9573581218719482,0.8659999966621399,0.9139999747276306,-1.7770261764526367,0.8977539539337158,0.9739819169044495,0.959276020526886,0.25,0.3448275923728943 +trm_multi4_best,4,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8960000276565552,0.9607962369918823,0.8700000047683716,0.921999990940094,-1.7770261764526367,0.8977539539337158,0.9739819169044495,0.9615384340286255,0.3017241358757019,0.43103447556495667 +trm_multi4_best,8,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8889999985694885,0.8889999985694885,0.9573332667350769,0.8889999985694885,0.8889999985694885,-2.587038040161133,1.1594672203063965,1.0,1.0,0.0,0.0 
+trm_multi4_best,8,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8889999985694885,0.8920000195503235,0.9578518271446228,0.8870000243186951,0.8970000147819519,-2.587038040161133,1.1594672203063965,0.9977502822875977,0.9955005645751953,0.045045044273138046,0.07207207381725311 +trm_multi4_best,8,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8889999985694885,0.8880000114440918,0.9568086266517639,0.8709999918937683,0.9049999713897705,-2.587038040161133,1.1594672203063965,0.9842519760131836,0.9752531051635742,0.11711711436510086,0.19819819927215576 +trm_multi4_best,8,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8889999985694885,0.8889999985694885,0.9574383497238159,0.8730000257492065,0.9049999713897705,-2.587038040161133,1.1594672203063965,0.9848144054412842,0.9775028228759766,0.12162162363529205,0.20720720291137695 +trm_multi4_best,8,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8889999985694885,0.8895000219345093,0.957660436630249,0.871999979019165,0.9070000052452087,-2.587038040161133,1.1594672203063965,0.9859392642974854,0.9797525405883789,0.11711711436510086,0.22522522509098053 +trm_multi4_best,8,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8889999985694885,0.8914999961853027,0.9589135646820068,0.8769999742507935,0.906000018119812,-2.587038040161133,1.1594672203063965,0.9848144054412842,0.9786276817321777,0.14414414763450623,0.22522522509098053 
+trm_multi4_best,12,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8840000033378601,0.956098735332489,0.8840000033378601,0.8840000033378601,-4.764944553375244,2.045381546020508,1.0,1.0,0.0,0.0 +trm_multi4_best,12,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8849999904632568,0.9564691781997681,0.8820000290870667,0.8880000114440918,-4.764944553375244,2.045381546020508,0.9977375268936157,0.9966063499450684,0.0258620698004961,0.043103449046611786 +trm_multi4_best,12,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8855000138282776,0.9565123915672302,0.8809999823570251,0.8899999856948853,-4.764944553375244,2.045381546020508,0.9949095249176025,0.9943438768386841,0.0517241396009922,0.08620689809322357 +trm_multi4_best,12,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8855000138282776,0.9570556282997131,0.8799999952316284,0.890999972820282,-4.764944553375244,2.045381546020508,0.9954751133918762,0.9943438768386841,0.047413792461156845,0.08620689809322357 +trm_multi4_best,12,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8849999904632568,0.9560925960540771,0.8809999823570251,0.8889999985694885,-4.764944553375244,2.045381546020508,0.9949095249176025,0.9943438768386841,0.047413792461156845,0.07758620381355286 
+trm_multi4_best,12,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8849999904632568,0.9559259414672852,0.8799999952316284,0.8899999856948853,-4.764944553375244,2.045381546020508,0.9943438768386841,0.9932126402854919,0.0517241396009922,0.08620689809322357 +trm_multi4_final,0,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8299999833106995,0.9333704113960266,0.8299999833106995,0.8299999833106995,-3.885612726211548,-3.8856122493743896,1.0,1.0,0.0,0.0 +trm_multi4_final,0,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8299999833106995,0.9333704113960266,0.8299999833106995,0.8299999833106995,-3.885612726211548,-3.8856122493743896,1.0,1.0,0.0,0.0 +trm_multi4_final,0,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8289999961853027,0.9334814548492432,0.8199999928474426,0.8379999995231628,-3.885612726211548,-3.8856122493743896,0.9927710890769958,0.9855421781539917,0.029411764815449715,0.0470588244497776 +trm_multi4_final,0,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8255000114440918,0.9313827157020569,0.7990000247955322,0.8519999980926514,-3.885612726211548,-3.8856122493743896,0.9572288990020752,0.9385542273521423,0.1823529452085495,0.24705882370471954 
+trm_multi4_final,0,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8295000195503235,0.9325369596481323,0.7960000038146973,0.8629999756813049,-3.885612726211548,-3.8856122493743896,0.9590361714363098,0.9385542273521423,0.19705882668495178,0.29411765933036804 +trm_multi4_final,0,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8309999704360962,0.933006227016449,0.7990000247955322,0.8629999756813049,-3.885612726211548,-3.8856122493743896,0.9554216861724854,0.9325301051139832,0.2235294133424759,0.30000001192092896 +trm_multi4_final,4,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8370000123977661,0.8370000123977661,0.9353950619697571,0.8370000123977661,0.8370000123977661,-1.9923189878463745,0.9806549549102783,1.0,1.0,0.0,0.0 +trm_multi4_final,4,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8370000123977661,0.8355000019073486,0.9351357817649841,0.8240000009536743,0.847000002861023,-1.9923189878463745,0.9806549549102783,0.9910394549369812,0.9820788502693176,0.03680981695652008,0.061349693685770035 +trm_multi4_final,4,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8370000123977661,0.8355000019073486,0.9351975321769714,0.8130000233650208,0.8579999804496765,-1.9923189878463745,0.9806549549102783,0.9731183052062988,0.9617682099342346,0.12883435189723969,0.20858895778656006 
+trm_multi4_final,4,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8370000123977661,0.8299999833106995,0.9334691166877747,0.8059999942779541,0.8539999723434448,-1.9923189878463745,0.9806549549102783,0.965352475643158,0.9498208165168762,0.1349693238735199,0.20245398581027985 +trm_multi4_final,4,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8370000123977661,0.8309999704360962,0.9334567785263062,0.8069999814033508,0.8550000190734863,-1.9923189878463745,0.9806549549102783,0.9677419066429138,0.9534050226211548,0.12883435189723969,0.20245398581027985 +trm_multi4_final,4,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8370000123977661,0.8330000042915344,0.933746874332428,0.8069999814033508,0.859000027179718,-1.9923189878463745,0.9806549549102783,0.9701313972473145,0.9569892287254333,0.12883435189723969,0.22085890173912048 +trm_multi4_final,8,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8299999833106995,0.9329506754875183,0.8299999833106995,0.8299999833106995,-2.597419500350952,1.466092586517334,1.0,1.0,0.0,0.0 +trm_multi4_final,8,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8299999833106995,0.9333766102790833,0.8220000267028809,0.8379999995231628,-2.597419500350952,1.466092586517334,0.9951807260513306,0.9903614521026611,0.0235294122248888,0.0470588244497776 
+trm_multi4_final,8,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.828000009059906,0.9320307374000549,0.8130000233650208,0.8429999947547913,-2.597419500350952,1.466092586517334,0.9849397540092468,0.9771084189414978,0.061764705926179886,0.11176470667123795 +trm_multi4_final,8,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8314999938011169,0.9331419467926025,0.8190000057220459,0.843999981880188,-2.597419500350952,1.466092586517334,0.9849397540092468,0.9819276928901672,0.08235294371843338,0.1411764770746231 +trm_multi4_final,8,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8309999704360962,0.9327530860900879,0.8169999718666077,0.8450000286102295,-2.597419500350952,1.466092586517334,0.9819276928901672,0.9771084189414978,0.0941176488995552,0.15294118225574493 +trm_multi4_final,8,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8335000276565552,0.9352839589118958,0.8180000185966492,0.8489999771118164,-2.597419500350952,1.466092586517334,0.9855421781539917,0.9819276928901672,0.09117647260427475,0.16470588743686676 +trm_multi4_final,12,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8339999914169312,0.8339999914169312,0.9340246319770813,0.8339999914169312,0.8339999914169312,-5.133573055267334,2.7817435264587402,1.0,1.0,0.0,0.0 
+trm_multi4_final,12,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8339999914169312,0.8335000276565552,0.9341234564781189,0.8289999961853027,0.8379999995231628,-5.133573055267334,2.7817435264587402,0.9970024228096008,0.9940047860145569,0.012048192322254181,0.024096384644508362 +trm_multi4_final,12,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8339999914169312,0.8345000147819519,0.9352160692214966,0.8289999961853027,0.8399999737739563,-5.133573055267334,2.7817435264587402,0.9922062158584595,0.9892086386680603,0.04216867312788963,0.0602409653365612 +trm_multi4_final,12,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8339999914169312,0.8320000171661377,0.9335123300552368,0.824999988079071,0.8389999866485596,-5.133573055267334,2.7817435264587402,0.9892086386680603,0.986810564994812,0.04216867312788963,0.07228915393352509 +trm_multi4_final,12,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8339999914169312,0.8299999833106995,0.9333456754684448,0.8240000009536743,0.8360000252723694,-5.133573055267334,2.7817435264587402,0.9880095720291138,0.9856114983558655,0.03614457696676254,0.0602409653365612 +trm_multi4_final,12,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8339999914169312,0.8309999704360962,0.9343765377998352,0.824999988079071,0.8370000123977661,-5.133573055267334,2.7817435264587402,0.9898081421852112,0.9880095720291138,0.03313253074884415,0.0602409653365612 diff --git 
a/directional_lyap_perturb/plots/final_gpu_status.txt b/directional_lyap_perturb/plots/final_gpu_status.txt new file mode 100644 index 0000000..7c03653 --- /dev/null +++ b/directional_lyap_perturb/plots/final_gpu_status.txt @@ -0,0 +1,4 @@ +0, 480, 49140, 25 +1, 13, 49140, 0 +2, 45494, 49140, 0 +3, 13, 49140, 0 diff --git a/directional_lyap_perturb/smoke_baseline.summary.csv b/directional_lyap_perturb/smoke_baseline.summary.csv new file mode 100644 index 0000000..49a997c --- /dev/null +++ b/directional_lyap_perturb/smoke_baseline.summary.csv @@ -0,0 +1,5 @@ +label,perturb_after,sigma,n_samples,candidates,fd_eps,ckpt_root,ckpt_name,clean_acc,mean_sign_exact,mean_sign_token_acc,worst_sign_exact,best_sign_exact,selected_growth_mean,selected_growth_q90,retain_mean_on_clean_success,retain_worst_on_clean_success,rescue_mean_on_clean_fail,rescue_best_on_clean_fail +smoke_baseline,0,0.0,16,4,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.875,0.9375,0.979938268661499,0.9375,0.9375,-3.8856122493743896,-3.8856122493743896,1.0,1.0,0.5,0.5 +smoke_baseline,0,0.03,16,4,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.875,0.875,0.9475308656692505,0.8125,0.9375,-3.8856122493743896,-3.8856122493743896,0.9642857313156128,0.9285714030265808,0.25,0.5 +smoke_baseline,8,0.0,16,4,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,1.0,1.0,1.0,1.0,1.0,-5.557522296905518,1.0598664283752441,1.0,1.0,nan,nan +smoke_baseline,8,0.03,16,4,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,1.0,0.9375,0.9764660596847534,0.9375,0.9375,-5.557522296905518,1.0598664283752441,0.9375,0.9375,nan,nan diff --git a/directional_lyap_perturb/smoke_baseline_fixedclean.summary.csv 
b/directional_lyap_perturb/smoke_baseline_fixedclean.summary.csv new file mode 100644 index 0000000..26ecd07 --- /dev/null +++ b/directional_lyap_perturb/smoke_baseline_fixedclean.summary.csv @@ -0,0 +1,5 @@ +label,perturb_after,sigma,n_samples,candidates,fd_eps,ckpt_root,ckpt_name,clean_acc,mean_sign_exact,mean_sign_token_acc,worst_sign_exact,best_sign_exact,selected_growth_mean,selected_growth_q90,retain_mean_on_clean_success,retain_worst_on_clean_success,rescue_mean_on_clean_fail,rescue_best_on_clean_fail +smoke_baseline,0,0.0,16,4,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.9375,0.9375,0.979938268661499,0.9375,0.9375,-3.8856122493743896,-3.8856122493743896,1.0,1.0,0.0,0.0 +smoke_baseline,0,0.03,16,4,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.9375,0.875,0.9475308656692505,0.8125,0.9375,-3.8856122493743896,-3.8856122493743896,0.9333333373069763,0.8666666746139526,0.0,0.0 +smoke_baseline,8,0.0,16,4,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,1.0,1.0,1.0,1.0,1.0,-5.557522296905518,1.0598664283752441,1.0,1.0,nan,nan +smoke_baseline,8,0.03,16,4,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,1.0,0.9375,0.9764660596847534,0.9375,0.9375,-5.557522296905518,1.0598664283752441,0.9375,0.9375,nan,nan diff --git a/directional_lyap_perturb/trm_baseline_best_step58590_n1000_c8_dirlyap.summary.csv b/directional_lyap_perturb/trm_baseline_best_step58590_n1000_c8_dirlyap.summary.csv new file mode 100644 index 0000000..5e1db0a --- /dev/null +++ b/directional_lyap_perturb/trm_baseline_best_step58590_n1000_c8_dirlyap.summary.csv @@ -0,0 +1,25 @@ 
+label,perturb_after,sigma,n_samples,candidates,fd_eps,ckpt_root,ckpt_name,clean_acc,mean_sign_exact,mean_sign_token_acc,worst_sign_exact,best_sign_exact,selected_growth_mean,selected_growth_q90,retain_mean_on_clean_success,retain_worst_on_clean_success,rescue_mean_on_clean_fail,rescue_best_on_clean_fail +trm_baseline_best,0,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8579999804496765,0.8579999804496765,0.9472222328186035,0.8579999804496765,0.8579999804496765,-3.885612726211548,-3.8856122493743896,1.0,1.0,0.0,0.0 +trm_baseline_best,0,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8579999804496765,0.8579999804496765,0.9472222328186035,0.8579999804496765,0.8579999804496765,-3.885612726211548,-3.8856122493743896,1.0,1.0,0.0,0.0 +trm_baseline_best,0,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8579999804496765,0.8535000085830688,0.9457222819328308,0.8289999961853027,0.878000020980835,-3.885612726211548,-3.8856122493743896,0.9825174808502197,0.9650349617004395,0.07394365966320038,0.14084507524967194 +trm_baseline_best,0,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8579999804496765,0.8615000247955322,0.9481049180030823,0.8240000009536743,0.8989999890327454,-3.885612726211548,-3.8856122493743896,0.9603729844093323,0.9382284283638,0.26408451795578003,0.39436620473861694 
+trm_baseline_best,0,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8579999804496765,0.8560000061988831,0.9467840790748596,0.8119999766349792,0.8999999761581421,-3.885612726211548,-3.8856122493743896,0.9545454382896423,0.9219114184379578,0.26056337356567383,0.3732394278049469 +trm_baseline_best,0,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8579999804496765,0.8619999885559082,0.9479444622993469,0.8240000009536743,0.8999999761581421,-3.885612726211548,-3.8856122493743896,0.9586247205734253,0.939393937587738,0.27816900610923767,0.42957746982574463 +trm_baseline_best,4,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8489999771118164,0.8489999771118164,0.9435185194015503,0.8489999771118164,0.8489999771118164,-1.2879749536514282,0.8905364871025085,1.0,1.0,0.0,0.0 +trm_baseline_best,4,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8489999771118164,0.8510000109672546,0.944135844707489,0.8379999995231628,0.8640000224113464,-1.2879749536514282,0.8905364871025085,0.9929328560829163,0.9858657121658325,0.05298013240098953,0.09933774918317795 +trm_baseline_best,4,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8489999771118164,0.8554999828338623,0.9452839493751526,0.8270000219345093,0.8840000033378601,-1.2879749536514282,0.8905364871025085,0.9734982252120972,0.9611307382583618,0.19205297529697418,0.3112582862377167 
+trm_baseline_best,4,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8489999771118164,0.8565000295639038,0.9460123181343079,0.828000009059906,0.8849999904632568,-1.2879749536514282,0.8905364871025085,0.9723203778266907,0.9587750434875488,0.20529800653457642,0.3178808093070984 +trm_baseline_best,4,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8489999771118164,0.8560000061988831,0.9458826184272766,0.828000009059906,0.8840000033378601,-1.2879749536514282,0.8905364871025085,0.9699646830558777,0.9564192891120911,0.215231791138649,0.3245033025741577 +trm_baseline_best,4,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8489999771118164,0.8550000190734863,0.9454135894775391,0.8289999961853027,0.8809999823570251,-1.2879749536514282,0.8905364871025085,0.9699646830558777,0.9552414417266846,0.20860926806926727,0.29801324009895325 +trm_baseline_best,8,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8539999723434448,0.8539999723434448,0.9449505805969238,0.8539999723434448,0.8539999723434448,-1.811623215675354,1.4589354991912842,1.0,1.0,0.0,0.0 +trm_baseline_best,8,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8539999723434448,0.8544999957084656,0.9452963471412659,0.8429999947547913,0.8659999966621399,-1.811623215675354,1.4589354991912842,0.993559718132019,0.9871194362640381,0.04109589010477066,0.08219178020954132 
+trm_baseline_best,8,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8539999723434448,0.8600000143051147,0.9476419687271118,0.8420000076293945,0.878000020980835,-1.811623215675354,1.4589354991912842,0.983021080493927,0.976580798625946,0.14041095972061157,0.22602739930152893 +trm_baseline_best,8,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8539999723434448,0.8544999957084656,0.9457098245620728,0.8320000171661377,0.8769999742507935,-1.811623215675354,1.4589354991912842,0.9783372282981873,0.966042160987854,0.13013698160648346,0.21232876181602478 +trm_baseline_best,8,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8539999723434448,0.8529999852180481,0.9448826909065247,0.8360000252723694,0.8700000047683716,-1.811623215675354,1.4589354991912842,0.9783372282981873,0.9695550203323364,0.11986301094293594,0.18493150174617767 +trm_baseline_best,8,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8539999723434448,0.8544999957084656,0.9452345371246338,0.8379999995231628,0.8709999918937683,-1.811623215675354,1.4589354991912842,0.9800936579704285,0.9730679392814636,0.11986301094293594,0.19178082048892975 +trm_baseline_best,12,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8550000190734863,0.8550000190734863,0.9453826546669006,0.8550000190734863,0.8550000190734863,-3.6991007328033447,2.8621342182159424,1.0,1.0,0.0,0.0 
+trm_baseline_best,12,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8550000190734863,0.8525000214576721,0.944302499294281,0.847000002861023,0.8579999804496765,-3.6991007328033447,2.8621342182159424,0.9947368502616882,0.9906432628631592,0.013793103396892548,0.027586206793785095 +trm_baseline_best,12,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8550000190734863,0.8519999980926514,0.9443457126617432,0.8429999947547913,0.8610000014305115,-3.6991007328033447,2.8621342182159424,0.988304078578949,0.984795331954956,0.04827586188912392,0.08965517580509186 +trm_baseline_best,12,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8550000190734863,0.8489999771118164,0.9430555105209351,0.8460000157356262,0.8519999980926514,-3.6991007328033447,2.8621342182159424,0.988304078578949,0.9871345162391663,0.027586206793785095,0.04137931019067764 +trm_baseline_best,12,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8550000190734863,0.8554999828338623,0.945648193359375,0.8479999899864197,0.8629999756813049,-3.6991007328033447,2.8621342182159424,0.9888888597488403,0.9859648942947388,0.06896551698446274,0.1034482792019844 +trm_baseline_best,12,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8550000190734863,0.8500000238418579,0.9435864090919495,0.8460000157356262,0.8539999723434448,-3.6991007328033447,2.8621342182159424,0.988304078578949,0.9871345162391663,0.03448275849223137,0.05517241358757019 diff --git a/directional_lyap_perturb/trm_multi4_best_step35805_n1000_c8_dirlyap.summary.csv 
b/directional_lyap_perturb/trm_multi4_best_step35805_n1000_c8_dirlyap.summary.csv new file mode 100644 index 0000000..34bb554 --- /dev/null +++ b/directional_lyap_perturb/trm_multi4_best_step35805_n1000_c8_dirlyap.summary.csv @@ -0,0 +1,25 @@ +label,perturb_after,sigma,n_samples,candidates,fd_eps,ckpt_root,ckpt_name,clean_acc,mean_sign_exact,mean_sign_token_acc,worst_sign_exact,best_sign_exact,selected_growth_mean,selected_growth_q90,retain_mean_on_clean_success,retain_worst_on_clean_success,rescue_mean_on_clean_fail,rescue_best_on_clean_fail +trm_multi4_best,0,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8930000066757202,0.8930000066757202,0.9589999914169312,0.8930000066757202,0.8930000066757202,-3.885612726211548,-3.8856122493743896,1.0,1.0,0.0,0.0 +trm_multi4_best,0,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8930000066757202,0.8930000066757202,0.9589999914169312,0.8930000066757202,0.8930000066757202,-3.885612726211548,-3.8856122493743896,1.0,1.0,0.0,0.0 +trm_multi4_best,0,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8930000066757202,0.8970000147819519,0.960277795791626,0.8840000033378601,0.9100000262260437,-3.885612726211548,-3.8856122493743896,0.9938409924507141,0.9876819849014282,0.08878504484891891,0.15887850522994995 +trm_multi4_best,0,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8930000066757202,0.8884999752044678,0.9585925340652466,0.8600000143051147,0.9169999957084656,-3.885612726211548,-3.8856122493743896,0.9591265320777893,0.9406495094299316,0.29906541109085083,0.4112149477005005 
+trm_multi4_best,0,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8930000066757202,0.8865000009536743,0.9574320316314697,0.8519999980926514,0.9210000038146973,-3.885612726211548,-3.8856122493743896,0.9596864581108093,0.9372900128364563,0.2757009267807007,0.4112149477005005 +trm_multi4_best,0,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8930000066757202,0.8989999890327454,0.9610432386398315,0.871999979019165,0.9259999990463257,-3.885612726211548,-3.8856122493743896,0.9664053916931152,0.9462485909461975,0.336448609828949,0.420560747385025 +trm_multi4_best,4,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8840000033378601,0.9556913375854492,0.8840000033378601,0.8840000033378601,-1.7770261764526367,0.8977539539337158,1.0,1.0,0.0,0.0 +trm_multi4_best,4,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.887499988079071,0.9567221403121948,0.8769999742507935,0.8980000019073486,-1.7770261764526367,0.8977539539337158,0.9954751133918762,0.9909502267837524,0.06465516984462738,0.12068965286016464 +trm_multi4_best,4,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8884999752044678,0.9570987820625305,0.8659999966621399,0.9110000133514404,-1.7770261764526367,0.8977539539337158,0.9796379804611206,0.9683257937431335,0.19396552443504333,0.3017241358757019 
+trm_multi4_best,4,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8865000009536743,0.9563456773757935,0.8610000014305115,0.9120000004768372,-1.7770261764526367,0.8977539539337158,0.9717194437980652,0.959276020526886,0.23706896603107452,0.36206895112991333 +trm_multi4_best,4,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8899999856948853,0.9573581218719482,0.8659999966621399,0.9139999747276306,-1.7770261764526367,0.8977539539337158,0.9739819169044495,0.959276020526886,0.25,0.3448275923728943 +trm_multi4_best,4,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8960000276565552,0.9607962369918823,0.8700000047683716,0.921999990940094,-1.7770261764526367,0.8977539539337158,0.9739819169044495,0.9615384340286255,0.3017241358757019,0.43103447556495667 +trm_multi4_best,8,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8889999985694885,0.8889999985694885,0.9573332667350769,0.8889999985694885,0.8889999985694885,-2.587038040161133,1.1594672203063965,1.0,1.0,0.0,0.0 +trm_multi4_best,8,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8889999985694885,0.8920000195503235,0.9578518271446228,0.8870000243186951,0.8970000147819519,-2.587038040161133,1.1594672203063965,0.9977502822875977,0.9955005645751953,0.045045044273138046,0.07207207381725311 
+trm_multi4_best,8,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8889999985694885,0.8880000114440918,0.9568086266517639,0.8709999918937683,0.9049999713897705,-2.587038040161133,1.1594672203063965,0.9842519760131836,0.9752531051635742,0.11711711436510086,0.19819819927215576 +trm_multi4_best,8,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8889999985694885,0.8889999985694885,0.9574383497238159,0.8730000257492065,0.9049999713897705,-2.587038040161133,1.1594672203063965,0.9848144054412842,0.9775028228759766,0.12162162363529205,0.20720720291137695 +trm_multi4_best,8,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8889999985694885,0.8895000219345093,0.957660436630249,0.871999979019165,0.9070000052452087,-2.587038040161133,1.1594672203063965,0.9859392642974854,0.9797525405883789,0.11711711436510086,0.22522522509098053 +trm_multi4_best,8,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8889999985694885,0.8914999961853027,0.9589135646820068,0.8769999742507935,0.906000018119812,-2.587038040161133,1.1594672203063965,0.9848144054412842,0.9786276817321777,0.14414414763450623,0.22522522509098053 +trm_multi4_best,12,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8840000033378601,0.956098735332489,0.8840000033378601,0.8840000033378601,-4.764944553375244,2.045381546020508,1.0,1.0,0.0,0.0 
+trm_multi4_best,12,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8849999904632568,0.9564691781997681,0.8820000290870667,0.8880000114440918,-4.764944553375244,2.045381546020508,0.9977375268936157,0.9966063499450684,0.0258620698004961,0.043103449046611786 +trm_multi4_best,12,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8855000138282776,0.9565123915672302,0.8809999823570251,0.8899999856948853,-4.764944553375244,2.045381546020508,0.9949095249176025,0.9943438768386841,0.0517241396009922,0.08620689809322357 +trm_multi4_best,12,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8855000138282776,0.9570556282997131,0.8799999952316284,0.890999972820282,-4.764944553375244,2.045381546020508,0.9954751133918762,0.9943438768386841,0.047413792461156845,0.08620689809322357 +trm_multi4_best,12,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8849999904632568,0.9560925960540771,0.8809999823570251,0.8889999985694885,-4.764944553375244,2.045381546020508,0.9949095249176025,0.9943438768386841,0.047413792461156845,0.07758620381355286 +trm_multi4_best,12,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8840000033378601,0.8849999904632568,0.9559259414672852,0.8799999952316284,0.8899999856948853,-4.764944553375244,2.045381546020508,0.9943438768386841,0.9932126402854919,0.0517241396009922,0.08620689809322357 diff --git 
a/directional_lyap_perturb/trm_multi4_final_step65100_n1000_c8_dirlyap.summary.csv b/directional_lyap_perturb/trm_multi4_final_step65100_n1000_c8_dirlyap.summary.csv new file mode 100644 index 0000000..9df33ba --- /dev/null +++ b/directional_lyap_perturb/trm_multi4_final_step65100_n1000_c8_dirlyap.summary.csv @@ -0,0 +1,25 @@ +label,perturb_after,sigma,n_samples,candidates,fd_eps,ckpt_root,ckpt_name,clean_acc,mean_sign_exact,mean_sign_token_acc,worst_sign_exact,best_sign_exact,selected_growth_mean,selected_growth_q90,retain_mean_on_clean_success,retain_worst_on_clean_success,rescue_mean_on_clean_fail,rescue_best_on_clean_fail +trm_multi4_final,0,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8299999833106995,0.9333704113960266,0.8299999833106995,0.8299999833106995,-3.885612726211548,-3.8856122493743896,1.0,1.0,0.0,0.0 +trm_multi4_final,0,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8299999833106995,0.9333704113960266,0.8299999833106995,0.8299999833106995,-3.885612726211548,-3.8856122493743896,1.0,1.0,0.0,0.0 +trm_multi4_final,0,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8289999961853027,0.9334814548492432,0.8199999928474426,0.8379999995231628,-3.885612726211548,-3.8856122493743896,0.9927710890769958,0.9855421781539917,0.029411764815449715,0.0470588244497776 
+trm_multi4_final,0,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8255000114440918,0.9313827157020569,0.7990000247955322,0.8519999980926514,-3.885612726211548,-3.8856122493743896,0.9572288990020752,0.9385542273521423,0.1823529452085495,0.24705882370471954 +trm_multi4_final,0,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8295000195503235,0.9325369596481323,0.7960000038146973,0.8629999756813049,-3.885612726211548,-3.8856122493743896,0.9590361714363098,0.9385542273521423,0.19705882668495178,0.29411765933036804 +trm_multi4_final,0,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8309999704360962,0.933006227016449,0.7990000247955322,0.8629999756813049,-3.885612726211548,-3.8856122493743896,0.9554216861724854,0.9325301051139832,0.2235294133424759,0.30000001192092896 +trm_multi4_final,4,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8370000123977661,0.8370000123977661,0.9353950619697571,0.8370000123977661,0.8370000123977661,-1.9923189878463745,0.9806549549102783,1.0,1.0,0.0,0.0 +trm_multi4_final,4,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8370000123977661,0.8355000019073486,0.9351357817649841,0.8240000009536743,0.847000002861023,-1.9923189878463745,0.9806549549102783,0.9910394549369812,0.9820788502693176,0.03680981695652008,0.061349693685770035 
+trm_multi4_final,4,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8370000123977661,0.8355000019073486,0.9351975321769714,0.8130000233650208,0.8579999804496765,-1.9923189878463745,0.9806549549102783,0.9731183052062988,0.9617682099342346,0.12883435189723969,0.20858895778656006 +trm_multi4_final,4,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8370000123977661,0.8299999833106995,0.9334691166877747,0.8059999942779541,0.8539999723434448,-1.9923189878463745,0.9806549549102783,0.965352475643158,0.9498208165168762,0.1349693238735199,0.20245398581027985 +trm_multi4_final,4,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8370000123977661,0.8309999704360962,0.9334567785263062,0.8069999814033508,0.8550000190734863,-1.9923189878463745,0.9806549549102783,0.9677419066429138,0.9534050226211548,0.12883435189723969,0.20245398581027985 +trm_multi4_final,4,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8370000123977661,0.8330000042915344,0.933746874332428,0.8069999814033508,0.859000027179718,-1.9923189878463745,0.9806549549102783,0.9701313972473145,0.9569892287254333,0.12883435189723969,0.22085890173912048 +trm_multi4_final,8,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8299999833106995,0.9329506754875183,0.8299999833106995,0.8299999833106995,-2.597419500350952,1.466092586517334,1.0,1.0,0.0,0.0 
+trm_multi4_final,8,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8299999833106995,0.9333766102790833,0.8220000267028809,0.8379999995231628,-2.597419500350952,1.466092586517334,0.9951807260513306,0.9903614521026611,0.0235294122248888,0.0470588244497776 +trm_multi4_final,8,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.828000009059906,0.9320307374000549,0.8130000233650208,0.8429999947547913,-2.597419500350952,1.466092586517334,0.9849397540092468,0.9771084189414978,0.061764705926179886,0.11176470667123795 +trm_multi4_final,8,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8314999938011169,0.9331419467926025,0.8190000057220459,0.843999981880188,-2.597419500350952,1.466092586517334,0.9849397540092468,0.9819276928901672,0.08235294371843338,0.1411764770746231 +trm_multi4_final,8,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8309999704360962,0.9327530860900879,0.8169999718666077,0.8450000286102295,-2.597419500350952,1.466092586517334,0.9819276928901672,0.9771084189414978,0.0941176488995552,0.15294118225574493 +trm_multi4_final,8,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8299999833106995,0.8335000276565552,0.9352839589118958,0.8180000185966492,0.8489999771118164,-2.597419500350952,1.466092586517334,0.9855421781539917,0.9819276928901672,0.09117647260427475,0.16470588743686676 
+trm_multi4_final,12,0.0,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8339999914169312,0.8339999914169312,0.9340246319770813,0.8339999914169312,0.8339999914169312,-5.133573055267334,2.7817435264587402,1.0,1.0,0.0,0.0 +trm_multi4_final,12,0.001,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8339999914169312,0.8335000276565552,0.9341234564781189,0.8289999961853027,0.8379999995231628,-5.133573055267334,2.7817435264587402,0.9970024228096008,0.9940047860145569,0.012048192322254181,0.024096384644508362 +trm_multi4_final,12,0.003,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8339999914169312,0.8345000147819519,0.9352160692214966,0.8289999961853027,0.8399999737739563,-5.133573055267334,2.7817435264587402,0.9922062158584595,0.9892086386680603,0.04216867312788963,0.0602409653365612 +trm_multi4_final,12,0.01,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8339999914169312,0.8320000171661377,0.9335123300552368,0.824999988079071,0.8389999866485596,-5.133573055267334,2.7817435264587402,0.9892086386680603,0.986810564994812,0.04216867312788963,0.07228915393352509 +trm_multi4_final,12,0.03,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8339999914169312,0.8299999833106995,0.9333456754684448,0.8240000009536743,0.8360000252723694,-5.133573055267334,2.7817435264587402,0.9880095720291138,0.9856114983558655,0.03614457696676254,0.0602409653365612 
+trm_multi4_final,12,0.1,1000,8,0.001,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,0.8339999914169312,0.8309999704360962,0.9343765377998352,0.824999988079071,0.8370000123977661,-5.133573055267334,2.7817435264587402,0.9898081421852112,0.9880095720291138,0.03313253074884415,0.0602409653365612 diff --git a/directional_lyap_perturb/watch_and_plot.sh b/directional_lyap_perturb/watch_and_plot.sh new file mode 100644 index 0000000..683d1b8 --- /dev/null +++ b/directional_lyap_perturb/watch_and_plot.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT=/home/yurenh2/rrm +PY=/home/yurenh2/miniconda3/envs/rrm/bin/python +cd "${ROOT}" + +PIDS=( + research/flossing/directional_lyap_perturb/logs/trm_baseline_best_step58590_n1000_c8_dirlyap.pid + research/flossing/directional_lyap_perturb/logs/trm_multi4_best_step35805_n1000_c8_dirlyap.pid + research/flossing/directional_lyap_perturb/logs/trm_multi4_final_step65100_n1000_c8_dirlyap.pid +) + +for pf in "${PIDS[@]}"; do + pid=$(cat "${pf}") + echo "watch ${pf}: ${pid}" + while kill -0 "${pid}" 2>/dev/null; do + sleep 60 + done + echo "done ${pf}: ${pid}" +done + +"${PY}" research/flossing/plot_directional_lyap_perturb.py \ + --summaries \ + research/flossing/directional_lyap_perturb/trm_baseline_best_step58590_n1000_c8_dirlyap.summary.csv \ + research/flossing/directional_lyap_perturb/trm_multi4_best_step35805_n1000_c8_dirlyap.summary.csv \ + research/flossing/directional_lyap_perturb/trm_multi4_final_step65100_n1000_c8_dirlyap.summary.csv \ + --out-dir research/flossing/directional_lyap_perturb/plots \ + --slice-sigma 0.03 + +nvidia-smi --query-gpu=index,memory.used,memory.total,utilization.gpu --format=csv,noheader,nounits \ + > research/flossing/directional_lyap_perturb/plots/final_gpu_status.txt diff --git a/directional_lyap_perturb_eps_sweep/plots/directional_lyap_perturb_combined.csv 
b/directional_lyap_perturb_eps_sweep/plots/directional_lyap_perturb_combined.csv new file mode 100644 index 0000000..9aacd7a --- /dev/null +++ b/directional_lyap_perturb_eps_sweep/plots/directional_lyap_perturb_combined.csv @@ -0,0 +1,97 @@ +label,perturb_after,sigma,n_samples,candidates,fd_eps,ckpt_root,ckpt_name,clean_acc,mean_sign_exact,mean_sign_token_acc,worst_sign_exact,best_sign_exact,selected_growth_mean,selected_growth_q90,retain_mean_on_clean_success,retain_worst_on_clean_success,rescue_mean_on_clean_fail,rescue_best_on_clean_fail +trm_baseline_best,0,0.0,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8949999809265137,0.961814820766449,0.8949999809265137,0.8949999809265137,0.5487902164459229,0.6468369364738464,1.0,1.0,0.0,0.0 +trm_baseline_best,0,0.001,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8949999809265137,0.961814820766449,0.8949999809265137,0.8949999809265137,0.5487902164459229,0.6468369364738464,1.0,1.0,0.0,0.0 +trm_baseline_best,0,0.003,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8934999704360962,0.961506187915802,0.8790000081062317,0.9079999923706055,0.5487902164459229,0.6468369364738464,0.9905027747154236,0.9821228981018066,0.06666667014360428,0.13333334028720856 +trm_baseline_best,0,0.01,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.890500009059906,0.9606789350509644,0.8539999723434448,0.9269999861717224,0.5487902164459229,0.6468369364738464,0.9631285071372986,0.9396647810935974,0.27142858505249023,0.41904762387275696 
+trm_baseline_best,0,0.03,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8859999775886536,0.9576419591903687,0.8610000014305115,0.9110000133514404,0.5487902164459229,0.6468369364738464,0.9642457962036133,0.9486033320426941,0.21904762089252472,0.32380953431129456 +trm_baseline_best,0,0.1,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.890500009059906,0.9597654342651367,0.8579999804496765,0.9229999780654907,0.5487902164459229,0.6468369364738464,0.9653631448745728,0.9474860429763794,0.25238096714019775,0.4095238149166107 +trm_baseline_best,4,0.0,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8980000019073486,0.9629135727882385,0.8980000019073486,0.8980000019073486,0.6417636275291443,0.8611953854560852,1.0,1.0,0.0,0.0 +trm_baseline_best,4,0.001,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8974999785423279,0.9625309109687805,0.8920000195503235,0.902999997138977,0.6417636275291443,0.8611953854560852,0.9966592192649841,0.993318498134613,0.02450980432331562,0.04901960864663124 +trm_baseline_best,4,0.003,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8934999704360962,0.9609135389328003,0.8659999966621399,0.9210000038146973,0.6417636275291443,0.8611953854560852,0.9760578870773315,0.9587973356246948,0.1666666716337204,0.28431373834609985 
+trm_baseline_best,4,0.01,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8859999775886536,0.9590432047843933,0.8629999756813049,0.9089999794960022,0.6417636275291443,0.8611953854560852,0.9660356640815735,0.948775053024292,0.18137255311012268,0.2549019753932953 +trm_baseline_best,4,0.03,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8840000033378601,0.957679033279419,0.8560000061988831,0.9120000004768372,0.6417636275291443,0.8611953854560852,0.9649220705032349,0.9476614594459534,0.1715686321258545,0.29411765933036804 +trm_baseline_best,4,0.1,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8889999985694885,0.9591296315193176,0.8629999756813049,0.9150000214576721,0.6417636275291443,0.8611953854560852,0.9671491980552673,0.9521158337593079,0.20098039507865906,0.3235294222831726 +trm_baseline_best,8,0.0,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8970000147819519,0.9637901186943054,0.8970000147819519,0.8970000147819519,0.8348738551139832,1.2223771810531616,1.0,1.0,0.0,0.0 +trm_baseline_best,8,0.001,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8970000147819519,0.9635308980941772,0.8939999938011169,0.8999999761581421,0.8348738551139832,1.2223771810531616,0.997770369052887,0.9966555237770081,0.019417475908994675,0.03883495181798935 
+trm_baseline_best,8,0.003,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8964999914169312,0.9620987772941589,0.8769999742507935,0.9160000085830688,0.8348738551139832,1.2223771810531616,0.9832776188850403,0.9732441306114197,0.14077669382095337,0.24271844327449799 +trm_baseline_best,8,0.01,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8945000171661377,0.9619569182395935,0.8790000081062317,0.9100000262260437,0.8348738551139832,1.2223771810531616,0.9777034521102905,0.9732441306114197,0.16990290582180023,0.28155338764190674 +trm_baseline_best,8,0.03,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8964999914169312,0.9622469544410706,0.8809999823570251,0.9120000004768372,0.8348738551139832,1.2223771810531616,0.9821627736091614,0.9754738211631775,0.15048544108867645,0.24271844327449799 +trm_baseline_best,8,0.1,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.890500009059906,0.9603580236434937,0.875,0.906000018119812,0.8348738551139832,1.2223771810531616,0.9771460294723511,0.9676700234413147,0.13592232763767242,0.20388349890708923 +trm_baseline_best,12,0.0,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8930000066757202,0.9613826274871826,0.8930000066757202,0.8930000066757202,1.5429991483688354,2.3959367275238037,1.0,1.0,0.0,0.0 
+trm_baseline_best,12,0.001,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8924999833106995,0.961481511592865,0.8920000195503235,0.8930000066757202,1.5429991483688354,2.3959367275238037,0.99944007396698,0.9988802075386047,0.0,0.0 +trm_baseline_best,12,0.003,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8914999961853027,0.9610123634338379,0.8859999775886536,0.8970000147819519,1.5429991483688354,2.3959367275238037,0.9938409924507141,0.9910414218902588,0.037383176386356354,0.06542056053876877 +trm_baseline_best,12,0.01,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8914999961853027,0.9606543183326721,0.8849999904632568,0.8980000019073486,1.5429991483688354,2.3959367275238037,0.9921612739562988,0.9899216294288635,0.05140186846256256,0.09345794469118118 +trm_baseline_best,12,0.03,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8930000066757202,0.9610803127288818,0.8849999904632568,0.9010000228881836,1.5429991483688354,2.3959367275238037,0.9927211403846741,0.9899216294288635,0.0607476644217968,0.11214952915906906 +trm_baseline_best,12,0.1,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8930000066757202,0.961530864238739,0.8859999775886536,0.8999999761581421,1.5429991483688354,2.3959367275238037,0.9938409924507141,0.9910414218902588,0.05140186846256256,0.09345794469118118 
+trm_multi4_best,0,0.0,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.8899999856948853,0.9582962989807129,0.8899999856948853,0.8899999856948853,0.5268718004226685,0.6456454992294312,1.0,1.0,0.0,0.0 +trm_multi4_best,0,0.001,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.8899999856948853,0.9582962989807129,0.8899999856948853,0.8899999856948853,0.5268718004226685,0.6456454992294312,1.0,1.0,0.0,0.0 +trm_multi4_best,0,0.003,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.8955000042915344,0.9605987668037415,0.8870000243186951,0.9039999842643738,0.5268718004226685,0.6456454992294312,0.9960674047470093,0.9932584166526794,0.08181817829608917,0.13636364042758942 +trm_multi4_best,0,0.01,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.9075000286102295,0.9647963047027588,0.8820000290870667,0.9330000281333923,0.5268718004226685,0.6456454992294312,0.9820224642753601,0.9674157500267029,0.30454546213150024,0.41818180680274963 +trm_multi4_best,0,0.03,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.8970000147819519,0.9618827104568481,0.8679999709129333,0.9259999990463257,0.5268718004226685,0.6456454992294312,0.9764044880867004,0.9606741666793823,0.2545454502105713,0.3909091055393219 
+trm_multi4_best,0,0.1,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.9089999794960022,0.9657654166221619,0.8790000081062317,0.9390000104904175,0.5268718004226685,0.6456454992294312,0.983146071434021,0.968539297580719,0.30909091234207153,0.4636363685131073 +trm_multi4_best,4,0.0,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.921999990940094,0.970037043094635,0.921999990940094,0.921999990940094,0.5925552845001221,0.8220179677009583,1.0,1.0,0.0,0.0 +trm_multi4_best,4,0.001,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9175000190734863,0.9687839150428772,0.9120000004768372,0.9229999780654907,0.5925552845001221,0.8220179677009583,0.9940347075462341,0.989154040813446,0.012820512987673283,0.025641025975346565 +trm_multi4_best,4,0.003,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9135000109672546,0.9665740132331848,0.8980000019073486,0.9290000200271606,0.5925552845001221,0.8220179677009583,0.9772234559059143,0.9674620628356934,0.16025641560554504,0.24358974397182465 +trm_multi4_best,4,0.01,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9110000133514404,0.9670863747596741,0.8920000195503235,0.9300000071525574,0.5925552845001221,0.8220179677009583,0.9701735377311707,0.9598698616027832,0.21153846383094788,0.3333333432674408 
+trm_multi4_best,4,0.03,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9125000238418579,0.9668148159980774,0.8930000066757202,0.9319999814033508,0.5925552845001221,0.8220179677009583,0.9718004465103149,0.9566160440444946,0.21153846383094788,0.28205129504203796 +trm_multi4_best,4,0.1,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9150000214576721,0.967555582523346,0.8949999809265137,0.9350000023841858,0.5925552845001221,0.8220179677009583,0.9750542044639587,0.962039053440094,0.20512820780277252,0.3076923191547394 +trm_multi4_best,8,0.0,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9110000133514404,0.9664074182510376,0.9110000133514404,0.9110000133514404,0.8208635449409485,1.2134735584259033,1.0,1.0,0.0,0.0 +trm_multi4_best,8,0.001,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9089999794960022,0.9657962918281555,0.9079999923706055,0.9100000262260437,0.8208635449409485,1.2134735584259033,0.9972557425498962,0.9967069029808044,0.00561797758564353,0.01123595517128706 +trm_multi4_best,8,0.003,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9120000004768372,0.9669011831283569,0.8960000276565552,0.9279999732971191,0.8208635449409485,1.2134735584259033,0.9868276715278625,0.9791437983512878,0.14606741070747375,0.24719101190567017 
+trm_multi4_best,8,0.01,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9120000004768372,0.966401219367981,0.8980000019073486,0.9259999990463257,0.8208635449409485,1.2134735584259033,0.9868276715278625,0.9780461192131042,0.14606741070747375,0.21348313987255096 +trm_multi4_best,8,0.03,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9120000004768372,0.9667222499847412,0.8960000276565552,0.9279999732971191,0.8208635449409485,1.2134735584259033,0.983534574508667,0.9769483804702759,0.17977528274059296,0.2921348214149475 +trm_multi4_best,8,0.1,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9129999876022339,0.9660493731498718,0.8999999761581421,0.9259999990463257,0.8208635449409485,1.2134735584259033,0.9862788319587708,0.9802414774894714,0.16292135417461395,0.24719101190567017 +trm_multi4_best,12,0.0,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9190000295639038,0.9690986275672913,0.9190000295639038,0.9190000295639038,1.5447064638137817,1.7272931337356567,1.0,1.0,0.0,0.0 +trm_multi4_best,12,0.001,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9175000190734863,0.9684197902679443,0.9150000214576721,0.9200000166893005,1.5447064638137817,1.7272931337356567,0.9972796440124512,0.9956474304199219,0.012345679104328156,0.02469135820865631 
+trm_multi4_best,12,0.003,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9175000190734863,0.9685803055763245,0.9110000133514404,0.9240000247955322,1.5447064638137817,1.7272931337356567,0.9940152168273926,0.9902067184448242,0.04938271641731262,0.08641975373029709 +trm_multi4_best,12,0.01,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9154999852180481,0.9677098989486694,0.9110000133514404,0.9200000166893005,1.5447064638137817,1.7272931337356567,0.990750789642334,0.9891186356544495,0.06172839552164078,0.09876543283462524 +trm_multi4_best,12,0.03,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9175000190734863,0.9684074521064758,0.9110000133514404,0.9240000247955322,1.5447064638137817,1.7272931337356567,0.9912948608398438,0.9902067184448242,0.08024691045284271,0.14814814925193787 +trm_multi4_best,12,0.1,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9200000166893005,0.9692777991294861,0.9139999747276306,0.9259999990463257,1.5447064638137817,1.7272931337356567,0.992927074432373,0.9912948608398438,0.09259258955717087,0.14814814925193787 +trm_baseline_best,0,0.0,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8949999809265137,0.961814820766449,0.8949999809265137,0.8949999809265137,0.4821663796901703,0.5784756541252136,1.0,1.0,0.0,0.0 
+trm_baseline_best,0,0.001,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8949999809265137,0.961814820766449,0.8949999809265137,0.8949999809265137,0.4821663796901703,0.5784756541252136,1.0,1.0,0.0,0.0 +trm_baseline_best,0,0.003,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8980000019073486,0.9630987644195557,0.8790000081062317,0.9169999957084656,0.4821663796901703,0.5784756541252136,0.9893854856491089,0.9798882603645325,0.1190476194024086,0.21904762089252472 +trm_baseline_best,0,0.01,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.887499988079071,0.9590184688568115,0.8600000143051147,0.9150000214576721,0.4821663796901703,0.5784756541252136,0.9653631448745728,0.9463686943054199,0.22380952537059784,0.32380953431129456 +trm_baseline_best,0,0.03,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8895000219345093,0.9602900743484497,0.8619999885559082,0.9169999957084656,0.4821663796901703,0.5784756541252136,0.9670391082763672,0.9508379697799683,0.22857142984867096,0.3523809611797333 +trm_baseline_best,0,0.1,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8930000066757202,0.961030900478363,0.8619999885559082,0.9240000247955322,0.4821663796901703,0.5784756541252136,0.9670391082763672,0.9474860429763794,0.261904776096344,0.39047619700431824 
+trm_baseline_best,4,0.0,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8980000019073486,0.9629135727882385,0.8980000019073486,0.8980000019073486,0.5513403415679932,0.7693743705749512,1.0,1.0,0.0,0.0 +trm_baseline_best,4,0.001,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8955000042915344,0.9619382619857788,0.8899999856948853,0.9010000228881836,0.5513403415679932,0.7693743705749512,0.9955456852912903,0.9910913109779358,0.014705882407724857,0.029411764815449715 +trm_baseline_best,4,0.003,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8880000114440918,0.9597716331481934,0.8669999837875366,0.9089999794960022,0.5513403415679932,0.7693743705749512,0.9716035723686218,0.9599109292030334,0.15196079015731812,0.2549019753932953 +trm_baseline_best,4,0.01,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8934999704360962,0.9613086581230164,0.8700000047683716,0.9169999957084656,0.5513403415679932,0.7693743705749512,0.9738307595252991,0.9610245227813721,0.18627451360225677,0.30392158031463623 +trm_baseline_best,4,0.03,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.890999972820282,0.9601975083351135,0.8650000095367432,0.9169999957084656,0.5513403415679932,0.7693743705749512,0.9688196182250977,0.9510022401809692,0.20588235557079315,0.30392158031463623 
+trm_baseline_best,4,0.1,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8880000114440918,0.9588024616241455,0.8640000224113464,0.9120000004768372,0.5513403415679932,0.7693743705749512,0.9699332118034363,0.955456554889679,0.1666666716337204,0.27450981736183167 +trm_baseline_best,8,0.0,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8970000147819519,0.9637901186943054,0.8970000147819519,0.8970000147819519,0.6981183886528015,1.082965612411499,1.0,1.0,0.0,0.0 +trm_baseline_best,8,0.001,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8974999785423279,0.963746964931488,0.8939999938011169,0.9010000228881836,0.6981183886528015,1.082965612411499,0.9983277320861816,0.9966555237770081,0.019417475908994675,0.03883495181798935 +trm_baseline_best,8,0.003,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8934999704360962,0.9613950848579407,0.8769999742507935,0.9100000262260437,0.6981183886528015,1.082965612411499,0.9793757200241089,0.9698996543884277,0.1456310749053955,0.223300963640213 +trm_baseline_best,8,0.01,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8914999961853027,0.9603024125099182,0.8759999871253967,0.9070000052452087,0.6981183886528015,1.082965612411499,0.9788182973861694,0.9710144996643066,0.13106796145439148,0.21359223127365112 
+trm_baseline_best,8,0.03,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8985000252723694,0.9633148312568665,0.8820000290870667,0.9150000214576721,0.6981183886528015,1.082965612411499,0.9816053509712219,0.9743589758872986,0.17475728690624237,0.27184465527534485 +trm_baseline_best,8,0.1,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8920000195503235,0.9607345461845398,0.8790000081062317,0.9049999713897705,0.6981183886528015,1.082965612411499,0.97826087474823,0.9698996543884277,0.14077669382095337,0.19417475163936615 +trm_baseline_best,12,0.0,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8930000066757202,0.9613826274871826,0.8930000066757202,0.8930000066757202,1.2681450843811035,2.1185879707336426,1.0,1.0,0.0,0.0 +trm_baseline_best,12,0.001,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8914999961853027,0.9607530236244202,0.8899999856948853,0.8930000066757202,1.2681450843811035,2.1185879707336426,0.9983202815055847,0.9966405630111694,0.0,0.0 +trm_baseline_best,12,0.003,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8930000066757202,0.9613209962844849,0.8880000114440918,0.8980000019073486,1.2681450843811035,2.1185879707336426,0.9955207109451294,0.9921612739562988,0.037383176386356354,0.05607476457953453 
+trm_baseline_best,12,0.01,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8899999856948853,0.9602900743484497,0.8849999904632568,0.8949999809265137,1.2681450843811035,2.1185879707336426,0.9904814958572388,0.9888017773628235,0.05140186846256256,0.08411215245723724 +trm_baseline_best,12,0.03,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.890999972820282,0.9605308175086975,0.8870000243186951,0.8949999809265137,1.2681450843811035,2.1185879707336426,0.9921612739562988,0.9910414218902588,0.04672897234559059,0.07476635277271271 +trm_baseline_best,12,0.1,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8895000219345093,0.9602345824241638,0.8840000033378601,0.8949999809265137,1.2681450843811035,2.1185879707336426,0.9921612739562988,0.9899216294288635,0.032710280269384384,0.06542056053876877 +trm_multi4_best,0,0.0,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.8899999856948853,0.9582962989807129,0.8899999856948853,0.8899999856948853,0.4601479172706604,0.5775974988937378,1.0,1.0,0.0,0.0 +trm_multi4_best,0,0.001,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.8899999856948853,0.9582962989807129,0.8899999856948853,0.8899999856948853,0.4601479172706604,0.5775974988937378,1.0,1.0,0.0,0.0 
+trm_multi4_best,0,0.003,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.8964999914169312,0.9609135985374451,0.8889999985694885,0.9039999842643738,0.4601479172706604,0.5775974988937378,0.9977527856826782,0.9955056309700012,0.07727272808551788,0.12727272510528564 +trm_multi4_best,0,0.01,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.9139999747276306,0.9676605463027954,0.8849999904632568,0.9430000185966492,0.4601479172706604,0.5775974988937378,0.9853932857513428,0.9719101190567017,0.33636364340782166,0.4909090995788574 +trm_multi4_best,0,0.03,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.9104999899864197,0.9665431380271912,0.8880000114440918,0.9330000281333923,0.4601479172706604,0.5775974988937378,0.9876404404640198,0.9786517024040222,0.2863636314868927,0.41818180680274963 +trm_multi4_best,0,0.1,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.9035000205039978,0.9636295437812805,0.8709999918937683,0.9359999895095825,0.4601479172706604,0.5775974988937378,0.9764044880867004,0.9606741666793823,0.3136363625526428,0.48181816935539246 +trm_multi4_best,4,0.0,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.921999990940094,0.970037043094635,0.921999990940094,0.921999990940094,0.5034276247024536,0.7669552564620972,1.0,1.0,0.0,0.0 
+trm_multi4_best,4,0.001,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9204999804496765,0.9694691300392151,0.9169999957084656,0.9240000247955322,0.5034276247024536,0.7669552564620972,0.9972885251045227,0.9945769906044006,0.012820512987673283,0.025641025975346565 +trm_multi4_best,4,0.003,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9139999747276306,0.9672160148620605,0.8999999761581421,0.9279999732971191,0.5034276247024536,0.7669552564620972,0.9783080220222473,0.9674620628356934,0.1538461595773697,0.20512820780277252 +trm_multi4_best,4,0.01,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9164999723434448,0.9680370092391968,0.8980000019073486,0.9350000023841858,0.5034276247024536,0.7669552564620972,0.9761388301849365,0.9652928709983826,0.21153846383094788,0.3205128312110901 +trm_multi4_best,4,0.03,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9075000286102295,0.9649505615234375,0.8899999856948853,0.925000011920929,0.5034276247024536,0.7669552564620972,0.9690889120101929,0.9566160440444946,0.1794871836900711,0.25641027092933655 +trm_multi4_best,4,0.1,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9139999747276306,0.9669753313064575,0.8939999938011169,0.9340000152587891,0.5034276247024536,0.7669552564620972,0.97559654712677,0.9642082452774048,0.18589743971824646,0.3076923191547394 
+trm_multi4_best,8,0.0,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9110000133514404,0.9664074182510376,0.9110000133514404,0.9110000133514404,0.6825441718101501,1.0761522054672241,1.0,1.0,0.0,0.0 +trm_multi4_best,8,0.001,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9125000238418579,0.9669321179389954,0.9089999794960022,0.9160000085830688,0.6825441718101501,1.0761522054672241,0.9989023208618164,0.997804582118988,0.028089888393878937,0.056179776787757874 +trm_multi4_best,8,0.003,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9139999747276306,0.9670432806015015,0.8980000019073486,0.9300000071525574,0.6825441718101501,1.0761522054672241,0.9890230298042297,0.983534574508667,0.14606741070747375,0.26966291666030884 +trm_multi4_best,8,0.01,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9160000085830688,0.9678518772125244,0.9020000100135803,0.9300000071525574,0.6825441718101501,1.0761522054672241,0.9879253506660461,0.983534574508667,0.17977528274059296,0.2921348214149475 +trm_multi4_best,8,0.03,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9125000238418579,0.9668642282485962,0.9020000100135803,0.9229999780654907,0.6825441718101501,1.0761522054672241,0.9879253506660461,0.9824368953704834,0.1404494345188141,0.20224718749523163 
+trm_multi4_best,8,0.1,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9179999828338623,0.9684135317802429,0.9049999713897705,0.9309999942779541,0.6825441718101501,1.0761522054672241,0.9884741902351379,0.983534574508667,0.19662921130657196,0.2921348214149475 +trm_multi4_best,12,0.0,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9190000295639038,0.9690986275672913,0.9190000295639038,0.9190000295639038,1.2695486545562744,1.4881229400634766,1.0,1.0,0.0,0.0 +trm_multi4_best,12,0.001,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9175000190734863,0.9683518409729004,0.9160000085830688,0.9190000295639038,1.2695486545562744,1.4881229400634766,0.9978237152099609,0.9967355728149414,0.006172839552164078,0.012345679104328156 +trm_multi4_best,12,0.003,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9164999723434448,0.9678456783294678,0.9100000262260437,0.9229999780654907,1.2695486545562744,1.4881229400634766,0.992927074432373,0.9891186356544495,0.04938271641731262,0.08641975373029709 +trm_multi4_best,12,0.01,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9190000295639038,0.9691110849380493,0.9120000004768372,0.9259999990463257,1.2695486545562744,1.4881229400634766,0.9940152168273926,0.9912948608398438,0.06790123134851456,0.12345679104328156 
+trm_multi4_best,12,0.03,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9164999723434448,0.9683271646499634,0.9120000004768372,0.9210000038146973,1.2695486545562744,1.4881229400634766,0.992927074432373,0.9912948608398438,0.04938271641731262,0.08641975373029709 +trm_multi4_best,12,0.1,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9200000166893005,0.9692345857620239,0.9139999747276306,0.9259999990463257,1.2695486545562744,1.4881229400634766,0.9934711456298828,0.9923830032348633,0.08641975373029709,0.14814814925193787 diff --git a/directional_lyap_perturb_eps_sweep/plots/final_gpu_status.txt b/directional_lyap_perturb_eps_sweep/plots/final_gpu_status.txt new file mode 100644 index 0000000..4ddfa59 --- /dev/null +++ b/directional_lyap_perturb_eps_sweep/plots/final_gpu_status.txt @@ -0,0 +1,4 @@ +0, 13, 49140, 0 +1, 618, 49140, 29 +2, 45494, 49140, 0 +3, 13, 49140, 0 diff --git a/directional_lyap_perturb_eps_sweep/trm_baseline_best_step58590_n1000_c8_fdeps001.summary.csv b/directional_lyap_perturb_eps_sweep/trm_baseline_best_step58590_n1000_c8_fdeps001.summary.csv new file mode 100644 index 0000000..fe53bc3 --- /dev/null +++ b/directional_lyap_perturb_eps_sweep/trm_baseline_best_step58590_n1000_c8_fdeps001.summary.csv @@ -0,0 +1,25 @@ +label,perturb_after,sigma,n_samples,candidates,fd_eps,ckpt_root,ckpt_name,clean_acc,mean_sign_exact,mean_sign_token_acc,worst_sign_exact,best_sign_exact,selected_growth_mean,selected_growth_q90,retain_mean_on_clean_success,retain_worst_on_clean_success,rescue_mean_on_clean_fail,rescue_best_on_clean_fail 
+trm_baseline_best,0,0.0,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8949999809265137,0.961814820766449,0.8949999809265137,0.8949999809265137,0.5487902164459229,0.6468369364738464,1.0,1.0,0.0,0.0 +trm_baseline_best,0,0.001,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8949999809265137,0.961814820766449,0.8949999809265137,0.8949999809265137,0.5487902164459229,0.6468369364738464,1.0,1.0,0.0,0.0 +trm_baseline_best,0,0.003,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8934999704360962,0.961506187915802,0.8790000081062317,0.9079999923706055,0.5487902164459229,0.6468369364738464,0.9905027747154236,0.9821228981018066,0.06666667014360428,0.13333334028720856 +trm_baseline_best,0,0.01,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.890500009059906,0.9606789350509644,0.8539999723434448,0.9269999861717224,0.5487902164459229,0.6468369364738464,0.9631285071372986,0.9396647810935974,0.27142858505249023,0.41904762387275696 +trm_baseline_best,0,0.03,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8859999775886536,0.9576419591903687,0.8610000014305115,0.9110000133514404,0.5487902164459229,0.6468369364738464,0.9642457962036133,0.9486033320426941,0.21904762089252472,0.32380953431129456 
+trm_baseline_best,0,0.1,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.890500009059906,0.9597654342651367,0.8579999804496765,0.9229999780654907,0.5487902164459229,0.6468369364738464,0.9653631448745728,0.9474860429763794,0.25238096714019775,0.4095238149166107 +trm_baseline_best,4,0.0,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8980000019073486,0.9629135727882385,0.8980000019073486,0.8980000019073486,0.6417636275291443,0.8611953854560852,1.0,1.0,0.0,0.0 +trm_baseline_best,4,0.001,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8974999785423279,0.9625309109687805,0.8920000195503235,0.902999997138977,0.6417636275291443,0.8611953854560852,0.9966592192649841,0.993318498134613,0.02450980432331562,0.04901960864663124 +trm_baseline_best,4,0.003,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8934999704360962,0.9609135389328003,0.8659999966621399,0.9210000038146973,0.6417636275291443,0.8611953854560852,0.9760578870773315,0.9587973356246948,0.1666666716337204,0.28431373834609985 +trm_baseline_best,4,0.01,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8859999775886536,0.9590432047843933,0.8629999756813049,0.9089999794960022,0.6417636275291443,0.8611953854560852,0.9660356640815735,0.948775053024292,0.18137255311012268,0.2549019753932953 
+trm_baseline_best,4,0.03,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8840000033378601,0.957679033279419,0.8560000061988831,0.9120000004768372,0.6417636275291443,0.8611953854560852,0.9649220705032349,0.9476614594459534,0.1715686321258545,0.29411765933036804 +trm_baseline_best,4,0.1,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8889999985694885,0.9591296315193176,0.8629999756813049,0.9150000214576721,0.6417636275291443,0.8611953854560852,0.9671491980552673,0.9521158337593079,0.20098039507865906,0.3235294222831726 +trm_baseline_best,8,0.0,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8970000147819519,0.9637901186943054,0.8970000147819519,0.8970000147819519,0.8348738551139832,1.2223771810531616,1.0,1.0,0.0,0.0 +trm_baseline_best,8,0.001,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8970000147819519,0.9635308980941772,0.8939999938011169,0.8999999761581421,0.8348738551139832,1.2223771810531616,0.997770369052887,0.9966555237770081,0.019417475908994675,0.03883495181798935 +trm_baseline_best,8,0.003,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8964999914169312,0.9620987772941589,0.8769999742507935,0.9160000085830688,0.8348738551139832,1.2223771810531616,0.9832776188850403,0.9732441306114197,0.14077669382095337,0.24271844327449799 
+trm_baseline_best,8,0.01,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8945000171661377,0.9619569182395935,0.8790000081062317,0.9100000262260437,0.8348738551139832,1.2223771810531616,0.9777034521102905,0.9732441306114197,0.16990290582180023,0.28155338764190674 +trm_baseline_best,8,0.03,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8964999914169312,0.9622469544410706,0.8809999823570251,0.9120000004768372,0.8348738551139832,1.2223771810531616,0.9821627736091614,0.9754738211631775,0.15048544108867645,0.24271844327449799 +trm_baseline_best,8,0.1,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.890500009059906,0.9603580236434937,0.875,0.906000018119812,0.8348738551139832,1.2223771810531616,0.9771460294723511,0.9676700234413147,0.13592232763767242,0.20388349890708923 +trm_baseline_best,12,0.0,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8930000066757202,0.9613826274871826,0.8930000066757202,0.8930000066757202,1.5429991483688354,2.3959367275238037,1.0,1.0,0.0,0.0 +trm_baseline_best,12,0.001,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8924999833106995,0.961481511592865,0.8920000195503235,0.8930000066757202,1.5429991483688354,2.3959367275238037,0.99944007396698,0.9988802075386047,0.0,0.0 
+trm_baseline_best,12,0.003,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8914999961853027,0.9610123634338379,0.8859999775886536,0.8970000147819519,1.5429991483688354,2.3959367275238037,0.9938409924507141,0.9910414218902588,0.037383176386356354,0.06542056053876877 +trm_baseline_best,12,0.01,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8914999961853027,0.9606543183326721,0.8849999904632568,0.8980000019073486,1.5429991483688354,2.3959367275238037,0.9921612739562988,0.9899216294288635,0.05140186846256256,0.09345794469118118 +trm_baseline_best,12,0.03,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8930000066757202,0.9610803127288818,0.8849999904632568,0.9010000228881836,1.5429991483688354,2.3959367275238037,0.9927211403846741,0.9899216294288635,0.0607476644217968,0.11214952915906906 +trm_baseline_best,12,0.1,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8930000066757202,0.961530864238739,0.8859999775886536,0.8999999761581421,1.5429991483688354,2.3959367275238037,0.9938409924507141,0.9910414218902588,0.05140186846256256,0.09345794469118118 diff --git a/directional_lyap_perturb_eps_sweep/trm_baseline_best_step58590_n1000_c8_fdeps003.summary.csv b/directional_lyap_perturb_eps_sweep/trm_baseline_best_step58590_n1000_c8_fdeps003.summary.csv new file mode 100644 index 0000000..696a9e9 --- /dev/null +++ b/directional_lyap_perturb_eps_sweep/trm_baseline_best_step58590_n1000_c8_fdeps003.summary.csv @@ -0,0 +1,25 @@ 
+label,perturb_after,sigma,n_samples,candidates,fd_eps,ckpt_root,ckpt_name,clean_acc,mean_sign_exact,mean_sign_token_acc,worst_sign_exact,best_sign_exact,selected_growth_mean,selected_growth_q90,retain_mean_on_clean_success,retain_worst_on_clean_success,rescue_mean_on_clean_fail,rescue_best_on_clean_fail +trm_baseline_best,0,0.0,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8949999809265137,0.961814820766449,0.8949999809265137,0.8949999809265137,0.4821663796901703,0.5784756541252136,1.0,1.0,0.0,0.0 +trm_baseline_best,0,0.001,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8949999809265137,0.961814820766449,0.8949999809265137,0.8949999809265137,0.4821663796901703,0.5784756541252136,1.0,1.0,0.0,0.0 +trm_baseline_best,0,0.003,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8980000019073486,0.9630987644195557,0.8790000081062317,0.9169999957084656,0.4821663796901703,0.5784756541252136,0.9893854856491089,0.9798882603645325,0.1190476194024086,0.21904762089252472 +trm_baseline_best,0,0.01,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.887499988079071,0.9590184688568115,0.8600000143051147,0.9150000214576721,0.4821663796901703,0.5784756541252136,0.9653631448745728,0.9463686943054199,0.22380952537059784,0.32380953431129456 
+trm_baseline_best,0,0.03,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8895000219345093,0.9602900743484497,0.8619999885559082,0.9169999957084656,0.4821663796901703,0.5784756541252136,0.9670391082763672,0.9508379697799683,0.22857142984867096,0.3523809611797333 +trm_baseline_best,0,0.1,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8949999809265137,0.8930000066757202,0.961030900478363,0.8619999885559082,0.9240000247955322,0.4821663796901703,0.5784756541252136,0.9670391082763672,0.9474860429763794,0.261904776096344,0.39047619700431824 +trm_baseline_best,4,0.0,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8980000019073486,0.9629135727882385,0.8980000019073486,0.8980000019073486,0.5513403415679932,0.7693743705749512,1.0,1.0,0.0,0.0 +trm_baseline_best,4,0.001,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8955000042915344,0.9619382619857788,0.8899999856948853,0.9010000228881836,0.5513403415679932,0.7693743705749512,0.9955456852912903,0.9910913109779358,0.014705882407724857,0.029411764815449715 +trm_baseline_best,4,0.003,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8880000114440918,0.9597716331481934,0.8669999837875366,0.9089999794960022,0.5513403415679932,0.7693743705749512,0.9716035723686218,0.9599109292030334,0.15196079015731812,0.2549019753932953 
+trm_baseline_best,4,0.01,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8934999704360962,0.9613086581230164,0.8700000047683716,0.9169999957084656,0.5513403415679932,0.7693743705749512,0.9738307595252991,0.9610245227813721,0.18627451360225677,0.30392158031463623 +trm_baseline_best,4,0.03,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.890999972820282,0.9601975083351135,0.8650000095367432,0.9169999957084656,0.5513403415679932,0.7693743705749512,0.9688196182250977,0.9510022401809692,0.20588235557079315,0.30392158031463623 +trm_baseline_best,4,0.1,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8980000019073486,0.8880000114440918,0.9588024616241455,0.8640000224113464,0.9120000004768372,0.5513403415679932,0.7693743705749512,0.9699332118034363,0.955456554889679,0.1666666716337204,0.27450981736183167 +trm_baseline_best,8,0.0,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8970000147819519,0.9637901186943054,0.8970000147819519,0.8970000147819519,0.6981183886528015,1.082965612411499,1.0,1.0,0.0,0.0 +trm_baseline_best,8,0.001,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8974999785423279,0.963746964931488,0.8939999938011169,0.9010000228881836,0.6981183886528015,1.082965612411499,0.9983277320861816,0.9966555237770081,0.019417475908994675,0.03883495181798935 
+trm_baseline_best,8,0.003,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8934999704360962,0.9613950848579407,0.8769999742507935,0.9100000262260437,0.6981183886528015,1.082965612411499,0.9793757200241089,0.9698996543884277,0.1456310749053955,0.223300963640213 +trm_baseline_best,8,0.01,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8914999961853027,0.9603024125099182,0.8759999871253967,0.9070000052452087,0.6981183886528015,1.082965612411499,0.9788182973861694,0.9710144996643066,0.13106796145439148,0.21359223127365112 +trm_baseline_best,8,0.03,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8985000252723694,0.9633148312568665,0.8820000290870667,0.9150000214576721,0.6981183886528015,1.082965612411499,0.9816053509712219,0.9743589758872986,0.17475728690624237,0.27184465527534485 +trm_baseline_best,8,0.1,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8970000147819519,0.8920000195503235,0.9607345461845398,0.8790000081062317,0.9049999713897705,0.6981183886528015,1.082965612411499,0.97826087474823,0.9698996543884277,0.14077669382095337,0.19417475163936615 +trm_baseline_best,12,0.0,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8930000066757202,0.9613826274871826,0.8930000066757202,0.8930000066757202,1.2681450843811035,2.1185879707336426,1.0,1.0,0.0,0.0 
+trm_baseline_best,12,0.001,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8914999961853027,0.9607530236244202,0.8899999856948853,0.8930000066757202,1.2681450843811035,2.1185879707336426,0.9983202815055847,0.9966405630111694,0.0,0.0 +trm_baseline_best,12,0.003,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8930000066757202,0.9613209962844849,0.8880000114440918,0.8980000019073486,1.2681450843811035,2.1185879707336426,0.9955207109451294,0.9921612739562988,0.037383176386356354,0.05607476457953453 +trm_baseline_best,12,0.01,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8899999856948853,0.9602900743484497,0.8849999904632568,0.8949999809265137,1.2681450843811035,2.1185879707336426,0.9904814958572388,0.9888017773628235,0.05140186846256256,0.08411215245723724 +trm_baseline_best,12,0.03,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.890999972820282,0.9605308175086975,0.8870000243186951,0.8949999809265137,1.2681450843811035,2.1185879707336426,0.9921612739562988,0.9910414218902588,0.04672897234559059,0.07476635277271271 +trm_baseline_best,12,0.1,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,0.8930000066757202,0.8895000219345093,0.9602345824241638,0.8840000033378601,0.8949999809265137,1.2681450843811035,2.1185879707336426,0.9921612739562988,0.9899216294288635,0.032710280269384384,0.06542056053876877 diff --git a/directional_lyap_perturb_eps_sweep/trm_multi4_best_step35805_n1000_c8_fdeps001.summary.csv 
b/directional_lyap_perturb_eps_sweep/trm_multi4_best_step35805_n1000_c8_fdeps001.summary.csv new file mode 100644 index 0000000..3cd781e --- /dev/null +++ b/directional_lyap_perturb_eps_sweep/trm_multi4_best_step35805_n1000_c8_fdeps001.summary.csv @@ -0,0 +1,25 @@ +label,perturb_after,sigma,n_samples,candidates,fd_eps,ckpt_root,ckpt_name,clean_acc,mean_sign_exact,mean_sign_token_acc,worst_sign_exact,best_sign_exact,selected_growth_mean,selected_growth_q90,retain_mean_on_clean_success,retain_worst_on_clean_success,rescue_mean_on_clean_fail,rescue_best_on_clean_fail +trm_multi4_best,0,0.0,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.8899999856948853,0.9582962989807129,0.8899999856948853,0.8899999856948853,0.5268718004226685,0.6456454992294312,1.0,1.0,0.0,0.0 +trm_multi4_best,0,0.001,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.8899999856948853,0.9582962989807129,0.8899999856948853,0.8899999856948853,0.5268718004226685,0.6456454992294312,1.0,1.0,0.0,0.0 +trm_multi4_best,0,0.003,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.8955000042915344,0.9605987668037415,0.8870000243186951,0.9039999842643738,0.5268718004226685,0.6456454992294312,0.9960674047470093,0.9932584166526794,0.08181817829608917,0.13636364042758942 
+trm_multi4_best,0,0.01,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.9075000286102295,0.9647963047027588,0.8820000290870667,0.9330000281333923,0.5268718004226685,0.6456454992294312,0.9820224642753601,0.9674157500267029,0.30454546213150024,0.41818180680274963 +trm_multi4_best,0,0.03,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.8970000147819519,0.9618827104568481,0.8679999709129333,0.9259999990463257,0.5268718004226685,0.6456454992294312,0.9764044880867004,0.9606741666793823,0.2545454502105713,0.3909091055393219 +trm_multi4_best,0,0.1,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.9089999794960022,0.9657654166221619,0.8790000081062317,0.9390000104904175,0.5268718004226685,0.6456454992294312,0.983146071434021,0.968539297580719,0.30909091234207153,0.4636363685131073 +trm_multi4_best,4,0.0,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.921999990940094,0.970037043094635,0.921999990940094,0.921999990940094,0.5925552845001221,0.8220179677009583,1.0,1.0,0.0,0.0 +trm_multi4_best,4,0.001,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9175000190734863,0.9687839150428772,0.9120000004768372,0.9229999780654907,0.5925552845001221,0.8220179677009583,0.9940347075462341,0.989154040813446,0.012820512987673283,0.025641025975346565 
+trm_multi4_best,4,0.003,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9135000109672546,0.9665740132331848,0.8980000019073486,0.9290000200271606,0.5925552845001221,0.8220179677009583,0.9772234559059143,0.9674620628356934,0.16025641560554504,0.24358974397182465 +trm_multi4_best,4,0.01,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9110000133514404,0.9670863747596741,0.8920000195503235,0.9300000071525574,0.5925552845001221,0.8220179677009583,0.9701735377311707,0.9598698616027832,0.21153846383094788,0.3333333432674408 +trm_multi4_best,4,0.03,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9125000238418579,0.9668148159980774,0.8930000066757202,0.9319999814033508,0.5925552845001221,0.8220179677009583,0.9718004465103149,0.9566160440444946,0.21153846383094788,0.28205129504203796 +trm_multi4_best,4,0.1,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9150000214576721,0.967555582523346,0.8949999809265137,0.9350000023841858,0.5925552845001221,0.8220179677009583,0.9750542044639587,0.962039053440094,0.20512820780277252,0.3076923191547394 +trm_multi4_best,8,0.0,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9110000133514404,0.9664074182510376,0.9110000133514404,0.9110000133514404,0.8208635449409485,1.2134735584259033,1.0,1.0,0.0,0.0 
+trm_multi4_best,8,0.001,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9089999794960022,0.9657962918281555,0.9079999923706055,0.9100000262260437,0.8208635449409485,1.2134735584259033,0.9972557425498962,0.9967069029808044,0.00561797758564353,0.01123595517128706 +trm_multi4_best,8,0.003,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9120000004768372,0.9669011831283569,0.8960000276565552,0.9279999732971191,0.8208635449409485,1.2134735584259033,0.9868276715278625,0.9791437983512878,0.14606741070747375,0.24719101190567017 +trm_multi4_best,8,0.01,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9120000004768372,0.966401219367981,0.8980000019073486,0.9259999990463257,0.8208635449409485,1.2134735584259033,0.9868276715278625,0.9780461192131042,0.14606741070747375,0.21348313987255096 +trm_multi4_best,8,0.03,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9120000004768372,0.9667222499847412,0.8960000276565552,0.9279999732971191,0.8208635449409485,1.2134735584259033,0.983534574508667,0.9769483804702759,0.17977528274059296,0.2921348214149475 +trm_multi4_best,8,0.1,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9129999876022339,0.9660493731498718,0.8999999761581421,0.9259999990463257,0.8208635449409485,1.2134735584259033,0.9862788319587708,0.9802414774894714,0.16292135417461395,0.24719101190567017 
+trm_multi4_best,12,0.0,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9190000295639038,0.9690986275672913,0.9190000295639038,0.9190000295639038,1.5447064638137817,1.7272931337356567,1.0,1.0,0.0,0.0 +trm_multi4_best,12,0.001,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9175000190734863,0.9684197902679443,0.9150000214576721,0.9200000166893005,1.5447064638137817,1.7272931337356567,0.9972796440124512,0.9956474304199219,0.012345679104328156,0.02469135820865631 +trm_multi4_best,12,0.003,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9175000190734863,0.9685803055763245,0.9110000133514404,0.9240000247955322,1.5447064638137817,1.7272931337356567,0.9940152168273926,0.9902067184448242,0.04938271641731262,0.08641975373029709 +trm_multi4_best,12,0.01,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9154999852180481,0.9677098989486694,0.9110000133514404,0.9200000166893005,1.5447064638137817,1.7272931337356567,0.990750789642334,0.9891186356544495,0.06172839552164078,0.09876543283462524 +trm_multi4_best,12,0.03,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9175000190734863,0.9684074521064758,0.9110000133514404,0.9240000247955322,1.5447064638137817,1.7272931337356567,0.9912948608398438,0.9902067184448242,0.08024691045284271,0.14814814925193787 
+trm_multi4_best,12,0.1,1000,8,0.01,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9200000166893005,0.9692777991294861,0.9139999747276306,0.9259999990463257,1.5447064638137817,1.7272931337356567,0.992927074432373,0.9912948608398438,0.09259258955717087,0.14814814925193787 diff --git a/directional_lyap_perturb_eps_sweep/trm_multi4_best_step35805_n1000_c8_fdeps003.summary.csv b/directional_lyap_perturb_eps_sweep/trm_multi4_best_step35805_n1000_c8_fdeps003.summary.csv new file mode 100644 index 0000000..ee59fd9 --- /dev/null +++ b/directional_lyap_perturb_eps_sweep/trm_multi4_best_step35805_n1000_c8_fdeps003.summary.csv @@ -0,0 +1,25 @@ +label,perturb_after,sigma,n_samples,candidates,fd_eps,ckpt_root,ckpt_name,clean_acc,mean_sign_exact,mean_sign_token_acc,worst_sign_exact,best_sign_exact,selected_growth_mean,selected_growth_q90,retain_mean_on_clean_success,retain_worst_on_clean_success,rescue_mean_on_clean_fail,rescue_best_on_clean_fail +trm_multi4_best,0,0.0,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.8899999856948853,0.9582962989807129,0.8899999856948853,0.8899999856948853,0.4601479172706604,0.5775974988937378,1.0,1.0,0.0,0.0 +trm_multi4_best,0,0.001,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.8899999856948853,0.9582962989807129,0.8899999856948853,0.8899999856948853,0.4601479172706604,0.5775974988937378,1.0,1.0,0.0,0.0 
+trm_multi4_best,0,0.003,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.8964999914169312,0.9609135985374451,0.8889999985694885,0.9039999842643738,0.4601479172706604,0.5775974988937378,0.9977527856826782,0.9955056309700012,0.07727272808551788,0.12727272510528564 +trm_multi4_best,0,0.01,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.9139999747276306,0.9676605463027954,0.8849999904632568,0.9430000185966492,0.4601479172706604,0.5775974988937378,0.9853932857513428,0.9719101190567017,0.33636364340782166,0.4909090995788574 +trm_multi4_best,0,0.03,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.9104999899864197,0.9665431380271912,0.8880000114440918,0.9330000281333923,0.4601479172706604,0.5775974988937378,0.9876404404640198,0.9786517024040222,0.2863636314868927,0.41818180680274963 +trm_multi4_best,0,0.1,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.8899999856948853,0.9035000205039978,0.9636295437812805,0.8709999918937683,0.9359999895095825,0.4601479172706604,0.5775974988937378,0.9764044880867004,0.9606741666793823,0.3136363625526428,0.48181816935539246 +trm_multi4_best,4,0.0,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.921999990940094,0.970037043094635,0.921999990940094,0.921999990940094,0.5034276247024536,0.7669552564620972,1.0,1.0,0.0,0.0 
+trm_multi4_best,4,0.001,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9204999804496765,0.9694691300392151,0.9169999957084656,0.9240000247955322,0.5034276247024536,0.7669552564620972,0.9972885251045227,0.9945769906044006,0.012820512987673283,0.025641025975346565 +trm_multi4_best,4,0.003,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9139999747276306,0.9672160148620605,0.8999999761581421,0.9279999732971191,0.5034276247024536,0.7669552564620972,0.9783080220222473,0.9674620628356934,0.1538461595773697,0.20512820780277252 +trm_multi4_best,4,0.01,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9164999723434448,0.9680370092391968,0.8980000019073486,0.9350000023841858,0.5034276247024536,0.7669552564620972,0.9761388301849365,0.9652928709983826,0.21153846383094788,0.3205128312110901 +trm_multi4_best,4,0.03,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9075000286102295,0.9649505615234375,0.8899999856948853,0.925000011920929,0.5034276247024536,0.7669552564620972,0.9690889120101929,0.9566160440444946,0.1794871836900711,0.25641027092933655 +trm_multi4_best,4,0.1,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.921999990940094,0.9139999747276306,0.9669753313064575,0.8939999938011169,0.9340000152587891,0.5034276247024536,0.7669552564620972,0.97559654712677,0.9642082452774048,0.18589743971824646,0.3076923191547394 
+trm_multi4_best,8,0.0,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9110000133514404,0.9664074182510376,0.9110000133514404,0.9110000133514404,0.6825441718101501,1.0761522054672241,1.0,1.0,0.0,0.0 +trm_multi4_best,8,0.001,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9125000238418579,0.9669321179389954,0.9089999794960022,0.9160000085830688,0.6825441718101501,1.0761522054672241,0.9989023208618164,0.997804582118988,0.028089888393878937,0.056179776787757874 +trm_multi4_best,8,0.003,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9139999747276306,0.9670432806015015,0.8980000019073486,0.9300000071525574,0.6825441718101501,1.0761522054672241,0.9890230298042297,0.983534574508667,0.14606741070747375,0.26966291666030884 +trm_multi4_best,8,0.01,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9160000085830688,0.9678518772125244,0.9020000100135803,0.9300000071525574,0.6825441718101501,1.0761522054672241,0.9879253506660461,0.983534574508667,0.17977528274059296,0.2921348214149475 +trm_multi4_best,8,0.03,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9125000238418579,0.9668642282485962,0.9020000100135803,0.9229999780654907,0.6825441718101501,1.0761522054672241,0.9879253506660461,0.9824368953704834,0.1404494345188141,0.20224718749523163 
+trm_multi4_best,8,0.1,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9110000133514404,0.9179999828338623,0.9684135317802429,0.9049999713897705,0.9309999942779541,0.6825441718101501,1.0761522054672241,0.9884741902351379,0.983534574508667,0.19662921130657196,0.2921348214149475 +trm_multi4_best,12,0.0,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9190000295639038,0.9690986275672913,0.9190000295639038,0.9190000295639038,1.2695486545562744,1.4881229400634766,1.0,1.0,0.0,0.0 +trm_multi4_best,12,0.001,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9175000190734863,0.9683518409729004,0.9160000085830688,0.9190000295639038,1.2695486545562744,1.4881229400634766,0.9978237152099609,0.9967355728149414,0.006172839552164078,0.012345679104328156 +trm_multi4_best,12,0.003,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9164999723434448,0.9678456783294678,0.9100000262260437,0.9229999780654907,1.2695486545562744,1.4881229400634766,0.992927074432373,0.9891186356544495,0.04938271641731262,0.08641975373029709 +trm_multi4_best,12,0.01,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9190000295639038,0.9691110849380493,0.9120000004768372,0.9259999990463257,1.2695486545562744,1.4881229400634766,0.9940152168273926,0.9912948608398438,0.06790123134851456,0.12345679104328156 
+trm_multi4_best,12,0.03,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9164999723434448,0.9683271646499634,0.9120000004768372,0.9210000038146973,1.2695486545562744,1.4881229400634766,0.992927074432373,0.9912948608398438,0.04938271641731262,0.08641975373029709 +trm_multi4_best,12,0.1,1000,8,0.03,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,0.9190000295639038,0.9200000166893005,0.9692345857620239,0.9139999747276306,0.9259999990463257,1.2695486545562744,1.4881229400634766,0.9934711456298828,0.9923830032348633,0.08641975373029709,0.14814814925193787 diff --git a/directional_lyap_perturb_eps_sweep/watch_queue_and_plot.sh b/directional_lyap_perturb_eps_sweep/watch_queue_and_plot.sh new file mode 100755 index 0000000..4a44936 --- /dev/null +++ b/directional_lyap_perturb_eps_sweep/watch_queue_and_plot.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT=/home/yurenh2/rrm +PY=/home/yurenh2/miniconda3/envs/rrm/bin/python +cd "${ROOT}" + +DIR=research/flossing/directional_lyap_perturb_eps_sweep +BASE_CKPT="${ROOT}/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro" +MULTI_CKPT="${ROOT}/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro" +SIGMAS='0,0.001,0.003,0.01,0.03,0.1' +AFTERS='0,4,8,12' +N=1000 +CAND=8 +BS=16 +SEED=20260608 + +wait_pid_file() { + local pf="$1" + local pid + pid=$(cat "${pf}") + echo "watch ${pf}: ${pid}" + while kill -0 "${pid}" 2>/dev/null; do + sleep 60 + done + echo "done ${pf}: ${pid}" +} + +launch_multi_eps003_on_gpu0() { + local out=trm_multi4_best_step35805_n1000_c8_fdeps003 + local log="${DIR}/logs/${out}.log" + local pidf="${DIR}/logs/${out}.pid" + setsid bash -c "cd '${ROOT}' && export 
CUDA_VISIBLE_DEVICES='0' PYTHONUNBUFFERED=1 && exec '${PY}' research/flossing/directional_lyap_perturb_robustness.py --ckpt-root '${MULTI_CKPT}' --ckpt-name step_35805 --label trm_multi4_best --n-samples '${N}' --batch-size '${BS}' --candidates '${CAND}' --fd-eps 0.03 --sigmas '${SIGMAS}' --perturb-afters '${AFTERS}' --seed '${SEED}' --out-prefix '${DIR}/${out}'" \ + > "${log}" 2>&1 < /dev/null & + echo $! > "${pidf}" + echo "launched ${out}: $(cat "${pidf}")" +} + +wait_pid_file "${DIR}/logs/trm_baseline_best_step58590_n1000_c8_fdeps001.pid" +launch_multi_eps003_on_gpu0 +wait_pid_file "${DIR}/logs/trm_multi4_best_step35805_n1000_c8_fdeps001.pid" +wait_pid_file "${DIR}/logs/trm_baseline_best_step58590_n1000_c8_fdeps003.pid" +wait_pid_file "${DIR}/logs/trm_multi4_best_step35805_n1000_c8_fdeps003.pid" + +"${PY}" research/flossing/plot_directional_lyap_perturb.py \ + --summaries \ + "${DIR}/trm_baseline_best_step58590_n1000_c8_fdeps001.summary.csv" \ + "${DIR}/trm_multi4_best_step35805_n1000_c8_fdeps001.summary.csv" \ + "${DIR}/trm_baseline_best_step58590_n1000_c8_fdeps003.summary.csv" \ + "${DIR}/trm_multi4_best_step35805_n1000_c8_fdeps003.summary.csv" \ + --out-dir "${DIR}/plots" \ + --slice-sigma 0.03 + +nvidia-smi --query-gpu=index,memory.used,memory.total,utilization.gpu --format=csv,noheader,nounits \ + > "${DIR}/plots/final_gpu_status.txt" diff --git a/directional_lyap_perturb_robustness.py b/directional_lyap_perturb_robustness.py new file mode 100644 index 0000000..c0a803c --- /dev/null +++ b/directional_lyap_perturb_robustness.py @@ -0,0 +1,428 @@ +"""Directional late-state perturbation using finite-difference Lyapunov search. + +At a chosen recurrent step, sample several unit tangent directions, propagate +small shadow trajectories through the remaining deterministic dynamics, choose +the direction with maximal final hidden-state expansion, then perturb along +that selected direction with +/- sigma and measure answer robustness. 
+ +This is a practical finite-difference proxy for perturbing along a local top +Lyapunov direction without paying exact JVP costs. +""" +from __future__ import annotations + +import argparse +import csv +import json +import math +import sys +from dataclasses import replace +from pathlib import Path +from typing import Any + +import numpy as np +import torch +import yaml + + +TRM_DIR = Path("/home/yurenh2/rrm/trm") +sys.path.insert(0, str(TRM_DIR)) + +from models.recursive_reasoning.trm import ( # noqa: E402 + TinyRecursiveReasoningModel_ACTV1, + TinyRecursiveReasoningModel_ACTV1InnerCarry, +) + + +IGNORE_LABEL_ID = -100 + + +def parse_float_list(text: str) -> list[float]: + return [float(x.strip()) for x in text.split(",") if x.strip()] + + +def parse_int_list(text: str) -> list[int]: + return [int(x.strip()) for x in text.split(",") if x.strip()] + + +def load_model(ckpt_root: Path, ckpt_name: str, device: str): + cfg = yaml.safe_load((ckpt_root / "all_config.yaml").read_text()) + data_path = Path(cfg.get("data_path") or cfg["data_paths"][0]) + train_meta = json.loads((data_path / "train" / "dataset.json").read_text()) + + arch_cfg = dict(cfg["arch"]) + arch_cfg.update( + batch_size=cfg["global_batch_size"], + seq_len=train_meta["seq_len"], + vocab_size=train_meta["vocab_size"], + num_puzzle_identifiers=train_meta["num_puzzle_identifiers"], + causal=False, + ) + + model = TinyRecursiveReasoningModel_ACTV1(arch_cfg) + state = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True) + stripped = {k.replace("_orig_mod.", "").replace("model.", ""): v for k, v in state.items()} + missing, unexpected = model.load_state_dict(stripped, strict=False) + print(f"[load] {ckpt_root.name}/{ckpt_name} missing={len(missing)} unexpected={len(unexpected)}", flush=True) + model.to(device).eval() + return model, cfg, data_path + + +def load_test_samples(data_path: Path, n_samples: int, seed: int): + rng = np.random.default_rng(seed) + inputs = np.load(data_path / 
"test" / "all__inputs.npy") + labels = np.load(data_path / "test" / "all__labels.npy") + puzzle_ids = np.load(data_path / "test" / "all__puzzle_identifiers.npy") + + n = min(n_samples, len(inputs)) + idx = rng.choice(len(inputs), size=n, replace=False) + return { + "inputs": torch.from_numpy(inputs[idx].astype(np.int32)), + "labels": torch.from_numpy(labels[idx].astype(np.int32)), + "puzzle_identifiers": torch.from_numpy(puzzle_ids[idx].astype(np.int32)), + "idx": idx, + } + + +def batch_slice(samples: dict[str, Any], start: int, end: int, device: str): + return { + k: v[start:end].to(device, non_blocking=True) + for k, v in samples.items() + if k in ("inputs", "labels", "puzzle_identifiers") + } + + +def repeat_batch(batch: dict[str, torch.Tensor], repeats: int): + if repeats == 1: + return batch + return {k: v.repeat_interleave(repeats, dim=0) for k, v in batch.items()} + + +def cat_batches(a: dict[str, torch.Tensor], b: dict[str, torch.Tensor]): + return {k: torch.cat([a[k], b[k]], dim=0) for k in a} + + +def cat_inner(a: TinyRecursiveReasoningModel_ACTV1InnerCarry, b: TinyRecursiveReasoningModel_ACTV1InnerCarry): + return TinyRecursiveReasoningModel_ACTV1InnerCarry( + z_H=torch.cat([a.z_H, b.z_H], dim=0), + z_L=torch.cat([a.z_L, b.z_L], dim=0), + ) + + +def split_inner(inner: TinyRecursiveReasoningModel_ACTV1InnerCarry, n_main: int): + return ( + TinyRecursiveReasoningModel_ACTV1InnerCarry( + z_H=inner.z_H[:n_main].contiguous(), + z_L=inner.z_L[:n_main].contiguous(), + ), + TinyRecursiveReasoningModel_ACTV1InnerCarry( + z_H=inner.z_H[n_main:].contiguous(), + z_L=inner.z_L[n_main:].contiguous(), + ), + ) + + +def correctness(logits: torch.Tensor, labels: torch.Tensor): + preds = logits.argmax(dim=-1) + mask = labels != IGNORE_LABEL_ID + exact = torch.where(mask, preds == labels, True).all(dim=-1) + denom = mask.sum(-1).clamp_min(1) + token_acc = ((preds == labels) & mask).sum(-1).float() / denom.float() + return exact, token_acc + + +def rand_unit_dirs( + inner: 
TinyRecursiveReasoningModel_ACTV1InnerCarry, + candidates: int, + generator: torch.Generator, +): + bsz = inner.z_H.shape[0] + h_dirs = torch.randn( + (bsz, candidates) + tuple(inner.z_H.shape[1:]), + device=inner.z_H.device, + dtype=torch.float32, + generator=generator, + ) + l_dirs = torch.randn( + (bsz, candidates) + tuple(inner.z_L.shape[1:]), + device=inner.z_L.device, + dtype=torch.float32, + generator=generator, + ) + norm = torch.sqrt(h_dirs.flatten(2).square().sum(-1) + l_dirs.flatten(2).square().sum(-1)).clamp_min(1e-30) + h_view = (bsz, candidates) + (1,) * (h_dirs.ndim - 2) + l_view = (bsz, candidates) + (1,) * (l_dirs.ndim - 2) + return (h_dirs / norm.view(h_view)).to(inner.z_H.dtype), (l_dirs / norm.view(l_view)).to(inner.z_L.dtype) + + +def make_shadow_inner( + inner: TinyRecursiveReasoningModel_ACTV1InnerCarry, + h_dirs: torch.Tensor, + l_dirs: torch.Tensor, + eps: float, +): + bsz, candidates = h_dirs.shape[:2] + z_h = inner.z_H[:, None] + eps * h_dirs + z_l = inner.z_L[:, None] + eps * l_dirs + return TinyRecursiveReasoningModel_ACTV1InnerCarry( + z_H=z_h.reshape((bsz * candidates,) + tuple(inner.z_H.shape[1:])).detach(), + z_L=z_l.reshape((bsz * candidates,) + tuple(inner.z_L.shape[1:])).detach(), + ) + + +def separation( + main: TinyRecursiveReasoningModel_ACTV1InnerCarry, + shadow: TinyRecursiveReasoningModel_ACTV1InnerCarry, + candidates: int, +): + bsz = main.z_H.shape[0] + sh = shadow.z_H.reshape((bsz, candidates) + tuple(main.z_H.shape[1:])).float() + sl = shadow.z_L.reshape((bsz, candidates) + tuple(main.z_L.shape[1:])).float() + dh = (sh - main.z_H[:, None].float()).flatten(2) + dl = (sl - main.z_L[:, None].float()).flatten(2) + return torch.sqrt(dh.square().sum(-1) + dl.square().sum(-1)).clamp_min(1e-30) + + +def gather_dirs(h_dirs: torch.Tensor, l_dirs: torch.Tensor, idx: torch.Tensor): + bsz = h_dirs.shape[0] + arange = torch.arange(bsz, device=h_dirs.device) + return h_dirs[arange, idx].contiguous(), l_dirs[arange, idx].contiguous() + 
def perturb_inner(
    inner: TinyRecursiveReasoningModel_ACTV1InnerCarry,
    h_dir: torch.Tensor,
    l_dir: torch.Tensor,
    sigma: float,
    sign: float,
):
    """Return a copy of ``inner`` shifted by ``sign * sigma`` along the given directions.

    ``h_dir`` is added to ``z_H`` and ``l_dir`` to ``z_L`` after being cast to
    the dtype of the state they perturb. The input carry is not modified.
    """
    step = sign * sigma
    return replace(
        inner,
        z_H=inner.z_H + step * h_dir.to(inner.z_H.dtype),
        z_L=inner.z_L + step * l_dir.to(inner.z_L.dtype),
    )


def _remaining_steps(model, after: int) -> int:
    """Return how many inner steps are left after ``after`` warm-up steps.

    Raises:
        ValueError: if ``after`` leaves no step to run. Without this check the
            rollout loop would not execute and there would be no logits to score.
    """
    remaining = model.config.halt_max_steps - after
    if remaining <= 0:
        raise ValueError(
            f"perturb_after={after} must be smaller than "
            f"halt_max_steps={model.config.halt_max_steps}"
        )
    return remaining


@torch.inference_mode()
def warmup_inner(model, batch: dict[str, torch.Tensor], after: int):
    """Run ``after`` unperturbed inner steps from a freshly reset carry.

    Returns:
        ``(inner, logits)`` where ``logits`` is the output of the last step, or
        ``None`` when ``after`` is 0.
    """
    bsz = batch["inputs"].shape[0]
    input_device = batch["inputs"].device
    with torch.device(input_device):
        carry = model.initial_carry(batch)
    # Reset every sequence so the rollout starts from the model's initial state.
    reset = torch.ones(bsz, device=input_device, dtype=torch.bool)
    inner = model.inner.reset_carry(reset, carry.inner_carry)
    logits = None
    for _ in range(after):
        inner, logits, _q = model.inner(inner, batch)
    return inner, logits


@torch.inference_mode()
def search_direction(
    model,
    warm: TinyRecursiveReasoningModel_ACTV1InnerCarry,
    batch: dict[str, torch.Tensor],
    after: int,
    candidates: int,
    fd_eps: float,
    generator: torch.Generator,
):
    """Pick, per sample, the candidate direction with the largest final separation.

    The clean state and ``candidates`` shadow states (built by
    ``make_shadow_inner`` with step ``fd_eps``) are rolled out together for the
    remaining steps. The direction whose shadow ends up farthest from the clean
    trajectory (according to ``separation``) is selected.

    Returns:
        ``(best_h, best_l, growth, clean_exact, clean_token)`` where ``growth``
        is ``log(best_sep / fd_eps)`` divided by the number of remaining steps.

    Raises:
        ValueError: if ``after`` leaves no step to run.
    """
    bsz = batch["inputs"].shape[0]
    remaining = _remaining_steps(model, after)
    h_dirs, l_dirs = rand_unit_dirs(warm, candidates, generator)
    clean = TinyRecursiveReasoningModel_ACTV1InnerCarry(
        z_H=warm.z_H.detach(), z_L=warm.z_L.detach()
    )
    shadow = make_shadow_inner(clean, h_dirs, l_dirs, fd_eps)
    combined = cat_inner(clean, shadow)
    combined_batch = cat_batches(batch, repeat_batch(batch, candidates))

    logits = None
    for _ in range(remaining):
        combined, logits, _q = model.inner(combined, combined_batch)

    clean_final, shadow_final = split_inner(combined, bsz)
    clean_logits = logits[:bsz]
    sep = separation(clean_final, shadow_final, candidates)
    best_idx = sep.argmax(dim=1)
    best_sep = sep.gather(1, best_idx[:, None]).squeeze(1)
    best_h, best_l = gather_dirs(h_dirs, l_dirs, best_idx)
    growth = torch.log(best_sep / fd_eps).float() / remaining
    clean_exact, clean_token = correctness(clean_logits, batch["labels"])
    return best_h, best_l, growth, clean_exact, clean_token


@torch.inference_mode()
def eval_directional_sigma(
    model,
    warm: TinyRecursiveReasoningModel_ACTV1InnerCarry,
    batch: dict[str, torch.Tensor],
    after: int,
    h_dir: torch.Tensor,
    l_dir: torch.Tensor,
    sigma: float,
):
    """Score the rollout after a ``+sigma`` and a ``-sigma`` kick along one direction.

    Returns:
        ``(exact, token)``, each with one row per sample and two columns:
        column 0 is the ``+sigma`` rollout, column 1 the ``-sigma`` rollout.

    Raises:
        ValueError: if ``after`` leaves no step to run.
    """
    remaining = _remaining_steps(model, after)
    plus = perturb_inner(warm, h_dir, l_dir, sigma, +1.0)
    minus = perturb_inner(warm, h_dir, l_dir, sigma, -1.0)
    inner = cat_inner(plus, minus)
    combined_batch = cat_batches(batch, batch)
    logits = None
    for _ in range(remaining):
        inner, logits, _q = model.inner(inner, combined_batch)
    exact, token = correctness(logits, combined_batch["labels"])
    bsz = batch["inputs"].shape[0]
    # The batch was stacked as [plus; minus]; fold it back to (sample, sign).
    return (
        exact.view(2, bsz).transpose(0, 1).contiguous(),
        token.view(2, bsz).transpose(0, 1).contiguous(),
    )


def _masked_mean(values: torch.Tensor, mask: torch.Tensor) -> float:
    """Mean of ``values[mask]`` as a float, or NaN when the mask selects nothing."""
    if not mask.any().item():
        return float("nan")
    return values[mask].float().mean().item()


def summarize(exact: torch.Tensor, token: torch.Tensor, clean_exact: torch.Tensor, growth: torch.Tensor):
    """Aggregate per-sample perturbation outcomes into one summary row.

    Args:
        exact: (samples, 2) exact-match flags for the two perturbation signs.
        token: (samples, 2) token accuracies for the two signs.
        clean_exact: (samples,) exact-match flags of the unperturbed rollout.
        growth: (samples,) growth rate of the selected direction.

    Returns:
        Dict of floats. ``retain_*`` is computed over samples the clean rollout
        solved and ``rescue_*`` over samples it failed; a group with no samples
        yields NaN.
    """
    clean_success = clean_exact.bool()
    clean_fail = ~clean_success
    both = exact.all(dim=1)
    either = exact.any(dim=1)
    # torch.quantile rejects empty input; report NaN instead of raising.
    q90 = torch.quantile(growth, 0.90).item() if growth.numel() else float("nan")
    return {
        "clean_acc": clean_success.float().mean().item(),
        "mean_sign_exact": exact.float().mean().item(),
        "mean_sign_token_acc": token.mean().item(),
        "worst_sign_exact": both.float().mean().item(),
        "best_sign_exact": either.float().mean().item(),
        "selected_growth_mean": growth.mean().item(),
        "selected_growth_q90": q90,
        "retain_mean_on_clean_success": _masked_mean(exact, clean_success),
        "retain_worst_on_clean_success": _masked_mean(both, clean_success),
        "rescue_mean_on_clean_fail": _masked_mean(exact, clean_fail),
        "rescue_best_on_clean_fail": _masked_mean(either, clean_fail),
    }


def write_summary(path: Path, rows: list[dict[str, Any]]) -> None:
    """Write ``rows`` as CSV; the header is the union of keys in first-seen order.

    An empty ``rows`` list produces an empty file instead of raising.
    """
    keys = list(dict.fromkeys(key for row in rows for key in row))
    with path.open("w", newline="") as f:
        if not keys:
            return
        writer = csv.DictWriter(f, fieldnames=keys)
        writer.writeheader()
        writer.writerows(rows)


def normalize_sigmas(raw: list[float]) -> list[float]:
    """Return the sigma sweep with exactly one entry per value and an exact 0.0.

    Values within 1e-12 of zero are snapped to 0.0 so the clean baseline can be
    looked up by that key. Duplicates are dropped (keeping first occurrence)
    because each sigma owns one result bucket. 0.0 is prepended when absent.
    """
    sigmas: list[float] = []
    for value in raw:
        value = 0.0 if abs(value) <= 1e-12 else float(value)
        if value not in sigmas:
            sigmas.append(value)
    if 0.0 not in sigmas:
        sigmas.insert(0, 0.0)
    return sigmas


def _with_ext(prefix: Path, ext: str) -> Path:
    """Append ``ext`` to ``prefix`` without clobbering dots already in its name."""
    return prefix.with_name(prefix.name + ext)


def main() -> None:
    """Sweep perturbation onset and amplitude, then write CSV/JSON/NPZ results.

    For every ``--perturb-afters`` value the model is warmed up, the most
    separating direction is selected per sample, and each sigma is evaluated
    with both signs. Outputs are ``<out-prefix>.summary.csv``,
    ``<out-prefix>.meta.json`` and ``<out-prefix>.npz``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--ckpt-root", required=True)
    parser.add_argument("--ckpt-name", required=True)
    parser.add_argument("--label", required=True)
    parser.add_argument("--n-samples", type=int, default=1000)
    parser.add_argument("--batch-size", type=int, default=16)
    parser.add_argument("--candidates", type=int, default=8)
    parser.add_argument("--fd-eps", type=float, default=1e-3)
    parser.add_argument("--sigmas", default="0,0.001,0.003,0.01,0.03,0.1")
    parser.add_argument("--perturb-afters", default="0,4,8,12")
    parser.add_argument("--seed", type=int, default=20260607)
    parser.add_argument("--out-prefix", required=True)
    args = parser.parse_args()

    device = "cuda"
    sigmas = normalize_sigmas(parse_float_list(args.sigmas))
    afters = parse_int_list(args.perturb_afters)
    if not afters:
        parser.error("--perturb-afters must list at least one step")
    torch.manual_seed(args.seed)
    generator = torch.Generator(device=device).manual_seed(args.seed + 17)

    model, cfg, data_path = load_model(Path(args.ckpt_root), args.ckpt_name, device)
    samples = load_test_samples(data_path, args.n_samples, args.seed)
    n = len(samples["inputs"])
    if n == 0:
        raise ValueError(f"no test samples loaded from {data_path}")
    print(
        f"[run] label={args.label} n={n} batch={args.batch_size} candidates={args.candidates} "
        f"afters={afters} sigmas={sigmas}",
        flush=True,
    )

    rows: list[dict[str, Any]] = []
    exact_store = []
    token_store = []
    growth_store = []
    for after in afters:
        sigma_exact = {sigma: [] for sigma in sigmas}
        sigma_token = {sigma: [] for sigma in sigmas}
        growth_parts = []
        for start in range(0, n, args.batch_size):
            end = min(start + args.batch_size, n)
            batch = batch_slice(samples, start, end, device)
            warm, _ = warmup_inner(model, batch, after)
            h_dir, l_dir, growth, _search_clean_exact, _search_clean_token = search_direction(
                model, warm, batch, after, args.candidates, args.fd_eps, generator
            )
            growth_parts.append(growth.cpu())
            for sigma in sigmas:
                exact, token = eval_directional_sigma(model, warm, batch, after, h_dir, l_dir, sigma)
                sigma_exact[sigma].append(exact.cpu())
                sigma_token[sigma].append(token.cpu())
            if end == n or (end // args.batch_size) % 10 == 0:
                print(f"  after={after} [{end}/{n}]", flush=True)

        growth_all = torch.cat(growth_parts, dim=0)
        growth_store.append(growth_all.numpy())
        exact_by_sigma = {sigma: torch.cat(sigma_exact[sigma], dim=0) for sigma in sigmas}
        token_by_sigma = {sigma: torch.cat(sigma_token[sigma], dim=0) for sigma in sigmas}
        # sigma == 0 leaves both signs unperturbed, so column 0 is the clean result.
        clean_all = exact_by_sigma[0.0][:, 0].clone()
        print(f"  after={after} clean grouping done clean_acc={clean_all.float().mean().item():.4f}", flush=True)
        for sigma in sigmas:
            exact_all = exact_by_sigma[sigma]
            token_all = token_by_sigma[sigma]
            row: dict[str, Any] = {
                "label": args.label,
                "perturb_after": after,
                "sigma": sigma,
                "n_samples": n,
                "candidates": args.candidates,
                "fd_eps": args.fd_eps,
                "ckpt_root": str(Path(args.ckpt_root)),
                "ckpt_name": args.ckpt_name,
                **summarize(exact_all, token_all, clean_all, growth_all),
            }
            rows.append(row)
            exact_store.append(exact_all.numpy())
            token_store.append(token_all.numpy())
            print(
                f"  after={after} sigma={sigma:g} clean={row['clean_acc']:.4f} "
                f"mean={row['mean_sign_exact']:.4f} worst={row['worst_sign_exact']:.4f} "
                f"retain_worst={row['retain_worst_on_clean_success']:.4f} "
                f"rescue_best={row['rescue_best_on_clean_fail']:.4f}",
                flush=True,
            )

    out_prefix = Path(args.out_prefix)
    out_prefix.parent.mkdir(parents=True, exist_ok=True)
    write_summary(_with_ext(out_prefix, ".summary.csv"), rows)
    meta = {
        "args": vars(args),
        "data_path": str(data_path),
        "config_global_batch_size": cfg.get("global_batch_size"),
        "sigmas": sigmas,
        "perturb_afters": afters,
        "n_samples": n,
    }
    _with_ext(out_prefix, ".meta.json").write_text(json.dumps(meta, indent=2, sort_keys=True))
    np.savez_compressed(
        _with_ext(out_prefix, ".npz"),
        idx=samples["idx"],
        sigmas=np.asarray(sigmas, dtype=np.float32),
        perturb_afters=np.asarray(afters, dtype=np.int32),
        exact=np.stack(exact_store, axis=0),
        token_acc=np.stack(token_store, axis=0),
        selected_growth=np.stack(growth_store, axis=0),
        meta_json=np.asarray(json.dumps(meta, sort_keys=True)),
    )
    print(f"[done] {out_prefix}.summary.csv", flush=True)


if __name__ == "__main__":
    main()
0.6465 | 0.1289 | 0.6621 | 8000 | -0.0039 | 14 | +| HRM | HRM Engelken+KL interfloss | complete | 0.5176 | 0.6211 | 0.1035 | 0.6250 | 9000 | -0.0293 | 14 | +| HRM | HRM conservative Engelken+KL | complete | 0.5176 | 0.6367 | 0.1191 | 0.6777 | 7000 | -0.0137 | 14 | +| HRM | HRM late Engelken+KL | complete | 0.5176 | 0.6191 | 0.1016 | 0.6367 | 7000 | -0.0312 | 14 | +| HRM | HRM volume-envelope+KL | complete | 0.5176 | 0.6074 | 0.0898 | 0.6211 | 8000 | -0.0430 | 14 | +| HRM | HRM basin consistency | complete | 0.5176 | 0.6152 | 0.0977 | 0.6152 | 10000 | -0.0352 | 12 | +| HRM | HRM single perturbed CE | complete | 0.5176 | 0.6660 | 0.1484 | 0.6660 | 5000 | 0.0156 | 12 | +| HRM | HRM clean+multi perturbed CE | complete | 0.5176 | 0.6543 | 0.1367 | 0.6660 | 7000 | 0.0039 | 12 | +| HRM | HRM fixed-unroll baseline 50k | complete | 0.5176 | 0.5801 | 0.0625 | 0.6328 | 5000 | -0.0703 | 22 | +| HRM | HRM multi4 loguniform 50k | complete | 0.5176 | 0.5889 | 0.0713 | 0.6250 | 7500 | -0.0615 | 22 | +| TRM | TRM baseline 10k | complete | 0.5977 | 0.5762 | -0.0215 | 0.6621 | 2000 | NA | 12 | +| TRM | TRM mixed volume-CF | complete | 0.5977 | 0.5078 | -0.0898 | 0.6133 | 1000 | -0.0684 | 12 | +| TRM | TRM Engelken interfloss | complete | 0.5977 | 0.5195 | -0.0781 | 0.5977 | 0 | -0.0566 | 14 | +| TRM | TRM Engelken+KL interfloss | complete | 0.5977 | 0.5957 | -0.0020 | 0.5977 | 0 | 0.0195 | 14 | +| TRM | TRM late Engelken+KL | complete | 0.5977 | 0.5449 | -0.0527 | 0.5977 | 0 | -0.0312 | 14 | +| TRM | TRM volume-envelope+KL | complete | 0.5977 | 0.5508 | -0.0469 | 0.5977 | 0 | -0.0254 | 14 | +| TRM | TRM basin consistency | complete | 0.5977 | 0.2422 | -0.3555 | 0.5977 | 0 | -0.3340 | 12 | +| TRM | TRM single perturbed CE | complete | 0.5977 | 0.6191 | 0.0215 | 0.6582 | 2000 | 0.0430 | 12 | +| TRM | TRM clean+multi perturbed CE | complete | 0.5977 | 0.5918 | -0.0059 | 0.6719 | 4000 | 0.0156 | 12 | +| TRM | TRM fixed-unroll baseline 50k | complete | 0.5615 | 0.4971 | -0.0645 | 0.5947 | 
22500 | -0.0791 | 22 | +| TRM | TRM multi4 loguniform 50k | complete | 0.5615 | 0.5508 | -0.0107 | 0.6084 | 42500 | -0.0254 | 22 | + +## Floss Episode Diagnostics + +### HRM Engelken interfloss +- episode 0 @ train_step 0: floss 0.030840 -> 0.000022, lyap1 -0.1476 -> 0.0047, volume -0.1693 -> 0.0013, KL mean/max 0.000000/0.000000 +- episode 1 @ train_step 500: floss 0.019141 -> 0.000014, lyap1 -0.1208 -> 0.0017, volume -0.1357 -> 0.0005, KL mean/max 0.000000/0.000000 + +### HRM Engelken+KL interfloss +- episode 0 @ train_step 0: floss 0.030840 -> 0.001082, lyap1 -0.1476 -> 0.0107, volume -0.1693 -> -0.0180, KL mean/max 0.645335/14.346942 +- episode 1 @ train_step 500: floss 0.007646 -> 0.000361, lyap1 -0.0588 -> 0.0122, volume -0.0812 -> -0.0068, KL mean/max 0.130172/4.959406 + +### HRM conservative Engelken+KL +- episode 0 @ train_step 0: floss 0.030840 -> 0.046181, lyap1 -0.1476 -> -0.2018, volume -0.1693 -> -0.2144, KL mean/max 2.819328/11.853225 +- episode 1 @ train_step 500: floss 0.016849 -> 0.048128, lyap1 -0.0850 -> -0.2127, volume -0.1248 -> -0.2180, KL mean/max 1.228630/5.586896 + +### HRM late Engelken+KL +- episode 0 @ train_step 0: floss 0.046401 -> 0.000793, lyap1 -0.1729 -> 0.0256, volume -0.1976 -> 0.0086, KL mean/max 0.418510/6.242658 +- episode 1 @ train_step 500: floss 0.007572 -> 0.001378, lyap1 0.0810 -> 0.0078, volume 0.0456 -> -0.0055, KL mean/max 0.132306/2.906027 + +### HRM volume-envelope+KL +- episode 0 @ train_step 0: floss 0.000046 -> 0.000000, lyap1 -0.1476 -> -0.2818, volume -0.1693 -> -0.3005, KL mean/max 0.436240/8.779900 +- episode 1 @ train_step 500: floss 0.000801 -> 0.000000, lyap1 -0.1108 -> -0.2791, volume -0.1299 -> -0.2994, KL mean/max 0.112411/1.281256 + +### TRM Engelken interfloss +- episode 0 @ train_step 0: floss 0.000883 -> 0.000047, lyap1 0.0089 -> -0.0011, volume 0.0023 -> -0.0047, KL mean/max 0.000000/0.000000 +- episode 1 @ train_step 500: floss 0.000257 -> 0.000006, lyap1 -0.0149 -> -0.0008, volume -0.0157 -> 
-0.0015, KL mean/max 0.000000/0.000000 + +### TRM Engelken+KL interfloss +- episode 0 @ train_step 0: floss 0.000883 -> 0.000242, lyap1 0.0089 -> 0.0158, volume 0.0023 -> 0.0064, KL mean/max 0.309170/7.308133 +- episode 1 @ train_step 500: floss 0.000063 -> 0.000059, lyap1 0.0031 -> 0.0088, volume -0.0017 -> 0.0018, KL mean/max 0.086242/0.927036 + +### TRM late Engelken+KL +- episode 0 @ train_step 0: floss 0.004981 -> 0.001447, lyap1 -0.0423 -> -0.0327, volume -0.0482 -> -0.0368, KL mean/max 0.296968/3.760708 +- episode 1 @ train_step 500: floss 0.002911 -> 0.001941, lyap1 -0.0540 -> -0.0372, volume -0.0538 -> -0.0426, KL mean/max 0.092765/3.175512 + +### TRM volume-envelope+KL +- episode 0 @ train_step 0: floss 0.000074 -> 0.000000, lyap1 0.0089 -> 0.0145, volume 0.0023 -> -0.0076, KL mean/max 0.286836/6.571321 +- episode 1 @ train_step 500: floss 0.000000 -> 0.000000, lyap1 0.0055 -> -0.0251, volume -0.0009 -> -0.0281, KL mean/max 0.089595/2.901087 + +## Incomplete Runs / Process Snapshot + +- No monitored experiment processes are active. + +## Notes + +- `Final/Last` is `final_acc` when present, otherwise the latest eval accuracy. +- `Vs Baseline` compares against the matching HRM/TRM 10k no-floss baseline. +- A complete report may still show partial rows if an experiment crashed or was interrupted. + +## Next Experiment Questions + +- Close the faithful-flossing loop before making a claim that flossing is ineffective. Required matrix: from-scratch baseline, direct flossing loss as negative control, faithful prefloss, faithful interfloss with separated floss optimizer steps, and optionally volume/spectrum interfloss. +- Treat continuation runs as screening only. Final claims about flossing should use from-scratch full training, because continuation can confound optimizer state, EMA horizon, puzzle embeddings, and data-order effects. +- Run GRM/PTRM after or in parallel with faithful flossing. 
def recabs(bits: np.ndarray) -> np.ndarray:
    """Fold ``|a - b|`` over the leading axis of ``bits``.

    For 0/1 entries ``|a - b|`` is XOR, so this returns the running XOR of the
    rows of ``bits``.
    """
    out = bits[0]
    for bit in bits[1:]:
        out = np.abs(out - bit)
    return out


def generate_input_output(
    batch_size: int,
    steps: int,
    seed: int,
    input_dim: int,
    input_scale: float,
    delay: int,
    task: int,
    device: torch.device,
) -> tuple[torch.Tensor, torch.Tensor]:
    """Port of generateInputOutput in GradientFlossing_XOR.jl.

    Draws random bits and builds the delayed target: for ``task == 0`` the
    target is the first input channel ``delay`` steps earlier (copy task);
    otherwise it is the folded XOR (see ``recabs``) of ``2**abs(task)`` past
    bits spaced ``delay`` apart. The first ``delay`` raw steps are dropped.

    Returns tensors shaped as Julia uses conceptually:
      s:      [T, S, B]  (scaled by ``input_scale``)
      target: [T, 1, B]
    """
    rng = np.random.default_rng(seed)
    raw_steps = steps + delay
    s = rng.integers(0, 2, size=(raw_steps, input_dim, batch_size), dtype=np.int64).astype(np.float32)
    target = np.zeros((raw_steps, 1, batch_size), dtype=np.float32)

    if task == 0:
        for i in range(delay, raw_steps):
            target[i, :, :] = s[i - delay, :1, :]
    else:
        h = abs(task)
        n_taps = 2**h
        span = delay * n_taps
        for i in range(span, raw_steps):
            indices = [i - delay * j for j in range(1, n_taps + 1)]
            values = np.flip(s[indices, 0, :], axis=0)
            target[i, 0, :] = recabs(values)

    sx = torch.from_numpy(s[delay:] * np.float32(input_scale)).to(device)
    ty = torch.from_numpy(target[delay:]).to(device)
    return sx, ty


@dataclass
class Config:
    """Hyper-parameters of the delayed-XOR flossing reproduction.

    Every field is exposed as a ``--kebab-case`` command-line flag by
    ``parse_args``.
    """

    hidden_size: int = 80
    train_epochs: int = 3001
    inter_period: int = 100  # an inter-floss episode may start when epoch % inter_period == 0
    inter_epochs: int = 500  # floss steps per inter-floss episode
    pre_epochs: int = 500  # floss steps run before the first task step
    max_inter_episodes: int = 2
    batch_size: int = 16
    input_dim: int = 1
    train_steps: int = 300
    lyap_steps: int = 55  # time steps simulated per Lyapunov-spectrum estimate
    floss_input_steps: int = 300  # length of the input drawn for a floss step
    seed_ic: int = 1
    seed_input: int = 1
    seed_net: int = 1
    seed_ons: int = 1
    lr: float = 1e-3
    beta1: float = 0.9
    beta2: float = 0.999
    init_type: int = 1  # 1: Gaussian recurrent matrix, 3: scaled orthogonal
    recurrent_gain: float = 1.0
    recurrent_mean_gain: float = 0.0
    input_scale: float = 1.0
    delay: int = 10
    ws_std: float = 1.0
    ws_mean: float = 0.0
    wr_std: float = 1.0
    wr_mean: float = 0.0
    b_std: float = 0.1
    b_mean: float = 0.0
    n_lyap: int = 75  # number of exponents flossed; <= 0 disables flossing
    task: int = -1
    lyap_target: float = 0.0
    eval_every: int = 100
    eval_batches: int = 4
    log_every_floss: int = 50
    device: str = "cpu"
    out: str = "research/flossing/engelken_python/run.json"


def _lyap_discard_steps(lyap_steps: int) -> int:
    """Number of leading steps excluded from the Lyapunov accumulation.

    A fixed transient of 10 steps plus ``ceil(lyap_steps / 10)`` steps for the
    orthonormal basis to align.
    """
    return 10 + math.ceil(lyap_steps / 10)


class VanillaRNN:
    """tanh RNN ``x <- J tanh(x) + b + W_s s / input_dim`` with a linear readout."""

    def __init__(self, cfg: Config, device: torch.device):
        """Draw all parameters from ``cfg.seed_net`` and relax the initial state.

        Raises:
            ValueError: for an ``init_type`` other than 1 or 3.
        """
        self.cfg = cfg
        self.device = device
        torch.manual_seed(cfg.seed_net)
        n = cfg.hidden_size

        # NOTE: the draw order below fixes the random stream; do not reorder.
        self.ws = torch.nn.Parameter(
            cfg.ws_std * torch.randn(n, cfg.input_dim, device=device) + cfg.ws_mean
        )
        self.wr = torch.nn.Parameter(
            cfg.wr_std * torch.randn(1, n, device=device) + cfg.wr_mean
        )
        self.b = torch.nn.Parameter(
            cfg.b_mean + cfg.b_std * torch.rand(n, device=device)
        )
        self.offset = torch.nn.Parameter(torch.tensor([0.001], device=device))

        if cfg.init_type == 1:
            j = cfg.recurrent_gain * torch.randn(n, n, device=device) / math.sqrt(n)
            if cfg.recurrent_mean_gain != 0:
                j = j + cfg.recurrent_mean_gain / n
        elif cfg.init_type == 3:
            j0 = cfg.recurrent_gain * torch.randn(n, n, device=device) / math.sqrt(n)
            q, _ = torch.linalg.qr(j0)
            j = cfg.recurrent_gain * q
        else:
            raise ValueError(f"unsupported init_type={cfg.init_type}")
        self.j = torch.nn.Parameter(j)

        self.x_init = torch.nn.Parameter(self._relax_initial_condition(cfg.seed_ic, 100))

    def _relax_initial_condition(self, seed: int, steps: int) -> torch.Tensor:
        """Iterate the input-free dynamics ``steps`` times from a seeded random state."""
        gen = torch.Generator(device=self.device).manual_seed(seed)
        x = torch.randn(self.cfg.hidden_size, self.cfg.batch_size, generator=gen, device=self.device)
        with torch.no_grad():
            for _ in range(steps):
                x = self.j.detach() @ torch.tanh(x) + self.b.detach().unsqueeze(1)
        return x

    def task_parameters(self) -> list[torch.nn.Parameter]:
        """Parameters updated by the task optimizer (everything, readout included)."""
        return [self.ws, self.j, self.wr, self.b, self.offset, self.x_init]

    def floss_parameters(self) -> list[torch.nn.Parameter]:
        """Parameters updated by the floss optimizer (input, recurrent, bias only)."""
        return [self.ws, self.j, self.b]

    def task_loss_and_accuracy(self, s: torch.Tensor, target: torch.Tensor) -> tuple[torch.Tensor, float]:
        """Return mean BCE-with-logits loss and accuracy over the scored steps.

        ``s`` and ``target`` are indexed by time on axis 0 and must cover
        ``cfg.train_steps`` steps for ``cfg.batch_size`` sequences.
        """
        cfg = self.cfg
        first_loss_idx = 2 * cfg.delay if cfg.task > 0 else cfg.delay * (2 ** abs(cfg.task))
        # Julia is 1-indexed and starts loss at ti >= firstlossidx.
        first_loss_idx = max(first_loss_idx - 1, 0)
        x = self.x_init
        loss = torch.zeros((), device=self.device)
        # Counted on-device so the loop does not synchronise with the host each step.
        correct = torch.zeros((), dtype=torch.long, device=self.device)
        count = 0
        for ti in range(cfg.train_steps):
            x = self.j @ torch.tanh(x) + self.b.unsqueeze(1) + (self.ws @ s[ti]) / cfg.input_dim
            if ti >= first_loss_idx:
                logits = self.wr @ x + self.offset.unsqueeze(1)
                loss = loss + F.binary_cross_entropy_with_logits(logits, target[ti], reduction="sum")
                pred = (torch.sigmoid(logits) >= 0.5).to(target.dtype)
                correct = correct + (pred == target[ti]).sum()
                count += target[ti].numel()
        denom = cfg.batch_size * max(cfg.train_steps - first_loss_idx, 1)
        return loss / denom, correct.item() / max(count, 1)

    def lyapunov_spectrum(self, s: torch.Tensor) -> torch.Tensor:
        """Estimate the first ``cfg.n_lyap`` Lyapunov exponents by repeated QR.

        Uses only the first sequence (column 0) of ``x_init`` and of ``s``. The
        per-step tangent map is pushed through an orthonormal basis and
        ``log|diag(R)|`` is averaged over the steps after the discarded
        transient. The result is differentiable with respect to the parameters.

        Raises:
            ValueError: if ``s`` has fewer than ``cfg.lyap_steps`` time steps, or
                if no step is left after the transient discard.
        """
        cfg = self.cfg
        if cfg.n_lyap <= 0:
            return torch.empty(0, device=self.device)
        if s.shape[0] < cfg.lyap_steps:
            raise ValueError(
                f"input has {s.shape[0]} steps but lyap_steps={cfg.lyap_steps}"
            )

        x = self.x_init[:, 0].clone()
        gen = torch.Generator(device=self.device).manual_seed(cfg.seed_ons)
        q0 = torch.randn(cfg.hidden_size, cfg.n_lyap, generator=gen, device=self.device)
        q, _ = torch.linalg.qr(q0)
        accum = torch.zeros(cfg.n_lyap, device=self.device)
        tsim = 0
        discard = _lyap_discard_steps(cfg.lyap_steps)
        diag_idx = torch.arange(cfg.n_lyap, device=self.device)

        for n in range(1, cfg.lyap_steps + 1):
            x = self.j @ torch.tanh(x) + self.b + (self.ws @ s[n - 1, :, 0]) / cfg.input_dim
            phi_prime = 1.0 / torch.cosh(x).square()
            tangent = phi_prime.unsqueeze(1) * self.j
            q = tangent @ q
            q, r = torch.linalg.qr(q)
            if n > discard:
                # clamp keeps log finite if a diagonal entry underflows to zero
                accum = accum + r[diag_idx, diag_idx].abs().clamp_min(1e-30).log()
                tsim += 1

        if tsim == 0:
            raise ValueError("lyap_steps too short after transient discard")
        return accum / tsim


def evaluate(model: VanillaRNN, cfg: Config, epoch: int) -> dict[str, float]:
    """Average task loss and accuracy over ``cfg.eval_batches`` fresh batches.

    Evaluation seeds are derived from ``epoch`` with a 10,000,000 offset.
    """
    losses = []
    accs = []
    with torch.no_grad():
        for i in range(cfg.eval_batches):
            seed = 10_000_000 + epoch * 1000 + i
            s, target = generate_input_output(
                cfg.batch_size, cfg.train_steps, seed, cfg.input_dim,
                cfg.input_scale, cfg.delay, cfg.task, model.device,
            )
            loss, acc = model.task_loss_and_accuracy(s, target)
            losses.append(float(loss.item()))
            accs.append(acc)
    return {"loss": float(np.mean(losses)), "accuracy": float(np.mean(accs))}


def _is_multiple(value: int, period: int) -> bool:
    """True when ``period`` is positive and divides ``value`` (never for period <= 0)."""
    return period > 0 and value % period == 0


def run(cfg: Config) -> dict:
    """Train on the task with optional pre- and inter-flossing; write and return the log.

    Each epoch optionally runs a floss phase (separate optimizer, batch size
    one, loss ``mean((lambda_i - lyap_target)^2)``) and then one task step with
    gradient-norm clipping at 0.03. Flossing is skipped entirely when
    ``cfg.n_lyap <= 0``. The log is written to ``cfg.out`` as JSON.

    Raises:
        ValueError: if ``n_lyap > hidden_size`` or ``lyap_steps`` does not
            exceed the transient discard.
    """
    device = torch.device(cfg.device)
    if cfg.n_lyap > cfg.hidden_size:
        raise ValueError("n_lyap must be <= hidden_size")
    if cfg.lyap_steps <= _lyap_discard_steps(cfg.lyap_steps):
        raise ValueError("lyap_steps too short for official transient discard")

    model = VanillaRNN(cfg, device)
    task_optim = torch.optim.AdamW(
        model.task_parameters(), lr=cfg.lr, betas=(cfg.beta1, cfg.beta2), weight_decay=0.0
    )
    floss_optim = torch.optim.AdamW(
        model.floss_parameters(), lr=cfg.lr, betas=(cfg.beta1, cfg.beta2), weight_decay=0.0
    )
    # With no exponents there is no floss loss to differentiate.
    floss_enabled = cfg.n_lyap > 0
    target = torch.full((max(cfg.n_lyap, 0),), cfg.lyap_target, device=device)
    log: dict = {"config": asdict(cfg), "evals": [], "floss": [], "task": []}

    t0 = time.time()
    inter_count = 0
    for epoch in range(1, cfg.train_epochs + 1):
        should_prefloss = floss_enabled and epoch == 1 and cfg.pre_epochs > 0
        should_interfloss = (
            floss_enabled
            and _is_multiple(epoch, cfg.inter_period)
            and inter_count < cfg.max_inter_episodes
            and cfg.inter_epochs > 0
        )
        if should_prefloss or should_interfloss:
            n_steps = cfg.pre_epochs if should_prefloss else cfg.inter_epochs
            kind = "pre" if should_prefloss else "inter"
            if should_interfloss:
                inter_count += 1
            for floss_step in range(1, n_steps + 1):
                seed = cfg.train_epochs * cfg.seed_input + epoch + floss_step
                s, _ = generate_input_output(
                    1, cfg.floss_input_steps, seed, cfg.input_dim,
                    cfg.input_scale, cfg.delay, cfg.task, device,
                )
                floss_optim.zero_grad(set_to_none=True)
                spectrum = model.lyapunov_spectrum(s)
                floss_loss = (spectrum - target).square().mean()
                floss_loss.backward()
                floss_optim.step()
                if floss_step in (1, n_steps) or _is_multiple(floss_step, cfg.log_every_floss):
                    rec = {
                        "epoch": epoch,
                        "kind": kind,
                        "floss_step": floss_step,
                        "loss": float(floss_loss.item()),
                        "lambda_mean": float(spectrum.detach().mean().item()),
                        "lambda_1": float(spectrum.detach()[0].item()),
                        "elapsed": time.time() - t0,
                    }
                    log["floss"].append(rec)
                    print(
                        f"F {kind} epoch={epoch} step={floss_step}/{n_steps} "
                        f"loss={rec['loss']:.6f} lambda1={rec['lambda_1']:+.4f}",
                        flush=True,
                    )

        seed = cfg.train_epochs * cfg.seed_input + epoch
        s, target_batch = generate_input_output(
            cfg.batch_size, cfg.train_steps, seed, cfg.input_dim,
            cfg.input_scale, cfg.delay, cfg.task, device,
        )
        # Also clears gradients the floss phase left on shared parameters.
        task_optim.zero_grad(set_to_none=True)
        task_loss, task_acc = model.task_loss_and_accuracy(s, target_batch)
        task_loss.backward()
        torch.nn.utils.clip_grad_norm_(model.task_parameters(), 0.03)
        task_optim.step()

        if epoch in (1, cfg.train_epochs) or _is_multiple(epoch, cfg.eval_every):
            eval_metrics = evaluate(model, cfg, epoch)
            rec = {
                "epoch": epoch,
                "train_loss": float(task_loss.item()),
                "train_accuracy": float(task_acc),
                "eval_loss": eval_metrics["loss"],
                "eval_accuracy": eval_metrics["accuracy"],
                "elapsed": time.time() - t0,
            }
            log["evals"].append(rec)
            print(
                f"T epoch={epoch}/{cfg.train_epochs} loss={rec['train_loss']:.4f} "
                f"train_acc={rec['train_accuracy']:.3f} eval_acc={rec['eval_accuracy']:.3f}",
                flush=True,
            )

    out_path = Path(cfg.out)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(json.dumps(log, indent=2))
    return log


def parse_args() -> Config:
    """Build a ``Config`` from the command line, one ``--flag`` per field."""
    parser = argparse.ArgumentParser()
    for field_name, default in asdict(Config()).items():
        flag = "--" + field_name.replace("_", "-")
        if isinstance(default, bool):
            parser.add_argument(flag, action=argparse.BooleanOptionalAction, default=default)
        else:
            parser.add_argument(flag, type=type(default), default=default)
    return Config(**vars(parser.parse_args()))


if __name__ == "__main__":
    run(parse_args())
# Column order of the result table; fixed so the CSV header can be written
# even when no checkpoint produced a row.
RESULT_FIELDS = ("step", "n", "seed", "exact_acc", "token_acc", "elapsed_sec")


def parse_steps(text: str) -> list[int]:
    """Parse a comma-separated list of checkpoint steps, ignoring empty items."""
    return [int(item.strip()) for item in text.split(",") if item.strip()]


def main() -> None:
    """Evaluate each requested checkpoint and write the results as JSON and CSV.

    Checkpoints missing under ``--ckpt-root`` are skipped with a message. The
    CSV goes to ``--out`` and the JSON next to it with a ``.json`` suffix. When
    every checkpoint is missing, the JSON is an empty list and the CSV contains
    only the header.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--ckpt-root", required=True)
    parser.add_argument("--steps", required=True)
    parser.add_argument("--n", type=int, default=1000)
    parser.add_argument("--batch-size", type=int, default=32)
    parser.add_argument("--seed", type=int, default=20260611)
    parser.add_argument("--out", required=True)
    args = parser.parse_args()

    ckpt_root = Path(args.ckpt_root)
    rows = []
    for step in parse_steps(args.steps):
        ckpt_name = f"step_{step}"
        if not (ckpt_root / ckpt_name).exists():
            print(f"[skip] missing {ckpt_name}", flush=True)
            continue

        t0 = time.time()
        head, base, cfg, _adam_cls, _sparse_cls = load_model(
            "trm",
            ckpt_root,
            ckpt_name,
            "cuda",
            batch_size_override=args.batch_size,
        )
        data_path = Path(cfg["data_path"])
        acc, tok = evaluate(
            head,
            base,
            data_path,
            n_samples=args.n,
            batch_size=args.batch_size,
            device="cuda",
            seed=args.seed,
        )
        elapsed = time.time() - t0
        row = {
            "step": step,
            "n": args.n,
            "seed": args.seed,
            "exact_acc": acc,
            "token_acc": tok,
            "elapsed_sec": elapsed,
        }
        rows.append(row)
        print(
            f"step={step} exact={acc:.4f} token={tok:.4f} elapsed={elapsed:.1f}s",
            flush=True,
        )
        # Drop this checkpoint before loading the next one to bound GPU memory.
        del head, base
        torch.cuda.empty_cache()

    out_path = Path(args.out)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.with_suffix(".json").write_text(json.dumps(rows, indent=2))
    with out_path.open("w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=list(RESULT_FIELDS))
        writer.writeheader()
        writer.writerows(rows)


if __name__ == "__main__":
    main()
+++ b/eval_trm_official768_step13020_n1000.summary.csv @@ -0,0 +1,2 @@ +deterministic/exact,deterministic/token_acc,fd_lyap,fd_spectrum_k,include_clean,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/exact,oracle_pass/token_acc,perturb_both,perturb_h,perturb_l,q_max/exact,q_max/token_acc,q_mean,rollout0/exact,rollout0/token_acc,rollouts,steps +0.6050000190734863,0.8624938130378723,0.0,0.0,0.0,0.6050000190734863,0.8624938130378723,1000.0,0.0,0.6050000190734863,0.8624938130378723,0.0,0.0,1.0,0.6050000190734863,0.8624938130378723,0.3627968728542328,0.6050000190734863,0.8624938130378723,1.0,16.0 diff --git a/extend_rollout.py b/extend_rollout.py new file mode 100644 index 0000000..09af1f8 --- /dev/null +++ b/extend_rollout.py @@ -0,0 +1,68 @@ +"""Discriminate Rainer's hypotheses: run the trained recurrence FAR beyond the 16-segment budget +and watch the fate of trajectories that FAIL at segment 16. + - settle to CORRECT later => transient that would self-resolve (more compute helps) + - settle to WRONG (drift->0) => multistable WRONG attractor (genuine bistability) + - never settle (drift stays high) => chaotic saddle / persistent non-convergence +Plain forward (no JVP). Saves per-segment decoded-exactness and per-segment z_H drift. 
def main():
    """Roll the trained recurrence out for ``--n-seg`` segments and save per-segment stats.

    For every test sample this records, after each segment, whether the decoded
    prediction matches the labels on all positions with ``labels > 0``
    (``exact_seg``) and the norm of the change in ``z_H`` since the previous
    segment (``drift_seg``, zero for the first segment). Results are saved with
    ``np.savez_compressed`` to ``--out``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--ckpt-root", required=True)
    ap.add_argument("--ckpt-name", required=True)
    ap.add_argument("--data", required=True)
    ap.add_argument("--n", type=int, default=512)
    ap.add_argument("--batch-size", type=int, default=32)
    ap.add_argument("--n-seg", type=int, default=128)
    ap.add_argument("--seed", type=int, default=0)
    ap.add_argument("--out", required=True)
    args = ap.parse_args()
    if args.n_seg < 1:
        ap.error("--n-seg must be >= 1")
    if args.batch_size < 1:
        ap.error("--batch-size must be >= 1")

    device = "cuda"
    model, _cfg, _train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device)
    inner = model.inner
    test = load_test_samples(Path(args.data), args.n, 0, 1, args.seed)
    n = len(test["inputs"])
    if n == 0:
        raise ValueError(f"no test samples loaded from {args.data}")
    pe = inner.puzzle_emb_len
    seq_full = inner.config.seq_len + pe
    hidden = inner.config.hidden_size
    # Progress line reports the 16-segment budget, or the last segment when the
    # rollout is shorter than that.
    report_seg = min(16, args.n_seg)
    # Create the output directory up front so a bad path fails before the GPU work.
    Path(args.out).parent.mkdir(parents=True, exist_ok=True)

    EX, DR, IDX = [], [], []
    for s in range(0, n, args.batch_size):
        e = min(s + args.batch_size, n)
        batch = {k: test[k][s:e].to(device) for k in ["inputs", "labels", "puzzle_identifiers"]}
        B = batch["inputs"].shape[0]
        ex_seg, dr_seg = [], []
        # Plain forward only: keep the whole rollout out of autograd so memory
        # does not grow with the number of segments.
        with torch.no_grad():
            z_H = inner.H_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
            z_L = inner.L_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype)
            seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None)
            inp_emb = inner._input_embeddings(batch["inputs"], batch["puzzle_identifiers"])
            labels = batch["labels"]
            mask = labels > 0
            prev_zH = None
            for _seg in range(args.n_seg):
                for _h in range(inner.config.H_cycles):
                    for _l in range(inner.config.L_cycles):
                        z_L = inner.L_level(z_L, z_H + inp_emb, **seq_info)
                    z_H = inner.L_level(z_H, z_L, **seq_info)
                p = inner.lm_head(z_H)[:, pe:].float().argmax(-1)
                ex_seg.append(((p == labels) | ~mask).all(-1).float().cpu())
                if prev_zH is None:
                    dr_seg.append(torch.zeros(B))
                else:
                    dr_seg.append((z_H - prev_zH).float().flatten(1).norm(dim=1).cpu())
                prev_zH = z_H.detach()
        exact = torch.stack(ex_seg, 1)
        drift = torch.stack(dr_seg, 1)
        EX.append(exact.numpy())
        DR.append(drift.numpy())
        IDX.append(test["idx"][s:e])
        print(
            f"  [{e}/{n}] exact@{report_seg}={exact[:, report_seg - 1].mean():.3f} "
            f"exact@{args.n_seg}={exact[:, -1].mean():.3f}",
            flush=True,
        )

    np.savez_compressed(args.out, exact_seg=np.concatenate(EX), drift_seg=np.concatenate(DR),
                        idx=np.concatenate(IDX))
    print("saved", args.out)


if __name__ == "__main__":
    main()
+__Gradient Flossing: Improving Gradient Descent through Dynamic Control of Jacobians__
+## Overview +In this work we show that RNN training can be improved by pushing Lyapunov exponents of the forward pass to zero during or before training. + +## Installation + +#### Prerequisites +- Download [Julia](https://julialang.org/downloads/) + +#### Dependencies +- Julia (1.6) +- Flux, BackwardsLinalg + +## Getting started +To install the required packages, run the following in the Julia REPL after installing Julia: + +``` +using Pkg + +for pkg in ["Flux", "BackwardsLinalg"] + Pkg.add(pkg) +end +``` + +For example, to train an RNN on the delayed XOR task, run: +``` +include("GradientFlossing_ExampleCode.jl") +# setting parameters: +N, E, Ef, Ei, Ep, Ni, B, S, T, Tp, Ti, sIC, sIn, sNet, sONS, lr, b1, b2, IC, g, gbar, I1, delay, wsS, wsM, wrS, wrM, bS, bM, nLE, task, intype, Lwnt= +80, 3001, 100, 500, 500, 2, 16, 1, 300, 55, 300, 1,1,1,1, 0.001f0, 0.9, 0.999, 1, 1.0, 0.0, 1.0,10, 1.0f0, 0.0f0, 1.0f0, 0.0f0, 0.1f0, 0.0f0,75, -1, 3, 0.0 + +trainRNNflossing(N, E, Ef, Ei, Ep, Ni, B, S, T, Tp, Ti, sIC, sIn, sNet, sONS, lr, b1, b2, IC, g, gbar, I1, delay, wsS, wsM, wrS, wrM, bS, bM, nLE, task, intype, Lwnt) +``` + +## Repository Overview +_GradientFlossing_ExampleCode.jl_:\ +Example scripts for training networks with gradient flossing before training, with gradient flossing before and during training and without gradient flossing. + + +_GradientFlossing_XOR.jl_:\ +Generates input and target output for the copy task and the delayed XOR task. + + + + + + + +### Implementation details +A full specification of packages used and their versions can be found in _packages.txt_.\ +For learning rates the default ADAM parameters were used to avoid any impression of fine-tuning.\ +All simulations were run on a single CPU and took on the order of minutes to a few hours. 
+ + + + + + + diff --git a/external/GradientFlossing/packages.txt b/external/GradientFlossing/packages.txt new file mode 100644 index 0000000..e90facc --- /dev/null +++ b/external/GradientFlossing/packages.txt @@ -0,0 +1,6 @@ +julia => v"1.6.7" + +[442b4e1a] BackwardsLinalg v0.1.1 +[587475ba] Flux v0.11.3 +[e6cf234a] RandomNumbers v1.5.3 +[e88e6eb3] Zygote v0.5.1 diff --git a/external/julia-1.6.7/LICENSE.md b/external/julia-1.6.7/LICENSE.md new file mode 100644 index 0000000..7912722 --- /dev/null +++ b/external/julia-1.6.7/LICENSE.md @@ -0,0 +1,26 @@ +MIT License + +Copyright (c) 2009-2021: Jeff Bezanson, Stefan Karpinski, Viral B. Shah, and other contributors: https://github.com/JuliaLang/julia/contributors + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +end of terms and conditions + +Please see THIRDPARTY.md for license information for other software used in this project. 
diff --git a/external/julia-1.6.7/share/julia/base/ryu/LICENSE.md b/external/julia-1.6.7/share/julia/base/ryu/LICENSE.md new file mode 100644 index 0000000..74c7186 --- /dev/null +++ b/external/julia-1.6.7/share/julia/base/ryu/LICENSE.md @@ -0,0 +1,25 @@ +The code in this directory (base/ryu) is a derivative based on the work in the https://github.com/ulfjack/ryu repository, which allows the use of the Boost software license, included below. + +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
\ No newline at end of file diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/ArgTools/README.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/ArgTools/README.md new file mode 100644 index 0000000..a0415d7 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/ArgTools/README.md @@ -0,0 +1,431 @@ +# ArgTools + +[![Build Status](https://travis-ci.org/JuliaIO/ArgTools.jl.svg?branch=master)](https://travis-ci.org/JuliaIO/ArgTools.jl) +[![Codecov](https://codecov.io/gh/JuliaIO/ArgTools.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/JuliaIO/ArgTools.jl) + +`ArgTools` provides tools for creating consistent, flexible APIs that work with +various kinds of function arguments. In the current version, it helps deal with +arguments that are, at their core, IO handles, but which you'd like to allow the +user to specify directly as file names, commands, pipelines, or, of course, as +raw IO handles. For write arguments, it's also possible to use `nothing` and +write to a temporary file whose path is returned. + +## API + +There are two parts to the `ArgTools` API: + +1. Functions and types for helping define flexible function APIs. +2. Functions for helping to test APIs defined with the above. + +While it's great to be able to define a flexible API, if you're not sure that +it works the way it's supposed to, what's the benefit? Since it can be quite +verbose to test such a combinatorial explosion of methods, `ArgTools` also +provides tools to help test all the ways your tools can be called to make +sure everything is working as intended. + +### Argument Handling + +The API for helping define flexible function signatures consists of two types +and four helper functions: `ArgRead` and `ArgWrite`; `arg_read`, `arg_write`, +`arg_isdir` and `arg_mkdir`. 
+ + + +#### ArgRead + +```jl +ArgRead = Union{AbstractString, AbstractCmd, IO} +``` +The `ArgRead` type is a union of the types that the `arg_read` function knows +how to convert into readable IO handles. See [`arg_read`](@ref) for details. + +#### ArgWrite + +```jl +ArgWrite = Union{AbstractString, AbstractCmd, IO} +``` +The `ArgWrite` type is a union of the types that the `arg_write` function knows +how to convert into writeable IO handles, except for `Nothing` which `arg_write` +handles by generating a temporary file. See [`arg_write`](@ref) for details. + +#### arg_read + +```jl +arg_read(f::Function, arg::ArgRead) -> f(arg_io) +``` +The `arg_read` function accepts an argument `arg` that can be any of these: + +- `AbstractString`: a file path to be opened for reading +- `AbstractCmd`: a command to be run, reading from its standard output +- `IO`: an open IO handle to be read from + +Whether the body returns normally or throws an error, a path which is opened +will be closed before returning from `arg_read` and an `IO` handle will be +flushed but not closed before returning from `arg_read`. + +#### arg_write + +```jl +arg_write(f::Function, arg::ArgWrite) -> arg +arg_write(f::Function, arg::Nothing) -> tempname() +``` +The `arg_write` function accepts an argument `arg` that can be any of these: + +- `AbstractString`: a file path to be opened for writing +- `AbstractCmd`: a command to be run, writing to its standard input +- `IO`: an open IO handle to be written to +- `Nothing`: a temporary path should be written to + +If the body returns normally, a path that is opened will be closed upon +completion; an IO handle argument is left open but flushed before return. If the +argument is `nothing` then a temporary path is opened for writing and closed +upon completion and the path is returned from `arg_write`. In all other cases, +`arg` itself is returned. 
This is a useful pattern since you can consistently +return whatever was written, whether an argument was passed or not. + +If there is an error during the evaluation of the body, a path that is opened by +`arg_write` for writing will be deleted, whether it's passed in as a string or a +temporary path generated when `arg` is `nothing`. + +#### arg_isdir + +```jl +arg_isdir(f::Function, arg::AbstractString) -> f(arg) +``` +The `arg_isdir` function takes `arg` which must be the path to an existing +directory (an error is raised otherwise) and passes that path to `f` finally +returning the result of `f(arg)`. This is definitely the least useful tool +offered by `ArgTools` and mostly exists for symmetry with `arg_mkdir` and to +give consistent error messages. + +#### arg_mkdir + +```jl +arg_mkdir(f::Function, arg::AbstractString) -> arg +arg_mkdir(f::Function, arg::Nothing) -> mktempdir() +``` +The `arg_mkdir` function takes `arg` which must either be one of: + +- a path to an already existing empty directory, +- a non-existent path which can be created as a directory, or +- `nothing` in which case a temporary directory is created. + +In all cases the path to the directory is returned. If an error occurs during +`f(arg)`, the directory is returned to its original state: if it already existed +but was empty, it will be emptied; if it did not exist it will be deleted. + + + +### Function Testing + +Using `ArgTools` is easy; thoroughly testing flexible functions defined using +`ArgTools` is a bit trickier, but the package includes testing tools that help. +The API for testing functions defined with the argument handling API consists +of two functions and a macro: `arg_readers`, `arg_writers` and `@arg_test`. 
+ + + +#### arg_readers + +```jl +arg_readers(arg :: AbstractString, [ type = ArgRead ]) do arg::Function + ## pre-test setup ## + @arg_test arg begin + arg :: ArgRead + ## test using `arg` ## + end + ## post-test cleanup ## +end +``` + +The `arg_readers` function takes a path to be read and a single-argument do +block, which is invoked once for each test reader type that `arg_read` can +handle. If the optional `type` argument is given then the do block is only +invoked for readers that produce arguments of that type. + +The `arg` passed to the do block is not the argument value itself, because some +of test argument types need to be initialized and finalized for each test case. +Consider an open file handle argument: once you've used it for one test, you +can't use it again; you need to close it and open the file again for the next +test. This function `arg` can be converted into an `ArgRead` instance using +`@arg_test arg begin ... end`. + +#### arg_writers + +```jl +arg_writers([ type = ArgWrite ]) do path::String, arg::Function + ## pre-test setup ## + @arg_test arg begin + arg :: ArgWrite + ## test using `arg` ## + end + ## post-test cleanup ## +end +``` + +The `arg_writers` function takes a do block, which is invoked once for each test +writer type that `arg_write` can handle with a temporary (non-existent) `path` +and `arg` which can be converted into various writable argument types which +write to `path`. If the optional `type` argument is given then the do block is +only invoked for writers that produce arguments of that type. + +The `arg` passed to the do block is not the argument value itself, because some +of test argument types need to be initialized and finalized for each test case. +Consider an open file handle argument: once you've used it for one test, you +can't use it again; you need to close it and open the file again for the next +test. This function `arg` can be converted into an `ArgWrite` instance using +`@arg_test arg begin ... end`. 
+ +There is also an `arg_writers` method that takes a path name like `arg_readers`: + +```jl +arg_writers(path::AbstractString, [ type = ArgWrite ]) do arg::Function + ## pre-test setup ## + @arg_test arg begin + arg :: ArgWrite + ## test using `arg` ## + end + ## post-test cleanup ## +end +``` + +This method is useful if you need to specify `path` instead of using path name +generated by `tempname()`. Since `path` is passed from outside of `arg_writers`, +the path is not an argument to the do block in this form. + +#### @arg_test + +```jl +@arg_test arg1 arg2 ... body +``` + +The `@arg_test` macro is used to convert `arg` functions provided by +`arg_readers` and `arg_writers` into actual argument values. When you write +`@arg_test arg body` it is equivalent to `arg(arg -> body)`. + + + +## Examples + +The examples, like the API, are split into two parts: + +1. An example of defining a function with a flexible API using the main API; +2. Examples of how to thoroughly test that function using the test utilities. + +### Usage Example + +The best explanation may be an example, which is also used for testing: + +```jl +using ArgTools + +function send_data(src::ArgRead, dst::Union{ArgWrite, Nothing} = nothing) + arg_read(src) do src_io + arg_write(dst) do dst_io + buffer = Vector{UInt8}(undef, 2*1024*1024) + while !eof(src_io) + n = readbytes!(src_io, buffer) + write(dst_io, view(buffer, 1:n)) + end + end + end +end +``` + +This defines the `send_data` function which reads data from a source and writes +it to a destination, specified by the `src` and `dst` arguments, respectively. +Thanks to `ArgTools`, this relatively simple definition acts as a swiss-army +knife for sending data from a source to a destination. Here are some examples: + +```jl +julia> cd(mktempdir()) + +julia> write("hello.txt", "Hello, world.\n") +14 + +julia> run(`cat hello.txt`); +Hello, world. 
+ +julia> send_data("hello.txt", "hello_copy.txt") +"hello_copy.txt" + +julia> run(`cat $ans`); +Hello, world. + +julia> rm("hello_copy.txt") + +julia> send_data("hello.txt", stdout); +Hello, world. + +julia> send_data("hello.txt", pipeline(`gzip -9`, "hello.gz")); + +julia> run(`gzcat hello.gz`); +Hello, world. + +julia> hello_copy = send_data(`gzcat hello.gz`) +"/var/folders/4g/b8p546px3nd550b3k288mhp80000gp/T/jl_cguepi" + +julia> run(`cat $hello_copy`); +Hello, world. +``` + +To understand the definition of `send_data`, let's work from the inside out: + +* The main body of the function operates on the `src_io` and `dst_io` IO + handles, using a buffer to read data from the former to the latter in 2MiB + blocks. + +* The calls to `arg_read` and `arg_write` transform the `src` and `dst` + arguments from various types to `src_io` and `dst_io` IO handles. This allows + the inner body to handle the core case of dealing with IO handles, without + having to worry about the various possible incoming argument types. See the API + section below for more details about how `arg_read` and `arg_write` work on + different types. + +* The arguments to `send_data` are `src::ArgRead` and `dst::ArgWrite` where + `dst` is optional and defaults to `nothing` if not given. The `ArgRead` type is + a union including all the types that `arg_read` knows how to handle. Similarly, + the `ArgWrite` type is a union including the types that `arg_write` knows how to + handle, except for `nothing` which must be explicitly opted into, for which + `arg_write` creates a temporary file and returns its path. 
+Taken altogether, this allows the `send_data` function to work with a combinatorial +explosion of type signatures: + +* `send_data(src::AbstractString)` +* `send_data(src::AbstractCmd)` +* `send_data(src::IO)` +* `send_data(src::AbstractString, dst::AbstractString)` +* `send_data(src::AbstractCmd, dst::AbstractString)` +* `send_data(src::IO, dst::AbstractString)` +* `send_data(src::AbstractString, dst::AbstractCmd)` +* `send_data(src::AbstractCmd, dst::AbstractCmd)` +* `send_data(src::IO, dst::AbstractCmd)` +* `send_data(src::AbstractString, dst::IO)` +* `send_data(src::AbstractCmd, dst::IO)` +* `send_data(src::IO, dst::IO)` + +Each combination guarantees the proper initialization and cleanup of its +arguments whether it is opening a file and closing it upon completion or error, +or creating a temporary output file and returning it upon completion or deleting +it on error. If the arguments are commands or pipelines, those are correctly +opened with the necessary read/write options. + +### Testing Example + +Now that we've defined the `send_data` function, we must test it. But it has so +many different kinds of arguments that it can accept, how do we produce tests +for all of these combinations? `ArgTools` also offers tools to help with testing +APIs that it lets you define. The example tests assume that the above definition +of `send_data` has already been evaluated in the same Julia session. + +```jl +using Test + +# create a source file +src_file = tempname() +data = rand(UInt8, 666) +write(src_file, data) + +print_sig(args...) 
= + println("send_data(", join(map(typeof, args), ", "), ")") + +arg_readers(src_file) do src + # test 1-arg methods + @arg_test src begin + print_sig(src) + dst_file = send_data(src) + @test data == read(dst_file) + rm(dst_file) + end + + # test 2-arg methods + arg_writers() do dst_file, dst + @arg_test src dst begin + print_sig(src, dst) + @test dst == send_data(src, dst) + end + @test data == read(dst_file) + end +end + +# cleanup +rm(src_file) +``` + +Evaluating this testing code prints the following output: +```jl +send_data(String) +send_data(String, String) +send_data(String, Cmd) +send_data(String, Base.CmdRedirect) +send_data(String, IOStream) +send_data(String, Base.Process) +send_data(Cmd) +send_data(Cmd, String) +send_data(Cmd, Cmd) +send_data(Cmd, Base.CmdRedirect) +send_data(Cmd, IOStream) +send_data(Cmd, Base.Process) +send_data(Base.CmdRedirect) +send_data(Base.CmdRedirect, String) +send_data(Base.CmdRedirect, Cmd) +send_data(Base.CmdRedirect, Base.CmdRedirect) +send_data(Base.CmdRedirect, IOStream) +send_data(Base.CmdRedirect, Base.Process) +send_data(IOStream) +send_data(IOStream, String) +send_data(IOStream, Cmd) +send_data(IOStream, Base.CmdRedirect) +send_data(IOStream, IOStream) +send_data(IOStream, Base.Process) +send_data(Base.Process) +send_data(Base.Process, String) +send_data(Base.Process, Cmd) +send_data(Base.Process, Base.CmdRedirect) +send_data(Base.Process, IOStream) +send_data(Base.Process, Base.Process) +``` + +Test code isn't normally this verbose, but for this example it may be +helpful to understand what's happening. What this output shows is the various +ways in which this short bit of code tests invoking the `send_data` function. +Here are some details about what's happening: + +* The call to `arg_readers(src_file)` evaluates the attached do block with five + different `arg` values, which can be converted to readable arguments of the + types: `String`, `Cmd`, `CmdRedirect`, `IOStream` and `Process`. 
+ +* The call to `@arg_test src begin ... end` converts `src` into readable + arguments of those same types and closes or finalizes each at the end. + +* The call to `arg_writers()` evaluates the attached do block with five + different `arg` values, which can be converted to writable arguments of the + types: `String`, `Cmd`, `CmdRedirect`, `IOStream` and `Process`. + +* The call to `@arg_test src dst begin ... end` converts `src` into readable + arguments and `dst` into writeable arguments of the same set of types, and + closes or otherwise finalizes each one at the end of the block. + +This example test code illustrates some of the reasoning features of the testing +API which might initially seem puzzling. For example, it shows why `arg_readers` +and `arg_writers` don't simply produce argument values that can be passed to the +function being tested, instead requiring conversion by the `@arg_test` macro. +There are two reasons: + +1. The same value returned from `arg_readers` or `arg_writers` may need to be + used in multiple tests and some argument types, such as IO handles, need to + be initialized before each test and finalized after. The `@arg_test` block + delimits where initialization and finalization occur. + +2. Sometimes operations need to be done after the `@arg_test` block but before + the end of the enclosing `arg_readers` or `arg_writers` block. Testing that + `dst_file` has the expected contents, i.e. `@test data == read(dst_file)`, + will not work reliably inside of the `@arg_test` block: data is not guaranteed + to have been fully written to `dst_file` until `dst` is finalized. This is an + issue when `dst` is an already-opened process, for example: `arg_write` leaves + the process open since it received it that way (you might want to write more + data to it), and while it does flush the handle, there is no guarantee that + the process will get data to its final destination until the process has + exited. 
Putting the test after the `@arg_test` block ensures that the process + has terminated, so we can reliably test the contents of `dst_file`. diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Artifacts/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Artifacts/docs/src/index.md new file mode 100644 index 0000000..80f4c62 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Artifacts/docs/src/index.md @@ -0,0 +1,21 @@ +# Artifacts + +```@meta +DocTestSetup = :(using Artifacts) +``` + +Starting with Julia 1.6, the artifacts support has moved from `Pkg.jl` to Julia itself. +Until proper documentation can be added here, you can learn more about artifacts in the +`Pkg.jl` manual at . + +!!! compat "Julia 1.6" + Julia's artifacts API requires at least Julia 1.6. In Julia + versions 1.3 to 1.5, you can use `Pkg.Artifacts` instead. + + +```@docs +Artifacts.artifact_meta +Artifacts.artifact_hash +Artifacts.find_artifacts_toml +Artifacts.@artifact_str +``` diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Base64/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Base64/docs/src/index.md new file mode 100644 index 0000000..6bc647f --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Base64/docs/src/index.md @@ -0,0 +1,10 @@ +# Base64 + +```@docs +Base64.Base64 +Base64.Base64EncodePipe +Base64.base64encode +Base64.Base64DecodePipe +Base64.base64decode +Base64.stringmime +``` diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/CRC32c/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/CRC32c/docs/src/index.md new file mode 100644 index 0000000..1304709 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/CRC32c/docs/src/index.md @@ -0,0 +1,6 @@ +# CRC32c + +```@docs +CRC32c.crc32c +CRC32c.crc32c(::IO, ::Integer, ::UInt32) +``` diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Dates/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Dates/docs/src/index.md new 
file mode 100644 index 0000000..975c087 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Dates/docs/src/index.md @@ -0,0 +1,826 @@ +# Dates + +```@meta +DocTestSetup = :(using Dates) +``` + +The `Dates` module provides two types for working with dates: [`Date`](@ref) and [`DateTime`](@ref), +representing day and millisecond precision, respectively; both are subtypes of the abstract [`TimeType`](@ref). +The motivation for distinct types is simple: some operations are much simpler, both in terms of +code and mental reasoning, when the complexities of greater precision don't have to be dealt with. +For example, since the [`Date`](@ref) type only resolves to the precision of a single date (i.e. +no hours, minutes, or seconds), normal considerations for time zones, daylight savings/summer +time, and leap seconds are unnecessary and avoided. + +Both [`Date`](@ref) and [`DateTime`](@ref) are basically immutable [`Int64`](@ref) wrappers. +The single `instant` field of either type is actually a `UTInstant{P}` type, which +represents a continuously increasing machine timeline based on the UT second [^1]. The +[`DateTime`](@ref) type is not aware of time zones (*naive*, in Python parlance), +analogous to a *LocalDateTime* in Java 8. Additional time zone functionality +can be added through the [TimeZones.jl package](https://github.com/JuliaTime/TimeZones.jl/), which +compiles the [IANA time zone database](http://www.iana.org/time-zones). Both [`Date`](@ref) and +[`DateTime`](@ref) are based on the [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) standard, which follows the proleptic Gregorian calendar. +One note is that the ISO 8601 standard is particular about BC/BCE dates. In general, the last +day of the BC/BCE era, 1-12-31 BC/BCE, was followed by 1-1-1 AD/CE, thus no year zero exists. 
+The ISO standard, however, states that 1 BC/BCE is year zero, so `0000-12-31` is the day before +`0001-01-01`, and year `-0001` (yes, negative one for the year) is 2 BC/BCE, year `-0002` is 3 +BC/BCE, etc. + +[^1]: + The notion of the UT second is actually quite fundamental. There are basically two different notions + of time generally accepted, one based on the physical rotation of the earth (one full rotation + = 1 day), the other based on the SI second (a fixed, constant value). These are radically different! + Think about it, a "UT second", as defined relative to the rotation of the earth, may have a different + absolute length depending on the day! Anyway, the fact that [`Date`](@ref) and [`DateTime`](@ref) + are based on UT seconds is a simplifying, yet honest assumption so that things like leap seconds + and all their complexity can be avoided. This basis of time is formally called [UT](https://en.wikipedia.org/wiki/Universal_Time) + or UT1. Basing types on the UT second basically means that every minute has 60 seconds and every + day has 24 hours and leads to more natural calculations when working with calendar dates. 
+ +## Constructors + +[`Date`](@ref) and [`DateTime`](@ref) types can be constructed by integer or [`Period`](@ref) +types, by parsing, or through adjusters (more on those later): + +```jldoctest +julia> DateTime(2013) +2013-01-01T00:00:00 + +julia> DateTime(2013,7) +2013-07-01T00:00:00 + +julia> DateTime(2013,7,1) +2013-07-01T00:00:00 + +julia> DateTime(2013,7,1,12) +2013-07-01T12:00:00 + +julia> DateTime(2013,7,1,12,30) +2013-07-01T12:30:00 + +julia> DateTime(2013,7,1,12,30,59) +2013-07-01T12:30:59 + +julia> DateTime(2013,7,1,12,30,59,1) +2013-07-01T12:30:59.001 + +julia> Date(2013) +2013-01-01 + +julia> Date(2013,7) +2013-07-01 + +julia> Date(2013,7,1) +2013-07-01 + +julia> Date(Dates.Year(2013),Dates.Month(7),Dates.Day(1)) +2013-07-01 + +julia> Date(Dates.Month(7),Dates.Year(2013)) +2013-07-01 +``` + +[`Date`](@ref) or [`DateTime`](@ref) parsing is accomplished by the use of format strings. Format +strings work by the notion of defining *delimited* or *fixed-width* "slots" that contain a period +to parse and passing the text to parse and format string to a [`Date`](@ref) or [`DateTime`](@ref) +constructor, of the form `Date("2015-01-01","y-m-d")` or `DateTime("20150101","yyyymmdd")`. + +Delimited slots are marked by specifying the delimiter the parser should expect between two subsequent +periods; so `"y-m-d"` lets the parser know that between the first and second slots in a date string +like `"2014-07-16"`, it should find the `-` character. The `y`, `m`, and `d` characters let the +parser know which periods to parse in each slot. + +Fixed-width slots are specified by repeating the period character the number of times corresponding +to the width with no delimiter between characters. So `"yyyymmdd"` would correspond to a date +string like `"20140716"`. The parser distinguishes a fixed-width slot by the absence of a delimiter, +noting the transition `"yyyymm"` from one period character to the next. 
+ +Support for text-form month parsing is also supported through the `u` and `U` characters, for +abbreviated and full-length month names, respectively. By default, only English month names are +supported, so `u` corresponds to "Jan", "Feb", "Mar", etc. And `U` corresponds to "January", "February", +"March", etc. Similar to other name=>value mapping functions [`dayname`](@ref) and [`monthname`](@ref), +custom locales can be loaded by passing in the `locale=>Dict{String,Int}` mapping to the `MONTHTOVALUEABBR` +and `MONTHTOVALUE` dicts for abbreviated and full-name month names, respectively. + +One note on parsing performance: using the `Date(date_string,format_string)` function is fine +if only called a few times. If there are many similarly formatted date strings to parse however, +it is much more efficient to first create a [`Dates.DateFormat`](@ref), and pass it instead of +a raw format string. + +```jldoctest +julia> df = DateFormat("y-m-d"); + +julia> dt = Date("2015-01-01",df) +2015-01-01 + +julia> dt2 = Date("2015-01-02",df) +2015-01-02 +``` + +You can also use the `dateformat""` string macro. This macro creates the `DateFormat` object once when the macro is expanded and uses the same `DateFormat` object even if a code snippet is run multiple times. + +```jldoctest +julia> for i = 1:10^5 + Date("2015-01-01", dateformat"y-m-d") + end +``` + +A full suite of parsing and formatting tests and examples is available in [`stdlib/Dates/test/io.jl`](https://github.com/JuliaLang/julia/blob/master/stdlib/Dates/test/io.jl). + +## Durations/Comparisons + +Finding the length of time between two [`Date`](@ref) or [`DateTime`](@ref) is straightforward +given their underlying representation as `UTInstant{Day}` and `UTInstant{Millisecond}`, respectively. +The difference between [`Date`](@ref) is returned in the number of [`Day`](@ref), and [`DateTime`](@ref) +in the number of [`Millisecond`](@ref). 
Similarly, comparing [`TimeType`](@ref) is a simple matter +of comparing the underlying machine instants (which in turn compares the internal [`Int64`](@ref) values). + +```jldoctest +julia> dt = Date(2012,2,29) +2012-02-29 + +julia> dt2 = Date(2000,2,1) +2000-02-01 + +julia> dump(dt) +Date + instant: Dates.UTInstant{Day} + periods: Day + value: Int64 734562 + +julia> dump(dt2) +Date + instant: Dates.UTInstant{Day} + periods: Day + value: Int64 730151 + +julia> dt > dt2 +true + +julia> dt != dt2 +true + +julia> dt + dt2 +ERROR: MethodError: no method matching +(::Date, ::Date) +[...] + +julia> dt * dt2 +ERROR: MethodError: no method matching *(::Date, ::Date) +[...] + +julia> dt / dt2 +ERROR: MethodError: no method matching /(::Date, ::Date) + +julia> dt - dt2 +4411 days + +julia> dt2 - dt +-4411 days + +julia> dt = DateTime(2012,2,29) +2012-02-29T00:00:00 + +julia> dt2 = DateTime(2000,2,1) +2000-02-01T00:00:00 + +julia> dt - dt2 +381110400000 milliseconds +``` + +## Accessor Functions + +Because the [`Date`](@ref) and [`DateTime`](@ref) types are stored as single [`Int64`](@ref) values, date +parts or fields can be retrieved through accessor functions. 
The lowercase accessors return the +field as an integer: + +```jldoctest tdate +julia> t = Date(2014, 1, 31) +2014-01-31 + +julia> Dates.year(t) +2014 + +julia> Dates.month(t) +1 + +julia> Dates.week(t) +5 + +julia> Dates.day(t) +31 +``` + +While propercase return the same value in the corresponding [`Period`](@ref) type: + +```jldoctest tdate +julia> Dates.Year(t) +2014 years + +julia> Dates.Day(t) +31 days +``` + +Compound methods are provided because it is more efficient to access multiple fields at the same time than individually: + +```jldoctest tdate +julia> Dates.yearmonth(t) +(2014, 1) + +julia> Dates.monthday(t) +(1, 31) + +julia> Dates.yearmonthday(t) +(2014, 1, 31) +``` + +One may also access the underlying `UTInstant` or integer value: + +```jldoctest tdate +julia> dump(t) +Date + instant: Dates.UTInstant{Day} + periods: Day + value: Int64 735264 + +julia> t.instant +Dates.UTInstant{Day}(Day(735264)) + +julia> Dates.value(t) +735264 +``` + +## Query Functions + +Query functions provide calendrical information about a [`TimeType`](@ref). They include information +about the day of the week: + +```jldoctest tdate2 +julia> t = Date(2014, 1, 31) +2014-01-31 + +julia> Dates.dayofweek(t) +5 + +julia> Dates.dayname(t) +"Friday" + +julia> Dates.dayofweekofmonth(t) # 5th Friday of January +5 +``` + +Month of the year: + +```jldoctest tdate2 +julia> Dates.monthname(t) +"January" + +julia> Dates.daysinmonth(t) +31 +``` + +As well as information about the [`TimeType`](@ref)'s year and quarter: + +```jldoctest tdate2 +julia> Dates.isleapyear(t) +false + +julia> Dates.dayofyear(t) +31 + +julia> Dates.quarterofyear(t) +1 + +julia> Dates.dayofquarter(t) +31 +``` + +The [`dayname`](@ref) and [`monthname`](@ref) methods can also take an optional `locale` keyword +that can be used to return the name of the day or month of the year for other languages/locales. 
+There are also versions of these functions returning the abbreviated names, namely +[`dayabbr`](@ref) and [`monthabbr`](@ref). +First the mapping is loaded into the `LOCALES` variable: + +```jldoctest tdate2 +julia> french_months = ["janvier", "février", "mars", "avril", "mai", "juin", + "juillet", "août", "septembre", "octobre", "novembre", "décembre"]; + +julia> french_monts_abbrev = ["janv","févr","mars","avril","mai","juin", + "juil","août","sept","oct","nov","déc"]; + +julia> french_days = ["lundi","mardi","mercredi","jeudi","vendredi","samedi","dimanche"]; + +julia> Dates.LOCALES["french"] = Dates.DateLocale(french_months, french_monts_abbrev, french_days, [""]); +``` + + The above mentioned functions can then be used to perform the queries: + +```jldoctest tdate2 +julia> Dates.dayname(t;locale="french") +"vendredi" + +julia> Dates.monthname(t;locale="french") +"janvier" + +julia> Dates.monthabbr(t;locale="french") +"janv" +``` + +Since the abbreviated versions of the days are not loaded, trying to use the +function `dayabbr` will error. + +```jldoctest tdate2 +julia> Dates.dayabbr(t;locale="french") +ERROR: BoundsError: attempt to access 1-element Vector{String} at index [5] +Stacktrace: +[...] +``` + + +## TimeType-Period Arithmetic + +It's good practice when using any language/date framework to be familiar with how date-period +arithmetic is handled as there are some [tricky issues](https://codeblog.jonskeet.uk/2010/12/01/the-joys-of-date-time-arithmetic/) +to deal with (though much less so for day-precision types). + +The `Dates` module approach tries to follow the simple principle of trying to change as +little as possible when doing [`Period`](@ref) arithmetic. This approach is also often known as +*calendrical* arithmetic or what you would probably guess if someone were to ask you the same +calculation in a conversation. Why all the fuss about this? Let's take a classic example: add +1 month to January 31st, 2014. What's the answer? 
Javascript will say [March 3](https://markhneedham.com/blog/2009/01/07/javascript-add-a-month-to-a-date/) +(assumes 31 days). PHP says [March 2](https://stackoverflow.com/questions/5760262/php-adding-months-to-a-date-while-not-exceeding-the-last-day-of-the-month) +(assumes 30 days). The fact is, there is no right answer. In the `Dates` module, it gives +the result of February 28th. How does it figure that out? Consider the classic 7-7-7 +gambling game in casinos. + +Now just imagine that instead of 7-7-7, the slots are Year-Month-Day, or in our example, 2014-01-31. +When you ask to add 1 month to this date, the month slot is incremented, so now we have 2014-02-31. +Then the day number is checked if it is greater than the last valid day of the new month; if it +is (as in the case above), the day number is adjusted down to the last valid day (28). What are +the ramifications with this approach? Go ahead and add another month to our date, `2014-02-28 + Month(1) == 2014-03-28`. +What? Were you expecting the last day of March? Nope, sorry, remember the 7-7-7 slots. As few +slots as possible are going to change, so we first increment the month slot by 1, 2014-03-28, +and boom, we're done because that's a valid date. On the other hand, if we were to add 2 months +to our original date, 2014-01-31, then we end up with 2014-03-31, as expected. The other ramification +of this approach is a loss in associativity when a specific ordering is forced (i.e. adding things +in different orders results in different outcomes). For example: + +```jldoctest +julia> (Date(2014,1,29)+Dates.Day(1)) + Dates.Month(1) +2014-02-28 + +julia> (Date(2014,1,29)+Dates.Month(1)) + Dates.Day(1) +2014-03-01 +``` + +What's going on there? In the first line, we're adding 1 day to January 29th, which results in +2014-01-30; then we add 1 month, so we get 2014-02-30, which then adjusts down to 2014-02-28. 
+In the second example, we add 1 month *first*, where we get 2014-02-29, which adjusts down to +2014-02-28, and *then* add 1 day, which results in 2014-03-01. One design principle that helps +in this case is that, in the presence of multiple Periods, the operations will be ordered by the +Periods' *types*, not their value or positional order; this means `Year` will always be added +first, then `Month`, then `Week`, etc. Hence the following *does* result in associativity and +Just Works: + +```jldoctest +julia> Date(2014,1,29) + Dates.Day(1) + Dates.Month(1) +2014-03-01 + +julia> Date(2014,1,29) + Dates.Month(1) + Dates.Day(1) +2014-03-01 +``` + +Tricky? Perhaps. What is an innocent `Dates` user to do? The bottom line is to be aware +that explicitly forcing a certain associativity, when dealing with months, may lead to some unexpected +results, but otherwise, everything should work as expected. Thankfully, that's pretty much the +extent of the odd cases in date-period arithmetic when dealing with time in UT (avoiding the "joys" +of dealing with daylight savings, leap seconds, etc.). + +As a bonus, all period arithmetic objects work directly with ranges: + +```jldoctest +julia> dr = Date(2014,1,29):Day(1):Date(2014,2,3) +Date("2014-01-29"):Day(1):Date("2014-02-03") + +julia> collect(dr) +6-element Vector{Date}: + 2014-01-29 + 2014-01-30 + 2014-01-31 + 2014-02-01 + 2014-02-02 + 2014-02-03 + +julia> dr = Date(2014,1,29):Dates.Month(1):Date(2014,07,29) +Date("2014-01-29"):Month(1):Date("2014-07-29") + +julia> collect(dr) +7-element Vector{Date}: + 2014-01-29 + 2014-02-28 + 2014-03-29 + 2014-04-29 + 2014-05-29 + 2014-06-29 + 2014-07-29 +``` + +## Adjuster Functions + +As convenient as date-period arithmetic is, often the kinds of calculations needed on dates +take on a *calendrical* or *temporal* nature rather than a fixed number of periods. 
Holidays are +a perfect example; most follow rules such as "Memorial Day = Last Monday of May", or "Thanksgiving += 4th Thursday of November". These kinds of temporal expressions deal with rules relative to the +calendar, like first or last of the month, next Tuesday, or the first and third Wednesdays, etc. + +The `Dates` module provides the *adjuster* API through several convenient methods that +aid in simply and succinctly expressing temporal rules. The first group of adjuster methods deal +with the first and last of weeks, months, quarters, and years. They each take a single [`TimeType`](@ref) +as input and return or *adjust to* the first or last of the desired period relative to the input. + +```jldoctest +julia> Dates.firstdayofweek(Date(2014,7,16)) # Adjusts the input to the Monday of the input's week +2014-07-14 + +julia> Dates.lastdayofmonth(Date(2014,7,16)) # Adjusts to the last day of the input's month +2014-07-31 + +julia> Dates.lastdayofquarter(Date(2014,7,16)) # Adjusts to the last day of the input's quarter +2014-09-30 +``` + +The next two higher-order methods, [`tonext`](@ref), and [`toprev`](@ref), generalize working +with temporal expressions by taking a `DateFunction` as first argument, along with a starting +[`TimeType`](@ref). A `DateFunction` is just a function, usually anonymous, that takes a single +[`TimeType`](@ref) as input and returns a [`Bool`](@ref), `true` indicating a satisfied +adjustment criterion. 
+For example: + +```jldoctest +julia> istuesday = x->Dates.dayofweek(x) == Dates.Tuesday; # Returns true if the day of the week of x is Tuesday + +julia> Dates.tonext(istuesday, Date(2014,7,13)) # 2014-07-13 is a Sunday +2014-07-15 + +julia> Dates.tonext(Date(2014,7,13), Dates.Tuesday) # Convenience method provided for day of the week adjustments +2014-07-15 +``` + +This is useful with the do-block syntax for more complex temporal expressions: + +```jldoctest +julia> Dates.tonext(Date(2014,7,13)) do x + # Return true on the 4th Thursday of November (Thanksgiving) + Dates.dayofweek(x) == Dates.Thursday && + Dates.dayofweekofmonth(x) == 4 && + Dates.month(x) == Dates.November + end +2014-11-27 +``` + +The [`Base.filter`](@ref) method can be used to obtain all valid dates/moments in a specified +range: + +```jldoctest +# Pittsburgh street cleaning; Every 2nd Tuesday from April to November +# Date range from January 1st, 2014 to January 1st, 2015 +julia> dr = Dates.Date(2014):Day(1):Dates.Date(2015); + +julia> filter(dr) do x + Dates.dayofweek(x) == Dates.Tue && + Dates.April <= Dates.month(x) <= Dates.Nov && + Dates.dayofweekofmonth(x) == 2 + end +8-element Vector{Date}: + 2014-04-08 + 2014-05-13 + 2014-06-10 + 2014-07-08 + 2014-08-12 + 2014-09-09 + 2014-10-14 + 2014-11-11 +``` + +Additional examples and tests are available in [`stdlib/Dates/test/adjusters.jl`](https://github.com/JuliaLang/julia/blob/master/stdlib/Dates/test/adjusters.jl). + +## Period Types + +Periods are a human view of discrete, sometimes irregular durations of time. Consider 1 month; +it could represent, in days, a value of 28, 29, 30, or 31 depending on the year and month context. +Or a year could represent 365 or 366 days in the case of a leap year. [`Period`](@ref) types are +simple [`Int64`](@ref) wrappers and are constructed by wrapping any `Int64` convertible type, i.e. `Year(1)` +or `Month(3.0)`. 
Arithmetic between [`Period`](@ref) of the same type behave like integers, and +limited `Period-Real` arithmetic is available. You can extract the underlying integer with +[`Dates.value`](@ref). + +```jldoctest +julia> y1 = Dates.Year(1) +1 year + +julia> y2 = Dates.Year(2) +2 years + +julia> y3 = Dates.Year(10) +10 years + +julia> y1 + y2 +3 years + +julia> div(y3,y2) +5 + +julia> y3 - y2 +8 years + +julia> y3 % y2 +0 years + +julia> div(y3,3) # mirrors integer division +3 years + +julia> Dates.value(Dates.Millisecond(10)) +10 +``` + +## Rounding + +[`Date`](@ref) and [`DateTime`](@ref) values can be rounded to a specified resolution (e.g., 1 +month or 15 minutes) with [`floor`](@ref), [`ceil`](@ref), or [`round`](@ref): + +```jldoctest +julia> floor(Date(1985, 8, 16), Dates.Month) +1985-08-01 + +julia> ceil(DateTime(2013, 2, 13, 0, 31, 20), Dates.Minute(15)) +2013-02-13T00:45:00 + +julia> round(DateTime(2016, 8, 6, 20, 15), Dates.Day) +2016-08-07T00:00:00 +``` + +Unlike the numeric [`round`](@ref) method, which breaks ties toward the even number by default, +the [`TimeType`](@ref)[`round`](@ref) method uses the `RoundNearestTiesUp` rounding mode. (It's +difficult to guess what breaking ties to nearest "even" [`TimeType`](@ref) would entail.) Further +details on the available `RoundingMode` s can be found in the [API reference](@ref stdlib-dates-api). + +Rounding should generally behave as expected, but there are a few cases in which the expected +behaviour is not obvious. + +### Rounding Epoch + +In many cases, the resolution specified for rounding (e.g., `Dates.Second(30)`) divides evenly +into the next largest period (in this case, `Dates.Minute(1)`). But rounding behaviour in cases +in which this is not true may lead to confusion. What is the expected result of rounding a [`DateTime`](@ref) +to the nearest 10 hours? 
+ +```jldoctest +julia> round(DateTime(2016, 7, 17, 11, 55), Dates.Hour(10)) +2016-07-17T12:00:00 +``` + +That may seem confusing, given that the hour (12) is not divisible by 10. The reason that `2016-07-17T12:00:00` +was chosen is that it is 17,676,660 hours after `0000-01-01T00:00:00`, and 17,676,660 is divisible +by 10. + +As Julia [`Date`](@ref) and [`DateTime`](@ref) values are represented according to the ISO 8601 +standard, `0000-01-01T00:00:00` was chosen as base (or "rounding epoch") from which to begin the +count of days (and milliseconds) used in rounding calculations. (Note that this differs slightly +from Julia's internal representation of [`Date`](@ref) s using Rata Die notation; but since the +ISO 8601 standard is most visible to the end user, `0000-01-01T00:00:00` was chosen as the rounding +epoch instead of the `0000-12-31T00:00:00` used internally to minimize confusion.) + +The only exception to the use of `0000-01-01T00:00:00` as the rounding epoch is when rounding +to weeks. Rounding to the nearest week will always return a Monday (the first day of the week +as specified by ISO 8601). For this reason, we use `0000-01-03T00:00:00` (the first day of the +first week of year 0000, as defined by ISO 8601) as the base when rounding to a number of weeks. + +Here is a related case in which the expected behaviour is not necessarily obvious: What happens +when we round to the nearest `P(2)`, where `P` is a [`Period`](@ref) type? In some cases (specifically, +when `P <: Dates.TimePeriod`) the answer is clear: + +```jldoctest +julia> round(DateTime(2016, 7, 17, 8, 55, 30), Dates.Hour(2)) +2016-07-17T08:00:00 + +julia> round(DateTime(2016, 7, 17, 8, 55, 30), Dates.Minute(2)) +2016-07-17T08:56:00 +``` + +This seems obvious, because two of each of these periods still divides evenly into the next larger +order period. 
But in the case of two months (which still divides evenly into one year), the answer +may be surprising: + +```jldoctest +julia> round(DateTime(2016, 7, 17, 8, 55, 30), Dates.Month(2)) +2016-07-01T00:00:00 +``` + +Why round to the first day in July, even though it is month 7 (an odd number)? The key is that +months are 1-indexed (the first month is assigned 1), unlike hours, minutes, seconds, and milliseconds +(the first of which are assigned 0). + +This means that rounding a [`DateTime`](@ref) to an even multiple of seconds, minutes, hours, +or years (because the ISO 8601 specification includes a year zero) will result in a [`DateTime`](@ref) +with an even value in that field, while rounding a [`DateTime`](@ref) to an even multiple of months +will result in the months field having an odd value. Because both months and years may contain +an irregular number of days, whether rounding to an even number of days will result in an even +value in the days field is uncertain. + +See the [API reference](@ref stdlib-dates-api) for additional information +on methods exported from the `Dates` module. + +# [API reference](@id stdlib-dates-api) + +## Dates and Time Types + +```@docs +Dates.Period +Dates.CompoundPeriod +Dates.Instant +Dates.UTInstant +Dates.TimeType +Dates.DateTime +Dates.Date +Dates.Time +Dates.TimeZone +Dates.UTC +``` + +## Dates Functions + +```@docs +Dates.DateTime(::Int64, ::Int64, ::Int64, ::Int64, ::Int64, ::Int64, ::Int64) +Dates.DateTime(::Dates.Period) +Dates.DateTime(::Function, ::Any...) 
+Dates.DateTime(::Dates.TimeType) +Dates.DateTime(::AbstractString, ::AbstractString) +Dates.format(::Dates.TimeType, ::AbstractString) +Dates.DateFormat +Dates.@dateformat_str +Dates.DateTime(::AbstractString, ::Dates.DateFormat) +Dates.Date(::Int64, ::Int64, ::Int64) +Dates.Date(::Dates.Period) +Dates.Date(::Function, ::Any, ::Any, ::Any) +Dates.Date(::Dates.TimeType) +Dates.Date(::AbstractString, ::AbstractString) +Dates.Date(::AbstractString, ::Dates.DateFormat) +Dates.Time(::Int64::Int64, ::Int64, ::Int64, ::Int64, ::Int64) +Dates.Time(::Dates.TimePeriod) +Dates.Time(::Function, ::Any...) +Dates.Time(::Dates.DateTime) +Dates.now() +Dates.now(::Type{Dates.UTC}) +Base.eps(::Union{Type{DateTime}, Type{Date}, Type{Time}, TimeType}) +``` + +### Accessor Functions + +```@docs +Dates.year +Dates.month +Dates.week +Dates.day +Dates.hour +Dates.minute +Dates.second +Dates.millisecond +Dates.microsecond +Dates.nanosecond +Dates.Year(::Dates.TimeType) +Dates.Month(::Dates.TimeType) +Dates.Week(::Dates.TimeType) +Dates.Day(::Dates.TimeType) +Dates.Hour(::DateTime) +Dates.Minute(::DateTime) +Dates.Second(::DateTime) +Dates.Millisecond(::DateTime) +Dates.Microsecond(::Dates.Time) +Dates.Nanosecond(::Dates.Time) +Dates.yearmonth +Dates.monthday +Dates.yearmonthday +``` + +### Query Functions + +```@docs +Dates.dayname +Dates.dayabbr +Dates.dayofweek +Dates.dayofmonth +Dates.dayofweekofmonth +Dates.daysofweekinmonth +Dates.monthname +Dates.monthabbr +Dates.daysinmonth +Dates.isleapyear +Dates.dayofyear +Dates.daysinyear +Dates.quarterofyear +Dates.dayofquarter +``` + +### Adjuster Functions + +```@docs +Base.trunc(::Dates.TimeType, ::Type{Dates.Period}) +Dates.firstdayofweek +Dates.lastdayofweek +Dates.firstdayofmonth +Dates.lastdayofmonth +Dates.firstdayofyear +Dates.lastdayofyear +Dates.firstdayofquarter +Dates.lastdayofquarter +Dates.tonext(::Dates.TimeType, ::Int) +Dates.toprev(::Dates.TimeType, ::Int) +Dates.tofirst +Dates.tolast +Dates.tonext(::Function, 
::Dates.TimeType) +Dates.toprev(::Function, ::Dates.TimeType) +``` + +### Periods + +```@docs +Dates.Period(::Any) +Dates.CompoundPeriod(::Vector{<:Dates.Period}) +Dates.value +Dates.default +``` + +### Rounding Functions + +`Date` and `DateTime` values can be rounded to a specified resolution (e.g., 1 month or 15 minutes) +with `floor`, `ceil`, or `round`. + +```@docs +Base.floor(::Dates.TimeType, ::Dates.Period) +Base.ceil(::Dates.TimeType, ::Dates.Period) +Base.round(::Dates.TimeType, ::Dates.Period, ::RoundingMode{:NearestTiesUp}) +``` + +Most `Period` values can also be rounded to a specified resolution: + +```@docs +Base.floor(::Dates.ConvertiblePeriod, ::T) where T <: Dates.ConvertiblePeriod +Base.ceil(::Dates.ConvertiblePeriod, ::Dates.ConvertiblePeriod) +Base.round(::Dates.ConvertiblePeriod, ::Dates.ConvertiblePeriod, ::RoundingMode{:NearestTiesUp}) +``` + +The following functions are not exported: + +```@docs +Dates.floorceil +Dates.epochdays2date +Dates.epochms2datetime +Dates.date2epochdays +Dates.datetime2epochms +``` + +### Conversion Functions + +```@docs +Dates.today +Dates.unix2datetime +Dates.datetime2unix +Dates.julian2datetime +Dates.datetime2julian +Dates.rata2datetime +Dates.datetime2rata +``` + +### Constants + +Days of the Week: + +| Variable | Abbr. | Value (Int) | +|:----------- |:----- |:----------- | +| `Monday` | `Mon` | 1 | +| `Tuesday` | `Tue` | 2 | +| `Wednesday` | `Wed` | 3 | +| `Thursday` | `Thu` | 4 | +| `Friday` | `Fri` | 5 | +| `Saturday` | `Sat` | 6 | +| `Sunday` | `Sun` | 7 | + +Months of the Year: + +| Variable | Abbr. 
| Value (Int) | +|:----------- |:----- |:----------- | +| `January` | `Jan` | 1 | +| `February` | `Feb` | 2 | +| `March` | `Mar` | 3 | +| `April` | `Apr` | 4 | +| `May` | `May` | 5 | +| `June` | `Jun` | 6 | +| `July` | `Jul` | 7 | +| `August` | `Aug` | 8 | +| `September` | `Sep` | 9 | +| `October` | `Oct` | 10 | +| `November` | `Nov` | 11 | +| `December` | `Dec` | 12 | + +```@meta +DocTestSetup = nothing +``` diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/DelimitedFiles/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/DelimitedFiles/docs/src/index.md new file mode 100644 index 0000000..11e24f1 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/DelimitedFiles/docs/src/index.md @@ -0,0 +1,11 @@ +# Delimited Files + +```@docs +DelimitedFiles.readdlm(::Any, ::AbstractChar, ::Type, ::AbstractChar) +DelimitedFiles.readdlm(::Any, ::AbstractChar, ::AbstractChar) +DelimitedFiles.readdlm(::Any, ::AbstractChar, ::Type) +DelimitedFiles.readdlm(::Any, ::AbstractChar) +DelimitedFiles.readdlm(::Any, ::Type) +DelimitedFiles.readdlm(::Any) +DelimitedFiles.writedlm +``` diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Distributed/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Distributed/docs/src/index.md new file mode 100644 index 0000000..1b1675e --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Distributed/docs/src/index.md @@ -0,0 +1,69 @@ +# Distributed Computing + +```@docs +Distributed.addprocs +Distributed.nprocs +Distributed.nworkers +Distributed.procs() +Distributed.procs(::Integer) +Distributed.workers +Distributed.rmprocs +Distributed.interrupt +Distributed.myid +Distributed.pmap +Distributed.RemoteException +Distributed.Future +Distributed.RemoteChannel +Distributed.fetch(::Distributed.Future) +Distributed.fetch(::RemoteChannel) +Distributed.remotecall(::Any, ::Integer, ::Any...) +Distributed.remotecall_wait(::Any, ::Integer, ::Any...) 
+Distributed.remotecall_fetch(::Any, ::Integer, ::Any...) +Distributed.remote_do(::Any, ::Integer, ::Any...) +Distributed.put!(::RemoteChannel, ::Any...) +Distributed.put!(::Distributed.Future, ::Any) +Distributed.take!(::RemoteChannel, ::Any...) +Distributed.isready(::RemoteChannel, ::Any...) +Distributed.isready(::Distributed.Future) +Distributed.AbstractWorkerPool +Distributed.WorkerPool +Distributed.CachingPool +Distributed.default_worker_pool +Distributed.clear!(::CachingPool) +Distributed.remote +Distributed.remotecall(::Any, ::AbstractWorkerPool, ::Any...) +Distributed.remotecall_wait(::Any, ::AbstractWorkerPool, ::Any...) +Distributed.remotecall_fetch(::Any, ::AbstractWorkerPool, ::Any...) +Distributed.remote_do(::Any, ::AbstractWorkerPool, ::Any...) +Distributed.@spawnat +Distributed.@fetch +Distributed.@fetchfrom +Distributed.@distributed +Distributed.@everywhere +Distributed.clear!(::Any, ::Any; ::Any) +Distributed.remoteref_id +Distributed.channel_from_id +Distributed.worker_id_from_socket +Distributed.cluster_cookie() +Distributed.cluster_cookie(::Any) +``` + +## Cluster Manager Interface + +This interface provides a mechanism to launch and manage Julia workers on different cluster environments. +There are two types of managers present in Base: `LocalManager`, for launching additional workers on the +same host, and `SSHManager`, for launching on remote hosts via `ssh`. TCP/IP sockets are used to connect +and transport messages between processes. It is possible for Cluster Managers to provide a different transport. 
+ +```@docs +Distributed.ClusterManager +Distributed.WorkerConfig +Distributed.launch +Distributed.manage +Distributed.kill(::ClusterManager, ::Int, ::WorkerConfig) +Distributed.connect(::ClusterManager, ::Int, ::WorkerConfig) +Distributed.init_worker +Distributed.start_worker +Distributed.process_messages +Distributed.default_addprocs_params +``` diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Downloads/README.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Downloads/README.md new file mode 100644 index 0000000..132a564 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Downloads/README.md @@ -0,0 +1,186 @@ +# Downloads + +[![Build Status](https://travis-ci.com/JuliaLang/Downloads.jl.svg?branch=master)](https://travis-ci.com/JuliaLang/Downloads.jl) +[![Codecov](https://codecov.io/gh/JuliaLang/Downloads.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/JuliaLang/Downloads.jl) + +The `Downloads` package provides a single function, `download`, which provides +cross-platform, multi-protocol, in-process download functionality implemented +with [libcurl](https://curl.haxx.se/libcurl/). It uses libcurl's multi-handle +callback API to present a Julian API: `download(url)` blocks the task in which +it occurs but yields to Julia's scheduler, allowing arbitrarily many tasks to +download URLs concurrently and efficiently. As of Julia 1.6, this package is a +standard library that is included with Julia, but this package can be used with +Julia 1.3 through 1.5 as well. 
+ +## API + +The public API of `Downloads` consists of two functions and three types: + +- `download` — download a file from a URL, erroring if it can't be downloaded +- `request` — request a URL, returning a `Response` object indicating success +- `Response` — a type capturing the status and other metadata about a request +- `RequestError` — an error type thrown by `download` and `request` on error +- `Downloader` — an object encapsulating shared resources for downloading + +### download + +```jl +download(url, [ output = tempfile() ]; + [ method = "GET", ] + [ headers = <none>, ] + [ timeout = <none>, ] + [ progress = <none>, ] + [ verbose = false, ] + [ downloader = <default>, ] +) -> output +``` +- `url :: AbstractString` +- `output :: Union{AbstractString, AbstractCmd, IO}` +- `method :: AbstractString` +- `headers :: Union{AbstractVector, AbstractDict}` +- `timeout :: Real` +- `progress :: (total::Integer, now::Integer) --> Any` +- `verbose :: Bool` +- `downloader :: Downloader` + +Download a file from the given url, saving it to `output` or if not specified, a +temporary path. The `output` can also be an `IO` handle, in which case the body +of the response is streamed to that handle and the handle is returned. If +`output` is a command, the command is run and output is sent to it on stdin. + +If the `downloader` keyword argument is provided, it must be a `Downloader` +object. Resources and connections will be shared between downloads performed by +the same `Downloader` and cleaned up automatically when the object is garbage +collected or there have been no downloads performed with it for a grace period. +See `Downloader` for more info about configuration and usage. + +If the `headers` keyword argument is provided, it must be a vector or dictionary +whose elements are all pairs of strings. These pairs are passed as headers when +downloading URLs with protocols that support them, such as HTTP/S. 
+ +The `timeout` keyword argument specifies a timeout for the download in seconds, +with a resolution of milliseconds. By default no timeout is set, but this can +also be explicitly requested by passing a timeout value of `Inf`. + +If the `progress` keyword argument is provided, it must be a callback function +which will be called whenever there are updates about the size and status of the +ongoing download. The callback must take two integer arguments: `total` and +`now` which are the total size of the download in bytes, and the number of bytes +which have been downloaded so far. Note that `total` starts out as zero and +remains zero until the server gives an indication of the total size of the +download (e.g. with a `Content-Length` header), which may never happen. So a +well-behaved progress callback should handle a total size of zero gracefully. + +If the `verbose` option is set to true, `libcurl`, which is used to implement +the download functionality, will print debugging information to `stderr`. + +### request + +```jl +request(url; + [ input = <none>, ] + [ output = <none>, ] + [ method = input ? "PUT" : output ? "GET" : "HEAD", ] + [ headers = <none>, ] + [ timeout = <none>, ] + [ progress = <none>, ] + [ verbose = false, ] + [ throw = true, ] + [ downloader = <default>, ] +) -> Union{Response, RequestError} +``` +- `url :: AbstractString` +- `input :: Union{AbstractString, AbstractCmd, IO}` +- `output :: Union{AbstractString, AbstractCmd, IO}` +- `method :: AbstractString` +- `headers :: Union{AbstractVector, AbstractDict}` +- `timeout :: Real` +- `progress :: (dl_total, dl_now, ul_total, ul_now) --> Any` +- `verbose :: Bool` +- `throw :: Bool` +- `downloader :: Downloader` + +Make a request to the given url, returning a `Response` object capturing the +status, headers and other information about the response. The body of the +response is written to `output` if specified and discarded otherwise.
For HTTP/S +requests, if an `input` stream is given, a `PUT` request is made; otherwise if +an `output` stream is given, a `GET` request is made; if neither is given a +`HEAD` request is made. For other protocols, appropriate default methods are +used based on what combination of input and output are requested. The following +options differ from the `download` function: + +- `input` allows providing a request body; if provided, defaults to a `PUT` request +- `progress` is a callback taking four integers for upload and download progress +- `throw` controls whether to throw or return a `RequestError` on request error + +Note that unlike `download` which throws an error if the requested URL could not +be downloaded (indicated by non-2xx status code), `request` returns a `Response` +object no matter what the status code of the response is. If there is an error +with getting a response at all, then a `RequestError` is thrown or returned. + +### Response + +```jl +struct Response + proto :: String + url :: String + status :: Int + message :: String + headers :: Vector{Pair{String,String}} +end +``` +`Response` is a type capturing the properties of a successful response to a +request as an object. It has the following fields: + +- `proto`: the protocol that was used to get the response +- `url`: the URL that was ultimately requested after following redirects +- `status`: the status code of the response, indicating success, failure, etc. +- `message`: a textual message describing the nature of the response +- `headers`: any headers that were returned with the response + +The meaning and availability of some of these responses depends on the protocol +used for the request. For many protocols, including HTTP/S and S/FTP, a 2xx +status code indicates a successful response. For responses in protocols that do +not support headers, the headers vector will be empty. HTTP/2 does not include a +status message, only a status code, so the message will be empty. 
+ +### RequestError + +```jl +struct RequestError <: ErrorException + url :: String + code :: Int + message :: String + response :: Response +end +``` +`RequestError` is a type capturing the properties of a failed response to a +request as an exception object: + +- `url`: the original URL that was requested without any redirects +- `code`: the libcurl error code; `0` if a protocol-only error occurred +- `message`: the libcurl error message indicating what went wrong +- `response`: response object capturing what response info is available + +The same `RequestError` type is thrown by `download` if the request was +successful but there was a protocol-level error indicated by a status code that +is not in the 2xx range, in which case `code` will be zero and the `message` +field will be the empty string. The `request` API only throws a `RequestError` +if the libcurl error `code` is non-zero, in which case the included `response` +object is likely to have a `status` of zero and an empty message. There are, +however, situations where a curl-level error is thrown due to a protocol error, +in which case both the inner and outer code and message may be of interest. + +### Downloader + +```jl +Downloader(; [ grace::Real = 30 ]) +``` +`Downloader` objects are used to perform individual `download` operations. +Connections, name lookups and other resources are shared within a `Downloader`. +These connections and resources are cleaned up after a configurable grace period +(default: 30 seconds) since anything was downloaded with it, or when it is +garbage collected, whichever comes first. If the grace period is set to zero, +all resources will be cleaned up immediately as soon as there are no more +ongoing downloads in progress. If the grace period is set to `Inf` then +resources are not cleaned up until `Downloader` is garbage collected. 
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Downloads/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Downloads/docs/src/index.md new file mode 100644 index 0000000..23628fd --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Downloads/docs/src/index.md @@ -0,0 +1,9 @@ +# Downloads + +```@docs +Downloads.download +Downloads.request +Downloads.Response +Downloads.RequestError +Downloads.Downloader +``` diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/FileWatching/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/FileWatching/docs/src/index.md new file mode 100644 index 0000000..3944f5d --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/FileWatching/docs/src/index.md @@ -0,0 +1,9 @@ +# [File Events](@id lib-filewatching) + +```@docs +FileWatching.poll_fd +FileWatching.poll_file +FileWatching.watch_file +FileWatching.watch_folder +FileWatching.unwatch_folder +``` diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Future/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Future/docs/src/index.md new file mode 100644 index 0000000..dcb1a36 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Future/docs/src/index.md @@ -0,0 +1,9 @@ +# Future + +The `Future` module implements future behavior of already existing functions, +which will replace the current version in a future release of Julia. + +```@docs +Future.copy! 
+Future.randjump +``` diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/InteractiveUtils/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/InteractiveUtils/docs/src/index.md new file mode 100644 index 0000000..a8a675e --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/InteractiveUtils/docs/src/index.md @@ -0,0 +1,28 @@ +# Interactive Utilities + +```@docs +InteractiveUtils.apropos +InteractiveUtils.varinfo +InteractiveUtils.versioninfo +InteractiveUtils.methodswith +InteractiveUtils.subtypes +InteractiveUtils.supertypes +InteractiveUtils.edit(::AbstractString, ::Integer) +InteractiveUtils.edit(::Any) +InteractiveUtils.@edit +InteractiveUtils.define_editor +InteractiveUtils.less(::AbstractString) +InteractiveUtils.less(::Any) +InteractiveUtils.@less +InteractiveUtils.@which +InteractiveUtils.@functionloc +InteractiveUtils.@code_lowered +InteractiveUtils.@code_typed +InteractiveUtils.code_warntype +InteractiveUtils.@code_warntype +InteractiveUtils.code_llvm +InteractiveUtils.@code_llvm +InteractiveUtils.code_native +InteractiveUtils.@code_native +InteractiveUtils.clipboard +``` diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/LazyArtifacts/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/LazyArtifacts/docs/src/index.md new file mode 100644 index 0000000..9de6b21 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/LazyArtifacts/docs/src/index.md @@ -0,0 +1,10 @@ +# Lazy Artifacts + +```@meta +DocTestSetup = :(using LazyArtifacts) +``` + +In order for a package to download artifacts lazily, `LazyArtifacts` must be +explicitly listed as a dependency of that package. + +For further information on artifacts, see [Artifacts](@ref). 
diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/LibCURL/LICENSE.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/LibCURL/LICENSE.md new file mode 100644 index 0000000..b6f1d7e --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/LibCURL/LICENSE.md @@ -0,0 +1,23 @@ +LibCURL.jl is licensed under the MIT "Expat" License: + +> Copyright (c) 2013: JuliaWeb contributors +> +> Permission is hereby granted, free of charge, to any person obtaining +> a copy of this software and associated documentation files (the +> "Software"), to deal in the Software without restriction, including +> without limitation the rights to use, copy, modify, merge, publish, +> distribute, sublicense, and/or sell copies of the Software, and to +> permit persons to whom the Software is furnished to do so, subject to +> the following conditions: +> +> The above copyright notice and this permission notice shall be +> included in all copies or substantial portions of the Software. +> +> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +> EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +> MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +> NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +> LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +> OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +> WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/LibCURL/README.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/LibCURL/README.md new file mode 100644 index 0000000..14b256b --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/LibCURL/README.md @@ -0,0 +1,72 @@ +LibCURL.jl +========== + +*Julia wrapper for libCURL* + +[![Build Status](https://travis-ci.com/JuliaWeb/LibCURL.jl.svg?branch=master)](https://travis-ci.com/JuliaWeb/LibCURL.jl) +[![Appveyor](https://ci.appveyor.com/api/projects/status/github/JuliaWeb/LibCurl.jl?svg=true)](https://ci.appveyor.com/project/shashi/libcurl-jl) +[![codecov.io](http://codecov.io/github/JuliaWeb/LibCURL.jl/coverage.svg?branch=master)](http://codecov.io/github/JuliaWeb/LibCURL.jl?branch=master) + +--- +This is a simple Julia wrapper around http://curl.haxx.se/libcurl/ generated using [Clang.jl](https://github.com/ihnorton/Clang.jl). Please see the [libcurl API documentation](https://curl.haxx.se/libcurl/c/) for help on how to use this package. 
+ +### Example (fetch a URL) + +```julia +using LibCURL + +# init a curl handle +curl = curl_easy_init() + +# set the URL and request to follow redirects +curl_easy_setopt(curl, CURLOPT_URL, "http://example.com") +curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1) + + +# setup the callback function to recv data +function curl_write_cb(curlbuf::Ptr{Cvoid}, s::Csize_t, n::Csize_t, p_ctxt::Ptr{Cvoid}) + sz = s * n + data = Array{UInt8}(undef, sz) + + ccall(:memcpy, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, UInt64), data, curlbuf, sz) + println("recd: ", String(data)) + + sz::Csize_t +end + +c_curl_write_cb = @cfunction(curl_write_cb, Csize_t, (Ptr{Cvoid}, Csize_t, Csize_t, Ptr{Cvoid})) +curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, c_curl_write_cb) + + +# execute the query +res = curl_easy_perform(curl) +println("curl url exec response : ", res) + +# retrieve HTTP code +http_code = Array{Clong}(undef, 1) +curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, http_code) +println("httpcode : ", http_code) + +# release handle +curl_easy_cleanup(curl) + +``` + +### Binaries + +This package uses the [LibCURL_jll](https://github.com/JuliaBinaryWrappers/libCURL_jll.jl) binary package to install compiled libCURL binaries on all supported platforms. The following products are defined in the jll + +* `libcurl`: A `LibraryProduct` referencing the shared library +* `curl`: An `ExecutableProduct` referencing the binary + +This package also uses the [MozillaCACerts_jll](https://github.com/JuliaBinaryWrappers/MozillaCACerts_jll.jl) package to supply the Mozilla CA root certificate bundle. Note that the `cacert` symbol is re-exported from this package for ease of use. + +* `cacert`: A `FileProduct` referencing the Mozilla CA certificate bundle + +### SSL certificates + +Making SSL/TLS connections usually needs access to a CA certificate to validate peers. The Mozilla CA bundle can be used via this package. 
To use this certificate bundle, set the following option: + +```julia +curl_easy_setopt(curl, CURLOPT_CAINFO, LibCURL.cacert) +``` diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/LibGit2/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/LibGit2/docs/src/index.md new file mode 100644 index 0000000..e53a933 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/LibGit2/docs/src/index.md @@ -0,0 +1,161 @@ +# LibGit2 + +The LibGit2 module provides bindings to [libgit2](https://libgit2.org/), a portable C library that +implements core functionality for the [Git](https://git-scm.com/) version control system. +These bindings are currently used to power Julia's package manager. +It is expected that this module will eventually be moved into a separate package. + +### Functionality + +Some of this documentation assumes some prior knowledge of the libgit2 API. +For more information on some of the objects and methods referenced here, consult the upstream +[libgit2 API reference](https://libgit2.org/libgit2/#v0.25.1). + +```@docs +LibGit2.Buffer +LibGit2.CheckoutOptions +LibGit2.CloneOptions +LibGit2.DescribeOptions +LibGit2.DescribeFormatOptions +LibGit2.DiffDelta +LibGit2.DiffFile +LibGit2.DiffOptionsStruct +LibGit2.FetchHead +LibGit2.FetchOptions +LibGit2.GitAnnotated +LibGit2.GitBlame +LibGit2.GitBlob +LibGit2.GitCommit +LibGit2.GitHash +LibGit2.GitObject +LibGit2.GitRemote +LibGit2.GitRemoteAnon +LibGit2.GitRepo +LibGit2.GitRepoExt +LibGit2.GitRevWalker +LibGit2.GitShortHash +LibGit2.GitSignature +LibGit2.GitStatus +LibGit2.GitTag +LibGit2.GitTree +LibGit2.IndexEntry +LibGit2.IndexTime +LibGit2.BlameOptions +LibGit2.MergeOptions +LibGit2.ProxyOptions +LibGit2.PushOptions +LibGit2.RebaseOperation +LibGit2.RebaseOptions +LibGit2.RemoteCallbacks +LibGit2.SignatureStruct +LibGit2.StatusEntry +LibGit2.StatusOptions +LibGit2.StrArrayStruct +LibGit2.TimeStruct +LibGit2.add! +LibGit2.add_fetch! +LibGit2.add_push! +LibGit2.addblob! 
+LibGit2.author +LibGit2.authors +LibGit2.branch +LibGit2.branch! +LibGit2.checkout! +LibGit2.clone +LibGit2.commit +LibGit2.committer +LibGit2.count +LibGit2.counthunks +LibGit2.create_branch +LibGit2.credentials_callback +LibGit2.credentials_cb +LibGit2.default_signature +LibGit2.delete_branch +LibGit2.diff_files +LibGit2.entryid +LibGit2.entrytype +LibGit2.fetch +LibGit2.fetchheads +LibGit2.fetch_refspecs +LibGit2.fetchhead_foreach_cb +LibGit2.merge_base +LibGit2.merge!(::LibGit2.GitRepo; ::Any...) +LibGit2.merge!(::LibGit2.GitRepo, ::Vector{LibGit2.GitAnnotated}; ::LibGit2.MergeOptions, ::LibGit2.CheckoutOptions) +LibGit2.merge!(::LibGit2.GitRepo, ::Vector{LibGit2.GitAnnotated}, ::Bool; ::LibGit2.MergeOptions, ::LibGit2.CheckoutOptions) +LibGit2.ffmerge! +LibGit2.fullname +LibGit2.features +LibGit2.filename +LibGit2.filemode +LibGit2.gitdir +LibGit2.git_url +LibGit2.@githash_str +LibGit2.head +LibGit2.head! +LibGit2.head_oid +LibGit2.headname +LibGit2.init +LibGit2.is_ancestor_of +LibGit2.isbinary +LibGit2.iscommit +LibGit2.isdiff +LibGit2.isdirty +LibGit2.isorphan +LibGit2.isset +LibGit2.iszero +LibGit2.lookup_branch +LibGit2.map +LibGit2.mirror_callback +LibGit2.mirror_cb +LibGit2.message +LibGit2.merge_analysis +LibGit2.name +LibGit2.need_update +LibGit2.objtype +LibGit2.path +LibGit2.peel +LibGit2.posixpath +LibGit2.push +LibGit2.push!(::LibGit2.GitRevWalker, ::LibGit2.GitHash) +LibGit2.push_head! +LibGit2.push_refspecs +LibGit2.raw +LibGit2.read_tree! +LibGit2.rebase! +LibGit2.ref_list +LibGit2.reftype +LibGit2.remotes +LibGit2.remove! +LibGit2.reset +LibGit2.reset! +LibGit2.restore +LibGit2.revcount +LibGit2.set_remote_url +LibGit2.shortname +LibGit2.snapshot +LibGit2.split_cfg_entry +LibGit2.status +LibGit2.stage +LibGit2.tag_create +LibGit2.tag_delete +LibGit2.tag_list +LibGit2.target +LibGit2.toggle +LibGit2.transact +LibGit2.treewalk +LibGit2.upstream +LibGit2.update! 
+LibGit2.url +LibGit2.version +LibGit2.with +LibGit2.with_warn +LibGit2.workdir +LibGit2.GitObject(::LibGit2.GitTreeEntry) +LibGit2.UserPasswordCredential +LibGit2.SSHCredential +LibGit2.isfilled +LibGit2.CachedCredentials +LibGit2.CredentialPayload +LibGit2.approve +LibGit2.reject +``` diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Libdl/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Libdl/docs/src/index.md new file mode 100644 index 0000000..62f9837 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Libdl/docs/src/index.md @@ -0,0 +1,15 @@ +# Dynamic Linker + +```@docs +Libdl.dlopen +Libdl.dlopen_e +Libdl.RTLD_NOW +Libdl.dlsym +Libdl.dlsym_e +Libdl.dlclose +Libdl.dlext +Libdl.dllist +Libdl.dlpath +Libdl.find_library +Libdl.DL_LOAD_PATH +``` diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/LinearAlgebra/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/LinearAlgebra/docs/src/index.md new file mode 100644 index 0000000..52e7860 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/LinearAlgebra/docs/src/index.md @@ -0,0 +1,681 @@ +# [Linear Algebra](@id man-linalg) + +```@meta +DocTestSetup = :(using LinearAlgebra, SparseArrays, SuiteSparse) +``` + +In addition to (and as part of) its support for multi-dimensional arrays, Julia provides native implementations +of many common and useful linear algebra operations which can be loaded with `using LinearAlgebra`. Basic operations, such as [`tr`](@ref), [`det`](@ref), +and [`inv`](@ref) are all supported: + +```jldoctest +julia> A = [1 2 3; 4 1 6; 7 8 1] +3×3 Matrix{Int64}: + 1 2 3 + 4 1 6 + 7 8 1 + +julia> tr(A) +3 + +julia> det(A) +104.0 + +julia> inv(A) +3×3 Matrix{Float64}: + -0.451923 0.211538 0.0865385 + 0.365385 -0.192308 0.0576923 + 0.240385 0.0576923 -0.0673077 +``` + +As well as other useful operations, such as finding eigenvalues or eigenvectors: + +```jldoctest +julia> A = [-4. -17.; 2. 2.] 
+2×2 Matrix{Float64}: + -4.0 -17.0 + 2.0 2.0 + +julia> eigvals(A) +2-element Vector{ComplexF64}: + -1.0 - 5.0im + -1.0 + 5.0im + +julia> eigvecs(A) +2×2 Matrix{ComplexF64}: + 0.945905-0.0im 0.945905+0.0im + -0.166924+0.278207im -0.166924-0.278207im +``` + +In addition, Julia provides many [factorizations](@ref man-linalg-factorizations) which can be used to +speed up problems such as linear solve or matrix exponentiation by pre-factorizing a matrix into a form +more amenable (for performance or memory reasons) to the problem. See the documentation on [`factorize`](@ref) +for more information. As an example: + +```jldoctest +julia> A = [1.5 2 -4; 3 -1 -6; -10 2.3 4] +3×3 Matrix{Float64}: + 1.5 2.0 -4.0 + 3.0 -1.0 -6.0 + -10.0 2.3 4.0 + +julia> factorize(A) +LU{Float64, Matrix{Float64}} +L factor: +3×3 Matrix{Float64}: + 1.0 0.0 0.0 + -0.15 1.0 0.0 + -0.3 -0.132196 1.0 +U factor: +3×3 Matrix{Float64}: + -10.0 2.3 4.0 + 0.0 2.345 -3.4 + 0.0 0.0 -5.24947 +``` + +Since `A` is not Hermitian, symmetric, triangular, tridiagonal, or bidiagonal, an LU factorization may be the +best we can do. Compare with: + +```jldoctest +julia> B = [1.5 2 -4; 2 -1 -3; -4 -3 5] +3×3 Matrix{Float64}: + 1.5 2.0 -4.0 + 2.0 -1.0 -3.0 + -4.0 -3.0 5.0 + +julia> factorize(B) +BunchKaufman{Float64, Matrix{Float64}} +D factor: +3×3 Tridiagonal{Float64, Vector{Float64}}: + -1.64286 0.0 ⋅ + 0.0 -2.8 0.0 + ⋅ 0.0 5.0 +U factor: +3×3 UnitUpperTriangular{Float64, Matrix{Float64}}: + 1.0 0.142857 -0.8 + ⋅ 1.0 -0.6 + ⋅ ⋅ 1.0 +permutation: +3-element Vector{Int64}: + 1 + 2 + 3 +``` + +Here, Julia was able to detect that `B` is in fact symmetric, and used a more appropriate factorization. +Often it's possible to write more efficient code for a matrix that is known to have certain properties e.g. +it is symmetric, or tridiagonal. Julia provides some special types so that you can "tag" matrices as having +these properties. 
For instance: + +```jldoctest +julia> B = [1.5 2 -4; 2 -1 -3; -4 -3 5] +3×3 Matrix{Float64}: + 1.5 2.0 -4.0 + 2.0 -1.0 -3.0 + -4.0 -3.0 5.0 + +julia> sB = Symmetric(B) +3×3 Symmetric{Float64, Matrix{Float64}}: + 1.5 2.0 -4.0 + 2.0 -1.0 -3.0 + -4.0 -3.0 5.0 +``` + +`sB` has been tagged as a matrix that's (real) symmetric, so for later operations we might perform on it, +such as eigenfactorization or computing matrix-vector products, efficiencies can be found by only referencing +half of it. For example: + +```jldoctest +julia> B = [1.5 2 -4; 2 -1 -3; -4 -3 5] +3×3 Matrix{Float64}: + 1.5 2.0 -4.0 + 2.0 -1.0 -3.0 + -4.0 -3.0 5.0 + +julia> sB = Symmetric(B) +3×3 Symmetric{Float64, Matrix{Float64}}: + 1.5 2.0 -4.0 + 2.0 -1.0 -3.0 + -4.0 -3.0 5.0 + +julia> x = [1; 2; 3] +3-element Vector{Int64}: + 1 + 2 + 3 + +julia> sB\x +3-element Vector{Float64}: + -1.7391304347826084 + -1.1086956521739126 + -1.4565217391304346 +``` +The `\` operation here performs the linear solution. The left-division operator is pretty powerful and it's easy to write compact, readable code that is flexible enough to solve all sorts of systems of linear equations. + +## Special matrices + +[Matrices with special symmetries and structures](http://www2.imm.dtu.dk/pubdb/views/publication_details.php?id=3274) +arise often in linear algebra and are frequently associated with various matrix factorizations. +Julia features a rich collection of special matrix types, which allow for fast computation with +specialized routines that are specially developed for particular matrix types. + +The following tables summarize the types of special matrices that have been implemented in Julia, +as well as whether hooks to various optimized methods for them in LAPACK are available. 
+ +| Type | Description | +|:----------------------------- |:--------------------------------------------------------------------------------------------- | +| [`Symmetric`](@ref) | [Symmetric matrix](https://en.wikipedia.org/wiki/Symmetric_matrix) | +| [`Hermitian`](@ref) | [Hermitian matrix](https://en.wikipedia.org/wiki/Hermitian_matrix) | +| [`UpperTriangular`](@ref) | Upper [triangular matrix](https://en.wikipedia.org/wiki/Triangular_matrix) | +| [`UnitUpperTriangular`](@ref) | Upper [triangular matrix](https://en.wikipedia.org/wiki/Triangular_matrix) with unit diagonal | +| [`LowerTriangular`](@ref) | Lower [triangular matrix](https://en.wikipedia.org/wiki/Triangular_matrix) | +| [`UnitLowerTriangular`](@ref) | Lower [triangular matrix](https://en.wikipedia.org/wiki/Triangular_matrix) with unit diagonal | +| [`UpperHessenberg`](@ref) | Upper [Hessenberg matrix](https://en.wikipedia.org/wiki/Hessenberg_matrix) | +| [`Tridiagonal`](@ref) | [Tridiagonal matrix](https://en.wikipedia.org/wiki/Tridiagonal_matrix) | +| [`SymTridiagonal`](@ref) | Symmetric tridiagonal matrix | +| [`Bidiagonal`](@ref) | Upper/lower [bidiagonal matrix](https://en.wikipedia.org/wiki/Bidiagonal_matrix) | +| [`Diagonal`](@ref) | [Diagonal matrix](https://en.wikipedia.org/wiki/Diagonal_matrix) | +| [`UniformScaling`](@ref) | [Uniform scaling operator](https://en.wikipedia.org/wiki/Uniform_scaling) | + +### Elementary operations + +| Matrix type | `+` | `-` | `*` | `\` | Other functions with optimized methods | +|:----------------------------- |:--- |:--- |:--- |:--- |:----------------------------------------------------------- | +| [`Symmetric`](@ref) | | | | MV | [`inv`](@ref), [`sqrt`](@ref), [`exp`](@ref) | +| [`Hermitian`](@ref) | | | | MV | [`inv`](@ref), [`sqrt`](@ref), [`exp`](@ref) | +| [`UpperTriangular`](@ref) | | | MV | MV | [`inv`](@ref), [`det`](@ref) | +| [`UnitUpperTriangular`](@ref) | | | MV | MV | [`inv`](@ref), [`det`](@ref) | +| [`LowerTriangular`](@ref) | | | MV | MV | 
[`inv`](@ref), [`det`](@ref) | +| [`UnitLowerTriangular`](@ref) | | | MV | MV | [`inv`](@ref), [`det`](@ref) | +| [`UpperHessenberg`](@ref) | | | | MM | [`inv`](@ref), [`det`](@ref) | +| [`SymTridiagonal`](@ref) | M | M | MS | MV | [`eigmax`](@ref), [`eigmin`](@ref) | +| [`Tridiagonal`](@ref) | M | M | MS | MV | | +| [`Bidiagonal`](@ref) | M | M | MS | MV | | +| [`Diagonal`](@ref) | M | M | MV | MV | [`inv`](@ref), [`det`](@ref), [`logdet`](@ref), [`/`](@ref) | +| [`UniformScaling`](@ref) | M | M | MVS | MVS | [`/`](@ref) | + +Legend: + +| Key | Description | +|:---------- |:------------------------------------------------------------- | +| M (matrix) | An optimized method for matrix-matrix operations is available | +| V (vector) | An optimized method for matrix-vector operations is available | +| S (scalar) | An optimized method for matrix-scalar operations is available | + +### Matrix factorizations + +| Matrix type | LAPACK | [`eigen`](@ref) | [`eigvals`](@ref) | [`eigvecs`](@ref) | [`svd`](@ref) | [`svdvals`](@ref) | +|:----------------------------- |:------ |:------------- |:----------------- |:----------------- |:------------- |:----------------- | +| [`Symmetric`](@ref) | SY | | ARI | | | | +| [`Hermitian`](@ref) | HE | | ARI | | | | +| [`UpperTriangular`](@ref) | TR | A | A | A | | | +| [`UnitUpperTriangular`](@ref) | TR | A | A | A | | | +| [`LowerTriangular`](@ref) | TR | A | A | A | | | +| [`UnitLowerTriangular`](@ref) | TR | A | A | A | | | +| [`SymTridiagonal`](@ref) | ST | A | ARI | AV | | | +| [`Tridiagonal`](@ref) | GT | | | | | | +| [`Bidiagonal`](@ref) | BD | | | | A | A | +| [`Diagonal`](@ref) | DI | | A | | | | + +Legend: + +| Key | Description | Example | +|:------------ |:------------------------------------------------------------------------------------------------------------------------------- |:-------------------- | +| A (all) | An optimized method to find all the characteristic values and/or vectors is available | e.g. 
`eigvals(M)` | +| R (range) | An optimized method to find the `il`th through the `ih`th characteristic values is available | `eigvals(M, il, ih)` | +| I (interval) | An optimized method to find the characteristic values in the interval [`vl`, `vh`] is available | `eigvals(M, vl, vh)` | +| V (vectors) | An optimized method to find the characteristic vectors corresponding to the characteristic values `x=[x1, x2,...]` is available | `eigvecs(M, x)` | + +### The uniform scaling operator + +A [`UniformScaling`](@ref) operator represents a scalar times the identity operator, `λ*I`. The identity +operator `I` is defined as a constant and is an instance of `UniformScaling`. The size of these +operators is generic and matches the other matrix in the binary operations [`+`](@ref), [`-`](@ref), +[`*`](@ref) and [`\`](@ref). For `A+I` and `A-I` this means that `A` must be square. Multiplication +with the identity operator `I` is a noop (except for checking that the scaling factor is one) +and therefore almost without overhead. + +To see the `UniformScaling` operator in action: + +```jldoctest +julia> U = UniformScaling(2); + +julia> a = [1 2; 3 4] +2×2 Matrix{Int64}: + 1 2 + 3 4 + +julia> a + U +2×2 Matrix{Int64}: + 3 2 + 3 6 + +julia> a * U +2×2 Matrix{Int64}: + 2 4 + 6 8 + +julia> [a U] +2×4 Matrix{Int64}: + 1 2 2 0 + 3 4 0 2 + +julia> b = [1 2 3; 4 5 6] +2×3 Matrix{Int64}: + 1 2 3 + 4 5 6 + +julia> b - U +ERROR: DimensionMismatch("matrix is not square: dimensions are (2, 3)") +Stacktrace: +[...] +``` + +If you need to solve many systems of the form `(A+μI)x = b` for the same `A` and different `μ`, it might be beneficial +to first compute the Hessenberg factorization `F` of `A` via the [`hessenberg`](@ref) function. +Given `F`, Julia employs an efficient algorithm for `(F+μ*I) \ b` (equivalent to `(A+μ*I) \ b`) and related +operations like determinants. + + +## [Matrix factorizations](@id man-linalg-factorizations) + +[Matrix factorizations (a.k.a. 
matrix decompositions)](https://en.wikipedia.org/wiki/Matrix_decomposition) +compute the factorization of a matrix into a product of matrices, and are one of the central concepts +in linear algebra. + +The following table summarizes the types of matrix factorizations that have been implemented in +Julia. Details of their associated methods can be found in the [Standard functions](@ref) section +of the Linear Algebra documentation. + +| Type | Description | +|:------------------ |:-------------------------------------------------------------------------------------------------------------- | +| `BunchKaufman` | Bunch-Kaufman factorization | +| `Cholesky` | [Cholesky factorization](https://en.wikipedia.org/wiki/Cholesky_decomposition) | +| `CholeskyPivoted` | [Pivoted](https://en.wikipedia.org/wiki/Pivot_element) Cholesky factorization | +| `LDLt` | [LDL(T) factorization](https://en.wikipedia.org/wiki/Cholesky_decomposition#LDL_decomposition) | +| `LU` | [LU factorization](https://en.wikipedia.org/wiki/LU_decomposition) | +| `QR` | [QR factorization](https://en.wikipedia.org/wiki/QR_decomposition) | +| `QRCompactWY` | Compact WY form of the QR factorization | +| `QRPivoted` | Pivoted [QR factorization](https://en.wikipedia.org/wiki/QR_decomposition) | +| `LQ` | [QR factorization](https://en.wikipedia.org/wiki/QR_decomposition) of `transpose(A)` | +| `Hessenberg` | [Hessenberg decomposition](http://mathworld.wolfram.com/HessenbergDecomposition.html) | +| `Eigen` | [Spectral decomposition](https://en.wikipedia.org/wiki/Eigendecomposition_of_a_matrix) | +| `GeneralizedEigen` | [Generalized spectral decomposition](https://en.wikipedia.org/wiki/Eigendecomposition_of_a_matrix#Generalized_eigenvalue_problem) | +| `SVD` | [Singular value decomposition](https://en.wikipedia.org/wiki/Singular_value_decomposition) | +| `GeneralizedSVD` | [Generalized SVD](https://en.wikipedia.org/wiki/Generalized_singular_value_decomposition#Higher_order_version) | +| `Schur` | [Schur 
decomposition](https://en.wikipedia.org/wiki/Schur_decomposition) | +| `GeneralizedSchur` | [Generalized Schur decomposition](https://en.wikipedia.org/wiki/Schur_decomposition#Generalized_Schur_decomposition) | + +## Standard functions + +Linear algebra functions in Julia are largely implemented by calling functions from [LAPACK](http://www.netlib.org/lapack/). Sparse matrix factorizations call functions from [SuiteSparse](http://suitesparse.com). Other sparse solvers are available as Julia packages. + +```@docs +Base.:*(::AbstractMatrix, ::AbstractMatrix) +Base.:\(::AbstractMatrix, ::AbstractVecOrMat) +LinearAlgebra.SingularException +LinearAlgebra.PosDefException +LinearAlgebra.ZeroPivotException +LinearAlgebra.dot +LinearAlgebra.dot(::Any, ::Any, ::Any) +LinearAlgebra.cross +LinearAlgebra.factorize +LinearAlgebra.Diagonal +LinearAlgebra.Bidiagonal +LinearAlgebra.SymTridiagonal +LinearAlgebra.Tridiagonal +LinearAlgebra.Symmetric +LinearAlgebra.Hermitian +LinearAlgebra.LowerTriangular +LinearAlgebra.UpperTriangular +LinearAlgebra.UnitLowerTriangular +LinearAlgebra.UnitUpperTriangular +LinearAlgebra.UpperHessenberg +LinearAlgebra.UniformScaling +LinearAlgebra.I +LinearAlgebra.UniformScaling(::Integer) +LinearAlgebra.Factorization +LinearAlgebra.LU +LinearAlgebra.lu +LinearAlgebra.lu! +LinearAlgebra.Cholesky +LinearAlgebra.CholeskyPivoted +LinearAlgebra.cholesky +LinearAlgebra.cholesky! +LinearAlgebra.lowrankupdate +LinearAlgebra.lowrankdowndate +LinearAlgebra.lowrankupdate! +LinearAlgebra.lowrankdowndate! +LinearAlgebra.LDLt +LinearAlgebra.ldlt +LinearAlgebra.ldlt! +LinearAlgebra.QR +LinearAlgebra.QRCompactWY +LinearAlgebra.QRPivoted +LinearAlgebra.qr +LinearAlgebra.qr! +LinearAlgebra.LQ +LinearAlgebra.lq +LinearAlgebra.lq! +LinearAlgebra.BunchKaufman +LinearAlgebra.bunchkaufman +LinearAlgebra.bunchkaufman! +LinearAlgebra.Eigen +LinearAlgebra.GeneralizedEigen +LinearAlgebra.eigvals +LinearAlgebra.eigvals! 
+LinearAlgebra.eigmax +LinearAlgebra.eigmin +LinearAlgebra.eigvecs +LinearAlgebra.eigen +LinearAlgebra.eigen! +LinearAlgebra.Hessenberg +LinearAlgebra.hessenberg +LinearAlgebra.hessenberg! +LinearAlgebra.Schur +LinearAlgebra.GeneralizedSchur +LinearAlgebra.schur +LinearAlgebra.schur! +LinearAlgebra.ordschur +LinearAlgebra.ordschur! +LinearAlgebra.SVD +LinearAlgebra.GeneralizedSVD +LinearAlgebra.svd +LinearAlgebra.svd! +LinearAlgebra.svdvals +LinearAlgebra.svdvals! +LinearAlgebra.Givens +LinearAlgebra.givens +LinearAlgebra.triu +LinearAlgebra.triu! +LinearAlgebra.tril +LinearAlgebra.tril! +LinearAlgebra.diagind +LinearAlgebra.diag +LinearAlgebra.diagm +LinearAlgebra.rank +LinearAlgebra.norm +LinearAlgebra.opnorm +LinearAlgebra.normalize! +LinearAlgebra.normalize +LinearAlgebra.cond +LinearAlgebra.condskeel +LinearAlgebra.tr +LinearAlgebra.det +LinearAlgebra.logdet +LinearAlgebra.logabsdet +Base.inv(::AbstractMatrix) +LinearAlgebra.pinv +LinearAlgebra.nullspace +Base.kron +Base.kron! +LinearAlgebra.exp(::StridedMatrix{<:LinearAlgebra.BlasFloat}) +Base.:^(::AbstractMatrix, ::Number) +Base.:^(::Number, ::AbstractMatrix) +LinearAlgebra.log(::StridedMatrix) +LinearAlgebra.sqrt(::StridedMatrix{<:Real}) +LinearAlgebra.cos(::StridedMatrix{<:Real}) +LinearAlgebra.sin(::StridedMatrix{<:Real}) +LinearAlgebra.sincos(::StridedMatrix{<:Real}) +LinearAlgebra.tan(::StridedMatrix{<:Real}) +LinearAlgebra.sec(::StridedMatrix) +LinearAlgebra.csc(::StridedMatrix) +LinearAlgebra.cot(::StridedMatrix) +LinearAlgebra.cosh(::StridedMatrix) +LinearAlgebra.sinh(::StridedMatrix) +LinearAlgebra.tanh(::StridedMatrix) +LinearAlgebra.sech(::StridedMatrix) +LinearAlgebra.csch(::StridedMatrix) +LinearAlgebra.coth(::StridedMatrix) +LinearAlgebra.acos(::StridedMatrix) +LinearAlgebra.asin(::StridedMatrix) +LinearAlgebra.atan(::StridedMatrix) +LinearAlgebra.asec(::StridedMatrix) +LinearAlgebra.acsc(::StridedMatrix) +LinearAlgebra.acot(::StridedMatrix) +LinearAlgebra.acosh(::StridedMatrix) 
+LinearAlgebra.asinh(::StridedMatrix) +LinearAlgebra.atanh(::StridedMatrix) +LinearAlgebra.asech(::StridedMatrix) +LinearAlgebra.acsch(::StridedMatrix) +LinearAlgebra.acoth(::StridedMatrix) +LinearAlgebra.lyap +LinearAlgebra.sylvester +LinearAlgebra.issuccess +LinearAlgebra.issymmetric +LinearAlgebra.isposdef +LinearAlgebra.isposdef! +LinearAlgebra.istril +LinearAlgebra.istriu +LinearAlgebra.isdiag +LinearAlgebra.ishermitian +Base.transpose +LinearAlgebra.transpose! +LinearAlgebra.Transpose +Base.adjoint +LinearAlgebra.adjoint! +LinearAlgebra.Adjoint +Base.copy(::Union{Transpose,Adjoint}) +LinearAlgebra.stride1 +LinearAlgebra.checksquare +LinearAlgebra.peakflops +``` + +## Low-level matrix operations + +In many cases there are in-place versions of matrix operations that allow you to supply +a pre-allocated output vector or matrix. This is useful when optimizing critical code in order +to avoid the overhead of repeated allocations. These in-place operations are suffixed with `!` +below (e.g. `mul!`) according to the usual Julia convention. + +```@docs +LinearAlgebra.mul! +LinearAlgebra.lmul! +LinearAlgebra.rmul! +LinearAlgebra.ldiv! +LinearAlgebra.rdiv! +``` + +## BLAS functions + +In Julia (as in much of scientific computation), dense linear-algebra operations are based on +the [LAPACK library](http://www.netlib.org/lapack/), which in turn is built on top of basic linear-algebra +building-blocks known as the [BLAS](http://www.netlib.org/blas/). There are highly optimized +implementations of BLAS available for every computer architecture, and sometimes in high-performance +linear algebra routines it is useful to call the BLAS functions directly. + +`LinearAlgebra.BLAS` provides wrappers for some of the BLAS functions. Those BLAS functions +that overwrite one of the input arrays have names ending in `'!'`. Usually, a BLAS function has +four methods defined, for [`Float64`](@ref), [`Float32`](@ref), `ComplexF64`, and `ComplexF32` arrays. 
+ +### [BLAS character arguments](@id stdlib-blas-chars) +Many BLAS functions accept arguments that determine whether to transpose an argument (`trans`), +which triangle of a matrix to reference (`uplo` or `ul`), +whether the diagonal of a triangular matrix can be assumed to +be all ones (`dA`) or which side of a matrix multiplication +the input argument belongs on (`side`). The possibilities are: + +#### [Multiplication order](@id stdlib-blas-side) +| `side` | Meaning | +|:-------|:--------------------------------------------------------------------| +| `'L'` | The argument goes on the *left* side of a matrix-matrix operation. | +| `'R'` | The argument goes on the *right* side of a matrix-matrix operation. | + +#### [Triangle referencing](@id stdlib-blas-uplo) +| `uplo`/`ul` | Meaning | +|:------------|:------------------------------------------------------| +| `'U'` | Only the *upper* triangle of the matrix will be used. | +| `'L'` | Only the *lower* triangle of the matrix will be used. | + +#### [Transposition operation](@id stdlib-blas-trans) +| `trans`/`tX` | Meaning | +|:-------------|:--------------------------------------------------------| +| `'N'` | The input matrix `X` is not transposed or conjugated. | +| `'T'` | The input matrix `X` will be transposed. | +| `'C'` | The input matrix `X` will be conjugated and transposed. | + +#### [Unit diagonal](@id stdlib-blas-diag) +| `diag`/`dX` | Meaning | +|:------------|:----------------------------------------------------------| +| `'N'` | The diagonal values of the matrix `X` will be read. | +| `'U'` | The diagonal of the matrix `X` is assumed to be all ones. | + +```@docs +LinearAlgebra.BLAS +LinearAlgebra.BLAS.dot +LinearAlgebra.BLAS.dotu +LinearAlgebra.BLAS.dotc +LinearAlgebra.BLAS.blascopy! +LinearAlgebra.BLAS.nrm2 +LinearAlgebra.BLAS.asum +LinearAlgebra.axpy! +LinearAlgebra.axpby! +LinearAlgebra.BLAS.scal! +LinearAlgebra.BLAS.scal +LinearAlgebra.BLAS.iamax +LinearAlgebra.BLAS.ger! +LinearAlgebra.BLAS.syr! 
+LinearAlgebra.BLAS.syrk! +LinearAlgebra.BLAS.syrk +LinearAlgebra.BLAS.syr2k! +LinearAlgebra.BLAS.syr2k +LinearAlgebra.BLAS.her! +LinearAlgebra.BLAS.herk! +LinearAlgebra.BLAS.herk +LinearAlgebra.BLAS.her2k! +LinearAlgebra.BLAS.her2k +LinearAlgebra.BLAS.gbmv! +LinearAlgebra.BLAS.gbmv +LinearAlgebra.BLAS.sbmv! +LinearAlgebra.BLAS.sbmv(::Any, ::Any, ::Any, ::Any, ::Any) +LinearAlgebra.BLAS.sbmv(::Any, ::Any, ::Any, ::Any) +LinearAlgebra.BLAS.gemm! +LinearAlgebra.BLAS.gemm(::Any, ::Any, ::Any, ::Any, ::Any) +LinearAlgebra.BLAS.gemm(::Any, ::Any, ::Any, ::Any) +LinearAlgebra.BLAS.gemv! +LinearAlgebra.BLAS.gemv(::Any, ::Any, ::Any, ::Any) +LinearAlgebra.BLAS.gemv(::Any, ::Any, ::Any) +LinearAlgebra.BLAS.symm! +LinearAlgebra.BLAS.symm(::Any, ::Any, ::Any, ::Any, ::Any) +LinearAlgebra.BLAS.symm(::Any, ::Any, ::Any, ::Any) +LinearAlgebra.BLAS.symv! +LinearAlgebra.BLAS.symv(::Any, ::Any, ::Any, ::Any) +LinearAlgebra.BLAS.symv(::Any, ::Any, ::Any) +LinearAlgebra.BLAS.hemm! +LinearAlgebra.BLAS.hemm(::Any, ::Any, ::Any, ::Any, ::Any) +LinearAlgebra.BLAS.hemm(::Any, ::Any, ::Any, ::Any) +LinearAlgebra.BLAS.hemv! +LinearAlgebra.BLAS.hemv(::Any, ::Any, ::Any, ::Any) +LinearAlgebra.BLAS.hemv(::Any, ::Any, ::Any) +LinearAlgebra.BLAS.trmm! +LinearAlgebra.BLAS.trmm +LinearAlgebra.BLAS.trsm! +LinearAlgebra.BLAS.trsm +LinearAlgebra.BLAS.trmv! +LinearAlgebra.BLAS.trmv +LinearAlgebra.BLAS.trsv! +LinearAlgebra.BLAS.trsv +LinearAlgebra.BLAS.set_num_threads +``` + +## LAPACK functions + +`LinearAlgebra.LAPACK` provides wrappers for some of the LAPACK functions for linear algebra. + Those functions that overwrite one of the input arrays have names ending in `'!'`. + +Usually a function has 4 methods defined, one each for [`Float64`](@ref), [`Float32`](@ref), +`ComplexF64` and `ComplexF32` arrays. + +Note that the LAPACK API provided by Julia can and will change in the future. 
Since this API is +not user-facing, there is no commitment to support/deprecate this specific set of functions in +future releases. + +```@docs +LinearAlgebra.LAPACK +LinearAlgebra.LAPACK.gbtrf! +LinearAlgebra.LAPACK.gbtrs! +LinearAlgebra.LAPACK.gebal! +LinearAlgebra.LAPACK.gebak! +LinearAlgebra.LAPACK.gebrd! +LinearAlgebra.LAPACK.gelqf! +LinearAlgebra.LAPACK.geqlf! +LinearAlgebra.LAPACK.geqrf! +LinearAlgebra.LAPACK.geqp3! +LinearAlgebra.LAPACK.gerqf! +LinearAlgebra.LAPACK.geqrt! +LinearAlgebra.LAPACK.geqrt3! +LinearAlgebra.LAPACK.getrf! +LinearAlgebra.LAPACK.tzrzf! +LinearAlgebra.LAPACK.ormrz! +LinearAlgebra.LAPACK.gels! +LinearAlgebra.LAPACK.gesv! +LinearAlgebra.LAPACK.getrs! +LinearAlgebra.LAPACK.getri! +LinearAlgebra.LAPACK.gesvx! +LinearAlgebra.LAPACK.gelsd! +LinearAlgebra.LAPACK.gelsy! +LinearAlgebra.LAPACK.gglse! +LinearAlgebra.LAPACK.geev! +LinearAlgebra.LAPACK.gesdd! +LinearAlgebra.LAPACK.gesvd! +LinearAlgebra.LAPACK.ggsvd! +LinearAlgebra.LAPACK.ggsvd3! +LinearAlgebra.LAPACK.geevx! +LinearAlgebra.LAPACK.ggev! +LinearAlgebra.LAPACK.gtsv! +LinearAlgebra.LAPACK.gttrf! +LinearAlgebra.LAPACK.gttrs! +LinearAlgebra.LAPACK.orglq! +LinearAlgebra.LAPACK.orgqr! +LinearAlgebra.LAPACK.orgql! +LinearAlgebra.LAPACK.orgrq! +LinearAlgebra.LAPACK.ormlq! +LinearAlgebra.LAPACK.ormqr! +LinearAlgebra.LAPACK.ormql! +LinearAlgebra.LAPACK.ormrq! +LinearAlgebra.LAPACK.gemqrt! +LinearAlgebra.LAPACK.posv! +LinearAlgebra.LAPACK.potrf! +LinearAlgebra.LAPACK.potri! +LinearAlgebra.LAPACK.potrs! +LinearAlgebra.LAPACK.pstrf! +LinearAlgebra.LAPACK.ptsv! +LinearAlgebra.LAPACK.pttrf! +LinearAlgebra.LAPACK.pttrs! +LinearAlgebra.LAPACK.trtri! +LinearAlgebra.LAPACK.trtrs! +LinearAlgebra.LAPACK.trcon! +LinearAlgebra.LAPACK.trevc! +LinearAlgebra.LAPACK.trrfs! +LinearAlgebra.LAPACK.stev! +LinearAlgebra.LAPACK.stebz! +LinearAlgebra.LAPACK.stegr! +LinearAlgebra.LAPACK.stein! +LinearAlgebra.LAPACK.syconv! +LinearAlgebra.LAPACK.sysv! +LinearAlgebra.LAPACK.sytrf! +LinearAlgebra.LAPACK.sytri! 
+LinearAlgebra.LAPACK.sytrs! +LinearAlgebra.LAPACK.hesv! +LinearAlgebra.LAPACK.hetrf! +LinearAlgebra.LAPACK.hetri! +LinearAlgebra.LAPACK.hetrs! +LinearAlgebra.LAPACK.syev! +LinearAlgebra.LAPACK.syevr! +LinearAlgebra.LAPACK.sygvd! +LinearAlgebra.LAPACK.bdsqr! +LinearAlgebra.LAPACK.bdsdc! +LinearAlgebra.LAPACK.gecon! +LinearAlgebra.LAPACK.gehrd! +LinearAlgebra.LAPACK.orghr! +LinearAlgebra.LAPACK.gees! +LinearAlgebra.LAPACK.gges! +LinearAlgebra.LAPACK.trexc! +LinearAlgebra.LAPACK.trsen! +LinearAlgebra.LAPACK.tgsen! +LinearAlgebra.LAPACK.trsyl! +``` + +```@meta +DocTestSetup = nothing +``` diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Logging/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Logging/docs/src/index.md new file mode 100644 index 0000000..e1d6022 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Logging/docs/src/index.md @@ -0,0 +1,311 @@ +# Logging + +The [`Logging`](@ref Logging.Logging) module provides a way to record the history and progress of a +computation as a log of events. Events are created by inserting a logging +statement into the source code, for example: + +```julia +@warn "Abandon printf debugging, all ye who enter here!" +┌ Warning: Abandon printf debugging, all ye who enter here! +└ @ Main REPL[1]:1 +``` + +The system provides several advantages over peppering your source code with +calls to `println()`. First, it allows you to control the visibility and +presentation of messages without editing the source code. For example, in +contrast to the `@warn` above + +```julia +@debug "The sum of some values $(sum(rand(100)))" +``` + +will produce no output by default. Furthermore, it's very cheap to leave debug +statements like this in the source code because the system avoids evaluating +the message if it would later be ignored. In this case `sum(rand(100))` and +the associated string processing will never be executed unless debug logging is +enabled. 
+ +Second, the logging tools allow you to attach arbitrary data to each event as a +set of key--value pairs. This allows you to capture local variables and other +program state for later analysis. For example, to attach the local array +variable `A` and the sum of a vector `v` as the key `s` you can use + +```jldoctest +A = ones(Int, 4, 4) +v = ones(100) +@info "Some variables" A s=sum(v) + +# output +┌ Info: Some variables +│ A = +│ 4×4 Matrix{Int64}: +│ 1 1 1 1 +│ 1 1 1 1 +│ 1 1 1 1 +│ 1 1 1 1 +└ s = 100.0 +``` + +All of the logging macros `@debug`, `@info`, `@warn` and `@error` share common +features that are described in detail in the documentation for the more +general macro [`@logmsg`](@ref). + +## Log event structure + +Each event generates several pieces of data, some provided by the user and some +automatically extracted. Let's examine the user-defined data first: + +* The *log level* is a broad category for the message that is used for early + filtering. There are several standard levels of type [`LogLevel`](@ref); + user-defined levels are also possible. + Each is distinct in purpose: + - `Debug` is information intended for the developer of the program. + These events are disabled by default. + - `Info` is for general information to the user. + Think of it as an alternative to using `println` directly. + - `Warn` means something is wrong and action is likely required + but that for now the program is still working. + - `Error` means something is wrong and it is unlikely to be recovered, + at least by this part of the code. + Often this log-level is unneeded as throwing an exception can convey + all the required information. + +* The *message* is an object describing the event. By convention + `AbstractString`s passed as messages are assumed to be in markdown format. + Other types will be displayed using `print(io, obj)` or `string(obj)` for + text-based output and possibly `show(io,mime,obj)` for other multimedia + displays used in the installed logger. 
+* Optional *key--value pairs* allow arbitrary data to be attached to each event. + Some keys have conventional meaning that can affect the way an event is + interpreted (see [`@logmsg`](@ref)). + +The system also generates some standard information for each event: + +* The `module` in which the logging macro was expanded. +* The `file` and `line` where the logging macro occurs in the source code. +* A message `id` that is a unique, fixed identifier for the *source code + statement* where the logging macro appears. This identifier is designed to be + fairly stable even if the source code of the file changes, as long as the + logging statement itself remains the same. +* A `group` for the event, which is set to the base name of the file by default, + without extension. This can be used to group messages into categories more + finely than the log level (for example, all deprecation warnings have group + `:depwarn`), or into logical groupings across or within modules. + +Notice that some useful information such as the event time is not included by +default. This is because such information can be expensive to extract and is +also *dynamically* available to the current logger. It's simple to define a +[custom logger](@ref AbstractLogger-interface) to augment event data with the +time, backtrace, values of global variables and other useful information as +required. + + +## Processing log events + +As you can see in the examples, logging statements make no mention of +where log events go or how they are processed. This is a key design feature +that makes the system composable and natural for concurrent use. It does this +by separating two different concerns: + +* *Creating* log events is the concern of the module author who needs to + decide where events are triggered and which information to include. 
+* *Processing* of log events — that is, display, filtering, aggregation and + recording — is the concern of the application author who needs to bring + multiple modules together into a cooperating application. + +### Loggers + +Processing of events is performed by a *logger*, which is the first piece of +user configurable code to see the event. All loggers must be subtypes of +[`AbstractLogger`](@ref). + +When an event is triggered, the appropriate logger is found by looking for a +task-local logger with the global logger as fallback. The idea here is that +the application code knows how log events should be processed and exists +somewhere at the top of the call stack. So we should look up through the call +stack to discover the logger — that is, the logger should be *dynamically +scoped*. (This is a point of contrast with logging frameworks where the +logger is *lexically scoped*; provided explicitly by the module author or as a +simple global variable. In such a system it's awkward to control logging while +composing functionality from multiple modules.) + +The global logger may be set with [`global_logger`](@ref), and task-local +loggers controlled using [`with_logger`](@ref). Newly spawned tasks inherit +the logger of the parent task. + +There are three logger types provided by the library. [`ConsoleLogger`](@ref) +is the default logger you see when starting the REPL. It displays events in a +readable text format and tries to give simple but user friendly control over +formatting and filtering. [`NullLogger`](@ref) is a convenient way to drop all +messages where necessary; it is the logging equivalent of the [`devnull`](@ref) +stream. [`SimpleLogger`](@ref) is a very simplistic text formatting logger, +mainly useful for debugging the logging system itself. + +Custom loggers should come with overloads for the functions described in the +[reference section](@ref AbstractLogger-interface). 
+ +### Early filtering and message handling + +When an event occurs, a few steps of early filtering occur to avoid generating +messages that will be discarded: + +1. The message log level is checked against a global minimum level (set via + [`disable_logging`](@ref)). This is a crude but extremely cheap global + setting. +2. The current logger state is looked up and the message level checked against the + logger's cached minimum level, as found by calling [`Logging.min_enabled_level`](@ref). + This behavior can be overridden via environment variables (more on this later). +3. The [`Logging.shouldlog`](@ref) function is called with the current logger, taking + some minimal information (level, module, group, id) which can be computed + statically. Most usefully, `shouldlog` is passed an event `id` which can be + used to discard events early based on a cached predicate. + +If all these checks pass, the message and key--value pairs are evaluated in full +and passed to the current logger via the [`Logging.handle_message`](@ref) function. +`handle_message()` may perform additional filtering as required and display the +event to the screen, save it to a file, etc. + +Exceptions that occur while generating the log event are captured and logged +by default. This prevents individual broken events from crashing the +application, which is helpful when enabling little-used debug events in a +production system. This behavior can be customized per logger type by +extending [`Logging.catch_exceptions`](@ref). + +## Testing log events + +Log events are a side effect of running normal code, but you might find +yourself wanting to test particular informational messages and warnings. The +`Test` module provides a [`@test_logs`](@ref) macro that can be used to +pattern match against the log event stream. 
+ +## Environment variables + +Message filtering can be influenced through the `JULIA_DEBUG` environment +variable, and serves as an easy way to enable debug logging for a file or +module. For example, loading julia with `JULIA_DEBUG=loading` will activate +`@debug` log messages in `loading.jl`: + +``` +$ JULIA_DEBUG=loading julia -e 'using OhMyREPL' +┌ Debug: Rejecting cache file /home/user/.julia/compiled/v0.7/OhMyREPL.ji due to it containing an invalid cache header +└ @ Base loading.jl:1328 +[ Info: Recompiling stale cache file /home/user/.julia/compiled/v0.7/OhMyREPL.ji for module OhMyREPL +┌ Debug: Rejecting cache file /home/user/.julia/compiled/v0.7/Tokenize.ji due to it containing an invalid cache header +└ @ Base loading.jl:1328 +... +``` + +Similarly, the environment variable can be used to enable debug logging of +modules, such as `Pkg`, or module roots (see [`Base.moduleroot`](@ref)). To +enable all debug logging, use the special value `all`. + +To turn debug logging on from the REPL, set `ENV["JULIA_DEBUG"]` to the +name of the module of interest. Functions defined in the REPL belong to +module `Main`; logging for them can be enabled like this: +```julia-repl +julia> foo() = @debug "foo" +foo (generic function with 1 method) + +julia> foo() + +julia> ENV["JULIA_DEBUG"] = Main +Main + +julia> foo() +┌ Debug: foo +└ @ Main REPL[1]:1 + +``` + +## Writing log events to a file + +Sometimes it can be useful to write log events to a file. 
Here is an example +of how to use a task-local and global logger to write information to a text +file: + +```julia-repl +# Load the logging module +julia> using Logging + +# Open a textfile for writing +julia> io = open("log.txt", "w+") +IOStream() + +# Create a simple logger +julia> logger = SimpleLogger(io) +SimpleLogger(IOStream(), Info, Dict{Any,Int64}()) + +# Log a task-specific message +julia> with_logger(logger) do + @info("a context specific log message") + end + +# Write all buffered messages to the file +julia> flush(io) + +# Set the global logger to logger +julia> global_logger(logger) +SimpleLogger(IOStream(), Info, Dict{Any,Int64}()) + +# This message will now also be written to the file +julia> @info("a global log message") + +# Close the file +julia> close(io) +``` + + +## Reference + +### Logging module +```@docs +Logging.Logging +``` + +### Creating events + +```@docs +Logging.@logmsg +Logging.LogLevel +``` + +### [Processing events with AbstractLogger](@id AbstractLogger-interface) + +Event processing is controlled by overriding functions associated with +`AbstractLogger`: + +| Methods to implement | | Brief description | +|:----------------------------------- |:---------------------- |:---------------------------------------- | +| [`Logging.handle_message`](@ref) | | Handle a log event | +| [`Logging.shouldlog`](@ref) | | Early filtering of events | +| [`Logging.min_enabled_level`](@ref) | | Lower bound for log level of accepted events | +| **Optional methods** | **Default definition** | **Brief description** | +| [`Logging.catch_exceptions`](@ref) | `true` | Catch exceptions during event evaluation | + + +```@docs +Logging.AbstractLogger +Logging.handle_message +Logging.shouldlog +Logging.min_enabled_level +Logging.catch_exceptions +Logging.disable_logging +``` + +### Using Loggers + +Logger installation and inspection: + +```@docs +Logging.global_logger +Logging.with_logger +Logging.current_logger +``` + +Loggers that are supplied with the 
system: + +```@docs +Logging.NullLogger +Logging.ConsoleLogger +Logging.SimpleLogger +``` diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Markdown/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Markdown/docs/src/index.md new file mode 100644 index 0000000..a107929 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Markdown/docs/src/index.md @@ -0,0 +1,398 @@ +# [Markdown](@id markdown_stdlib) + +This section describes Julia's markdown syntax, which is enabled by the +Markdown standard library. The following Markdown elements are supported: + +## Inline elements + +Here "inline" refers to elements that can be found within blocks of text, i.e. paragraphs. These +include the following elements. + +### Bold + +Surround words with two asterisks, `**`, to display the enclosed text in boldface. + +``` +A paragraph containing a **bold** word. +``` + +### Italics + +Surround words with one asterisk, `*`, to display the enclosed text in italics. + +``` +A paragraph containing an *italicized* word. +``` + +### Literals + +Surround text that should be displayed exactly as written with single backticks, ``` ` ``` . + +``` +A paragraph containing a `literal` word. +``` + +Literals should be used when writing text that refers to names of variables, functions, or other +parts of a Julia program. + +!!! tip + To include a backtick character within literal text use three backticks rather than one to enclose + the text. + + ``` + A paragraph containing ``` `backtick` characters ```. + ``` + + By extension any odd number of backticks may be used to enclose a lesser number of backticks. + +### ``\LaTeX`` + +Surround text that should be displayed as mathematics using ``\LaTeX`` syntax with double backticks, +``` `` ``` . + +``` +A paragraph containing some ``\LaTeX`` markup. +``` + +!!! tip + As with literals in the previous section, if literal backticks need to be written within double + backticks use an even number greater than two. 
Note that if a single literal backtick needs to + be included within ``\LaTeX`` markup then two enclosing backticks is sufficient. + +!!! note + The `\` character should be escaped appropriately if the text is embedded in a Julia source code, + for example, ``` "``\\LaTeX`` syntax in a docstring." ```, since it is interpreted as a string + literal. Alternatively, in order to avoid escaping, it is possible to use the `raw` string macro + together with the `@doc` macro: + ``` + @doc raw"``\LaTeX`` syntax in a docstring." functionname + ``` + +### Links + +Links to either external or internal targets can be written using the following syntax, where +the text enclosed in square brackets, `[ ]`, is the name of the link and the text enclosed in +parentheses, `( )`, is the URL. + +``` +A paragraph containing a link to [Julia](http://www.julialang.org). +``` + +It's also possible to add cross-references to other documented functions/methods/variables within +the Julia documentation itself. For example: + +```julia +""" + tryparse(type, str; base) + +Like [`parse`](@ref), but returns either a value of the requested type, +or [`nothing`](@ref) if the string does not contain a valid number. +""" +``` + +This will create a link in the generated docs to the [`parse`](@ref) documentation +(which has more information about what this function actually does), and to the +[`nothing`](@ref) documentation. It's good to include cross references to mutating/non-mutating +versions of a function, or to highlight a difference between two similar-seeming functions. + +!!! note + The above cross referencing is *not* a Markdown feature, and relies on + [Documenter.jl](https://github.com/JuliaDocs/Documenter.jl), which is + used to build base Julia's documentation. + +### Footnote references + +Named and numbered footnote references can be written using the following syntax. A footnote name +must be a single alphanumeric word containing no punctuation. 
+ +``` +A paragraph containing a numbered footnote [^1] and a named one [^named]. +``` + +!!! note + The text associated with a footnote can be written anywhere within the same page as the footnote + reference. The syntax used to define the footnote text is discussed in the [Footnotes](@ref) section + below. + +## Toplevel elements + +The following elements can be written either at the "toplevel" of a document or within another +"toplevel" element. + +### Paragraphs + +A paragraph is a block of plain text, possibly containing any number of inline elements defined +in the [Inline elements](@ref) section above, with one or more blank lines above and below it. + +``` +This is a paragraph. + +And this is *another* paragraph containing some emphasized text. +A new line, but still part of the same paragraph. +``` + +### Headers + +A document can be split up into different sections using headers. Headers use the following syntax: + +```julia +# Level One +## Level Two +### Level Three +#### Level Four +##### Level Five +###### Level Six +``` + +A header line can contain any inline syntax in the same way as a paragraph can. + +!!! tip + Try to avoid using too many levels of header within a single document. A heavily nested document + may be indicative of a need to restructure it or split it into several pages covering separate + topics. + +### Code blocks + +Source code can be displayed as a literal block using an indent of four spaces as shown in the +following example. + +``` +This is a paragraph. + + function func(x) + # ... + end + +Another paragraph. +``` + +Additionally, code blocks can be enclosed using triple backticks with an optional "language" to +specify how a block of code should be highlighted. + +```` +A code block without a "language": + +``` +function func(x) + # ... +end +``` + +and another one with the "language" specified as `julia`: + +```julia +function func(x) + # ... +end +``` +```` + +!!! 
note + "Fenced" code blocks, as shown in the last example, should be preferred over indented code blocks + since there is no way to specify what language an indented code block is written in. + +### Block quotes + +Text from external sources, such as quotations from books or websites, can be quoted using `>` +characters prepended to each line of the quote as follows. + +``` +Here's a quote: + +> Julia is a high-level, high-performance dynamic programming language for +> technical computing, with syntax that is familiar to users of other +> technical computing environments. +``` + +Note that a single space must appear after the `>` character on each line. Quoted blocks may themselves +contain other toplevel or inline elements. + +### Images + +The syntax for images is similar to the link syntax mentioned above. Prepending a `!` character +to a link will display an image from the specified URL rather than a link to it. + +```julia +![alternative text](link/to/image.png) +``` + +### Lists + +Unordered lists can be written by prepending each item in a list with either `*`, `+`, or `-`. + +``` +A list of items: + + * item one + * item two + * item three +``` + +Note the two spaces before each `*` and the single space after each one. + +Lists can contain other nested toplevel elements such as lists, code blocks, or quoteblocks. A +blank line should be left between each list item when including any toplevel elements within a +list. + +``` +Another list: + + * item one + + * item two + + ``` + f(x) = x + ``` + + * And a sublist: + + + sub-item one + + sub-item two +``` + +!!! note + The contents of each item in the list must line up with the first line of the item. In the above + example the fenced code block must be indented by four spaces to align with the `i` in `item two`. + +Ordered lists are written by replacing the "bullet" character, either `*`, `+`, or `-`, with a +positive integer followed by either `.` or `)`. + +``` +Two ordered lists: + + 1. item one + 2. 
item two + 3. item three + + 5) item five + 6) item six + 7) item seven +``` + +An ordered list may start from a number other than one, as in the second list of the above example, +where it is numbered from five. As with unordered lists, ordered lists can contain nested toplevel +elements. + +### Display equations + +Large ``\LaTeX`` equations that do not fit inline within a paragraph may be written as display +equations using a fenced code block with the "language" `math` as in the example below. + +````julia +```math +f(a) = \frac{1}{2\pi}\int_{0}^{2\pi} (\alpha+R\cos(\theta))d\theta +``` +```` + +### Footnotes + +This syntax is paired with the inline syntax for [Footnote references](@ref). Make sure to read +that section as well. + +Footnote text is defined using the following syntax, which is similar to footnote reference syntax, +aside from the `:` character that is appended to the footnote label. + +``` +[^1]: Numbered footnote text. + +[^note]: + + Named footnote text containing several toplevel elements. + + * item one + * item two + * item three + + ```julia + function func(x) + # ... + end + ``` +``` + +!!! note + No checks are done during parsing to make sure that all footnote references have matching footnotes. + +### Horizontal rules + +The equivalent of an `<hr>` HTML tag can be achieved using three hyphens (`---`). +For example: + +``` +Text above the line. + +--- + +And text below the line. +``` + +### Tables + +Basic tables can be written using the syntax described below. Note that markdown tables have limited +features and cannot contain nested toplevel elements unlike other elements discussed above – +only inline elements are allowed. Tables must always contain a header row with column names. Cells +cannot span multiple rows or columns of the table. + +``` +| Column One | Column Two | Column Three | +|:---------- | ---------- |:------------:| +| Row `1` | Column `2` | | +| *Row* 2 | **Row** 2 | Column ``3`` | +``` + +!!! note + As illustrated in the above example each column of `|` characters must be aligned vertically. + + A `:` character on either end of a column's header separator (the row containing `-` characters) + specifies whether the column is left-aligned, right-aligned, or (when `:` appears on both ends) center-aligned. + Providing no `:` characters will default to right-aligning the column. + +### Admonitions + +Specially formatted blocks, known as admonitions, can be used to highlight particular remarks. +They can be defined using the following `!!!` syntax: + +``` +!!! note + + This is the content of the note. + +!!! warning "Beware!" + + And this is another one. + + This warning admonition has a custom title: `"Beware!"`. +``` + +The first word after `!!!` declares the type of the admonition. +There are standard admonition types that should produce special styling. +Namely (in order of decreasing severity): `danger`, `warning`, `info`/`note`, and `tip`. + +You can also use your own admonition types, as long as the type name only contains lowercase Latin characters (a-z). +For example, you could have a `terminology` block like this: + +``` +!!! terminology "julia vs Julia" + + Strictly speaking, "Julia" refers to the language, + and "julia" to the standard implementation.
+``` + +However, unless the code rendering the Markdown special-cases that particular admonition type, it will get the default styling. + +A custom title for the box can be provided as a string (in double quotes) after the admonition type. +If no title text is specified after the admonition type, then the type name will be used as the title (e.g. `"Note"` for the `note` admonition). + +Admonitions, like most other toplevel elements, can contain other toplevel elements (e.g. lists, images). + +## Markdown Syntax Extensions + +Julia's markdown supports interpolation in a very similar way to basic string literals, with the +difference that it will store the object itself in the Markdown tree (as opposed to converting +it to a string). When the Markdown content is rendered the usual `show` methods will be called, +and these can be overridden as usual. This design allows the Markdown to be extended with arbitrarily +complex features (such as references) without cluttering the basic syntax. + +In principle, the Markdown parser itself can also be arbitrarily extended by packages, or an entirely +custom flavour of Markdown can be used, but this should generally be unnecessary. diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Mmap/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Mmap/docs/src/index.md new file mode 100644 index 0000000..ada88b1 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Mmap/docs/src/index.md @@ -0,0 +1,7 @@ +# Memory-mapped I/O + +```@docs +Mmap.Anonymous +Mmap.mmap +Mmap.sync! 
+``` diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/NetworkOptions/README.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/NetworkOptions/README.md new file mode 100644 index 0000000..f8df24d --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/NetworkOptions/README.md @@ -0,0 +1,270 @@ +# NetworkOptions + +[![Build Status](https://travis-ci.org/JuliaLang/NetworkOptions.jl.svg?branch=master)](https://travis-ci.org/JuliaLang/NetworkOptions.jl) +[![Codecov](https://codecov.io/gh/JuliaLang/NetworkOptions.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/JuliaLang/NetworkOptions.jl) + +The `NetworkOptions` package acts as a mediator between ways of configuring +network transport mechanisms (SSL/TLS, SSH, proxies, etc.) and Julia packages +that provide access to transport mechanisms. This allows a common interface +to configuring things like TLS and SSH host verification and proxies via +environment variables (currently) and other configuration mechanisms (in the +future), while packages that need to configure these mechanisms can simply +ask `NetworkOptions` what to do in specific situations without worrying about +how that configuration is expressed. + +## API + +### ca_roots + +```jl +ca_roots() :: Union{Nothing, String} +``` +The `ca_roots()` function tells the caller where, if anywhere, to find a file or +directory of PEM-encoded certificate authority roots. By default, on systems +like Windows and macOS where the built-in TLS engines know how to verify hosts +using the system's built-in certificate verification mechanism, this function +will return `nothing`. On classic UNIX systems (excluding macOS), root +certificates are typically stored in a file in `/etc`: the common places for the +current UNIX system will be searched and if one of these paths exists, it will +be returned; if none of these typical root certificate paths exist, then the +path to the set of root certificates that are bundled with Julia is returned.
+ +The default value returned by `ca_roots()` may be overridden by setting the +`JULIA_SSL_CA_ROOTS_PATH`, `SSL_CERT_DIR`, or `SSL_CERT_FILE` environment +variables, in which case this function will always return the value of the first +of these variables that is set (whether the path exists or not). If +`JULIA_SSL_CA_ROOTS_PATH` is set to the empty string, then the other variables +are ignored (as if unset); if the other variables are set to the empty string, +they behave as if they are not set. + +### ca_roots_path + +```jl +ca_roots_path() :: String +``` +The `ca_roots_path()` function is similar to the `ca_roots()` function except +that it always returns a path to a file or directory of PEM-encoded certificate +authority roots. When called on a system like Windows or macOS, where system +root certificates are not stored in the file system, it will currently return +the path to the set of root certificates that are bundled with Julia. (In the +future, this function may instead extract the root certificates from the system +and save them to a file whose path would be returned.) + +If it is possible to configure a library that uses TLS to use the system +certificates that is generally preferable: i.e. it is better to use +`ca_roots()` which returns `nothing` to indicate that the system certs should be +used. The `ca_roots_path()` function should only be used when configuring +libraries which _require_ a path to a file or directory for root certificates. + +The default value returned by `ca_roots_path()` may be overridden by setting the +`JULIA_SSL_CA_ROOTS_PATH`, `SSL_CERT_DIR`, or `SSL_CERT_FILE` environment +variables, in which case this function will always return the value of the first +of these variables that is set (whether the path exists or not). If +`JULIA_SSL_CA_ROOTS_PATH` is set to the empty string, then the other variables +are ignored (as if unset); if the other variables are set to the empty string, +they behave as if they are not set.
+ +### ssh_dir + +```jl +ssh_dir() :: String +``` +The `ssh_dir()` function returns the location of the directory where the `ssh` +program keeps/looks for configuration files. By default this is `~/.ssh` but +this can be overridden by setting the environment variable `SSH_DIR`. + +### ssh_key_name + +```jl +ssh_key_name() :: String +``` +The `ssh_key_name()` function returns the base name of key files that SSH should +use when establishing a connection. There is usually no reason that this +function should be called directly and libraries should generally use the +`ssh_key_path` and `ssh_pub_key_path` functions to get full paths. If the +environment variable `SSH_KEY_NAME` is set then this function returns that; +otherwise it returns `id_rsa` by default. + +### ssh_key_path + +```jl +ssh_key_path() :: String +``` +The `ssh_key_path()` function returns the path of the SSH private key file that +should be used for SSH connections. If the `SSH_KEY_PATH` environment variable +is set then it will return that value. Otherwise it defaults to returning +```jl +joinpath(ssh_dir(), ssh_key_name()) +``` +This default value in turn depends on the `SSH_DIR` and `SSH_KEY_NAME` +environment variables. + +### ssh_pub_key_path + +```jl +ssh_pub_key_path() :: String +``` +The `ssh_pub_key_path()` function returns the path of the SSH public key file +that should be used for SSH connections. If the `SSH_PUB_KEY_PATH` environment +variable is set then it will return that value. If that isn't set but +`SSH_KEY_PATH` is set, it will return that path with the `.pub` suffix appended. +If neither is set, it defaults to returning +```jl +joinpath(ssh_dir(), ssh_key_name() * ".pub") +``` +This default value in turn depends on the `SSH_DIR` and `SSH_KEY_NAME` +environment variables. + +### ssh_key_pass + +```jl +ssh_key_pass() :: String +``` +The `ssh_key_pass()` function returns the value of the environment variable +`SSH_KEY_PASS` if it is set or `nothing` if it is not set.
In the future, this +may be able to find a password by other means, such as secure system storage, so +packages that need a password to decrypt an SSH private key should use this API +instead of directly checking the environment variable so that they gain such +capabilities automatically when they are added. + +### ssh_known_hosts_files + +```jl +ssh_known_hosts_files() :: Vector{String} +``` +The `ssh_known_hosts_files()` function returns a vector of paths of SSH known +hosts files that should be used when establishing the identities of remote +servers for SSH connections. By default this function returns +```jl +[joinpath(ssh_dir(), "known_hosts"), bundled_known_hosts] +``` +where `bundled_known_hosts` is the path of a copy of a known hosts file that is +bundled with this package (containing known hosts keys for `github.com` and +`gitlab.com`). If the environment variable `SSH_KNOWN_HOSTS_FILES` is set, +however, then its value is split into paths on the `:` character (or on `;` on +Windows) and this vector of paths is returned instead. If any component of this +vector is empty, it is expanded to the default known hosts paths. + +Packages that use `ssh_known_hosts_files()` should ideally look for matching +entries by comparing the host name and key types, considering the first entry in +any of the files which matches to be the definitive identity of the host. If the +caller cannot compare the key type (e.g. because it has been hashed) then it +must approximate the above algorithm by looking for all matching entries for a +host in each file: if a file has any entries for a host then one of them must +match; the caller should only continue to search further known hosts files if +there are no entries for the host in question in an earlier file.
+ +### ssh_known_hosts_file + +```jl +ssh_known_hosts_file() :: String +``` +The `ssh_known_hosts_file()` function returns a single path of an SSH known +hosts file that should be used when establishing the identities of remote +servers for SSH connections. It returns the first path returned by +`ssh_known_hosts_files` that actually exists. Callers who can look in more than +one known hosts file should use `ssh_known_hosts_files` instead and look for +host matches in all the files returned as described in that function's docs. + +### verify_host + +```jl +verify_host(url::AbstractString, [transport::AbstractString]) :: Bool +``` +The `verify_host` function tells the caller whether the identity of a host +should be verified when communicating over secure transports like TLS or SSH. +The `url` argument may be: + +1. a proper URL starting with `proto://` +2. an `ssh`-style bare host name or host name prefixed with `user@` +3. an `scp`-style host as above, followed by `:` and a path location + +In each case the host name part is parsed out and the decision about whether to +verify or not is made based solely on the host name, not anything else about the +input URL. In particular, the protocol of the URL does not matter (more below). + +The `transport` argument indicates the kind of transport that the query is +about. The currently known values are `SSL` (alias `TLS`) and `SSH`. If the +transport is omitted, the query will return `true` only if the host name should +not be verified regardless of transport.
+ +The host name is matched against the host patterns in the relevant environment +variables depending on whether `transport` is supplied and what its value is: + +- `JULIA_NO_VERIFY_HOSTS` — hosts that should not be verified for any transport +- `JULIA_SSL_NO_VERIFY_HOSTS` — hosts that should not be verified for SSL/TLS +- `JULIA_SSH_NO_VERIFY_HOSTS` — hosts that should not be verified for SSH +- `JULIA_ALWAYS_VERIFY_HOSTS` — hosts that should always be verified + +The value of each of these variables is a comma-separated list of host name +patterns with the following syntax — each pattern is split on `.` into parts and +each part must be one of: + +1. A literal domain name component consisting of one or more ASCII letters, + digits, hyphens or underscores (technically not part of a legal host name, but + sometimes used). A literal domain name component matches only itself. +2. A `**`, which matches zero or more domain name components. +3. A `*`, which matches any one domain name component. + +When matching a host name against a pattern list in one of these variables, the +host name is split on `.` into components and that sequence of words is matched +against the pattern: a literal pattern matches exactly one host name component +with that value; a `*` pattern matches exactly one host name component with any +value; a `**` pattern matches any number of host name components.
For example: + +- `**` matches any host name +- `**.org` matches any host name in the `.org` top-level domain +- `example.com` matches only the exact host name `example.com` +- `*.example.com` matches `api.example.com` but not `example.com` or + `v1.api.example.com` +- `**.example.com` matches any domain under `example.com`, including + `example.com` itself, `api.example.com` and `v1.api.example.com` + +#### Example scenarios + +Suppose you want to not verify any hosts under `safe.example.com` for all +protocols, skip SSL host verification for just `ssl.example.com`, and skip SSH +host verification for `ssh.example.com` and its immediate first level +subdomains. Then you could set the following environment variable values: +```sh +export JULIA_NO_VERIFY_HOSTS="**.safe.example.com" +export JULIA_SSL_NO_VERIFY_HOSTS="ssl.example.com" +export JULIA_SSH_NO_VERIFY_HOSTS="ssh.example.com,*.ssh.example.com" +``` +With this configuration: + +- `example.com` would be verified for all protocols +- `safe.example.com`, `api.safe.example.com`, `v1.api.safe.example.com` and so + on would be unverified for all transports +- `ssl.example.com` would be unverified for SSL/TLS transport +- `sub.ssl.example.com` would be verified for all transports, including SSL/TLS +- `ssh.example.com` and `sub.ssh.example.com` would be unverified for SSH only +- `sub.sub.ssh.example.com` would be verified for all transports + +Note that the protocol of `url` need not match the transport mechanism being +queried: the protocol of the URL is entirely discarded. The reason for this is +that the typical usage of this utility function is to configure a library to +enable or disable specific features like TLS host verification based on a URL. +If the URL does not actually use the TLS transport mechanism, then it doesn't +matter if verification for that transport is enabled or not. 
Moreover, different +protocols can use the same transport: for example, `https` and `ftps` protocols +both use TLS and `ssh`, `scp` and `sftp` protocols all use SSH. + +A common scenario that occurs behind firewalls is for all connections to external +systems to go through a transparent man-in-the-middle proxy: any SSL/TLS +connection to a host under `example.com` would be internal and should have a +valid certificate but any connection outside of `example.com` would go through +the proxy, which uses a self-signed certificate. For such a scenario the best +solution would be to deploy a CA root certificate to all clients, but if that's +not possible, then configuring clients to verify hosts under `example.com` but +not verify other SSL/TLS connections would be a viable solution. In fact, as +long as the man-in-the-middle proxy verifies all upstream TLS connections, this +is still secure (although not private from the proxy, of course). Such a +configuration can be accomplished with the following exports: +```sh +export JULIA_ALWAYS_VERIFY_HOSTS="**.example.com" +export JULIA_SSL_NO_VERIFY_HOSTS="**" +``` +This configuration causes all domains under `example.com` to always be verified +for all protocols, including SSL/TLS, while skipping host verification for SSL/TLS +connections to all other hosts. diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/LICENSE.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/LICENSE.md new file mode 100644 index 0000000..eb29596 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/LICENSE.md @@ -0,0 +1,47 @@ +The Pkg.jl package is licensed under the MIT "Expat" License: + +> Copyright (c) 2017: Stefan Karpinski.
+> +> Permission is hereby granted, free of charge, to any person obtaining a copy +> of this software and associated documentation files (the "Software"), to deal +> in the Software without restriction, including without limitation the rights +> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +> copies of the Software, and to permit persons to whom the Software is +> furnished to do so, subject to the following conditions: +> +> The above copyright notice and this permission notice shall be included in all +> copies or substantial portions of the Software. +> +> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +> SOFTWARE. +> + +The file PlatformEngines.jl is under the following license: + +The BinaryProvider.jl package is licensed under the MIT "Expat" License: + +> Copyright (c) 2017: SimonDanisch. +> +> Permission is hereby granted, free of charge, to any person obtaining a copy +> of this software and associated documentation files (the "Software"), to deal +> in the Software without restriction, including without limitation the rights +> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +> copies of the Software, and to permit persons to whom the Software is +> furnished to do so, subject to the following conditions: +> +> The above copyright notice and this permission notice shall be included in all +> copies or substantial portions of the Software. 
+> +> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +> SOFTWARE. +> diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/README.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/README.md new file mode 100644 index 0000000..3389eeb --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/README.md @@ -0,0 +1,34 @@ +# Pkg + +| **Documentation** | **Build Status** | +|:-----------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------:| +| [![][docs-v1-img]][docs-v1-url] [![][docs-dev-img]][docs-dev-url] | [![][travis-img]][travis-url] [![][appveyor-img]][appveyor-url] [![][codecov-img]][codecov-url] | + +Development repository for Julia's package manager, +shipped with Julia v1.0 and above. + +#### Using the development version of Pkg.jl + +If you want to develop this package do the following steps: +- Clone the repo anywhere. +- In line 2 of the `Project.toml` file (the line that begins with `uuid = ...`), modify the UUID, e.g. change the `44c` to `54c`. +- Change the current directory to the Pkg repo you just cloned and start julia with `julia --project`. +- `import Pkg` will now load the files in the cloned repo instead of the Pkg stdlib . +- To test your changes, simply do `include("test/runtests.jl")`. + +If you need to build Julia from source with a git checkout of Pkg, then instead use `make DEPS_GIT=Pkg` when building Julia. The `Pkg` repo is in `stdlib/Pkg`, and created initially with a detached `HEAD`. 
If you're doing this from a pre-existing Julia repository, you may need to `make clean` beforehand. + +[docs-dev-img]: https://img.shields.io/badge/docs-dev-blue.svg +[docs-dev-url]: https://julialang.github.io/Pkg.jl/dev/ + +[docs-v1-img]: https://img.shields.io/badge/docs-v1-blue.svg +[docs-v1-url]: https://julialang.github.io/Pkg.jl/v1/ + +[travis-img]: https://travis-ci.com/JuliaLang/Pkg.jl.svg?branch=master +[travis-url]: https://travis-ci.com/JuliaLang/Pkg.jl + +[appveyor-img]: https://ci.appveyor.com/api/projects/status/cgno2xgwapugpg4t/branch/master?svg=true +[appveyor-url]: https://ci.appveyor.com/project/JuliaLang/pkg-jl/branch/master + +[codecov-img]: https://codecov.io/gh/JuliaLang/Pkg.jl/branch/master/graph/badge.svg +[codecov-url]: https://codecov.io/gh/JuliaLang/Pkg.jl diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/api.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/api.md new file mode 100644 index 0000000..29a54c4 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/api.md @@ -0,0 +1,88 @@ +# [**12.** API Reference](@id API-Reference) + +This section describes the function interface, or "API mode", +for interacting with Pkg.jl. The function API is recommended +for non-interactive usage, for example in scripts. + +## General API Reference + +Certain options are generally useful and can be specified in any API call. +You can specify these options by setting keyword arguments. + +### Redirecting output + +Use the `io::IOBuffer` keyword argument to redirect Pkg output. +For example, `Pkg.add("Example"; io=devnull)` will discard any output produced by the `add` call. + +## Package API Reference + +In the REPL mode, packages (with associated version, UUID, URL etc) are parsed from strings, +for example `"Package#master"`,`"Package@v0.1"`, `"www.mypkg.com/MyPkg#my/feature"`. 
+ +In the API mode, it is possible to use strings as arguments for simple commands (like `Pkg.add(["PackageA", "PackageB"])`, +but more complicated commands, which e.g. specify URLs or version range, require the use of a more structured format over strings. +This is done by creating an instance of [`PackageSpec`](@ref) which is passed in to functions. + +```@docs +PackageSpec +PackageMode +UpgradeLevel +Pkg.add +Pkg.develop +Pkg.activate +Pkg.rm +Pkg.update +Pkg.test +Pkg.build +Pkg.pin +Pkg.free +Pkg.instantiate +Pkg.resolve +Pkg.gc +Pkg.status +Pkg.precompile +Pkg.offline +Pkg.setprotocol! +Pkg.dependencies +Pkg.project +Pkg.undo +Pkg.redo +``` + + +## Registry API Reference + +!!! compat "Julia 1.1" + Pkg's registry handling requires at least Julia 1.1. + +The function API for registries uses [`RegistrySpec`](@ref)s, similar to +[`PackageSpec`](@ref). + +```@docs +RegistrySpec +Pkg.Registry.add +Pkg.Registry.rm +Pkg.Registry.update +Pkg.Registry.status +``` + +## [Artifacts API Reference](@id Artifacts-Reference) + +!!! compat "Julia 1.3" + Pkg's artifacts API requires at least Julia 1.3. + +```@docs +Pkg.Artifacts.create_artifact +Pkg.Artifacts.remove_artifact +Pkg.Artifacts.verify_artifact +Pkg.Artifacts.artifact_meta +Pkg.Artifacts.artifact_hash +Pkg.Artifacts.bind_artifact! +Pkg.Artifacts.unbind_artifact! +Pkg.Artifacts.download_artifact +Pkg.Artifacts.find_artifacts_toml +Pkg.Artifacts.ensure_artifact_installed +Pkg.Artifacts.ensure_all_artifacts_installed +Pkg.Artifacts.@artifact_str +Pkg.Artifacts.archive_artifact +``` diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/artifacts.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/artifacts.md new file mode 100644 index 0000000..7b1ade6 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/artifacts.md @@ -0,0 +1,229 @@ +# [**8.** Artifacts](@id Artifacts) + +!!! compat "Julia 1.3" + Pkg's artifacts functionality requires at least Julia 1.3. 
+ +`Pkg` can install and manage containers of data that are not Julia packages. These containers can contain platform-specific binaries, datasets, text, or any other kind of data that would be convenient to place within an immutable, life-cycled datastore. +These containers, (called "Artifacts") can be created locally, hosted anywhere, and automatically downloaded and unpacked upon installation of your Julia package. +This mechanism is also used to provide the binary dependencies for packages built with [`BinaryBuilder.jl`](https://github.com/JuliaPackaging/BinaryBuilder.jl). + +## Basic Usage + +`Pkg` artifacts are declared in an `Artifacts.toml` file, which can be placed in your current directory or in the root of your package. +Currently, `Pkg` supports downloading of tarfiles (which can be compressed) from a URL. +Following is a minimal `Artifacts.toml` file which will permit the downloading of a `socrates.tar.gz` file from `github.com`. +In this example, a single artifact, given the name `socrates`, is defined. + +```TOML +# a simple Artifacts.toml file +[socrates] +git-tree-sha1 = "43563e7631a7eafae1f9f8d9d332e3de44ad7239" + + [[socrates.download]] + url = "https://github.com/staticfloat/small_bin/raw/master/socrates.tar.gz" + sha256 = "e65d2f13f2085f2c279830e863292312a72930fee5ba3c792b14c33ce5c5cc58" +``` + +If this `Artifacts.toml` file is placed in your current directory, then `socrates.tar.gz` can be downloaded, unpacked and used with `artifact"socrates"`. +Since this tarball contains a folder `bin`, and a text file named `socrates` within that folder, we could access the content of that file as follows. + +```julia +using Pkg.Artifacts + +rootpath = artifact"socrates" +open(joinpath(rootpath, "bin", "socrates")) do file + println(read(file, String)) +end +``` + +If you have an existing tarball that is accessible via a `url`, it could also be accessed in this manner.
+To create the `Artifacts.toml` you must compute two hashes: the `sha256` hash of the download file, and the `git-tree-sha1` of the unpacked content. +These can be computed as follows. + +```julia +using Tar, Inflate, SHA + +filename = "socrates.tar.gz" +println("sha256: ", bytes2hex(open(sha256, filename))) +println("git-tree-sha1: ", Tar.tree_hash(IOBuffer(inflate_gzip(filename)))) +``` + +To access this artifact from within a package you create, place the `Artifacts.toml` at the root of your package, adjacent to `Project.toml`. Then, make sure to add `Pkg` in your `deps` and set `julia = "1.3"` or higher in your `compat` section. + +## `Artifacts.toml` files + +`Pkg` provides an API for working with artifacts, as well as a TOML file format for recording artifact usage in your packages, and to automate downloading of artifacts at package install time. +Artifacts can always be referred to by content hash, but are typically accessed by a name that is bound to a content hash in an `Artifacts.toml` file that lives in a project's source tree. + +!!! note + It is possible to use the alternate name `JuliaArtifacts.toml`, similar + to how it is possible to use `JuliaProject.toml` and `JuliaManifest.toml` + instead of `Project.toml` and `Manifest.toml`, respectively. 
+ +An example `Artifacts.toml` file is shown here: + +```TOML +# Example Artifacts.toml file +[socrates] +git-tree-sha1 = "43563e7631a7eafae1f9f8d9d332e3de44ad7239" +lazy = true + + [[socrates.download]] + url = "https://github.com/staticfloat/small_bin/raw/master/socrates.tar.gz" + sha256 = "e65d2f13f2085f2c279830e863292312a72930fee5ba3c792b14c33ce5c5cc58" + + [[socrates.download]] + url = "https://github.com/staticfloat/small_bin/raw/master/socrates.tar.bz2" + sha256 = "13fc17b97be41763b02cbb80e9d048302cec3bd3d446c2ed6e8210bddcd3ac76" + +[[c_simple]] +arch = "x86_64" +git-tree-sha1 = "4bdf4556050cb55b67b211d4e78009aaec378cbc" +libc = "musl" +os = "linux" + + [[c_simple.download]] + sha256 = "411d6befd49942826ea1e59041bddf7dbb72fb871bb03165bf4e164b13ab5130" + url = "https://github.com/JuliaBinaryWrappers/c_simple_jll.jl/releases/download/c_simple+v1.2.3+0/c_simple.v1.2.3.x86_64-linux-musl.tar.gz" + +[[c_simple]] +arch = "x86_64" +git-tree-sha1 = "51264dbc770cd38aeb15f93536c29dc38c727e4c" +os = "macos" + + [[c_simple.download]] + sha256 = "6c17d9e1dc95ba86ec7462637824afe7a25b8509cc51453f0eb86eda03ed4dc3" + url = "https://github.com/JuliaBinaryWrappers/c_simple_jll.jl/releases/download/c_simple+v1.2.3+0/c_simple.v1.2.3.x86_64-apple-darwin14.tar.gz" + +[processed_output] +git-tree-sha1 = "1c223e66f1a8e0fae1f9fcb9d3f2e3ce48a82200" +``` + +This `Artifacts.toml` binds three artifacts; one named `socrates`, one named `c_simple` and one named `processed_output`. +The single required piece of information for an artifact is its `git-tree-sha1`. +Because artifacts are addressed only by their content hash, the purpose of an `Artifacts.toml` file is to provide metadata about these artifacts, such as binding a human-readable name to a content hash, providing information about where an artifact may be downloaded from, or even binding a single name to multiple hashes, keyed by platform-specific constraints such as operating system or libgfortran version. 
+ +## Artifact types and properties + +In the above example, the `socrates` artifact showcases a platform-independent artifact with multiple download locations. +When downloading and installing the `socrates` artifact, URLs will be attempted in-order until one succeeds. +The `socrates` artifact is marked as `lazy`, which means that it will not be automatically downloaded when the containing package is installed, but rather will be downloaded on-demand when the package first attempts to use it. + +The `c_simple` artifact showcases a platform-dependent artifact, where each entry in the `c_simple` array contains keys that help the calling package choose the appropriate download based on the particulars of the host machine. +Note that each artifact contains both a `git-tree-sha1` and a `sha256` for each download entry. This is to ensure that the downloaded tarball is secure before attempting to unpack it, as well as enforcing that all tarballs must expand to the same overall tree hash. + +The `processed_output` artifact contains no `download` stanza, and so cannot be installed. +An artifact such as this would be the result of code that was previously run, generating a new artifact and binding the resultant hash to a name within this project. + +## Using Artifacts + +Artifacts can be manipulated using convenient APIs exposed from the `Pkg.Artifacts` namespace. +As a motivating example, let us imagine that we are writing a package that needs to load the [Iris machine learning dataset](https://archive.ics.uci.edu/ml/datasets/iris). +While we could just download the dataset during a build step into the package directory, and many packages currently do precisely this, that has some significant drawbacks: + +* First, it modifies the package directory, making package installation stateful, which we want to avoid. 
+ In the future, we would like to reach the point where packages can be installed completely read-only, instead of being able to modify themselves after installation. + +* Second, the downloaded data is not shared across different versions of our package. + If we have three different versions of the package installed for use by various projects, then we need three different copies of the data, even if it hasn't changed between those versions. + Moreover, each time we upgrade or downgrade the package, unless we do something clever (and probably brittle), we have to download the data again. + +With artifacts, we will instead check to see if our `iris` artifact already exists on-disk and only if it doesn't will we download and install it, after which we can bind the result into our `Artifacts.toml` file: + +```julia +using Pkg.Artifacts + +# This is the path to the Artifacts.toml we will manipulate +artifact_toml = joinpath(@__DIR__, "Artifacts.toml") + +# Query the `Artifacts.toml` file for the hash bound to the name "iris" +# (returns `nothing` if no such binding exists) +iris_hash = artifact_hash("iris", artifact_toml) + +# If the name was not bound, or the hash it was bound to does not exist, create it! +if iris_hash == nothing || !artifact_exists(iris_hash) + # create_artifact() returns the content-hash of the artifact directory once we're finished creating it + iris_hash = create_artifact() do artifact_dir + # We create the artifact by simply downloading a few files into the new artifact directory + iris_url_base = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris" + download("$(iris_url_base)/iris.data", joinpath(artifact_dir, "iris.csv")) + download("$(iris_url_base)/bezdekIris.data", joinpath(artifact_dir, "bezdekIris.csv")) + download("$(iris_url_base)/iris.names", joinpath(artifact_dir, "iris.names")) + end + + # Now bind that hash within our `Artifacts.toml`. 
`force = true` means that if it already exists, + # just overwrite with the new content-hash. Unless the source files change, we do not expect + # the content hash to change, so this should not cause unnecessary version control churn. + bind_artifact!(artifact_toml, "iris", iris_hash) +end + +# Get the path of the iris dataset, either newly created or previously generated. +# this should be something like `~/.julia/artifacts/dbd04e28be047a54fbe9bf67e934be5b5e0d357a` +iris_dataset_path = artifact_path(iris_hash) +``` + +For the specific use case of using artifacts that were previously bound, we have the shorthand notation `artifact"name"` which will automatically search for the `Artifacts.toml` file contained within the current package, look up the given artifact by name, install it if it is not yet installed, then return the path to that given artifact. +An example of this shorthand notation is given below: + +```julia +using Pkg.Artifacts + +# For this to work, an `Artifacts.toml` file must be in the current working directory +# (or in the root of the current package) and must define a mapping for the "iris" +# artifact. If it does not exist on-disk, it will be downloaded. +iris_dataset_path = artifact"iris" +``` + +## The `Pkg.Artifacts` API + +The `Artifacts` API is broken up into three levels: hash-aware functions, name-aware functions and utility functions. + +* **Hash-aware** functions deal with content-hashes and essentially nothing else. These methods allow you to query whether an artifact exists, what its path is, to verify that an artifact satisfies its content hash on-disk, etc. Hash-aware functions include: `artifact_exists()`, `artifact_path()`, `remove_artifact()`, `verify_artifact()` and `archive_artifact()`. Note that in general you should not use `remove_artifact()` and should instead use `Pkg.gc()` to cleanup artifact installations. 
+ +* **Name-aware** functions deal with bound names within an `Artifacts.toml` file, and as such, typically require both a path to an `Artifacts.toml` file as well as the artifact name. Name-aware functions include: `artifact_meta()`, `artifact_hash()`, `bind_artifact!()`, `unbind_artifact!()`, `download_artifact()` and `ensure_artifact_installed()`. + +* **Utility** functions deal with miscellaneous aspects of artifact life, such as `create_artifact()`, `ensure_all_artifacts_installed()`, and even the `@artifact_str` string macro. + +For a full listing of docstrings and methods, see the [Artifacts Reference](@ref) section. + +## Overriding artifact locations + +It is occasionally necessary to be able to override the location and content of an artifact. +A common use case is a computing environment where certain versions of a binary dependency must be used, regardless of what version of this dependency a package was published with. +While a typical Julia configuration would download, unpack and link against a generic library, a system administrator may wish to disable this and instead use a library already installed on the local machine. +To enable this, `Pkg` supports a per-depot `Overrides.toml` file placed within the `artifacts` depot directory (e.g. `~/.julia/artifacts/Overrides.toml` for the default user depot) that can override the location of an artifact either by content-hash or by package UUID and bound artifact name. +Additionally, the destination location can be either an absolute path, or a replacement artifact content hash. +This allows sysadmins to create their own artifacts which they can then use by overriding other packages to use the new artifact. 
+ +```TOML +# Override single hash to absolute path +78f35e74ff113f02274ce60dab6e92b4546ef806 = "/path/to/replacement" + +# Override single hash to new artifact content-hash +683942669b4639019be7631caa28c38f3e1924fe = "d826e316b6c0d29d9ad0875af6ca63bf67ed38c3" + +# Override package bindings by specifying the package UUID and bound artifact name +# For demonstration purposes we assume this package is called `Foo` +[d57dbccd-ca19-4d82-b9b8-9d660942965b] +libfoo = "/path/to/libfoo" +libbar = "683942669b4639019be7631caa28c38f3e1924fe" +``` + +Due to the layered nature of `Pkg` depots, multiple `Overrides.toml` files may be in effect at once. +This allows the "inner" `Overrides.toml` files to override the overrides placed within the "outer" `Overrides.toml` files. +To remove an override and re-enable default location logic for an artifact, insert an entry mapping to the empty string: + +```TOML +78f35e74ff113f02274ce60dab6e92b4546ef806 = "/path/to/new/replacement" +683942669b4639019be7631caa28c38f3e1924fe = "" + +[d57dbccd-ca19-4d82-b9b8-9d660942965b] +libfoo = "" +``` + +If the two `Overrides.toml` snippets as given above are layered on top of each other, the end result will be mapping the content-hash `78f35e74ff113f02274ce60dab6e92b4546ef806` to `"/path/to/new/replacement"`, and mapping `Foo.libbar` to the artifact identified by the content-hash `683942669b4639019be7631caa28c38f3e1924fe`. +Note that while that hash was previously overridden, it is no longer, and therefore `Foo.libbar` will look directly at locations such as `~/.julia/artifacts/683942669b4639019be7631caa28c38f3e1924fe`. + +Most methods that are affected by overrides have the ability to ignore overrides by setting `honor_overrides=false` as a keyword argument within them. +For UUID/name based overrides to work, `Artifacts.toml` files must be loaded with the knowledge of the UUID of the loading package.
+This is deduced automatically by the `artifact""` string macro; however, if you are for some reason manually using the `Pkg.Artifacts` API within your package and you wish to honor overrides, you must provide the package UUID to API calls like `artifact_meta()` and `ensure_artifact_installed()` via the `pkg_uuid` keyword argument. diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/basedocs.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/basedocs.md new file mode 100644 index 0000000..21a12c5 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/basedocs.md @@ -0,0 +1,25 @@ +```@meta +EditURL = "https://github.com/JuliaLang/Pkg.jl/blob/master/docs/src/basedocs.md" +``` + +# Pkg + +Pkg is Julia's builtin package manager, and handles operations +such as installing, updating and removing packages. + +!!! note + What follows is a very brief introduction to Pkg. For more + information on `Project.toml` files, `Manifest.toml` files, package + version compatibility (`[compat]`), environments, registries, etc., + it is highly recommended to read the full manual, which is available here: + [https://julialang.github.io/Pkg.jl/v1/](https://julialang.github.io/Pkg.jl/v1/).
+ +```@eval +import Markdown +file = joinpath(Sys.STDLIB, "Pkg", "docs", "src", "getting-started.md") +str = read(file, String) +str = replace(str, r"^#.*$"m => "") +str = replace(str, "[API Reference](@ref)" => + "[API Reference](https://julialang.github.io/Pkg.jl/v1/api/)") +Markdown.parse(str) +``` diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/compatibility.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/compatibility.md new file mode 100644 index 0000000..94f160e --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/compatibility.md @@ -0,0 +1,201 @@ +# [**6.** Compatibility](@id Compatibility) + +Compatibility refers to the ability to restrict the versions of the dependencies that your project is compatible with. +If the compatibility for a dependency is not given, the project is assumed to be compatible with all versions of that dependency. + +Compatibility for a dependency is entered in the `Project.toml` file as for example: + +```toml +[compat] +julia = "1.0" +Example = "0.4.3" +``` + +After a compatibility entry is put into the project file, `up` can be used to apply it. + +The format of the version specifier is described in detail below. + +!!! info + There is currently no way to give compatibility from the Pkg REPL mode so for now, one has to manually edit the project file. + +## Version specifier format + +Similar to other package managers, the Julia package manager respects [semantic versioning](https://semver.org/) (semver). +As an example, a version specifier given as e.g. `1.2.3` is therefore assumed to be compatible with the versions `[1.2.3 - 2.0.0)` where `)` is a non-inclusive upper bound. +More specifically, a version specifier is either given as a **caret specifier**, e.g. `^1.2.3` or as a **tilde specifier**, e.g. `~1.2.3`. +Caret specifiers are the default and hence `1.2.3 == ^1.2.3`. The difference between a caret and tilde is described in the next section. 
+The union of multiple version specifiers can be formed by comma separating individual version specifiers, e.g. +```toml +[compat] +Example = "1.2, 2" +``` +will result in `[1.2.0, 3.0.0)`. Note leading zeros are treated differently, e.g. `Example = "0.2, 1"` would only result in `[0.2.0-0.3.0, 1.0.0-2.0.0]`. See the next section for more information on versions with leading zeros. + +### [Behavior of versions with leading zeros (0.0.x and 0.x.y)](@id compat-pre-1.0) + +While the semver specification says that all versions with a major version of 0 (versions before 1.0.0) are incompatible +with each other, we have decided to only apply that when both the major and minor versions are zero. In other words, +0.0.1 and 0.0.2 are considered incompatible. A pre-1.0 version with non-zero minor version (`0.a.b` with `a != 0`) is +considered compatible with versions with the same minor version and smaller or equal patch versions (`0.a.c` with `c <= b`); +i.e., the versions 0.2.2 and 0.2.3 are compatible with 0.2.1 and 0.2.0. Versions with a major version of 0 and different +minor versions are not considered compatible, so the version 0.3.0 might have breaking changes from 0.2.0. To that end, the +`[compat]` entry: + +```toml +[compat] +Example = "0.0.1" +``` + +results in a version bound on `Example` as `[0.0.1, 0.0.2)` (which is equivalent to only the version 0.0.1), while the +`[compat]` entry: + +```toml +[compat] +Example = "0.2.1" +``` + +results in a version bound on `Example` as `[0.2.1, 0.3.0)`. + +In particular, a package may set `version = "0.2.4"` when it has feature additions compared to 0.2.3 as long as it +remains backward compatible with 0.2.0. See also [The `version` field](@ref). + +### Caret specifiers + +A caret specifier allows upgrades that would be compatible according to semver. +An updated dependency is considered compatible if the new version does not modify the left-most non-zero digit in the version specifier. + +Some examples are shown below.
+ +```toml +[compat] +PkgA = "^1.2.3" # [1.2.3, 2.0.0) +PkgB = "^1.2" # [1.2.0, 2.0.0) +PkgC = "^1" # [1.0.0, 2.0.0) +PkgD = "^0.2.3" # [0.2.3, 0.3.0) +PkgE = "^0.0.3" # [0.0.3, 0.0.4) +PkgF = "^0.0" # [0.0.0, 0.1.0) +PkgG = "^0" # [0.0.0, 1.0.0) +``` + +### Tilde specifiers + +A tilde specifier provides more limited upgrade possibilities. When specifying major, minor +and patch versions, or when specifying major and minor versions, only the patch version is +allowed to change. If you only specify a major version, then both minor and patch versions +are allowed to be upgraded (`~1` is thus equivalent to `^1`). +For example: + +```toml +[compat] +PkgA = "~1.2.3" # [1.2.3, 1.3.0) +PkgB = "~1.2" # [1.2.0, 1.3.0) +PkgC = "~1" # [1.0.0, 2.0.0) +PkgD = "~0.2.3" # [0.2.3, 0.3.0) +PkgE = "~0.0.3" # [0.0.3, 0.0.4) +PkgF = "~0.0" # [0.0.0, 0.1.0) +PkgG = "~0" # [0.0.0, 1.0.0) +``` + +For all versions with a major version of 0 the tilde and caret specifiers are equivalent. + +### Equality specifier + +Equality can be used to specify an exact version: + +```toml +[compat] +PkgA = "= 1.2.3" # [1.2.3, 1.2.3] +``` + +### Inequality specifiers + +Inequalities can also be used to specify version ranges: + +```toml +[compat] +PkgB = ">= 1.2.3" # [1.2.3, ∞) +PkgC = "≥ 1.2.3" # [1.2.3, ∞) +PkgD = "< 1.2.3" # [0.0.0, 1.2.3) = [0.0.0, 1.2.2] +``` + +### Hyphen specifiers + +Hyphen syntax can also be used to specify version ranges. Make sure that you have a space on both sides of the hyphen. 
+ +```toml +[compat] +PkgA = "1.2.3 - 4.5.6" # [1.2.3, 4.5.6] +PkgA = "0.2.3 - 4.5.6" # [0.2.3, 4.5.6] +``` + +Any unspecified trailing numbers in the first end-point are considered to be zero: + +```toml +[compat] +PkgA = "1.2 - 4.5.6" # [1.2.0, 4.5.6] +PkgA = "1 - 4.5.6" # [1.0.0, 4.5.6] +PkgA = "0.2 - 4.5.6" # [0.2.0, 4.5.6] +PkgA = "0.2 - 0.5.6" # [0.2.0, 0.5.6] +``` + +Any unspecified trailing numbers in the second end-point will be considered to be wildcards: + +```toml +[compat] +PkgA = "1.2.3 - 4.5" # 1.2.3 - 4.5.* = [1.2.3, 4.6.0) +PkgA = "1.2.3 - 4" # 1.2.3 - 4.*.* = [1.2.3, 5.0.0) +PkgA = "1.2 - 4.5" # 1.2.0 - 4.5.* = [1.2.0, 4.6.0) +PkgA = "1.2 - 4" # 1.2.0 - 4.*.* = [1.2.0, 5.0.0) +PkgA = "1 - 4.5" # 1.0.0 - 4.5.* = [1.0.0, 4.6.0) +PkgA = "1 - 4" # 1.0.0 - 4.*.* = [1.0.0, 5.0.0) +PkgA = "0.2.3 - 4.5" # 0.2.3 - 4.5.* = [0.2.3, 4.6.0) +PkgA = "0.2.3 - 4" # 0.2.3 - 4.*.* = [0.2.3, 5.0.0) +PkgA = "0.2 - 4.5" # 0.2.0 - 4.5.* = [0.2.0, 4.6.0) +PkgA = "0.2 - 4" # 0.2.0 - 4.*.* = [0.2.0, 5.0.0) +PkgA = "0.2 - 0.5" # 0.2.0 - 0.5.* = [0.2.0, 0.6.0) +PkgA = "0.2 - 0" # 0.2.0 - 0.*.* = [0.2.0, 1.0.0) +``` + +!!! compat "Julia 1.4" + Hyphen specifiers require at least Julia 1.4, so it is strongly recommended to also add + ```toml + [compat] + julia = "1.4" + ``` + to the project file when using them. + +## Fixing conflicts + +Version conflicts were introduced previously with an [example](@ref conflicts) +of a conflict arising in a package `D` used by two other packages, `B` and `C`. +Our analysis of the error message revealed that `B` is using an outdated +version of `D`. +To fix it, the first thing to try is to `pkg> dev B` so that +you can modify `B` and its compatibility requirements.
+If you open its `Project.toml` file in an editor, you would probably notice something like + +```toml +[compat] +D = "0.1" +``` + +Usually the first step is to modify this to something like +```toml +[compat] +D = "0.1, 0.2" +``` + +This indicates that `B` is compatible with both version 0.1 and version 0.2; if you `pkg> up` +this would fix the package error. +However, there is one major concern you need to address first: perhaps there was an incompatible change +in `v0.2` of `D` that breaks `B`. +Before proceeding further, you should update all packages and then run `B`'s tests, scanning the +output of `pkg> test B` to be sure that `v0.2` of `D` is in fact being used. +(It is possible that an additional dependency of `D` pins it to `v0.1`, and you wouldn't want to be misled into thinking that you had tested `B` on the newer version.) +If the new version was used and the tests still pass, +you can assume that `B` didn't need any further updating to accommodate `v0.2` of `D`; +you can safely submit this change as a pull request to `B` so that a new release is made. +If instead an error is thrown, it indicates that `B` requires more extensive updates to be +compatible with the latest version of `D`; those updates will need to be completed before +it becomes possible to use both `A` and `B` simultaneously. +You can, though, continue to use them independently of one another. diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/creating-packages.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/creating-packages.md new file mode 100644 index 0000000..a21887c --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/creating-packages.md @@ -0,0 +1,290 @@ +# **5.** Creating Packages + +A package is a project with a `name`, `uuid` and `version` entry in the `Project.toml` file, and a `src/PackageName.jl` file that defines the module `PackageName`. +This file is executed when the package is loaded.
+ +### Generating files for a package + +!!! note + The [PkgTemplates](https://github.com/invenia/PkgTemplates.jl) package offers a very easy, repeatable, and + customizable way to generate the files for a new package. We recommend that you use PkgTemplates for creating + new packages instead of using the minimal `pkg> generate` functionality described below. + +To generate files for a new package, use `pkg> generate`. + +```julia-repl +(v1.0) pkg> generate HelloWorld +``` + +This creates a new project `HelloWorld` with the following files (visualized with the external [`tree` command](https://linux.die.net/man/1/tree)): + +```julia-repl +julia> cd("HelloWorld") + +shell> tree . +. +├── Project.toml +└── src + └── HelloWorld.jl + +1 directory, 2 files +``` + +The `Project.toml` file contains the name of the package, its unique UUID, its version, the authors and potential dependencies: + +```toml +name = "HelloWorld" +uuid = "b4cd1eb8-1e24-11e8-3319-93036a3eb9f3" +version = "0.1.0" +authors = ["Some One "] + +[deps] +``` + +The content of `src/HelloWorld.jl` is: + +```julia +module HelloWorld + +greet() = print("Hello World!") + +end # module +``` + +We can now activate the project and load the package: + +```julia-repl +pkg> activate . + +julia> import HelloWorld + +julia> HelloWorld.greet() +Hello World! +``` + +### Adding dependencies to the project + +Let’s say we want to use the standard library package `Random` and the registered package `JSON` in our project. +We simply `add` these packages (note how the prompt now shows the name of the newly generated project, +since we `activate`d it): + +```julia-repl +(HelloWorld) pkg> add Random JSON + Resolving package versions... + Updating "~/Documents/HelloWorld/Project.toml" + [682c06a0] + JSON v0.17.1 + [9a3f8284] + Random + Updating "~/Documents/HelloWorld/Manifest.toml" + [34da2185] + Compat v0.57.0 + [682c06a0] + JSON v0.17.1 + [4d1e1d77] + Nullables v0.0.4 + ... 
+``` + +Both `Random` and `JSON` got added to the project’s `Project.toml` file, and the resulting dependencies got added to the `Manifest.toml` file. +The resolver has installed each package with the highest possible version, while still respecting the compatibility that each package enforces on its dependencies. + +We can now use both `Random` and `JSON` in our project. Changing `src/HelloWorld.jl` to + +```julia +module HelloWorld + +import Random +import JSON + +greet() = print("Hello World!") +greet_alien() = print("Hello ", Random.randstring(8)) + +end # module +``` + +and reloading the package, the new `greet_alien` function that uses `Random` can be called: + +```julia-repl +julia> HelloWorld.greet_alien() +Hello aT157rHV +``` + +### Adding a build step to the package + +The build step is executed the first time a package is installed or when explicitly invoked with `build`. +A package is built by executing the file `deps/build.jl`. + +```julia-repl +julia> print(read("deps/build.jl", String)) +println("I am being built...") + +(HelloWorld) pkg> build + Building HelloWorld → `deps/build.log` + Resolving package versions... + +julia> print(read("deps/build.log", String)) +I am being built... +``` + +If the build step fails, the output of the build step is printed to the console + +```julia-repl +julia> print(read("deps/build.jl", String)) +error("Ooops") + +(HelloWorld) pkg> build + Building HelloWorld → `deps/build.log` + Resolving package versions... 
+┌ Error: Error building `HelloWorld`: +│ ERROR: LoadError: Ooops +│ Stacktrace: +│ [1] error(::String) at ./error.jl:33 +│ [2] top-level scope at none:0 +│ [3] include at ./boot.jl:317 [inlined] +│ [4] include_relative(::Module, ::String) at ./loading.jl:1071 +│ [5] include(::Module, ::String) at ./sysimg.jl:29 +│ [6] include(::String) at ./client.jl:393 +│ [7] top-level scope at none:0 +│ in expression starting at /Users/kristoffer/.julia/dev/Pkg/HelloWorld/deps/build.jl:1 +└ @ Pkg.Operations Operations.jl:938 +``` + +### Adding tests to the package + +When a package is tested the file `test/runtests.jl` is executed: + +```julia-repl +julia> print(read("test/runtests.jl", String)) +println("Testing...") + +(HelloWorld) pkg> test + Testing HelloWorld + Resolving package versions... +Testing... + Testing HelloWorld tests passed +``` + +Tests are run in a new Julia process, where the package itself, and any +test-specific dependencies, are available, see below. + +#### Test-specific dependencies in Julia 1.2 and above + +!!! compat "Julia 1.2" + This section only applies to Julia 1.2 and above. For specifying test dependencies + on previous Julia versions, see [Test-specific dependencies in Julia 1.0 and 1.1](@ref). + +!!! note + The exact interaction between `Project.toml`, `test/Project.toml` and their corresponding + `Manifest.toml`s is not fully worked out, and may be subject to change in future versions. + The old method of adding test-specific dependencies, described in the next section, will + therefore be supported throughout all Julia 1.X releases. + +In Julia 1.2 and later the test environment is given by `test/Project.toml`. Thus, when running +tests, this will be the active project, and only dependencies to the `test/Project.toml` project +can be used. Note that Pkg will add the tested package itself implicitly. + +!!!
note + If no `test/Project.toml` exists Pkg will use the old style test-setup, as + described in [Test-specific dependencies in Julia 1.0 and 1.1](@ref). + +To add a test-specific dependency, i.e. a dependency that is available only when testing, +it is thus enough to add this dependency to the `test/Project.toml` project. This can be +done from the Pkg REPL by activating this environment, and then use `add` as one normally +does. Let's add the `Test` standard library as a test dependency: + +```julia-repl +(HelloWorld) pkg> activate ./test +[ Info: activating environment at `~/HelloWorld/test/Project.toml`. + +(test) pkg> add Test + Resolving package versions... + Updating `~/HelloWorld/test/Project.toml` + [8dfed614] + Test + Updating `~/HelloWorld/test/Manifest.toml` + [...] +``` + +We can now use `Test` in the test script and we can see that it gets installed when testing: + +```julia-repl +julia> print(read("test/runtests.jl", String)) +using Test +@test 1 == 1 + +(HelloWorld) pkg> test + Testing HelloWorld + Resolving package versions... + Updating `/var/folders/64/76tk_g152sg6c6t0b4nkn1vw0000gn/T/tmpPzUPPw/Project.toml` + [d8327f2a] + HelloWorld v0.1.0 [`~/.julia/dev/Pkg/HelloWorld`] + [8dfed614] + Test + Updating `/var/folders/64/76tk_g152sg6c6t0b4nkn1vw0000gn/T/tmpPzUPPw/Manifest.toml` + [d8327f2a] + HelloWorld v0.1.0 [`~/.julia/dev/Pkg/HelloWorld`] + Testing HelloWorld tests passed +``` + +#### Test-specific dependencies in Julia 1.0 and 1.1 + +!!! note + The method of adding test-specific dependencies described in this section will + be replaced by the method from the previous section in future Julia versions. + The method in this section will, however, be supported throughout all Julia 1.X + releases. + +In Julia 1.0 and Julia 1.1 test-specific dependencies are added to the main +`Project.toml`.
To add `Markdown` and `Test` as test-dependencies, add the following: + +```toml +[extras] +Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[targets] +test = ["Markdown", "Test"] +``` + +### Package naming guidelines + +Package names should be sensible to most Julia users, *even to those who are not domain experts*. +The following guidelines apply to the `General` registry, but may be useful for other package +registries as well. + +Since the `General` registry belongs to the entire community, people may have opinions about +your package name when you publish it, especially if it's ambiguous or can be confused with +something other than what it is. Usually you will then get suggestions for a new name that +may fit your package better. + +1. Avoid jargon. In particular, avoid acronyms unless there is minimal possibility of confusion. + + * It's ok to say `USA` if you're talking about the USA. + * It's not ok to say `PMA`, even if you're talking about positive mental attitude. +2. Avoid using `Julia` in your package name or prefixing it with `Ju`. + + * It is usually clear from context and to your users that the package is a Julia package. + * Package names already have a `.jl` extension, which communicates to users that `Package.jl` is a Julia package. + * Having Julia in the name can imply that the package is connected to, or endorsed by, contributors + to the Julia language itself. +3. Packages that provide most of their functionality in association with a new type should have pluralized + names. + + * `DataFrames` provides the `DataFrame` type. + * `BloomFilters` provides the `BloomFilter` type. + * In contrast, `JuliaParser` provides no new type, but instead new functionality in the `JuliaParser.parse()` + function. +4. Err on the side of clarity, even if clarity seems long-winded to you. + + * `RandomMatrices` is a less ambiguous name than `RndMat` or `RMT`, even though the latter are shorter. +5.
A less systematic name may suit a package that implements one of several possible approaches to + its domain. + + * Julia does not have a single comprehensive plotting package. Instead, `Gadfly`, `PyPlot`, `Winston` + and other packages each implement a unique approach based on a particular design philosophy. + * In contrast, `SortingAlgorithms` provides a consistent interface to use many well-established + sorting algorithms. +6. Packages that wrap external libraries or programs should be named after those libraries or programs. + + * `CPLEX.jl` wraps the `CPLEX` library, which can be identified easily in a web search. + * `MATLAB.jl` provides an interface to call the MATLAB engine from within Julia. +7. Avoid naming a package closely to an existing package + * `Websocket` is too close to `WebSockets` and can be confusing to users. Rather use a new name such as `SimpleWebsockets`. + +### Registering packages + +Once a package is ready it can be registered with the [General Registry](https://github.com/JuliaRegistries/General). +Currently packages are submitted via [`Registrator`](https://juliaregistrator.github.io/). +In addition to `Registrator`, [`TagBot`](https://github.com/apps/julia-tagbot) helps manage the process of tagging releases. diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/environments.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/environments.md new file mode 100644 index 0000000..9d5bd46 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/environments.md @@ -0,0 +1,108 @@ +# **4.** Working with Environments + +The following discusses Pkg's interaction with environments. For more on the role environments play in code loading, including the "stack" of environments from which code can be loaded, see [this section in the Julia manual](https://docs.julialang.org/en/v1/manual/code-loading/#Environments-1). 
+ +## Creating your own projects + +So far we have added packages to the default project at `~/.julia/environments/v1.0`. It is however easy to create other, independent, projects. +It should be pointed out that when two projects use the same package at the same version, the content of this package is not duplicated. +In order to create a new project, create a directory for it and then activate that directory to make it the "active project", which package operations manipulate: + +```julia-repl +julia> mkdir("MyProject") + +julia> cd("MyProject") +/Users/kristoffer/MyProject + +(v1.0) pkg> activate . + +(MyProject) pkg> st + Status `Project.toml` +``` + +Note that the REPL prompt changed when the new project is activated. Since this is a newly created project, the status command shows that it contains no packages, and in fact, it has no project or manifest file until we add a package to it: + +```julia-repl +julia> readdir() +0-element Array{String,1} + +(MyProject) pkg> add Example + Updating registry at `~/.julia/registries/General` + Updating git-repo `https://github.com/JuliaRegistries/General.git` + Resolving package versions... + Updating `Project.toml` + [7876af07] + Example v0.5.1 + Updating `Manifest.toml` + [7876af07] + Example v0.5.1 + [8dfed614] + Test +Precompiling project... + 1 dependency successfully precompiled in 2 seconds + +julia> readdir() +2-element Array{String,1}: + "Manifest.toml" + "Project.toml" + +julia> print(read("Project.toml", String)) +[deps] +Example = "7876af07-990d-54b4-ab0e-23690620f79a" + +julia> print(read("Manifest.toml", String)) +[[Example]] +deps = ["Test"] +git-tree-sha1 = "8eb7b4d4ca487caade9ba3e85932e28ce6d6e1f8" +uuid = "7876af07-990d-54b4-ab0e-23690620f79a" +version = "0.5.1" + +[[Test]] +uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +``` + +This new environment is completely separate from the one we used earlier. 
+ + +## Project Precompilation + +By default any package that is added to a project or updated in a Pkg action will be automatically precompiled, along +with its dependencies. The exception is the `develop` command, which neither builds nor precompiles the package; when +that happens is left up to the user to decide. + +If a package that has been updated is already loaded in the session, the precompilation process will go ahead and precompile +the new version, and any packages that depend on it, but will note that the package cannot be used until session restart. + +To disable this auto-precompilation, set `ENV["JULIA_PKG_PRECOMPILE_AUTO"]=0`, after which precompilation can be triggered +manually either serially via code loading + +```julia-repl +julia> using Example +[ Info: Precompiling Example [7876af07-990d-54b4-ab0e-23690620f79a] +``` + + or the parallel precompilation, which can be significantly faster when many dependencies are involved, via + +```julia-repl +pkg> precompile +Precompiling project... + 23 dependencies successfully precompiled in 36 seconds +``` + +## Using someone else's project + +Simply clone their project using e.g. `git clone`, `cd` to the project directory and call + +```julia-repl +(v1.0) pkg> activate . + +(SomeProject) pkg> instantiate +``` + +If the project contains a manifest, this will install the packages in the same state that is given by that manifest. +Otherwise, it will resolve the latest versions of the dependencies compatible with the project. + +!!! note "Specifying project on startup" + Instead of using `activate` from within Julia you can specify the project on startup using + the `--project=` flag. For example, to run a script from the command line using the + environment in the current directory you can run + ```bash + $ julia --project=.
myscript.jl + ``` diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/faq.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/faq.md new file mode 100644 index 0000000..3809353 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/faq.md @@ -0,0 +1,3 @@ +# **8.** FAQ + +### Should `Manifest.toml` be checked in to the package? diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/getting-started.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/getting-started.md new file mode 100644 index 0000000..acfb0c5 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/getting-started.md @@ -0,0 +1,216 @@ +# **2.** Getting Started + +What follows is a quick overview of Pkg, Julia's package manager. +It should help new users become familiar with basic Pkg features. + +## Basic Usage + +Pkg comes with a REPL. +Enter the Pkg REPL by pressing `]` from the Julia REPL. +To get back to the Julia REPL, press backspace or ^C. + +!!! note + This guide relies on the Pkg REPL to execute Pkg commands. + For non-interactive use, we recommend the Pkg API. + The Pkg API is fully documented in the [API Reference](@ref) section of the Pkg documentation. + +Upon entering the Pkg REPL, you should see a similar prompt: + +```julia-repl +(v1.1) pkg> +``` + +To add a package, use `add`: + +```julia-repl +(v1.1) pkg> add Example +``` + +!!! note + Some Pkg output has been omitted in order to keep this guide focused. + This will help maintain a good pace and not get bogged down in details. + If you require more details, refer to subsequent sections of the Pkg manual. + +We can also specify multiple packages at once: + +```julia-repl +(v1.1) pkg> add JSON StaticArrays +``` + +To remove packages, use `rm`: + +```julia-repl +(v1.1) pkg> rm JSON StaticArrays +``` + +So far, we have referred only to registered packages. +Pkg also supports working with unregistered packages. 
+To add an unregistered package, specify a URL: + +```julia-repl +(v1.1) pkg> add https://github.com/JuliaLang/Example.jl +``` + +Use `rm` to remove this package by name: + +```julia-repl +(v1.1) pkg> rm Example +``` + +Use `update` to update an installed package: + +```julia-repl +(v1.1) pkg> update Example +``` + +To update all installed packages, use `update` without any arguments: + +```julia-repl +(v1.1) pkg> update +``` + +## Getting Started with Environments + +Up to this point, we have covered basic package management: adding, updating and removing packages. +This will be familiar if you have used other package managers. +Pkg offers significant advantages over traditional package managers +by organizing dependencies into **environments**. + +You may have noticed the `(v1.1)` in the REPL prompt. +This lets us know `v1.1` is the **active environment**. +The active environment is the environment that will be modified by Pkg commands such as `add`, `rm` and `update`. + +Let's set up a new environment so we may experiment. +To set the active environment, use `activate`: + +```julia-repl +(v1.1) pkg> activate tutorial +[ Info: activating new environment at `/tmp/tutorial/Project.toml`. +``` + +Pkg lets us know we are creating a new environment and that this environment +will be stored in the `/tmp/tutorial` directory. + +Pkg has also updated the REPL prompt in order to reflect the new +active environment: + +```julia-repl +(tutorial) pkg> +``` + +We can ask for information about the active environment by using `status`: + +```julia-repl +(tutorial) pkg> status + Status `/tmp/tutorial/Project.toml` + (empty environment) +``` + +`/tmp/tutorial/Project.toml` is the location of the active environment's **project file**. +A project file is where Pkg stores metadata for an environment. +Notice this new environment is empty. +Let us add a package and observe: + +```julia-repl +(tutorial) pkg> add Example +... 
+ +(tutorial) pkg> status + Status `/tmp/tutorial/Project.toml` + [7876af07] Example v0.5.1 +``` + +We can see `tutorial` now contains `Example` as a dependency. + +## Modifying A Dependency + +Say we are working on `Example` and feel it needs new functionality. +How can we modify the source code? +We can use `develop` to set up a git clone of the `Example` package. + +```julia-repl +(tutorial) pkg> develop --local Example +... + +(tutorial) pkg> status + Status `/tmp/tutorial/Project.toml` + [7876af07] Example v0.5.1+ [`dev/Example`] +``` + +Notice the feedback has changed. +`dev/Example` refers to the location of the newly created clone. +If we look inside the `/tmp/tutorial` directory, we will notice the following files: + +``` +tutorial +├── dev +│ └── Example +├── Manifest.toml +└── Project.toml +``` + +Instead of loading a registered version of `Example`, +Julia will load the source code contained in `tutorial/dev/Example`. + +Let's try it out. +First we modify the file at `tutorial/dev/Example/src/Example.jl` and add a simple function: + +```julia +plusone(x::Int) = x + 1 +``` + +Now we can go back to the Julia REPL and load the package: + +```julia-repl +julia> import Example +``` + +!!! warn + A package can only be loaded once per Julia session. + If you have run `import Example` in the current Julia session, you will + have to restart Julia and rerun `activate tutorial` in the Pkg REPL. + [Revise.jl](https://github.com/timholy/Revise.jl/) can make this process + significantly more pleasant, but setting it up is beyond the scope of this guide. + +Julia should load our new code. Let's test it: + +```julia-repl +julia> Example.plusone(1) +2 +``` + +Say we have a change of heart and decide the world is not ready for such elegant code. +We can tell Pkg to stop using the local clone and use a registered version instead. 
+We do this with `free`: + +```julia-repl +(tutorial) pkg> free Example +``` + +When you are done experimenting with `tutorial`, you can return to the **default +environment** by running `activate` with no arguments: + +```julia-repl +(tutorial) pkg> activate + +(v1.1) pkg> +``` + +## Asking for Help + +If you are ever stuck, you can ask `Pkg` for help: + +```julia-repl +(v1.1) pkg> ? +``` + +You should see a list of available commands along with short descriptions. +You can ask for more detailed help by specifying a command: + +```julia-repl +(v1.1) pkg> ?develop +``` + +This guide should help you get started with `Pkg`. +`Pkg` has much more to offer in terms of powerful package management, +read the full manual to learn more! diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/glossary.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/glossary.md new file mode 100644 index 0000000..f632140 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/glossary.md @@ -0,0 +1,124 @@ +# [**9.** Glossary](@id Glossary) + +**Project:** a source tree with a standard layout, including a `src` directory +for the main body of Julia code, a `test` directory for testing the project, +a `docs` directory for documentation files, and optionally a `deps` directory for a build +script and its outputs. A project will typically also have a project file and +may optionally have a manifest file: + +- **Project file:** a file in the root directory of a project, named + `Project.toml` (or `JuliaProject.toml`), describing metadata about the project, + including its name, UUID (for packages), authors, license, and the names and + UUIDs of packages and libraries that it depends on. + +- **Manifest file:** a file in the root directory of a project, named + `Manifest.toml` (or `JuliaManifest.toml`), describing a complete dependency graph + and exact versions of each package and library used by a project. 
+ +**Package:** a project which provides reusable functionality that can be used by +other Julia projects via `import X` or `using X`. A package should have a +project file with a `uuid` entry giving its package UUID. This UUID is used to +identify the package in projects that depend on it. + +!!! note + For legacy reasons it is possible to load a package without a project file or + UUID from the REPL or the top-level of a script. It is not possible, however, + to load a package without a project file or UUID from a project with them. Once + you've loaded from a project file, everything needs a project file and UUID. + +**Application:** a project which provides standalone functionality not intended +to be reused by other Julia projects. For example a web application or a +command-line utility, or simulation/analytics code accompanying a scientific paper. +An application may have a UUID but does not need one. +An application may also provide global configuration options for packages it +depends on. Packages, on the other hand, may not provide global configuration +since that could conflict with the configuration of the main application. + +!!! note + **Projects _vs._ Packages _vs._ Applications:** + + 1. **Project** is an umbrella term: packages and applications are kinds of projects. + 2. **Packages** should have UUIDs, applications can have UUIDs but don't need them. + 3. **Applications** can provide global configuration, whereas packages cannot. + +**Library (future work):** a compiled binary dependency (not written in Julia) +packaged to be used by a Julia project. These are currently typically built in- +place by a `deps/build.jl` script in a project’s source tree, but in the future +we plan to make libraries first-class entities directly installed and upgraded +by the package manager.
+ +**Environment:** the combination of the top-level name map provided by a project +file combined with the dependency graph and map from packages to their entry points +provided by a manifest file. For more detail see the manual section on code loading. + +- **Explicit environment:** an environment in the form of an explicit project + file and an optional corresponding manifest file together in a directory. If the + manifest file is absent then the implied dependency graph and location maps are + empty. + +- **Implicit environment:** an environment provided as a directory (without a + project file or manifest file) containing packages with entry points of the form + `X.jl`, `X.jl/src/X.jl` or `X/src/X.jl`. The top-level name map is implied by + these entry points. The dependency graph is implied by the existence of project + files inside of these package directories, e.g. `X.jl/Project.toml` or + `X/Project.toml`. The dependencies of the `X` package are the dependencies in + the corresponding project file if there is one. The location map is implied by + the entry points themselves. + +**Registry:** a source tree with a standard layout recording metadata about a +registered set of packages, the tagged versions of them which are available, and +which versions of packages are compatible or incompatible with each other. A +registry is indexed by package name and UUID, and has a directory for each +registered package providing the following metadata about it: + +- name – e.g. `DataFrames` +- UUID – e.g. `a93c6f00-e57d-5684-b7b6-d8193f3e46c0` +- authors – e.g. `Jane Q. Developer ` +- license – e.g. MIT, BSD3, or GPLv2 +- repository – e.g. `https://github.com/JuliaData/DataFrames.jl.git` +- description – a block of text summarizing the functionality of a package +- keywords – e.g. 
`data`, `tabular`, `analysis`, `statistics` +- versions – a list of all registered version tags + +For each registered version of a package, the following information is provided: + +- its semantic version number – e.g. `v1.2.3` +- its git tree SHA-1 hash – e.g. `7ffb18ea3245ef98e368b02b81e8a86543a11103` +- a map from names to UUIDs of dependencies +- which versions of other packages it is compatible/incompatible with + +Dependencies and compatibility are stored in a compressed but human-readable +format using ranges of package versions. + +**Depot:** a directory on a system where various package-related resources live, +including: + +- `environments`: shared named environments (e.g. `v1.0`, `devtools`) +- `clones`: bare clones of package repositories +- `compiled`: cached compiled package images (`.ji` files) +- `config`: global configuration files (e.g. `startup.jl`) +- `dev`: default directory for package development +- `logs`: log files (e.g. `manifest_usage.toml`, `repl_history.jl`) +- `packages`: installed package versions +- `registries`: clones of registries (e.g. `General`) + +**Load path:** a stack of environments where package identities, their +dependencies, and entry-points are searched for. The load path is controlled in +Julia by the `LOAD_PATH` global variable which is populated at startup based on +the value of the `JULIA_LOAD_PATH` environment variable. The first entry is your +primary environment, often the current project, while later entries provide +additional packages one may want to use from the REPL or top-level scripts. + +**Depot path:** a stack of depot locations where the package manager, as well as +Julia's code loading mechanisms, look for registries, installed packages, named +environments, repo clones, cached compiled package images, and configuration +files. The depot path is controlled by the Julia `DEPOT_PATH` global variable +which is populated at startup based on the value of the `JULIA_DEPOT_PATH` +environment variable. 
The first entry is the “user depot” and should be writable +by and owned by the current user. The user depot is where: registries are +cloned, new package versions are installed, named environments are created and +updated, package repos are cloned, newly compiled package image files are saved, +log files are written, development packages are checked out by default, and +global configuration data is saved. Later entries in the depot path are treated +as read-only and are appropriate for registries, packages, etc. installed and +managed by system administrators. diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/index.md new file mode 100644 index 0000000..8cbab92 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/index.md @@ -0,0 +1,65 @@ +# **1.** Introduction + +Welcome to the documentation for Pkg, Julia's package manager. +The documentation covers many things, for example managing package +installations, developing packages, working with package registries and more. + +Throughout the manual the REPL interface to Pkg is used in the examples. +There is also a functional API interface, which is preferred when not working +interactively. This API is documented in the [API Reference](@ref) section. + +## Background and Design + +Pkg is a complete rewrite of Julia's old package manager[^1] and was released +together with Julia v1.0. Unlike traditional +package managers, which install and manage a single global set of packages, Pkg +is designed around “environments”: independent sets of packages that can be +local to an individual project or shared and selected by name. The exact set of +packages and versions in an environment is captured in a _manifest file_ which +can be checked into a project repository and tracked in version control, +significantly improving reproducibility of projects. 
If you’ve ever tried to run +code you haven’t used in a while only to find that you can’t get anything to +work because you’ve updated or uninstalled some of the packages your project was +using, you’ll understand the motivation for this approach. In Pkg, since each +project maintains its own independent set of package versions, you’ll never have +this problem again. Moreover, if you check out a project on a new system, you +can simply materialize the environment described by its manifest file and +immediately be up and running with a known-good set of dependencies. + +Since environments are managed and updated independently from each other, +“dependency hell” is significantly alleviated in Pkg. If you want to use the +latest and greatest version of some package in a new project but you’re stuck on +an older version in a different project, that’s no problem – since they have +separate environments they can just use different versions, which are both +installed at the same time in different locations on your system. The location +of each package version is canonical, so when environments use the same versions +of packages, they can share installations, avoiding unnecessary duplication of +the package. Old package versions that are no longer used by any environments +are periodically “garbage collected” by the package manager. + +Pkg’s approach to local environments may be familiar to people who have used +Python’s `virtualenv` or Ruby’s `bundler`. In Julia, instead of hacking the +language’s code loading mechanisms to support environments, we have the benefit +that Julia natively understands them. In addition, Julia environments are +“stackable”: you can overlay one environment with another and thereby have +access to additional packages outside of the primary environment. 
This makes it +easy to work on a project, which provides the primary environment, while still +having access from the REPL to all your usual dev tools like profilers, +debuggers, and so on, just by having an environment including these dev tools +later in the load path. + +Last but not least, Pkg is designed to support federated package registries. +This means that it allows multiple registries managed by different parties to +interact seamlessly. In particular, this includes private registries which can +live behind corporate firewalls. You can install and update your own packages +from a private registry with exactly the same tools and workflows that you use +to install and manage official Julia packages. If you urgently need to apply a +hotfix for a public package that’s critical to your company’s product, you can +tag a private version of it in your company’s internal registry and get a fix to +your developers and ops teams quickly and easily without having to wait for an +upstream patch to be accepted and published. Once an official fix is published, +however, you can just upgrade your dependencies and you'll be back on an +official release again. + +[^1]: Often denoted `Pkg2`, now archived as `OldPkg` at + [`github.com/JuliaAttic/OldPkg.jl`](https://github.com/JuliaAttic/OldPkg.jl). diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/managing-packages.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/managing-packages.md new file mode 100644 index 0000000..45df972 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/managing-packages.md @@ -0,0 +1,428 @@ +# **3.** Managing Packages + +## Adding packages + +There are two ways of adding packages, either using the `add` command or the `dev` command. +The most frequently used is `add` and its usage is described first. 
+ +### Adding registered packages + +In the Pkg REPL, packages can be added with the `add` command followed by the name of the package, for example: + +```julia-repl +(v1.0) pkg> add Example + Cloning default registries into /Users/kristoffer/.julia/registries + Cloning registry General from "https://github.com/JuliaRegistries/General.git" + Updating registry at `~/.julia/registries/General` + Updating git-repo `https://github.com/JuliaRegistries/General.git` + Resolving package versions... + Updating `~/.julia/environments/v1.0/Project.toml` + [7876af07] + Example v0.5.1 + Updating `~/.julia/environments/v1.0/Manifest.toml` + [7876af07] + Example v0.5.1 + [8dfed614] + Test +``` + +Here we added the package Example to the current project. In this example, we are using a fresh Julia installation, +and this is our first time adding a package using Pkg. By default, Pkg clones Julia's General registry, +and uses this registry to look up packages requested for inclusion in the current environment. +The status update shows a short form of the package UUID to the left, then the package name, and the version. +Since standard libraries (e.g. `Test`) are shipped with Julia, they do not have a version. The project status contains the packages +you have added yourself, in this case, `Example`: + +```julia-repl +(v1.0) pkg> st + Status `Project.toml` + [7876af07] Example v0.5.1 +``` + +The manifest status shows all the packages in the environment, including recursive dependencies: + +```julia-repl +(v1.0) pkg> st --manifest + Status `Manifest.toml` + [7876af07] Example v0.5.1 + [8dfed614] Test +``` + +It is possible to add multiple packages in one command as `pkg> add A B C`. + +After a package is added to the project, it can be loaded in Julia: + +```julia-repl +julia> using Example + +julia> Example.hello("User") +"Hello, User" +``` + +A specific version can be installed by appending a version after a `@` symbol, e.g. 
`@v0.4`, to the package name: + +```julia-repl +(v1.0) pkg> add Example@0.4 + Resolving package versions... + Updating `~/.julia/environments/v1.0/Project.toml` + [7876af07] + Example v0.4.1 + Updating `~/.julia/environments/v1.0/Manifest.toml` + [7876af07] + Example v0.4.1 +``` + +If a branch (or a certain commit) of `Example` has a hotfix that is not yet included in a registered version, +we can explicitly track that branch (or commit) by appending `#branchname` (or `#commitSHA1`) to the package name: + +```julia-repl +(v1.0) pkg> add Example#master + Updating git-repo `https://github.com/JuliaLang/Example.jl.git` + Resolving package versions... + Updating `~/.julia/environments/v1.0/Project.toml` + [7876af07] ~ Example v0.5.1 ⇒ v0.5.1+ #master (https://github.com/JuliaLang/Example.jl.git) + Updating `~/.julia/environments/v1.0/Manifest.toml` + [7876af07] ~ Example v0.5.1 ⇒ v0.5.1+ #master (https://github.com/JuliaLang/Example.jl.git) +``` + +The status output now shows that we are tracking the `master` branch of `Example`. +When updating packages, we will pull updates from that branch. + +To go back to tracking the registry version of `Example`, the command `free` is used: + +```julia-repl +(v1.0) pkg> free Example + Resolving package versions... + Updating `~/.julia/environments/v1.0/Project.toml` + [7876af07] ~ Example v0.5.1+ #master (https://github.com/JuliaLang/Example.jl.git) ⇒ v0.5.1 + Updating `~/.julia/environments/v1.0/Manifest.toml` + [7876af07] ~ Example v0.5.1+ #master (https://github.com/JuliaLang/Example.jl.git) ⇒ v0.5.1 +``` + + +### Adding unregistered packages + +If a package is not in a registry, it can be added by specifying a URL to the repository: + +```julia-repl +(v1.0) pkg> add https://github.com/fredrikekre/ImportMacros.jl + Updating git-repo `https://github.com/fredrikekre/ImportMacros.jl` + Resolving package versions... 
+Downloaded MacroTools ─ v0.4.1 + Updating `~/.julia/environments/v1.0/Project.toml` + [e6797606] + ImportMacros v0.0.0 # (https://github.com/fredrikekre/ImportMacros.jl) + Updating `~/.julia/environments/v1.0/Manifest.toml` + [e6797606] + ImportMacros v0.0.0 # (https://github.com/fredrikekre/ImportMacros.jl) + [1914dd2f] + MacroTools v0.4.1 +``` + +The dependencies of the unregistered package (here `MacroTools`) got installed. +For unregistered packages we could have given a branch name (or commit SHA1) to track using `#`, just like for registered packages. + +If you want to add a package using the SSH-based `git` protocol, you have to use quotes because the URL contains a `@`. For example, +```julia-repl +(v1.0) pkg> add "git@github.com:fredrikekre/ImportMacros.jl.git" + Cloning git-repo `git@github.com:fredrikekre/ImportMacros.jl.git` + Updating git-repo `git@github.com:fredrikekre/ImportMacros.jl.git` + Updating registry at `~/.julia/registries/General` + Resolving package versions... +Updating `~/.julia/environments/v1/Project.toml` + [92a963f6] + ImportMacros v1.0.0 `git@github.com:fredrikekre/ImportMacros.jl.git#master` +Updating `~/.julia/environments/v1/Manifest.toml` + [92a963f6] + ImportMacros v1.0.0 `git@github.com:fredrikekre/ImportMacros.jl.git#master` +``` + +### Adding a local package + +Instead of giving a URL of a git repo to `add` we could instead have given a local path to a git repo. +This works similar to adding a URL. The local repository will be tracked (at some branch) and updates +from that local repo are pulled when packages are updated. +Note tracking a package through `add` is distinct from `develop`: +changes to files in the local package repository will not immediately be reflected when loading that package. +The changes would have to be committed and the packages updated in order to pull in the changes. 
+ +In addition, it is possible to add packages relative to the `Manifest.toml` file, see [Developing packages](@ref developing) for an example. + +### [Developing packages](@id developing) + +By only using `add` your Manifest will always have a "reproducible state", in other words, as long as the repositories and registries used are still accessible +it is possible to retrieve the exact state of all the dependencies in the project. This has the advantage that you can send your project (`Project.toml` +and `Manifest.toml`) to someone else and they can "instantiate" that project in the same state as you had it locally. +However, when you are developing a package, it is more convenient to load packages at their current state at some path. For this reason, the `dev` command exists. + +Let's try to `dev` a registered package: + +```julia-repl +(v1.0) pkg> dev Example + Updating git-repo `https://github.com/JuliaLang/Example.jl.git` + Resolving package versions... + Updating `~/.julia/environments/v1.0/Project.toml` + [7876af07] + Example v0.5.1+ [`~/.julia/dev/Example`] + Updating `~/.julia/environments/v1.0/Manifest.toml` + [7876af07] + Example v0.5.1+ [`~/.julia/dev/Example`] +``` + +The `dev` command fetches a full clone of the package to `~/.julia/dev/` (the path can be changed by setting the environment variable `JULIA_PKG_DEVDIR`). +When importing `Example` julia will now import it from `~/.julia/dev/Example` and whatever local changes have been made to the files in that path are consequently +reflected in the code loaded. When we used `add` we said that we tracked the package repository, we here say that we track the path itself. +Note the package manager will never touch any of the files at a tracked path. It is therefore up to you to pull updates, change branches etc. +If we try to `dev` a package at some branch that already exists at `~/.julia/dev/` the package manager will simply use the existing path. 
+For example: + +```julia-repl +(v1.0) pkg> dev Example + Updating git-repo `https://github.com/JuliaLang/Example.jl.git` +[ Info: Path `/Users/kristoffer/.julia/dev/Example` exists and looks like the correct package, using existing path instead of cloning +``` + +Note the info message saying that it is using the existing path. +When tracking a path, the package manager will never modify the files at that path. + +If `dev` is used on a local path, that path to that package is recorded and used when loading that package. +The path will be recorded relative to the project file, unless it is given as an absolute path. + +To stop tracking a path and use the registered version again, use `free`: + +```julia-repl +(v1.0) pkg> free Example + Resolving package versions... + Updating `~/.julia/environments/v1.0/Project.toml` + [7876af07] ↓ Example v0.5.1+ [`~/.julia/dev/Example`] ⇒ v0.5.1 + Updating `~/.julia/environments/v1.0/Manifest.toml` + [7876af07] ↓ Example v0.5.1+ [`~/.julia/dev/Example`] ⇒ v0.5.1 +``` + +It should be pointed out that by using `dev` your project is now inherently stateful. +Its state depends on the current content of the files at the path and the manifest cannot be "instantiated" by someone else without +knowing the exact content of all the packages that are tracking a path. + +Note that if you add a dependency to a package that tracks a local path, the Manifest (which contains the whole dependency graph) will become +out of sync with the actual dependency graph. This means that the package will not be able to load that dependency since it is not recorded +in the Manifest. To synchronize the Manifest, use the REPL command `resolve`. + +In addition to absolute paths, `add` and `dev` can accept relative paths to packages. +In this case, the relative path from the active project to the package is stored. +This approach is useful when the relative location of tracked dependencies is more important than their absolute location. 
+For example, the tracked dependencies can be stored inside of the active project directory. +The whole directory can be moved and `Pkg` will still be able to find the dependencies +because their path relative to the active project is preserved even though their absolute path has changed. + +## Removing packages + +Packages can be removed from the current project by using `pkg> rm Package`. +This will only remove packages that exist in the project; to remove a package that only +exists as a dependency use `pkg> rm --manifest DepPackage`. +Note that this will remove all packages that depend on `DepPackage`. + +## [Updating packages](@id updating) + +When new versions of packages that the project is using are released, it is a good idea to update. Simply calling `up` will try to update *all* the dependencies of the project +to the latest compatible version. Sometimes this is not what you want. You can specify a subset of the dependencies to upgrade by giving them as arguments to `up`, e.g: + +```julia-repl +(v1.0) pkg> up Example +``` + +If `Example` has a dependency which is also a dependency for another explicitly added package, that dependency will not be updated. If you only want to update the minor version of packages, to reduce the risk that your project breaks, you can give the `--minor` flag, e.g: + +```julia-repl +(v1.0) pkg> up --minor Example +``` + +Packages that track a local repository are not updated when a minor upgrade is done. +Packages that track a path are never touched by the package manager. + +## Pinning a package + +A pinned package will never be updated. A package can be pinned using `pin`, for example: + +```julia-repl +(v1.0) pkg> pin Example + Resolving package versions... + Updating `~/.julia/environments/v1.0/Project.toml` + [7876af07] ~ Example v0.5.1 ⇒ v0.5.1 ⚲ + Updating `~/.julia/environments/v1.0/Manifest.toml` + [7876af07] ~ Example v0.5.1 ⇒ v0.5.1 ⚲ +``` + +Note the pin symbol `⚲` showing that the package is pinned. 
Removing the pin is done using `free`: + +```julia-repl +(v1.0) pkg> free Example + Updating `~/.julia/environments/v1.0/Project.toml` + [7876af07] ~ Example v0.5.1 ⚲ ⇒ v0.5.1 + Updating `~/.julia/environments/v1.0/Manifest.toml` + [7876af07] ~ Example v0.5.1 ⚲ ⇒ v0.5.1 +``` + +## Testing packages + +The tests for a package can be run using the `test` command: + +```julia-repl +(v1.0) pkg> test Example + Testing Example + Testing Example tests passed +``` + +## Building packages + +The build step of a package is automatically run when a package is first installed. +The output of the build process is directed to a file. +To explicitly run the build step for a package, the `build` command is used: + +```julia-repl +(v1.0) pkg> build MbedTLS + Building MbedTLS → `~/.julia/packages/MbedTLS/h1Vu/deps/build.log` + +julia> print(read("~/.julia/packages/MbedTLS/h1Vu/deps/build.log", String)) +┌ Warning: `wait(t::Task)` is deprecated, use `fetch(t)` instead. +│ caller = macro expansion at OutputCollector.jl:63 [inlined] +└ @ Core OutputCollector.jl:63 +... +[ Info: using prebuilt binaries +``` + +## [Interpreting and resolving version conflicts](@id conflicts) + +An environment consists of a set of mutually-compatible packages. +Sometimes, you can find yourself in a situation in which two packages you'd like to use simultaneously +have incompatible requirements. 
+In such cases you'll get an "Unsatisfiable requirements" error: + +```@setup conflict +using Pkg +include(joinpath(pkgdir(Pkg), "test", "resolve_utils.jl")) +using .ResolveUtils +deps_data = Any[["A", v"1.0.0", "C", v"0.2"], + ["B", v"1.0.0", "D", v"0.1"], + ["C", v"0.1.0", "D", v"0.1"], + ["C", v"0.1.1", "D", v"0.1"], + ["C", v"0.2.0", "D", v"0.2"], + ["D", v"0.1.0"], + ["D", v"0.2.0"], + ["D", v"0.2.1"]] +reqs_data = Any[["A", "*"], + ["B", "*"]] +``` + +```@example conflict +print("pkg> add A\n", try resolve_tst(deps_data, reqs_data) catch e sprint(showerror, e) end) # hide +``` + +This message means that a package named `D` has a version conflict. +Even if you have never `add`ed `D` directly, this kind of error can arise +if `D` is required by other packages that you are trying to use. + +The error message has a lot of crucial information. +It may be easiest to interpret piecewise: + +``` +Unsatisfiable requirements detected for package D [756980fe]: + D [756980fe] log: + ├─possible versions are: [0.1.0, 0.2.0-0.2.1] or uninstalled +``` +means that `D` has three released versions, `v0.1.0`, `v0.2.0`, and `v0.2.1`. +You also have the option of not having it installed at all. +Each of these options might have different implications for the set of other packages that can be installed. + +Crucially, notice the stroke characters (vertical and horizontal lines) and their indentation. +Together, these connect *messages* to specific *packages*. +For instance the right stroke of `├─` indicates that the message to its right (`possible versions...`) +is connected to the package pointed to by its vertical stroke (`D`). +This same principle applies to the next line: + +``` + ├─restricted by compatibility requirements with B [f4259836] to versions: 0.1.0 +``` +The vertical stroke here is also aligned under `D`, and thus this message +is in reference to `D`. +Specifically, there's some other package `B` that depends on version `v0.1.0` of `D`. 
+Notice that this is not the newest version of `D`. + +Next comes some information about `B`: + +``` + │ └─B [f4259836] log: + │ ├─possible versions are: 1.0.0 or uninstalled + │ └─restricted to versions * by an explicit requirement, leaving only versions 1.0.0 +``` +The two lines below the first have a vertical stroke that aligns with `B`, +and thus they provide information about `B`. +They tell you that `B` has just one release, `v1.0.0`. +You've not specified a particular version of `B` (`restricted to versions *` means that any version will do), +but the `explicit requirement` means that you've asked for `B` to be part of your environment, +for example by `pkg> add B`. +You might have asked for `B` previously, and the requirement is still active. + +The conflict becomes clear with the line +``` +└─restricted by compatibility requirements with C [c99a7cb2] to versions: 0.2.0 — no versions left +``` + +Here again the vertical stroke aligns with `D`: this means that `D` is *also* required by another package, `C`. +`C` requires `v0.2.0` of `D`, and this conflicts with `B`'s need for `v0.1.0` of `D`. +This explains the conflict. + +But wait, you might ask, what is `C` and why do I need it at all? +The next few lines introduce the problem: + +``` + └─C [c99a7cb2] log: + ├─possible versions are: [0.1.0-0.1.1, 0.2.0] or uninstalled + └─restricted by compatibility requirements with A [29c70717] to versions: 0.2.0 +``` +These provide more information about `C`, revealing that it has 3 released versions: `v0.1.0`, `v0.1.1`, and `v0.2.0`. +Moreover, `C` is required by another package `A`. +Indeed, `A`'s requirements are such that we need `v0.2.0` of `C`. 
+`A`'s origin is revealed on the next lines: + +``` + └─A [29c70717] log: + ├─possible versions are: 1.0.0 or uninstalled + └─restricted to versions * by an explicit requirement, leaving only versions 1.0.0 +``` + +So we can see that `A` was `explicitly required`, and in this case it's because we were trying to +`add` it to our environment. + +In summary, we explicitly asked to use `A` and `B`, but this gave a conflict for `D`. +The reason was that `B` and `C` require conflicting versions of `D`. +Even though `C` isn't something we asked for explicitly, it was needed by `A`. + +To fix such errors, you have a number of options: + +- try [updating your packages](@ref updating). It's possible the developers of these packages have recently released new versions that are mutually compatible. +- remove either `A` or `B` from your environment. Perhaps `B` is left over from something you were previously working on, and you don't need it anymore. If you don't need `A` and `B` at the same time, this is the easiest way to fix the problem. +- try reporting your conflict. In this case, we were able to deduce that `B` requires an outdated version of `D`. You could thus report an issue in the development repository of `B.jl` asking for an updated version. +- try fixing the problem yourself. + This becomes easier once you understand `Project.toml` files and how they declare their compatibility requirements. We'll return to this example in [Fixing conflicts](@ref). + +## Garbage collecting old, unused packages + +As packages are updated and projects are deleted, installed package versions and artifacts that were +once used will inevitably become old and not used from any existing project. +`Pkg` keeps a log of all projects used so it can go through the log and see exactly which projects still exist +and what packages/artifacts those projects used. +If a package or artifact is not marked as used by any project, it is added to a list of orphaned packages. 
+Packages and artifacts that are in the orphan list for 30 days without being used again are deleted from the system on the next garbage collection. +This timing is configurable via the `collect_delay` keyword argument to `Pkg.gc()`. +A value of `0` will cause anything currently not in use to be deleted immediately, skipping the orphans list entirely. +If you are short on disk space and want to clean out as many unused packages and artifacts as possible, you may want to try this, but if you need these versions again, you will have to download them again. +To run a typical garbage collection with default arguments, simply use the `gc` command at the `pkg>` REPL: + +```julia-repl +(v1.0) pkg> gc + Active manifests at: + `~/BinaryProvider/Manifest.toml` + ... + `~/Compat.jl/Manifest.toml` + Active artifacts: + `~/src/MyProject/Artifacts.toml` + + Deleted ~/.julia/packages/BenchmarkTools/1cAj: 146.302 KiB + Deleted ~/.julia/packages/Cassette/BXVB: 795.557 KiB + ... + Deleted `~/.julia/artifacts/e44cdf2579a92ad5cbacd1cddb7414c8b9d2e24e` (152.253 KiB) + Deleted `~/.julia/artifacts/f2df5266567842bbb8a06acca56bcabf813cd73f` (21.536 MiB) + + Deleted 36 package installations (113.205 MiB) + Deleted 15 artifact installations (20.759 GiB) +``` + +Note that only packages in `~/.julia/packages` are deleted. diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/registries.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/registries.md new file mode 100644 index 0000000..be6f8e2 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/registries.md @@ -0,0 +1,81 @@ +# **7.** Registries + +Registries contain information about packages, such as +available releases and dependencies, and where they can be downloaded. +The `General` registry (https://github.com/JuliaRegistries/General) +is the default one, and is installed automatically. + +## Managing registries + +!!! compat "Julia 1.1" + Pkg's registry handling requires at least Julia 1.1. 
+ +Registries can be added, removed and updated from either the Pkg REPL +or by using the function based API. In this section we will describe the +REPL interface. The registry API is documented in +the [Registry API Reference](@ref) section. + +### Adding registries + +A custom registry can be added with the `registry add` command +from the Pkg REPL. Usually this will be done with a URL to the +registry. Here we add the `General` registry: + +```julia-repl +pkg> registry add https://github.com/JuliaRegistries/General + Cloning registry from "https://github.com/JuliaRegistries/General" + Added registry `General` to `~/.julia/registries/General` +``` + +and now all the packages registered in `General` are available for e.g. adding. +To see which registries are currently installed you can use the `registry status` +(or `registry st`) command + +```julia-repl +pkg> registry st +Registry Status + [23338594] General (https://github.com/JuliaRegistries/General.git) +``` + +Registries are always added to the user depot, which is the first entry in `DEPOT_PATH` (cf. the [Glossary](@ref) section). + +### Removing registries + +Registries can be removed with the `registry remove` (or `registry rm`) command. +Here we remove the `General` registry + +```julia-repl +pkg> registry rm General + Removing registry `General` from ~/.julia/registries/General + +pkg> registry st +Registry Status + (no registries found) +``` + +In case there are multiple registries named `General` installed you have to +disambiguate with the `uuid`, just as when manipulating packages, e.g. + +```julia-repl +pkg> registry rm General=23338594-aafe-5451-b93e-139f81909106 + Removing registry `General` from ~/.julia/registries/General +``` + +### Updating registries + +The `registry update` (or `registry up`) command is available to update registries. 
+Here we update the `General` registry: + +```julia-repl +pkg> registry up General + Updating registry at `~/.julia/registries/General` + Updating git-repo `https://github.com/JuliaRegistries/General` +``` + +and to update all installed registries just do: + +```julia-repl +pkg> registry up + Updating registry at `~/.julia/registries/General` + Updating git-repo `https://github.com/JuliaRegistries/General` +``` diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/toml-files.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/toml-files.md new file mode 100644 index 0000000..d79f9ef --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/docs/src/toml-files.md @@ -0,0 +1,270 @@ +# [**10.** `Project.toml` and `Manifest.toml`](@id Project-and-Manifest) + +Two files that are central to Pkg are `Project.toml` and `Manifest.toml`. `Project.toml` +and `Manifest.toml` are written in [TOML](https://github.com/toml-lang/toml) (hence the +`.toml` extension) and include information about dependencies, versions, package names, +UUIDs etc. + +!!! note + The `Project.toml` and `Manifest.toml` files are not only used by the package manager; + they are also used by Julia's code loading, and determine e.g. what `using Example` + should do. For more details see the section about + [Code Loading](https://docs.julialang.org/en/v1/manual/code-loading/) + in the Julia manual. + + +## `Project.toml` + +The project file describes the project on a high level, for example the package/project +dependencies and compatibility constraints are listed in the project file. The file entries +are described below. + + +### The `authors` field + +For a package, the optional `authors` field is a list of strings describing the +package authors, in the form `NAME `. 
For example: +```toml +authors = ["Some One ", + "Foo Bar "] +``` + + +### The `name` field + +The name of the package/project is determined by the `name` field, for example: +```toml +name = "Example" +``` +The name can contain word characters `[a-zA-Z0-9_]`, but can not start with a number. For +packages it is recommended to follow the +[package naming guidelines](@ref Package-naming-guidelines). The `name` field is mandatory +for packages. + + +### The `uuid` field + +`uuid` is a string with a +[universally unique identifier](https://en.wikipedia.org/wiki/Universally_unique_identifier) +for the package/project, for example: +```toml +uuid = "7876af07-990d-54b4-ab0e-23690620f79a" +``` +The `uuid` field is mandatory for packages. + +!!! note + It is recommended that `UUIDs.uuid4()` is used to generate random UUIDs. + + +### The `version` field + +`version` is a string with the version number for the package/project. It should consist of +three numbers, major version, minor version and patch number, separated with a `.`, for example: +```toml +version = "1.2.5" +``` +Julia uses [Semantic Versioning](https://semver.org/) (SemVer) and the `version` field +should follow SemVer. The basic rules are: +* Before 1.0.0, anything goes, but when you make breaking changes the minor version should + be incremented. +* After 1.0.0 only make breaking changes when incrementing the major version. +* After 1.0.0 no new public API should be added without incrementing the minor version. + This includes, in particular, new types, functions, methods and method overloads, from + `Base` or other packages. +See also the section on [Compatibility](@ref). + +Note that Pkg.jl deviates from the SemVer specification when it comes to versions pre-1.0.0. See +the section on [pre-1.0 behavior](@ref compat-pre-1.0) for more details. + + +### The `[deps]` section + +All dependencies of the package/project are listed in the `[deps]` section. 
Each dependency +is listed as a name-uuid pair, for example: + +```toml +[deps] +Example = "7876af07-990d-54b4-ab0e-23690620f79a" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +``` + +Typically it is not needed to manually add entries to the `[deps]` section; this is instead +handled by Pkg operations such as `add`. + + +### The `[compat]` section + +Compatibility constraints for the dependencies listed under `[deps]` can be listed in the +`[compat]` section. +Example: + +```toml +[deps] +Example = "7876af07-990d-54b4-ab0e-23690620f79a" + +[compat] +Example = "1.2" +``` + +The [Compatibility](@ref) section describes the different possible compatibility +constraints in detail. It is also possible to list constraints on `julia` itself, although +`julia` is not listed as a dependency in the `[deps]` section: + +```toml +[compat] +julia = "1.1" +``` + + +## `Manifest.toml` + +The manifest file is an absolute record of the state of the packages in the environment. +It includes exact information about (direct and indirect) dependencies of the project. +Given a `Project.toml` + `Manifest.toml` pair, it is possible to instantiate the exact same +package environment, which is very useful for reproducibility. +For the details, see [`Pkg.instantiate`](@ref). + +!!! note + The `Manifest.toml` file is generated and maintained by Pkg and, in general, this file + should *never* be modified manually. + + +### `Manifest.toml` entries + +Each dependency has its own section in the manifest file, and its content varies depending +on how the dependency was added to the environment. Every +dependency section includes a combination of the following entries: + +* `uuid`: the [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier) + for the dependency, for example `uuid = "7876af07-990d-54b4-ab0e-23690620f79a"`. +* `deps`: a vector listing the dependencies of the dependency, for example + `deps = ["Example", "JSON"]`. 
+* `version`: a version number, for example `version = "1.2.6"`. +* `path`: a file path to the source code, for example `path = "/home/user/Example"`. +* `repo-url`: a URL to the repository where the source code was found, + for example `repo-url = "https://github.com/JuliaLang/Example.jl.git"`. +* `repo-rev`: a git revision, for example a branch `repo-rev = "master"` + or a commit `repo-rev = "66607a62a83cb07ab18c0b35c038fcd62987c9b1"`. +* `git-tree-sha1`: a content hash of the source tree, for example + `git-tree-sha1 = "ca3820cc4e66f473467d912c4b2b3ae5dc968444"`. + + +#### Added package + +When a package is added from a package registry, for example by invoking `pkg> add Example` +or with a specific version `pkg> add Example@1.2`, the resulting `Manifest.toml` entry looks +like: + +```toml +[[Example]] +deps = ["DependencyA", "DependencyB"] +git-tree-sha1 = "8eb7b4d4ca487caade9ba3e85932e28ce6d6e1f8" +uuid = "7876af07-990d-54b4-ab0e-23690620f79a" +version = "1.2.3" +``` + +Note, in particular, that no `repo-url` is present, since that information is included in +the registry where this package was found. + +#### Added package by branch + +The resulting dependency section when adding a package specified by a branch, e.g. +`pkg> add Example#master` or `pkg> add https://github.com/JuliaLang/Example.jl.git`, +looks like: + +```toml +[[Example]] +deps = ["DependencyA", "DependencyB"] +git-tree-sha1 = "54c7a512469a38312a058ec9f429e1db1f074474" +repo-rev = "master" +repo-url = "https://github.com/JuliaLang/Example.jl.git" +uuid = "7876af07-990d-54b4-ab0e-23690620f79a" +version = "1.2.4" +``` + +Note that both the branch we are tracking (`master`) and the remote repository url +(`"https://github.com/JuliaLang/Example.jl.git"`) are stored in the manifest. + +#### Added package by commit + +The resulting dependency section when adding a package specified by a commit, e.g.
+`pkg> add Example#cf6ba6cc0be0bb5f56840188563579d67048be34`, looks like: + +```toml +[[Example]] +deps = ["DependencyA", "DependencyB"] +git-tree-sha1 = "54c7a512469a38312a058ec9f429e1db1f074474" +repo-rev = "cf6ba6cc0be0bb5f56840188563579d67048be34" +repo-url = "https://github.com/JuliaLang/Example.jl.git" +uuid = "7876af07-990d-54b4-ab0e-23690620f79a" +version = "1.2.4" +``` + +The only difference from tracking a branch is the content of `repo-rev`. + +#### Developed package + +The resulting dependency section when adding a package with `develop`, +e.g. `pkg> develop Example` or `pkg> develop /path/to/local/folder/Example`, +looks like: + +```toml +[[Example]] +deps = ["DependencyA", "DependencyB"] +path = "/home/user/.julia/dev/Example/" +uuid = "7876af07-990d-54b4-ab0e-23690620f79a" +version = "1.2.4" +``` + +Note that the path to the source code is included, and changes made to that +source tree are directly reflected. + +#### Pinned package + +Pinned packages are also recorded in the manifest file; the resulting +dependency section for e.g. `pkg> add Example; pin Example` looks like: + +```toml +[[Example]] +deps = ["DependencyA", "DependencyB"] +git-tree-sha1 = "54c7a512469a38312a058ec9f429e1db1f074474" +pinned = true +uuid = "7876af07-990d-54b4-ab0e-23690620f79a" +version = "1.2.4" +``` + +The only difference is the addition of the `pinned = true` entry. + +#### Multiple packages with the same name + +Julia differentiates packages based on UUID, which means that the name alone is not enough +to identify a package. It is possible to have multiple packages in the same environment +with the same name, but with different UUIDs. In such a situation the `Manifest.toml` file +looks a bit different.
Consider for example the situation where you have added `A` and `B` +to your environment, and the `Project.toml` file looks as follows: + +```toml +[deps] +A = "ead4f63c-334e-11e9-00e6-e7f0a5f21b60" +B = "edca9bc6-334e-11e9-3554-9595dbb4349c" +``` + +If `A` now depends on `B = "f41f7b98-334e-11e9-1257-49272045fb24"`, i.e. *another* package +named `B` there will be two different `B` packages in the `Manifest.toml` file. In this +case the full `Manifest.toml` file, with `git-tree-sha1` and `version` fields removed for +clarity, looks like: + +```toml +[[A]] +uuid = "ead4f63c-334e-11e9-00e6-e7f0a5f21b60" + + [A.deps] + B = "f41f7b98-334e-11e9-1257-49272045fb24" + +[[B]] +uuid = "f41f7b98-334e-11e9-1257-49272045fb24" +[[B]] +uuid = "edca9bc6-334e-11e9-3554-9595dbb4349c" +``` + +There is now an array of the two `B` packages, and the `[deps]` section for `A` has been +expanded in order to be explicit about which `B` package `A` depends on. diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/test/test_packages/SameNameDifferentUUID/dev/Unregistered/README.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/test/test_packages/SameNameDifferentUUID/dev/Unregistered/README.md new file mode 100644 index 0000000..26b9f8d --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/test/test_packages/SameNameDifferentUUID/dev/Unregistered/README.md @@ -0,0 +1,2 @@ +# Unregistered.jl +Test repo diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/test/test_packages/SandboxFallback2/dev/Unregistered/README.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/test/test_packages/SandboxFallback2/dev/Unregistered/README.md new file mode 100644 index 0000000..26b9f8d --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/test/test_packages/SandboxFallback2/dev/Unregistered/README.md @@ -0,0 +1,2 @@ +# Unregistered.jl +Test repo diff --git 
a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/test/test_packages/TestDepTrackingPath/test/dev/Unregistered/README.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/test/test_packages/TestDepTrackingPath/test/dev/Unregistered/README.md new file mode 100644 index 0000000..26b9f8d --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/test/test_packages/TestDepTrackingPath/test/dev/Unregistered/README.md @@ -0,0 +1,2 @@ +# Unregistered.jl +Test repo diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/test/test_packages/TransferSubgraph/dev/Unregistered/README.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/test/test_packages/TransferSubgraph/dev/Unregistered/README.md new file mode 100644 index 0000000..26b9f8d --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Pkg/test/test_packages/TransferSubgraph/dev/Unregistered/README.md @@ -0,0 +1,2 @@ +# Unregistered.jl +Test repo diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Printf/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Printf/docs/src/index.md new file mode 100644 index 0000000..828e527 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Printf/docs/src/index.md @@ -0,0 +1,6 @@ +# Printf + +```@docs +Printf.@printf +Printf.@sprintf +``` diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Profile/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Profile/docs/src/index.md new file mode 100644 index 0000000..ac60bb9 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Profile/docs/src/index.md @@ -0,0 +1,17 @@ +# [Profiling](@id lib-profiling) + +```@docs +Profile.@profile +``` + +The methods in `Profile` are not exported and need to be called e.g. as `Profile.print()`. 
+ +```@docs +Profile.clear +Profile.print +Profile.init +Profile.fetch +Profile.retrieve +Profile.callers +Profile.clear_malloc_data +``` diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/REPL/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/REPL/docs/src/index.md new file mode 100644 index 0000000..168d3e9 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/REPL/docs/src/index.md @@ -0,0 +1,692 @@ +# The Julia REPL + +Julia comes with a full-featured interactive command-line REPL (read-eval-print loop) built into +the `julia` executable. In addition to allowing quick and easy evaluation of Julia statements, +it has a searchable history, tab-completion, many helpful keybindings, and dedicated help and +shell modes. The REPL can be started by simply calling `julia` with no arguments or double-clicking +on the executable: + +```@eval +io = IOBuffer() +Base.banner(io) +banner = String(take!(io)) +import Markdown +Markdown.parse("```\n\$ julia\n\n$(banner)\njulia>\n```") +``` + +To exit the interactive session, type `^D` -- the control key together with the `d` key on a blank +line -- or type `exit()` followed by the return or enter key. The REPL greets you with a banner +and a `julia>` prompt. + +## The different prompt modes + +### The Julian mode + +The REPL has five main modes of operation. The first and most common is the Julian prompt. It +is the default mode of operation; each new line initially starts with `julia>`. It is here that +you can enter Julia expressions. Hitting return or enter after a complete expression has been +entered will evaluate the entry and show the result of the last expression. + +```jldoctest +julia> string(1 + 2) +"3" +``` + +There are a number of useful features unique to interactive work. In addition to showing the result, +the REPL also binds the result to the variable `ans`. A trailing semicolon on the line can be +used as a flag to suppress showing the result.
+ +```jldoctest +julia> string(3 * 4); + +julia> ans +"12" +``` + +In Julia mode, the REPL supports something called *prompt pasting*. This activates when pasting +text that starts with `julia> ` into the REPL. In that case, only expressions starting with +`julia> ` are parsed, others are removed. This makes it possible to paste a chunk of code +that has been copied from a REPL session without having to scrub away prompts and outputs. This +feature is enabled by default but can be disabled or enabled at will with `REPL.enable_promptpaste(::Bool)`. +If it is enabled, you can try it out by pasting the code block above this paragraph straight into +the REPL. This feature does not work on the standard Windows command prompt due to its limitation +at detecting when a paste occurs. + +Objects are printed at the REPL using the [`show`](@ref) function with a specific [`IOContext`](@ref). +In particular, the `:limit` attribute is set to `true`. +Other attributes can receive in certain `show` methods a default value if it's not already set, +like `:compact`. +It's possible, as an experimental feature, to specify the attributes used by the REPL via the +`Base.active_repl.options.iocontext` dictionary (associating values to attributes). 
For example: + +```julia-repl +julia> rand(2, 2) +2×2 Array{Float64,2}: + 0.8833 0.329197 + 0.719708 0.59114 + +julia> show(IOContext(stdout, :compact => false), "text/plain", rand(2, 2)) + 0.43540323669187075 0.15759787870609387 + 0.2540832269192739 0.4597637838786053 +julia> Base.active_repl.options.iocontext[:compact] = false; + +julia> rand(2, 2) +2×2 Array{Float64,2}: + 0.2083967319174056 0.13330606013126012 + 0.6244375177790158 0.9777957560761545 +``` + +In order to define automatically the values of this dictionary at startup time, one can use the +[`atreplinit`](@ref) function in the `~/.julia/config/startup.jl` file, for example: +```julia +atreplinit() do repl + repl.options.iocontext[:compact] = false +end +``` + +### Help mode + +When the cursor is at the beginning of the line, the prompt can be changed to a help mode by typing +`?`. Julia will attempt to print help or documentation for anything entered in help mode: + +```julia-repl +julia> ? # upon typing ?, the prompt changes (in place) to: help?> + +help?> string +search: string String Cstring Cwstring RevString randstring bytestring SubString + + string(xs...) + + Create a string from any values using the print function. +``` + +Macros, types and variables can also be queried: + +``` +help?> @time + @time + + A macro to execute an expression, printing the time it took to execute, the number of allocations, + and the total number of bytes its execution caused to be allocated, before returning the value of the + expression. + + See also @timev, @timed, @elapsed, and @allocated. + +help?> Int32 +search: Int32 UInt32 + + Int32 <: Signed + + 32-bit signed integer type. +``` + +A string or regex literal searches all docstrings using [`apropos`](@ref): + +``` +help?> "aprop" +REPL.stripmd +Base.Docs.apropos + +help?> r"ap..p" +Base.:∘ +Base.shell_escape_posixly +Distributed.CachingPool +REPL.stripmd +Base.Docs.apropos +``` + +Help mode can be exited by pressing backspace at the beginning of the line. 
+ +### [Shell mode](@id man-shell-mode) + +Just as help mode is useful for quick access to documentation, another common task is to use the +system shell to execute system commands. Just as `?` entered help mode when at the beginning +of the line, a semicolon (`;`) will enter the shell mode. And it can be exited by pressing backspace +at the beginning of the line. + +```julia-repl +julia> ; # upon typing ;, the prompt changes (in place) to: shell> + +shell> echo hello +hello +``` +!!! note + For Windows users, Julia's shell mode does not expose windows shell commands. + Hence, this will fail: + +```julia-repl +julia> ; # upon typing ;, the prompt changes (in place) to: shell> + +shell> dir +ERROR: IOError: could not spawn `dir`: no such file or directory (ENOENT) +Stacktrace! +....... +``` +However, you can get access to `PowerShell` like this: +```julia-repl +julia> ; # upon typing ;, the prompt changes (in place) to: shell> + +shell> powershell +Windows PowerShell +Copyright (C) Microsoft Corporation. All rights reserved. +PS C:\Users\elm> +``` +... and to `cmd.exe` like that (see the `dir` command): +```julia-repl +julia> ; # upon typing ;, the prompt changes (in place) to: shell> + +shell> cmd +Microsoft Windows [version 10.0.17763.973] +(c) 2018 Microsoft Corporation. All rights reserved. +C:\Users\elm>dir + Volume in drive C has no label + Volume Serial Number is 1643-0CD7 + Directory of C:\Users\elm + +29/01/2020 22:15 . +29/01/2020 22:15 .. +02/02/2020 08:06 .atom +``` + +### Pkg mode + +The Package manager mode accepts specialized commands for loading and updating packages. It is entered +by pressing the `]` key at the Julian REPL prompt and exited by pressing CTRL-C or pressing the backspace key +at the beginning of the line. The prompt for this mode is `pkg>`. It supports its own help-mode, which is +entered by pressing `?` at the beginning of the line of the `pkg>` prompt. 
The Package manager mode is +documented in the Pkg manual, available at [https://julialang.github.io/Pkg.jl/v1/](https://julialang.github.io/Pkg.jl/v1/). + +### Search modes + +In all of the above modes, the executed lines get saved to a history file, which can be searched. + To initiate an incremental search through the previous history, type `^R` -- the control key +together with the `r` key. The prompt will change to ```(reverse-i-search)`':```, and as you +type the search query will appear in the quotes. The most recent result that matches the query +will dynamically update to the right of the colon as more is typed. To find an older result using +the same query, simply type `^R` again. + +Just as `^R` is a reverse search, `^S` is a forward search, with the prompt ```(i-search)`':```. + The two may be used in conjunction with each other to move through the previous or next matching +results, respectively. + +## Key bindings + +The Julia REPL makes great use of key bindings. Several control-key bindings were already introduced +above (`^D` to exit, `^R` and `^S` for searching), but there are many more. In addition to the +control-key, there are also meta-key bindings. These vary more by platform, but most terminals +default to using alt- or option- held down with a key to send the meta-key (or can be configured +to do so), or pressing Esc and then the key. 
+ +| Keybinding | Description | +|:------------------- |:---------------------------------------------------------------------------------------------------------- | +| **Program control** |   | +| `^D` | Exit (when buffer is empty) | +| `^C` | Interrupt or cancel | +| `^L` | Clear console screen | +| Return/Enter, `^J` | New line, executing if it is complete | +| meta-Return/Enter | Insert new line without executing it | +| `?` or `;` | Enter help or shell mode (when at start of a line) | +| `^R`, `^S` | Incremental history search, described above | +| **Cursor movement** |   | +| Right arrow, `^F` | Move right one character | +| Left arrow, `^B` | Move left one character | +| ctrl-Right, `meta-F`| Move right one word | +| ctrl-Left, `meta-B` | Move left one word | +| Home, `^A` | Move to beginning of line | +| End, `^E` | Move to end of line | +| Up arrow, `^P` | Move up one line (or change to the previous history entry that matches the text before the cursor) | +| Down arrow, `^N` | Move down one line (or change to the next history entry that matches the text before the cursor) | +| Shift-Arrow Key | Move cursor according to the direction of the Arrow key, while activating the region ("shift selection") | +| Page-up, `meta-P` | Change to the previous history entry | +| Page-down, `meta-N` | Change to the next history entry | +| `meta-<` | Change to the first history entry (of the current session if it is before the current position in history) | +| `meta->` | Change to the last history entry | +| `^-Space` | Set the "mark" in the editing region (and de-activate the region if it's active) | +| `^-Space ^-Space` | Set the "mark" in the editing region and make the region "active", i.e. highlighted | +| `^G` | De-activate the region (i.e. 
make it not highlighted) | +| `^X^X` | Exchange the current position with the mark | +| **Editing** |   | +| Backspace, `^H` | Delete the previous character, or the whole region when it's active | +| Delete, `^D` | Forward delete one character (when buffer has text) | +| meta-Backspace | Delete the previous word | +| `meta-d` | Forward delete the next word | +| `^W` | Delete previous text up to the nearest whitespace | +| `meta-w` | Copy the current region in the kill ring | +| `meta-W` | "Kill" the current region, placing the text in the kill ring | +| `^K` | "Kill" to end of line, placing the text in the kill ring | +| `^Y` | "Yank" insert the text from the kill ring | +| `meta-y` | Replace a previously yanked text with an older entry from the kill ring | +| `^T` | Transpose the characters about the cursor | +| `meta-Up arrow` | Transpose current line with line above | +| `meta-Down arrow` | Transpose current line with line below | +| `meta-u` | Change the next word to uppercase | +| `meta-c` | Change the next word to titlecase | +| `meta-l` | Change the next word to lowercase | +| `^/`, `^_` | Undo previous editing action | +| `^Q` | Write a number in REPL and press `^Q` to open editor at corresponding stackframe or method | +| `meta-Left Arrow` | indent the current line on the left | +| `meta-Right Arrow` | indent the current line on the right | +| `meta-.` | insert last word from previous history entry | + +### Customizing keybindings + +Julia's REPL keybindings may be fully customized to a user's preferences by passing a dictionary +to `REPL.setup_interface`. The keys of this dictionary may be characters or strings. The key +`'*'` refers to the default action. Control plus character `x` bindings are indicated with `"^x"`. +Meta plus `x` can be written `"\\M-x"` or `"\ex"`, and Control plus `x` can be written +`"\\C-x"` or `"^x"`. 
+The values of the custom keymap must be `nothing` (indicating +that the input should be ignored) or functions that accept the signature +`(PromptState, AbstractREPL, Char)`. +The `REPL.setup_interface` function must be called before the REPL is initialized, by registering +the operation with [`atreplinit`](@ref). For example, to bind the up and down arrow keys to move through +history without prefix search, one could put the following code in `~/.julia/config/startup.jl`: + +```julia +import REPL +import REPL.LineEdit + +const mykeys = Dict{Any,Any}( + # Up Arrow + "\e[A" => (s,o...)->(LineEdit.edit_move_up(s) || LineEdit.history_prev(s, LineEdit.mode(s).hist)), + # Down Arrow + "\e[B" => (s,o...)->(LineEdit.edit_move_down(s) || LineEdit.history_next(s, LineEdit.mode(s).hist)) +) + +function customize_keys(repl) + repl.interface = REPL.setup_interface(repl; extra_repl_keymap = mykeys) +end + +atreplinit(customize_keys) +``` + +Users should refer to `LineEdit.jl` to discover the available actions on key input. + +## Tab completion + +In both the Julian and help modes of the REPL, one can enter the first few characters of a function +or type and then press the tab key to get a list of all matches: + +```julia-repl +julia> stri[TAB] +stride strides string strip + +julia> Stri[TAB] +StridedArray StridedMatrix StridedVecOrMat StridedVector String +``` + +The tab key can also be used to substitute LaTeX math symbols with their Unicode equivalents, +and get a list of LaTeX matches as well: + +```julia-repl +julia> \pi[TAB] +julia> π +π = 3.1415926535897... 
+ +julia> e\_1[TAB] = [1,0] +julia> e₁ = [1,0] +2-element Array{Int64,1}: + 1 + 0 + +julia> e\^1[TAB] = [1 0] +julia> e¹ = [1 0] +1×2 Array{Int64,2}: + 1 0 + +julia> \sqrt[TAB]2 # √ is equivalent to the sqrt function +julia> √2 +1.4142135623730951 + +julia> \hbar[TAB](h) = h / 2\pi[TAB] +julia> ħ(h) = h / 2π +ħ (generic function with 1 method) + +julia> \h[TAB] +\hat \hermitconjmatrix \hkswarow \hrectangle +\hatapprox \hexagon \hookleftarrow \hrectangleblack +\hbar \hexagonblack \hookrightarrow \hslash +\heartsuit \hksearow \house \hspace + +julia> α="\alpha[TAB]" # LaTeX completion also works in strings +julia> α="α" +``` + +A full list of tab-completions can be found in the [Unicode Input](@ref) section of the manual. + +Completion of paths works for strings and julia's shell mode: + +```julia-repl +julia> path="/[TAB]" +.dockerenv .juliabox/ boot/ etc/ lib/ media/ opt/ root/ sbin/ sys/ usr/ +.dockerinit bin/ dev/ home/ lib64/ mnt/ proc/ run/ srv/ tmp/ var/ +shell> /[TAB] +.dockerenv .juliabox/ boot/ etc/ lib/ media/ opt/ root/ sbin/ sys/ usr/ +.dockerinit bin/ dev/ home/ lib64/ mnt/ proc/ run/ srv/ tmp/ var/ +``` + +Tab completion can help with investigation of the available methods matching the input arguments: + +```julia-repl +julia> max([TAB] # All methods are displayed, not shown here due to size of the list + +julia> max([1, 2], [TAB] # All methods where `Vector{Int}` matches as first argument +max(x, y) in Base at operators.jl:215 +max(a, b, c, xs...) in Base at operators.jl:281 + +julia> max([1, 2], max(1, 2), [TAB] # All methods matching the arguments. +max(x, y) in Base at operators.jl:215 +max(a, b, c, xs...) 
in Base at operators.jl:281 +``` + +Keywords are also displayed in the suggested methods after `;`, see below line where `limit` +and `keepempty` are keyword arguments: + +```julia-repl +julia> split("1 1 1", [TAB] +split(str::AbstractString; limit, keepempty) in Base at strings/util.jl:302 +split(str::T, splitter; limit, keepempty) where T<:AbstractString in Base at strings/util.jl:277 +``` + +The completion of the methods uses type inference and can therefore see if the arguments match +even if the arguments are output from functions. The function needs to be type stable for the +completion to be able to remove non-matching methods. + +Tab completion can also help completing fields: + +```julia-repl +julia> import UUIDs + +julia> UUIDs.uuid[TAB] +uuid1 uuid4 uuid_version +``` + +Fields for output from functions can also be completed: + +```julia-repl +julia> split("","")[1].[TAB] +lastindex offset string +``` + +The completion of fields for output from functions uses type inference, and it can only suggest +fields if the function is type stable. + +Dictionary keys can also be tab completed: + +```julia-repl +julia> foo = Dict("qwer1"=>1, "qwer2"=>2, "asdf"=>3) +Dict{String,Int64} with 3 entries: + "qwer2" => 2 + "asdf" => 3 + "qwer1" => 1 + +julia> foo["q[TAB] + +"qwer1" "qwer2" +julia> foo["qwer +``` + +## Customizing Colors + +The colors used by Julia and the REPL can be customized, as well. To change the +color of the Julia prompt you can add something like the following to your +`~/.julia/config/startup.jl` file, which is to be placed inside your home directory: + +```julia +function customize_colors(repl) + repl.prompt_color = Base.text_colors[:cyan] +end + +atreplinit(customize_colors) +``` + +The available color keys can be seen by typing `Base.text_colors` in the help mode of the REPL. +In addition, the integers 0 to 255 can be used as color keys for terminals +with 256 color support. 
+ +You can also change the colors for the help and shell prompts and +input and answer text by setting the appropriate field of `repl` in the `customize_colors` function +above (respectively, `help_color`, `shell_color`, `input_color`, and `answer_color`). For the +latter two, be sure that the `envcolors` field is also set to false. + +It is also possible to apply boldface formatting by using +`Base.text_colors[:bold]` as a color. For instance, to print answers in +boldface font, one can use the following as a `~/.julia/config/startup.jl`: + +```julia +function customize_colors(repl) + repl.envcolors = false + repl.answer_color = Base.text_colors[:bold] +end + +atreplinit(customize_colors) +``` + +You can also customize the color used to render warning and informational messages by +setting the appropriate environment variables. For instance, to render error, warning, and informational +messages respectively in magenta, yellow, and cyan you can add the following to your +`~/.julia/config/startup.jl` file: + +```julia +ENV["JULIA_ERROR_COLOR"] = :magenta +ENV["JULIA_WARN_COLOR"] = :yellow +ENV["JULIA_INFO_COLOR"] = :cyan +``` + +## TerminalMenus + +TerminalMenus is a submodule of the Julia REPL and enables small, low-profile interactive menus in the terminal. + +### Examples + +```julia +import REPL +using REPL.TerminalMenus + +options = ["apple", "orange", "grape", "strawberry", + "blueberry", "peach", "lemon", "lime"] + +``` + +#### RadioMenu + +The RadioMenu allows the user to select one option from the list. The `request` +function displays the interactive menu and returns the index of the selected +choice. If a user presses 'q' or `ctrl-c`, `request` will return a `-1`. + + +```julia +# `pagesize` is the number of items to be displayed at a time. 
+# The UI will scroll if the number of options is greater +# than the `pagesize` +menu = RadioMenu(options, pagesize=4) + +# `request` displays the menu and returns the index after the +# user has selected a choice +choice = request("Choose your favorite fruit:", menu) + +if choice != -1 + println("Your favorite fruit is ", options[choice], "!") +else + println("Menu canceled.") +end + +``` + +Output: + +``` +Choose your favorite fruit: +^ grape + strawberry + > blueberry +v peach +Your favorite fruit is blueberry! +``` + +#### MultiSelectMenu + +The MultiSelectMenu allows users to select many choices from a list. + +```julia +# here we use the default `pagesize` 10 +menu = MultiSelectMenu(options) + +# `request` returns a `Set` of selected indices +# if the menu is canceled (ctrl-c or q), return an empty set +choices = request("Select the fruits you like:", menu) + +if length(choices) > 0 + println("You like the following fruits:") + for i in choices + println(" - ", options[i]) + end +else + println("Menu canceled.") +end +``` + +Output: + +``` +Select the fruits you like: +[press: d=done, a=all, n=none] + [ ] apple + > [X] orange + [X] grape + [ ] strawberry + [ ] blueberry + [X] peach + [ ] lemon + [ ] lime +You like the following fruits: + - orange + - grape + - peach +``` + +### Customization / Configuration + +#### ConfiguredMenu subtypes + +Starting with Julia 1.6, the recommended way to configure menus is via the constructor. 
+For instance, the default multiple-selection menu + +``` +julia> menu = MultiSelectMenu(options, pagesize=5); + +julia> request(menu) # ASCII is used by default +[press: d=done, a=all, n=none] + [ ] apple + [X] orange + [ ] grape + > [X] strawberry +v [ ] blueberry +``` + +can instead be rendered with Unicode selection and navigation characters with + +```julia +julia> menu = MultiSelectMenu(options, pagesize=5, charset=:unicode); + +julia> request(menu) +[press: d=done, a=all, n=none] + ⬚ apple + ✓ orange + ⬚ grape + → ✓ strawberry +↓ ⬚ blueberry +``` + +More fine-grained configuration is also possible: + +```julia +julia> menu = MultiSelectMenu(options, pagesize=5, charset=:unicode, checked="YEP!", unchecked="NOPE", cursor='⧐'); + +julia> request(menu) +julia> request(menu) +[press: d=done, a=all, n=none] + NOPE apple + YEP! orange + NOPE grape + ⧐ YEP! strawberry +↓ NOPE blueberry +``` + +Aside from the overall `charset` option, for `RadioMenu` the configurable options are: + + - `cursor::Char='>'|'→'`: character to use for cursor + - `up_arrow::Char='^'|'↑'`: character to use for up arrow + - `down_arrow::Char='v'|'↓'`: character to use for down arrow + - `updown_arrow::Char='I'|'↕'`: character to use for up/down arrow in one-line page + - `scroll_wrap::Bool=false`: optionally wrap-around at the beginning/end of a menu + - `ctrl_c_interrupt::Bool=true`: If `false`, return empty on ^C, if `true` throw InterruptException() on ^C + +`MultiSelectMenu` adds: + + - `checked::String="[X]"|"✓"`: string to use for checked + - `unchecked::String="[ ]"|"⬚"`: string to use for unchecked + +You can create new menu types of your own. +Types that are derived from `TerminalMenus.ConfiguredMenu` configure the menu options at construction time. + +#### Legacy interface + +Prior to Julia 1.6, and still supported throughout Julia 1.x, one can also configure menus by calling +`TerminalMenus.config()`. 
+ +## References + +### REPL + +```@docs +Base.atreplinit +``` + +### TerminalMenus + +#### Configuration + +```@docs +REPL.TerminalMenus.Config +REPL.TerminalMenus.MultiSelectConfig +REPL.TerminalMenus.config +``` + +#### User interaction + +```@docs +REPL.TerminalMenus.request +``` + +#### AbstractMenu extension interface + +Any subtype of `AbstractMenu` must be mutable, and must contain the fields `pagesize::Int` and +`pageoffset::Int`. +Any subtype must also implement the following functions: + +```@docs +REPL.TerminalMenus.pick +REPL.TerminalMenus.cancel +REPL.TerminalMenus.writeline +``` + +It must also implement either `options` or `numoptions`: + +```@docs +REPL.TerminalMenus.options +REPL.TerminalMenus.numoptions +``` + +If the subtype does not have a field named `selected`, it must also implement + +```@docs +REPL.TerminalMenus.selected +``` + +The following are optional but can allow additional customization: + +```@docs +REPL.TerminalMenus.header +REPL.TerminalMenus.keypress +``` diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/REPL/src/TerminalMenus/LICENSE.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/REPL/src/TerminalMenus/LICENSE.md new file mode 100644 index 0000000..9ca98a4 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/REPL/src/TerminalMenus/LICENSE.md @@ -0,0 +1,22 @@ +The TerminalMenus.jl package is licensed under the MIT "Expat" License: + +> Copyright (c) 2017: Nick Paul. 
+> +> Permission is hereby granted, free of charge, to any person obtaining a copy +> of this software and associated documentation files (the "Software"), to deal +> in the Software without restriction, including without limitation the rights +> to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +> copies of the Software, and to permit persons to whom the Software is +> furnished to do so, subject to the following conditions: +> +> The above copyright notice and this permission notice shall be included in all +> copies or substantial portions of the Software. +> +> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +> IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +> FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +> AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +> LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +> OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +> SOFTWARE. +> diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Random/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Random/docs/src/index.md new file mode 100644 index 0000000..ca86de4 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Random/docs/src/index.md @@ -0,0 +1,358 @@ +# Random Numbers + +```@meta +DocTestSetup = :(using Random) +``` + +Random number generation in Julia uses the [Mersenne Twister library](http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/#dSFMT) +via `MersenneTwister` objects. Julia has a global RNG, which is used by default. Other RNG types +can be plugged in by inheriting the `AbstractRNG` type; they can then be used to have multiple +streams of random numbers. Besides `MersenneTwister`, Julia also provides the `RandomDevice` RNG +type, which is a wrapper over the OS provided entropy. 
+ +Most functions related to random generation accept an optional `AbstractRNG` object as first argument, +which defaults to the global one if not provided. Moreover, some of them accept optionally +dimension specifications `dims...` (which can be given as a tuple) to generate arrays of random +values. In a multi-threaded program, you should generally use different RNG objects from different threads +in order to be thread-safe. However, the default global RNG is thread-safe as of Julia 1.3 (because +it internally corresponds to a per-thread RNG). + +A `MersenneTwister` or `RandomDevice` RNG can generate uniformly random numbers of the following types: +[`Float16`](@ref), [`Float32`](@ref), [`Float64`](@ref), [`BigFloat`](@ref), [`Bool`](@ref), +[`Int8`](@ref), [`UInt8`](@ref), [`Int16`](@ref), [`UInt16`](@ref), [`Int32`](@ref), +[`UInt32`](@ref), [`Int64`](@ref), [`UInt64`](@ref), [`Int128`](@ref), [`UInt128`](@ref), +[`BigInt`](@ref) (or complex numbers of those types). +Random floating point numbers are generated uniformly in ``[0, 1)``. As `BigInt` represents +unbounded integers, the interval must be specified (e.g. `rand(big.(1:6))`). + +Additionally, normal and exponential distributions are implemented for some `AbstractFloat` and +`Complex` types, see [`randn`](@ref) and [`randexp`](@ref) for details. + +!!! warning + Because the precise way in which random numbers are generated is considered an implementation detail, bug fixes and speed improvements may change the stream of numbers that are generated after a version change. Relying on a specific seed or generated stream of numbers during unit testing is thus discouraged - consider testing properties of the methods in question instead. + +## Random numbers module +```@docs +Random.Random +``` + +## Random generation functions + +```@docs +Random.rand +Random.rand! +Random.bitrand +Random.randn +Random.randn! +Random.randexp +Random.randexp! 
+Random.randstring +``` + +## Subsequences, permutations and shuffling + +```@docs +Random.randsubseq +Random.randsubseq! +Random.randperm +Random.randperm! +Random.randcycle +Random.randcycle! +Random.shuffle +Random.shuffle! +``` + +## Generators (creation and seeding) + +```@docs +Random.seed! +Random.AbstractRNG +Random.MersenneTwister +Random.RandomDevice +``` + +## Hooking into the `Random` API + +There are two mostly orthogonal ways to extend `Random` functionalities: +1) generating random values of custom types +2) creating new generators + +The API for 1) is quite functional, but is relatively recent so it may still have to evolve in subsequent releases of the `Random` module. +For example, it's typically sufficient to implement one `rand` method in order to have all other usual methods work automatically. + +The API for 2) is still rudimentary, and may require more work than strictly necessary from the implementor, +in order to support usual types of generated values. + +### Generating random values of custom types + +Generating random values for some distributions may involve various trade-offs. *Pre-computed* values, such as an [alias table](https://en.wikipedia.org/wiki/Alias_method) for discrete distributions, or [“squeezing” functions](https://en.wikipedia.org/wiki/Rejection_sampling) for univariate distributions, can speed up sampling considerably. How much information should be pre-computed can depend on the number of values we plan to draw from a distribution. Also, some random number generators can have certain properties that various algorithms may want to exploit. + +The `Random` module defines a customizable framework for obtaining random values that can address these issues. 
Each invocation of `rand` generates a *sampler* which can be customized with the above trade-offs in mind, by adding methods to `Sampler`, which in turn can dispatch on the random number generator, the object that characterizes the distribution, and a suggestion for the number of repetitions. Currently, for the latter, `Val{1}` (for a single sample) and `Val{Inf}` (for an arbitrary number) are used, with `Random.Repetition` an alias for both. + +The object returned by `Sampler` is then used to generate the random values. When implementing the random generation interface for a value `X` that can be sampled from, the implementor should define the method + +```julia +rand(rng, sampler) +``` +for the particular `sampler` returned by `Sampler(rng, X, repetition)`. + +Samplers can be arbitrary values that implement `rand(rng, sampler)`, but for most applications the following predefined samplers may be sufficient: + +1. `SamplerType{T}()` can be used for implementing samplers that draw from type `T` (e.g. `rand(Int)`). This is the default returned by `Sampler` for *types*. + +2. `SamplerTrivial(self)` is a simple wrapper for `self`, which can be accessed with `[]`. This is the recommended sampler when no pre-computed information is needed (e.g. `rand(1:3)`), and is the default returned by `Sampler` for *values*. + +3. `SamplerSimple(self, data)` also contains the additional `data` field, which can be used to store arbitrary pre-computed values, which should be computed in a *custom method* of `Sampler`. + +We provide examples for each of these. We assume here that the choice of algorithm is independent of the RNG, so we use `AbstractRNG` in our signatures. + +```@docs +Random.Sampler +Random.SamplerType +Random.SamplerTrivial +Random.SamplerSimple +``` + +Decoupling pre-computation from actually generating the values is part of the API, and is also available to the user. 
As an example, assume that `rand(rng, 1:20)` has to be called repeatedly in a loop: the way to take advantage of this decoupling is as follows: + +```julia +rng = MersenneTwister() +sp = Random.Sampler(rng, 1:20) # or Random.Sampler(MersenneTwister, 1:20) +for x in X + n = rand(rng, sp) # similar to n = rand(rng, 1:20) + # use n +end +``` + +This is the mechanism that is also used in the standard library, e.g. by the default implementation of random array generation (like in `rand(1:20, 10)`). + +#### Generating values from a type + +Given a type `T`, it's currently assumed that if `rand(T)` is defined, an object of type `T` will be produced. `SamplerType` is the *default sampler for types*. In order to define random generation of values of type `T`, the `rand(rng::AbstractRNG, ::Random.SamplerType{T})` method should be defined, and should return what `rand(rng, T)` is expected to return. + +Let's take the following example: we implement a `Die` type, with a variable number `n` of sides, numbered from `1` to `n`. We want `rand(Die)` to produce a `Die` with a random number of up to 20 sides (and at least 4): + +```jldoctest Die +struct Die + nsides::Int # number of sides +end + +Random.rand(rng::AbstractRNG, ::Random.SamplerType{Die}) = Die(rand(rng, 4:20)) + +# output + +``` + +Scalar and array methods for `Die` now work as expected: + +```jldoctest Die; setup = :(Random.seed!(1)) +julia> rand(Die) +Die(15) + +julia> rand(MersenneTwister(0), Die) +Die(11) + +julia> rand(Die, 3) +3-element Vector{Die}: + Die(18) + Die(5) + Die(4) + +julia> a = Vector{Die}(undef, 3); rand!(a) +3-element Vector{Die}: + Die(5) + Die(20) + Die(15) +``` + +#### A simple sampler without pre-computed data + +Here we define a sampler for a collection. If no pre-computed data is required, it can be implemented with a `SamplerTrivial` sampler, which is in fact the *default fallback for values*.
+ +In order to define random generation out of objects of type `S`, the following method should be defined: `rand(rng::AbstractRNG, sp::Random.SamplerTrivial{S})`. Here, `sp` simply wraps an object of type `S`, which can be accessed via `sp[]`. Continuing the `Die` example, we want now to define `rand(d::Die)` to produce an `Int` corresponding to one of `d`'s sides: + +```jldoctest Die; setup = :(Random.seed!(1)) +julia> Random.rand(rng::AbstractRNG, d::Random.SamplerTrivial{Die}) = rand(rng, 1:d[].nsides); + +julia> rand(Die(4)) +3 + +julia> rand(Die(4), 3) +3-element Vector{Any}: + 4 + 1 + 1 +``` + +Given a collection type `S`, it's currently assumed that if `rand(::S)` is defined, an object of type `eltype(S)` will be produced. In the last example, a `Vector{Any}` is produced; the reason is that `eltype(Die) == Any`. The remedy is to define `Base.eltype(::Type{Die}) = Int`. + +#### Generating values for an `AbstractFloat` type + +`AbstractFloat` types are special-cased, because by default random values are not produced in the whole type domain, but rather in `[0,1)`. The following method should be implemented for `T <: AbstractFloat`: `Random.rand(::AbstractRNG, ::Random.SamplerTrivial{Random.CloseOpen01{T}})` + +#### An optimized sampler with pre-computed data + +Consider a discrete distribution, where numbers `1:n` are drawn with given probabilities that sum to one. When many values are needed from this distribution, the fastest method is using an [alias table](https://en.wikipedia.org/wiki/Alias_method). We don't provide the algorithm for building such a table here, but suppose it is available in `make_alias_table(probabilities)` instead, and `draw_number(rng, alias_table)` can be used to draw a random number from it. 
+ +Suppose that the distribution is described by +```julia +struct DiscreteDistribution{V <: AbstractVector} + probabilities::V +end +``` +and that we *always* want to build an alias table, regardless of the number of values needed (we learn how to customize this below). The methods +```julia +Random.eltype(::Type{<:DiscreteDistribution}) = Int + +function Random.Sampler(::Type{<:AbstractRNG}, distribution::DiscreteDistribution, ::Repetition) + SamplerSimple(distribution, make_alias_table(distribution.probabilities)) +end +``` +should be defined to return a sampler with pre-computed data, then +```julia +function rand(rng::AbstractRNG, sp::SamplerSimple{<:DiscreteDistribution}) + draw_number(rng, sp.data) +end +``` +will be used to draw the values. + +#### Custom sampler types + +The `SamplerSimple` type is sufficient for most use cases with precomputed data. However, in order to demonstrate how to use custom sampler types, here we implement something similar to `SamplerSimple`. + +Going back to our `Die` example: `rand(::Die)` uses random generation from a range, so there is an opportunity for this optimization. We call our custom sampler `SamplerDie`. + +```julia +import Random: Sampler, rand + +struct SamplerDie <: Sampler{Int} # generates values of type Int + die::Die + sp::Sampler{Int} # this is an abstract type, so this could be improved +end + +Sampler(RNG::Type{<:AbstractRNG}, die::Die, r::Random.Repetition) = + SamplerDie(die, Sampler(RNG, 1:die.nsides, r)) +# the `r` parameter will be explained later on + +rand(rng::AbstractRNG, sp::SamplerDie) = rand(rng, sp.sp) +``` + +It's now possible to get a sampler with `sp = Sampler(rng, die)`, and use `sp` instead of `die` in any `rand` call involving `rng`. In the simplistic example above, `die` doesn't need to be stored in `SamplerDie` but this is often the case in practice.
+ +Of course, this pattern is so frequent that the helper type used above, namely `Random.SamplerSimple`, is available, +saving us the definition of `SamplerDie`: we could have implemented our decoupling with: + +```julia +Sampler(RNG::Type{<:AbstractRNG}, die::Die, r::Random.Repetition) = + SamplerSimple(die, Sampler(RNG, 1:die.nsides, r)) + +rand(rng::AbstractRNG, sp::SamplerSimple{Die}) = rand(rng, sp.data) +``` + +Here, `sp.data` refers to the second parameter in the call to the `SamplerSimple` constructor +(in this case equal to `Sampler(rng, 1:die.nsides, r)`), while the `Die` object can be accessed +via `sp[]`. + +Like `SamplerDie`, any custom sampler must be a subtype of `Sampler{T}` where `T` is the type +of the generated values. Note that `SamplerSimple(x, data) isa Sampler{eltype(x)}`, +so this constrains what the first argument to `SamplerSimple` can be +(it's recommended to use `SamplerSimple` like in the `Die` example, where +`x` is simply forwarded while defining a `Sampler` method). +Similarly, `SamplerTrivial(x) isa Sampler{eltype(x)}`. + +Another helper type is currently available for other cases, `Random.SamplerTag`, but is +considered as internal API, and can break at any time without proper deprecations. + + +#### Using distinct algorithms for scalar or array generation + +In some cases, whether one wants to generate only a handful of values or a large number of values +will have an impact on the choice of algorithm. This is handled with the third parameter of the +`Sampler` constructor. Let's assume we defined two helper types for `Die`, say `SamplerDie1` +which should be used to generate only few random values, and `SamplerDieMany` for many values. +We can use those types as follows: + +```julia +Sampler(RNG::Type{<:AbstractRNG}, die::Die, ::Val{1}) = SamplerDie1(...) +Sampler(RNG::Type{<:AbstractRNG}, die::Die, ::Val{Inf}) = SamplerDieMany(...) +``` + +Of course, `rand` must also be defined on those types (i.e. 
`rand(::AbstractRNG, ::SamplerDie1)` and `rand(::AbstractRNG, ::SamplerDieMany)`). Note that, as usual, `SamplerTrivial` and `SamplerSimple` can be used if custom types are not necessary. + +Note: `Sampler(rng, x)` is simply a shorthand for `Sampler(rng, x, Val(Inf))`, and +`Random.Repetition` is an alias for `Union{Val{1}, Val{Inf}}`. + + +### Creating new generators + +The API is not clearly defined yet, but as a rule of thumb: +1) any `rand` method producing "basic" types (`isbitstype` integer and floating types in `Base`) + should be defined for this specific RNG, if they are needed; +2) other documented `rand` methods accepting an `AbstractRNG` should work out of the box, + (provided the methods from 1) that are relied on are implemented), + but can of course be specialized for this RNG if there is room for optimization; +3) `copy` for pseudo-RNGs should return an independent copy that generates the exact same random sequence as the + original from that point when called in the same way. When this is not feasible (e.g. hardware-based RNGs), + `copy` must not be implemented. + +Concerning 1), a `rand` method may happen to work automatically, but it's not officially +supported and may break without warnings in a subsequent release. + +To define a new `rand` method for a hypothetical `MyRNG` generator, and a value specification `s` +(e.g. `s == Int`, or `s == 1:10`) of type `S==typeof(s)` or `S==Type{s}` if `s` is a type, +the same two methods as we saw before must be defined: + +1) `Sampler(::Type{MyRNG}, ::S, ::Repetition)`, which returns an object of type say `SamplerS` +2) `rand(rng::MyRNG, sp::SamplerS)` + +It can happen that `Sampler(rng::AbstractRNG, ::S, ::Repetition)` is +already defined in the `Random` module. It would then be possible to +skip step 1) in practice (if one wants to specialize generation for +this particular RNG type), but the corresponding `SamplerS` type is +considered as internal detail, and may be changed without warning.
+ + +#### Specializing array generation + +In some cases, for a given RNG type, generating an array of random +values can be more efficient with a specialized method than by merely +using the decoupling technique explained before. This is for example +the case for `MersenneTwister`, which natively writes random values in +an array. + +To implement this specialization for `MyRNG` +and for a specification `s`, producing elements of type `S`, +the following method can be defined: +`rand!(rng::MyRNG, a::AbstractArray{S}, ::SamplerS)`, +where `SamplerS` is the type of the sampler returned by `Sampler(MyRNG, s, Val(Inf))`. +Instead of `AbstractArray`, it's possible to implement the functionality only for a subtype, e.g. `Array{S}`. +The non-mutating array method of `rand` will automatically call this specialization internally. + +```@meta +DocTestSetup = nothing +``` + +# Reproducibility + +By using an RNG parameter initialized with a given seed, you can reproduce the same pseudorandom +number sequence when running your program multiple times. However, a minor release of Julia (e.g. +1.3 to 1.4) *may change* the sequence of pseudorandom numbers generated from a specific seed, in +particular if `MersenneTwister` is used. (Even if the sequence produced by a low-level function like +[`rand`](@ref) does not change, the output of higher-level functions like [`randsubseq`](@ref) may +change due to algorithm updates.) Rationale: guaranteeing that pseudorandom streams never change +prohibits many algorithmic improvements. + +If you need to guarantee exact reproducibility of random data, it is advisable to simply *save the +data* (e.g. as a supplementary attachment in a scientific publication). (You can also, of course, +specify a particular Julia version and package manifest, especially if you require bit +reproducibility.) 
+ +Software tests that rely on *specific* "random" data should also generally either save the data, +embed it into the test code, or use third-party packages like +[StableRNGs.jl](https://github.com/JuliaRandom/StableRNGs.jl). On the other hand, tests that should +pass for *most* random data (e.g. testing `A \ (A*x) ≈ x` for a random matrix `A = randn(n,n)`) can +use an RNG with a fixed seed to ensure that simply running the test many times does not encounter a +failure due to very improbable data (e.g. an extremely ill-conditioned matrix). + +The statistical *distribution* from which random samples are drawn *is* guaranteed to be the same +across any minor Julia releases. diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/SHA/LICENSE.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/SHA/LICENSE.md new file mode 100644 index 0000000..eec075c --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/SHA/LICENSE.md @@ -0,0 +1,58 @@ +The SHA.jl package is licensed under the MIT "Expat" License: + +> Copyright (c) 2014: Elliot Saba. +> +> Permission is hereby granted, free of charge, to any person obtaining +> a copy of this software and associated documentation files (the +> "Software"), to deal in the Software without restriction, including +> without limitation the rights to use, copy, modify, merge, publish, +> distribute, sublicense, and/or sell copies of the Software, and to +> permit persons to whom the Software is furnished to do so, subject to +> the following conditions: +> +> The above copyright notice and this permission notice shall be +> included in all copies or substantial portions of the Software. +> +> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +> EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +> MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+> IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +> CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +> TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +> SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +This package was inspired by the SHA2 [source code from Minix](https://github.com/minix3/minix/blob/b6cbf7203b080219de306404f8022a65b7884f33/common/lib/libc/hash/sha2/sha2.c), itself released under the BSD license: + +> sha2.c +> +> Version 1.0.0beta1 +> +> Written by Aaron D. Gifford +> +> Copyright 2000 Aaron D. Gifford. All rights reserved. +> +> Redistribution and use in source and binary forms, with or without +> modification, are permitted provided that the following conditions +> are met: +> +> 1. Redistributions of source code must retain the above copyright +> notice, this list of conditions and the following disclaimer. +> +> 2. Redistributions in binary form must reproduce the above copyright +> notice, this list of conditions and the following disclaimer in the +> documentation and/or other materials provided with the distribution. +> +> 3. Neither the name of the copyright holder nor the names of contributors +> may be used to endorse or promote products derived from this software +> without specific prior written permission. +> +> THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) AND CONTRIBUTOR(S) ``AS IS'' AND +> ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +> IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +> ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR(S) OR CONTRIBUTOR(S) BE LIABLE +> FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +> DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +> OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +> HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +> LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +> OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/SHA/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/SHA/docs/src/index.md new file mode 100644 index 0000000..30c88e5 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/SHA/docs/src/index.md @@ -0,0 +1,75 @@ +# SHA + + +Usage is very straightforward: +```julia +julia> using SHA + +julia> bytes2hex(sha256("test")) +"9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08" +``` + +Each exported function (at the time of this writing, SHA-1, SHA-2 224, 256, 384 and 512, and SHA-3 224, 256, 384 and 512 functions are implemented) takes in either an `AbstractVector{UInt8}`, an `AbstractString` or an `IO` object. This makes it trivial to checksum a file: + +```julia +shell> cat /tmp/test.txt +test +julia> using SHA + +julia> open("/tmp/test.txt") do f + sha2_256(f) + end +32-element Array{UInt8,1}: + 0x9f + 0x86 + 0xd0 + 0x81 + 0x88 + 0x4c + 0x7d + 0x65 + ⋮ + 0x5d + 0x6c + 0x15 + 0xb0 + 0xf0 + 0x0a + 0x08 +``` + +Due to the colloquial usage of `sha256` to refer to `sha2_256`, convenience functions are provided, mapping `shaxxx()` function calls to `sha2_xxx()`. For SHA-3, no such colloquialisms exist and the user must use the full `sha3_xxx()` names. + +`shaxxx()` takes `AbstractString` and array-like objects (`NTuple` and `Array`) with elements of type `UInt8`. 
+ +To create a hash from multiple items the `SHAX_XXX_CTX()` types can be used to create a stateful hash object that +is updated with `update!` and finalized with `digest!` + +```julia +julia> ctx = SHA2_256_CTX() +SHA2 256-bit hash state + +julia> update!(ctx, b"some data") +0x0000000000000009 + +julia> update!(ctx, b"some more data") +0x0000000000000017 + +julia> digest!(ctx) +32-element Vector{UInt8}: + 0xbe + 0xcf + 0x23 + 0xda + 0xaf + 0x02 + ⋮ + 0x25 + 0x52 + 0x19 + 0xa0 + 0x8b + 0xc5 +``` + +Note that, at the time of this writing, the SHA3 code is not optimized, and as such is roughly an order of magnitude slower than SHA2. diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Serialization/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Serialization/docs/src/index.md new file mode 100644 index 0000000..c01ead7 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Serialization/docs/src/index.md @@ -0,0 +1,7 @@ +# Serialization + +```@docs +Serialization.serialize +Serialization.deserialize +Serialization.writeheader +``` diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/SharedArrays/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/SharedArrays/docs/src/index.md new file mode 100644 index 0000000..7b23ec1 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/SharedArrays/docs/src/index.md @@ -0,0 +1,11 @@ +# Shared Arrays + +```@docs +SharedArrays.SharedArray +SharedArrays.SharedVector +SharedArrays.SharedMatrix +SharedArrays.procs(::SharedArray) +SharedArrays.sdata +SharedArrays.indexpids +SharedArrays.localindices +``` diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Sockets/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Sockets/docs/src/index.md new file mode 100644 index 0000000..c294461 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Sockets/docs/src/index.md @@ -0,0 +1,33 @@ +# Sockets + +```@docs +Sockets.Sockets 
+Sockets.connect(::TCPSocket, ::Integer) +Sockets.connect(::AbstractString) +Sockets.listen(::Any) +Sockets.listen(::AbstractString) +Sockets.getaddrinfo +Sockets.getipaddr +Sockets.getipaddrs +Sockets.islinklocaladdr +Sockets.getalladdrinfo +Sockets.DNSError +Sockets.getnameinfo +Sockets.getsockname +Sockets.getpeername +Sockets.IPAddr +Sockets.IPv4 +Sockets.IPv6 +Sockets.@ip_str +Sockets.TCPSocket +Sockets.UDPSocket +Sockets.accept +Sockets.listenany +Sockets.bind +Sockets.send +Sockets.recv +Sockets.recvfrom +Sockets.setopt +Sockets.nagle +Sockets.quickack +``` diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/SparseArrays/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/SparseArrays/docs/src/index.md new file mode 100644 index 0000000..286c46c --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/SparseArrays/docs/src/index.md @@ -0,0 +1,232 @@ +# Sparse Arrays + +```@meta +DocTestSetup = :(using SparseArrays, LinearAlgebra) +``` + +Julia has support for sparse vectors and [sparse matrices](https://en.wikipedia.org/wiki/Sparse_matrix) +in the `SparseArrays` stdlib module. Sparse arrays are arrays that contain enough zeros +that storing them in a special data structure leads to savings in space and execution time, +compared to dense arrays. + +## [Compressed Sparse Column (CSC) Sparse Matrix Storage](@id man-csc) + +In Julia, sparse matrices are stored in the [Compressed Sparse Column (CSC) format](https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_column_.28CSC_or_CCS.29). +Julia sparse matrices have the type [`SparseMatrixCSC{Tv,Ti}`](@ref), where `Tv` is the +type of the stored values, and `Ti` is the integer type for storing column pointers and +row indices. 
The internal representation of `SparseMatrixCSC` is as follows: + +```julia +struct SparseMatrixCSC{Tv,Ti<:Integer} <: AbstractSparseMatrixCSC{Tv,Ti} + m::Int # Number of rows + n::Int # Number of columns + colptr::Vector{Ti} # Column j is in colptr[j]:(colptr[j+1]-1) + rowval::Vector{Ti} # Row indices of stored values + nzval::Vector{Tv} # Stored values, typically nonzeros +end +``` + +The compressed sparse column storage makes it easy and quick to access the elements in the column +of a sparse matrix, whereas accessing the sparse matrix by rows is considerably slower. Operations +such as insertion of previously unstored entries one at a time in the CSC structure tend to be slow. This is +because all elements of the sparse matrix that are beyond the point of insertion have to be moved +one place over. + +All operations on sparse matrices are carefully implemented to exploit the CSC data structure +for performance, and to avoid expensive operations. + +If you have data in CSC format from a different application or library, and wish to import it +in Julia, make sure that you use 1-based indexing. The row indices in every column need to be +sorted. If your `SparseMatrixCSC` object contains unsorted row indices, one quick way to sort +them is by doing a double transpose. + +In some applications, it is convenient to store explicit zero values in a `SparseMatrixCSC`. These +*are* accepted by functions in `Base` (but there is no guarantee that they will be preserved in +mutating operations). Such explicitly stored zeros are treated as structural nonzeros by many +routines. The [`nnz`](@ref) function returns the number of elements explicitly stored in the +sparse data structure, including non-structural zeros. In order to count the exact number of +numerical nonzeros, use [`count(!iszero, x)`](@ref), which inspects every stored element of a sparse +matrix. 
[`dropzeros`](@ref), and the in-place [`dropzeros!`](@ref), can be used to +remove stored zeros from the sparse matrix. + +```jldoctest +julia> A = sparse([1, 1, 2, 3], [1, 3, 2, 3], [0, 1, 2, 0]) +3×3 SparseMatrixCSC{Int64, Int64} with 4 stored entries: + 0 ⋅ 1 + ⋅ 2 ⋅ + ⋅ ⋅ 0 + +julia> dropzeros(A) +3×3 SparseMatrixCSC{Int64, Int64} with 2 stored entries: + ⋅ ⋅ 1 + ⋅ 2 ⋅ + ⋅ ⋅ ⋅ +``` + +## Sparse Vector Storage + +Sparse vectors are stored in a close analog to compressed sparse column format for sparse +matrices. In Julia, sparse vectors have the type [`SparseVector{Tv,Ti}`](@ref) where `Tv` +is the type of the stored values and `Ti` the integer type for the indices. The internal +representation is as follows: + +```julia +struct SparseVector{Tv,Ti<:Integer} <: AbstractSparseVector{Tv,Ti} + n::Int # Length of the sparse vector + nzind::Vector{Ti} # Indices of stored values + nzval::Vector{Tv} # Stored values, typically nonzeros +end +``` + +As for [`SparseMatrixCSC`](@ref), the `SparseVector` type can also contain explicitly +stored zeros. (See [Sparse Matrix Storage](@ref man-csc).). + +## Sparse Vector and Matrix Constructors + +The simplest way to create a sparse array is to use a function equivalent to the [`zeros`](@ref) +function that Julia provides for working with dense arrays. To produce a +sparse array instead, you can use the same name with an `sp` prefix: + +```jldoctest +julia> spzeros(3) +3-element SparseVector{Float64, Int64} with 0 stored entries +``` + +The [`sparse`](@ref) function is often a handy way to construct sparse arrays. For +example, to construct a sparse matrix we can input a vector `I` of row indices, a vector +`J` of column indices, and a vector `V` of stored values (this is also known as the +[COO (coordinate) format](https://en.wikipedia.org/wiki/Sparse_matrix#Coordinate_list_.28COO.29)). +`sparse(I,J,V)` then constructs a sparse matrix such that `S[I[k], J[k]] = V[k]`. 
The +equivalent sparse vector constructor is [`sparsevec`](@ref), which takes the (row) index +vector `I` and the vector `V` with the stored values and constructs a sparse vector `R` +such that `R[I[k]] = V[k]`. + +```jldoctest sparse_function +julia> I = [1, 4, 3, 5]; J = [4, 7, 18, 9]; V = [1, 2, -5, 3]; + +julia> S = sparse(I,J,V) +5×18 SparseMatrixCSC{Int64, Int64} with 4 stored entries: +⠀⠈⠀⡀⠀⠀⠀⠀⠠ +⠀⠀⠀⠀⠁⠀⠀⠀⠀ + +julia> R = sparsevec(I,V) +5-element SparseVector{Int64, Int64} with 4 stored entries: + [1] = 1 + [3] = -5 + [4] = 2 + [5] = 3 +``` + +The inverse of the [`sparse`](@ref) and [`sparsevec`](@ref) functions is +[`findnz`](@ref), which retrieves the inputs used to create the sparse array. +[`findall(!iszero, x)`](@ref) returns the cartesian indices of non-zero entries in `x` +(including stored entries equal to zero). + +```jldoctest sparse_function +julia> findnz(S) +([1, 4, 5, 3], [4, 7, 9, 18], [1, 2, 3, -5]) + +julia> findall(!iszero, S) +4-element Vector{CartesianIndex{2}}: + CartesianIndex(1, 4) + CartesianIndex(4, 7) + CartesianIndex(5, 9) + CartesianIndex(3, 18) + +julia> findnz(R) +([1, 3, 4, 5], [1, -5, 2, 3]) + +julia> findall(!iszero, R) +4-element Vector{Int64}: + 1 + 3 + 4 + 5 +``` + +Another way to create a sparse array is to convert a dense array into a sparse array using +the [`sparse`](@ref) function: + +```jldoctest +julia> sparse(Matrix(1.0I, 5, 5)) +5×5 SparseMatrixCSC{Float64, Int64} with 5 stored entries: + 1.0 ⋅ ⋅ ⋅ ⋅ + ⋅ 1.0 ⋅ ⋅ ⋅ + ⋅ ⋅ 1.0 ⋅ ⋅ + ⋅ ⋅ ⋅ 1.0 ⋅ + ⋅ ⋅ ⋅ ⋅ 1.0 + +julia> sparse([1.0, 0.0, 1.0]) +3-element SparseVector{Float64, Int64} with 2 stored entries: + [1] = 1.0 + [3] = 1.0 +``` + +You can go in the other direction using the [`Array`](@ref) constructor. The [`issparse`](@ref) +function can be used to query if a matrix is sparse. + +```jldoctest +julia> issparse(spzeros(5)) +true +``` + +## Sparse matrix operations + +Arithmetic operations on sparse matrices also work as they do on dense matrices. 
Indexing of, +assignment into, and concatenation of sparse matrices work in the same way as dense matrices. +Indexing operations, especially assignment, are expensive, when carried out one element at a time. +In many cases it may be better to convert the sparse matrix into `(I,J,V)` format using [`findnz`](@ref), +manipulate the values or the structure in the dense vectors `(I,J,V)`, and then reconstruct +the sparse matrix. + +## Correspondence of dense and sparse methods + +The following table gives a correspondence between built-in methods on sparse matrices and their +corresponding methods on dense matrix types. In general, methods that generate sparse matrices +differ from their dense counterparts in that the resulting matrix follows the same sparsity pattern +as a given sparse matrix `S`, or that the resulting sparse matrix has density `d`, i.e. each matrix +element has a probability `d` of being non-zero. + +Details can be found in the [Sparse Vectors and Matrices](@ref stdlib-sparse-arrays) +section of the standard library reference. + +| Sparse | Dense | Description | +|:-------------------------- |:---------------------- |:--------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| [`spzeros(m,n)`](@ref) | [`zeros(m,n)`](@ref) | Creates a *m*-by-*n* matrix of zeros. ([`spzeros(m,n)`](@ref) is empty.) | +| [`sparse(I,n,n)`](@ref) | [`Matrix(I,n,n)`](@ref)| Creates a *n*-by-*n* identity matrix. | +| [`sparse(A)`](@ref) | [`Array(S)`](@ref) | Interconverts between dense and sparse formats. | +| [`sprand(m,n,d)`](@ref) | [`rand(m,n)`](@ref) | Creates a *m*-by-*n* random matrix (of density *d*) with iid non-zero elements distributed uniformly on the half-open interval ``[0, 1)``. 
| +| [`sprandn(m,n,d)`](@ref) | [`randn(m,n)`](@ref) | Creates a *m*-by-*n* random matrix (of density *d*) with iid non-zero elements distributed according to the standard normal (Gaussian) distribution. | +| [`sprandn(rng,m,n,d)`](@ref) | [`randn(rng,m,n)`](@ref) | Creates a *m*-by-*n* random matrix (of density *d*) with iid non-zero elements generated with the `rng` random number generator | + +# [Sparse Arrays](@id stdlib-sparse-arrays) + +```@docs +SparseArrays.AbstractSparseArray +SparseArrays.AbstractSparseVector +SparseArrays.AbstractSparseMatrix +SparseArrays.SparseVector +SparseArrays.SparseMatrixCSC +SparseArrays.sparse +SparseArrays.sparsevec +SparseArrays.issparse +SparseArrays.nnz +SparseArrays.findnz +SparseArrays.spzeros +SparseArrays.spdiagm +SparseArrays.blockdiag +SparseArrays.sprand +SparseArrays.sprandn +SparseArrays.nonzeros +SparseArrays.rowvals +SparseArrays.nzrange +SparseArrays.droptol! +SparseArrays.dropzeros! +SparseArrays.dropzeros +SparseArrays.permute +permute!{Tv, Ti, Tp <: Integer, Tq <: Integer}(::SparseMatrixCSC{Tv,Ti}, ::SparseMatrixCSC{Tv,Ti}, ::AbstractArray{Tp,1}, ::AbstractArray{Tq,1}) +``` + +```@meta +DocTestSetup = nothing +``` diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Statistics/LICENSE.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Statistics/LICENSE.md new file mode 100644 index 0000000..7528792 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Statistics/LICENSE.md @@ -0,0 +1,24 @@ +Statistics.jl is licensed under the MIT License: + +> Copyright (c) 2012-2016: Jeff Bezanson, Stefan Karpinski, Viral B. Shah, +> Dahua Lin, Simon Byrne, Andreas Noack, Douglas Bates, John Myles White, +> Simon Kornblith, and other contributors. 
+ +> Permission is hereby granted, free of charge, to any person obtaining +> a copy of this software and associated documentation files (the +> "Software"), to deal in the Software without restriction, including +> without limitation the rights to use, copy, modify, merge, publish, +> distribute, sublicense, and/or sell copies of the Software, and to +> permit persons to whom the Software is furnished to do so, subject to +> the following conditions: +> +> The above copyright notice and this permission notice shall be +> included in all copies or substantial portions of the Software. +> +> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +> EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +> MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +> NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +> LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +> OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +> WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Statistics/README.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Statistics/README.md new file mode 100644 index 0000000..db50bb9 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Statistics/README.md @@ -0,0 +1,19 @@ +# Statistics.jl + +[![Travis CI Build Status][travis-img]][travis-url] + +Development repository for the Statistics standard library (stdlib) that ships with Julia. + +#### Using the development version of Statistics.jl + +If you want to develop this package, do the following steps: +- Clone the repo anywhere. +- In line 2 of the `Project.toml` file (the line that begins with `uuid = ...`), modify the UUID, e.g. change the `107` to `207`. +- Change the current directory to the Statistics repo you just cloned and start julia with `julia --project`. 
+- `import Statistics` will now load the files in the cloned repo instead of the Statistics stdlib. +- To test your changes, simply do `include("test/runtests.jl")`. + +If you need to build Julia from source with a git checkout of Statistics, then instead use `make DEPS_GIT=Statistics` when building Julia. The `Statistics` repo is in `stdlib/Statistics`, and created initially with a detached `HEAD`. If you're doing this from a pre-existing Julia repository, you may need to `make clean` beforehand. + +[travis-img]: https://travis-ci.com/JuliaLang/Statistics.jl.svg?branch=master +[travis-url]: https://travis-ci.com/JuliaLang/Statistics.jl diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Statistics/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Statistics/docs/src/index.md new file mode 100644 index 0000000..93f3db5 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Statistics/docs/src/index.md @@ -0,0 +1,19 @@ +# Statistics + +The Statistics standard library module contains basic statistics functionality. + +```@docs +std +stdm +var +varm +cor +cov +mean! +mean +median! +median +middle +quantile! +quantile +``` diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/SuiteSparse/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/SuiteSparse/docs/src/index.md new file mode 100644 index 0000000..e8654ca --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/SuiteSparse/docs/src/index.md @@ -0,0 +1,34 @@ +# Sparse Linear Algebra + +```@meta +DocTestSetup = :(using LinearAlgebra, SparseArrays, SuiteSparse) +``` + +Sparse matrix solvers call functions from [SuiteSparse](http://suitesparse.com). 
The following factorizations are available: + +| Type | Description | +|:--------------------------------- |:--------------------------------------------- | +| `SuiteSparse.CHOLMOD.Factor` | Cholesky factorization | +| `SuiteSparse.UMFPACK.UmfpackLU` | LU factorization | +| `SuiteSparse.SPQR.QRSparse` | QR factorization | + +Other solvers such as [Pardiso.jl](https://github.com/JuliaSparse/Pardiso.jl/) are available as external packages. [Arpack.jl](https://julialinearalgebra.github.io/Arpack.jl/stable/) provides `eigs` and `svds` for iterative solution of eigensystems and singular value decompositions. + +These factorizations are described in the [`Linear Algebra`](@ref man-linalg) section of the manual: +1. [`cholesky`](@ref) +2. [`ldlt`](@ref) +3. [`lu`](@ref) +4. [`qr`](@ref) + +```@docs +SuiteSparse.CHOLMOD.lowrankupdate +SuiteSparse.CHOLMOD.lowrankupdate! +SuiteSparse.CHOLMOD.lowrankdowndate +SuiteSparse.CHOLMOD.lowrankdowndate! +SuiteSparse.CHOLMOD.lowrankupdowndate! +``` + + +```@meta +DocTestSetup = nothing +``` diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/TOML/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/TOML/docs/src/index.md new file mode 100644 index 0000000..36e8ec6 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/TOML/docs/src/index.md @@ -0,0 +1,132 @@ +# TOML + +TOML.jl is a Julia standard library for parsing and writing [TOML +v1.0](https://toml.io/en/) files. + +## Parsing TOML data + +```jldoctest +julia> using TOML + +julia> data = """ + [database] + server = "192.168.1.1" + ports = [ 8001, 8001, 8002 ] + """; + +julia> TOML.parse(data) +Dict{String, Any} with 1 entry: + "database" => Dict{String, Any}("server"=>"192.168.1.1", "ports"=>[8001, 8001… +``` + +To parse a file, use [`TOML.parsefile`](@ref).
If the file has a syntax error, +an exception is thrown: + +```jldoctest +julia> using TOML + +julia> TOML.parse(""" + value = 0.0.0 + """) +ERROR: TOML Parser error: +none:1:16 error: failed to parse value + value = 0.0.0 + ^ +[...] +``` + +There are other versions of the parse functions ([`TOML.tryparse`](@ref) +and [`TOML.tryparsefile`](@ref)) that instead of throwing exceptions on parser error +return a [`TOML.ParserError`](@ref) with information: + +```jldoctest +julia> using TOML + +julia> err = TOML.tryparse(""" + value = 0.0.0 + """); + +julia> err.type +ErrGenericValueError::ErrorType = 14 + +julia> err.line +1 + +julia> err.column +16 +``` + + +## Exporting data to TOML file + +The [`TOML.print`](@ref) function is used to print (or serialize) data into TOML +format. + +```jldoctest +julia> using TOML + +julia> data = Dict( + "names" => ["Julia", "Julio"], + "age" => [10, 20], + ); + +julia> TOML.print(data) +names = ["Julia", "Julio"] +age = [10, 20] + +julia> fname = tempname(); + +julia> open(fname, "w") do io + TOML.print(io, data) + end + +julia> TOML.parsefile(fname) +Dict{String, Any} with 2 entries: + "names" => ["Julia", "Julio"] + "age" => [10, 20] +``` + +Keys can be sorted according to some value + +```jldoctest +julia> using TOML + +julia> TOML.print(Dict( + "abc" => 1, + "ab" => 2, + "abcd" => 3, + ); sorted=true, by=length) +ab = 2 +abc = 1 +abcd = 3 +``` + +For custom structs, pass a function that converts the struct to a supported +type + +```jldoctest +julia> using TOML + +julia> struct MyStruct + a::Int + b::String + end + +julia> TOML.print(Dict("foo" => MyStruct(5, "bar"))) do x + x isa MyStruct && return [x.a, x.b] + error("unhandled type $(typeof(x))") + end +foo = [5, "bar"] +``` + + +## References +```@docs +TOML.parse +TOML.parsefile +TOML.tryparse +TOML.tryparsefile +TOML.print +TOML.Parser +TOML.ParserError +``` diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Tar/README.md
b/external/julia-1.6.7/share/julia/stdlib/v1.6/Tar/README.md new file mode 100644 index 0000000..d9c2e08 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Tar/README.md @@ -0,0 +1,467 @@ +# Tar.jl + +[![Build Status](https://travis-ci.org/JuliaIO/Tar.jl.svg?branch=master)](https://travis-ci.org/JuliaIO/Tar.jl) +[![Codecov](https://codecov.io/gh/JuliaIO/Tar.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/JuliaIO/Tar.jl) + +The `Tar` package can list, extract and create POSIX TAR archives ("tarballs") +as specified in [POSIX +1003.1-2001](https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html). +It is designed to support using the TAR format as a mechanism for sending trees +of files from one system to another, rather than for the historical use case of +backing up files for restoration to the same system. Because of this design +goal, `Tar` intentionally ignores much of the metadata included in the TAR +format, which does not make sense for the data transfer use case. The package +also does not aim to read or create legacy non-POSIX variants of the TAR format, +although it does support reading GNU long name and long link extensions. + +## API & Usage + +The public API of `Tar` includes five functions and one type: + +* `create` — creates a tarball from an on-disk file tree +* `extract` — extracts a tarball to an on-disk file tree +* `list` — lists the contents of a tarball as a vector of `Header` objects +* `rewrite` — rewrite a tarball to the standard format `create` produces +* `tree_hash` — compute a tree hash of the content of a tarball (default: git + SHA1) +* `Header` — struct representing metadata that `Tar` considers important in a + TAR entry + +None of these are exported, however: the recommended usage is to do `import Tar` +and then access all of these names fully qualified as `Tar.create`, +`Tar.extract` and so on. 
+ + + +### Tar.create + +```jl +create([ predicate, ] dir, [ tarball ]; [ skeleton ]) -> tarball +``` +* `predicate :: String --> Bool` +* `dir :: AbstractString` +* `tarball :: Union{AbstractString, AbstractCmd, IO}` +* `skeleton :: Union{AbstractString, AbstractCmd, IO}` + +Create a tar archive ("tarball") of the directory `dir`. The resulting archive +is written to the path `tarball` or if no path is specified, a temporary path is +created and returned by the function call. If `tarball` is an IO object then the +tarball content is written to that handle instead (the handle is left open). + +If a `predicate` function is passed, it is called on each system path that is +encountered while recursively searching `dir` and `path` is only included in the +tarball if `predicate(path)` is true. If `predicate(path)` returns false for a +directory, then the directory is excluded entirely: nothing under that directory +will be included in the archive. + +If the `skeleton` keyword is passed then the file or IO handle given is used as +a "skeleton" to generate the tarball. You create a skeleton file by passing the +`skeleton` keyword to the `extract` command. If `create` is called with that +skeleton file and the extracted files haven't changed, an identical tarball is +recreated. The `skeleton` and `predicate` arguments cannot be used together. + +### Tar.extract + +```jl +extract([ predicate, ] tarball, [ dir ]; + [ skeleton, ] [ copy_symlinks ]) -> dir +``` +* `predicate :: Header --> Bool` +* `tarball :: Union{AbstractString, AbstractCmd, IO}` +* `dir :: AbstractString` +* `skeleton :: Union{AbstractString, AbstractCmd, IO}` +* `copy_symlinks :: Bool` + +Extract a tar archive ("tarball") located at the path `tarball` into the +directory `dir`. If `tarball` is an IO object instead of a path, then the +archive contents will be read from that IO stream. 
The archive is extracted to +`dir` which must either be an existing empty directory or a non-existent path +which can be created as a new directory. If `dir` is not specified, the archive +is extracted into a temporary directory which is returned by `extract`. + +If a `predicate` function is passed, it is called on each `Header` object that +is encountered while extracting `tarball` and the entry is only extracted if the +`predicate(hdr)` is true. This can be used to selectively extract only parts of +an archive, to skip entries that cause `extract` to throw an error, or to record +what is extracted during the extraction process. + +If the `skeleton` keyword is passed then a "skeleton" of the extracted tarball +is written to the file or IO handle given. This skeleton file can be used to +recreate an identical tarball by passing the `skeleton` keyword to the `create` +function. The `skeleton` and `predicate` arguments cannot be used together. + +If `copy_symlinks` is `true` then instead of extracting symbolic links as such, +they will be extracted as copies of what they link to if they are internal to +the tarball and if it is possible to do so. Non-internal symlinks, such as a +link to `/etc/passwd` will not be copied. Symlinks which are in any way cyclic +will also not be copied and will instead be skipped. By default, `extract` will +detect whether symlinks can be created in `dir` or not and will automatically +copy symlinks if they cannot be created. + +### Tar.list + +```jl +list(tarball; [ strict = true ]) -> Vector{Header} +list(callback, tarball; [ strict = true ]) +``` +* `callback :: Header --> Bool` +* `tarball :: Union{AbstractString, AbstractCmd, IO}` +* `strict :: Bool` + +List the contents of a tar archive ("tarball") located at the path `tarball`. If +`tarball` is an IO handle, read the tar contents from that stream. Returns a +vector of `Header` structs. See [`Header`](@ref) for details. 
If a `callback` is +provided then instead of returning a vector of headers, the callback is called +on each `Header`. This can be useful if the number of items in the tarball is +large or if you want to examine items prior to an error in the tarball. + +By default `list` will error if it encounters any tarball contents which the +`extract` function would refuse to extract. With `strict=false` it will skip +these checks and list all the contents of the tar file whether `extract` +would extract them or not. Beware that malicious tarballs can do all sorts of +crafty and unexpected things to try to trick you into doing something bad. + +If the `tarball` argument is a skeleton file (see `extract` and `create`) then +`list` will detect that from the file header and appropriately list or iterate +the headers of the skeleton file. + +### Tar.rewrite + +```jl +rewrite([ predicate, ], old_tarball, [ new_tarball ]) -> new_tarball +``` +* `predicate :: Header --> Bool` +* `old_tarball :: Union{AbstractString, AbstractCmd, IO}` +* `new_tarball :: Union{AbstractString, AbstractCmd, IO}` + +Rewrite `old_tarball` to the standard format that `create` generates, while also +checking that it doesn't contain anything that would cause `extract` to raise an +error. This is functionally equivalent to doing +```jl +Tar.create(Tar.extract(predicate, old_tarball), new_tarball) +``` +However, it never extracts anything to disk and instead uses the `seek` function +to navigate the old tarball's data. If no `new_tarball` argument is passed, the +new tarball is written to a temporary file whose path is returned. + +If a `predicate` function is passed, it is called on each `Header` object that +is encountered while extracting `old_tarball` and the entry is skipped unless +`predicate(hdr)` is true. This can be used to selectively rewrite only parts of +an archive, to skip entries that would cause `extract` to throw an error, or to +record what content is encountered during the rewrite process.
+ +### Tar.tree_hash + +```jl +tree_hash([ predicate, ] tarball; + [ algorithm = "git-sha1", ] + [ skip_empty = false ]) -> hash::String +``` +* `predicate :: Header --> Bool` +* `tarball :: Union{AbstractString, AbstractCmd, IO}` +* `algorithm :: AbstractString` +* `skip_empty :: Bool` + +Compute a tree hash value for the file tree that the tarball contains. By +default, this uses git's tree hashing algorithm with the SHA1 secure hash +function (like current versions of git). This means that for any tarball whose +file tree git can represent—i.e. one with only files, symlinks and non-empty +directories—the hash value computed by this function will be the same as the +hash value git would compute for that file tree. Note that tarballs can +represent file trees with empty directories, which git cannot store, and this +function can generate hashes for those, which will, by default (see `skip_empty` +below for how to change this behavior), differ from the hash of a tarball which +omits those empty directories. In short, the hash function agrees with git on +all trees which git can represent, but extends (in a consistent way) the domain +of hashable trees to other trees which git cannot represent. + +If a `predicate` function is passed, it is called on each `Header` object that +is encountered while processing `tarball` and an entry is only hashed if +`predicate(hdr)` is true. This can be used to selectively hash only parts of an +archive, to skip entries that cause `extract` to throw an error, or to record +what is extracted during the hashing process. + +Currently supported values for `algorithm` are `git-sha1` (the default) and +`git-sha256`, which uses the same basic algorithm as `git-sha1` but replaces the +SHA1 hash function with SHA2-256, the hash function that git will transition to +using in the future (due to known attacks on SHA1). Support for other file tree +hashing algorithms may be added in the future. 
+ +The `skip_empty` option controls whether directories in the tarball which +recursively contain no files or symlinks are included in the hash or ignored. +In general, if you are hashing the content of a tarball or a file tree, you care +about all directories, not just non-empty ones, so including these in the +computed hash is the default. So why does this function even provide the option +to skip empty directories? Because git refuses to store empty directories and +will ignore them if you try to add them to a repo. So if you compute a reference +tree hash by adding files to a git repo and then asking git for the tree +hash, the hash value that you get will match the hash value computed by +`tree_hash` with `skip_empty=true`. In other words, this option allows +`tree_hash` to emulate how git would hash a tree with empty directories. If you +are hashing trees that may contain empty directories (i.e. do not come from a +git repo), however, it is recommended that you hash them using a tool (such as +this one) that does not ignore empty directories. + +### Tar.Header + +The `Header` type is a struct representing the essential metadata for a single +record in a tar file with this definition: +```jl +struct Header + path :: String # path relative to the root + type :: Symbol # type indicator (see below) + mode :: UInt16 # mode/permissions (best viewed in octal) + size :: Int64 # size of record data in bytes + link :: String # target path of a symlink +end +``` +Types are represented with the following symbols: `file`, `hardlink`, `symlink`, +`chardev`, `blockdev`, `directory`, `fifo`, or for unknown types, the typeflag +character as a symbol. Note that [`extract`](#Tarextract) refuses to extract +record types other than `file`, `symlink` and `directory`; [`list`](#Tarlist) +will only list other kinds of records if called with `strict=false`. + + + +### Compression + +It is typical to compress tarballs when saving or transferring them.
In the UNIX +tradition of doing one thing and doing it well, the `Tar` package does not do +any kind of compression and instead makes it easy to compose its API with +external compression tools. The simplest way to read a compressed archive is to +use a command-line tool to decompress it. For example: +```jl +Tar.list(`gzcat $tarball`) +Tar.extract(`gzcat $tarball`) +``` +This will spawn the `gzcat $tarball` command, read the uncompressed tarball data +from the output of that process, and then close the process. Creating a tarball +with the `gzip` command is nearly as easy: +```jl +Tar.create(dir, pipeline(`gzip -9`, tarball)) +``` +This assumes that `dir` is the directory you want to archive and `tarball` is +the path you want to create as a compressed archive. + +If you want to compress or decompress a tarball in the same process, you can +use various +[TranscodingStreams](https://github.com/JuliaIO/TranscodingStreams.jl) +packages: +```jl +using CodecZlib + +tar_gz = open(tarball, write=true) +tar = GzipCompressorStream(tar_gz) +Tar.create(dir, tar) +close(tar) +``` +This assumes that `dir` is the directory you want to archive and `tarball` is +the path you want to create as a compressed archive. You can decompress +in-process in a similar manner: +```jl +using CodecZlib + +tar_gz = open(tarball) +tar = GzipDecompressorStream(tar_gz) +dir = Tar.extract(tar) +close(tar) +``` +This assumes that `tarball` is the path of the compressed archive you want to +extract. + +### API comparison with command-line tar + +It might be helpful to compare the `Tar` API with command-line `tar`. Unlike +`tar -c` the `Tar.create` function does not include any of the path you tell it +to bundle in the resulting TAR file: the location of the data is not part of the +data.
Doing `Tar.create(dir, tarball)` is roughly equivalent to running the +following `tar` command: +```sh +tar -f $tarball -C $dir -c $(cd $dir; ls -A) +``` +In other words, `tar` is told to change into the directory `dir` before +constructing the tarball and then include all the top-level items in that +directory without any path prefix. Note that the above command does not fully +emulate the behavior of `Tar.create`: it does not sort entries in the same order +and it still records user and group information, modification times and exact +permissions. Coaxing command-line `tar` programs to omit this non-portable +information and use a portable (and `git`-compatible sort order) is non-trivial. + +Another difference from command-line `tar`: non-empty directories are also +omitted from the tarballs that `Tar` creates since no metadata is recorded about +directories aside from the fact that they exist and the existence of non-empty +directories is already implied by the fact that they contain something else. If, +in the future, the ability to record metadata about directories is added, +tarballs will record entries for non-empty directories with non-default +metadata. + +On the extraction side of things, doing `Tar.extract(tarball, dir)` is roughly +equivalent to the following commands: +```sh +test -d $dir || mkdir $dir +tar -f $tarball -C $dir -mx +``` +Again, `tar` is told to change into the directory `dir` before extracting the +tarball and to extract each path relative to that directory. The `-m` option +tells `tar` to ignore the modification times recorded in the tarball and just +let files and directories have their natural modification times. + +If the current user has elevated privileges, the `tar` command will attempt to +change the owner and group of files to what is recorded in the tarball, whereas +`Tar.extract` will never do that. The `tar` command may also try to restore +permissions without respecting the current `umask` if the user is an +administrator. 
Again, `Tar.extract` will never do that—it behaves the same way +for any users: by ignoring any user/group/permission information, aside from +whether plain files are executable by their owner or not. To suppress these +behaviors with GNU tar, you can use the `--no-same-owner` and +`--no-same-permissions` options; these options are not broadly supported by +other `tar` commands, which may not have options to support these behaviors. + +## Design & Features + +Unlike the `tar` command line tool, which was originally designed to archive +data in order to restore it back to the same system or to a replica thereof, the +`Tar` package is designed for using the TAR format to transfer trees of files +and directories from one system to another. This design goal means that some +metadata fields supported by the TAR format and used by default by historical +`tar` tools are not used or supported by `Tar`. In short, the choice of features +and defaults for `Tar` are designed to support transfer of data, rather than +backup and restoration. + +The TAR format can, for example, record the name and ID of the user that owns +each file. Recording this information makes perfect sense when using tarballs +for backup: the `tar` program should run as root when restoring data, so it can +restore the original owner of each file and directory. On the other hand, this +ownership information is of no use when using the TAR format to transfer data +from one system to another: the user names and IDs will not generally be the +same on different systems, and the tool should _not_ be run as `root`, so it +cannot change the owner of anything it extracts. For data transfer, ownership +metadata should be disregarded and need not be recorded in the first place. + +Similarly, it makes little sense, when using tarballs for data transfer, to copy +the modification time of each file from the source system. 
Those time stamps are +unlikely to be relevant on the destination system, and in some cases, clock skew +between the systems could mean that time stamps from the source appear to be in +the future at the destination. This can confuse some programs and may even be +perceived as an attempted security breach; most `tar` command line tools print +warnings when extracting files with time stamps from the future. When using the +TAR format for data transfer, it is better to ignore time stamps and just let +the extracted contents have natural modification times. + +The features and defaults of the `Tar` package are guided by the principle that +it uses the TAR format for transmitting data, not as a tool for backup and +restoration. If you want to use the TAR format for archival purposes, you are +likely better off using a traditional command line tool like [GNU +tar](https://www.gnu.org/software/tar/). If, on the other hand, you want to use +the TAR format to transmit data from one system to another, then you've come to +the right place. + +### File Types + +Since `Tar` is designed for transmission of file and directory trees, it +supports only the following file types: + +* plain files +* directories +* symlinks + +The `Tar` package does not support other file types that the TAR format can +represent, including: hard links, character devices, block devices, and FIFOs. +If you attempt to create or extract an archive that contains any of these kinds +of entries, `Tar` will raise an error. You can, however, list the contents of a +tarball containing other kinds of entries by passing the `strict=false` flag to +the `list` function; without this option, `list` raises the same error as +`extract` would. + +In the future, optional support may be added for using hard links within +archives to avoid duplicating identical files. 
+ +### Time Stamps + +Also in accordance with its design goal as a data transfer tool, the `Tar` +package does not record or set modification times upon tarball creation and +extraction. When creating a tarball, it sets the time stamp of each entry to +`0`, representing the UNIX epoch (Jan 1st, 1970). When extracting a tarball, it +ignores the time stamps of entries and lets all extracted content have "natural" +modification times based on when each file or directory is extracted. + +In the future, optional support may be added for recording and restoring time +stamps. + +### Users & Groups + +`Tar` ignores user and group names and IDs when creating and extracting +tarballs. This is due to two facts: + +* names and IDs on source and destination systems will generally not match; +* names and IDs can only be changed if `Tar` is run with elevated privileges. + +The first fact means that it probably doesn't make sense to try to restore +ownership when transferring data, while the second fact means that it's probably +not possible. Accordingly, `Tar` disregards user and group names and IDs when +creating and extracting tarballs. During creation, the ID fields are recorded as +`0` and names fields are recorded as the empty string. When extracting a +tarball, the user and group fields are ignored entirely and all extracted +content is owned by the current user. + +It is unlikely that support will be added for recording or restoring ownership +of files or directories since that functionality only makes sense when using the +TAR format for backup, a purpose better served by using a command line `tar` +tool. + +### Permissions + +Upon tarball extraction, `Tar` respects the permissions recorded for each file. 
+When creating a tarball, however, it ignores most permission information and +normalizes permissions as follows: + +* files that are not executable by the owner are archived with mode `0o644`; +* files that are executable by the owner are archived with mode `0o755`; +* directories and symlinks are always archived with mode `0o755`. + +In other words, `Tar` records only one significant bit of information: whether +plain files are executable by their owner or not. No permission information for +directories or symlinks is considered significant. This one bit of information +is the only one which makes sense across all platforms, so this choice makes +`Tar`'s behavior as portable as possible. On systems (like Windows) that do not +use POSIX modes, whatever permission mechanism exists (_e.g._ ACLs) should be +queried/modified to determine whether each file is executable by its owner or +not. Unfortunately, this is currently broken on Windows since `libuv` does not +correctly support querying or changing the user executable "bit"; this is +actively being worked on, however, and should be fixed in future versions of +Julia. + +In the future, optional support may be added for recording exact permission +modes on POSIX systems, and possibly for normalizing permissions on extraction +in the same way that they are normalized upon archive creation. + +### Reproducibility + +The information that `Tar` records about permissions is the same information +that `git` considers to be significant when recording and hashing tree contents +(admittedly not by coincidence). As a result, an important and useful +consequence of `Tar`'s design is that it has the following properties: + +* if you create a tarball from a file tree and extract it, the new tree will + have the same `git` tree hash as the original; +* if you `git checkout` a file tree and archive it using `Tar`, the resulting + TAR archive file is always the same.
+ +One important caveat to keep in mind is that `git` ignores directories that +recursively contain only directories—_i.e._ unless there's a file or a symlink +somewhere, `git` will not acknowledge the existence of a subdirectory. This +means that two trees with the same `git` tree hash can produce different +tarballs if they differ by subdirectories containing no files or symlinks: `git` +will ignore those subdirectories, while `Tar` will not. Therefore, they will +have the same `git` tree hash, but produce different tarballs. Two _identical_ +file trees will always produce identical tarballs, however, and that tarball +should remain stable in future versions of the `Tar` package. + +The `tree_hash` function can be used to compute a git-style tree hash of the +contents of a tarball (without needing to extract it). Moreover, two tarballs +created by the `Tar` package will have the same hash if and only if they contain +the same file tree, which is true if and only if they are identical tarballs. +You can, however, hash tarballs not created by `Tar` this way to see if they +represent the same file tree, and you can use the `skip_empty=true` option to +`tree_hash` to compute the hash that `git` would assign the tree, ignoring empty +directories. diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Test/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Test/docs/src/index.md new file mode 100644 index 0000000..8e3ba43 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Test/docs/src/index.md @@ -0,0 +1,293 @@ +# Unit Testing + +```@meta +DocTestSetup = :(using Test) +``` + +## Testing Base Julia + +Julia is under rapid development and has an extensive test suite to verify functionality across +multiple platforms. If you build Julia from source, you can run this test suite with `make test`. +In a binary install, you can run the test suite using `Base.runtests()`. 
+ +```@docs +Base.runtests +``` + +## Basic Unit Tests + +The `Test` module provides simple *unit testing* functionality. Unit testing is a way to +see if your code is correct by checking that the results are what you expect. It can be helpful +to ensure your code still works after you make changes, and can be used when developing as a way +of specifying the behaviors your code should have when complete. + +Simple unit testing can be performed with the `@test` and `@test_throws` macros: + +```@docs +Test.@test +Test.@test_throws +``` + +For example, suppose we want to check our new function `foo(x)` works as expected: + +```jldoctest testfoo +julia> using Test + +julia> foo(x) = length(x)^2 +foo (generic function with 1 method) +``` + +If the condition is true, a `Pass` is returned: + +```jldoctest testfoo +julia> @test foo("bar") == 9 +Test Passed + +julia> @test foo("fizz") >= 10 +Test Passed +``` + +If the condition is false, then a `Fail` is returned and an exception is thrown: + +```jldoctest testfoo +julia> @test foo("f") == 20 +Test Failed at none:1 + Expression: foo("f") == 20 + Evaluated: 1 == 20 +ERROR: There was an error during testing +``` + +If the condition could not be evaluated because an exception was thrown, which occurs in this +case because `length` is not defined for symbols, an `Error` object is returned and an exception +is thrown: + +```julia-repl +julia> @test foo(:cat) == 1 +Error During Test + Test threw an exception of type MethodError + Expression: foo(:cat) == 1 + MethodError: no method matching length(::Symbol) + Closest candidates are: + length(::SimpleVector) at essentials.jl:256 + length(::Base.MethodList) at reflection.jl:521 + length(::MethodTable) at reflection.jl:597 + ... + Stacktrace: + [...] 
+ERROR: There was an error during testing +``` + +If we expect that evaluating an expression *should* throw an exception, then we can use `@test_throws` +to check that this occurs: + +```jldoctest testfoo +julia> @test_throws MethodError foo(:cat) +Test Passed + Thrown: MethodError +``` + +## Working with Test Sets + +Typically a large number of tests are used to make sure functions work correctly over a range +of inputs. In the event a test fails, the default behavior is to throw an exception immediately. +However, it is normally preferable to run the rest of the tests first to get a better picture +of how many errors there are in the code being tested. + +!!! note + The `@testset` will create a local scope of its own when running the tests in it. + +The `@testset` macro can be used to group tests into *sets*. All the tests in a test set will +be run, and at the end of the test set a summary will be printed. If any of the tests failed, +or could not be evaluated due to an error, the test set will then throw a `TestSetException`. 
+ +```@docs +Test.@testset +``` + +We can put our tests for the `foo(x)` function in a test set: + +```jldoctest testfoo +julia> @testset "Foo Tests" begin + @test foo("a") == 1 + @test foo("ab") == 4 + @test foo("abc") == 9 + end; +Test Summary: | Pass Total +Foo Tests | 3 3 +``` + +Test sets can also be nested: + +```jldoctest testfoo +julia> @testset "Foo Tests" begin + @testset "Animals" begin + @test foo("cat") == 9 + @test foo("dog") == foo("cat") + end + @testset "Arrays $i" for i in 1:3 + @test foo(zeros(i)) == i^2 + @test foo(fill(1.0, i)) == i^2 + end + end; +Test Summary: | Pass Total +Foo Tests | 8 8 +``` + +In the event that a nested test set has no failures, as happened here, it will be hidden in the +summary, unless the `verbose=true` option is passed: + +```jldoctest testfoo +julia> @testset verbose = true "Foo Tests" begin + @testset "Animals" begin + @test foo("cat") == 9 + @test foo("dog") == foo("cat") + end + @testset "Arrays $i" for i in 1:3 + @test foo(zeros(i)) == i^2 + @test foo(fill(1.0, i)) == i^2 + end + end; +Test Summary: | Pass Total +Foo Tests | 8 8 + Animals | 2 2 + Arrays 1 | 2 2 + Arrays 2 | 2 2 + Arrays 3 | 2 2 +``` + +If we do have a test failure, only the details for the failed test sets will be shown: + +```julia-repl +julia> @testset "Foo Tests" begin + @testset "Animals" begin + @testset "Felines" begin + @test foo("cat") == 9 + end + @testset "Canines" begin + @test foo("dog") == 9 + end + end + @testset "Arrays" begin + @test foo(zeros(2)) == 4 + @test foo(fill(1.0, 4)) == 15 + end + end + +Arrays: Test Failed + Expression: foo(fill(1.0, 4)) == 15 + Evaluated: 16 == 15 +[...] +Test Summary: | Pass Fail Total +Foo Tests | 3 1 4 + Animals | 2 2 + Arrays | 1 1 2 +ERROR: Some tests did not pass: 3 passed, 1 failed, 0 errored, 0 broken. 
+``` + +## Other Test Macros + +As calculations on floating-point values can be imprecise, you can perform approximate equality +checks using either `@test a ≈ b` (where `≈`, typed via tab completion of `\approx`, is the +[`isapprox`](@ref) function) or use [`isapprox`](@ref) directly. + +```jldoctest +julia> @test 1 ≈ 0.999999999 +Test Passed + +julia> @test 1 ≈ 0.999999 +Test Failed at none:1 + Expression: 1 ≈ 0.999999 + Evaluated: 1 ≈ 0.999999 +ERROR: There was an error during testing +``` + +```@docs +Test.@inferred +Test.@test_logs +Test.@test_deprecated +Test.@test_warn +Test.@test_nowarn +``` + +## Broken Tests + +If a test fails consistently it can be changed to use the `@test_broken` macro. This will denote +the test as `Broken` if the test continues to fail and alerts the user via an `Error` if the test +succeeds. + +```@docs +Test.@test_broken +``` + +`@test_skip` is also available to skip a test without evaluation, but counting the skipped test +in the test set reporting. The test will not run but gives a `Broken` `Result`. + +```@docs +Test.@test_skip +``` + +## Creating Custom `AbstractTestSet` Types + +Packages can create their own `AbstractTestSet` subtypes by implementing the `record` and `finish` +methods. The subtype should have a one-argument constructor taking a description string, with +any options passed in as keyword arguments. + +```@docs +Test.record +Test.finish +``` + +`Test` takes responsibility for maintaining a stack of nested testsets as they are executed, +but any result accumulation is the responsibility of the `AbstractTestSet` subtype. You can access +this stack with the `get_testset` and `get_testset_depth` methods. Note that these functions are +not exported. + +```@docs +Test.get_testset +Test.get_testset_depth +``` + +`Test` also makes sure that nested `@testset` invocations use the same `AbstractTestSet` +subtype as their parent unless it is set explicitly. It does not propagate any properties of the +testset. 
Option inheritance behavior can be implemented by packages using the stack infrastructure +that `Test` provides. + +Defining a basic `AbstractTestSet` subtype might look like: + +```julia +import Test: Test, record, finish +using Test: AbstractTestSet, Result, Pass, Fail, Error +using Test: get_testset_depth, get_testset +struct CustomTestSet <: Test.AbstractTestSet + description::AbstractString + foo::Int + results::Vector + # constructor takes a description string and options keyword arguments + CustomTestSet(desc; foo=1) = new(desc, foo, []) +end + +record(ts::CustomTestSet, child::AbstractTestSet) = push!(ts.results, child) +record(ts::CustomTestSet, res::Result) = push!(ts.results, res) +function finish(ts::CustomTestSet) + # just record if we're not the top-level parent + if get_testset_depth() > 0 + record(get_testset(), ts) + end + ts +end +``` + +And using that testset looks like: + +```julia +@testset CustomTestSet foo=4 "custom testset inner 2" begin + # this testset should inherit the type, but not the argument. 
+ @testset "custom testset inner" begin + @test true + end +end +``` + +```@meta +DocTestSetup = nothing +``` diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/UUIDs/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/UUIDs/docs/src/index.md new file mode 100644 index 0000000..1e6c950 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/UUIDs/docs/src/index.md @@ -0,0 +1,8 @@ +# UUIDs + +```@docs +UUIDs.uuid1 +UUIDs.uuid4 +UUIDs.uuid5 +UUIDs.uuid_version +``` diff --git a/external/julia-1.6.7/share/julia/stdlib/v1.6/Unicode/docs/src/index.md b/external/julia-1.6.7/share/julia/stdlib/v1.6/Unicode/docs/src/index.md new file mode 100644 index 0000000..aba9d80 --- /dev/null +++ b/external/julia-1.6.7/share/julia/stdlib/v1.6/Unicode/docs/src/index.md @@ -0,0 +1,7 @@ +# Unicode + +```@docs +Unicode.isassigned +Unicode.normalize +Unicode.graphemes +``` diff --git a/external/julia-1.6.7/share/julia/test/clangsa/lit.cfg.py b/external/julia-1.6.7/share/julia/test/clangsa/lit.cfg.py new file mode 100644 index 0000000..5790eab --- /dev/null +++ b/external/julia-1.6.7/share/julia/test/clangsa/lit.cfg.py @@ -0,0 +1,27 @@ +import os +import sys +import re +import platform + +import lit.util +import lit.formats + +config.name = 'Julia-GCChecker' +config.suffixes = ['.c','.cpp'] +config.test_source_root = os.path.dirname(__file__) +config.test_format = lit.formats.ShTest(True) +config.substitutions.append(('%shlibext', '.dylib' if platform.system() == 'Darwin' else '.dll' if + platform.system() == 'Windows' else '.so')) +config.substitutions.append(("%julia_home", os.path.join(os.path.dirname(__file__), "../.."))) + +path = os.path.pathsep.join((os.path.join(os.path.dirname(__file__),"../../usr/tools"), os.path.join(os.path.dirname(__file__),"../../usr/bin"), config.environment['PATH'])) +config.environment['PATH'] = path +config.environment['HOME'] = "/tmp" +config.environment['CLANGSA_FLAGS'] = os.environ.get('CLANGSA_FLAGS', "") 
+config.environment['CLANGSA_CXXFLAGS'] = os.environ.get('CLANGSA_CXXFLAGS', "") +config.environment['CPPFLAGS'] = os.environ.get('CPPFLAGS', "") +config.environment['CFLAGS'] = os.environ.get('CFLAGS', "") +config.environment['CXXFLAGS'] = os.environ.get('CXXFLAGS', "") + +if platform.machine() == "x86_64": + config.available_features.add('x86_64') diff --git a/external/julia-1.6.7/share/julia/test/llvmpasses/lit.cfg.py b/external/julia-1.6.7/share/julia/test/llvmpasses/lit.cfg.py new file mode 100644 index 0000000..2054876 --- /dev/null +++ b/external/julia-1.6.7/share/julia/test/llvmpasses/lit.cfg.py @@ -0,0 +1,21 @@ +import os +import sys +import re +import platform + +import lit.util +import lit.formats + +config.name = 'Julia' +config.suffixes = ['.ll','.jl'] +config.test_source_root = os.path.dirname(__file__) +config.test_format = lit.formats.ShTest(True) +config.substitutions.append(('%shlibext', '.dylib' if platform.system() == 'Darwin' else '.dll' if + platform.system() == 'Windows' else '.so')) + +path = os.path.pathsep.join((os.path.join(os.path.dirname(__file__),"../../usr/tools"), os.path.join(os.path.dirname(__file__),"../../usr/bin"), config.environment['PATH'])) +config.environment['PATH'] = path +config.environment['HOME'] = "/tmp" + +if platform.machine() == "x86_64": + config.available_features.add('x86_64') diff --git a/external/julia_depot/registries/General/.ci/instantiate.sh b/external/julia_depot/registries/General/.ci/instantiate.sh new file mode 100755 index 0000000..ce636ec --- /dev/null +++ b/external/julia_depot/registries/General/.ci/instantiate.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +# This script runs `Pkg.instantiate()` inside a bounded retry loop. + +dir="${1:-}" + +if [ -z "$dir" ]; then + echo "Usage: $0 " >&2 + exit 2 +fi +if [ ! -d "$dir" ]; then + echo "Error: '$dir' is not a directory." 
>&2 + exit 2 +fi + +try_count=0 +while [ "$try_count" -lt 9 ]; do + julia --color=yes --project="$dir" -e 'import Pkg; Pkg.instantiate()' + if [ $? -eq 0 ]; then exit 0; fi + try_count=$((try_count + 1)) + sleep 20 +done + +exit 1 diff --git a/external/julia_depot/registries/General/CONTRIBUTING.md b/external/julia_depot/registries/General/CONTRIBUTING.md new file mode 100644 index 0000000..62e00fe --- /dev/null +++ b/external/julia_depot/registries/General/CONTRIBUTING.md @@ -0,0 +1,185 @@ +# Contribution guidelines + +Anyone can help improve the General registry! Here are a few ways. + +## As a package author + +You can register your package! +See [Registering a package in General](https://github.com/JuliaRegistries/General#registering-a-package-in-general) in the README for how to do that. +The "FAQ" section in the README helps answer many more questions, like [do I need to register a package to install it?](https://github.com/JuliaRegistries/General#do-i-need-to-register-a-package-to-install-it), [should I register my package?](https://github.com/JuliaRegistries/General#should-i-register-my-package), and more. + +* Please be aware of the [package naming guidelines][naming-guidelines] +* We strongly encourage authors to follow best practices like having documentation (or a descriptive README), tests, and continuous integration. + +## As a Julia community member + +You (yes, you!) can help General be the best registry it can be. + +### New package registrations + +The first step to getting involved with General is to check out new package registrations. +They are filed under the ["new package" label](https://github.com/JuliaRegistries/General/pulls?q=is%3Apr+is%3Aopen+label%3A%22new+package%22), and a automatic feed posts them in the `#new-packages-feed` channel in the [community Slack](https://julialang.org/slack/) or [Zulip](https://julialang.zulipchat.com/register/). 
+ +When registration is triggered for a new package (or new version of a package), [RegistryCI.jl](RegistryCI)-powered AutoMerge automatically runs and performs [some basic checks](https://juliaregistries.github.io/RegistryCI.jl/stable/guidelines/). +These are merely guidelines, and not all checks must pass --- if a check fails, the registration can still be manually merged. +There are a few ways to help here: + +1. First, whenever you are engaging with a package author, remember to always be polite and kind-- if you are feeling frustrated, it may be better to not comment at all and let someone else respond instead. +Not everyone understands things from the same explanations, and some folks may need to translate or overcome other barriers to understanding, or may simply disagree with you. +When we are helping maintain the General registry, we are acting as representatives of the Julia community, and need to be mindful of that. +2. If an AutoMerge guideline fails and the package author does not seem to know how to address it, you can help guide them through the process. +Pointing them to the [FAQ](FAQ) can help, as can updating the FAQ and other guidance to make the process more clear. +Sometimes folks also just need a bit of help to understand the process, and writing a note can help. +3. If an AutoMerge fails but you think the package should be manually merged, comment in the PR to explain why. + * One common issue here is the name similarity check. + This exists to prevent malicious [typosquatting](https://en.wikipedia.org/wiki/Typosquatting). + For example, [Flux](https://github.com/FluxML/Flux.jl) is a popular machine learning package. + A malicious actor could try to register FIux (with an uppercase-eye instead of a lowercase-ell), and encourage users to install it by writing a tutorial or such. + They could then add malicious code to the package to try to steal secrets. 
+ Such an event would be an extreme security violation and the package would be yanked or removed from the registry as soon as possible-- but we try to be a bit safer by proactively screening names to require manual merging if they are "too similar" to an existing package name. + + If a package fails the name similarity check, you can help out by taking a look at the two names as well as the package code itself, and try to make a determination if it looks "too close" (e.g. Websockets vs WebSocket), and if the package code contains anything that would indicate malicious activity. + You can make a comment in the PR indicating whether or not you think the name similarity is okay. Include `[noblock]` in the comment if you don't want to block AutoMerge. + If you have [triage](permissions)-level access or higher to General, you can additionally override automerge by adding the label _Override AutoMerge: name similarity is okay_. + +4. Regardless of AutoMerge's status, if you think perhaps something more should be done before registration, feel free to leave a comment in the PR explaining what you think should be done first. +Any comment without `[noblock]` included in it will block AutoMerge from automatically merging the pull request (editing `[noblock]` into old comments **will** allow it to resume). + * For example, occasionally someone will register a package without any content in order to reserve the package name, with the intent to add content later. + We don't allow that in General, and ask authors to add content first before registering. + * Sometimes authors register packages without any description of what the package is for in the README or without documentation. + Since registration is a mechanism to share code with the whole Julia community, such a description is important for the package to be useful. 
+ While we don't strictly require such documentation, it can help to give a polite and gentle nudge in the PR comments, or show folks how to write documentation and/or what is helpful to include in a README. + We want to encourage best practices (in an inclusive and friendly way!) even when they are not strict requirements. + * Sometimes package names are possibly confusing or don't conform to our [naming guidelines][naming-guidelines], but AutoMerge does not detect this. + Feel free to comment, describing what you think is confusing or non-compliant about the current name, and any suggestions you have for a more clear name. + +### Other PRs to General + +Sometimes, the registry needs to be updated in other ways that involve manual pull requests (PRs) rather than auto-generated ones. +The most common reason is to update the URL for a repository. + +#### Updating the URL for a repository + +If someone transfers a GitHub repository, [we ask](https://github.com/JuliaRegistries/General#how-do-i-transfer-a-package-to-an-organization-or-another-user) that they update the URL stored in General. +This is done by manually making a PR to General to update the URL. +You can review such a PR by checking that the old URL redirects to the new one. +* If it does, that's a clear sign that the change is legitimate and the new URL is correct. + If you have write permissions to General, you can merge the PR; otherwise you can approve it or comment. +* If it does not, you can ask the author why. This should be handled on a case-by-case basis. Be sure to check that: + 1. The package is not being hijacked; check for example that the person making the PR has registered a version of the package before, indicating they are authorized to do so. + 2. All the registered revisions of the package are accessible in the new repository. + Specifically, this means checking that all the git-tree-shas can be found in the new repository. 
+ See [the appendix](#appendix-checking-if-a-repository-contains-all-registered-versions-of-a-package) below for a script to automate this checking. + +### Other ways to help + +Besides helping out with PRs to General, you can... + +* ...improve [General's README](https://github.com/JuliaRegistries/General#general), the [RegistryCI documentation](https://juliaregistries.github.io/RegistryCI.jl/stable/guidelines/), or these guidelines! +* ...add new checks to AutoMerge (in [RegistryCI](RegistryCI)) or improve existing ones. +* ...address open issues in [General](https://github.com/JuliaRegistries/General/issues), [RegistryCI.jl](https://github.com/JuliaRegistries/RegistryCI.jl/issues), or [Registrator.jl](https://github.com/JuliaRegistries/Registrator.jl/issues). +* ...write blog posts and documentation to help folks get started with writing documentation, tests, and setting up CI for their own packages, and find appropriate places to link to it and help out new package authors. + +Additionally, if you have elevated [permissions](permissions) to General, there's a few more things you can do: + +* [triage] You can add or remove labels to PRs to help communicate the status and to automatically override AutoMerge for name similarity failures + * Specifically, adding the label `Override AutoMerge: name similarity is okay` will retrigger AutoMerge and cause it to ignore the distance check between the package name and existing package names. This will not override any other guidelines (e.g. name too short). +* [triage] You can close PRs if the package author requests it or the registration is superseded by another registration request. +* [write] You can merge PRs that have the _needs to be manually merged in 3 days_ label once the requisite waiting period has passed, assuming there are no outstanding objections in the PR comments. +* [write] You can choose to facilitate expedited merge requests, after manually reviewing the package.
+You generally should not merge your own registrations or those you are involved with (though you can make requests to another maintainer). See also [this FAQ entry](https://github.com/JuliaRegistries/General/#who-can-approve-an-early-merge). +* [write] You can merge improvements to the README, these guidelines, or our workflows. +* [admin] You can give other contributors triage-level access so they can apply labels to PRs, or write-level permissions to merge PRs. + +## Expectations and tips for registry maintainers + +We ask registry maintainers to refrain from merging their own PRs or PRs that they are closely involved in. + +### Is there a checklist for reviewing PRs that change the URL of a repo? + +Here's a checklist. + +```md +1. [ ] I have confirmed that the old URL automatically redirects to the new URL in the web browser. +2. [ ] The PR preserves the trailing `.git` at the end of the URL. +3. [ ] The Treecheck CI job ran on this PR and is green. +``` + +## Appendix: Checking if a repository contains all registered versions of a package + +When someone wishes to move a package from one repo to another, it is important that the new repo contains all of the tree hashes corresponding to registered versions of a package. That way these old versions of the package can continue to be installed from the new repository. In order to check if a given repository contains all of the registered versions of a package, the following script can be used: + +```julia +using RegistryInstances, UUIDs, Git + +const GENERAL_UUID = UUID("23338594-aafe-5451-b93e-139f81909106") + +pretty_print_row(row) = println(row.pkg_name, ": v", row.version, " ", row.found ? 
"found" : "is missing") +pretty_print_table(table) = foreach(pretty_print_row, table) + +function check_all_found(table) + idx = findfirst(row -> !row.found, table) + idx === nothing && return nothing + row = table[idx] + error(string("Repository missing v", row.version, " of package $(row.pkg_name)")) +end + +function check_packages_versions(pkg_names, repo_url; registry_uuid=GENERAL_UUID, verbose=true, throw=true) + if isdir(repo_url) + dir = repo_url + else + dir = mktempdir() + run(`$(git()) clone $(repo_url) $dir`) + end + + registry = only(filter!(r -> r.uuid == registry_uuid, reachable_registries())) + + table = @NamedTuple{pkg_name::String, version::VersionNumber, found::Bool, tree_sha::Base.SHA1}[] + + for pkg_name in pkg_names + pkg = registry.pkgs[only(uuids_from_name(registry, pkg_name))] + versions = registry_info(pkg).version_info + for version in sort(collect(keys(versions))) + tree_sha = versions[version].git_tree_sha1 + found = success(`$(git()) -C $dir rev-parse -q --verify "$(tree_sha)^{tree}"`) + + push!(table, (; pkg_name, version, found, tree_sha)) + end + end + verbose && pretty_print_table(table) + throw && check_all_found(table) + return table +end + +check_package_versions(pkg_name, repo_url; kw...) = check_packages_versions([pkg_name], repo_url; kw...) +``` + +For example, in [General#75319](https://github.com/JuliaRegistries/General/pull/75319), a package author wanted to update the URL associated +to their package "FastParzenWindows". At the time, the package had 1 registered version. We can check that it is present in the new repository via: + +```julia +julia> check_package_versions("FastParzenWindows", "https://github.com/ngiann/FastParzenWindows.jl.git"); +Cloning into '/var/folders/jb/plyyfc_d2bz195_0rc0n_zcw0000gp/T/jl_ke9E8C'... +...text omitted... +FastParzenWindows: v0.1.2 found +``` + +We see that this version was found in the new repository. 
This script was based on [this comment from General#35965](https://github.com/JuliaRegistries/General/pull/35965#issuecomment-832721704), +which involved checking if 4 packages in the same repository all had their versions present in the new repository. That example can be handled as follows: + +```julia +pkg_names = ["ReinforcementLearningBase", "ReinforcementLearningCore", + "ReinforcementLearningEnvironments", "ReinforcementLearningZoo"] +check_packages_versions(pkg_names, "https://github.com/JuliaReinforcementLearning/ReinforcementLearning.jl.git") +``` + +You can also use this script against local repositories when modifying Git commit history: + +```julia +check_package_versions("FastParzenWindows", ".") +``` + +[FAQ]: https://github.com/JuliaRegistries/General#faq +[naming-guidelines]: ./NAMING_GUIDELINES.md +[permissions]: https://docs.github.com/en/organizations/managing-access-to-your-organizations-repositories/repository-permission-levels-for-an-organization#permission-levels-for-repositories-owned-by-an-organization +[RegistryCI]: https://github.com/JuliaRegistries/RegistryCI.jl/ diff --git a/flossing_suite/README.md b/flossing_suite/README.md new file mode 100644 index 0000000..6081c97 --- /dev/null +++ b/flossing_suite/README.md @@ -0,0 +1,89 @@ +# Flossing Suite + +This directory is a reproducible wrapper around the existing flossing code. +It separates three questions: + +1. Does the Engelken algorithm itself reproduce on a vanilla RNN toy task? +2. Does a faithful pre/interfloss analogue help TRM/HRM when the flossing phase is separate from task loss? +3. Do our one-sided variants (`top1_cf`, `spectrum_cf`, `volume_cf`) behave differently from Engelken's two-sided L2 target? + +## Important Algorithmic Distinctions + +- `engelken_python_flossing.py` is the toy RNN faithful port.
It keeps the paper-style separate flossing phase, no task/floss mixed objective, flosses only input/recurrent/bias parameters, uses differentiable QR, and optimizes `mean((lambda_i - lambda_star)^2)`. +- `step7_interfloss.py` is the HRM/TRM analogue. It also uses separate floss-only episodes. Ordinary training steps use only supervised ACT loss. +- `step7_interfloss.py --floss-mode engelken_l2` is the Rainer-style two-sided target. +- `top1_cf`, `spectrum_cf`, and `volume_cf` are our one-sided contractive variants, not the paper method. +- KL preservation is optional and only applies during floss-only episodes. + +## Current Known Sanity Check + +Existing toy RNN result: + +- Baseline no floss: final eval accuracy about `0.777`. +- Prefloss: final eval accuracy about `0.997`. + +This means the Python port can reproduce a positive toy result. Negative HRM/TRM results should therefore be interpreted as model/task transfer issues, not simply "flossing code cannot work." + +## Recommended Workflow + +Run smoke tests: + +```bash +bash research/flossing/flossing_suite/smoke_test.sh 0 +``` + +Launch toy RNN paper-style suite: + +```bash +bash research/flossing/flossing_suite/launch_toy_official_suite.sh +``` + +Launch TRM faithful Rainer-style suite: + +```bash +GPU_BASE=0 GPU_PREFLOSS=1 GPU_INTER=3 bash research/flossing/flossing_suite/launch_trm_faithful_suite.sh +``` + +Launch TRM CF/volume variants: + +```bash +GPU_TOP1=0 GPU_VOLUME=1 GPU_KL=3 bash research/flossing/flossing_suite/launch_trm_variant_suite.sh +``` + +Summarize all available flossing logs: + +```bash +/home/yurenh2/miniconda3/envs/rrm/bin/python research/flossing/flossing_suite/summarize_flossing.py +``` + +Check active jobs: + +```bash +bash research/flossing/flossing_suite/status.sh +``` + +Wait for current TRM faithful jobs and refresh summary: + +```bash +bash research/flossing/flossing_suite/watch_and_summarize.sh +``` + +Outputs go to: + +- `results/toy_rnn/` +- `results/trm_faithful/` +- 
`results/trm_variants/` +- `results/smoke/` +- `results/summary/` + +## Existing Historical Results Included By Summarizer + +The summarizer also scans: + +- `research/flossing/engelken_python/*.json` +- `research/flossing/engelken_paper_faithful/*.json` +- `research/flossing/step6_*.json` +- `research/flossing/step7_*.json` +- `research/flossing/flossing_suite/results/**/*.json` + +This keeps old negative/positive evidence visible without rerunning everything. diff --git a/flossing_suite/archive_failed_oom_b8/trm_seed123_baseline_nofloss_b8_10000.cmd.sh b/flossing_suite/archive_failed_oom_b8/trm_seed123_baseline_nofloss_b8_10000.cmd.sh new file mode 100755 index 0000000..98411d8 --- /dev/null +++ b/flossing_suite/archive_failed_oom_b8/trm_seed123_baseline_nofloss_b8_10000.cmd.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +set -euo pipefail +cd "/home/yurenh2/rrm" +export CUDA_VISIBLE_DEVICES="0" +export PYTHONUNBUFFERED=1 +exec "/home/yurenh2/miniconda3/envs/rrm/bin/python" research/flossing/step7_interfloss.py \ + --model trm \ + --ckpt-root "/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro" \ + --ckpt-name __random__ \ + --init-seed 123 \ + --train-steps "10000" \ + --batch-size 8 \ + --train-lr 1e-4 \ + --floss-lr 1e-4 \ + --floss-mode engelken_l2 \ + --lambda-star 0 \ + --k-lyap 4 \ + --lyap-act-steps 4 \ + --seed 42 \ + --eval-every 1000 \ + --eval-n "1000" \ + --eval-batch-size 64 \ + --floss-log-every 10 \ + --train-puzzle-emb \ + --puzzle-emb-lr 1e-4 \ + --puzzle-emb-weight-decay 1.0 \ + --kl-beta 0 \ + --floss-steps "0" \ + --interfloss-at "" \ + --out "/home/yurenh2/rrm/research/flossing/flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_b8_10000.json" diff --git a/flossing_suite/archive_failed_oom_b8/trm_seed123_pre_inter_0_500_b8_k4_10000.cmd.sh b/flossing_suite/archive_failed_oom_b8/trm_seed123_pre_inter_0_500_b8_k4_10000.cmd.sh new file mode 100755 index 0000000..e3f25ab --- 
/dev/null +++ b/flossing_suite/archive_failed_oom_b8/trm_seed123_pre_inter_0_500_b8_k4_10000.cmd.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +set -euo pipefail +cd "/home/yurenh2/rrm" +export CUDA_VISIBLE_DEVICES="3" +export PYTHONUNBUFFERED=1 +exec "/home/yurenh2/miniconda3/envs/rrm/bin/python" research/flossing/step7_interfloss.py \ + --model trm \ + --ckpt-root "/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro" \ + --ckpt-name __random__ \ + --init-seed 123 \ + --train-steps "10000" \ + --batch-size 8 \ + --train-lr 1e-4 \ + --floss-lr 1e-4 \ + --floss-mode engelken_l2 \ + --lambda-star 0 \ + --k-lyap 4 \ + --lyap-act-steps 4 \ + --seed 42 \ + --eval-every 1000 \ + --eval-n "1000" \ + --eval-batch-size 64 \ + --floss-log-every 10 \ + --train-puzzle-emb \ + --puzzle-emb-lr 1e-4 \ + --puzzle-emb-weight-decay 1.0 \ + --kl-beta 0 \ + --floss-steps "500" \ + --interfloss-at "0,500" \ + --out "/home/yurenh2/rrm/research/flossing/flossing_suite/results/trm_faithful/trm_seed123_pre_inter_0_500_b8_k4_10000.json" diff --git a/flossing_suite/archive_failed_oom_b8/trm_seed123_prefloss_0_b8_k4_10000.cmd.sh b/flossing_suite/archive_failed_oom_b8/trm_seed123_prefloss_0_b8_k4_10000.cmd.sh new file mode 100755 index 0000000..86c445d --- /dev/null +++ b/flossing_suite/archive_failed_oom_b8/trm_seed123_prefloss_0_b8_k4_10000.cmd.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +set -euo pipefail +cd "/home/yurenh2/rrm" +export CUDA_VISIBLE_DEVICES="1" +export PYTHONUNBUFFERED=1 +exec "/home/yurenh2/miniconda3/envs/rrm/bin/python" research/flossing/step7_interfloss.py \ + --model trm \ + --ckpt-root "/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro" \ + --ckpt-name __random__ \ + --init-seed 123 \ + --train-steps "10000" \ + --batch-size 8 \ + --train-lr 1e-4 \ + --floss-lr 1e-4 \ + --floss-mode engelken_l2 \ + --lambda-star 0 \ + --k-lyap 4 \ + --lyap-act-steps 
4 \ + --seed 42 \ + --eval-every 1000 \ + --eval-n "1000" \ + --eval-batch-size 64 \ + --floss-log-every 10 \ + --train-puzzle-emb \ + --puzzle-emb-lr 1e-4 \ + --puzzle-emb-weight-decay 1.0 \ + --kl-beta 0 \ + --floss-steps "500" \ + --interfloss-at "0" \ + --out "/home/yurenh2/rrm/research/flossing/flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_b8_k4_10000.json" diff --git a/flossing_suite/launch_toy_official_suite.sh b/flossing_suite/launch_toy_official_suite.sh new file mode 100755 index 0000000..43a2047 --- /dev/null +++ b/flossing_suite/launch_toy_official_suite.sh @@ -0,0 +1,77 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT="/home/yurenh2/rrm" +PY="/home/yurenh2/miniconda3/envs/rrm/bin/python" +OUT_DIR="${ROOT}/research/flossing/flossing_suite/results/toy_rnn" +mkdir -p "${OUT_DIR}" + +GPU_BASE="${GPU_BASE:-0}" +GPU_PREFLOSS="${GPU_PREFLOSS:-1}" +GPU_INTER="${GPU_INTER:-3}" + +write_and_launch() { + local gpu="$1" + local name="$2" + local pre_epochs="$3" + local max_inter="$4" + local cmd="${OUT_DIR}/${name}.cmd.sh" + local log="${OUT_DIR}/${name}.log" + local pid="${OUT_DIR}/${name}.pid" + + cat > "${cmd}" < "${log}" 2>&1 < /dev/null & + echo $! 
> "${pid}" + echo "${name}: pid $(cat "${pid}") on GPU ${gpu}" +} + +write_and_launch "${GPU_BASE}" "toy_baseline_no_floss_N80_k40_E1000" 0 0 +write_and_launch "${GPU_PREFLOSS}" "toy_prefloss_N80_k40_E1000" 100 0 +write_and_launch "${GPU_INTER}" "toy_pre_inter_N80_k40_E1000" 100 2 + +echo "queued toy RNN official-style suite in ${OUT_DIR}" diff --git a/flossing_suite/launch_trm_faithful_suite.sh b/flossing_suite/launch_trm_faithful_suite.sh new file mode 100755 index 0000000..2b3190c --- /dev/null +++ b/flossing_suite/launch_trm_faithful_suite.sh @@ -0,0 +1,88 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT="/home/yurenh2/rrm" +PY="/home/yurenh2/miniconda3/envs/rrm/bin/python" +OUT_DIR="${ROOT}/research/flossing/flossing_suite/results/trm_faithful" +CKPT_ROOT="${ROOT}/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro" +mkdir -p "${OUT_DIR}" + +GPU_BASE="${GPU_BASE:-0}" +GPU_PREFLOSS="${GPU_PREFLOSS:-1}" +GPU_INTER="${GPU_INTER:-3}" +TRAIN_STEPS="${TRAIN_STEPS:-20000}" +PREFLOSS_STEPS="${PREFLOSS_STEPS:-500}" +INTERFLOSS_STEPS="${INTERFLOSS_STEPS:-100}" +INTERFLOSS_EVERY="${INTERFLOSS_EVERY:-2000}" +INTERFLOSS_START="${INTERFLOSS_START:-2000}" +INTERFLOSS_STOP="${INTERFLOSS_STOP:-10000}" +EVAL_N="${EVAL_N:-1000}" +TASK_BATCH_SIZE="${TASK_BATCH_SIZE:-32}" +FLOSS_BATCH_SIZE="${FLOSS_BATCH_SIZE:-4}" + +write_and_launch() { + local gpu="$1" + local name="$2" + local schedule="$3" + local floss_steps="$4" + local every="$5" + local start="$6" + local stop="$7" + local cmd="${OUT_DIR}/${name}.cmd.sh" + local log="${OUT_DIR}/${name}.log" + local pid="${OUT_DIR}/${name}.pid" + + cat > "${cmd}" < "${log}" 2>&1 < /dev/null & + echo $! 
> "${pid}" + echo "${name}: pid $(cat "${pid}") on GPU ${gpu}" +} + +write_and_launch "${GPU_BASE}" \ + "trm_seed123_baseline_nofloss_tb${TASK_BATCH_SIZE}_fb${FLOSS_BATCH_SIZE}_${TRAIN_STEPS}" \ + "" 0 0 0 -1 +write_and_launch "${GPU_PREFLOSS}" \ + "trm_seed123_prefloss_0_tb${TASK_BATCH_SIZE}_fb${FLOSS_BATCH_SIZE}_k4_${TRAIN_STEPS}" \ + "0" "${PREFLOSS_STEPS}" 0 0 -1 +write_and_launch "${GPU_INTER}" \ + "trm_seed123_pre_inter_periodic${INTERFLOSS_EVERY}_tb${TASK_BATCH_SIZE}_fb${FLOSS_BATCH_SIZE}_k4_${TRAIN_STEPS}" \ + "0" "${INTERFLOSS_STEPS}" "${INTERFLOSS_EVERY}" "${INTERFLOSS_START}" "${INTERFLOSS_STOP}" + +echo "queued TRM Engelken-faithful suite in ${OUT_DIR}" diff --git a/flossing_suite/launch_trm_variant_suite.sh b/flossing_suite/launch_trm_variant_suite.sh new file mode 100755 index 0000000..86c2e64 --- /dev/null +++ b/flossing_suite/launch_trm_variant_suite.sh @@ -0,0 +1,87 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT="/home/yurenh2/rrm" +PY="/home/yurenh2/miniconda3/envs/rrm/bin/python" +OUT_DIR="${ROOT}/research/flossing/flossing_suite/results/trm_variants" +CKPT_ROOT="${ROOT}/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro" +mkdir -p "${OUT_DIR}" + +GPU_TOP1="${GPU_TOP1:-0}" +GPU_VOLUME="${GPU_VOLUME:-1}" +GPU_KL="${GPU_KL:-3}" +TRAIN_STEPS="${TRAIN_STEPS:-20000}" +FLOSS_STEPS="${FLOSS_STEPS:-100}" +INTERFLOSS_EVERY="${INTERFLOSS_EVERY:-2000}" +INTERFLOSS_START="${INTERFLOSS_START:-2000}" +INTERFLOSS_STOP="${INTERFLOSS_STOP:-10000}" +EVAL_N="${EVAL_N:-1000}" +TASK_BATCH_SIZE="${TASK_BATCH_SIZE:-32}" +FLOSS_BATCH_SIZE="${FLOSS_BATCH_SIZE:-4}" + +write_and_launch() { + local gpu="$1" + local name="$2" + local floss_mode="$3" + local kl_beta="$4" + local cmd="${OUT_DIR}/${name}.cmd.sh" + local log="${OUT_DIR}/${name}.log" + local pid="${OUT_DIR}/${name}.pid" + + cat > "${cmd}" < "${log}" 2>&1 < /dev/null & + echo $! 
> "${pid}" + echo "${name}: pid $(cat "${pid}") on GPU ${gpu}" +} + +write_and_launch "${GPU_TOP1}" \ + "trm_seed123_top1_cf_periodic${INTERFLOSS_EVERY}_tb${TASK_BATCH_SIZE}_fb${FLOSS_BATCH_SIZE}_k4_${TRAIN_STEPS}" \ + top1_cf 0 +write_and_launch "${GPU_VOLUME}" \ + "trm_seed123_volume_cf_periodic${INTERFLOSS_EVERY}_tb${TASK_BATCH_SIZE}_fb${FLOSS_BATCH_SIZE}_k4_${TRAIN_STEPS}" \ + volume_cf 0 +write_and_launch "${GPU_KL}" \ + "trm_seed123_volume_cf_kl10_periodic${INTERFLOSS_EVERY}_tb${TASK_BATCH_SIZE}_fb${FLOSS_BATCH_SIZE}_k4_${TRAIN_STEPS}" \ + volume_cf 10 + +echo "queued TRM CF/volume variant suite in ${OUT_DIR}" diff --git a/flossing_suite/results/summary/flossing_eval_curves.csv b/flossing_suite/results/summary/flossing_eval_curves.csv new file mode 100644 index 0000000..50aad05 --- /dev/null +++ b/flossing_suite/results/summary/flossing_eval_curves.csv @@ -0,0 +1,399 @@ +name,family,model,selector,x,acc,loss,kind +engelken_paper_faithful/smoke,rrm_step7,trm,engelken_l2@0_steps1,0,0.0,,initial +engelken_paper_faithful/smoke,rrm_step7,trm,engelken_l2@0_steps1,0,0.0,,after_floss +engelken_paper_faithful/smoke,rrm_step7,trm,engelken_l2@0_steps1,1,0.0,,task +engelken_paper_faithful/smoke,rrm_step7,trm,engelken_l2@0_steps1,1,0.0,,final +engelken_python/debug_n80_k40_3epoch,toy_rnn,vanilla_rnn,baseline_no_floss,1,0.5124555160142349,2.1269004344940186,eval +engelken_python/debug_n80_k40_3epoch,toy_rnn,vanilla_rnn,baseline_no_floss,2,0.5113434163701067,1.894472599029541,eval +engelken_python/debug_n80_k40_3epoch,toy_rnn,vanilla_rnn,baseline_no_floss,3,0.4853202846975089,1.9043740034103394,eval +engelken_python/gpu_smoke,toy_rnn,vanilla_rnn,prefloss,1,0.5151515151515151,0.8679313659667969,eval +engelken_python/gpu_smoke,toy_rnn,vanilla_rnn,prefloss,2,0.49242424242424243,0.9058582782745361,eval +engelken_python/official_py_baseline_no_floss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,baseline_no_floss,1,0.5063389679715302,2.151065170764923,eval 
+engelken_python/official_py_baseline_no_floss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,baseline_no_floss,100,0.49888790035587194,0.6981811821460724,eval +engelken_python/official_py_baseline_no_floss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,baseline_no_floss,200,0.5025578291814947,0.7067497670650482,eval +engelken_python/official_py_baseline_no_floss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,baseline_no_floss,300,0.4968861209964413,0.7166876345872879,eval +engelken_python/official_py_baseline_no_floss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,baseline_no_floss,400,0.5023354092526691,0.702445313334465,eval +engelken_python/official_py_baseline_no_floss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,baseline_no_floss,500,0.49844306049822057,0.6947141587734222,eval +engelken_python/official_py_baseline_no_floss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,baseline_no_floss,600,0.49649688612099646,0.6974123269319534,eval +engelken_python/official_py_baseline_no_floss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,baseline_no_floss,700,0.49827624555160144,0.7033253163099289,eval +engelken_python/official_py_baseline_no_floss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,baseline_no_floss,800,0.5050600533807829,0.6959485709667206,eval +engelken_python/official_py_baseline_no_floss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,baseline_no_floss,900,0.5216303380782918,0.6901399791240692,eval +engelken_python/official_py_baseline_no_floss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,baseline_no_floss,1000,0.7770796263345195,0.47778721898794174,eval +engelken_python/official_py_prefloss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,prefloss,1,0.5019461743772242,1.6856758892536163,eval +engelken_python/official_py_prefloss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,prefloss,100,0.9623554270462634,0.11776464246213436,eval +engelken_python/official_py_prefloss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,prefloss,200,0.9935498220640568,0.027145093772560358,eval 
+engelken_python/official_py_prefloss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,prefloss,300,0.9963300711743772,0.014187443535774946,eval +engelken_python/official_py_prefloss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,prefloss,400,0.9975533807829181,0.009522247593849897,eval +engelken_python/official_py_prefloss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,prefloss,500,0.9973865658362989,0.009814425837248564,eval +engelken_python/official_py_prefloss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,prefloss,600,0.9983874555160143,0.006339075975120068,eval +engelken_python/official_py_prefloss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,prefloss,700,0.9981094306049823,0.008150239707902074,eval +engelken_python/official_py_prefloss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,prefloss,800,0.9981650355871887,0.006329314899630845,eval +engelken_python/official_py_prefloss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,prefloss,900,0.9981650355871886,0.0068275314988568425,eval +engelken_python/official_py_prefloss_N80_k40_E1000_setsid,toy_rnn,vanilla_rnn,prefloss,1000,0.9972753558718862,0.01047352165915072,eval +engelken_python/smoke,toy_rnn,vanilla_rnn,prefloss,1,0.49242424242424243,0.9093148708343506,eval +engelken_python/smoke,toy_rnn,vanilla_rnn,prefloss,2,0.5151515151515151,0.9205472469329834,eval +engelken_python/smoke,toy_rnn,vanilla_rnn,prefloss,3,0.5303030303030303,0.8646541237831116,eval +flossing_suite/results/smoke/toy_smoke,toy_rnn,vanilla_rnn,prefloss,1,0.5476190476190477,0.8243135213851929,eval +flossing_suite/results/smoke/toy_smoke,toy_rnn,vanilla_rnn,prefloss,2,0.5238095238095238,0.9111667275428772,eval +flossing_suite/results/smoke/toy_smoke,toy_rnn,vanilla_rnn,prefloss,3,0.6190476190476191,0.7443239688873291,eval +flossing_suite/results/smoke/trm_step7_decoupled_periodic_smoke,rrm_step7,trm,engelken_l2@0_steps1_every1,0,0.0,,initial +flossing_suite/results/smoke/trm_step7_decoupled_periodic_smoke,rrm_step7,trm,engelken_l2@0_steps1_every1,1,0.0,,task 
+flossing_suite/results/smoke/trm_step7_decoupled_periodic_smoke,rrm_step7,trm,engelken_l2@0_steps1_every1,2,0.0,,task +flossing_suite/results/smoke/trm_step7_decoupled_periodic_smoke,rrm_step7,trm,engelken_l2@0_steps1_every1,2,0.0,,final +flossing_suite/results/smoke/trm_step7_smoke,rrm_step7,trm,engelken_l2@0_steps1,0,0.0,,initial +flossing_suite/results/smoke/trm_step7_smoke,rrm_step7,trm,engelken_l2@0_steps1,1,0.0,,task +flossing_suite/results/smoke/trm_step7_smoke,rrm_step7,trm,engelken_l2@0_steps1,1,0.0,,final +flossing_suite/results/smoke/trm_task_batch32_smoke,rrm_step7,trm,baseline_no_floss,0,0.0,,initial +flossing_suite/results/smoke/trm_task_batch32_smoke,rrm_step7,trm,baseline_no_floss,1,0.0,,task +flossing_suite/results/smoke/trm_task_batch32_smoke,rrm_step7,trm,baseline_no_floss,1,0.0,,final +flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000,toy_rnn,vanilla_rnn,baseline_no_floss,1,0.5063389679715302,2.151065170764923,eval +flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000,toy_rnn,vanilla_rnn,baseline_no_floss,100,0.49888790035587194,0.6981811821460724,eval +flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000,toy_rnn,vanilla_rnn,baseline_no_floss,200,0.5025578291814947,0.7067497670650482,eval +flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000,toy_rnn,vanilla_rnn,baseline_no_floss,300,0.4968861209964413,0.7166876345872879,eval +flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000,toy_rnn,vanilla_rnn,baseline_no_floss,400,0.5023354092526691,0.702445313334465,eval +flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000,toy_rnn,vanilla_rnn,baseline_no_floss,500,0.49844306049822057,0.6947141587734222,eval +flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000,toy_rnn,vanilla_rnn,baseline_no_floss,600,0.49649688612099646,0.6974123269319534,eval 
+flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000,toy_rnn,vanilla_rnn,baseline_no_floss,700,0.49827624555160144,0.7033253163099289,eval +flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000,toy_rnn,vanilla_rnn,baseline_no_floss,800,0.5050600533807829,0.6959485709667206,eval +flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000,toy_rnn,vanilla_rnn,baseline_no_floss,900,0.5216303380782918,0.6901399791240692,eval +flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000,toy_rnn,vanilla_rnn,baseline_no_floss,1000,0.7770796263345195,0.47778721898794174,eval +flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000,toy_rnn,vanilla_rnn,pre_interfloss,1,0.5019461743772242,1.6856758892536163,eval +flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000,toy_rnn,vanilla_rnn,pre_interfloss,100,0.564279359430605,1.2137768864631653,eval +flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000,toy_rnn,vanilla_rnn,pre_interfloss,200,0.5421485765124555,1.4198356568813324,eval +flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000,toy_rnn,vanilla_rnn,pre_interfloss,300,0.9971085409252668,0.013412912143394351,eval +flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000,toy_rnn,vanilla_rnn,pre_interfloss,400,0.9976089857651246,0.009532386669889092,eval +flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000,toy_rnn,vanilla_rnn,pre_interfloss,500,0.9974977758007118,0.00779245572630316,eval +flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000,toy_rnn,vanilla_rnn,pre_interfloss,600,0.9976645907473309,0.007897141389548779,eval +flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000,toy_rnn,vanilla_rnn,pre_interfloss,700,0.9977201957295374,0.00889350671786815,eval +flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000,toy_rnn,vanilla_rnn,pre_interfloss,800,0.9978314056939501,0.006675782264210284,eval 
+flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000,toy_rnn,vanilla_rnn,pre_interfloss,900,0.9979982206405694,0.00699911720585078,eval +flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000,toy_rnn,vanilla_rnn,pre_interfloss,1000,0.9978314056939501,0.008064003428444266,eval +flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000,toy_rnn,vanilla_rnn,prefloss,1,0.5019461743772242,1.6856758892536163,eval +flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000,toy_rnn,vanilla_rnn,prefloss,100,0.9623554270462634,0.11776464246213436,eval +flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000,toy_rnn,vanilla_rnn,prefloss,200,0.9935498220640568,0.027145093772560358,eval +flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000,toy_rnn,vanilla_rnn,prefloss,300,0.9963300711743772,0.014187443535774946,eval +flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000,toy_rnn,vanilla_rnn,prefloss,400,0.9975533807829181,0.009522247593849897,eval +flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000,toy_rnn,vanilla_rnn,prefloss,500,0.9973865658362989,0.009814425837248564,eval +flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000,toy_rnn,vanilla_rnn,prefloss,600,0.9983874555160143,0.006339075975120068,eval +flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000,toy_rnn,vanilla_rnn,prefloss,700,0.9981094306049823,0.008150239707902074,eval +flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000,toy_rnn,vanilla_rnn,prefloss,800,0.9981650355871887,0.006329314899630845,eval +flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000,toy_rnn,vanilla_rnn,prefloss,900,0.9981650355871886,0.0068275314988568425,eval +flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000,toy_rnn,vanilla_rnn,prefloss,1000,0.9972753558718862,0.01047352165915072,eval +flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_b4_10000,rrm_step7,trm,baseline_no_floss,0,0.0,,initial 
+flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_b4_10000,rrm_step7,trm,baseline_no_floss,1000,0.0,,task +flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_b4_10000,rrm_step7,trm,baseline_no_floss,2000,0.0,,task +flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_b4_10000,rrm_step7,trm,baseline_no_floss,3000,0.0,,task +flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_b4_10000,rrm_step7,trm,baseline_no_floss,4000,0.0,,task +flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_b4_10000,rrm_step7,trm,baseline_no_floss,5000,0.001,,task +flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_b4_10000,rrm_step7,trm,baseline_no_floss,6000,0.002,,task +flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,0,0.0,,initial +flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,1000,0.0,,task +flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,2000,0.0,,task +flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,3000,0.022,,task +flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,4000,0.027,,task +flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,5000,0.028,,task +flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,6000,0.029,,task +flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,7000,0.066,,task +flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,8000,0.077,,task 
+flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,9000,0.179,,task +flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,10000,0.136,,task +flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,11000,0.164,,task +flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,12000,0.184,,task +flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,13000,0.176,,task +flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,14000,0.153,,task +flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,15000,0.194,,task +flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,16000,0.185,,task +flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,17000,0.127,,task +flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,18000,0.201,,task +flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,19000,0.144,,task +flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,20000,0.204,,task +flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,rrm_step7,trm,baseline_no_floss,20000,0.204,,final +flossing_suite/results/trm_faithful/trm_seed123_pre_inter_0_500_b4_k4_10000,rrm_step7,trm,"engelken_l2@0,500_steps500",0,0.0,,initial +flossing_suite/results/trm_faithful/trm_seed123_pre_inter_0_500_b4_k4_10000,rrm_step7,trm,"engelken_l2@0,500_steps500",0,0.0,,after_floss 
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,0,0.0,,initial +flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,0,0.0,,after_floss +flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,1000,0.0,,task +flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,2000,0.001,,task +flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,2000,0.0,,after_floss +flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,3000,0.022,,task +flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,4000,0.027,,task +flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,4000,0.0,,after_floss +flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,5000,0.027,,task +flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,6000,0.061,,task +flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,6000,0.0,,after_floss +flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,7000,0.11,,task 
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,8000,0.066,,task +flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,8000,0.0,,after_floss +flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,9000,0.097,,task +flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,10000,0.169,,task +flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,10000,0.0,,after_floss +flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,11000,0.192,,task +flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,12000,0.128,,task +flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,13000,0.181,,task +flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,14000,0.179,,task +flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,15000,0.195,,task +flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,16000,0.179,,task +flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,17000,0.191,,task 
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,18000,0.167,,task +flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,19000,0.192,,task +flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,20000,0.207,,task +flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps100_every2000,20000,0.207,,final +flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_b4_k4_10000,rrm_step7,trm,engelken_l2@0_steps500,0,0.0,,initial +flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_b4_k4_10000,rrm_step7,trm,engelken_l2@0_steps500,0,0.0,,after_floss +flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_b4_k4_10000,rrm_step7,trm,engelken_l2@0_steps500,1000,0.0,,task +flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_b4_k4_10000,rrm_step7,trm,engelken_l2@0_steps500,2000,0.0,,task +flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_b4_k4_10000,rrm_step7,trm,engelken_l2@0_steps500,3000,0.0,,task +flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,0,0.0,,initial +flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,0,0.0,,after_floss +flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,1000,0.0,,task +flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,2000,0.0,,task +flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,3000,0.023,,task 
+flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,4000,0.026,,task +flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,5000,0.029,,task +flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,6000,0.067,,task +flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,7000,0.11,,task +flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,8000,0.125,,task +flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,9000,0.157,,task +flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,10000,0.153,,task +flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,11000,0.15,,task +flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,12000,0.184,,task +flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,13000,0.208,,task +flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,14000,0.126,,task +flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,15000,0.21,,task +flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,16000,0.166,,task +flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,17000,0.105,,task +flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,18000,0.184,,task 
+flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,19000,0.172,,task +flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,20000,0.21,,task +flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,rrm_step7,trm,engelken_l2@0_steps500,20000,0.21,,final +step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,100,0.3125,,phase2 +step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,200,0.3359375,,phase2 +step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,300,0.3203125,,phase2 +step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,400,0.33203125,,phase2 +step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,500,0.318359375,,phase2 +step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,600,0.359375,,phase2 +step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,700,0.33984375,,phase2 +step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,800,0.361328125,,phase2 +step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,900,0.373046875,,phase2 +step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,1000,0.359375,,phase2 +step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,1100,0.349609375,,phase2 +step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,1200,0.36328125,,phase2 +step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,1300,0.36328125,,phase2 +step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,1400,0.37890625,,phase2 +step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,1500,0.404296875,,phase2 +step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,1600,0.388671875,,phase2 +step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,1700,0.380859375,,phase2 +step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,1800,0.384765625,,phase2 
+step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,1900,0.380859375,,phase2 +step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,2000,0.400390625,,phase2 +step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,2100,0.4140625,,phase2 +step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,2200,0.376953125,,phase2 +step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,2300,0.400390625,,phase2 +step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,2400,0.392578125,,phase2 +step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,2500,0.3828125,,phase2 +step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,2600,0.3984375,,phase2 +step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,2700,0.384765625,,phase2 +step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,2800,0.423828125,,phase2 +step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,2900,0.419921875,,phase2 +step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,3000,0.392578125,,phase2 +step6_A_baseline_no_prefloss,hrm_step6,hrm,baseline_no_prefloss,3000,0.392578125,,phase2 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,100,0.0,,phase1 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,200,0.0,,phase1 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,300,0.0,,phase1 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,400,0.0,,phase1 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,500,0.0,,phase1 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,500,0.0,,phase1 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,600,0.0,,phase2 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,700,0.0,,phase2 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,800,0.0,,phase2 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,900,0.0,,phase2 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,1000,0.0,,phase2 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,1100,0.015625,,phase2 
+step6_B_engelken,hrm_step6,hrm,prefloss_engelken,1200,0.076171875,,phase2 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,1300,0.125,,phase2 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,1400,0.1640625,,phase2 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,1500,0.16796875,,phase2 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,1600,0.2109375,,phase2 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,1700,0.2109375,,phase2 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,1800,0.212890625,,phase2 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,1900,0.2265625,,phase2 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,2000,0.259765625,,phase2 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,2100,0.255859375,,phase2 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,2200,0.296875,,phase2 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,2300,0.2734375,,phase2 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,2400,0.2734375,,phase2 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,2500,0.2890625,,phase2 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,2600,0.318359375,,phase2 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,2700,0.326171875,,phase2 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,2800,0.291015625,,phase2 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,2900,0.330078125,,phase2 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,3000,0.32421875,,phase2 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,3100,0.359375,,phase2 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,3200,0.326171875,,phase2 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,3300,0.357421875,,phase2 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,3400,0.34375,,phase2 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,3500,0.310546875,,phase2 +step6_B_engelken,hrm_step6,hrm,prefloss_engelken,3500,0.310546875,,phase2 +step6_C_cf,hrm_step6,hrm,prefloss_cf,100,0.0,,phase1 +step6_C_cf,hrm_step6,hrm,prefloss_cf,200,0.0,,phase1 
+step6_C_cf,hrm_step6,hrm,prefloss_cf,300,0.0,,phase1 +step6_C_cf,hrm_step6,hrm,prefloss_cf,400,0.0,,phase1 +step6_C_cf,hrm_step6,hrm,prefloss_cf,500,0.0,,phase1 +step6_C_cf,hrm_step6,hrm,prefloss_cf,500,0.0,,phase1 +step6_C_cf,hrm_step6,hrm,prefloss_cf,600,0.0,,phase2 +step6_C_cf,hrm_step6,hrm,prefloss_cf,700,0.0,,phase2 +step6_C_cf,hrm_step6,hrm,prefloss_cf,800,0.0,,phase2 +step6_C_cf,hrm_step6,hrm,prefloss_cf,900,0.064453125,,phase2 +step6_C_cf,hrm_step6,hrm,prefloss_cf,1000,0.119140625,,phase2 +step6_C_cf,hrm_step6,hrm,prefloss_cf,1100,0.181640625,,phase2 +step6_C_cf,hrm_step6,hrm,prefloss_cf,1200,0.23828125,,phase2 +step6_C_cf,hrm_step6,hrm,prefloss_cf,1300,0.259765625,,phase2 +step6_C_cf,hrm_step6,hrm,prefloss_cf,1400,0.291015625,,phase2 +step6_C_cf,hrm_step6,hrm,prefloss_cf,1500,0.302734375,,phase2 +step6_C_cf,hrm_step6,hrm,prefloss_cf,1600,0.298828125,,phase2 +step6_C_cf,hrm_step6,hrm,prefloss_cf,1700,0.3046875,,phase2 +step6_C_cf,hrm_step6,hrm,prefloss_cf,1800,0.30078125,,phase2 +step6_C_cf,hrm_step6,hrm,prefloss_cf,1900,0.3125,,phase2 +step6_C_cf,hrm_step6,hrm,prefloss_cf,2000,0.33203125,,phase2 +step6_C_cf,hrm_step6,hrm,prefloss_cf,2100,0.31640625,,phase2 +step6_C_cf,hrm_step6,hrm,prefloss_cf,2200,0.3359375,,phase2 +step6_C_cf,hrm_step6,hrm,prefloss_cf,2300,0.341796875,,phase2 +step6_C_cf,hrm_step6,hrm,prefloss_cf,2400,0.33203125,,phase2 +step6_C_cf,hrm_step6,hrm,prefloss_cf,2500,0.353515625,,phase2 +step6_C_cf,hrm_step6,hrm,prefloss_cf,2600,0.35546875,,phase2 +step6_C_cf,hrm_step6,hrm,prefloss_cf,2700,0.361328125,,phase2 +step6_C_cf,hrm_step6,hrm,prefloss_cf,2800,0.35546875,,phase2 +step6_C_cf,hrm_step6,hrm,prefloss_cf,2900,0.3828125,,phase2 +step6_C_cf,hrm_step6,hrm,prefloss_cf,3000,0.3828125,,phase2 +step6_C_cf,hrm_step6,hrm,prefloss_cf,3100,0.361328125,,phase2 +step6_C_cf,hrm_step6,hrm,prefloss_cf,3200,0.390625,,phase2 +step6_C_cf,hrm_step6,hrm,prefloss_cf,3300,0.3984375,,phase2 +step6_C_cf,hrm_step6,hrm,prefloss_cf,3400,0.359375,,phase2 
+step6_C_cf,hrm_step6,hrm,prefloss_cf,3500,0.380859375,,phase2 +step6_C_cf,hrm_step6,hrm,prefloss_cf,3500,0.380859375,,phase2 +step7_A_hrm_engelken_interfloss_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500",0,0.517578125,,initial +step7_A_hrm_engelken_interfloss_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500",0,0.0,,after_floss +step7_A_hrm_engelken_interfloss_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500",500,0.0,,after_floss +step7_A_hrm_engelken_interfloss_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500",1000,0.1796875,,task +step7_A_hrm_engelken_interfloss_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500",2000,0.583984375,,task +step7_A_hrm_engelken_interfloss_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500",3000,0.580078125,,task +step7_A_hrm_engelken_interfloss_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500",4000,0.58984375,,task +step7_A_hrm_engelken_interfloss_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500",5000,0.611328125,,task +step7_A_hrm_engelken_interfloss_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500",6000,0.634765625,,task +step7_A_hrm_engelken_interfloss_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500",7000,0.63671875,,task +step7_A_hrm_engelken_interfloss_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500",8000,0.662109375,,task +step7_A_hrm_engelken_interfloss_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500",9000,0.642578125,,task +step7_A_hrm_engelken_interfloss_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500",10000,0.646484375,,task +step7_A_hrm_engelken_interfloss_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500",10000,0.646484375,,final +step7_B_trm_engelken_interfloss_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500",0,0.59765625,,initial +step7_B_trm_engelken_interfloss_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500",0,0.0,,after_floss 
+step7_B_trm_engelken_interfloss_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500",500,0.0,,after_floss +step7_B_trm_engelken_interfloss_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500",1000,0.021484375,,task +step7_B_trm_engelken_interfloss_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500",2000,0.041015625,,task +step7_B_trm_engelken_interfloss_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500",3000,0.255859375,,task +step7_B_trm_engelken_interfloss_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500",4000,0.275390625,,task +step7_B_trm_engelken_interfloss_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500",5000,0.453125,,task +step7_B_trm_engelken_interfloss_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500",6000,0.466796875,,task +step7_B_trm_engelken_interfloss_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500",7000,0.494140625,,task +step7_B_trm_engelken_interfloss_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500",8000,0.501953125,,task +step7_B_trm_engelken_interfloss_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500",9000,0.521484375,,task +step7_B_trm_engelken_interfloss_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500",10000,0.51953125,,task +step7_B_trm_engelken_interfloss_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500",10000,0.51953125,,final +step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",0,0.517578125,,initial +step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",0,0.017578125,,after_floss +step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",500,0.021484375,,after_floss +step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",1000,0.177734375,,task 
+step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",2000,0.482421875,,task +step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",3000,0.54296875,,task +step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",4000,0.5859375,,task +step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",5000,0.599609375,,task +step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",6000,0.595703125,,task +step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",7000,0.62109375,,task +step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",8000,0.61328125,,task +step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",9000,0.625,,task +step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",10000,0.62109375,,task +step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",10000,0.62109375,,final +step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",0,0.59765625,,initial +step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",0,0.025390625,,after_floss +step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",500,0.013671875,,after_floss +step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",1000,0.041015625,,task +step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",2000,0.322265625,,task +step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",3000,0.439453125,,task 
+step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",4000,0.515625,,task +step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",5000,0.498046875,,task +step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",6000,0.57421875,,task +step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",7000,0.556640625,,task +step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",8000,0.55859375,,task +step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",9000,0.5546875,,task +step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",10000,0.595703125,,task +step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",10000,0.595703125,,final +step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",0,0.517578125,,initial +step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",0,0.017578125,,after_floss +step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",500,0.017578125,,after_floss +step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",1000,0.193359375,,task +step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",2000,0.4453125,,task +step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",3000,0.564453125,,task +step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",4000,0.564453125,,task 
+step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",5000,0.583984375,,task +step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",6000,0.61328125,,task +step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",7000,0.63671875,,task +step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",8000,0.61328125,,task +step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",9000,0.615234375,,task +step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",10000,0.619140625,,task +step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",10000,0.619140625,,final +step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",0,0.59765625,,initial +step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",0,0.046875,,after_floss +step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",500,0.015625,,after_floss +step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",1000,0.060546875,,task +step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",2000,0.18359375,,task +step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",3000,0.408203125,,task +step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",4000,0.466796875,,task 
+step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",5000,0.47265625,,task +step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",6000,0.5546875,,task +step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",7000,0.541015625,,task +step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",8000,0.58203125,,task +step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",9000,0.55859375,,task +step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",10000,0.544921875,,task +step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",10000,0.544921875,,final +step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k,rrm_step7,hrm,"volume_cf@0,500_steps500_kl10",0,0.517578125,,initial +step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k,rrm_step7,hrm,"volume_cf@0,500_steps500_kl10",0,0.025390625,,after_floss +step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k,rrm_step7,hrm,"volume_cf@0,500_steps500_kl10",500,0.015625,,after_floss +step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k,rrm_step7,hrm,"volume_cf@0,500_steps500_kl10",1000,0.208984375,,task +step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k,rrm_step7,hrm,"volume_cf@0,500_steps500_kl10",2000,0.478515625,,task +step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k,rrm_step7,hrm,"volume_cf@0,500_steps500_kl10",3000,0.521484375,,task +step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k,rrm_step7,hrm,"volume_cf@0,500_steps500_kl10",4000,0.556640625,,task 
+step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k,rrm_step7,hrm,"volume_cf@0,500_steps500_kl10",5000,0.58203125,,task +step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k,rrm_step7,hrm,"volume_cf@0,500_steps500_kl10",6000,0.6171875,,task +step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k,rrm_step7,hrm,"volume_cf@0,500_steps500_kl10",7000,0.607421875,,task +step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k,rrm_step7,hrm,"volume_cf@0,500_steps500_kl10",8000,0.62109375,,task +step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k,rrm_step7,hrm,"volume_cf@0,500_steps500_kl10",9000,0.58984375,,task +step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k,rrm_step7,hrm,"volume_cf@0,500_steps500_kl10",10000,0.607421875,,task +step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k,rrm_step7,hrm,"volume_cf@0,500_steps500_kl10",10000,0.607421875,,final +step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k,rrm_step7,trm,"volume_cf@0,500_steps500_kl10",0,0.59765625,,initial +step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k,rrm_step7,trm,"volume_cf@0,500_steps500_kl10",0,0.078125,,after_floss +step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k,rrm_step7,trm,"volume_cf@0,500_steps500_kl10",500,0.01953125,,after_floss +step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k,rrm_step7,trm,"volume_cf@0,500_steps500_kl10",1000,0.099609375,,task +step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k,rrm_step7,trm,"volume_cf@0,500_steps500_kl10",2000,0.322265625,,task +step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k,rrm_step7,trm,"volume_cf@0,500_steps500_kl10",3000,0.41015625,,task +step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k,rrm_step7,trm,"volume_cf@0,500_steps500_kl10",4000,0.44921875,,task 
+step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k,rrm_step7,trm,"volume_cf@0,500_steps500_kl10",5000,0.498046875,,task +step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k,rrm_step7,trm,"volume_cf@0,500_steps500_kl10",6000,0.537109375,,task +step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k,rrm_step7,trm,"volume_cf@0,500_steps500_kl10",7000,0.5625,,task +step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k,rrm_step7,trm,"volume_cf@0,500_steps500_kl10",8000,0.548828125,,task +step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k,rrm_step7,trm,"volume_cf@0,500_steps500_kl10",9000,0.556640625,,task +step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k,rrm_step7,trm,"volume_cf@0,500_steps500_kl10",10000,0.55078125,,task +step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k,rrm_step7,trm,"volume_cf@0,500_steps500_kl10",10000,0.55078125,,final +step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps100_kl100",0,0.517578125,,initial +step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps100_kl100",0,0.0,,after_floss +step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps100_kl100",500,0.017578125,,after_floss +step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps100_kl100",1000,0.642578125,,task +step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps100_kl100",2000,0.63671875,,task +step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps100_kl100",3000,0.65625,,task +step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps100_kl100",4000,0.66015625,,task 
+step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps100_kl100",5000,0.64453125,,task +step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps100_kl100",6000,0.638671875,,task +step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps100_kl100",7000,0.677734375,,task +step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps100_kl100",8000,0.65234375,,task +step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps100_kl100",9000,0.658203125,,task +step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps100_kl100",10000,0.63671875,,task +step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k,rrm_step7,hrm,"engelken_l2@0,500_steps100_kl100",10000,0.63671875,,final diff --git a/flossing_suite/results/summary/flossing_runs_summary.csv b/flossing_suite/results/summary/flossing_runs_summary.csv new file mode 100644 index 0000000..3adb736 --- /dev/null +++ b/flossing_suite/results/summary/flossing_runs_summary.csv @@ -0,0 +1,32 @@ +name,path,family,model,selector,floss_mode,schedule,train_steps,floss_steps,k_lyap,kl_beta,initial_acc,final_acc,best_acc,delta_final,n_evals,n_floss_episodes,status,task_batch_size,floss_batch_size,effective_official_gbs768_steps +step6_A_baseline_no_prefloss,/home/yurenh2/rrm/research/flossing/step6_A_baseline_no_prefloss.json,hrm_step6,hrm,baseline_no_prefloss,volume_cf,pre=0,3000,0,8,0,0.3046875,0.392578125,0.423828125,0.087890625,31,0,complete,,, +step6_B_engelken,/home/yurenh2/rrm/research/flossing/step6_B_engelken.json,hrm_step6,hrm,prefloss_engelken,engelken,pre=500,3000,500,2,0,0.3046875,0.310546875,0.359375,0.005859375,37,1,complete,,, 
+step6_C_cf,/home/yurenh2/rrm/research/flossing/step6_C_cf.json,hrm_step6,hrm,prefloss_cf,cf,pre=500,3000,500,2,0,0.3046875,0.380859375,0.3984375,0.076171875,37,1,complete,,, +step7_A_hrm_engelken_interfloss_26040_k8_10k,/home/yurenh2/rrm/research/flossing/step7_A_hrm_engelken_interfloss_26040_k8_10k.json,rrm_step7,hrm,"engelken_l2@0,500_steps500",engelken_l2,"0,500",10000,500,8,0,0.517578125,0.646484375,0.662109375,0.12890625,14,2,complete,8,8,104.16666666666667 +step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k,/home/yurenh2/rrm/research/flossing/step7_C_hrm_engelken_interfloss_kl10_26040_k8_10k.json,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",engelken_l2,"0,500",10000,500,8,10.0,0.517578125,0.62109375,0.625,0.103515625,14,2,complete,8,8,104.16666666666667 +step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k,/home/yurenh2/rrm/research/flossing/step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k.json,rrm_step7,hrm,"engelken_l2@0,500_steps500_kl10",engelken_l2,"0,500",10000,500,8,10.0,0.517578125,0.619140625,0.63671875,0.1015625,14,2,complete,8,8,104.16666666666667 +step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k,/home/yurenh2/rrm/research/flossing/step7_G_hrm_volume_envelope_interfloss_kl10_lstar_neg015_26040_k8_10k.json,rrm_step7,hrm,"volume_cf@0,500_steps500_kl10",volume_cf,"0,500",10000,500,8,10.0,0.517578125,0.607421875,0.62109375,0.08984375,14,2,complete,8,8,104.16666666666667 +step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k,/home/yurenh2/rrm/research/flossing/step7_I_hrm_engelken_interfloss_kl100_short_26040_k8_10k.json,rrm_step7,hrm,"engelken_l2@0,500_steps100_kl100",engelken_l2,"0,500",10000,100,8,100.0,0.517578125,0.63671875,0.677734375,0.119140625,14,2,complete,8,8,104.16666666666667 +engelken_paper_faithful/smoke,/home/yurenh2/rrm/research/flossing/engelken_paper_faithful/smoke.json,rrm_step7,trm,engelken_l2@0_steps1,engelken_l2,0,1,1,1,0.0,0.0,0.0,0.0,0.0,4,1,complete,2,2,0.0026041666666666665 
+flossing_suite/results/smoke/trm_step7_decoupled_periodic_smoke,/home/yurenh2/rrm/research/flossing/flossing_suite/results/smoke/trm_step7_decoupled_periodic_smoke.json,rrm_step7,trm,engelken_l2@0_steps1_every1,engelken_l2,"0; every=1, start=1, stop=1",2,1,1,0.0,0.0,0.0,0.0,0.0,4,2,complete,8,2,0.020833333333333332 +flossing_suite/results/smoke/trm_step7_smoke,/home/yurenh2/rrm/research/flossing/flossing_suite/results/smoke/trm_step7_smoke.json,rrm_step7,trm,engelken_l2@0_steps1,engelken_l2,0,1,1,1,0.0,0.0,0.0,0.0,0.0,3,1,complete,2,2,0.0026041666666666665 +flossing_suite/results/smoke/trm_task_batch32_smoke,/home/yurenh2/rrm/research/flossing/flossing_suite/results/smoke/trm_task_batch32_smoke.json,rrm_step7,trm,baseline_no_floss,engelken_l2,,1,0,4,0.0,0.0,0.0,0.0,0.0,3,0,complete,32,4,0.041666666666666664 +flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_b4_10000,/home/yurenh2/rrm/research/flossing/flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_b4_10000.json,rrm_step7,trm,baseline_no_floss,engelken_l2,,10000,0,4,0.0,0.0,0.002,0.002,0.002,7,0,running_or_incomplete,4,4,52.083333333333336 +flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000,/home/yurenh2/rrm/research/flossing/flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000.json,rrm_step7,trm,baseline_no_floss,engelken_l2,,20000,0,4,0.0,0.0,0.204,0.204,0.204,22,0,complete,32,4,833.3333333333334 +flossing_suite/results/trm_faithful/trm_seed123_pre_inter_0_500_b4_k4_10000,/home/yurenh2/rrm/research/flossing/flossing_suite/results/trm_faithful/trm_seed123_pre_inter_0_500_b4_k4_10000.json,rrm_step7,trm,"engelken_l2@0,500_steps500",engelken_l2,"0,500",10000,500,4,0.0,0.0,0.0,0.0,0.0,2,1,running_or_incomplete,4,4,52.083333333333336 
+flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000,/home/yurenh2/rrm/research/flossing/flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000.json,rrm_step7,trm,engelken_l2@0_steps100_every2000,engelken_l2,"0; every=2000, start=2000, stop=10000",20000,100,4,0.0,0.0,0.207,0.207,0.207,28,6,complete,32,4,833.3333333333334 +flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_b4_k4_10000,/home/yurenh2/rrm/research/flossing/flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_b4_k4_10000.json,rrm_step7,trm,engelken_l2@0_steps500,engelken_l2,0,10000,500,4,0.0,0.0,0.0,0.0,0.0,5,1,running_or_incomplete,4,4,52.083333333333336 +flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000,/home/yurenh2/rrm/research/flossing/flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000.json,rrm_step7,trm,engelken_l2@0_steps500,engelken_l2,0,20000,500,4,0.0,0.0,0.21,0.21,0.21,23,1,complete,32,4,833.3333333333334 +step7_B_trm_engelken_interfloss_26041_k4_batch4_10k,/home/yurenh2/rrm/research/flossing/step7_B_trm_engelken_interfloss_26041_k4_batch4_10k.json,rrm_step7,trm,"engelken_l2@0,500_steps500",engelken_l2,"0,500",10000,500,4,0,0.59765625,0.51953125,0.59765625,-0.078125,14,2,complete,4,4,52.083333333333336 +step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k,/home/yurenh2/rrm/research/flossing/step7_D_trm_engelken_interfloss_kl10_26041_k4_batch4_10k.json,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",engelken_l2,"0,500",10000,500,4,10.0,0.59765625,0.595703125,0.59765625,-0.001953125,14,2,complete,4,4,52.083333333333336 
+step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k,/home/yurenh2/rrm/research/flossing/step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k.json,rrm_step7,trm,"engelken_l2@0,500_steps500_kl10",engelken_l2,"0,500",10000,500,4,10.0,0.59765625,0.544921875,0.59765625,-0.052734375,14,2,complete,4,4,52.083333333333336 +step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k,/home/yurenh2/rrm/research/flossing/step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k.json,rrm_step7,trm,"volume_cf@0,500_steps500_kl10",volume_cf,"0,500",10000,500,4,10.0,0.59765625,0.55078125,0.59765625,-0.046875,14,2,complete,4,4,52.083333333333336 +engelken_python/debug_n80_k40_3epoch,/home/yurenh2/rrm/research/flossing/engelken_python/debug_n80_k40_3epoch.json,toy_rnn,vanilla_rnn,baseline_no_floss,engelken_l2,"pre=0,inter_period=100,max_inter=0",3,0,40,0,0.5124555160142349,0.4853202846975089,0.5124555160142349,-0.027135231316726016,3,0,complete,,, +engelken_python/gpu_smoke,/home/yurenh2/rrm/research/flossing/engelken_python/gpu_smoke.json,toy_rnn,vanilla_rnn,prefloss,engelken_l2,"pre=1,inter_period=100,max_inter=0",2,1,4,0,0.5151515151515151,0.49242424242424243,0.5151515151515151,-0.022727272727272707,2,1,complete,,, +engelken_python/official_py_baseline_no_floss_N80_k40_E1000_setsid,/home/yurenh2/rrm/research/flossing/engelken_python/official_py_baseline_no_floss_N80_k40_E1000_setsid.json,toy_rnn,vanilla_rnn,baseline_no_floss,engelken_l2,"pre=0,inter_period=100,max_inter=0",1000,0,40,0,0.5063389679715302,0.7770796263345195,0.7770796263345195,0.27074065836298933,11,0,complete,,, +engelken_python/official_py_prefloss_N80_k40_E1000_setsid,/home/yurenh2/rrm/research/flossing/engelken_python/official_py_prefloss_N80_k40_E1000_setsid.json,toy_rnn,vanilla_rnn,prefloss,engelken_l2,"pre=100,inter_period=100,max_inter=0",1000,100,40,0,0.5019461743772242,0.9972753558718862,0.9983874555160143,0.495329181494662,11,1,complete,,, 
+engelken_python/smoke,/home/yurenh2/rrm/research/flossing/engelken_python/smoke.json,toy_rnn,vanilla_rnn,prefloss,engelken_l2,"pre=2,inter_period=100,max_inter=0",3,2,4,0,0.49242424242424243,0.5303030303030303,0.5303030303030303,0.037878787878787845,3,1,complete,,, +flossing_suite/results/smoke/toy_smoke,/home/yurenh2/rrm/research/flossing/flossing_suite/results/smoke/toy_smoke.json,toy_rnn,vanilla_rnn,prefloss,engelken_l2,"pre=2,inter_period=100,max_inter=0",3,2,4,0,0.5476190476190477,0.6190476190476191,0.6190476190476191,0.0714285714285714,3,1,complete,,, +flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000,/home/yurenh2/rrm/research/flossing/flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000.json,toy_rnn,vanilla_rnn,baseline_no_floss,engelken_l2,"pre=0,inter_period=100,max_inter=0",1000,0,40,0,0.5063389679715302,0.7770796263345195,0.7770796263345195,0.27074065836298933,11,0,complete,,, +flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000,/home/yurenh2/rrm/research/flossing/flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000.json,toy_rnn,vanilla_rnn,pre_interfloss,engelken_l2,"pre=100,inter_period=100,max_inter=2",1000,300,40,0,0.5019461743772242,0.9978314056939501,0.9979982206405694,0.4958852313167259,11,3,complete,,, +flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000,/home/yurenh2/rrm/research/flossing/flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000.json,toy_rnn,vanilla_rnn,prefloss,engelken_l2,"pre=100,inter_period=100,max_inter=0",1000,100,40,0,0.5019461743772242,0.9972753558718862,0.9983874555160143,0.495329181494662,11,1,complete,,, diff --git a/flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000.cmd.sh b/flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000.cmd.sh new file mode 100755 index 0000000..d7d9526 --- /dev/null +++ b/flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000.cmd.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash +set -euo pipefail +cd 
"/home/yurenh2/rrm" +export CUDA_VISIBLE_DEVICES="0" +export PYTHONUNBUFFERED=1 +exec "/home/yurenh2/miniconda3/envs/rrm/bin/python" research/flossing/engelken_python_flossing.py \ + --hidden-size 80 \ + --n-lyap 40 \ + --train-epochs 1000 \ + --inter-period 100 \ + --inter-epochs 100 \ + --batch-size 16 \ + --input-dim 1 \ + --train-steps 300 \ + --lyap-steps 55 \ + --floss-input-steps 300 \ + --seed-ic 1 \ + --seed-input 1 \ + --seed-net 1 \ + --seed-ons 1 \ + --lr 0.001 \ + --beta1 0.9 \ + --beta2 0.999 \ + --init-type 1 \ + --recurrent-gain 1.0 \ + --recurrent-mean-gain 0.0 \ + --input-scale 1.0 \ + --delay 10 \ + --ws-std 1.0 \ + --ws-mean 0.0 \ + --wr-std 1.0 \ + --wr-mean 0.0 \ + --b-std 0.1 \ + --b-mean 0.0 \ + --task -1 \ + --lyap-target 0.0 \ + --eval-every 100 \ + --eval-batches 4 \ + --log-every-floss 25 \ + --device cuda \ + --pre-epochs "0" \ + --max-inter-episodes "0" \ + --out "/home/yurenh2/rrm/research/flossing/flossing_suite/results/toy_rnn/toy_baseline_no_floss_N80_k40_E1000.json" diff --git a/flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000.cmd.sh b/flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000.cmd.sh new file mode 100755 index 0000000..13383f9 --- /dev/null +++ b/flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000.cmd.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash +set -euo pipefail +cd "/home/yurenh2/rrm" +export CUDA_VISIBLE_DEVICES="3" +export PYTHONUNBUFFERED=1 +exec "/home/yurenh2/miniconda3/envs/rrm/bin/python" research/flossing/engelken_python_flossing.py \ + --hidden-size 80 \ + --n-lyap 40 \ + --train-epochs 1000 \ + --inter-period 100 \ + --inter-epochs 100 \ + --batch-size 16 \ + --input-dim 1 \ + --train-steps 300 \ + --lyap-steps 55 \ + --floss-input-steps 300 \ + --seed-ic 1 \ + --seed-input 1 \ + --seed-net 1 \ + --seed-ons 1 \ + --lr 0.001 \ + --beta1 0.9 \ + --beta2 0.999 \ + --init-type 1 \ + --recurrent-gain 1.0 \ + --recurrent-mean-gain 0.0 \ + --input-scale 1.0 \ + --delay 10 \ + --ws-std 1.0 \ + 
--ws-mean 0.0 \ + --wr-std 1.0 \ + --wr-mean 0.0 \ + --b-std 0.1 \ + --b-mean 0.0 \ + --task -1 \ + --lyap-target 0.0 \ + --eval-every 100 \ + --eval-batches 4 \ + --log-every-floss 25 \ + --device cuda \ + --pre-epochs "100" \ + --max-inter-episodes "2" \ + --out "/home/yurenh2/rrm/research/flossing/flossing_suite/results/toy_rnn/toy_pre_inter_N80_k40_E1000.json" diff --git a/flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000.cmd.sh b/flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000.cmd.sh new file mode 100755 index 0000000..db82a74 --- /dev/null +++ b/flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000.cmd.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash +set -euo pipefail +cd "/home/yurenh2/rrm" +export CUDA_VISIBLE_DEVICES="1" +export PYTHONUNBUFFERED=1 +exec "/home/yurenh2/miniconda3/envs/rrm/bin/python" research/flossing/engelken_python_flossing.py \ + --hidden-size 80 \ + --n-lyap 40 \ + --train-epochs 1000 \ + --inter-period 100 \ + --inter-epochs 100 \ + --batch-size 16 \ + --input-dim 1 \ + --train-steps 300 \ + --lyap-steps 55 \ + --floss-input-steps 300 \ + --seed-ic 1 \ + --seed-input 1 \ + --seed-net 1 \ + --seed-ons 1 \ + --lr 0.001 \ + --beta1 0.9 \ + --beta2 0.999 \ + --init-type 1 \ + --recurrent-gain 1.0 \ + --recurrent-mean-gain 0.0 \ + --input-scale 1.0 \ + --delay 10 \ + --ws-std 1.0 \ + --ws-mean 0.0 \ + --wr-std 1.0 \ + --wr-mean 0.0 \ + --b-std 0.1 \ + --b-mean 0.0 \ + --task -1 \ + --lyap-target 0.0 \ + --eval-every 100 \ + --eval-batches 4 \ + --log-every-floss 25 \ + --device cuda \ + --pre-epochs "100" \ + --max-inter-episodes "0" \ + --out "/home/yurenh2/rrm/research/flossing/flossing_suite/results/toy_rnn/toy_prefloss_N80_k40_E1000.json" diff --git a/flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_b4_10000.cmd.sh b/flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_b4_10000.cmd.sh new file mode 100755 index 0000000..db9568c --- /dev/null +++ 
b/flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_b4_10000.cmd.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +set -euo pipefail +cd "/home/yurenh2/rrm" +export CUDA_VISIBLE_DEVICES="0" +export PYTHONUNBUFFERED=1 +exec "/home/yurenh2/miniconda3/envs/rrm/bin/python" research/flossing/step7_interfloss.py \ + --model trm \ + --ckpt-root "/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro" \ + --ckpt-name __random__ \ + --init-seed 123 \ + --train-steps "10000" \ + --batch-size "4" \ + --train-lr 1e-4 \ + --floss-lr 1e-4 \ + --floss-mode engelken_l2 \ + --lambda-star 0 \ + --k-lyap 4 \ + --lyap-act-steps 4 \ + --seed 42 \ + --eval-every 1000 \ + --eval-n "1000" \ + --eval-batch-size 64 \ + --floss-log-every 10 \ + --train-puzzle-emb \ + --puzzle-emb-lr 1e-4 \ + --puzzle-emb-weight-decay 1.0 \ + --kl-beta 0 \ + --floss-steps "0" \ + --interfloss-at "" \ + --out "/home/yurenh2/rrm/research/flossing/flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_b4_10000.json" diff --git a/flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000.cmd.sh b/flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000.cmd.sh new file mode 100755 index 0000000..52a9fb4 --- /dev/null +++ b/flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000.cmd.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +set -euo pipefail +cd "/home/yurenh2/rrm" +export CUDA_VISIBLE_DEVICES="0" +export PYTHONUNBUFFERED=1 +exec "/home/yurenh2/miniconda3/envs/rrm/bin/python" research/flossing/step7_interfloss.py \ + --model trm \ + --ckpt-root "/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro" \ + --ckpt-name __random__ \ + --init-seed 123 \ + --train-steps "20000" \ + --batch-size "32" \ + --task-batch-size "32" \ + --floss-batch-size "4" \ + --train-lr 1e-4 \ + --floss-lr 1e-4 \ + --floss-mode engelken_l2 
\ + --lambda-star 0 \ + --k-lyap 4 \ + --lyap-act-steps 4 \ + --seed 42 \ + --eval-every 1000 \ + --eval-n "1000" \ + --eval-batch-size 64 \ + --floss-log-every 10 \ + --train-puzzle-emb \ + --puzzle-emb-lr 1e-4 \ + --puzzle-emb-weight-decay 1.0 \ + --kl-beta 0 \ + --floss-steps "0" \ + --interfloss-at "" \ + --interfloss-every "0" \ + --interfloss-start "0" \ + --interfloss-stop "-1" \ + --out "/home/yurenh2/rrm/research/flossing/flossing_suite/results/trm_faithful/trm_seed123_baseline_nofloss_tb32_fb4_20000.json" diff --git a/flossing_suite/results/trm_faithful/trm_seed123_pre_inter_0_500_b4_k4_10000.cmd.sh b/flossing_suite/results/trm_faithful/trm_seed123_pre_inter_0_500_b4_k4_10000.cmd.sh new file mode 100755 index 0000000..5c3ba64 --- /dev/null +++ b/flossing_suite/results/trm_faithful/trm_seed123_pre_inter_0_500_b4_k4_10000.cmd.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +set -euo pipefail +cd "/home/yurenh2/rrm" +export CUDA_VISIBLE_DEVICES="3" +export PYTHONUNBUFFERED=1 +exec "/home/yurenh2/miniconda3/envs/rrm/bin/python" research/flossing/step7_interfloss.py \ + --model trm \ + --ckpt-root "/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro" \ + --ckpt-name __random__ \ + --init-seed 123 \ + --train-steps "10000" \ + --batch-size "4" \ + --train-lr 1e-4 \ + --floss-lr 1e-4 \ + --floss-mode engelken_l2 \ + --lambda-star 0 \ + --k-lyap 4 \ + --lyap-act-steps 4 \ + --seed 42 \ + --eval-every 1000 \ + --eval-n "1000" \ + --eval-batch-size 64 \ + --floss-log-every 10 \ + --train-puzzle-emb \ + --puzzle-emb-lr 1e-4 \ + --puzzle-emb-weight-decay 1.0 \ + --kl-beta 0 \ + --floss-steps "500" \ + --interfloss-at "0,500" \ + --out "/home/yurenh2/rrm/research/flossing/flossing_suite/results/trm_faithful/trm_seed123_pre_inter_0_500_b4_k4_10000.json" diff --git a/flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000.cmd.sh 
b/flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000.cmd.sh new file mode 100755 index 0000000..482c147 --- /dev/null +++ b/flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000.cmd.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +set -euo pipefail +cd "/home/yurenh2/rrm" +export CUDA_VISIBLE_DEVICES="3" +export PYTHONUNBUFFERED=1 +exec "/home/yurenh2/miniconda3/envs/rrm/bin/python" research/flossing/step7_interfloss.py \ + --model trm \ + --ckpt-root "/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro" \ + --ckpt-name __random__ \ + --init-seed 123 \ + --train-steps "20000" \ + --batch-size "32" \ + --task-batch-size "32" \ + --floss-batch-size "4" \ + --train-lr 1e-4 \ + --floss-lr 1e-4 \ + --floss-mode engelken_l2 \ + --lambda-star 0 \ + --k-lyap 4 \ + --lyap-act-steps 4 \ + --seed 42 \ + --eval-every 1000 \ + --eval-n "1000" \ + --eval-batch-size 64 \ + --floss-log-every 10 \ + --train-puzzle-emb \ + --puzzle-emb-lr 1e-4 \ + --puzzle-emb-weight-decay 1.0 \ + --kl-beta 0 \ + --floss-steps "100" \ + --interfloss-at "0" \ + --interfloss-every "2000" \ + --interfloss-start "2000" \ + --interfloss-stop "10000" \ + --out "/home/yurenh2/rrm/research/flossing/flossing_suite/results/trm_faithful/trm_seed123_pre_inter_periodic2000_tb32_fb4_k4_20000.json" diff --git a/flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_b4_k4_10000.cmd.sh b/flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_b4_k4_10000.cmd.sh new file mode 100755 index 0000000..7f364d6 --- /dev/null +++ b/flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_b4_k4_10000.cmd.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +set -euo pipefail +cd "/home/yurenh2/rrm" +export CUDA_VISIBLE_DEVICES="1" +export PYTHONUNBUFFERED=1 +exec "/home/yurenh2/miniconda3/envs/rrm/bin/python" research/flossing/step7_interfloss.py \ + --model trm \ + --ckpt-root 
"/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro" \ + --ckpt-name __random__ \ + --init-seed 123 \ + --train-steps "10000" \ + --batch-size "4" \ + --train-lr 1e-4 \ + --floss-lr 1e-4 \ + --floss-mode engelken_l2 \ + --lambda-star 0 \ + --k-lyap 4 \ + --lyap-act-steps 4 \ + --seed 42 \ + --eval-every 1000 \ + --eval-n "1000" \ + --eval-batch-size 64 \ + --floss-log-every 10 \ + --train-puzzle-emb \ + --puzzle-emb-lr 1e-4 \ + --puzzle-emb-weight-decay 1.0 \ + --kl-beta 0 \ + --floss-steps "500" \ + --interfloss-at "0" \ + --out "/home/yurenh2/rrm/research/flossing/flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_b4_k4_10000.json" diff --git a/flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000.cmd.sh b/flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000.cmd.sh new file mode 100755 index 0000000..0e903bd --- /dev/null +++ b/flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000.cmd.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +set -euo pipefail +cd "/home/yurenh2/rrm" +export CUDA_VISIBLE_DEVICES="1" +export PYTHONUNBUFFERED=1 +exec "/home/yurenh2/miniconda3/envs/rrm/bin/python" research/flossing/step7_interfloss.py \ + --model trm \ + --ckpt-root "/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro" \ + --ckpt-name __random__ \ + --init-seed 123 \ + --train-steps "20000" \ + --batch-size "32" \ + --task-batch-size "32" \ + --floss-batch-size "4" \ + --train-lr 1e-4 \ + --floss-lr 1e-4 \ + --floss-mode engelken_l2 \ + --lambda-star 0 \ + --k-lyap 4 \ + --lyap-act-steps 4 \ + --seed 42 \ + --eval-every 1000 \ + --eval-n "1000" \ + --eval-batch-size 64 \ + --floss-log-every 10 \ + --train-puzzle-emb \ + --puzzle-emb-lr 1e-4 \ + --puzzle-emb-weight-decay 1.0 \ + --kl-beta 0 \ + --floss-steps "500" \ + --interfloss-at "0" \ + --interfloss-every "0" \ + 
--interfloss-start "0" \ + --interfloss-stop "-1" \ + --out "/home/yurenh2/rrm/research/flossing/flossing_suite/results/trm_faithful/trm_seed123_prefloss_0_tb32_fb4_k4_20000.json" diff --git a/flossing_suite/smoke_test.sh b/flossing_suite/smoke_test.sh new file mode 100755 index 0000000..8b766b5 --- /dev/null +++ b/flossing_suite/smoke_test.sh @@ -0,0 +1,61 @@ +#!/usr/bin/env bash +set -euo pipefail + +GPU="${1:-0}" +ROOT="/home/yurenh2/rrm" +PY="/home/yurenh2/miniconda3/envs/rrm/bin/python" +OUT_DIR="${ROOT}/research/flossing/flossing_suite/results/smoke" +mkdir -p "${OUT_DIR}" +cd "${ROOT}" + +echo "[smoke] toy RNN faithful port" +CUDA_VISIBLE_DEVICES="${GPU}" PYTHONUNBUFFERED=1 "${PY}" research/flossing/engelken_python_flossing.py \ + --hidden-size 16 \ + --n-lyap 4 \ + --train-epochs 3 \ + --pre-epochs 2 \ + --inter-epochs 0 \ + --max-inter-episodes 0 \ + --batch-size 4 \ + --train-steps 40 \ + --lyap-steps 30 \ + --floss-input-steps 40 \ + --eval-every 1 \ + --eval-batches 1 \ + --device cuda \ + --out "${OUT_DIR}/toy_smoke.json" \ + > "${OUT_DIR}/toy_smoke.log" 2>&1 + +echo "[smoke] TRM interfloss analogue" +CUDA_VISIBLE_DEVICES="${GPU}" PYTHONUNBUFFERED=1 "${PY}" research/flossing/step7_interfloss.py \ + --model trm \ + --ckpt-root "${ROOT}/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro" \ + --ckpt-name __random__ \ + --init-seed 123 \ + --train-steps 1 \ + --batch-size 2 \ + --train-lr 1e-4 \ + --floss-lr 1e-4 \ + --floss-steps 1 \ + --interfloss-at 0 \ + --floss-mode engelken_l2 \ + --lambda-star 0 \ + --k-lyap 1 \ + --lyap-act-steps 1 \ + --seed 42 \ + --eval-every 1 \ + --eval-n 8 \ + --eval-batch-size 8 \ + --floss-log-every 1 \ + --no-eval-after-floss \ + --train-puzzle-emb \ + --puzzle-emb-lr 1e-4 \ + --puzzle-emb-weight-decay 1.0 \ + --out "${OUT_DIR}/trm_step7_smoke.json" \ + > "${OUT_DIR}/trm_step7_smoke.log" 2>&1 + +echo "[smoke] summarizer" +"${PY}" 
research/flossing/flossing_suite/summarize_flossing.py \ + > "${OUT_DIR}/summarize_smoke.log" 2>&1 + +echo "smoke OK: ${OUT_DIR}" diff --git a/flossing_suite/status.sh b/flossing_suite/status.sh new file mode 100755 index 0000000..39af2d6 --- /dev/null +++ b/flossing_suite/status.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT="/home/yurenh2/rrm" +cd "${ROOT}" + +echo "== GPU ==" +nvidia-smi --query-gpu=index,memory.used,memory.total,utilization.gpu --format=csv,noheader,nounits + +echo +echo "== Active flossing processes ==" +ps -eo pid,ppid,stat,etime,cmd | rg 'engelken_python_flossing.py|step7_interfloss.py' || true + +echo +echo "== Latest logs ==" +for dir in \ + research/flossing/flossing_suite/results/toy_rnn \ + research/flossing/flossing_suite/results/trm_faithful \ + research/flossing/flossing_suite/results/trm_variants \ + research/flossing/flossing_suite/results/smoke +do + [[ -d "${dir}" ]] || continue + for f in "${dir}"/*.log; do + [[ -f "${f}" ]] || continue + echo + echo "-- ${f} --" + tail -n 8 "${f}" + done +done + +echo +echo "== Summary files ==" +ls -lh research/flossing/flossing_suite/results/summary 2>/dev/null || true diff --git a/flossing_suite/summarize_flossing.py b/flossing_suite/summarize_flossing.py new file mode 100644 index 0000000..e753b13 --- /dev/null +++ b/flossing_suite/summarize_flossing.py @@ -0,0 +1,311 @@ +from __future__ import annotations + +import csv +import json +from pathlib import Path +from typing import Any + +import matplotlib.pyplot as plt + + +ROOT = Path("/home/yurenh2/rrm") +FLOSS = ROOT / "research/flossing" +SUITE = FLOSS / "flossing_suite" +OUT = SUITE / "results/summary" + + +def load_json(path: Path) -> dict[str, Any] | None: + try: + return json.loads(path.read_text()) + except Exception as exc: + print(f"[skip] {path}: {exc}") + return None + + +def fnum(value: Any, default: float = float("nan")) -> float: + try: + if value is None: + return default + return float(value) + except 
Exception: + return default + + +def run_name(path: Path) -> str: + return path.relative_to(FLOSS).with_suffix("").as_posix() + + +def iter_json_paths() -> list[Path]: + patterns = [ + "engelken_python/*.json", + "engelken_paper_faithful/*.json", + "step6_*.json", + "step7_*.json", + "flossing_suite/results/**/*.json", + ] + paths: list[Path] = [] + for pattern in patterns: + paths.extend(FLOSS.glob(pattern)) + return sorted(set(paths)) + + +def classify(path: Path, data: dict[str, Any]) -> str: + if "config" in data: + return "toy_rnn" + if "phase1_steps" in data: + return "step6_prefloss_hrm" + if "args" in data and "floss_episodes" in data: + args = data.get("args", {}) + model = args.get("model", "unknown") + return f"step7_interfloss_{model}" + return "unknown" + + +def selector_from_args(args: dict[str, Any]) -> str: + schedule = str(args.get("interfloss_at", "")) + steps = int(fnum(args.get("floss_steps"), 0)) + mode = str(args.get("floss_mode", "none")) + kl = fnum(args.get("kl_beta"), 0.0) + every = int(fnum(args.get("interfloss_every"), 0)) + if not schedule or steps <= 0: + return "baseline_no_floss" + label = f"{mode}@{schedule}_steps{steps}" + if every > 0: + label += f"_every{every}" + if kl > 0: + label += f"_kl{kl:g}" + return label + + +def summarize_toy(path: Path, data: dict[str, Any]) -> tuple[dict[str, Any], list[dict[str, Any]]]: + cfg = data.get("config", {}) + evals = data.get("evals", []) + first = evals[0] if evals else {} + last = evals[-1] if evals else {} + peak = max((fnum(e.get("eval_accuracy")) for e in evals), default=float("nan")) + pre_epochs = int(fnum(cfg.get("pre_epochs"), 0)) + inter_epochs = int(fnum(cfg.get("inter_epochs"), 0)) + max_inter = int(fnum(cfg.get("max_inter_episodes"), 0)) + if pre_epochs <= 0 and max_inter <= 0: + selector = "baseline_no_floss" + elif pre_epochs > 0 and max_inter <= 0: + selector = "prefloss" + else: + selector = "pre_interfloss" + + summary = { + "name": run_name(path), + "path": str(path), + 
"family": "toy_rnn", + "model": "vanilla_rnn", + "selector": selector, + "floss_mode": "engelken_l2", + "schedule": f"pre={pre_epochs},inter_period={cfg.get('inter_period')},max_inter={max_inter}", + "train_steps": cfg.get("train_epochs"), + "floss_steps": pre_epochs + inter_epochs * max_inter, + "k_lyap": cfg.get("n_lyap"), + "kl_beta": 0, + "initial_acc": fnum(first.get("eval_accuracy")), + "final_acc": fnum(last.get("eval_accuracy")), + "best_acc": peak, + "delta_final": fnum(last.get("eval_accuracy")) - fnum(first.get("eval_accuracy")), + "n_evals": len(evals), + "n_floss_episodes": (1 if pre_epochs > 0 else 0) + max_inter, + "status": "complete" if evals else "unknown", + } + rows = [] + for e in evals: + rows.append( + { + "name": summary["name"], + "family": summary["family"], + "model": summary["model"], + "selector": summary["selector"], + "x": e.get("epoch"), + "acc": e.get("eval_accuracy"), + "loss": e.get("eval_loss"), + "kind": "eval", + } + ) + return summary, rows + + +def summarize_step7(path: Path, data: dict[str, Any]) -> tuple[dict[str, Any], list[dict[str, Any]]]: + args = data.get("args", {}) + evals = data.get("evals", []) + first = evals[0] if evals else {} + final_acc = fnum(data.get("final_acc"), fnum(evals[-1].get("acc") if evals else None)) + best = max((fnum(e.get("acc")) for e in evals), default=float("nan")) + model = str(args.get("model", "unknown")) + train_steps = int(fnum(args.get("train_steps"), 0)) + task_batch = int(fnum(args.get("task_batch_size", args.get("batch_size")), 0)) + floss_batch = int(fnum(args.get("floss_batch_size", args.get("batch_size")), 0)) + every = int(fnum(args.get("interfloss_every"), 0)) + start = int(fnum(args.get("interfloss_start"), 0)) + stop = int(fnum(args.get("interfloss_stop"), -1)) + schedule = str(args.get("interfloss_at", "")) + if every > 0: + schedule = f"{schedule}; every={every}, start={start}, stop={stop}" + summary = { + "name": run_name(path), + "path": str(path), + "family": "rrm_step7", 
+ "model": model, + "selector": selector_from_args(args), + "floss_mode": args.get("floss_mode", "none"), + "schedule": schedule, + "train_steps": train_steps, + "task_batch_size": task_batch, + "floss_batch_size": floss_batch, + "effective_official_gbs768_steps": train_steps * task_batch / 768 if task_batch else "", + "floss_steps": args.get("floss_steps"), + "k_lyap": args.get("k_lyap"), + "kl_beta": args.get("kl_beta", 0), + "initial_acc": fnum(data.get("initial_acc"), fnum(first.get("acc"))), + "final_acc": final_acc, + "best_acc": best, + "delta_final": final_acc - fnum(data.get("initial_acc"), fnum(first.get("acc"))), + "n_evals": len(evals), + "n_floss_episodes": len(data.get("floss_episodes", [])), + "status": "complete" if "final_acc" in data else "running_or_incomplete", + } + rows = [] + for e in evals: + rows.append( + { + "name": summary["name"], + "family": summary["family"], + "model": model, + "selector": summary["selector"], + "x": e.get("train_step"), + "acc": e.get("acc"), + "loss": "", + "kind": e.get("kind", "eval"), + } + ) + return summary, rows + + +def summarize_step6(path: Path, data: dict[str, Any]) -> tuple[dict[str, Any], list[dict[str, Any]]]: + args = data.get("args", {}) + phase1 = data.get("phase1_evals", []) + phase2 = data.get("phase2_evals", []) + evals = [] + evals.extend({"kind": "phase1", **e} for e in phase1) + evals.extend({"kind": "phase2", **e} for e in phase2) + final_acc = fnum(data.get("final_acc"), fnum(phase2[-1].get("acc") if phase2 else None)) + best = max((fnum(e.get("acc")) for e in evals), default=float("nan")) + pre_steps = int(fnum(args.get("prefloss_steps"), 0)) + selector = "baseline_no_prefloss" if pre_steps <= 0 else f"prefloss_{args.get('floss_mode')}" + summary = { + "name": run_name(path), + "path": str(path), + "family": "hrm_step6", + "model": "hrm", + "selector": selector, + "floss_mode": args.get("floss_mode", "none"), + "schedule": f"pre={pre_steps}", + "train_steps": args.get("train_steps"), + 
"floss_steps": args.get("prefloss_steps"), + "k_lyap": args.get("k_lyap"), + "kl_beta": 0, + "initial_acc": fnum(data.get("initial_acc")), + "final_acc": final_acc, + "best_acc": best, + "delta_final": final_acc - fnum(data.get("initial_acc")), + "n_evals": len(evals), + "n_floss_episodes": 1 if pre_steps > 0 else 0, + "status": "complete" if "final_acc" in data else "running_or_incomplete", + } + rows = [] + for e in evals: + rows.append( + { + "name": summary["name"], + "family": summary["family"], + "model": "hrm", + "selector": summary["selector"], + "x": e.get("step"), + "acc": e.get("acc"), + "loss": "", + "kind": e.get("kind", "eval"), + } + ) + return summary, rows + + +def collect() -> tuple[list[dict[str, Any]], list[dict[str, Any]]]: + summaries: list[dict[str, Any]] = [] + eval_rows: list[dict[str, Any]] = [] + for path in iter_json_paths(): + data = load_json(path) + if data is None: + continue + family = classify(path, data) + if family == "toy_rnn": + summary, rows = summarize_toy(path, data) + elif family.startswith("step7"): + summary, rows = summarize_step7(path, data) + elif family == "step6_prefloss_hrm": + summary, rows = summarize_step6(path, data) + else: + continue + summaries.append(summary) + eval_rows.extend(rows) + return summaries, eval_rows + + +def write_csv(path: Path, rows: list[dict[str, Any]]) -> None: + if not rows: + path.write_text("") + return + keys = list(rows[0].keys()) + for row in rows[1:]: + for key in row: + if key not in keys: + keys.append(key) + with path.open("w", newline="") as f: + writer = csv.DictWriter(f, fieldnames=keys) + writer.writeheader() + writer.writerows(rows) + + +def plot_family(eval_rows: list[dict[str, Any]], family: str, out: Path, title: str) -> None: + rows = [r for r in eval_rows if r["family"] == family and r.get("x") not in ("", None)] + if not rows: + return + fig, ax = plt.subplots(figsize=(8.5, 4.6)) + names = sorted({r["name"] for r in rows}) + for name in names: + nr = [r for r in rows 
if r["name"] == name] + nr.sort(key=lambda r: fnum(r["x"])) + xs = [fnum(r["x"]) for r in nr] + ys = [fnum(r["acc"]) for r in nr] + label = name.split("/")[-1] + ax.plot(xs, ys, marker="o", linewidth=1.8, label=label) + ax.set_title(title) + ax.set_xlabel("epoch / train step") + ax.set_ylabel("eval exact accuracy") + ax.grid(alpha=0.22) + ax.legend(frameon=False, fontsize=7) + fig.tight_layout() + fig.savefig(out, dpi=180) + plt.close(fig) + + +def main() -> None: + OUT.mkdir(parents=True, exist_ok=True) + summaries, eval_rows = collect() + summaries.sort(key=lambda r: (str(r["family"]), str(r["model"]), str(r["name"]))) + write_csv(OUT / "flossing_runs_summary.csv", summaries) + write_csv(OUT / "flossing_eval_curves.csv", eval_rows) + plot_family(eval_rows, "toy_rnn", OUT / "toy_rnn_eval_curves.png", "Toy RNN Engelken-style flossing") + plot_family(eval_rows, "rrm_step7", OUT / "rrm_step7_eval_curves.png", "HRM/TRM interfloss analogues") + plot_family(eval_rows, "hrm_step6", OUT / "hrm_step6_eval_curves.png", "HRM prefloss experiments") + print(f"wrote {OUT / 'flossing_runs_summary.csv'}") + print(f"wrote {OUT / 'flossing_eval_curves.csv'}") + print(f"runs: {len(summaries)} eval points: {len(eval_rows)}") + + +if __name__ == "__main__": + main() diff --git a/flossing_suite/watch_and_summarize.sh b/flossing_suite/watch_and_summarize.sh new file mode 100755 index 0000000..a75c250 --- /dev/null +++ b/flossing_suite/watch_and_summarize.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT="/home/yurenh2/rrm" +PY="/home/yurenh2/miniconda3/envs/rrm/bin/python" +cd "${ROOT}" + +if [[ "$#" -gt 0 ]]; then + pid_files=("$@") +else + pid_files=(research/flossing/flossing_suite/results/trm_faithful/trm_seed123_*.pid) +fi + +echo "watching ${#pid_files[@]} pid files" +for pf in "${pid_files[@]}"; do + [[ -f "${pf}" ]] || continue + pid="$(cat "${pf}")" + echo "watch ${pf}: pid ${pid}" + while kill -0 "${pid}" 2>/dev/null; do + sleep 60 + done + echo "done ${pf}: 
pid ${pid}" +done + +"${PY}" research/flossing/flossing_suite/summarize_flossing.py +bash research/flossing/flossing_suite/status.sh > research/flossing/flossing_suite/results/summary/final_status.txt +echo "watch complete" diff --git a/initial_perturb_robustness.py b/initial_perturb_robustness.py new file mode 100644 index 0000000..e652080 --- /dev/null +++ b/initial_perturb_robustness.py @@ -0,0 +1,286 @@ +"""Inference-time robustness to initial recurrent-state perturbations. + +This tests whether trajectory-perturbation training enlarged the correct +attractor basin. Unlike PTRM-style rollout noise, the perturbation is applied +once after resetting z_H/z_L, matching the training augmentation mechanism. +""" +from __future__ import annotations + +import argparse +import csv +import json +import math +import sys +from dataclasses import replace +from pathlib import Path +from typing import Any + +import numpy as np +import torch +import yaml + + +TRM_DIR = Path("/home/yurenh2/rrm/trm") +sys.path.insert(0, str(TRM_DIR)) + +from models.recursive_reasoning.trm import ( # noqa: E402 + TinyRecursiveReasoningModel_ACTV1, + TinyRecursiveReasoningModel_ACTV1InnerCarry, +) + + +IGNORE_LABEL_ID = -100 + + +def parse_float_list(text: str) -> list[float]: + return [float(x.strip()) for x in text.split(",") if x.strip()] + + +def load_model(ckpt_root: Path, ckpt_name: str, device: str): + cfg = yaml.safe_load((ckpt_root / "all_config.yaml").read_text()) + data_path = Path(cfg.get("data_path") or cfg["data_paths"][0]) + train_meta = json.loads((data_path / "train" / "dataset.json").read_text()) + + arch_cfg = dict(cfg["arch"]) + arch_cfg.update( + batch_size=cfg["global_batch_size"], + seq_len=train_meta["seq_len"], + vocab_size=train_meta["vocab_size"], + num_puzzle_identifiers=train_meta["num_puzzle_identifiers"], + causal=False, + ) + + model = TinyRecursiveReasoningModel_ACTV1(arch_cfg) + state = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True) + 
stripped = {k.replace("_orig_mod.", "").replace("model.", ""): v for k, v in state.items()} + missing, unexpected = model.load_state_dict(stripped, strict=False) + print(f"[load] {ckpt_root.name}/{ckpt_name} missing={len(missing)} unexpected={len(unexpected)}", flush=True) + if missing[:4]: + print(f"[load] sample missing: {missing[:4]}", flush=True) + if unexpected[:4]: + print(f"[load] sample unexpected: {unexpected[:4]}", flush=True) + model.to(device).eval() + return model, cfg, data_path + + +def load_test_samples(data_path: Path, n_samples: int, seed: int): + rng = np.random.default_rng(seed) + inputs = np.load(data_path / "test" / "all__inputs.npy") + labels = np.load(data_path / "test" / "all__labels.npy") + puzzle_ids = np.load(data_path / "test" / "all__puzzle_identifiers.npy") + + n = min(n_samples, len(inputs)) + idx = rng.choice(len(inputs), size=n, replace=False) + return { + "inputs": torch.from_numpy(inputs[idx].astype(np.int32)), + "labels": torch.from_numpy(labels[idx].astype(np.int32)), + "puzzle_identifiers": torch.from_numpy(puzzle_ids[idx].astype(np.int32)), + "idx": idx, + } + + +def batch_slice(samples: dict[str, Any], start: int, end: int, device: str): + return { + k: v[start:end].to(device, non_blocking=True) + for k, v in samples.items() + if k in ("inputs", "labels", "puzzle_identifiers") + } + + +def repeat_batch(batch: dict[str, torch.Tensor], repeats: int): + if repeats == 1: + return batch + return {k: v.repeat_interleave(repeats, dim=0) for k, v in batch.items()} + + +def sample_unit_noise_like(tensor: torch.Tensor, generator: torch.Generator, distribution: str): + if distribution == "uniform": + noise = 2.0 * torch.rand(tensor.shape, device=tensor.device, dtype=torch.float32, generator=generator) - 1.0 + noise = noise * math.sqrt(3.0) + else: + noise = torch.randn(tensor.shape, device=tensor.device, dtype=torch.float32, generator=generator) + return noise.to(tensor.dtype) + + +def apply_initial_noise( + inner: 
TinyRecursiveReasoningModel_ACTV1InnerCarry, + sigma: float, + perturb: str, + generator: torch.Generator, + distribution: str, +): + if sigma <= 0: + return inner + z_h, z_l = inner.z_H, inner.z_L + if perturb in ("h", "both"): + z_h = z_h + sigma * sample_unit_noise_like(z_h, generator, distribution) + if perturb in ("l", "both"): + z_l = z_l + sigma * sample_unit_noise_like(z_l, generator, distribution) + return replace(inner, z_H=z_h, z_L=z_l) + + +def correctness(logits: torch.Tensor, labels: torch.Tensor): + preds = logits.argmax(dim=-1) + mask = labels != IGNORE_LABEL_ID + exact = torch.where(mask, preds == labels, True).all(dim=-1) + denom = mask.sum(-1).clamp_min(1) + token_acc = ((preds == labels) & mask).sum(-1).float() / denom.float() + return exact, token_acc + + +@torch.inference_mode() +def eval_sigma( + model, + batch: dict[str, torch.Tensor], + sigma: float, + rollouts: int, + perturb: str, + distribution: str, + generator: torch.Generator, +): + expanded = repeat_batch(batch, rollouts) + total = expanded["inputs"].shape[0] + with torch.device(expanded["inputs"].device): + carry = model.initial_carry(expanded) + reset = torch.ones(total, device=expanded["inputs"].device, dtype=torch.bool) + inner = model.inner.reset_carry(reset, carry.inner_carry) + inner = apply_initial_noise(inner, sigma, perturb, generator, distribution) + + logits = None + for _ in range(model.config.halt_max_steps): + inner, logits, _q = model.inner(inner, expanded) + + assert logits is not None + exact, token_acc = correctness(logits, expanded["labels"]) + base_bsz = batch["inputs"].shape[0] + return exact.view(base_bsz, rollouts), token_acc.view(base_bsz, rollouts) + + +def summarize_sigma(exact: torch.Tensor, token_acc: torch.Tensor) -> dict[str, float]: + correct_counts = exact.float().sum(dim=1) + rollouts = exact.shape[1] + return { + "mean_rollout_exact": exact.float().mean().item(), + "mean_rollout_token_acc": token_acc.mean().item(), + "pass_at_k": 
exact.any(dim=1).float().mean().item(), + "all_k": exact.all(dim=1).float().mean().item(), + "correct_count_mean": correct_counts.mean().item(), + "correct_count_std": correct_counts.std(unbiased=False).item(), + "correct_count_q10": torch.quantile(correct_counts, 0.10).item(), + "correct_count_q50": torch.quantile(correct_counts, 0.50).item(), + "correct_count_q90": torch.quantile(correct_counts, 0.90).item(), + "zero_frac": (correct_counts == 0).float().mean().item(), + "full_frac": (correct_counts == rollouts).float().mean().item(), + } + + +def write_summary(path: Path, rows: list[dict[str, Any]]) -> None: + keys = list(rows[0]) + for row in rows[1:]: + for key in row: + if key not in keys: + keys.append(key) + with path.open("w", newline="") as f: + writer = csv.DictWriter(f, fieldnames=keys) + writer.writeheader() + writer.writerows(rows) + + +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument("--ckpt-root", required=True) + parser.add_argument("--ckpt-name", required=True) + parser.add_argument("--label", required=True) + parser.add_argument("--n-samples", type=int, default=2000) + parser.add_argument("--batch-size", type=int, default=32, + help="Number of original problems per batch; expanded batch is batch_size * rollouts.") + parser.add_argument("--rollouts", type=int, default=8) + parser.add_argument("--sigmas", default="0,3e-5,1e-4,3e-4,1e-3,3e-3,1e-2,3e-2") + parser.add_argument("--perturb", choices=["h", "l", "both"], default="both") + parser.add_argument("--noise-distribution", choices=["gaussian", "uniform"], default="gaussian") + parser.add_argument("--seed", type=int, default=20260605) + parser.add_argument("--out-prefix", required=True) + args = parser.parse_args() + + device = "cuda" + sigmas = parse_float_list(args.sigmas) + torch.manual_seed(args.seed) + generator = torch.Generator(device=device).manual_seed(args.seed + 101) + + model, cfg, data_path = load_model(Path(args.ckpt_root), args.ckpt_name, device) + 
samples = load_test_samples(data_path, args.n_samples, args.seed) + n = len(samples["inputs"]) + print( + f"[run] label={args.label} n={n} rollouts={args.rollouts} " + f"batch={args.batch_size} sigmas={sigmas}", + flush=True, + ) + + rows: list[dict[str, Any]] = [] + all_exact = [] + all_token = [] + for sigma in sigmas: + exact_parts = [] + token_parts = [] + for start in range(0, n, args.batch_size): + end = min(start + args.batch_size, n) + batch = batch_slice(samples, start, end, device) + exact, token_acc = eval_sigma( + model=model, + batch=batch, + sigma=sigma, + rollouts=args.rollouts, + perturb=args.perturb, + distribution=args.noise_distribution, + generator=generator, + ) + exact_parts.append(exact.cpu()) + token_parts.append(token_acc.cpu()) + if end == n or (end // args.batch_size) % 10 == 0: + print(f" sigma={sigma:g} [{end}/{n}]", flush=True) + exact_all = torch.cat(exact_parts, dim=0) + token_all = torch.cat(token_parts, dim=0) + row: dict[str, Any] = { + "label": args.label, + "sigma": sigma, + "n_samples": n, + "rollouts": args.rollouts, + "ckpt_root": str(Path(args.ckpt_root)), + "ckpt_name": args.ckpt_name, + "perturb": args.perturb, + "noise_distribution": args.noise_distribution, + **summarize_sigma(exact_all, token_all), + } + rows.append(row) + all_exact.append(exact_all.numpy()) + all_token.append(token_all.numpy()) + print( + f" sigma={sigma:g} mean={row['mean_rollout_exact']:.4f} " + f"pass@K={row['pass_at_k']:.4f} allK={row['all_k']:.4f}", + flush=True, + ) + + out_prefix = Path(args.out_prefix) + out_prefix.parent.mkdir(parents=True, exist_ok=True) + write_summary(out_prefix.with_suffix(".summary.csv"), rows) + meta = { + "args": vars(args), + "data_path": str(data_path), + "config_global_batch_size": cfg.get("global_batch_size"), + "sigmas": sigmas, + "n_samples": n, + } + out_prefix.with_suffix(".meta.json").write_text(json.dumps(meta, indent=2, sort_keys=True)) + np.savez_compressed( + out_prefix.with_suffix(".npz"), + 
idx=samples["idx"], + sigmas=np.asarray(sigmas, dtype=np.float32), + exact=np.stack(all_exact, axis=0), + token_acc=np.stack(all_token, axis=0), + meta_json=np.asarray(json.dumps(meta, sort_keys=True)), + ) + print(f"[done] {out_prefix}.summary.csv", flush=True) + + +if __name__ == "__main__": + main() diff --git a/initial_perturb_robustness/plots/final_gpu_status.txt b/initial_perturb_robustness/plots/final_gpu_status.txt new file mode 100644 index 0000000..29c0466 --- /dev/null +++ b/initial_perturb_robustness/plots/final_gpu_status.txt @@ -0,0 +1,4 @@ +0, 13, 49140, 0 +1, 13, 49140, 0 +2, 45494, 49140, 0 +3, 13, 49140, 0 diff --git a/initial_perturb_robustness/plots/initial_perturb_robustness_combined.csv b/initial_perturb_robustness/plots/initial_perturb_robustness_combined.csv new file mode 100644 index 0000000..12fdf18 --- /dev/null +++ b/initial_perturb_robustness/plots/initial_perturb_robustness_combined.csv @@ -0,0 +1,28 @@ +label,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,correct_count_q10,correct_count_q50,correct_count_q90,zero_frac,full_frac +trm_baseline_best,0.0,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8658000230789185,0.9502986073493958,0.8658000230789185,0.8658000230789185,6.926400184631348,2.7269365787506104,0.0,8.0,8.0,0.13420000672340393,0.8658000230789185 +trm_baseline_best,3e-05,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.868399977684021,0.9510218501091003,0.9455999732017517,0.769599974155426,6.947199821472168,2.309980869293213,3.0,8.0,8.0,0.0544000007212162,0.769599974155426 
+trm_baseline_best,0.0001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8675500154495239,0.9506199955940247,0.9485999941825867,0.769599974155426,6.940400123596191,2.3122386932373047,2.0,8.0,8.0,0.05139999836683273,0.769599974155426 +trm_baseline_best,0.0003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8685500025749207,0.9512001872062683,0.9476000070571899,0.7698000073432922,6.948400020599365,2.310267925262451,3.0,8.0,8.0,0.052400000393390656,0.7698000073432922 +trm_baseline_best,0.001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8694999814033508,0.9514188766479492,0.9488000273704529,0.7712000012397766,6.955999851226807,2.289206027984619,3.0,8.0,8.0,0.05119999870657921,0.7712000012397766 +trm_baseline_best,0.003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8676750063896179,0.950739860534668,0.9448000192642212,0.7666000127792358,6.941400051116943,2.311615467071533,3.0,8.0,8.0,0.0551999993622303,0.7666000127792358 +trm_baseline_best,0.01,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669000267982483,0.9505268335342407,0.9452000260353088,0.7635999917984009,6.935200214385986,2.3034324645996094,3.0,8.0,8.0,0.05480000004172325,0.7635999917984009 
+trm_baseline_best,0.03,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8665000200271606,0.9502792954444885,0.9503999948501587,0.7573999762535095,6.932000160217285,2.2814416885375977,3.0,8.0,8.0,0.04960000142455101,0.7573999762535095 +trm_baseline_best,0.1,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8664500117301941,0.9503796696662903,0.9553999900817871,0.7558000087738037,6.931600093841553,2.2636523246765137,3.0,8.0,8.0,0.044599998742341995,0.7558000087738037 +trm_multi4_best,0.0,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8920000195503235,0.9585950970649719,0.8920000195503235,0.8920000195503235,7.136000156402588,2.4830431938171387,0.0,8.0,8.0,0.1080000028014183,0.8920000195503235 +trm_multi4_best,3e-05,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8941249847412109,0.9596672654151917,0.9577999711036682,0.8095999956130981,7.1529998779296875,2.0940847396850586,4.0,8.0,8.0,0.0421999990940094,0.8095999956130981 +trm_multi4_best,0.0001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8919249773025513,0.9588295817375183,0.9574000239372253,0.8040000200271606,7.13539981842041,2.107004165649414,4.0,8.0,8.0,0.04259999841451645,0.8040000200271606 
+trm_multi4_best,0.0003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.9595761895179749,0.9570000171661377,0.8118000030517578,7.1519999504089355,2.106584072113037,4.0,8.0,8.0,0.0430000014603138,0.8118000030517578 +trm_multi4_best,0.001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8934500217437744,0.9593700170516968,0.9535999894142151,0.8091999888420105,7.147600173950195,2.1107852458953857,4.0,8.0,8.0,0.04639999940991402,0.8091999888420105 +trm_multi4_best,0.003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8934749960899353,0.9593802690505981,0.9593999981880188,0.8101999759674072,7.147799968719482,2.1013221740722656,4.0,8.0,8.0,0.0406000018119812,0.8101999759674072 +trm_multi4_best,0.01,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8938249945640564,0.9593154191970825,0.9599999785423279,0.7993999719619751,7.150599956512451,2.065894365310669,4.0,8.0,8.0,0.03999999910593033,0.7993999719619751 +trm_multi4_best,0.03,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8929749727249146,0.9591527581214905,0.9606000185012817,0.7986000180244446,7.143799781799316,2.0669593811035156,4.0,8.0,8.0,0.039400000125169754,0.7986000180244446 
+trm_multi4_best,0.1,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8917250037193298,0.9587355256080627,0.9639999866485596,0.79339998960495,7.133800029754639,2.062788724899292,4.0,8.0,8.0,0.035999998450279236,0.79339998960495 +trm_multi4_final,0.0,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8452000021934509,0.9396741390228271,0.8452000021934509,0.8452000021934509,6.761600017547607,2.8937113285064697,0.0,8.0,8.0,0.15479999780654907,0.8452000021934509 +trm_multi4_final,3e-05,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.840499997138977,0.9371709823608398,0.9114000201225281,0.7591999769210815,6.723999977111816,2.6249237060546875,1.0,8.0,8.0,0.08860000222921371,0.7591999769210815 +trm_multi4_final,0.0001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8407999873161316,0.9373589754104614,0.9136000275611877,0.7577999830245972,6.726399898529053,2.612344264984131,1.0,8.0,8.0,0.08640000224113464,0.7577999830245972 +trm_multi4_final,0.0003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8399249911308289,0.9371064305305481,0.9092000126838684,0.7572000026702881,6.719399929046631,2.6245501041412354,1.0,8.0,8.0,0.09080000221729279,0.7572000026702881 
+trm_multi4_final,0.001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8385249972343445,0.9361668229103088,0.9089999794960022,0.7588000297546387,6.708199977874756,2.6401236057281494,1.0,8.0,8.0,0.09099999815225601,0.7588000297546387 +trm_multi4_final,0.003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8406999707221985,0.9372812509536743,0.9165999889373779,0.753000020980835,6.725599765777588,2.6047465801239014,1.0,8.0,8.0,0.08340000361204147,0.753000020980835 +trm_multi4_final,0.01,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.838100016117096,0.9362147450447083,0.9132000207901001,0.7477999925613403,6.704800128936768,2.612366199493408,1.0,8.0,8.0,0.0868000015616417,0.7477999925613403 +trm_multi4_final,0.03,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8380249738693237,0.9364882707595825,0.9197999835014343,0.7378000020980835,6.70419979095459,2.575946807861328,1.0,8.0,8.0,0.08020000159740448,0.7378000020980835 +trm_multi4_final,0.1,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.841949999332428,0.9376598596572876,0.9305999875068665,0.7305999994277954,6.735599994659424,2.5063304901123047,2.0,8.0,8.0,0.06939999759197235,0.7305999994277954 diff --git a/initial_perturb_robustness/plots/initial_perturb_robustness_conditional.csv b/initial_perturb_robustness/plots/initial_perturb_robustness_conditional.csv new file mode 100644 index 0000000..b7bf3a1 --- /dev/null +++ 
b/initial_perturb_robustness/plots/initial_perturb_robustness_conditional.csv @@ -0,0 +1,28 @@ +label,sigma,clean_acc,n_clean_success,n_clean_fail,retain_mean_on_clean_success,allK_on_clean_success,rescue_mean_on_clean_fail,passK_on_clean_fail +baseline best,0.0,0.8658,4329,671,1.0,1.0,0.0,0.0 +baseline best,2.9999999242136255e-05,0.8658,4329,671,0.9607010857010857,0.8833448833448834,0.2729135618479881,0.6140089418777943 +baseline best,9.999999747378752e-05,0.8658,4329,671,0.959026334026334,0.8838068838068838,0.27738450074515647,0.6363636363636364 +baseline best,0.0003000000142492354,0.8658,4329,671,0.9602102102102102,0.8824208824208825,0.27719821162444114,0.6274217585692996 +baseline best,0.0010000000474974513,0.8658,4329,671,0.9605855855855856,0.8854238854238854,0.2818554396423249,0.6348733233979136 +baseline best,0.003000000026077032,0.8658,4329,671,0.9599214599214599,0.8791868791868792,0.2725409836065574,0.6080476900149031 +baseline best,0.009999999776482582,0.8658,4329,671,0.9584488334488335,0.8768768768768769,0.27626676602086436,0.6005961251862891 +baseline best,0.029999999329447746,0.8658,4329,671,0.9548972048972049,0.86994686994687,0.29619970193740686,0.6482861400894188 +baseline best,0.10000000149011612,0.8658,4329,671,0.9538865788865789,0.8655578655578655,0.3023472429210134,0.6795827123695977 +multi4 best,0.0,0.892,4460,540,1.0,1.0,0.0,0.0 +multi4 best,2.9999999242136255e-05,0.892,4460,540,0.9675728699551569,0.9042600896860986,0.2875,0.6314814814814815 +multi4 best,9.999999747378752e-05,0.892,4460,540,0.965695067264574,0.897982062780269,0.2826388888888889,0.6222222222222222 +multi4 best,0.0003000000142492354,0.892,4460,540,0.9668721973094171,0.905829596412556,0.29212962962962963,0.6296296296296297 +multi4 best,0.0010000000474974513,0.892,4460,540,0.9665078475336323,0.9013452914798207,0.2900462962962963,0.5981481481481481 +multi4 best,0.003000000026077032,0.892,4460,540,0.9664798206278027,0.9035874439461884,0.29050925925925924,0.6481481481481481 +multi4 
best,0.009999999776482582,0.892,4460,540,0.965695067264574,0.8917040358744395,0.3002314814814815,0.6425925925925926 +multi4 best,0.029999999329447746,0.892,4460,540,0.9631726457399103,0.8905829596412556,0.31319444444444444,0.6555555555555556 +multi4 best,0.10000000149011612,0.892,4460,540,0.9617432735426009,0.8847533632286996,0.31342592592592594,0.6833333333333333 +multi4 final,0.0,0.8452,4226,774,1.0,1.0,0.0,0.0 +multi4 final,2.9999999242136255e-05,0.8452,4226,774,0.9613700899195456,0.8946994794131566,0.18055555555555555,0.4483204134366925 +multi4 final,9.999999747378752e-05,0.8452,4226,774,0.959979886417416,0.8928064363464269,0.19008397932816537,0.4625322997416021 +multi4 final,0.0003000000142492354,0.8452,4226,774,0.9602756743965926,0.8916232844297208,0.18281653746770027,0.4405684754521964 +multi4 final,0.0010000000474974513,0.8452,4226,774,0.9595362044486512,0.8937529578797918,0.17781007751937986,0.4418604651162791 +multi4 final,0.003000000026077032,0.8452,4226,774,0.9595362044486512,0.8868906767628963,0.19186046511627908,0.4844961240310077 +multi4 final,0.009999999776482582,0.8452,4226,774,0.9556613819214387,0.8797917652626597,0.19622093023255813,0.46511627906976744 +multi4 final,0.029999999329447746,0.8452,4226,774,0.9528513961192617,0.8684335068622812,0.21107881136950904,0.5064599483204134 +multi4 final,0.10000000149011612,0.8452,4226,774,0.949420255560814,0.8563653573118788,0.25516795865633074,0.5762273901808785 diff --git a/initial_perturb_robustness/smoke_baseline.summary.csv b/initial_perturb_robustness/smoke_baseline.summary.csv new file mode 100644 index 0000000..c5fd40c --- /dev/null +++ b/initial_perturb_robustness/smoke_baseline.summary.csv @@ -0,0 +1,3 @@ +label,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,correct_count_q10,correct_count_q50,correct_count_q90,zero_frac,full_frac 
+trm_baseline_smoke,0.0,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.875,0.9537037014961243,0.875,0.875,1.75,0.6614378094673157,1.0,2.0,2.0,0.125,0.875 +trm_baseline_smoke,0.001,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8125,0.9309413433074951,0.875,0.75,1.625,0.6959705352783203,0.5,2.0,2.0,0.125,0.75 diff --git a/initial_perturb_robustness/smoke_baseline_b32k8.summary.csv b/initial_perturb_robustness/smoke_baseline_b32k8.summary.csv new file mode 100644 index 0000000..e7ca899 --- /dev/null +++ b/initial_perturb_robustness/smoke_baseline_b32k8.summary.csv @@ -0,0 +1,2 @@ +label,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,correct_count_q10,correct_count_q50,correct_count_q90,zero_frac,full_frac +trm_baseline_b32k8_smoke,0.0,32,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.90625,0.9637345671653748,0.90625,0.90625,7.25,2.3318448066711426,8.0,8.0,8.0,0.09375,0.90625 diff --git a/initial_perturb_robustness/smoke_plots/initial_perturb_robustness_combined.csv b/initial_perturb_robustness/smoke_plots/initial_perturb_robustness_combined.csv new file mode 100644 index 0000000..f7c3ccc --- /dev/null +++ b/initial_perturb_robustness/smoke_plots/initial_perturb_robustness_combined.csv @@ -0,0 +1,4 @@ +label,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,correct_count_q10,correct_count_q50,correct_count_q90,zero_frac,full_frac 
+trm_baseline_smoke,0.0,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.875,0.9537037014961243,0.875,0.875,1.75,0.6614378094673157,1.0,2.0,2.0,0.125,0.875 +trm_baseline_smoke,0.001,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8125,0.9309413433074951,0.875,0.75,1.625,0.6959705352783203,0.5,2.0,2.0,0.125,0.75 +trm_baseline_b32k8_smoke,0.0,32,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.90625,0.9637345671653748,0.90625,0.90625,7.25,2.3318448066711426,8.0,8.0,8.0,0.09375,0.90625 diff --git a/initial_perturb_robustness/trm_baseline_best_step58590_n5000_k8.summary.csv b/initial_perturb_robustness/trm_baseline_best_step58590_n5000_k8.summary.csv new file mode 100644 index 0000000..94b9d4a --- /dev/null +++ b/initial_perturb_robustness/trm_baseline_best_step58590_n5000_k8.summary.csv @@ -0,0 +1,10 @@ +label,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,correct_count_q10,correct_count_q50,correct_count_q90,zero_frac,full_frac +trm_baseline_best,0.0,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8658000230789185,0.9502986073493958,0.8658000230789185,0.8658000230789185,6.926400184631348,2.7269365787506104,0.0,8.0,8.0,0.13420000672340393,0.8658000230789185 
+trm_baseline_best,3e-05,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.868399977684021,0.9510218501091003,0.9455999732017517,0.769599974155426,6.947199821472168,2.309980869293213,3.0,8.0,8.0,0.0544000007212162,0.769599974155426 +trm_baseline_best,0.0001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8675500154495239,0.9506199955940247,0.9485999941825867,0.769599974155426,6.940400123596191,2.3122386932373047,2.0,8.0,8.0,0.05139999836683273,0.769599974155426 +trm_baseline_best,0.0003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8685500025749207,0.9512001872062683,0.9476000070571899,0.7698000073432922,6.948400020599365,2.310267925262451,3.0,8.0,8.0,0.052400000393390656,0.7698000073432922 +trm_baseline_best,0.001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8694999814033508,0.9514188766479492,0.9488000273704529,0.7712000012397766,6.955999851226807,2.289206027984619,3.0,8.0,8.0,0.05119999870657921,0.7712000012397766 +trm_baseline_best,0.003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8676750063896179,0.950739860534668,0.9448000192642212,0.7666000127792358,6.941400051116943,2.311615467071533,3.0,8.0,8.0,0.0551999993622303,0.7666000127792358 
+trm_baseline_best,0.01,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669000267982483,0.9505268335342407,0.9452000260353088,0.7635999917984009,6.935200214385986,2.3034324645996094,3.0,8.0,8.0,0.05480000004172325,0.7635999917984009 +trm_baseline_best,0.03,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8665000200271606,0.9502792954444885,0.9503999948501587,0.7573999762535095,6.932000160217285,2.2814416885375977,3.0,8.0,8.0,0.04960000142455101,0.7573999762535095 +trm_baseline_best,0.1,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8664500117301941,0.9503796696662903,0.9553999900817871,0.7558000087738037,6.931600093841553,2.2636523246765137,3.0,8.0,8.0,0.044599998742341995,0.7558000087738037 diff --git a/initial_perturb_robustness/trm_multi4_best_step35805_n5000_k8.summary.csv b/initial_perturb_robustness/trm_multi4_best_step35805_n5000_k8.summary.csv new file mode 100644 index 0000000..0f352ca --- /dev/null +++ b/initial_perturb_robustness/trm_multi4_best_step35805_n5000_k8.summary.csv @@ -0,0 +1,10 @@ +label,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,correct_count_q10,correct_count_q50,correct_count_q90,zero_frac,full_frac +trm_multi4_best,0.0,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8920000195503235,0.9585950970649719,0.8920000195503235,0.8920000195503235,7.136000156402588,2.4830431938171387,0.0,8.0,8.0,0.1080000028014183,0.8920000195503235 
+trm_multi4_best,3e-05,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8941249847412109,0.9596672654151917,0.9577999711036682,0.8095999956130981,7.1529998779296875,2.0940847396850586,4.0,8.0,8.0,0.0421999990940094,0.8095999956130981 +trm_multi4_best,0.0001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8919249773025513,0.9588295817375183,0.9574000239372253,0.8040000200271606,7.13539981842041,2.107004165649414,4.0,8.0,8.0,0.04259999841451645,0.8040000200271606 +trm_multi4_best,0.0003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.9595761895179749,0.9570000171661377,0.8118000030517578,7.1519999504089355,2.106584072113037,4.0,8.0,8.0,0.0430000014603138,0.8118000030517578 +trm_multi4_best,0.001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8934500217437744,0.9593700170516968,0.9535999894142151,0.8091999888420105,7.147600173950195,2.1107852458953857,4.0,8.0,8.0,0.04639999940991402,0.8091999888420105 +trm_multi4_best,0.003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8934749960899353,0.9593802690505981,0.9593999981880188,0.8101999759674072,7.147799968719482,2.1013221740722656,4.0,8.0,8.0,0.0406000018119812,0.8101999759674072 
+trm_multi4_best,0.01,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8938249945640564,0.9593154191970825,0.9599999785423279,0.7993999719619751,7.150599956512451,2.065894365310669,4.0,8.0,8.0,0.03999999910593033,0.7993999719619751 +trm_multi4_best,0.03,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8929749727249146,0.9591527581214905,0.9606000185012817,0.7986000180244446,7.143799781799316,2.0669593811035156,4.0,8.0,8.0,0.039400000125169754,0.7986000180244446 +trm_multi4_best,0.1,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8917250037193298,0.9587355256080627,0.9639999866485596,0.79339998960495,7.133800029754639,2.062788724899292,4.0,8.0,8.0,0.035999998450279236,0.79339998960495 diff --git a/initial_perturb_robustness/trm_multi4_final_step65100_n5000_k8.summary.csv b/initial_perturb_robustness/trm_multi4_final_step65100_n5000_k8.summary.csv new file mode 100644 index 0000000..b4387d7 --- /dev/null +++ b/initial_perturb_robustness/trm_multi4_final_step65100_n5000_k8.summary.csv @@ -0,0 +1,10 @@ +label,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,correct_count_q10,correct_count_q50,correct_count_q90,zero_frac,full_frac +trm_multi4_final,0.0,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8452000021934509,0.9396741390228271,0.8452000021934509,0.8452000021934509,6.761600017547607,2.8937113285064697,0.0,8.0,8.0,0.15479999780654907,0.8452000021934509 
+trm_multi4_final,3e-05,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.840499997138977,0.9371709823608398,0.9114000201225281,0.7591999769210815,6.723999977111816,2.6249237060546875,1.0,8.0,8.0,0.08860000222921371,0.7591999769210815 +trm_multi4_final,0.0001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8407999873161316,0.9373589754104614,0.9136000275611877,0.7577999830245972,6.726399898529053,2.612344264984131,1.0,8.0,8.0,0.08640000224113464,0.7577999830245972 +trm_multi4_final,0.0003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8399249911308289,0.9371064305305481,0.9092000126838684,0.7572000026702881,6.719399929046631,2.6245501041412354,1.0,8.0,8.0,0.09080000221729279,0.7572000026702881 +trm_multi4_final,0.001,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8385249972343445,0.9361668229103088,0.9089999794960022,0.7588000297546387,6.708199977874756,2.6401236057281494,1.0,8.0,8.0,0.09099999815225601,0.7588000297546387 +trm_multi4_final,0.003,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8406999707221985,0.9372812509536743,0.9165999889373779,0.753000020980835,6.725599765777588,2.6047465801239014,1.0,8.0,8.0,0.08340000361204147,0.753000020980835 
+trm_multi4_final,0.01,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.838100016117096,0.9362147450447083,0.9132000207901001,0.7477999925613403,6.704800128936768,2.612366199493408,1.0,8.0,8.0,0.0868000015616417,0.7477999925613403 +trm_multi4_final,0.03,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8380249738693237,0.9364882707595825,0.9197999835014343,0.7378000020980835,6.70419979095459,2.575946807861328,1.0,8.0,8.0,0.08020000159740448,0.7378000020980835 +trm_multi4_final,0.1,5000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.841949999332428,0.9376598596572876,0.9305999875068665,0.7305999994277954,6.735599994659424,2.5063304901123047,2.0,8.0,8.0,0.06939999759197235,0.7305999994277954 diff --git a/initial_perturb_robustness/watch_and_plot.sh b/initial_perturb_robustness/watch_and_plot.sh new file mode 100755 index 0000000..eb2ba4f --- /dev/null +++ b/initial_perturb_robustness/watch_and_plot.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +set -euo pipefail +ROOT=/home/yurenh2/rrm +PY=/home/yurenh2/miniconda3/envs/rrm/bin/python +cd "${ROOT}" +PIDS=( + research/flossing/initial_perturb_robustness/logs/trm_baseline_best_step58590_n5000_k8.pid + research/flossing/initial_perturb_robustness/logs/trm_multi4_best_step35805_n5000_k8.pid + research/flossing/initial_perturb_robustness/logs/trm_multi4_final_step65100_n5000_k8.pid +) +for pf in "${PIDS[@]}"; do + pid=$(cat "${pf}") + echo "watch ${pf}: ${pid}" + while kill -0 "${pid}" 2>/dev/null; do + sleep 60 + done + echo "done ${pf}: ${pid}" +done +"${PY}" research/flossing/plot_initial_perturb_robustness.py \ + --summaries \ + 
research/flossing/initial_perturb_robustness/trm_baseline_best_step58590_n5000_k8.summary.csv \ + research/flossing/initial_perturb_robustness/trm_multi4_best_step35805_n5000_k8.summary.csv \ + research/flossing/initial_perturb_robustness/trm_multi4_final_step65100_n5000_k8.summary.csv \ + --out-dir research/flossing/initial_perturb_robustness/plots +nvidia-smi --query-gpu=index,memory.used,memory.total,utilization.gpu --format=csv,noheader,nounits > research/flossing/initial_perturb_robustness/plots/final_gpu_status.txt diff --git a/late_perturb_robustness.py b/late_perturb_robustness.py new file mode 100644 index 0000000..7ad2ba5 --- /dev/null +++ b/late_perturb_robustness.py @@ -0,0 +1,323 @@ +"""Inference-time robustness to late recurrent-state perturbations. + +For each sample, run the model cleanly for `perturb_after` inner iterations, +perturb z_H/z_L once, then continue the deterministic recurrent rollout. This +stress-tests the attractor basin along the inference trajectory rather than +only at the initial recurrent state. 
+""" +from __future__ import annotations + +import argparse +import csv +import json +import math +import sys +from dataclasses import replace +from pathlib import Path +from typing import Any + +import numpy as np +import torch +import yaml + + +TRM_DIR = Path("/home/yurenh2/rrm/trm") +sys.path.insert(0, str(TRM_DIR)) + +from models.recursive_reasoning.trm import ( # noqa: E402 + TinyRecursiveReasoningModel_ACTV1, + TinyRecursiveReasoningModel_ACTV1InnerCarry, +) + + +IGNORE_LABEL_ID = -100 + + +def parse_float_list(text: str) -> list[float]: + return [float(x.strip()) for x in text.split(",") if x.strip()] + + +def parse_int_list(text: str) -> list[int]: + return [int(x.strip()) for x in text.split(",") if x.strip()] + + +def is_zero(value: float) -> bool: + return abs(value) <= 1e-12 + + +def load_model(ckpt_root: Path, ckpt_name: str, device: str): + cfg = yaml.safe_load((ckpt_root / "all_config.yaml").read_text()) + data_path = Path(cfg.get("data_path") or cfg["data_paths"][0]) + train_meta = json.loads((data_path / "train" / "dataset.json").read_text()) + + arch_cfg = dict(cfg["arch"]) + arch_cfg.update( + batch_size=cfg["global_batch_size"], + seq_len=train_meta["seq_len"], + vocab_size=train_meta["vocab_size"], + num_puzzle_identifiers=train_meta["num_puzzle_identifiers"], + causal=False, + ) + + model = TinyRecursiveReasoningModel_ACTV1(arch_cfg) + state = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True) + stripped = {k.replace("_orig_mod.", "").replace("model.", ""): v for k, v in state.items()} + missing, unexpected = model.load_state_dict(stripped, strict=False) + print(f"[load] {ckpt_root.name}/{ckpt_name} missing={len(missing)} unexpected={len(unexpected)}", flush=True) + model.to(device).eval() + return model, cfg, data_path + + +def load_test_samples(data_path: Path, n_samples: int, seed: int): + rng = np.random.default_rng(seed) + inputs = np.load(data_path / "test" / "all__inputs.npy") + labels = np.load(data_path / "test" 
/ "all__labels.npy") + puzzle_ids = np.load(data_path / "test" / "all__puzzle_identifiers.npy") + + n = min(n_samples, len(inputs)) + idx = rng.choice(len(inputs), size=n, replace=False) + return { + "inputs": torch.from_numpy(inputs[idx].astype(np.int32)), + "labels": torch.from_numpy(labels[idx].astype(np.int32)), + "puzzle_identifiers": torch.from_numpy(puzzle_ids[idx].astype(np.int32)), + "idx": idx, + } + + +def batch_slice(samples: dict[str, Any], start: int, end: int, device: str): + return { + k: v[start:end].to(device, non_blocking=True) + for k, v in samples.items() + if k in ("inputs", "labels", "puzzle_identifiers") + } + + +def repeat_batch(batch: dict[str, torch.Tensor], repeats: int): + if repeats == 1: + return batch + return {k: v.repeat_interleave(repeats, dim=0) for k, v in batch.items()} + + +def sample_unit_noise_like(tensor: torch.Tensor, generator: torch.Generator, distribution: str): + if distribution == "uniform": + noise = 2.0 * torch.rand(tensor.shape, device=tensor.device, dtype=torch.float32, generator=generator) - 1.0 + noise = noise * math.sqrt(3.0) + else: + noise = torch.randn(tensor.shape, device=tensor.device, dtype=torch.float32, generator=generator) + return noise.to(tensor.dtype) + + +def apply_noise( + inner: TinyRecursiveReasoningModel_ACTV1InnerCarry, + sigma: float, + perturb: str, + generator: torch.Generator, + distribution: str, +): + if sigma <= 0: + return inner + z_h, z_l = inner.z_H, inner.z_L + if perturb in ("h", "both"): + z_h = z_h + sigma * sample_unit_noise_like(z_h, generator, distribution) + if perturb in ("l", "both"): + z_l = z_l + sigma * sample_unit_noise_like(z_l, generator, distribution) + return replace(inner, z_H=z_h, z_L=z_l) + + +def correctness(logits: torch.Tensor, labels: torch.Tensor): + preds = logits.argmax(dim=-1) + mask = labels != IGNORE_LABEL_ID + exact = torch.where(mask, preds == labels, True).all(dim=-1) + denom = mask.sum(-1).clamp_min(1) + token_acc = ((preds == labels) & 
def correctness(logits: torch.Tensor, labels: torch.Tensor):
    """Score argmax predictions against *labels*, skipping ignored positions.

    Returns:
        ``(exact, token_acc)``. ``exact`` is True for a row when every
        non-ignored position matches; ``token_acc`` is the fraction of
        non-ignored positions that match, with the denominator floored at 1.
    """
    preds = logits.argmax(dim=-1)
    scored = labels != IGNORE_LABEL_ID
    hits = preds == labels
    # An ignored position can never make a row inexact.
    exact = (hits | ~scored).all(dim=-1)
    n_scored = scored.sum(-1).clamp_min(1)
    n_hits = (hits & scored).sum(-1)
    token_acc = n_hits.float() / n_scored.float()
    return exact, token_acc


@torch.inference_mode()
def eval_condition(
    model,
    batch: dict[str, torch.Tensor],
    sigma: float,
    rollouts: int,
    perturb_after: int,
    perturb: str,
    distribution: str,
    generator: torch.Generator,
):
    """Run ``rollouts`` copies of every sample with one noise injection.

    The model is stepped ``model.config.halt_max_steps`` times in total. Noise
    is applied to the carry once, after ``perturb_after`` steps (clamped to
    ``[0, halt_max_steps - 1]``), and correctness is scored on the logits of
    the final step.

    Returns:
        ``(exact, token_acc)``, each reshaped to ``(batch_rows, rollouts)``.
    """
    expanded = repeat_batch(batch, rollouts)
    device = expanded["inputs"].device
    n_rows = expanded["inputs"].shape[0]
    n_steps = model.config.halt_max_steps
    warmup = min(max(perturb_after, 0), n_steps - 1)

    with torch.device(device):
        carry = model.initial_carry(expanded)
    # Reset every row so the rollout starts from the model's reset state.
    reset_all = torch.ones(n_rows, device=device, dtype=torch.bool)
    inner = model.inner.reset_carry(reset_all, carry.inner_carry)

    def advance(state, count, last_logits):
        # Step the inner model `count` times, keeping the newest logits.
        for _ in range(count):
            state, last_logits, _q = model.inner(state, expanded)
        return state, last_logits

    inner, logits = advance(inner, warmup, None)
    inner = apply_noise(inner, sigma, perturb, generator, distribution)
    inner, logits = advance(inner, n_steps - warmup, logits)

    assert logits is not None
    exact, token_acc = correctness(logits, expanded["labels"])
    base_bsz = batch["inputs"].shape[0]
    return exact.view(base_bsz, rollouts), token_acc.view(base_bsz, rollouts)
def summarize(exact: torch.Tensor, token_acc: torch.Tensor, clean_exact: torch.Tensor) -> dict[str, float]:
    """Reduce per-rollout correctness to scalar summary statistics.

    Args:
        exact: Exact-match flags indexed ``[sample, rollout]``.
        token_acc: Token accuracy with the same layout as ``exact``.
        clean_exact: Per-sample flag, used to split samples into
            "clean success" and "clean fail" groups.

    Returns:
        A dict of Python floats. Statistics conditioned on a group that has no
        samples are reported as NaN.
    """
    nan = float("nan")
    as_float = exact.float()
    rollouts = exact.shape[1]
    correct_counts = as_float.sum(dim=1)
    clean_success = clean_exact.bool()
    clean_fail = ~clean_success

    out = {
        "mean_rollout_exact": as_float.mean().item(),
        "mean_rollout_token_acc": token_acc.mean().item(),
        "pass_at_k": exact.any(dim=1).float().mean().item(),
        "all_k": exact.all(dim=1).float().mean().item(),
        "correct_count_mean": correct_counts.mean().item(),
        "correct_count_std": correct_counts.std(unbiased=False).item(),
        "zero_frac": (correct_counts == 0).float().mean().item(),
        "full_frac": (correct_counts == rollouts).float().mean().item(),
        "clean_acc": clean_success.float().mean().item(),
    }

    # How well samples that were right without noise stay right under noise.
    if clean_success.any().item():
        kept = exact[clean_success]
        out["retain_mean_on_clean_success"] = kept.float().mean().item()
        out["allK_on_clean_success"] = kept.all(dim=1).float().mean().item()
    else:
        out["retain_mean_on_clean_success"] = nan
        out["allK_on_clean_success"] = nan

    # How often samples that were wrong without noise become right under noise.
    if clean_fail.any().item():
        failed = exact[clean_fail]
        out["rescue_mean_on_clean_fail"] = failed.float().mean().item()
        out["passK_on_clean_fail"] = failed.any(dim=1).float().mean().item()
    else:
        out["rescue_mean_on_clean_fail"] = nan
        out["passK_on_clean_fail"] = nan
    return out


def write_summary(path: Path, rows: list[dict[str, Any]]) -> None:
    """Write *rows* to *path* as CSV.

    The header is the union of all row keys in first-seen order; a key missing
    from a row is written as an empty cell. An empty ``rows`` list produces an
    empty file instead of raising ``IndexError``.
    """
    # dict.fromkeys de-duplicates while preserving first-seen order.
    fieldnames = list(dict.fromkeys(key for row in rows for key in row))
    with path.open("w", newline="") as f:
        if not rows:
            return
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)
def clean_sigma_first(sigmas, is_clean):
    """Return a copy of *sigmas* whose first entry is a clean (zero-noise) level.

    The first entry accepted by ``is_clean`` is moved to the front; if there is
    none, ``0.0`` is prepended. All other entries keep their relative order.
    """
    for pos, sigma in enumerate(sigmas):
        if is_clean(sigma):
            return [sigma, *sigmas[:pos], *sigmas[pos + 1:]]
    return [0.0, *sigmas]


def main() -> None:
    """Sweep noise scale and injection step, then write CSV, JSON and NPZ outputs.

    For every ``perturb_after`` value the clean (sigma = 0) run is evaluated
    first; its first rollout defines the clean-success / clean-fail grouping
    used to summarize the noisy runs of that same ``perturb_after``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--ckpt-root", required=True)
    parser.add_argument("--ckpt-name", required=True)
    parser.add_argument("--label", required=True)
    parser.add_argument("--n-samples", type=int, default=3000)
    parser.add_argument("--batch-size", type=int, default=32)
    parser.add_argument("--rollouts", type=int, default=8)
    parser.add_argument("--sigmas", default="0,0.001,0.003,0.01,0.03,0.1")
    parser.add_argument("--perturb-afters", default="0,4,8,12,15")
    parser.add_argument("--perturb", choices=["h", "l", "both"], default="both")
    parser.add_argument("--noise-distribution", choices=["gaussian", "uniform"], default="gaussian")
    parser.add_argument("--seed", type=int, default=20260606)
    parser.add_argument("--out-prefix", required=True)
    args = parser.parse_args()

    device = "cuda"
    sigmas = parse_float_list(args.sigmas)
    perturb_afters = parse_int_list(args.perturb_afters)

    # Reject inputs that would otherwise only crash after the checkpoint is
    # loaded (empty result lists, zero-step batching range, empty torch.cat).
    if not perturb_afters:
        parser.error("--perturb-afters must list at least one step")
    if args.n_samples < 1 or args.batch_size < 1 or args.rollouts < 1:
        parser.error("--n-samples, --batch-size and --rollouts must all be >= 1")

    torch.manual_seed(args.seed)
    generator = torch.Generator(device=device).manual_seed(args.seed + 101)

    model, cfg, data_path = load_model(Path(args.ckpt_root), args.ckpt_name, device)
    samples = load_test_samples(data_path, args.n_samples, args.seed)
    n = len(samples["inputs"])
    print(
        f"[run] label={args.label} n={n} rollouts={args.rollouts} batch={args.batch_size} "
        f"afters={perturb_afters} sigmas={sigmas}",
        flush=True,
    )

    rows: list[dict[str, Any]] = []
    all_exact = []
    all_token = []
    # The clean run must come first because every noisy summary is grouped by
    # it. Reordering here avoids discovering a bad order after the full sweep.
    sigmas = clean_sigma_first(sigmas, is_zero)

    for after in perturb_afters:
        clean_exact = None
        for sigma in sigmas:
            exact_parts = []
            token_parts = []
            for start in range(0, n, args.batch_size):
                end = min(start + args.batch_size, n)
                batch = batch_slice(samples, start, end, device)
                exact, token_acc = eval_condition(
                    model, batch, sigma, args.rollouts, after, args.perturb, args.noise_distribution, generator
                )
                exact_parts.append(exact.cpu())
                token_parts.append(token_acc.cpu())
                if end == n or (end // args.batch_size) % 10 == 0:
                    print(f" after={after} sigma={sigma:g} [{end}/{n}]", flush=True)
            exact_all = torch.cat(exact_parts, dim=0)
            token_all = torch.cat(token_parts, dim=0)
            if is_zero(sigma):
                # Only rollout 0 of the clean run is used for the grouping.
                clean_exact = exact_all[:, 0].clone()
                print(f" after={after} clean grouping done clean_acc={clean_exact.float().mean().item():.4f}", flush=True)
            if clean_exact is None:
                # Invariant guard: unreachable once sigmas are clean-first.
                raise RuntimeError("sigma=0 must be evaluated before nonzero sigmas")
            row: dict[str, Any] = {
                "label": args.label,
                "perturb_after": after,
                "sigma": sigma,
                "n_samples": n,
                "rollouts": args.rollouts,
                "ckpt_root": str(Path(args.ckpt_root)),
                "ckpt_name": args.ckpt_name,
                "perturb": args.perturb,
                "noise_distribution": args.noise_distribution,
                **summarize(exact_all, token_all, clean_exact),
            }
            rows.append(row)
            all_exact.append(exact_all.numpy())
            all_token.append(token_all.numpy())
            print(
                f" after={after} sigma={sigma:g} mean={row['mean_rollout_exact']:.4f} "
                f"retain={row['retain_mean_on_clean_success']:.4f} "
                f"rescue={row['rescue_mean_on_clean_fail']:.4f}",
                flush=True,
            )

    out_prefix = Path(args.out_prefix)
    out_prefix.parent.mkdir(parents=True, exist_ok=True)

    def output_path(suffix: str) -> Path:
        # Append rather than Path.with_suffix, which would drop everything
        # after a dot already present in the prefix (e.g. "run_eps0.01").
        return out_prefix.with_name(out_prefix.name + suffix)

    write_summary(output_path(".summary.csv"), rows)
    meta = {
        "args": vars(args),
        "data_path": str(data_path),
        "config_global_batch_size": cfg.get("global_batch_size"),
        "sigmas": sigmas,
        "perturb_afters": perturb_afters,
        "n_samples": n,
    }
    output_path(".meta.json").write_text(json.dumps(meta, indent=2, sort_keys=True))
    # exact/token_acc are stacked in evaluation order: perturb_after outer, sigma inner.
    np.savez_compressed(
        output_path(".npz"),
        idx=samples["idx"],
        sigmas=np.asarray(sigmas, dtype=np.float32),
        perturb_afters=np.asarray(perturb_afters, dtype=np.int32),
        exact=np.stack(all_exact, axis=0),
        token_acc=np.stack(all_token, axis=0),
        meta_json=np.asarray(json.dumps(meta, sort_keys=True)),
    )
    print(f"[done] {out_prefix}.summary.csv", flush=True)


if __name__ == "__main__":
    main()
+trm_baseline_best,0,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.950645923614502,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0 +trm_baseline_best,0,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.9504892826080322,0.9483333230018616,0.7639999985694885,6.935999870300293,2.303454875946045,0.05166666582226753,0.7639999985694885,0.8669999837875366,0.9578046798706055,0.875048041343689,0.2750626504421234,0.6315789222717285 +trm_baseline_best,0,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8684166669845581,0.9509722590446472,0.9456666707992554,0.7683333158493042,6.947333335876465,2.3008460998535156,0.05433333292603493,0.7683333158493042,0.8669999837875366,0.9602556824684143,0.8800461292266846,0.2697368562221527,0.6090225577354431 +trm_baseline_best,0,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.9504145383834839,0.9463333487510681,0.765333354473114,6.935999870300293,2.3080809116363525,0.05366666615009308,0.765333354473114,0.8669999837875366,0.9579008221626282,0.8785082697868347,0.2744360864162445,0.6190476417541504 
+trm_baseline_best,0,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8675000071525574,0.9505621790885925,0.9496666789054871,0.7616666555404663,6.940000057220459,2.2861320972442627,0.050333332270383835,0.7616666555404663,0.8669999837875366,0.9567474126815796,0.8723567724227905,0.2857142984867096,0.6390977501869202 +trm_baseline_best,0,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8682083487510681,0.9508457183837891,0.9506666660308838,0.7593333125114441,6.945666790008545,2.265108108520508,0.04933333396911621,0.7593333125114441,0.8669999837875366,0.956122636795044,0.8696655035018921,0.2951127886772156,0.6441102623939514 +trm_baseline_best,4,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.950645923614502,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0 +trm_baseline_best,4,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8666250109672546,0.9501558542251587,0.9350000023841858,0.8009999990463257,6.933000087738037,2.4135406017303467,0.06499999761581421,0.8009999990463257,0.8669999837875366,0.9687620401382446,0.9211841821670532,0.20081453025341034,0.5263158082962036 
+trm_baseline_best,4,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8679583072662354,0.950590968132019,0.9359999895095825,0.7996666431427002,6.943666458129883,2.404542922973633,0.06400000303983688,0.7996666431427002,0.8669999837875366,0.970588207244873,0.9200307726860046,0.19893483817577362,0.5338345766067505 +trm_baseline_best,4,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8680416941642761,0.9508261680603027,0.9399999976158142,0.8013333082199097,6.944333553314209,2.3910739421844482,0.05999999865889549,0.8013333082199097,0.8669999837875366,0.9685697555541992,0.9227220416069031,0.21271929144859314,0.5664160251617432 +trm_baseline_best,4,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8660833239555359,0.9499126076698303,0.934333324432373,0.7953333258628845,6.928666591644287,2.413485288619995,0.06566666811704636,0.7953333258628845,0.8669999837875366,0.967608630657196,0.915032684803009,0.20426064729690552,0.523809552192688 +trm_baseline_best,4,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8659999966621399,0.9500890970230103,0.9403333067893982,0.7973333597183228,6.927999973297119,2.399336576461792,0.05966666713356972,0.7973333597183228,0.8669999837875366,0.9667435884475708,0.9161860942840576,0.20927318930625916,0.5664160251617432 
+trm_baseline_best,8,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.950645923614502,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0 +trm_baseline_best,8,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669583201408386,0.9507402181625366,0.9226666688919067,0.8306666612625122,6.935666561126709,2.5147950649261475,0.07733333110809326,0.8306666612625122,0.8669999837875366,0.9784217476844788,0.9573240876197815,0.14035087823867798,0.4436090290546417 +trm_baseline_best,8,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8678333163261414,0.9507989287376404,0.9246666431427002,0.831333339214325,6.942666530609131,2.5057361125946045,0.07533333450555801,0.831333339214325,0.8669999837875366,0.9795271158218384,0.9588619470596313,0.13972431421279907,0.451127827167511 +trm_baseline_best,8,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8676666617393494,0.9505447745323181,0.9229999780654907,0.8306666612625122,6.941333293914795,2.51287841796875,0.07699999958276749,0.8306666612625122,0.8669999837875366,0.980199933052063,0.9573240876197815,0.13408520817756653,0.4411027431488037 
+trm_baseline_best,8,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8682083487510681,0.950954258441925,0.9236666560173035,0.8286666870117188,6.945666790008545,2.4972081184387207,0.07633333653211594,0.8286666870117188,0.8669999837875366,0.9788542985916138,0.9554017782211304,0.14692983031272888,0.448621541261673 +trm_baseline_best,8,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8687083125114441,0.9511321783065796,0.9286666512489319,0.824999988079071,6.949666500091553,2.46991229057312,0.07133333384990692,0.824999988079071,0.8669999837875366,0.9784698486328125,0.9504036903381348,0.15319548547267914,0.48120301961898804 +trm_baseline_best,12,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.950645923614502,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0 +trm_baseline_best,12,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8673333525657654,0.9506542682647705,0.8926666378974915,0.856333315372467,6.938666820526123,2.641887903213501,0.10733333230018616,0.856333315372467,0.8669999837875366,0.9928392767906189,0.9873125553131104,0.049185462296009064,0.2005012482404709 
+trm_baseline_best,12,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8668749928474426,0.9505452513694763,0.8896666765213013,0.8560000061988831,6.934999942779541,2.651435136795044,0.11033333092927933,0.8560000061988831,0.8669999837875366,0.9924548268318176,0.9869281053543091,0.048245612531900406,0.18045112490653992 +trm_baseline_best,12,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669583201408386,0.9506661891937256,0.8913333415985107,0.8556666374206543,6.935666561126709,2.6479289531707764,0.10866666585206985,0.8556666374206543,0.8669999837875366,0.991782009601593,0.9869281053543091,0.05325814709067345,0.19548872113227844 +trm_baseline_best,12,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8671666383743286,0.9506459832191467,0.8989999890327454,0.8539999723434448,6.937333106994629,2.629715919494629,0.10100000351667404,0.8539999723434448,0.8669999837875366,0.991733968257904,0.985005795955658,0.055137842893600464,0.24561403691768646 +trm_baseline_best,12,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8677916526794434,0.950908362865448,0.9043333530426025,0.8516666889190674,6.942333221435547,2.602372169494629,0.09566666930913925,0.8516666889190674,0.8669999837875366,0.9901480078697205,0.9823144674301147,0.07017543911933899,0.2882205545902252 
+trm_baseline_best,15,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.950645923614502,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0 +trm_baseline_best,15,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.9506445527076721,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0 +trm_baseline_best,15,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.9506717920303345,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0 +trm_baseline_best,15,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.9506852030754089,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0 +trm_baseline_best,15,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8670833110809326,0.9507201910018921,0.8673333525657654,0.8669999837875366,6.936666488647461,2.7151405811309814,0.1326666623353958,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0006265664123930037,0.002506265649572015 
+trm_baseline_best,15,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8670833110809326,0.9508404731750488,0.8686666488647461,0.8656666874885559,6.936666488647461,2.7110862731933594,0.1313333362340927,0.8656666874885559,0.8669999837875366,0.9997597336769104,0.9984621405601501,0.0021929824724793434,0.01253132801502943 +trm_multi4_best,0,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.959773600101471,0.8939999938011169,0.8939999938011169,7.1519999504089355,2.462700843811035,0.10599999874830246,0.8939999938011169,0.8939999938011169,1.0,1.0,0.0,0.0 +trm_multi4_best,0,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8957083225250244,0.9601898789405823,0.9583333134651184,0.8119999766349792,7.165666580200195,2.081879138946533,0.0416666679084301,0.8119999766349792,0.8939999938011169,0.9684004187583923,0.9030573964118958,0.28262579441070557,0.6194968819618225 +trm_multi4_best,0,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8957916498184204,0.9602490067481995,0.9606666564941406,0.8130000233650208,7.166333198547363,2.062037706375122,0.03933333232998848,0.8130000233650208,0.8939999938011169,0.9666293859481812,0.9038031101226807,0.2983490526676178,0.6352201104164124 
+trm_multi4_best,0,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8947083353996277,0.9597618579864502,0.9599999785423279,0.8063333630561829,7.1576666831970215,2.0646567344665527,0.03999999910593033,0.8063333630561829,0.8939999938011169,0.9667226076126099,0.8963460326194763,0.28734275698661804,0.6320754885673523 +trm_multi4_best,0,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8953750133514404,0.9600813984870911,0.9603333473205566,0.8009999990463257,7.163000106811523,2.049495220184326,0.03966666758060455,0.8009999990463257,0.8939999938011169,0.9666759967803955,0.8918717503547668,0.29402515292167664,0.6446540951728821 +trm_multi4_best,0,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8950416445732117,0.9601003527641296,0.9629999995231628,0.7983333468437195,7.160333156585693,2.0425376892089844,0.03700000047683716,0.7983333468437195,0.8939999938011169,0.9635066986083984,0.8851603269577026,0.3176100552082062,0.6666666865348816 +trm_multi4_best,4,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.959773600101471,0.8939999938011169,0.8939999938011169,7.1519999504089355,2.462700843811035,0.10599999874830246,0.8939999938011169,0.8939999938011169,1.0,1.0,0.0,0.0 
+trm_multi4_best,4,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8946250081062317,0.9599500894546509,0.9473333358764648,0.8433333039283752,7.1570000648498535,2.1906659603118896,0.052666667848825455,0.8433333039283752,0.8939999938011169,0.9753448963165283,0.9392244815826416,0.21383647620677948,0.5251572132110596 +trm_multi4_best,4,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8962500095367432,0.9606265425682068,0.9523333311080933,0.843999981880188,7.170000076293945,2.1701996326446533,0.04766666516661644,0.843999981880188,0.8939999938011169,0.976416826248169,0.9410887360572815,0.22012577950954437,0.5691823959350586 +trm_multi4_best,4,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8963750004768372,0.9604902863502502,0.9483333230018616,0.8446666598320007,7.171000003814697,2.171119213104248,0.05166666582226753,0.8446666598320007,0.8939999938011169,0.9760906100273132,0.9410887360572815,0.22405660152435303,0.5314465165138245 +trm_multi4_best,4,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8968333601951599,0.9606810808181763,0.9506666660308838,0.8403333425521851,7.174666881561279,2.1587400436401367,0.04933333396911621,0.8403333425521851,0.8939999938011169,0.9759973883628845,0.9377330541610718,0.2291666716337204,0.544025182723999 
+trm_multi4_best,4,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8944583535194397,0.9597752094268799,0.9526666402816772,0.8326666951179504,7.155666828155518,2.1656641960144043,0.047333333641290665,0.8326666951179504,0.8939999938011169,0.9737602472305298,0.9284116625785828,0.22562892735004425,0.5628930926322937 +trm_multi4_best,8,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.959773600101471,0.8939999938011169,0.8939999938011169,7.1519999504089355,2.462700843811035,0.10599999874830246,0.8939999938011169,0.8939999938011169,1.0,1.0,0.0,0.0 +trm_multi4_best,8,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8962083458900452,0.9605055451393127,0.9399999976158142,0.8696666955947876,7.169666767120361,2.2701423168182373,0.05999999865889549,0.8696666955947876,0.8939999938011169,0.9840604066848755,0.9709172248840332,0.15526729822158813,0.45597484707832336 +trm_multi4_best,8,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8970000147819519,0.9608615636825562,0.9416666626930237,0.8696666955947876,7.176000118255615,2.254556179046631,0.05833333358168602,0.8696666955947876,0.8939999938011169,0.9844798445701599,0.9709172248840332,0.1591981202363968,0.46855345368385315 
+trm_multi4_best,8,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8955416679382324,0.9602766633033752,0.9399999976158142,0.8673333525657654,7.164333343505859,2.2744951248168945,0.05999999865889549,0.8673333525657654,0.8939999938011169,0.9835943579673767,0.9690529704093933,0.1529088020324707,0.45597484707832336 +trm_multi4_best,8,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8949999809265137,0.9600179195404053,0.9436666369438171,0.8650000095367432,7.159999847412109,2.2588493824005127,0.056333333253860474,0.8650000095367432,0.8939999938011169,0.9836875200271606,0.9668158292770386,0.14701257646083832,0.47484275698661804 +trm_multi4_best,8,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8958333134651184,0.9602567553520203,0.9449999928474426,0.8643333315849304,7.166666507720947,2.251863479614258,0.054999999701976776,0.8643333315849304,0.8939999938011169,0.9831282496452332,0.9656972289085388,0.1595911979675293,0.49685534834861755 +trm_multi4_best,12,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.959773600101471,0.8939999938011169,0.8939999938011169,7.1519999504089355,2.462700843811035,0.10599999874830246,0.8939999938011169,0.8939999938011169,1.0,1.0,0.0,0.0 
+trm_multi4_best,12,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8956666588783264,0.9604059457778931,0.9129999876022339,0.887333333492279,7.165333271026611,2.3898391723632812,0.08699999749660492,0.887333333492279,0.8939999938011169,0.9954324960708618,0.9925428628921509,0.05424528196454048,0.19182389974594116 +trm_multi4_best,12,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8966249823570251,0.9608678221702576,0.9193333387374878,0.887333333492279,7.172999858856201,2.3672215938568115,0.0806666687130928,0.887333333492279,0.8939999938011169,0.995712161064148,0.9925428628921509,0.06092767417430878,0.24528302252292633 +trm_multi4_best,12,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8974583148956299,0.9608935713768005,0.9213333129882812,0.887666642665863,7.179666519165039,2.3568453788757324,0.07866666465997696,0.887666642665863,0.8939999938011169,0.9956655502319336,0.9925428628921509,0.069182388484478,0.26729559898376465 +trm_multi4_best,12,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8966249823570251,0.9606748819351196,0.9223333597183228,0.8870000243186951,7.172999858856201,2.3598878383636475,0.07766667008399963,0.8870000243186951,0.8939999938011169,0.9950596690177917,0.9921700358390808,0.06643081456422806,0.276729553937912 
+trm_multi4_best,12,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8972083330154419,0.9610205292701721,0.9259999990463257,0.8849999904632568,7.177666664123535,2.338397264480591,0.07400000095367432,0.8849999904632568,0.8939999938011169,0.9935682415962219,0.9899328947067261,0.08451257646083832,0.31446540355682373 +trm_multi4_best,15,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.959773600101471,0.8939999938011169,0.8939999938011169,7.1519999504089355,2.462700843811035,0.10599999874830246,0.8939999938011169,0.8939999938011169,1.0,1.0,0.0,0.0 +trm_multi4_best,15,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.9598419666290283,0.8939999938011169,0.8939999938011169,7.1519999504089355,2.462700843811035,0.10599999874830246,0.8939999938011169,0.8939999938011169,1.0,1.0,0.0,0.0 +trm_multi4_best,15,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.9597874879837036,0.8939999938011169,0.8939999938011169,7.1519999504089355,2.462700843811035,0.10599999874830246,0.8939999938011169,0.8939999938011169,1.0,1.0,0.0,0.0 +trm_multi4_best,15,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.9598518013954163,0.8939999938011169,0.8939999938011169,7.1519999504089355,2.462700843811035,0.10599999874830246,0.8939999938011169,0.8939999938011169,1.0,1.0,0.0,0.0 
+trm_multi4_best,15,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8941249847412109,0.9598509073257446,0.8946666717529297,0.8939999938011169,7.1529998779296875,2.4601335525512695,0.10533333569765091,0.8939999938011169,0.8939999938011169,1.0,1.0,0.001179245300590992,0.006289307959377766 +trm_multi4_best,15,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8942499756813049,0.9599588513374329,0.8956666588783264,0.893666684627533,7.1539998054504395,2.456070899963379,0.10433333367109299,0.893666684627533,0.8939999938011169,0.9999067783355713,0.9996271729469299,0.003144653979688883,0.015723271295428276 +trm_multi4_final,0,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8286666870117188,0.9328684210777283,0.8286666870117188,0.8286666870117188,6.62933349609375,3.014399766921997,0.17133332788944244,0.8286666870117188,0.8286666870117188,1.0,1.0,0.0,0.0 +trm_multi4_final,0,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8319166898727417,0.9341394901275635,0.9086666703224182,0.7463333606719971,6.655333518981934,2.654029369354248,0.09133332967758179,0.7463333606719971,0.8286666870117188,0.9579645991325378,0.8942075371742249,0.22227627038955688,0.4902723729610443 
+trm_multi4_final,0,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8315416574478149,0.93404221534729,0.9143333435058594,0.7419999837875366,6.6523332595825195,2.6464054584503174,0.08566666394472122,0.7419999837875366,0.8286666870117188,0.9564561247825623,0.8897827863693237,0.22738327085971832,0.5252918004989624 +trm_multi4_final,0,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8301666378974915,0.9333322644233704,0.9143333435058594,0.7356666922569275,6.641333103179932,2.639954090118408,0.08566666394472122,0.7356666922569275,0.8286666870117188,0.9536906480789185,0.879324197769165,0.23273345828056335,0.5272373557090759 +trm_multi4_final,0,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8319166898727417,0.934055507183075,0.9213333129882812,0.7239999771118164,6.655333518981934,2.594970464706421,0.07866666465997696,0.7239999771118164,0.8286666870117188,0.9528358578681946,0.8688656687736511,0.2470817118883133,0.5661478638648987 +trm_multi4_final,0,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8354583382606506,0.9354007244110107,0.9269999861717224,0.7246666550636292,6.683666706085205,2.5519142150878906,0.0729999989271164,0.7246666550636292,0.8286666870117188,0.9477574229240417,0.861625075340271,0.29231518507003784,0.5992217659950256 
+trm_multi4_final,4,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8286666870117188,0.9328684210777283,0.8286666870117188,0.8286666870117188,6.62933349609375,3.014399766921997,0.17133332788944244,0.8286666870117188,0.8286666870117188,1.0,1.0,0.0,0.0 +trm_multi4_final,4,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8335833549499512,0.9348888397216797,0.8993333578109741,0.7756666541099548,6.668666839599609,2.732560157775879,0.10066666454076767,0.7756666541099548,0.8286666870117188,0.970585286617279,0.9336283206939697,0.17096303403377533,0.43968871235847473 +trm_multi4_final,4,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8352500200271606,0.9354547262191772,0.9020000100135803,0.7749999761581421,6.682000160217285,2.7150585651397705,0.09799999743700027,0.7749999761581421,0.8286666870117188,0.971238911151886,0.9328238368034363,0.17752918601036072,0.4494163393974304 +trm_multi4_final,4,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8327916860580444,0.9343785643577576,0.8989999890327454,0.7720000147819519,6.6623334884643555,2.7240254878997803,0.10100000351667404,0.7720000147819519,0.8286666870117188,0.9690265655517578,0.9292035102844238,0.17388132214546204,0.4319066107273102 
+trm_multi4_final,4,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8316666483879089,0.9340242743492126,0.8986666798591614,0.7730000019073486,6.6533331871032715,2.7423510551452637,0.10133333504199982,0.7730000019073486,0.8286666870117188,0.9677695035934448,0.9267899990081787,0.17339494824409485,0.4319066107273102 +trm_multi4_final,4,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8324583172798157,0.9344562888145447,0.8989999890327454,0.7680000066757202,6.659666538238525,2.7232038974761963,0.10100000351667404,0.7680000066757202,0.8286666870117188,0.9691271185874939,0.9239742755889893,0.1714494228363037,0.4319066107273102 +trm_multi4_final,8,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8286666870117188,0.9328684210777283,0.8286666870117188,0.8286666870117188,6.62933349609375,3.014399766921997,0.17133332788944244,0.8286666870117188,0.8286666870117188,1.0,1.0,0.0,0.0 +trm_multi4_final,8,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8332499861717224,0.934550940990448,0.8849999904632568,0.8013333082199097,6.665999889373779,2.8155124187469482,0.11500000208616257,0.8013333082199097,0.8286666870117188,0.9811444282531738,0.9662107825279236,0.11794747412204742,0.344357967376709 
+trm_multi4_final,8,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8323749899864197,0.9341363906860352,0.8830000162124634,0.8016666769981384,6.658999919891357,2.8219470977783203,0.11699999868869781,0.8016666769981384,0.8286666870117188,0.9805410504341125,0.967015266418457,0.1157587543129921,0.3346303403377533 +trm_multi4_final,8,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8352916836738586,0.9354629516601562,0.8856666684150696,0.8016666769981384,6.682333469390869,2.7959651947021484,0.11433333158493042,0.8016666769981384,0.8286666870117188,0.9822506308555603,0.9666130542755127,0.12451361864805222,0.344357967376709 +trm_multi4_final,8,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8335000276565552,0.9347721338272095,0.890333354473114,0.7993333339691162,6.668000221252441,2.803647994995117,0.10966666787862778,0.7993333339691162,0.8286666870117188,0.9798873662948608,0.9625905156135559,0.1254863739013672,0.3696497976779938 +trm_multi4_final,8,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8331666588783264,0.9347808957099915,0.8916666507720947,0.7960000038146973,6.665333271026611,2.7949953079223633,0.10833333432674408,0.7960000038146973,0.8286666870117188,0.9781777858734131,0.9593724608421326,0.13180933892726898,0.38910505175590515 
+trm_multi4_final,12,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8286666870117188,0.9328684210777283,0.8286666870117188,0.8286666870117188,6.62933349609375,3.014399766921997,0.17133332788944244,0.8286666870117188,0.8286666870117188,1.0,1.0,0.0,0.0 +trm_multi4_final,12,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8307916522026062,0.9338425993919373,0.8569999933242798,0.8209999799728394,6.64633321762085,2.936537742614746,0.14300000667572021,0.8209999799728394,0.8286666870117188,0.9936142563819885,0.9907481670379639,0.043287936598062515,0.1750972718000412 +trm_multi4_final,12,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8307499885559082,0.9337176084518433,0.8603333234786987,0.8209999799728394,6.645999908447266,2.9328060150146484,0.13966666162014008,0.8209999799728394,0.8286666870117188,0.9935137033462524,0.9907481670379639,0.04353112727403641,0.18871594965457916 +trm_multi4_final,12,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8307499885559082,0.9339007139205933,0.8583333492279053,0.8203333616256714,6.645999908447266,2.93530535697937,0.14166666567325592,0.8203333616256714,0.8286666870117188,0.993463397026062,0.9899436831474304,0.0437743179500103,0.17898832261562347 
+trm_multi4_final,12,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.831166684627533,0.9338744282722473,0.8616666793823242,0.8190000057220459,6.649333477020264,2.921477794647217,0.13833333551883698,0.8190000057220459,0.8286666870117188,0.9930108785629272,0.9883346557617188,0.04839494079351425,0.20428015291690826 +trm_multi4_final,12,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8309583067893982,0.9341686964035034,0.8683333396911621,0.8149999976158142,6.6476664543151855,2.9015274047851562,0.1316666603088379,0.8149999976158142,0.8286666870117188,0.9908487796783447,0.9835076332092285,0.057636186480522156,0.24319066107273102 +trm_multi4_final,15,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8286666870117188,0.9328684210777283,0.8286666870117188,0.8286666870117188,6.62933349609375,3.014399766921997,0.17133332788944244,0.8286666870117188,0.8286666870117188,1.0,1.0,0.0,0.0 +trm_multi4_final,15,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8286666870117188,0.9330503940582275,0.8286666870117188,0.8286666870117188,6.62933349609375,3.014399766921997,0.17133332788944244,0.8286666870117188,0.8286666870117188,1.0,1.0,0.0,0.0 
+trm_multi4_final,15,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8287083506584167,0.9331271052360535,0.8289999961853027,0.8286666870117188,6.629666805267334,3.0137219429016113,0.17100000381469727,0.8286666870117188,0.8286666870117188,1.0,1.0,0.00024319066142197698,0.0019455252913758159 +trm_multi4_final,15,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8286250233650208,0.9330225586891174,0.8289999961853027,0.828000009059906,6.629000186920166,3.0135293006896973,0.17100000381469727,0.828000009059906,0.8286666870117188,0.9998994469642639,0.9991955161094666,0.00024319066142197698,0.0019455252913758159 +trm_multi4_final,15,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8287500143051147,0.9331192970275879,0.8306666612625122,0.8276666402816772,6.630000114440918,3.009169340133667,0.1693333387374878,0.8276666402816772,0.8286666870117188,0.999698281288147,0.9987932443618774,0.0019455252913758159,0.011673151515424252 +trm_multi4_final,15,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8292916417121887,0.9331137537956238,0.8330000042915344,0.8276666402816772,6.63433313369751,3.0008811950683594,0.16699999570846558,0.8276666402816772,0.8286666870117188,0.9995977282524109,0.9987932443618774,0.0055933850817382336,0.02529182843863964 diff --git a/late_perturb_robustness/smoke_baseline.summary.csv b/late_perturb_robustness/smoke_baseline.summary.csv new file mode 100644 index 0000000..ec4de26 --- /dev/null +++ b/late_perturb_robustness/smoke_baseline.summary.csv @@ -0,0 +1,5 @@ 
+label,perturb_after,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,zero_frac,full_frac,clean_acc,retain_mean_on_clean_success,allK_on_clean_success,rescue_mean_on_clean_fail,passK_on_clean_fail +smoke_baseline,0,0.0,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.875,0.9614197611808777,0.875,0.875,1.75,0.6614378094673157,0.125,0.875,0.875,1.0,1.0,0.0,0.0 +smoke_baseline,0,0.01,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.9375,0.9834104776382446,1.0,0.875,1.875,0.33071890473365784,0.0,0.875,0.875,0.9642857313156128,0.9285714030265808,0.75,1.0 +smoke_baseline,8,0.0,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.875,0.9614197611808777,0.875,0.875,1.75,0.6614378094673157,0.125,0.875,0.875,1.0,1.0,0.0,0.0 +smoke_baseline,8,0.01,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.90625,0.96875,0.9375,0.875,1.8125,0.5266343355178833,0.0625,0.875,0.875,1.0,1.0,0.25,0.5 diff --git a/late_perturb_robustness/smoke_baseline_expandedclean.summary.csv b/late_perturb_robustness/smoke_baseline_expandedclean.summary.csv new file mode 100644 index 0000000..ffe22cf --- /dev/null +++ b/late_perturb_robustness/smoke_baseline_expandedclean.summary.csv @@ -0,0 +1,5 @@ +label,perturb_after,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,zero_frac,full_frac,clean_acc,retain_mean_on_clean_success,allK_on_clean_success,rescue_mean_on_clean_fail,passK_on_clean_fail 
+smoke_baseline_expandedclean,0,0.0,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.875,0.9614197611808777,0.875,0.875,1.75,0.6614378094673157,0.125,0.875,0.875,1.0,1.0,0.0,0.0 +smoke_baseline_expandedclean,0,0.01,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.9375,0.9834104776382446,1.0,0.875,1.875,0.33071890473365784,0.0,0.875,0.875,0.9642857313156128,0.9285714030265808,0.75,1.0 +smoke_baseline_expandedclean,8,0.0,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.875,0.9614197611808777,0.875,0.875,1.75,0.6614378094673157,0.125,0.875,0.875,1.0,1.0,0.0,0.0 +smoke_baseline_expandedclean,8,0.01,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.90625,0.96875,0.9375,0.875,1.8125,0.5266343355178833,0.0625,0.875,0.875,1.0,1.0,0.25,0.5 diff --git a/late_perturb_robustness/smoke_baseline_fastclean.summary.csv b/late_perturb_robustness/smoke_baseline_fastclean.summary.csv new file mode 100644 index 0000000..fb4e3a6 --- /dev/null +++ b/late_perturb_robustness/smoke_baseline_fastclean.summary.csv @@ -0,0 +1,5 @@ +label,perturb_after,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,zero_frac,full_frac,clean_acc,retain_mean_on_clean_success,allK_on_clean_success,rescue_mean_on_clean_fail,passK_on_clean_fail 
+smoke_baseline_fastclean,0,0.0,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.875,0.9614197611808777,0.875,0.875,1.75,0.6614378094673157,0.125,0.875,0.9375,0.9333333373069763,0.9333333373069763,0.0,0.0 +smoke_baseline_fastclean,0,0.01,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.9375,0.9834104776382446,1.0,0.875,1.875,0.33071890473365784,0.0,0.875,0.9375,0.9333333373069763,0.8666666746139526,1.0,1.0 +smoke_baseline_fastclean,8,0.0,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.875,0.9614197611808777,0.875,0.875,1.75,0.6614378094673157,0.125,0.875,0.9375,0.9333333373069763,0.9333333373069763,0.0,0.0 +smoke_baseline_fastclean,8,0.01,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.90625,0.96875,0.9375,0.875,1.8125,0.5266343355178833,0.0625,0.875,0.9375,0.9666666388511658,0.9333333373069763,0.0,0.0 diff --git a/late_perturb_robustness/smoke_plots/late_perturb_robustness_combined.csv b/late_perturb_robustness/smoke_plots/late_perturb_robustness_combined.csv new file mode 100644 index 0000000..ec4de26 --- /dev/null +++ b/late_perturb_robustness/smoke_plots/late_perturb_robustness_combined.csv @@ -0,0 +1,5 @@ +label,perturb_after,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,zero_frac,full_frac,clean_acc,retain_mean_on_clean_success,allK_on_clean_success,rescue_mean_on_clean_fail,passK_on_clean_fail 
+smoke_baseline,0,0.0,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.875,0.9614197611808777,0.875,0.875,1.75,0.6614378094673157,0.125,0.875,0.875,1.0,1.0,0.0,0.0 +smoke_baseline,0,0.01,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.9375,0.9834104776382446,1.0,0.875,1.875,0.33071890473365784,0.0,0.875,0.875,0.9642857313156128,0.9285714030265808,0.75,1.0 +smoke_baseline,8,0.0,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.875,0.9614197611808777,0.875,0.875,1.75,0.6614378094673157,0.125,0.875,0.875,1.0,1.0,0.0,0.0 +smoke_baseline,8,0.01,16,2,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.90625,0.96875,0.9375,0.875,1.8125,0.5266343355178833,0.0625,0.875,0.875,1.0,1.0,0.25,0.5 diff --git a/late_perturb_robustness/trm_baseline_best_step58590_n3000_k8_late.summary.csv b/late_perturb_robustness/trm_baseline_best_step58590_n3000_k8_late.summary.csv new file mode 100644 index 0000000..9f376be --- /dev/null +++ b/late_perturb_robustness/trm_baseline_best_step58590_n3000_k8_late.summary.csv @@ -0,0 +1,31 @@ +label,perturb_after,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,zero_frac,full_frac,clean_acc,retain_mean_on_clean_success,allK_on_clean_success,rescue_mean_on_clean_fail,passK_on_clean_fail 
+trm_baseline_best,0,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.950645923614502,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0 +trm_baseline_best,0,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.9504892826080322,0.9483333230018616,0.7639999985694885,6.935999870300293,2.303454875946045,0.05166666582226753,0.7639999985694885,0.8669999837875366,0.9578046798706055,0.875048041343689,0.2750626504421234,0.6315789222717285 +trm_baseline_best,0,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8684166669845581,0.9509722590446472,0.9456666707992554,0.7683333158493042,6.947333335876465,2.3008460998535156,0.05433333292603493,0.7683333158493042,0.8669999837875366,0.9602556824684143,0.8800461292266846,0.2697368562221527,0.6090225577354431 +trm_baseline_best,0,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.9504145383834839,0.9463333487510681,0.765333354473114,6.935999870300293,2.3080809116363525,0.05366666615009308,0.765333354473114,0.8669999837875366,0.9579008221626282,0.8785082697868347,0.2744360864162445,0.6190476417541504 
+trm_baseline_best,0,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8675000071525574,0.9505621790885925,0.9496666789054871,0.7616666555404663,6.940000057220459,2.2861320972442627,0.050333332270383835,0.7616666555404663,0.8669999837875366,0.9567474126815796,0.8723567724227905,0.2857142984867096,0.6390977501869202 +trm_baseline_best,0,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8682083487510681,0.9508457183837891,0.9506666660308838,0.7593333125114441,6.945666790008545,2.265108108520508,0.04933333396911621,0.7593333125114441,0.8669999837875366,0.956122636795044,0.8696655035018921,0.2951127886772156,0.6441102623939514 +trm_baseline_best,4,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.950645923614502,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0 +trm_baseline_best,4,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8666250109672546,0.9501558542251587,0.9350000023841858,0.8009999990463257,6.933000087738037,2.4135406017303467,0.06499999761581421,0.8009999990463257,0.8669999837875366,0.9687620401382446,0.9211841821670532,0.20081453025341034,0.5263158082962036 
+trm_baseline_best,4,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8679583072662354,0.950590968132019,0.9359999895095825,0.7996666431427002,6.943666458129883,2.404542922973633,0.06400000303983688,0.7996666431427002,0.8669999837875366,0.970588207244873,0.9200307726860046,0.19893483817577362,0.5338345766067505 +trm_baseline_best,4,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8680416941642761,0.9508261680603027,0.9399999976158142,0.8013333082199097,6.944333553314209,2.3910739421844482,0.05999999865889549,0.8013333082199097,0.8669999837875366,0.9685697555541992,0.9227220416069031,0.21271929144859314,0.5664160251617432 +trm_baseline_best,4,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8660833239555359,0.9499126076698303,0.934333324432373,0.7953333258628845,6.928666591644287,2.413485288619995,0.06566666811704636,0.7953333258628845,0.8669999837875366,0.967608630657196,0.915032684803009,0.20426064729690552,0.523809552192688 +trm_baseline_best,4,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8659999966621399,0.9500890970230103,0.9403333067893982,0.7973333597183228,6.927999973297119,2.399336576461792,0.05966666713356972,0.7973333597183228,0.8669999837875366,0.9667435884475708,0.9161860942840576,0.20927318930625916,0.5664160251617432 
+trm_baseline_best,8,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.950645923614502,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0 +trm_baseline_best,8,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669583201408386,0.9507402181625366,0.9226666688919067,0.8306666612625122,6.935666561126709,2.5147950649261475,0.07733333110809326,0.8306666612625122,0.8669999837875366,0.9784217476844788,0.9573240876197815,0.14035087823867798,0.4436090290546417 +trm_baseline_best,8,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8678333163261414,0.9507989287376404,0.9246666431427002,0.831333339214325,6.942666530609131,2.5057361125946045,0.07533333450555801,0.831333339214325,0.8669999837875366,0.9795271158218384,0.9588619470596313,0.13972431421279907,0.451127827167511 +trm_baseline_best,8,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8676666617393494,0.9505447745323181,0.9229999780654907,0.8306666612625122,6.941333293914795,2.51287841796875,0.07699999958276749,0.8306666612625122,0.8669999837875366,0.980199933052063,0.9573240876197815,0.13408520817756653,0.4411027431488037 
+trm_baseline_best,8,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8682083487510681,0.950954258441925,0.9236666560173035,0.8286666870117188,6.945666790008545,2.4972081184387207,0.07633333653211594,0.8286666870117188,0.8669999837875366,0.9788542985916138,0.9554017782211304,0.14692983031272888,0.448621541261673 +trm_baseline_best,8,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8687083125114441,0.9511321783065796,0.9286666512489319,0.824999988079071,6.949666500091553,2.46991229057312,0.07133333384990692,0.824999988079071,0.8669999837875366,0.9784698486328125,0.9504036903381348,0.15319548547267914,0.48120301961898804 +trm_baseline_best,12,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.950645923614502,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0 +trm_baseline_best,12,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8673333525657654,0.9506542682647705,0.8926666378974915,0.856333315372467,6.938666820526123,2.641887903213501,0.10733333230018616,0.856333315372467,0.8669999837875366,0.9928392767906189,0.9873125553131104,0.049185462296009064,0.2005012482404709 
+trm_baseline_best,12,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8668749928474426,0.9505452513694763,0.8896666765213013,0.8560000061988831,6.934999942779541,2.651435136795044,0.11033333092927933,0.8560000061988831,0.8669999837875366,0.9924548268318176,0.9869281053543091,0.048245612531900406,0.18045112490653992 +trm_baseline_best,12,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669583201408386,0.9506661891937256,0.8913333415985107,0.8556666374206543,6.935666561126709,2.6479289531707764,0.10866666585206985,0.8556666374206543,0.8669999837875366,0.991782009601593,0.9869281053543091,0.05325814709067345,0.19548872113227844 +trm_baseline_best,12,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8671666383743286,0.9506459832191467,0.8989999890327454,0.8539999723434448,6.937333106994629,2.629715919494629,0.10100000351667404,0.8539999723434448,0.8669999837875366,0.991733968257904,0.985005795955658,0.055137842893600464,0.24561403691768646 +trm_baseline_best,12,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8677916526794434,0.950908362865448,0.9043333530426025,0.8516666889190674,6.942333221435547,2.602372169494629,0.09566666930913925,0.8516666889190674,0.8669999837875366,0.9901480078697205,0.9823144674301147,0.07017543911933899,0.2882205545902252 
+trm_baseline_best,15,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.950645923614502,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0 +trm_baseline_best,15,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.9506445527076721,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0 +trm_baseline_best,15,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.9506717920303345,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0 +trm_baseline_best,15,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8669999837875366,0.9506852030754089,0.8669999837875366,0.8669999837875366,6.935999870300293,2.71659779548645,0.13300000131130219,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0,0.0 +trm_baseline_best,15,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8670833110809326,0.9507201910018921,0.8673333525657654,0.8669999837875366,6.936666488647461,2.7151405811309814,0.1326666623353958,0.8669999837875366,0.8669999837875366,1.0,1.0,0.0006265664123930037,0.002506265649572015 
+trm_baseline_best,15,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro,step_58590,both,gaussian,0.8670833110809326,0.9508404731750488,0.8686666488647461,0.8656666874885559,6.936666488647461,2.7110862731933594,0.1313333362340927,0.8656666874885559,0.8669999837875366,0.9997597336769104,0.9984621405601501,0.0021929824724793434,0.01253132801502943 diff --git a/late_perturb_robustness/trm_multi4_best_step35805_n3000_k8_late.summary.csv b/late_perturb_robustness/trm_multi4_best_step35805_n3000_k8_late.summary.csv new file mode 100644 index 0000000..01a8526 --- /dev/null +++ b/late_perturb_robustness/trm_multi4_best_step35805_n3000_k8_late.summary.csv @@ -0,0 +1,31 @@ +label,perturb_after,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,zero_frac,full_frac,clean_acc,retain_mean_on_clean_success,allK_on_clean_success,rescue_mean_on_clean_fail,passK_on_clean_fail +trm_multi4_best,0,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.959773600101471,0.8939999938011169,0.8939999938011169,7.1519999504089355,2.462700843811035,0.10599999874830246,0.8939999938011169,0.8939999938011169,1.0,1.0,0.0,0.0 +trm_multi4_best,0,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8957083225250244,0.9601898789405823,0.9583333134651184,0.8119999766349792,7.165666580200195,2.081879138946533,0.0416666679084301,0.8119999766349792,0.8939999938011169,0.9684004187583923,0.9030573964118958,0.28262579441070557,0.6194968819618225 
+trm_multi4_best,0,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8957916498184204,0.9602490067481995,0.9606666564941406,0.8130000233650208,7.166333198547363,2.062037706375122,0.03933333232998848,0.8130000233650208,0.8939999938011169,0.9666293859481812,0.9038031101226807,0.2983490526676178,0.6352201104164124 +trm_multi4_best,0,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8947083353996277,0.9597618579864502,0.9599999785423279,0.8063333630561829,7.1576666831970215,2.0646567344665527,0.03999999910593033,0.8063333630561829,0.8939999938011169,0.9667226076126099,0.8963460326194763,0.28734275698661804,0.6320754885673523 +trm_multi4_best,0,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8953750133514404,0.9600813984870911,0.9603333473205566,0.8009999990463257,7.163000106811523,2.049495220184326,0.03966666758060455,0.8009999990463257,0.8939999938011169,0.9666759967803955,0.8918717503547668,0.29402515292167664,0.6446540951728821 +trm_multi4_best,0,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8950416445732117,0.9601003527641296,0.9629999995231628,0.7983333468437195,7.160333156585693,2.0425376892089844,0.03700000047683716,0.7983333468437195,0.8939999938011169,0.9635066986083984,0.8851603269577026,0.3176100552082062,0.6666666865348816 
+trm_multi4_best,4,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.959773600101471,0.8939999938011169,0.8939999938011169,7.1519999504089355,2.462700843811035,0.10599999874830246,0.8939999938011169,0.8939999938011169,1.0,1.0,0.0,0.0 +trm_multi4_best,4,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8946250081062317,0.9599500894546509,0.9473333358764648,0.8433333039283752,7.1570000648498535,2.1906659603118896,0.052666667848825455,0.8433333039283752,0.8939999938011169,0.9753448963165283,0.9392244815826416,0.21383647620677948,0.5251572132110596 +trm_multi4_best,4,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8962500095367432,0.9606265425682068,0.9523333311080933,0.843999981880188,7.170000076293945,2.1701996326446533,0.04766666516661644,0.843999981880188,0.8939999938011169,0.976416826248169,0.9410887360572815,0.22012577950954437,0.5691823959350586 +trm_multi4_best,4,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8963750004768372,0.9604902863502502,0.9483333230018616,0.8446666598320007,7.171000003814697,2.171119213104248,0.05166666582226753,0.8446666598320007,0.8939999938011169,0.9760906100273132,0.9410887360572815,0.22405660152435303,0.5314465165138245 
+trm_multi4_best,4,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8968333601951599,0.9606810808181763,0.9506666660308838,0.8403333425521851,7.174666881561279,2.1587400436401367,0.04933333396911621,0.8403333425521851,0.8939999938011169,0.9759973883628845,0.9377330541610718,0.2291666716337204,0.544025182723999 +trm_multi4_best,4,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8944583535194397,0.9597752094268799,0.9526666402816772,0.8326666951179504,7.155666828155518,2.1656641960144043,0.047333333641290665,0.8326666951179504,0.8939999938011169,0.9737602472305298,0.9284116625785828,0.22562892735004425,0.5628930926322937 +trm_multi4_best,8,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.959773600101471,0.8939999938011169,0.8939999938011169,7.1519999504089355,2.462700843811035,0.10599999874830246,0.8939999938011169,0.8939999938011169,1.0,1.0,0.0,0.0 +trm_multi4_best,8,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8962083458900452,0.9605055451393127,0.9399999976158142,0.8696666955947876,7.169666767120361,2.2701423168182373,0.05999999865889549,0.8696666955947876,0.8939999938011169,0.9840604066848755,0.9709172248840332,0.15526729822158813,0.45597484707832336 
+trm_multi4_best,8,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8970000147819519,0.9608615636825562,0.9416666626930237,0.8696666955947876,7.176000118255615,2.254556179046631,0.05833333358168602,0.8696666955947876,0.8939999938011169,0.9844798445701599,0.9709172248840332,0.1591981202363968,0.46855345368385315 +trm_multi4_best,8,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8955416679382324,0.9602766633033752,0.9399999976158142,0.8673333525657654,7.164333343505859,2.2744951248168945,0.05999999865889549,0.8673333525657654,0.8939999938011169,0.9835943579673767,0.9690529704093933,0.1529088020324707,0.45597484707832336 +trm_multi4_best,8,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8949999809265137,0.9600179195404053,0.9436666369438171,0.8650000095367432,7.159999847412109,2.2588493824005127,0.056333333253860474,0.8650000095367432,0.8939999938011169,0.9836875200271606,0.9668158292770386,0.14701257646083832,0.47484275698661804 +trm_multi4_best,8,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8958333134651184,0.9602567553520203,0.9449999928474426,0.8643333315849304,7.166666507720947,2.251863479614258,0.054999999701976776,0.8643333315849304,0.8939999938011169,0.9831282496452332,0.9656972289085388,0.1595911979675293,0.49685534834861755 
+trm_multi4_best,12,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.959773600101471,0.8939999938011169,0.8939999938011169,7.1519999504089355,2.462700843811035,0.10599999874830246,0.8939999938011169,0.8939999938011169,1.0,1.0,0.0,0.0 +trm_multi4_best,12,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8956666588783264,0.9604059457778931,0.9129999876022339,0.887333333492279,7.165333271026611,2.3898391723632812,0.08699999749660492,0.887333333492279,0.8939999938011169,0.9954324960708618,0.9925428628921509,0.05424528196454048,0.19182389974594116 +trm_multi4_best,12,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8966249823570251,0.9608678221702576,0.9193333387374878,0.887333333492279,7.172999858856201,2.3672215938568115,0.0806666687130928,0.887333333492279,0.8939999938011169,0.995712161064148,0.9925428628921509,0.06092767417430878,0.24528302252292633 +trm_multi4_best,12,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8974583148956299,0.9608935713768005,0.9213333129882812,0.887666642665863,7.179666519165039,2.3568453788757324,0.07866666465997696,0.887666642665863,0.8939999938011169,0.9956655502319336,0.9925428628921509,0.069182388484478,0.26729559898376465 
+trm_multi4_best,12,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8966249823570251,0.9606748819351196,0.9223333597183228,0.8870000243186951,7.172999858856201,2.3598878383636475,0.07766667008399963,0.8870000243186951,0.8939999938011169,0.9950596690177917,0.9921700358390808,0.06643081456422806,0.276729553937912 +trm_multi4_best,12,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8972083330154419,0.9610205292701721,0.9259999990463257,0.8849999904632568,7.177666664123535,2.338397264480591,0.07400000095367432,0.8849999904632568,0.8939999938011169,0.9935682415962219,0.9899328947067261,0.08451257646083832,0.31446540355682373 +trm_multi4_best,15,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.959773600101471,0.8939999938011169,0.8939999938011169,7.1519999504089355,2.462700843811035,0.10599999874830246,0.8939999938011169,0.8939999938011169,1.0,1.0,0.0,0.0 +trm_multi4_best,15,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.9598419666290283,0.8939999938011169,0.8939999938011169,7.1519999504089355,2.462700843811035,0.10599999874830246,0.8939999938011169,0.8939999938011169,1.0,1.0,0.0,0.0 
+trm_multi4_best,15,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.9597874879837036,0.8939999938011169,0.8939999938011169,7.1519999504089355,2.462700843811035,0.10599999874830246,0.8939999938011169,0.8939999938011169,1.0,1.0,0.0,0.0 +trm_multi4_best,15,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8939999938011169,0.9598518013954163,0.8939999938011169,0.8939999938011169,7.1519999504089355,2.462700843811035,0.10599999874830246,0.8939999938011169,0.8939999938011169,1.0,1.0,0.0,0.0 +trm_multi4_best,15,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8941249847412109,0.9598509073257446,0.8946666717529297,0.8939999938011169,7.1529998779296875,2.4601335525512695,0.10533333569765091,0.8939999938011169,0.8939999938011169,1.0,1.0,0.001179245300590992,0.006289307959377766 +trm_multi4_best,15,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_35805,both,gaussian,0.8942499756813049,0.9599588513374329,0.8956666588783264,0.893666684627533,7.1539998054504395,2.456070899963379,0.10433333367109299,0.893666684627533,0.8939999938011169,0.9999067783355713,0.9996271729469299,0.003144653979688883,0.015723271295428276 diff --git a/late_perturb_robustness/trm_multi4_final_step65100_n3000_k8_late.summary.csv b/late_perturb_robustness/trm_multi4_final_step65100_n3000_k8_late.summary.csv new file mode 100644 index 0000000..b4d80d6 --- /dev/null +++ b/late_perturb_robustness/trm_multi4_final_step65100_n3000_k8_late.summary.csv @@ -0,0 +1,31 @@ 
+label,perturb_after,sigma,n_samples,rollouts,ckpt_root,ckpt_name,perturb,noise_distribution,mean_rollout_exact,mean_rollout_token_acc,pass_at_k,all_k,correct_count_mean,correct_count_std,zero_frac,full_frac,clean_acc,retain_mean_on_clean_success,allK_on_clean_success,rescue_mean_on_clean_fail,passK_on_clean_fail +trm_multi4_final,0,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8286666870117188,0.9328684210777283,0.8286666870117188,0.8286666870117188,6.62933349609375,3.014399766921997,0.17133332788944244,0.8286666870117188,0.8286666870117188,1.0,1.0,0.0,0.0 +trm_multi4_final,0,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8319166898727417,0.9341394901275635,0.9086666703224182,0.7463333606719971,6.655333518981934,2.654029369354248,0.09133332967758179,0.7463333606719971,0.8286666870117188,0.9579645991325378,0.8942075371742249,0.22227627038955688,0.4902723729610443 +trm_multi4_final,0,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8315416574478149,0.93404221534729,0.9143333435058594,0.7419999837875366,6.6523332595825195,2.6464054584503174,0.08566666394472122,0.7419999837875366,0.8286666870117188,0.9564561247825623,0.8897827863693237,0.22738327085971832,0.5252918004989624 
+trm_multi4_final,0,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8301666378974915,0.9333322644233704,0.9143333435058594,0.7356666922569275,6.641333103179932,2.639954090118408,0.08566666394472122,0.7356666922569275,0.8286666870117188,0.9536906480789185,0.879324197769165,0.23273345828056335,0.5272373557090759 +trm_multi4_final,0,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8319166898727417,0.934055507183075,0.9213333129882812,0.7239999771118164,6.655333518981934,2.594970464706421,0.07866666465997696,0.7239999771118164,0.8286666870117188,0.9528358578681946,0.8688656687736511,0.2470817118883133,0.5661478638648987 +trm_multi4_final,0,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8354583382606506,0.9354007244110107,0.9269999861717224,0.7246666550636292,6.683666706085205,2.5519142150878906,0.0729999989271164,0.7246666550636292,0.8286666870117188,0.9477574229240417,0.861625075340271,0.29231518507003784,0.5992217659950256 +trm_multi4_final,4,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8286666870117188,0.9328684210777283,0.8286666870117188,0.8286666870117188,6.62933349609375,3.014399766921997,0.17133332788944244,0.8286666870117188,0.8286666870117188,1.0,1.0,0.0,0.0 
+trm_multi4_final,4,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8335833549499512,0.9348888397216797,0.8993333578109741,0.7756666541099548,6.668666839599609,2.732560157775879,0.10066666454076767,0.7756666541099548,0.8286666870117188,0.970585286617279,0.9336283206939697,0.17096303403377533,0.43968871235847473 +trm_multi4_final,4,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8352500200271606,0.9354547262191772,0.9020000100135803,0.7749999761581421,6.682000160217285,2.7150585651397705,0.09799999743700027,0.7749999761581421,0.8286666870117188,0.971238911151886,0.9328238368034363,0.17752918601036072,0.4494163393974304 +trm_multi4_final,4,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8327916860580444,0.9343785643577576,0.8989999890327454,0.7720000147819519,6.6623334884643555,2.7240254878997803,0.10100000351667404,0.7720000147819519,0.8286666870117188,0.9690265655517578,0.9292035102844238,0.17388132214546204,0.4319066107273102 +trm_multi4_final,4,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8316666483879089,0.9340242743492126,0.8986666798591614,0.7730000019073486,6.6533331871032715,2.7423510551452637,0.10133333504199982,0.7730000019073486,0.8286666870117188,0.9677695035934448,0.9267899990081787,0.17339494824409485,0.4319066107273102 
+trm_multi4_final,4,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8324583172798157,0.9344562888145447,0.8989999890327454,0.7680000066757202,6.659666538238525,2.7232038974761963,0.10100000351667404,0.7680000066757202,0.8286666870117188,0.9691271185874939,0.9239742755889893,0.1714494228363037,0.4319066107273102 +trm_multi4_final,8,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8286666870117188,0.9328684210777283,0.8286666870117188,0.8286666870117188,6.62933349609375,3.014399766921997,0.17133332788944244,0.8286666870117188,0.8286666870117188,1.0,1.0,0.0,0.0 +trm_multi4_final,8,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8332499861717224,0.934550940990448,0.8849999904632568,0.8013333082199097,6.665999889373779,2.8155124187469482,0.11500000208616257,0.8013333082199097,0.8286666870117188,0.9811444282531738,0.9662107825279236,0.11794747412204742,0.344357967376709 +trm_multi4_final,8,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8323749899864197,0.9341363906860352,0.8830000162124634,0.8016666769981384,6.658999919891357,2.8219470977783203,0.11699999868869781,0.8016666769981384,0.8286666870117188,0.9805410504341125,0.967015266418457,0.1157587543129921,0.3346303403377533 
+trm_multi4_final,8,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8352916836738586,0.9354629516601562,0.8856666684150696,0.8016666769981384,6.682333469390869,2.7959651947021484,0.11433333158493042,0.8016666769981384,0.8286666870117188,0.9822506308555603,0.9666130542755127,0.12451361864805222,0.344357967376709 +trm_multi4_final,8,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8335000276565552,0.9347721338272095,0.890333354473114,0.7993333339691162,6.668000221252441,2.803647994995117,0.10966666787862778,0.7993333339691162,0.8286666870117188,0.9798873662948608,0.9625905156135559,0.1254863739013672,0.3696497976779938 +trm_multi4_final,8,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8331666588783264,0.9347808957099915,0.8916666507720947,0.7960000038146973,6.665333271026611,2.7949953079223633,0.10833333432674408,0.7960000038146973,0.8286666870117188,0.9781777858734131,0.9593724608421326,0.13180933892726898,0.38910505175590515 +trm_multi4_final,12,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8286666870117188,0.9328684210777283,0.8286666870117188,0.8286666870117188,6.62933349609375,3.014399766921997,0.17133332788944244,0.8286666870117188,0.8286666870117188,1.0,1.0,0.0,0.0 
+trm_multi4_final,12,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8307916522026062,0.9338425993919373,0.8569999933242798,0.8209999799728394,6.64633321762085,2.936537742614746,0.14300000667572021,0.8209999799728394,0.8286666870117188,0.9936142563819885,0.9907481670379639,0.043287936598062515,0.1750972718000412 +trm_multi4_final,12,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8307499885559082,0.9337176084518433,0.8603333234786987,0.8209999799728394,6.645999908447266,2.9328060150146484,0.13966666162014008,0.8209999799728394,0.8286666870117188,0.9935137033462524,0.9907481670379639,0.04353112727403641,0.18871594965457916 +trm_multi4_final,12,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8307499885559082,0.9339007139205933,0.8583333492279053,0.8203333616256714,6.645999908447266,2.93530535697937,0.14166666567325592,0.8203333616256714,0.8286666870117188,0.993463397026062,0.9899436831474304,0.0437743179500103,0.17898832261562347 +trm_multi4_final,12,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.831166684627533,0.9338744282722473,0.8616666793823242,0.8190000057220459,6.649333477020264,2.921477794647217,0.13833333551883698,0.8190000057220459,0.8286666870117188,0.9930108785629272,0.9883346557617188,0.04839494079351425,0.20428015291690826 
+trm_multi4_final,12,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8309583067893982,0.9341686964035034,0.8683333396911621,0.8149999976158142,6.6476664543151855,2.9015274047851562,0.1316666603088379,0.8149999976158142,0.8286666870117188,0.9908487796783447,0.9835076332092285,0.057636186480522156,0.24319066107273102 +trm_multi4_final,15,0.0,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8286666870117188,0.9328684210777283,0.8286666870117188,0.8286666870117188,6.62933349609375,3.014399766921997,0.17133332788944244,0.8286666870117188,0.8286666870117188,1.0,1.0,0.0,0.0 +trm_multi4_final,15,0.001,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8286666870117188,0.9330503940582275,0.8286666870117188,0.8286666870117188,6.62933349609375,3.014399766921997,0.17133332788944244,0.8286666870117188,0.8286666870117188,1.0,1.0,0.0,0.0 +trm_multi4_final,15,0.003,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8287083506584167,0.9331271052360535,0.8289999961853027,0.8286666870117188,6.629666805267334,3.0137219429016113,0.17100000381469727,0.8286666870117188,0.8286666870117188,1.0,1.0,0.00024319066142197698,0.0019455252913758159 
+trm_multi4_final,15,0.01,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8286250233650208,0.9330225586891174,0.8289999961853027,0.828000009059906,6.629000186920166,3.0135293006896973,0.17100000381469727,0.828000009059906,0.8286666870117188,0.9998994469642639,0.9991955161094666,0.00024319066142197698,0.0019455252913758159 +trm_multi4_final,15,0.03,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8287500143051147,0.9331192970275879,0.8306666612625122,0.8276666402816772,6.630000114440918,3.009169340133667,0.1693333387374878,0.8276666402816772,0.8286666870117188,0.999698281288147,0.9987932443618774,0.0019455252913758159,0.011673151515424252 +trm_multi4_final,15,0.1,3000,8,/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro,step_65100,both,gaussian,0.8292916417121887,0.9331137537956238,0.8330000042915344,0.8276666402816772,6.63433313369751,3.0008811950683594,0.16699999570846558,0.8276666402816772,0.8286666870117188,0.9995977282524109,0.9987932443618774,0.0055933850817382336,0.02529182843863964 diff --git a/late_perturb_robustness/watch_and_plot.sh b/late_perturb_robustness/watch_and_plot.sh new file mode 100755 index 0000000..5bb1d8a --- /dev/null +++ b/late_perturb_robustness/watch_and_plot.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT=/home/yurenh2/rrm +PY=/home/yurenh2/miniconda3/envs/rrm/bin/python +cd "${ROOT}" + +PIDS=( + research/flossing/late_perturb_robustness/logs/trm_baseline_best_step58590_n3000_k8_late.pid + research/flossing/late_perturb_robustness/logs/trm_multi4_best_step35805_n3000_k8_late.pid + research/flossing/late_perturb_robustness/logs/trm_multi4_final_step65100_n3000_k8_late.pid +) + +for pf in "${PIDS[@]}"; do 
+ pid=$(cat "${pf}") + echo "watch ${pf}: ${pid}" + while kill -0 "${pid}" 2>/dev/null; do + sleep 60 + done + echo "done ${pf}: ${pid}" +done + +"${PY}" research/flossing/plot_late_perturb_robustness.py \ + --summaries \ + research/flossing/late_perturb_robustness/trm_baseline_best_step58590_n3000_k8_late.summary.csv \ + research/flossing/late_perturb_robustness/trm_multi4_best_step35805_n3000_k8_late.summary.csv \ + research/flossing/late_perturb_robustness/trm_multi4_final_step65100_n3000_k8_late.summary.csv \ + --out-dir research/flossing/late_perturb_robustness/plots \ + --slice-sigma 0.1 + +nvidia-smi --query-gpu=index,memory.used,memory.total,utilization.gpu --format=csv,noheader,nounits \ + > research/flossing/late_perturb_robustness/plots/final_gpu_status.txt diff --git a/launch_10k_queue.sh b/launch_10k_queue.sh new file mode 100755 index 0000000..8376ead --- /dev/null +++ b/launch_10k_queue.sh @@ -0,0 +1,124 @@ +#!/usr/bin/env bash +set -eo pipefail + +FLOSS_DIR="/home/yurenh2/rrm/research/flossing" +CONDA_SH="/home/yurenh2/miniconda3/etc/profile.d/conda.sh" +HRM_ROOT="/home/yurenh2/rrm/hrm/checkpoints/Sudoku-extreme-1k-aug-1000 ACT-torch/HierarchicalReasoningModel_ACTV1 righteous-python" +TRM_ROOT="/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_singleGPU" + +wait_for_pid() { + local pid="$1" + if [[ "${pid}" == "0" ]]; then + return 0 + fi + while kill -0 "${pid}" 2>/dev/null; do + sleep 60 + done +} + +run_hrm_baseline() { + wait_for_pid "${1:-0}" + source "${CONDA_SH}" + conda activate rrm + cd "${FLOSS_DIR}" + CUDA_VISIBLE_DEVICES=0 python step3_train_with_rf.py \ + --ckpt-root "${HRM_ROOT}" \ + --ckpt-name step_26040 \ + --n-steps 10000 \ + --batch-size 8 \ + --lr 1e-5 \ + --alpha-rf 0 \ + --lambda-star 0 \ + --rf-mode volume_cf \ + --k-lyap 0 \ + --lyap-act-steps 4 \ + --seed 42 \ + --eval-every 1000 \ + --eval-n 512 \ + --eval-batch-size 32 \ + --out step3_L_baseline_26040_fast_10k.json \ + > 
step3_L_baseline_26040_fast_10k.log 2>&1 +} + +run_hrm_volume() { + wait_for_pid "${1:-0}" + source "${CONDA_SH}" + conda activate rrm + cd "${FLOSS_DIR}" + CUDA_VISIBLE_DEVICES=1 python step3_train_with_rf.py \ + --ckpt-root "${HRM_ROOT}" \ + --ckpt-name step_26040 \ + --n-steps 10000 \ + --batch-size 8 \ + --lr 1e-5 \ + --alpha-rf 10 \ + --lambda-star -0.15 \ + --rf-mode volume_cf \ + --k-lyap 8 \ + --lyap-act-steps 4 \ + --seed 42 \ + --eval-every 1000 \ + --eval-n 512 \ + --eval-batch-size 32 \ + --out step3_M_volume_cf_26040_lstar_neg015_k8_a10_10k.json \ + > step3_M_volume_cf_26040_lstar_neg015_k8_a10_10k.log 2>&1 +} + +run_trm_baseline() { + wait_for_pid "${1:-0}" + source "${CONDA_SH}" + conda activate rrm + cd "${FLOSS_DIR}" + CUDA_VISIBLE_DEVICES=2 python step5_train_trm_cf.py \ + --ckpt-root "${TRM_ROOT}" \ + --ckpt-name step_26041 \ + --n-steps 10000 \ + --batch-size 4 \ + --lr 1e-5 \ + --alpha-rf 0 \ + --lambda-star 0.02 \ + --rf-mode volume_cf \ + --k-lyap 0 \ + --lyap-act-steps 4 \ + --seed 42 \ + --eval-every 1000 \ + --eval-n 512 \ + --eval-batch-size 32 \ + --out step5_L_trm_baseline_26041_batch4_fast_10k.json \ + > step5_L_trm_baseline_26041_batch4_fast_10k.log 2>&1 +} + +run_trm_volume() { + wait_for_pid "${1:-0}" + source "${CONDA_SH}" + conda activate rrm + cd "${FLOSS_DIR}" + CUDA_VISIBLE_DEVICES=3 python step5_train_trm_cf.py \ + --ckpt-root "${TRM_ROOT}" \ + --ckpt-name step_26041 \ + --n-steps 10000 \ + --batch-size 4 \ + --lr 1e-5 \ + --alpha-rf 10 \ + --lambda-star 0.02 \ + --rf-mode volume_cf \ + --k-lyap 4 \ + --lyap-act-steps 4 \ + --seed 42 \ + --eval-every 1000 \ + --eval-n 512 \ + --eval-batch-size 32 \ + --out step5_M_trm_volume_cf_26041_lstar002_batch4_k4_a10_10k.json \ + > step5_M_trm_volume_cf_26041_lstar002_batch4_k4_a10_10k.log 2>&1 +} + +cmd="${1:?usage: launch_10k_queue.sh MODE [wait_pid]}" +wait_pid="${2:-0}" + +case "${cmd}" in + hrm_baseline) run_hrm_baseline "${wait_pid}" ;; + hrm_volume) run_hrm_volume "${wait_pid}" ;; 
+ trm_baseline) run_trm_baseline "${wait_pid}" ;; + trm_volume) run_trm_volume "${wait_pid}" ;; + *) echo "unknown command: ${cmd}" >&2; exit 2 ;; +esac diff --git a/launch_dynamics_variants_queue.sh b/launch_dynamics_variants_queue.sh new file mode 100755 index 0000000..51076b1 --- /dev/null +++ b/launch_dynamics_variants_queue.sh @@ -0,0 +1,186 @@ +#!/usr/bin/env bash +set -eo pipefail + +FLOSS_DIR="/home/yurenh2/rrm/research/flossing" +CONDA_SH="/home/yurenh2/miniconda3/etc/profile.d/conda.sh" +HRM_ROOT="/home/yurenh2/rrm/hrm/checkpoints/Sudoku-extreme-1k-aug-1000 ACT-torch/HierarchicalReasoningModel_ACTV1 righteous-python" +TRM_ROOT="/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_singleGPU" + +wait_for_pid() { + local pid="$1" + if [[ "${pid}" == "0" ]]; then + return 0 + fi + while kill -0 "${pid}" 2>/dev/null; do + sleep 60 + done +} + +activate_env() { + source "${CONDA_SH}" + conda activate rrm + cd "${FLOSS_DIR}" +} + +run_hrm_queue() { + wait_for_pid "${1:-0}" + activate_env + + CUDA_VISIBLE_DEVICES=0 python step7_interfloss.py \ + --model hrm \ + --ckpt-root "${HRM_ROOT}" \ + --ckpt-name step_26040 \ + --train-steps 10000 \ + --batch-size 8 \ + --train-lr 1e-5 \ + --floss-lr 1e-4 \ + --floss-steps 500 \ + --interfloss-at 0,500 \ + --floss-mode engelken_l2 \ + --lambda-star 0 \ + --k-lyap 8 \ + --lyap-act-steps 4 \ + --lyap-start-act 12 \ + --kl-beta 10 \ + --kl-replay-size 64 \ + --kl-batch-size 4 \ + --kl-temperature 1 \ + --seed 42 \ + --eval-every 1000 \ + --eval-n 512 \ + --eval-batch-size 32 \ + --floss-log-every 10 \ + --out step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k.json \ + > step7_E_hrm_late_engelken_interfloss_kl10_start12_26040_k8_10k.log 2>&1 + + CUDA_VISIBLE_DEVICES=0 python step7_interfloss.py \ + --model hrm \ + --ckpt-root "${HRM_ROOT}" \ + --ckpt-name step_26040 \ + --train-steps 10000 \ + --batch-size 8 \ + --train-lr 1e-5 \ + --floss-lr 1e-4 \ + --floss-steps 500 \ + 
# Run the three queued TRM fine-tuning jobs one after another on GPU 2.
#   $1  optional PID to wait on before starting (default 0 = start at once).
# Every job writes <tag>.json through --out and sends stdout/stderr to <tag>.log
# in the directory selected by activate_env.
run_trm_queue() {
  wait_for_pid "${1:-0}"
  activate_env

  # Flag groups shared by the jobs below. They are spliced in at the same
  # positions the flags occupy on the original command lines.
  local -a ckpt_flags=(
    --model trm
    --ckpt-root "${TRM_ROOT}"
    --ckpt-name step_26041
    --train-steps 10000
    --batch-size 4
  )
  local -a floss_flags=(--train-lr 1e-5 --floss-lr 1e-4 --floss-steps 500 --interfloss-at 0,500)
  local -a kl_flags=(--kl-beta 10 --kl-replay-size 64 --kl-batch-size 4 --kl-temperature 1)
  local -a eval_flags=(--eval-every 1000 --eval-n 512 --eval-batch-size 32)
  local tag

  # Job F: engelken_l2 flossing, Lyapunov window starting at ACT step 12.
  tag=step7_F_trm_late_engelken_interfloss_kl10_start12_26041_k4_batch4_10k
  CUDA_VISIBLE_DEVICES=2 python step7_interfloss.py \
    "${ckpt_flags[@]}" "${floss_flags[@]}" \
    --floss-mode engelken_l2 --lambda-star 0 \
    --k-lyap 4 --lyap-act-steps 4 --lyap-start-act 12 \
    "${kl_flags[@]}" --seed 42 "${eval_flags[@]}" \
    --floss-log-every 10 \
    --out "${tag}.json" \
    > "${tag}.log" 2>&1

  # Job H: volume_cf flossing with lambda-star 0.02.
  tag=step7_H_trm_volume_envelope_interfloss_kl10_lstar002_26041_k4_batch4_10k
  CUDA_VISIBLE_DEVICES=2 python step7_interfloss.py \
    "${ckpt_flags[@]}" "${floss_flags[@]}" \
    --floss-mode volume_cf --lambda-star 0.02 \
    --k-lyap 4 --lyap-act-steps 4 \
    "${kl_flags[@]}" --seed 42 "${eval_flags[@]}" \
    --floss-log-every 10 \
    --out "${tag}.json" \
    > "${tag}.log" 2>&1

  # Job B: basin-consistency training (step8 script, different flag set).
  tag=step8_B_trm_basin_consistency_beta1_noise002_after8_26041_batch4_10k
  CUDA_VISIBLE_DEVICES=2 python step8_basin_consistency.py \
    "${ckpt_flags[@]}" \
    --lr 1e-5 --consistency-beta 1 --consistency-every 1 \
    --perturb-after 8 --noise-std 0.02 --kl-temperature 1 \
    --seed 42 "${eval_flags[@]}" \
    --out "${tag}.json" \
    > "${tag}.log" 2>&1
}
# Run one step7_interfloss.py configuration from ${ROOT} on a chosen GPU.
#   $1  GPU index exported as CUDA_VISIBLE_DEVICES for this run
#   $2  run name; results go to ${OUT_DIR}/<name>.json, output to <name>.log
#   $3  value for --interfloss-at (may be the empty string)
#   $4  value for --floss-steps (default 500)
# NOTE(review): common_args already contains `--floss-steps 500`; the value
# given here appears later on the command line, so presumably it is the one
# that takes effect - confirm against the script's argument parser.
run_case() {
  local gpu="$1" name="$2" schedule="$3"
  local extra_floss_steps="${4:-500}"
  local out_prefix="${OUT_DIR}/${name}"
  local -a case_args=(
    "${common_args[@]}"
    --floss-steps "${extra_floss_steps}"
    --interfloss-at "${schedule}"
    --out "${out_prefix}.json"
  )

  cd "${ROOT}"
  CUDA_VISIBLE_DEVICES="${gpu}" PYTHONUNBUFFERED=1 \
    "${PY}" research/flossing/step7_interfloss.py "${case_args[@]}" \
    > "${out_prefix}.log" 2>&1
}
# Run ${SCRIPT} once with the shared `common` flags plus per-case extras.
#   $1    GPU index exported as CUDA_VISIBLE_DEVICES for this run
#   $2    run name; results go to ${OUT_DIR}/<name>.json, output to <name>.log
#   $3..  extra flags, placed after the common flags and before --out
run_case() {
  local gpu="$1" name="$2"
  shift 2
  local out_prefix="${OUT_DIR}/${name}"
  local -a argv=("${common[@]}" "$@" --out "${out_prefix}.json")

  CUDA_VISIBLE_DEVICES="${gpu}" PYTHONUNBUFFERED=1 "${PY}" "${SCRIPT}" "${argv[@]}" \
    > "${out_prefix}.log" 2>&1
}
# Launch one HRM multi4 log-uniform pretraining run with a given perturbation
# target, on the GPU named by the script-level ${GPU}.
#   $1  value for +trajectory_perturb
#   $2  suffix embedded in the run name and checkpoint directory
run_case() {
  local perturb="$1" suffix="$2"

  source /home/yurenh2/miniconda3/etc/profile.d/conda.sh
  conda activate rrm

  cd /home/yurenh2/rrm/hrm
  export WANDB_MODE=offline
  export CUDA_VISIBLE_DEVICES="${GPU}"

  # The run name doubles as the last component of the checkpoint directory.
  local project='Sudoku-extreme-1k-aug-1000 ACT-torch'
  local run_label="HierarchicalReasoningModel_ACTV1 multi4-loguniform-${suffix}-repro"
  local -a overrides=(
    data_path=/home/yurenh2/rrm/data/sudoku-extreme-1k-aug-1000
    "+project_name=${project}"
    "+run_name=${run_label}"
    "+checkpoint_path=checkpoints/${project}/${run_label}"
    epochs=20000 eval_interval=2000 checkpoint_every_eval=true
    global_batch_size=768
    lr=0.0001 lr_min_ratio=1.0 lr_warmup_steps=2000
    beta1=0.9 beta2=0.95 weight_decay=1.0
    puzzle_emb_lr=0.0001 puzzle_emb_weight_decay=1.0
    +trajectory_augment=true
    +trajectory_n=4
    +trajectory_noise_std=0.001
    +trajectory_noise_min=0.00003
    +trajectory_noise_max=0.003
    +trajectory_noise_sampling=loguniform
    +trajectory_sigma_start=0.0
    +trajectory_sigma_ramp_steps=5000
    "+trajectory_perturb=${perturb}"
  )

  python pretrain.py "${overrides[@]}"
}
# Run the HRM engelken_l2 inter-flossing job on GPU 0.
#   $1  optional PID to wait on before starting (default 0 = start at once).
# Results go to <tag>.json through --out; stdout/stderr go to <tag>.log in the
# directory selected by activate_env.
run_hrm_engelken() {
  wait_for_pid "${1:-0}"
  activate_env

  local tag=step7_A_hrm_engelken_interfloss_26040_k8_10k
  local -a job_flags=(
    --model hrm
    --ckpt-root "${HRM_ROOT}"
    --ckpt-name step_26040
    --train-steps 10000
    --batch-size 8
    --train-lr 1e-5
    --floss-lr 1e-4
    --floss-steps 500
    --interfloss-at 0,500
    --floss-mode engelken_l2
    --lambda-star 0
    --k-lyap 8
    --lyap-act-steps 4
    --seed 42
    --eval-every 1000
    --eval-n 512
    --eval-batch-size 32
    --floss-log-every 10
    --out "${tag}.json"
  )

  CUDA_VISIBLE_DEVICES=0 python step7_interfloss.py "${job_flags[@]}" \
    > "${tag}.log" 2>&1
}
#!/usr/bin/env bash
# Launch one "multi4 loguniform parallel" pretraining run on a single GPU.
# Usage: launch_multi4_parallel_repro_config.sh CASE [GPU]
#   CASE  hrm | trm; any other value prints an error and exits with status 2
#   GPU   value exported as CUDA_VISIBLE_DEVICES (default 0)
set -eo pipefail

CASE="${1:?case required}"
GPU="${2:-0}"

source /home/yurenh2/miniconda3/etc/profile.d/conda.sh
conda activate rrm

# Optimizer overrides that are identical for both models.
optimizer_overrides=(
  lr=0.0001 lr_min_ratio=1.0 lr_warmup_steps=2000
  beta1=0.9 beta2=0.95 weight_decay=1.0
  puzzle_emb_lr=0.0001 puzzle_emb_weight_decay=1.0
)

# Trajectory-augmentation overrides that are identical for both models; they
# stay last on the command line, as in the hand-written invocations.
trajectory_overrides=(
  +trajectory_augment=true
  +trajectory_parallel=true
  +trajectory_n=4
  +trajectory_noise_std=0.001
  +trajectory_noise_min=0.00003
  +trajectory_noise_max=0.003
  +trajectory_noise_sampling=loguniform
  +trajectory_sigma_start=0.0
  +trajectory_sigma_ramp_steps=5000
  +trajectory_perturb=both
)

if [[ "${CASE}" == "hrm" ]]; then
  cd /home/yurenh2/rrm/hrm
  export WANDB_MODE=offline
  export CUDA_VISIBLE_DEVICES="${GPU}"
  python pretrain.py \
    data_path=/home/yurenh2/rrm/data/sudoku-extreme-1k-aug-1000 \
    +project_name='Sudoku-extreme-1k-aug-1000 ACT-torch' \
    +run_name='HierarchicalReasoningModel_ACTV1 multi4-loguniform-parallel-repro' \
    +checkpoint_path='checkpoints/Sudoku-extreme-1k-aug-1000 ACT-torch/HierarchicalReasoningModel_ACTV1 multi4-loguniform-parallel-repro' \
    epochs=20000 eval_interval=2000 checkpoint_every_eval=true \
    global_batch_size=768 \
    "${optimizer_overrides[@]}" \
    "${trajectory_overrides[@]}"
elif [[ "${CASE}" == "trm" ]]; then
  cd /home/yurenh2/rrm/trm
  export WANDB_MODE=offline
  export CUDA_VISIBLE_DEVICES="${GPU}"
  python pretrain.py \
    data_paths='[/home/yurenh2/rrm/data/sudoku-extreme-1k-aug-1000]' \
    data_paths_test='[]' \
    evaluators='[]' \
    +project_name='Sudoku-extreme-1k-aug-1000-ACT-torch' \
    +run_name='pretrain_mlp_t_sudoku_multi4_loguniform_parallel_repro' \
    +checkpoint_path='checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_multi4_loguniform_parallel_repro' \
    +load_checkpoint=null \
    epochs=50000 eval_interval=5000 min_eval_interval=0 checkpoint_every_eval=true \
    global_batch_size=192 \
    "${optimizer_overrides[@]}" \
    ema=true ema_rate=0.999 freeze_weights=false \
    arch.mlp_t=true arch.pos_encodings=none arch.puzzle_emb_len=16 arch.no_ACT_continue=true \
    "${trajectory_overrides[@]}"
else
  echo "unknown case: ${CASE}" >&2
  exit 2
fi
ACT-torch/HierarchicalReasoningModel_ACTV1 multi4-loguniform-repro' \ + epochs=20000 eval_interval=2000 checkpoint_every_eval=true \ + global_batch_size=768 \ + lr=0.0001 lr_min_ratio=1.0 lr_warmup_steps=2000 \ + beta1=0.9 beta2=0.95 weight_decay=1.0 \ + puzzle_emb_lr=0.0001 puzzle_emb_weight_decay=1.0 \ + +trajectory_augment=true \ + +trajectory_n=4 \ + +trajectory_noise_std=0.001 \ + +trajectory_noise_min=0.00003 \ + +trajectory_noise_max=0.003 \ + +trajectory_noise_sampling=loguniform \ + +trajectory_sigma_start=0.0 \ + +trajectory_sigma_ramp_steps=5000 \ + +trajectory_perturb=both + ;; + trm) + cd /home/yurenh2/rrm/trm + export WANDB_MODE=offline + export CUDA_VISIBLE_DEVICES="${GPU}" + python pretrain.py \ + data_paths='[/home/yurenh2/rrm/data/sudoku-extreme-1k-aug-1000]' \ + data_paths_test='[]' \ + evaluators='[]' \ + +project_name='Sudoku-extreme-1k-aug-1000-ACT-torch' \ + +run_name='pretrain_mlp_t_sudoku_multi4_loguniform_repro' \ + +checkpoint_path='checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_multi4_loguniform_repro' \ + +load_checkpoint=null \ + epochs=50000 eval_interval=5000 min_eval_interval=0 checkpoint_every_eval=true \ + global_batch_size=192 \ + lr=0.0001 lr_min_ratio=1.0 lr_warmup_steps=2000 \ + beta1=0.9 beta2=0.95 weight_decay=1.0 \ + puzzle_emb_lr=0.0001 puzzle_emb_weight_decay=1.0 \ + ema=true ema_rate=0.999 freeze_weights=false \ + arch.mlp_t=true arch.pos_encodings=none arch.puzzle_emb_len=16 arch.no_ACT_continue=true \ + +trajectory_augment=true \ + +trajectory_n=4 \ + +trajectory_noise_std=0.001 \ + +trajectory_noise_min=0.00003 \ + +trajectory_noise_max=0.003 \ + +trajectory_noise_sampling=loguniform \ + +trajectory_sigma_start=0.0 \ + +trajectory_sigma_ramp_steps=5000 \ + +trajectory_perturb=both + ;; + *) + echo "unknown case: ${CASE}" >&2 + exit 2 + ;; +esac diff --git a/launch_multi4_repro_config_all.sh b/launch_multi4_repro_config_all.sh new file mode 100755 index 0000000..9d25d87 --- /dev/null +++ 
# Run one step9_trajectory_perturb_train.py job on ${GPU}, bracketed by
# timestamped START/DONE lines on stdout.
#   $1    tag; the job's stdout/stderr are written to <tag>.log
#   $2..  arguments forwarded verbatim to the training script
# The script runs under `set -e`, so a failing job aborts before DONE prints.
run_one() {
  local tag="$1"
  shift
  local log_file="${tag}.log"
  local -a train_cmd=(python step9_trajectory_perturb_train.py "$@")

  echo "[$(date -Is)] START ${tag}"
  CUDA_VISIBLE_DEVICES="${GPU}" "${train_cmd[@]}" > "${log_file}" 2>&1
  echo "[$(date -Is)] DONE ${tag}"
}
# Run one step9_trajectory_perturb_train.py job on ${GPU}, bracketed by
# timestamped START/DONE lines on stdout (START also names the GPU).
#   $1    tag; the job's stdout/stderr are written to <tag>.log
#   $2..  arguments forwarded verbatim to the training script
# The script runs under `set -e`, so a failing job aborts before DONE prints.
run_one() {
  local tag="$1"
  shift
  local log_file="${tag}.log"
  local -a train_cmd=(python step9_trajectory_perturb_train.py "$@")

  echo "[$(date -Is)] START ${tag} on GPU ${GPU}"
  CUDA_VISIBLE_DEVICES="${GPU}" "${train_cmd[@]}" > "${log_file}" 2>&1
  echo "[$(date -Is)] DONE ${tag}"
}
step9_E_hrm_baseline_parallel_fixed_26040_50k \ + --model hrm \ + --ckpt-root "${HRM_ROOT}" \ + --ckpt-name step_26040 \ + --mode baseline_clean \ + --rollout-impl parallel_fixed \ + --train-steps 50000 \ + --batch-size 8 \ + --lr 1e-5 \ + --seed 42 \ + --eval-every 2500 \ + --eval-n 1024 \ + --eval-batch-size 32 \ + --save-best \ + --save-final \ + --save-every-eval \ + --save-train-state \ + --out step9_E_hrm_baseline_parallel_fixed_26040_50k.json + ;; + + hrm_multi4_loguniform50k) + run_one step9_F_hrm_multi4_loguniform_ramp_26040_50k \ + --model hrm \ + --ckpt-root "${HRM_ROOT}" \ + --ckpt-name step_26040 \ + --mode multi_perturbed_ce \ + --rollout-impl parallel_fixed \ + --n-trajectories 4 \ + --train-steps 50000 \ + --batch-size 8 \ + --lr 1e-5 \ + --noise-std 0.001 \ + --noise-min 0.00003 \ + --noise-max 0.003 \ + --noise-sampling loguniform \ + --sigma-start 0 \ + --sigma-ramp-steps 5000 \ + --perturb both \ + --seed 42 \ + --eval-every 2500 \ + --eval-n 1024 \ + --eval-batch-size 32 \ + --save-best \ + --save-final \ + --save-every-eval \ + --save-train-state \ + --out step9_F_hrm_multi4_loguniform_ramp_26040_50k.json + ;; + + trm_baseline50k) + run_one step9_G_trm_baseline_parallel_fixed_26041_batch4_50k \ + --model trm \ + --ckpt-root "${TRM_ROOT}" \ + --ckpt-name step_26041 \ + --mode baseline_clean \ + --rollout-impl parallel_fixed \ + --train-steps 50000 \ + --batch-size 4 \ + --lr 1e-5 \ + --seed 42 \ + --eval-every 2500 \ + --eval-n 1024 \ + --eval-batch-size 32 \ + --save-best \ + --save-final \ + --save-every-eval \ + --save-train-state \ + --out step9_G_trm_baseline_parallel_fixed_26041_batch4_50k.json + ;; + + trm_multi4_loguniform50k) + run_one step9_H_trm_multi4_loguniform_ramp_26041_batch4_50k \ + --model trm \ + --ckpt-root "${TRM_ROOT}" \ + --ckpt-name step_26041 \ + --mode multi_perturbed_ce \ + --rollout-impl parallel_fixed \ + --n-trajectories 4 \ + --train-steps 50000 \ + --batch-size 4 \ + --lr 1e-5 \ + --noise-std 0.001 \ + 
# Start launch_trajectory_sampling_long.sh for one case in its own session,
# in the background, and print where to find it.
#   $1  case name passed to the launcher (also used in the log file name)
#   $2  GPU index passed to the launcher
# stdin is /dev/null and all output goes to queue_<case>.outer.log.
launch_case() {
  local case_name="$1" gpu="$2"
  local outer_log="queue_${case_name}.outer.log"
  local child_pid

  setsid bash launch_trajectory_sampling_long.sh "${case_name}" "${gpu}" \
    < /dev/null > "${outer_log}" 2>&1 &
  child_pid=$!
  echo "${case_name} gpu=${gpu} pid=${child_pid} outer_log=${outer_log}"
}
+checkpoint_path='checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_directional_multi4_parallel_c4_eps003_sigma003' \ + +load_checkpoint=null \ + epochs=12500 eval_interval=1250 min_eval_interval=0 checkpoint_every_eval=true \ + global_batch_size=192 \ + lr=0.0001 lr_min_ratio=1.0 lr_warmup_steps=2000 \ + beta1=0.9 beta2=0.95 weight_decay=1.0 \ + puzzle_emb_lr=0.0001 puzzle_emb_weight_decay=1.0 \ + ema=true ema_rate=0.999 freeze_weights=false \ + arch.mlp_t=true arch.pos_encodings=none arch.puzzle_emb_len=16 arch.no_ACT_continue=true \ + +trajectory_augment=true \ + +trajectory_parallel=true \ + +trajectory_n=4 \ + +trajectory_noise_std=0.03 \ + +trajectory_noise_min=0.003 \ + +trajectory_noise_max=0.03 \ + +trajectory_noise_sampling=loguniform \ + +trajectory_sigma_start=0.0 \ + +trajectory_sigma_ramp_steps=5000 \ + +trajectory_perturb=both \ + +trajectory_directional=true \ + +trajectory_directional_candidates=4 \ + +trajectory_directional_fd_eps=0.03 \ + +trajectory_directional_horizon=16 \ + +trajectory_directional_sign=alternate diff --git a/launch_trm_official_gbs768_multi4.sh b/launch_trm_official_gbs768_multi4.sh new file mode 100755 index 0000000..5bd9575 --- /dev/null +++ b/launch_trm_official_gbs768_multi4.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +set -eo pipefail + +GPU="${1:-0}" + +source /home/yurenh2/miniconda3/etc/profile.d/conda.sh +conda activate rrm + +cd /home/yurenh2/rrm/trm +export WANDB_MODE=offline +export CUDA_VISIBLE_DEVICES="${GPU}" + +python pretrain.py \ + arch=trm \ + data_paths='[/home/yurenh2/rrm/data/sudoku-extreme-1k-aug-1000]' \ + data_paths_test='[]' \ + evaluators='[]' \ + +project_name='Sudoku-extreme-1k-aug-1000-ACT-torch' \ + +run_name='pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro' \ + +checkpoint_path='checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_multi4_loguniform_repro' \ + +load_checkpoint=null \ + epochs=50000 eval_interval=2500 
def f(row: dict[str, str], key: str, default: float = float("nan")) -> float:
    """Return ``row[key]`` parsed as a float, or *default* when it is unusable.

    Args:
        row: One CSV record, as produced by ``csv.DictReader``.
        key: Column name to read.
        default: Value returned when the column is absent, blank, ``None``
            or not parseable as a float.

    Returns:
        The parsed float, or *default*.
    """
    val = row.get(key, "")
    if val == "":
        return default
    try:
        return float(val)
    except (TypeError, ValueError):
        # Only conversion failures fall back to the default: TypeError covers
        # a ``None`` cell, ValueError covers unparseable text. Anything else
        # (e.g. *row* not being a mapping) is a caller bug and must surface.
        return default
def auc_failure_score(score: np.ndarray, exact: np.ndarray) -> float:
    """AUROC where higher score predicts failure.

    Computed from the rank-sum (Mann-Whitney U) identity. Tied scores share
    their average rank, so the result does not depend on the order of the
    inputs and a tied failure/success pair counts as one half.

    Args:
        score: One score per sample; larger means "more likely to fail".
        exact: One flag per sample; truthy marks a success, falsy a failure.

    Returns:
        The AUROC in [0, 1], or NaN when either class is empty.
    """
    score = np.asarray(score, dtype=float).ravel()
    fail = ~np.asarray(exact, dtype=bool).ravel()
    n_fail = int(fail.sum())
    n_succ = int(fail.size - n_fail)
    if n_fail == 0 or n_succ == 0:
        return float("nan")
    # Midranks: a group of `c` equal scores occupying 1-based ranks
    # (last - c + 1)..last is given their mean, last - (c - 1) / 2.
    _, inverse, counts = np.unique(score, return_inverse=True, return_counts=True)
    midranks = np.cumsum(counts) - (counts - 1) / 2.0
    ranks = midranks[np.asarray(inverse).ravel()]
    return float((ranks[fail].sum() - n_fail * (n_fail + 1) / 2) / (n_fail * n_succ))
def metric_series(rows: list[dict[str, str]], metric: str) -> tuple[np.ndarray, np.ndarray]:
    """Return ``(steps, values)`` for *metric*, ordered by ascending step.

    Rows whose *metric* value does not come back finite from ``f`` are
    skipped before their ``"step"`` column is parsed. Steps are read as
    ``int(float(row["step"]))``.
    """
    kept = [
        (int(float(row["step"])), value)
        for row in rows
        if np.isfinite(value := f(row, metric))
    ]
    steps = np.asarray([step for step, _ in kept])
    values = np.asarray([value for _, value in kept])
    ascending = np.argsort(steps)
    return steps[ascending], values[ascending]
int(np.argmax(my)) + ax.scatter([bx[bi] / 1000], [by[bi]], s=95, color=colors["baseline"], edgecolor="white", zorder=5) + ax.scatter([mx[mi] / 1000], [my[mi]], s=95, color=colors["multi4"], edgecolor="white", zorder=5) + ax.annotate(f"best {by[bi]:.3f}", (bx[bi] / 1000, by[bi]), xytext=(6, -18), textcoords="offset points", fontsize=8) + ax.annotate(f"best {my[mi]:.3f}", (mx[mi] / 1000, my[mi]), xytext=(6, 8), textcoords="offset points", fontsize=8) + ax.set_title(title) + ax.set_xlabel("optimizer step (k)") + ax.set_ylabel("full test exact accuracy") + ax.set_ylim(0, ymax) + ax.grid(alpha=0.22) + ax.legend(frameon=False, loc="lower right") + + fig.suptitle("Trajectory perturbation helps both HRM and TRM, but late checkpoints can collapse") + fig.tight_layout() + fig.savefig(OUT / "fig1_hrm_trm_training_curves.png", dpi=220) + plt.close(fig) + + +def plot_lambda1_motivation() -> None: + entries = [ + ("HRM baseline best\nstep 26040", ROOT / "diag_hrm_step_26040_512.npz"), + ( + "TRM baseline best\nstep 58590", + ROOT / "official_gbs768_spectrum/trm_gbs768_base_step58590_n512_k8_seed20260602.npz", + ), + ] + fig, axes = plt.subplots(1, 2, figsize=(11.6, 4.2), sharey=False) + for ax, (title, path) in zip(axes, entries): + spec, exact = spectrum_arrays(path) + lam1 = spec[:, 0] + succ = lam1[exact] + fail = lam1[~exact] + lo, hi = np.quantile(lam1, [0.01, 0.99]) + pad = 0.08 * (hi - lo) + bins = np.linspace(lo - pad, hi + pad, 34) + ax.hist(succ, bins=bins, density=True, alpha=0.62, color="#2563eb", label=f"success (n={len(succ)})") + ax.hist(fail, bins=bins, density=True, alpha=0.58, color="#dc2626", label=f"failure (n={len(fail)})") + ax.axvline(succ.mean(), color="#1d4ed8", linewidth=2) + ax.axvline(fail.mean(), color="#b91c1c", linewidth=2) + auc = auc_failure_score(lam1, exact) + ax.set_title(f"{title}\nacc={exact.mean():.3f}, AUROC={auc:.3f}") + ax.set_xlabel("top Lyapunov exponent λ1") + ax.set_ylabel("density") + ax.grid(alpha=0.2) + ax.legend(frameon=False, 
fontsize=8) + fig.suptitle("Motivation: λ1 is a strong success/failure detector in both HRM and TRM") + fig.tight_layout() + fig.savefig(OUT / "fig0_motivation_lambda1_success_failure_hrm_trm.png", dpi=220) + plt.close(fig) + + +def plot_phase(data: dict[str, object]) -> None: + fig, axes = plt.subplots(1, 2, figsize=(11.8, 4.4), sharex=False) + panels = [("HRM", data["hrm_points"]), ("TRM official GBS768", data["trm_points"])] + style = { + "baseline": ("#475569", "o"), + "multi4": ("#2563eb", "o"), + "multi4-final": ("#dc2626", "X"), + } + for ax, (title, points) in zip(axes, panels): + for point in points: # type: ignore[assignment] + color, marker = style[point["family"]] + ax.scatter(point["mean8_all"], point["full_exact"], s=150, marker=marker, color=color, edgecolor="white", zorder=4) + ax.annotate( + f"{point['label']}\nstep {point['step']}", + (point["mean8_all"], point["full_exact"]), + xytext=(8, 5), + textcoords="offset points", + fontsize=8, + ) + ax.set_title(title) + ax.set_xlabel("mean top-8 Lyapunov exponent (lower = more stable)") + ax.set_ylabel("full test exact accuracy") + ax.grid(alpha=0.22) + fig.suptitle("Accuracy-vs-chaotic-volume phase view: best multi4 moves up-left; collapse moves down-right") + fig.tight_layout() + fig.savefig(OUT / "fig2_accuracy_vs_chaotic_volume_phase.png", dpi=220) + plt.close(fig) + + +def plot_spectrum_grid() -> None: + entries = [ + ("HRM baseline best", ROOT / "diag_hrm_step_26040_512.npz"), + ("HRM multi4 best", ROOT / "diag_hrm_multi4_step_23436_512.npz"), + ("TRM baseline best", ROOT / "official_gbs768_spectrum/trm_gbs768_base_step58590_n512_k8_seed20260602.npz"), + ("TRM multi4 best", ROOT / "official_gbs768_spectrum/trm_gbs768_multi4_step35805_n512_k8_seed20260602.npz"), + ] + fig, axes = plt.subplots(2, 2, figsize=(10.8, 7.2), sharey=False) + for ax, (title, path) in zip(axes.flat, entries): + spec, exact = spectrum_arrays(path) + x = np.arange(1, spec.shape[1] + 1) + for mask, color, label in [(exact, 
"#2563eb", "success"), (~exact, "#dc2626", "failure")]: + mean = spec[mask].mean(axis=0) + se = spec[mask].std(axis=0) / max(mask.sum(), 1) ** 0.5 + ax.plot(x, mean, marker="o", color=color, label=label) + ax.fill_between(x, mean - se, mean + se, color=color, alpha=0.16, linewidth=0) + ax.axhline(0, color="black", linewidth=0.8, alpha=0.55) + ax.set_title(f"{title}\nN={len(exact)}, acc={exact.mean():.3f}") + ax.set_xlabel("spectrum rank") + ax.set_ylabel("finite-time exponent") + ax.grid(alpha=0.22) + axes.flat[0].legend(frameon=False) + fig.suptitle("Success/failure separation is present in both HRM and TRM; multi4 mainly stabilizes successful trajectories") + fig.tight_layout() + fig.savefig(OUT / "fig3_hrm_trm_success_failure_spectra.png", dpi=220) + plt.close(fig) + + +def plot_ptrm() -> None: + summary = read_csv(ROOT / "ptrm_same_subset/paired_ptrm_k100_n1000_seed0_summary.csv")[0] + labels = ["deterministic", "mean rollout", "Q-selected", "oracle"] + base = [float(summary[k]) for k in ["base_det", "base_mean_rollout", "base_qmax", "base_oracle"]] + multi = [float(summary[k]) for k in ["multi4_det", "multi4_mean_rollout", "multi4_qmax", "multi4_oracle"]] + x = np.arange(len(labels)) + width = 0.36 + fig, ax = plt.subplots(figsize=(8.7, 4.3)) + ax.bar(x - width / 2, base, width, color="#64748b", label="baseline best") + ax.bar(x + width / 2, multi, width, color="#2563eb", label="multi4 best") + ax.set_xticks(x, labels) + ax.set_ylim(0.86, 1.0) + ax.set_ylabel("exact accuracy") + ax.set_title("PTRM same-subset comparison (n=1000, K=100, D=64, sigma=0.3, L-only)") + ax.grid(axis="y", alpha=0.22) + ax.legend(frameon=False) + for xpos, vals in [(x - width / 2, base), (x + width / 2, multi)]: + for xi, val in zip(xpos, vals): + ax.text(xi, val + 0.002, f"{val:.3f}", ha="center", va="bottom", fontsize=8) + fig.tight_layout() + fig.savefig(OUT / "fig4_ptrm_same_subset_comparison.png", dpi=220) + plt.close(fig) + + +def write_summary(data: dict[str, object]) -> None: 
+ rows = [] + for model, points in [("HRM", data["hrm_points"]), ("TRM", data["trm_points"])]: + for point in points: # type: ignore[assignment] + rows.append( + { + "model": model, + "label": point["label"], + "step": point["step"], + "full_exact": point["full_exact"], + "sample_exact": point["sample_exact"], + "lambda1_all": point["lambda1_all"], + "mean8_all": point["mean8_all"], + "pos_count_all": point["pos_count_all"], + "lambda1_success": point["lambda1_success"], + "lambda1_fail": point["lambda1_fail"], + "mean8_success": point["mean8_success"], + "mean8_fail": point["mean8_fail"], + "pos_count_success": point["pos_count_success"], + "pos_count_fail": point["pos_count_fail"], + } + ) + with (OUT / "hrm_trm_redesigned_summary.csv").open("w", newline="") as f: + writer = csv.DictWriter(f, fieldnames=list(rows[0].keys())) + writer.writeheader() + writer.writerows(rows) + + ptrm = read_csv(ROOT / "ptrm_same_subset/paired_ptrm_k100_n1000_seed0_summary.csv")[0] + report = f"""# Meeting Figures v2 + +## Figure Strategy + +0. `fig0_motivation_lambda1_success_failure_hrm_trm.png`: first-exponent success/failure distribution in HRM and TRM. This motivates chaos as a detector before introducing the method. +1. `fig1_hrm_trm_training_curves.png`: performance over training for HRM and TRM. This answers whether the method improves accuracy and where best/final are. +2. `fig2_accuracy_vs_chaotic_volume_phase.png`: phase view, with accuracy versus mean top-8 Lyapunov exponent. This answers whether better checkpoints are dynamically more stable. +3. `fig3_hrm_trm_success_failure_spectra.png`: full success/failure spectrum separation for HRM and TRM best checkpoints. This extends Fig0 beyond λ1. +4. `fig4_ptrm_same_subset_comparison.png`: PTRM same-subset result. This is a secondary inference-time story. + +## Key Numbers + +- HRM baseline best: 0.5265 exact. HRM multi4 best: 0.6443 exact. HRM multi4 final: 0.4624 exact. +- TRM baseline best: 0.8686 exact. 
TRM multi4 best: 0.8965 exact. TRM multi4 final: 0.8351 exact. +- HRM multi4 best dynamics sample: mean top-8 exponent {rows[1]['mean8_all']:+.4f}; final {rows[2]['mean8_all']:+.4f}. +- TRM multi4 best dynamics sample: mean top-8 exponent {rows[4]['mean8_all']:+.4f}; final {rows[5]['mean8_all']:+.4f}. +- PTRM same subset, K=100: Q-selected {float(ptrm['base_qmax']):.3f} -> {float(ptrm['multi4_qmax']):.3f}; mean rollout {float(ptrm['base_mean_rollout']):.3f} -> {float(ptrm['multi4_mean_rollout']):.3f}. + +## Caveats + +- Dynamics spectra use N=512 diagnostic samples, not the full test set. +- PTRM numbers use a fixed N=1000 subset; do not mix its deterministic subset accuracy with full-test W&B exact accuracy. +- Final checkpoints are collapse diagnostics, not the method's reported performance. +""" + (OUT / "meeting_figures_v2_report.md").write_text(report) + + +def main() -> None: + OUT.mkdir(parents=True, exist_ok=True) + data = get_data() + plot_lambda1_motivation() + plot_training_curves(data) + plot_phase(data) + plot_spectrum_grid() + plot_ptrm() + write_summary(data) + print(f"wrote redesigned artifacts to {OUT}") + + +if __name__ == "__main__": + main() diff --git a/make_q_lambda_scatter.py b/make_q_lambda_scatter.py new file mode 100644 index 0000000..54ff8d2 --- /dev/null +++ b/make_q_lambda_scatter.py @@ -0,0 +1,263 @@ +from __future__ import annotations + +import csv +from pathlib import Path + +import matplotlib.pyplot as plt +import numpy as np + + +ROOT = Path("research/flossing") +IN_DIR = ROOT / "q_lambda_scatter" +OUT = ROOT / "meeting_artifacts_v2" + +RUNS = [ + ( + "TRM baseline + PTRM rollouts", + IN_DIR / "base58590_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.npz", + ), + ( + "TRM multi4 + PTRM rollouts", + IN_DIR / "multi4_35805_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.npz", + ), +] + + +def rank_average(values: np.ndarray) -> np.ndarray: + order = np.argsort(values, kind="mergesort") + sorted_values = values[order] + ranks = 
np.empty(len(values), dtype=float) + i = 0 + while i < len(values): + j = i + 1 + while j < len(values) and sorted_values[j] == sorted_values[i]: + j += 1 + ranks[order[i:j]] = 0.5 * (i + j - 1) + 1.0 + i = j + return ranks + + +def corr(x: np.ndarray, y: np.ndarray) -> float: + mask = np.isfinite(x) & np.isfinite(y) + x = x[mask] + y = y[mask] + if len(x) < 3: + return float("nan") + x = x - x.mean() + y = y - y.mean() + denom = np.sqrt(np.square(x).sum() * np.square(y).sum()) + if denom <= 0: + return float("nan") + return float(np.dot(x, y) / denom) + + +def spearman(x: np.ndarray, y: np.ndarray) -> float: + mask = np.isfinite(x) & np.isfinite(y) + x = x[mask] + y = y[mask] + if len(x) < 3: + return float("nan") + return corr(rank_average(x), rank_average(y)) + + +def mean_within_problem_corr(stability: np.ndarray, q_halt: np.ndarray, rank: bool) -> float: + vals: list[float] = [] + for x, y in zip(stability, q_halt): + val = spearman(x, y) if rank else corr(x, y) + if np.isfinite(val): + vals.append(val) + return float(np.mean(vals)) if vals else float("nan") + + +def summarize(name: str, path: Path) -> dict[str, float | str]: + data = np.load(path) + exact = data["exact"].astype(bool) + q_halt = data["q_halt"].astype(float) + lyap = data["lyap"].astype(float) + if lyap.size == 0: + raise ValueError(f"{path} does not contain lyap") + stability = -lyap + + arange = np.arange(exact.shape[0]) + q_idx = q_halt.argmax(axis=1) + lyap_idx = lyap.argmin(axis=1) + correct_count = exact.sum(axis=1) + mixed = (correct_count > 0) & (correct_count < exact.shape[1]) + + mixed_summary: dict[str, float] = { + "mixed_problem_count": float(mixed.sum()), + "zero_success_problem_count": float((correct_count == 0).sum()), + "full_success_problem_count": float((correct_count == exact.shape[1]).sum()), + "mixed_global_pearson_q_vs_stability": float("nan"), + "mixed_q_max_exact": float("nan"), + "mixed_lambda_min_exact": float("nan"), + "mixed_oracle_exact": float("nan"), + } + if 
mixed.any(): + m_arange = np.arange(int(mixed.sum())) + m_exact = exact[mixed] + m_q = q_halt[mixed] + m_lyap = lyap[mixed] + mixed_summary.update( + { + "mixed_global_pearson_q_vs_stability": corr((-m_lyap).reshape(-1), m_q.reshape(-1)), + "mixed_q_max_exact": float(m_exact[m_arange, m_q.argmax(axis=1)].mean()), + "mixed_lambda_min_exact": float(m_exact[m_arange, m_lyap.argmin(axis=1)].mean()), + "mixed_oracle_exact": float(m_exact.any(axis=1).mean()), + } + ) + + out: dict[str, float | str] = { + "name": name, + "path": str(path), + "n_samples": float(exact.shape[0]), + "rollouts": float(exact.shape[1]), + "mean_rollout_exact": float(exact.mean()), + "q_max_exact": float(exact[arange, q_idx].mean()), + "lambda_min_exact": float(exact[arange, lyap_idx].mean()), + "oracle_pass_exact": float(exact.any(axis=1).mean()), + "q_lambda_same_argmax_frac": float((q_idx == lyap_idx).mean()), + "global_pearson_q_vs_stability": corr(stability.reshape(-1), q_halt.reshape(-1)), + "global_spearman_q_vs_stability": spearman(stability.reshape(-1), q_halt.reshape(-1)), + "within_problem_pearson_mean": mean_within_problem_corr(stability, q_halt, rank=False), + "within_problem_spearman_mean": mean_within_problem_corr(stability, q_halt, rank=True), + "q_success_mean": float(q_halt[exact].mean()) if exact.any() else float("nan"), + "q_fail_mean": float(q_halt[~exact].mean()) if (~exact).any() else float("nan"), + "lambda_success_mean": float(lyap[exact].mean()) if exact.any() else float("nan"), + "lambda_fail_mean": float(lyap[~exact].mean()) if (~exact).any() else float("nan"), + } + out.update(mixed_summary) + return out + + +def scatter_panel( + ax: plt.Axes, + stability_2d: np.ndarray, + q_2d: np.ndarray, + exact_2d: np.ndarray, + title: str, +) -> None: + stability = stability_2d.reshape(-1) + q_halt = q_2d.reshape(-1) + exact = exact_2d.reshape(-1) + finite = np.isfinite(stability) & np.isfinite(q_halt) + exact = exact[finite] + q_halt = q_halt[finite] + stability = 
stability[finite] + if len(stability) == 0: + ax.set_title(title + "\n(no points)") + return + + xlo, xhi = np.quantile(stability, [0.005, 0.995]) + ylo, yhi = np.quantile(q_halt, [0.005, 0.995]) + visible = (stability >= xlo) & (stability <= xhi) & (q_halt >= ylo) & (q_halt <= yhi) + + ax.scatter( + stability[visible & ~exact], + q_halt[visible & ~exact], + s=8, + alpha=0.22, + color="#dc2626", + linewidths=0, + label="incorrect rollout", + ) + ax.scatter( + stability[visible & exact], + q_halt[visible & exact], + s=8, + alpha=0.17, + color="#2563eb", + linewidths=0, + label="correct rollout", + ) + + fit = visible + if int(fit.sum()) >= 3: + slope, intercept = np.polyfit(stability[fit], q_halt[fit], 1) + xs = np.linspace(xlo, xhi, 100) + ax.plot(xs, slope * xs + intercept, color="black", linewidth=1.8, alpha=0.75) + + ax.set_title(title) + ax.set_xlim(xlo, xhi) + ax.set_ylim(ylo, yhi) + ax.grid(alpha=0.22) + + +def plot() -> list[dict[str, float | str]]: + missing = [path for _name, path in RUNS if not path.exists()] + if missing: + raise SystemExit("missing input files:\n" + "\n".join(str(p) for p in missing)) + + OUT.mkdir(parents=True, exist_ok=True) + summaries = [summarize(name, path) for name, path in RUNS] + + fig, axes = plt.subplots(2, len(RUNS), figsize=(12.0, 8.2), sharey="row") + if len(RUNS) == 1: + axes = np.asarray(axes).reshape(2, 1) + + for col, ((name, path), summary) in enumerate(zip(RUNS, summaries)): + data = np.load(path) + exact = data["exact"].astype(bool) + q_halt = data["q_halt"].astype(float) + stability = -data["lyap"].astype(float) + correct_count = exact.sum(axis=1) + mixed = (correct_count > 0) & (correct_count < exact.shape[1]) + + scatter_panel( + axes[0, col], + stability, + q_halt, + exact, + f"{name}: all rollouts\n" + f"r={summary['global_pearson_q_vs_stability']:.2f}, " + f"rho={summary['global_spearman_q_vs_stability']:.2f}, " + f"Q exact={summary['q_max_exact']:.3f}", + ) + scatter_panel( + axes[1, col], + stability[mixed], 
+ q_halt[mixed], + exact[mixed], + f"mixed problems only (n={int(summary['mixed_problem_count'])})\n" + f"r={summary['mixed_global_pearson_q_vs_stability']:.2f}, " + f"Q={summary['mixed_q_max_exact']:.3f}, " + f"lambda-min={summary['mixed_lambda_min_exact']:.3f}", + ) + + for ax in axes[1, :]: + ax.set_xlabel("stability proxy = -lambda_1") + axes[0, 0].set_ylabel("Q-head halt logit") + axes[1, 0].set_ylabel("Q-head halt logit") + axes[0, 0].legend(frameon=False, loc="lower right", fontsize=8) + fig.suptitle("PTRM Q-head score contains a stability signal; mixed problems reveal selector behavior") + fig.tight_layout() + fig.savefig(OUT / "fig5_qhead_vs_lambda1_ptrm.png", dpi=240) + plt.close(fig) + + out_csv = OUT / "fig5_qhead_vs_lambda1_ptrm_summary.csv" + with out_csv.open("w", newline="") as f: + writer = csv.DictWriter(f, fieldnames=list(summaries[0].keys())) + writer.writeheader() + writer.writerows(summaries) + return summaries + + +def main() -> None: + summaries = plot() + for row in summaries: + print( + f"{row['name']}: " + f"q_exact={row['q_max_exact']:.4f} " + f"lambda_min_exact={row['lambda_min_exact']:.4f} " + f"oracle={row['oracle_pass_exact']:.4f} " + f"pearson={row['global_pearson_q_vs_stability']:.4f} " + f"spearman={row['global_spearman_q_vs_stability']:.4f} " + f"within_spearman={row['within_problem_spearman_mean']:.4f} " + f"mixed_pearson={row['mixed_global_pearson_q_vs_stability']:.4f}" + ) + print(f"wrote {OUT / 'fig5_qhead_vs_lambda1_ptrm.png'}") + print(f"wrote {OUT / 'fig5_qhead_vs_lambda1_ptrm_summary.csv'}") + + +if __name__ == "__main__": + main() diff --git a/make_trm_multi4_meeting_artifacts.py b/make_trm_multi4_meeting_artifacts.py new file mode 100644 index 0000000..5f964ed --- /dev/null +++ b/make_trm_multi4_meeting_artifacts.py @@ -0,0 +1,133 @@ +from __future__ import annotations + +from pathlib import Path +import csv + +import matplotlib.pyplot as plt +import numpy as np + + +ROOT = Path("research/flossing") +SPECTRUM_DIR = ROOT 
/ "official_gbs768_spectrum" +PTRM_DIR = ROOT / "ptrm_same_subset" +PLOT_DIR = ROOT / "meeting_artifacts" + + +def _read_csv_rows(path: Path) -> list[dict[str, str]]: + with path.open(newline="") as fh: + return list(csv.DictReader(fh)) + + +def _load_spectrum_rows() -> list[tuple[str, int, np.lib.npyio.NpzFile]]: + return [ + ( + "TRM baseline best", + 58590, + np.load(SPECTRUM_DIR / "trm_gbs768_base_step58590_n512_k8_seed20260602.npz"), + ), + ( + "multi4 best", + 35805, + np.load(SPECTRUM_DIR / "trm_gbs768_multi4_step35805_n512_k8_seed20260602.npz"), + ), + ( + "multi4 final", + 65100, + np.load(SPECTRUM_DIR / "trm_gbs768_multi4_step65100_n512_k8_seed20260602.npz"), + ), + ] + + +def plot_spectra() -> None: + rows = _load_spectrum_rows() + fig, axes = plt.subplots(1, 3, figsize=(13.5, 4.2), sharey=True) + for ax, (name, step, data) in zip(axes, rows): + spec = data["lyap_spec"].astype(float) + exact = data["exact_correct"].astype(bool) + x = np.arange(1, spec.shape[1] + 1) + for mask, color, label in [ + (exact, "#1f77b4", "success"), + (~exact, "#d62728", "failure"), + ]: + mean = spec[mask].mean(axis=0) + se = spec[mask].std(axis=0) / max(mask.sum(), 1) ** 0.5 + ax.plot(x, mean, marker="o", color=color, label=label) + ax.fill_between(x, mean - se, mean + se, color=color, alpha=0.16, linewidth=0) + ax.axhline(0, color="black", linewidth=0.8, alpha=0.55) + ax.set_title(f"{name}\nstep {step}, acc={exact.mean():.3f}") + ax.set_xlabel("Lyapunov spectrum rank") + ax.grid(alpha=0.22) + axes[0].set_ylabel("finite-time exponent") + axes[0].legend(frameon=False) + fig.suptitle("TRM official GBS768: spectrum separates success/failure; multi4 best suppresses successful spectrum") + fig.tight_layout() + fig.savefig(PLOT_DIR / "trm_gbs768_spectrum_success_failure_n512.png", dpi=220) + plt.close(fig) + + +def plot_headline() -> None: + rows = _read_csv_rows(SPECTRUM_DIR / "headline_trm_multi4_dynamics_table.csv") + labels = ["baseline\nbest", "multi4\nbest", "multi4\nfinal"] 
+ fig, axes = plt.subplots(1, 3, figsize=(12, 3.8)) + metrics = [ + ("full_exact", "Full test exact acc", "#2f6f4e"), + ("mean8_all", "Mean top-8 exponent", "#8a4b2a"), + ("pos_count_all", "Positive exponents / 8", "#4d5f8a"), + ] + for ax, (col, title, color) in zip(axes, metrics): + values = np.asarray([float(row[col]) for row in rows]) + ax.bar(labels, values, color=color, alpha=0.86) + ax.set_title(title) + ax.grid(axis="y", alpha=0.22) + for i, v in enumerate(values): + ax.text(i, v, f"{v:.3f}" if col != "pos_count_all" else f"{v:.2f}", ha="center", va="bottom", fontsize=9) + fig.suptitle("Accuracy improves at multi4 best while chaotic volume drops; final collapse restores positive spectrum") + fig.tight_layout() + fig.savefig(PLOT_DIR / "trm_gbs768_headline_dynamics_bars.png", dpi=220) + plt.close(fig) + + +def plot_ptrm() -> None: + row = _read_csv_rows(PTRM_DIR / "paired_ptrm_k100_n1000_seed0_summary.csv")[0] + labels = ["deterministic", "mean rollout", "Q-selected", "oracle"] + base = [ + float(row["base_det"]), + float(row["base_mean_rollout"]), + float(row["base_qmax"]), + float(row["base_oracle"]), + ] + multi = [ + float(row["multi4_det"]), + float(row["multi4_mean_rollout"]), + float(row["multi4_qmax"]), + float(row["multi4_oracle"]), + ] + x = np.arange(len(labels)) + width = 0.36 + fig, ax = plt.subplots(figsize=(8.4, 4.2)) + ax.bar(x - width / 2, base, width, label="baseline best", color="#6b7280") + ax.bar(x + width / 2, multi, width, label="multi4 best", color="#2563eb") + ax.set_ylim(0.86, 1.0) + ax.set_xticks(x, labels) + ax.set_ylabel("exact accuracy") + ax.set_title("PTRM same-subset comparison (n=1000, K=100, D=64, sigma=0.3, L-only)") + ax.grid(axis="y", alpha=0.22) + ax.legend(frameon=False) + for xpos, vals in [(x - width / 2, base), (x + width / 2, multi)]: + for xi, v in zip(xpos, vals): + ax.text(xi, v + 0.002, f"{v:.3f}", ha="center", va="bottom", fontsize=8) + fig.tight_layout() + fig.savefig(PLOT_DIR / 
"ptrm_same_subset_k100_n1000_comparison.png", dpi=220) + plt.close(fig) + + +def main() -> None: + PLOT_DIR.mkdir(parents=True, exist_ok=True) + plot_spectra() + plot_headline() + plot_ptrm() + print(f"wrote plots to {PLOT_DIR}") + + +if __name__ == "__main__": + main() diff --git a/maze_package/README.md b/maze_package/README.md new file mode 100644 index 0000000..45ab740 --- /dev/null +++ b/maze_package/README.md @@ -0,0 +1,30 @@ +# Maze-Hard package (E8) — train on dedicated cards, diagnose after + +## Contents +- `launch_maze_trm.sh` — TRM Maze official recipe (att variant, 50k epochs), 1–2 GPU. +- dataset already at `/home/yurenh2/rrm/data/maze-30x30-hard-1k` (built 2026-06-13; + seq_len 900, vocab 6, 1000 puzzles ×8 dihedral augments). + +## Run +```bash +bash launch_maze_trm.sh 2 384 # 2x A6000 +bash launch_maze_trm.sh 2 192 # 2x A5000 (->128 if OOM) +``` +Target: ~75% exact accuracy (official figure). Saves a checkpoint every 5000 epochs +(10 checkpoints) — needed for the evolution analysis. + +## After training: diagnostics +The 2x2 / FTLE pipeline reads any TRM checkpoint dir (all_config.yaml + step_N). Two caveats +vs Sudoku, to verify on first run: +1. ATTENTION arch (not mlp_t): confirm diagnose_trm_joint.py's JVP path runs on att blocks + (Sudoku used mlp_t). If the L_level call signature differs, patch the f_L/f_H closures. +2. seq_len 900 vs 97 → per-sample JVP+QR cost ~9-10x Sudoku. Use n=512 for the headline 2x2 + and n=256 for the horizon sweep; k_lyap=8 unchanged. Budget ~0.5-1 day on one card, or + rsync checkpoints back to the lab box and run via the analysis_2x2 queue. + +## What Maze closes +Kills the "Sudoku-only" limitation. Pre-registered prediction (write BEFORE looking, for the +paper's credibility): if the wandering-not-settling decomposition is architecture/task-general, +Maze should show B≈0 (failures don't settle) and the same concurrent-not-antecedent horizon +profile. A DIFFERENT result (e.g. 
Maze failures do settle) is also publishable — it bounds the +claim's scope. Either way the decomposition gets a second task. diff --git a/maze_package/TRANSFER_README.md b/maze_package/TRANSFER_README.md new file mode 100644 index 0000000..6e11076 --- /dev/null +++ b/maze_package/TRANSFER_README.md @@ -0,0 +1,36 @@ +# Maze training bundle — transfer to your training machine + +## What's in this bundle +- `maze-30x30-hard-1k/` — the built dataset (seq_len 900, vocab 6, 1000 puzzles ×8 augments). +- `launch_maze_trm_portable.sh` — path-configurable launcher. +- `diagnose_trm_joint.py`, `step7_interfloss.py` — diagnostic scripts (only if you run + diagnostics on the training machine; otherwise rsync checkpoints back to the lab box). + +## On the training machine +1. Have the TinyRecursiveModels repo cloned and the `rrm` conda env (torch 2.7 cu126, + flash-attn 2 for Ampere). If the env doesn't exist, recreate from the lab box's + `env/requirements.txt` / `pip-freeze.txt`. +2. Put the dataset somewhere, e.g. `~/data/maze-30x30-hard-1k`. +3. Launch: + ```bash + TRM_DIR=~/TinyRecursiveModels DATA_DIR=~/data/maze-30x30-hard-1k \ + bash launch_maze_trm_portable.sh 2 384 # 2x A6000 + # or: 2 192 # 2x A5000 (->128 if OOM) + ``` + Target ~75% exact accuracy (official). ~18-28h on 2x A6000, ~24-36h on 2x A5000. + Saves one checkpoint per 5000 epochs (10 total) — keep all, the evolution analysis needs them. + +## After training +Preferred: `rsync` the whole run checkpoint dir (checkpoints/maze-.../pretrain_att_maze30x30_*/) +back to the lab box and run the existing analysis_2x2 queue there. The dir must include +`all_config.yaml` plus the `step_*` files. + +If diagnosing on the training machine, two caveats vs the Sudoku runs: +1. Maze uses the ATTENTION arch (not mlp_t). Verify diagnose_trm_joint.py's f_L/f_H JVP + closures call the attention L_level correctly; patch if the signature differs. +2. seq_len 900 (vs 97) makes per-sample JVP+QR ~9-10x slower. 
Use n=512 for the headline 2x2, + n=256 for the horizon sweep, k_lyap=8. + +## Sanity check before the long run +A 200-step smoke (epochs=200 eval_interval=200) should complete in minutes and confirm the +attention model + flash-attn + dataset load without OOM before committing to 50k epochs. diff --git a/maze_package/diagnose_trm_joint.py b/maze_package/diagnose_trm_joint.py new file mode 100644 index 0000000..160a7cb --- /dev/null +++ b/maze_package/diagnose_trm_joint.py @@ -0,0 +1,225 @@ +"""TRM Sudoku joint Lyapunov diagnostic — TRM version of diagnose_hrm_joint.py. + +Key differences from HRM: +- TRM has ONE shared L_level (H_layers config is "ignored") +- z_L update: z_L = L_level(z_L, z_H + input_embeddings) +- z_H update: z_H = L_level(z_H, z_L) ← same L_level! +- H_cycles=3, L_cycles=6 (vs HRM 2,2) + +Joint tangent block structure: +- L step: v_L_new = J · (v_L + v_H), v_H_new = v_H, J at (z_L + z_H + ie) +- H step: v_H_new = J' · (v_H + v_L), v_L_new = v_L, J' at (z_H + z_L) +J and J' share weights but evaluated at different points. 
+""" +from __future__ import annotations +import sys, os, yaml, math, argparse, json, time +from pathlib import Path +import numpy as np +import torch + +TRM_DIR = Path("/home/yurenh2/rrm/trm") +sys.path.insert(0, str(TRM_DIR)) + +from models.recursive_reasoning.trm import TinyRecursiveReasoningModel_ACTV1 + + +def load_model(ckpt_root: Path, ckpt_name: str, device: str): + cfg = yaml.safe_load((ckpt_root / "all_config.yaml").read_text()) + arch_cfg = dict(cfg["arch"]) + train_meta = json.loads((Path(cfg["data_paths"][0]) / "train" / "dataset.json").read_text()) + arch_cfg.update(batch_size=cfg["global_batch_size"], seq_len=train_meta["seq_len"], + vocab_size=train_meta["vocab_size"], + num_puzzle_identifiers=train_meta["num_puzzle_identifiers"]) + model = TinyRecursiveReasoningModel_ACTV1(arch_cfg) + sd = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True) + stripped = {k.replace("_orig_mod.", "").replace("model.", ""): v for k, v in sd.items()} + missing, unexpected = model.load_state_dict(stripped, strict=False) + print(f"[load] missing={len(missing)} unexpected={len(unexpected)}") + if missing[:3]: print(f" sample missing: {missing[:3]}") + if unexpected[:3]: print(f" sample unexpected: {unexpected[:3]}") + model.to(device).eval() + return model, cfg, train_meta + + +def load_test_samples(data_path, n_total, shard_id, num_shards, seed): + rng = np.random.default_rng(seed) + inputs = np.load(data_path / "test" / "all__inputs.npy") + labels = np.load(data_path / "test" / "all__labels.npy") + pid = np.load(data_path / "test" / "all__puzzle_identifiers.npy") + all_idx = rng.choice(len(inputs), size=n_total, replace=False) + shard_size = (n_total + num_shards - 1) // num_shards + s, e = shard_id * shard_size, min((shard_id + 1) * shard_size, n_total) + idx = all_idx[s:e] + return { + "inputs": torch.from_numpy(inputs[idx].astype(np.int32)), + "labels": torch.from_numpy(labels[idx].astype(np.int32)), + "puzzle_identifiers": 
torch.from_numpy(pid[idx].astype(np.int32)), + "idx": idx, + } + + +def jvp_through(f, x, v): + return torch.autograd.functional.jvp(f, x, v=v, create_graph=False, strict=False) + + +def run_diagnose_batch(model, batch, device, k_lyap, t_ons, seed): + inner = model.inner + cfg = inner.config + B = batch["inputs"].shape[0] + seq_full = cfg.seq_len + inner.puzzle_emb_len + hidden = cfg.hidden_size + D = seq_full * hidden + + z_H = inner.H_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype) + z_L = inner.L_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype) + seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None) + input_embeddings = inner._input_embeddings(batch["inputs"].to(device), + batch["puzzle_identifiers"].to(device)) + + g = torch.Generator(device=device).manual_seed(seed) + Q0 = torch.randn(B, 2*D, k_lyap, device=device, dtype=torch.float32, generator=g) + Q, _ = torch.linalg.qr(Q0) + log_R_sum = torch.zeros(B, k_lyap, device=device, dtype=torch.float32) + n_lyap_steps = 0 + step_counter = 0 + + drift_zH_per_step, drift_zL_per_step = [], [] + halted_at = torch.zeros(B, dtype=torch.long, device=device) + q_halt_hist, q_continue_hist = [], [] + + for act_step in range(cfg.halt_max_steps): + z_H_prev = z_H.detach().clone() + z_L_prev = z_L.detach().clone() + + with torch.enable_grad(): + zH, zL = z_H.detach(), z_L.detach() + for _h in range(cfg.H_cycles): + # L cycles + for _l in range(cfg.L_cycles): + v_H_j = Q[:, :D, :] + v_L_j = Q[:, D:, :] + v_comb = v_H_j + v_L_j + new_v_L_cols = [] + f_L = lambda z: inner.L_level(z, zH + input_embeddings, **seq_info) + for i in range(k_lyap): + v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype) + zL_new, Dv = jvp_through(f_L, zL, v_i) + new_v_L_cols.append(Dv.reshape(B, D).to(torch.float32)) + new_v_L = torch.stack(new_v_L_cols, dim=-1) + Q = torch.cat([v_H_j, new_v_L], dim=1) + zL = zL_new + step_counter += 1 + if 
step_counter % t_ons == 0: + Q, R = torch.linalg.qr(Q) + log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log() + n_lyap_steps += 1 + + # H step (uses SAME L_level!) + v_H_j = Q[:, :D, :] + v_L_j = Q[:, D:, :] + v_comb = v_H_j + v_L_j + new_v_H_cols = [] + f_H = lambda z: inner.L_level(z, zL, **seq_info) + for i in range(k_lyap): + v_i = v_comb[:, :, i].reshape(B, seq_full, hidden).to(inner.forward_dtype) + zH_new, Dv = jvp_through(f_H, zH, v_i) + new_v_H_cols.append(Dv.reshape(B, D).to(torch.float32)) + new_v_H = torch.stack(new_v_H_cols, dim=-1) + Q = torch.cat([new_v_H, v_L_j], dim=1) + zH = zH_new + step_counter += 1 + if step_counter % t_ons == 0: + Q, R = torch.linalg.qr(Q) + log_R_sum = log_R_sum + R.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log() + n_lyap_steps += 1 + + z_H, z_L = zH, zL + + drift_zH_per_step.append((z_H - z_H_prev).float().flatten(1).norm(dim=1).cpu()) + drift_zL_per_step.append((z_L - z_L_prev).float().flatten(1).norm(dim=1).cpu()) + + with torch.no_grad(): + q_logits = inner.q_head(z_H[:, 0]).float() + q_halt, q_continue = q_logits[..., 0], q_logits[..., 1] + q_halt_hist.append(q_halt.cpu()); q_continue_hist.append(q_continue.cpu()) + new_halt = (q_halt > q_continue) & (halted_at == 0) + halted_at[new_halt] = act_step + 1 + output = inner.lm_head(z_H)[:, inner.puzzle_emb_len:].float() + final_logits = output + + lyap_spec = (log_R_sum / max(n_lyap_steps, 1)).cpu().numpy() + + with torch.no_grad(): + preds = final_logits.argmax(dim=-1) + labels = batch["labels"].to(device) + mask = labels > 0 + exact = ((preds == labels) | ~mask).all(dim=-1).cpu().float() + token_acc = ((preds == labels) & mask).sum(-1).float() / mask.sum(-1).float().clamp_min(1) + token_acc = token_acc.cpu() + + return { + "drift_zH": torch.stack(drift_zH_per_step, dim=1).numpy(), + "drift_zL": torch.stack(drift_zL_per_step, dim=1).numpy(), + "halted_at": halted_at.cpu().numpy(), + "q_halt": torch.stack(q_halt_hist, dim=1).numpy(), + 
"q_continue": torch.stack(q_continue_hist, dim=1).numpy(), + "lyap_spec": lyap_spec, + "exact_correct": exact.numpy(), + "token_acc": token_acc.numpy(), + } + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--ckpt-root", required=True) + ap.add_argument("--ckpt-name", default="step_13020") + ap.add_argument("--n-samples", type=int, default=512) + ap.add_argument("--shard-id", type=int, default=0) + ap.add_argument("--num-shards", type=int, default=1) + ap.add_argument("--batch-size", type=int, default=16) + ap.add_argument("--k-lyap", type=int, default=8) + ap.add_argument("--t-ons", type=int, default=1) + ap.add_argument("--seed", type=int, default=0) + ap.add_argument("--out", default="diag_trm.npz") + args = ap.parse_args() + + device = "cuda" + model, cfg, train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device) + print(f"loaded {args.ckpt_name}: hidden={model.inner.config.hidden_size}, " + f"seq_full={train_meta['seq_len'] + model.inner.puzzle_emb_len}, " + f"halt_max_steps={model.inner.config.halt_max_steps}, " + f"H={model.inner.config.H_cycles} L={model.inner.config.L_cycles}") + + test = load_test_samples(Path(cfg["data_paths"][0]), args.n_samples, args.shard_id, args.num_shards, args.seed) + n = len(test["inputs"]) + print(f"shard {args.shard_id}/{args.num_shards}: {n} samples") + + res = {k: [] for k in ["drift_zH","drift_zL","halted_at","q_halt","q_continue","lyap_spec","exact_correct","token_acc","idx"]} + t0 = time.time() + for s in range(0, n, args.batch_size): + e = min(s + args.batch_size, n) + batch = {k: test[k][s:e].to(device) for k in ["inputs","labels","puzzle_identifiers"]} + out = run_diagnose_batch(model, batch, device, args.k_lyap, args.t_ons, args.seed + s) + for k, v in out.items(): + res[k].append(v) + res["idx"].append(test["idx"][s:e]) + ls = out["lyap_spec"] + print(f" [{e}/{n}] dt={time.time()-t0:.1f}s exact={out['exact_correct'].mean():.3f} " + f"λ_1={ls[:,0].mean():+.4f} 
λ_{args.k_lyap}={ls[:,-1].mean():+.4f}", flush=True) + + saved = {} + for k, v in res.items(): + if not v: continue + try: saved[k] = np.concatenate(v, 0) + except ValueError: saved[k] = np.stack(v, 0) + np.savez_compressed(args.out, **saved) + succ = saved["exact_correct"] > 0.5 + print(f"\nN={len(succ)} acc={succ.mean():.4f}") + print(f"{'i':>3} {'all':>10} {'succ':>10} {'fail':>10} {'Δ':>9}") + for i in range(saved["lyap_spec"].shape[1]): + li = saved["lyap_spec"][:, i] + print(f"{i+1:>3} {li.mean():+10.4f} {li[succ].mean():+10.4f} {li[~succ].mean():+10.4f} {li[~succ].mean()-li[succ].mean():+9.4f}") + + +if __name__ == "__main__": + main() diff --git a/maze_package/launch_maze_trm.sh b/maze_package/launch_maze_trm.sh new file mode 100755 index 0000000..093bb1e --- /dev/null +++ b/maze_package/launch_maze_trm.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# TRM Maze-Hard 30x30 official recipe, adapted for 2 GPUs. Run on dedicated training cards. +# Usage: bash launch_maze_trm.sh [NGPU] [GBS] +# 2x A6000 (48G): bash launch_maze_trm.sh 2 384 +# 2x A5000 (24G): bash launch_maze_trm.sh 2 192 (drop to 128 if OOM) +# 1x card: bash launch_maze_trm.sh 1 128 +set -eo pipefail + +NGPU="${1:-2}" +GBS="${2:-384}" +RUN_NAME="pretrain_att_maze30x30_${NGPU}gpu_gbs${GBS}" + +source /home/yurenh2/miniconda3/etc/profile.d/conda.sh +conda activate rrm +cd /home/yurenh2/rrm/trm +export WANDB_MODE=offline + +COMMON_ARGS=( + arch=trm + "data_paths=[/home/yurenh2/rrm/data/maze-30x30-hard-1k]" + "evaluators=[]" + epochs=50000 eval_interval=5000 + lr=1e-4 puzzle_emb_lr=1e-4 weight_decay=1.0 puzzle_emb_weight_decay=1.0 + global_batch_size="${GBS}" + arch.L_layers=2 arch.H_cycles=3 arch.L_cycles=4 + +run_name="${RUN_NAME}" ema=True + +checkpoint_every_eval=true +) + +LOG="/home/yurenh2/rrm/research/flossing/maze_${RUN_NAME}.log" + +if [[ "${NGPU}" -gt 1 ]]; then + nohup torchrun --nproc-per-node "${NGPU}" --rdzv_backend=c10d --rdzv_endpoint=localhost:0 \ + --nnodes=1 pretrain.py 
"${COMMON_ARGS[@]}" > "${LOG}" 2>&1 & +else + nohup python pretrain.py "${COMMON_ARGS[@]}" > "${LOG}" 2>&1 & +fi +echo "launched ${RUN_NAME} (pid $!), log: ${LOG}" +echo "checkpoints -> trm/checkpoints/maze-30x30-hard-1k.../${RUN_NAME}/ (one per 5000 epochs)" +echo "monitor: tail -f ${LOG} | grep -E 'accuracy|exact'" diff --git a/maze_package/launch_maze_trm_portable.sh b/maze_package/launch_maze_trm_portable.sh new file mode 100755 index 0000000..d801ceb --- /dev/null +++ b/maze_package/launch_maze_trm_portable.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash +# Portable TRM Maze-Hard launcher — run on any machine with the TRM repo + rrm conda env. +# +# Set these two paths for the target machine (env vars or edit here): +# TRM_DIR = path to the TinyRecursiveModels repo clone (contains pretrain.py) +# DATA_DIR = path to the maze-30x30-hard-1k dataset (from this bundle) +# CONDA_SH = path to conda.sh (default tries common locations) +# +# Usage: TRM_DIR=~/TinyRecursiveModels DATA_DIR=~/maze-30x30-hard-1k bash launch_maze_trm_portable.sh [NGPU] [GBS] +# 2x A6000 (48G): ... bash launch_maze_trm_portable.sh 2 384 +# 2x A5000 (24G): ... 
bash launch_maze_trm_portable.sh 2 192 (-> 128 if OOM) +set -eo pipefail + +TRM_DIR="${TRM_DIR:?set TRM_DIR to the TinyRecursiveModels repo path}" +DATA_DIR="${DATA_DIR:?set DATA_DIR to the maze-30x30-hard-1k dataset path}" +NGPU="${1:-2}" +GBS="${2:-384}" +RUN_NAME="pretrain_att_maze30x30_${NGPU}gpu_gbs${GBS}" + +# conda +CONDA_SH="${CONDA_SH:-}" +if [[ -z "${CONDA_SH}" ]]; then + for p in "$HOME/miniconda3/etc/profile.d/conda.sh" "$HOME/anaconda3/etc/profile.d/conda.sh" \ + "/opt/conda/etc/profile.d/conda.sh"; do + [[ -f "$p" ]] && CONDA_SH="$p" && break + done +fi +[[ -f "${CONDA_SH}" ]] && source "${CONDA_SH}" && conda activate "${CONDA_ENV:-rrm}" + +cd "${TRM_DIR}" +export WANDB_MODE=offline + +ARGS=( + arch=trm + "data_paths=[${DATA_DIR}]" + "evaluators=[]" + epochs=50000 eval_interval=5000 + lr=1e-4 puzzle_emb_lr=1e-4 weight_decay=1.0 puzzle_emb_weight_decay=1.0 + global_batch_size="${GBS}" + arch.L_layers=2 arch.H_cycles=3 arch.L_cycles=4 + +run_name="${RUN_NAME}" ema=True + +checkpoint_every_eval=true +) +LOG="maze_${RUN_NAME}.log" + +if [[ "${NGPU}" -gt 1 ]]; then + nohup torchrun --nproc-per-node "${NGPU}" --rdzv_backend=c10d --rdzv_endpoint=localhost:0 \ + --nnodes=1 pretrain.py "${ARGS[@]}" > "${LOG}" 2>&1 & +else + nohup python pretrain.py "${ARGS[@]}" > "${LOG}" 2>&1 & +fi +echo "launched ${RUN_NAME} (pid $!)" +echo "log: ${TRM_DIR}/${LOG}" +echo "ckpts: ${TRM_DIR}/checkpoints/maze-30x30-hard-1k.../${RUN_NAME}/ (1 per 5000 epochs)" +echo "watch: tail -f ${TRM_DIR}/${LOG} | grep -E 'exact|accuracy'" +echo +echo "When done: rsync the run's checkpoint dir back to the lab box for the diagnostic pipeline," +echo "or run diagnostics here (see TRANSFER_README.md, note the attention-arch + n=512 caveats)." 
diff --git a/maze_package/pip-freeze.txt b/maze_package/pip-freeze.txt new file mode 100644 index 0000000..03c81b2 --- /dev/null +++ b/maze_package/pip-freeze.txt @@ -0,0 +1,18 @@ +adam_atan2==0.0.3 +argdantic==1.3.3 +coolname==2.2.0 +einops==0.8.1 +flash_attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu12torch2.7cxx11abiTRUE-cp310-cp310-linux_x86_64.whl#sha256=ce91e246f21d61ad66b1a7555340dbaa28e4aa86edcf00c18f0837422939b529 +huggingface-hub==0.34.4 +hydra-core==1.3.2 +numba==0.61.2 +omegaconf==2.3.0 +pydantic==2.11.7 +pydantic_core==2.33.2 +pydantic-settings==2.14.1 +torch==2.7.0+cu126 +torchaudio==2.7.0+cu126 +torchvision==0.22.0+cu126 +tqdm==4.67.1 +triton==3.3.0 +wandb==0.21.3 diff --git a/maze_package/requirements.txt b/maze_package/requirements.txt new file mode 100644 index 0000000..d03a446 --- /dev/null +++ b/maze_package/requirements.txt @@ -0,0 +1,13 @@ +# Merged from HRM + TRM official; versions pinned per TRM specific_requirements.txt +# torch handled separately (torch==2.7.0+cu126 via cu126 index) +einops==0.8.1 +tqdm==4.67.1 +coolname==2.2.0 +pydantic==2.11.7 +pydantic-core==2.33.2 +argdantic==1.3.3 +wandb==0.21.3 +omegaconf==2.3.0 +hydra-core==1.3.2 +huggingface_hub==0.34.4 +numba==0.61.2 diff --git a/maze_package/step7_interfloss.py b/maze_package/step7_interfloss.py new file mode 100644 index 0000000..3b8e4f0 --- /dev/null +++ b/maze_package/step7_interfloss.py @@ -0,0 +1,589 @@ +"""Step 7: Engelken-style interflossing. + +This is intentionally not a mixed objective. Ordinary task-training steps use +only the supervised ACT loss. Flossing episodes use only a Lyapunov-spectrum +conditioning loss, then task training resumes. + +Paper mapping: + - preflossing: run a floss-only episode before task training. + - interflossing: run short floss-only episodes at selected training steps. + - no persistent L_task + alpha * L_floss term is used here. 
+""" +from __future__ import annotations + +import argparse +import importlib +import json +import sys +import time +from pathlib import Path + +import numpy as np +import torch +import torch.nn.functional as F +import yaml + + +HRM_DIR = Path("/home/yurenh2/rrm/hrm") +TRM_DIR = Path("/home/yurenh2/rrm/trm") + + +def import_stack(model_type: str): + repo_dir = HRM_DIR if model_type == "hrm" else TRM_DIR + sys.path.insert(0, str(repo_dir)) + if model_type == "hrm": + model_mod = importlib.import_module("models.hrm.hrm_act_v1") + model_cls = model_mod.HierarchicalReasoningModel_ACTV1 + else: + model_mod = importlib.import_module("models.recursive_reasoning.trm") + model_cls = model_mod.TinyRecursiveReasoningModel_ACTV1 + losses_mod = importlib.import_module("models.losses") + optim_mod = importlib.import_module("adam_atan2") + sparse_mod = importlib.import_module("models.sparse_embedding") + return model_cls, losses_mod.ACTLossHead, optim_mod.AdamATan2, sparse_mod.CastedSparseEmbeddingSignSGD_Distributed + + +def parse_step_list(text: str) -> set[int]: + if not text.strip(): + return set() + out = set() + for part in text.split(","): + part = part.strip() + if not part: + continue + out.add(int(part)) + return out + + +def build_interfloss_steps(args) -> set[int]: + steps = parse_step_list(args.interfloss_at) + if args.interfloss_every and args.interfloss_every > 0: + start = max(args.interfloss_start, 0) + stop = args.interfloss_stop if args.interfloss_stop >= 0 else args.train_steps + stop = min(stop, args.train_steps) + steps.update(range(start, stop + 1, args.interfloss_every)) + return steps + + +def load_model(model_type: str, ckpt_root: Path, ckpt_name: str, device: str, batch_size_override: int | None = None): + model_cls, loss_head_cls, adam_cls, sparse_cls = import_stack(model_type) + cfg = yaml.safe_load((ckpt_root / "all_config.yaml").read_text()) + arch_cfg = dict(cfg["arch"]) + data_path = Path(cfg.get("data_path") or cfg["data_paths"][0]) + train_meta 
= json.loads((data_path / "train" / "dataset.json").read_text()) + arch_cfg.update( + batch_size=batch_size_override or cfg["global_batch_size"], + seq_len=train_meta["seq_len"], + vocab_size=train_meta["vocab_size"], + num_puzzle_identifiers=train_meta["num_puzzle_identifiers"], + causal=False, + ) + cfg["data_path"] = str(data_path) + with torch.device(device): + base = model_cls(arch_cfg) + head = loss_head_cls(base, loss_type=arch_cfg["loss"]["loss_type"]) + if ckpt_name != "__random__": + sd = torch.load(ckpt_root / ckpt_name, map_location="cpu", weights_only=True) + stripped = {k.replace("_orig_mod.", ""): v for k, v in sd.items()} + missing, unexpected = head.load_state_dict(stripped, strict=False) + print(f"[load {ckpt_name}] missing={len(missing)} unexpected={len(unexpected)}") + else: + print("[load __random__] random initialization from config") + return head, base, cfg, adam_cls, sparse_cls + + +def jvp_train(f, x, v): + return torch.autograd.functional.jvp(f, x, v=v, create_graph=True, strict=False) + + +def compute_joint_lyap_spec(model_type, base, batch, k_lyap, lyap_act_steps, device, seed, lyap_start_act=0): + inner = base.inner + cfg = inner.config + bsz = batch["inputs"].shape[0] + seq_full = cfg.seq_len + inner.puzzle_emb_len + hidden = cfg.hidden_size + dim = seq_full * hidden + + z_h = inner.H_init.unsqueeze(0).expand(bsz, seq_full, hidden).clone().to(inner.forward_dtype) + z_l = inner.L_init.unsqueeze(0).expand(bsz, seq_full, hidden).clone().to(inner.forward_dtype) + seq_info = {"cos_sin": inner.rotary_emb() if hasattr(inner, "rotary_emb") else None} + input_embeddings = inner._input_embeddings(batch["inputs"], batch["puzzle_identifiers"]) + + # Optional late-window measurement: first move to a later recursive state + # without differentiating through the warmup trajectory. This regularizes + # local late-stage stability instead of penalizing useful early expansion. 
+ warmup_acts = min(max(lyap_start_act, 0), cfg.halt_max_steps) + if warmup_acts > 0: + with torch.no_grad(): + for _act in range(warmup_acts): + for _h in range(cfg.H_cycles): + for _l in range(cfg.L_cycles): + z_l = inner.L_level(z_l, z_h + input_embeddings, **seq_info) + if model_type == "trm": + z_h = inner.L_level(z_h, z_l, **seq_info) + else: + z_h = inner.H_level(z_h, z_l, **seq_info) + z_h = z_h.detach() + z_l = z_l.detach() + + gen = torch.Generator(device=device).manual_seed(seed) + q0 = torch.randn(bsz, 2 * dim, k_lyap, device=device, dtype=torch.float32, generator=gen) + q, _ = torch.linalg.qr(q0) + log_r_sum = torch.zeros(bsz, k_lyap, device=device, dtype=torch.float32) + n_steps = 0 + + n_act = min(lyap_act_steps, max(cfg.halt_max_steps - warmup_acts, 1)) + for _act in range(n_act): + for _h in range(cfg.H_cycles): + for _l in range(cfg.L_cycles): + v_h = q[:, :dim, :] + v_l = q[:, dim:, :] + v_comb = v_h + v_l + new_v_l_cols = [] + f_l = lambda z: inner.L_level(z, z_h + input_embeddings, **seq_info) + for i in range(k_lyap): + v_i = v_comb[:, :, i].reshape(bsz, seq_full, hidden).to(inner.forward_dtype) + z_l_new, d_v = jvp_train(f_l, z_l, v_i) + new_v_l_cols.append(d_v.reshape(bsz, dim).to(torch.float32)) + q = torch.cat([v_h, torch.stack(new_v_l_cols, dim=-1)], dim=1) + z_l = z_l_new + q, r = torch.linalg.qr(q) + log_r_sum = log_r_sum + r.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log() + n_steps += 1 + + v_h = q[:, :dim, :] + v_l = q[:, dim:, :] + v_comb = v_h + v_l + new_v_h_cols = [] + if model_type == "trm": + f_h = lambda z: inner.L_level(z, z_l, **seq_info) + else: + f_h = lambda z: inner.H_level(z, z_l, **seq_info) + for i in range(k_lyap): + v_i = v_comb[:, :, i].reshape(bsz, seq_full, hidden).to(inner.forward_dtype) + z_h_new, d_v = jvp_train(f_h, z_h, v_i) + new_v_h_cols.append(d_v.reshape(bsz, dim).to(torch.float32)) + q = torch.cat([torch.stack(new_v_h_cols, dim=-1), v_l], dim=1) + z_h = z_h_new + q, r = torch.linalg.qr(q) + 
log_r_sum = log_r_sum + r.diagonal(dim1=-2, dim2=-1).abs().clamp_min(1e-30).log() + n_steps += 1 + + return log_r_sum / max(n_steps, 1) + + +def floss_loss_from_spec(spec, mode: str, lambda_star: float): + if mode == "engelken_l2": + return (spec ** 2).mean(), spec + if mode == "spectrum_cf": + excess = (spec - lambda_star).clamp_min(0.0) + return (excess ** 2).mean(), excess + if mode == "volume_cf": + volume = spec.mean(dim=1) + excess = (volume - lambda_star).clamp_min(0.0) + return (excess ** 2).mean(), excess + if mode == "top1_cf": + excess = (spec[:, 0] - lambda_star).clamp_min(0.0) + return (excess ** 2).mean(), excess + raise ValueError(f"unknown floss mode: {mode}") + + +def load_train_batches(data_path: Path, batch_size: int, n_iters: int, seed: int = 0): + rng = np.random.default_rng(seed) + inputs = np.load(data_path / "train" / "all__inputs.npy") + labels = np.load(data_path / "train" / "all__labels.npy") + pid = np.load(data_path / "train" / "all__puzzle_identifiers.npy") + n = len(inputs) + for _ in range(n_iters): + idx = rng.choice(n, size=batch_size, replace=False) + yield { + "inputs": torch.from_numpy(inputs[idx].astype(np.int32)), + "labels": torch.from_numpy(labels[idx].astype(np.int32)), + "puzzle_identifiers": torch.from_numpy(pid[idx].astype(np.int32)), + } + + +def sample_replay_batch(data_path: Path, n_samples: int, seed: int): + rng = np.random.default_rng(seed) + inputs = np.load(data_path / "train" / "all__inputs.npy") + labels = np.load(data_path / "train" / "all__labels.npy") + pid = np.load(data_path / "train" / "all__puzzle_identifiers.npy") + idx = rng.choice(len(inputs), size=n_samples, replace=False) + return { + "inputs": torch.from_numpy(inputs[idx].astype(np.int32)), + "labels": torch.from_numpy(labels[idx].astype(np.int32)), + "puzzle_identifiers": torch.from_numpy(pid[idx].astype(np.int32)), + } + + +def move_batch(batch: dict[str, torch.Tensor], device: str): + return {k: v.to(device) for k, v in batch.items()} + + +def 
rollout_logits(base, batch, device): + with torch.device(device): + carry = base.initial_carry(batch) + for _ in range(base.config.halt_max_steps): + carry, outputs = base(carry=carry, batch=batch) + return outputs["logits"] + + +def build_kl_replay(args, base, data_path, device, episode_idx): + if args.kl_beta <= 0 or args.kl_replay_size <= 0: + return None + + replay = sample_replay_batch( + data_path, + n_samples=args.kl_replay_size, + seed=args.seed + 200000 + episode_idx, + ) + teacher_chunks = [] + base.eval() + with torch.no_grad(): + for start in range(0, args.kl_replay_size, args.kl_batch_size): + end = min(start + args.kl_batch_size, args.kl_replay_size) + batch = move_batch({k: v[start:end] for k, v in replay.items()}, device) + logits = rollout_logits(base, batch, device) + teacher_chunks.append(logits.detach().to(torch.float32).cpu()) + + replay["teacher_logits"] = torch.cat(teacher_chunks, dim=0) + replay["mask"] = replay["labels"] > 0 + return replay + + +def kl_preservation_loss(args, base, replay, step, device): + if replay is None: + return torch.zeros((), device=device) + + n_replay = replay["inputs"].shape[0] + batch_size = min(args.kl_batch_size, n_replay) + start = (step * batch_size) % n_replay + if start + batch_size <= n_replay: + idx = torch.arange(start, start + batch_size) + else: + idx = torch.cat([torch.arange(start, n_replay), torch.arange(0, start + batch_size - n_replay)]) + + batch = move_batch( + { + "inputs": replay["inputs"][idx], + "labels": replay["labels"][idx], + "puzzle_identifiers": replay["puzzle_identifiers"][idx], + }, + device, + ) + teacher_logits = replay["teacher_logits"][idx].to(device) + mask = replay["mask"][idx].to(device) + was_training = base.training + base.eval() + student_logits = rollout_logits(base, batch, device).to(torch.float32) + if was_training: + base.train() + set_puzzle_embedding_mode(base, args.train_puzzle_emb) + temp = args.kl_temperature + student_logp = F.log_softmax(student_logits / temp, 
dim=-1) + teacher_p = F.softmax(teacher_logits / temp, dim=-1) + kl_per_token = F.kl_div(student_logp, teacher_p, reduction="none").sum(dim=-1) * (temp ** 2) + if mask.any(): + return kl_per_token[mask].mean() + return kl_per_token.mean() + + +def evaluate(head, base, data_path, n_samples, batch_size, device, seed=42): + rng = np.random.default_rng(seed) + inputs = np.load(data_path / "test" / "all__inputs.npy") + labels = np.load(data_path / "test" / "all__labels.npy") + pid = np.load(data_path / "test" / "all__puzzle_identifiers.npy") + idx_all = rng.choice(len(inputs), size=n_samples, replace=False) + head.eval() + correct = 0 + token_correct = 0 + token_total = 0 + for start in range(0, n_samples, batch_size): + end = min(start + batch_size, n_samples) + idx = idx_all[start:end] + batch = { + "inputs": torch.from_numpy(inputs[idx].astype(np.int32)).to(device), + "labels": torch.from_numpy(labels[idx].astype(np.int32)).to(device), + "puzzle_identifiers": torch.from_numpy(pid[idx].astype(np.int32)).to(device), + } + with torch.no_grad(): + with torch.device(device): + carry = base.initial_carry(batch) + for _ in range(base.config.halt_max_steps): + carry, outputs = base(carry=carry, batch=batch) + preds = outputs["logits"].argmax(dim=-1) + mask = batch["labels"] > 0 + exact = ((preds == batch["labels"]) | ~mask).all(dim=-1).float() + correct += exact.sum().item() + token_correct += ((preds == batch["labels"]) & mask).sum().item() + token_total += mask.sum().item() + return correct / n_samples, token_correct / max(token_total, 1) + + +def write_log(path: str, log: dict): + Path(path).write_text(json.dumps(log, indent=2)) + + +def freeze_puzzle_embedding(base): + base.inner.puzzle_emb.eval() + + +def set_puzzle_embedding_mode(base, train_puzzle_emb: bool): + if train_puzzle_emb: + base.inner.puzzle_emb.train() + else: + freeze_puzzle_embedding(base) + + +def make_optimizers(args, base, head, adam_cls, sparse_cls, lr: float, weight_decay: float, train_puzzle_emb: 
bool): + optimizers = [] + if train_puzzle_emb and getattr(base.inner.config, "puzzle_emb_ndim", 0) > 0: + optimizers.append( + sparse_cls( + base.inner.puzzle_emb.buffers(), + lr=lr if args.puzzle_emb_lr is None else args.puzzle_emb_lr, + weight_decay=args.puzzle_emb_weight_decay, + world_size=1, + ) + ) + optimizers.append(adam_cls(head.parameters(), lr=lr, betas=(0.9, 0.95), weight_decay=weight_decay)) + return optimizers + + +def optim_zero_grad(optimizers): + for optim in optimizers: + optim.zero_grad(set_to_none=True) + + +def optim_step(optimizers): + for optim in optimizers: + optim.step() + + +def run_floss_episode(args, head, base, adam_cls, data_path, device, log, episode_idx, train_step): + print( + f"\n=== Floss episode {episode_idx} at train_step={train_step}: " + f"{args.floss_steps} steps, mode={args.floss_mode}, lr={args.floss_lr} ===", + flush=True, + ) + optimizers = make_optimizers( + args, base, head, adam_cls, args.sparse_cls, + lr=args.floss_lr, weight_decay=0.0, train_puzzle_emb=False, + ) + replay = build_kl_replay(args, base, data_path, device, episode_idx) + train_iter = load_train_batches( + data_path, + args.floss_batch_size, + args.floss_steps, + seed=args.seed + 100000 + episode_idx * 1000, + ) + episode = {"episode": episode_idx, "train_step": train_step, "steps": []} + t0 = time.time() + + for step, batch in enumerate(train_iter): + batch = {k: v.to(device) for k, v in batch.items()} + head.train() + set_puzzle_embedding_mode(base, False) + spec = compute_joint_lyap_spec( + args.model, + base, + batch, + k_lyap=args.k_lyap, + lyap_act_steps=args.lyap_act_steps, + device=device, + seed=args.seed + episode_idx * 10000 + step, + lyap_start_act=args.lyap_start_act, + ) + optim_zero_grad(optimizers) + floss_loss, excess = floss_loss_from_spec(spec, args.floss_mode, args.lambda_star) + floss_loss.backward() + kl_loss = kl_preservation_loss(args, base, replay, step, device) + if args.kl_beta > 0: + (args.kl_beta * kl_loss).backward() + 
torch.nn.utils.clip_grad_norm_([p for p in head.parameters() if p.requires_grad], 1.0) + optim_step(optimizers) + + detached = spec.detach() + total_loss = floss_loss.detach() + args.kl_beta * kl_loss.detach() + rec = { + "step": step, + "loss": float(total_loss.item()), + "floss_loss": float(floss_loss.item()), + "kl_loss": float(kl_loss.item()), + "lyap1_mean": float(detached[:, 0].mean().item()), + "lyap1_max": float(detached[:, 0].max().item()), + "lyap_mean": float(detached.mean().item()), + "volume_mean": float(detached.mean(dim=1).mean().item()), + "volume_max": float(detached.mean(dim=1).max().item()), + "frac_active": float((excess.detach() > 0).float().mean().item()), + } + episode["steps"].append(rec) + if step % args.floss_log_every == 0 or step == args.floss_steps - 1: + print( + f" F[{step:>4}/{args.floss_steps}] dt={time.time() - t0:.1f}s " + f"loss={rec['loss']:.6f} floss={rec['floss_loss']:.6f} " + f"kl={rec['kl_loss']:.6f} lyap1={rec['lyap1_mean']:+.4f} " + f"vol={rec['volume_mean']:+.4f} active={rec['frac_active']:.2f}", + flush=True, + ) + + log["floss_episodes"].append(episode) + if args.eval_after_floss: + acc, tok_acc = evaluate(head, base, data_path, args.eval_n, args.eval_batch_size, device) + print(f" >> FLOSS EVAL train_step={train_step}: exact_acc={acc:.4f}", flush=True) + log["evals"].append( + {"kind": "after_floss", "train_step": train_step, "episode": episode_idx, "acc": acc, "tok_acc": tok_acc} + ) + write_log(args.out, log) + + +def run_task_step(args, head, base, batch, optimizers, device): + batch = {k: v.to(device) for k, v in batch.items()} + head.train() + set_puzzle_embedding_mode(base, args.train_puzzle_emb) + with torch.device(device): + carry = base.initial_carry(batch) + loss_sum = 0.0 + n_loss = 0 + for _ in range(base.config.halt_max_steps): + carry, loss, _metrics, _outputs, all_finish = head(return_keys=[], carry=carry, batch=batch) + loss_sum = loss_sum + loss + n_loss += 1 + if all_finish: + break + sup_loss = 
loss_sum / max(n_loss, 1) / batch["inputs"].shape[0] + optim_zero_grad(optimizers) + sup_loss.backward() + torch.nn.utils.clip_grad_norm_([p for p in head.parameters() if p.requires_grad], 1.0) + optim_step(optimizers) + return sup_loss + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--model", choices=["hrm", "trm"], required=True) + parser.add_argument("--ckpt-root", required=True) + parser.add_argument("--ckpt-name", required=True, + help="Checkpoint file name, or __random__ to initialize from config without loading weights.") + parser.add_argument("--train-steps", type=int, default=10000) + parser.add_argument("--batch-size", type=int, default=8) + parser.add_argument("--task-batch-size", type=int, default=None, + help="Supervised task microbatch size. Defaults to --batch-size.") + parser.add_argument("--floss-batch-size", type=int, default=None, + help="Flossing microbatch size. Defaults to --batch-size.") + parser.add_argument("--train-lr", type=float, default=1e-5) + parser.add_argument("--floss-lr", type=float, default=1e-4) + parser.add_argument("--floss-steps", type=int, default=500) + parser.add_argument("--interfloss-at", default="0,500") + parser.add_argument("--interfloss-every", type=int, default=0, + help="If >0, also run floss episodes periodically every N task optimizer steps.") + parser.add_argument("--interfloss-start", type=int, default=0, + help="First task optimizer step for periodic interfloss.") + parser.add_argument("--interfloss-stop", type=int, default=-1, + help="Last task optimizer step for periodic interfloss. 
-1 means train_steps.") + parser.add_argument("--floss-mode", choices=["engelken_l2", "spectrum_cf", "volume_cf", "top1_cf"], default="engelken_l2") + parser.add_argument("--lambda-star", type=float, default=0.0) + parser.add_argument("--k-lyap", type=int, default=8) + parser.add_argument("--lyap-act-steps", type=int, default=4) + parser.add_argument("--lyap-start-act", type=int, default=0, + help="Warm up this many ACT steps before measuring/flossing the Lyapunov window.") + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--eval-every", type=int, default=1000) + parser.add_argument("--eval-n", type=int, default=512) + parser.add_argument("--eval-batch-size", type=int, default=32) + parser.add_argument("--floss-log-every", type=int, default=10) + parser.add_argument("--eval-after-floss", action=argparse.BooleanOptionalAction, default=True) + parser.add_argument("--kl-beta", type=float, default=0.0, + help="Episode-start replay-logit KL weight during floss-only steps.") + parser.add_argument("--kl-replay-size", type=int, default=64) + parser.add_argument("--kl-batch-size", type=int, default=8) + parser.add_argument("--kl-temperature", type=float, default=1.0) + parser.add_argument("--init-seed", type=int, default=None, + help="Torch seed used before model construction. Use this for matched from-scratch runs.") + parser.add_argument("--train-puzzle-emb", action=argparse.BooleanOptionalAction, default=False, + help="Train sparse puzzle embeddings. Requires --batch-size to match the model local embedding batch.") + parser.add_argument("--puzzle-emb-lr", type=float, default=None, + help="Sparse puzzle embedding LR. 
Defaults to current phase LR.") + parser.add_argument("--puzzle-emb-weight-decay", type=float, default=1.0) + parser.add_argument("--out", default="step7_interfloss_log.json") + args = parser.parse_args() + if args.task_batch_size is None: + args.task_batch_size = args.batch_size + if args.floss_batch_size is None: + args.floss_batch_size = args.batch_size + args.batch_size = args.task_batch_size + + device = "cuda" + if args.init_seed is not None: + torch.manual_seed(args.init_seed) + np.random.seed(args.init_seed) + interfloss_steps = build_interfloss_steps(args) + head, base, cfg, adam_cls, sparse_cls = load_model( + args.model, + Path(args.ckpt_root), + args.ckpt_name, + device, + batch_size_override=args.task_batch_size if args.train_puzzle_emb else None, + ) + args.sparse_cls = sparse_cls + data_path = Path(cfg["data_path"]) + + print(f"\n=== Initial eval (loaded {args.ckpt_name}) ===") + acc0, tok0 = evaluate(head, base, data_path, args.eval_n, args.eval_batch_size, device) + print(f" initial: exact_acc={acc0:.4f} token_acc={tok0:.4f}", flush=True) + + log = { + "args": {k: v for k, v in vars(args).items() if k != "sparse_cls"}, + "initial_acc": acc0, + "initial_tok_acc": tok0, + "interfloss_steps": sorted(interfloss_steps), + "task_steps": [], + "floss_episodes": [], + "evals": [{"kind": "initial", "train_step": 0, "acc": acc0, "tok_acc": tok0}], + } + write_log(args.out, log) + + task_optimizers = make_optimizers( + args, base, head, adam_cls, sparse_cls, + lr=args.train_lr, weight_decay=cfg["weight_decay"], train_puzzle_emb=args.train_puzzle_emb, + ) + train_iter = load_train_batches(data_path, args.task_batch_size, args.train_steps, seed=args.seed) + episode_idx = 0 + t0 = time.time() + + for train_step, batch in enumerate(train_iter): + if train_step in interfloss_steps: + run_floss_episode(args, head, base, adam_cls, data_path, device, log, episode_idx, train_step) + episode_idx += 1 + + sup_loss = run_task_step(args, head, base, batch, 
task_optimizers, device) + rec = {"train_step": train_step + 1, "sup_loss": float(sup_loss.item())} + log["task_steps"].append(rec) + if train_step % 50 == 0 or train_step == args.train_steps - 1: + print( + f" T[{train_step + 1:>5}/{args.train_steps}] dt={time.time() - t0:.1f}s " + f"sup={rec['sup_loss']:.4f}", + flush=True, + ) + + if (train_step + 1) % args.eval_every == 0: + acc, tok_acc = evaluate(head, base, data_path, args.eval_n, args.eval_batch_size, device) + print(f" >> TASK EVAL @ step {train_step + 1}: exact_acc={acc:.4f} delta={acc - acc0:+.4f}", flush=True) + log["evals"].append({"kind": "task", "train_step": train_step + 1, "acc": acc, "tok_acc": tok_acc}) + write_log(args.out, log) + + if args.train_steps in interfloss_steps: + run_floss_episode(args, head, base, adam_cls, data_path, device, log, episode_idx, args.train_steps) + + acc_f, tok_f = evaluate(head, base, data_path, args.eval_n, args.eval_batch_size, device) + print("\n=== Final eval ===") + print(f" initial={acc0:.4f} final={acc_f:.4f} delta={acc_f - acc0:+.4f}", flush=True) + log["final_acc"] = acc_f + log["final_tok_acc"] = tok_f + log["evals"].append({"kind": "final", "train_step": args.train_steps, "acc": acc_f, "tok_acc": tok_f}) + write_log(args.out, log) + print(f"log -> {args.out}") + + +if __name__ == "__main__": + main() diff --git a/maze_pred_dump.py b/maze_pred_dump.py new file mode 100644 index 0000000..ebbc735 --- /dev/null +++ b/maze_pred_dump.py @@ -0,0 +1,85 @@ +"""Dump per-cell predictions for a TRM-Maze checkpoint (plain forward, no JVP) so we can +analyze WHERE failure errors are (connected detour = coherent stable wrong path vs scattered). +Saves preds, labels, inputs, exact_correct, idx for n test puzzles. 
+""" +from __future__ import annotations +import sys, argparse +from pathlib import Path +import numpy as np +import torch + +sys.path.insert(0, "/home/yurenh2/rrm/research/flossing") +from diagnose_trm_joint_maze import load_model, load_test_samples # att+maze-capable loader + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--ckpt-root", required=True) + ap.add_argument("--ckpt-name", default="step_130200") + ap.add_argument("--data", required=True) + ap.add_argument("--n", type=int, default=512) + ap.add_argument("--batch-size", type=int, default=32) + ap.add_argument("--seed", type=int, default=0) + ap.add_argument("--out", required=True) + args = ap.parse_args() + device = "cuda" + model, cfg, train_meta = load_model(Path(args.ckpt_root), args.ckpt_name, device) + inner = model.inner + test = load_test_samples(Path(args.data), args.n, 0, 1, args.seed) + n = len(test["inputs"]) + pe = inner.puzzle_emb_len + + preds_all, labels_all, inputs_all, exact_all, idx_all = [], [], [], [], [] + ans_drift_full_all, ans_drift_ans_all, ldrift_all = [], [], [] + for s in range(0, n, args.batch_size): + e = min(s + args.batch_size, n) + batch = {k: test[k][s:e].to(device) for k in ["inputs", "labels", "puzzle_identifiers"]} + B = batch["inputs"].shape[0] + seq_full = inner.config.seq_len + pe + hidden = inner.config.hidden_size + with torch.no_grad(): + z_H = inner.H_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype) + z_L = inner.L_init.unsqueeze(0).expand(B, seq_full, hidden).clone().to(inner.forward_dtype) + seq_info = dict(cos_sin=inner.rotary_emb() if hasattr(inner, "rotary_emb") else None) + inp_emb = inner._input_embeddings(batch["inputs"], batch["puzzle_identifiers"]) + labels = batch["labels"] + ans_mask = (labels != batch["inputs"]) # SOLUTION-SPACE cells: where task requires change + prev = None; prev_zH = None + adrift_full, adrift_ans, ldrift = [], [], [] # answer Hamming drift + LATENT z_H drift + for _ in 
range(inner.config.halt_max_steps): + for _h in range(inner.config.H_cycles): + for _l in range(inner.config.L_cycles): + z_L = inner.L_level(z_L, z_H + inp_emb, **seq_info) + z_H = inner.L_level(z_H, z_L, **seq_info) + p = inner.lm_head(z_H)[:, pe:].float().argmax(-1) # decode answer THIS step + if prev is None: + adrift_full.append(torch.zeros(B, device=device)); adrift_ans.append(torch.zeros(B, device=device)) + ldrift.append(torch.zeros(B, device=device)) + else: + adrift_full.append((p != prev).float().sum(-1)) + adrift_ans.append(((p != prev) & ans_mask).float().sum(-1)) + ldrift.append((z_H - prev_zH).float().flatten(1).norm(dim=1)) # latent z_H drift + prev = p; prev_zH = z_H.detach() + preds = prev + mask = labels > 0 + exact = ((preds == labels) | ~mask).all(-1) + preds_all.append(preds.cpu().numpy()); labels_all.append(labels.cpu().numpy()) + inputs_all.append(batch["inputs"].cpu().numpy()); exact_all.append(exact.cpu().numpy()) + idx_all.append(test["idx"][s:e]) + ans_drift_full_all.append(torch.stack(adrift_full, 1).cpu().numpy()) + ans_drift_ans_all.append(torch.stack(adrift_ans, 1).cpu().numpy()) + ldrift_all.append(torch.stack(ldrift, 1).cpu().numpy()) + print(f" [{e}/{n}] exact={exact.float().mean():.3f}", flush=True) + + np.savez_compressed(args.out, + preds=np.concatenate(preds_all), labels=np.concatenate(labels_all), + inputs=np.concatenate(inputs_all), exact_correct=np.concatenate(exact_all).astype(np.float32), + idx=np.concatenate(idx_all), + ans_drift_full=np.concatenate(ans_drift_full_all), # (N, steps) decoded-answer Hamming drift + ans_drift_ans=np.concatenate(ans_drift_ans_all), + drift_zH=np.concatenate(ldrift_all)) # (N, steps) over solution-space cells only + print("saved", args.out) + + +if __name__ == "__main__": + main() diff --git a/meeting_artifacts/trm_multi4_dynamics_report.md b/meeting_artifacts/trm_multi4_dynamics_report.md new file mode 100644 index 0000000..184656c --- /dev/null +++ 
b/meeting_artifacts/trm_multi4_dynamics_report.md @@ -0,0 +1,57 @@ +# TRM multi4 Dynamics Report + +Date: 2026-06-02 + +## Main Result + +Official GBS768 Sudoku-Extreme runs show a real best-checkpoint gain from trajectory perturbation training. + +| model | step | full exact | full token acc | dynamics sample exact | lambda1 mean | top-8 mean | positive count / 8 | +|---|---:|---:|---:|---:|---:|---:|---:| +| TRM baseline best | 58590 | 0.8686 | 0.9508 | 0.8750 | +0.0282 | +0.0135 | 7.84 | +| TRM multi4 best | 35805 | 0.8965 | 0.9604 | 0.9004 | +0.0204 | +0.0066 | 3.84 | +| TRM multi4 final | 65100 | 0.8351 | 0.9350 | 0.8242 | +0.0323 | +0.0185 | 8.00 | + +Interpretation: multi4 best improves full-test exact accuracy by +2.78pp over baseline best, while suppressing the Lyapunov spectrum volume on successful trajectories. The final checkpoint collapses below baseline and its spectrum becomes broadly positive again. + +## Success vs Failure Separation + +The N=512 spectrum sample keeps the original observation: failures have much larger positive exponents. + +| model | success lambda1 | failure lambda1 | success top-8 mean | failure top-8 mean | success positive count | failure positive count | +|---|---:|---:|---:|---:|---:|---:| +| TRM baseline best | +0.0176 | +0.1026 | +0.0080 | +0.0516 | 7.82 | 8.00 | +| TRM multi4 best | +0.0112 | +0.1035 | +0.0020 | +0.0485 | 3.38 | 8.00 | +| TRM multi4 final | +0.0191 | +0.0942 | +0.0129 | +0.0449 | 8.00 | 8.00 | + +Interpretation: multi4 does not make failures stable; it mainly shifts successful solutions into less chaotic basins. This is the right story for stable attractor training: success becomes less volume-expanding, while bad basins remain chaotic. + +## PTRM Same-Subset Result + +Existing PTRM paired files already use identical `idx` with `n=1000`, `K=100`, `D=64`, `sigma=0.3`, L-only perturbation. 
+ +| metric | baseline best | multi4 best | delta | +|---|---:|---:|---:| +| deterministic exact on subset | 0.8870 | 0.9110 | +0.0240 | +| mean rollout exact | 0.9419 | 0.9542 | +0.0123 | +| Q-selected exact | 0.9840 | 0.9880 | +0.0040 | +| oracle pass@100 | 0.9850 | 0.9880 | +0.0030 | +| correct rollout count / 100 | 94.19 | 95.42 | +1.23 | + +Interpretation: PTRM is near saturated on Sudoku, so Q-selected/oracle improvements are small. The stronger signal is that multi4 increases deterministic accuracy and the density of correct stochastic rollouts on the same test subset. + +## Caveats + +- Full TRM eval uses all 422,786 Sudoku-Extreme test puzzles. PTRM runs above use a fixed 1,000-sample subset, so deterministic PTRM-subset numbers should not be mixed with full-test W&B exact accuracy. +- The N=512 Lyapunov spectrum is a diagnostic sample, not full-test evaluation. +- Final checkpoint is not the result to report as the method's performance; it is an overtraining/collapse diagnostic. +- A larger paired PTRM run is currently queued: `n=10000`, `K=25`, `D=64`, `sigma=0.3`, L-only, same seed. It is slow and should be treated as a robustness check, not a blocker. 
+ +## Artifacts + +- Headline CSV: `research/flossing/official_gbs768_spectrum/headline_trm_multi4_dynamics_table.csv` +- Spectrum summary CSV: `research/flossing/official_gbs768_spectrum/summary_n512_k8_seed20260602.csv` +- PTRM paired CSV: `research/flossing/ptrm_same_subset/paired_ptrm_k100_n1000_seed0_summary.csv` +- Spectrum plot: `research/flossing/meeting_artifacts/trm_gbs768_spectrum_success_failure_n512.png` +- Headline bar plot: `research/flossing/meeting_artifacts/trm_gbs768_headline_dynamics_bars.png` +- PTRM plot: `research/flossing/meeting_artifacts/ptrm_same_subset_k100_n1000_comparison.png` diff --git a/meeting_artifacts_v2/fig5_qhead_vs_lambda1_ptrm_summary.csv b/meeting_artifacts_v2/fig5_qhead_vs_lambda1_ptrm_summary.csv new file mode 100644 index 0000000..bcbf401 --- /dev/null +++ b/meeting_artifacts_v2/fig5_qhead_vs_lambda1_ptrm_summary.csv @@ -0,0 +1,3 @@ +name,path,n_samples,rollouts,mean_rollout_exact,q_max_exact,lambda_min_exact,oracle_pass_exact,q_lambda_same_argmax_frac,global_pearson_q_vs_stability,global_spearman_q_vs_stability,within_problem_pearson_mean,within_problem_spearman_mean,q_success_mean,q_fail_mean,lambda_success_mean,lambda_fail_mean,mixed_problem_count,zero_success_problem_count,full_success_problem_count,mixed_global_pearson_q_vs_stability,mixed_q_max_exact,mixed_lambda_min_exact,mixed_oracle_exact +TRM baseline + PTRM rollouts,research/flossing/q_lambda_scatter/base58590_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.npz,512.0,25.0,0.93890625,0.97265625,0.97265625,0.97265625,0.04296875,0.7524171235289728,-0.025655130770112743,0.12497240516050691,0.09656841990607949,7.796669578964886,-10.659986413043478,5.632336168325687,6.921443493469901,58.0,14.0,440.0,0.7857503437605575,1.0,1.0,1.0 +TRM multi4 + PTRM 
rollouts,research/flossing/q_lambda_scatter/multi4_35805_k25_d64_sigma03_Lonly_fdlyap_n512_seed20260602.npz,512.0,25.0,0.944609375,0.974609375,0.974609375,0.974609375,0.0859375,0.7289674235313497,-0.13633997124282113,0.18866459504767938,0.11859038341758327,7.7404597169382185,-10.081937588152327,5.7459801223593345,7.271883726456269,43.0,13.0,456.0,0.7911647653323942,1.0,1.0,1.0 diff --git a/meeting_artifacts_v2/hrm_trm_redesigned_summary.csv b/meeting_artifacts_v2/hrm_trm_redesigned_summary.csv new file mode 100644 index 0000000..c5a4ad0 --- /dev/null +++ b/meeting_artifacts_v2/hrm_trm_redesigned_summary.csv @@ -0,0 +1,7 @@ +model,label,step,full_exact,sample_exact,lambda1_all,mean8_all,pos_count_all,lambda1_success,lambda1_fail,mean8_success,mean8_fail,pos_count_success,pos_count_fail +HRM,baseline best,26040,0.5265287756919861,0.5,-0.0569394779099639,-0.10732079181130239,0.98046875,-0.14642834789538028,0.03254939207545249,-0.18957092847483636,-0.025070655147768406,0.01953125,1.94140625 +HRM,multi4 best,23436,0.6443189,0.654296875,-0.04733450568929953,-0.112159715874796,1.166015625,-0.10103540500250659,0.054302789621007604,-0.16998952501653278,-0.0027078172167066595,0.05970149253731343,3.2598870056497176 +HRM,multi4 final,26040,0.46235448,0.4296875,0.02874533511322852,-0.040662085491063316,1.626953125,0.03573289099003887,0.02348073821974127,-0.053027883530268646,-0.031345388338237384,1.8181818181818181,1.4828767123287672 +TRM,baseline best,58590,0.8686309456825256,0.875,0.02823458132615997,0.013457294571722192,7.841796875,0.01761167685357837,0.10259491263423115,0.008003413600119422,0.05163446137294159,7.819196428571429,8.0 +TRM,multi4 best,35805,0.8964653611183167,0.900390625,0.020381716455975862,0.0065844104191477015,3.841796875,0.01118387160416574,0.10352301992037717,0.0019524994650864183,0.04845325257252518,3.3817787418655096,8.0 +TRM,multi4 
final,65100,0.8350536823272705,0.82421875,0.03232463403946895,0.018508151198432188,8.0,0.01912124014472792,0.09423388096814354,0.012883653437293365,0.04488079625621645,8.0,8.0 diff --git a/meeting_artifacts_v2/meeting_figures_v2_report.md b/meeting_artifacts_v2/meeting_figures_v2_report.md new file mode 100644 index 0000000..5158c99 --- /dev/null +++ b/meeting_artifacts_v2/meeting_figures_v2_report.md @@ -0,0 +1,26 @@ +# Meeting Figures v2 + +## Figure Strategy + +0. `fig0_motivation_lambda1_success_failure_hrm_trm.png`: first-exponent success/failure distribution in HRM and TRM. This motivates chaos as a detector before introducing the method. +1. `fig1_hrm_trm_training_curves.png`: performance over training for HRM and TRM. This answers whether the method improves accuracy and where best/final are. +2. `fig2_accuracy_vs_chaotic_volume_phase.png`: phase view, with accuracy versus mean top-8 Lyapunov exponent. This answers whether better checkpoints are dynamically more stable. +3. `fig3_hrm_trm_success_failure_spectra.png`: full success/failure spectrum separation for HRM and TRM best checkpoints. This extends Fig0 beyond λ1. +4. `fig4_ptrm_same_subset_comparison.png`: PTRM same-subset result. This is a secondary inference-time story. +5. `fig5_qhead_vs_lambda1_ptrm.png`: PTRM Q-head halt logit versus finite-difference stability proxy `-lambda_1`. The bottom row isolates mixed problems where trajectory selection actually matters. + +## Key Numbers + +- HRM baseline best: 0.5265 exact. HRM multi4 best: 0.6443 exact. HRM multi4 final: 0.4624 exact. +- TRM baseline best: 0.8686 exact. TRM multi4 best: 0.8965 exact. TRM multi4 final: 0.8351 exact. +- HRM multi4 best dynamics sample: mean top-8 exponent -0.1122; final -0.0407. +- TRM multi4 best dynamics sample: mean top-8 exponent +0.0066; final +0.0185. +- PTRM same subset, K=100: Q-selected 0.984 -> 0.988; mean rollout 0.942 -> 0.954. 
"""Merge shard npz files and produce top-k Lyapunov analysis + plots.

Reads every npz matching ``--in-glob``, concatenates each array along axis 0,
writes the result to ``--merged-npz``, prints per-exponent success/failure
statistics, and saves five figures under ``--out-dir``.

The merged arrays used below are ``lyap_spec`` (N, K), ``exact_correct``,
``drift_zH`` and ``drift_zL``.
"""
import argparse
import glob
import os

import matplotlib
matplotlib.use("Agg")  # select the non-interactive backend before pyplot is imported
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats


def auc(score, target):
    """Return the AUROC of ``score`` for the positive class ``target``.

    Computed as U / (n_pos * n_neg), where U is the Mann-Whitney statistic of
    the positive sample: the probability that a positive scores higher than a
    negative. ``target`` is a boolean mask over ``score``.
    """
    target = np.asarray(target, dtype=bool)
    pos = score[target]
    neg = score[~target]
    u_pos = stats.mannwhitneyu(pos, neg, alternative="greater").statistic
    return u_pos / (pos.size * neg.size)


def _cohen_d(a, b):
    """Return Cohen's d of ``a`` versus ``b`` using the pooled standard deviation.

    The pooled variance is (SS_a + SS_b) / (n_a + n_b - 2), with SS the sum of
    squared deviations from each group's own mean.
    """
    ss_a = ((a - a.mean()) ** 2).sum()
    ss_b = ((b - b.mean()) ** 2).sum()
    pooled = np.sqrt((ss_a + ss_b) / (a.size + b.size - 2))
    return (a.mean() - b.mean()) / pooled


ap = argparse.ArgumentParser()
ap.add_argument("--in-glob", default="/home/yurenh2/rrm/research/flossing/diag_8k_shard*.npz")
ap.add_argument("--out-dir", default="/home/yurenh2/rrm/research/flossing/plots_topk")
ap.add_argument("--merged-npz", default="/home/yurenh2/rrm/research/flossing/diag_8k.npz")
ap.add_argument("--title", default="HRM Sudoku-Extreme-1k @ step_26040",
                help="model/dataset label used in the spectrum plot title")
args = ap.parse_args()

os.makedirs(args.out_dir, exist_ok=True)

files = sorted(glob.glob(args.in_glob))
if not files:
    # Fail before writing an empty merged file and hitting a KeyError below.
    raise SystemExit(f"no shard files match {args.in_glob!r}")
print(f"Merging {len(files)} files:")
for path in files:
    print(f" {path}")

# --- merge: concatenate every array across shards along the sample axis ---
parts_by_key = {}
for path in files:
    with np.load(path) as shard:  # close each archive once its arrays are read
        for key in shard.files:
            parts_by_key.setdefault(key, []).append(shard[key])
merged = {key: np.concatenate(parts, axis=0) for key, parts in parts_by_key.items()}
np.savez_compressed(args.merged_npz, **merged)
print(f"Merged → {args.merged_npz}")
print(f"N={len(merged['exact_correct'])}")

lyap_spec = merged["lyap_spec"]  # (N, K)
exact = merged["exact_correct"].astype(bool)
is_fail = ~exact
token_acc = merged["token_acc"]
drift_zH = merged["drift_zH"]
drift_zL = merged["drift_zL"]
q_halt = merged["q_halt"]
q_continue = merged["q_continue"]
N, K = lyap_spec.shape
print(f"K (top-k) = {K}")

succ = lyap_spec[exact]
fail = lyap_spec[is_fail]
print(f"acc = {exact.mean():.4f}")
if succ.shape[0] == 0 or fail.shape[0] == 0:
    # Every statistic below compares the two outcome groups.
    raise SystemExit("need at least one success and one failure to compare outcomes")

# --- per-lambda stats ---
print("\n--per-λ stats (Δ = mean_fail - mean_succ; positive ⇒ failures are LESS contractive) --")
print(f"{'i':>3} {'mean_all':>10} {'mean_succ':>10} {'mean_fail':>10} {'Δ':>8} {'cohen_d':>8} {'auroc':>7} {'p_t':>10}")
for i in range(K):
    li = lyap_spec[:, i]
    ls = li[exact]
    lf = li[is_fail]
    d_c = _cohen_d(ls, lf)
    delta = lf.mean() - ls.mean()
    auroc_i = auc(li, is_fail)
    _t, p = stats.ttest_ind(ls, lf, equal_var=False)  # Welch's t-test
    print(f"{i+1:>3} {li.mean():+10.4f} {ls.mean():+10.4f} {lf.mean():+10.4f} {delta:+8.4f} {abs(d_c):>8.3f} {auroc_i:>7.3f} {p:>10.2e}")

# --- combined predictor: which λ best separates? sum? mean? λ_1 alone? ---
print("\n--combined predictors--")
for label, score in [
    ("λ_1 alone", lyap_spec[:, 0]),
    ("λ_K alone", lyap_spec[:, -1]),
    ("mean λ", lyap_spec.mean(axis=1)),
    ("λ_1 + λ_K", lyap_spec[:, 0] + lyap_spec[:, -1]),
    ("sum λ", lyap_spec.sum(axis=1)),
    ("max(λ) (= λ_1)", lyap_spec.max(axis=1)),
]:
    print(f" {label:18s}: AUROC = {auc(score, is_fail):.4f}")

# --- plot 1: Lyapunov spectrum mean ± std ---
fig, ax = plt.subplots(1, 1, figsize=(7, 5))
mean_s = succ.mean(0)
std_s = succ.std(0)
mean_f = fail.mean(0)
std_f = fail.std(0)
x = np.arange(1, K + 1)
ax.fill_between(x, mean_s - std_s, mean_s + std_s, color="C0", alpha=0.25)
ax.plot(x, mean_s, "C0-o", label=f"success (n={succ.shape[0]})", lw=2)
ax.fill_between(x, mean_f - std_f, mean_f + std_f, color="C3", alpha=0.25)
ax.plot(x, mean_f, "C3-o", label=f"failure (n={fail.shape[0]})", lw=2)
ax.axhline(0, color="k", ls=":", lw=0.6)
ax.set_xlabel(r"Lyapunov index $i$")
ax.set_ylabel(r"$\lambda_i$ (per inner cycle)")
ax.set_title(f"{args.title}: top-{K} Lyapunov spectrum\n"
             f"N={N}, acc={exact.mean():.3f}")
ax.legend()
ax.grid(alpha=0.3)
fig.tight_layout()
fig.savefig(f"{args.out_dir}/lyap_spectrum.png", dpi=130)
plt.close(fig)

# --- plot 2: λ_1 hist (re-do at 8k) ---
fig, ax = plt.subplots(1, 1, figsize=(7, 4))
lo = min(lyap_spec[:, 0].min(), -1.5)
hi = max(lyap_spec[:, 0].max(), -0.1)
bins = np.linspace(lo, hi, 60)
ax.hist(succ[:, 0], bins=bins, alpha=0.55, label=f"success (n={succ.shape[0]})", color="C0", density=True)
ax.hist(fail[:, 0], bins=bins, alpha=0.55, label=f"failure (n={fail.shape[0]})", color="C3", density=True)
ax.axvline(succ[:, 0].mean(), color="C0", ls="--", lw=1)
ax.axvline(fail[:, 0].mean(), color="C3", ls="--", lw=1)
ax.set_xlabel(r"$\lambda_1$ (top Lyapunov exponent)")
ax.set_ylabel("density")
auc1 = auc(lyap_spec[:, 0], is_fail)
ax.set_title(f"$\\lambda_1$ distribution by outcome (N={N}, AUROC={auc1:.3f})")
ax.legend()
fig.tight_layout()
fig.savefig(f"{args.out_dir}/lyap1_hist.png", dpi=130)
plt.close(fig)

# --- plot 3: scatter of (λ_1, λ_K) ---
fig, ax = plt.subplots(1, 1, figsize=(6.5, 5.5))
ax.scatter(lyap_spec[exact, 0], lyap_spec[exact, -1], s=3, alpha=0.3, color="C0", label="success")
ax.scatter(lyap_spec[is_fail, 0], lyap_spec[is_fail, -1], s=3, alpha=0.3, color="C3", label="failure")
ax.set_xlabel(r"$\lambda_1$")
ax.set_ylabel(r"$\lambda_{%d}$" % K)
ax.set_title(f"Top vs bottom of the top-{K} spectrum")
ax.plot([-1.5, 0], [-1.5, 0], "k:", lw=0.6, label="diag")
ax.legend()  # built after the diagonal is drawn so its label is included
fig.tight_layout()
fig.savefig(f"{args.out_dir}/lyap1_vs_lyapK.png", dpi=130)
plt.close(fig)

# --- plot 4: spectrum spread (λ_1 - λ_K) by outcome ---
spread = lyap_spec[:, 0] - lyap_spec[:, -1]
spread_succ = spread[exact]
spread_fail = spread[is_fail]
fig, ax = plt.subplots(1, 1, figsize=(6, 4))
ax.hist(spread_succ, bins=40, alpha=0.55, label="success", color="C0", density=True)
ax.hist(spread_fail, bins=40, alpha=0.55, label="failure", color="C3", density=True)
ax.set_xlabel(r"$\lambda_1 - \lambda_{%d}$ (spectrum spread)" % K)
ax.set_ylabel("density")
spread_auc = auc(spread, is_fail)
ax.set_title(f"Top-bottom Lyapunov spread (AUROC={spread_auc:.3f})\n"
             f"succ={spread_succ.mean():.4f}±{spread_succ.std():.4f}, "
             f"fail={spread_fail.mean():.4f}±{spread_fail.std():.4f}")
ax.legend()
fig.tight_layout()
fig.savefig(f"{args.out_dir}/lyap_spread.png", dpi=130)
plt.close(fig)

# --- plot 5: drift per ACT step (revisited) ---
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
for ax, drift, name in [(axes[0], drift_zH, "$\\|\\Delta z_H\\|$"),
                        (axes[1], drift_zL, "$\\|\\Delta z_L\\|$")]:
    mean_s = drift[exact].mean(0)
    std_s = drift[exact].std(0)
    mean_f = drift[is_fail].mean(0)
    std_f = drift[is_fail].std(0)
    x = np.arange(drift.shape[1])
    ax.fill_between(x, mean_s - std_s, mean_s + std_s, color="C0", alpha=0.2)
    ax.plot(x, mean_s, "C0-o", label="success", lw=2)
    ax.fill_between(x, mean_f - std_f, mean_f + std_f, color="C3", alpha=0.2)
    ax.plot(x, mean_f, "C3-o", label="failure", lw=2)
    ax.set_xlabel("ACT step")
    ax.set_title(name)
    ax.legend()
    ax.set_yscale("log")
fig.tight_layout()
fig.savefig(f"{args.out_dir}/drift_per_act.png", dpi=130)
plt.close(fig)

print(f"\nplots saved to {args.out_dir}/")
#!/usr/bin/env bash
# Poll for the monitored experiment / launcher processes. While any process
# whose command line matches PATTERN is alive, log changes to the process list
# and refresh a partial report every POLL_SECONDS. Once none remain, generate
# the final report and exit.
set -eo pipefail

FLOSS_DIR="/home/yurenh2/rrm/research/flossing"
CONDA_SH="/home/yurenh2/miniconda3/etc/profile.d/conda.sh"
LOG="${FLOSS_DIR}/monitor_dynamics_experiments.log"
REPORT="${FLOSS_DIR}/dynamics_experiment_report.md"
REPORT_STDOUT="dynamics_experiment_report.stdout"
POLL_SECONDS=300

PATTERN='step3_M_volume_cf_26040_lstar_neg015|step7_C_hrm_engelken_interfloss_kl10|step7_D_trm_engelken_interfloss_kl10|step7_I_hrm_engelken_interfloss_kl100_short|launch_dynamics_variants_queue|step7_E_hrm_late|step7_F_trm_late|step7_G_hrm_volume|step7_H_trm_volume|step8_A_hrm_basin|step8_B_trm_basin|step9_[A-Z]_|launch_trajectory_perturb_queue|launch_trajectory_sampling_long'

# Append a timestamped line to the monitor log.
log() {
  echo "[$(date --iso-8601=seconds)] $*" >> "${LOG}"
}

cd "${FLOSS_DIR}"
source "${CONDA_SH}"
conda activate rrm

log "monitor started"
log "report target: ${REPORT}"

last_snapshot=""
while true; do
  # pgrep exits non-zero when nothing matches; that is the "done" signal here,
  # not an error, so keep it from tripping `set -e`.
  active="$(pgrep -af "${PATTERN}" || true)"
  if [[ -z "${active}" ]]; then
    log "no active monitored processes; generating final report"
    if python analyze_dynamics_experiments.py > "${REPORT_STDOUT}" 2>&1; then
      log "final report generated"
      exit 0
    else
      status=$?
      # Record the failure instead of dying silently under `set -e`.
      log "final report generation FAILED (exit ${status}); see ${REPORT_STDOUT}"
      exit "${status}"
    fi
  fi

  # Squeeze runs of spaces and truncate each line, so that only real changes
  # in the process list produce a new log entry.
  snapshot="$(printf '%s\n' "${active}" | tr -s ' ' | cut -c1-260)"
  if [[ "${snapshot}" != "${last_snapshot}" ]]; then
    log "active processes:"
    echo "${snapshot}" >> "${LOG}"
    last_snapshot="${snapshot}"
  fi

  # Keep a rolling partial report useful while long queues are still running.
  # Best-effort: a failure here must not stop the monitor.
  python analyze_dynamics_experiments.py > "${REPORT_STDOUT}" 2>&1 || true
  sleep "${POLL_SECONDS}"
done
b/multi4_eval_compare/hrm_honly_step26040_eval.csv new file mode 100644 index 0000000..d3e3ab9 --- /dev/null +++ b/multi4_eval_compare/hrm_honly_step26040_eval.csv @@ -0,0 +1,2 @@ +step,accuracy,exact_accuracy,lm_loss,q_halt_accuracy,q_halt_loss,steps +26040,0.85720146,0.62264127,0.35935268,0.99625105,0.024241636,16.0 diff --git a/multi4_eval_compare/hrm_matched_compare.csv b/multi4_eval_compare/hrm_matched_compare.csv new file mode 100644 index 0000000..71b8e16 --- /dev/null +++ b/multi4_eval_compare/hrm_matched_compare.csv @@ -0,0 +1,6 @@ +family,step,base_exact,multi4_exact,delta_exact,base_acc,multi4_acc,delta_acc,base_loss,multi4_loss,delta_loss,base_steps,multi4_steps +hrm,2604,0.016369983553886414,0.0123774204403162,-0.003992563113570213,0.6336106061935425,0.6303551197052002,-0.003255486488342285,0.8522627949714661,0.8603715300559998,0.008108735084533691,16,16 +hrm,5208,0.06169315055012703,0.025012653321027756,-0.036680497229099274,0.6744286417961121,0.6685170531272888,-0.005911588668823242,0.7430469989776611,0.7412638068199158,-0.0017831921577453613,16,16 +hrm,7812,0.1358133852481842,0.04651052877306938,-0.08930285647511482,0.6985693573951721,0.687346339225769,-0.011223018169403076,0.676047682762146,0.7044885158538818,0.02844083309173584,16,16 +hrm,10416,0.20248068869113922,0.2006617933511734,-0.0018188953399658203,0.7259970307350159,0.7243355512619019,-0.0016614794731140137,0.6480565667152405,0.6340938210487366,-0.013962745666503906,16,16 +hrm,13020,0.3024248778820038,0.34697696566581726,0.04455208778381348,0.7580602169036865,0.7794705033302307,0.02141028642654419,0.5614010691642761,0.49915269017219543,-0.06224837899208069,16,16 diff --git a/multi4_eval_compare/hrm_multi4_complete_eval.csv b/multi4_eval_compare/hrm_multi4_complete_eval.csv new file mode 100644 index 0000000..9aac158 --- /dev/null +++ b/multi4_eval_compare/hrm_multi4_complete_eval.csv @@ -0,0 +1,11 @@ +run,step,exact,accuracy,loss 
+hrm_multi4,2604,0.0123774204403162,0.6303551197052002,0.8603715300559998 +hrm_multi4,5208,0.025012653321027756,0.6685170531272888,0.7412638068199158 +hrm_multi4,7812,0.04651052877306938,0.687346339225769,0.7044885158538818 +hrm_multi4,10416,0.2006617933511734,0.7243355512619019,0.6340938210487366 +hrm_multi4,13020,0.34697696566581726,0.7794705033302307,0.49915269017219543 +hrm_multi4,15624,0.4653252363204956,0.8156101703643799,0.42743897438049316 +hrm_multi4,18228,0.5790873169898987,0.8494690656661987,0.3459467887878418 +hrm_multi4,20832,0.6393187,0.8660642,0.3186217 +hrm_multi4,23436,0.6443189,0.8684137,0.30427682 +hrm_multi4,26040,0.46235448,0.80298746,0.603943 diff --git a/multi4_eval_compare/hrm_multi4_eval.csv b/multi4_eval_compare/hrm_multi4_eval.csv new file mode 100644 index 0000000..0f7dbc9 --- /dev/null +++ b/multi4_eval_compare/hrm_multi4_eval.csv @@ -0,0 +1,6 @@ +run,step,all/exact_accuracy,all/accuracy,all/lm_loss,all/q_halt_accuracy,all/q_halt_loss,all/steps +hrm_multi4,2604,0.0123774204403162,0.6303551197052002,0.8603715300559998,0.9879773855209351,0.020903315395116806,16 +hrm_multi4,5208,0.025012653321027756,0.6685170531272888,0.7412638068199158,0.9968069195747375,0.019309692084789276,16 +hrm_multi4,7812,0.04651052877306938,0.687346339225769,0.7044885158538818,0.9902976751327515,0.03646523132920265,16 +hrm_multi4,10416,0.2006617933511734,0.7243355512619019,0.6340938210487366,0.9991532564163208,0.011545917019248009,16 +hrm_multi4,13020,0.34697696566581726,0.7794705033302307,0.49915269017219543,0.9987062215805054,0.01581510715186596,16 diff --git a/multi4_eval_compare/hrm_multi4_horizon_sweep_768.csv b/multi4_eval_compare/hrm_multi4_horizon_sweep_768.csv new file mode 100644 index 0000000..3bdace3 --- /dev/null +++ b/multi4_eval_compare/hrm_multi4_horizon_sweep_768.csv @@ -0,0 +1,41 @@ +step,split,horizon,count,exact_accuracy,accuracy,lm_loss,q_halt_loss,steps 
+23436,train,2,768.0,0.2955729166666667,0.8364840348561605,0.39225157833573504,0.04459714392820994,2.0 +23436,train,3,768.0,0.5755208333333334,0.8917663097381592,0.2618181909650845,0.01976812755068143,3.0 +23436,train,4,768.0,0.6796875,0.9122781753540039,0.21020127075343642,0.006546831379334132,4.0 +23436,train,5,768.0,0.734375,0.9230967362721761,0.182652537414814,0.00717167928814888,5.0 +23436,train,6,768.0,0.7669270833333334,0.9298482735951742,0.16631383252369117,0.00374875341852506,6.0 +23436,train,8,768.0,0.8033854166666666,0.9389146169026693,0.14464881393209095,0.0037066793690125146,8.0 +23436,train,10,768.0,0.8268229166666666,0.9462127685546875,0.12909535948177878,0.00991838239133358,10.0 +23436,train,12,768.0,0.84375,0.9500546455383301,0.11931335874268405,0.004430865868926048,12.0 +23436,train,14,768.0,0.85546875,0.9517585436503092,0.11357831630187847,0.0033199011037747064,14.0 +23436,train,16,768.0,0.859375,0.9536232948303223,0.11014115689326769,0.003578242535392443,16.0 +23436,test,2,768.0,0.12760416666666666,0.7265946865081787,0.6354224683840087,0.025561923782030743,2.0 +23436,test,3,768.0,0.2526041666666667,0.7604809602101644,0.5506175992783068,0.012921225279569626,3.0 +23436,test,4,768.0,0.3138020833333333,0.7742091019948324,0.515490498860075,0.003827621228992939,4.0 +23436,test,5,768.0,0.3619791666666667,0.7867315610249838,0.48693293612013444,0.00506168728073438,5.0 +23436,test,6,768.0,0.3984375,0.7953317960103353,0.46390732880926117,0.0035611667359868684,6.0 +23436,test,8,768.0,0.4583333333333333,0.8105709552764893,0.4314775246277862,0.005083844686547915,8.0 +23436,test,10,768.0,0.48828125,0.8183674812316895,0.4161860321298086,0.003043775757153829,10.0 +23436,test,12,768.0,0.51171875,0.8260512351989746,0.4016971584101859,0.004879387095570564,12.0 +23436,test,14,768.0,0.5325520833333334,0.831050713857015,0.3878147863194716,0.00292336226751407,14.0 +23436,test,16,768.0,0.5559895833333334,0.8361946741739908,0.37473119346002354,0.0035912382105986276,16.0 
+26040,train,2,768.0,0.7825520833333334,0.963814894358317,0.09390118849629236,0.04166571795940399,2.0 +26040,train,3,768.0,0.9127604166666666,0.9810153643290201,0.051538720202730794,0.008062846958637238,3.0 +26040,train,4,768.0,0.9309895833333334,0.9841659863789877,0.04344159450663771,0.012521501630544662,4.0 +26040,train,5,768.0,0.9401041666666666,0.9858539899190267,0.04013312268752094,0.015388640264670054,5.0 +26040,train,6,768.0,0.9401041666666666,0.9870595932006836,0.03853385294570503,0.023091336091359455,6.0 +26040,train,8,768.0,0.94921875,0.9884098370869955,0.03620980748266996,0.01947430024544398,8.0 +26040,train,10,768.0,0.9557291666666666,0.9893261591593424,0.03444665149376691,0.03555429975191752,10.0 +26040,train,12,768.0,0.953125,0.9895029067993164,0.034414704500878884,0.04184401035308838,12.0 +26040,train,14,768.0,0.9557291666666666,0.9893904527028402,0.03586123670240371,0.037144094705581665,14.0 +26040,train,16,768.0,0.9518229166666666,0.9894386132558187,0.03654265702438753,0.041033936043580375,16.0 +26040,test,2,768.0,0.11588541666666667,0.7054398854573568,0.8908620335632751,0.040651207168896995,2.0 +26040,test,3,768.0,0.1875,0.7230902512868246,0.7620792943957264,0.03934991856416067,3.0 +26040,test,4,768.0,0.23567708333333334,0.731706460316976,0.7335753038165317,0.11375004053115845,4.0 +26040,test,5,768.0,0.25,0.7362075646718343,0.7385935210540664,0.160938690106074,5.0 +26040,test,6,768.0,0.2669270833333333,0.7397923469543457,0.743103274276764,0.17507813374201456,6.0 +26040,test,8,768.0,0.296875,0.7452899614969889,0.7468322750276379,0.19044278065363565,8.0 +26040,test,10,768.0,0.3138020833333333,0.7472190856933594,0.7515058945457195,0.21485831340154013,10.0 +26040,test,12,768.0,0.3229166666666667,0.7502892812093099,0.7620030015396181,0.22753063837687174,12.0 +26040,test,14,768.0,0.3307291666666667,0.7518165111541748,0.7553974518406182,0.2158371408780416,14.0 
+26040,test,16,768.0,0.3346354166666667,0.7514950434366862,0.7639393310889216,0.23628832896550497,16.0 diff --git a/multi4_eval_compare/trm_baseline_eval.csv b/multi4_eval_compare/trm_baseline_eval.csv new file mode 100644 index 0000000..0b6bc11 --- /dev/null +++ b/multi4_eval_compare/trm_baseline_eval.csv @@ -0,0 +1,11 @@ +run,step,all/exact_accuracy,all/accuracy,all/lm_loss,all/q_halt_accuracy,all/q_halt_loss,all/steps +trm_baseline,26041,0.5575894117355347,0.8469749093055725,0.35285070538520813,0.9997445344924927,0.003797376062721014,16 +trm_baseline,52082,0.6295099854469299,0.8704391121864319,0.2973524034023285,0.9998486042022705,0.0011324305087327957,16 +trm_baseline,78123,0.6993892788887024,0.8920264840126038,0.2477506548166275,0.9998935461044312,0.0006260558147914708,16 +trm_baseline,104164,0.72928386926651,0.9020143151283264,0.22608688473701477,0.9998770356178284,0.001141014276072383,16 +trm_baseline,130205,0.7653990387916565,0.914251446723938,0.19878524541854858,0.999917209148407,0.0010435190051794052,16 +trm_baseline,156246,0.7596656680107117,0.9119072556495667,0.2041437327861786,0.999862790107727,0.0011596218682825565,16 +trm_baseline,182287,0.7541900873184204,0.9094774723052979,0.2100999504327774,0.9998249411582947,0.0013093978632241488,16 +trm_baseline,208328,0.7732800841331482,0.9166732430458069,0.19410833716392517,0.9998320937156677,0.0033004307188093662,16 +trm_baseline,234369,0.7750374674797058,0.9172152280807495,0.19279460608959198,0.9998533725738525,0.0032994903158396482,16 +trm_baseline,260410,0.7742025256156921,0.9169425964355469,0.19348150491714478,0.9998462796211243,0.002894919365644455,16 diff --git a/multi4_eval_compare/trm_gbs768_multi4_step65100_det_n10000_seed20260602.summary.csv b/multi4_eval_compare/trm_gbs768_multi4_step65100_det_n10000_seed20260602.summary.csv new file mode 100644 index 0000000..38ca8f5 --- /dev/null +++ b/multi4_eval_compare/trm_gbs768_multi4_step65100_det_n10000_seed20260602.summary.csv @@ -0,0 +1,2 @@ 
+correct_count/det_fail_mean,correct_count/det_success_mean,correct_count/full_frac,correct_count/ge_1_frac,correct_count/mean,correct_count/median,correct_count/q10,correct_count/q25,correct_count/q75,correct_count/q90,correct_count/std,correct_count/zero_frac,deterministic/exact,deterministic/token_acc,fd_lyap,fd_spectrum_k,include_clean,mean_rollout/exact,mean_rollout/token_acc,n_samples,noise_std,oracle_pass/det_fail_frac,oracle_pass/det_success_frac,oracle_pass/exact,oracle_pass/token_acc,perturb_both,perturb_h,perturb_l,q_max/det_fail_frac,q_max/det_success_frac,q_max/exact,q_max/token_acc,q_mean,rollout0/exact,rollout0/token_acc,rollouts,steps +0.0,1.0,0.8312000036239624,0.8312000036239624,0.8312000036239624,1.0,0.0,1.0,1.0,1.0,0.3745751678943634,0.1687999963760376,0.8312000036239624,0.9333752989768982,0.0,0.0,0.0,0.8312000036239624,0.9333752989768982,10000.0,0.0,0.0,1.0,0.8312000036239624,0.9333752989768982,1.0,0.0,0.0,0.0,1.0,0.8312000036239624,0.9333752989768982,3.762489080429077,0.8312000036239624,0.9333752989768982,1.0,16.0 diff --git a/multi4_eval_compare/trm_matched_compare.csv b/multi4_eval_compare/trm_matched_compare.csv new file mode 100644 index 0000000..32006a5 --- /dev/null +++ b/multi4_eval_compare/trm_matched_compare.csv @@ -0,0 +1,2 @@ +family,step,base_exact,multi4_exact,delta_exact,base_acc,multi4_acc,delta_acc,base_loss,multi4_loss,delta_loss,base_steps,multi4_steps +trm,26041,0.5575894117355347,0.7394047975540161,0.18181538581848145,0.8469749093055725,0.9067130088806152,0.059738099575042725,0.35285070538520813,0.21417337656021118,-0.13867732882499695,16,16 diff --git a/multi4_eval_compare/trm_multi4_eval.csv b/multi4_eval_compare/trm_multi4_eval.csv new file mode 100644 index 0000000..f3d16b5 --- /dev/null +++ b/multi4_eval_compare/trm_multi4_eval.csv @@ -0,0 +1,2 @@ +run,step,all/exact_accuracy,all/accuracy,all/lm_loss,all/q_halt_accuracy,all/q_halt_loss,all/steps 
+trm_multi4,26041,0.7394047975540161,0.9067130088806152,0.21417337656021118,0.9997232556343079,0.0024572687689214945,16 diff --git a/multi4_eval_compare/trm_multi4_eval_full.csv b/multi4_eval_compare/trm_multi4_eval_full.csv new file mode 100644 index 0000000..3411ff5 --- /dev/null +++ b/multi4_eval_compare/trm_multi4_eval_full.csv @@ -0,0 +1,11 @@ +run,step,all/exact_accuracy,all/accuracy,all/lm_loss,all/q_halt_accuracy,all/q_halt_loss,all/steps +trm_multi4_loguniform_repro,26041,0.7394047975540161,0.9067130088806152,0.21417337656021118,0.9997232556343079,0.0024572687689214945,16 +trm_multi4_loguniform_repro,52082,0.8449901342391968,0.9424432516098022,0.1342233270406723,0.9998746514320374,0.0010290677892044187,16 +trm_multi4_loguniform_repro,78123,0.8417639136314392,0.9411031007766724,0.13769488036632538,0.9997493028640747,0.0022251552436500788,16 +trm_multi4_loguniform_repro,104164,0.8547161221504211,0.9456510543823242,0.12779666483402252,0.999834418296814,0.0019055778393521905,16 +trm_multi4_loguniform_repro,130205,0.8536233305931091,0.9453508853912354,0.1282763034105301,0.9998888373374939,0.0018422487191855907,16 +trm_multi4_loguniform_repro,156246,0.8489472270011902,0.9433866739273071,0.13273237645626068,0.999782383441925,0.003051365725696087,16 +trm_multi4_loguniform_repro,182287,0.8558868765830994,0.9459810256958008,0.12728126347064972,0.9997469186782837,0.0024399051908403635,16 +trm_multi4_loguniform_repro,208328,0.8404204249382019,0.9403222799301147,0.13971956074237823,0.9996499419212341,0.0029723909683525562,16 +trm_multi4_loguniform_repro,234369,0.845432460308075,0.9419097304344177,0.13668429851531982,0.9997681975364685,0.0016449446557089686,16 +trm_multi4_loguniform_repro,260410,0.826143741607666,0.9344042539596558,0.15468436479568481,0.999701976776123,0.001810370129533112,16 diff --git a/multi4_eval_compare/trm_official_gbs768_eval.csv b/multi4_eval_compare/trm_official_gbs768_eval.csv new file mode 100644 index 0000000..66dfe1b --- /dev/null +++ 
b/multi4_eval_compare/trm_official_gbs768_eval.csv @@ -0,0 +1,11 @@ +run,step,all/exact_accuracy,all/accuracy,all/lm_loss,all/q_halt_accuracy,all/q_halt_loss,all/steps +trm_official_gbs768,6510,0.20006339251995087,0.7261562347412109,0.6155200004577637,0.9993779063224792,0.0063577014952898026,16 +trm_official_gbs768,13020,0.6248409152030945,0.8679871559143066,0.30110087990760803,0.9997587203979492,0.0018232133006677032,16 +trm_official_gbs768,19530,0.7098508477210999,0.8961462378501892,0.23719573020935059,0.9997185468673706,0.002359110629186034,16 +trm_official_gbs768,26040,0.7558197379112244,0.9113300442695618,0.20295456051826477,0.9997066855430603,0.0026622479781508446,16 +trm_official_gbs768,32550,0.7946407794952393,0.9247151017189026,0.17315243184566498,0.9997208714485168,0.0024344150442630053,16 +trm_official_gbs768,39060,0.8247435092926025,0.9351920485496521,0.14995059370994568,0.9996594190597534,0.002962446305900812,16 +trm_official_gbs768,45570,0.8479396104812622,0.9433117508888245,0.1323014795780182,0.9996806979179382,0.0026382978539913893,16 +trm_official_gbs768,52080,0.8633161783218384,0.9488447904586792,0.12009253352880478,0.9997137784957886,0.002587266732007265,16 +trm_official_gbs768,58590,0.8686309456825256,0.9508475661277771,0.11555595695972443,0.9998438954353333,0.0014915302162989974,16 +trm_official_gbs768,65100,0.86624675989151,0.9500409364700317,0.11748332530260086,0.9996523261070251,0.002605273388326168,16 diff --git a/multi4_eval_compare/trm_official_gbs768_multi4_eval.csv b/multi4_eval_compare/trm_official_gbs768_multi4_eval.csv new file mode 100644 index 0000000..ec353ce --- /dev/null +++ b/multi4_eval_compare/trm_official_gbs768_multi4_eval.csv @@ -0,0 +1,21 @@ +run,step,all/exact_accuracy,all/accuracy,all/lm_loss,all/q_halt_accuracy,all/q_halt_loss,all/steps,_runtime,_timestamp +trm_official_gbs768_multi4,3255,0.0175999198108911,0.6504403352737427,0.7941210269927979,0.9824000597000122,0.108424387872219,16.0,12550.718766357,1780192940.452497 
+trm_official_gbs768_multi4,6510,0.2277795374393463,0.7379271984100342,0.5934479236602783,0.999262034893036,0.0035974220372736,16.0,25064.702453699,1780205454.4563622 +trm_official_gbs768_multi4,9765,0.6357069611549377,0.8709613084793091,0.2931223213672638,0.9997137784957886,0.0014165197499096,16.0,37579.311725746,1780217969.132149 +trm_official_gbs768_multi4,13020,0.75853031873703,0.910834014415741,0.2033131867647171,0.9997611045837402,0.0021604059729725,16.0,50092.402719112,1780230482.2738986 +trm_official_gbs768_multi4,16275,0.8263778686523438,0.9346956610679626,0.1507271528244018,0.9997800588607788,0.0041020140051841,16.0,62595.579823935,1780242985.398887 +trm_official_gbs768_multi4,19530,0.8654520511627197,0.9488762617111206,0.1198361814022064,0.9997090697288512,0.0052023055031895,16.0,75096.299217356,1780255486.102359 +trm_official_gbs768_multi4,22785,0.8817486763000488,0.9549695253372192,0.1065462753176689,0.9997114539146424,0.0037784865126013,16.0,87607.26667599,1780267997.105077 +trm_official_gbs768_multi4,26040,0.8879267573356628,0.957356870174408,0.1014027148485183,0.999642848968506,0.0053329654037952,16.0,100118.838503384,1780280508.634669 +trm_official_gbs768_multi4,29295,0.8933455944061279,0.959309697151184,0.0971761047840118,0.9996144771575928,0.0041324645280838,16.0,112620.42551711,1780293010.168183 +trm_official_gbs768_multi4,32550,0.8962169885635376,0.960436463356018,0.0948082581162452,0.9996026158332824,0.0144010595977306,16.0,125134.755165262,1780305524.5418072 +trm_official_gbs768_multi4,35805,0.8964653611183167,0.9604493975639344,0.0945562794804573,0.9995790123939514,0.0068304436281323,16.0,137714.797781532,1780318104.6386163 +trm_official_gbs768_multi4,39060,0.8957250118255615,0.9601802825927734,0.0952448472380638,0.999541163444519,0.0185395441949367,16.0,150233.754675447,1780330623.5781178 
+trm_official_gbs768_multi4,42315,0.888913094997406,0.9574248790740968,0.1008101180195808,0.9995600581169128,0.0061048995703458,16.0,162739.436977464,1780343129.2559526 +trm_official_gbs768_multi4,45570,0.8858169317245483,0.9561300873756408,0.1036654934287071,0.9994938373565674,0.0054858992807567,16.0,175251.082236918,1780355640.8929477 +trm_official_gbs768_multi4,48825,0.882732629776001,0.9547808170318604,0.1068678125739097,0.9994110465049744,0.0072551541961729,16.0,187778.40637965,1780368168.2221627 +trm_official_gbs768_multi4,52080,0.8782954216003418,0.9530280232429504,0.1104752272367477,0.99934720993042,0.0089566921815276,16.0,200285.373208387,1780380675.1846614 +trm_official_gbs768_multi4,55335,0.8694067597389221,0.94942307472229,0.1194151788949966,0.9987700581550598,0.0272370912134647,16.0,212785.271448664,1780393175.0770009 +trm_official_gbs768_multi4,58590,0.8620365858078003,0.9463443756103516,0.1268824934959411,0.9983466863632202,0.0163928251713514,16.0,225280.516080387,1780405670.3403552 +trm_official_gbs768_multi4,61845,0.850763738155365,0.941650390625,0.1378339380025863,0.9978050589561462,0.0215476993471384,16.0,237778.331426833,1780418168.1822684 +trm_official_gbs768_multi4,65100,0.8350536823272705,0.9350366592407228,0.1547555029392242,0.9962841868400574,0.0310162808746099,16.0,250280.395200839,1780430673.009492 diff --git a/multi4_eval_compare/trm_official_gbs768_multi4_step16275_eval.csv b/multi4_eval_compare/trm_official_gbs768_multi4_step16275_eval.csv new file mode 100644 index 0000000..a34ae57 --- /dev/null +++ b/multi4_eval_compare/trm_official_gbs768_multi4_step16275_eval.csv @@ -0,0 +1,2 @@ +step,accuracy,exact_accuracy,lm_loss,q_halt_accuracy,q_halt_loss,steps +16275,0.934574,0.82621706,0.15090619,0.99974453,0.00405054,16.0 diff --git a/multi4_eval_compare/wallclock_eval.csv b/multi4_eval_compare/wallclock_eval.csv new file mode 100644 index 0000000..268a924 --- /dev/null +++ b/multi4_eval_compare/wallclock_eval.csv @@ -0,0 +1,30 @@ 
+run,step,runtime_sec,runtime_hms,timestamp_local,ckpt_exists,ckpt_mtime_local,exact,accuracy,loss +hrm_baseline,2604,1440.547846523,00:24:00.55,2026-05-22 06:32:03,True,2026-05-22 06:32:04,0.016369983553886414,0.6336106061935425,0.8522627949714661 +hrm_baseline,5208,2915.282126881,00:48:35.28,2026-05-22 06:56:38,True,2026-05-22 06:56:39,0.06169315055012703,0.6744286417961121,0.7430469989776611 +hrm_baseline,7812,3726.920135005,01:02:06.92,2026-05-22 07:10:09,True,2026-05-22 07:10:10,0.1358133852481842,0.6985693573951721,0.676047682762146 +hrm_baseline,10416,4458.114451179,01:14:18.11,2026-05-22 07:22:21,True,2026-05-22 07:22:22,0.20248068869113922,0.7259970307350159,0.6480565667152405 +hrm_baseline,13020,5801.015399971,01:36:41.02,2026-05-22 07:44:43,True,2026-05-22 07:44:45,0.3024248778820038,0.7580602169036865,0.5614010691642761 +hrm_baseline,15624,7219.496414876,02:00:19.50,2026-05-22 08:08:22,True,2026-05-22 08:08:23,0.36705803871154785,0.7842973470687866,0.49664124846458435 +hrm_baseline,18228,8616.18880162,02:23:36.19,2026-05-22 08:31:39,True,2026-05-22 08:31:40,0.46287721395492554,0.8077820539474487,0.4641122817993164 +hrm_baseline,20832,9904.193793478,02:45:04.19,2026-05-22 08:53:07,True,2026-05-22 08:53:08,0.4912721812725067,0.8198283314704895,0.4137057960033417 +hrm_baseline,23436,10634.360398376,02:57:14.36,2026-05-22 09:05:17,True,2026-05-22 09:05:18,0.5193856954574585,0.8260135054588318,0.41173747181892395 +hrm_baseline,26040,11366.743085113,03:09:26.74,2026-05-22 09:17:29,True,2026-05-22 09:17:30,0.5265287756919861,0.8270824551582336,0.4161679148674011 +hrm_multi4,2604,5810.841404196,01:36:50.84,2026-05-27 22:44:06,True,2026-05-27 22:44:07,0.0123774204403162,0.6303551197052002,0.8603715300559998 +hrm_multi4,5208,11587.805059004,03:13:07.81,2026-05-28 00:20:23,True,2026-05-28 00:20:24,0.025012653321027756,0.6685170531272888,0.7412638068199158 +hrm_multi4,7812,17365.375767937,04:49:25.38,2026-05-28 01:56:41,True,2026-05-28 
01:56:42,0.04651052877306938,0.687346339225769,0.7044885158538818 +hrm_multi4,10416,23142.813901422,06:25:42.81,2026-05-28 03:32:58,True,2026-05-28 03:32:59,0.2006617933511734,0.7243355512619019,0.6340938210487366 +hrm_multi4,13020,29347.517355686,08:09:07.52,2026-05-28 05:16:23,True,2026-05-28 05:16:24,0.34697696566581726,0.7794705033302307,0.49915269017219543 +hrm_multi4,15624,35123.513458465,09:45:23.51,2026-05-28 06:52:39,True,2026-05-28 06:52:40,0.4653252363204956,0.8156101703643799,0.42743897438049316 +hrm_multi4,18228,40898.005173388,11:21:38.01,2026-05-28 08:28:54,True,2026-05-28 08:28:55,0.5790873169898987,0.8494690656661987,0.3459467887878418 +trm_baseline,26041,8593.415472305,02:23:13.42,2026-05-23 01:55:09,True,2026-05-23 01:55:10,0.5575894117355347,0.8469749093055725,0.35285070538520813 +trm_baseline,52082,17155.876633182,04:45:55.88,2026-05-23 04:17:52,True,2026-05-23 04:17:52,0.6295099854469299,0.8704391121864319,0.2973524034023285 +trm_baseline,78123,25719.395089913,07:08:39.40,2026-05-23 06:40:35,True,2026-05-23 06:40:36,0.6993892788887024,0.8920264840126038,0.2477506548166275 +trm_baseline,104164,34282.065662564,09:31:22.07,2026-05-23 09:03:18,True,2026-05-23 09:03:18,0.72928386926651,0.9020143151283264,0.22608688473701477 +trm_baseline,130205,42852.348430037,11:54:12.35,2026-05-23 11:26:08,True,2026-05-23 11:26:09,0.7653990387916565,0.914251446723938,0.19878524541854858 +trm_baseline,156246,51422.119869545,14:17:02.12,2026-05-23 13:48:58,True,2026-05-23 13:48:58,0.7596656680107117,0.9119072556495667,0.2041437327861786 +trm_baseline,182287,59826.120123505,16:37:06.12,2026-05-23 16:09:02,True,2026-05-23 16:09:02,0.7541900873184204,0.9094774723052979,0.2100999504327774 +trm_baseline,208328,68138.229200214,18:55:38.23,2026-05-23 18:27:34,True,2026-05-23 18:27:34,0.7732800841331482,0.9166732430458069,0.19410833716392517 +trm_baseline,234369,76460.482892161,21:14:20.48,2026-05-23 20:46:17,True,2026-05-23 
20:46:17,0.7750374674797058,0.9172152280807495,0.19279460608959198 +trm_baseline,260410,84783.527271934,23:33:03.53,2026-05-23 23:05:00,True,2026-05-23 23:05:00,0.7742025256156921,0.9169425964355469,0.19348150491714478 +trm_multi4,26041,22216.1844709,06:10:16.18,2026-05-28 03:18:26,True,2026-05-28 03:18:26,0.7394047975540161,0.9067130088806152,0.21417337656021118 +trm_multi4,52082,44853.735386924,12:27:33.74,2026-05-28 09:35:44,True,2026-05-28 09:35:44,0.8449901342391968,0.9424432516098022,0.1342233270406723 diff --git a/official_gbs768_spectrum/headline_trm_multi4_dynamics_table.csv b/official_gbs768_spectrum/headline_trm_multi4_dynamics_table.csv new file mode 100644 index 0000000..742ac68 --- /dev/null +++ b/official_gbs768_spectrum/headline_trm_multi4_dynamics_table.csv @@ -0,0 +1,4 @@ +model,step,full_exact,full_token_acc,lm_loss,dyn_sample_exact,lambda1_all,mean8_all,tail4_all,pos_count_all +TRM baseline best,58590,0.8686309456825256,0.9508475661277772,0.1155559569597244,0.875,0.02823458132615997,0.013457294571722192,0.0075273313675370546,7.841796875 +TRM multi4 best,35805,0.8964653611183167,0.9604493975639344,0.0945562794804573,0.900390625,0.020381716455975862,0.0065844104191477015,0.001402141885882835,3.841796875 +TRM multi4 final,65100,0.8350536823272705,0.9350366592407228,0.1547555029392242,0.82421875,0.03232463403946895,0.018508151198432188,0.013372940185377047,8.0 diff --git a/official_gbs768_spectrum/summary_n512_k8_seed20260602.csv b/official_gbs768_spectrum/summary_n512_k8_seed20260602.csv new file mode 100644 index 0000000..c255252 --- /dev/null +++ b/official_gbs768_spectrum/summary_n512_k8_seed20260602.csv @@ -0,0 +1,4 @@ 
+run,step,n,sample_exact,sample_token_acc,lambda1_all,lambda1_success,lambda1_fail,lambda1_fail_minus_success,mean8_all,mean8_success,mean8_fail,mean8_fail_minus_success,tail4_all,tail4_success,tail4_fail,pos_count_all,pos_count_success,pos_count_fail,pos_mass_all,pos_mass_success,pos_mass_fail,lambda2_all,lambda2_success,lambda2_fail,lambda3_all,lambda3_success,lambda3_fail,lambda4_all,lambda4_success,lambda4_fail,lambda5_all,lambda5_success,lambda5_fail,lambda6_all,lambda6_success,lambda6_fail,lambda7_all,lambda7_success,lambda7_fail,lambda8_all,lambda8_success,lambda8_fail +baseline_best,58590,512,0.875,0.9539689430384897,0.02823458132615997,0.01761167685357837,0.10259491263423115,0.08498323578065278,0.013457294571722192,0.008003413600119422,0.05163446137294159,0.04363104777282217,0.0075273313675370546,0.004122858266632009,0.03135864307387237,7.841796875,7.819196428571429,8.0,0.10779670139772268,0.0641854171711784,0.4130756909835327,0.0205107267238418,0.012619587999194794,0.07574869779637083,0.016147883449207256,0.0097831444752176,0.060701056267134845,0.012655839604420294,0.007521466406436568,0.04859645199030638,0.010066905798097991,0.00580994511748096,0.03986563056241721,0.00827578879948021,0.00458740731747704,0.0340944591735024,0.006620997387619565,0.003523945934349396,0.028300357560510747,0.0051456334849504515,0.00257013469722064,0.02317412499905913 
+multi4_best,35805,512,0.900390625,0.9625048226444051,0.020381716455975862,0.01118387160416574,0.10352301992037717,0.09233914831621143,0.0065844104191477015,0.0019524994650864183,0.04845325257252518,0.046500753107438765,0.001402141885882835,-0.0014188478887233206,0.02690167690732279,3.841796875,3.3817787418655096,8.0,0.06377898653446445,0.027952091227886174,0.38762602058020146,0.012663036363773195,0.0057181008775463405,0.07543980615103946,0.008348809059507634,0.003078277385450445,0.055990281642652025,0.005673153930393582,0.0013151374084221035,0.04506620523684165,0.0034857525132654388,-9.921478389921372e-05,0.035891045140577296,0.002008319042374751,-0.001002505589948276,0.02922381228749074,0.0006402704918926361,-0.0019078789586364823,0.023673542976087213,-0.0005257745040014861,-0.0026657922224093103,0.018818307225135902 +multi4_final,65100,512,0.82421875,0.9289882326847874,0.03232463403946895,0.01912124014472792,0.09423388096814354,0.07511264082341562,0.018508151198432188,0.012883653437293365,0.04488079625621645,0.03199714281892309,0.013372940185377047,0.010672304166179654,0.026035922408724824,8.0,8.0,8.0,0.1480652095874575,0.10306922749834692,0.3590463700497316,0.02459628393262392,0.015458292881759563,0.0674433086377879,0.020185894951282535,0.013495850063401376,0.05155477209223641,0.017466635922573914,0.01230462774373944,0.041670718716664445,0.015463502108104876,0.011429727322452865,0.034377423880828754,0.013748909759215167,0.010808219788353272,0.027537478289256494,0.012621539073734311,0.010437874606770786,0.022860499129941068,0.011657809800453833,0.010013394947141692,0.019368288334872988 diff --git a/official_gbs768_spectrum/summary_n64_k8_seed20260602.csv b/official_gbs768_spectrum/summary_n64_k8_seed20260602.csv new file mode 100644 index 0000000..5a12638 --- /dev/null +++ b/official_gbs768_spectrum/summary_n64_k8_seed20260602.csv @@ -0,0 +1,5 @@ 
+name,n,acc,lambda1_all,lambda1_success,lambda1_fail,mean8_all,mean8_success,mean8_fail,tail4_all,pos_count_all,pos_count_success,pos_count_fail,pos_mass_all +baseline_best,64,0.875,0.027470633,0.015822656,0.109006464,0.013350397,0.0077010575,0.052895777,0.0074741757,7.796875,7.767857142857143,8.0,0.10691354 +multi4_bestish,64,0.953125,0.016227467,0.012409109,0.09386742,0.004405942,0.002358008,0.046047267,-0.00011889404,3.375,3.1475409836065573,8.0,0.048455432 +multi4_late,64,0.84375,0.031355858,0.01815009,0.10266701,0.018412568,0.01310199,0.047089677,0.013622271,8.0,8.0,8.0,0.14730054 +multi4_final,64,0.859375,0.028622076,0.01753769,0.09636,0.01713102,0.01240586,0.046007,0.012961711,8.0,8.0,8.0,0.13704816 diff --git a/ordinary_multi4_eval_n1000_seed20260611.csv b/ordinary_multi4_eval_n1000_seed20260611.csv new file mode 100644 index 0000000..b587ea1 --- /dev/null +++ b/ordinary_multi4_eval_n1000_seed20260611.csv @@ -0,0 +1,6 @@ +step,n,seed,exact_acc,token_acc,elapsed_sec +6510,1000,20260611,0.231,0.7405925925925926,56.891759634017944 +13020,1000,20260611,0.763,0.912679012345679,55.79828500747681 +19530,1000,20260611,0.869,0.9508641975308642,55.84409546852112 +26040,1000,20260611,0.89,0.9582716049382716,55.78691911697388 +32550,1000,20260611,0.915,0.9673827160493828,55.736825942993164 diff --git a/paper/claims.md b/paper/claims.md new file mode 100644 index 0000000..590b388 --- /dev/null +++ b/paper/claims.md @@ -0,0 +1,37 @@ +# Claim table (write structure FROM this, not from session chronology) + +Rule: every claim states its evidence, its strongest counter-reading, and where that +counter-reading is killed or conceded. A claim that can't fill all four columns gets cut +or demoted to an observation in the discussion. 
+ +| # | Claim | Evidence | Strongest counter-reading | Disposition | +|---|---|---|---|---| +| C1 | Per-example full-trajectory FTLE separates success/failure near-perfectly in trained HRM and TRM on Sudoku-Extreme | AUC(−λ₁→correct): HRM 0.984 (n=8192) / 0.987 (n=2048, 2nd estimator); TRM official 87.6% ckpt 0.993 (n=2048). Medians: HRM joint −0.152 vs +0.032; TRM +0.012 vs +0.103 | Estimator artifact | Replicates across two estimator implementations and two architectures; report both scales, never cross-compare | +| C2 | Failures overwhelmingly never settle; settled-wrong is rare (HRM) to absent (TRM) | TRM: 0/254 failures settled, threshold-free (min wrong-drift > late-drift of 96.5% of correct); HRM: 21/3894 (0.55%) strict-band; replicated 5/971 on 2nd estimator | Threshold choice ("settled" is arbitrary) | State threshold-free version (distribution separation); show full percentile sweep; define settled band by its narrow characteristic velocity | +| C3 | The rare settled-wrong failures are selector-blind: success-like contraction AND success-like halting confidence | n=21: λ₁ med −0.842 (A: −0.867), q_halt(final) +7.47 (= A), all would halt early (halted_at 4–9), token_acc med 0.62 | Small n | Concede explicitly; the point is existence + boundedness (~0.5%), not precision | +| C4 | The FTLE-outcome signal is not reducible to non-settling, and not a difficulty artifact | Drift-decile-matched AUC within unsettled stratum: 0.879 (n=8192) / 0.900 (2nd estimator); #givens-bin AUC 0.982 vs 0.984 overall | (a) residual within-decile drift variation; (b) #givens is a weak difficulty proxy | (a) deciles are narrow (table shows ranges); (b) concede openly, name solver-backtracks as the proper proxy, future work | +| C5 | The chaotic signature is outcome-concurrent, not antecedent: nothing in the first 4 ACT steps forecasts eventual success among not-yet-solved examples — and on HRM the dynamical signals point the other way | Restricted to not-correct@4: TRM AUC λ₁ 
0.543 / drift 0.492 / q_halt 0.521 (n=626); HRM λ₁ 0.448 / drift 0.312 (sign-reversed; +drift→success AUC 0.688) / q_halt 0.734 (n=1342) | Window length (only 4 steps tested) | Concede; horizon sweep = explicit future work; 4 chosen to match training window | +| C6a | Correction: TRM failures do not "plateau at stable high-loss attractors" in state space | C2 + λ₁(D)=+0.103 + residual velocity ≈56/step ≈0.77× early; their own Fig 5 oscillation consistent | We strawman "attractor" (bounded attracting set can be chaotic) | Quote their exact wording ("stable", "local minima", "stabilize rather than explore"); credit their loss/boundedness data and intervention; correct only the settledness reading | +| C6b | Refinement: Ren & Liu's four modes confirmed and quantified; wrong-fixed-point mode is real but marginal at trajectory end | Their mode (4) = our strict B (0.5%); mode (3) = our D (~99.5% of failures) | We measure end-of-window only; mid-trajectory lingering invisible | Concede explicitly; their non-trivial-success lingering claim untouched | +| C7a | Training widens the success/failure gap from the failure side: failures become more expansive while the success regime barely moves | λ₁(D): +0.036→+0.102 over the TRM series, λ₁(A) within ±0.03 of 0 throughout | Estimator-scale / single-run | Holds; HRM series shows the mass-migration version. Robust claim. | +| C7b | **DEMOTED by matched-objective control (E6).** Under MATCHED objective (step9 fixed-unroll, E-vs-F / G-vs-H), perturbation training's effect on the wandering cell is small and inconsistent at matched step | HRM fD 0.387→0.369 / 0.385→0.379 / 0.381→**0.387**(↑ at best) / 0.410→0.387; TRM 0.344→0.305 / 0.396→0.361 / 0.334→**0.361**(↑ at best) / 0.312→0.270 | The earlier large shrink (D 274→175) was partly a fixed-unroll-vs-ACT-streaming artifact, NOT a pure intervention effect | **Rewrite §3.4**: report the matched effect as small/equivocal; drop the strong "intervention shrinks wandering" reading. 
The May-28 mismatched comparison is retired. C7a stands; C7b does not support an intervention claim. | + +## The spine (one sentence) +Direct per-example measurement of settling and tangent expansion decomposes recursive-reasoner +failure: failures are overwhelmingly trajectories that never settle, the chaotic signature is +concurrent with — not antecedent to — the outcome, and it is not explained by non-convergence +alone or by problem difficulty. + +## What this paper is NOT claiming (write these into Discussion to pre-empt) +- No mechanism for WHY trajectories fail to find the settled band (explicitly open). +- No claim that early intervention is impossible in principle — only that λ/drift at 4 steps carry no signal. +- No claim about tasks beyond Sudoku-Extreme or models beyond HRM/TRM. + +## Anti-patterns checklist (apply at style pass) +- [ ] No chronological narration of the investigation; structure = claims order. +- [ ] Numbers in topic sentences; every section's first sentence is a finding, not a plan. +- [ ] No "notably/crucially/interestingly/delve/underscore"; hedges only where the claim table says concede. +- [ ] Related work = positioning (each paragraph ends with the gap), not annotated bibliography. +- [ ] Negative result (C5) framed as a finding with its own section, not a limitation apology. +- [ ] Limitations: specific, short, no re-hedging of already-scoped claims. +- [ ] Prose paragraphs in the body; tables only for numbers; no bullet lists in Results. diff --git a/paper/experiment_framework.md b/paper/experiment_framework.md new file mode 100644 index 0000000..07a69d0 --- /dev/null +++ b/paper/experiment_framework.md @@ -0,0 +1,55 @@ +# Experiment framework — locked 2026-06-12 + +Purpose: every paper claim gets its evidence gap named and the run that closes it specified, +so all remaining GPU/CPU work is execution against this table. Targets: workshop freeze +~Jun 25 (COLM deadline Jul 12), ICLR freeze ~Sep 5 (deadline ~Sep 16, unconfirmed). 
+ +## Locked measurement protocol (uniformity rules) +- Estimator: current `diagnose_{hrm,trm}_joint.py` (joint JVP+QR, k=8, t_ons=1); seed 0 primary; + n=2048 headline, n=512 series/sweeps; batch 16 (TRM) / 32 (HRM). +- λ values never compared across estimator implementations; scale ownership stated at first use. +- Settled = low band of late z_H drift (mean of final 4 segments), Otsu primary + percentile + sweep + threshold-free statement; z_L and combined-drift definitions reported as robustness. +- idx-pairing: same seed + same n across any runs that will be joined per example. +- All new npz under analysis_2x2/ subdirs with queue logs; nothing hand-run on GPU. + +## Claims → gaps → runs + +| ID | Closes | Run | Spec | Cost | Priority / target | +|---|---|---|---|---|---| +| E1 | C1/C2 formal uncertainty; C7-TRM matched pair | OFFLINE: bootstrap CIs (cells, AUCs, B-fraction rule-of-three bound); z_L/combined settling robustness; 2×2 on existing TRM official multi4 npz (@35805 best, @65100 final) vs baseline @58590 | existing npz | CPU, now | **P0 / workshop** | +| E2 | C1/C2 run-level replication (HRM has 1 training run) | diagnose step9_E (HRM fixed-unroll baseline 50k) best.pt + final.pt + 2 mid ckpts, n=512–2048 | loader risk: step9 .pt may need key remap; fix on first failure | ~1–2 h shared | **P1 / workshop** | +| E5 | C5 single-horizon caveat → "when does fate become legible" curve | horizon sweep h∈{2,4,6,8,10,12} segments, both models, idx-paired to full-window retest (seed 0, n=2048) | env-var horizon variant of *_short.py | ~5 h shared | **P1 / workshop headline fig** | +| E6 | C7 matched-objective intervention probe | (a) verify provenance of diag_hrm_multi4_* run; (b) diagnose step9 pairs E vs F (HRM), G vs H (TRM) at {12500, 25000, best, final}, n=512; compare at matched ACCURACY (not matched step) + best-vs-best | — | ~2–3 h shared | **P1 / ICLR (workshop if cheap)** | +| E3 | C3 "would have halted" + C-cell fate beyond window | 32-segment unroll
restricted to B∪C idx sets (≤300 examples/model) + true-ACT inference on HRM's 21 B-cell idx | — | ~1 h shared | P2 / ICLR | +| E4 | C4 difficulty proxy weakness | CPU sudoku solver with backtrack counting on the 81-token inputs; redo difficulty control binned by backtracks | — | CPU only | P2 / ICLR | +| E7 | Sec-5 implication → measured result | restart-and-select on TRM official @58590: K∈{1,2,4,8,16,32} h_init Gaussian restarts; select by q_halt vs late-drift vs λ-proxy vs oracle; n=1024; quantify B-blind-spot empirically | — | ~5–10 h shared | P2 / ICLR | +| E8 | "Sudoku-only" limitation (biggest generalization win) | build Maze dataset; train TRM official-recipe Maze; full diagnostic suite on it | — | **multi-day dedicated GPU** — needs explicit go | P2-gated / ICLR | + +### E8 detail — which model/variant (decided 2026-06-13) +- **PRIMARY = TRM, attention variant.** Official Maze-Hard recipe = `arch=trm`, mlp_t=False (default), + L_layers=2 H_cycles=3 L_cycles=4. NOTE: our Sudoku TRM analysis used mlp_t=TRUE (attention OFF). + So TRM-Maze varies BOTH the task (Sudoku→Maze) AND the token mixer (mlp_t→attention) at once. + Framing for the paper: this is a STRONGER generality test (decomposition surviving task + mixer), + but state the dual change honestly; do not call it a pure same-architecture task transfer. +- Optional matched control (only if cheap + GPU spare): TRM-Maze with arch.mlp_t=true to isolate + task-only transfer. Not the validated recipe → may underperform 75%; treat as secondary. +- HRM-Maze (completes the {HRM,TRM}×{Sudoku,Maze} grid): deferred. Heavier (27M, HRM Maze recipe + costlier). Decide AFTER TRM-Maze lands — nice-to-have if TRM-Maze confirms, important if it surprises. +- Diagnostic caveat stands: attention arch → verify JVP closures in diagnose_trm_joint.py; seq 900 → n=512/256.
+| E9 | transverse/readout framing | logit-space FTLE (diagnose-script modification) | dev + ~2 h | P3 / ICLR appendix | +| E10 | sampling-seed robustness (if bootstrap deemed insufficient) | rerun headline diags seeds 1,2 | ~5 h shared | P3 | + +## Decision rules +- Workshop version ships on E1+E2+E5 (+E6 if done): decomposition, controls, horizon curve. +- ICLR adds E3/E4/E6/E7 (+E8 if approved in time: training must START by ~Jul 1). +- Any FAILED queue job: fix loader/script, rerun; never substitute hand-run results. +- New results that contradict current text → claims.md updated first, prose second. + +## Status board (update as runs land) +- [x] E1 offline batch — done 2026-06-12, see offline_followups/phase1_e1.md +- [x] E2 done — step9_E (2nd HRM run, fixed-unroll): B≈0 replicates (best 1/743=0.13%, final 3/832=0.36%). C2 STRENGTHENED (now 2 estimators × 2 archs × 2 training runs × 2 objectives). +- [x] E5 done — horizon sweep {2,4,6,8,10,12}×2 models. C5 STRENGTHENED to a curve: dynamics never legible early (TRM AUC≈0.48–0.53 all H; HRM reversed/below 0.5 all H); only HRM q_halt predicts (0.81→0.64). See phase1/fig_E5_horizon_sweep.png. NEW: undecided-pool eventual-success drops to 4%(HRM)/19%(TRM) by H=12 yet dynamics still don't discriminate. +- [x] E6 done — matched-objective. **C7b demoted** (effect small/inconsistent at matched step); C7a (λ₁(D) rises over training) stands. §3.4 needs rewrite. +- [ ] E3, E4, E7 — phase-2 +- [ ] E8 Maze — GO (user has dedicated GPUs). Build dataset + package launch/diagnostic; train on user cards. 
diff --git a/paper/intro.md b/paper/intro.md new file mode 100644 index 0000000..85f06e7 --- /dev/null +++ b/paper/intro.md @@ -0,0 +1,57 @@ +# Recursive Reasoning Models Fail by Wandering, Not by Settling + +## 1 Introduction + +Recursive reasoning models such as the Hierarchical Reasoning Model (HRM; Wang et al., 2025) +and the Tiny Recursive Model (TRM; Jolicoeur-Martineau, 2025) solve constraint-satisfaction +puzzles that defeat far larger language models, by iterating a small network on a latent state +for hundreds of updates per puzzle. When such a model fails, what is dynamically different +about the trajectory it produced? Two recent mechanistic studies answer in attractor language. +Failed TRM runs "plateau at stable high-loss attractors" (Efstathiou & Balwani, 2026); failed +HRM runs converge to spurious fixed points that rival the correct one (Ren & Liu, 2026). The +evidence behind both labels is indirect, resting on loss plateaus and two-dimensional +projections of 512-dimensional trajectories, and the labels disagree about the basic character +of failure: premature stability in one account, partly aimless drift in the other. Neither +measures the trajectory's stability directly. We do, per example, and the measurements support +a third description: recursive reasoning models fail by wandering, not by settling. + +Across 2,048 to 8,192 held-out Sudoku-Extreme puzzles, correct trajectories end inside a +narrow low-velocity band of the latent dynamics, and failures essentially never do. In an +official-recipe TRM at 87.6% test accuracy, none of 254 failures settles: the least mobile +failure still moves faster at the end of inference than 96.5% of successes, a separation of +distributions that no threshold choice can undo, and failed trajectories remain locally +expansive throughout (median leading finite-time Lyapunov exponent λ₁ = +0.103, against +0.012 +for successes; AUC 0.993). HRM shows the same structure with one addition. 
Settled-but-wrong +trajectories exist, but they account for 0.55% of failures, carry success-like contraction +(λ₁ = −0.84, against −0.87 for settled successes) and success-like halting confidence, and +every one of them would have halted early under adaptive computation. The wrong-attractor +failure mode is real, rare, and the only failure a confidence-based selector cannot catch. + +Two controls locate what the Lyapunov signature adds, and a third experiment locates when it +exists. Matched for displacement level within the unsettled population, λ₁ still separates +eventual successes from failures (decile-matched AUC 0.88–0.90), so the exponent does more +than restate non-convergence. Binned by the number of givens, the separation is unchanged +(within-bin AUC 0.982, against 0.984 unconditioned), so it is not an artifact of problem +difficulty. It is, however, strictly retrospective. Restricted to puzzles still unsolved after +four of sixteen segments, neither early-window exponents nor early state velocity predicts +which trajectories will eventually succeed (AUC ≈ 0.5 in TRM), and in HRM the association +inverts — among the undecided, the trajectories that move more in the early segments are the +ones that go on to solve the puzzle (positive-direction AUC 0.69). The chaos of failure +arrives with the failure; nothing dynamical in the early trajectory anticipates it. + +These measurements redraw the intervention map for this model class. Because failure is almost +never a stable wrong answer, restart-and-select inference strategies have a high ceiling and a +quantifiable blind spot of roughly half a percent. Because the early trajectory carries no +dynamical death sentence, compute spent on early failure prediction is compute wasted, and +restart diversity is the better buy. 
Our contributions: (i) per-example, outcome-conditioned +measurement of settling and finite-time Lyapunov spectra in HRM and TRM, at sample sizes up to +8,192 and replicated across two estimator implementations; (ii) a decomposition of failure +that corrects the settled-attractor reading and bounds the wrong-attractor mode at ~0.5% of +failures; (iii) controls showing the signature is not reducible to non-convergence or +difficulty; (iv) evidence that the signature is concurrent with the outcome and carries no +early-warning content at the granularity tested. + +--- +*[em-dash count: 1. Contrast-template count: title + one echo (end of ¶1). Flourish count: +1 ("death sentence", ¶4) — cuttable. "essentially never" is the one hedge in ¶2, scoped by +the 0.55% in the next sentence.]* diff --git a/paper/outline.md b/paper/outline.md new file mode 100644 index 0000000..0dde354 --- /dev/null +++ b/paper/outline.md @@ -0,0 +1,79 @@ +# Outline — "Recursive Reasoning Models Fail by Wandering, Not by Settling" (title FIXED 2026-06-12) + +Status: intro.md ✅ (v2, audited) · setup_results.md ✅ (Secs 2–3) · style_contract.md ✅ · +remaining: Sec 4 (relation to prior accounts), Sec 5 (implications), Sec 6 (limitations), +abstract, tables T1–T3 + figures F3/F4 composition. + +Target: ~8 pages main. Every section header below lists [claims served] and [assets]. + +## 1 Introduction [C1, spine] +- Para 1: recursive reasoners (HRM/TRM) solve hard puzzles by iterating a latent state; when they + fail, what is dynamically different? Existing mechanistic accounts infer dynamics from loss + curves and 2-D projections; we measure the dynamics directly, per example. +- Para 2: the answer, with numbers (settling × correctness decomposition; B≈0; AUC 0.99; + concurrent-not-antecedent). 
+- Para 3: contributions (4 items, one line each): (i) per-example outcome-conditioned FTLE/settling + measurement at n≤8192 across two architectures; (ii) failure-mode decomposition correcting two + published labels; (iii) independence controls (drift-matched, difficulty-binned); (iv) the + early-window null + sign reversal. +- NO general AI-reasoning throat-clearing. First sentence is about the object of study. + +## 2 Setup [assets: estimator details from diagnose_trm_joint.py; OBSERVATIONS.md provenance table] +- 2.1 Models & task: HRM 27M @26040 (acc .526), TRM-MLP official recipe @58590 (acc .876), + Sudoku-Extreme-1k-aug; fixed 16-step unroll, ACT recorded not applied. +- 2.2 Measurements: joint (z_H,z_L) tangent dynamics, JVP+QR, k=8, per-sub-update normalization; + per-ACT-step state displacement (drift); q_halt; exact/token accuracy. Estimator-scale caveat. +- 2.3 The 2×2 design: settled band defined by bimodal late-drift split (Otsu primary, full + percentile sweep + threshold-free statement in appendix); cells A/B/C/D. + +## 3 Results +- 3.1 Decomposition [C1, C2, C3; assets: cells tables, fig_*_scatter, fig_*_lyap_by_cell, + strict-B table + fig_hrm_strictB_profiles] + Lead: "Across 2048–8192 held-out puzzles, no TRM failure and 0.55% of HRM failures end in the + settled band." Then per-cell λ₁; then the 21 selector-blind examples (their three lowest + token-acc are all 17-givens puzzles). +- 3.2 What the signal is not [C4; assets: decile table, givens table] + Drift-matched AUC 0.88–0.90; givens-binned AUC unchanged. One paragraph each, tables carry + the numbers. +- 3.3 When the signal exists [C5; assets: early_pairing_{trm,hrm}.md tables] + The early-window null; the HRM sign reversal (drift@4 +direction AUC 0.688); q_halt@4 0.734 + vs TRM 0.521 (factual note: TRM removed the continue head). Frame as the temporal anatomy of + the signature. 
+- 3.4 Training evolution [C7; assets: evolution_{trm,hrm}.png/csv; multi4 quick-compare] + Gap widens via λ₁(D); multi4 shrinks D-cell mass at matched steps (preliminary, objective + caveat); multi4 collapse = λ₁(A) sign flip. + +## 4 Relation to prior accounts [C6a, C6b; assets: papers/notes/*] +- Para 1: network-level Lyapunov–performance work (Vogt 2022; AeLLE 2024; Engelken flossing + App. D.3 trains-vs-fails at network level, opposite sign) → none condition per example on outcome. +- Para 2: the 2026 mechanistic trio. Efstathiou & Balwani: credit loss/boundedness/intervention; + quote and correct the settledness reading (C6a). Ren & Liu: confirm + quantify their taxonomy + (C6b). Es'kin & Smorkalov (CMM): their endpoint-stability losses + engineered early repeller + are consistent, at the design level, with where our measurements localize the signal — cite, + don't claim confirmation. +- Para 3: stability-by-construction line (monDEQ, Jacobian-reg DEQ, REN/Sandwich; TRM's own + TorchDEQ negative result; Solve-the-Loop) — what "enforce settling" buys and where it failed; + our measurements say which kind of settling is the operative one. + +## 5 Implications (restrained, half page) +- Intervention design space bifurcates: widen/deepen the settled tube at training time + (perturbation training, equilibrium losses) vs restart-and-select at inference + (q_halt tracks correctness at trajectory end; selector-blind ceiling ≈0.5%). +- Early pruning/reallocation unsupported at 4-step granularity; on HRM the gradient of usable + early signal lives in the learned head, not the generic dynamical quantities. + +## 6 Limitations & future +Sudoku-Extreme only; two models; #givens is a weak difficulty proxy (solver backtracks next); +single early horizon (sweep queued); end-of-window criterion blind to mid-trajectory lingering; +no mechanism offered for why settling fails — measurement paper. 
+ +## Figures plan (all exist or one rerun away) +F1: drift–λ₁ scatter, both models (have). +F2: per-cell λ₁ + strict-B profiles inset (have). +F3: decile-matched AUC + givens-binned AUC (compose from CSVs). +F4: early-window pairing summary (compose: 3 signals × 2 models, restricted set). +F5: checkpoint evolution (have). + +## Order of writing +1. Results 3.1–3.3 (numbers already final) → 2. Setup → 3. Sec 4 (notes ready) → 4. Intro → +5. Implications/Limitations → 6. style pass against claims.md checklist. diff --git a/paper/rainer_followup_draft.md b/paper/rainer_followup_draft.md new file mode 100644 index 0000000..12091de --- /dev/null +++ b/paper/rainer_followup_draft.md @@ -0,0 +1,37 @@ +Subject: Re: Question on gradient flossing vs forward trajectory stability in recursive reasoning models + +Hi Rainer, + +A short follow-up to my email of June 5 — we have since measured the things I was speculating +about, and two results seem worth sharing because they sharpen the question I asked you. + +First, conditioning per-example finite-time Lyapunov spectra on both outcome and terminal +settling (n = 2048–8192, two architectures) shows that failure is almost exclusively +non-settling: in an official-recipe TRM at 87.6% accuracy, none of 254 failed trajectories +ends in the low-velocity band that about 96% of successes occupy, and they remain locally expansive +to the end (median λ₁ +0.10 vs +0.01). "Converged to the wrong attractor" failures exist in +HRM but make up only ~0.5% of failures. The chaotic signature also survives two controls: it +persists after matching trajectories on displacement level (so it is not just re-measuring +non-convergence), and after binning by puzzle difficulty. + +Second — and this is the part that genuinely surprised us — the signature is strictly +concurrent. 
Among puzzles still unsolved after a quarter of the inference budget, neither the +early-window exponents nor early state velocity predicts which trajectories will eventually +succeed (AUC ≈ 0.5); in HRM the association even inverts, with eventually-successful +trajectories moving more in the early phase. So the failed trajectories are not "born chaotic": +chaos at the end and failure appear together. + +This makes me think the right framing for my earlier question is reachability of the settled +region (escape from a long chaotic transient) rather than per-example landscape quality, which +would be consistent with your view of flossing as a learning-time tool rather than an +inference-time one. If you know of work that conditions finite-time exponents on trajectory +fate in this way — in transient-chaos settings or elsewhere — I would be grateful for a +pointer; we have not found a precedent. + +Best, +Yuren + +--- +[Notes, not part of the email: numbers from analysis_2x2/OBSERVATIONS.md addenda 1-2. Send only +if/after Rainer replies to the June 5 email, or as a gentle bump after ~2 weeks (June 19+). +The "born chaotic" phrasing mirrors his literature's transient-chaos vocabulary deliberately.] diff --git a/paper/readiness.md b/paper/readiness.md new file mode 100644 index 0000000..f07f78e --- /dev/null +++ b/paper/readiness.md @@ -0,0 +1,119 @@ +# Path to a citable, build-on-able preprint — status + +Framing (locked 2026-06-19, per user correction): the axis is **expansive vs more-expansive** +(graded; for TRM both classes have λ₁>0), NOT settled-vs-chaotic. The phenomenon is the +**cleanness of a graded separation**; the **mechanism is explicitly OPEN** and is the natural seed +for follow-on projects. This preprint = rigorous phenomenology + precise characterization + +honest open-mechanism. Do NOT force a fixed-point / suppression-of-chaos framing (rejected). 
+ +## Tier 0 — measurement bulletproofing (others build on it) +- [x] **T0.1 estimator validation** — `paper/validation/`: QR/Benettin core recovers known spectra + to <1e-3 (diagonal, symmetric, non-normal asymptotic) and Hénon λ₁ to 8e-5. PASS. Confirms the + numerical core (orthonormalization cadence, log|diagR| bookkeeping, ordering, averaging). +- [ ] **T0.2 robustness reruns (GPU)** — λ stability vs t_ons, tangent-basis seed, k>8. Window + dependence already covered offline (Char 3). Small queue; spec below. +- [ ] **T0.3 language/scope pass** — finite-time vs asymptotic, "expansive not chaotic" for HRM + (negative λ), metric/coordinate-dependence caveat (Lohmiller–Slotine). Prose task. + +## Characterization (replaces the rejected Tier 2; describes WHAT, not WHY) +- [x] **Char 1 whole-spectrum** — separation is a ~rigid shift of the ENTIRE k=8 spectrum, not a + single mode (per-exponent AUC uniformly 0.98–0.99; HRM gap ≈constant −0.16/exponent). Spectral + MEAN separates ≥ λ₁ alone (AUC 0.991–0.995). CAVEAT: KS-proxy Σλ⁺ is the wrong aggregate for HRM + (all-negative spectra → 0); use spectral mean for HRM. +- [x] **Char 2 shape** — two overlapping UNIMODAL classes with well-separated means, NOT two + discrete clusters (within-class BC 0.26–0.40). Outcome is a moderately sharp threshold on the + λ₁ continuum (25→75% transition spans 12–30% of the λ₁ spread). +- [x] **Char 3 integration-time scaling (the key descriptor)** — separation BUILDS monotonically + with window H: Cohen's d 1.06→4.84 (TRM, H=2→16), 0.03→3.45 (HRM). Near-zero at H=2 for HRM (TRM already at d≈1), near-perfect + at the full 16-segment budget for both. The cleanness is an integration-time phenomenon. COHERENCE with + E5: this accumulation tracks the unfolding of outcomes (more trajectories revealed by larger H), + NOT anticipation — among undecided@H examples λ₁ still doesn't predict (E5). State both together. 
+- [x] **Char 4 effect size** — "clean" quantified: Cohen's d 3.4–4.8, distributional overlap + <10% (TRM hist-overlap 0.049). Beyond AUC. + +## Tier 1 — causal content (the level-up from correlation) +- [ ] **T1 inference-side causal probe** — nudge a failing trajectory toward lower expansion (or + toward the success-mean manifold) mid-rollout and measure outcome recovery; conversely inject + expansion into a settling-correct trajectory. Tests settling⟹correct as causal, not correlational. + Spec next. GPU. + +## Open-mechanism (NOT this paper; the hook for follow-ons) +Why a graded (both-expansive) difference separates so cleanly. Char 1–4 bound the description; +the why is deferred. Candidate angles are the user's to pursue, not asserted here. + +## Maze cross-task result + checkpoint evolution (2026-06-20) + +**Deflationary finding stands and is now grounded:** the FTLE/CLV separation reduces to +convergence+confidence (λ1, full k=8 spectrum, AND leading-CLV geometry all reduce; partial-corr +→0 once drift+q_halt controlled). The dynamical signal is a (redundant) convergence readout. + +**Maze (TRM att, friend's run, all 10 ckpts, k=1):** separation WEAK (λ1 Cohen's d 0.2–0.5 vs +Sudoku 3–5). Failures SETTLE (B/fail 0.81–0.98, D/fail 0.02–0.19) at ALL ckpts and are NEAR-MISSES +(token_acc ~0.97). Opposite of Sudoku (failures wander, far-from-correct token ~0.63). + +**Checkpoint evolution (the key new result, offline):** wandering is a LATE-TRAINING property. +Sudoku HRM failures SETTLE early (B/fail ~0.9 at acc 2–15%) then flip to WANDER late +(D/fail ~1.0 at acc 50%), transition ~step 13–18k. So "failures wander" is learned, not intrinsic. +BUT matched-accuracy contrast cuts the other way: at acc≈0.76, Sudoku-TRM D/fail=1.00 vs +Maze-TRM D/fail=0.19 — same skill, opposite dynamics → TASK STRUCTURE also matters, not just maturity. +And early-Sudoku settling (token 0.63, confidently-wrong) ≠ Maze settling (token 0.97, near-miss): +not the same phenomenon. 
Fig: analysis_2x2/checkpoint_evolution_wander.png. + +**Task structure (offline):** Maze solution path (median 113 cells) passes through ~76 branch +points (67% of path cells at deg≥3 junctions; 48% of open cells are junctions) → abundant +locally-coherent alternative paths = many STABLE WRONG ANSWERS available. Sudoku: unique +globally-coupled solution, a wrong cell violates constraints globally → no local near-miss +equilibrium. This structurally explains settle-to-near-miss (Maze) vs wander (Sudoku). + +**Unresolved confound (queued):** TRM-Maze never develops wandering, but can't tell task-structure +from TRM-Maze SATURATION (Maze too easy for TRM). Queued before HRM-Maze: +(1) continue-train TRM-Maze from step_130200 (does acc climb toward ~1.0 = saturation, or plateau?); +(2) per-cell failure structure (are failure errors a connected detour = coherent stable wrong path, +or scattered?). Then HRM-Maze (harder model-task fit, more likely to be stressed into wandering). + +## Solution-space test (2026-06-20) — refutes the measurement-artifact concern, strengthens task-structure +User asked: is weak Maze separation an artifact of analyzing the FULL latent (88% trivial copy) +instead of the SOLUTION space? Tested directly: per-step decoded-ANSWER Hamming drift over +solution cells (label!=input), Maze vs Sudoku control. +- MAZE: failures SETTLE in solution space too (late answer-drift median 0.00, 98.4% settled; + AUC 0.30). Same conclusion as full-latent. NOT an artifact. +- SUDOKU control: failures DON'T settle in solution space (late drift median 8.5/step, 0% settled; + AUC 0.99). Same as full-latent. Both spaces agree. +- Per-cell failure STRUCTURE (direct task-structure evidence): MAZE failures = CONNECTED DETOUR + (97% have ≤2 error components, median 22 cells one blob) = a coherent stable wrong PATH. + SUDOKU failures = SCATTERED (100% have ≥5 components, median 13) = no coherent wrong answer. + Fig: analysis_2x2/maze_failure_detour.png. 
This is the mechanism-grounding for why Maze settles + (stable wrong answers exist as detours) and Sudoku wanders (no stable wrong answer). + +## CORRECTION (2026-06-20) — Maze exact-match labeling was the artifact; failure=more-chaotic HOLDS +The earlier "Maze dissociates / completeness≠correctness" reading was largely a LABELING ARTIFACT, +not a real dynamical dissociation. Maze exact-match marks VALID alternative solutions (incl. +equal-length valid shortest paths) as "failures"; 100% of exact-match "failures" are valid connected +paths (complete answers) → they settle, trivially. That is a benchmark-design flaw, not a result. +**Under the correct criterion (CONNECTIVITY = is it a valid complete path = is it actually solved):** +genuine failures (broken/disconnected) ARE more chaotic — AUC(-late_drift→connected) = 0.864 @step_13020 +(15 broken), 0.895 pooled (18 broken); bootstrap 95% CI [0.80, 0.96], excludes 0.5. So +"failure = more chaotic" is TASK-GENERAL (Sudoku + Maze) once failure is defined by validity. +LIMITATION (now RESOLVED): trained Maze SATURATES before the first saved ckpt (step_13020 already +97% complete) → only n=18 broken from existing ckpts. FIX DONE: fresh early-save TRM-Maze run +(maze_earlysave_freshTRM, saved every 250 epochs) captured the broken-rich pre-saturation phase; +cheap forward dumps (drift_zH + connectivity, no JVP) on 8 early ckpts give **n=4096, 1835 broken**. +**Pooled: AUC(-latent drift_zH -> connected/complete) = 0.834, bootstrap 95% CI [0.822, 0.846]** +(broken late-drift median 1.06 vs connected 0.56). Per-ckpt AUC rises with training 0.66->0.88 +(mirrors Sudoku's separation-grows-with-training). So 'genuine failure (incomplete) = more chaotic' +is now LARGE-N BULLETPROOF on Maze under validity labeling. Fig: maze_broken_morechaotic.png. 
+Honest detail: 'more chaotic' is a LATENT-dynamics property (drift_zH AUC 0.834, λ1 AUC 0.86); +the DECODED-answer drift does NOT separate (ans_drift AUC 0.38) — broken paths commit an incomplete +decoded answer while churning internally. Consistent with the FTLE/drift (latent) story. + +## Synthesis for the paper (current honest thesis, corrected) +Genuine failures (incomplete/invalid answers) are MORE CHAOTIC — measurable, task-general (Sudoku; +Maze under validity labeling). Mechanism: the dynamical signal detects answer completeness/convergence +(FTLE reducible to drift+q_halt). On unique-solution tasks completeness=correctness, so it predicts +correctness directly. On multi-solution tasks exact-match mislabels valid alternatives as failures; +use validity labeling. The phenomenon stands; the convergence-detection mechanism is the honest +interpretation, not a refutation. + +## Status: offline T0.1 + Char 1–4 + Maze evolution + task structure DONE. Running: TRM CLV (done), +## HRM CLV (queued on card1), maze-followup queue (continue-train + per-cell, waiting for GPU). +## Remaining: T0.2/T0.3, T1, HRM-Maze (after saturation test). diff --git a/paper/sample_intro.md b/paper/sample_intro.md new file mode 100644 index 0000000..183faa4 --- /dev/null +++ b/paper/sample_intro.md @@ -0,0 +1,49 @@ +# Sample section: Introduction (taste-calibration draft) + +Recursive reasoning models solve constraint-satisfaction problems that defeat much larger +language models by iterating a small network on a latent state — up to several hundred state +updates per puzzle in the Hierarchical Reasoning Model (HRM) and the Tiny Recursive Model +(TRM). When such a model fails, what is dynamically different about the trajectory it +produced? Recent mechanistic studies have answered with attractor language: failed runs +"plateau at stable high-loss attractors" (Efstathiou & Balwani, 2026), or converge to spurious +fixed points that rival the correct one (Ren & Liu, 2026). 
These accounts rest on indirect +evidence — loss plateaus, two-dimensional projections of 512-dimensional trajectories — and +the two papers do not agree: one describes failure as premature stability, the other partly as +wandering. Neither measures stability itself. + +We measure it directly. For every test puzzle we record two per-example quantities along the +full 16-segment inference trajectory: the finite-time Lyapunov spectrum of the joint latent +dynamics, and the per-segment state displacement. Conditioning these on outcome over 2,048 to +8,192 puzzles per model yields a complete decomposition of failure for HRM (52.6% accuracy) +and an official-recipe TRM (87.6%), and the decomposition contradicts the settled-attractor +picture. Correct trajectories enter a narrow low-velocity band and stay in it; failed +trajectories never do. In TRM, not one of 254 failures settles — the least mobile failure still +moves faster at the end of inference than 96.5% of successes — while remaining locally +expansive (median λ₁ = +0.103 versus +0.012 for successes; AUC 0.993). In HRM, settled-but-wrong +trajectories exist but account for 0.55% of failures; the other 99.45% wander. Failure in these +models is not a wrong attractor. It is the sustained absence of settling. + +Two controls sharpen what the Lyapunov signature adds. Matched for displacement level within +the unsettled population, λ₁ still separates eventual successes from failures (decile-matched +AUC 0.88–0.90), so the exponent is not merely re-measuring non-convergence; and binning by +puzzle givens leaves the separation intact (within-bin AUC 0.982 versus 0.984 overall), so it +is not a difficulty artifact. The signature is, however, strictly retrospective. 
Restricted to +puzzles still unsolved after four segments, nothing dynamical about those first four segments +predicts which will eventually be solved: AUC ≈ 0.5 in TRM for exponent, displacement, and +halting confidence alike — and in HRM the association inverts, with eventual successes moving +*more* in the early trajectory than eventual failures (AUC 0.69 in the positive direction). +The chaos of failure is concurrent with the outcome, not an omen visible at the start. + +These measurements reframe both the diagnosis and the levers. Because failure is almost never +a stable wrong answer, selection-based inference strategies have a high ceiling — final-step +halting confidence tracks correctness on all but the ~0.5% of failures that settle confidently +— and because the early trajectory carries no dynamical death sentence, compute is better +spent on restarts than on early pruning. We quantify both points, correct the published +attractor labels they depend on, and release the per-example measurement tooling. 
+ +--- +*[Style notes for review, not part of the draft: (1) every paragraph opens with a finding or a +question, none with "In recent years"; (2) the two prior papers are quoted precisely and +credited for what their data shows before the correction is made; (3) hedges appear only where +the claim table concedes (e.g., "almost never", "~0.5%"); (4) the one rhetorical flourish — +"not an omen" — is load-bearing; cut it if it reads as flavor.]* diff --git a/paper/setup_results.md b/paper/setup_results.md new file mode 100644 index 0000000..d6976d5 --- /dev/null +++ b/paper/setup_results.md @@ -0,0 +1,129 @@ +# 2 Setup + +**Models and task.** We study two trained recursive reasoners on Sudoku-Extreme with the +1k×1000-augmentation training set: HRM (27M parameters; checkpoint at step 26,040; 52.6% exact +accuracy on our evaluation samples) and TRM-MLP trained with the official recipe (5M; global +batch size 768; checkpoint at step 58,590, the best of its run; 86.9% on the full test set, +87.6% on our n=2,048 sample). Inference runs a fixed 16-segment unroll; the adaptive-computation +halting signal (q_halt) is recorded at every segment but not applied, so every trajectory is +observed for the full budget. Answers are decoded at segment 16. + +**Per-example measurements.** Along each trajectory we record three families of quantities. +First, the leading k=8 finite-time Lyapunov exponents of the joint latent dynamics: tangent +vectors in the concatenated (z_H, z_L) space are propagated with Jacobian-vector products +through every state update and re-orthonormalized by QR at each sub-step; λ_i is the +time-average of the log diagonal of R over the full trajectory (336 sub-updates for TRM, 64 +for HRM). Second, the per-segment state displacement ‖z^{(t)} − z^{(t−1)}‖ for z_H and z_L +separately ("drift"). Third, q_halt, exact correctness, and token accuracy. 
Exponent values +are comparable only within an estimator implementation; we replicate the HRM analysis under a +second, earlier implementation (n=8,192) and report its scale separately. + +**The settling criterion.** Late drift — the mean z_H displacement over the final four +segments — is bimodal in log scale for every checkpoint we examine: a narrow low-velocity band +(characteristic residual velocity 0.96 per segment for HRM, 18.5 for TRM, interquartile width +under 10%) separated from a high-velocity mode by one to two orders of magnitude. We call a +trajectory *settled* if its late drift falls in the low band. Thresholds are set by Otsu's +method on the pooled log distribution; every result below is reported with a full percentile +sweep, and the headline TRM result is threshold-free. Settled is a band property, not a fixed +point: both bands have nonzero characteristic velocity. + +**Design.** Crossing the settling criterion with answer correctness yields four cells: +settled-correct (A), settled-wrong (B), unsettled-correct (C), unsettled-wrong (D). The +analysis asks three questions. How is failure distributed over B versus D? What does λ₁ add +beyond the settling split? And when along the trajectory does the discriminative signal exist? + +# 3 Results + +## 3.1 Failure is wandering: the 2×2 decomposition + +At the end of inference, success and failure occupy different dynamical regimes almost without +exception (Table 1). In TRM, 254 of 2,048 puzzles are answered incorrectly and none of them is +settled: the minimum late drift among failures (log₁₀ = 1.66, ≈46 per segment) exceeds the +late drift of 96.5% of successes, so no threshold assignment can place a failure in the +settled band. Failed trajectories also remain locally expansive over the full window (median +λ₁ = +0.103, IQR +0.094 to +0.111) while settled successes sit at the edge of contraction +(+0.011). 
The same decomposition at a mid-training checkpoint, and across a ten-checkpoint +series, shows the settled-wrong cell empty from 20% of training onward. + +HRM adds the one exception, and it is small. At the strict band threshold, 21 of 3,894 +failures (0.55%; n=8,192) end settled; the replication under the second estimator gives +5 of 971 (0.5%; n=2,048). These settled-wrong trajectories are dynamically indistinguishable +from successes: λ₁ median −0.842 against −0.867 for settled-correct, drift profiles inside the +A band from segment ~4 onward (Figure 2), and final halting confidence identical to successes +(median q_halt +7.47 in both cells, against −9.6 for wandering failures). All 21 crossed the +halting threshold between segments 4 and 9; under adaptive computation each would have stopped +early, confident and wrong. Their token accuracy spans 0.41–0.88, and the three least accurate +are all 17-givens (minimum-clue) puzzles. This cell is the wrong-attractor mode of Ren & Liu +(2026), measured: it exists, it carries exactly the contraction signature their account +predicts, and it is two orders of magnitude less common than wandering. + +The unsettled-correct cell (C) is the mirror curiosity: 3–7% of successes are still moving at +segment 16 (70 of 1,794 in TRM; 57 of 1,077 in HRM), with halting confidence as high as +settled successes. Their existence shows the decode head can read a correct answer off a +moving state; we do not observe what happens to them past the window. + +## 3.2 What the exponent is not measuring + +The λ₁ separation is not a restatement of the settling split. 
Within the unsettled population, +where every trajectory is still moving, λ₁ ranks eventual successes above failures inside +narrow displacement bands: splitting unsettled HRM trajectories into late-drift deciles (decile +width ≤0.04 log units over most of the range) gives within-decile AUC from 0.97 at low drift +to 0.69 at the highest decile, weighted mean 0.879 (n=8,192); the second estimator gives 0.900 +(n=2,048). A trajectory's expansion rate carries outcome information beyond how fast it is +moving. + +The separation is also not a difficulty artifact, at least not at the resolution of clue +count. Accuracy varies strongly with the number of givens (Spearman +0.28), and λ₁ is itself +difficulty-correlated (−0.35 overall, −0.16/−0.18 within outcome classes), yet conditioning +removes nothing: within givens bins, AUC(−λ₁ → correct) is 0.976–0.987 (weighted 0.982) +against 0.984 unconditioned. Givens count is a coarse proxy — solver backtrack counts would be +the sharper control — but at this resolution the dynamical signature is orthogonal to how hard +the puzzle is. + +## 3.3 When the signal exists: concurrent, with no early warning + +The discriminative power of the dynamics is a property of the realized trajectory, and it is +absent at the start. We re-measured both models over only the first four segments (idx-paired +with the full-window runs, same sampling) and asked whether anything visible by segment 4 +forecasts the final outcome. Unconditioned, early-window λ₁ appears predictive (AUC 0.89 TRM, +0.73 HRM), but the appearance is inherited from puzzles already solved by segment 4 (69% of +TRM successes, 34% of HRM's). Restricted to the decision-relevant population — puzzles not yet +correct at segment 4 — prediction collapses (Table 3). In TRM (n=626, of which 59% eventually +succeed), AUC is 0.543 for early λ₁, 0.492 for early drift, 0.521 for early halting +confidence. 
In HRM (n=1,342, 28% eventually succeed) the dynamical associations invert: +eventual successes have marginally higher early λ₁ (reverse AUC 0.448) and substantially +higher early displacement (AUC 0.688 in the positive direction). Among undecided HRM +trajectories, the ones still moving vigorously are the ones that go on to solve the puzzle. + +One early signal does carry information, and it is learned, not dynamical: HRM's q_halt at +segment 4 predicts eventual success at AUC 0.734. TRM's does not (0.521); TRM's training +removes HRM's Q-learning continue head in favor of a binary halt loss, a difference we note +without interpreting. Window length is the untested variable here: four segments matches the +deep-supervision horizon, and we have not yet swept longer prefixes. + +## 3.4 Training widens the gap from the failure side + +Over the TRM training series (ten checkpoints, 512 puzzles each), λ₁ of wandering failures +rises monotonically from +0.036 to +0.102 while λ₁ of settled successes stays within ±0.03 of +zero; the settled-wrong cell empties by step 52k and stays empty. The outcome separation grows +over training because the failures become more expansive, while the success regime barely +moves. HRM's series shows a mass migration instead: at early checkpoints nearly all +trajectories are low-drift and wrong (the model barely updates state), this cell drains +through mid-training into high-drift wandering, and accuracy growth then tracks transfer from +wandering into the settled-correct band. + +A preliminary intervention probe is consistent with the decomposition. 
HRM checkpoints trained +with multi-rollout initial-state perturbation (K=4, log-uniform noise) shrink the wandering- +failure cell at matched steps relative to an ordinary baseline (D: 274→175 at step 20,832 and +247→176 at 23,436, accuracy +0.20 and +0.15), with surviving failures more expansive, and the +known late-run collapse of this variant coincides with the settled band itself destabilizing +(λ₁ of settled successes flipping to +0.04). The comparison baseline differs in training +objective (ACT-streaming versus fixed unroll), so we report this as directional evidence +pending a matched-objective control. + +--- +*[Section pass notes: em-dash count 2 (§3.2, §3.3 one each). Contrast-template count: 0 +(budget spent in title/intro). Flourish count: 1 ("mirror curiosity", §3.1) — cuttable. +Tables referenced: T1 = 2×2 cells both models; T2 = decile + givens AUCs; T3 = early-window +restricted AUCs. All numbers traceable to analysis_2x2/OBSERVATIONS.md (+ addenda) and +offline_followups/followups.md.]* diff --git a/paper/style_contract.md b/paper/style_contract.md new file mode 100644 index 0000000..4e0f319 --- /dev/null +++ b/paper/style_contract.md @@ -0,0 +1,47 @@ +# Style contract (operative checklist for every section pass) + +Sources: Shaib et al. 2026 slop taxonomy (density/relevance/coherence are the load-bearing +dimensions); Buschek's four reviewer complaints; 2026 banned-pattern lists. Applied as a per- +section pass, not a vibe. + +## Hard bans (lexical) +delve, tapestry, landscape (figurative), testament, pivotal, showcase, intricate, vibrant, +seamless, elegant, dramatically, novel (self-describing), comprehensive (self-describing), +notably, crucially, interestingly, importantly, "It is worth noting", "Moreover," as sentence +opener, "In recent years", "has garnered attention", "paradigm shift". +Technical terms that overlap ban lists (e.g., "robust" in "threshold-robust") stay. 
+ +## Punctuation & rhythm +- Em dash: ≤2 per section, never two in one sentence, never as paired parenthetical. +- No exclamation marks. Semicolons fine. +- Every paragraph contains at least one sentence under ~12 words. +- No uniform paragraph shapes: don't end three consecutive paragraphs with a summary clause. + +## Structure +- The contrast template ("X, not Y" / "not X, but Y") has a total budget of ONE device: + the title and its single echo at the end of paragraph 1. Zero elsewhere. Express other + contrasts by stating the positive finding and letting the numbers carry the negation. +- Rule-of-three closers only when the three items are an exhaustive measured list. +- No chronological narration of the investigation. Structure follows claims.md. +- Results topic sentences contain a finding or a number, never a plan ("We then examine..."). + +## Integrity (Buschek's four, mapped to our risks) +- Marketing language: zero unevidenced evaluative adjectives. "near-perfect" only with the AUC + in the same sentence. +- Performative related work: every citation paired with the precise claim it supports; check + each against papers/notes/*.md before the citation lands. +- Misrepresentation: prior-work characterizations quote verbatim with page/section; corrections + target the quoted words only (claims.md C6a discipline). +- Stretched summaries: no interpretive metaphors for results. One flourish per section maximum, + and it must be cuttable without losing content. + +## Density (the actual anti-slop weapon) +- Every sentence must add a number, a definition, a caveat, or a logical step. Otherwise cut. +- Modal verbs (could/might/may) confined to Discussion and future work. +- Hedges appear only where claims.md concedes; one hedge per concession, not a seesaw. + +## Honesty rails (project-specific) +- Never compare λ values across estimator versions; state scale ownership at first use. 
+- "Settled" always defined as the measured low-velocity band, with its residual velocity given. +- No mechanism claims; observations and their direct logical consequences only. +- No promises (code release, future experiments) that the authors have not decided. diff --git a/paper/validation/validate_le_estimator.py b/paper/validation/validate_le_estimator.py new file mode 100644 index 0000000..7ad4fd0 --- /dev/null +++ b/paper/validation/validate_le_estimator.py @@ -0,0 +1,107 @@ +"""T0.1 — validate the QR/Benettin FTLE estimator core against systems with KNOWN spectra. + +Reimplements the IDENTICAL accumulation used in diagnose_{trm,hrm}_joint.py: + Q in R^{n x k} init random-orthonormal; each step apply the (known) Jacobian to Q's columns; + every t_ons steps QR-decompose, accumulate sum of log|diag(R)|; LE_i = sum / n_qr_steps. + +Test systems (known answers): + (a) diagonal linear map LE_i = log|d_i| (exact at all T) + (b) symmetric linear map LE_i = log|eig_i| (exact; eig=singular values) + (c) non-normal (shear) map LE_i = log|eig_i| asympt. (finite-time transient from singular values) + (d) Henon map (a=1.4,b=0.3) LE = {+0.41922, -1.62319} (nonlinear chaotic; literature value) + +A passing result = recovered exponents match known to within tolerance, confirming the QR core +(orthonormalization cadence, log|diag R| bookkeeping, ordering, averaging) is correct. +No GPU, no model — this isolates the numerical estimator. +""" +from __future__ import annotations +import numpy as np + +RNG = np.random.default_rng(0) + + +def qr_le(jac_fn, x0, n_steps, k, t_ons=1, warmup=0): + """Benettin/QR LE estimate. jac_fn(x)->(x_next, J) gives next state and Jacobian at x. 
+ Mirrors diagnose_*_joint.py: QR every t_ons steps, accumulate log|diag R|, average over QR steps.""" + x = np.asarray(x0, float) + d = x.shape[0] + Q, _ = np.linalg.qr(RNG.standard_normal((d, k))) + log_R_sum = np.zeros(k) + n_qr = 0 + for t in range(n_steps): + x, J = jac_fn(x) + Q = J @ Q + if (t + 1) % t_ons == 0: + Q, R = np.linalg.qr(Q) + if t >= warmup: + log_R_sum += np.log(np.clip(np.abs(np.diag(R)), 1e-30, None)) + n_qr += 1 + return np.sort(log_R_sum / max(n_qr, 1))[::-1] + + +def run(): + out = ["# T0.1 estimator validation (QR/Benettin core vs known spectra)", ""] + tol = 5e-3 + + # (a) diagonal + d_vals = np.array([1.5, 0.8, 0.3, 0.05]) + M = np.diag(d_vals) + known = np.sort(np.log(np.abs(d_vals)))[::-1] + est = qr_le(lambda x: (x, M), np.ones(4), 4000, k=4) # linear: J state-independent, don't grow x + out += [f"(a) diagonal linear: known {np.round(known,4)}", + f" recovered {np.round(est,4)} max|err|={np.max(np.abs(est-known)):.2e} " + f"{'PASS' if np.max(np.abs(est-known)) log|eig| asymptotically; finite-time transient from singular values + N = np.array([[1.1, 5.0], [0.0, 0.6]]) # eigenvalues 1.1, 0.6 (triangular); highly non-normal + known = np.sort(np.log(np.abs(np.linalg.eigvals(N))))[::-1] + est_long = qr_le(lambda x: (x, N), np.ones(2), 40000, k=2) + sv = np.sort(np.log(np.linalg.svd(N, compute_uv=False)))[::-1] + est_short = qr_le(lambda x: (x, N), np.ones(2), 5, k=2) + out += [f"(c) non-normal shear: known asymptotic log|eig| {np.round(known,4)}", + f" recovered (T=40000) {np.round(est_long,4)} " + f"max|err|={np.max(np.abs(est_long-known)):.2e} " + f"{'PASS' if np.max(np.abs(est_long-known))<1e-2 else 'FAIL'}", + f" single-step log singular values {np.round(sv,4)} (finite-time transient ref)", + f" recovered (T=5, finite-time) {np.round(est_short,4)} " + f"(should sit between sv and asymptotic -> confirms finite-time != asymptotic)"] + + # (d) Henon map + a, b = 1.4, 0.3 + def henon(x): + xn = np.array([1 - a * x[0] ** 2 + x[1], b * 
x[0]]) + J = np.array([[-2 * a * x[0], 1.0], [b, 0.0]]) + return xn, J + # settle onto attractor first + x = np.array([0.1, 0.1]) + for _ in range(1000): + x, _ = henon(x) + known = np.array([0.41922, -1.62319]) # literature (Sprott) + est = qr_le(henon, x, 200000, k=2, warmup=1000) + out += [f"(d) Henon (a=1.4,b=0.3): literature {np.round(known,4)} (sum={known.sum():.4f})", + f" recovered {np.round(est,4)} (sum={est.sum():.4f}) " + f"|err λ1|={abs(est[0]-known[0]):.2e} " + f"{'PASS' if abs(est[0]-known[0])<5e-3 else 'FAIL'}"] + + out += ["", "Interpretation: (a)(b) confirm exact recovery for normal maps; (c) confirms the", + "estimator converges to log|eig| asymptotically while finite-time windows reflect", + "singular-value growth (the regime our paper operates in); (d) confirms correct", + "recovery on a known chaotic nonlinear system. The QR core is validated."] + print("\n".join(out)) + from pathlib import Path + Path(__file__).resolve().parent.joinpath("validation_results.md").write_text("\n".join(out)) + + +if __name__ == "__main__": + run() diff --git a/paper/validation/validation_results.md b/paper/validation/validation_results.md new file mode 100644 index 0000000..008a73a --- /dev/null +++ b/paper/validation/validation_results.md @@ -0,0 +1,17 @@ +# T0.1 estimator validation (QR/Benettin core vs known spectra) + +(a) diagonal linear: known [ 0.4055 -0.2231 -1.204 -2.9957] + recovered [ 0.4047 -0.2236 -1.2032 -2.9953] max|err|=7.99e-04 PASS +(b) symmetric linear: known [-0.1054 -0.6391 -0.9467 -1.4414 -3.074 ] + recovered [-0.1055 -0.6392 -0.9465 -1.4415 -3.0738] max|err|=1.85e-04 PASS +(c) non-normal shear: known asymptotic log|eig| [ 0.0953 -0.5108] + recovered (T=40000) [ 0.0953 -0.5109] max|err|=3.84e-05 PASS + single-step log singular values [ 1.6396 -2.0551] (finite-time transient ref) + recovered (T=5, finite-time) [ 0.5468 -0.9623] (should sit between sv and asymptotic -> confirms finite-time != asymptotic) +(d) Henon (a=1.4,b=0.3): literature [ 
0.4192 -1.6232] (sum=-1.2040) + recovered [ 0.4193 -1.6233] (sum=-1.2040) |err λ1|=8.44e-05 PASS + +Interpretation: (a)(b) confirm exact recovery for normal maps; (c) confirms the +estimator converges to log|eig| asymptotically while finite-time windows reflect +singular-value growth (the regime our paper operates in); (d) confirms correct +recovery on a known chaotic nonlinear system. The QR core is validated. \ No newline at end of file diff --git a/papers/txt/engelken2023_gradient_flossing.txt b/papers/txt/engelken2023_gradient_flossing.txt new file mode 100644 index 0000000..6d70608 --- /dev/null +++ b/papers/txt/engelken2023_gradient_flossing.txt @@ -0,0 +1,1879 @@ + Gradient Flossing: Improving Gradient Descent + through Dynamic Control of Jacobians + + + Rainer Engelken + Zuckerman Mind Brain Behavior Institute + Columbia University +arXiv:2312.17306v1 [cs.LG] 28 Dec 2023 + + + + + New York, USA + re2365@columbia.edu + + + + Abstract + Training recurrent neural networks (RNNs) remains a challenge due to the insta- + bility of gradients across long time horizons, which can lead to exploding and + vanishing gradients. Recent research has linked these problems to the values + of Lyapunov exponents for the forward-dynamics, which describe the growth or + shrinkage of infinitesimal perturbations. Here, we propose gradient flossing, a + novel approach to tackling gradient instability by pushing Lyapunov exponents + of the forward dynamics toward zero during learning. We achieve this by regu- + larizing Lyapunov exponents through backpropagation using differentiable linear + algebra. This enables us to "floss" the gradients, stabilizing them and thus improv- + ing network training. We demonstrate that gradient flossing controls not only the + gradient norm but also the condition number of the long-term Jacobian, facilitating + multidimensional error feedback propagation. 
We find that applying gradient + flossing prior to training enhances both the success rate and convergence speed for + tasks involving long time horizons. For challenging tasks, we show that gradient + flossing during training can further increase the time horizon that can be bridged + by backpropagation through time. Moreover, we demonstrate the effectiveness of + our approach on various RNN architectures and tasks of variable temporal com- + plexity. Additionally, we provide a simple implementation of our gradient flossing + algorithm that can be used in practice. Our results indicate that gradient flossing + via regularizing Lyapunov exponents can significantly enhance the effectiveness of + RNN training and mitigate the exploding and vanishing gradients problem. + + + 1 Introduction + Recurrent neural networks are commonly used both in machine learning and computational neu- + roscience for tasks that involve input-to-output mappings over sequences and dynamic trajectories. + Training is often achieved through gradient descent by the backpropagation of error information + across time steps [1, 2, 3, 4]. This amounts to unrolling the network dynamics in time and recursively + applying the chain rule to calculate the gradient of the loss with respect to the network parameters. + Mathematically, evaluating the product of Jacobians of the recurrent state update describes how error + signals travel across time steps. When trained on tasks that have long-range temporal dependencies, + recurrent neural networks are prone to exploding and vanishing gradients [5, 6, 7, 8]. These arise from + the exponential amplification or attenuation of recursive derivatives of recurrent network states over + many time steps. Intuitively, to evaluate how an output error depends on a small parameter change at + a much earlier point in time, the error information has to be propagated through the recurrent network + states iteratively. 
Mathematically, this corresponds to a product of Jacobians that describe how + changes in one recurrent network state depend on changes in the previous network state. Together, + this product forms the long-term Jacobian. The singular value spectrum of the long-term Jacobian reg- + + 37th Conference on Neural Information Processing Systems (NeurIPS 2023). + ulates how well error signals can propagate backwards along multiple time steps, allowing temporal +credit assignment. A close mathematical correspondence of these singular values and the Lyapunov +exponents of the forward dynamics was established recently [9, 10, 11, 12]. Lyapunov exponents +characterize the asymptotic average rate of exponential divergence or convergence of nearby initial +conditions and are a cornerstone of dynamical systems theory [13, 14]. We will use this link to +improve the trainability of RNNs. +Previous approaches that tackled the problem of exploding or vanishing gradients have suggested +solutions at different levels. First, specialized units such as LSTM and GRU were introduced, +which have additional latent variables that can be decoupled from the recurrent network states via +multiplicative (gating) interactions. The gating interactions shield the latent memory state, which can +therefore transport information across multiple time steps [5, 6, 15]. Second, exploding gradients can +be avoided by gradient clipping, which re-scales the gradient norm [16] or their individual elements +[17] if they become too large [18]. Third, normalization schemes like batch normalization prevent +saturated nonlinearities that contribute to vanishing gradients [19]. 
Third, it was suggested that the +problem of exploding/vanishing gradients can be ameliorated by specialized network architectures, +for example, antisymmetric networks [20], orthogonal/unitary initializations [21, 22, 23], coupled +oscillatory RNNs [24], Lipschitz RNNs [25], linear recurrent units [26], echo state networks [27, 28], +(recurrent) highway networks [29, 30], and stable limit cycle neural networks [11, 31, 32]. Fourth, for +large networks, a suitable choice of weights can guarantee a well-conditioned Jacobian at initialization +[21, 33, 34, 35, 36, 37, 38, 39, 40, 41]. These initializations are based on mean-field methods, which +become exact only in the large-network limit. Such initialization schemes have also been suggested +for gated networks [40]. However, even when initializing the network with well-behaved gradients, +gradients will typically not retain their stability during training once the network parameters have +changed. +Here, we propose a novel approach to tackling this challenge by introducing gradient flossing, a +technique that keeps gradients well-behaved throughout training. Gradient flossing is based on +a recently described link between the gradients of backpropagation through time and Lyapunov +exponents, which are the time-averaged logarithms of the singular values of the long-term Jacobian +[9, 11, 12, 32]. Gradient flossing regularizes one or several Lyapunov exponents to keep them close +to zero during training. This improves not only the error gradient norm but also the condition number +of the long-term Jacobian. As a result, error signals can be propagated back over longer time horizons. +We first demonstrate that the Lyapunov exponents can be controlled during training by including an +additional loss term. We then demonstrate that gradient flossing improves the gradient norm and +effective dimension of the gradient signal. 
We find empirically that gradient flossing improves test +accuracy and convergence speed on synthetic tasks over a range of temporal complexities. Finally, +we find that gradient flossing during training further helps to bridge long-time horizons and show +that it combines well with other approaches to ameliorate exploding and vanishing gradients, such as +dynamic mean-field theory for initialization, orthogonal initialization and gated units. +Our contributions include: + + • Gradient flossing, a novel approach to the problem of exploding and vanishing gradients in + recurrent neural networks based on regularization of Lyapunov exponents. + + • Analytical estimates of the condition number of the long-term Jacobian based on Lyapunov + exponents. + + • Empirical evidence that gradient flossing improves training on tasks that involve bridging + long time horizons. + + +2 RNN Gradients and Lyapunov Exponents + +We begin by revisiting the established mathematical relationship between the gradients of the loss +function, computed via backpropagation through time, and Lyapunov exponents [9, 12], and how +it relates to the problem of vanishing and exploding gradients. In backpropagation through time, +network parameters θ are iteratively updated by stochastic gradient descent such that a loss Lt is +locally reduced [1, 2, 3, 4]. For RNN dynamics hs+1 = fθ (hs , xs+1 ), with recurrent network state +h, external input x, and parameters θ, the gradient of the loss Lt with respect to θ is evaluated by + + + 2 + unrolling the network dynamics in time. The resulting expression for the gradient is given by: 
+ $$\frac{\partial L_t}{\partial \theta} = \frac{\partial L_t}{\partial h_t} \sum_{\tau=t-l}^{t-1} \left( \prod_{\tau'=\tau}^{t-1} \frac{\partial h_{\tau'+1}}{\partial h_{\tau'}} \right) \frac{\partial h_\tau}{\partial \theta} = \frac{\partial L_t}{\partial h_t} \sum_{\tau} T_t(h_\tau) \frac{\partial h_\tau}{\partial \theta} \tag{1}$$ + +where Tt (hτ ) is composed of a product of one-step Jacobians $D_s = \frac{\partial h_{s+1}}{\partial h_s}$: + $$T_t(h_\tau) = \prod_{\tau'=\tau}^{t-1} \frac{\partial h_{\tau'+1}}{\partial h_{\tau'}} = \prod_{\tau'=\tau}^{t-1} D_{\tau'} \tag{2}$$ + +Due to the chain of matrix multiplications in Tt , the gradients tend to vanish or explode exponentially +with time. This complicates training particularly when the task loss at time t dependents on inputs x +or states h from many time steps prior which creates long temporal dependencies [5, 6, 7, 8]. How +well error signals can propagate back in time is constrained by the tangent space dynamics along +trajectory ht , which dictate how local perturbations around each point on the trajectory stretch, rotate, +shear, or compress as the system evolves. +The singular values of the Jacobian’s product Tt , which determine how quickly gradients vanish or +explode during backpropagation through time, are directly related to the Lyapunov exponents of the +forward dynamics [9, 12]: Lyapunov exponents λ1 ≥ λ2 · · · ≥ λN are defined as the asymptotic +time-averaged logarithms of the singular values of the long-term Jacobian [13, 42, 43] + $$\lambda_i = \lim_{t\to\infty} \frac{1}{t-\tau} \log(\sigma_{i,t}) \tag{3}$$ + +where σi,t denotes the ith singular value of Tt (hτ ) with σ1,t ≥ σ2,t . . . σN,t (See Appendix I +for details). This means that positive Lyapunov exponents in the forward dynamics correspond to +exponentially exploding gradient modes, while negative Lyapunov exponents in the forward dynamics +correspond to exponentially vanishing gradient modes. +In summary, the Lyapunov exponents give the average asymptotic exponential growth rates of +infinitesimal perturbations in the tangent space of the forward dynamics, which also constrain the +signal propagation in backpropagation for long time horizons. 
Lyapunov exponents close to zero in +the forward dynamics correspond to tangent space directions along which error signals are neither +drastically attenuated nor amplified in backpropagation through time. Such close-to-neutral modes in +the tangent dynamics can propagate information reliably across many time steps. + +3 Gradient Flossing: Idea and Algorithm +We now leverage the mathematical connection established between Lyapunov exponents and the +prevalent issue of exploding and vanishing gradients for regularizing the singular values of the +long-term Jacobian. We term this procedure gradient flossing. To prevent exploding and vanishing +gradients, we constrain Lyapunov exponents to be close to zero. This ensures that the corresponding +directions in tangent space grow and shrink on average only slowly. This leads to a better-conditioned +long-term Jacobian Tt (hτ ). We achieve this by using the sum of the squares of the first k largest +Lyapunov exponent λ1 , λ2 . . . λk as a loss function: + $$L_{\text{flossing}} = \sum_{i=1}^{k} \lambda_i^2 \tag{4}$$ + +and evaluate the gradient obtained from backpropagation through time: + $$\frac{\partial L_{\text{flossing}}}{\partial \theta} = \sum_{i=1}^{k} \frac{\partial \lambda_i^2}{\partial \theta} \tag{5}$$ + +This might seem like an ill-fated enterprise, as the gradient expression in Eq 5 suffers from its +own problem of exploding and vanishing gradients. However, instead of calculating the Lyapunov +exponents by directly evaluating the long-term Jacobian Tt (Eq 2), we use an established iterative +reorthonormalization method involving QR decomposition that avoids directly evaluating the ill- +conditioned long-term Jacobian [12, 44]. + + + 3 + First, we evolve an initially orthonormal system $Q_s = [q_s^1, q_s^2, \ldots, q_s^k]$ in the tangent space along the +trajectory using the Jacobian $D_s = \frac{\partial h_{s+1}}{\partial h_s}$. This means to calculate + + $$\tilde{Q}_{s+1} = D_s Q_s \tag{6}$$ +at every time-step. 
Second, we extract the exponential growth rates using the QR decomposition, + $$\tilde{Q}_{s+1} = Q_{s+1} R_{s+1},$$ +which decomposes $\tilde{Q}_{s+1}$ uniquely into the product of an orthonormal matrix Qs+1 of size N × k +so $Q_{s+1}^{\top} Q_{s+1} = 1_{k \times k}$ and an upper triangular matrix Rs+1 of size k × k with positive diagonal +elements. Note that the QR decomposition does not have to be applied at every step, just sufficiently +often, i.e., once every tONS such that $\tilde{Q}$ does not become ill-conditioned. +The Lyapunov exponents are given by time-averaged logarithms of the diagonal entries of Rs [43, 44]: + $$\lambda_i = \lim_{t\to\infty} \frac{1}{t} \log \prod_{s=1}^{t} R^s_{ii} = \lim_{t\to\infty} \frac{1}{t} \sum_{s=1}^{t} \log R^s_{ii}. \tag{7}$$ +This way, the Lyapunov exponent can be expressed in terms of a temporal average over the diagonal +elements of the Rs -matrix of a QR decomposition of the iterated Jacobian. To propagate the gradient +of the square of the Lyapunov exponents backward through time in gradient flossing, we used an +analytical expression for the pullback of the QR decomposition [45]: The backward pass of the QR +decomposition is given by [45, 46, 47, 48] + $$\bar{\tilde{Q}} = \left[ \bar{Q} + Q \, \mathrm{copyltu}(M) \right] R^{-T}, \tag{8}$$ +where $M = R \bar{R}^T - \bar{Q}^T Q$ and the copyltu function generates a symmetric matrix by copying +the lower triangle of the input matrix to its upper triangle, with the element [copyltu(M )]ij = +Mmax(i,j),min(i,j) [45, 46, 47, 48]. We denote here adjoint variable as $\bar{T} = \partial L / \partial T$. A simple +implementation of this algorithm in pseudocode is: +Algorithm 1 Algorithm for gradient flossing of k tangent space directions + initialize h, Q + for e = 1 → E do + for t = 1 → T do + h ← fθ (h, x) + D ← dh_t / dh_{t−1} + Q←D·Q + if t ≡ 0 (mod tONS ) then + Q, R ← qr(Q) + γi += log(Rii ) + end if + end for + λi = γi /T + θ_{e+1} ← θ_e − η ∂L_flossing/∂θ + end for +For clarity, we described gradient flossing in terms of stochastic gradient descent, but we actually +implemented it with the ADAM optimizer using standard hyperparameters η, β1 and β2 . 
An example +implementation is available here. Note that this algorithm also works for different recurrent network +architectures. In this case, the Jacobians D has size n × n, where n is the number of dynamic +variables of the recurrent network model. For example, in case of a single recurrent network of +N LSTM units, the Jacobian has size 2N × 2N [9, 12, 41]. The Jacobian matrix D can either be +calculated analytically or it can be obtained via automatic differentiation. + +4 Gradient Flossing: Control of Lyapunov Exponents +In Fig 1, we demonstrate that gradient flossing can set one or several Lyapunov exponents to a +target value via gradient descent with the ADAM optimizer in random Vanilla RNNs initialized with +different weight variances. The N units of the recurrent neural network follow the dynamics + hs+1 = f (hs , xs+1 ) = Wϕ(hs ) + Vxs+1 . (9) + + + 4 + C + t−1 + ∂hτ ′ +1 10 1 + number of flossed i + Y + Tt (hτ ) = k = 32 + ∂hτ ′ k = 16 + τ ′ =τ + 10 3 k=1 + + + + + 2 + i + i=1 + k + 1 + k + 10 5 + + + 0 20 40 60 80 100 + Epochs + B D + 0.0 0 + 0.5 2 +1(1/ ) + + + + + i(1/ ) + 1.0 4 number of flossed i + k = 32 + 1.5 target 1 6 k = 16 + actual 1 k=1 + 2.0 + 0 10 20 30 40 50 60 70 80 0.0 0.2 0.4 0.6 0.8 1.0 + Epochs i/N +Figure 1: Gradient flossing controls Lyapunov exponents and gradient signal propagation +A) Exploding and vanishing gradients in backpropagation through time arise from amplifica- + Qt−1 ∂h ′ +tion/attenuation of product of Jacobians that form the long-term Jacobian Tt (hτ ) = τ ′ =τ ∂hτ +1 . + τ′ +B) First Lyapunov exponent of Vanilla RNN as a function of training epochs. Minimizing the +mean squared error between estimated first Lyapunov exponent and target Lyapunov exponent +λ1 = −1, −0.5, 0 by gradient descent. 10 Vanilla RNNs were initialized with Gaussian recurrent +weights Wij ∼ N (0, g 2 /N ) where values of g were drawn g ∼ Unif(0, 1). C) Gradient flossing +minimizes the square of Lyapunov exponents over epochs. 
D) Full Lyapunov spectrum of Vanilla +RNN after a different number of Lyapunov exponents are pushed to zero via gradient flossing. Note, +the variability of the Lyapunov exponents that were not flossed. Parameters: network size N = 32 +with 10 network realizations. Error bars in C indicate the 25% and 75% percentiles and solid line +shows median. + + +The initial entries of W are drawn independently from a Gaussian distribution with zero mean and +variance g 2 /N , where g is a gain parameter that controls the heterogeneity of weights. We here use +the transfer function ϕ(x) = tanh(x). (See appendix B for gradient flossing with ReLU and LSTM +units). xs is a sequence of inputs and V is the input weight. xs is a stream of i.i.d. Gaussian input +xs ∼ N (0, 1) and the input weights V are N (0, 1). Both W and V are trained during gradient +flossing. +In Fig 1B, we show that for randomly initialized RNNs, the Lyapunov exponent can be modified by +gradient flossing to match a desired target value. The networks were initialized with 10 different values +of initial weight strength g chosen uniformly between 0 and 1. During gradient flossing, they quickly +approached three different target values of the first Lyapunov exponents λtarget 1 = {−1, −0.5, 0} +within less than 100 training epochs with batch size B = 1. We note that gradient flossing with +positive target λtarget + 1 seems not to arrive at a positive Lyapunov exponent λ1 . +Fig 1C shows gradient flossing for different numbers of Lyapunov exponents k. Here, during gradient- +descent, the sum of the squares of 1, 16, or 32 Lyapunov exponents is used as loss in gradient flossing +(see Fig 1A). Fig 1D shows the Lyapunov spectrum after flossing, which now has 1, 16, or 32 +Lyapunov exponents close to zero. We conclude that gradient flossing can selectively manipulate one, +several, or all Lyapunov exponents before or during network training. Gradient flossing also works for +RNNs of ReLU and LSTM units (See appendix B. 
Further, we find that the computational bottleneck +of gradient flossing is the QR decomposition, which has a computational complexity of $O(N k^2)$, +both in the forward pass and in the backward pass. Thus, gradient flossing of the entire Lyapunov +spectrum is computationally expensive. However, as we will show, not all Lyapunov exponents need +to be flossed and only short episodes of gradient flossing are sufficient for significantly improving the +training performance. + + + 5 + 5 Gradient Flossing: Condition Number of the Long-Term Jacobian + A B C +condition number 2(Tt( )) + 1034 1026 initial + after flossing 1030 + 1025 theory + 1019 simulations + + + + + 2 (theory) + 2(Tt( )) + 1020 + 1016 1012 k + initial 1010 5 + 107 after flossing 105 10 + theory 15 + simulations 100 + 0 100 200 300 5 10 15 100 1010 1020 1030 + time horizon t tangent space dimension m 2 (numerical) +Figure 2: Gradient flossing reduces condition number of the long-term Jacobian A) Condition +number κ2 of long-term Jacobian Tt (hτ ) as a function of time horizon t − τ at initialization (blue) +and after gradient flossing (orange). Direct numerical simulations are done with arbitrary precision +floating point arithmetic (transparent lines) with 256 bits per float, asymptotic theory based on +Lyapunov exponents (dashed lines) (Eq 10). B) Condition number for different number of tangent +space dimensions m. Simulations (dots) and Lyapunov exponent based theory (dashed lines) at +initialization (blue) and after gradient flossing (orange). Gradient flossing increases the number of +tangent space dimensions available for backpropagation for a given condition number (Grey dotted +line as a guide for eye for $\kappa_2 = 10^5$.) First 15 Lyapunov exponents were flossed. C) Comparison of +condition number obtained via direct numerical simulations vs. Lyapunov exponent-based. Colors +denote the number of flossed Lyapunov exponents k. 
Parameters: g = 1, batch size b = 1, N = 80, +epochs = 500, T = 500, gradient flossing for Ef = 500 epochs. Input xs identical to delayed XOR +task in Fig 3D. + +A well-conditioned Jacobian is essential for efficient and fast learning [21, 49, 50]. Gradient +flossing improves the condition number of the long-term Jacobian which constrains the error signal +propagation across long time horizons in backpropagation (Fig 2). The condition number κ2 of a +linear map A measures how close the map is to being singular and is given by the ratio of the largest +singular value σmax and the smallest singular values σmin , so $\kappa_2(A) = \frac{\sigma_{\max}(A)}{\sigma_{\min}(A)}$. According to the +rule of thumb given in [51], if $\kappa_2(A) = 10^p$, one can anticipate losing at least p digits of precision +when solving the equation Ax = b. Note that the long-term Jacobian Tt is composed of a product of +Jacobians, which generically makes it ill-conditioned. To nevertheless quantify the condition number +numerically, we use arbitrary-precision arithmetic with 256 bits per float. We find numerically that +the condition number of Tt exponentially diverges with the number of time steps (Fig 2A). We +compare the numerically measured condition number κ2 with an asymptotic approximation of the +condition number based on Lyapunov exponents that are calculated in the forward pass and find a +good match (Fig 2A). +Our theoretical estimate of the condition number κ2 of an orthonormal system Q of size N × m that +is temporally evolved by the long-term Jacobian Tt is: + + $$\kappa_2(\tilde{Q}_{t+\tau}) = \kappa_2\left( T_t(h_\tau) Q_t \right) = \frac{\sigma_1(T_t(h_\tau))}{\sigma_m(T_t(h_\tau))} \approx \exp\left( (\lambda_1 - \lambda_m)(t - \tau) \right). \tag{10}$$ +where σ1 (Tt (hτ )) and σm (Tt (hτ )) are the first and mth singular value of the long-term Jacobian. +We note that this theoretical estimate of the condition number follows from the asymptotic definition +of Lyapunov exponents and should be exact in the limit of long times. 
We find that gradient flossing +reduces the condition number by a factor whose magnitude increases exponentially with time (orange +in Fig 2A). Thus, we can expect that gradient flossing has a stronger effect on problems with a long +time horizon to bridge. We will later confirm this numerically. +Moreover, Lyapunov exponents enable the estimation of the number of gradient dimensions available +for the backpropagation of error signals. Generally, the long-term Jacobian is ill-conditioned, however, +the Lyapunov spectrum provides for a given number of tangent space dimensions an estimate of the +condition number. This indicates how close to singular the gradient signal for a given number of +tangent space dimensions is. Given a fixed acceptable condition number—determined, for example, +by noise level or floating-point precision—we observe that gradient flossing increases the number of +usable tangent space dimensions for backpropagation (Fig 2B). + + + 6 + Finally, we show that the asymptotic estimate of the condition number based on Lyapunov exponents +can even predict differences in condition number that originate from finite network size N (Fig 2C). +We emphasize that this goes beyond mean-field methods, which become exact only in the large- +network limit N → ∞ and usually do not capture finite-size effects [52] (see appendix G). 
+ +6 Initial Gradient Flossing Improves Trainability + A delayed copy task B delayed XOR task + 10 2 10 2 +test loss + + + + + test loss + no gradient flossing no gradient flossing + 10 3 gradient flossing gradient flossing + 10 3 + 10 4 + 0 2000 4000 6000 8000 10000 0 2000 4000 6000 8000 10000 + Epochs Epochs + C delayed copy task D delayed XOR task + 10 2 10 2 + + 10 3 10 3 +test loss + + + + + test loss + + 10 4 + 10 4 + + 20 40 60 80 10 20 30 40 50 60 70 80 + task difficulty (delay d) task difficulty (delay d) +Figure 3: Gradient flossing improves trainability on tasks that involve long time horizons A) Test +error for Vanilla RNNs trained on delayed copy task yt = xt−d for d = 40 with and without gradient +flossing flossing. Solid lines are medians across 5 network realizations. B) Same as A for delayed +XOR task with yt = |xt−d/2 − xt−d |. C) Mean final test loss as a function of task difficulty (delay d) +for delayed copy task. D) Mean final test loss as a function of task difficulty (delay d) for delayed +XOR task. Parameters: g = 1, batch size b = 16, N = 80, epochs = 104 , T = 300, gradient flossing +for Ef = 500 epochs on k = 75 before training. Shaded regions in C and D indicate the 20% and +80% percentiles and solid line shows mean. Dots are individual runs. Task loss: MSE(y, ŷ). + +We next present numerical results on two tasks with variable spatial and temporal complexity, +demonstrating that gradient flossing before training improves the trainability of Vanilla RNNs. We +call gradient flossing before training in the following preflossing. For preflossing, we first initialize the + Pk +network randomly, then minimize Lflossing = i=1 λ2i using the ADAM optimizer and subsequently +train on the tasks. We deliberately do not use sequential MNIST or similar toy tasks commonly used +to probe exploding/vanishing gradients, because we want a task where the structure of long-range +dependencies in the data is transparent and can be varied as desired. 
+First, we consider the delayed copy task, where a scalar stream of random input numbers x must be +reproduced by the output y delayed by d time steps, i.e. yt = xt−d . Although the task itself is trivial +and can be solved even by a linear network through a delay line (see appendix E), RNNs encounter +vanishing gradients for large delays d during training even with ’critical’ initialization with g = 1. +Our experiments show that gradient flossing can substantially improve the performance of RNNs +on this task (Fig 3A, C). While Vanilla RNNs without gradient flossing fail to train reliably beyond +d = 20, Vanilla RNNs with gradient flossing can be reliably trained for d = 40 (Fig 3C). Note that +we flossed here k = 40 Lyapunov exponents before training. We will later investigate the role of the +number of flossed Lyapunov exponents. +Second, we consider the temporal XOR task, which requires the RNN to perform a nonlinear input- +output computation on a sequential stream of scalar inputs, i.e., yt = |xt−d/2 − xt−d |, where d +denotes a time delay of d time steps (For details see appendix H). Fig 3D demonstrates that gradient +flossing helps to train networks on a substantially longer delay d. We found similar improvements +through gradient flossing for RNNs initialized with orthogonal weights (see appendix G). 
+ + + 7 + 7 Gradient Flossing During Training + A delayed temporal XOR B delayed spatial XOR + 100 flossing during training 100 +test accuracy (%) 90 preflossing 90 + + + + + test accuracy (%) + no flossing + 80 80 + 70 70 + 60 60 + 50 50 + 0 2000 4000 6000 8000 10000 0 2000 4000 6000 8000 10000 + Epochs Epochs + C D + 100 100 + 90 90 +test accuracy (%) + + + + + test accuracy (%) + 80 80 flossing during training + preflossing + 70 70 no flossing + 60 60 + 50 50 + 10 20 + 30 40 50 60 70 10 20 30 40 50 60 70 + complexity (delay d) complexity (delay d) +Figure 4: Gradient flossing during training further improves trainability +A) Test accuracy for Vanilla RNNs trained on delayed temporal binary XOR task yt = xt−d/2 ⊕ xt−d +with gradient flossing during training (green), preflossing (gradient flossing before training) (orange), +and with no gradient flossing (blue) for d = 70. Solid lines are mean across 20 network realizations, +individual network realizations shown in transparent fine lines. B) Same as A for delayed spatial +XOR task with yt = x1t−d ⊕ x2t−d ⊕ x3t−d . Parameters (g = 1, batch size b = 16). C) Test accuracy +as a function of task difficulty (delay d) for delayed temporal XOR task. D) Test accuracy as a +function of task difficulty (delay d) for delayed spatial XOR task. Parameters: g = 1, batch size +b = 16, N = 80, epochs = 104 , T = 300, gradient flossing for Ef = 500 epochs on k = 75 before +training and during training for green lines, and only before training for orange lines. Same plotting +conventions as previous figure. Task loss: cross-entropy between y and ŷ. +We next investigate the effects of gradient flossing during the training and find that gradient flossing +during training can further improve trainability. We trained RNNs on two more challenging tasks +with variable temporal complexity and performed gradient flossing either both during and before +training, only before training, or not at all. 
+Fig 4A shows the test accuracy for Vanilla RNNs training on the delayed temporal XOR task +yt = xt−d/2 ⊕ xt−d with random Bernoulli process x ∈ {0, 1}. The accuracy of Vanilla RNNs +falls to chance level for d ≥ 40 (Fig 4C). With gradient flossing before training, the trainability +can be improved, but still goes to chance level for d = 70. In contrast, for networks with gradient +flossing during training, the accuracy is improved to > 80% at d = 70. In this case, we preflossed +for 500 epochs before task training and again after 500 epochs of training on the task. In Fig 4B, +D the networks have to perform the nonlinear XOR operation yt = x1t−d ⊕ x2t−d ⊕ x3t−d on a +three-dimensional binary input signal x1 , x2 , and x3 and generate the correct output with a delay of +d steps. While the solution of the task itself is not difficult and could even be implemented by hand +(see appendix), the task is challenging for backpropagation through time because nonlinear temporal +associations bridging long time horizons have to be formed. Again, we observe that gradient flossing +before training improves the performance compared to baseline, but starts failing for long delays +d > 60. In contrast, networks that are also flossed during training can solve even more difficult tasks +(Fig 4D). We find that after gradient flossing, the norm of the error gradient with respect to initial +conditions h0 is amplified (appendix C). Interestingly, gradient flossing can also be detrimental to +task performance if it is continued throughout all training epochs (appendix C) +We note that merely regularizing the spectral radius of the recurrent weight matrix W or the individual +one-step Jacobians Ds numerically or based on mean-field theory does not yield such a training +improvement. This suggests that taking the temporal correlations between Jacobians Ds into account +is important for improving trainability. 
+ + + 8 + 7.1 Gradient Flossing for Different Numbers of Flossed Lyapunov Exponents + +We investigated how many Lyapunov exponents k have to be flossed to achieve an improvement in +training success (Fig 5). We studied this in the binary temporal delayed XOR task with gradient +flossing during training (same as Fig 3) and varied the task difficulty by changing the delay d. +We found that as the task becomes more difficult, networks where not enough Lyapunov exponents +k are flossed begin to fall below 100% test accuracy (Fig 5A). Correspondingly, when measuring +final test accuracy as a function of the number of flossed Lyapunov exponents, we observed that +more Lyapunov exponent k have to be flossed to achieve 100% accuracy as the tasks become more +difficult (Fig 5B). We also show the entire parameter plane of median test accuracy as a function of +both number of flossed Lyapunov exponents k and task difficulty (delay d), and found the same trend +(Fig 5B). Overall, we found that tasks with larger delay d require more Lyapunov exponents close to +zero. We note that this might also partially be caused by the ’streaming’ nature of the task: in our +tasks, longer delays automatically imply that more values have to be stored as at any moment all the +values in the ’delay line’ have to be remembered to successfully solve the tasks. This is different from +tasks where a single variable has to be stored and recalled after a long delay. It would be interesting +to study tasks where the number of delay steps and the number of items in memory can be varied +independently. +Finally, we did the same analysis on networks with only preflossing (gradient flossing before training) +and found the same trend (supplement Fig 7D), however, in that case even if all N Lyapunov +exponents were flossed, thus k = N , they were not able to solve the most difficult tasks. 
This seems +to indicate that gradient flossing during training cannot be replaced by just gradient flossing more +Lyapunov exponents before training. + + + + +Figure 5: Gradient flossing for different numbers of flossed Lyapunov exponents +A) Test accuracy for delayed temporal XOR task as a function of delay d with different numbers of +flossed Lyapunov exponents k. B) Same data as A but here test accuracy as a function of number +of flossed Lyapunov exponents k. Parameters: g = 1, batch size b = 16, N = 80, epochs = $10^4$ +for delayed temporal XOR, epochs = 5000 for delayed spatial XOR, T = 300, gradient flossing +for Ef = 500 epochs before training and during training for A, B. Shaded areas are 25% and 75% +percentiles, solid lines are means, transparent dots are individual simulations, task loss: cross-entropy +between y and ŷ. +8 Limitations +The mathematical connection between Lyapunov exponents and backpropagation through time +exploited in gradient flossing is rigorously established only in the infinite-time limit. It would be +interesting to extend our analysis to finite-time Lyapunov exponents. +Furthermore, the backpropagation through time gradient involves a sum over products of Jacobians +of different time periods t − τ , but the Lyapunov exponent only considers the asymptotic longest +product. Additionally, Lyapunov exponents characterize the asymptotic dynamics on the attractor of +the dynamics, whereas RNNs often exploit transient dynamics from some initial conditions outside +or towards the attractor. +Although our proposed method focuses on exploiting Lyapunov exponents, it neglects the geometry +of covariant Lyapunov vectors [53], which could be used to improve training performance, speed, +and reliability. Additionally, it is important to investigate how sensitive the method is to the choice +of orthonormal basis employed because it is only guaranteed to become unique asymptotically [54].
+ + + 9 + Finally, the computational cost of our method scales with $O(N k^2)$, where N is the network size +and k is the number of Lyapunov exponents calculated. To reduce the computational cost, we +suggest doing QR decomposition only sufficiently often to ensure that the orthonormal system is +not ill-conditioned and using gradient flossing only intermittently or as pretraining. One could also +calculate the Lyapunov spectrum for a shorter time interval or use a cheaper proxy for the Lyapunov +spectrum and investigate more efficient gradient flossing schedules. + + +9 Discussion + +We tackle the problem of gradient signal propagation in recurrent neural networks through a dy- +namical systems lens. We introduce a novel method called gradient flossing that addresses the +problem of gradient instability during training. Our approach enhances gradient signal stability both +before and during training by regularizing Lyapunov exponents. By keeping the long-term Jacobian +well-conditioned, gradient flossing optimizes both training accuracy and speed. To achieve this, +we combine established dynamical systems methods for calculating Lyapunov exponents with an +analytical pullback of the QR factorization. This allows us to establish and maintain gradient stability +in a manner that is memory-efficient, numerically stable, and exact across long time horizons. +Our method is applicable to arbitrary RNN architectures, nonlinearities, and also neural ODEs [55]. +Empirically, pre-training with gradient flossing enhances both training speed and accuracy. For +difficult temporal credit assignment problems, gradient flossing throughout training further enhances +signal propagation.
We also demonstrate the versatility of our method on a set of synthetic tasks +with controllable time-complexity and show that it can be combined with other approaches to tackle +exploding and vanishing gradients, such as dynamic mean-field theory for initialization, orthogonal +initialization and specialized single units, such as LSTMs. +Prior research on exploding and vanishing gradients mainly focused on selecting network architectures +that are less prone to exploding/vanishing gradients or finding parameter initializations that provide +well-conditioned gradients at least at the beginning of training. Our introduced gradient flossing can +be seen as a complementary approach that can further enhance gradient stability throughout training. +Compared to the work on picking good parameter initializations based on random matrix theory [41] +and mean-field heuristics [40], gradient flossing provides several improvements: First, mean-field +theory only considers the gradient flow at initialization, while gradient flossing can maintain gradient +flow and well-conditioned Jacobians throughout the training process. Second, random matrix theory +and mean-field heuristics are usually confined to the limit of large networks [52], while gradient +flossing can be used for networks of any size. The link between Lyapunov exponents and the gradients +of backpropagation through time has been described previously [9, 12] and has been spelled out +analytically and studied numerically [10, 11, 56, 57, 58]. In contrast, we use Lyapunov exponents +here not only as a diagnostic tool for gradient stability but also to show that they can directly be part +of the cure for exploding and vanishing gradients. 
+Future investigations could delve further into the roles of the second to N th Lyapunov exponents +in trainability, and how it is related to the task at hand, the rank of the parameter update, the dimen- +sionality of the solution space, as well as the network dynamics (see also [32, 59, 60]). Our results +suggest a trade-off between trainability across long time horizons and the nonlinear task demands +that is worth exploring in more detail (appendix C). Applying gradient flossing to real-time recurrent +learning and its biologically plausible variants is another avenue [61]. Extending gradient flossing to +feedforward networks, state-space models and transformers is a promising avenue for future research +(see also [62, 63]). While Lyapunov exponents are only strictly defined for dynamical systems, such +as maps or flows that are endomorphisms, the long-term Jacobian of deep feedforward networks +could be treated similarly. This could also provide a link between the stability of the network against +adversarial examples and its dynamic stability, as measured by Lyapunov exponents. Given that +time-varying input can suppress chaos in recurrent networks [9, 12, 64, 65, 66, 67], we anticipate they +may exacerbate vanishing gradients. Gradient flossing could also be applied in neural architecture +search, to identify and optimize trainable networks. Finally, gradient flossing is applicable to other +model parameters, as well. For instance, gradients of Lyapunov exponents with respect to single- +unit parameters could optimize the activation function and single-neuron biophysics in biologically +plausible neuron models. + + + + 10 + Acknowledgments and Disclosure of Funding +I thank E. Izhikevich, F. Wolf, S. Goedeke, J. Lindner, L.F. Abbott, L. Logiaco, M. Schottdorf, G. +Wayne and P. Sokol for fruitful discussions and J. Stone, L.F. Abbott, M. Ding, O. Marshall, S. +Goedeke, S. Lippl, M.P. Puelma-Touzel, J. Lindner and the reviewers for feedback on the manuscript. 
+I thank Jinguo Liu for the Julia package BackwardsLinalg.jl. Research supported by NSF NeuroNex +Award (DBI-1707398), the Gatsby Charitable Foundation (GAT3708), the Simons Collaboration for +the Global Brain (542939SPI), and the Swartz Foundation (2021-6). + +References + [1] P. WERBOS. Beyond Regression :. Ph. D. dissertation, Harvard University, 1974. + [2] DB Parker. Learning-logic (TR-47). Center for Computational Research in Economics and Management + Science. MIT-Press, Cambridge, Mass, 8, 1985. + [3] Y. LECUN. Une procedure d’apprentissage ponr reseau a seuil asymetrique. Proceedings of Cognitiva 85, + pages 599–604, 1985. + [4] David E. Rumelhart, Geoffrey E. Hinton, and Ronald J. Williams. Learning representations by back- + propagating errors. Nature, 323(6088):533, October 1986. + [5] Sepp Hochreiter. Untersuchungen zu dynamischen neuronalen Netzen. PhD thesis, 1991. + [6] Sepp Hochreiter and Jürgen Schmidhuber. Long Short-Term Memory. Neural Computation, 9(8):1735– + 1780, 1997. + [7] Y. Bengio, P. Simard, and P. Frasconi. Learning long-term dependencies with gradient descent is difficult. + IEEE Transactions on Neural Networks, 5(2):157–166, March 1994. + [8] Razvan Pascanu, Tomas Mikolov, and Yoshua Bengio. On the difficulty of training Recurrent Neural + Networks. arXiv:1211.5063 [cs], November 2012. arXiv: 1211.5063. + [9] Rainer Engelken, Fred Wolf, and L. F. Abbott. Lyapunov spectra of chaotic recurrent neural networks. + arXiv:2006.02427 [nlin, q-bio], June 2020. arXiv: 2006.02427. +[10] Jonas Mikhaeil, Zahra Monfared, and Daniel Durstewitz. On the difficulty of learning chaotic dynamics + with RNNs. Advances in Neural Information Processing Systems, 35:11297–11312, December 2022. +[11] Il Memming Park, Ábel Ságodi, and Piotr Aleksander Sokół. Persistent learning signals and working + memory without continuous attractors. ArXiv, page arXiv:2308.12585v1, August 2023. +[12] Rainer Engelken, Fred Wolf, and L. F. Abbott. 
Lyapunov spectra of chaotic recurrent neural networks. + Physical Review Research, 5(4):043044, October 2023. +[13] J. P. Eckmann and D. Ruelle. Ergodic theory of chaos and strange attractors. Reviews of Modern Physics, + 57(3):617–656, July 1985. +[14] Arkady Pikovsky and Antonio Politi. Lyapunov Exponents: A Tool to Explore Complex Dynamics. + Cambridge University Press, Cambridge, February 2016. +[15] Kyunghyun Cho, Bart van Merrienboer, Dzmitry Bahdanau, and Yoshua Bengio. On the Properties of + Neural Machine Translation: Encoder-Decoder Approaches. Technical report, September 2014. ADS + Bibcode: 2014arXiv1409.1259C Type: article. +[16] Razvan Pascanu, Tomas Mikolov, and Yoshua Bengio. On the difficulty of training recurrent neural + networks. In Proceedings of the 30th International Conference on International Conference on Machine + Learning - Volume 28, ICML’13, pages III–1310–III–1318, Atlanta, GA, USA, June 2013. JMLR.org. +[17] Tomáš Mikolov. Statistical language models based on neural networks. Ph.D. thesis, Brno University of + Technology, Faculty of Information Technology, Brno, CZ, 2012. +[18] Ian Goodfellow, Yoshua Bengio, and Aaron Courville. Deep Learning. MIT Press, November 2016. + Google-Books-ID: omivDQAAQBAJ. +[19] Sergey Ioffe and Christian Szegedy. Batch Normalization: Accelerating Deep Network Training by + Reducing Internal Covariate Shift. In Proceedings of the 32nd International Conference on Machine + Learning, pages 448–456. PMLR, June 2015. + + + 11 + [20] Bo Chang, Minmin Chen, Eldad Haber, and Ed H. Chi. AntisymmetricRNN: A Dynamical System View + on Recurrent Neural Networks. International Conference on Learning Representations, December 2018. + +[21] Andrew M. Saxe, James L. McClelland, and Surya Ganguli. Exact solutions to the nonlinear dynamics of + learning in deep linear neural networks. arXiv:1312.6120 [cond-mat, q-bio, stat], December 2013. arXiv: + 1312.6120. + +[22] Martin Arjovsky, Amar Shah, and Yoshua Bengio. 
Unitary Evolution Recurrent Neural Networks. In + Proceedings of The 33rd International Conference on Machine Learning, pages 1120–1128. PMLR, June + 2016. + +[23] Kyle Helfrich, Devin Willmott, and Qiang Ye. Orthogonal Recurrent Neural Networks with Scaled Cayley + Transform. In Proceedings of the 35th International Conference on Machine Learning, pages 1969–1978. + PMLR, July 2018. + +[24] T. Konstantin Rusch and Siddhartha Mishra. Coupled Oscillatory Recurrent Neural Network (coRNN): + An accurate and (gradient) stable architecture for learning long time dependencies. arXiv e-prints, page + arXiv:2010.00951, October 2020. + +[25] N. Benjamin Erichson, Omri Azencot, Alejandro Queiruga, Liam Hodgkinson, and Michael W. Mahoney. + Lipschitz Recurrent Neural Networks. International Conference on Learning Representations, January + 2021. + +[26] Antonio Orvieto, Samuel L Smith, Albert Gu, Anushan Fernando, Caglar Gulcehre, Razvan Pascanu, and + Soham De. Resurrecting Recurrent Neural Networks for Long Sequences. Technical report, March 2023. + ADS Bibcode: 2023arXiv230306349O Type: article. + +[27] Herbert Jaeger. The “echo state” approach to analysing and training recurrent neural networks-with an + erratum note. Bonn, Germany: German National Research Center for Information Technology GMD + Technical Report, 148:34, 2001. + +[28] Mustafa C. Ozturk, Dongming Xu, and José C. Príncipe. Analysis and Design of Echo State Networks. + Neural Computation, 19(1):111–138, January 2007. + +[29] Rupesh K Srivastava, Klaus Greff, and Jürgen Schmidhuber. Training Very Deep Networks. In Advances + in Neural Information Processing Systems, volume 28. Curran Associates, Inc., 2015. + +[30] Julian Georg Zilly, Rupesh Kumar Srivastava, Jan Koutnik, and Jürgen Schmidhuber. Recurrent Highway + Networks. In Proceedings of the 34th International Conference on Machine Learning, pages 4189–4198. + PMLR, July 2017. + +[31] Piotr A. Sokół, Ian Jordan, Eben Kadile, and Il Memming Park. 
Adjoint Dynamics of Stable Limit Cycle + Neural Networks. In 2019 53rd Asilomar Conference on Signals, Systems, and Computers, pages 884–887, + November 2019. ISSN: 2576-2303. + +[32] Piotr A. Sokół. Geometry of Learning and Representations in Neural Networks. PhD thesis, Stony Brook + University, May 2023. + +[33] Xavier Glorot, Antoine Bordes, and Yoshua Bengio. Deep Sparse Rectifier Neural Networks. volume 15, + pages 315–323, April 2011. + +[34] Ben Poole, Subhaneil Lahiri, Maithra Raghu, Jascha Sohl-Dickstein, and Surya Ganguli. Exponential + expressivity in deep neural networks through transient chaos. arXiv:1606.05340 [cond-mat, stat], June + 2016. arXiv: 1606.05340. + +[35] Minmin Chen, Jeffrey Pennington, and Samuel S. Schoenholz. Dynamical Isometry and a Mean Field + Theory of RNNs: Gating Enables Signal Propagation in Recurrent Neural Networks. arXiv:1806.05394 + [cs, stat], August 2018. arXiv: 1806.05394. + +[36] Boris Hanin and Mihai Nica. Products of Many Large Random Matrices and Gradients in Deep Neural + Networks. December 2018. + +[37] Samuel S. Schoenholz, Justin Gilmer, Surya Ganguli, and Jascha Sohl-Dickstein. Deep Information + Propagation. November 2016. + +[38] Boris Hanin and David Rolnick. How to Start Training: The Effect of Initialization and Architecture. In + Advances in Neural Information Processing Systems, volume 31. Curran Associates, Inc., 2018. + + + 12 + [39] Piotr A. Sokol and Il Memming Park. Information Geometry of Orthogonal Initializations and Training. + Technical report, October 2018. ADS Bibcode: 2018arXiv181003785S Type: article. +[40] Dar Gilboa, Bo Chang, Minmin Chen, Greg Yang, Samuel S. Schoenholz, Ed H. Chi, and Jeffrey + Pennington. Dynamical Isometry and a Mean Field Theory of LSTMs and GRUs. January 2019. +[41] Tankut Can, Kamesh Krishnamurthy, and David J. Schwab. Gating creates slow modes and controls + phase-space complexity in GRUs and LSTMs. arXiv:2002.00025 [cond-mat, stat], January 2020. 
arXiv: + 2002.00025. +[42] Valery Iustinovich Oseledets. A multiplicative ergodic theorem. Characteristic Ljapunov, exponents of + dynamical systems. Trudy Moskovskogo Matematicheskogo Obshchestva, 19:179–210, 1968. +[43] Karlheinz Geist, Ulrich Parlitz, and Werner Lauterborn. Comparison of Different Methods for Computing + Lyapunov Exponents. Progress of Theoretical Physics, 83(5):875–893, May 1990. +[44] Giancarlo Benettin, Luigi Galgani, Antonio Giorgilli, and Jean-Marie Strelcyn. Lyapunov Characteristic + Exponents for smooth dynamical systems and for hamiltonian systems; A method for computing all of + them. Part 2: Numerical application. Meccanica, 15(1):21–30, March 1980. +[45] Hai-Jun Liao, Jin-Guo Liu, Lei Wang, and Tao Xiang. Differentiable Programming Tensor Networks. + Physical Review X, 9(3):031041, September 2019. arXiv:1903.09650 [cond-mat, physics:quant-ph]. +[46] S. F. Walter and L. Lehmann. Algorithmic Differentiation of Linear Algebra Functions with Application + in Optimum Experimental Design (Extended Version). Technical report, January 2010. ADS Bibcode: + 2010arXiv1001.1654W Type: article. +[47] Robert Schreiber and Charles Van Loan. A Storage-Efficient $WY$ Representation for Products of + Householder Transformations. SIAM Journal on Scientific and Statistical Computing, 10(1):53–57, January + 1989. +[48] Matthias Seeger, Asmus Hetzel, Zhenwen Dai, Eric Meissner, and Neil D. Lawrence. Auto-Differentiating + Linear Algebra. Technical report, October 2017. ADS Bibcode: 2017arXiv171008717S Type: article. +[49] Jeffrey Pennington, Samuel Schoenholz, and Surya Ganguli. Resurrecting the sigmoid in deep learning + through dynamical isometry: theory and practice. In Advances in Neural Information Processing Systems, + volume 30. Curran Associates, Inc., 2017. +[50] Jeffrey Pennington, Samuel S. Schoenholz, and Surya Ganguli. The Emergence of Spectral Universality in + Deep Networks. arXiv:1802.09979 [cs, stat], February 2018. arXiv: 1802.09979. 
+[51] E. Cheney and David Kincaid. Numerical Mathematics and Computing. Cengage Learning, August 2007. + Google-Books-ID: ZUfVZELlrMEC. +[52] Yasaman Bahri, Jonathan Kadmon, Jeffrey Pennington, Sam S. Schoenholz, Jascha Sohl-Dickstein, and + Surya Ganguli. Statistical Mechanics of Deep Learning. Annual Review of Condensed Matter Physics, + 11(1):501–528, 2020. +[53] F. Ginelli, P. Poggi, A. Turchi, H. Chaté, R. Livi, and A. Politi. Characterizing Dynamics with Covariant + Lyapunov Vectors. Physical Review Letters, 99(13):130601, September 2007. +[54] Sergey V. Ershov and Alexei B. Potapov. On the concept of stationary Lyapunov basis. Physica D: + Nonlinear Phenomena, 118(3):167–198, July 1998. +[55] Ricky T. Q. Chen, Yulia Rubanova, Jesse Bettencourt, and David K Duvenaud. Neural Ordinary Differential + Equations. In Advances in Neural Information Processing Systems, volume 31. Curran Associates, Inc., + 2018. +[56] Javed Lindner. Investigating the exploding and vanishing gradients problem with Lyapunov exponents. + Master’s thesis, RWTH Aaachen, Aaachen/Juelich, 2021. +[57] Ryan Vogt, Maximilian Puelma Touzel, Eli Shlizerman, and Guillaume Lajoie. On Lyapunov Exponents + for RNNs: Understanding Information Propagation Using Dynamical Systems Tools. Frontiers in Applied + Mathematics and Statistics, 8, 2022. +[58] Kamesh Krishnamurthy, Tankut Can, and David J. Schwab. Theory of Gating in Recurrent Neural Networks. + Physical Review X, 12(1):011011, January 2022. +[59] L. S. Pontryagin. Mathematical Theory of Optimal Processes: The Mathematical Theory of Optimal + Processes. Routledge, New York, 1st edition edition, March 1987. + + + 13 + [60] Daniel Liberzon. Calculus of Variations and Optimal Control Theory: A Concise Introduction. In Calculus + of Variations and Optimal Control Theory. Princeton University Press, December 2011. + +[61] Owen Marschall, Kyunghyun Cho, and Cristina Savin. 
A unified framework of online learning algorithms + for training recurrent neural networks. The Journal of Machine Learning Research, 21(1):135:5320– + 135:5353, January 2020. + +[62] Judy Hoffman, Daniel A. Roberts, and Sho Yaida. Robust Learning with Jacobian Regularization. Technical + report, August 2019. ADS Bibcode: 2019arXiv190802729H Type: article. +[63] Hongyi Zhang, Yann N. Dauphin, and Tengyu Ma. Fixup Initialization: Residual Learning Without + Normalization. Technical report, January 2019. ADS Bibcode: 2019arXiv190109321Z Type: article. + +[64] L. Molgedey, J. Schuchhardt, and H. G. Schuster. Suppressing chaos in neural networks by noise. Physical + Review Letters, 69(26):3717–3719, December 1992. + +[65] Kanaka Rajan, L. F. Abbott, and Haim Sompolinsky. Stimulus-dependent suppression of chaos in recurrent + neural networks. Physical Review E, 82(1):011903, July 2010. + +[66] Jannis Schuecker, Sven Goedeke, David Dahmen, and Moritz Helias. Functional methods for disordered + neural networks. arXiv:1605.06758 [cond-mat, q-bio], May 2016. arXiv: 1605.06758. + +[67] Rainer Engelken, Alessandro Ingrosso, Ramin Khajeh, Sven Goedeke, and L. F. Abbott. Input correlations + impede suppression of chaos and learning in balanced firing-rate networks. PLOS Computational Biology, + 18(12):e1010590, December 2022. + +[68] Edward Ott. Chaos in Dynamical Systems. Cambridge University Press, August 2002. Google-Books-ID: + PfXoAwAAQBAJ. +[69] Angelo Vulpiani, Fabio Cecconi, and Massimo Cencini. Chaos: From Simple Models to Complex Systems. + World Scientific Pub Co Inc, Hackensack, NJ, September 2009. 
+ + + + + 14 + A Backpropagation Through QR Decomposition +The backward pass of the QR decomposition is given by [45, 46, 47, 48] + $\overline{\widetilde{Q}} = \left[\overline{Q} + Q\,\mathrm{copyltu}(M)\right] R^{-T}$ (11) +where $\widetilde{Q} = QR$ denotes the matrix being decomposed, $M = R\overline{R}^{T} - \overline{Q}^{T} Q$, and the copyltu function generates a symmetric matrix by copying the lower +triangle of the input matrix to its upper triangle, with the element $[\mathrm{copyltu}(M)]_{ij} = M_{\max(i,j),\min(i,j)}$ +[45, 46, 47, 48]. Adjoint variables are written here as $\overline{T} = \partial L/\partial T$. +Using an analytical pullback is more memory-efficient and less computationally costly than directly doing +automatic differentiation through the QR-decomposition. Moreover, from a practical perspective, for QR +decomposition, often BLAS/LAPACK routines are utilized which are not amenable to common differentiable +programming frameworks like TensorFlow, PyTorch, JAX and Zygote. In our implementation of gradient +flossing, we used the Julia package BackwardsLinalg.jl by Jinguo Liu, available here. + +B Further Details and Analysis of Gradient Flossing +An example implementation of gradient flossing in Flux, a machine learning library in Julia is available here. +We are actively developing implementations for other widely used differentiable programming frameworks. + +B.1 Gradient Flossing for recurrent LSTM and ReLU networks + + A LSTM C ReLU + 0.2 target 1 6 target 1 + actual 1 actual 1 + 4 + 0.0 + 2 + 1 + + + + + 1 + + + + + 0.2 + 0 + 0.4 + 2 + 0 20 40 60 80 100 0 20 40 60 80 100 + Epochs Epochs + B D 101 + 10 1 + 10 2 10 1 + 10 3 + 2 + + + + + 2 + 1 + + + + + 1 + + + + + 10 4 10 3 + 10 5 + 10 6 10 5 + 0 20 40 60 80 100 0 20 40 60 80 100 + Epochs Epochs + + +Figure 6: Gradient flossing for recurrent LSTM networks and recurrent ReLU networks A) First +Lyapunov exponent of LSTM network as a function of training epochs. Minimizing the mean +squared error between estimated first Lyapunov exponent and target Lyapunov exponent $\lambda_1 = 0$ +by gradient descent.
First Lyapunov exponent of LSTM network (solid lines) converges to target +value (thick dashed lines) within less than 100 epochs. 10 random LSTM RNNs were initialized with +Gaussian recurrent weights, where standard deviations of weight scaling were drawn g ∼ Unif(0, 1). +B) Gradient flossing minimizes the square of the first Lyapunov exponent of random recurrent +LSTM networks over epochs. C) Same as A for recurrent ReLU network. Here networks were +initialized with Gaussian recurrent weights $W_{ij} \sim \mathcal{N}(-0.1, g^2/N)$ where values of g were drawn +g ∼ Unif(0, 1). D) Same as B for recurrent ReLU network. Parameters: network size N = 32 with 10 +network realizations. Shaded regions in B, D are 25% and 75% percentiles, solid line shows median. + +We demonstrate that gradient flossing can also be applied to recurrent LSTM and ReLU networks in Fig 6. To +this end, we generated random LSTM networks where the weights of all the different gates and biases were + + + 15 + independently and identically distributed (i.i.d.) and sampled from Gaussian distributions of different variance. +Our results show that gradient flossing can also constrain the Lyapunov exponent to be close to zero. The +dynamics of each of the N LSTM units follows the map [6]: + $f_t = \sigma_g(U_f h_{t-1} + W_f x_t + b_f)$ (12) + $o_t = \sigma_g(U_o h_{t-1} + W_o x_t + b_o)$ (13) + $i_t = \sigma_g(U_i h_{t-1} + W_i x_t + b_i)$ (14) + $\tilde{c}_t = \sigma_h(U_c h_{t-1} + W_c x_t + b_c)$ (15) + $c_t = f_t \odot c_{t-1} + i_t \odot \tilde{c}_t$ (16) + $h_t = o_t \odot \phi(c_t)$ (17) +where ⊙ denotes the Hadamard product, $\sigma_g(x) = \frac{1}{1+\exp(-x)}$ is the sigmoid function, $\sigma_h(x) = \tanh(x)$ and +entries of the matrices $U_x$ are drawn from $U_x \sim \mathcal{N}(0, g_x^2/N)$. For simplicity, the bias terms $b_x$ are scalars. +Subscripts f , o and i denote respectively the forget gate, the output gate, the input gate, and c is the cell state. +In each LSTM unit, there are two dynamic variables c and h, and three gates f , o, and i that control the flow +of signals into and out of the cell c.
We set the values $g_{ih}$, $g_{ix}$, $g_{fx}$, $b_f$, $g_{ch}$, $g_{cx}$, $g_{oh}$, $g_{ox}$ to be uniformly +distributed between 0 and 1 and initialize $b_i$, $g_{fh}$, $b_c$, $b_o$ as zero. +During gradient flossing, the actual Lyapunov exponents of different random network realizations converge close +to the target Lyapunov exponent $\lambda_1^{\mathrm{target}} = 0$ in fewer than 100 epochs as shown in Fig 6A. Fig 6B shows that +the squared Lyapunov exponents converge towards zero. We note that for LSTM networks, a target Lyapunov +exponent of $\lambda_1^{\mathrm{target}} = -1$ is achieved after 100 gradient flossing steps only for a subset of random network +realizations (not shown). We speculate that this behavior is influenced by the gating structure of LSTM units, +which seems to naturally place the first Lyapunov exponent close to zero for certain initializations (See also +[9, 12, 41, 58]). +For the recurrent ReLU networks, we considered the same Vanilla RNN dynamics as in the main manuscript in +Eq 9 + $h_{s+1} = f(h_s, x_{s+1}) = W\phi(h_s) + V x_{s+1}$, +The initial entries of W are drawn independently from a Gaussian distribution with a negative mean of −0.1 +and variance $g^2/N$, where g is a gain parameter that controls the heterogeneity of weights. We use the transfer +function ϕ(x) = max(x, 0). $x_s$ is a sequence of inputs and V is the input weight. $x_s$ is a stream of i.i.d. +Gaussian input $x_s \sim \mathcal{N}(0, 1)$ and the input weights V are $\mathcal{N}(0, 1)$. Both W and V are trained during gradient +flossing. We found that some ReLU networks had initially unstable dynamics with positive Lyapunov exponents +(Fig 6C). However, during gradient flossing, these unstable networks were quickly stabilized. Fig 6D shows that +the squared Lyapunov exponents of ReLU networks converge towards zero.
+ +B.2 Additional Results for Different Numbers of Flossed Lyapunov Exponents +In addition to Fig 5 of the main text, we did the same analysis on networks with only preflossing (gradient flossing +before training) and found that more Lyapunov exponents k have to be flossed to achieve 100% accuracy as the +tasks become more difficult (Fig 7D). However, in that case, even if all N Lyapunov exponents were flossed, thus +k = N , the networks were not able to solve the most difficult tasks. This seems to indicate that gradient flossing during +training cannot be replaced by just gradient flossing more Lyapunov exponents before training. + + +C Additional Results on Gradient Flossing Throughout Training +We now discuss some additional results on gradient flossing throughout training. First, we analyze how gradient +flossing affects the gradients and find that during gradient flossing, the norm of gradients that bridge many time +steps is boosted. Moreover, subordinate singular values of the error gradient with respect to the recurrent weights are also +boosted, indicating that gradient flossing can increase the effective rank of the parameter update. Additionally, +we show that if gradient flossing is continued throughout training it can be detrimental to the accuracy. Finally, +we show that Lyapunov exponents of successfully trained networks after training for the spatial delayed XOR +task have a simple relationship to the delay d. + + +D Gradient Flossing Boosts the Gradient Norm for Long Time Horizons +In this section, we investigate the impact of gradient flossing on the norm and structure of the gradient. It is +important to note that the complete error gradient of backpropagation through time is composed of a summation +of products of one-step Jacobians, reflecting the number of "loops" the error signal traverses through the recurrent +dynamics before reaching its target.
Consequently, when the singular values of the long-term Jacobian are +smaller than 1, the influence of the shorter loops typically dominates the long-term Jacobian. + + + 16 + A B + 100 100 + k + test accuracy (%) + + + + + test accuracy (%) + 80 1 80 delay + 20 30 + 40 50 + 60 70 + 60 80 60 + + 20 40 60 0 20 40 60 80 + complexity (delay d) number of flossed i k + C 100 D 100 + 70 70 + complexity (delay d) + + + + + complexity (delay d) + test accuracy (%) + + + + + test accuracy (%) + 60 60 + 50 50 + 40 40 + 30 30 + 20 20 + 10 10 + 1 10 20 30 40 50 60 70 80 50 1 10 20 30 40 50 60 70 80 50 + number of flossed i k number of flossed i k + + +Figure 7: Gradient flossing for different numbers of flossed Lyapunov exponents +A) Test accuracy for delayed temporal XOR task as a function of delay d with different numbers +flossed Lyapunov exponents k. B) Same data as A but here test accuracy as a function of number of +flossed Lyapunov exponents k. C) Median test accuracy for delayed temporal XOR task as a function +of delay d and k for networks with gradient flossing during training (500 steps of gradient flossing at +epochs e ∈ {0, 100, 200, 300, 400}). D)Same as B for preflossing only. Parameters: g = 1, batch +size b = 16, N = 80, epochs = 104 for delayed temporal XOR, epochs = 5000 for delayed spatial +XOR, T = 300, gradient flossing for Ef = 500 epochs before training and during training for A, B, +C, and only before training for C. Shaded areas are 25% and 75% percentiles, solid lines are means, +transparent dots are individual simulations, task loss: cross-entropy btw. y, ŷ. + + +In our tasks, we have full control over the correlation structure of the task and thus know exactly which loop +length of backpropagation through time is necessary for finding the correct association. We were moreover +careful in our task design not to have any additional signals in our task that might help to bridge the long time +scale. 
In the case of vanishing gradients, the gradient norm is predominantly influenced by the shorter loops, +even though the actual signal in the gradient originates solely from the loop of length d in our task. To mitigate +the contamination of spurious signals from shorter loops and effectively extract the gradient that spans long time +horizons, we focus on the gradient with respect to the initial conditions h0 . + $$\frac{\partial L_t}{\partial h_0} = \frac{\partial L_t}{\partial h_t} \sum_{\tau=t-l}^{t-1} \left( \prod_{\tau'=\tau}^{t-1} \frac{\partial h_{\tau'+1}}{\partial h_{\tau'}} \right) \frac{\partial h_\tau}{\partial h_0} = \frac{\partial L_t}{\partial h_t} \sum_{\tau=t-l}^{t-1} \left( \prod_{\tau'=\tau}^{t-1} \frac{\partial h_{\tau'+1}}{\partial h_{\tau'}} \right) \delta_{\tau 0} = \frac{\partial L_t}{\partial h_t}\, T_t(h_0) \tag{18}$$ + +We note that the sum conveniently drops out, as only the longest ’loop’ survives; in other words, the only summand that +contributes is the product of Jacobians going from 0 to t. By considering this gradient, we can therefore ensure +that no undesired signals stemming from shorter loops interfere with the analysis. Moreover, we note that we +use the binary cross entropy loss which makes the derivative $\partial L_t / \partial h_t$ trivial. +In Fig 8 we show that gradient flossing boosts the gradient with respect to the initial conditions. Specifically, we +compare two identical networks trained on the binary delayed temporal XOR task with a loop length of d = 70. +One network is trained with gradient flossing at epochs e ∈ {0, 100, 200, 300, 400}, while the other is trained +without gradient flossing. + + + 17 +For the network without gradient flossing, the gradient norm $|dL/dh_0|$ diminishes to extremely small values +($< 10^{-6}$) and remains small throughout training. In contrast, for the network trained with gradient flossing, each +episode of gradient flossing causes the norm $|dL/dh_0|$ to spike, surpassing $10^{-2}$. These findings +are direct evidence that gradient flossing boosts the gradient norm, which helps to bridge long time horizons in +challenging temporal credit assignment tasks.
We observe that after several episodes of gradient flossing, the +gradient norm $|dL/dh_0|$ of the networks stays around $10^{-4}$ and eventually rises to values around $10^{-2}$. Subsequently +in training, the test accuracy surpasses chance level (Fig 8B). We observed this temporal relationship between +gradient norm $|dL/dh_0|$ and training success consistently across numerous network realizations (Fig 8C and D). +These findings suggest that the gradient norm $|dL/dh_0|$ can be a good predictor of learning success, sometimes +hundreds of epochs before the accuracy exceeds the chance level of 50%. Indeed, when depicting the gradient +norm aligned to the last epoch where accuracy was ≤ 50%, we see for many network realizations a gradual +growth of gradient norm over epochs before accuracy surpasses chance level (Fig 9A). Analogously, when +plotting the accuracy as a function of epoch aligned with the last epoch with $|dL/dh_0| < 0.001$, we observe for this +task that the increase of gradient norm $|dL/dh_0|$ reliably precedes the epoch at which the accuracy surpasses the +chance level (see Fig 9B). We note that when measuring the overlap of the orientation of the gradient vector +$dL/dh_0$ with the first covariant Lyapunov vector of the forward dynamics, we found a significant increase in overlap +around the training epoch where the accuracy surpasses the chance level both in networks with and without +gradient flossing. This does not come as a surprise, as the covariant Lyapunov vector measures the most unstable +(or least stable) direction in the tangent space of a trajectory and perturbations of h0 that have to travel over +many epochs align with this direction. + +D.1 Gradient Flossing Boosts Effective Dimension of Error Gradient +To further investigate the effect of gradient flossing on training, we investigated the structure of the error gradient + dL +and how it is changed by gradient flossing.
To this end, we decompose the recurrent weight gradient $dL/dW$, with singular values $\sigma_i(dL/dW)$, +into a weighted sum of outer products using singular value decomposition (Fig 10). +As the Lyapunov exponents are the time-averaged logarithms of the singular values of the asymptotic long-term +Jacobian Tt (hτ ), this allows us to directly link the effect of pushing Lyapunov exponents toward zero during +gradient flossing to the structure of the error gradient of the recurrent weights, as they are intimately linked: + $$\frac{\partial L_t}{\partial W} = \frac{\partial L_t}{\partial h_t} \sum_{\tau=t-l}^{t-1} \left( \prod_{\tau'=\tau}^{t-1} \frac{\partial h_{\tau'+1}}{\partial h_{\tau'}} \right) \frac{\partial h_\tau}{\partial W} = \frac{\partial L_t}{\partial h_t} \sum_{\tau} T_t(h_\tau)\, \frac{\partial h_\tau}{\partial W} \tag{19}$$ + +We again note that different ’loops’ contribute to the total gradient expression and the Lyapunov exponents only +characterize the longest loop. Further, we note that in our controlled tasks, depending on delay d, only few of the +summands are relevant for solving the task. We thus expect the relevant gradient summand that carries important +signals about the task to be contaminated by summands of both shorter and longer chains, which contribute +irrelevant fluctuations. +The singular values of the recurrent weight gradient $\sigma_i(dL/dW)$ as a function of training epoch reveal that the +subordinate singular values $\sigma_{20}$ and $\sigma_{40}$ exhibit peaks at the times of gradient flossing, +while the first singular value $\sigma_1$ only shows a slight peak (Fig 10A). This indicates that gradient flossing +increases the effective rank of the recurrent weight gradient $dL/dW$. In other words, gradient flossing facilitates +high-dimensional parameter updates. Our interpretation is that, as gradient flossing pushes Lyapunov exponents +to zero, the different summands in the total gradient contribute more equitably, as long loops have neither a +dominant contribution (which would happen for exploding gradients) nor a vanishing contribution (which would +happen for vanishing gradients). This way, the sum of gradient terms has a higher effective rank.
+In contrast, without gradient flossing, the subordinate singular values (in Fig 10A σ20 and σ40 ) rapidly diminish +to extremely small values over training epochs and remain very small throughout training. Note however that the +leading singular values σ1 are of comparable size irrespective whether gradient flossing was performed or not. + dL +We note that similar to the gradient norm of the loss with respect to the initial condition | dh 0 + |, the subordinate +singular values seem to predict when the test accuracy of networks with gradient flossing grows beyond chance +level (Fig 10B). We confirmed this in multiple other network realizations and give here another example we the +accuracy grows beyond chance only later during training (Fig 11). + +D.2 Gradient Flossing Throughout Training Can Be Detrimental +We find that gradient flossing continued throughout all training epochs can be detrimental for performance +(Fig 12). We demonstrate this again in the binary delayed temporal XOR task. We compare three different +conditions: Either, we floss throughout the training every 100 training epochs for 500 flossing epochs (red), or +we floss only early during training at training epochs e ∈ {0, 100, 200, 300, 400})(green) or we do not floss at +all (blue). + + + 18 + A C + 10 2 0.05 + 10 4 0.04 + |dhd 0 | 10 6 0.03 + + + + + |dhd 0 | + 10 8 0.02 + 10 10 0.01 + with flossing during training + 10 12 without flossing + 0.00 + 0 500 1000 1500 2000 2500 3000 0 500 1000 1500 2000 2500 3000 + Epochs Epochs + B D + 100 100 + accuracy (%) + + + + + accuracy (%) + with flossing during training + without flossing + + + 50 50 + 0 500 1000 1500 2000 2500 3000 0 500 1000 1500 2000 2500 3000 + Epochs Epochs + dL +Figure 8: Gradient flossing boosts norm of long-term Jacobian A) Gradient norm of | dh 0 + | as +a function of training epochs for networks without flossing (blue) and networks with flossing +during training (orange). 
Error gradient norm is boosted after gradient flossing at epochs e ∈ + dL +{0, 100, 200, 300, 400, 500}). In networks without gradient flossing, the gradient norms | dh 0 + | are +much smaller overall. One out of ten random network realizations with solid line, the other 9 with +transparent line. B) Accuracy as a function of epoch, same depiction and network realizations as in +A. Note that accuracy of networks with gradient flossing grows beyond chance level approximately + dL +when the gradient norm | dh 0 + | becomes macroscopically large. C) Same as A in linear scale. Mean +final test loss as a function of task difficulty (delay d) for delayed copy task. Different colors are +different network realizations with gradient flossing during training. Black lines are without any +gradient flossing. D) Accuracy as a function of epochs, same colors as in C. Note that for all network + dL +realizations the moments where gradient norm | dh 0 + | becomes macroscopically large coincides with +the moment the accuracy is beyond chance level. Parameters: g = 1, batch size b = 16, N = 80, +epochs = 104 , T = 300, flossing for Ef = 500 epochs on k = 75 Lyapunov exponents before +training. Task: binary delayed XOR, delay d = 70, loss: cross entropy(y, ŷ). + + +We observe that after every episode of gradient flossing, the accuracy drops down close to chance level of 50% +(Fig 12A red line). Between flossing, the accuracy quickly recovers but never reaches 100%. Simultaneously, + dL +when the accuracy drops the test error jumps up (Fig 12B). We also observed that the gradient norm | dh 0 + | is + dL +initially boosted by gradient flossing, but stays close to indistinguishable once the gradient norm | dh0 | becomes +macroscopically large (Fig 12C). This suggests that once gradient flossing facilitates signal propagation across +long time horizons and the network picks up the relevant gradient signal, further gradient flossing can be harmful +to the actual task execution. 
We hypothesize that there might be (at least for the Vanilla networks considered +here), a trade-off between the ability to bridge long time scales which seems to require one or several Lyapunov +exponents of the forward dynamics close to zero and nonlinear tasks requirements, which require at least a +fraction of the units to be in the nonlinear regime of the nonlinearity ϕ, where ϕ′ (x) < 1. It would be an +interesting future research avenue to further investigate this potential trade-off also in other network architectures. + +D.3 Lyapunov Exponents after Training With and Without Gradient Flossing +In Fig 13, we show the first (Fig 13A, B) and the tenth (Fig 13C, D) Lyapunov exponent after training on +the spatial delayed XOR task both with and without gradient flossing. We find for successful networks with +gradient flossing a systematic relationship between the first Lyapunov exponent and the delay, that can be fitted +by approximately by λ1 (d) = −0.2exp.(−0.03delay). Unsuccessful networks with accuracy at chance level +have a much smaller largest Lyapunov exponent. The same seems to hold true for the tenth Lyapunov exponent. +In a previous study [57], a similar trend was observed, albeit in the context of a task that did not possess an + + + 19 + A B 80 + + 75 + 10 2 + 70 + 10 3 + + + + + accuracy (%) + 65 + |dhd 0 | + + + + 10 4 60 + 55 + 10 5 + 50 + 45 + 3000 2000 1000 0 1000 0 100 200 300 400 + Epochs Epochs + + +Figure 9: Increase of gradient norm precedes epoch when accuracy exceeds chance level + dL +A) Gradient norm of | dh 0 + | as a function of training epochs for 20 network realizations with flossing +during training. Epochs are aligned to last epoch where accuracy is ≤ 50%. B) Same task and +simulations as in A, but here accuracy as a function of epoch, for 20 network realizations with + dL +flossing during training. Epochs are aligned to the last epoch with | dh 0 + | < 0.001. Different colors +are different network realizations. 
Parameters: g = 1, batch size b = 16, N = 80, epochs = $10^4$, +T = 300, flossing for Ef = 500 epochs on k = 75 Lyapunov exponents early during training at +training epochs e ∈ {0, 100, 200, 300, 400}. Task: binary delayed XOR, delay d = 70, loss: cross +entropy(y, ŷ). + + +analytically tractable temporal correlation structure, which might partially explain the less conclusive results. +It is important to note that the numerical evaluation of Lyapunov exponents in recurrent LSTM networks in +[57] was based solely on the N × N Jacobian of the memory state. From a dynamical systems standpoint, a +2N × 2N Jacobian matrix that takes interactions between both memory and cell states into account is +required [9, 12, 58]. + + +E Gradient Flossing for Linear Network +We provide code for gradient flossing in linear networks here. We find that gradient flossing also helps to train +linear networks on tasks with many time steps that can be solved by linear networks, for example the copy task, +but not for tasks that require a nonlinear input-output operation like the temporally delayed XOR task. A full +analytical description of gradient flossing for linear networks would be a promising avenue for future research as +networks with linear dynamics can still have nonlinear learning dynamics [21]. However, this is beyond the scope +of the presented work. + + +F Computational Complexity of Gradient Flossing +We present here a more in-depth scaling analysis of the computational cost of gradient flossing. There are three +main contributors to the computational cost (Table 1): First, the RNN step, which has a computational complexity +of $O(N^2 b)$ per time step, where N is the dimension of the recurrent network state (which in case of Vanilla +networks equals the number of units) and b is the batch-size both in the forward and backward pass. Second, the +Jacobian step, which scales with $O(N^2 k)$ per time step, where k is the number of flossed Lyapunov exponents.
+Third, the QR decomposition, which scales with O N k2 , where k is the number of Lyapunov exponents +  +considered. +Together, this results in a total amortized cost of O N 2 b T per training epoch, where T is the number of +  + +training time steps and a total amortized costs per flossing epoch of O N 2 Tf (1 + k/tONS + k) where Tf is +  +the number of flossing time steps. + + + 20 + A + 10 2 + i dW ) + d + ( + + + + 10 4 + + + 0 500 1000 1500 2000 2500 3000 + B 100 + early training flossing + without flossing + accuracy (%) + + + + + 50 + 0 500 1000 1500 2000 2500 3000 + Epochs +Figure 10: Gradient flossing decreases condition number of recurrent weight error gradient + dL +  +A) Singular values of recurrent weight gradient σi dW as a function of training epochs for singular +values i ∈ 1, 20, 40 for networks without gradient flossing (blue) and early training gradient flossing +(green). At epochs of gradient flossing, the subordinate singular value σ20 and σ40 are peaked, while +the first singular value σ1 has only a slight peak. This indicates that gradient flossing increases the + dL +effective rank of the recurrent weight gradient dW . B) Accuracy as a function of training epochs. +Note that accuracy of networks with gradient flossing grows beyond chance level approximately +when the subordinate singular values singular value σ20 and σ40 are peaked increase, which enables +high-dimensional parameters updates. Parameters: g = 1, batch size b = 16, N = 80, epochs = 103 , +T = 300, gradient flossing for Ef = 500 epochs on k = 75 Lyapunov exponents before training. +Task: binary delayed XOR, delay d = 70, loss: cross entropy(y, ŷ). + + + + +In case of preflossing, thus, the total computation cost scale with O N 2 [Eb T + Ep Tf (1 + k/tONS + k)] , +  +where E is the number of training epochs and Ep is the number of preflossing epochs. 
+For gradient flossing during training (assuming that there is also preflossing done), the amortized cost scales with +$O\left(N^2 \left[E\, b\, T + E_p T_p + E_f T_f (1 + k/t_{\mathrm{ONS}} + k)\right]\right)$, where Ef is the total number of flossing epochs during +training. +Empirically, we find that both the number of preflossing epochs Ep and flossing episodes Ef necessary for +training success are much smaller than the total number of training epochs E. For example, the preflossing for +500 epochs in the numerical experiment of Fig 3 took ∼ 37 seconds, while the overall training on 10000 training +epochs with batch size b = 16 took ∼ 1680 seconds. Thus only approximately 2.2% of the total training time +was spent on gradient flossing. Moreover, Tp can be smaller than T ; it just has to be long enough such that the +temporal correlations in the task can be bridged. In case of the tasks discussed in the manuscript, this would be +the delay d. It remains an important challenge to infer the suitable number of flossing time steps Tf for tasks +with unknown temporal correlation structure. +It would also be interesting to investigate how the CPU hours/wall-clock time/flops/Joule/CO2-emission spent +on gradient flossing vs on training networks with larger N are trading off against each other. For this, we would +suggest to first find the smallest network that on median successfully trains on a binary temporal XOR task for +a fixed given delay d and measure the computational resources involved in training it, e.g. in terms of CPU +hours. Then compare it to a network with gradient flossing. This would be a promising analysis but is beyond +our current computational budget. We will start such experiments and might be able to provide results during the +reviewer period.
+ + + 21 + A + 10 2 + i dW ) + d + ( + + + + 10 4 + + + 0 500 1000 1500 2000 2500 3000 + B 100 + early training flossing + without flossing + accuracy (%) + + + + + 50 + 0 500 1000 1500 2000 2500 3000 + Epochs +Figure 11: Gradient flossing decreases condition number of recurrent weight error gradient +Same as Fig 10 for different network realization. + + + +G Additional controls + +We also investigate the effects of gradient flossing during the training with orthogonal weight initializations +and confirm our finding that gradient flossing improves trainability on tasks that have long time horizons to +bridge. Moreover, we find that gradient flossing during training can further improve trainability. We replicated +the two more challenging tasks from the main paper (Fig 4) for orthogonal initialization with variable temporal +complexity and performed gradient flossing either both during and before training, only before training, or not at +all. +Fig 14A shows the test accuracy for Vanilla RNNs with orthogonal initialization trained on the delayed temporal +XOR task yt = xt−d/2 ⊕ xt−d with random Bernoulli process x ∈ {0, 1}. The accuracy of orthogonal Vanilla +RNNs falls to chance level for d ≥ 40 (Fig 14C). With gradient flossing before training, the trainability can +be improved, but still falls close to chance level for d = 70. In contrast, for initially orthogonal networks with +gradient flossing during training, the accuracy is improved to > 80% at d = 70. In this case, we preflossed for +500 epochs before task training and again after 500 epochs of training on the task. In Fig 14B, D the networks +have to perform the nonlinear XOR operation yt = x1t−d ⊕ x2t−d ⊕ x3t−d on a three-dimensional binary input +signal x1 , x2 , and x3 and generate the correct output with a delay of d steps identical to Fig 4 in the main text. 
+Again, we observe similar to networks with Gaussian initialization that flossing before training improves the +performance compared to baseline, but starts failing for long delays d > 60. In contrast, orthogonal networks +that are also flossed during training can solve even more difficult tasks (Fig 14D). We note that for Fig 14B and +D, we trained the network only on 5000 epochs, compared to 10000 epochs in networks with random Gaussian +initialization because for 10000 epochs, both networks with gradient flossing only before training and with +gradient flossing before and during training were able to bridge d = 70. These results suggest that orthogonal +initialization does seem to slightly improve performance for tasks with long time horizons to bridge and gradient +flossing and additionally boost the performance. Thus orthogonal initialization and gradient flossing seems to go +well together. It would be interesting to study if orthogonal initialization also reduces the number of gradient +flossing steps necessary to improve performance. + + + +H Additional Details on Training Tasks + +In this section, we provide a more rigorous definition of the tasks used for training, as discussed in Section 3: + + + 22 + A 100 + + + accuracy (%) + with flossing throughout training + early training flossing + without flossing + 50 + 0 500 1000 1500 2000 2500 3000 + B + 100 + test error + + + + + 10 1 + 0 500 1000 1500 2000 2500 3000 + C + 10 3 with flossing throughout training + early training flossing + without flossing + |dhd 0 | + + + + + 10 7 + + 0 500 1000 1500 2000 2500 3000 + Epochs +Figure 12: Gradient flossing throughout training can be detrimental to learning A) Accuracy as a +function of training epochs for binary temporal delayed XOR task for gradient flossing throughout +training every 100 training epochs (red). Accuracy drops down close to chance level every time after +gradient flossing but recovers quickly between. 
Same for only 5 episodes of gradient flossing at +epochs e ∈ {0, 100, 200, 300, 400} (green) and no flossing at all (blue). B) Test error as a function +of training epochs. C) Gradient norm $|dL/dh_0|$ as a function of training epochs for networks without +gradient flossing (blue) and networks with flossing throughout training (red) and early training +gradient flossing (green). Error gradient norm is boosted after each gradient flossing. In networks +without gradient flossing, the gradient norms $|dL/dh_0|$ are much smaller overall. Parameters: g = 1, +batch size b = 16, N = 80, epochs = $10^4$, T = 300, gradient flossing for Ef = 500 epochs on +k = 75 Lyapunov exponents before training. Task: binary delayed XOR, delay d = 70, loss: cross +entropy(y, ŷ). + + +H.1 Copy task +For the copy task, the target network readout at time t is yt = xt−d , where d denotes the delay. We chose the +input to be sampled i.i.d. from a uniform distribution between 0 and 1. + +H.2 Temporal XOR task +The temporal XOR task requires the target network readout yt at time t to be computed as follows: + $y_t = |x_{t-d/2} - x_{t-d}|$ (20) +where again d denotes a time delay of d time steps. In the case of x ∈ {0, 1} and y ∈ {0, 1}, the output yt +follows the truth table of the XOR digital logic gate (Table 2). Thus, the function f (xa , xb ) = |xa − xb | can be +seen as an analytical representation of the XOR gate. It is important to note that f (x, 0) = x only for x ≥ 0, +and that this task requires a nonlinearity. The implementation can easily be constructed analytically, for example, +using two rectified linear units ϕ(x) = max(x, 0) the output can be constructed by + f (xa , xb ) = |xa − xb | = ϕ(xa − xb ) + ϕ(xb − xa ). (21) +Together with a delay line to transmit the signal xt−d over time, this can solve the task.
+ + + 23 + A no gradient flossing B gradient flossing during training + 0.1 + 1 + 0.1 + + + + + 1 + 0.2 + 0.2 + 20 40 60 20 40 60 + complexity (delay d) complexity (delay d) + C D + 0.1 + 0.2 0.1 + 10 + + + + + 10 + 0.3 0.2 + 20 40 60 20 40 60 + complexity (delay d) complexity (delay d) +Figure 13: Lyapunov exponents of trained networks with and without gradient flossing A) First +Lyapunov exponents λ1 for Vanilla networks trained on spatial delayed XOR task as a function of +the delay with no gradient flossing. Colored-coded is test accuracy at the end of training where red +corresponds to 100% accuracy and blue to chance level (50%). B) Same as A for networks with +gradient flossing during training. Black dashed line shows that Lyapunov exponents of successfully +trained networks can be approximated by the empirical fit λ1 (d) = −0.2exp.(−0.03delay). (Proto- +col for gradient flossing during training same as main text Fig 4B). C) Same as A for tenth Lyapunov +exponents λ10 . D) Same as B for tenth Lyapunov exponents λ10 . Same fit as in B also describes +λ10 . Parameters: g = 1, batch size b = 16, N = 80, epochs = 104 , T = 300, gradient flossing for +Ef = 500 epochs on k = 75 Lyapunov exponents before training. Task: binary spatial delayed XOR, +loss: cross entropy(y, ŷ). + + +I Additional Background on Lyapunov Exponents of RNNs + +An autonomous dynamical system is usually defined by a set of ordinary differential equations dh/dt = +F(h), h ∈ RN in the case of continuous-time dynamics, or as a map hs+1 = f (hs ) in the case of discrete-time +dynamics. In the following, the theory is presented for discrete-time dynamical systems for ease of notation, +but everything directly extends to continuous-time systems [43]. Together with an initial condition h0 , the +map forms a trajectory. As a natural extension of linear stability analysis, one can ask how an infinitesimal +perturbation h′0 = h0 + ϵu0 evolves in time. 
Chaotic systems are sensitive to initial conditions; almost all +infinitesimal perturbations ϵu0 of the initial condition grow exponentially with time |ϵut | ≈ exp(λ1 t)|ϵu0 |. +Finite-size perturbations, therefore, may lead to a drastically different subsequent behavior. The largest Lyapunov +exponent λ1 measures the average rate of exponential divergence or convergence of nearby initial conditions: + + + 1 ||ϵut || + λ1 (h0 ) = lim lim log (22) + t→∞ t ϵ→0 ||ϵu0 || +In dynamical systems that are ergodic on the attractor, the Lyapunov exponents do not depend on the initial +conditions as long as the initial conditions are in the basins of attraction of the attractor. Note that it is crucial +to first take the limit ϵ → 0 and then t → ∞, as λ1 (h0 ) would be trivially zero for a bounded attractor if the + ||ϵut || +limits are exchanged, as limt→∞ log ||ϵu 0 || + is bounded for finite perturbations even if the system is chaotic. To +measure k Lyapunov exponents, one has to study the evolution of k independent infinitesimal perturbations us +spanning the tangent space: + + + us+1 = Ds us (23) + + + 24 + forward pass backward pass + O N2 b +  + RNN dynamics " + O N2 k +  + Jacobian step " + O N k2 +  + QR step " + O N2 b T +  + total amortized costs " + per training epoch + O N 2 Tf (1 + k/tONS + k) +  + total amortized costs " + per gradient flossing epoch + O N 2 [Eb T + Ep Tf (1 + k/tONS + k)] +  + total amortized costs " + of preflossing + O N 2 [Eb T + Ep Tp + Ef Tf (1 + k/tONS + k)] +  + total amortized costs " + flossing during training + +Table 1: Computational cost for gradient flossing and training of RNNs +N denotes number of neurons, b is the batch size, T is the number of time steps in forward pass of +training, Tf is the number of time steps in forward pass of flossing, tONS is the reorthonormalization +interval, k is the number of flossed Lyapunov exponents, E is the number of training epochs, Ep is +the number of preflossing epochs, Ef is the number of 
flossing epochs during training. Empirically, +we find that the necessary number of preflossing epochs Ep and flossing episodes Ef is much smaller +than both the total number of training epochs E. Moreover, Tp can be smaller than T . + + Table 2: XOR + input xt−d input xt−2d target output yt + 0 0 0 + 0 1 1 + 1 0 1 + 1 1 0 + + +where the N × N Jacobian Ds (hs ) = df (hs )/dh characterizes the evolution of generic infinitesimal perturba- +tions during one step. Note that this Jacobian along the trajectory is equivalent to a stability matrix only at a +fixed point, i.e., when hs+1 = f (hs ) = hs . +We are interested in the asymptotic behavior, and therefore we study the long-term Jacobian + + Tt (h0 ) = Dt−1 (ht−1 ) . . . D1 (h1 )D0 (h0 ). (24) +Note that Tt (h0 ) is a product of generally noncommuting matrices. The Lyapunov exponents λ1 ≥ λ2 · · · ≥ λN +are defined as the logarithms of the eigenvalues of the Oseledets matrix + 1 + Λ(h0 ) = lim [Tt (h0 )⊤ Tt (h0 )] 2t , (25) + t→∞ + +where ⊤ denotes the transpose operation. The expression inside the brackets is the Gram matrix of the long-term +Jacobian Tt (h0 ). Geometrically, the determinant of the Gram matrix is the squared volume of the parallelotope +spanned by the columns of Tt (h0 ). Thus, the exponential volume growth rate is given by the sum of the +logarithms of its first k (sorted) eigenvalues. Oseledets’ multiplicative ergodic theorem guarantees the existence +of the Oseledets matrix Λ(h0 ) for almost all initial conditions h0 [42]. In ergodic systems, the Lyapunov +exponents λi do not depend on the initial condition h0 . However, for a numerical calculation of the Lyapunov +spectrum, Eq 25 cannot be used directly because the long-term Jacobian Tt (h0 ) quickly becomes ill-conditioned, +i.e., the ratio between its largest and smallest singular value diverges exponentially with time. 
+ + +J Algorithm for Calculating Lyapunov Spectrum of Rate Networks +For calculating the first k Lyapunov exponents, we exploit the fact that the growth rate of an m-dimensional +infinitesimal volume element is given by $\lambda^{(m)} = \sum_{i=1}^{m} \lambda_i$. Therefore, $\lambda_1 = \lambda^{(1)}$, $\lambda_2 = \lambda^{(2)} - \lambda_1$, +$\lambda_3 = \lambda^{(3)} - \lambda_1 - \lambda_2$, . . . [44]. The volume growth rates can be obtained via QR-decomposition. + + + 25 + orthogonal weight initialization + A delayed temporal XOR B delayed spatial XOR + 100 100 +test accuracy (%) flossing during training + + + + + test accuracy (%) + preflossing + 80 no flossing 80 + + 60 60 + + 0 2000 4000 6000 8000 10000 0 1000 2000 3000 4000 5000 + Epochs Epochs + C D + 100 100 +test accuracy (%) + + + + + test accuracy (%) + 80 80 flossing during training + preflossing + no flossing + 60 60 + + 10 20 30 40 50 60 70 10 20 30 40 50 60 70 + complexity (delay d) complexity (delay d) + +Figure 14: Gradient flossing before and during training improves trainability for orthogonal +nets +A) Test accuracy for orthogonally initialized vanilla RNNs trained on delayed temporal binary XOR +task $y_t = x_{t-d/2} \oplus x_{t-d}$ with gradient flossing during training (green), preflossing (orange), and +with no gradient flossing (blue) for d = 70. Solid lines are mean, transparent thin lines are individual +network realizations. B) Same as A for delayed spatial XOR task with $y_t = x^1_{t-d} \oplus x^2_{t-d} \oplus x^3_{t-d}$. +C) Test accuracy as a function of task difficulty (delay d) for delayed temporal XOR task. D) Test +accuracy as a function of task difficulty (delay d) for delayed spatial XOR task. Parameters: g = 1, +batch size b = 16, N = 80, epochs = $10^4$ for delayed temporal XOR, epochs = 5000 for delayed +spatial XOR, T = 300, flossing for Ef = 500 epochs on k = 75 Lyapunov exponents before training +and during training for green lines, and only before training for orange lines.
Shaded areas are 25% +and 75% percentiles, solid lines are means, transparent dots are individual simulations, task loss is +cross-entropy between y, ŷ. + + +First, we evolve an initially orthonormal system $Q_s = [q_s^1, q_s^2, \ldots, q_s^k]$ in the tangent space along the trajectory +using the Jacobian Ds : + \[ \tilde{Q}_{s+1} = D_s Q_s \tag{26} \] +A continuous system can be transformed into a discrete system by considering a stroboscopic representation, +where the trajectory is only considered at certain discrete time points. We use here the notation of discrete +dynamical systems where this corresponds to performing the product of Jacobians along the trajectory $\tilde{Q}_{s+1} = D_s Q_s$. +We study the discrete network dynamics in the limit of small time step ∆t → 0 and for discrete time +∆t = 1. The notation can be readily extended to continuous systems [43]. +Second, we extract the exponential growth rates using the QR-decomposition, + \[ \tilde{Q}_{s+1} = Q_{s+1} R^{s+1}, \] + +which uniquely decomposes $\tilde{Q}_{s+1}$ into an orthonormal matrix $Q_{s+1}$ of size N × k, so that $Q_{s+1}^{\top} Q_{s+1} = \mathbb{1}_{k \times k}$, +and an upper triangular matrix $R^{s+1}$ of size k × k with positive diagonal elements. Geometrically, $Q_{s+1}$ +describes the rotation of Qs caused by Ds and the diagonal entries of $R^{s+1}$ describe the stretching and shrinking +of the columns of Qs , while the off-diagonal elements represent the shearing. Fig 15 visualizes Ds and the +QR-decomposition for k = 2. +The Lyapunov exponents are given by time-averaged logarithms of the diagonal elements of $R^s$: + \[ \lambda_i = \lim_{t\to\infty} \frac{1}{t} \log \prod_{s=1}^{t} R^s_{ii} = \lim_{t\to\infty} \frac{1}{t} \sum_{s=1}^{t} \log R^s_{ii}. \tag{27} \] + + + + 26 + Ds + QR + + s+1 + R22 +qs2 ~2 s+1 + 2 + qs+1 + qs+1 R11 ~1 + qs+1 + s+1 + qs1 R11 1 + qs+1 + s+1 + R22 + +Figure 15: Geometric illustration of Lyapunov spectrum calculation. An orthonormal matrix +$Q_s = [q_s^1, q_s^2, \ldots, q_s^k]$, whose columns are the axes of a k-dimensional cube, is rotated and distorted +by the Jacobian Ds into a k-dimensional parallelotope $\tilde{Q}_{s+1} = D_s Q_s$ embedded in $\mathbb{R}^N$.
The +figure illustrates this for k = 2, in which case the columns of $\tilde{Q}_{s+1}$ span a parallelogram, which +can be divided into a right triangle and a trapezoid and rearranged into a rectangle. Thus, the +area of the gray parallelogram is the same as that of the orange rectangle. The QR-decomposition +reorthonormalizes $\tilde{Q}_{s+1}$ by decomposing it into the product of an orthonormal matrix $Q_{s+1} = [q_{s+1}^1, q_{s+1}^2, \ldots, q_{s+1}^k]$ +and the upper-triangular matrix $R^{s+1}$. $Q_{s+1}$ describes the rotation of $Q_s$ +caused by $D_s$. The diagonal entries of $R^{s+1}$ give the stretching/shrinking along the columns of +$Q_{s+1}$; thus the volume of the parallelotope formed by the first k columns of $\tilde{Q}_{s+1}$ is given by +$V_k = \prod_{i=1}^{k} R^{s+1}_{ii}$. The time-averaged logarithms of the diagonal elements of $R^s$ give the Lyapunov +spectrum: $\lambda_i = \lim_{t_{sim}\to\infty} \frac{1}{t_{sim}} \log \prod_{s=1}^{t_{sim}} R^s_{ii} = \lim_{t_{sim}\to\infty} \frac{1}{t_{sim}} \sum_{s=1}^{t_{sim}} \log R^s_{ii}$. + + +Note that the QR-decomposition does not need to be performed at every simulation step, just sufficiently often, +i.e., once every sONS steps such that $\tilde{Q}_{s+s_{ONS}} = D_{s+s_{ONS}-1} \cdot D_{s+s_{ONS}-2} \cdots D_s \cdot Q_s$ remains well-conditioned +[44]. An appropriate reorthonormalization interval sONS = tONS /∆t thus depends on the condition number, the +ratio of the largest to the smallest singular value: + \[ \kappa_2(\tilde{Q}_{s+s_{ONS}}) = \kappa_2(R^{s+s_{ONS}}) = \frac{\sigma_1(R^{s+s_{ONS}})}{\sigma_k(R^{s+s_{ONS}})} = \frac{R^{s+s_{ONS}}_{11}}{R^{s+s_{ONS}}_{kk}}. \tag{28} \] + +An initial transient should be disregarded in the calculation of the Lyapunov spectrum because h first has to +converge towards the attractor and Q has to converge to the unique eigenvectors of the Oseledets matrix (Eq 25) +[54].
A simple example of this algorithm in pseudocode is: + +Algorithm 2 Jacobian-based algorithm for Lyapunov spectrum + initialize h, Q + evolve h until it is on attractor (avoid initial transient) + evolve Q until it converges to the eigenvectors of the backward Oseledets matrix + set γi = 0 + for t = 1 → T do + h ← f (h) + D ← df /dh + Q ← D · Q + if t ≡ 0 (mod sONS ) then + Q, R ← qr(Q) + γi += log(Rii ) + end if + end for + λi = γi /T + + +It is guaranteed that under general conditions initially random orthonormal systems will exponentially converge +towards a unique basis that is given by the eigenvectors of the Oseledets matrix Eq 25 [54]. A minimal example +of this algorithm in pseudocode is shown in appendix 3. A feasible strategy to determine the reorthonormalization +time interval tONS is to first get a rough estimate of the Lyapunov spectrum using a short simulation time tsim and +a small tONS , and then repeat with a longer simulation time and a tONS chosen based on that rough +estimate. Another strategy is to first iteratively adapt tONS on a short simulation +run to get an acceptable condition number. It should be noted that there exists a diversity of other methods to +estimate the Lyapunov spectrum [14, 43, 68, 69]. + + + 27 + K Convergence of Lyapunov Exponents of RNNs +In Fig. 16, we demonstrate the convergence of the Lyapunov exponents. We show the estimate of the Lyapunov +exponents λi for i = 1, 20, 60, 80 for different initial conditions but identical network realization. + + 0 + 10 +λi (1/steps) + + + + + 20 + 30 + 40 + 10^0 10^1 10^2 + steps + + +Figure 16: Convergence of Lyapunov exponents Convergence of selected Lyapunov exponents +λi for ten identical network realizations with different initial conditions with simulation time (i = +1, 20, 60, 80) for σ = 1 and g = 1. (Other parameters: N = 80, tsim = 100 steps, tONS = 1).
+ + + + + 28 + \ No newline at end of file diff --git a/papers/txt/gram2025_generative_recursive.txt b/papers/txt/gram2025_generative_recursive.txt new file mode 100644 index 0000000..d5f299d --- /dev/null +++ b/papers/txt/gram2025_generative_recursive.txt @@ -0,0 +1,1532 @@ + Generative Recursive Reasoning + + + Junyeob Baek1†∗ Mingyu Jo1†∗ Minsu Kim1,2 + + Mengye Ren3 Yoshua Bengio2,4 Sungjin Ahn1,3† +arXiv:2605.19376v2 [cs.AI] 20 May 2026 + + + + + 1 + KAIST 2 Mila – Québec AI Institute + 3 + New York University 4 Université de Montréal + + + + Abstract + How should future neural reasoning systems implement extended computation? + Recursive Reasoning Models (RRMs) offer a promising alternative to autoregres- + sive sequence extension by performing iterative latent-state refinement with shared + transition functions. Yet existing RRMs are largely deterministic, following a + single latent trajectory and converging to a single prediction. We introduce Gen- + erative Recursive reAsoning Models (GRAM), a framework that turns recursive + latent reasoning into probabilistic multi-trajectory computation. GRAM models + reasoning as a stochastic latent trajectory, enabling multiple hypotheses, alterna- + tive solution strategies, and inference-time scaling through both recursive depth + and parallel trajectory sampling. This yields a latent-variable generative model + supporting conditional reasoning via pθ (y | x) and, with fixed or absent inputs, + unconditional generation via pθ (x). Trained with amortized variational inference, + GRAM improves over deterministic recurrent and recursive baselines on structured + reasoning and multi-solution constraint satisfaction tasks, while demonstrating an + unconditional generation capability. https://ahn-ml.github.io/gram-website + + + 1 Introduction + A central question for future neural reasoning systems is how extended computation should be imple- + mented. 
Large autoregressive models typically scale reasoning by extending a sequence-generation + process, whether intermediate computation is expressed explicitly as chain-of-thought tokens or im- + plicitly in hidden or latent representations [1–6]. A complementary direction is explored by Recursive + Reasoning Models (RRMs), which use repeated computation to refine a persistent latent state rather + than to append new elements to an output or reasoning sequence [7–9]. This approach is appealing + because it decouples reasoning depth from both parameter scale and output length: a compact model + can perform many steps of internal computation by repeatedly applying shared transition functions + over time. + Recent recursive reasoning models such as HRM [8] and TRM [9] provide early evidence for the + potential of this approach in structured reasoning. Rather than producing a solution in a single + feedforward pass, they perform extended computation through iterative latent-state refinement, deep + supervision across refinement steps, and reasoning-oriented recurrent designs such as hierarchical + latent dynamics. These features make them well suited to problems requiring constraint propagation, + state tracking, iterative correction, and multi-step inference. More broadly, they build on a principle + ∗ Equal contribution + † Correspondence to: Junyeob Baek (wnsdlqjtm@kaist.ac.kr), Mingyu Jo (mingyu.jo@kaist.ac.kr), Sungjin Ahn + + (sungjin.ahn@kaist.ac.kr) + + + Preprint. + Solution 1, + + + Input Task, + + + + Solution 2, + + + + + (a) Deterministic RRMs (b) GRAM (Ours) + +Figure 1: Comparison of Latent Reasoning Trajectories. Left: N-Queens Example with two valid solutions. +Right: Given three independent runs for latent reasoning (τ1 , τ2 , τ3 ): (a) Prior RRMs (e.g. 
HRM, TRM) are +deterministic—all runs collapse to an identical trajectory, converging to a single solution and failing to explore +alternatives, while (b) GRAM explores diverse trajectories, producing diverse trajectories that reach multiple +valid solutions y1 and y2 , while naturally enabling parallel inference-time scaling. + +also explored in recurrent Transformer architectures such as Universal Transformers [10] and Looped +Transformers [7]: shared Transformer blocks can be repeatedly applied to increase computational +depth without increasing parameter count. Together, these models suggest that reasoning capability +can emerge not only from scaling model size or generating longer traces, but also from the organization +of computation itself. +While recurrent latent-state refinement provides an appealing mechanism for efficiently increasing +reasoning depth, depth alone is not sufficient for many reasoning problems. A capable reasoning +system should also be able to maintain uncertainty, consider alternative hypotheses, and explore +multiple possible solution strategies [11, 12]. This is especially important in settings where ambiguity +or multiple valid solutions are intrinsic, and more generally in problems where a single refinement +path may become trapped in a suboptimal reasoning trajectory. In this sense, future RRMs should be +not only deep, in the sense of repeated refinement, but also wide, in the sense of maintaining and +exploring multiple latent trajectories in parallel. +Existing RRMs [7–10], however, remain fundamentally deterministic: given the same input and +initialization, they follow a single latent trajectory and converge to a single prediction. This deter- +ministic recursion collapses the space of plausible reasoning paths into a single attractor, leaving +probabilistic multi-hypothesis latent reasoning largely unexplored within the RRM paradigm. 
This +motivates the central question of our work: can recursive latent computation support probabilistic, +generative, multi-hypothesis reasoning while preserving the efficiency of compact recurrent models? +In this paper, we propose Generative Recursive reAsoning Models (GRAM), a framework that turns +recursive latent reasoning into probabilistic multi-trajectory computation. GRAM treats the reasoning +process itself as a stochastic latent trajectory: at each recursion step, the model samples a transition +conditioned on the input and the current reasoning state, rather than deterministically updating to a +single next state. Repeating this process defines a distribution over possible reasoning trajectories, +allowing the model to maintain multiple hypotheses, explore alternative solution strategies, and scale +inference not only by increasing recursive depth but also by sampling trajectories in parallel. From +a probabilistic perspective, GRAM is a latent-variable generative model: it models pθ (y | x) by +marginalizing over latent reasoning trajectories, while the same recursive process can also define an +unconditional generative model pθ (x) when the input is fixed or absent. +We evaluate GRAM on controlled reasoning and generation tasks that serve as probes of the ar- +chitectural properties targeted by our formulation: recursive refinement, stochastic exploration, +multi-solution coverage, and inference-time scaling. Given this goal, our experiments focus on +comparisons with the most relevant deterministic recurrent and recursive latent reasoning baselines, +including Looped Transformers, HRM, and TRM, rather than frontier-scale general-purpose LLMs +whose training data, inference budgets, and external scaffolding are not directly comparable. 
Sudoku- +Extreme [8] and ARC-AGI [13, 14] test structured reasoning under hard constraints and abstract +transformations; N-Queens and Graph Coloring evaluate multi-solution recovery; and binarized +MNIST [15] probes the unconditional generative interpretation. +Our main contribution is to establish probabilistic multi-trajectory recursion as a design principle +for future recurrent and recursive reasoning architectures. Concretely, we make three contributions. + + + 2 + First, we formulate recursive reasoning as a latent-variable generative process, where solutions are +obtained by marginalizing over stochastic reasoning trajectories. Second, we introduce width-based +inference-time scaling, enabling inference to scale not only with recursive depth but also with the +number of sampled latent trajectories. Third, we provide empirical evidence that this formulation +yields the intended architectural advantages over deterministic recurrent and recursive baselines, +improving structured reasoning, multi-solution constraint satisfaction, and unconditional generation. + +2 Generative Recursive Reasoning Models +In this section, we introduce Generative Recursive reAsoning Models (GRAM), an instantiation +of probabilistic recursive reasoning. We describe the architecture in Section 2.1 and the training +procedure in Section 2.2, with an architecture schematic shown in Figure 2. + +2.1 Architecture + + [Figure 2 diagram labels] + 𝑦 (A) CE loss + Prior (B) KL Div. Posterior Decoder + 𝑝𝜃 (⋅ |𝑢𝑡 ) 𝑞𝜙 (⋅ |𝑢𝑡 , 𝑦) ~ 𝜖𝑡 𝑓dec + ℎ𝑡−1 𝑓𝐻 𝑢𝑡 ℎ𝑡 + 𝐾 times + 𝑙𝑡−1 𝑓𝐿 𝑓𝐿 𝑙𝑡 + Encoder + 𝑥 + 𝑓enc + +Overview. GRAM models the conditional distribution pθ (y | x) by marginalizing over stochastic +latent reasoning trajectories. Given an input x, GRAM first computes an embedding + \[ e_x = f_{enc}(x; \theta), \tag{1} \] +which is reused throughout the entire recursive computation. Starting from a fixed initial latent +state z0 , the model evolves the latent state through learned stochastic transitions.
The recursive computation is organized into two nested levels: inner and outer loops. + +Figure 2: GRAM Architecture. A single stochastic latent transition in the hierarchical instantiation z = +(h, l). After K low-level refinements via fL , the high-level update fH produces a deterministic proposal ut , to +which stochastic guidance ϵt is added: ht = ut + ϵt . + +At the inner level, a latent transition samples a new latent state conditioned on the previous +latent state and the input embedding, + \[ z_t \sim p_\theta(z_t \mid z_{t-1}, e_x), \quad t = 1, \ldots, T. \tag{2} \] +At the end of the T transitions, the decoder produces a prediction, ŷ = arg max fdec (zT ; θ). We refer +to the sequence of T transitions from the initial state z0 to the final state zT as a supervision step. A +supervision step is the unit at which the decoder is invoked, and the training objective is applied, with +gradients computed as described in Section 2.2. +At the outer level, Nsup supervision steps are applied recursively, with the final state of one supervision +step serving as the initial state of the next, thereby forming the full recursive computation: + \[ z_0^{(1)} \xrightarrow{T\ \text{transitions}} z_T^{(1)} = z_0^{(2)} \xrightarrow{T\ \text{transitions}} \cdots \xrightarrow{T\ \text{transitions}} z_T^{(N_{sup})}, \tag{3} \] +where $z_t^{(n)}$ denotes the latent state at the t-th transition of the n-th supervision step, $z_0^{(1)}$ is the +fixed initial state, and the terminal state of one supervision step serves as the initial state of the next +($z_0^{(n+1)} := z_T^{(n)}$). This abstract formulation can be instantiated with various recurrent Transformer +backbones, including flat designs such as Universal Transformers and Looped Transformers [10, 7], +as well as hierarchical designs such as HRM and TRM [8, 9]. +Stochastic Latent Transitions.
Unlike prior recursive reasoning models (RRMs) that update the +latent state deterministically and follow a single fixed trajectory [8, 9], GRAM defines pθ (zt | +zt−1 , ex ) as a stochastic transition, so that repeated computation induces a distribution over latent +reasoning trajectories. Concretely, GRAM realizes this transition as a learned stochastic residual +perturbation around a deterministic update: at each transition, the model first computes a deterministic +update ut from zt−1 and ex , then samples a conditional perturbation from a state-dependent Gaussian, +and adds it to ut : + \[ \epsilon_t \sim p_\theta(\epsilon_t \mid u_t) := \mathcal{N}\big(\mu_\theta(u_t),\, \sigma_\theta^2(u_t) I\big), \tag{4} \] + \[ z_t = u_t + \epsilon_t. \tag{5} \] + + + 3 + We refer to ϵt as the learnable stochastic guidance. The mean µθ (ut ) encodes a state-dependent +direction in which the trajectory is steered, while the variance σθ2 (ut ) controls the amount of ex- +ploration. This design allows GRAM to capture uncertainty, prevent convergence to local minima, +and support robust exploration of the solution space without discarding the deterministic refinement +performed by ut . +Hierarchical Instantiation. We instantiate the latent state with two interacting components, z = +(h, l). The high-level component h is updated once per latent transition and carries abstract reasoning +state, while the low-level component l is updated K times within a single transition and carries +fine-grained intermediate computation. This decomposition separates the two roles across time scales, +with h accumulating slowly across transitions and l refined rapidly within each one. +With this hierarchical multi-scale structure, a single transition zt−1 → zt is computed as follows. +The low-level component is first refined for K updates, with the high-level component held fixed: + \[ l_{t,k} = f_L(h_{t-1}, l_{t,k-1}, e_x; \theta), \quad k = 1, \ldots, K, \tag{6} \] +where $l_{t,0} := l_{t-1}$ and we write $l_t := l_{t,K}$ for the refined low-level component.
The high-level +component is then updated as a stochastic transition conditioned on the refined lt , + \[ u_t = f_H(h_{t-1}, l_t; \theta), \tag{7} \] + \[ \epsilon_t \sim p_\theta(\epsilon_t \mid u_t) := \mathcal{N}\big(\mu_\theta(u_t),\, \sigma_\theta^2(u_t) I\big), \tag{8} \] + \[ h_t = u_t + \epsilon_t, \tag{9} \] +and we set zt = (ht , lt ). Note that stochasticity is introduced only at the high level: the low- +level refinement is fully deterministic, while the stochastic guidance signal ϵt acts on the slower, +more abstract component of the latent state, where it can steer the overall reasoning trajectory +across transitions (see footnote 3). Under this instantiation, the decoder reads only the high-level component, i.e., +fdec (zT ) = fdec (hT ). Additional architectural details are provided in Appendix B.1. +Modeling Unconditional Distribution. While the description so far focuses on the conditional +setting pθ (y | x), the same recursive process can also be defined as an unconditional generative model +pθ (x) when the input is replaced with an empty conditioning embedding. We use this formulation for +generation tasks in Section 4.3. + +2.2 Training + +GRAM is trained to model the conditional distribution pθ (y | x), where each training example +consists of an input x and its corresponding target y. As a probabilistic model, GRAM adopts a +latent-variable formulation and is optimized by maximizing an evidence lower bound (ELBO) with +respect to the generative parameters θ and variational parameters ϕ. +Latent Variable Modeling. We model GRAM as a latent-variable probabilistic model pθ , where +the full latent trajectory $\tau = (z_0 \to \cdots \to z_{T_{Total}})$ consists of a sequence of latent variables, with +$T_{Total} = T \times N_{sup}$. The conditional likelihood is defined as + \[ p_\theta(y \mid x) = \int p_\theta(y \mid \tau, x)\, p_\theta(\tau \mid x)\, d\tau, \tag{10} \] + +where x denotes the input problem and y denotes the corresponding ground-truth output. +Direct maximum likelihood estimation of log pθ (y | x) is intractable due to the marginalization +over latent trajectories.
We therefore introduce a variational posterior qϕ (τ | x, y) and optimize the +evidence lower bound (ELBO), jointly training θ and ϕ via variational inference: + \[ \log p_\theta(y \mid x) \ge \mathbb{E}_{q_\phi(\tau \mid x, y)}\big[\log p_\theta(y \mid \tau, x)\big] - \mathrm{KL}\big(q_\phi(\tau \mid x, y) \,\|\, p_\theta(\tau \mid x)\big). \tag{11} \] + +During training, latent trajectories are sampled from the variational posterior qϕ (· | x, y), which has +access to both the input problem x and the target output y. At inference time, where y is unavailable, +trajectories are instead generated from the learned prior pθ (· | x). + Footnote 3: We also tried injecting noise into the low-level state, but found that it did not improve performance. + + + + + 4 + Both the prior and the posterior are modeled as conditional Markov processes over latent states: + \[ p_\theta(\tau \mid x) = p(z_0) \prod_{t=1}^{T_{Total}} p_\theta(z_t \mid z_{t-1}, x), \qquad q_\phi(\tau \mid x, y) = p(z_0) \prod_{t=1}^{T_{Total}} q_\phi(z_t \mid z_{t-1}, x, y). \tag{12} \] +Here, z0 is a fixed initial state shared by the prior and posterior. Both transitions are implemented by +adding reparameterized Gaussian noise ϵt after a deterministic update ut ; the posterior uses the same +transition module as the prior, but samples from a target-conditioned noise distribution qϕ (ϵt | ut , y), +whereas the prior uses pθ (ϵt | ut ). +Since the two processes share the same Markov structure and all stochasticity is introduced through +ϵ1:TTotal , their trajectory distributions can be equivalently represented in noise space. Moreover, +since GRAM decodes the output only from the terminal latent state, the likelihood term satisfies +pθ (y | τ, x) = pθ (y | zTTotal , x). Therefore, the full trajectory-level ELBO can be written as +   TX Total h i + LELBO = Eqϕ log pθ (y | zTTotal , x) − Eqϕ (ϵ