From 66e0d8b9fd4d0f7a2231d689c055e26fdf1cf04a Mon Sep 17 00:00:00 2001 From: YurenHao0426 Date: Sat, 13 Jun 2026 12:35:36 -0500 Subject: rrm workspace: TRM/HRM/SRM code, Maze dataset, dynamical-analysis pipeline Curated export for clone-and-run Maze training (2x A6000) + diagnostics. trm/hrm pretrain.py carry trajectory-augmentation code (backward-compatible). Heavy artifacts (checkpoints/wandb/npz) gitignored; see PROVENANCE.md. Co-Authored-By: Claude Fable 5 --- .../flossing/analysis_2x2/phase1/phase1_results.md | 48 ++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 research/flossing/analysis_2x2/phase1/phase1_results.md (limited to 'research/flossing/analysis_2x2/phase1') diff --git a/research/flossing/analysis_2x2/phase1/phase1_results.md b/research/flossing/analysis_2x2/phase1/phase1_results.md new file mode 100644 index 0000000..5dde25d --- /dev/null +++ b/research/flossing/analysis_2x2/phase1/phase1_results.md @@ -0,0 +1,48 @@ +# Phase-1 analysis (E5 horizon sweep, E6 matched-objective, E2 replication) + +## E5: when does fate become legible? AUC(early signal -> FINAL correct), restricted to not-yet-correct@H + +### TRM +| H | solved@H | undecided n | of which eventual✓ | AUC(-λ₁) | AUC(-drift) | AUC(q_halt) | +|---|---|---|---|---|---|---| +| 2 | 0.542 | 939 | 0.729 | 0.526 | 0.411 | 0.608 | +| 4 | 0.694 | 626 | 0.594 | 0.543 | 0.492 | 0.521 | +| 6 | 0.766 | 479 | 0.470 | 0.523 | 0.441 | 0.531 | +| 8 | 0.801 | 407 | 0.376 | 0.483 | 0.495 | 0.514 | +| 10 | 0.831 | 347 | 0.268 | 0.477 | 0.536 | 0.476 | +| 12 | 0.846 | 315 | 0.194 | 0.483 | 0.509 | 0.509 | + +### HRM +| H | solved@H | undecided n | of which eventual✓ | AUC(-λ₁) | AUC(-drift) | AUC(q_halt) | +|---|---|---|---|---|---|---| +| 2 | 0.071 | 1903 | 0.490 | 0.375 | 0.193 | 0.810 | +| 4 | 0.345 | 1342 | 0.276 | 0.448 | 0.312 | 0.734 | +| 6 | 0.436 | 1155 | 0.159 | 0.438 | 0.349 | 0.676 | +| 8 | 0.471 | 1083 | 0.103 | 0.432 | 0.371 | 0.705 | +| 10 | 0.489 | 1046 | 0.072 | 0.478 | 0.407 | 0.705 | +| 12 | 0.506 | 1012 | 0.041 | 0.503 | 0.348 | 0.635 | + +## E6: matched-objective intervention (step9 fixed-unroll runs, n=512) + +### HRM (E base vs F multi4) +| ckpt | base acc | base fD | base λ₁(D) | multi4 acc | multi4 fD | multi4 λ₁(D) | +|---|---|---|---|---|---|---| +| step_12500 | 0.613 | 0.387 | -0.0157 | 0.631 | 0.369 | -0.0215 | +| step_25000 | 0.615 | 0.385 | +0.0138 | 0.617 | 0.379 | +0.0101 | +| best | 0.619 | 0.381 | -0.0214 | 0.613 | 0.387 | -0.0181 | +| final | 0.588 | 0.410 | +0.0456 | 0.609 | 0.387 | +0.0335 | + +### TRM (G base vs H multi4) +| ckpt | base acc | base fD | base λ₁(D) | multi4 acc | multi4 fD | multi4 λ₁(D) | +|---|---|---|---|---|---|---| +| step_12500 | 0.553 | 0.344 | +0.0219 | 0.600 | 0.305 | +0.0284 | +| step_25000 | 0.525 | 0.396 | +0.0209 | 0.545 | 0.361 | +0.0222 | +| best | 0.596 | 0.334 | +0.0233 | 0.580 | 0.361 | +0.0366 | +| final | 0.477 | 0.312 | +0.0164 | 0.537 | 0.270 | +0.0249 | + +## E2: decomposition on a SECOND HRM training run (step9_E fixed-unroll, n=2048) + +| ckpt | acc | A | B | C | D | λ₁(A) | λ₁(D) | settled-wrong frac of failures | +|---|---|---|---|---|---|---|---|---| +| best | 0.637 | 1244 | 1 | 61 | 742 | -0.1685 | -0.0308 | 0.0013 | +| final | 0.594 | 1124 | 3 | 92 | 829 | -0.0144 | +0.0444 | 0.0036 | \ No newline at end of file -- cgit v1.2.3