From 66e0d8b9fd4d0f7a2231d689c055e26fdf1cf04a Mon Sep 17 00:00:00 2001
From: YurenHao0426
Date: Sat, 13 Jun 2026 12:35:36 -0500
Subject: rrm workspace: TRM/HRM/SRM code, Maze dataset, dynamical-analysis pipeline

Curated export for clone-and-run Maze training (2x A6000) + diagnostics.
trm/hrm pretrain.py carry trajectory-augmentation code (backward-compatible).
Heavy artifacts (checkpoints/wandb/npz) gitignored; see PROVENANCE.md.

Co-Authored-By: Claude Fable 5
---
 .../flossing_suite/launch_trm_variant_suite.sh | 87 ++++++++++++++++++++++
 1 file changed, 87 insertions(+)
 create mode 100755 research/flossing/flossing_suite/launch_trm_variant_suite.sh
(limited to 'research/flossing/flossing_suite/launch_trm_variant_suite.sh')

diff --git a/research/flossing/flossing_suite/launch_trm_variant_suite.sh b/research/flossing/flossing_suite/launch_trm_variant_suite.sh
new file mode 100755
index 0000000..86c2e64
--- /dev/null
+++ b/research/flossing/flossing_suite/launch_trm_variant_suite.sh
@@ -0,0 +1,87 @@
+#!/usr/bin/env bash
+set -euo pipefail  # abort on command failure, unset variables, and failed pipeline stages
+
+ROOT="/home/yurenh2/rrm"  # absolute workspace root; OUT_DIR and CKPT_ROOT are built from it
+PY="/home/yurenh2/miniconda3/envs/rrm/bin/python"  # not referenced in the visible text; presumably used by the generated command script - TODO confirm
+OUT_DIR="${ROOT}/research/flossing/flossing_suite/results/trm_variants"  # receives the per-run .cmd.sh, .log and .pid files
+CKPT_ROOT="${ROOT}/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro"  # not referenced in the visible text - TODO confirm where it is consumed
+mkdir -p "${OUT_DIR}"  # create the output directory (and parents) if missing
+
+GPU_TOP1="${GPU_TOP1:-0}"  # every setting in this group is taken from the environment, falling back to the value after :-
+GPU_VOLUME="${GPU_VOLUME:-1}"  # GPU argument for the volume_cf run
+GPU_KL="${GPU_KL:-3}"  # GPU argument for the volume_cf kl10 run; NOTE(review): defaults go 0, 1, 3 - confirm that skipping 2 is intended
+TRAIN_STEPS="${TRAIN_STEPS:-20000}"  # appears as the final component of each run name below
+FLOSS_STEPS="${FLOSS_STEPS:-100}"  # not referenced in the visible text - presumably passed to the training command
+INTERFLOSS_EVERY="${INTERFLOSS_EVERY:-2000}"  # appears in each run name as periodic followed by this value
+INTERFLOSS_START="${INTERFLOSS_START:-2000}"  # not referenced in the visible text
+INTERFLOSS_STOP="${INTERFLOSS_STOP:-10000}"  # not referenced in the visible text
+EVAL_N="${EVAL_N:-1000}"  # not referenced in the visible text
+TASK_BATCH_SIZE="${TASK_BATCH_SIZE:-32}"  # appears in each run name as tb followed by this value
+FLOSS_BATCH_SIZE="${FLOSS_BATCH_SIZE:-4}"  # appears in each run name as fb followed by this value
+
+write_and_launch() {  # args: gpu, run name, floss mode, kl beta; starts one background job and records its PID under OUT_DIR
+  local gpu="$1"  # GPU identifier; in the visible text it is only echoed in the status line
+  local name="$2"  # run name; used as the basename of the cmd, log and pid files
+  local floss_mode="$3"  # not referenced in the visible text of this function
+  local kl_beta="$4"  # not referenced in the visible text of this function
+  local cmd="${OUT_DIR}/${name}.cmd.sh"  # path of the command script that cat writes
+  local log="${OUT_DIR}/${name}.log"  # path of the log file for the run
+  local pid="${OUT_DIR}/${name}.pid"  # path of the file that stores the background PID
+
+  cat > "${cmd}" < "${log}" 2>&1 < /dev/null &  # NOTE(review): this statement looks truncated by page extraction - a here-document body and the actual launch command appear to be missing between the first input redirect and the log path; recover them from the repository before relying on this script
+  echo $! > "${pid}"  # store the PID of the most recently started background job
+  echo "${name}: pid $(cat "${pid}") on GPU ${gpu}"  # report the PID read back from the pid file
+}
+
+write_and_launch "${GPU_TOP1}" \
+  "trm_seed123_top1_cf_periodic${INTERFLOSS_EVERY}_tb${TASK_BATCH_SIZE}_fb${FLOSS_BATCH_SIZE}_k4_${TRAIN_STEPS}" \
+  top1_cf 0  # floss_mode=top1_cf, kl_beta=0
+write_and_launch "${GPU_VOLUME}" \
+  "trm_seed123_volume_cf_periodic${INTERFLOSS_EVERY}_tb${TASK_BATCH_SIZE}_fb${FLOSS_BATCH_SIZE}_k4_${TRAIN_STEPS}" \
+  volume_cf 0  # floss_mode=volume_cf, kl_beta=0
+write_and_launch "${GPU_KL}" \
+  "trm_seed123_volume_cf_kl10_periodic${INTERFLOSS_EVERY}_tb${TASK_BATCH_SIZE}_fb${FLOSS_BATCH_SIZE}_k4_${TRAIN_STEPS}" \
+  volume_cf 10  # floss_mode=volume_cf, kl_beta=10
+
+echo "queued TRM CF/volume variant suite in ${OUT_DIR}"  # printed after all three background launches have been issued
--
cgit v1.2.3