summary | refs | log | tree | commit | diff
path: root/research/flossing/run_SRM7M_after_HRM256.sh
diff options
context:
space:
mode:
Diffstat (limited to 'research/flossing/run_SRM7M_after_HRM256.sh')
-rwxr-xr-x research/flossing/run_SRM7M_after_HRM256.sh 31
1 files changed, 31 insertions, 0 deletions
diff --git a/research/flossing/run_SRM7M_after_HRM256.sh b/research/flossing/run_SRM7M_after_HRM256.sh
new file mode 100755
index 0000000..1f79ea7
--- /dev/null
+++ b/research/flossing/run_SRM7M_after_HRM256.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# Wait for the HRM256 baseline runner to exit, then run SRM v1 scaled to 7M (TRM-equiv size).
+# Usage: run_SRM7M_after_HRM256.sh <HRM256_RUNNER_PID>   (progress is appended to step4_runner.log)
+set -e
+# Reject a missing/non-numeric PID (kill -0 would fail and skip the wait) and PID 0 (own process group: never exits).
+[[ ${1:-} =~ ^[1-9][0-9]*$ ]] || { echo "usage: $0 <HRM256_RUNNER_PID>" >&2; exit 2; }
+HRM256_RUNNER_PID=$1
+LOG_DIR=/home/yurenh2/rrm/research/flossing
+cd /home/yurenh2/rrm/srm
+source /home/yurenh2/miniconda3/etc/profile.d/conda.sh
+conda activate rrm
+export CUDA_VISIBLE_DEVICES=2
+echo "[$(date '+%H:%M:%S')] SRM7M-runner waiting for HRM256-runner PID $HRM256_RUNNER_PID..." >> "$LOG_DIR/step4_runner.log"
+while kill -0 "$HRM256_RUNNER_PID" 2>/dev/null; do  # signal 0 only probes for existence; poll until the PID is gone
+  sleep 30
+done
+echo "[$(date '+%H:%M:%S')] HRM256 done. Starting SRM v1 scaled to 7M (n_aol_layers=5, hidden=512)" >> "$LOG_DIR/step4_runner.log"
+mkdir -p runs ckpts
+
+# SRM v1 at TRM-equivalent param count (~7M); set -e aborts before the final log line if training fails.
+python scripts/train_srm.py \
+  --n-steps 3000 --batch-size 8 \
+  --hidden-size 512 --n-iters 12 --n-aol-layers 5 \
+  --kappa 0.9 --eta 1.0 --alpha 1.0 \
+  --warmup-steps 200 \
+  --k-lyap 2 --lyap-iters 8 --lyap-every 50 \
+  --eval-every 300 --eval-n 512 --eval-batch-size 32 \
+  --out runs/srm_v1_7M_sudoku1k_3k.json \
+  --save-ckpt ckpts/srm_v1_7M_3k.pt \
+  > "$LOG_DIR/srm_v1_7M_run.log" 2>&1
+echo "[$(date '+%H:%M:%S')] SRM v1 7M training complete" >> "$LOG_DIR/step4_runner.log"