summaryrefslogtreecommitdiff
path: root/research/flossing/run_HRM256_after_H.sh
blob: e901c30cbab82c27a391ae4ba4681877d5880139 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#!/bin/bash
# Wait for H-runner (Engelken on converged HRM) to finish, then run HRM hidden=256 baseline
# as capacity-matched control for SRM v1 (hidden=256/512 plateau diagnosis).
#
# Usage: run_HRM256_after_H.sh <H_RUNNER_PID>
#   H_RUNNER_PID  PID of the H-runner process to wait on before training starts.
#
# Progress lines are appended to step4_runner.log; the training run's
# stdout/stderr go to step4_HRM256.log (both relative to the flossing directory).
set -e

# Validate the PID before doing anything else. An empty argument would make
# `kill -0` fail immediately (skipping the wait entirely), and PID 0 would
# probe our own process group, which always succeeds (waiting forever).
H_RUNNER_PID=${1:-}
if [[ ! "$H_RUNNER_PID" =~ ^[1-9][0-9]*$ ]]; then
  printf 'Usage: %s <H_RUNNER_PID>\n' "${0##*/}" >&2
  exit 2
fi

cd /home/yurenh2/rrm/research/flossing
source /home/yurenh2/miniconda3/etc/profile.d/conda.sh
conda activate rrm
export CUDA_VISIBLE_DEVICES=2

# Append one timestamped line to the shared runner log.
log() {
  printf '[%s] %s\n' "$(date '+%H:%M:%S')" "$*" >> step4_runner.log
}

log "HRM256-runner waiting for H-runner PID $H_RUNNER_PID..."
# `kill -0` delivers no signal; it only reports whether the PID can be signalled.
# NOTE: this polls by PID, so it cannot tell the H-runner apart from an
# unrelated process that later reuses the same PID.
while kill -0 "$H_RUNNER_PID" 2>/dev/null; do
  sleep 30
done
log "H done. Starting HRM hidden=256 baseline (capacity-matched control for SRM v1)"

# HRM hidden=256 from scratch, 3000 steps, no CF (baseline for capacity diagnosis)
# Capture the exit status instead of letting `set -e` abort silently, so a
# crashed run leaves a trace in the runner log.
rc=0
python step4_from_scratch.py \
  --n-steps 3000 --batch-size 8 \
  --hidden-size 256 --num-heads 4 \
  --alpha-rf 0.0 \
  --warmup-steps 200 \
  --eval-every 300 --eval-n 512 --eval-batch-size 32 \
  --out step4_HRM256_baseline_fromscratch.json \
  > step4_HRM256.log 2>&1 || rc=$?

if (( rc != 0 )); then
  log "HRM hidden=256 baseline FAILED (exit $rc); see step4_HRM256.log"
  exit "$rc"
fi

log "HRM hidden=256 baseline complete"