blob: e901c30cbab82c27a391ae4ba4681877d5880139 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
|
#!/bin/bash
# Wait for H-runner (Engelken on converged HRM) to finish, then run HRM hidden=256 baseline
# as capacity-matched control for SRM v1 (hidden=256/512 plateau diagnosis).
#
# Usage: <this script> H_RUNNER_PID
#   H_RUNNER_PID  PID of the H-runner process to wait for (positive integer).
#
# Outputs (relative to the working directory entered below):
#   step4_runner.log   progress lines appended by this script
#   step4_HRM256.log   stdout/stderr of the training run (overwritten)
#   step4_HRM256_baseline_fromscratch.json   passed to the trainer via --out
#
# Exit status: 2 on a missing/invalid PID argument; the trainer's exit status
# if training fails; 0 otherwise.
set -e

# Validate the PID before doing anything else. With an empty or malformed
# argument `kill -0` fails immediately, which would skip the wait loop and
# start training while the H-runner may still be running.
H_RUNNER_PID=${1:-}
if [[ ! "$H_RUNNER_PID" =~ ^[1-9][0-9]*$ ]]; then
  printf 'Usage: %s H_RUNNER_PID (positive integer PID of the H-runner)\n' "${0##*/}" >&2
  exit 2
fi
readonly H_RUNNER_PID

cd /home/yurenh2/rrm/research/flossing
source /home/yurenh2/miniconda3/etc/profile.d/conda.sh
conda activate rrm
# Stricter options are enabled only after activation: conda's shell hooks may
# reference unset variables and are not guaranteed to be clean under `set -u`.
set -uo pipefail
export CUDA_VISIBLE_DEVICES=2

readonly RUNNER_LOG=step4_runner.log

# Append one timestamped line to the runner log.
log() {
  printf '[%s] %s\n' "$(date '+%H:%M:%S')" "$*" >> "$RUNNER_LOG"
}

log "HRM256-runner waiting for H-runner PID $H_RUNNER_PID..."
# `kill -0` delivers no signal; it only reports whether the PID can be
# signalled. NOTE(review): a PID owned by another user, or a PID reused by an
# unrelated process, is indistinguishable from this script's point of view.
while kill -0 "$H_RUNNER_PID" 2>/dev/null; do
  sleep 30
done
log "H done. Starting HRM hidden=256 baseline (capacity-matched control for SRM v1)"

# HRM hidden=256 from scratch, 3000 steps, no CF (baseline for capacity diagnosis)
# The status is captured explicitly so a failed run is recorded in the runner
# log instead of the script dying silently under `set -e`.
rc=0
python step4_from_scratch.py \
  --n-steps 3000 --batch-size 8 \
  --hidden-size 256 --num-heads 4 \
  --alpha-rf 0.0 \
  --warmup-steps 200 \
  --eval-every 300 --eval-n 512 --eval-batch-size 32 \
  --out step4_HRM256_baseline_fromscratch.json \
  > step4_HRM256.log 2>&1 || rc=$?

if (( rc != 0 )); then
  log "HRM hidden=256 baseline FAILED (exit $rc); see step4_HRM256.log"
  exit "$rc"
fi
log "HRM hidden=256 baseline complete"
|