summaryrefslogtreecommitdiff
path: root/research/flossing/launch_trajectory_perturb_queue.sh
blob: ba681edaa5a5386799ca1fa2cb3d9edde8160cc7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/usr/bin/env bash
#
# launch_trajectory_perturb_queue.sh - run a fixed queue of
# step9_trajectory_perturb_train.py jobs for one model family.
#
# Usage: launch_trajectory_perturb_queue.sh [MODEL] [GPU]
#   MODEL  which queue to run: "hrm" or "trm" (default: hrm); any other value
#          is rejected by the dispatch at the bottom of the script.
#   GPU    value exported to each job as CUDA_VISIBLE_DEVICES (default: 0).
#
# Abort on the first failing command or failing pipeline stage.
# NOTE(review): -u is not enabled; presumably because conda's activation
# scripts may reference unset variables - confirm before adding it.
set -eo pipefail

# Positional arguments with defaults (see Usage above).
MODEL="${1:-hrm}"
GPU="${2:-0}"

# Load conda's shell hook so `conda activate` works in this non-interactive
# shell, then switch to the "rrm" environment.
source /home/yurenh2/miniconda3/etc/profile.d/conda.sh
conda activate rrm
# The training script and the per-job .log files are referenced by relative
# path, so everything below runs from this directory.
cd /home/yurenh2/rrm/research/flossing

# Checkpoint directories passed to the jobs via --ckpt-root.
# The HRM path contains spaces in two components, so every expansion of
# HRM_ROOT must stay double-quoted.
HRM_ROOT="/home/yurenh2/rrm/hrm/checkpoints/Sudoku-extreme-1k-aug-1000 ACT-torch/HierarchicalReasoningModel_ACTV1 righteous-python"
TRM_ROOT="/home/yurenh2/rrm/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_singleGPU"

#######################################
# Run one step9_trajectory_perturb_train.py job in the foreground and capture
# its output in a per-job log file.
# Globals:   GPU (read) - exported to the job as CUDA_VISIBLE_DEVICES
# Arguments: $1   - job tag; used in the status lines and as the log file
#                   name "<tag>.log" (relative to the current directory)
#            $2.. - passed through unchanged to the Python script
# Outputs:   timestamped START and DONE lines on stdout; on failure a
#            FAILED line (with exit code and log path) on stderr instead of
#            DONE. The job's stdout and stderr both go to "<tag>.log".
# Returns:   0 if the job succeeded, otherwise the job's exit status, so a
#            caller running under `set -e` still aborts the queue.
#######################################
run_one() {
  local tag="$1"
  shift
  local status=0
  echo "[$(date -Is)] START ${tag}"
  # `|| status=$?` keeps errexit from killing the shell here, so the failure
  # can be reported before the status is handed back to the caller.
  CUDA_VISIBLE_DEVICES="${GPU}" python step9_trajectory_perturb_train.py "$@" \
    > "${tag}.log" 2>&1 || status=$?
  if (( status != 0 )); then
    echo "[$(date -Is)] FAILED ${tag} (exit ${status}, see ${tag}.log)" >&2
    return "${status}"
  fi
  echo "[$(date -Is)] DONE ${tag}"
}

# Job queue. For the selected model the single-trajectory job runs first and
# the 4-trajectory job second; each run_one call blocks until its job ends.
# The argument arrays below are spliced so that every job receives exactly
# the same flags, in the same order, as a fully spelled-out command line.

# Flags that follow --batch-size on all four jobs.
shared_tail=(
  --lr 1e-5
  --noise-std 0.001
  --perturb both
  --seed 42
  --eval-every 1000
  --eval-n 512
  --eval-batch-size 32
)

case "${MODEL}" in
  hrm)
    # Checkpoint selection and step/batch settings shared by both HRM jobs.
    hrm_ckpt=(--model hrm --ckpt-root "${HRM_ROOT}" --ckpt-name step_26040)
    hrm_sched=(--train-steps 10000 --batch-size 8)

    job="step9_A_hrm_single_perturb_sigma1e-3_26040_10k"
    run_one "${job}" \
      "${hrm_ckpt[@]}" \
      --mode single_perturbed_ce \
      "${hrm_sched[@]}" \
      "${shared_tail[@]}" \
      --out "${job}.json"

    job="step9_B_hrm_multi4_perturb_sigma1e-3_26040_10k"
    run_one "${job}" \
      "${hrm_ckpt[@]}" \
      --mode multi_perturbed_ce \
      --n-trajectories 4 \
      "${hrm_sched[@]}" \
      "${shared_tail[@]}" \
      --out "${job}.json"
    ;;
  trm)
    # Checkpoint selection and step/batch settings shared by both TRM jobs.
    trm_ckpt=(--model trm --ckpt-root "${TRM_ROOT}" --ckpt-name step_26041)
    trm_sched=(--train-steps 10000 --batch-size 4)

    job="step9_C_trm_single_perturb_sigma1e-3_26041_batch4_10k"
    run_one "${job}" \
      "${trm_ckpt[@]}" \
      --mode single_perturbed_ce \
      "${trm_sched[@]}" \
      "${shared_tail[@]}" \
      --out "${job}.json"

    job="step9_D_trm_multi4_perturb_sigma1e-3_26041_batch4_10k"
    run_one "${job}" \
      "${trm_ckpt[@]}" \
      --mode multi_perturbed_ce \
      --n-trajectories 4 \
      "${trm_sched[@]}" \
      "${shared_tail[@]}" \
      --out "${job}.json"
    ;;
  *)
    # Anything other than the two known model names is a usage error.
    echo "unknown model: ${MODEL}" >&2
    exit 2
    ;;
esac