diff options
Diffstat (limited to 'research/flossing/flossing_suite/launch_trm_faithful_suite.sh')
| -rwxr-xr-x | research/flossing/flossing_suite/launch_trm_faithful_suite.sh | 88 |
1 files changed, 88 insertions, 0 deletions
#!/usr/bin/env bash
#
# research/flossing/flossing_suite/launch_trm_faithful_suite.sh
#
# Launches three detached runs of research/flossing/step7_interfloss.py
# (--model trm, --floss-mode engelken_l2, --init-seed 123), each pinned to
# one GPU through CUDA_VISIBLE_DEVICES:
#   1. baseline  : --floss-steps 0 and an empty --interfloss-at
#   2. prefloss  : --interfloss-at 0 with PREFLOSS_STEPS floss steps
#   3. pre+inter : --interfloss-at 0 with INTERFLOSS_STEPS floss steps and
#                  --interfloss-every/-start/-stop taken from INTERFLOSS_*
# For every run <name> the files <name>.cmd.sh, <name>.log and <name>.pid
# are written to OUT_DIR, and the run is told to write <name>.json there.
#
# Environment overrides (default in parentheses):
#   RRM_ROOT          repository root            (/home/yurenh2/rrm)
#   RRM_PYTHON        python interpreter         (/home/yurenh2/miniconda3/envs/rrm/bin/python)
#   GPU_BASE          GPU for run 1              (0)
#   GPU_PREFLOSS      GPU for run 2              (1)
#   GPU_INTER         GPU for run 3              (3)
#   TRAIN_STEPS       --train-steps              (20000)
#   PREFLOSS_STEPS    --floss-steps for run 2    (500)
#   INTERFLOSS_STEPS  --floss-steps for run 3    (100)
#   INTERFLOSS_EVERY  --interfloss-every, run 3  (2000)
#   INTERFLOSS_START  --interfloss-start, run 3  (2000)
#   INTERFLOSS_STOP   --interfloss-stop, run 3   (10000)
#   EVAL_N            --eval-n                   (1000)
#   TASK_BATCH_SIZE   --batch-size and --task-batch-size (32)
#   FLOSS_BATCH_SIZE  --floss-batch-size         (4)
set -euo pipefail

# Print a message to stderr and abort the launcher.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

ROOT="${RRM_ROOT:-/home/yurenh2/rrm}"
PY="${RRM_PYTHON:-/home/yurenh2/miniconda3/envs/rrm/bin/python}"
# Path of the training entry point, relative to ROOT (the generated command
# scripts cd into ROOT before exec'ing it).
TRAIN_SCRIPT="research/flossing/step7_interfloss.py"
OUT_DIR="${ROOT}/research/flossing/flossing_suite/results/trm_faithful"
CKPT_ROOT="${ROOT}/trm/checkpoints/Sudoku-extreme-1k-aug-1000-ACT-torch/pretrain_mlp_t_sudoku_official_gbs768_repro"

GPU_BASE="${GPU_BASE:-0}"
GPU_PREFLOSS="${GPU_PREFLOSS:-1}"
GPU_INTER="${GPU_INTER:-3}"
TRAIN_STEPS="${TRAIN_STEPS:-20000}"
PREFLOSS_STEPS="${PREFLOSS_STEPS:-500}"
INTERFLOSS_STEPS="${INTERFLOSS_STEPS:-100}"
INTERFLOSS_EVERY="${INTERFLOSS_EVERY:-2000}"
INTERFLOSS_START="${INTERFLOSS_START:-2000}"
INTERFLOSS_STOP="${INTERFLOSS_STOP:-10000}"
EVAL_N="${EVAL_N:-1000}"
TASK_BATCH_SIZE="${TASK_BATCH_SIZE:-32}"
FLOSS_BATCH_SIZE="${FLOSS_BATCH_SIZE:-4}"

# Preflight: the jobs below are started in the background, so `set -e` never
# sees their exit status. Without these checks a missing tool would still
# produce "pid ... on GPU ..." and "queued ..." messages.
command -v setsid >/dev/null 2>&1 \
  || die "setsid not found on PATH"
command -v "${PY}" >/dev/null 2>&1 \
  || die "python interpreter not found or not executable: ${PY}"
[[ -f "${ROOT}/${TRAIN_SCRIPT}" ]] \
  || die "training script not found: ${ROOT}/${TRAIN_SCRIPT}"

mkdir -p "${OUT_DIR}"

#######################################
# Write a standalone command script for one run and start it detached.
# Globals (read): ROOT, PY, TRAIN_SCRIPT, OUT_DIR, CKPT_ROOT, TRAIN_STEPS,
#                 TASK_BATCH_SIZE, FLOSS_BATCH_SIZE, EVAL_N
# Arguments:
#   $1 - value exported as CUDA_VISIBLE_DEVICES for the run
#   $2 - run name; used as the stem of the .cmd.sh/.log/.pid/.json files
#   $3 - value for --interfloss-at (may be empty)
#   $4 - value for --floss-steps
#   $5 - value for --interfloss-every
#   $6 - value for --interfloss-start
#   $7 - value for --interfloss-stop
# Outputs: one "<name>: pid <pid> on GPU <gpu>" line on stdout
#######################################
write_and_launch() {
  local gpu="$1"
  local name="$2"
  local schedule="$3"
  local floss_steps="$4"
  local every="$5"
  local start="$6"
  local stop="$7"
  local cmd="${OUT_DIR}/${name}.cmd.sh"
  local log="${OUT_DIR}/${name}.log"
  local pid="${OUT_DIR}/${name}.pid"
  local job_pid

  # Unquoted heredoc delimiter: every ${...} below is expanded now, so the
  # generated script carries fixed values. A trailing "\\" is written out as
  # a single "\" line continuation.
  cat > "${cmd}" <<EOF
#!/usr/bin/env bash
set -euo pipefail
cd "${ROOT}"
export CUDA_VISIBLE_DEVICES="${gpu}"
export PYTHONUNBUFFERED=1
exec "${PY}" ${TRAIN_SCRIPT} \\
  --model trm \\
  --ckpt-root "${CKPT_ROOT}" \\
  --ckpt-name __random__ \\
  --init-seed 123 \\
  --train-steps "${TRAIN_STEPS}" \\
  --batch-size "${TASK_BATCH_SIZE}" \\
  --task-batch-size "${TASK_BATCH_SIZE}" \\
  --floss-batch-size "${FLOSS_BATCH_SIZE}" \\
  --train-lr 1e-4 \\
  --floss-lr 1e-4 \\
  --floss-mode engelken_l2 \\
  --lambda-star 0 \\
  --k-lyap 4 \\
  --lyap-act-steps 4 \\
  --seed 42 \\
  --eval-every 1000 \\
  --eval-n "${EVAL_N}" \\
  --eval-batch-size 64 \\
  --floss-log-every 10 \\
  --train-puzzle-emb \\
  --puzzle-emb-lr 1e-4 \\
  --puzzle-emb-weight-decay 1.0 \\
  --kl-beta 0 \\
  --floss-steps "${floss_steps}" \\
  --interfloss-at "${schedule}" \\
  --interfloss-every "${every}" \\
  --interfloss-start "${start}" \\
  --interfloss-stop "${stop}" \\
  --out "${OUT_DIR}/${name}.json"
EOF
  chmod +x "${cmd}"

  # Run the generated script through setsid in the background, with stdin
  # from /dev/null and stdout/stderr captured in the per-run log.
  setsid bash "${cmd}" > "${log}" 2>&1 < /dev/null &
  job_pid=$!
  printf '%s\n' "${job_pid}" > "${pid}"
  printf '%s: pid %s on GPU %s\n' "${name}" "${job_pid}" "${gpu}"
}

write_and_launch "${GPU_BASE}" \
  "trm_seed123_baseline_nofloss_tb${TASK_BATCH_SIZE}_fb${FLOSS_BATCH_SIZE}_${TRAIN_STEPS}" \
  "" 0 0 0 -1
write_and_launch "${GPU_PREFLOSS}" \
  "trm_seed123_prefloss_0_tb${TASK_BATCH_SIZE}_fb${FLOSS_BATCH_SIZE}_k4_${TRAIN_STEPS}" \
  "0" "${PREFLOSS_STEPS}" 0 0 -1
write_and_launch "${GPU_INTER}" \
  "trm_seed123_pre_inter_periodic${INTERFLOSS_EVERY}_tb${TASK_BATCH_SIZE}_fb${FLOSS_BATCH_SIZE}_k4_${TRAIN_STEPS}" \
  "0" "${INTERFLOSS_STEPS}" "${INTERFLOSS_EVERY}" "${INTERFLOSS_START}" "${INTERFLOSS_STOP}"

echo "queued TRM Engelken-faithful suite in ${OUT_DIR}"
