#!/bin/bash
# submit_all_jobs.sh
#
# Submit all experiment jobs to the SLURM queue: one job per
# (precision mode, seed) combination. Jobs run automatically when resources
# become available.
#
# Provenance: scripts/submit_all_jobs.sh, commit f1c2cc22d46a
# ("Initial commit: RL floating-point noise project").
#
# Usage:
#   scripts/submit_all_jobs.sh
#
# Environment:
#   PROJECT_DIR  Optional. Project checkout to submit from; defaults to the
#                path hard-coded below.
#
# Outputs:
#   results/slurm_logs/submitted_jobs.txt  Space-separated IDs of every job
#                                          that was successfully submitted.
#
# Exit status:
#   0 if every job was submitted, 1 on the first failure (IDs of jobs that
#   were already queued are still saved and printed so they can be cancelled).

# -u catches typos in variable names; pipefail keeps a failing command inside
# a pipeline from being hidden by a succeeding last stage.
set -euo pipefail

PROJECT_DIR="${PROJECT_DIR:-/projects/bfqt/users/yurenh2/ml-projects/rl-floating-noise}"
readonly LOG_DIR="results/slurm_logs"
readonly JOB_ID_FILE="${LOG_DIR}/submitted_jobs.txt"
readonly TRAIN_SCRIPT="scripts/slurm_train.sh"

# Configuration
SEEDS=(1 2 3 4 5)
PRECISION_MODES=("fp32" "bf16")

# Track submitted job IDs
declare -a JOB_IDS=()

# Print an error message to stderr and exit with status 1.
die() {
  printf 'Error: %s\n' "$*" >&2
  exit 1
}

# Abort after a failed submission. Jobs queued before the failure keep
# running, so persist and print their IDs before exiting; otherwise the user
# has no record of what to cancel.
abort_submission() {
  printf 'Error: %s\n' "$*" >&2
  if (( ${#JOB_IDS[@]} > 0 )); then
    printf '%s\n' "${JOB_IDS[*]}" > "$JOB_ID_FILE"
    printf 'Jobs already submitted (saved to %s): %s\n' \
      "$JOB_ID_FILE" "${JOB_IDS[*]}" >&2
    printf 'Cancel them with: scancel %s\n' "${JOB_IDS[*]}" >&2
  fi
  exit 1
}

cd "$PROJECT_DIR" || die "cannot cd to project directory: $PROJECT_DIR"

# Fail early with a clear message instead of on the first sbatch call.
command -v sbatch >/dev/null || die "sbatch not found in PATH"
[[ -f "$TRAIN_SCRIPT" ]] || die "missing job script: $PROJECT_DIR/$TRAIN_SCRIPT"

# Create log directory
mkdir -p "$LOG_DIR"

echo "============================================"
echo "Submitting RLVR Experiment Jobs"
echo "============================================"
echo "Seeds: ${SEEDS[*]}"
echo "Precision Modes: ${PRECISION_MODES[*]}"
echo "Total jobs: $(( ${#SEEDS[@]} * ${#PRECISION_MODES[@]} ))"
echo "============================================"

for precision in "${PRECISION_MODES[@]}"; do
  for seed in "${SEEDS[@]}"; do
    JOB_NAME="rlvr_${precision}_s${seed}"

    echo "Submitting: $JOB_NAME"

    # Submit job with environment variables. --parsable makes sbatch print
    # "jobid[;cluster]" instead of the human-readable banner, so the ID does
    # not depend on the banner's wording. Testing the assignment directly
    # means a failed sbatch is reported here, not silently recorded as an
    # empty job ID.
    if ! SBATCH_OUT=$(sbatch \
        --parsable \
        --job-name="$JOB_NAME" \
        --export=ALL,PRECISION_MODE="$precision",SEED="$seed" \
        "$TRAIN_SCRIPT"); then
      abort_submission "sbatch failed for $JOB_NAME"
    fi

    # Drop the optional ";cluster" suffix, then make sure what is left really
    # is a job ID before recording it.
    JOB_ID="${SBATCH_OUT%%;*}"
    if [[ ! "$JOB_ID" =~ ^[0-9]+$ ]]; then
      abort_submission "unexpected sbatch output for $JOB_NAME: '$SBATCH_OUT'"
    fi

    JOB_IDS+=("$JOB_ID")
    echo " -> Job ID: $JOB_ID"
  done
done

# Comma-separated form for `squeue -j`; the subshell keeps the IFS change
# from leaking into the rest of the script.
JOB_IDS_CSV=$(IFS=,; echo "${JOB_IDS[*]}")

echo ""
echo "============================================"
echo "All jobs submitted!"
echo "Job IDs: ${JOB_IDS[*]}"
echo "============================================"
echo ""
echo "Monitor with:"
# USER may be unset in minimal environments; fall back to id(1) under set -u.
echo " squeue -u ${USER:-$(id -un)}"
echo " squeue -j $JOB_IDS_CSV"
echo ""
echo "View logs:"
echo " tail -f results/slurm_logs/rlvr_*.out"
echo ""
echo "Cancel all:"
echo " scancel ${JOB_IDS[*]}"
echo "============================================"

# Save job IDs for reference
echo "${JOB_IDS[*]}" > "$JOB_ID_FILE"
echo "Job IDs saved to: $JOB_ID_FILE"