summary | refs | log | tree | commit | diff
path: root/scripts/submit_all_jobs.sh
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/submit_all_jobs.sh')
-rwxr-xr-x  scripts/submit_all_jobs.sh  66
1 files changed, 66 insertions, 0 deletions
diff --git a/scripts/submit_all_jobs.sh b/scripts/submit_all_jobs.sh
new file mode 100755
index 0000000..86c0f5d
--- /dev/null
+++ b/scripts/submit_all_jobs.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+# submit_all_jobs.sh
+# Submit all experiment jobs to SLURM queue
+# Jobs will run automatically when resources become available
+
+set -e
+
+PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/rl-floating-noise"
+cd "$PROJECT_DIR"
+
+# Create log directory
+mkdir -p results/slurm_logs
+
+# Configuration
+SEEDS=(1 2 3 4 5)
+PRECISION_MODES=("fp32" "bf16")
+
+echo "============================================"
+echo "Submitting RLVR Experiment Jobs"
+echo "============================================"
+echo "Seeds: ${SEEDS[*]}"
+echo "Precision Modes: ${PRECISION_MODES[*]}"
+echo "Total jobs: $((${#SEEDS[@]} * ${#PRECISION_MODES[@]}))"
+echo "============================================"
+
+# Track submitted job IDs
+declare -a JOB_IDS
+
+for precision in "${PRECISION_MODES[@]}"; do
+ for seed in "${SEEDS[@]}"; do
+ JOB_NAME="rlvr_${precision}_s${seed}"
+
+ echo "Submitting: $JOB_NAME"
+
+ # Submit job with environment variables
+ JOB_ID=$(sbatch \
+ --job-name="$JOB_NAME" \
+ --export=ALL,PRECISION_MODE="$precision",SEED="$seed" \
+ scripts/slurm_train.sh | awk '{print $4}')
+
+ JOB_IDS+=("$JOB_ID")
+ echo " -> Job ID: $JOB_ID"
+ done
+done
+
+echo ""
+echo "============================================"
+echo "All jobs submitted!"
+echo "Job IDs: ${JOB_IDS[*]}"
+echo "============================================"
+echo ""
+echo "Monitor with:"
+echo " squeue -u $USER"
+echo " squeue -j $(IFS=,; echo "${JOB_IDS[*]}")"
+echo ""
+echo "View logs:"
+echo " tail -f results/slurm_logs/rlvr_*.out"
+echo ""
+echo "Cancel all:"
+echo " scancel ${JOB_IDS[*]}"
+echo "============================================"
+
+# Save job IDs for reference
+echo "${JOB_IDS[*]}" > results/slurm_logs/submitted_jobs.txt
+echo "Job IDs saved to: results/slurm_logs/submitted_jobs.txt"
+