#!/bin/bash
# submit_all_jobs.sh
#
# Submit all experiment jobs to the SLURM queue.
# Jobs will run automatically when resources become available.
#
# One job is submitted per (precision mode, seed) pair. Each job runs
# scripts/slurm_train.sh with PRECISION_MODE and SEED exported into its
# environment. The IDs of all submitted jobs are printed and written to
# results/slurm_logs/submitted_jobs.txt.
#
# Exits non-zero as soon as a submission fails; the IDs of jobs that were
# already queued are reported on stderr so they can be cancelled.

# -e: stop on unhandled errors; -u: unset variables are errors;
# pipefail: a failing stage anywhere in a pipeline fails the pipeline.
set -euo pipefail

PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/rl-floating-noise"
LOG_DIR="results/slurm_logs"
TRAIN_SCRIPT="scripts/slurm_train.sh"

# Configuration
SEEDS=(1 2 3 4 5)
PRECISION_MODES=("fp32" "bf16")

# Track submitted job IDs
declare -a JOB_IDS=()

# Print an error to stderr, list any jobs already queued, and exit 1.
abort_submission() {
  printf 'Error: %s\n' "$*" >&2
  if (( ${#JOB_IDS[@]} > 0 )); then
    printf 'Jobs already submitted: %s\n' "${JOB_IDS[*]}" >&2
    printf 'Cancel them with: scancel %s\n' "${JOB_IDS[*]}" >&2
  fi
  exit 1
}

cd "$PROJECT_DIR" || abort_submission "cannot cd to $PROJECT_DIR"

# Create log directory
mkdir -p "$LOG_DIR"

echo "============================================"
echo "Submitting RLVR Experiment Jobs"
echo "============================================"
echo "Seeds: ${SEEDS[*]}"
echo "Precision Modes: ${PRECISION_MODES[*]}"
echo "Total jobs: $(( ${#SEEDS[@]} * ${#PRECISION_MODES[@]} ))"
echo "============================================"

for precision in "${PRECISION_MODES[@]}"; do
  for seed in "${SEEDS[@]}"; do
    JOB_NAME="rlvr_${precision}_s${seed}"

    echo "Submitting: $JOB_NAME"

    # Submit job with environment variables.
    # --parsable makes sbatch print "jobid[;cluster]" instead of a
    # human-readable sentence, and capturing the output directly (no
    # pipeline) lets a failed submission be detected here.
    if ! SUBMIT_OUT=$(sbatch --parsable \
      --job-name="$JOB_NAME" \
      --export=ALL,PRECISION_MODE="$precision",SEED="$seed" \
      "$TRAIN_SCRIPT"); then
      abort_submission "sbatch failed for $JOB_NAME"
    fi

    # Drop the optional ";cluster" suffix, keeping only the job ID.
    JOB_ID="${SUBMIT_OUT%%;*}"
    if [[ ! "$JOB_ID" =~ ^[0-9]+$ ]]; then
      abort_submission "unexpected sbatch output for $JOB_NAME: $SUBMIT_OUT"
    fi

    JOB_IDS+=("$JOB_ID")
    echo "  -> Job ID: $JOB_ID"
  done
done

# Comma-separated form for `squeue -j`; the subshell keeps the IFS change local.
JOB_IDS_CSV=$(IFS=,; echo "${JOB_IDS[*]}")

# USER is normally set by the login environment; fall back to id(1) so that
# `set -u` does not abort the script after every job has been submitted.
CURRENT_USER="${USER:-$(id -un)}"

echo ""
echo "============================================"
echo "All jobs submitted!"
echo "Job IDs: ${JOB_IDS[*]}"
echo "============================================"
echo ""
echo "Monitor with:"
echo "  squeue -u $CURRENT_USER"
echo "  squeue -j $JOB_IDS_CSV"
echo ""
echo "View logs:"
echo "  tail -f results/slurm_logs/rlvr_*.out"
echo ""
echo "Cancel all:"
echo "  scancel ${JOB_IDS[*]}"
echo "============================================"

# Save job IDs for reference
echo "${JOB_IDS[*]}" > "$LOG_DIR/submitted_jobs.txt"
echo "Job IDs saved to: $LOG_DIR/submitted_jobs.txt"