summaryrefslogtreecommitdiff
path: root/scripts/submit_single_job.sh
blob: 7fe7492faea4dceff7b87a047d409b92955cd143 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
#!/bin/bash
# submit_single_job.sh
# Submit a single training job to Slurm via sbatch.
# Usage: ./submit_single_job.sh <precision_mode> <seed>
# Example: ./submit_single_job.sh bf16 1
#
# Arguments (both optional):
#   precision_mode  forwarded to the job as PRECISION_MODE (default: bf16)
#   seed            forwarded to the job as SEED (default: 1)
#
# Exit status: 0 when the job was submitted and a job id was obtained;
# non-zero when the project directory is unavailable, the log directory
# cannot be created, or sbatch fails / returns an unexpected job id.

# Fail fast: unset variables and failed commands abort instead of letting the
# script go on to print a bogus "Submitted!" message.
set -euo pipefail

PRECISION_MODE=${1:-"bf16"}
SEED=${2:-1}

PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/rl-floating-noise"
# All later paths are relative to the project root, so a failed cd must stop
# the script rather than run mkdir/sbatch in the caller's current directory.
cd "$PROJECT_DIR" || {
    echo "Error: cannot change directory to $PROJECT_DIR" >&2
    exit 1
}

mkdir -p results/slurm_logs || {
    echo "Error: cannot create results/slurm_logs" >&2
    exit 1
}

JOB_NAME="rlvr_${PRECISION_MODE}_s${SEED}"

echo "Submitting job: $JOB_NAME"
echo "  Precision: $PRECISION_MODE"
echo "  Seed: $SEED"

# --parsable makes sbatch print "<jobid>[;<cluster>]" instead of the
# human-readable "Submitted batch job <jobid>" line, so no positional text
# scraping is needed. Capturing the output directly (no pipeline) keeps
# sbatch's own exit status visible to the `if`.
if ! SUBMIT_OUTPUT=$(sbatch \
    --parsable \
    --job-name="$JOB_NAME" \
    --export=ALL,PRECISION_MODE="$PRECISION_MODE",SEED="$SEED" \
    scripts/slurm_train.sh); then
    echo "Error: sbatch failed to submit job $JOB_NAME" >&2
    exit 1
fi

# Drop the optional ";<cluster>" suffix emitted on multi-cluster setups.
JOB_ID=${SUBMIT_OUTPUT%%;*}

# Guard against an empty or malformed id before printing follow-up commands.
if ! [[ "$JOB_ID" =~ ^[0-9]+$ ]]; then
    echo "Error: unexpected sbatch output: '$SUBMIT_OUTPUT'" >&2
    exit 1
fi

echo ""
echo "Submitted! Job ID: $JOB_ID"
echo ""
echo "Monitor with: squeue -j $JOB_ID"
# NOTE(review): this log path assumes scripts/slurm_train.sh writes its output
# to results/slurm_logs/<job-name>_<job-id>.out — confirm against that script.
echo "View output:  tail -f results/slurm_logs/${JOB_NAME}_${JOB_ID}.out"
echo "Cancel:       scancel $JOB_ID"