blob: 7fe7492faea4dceff7b87a047d409b92955cd143 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
|
#!/bin/bash
# submit_single_job.sh
# Submit a single training job
# Usage: ./submit_single_job.sh <precision_mode> <seed>
# Example: ./submit_single_job.sh bf16 1
# Abort on unhandled errors, unset variables, and failures in any pipeline stage.
set -euo pipefail

# Positional arguments (both optional):
#   $1 - precision mode, forwarded to the job as PRECISION_MODE (default: bf16)
#   $2 - seed, forwarded to the job as SEED (default: 1)
PRECISION_MODE=${1:-"bf16"}
SEED=${2:-1}
PROJECT_DIR="/projects/bfqt/users/yurenh2/ml-projects/rl-floating-noise"

# The relative paths used below (results/, scripts/) are resolved against the
# project directory, so refuse to continue if it cannot be entered.
cd "$PROJECT_DIR" || {
  echo "Error: cannot change directory to $PROJECT_DIR" >&2
  exit 1
}
mkdir -p results/slurm_logs

JOB_NAME="rlvr_${PRECISION_MODE}_s${SEED}"
echo "Submitting job: $JOB_NAME"
echo " Precision: $PRECISION_MODE"
echo " Seed: $SEED"

# Capture sbatch's output on its own (not through a pipe) so that a rejected
# submission is detected via sbatch's exit status instead of being masked by
# the exit status of the parsing step.
if ! SBATCH_OUTPUT=$(sbatch \
  --job-name="$JOB_NAME" \
  --export=ALL,PRECISION_MODE="$PRECISION_MODE",SEED="$SEED" \
  scripts/slurm_train.sh); then
  echo "Error: sbatch failed to submit job $JOB_NAME" >&2
  exit 1
fi

# The job ID is taken from the fourth whitespace-separated field of sbatch's
# output (the default message has the form "Submitted batch job <id>").
JOB_ID=$(awk '{print $4}' <<<"$SBATCH_OUTPUT")
if [[ -z "$JOB_ID" ]]; then
  echo "Error: could not parse a job ID from sbatch output: $SBATCH_OUTPUT" >&2
  exit 1
fi

echo ""
echo "Submitted! Job ID: $JOB_ID"
echo ""
echo "Monitor with: squeue -j $JOB_ID"
echo "View output: tail -f results/slurm_logs/${JOB_NAME}_${JOB_ID}.out"
echo "Cancel: scancel $JOB_ID"
|