diff options
Diffstat (limited to 'scripts/slurm_train.sh')
| -rw-r--r-- | scripts/slurm_train.sh | 4 |
1 files changed, 3 insertions, 1 deletions
diff --git a/scripts/slurm_train.sh b/scripts/slurm_train.sh
index e1df687..361ba94 100644
--- a/scripts/slurm_train.sh
+++ b/scripts/slurm_train.sh
@@ -1,4 +1,5 @@
 #!/bin/bash
+#SBATCH --signal=SIGUSR1@120
 export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache
 export TRANSFORMERS_CACHE=/projects/bfqt/users/yurenh2/hf_cache/transformers
 export HF_HUB_CACHE=/projects/bfqt/users/yurenh2/hf_cache/hub
@@ -18,4 +19,5 @@
 echo "GPU: $(nvidia-smi --query-gpu=name,memory.total --format=csv,noheader)"
 echo ""
 echo "=== Starting training ==="
-python3 -u scripts/train.py --config configs/sanity_check.yaml
+echo " Auto-resume enabled: will pick up from latest checkpoint"
+python3 -u scripts/train.py --config ${CONFIG:-configs/sanity_check.yaml}
