summary refs log tree commit diff
path: root/run_two_a6000_72h.sbatch
blob: ca9fb64e1f1af7cab883585397849680b6ad4d5b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#!/bin/bash
#
# Slurm batch job that runs the repository's one-command runner
# (./scripts/setup_and_run_two_a6000.sh) from the repository checkout.
#
# Resources requested below: one node, one task, two A6000 GPUs, 16 CPUs,
# 128G of memory, and a 72-hour time limit on the "orion" account/partition.
# Stdout and stderr are written to separate per-job files (%j in the file
# name patterns below is Slurm's job-ID placeholder).
#
# Environment read by this script:
#   ROOT_DIR  checkout to cd into before running (has a built-in default).
# SKIP_SETUP, ZINC_DEVICE, OGB_DEVICE, ZINC_EPOCHS, OGB_EPOCHS, OGB_TASK and
# SEED are only echoed here for the log; this script does not set or export
# them. NOTE(review): presumably they are consumed by the runner -- confirm.
#
#SBATCH --job-name=rrog-gnn-2xa6000
#SBATCH --output=slurm-rrog-gnn-%j.out
#SBATCH --error=slurm-rrog-gnn-%j.err
#SBATCH --time=72:00:00
#SBATCH --account=orion
#SBATCH --partition=orion
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --gres=gpu:a6000:2
#SBATCH --cpus-per-task=16
#SBATCH --mem=128G

# Strict mode: stop on any failing command, on expansion of an unset
# variable, and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Checkout to run from. A non-empty ROOT_DIR already present in the
# environment wins; otherwise the built-in path below is used.
: "${ROOT_DIR:=/orion/u/oscarwan/rrog-gnn-runner}"
cd "${ROOT_DIR}"

# Write the job context to the log, one item per line: where the job runs,
# followed by the run settings as seen in the environment. The value after
# each ':-' is only what gets printed when the variable is unset or empty;
# nothing here assigns or exports these variables.
# NOTE(review): confirm the printed fallbacks match the runner's own defaults.
printf '%s\n' \
  "Host: $(hostname)" \
  "Job ID: ${SLURM_JOB_ID:-local}" \
  "Workdir: $(pwd)" \
  "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-unset}" \
  "SKIP_SETUP=${SKIP_SETUP:-0}" \
  "ZINC_DEVICE=${ZINC_DEVICE:-cuda:0}" \
  "OGB_DEVICE=${OGB_DEVICE:-cuda:1}" \
  "ZINC_EPOCHS=${ZINC_EPOCHS:-200}" \
  "OGB_EPOCHS=${OGB_EPOCHS:-100}" \
  "OGB_TASK=${OGB_TASK:-ogbg-molhiv}" \
  "SEED=${SEED:-0}"

# Dump the GPU state into the log as well. With errexit active, a failing
# nvidia-smi stops the job here, before the runner is started.
nvidia-smi

# Path of the repository's one-command runner, relative to the current
# working directory. Kept in one place so the pre-flight check, the error
# message and the invocation cannot drift apart.
readonly runner_script=./scripts/setup_and_run_two_a6000.sh

# Fail fast when the runner is absent or not executable. The diagnostic is
# sent to stderr so it ends up in the job's --error file instead of being
# buried in the --output file.
if [[ ! -x "${runner_script}" ]]; then
  echo "FATAL: missing executable ${runner_script}" >&2
  exit 1
fi

# This intentionally delegates to the repository's one-command runner unchanged.
# Override repo-supported options with sbatch --export, e.g.:
#   sbatch --export=ALL,SKIP_SETUP=1 run_two_a6000_72h.sbatch
# The runner's exit status becomes the exit status of this batch script.
"${runner_script}"