diff options
| -rw-r--r-- | run_two_a6000_72h.sbatch | 41 | ||||
| -rw-r--r-- | slurm-rrog-gnn-15967778.err | 0 | ||||
| -rw-r--r-- | slurm-rrog-gnn-15967778.out | 123 |
3 files changed, 164 insertions, 0 deletions
diff --git a/run_two_a6000_72h.sbatch b/run_two_a6000_72h.sbatch
new file mode 100644
index 0000000..ca9fb64
--- /dev/null
+++ b/run_two_a6000_72h.sbatch
@@ -0,0 +1,52 @@
+#!/bin/bash
+#SBATCH --job-name=rrog-gnn-2xa6000
+#SBATCH --output=slurm-rrog-gnn-%j.out
+#SBATCH --error=slurm-rrog-gnn-%j.err
+#SBATCH --time=72:00:00
+#SBATCH --account=orion
+#SBATCH --partition=orion
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=1
+#SBATCH --gres=gpu:a6000:2
+#SBATCH --cpus-per-task=16
+#SBATCH --mem=128G
+
+# Slurm batch wrapper: asks for one node with two A6000 GPUs, 16 CPUs and
+# 128G of memory for up to 72 hours, logs the job context, then runs
+# ./scripts/setup_and_run_two_a6000.sh from inside ROOT_DIR.
+
+set -euo pipefail
+
+# Directory to run from; taken from the environment when ROOT_DIR is set.
+ROOT_DIR="${ROOT_DIR:-/orion/u/oscarwan/rrog-gnn-runner}"
+cd "${ROOT_DIR}"
+
+# Log the job context. The ":-" fallbacks are display-only: nothing here
+# assigns or exports them, so the values the runner actually uses are set
+# elsewhere. NOTE(review): confirm the runner's defaults match these.
+echo "Host: $(hostname)"
+echo "Job ID: ${SLURM_JOB_ID:-local}"
+echo "Workdir: $(pwd)"
+echo "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-unset}"
+echo "SKIP_SETUP=${SKIP_SETUP:-0}"
+echo "ZINC_DEVICE=${ZINC_DEVICE:-cuda:0}"
+echo "OGB_DEVICE=${OGB_DEVICE:-cuda:1}"
+echo "ZINC_EPOCHS=${ZINC_EPOCHS:-200}"
+echo "OGB_EPOCHS=${OGB_EPOCHS:-100}"
+echo "OGB_TASK=${OGB_TASK:-ogbg-molhiv}"
+echo "SEED=${SEED:-0}"
+
+# Show the visible GPUs; under "set -e" a failing nvidia-smi ends the job here.
+nvidia-smi
+
+# Stop before any work if the runner is absent. The message goes to stderr
+# so it is written to the --error file rather than mixed into job output.
+[[ -x ./scripts/setup_and_run_two_a6000.sh ]] || {
+  echo "FATAL: missing executable ./scripts/setup_and_run_two_a6000.sh" >&2
+  exit 1
+}
+
+# This intentionally delegates to the repository's one-command runner unchanged.
+# Override repo-supported options with sbatch --export, e.g.: +# sbatch --export=ALL,SKIP_SETUP=1 run_two_a6000_72h.sbatch +./scripts/setup_and_run_two_a6000.sh diff --git a/slurm-rrog-gnn-15967778.err b/slurm-rrog-gnn-15967778.err new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/slurm-rrog-gnn-15967778.err diff --git a/slurm-rrog-gnn-15967778.out b/slurm-rrog-gnn-15967778.out new file mode 100644 index 0000000..22f4f29 --- /dev/null +++ b/slurm-rrog-gnn-15967778.out @@ -0,0 +1,123 @@ +Host: oriong13.stanford.edu +Job ID: 15967778 +Workdir: /orion/u/oscarwan/rrog-gnn-runner +CUDA_VISIBLE_DEVICES=0,1 +SKIP_SETUP=1 +ZINC_DEVICE=cuda:0 +OGB_DEVICE=cuda:1 +ZINC_EPOCHS=200 +OGB_EPOCHS=100 +OGB_TASK=ogbg-molhiv +SEED=0 +Sun Jun 21 17:12:56 2026 ++-----------------------------------------------------------------------------------------+ +| NVIDIA-SMI 580.82.07 Driver Version: 580.82.07 CUDA Version: 13.0 | ++-----------------------------------------+------------------------+----------------------+ +| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC | +| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. | +| | | MIG M. 
| +|=========================================+========================+======================| +| 0 NVIDIA RTX A6000 On | 00000000:01:00.0 Off | Off | +| 30% 37C P8 18W / 250W | 1MiB / 49140MiB | 0% Default | +| | | N/A | ++-----------------------------------------+------------------------+----------------------+ +| 1 NVIDIA RTX A6000 On | 00000000:E1:00.0 Off | Off | +| 30% 36C P8 21W / 250W | 1MiB / 49140MiB | 0% Default | +| | | N/A | ++-----------------------------------------+------------------------+----------------------+ + ++-----------------------------------------------------------------------------------------+ +| Processes: | +| GPU GI CI PID Type Process name GPU Memory | +| ID ID Usage | +|=========================================================================================| +| No running processes found | ++-----------------------------------------------------------------------------------------+ +[launch] ZINC-cycle56 on cuda:0 +[launch] ogbg-molhiv on cuda:1 +[pids] zinc=2935253 ogb=2935254 +[done] collecting summaries + +ZINC-cycle56 classic baseline +| task | backbone | n | val MAE-sum | test MAE-sum | +| --- | --- | --- | --- | --- | +| zinc-cycle56 | appnp | 1 | 1.0040 +/- 0.0000 | 1.0183 +/- 0.0000 | +| zinc-cycle56 | arma | 1 | 0.3456 +/- 0.0000 | 0.3406 +/- 0.0000 | +| zinc-cycle56 | cheb | 1 | 0.4038 +/- 0.0000 | 0.3887 +/- 0.0000 | +| zinc-cycle56 | film | 1 | 0.4906 +/- 0.0000 | 0.4998 +/- 0.0000 | +| zinc-cycle56 | gatv2 | 1 | 0.4084 +/- 0.0000 | 0.4012 +/- 0.0000 | +| zinc-cycle56 | gcn | 1 | 0.5821 +/- 0.0000 | 0.5854 +/- 0.0000 | +| zinc-cycle56 | gen | 1 | 0.3961 +/- 0.0000 | 0.3854 +/- 0.0000 | +| zinc-cycle56 | gin | 1 | 0.2981 +/- 0.0000 | 0.2725 +/- 0.0000 | +| zinc-cycle56 | gine | 1 | 0.2387 +/- 0.0000 | 0.2317 +/- 0.0000 | +| zinc-cycle56 | graphconv | 1 | 0.3507 +/- 0.0000 | 0.3462 +/- 0.0000 | +| zinc-cycle56 | graphsage | 1 | 0.4134 +/- 0.0000 | 0.4179 +/- 0.0000 | +| zinc-cycle56 | mf | 1 | 0.3051 +/- 0.0000 | 0.3031 
+/- 0.0000 | +| zinc-cycle56 | pna | 1 | 0.1565 +/- 0.0000 | 0.1539 +/- 0.0000 | +| zinc-cycle56 | resgated | 1 | 0.3183 +/- 0.0000 | 0.3168 +/- 0.0000 | +| zinc-cycle56 | sgc | 1 | 0.6287 +/- 0.0000 | 0.6432 +/- 0.0000 | +| zinc-cycle56 | tag | 1 | 0.2889 +/- 0.0000 | 0.2831 +/- 0.0000 | +| zinc-cycle56 | transformer | 1 | 0.3837 +/- 0.0000 | 0.3760 +/- 0.0000 | + +ZINC-cycle56 delta vs matching classic +| task | backbone | compute | n | val score (improvement) | test score (improvement) | +| --- | --- | --- | --- | --- | --- | +| zinc-cycle56 | appnp | fixed-rrog-T1-ns3+trace | 1 | 0.9730 (0.0310) | 0.9845 (0.0338) | +| zinc-cycle56 | arma | fixed-rrog-T1-ns3+trace | 1 | 0.2414 (0.1042) | 0.2378 (0.1028) | +| zinc-cycle56 | cheb | fixed-rrog-T1-ns3+trace | 1 | 0.2896 (0.1143) | 0.2830 (0.1057) | +| zinc-cycle56 | film | fixed-rrog-T1-ns3+trace | 1 | 0.6898 (-0.1993) | 0.6643 (-0.1646) | +| zinc-cycle56 | gatv2 | fixed-rrog-T1-ns3+trace | 1 | 0.3155 (0.0929) | 0.3121 (0.0891) | +| zinc-cycle56 | gcn | fixed-rrog-T1-ns3+trace | 1 | 0.4380 (0.1441) | 0.4603 (0.1251) | +| zinc-cycle56 | gen | fixed-rrog-T1-ns3+trace | 1 | 0.3554 (0.0407) | 0.3405 (0.0450) | +| zinc-cycle56 | gin | fixed-rrog-T1-ns3+trace | 1 | 0.2269 (0.0711) | 0.2222 (0.0502) | +| zinc-cycle56 | gine | fixed-rrog-T1-ns3+trace | 1 | 0.1641 (0.0745) | 0.1509 (0.0808) | +| zinc-cycle56 | graphconv | fixed-rrog-T1-ns3+trace | 1 | 0.2091 (0.1416) | 0.2060 (0.1402) | +| zinc-cycle56 | graphsage | fixed-rrog-T1-ns3+trace | 1 | 0.3381 (0.0753) | 0.3407 (0.0772) | +| zinc-cycle56 | mf | fixed-rrog-T1-ns3+trace | 1 | 0.1987 (0.1065) | 0.1911 (0.1120) | +| zinc-cycle56 | pna | fixed-rrog-T1-ns3+trace | 1 | 0.1216 (0.0349) | 0.1056 (0.0483) | +| zinc-cycle56 | resgated | fixed-rrog-T1-ns3+trace | 1 | 0.1834 (0.1350) | 0.1765 (0.1403) | +| zinc-cycle56 | sgc | fixed-rrog-T1-ns3+trace | 1 | 0.5008 (0.1278) | 0.5066 (0.1366) | +| zinc-cycle56 | tag | fixed-rrog-T1-ns3+trace | 1 | 0.1410 (0.1479) | 0.1386 (0.1445) 
| +| zinc-cycle56 | transformer | fixed-rrog-T1-ns3+trace | 1 | 0.3092 (0.0744) | 0.3268 (0.0491) | + +Classic baseline: task x backbone +| task | backbone | metric | n | val | test | +| --- | --- | --- | --- | --- | --- | +| ogbg-molhiv | appnp | rocauc | 1 | 0.7675 +/- 0.0000 | 0.7000 +/- 0.0000 | +| ogbg-molhiv | arma | rocauc | 1 | 0.7872 +/- 0.0000 | 0.7296 +/- 0.0000 | +| ogbg-molhiv | cheb | rocauc | 1 | 0.7831 +/- 0.0000 | 0.7282 +/- 0.0000 | +| ogbg-molhiv | film | rocauc | 1 | 0.7922 +/- 0.0000 | 0.7842 +/- 0.0000 | +| ogbg-molhiv | gatv2 | rocauc | 1 | 0.7835 +/- 0.0000 | 0.7563 +/- 0.0000 | +| ogbg-molhiv | gcn | rocauc | 1 | 0.7754 +/- 0.0000 | 0.7198 +/- 0.0000 | +| ogbg-molhiv | gen | rocauc | 1 | 0.7530 +/- 0.0000 | 0.7314 +/- 0.0000 | +| ogbg-molhiv | gin | rocauc | 1 | 0.8167 +/- 0.0000 | 0.7724 +/- 0.0000 | +| ogbg-molhiv | gine | rocauc | 1 | 0.7942 +/- 0.0000 | 0.7445 +/- 0.0000 | +| ogbg-molhiv | graphconv | rocauc | 1 | 0.7707 +/- 0.0000 | 0.7108 +/- 0.0000 | +| ogbg-molhiv | graphsage | rocauc | 1 | 0.7969 +/- 0.0000 | 0.7625 +/- 0.0000 | +| ogbg-molhiv | mf | rocauc | 1 | 0.7845 +/- 0.0000 | 0.7074 +/- 0.0000 | +| ogbg-molhiv | pna | rocauc | 1 | 0.7780 +/- 0.0000 | 0.7648 +/- 0.0000 | +| ogbg-molhiv | resgated | rocauc | 1 | 0.8144 +/- 0.0000 | 0.7242 +/- 0.0000 | +| ogbg-molhiv | sgc | rocauc | 1 | 0.7479 +/- 0.0000 | 0.7019 +/- 0.0000 | +| ogbg-molhiv | tag | rocauc | 1 | 0.7528 +/- 0.0000 | 0.7255 +/- 0.0000 | +| ogbg-molhiv | transformer | rocauc | 1 | 0.7617 +/- 0.0000 | 0.7547 +/- 0.0000 | + +Delta vs matching classic +| task | backbone | compute | metric | n | val score (delta) | test score (delta) | steps | +| --- | --- | --- | --- | --- | --- | --- | --- | +| ogbg-molhiv | appnp | fixed-rrog | rocauc | 1 | 0.7203 (-0.0471) | 0.6825 (-0.0175) | | +| ogbg-molhiv | arma | fixed-rrog | rocauc | 1 | 0.7926 (0.0054) | 0.7318 (0.0021) | | +| ogbg-molhiv | cheb | fixed-rrog | rocauc | 1 | 0.7735 (-0.0096) | 0.7426 (0.0144) | | +| 
ogbg-molhiv | film | fixed-rrog | rocauc | 1 | 0.7633 (-0.0288) | 0.7728 (-0.0114) | | +| ogbg-molhiv | gatv2 | fixed-rrog | rocauc | 1 | 0.7835 (0.0001) | 0.7466 (-0.0096) | | +| ogbg-molhiv | gcn | fixed-rrog | rocauc | 1 | 0.7455 (-0.0300) | 0.7483 (0.0285) | | +| ogbg-molhiv | gen | fixed-rrog | rocauc | 1 | 0.8128 (0.0598) | 0.7631 (0.0318) | | +| ogbg-molhiv | gin | fixed-rrog | rocauc | 1 | 0.7524 (-0.0644) | 0.7324 (-0.0400) | | +| ogbg-molhiv | gine | fixed-rrog | rocauc | 1 | 0.7575 (-0.0368) | 0.7401 (-0.0044) | | +| ogbg-molhiv | graphconv | fixed-rrog | rocauc | 1 | 0.7713 (0.0006) | 0.6979 (-0.0129) | | +| ogbg-molhiv | graphsage | fixed-rrog | rocauc | 1 | 0.7587 (-0.0382) | 0.7641 (0.0016) | | +| ogbg-molhiv | mf | fixed-rrog | rocauc | 1 | 0.7931 (0.0085) | 0.7217 (0.0143) | | +| ogbg-molhiv | pna | fixed-rrog | rocauc | 1 | 0.7890 (0.0111) | 0.7630 (-0.0018) | | +| ogbg-molhiv | resgated | fixed-rrog | rocauc | 1 | 0.8174 (0.0030) | 0.7055 (-0.0187) | | +| ogbg-molhiv | sgc | fixed-rrog | rocauc | 1 | 0.7482 (0.0003) | 0.7177 (0.0158) | | +| ogbg-molhiv | tag | fixed-rrog | rocauc | 1 | 0.7804 (0.0276) | 0.7352 (0.0096) | | +| ogbg-molhiv | transformer | fixed-rrog | rocauc | 1 | 0.8088 (0.0471) | 0.7413 (-0.0134) | | |
