summary refs log tree commit diff
path: root/scripts/run_ogb_act_two_gpu.sh
blob: f51262bd6d25e8ca9a2e99086faacf69a85c5004 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!/usr/bin/env bash
# Strict mode: abort on command failure, on use of an unset variable, and on a
# failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# The repository root is the parent of the directory holding this script.
# Work from there so the relative paths used below (scripts/, logs/,
# summaries/) resolve regardless of the caller's working directory.
ROOT_DIR="$(
  cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd
)"
cd "${ROOT_DIR}"

# Put the repository root first on Python's module search path, keeping any
# value the caller already had.
PYTHONPATH="${ROOT_DIR}:${PYTHONPATH:-}"
export PYTHONPATH

# Devices for the two queues; each falls back to its default when unset or empty.
: "${GPU0:=cuda:0}"
: "${GPU1:=cuda:1}"

# Task lists are whitespace-separated. The default applies only when the
# variable is UNSET (note "-" rather than ":-"): an explicitly empty value is
# kept as empty.
TASKS_GPU0="${TASKS_GPU0-ogbg-molhiv ogbg-molbbbp ogbg-molsider ogbg-molbace}"
TASKS_GPU1="${TASKS_GPU1-ogbg-molesol ogbg-mollipo ogbg-moltox21 ogbg-molclintox}"

# Run settings; each can be overridden from the environment.
: "${EPOCHS:=100}"
# SEEDS wins over SEED; with neither set, "0" is used.
: "${SEEDS:=${SEED:-0}}"
: "${HALT_MAX:=8}"
: "${HALT_MIN:=2}"
: "${HALT_TARGET:=loss}"
: "${HALT_LOSS_THRESHOLD:=0.2}"
: "${HALT_EXPLORATION_PROB:=0.1}"
: "${LAM_Q:=0.1}"
: "${Q_WARMUP:=0}"
: "${ACT_TRAIN_MODE:=stream}"

# Output directories, relative to the current working directory.
mkdir -p logs summaries

# Print $1 parsed as a float and rendered with Python's "g" format
# (e.g. "0.10" -> "0.1").
# Arguments: $1 - text to parse as a float
# Outputs:   the formatted number on stdout
# Returns:   python3's exit status (non-zero when $1 is not a valid float)
fmt_float() {
  python3 -c 'import sys; print(format(float(sys.argv[1]), "g"))' "$1"
}

# Build the tag that identifies a run configuration in log file names.
# Globals (read): ACT_TRAIN_MODE, HALT_MAX, HALT_MIN, HALT_TARGET,
#                 HALT_LOSS_THRESHOLD, LAM_Q, HALT_EXPLORATION_PROB,
#                 Q_WARMUP, EPOCHS, SEEDS
# Outputs: the tag on stdout; spaces in SEEDS are replaced by "-".
# Returns: 0 on success, or fmt_float's non-zero status when a numeric
#          setting cannot be formatted (nothing is printed in that case).
target_log_tag() {
  local target_tag="${HALT_TARGET}"
  local loss_part lam_part hex_part

  # Each fmt_float call is captured on its own line: a command substitution
  # embedded in echo's arguments would have its exit status discarded, and a
  # bad value would silently produce a malformed tag.
  if [[ "${HALT_TARGET}" == "loss" ]]; then
    # Only the "loss" target carries its threshold in the tag.
    loss_part="$(fmt_float "${HALT_LOSS_THRESHOLD}")" || return
    target_tag="loss${loss_part}"
  fi
  lam_part="$(fmt_float "${LAM_Q}")" || return
  hex_part="$(fmt_float "${HALT_EXPLORATION_PROB}")" || return

  echo "${ACT_TRAIN_MODE}_hm${HALT_MAX}_hmin${HALT_MIN}_${target_tag}_lq${lam_part}_hex${hex_part}_qw${Q_WARMUP}_e${EPOCHS}_s${SEEDS// /-}"
}

# Run a list of tasks one after another on a single device.
# Globals (read): EPOCHS, SEEDS, HALT_MAX, HALT_MIN, HALT_TARGET,
#                 HALT_LOSS_THRESHOLD, HALT_EXPLORATION_PROB, LAM_Q,
#                 Q_WARMUP, ACT_TRAIN_MODE
# Arguments: $1  - device passed to the task script as DEVICE
#            $2+ - task names; empty names are skipped
# Outputs:   a "[task]" line per task plus the task script's combined
#            stdout/stderr, which is also written to
#            logs/<task>_act_<tag>.log
run_queue() {
  local gpu="$1"
  shift

  # The tag depends only on global settings, so compute it once per queue.
  local log_tag
  log_tag="$(target_log_tag)"

  local name
  for name in "$@"; do
    [[ -n "${name}" ]] || continue
    printf '%s\n' "[task] ${name} on ${gpu}"
    # Settings are handed to the task script through its environment;
    # COLLECT=0 is passed alongside them.
    TASK="${name}" \
      DEVICE="${gpu}" \
      EPOCHS="${EPOCHS}" \
      SEEDS="${SEEDS}" \
      HALT_MAX="${HALT_MAX}" \
      HALT_MIN="${HALT_MIN}" \
      HALT_TARGET="${HALT_TARGET}" \
      HALT_LOSS_THRESHOLD="${HALT_LOSS_THRESHOLD}" \
      HALT_EXPLORATION_PROB="${HALT_EXPLORATION_PROB}" \
      LAM_Q="${LAM_Q}" \
      Q_WARMUP="${Q_WARMUP}" \
      ACT_TRAIN_MODE="${ACT_TRAIN_MODE}" \
      COLLECT=0 \
      ./scripts/run_ogb_act_task.sh 2>&1 \
      | tee "logs/${name}_act_${log_tag}.log"
  done
}

# Split the whitespace-separated task lists into arrays. An empty list leaves
# the array empty, and no queue is started for that GPU.
tasks0=()
tasks1=()
if [[ -n "${TASKS_GPU0}" ]]; then
  read -r -a tasks0 <<< "${TASKS_GPU0}"
fi
if [[ -n "${TASKS_GPU1}" ]]; then
  read -r -a tasks1 <<< "${TASKS_GPU1}"
fi

# Start one background queue per GPU that has work, remembering each PID.
pids=()
if (( ${#tasks0[@]} > 0 )); then
  echo "[launch] ${GPU0}: ${tasks0[*]}"
  run_queue "${GPU0}" "${tasks0[@]}" &
  pids+=("$!")
fi
if (( ${#tasks1[@]} > 0 )); then
  echo "[launch] ${GPU1}: ${tasks1[*]}"
  run_queue "${GPU1}" "${tasks1[@]}" &
  pids+=("$!")
fi

# Wait for EVERY queue before reacting to a failure. A bare `wait` under
# `set -e` would exit on the first failed queue and leave the other one
# running unattended in the background.
# The ${pids[@]+...} form keeps an empty array from tripping `set -u` on
# bash versions older than 4.4.
queue_status=0
for pid in ${pids[@]+"${pids[@]}"}; do
  pid_status=0
  wait "${pid}" || pid_status=$?
  if (( pid_status != 0 )); then
    echo "[error] queue with pid ${pid} exited with status ${pid_status}" >&2
    # Report the first failure's status as the script's exit status.
    if (( queue_status == 0 )); then
      queue_status="${pid_status}"
    fi
  fi
done
if (( queue_status != 0 )); then
  # As before, summaries are only collected when all queues succeeded.
  exit "${queue_status}"
fi

echo "[done] collecting summaries"
OGB_EPOCHS="${EPOCHS}" ./scripts/collect_results.sh