summaryrefslogtreecommitdiff
path: root/collaborativeagents/slurm/start_model_servers.sh
diff options
context:
space:
mode:
Diffstat (limited to 'collaborativeagents/slurm/start_model_servers.sh')
-rw-r--r--collaborativeagents/slurm/start_model_servers.sh82
1 file changed, 82 insertions, 0 deletions
diff --git a/collaborativeagents/slurm/start_model_servers.sh b/collaborativeagents/slurm/start_model_servers.sh
new file mode 100644
index 0000000..1f4e177
--- /dev/null
+++ b/collaborativeagents/slurm/start_model_servers.sh
@@ -0,0 +1,82 @@
#!/bin/bash
#SBATCH --job-name=model_servers
#SBATCH --account=bfqt-delta-gpu
#SBATCH --partition=gpuH200x8
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=32
#SBATCH --gres=gpu:h200:8
#SBATCH --mem=400G
#SBATCH --time=24:00:00
#SBATCH --output=logs/model_servers_%j.out
#SBATCH --error=logs/model_servers_%j.err

# Start sglang model servers for experiments.
# - Port 8004: Llama-3.3-70B-Instruct (user simulator + judge) - 4 GPUs (TP=4)
# - Port 8003: Llama-3.1-8B-Instruct  (agent)                  - 2 GPUs (TP=2)
#
# Writes connection info to collaborativeagents/slurm/server_info.txt so the
# experiment runner can discover the API endpoints, then blocks until both
# servers exit (i.e. until the job hits its time limit or is cancelled).

set -euo pipefail

readonly PROJECT_ROOT=/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model
readonly LOG_DIR=collaborativeagents/slurm/logs

cd "$PROJECT_ROOT"

# Single log directory for both server logs (the original wrote server logs to
# a relative "logs/" that was never created; keep everything under $LOG_DIR).
mkdir -p "$LOG_DIR"

echo "Starting model servers at $(date)"
echo "Job ID: $SLURM_JOB_ID"
echo "Node: $SLURMD_NODENAME"
echo "GPUs: ${CUDA_VISIBLE_DEVICES:-unset}"

# Activate environment with sglang
source /u/yurenh2/miniforge3/etc/profile.d/conda.sh
conda activate eval

# Check GPU availability
nvidia-smi

# Kill both servers on any exit path (error, scancel, time limit) so no
# orphaned GPU processes survive the job.
cleanup() {
  echo "Shutting down model servers..."
  [[ -n "${SERVER_70B_PID:-}" ]] && kill "$SERVER_70B_PID" 2>/dev/null || true
  [[ -n "${SERVER_8B_PID:-}" ]] && kill "$SERVER_8B_PID" 2>/dev/null || true
}
trap cleanup EXIT

#######################################
# Poll an OpenAI-compatible server until it answers /v1/models.
# Arguments: $1 - port, $2 - max seconds to wait
# Returns:   0 once the server responds; 1 on timeout.
#######################################
wait_for_server() {
  local port=$1 timeout=$2 waited=0
  until curl -sf "http://localhost:${port}/v1/models" >/dev/null 2>&1; do
    if (( waited >= timeout )); then
      echo "ERROR: server on port ${port} not up after ${timeout}s" >&2
      return 1
    fi
    sleep 10
    waited=$(( waited + 10 ))
  done
  echo "Server on port ${port} is up (waited ${waited}s)"
}

# Start 70B model server (user simulator + judge) - needs 4 GPUs for TP.
# NOTE: redirect straight to the log file instead of piping through `tee` —
# with `cmd | tee … &`, $! is the PID of tee, not the server, so the saved
# PIDs (and the final `wait`/cleanup `kill`) would target the wrong process.
echo "Starting Llama-3.3-70B-Instruct server on port 8004..."
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m sglang.launch_server \
  --model-path meta-llama/Llama-3.3-70B-Instruct \
  --port 8004 \
  --tp-size 4 \
  --context-length 16384 \
  --mem-fraction-static 0.85 \
  > "$LOG_DIR/server_70b_${SLURM_JOB_ID}.log" 2>&1 &
SERVER_70B_PID=$!

# Start 8B model server (agent) - needs 2 GPUs. Launched immediately; both
# servers load weights in parallel instead of a blind fixed sleep.
echo "Starting Llama-3.1-8B-Instruct server on port 8003..."
CUDA_VISIBLE_DEVICES=4,5 python -m sglang.launch_server \
  --model-path models/llama-3.1-8b-instruct \
  --served-model-name meta-llama/Llama-3.1-8B-Instruct \
  --port 8003 \
  --tp-size 2 \
  --context-length 16384 \
  --mem-fraction-static 0.85 \
  > "$LOG_DIR/server_8b_${SLURM_JOB_ID}.log" 2>&1 &
SERVER_8B_PID=$!

echo "Servers starting..."
echo "70B server PID: $SERVER_70B_PID"
echo "8B server PID: $SERVER_8B_PID"

# Block until both servers are actually serving before publishing endpoints
# (the 70B model can take several minutes to load; allow up to 20 minutes).
wait_for_server 8004 1200
wait_for_server 8003 1200

# Save server info for experiment runner (only written once both are healthy).
cat > collaborativeagents/slurm/server_info.txt << EOF
NODE=$SLURMD_NODENAME
JOB_ID=$SLURM_JOB_ID
SERVER_70B_PID=$SERVER_70B_PID
SERVER_8B_PID=$SERVER_8B_PID
USER_API_BASE=http://$SLURMD_NODENAME:8004/v1
AGENT_API_BASE=http://$SLURMD_NODENAME:8003/v1
JUDGE_API_BASE=http://$SLURMD_NODENAME:8004/v1
EOF

echo "Server info saved to collaborativeagents/slurm/server_info.txt"

# Wait for both servers; a failing server ends the job (set -e propagates
# the non-zero status from wait).
wait "$SERVER_70B_PID" "$SERVER_8B_PID"