#!/bin/bash
#SBATCH --job-name=model_servers
#SBATCH --account=bfqt-delta-gpu
#SBATCH --partition=gpuH200x8
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=32
#SBATCH --gres=gpu:h200:8
#SBATCH --mem=400G
#SBATCH --time=24:00:00
#SBATCH --output=logs/model_servers_%j.out
#SBATCH --error=logs/model_servers_%j.err
#
# Start sglang model servers for experiments.
#   - Port 8004: Llama-3.3-70B-Instruct (user simulator + judge) - 4 GPUs
#   - Port 8003: Llama-3.1-8B-Instruct (agent) - 2 GPUs
#
# NOTE: the #SBATCH --output/--error paths are resolved by SLURM relative to
# the submission directory, before this script runs; that logs/ directory
# must already exist when the job is submitted.
#
# Optional environment:
#   STARTUP_TIMEOUT  seconds to wait for each server's health check
#                    (default: 1800)

# -u is deliberately omitted: conda activation hooks and the SLURM_* variables
# may be unset (e.g. when the script is run outside of a SLURM allocation).
set -eo pipefail

readonly PROJECT_DIR=/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model
readonly SLURM_DIR=collaborativeagents/slurm
readonly LOG_DIR="${SLURM_DIR}/logs"
readonly SERVER_INFO="${SLURM_DIR}/server_info.txt"
readonly STARTUP_TIMEOUT="${STARTUP_TIMEOUT:-1800}"

SERVER_70B_PID=""
SERVER_8B_PID=""

#######################################
# Terminate any server that was started by this script.
# Globals:   SERVER_70B_PID, SERVER_8B_PID (read)
# Returns:   always 0, so the script's own exit status is preserved
#######################################
cleanup() {
  local pid
  for pid in "$SERVER_70B_PID" "$SERVER_8B_PID"; do
    if [[ -n "$pid" ]]; then
      kill "$pid" 2>/dev/null || true
    fi
  done
  return 0
}
trap cleanup EXIT

#######################################
# Block until a server answers its health endpoint.
# Globals:   STARTUP_TIMEOUT (read)
# Arguments: $1 - human-readable server name (for messages)
#            $2 - TCP port the server listens on
#            $3 - PID of the server process
# Outputs:   progress on stdout, errors on stderr
# Returns:   0 when the server is healthy; 1 if the process exits or the
#            timeout elapses first
#######################################
wait_for_server() {
  local name=$1 port=$2 pid=$3
  local deadline=$(( SECONDS + STARTUP_TIMEOUT ))

  if ! command -v curl >/dev/null; then
    # No way to probe the endpoint; fall back to a fixed startup delay.
    echo "WARNING: curl not found; sleeping 120s instead of polling ${name}" >&2
    sleep 120
    return 0
  fi

  # --noproxy: cluster-wide http_proxy settings must not intercept loopback.
  until curl --silent --fail --noproxy '*' --max-time 5 --output /dev/null \
    "http://127.0.0.1:${port}/health"; do
    if ! kill -0 "$pid" 2>/dev/null; then
      echo "ERROR: ${name} server (PID ${pid}) exited during startup" >&2
      return 1
    fi
    if (( SECONDS >= deadline )); then
      echo "ERROR: ${name} server not healthy after ${STARTUP_TIMEOUT}s" >&2
      return 1
    fi
    sleep 5
  done
  echo "${name} server is ready on port ${port}"
}

cd "$PROJECT_DIR"

# Create logs directory (the same directory the server logs are written to).
mkdir -p "$LOG_DIR"

echo "Starting model servers at $(date)"
echo "Job ID: $SLURM_JOB_ID"
echo "Node: $SLURMD_NODENAME"
echo "GPUs: $CUDA_VISIBLE_DEVICES"

# Activate environment with sglang
source /u/yurenh2/miniforge3/etc/profile.d/conda.sh
conda activate eval

# Check GPU availability
nvidia-smi

# Start 70B model server (user simulator + judge) - needs 4 GPUs for TP.
# Output goes through a process substitution instead of `| tee` so that $!
# is the PID of the python server itself rather than of tee.
# --host 0.0.0.0: the URLs published in server_info.txt use the node's
# hostname, so the server must listen on a non-loopback interface.
echo "Starting Llama-3.3-70B-Instruct server on port 8004..."
CUDA_VISIBLE_DEVICES=0,1,2,3 python -m sglang.launch_server \
  --model-path meta-llama/Llama-3.3-70B-Instruct \
  --host 0.0.0.0 \
  --port 8004 \
  --tp-size 4 \
  --context-length 16384 \
  --mem-fraction-static 0.85 \
  > >(tee "${LOG_DIR}/server_70b_${SLURM_JOB_ID}.log") 2>&1 &
SERVER_70B_PID=$!

# Wait for the 70B server to finish loading (takes a few minutes) before
# starting the second server.
echo "Waiting for 70B server to initialize..."
wait_for_server "70B" 8004 "$SERVER_70B_PID"

# Start 8B model server (agent) - needs 2 GPUs
echo "Starting Llama-3.1-8B-Instruct server on port 8003..."
CUDA_VISIBLE_DEVICES=4,5 python -m sglang.launch_server \
  --model-path models/llama-3.1-8b-instruct \
  --served-model-name meta-llama/Llama-3.1-8B-Instruct \
  --host 0.0.0.0 \
  --port 8003 \
  --tp-size 2 \
  --context-length 16384 \
  --mem-fraction-static 0.85 \
  > >(tee "${LOG_DIR}/server_8b_${SLURM_JOB_ID}.log") 2>&1 &
SERVER_8B_PID=$!

echo "Servers starting..."
echo "70B server PID: $SERVER_70B_PID"
echo "8B server PID: $SERVER_8B_PID"

# Save server info for experiment runner
cat > "$SERVER_INFO" << EOF
NODE=$SLURMD_NODENAME
JOB_ID=$SLURM_JOB_ID
SERVER_70B_PID=$SERVER_70B_PID
SERVER_8B_PID=$SERVER_8B_PID
USER_API_BASE=http://$SLURMD_NODENAME:8004/v1
AGENT_API_BASE=http://$SLURMD_NODENAME:8003/v1
JUDGE_API_BASE=http://$SLURMD_NODENAME:8004/v1
EOF

echo "Server info saved to collaborativeagents/slurm/server_info.txt"

# Fail fast if the 8B server never becomes healthy.
wait_for_server "8B" 8003 "$SERVER_8B_PID"

# Wait for both servers
wait "$SERVER_70B_PID" "$SERVER_8B_PID"