#!/bin/bash
#
# Start vLLM servers for personalization experiments.
#
# GPU layout (4x H200):
#   GPU 0-1: 70B user simulator (TP=2) -> port 8004
#   GPU 2:   8B agent                  -> port 8003
#   GPU 3:   8B reward model           -> port 8005
#
# Usage: run with no arguments.
#
# Optional environment variables:
#   VLLM_STARTUP_TIMEOUT  Max seconds to wait for all servers to report
#                         healthy (default: 900).
#   VLLM_POLL_INTERVAL    Seconds between health polls (default: 5).
#
# Exit status:
#   0  every server answered its /health endpoint successfully
#   1  a precondition failed, a server process exited during startup, or a
#      server was still not healthy when the timeout expired

set -euo pipefail

readonly PROJECT_ROOT="/workspace/personalization-user-model"
readonly MODEL_8B="${PROJECT_ROOT}/models/llama-3.1-8b-instruct"
readonly MODEL_70B="${PROJECT_ROOT}/models/llama-3.1-70b-instruct"
readonly LOG_DIR="${PROJECT_ROOT}/logs"

# PID of the server most recently launched by start_server. A global is used
# instead of $(...) so the server stays a direct child of this shell.
LAST_SERVER_PID=""

# Parallel arrays describing the servers to health-check (filled in by main).
HEALTH_PORTS=()
HEALTH_PIDS=()
HEALTH_LOGS=()

# Print a message to stderr and abort the script.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

#######################################
# Launch one vLLM OpenAI-compatible API server in the background.
# Globals:   LAST_SERVER_PID (written)
# Arguments: $1 - CUDA_VISIBLE_DEVICES value (e.g. "0,1")
#            $2 - model path
#            $3 - port to listen on
#            $4 - tensor-parallel size
#            $5 - max model length
#            $6 - GPU memory utilization fraction
#            $7 - log file receiving the server's stdout and stderr
#######################################
start_server() {
  local gpus=$1 model=$2 port=$3 tp_size=$4 max_len=$5 mem_util=$6 log_file=$7

  CUDA_VISIBLE_DEVICES="${gpus}" python3 -m vllm.entrypoints.openai.api_server \
    --model "${model}" \
    --port "${port}" \
    --tensor-parallel-size "${tp_size}" \
    --dtype bfloat16 \
    --max-model-len "${max_len}" \
    --gpu-memory-utilization "${mem_util}" \
    --disable-log-requests \
    > "${log_file}" 2>&1 &
  LAST_SERVER_PID=$!
}

#######################################
# Probe a server's /health endpoint once.
# -f makes curl fail on HTTP error statuses (plain -s would exit 0 on a 5xx);
# --max-time keeps a wedged connection from stalling the poll loop.
# Arguments: $1 - port
# Returns:   0 if the endpoint answered successfully, non-zero otherwise
#######################################
is_healthy() {
  curl -sf --max-time 5 -o /dev/null "http://localhost:${1}/health"
}

#######################################
# Poll every server in HEALTH_PORTS until each is healthy, has exited, or the
# timeout expires. Prints one status line per server as its state is decided.
# Globals:   HEALTH_PORTS, HEALTH_PIDS, HEALTH_LOGS (read)
# Arguments: $1 - timeout in seconds
#            $2 - seconds to sleep between polling rounds
# Returns:   0 if all servers became healthy, 1 otherwise
#######################################
wait_for_servers() {
  local timeout=$1 interval=$2
  local deadline=$(( SECONDS + timeout ))
  local -a state=()
  local i pending failures=0

  for i in "${!HEALTH_PORTS[@]}"; do
    state[i]="WAITING"
  done

  while true; do
    pending=0
    for i in "${!HEALTH_PORTS[@]}"; do
      [[ "${state[i]}" == "WAITING" ]] || continue
      if is_healthy "${HEALTH_PORTS[i]}"; then
        state[i]="OK"
        echo "  Port ${HEALTH_PORTS[i]}: OK"
      elif ! kill -0 "${HEALTH_PIDS[i]}" 2>/dev/null; then
        # The server process is gone, so further waiting cannot help.
        state[i]="FAILED"
        failures=$(( failures + 1 ))
        echo "  Port ${HEALTH_PORTS[i]}: FAILED (process exited; see ${HEALTH_LOGS[i]})" >&2
      else
        pending=$(( pending + 1 ))
      fi
    done

    if (( pending == 0 )); then
      break
    fi

    if (( SECONDS >= deadline )); then
      for i in "${!HEALTH_PORTS[@]}"; do
        if [[ "${state[i]}" == "WAITING" ]]; then
          failures=$(( failures + 1 ))
          echo "  Port ${HEALTH_PORTS[i]}: WAITING... (not healthy after ${timeout}s; see ${HEALTH_LOGS[i]})" >&2
        fi
      done
      break
    fi

    sleep "${interval}"
  done

  if (( failures > 0 )); then
    return 1
  fi
  return 0
}

main() {
  local timeout="${VLLM_STARTUP_TIMEOUT:-900}"
  local interval="${VLLM_POLL_INTERVAL:-5}"
  local rc=0

  # Validate everything BEFORE killing the servers that are currently running.
  [[ "${timeout}" =~ ^[0-9]+$ ]] \
    || die "VLLM_STARTUP_TIMEOUT must be a non-negative integer, got '${timeout}'"
  [[ "${interval}" =~ ^[1-9][0-9]*$ ]] \
    || die "VLLM_POLL_INTERVAL must be a positive integer, got '${interval}'"
  command -v curl > /dev/null || die "curl is required for health checks"
  [[ -e "${MODEL_8B}" ]] || die "model not found: ${MODEL_8B}"
  [[ -e "${MODEL_70B}" ]] || die "model not found: ${MODEL_70B}"

  mkdir -p "${LOG_DIR}"

  # Kill any existing vLLM servers. pkill exits non-zero when nothing matched,
  # which is expected on a clean machine, hence the deliberate '|| true'.
  pkill -f "vllm.entrypoints" 2>/dev/null || true
  sleep 2

  echo "Starting vLLM servers..."

  # GPU 0-1: 70B User Simulator (TP=2)
  echo "Starting 70B user simulator on GPU 0-1 (port 8004)..."
  start_server "0,1" "${MODEL_70B}" 8004 2 4096 0.90 "${LOG_DIR}/vllm_user_70b.log"
  USER_PID=${LAST_SERVER_PID}
  echo "70B user simulator PID: $USER_PID"

  # GPU 2: 8B Agent
  echo "Starting 8B agent on GPU 2 (port 8003)..."
  start_server "2" "${MODEL_8B}" 8003 1 8192 0.90 "${LOG_DIR}/vllm_agent_8b.log"
  AGENT_PID=${LAST_SERVER_PID}
  echo "8B agent PID: $AGENT_PID"

  # GPU 3: 8B Reward Model
  echo "Starting 8B reward model on GPU 3 (port 8005)..."
  start_server "3" "${MODEL_8B}" 8005 1 4096 0.50 "${LOG_DIR}/vllm_reward_8b.log"
  REWARD_PID=${LAST_SERVER_PID}
  echo "8B reward model PID: $REWARD_PID"

  HEALTH_PORTS=(8003 8004 8005)
  HEALTH_PIDS=("${AGENT_PID}" "${USER_PID}" "${REWARD_PID}")
  HEALTH_LOGS=(
    "${LOG_DIR}/vllm_agent_8b.log"
    "${LOG_DIR}/vllm_user_70b.log"
    "${LOG_DIR}/vllm_reward_8b.log"
  )

  echo ""
  echo "Waiting for servers to initialize (up to ${timeout}s)..."

  # Health checks: poll rather than sleeping a fixed time, since startup time
  # varies from server to server.
  echo "Checking server health..."
  wait_for_servers "${timeout}" "${interval}" || rc=$?

  echo ""
  echo "Server PIDs: User=$USER_PID, Agent=$AGENT_PID, Reward=$REWARD_PID"
  echo "Logs: ${LOG_DIR}/vllm_*.log"

  return "${rc}"
}

main "$@"