blob: 44e211bbdf739e714f8ad5b83908afadc28cb6e2 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
|
#!/bin/bash
# Start vLLM servers for personalization experiments.
#
# GPU layout (4x H200):
#   GPU 0-1: 70B user simulator (TP=2), port 8004
#   GPU 2:   8B agent,                  port 8003
#   GPU 3:   8B reward model,           port 8005
#
# Optional environment:
#   VLLM_STARTUP_TIMEOUT  total seconds to wait for all servers to answer
#                         /health (default: 600)
#   VLLM_POLL_INTERVAL    seconds between health probes (default: 5)
#
# Exit status:
#   0  every server answered /health with a success status
#   1  a server process exited or did not become healthy before the timeout
#   2  VLLM_STARTUP_TIMEOUT is not a non-negative integer
#
# On failure, servers that did start are left running so their logs can be
# inspected; re-running this script kills them before starting again.
set -euo pipefail

PROJECT_ROOT="/workspace/personalization-user-model"
MODEL_8B="${PROJECT_ROOT}/models/llama-3.1-8b-instruct"
MODEL_70B="${PROJECT_ROOT}/models/llama-3.1-70b-instruct"
LOG_DIR="${PROJECT_ROOT}/logs"
STARTUP_TIMEOUT="${VLLM_STARTUP_TIMEOUT:-600}"
POLL_INTERVAL="${VLLM_POLL_INTERVAL:-5}"

if [[ ! "${STARTUP_TIMEOUT}" =~ ^[0-9]+$ ]]; then
  echo "VLLM_STARTUP_TIMEOUT must be a non-negative integer, got: ${STARTUP_TIMEOUT}" >&2
  exit 2
fi

#######################################
# Launch one vLLM OpenAI-compatible API server in the background.
# Globals:   LAUNCHED_PID (written) - PID of the process just started
# Arguments: $1 - CUDA_VISIBLE_DEVICES value (e.g. "0,1")
#            $2 - model path
#            $3 - port to listen on
#            $4 - tensor-parallel size
#            $5 - max model length
#            $6 - GPU memory utilization fraction
#            $7 - log file receiving stdout and stderr
#######################################
launch_server() {
  local gpus=$1 model=$2 port=$3 tp_size=$4 max_len=$5 mem_util=$6 log_file=$7

  CUDA_VISIBLE_DEVICES="${gpus}" python3 -m vllm.entrypoints.openai.api_server \
    --model "${model}" \
    --port "${port}" \
    --tensor-parallel-size "${tp_size}" \
    --dtype bfloat16 \
    --max-model-len "${max_len}" \
    --gpu-memory-utilization "${mem_util}" \
    --disable-log-requests \
    > "${log_file}" 2>&1 &
  # Returned through a global: capturing via $(...) would start the server
  # inside a subshell.
  LAUNCHED_PID=$!
}

#######################################
# Poll a server's /health endpoint until it succeeds, its process exits,
# or the shared deadline passes.
# Globals:   POLL_INTERVAL (read), SECONDS (read)
# Arguments: $1 - human-readable server name
#            $2 - port
#            $3 - PID of the server process
#            $4 - deadline, as a value of bash's SECONDS counter
#            $5 - log file to point the user at on failure
# Returns:   0 when healthy, 1 on process exit or timeout
#######################################
wait_for_server() {
  local name=$1 port=$2 pid=$3 deadline=$4 log_file=$5

  while true; do
    # -f: an HTTP error status must not count as healthy;
    # --max-time: a hung connection must not stall the polling loop.
    if curl -sf --max-time 5 "http://localhost:${port}/health" > /dev/null 2>&1; then
      echo " Port ${port}: OK"
      return 0
    fi
    # A dead process will never become healthy; stop waiting immediately.
    if ! kill -0 "${pid}" 2> /dev/null; then
      echo " Port ${port}: FAILED (${name} exited, see ${log_file})" >&2
      return 1
    fi
    if ((SECONDS >= deadline)); then
      echo " Port ${port}: TIMED OUT (${name} still loading, see ${log_file})" >&2
      return 1
    fi
    sleep "${POLL_INTERVAL}"
  done
}

mkdir -p "${LOG_DIR}"

# Kill any existing vLLM servers and give them time to exit, so the new
# servers do not find them still running. pkill exits non-zero when nothing
# matched, which is fine.
pkill -f "vllm.entrypoints" 2> /dev/null || true
for _ in {1..30}; do
  pgrep -f "vllm.entrypoints" > /dev/null 2>&1 || break
  sleep 1
done
if pgrep -f "vllm.entrypoints" > /dev/null 2>&1; then
  echo "Warning: previous vLLM servers are still running after 30s" >&2
fi

echo "Starting vLLM servers..."

# GPU 0-1: 70B User Simulator (TP=2)
USER_LOG="${LOG_DIR}/vllm_user_70b.log"
echo "Starting 70B user simulator on GPU 0-1 (port 8004)..."
launch_server "0,1" "${MODEL_70B}" 8004 2 4096 0.90 "${USER_LOG}"
USER_PID=${LAUNCHED_PID}
echo "70B user simulator PID: $USER_PID"

# GPU 2: 8B Agent
AGENT_LOG="${LOG_DIR}/vllm_agent_8b.log"
echo "Starting 8B agent on GPU 2 (port 8003)..."
launch_server "2" "${MODEL_8B}" 8003 1 8192 0.90 "${AGENT_LOG}"
AGENT_PID=${LAUNCHED_PID}
echo "8B agent PID: $AGENT_PID"

# GPU 3: 8B Reward Model
REWARD_LOG="${LOG_DIR}/vllm_reward_8b.log"
echo "Starting 8B reward model on GPU 3 (port 8005)..."
launch_server "3" "${MODEL_8B}" 8005 1 4096 0.50 "${REWARD_LOG}"
REWARD_PID=${LAUNCHED_PID}
echo "8B reward model PID: $REWARD_PID"

echo ""
echo "Waiting for servers to become healthy (up to ${STARTUP_TIMEOUT}s)..."

# Health checks: one deadline shared by all servers, since they load in parallel.
echo "Checking server health..."
deadline=$((SECONDS + STARTUP_TIMEOUT))
failed=0
wait_for_server "8B agent" 8003 "${AGENT_PID}" "${deadline}" "${AGENT_LOG}" || failed=1
wait_for_server "70B user simulator" 8004 "${USER_PID}" "${deadline}" "${USER_LOG}" || failed=1
wait_for_server "8B reward model" 8005 "${REWARD_PID}" "${deadline}" "${REWARD_LOG}" || failed=1

echo ""
echo "Server PIDs: User=$USER_PID, Agent=$AGENT_PID, Reward=$REWARD_PID"
echo "Logs: ${PROJECT_ROOT}/logs/vllm_*.log"

if ((failed)); then
  echo "One or more vLLM servers failed to become healthy." >&2
  exit 1
fi
|