summaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
authorYurenHao0426 <blackhao0426@gmail.com>2026-02-10 20:16:36 +0000
committerYurenHao0426 <blackhao0426@gmail.com>2026-02-10 20:16:36 +0000
commit5626080ca4c4219aec4888d6b9406d0d3349fb55 (patch)
tree86287d9fd5833e11ccd78566992540f2664fd195 /scripts
parenta2036838807428424bbbaff507a6563749a83145 (diff)
Add RAG rewrite, 60-session experiment scripts, and analysis tools
- RAG rewrite adapter and vector preference pipeline in personalized_llm
- 60-session experiment queue scripts (reflection, rag, rag_vector, rag_rewrite)
- Vector-preference correlation analysis and visualization scripts
- Local reward model batch processing improvements
- Updated CLAUDE.md with full experiment documentation and notes

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat (limited to 'scripts')
-rwxr-xr-xscripts/start_vllm_servers.sh80
1 file changed, 80 insertions, 0 deletions
diff --git a/scripts/start_vllm_servers.sh b/scripts/start_vllm_servers.sh
new file mode 100755
index 0000000..44e211b
--- /dev/null
+++ b/scripts/start_vllm_servers.sh
@@ -0,0 +1,80 @@
#!/bin/bash
# Start vLLM servers for personalization experiments.
#
# GPU Layout (4x H200):
#   GPU 0-1: 70B user simulator (TP=2, port 8004)
#   GPU 2:   8B agent            (port 8003)
#   GPU 3:   8B reward model     (port 8005)
#
# Logs are written to ${PROJECT_ROOT}/logs/vllm_*.log.
# Exits non-zero if any server fails to become healthy within the timeout.

set -euo pipefail

readonly PROJECT_ROOT="/workspace/personalization-user-model"
readonly MODEL_8B="${PROJECT_ROOT}/models/llama-3.1-8b-instruct"
readonly MODEL_70B="${PROJECT_ROOT}/models/llama-3.1-70b-instruct"
readonly HEALTH_TIMEOUT=600   # seconds to wait per server (70B load can be slow)
readonly HEALTH_INTERVAL=5    # seconds between health probes

mkdir -p "${PROJECT_ROOT}/logs"

# Kill any existing vLLM servers (best-effort: pkill returns 1 when no
# process matched, which is fine here).
pkill -f "vllm.entrypoints" 2>/dev/null || true
sleep 2

#######################################
# Launch one vLLM OpenAI-compatible API server in the background.
# Arguments:
#   $1 - GPU list for CUDA_VISIBLE_DEVICES (e.g. "0,1")
#   $2 - model path
#   $3 - port
#   $4 - tensor-parallel size
#   $5 - max model length (tokens)
#   $6 - GPU memory utilization fraction
#   $7 - log file path
# Outputs:
#   Prints the background server PID to stdout.
#######################################
start_server() {
  local gpus=$1 model=$2 port=$3 tp=$4 max_len=$5 mem_util=$6 log=$7
  CUDA_VISIBLE_DEVICES="$gpus" python3 -m vllm.entrypoints.openai.api_server \
    --model "$model" \
    --port "$port" \
    --tensor-parallel-size "$tp" \
    --dtype bfloat16 \
    --max-model-len "$max_len" \
    --gpu-memory-utilization "$mem_util" \
    --disable-log-requests \
    > "$log" 2>&1 &
  echo $!
}

#######################################
# Poll a server's /health endpoint until it responds or the timeout expires.
# Arguments:
#   $1 - port
# Returns:
#   0 when healthy, 1 on timeout.
#######################################
wait_for_health() {
  local port=$1
  local deadline=$(( SECONDS + HEALTH_TIMEOUT ))
  until curl -s "http://localhost:${port}/health" > /dev/null 2>&1; do
    if (( SECONDS >= deadline )); then
      echo "  Port ${port}: TIMEOUT after ${HEALTH_TIMEOUT}s" >&2
      return 1
    fi
    sleep "$HEALTH_INTERVAL"
  done
  echo "  Port ${port}: OK"
}

echo "Starting vLLM servers..."

# GPU 0-1: 70B User Simulator (TP=2). Reward model shares no GPU with it,
# so it can take the full 0.90 memory fraction.
echo "Starting 70B user simulator on GPU 0-1 (port 8004)..."
USER_PID=$(start_server "0,1" "$MODEL_70B" 8004 2 4096 0.90 \
  "${PROJECT_ROOT}/logs/vllm_user_70b.log")
echo "70B user simulator PID: $USER_PID"

# GPU 2: 8B Agent. Larger context (8192) than the other two servers.
echo "Starting 8B agent on GPU 2 (port 8003)..."
AGENT_PID=$(start_server "2" "$MODEL_8B" 8003 1 8192 0.90 \
  "${PROJECT_ROOT}/logs/vllm_agent_8b.log")
echo "8B agent PID: $AGENT_PID"

# GPU 3: 8B Reward Model. Lower memory fraction (0.50) leaves headroom on
# GPU 3 for other reward-model batch work.
echo "Starting 8B reward model on GPU 3 (port 8005)..."
REWARD_PID=$(start_server "3" "$MODEL_8B" 8005 1 4096 0.50 \
  "${PROJECT_ROOT}/logs/vllm_reward_8b.log")
echo "8B reward model PID: $REWARD_PID"

echo ""
echo "Waiting for servers to become healthy (timeout ${HEALTH_TIMEOUT}s each)..."

# Health checks: poll instead of a fixed sleep, and fail the script if any
# server never comes up (previously the result was printed and ignored).
failed=0
echo "Checking server health..."
for port in 8003 8004 8005; do
  wait_for_health "$port" || failed=1
done

echo ""
echo "Server PIDs: User=$USER_PID, Agent=$AGENT_PID, Reward=$REWARD_PID"
echo "Logs: ${PROJECT_ROOT}/logs/vllm_*.log"

if (( failed )); then
  echo "ERROR: one or more servers failed health checks; see logs above." >&2
  exit 1
fi