#!/bin/bash
# Start vLLM servers for personalization experiments.
#
# GPU Layout (4x H200):
#   GPU 0-1: 70B user simulator (TP=2) — port 8004
#   GPU 2:   8B agent                  — port 8003
#   GPU 3:   8B reward model           — port 8005
#
# Logs are written to ${PROJECT_ROOT}/logs/vllm_*.log.
# Exits non-zero if any server fails to become healthy in time.

set -euo pipefail

readonly PROJECT_ROOT="/workspace/personalization-user-model"
readonly MODEL_8B="${PROJECT_ROOT}/models/llama-3.1-8b-instruct"
readonly MODEL_70B="${PROJECT_ROOT}/models/llama-3.1-70b-instruct"
readonly LOG_DIR="${PROJECT_ROOT}/logs"

mkdir -p "${LOG_DIR}"

# Kill any existing vLLM servers. pkill returns 1 when nothing matched,
# which is expected on a clean start — not an error.
pkill -f "vllm.entrypoints" 2>/dev/null || true
sleep 2

#######################################
# Poll a server's /health endpoint until it answers or a timeout expires.
# A fixed sleep is not enough: the 70B model at TP=2 can take well over a
# minute to load, and a one-shot probe would report success prematurely.
# Arguments: $1 - port to probe
#            $2 - timeout in seconds (default 600)
# Outputs:   progress to stdout, failure notice to stderr
# Returns:   0 once healthy, 1 on timeout
#######################################
wait_for_server() {
  local port=$1
  local timeout=${2:-600}
  local elapsed=0
  while (( elapsed < timeout )); do
    # -f: treat HTTP errors (e.g. 503 while loading) as not-yet-healthy.
    if curl -sf "http://localhost:${port}/health" > /dev/null 2>&1; then
      echo "  Port ${port}: OK (after ~${elapsed}s)"
      return 0
    fi
    sleep 5
    elapsed=$((elapsed + 5))
  done
  echo "  Port ${port}: not healthy after ${timeout}s" >&2
  return 1
}

echo "Starting vLLM servers..."

# GPU 0-1: 70B User Simulator (TP=2)
echo "Starting 70B user simulator on GPU 0-1 (port 8004)..."
CUDA_VISIBLE_DEVICES=0,1 python3 -m vllm.entrypoints.openai.api_server \
  --model "${MODEL_70B}" \
  --port 8004 \
  --tensor-parallel-size 2 \
  --dtype bfloat16 \
  --max-model-len 4096 \
  --gpu-memory-utilization 0.90 \
  --disable-log-requests \
  > "${LOG_DIR}/vllm_user_70b.log" 2>&1 &
USER_PID=$!
echo "70B user simulator PID: $USER_PID"

# GPU 2: 8B Agent
echo "Starting 8B agent on GPU 2 (port 8003)..."
CUDA_VISIBLE_DEVICES=2 python3 -m vllm.entrypoints.openai.api_server \
  --model "${MODEL_8B}" \
  --port 8003 \
  --tensor-parallel-size 1 \
  --dtype bfloat16 \
  --max-model-len 8192 \
  --gpu-memory-utilization 0.90 \
  --disable-log-requests \
  > "${LOG_DIR}/vllm_agent_8b.log" 2>&1 &
AGENT_PID=$!
echo "8B agent PID: $AGENT_PID"

# GPU 3: 8B Reward Model
echo "Starting 8B reward model on GPU 3 (port 8005)..."
CUDA_VISIBLE_DEVICES=3 python3 -m vllm.entrypoints.openai.api_server \
  --model "${MODEL_8B}" \
  --port 8005 \
  --tensor-parallel-size 1 \
  --dtype bfloat16 \
  --max-model-len 4096 \
  --gpu-memory-utilization 0.50 \
  --disable-log-requests \
  > "${LOG_DIR}/vllm_reward_8b.log" 2>&1 &
REWARD_PID=$!
echo "8B reward model PID: $REWARD_PID"

echo ""
echo "Waiting for servers to become healthy..."
# Check the 70B server first with the longest budget — it dominates startup
# time, so the 8B servers are typically ready by the time their turn comes.
# Under 'set -e' any timeout aborts the script with a non-zero exit.
wait_for_server 8004 900
wait_for_server 8003 300
wait_for_server 8005 300

echo ""
echo "Server PIDs: User=$USER_PID, Agent=$AGENT_PID, Reward=$REWARD_PID"
echo "Logs: ${LOG_DIR}/vllm_*.log"