blob: a1f115d2bea792299e730c3cf9f69dca0171931d (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
|
#!/bin/bash
#SBATCH --job-name=quick_batch_test
#SBATCH --account=bfqt-delta-gpu
#SBATCH --partition=gpuH200x8
#SBATCH --gres=gpu:4
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=16
#SBATCH --mem=128G
#SBATCH --time=01:00:00
#SBATCH --output=/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/quick_batch_test-%j.out
#SBATCH --error=/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/quick_batch_test-%j.err
# Quick smoke test of batch (vanilla) processing on H200:
# 10 profiles × 5 sessions each = 50 sessions in total.

# Abort the job as soon as any unguarded command fails.
set -e

# ---- Environment ----------------------------------------------------------
# Work from the collaborativeagents checkout; the relative paths used further
# down (scripts/..., ../results/...) are resolved against this directory.
cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents

# Load conda's shell integration, then switch to the "eval" environment.
source /u/yurenh2/miniforge3/etc/profile.d/conda.sh
conda activate eval

# Hugging Face cache location, and import roots: this directory plus the
# sibling ../src, placed ahead of whatever PYTHONPATH already held.
HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface
PYTHONPATH="${PWD}:${PWD}/../src:${PYTHONPATH}"
export HF_HOME PYTHONPATH

# ---- Run parameters -------------------------------------------------------
# One local model checkpoint is served twice: once on PORT_USER and once on
# PORT_AGENT.
MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct"
PORT_USER=8004
PORT_AGENT=8003

# ---- Banner and hardware report -------------------------------------------
printf '%s\n' \
  "============================================" \
  "Quick Test: Batch Processing on H200" \
  "============================================" \
  "Profiles: 10" \
  "Sessions/profile: 5" \
  "Total: 50 sessions" \
  ""
date
nvidia-smi --query-gpu=index,name,memory.total --format=csv
printf '\n'

# ---- Clear out stale servers ----------------------------------------------
# pkill exits non-zero when nothing matched, so the result is discarded to
# keep `set -e` from aborting the job here.
# NOTE(review): -f matches against the full command line of every process
# this user may signal, not only processes started by this job — confirm that
# is acceptable if the node can be shared with another of your jobs.
pkill -f "vllm.entrypoints" 2>/dev/null || :
# Short pause before new servers are launched (presumably to let the old
# ones release their ports and GPU memory — TODO confirm 2s is enough).
sleep 2
# Start vLLM servers
#
# Two OpenAI-compatible vLLM API servers are launched in the background, both
# serving $MODEL_8B with tensor parallelism 2:
#   - user simulator: GPUs 0,1, listening on $PORT_USER
#   - agent:          GPUs 2,3, listening on $PORT_AGENT
# Their PIDs are stored in SERVER_USER_PID and SERVER_AGENT_PID.

# Stops whichever servers have been started so far. Registered as an EXIT
# trap so the servers are also signalled when the script aborts early (for
# example when `set -e` trips on a failing command), not only on the normal
# path. Signalling a process that is already gone is harmless here: the error
# is discarded.
stop_vllm_servers() {
  local pid
  for pid in "${SERVER_USER_PID:-}" "${SERVER_AGENT_PID:-}"; do
    if [[ -n "$pid" ]]; then
      kill "$pid" 2>/dev/null || true
    fi
  done
}
trap stop_vllm_servers EXIT

# Flags shared by both servers; only the GPU set and the port differ.
VLLM_SERVER_FLAGS=(
  --tensor-parallel-size 2
  --gpu-memory-utilization 0.85
  --max-model-len 8192
  --disable-log-requests
  --dtype bfloat16
)

echo "Starting 8B user simulator (GPU 0-1, TP=2)..."
CUDA_VISIBLE_DEVICES=0,1 python -m vllm.entrypoints.openai.api_server \
  --model "$MODEL_8B" \
  --port "$PORT_USER" \
  "${VLLM_SERVER_FLAGS[@]}" &
SERVER_USER_PID=$!

echo "Starting 8B agent (GPU 2-3, TP=2)..."
CUDA_VISIBLE_DEVICES=2,3 python -m vllm.entrypoints.openai.api_server \
  --model "$MODEL_8B" \
  --port "$PORT_AGENT" \
  "${VLLM_SERVER_FLAGS[@]}" &
SERVER_AGENT_PID=$!
# Wait until both vLLM servers answer their /health endpoint.
#
# Polls every HEALTH_POLL_SECONDS for at most HEALTH_MAX_ATTEMPTS rounds
# (200 x 3s = roughly 10 minutes of sleeping). The job is aborted with exit
# status 1 when a server process disappears during startup or when either
# server is still unhealthy after the last round.
HEALTH_MAX_ATTEMPTS=200
HEALTH_POLL_SECONDS=3

# Succeeds only if the server on port $1 answers /health without an HTTP
# error: -f turns HTTP error statuses into a non-zero exit, and --max-time
# bounds a request that connects but never replies.
server_healthy() {
  curl -sf --max-time 5 -o /dev/null "http://localhost:$1/health" 2>/dev/null
}

# Prints the message in $1, signals both servers, and ends the job with
# status 1.
abort_startup() {
  echo "$1"
  kill "$SERVER_USER_PID" "$SERVER_AGENT_PID" 2>/dev/null || true
  exit 1
}

echo "Waiting for servers (may take 5-10 min for CUDA graph compilation)..."
# SECONDS is bash's built-in elapsed-time counter; using it reports the time
# actually spent waiting instead of an estimate derived from the loop index.
wait_started=$SECONDS
for ((attempt = 1; attempt <= HEALTH_MAX_ATTEMPTS; attempt++)); do
  if server_healthy "$PORT_USER" && server_healthy "$PORT_AGENT"; then
    echo "Both servers ready after $((SECONDS - wait_started))s"
    break
  fi
  # Fail fast: a server whose process is gone will never become healthy, so
  # there is no point in polling for the remaining rounds.
  if ! kill -0 "$SERVER_USER_PID" 2>/dev/null; then
    abort_startup "ERROR: User server process exited during startup"
  fi
  if ! kill -0 "$SERVER_AGENT_PID" 2>/dev/null; then
    abort_startup "ERROR: Agent server process exited during startup"
  fi
  if ((attempt % 20 == 0)); then
    echo " Still waiting... ($((SECONDS - wait_started))s)"
  fi
  sleep "$HEALTH_POLL_SECONDS"
done

# Final verdict: covers the case where the loop ran out of attempts.
server_healthy "$PORT_USER" || abort_startup "ERROR: User server not healthy"
server_healthy "$PORT_AGENT" || abort_startup "ERROR: Agent server not healthy"
echo "Both servers healthy"
echo ""
# Run the quick test with the vanilla (batch) method and time it.
# ELAPSED is left holding the wall-clock duration in whole seconds.
printf '%s\n' \
  "============================================" \
  "Test: BATCH processing (vanilla method)" \
  "============================================"

PROFILE_PATH="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/data/complex_profiles_v2/profiles_100.jsonl"

# Arguments for scripts/run_experiments.py, one option per line.
experiment_args=(
  --methods vanilla
  --datasets math-hard
  --n-profiles 10
  --n-sessions 5
  --max-turns 15
  --use-vllm
  --batch-size 50
  --parallel-profiles 10
  --output-dir ../results/quick_test_batch_h200
  --profile-path "$PROFILE_PATH"
)

# Timestamps are epoch seconds taken immediately around the run.
START=$(date +%s)
python scripts/run_experiments.py "${experiment_args[@]}"
END=$(date +%s)
ELAPSED=$((END - START))

printf '\n%s\n' "Vanilla (batch) completed in ${ELAPSED}s"
# Cleanup: signal both vLLM servers. Failures (e.g. a server that has already
# exited) are ignored so the summary below is always printed.
printf '\n%s\n' "Cleaning up..."
kill "$SERVER_USER_PID" "$SERVER_AGENT_PID" 2>/dev/null || true

# Summary of the timed run.
printf '%s\n' \
  "" \
  "============================================" \
  "QUICK TEST RESULTS" \
  "============================================" \
  "" \
  "Vanilla (BATCH): ${ELAPSED}s for 50 sessions" \
  ""

# Sessions per hour, using integer arithmetic; skipped when the run took
# under one second so the division cannot be by zero.
if ((ELAPSED > 0)); then
  THROUGHPUT=$((50 * 3600 / ELAPSED))
  echo "Throughput: ${THROUGHPUT} sessions/hr"
fi

printf '%s\n' \
  "" \
  "Results saved to: ../results/quick_test_batch_h200/" \
  ""
date
|