collaborativeagents/scripts/quick_test_batch.sh


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137

#!/bin/bash
# Quick test: 10 profiles × 5 sessions = 50 sessions
# Runs the batch (vanilla) experiment against two local vLLM servers.
# The sequential (rag) comparison is currently skipped further down.

# Abort on the first unhandled command failure.
set -e

# All relative paths below (scripts/, ../results, ../src) are resolved from here.
cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents
source /u/yurenh2/miniforge3/etc/profile.d/conda.sh
conda activate eval

export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface
# Append the inherited PYTHONPATH only when it is non-empty, so an unset
# PYTHONPATH does not leave a trailing empty entry on the search path.
export PYTHONPATH="${PWD}:${PWD}/../src${PYTHONPATH:+:${PYTHONPATH}}"

# Model weights served by both vLLM instances, and the port each one listens on.
MODEL_8B="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/models/llama-3.1-8b-instruct"
PORT_USER=8004
PORT_AGENT=8003

# Print the run banner, then a timestamp and the GPU inventory for the log.
cat <<'EOF'
============================================
Quick Test: Batch Processing Verification
============================================
Profiles: 10
Sessions/profile: 5
Total: 50 sessions

EOF
date
nvidia-smi --query-gpu=index,name,memory.total --format=csv
echo ""

# Kill any existing servers (best effort: pkill is non-zero when nothing matches)
pkill -f "vllm.entrypoints" 2>/dev/null || true
sleep 2

# Stop whichever vLLM servers this script has launched so far.
# Installed as an EXIT trap so the background servers are also terminated
# when a later command fails and `set -e` aborts the script early.
cleanup_servers() {
    local pid
    for pid in "${SERVER_USER_PID:-}" "${SERVER_AGENT_PID:-}"; do
        if [ -n "$pid" ]; then
            kill "$pid" 2>/dev/null || true
        fi
    done
}
trap cleanup_servers EXIT

# Launch one vLLM OpenAI-compatible API server in the background.
#   $1 - value for CUDA_VISIBLE_DEVICES (two GPUs, matching --tensor-parallel-size 2)
#   $2 - port the server listens on
# The caller reads $! immediately after the call to capture the server PID.
start_vllm_server() {
    CUDA_VISIBLE_DEVICES="$1" python -m vllm.entrypoints.openai.api_server \
        --model "$MODEL_8B" \
        --port "$2" \
        --tensor-parallel-size 2 \
        --gpu-memory-utilization 0.85 \
        --max-model-len 4096 \
        --disable-log-requests \
        --dtype bfloat16 &
}

# Start vLLM servers
echo "Starting 8B user simulator (GPU 0-1, TP=2)..."
start_vllm_server 0,1 "$PORT_USER"
SERVER_USER_PID=$!

echo "Starting 8B agent (GPU 2-3, TP=2)..."
start_vllm_server 2,3 "$PORT_AGENT"
SERVER_AGENT_PID=$!

# Succeeds only when the server on port $1 answers /health with a success status.
# --fail makes curl return non-zero on HTTP error responses; --max-time keeps a
# hung connection from stalling the polling loop.
server_healthy() {
    curl -sf --max-time 5 -o /dev/null "http://localhost:$1/health"
}

echo "Waiting for servers..."
WAIT_START=$SECONDS
# Poll both health endpoints up to 100 times, pausing 3s between attempts.
for i in $(seq 1 100); do
    if server_healthy "$PORT_USER" && server_healthy "$PORT_AGENT"; then
        echo "Both servers ready after $((SECONDS - WAIT_START))s"
        break
    fi
    # Best effort: stop polling once either server process is gone, instead of
    # waiting out the full window. The checks below report which one failed.
    if ! kill -0 "$SERVER_USER_PID" 2>/dev/null || ! kill -0 "$SERVER_AGENT_PID" 2>/dev/null; then
        echo "  A server process exited during startup"
        break
    fi
    if [ $((i % 20)) -eq 0 ]; then
        echo "  Still waiting... ($((SECONDS - WAIT_START))s)"
    fi
    sleep 3
done

# Final verdict: both servers must be healthy, otherwise stop them and abort.
if ! server_healthy "$PORT_USER"; then
    echo "ERROR: User server not healthy"
    kill "$SERVER_USER_PID" "$SERVER_AGENT_PID" 2>/dev/null || true
    exit 1
fi
if ! server_healthy "$PORT_AGENT"; then
    echo "ERROR: Agent server not healthy"
    kill "$SERVER_USER_PID" "$SERVER_AGENT_PID" 2>/dev/null || true
    exit 1
fi
echo "✓ Both servers healthy"
echo ""

# Test 1: time a run of the vanilla method (the batch-processing path).
printf '%s\n' \
    "============================================" \
    "Test 1: BATCH processing (vanilla method)" \
    "============================================"
START=$(date +%s)

# Absolute path to the profile file (100 profiles with ~40 preferences each)
PROFILE_PATH="/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model/collaborativeagents/data/complex_profiles_v2/profiles_100.jsonl"

# Arguments for the experiment runner, kept in an array so each stays one word.
vanilla_args=(
    --methods vanilla
    --datasets mmlu
    --n-profiles 10
    --n-sessions 5
    --use-vllm
    --batch-size 50
    --parallel-profiles 10
    --output-dir ../results/quick_test_batch
    --profile-path "$PROFILE_PATH"
)
python scripts/run_experiments.py "${vanilla_args[@]}"

END=$(date +%s)
ELAPSED_BATCH=$((END - START))
printf '\n%s\n' "Vanilla (batch) completed in ${ELAPSED_BATCH}s"

# ELAPSED_SEQ stays 0 while the sequential test is skipped; the report below
# uses that to avoid presenting timings or result paths for a run that never happened.
ELAPSED_SEQ=0
# Skip sequential test for now - just validate batch processing works
echo ""
echo "Skipping Test 2 (sequential) for quick validation..."

# Cleanup (best effort: the servers may already be gone)
echo ""
echo "Cleaning up..."
kill "$SERVER_USER_PID" "$SERVER_AGENT_PID" 2>/dev/null || true

# Prints the integer sessions/hour rate for $1 sessions finished in $2 seconds.
# Callers must pass a non-zero $2.
sessions_per_hour() {
    echo $(($1 * 3600 / $2))
}

echo ""
echo "============================================"
echo "QUICK TEST RESULTS"
echo "============================================"
echo ""
echo "Vanilla (BATCH):     ${ELAPSED_BATCH}s for 50 sessions"
if (( ELAPSED_SEQ > 0 )); then
    echo "RAG (SEQUENTIAL):    ${ELAPSED_SEQ}s for 50 sessions"
else
    echo "RAG (SEQUENTIAL):    skipped"
fi
echo ""

# Throughput is only reported for runs with a measurable duration, which also
# guards the division against a zero elapsed time.
if (( ELAPSED_BATCH > 0 )); then
    THROUGHPUT_BATCH=$(sessions_per_hour 50 "$ELAPSED_BATCH")
    echo "Vanilla throughput:  ${THROUGHPUT_BATCH} sessions/hr"
fi
if (( ELAPSED_SEQ > 0 )); then
    THROUGHPUT_SEQ=$(sessions_per_hour 50 "$ELAPSED_SEQ")
    echo "RAG throughput:      ${THROUGHPUT_SEQ} sessions/hr"
fi

echo ""
echo "Results saved to:"
echo "  ../results/quick_test_batch/"
if (( ELAPSED_SEQ > 0 )); then
    echo "  ../results/quick_test_sequential/"
fi
echo ""
date