blob: 675ab76060bf2192b9374f5ebf5f9dc331ac8eb6 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
|
#!/bin/bash
#SBATCH --job-name=test_local_reward
#SBATCH --account=bfqt-delta-gpu
#SBATCH --partition=gpuA100x4
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=8
#SBATCH --gres=gpu:nvidia_a100:1
#SBATCH --mem=48G
#SBATCH --time=0:30:00
#SBATCH --output=test_local_reward_%j.out
#SBATCH --error=test_local_reward_%j.err
# Test LocalLLMRewardClient with vLLM server
#
# Environment setup: enter the project checkout, activate the conda env used
# for evaluation, and export the Hugging Face cache location and PYTHONPATH.

# Abort early if the checkout is missing: the model path and the scripts/
# path used later in this file are relative to this directory.
cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model || {
  echo "ERROR: cannot cd to project directory" >&2
  exit 1
}

# Without the conda env, the later `python` calls would run with whatever
# interpreter happens to be on PATH, so treat activation failure as fatal.
source /u/yurenh2/miniforge3/etc/profile.d/conda.sh || {
  echo "ERROR: cannot source conda.sh" >&2
  exit 1
}
conda activate eval || {
  echo "ERROR: cannot activate conda env 'eval'" >&2
  exit 1
}

export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface
export PYTHONPATH="${PWD}/src:${PYTHONPATH}"

# Model path (relative to the project directory) and port for the vLLM server.
REWARD_MODEL="models/llama-3.1-8b-instruct"
REWARD_PORT=8005

# Banner: record which model and which GPU this run is using.
echo "=== Local LLM Reward Model Batch Test ==="
echo "Model: $REWARD_MODEL"
echo "GPU: $(nvidia-smi --query-gpu=name --format=csv,noheader | head -1)"
echo ""
# Start vLLM server for reward model
# The server runs in the background; its PID is kept in VLLM_PID so the rest
# of the script can stop it.
echo "Starting vLLM server on port $REWARD_PORT..."

# Arguments are held in an array so each value is passed as exactly one word,
# even if the model path ever contains spaces or glob characters.
vllm_args=(
  --model "$REWARD_MODEL"
  --port "$REWARD_PORT"
  --tensor-parallel-size 1
  --dtype bfloat16
  --max-model-len 4096
  --gpu-memory-utilization 0.85
  --disable-log-requests
)
python -m vllm.entrypoints.openai.api_server "${vllm_args[@]}" &
VLLM_PID=$!

# Stop the server on every exit path of this script, not only the explicit
# ones. Killing an already-stopped PID is harmless; the error is silenced.
trap 'kill "$VLLM_PID" 2>/dev/null' EXIT
# Wait for server to be ready (model loading can take 2-3 minutes)
# Polls the /health endpoint once per second for up to 180 attempts and
# records the outcome in server_ready, so readiness is decided once.
echo "Waiting for vLLM server to start..."
server_ready=0
for i in {1..180}; do
  # -f makes curl fail on HTTP error statuses, so only a successful health
  # response counts as "ready" (not merely "something answered on the port").
  if curl -sf "http://localhost:${REWARD_PORT}/health" > /dev/null 2>&1; then
    echo "vLLM server ready after ${i}s"
    server_ready=1
    break
  fi
  # If the server process is gone there is nothing left to wait for; stop
  # polling instead of burning the rest of the timeout.
  if ! kill -0 "$VLLM_PID" 2>/dev/null; then
    echo "vLLM server process exited during startup"
    break
  fi
  sleep 1
done

# Check if server started
if (( ! server_ready )); then
  echo "ERROR: vLLM server failed to start"
  kill "$VLLM_PID" 2>/dev/null
  exit 1
fi
# Run the batch test against the local server's OpenAI-compatible /v1 URL.
echo ""
echo "Running batch test..."
python scripts/test_local_reward_batch.py \
  --vllm-url "http://localhost:${REWARD_PORT}/v1" \
  --batch-size 12
# Capture the test's status right away: otherwise the script would exit with
# the status of the final `kill`, and a failed test would look like a
# successful job.
test_status=$?

echo ""
echo "=== Test Complete ==="
if (( test_status != 0 )); then
  echo "ERROR: batch test exited with status ${test_status}" >&2
fi

# Cleanup
kill "$VLLM_PID" 2>/dev/null

# Propagate the test result as the job's exit status.
exit "$test_status"
|