#!/bin/bash
#SBATCH --job-name=test_local_reward
#SBATCH --account=bfqt-delta-gpu
#SBATCH --partition=gpuA100x4
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=8
#SBATCH --gres=gpu:nvidia_a100:1
#SBATCH --mem=48G
#SBATCH --time=0:30:00
#SBATCH --output=test_local_reward_%j.out
#SBATCH --error=test_local_reward_%j.err

# Test LocalLLMRewardClient against a locally launched vLLM OpenAI-compatible
# server. Starts the server in the background, waits for its /health endpoint,
# runs the batch test script, then shuts the server down.
#
# Fail fast: abort on any unhandled error, unset variable, or pipeline failure
# so a broken test run exits non-zero instead of silently "completing".
set -euo pipefail

cd /projects/bfqt/users/yurenh2/ml-projects/personalization-user-model
source /u/yurenh2/miniforge3/etc/profile.d/conda.sh
conda activate eval

export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface
# ${PYTHONPATH:-} guards against 'set -u' when PYTHONPATH is not inherited.
export PYTHONPATH="${PWD}/src:${PYTHONPATH:-}"

readonly REWARD_MODEL="models/llama-3.1-8b-instruct"
readonly REWARD_PORT=8005

echo "=== Local LLM Reward Model Batch Test ==="
echo "Model: $REWARD_MODEL"
echo "GPU: $(nvidia-smi --query-gpu=name --format=csv,noheader | head -1)"
echo ""

# Start vLLM server for reward model in the background.
echo "Starting vLLM server on port $REWARD_PORT..."
python -m vllm.entrypoints.openai.api_server \
    --model "$REWARD_MODEL" \
    --port "$REWARD_PORT" \
    --tensor-parallel-size 1 \
    --dtype bfloat16 \
    --max-model-len 4096 \
    --gpu-memory-utilization 0.85 \
    --disable-log-requests \
    &
VLLM_PID=$!

# Always stop the server on exit — including when the test script fails or the
# job is cancelled — so the GPU process does not leak past this job step.
cleanup() {
    kill "$VLLM_PID" 2>/dev/null || true
}
trap cleanup EXIT

# Wait for server to be ready (model loading can take 2-3 minutes).
# Polls /health up to 180 times at ~1s intervals (elapsed time is approximate
# since each curl attempt adds latency on top of the 1s sleep).
echo "Waiting for vLLM server to start..."
for i in {1..180}; do
    if curl -s "http://localhost:$REWARD_PORT/health" > /dev/null 2>&1; then
        echo "vLLM server ready after ${i}s"
        break
    fi
    sleep 1
done

# Final check: fail the job if the server never came up (trap kills the PID).
if ! curl -s "http://localhost:$REWARD_PORT/health" > /dev/null 2>&1; then
    echo "ERROR: vLLM server failed to start" >&2
    exit 1
fi

echo ""
echo "Running batch test..."
python scripts/test_local_reward_batch.py \
    --vllm-url "http://localhost:$REWARD_PORT/v1" \
    --batch-size 12

echo ""
echo "=== Test Complete ==="