#!/bin/bash
#SBATCH --job-name=test_reward_cmp
#SBATCH --account=bfqt-delta-gpu
#SBATCH --partition=gpuA100x4
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=8
#SBATCH --gres=gpu:nvidia_a100:1
#SBATCH --mem=32G
#SBATCH --time=0:30:00
#SBATCH --output=test_reward_cmp_%j.out
#SBATCH --error=test_reward_cmp_%j.err
#
# Compare Llama-3.1-8B vs GPT-4o-mini for reward classification.
# Tests 12 scenarios with expected labels.
#
# Usage: sbatch <this script>
#
# The shebang must be the first line and every #SBATCH directive must sit on
# its own line ahead of the first command; otherwise sbatch ignores them.
#
# Optional input: a .env file in the project root. Every variable it assigns is
# exported to the Python process (per the original comment, this is where the
# OpenAI API key is expected to live).
#
# Exit status: that of scripts/test_reward_comparison.py, so the scheduler can
# tell a failed comparison from a successful one. Setup failures exit with 1.
#
# NOTE: `set -u` is deliberately not enabled; sourced conda activation hooks
# may reference unset variables and would abort the job under it.

# Print an error message to stderr and abort the job.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

readonly PROJECT_DIR=/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model
readonly CONDA_INIT=/u/yurenh2/miniforge3/etc/profile.d/conda.sh

# All relative paths below (src/, .env, scripts/, models/) are resolved from
# the project root, so a failed cd must stop the job.
cd "${PROJECT_DIR}" || die "cannot cd to ${PROJECT_DIR}"

# shellcheck disable=SC1090
source "${CONDA_INIT}" || die "cannot source ${CONDA_INIT}"
conda activate eval || die "cannot activate conda environment 'eval'"

export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface
# Append the previous PYTHONPATH only when it is non-empty: a trailing ':'
# would add an empty entry, which Python treats as the current directory.
export PYTHONPATH="${PWD}/src${PYTHONPATH:+:${PYTHONPATH}}"

# Load OpenAI API key. `set -a` auto-exports every variable assigned while
# .env is being sourced; `set +a` switches that behaviour off again.
if [[ -f .env ]]; then
  set -a
  # shellcheck disable=SC1091
  source ./.env || die "failed to load ${PWD}/.env"
  set +a
else
  # The key may already be present in the submission environment, so a
  # missing file is reported but is not fatal.
  printf 'warning: %s/.env not found; relying on the existing environment\n' \
    "${PWD}" >&2
fi

echo "=== Reward Model Comparison Test ==="
echo "Local: Llama-3.1-8B-Instruct"
echo "API: GPT-4o-mini"
echo ""

# Capture the exit status instead of letting the trailing echo mask it.
status=0
python scripts/test_reward_comparison.py \
  --local-model models/llama-3.1-8b-instruct \
  --device cuda || status=$?

echo ""
if ((status == 0)); then
  echo "=== Test Complete ==="
else
  printf '=== Test FAILED (exit status %d) ===\n' "${status}" >&2
fi

exit "${status}"