diff options
| author | YurenHao0426 <blackhao0426@gmail.com> | 2026-01-27 12:15:45 -0600 |
|---|---|---|
| committer | YurenHao0426 <blackhao0426@gmail.com> | 2026-01-27 12:15:45 -0600 |
| commit | 680513b7771a29f27cbbb3ffb009a69a913de6f9 (patch) | |
| tree | a0d60aef9ade1b2953b915f535b990c0de95e493 /scripts/test_reward_comparison.sh | |
| parent | c06ec2f3b80f8968f09eb801b69237495b055ec1 (diff) | |
local reward model
Diffstat (limited to 'scripts/test_reward_comparison.sh')
| -rw-r--r-- | scripts/test_reward_comparison.sh | 39 |
1 files changed, 39 insertions, 0 deletions
#!/bin/bash
#SBATCH --job-name=test_reward_cmp
#SBATCH --account=bfqt-delta-gpu
#SBATCH --partition=gpuA100x4
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=8
#SBATCH --gres=gpu:nvidia_a100:1
#SBATCH --mem=32G
#SBATCH --time=0:30:00
#SBATCH --output=test_reward_cmp_%j.out
#SBATCH --error=test_reward_cmp_%j.err

# scripts/test_reward_comparison.sh
#
# Compare Llama-3.1-8B vs GPT-4o-mini for reward classification.
# Tests 12 scenarios with expected labels.
#
# Steps:
#   1. Enter the project directory and activate the "eval" conda environment.
#   2. Export HF_HOME and PYTHONPATH, and load variables from ./.env.
#   3. Run scripts/test_reward_comparison.py on the GPU and exit with its
#      status, so a failed run is not reported as a successful job.
#
# Strict mode (set -eu) is deliberately not enabled: the sourced conda hooks
# and .env are outside this script's control, so the critical steps are
# checked explicitly instead.

# Print an error message to stderr and abort the job.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

readonly PROJECT_DIR=/projects/bfqt/users/yurenh2/ml-projects/personalization-user-model
readonly CONDA_INIT=/u/yurenh2/miniforge3/etc/profile.d/conda.sh

# Every relative path below (.env, scripts/, models/, src/) is resolved from
# the project root, so continuing after a failed cd would be wrong.
cd "${PROJECT_DIR}" || die "cannot cd to ${PROJECT_DIR}"

# shellcheck disable=SC1090
source "${CONDA_INIT}" || die "cannot source ${CONDA_INIT}"
conda activate eval || die "cannot activate conda environment 'eval'"

export HF_HOME=/projects/bfqt/users/yurenh2/hf_cache/huggingface
# Append the previous PYTHONPATH only when it is non-empty, so that no empty
# trailing entry is produced when it was unset.
export PYTHONPATH="${PWD}/src${PYTHONPATH:+:${PYTHONPATH}}"

# Load OpenAI API key.
# set -a auto-exports every variable assigned while .env is sourced.
if [[ -f .env ]]; then
  set -a
  # shellcheck disable=SC1091
  source .env
  set +a
else
  printf 'warning: %s/.env not found; relying on variables already in the environment\n' \
    "${PWD}" >&2
fi

echo "=== Reward Model Comparison Test ==="
echo "Local: Llama-3.1-8B-Instruct"
echo "API: GPT-4o-mini"
echo ""

# Capture the exit status instead of letting the trailing echo mask it.
status=0
python scripts/test_reward_comparison.py \
  --local-model models/llama-3.1-8b-instruct \
  --device cuda || status=$?

echo ""
if (( status != 0 )); then
  echo "=== Test Failed (exit ${status}) ===" >&2
  exit "${status}"
fi
echo "=== Test Complete ==="
