#!/bin/bash
# run_evaluation.sh
# Script to run evaluation on trained models.
#
# Provenance: scripts/run_evaluation.sh, introduced by commit f1c2cc2
#   ("Initial commit: RL floating-point noise project").
#
# Usage:
#   scripts/run_evaluation.sh [PRECISION_MODE] [SEED]
#     PRECISION_MODE  tag used in the training-run directory name (default: bf16)
#     SEED            seed used in the training-run directory name (default: 1)
#
# Environment overrides (each falls back to the default shown):
#   CUDA_VISIBLE_DEVICES  "0"
#   BASE_MODEL            "Qwen/Qwen2.5-Math-7B"
#   TRAIN_LOGS_DIR        "./results/train_logs"
#   EVAL_METRICS_DIR      "./results/eval_metrics"
#   EVAL_CONFIG           "./configs/eval_tasks_config.json"
#
# Reads:   ${TRAIN_LOGS_DIR}/${PRECISION_MODE}_seed${SEED}/final_model
# Writes:  ${EVAL_METRICS_DIR}/${PRECISION_MODE}_seed${SEED}.json      (passed as --output_path)
#          ${EVAL_METRICS_DIR}/${PRECISION_MODE}_seed${SEED}_eval.log  (combined stdout/stderr)
#
# eval_policy.py and the default relative paths are resolved against the
# current working directory, so run this from the directory that contains
# eval_policy.py.
#
# Exit status: 1 if the checkpoint directory is missing; otherwise the
# failure status of whichever command failed, or 0 on success.

set -euo pipefail  # -u: fail on unset vars; pipefail: python's status survives the tee pipe

# Configuration
export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0}"

# HuggingFace cache - use shared HDD storage to avoid quota issues.
# These are set unconditionally, so any HF_HOME / HF_HUB_CACHE inherited from
# the caller's environment is replaced.
export HF_HOME="/work/hdd/bfqt/yurenh2/huggingface_cache"
export HF_HUB_CACHE="/work/hdd/bfqt/yurenh2/huggingface_cache/hub"
mkdir -p "$HF_HOME" "$HF_HUB_CACHE"

# Default values
PRECISION_MODE="${1:-bf16}"
SEED="${2:-1}"
BASE_MODEL="${BASE_MODEL:-Qwen/Qwen2.5-Math-7B}"
TRAIN_LOGS_DIR="${TRAIN_LOGS_DIR:-./results/train_logs}"
EVAL_METRICS_DIR="${EVAL_METRICS_DIR:-./results/eval_metrics}"
EVAL_CONFIG="${EVAL_CONFIG:-./configs/eval_tasks_config.json}"

# Paths derived from the run identity (<precision>_seed<seed>)
RUN_TAG="${PRECISION_MODE}_seed${SEED}"
FT_CKPT="${TRAIN_LOGS_DIR}/${RUN_TAG}/final_model"
OUTPUT_PATH="${EVAL_METRICS_DIR}/${RUN_TAG}.json"
LOG_PATH="${EVAL_METRICS_DIR}/${RUN_TAG}_eval.log"

echo "=============================================="
echo "Model Evaluation"
echo "=============================================="
echo "Precision Mode: $PRECISION_MODE"
echo "Seed: $SEED"
echo "Base Model: $BASE_MODEL"
echo "Finetuned Model: $FT_CKPT"
echo "Output: $OUTPUT_PATH"
echo "=============================================="

# Check that the checkpoint exists before touching the results directory, so a
# mistyped mode/seed does not leave an empty output directory behind.
if [[ ! -d "$FT_CKPT" ]]; then
  echo "Error: Checkpoint not found at $FT_CKPT" >&2
  exit 1
fi

# Create output directory (also holds the log written by tee below)
mkdir -p "$EVAL_METRICS_DIR"

# Arguments forwarded to eval_policy.py. The meaning of --eval_base and
# --use_amp is defined by eval_policy.py and is not visible from this script.
eval_args=(
  --base_ckpt "$BASE_MODEL"
  --ft_ckpt "$FT_CKPT"
  --eval_tasks_config "$EVAL_CONFIG"
  --output_path "$OUTPUT_PATH"
  --eval_base
  --use_amp
)

# Run evaluation; mirror combined stdout/stderr to the console and the log file.
python eval_policy.py "${eval_args[@]}" 2>&1 | tee "$LOG_PATH"

echo "Evaluation complete. Results saved to: $OUTPUT_PATH"