#!/usr/bin/env bash
#
# eval_all_math.sh - evaluate one checkpoint on a fixed list of math benchmarks
# by invoking math_eval.py once with all dataset names.
#
# Provenance: Qwen2.5-Eval/evaluation/sh/eval_all_math.sh, added in commit
#   fc6d57ffb8d5ddb5820fcc00b5491a585c259ebc ("Initial commit"),
#   Yuren Hao, Thu, 4 Sep 2025 22:16:22 -0500.
#
# Usage: run from the directory that contains math_eval.py (the script is
#   referenced by a relative path):
#     bash sh/eval_all_math.sh
#
# Takes no arguments; every setting is a constant defined below.
# Exit status: that of math_eval.py, or non-zero if the output directory
#   cannot be created.

# Abort on the first failing command, on use of an unset variable, and on a
# failure anywhere in a pipeline, so the evaluation never starts when the
# output directory could not be created.
set -euo pipefail
# Trace every command so the exact invocation ends up in the job log.
set -x

# GPUs made visible to the evaluation process.
export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"

# Checkpoint under evaluation.
MODEL_NAME_OR_PATH="/volume/ailab4sci/ztgao/em/checkpoints/qwen25_math_7b_1shot_pi1/step_10"
# Results are written next to the checkpoint.
# NOTE(review): "temp00" presumably mirrors --temperature 0 below; keep the
# two in sync if the temperature is ever changed.
OUTPUT_DIR="${MODEL_NAME_OR_PATH}/temp00/eval"

PROMPT_TYPE="qwen25-math-cot"
MAX_TOKENS_PER_CALL="3072"
SPLIT="test"
NUM_TEST_SAMPLE=-1
# Passed to math_eval.py as a single comma-separated string.
DATA_NAMES="amc23x8,minerva_math,olympiadbench,math500"

mkdir -p -- "${OUTPUT_DIR}"

# Build the argument list as an array so each value reaches math_eval.py as
# exactly one argument, regardless of spaces or glob characters in it.
# The meaning of each flag is defined by math_eval.py; values are passed
# through unchanged.
eval_args=(
  --model_name_or_path "${MODEL_NAME_OR_PATH}"
  --data_name "${DATA_NAMES}"
  --output_dir "${OUTPUT_DIR}"
  --split "${SPLIT}"
  --prompt_type "${PROMPT_TYPE}"
  --num_test_sample "${NUM_TEST_SAMPLE}"
  --seed 0
  --temperature 0
  --n_sampling 1
  --top_p 1
  --start 0
  --end -1
  --use_vllm
  --save_outputs
  --max_tokens_per_call "${MAX_TOKENS_PER_CALL}"
  --overwrite
)

# TOKENIZERS_PARALLELISM is set for this one process only, not exported.
TOKENIZERS_PARALLELISM=false \
  python3 -u math_eval.py "${eval_args[@]}"