summaryrefslogtreecommitdiff
path: root/Qwen2.5-Eval/evaluation/sh
diff options
context:
space:
mode:
authorYuren Hao <yurenh2@timan108.cs.illinois.edu>2025-09-04 22:16:22 -0500
committerYuren Hao <yurenh2@timan108.cs.illinois.edu>2025-09-04 22:16:22 -0500
commitfc6d57ffb8d5ddb5820fcc00b5491a585c259ebc (patch)
treee9841f93a353e2107225cfc721d1ce57c0e594dc /Qwen2.5-Eval/evaluation/sh
Initial commit
Diffstat (limited to 'Qwen2.5-Eval/evaluation/sh')
-rw-r--r--Qwen2.5-Eval/evaluation/sh/eval_all_math.sh31
1 files changed, 31 insertions, 0 deletions
diff --git a/Qwen2.5-Eval/evaluation/sh/eval_all_math.sh b/Qwen2.5-Eval/evaluation/sh/eval_all_math.sh
new file mode 100644
index 0000000..8e8067d
--- /dev/null
+++ b/Qwen2.5-Eval/evaluation/sh/eval_all_math.sh
@@ -0,0 +1,31 @@
+set -x
+export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
+MODEL_NAME_OR_PATH="/volume/ailab4sci/ztgao/em/checkpoints/qwen25_math_7b_1shot_pi1/step_10"
+OUTPUT_DIR="/volume/ailab4sci/ztgao/em/checkpoints/qwen25_math_7b_1shot_pi1/step_10/temp00/eval"
+mkdir -p $OUTPUT_DIR
+PROMPT_TYPE="qwen25-math-cot"
+MAX_TOKENS_PER_CALL="3072"
+SPLIT="test"
+NUM_TEST_SAMPLE=-1
+DATA_NAMES="amc23x8,minerva_math,olympiadbench,math500"
+IFS=',' read -ra DATASETS <<< "$DATA_NAMES"
+ALL_EXIST=true
+
+TOKENIZERS_PARALLELISM=false \
+python3 -u math_eval.py \
+ --model_name_or_path ${MODEL_NAME_OR_PATH} \
+ --data_name ${DATA_NAMES} \
+ --output_dir ${OUTPUT_DIR} \
+ --split ${SPLIT} \
+ --prompt_type ${PROMPT_TYPE} \
+ --num_test_sample ${NUM_TEST_SAMPLE} \
+ --seed 0 \
+ --temperature 0 \
+ --n_sampling 1 \
+ --top_p 1 \
+ --start 0 \
+ --end -1 \
+ --use_vllm \
+ --save_outputs \
+ --max_tokens_per_call ${MAX_TOKENS_PER_CALL} \
+ --overwrite