summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author zitian-gao <zitian.gao@outlook.com> 2025-05-27 16:50:19 +0800
committer zitian-gao <zitian.gao@outlook.com> 2025-05-27 16:50:19 +0800
commit e616513b163f3feb2ee197cb0846ece37727580d (patch)
tree bfc85be3a904973b11b39ee3564c23ba434476d9
parent 7c792461c8e4e4f1f8734fed143630c74e76b27f (diff)
init eval
-rw-r--r-- .gitignore 3
-rwxr-xr-x Qwen2.5-Eval/evaluation/latex2sympy/scripts/compile.sh 9
-rwxr-xr-x Qwen2.5-Eval/evaluation/latex2sympy/scripts/coverage-ci.sh 3
-rwxr-xr-x Qwen2.5-Eval/evaluation/latex2sympy/scripts/coverage.sh 23
-rwxr-xr-x Qwen2.5-Eval/evaluation/latex2sympy/scripts/publish.sh 3
-rwxr-xr-x Qwen2.5-Eval/evaluation/latex2sympy/scripts/setup-hooks.sh 3
-rwxr-xr-x Qwen2.5-Eval/evaluation/latex2sympy/scripts/setup.sh 42
-rwxr-xr-x Qwen2.5-Eval/evaluation/latex2sympy/scripts/test.sh 31
-rw-r--r-- Qwen2.5-Eval/evaluation/sh/eval_all_math.sh 31
9 files changed, 147 insertions, 1 deletions
diff --git a/.gitignore b/.gitignore
index 556fac0..321e8a5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,8 @@
checkpoints/*
wandb/*
+image/*
archived/*
*.log
-*.sh
+/*.sh
*.ipynb
log/* \ No newline at end of file
diff --git a/Qwen2.5-Eval/evaluation/latex2sympy/scripts/compile.sh b/Qwen2.5-Eval/evaluation/latex2sympy/scripts/compile.sh
new file mode 100755
index 0000000..566d911
--- /dev/null
+++ b/Qwen2.5-Eval/evaluation/latex2sympy/scripts/compile.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+# Run the ANTLR tool on PS.g4 and write the generated files to gen/.
+# Take the directory that contains the git dir as the project root.
+rdir=$(git rev-parse --git-dir)
+rel_path="$(dirname "$rdir")"
+# Enter that directory; stop if cd fails so java is never run from the wrong place.
+cd "$rel_path" || exit 1
+
+java -jar antlr-4.11.1-complete.jar PS.g4 -o gen
diff --git a/Qwen2.5-Eval/evaluation/latex2sympy/scripts/coverage-ci.sh b/Qwen2.5-Eval/evaluation/latex2sympy/scripts/coverage-ci.sh
new file mode 100755
index 0000000..335d4c3
--- /dev/null
+++ b/Qwen2.5-Eval/evaluation/latex2sympy/scripts/coverage-ci.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+# Run pytest on tests (doctests included); write JUnit XML to junit/test-results.xml and XML coverage for latex2sympy.
+pytest --doctest-modules --junitxml=junit/test-results.xml --cov-report=xml --cov-config=.coveragerc --cov=latex2sympy tests \ No newline at end of file
diff --git a/Qwen2.5-Eval/evaluation/latex2sympy/scripts/coverage.sh b/Qwen2.5-Eval/evaluation/latex2sympy/scripts/coverage.sh
new file mode 100755
index 0000000..a704a21
--- /dev/null
+++ b/Qwen2.5-Eval/evaluation/latex2sympy/scripts/coverage.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+# Activate the project venv, then run pytest with an HTML coverage report for latex2sympy.
+# Take the directory that contains the git dir as the project root.
+rdir=$(git rev-parse --git-dir)
+rel_path="$(dirname "$rdir")"
+# Enter that directory; stop if cd fails so nothing below runs from the wrong place.
+cd "$rel_path" || exit 1
+
+# Activate the venv (bin/ or Scripts/); "." is used because "source" is not in every /bin/sh.
+echo "activating venv..."
+if test -f .env/bin/activate
+then . .env/bin/activate && echo "venv activate (bin)"
+elif test -f .env/Scripts/activate
+then . .env/Scripts/activate && echo "venv activated (Scripts)"
+else exit 1
+fi
+
+# Run the tests with coverage; exit 1 when pytest reports a failure.
+echo "starting coverage..."
+if pytest --doctest-modules --cov-report=html --cov-config=.coveragerc --cov=latex2sympy tests
+then echo "coverage finished"
+else exit 1
+fi
diff --git a/Qwen2.5-Eval/evaluation/latex2sympy/scripts/publish.sh b/Qwen2.5-Eval/evaluation/latex2sympy/scripts/publish.sh
new file mode 100755
index 0000000..dc12a03
--- /dev/null
+++ b/Qwen2.5-Eval/evaluation/latex2sympy/scripts/publish.sh
@@ -0,0 +1,3 @@
+rm -f ./dist/*  # clear old artifacts; -f keeps an empty dist/ from printing an error
+python3 setup.py bdist_wheel || exit 1  # do not upload when the wheel build failed
+twine upload dist/*
diff --git a/Qwen2.5-Eval/evaluation/latex2sympy/scripts/setup-hooks.sh b/Qwen2.5-Eval/evaluation/latex2sympy/scripts/setup-hooks.sh
new file mode 100755
index 0000000..760dfd5
--- /dev/null
+++ b/Qwen2.5-Eval/evaluation/latex2sympy/scripts/setup-hooks.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+cp scripts/pre-push .git/hooks/  # copies both hook scripts into .git/hooks/; paths are relative to the current directory
+cp scripts/pre-commit .git/hooks/ \ No newline at end of file
diff --git a/Qwen2.5-Eval/evaluation/latex2sympy/scripts/setup.sh b/Qwen2.5-Eval/evaluation/latex2sympy/scripts/setup.sh
new file mode 100755
index 0000000..b7e71c9
--- /dev/null
+++ b/Qwen2.5-Eval/evaluation/latex2sympy/scripts/setup.sh
@@ -0,0 +1,42 @@
+#!/bin/sh
+# Development setup: create/activate the .env venv, install dev requirements, compile the parser, install git hooks.
+# Take the directory that contains the git dir as the project root.
+rdir=$(git rev-parse --git-dir)
+rel_path="$(dirname "$rdir")"
+# Enter that directory; stop if cd fails so nothing below runs from the wrong place.
+cd "$rel_path" || exit 1
+
+echo "creating venv..."
+if test -d .env
+then echo "venv exists"
+else python3 -m venv .env && echo "venv created"
+fi
+
+echo ''
+# Activate the venv (bin/ or Scripts/); "." is used because "source" is not in every /bin/sh.
+echo "activating venv..."
+if test -f .env/bin/activate
+then . .env/bin/activate && echo "venv activate (bin)"
+elif test -f .env/Scripts/activate
+then . .env/Scripts/activate && echo "venv activated (Scripts)"
+else exit 1
+fi
+
+echo ''
+echo "installing requirements..."
+if pip install -r dev-requirements.txt
+then echo "requirements installed"
+else exit 1
+fi
+
+echo ''
+echo "compiling parser..."
+sh scripts/compile.sh  # exit status is not checked; the message below prints either way
+echo "parser compiled"
+
+echo ''
+echo "setup git hooks..."
+sh scripts/setup-hooks.sh  # exit status is not checked; the message below prints either way
+echo "git hooks setup"
+
+exit 0
diff --git a/Qwen2.5-Eval/evaluation/latex2sympy/scripts/test.sh b/Qwen2.5-Eval/evaluation/latex2sympy/scripts/test.sh
new file mode 100755
index 0000000..20d6b01
--- /dev/null
+++ b/Qwen2.5-Eval/evaluation/latex2sympy/scripts/test.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+# Activate the project venv, recompile the parser, then run the unit tests with pytest.
+# Take the directory that contains the git dir as the project root.
+rdir=$(git rev-parse --git-dir)
+rel_path="$(dirname "$rdir")"
+# Enter that directory; stop if cd fails so nothing below runs from the wrong place.
+cd "$rel_path" || exit 1
+
+# Activate the venv (bin/ or Scripts/); "." is used because "source" is not in every /bin/sh.
+echo "activating venv..."
+if test -f .env/bin/activate
+then . .env/bin/activate && echo "venv activate (bin)"
+elif test -f .env/Scripts/activate
+then . .env/Scripts/activate && echo "venv activated (Scripts)"
+else exit 1
+fi
+
+echo ''
+echo "compiling parser..."
+sh scripts/compile.sh  # exit status is not checked; the message below prints either way
+echo "parser compiled"
+
+echo ''
+# Run the unit tests; exit 1 when pytest reports a failure.
+echo "starting tests..."
+if pytest tests
+then echo "tests finished"
+else exit 1
+fi
+
+exit 0
diff --git a/Qwen2.5-Eval/evaluation/sh/eval_all_math.sh b/Qwen2.5-Eval/evaluation/sh/eval_all_math.sh
new file mode 100644
index 0000000..2efd737
--- /dev/null
+++ b/Qwen2.5-Eval/evaluation/sh/eval_all_math.sh
@@ -0,0 +1,31 @@
+# Run math_eval.py (with --use_vllm) on the datasets listed in DATA_NAMES for one checkpoint.
+# The model path, output directory and sampling settings are fixed below; edit them per run.
+set -x
+export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
+MODEL_NAME_OR_PATH="/volume/ailab4sci/ztgao/em/checkpoints/qwen25_math_7b_1shot_pi1/step_10"
+OUTPUT_DIR="/volume/ailab4sci/ztgao/em/checkpoints/qwen25_math_7b_1shot_pi1/step_10/temp03/eval"
+mkdir -p "$OUTPUT_DIR"
+PROMPT_TYPE="qwen25-math-cot"
+MAX_TOKENS_PER_CALL="3072"
+SPLIT="test"
+NUM_TEST_SAMPLE=-1
+DATA_NAMES="aime25x8,aime24x8,amc23x8,minerva_math,olympiadbench,math500"
+
+TOKENIZERS_PARALLELISM=false \
+python3 -u math_eval.py \
+ --model_name_or_path "${MODEL_NAME_OR_PATH}" \
+ --data_name "${DATA_NAMES}" \
+ --output_dir "${OUTPUT_DIR}" \
+ --split "${SPLIT}" \
+ --prompt_type "${PROMPT_TYPE}" \
+ --num_test_sample "${NUM_TEST_SAMPLE}" \
+ --seed 0 \
+ --temperature 0.3 \
+ --n_sampling 1 \
+ --top_p 1 \
+ --start 0 \
+ --end -1 \
+ --use_vllm \
+ --save_outputs \
+ --max_tokens_per_call "${MAX_TOKENS_PER_CALL}" \
+ --overwrite \ No newline at end of file