blob: 46f605516f431a2d6c2166f5498ca0606dcbf970 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
|
#!/bin/bash
# GEE evaluation script.
#
# Compares a base model against a GEE checkpoint with
# evaluation.gee_evaluator.GEEEvaluator, prints a summary and writes a plot to
# gee_evaluation_results.png in the current working directory.
#
# Usage: bash scripts/evaluate_gee.sh [BASE_MODEL_PATH [GEE_MODEL_PATH]]
#   BASE_MODEL_PATH  directory of the base model (default: see below)
#   GEE_MODEL_PATH   directory of the GEE checkpoint (default: see below)
#
# The `evaluation` package is imported relative to the current directory, and
# the default GEE checkpoint path is relative as well.
#
# Exit status: 1 if either model directory is missing; otherwise the status of
# the Python evaluation (non-zero aborts before the completion message).

# Abort on the first failing command so a crashed evaluation is never
# reported as a success.
set -euo pipefail

echo "开始GEE评估..."

# Environment: restrict the run to GPU 0.
export CUDA_VISIBLE_DEVICES=0

# Model paths; override on the command line or edit the defaults.
BASE_MODEL_PATH="${1:-/volume/pt-train/models/Qwen2.5-Math-7B}"
GEE_MODEL_PATH="${2:-checkpoints/Qwen2.5-Math-7B/one_shot_gee/final}"

# Validate the model directories before starting Python.
if [[ ! -d "$BASE_MODEL_PATH" ]]; then
  echo "错误: 基础模型路径不存在: $BASE_MODEL_PATH" >&2
  exit 1
fi

if [[ ! -d "$GEE_MODEL_PATH" ]]; then
  echo "错误: GEE模型路径不存在: $GEE_MODEL_PATH" >&2
  echo "请先运行训练脚本" >&2
  exit 1
fi

echo "基础模型: $BASE_MODEL_PATH"
echo "GEE模型: $GEE_MODEL_PATH"

# Run the evaluation. The Python source is single-quoted so the shell expands
# nothing inside it; the paths travel as argv instead of being pasted into
# Python string literals, so quotes or backslashes in a path cannot corrupt
# the program.
python -c '
import sys

sys.path.append(".")
from evaluation.gee_evaluator import GEEEvaluator

base_model_path, gee_model_path = sys.argv[1:3]

# Create the evaluator.
evaluator = GEEEvaluator(base_model_path)

# Generate the test data.
test_data = evaluator.create_winogender_style_data(num_samples=100)

# Models to compare.
model_paths = {
    "Base": base_model_path,
    "GEE": gee_model_path,
}

# Compare the models, print the summary and plot the results.
results = evaluator.compare_models(model_paths, test_data)
evaluator.print_summary(results)
evaluator.plot_results(results, "gee_evaluation_results.png")
' "$BASE_MODEL_PATH" "$GEE_MODEL_PATH"

echo "评估完成!结果已保存到 gee_evaluation_results.png"
|