diff options
Diffstat (limited to 'run_genderbench_evaluation.py')
| -rw-r--r-- | run_genderbench_evaluation.py | 78 |
1 files changed, 78 insertions, 0 deletions
#!/usr/bin/env python3
"""
Quick launcher for the GenderBench evaluation.

Scans a few conventional output directories for saved model checkpoints,
prints usage hints for ``genderbench_integration.py`` and, when command-line
arguments are supplied, hands control over to that module's ``main``.
"""
import os
import sys
from pathlib import Path

# Directories (relative to the current working directory) that are searched
# for saved models.
_MODEL_DIRS = (
    "checkpoints",
    "models",
    "output",
    "saved_models",
    ".",
)

# A sub-directory is treated as a model when it directly contains any of these.
_MODEL_FILES = ("pytorch_model.bin", "model.safetensors", "config.json")


def _find_model_paths(search_dirs=_MODEL_DIRS):
    """Return the sub-directories of *search_dirs* that look like saved models.

    A sub-directory qualifies when it directly contains at least one of the
    marker files listed in ``_MODEL_FILES``. Results follow the order of
    *search_dirs*; entries within one directory are sorted by name so the
    output is reproducible between runs.
    """
    found = []
    for model_dir in search_dirs:
        # isdir rather than exists: a regular file that happens to be called
        # e.g. "output" must not be handed to os.listdir().
        if not os.path.isdir(model_dir):
            continue
        try:
            entries = sorted(os.listdir(model_dir))
        except OSError:
            # Discovery is best-effort; skip directories we cannot read.
            continue
        for item in entries:
            item_path = os.path.join(model_dir, item)
            if os.path.isdir(item_path) and any(
                os.path.exists(os.path.join(item_path, mf))
                for mf in _MODEL_FILES
            ):
                found.append(item_path)
    return found


def main():
    """Print discovered models and usage help; run the evaluation if asked.

    The evaluation itself is only started when the script receives at least
    one command-line argument. In that case ``genderbench_integration.main``
    is imported lazily and called without arguments.
    """
    print("🎯 GenderBench评估工具")
    print("=" * 50)

    # Look for candidate models in the conventional save locations.
    potential_models = _find_model_paths()

    if potential_models:
        print("🔍 发现可能的模型路径:")
        for i, model_path in enumerate(potential_models, start=1):
            print(f" {i}. {model_path}")

        print("\n📝 使用示例:")
        print("python genderbench_integration.py \\")
        print(f" --models {' '.join(potential_models[:2])} \\")
        print(" --names baseline_model trained_model \\")
        print(" --output genderbench_results")

    else:
        print("❌ 未发现模型文件")
        print("📝 请手动指定模型路径:")
        print("python genderbench_integration.py \\")
        print(" --models /path/to/model1 /path/to/model2 \\")
        print(" --names model1_name model2_name \\")
        print(" --output genderbench_results")

    print("\n🔧 可用选项:")
    print(" --models: 模型路径列表(必需)")
    print(" --names: 模型名称列表(可选)")
    print(" --output: 输出目录(默认: genderbench_results)")

    print("\n📊 评估内容:")
    print(" 1. 决策公平性 - 招聘和晋升决策中的性别偏见")
    print(" 2. 创作代表性 - 创作内容中的性别平衡")
    print(" 3. 刻板印象推理 - 对性别刻板印象的认同程度")

    print("\n📈 输出结果:")
    print(" - 详细JSON报告")
    print(" - CSV对比表格")
    print(" - HTML可视化报告")

    # With any command-line argument present, run the evaluation directly.
    if len(sys.argv) > 1:
        print("\n🚀 开始运行评估...")
        # Imported lazily so the help output works without the integration module.
        from genderbench_integration import main as run_evaluation
        run_evaluation()


if __name__ == "__main__":
    main()
\ No newline at end of file |
