summaryrefslogtreecommitdiff
path: root/run_genderbench_evaluation.py
diff options
context:
space:
mode:
Diffstat (limited to 'run_genderbench_evaluation.py')
-rw-r--r--run_genderbench_evaluation.py78
1 files changed, 78 insertions, 0 deletions
diff --git a/run_genderbench_evaluation.py b/run_genderbench_evaluation.py
new file mode 100644
index 0000000..50b12ee
--- /dev/null
+++ b/run_genderbench_evaluation.py
@@ -0,0 +1,78 @@
#!/usr/bin/env python3
"""Quick launcher for the GenderBench evaluation.

Looks for saved model directories in a few conventional locations, prints a
ready-to-copy ``genderbench_integration.py`` command line together with a
short description of the evaluation, and, when command-line arguments are
present, hands control to ``genderbench_integration.main``.
"""
import os
import shlex
import sys
from pathlib import Path

# Directories whose immediate sub-directories are inspected for saved models.
_MODEL_SEARCH_DIRS = ("checkpoints", "models", "output", "saved_models", ".")

# A sub-directory is reported as a model candidate if it holds any of these.
_MODEL_MARKER_FILES = ("pytorch_model.bin", "model.safetensors", "config.json")

# Placeholder names offered in the usage example, one per example model.
_EXAMPLE_NAMES = ("baseline_model", "trained_model")


def _find_model_dirs(search_dirs=_MODEL_SEARCH_DIRS):
    """Return sub-directories of *search_dirs* that contain a model marker file.

    Paths are returned as ``os.path.join(search_dir, entry)`` strings, grouped
    by search directory in the order given and sorted by name within each one
    so the result does not depend on the file system's listing order.
    """
    found = []
    for search_dir in search_dirs:
        try:
            entries = sorted(os.listdir(search_dir))
        except OSError:
            # Missing, not a directory, or unreadable: skip it instead of
            # aborting the whole scan.
            continue
        for entry in entries:
            candidate = os.path.join(search_dir, entry)
            if not os.path.isdir(candidate):
                continue
            has_marker = any(
                os.path.exists(os.path.join(candidate, marker))
                for marker in _MODEL_MARKER_FILES
            )
            if has_marker:
                found.append(candidate)
    return found


def main() -> None:
    """Print usage guidance and, if arguments were given, run the evaluation.

    The guidance (discovered models, an example command, available options,
    evaluated dimensions and output formats) is always printed to stdout.
    When ``sys.argv`` carries at least one argument, ``main`` from
    ``genderbench_integration`` is then imported and called with no
    arguments; it presumably parses ``sys.argv`` itself (not visible here).

    Raises:
        ImportError: if arguments were given but ``genderbench_integration``
            cannot be imported.
    """
    print("🎯 GenderBench评估工具")
    print("=" * 50)

    potential_models = _find_model_dirs()

    if potential_models:
        print("🔍 发现可能的模型路径:")
        for index, model_path in enumerate(potential_models, start=1):
            print(f" {index}. {model_path}")

        # The example uses at most two models and exactly as many names, so
        # the suggested command is self-consistent even with a single hit.
        example_models = potential_models[:2]
        example_names = _EXAMPLE_NAMES[:len(example_models)]
        # Quote paths so the command survives copy-paste into a POSIX shell
        # when a directory name contains spaces or other special characters.
        quoted_models = " ".join(shlex.quote(path) for path in example_models)

        print("\n📝 使用示例:")
        print("python genderbench_integration.py \\")
        print(f" --models {quoted_models} \\")
        print(f" --names {' '.join(example_names)} \\")
        print(" --output genderbench_results")
    else:
        print("❌ 未发现模型文件")
        print("📝 请手动指定模型路径:")
        print("python genderbench_integration.py \\")
        print(" --models /path/to/model1 /path/to/model2 \\")
        print(" --names model1_name model2_name \\")
        print(" --output genderbench_results")

    print("\n🔧 可用选项:")
    print(" --models: 模型路径列表(必需)")
    print(" --names: 模型名称列表(可选)")
    print(" --output: 输出目录(默认: genderbench_results)")

    print("\n📊 评估内容:")
    print(" 1. 决策公平性 - 招聘和晋升决策中的性别偏见")
    print(" 2. 创作代表性 - 创作内容中的性别平衡")
    print(" 3. 刻板印象推理 - 对性别刻板印象的认同程度")

    print("\n📈 输出结果:")
    print(" - 详细JSON报告")
    print(" - CSV对比表格")
    print(" - HTML可视化报告")

    # With any command-line argument present, run the evaluation directly.
    if len(sys.argv) > 1:
        print("\n🚀 开始运行评估...")
        # Imported only on this path, so a bare invocation prints the help
        # above without needing the evaluation module to be importable.
        from genderbench_integration import main as run_evaluation
        run_evaluation()


if __name__ == "__main__":
    main()