diff options
| author | haoyuren <13851610112@163.com> | 2025-07-04 13:44:31 -0700 |
|---|---|---|
| committer | haoyuren <13851610112@163.com> | 2025-07-04 13:44:31 -0700 |
| commit | f35d5c8b7380b906a6819ef436a9d808041731fb (patch) | |
| tree | 9507750f7eeba3aba26bdb4fe557110c7ee4b90e | |
| parent | 19228600f14eea433c54e17c164c4efe3a029d77 (diff) | |
add genderbench
| -rw-r--r-- | explore_genderbench.py | 213 | ||||
| -rw-r--r-- | gender_debias_utils.py | 316 | ||||
| -rw-r--r-- | gender_debias_utils_english.py | 350 | ||||
| -rw-r--r-- | gender_debias_utils_v2.py | 369 | ||||
| -rw-r--r-- | gender_debias_utils_v3.py | 331 |
5 files changed, 1579 insertions, 0 deletions
#!/usr/bin/env python3
# File: explore_genderbench.py
"""
GenderBench data exploration script.

Inspects the data exposed by several GenderBench probes and the raw resource
files, as groundwork for converting the data into a masked-LLM format.
"""

import os
import re
import sys
from collections import Counter
from pathlib import Path

# Make the local genderbench checkout importable.
sys.path.append('./genderbench')

# Location of GenderBench's bundled resource files (relative to the CWD).
RESOURCES_PATH = Path("./genderbench/genderbench/resources")

# Gender terms whose frequency is reported by analyze_for_masked_llm().
GENDER_TERMS = (
    'men', 'women', 'man', 'woman',
    'male', 'female', 'trans', 'nonbinary',
    'he', 'she', 'his', 'her',
)

_WORD_RE = re.compile(r'\b\w+\b')
# Statements of the form "men are ...", "woman is ..." are easy to mask.
_CONVERTIBLE_RE = re.compile(r'\b(?:(?:men|women) are|(?:man|woman) is)', re.IGNORECASE)
# Longer alternatives first so "women" is never matched as "men".
_MASKABLE_RE = re.compile(r'\b(?:women|woman|men|man)\b', re.IGNORECASE)


def _read_stripped_lines(path):
    """Return the lines of *path* (UTF-8) with surrounding whitespace removed."""
    # Explicit encoding: the platform default is not UTF-8 everywhere.
    with open(path, 'r', encoding='utf-8') as f:
        return [line.strip() for line in f]


def _print_prompt_samples(items, limit):
    """Print the first prompt text of up to *limit* leading probe items."""
    for i, item in enumerate(items[:limit]):
        if hasattr(item, 'prompts') and item.prompts:
            print(f" {i+1}. {item.prompts[0].text}")


def _count_gender_terms(texts, terms=GENDER_TERMS):
    """Count, per term, how many texts contain it as a whole word.

    Whole-word matching is deliberate: a substring test would count "he"
    inside "the" and "men" inside "women". As a consequence "trans" is only
    counted when it appears as its own word.
    """
    counts = dict.fromkeys(terms, 0)
    for text in texts:
        tokens = set(_WORD_RE.findall(text.lower()))
        for term in terms:
            if term in tokens:
                counts[term] += 1
    return counts


def _mask_gender_terms(text):
    """Replace the whole words men/women/man/woman (any case) with [GENDER]."""
    return _MASKABLE_RE.sub('[GENDER]', text)


def explore_direct_probe():
    """Explore DirectProbe: stereotype statements."""
    print("🔍 === DirectProbe探索 ===")

    # Import and construction live inside the try so a broken install is
    # reported the same way as in the other explore_* functions.
    try:
        from genderbench.probes.direct.direct_probe import DirectProbe

        probe = DirectProbe()
        print(f"📊 Probe类型: {type(probe).__name__}")
        print(f"🎯 模板: {probe.template}")

        items = probe._create_probe_items()
        print(f"📝 总stereotype数量: {len(items)}")

        # Show the first few examples.
        print("\n📋 Stereotype样本:")
        _print_prompt_samples(items, 10)

        # Break the items down by their metadata 'source'.
        sources = Counter(item.metadata.get('source', 'unknown') for item in items)

        print("\n📈 按来源统计:")
        for source, count in sources.items():
            print(f" {source}: {count}个")

    except Exception as e:  # best-effort exploration: report and carry on
        print(f"❌ 加载DirectProbe数据失败: {e}")


def explore_jobs_probe():
    """Explore JobsLumProbe: occupation-related bias."""
    print("\n🔍 === JobsLumProbe探索 ===")

    try:
        from genderbench.probes.jobs_lum.jobs_lum_probe import JobsLumProbe

        probe = JobsLumProbe()
        print(f"📊 Probe类型: {type(probe).__name__}")

        items = probe._create_probe_items()
        print(f"🏢 总职业数量: {len(items)}")

        # Show the first few occupation examples.
        print("\n💼 职业样本:")
        _print_prompt_samples(items, 10)

    except Exception as e:  # best-effort exploration: report and carry on
        print(f"❌ 加载JobsLumProbe失败: {e}")


def explore_gest_probe():
    """Explore GestProbe: attribution of gender stereotypes."""
    print("\n🔍 === GestProbe探索 ===")

    try:
        from genderbench.probes.gest.gest_probe import GestProbe

        probe = GestProbe()
        print(f"📊 Probe类型: {type(probe).__name__}")

        items = probe._create_probe_items()
        print(f"🎭 总测试项数量: {len(items)}")

        # Show the first few examples.
        print("\n🗣️ GEST样本:")
        _print_prompt_samples(items, 5)

    except Exception as e:  # best-effort exploration: report and carry on
        print(f"❌ 加载GestProbe失败: {e}")


def explore_resources():
    """Explore the resource files to see what the raw data looks like."""
    print("\n🔍 === 资源文件探索 ===")

    resources_path = RESOURCES_PATH
    if not resources_path.exists():
        print("❌ 资源目录不存在")
        return

    print(f"📁 资源目录: {resources_path}")

    # Stereotype lists.
    sbic_file = resources_path / "sbic_stereotypes" / "stereotypes.txt"
    if sbic_file.exists():
        sbic_lines = _read_stripped_lines(sbic_file)
        print(f"📄 SBIC stereotypes: {len(sbic_lines)}行")

        print("\n🔸 SBIC样本 (前10个):")
        for i, line in enumerate(sbic_lines[:10]):
            print(f" {i+1}. {line}")

    gest_file = resources_path / "gest_stereotypes" / "stereotypes.txt"
    if gest_file.exists():
        gest_lines = _read_stripped_lines(gest_file)
        print(f"\n📄 GEST stereotypes: {len(gest_lines)}行")

        print("\n🔸 GEST样本 (前10个):")
        for i, line in enumerate(gest_lines[:10]):
            print(f" {i+1}. {line}")

    # Every other resource directory, with a peek at its first files.
    print("\n📂 所有资源目录:")
    for subdir in resources_path.iterdir():
        if subdir.is_dir():
            files = list(subdir.glob("*"))
            print(f" 📁 {subdir.name}/: {len(files)}个文件")
            for file in files[:3]:  # only the first three files
                print(f" 📄 {file.name}")


def analyze_for_masked_llm():
    """Analyse the SBIC stereotypes as preparation for masked-LLM conversion.

    Prints how many stereotypes mention each gender term (whole-word match)
    and a few example conversions where the gendered noun is replaced by
    ``[GENDER]``. Does nothing beyond the header if the file is missing.
    """
    print("\n🔍 === Masked LLM转换分析 ===")

    sbic_file = RESOURCES_PATH / "sbic_stereotypes" / "stereotypes.txt"
    if not sbic_file.exists():
        return

    stereotypes = _read_stripped_lines(sbic_file)

    # Frequency of gender vocabulary.
    gender_patterns = _count_gender_terms(stereotypes)

    print("🎯 性别词汇出现频次:")
    for pattern, count in sorted(gender_patterns.items(), key=lambda x: x[1], reverse=True):
        if count > 0:
            print(f" {pattern}: {count}次")

    # Stereotypes that are straightforward to convert.
    print("\n🔄 适合Masked LLM转换的stereotype样本:")
    convertible = [s for s in stereotypes[:20] if _CONVERTIBLE_RE.search(s)]

    for stereotype in convertible[:5]:
        print(f" 原文: {stereotype}")
        # Example conversion.
        print(f" 转换: {_mask_gender_terms(stereotype)}")
        print()


def main():
    """Entry point: run every exploration step."""
    print("🚀 GenderBench数据探索开始")
    print("=" * 50)

    # Basic environment information.
    print(f"📍 当前目录: {os.getcwd()}")
    print(f"🐍 Python版本: {sys.version}")

    # Bail out early when genderbench cannot be imported.
    try:
        import genderbench
        print(f"✅ GenderBench版本: {getattr(genderbench, '__version__', '已安装')}")
    except ImportError:
        print("❌ GenderBench未安装")
        return

    # Probes.
    explore_direct_probe()
    explore_jobs_probe()
    explore_gest_probe()

    # Raw resource files.
    explore_resources()

    # Feasibility of the masked conversion.
    analyze_for_masked_llm()

    print("\n🎉 探索完成!")
    print("💡 建议: 基于以上分析,我们可以设计数据转换和约束生成策略")


if __name__ == "__main__":
    main()
#!/usr/bin/env python3
# File: gender_debias_utils.py
"""
Gender debias utilities.

Helpers for gender-debiasing training built on GenderBench data.
Core idea: generate prompt pairs that are identical except for gender.
"""

import random
import sys
import re
import torch
import torch.nn.functional as F
from typing import List, Dict, Tuple, Optional
from pathlib import Path

# Make the local genderbench checkout importable.
sys.path.append('./genderbench')

_WORD_RE = re.compile(r'\b\w+\b')


class GenderWordProcessor:
    """Lookup tables and helpers for gendered vocabulary."""

    def __init__(self):
        # Word -> opposite-gender counterpart.
        # NOTE: 'her' is ambiguous (object vs. possessive); it maps to 'him'.
        self.gender_pairs = {
            # Basic pronouns
            'he': 'she', 'she': 'he',
            'him': 'her', 'her': 'him',
            'his': 'her', 'hers': 'his',
            'himself': 'herself', 'herself': 'himself',

            # Group nouns
            'men': 'women', 'women': 'men',
            'man': 'woman', 'woman': 'man',
            'male': 'female', 'female': 'male',
            'males': 'females', 'females': 'males',
            'gentleman': 'lady', 'lady': 'gentleman',
            'gentlemen': 'ladies', 'ladies': 'gentlemen',

            # Family relations
            'father': 'mother', 'mother': 'father',
            'dad': 'mom', 'mom': 'dad',
            'son': 'daughter', 'daughter': 'son',
            'brother': 'sister', 'sister': 'brother',
            'uncle': 'aunt', 'aunt': 'uncle',
            'grandfather': 'grandmother', 'grandmother': 'grandfather',
            'grandson': 'granddaughter', 'granddaughter': 'grandson',

            # Gender-marked occupations
            'businessman': 'businesswoman', 'businesswoman': 'businessman',
            'policeman': 'policewoman', 'policewoman': 'policeman',
            'fireman': 'firewoman', 'firewoman': 'fireman',
        }

        # Gender-neutral words (reported by extract_gender_words, never swapped).
        self.neutral_words = {
            'person', 'people', 'individual', 'human', 'adult',
            'parent', 'child', 'sibling', 'spouse', 'partner'
        }

        # Gender classification of every key in gender_pairs. Keeping the
        # full list here avoids ad-hoc partial lists at the call sites.
        self.male_words = {
            'he', 'him', 'his', 'himself',
            'men', 'man', 'male', 'males', 'gentleman', 'gentlemen',
            'father', 'dad', 'son', 'brother', 'uncle',
            'grandfather', 'grandson',
            'businessman', 'policeman', 'fireman',
        }
        self.female_words = set(self.gender_pairs) - self.male_words

    def get_gender_opposite(self, word: str) -> str:
        """Return the opposite-gender counterpart of *word*.

        The lookup is case-insensitive and the result mirrors the casing of
        *word* (UPPER, Title or unchanged). Unknown words are returned as-is.
        """
        opposite = self.gender_pairs.get(word.lower())
        if opposite is None:
            return word
        if word.isupper():
            return opposite.upper()
        if word.istitle():
            return opposite.title()
        return opposite

    def extract_gender_words(self, text: str) -> List[str]:
        """Return the gendered and neutral words of *text*, lowercased.

        Words are listed in text order; repeated words appear repeatedly.
        """
        return [
            word for word in _WORD_RE.findall(text.lower())
            if word in self.gender_pairs or word in self.neutral_words
        ]


class StereotypeConverter:
    """Converts stereotype statements into a masked training format."""

    def __init__(self):
        self.gender_processor = GenderWordProcessor()

    def convert_to_masked_format(self, stereotype: str) -> Optional[Dict]:
        """Mask every gendered word of *stereotype* with ``[MASK]``.

        Returns:
            ``None`` when the text holds neither gendered nor neutral words,
            otherwise a dict with keys ``original``, ``masked``,
            ``gender_words`` (lowercased, includes neutral words) and
            ``gender_positions``: ``(start, end, word)`` tuples in text order,
            with offsets into the *original* text.
        """
        gender_words = self.gender_processor.extract_gender_words(stereotype)
        if not gender_words:
            return None

        pairs = self.gender_processor.gender_pairs
        gender_positions = [
            (match.start(), match.end(), match.group().lower())
            for match in _WORD_RE.finditer(stereotype)
            if match.group().lower() in pairs
        ]

        # Substitute back to front so earlier offsets stay valid.
        masked_text = stereotype
        for start, end, _ in reversed(gender_positions):
            masked_text = masked_text[:start] + '[MASK]' + masked_text[end:]

        return {
            'original': stereotype,
            'masked': masked_text,
            'gender_words': gender_words,
            'gender_positions': gender_positions
        }

    def _surface_forms(self, masked_data: Dict) -> List[str]:
        """Return the masked words in text order, original casing if known.

        Falls back to the lowercased ``gender_words`` when the dict carries
        no positions or the positions do not line up with ``original``.
        """
        original = masked_data.get('original')
        positions = masked_data.get('gender_positions')
        if original is not None and positions:
            ordered = sorted(positions)
            forms = [original[start:end] for start, end, _ in ordered]
            if all(form.lower() == word for form, (_, _, word) in zip(forms, ordered)):
                return forms
        pairs = self.gender_processor.gender_pairs
        return [word for word in masked_data['gender_words'] if word in pairs]

    def create_gender_pairs(self, masked_data: Dict) -> Tuple[Optional[str], Optional[str]]:
        """Fill the masks to build an all-male and an all-female version.

        Args:
            masked_data: result of :meth:`convert_to_masked_format`.

        Returns:
            ``(male_text, female_text)``, or ``(None, None)`` for empty input.
        """
        if not masked_data:
            return None, None

        male_text = female_text = masked_data['masked']
        male_words = self.gender_processor.male_words

        # The k-th surface form corresponds to the k-th remaining [MASK].
        for surface in self._surface_forms(masked_data):
            opposite = self.gender_processor.get_gender_opposite(surface)
            if surface.lower() in male_words:
                male_fill, female_fill = surface, opposite
            else:
                male_fill, female_fill = opposite, surface
            male_text = male_text.replace('[MASK]', male_fill, 1)
            female_text = female_text.replace('[MASK]', female_fill, 1)

        return male_text, female_text


class JobsConverter:
    """Builds gender-contrast prompts for occupations."""

    def __init__(self):
        self.gender_processor = GenderWordProcessor()

    def create_job_prompts(self, job_name: str) -> Dict:
        """Return base/male/female/neutral prompt stems for *job_name*."""
        base_template = f"这个{job_name}是"

        return {
            'base': base_template,
            'male': f"{base_template}他",
            'female': f"{base_template}她",
            'neutral': f"{base_template}一个人",
            'job': job_name
        }


class DebiasDataLoader:
    """Loads GenderBench probe data and converts it for debias training."""

    def __init__(self):
        self.stereotype_converter = StereotypeConverter()
        self.jobs_converter = JobsConverter()

    def load_direct_probe_data(self) -> List[Dict]:
        """Load DirectProbe items and convert them into gender pairs.

        Items whose male and female versions come out identical (no
        swappable word) are skipped, matching the other loaders.
        """
        from genderbench.probes.direct.direct_probe import DirectProbe

        probe = DirectProbe()

        converted_data = []
        for item in probe._create_probe_items():
            if not (hasattr(item, 'prompts') and item.prompts):
                continue
            # The statement is the quoted part of
            # 'Do you agree with the following statement? "XXX". Please answer...'
            match = re.search(r'"([^"]+)"', item.prompts[0].text)
            if not match:
                continue
            stereotype = match.group(1)

            masked_data = self.stereotype_converter.convert_to_masked_format(stereotype)
            if not masked_data:
                continue
            male_text, female_text = self.stereotype_converter.create_gender_pairs(masked_data)
            if not (male_text and female_text) or male_text == female_text:
                continue

            converted_data.append({
                'type': 'stereotype',
                'original': stereotype,
                'masked': masked_data['masked'],
                'male_version': male_text,
                'female_version': female_text,
                'source': item.metadata.get('source', 'unknown')
            })

        return converted_data

    def load_jobs_probe_data(self) -> List[Dict]:
        """Load JobsLumProbe items and convert them into job prompts.

        Returns an empty list (after printing the error) if loading fails.
        """
        try:
            from genderbench.probes.jobs_lum.jobs_lum_probe import JobsLumProbe

            probe = JobsLumProbe()

            converted_data = []
            for item in probe._create_probe_items():
                if not (hasattr(item, 'prompts') and item.prompts):
                    continue
                # The job is taken from
                # "I am writing a novel and I need a character who works as a/an XXX"
                match = re.search(r'works as a/an (.+?)\. Please', item.prompts[0].text)
                if match:
                    job = match.group(1)
                    converted_data.append({
                        'type': 'job',
                        'job': job,
                        'prompts': self.jobs_converter.create_job_prompts(job)
                    })

            return converted_data
        except Exception as e:  # best-effort: the demo continues without jobs
            print(f"加载Jobs数据失败: {e}")
            return []


def content_similarity_loss(text1: str, text2: str, tokenizer) -> torch.Tensor:
    """Loss measuring how much two texts differ apart from gender words.

    Gendered and neutral words are dropped from both texts (whitespace
    tokenisation); the loss is ``1 - Jaccard(similarity) + 0.1 * |len diff|``.
    Two texts with no remaining content count as identical.

    Args:
        text1: first text.
        text2: second text.
        tokenizer: currently unused; kept for interface compatibility.

    Returns:
        A scalar float32 tensor (not connected to any autograd graph).
    """
    processor = GenderWordProcessor()
    ignored = processor.gender_pairs.keys() | processor.neutral_words

    def content_words(text: str) -> List[str]:
        return [word for word in text.split() if word.lower() not in ignored]

    filtered_words1 = content_words(text1)
    filtered_words2 = content_words(text2)

    # Simplified stand-in for an edit distance: penalise length differences.
    length_diff = abs(len(filtered_words1) - len(filtered_words2))

    # Vocabulary overlap (Jaccard). Empty vs. empty is a perfect match.
    set1, set2 = set(filtered_words1), set(filtered_words2)
    total = len(set1 | set2)
    similarity = len(set1 & set2) / total if total > 0 else 1.0

    consistency_loss = 1.0 - similarity + 0.1 * length_diff

    return torch.tensor(consistency_loss, dtype=torch.float32)


def create_training_batch(converted_data: List[Dict], batch_size: int = 4) -> List[Dict]:
    """Randomly sample up to *batch_size* items and turn them into prompt pairs.

    Items whose ``type`` is neither ``'stereotype'`` nor ``'job'`` are dropped.
    """
    selected = random.sample(converted_data, min(batch_size, len(converted_data)))

    batch = []
    for item in selected:
        if item['type'] == 'stereotype':
            batch.append({
                'male_prompt': f"请对这个陈述表态: '{item['male_version']}'",
                'female_prompt': f"请对这个陈述表态: '{item['female_version']}'",
                'bias_type': 'stereotype'
            })
        elif item['type'] == 'job':
            batch.append({
                'male_prompt': f"请描述:{item['prompts']['male']}",
                'female_prompt': f"请描述:{item['prompts']['female']}",
                'bias_type': 'occupation'
            })

    return batch


# Example usage
def demonstrate_conversion():
    """Demonstrate the conversion on real GenderBench data."""
    print("🔄 === 数据转换演示 ===")

    loader = DebiasDataLoader()

    # Stereotype data
    stereotype_data = loader.load_direct_probe_data()
    print(f"📊 加载了 {len(stereotype_data)} 个stereotype")

    print("\n🎯 转换示例:")
    for i, item in enumerate(stereotype_data[:3]):
        print(f" {i+1}. 原始: {item['original']}")
        print(f" 男性版本: {item['male_version']}")
        print(f" 女性版本: {item['female_version']}")
        print()

    # Occupation data
    jobs_data = loader.load_jobs_probe_data()
    print(f"📊 加载了 {len(jobs_data)} 个职业")

    print("\n💼 职业示例:")
    for i, item in enumerate(jobs_data[:3]):
        print(f" {i+1}. 职业: {item['job']}")
        print(f" 男性: {item['prompts']['male']}")
        print(f" 女性: {item['prompts']['female']}")
        print()


if __name__ == "__main__":
    demonstrate_conversion()
#!/usr/bin/env python3
# File: gender_debias_utils_english.py
"""
Gender Debias Utilities - English Version
Pure English implementation for gender debiasing
Core idea: Generate identical responses except for gender
"""

import sys
import re
import torch
import torch.nn.functional as F
from typing import List, Dict, Tuple, Optional
from pathlib import Path

# Add genderbench to path
sys.path.append('./genderbench')

_WORD_RE = re.compile(r'\b\w+\b')

# Verbs after which a gender word is classified as the object.
_ACTION_WORDS = frozenset({
    'abuse', 'hit', 'help', 'support', 'love', 'hate', 'see', 'meet', 'like', 'prefer'
})


class EnglishGenderProcessor:
    """English Gender Word Processor"""

    def __init__(self):
        # Word -> opposite-gender counterpart.
        # 'his' maps to 'her' (possessive determiner, "his car" -> "her car").
        # NOTE: 'her' stays ambiguous (object vs. possessive) and maps to 'him'.
        self.gender_pairs = {
            'he': 'she', 'she': 'he',
            'him': 'her', 'her': 'him',
            'his': 'her', 'hers': 'his',
            'himself': 'herself', 'herself': 'himself',
            'men': 'women', 'women': 'men',
            'man': 'woman', 'woman': 'man',
            'male': 'female', 'female': 'male',
            'males': 'females', 'females': 'males',
            'gentleman': 'lady', 'lady': 'gentleman',
            'gentlemen': 'ladies', 'ladies': 'gentlemen',
            'father': 'mother', 'mother': 'father',
            'dad': 'mom', 'mom': 'dad',
            'son': 'daughter', 'daughter': 'son',
            'brother': 'sister', 'sister': 'brother',
            'uncle': 'aunt', 'aunt': 'uncle',
            'husband': 'wife', 'wife': 'husband',
            'boy': 'girl', 'girl': 'boy',
            'boys': 'girls', 'girls': 'boys',
            'guy': 'gal', 'gal': 'guy',
            'guys': 'gals', 'gals': 'guys',
        }

        # Gender classification
        self.male_words = {
            'men', 'man', 'he', 'him', 'his', 'male', 'males', 'father', 'dad',
            'son', 'brother', 'uncle', 'husband', 'boy', 'boys', 'gentleman',
            'gentlemen', 'guy', 'guys'
        }
        self.female_words = {
            'women', 'woman', 'she', 'her', 'hers', 'female', 'females', 'mother',
            'mom', 'daughter', 'sister', 'aunt', 'wife', 'girl', 'girls', 'lady',
            'ladies', 'gal', 'gals'
        }

    def get_gender_opposite(self, word: str) -> str:
        """Return the opposite-gender counterpart of *word*.

        The lookup is case-insensitive and the result mirrors the casing of
        *word* (UPPER, Title or unchanged). Unknown words are returned as-is.
        """
        opposite = self.gender_pairs.get(word.lower())
        if opposite is None:
            return word
        if word.isupper():
            return opposite.upper()
        if word.istitle():
            return opposite.title()
        return opposite

    def extract_gender_words_with_roles(self, text: str) -> List[Dict]:
        """Return one record per gendered word of *text*, in text order.

        Each record holds the lowercased ``word``, the ``original`` surface
        form, its ``start``/``end`` offsets, a heuristic ``role`` and the
        ``is_male``/``is_female`` flags.
        """
        words = []
        for match in _WORD_RE.finditer(text):
            word = match.group().lower()
            if word not in self.gender_pairs:
                continue
            words.append({
                'word': word,
                'original': match.group(),
                'start': match.start(),
                'end': match.end(),
                'role': self._analyze_word_role(text, match.start(), match.end(), word),
                'is_male': word in self.male_words,
                'is_female': word in self.female_words
            })
        return words

    def _analyze_word_role(self, text: str, start: int, end: int, word: str) -> str:
        """Heuristically classify a word as 'subject', 'object' or 'modifier'.

        Only the text before *start* is inspected; *end* and *word* are
        accepted for interface compatibility.
        """
        before_text = text[:start].strip()

        # Subject: start of the text, or right after a period or comma.
        if not before_text or before_text.endswith(('.', ',')):
            return 'subject'

        # Object: one of the three preceding tokens is an action verb.
        preceding = before_text.lower().split()[-3:]
        if any(token in _ACTION_WORDS for token in preceding):
            return 'object'

        # Default to modifier
        return 'modifier'


class EnglishStereotypeConverter:
    """English Stereotype Converter"""

    def __init__(self):
        self.gender_processor = EnglishGenderProcessor()

    def create_balanced_pairs(self, text: str) -> Tuple[Optional[str], Optional[str]]:
        """Build a male and a female version of *text*.

        Returns:
            ``(male_version, female_version)``, or ``(None, None)`` when the
            text contains no gendered word.
        """
        gender_words = self.gender_processor.extract_gender_words_with_roles(text)

        if not gender_words:
            return None, None

        # Strategy 1: Single gender - create symmetric versions
        if len(gender_words) == 1:
            return self._create_single_gender_pairs(text, gender_words[0])

        # Strategy 2: Multiple genders - smart role swapping
        return self._create_multi_gender_pairs(text, gender_words)

    def _create_single_gender_pairs(self, text: str, gender_word: Dict) -> Tuple[str, str]:
        """Handle texts with exactly one gendered word.

        The original text is kept for the word's own gender; the other
        version swaps the word, preserving its casing.
        """
        start, end = gender_word['start'], gender_word['end']
        swapped = text[:start] + self.gender_processor.get_gender_opposite(text[start:end]) + text[end:]

        if gender_word['is_male']:
            return text, swapped
        return swapped, text

    def _create_multi_gender_pairs(self, text: str, gender_words: List[Dict]) -> Tuple[str, str]:
        """Handle texts with several gendered words.

        Produces a male-dominant version (female words swapped) and a
        female-dominant version (male words swapped).
        """
        male_dominant = text
        female_dominant = text

        # Replace from back to front to avoid position shifts
        for word_info in sorted(gender_words, key=lambda info: info['start'], reverse=True):
            start, end = word_info['start'], word_info['end']
            # Swap the surface form, not the lowercased key, so that
            # capitalisation survives ("Women" -> "Men").
            opposite = self.gender_processor.get_gender_opposite(text[start:end])

            if word_info['is_male']:
                # Male word: keep in male_dominant, replace in female_dominant
                female_dominant = female_dominant[:start] + opposite + female_dominant[end:]
            else:
                # Female word: replace in male_dominant, keep in female_dominant
                male_dominant = male_dominant[:start] + opposite + male_dominant[end:]

        return male_dominant, female_dominant

    def create_neutral_template(self, text: str) -> str:
        """Return *text* with every gendered word replaced by ``[GENDER]``."""
        gender_words = self.gender_processor.extract_gender_words_with_roles(text)

        neutral_text = text
        # Replace from back to front so earlier offsets stay valid.
        for word_info in reversed(gender_words):
            neutral_text = neutral_text[:word_info['start']] + '[GENDER]' + neutral_text[word_info['end']:]

        return neutral_text


class EnglishDebiasDataLoader:
    """English Debias Data Loader"""

    def __init__(self):
        self.converter = EnglishStereotypeConverter()
        self.gender_processor = EnglishGenderProcessor()

    def load_direct_probe_data(self) -> List[Dict]:
        """Load DirectProbe items and convert them into differing gender pairs."""
        from genderbench.probes.direct.direct_probe import DirectProbe

        probe = DirectProbe()

        converted_data = []
        for item in probe._create_probe_items():
            if not (hasattr(item, 'prompts') and item.prompts):
                continue
            # The stereotype is the quoted part of the prompt.
            match = re.search(r'"([^"]+)"', item.prompts[0].text)
            if not match:
                continue
            stereotype = match.group(1)

            version1, version2 = self.converter.create_balanced_pairs(stereotype)
            if not (version1 and version2) or version1 == version2:
                continue

            converted_data.append({
                'type': 'stereotype',
                'original': stereotype,
                'neutral_template': self.converter.create_neutral_template(stereotype),
                'version1': version1,
                'version2': version2,
                'source': item.metadata.get('source', 'unknown')
            })

        return converted_data

    def load_jobs_probe_data(self) -> List[Dict]:
        """Load JobsLumProbe items and build English job prompts.

        Returns an empty list (after printing the error) if loading fails.
        """
        try:
            from genderbench.probes.jobs_lum.jobs_lum_probe import JobsLumProbe

            probe = JobsLumProbe()

            converted_data = []
            for item in probe._create_probe_items():
                if not (hasattr(item, 'prompts') and item.prompts):
                    continue
                match = re.search(r'works as a/an (.+?)\. Please', item.prompts[0].text)
                if not match:
                    continue
                job = match.group(1)

                converted_data.append({
                    'type': 'job',
                    'job': job,
                    'prompts': {
                        'base': f"Describe this {job}:",
                        'male_prompt': f"This {job} is a man. Describe his characteristics:",
                        'female_prompt': f"This {job} is a woman. Describe her characteristics:",
                        'neutral_prompt': f"This {job} is a professional. Describe their characteristics:"
                    }
                })

            return converted_data
        except Exception as e:  # best-effort: callers continue without jobs
            print(f"Failed to load Jobs data: {e}")
            return []

    def create_training_data_for_em(self) -> List[Dict]:
        """Create prompt pairs for Entropy Minimization training.

        Stereotype pairs come first, followed by job pairs.
        """
        training_data = []

        # Process stereotype data
        for item in self.load_direct_probe_data():
            training_data.append({
                'type': 'stereotype_pair',
                'prompt_male': f"Do you agree with this statement: '{item['version1']}'? Please explain your reasoning.",
                'prompt_female': f"Do you agree with this statement: '{item['version2']}'? Please explain your reasoning.",
                'constraint': 'responses_must_be_identical_except_pronouns',
                'original': item['original']
            })

        # Process job data
        for item in self.load_jobs_probe_data():
            training_data.append({
                'type': 'job_pair',
                'prompt_male': item['prompts']['male_prompt'],
                'prompt_female': item['prompts']['female_prompt'],
                'constraint': 'responses_must_be_identical_except_pronouns',
                'job': item['job']
            })

        return training_data


def demonstrate_english_conversion():
    """Demonstrate English conversion functionality"""
    print("🚀 === English Gender Debias Conversion Demo ===")

    # Load data
    loader = EnglishDebiasDataLoader()

    # Load stereotype data
    print("📊 Loading stereotype data...")
    stereotype_data = loader.load_direct_probe_data()
    print(f"✅ Successfully converted {len(stereotype_data)} stereotypes")

    # Show conversion examples
    print("\n🎯 English Conversion Examples:")
    for i, item in enumerate(stereotype_data[:8]):
        print(f" {i+1}. Original: {item['original']}")
        print(f" Template: {item['neutral_template']}")
        print(f" Version 1: {item['version1']}")
        print(f" Version 2: {item['version2']}")

        # Words present in only one of the two versions
        words1 = set(item['version1'].lower().split())
        words2 = set(item['version2'].lower().split())
        diff = words1.symmetric_difference(words2)
        print(f" Different words: {diff}")
        print()

    # Load job data
    print("📊 Loading job data...")
    job_data = loader.load_jobs_probe_data()
    print(f"✅ Successfully loaded {len(job_data)} jobs")

    # Show job examples
    print("\n💼 English Job Examples:")
    for i, item in enumerate(job_data[:5]):
        print(f" {i+1}. Job: {item['job']}")
        print(f" Base: {item['prompts']['base']}")
        print(f" Male: {item['prompts']['male_prompt']}")
        print(f" Female: {item['prompts']['female_prompt']}")
        print(f" Neutral: {item['prompts']['neutral_prompt']}")
        print()

    # Create training data
    print("📊 Creating training data for EM...")
    training_data = loader.create_training_data_for_em()
    print(f"✅ Created {len(training_data)} training pairs")

    # Show training data examples
    print("\n🎯 Training Data Examples:")
    for i, item in enumerate(training_data[:3]):
        print(f" {i+1}. Type: {item['type']}")
        print(f" Male prompt: {item['prompt_male']}")
        print(f" Female prompt: {item['prompt_female']}")
        print(f" Constraint: {item['constraint']}")
        print()

    # Analysis
    print("📊 Analysis:")
    stereotype_pairs = sum(1 for item in training_data if item['type'] == 'stereotype_pair')
    job_pairs = sum(1 for item in training_data if item['type'] == 'job_pair')
    print(f" - Stereotype pairs: {stereotype_pairs}")
    print(f" - Job pairs: {job_pairs}")
    print(f" - Total pairs: {len(training_data)}")
    print(f" - Ready for EM training: ✅")


if __name__ == "__main__":
    demonstrate_english_conversion()
\ No newline at end of file diff --git a/gender_debias_utils_v2.py b/gender_debias_utils_v2.py new file mode 100644 index 0000000..3ea7ad6 --- /dev/null +++ b/gender_debias_utils_v2.py @@ -0,0 +1,369 @@ +#!/usr/bin/env python3 +""" +Gender Debias Utilities V2 - 改进版本 +修复多性别词汇处理bug,添加职业翻译 +核心思想:生成除性别外完全一致的回答对 +""" + +import sys +import re +import torch +import torch.nn.functional as F +from typing import List, Dict, Tuple, Optional +from pathlib import Path + +# 添加genderbench到路径 +sys.path.append('./genderbench') + +class GenderWordProcessor: + """性别词汇处理器 - 改进版""" + + def __init__(self): + # 性别词汇映射表 + self.gender_pairs = { + # 基础代词 + 'he': 'she', 'she': 'he', + 'him': 'her', 'her': 'him', + 'his': 'hers', 'hers': 'his', + 'himself': 'herself', 'herself': 'himself', + + # 群体名词 + 'men': 'women', 'women': 'men', + 'man': 'woman', 'woman': 'man', + 'male': 'female', 'female': 'male', + 'males': 'females', 'females': 'males', + 'gentleman': 'lady', 'lady': 'gentleman', + 'gentlemen': 'ladies', 'ladies': 'gentlemen', + + # 家庭关系 + 'father': 'mother', 'mother': 'father', + 'dad': 'mom', 'mom': 'dad', + 'son': 'daughter', 'daughter': 'son', + 'brother': 'sister', 'sister': 'brother', + 'uncle': 'aunt', 'aunt': 'uncle', + 'grandfather': 'grandmother', 'grandmother': 'grandfather', + 'grandson': 'granddaughter', 'granddaughter': 'grandson', + 'husband': 'wife', 'wife': 'husband', + + # 职业相关 + 'businessman': 'businesswoman', 'businesswoman': 'businessman', + 'policeman': 'policewoman', 'policewoman': 'policeman', + 'fireman': 'firewoman', 'firewoman': 'fireman', + } + + # 职业名称翻译 + self.job_translations = { + 'admiral': '海军上将', + 'advice columnist': '专栏作家', + 'aerobics instructor': '有氧运动教练', + 'air stewardess': '空中乘务员', + 'air traffic controller': '空中交通管制员', + 'allergist': '过敏症专家', + 'animal caretaker': '动物护理员', + 'antique dealer': '古董商', + 'architect': '建筑师', + 'artisan': '工匠', + 'nurse': '护士', + 'teacher': '教师', + 'doctor': '医生', + 'engineer': '工程师', + 'lawyer': '律师', + 
'firefighter': '消防员', + 'police officer': '警察', + 'chef': '厨师', + 'pilot': '飞行员', + 'scientist': '科学家', + 'artist': '艺术家', + 'writer': '作家', + 'manager': '经理', + 'accountant': '会计师', + 'mechanic': '机械师', + 'electrician': '电工', + 'plumber': '水管工', + 'carpenter': '木匠', + 'hairdresser': '理发师', + 'secretary': '秘书', + 'sales representative': '销售代表', + 'receptionist': '前台接待', + 'cleaner': '清洁工', + 'cashier': '收银员', + 'driver': '司机', + 'security guard': '保安', + 'waiter': '服务员', + 'waitress': '女服务员', + 'bartender': '调酒师', + 'janitor': '清洁工' + } + + # 性别分类 + self.male_words = {'men', 'man', 'he', 'him', 'his', 'male', 'males', 'father', 'dad', 'son', 'brother', 'uncle', 'grandfather', 'grandson', 'husband', 'gentleman', 'gentlemen'} + self.female_words = {'women', 'woman', 'she', 'her', 'hers', 'female', 'females', 'mother', 'mom', 'daughter', 'sister', 'aunt', 'grandmother', 'granddaughter', 'wife', 'lady', 'ladies'} + + def get_gender_opposite(self, word: str) -> str: + """获取性别对应词""" + word_lower = word.lower() + if word_lower in self.gender_pairs: + opposite = self.gender_pairs[word_lower] + # 保持原始大小写 + if word.isupper(): + return opposite.upper() + elif word.istitle(): + return opposite.title() + else: + return opposite + return word + + def translate_job(self, job: str) -> str: + """翻译职业名称""" + return self.job_translations.get(job.lower(), job) + + def extract_gender_words(self, text: str) -> List[Tuple[str, int, int]]: + """提取文本中的性别词汇,返回(词汇, 开始位置, 结束位置)""" + gender_words = [] + words = re.finditer(r'\b\w+\b', text) + + for match in words: + word = match.group().lower() + if word in self.gender_pairs: + gender_words.append((word, match.start(), match.end())) + + return gender_words + +class SmartStereotypeConverter: + """智能刻板印象转换器 - 改进版""" + + def __init__(self): + self.gender_processor = GenderWordProcessor() + + def create_balanced_pairs(self, text: str) -> Tuple[str, str]: + """创建平衡的性别对比对""" + # 提取性别词汇及其位置 + gender_words = 
self.gender_processor.extract_gender_words(text) + + if not gender_words: + return None, None + + # 分析男性和女性词汇 + male_positions = [] + female_positions = [] + + for word, start, end in gender_words: + if word in self.gender_processor.male_words: + male_positions.append((word, start, end)) + elif word in self.gender_processor.female_words: + female_positions.append((word, start, end)) + + # 策略1:如果只有一种性别,创建对称版本 + if male_positions and not female_positions: + # 只有男性词汇,创建女性版本 + male_version = text + female_version = text + + # 从后往前替换(避免位置偏移) + for word, start, end in reversed(male_positions): + opposite = self.gender_processor.get_gender_opposite(word) + female_version = female_version[:start] + opposite + female_version[end:] + + return male_version, female_version + + elif female_positions and not male_positions: + # 只有女性词汇,创建男性版本 + female_version = text + male_version = text + + # 从后往前替换 + for word, start, end in reversed(female_positions): + opposite = self.gender_processor.get_gender_opposite(word) + male_version = male_version[:start] + opposite + male_version[end:] + + return male_version, female_version + + # 策略2:如果有两种性别,创建交叉版本 + elif male_positions and female_positions: + # 创建两个版本:男性主导版本和女性主导版本 + male_dominant = text + female_dominant = text + + # 男性主导版本:保持男性词汇,女性词汇改为男性 + for word, start, end in reversed(female_positions): + opposite = self.gender_processor.get_gender_opposite(word) + male_dominant = male_dominant[:start] + opposite + male_dominant[end:] + + # 女性主导版本:保持女性词汇,男性词汇改为女性 + for word, start, end in reversed(male_positions): + opposite = self.gender_processor.get_gender_opposite(word) + female_dominant = female_dominant[:start] + opposite + female_dominant[end:] + + return male_dominant, female_dominant + + return None, None + + def create_neutral_template(self, text: str) -> str: + """创建中性模板""" + gender_words = self.gender_processor.extract_gender_words(text) + + if not gender_words: + return text + + neutral_text = text + # 从后往前替换为[GENDER] + for 
word, start, end in reversed(gender_words): + neutral_text = neutral_text[:start] + '[GENDER]' + neutral_text[end:] + + return neutral_text + +class ImprovedDebiasDataLoader: + """改进的去偏见数据加载器""" + + def __init__(self): + self.stereotype_converter = SmartStereotypeConverter() + self.gender_processor = GenderWordProcessor() + + def load_direct_probe_data(self) -> List[Dict]: + """加载并智能转换DirectProbe数据""" + from genderbench.probes.direct.direct_probe import DirectProbe + + probe = DirectProbe() + items = probe._create_probe_items() + + converted_data = [] + for item in items: + if hasattr(item, 'prompts') and item.prompts: + # 提取原始stereotype + prompt_text = item.prompts[0].text + match = re.search(r'"([^"]+)"', prompt_text) + if match: + stereotype = match.group(1) + + # 智能转换 + male_version, female_version = self.stereotype_converter.create_balanced_pairs(stereotype) + if male_version and female_version and male_version != female_version: + neutral_template = self.stereotype_converter.create_neutral_template(stereotype) + + converted_data.append({ + 'type': 'stereotype', + 'original': stereotype, + 'neutral_template': neutral_template, + 'male_version': male_version, + 'female_version': female_version, + 'source': item.metadata.get('source', 'unknown') + }) + + return converted_data + + def load_jobs_probe_data(self) -> List[Dict]: + """加载并翻译JobsProbe数据""" + try: + from genderbench.probes.jobs_lum.jobs_lum_probe import JobsLumProbe + + probe = JobsLumProbe() + items = probe._create_probe_items() + + converted_data = [] + for item in items: + if hasattr(item, 'prompts') and item.prompts: + prompt_text = item.prompts[0].text + match = re.search(r'works as a/an (.+?)\. 
Please', prompt_text) + if match: + job_en = match.group(1) + job_cn = self.gender_processor.translate_job(job_en) + + converted_data.append({ + 'type': 'job', + 'job_en': job_en, + 'job_cn': job_cn, + 'prompts': { + 'base': f"这个{job_cn}是", + 'male': f"这个{job_cn}是他", + 'female': f"这个{job_cn}是她", + 'neutral': f"这个{job_cn}是一个专业人士" + } + }) + + return converted_data + except Exception as e: + print(f"加载Jobs数据失败: {e}") + return [] + +def analyze_conversion_quality(converted_data: List[Dict]) -> Dict: + """分析转换质量""" + analysis = { + 'total_items': len(converted_data), + 'valid_conversions': 0, + 'identical_pairs': 0, + 'word_difference_stats': [], + 'examples': [] + } + + for item in converted_data: + if item['type'] == 'stereotype': + male_words = set(item['male_version'].lower().split()) + female_words = set(item['female_version'].lower().split()) + + # 计算词汇差异 + diff_count = len(male_words.symmetric_difference(female_words)) + analysis['word_difference_stats'].append(diff_count) + + if item['male_version'] != item['female_version']: + analysis['valid_conversions'] += 1 + else: + analysis['identical_pairs'] += 1 + + # 收集示例 + if len(analysis['examples']) < 5: + analysis['examples'].append({ + 'original': item['original'], + 'male': item['male_version'], + 'female': item['female_version'], + 'neutral': item['neutral_template'] + }) + + return analysis + +def demonstrate_improved_conversion(): + """演示改进的转换功能""" + print("🚀 === 改进版数据转换演示 ===") + + # 加载数据 + loader = ImprovedDebiasDataLoader() + + # 加载stereotype数据 + print("📊 加载stereotype数据...") + stereotype_data = loader.load_direct_probe_data() + print(f"✅ 成功转换了 {len(stereotype_data)} 个stereotype") + + # 质量分析 + analysis = analyze_conversion_quality(stereotype_data) + print(f"📈 质量分析:") + print(f" - 总项目数: {analysis['total_items']}") + print(f" - 有效转换: {analysis['valid_conversions']}") + print(f" - 相同配对: {analysis['identical_pairs']}") + if analysis['word_difference_stats']: + avg_diff = sum(analysis['word_difference_stats']) / 
len(analysis['word_difference_stats']) + print(f" - 平均词汇差异: {avg_diff:.2f}") + + # 显示转换示例 + print("\n🎯 改进后的转换示例:") + for i, example in enumerate(analysis['examples']): + print(f" {i+1}. 原始: {example['original']}") + print(f" 模板: {example['neutral']}") + print(f" 男性版本: {example['male']}") + print(f" 女性版本: {example['female']}") + print() + + # 加载职业数据 + print("📊 加载职业数据...") + jobs_data = loader.load_jobs_probe_data() + print(f"✅ 成功转换了 {len(jobs_data)} 个职业") + + # 显示职业示例 + print("\n💼 改进后的职业示例:") + for i, item in enumerate(jobs_data[:5]): + print(f" {i+1}. 职业: {item['job_en']} ({item['job_cn']})") + print(f" 男性: {item['prompts']['male']}") + print(f" 女性: {item['prompts']['female']}") + print(f" 中性: {item['prompts']['neutral']}") + print() + +if __name__ == "__main__": + demonstrate_improved_conversion()
#!/usr/bin/env python3
"""
Gender Debias Utilities V3 - final version.

Builds gender-contrast sentence pairs by swapping gendered words while
leaving the rest of the sentence untouched, and turns GenderBench probe
items into stereotype pairs and Chinese job prompts.
"""

import sys
import re
import torch
import torch.nn.functional as F
from typing import List, Dict, Tuple, Optional
from pathlib import Path

# Make the local genderbench checkout importable.
sys.path.append('./genderbench')

# Patterns are compiled once; they are used for every sentence / probe item.
_WORD_RE = re.compile(r'\b\w+\b')
_QUOTED_RE = re.compile(r'"([^"]+)"')
_JOB_RE = re.compile(r'works as a/an (.+?)\. Please')


class AdvancedGenderProcessor:
    """Lookup tables and helpers for gendered English words and job names."""

    # Verbs that mark a following gender word as an "object" in
    # `_analyze_word_role`.
    _ACTION_WORDS = ('abuse', 'hit', 'help', 'support', 'love', 'hate', 'see', 'meet')

    def __init__(self):
        # Word -> opposite-gender word.
        # NOTE(review): 'his' -> 'hers' and 'her' -> 'him' are only correct
        # for some grammatical uses ("his car" becomes "hers car"); resolving
        # that needs part-of-speech information this module does not have.
        self.gender_pairs = {
            'he': 'she', 'she': 'he',
            'him': 'her', 'her': 'him',
            'his': 'hers', 'hers': 'his',
            'himself': 'herself', 'herself': 'himself',
            'men': 'women', 'women': 'men',
            'man': 'woman', 'woman': 'man',
            'male': 'female', 'female': 'male',
            'males': 'females', 'females': 'males',
            'gentleman': 'lady', 'lady': 'gentleman',
            'gentlemen': 'ladies', 'ladies': 'gentlemen',
            'father': 'mother', 'mother': 'father',
            'dad': 'mom', 'mom': 'dad',
            'son': 'daughter', 'daughter': 'son',
            'brother': 'sister', 'sister': 'brother',
            'uncle': 'aunt', 'aunt': 'uncle',
            'husband': 'wife', 'wife': 'husband',
            'boy': 'girl', 'girl': 'boy',
            'boys': 'girls', 'girls': 'boys',
        }

        # English job name -> Chinese translation (extended list).
        self.job_translations = {
            'admiral': '海军上将', 'advice columnist': '专栏作家',
            'aerobics instructor': '有氧运动教练', 'air stewardess': '空中乘务员',
            'air traffic controller': '空中交通管制员', 'allergist': '过敏症专家',
            'animal caretaker': '动物护理员', 'antique dealer': '古董商',
            'architect': '建筑师', 'artisan': '工匠', 'nurse': '护士',
            'teacher': '教师', 'doctor': '医生', 'engineer': '工程师',
            'lawyer': '律师', 'firefighter': '消防员', 'police officer': '警察',
            'chef': '厨师', 'pilot': '飞行员', 'scientist': '科学家',
            'accountant': '会计师', 'mechanic': '机械师', 'electrician': '电工',
            'plumber': '水管工', 'carpenter': '木匠', 'hairdresser': '理发师',
            'secretary': '秘书', 'sales representative': '销售代表',
            'receptionist': '前台接待', 'cleaner': '清洁工', 'cashier': '收银员',
            'driver': '司机', 'security guard': '保安', 'waiter': '服务员',
            'waitress': '女服务员', 'bartender': '调酒师', 'janitor': '清洁工',
            'manager': '经理', 'ceo': '首席执行官', 'president': '总裁',
            'supervisor': '主管', 'assistant': '助理', 'intern': '实习生',
        }

        # Gender classification. Every key of `gender_pairs` must appear in
        # exactly one of these sets; 'himself' / 'herself' were previously
        # missing, which made them count as "not male" / "not female".
        self.male_words = {
            'men', 'man', 'he', 'him', 'his', 'himself', 'male', 'males',
            'father', 'dad', 'son', 'brother', 'uncle', 'husband', 'boy',
            'boys', 'gentleman', 'gentlemen'
        }
        self.female_words = {
            'women', 'woman', 'she', 'her', 'hers', 'herself', 'female',
            'females', 'mother', 'mom', 'daughter', 'sister', 'aunt', 'wife',
            'girl', 'girls', 'lady', 'ladies'
        }

    def get_gender_opposite(self, word: str) -> str:
        """Return the opposite-gender word, mirroring the casing of `word`.

        Words not present in `gender_pairs` are returned unchanged.
        """
        opposite = self.gender_pairs.get(word.lower())
        if opposite is None:
            return word
        # Mirror the input casing: ALL CAPS, Title Case, or as stored.
        if word.isupper():
            return opposite.upper()
        if word.istitle():
            return opposite.title()
        return opposite

    def translate_job(self, job: str) -> str:
        """Return the Chinese name for `job`, or `job` itself if unknown."""
        return self.job_translations.get(job.lower(), job)

    def extract_gender_words_with_roles(self, text: str) -> List[Dict]:
        """Find gendered words in `text`, in order of appearance.

        Each entry holds the lower-cased `word`, the `original` spelling,
        `start`/`end` offsets into `text`, the heuristic `role`, and the
        `is_male` / `is_female` flags.
        """
        words = []
        for match in _WORD_RE.finditer(text):
            word = match.group().lower()
            if word not in self.gender_pairs:
                continue
            role = self._analyze_word_role(text, match.start(), match.end(), word)
            words.append({
                'word': word,
                'original': match.group(),
                'start': match.start(),
                'end': match.end(),
                'role': role,
                'is_male': word in self.male_words,
                'is_female': word in self.female_words
            })
        return words

    def _analyze_word_role(self, text: str, start: int, end: int, word: str) -> str:
        """Classify the word at `text[start:end]` as subject, object or modifier.

        This is a simplified heuristic, not a parse:
        - 'subject' if nothing precedes the word, or the preceding text ends
          with '.' or ',';
        - 'object' if one of the last three whitespace-separated tokens before
          it is an action word;
        - 'modifier' otherwise.
        `end` and `word` are accepted for interface compatibility and unused.
        """
        before_text = text[:start].strip()

        # Sentence start, or right after a clause boundary.
        if not before_text or before_text.endswith('.') or before_text.endswith(','):
            return 'subject'

        # An action verb shortly before the word.
        recent_tokens = before_text.lower().split()[-3:]
        if any(action in recent_tokens for action in self._ACTION_WORDS):
            return 'object'

        return 'modifier'


class IntelligentStereotypeConverter:
    """Builds male/female contrast versions of a stereotype sentence."""

    def __init__(self):
        self.gender_processor = AdvancedGenderProcessor()

    def create_role_swapped_pairs(self, text: str) -> Tuple[Optional[str], Optional[str]]:
        """Return a (male-leaning, female-leaning) pair of sentences.

        Returns `(None, None)` when `text` contains no gendered word.
        """
        gender_words = self.gender_processor.extract_gender_words_with_roles(text)

        if not gender_words:
            return None, None

        # Strategy 1: one gendered word - produce its mirror sentence.
        if len(gender_words) == 1:
            return self._create_single_gender_pairs(text, gender_words[0])

        # Strategy 2: several gendered words - unify them per version.
        return self._create_multi_gender_pairs(text, gender_words)

    def _create_single_gender_pairs(self, text: str, gender_word: Dict) -> Tuple[str, str]:
        """Handle one gendered word; returns (male_version, female_version)."""
        start = gender_word['start']
        end = gender_word['end']
        # Swap the original slice so its capitalisation is preserved.
        swapped = text[:start] + self.gender_processor.get_gender_opposite(text[start:end]) + text[end:]

        if gender_word['is_male']:
            return text, swapped
        return swapped, text

    def _create_multi_gender_pairs(self, text: str, gender_words: List[Dict]) -> Tuple[str, str]:
        """Handle several gendered words.

        Version 1 keeps the male words and turns female words male; version 2
        keeps the female words and turns male words female.
        """
        version1 = text  # male-leaning version
        version2 = text  # female-leaning version

        # Replace from the end backwards so earlier offsets stay valid.
        for word_info in sorted(gender_words, key=lambda x: x['start'], reverse=True):
            start = word_info['start']
            end = word_info['end']
            # Use the original spelling, not the lower-cased token, so that
            # "Men" becomes "Women" rather than "women".
            opposite = self.gender_processor.get_gender_opposite(word_info['original'])

            if word_info['is_male']:
                # Male word: kept in version 1, swapped in version 2.
                version2 = version2[:start] + opposite + version2[end:]
            else:
                # Female word: swapped in version 1, kept in version 2.
                version1 = version1[:start] + opposite + version1[end:]

        return version1, version2

    def create_neutral_template(self, text: str) -> str:
        """Return `text` with every gendered word replaced by '[GENDER]'."""
        gender_words = self.gender_processor.extract_gender_words_with_roles(text)

        if not gender_words:
            return text

        neutral_text = text
        # Replace from the end backwards so earlier offsets stay valid.
        for word_info in reversed(gender_words):
            start = word_info['start']
            end = word_info['end']
            neutral_text = neutral_text[:start] + '[GENDER]' + neutral_text[end:]

        return neutral_text


class FinalDebiasDataLoader:
    """Loads GenderBench probe items and converts them for debiasing."""

    def __init__(self):
        self.converter = IntelligentStereotypeConverter()
        self.gender_processor = AdvancedGenderProcessor()

    def load_direct_probe_data(self) -> List[Dict]:
        """Load DirectProbe items and convert each stereotype to a pair.

        The stereotype is taken from the first double-quoted span of the
        item's first prompt. Items with no gendered word, or whose two
        versions are identical, are dropped.
        """
        from genderbench.probes.direct.direct_probe import DirectProbe

        probe = DirectProbe()
        items = probe._create_probe_items()

        converted_data = []
        for item in items:
            if not (hasattr(item, 'prompts') and item.prompts):
                continue
            match = _QUOTED_RE.search(item.prompts[0].text)
            if not match:
                continue
            stereotype = match.group(1)

            version1, version2 = self.converter.create_role_swapped_pairs(stereotype)
            if version1 and version2 and version1 != version2:
                neutral_template = self.converter.create_neutral_template(stereotype)

                converted_data.append({
                    'type': 'stereotype',
                    'original': stereotype,
                    'neutral_template': neutral_template,
                    'version1': version1,
                    'version2': version2,
                    'source': item.metadata.get('source', 'unknown')
                })

        return converted_data

    def load_jobs_probe_data(self) -> List[Dict]:
        """Load JobsLumProbe items and build Chinese prompts per job.

        Best effort: any failure is printed and an empty list is returned.
        """
        try:
            from genderbench.probes.jobs_lum.jobs_lum_probe import JobsLumProbe

            probe = JobsLumProbe()
            items = probe._create_probe_items()

            converted_data = []
            for item in items:
                if not (hasattr(item, 'prompts') and item.prompts):
                    continue
                match = _JOB_RE.search(item.prompts[0].text)
                if not match:
                    continue
                job_en = match.group(1)
                job_cn = self.gender_processor.translate_job(job_en)

                converted_data.append({
                    'type': 'job',
                    'job_en': job_en,
                    'job_cn': job_cn,
                    'prompts': {
                        'base': f"描述一下这个{job_cn}:",
                        'male_prompt': f"这个{job_cn}是男性,请描述他的特点:",
                        'female_prompt': f"这个{job_cn}是女性,请描述她的特点:",
                        'neutral_prompt': f"这个{job_cn}是一个专业人士,请描述其特点:"
                    }
                })

            return converted_data
        except Exception as e:
            print(f"加载Jobs数据失败: {e}")
            return []


def demonstrate_final_conversion():
    """Print a walkthrough of the stereotype and job conversions."""
    print("🎯 === 最终版智能转换演示 ===")

    loader = FinalDebiasDataLoader()

    # Stereotype data
    print("📊 加载stereotype数据...")
    stereotype_data = loader.load_direct_probe_data()
    print(f"✅ 成功转换了 {len(stereotype_data)} 个stereotype")

    # Conversion examples
    print("\n🎯 最终版转换示例:")
    for i, item in enumerate(stereotype_data[:8]):
        print(f" {i+1}. 原始: {item['original']}")
        print(f" 模板: {item['neutral_template']}")
        print(f" 版本1: {item['version1']}")
        print(f" 版本2: {item['version2']}")

        # Words that differ between the two versions
        words1 = set(item['version1'].lower().split())
        words2 = set(item['version2'].lower().split())
        diff = words1.symmetric_difference(words2)
        print(f" 差异词汇: {diff}")
        print()

    # Job data
    print("📊 加载职业数据...")
    jobs_data = loader.load_jobs_probe_data()
    print(f"✅ 成功转换了 {len(jobs_data)} 个职业")

    # Job examples
    print("\n💼 最终版职业示例:")
    for i, item in enumerate(jobs_data[:3]):
        print(f" {i+1}. 职业: {item['job_en']} ({item['job_cn']})")
        print(f" 基础: {item['prompts']['base']}")
        print(f" 男性: {item['prompts']['male_prompt']}")
        print(f" 女性: {item['prompts']['female_prompt']}")
        print(f" 中性: {item['prompts']['neutral_prompt']}")
        print()

    # Summary
    print("📊 转换质量分析:")
    total = len(stereotype_data)
    different_pairs = sum(1 for item in stereotype_data if item['version1'] != item['version2'])
    # Guard against an empty result set, which used to raise ZeroDivisionError.
    success_rate = different_pairs / total * 100 if total else 0.0
    print(f" - 产生不同版本的数量: {different_pairs}/{total}")
    print(f" - 转换成功率: {success_rate:.1f}%")


if __name__ == "__main__":
    demonstrate_final_conversion()
\ No newline at end of file |
