4 files changed, 496 insertions, 48 deletions
diff --git a/scripts/__pycache__/fetch_papers.cpython-312.pyc b/scripts/__pycache__/fetch_papers.cpython-312.pyc
index b5ff943..946d75e 100644
--- a/scripts/__pycache__/fetch_papers.cpython-312.pyc
+++ b/scripts/__pycache__/fetch_papers.cpython-312.pyc
diff --git a/scripts/fetch_papers.py b/scripts/fetch_papers.py
index 7920a94..18a0fec 100644
--- a/scripts/fetch_papers.py
+++ b/scripts/fetch_papers.py
@@ -58,24 +58,30 @@ CS_CATEGORIES = [
     "stat.ML" # Machine Learning (Statistics)
 ]
 
-GPT_SYSTEM_PROMPT = """You are an expert researcher in AI/ML bias, fairness, and social good applications.
+GPT_SYSTEM_PROMPT = """You are an expert researcher in AI bias, fairness, and social good applications.
 
-Your task is to analyze a paper's title and abstract to determine if it's relevant to bias and fairness research with social good implications.
+Your task is to analyze a paper's title and abstract to determine if it's relevant to bias and fairness research with clear social good implications.
 
-A paper is relevant if it discusses:
-- Bias, fairness, or discrimination in AI/ML systems with societal impact
-- Algorithmic fairness in healthcare, education, criminal justice, hiring, or finance
-- Demographic bias affecting marginalized or underrepresented groups
-- Data bias and its social consequences
-- Ethical AI and responsible AI deployment in society
-- AI safety and alignment with human values and social welfare
-- Bias evaluation, auditing, or mitigation in real-world applications
-- Representation and inclusion in AI systems and datasets
-- Social implications of AI bias (e.g., perpetuating inequality)
-- Fairness in recommendation systems, search engines, or content moderation
-- Bias in computer vision, NLP, or other AI domains affecting people
+A paper is RELEVANT if it discusses:
+- Algorithmic fairness in real-world applications (healthcare, education, criminal justice, hiring, finance)
+- Demographic bias affecting marginalized or underrepresented groups in society
+- Social implications of AI bias (perpetuating inequality, discrimination, harm to vulnerable populations)
+- Ethical AI deployment addressing social justice and human welfare
+- Bias auditing/evaluation in systems that directly impact people's lives
+- Data bias with clear social consequences and harm
+- AI safety and alignment with human values in societal applications
+- Representation and inclusion in AI systems used by the public
+- Fair recommendation systems, search engines, or content moderation with social impact
 
-The focus is on research that addresses how AI bias impacts society, vulnerable populations, or social justice, rather than purely technical ML advances without clear social relevance.
+A paper is NOT RELEVANT if it discusses:
+- Purely technical computer vision bias without clear social applications
+- Generic ML fairness metrics without real-world context
+- Theoretical bias research without societal implications
+- Technical optimization of models without addressing social harm
+- Academic benchmarking without connection to social good
+- Pure algorithmic improvements without considering human impact
+
+FOCUS: The research must clearly address how AI bias affects society, vulnerable populations, or social justice. Reject purely technical advances without explicit social relevance.
 
 Respond with exactly "1" if the paper is relevant, or "0" if it's not relevant.
 Do not include any other text in your response."""
@@ -106,9 +112,9 @@ class ArxivPaperFetcher:
         Returns:
             List of paper dictionaries
         """
-        logger.info(f"🔍 开始从arXiv抓取论文: {start_date.date()} 到 {end_date.date()}")
-        logger.info(f"📋 目标类别: {', '.join(CS_CATEGORIES)}")
-        logger.info(f"🔧 改进策略: 分别查询每个类别以避免OR查询限制")
+        logger.info(f"🔍 Starting arXiv paper fetch: {start_date.date()} to {end_date.date()}")
+        logger.info(f"📋 Target categories: {', '.join(CS_CATEGORIES)}")
+        logger.info(f"🔧 Strategy: Query each category separately to avoid OR query limitations")
         
-        all_papers_dict = {}  # 使用字典去重，key为arxiv_id
+        all_papers_dict = {}  # Deduplicate with a dict keyed by arxiv_id
         total_categories_processed = 0
@@ -117,13 +123,13 @@ class ArxivPaperFetcher:
-        # 分别查询每个类别
+        # Query each category separately
         for category in CS_CATEGORIES:
             total_categories_processed += 1
-            logger.info(f"📂 处理类别 {total_categories_processed}/{len(CS_CATEGORIES)}: {category}")
+            logger.info(f"📂 Processing category {total_categories_processed}/{len(CS_CATEGORIES)}: {category}")
             
             category_papers = self._fetch_papers_for_category(
                 category, start_date, end_date, max_papers_per_category=500
             )
             
-            # 合并到总结果中（去重）
+            # Merge to total results (deduplication)
             new_papers_count = 0
             for paper in category_papers:
                 arxiv_id = paper['arxiv_id']
@@ -132,43 +138,43 @@ class ArxivPaperFetcher:
                     new_papers_count += 1
             
             total_raw_papers += len(category_papers)
-            logger.info(f"   ✅ {category}: 获得{len(category_papers)}篇, 新增{new_papers_count}篇")
+            logger.info(f"   ✅ {category}: Found {len(category_papers)} papers, {new_papers_count} new")
         
-        # 转换为列表并按日期排序
+        # Convert to list and sort by date
         all_papers = list(all_papers_dict.values())
         all_papers.sort(key=lambda x: x['updated'], reverse=True)
         
-        logger.info(f"📊 抓取总结:")
-        logger.info(f"   - 处理了 {total_categories_processed} 个类别")
-        logger.info(f"   - 从arXiv获取了 {total_raw_papers} 篇原始论文")
-        logger.info(f"   - 去重后得到 {len(all_papers)} 篇唯一论文")
+        logger.info(f"📊 Fetch Summary:")
+        logger.info(f"   - Processed {total_categories_processed} categories")
+        logger.info(f"   - Retrieved {total_raw_papers} raw papers from arXiv")
+        logger.info(f"   - After deduplication: {len(all_papers)} unique papers")
         
-        # 显示类别分布
+        # Show category distribution
         if all_papers:
             from collections import Counter
             
-            # 日期分布
+            # Date distribution
             dates = []
             for paper in all_papers:
                 paper_date = datetime.strptime(paper['updated'][:10], '%Y-%m-%d')
                 dates.append(paper_date.strftime('%Y-%m-%d'))
             
             date_counts = Counter(dates)
-            logger.info(f"📅 论文日期分布 (前5天):")
+            logger.info(f"📅 Paper date distribution (top 5 days):")
             for date, count in date_counts.most_common(5):
                 days_ago = (datetime.now(timezone.utc).date() - datetime.strptime(date, '%Y-%m-%d').date()).days
-                logger.info(f"   - {date}: {count}篇 ({days_ago}天前)")
+                logger.info(f"   - {date}: {count} papers ({days_ago} days ago)")
             
-            # 类别分布
+            # Category distribution
             category_counts = Counter()
             for paper in all_papers:
                 for cat in paper['categories']:
                     if cat in CS_CATEGORIES:
                         category_counts[cat] += 1
             
-            logger.info(f"📊 类别分布:")
+            logger.info(f"📊 Category distribution:")
             for cat, count in category_counts.most_common():
-                logger.info(f"   - {cat}: {count}篇")
+                logger.info(f"   - {cat}: {count} papers")
         
         return all_papers
     
@@ -283,48 +289,48 @@ class ArxivPaperFetcher:
             List of relevant papers
         """
         if not papers:
-            logger.warning("⚠️ 没有论文需要过滤！")
+            logger.warning("⚠️ No papers to filter!")
             return []
             
         if use_parallel and len(papers) > 5:
-            logger.info(f"🚀 使用并行模式处理 {len(papers)} 篇论文 (最大并发: {max_concurrent})")
+            logger.info(f"🚀 Using parallel mode for {len(papers)} papers (max concurrent: {max_concurrent})")
             return self._filter_papers_parallel(papers, max_concurrent)
         else:
-            logger.info(f"🔄 使用串行模式处理 {len(papers)} 篇论文")
+            logger.info(f"🔄 Using serial mode for {len(papers)} papers")
             return self._filter_papers_sequential(papers)
     
     def _filter_papers_sequential(self, papers: List[Dict]) -> List[Dict]:
         """Serial processing of papers (original method)."""
-        logger.info(f"🤖 开始使用GPT-4o过滤论文...")
-        logger.info(f"📝 待处理论文数量: {len(papers)} 篇")
+        logger.info(f"🤖 Starting GPT-4o paper filtering...")
+        logger.info(f"📝 Papers to process: {len(papers)}")
         
         relevant_papers = []
         processed_count = 0
         
         for i, paper in enumerate(papers, 1):
             try:
-                logger.info(f"🔍 处理第 {i}/{len(papers)} 篇论文: {paper['title'][:60]}...")
+                logger.info(f"🔍 Processing paper {i}/{len(papers)}: {paper['title'][:60]}...")
                 is_relevant = self._check_paper_relevance(paper)
                 processed_count += 1
                 
                 if is_relevant:
                     relevant_papers.append(paper)
-                    logger.info(f"✅ 第 {i} 篇论文 [相关]: {paper['title'][:80]}...")
+                    logger.info(f"✅ Paper {i} [RELEVANT]: {paper['title'][:80]}...")
                 else:
-                    logger.info(f"❌ 第 {i} 篇论文 [不相关]: {paper['title'][:80]}...")
+                    logger.info(f"❌ Paper {i} [NOT RELEVANT]: {paper['title'][:80]}...")
                     
-                # 每处理10篇论文显示一次进度
+                # Show progress every 10 papers
                 if i % 10 == 0:
-                    logger.info(f"📊 进度更新: 已处理 {i}/{len(papers)} 篇论文，发现 {len(relevant_papers)} 篇相关论文")
+                    logger.info(f"📊 Progress update: Processed {i}/{len(papers)} papers, found {len(relevant_papers)} relevant")
                     
             except Exception as e:
-                logger.error(f"❌ 处理第 {i} 篇论文时出错: {e}")
+                logger.error(f"❌ Error processing paper {i}: {e}")
                 continue
         
-        logger.info(f"🎯 GPT-4o过滤完成!")
-        logger.info(f"   - 总共处理: {processed_count} 篇论文")
-        logger.info(f"   - 发现相关: {len(relevant_papers)} 篇论文")
-        logger.info(f"   - 相关比例: {len(relevant_papers)/processed_count*100:.1f}%" if processed_count > 0 else "   - 相关比例: 0%")
+        logger.info(f"🎯 GPT-4o filtering completed!")
+        logger.info(f"   - Total processed: {processed_count} papers")
+        logger.info(f"   - Found relevant: {len(relevant_papers)} papers")
+        logger.info(f"   - Relevance ratio: {len(relevant_papers)/processed_count*100:.1f}%" if processed_count > 0 else "   - Relevance ratio: 0%")
         
         return relevant_papers
     
diff --git a/scripts/test_3day_fetch.py b/scripts/test_3day_fetch.py
new file mode 100644
index 0000000..6a731ff
--- /dev/null
+++ b/scripts/test_3day_fetch.py
@@ -0,0 +1,231 @@
+#!/usr/bin/env python3
+"""
+Test 3-Day Paper Fetch
+
+Detailed analysis of paper availability in the past 3 days
+to identify why 0 papers were retrieved.
+"""
+
+import os
+import sys
+import logging
+import requests
+import feedparser
+from datetime import datetime, timezone, timedelta
+from collections import Counter
+
+# Set up logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
+    handlers=[logging.StreamHandler(sys.stdout)]
+)
+logger = logging.getLogger(__name__)
+
+# Add the parent directory to the path
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from scripts.fetch_papers import ArxivPaperFetcher, CS_CATEGORIES
+
+def analyze_recent_papers():
+    """Report how many arXiv papers are available in several recent windows.
+
+    For look-back windows of 1, 2, 3 and 7 days ending now (UTC), fetch a
+    sample of papers and print the number found, the five most recent days
+    with their counts, and up to three sample titles.
+    """
+    print("🔍 Analyzing Recent Paper Availability")
+    print("=" * 60)
+
+    # Calculate time ranges
+    now = datetime.now(timezone.utc)
+    print(f"📅 Current time: {now.strftime('%Y-%m-%d %H:%M:%S')} UTC")
+
+    # Test different time ranges
+    time_ranges = [
+        ("Past 1 day", now - timedelta(days=1)),
+        ("Past 2 days", now - timedelta(days=2)),
+        ("Past 3 days", now - timedelta(days=3)),
+        ("Past 7 days", now - timedelta(days=7))
+    ]
+
+    # Stand-alone fetcher for this diagnostic; not a subclass of ArxivPaperFetcher
+    class TestFetcher:
+        """Small arXiv API client that samples recent papers."""
+
+        def __init__(self):
+            self.session = requests.Session()
+
+        def _parse_paper_entry(self, entry):
+            """Convert one feed entry into a plain dict of paper metadata."""
+            return {
+                "title": entry.title.replace('\n', ' ').strip(),
+                "abstract": entry.summary.replace('\n', ' ').strip(),
+                "authors": [author.name for author in entry.authors] if hasattr(entry, 'authors') else [],
+                "published": entry.published,
+                "updated": entry.updated,
+                "link": entry.link,
+                "arxiv_id": entry.id.split('/')[-1],
+                "categories": [tag.term for tag in entry.tags] if hasattr(entry, 'tags') else []
+            }
+
+        def fetch_recent_sample(self, start_date, end_date, max_papers=500):
+            """Fetch recently updated papers from a few key arXiv categories.
+
+            Keeps entries whose ``updated`` time lies in [start_date, end_date]
+            (timezone-aware datetimes), de-duplicates them by ``arxiv_id`` and
+            returns a list of at most ``max_papers`` paper dicts. A category
+            whose request or parsing fails is reported on stdout and skipped.
+            """
+            # Check a few key categories
+            test_categories = ["cs.AI", "cs.LG", "cs.CL", "cs.CV"]
+            unique_papers = {}  # arxiv_id -> paper; keying by id drops duplicates
+
+            for category in test_categories:
+                try:
+                    params = {
+                        "search_query": f"cat:{category}",
+                        "sortBy": "submittedDate",
+                        "sortOrder": "descending",
+                        "start": 0,
+                        "max_results": 100
+                    }
+                    response = self.session.get("http://export.arxiv.org/api/query",
+                                                params=params, timeout=30)
+                    response.raise_for_status()
+
+                    feed = feedparser.parse(response.content)
+                    for entry in feed.entries:
+                        paper_date = datetime(*entry.updated_parsed[:6], tzinfo=timezone.utc)
+                        if start_date <= paper_date <= end_date:
+                            paper_data = self._parse_paper_entry(entry)
+                            unique_papers[paper_data['arxiv_id']] = paper_data
+                except Exception as e:
+                    # Best-effort diagnostic: report the failure, try the next category
+                    print(f"   ❌ Error fetching {category}: {e}")
+
+            # Honour the caller's cap on the sample size
+            return list(unique_papers.values())[:max_papers]
+
+    fetcher = TestFetcher()
+
+    # Test each time range
+    for range_name, start_date in time_ranges:
+        print(f"\n📊 {range_name} ({start_date.strftime('%Y-%m-%d')} to {now.strftime('%Y-%m-%d')}):")
+
+        papers = fetcher.fetch_recent_sample(start_date, now)
+        print(f"   📄 Found: {len(papers)} papers")
+
+        if not papers:
+            print(f"   ❌ No papers found in this range")
+            continue
+
+        # Count papers per day; the first 10 chars of 'updated' are YYYY-MM-DD
+        date_counts = Counter(paper['updated'][:10] for paper in papers)
+        print(f"   📅 Daily distribution:")
+        for date, count in sorted(date_counts.items(), reverse=True)[:5]:
+            days_ago = (now.date() - datetime.strptime(date, '%Y-%m-%d').date()).days
+            print(f"     - {date}: {count} papers ({days_ago} days ago)")
+
+        # Show some sample titles
+        print(f"   📝 Sample papers:")
+        for i, paper in enumerate(papers[:3], 1):
+            paper_date = datetime.strptime(paper['updated'][:10], '%Y-%m-%d')
+            days_ago = (now.date() - paper_date.date()).days
+            print(f"     {i}. {paper['title'][:60]}... ({days_ago} days ago)")
+
+
+def check_weekend_effect():
+    """Print weekday context that may explain a low paper count.
+
+    Lists the last seven days (UTC) with their weekday names, then prints a
+    hint chosen by whether today is a weekend day, a Monday or another
+    weekday, followed by two general notes about timing.
+    """
+    banner = "=" * 60
+    print("\n" + banner)
+    print("📅 Weekend Effect Analysis")
+    print(banner)
+
+    now = datetime.now(timezone.utc)
+    current_weekday = now.strftime('%A')
+
+    print(f"🗓️ Today is: {current_weekday}")
+    print("📊 Checking if weekend timing affects paper submissions...")
+
+    # Walk back one day at a time; only the two newest days get a special label
+    relative_labels = {0: "(Today)", 1: "(Yesterday)"}
+    for offset in range(7):
+        day = now - timedelta(days=offset)
+        status = relative_labels.get(offset, f"({offset} days ago)")
+        print(f"   {day.strftime('%Y-%m-%d')} {day.strftime('%A')} {status}")
+
+    print("\n💡 Possible explanations for low paper count:")
+    # Pick the single hint that matches today's weekday name
+    if current_weekday == 'Monday':
+        hint = "   📅 It's Monday - weekend submissions are rare, Monday submissions may be low"
+    elif current_weekday in ('Saturday', 'Sunday'):
+        hint = f"   🏠 It's {current_weekday} - researchers typically don't submit on weekends"
+    else:
+        hint = f"   📚 It's {current_weekday} - should be normal submission day"
+    print(hint)
+
+    # General timing caveats, printed regardless of the weekday
+    print("   🕐 Time zone effects: arXiv updates happen at specific times")
+    print(f"   ⏰ Current UTC time: {now.strftime('%H:%M')} - submissions may not be processed yet")
+
+
+def test_specific_fetch():
+    """Run the real fetch path for the past 3 days with GPT filtering stubbed out.
+
+    Prints the paper count and up to three titles; fetch errors are printed too.
+    """
+    print("\n" + "=" * 60)
+    print("🧪 Testing Actual Fetch Function")
+    print("=" * 60)
+
+    print("🔄 Testing the same logic your main script uses...")
+
+    # Simulate the fetch without OpenAI API
+    class MockFetcher(ArxivPaperFetcher):
+        """ArxivPaperFetcher whose filter_papers_with_gpt returns its input unchanged."""
+
+        def __init__(self):
+            # Deliberately skips the parent __init__; only the HTTP session is set up
+            self.session = requests.Session()
+
+        def filter_papers_with_gpt(self, papers, use_parallel=True, max_concurrent=16):
+            # Skip GPT filtering, return all papers
+            print(f"   ⏭️ Skipping GPT filtering, would have processed {len(papers)} papers")
+            return papers
+
+    try:
+        fetcher = MockFetcher()
+        # Use the same parameters as your actual run
+        papers = fetcher.fetch_recent_papers(days=3)
+        print(f"📄 Raw papers fetched: {len(papers)} papers")
+
+        if papers:
+            print("✅ Papers found! The issue is likely in GPT filtering or API key")
+            print("📋 Sample papers:")
+            for i, paper in enumerate(papers[:3], 1):
+                print(f"   {i}. {paper['title'][:60]}...")
+        else:
+            print("❌ No papers found in raw fetch - arXiv issue or date range problem")
+    except Exception as e:
+        print(f"❌ Error in fetch test: {e}")
+
+
+if __name__ == "__main__":
+    # Run the three diagnostics in order, then explain how to read the output
+    for diagnostic in (analyze_recent_papers, check_weekend_effect, test_specific_fetch):
+        diagnostic()
+
+    print("\n" + "=" * 60)
+    print("🎯 Diagnosis Summary")
+    print("=" * 60)
+    print("If this analysis shows:")
+    print("   📄 Papers exist → Problem is with GPT filtering or API key")
+    print("   ❌ No papers → Weekend effect or arXiv submission patterns")
+    print("   🕐 Time zone → Wait a few hours and try again")
+    print("   📅 Date issue → Check date range logic in fetch function")
+\ No newline at end of file
diff --git a/scripts/test_social_good_filtering.py b/scripts/test_social_good_filtering.py
new file mode 100644
index 0000000..971ca35
--- /dev/null
+++ b/scripts/test_social_good_filtering.py
@@ -0,0 +1,211 @@
+#!/usr/bin/env python3
+"""
+Test Enhanced Social Good Filtering
+
+This script tests the new prompt to ensure it properly filters out
+pure technical CV bias and focuses on social good applications.
+"""
+
+import os
+import sys
+import logging
+
+# Set up logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
+    handlers=[logging.StreamHandler(sys.stdout)]
+)
+logger = logging.getLogger(__name__)
+
+# Add the parent directory to the path
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from scripts.fetch_papers import ArxivPaperFetcher, GPT_SYSTEM_PROMPT
+
+
+def test_enhanced_filtering():
+    """Check the relevance prompt against ten hand-written example papers.
+
+    Returns True when overall accuracy reaches 80%, otherwise False (including
+    when OPENAI_API_KEY is not set).
+    """
+    print("🎯 Testing Enhanced Social Good Filtering")
+    print("=" * 60)
+
+    # Check API key
+    openai_api_key = os.getenv("OPENAI_API_KEY")
+    if not openai_api_key:
+        print("❌ Please set OPENAI_API_KEY environment variable")
+        print("   export OPENAI_API_KEY='your-api-key-here'")
+        return False  # keep the return type bool on every path
+
+    print("✅ OpenAI API key is set")
+    print(f"\n📋 Enhanced prompt focus:")
+    print("   - Social good applications (healthcare, education, justice)")
+    print("   - Real-world impact on vulnerable populations")
+    print("   - Excludes pure technical CV bias research")
+    print("   - Focuses on societal implications")
+
+    fetcher = ArxivPaperFetcher(openai_api_key)
+
+    # Test papers that SHOULD be accepted (social good relevance)
+    positive_examples = [
+        {
+            "title": "Algorithmic Bias in Medical Diagnosis: Impact on Minority Patient Care",
+            "abstract": "This study examines how AI diagnostic systems exhibit systematic bias against minority patients in hospital settings, leading to delayed treatment and worse health outcomes. We analyze bias in medical imaging and clinical decision support systems, proposing fairness interventions to ensure equitable healthcare delivery for underserved populations."
+        },
+        {
+            "title": "Bias in Criminal Justice Risk Assessment Tools: Perpetuating Racial Inequality",
+            "abstract": "We investigate how algorithmic risk assessment tools used in bail and sentencing decisions systematically discriminate against Black and Latino defendants. Our analysis reveals how these systems perpetuate existing inequalities in the justice system, harming vulnerable communities and undermining fair treatment under the law."
+        },
+        {
+            "title": "Educational AI Systems and Socioeconomic Bias: Impact on Student Opportunities",
+            "abstract": "This paper examines how AI-powered educational platforms exhibit bias against students from low-income backgrounds, affecting their access to advanced coursework and college recommendations. We demonstrate how algorithmic bias in education technology perpetuates inequality and limits social mobility."
+        },
+        {
+            "title": "Hiring Algorithm Bias: Gender Discrimination in Recruitment AI",
+            "abstract": "We analyze gender bias in AI-powered hiring systems used by Fortune 500 companies, showing systematic discrimination against women candidates. Our study reveals how biased algorithms perpetuate workplace inequality and violate equal employment opportunity principles, harming women's career advancement."
+        },
+        {
+            "title": "Social Media Content Moderation: Bias Against LGBTQ+ Communities",
+            "abstract": "This research demonstrates how AI content moderation systems disproportionately target and remove content from LGBTQ+ users, effectively silencing marginalized voices. We examine the social harm caused by biased algorithmic enforcement and its impact on community safety and free expression."
+        }
+    ]
+
+    # Test papers that SHOULD be rejected (pure technical CV bias without social context)
+    negative_examples = [
+        {
+            "title": "Mitigating Dataset Bias in Deep Convolutional Networks for ImageNet Classification",
+            "abstract": "We propose a novel data augmentation technique to reduce dataset bias in deep convolutional neural networks trained on ImageNet. Our method improves classification accuracy by 3.2% through bias-aware sampling strategies and shows superior performance on standard computer vision benchmarks."
+        },
+        {
+            "title": "Domain Adaptation for Robust Computer Vision Models: Addressing Covariate Shift",
+            "abstract": "This paper presents a domain adaptation framework for computer vision models to handle distribution shift between training and test datasets. We demonstrate improved generalization on various vision tasks through adversarial training and achieve state-of-the-art results on adaptation benchmarks."
+        },
+        {
+            "title": "Fairness Metrics for Multi-Class Classification: Technical Evaluation Framework",
+            "abstract": "We introduce new mathematical fairness metrics for multi-class classification problems in machine learning. Our framework provides theoretical guarantees for algorithmic fairness and demonstrates computational efficiency on synthetic datasets. The proposed metrics outperform existing approaches in balanced accuracy."
+        },
+        {
+            "title": "Bias Correction in Neural Network Training: Gradient Clipping Techniques",
+            "abstract": "This work proposes improved gradient clipping methods to reduce training bias in deep neural networks. We show that our approach leads to faster convergence and better generalization on standard ML benchmarks. The method is evaluated on CIFAR-10, CIFAR-100, and synthetic datasets."
+        },
+        {
+            "title": "Adversarial Training for Robust Feature Learning in Convolutional Networks",
+            "abstract": "We develop adversarial training techniques to improve feature robustness in convolutional neural networks. Our method generates adversarial examples during training to enhance model generalization. Experiments on image classification tasks show improved robustness against various attack methods."
+        }
+    ]
+
+    print(f"\n🧪 Testing with example papers...")
+    # Test positive examples (should be accepted)
+    print(f"\n✅ Testing papers that SHOULD be accepted (social good relevance):")
+    positive_results = []
+    for i, example in enumerate(positive_examples, 1):
+        try:
+            is_relevant = fetcher._check_paper_relevance(example)
+            positive_results.append(is_relevant)
+            status = "✅ CORRECT" if is_relevant else "❌ MISSED"
+            print(f"   {i}. {status}: {example['title'][:60]}...")
+        except Exception as e:
+            print(f"   {i}. ⚠️ ERROR: {e}")
+            positive_results.append(False)  # a failed call is scored as a miss
+
+    # Test negative examples (should be rejected)
+    print(f"\n❌ Testing papers that SHOULD be rejected (pure technical bias):")
+    negative_results = []
+    for i, example in enumerate(negative_examples, 1):
+        try:
+            is_relevant = fetcher._check_paper_relevance(example)
+            negative_results.append(not is_relevant)  # Expecting not relevant, so invert
+            status = "✅ CORRECT" if not is_relevant else "❌ FALSE POSITIVE"
+            print(f"   {i}. {status}: {example['title'][:60]}...")
+        except Exception as e:
+            print(f"   {i}. ⚠️ ERROR: {e}")
+            negative_results.append(False)  # a failed call is scored as incorrect
+
+    # Calculate accuracy (each results list holds one bool per example; True = correct)
+    print(f"\n📊 Filtering Performance:")
+    positive_accuracy = sum(positive_results) / len(positive_results) * 100 if positive_results else 0
+    negative_accuracy = sum(negative_results) / len(negative_results) * 100 if negative_results else 0
+    overall_accuracy = (sum(positive_results) + sum(negative_results)) / (len(positive_results) + len(negative_results)) * 100
+
+    print(f"   - Social good detection: {positive_accuracy:.1f}% ({sum(positive_results)}/{len(positive_results)})")
+    print(f"   - Pure tech rejection: {negative_accuracy:.1f}% ({sum(negative_results)}/{len(negative_results)})")
+    print(f"   - Overall accuracy: {overall_accuracy:.1f}%")
+
+    # Evaluation: >= 80% excellent, >= 60% good, anything lower needs work
+    print(f"\n🎯 Enhanced Filtering Assessment:")
+    if overall_accuracy >= 80:
+        print(f"   🎉 EXCELLENT! Enhanced filtering is working well")
+        print(f"   ✅ Successfully focuses on social good applications")
+        print(f"   ✅ Effectively filters out pure technical CV bias")
+    elif overall_accuracy >= 60:
+        print(f"   ✅ GOOD performance, minor improvements possible")
+        if positive_accuracy < negative_accuracy:
+            print(f"   💡 Suggestion: May need to strengthen social good detection")
+        else:
+            print(f"   💡 Suggestion: May need to better exclude technical bias")
+    else:
+        print(f"   ⚠️ NEEDS IMPROVEMENT - prompt may need refinement")
+        if positive_accuracy < 50:
+            print(f"   🔧 Issue: Not capturing enough social good papers")
+        if negative_accuracy < 50:
+            print(f"   🔧 Issue: Accepting too many pure technical papers")
+
+    # Show improvement areas
+    print(f"\n💡 Key improvements in enhanced prompt:")
+    print(f"   ✅ Clear distinction between social good vs pure technical research")
+    print(f"   ✅ Explicit exclusion criteria for technical CV bias")
+    print(f"   ✅ Focus on real-world applications affecting people")
+    print(f"   ✅ Emphasis on vulnerable populations and social justice")
+    return overall_accuracy >= 80
+
+
+def show_prompt_comparison():
+    """Print the hard-coded accept/reject summary of the enhanced prompt."""
+    accepted_topics = (
+        "Real-world applications (healthcare, education, justice, hiring)",
+        "Impact on marginalized/vulnerable populations",
+        "Social implications and consequences of bias",
+        "Bias auditing in systems affecting people's lives",
+        "Ethical AI deployment addressing social justice",
+        "Clear connection to social good and human welfare",
+    )
+    rejected_topics = (
+        "Pure technical computer vision without social context",
+        "Generic ML fairness metrics without real-world application",
+        "Theoretical bias research without societal implications",
+        "Technical optimization without considering human impact",
+        "Academic benchmarking without social good connection",
+        "Algorithmic improvements without addressing social harm",
+    )
+
+    print("\n" + "=" * 60)
+    print("📝 Enhanced Prompt Key Features")
+    print("=" * 60)
+
+    # Each section: heading line, bullet marker, topics listed under it
+    sections = (
+        ("\n✅ ACCEPTS papers with:", "✅", accepted_topics),
+        ("\n❌ REJECTS papers with:", "❌", rejected_topics),
+    )
+    for heading, marker, topics in sections:
+        print(heading)
+        for topic in topics:
+            print(f"   {marker} {topic}")
+
+    print("\n🎯 Core principle:")
+    print("   Research must clearly address how AI bias affects society,")
+    print("   vulnerable populations, or social justice - not just technical metrics.")
+
+
+if __name__ == "__main__":
+    # Describe the prompt first, then run the live classification check
+    show_prompt_comparison()
+    success = test_enhanced_filtering()
+    print("\n✅ Enhanced filtering test completed!")
+    verdict = ("🎉 System ready for production with improved social good focus"
+               if success else
+               "⚠️ Consider running additional tests or prompt refinement")
+    print(verdict)
+\ No newline at end of file