scripts/test_daily_fetch.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122

#!/usr/bin/env python3
"""
Test script for daily paper fetching functionality.

This script tests the daily paper fetching with a small sample to verify
the system works correctly before running in production.
"""

import os
import sys
from datetime import datetime, timezone, timedelta

# Add the parent directory to the path so we can import the main module
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from scripts.fetch_papers import ArxivPaperFetcher


def test_daily_fetch():
    """Run a three-day sample fetch and print every paper that comes back.

    The OpenAI API key is read from the ``OPENAI_API_KEY`` environment
    variable; when it is missing or empty the process exits with status 1.
    A three-day window is used rather than a single day so the run is more
    likely to return at least one paper.
    """
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        print("ERROR: OPENAI_API_KEY environment variable is required")
        print("Please set your OpenAI API key in the environment variable")
        sys.exit(1)

    print("Testing daily paper fetching (last 3 days)...")

    # Build the fetcher and pull the sample window in one go.
    found = ArxivPaperFetcher(api_key).fetch_recent_papers(days=3)

    print("\nFetch completed!")
    print(f"Found {len(found)} relevant LLM bias papers in the last 3 days")

    # Nothing to list: explain why that is not necessarily a failure.
    if not found:
        print("\nNo relevant papers found in the last 3 days.")
        print("This could be normal - LLM bias papers are not published every day.")
        return

    separator = "-" * 50
    print("\nRelevant papers found:")
    for position, entry in enumerate(found, start=1):
        print(f"\n{position}. {entry['title']}")

        # Only the first three authors are listed; longer lists get "et al.".
        author_list = entry['authors']
        print(f"   Authors: {', '.join(author_list[:3])}")
        if len(author_list) > 3:
            print("   et al.")

        print(f"   Categories: {', '.join(entry['categories'])}")
        print(f"   Published: {entry['published']}")
        print(f"   arXiv ID: {entry['arxiv_id']}")
        print(f"   Link: {entry['link']}")
        # The abstract is cut to its first 200 characters for readability.
        print(f"   Abstract: {entry['abstract'][:200]}...")
        print(separator)


def test_system_components():
    """Smoke-test the fetcher's building blocks one at a time.

    Checks, in order: that ``ArxivPaperFetcher`` can be constructed, that a
    one-day date-range fetch (capped at 5 papers) completes, and - only when
    that fetch returned something - that a relevance check on the first
    paper completes.

    Returns:
        True when every attempted check passed, False as soon as one raises.

    Exits with status 1 when ``OPENAI_API_KEY`` is missing or empty.
    """
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        print("ERROR: OPENAI_API_KEY environment variable is required")
        sys.exit(1)

    print("\nTesting system components...")

    # Step 1: construct the fetcher.
    try:
        paper_fetcher = ArxivPaperFetcher(api_key)
        print("✓ ArxivPaperFetcher initialized successfully")
    except Exception as init_error:
        print(f"✗ Failed to initialize ArxivPaperFetcher: {init_error}")
        return False

    # Step 2: fetch a small batch covering the last 24 hours (UTC).
    try:
        window_end = datetime.now(tz=timezone.utc)
        window_start = window_end - timedelta(days=1)
        sample = paper_fetcher.fetch_papers_by_date_range(
            window_start, window_end, max_papers=5
        )
        print(f"✓ arXiv API connectivity works (fetched {len(sample)} papers)")
    except Exception as fetch_error:
        print(f"✗ Failed to connect to arXiv API: {fetch_error}")
        return False

    # Step 3 needs a paper to classify; with an empty batch it is skipped.
    if not sample:
        return True

    try:
        verdict = paper_fetcher._check_paper_relevance(sample[0])
        print(f"✓ OpenAI API connectivity works (test result: {verdict})")
    except Exception as relevance_error:
        print(f"✗ Failed to connect to OpenAI API: {relevance_error}")
        return False

    return True


if __name__ == "__main__":
    # Script entry point: run the component checks first, then the sample
    # fetch. Any failure ends the process with exit status 1.
    rule = "=" * 40
    print("ArXiv Daily Paper Fetcher Test")
    print(rule)

    try:
        components_ok = test_system_components()
        if not components_ok:
            print("\nSystem component test failed!")
            sys.exit(1)

        print("\nAll system components working correctly!")

        # Components are healthy, so exercise the real fetch path.
        test_daily_fetch()

        print("\n" + rule)
        print("Test completed successfully!")
        print("\nTo run the actual daily fetch:")
        print("python scripts/fetch_papers.py")

    except Exception as failure:
        # sys.exit() raises SystemExit, which is not an Exception subclass,
        # so the deliberate exits above pass through this handler untouched.
        print(f"\nError during testing: {failure}")
        sys.exit(1)