#!/usr/bin/env python3
"""
测试时间倒序排列功能

验证README更新逻辑是否正确地将最新论文放在最前面，
确保论文始终按时间倒序排列。
"""

import os
import sys
import tempfile
from datetime import datetime, timezone

# Add the parent directory to the path so we can import the main module
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from scripts.fetch_papers import GitHubUpdater


def test_reverse_chronological_order():
    """Check that a newly added paper section lands above the existing ones.

    Asks ``GitHubUpdater._find_papers_insert_position`` where new content
    should go in a mock README that already has one dated "Papers Updated on"
    section, splices in a freshly formatted section stamped with the current
    UTC time, and verifies that the first section header in the result carries
    a later timestamp than the second one.

    Progress is printed along the way, and the resulting document is written
    to a temporary ``.md`` file that is deliberately left on disk for manual
    inspection.

    Raises:
        AssertionError: If the section order could not be verified or the
            newest section is not the first one.
    """
    print("🔍 测试README时间倒序排列功能")
    print("=" * 60)

    print("📄 模拟的现有README内容:")
    print("   - 包含项目描述和设置说明")
    print("   - 已有2篇旧论文 (2024-01-15 和 2024-01-13)")
    print("   - 测试新论文是否会插入到正确位置")

    # Mock new papers (expected to end up above the existing section).
    new_papers = [
        {
            'title': 'Brand New Paper on AI Fairness',
            'authors': ['New Author A', 'New Author B', 'New Author C', 'New Author D'],
            'categories': ['cs.AI', 'cs.LG', 'cs.CL'],
            'published': '2024-01-16T12:00:00Z',
            'abstract': 'This is a brand new paper about AI fairness that should appear at the top of the README.',
            'link': 'https://arxiv.org/abs/2401.99999',
            'arxiv_id': '2401.99999',
        },
        {
            'title': 'Another New Paper on Social Good AI',
            'authors': ['New Author E', 'New Author F'],
            'categories': ['cs.AI', 'cs.HC'],
            'published': '2024-01-16T09:30:00Z',
            'abstract': 'This is another new paper about social good AI applications.',
            'link': 'https://arxiv.org/abs/2401.99998',
            'arxiv_id': '2401.99998',
        },
    ]

    print(f"\n📝 模拟添加 {len(new_papers)} 篇新论文:")
    for i, paper in enumerate(new_papers, 1):
        print(f"   {i}. {paper['title'][:50]}... ({paper['published'][:10]})")

    print("\n🧪 测试插入位置查找逻辑...")

    class MockGitHubUpdater(GitHubUpdater):
        """GitHubUpdater whose constructor does nothing."""

        def __init__(self):
            # Deliberately skip the parent __init__ to avoid GitHub API calls.
            pass

    updater = MockGitHubUpdater()
    # The result is used below as a character offset into the README text;
    # a non-positive value is treated as "no suitable position found".
    insert_pos = updater._find_papers_insert_position(_MOCK_README)

    if insert_pos > 0:
        # Number of newlines before the offset == 0-based index of the line
        # that contains the insertion point.
        lines_before = _MOCK_README[:insert_pos].count('\n')
        print(f"   ✅ 找到插入位置: 第 {lines_before} 行之后")

        # Show a few lines of context around the insertion point.
        readme_lines = _MOCK_README.split('\n')
        context_start = max(0, lines_before - 2)
        context_end = min(len(readme_lines), lines_before + 3)

        print("   📍 插入位置上下文:")
        for i, text in enumerate(readme_lines[context_start:context_end], context_start):
            marker = " >>> 插入点 <<<" if i == lines_before else ""
            print(f"     {i+1:2d}: {text[:50]}{marker}")
    else:
        print("   ⚠️ 未找到合适插入位置，将使用末尾追加")

    print("\n🔄 测试完整的更新逻辑...")

    section_title = f"Papers Updated on {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')}"
    new_section = _format_new_section(new_papers, section_title)

    if insert_pos > 0:
        updated_content = _MOCK_README[:insert_pos] + new_section + _MOCK_README[insert_pos:]
        print("   ✅ 新内容插入到正确位置")
    else:
        updated_content = _MOCK_README + new_section
        print("   ⚠️ 新内容追加到末尾")

    print("\n📊 结果分析:")

    paper_sections = _collect_paper_sections(updated_content)

    print(f"   - 找到 {len(paper_sections)} 个论文段落:")
    for i, section in enumerate(paper_sections, 1):
        print(f"     {i}. {section['title'][:60]}... (第{section['line']}行)")

    # Stays False unless both leading sections have a date and the first is newer.
    order_verified = False
    if len(paper_sections) >= 2:
        first_section, second_section = paper_sections[:2]

        print("\n🎯 时间倒序验证:")
        print(f"   - 第一个段落: {first_section['title'][:40]}...")
        print(f"   - 第二个段落: {second_section['title'][:40]}...")

        if first_section['date_str'] and second_section['date_str']:
            # Zero-padded "YYYY-MM-DD HH:MM" strings sort lexicographically
            # in chronological order, so a plain string comparison suffices.
            order_verified = first_section['date_str'] > second_section['date_str']
            if order_verified:
                print("   ✅ 时间倒序正确！最新论文在最上面")
            else:
                print("   ❌ 时间倒序错误！需要调整插入逻辑")
        else:
            print("   ℹ️ 无法比较日期，请手动检查")

    # Save the result for inspection *before* asserting, so the file exists
    # even when the check fails. The content contains emoji, so the encoding
    # must be explicit: the locale default is not UTF-8 on every platform.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False,
                                     encoding='utf-8') as f:
        f.write(updated_content)
        temp_file = f.name

    print(f"\n📄 完整结果已保存到临时文件: {temp_file}")
    print("   可以手动检查README更新结果")

    # Fail the test instead of only printing a verdict, so a regression in the
    # insertion logic is caught by the test runner.
    assert order_verified, (
        "newest paper section is not first in the updated README "
        f"(see {temp_file})"
    )

    print("\n✅ 测试完成！")
    print("   关键改进:")
    print("   - ✅ 新论文会插入到README开头部分")
    print("   - ✅ 保持时间倒序排列（最新在上）")
    print("   - ✅ 避免在文档末尾追加")
    print("   - ✅ 智能识别插入位置")


# Mock README: project description followed by one existing dated paper section.
_MOCK_README = """# ArXiv Social Good AI Paper Fetcher

An automated system for discovering and cataloging research papers related to AI bias, fairness, and social good from arXiv.org.

## 🎯 Features

- **Intelligent Paper Detection**: Uses GPT-4o to analyze papers
- **Automated Daily Updates**: Runs daily via GitHub Actions

## 🔧 Setup & Configuration

Setup instructions here...

## 🚀 Usage

Usage instructions here...

**Note**: This tool is designed for academic research purposes. Please respect arXiv's usage policies.

## Papers Updated on 2024-01-15 08:00 UTC

### Old Paper 1

**Authors:** Author A, Author B

**Categories:** cs.AI, cs.LG

**Published:** 2024-01-14T10:00:00Z

**Abstract:** This is an old paper abstract...

**Link:** [arXiv:2401.12345](https://arxiv.org/abs/2401.12345)

---

### Old Paper 2

**Authors:** Author C, Author D

**Categories:** cs.CL

**Published:** 2024-01-13T15:30:00Z

**Abstract:** This is another old paper abstract...

**Link:** [arXiv:2401.12346](https://arxiv.org/abs/2401.12346)

---
"""


def _format_new_section(papers, section_title):
    """Render *papers* as a Markdown section headed by *section_title*.

    Each paper is a dict with the keys ``title``, ``authors``, ``categories``,
    ``published``, ``abstract``, ``link`` and ``arxiv_id``. Only the first
    three authors are listed; longer author lists get an "et al." suffix.

    Returns:
        The section text, starting with two newlines and the ``##`` header.
    """
    parts = [f"\n\n## {section_title}\n\n"]

    for paper in papers:
        authors_str = ", ".join(paper['authors'][:3])
        if len(paper['authors']) > 3:
            authors_str += " et al."

        categories_str = ", ".join(paper['categories'])

        parts.append(
            f"### {paper['title']}\n\n"
            f"**Authors:** {authors_str}\n\n"
            f"**Categories:** {categories_str}\n\n"
            f"**Published:** {paper['published']}\n\n"
            f"**Abstract:** {paper['abstract']}\n\n"
            f"**Link:** [arXiv:{paper['arxiv_id']}]({paper['link']})\n\n"
            "---\n\n"
        )

    return "".join(parts)


def _collect_paper_sections(content):
    """Return the paper section headers found in *content*, in document order.

    A header is a line starting with ``## Papers Updated on`` or
    ``## Historical``. Each entry is a dict with the 1-based ``line`` number,
    the header ``title`` and ``date_str``: the text between ``Updated on `` and
    `` UTC``, or ``None`` when the header carries no such date.
    """
    sections = []
    for line_no, line in enumerate(content.split('\n'), 1):
        if not line.startswith(('## Papers Updated on', '## Historical')):
            continue

        # partition() never raises, so no exception handling is needed for
        # headers without a date part.
        _, found, rest = line.partition('Updated on ')
        date_str = rest.partition(' UTC')[0] if found else None

        sections.append({'line': line_no, 'title': line, 'date_str': date_str})

    return sections


# Allow running this check directly as a script, without a test runner.
if __name__ == "__main__":
    test_reverse_chronological_order()