import json
import os
import glob
import argparse
from typing import List, Dict, Any
from openai import OpenAI

# Configuration
# Directory scanned (non-recursively) for ``*.json`` problem files by
# ``load_dataset``. The path is relative, so it is resolved against the
# current working directory of the process.
DATA_DIR = "LLaMA-Factory/preprocess/PutnamGAP"
# Directory and file name that are joined to form the batch request file path.
OUTPUT_DIR = "data/putnam_eval"
OUTPUT_FILENAME = "putnam_eval_batch.jsonl"
# Value written into the "model" field of every request in the batch file.
MODEL_NAME = "gpt-5"  # User requested gpt-5

# System message attached to every request. It asks the model to judge the
# kernel variant and its relation to the original problem, and to answer in a
# fixed JSON shape (the requests also set ``response_format`` to JSON object).
SYSTEM_PROMPT = """You are an expert mathematician and a judge for math competitions. You are given an original math problem (and its solution) and a "kernel variant" of that problem (and its solution).

Your task is to:
1. Evaluate the correctness of the kernel variant. Is the problem statement mathematically sound and clear? Is the provided solution correct?
2. Evaluate the relationship between the original problem and the kernel variant. Are they mathematically equivalent? Or is the variant a strong abstraction/generalization/simplification of the original? Do they test the same core concepts?

Output your analysis in the following JSON format:
{
  "variant_validity": {
    "is_problem_valid": boolean,
    "is_solution_correct": boolean,
    "comments": "string"
  },
  "relation_to_original": {
    "is_equivalent": boolean,
    "is_strongly_related": boolean,
    "relationship_description": "string"
  }
}"""

def load_dataset(data_dir: str) -> List[Dict[str, Any]]:
    """Collect every problem in *data_dir* that has a usable kernel variant.

    Each ``*.json`` file directly inside *data_dir* is parsed. A file
    contributes one item when its top-level object has
    ``variants["kernel_variant"]`` as an object with a non-empty
    ``"question"``. Files that cannot be read or parsed are reported on
    stdout and skipped; files without a usable kernel variant are skipped
    silently.

    Args:
        data_dir: Directory containing the problem JSON files.

    Returns:
        A list of dicts with the keys ``id``, ``original_question``,
        ``original_solution``, ``kernel_variant_question`` and
        ``kernel_variant_solution``, ordered by file path. ``id`` is the
        file's ``"index"`` field, or the file name when that field is absent.
    """
    # Escape the directory so characters such as "[" in its name are taken
    # literally, and sort so the output order is reproducible across runs
    # (glob returns entries in arbitrary filesystem order).
    pattern = os.path.join(glob.escape(data_dir), "*.json")
    files = sorted(glob.glob(pattern))
    items = []
    print(f"Scanning {len(files)} files in {data_dir}...")
    for fpath in files:
        # Only the read/parse step can legitimately fail, so only that is
        # guarded. JSONDecodeError and UnicodeDecodeError are both ValueError.
        try:
            with open(fpath, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            print(f"Error reading {fpath}: {e}")
            continue

        # Validate the structure explicitly instead of relying on a
        # catch-all exception handler for malformed files.
        variants = data.get("variants") if isinstance(data, dict) else None
        kv = variants.get("kernel_variant") if isinstance(variants, dict) else None
        if not isinstance(kv, dict):
            continue

        kv_q = kv.get("question", "")
        if not kv_q:
            continue

        items.append({
            "id": data.get("index", os.path.basename(fpath)),
            "original_question": data.get("question", ""),
            "original_solution": data.get("solution", ""),
            "kernel_variant_question": kv_q,
            "kernel_variant_solution": kv.get("solution", ""),
        })
    return items

def create_batch_file(items: List[Dict[str, Any]], output_path: str) -> int:
    """Write one chat-completions batch request per item as JSON Lines.

    Each line is a JSON object with ``custom_id`` (``"req_<id>"``),
    ``method``, ``url`` and a ``body`` holding the model name, the system
    prompt, a user message built from the item's four text fields, and a
    JSON-object response format.

    Args:
        items: Dicts with the keys ``id``, ``original_question``,
            ``original_solution``, ``kernel_variant_question`` and
            ``kernel_variant_solution``.
        output_path: Destination file; it is overwritten if it exists.
            Missing parent directories are created.

    Returns:
        The number of requests written.
    """
    # dirname() is "" for a bare file name, and os.makedirs("") raises
    # FileNotFoundError, so only create a directory when there is one.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    count = 0
    with open(output_path, "w", encoding="utf-8") as f:
        for item in items:
            user_content = f"""[Original Problem]
{item['original_question']}

[Original Solution]
{item['original_solution']}

[Kernel Variant Problem]
{item['kernel_variant_question']}

[Kernel Variant Solution]
{item['kernel_variant_solution']}"""

            # Construct request
            request_obj = {
                "custom_id": f"req_{item['id']}",
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": {
                    "model": MODEL_NAME,
                    "messages": [
                        {"role": "system", "content": SYSTEM_PROMPT},
                        {"role": "user", "content": user_content}
                    ],
                    "response_format": {"type": "json_object"}
                }
            }
            f.write(json.dumps(request_obj) + "\n")
            count += 1

    print(f"Created batch file at {output_path} with {count} requests.")
    return count

def _record_batch_id(id_file: str, batch_id: str) -> None:
    """Append *batch_id* to the JSON list stored in *id_file*, creating it if needed."""
    existing_ids = []
    if os.path.exists(id_file):
        try:
            with open(id_file, "r", encoding="utf-8") as f:
                loaded = json.load(f)
        except (OSError, ValueError) as e:
            # Best effort: an unreadable log must not lose the new batch ID.
            print(f"Warning: could not read {id_file} ({e}); starting a new ID list.")
        else:
            if isinstance(loaded, list):
                existing_ids = loaded
            else:
                # Appending to a non-list would raise after the batch has
                # already been submitted, and the ID would never be saved.
                print(f"Warning: {id_file} does not contain a JSON list; starting a new ID list.")
    existing_ids.append(batch_id)
    with open(id_file, "w", encoding="utf-8") as f:
        json.dump(existing_ids, f, indent=2)


def submit_batch(file_path: str):
    """Upload a batch request file to OpenAI and start a batch job for it.

    The API key is read from the ``OPENAI_API_KEY`` environment variable; if
    it is unset, an error is printed and nothing is uploaded. After the job
    is created, its ID is appended to ``submitted_batch_ids.json`` in the
    same directory as *file_path*.

    Args:
        file_path: Path of the JSONL batch request file to upload.

    Returns:
        None.
    """
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        print("Error: OPENAI_API_KEY not set. Cannot submit.")
        return

    client = OpenAI(api_key=api_key)

    print(f"Uploading {file_path} to OpenAI...")
    with open(file_path, "rb") as f:
        batch_file_obj = client.files.create(
            file=f,
            purpose="batch"
        )
    file_id = batch_file_obj.id
    print(f"Uploaded. File ID: {file_id}")

    print("Submitting Batch Job...")
    batch_job = client.batches.create(
        input_file_id=file_id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
        metadata={
            "description": "PutnamGAP Evaluation"
        }
    )
    print(f"Submitted. Batch ID: {batch_job.id}")

    # Save Batch ID
    id_file = os.path.join(os.path.dirname(file_path), "submitted_batch_ids.json")
    _record_batch_id(id_file, batch_job.id)
    print(f"Batch ID saved to {id_file}")

if __name__ == "__main__":
    # Script entry point: always build the batch file when there is data,
    # and upload it only when --submit is given.
    cli = argparse.ArgumentParser(
        description="Prepare and optionally submit PutnamGAP evaluation batch."
    )
    cli.add_argument(
        "--submit",
        action="store_true",
        help="Submit the batch to OpenAI after generating.",
    )
    options = cli.parse_args()

    dataset = load_dataset(DATA_DIR)
    print(f"Found {len(dataset)} items with kernel variants.")

    batch_path = os.path.join(OUTPUT_DIR, OUTPUT_FILENAME)
    if not dataset:
        # Nothing to write, so no file is created and nothing is submitted.
        print("No items found to process.")
    else:
        create_batch_file(dataset, batch_path)
        if not options.submit:
            print("Use --submit to submit the batch to OpenAI.")
        else:
            submit_batch(batch_path)