"""Prepare, and optionally submit, an OpenAI Batch job for PutnamGAP.

The script scans ``DATA_DIR`` for ``*.json`` problem files, keeps those that
carry a ``kernel_variant`` with a non-empty question, writes one chat
completion request per problem to a JSONL batch file and, when ``--submit``
is given, uploads that file and creates a batch job.
"""

import argparse
import glob
import json
import os
from typing import Any, Dict, List, Optional

try:
    from openai import OpenAI
except ImportError:  # The SDK is only needed for --submit, not for generation.
    OpenAI = None  # type: ignore[assignment,misc]

# Configuration
DATA_DIR = "LLaMA-Factory/preprocess/PutnamGAP"
OUTPUT_DIR = "data/putnam_eval"
OUTPUT_FILENAME = "putnam_eval_batch.jsonl"
MODEL_NAME = "gpt-5"  # User requested gpt-5

# Built from adjacent literals purely for readability; the resulting value is
# one single-line string.
SYSTEM_PROMPT = (
    'You are an expert mathematician and a judge for math competitions. '
    'You are given an original math problem (and its solution) and a '
    '"kernel variant" of that problem (and its solution). '
    'Your task is to: '
    '1. Evaluate the correctness of the kernel variant. '
    'Is the problem statement mathematically sound and clear? '
    'Is the provided solution correct? '
    '2. Evaluate the relationship between the original problem and the '
    'kernel variant. '
    'Are they mathematically equivalent? '
    'Or is the variant a strong abstraction/generalization/simplification '
    'of the original? '
    'Do they test the same core concepts? '
    'Output your analysis in the following JSON format: '
    '{ "variant_validity": { '
    '"is_problem_valid": boolean, '
    '"is_solution_correct": boolean, '
    '"comments": "string" }, '
    '"relation_to_original": { '
    '"is_equivalent": boolean, '
    '"is_strongly_related": boolean, '
    '"relationship_description": "string" } }'
)


def _extract_item(data: Any, fpath: str) -> Optional[Dict[str, Any]]:
    """Turn one parsed problem file into an evaluation item, or ``None``."""
    if not isinstance(data, dict):
        return None
    variants = data.get("variants")
    if not isinstance(variants, dict):
        return None
    kv = variants.get("kernel_variant")
    if not isinstance(kv, dict):
        return None

    kv_q = kv.get("question", "")
    if not kv_q:
        # A variant without a question gives the judge nothing to evaluate.
        return None

    return {
        # Fall back to the file name when the record has no "index" field.
        "id": data.get("index", os.path.basename(fpath)),
        "original_question": data.get("question", ""),
        "original_solution": data.get("solution", ""),
        "kernel_variant_question": kv_q,
        "kernel_variant_solution": kv.get("solution", ""),
    }


def load_dataset(data_dir: str) -> List[Dict[str, Any]]:
    """Load every usable problem from the ``*.json`` files in *data_dir*.

    Files that cannot be read or parsed are reported on stdout and skipped.
    Records without a ``variants.kernel_variant`` mapping, or whose kernel
    variant has an empty question, are skipped silently.

    Args:
        data_dir: Directory that is scanned (non-recursively) for JSON files.

    Returns:
        One dict per usable problem with the keys ``id``,
        ``original_question``, ``original_solution``,
        ``kernel_variant_question`` and ``kernel_variant_solution``, ordered
        by file path.
    """
    # glob gives no ordering guarantee; sorting keeps the output reproducible.
    files = sorted(glob.glob(os.path.join(data_dir, "*.json")))
    items: List[Dict[str, Any]] = []

    print(f"Scanning {len(files)} files in {data_dir}...")

    for fpath in files:
        try:
            with open(fpath, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            print(f"Error reading {fpath}: {e}")
            continue

        item = _extract_item(data, fpath)
        if item is not None:
            items.append(item)

    return items


def _build_request(item: Dict[str, Any]) -> Dict[str, Any]:
    """Build the batch request object for a single evaluation item."""
    user_content = (
        f"[Original Problem] {item['original_question']} "
        f"[Original Solution] {item['original_solution']} "
        f"[Kernel Variant Problem] {item['kernel_variant_question']} "
        f"[Kernel Variant Solution] {item['kernel_variant_solution']}"
    )
    return {
        "custom_id": f"req_{item['id']}",
        "method": "POST",
        "url": "/v1/chat/completions",
        "body": {
            "model": MODEL_NAME,
            "messages": [
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": user_content},
            ],
            "response_format": {"type": "json_object"},
        },
    }


def create_batch_file(items: List[Dict[str, Any]], output_path: str) -> int:
    """Write one JSONL request line per item to *output_path*.

    Args:
        items: Evaluation items as produced by ``load_dataset``.
        output_path: Destination file; missing parent directories are created.

    Returns:
        The number of requests written.
    """
    out_dir = os.path.dirname(output_path)
    if out_dir:
        # os.makedirs("") raises, so only create a directory when one is named.
        os.makedirs(out_dir, exist_ok=True)

    count = 0
    with open(output_path, "w", encoding="utf-8") as f:
        for item in items:
            f.write(json.dumps(_build_request(item)) + "\n")
            count += 1

    print(f"Created batch file at {output_path} with {count} requests.")
    return count


def _record_batch_id(id_file: str, batch_id: str) -> None:
    """Append *batch_id* to the JSON list stored in *id_file*."""
    existing_ids: List[Any] = []
    if os.path.exists(id_file):
        try:
            with open(id_file, "r", encoding="utf-8") as f:
                loaded = json.load(f)
        except (OSError, ValueError) as e:
            print(f"Warning: could not read {id_file} ({e}); starting a new list.")
        else:
            if isinstance(loaded, list):
                existing_ids = loaded
            else:
                print(f"Warning: {id_file} does not hold a list; starting a new list.")

    existing_ids.append(batch_id)
    with open(id_file, "w", encoding="utf-8") as f:
        json.dump(existing_ids, f, indent=2)


def submit_batch(file_path: str) -> None:
    """Upload *file_path* and create a batch job for it.

    The API key is read from the ``OPENAI_API_KEY`` environment variable. The
    ID of the created batch is appended to ``submitted_batch_ids.json`` in
    the same directory as *file_path*. When the key is not set, or the
    ``openai`` package is not installed, an error is printed and nothing is
    submitted.

    Args:
        file_path: Path of the JSONL batch file to upload.
    """
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        print("Error: OPENAI_API_KEY not set. Cannot submit.")
        return
    if OpenAI is None:
        print("Error: the 'openai' package is not installed. Cannot submit.")
        return

    client = OpenAI(api_key=api_key)

    print(f"Uploading {file_path} to OpenAI...")
    with open(file_path, "rb") as f:
        batch_file_obj = client.files.create(file=f, purpose="batch")

    file_id = batch_file_obj.id
    print(f"Uploaded. File ID: {file_id}")

    print("Submitting Batch Job...")
    batch_job = client.batches.create(
        input_file_id=file_id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
        metadata={"description": "PutnamGAP Evaluation"},
    )
    print(f"Submitted. Batch ID: {batch_job.id}")

    # Save Batch ID
    id_file = os.path.join(os.path.dirname(file_path), "submitted_batch_ids.json")
    _record_batch_id(id_file, batch_job.id)
    print(f"Batch ID saved to {id_file}")


def main(argv: Optional[List[str]] = None) -> None:
    """Parse the command line, generate the batch file and optionally submit it.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(
        description="Prepare and optionally submit PutnamGAP evaluation batch."
    )
    parser.add_argument(
        "--submit",
        action="store_true",
        help="Submit the batch to OpenAI after generating.",
    )
    args = parser.parse_args(argv)

    items = load_dataset(DATA_DIR)
    print(f"Found {len(items)} items with kernel variants.")

    if not items:
        print("No items found to process.")
        return

    output_path = os.path.join(OUTPUT_DIR, OUTPUT_FILENAME)
    create_batch_file(items, output_path)

    if args.submit:
        submit_batch(output_path)
    else:
        print("Use --submit to submit the batch to OpenAI.")


if __name__ == "__main__":
    main()