Initial release: GAP framework

- Full pipeline: variant generation, multi-judge verification, evaluation
- Loaders for OpenAI / Anthropic / Google / xAI / OpenRouter / vLLM
- Framework-level mechanism analyses: paired structural overlap, repairability rescue, self-correction probe, cross-model agreement, topic x problem-type interaction
- Unicode -> bare-LaTeX cleaner + audit + spot-check
- Mirrors https://huggingface.co/datasets/blackhao0426/PutnamGAP
author: Yuren Hao <yurenh2@illinois.edu> 2026-04-08 22:06:05 -0500
committer: Yuren Hao <yurenh2@illinois.edu> 2026-04-08 22:06:05 -0500
commit: 05704d0eb2fa59fe727652465b07db40bcb06c38 (patch)
tree: 8904aca836cf552fd1a5ae8c2174e9f91e70bbbc /putnamsup/putnam_utils.py
1 files changed, 95 insertions, 0 deletions
diff --git a/putnamsup/putnam_utils.py b/putnamsup/putnam_utils.py
new file mode 100644
index 0000000..7761c49
--- /dev/null
+++ b/putnamsup/putnam_utils.py
@@ -0,0 +1,95 @@
+import os
+import json
+from typing import Dict, Any, Generator, Tuple, Optional, List
+
+# Variant keys this module accepts, as seen in putnamgap_viewer.py.
+# "original" is the record's own question; the rest name entries under "variants".
+SUPPORTED_VARIANTS: List[str] = ["original"] + [
+    "descriptive_long",
+    "descriptive_long_confusing",
+    "descriptive_long_misleading",
+    "garbled_string",
+    "kernel_variant",
+]
+
+def get_original_qa(d: Dict[str, Any]) -> Tuple[Optional[str], Optional[str]]:
+    """Return (question, solution) of `d` (None if absent); "answer" replaces a missing/null "solution"."""
+    solution = d.get("solution")
+    # Test for None explicitly: get("solution", default) returns a stored null, not the default.
+    return d.get("question"), (d.get("answer") if solution is None else solution)
+
+def get_variant_qa(d: Dict[str, Any], variant_key: str) -> Tuple[Optional[str], Optional[str]]:
+    """Return (question, solution) from ``d["variants"][variant_key]``.
+
+    Gives (None, None) unless both levels are dicts; "answer" replaces a missing or null "solution".
+    """
+    variants = d.get("variants")
+    var = variants.get(variant_key) if isinstance(variants, dict) else None
+    if not isinstance(var, dict):
+        return None, None
+    solution = var.get("solution")  # a stored null must still fall back to "answer"
+    return var.get("question"), (var.get("answer") if solution is None else solution)
+
+def load_dataset(data_dir: str, selected_variants: Optional[List[str]] = None) -> Generator[Dict[str, Any], None, None]:
+    """
+    Iterate over the JSON files in data_dir and yield problem instances.
+
+    Files are visited in sorted filename order. For each file the original
+    problem is yielded first, then the variants in SUPPORTED_VARIANTS order.
+    Each instance is a dict with keys: file_index, problem_type, variant,
+    question, solution. Instances with an empty or missing question or
+    solution are skipped, as are files that cannot be read or parsed or
+    whose top-level JSON value is not an object (an error is printed).
+
+    This is a generator: nothing runs, including the data_dir check, until
+    the first item is requested.
+
+    Args:
+        data_dir: Path to the dataset directory.
+        selected_variants: Variants to include. If None or empty, include all.
+                           Supported values are in SUPPORTED_VARIANTS; others
+                           trigger a warning and never match anything.
+
+    Raises:
+        ValueError: If data_dir is not an existing directory.
+    """
+    if not os.path.isdir(data_dir):
+        raise ValueError(f"Directory not found: {data_dir}")
+
+    # Unknown names are reported but not fatal.
+    for v in selected_variants or ():
+        if v not in SUPPORTED_VARIANTS:
+            print(f"Warning: Variant '{v}' not recognized. Supported: {SUPPORTED_VARIANTS}")
+
+    # None and [] both mean "no filter".
+    target_variants = selected_variants if selected_variants else SUPPORTED_VARIANTS
+    # "original" always goes first; it lives at the top level of the record, not under "variants".
+    ordered = ["original"] + [v for v in SUPPORTED_VARIANTS if v != "original"]
+    wanted = [v for v in ordered if v in target_variants]
+
+    for f in sorted(n for n in os.listdir(data_dir) if n.lower().endswith(".json")):
+        filepath = os.path.join(data_dir, f)
+        try:
+            with open(filepath, "r", encoding="utf-8") as fp:
+                data = json.load(fp)
+        except Exception as e:  # best-effort: one bad file must not end the scan
+            print(f"Error loading {filepath}: {e}")
+            continue
+        if not isinstance(data, dict):
+            # Valid JSON but not an object (e.g. a list): .get() below would raise.
+            print(f"Error loading {filepath}: expected a JSON object, got {type(data).__name__}")
+            continue
+
+        file_index = data.get("index", f)  # Use filename as index if 'index' key missing
+        prob_type = data.get("problem_type", "unknown")
+
+        for var_key in wanted:
+            q, a = get_original_qa(data) if var_key == "original" else get_variant_qa(data, var_key)
+            if q and a:
+                yield {
+                    "file_index": file_index,
+                    "problem_type": prob_type,
+                    "variant": var_key,
+                    "question": q,
+                    "solution": a,
+                }
author	Yuren Hao <yurenh2@illinois.edu>	2026-04-08 22:06:05 -0500
committer	Yuren Hao <yurenh2@illinois.edu>	2026-04-08 22:06:05 -0500
commit	05704d0eb2fa59fe727652465b07db40bcb06c38 (patch)
tree	8904aca836cf552fd1a5ae8c2174e9f91e70bbbc /putnamsup/putnam_utils.py