diff options
| author | Yuren Hao <yurenh2@illinois.edu> | 2026-04-08 22:06:05 -0500 |
|---|---|---|
| committer | Yuren Hao <yurenh2@illinois.edu> | 2026-04-08 22:06:05 -0500 |
| commit | 05704d0eb2fa59fe727652465b07db40bcb06c38 (patch) | |
| tree | 8904aca836cf552fd1a5ae8c2174e9f91e70bbbc /analysis/balance_diff.py | |
Initial release: GAP framework
- Full pipeline: variant generation, multi-judge verification, evaluation
- Loaders for OpenAI / Anthropic / Google / xAI / OpenRouter / vLLM
- Framework-level mechanism analyses: paired structural overlap, repairability rescue, self-correction probe, cross-model agreement, topic x problem-type interaction
- Unicode -> bare-LaTeX cleaner + audit + spot-check
- Mirrors https://huggingface.co/datasets/blackhao0426/PutnamGAP
Diffstat (limited to 'analysis/balance_diff.py')
| -rw-r--r-- | analysis/balance_diff.py | 109 |
1 files changed, 109 insertions, 0 deletions
diff --git a/analysis/balance_diff.py b/analysis/balance_diff.py new file mode 100644 index 0000000..f420d46 --- /dev/null +++ b/analysis/balance_diff.py @@ -0,0 +1,109 @@ +"""Compare brace/paren/bracket balance BEFORE vs AFTER cleaning to check +whether the cleaner introduced any new imbalance.""" +from __future__ import annotations +import json +import tarfile +from pathlib import Path +from collections import Counter + +CURRENT_DIR = Path("/home/yurenh2/gap/putnam-bench-anon/dataset") +BACKUP_TAR = sorted(Path("/home/yurenh2/gap/analysis/dataset_backups").glob( + "putnam-bench-anon_dataset_*.tar.gz"))[-1] + + +def all_text(d: dict) -> str: + out = [] + for k in ("question", "solution"): + out.append(d.get(k) or "") + for vk, vd in (d.get("variants") or {}).items(): + if isinstance(vd, dict): + for k in ("question", "solution"): + out.append(vd.get(k) or "") + return "\n".join(out) + + +def balance(text: str): + return ( + text.count("{") - text.count("}"), + text.count("(") - text.count(")"), + text.count("[") - text.count("]"), + ) + + +def main(): + print("Loading backup ...") + backup = {} + with tarfile.open(BACKUP_TAR, "r:gz") as tar: + for member in tar.getmembers(): + if not member.isfile() or not member.name.endswith(".json"): + continue + f = tar.extractfile(member) + if not f: + continue + d = json.load(f) + backup[d.get("index")] = all_text(d) + print(f" loaded {len(backup)} backup problems") + + print("Loading current ...") + current = {} + for f in sorted(CURRENT_DIR.glob("*.json")): + d = json.load(open(f)) + current[d.get("index")] = all_text(d) + print(f" loaded {len(current)} current problems") + + # Per-file balance diff + introduced_imbalance = [] + fixed_imbalance = [] + same_imbalance = 0 + same_balanced = 0 + + n_brace_changed = 0 + n_paren_changed = 0 + n_brack_changed = 0 + + for idx in sorted(backup): + b_before = balance(backup[idx]) + b_after = balance(current.get(idx, "")) + was_bal = b_before == (0, 0, 0) + is_bal = b_after == (0, 0, 0) + if b_before != b_after: + if was_bal and not is_bal: + introduced_imbalance.append((idx, b_before, b_after)) + elif not was_bal and is_bal: + fixed_imbalance.append((idx, b_before, b_after)) + else: + if is_bal: + same_balanced += 1 + else: + same_imbalance += 1 + if b_before[0] != b_after[0]: n_brace_changed += 1 + if b_before[1] != b_after[1]: n_paren_changed += 1 + if b_before[2] != b_after[2]: n_brack_changed += 1 + + print(f"\n=== Per-file balance change summary ===") + print(f" Files with no change in any balance:") + print(f" balanced both before and after: {same_balanced}") + print(f" imbalanced before and after (same imbalance): {same_imbalance}") + print(f" Files where cleaner INTRODUCED new imbalance: " + f"{len(introduced_imbalance)}") + print(f" Files where cleaner FIXED prior imbalance: {len(fixed_imbalance)}") + print() + print(f" Files where {{ balance changed: {n_brace_changed}") + print(f" Files where ( balance changed: {n_paren_changed}") + print(f" Files where [ balance changed: {n_brack_changed}") + + if introduced_imbalance: + print(f"\n!!! Cleaner-introduced imbalances ({len(introduced_imbalance)}):") + for idx, before, after in introduced_imbalance[:10]: + print(f" {idx}: before={before}, after={after}") + else: + print("\n ✓ No cleaner-introduced imbalances found.") + + if fixed_imbalance: + print(f"\n Cleaner-fixed imbalances (top 10):") + for idx, before, after in fixed_imbalance[:10]: + print(f" {idx}: before={before}, after={after}") + + +if __name__ == "__main__": + main() |
