#!/usr/bin/env python3
"""
Streamlit-based PutnamGAP dataset viewer.
Features:
- Scans preprocess/PutnamGAP for JSON files and allows prev/next navigation
- Select specific file from a dropdown
- Choose which variant to display: original or one of:
descriptive_long, descriptive_long_confusing, descriptive_long_misleading, garbled_string, kernel_variant
- Toggle to show Question, Solution (a.k.a. Answer), or Both
- TeX rendering via Markdown by default, with optional HTML+MathJax fallback
"""
import json
import os
from typing import Any, Dict, List, Optional, Tuple
import streamlit as st
from streamlit.components.v1 import html as st_html
DATA_DIR = os.path.join(os.path.dirname(__file__), "PutnamGAP")
SUPPORTED_VARIANTS = [
"original",
"descriptive_long",
"descriptive_long_confusing",
"descriptive_long_misleading",
"garbled_string",
"kernel_variant",
]
def discover_json_files(data_dir: str) -> List[str]:
if not os.path.isdir(data_dir):
return []
files = [
os.path.join(data_dir, f)
for f in os.listdir(data_dir)
if f.lower().endswith(".json")
]
files.sort()
return files
def load_json(filepath: str) -> Dict[str, Any]:
with open(filepath, "r", encoding="utf-8") as f:
return json.load(f)
def get_original_qa(d: Dict[str, Any]) -> Tuple[Optional[str], Optional[str]]:
# Prefer "question"/"solution"; gracefully fall back to "answer" if present
question: Optional[str] = d.get("question")
solution: Optional[str] = d.get("solution", d.get("answer"))
return question, solution
def get_variant_qa(
d: Dict[str, Any], variant_key: str
) -> Tuple[Optional[str], Optional[str]]:
variants = d.get("variants")
if not isinstance(variants, dict):
return None, None
var = variants.get(variant_key)
if not isinstance(var, dict):
return None, None
question: Optional[str] = var.get("question")
solution: Optional[str] = var.get("solution", var.get("answer"))
return question, solution
def render_markdown_with_math(text: str) -> None:
# Streamlit markdown supports MathJax ($...$, $$...$$)
st.markdown(text, unsafe_allow_html=True)
def render_with_mathjax_html(blocks: List[Tuple[str, str]]) -> None:
"""
Render content with MathJax v3 inside a single HTML component.
blocks: list of (heading, content) tuples
"""
# Build a small HTML page with MathJax v3; render all blocks together.
content_sections = []
for heading, content in blocks:
section_html = f"""
"""
content_sections.append(section_html)
page = f"""
{''.join(content_sections)}
"""
st_html(page, height=600, scrolling=True)
def main() -> None:
st.set_page_config(page_title="PutnamGAP Viewer", layout="wide")
st.title("PutnamGAP 数据可视化与校对")
st.caption("浏览原题与不同变体;支持 TeX 渲染与文件前后切换。")
files = discover_json_files(DATA_DIR)
if not files:
st.error(f"未在目录中发现 JSON 文件:{DATA_DIR}")
st.stop()
# Sidebar controls
with st.sidebar:
st.subheader("文件与显示设置")
# Single source of truth for navigation: file_index
file_labels = [os.path.basename(p) for p in files]
if "file_index" not in st.session_state:
st.session_state.file_index = 0
selected_label = st.selectbox(
"选择题目文件",
options=file_labels,
index=st.session_state.file_index,
)
# Sync file_index if user chose a different label
current_index = file_labels.index(selected_label)
if current_index != st.session_state.file_index:
st.session_state.file_index = current_index
# Variant selection
variant_human_labels = {
"original": "原题 original",
"descriptive_long": "descriptive_long",
"descriptive_long_confusing": "descriptive_long_confusing",
"descriptive_long_misleading": "descriptive_long_misleading",
"garbled_string": "garbled_string",
"kernel_variant": "kernel_variant",
}
variant_choice_label = st.radio(
"选择显示内容",
options=[variant_human_labels[k] for k in SUPPORTED_VARIANTS],
index=0,
)
# Reverse map to internal key
selected_variant = {
v: k for k, v in variant_human_labels.items()
}[variant_choice_label]
# Display options
show_mode = st.radio(
"显示部分",
options=["Question", "Solution", "Both"],
index=0,
horizontal=True,
)
render_mode = st.radio(
"渲染方式",
options=["Markdown (默认)", "HTML + MathJax"],
index=1,
)
show_meta = st.checkbox("显示原始 JSON 片段", value=False)
# Prev/Next navigation buttons in header row
left, mid, right = st.columns([1, 6, 1])
with left:
if st.button("⬅️ 上一题", use_container_width=True):
new_index = (st.session_state.file_index - 1) % len(files)
st.session_state.file_index = new_index
st.rerun()
with right:
if st.button("下一题 ➡️", use_container_width=True):
new_index = (st.session_state.file_index + 1) % len(files)
st.session_state.file_index = new_index
st.rerun()
current_path = files[st.session_state.file_index]
data = load_json(current_path)
st.write(f"当前文件:`{os.path.basename(current_path)}` ({st.session_state.file_index + 1}/{len(files)})")
st.divider()
# Resolve question/solution for chosen variant
if selected_variant == "original":
q_text, s_text = get_original_qa(data)
else:
q_text, s_text = get_variant_qa(data, selected_variant)
# Assemble content blocks to render
blocks: List[Tuple[str, str]] = []
if show_mode in ("Question", "Both"):
if q_text:
blocks.append(("Question", q_text))
else:
st.warning("该选择下未找到 Question。")
if show_mode in ("Solution", "Both"):
if s_text:
blocks.append(("Solution", s_text))
else:
st.warning("该选择下未找到 Solution/Answer。")
if len(blocks) > 0:
if render_mode.startswith("Markdown"):
for heading, content in blocks:
st.subheader(heading)
render_markdown_with_math(content)
st.markdown("---")
else:
render_with_mathjax_html(blocks)
else:
st.info("无可显示内容。")
if show_meta:
with st.expander("原始 JSON(截断显示)", expanded=False):
# Show a trimmed version to avoid overwhelming the UI
preview: Dict[str, Any] = {}
for k in ("index", "type", "tag", "difficulty", "problem_type"):
if k in data:
preview[k] = data[k]
preview["keys"] = list(data.keys())
st.json(preview)
st.caption("完整 JSON 路径:")
st.code(current_path)
st.caption("提示:可以使用侧边栏选择具体文件与变体,也可通过顶部按钮快速前后切换 JSON。")
if __name__ == "__main__":
main()