1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
|
#!/usr/bin/env python3
import argparse
import json
import re
from typing import Any, Dict, Iterable, List
# Matches a course reference such as "CS 225" or "MATH 221A": a 2-4 letter
# uppercase subject code, optional whitespace, then a 2-3 digit number with an
# optional trailing uppercase letter. Group 1 captures the subject and group 2
# the number; the match is case-sensitive.
COURSE_TOKEN = re.compile(r"\b([A-Z]{2,4})\s*(\d{2,3}[A-Z]?)\b")
# Case-insensitive patterns; a hit on any one of them means the prerequisite
# text states that there are no prerequisites.
NONE_PATTERNS = [
    re.compile(r"^\s*none\.?\s*$", re.IGNORECASE),
    re.compile(r"no prerequisites", re.IGNORECASE),
    re.compile(r"prerequisite[s]?:\s*none\b", re.IGNORECASE),
]


def is_none_text(text: str) -> bool:
    """Return True when *text* says the course has no prerequisites.

    The text is stripped of surrounding whitespace and then searched with
    every pattern in ``NONE_PATTERNS``; the first hit decides the result.
    """
    stripped = text.strip()
    for pattern in NONE_PATTERNS:
        if pattern.search(stripped):
            return True
    return False
def extract_course_refs(text: str) -> List[str]:
    """Return every course reference found in *text*, in order of appearance.

    Each reference is normalised to ``"<SUBJECT> <NUMBER>"`` with a single
    space, regardless of the spacing used in the source text. Duplicates are
    kept.
    """
    # COURSE_TOKEN has two capture groups, so findall yields (subject, number) tuples.
    return [
        f"{subject} {number}"
        for subject, number in COURSE_TOKEN.findall(text)
    ]
# Regex fragments, searched against lower-cased prerequisite text, that signal
# a requirement other than a plain list of courses (consent, class standing,
# program restrictions, GPA/credit thresholds, concurrent enrollment, ...).
# They are unanchored, so they also match inside longer words.
NON_COURSE_KEYWORDS = [
    r"consent", r"permission", r"approval",
    r"standing", r"senior", r"junior", r"sophomore", r"freshman",
    r"major", r"minor", r"program", r"restricted", r"enrollment", r"enrolled",
    r"gpa", r"grade", r"minimum", r"credit hour", r"credits?", r"hours?",
    r"registration", r"concurrent", r"co-requisite", r"corequisite",
    r"department", r"instructor",
]


def has_non_course_requirements(text: str) -> bool:
    """Return True when *text* contains any ``NON_COURSE_KEYWORDS`` pattern.

    The comparison is case-insensitive: the text is lower-cased before each
    keyword pattern is searched for.
    """
    lowered = text.lower()
    for keyword in NON_COURSE_KEYWORDS:
        if re.search(keyword, lowered) is not None:
            return True
    return False
def is_course_only(text: str) -> bool:
    """Return True when *text* consists solely of course references and connectors.

    The text is rejected outright if ``has_non_course_requirements`` flags it.
    Otherwise every course token is replaced by the placeholder ``COURSE``,
    then punctuation, connector words and the quantifier "at least" are
    removed. The text is course-only when nothing but ``COURSE`` placeholders
    (or nothing at all) is left.
    """
    t = text.strip()
    if has_non_course_requirements(t):
        return False
    # Remove course tokens, then see if any nontrivial tokens remain besides basic connectors
    placeholder = COURSE_TOKEN.sub("COURSE", t)
    # Remove punctuation
    simplified = re.sub(r"[(),.;]", " ", placeholder)
    # Remove connector words. "and/or" must precede "and" and "or": alternation
    # is tried left to right, and "and" followed by "/" already satisfies \b,
    # which would otherwise leave a stray "/" behind and reject the text.
    # NOTE(review): "credit in" cannot be reached here because
    # NON_COURSE_KEYWORDS contains `credits?`, so such text is rejected by the
    # guard above — confirm which behaviour is intended.
    simplified = re.sub(
        r"\b(and/or|and|or|either|both|one of|two of|with|credit in)\b",
        " ",
        simplified,
        flags=re.IGNORECASE,
    )
    # Remove common quantifiers
    simplified = re.sub(r"\b(at\s+least)\b", " ", simplified, flags=re.IGNORECASE)
    # Collapse whitespace
    simplified = re.sub(r"\s+", " ", simplified).strip()
    # If empty or only words like COURSE left, treat as course-only
    return simplified == "" or re.fullmatch(r"(COURSE\s*)+", simplified) is not None
def analyze(courses: Iterable[Dict[str, Any]]) -> Dict[str, List[Dict[str, Any]]]:
    """Sort course records into buckets by the shape of their prerequisite text.

    Returns a dict with three lists:

    * ``"none"`` - the original records whose ``"prerequisites"`` value is
      missing, falsy, blank, or recognised by ``is_none_text``.
    * ``"course_only"`` - summaries (index, name, prerequisites, courses) for
      records whose text passes ``is_course_only``.
    * ``"remaining"`` - summaries (index, name, prerequisites) for the rest.
    """
    without_prereqs: List[Dict[str, Any]] = []
    course_only: List[Dict[str, Any]] = []
    remaining: List[Dict[str, Any]] = []

    for course in courses:
        prereq_text = course.get("prerequisites") or ""

        # Blank text is checked first so the pattern helper only sees real text.
        if not prereq_text.strip() or is_none_text(prereq_text):
            without_prereqs.append(course)
            continue

        summary = {
            "index": course.get("index"),
            "name": course.get("name"),
            "prerequisites": prereq_text,
        }
        if is_course_only(prereq_text):
            summary["courses"] = extract_course_refs(prereq_text)
            course_only.append(summary)
        else:
            remaining.append(summary)

    return {
        "none": without_prereqs,
        "course_only": course_only,
        "remaining": remaining,
    }
def main() -> int:
    """Command-line entry point: classify courses and write one JSON file per bucket.

    Reads the JSON file given by the positional ``input`` argument, runs
    ``analyze`` on it, creates ``--outdir`` if needed, writes ``none.json``,
    ``course_only.json`` and ``remaining.json`` there, prints the size of each
    bucket, and returns 0.
    """
    import os

    parser = argparse.ArgumentParser(description="Analyze UIUC course prerequisite text")
    parser.add_argument("input", default="data/courses.json", nargs="?", help="Input courses JSON array")
    parser.add_argument("--outdir", default="data/analysis", help="Output directory")
    args = parser.parse_args()

    with open(args.input, "r", encoding="utf-8") as f:
        data = json.load(f)
    res = analyze(data)

    os.makedirs(args.outdir, exist_ok=True)
    # One output file per result bucket, written in a fixed order.
    outputs = (
        ("none", "none.json"),
        ("course_only", "course_only.json"),
        ("remaining", "remaining.json"),
    )
    for bucket, filename in outputs:
        with open(os.path.join(args.outdir, filename), "w", encoding="utf-8") as f:
            json.dump(res[bucket], f, ensure_ascii=False, indent=2)

    print(f"none: {len(res['none'])}")
    print(f"course_only: {len(res['course_only'])}")
    print(f"remaining: {len(res['remaining'])}")
    return 0
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    exit_code = main()
    raise SystemExit(exit_code)
|