#!/usr/bin/env python3
"""Helpers for reading course data from the courses.illinois.edu catalog XML endpoints."""

import argparse
import concurrent.futures
import json
import re
import sys
import time
from dataclasses import dataclass, asdict
from typing import Dict, List, Optional, Tuple
from xml.etree import ElementTree as ET

import requests

BASE_URL = "https://courses.illinois.edu/cisapp/explorer/catalog"

# Element names that, when present, hold the prerequisite text directly.
_PREREQ_TAGS = ("prerequisites", "prerequisite", "Prerequisites", "Prerequisite")

# Free-text elements searched, in this order, when no dedicated element exists.
_PREREQ_FALLBACK_TAGS = ("courseSectionInformation", "description")

# Captures everything after a "Prerequisite:" / "Prerequisites:" label up to
# the end of the text (DOTALL lets the capture run across line breaks).
_PREREQ_PATTERN = re.compile(
    r"Prerequisite[s]?:\s*(.*)$", flags=re.IGNORECASE | re.DOTALL
)


@dataclass
class CourseRecord:
    """Catalog data collected for a single course."""

    # Identifier of the course; the exact format is decided by the code that
    # builds the record (not fully visible in this part of the file).
    index: str
    # Course title; may be None.
    name: Optional[str]
    # Course description text; may be None.
    description: Optional[str]
    # Prerequisite text; may be None.
    prerequisites: Optional[str]


def parse_xml(content: bytes) -> ET.Element:
    """Parse raw XML and return its root element.

    Args:
        content: The XML document as bytes.

    Returns:
        The root ``Element`` of the parsed document.

    Raises:
        RuntimeError: If ``content`` is not well-formed XML. The original
            ``ParseError`` is attached as ``__cause__``.
    """
    try:
        return ET.fromstring(content)
    except ET.ParseError as exc:
        # Chain explicitly so the traceback shows the parser error as the cause.
        raise RuntimeError(f"Failed to parse XML: {exc}") from exc


def fetch(session: requests.Session, url: str) -> bytes:
    """Issue a GET request and return the raw response body.

    Args:
        session: Session used to perform the request.
        url: Address to fetch.

    Returns:
        The response body as bytes.

    Raises:
        RuntimeError: If the response status code is anything other than 200.
        Exceptions raised by ``session.get`` itself are not caught here.
    """
    resp = session.get(url, timeout=30)
    if resp.status_code != 200:
        raise RuntimeError(f"GET {url} -> {resp.status_code}")
    return resp.content


def _collect_ids(root: ET.Element, tag: str) -> List[str]:
    """Return the non-empty ``id`` attributes of every ``tag`` element under ``root``."""
    candidates = (node.attrib.get("id") for node in root.findall(f".//{tag}"))
    return [node_id for node_id in candidates if node_id]


def get_subject_ids(session: requests.Session, year: str, term: str) -> List[str]:
    """List the subject ids published for a year and term.

    Fetches ``{BASE_URL}/{year}/{term}.xml`` and collects the ``id`` attribute
    of every ``subject`` element; elements without a non-empty id are skipped.

    Raises:
        RuntimeError: If the request does not return 200 or the body is not
            well-formed XML.
    """
    url = f"{BASE_URL}/{year}/{term}.xml"
    root = parse_xml(fetch(session, url))
    return _collect_ids(root, "subject")


def get_course_numbers_for_subject(session: requests.Session, year: str, term: str, subject: str) -> List[str]:
    """List the course ids published for one subject in a year and term.

    Fetches ``{BASE_URL}/{year}/{term}/{subject}.xml`` and collects the ``id``
    attribute of every ``course`` element; elements without a non-empty id are
    skipped.

    Raises:
        RuntimeError: If the request does not return 200 or the body is not
            well-formed XML.
    """
    url = f"{BASE_URL}/{year}/{term}/{subject}.xml"
    root = parse_xml(fetch(session, url))
    return _collect_ids(root, "course")


def extract_prerequisite_text(root: ET.Element) -> Optional[str]:
    """Find the prerequisite text inside a course XML document.

    The lookup proceeds in order and stops at the first hit:

    1. A dedicated prerequisite element (any of ``_PREREQ_TAGS``) whose text
       is non-blank; its stripped text is returned.
    2. The text of ``courseSectionInformation``, then of ``description``: if
       it contains a ``Prerequisite:`` / ``Prerequisites:`` label
       (case-insensitive), everything after the label is returned, stripped.

    Args:
        root: Root element of the course document.

    Returns:
        The prerequisite text, or ``None`` when nothing matched. A label with
        nothing after it yields an empty string.
    """
    # Prefer explicitly labeled prerequisite elements if present.
    for tag in _PREREQ_TAGS:
        found = root.find(f".//{tag}")
        if found is not None and found.text:
            labeled = found.text.strip()
            if labeled:
                return labeled

    # Otherwise scan the free-text elements for a "Prerequisite:" label.
    for tag in _PREREQ_FALLBACK_TAGS:
        node = root.find(f".//{tag}")
        if node is None or not node.text:
            continue
        match = _PREREQ_PATTERN.search(node.text.strip())
        if match:
            return match.group(1).strip()

    return None


def get_course_details(session: requests.Session, year: str, term: str, subject: str, course_number: str) -> CourseRecord:
    url = f"{BASE_URL}/{year}/{term}/{subject}/{course_number}.xml"
    root = parse_xml(fetch(session, url))
    # Title/name may be in