summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- package-lock.json 13
-rw-r--r-- package.json 4
-rw-r--r-- scripts/scrape.js 24
3 files changed, 35 insertions, 6 deletions
diff --git a/package-lock.json b/package-lock.json
index c3c715c..0f0ed5d 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -13,7 +13,8 @@
"fast-xml-parser": "^4.4.2",
"react": "^18.3.0",
"react-cytoscapejs": "^2.0.0",
- "react-dom": "^18.3.0"
+ "react-dom": "^18.3.0",
+ "undici": "^6.21.3"
},
"devDependencies": {
"@types/node": "^24.2.0",
@@ -1671,6 +1672,16 @@
"node": ">=14.17"
}
},
+
+ "node_modules/undici": {
+ "version": "6.21.3",
+ "resolved": "https://registry.npmjs.org/undici/-/undici-6.21.3.tgz",
+ "integrity": "sha512-gBLkYIlEnSp8pFbT64yFgGE6UIB9tAkhukC23PmMDCe5Nd+cRqKxSjw5y54MK2AZMgZfJWMaNE4nYUHgi1XEOw==",
+ "license": "MIT",
+ "engines": {
+ "node": ">=18.17"
+ }
+ },
"node_modules/undici-types": {
"version": "7.10.0",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.10.0.tgz",
diff --git a/package.json b/package.json
index 4844bba..daf0c3b 100644
--- a/package.json
+++ b/package.json
@@ -16,7 +16,9 @@
"fast-xml-parser": "^4.4.2",
"react": "^18.3.0",
"react-cytoscapejs": "^2.0.0",
- "react-dom": "^18.3.0"
+
+ "react-dom": "^18.3.0",
+ "undici": "^6.21.3"
},
"devDependencies": {
"@types/node": "^24.2.0",
diff --git a/scripts/scrape.js b/scripts/scrape.js
index 2bfca5e..d4b2ecb 100644
--- a/scripts/scrape.js
+++ b/scripts/scrape.js
@@ -3,25 +3,41 @@ import fs from "fs/promises";
import path from "path";
import dns from "node:dns";
+import { ProxyAgent } from "undici";
+
dns.setDefaultResultOrder("ipv4first");
+const proxy = process.env.HTTPS_PROXY || process.env.https_proxy || process.env.HTTP_PROXY || process.env.http_proxy;
+const dispatcher = proxy ? new ProxyAgent(proxy) : undefined;
+
const [ , , YEAR = "2025", TERM = "fall" ] = process.argv;
-const parser = new XMLParser({ ignoreAttributes: false });
+// UIUC's API uses XML namespaces (e.g. `<ns2:term>`). In order for the
+// returned object to have plain keys like `term` and `subject`, we instruct
+// fast-xml-parser to strip the namespace prefixes.
+const parser = new XMLParser({ ignoreAttributes: false, removeNSPrefix: true });
const BASE = `https://courses.illinois.edu/cisapp/explorer`;
async function getXML(url) {
- return parser.parse(await (await fetch(url)).text());
+ const res = await fetch(url, { dispatcher });
+ if (!res.ok) throw new Error(`Request failed: ${res.status} ${url}`);
+ return parser.parse(await res.text());
+
}
async function scrapeSchedule(year, term) {
const catalog = {};
const termRoot = await getXML(`${BASE}/schedule/${year}/${term}.xml`);
- const subjects = termRoot.term.subject;
+
+ const subjects = termRoot.term?.subjects?.subject;
+ if (!subjects) throw new Error(`Unexpected XML structure for ${year} ${term}`);
+
const subjHrefs = Array.isArray(subjects) ? subjects.map(s => s['@_href']) : [subjects['@_href']];
for (const subjURL of subjHrefs) {
const subjXML = await getXML(subjURL);
- const courses = subjXML.subject.course || [];
+
+ const courses = subjXML.subject?.courses?.course || [];
+
const courseList = Array.isArray(courses) ? courses : [courses];
for (const c of courseList) {
const courseURL = c['@_href'];