diff options
| -rw-r--r-- | package-lock.json | 1 | ||||
| -rw-r--r-- | package.json | 1 | ||||
| -rw-r--r-- | scripts/scrape.js | 6 |
3 files changed, 8 insertions, 0 deletions
diff --git a/package-lock.json b/package-lock.json index ccb5d41..0f0ed5d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1672,6 +1672,7 @@ "node": ">=14.17" } }, + "node_modules/undici": { "version": "6.21.3", "resolved": "https://registry.npmjs.org/undici/-/undici-6.21.3.tgz", diff --git a/package.json b/package.json index 1b076e1..daf0c3b 100644 --- a/package.json +++ b/package.json @@ -16,6 +16,7 @@ "fast-xml-parser": "^4.4.2", "react": "^18.3.0", "react-cytoscapejs": "^2.0.0", + "react-dom": "^18.3.0", "undici": "^6.21.3" }, diff --git a/scripts/scrape.js b/scripts/scrape.js index 93f6a41..d4b2ecb 100644 --- a/scripts/scrape.js +++ b/scripts/scrape.js @@ -2,6 +2,7 @@ import { XMLParser } from "fast-xml-parser"; import fs from "fs/promises"; import path from "path"; import dns from "node:dns"; + import { ProxyAgent } from "undici"; dns.setDefaultResultOrder("ipv4first"); @@ -20,18 +21,23 @@ async function getXML(url) { const res = await fetch(url, { dispatcher }); if (!res.ok) throw new Error(`Request failed: ${res.status} ${url}`); return parser.parse(await res.text()); + } async function scrapeSchedule(year, term) { const catalog = {}; const termRoot = await getXML(`${BASE}/schedule/${year}/${term}.xml`); + const subjects = termRoot.term?.subjects?.subject; if (!subjects) throw new Error(`Unexpected XML structure for ${year} ${term}`); + const subjHrefs = Array.isArray(subjects) ? subjects.map(s => s['@_href']) : [subjects['@_href']]; for (const subjURL of subjHrefs) { const subjXML = await getXML(subjURL); + const courses = subjXML.subject?.courses?.course || []; + const courseList = Array.isArray(courses) ? courses : [courses]; for (const c of courseList) { const courseURL = c['@_href']; |
