From d02e3061743d9a63690b0d4fd406b8d14bff82bc Mon Sep 17 00:00:00 2001 From: Yuren Hao <97327730+YurenHao0426@users.noreply.github.com> Date: Wed, 16 Apr 2025 15:41:42 -0500 Subject: Update fetch_papers.py --- scripts/fetch_papers.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'scripts') diff --git a/scripts/fetch_papers.py b/scripts/fetch_papers.py index 7de3f12..9bea97f 100644 --- a/scripts/fetch_papers.py +++ b/scripts/fetch_papers.py @@ -5,6 +5,7 @@ import datetime from github import Github from openai import OpenAI + ALLOWED_CATEGORIES = [ "cs.AI", "cs.CL", "cs.CV", "cs.LG", "cs.NE", "cs.RO", "cs.IR", "stat.ML" @@ -54,9 +55,6 @@ def is_relevant_by_api(title, summary, client, model="gpt-4-turbo"): return False def fetch_papers_combined(days=1): - import datetime - import requests - import feedparser now_utc = datetime.datetime.now(datetime.timezone.utc) cutoff_utc = now_utc - datetime.timedelta(days=days) @@ -90,14 +88,18 @@ def fetch_papers_combined(days=1): # 2. Filter by published date >= cutoff for entry in batch: - published = datetime.datetime.fromisoformat(entry.published) + # — parse the ISO Z‑time correctly — + published = datetime.datetime.strptime( + entry.published, "%Y-%m-%dT%H:%M:%SZ" + ).replace(tzinfo=datetime.timezone.utc) + if published >= cutoff_utc: all_entries.append(entry) else: - # since sorted descending, once we hit older papers we can stop entirely start = None break + if start is None or len(batch) < step: break -- cgit v1.2.3