summary | refs | log | tree | commit | diff
path: root/scripts/fetch_papers.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/fetch_papers.py')
-rw-r--r-- scripts/fetch_papers.py 12
1 files changed, 7 insertions, 5 deletions
diff --git a/scripts/fetch_papers.py b/scripts/fetch_papers.py
index 7de3f12..9bea97f 100644
--- a/scripts/fetch_papers.py
+++ b/scripts/fetch_papers.py
@@ -5,6 +5,7 @@ import datetime
from github import Github
from openai import OpenAI
+
ALLOWED_CATEGORIES = [
"cs.AI", "cs.CL", "cs.CV", "cs.LG", "cs.NE", "cs.RO",
"cs.IR", "stat.ML"
@@ -54,9 +55,6 @@ def is_relevant_by_api(title, summary, client, model="gpt-4-turbo"):
return False
def fetch_papers_combined(days=1):
- import datetime
- import requests
- import feedparser
now_utc = datetime.datetime.now(datetime.timezone.utc)
cutoff_utc = now_utc - datetime.timedelta(days=days)
@@ -90,14 +88,18 @@ def fetch_papers_combined(days=1):
# 2. Filter by published date >= cutoff
for entry in batch:
- published = datetime.datetime.fromisoformat(entry.published)
+ # — parse the ISO Z‑time correctly —
+ published = datetime.datetime.strptime(
+ entry.published, "%Y-%m-%dT%H:%M:%SZ"
+ ).replace(tzinfo=datetime.timezone.utc)
+
if published >= cutoff_utc:
all_entries.append(entry)
else:
- # since sorted descending, once we hit older papers we can stop entirely
start = None
break
+
if start is None or len(batch) < step:
break