summaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
authorYuren Hao <97327730+YurenHao0426@users.noreply.github.com>2025-04-13 22:52:11 -0700
committerGitHub <noreply@github.com>2025-04-13 22:52:11 -0700
commit08cb1db196068d6dbb23a0eb4b620de66c29916f (patch)
tree61e30aabac029b0a232c18855a2e6a45ccce1a35 /scripts
parent48d0e05af0f040ce6f52b6ced120d5a4ba7f9b7e (diff)
Update fetch_papers.py
Diffstat (limited to 'scripts')
-rw-r--r--scripts/fetch_papers.py18
1 file changed, 12 insertions, 6 deletions
diff --git a/scripts/fetch_papers.py b/scripts/fetch_papers.py
index 1bd201c..9ac0ef2 100644
--- a/scripts/fetch_papers.py
+++ b/scripts/fetch_papers.py
@@ -56,11 +56,13 @@ def is_relevant_by_api(title, summary, client, model="gpt-4-turbo"):
def fetch_papers_combined(days=1):
now_utc = datetime.datetime.now(datetime.timezone.utc)
start_utc = now_utc - datetime.timedelta(days=days)
+
start_str = start_utc.strftime("%Y%m%d%H%M")
end_str = now_utc.strftime("%Y%m%d%H%M")
- search_query = f"submittedDate:[{start_str} TO {end_str}]"
+ search_query = f"submittedDate:[{start_str} TO {end_str}]"
base_url = "http://export.arxiv.org/api/query"
+
step = 100
start = 0
all_entries = []
@@ -73,29 +75,32 @@ def fetch_papers_combined(days=1):
"start": start,
"max_results": step
}
- print(f"[DEBUG] fetching arXiv: {start} to {start+step}")
+ print(f"[DEBUG] fetching arXiv entries: {start} to {start+step}")
+
try:
resp = requests.get(base_url, params=params, timeout=30)
if resp.status_code != 200:
- print("[ERROR] HTTP Status:", resp.status_code)
+ print(f"[ERROR] HTTP Status Code: {resp.status_code}")
break
feed = feedparser.parse(resp.content)
batch = feed.entries
print(f"[DEBUG] fetched batch size: {len(batch)}")
+
if not batch:
break
all_entries.extend(batch)
start += step
+
if start >= 3000:
- print("[DEBUG] reached fetch limit 3000, stop.")
+ print("[DEBUG] Reached 3000 entries limit, stopping.")
break
except Exception as e:
- print("[ERROR] fetching arXiv:", e)
+ print("[ERROR] Exception during fetching from arXiv:", e)
break
- print(f"[DEBUG] total papers fetched from arXiv: {len(all_entries)}")
+ print(f"[DEBUG] total fetched papers: {len(all_entries)}")
local_candidates = []
for e in all_entries:
@@ -131,6 +136,7 @@ def fetch_papers_combined(days=1):
return final_matched
+
def update_readme_in_repo(papers, token, repo_name):
if not papers:
print("[INFO] No matched papers, skip README update.")