diff options
Diffstat (limited to 'scripts')
| -rw-r--r-- | scripts/fetch_papers.py | 18 |
1 files changed, 12 insertions, 6 deletions
diff --git a/scripts/fetch_papers.py b/scripts/fetch_papers.py index 1bd201c..9ac0ef2 100644 --- a/scripts/fetch_papers.py +++ b/scripts/fetch_papers.py @@ -56,11 +56,13 @@ def is_relevant_by_api(title, summary, client, model="gpt-4-turbo"): def fetch_papers_combined(days=1): now_utc = datetime.datetime.now(datetime.timezone.utc) start_utc = now_utc - datetime.timedelta(days=days) + start_str = start_utc.strftime("%Y%m%d%H%M") end_str = now_utc.strftime("%Y%m%d%H%M") - search_query = f"submittedDate:[{start_str} TO {end_str}]" + search_query = f"submittedDate:[{start_str} TO {end_str}]" base_url = "http://export.arxiv.org/api/query" + step = 100 start = 0 all_entries = [] @@ -73,29 +75,32 @@ def fetch_papers_combined(days=1): "start": start, "max_results": step } - print(f"[DEBUG] fetching arXiv: {start} to {start+step}") + print(f"[DEBUG] fetching arXiv entries: {start} to {start+step}") + try: resp = requests.get(base_url, params=params, timeout=30) if resp.status_code != 200: - print("[ERROR] HTTP Status:", resp.status_code) + print(f"[ERROR] HTTP Status Code: {resp.status_code}") break feed = feedparser.parse(resp.content) batch = feed.entries print(f"[DEBUG] fetched batch size: {len(batch)}") + if not batch: break all_entries.extend(batch) start += step + if start >= 3000: - print("[DEBUG] reached fetch limit 3000, stop.") + print("[DEBUG] Reached 3000 entries limit, stopping.") break except Exception as e: - print("[ERROR] fetching arXiv:", e) + print("[ERROR] Exception during fetching from arXiv:", e) break - print(f"[DEBUG] total papers fetched from arXiv: {len(all_entries)}") + print(f"[DEBUG] total fetched papers: {len(all_entries)}") local_candidates = [] for e in all_entries: @@ -131,6 +136,7 @@ def fetch_papers_combined(days=1): return final_matched + def update_readme_in_repo(papers, token, repo_name): if not papers: print("[INFO] No matched papers, skip README update.") |
