summaryrefslogtreecommitdiff
path: root/scripts/fetch_papers.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/fetch_papers.py')
-rw-r--r--scripts/fetch_papers.py28
1 files changed, 24 insertions, 4 deletions
diff --git a/scripts/fetch_papers.py b/scripts/fetch_papers.py
index 9db3d79..2f45e7a 100644
--- a/scripts/fetch_papers.py
+++ b/scripts/fetch_papers.py
@@ -58,7 +58,6 @@ def fetch_papers_combined(days=1):
start_utc = now_utc - datetime.timedelta(days=days)
start_str = start_utc.strftime("%Y%m%d%H%M")
end_str = now_utc.strftime("%Y%m%d%H%M")
-
search_query = f"submittedDate:[{start_str} TO {end_str}]"
base_url = "http://export.arxiv.org/api/query"
@@ -76,6 +75,7 @@ def fetch_papers_combined(days=1):
}
resp = requests.get(base_url, params=params, timeout=30)
if resp.status_code != 200:
+ print(f"[ERROR] Failed fetching from arXiv. Status code: {resp.status_code}")
break
feed = feedparser.parse(resp.content)
batch = feed.entries
@@ -87,6 +87,8 @@ def fetch_papers_combined(days=1):
if start >= 3000:
break
+ print(f"[DEBUG] arXiv returned total {len(all_entries)} papers from initial fetch.")
+
local_candidates = [
{
"title": e.title,
@@ -99,16 +101,24 @@ def fetch_papers_combined(days=1):
if any(cat in ALLOWED_CATEGORIES for cat in [t.term for t in e.tags]) and advanced_filter(e)
]
+ print(f"[DEBUG] Number of papers after local filtering: {len(local_candidates)}")
+
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
print("[WARNING] No OPENAI_API_KEY found. Skip second filter.")
return local_candidates
client = OpenAI(api_key=openai_api_key)
- final_matched = [p for p in local_candidates if is_relevant_by_api(p["title"], p["summary"], client)]
+ final_matched = []
+ for p in local_candidates:
+ if is_relevant_by_api(p["title"], p["summary"], client):
+ final_matched.append(p)
+
+ print(f"[DEBUG] Number of papers after OpenAI API filtering: {len(final_matched)}")
return final_matched
+
def update_readme_in_repo(papers, token, repo_name):
if not papers:
return
@@ -140,16 +150,26 @@ def update_readme_in_repo(papers, token, repo_name):
def main():
days = 1
+ print(f"[DEBUG] Starting fetch_papers_combined with days={days}")
papers = fetch_papers_combined(days=days)
- print(f"[FINAL RESULT] Total papers matched: {len(papers)}") # <--- 添加此行
+ print(f"[DEBUG] After fetch_papers_combined: {len(papers)} papers matched.")
+ if not papers:
+ print("[DEBUG] No papers matched after both local and API filters.")
github_token = os.getenv("TARGET_REPO_TOKEN")
target_repo_name = os.getenv("TARGET_REPO_NAME")
+
+ print(f"[DEBUG] Github Token Set: {'Yes' if github_token else 'No'}")
+ print(f"[DEBUG] Target Repo Name: {target_repo_name if target_repo_name else 'Not Set'}")
+
if github_token and target_repo_name and papers:
+ print("[DEBUG] Proceeding to update README in repo...")
update_readme_in_repo(papers, github_token, target_repo_name)
+ print("[DEBUG] README update completed.")
else:
- print("[INFO] No matched papers or missing GitHub credentials.")
+ print("[INFO] Skipped README update due to missing credentials or no papers matched.")
+
if __name__ == "__main__":
main()