summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorblackhao <13851610112@163.com>2025-04-28 23:10:16 -0500
committerblackhao <13851610112@163.com>2025-04-28 23:10:16 -0500
commit1aa82ba951444e985775a55a5bb99e766aa84c31 (patch)
tree519d368ea558cbfd362c89f1bf0fb111262fd766
parentb6e9675883f1ccf1011e7a1df898908958022099 (diff)
init commit
-rw-r--r--.github/workflows/fetch.yml37
-rw-r--r--fetch_update.py58
-rw-r--r--requirements.txt3
3 files changed, 98 insertions, 0 deletions
diff --git a/.github/workflows/fetch.yml b/.github/workflows/fetch.yml
new file mode 100644
index 0000000..81c2a5b
--- /dev/null
+++ b/.github/workflows/fetch.yml
@@ -0,0 +1,37 @@
+name: Fetch Bilibili Replies
+
+on:
+ schedule:
+ # Runs every 2 hours at minute 0. Cron is evaluated in UTC, so the even UTC hours map to even hours in China time (UTC+8).
+ - cron: '0 */2 * * *'
+ workflow_dispatch: # allows manual triggering
+
+jobs:
+ fetch:
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: '3.12'
+
+ - name: Install deps
+ run: |
+ python -m pip install -r requirements.txt
+
+ - name: Fetch & update Excel
+ env:
+ BILIBILI_UID: 382384081 # ⚠️ or move this value into secrets
+ run: |
+ python fetch_update.py
+
+ - name: Push changes
+ if: ${{ github.ref == 'refs/heads/main' }}
+ env:
+ GH_TOKEN: ${{ secrets.GH_PAT }} # your Personal Access Token
+ run: |
+ git config --global url."https://${GH_TOKEN}@github.com/".insteadOf "https://github.com/"
+ git push --quiet
diff --git a/fetch_update.py b/fetch_update.py
new file mode 100644
index 0000000..d055dad
--- /dev/null
+++ b/fetch_update.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python3
+"""
+Fetch Bilibili replies for a given UID and incrementally merge them into an Excel file.
+When new rows were added, automatically `git add` the file and `git commit -m "data: update <date>"`.
+"""
+import os, sys, json, subprocess, pathlib, datetime
+import requests, pandas as pd
+
+BASE_URL = "https://api.aicu.cc/api/v3/search/getreply"  # endpoint queried by fetch_all
+UID = int(os.getenv("BILIBILI_UID", "1")) # Target UID, read from the BILIBILI_UID environment variable (e.g. set in the Actions workflow); defaults to 1
+EXCEL_FILE = pathlib.Path("replies.xlsx")  # workbook that accumulates the fetched replies
+MAX_P_PAGE = 500  # sent as the `ps` query parameter; presumably the page size - confirm against the API
+
+def fetch_all(uid: int):
+ replies, pn = [], 1
+ while True:
+ r = requests.get(BASE_URL, params=dict(uid=uid, pn=pn, ps=MAX_P_PAGE, mode=0), timeout=15)
+ r.raise_for_status()
+ data = r.json()
+ if data.get("code") != 0:
+ raise RuntimeError(f"API 返回 code={data['code']}")
+ batch = data["data"]["replies"]
+ replies.extend(batch)
+ if data["data"]["cursor"]["is_end"]:
+ break
+ pn += 1
+ return pd.DataFrame(replies)
+
+def merge_and_save(df_new: pd.DataFrame, excel_path: pathlib.Path):
+ if excel_path.exists():
+ df_old = pd.read_excel(excel_path)
+ # 以 reply 的唯一 id 去重(字段为 id_str 或 rpid,看 API 输出)
+ key = "rpid"
+ df_all = pd.concat([df_old, df_new]).drop_duplicates(key, keep="first")
+ else:
+ df_all = df_new
+
+ # 若有新增才写盘,返回是否更新
+ if excel_path.exists() and len(df_all) == len(pd.read_excel(excel_path)):
+ return False
+ df_all.to_excel(excel_path, index=False)
+ return True
+
+def git_commit_if_changed():
+ subprocess.run(["git", "add", "replies.xlsx"], check=True)
+ # --quiet 防止 log 过长;如果无变动会返回 1,用 non-zero exit 吞掉
+ subprocess.run(["git", "-c", "user.name='github-actions'",
+ "-c", "user.email='actions@github.com'",
+ "commit", "-m", f'data: update {datetime.date.today()}'],
+ check=False)
+
+if __name__ == "__main__":
+ df = fetch_all(UID)
+ if merge_and_save(df, EXCEL_FILE):
+ git_commit_if_changed()
+ print("✅ Excel 已更新并提交")
+ else:
+ print("ℹ️ 无新数据")
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..1c34850
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+pandas
+openpyxl # 写 xlsx
+requests