diff options
| -rw-r--r-- | .github/workflows/fetch.yml | 37 | ||||
| -rw-r--r-- | fetch_update.py | 129 | ||||
| -rw-r--r-- | requirements.txt | 3 |
3 files changed, 169 insertions, 0 deletions
diff --git a/.github/workflows/fetch.yml b/.github/workflows/fetch.yml
new file mode 100644
--- /dev/null
+++ b/.github/workflows/fetch.yml
@@ -0,0 +1,37 @@
+name: Fetch Bilibili Replies
+
+on:
+  schedule:
+    # Every 2 hours on even UTC hours, i.e. even hours in China time (UTC+8)
+    - cron: '0 */2 * * *'
+  workflow_dispatch:   # manual trigger
+
+jobs:
+  fetch:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+
+      - name: Install deps
+        run: |
+          python -m pip install -r requirements.txt
+
+      - name: Fetch & update Excel
+        env:
+          BILIBILI_UID: 382384081   # ⚠️ or move this to secrets
+        run: |
+          python fetch_update.py
+
+      - name: Push changes
+        if: ${{ github.ref == 'refs/heads/main' }}
+        env:
+          GH_TOKEN: ${{ secrets.GH_PAT }}   # your Personal Access Token
+        run: |
+          git config --global url."https://${GH_TOKEN}@github.com/".insteadOf "https://github.com/"
+          git push --quiet
diff --git a/fetch_update.py b/fetch_update.py
new file mode 100644
--- /dev/null
+++ b/fetch_update.py
@@ -0,0 +1,129 @@
+#!/usr/bin/env python3
+"""Fetch a Bilibili user's replies by UID and merge them into an Excel file.
+
+When the workbook changes it is staged and committed with
+`git add` followed by `git commit -m "data: update <date>"`.
+"""
+import datetime
+import json
+import os
+import pathlib
+import subprocess
+import sys
+
+import pandas as pd
+import requests
+
+BASE_URL = "https://api.aicu.cc/api/v3/search/getreply"
+# Target UID; set BILIBILI_UID in the environment (or via Actions secrets).
+UID = int(os.getenv("BILIBILI_UID", "1"))
+EXCEL_FILE = pathlib.Path("replies.xlsx")
+MAX_P_PAGE = 500  # sent as the `ps` query parameter
+
+
+def fetch_all(uid: int) -> pd.DataFrame:
+    """Download every reply page for *uid* and return them as one DataFrame.
+
+    Raises:
+        requests.HTTPError: if a request returns an HTTP error status.
+        RuntimeError: if the response payload does not report ``code`` 0.
+    """
+    replies, pn = [], 1
+    while True:
+        r = requests.get(
+            BASE_URL,
+            params=dict(uid=uid, pn=pn, ps=MAX_P_PAGE, mode=0),
+            timeout=15,
+        )
+        r.raise_for_status()
+        data = r.json()
+        code = data.get("code")
+        if code != 0:
+            # Report the .get() value so a payload without "code" raises
+            # RuntimeError instead of KeyError.
+            raise RuntimeError(f"API 返回 code={code}")
+        batch = data["data"]["replies"] or []
+        replies.extend(batch)
+        # Also stop on an empty page so a cursor that never reports is_end
+        # cannot keep the loop running forever.
+        if not batch or data["data"]["cursor"]["is_end"]:
+            break
+        pn += 1
+    return pd.DataFrame(replies)
+
+
+def merge_and_save(df_new: pd.DataFrame, excel_path: pathlib.Path) -> bool:
+    """Merge *df_new* into the workbook at *excel_path*, de-duplicated by rpid.
+
+    Returns:
+        True if the workbook was written, False if nothing changed.
+
+    Raises:
+        KeyError: if *df_new* has rows but no ``rpid`` column.
+    """
+    if df_new.empty:
+        # Nothing was fetched: do not create or rewrite the workbook.
+        return False
+
+    # Unique reply id column (id_str or rpid, depending on the API output).
+    key = "rpid"
+    if key not in df_new.columns:
+        raise KeyError(f"column {key!r} missing from fetched replies")
+
+    if excel_path.exists():
+        df_old = pd.read_excel(excel_path)
+        old_count = len(df_old)
+        df_all = pd.concat([df_old, df_new], ignore_index=True)
+    else:
+        old_count = 0
+        df_all = df_new
+    # De-duplicate in both branches: a single fetch could contain repeats too.
+    df_all = df_all.drop_duplicates(key, keep="first")
+
+    # Write only when the row count changed; report whether we wrote.
+    if len(df_all) == old_count:
+        return False
+    df_all.to_excel(excel_path, index=False)
+    return True
+
+
+def git_commit_if_changed() -> bool:
+    """Stage the workbook and commit it when the staged content differs.
+
+    Returns:
+        True if a commit was created, False if there was nothing to commit.
+
+    Raises:
+        subprocess.CalledProcessError: if ``git add`` or ``git commit`` fails.
+    """
+    target = str(EXCEL_FILE)
+    subprocess.run(["git", "add", target], check=True)
+    # `git diff --cached --quiet` exits 0 when nothing is staged, so the
+    # "nothing to commit" case is skipped without hiding real commit errors.
+    staged = subprocess.run(
+        ["git", "diff", "--cached", "--quiet", "--", target], check=False
+    )
+    if staged.returncode == 0:
+        return False
+    # argv is passed without a shell, so the config values must not be
+    # quoted: git would keep the quote characters as part of the identity.
+    subprocess.run(
+        [
+            "git",
+            "-c", "user.name=github-actions",
+            "-c", "user.email=actions@github.com",
+            "commit", "--quiet",
+            "-m", f"data: update {datetime.date.today()}",
+        ],
+        check=True,
+    )
+    return True
+
+
+if __name__ == "__main__":
+    df = fetch_all(UID)
+    if merge_and_save(df, EXCEL_FILE):
+        git_commit_if_changed()
+        print("✅ Excel 已更新并提交")
+    else:
+        print("ℹ️ 无新数据")
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+pandas
+openpyxl # writes xlsx
+requests
