"""Page through Douban's ``search_subjects`` JSON endpoint and count the results.

Run as a script, it requests the "热门" tag for ``type=tv`` in pages of
``page_limit`` items, starting at ``page_start``, until the server answers
with a non-200 status or an empty ``subjects`` list, then prints how many
subjects were collected.

Provenance: added as ``spider/douban/01_doubanapi.py`` in commit 75f3399b
("Douban popular API", chzhang, 2023-02-04).
"""

page_limit = 50
page_start = 0


user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36'

SEARCH_URL = 'https://movie.douban.com/j/search_subjects'
# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10

all_data = []


def fetch_subjects_page(start, limit, subject_type='tv', tag='热门'):
    """Fetch one page of subjects from the search endpoint.

    Args:
        start: Offset of the first item, sent as ``page_start``.
        limit: Number of items requested, sent as ``page_limit``.
        subject_type: Value of the ``type`` query parameter. The script uses
            ``'tv'``; ``'movie'`` is the other value the original code used.
        tag: Value of the ``tag`` query parameter.

    Returns:
        The list found under the ``subjects`` key of the JSON response, or
        ``None`` when the status is not 200, the body is not valid JSON, or
        the body has no ``subjects`` list.

    Raises:
        requests.RequestException: On connection errors or timeouts.
    """
    # Imported here so the paging logic below can be imported and exercised
    # without the HTTP dependency being installed.
    import requests

    resp = requests.get(
        url=SEARCH_URL,
        # Let requests build and percent-encode the query string (the tag is
        # non-ASCII) instead of interpolating it into the URL by hand.
        params={
            'type': subject_type,
            'tag': tag,
            'page_limit': limit,
            'page_start': start,
        },
        headers={'User-Agent': user_agent},
        timeout=REQUEST_TIMEOUT,
    )
    if resp.status_code != 200:
        return None
    try:
        # Parse the body once; the result is reused for both the emptiness
        # check and the accumulation done by the caller.
        payload = resp.json()
    except ValueError:
        # A 200 response whose body is not JSON is treated like a failed
        # page rather than crashing the whole crawl.
        return None
    subjects = payload.get('subjects') if isinstance(payload, dict) else None
    return subjects if isinstance(subjects, list) else None


def collect_subjects(fetch_page, page_limit=50, page_start=0, max_pages=None):
    """Collect subjects page by page until a page comes back empty or failed.

    Args:
        fetch_page: Callable ``fetch_page(start, limit)`` returning a list of
            subjects, or a falsy value (``None`` / ``[]``) to stop paging.
        page_limit: Page size; also the step added to the offset per page.
        page_start: Offset of the first page.
        max_pages: Optional cap on the number of pages requested. ``None``
            (the default) keeps requesting until ``fetch_page`` returns a
            falsy value.

    Returns:
        A list with the subjects of every fetched page, in request order.
    """
    collected = []
    start = page_start
    pages_done = 0
    while max_pages is None or pages_done < max_pages:
        print(f'get page start: {start}')
        subjects = fetch_page(start, page_limit)
        if not subjects:
            break
        collected.extend(subjects)
        start += page_limit
        pages_done += 1
    return collected


def main():
    """Crawl every page for the configured settings and print the total."""
    all_data.extend(collect_subjects(fetch_subjects_page, page_limit, page_start))
    print(len(all_data))


if __name__ == '__main__':
    main()