summaryrefslogtreecommitdiff
path: root/spider/douban/01_doubanapi.py
blob: a8ac11629c5fdc7ec3e5366e12a8e295ce52bb2e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import requests

# Collect every "热门" (popular) subject from Douban's search_subjects JSON
# endpoint by walking it page by page, then print how many were gathered.

# Number of subjects requested per page, and the offset of the first page.
page_limit = 50
page_start = 0

# Which catalogue to query; the endpoint also accepts 'movie'.
subject_type = 'tv'

# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the script forever.
request_timeout = 10

user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36'

# Accumulates the 'subjects' entries of every page fetched so far.
all_data = []

while True:
    print(f'get page start: {page_start}')
    resp = requests.get(
        url=f'https://movie.douban.com/j/search_subjects?type={subject_type}&tag=热门&page_limit={page_limit}&page_start={page_start}',
        headers={'User-Agent': user_agent},
        timeout=request_timeout)
    if resp.status_code != 200:
        # Report the failure so it is not mistaken for "no more pages".
        print(f'stop: unexpected HTTP status {resp.status_code}')
        break

    # Parse the body once; a 200 response is not guaranteed to be JSON.
    try:
        payload = resp.json()
    except ValueError:
        print('stop: response body is not valid JSON')
        break

    subjects = payload.get('subjects') if isinstance(payload, dict) else None
    if not subjects:
        # An empty (or missing) list marks the end of the result set.
        break

    all_data += subjects
    page_start += page_limit

print(len(all_data))