diff options
| -rw-r--r-- | spider/douban/01_doubanapi.py | 24 | ||||
| -rw-r--r-- | spider/douban/02_useragent.py | 12 |
2 files changed, 36 insertions, 0 deletions
diff --git a/spider/douban/01_doubanapi.py b/spider/douban/01_doubanapi.py
new file mode 100644
index 0000000..a8ac116
--- /dev/null
+++ b/spider/douban/01_doubanapi.py
@@ -0,0 +1,24 @@
+import requests
+
+page_limit = 50
+page_start = 0
+
+
+user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36'
+
+all_data = []
+
+while True:
+    print(f'get page start: {page_start}')
+    # resp = requests.get(url=f'https://movie.douban.com/j/search_subjects?type=movie&tag=热门&page_limit={page_limit}&page_start={page_start}',
+    #                     headers={'User-Agent': user_agent})
+    resp = requests.get(
+        url=f'https://movie.douban.com/j/search_subjects?type=tv&tag=热门&page_limit={page_limit}&page_start={page_start}',
+        headers={'User-Agent': user_agent})
+    if resp.status_code != 200 or len(resp.json()['subjects']) == 0:
+        break
+    all_data += resp.json()['subjects']
+    page_start += page_limit
+
+print(len(all_data))
+
diff --git a/spider/douban/02_useragent.py b/spider/douban/02_useragent.py
new file mode 100644
index 0000000..155213e
--- /dev/null
+++ b/spider/douban/02_useragent.py
@@ -0,0 +1,12 @@
+import requests
+import json
+
+headers = {
+    "Referer": "https://m.douban.com/tv/american",
+    "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1"
+}
+
+
+resp = requests.get('https://m.douban.com/rexxar/api/v2/movie/recommend?refresh=0&start=0&count=20&selected_categories=%7B%22%E5%9C%B0%E5%8C%BA%22:%22%E5%8D%8E%E8%AF%AD%22%7D&uncollect=false&tags=%E5%8D%8E%E8%AF%AD', headers=headers)
+for item in json.loads(resp.content.decode())['items']:
+    print(item['title'], item['rating']['value'])
\ No newline at end of file
