# python3爬取猫眼top100电影信息 (Python 3 scraper for the Maoyan Top-100 movie board)
import json
import re
from multiprocessing import Pool

import requests
from requests.exceptions import RequestException
def get_one_page(url):
    """Fetch *url* with an HTTP GET and return the response body as text.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded response body when the server answers with status 200.
        ``None`` when the status is anything else or when ``requests``
        raises a ``RequestException`` (connection failure, timeout, ...),
        so callers must be prepared for ``None``.
    """
    try:
        # requests applies no timeout by default; without one a stalled
        # connection would block the calling worker indefinitely.
        response = requests.get(url, timeout=10)
    except RequestException:
        return None
    if response.status_code == 200:
        return response.text
    return None
# Matches one <dd>...</dd> movie entry and captures, in order: rank, poster
# URL, title, cast line, release line, integer and fractional score parts.
# NOTE(review): the closing-tag anchors (</i>, </a>, </p>, </dd>) were
# reconstructed because the pattern in the file had lost them, which left
# every lazy group matching the empty string -- confirm against the live
# page markup.
_BOARD_ITEM_RE = re.compile(
    r'<dd>.*?board-index.*?>(\d+)</i>.*?data-src="(.*?)".*?name"><a'
    r'.*?>(.*?)</a>.*?star">(.*?)</p>.*?releasetime">(.*?)</p>'
    r'.*?integer">(.*?)</i>.*?fraction">(.*?)</i>.*?</dd>',
    re.S,  # entries span several lines, so "." must also match newlines
)


def parse_one_page(html):
    """Yield one dict per movie entry found in a board page.

    Args:
        html: Page source as a string. ``None`` or an empty string (for
            example after a failed download) yields nothing instead of
            raising.

    Yields:
        Dicts with the keys '排名' (rank), '图片' (poster URL), '名称'
        (title), '演员' (cast), '时间' (release date) and '评分' (score),
        all values being strings.
    """
    if not html:
        return
    for rank, image, name, star, release, integer, fraction in (
            _BOARD_ITEM_RE.findall(html)):
        yield {
            '排名': rank,
            '图片': image,
            '名称': name,
            # Drops the first 3 characters of the cast line -- presumably
            # the "主演：" label; verify against the page.
            '演员': star.strip()[3:],
            # Drops the first 5 characters of the release line --
            # presumably the "上映时间：" label; verify against the page.
            '时间': release.strip()[5:],
            # The score is rendered as two elements, e.g. "9." and "6".
            '评分': integer + fraction,
        }
def write_to_file(content):
    """Append *content* to ``result.txt`` as a single JSON line.

    Args:
        content: Any JSON-serialisable object (the script passes one movie
            dict per call).

    The file is opened in append mode with UTF-8 encoding, and
    ``ensure_ascii=False`` keeps non-ASCII text readable instead of
    emitting ``\\uXXXX`` escapes.
    """
    # The ``with`` statement closes the file on exit; no explicit close().
    with open('result.txt', 'a', encoding='utf-8') as f:
        f.write(json.dumps(content, ensure_ascii=False) + '\n')
def main(offset):
    """Download one board page, print its entries and append them to disk.

    Args:
        offset: Index of the first entry on the requested page; the script
            calls this with 0, 10, ..., 90.

    Nothing is printed or written when the page could not be downloaded.
    """
    # The offset is sent as a query parameter. Appending it straight to the
    # path (as before) produced addresses such as ".../board/410".
    url = "http://maoyan.com/board/4?offset=" + str(offset)
    html = get_one_page(url)
    if html is None:
        # get_one_page signals a failed download with None.
        return
    for item in parse_one_page(html):
        print(item)
        write_to_file(item)
if __name__ == "__main__":
    # Fetch the ten board pages (offsets 0, 10, ..., 90) in parallel worker
    # processes. The context manager shuts the pool down once map() returns.
    with Pool() as pool:
        pool.map(main, [i * 10 for i in range(10)])
# 网友评论 (web-page residue: "user comments")