美文网首页
2018-03-06

2018-03-06

作者: chenqj_barton | 来源:发表于2018-03-06 22:06 被阅读0次

    Python 3 爬取猫眼电影 TOP100 榜单信息

    import json
    import re
    from multiprocessing import Pool

    import requests
    from requests.exceptions import RequestException

    def get_one_page(url, timeout=10, headers=None):
        """Download ``url`` and return the response body as text.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot block the caller indefinitely.
            headers: Optional mapping of extra HTTP request headers; ``None``
                sends the library defaults.

        Returns:
            The response text when the server answers with status 200,
            otherwise ``None`` (any other status, or a ``requests`` error
            such as a connection failure or timeout).
        """
        try:
            response = requests.get(url, headers=headers, timeout=timeout)
        except RequestException:
            # Best-effort fetch: a failed request is reported as "no page".
            return None
        if response.status_code == 200:
            return response.text
        return None

    def parse_one_page(html):
        """Yield one dict per movie entry found in a board page.

        Args:
            html: Page markup as a string. ``None`` or an empty string (what a
                failed download produces) yields nothing instead of raising.

        Yields:
            A dict with the keys '排名' (rank), '图片' (image URL),
            '名称' (title), '演员' (cast), '时间' (release time) and
            '评分' (score). The cast and release-time texts are stripped of
            surrounding whitespace and of their first 3 and 5 characters
            respectively (the label prefix); the score is the concatenation
            of the integer and fraction parts.
        """
        if not html:
            return

        # NOTE(review): the published listing lost its HTML tag literals (the
        # string was unterminated and the lazy groups had no closing
        # delimiter, so they could only capture empty text). The tags below
        # are reconstructed around the class names that survived -- confirm
        # against the live page markup.
        pattern = re.compile(
            r'<dd>.*?board-index.*?>(\d+)</i>'
            r'.*?data-src="(.*?)"'
            r'.*?name"><a.*?>(.*?)</a>'
            r'.*?star">(.*?)</p>'
            r'.*?releasetime">(.*?)</p>'
            r'.*?integer">(.*?)</i>'
            r'.*?fraction">(.*?)</i>'
            r'.*?</dd>',
            re.S,  # entries span several lines, so '.' must match newlines
        )

        for item in pattern.findall(html):
            yield {
                '排名': item[0],
                '图片': item[1],
                '名称': item[2],
                '演员': item[3].strip()[3:],
                '时间': item[4].strip()[5:],
                '评分': item[5] + item[6],
            }

    def write_to_file(content, path='result.txt'):
        """Append ``content`` to ``path`` as a single line of JSON.

        Args:
            content: Any JSON-serialisable object.
            path: File to append to; created if it does not exist.

        Non-ASCII characters are written as-is (``ensure_ascii=False``) in
        UTF-8 rather than as ``\\uXXXX`` escapes.
        """
        line = json.dumps(content, ensure_ascii=False) + '\n'
        # The ``with`` block closes the file; no explicit close() is needed.
        with open(path, 'a', encoding='utf-8') as f:
            f.write(line)

    def main(offset):
        """Fetch one board page, then print and store every entry on it.

        Args:
            offset: Value sent as the ``offset`` query parameter of the board
                URL; the script entry point passes multiples of 10.
        """
        # The offset belongs in the query string; appending it straight to the
        # board id would request a different path ("/board/40", "/board/410").
        url = 'http://maoyan.com/board/4?offset=' + str(offset)
        html = get_one_page(url)
        if html is None:
            # Download failed or returned a non-200 status: nothing to parse.
            return
        for movie in parse_one_page(html):
            print(movie)
            write_to_file(movie)

    if __name__ == "__main__":
        # Process the ten pages (offsets 0, 10, ..., 90) in parallel worker
        # processes. ``map`` blocks until every page is done, and the context
        # manager releases the pool afterwards.
        # Sequential alternative: for i in range(10): main(i * 10)
        with Pool() as pool:
            pool.map(main, [i * 10 for i in range(10)])

    相关文章

      网友评论

          本文标题:2018-03-06

          本文链接:https://www.haomeiwen.com/subject/dafdfftx.html