美文网首页
Python41_爬取猫眼电影排行榜

Python41_爬取猫眼电影排行榜

作者: jxvl假装 | 来源:发表于2019-09-28 10:56 被阅读0次
import re
import urllib.request

def load_data(page=1):
    """Download one page of the Maoyan board and return its HTML as text.

    Args:
        page: 1-based page number. Each page is addressed by an ``offset``
            query parameter equal to ``(page - 1) * 10``.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        urllib.error.URLError: if the request fails.
    """
    offset = (page - 1) * 10
    url = "https://maoyan.com/board/4?offset=" + str(offset)
    # Use the response as a context manager so the underlying connection is
    # closed even when read() or decode() raises.
    with urllib.request.urlopen(url) as resp:
        return resp.read().decode("utf-8")

def load_ranking(html):
    """Extract the ranking text of every ``<dd`` entry in *html*.

    For each ``<dd`` occurrence, the captured value is the content of the
    first following tag that starts with ``<i``, up to its closing ``</i>``.

    Returns:
        A list of the captured strings, in document order.
    """
    # re.S lets "." cross line breaks, since one entry spans several lines.
    ranking_pattern = re.compile('<dd.*?<i.*?>(.*?)</i>', re.S)
    return ranking_pattern.findall(html)
    
def load_name(html):
    """Extract the movie name of every ``<dd`` entry in *html*.

    For each ``<dd`` occurrence, the captured value is the text of the ``<a``
    link that directly follows the opening ``<p class="name" ...>`` tag.

    Returns:
        A list of the captured names, in document order.
    """
    # Raw string: "\s" in a normal string literal is an invalid escape
    # sequence (a warning today, an error in future Python versions).
    reg = r'<dd.*?<p\sclass="name".*?><a.*?>(.*?)</a>'
    # re.S lets "." cross line breaks, since one entry spans several lines.
    return re.findall(reg, html, re.S)

def load_info(html):
    """Extract the ``<p class="star">`` text of every ``<dd`` entry in *html*.

    The paragraph is looked up after the entry's
    ``<div class="board-item-main"`` container.

    Returns:
        A list of the captured strings with surrounding whitespace stripped,
        in document order.
    """
    # Raw string: "\s" in a normal string literal is an invalid escape
    # sequence (a warning today, an error in future Python versions).
    reg = r'<dd.*?<div\sclass="board-item-main".*?<p\sclass="star".*?>(.*?)</p>'
    # re.S lets "." cross line breaks, since one entry spans several lines.
    return [text.strip() for text in re.findall(reg, html, re.S)]

def load_time(html):
    """Extract the ``<p class="releasetime">`` text of every ``<dd`` entry.

    The paragraph is looked up after the entry's
    ``<div class="board-item-main"`` container.

    Returns:
        A list of the captured strings with surrounding whitespace stripped,
        in document order.
    """
    # Raw string: "\s" in a normal string literal is an invalid escape
    # sequence (a warning today, an error in future Python versions).
    reg = r'<dd.*?<div\sclass="board-item-main".*?<p\sclass="releasetime".*?>(.*?)</p>'
    # re.S lets "." cross line breaks, since one entry spans several lines.
    return [text.strip() for text in re.findall(reg, html, re.S)]

def load_score(html):
    """Extract the score of every ``<dd`` entry in *html*.

    The score is stored in two elements that follow the entry's
    ``<p class="releasetime"`` paragraph: ``<i class="integer">`` and
    ``<i class="fraction">``. Both parts are collected separately and then
    concatenated pairwise.

    Returns:
        A list of score strings (integer part + fraction part), in document
        order. Pairing stops at the shorter of the two match lists.
    """
    # Raw strings: "\s" in a normal string literal is an invalid escape
    # sequence (a warning today, an error in future Python versions).
    reg_integer = r'<dd.*?<p\sclass="releasetime".*?<i\sclass="integer".*?>(.*?)</i>'
    reg_fraction = r'<dd.*?<p\sclass="releasetime".*?<i\sclass="fraction".*?>(.*?)</i>'

    # re.S lets "." cross line breaks, since one entry spans several lines.
    integers = re.findall(reg_integer, html, re.S)
    fractions = re.findall(reg_fraction, html, re.S)

    # findall already yields strings, so plain concatenation is enough.
    return [whole + frac for whole, frac in zip(integers, fractions)]

def save_file(html):
    """Parse one board page and append its records to ``top.txt``.

    Each record combines, in order, the ranking, name, info, release time and
    score extracted from *html*. A record is printed and written as one
    tab-separated line. The number of records is limited by the shortest of
    the five extracted lists.

    Args:
        html: HTML text of one board page.
    """
    records = zip(
        load_ranking(html),
        load_name(html),
        load_info(html),
        load_time(html),
        load_score(html),
    )
    # Explicit UTF-8: the default encoding is locale-dependent and may be
    # unable to encode the scraped text.
    with open("top.txt", "a", encoding="utf-8") as f:
        for record in records:
            data = "\t".join(record)
            print(data)
            # Write the line and its terminating newline in one call.
            f.write(data + "\n")


# Fetch board pages 1 through 10 in order and append each page's records
# to the output file.
for page in range(1, 11):
    html = load_data(page)
    save_file(html)

相关文章

网友评论

      本文标题:Python41_爬取猫眼电影排行榜

      本文链接:https://www.haomeiwen.com/subject/vbhuuctx.html