美文网首页
BeautifulSoup使用示例代码

BeautifulSoup使用示例代码

作者: 我的袜子都是洞 | 来源:发表于2018-10-26 15:09 被阅读2次
    import requests
    from bs4 import BeautifulSoup
    
    def get_one_page(url, timeout=10):
        """Download ``url`` and return the response body as text.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds to wait for the server before giving up. Without
                a timeout ``requests.get`` may wait indefinitely on a server
                that never answers.

        Returns:
            The response text when the status code is 200, otherwise ``None``.

        Raises:
            requests.exceptions.RequestException: Propagated unchanged from
                ``requests.get`` (connection failure, timeout, ...).
        """
        # Send a browser-like User-Agent instead of the library default.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'
        }
        response = requests.get(url, headers=headers, timeout=timeout)
        if response.status_code == 200:
            return response.text
        return None
    
    def page_parser(html):
        """Parse a board page and print the score of every ``<dd>`` entry.

        For each ``<dd>`` element the rank, title, image URL, cast, release
        time and score are extracted; only the score is printed.

        Args:
            html: Markup of the page. ``None`` or an empty value (for example
                after a failed download) is accepted and produces no output.

        Returns:
            None.
        """
        # get_one_page() returns None on a non-200 response, and BeautifulSoup
        # cannot be constructed from None, so bail out before parsing.
        if not html:
            return
        soup = BeautifulSoup(html, 'lxml')
        for dd in soup.select('dd'):
            # Rank
            num = dd.select('.board-index')[0].get_text()
            # Title
            name = dd.find(attrs={'class': 'name'}).a.string
            # Image URL (read from the data-src attribute)
            pic_src = dd.find(attrs={'class': 'board-img'}).attrs['data-src']
            # Cast, with surrounding whitespace removed
            star = dd.select('.star')[0].get_text()
            star = star.strip()
            # Release time
            releasetime = dd.select('.releasetime')[0].string
            # Score: the integer and fraction parts are separate elements
            integer = dd.select('.integer')[0].string
            fraction = dd.select('.fraction')[0].string
            score = integer + fraction
            print(score)
    
    def main():
        """Fetch the board page and print the score of each listed entry.

        If the page cannot be fetched (``get_one_page`` returns ``None``), a
        message is written to stderr and nothing is parsed.
        """
        import sys

        url = 'http://maoyan.com/board/4'
        html = get_one_page(url)
        if html is None:
            # get_one_page signals a non-200 response with None; parsing that
            # value would fail, so report the problem and stop here.
            print('Failed to fetch ' + url, file=sys.stderr)
            return
        page_parser(html)
    
    # Run the scraper only when this file is executed as a script, not on import.
    if __name__ == '__main__':
        main()
    

    相关文章

      网友评论

          本文标题:BeautifulSoup使用示例代码

          本文链接:https://www.haomeiwen.com/subject/lmzktqtx.html