Python原生爬虫小demo

作者: 三不小青年 | 来源:发表于2018-09-17 21:54 被阅读91次

    from urllib import request

    import re

    '''

    可用 beautifulSoup scrapy 框架,爬虫,反爬虫,反反爬虫,ip封,代理ip

    获取内容

    提取内容

    精炼内容

    内容排序

    输出,存数据库等

    '''

    class Spider():
        """Minimal standard-library crawler for a streamer listing page.

        The pipeline run by :meth:`go` is: fetch the page, extract the raw
        name / viewer-count fragments with regular expressions, refine them
        into clean strings, sort by viewer count (descending) and print the
        resulting ranking.
        """

        url = "https://www.panda.tv/cate/lol"

        # [\s\S] matches any character including newlines; *? is the
        # non-greedy quantifier; the parentheses capture only the inner part.
        root_pattern = r'<div class="video-info">([\s\S]*?)</div>'

        # NOTE(review): in the published listing both patterns below had been
        # reduced to '([\s\S]*?)' (their HTML delimiters were lost), which can
        # only ever capture an empty string. The delimiters are reconstructed
        # here -- TODO confirm them against the target page's actual markup.
        name_pattern = r'</i>([\s\S]*?)</span>'

        number_pattern = r'<span class="video-number">([\s\S]*?)</span>'

        def __fetch_content(self):
            """Download ``Spider.url`` and return the body decoded as UTF-8."""
            # The context manager closes the HTTP response even if read() fails.
            with request.urlopen(Spider.url) as response:
                raw = response.read()
            return str(raw, encoding='utf-8')

        def __analysis(self, htmls):
            """Extract raw matches for every ``video-info`` block in *htmls*.

            Returns a list of ``{'name': [...], 'number': [...]}`` dicts whose
            values are the (possibly empty) ``re.findall`` result lists.
            """
            anchors = []
            # Patterns are read from the class, not from a module-level instance.
            for html in re.findall(Spider.root_pattern, htmls):
                name = re.findall(Spider.name_pattern, html)
                number = re.findall(Spider.number_pattern, html)
                anchors.append({'name': name, 'number': number})
            return anchors

        def __refine(self, anchors):
            """Reduce each raw match list to one whitespace-stripped string.

            Entries for which either pattern found nothing are dropped instead
            of raising ``IndexError``.
            """
            return [
                {
                    'name': anchor['name'][0].strip(),
                    'number': anchor['number'][0].strip(),
                }
                for anchor in anchors
                if anchor['name'] and anchor['number']
            ]

        def __sort(self, anchors):
            """Return *anchors* ordered by viewer count, largest first."""
            # reverse=True gives descending order.
            return sorted(anchors, key=self.__sort_seed, reverse=True)

        def __sort_seed(self, anchor):
            """Sort key: the numeric viewer count of *anchor* as a float.

            A count containing '万' is multiplied by 10000. A count with no
            digits at all sorts as ``0.0``.
            """
            # Accept an optional decimal part so that "1.5万" is read as 1.5.
            match = re.search(r'\d+(?:\.\d+)?', anchor['number'])
            if match is None:
                return 0.0
            number = float(match.group())
            if '万' in anchor['number']:
                number *= 10000
            return number

        def __show(self, anchors):
            """Print one ``rank:N;name:...;number:...;`` line per anchor."""
            for rank, anchor in enumerate(anchors, start=1):
                print('rank:' + str(rank) + ';' + 'name:' + anchor['name']
                      + ';' + 'number:' + anchor['number'] + ';')

        def go(self):
            """Run the whole pipeline: fetch, extract, refine, sort, print."""
            htmls = self.__fetch_content()
            anchors = self.__analysis(htmls)
            anchors = self.__refine(anchors)
            # The ranking is only meaningful once the list has been sorted.
            anchors = self.__sort(anchors)
            self.__show(anchors)

    # Script entry: create a Spider and run its pipeline once (go() fetches the page over the network).
    spider = Spider()

    spider.go()

    爬取结果

    相关文章

      网友评论

        本文标题:Python原生爬虫小demo

        本文链接:https://www.haomeiwen.com/subject/egodnftx.html