美文网首页
双色球历史数据爬取

双色球历史数据爬取

作者: Lonelyroots | 来源:发表于2022-06-27 22:55 被阅读0次
    import pymysql
    from pymongo import MongoClient
    from requests_html import HTMLSession
    
    class Spider:
        """Scrape historical double-colour-ball (SSQ) lottery draws and store them.

        One HTTP request fetches the history table from 500.com; every table row
        is turned into an 8-field tuple and written to MySQL (and optionally to
        MongoDB via ``saveMongoDB``).
        """

        # Default history page; the start/end query parameters bound the issue range.
        DEFAULT_URL = "https://datachart.500.com/ssq/history/newinc/history.php?start=19000&end=21018"

        # Placeholders are filled in by the driver, never by Python string
        # formatting, so scraped text cannot break or alter the statement.
        INSERT_SQL = "insert into dual_colored_ball values (%s,%s,%s,%s,%s,%s,%s,%s);"

        def __init__(self, url=None):
            """Open the HTTP session and both database connections.

            Args:
                url: History page to scrape. Defaults to ``DEFAULT_URL``.
            """
            self.url = self.DEFAULT_URL if url is None else url
            self.session = HTMLSession()

            # Connect exactly once here -- never inside the per-row loop.
            # MongoDB connection (kept on self so run() can close it).
            self.mongo_client = MongoClient('localhost', 8881)
            mongo_db = self.mongo_client['dual_colored_ball']  # created lazily if missing
            self.my_set = mongo_db['words']  # collection, created lazily if missing

            # MySQL connection
            host = "localhost"
            port = 8001
            schema = "spider"
            user = "admin"
            password = "qwe123"
            self.conn = pymysql.connect(host=host, port=port, db=schema, user=user, password=password)
            self.cursor = self.conn.cursor()  # cursor used for every insert

        def parse(self):
            """Fetch the history page and persist one record per table row."""
            response = self.session.get(url=self.url)
            for tr in response.html.xpath('//tbody[@id="tdata"]/tr'):
                number = tr.xpath('//td[1]/text()')[0]     # issue number
                red = tr.xpath(
                    '//td[2]/text()|//td[3]/text()|//td[4]/text()'
                    '|//td[5]/text()|//td[6]/text()|//td[7]/text()'
                )       # six red balls (list)
                blue = tr.xpath('//td[8]/text()')[0]      # blue ball
                prizePool = tr.xpath('//td[10]/text()')[0]       # prize pool (yuan)
                FirstPrize = tr.xpath('//td[11]/text()|//td[12]/text()')       # first prize cells (list)
                SecondPrize = tr.xpath('//td[13]/text()|//td[14]/text()')       # second prize cells (list)
                Total_bet = tr.xpath('//td[15]/text()')[0]       # total bet amount
                Date = tr.xpath('//td[16]/text()')[0]     # draw date
                data = (number, red, blue, prizePool, FirstPrize, SecondPrize, Total_bet, Date)
                # self.saveMongoDB(data)
                self.saveMySQL(data)
                # Prints a tuple mixing strings and lists, e.g.
                # ('19077', ['09', '11', '13', '18', '21', '22'], '15', '928,983,242',
                #  ['1', '10,000,000'], ['118', '221,011'], '331,156,004', '2019-07-04')
                print(data)

        def saveMySQL(self, data):
            """Insert one parsed row into the ``dual_colored_ball`` MySQL table.

            Args:
                data: 8-tuple as built by ``parse``; items 1, 4 and 5 are lists of
                    strings and are stored joined with ``-``.

            Raises:
                ValueError: if the issue number (``data[0]``) is not an integer.
            """
            params = (
                int(data[0]),
                '-'.join(data[1]),      # flatten list -> "09-11-..."
                data[2],
                data[3],
                '-'.join(data[4]),
                '-'.join(data[5]),
                data[6],
                data[7],
            )
            # Values go in as bound parameters: the driver escapes them.
            self.cursor.execute(self.INSERT_SQL, params)
            self.conn.commit()       # commit each row so it is persisted immediately

        def saveMongoDB(self, data):
            """Insert one parsed row into the MongoDB collection.

            Args:
                data: 8-tuple as built by ``parse``.
            """
            # A single document is being stored, so insert_one is the direct call.
            self.my_set.insert_one({
                "number": data[0],
                "red": data[1],
                "blue": data[2],
                "prizePool": data[3],
                "FirstPrize": data[4],
                "SecondPrize": data[5],
                "Total_bet": data[6],
                "Date": data[7],
            })

        def run(self):
            """Run the scrape, then release the database resources.

            The cleanup sits in ``finally`` so the connections are closed even
            when fetching, parsing or inserting raises.
            """
            try:
                self.parse()
            finally:
                self.cursor.close()
                self.conn.close()
                self.mongo_client.close()
    
    if __name__ == '__main__':
        # Script entry point: build the spider (opens the DB connections)
        # and run a single scrape.
        Spider().run()
    

    文章到这里就结束了!希望大家能多多支持 Python(系列)!六个月带大家学会 Python;关于本文章的问题,可以私聊我!以后每天都会发布新的文章,喜欢的点点关注!一个陪伴你学习 Python 的新青年!不管多忙都会更新下去,一起加油!

    Editor:Lonelyroots

    注:仅用于学习!

    相关文章

      网友评论

          本文标题:双色球历史数据爬取

          本文链接:https://www.haomeiwen.com/subject/aanovrtx.html