美文网首页
Python(七十一)数据入库

Python(七十一)数据入库

作者: Lonelyroots | 来源:发表于2022-03-02 22:32 被阅读0次

    10_数据入库/01_MySQL查找.py:

    """
    
        创建UTF-8的数据库
            CREATE DATABASE 数据库名字 DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci;
            CREATE DATABASE spider DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci;
    
    """
    import pymysql
    
    # Connection settings for the MySQL server used by this example.
    host = "localhost"
    port = 8001
    db = "spider"
    user = "admin"
    password = "qwe123"

    conn = pymysql.connect(host=host, port=port, db=db, user=user, password=password)
    try:
        # DictCursor makes each fetched row a dict; the default cursor
        # (conn.cursor() with no argument) returns plain tuples instead.
        # Using the cursor as a context manager closes it before the
        # connection is closed, even when the query raises.
        with conn.cursor(pymysql.cursors.DictCursor) as cursor:
            cursor.execute("SELECT * FROM Students")        # run the query

            print(cursor.fetchone())    # first row only; cursor.fetchall() returns every row
    finally:
        # Always release the connection, whether or not the query succeeded.
        conn.close()
    

    10_数据入库/02_MySQL插入数据.py:

    """
        插入一条:insert into Students (name,age) values ('贾克斯',40);
        插入多条:insert into Students (name,age) values ('贾克斯',40),('贾克斯',40),('贾克斯',40);
    """
    import pymysql
    
    # Connection settings for the MySQL server used by this example.
    host = "localhost"
    port = 8001
    db = "spider"
    user = "admin"
    password = "qwe123"

    conn = pymysql.connect(host=host, port=port, db=db, user=user, password=password)
    try:
        # The cursor context manager closes the cursor before the connection,
        # even when the statement raises.
        with conn.cursor() as cursor:
            # Values are passed separately so the driver does the quoting and
            # escaping instead of the SQL text being assembled by hand.
            cursor.execute(
                "insert into Students (name,age) values (%s,%s);",
                ('贾克斯', 40),
            )

        # Required: the connection is not opened with autocommit, so the insert
        # only becomes permanent once it is committed.
        conn.commit()
    finally:
        # Closing without a commit (i.e. after an error) discards the pending insert.
        conn.close()
    

    10_数据入库/03_MongoDB插入数据.py:

    """
        pip install pymongo
    
        mongodb 默认端口 27017(本文示例中的服务配置在 8881 端口,请按自己的实际端口修改)
    
        在 ubuntu 下安装以及开启远程访问:
            1. sudo vi /etc/mongodb.conf
                将 bind_ip = 127.0.0.1 修改为 bind_ip = 0.0.0.0
            2. /etc/init.d/mongodb restart 重启服务
    
    """
    from pymongo import MongoClient
    
    # Connect to the MongoDB server used by this example.
    conn = MongoClient(host='localhost', port=8881)

    # Neither the database nor the collection has to exist beforehand:
    # they are created if missing.
    db = conn['students']
    my_set = db['words']

    # A field may hold a scalar (e.g. 'age': 18) or, as here, a list.
    # insert_many expects a list of documents, even for a single one.
    data = [
        {'name': '雷霆嘎巴2', 'age': [1, 2, 3, 4]},
    ]
    my_set.insert_many(data)
    

    10_数据入库/04_MongoDB查找数据.py:

    from pymongo import MongoClient
    
    # Connect to the same database/collection the insert example wrote to.
    conn = MongoClient(host='localhost', port=8881)
    db = conn['students']
    my_set = db['words']

    # find() returns a pymongo Cursor object rather than a list, so printing it
    # directly only shows the object; iterate over it to get the documents.
    for data in my_set.find():
        print(data)             # the whole document (a dict)
        print(data['age'])      # a single field of that document
    

    10_数据入库/05_爬取双色球历史数据.py:

    import pymysql
    from pymongo import MongoClient
    from requests_html import HTMLSession
    
    class Spider:
        """Scrape the lottery draw-history table at ``self.url`` and store each row.

        The database connections are opened once in ``__init__`` and released
        by ``run()``. ``parse()`` walks the table rows and hands each one to
        ``saveMySQL``; ``saveMongoDB`` is available as an alternative sink.
        """

        def __init__(self):
            """Create the HTTP session and open the MongoDB and MySQL connections."""
            self.url = "https://datachart.500.com/ssq/history/newinc/history.php?start=19000&end=21018"
            self.session = HTMLSession()

            # Connect exactly once, here -- never inside the per-row loop.
            # MongoDB: the database and collection are created if they do not exist.
            # The client is kept on the instance so run() can close it.
            self.mongo_client = MongoClient('localhost', 8881)
            self.my_set = self.mongo_client['dual_colored_ball']['words']

            # MySQL: one connection and one cursor shared by every insert.
            self.conn = pymysql.connect(
                host="localhost",
                port=8001,
                db="spider",
                user="admin",
                password="qwe123",
            )
            self.cursor = self.conn.cursor()

        def parse(self):
            """Download the history page and save every table row with saveMySQL.

            Each row becomes an 8-tuple:
            (number, red, blue, prize_pool, first_prize, second_prize, total_bet, date)
            where ``red``, ``first_prize`` and ``second_prize`` are lists of cell
            texts and the remaining items are single strings.

            Raises:
                IndexError: if a row lacks one of the single-valued cells.
            """
            response = self.session.get(url=self.url)
            for tr in response.html.xpath('//tbody[@id="tdata"]/tr'):
                number = tr.xpath('//td[1]/text()')[0]      # draw number
                red = tr.xpath('//td[2]/text()|//td[3]/text()|//td[4]/text()|//td[5]/text()|//td[6]/text()|//td[7]/text()')    # red balls
                blue = tr.xpath('//td[8]/text()')[0]        # blue ball
                # Column 9 is not read.
                prize_pool = tr.xpath('//td[10]/text()')[0]     # prize pool (yuan)
                first_prize = tr.xpath('//td[11]/text()|//td[12]/text()')       # first prize cells
                second_prize = tr.xpath('//td[13]/text()|//td[14]/text()')      # second prize cells
                total_bet = tr.xpath('//td[15]/text()')[0]      # total bet amount
                date = tr.xpath('//td[16]/text()')[0]           # draw date
                data = (number, red, blue, prize_pool, first_prize, second_prize, total_bet, date)
                # self.saveMongoDB(data)
                self.saveMySQL(data)
                # e.g. ('19077', ['09', '11', '13', '18', '21', '22'], '15', '928,983,242',
                #       ['1', '10,000,000'], ['118', '221,011'], '331,156,004', '2019-07-04')
                print(data)

        def saveMySQL(self, data):
            """Insert one draw into the ``dual_colored_ball`` table and commit.

            Args:
                data: the 8-tuple built by ``parse``. The list items (indexes
                    1, 4 and 5) are stored as '-'-joined strings and the draw
                    number (index 0) is converted to ``int``.

            Raises:
                ValueError: if ``data[0]`` is not an integer string.
            """
            # Values travel as query parameters so the driver escapes them;
            # scraped text is never spliced into the SQL statement itself.
            self.cursor.execute(
                "insert into dual_colored_ball values (%s,%s,%s,%s,%s,%s,%s,%s);",
                (
                    int(data[0]),
                    '-'.join(data[1]),      # join the list of red balls
                    data[2],
                    data[3],
                    '-'.join(data[4]),
                    '-'.join(data[5]),
                    data[6],
                    data[7],
                ),
            )
            self.conn.commit()      # required, otherwise the row is not persisted

        def saveMongoDB(self, data):
            """Insert one draw (the 8-tuple built by ``parse``) as a MongoDB document."""
            self.my_set.insert_one({
                "number": data[0],
                "red": data[1],
                "blue": data[2],
                "prizePool": data[3],
                "FirstPrize": data[4],
                "SecondPrize": data[5],
                "Total_bet": data[6],
                "Date": data[7],
            })

        def run(self):
            """Run the crawl, then release every connection even if parsing fails."""
            try:
                self.parse()
            finally:
                self.cursor.close()
                self.conn.close()
                self.mongo_client.close()
    
    if __name__ == '__main__':
        # Script entry point: build the spider (which opens the database
        # connections) and run the whole crawl.
        Spider().run()
    

    10_数据入库/06_链家网.py:

    from requests_html import HTMLSession
    import pymysql
    from pymongo import MongoClient
    import re
    import csv
    
    class Spider:
        """Scrape the house listings on the page at ``self.url`` and store them.

        Every listing is appended to the CSV file '房优选择.csv'. ``saveMongoDB``
        and ``saveMySQL`` are alternative sinks for the same 7-item row. The
        database connections are opened once in ``__init__`` and released by
        ``run()``.
        """

        def __init__(self):
            """Create the HTTP session and open the MongoDB and MySQL connections."""
            self.url = "https://cs.lianjia.com/ershoufang/"
            self.session = HTMLSession()

            # Connect exactly once, here -- never inside the per-listing loop.
            # The MongoDB client is kept on the instance so run() can close it.
            self.mongo_client = MongoClient('localhost', 8881)
            self.my_set = self.mongo_client['HOME_LINK_net']['house_datas']

            # MySQL: one connection and one cursor shared by every insert.
            self.conn = pymysql.connect(
                host="localhost",
                port=8001,
                db="spider",
                user="admin",
                password="qwe123",
            )
            self.cursor = self.conn.cursor()

        def _parse_listing(self, div):
            """Extract one listing element into the 7-item row tuple.

            Returns:
                (title, position, house, amount_of_attention, release_time,
                house_price, per_yuan), all strings.

            Raises:
                IndexError: if an expected node is missing from the listing.
            """
            title = div.xpath('//div[@class="title"]/a/text()')[0]
            position_small = div.xpath('//div[@class="positionInfo"]/a[1]/text()')[0].strip()   # e.g. 和美星城
            position_big = div.xpath('//div[@class="positionInfo"]/a[2]/text()')[0]
            position = '{}-{}'.format(position_small, position_big)     # e.g. 和美星城-暮云
            house = div.xpath('//div[@class="houseInfo"]/text()')[0]

            # The tricky part: followInfo holds an icon <span> followed by one
            # text node that carries both values, e.g.
            #     <div class="followInfo">
            #       <span class="starIcon"></span>
            #       "0人关注 / 7天以前发布"
            #     </div>
            # so the text is split on '/' and each half is stripped.
            follow_parts = div.xpath('//div[@class="followInfo"]/text()')[0].split('/')
            amount_of_attention = follow_parts[0].strip()
            release_time = follow_parts[1].strip()

            # Number and unit come back as separate text nodes, e.g. ['121', '万'].
            price_parts = div.xpath('//div[@class="totalPrice totalPrice2"]/span/text()|//div[@class="totalPrice totalPrice2"]/i[2]/text()')
            house_price = price_parts[0] + price_parts[1]     # e.g. 121万
            per_yuan = div.xpath('//div[@class="unitPrice"]/span/text()')[0]
            return (title, position, house, amount_of_attention, release_time, house_price, per_yuan)

        def parse(self):
            """Download the listing page and append every listing to the CSV file."""
            response = self.session.get(url=self.url)
            # Open the CSV once for the whole page instead of once per listing.
            with open('房优选择.csv', 'a', encoding='utf-8', newline='') as fp:
                writer = csv.writer(fp)
                for div in response.html.xpath('//div[@class="info clear"]'):
                    data = self._parse_listing(div)
                    writer.writerow(data)

                    # Alternative sinks:
                    # self.saveMongoDB(data)
                    # self.saveMySQL(data)

                    print(data)

        def saveMySQL(self, data):
            """Insert one listing row into the ``house_datas`` table and commit.

            The table must be created beforehand.

            Args:
                data: the 7-item row produced by ``parse``.
            """
            # Values travel as query parameters so the driver escapes them;
            # scraped titles may contain quotes and must not be spliced into SQL.
            self.cursor.execute(
                "insert into house_datas values (%s,%s,%s,%s,%s,%s,%s);",
                tuple(data[:7]),
            )
            self.conn.commit()      # required, otherwise the row is not persisted

        def saveMongoDB(self, data):
            """Insert one listing row (the 7-item row from ``parse``) as a MongoDB document."""
            self.my_set.insert_one({
                "title": data[0],
                "position": data[1],
                "house": data[2],
                "amount_of_attention": data[3],
                "release_time": data[4],
                "house_price": data[5],     # already a single string such as '121万'
                "per_yuan": data[6],
            })

        def run(self):
            """Run the crawl, then release every connection even if parsing fails."""
            try:
                self.parse()
            finally:
                self.cursor.close()
                self.conn.close()
                self.mongo_client.close()
    
    if __name__ == '__main__':
        spider = Spider()

        # Start from a fresh CSV that holds only the header row ('w' truncates
        # any previous file); the spider later appends one line per listing.
        column_titles = ('文章标题','地点','房貌','关注量','发表时间','总房价','每平价')
        with open('房优选择.csv', mode='w', encoding='utf-8', newline='') as header_file:
            csv.writer(header_file).writerow(column_titles)

        spider.run()
    

    文章到这里就结束了!希望大家能多多支持Python(系列)!六个月带大家学会Python,私聊我,可以问关于本文章的问题!以后每天都会发布新的文章,喜欢的点点关注!一个陪伴你学习Python的新青年!不管多忙都会更新下去,一起加油!

    Editor:Lonelyroots

    相关文章

      网友评论

          本文标题:Python(七十一)数据入库

          本文链接:https://www.haomeiwen.com/subject/kznerrtx.html