美文网首页
Python(七十一)数据入库

Python(七十一)数据入库

作者: Lonelyroots | 来源:发表于2022-03-02 22:32 被阅读0次

10_数据入库/01_MySQL查找.py:

"""

    创建UTF-8的数据库
        CREATE DATABASE 数据库名字 DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci;
        CREATE DATABASE spider DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci;

"""
import pymysql

# Connection settings for the MySQL server used by this example.
# NOTE(review): credentials are hard-coded for the tutorial; load them from
# configuration or the environment in real code.
host = "localhost"
port = 8001
db = "spider"
user = "admin"
password = "qwe123"

conn = pymysql.connect(host=host, port=port, db=db, user=user, password=password)
# print(conn)     # prints e.g. <pymysql.connections.Connection object at 0x0000019F12DA1550>

try:
    # A plain conn.cursor() returns each row as a tuple; passing DictCursor
    # makes every row come back as a dict instead.
    # The with-block closes the cursor first, even if the query raises.
    with conn.cursor(pymysql.cursors.DictCursor) as cursor:
        cursor.execute("SELECT * FROM Students")        # run the statement

        print(cursor.fetchone())    # fetch a single row
        # print(cursor.fetchall())        # fetch every remaining row
finally:
    # Then close the connection, whether or not the query succeeded.
    conn.close()

10_数据入库/02_MySQL插入数据.py:

"""
    插入一条:insert into Students (name,age) values ('贾克斯',40);
    插入多条:insert into Students (name,age) values ('贾克斯',40),('贾克斯',40),('贾克斯',40);
"""
import pymysql

# Connection settings for the MySQL server used by this example.
# NOTE(review): credentials are hard-coded for the tutorial; load them from
# configuration or the environment in real code.
host = "localhost"
port = 8001
db = "spider"
user = "admin"
password = "qwe123"

conn = pymysql.connect(host=host, port=port, db=db, user=user, password=password)

try:
    # The with-block closes the cursor first, even if the insert raises.
    with conn.cursor() as cursor:
        cursor.execute("insert into Students (name,age) values ('贾克斯',40);")        # run the statement

    # The insert only becomes permanent once it is committed - do not forget this!
    conn.commit()
finally:
    # Then close the connection, whether or not the insert succeeded.
    conn.close()

10_数据入库/03_MongoDB插入数据.py:

"""
    pip install pymongo

    mongodb 默认端口 27017

    在 ubuntu 下安装以及开启远程访问:
        1. sudo vi /etc/mongodb.conf
            将 bind_ip = 127.0.0.1 修改为 bind_ip = 0.0.0.0
        2. /etc/init.d/mongodb restart 重启服务

"""
from pymongo import MongoClient

client = MongoClient('localhost', 8881)
# Neither the "students" database nor the "words" collection has to exist
# beforehand; they are created if missing.
collection = client['students']['words']

# Alternative sample payload with two documents:
# [{'name':'雷霆嘎巴2','age':18},{'name':'马尔扎哈2','age':18}]
documents = [
    {'name': '雷霆嘎巴2', 'age': [1, 2, 3, 4]},
]
collection.insert_many(documents)        # insert_many expects a list of documents

10_数据入库/04_MongoDB查找数据.py:

from pymongo import MongoClient

client = MongoClient('localhost', 8881)
# Neither the "students" database nor the "words" collection has to exist
# beforehand; they are created if missing.
collection = client['students']['words']

# find() itself only returns a cursor object
# (printing it shows <pymongo.cursor.Cursor object at 0x...>),
# so iterate over it to get the stored documents one by one.
for document in collection.find():
    print(document)
    print(document['age'])

10_数据入库/05_爬取双色球历史数据.py:

import pymysql
from pymongo import MongoClient
from requests_html import HTMLSession

class Spider:
    """Scrape the double-color-ball lottery history page and store every draw.

    The constructor opens one MongoDB client and one MySQL connection that are
    reused for all rows; ``run()`` performs the crawl and releases them.
    """

    def __init__(self):
        self.url = "https://datachart.500.com/ssq/history/newinc/history.php?start=19000&end=21018"
        self.session = HTMLSession()

        # Connect exactly once here - never open connections inside the row loop!
        # MongoDB connection; the database and collection are created if missing.
        # The client is kept on the instance so run() can close it.
        self.mongo_client = MongoClient('localhost', 8881)
        self.my_set = self.mongo_client['dual_colored_ball']['words']

        # MySQL connection
        # NOTE(review): credentials are hard-coded for the tutorial.
        self.conn = pymysql.connect(
            host="localhost",
            port=8001,
            db="spider",
            user="admin",
            password="qwe123",
        )
        self.cursor = self.conn.cursor()  # cursor reused for every insert

    def parse(self):
        """Download the history page and save one record per table row."""
        response = self.session.get(url=self.url)
        # Columns 2-7 hold the six red balls; build the same union XPath
        # ('//td[2]/text()|...|//td[7]/text()') once, outside the loop.
        red_xpath = '|'.join('//td[{}]/text()'.format(col) for col in range(2, 8))
        for tr in response.html.xpath('//tbody[@id="tdata"]/tr'):
            number = tr.xpath('//td[1]/text()')[0]      # draw number
            red = tr.xpath(red_xpath)                   # red balls (list)
            blue = tr.xpath('//td[8]/text()')[0]        # blue ball
            prize_pool = tr.xpath('//td[10]/text()')[0]     # prize pool (yuan)
            first_prize = tr.xpath('//td[11]/text()|//td[12]/text()')       # first prize (list)
            second_prize = tr.xpath('//td[13]/text()|//td[14]/text()')      # second prize (list)
            total_bet = tr.xpath('//td[15]/text()')[0]      # total bet amount
            date = tr.xpath('//td[16]/text()')[0]       # draw date
            data = (number, red, blue, prize_pool, first_prize, second_prize, total_bet, date)
            # self.saveMongoDB(data)
            self.saveMySQL(data)
            # Example output:
            # ('19077', ['09', '11', '13', '18', '21', '22'], '15', '928,983,242',
            #  ['1', '10,000,000'], ['118', '221,011'], '331,156,004', '2019-07-04')
            print(data)

    def saveMySQL(self, data):
        """Insert one scraped draw into the ``dual_colored_ball`` table and commit.

        ``data`` is the 8-tuple built by ``parse``. The list-valued fields
        (red balls, first prize, second prize) are joined with ``-``.

        The values are handed to the driver as query parameters instead of
        being formatted into the SQL text, so scraped content containing
        quotes cannot break or alter the statement.
        """
        params = (
            int(data[0]),
            '-'.join(data[1]),      # flatten the list into one string
            data[2],
            data[3],
            '-'.join(data[4]),
            '-'.join(data[5]),
            data[6],
            data[7],
        )
        self.cursor.execute(
            "insert into dual_colored_ball values (%s,%s,%s,%s,%s,%s,%s,%s);",
            params,
        )
        self.conn.commit()       # the insert is not persisted until committed

    def saveMongoDB(self, data):
        """Insert one scraped draw (the 8-tuple built by ``parse``) into MongoDB."""
        # insert_many takes a list, hence the single document wrapped in [].
        self.my_set.insert_many([{
            "number": data[0],
            "red": data[1],
            "blue": data[2],
            "prizePool": data[3],
            "FirstPrize": data[4],
            "SecondPrize": data[5],
            "Total_bet": data[6],
            "Date": data[7],
        }])

    def run(self):
        """Run the crawl, then release the database resources."""
        try:
            self.parse()
        finally:
            # Clean up even when parsing or an insert fails part-way through.
            self.cursor.close()
            self.conn.close()
            self.mongo_client.close()

if __name__ == '__main__':
    # Script entry point: build the spider and start the crawl.
    Spider().run()

10_数据入库/06_链家网.py:

from requests_html import HTMLSession
import pymysql
from pymongo import MongoClient
import re
import csv

class Spider:
    """Scrape second-hand housing listings from Lianjia and store every listing.

    The constructor opens one MongoDB client and one MySQL connection that are
    reused for all listings; ``run()`` performs the crawl and releases them.
    """

    def __init__(self):
        self.url = "https://cs.lianjia.com/ershoufang/"
        self.session = HTMLSession()

        # Connect exactly once here - never open connections inside the listing loop!
        # MongoDB connection; the client is kept on the instance so run() can close it.
        self.mongo_client = MongoClient('localhost', 8881)
        self.my_set = self.mongo_client['HOME_LINK_net']['house_datas']

        # MySQL connection
        # NOTE(review): credentials are hard-coded for the tutorial.
        self.conn = pymysql.connect(
            host="localhost",
            port=8001,
            db="spider",
            user="admin",
            password="qwe123",
        )
        self.cursor = self.conn.cursor()  # cursor reused for every insert

    def parse(self):
        """Download the listing page and append one CSV row per listing."""
        response = self.session.get(url=self.url)

        # CSV output. Open the file once for the whole page instead of once per
        # listing ('a' appends write-only; 'a+' appends and also allows reading).
        with open('房优选择.csv', 'a+', encoding='utf-8', newline='') as fp:
            writer = csv.writer(fp)
            for div in response.html.xpath('//div[@class="info clear"]'):
                title = div.xpath('//div[@class="title"]/a/text()')[0]      # listing title
                position_small = div.xpath('//div[@class="positionInfo"]/a[1]/text()')[0].strip()   # e.g. 和美星城
                position_big = div.xpath('//div[@class="positionInfo"]/a[2]/text()')[0]
                position = '{}-{}'.format(position_small, position_big)     # e.g. 和美星城-暮云
                house = div.xpath('//div[@class="houseInfo"]/text()')[0]

                # The tricky part of the page: the text node sits next to an icon span.
                #   <div class="followInfo">
                #     <span class="starIcon"></span>
                #     "0人关注 / 7天以前发布"
                #   </div>
                follow = div.xpath('//div[@class="followInfo"]/text()')[0]      # e.g. 0人关注 / 7天以前发布
                followinfo = follow.split('/')      # e.g. ['0人关注 ', ' 7天以前发布']
                amount_of_attention = followinfo[0]
                release_time = followinfo[1]

                price_parts = div.xpath('//div[@class="totalPrice totalPrice2"]/span/text()|//div[@class="totalPrice totalPrice2"]/i[2]/text()')     # e.g. ['121', '万']
                house_price = price_parts[0] + price_parts[1]       # e.g. 121万
                per_yuan = div.xpath('//div[@class="unitPrice"]/span/text()')[0]
                data = (title, position, house, amount_of_attention, release_time, house_price, per_yuan)

                writer.writerow(data)

                # # MongoDB output
                # self.saveMongoDB(data)

                # # MySQL output
                # self.saveMySQL(data)

                print(data)

    def saveMySQL(self, data):
        """Insert one listing into the ``house_datas`` table and commit.

        ``data`` is the 7-tuple built by ``parse``. The table must already
        exist. The values are handed to the driver as query parameters instead
        of being formatted into the SQL text, so scraped content containing
        quotes cannot break or alter the statement.
        """
        self.cursor.execute(
            "insert into house_datas values (%s,%s,%s,%s,%s,%s,%s);",
            tuple(data[:7]),
        )
        self.conn.commit()  # the insert is not persisted until committed

    def saveMongoDB(self, data):
        """Insert one listing (the 7-tuple built by ``parse``) into MongoDB."""
        # insert_many takes a list, hence the single document wrapped in [].
        self.my_set.insert_many([{
            "title": data[0],
            "position": data[1],
            "house": data[2],
            "amount_of_attention": data[3],
            "release_time": data[4],
            "house_price": data[5],     # already a single string such as '121万'
            "per_yuan": data[6],
        }])

    def run(self):
        """Run the crawl, then release the database resources."""
        try:
            self.parse()
        finally:
            # Clean up even when parsing or writing fails part-way through.
            self.cursor.close()
            self.conn.close()
            self.mongo_client.close()

if __name__ == '__main__':
    spider = Spider()
    # Start from a fresh CSV holding only the header row; the crawl then
    # appends one row per listing to the same file.
    with open('房优选择.csv','w',encoding='utf-8',newline='') as fp:
        csv.writer(fp).writerow(('文章标题','地点','房貌','关注量','发表时间','总房价','每平价'))
    spider.run()

文章到这里就结束了!希望大家能多多支持Python(系列)!六个月带大家学会Python,私聊我,可以问关于本文章的问题!以后每天都会发布新的文章,喜欢的点点关注!一个陪伴你学习Python的新青年!不管多忙都会更新下去,一起加油!

Editor:Lonelyroots

相关文章

网友评论

      本文标题:Python(七十一)数据入库

      本文链接:https://www.haomeiwen.com/subject/kznerrtx.html