前面我们有说到在本地安装数据库,安装完成后我们怎么创建自己的数据库呢?
平时大家常用的都是可视化的工具navicat,下面说下通过python创建数据库
# python + pymysql 创建数据库
# 注意这里的database一定要指定,否则报错
import pymysql
# 创建连接
conn = pymysql.connect(host='localhost',user='root',password='123456',charset='utf8mb4', database='db_name')
# 创建游标
cursor = conn.cursor()
# 创建数据库的sql(如果数据库存在就不创建,防止异常)
sql = "CREATE DATABASE IF NOT EXISTS db_name"
# 执行创建数据库的sql
cursor.execute(sql)
# 创建表
sql_2 = '''CREATE TABLE `employee` (
`id` INT NOT NULL AUTO_INCREMENT,
`topic` INT ,
`ptid` INT NOT NULL,
`level` INT NOT NULL,
`time` TIME,
`consume` INT NOT NULL,
`err` INT NOT NULL,
`points` INT NOT NULL,
`gid` INT NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
'''
cursor.execute(sql_2)
然后我们打开Navicat
![](https://img.haomeiwen.com/i4624177/7177ffd33535c235.png)
其中db_name就是我们创建的数据库。
这是我另一篇文章使用的是sqlite3
python 存储数据到数据库
下面是我抓取豆瓣数据,并存储Excel和数据库
# -*- coding: UTF-8 -*-
# 1. 爬取网页
# 1. 要设置header,用户代理,告诉豆瓣服务器,我是什么类型的机器,浏览器(本质上是告诉浏览器,我们可以接收什么类型的文件内容)
# 2. 模拟浏览器头部信息,像豆瓣服务器发送消息
# 2. 解析数据
# 3. 保存数据
import requests
from bs4 import BeautifulSoup
import re
import xlwt
import sqlite3
def main():
dataList = getData()
print(len(dataList))
# saveDataToExcel(dataList)
saveDataToDB(dataList, 'movie.db')
# save数据到db
def saveDataToDB(dataList, path):
initDB(path)
print("初始化表成功!")
conn = sqlite3.connect(path)
c = conn.cursor()
for data in dataList:
c.execute('''
INSERT INTO movie (movieTitle, imgSrc, bdDetail,movieDetail,rating_num,m_peoplecount)
VALUES (%s, %s, %s, %s, %s, %s)''' % (f'"{data["movieTitle"]}"', f'"{data["imgSrc"]}"', f'"{data["bdDetail"]}"', f'"{data["movieDetail"]}"', f'"{data["rating_num"]}"', f'"{data["m_peoplecount"]}"'))
conn.commit()
c.close()
conn.close()
# 初始化数据库
def initDB(path):
sql = '''
CREATE TABLE IF NOT EXISTS movie
(
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
movieTitle text ,
imgSrc text ,
bdDetail text ,
movieDetail text ,
m_peoplecount text ,
rating_num varcher
)
'''
conn = sqlite3.connect(path)
print("成功打开表")
c = conn.cursor()
c.execute(sql)
conn.commit()
conn.close()
# save数据到excel
def saveDataToExcel(dataList):
workbook = xlwt.Workbook(encoding='utf-8')
bookSheet = workbook.add_sheet("sheet1", cell_overwrite_ok=True)
header = [{"movieTitle": "电影名称"}, {"imgSrc": "电影封面"}, {"bdDetail": "电影详情"}, {
"movieDetail": "电影地址"}, {"rating_num": "评分"}, {"m_peoplecount": "评价人数"}]
for i in range(len(header)):
bookSheet.write(0, i, list(header[i].values())[0])
for j in range(len(dataList)):
item = dataList[j]
for i in range(len(item)):
bookSheet.write(j+1, i, item[list(header[i].keys())[0]])
workbook.save('test.xls')
# 存储乘法口诀表
def saveDataToExcel1():
workbook = xlwt.Workbook(encoding='utf-8')
bookSheet = workbook.add_sheet("sheet1", cell_overwrite_ok=True)
for i in range(0, 9):
for j in range(0, i+1):
bookSheet.write(i, j, f'{i+1} * {j+1} = {(i+1)*(j+1)}')
workbook.save('test_xlwt.xls')
# 爬取网页
def getData():
url = "https://movie.douban.com/top250?start="
dataList = []
for i in range(0, 10):
currentUrl = url + str(i)
htmlStr = requestUrl(currentUrl)
bs = BeautifulSoup(htmlStr, "html.parser")
item_list = bs.find_all("div", class_="item")
for item in item_list:
dataDic = {}
pic = item.find("img")
imgSrc = pic["src"]
dataDic["imgSrc"] = imgSrc
info = item.find("div", class_="info")
movieDetail = info.div.a.get("href")
dataDic["movieDetail"] = movieDetail
movieTitle = info.div.a.get_text()
dataDic["movieTitle"] = movieTitle.strip()
# 简介
bd = item.find("div", class_="bd")
bdDetail = bd.p.text.strip()
dataDic["bdDetail"] = bdDetail
# 评分
star = item.find("div", class_="star")
rating_num = star.find("span", class_="rating_num").string
# print(rating_num)
dataDic["rating_num"] = rating_num
# 多少人评价
m_span = star.findAll('span')
m_peoplecount = m_span[3].string
dataDic["m_peoplecount"] = m_peoplecount
dataList.append(dataDic)
return dataList
# 进行网络请求
def requestUrl(url):
header = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.55 Safari/537.36"}
response = requests.get(url, headers=header)
response.encoding = 'utf-8'
return response.text
if __name__ == "__main__":
main()
网友评论