1.首先导入一些相关模块
import requests
from lxml import etree
from xlrd import open_workbook
from xlutils.copy import copy
import pymysql.cursors
2.获取页面text内容
# Fetch the page. `url` is not defined in this snippet and must be set by
# the surrounding script. The timeout keeps the request from blocking
# indefinitely when the server does not respond.
page = requests.get(url, timeout=10)
# Parse the response body into an element tree. Only `etree` is imported
# from lxml above, so use etree.HTML instead of the un-imported `html` module.
result = etree.HTML(page.text)
3.使用xpath获取标签以及标签内容
# Text content of every <a> element on the page.
tmps = result.xpath("//a/text()")
# href values of the <a> elements that carry a title attribute.
# Note: this rebinds tmps, replacing the link-text result from the line above.
tmps = result.xpath("//a[@title]/@href")
# href values of the <a> elements whose title attribute contains '末页'
# ("last page"). contains() is a substring match, not an exact comparison.
pages = result.xpath("//a[contains(@title,'末页')]/@href")
4.读写Excel文件
# Open the existing workbook at file_path with xlrd (file_path is supplied
# by the surrounding script).
rexcel = open_workbook(file_path)
# Make a copy of the workbook via xlutils.copy so it can be modified;
# the object returned by open_workbook is used only as the source here.
excel = copy(rexcel)
# Take the first sheet (index 0) of the copy.
table = excel.get_sheet(0)
# Write content at position (row, cell).
# NOTE(review): presumably `cell` is the zero-based column index - confirm.
table.write(row, cell, content)
# Save the modified copy to the same path the workbook was read from.
excel.save(file_path)
5.录入数据库操作
# Connect to the MySQL database.
# NOTE(review): the credentials are hard-coded; consider loading them from
# configuration or the environment instead of keeping them in source.
connection = pymysql.connect(
    host='127.0.0.1',
    port=3306,
    user='root',
    password='198876',
    db='guest',
    charset='utf8mb4',
    cursorclass=pymysql.cursors.DictCursor,
)
try:
    # The cursor is used as a context manager so it is closed when the
    # block exits, whether or not the statement succeeds.
    with connection.cursor() as cursor:
        # Pass the values as query parameters so the driver handles
        # quoting and escaping instead of embedding them in the SQL text.
        sql = "INSERT INTO `users` (`email`, `password`) VALUES (%s, %s)"
        cursor.execute(sql, ('huzhiheng@itest.info', '123456'))
    # Commit explicitly so the insert is persisted.
    connection.commit()
finally:
    # Always release the connection, even if the insert or commit raised.
    connection.close()
网友评论