from mysql import connector
from datetime import datetime
from dateutil import parser
import json
import requests
#使用BeautifulSoup,需要这么导入模块
from bs4 import BeautifulSoup
def loda_data(url, timeout=10):
    """Download a page and return its source text.

    Args:
        url: Address of the page to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        The response body as text when the status code is 200, otherwise
        ``None``.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (including when the timeout expires).
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        # Callers must be prepared for a missing page.
        return None
    return response.text
def detail_data(html):
    """Return the content container of a detail page.

    Parses ``html`` with the "html.parser" backend and returns the first
    ``<div>`` whose ``style`` attribute is exactly
    ``width: 1105px;margin:0 auto``. If the page contains no such element,
    indexing the empty result raises ``IndexError``.
    """
    soup = BeautifulSoup(html, "html.parser")
    # CSS attribute selector: match the div by its exact inline style.
    matches = soup.select('div[style="width: 1105px;margin:0 auto"]')
    return matches[0]
# 获取招标信息
def json_data(url):
    """Scrape one list page and return its entries as a list of dicts.

    Every ``<li>`` on the list page that has both a link and a date is turned
    into a dict with these keys:

    * ``title``   - text of the entry's ``<a>`` element
    * ``ctime``   - date parsed from the entry's ``<span>``, formatted as
      ``%Y-%m-%d %H:%M:%S``
    * ``gtime``   - time the entry was scraped, same format
    * ``url``     - absolute address of the detail page
    * ``content`` - whatever ``detail_data`` returns for the detail page

    Args:
        url: Address of the list page. Relative links found on the page are
            resolved against it.

    Returns:
        A list of dicts as described above. The list is empty when the list
        page could not be downloaded. Entries whose detail page could not be
        downloaded are left out.
    """
    # Local import so this function does not depend on the file's import block.
    from urllib.parse import urljoin

    list_html = loda_data(url)
    if list_html is None:
        # loda_data returns None for non-200 responses; nothing to parse.
        return []

    list_soup = BeautifulSoup(list_html, "html.parser")
    newsinfo = []
    for item in list_soup.find_all('li'):
        link = item.find('a')
        stamp = item.find('span')
        if link is None or stamp is None or not link.get('href'):
            # An <li> without a link or a date cannot become a record.
            continue

        # Resolve the href against the list page so "./x", "../x" and
        # absolute links all produce a valid address.
        detail_url = urljoin(url, str(link['href']))
        detail_html = loda_data(detail_url)
        if detail_html is None:
            # Detail page unavailable: skip the entry instead of crashing.
            continue

        newsinfo.append({
            "title": link.get_text(),
            "ctime": parser.parse(stamp.get_text()).strftime("%Y-%m-%d %H:%M:%S"),
            "gtime": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "url": detail_url,
            "content": detail_data(detail_html),
        })
    return newsinfo
# 检查一个表是否存在
def tableExists(mycursor, name):
    """Check whether a table matching ``name`` exists in the current database.

    Args:
        mycursor: An open database cursor providing ``execute`` and
            ``fetchall``.
        name: Table name to look for. It is used as a ``LIKE`` pattern, so
            ``%`` and ``_`` in it act as wildcards.

    Returns:
        The first matching row, or ``None`` when no table matches. The result
        is therefore truthy exactly when a table was found.
    """
    # Debug trace of the statement being run (same text as before).
    print('SHOW TABLES LIKE "{}"'.format(name))
    # Pass the name as a parameter so the driver does the quoting.
    mycursor.execute("SHOW TABLES LIKE %s", (name,))
    # Read the whole result set so no unread rows are left on the cursor
    # before the caller issues its next statement.
    rows = mycursor.fetchall()
    return rows[0] if rows else None
def mysql_data(url):
    """Scrape the list page at ``url`` and store its entries in MySQL.

    Connects to the ``book`` database, creates the ``newinfo`` table if
    ``tableExists`` reports it missing, inserts one row per entry returned by
    ``json_data`` and commits once all rows have been inserted.

    Args:
        url: Address of the list page to scrape.

    Returns:
        None.
    """
    # NOTE(review): the credentials are hard-coded; consider loading them from
    # configuration or the environment.
    conn = connector.connect(user='root', password='111111', database='book', use_unicode=True)
    try:
        cursor = conn.cursor()
        try:
            if tableExists(cursor, 'newinfo'):
                print("不建")
            else:
                print("创建")
                creat_sql = "create table newinfo(id INT AUTO_INCREMENT PRIMARY KEY,url varchar(255), title varchar(255), ctime datetime,gtime datetime,content text)"
                cursor.execute(creat_sql)
                print("创建成功")

            news = json_data(url)

            # The values come from scraped web pages, so they are passed as
            # parameters: quotes inside the HTML can neither break the
            # statement nor inject SQL.
            insert_sql = "insert into newinfo (url,title,ctime,gtime,content) values (%s,%s,%s,%s,%s)"
            for item in news:
                row = (
                    item["url"],
                    item["title"],
                    item["ctime"],
                    item["gtime"],
                    # The content may be a parsed element; store its markup text.
                    str(item["content"]),
                )
                cursor.execute(insert_sql, row)
                # Trace the statement that was actually sent to the server.
                print(cursor.statement)

            # Commit once, after every row was inserted successfully.
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Release the connection even when scraping or inserting failed.
        conn.close()
# List page that is scraped when this file is executed as a script.
url = "http://www.ccgp-beijing.gov.cn/xxgg/sjzfcggg/index_2.html"

if __name__ == "__main__":
    # Guarded so that importing this module does not start a scrape and a
    # database write. The printed value is mysql_data's return value.
    print(mysql_data(url))
# 网友评论  (stray "user comments" page text captured with the snippet; not part of the script)