前言
如今,大数据和人工智能已经是最火热的名词;作为实现这两项技术最重要的工具之一,Python 也变得越来越重要。学习 Python 是大势所趋。
使用pip3安装对应的库
# Third-party packages used by the scraper below.
pip3 install requests
# "bs4" on PyPI is only a placeholder wrapper; the official distribution
# name of Beautiful Soup 4 is "beautifulsoup4" (still imported as `bs4`).
pip3 install beautifulsoup4
# lxml is the parser backend handed to BeautifulSoup(html, 'lxml').
pip3 install lxml
创建mysql表
-- Newly listed stocks scraped from the Shanghai Stock Exchange IPO feed.
-- Changes compared with the first draft of this table:
--   * `code` is NOT NULL, because it is used as the primary key.
--   * Issue price, P/E ratio and lot winning rate are fractional values, so
--     they are DECIMAL instead of INT (INT would round or reject them).
--   * Share counts are expressed in 万股 (units of 10,000 shares); any count
--     that is not a multiple of 10,000 is fractional, hence DECIMAL as well.
--     NOTE(review): the chosen precisions/scales are assumptions - confirm
--     them against real feed values.
--   * Integer display widths such as int(5) do not limit the value range and
--     are deprecated in recent MySQL versions, so they are dropped.
--   * utf8mb4 is used instead of the 3-byte `utf8` alias.
--   * The statement is terminated with `;` so it can be run in a script.
-- NOTE(review): the *_date columns are kept as varchar(10) because the date
-- format delivered by the feed is not visible here; switch to DATE once the
-- format is confirmed.
CREATE TABLE `sse_ipo` (
  `code` varchar(10) NOT NULL COMMENT '股票编码',
  `name` varchar(20) DEFAULT NULL COMMENT '股票名称',
  `stock_type` int DEFAULT NULL COMMENT '上市板块,0:主板,2:科创板',
  `total_initial_num` decimal(15,4) DEFAULT NULL COMMENT '初始发行股本总量(万股)',
  `total_issued_num` decimal(15,4) DEFAULT NULL COMMENT '发行总量(万股)',
  `issuance_price` decimal(10,2) DEFAULT NULL COMMENT '发行价(元)',
  `issuance_price_earnings_ratio` decimal(10,2) DEFAULT NULL COMMENT '市盈率',
  `offline_circulation` decimal(15,4) DEFAULT NULL COMMENT '网下发行量(万股)',
  `online_circulation` decimal(15,4) DEFAULT NULL COMMENT '网上发行量(万股)',
  `online_purchase_limit` decimal(15,4) DEFAULT NULL COMMENT '网上申购上限(万股)',
  `lot_winning_rate` decimal(12,8) DEFAULT NULL COMMENT '中签率',
  `offline_issuance_date` varchar(10) DEFAULT NULL COMMENT '网下发行日',
  `online_issuance_date` varchar(10) DEFAULT NULL COMMENT '网上发行日',
  `payment_start_date` varchar(10) DEFAULT NULL COMMENT '缴款日',
  `announce_success_rate_result_date` varchar(10) DEFAULT NULL COMMENT '中签结果公告日'
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- Add the primary key: each stock code identifies exactly one row.
ALTER TABLE `sse_ipo` ADD PRIMARY KEY (`code`);
实现一个简单的爬虫
- 使用requests库,获取html的内容或者api接口数据
- 使用bs4的BeautifulSoup对html进行解析
- 使用lxml作为BeautifulSoup的底层解析器
#!/usr/bin/env python3
import requests
import json
from bs4 import BeautifulSoup
from db import DB
# Demo 1: download the MDN page for Blob.slice() and inspect it with
# BeautifulSoup (using lxml as the parser backend).
url = 'https://developer.mozilla.org/zh-CN/docs/Web/API/Blob/slice'
# requests has no default timeout; without one a stalled server would hang
# the script forever.
result = requests.get(url, timeout=10)
# Fail loudly on 4xx/5xx instead of parsing an error page.
result.raise_for_status()
result.encoding = 'utf-8'
html = result.text
bs = BeautifulSoup(html, 'lxml')
# Print the element whose id is "skip-main" (None if the page has none).
print(bs.find(id="skip-main"))
# Print the src attribute of every <script> tag (None for inline scripts).
for item in bs.find_all("script"):
    print(item.get('src'))
# Demo 2: fetch the newly listed stocks from the Shanghai Stock Exchange IPO
# endpoint and store them in the MySQL table `sse_ipo`.
url = 'http://ipo.sseinfo.com/info/commonQuery.do?isPagination=true&sqlId=COMMON_SSE_IPO_ISSUE_L&pageHelp.pageSize=15&_=1590298105330'
# requests has no default timeout; set one so a stalled server cannot hang us.
res = requests.get(url, timeout=10)
res.raise_for_status()
content = json.loads(res.text)
# The payload may lack 'result' (or carry null); treat that as "no rows"
# instead of crashing on iteration over None.
result = content.get('result') or []

# The statement does not change per row, so build it once outside the loop.
# Values are bound through %s placeholders, never interpolated into the SQL.
insert_stmt = (
    "INSERT INTO sse_ipo (code, name, stock_type, total_initial_num, total_issued_num, offline_issuance_date, online_issuance_date, issuance_price, issuance_price_earnings_ratio, offline_circulation, online_circulation, online_purchase_limit, announce_success_rate_result_date, payment_start_date, lot_winning_rate) "
    "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
)
# Feed keys listed in exactly the same order as the columns in insert_stmt.
feed_keys = (
    'SECURITY_CODE',
    'SECURITY_NAME',
    'STOCK_TYPE',
    'TOTAL_INITIAL_ISSUE',
    'TOTAL_ISSUED',
    'OFFLINE_ISSUANCE_START_DATE',
    'ONLINE_ISSUANCE_DATE',
    'ISSUE_PRICE',
    'ISSUANCE_PRICE_EARNINGS_RATIO',
    'OFFLINE_CIRCULATION',
    'ONLINE_CIRCULATION',
    'ONLINE_PURCHASE_LIMIT',
    'ANNOUNCE_SUCCESS_RATE_RESULT_DATE',
    'PAYMENT_START_DATE',
    'LOT_WINNING_RATE',
)

db = DB()
db.open()
try:
    for item in result:
        # Missing keys become None, which is bound as SQL NULL.
        data = tuple(item.get(key) for key in feed_keys)
        db.execute(insert_stmt, data)
    # Commit once after all rows were inserted.
    # NOTE(review): `code` is the primary key, so re-running the script for
    # stocks that are already stored will presumably fail with a duplicate-key
    # error - confirm whether an upsert is wanted.
    db.commit()
finally:
    # Always release the connection, even if an insert raised.
    db.close()
网友评论