Simple Python Crawler: Scraping Fund Information

Author: sweetMemories | Published 2017-07-21 10:27

A simple crawler that scrapes fund information and stores it in a MySQL database. The code is as follows:

import pymysql
from urllib.error import HTTPError
from urllib.request import urlopen
from bs4 import BeautifulSoup
import uuid
import datetime

# Establish the database connection and select the fund database
conn = pymysql.connect(host='127.0.0.1', user='root', passwd='1234', db='mysql', charset='utf8')
cur = conn.cursor()
cur.execute("USE fund")

# Left-pad a fund code to six digits, e.g. "419" -> "000419"
def getFundNumStr(num_str):
    return num_str.zfill(6)

# Fetch a fund's name, estimated value, and estimate time from its detail page
def getFundData(url):
    # Open the URL; give up on this fund if the page cannot be fetched
    try:
        html = urlopen(url)
    except HTTPError:
        return None
    # Extract the target fields; a missing element raises AttributeError
    try:
        bsObj = BeautifulSoup(html, "lxml")
        name = bsObj.find("div", {"class": "fundDetail-tit"}).div.get_text()
        name = name[:name.index("(")]  # strip the "(fund code)" suffix
        value = bsObj.find(id="gz_gsz").get_text()
        time = bsObj.find(id="gz_gztime").get_text()
        if time != '--':
            # The page shows e.g. "(17-07-21 15:00)"; prepend the century
            time = "20" + time[time.index("(")+1:time.index(")")]
        data = [name, value, time]
    except AttributeError:
        return None
    return data

# Save the fund's basic info if this fund has not been seen before
def saveNewFundInfo(code, name):
    cur.execute("SELECT * FROM fund_info WHERE code = %s", (code,))
    if cur.rowcount == 0:
        cur.execute("INSERT INTO fund_info (code,name) VALUES (%s, %s)", (code, name))

try:
    # Fund codes are six-digit numbers; walk the code space sequentially
    for num in range(419, 1000000):
        funCode = getFundNumStr(str(num))
        url = "http://fund.eastmoney.com/" + funCode + ".html"
        data = getFundData(url)
        if data is not None:
            row_id = str(uuid.uuid1()).replace("-", "")
            time = datetime.datetime.now().strftime('%Y-%m-%d')
            saveNewFundInfo(funCode, data[0])
            if data[1] != '--':
                cur.execute("INSERT INTO fund_day_data (id,code,data,data_time,create_time,update_time) \
                  VALUES (%s, %s, %s, %s, %s, %s)", (row_id, funCode, float(data[1]), data[2], time, time))
            conn.commit()
    print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
finally:
    cur.close()
    conn.close()
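
The script assumes the "fund" database and two tables already exist; the post does not show their definitions. Below is a minimal one-time setup sketch inferred from the columns the crawler reads and writes, so the column types and sizes are assumptions:

# Hypothetical setup script; column types are inferred, not from the post.
import pymysql

conn = pymysql.connect(host='127.0.0.1', user='root', passwd='1234', charset='utf8')
cur = conn.cursor()
cur.execute("CREATE DATABASE IF NOT EXISTS fund")
cur.execute("USE fund")
# Basic fund registry: one row per six-digit fund code
cur.execute("""CREATE TABLE IF NOT EXISTS fund_info (
    code VARCHAR(6) PRIMARY KEY,
    name VARCHAR(128)
)""")
# Daily estimated-value samples, keyed by the uuid1 hex string the crawler generates
cur.execute("""CREATE TABLE IF NOT EXISTS fund_day_data (
    id CHAR(32) PRIMARY KEY,
    code VARCHAR(6),
    data DECIMAL(10,4),
    data_time DATETIME,
    create_time DATE,
    update_time DATE
)""")
conn.commit()
cur.close()
conn.close()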
