滚动更新股票数据的python程序，无需注册

作者: micovey | 来源:发表于2019-11-19 20:18 被阅读0次

滚动更新股票数据的python程序，无需注册
Linux下安装Pipenv
2018年企业最合适的工具！ⅤR+小程序，你想免费注册吗？
python数据处理笔记
python数据处理笔记
微信小程序实战（仿小米商城）
Django批量更新数据
QQ小程序正式开放注册，无需邀请码。连接8亿新生代活跃网民
python-01基础
安卓手机运行python程序的软件：Termux

1. 代码目的
设定一定的时间段，滚动下载数据。
例如，当前时间为2019-11-19，当日未收盘，若设置数据期间为180天，则保存2019-5-23至2019-11-18的数据
明日为2019-11-20，当日未收盘，则自动删除2019-5-23数据，加入2019-11-19的数据。
便于之后的量化分析
滚动的目的：避免数据文件过大
2. 前期准备
首先安装Python（或者Anaconda），但要注意最好不要同时安装Python和Anaconda，因为Anaconda里面已经自带Python解释器，两者同时安装容易起冲突。
Anaconda网址：https://www.anaconda.com/
若选择纯Python，则需要安装以下依赖（其中datetime是Python标准库自带的模块，一般无需单独安装）：

pip install datetime
pip install pandas
pip install numpy
pip install baostock

若选择Anaconda

需要设置环境变量

111111.png

然后只需要

pip install baostock

安装PyCharm或者Anaconda自带的Spyder

3. 为什么选择baostock

Tushare、Baostock、joinquant等均可获得股票日数据
Wind、CSMAR也可以获得（花钱）
但是Tushare Pro需要积分，JoinQuant只有一年试用期
因此本文选择Baostock，无需注册。
http://baostock.com/

4. 代码

import baostock as bs
import pandas as pd
import numpy as np
import datetime
# ---- Log in to the baostock service ----
lg = bs.login()

# ---- Script-wide settings ----
# Today's date at midnight (the time-of-day part is discarded).
now_time = datetime.datetime.combine(datetime.datetime.now().date(), datetime.time.min)
year = now_time.year
print('请输入地址')
# Root location under which the Quantify folder lives.
file_place = 'D:\\'
fl = 'Quantify\\idcode.csv'
# Full path of the stock-code list, e.g. D:\Quantify\idcode.csv
filee = '{}{}'.format(file_place, fl)
# Daily-quote file for the current calendar year.
fff = '{}\\Quantify\\daily\\daily{}.csv'.format(file_place, year)

# The code list has no header row; report whether more than 10 rows were read.
idcode = pd.read_csv(filee, header=None)
print('读取成功' if len(idcode) > 10 else '读取失败')

# Keep the code list as a NumPy array so it can be iterated row by row.
idcode = np.array(idcode)
# Length of the rolling window, in calendar days.
datalength = 180
#######定义函数########

##滚动删除数据

def drop_date_row(drop_data):
    """Drop the rows that are older than the rolling window.

    The ``date`` column of ``drop_data`` is parsed in place from
    ``'%Y/%m/%d'`` text into datetimes. The cutoff is today's midnight minus
    ``datalength`` days (``datalength`` is a module-level setting).

    Args:
        drop_data: DataFrame with a ``date`` column of ``'YYYY/MM/DD'`` strings.

    Returns:
        A DataFrame without the rows dated before the cutoff when at least one
        such row exists; otherwise an empty DataFrame, which the caller treats
        as "nothing to delete".
    """
    drop_data['date'] = [
        datetime.datetime.strptime(text, '%Y/%m/%d') for text in drop_data['date']
    ]
    today = datetime.datetime.combine(datetime.datetime.now().date(), datetime.time.min)
    cutoff = today - datetime.timedelta(days=datalength)

    # Oldest stored row is still inside the window: signal "no change".
    if not drop_data['date'].min() < cutoff:
        return pd.DataFrame()

    expired = drop_data.loc[drop_data['date'] < cutoff].index
    return drop_data.drop(expired)

##滚动获得数据

def get_daily_data(id, start_date_find, end_date_find):
    """Download daily K-line rows for one security and add a market-value column.

    Args:
        id: Security code handed to baostock unchanged. The name shadows the
            built-in ``id`` but is kept so existing callers keep working.
        start_date_find: First date to request; converted with ``str()``.
        end_date_find: Last date to request; converted with ``str()``.

    Returns:
        A DataFrame with one row per record returned by the query, the columns
        listed in ``rs.fields``, and an extra ``marketvalue`` column computed as
        ``volume / turn * close * 100``. ``turn``, ``volume`` and ``close`` are
        converted to numbers, with blank fields becoming 0. The frame is empty
        when the query reports an error or yields no rows.
    """
    rs = bs.query_history_k_data_plus(
        id,
        "date,code,open,high,low,close,preclose,volume,amount,adjustflag,turn,tradestatus,pctChg,isST,peTTM,pbMRQ,psTTM,pcfNcfTTM",
        start_date=str(start_date_find),
        end_date=str(end_date_find),
        frequency="d",
        adjustflag="3",
    )
    data_list = []
    # `and` short-circuits, so next() is never called on a failed query
    # (the previous bitwise `&` always evaluated both sides).
    while rs.error_code == '0' and rs.next():
        data_list.append(rs.get_row_data())
    result = pd.DataFrame(data_list, columns=rs.fields, index=range(0, len(data_list)))

    # These fields arrive as text; blank ones are mapped to 0.
    for column in ("turn", "volume", "close"):
        result[column] = [0 if x == "" else float(x) for x in result[column]]

    # NOTE: rows whose turn is 0 (including blank turn fields mapped to 0 above)
    # divide by zero here, so their marketvalue comes out as inf or NaN.
    result["marketvalue"] = result["volume"] / result["turn"] * result["close"] * 100
    return result
##开始，结束下载日期

def check_download(check_date):
    """Work out the date range that still has to be downloaded.

    Args:
        check_date: DataFrame of the rows already stored; its ``date`` column
            must hold datetimes. May be empty.

    Returns:
        A ``(start_date, end_date)`` tuple of ``'YYYY-MM-DD'`` strings.
        ``start_date`` is the day after the newest stored date, or today minus
        ``datalength`` days (a module-level setting) when nothing is stored.
        ``end_date`` is always today.
    """
    day_format = '%Y-%m-%d'
    if len(check_date) != 0:
        # Resume on the day after the newest stored row.
        first_day = check_date['date'].max() + datetime.timedelta(days=1)
    else:
        # Nothing stored yet: start at the beginning of the rolling window.
        midnight = datetime.datetime.combine(
            datetime.datetime.now().date(), datetime.time.min
        )
        first_day = midnight - datetime.timedelta(days=datalength)
    start_date = datetime.datetime.strftime(first_day, day_format)
    end_date = datetime.datetime.now().strftime(day_format)
    return start_date, end_date

##删除重复项

def drop_duplicates(drop_data):
    """Remove rows that repeat an earlier ``(code, date)`` pair.

    The frame is modified in place: its ``date`` column is parsed from
    ``'%Y/%m/%d'`` text into datetimes, and repeated rows are dropped, keeping
    the first occurrence.

    Args:
        drop_data: DataFrame with ``code`` and ``date`` columns, where ``date``
            holds ``'YYYY/MM/DD'`` strings.

    Returns:
        The de-duplicated frame when at least one row was removed; otherwise an
        empty DataFrame, which the caller treats as "no duplicates found".
    """
    rows_before = len(drop_data)
    drop_data['date'] = [
        datetime.datetime.strptime(text, '%Y/%m/%d') for text in drop_data['date']
    ]
    drop_data.drop_duplicates(subset=['code', 'date'], keep='first', inplace=True)

    if len(drop_data) != rows_before:
        return drop_data
    # Nothing was removed: signal that with an empty frame.
    return pd.DataFrame()

# ===== Run the update =====

# ---- Step 1: trim rows that have fallen out of the rolling window ----
drop_data = drop_date_row(pd.read_csv(fff))
if len(drop_data) >= 2:
    print("已删除数据")
    # Dates go back to their on-disk 'YYYY/MM/DD' text form before saving.
    drop_data['date'] = [datetime.datetime.strftime(day, '%Y/%m/%d') for day in drop_data['date']]
    drop_data.to_csv(fff, header=True, index=False)
else:
    print("不需要删除数据")

# ---- Step 2: work out which dates are still missing ----
check_date = pd.read_csv(fff)
check_date['date'] = [datetime.datetime.strptime(text, '%Y/%m/%d') for text in check_date['date']]
start_date, end_date = check_download(check_date)

# ---- Step 3: download the missing days and append them to the file ----
first_day = datetime.datetime.strptime(start_date, '%Y-%m-%d')
last_day = datetime.datetime.strptime(end_date, '%Y-%m-%d')
if first_day > last_day:
    print("不需要更新数据，没有开始新交易")
elif first_day == last_day and datetime.datetime.now().hour < 18:
    # Only today is missing and it is before 18:00, so today is skipped for now.
    print("不需要更新数据，当日没有结束")
else:
    for code_row in idcode:
        # Characters 2..10 of the row's text form are the 9-character code.
        stock_code = str(code_row)[2:11]
        print('=====' + stock_code + '=====')
        result1 = get_daily_data(stock_code, start_date, end_date)
        if len(result1) == 0:
            continue
        trade_days = [datetime.datetime.strptime(text, '%Y-%m-%d') for text in result1['date']]
        result1['quarter'] = [(day.month - 1) // 3 + 1 for day in trade_days]
        result1['year'] = [day.year for day in trade_days]
        # Stored dates use '/' separators, unlike the '-' form that was parsed above.
        result1['date'] = [day.strftime('%Y/%m/%d') for day in trade_days]
        result1.to_csv(fff, mode='a', header=False)

# ---- Step 4: remove duplicated (code, date) rows ----
drop_data = drop_duplicates(pd.read_csv(fff))
if len(drop_data) >= 2:
    print("已删除重复项")
    drop_data['date'] = [datetime.datetime.strftime(day, '%Y/%m/%d') for day in drop_data['date']]
    drop_data.to_csv(fff, header=True, index=False)
else:
    print("没有重复项")

# ---- Log out of baostock ----
bs.logout()