1、AKShare简介
AKShare 是基于 Python 的财经数据接口库, 目的是实现对股票、期货、期权、基金、外汇、债券、指数、加密货币等金融产品的基本面数据、实时和历史行情数据、衍生数据从数据采集、数据清洗到数据落地的一套工具, 主要用于学术研究目的。
AKShare 的特点是获取的是相对权威的财经数据网站公布的原始数据, 通过利用原始数据进行各数据源之间的交叉验证, 进而再加工, 从而得出科学的结论.
- 开源财经数据接口库(官方文档):https://www.akshare.xyz/
2、基本操作与使用
- 安装第三方库 AKShare
pip install akshare
- 查看 金融类 知名美股
# Import the AKShare library
import akshare as ak

# Real-time quotes of well-known US stocks, restricted to the
# financial category ('金融类')
stock_us_famous_spot_em_df = ak.stock_us_famous_spot_em(
    symbol='金融类',
)
stock_us_famous_spot_em_df
- 查看科技类美股
# Same query for the technology category ('科技类'); the result replaces
# the previous value of stock_us_famous_spot_em_df
stock_us_famous_spot_em_df = ak.stock_us_famous_spot_em(
    symbol='科技类',
)
stock_us_famous_spot_em_df
- 检索得到银行的股票代码
- 查看所有在美股上市的公司,通过此来查找股票代码
# Fetch the table returned by ak.stock_us_spot_em() and keep it as
# stock_code; it is searched by its '代码' (code) and '名称' (name) columns.
stock_code = ak.stock_us_spot_em()
# Display the table in the notebook
stock_code
- 通过股票代码检索公司名称:就是在一个DataFrame中检索信息
import pandas as pd

# Ticker codes to look up, written exactly as they appear in the '代码'
# column of stock_code
code = ['106.BAC','106.BCS','106.BK','106.C','106.CS','106.DB','106.GS','106.JPM','106.MS','106.RY','106.UBS']

# Resolve each code to its company name. Rows are selected with a boolean
# mask and the name is read from the '名称' column by name, so the lookup
# does not depend on the frame's index labels or on the column order.
company_name = []
for c in code:
    names = stock_code.loc[stock_code['代码'] == c, '名称']
    # .iloc[0] takes the first match and raises IndexError when the code
    # is absent from stock_code
    company_name.append(names.iloc[0])
company_name
- 在数据集中查看公司的市值
# Total market value (read from the live quote table, so it reflects the
# day of the query and changes between runs)
import pandas as pd

company_name = ['美国银行', '巴克莱', '纽约梅隆银行', '花旗集团', '瑞士信贷',
                '德意志银行', '高盛', '摩根大通', '摩根士丹利', '加拿大皇家银行', '瑞银集团']

# Divisor that converts the raw market value into units of 100 million (亿)
HUNDRED_MILLION = 100000000

sum_value = {}
for c in company_name:
    # Select by the '名称' column and read '总市值' by name instead of by
    # column position; .iloc[0] raises IndexError when the name is missing.
    market_values = stock_code.loc[stock_code['名称'] == c, '总市值']
    # float() keeps the stored value a plain Python float
    sum_value[c] = float(market_values.iloc[0]) / HUNDRED_MILLION

# Sort by value, largest first; sum_value becomes a list of
# (company name, market value) tuples
sum_value = sorted(sum_value.items(), key=lambda d: d[1], reverse=True)
sum_value
- 绘制市值柱状图
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
plt.rcParams['font.sans-serif'] = ['Times New Roman']
plt.rcParams['axes.unicode_minus'] = False
plt.rcParams['figure.figsize'] = (25, 10) # Change the size of plots
plt.rcParams['font.size'] = '20' # 设置字体大小
import warnings
warnings.filterwarnings('ignore')
# plt.figure(1, figsize=(26, 13), dpi=200)
df.plot.bar('company_name', ['company_value','company_jobs']) # 指定xy轴
plt.savefig('分布图.png', dpi=500, bbox_inches='tight') # 解决图片不清晰,不完整的问题
plt.show()
3、股票数据下载
关于股价的前复权和后复权:
“总而言之,前复权与后复权一样,都是对股价和成交量进行权息修复,按照股票的实际涨跌绘制股价走势图,并把成交量调整为相同的股本口径,以避免除权给投资者带来的误导。”
- 获得美国银行在2012年1月-2016年6月的股票数据
# Bank of America (investment banking), code 106.BAC.
# adjust="" (the default) returns unadjusted prices, adjust="qfq" returns
# forward-adjusted prices and adjust="hfq" returns backward-adjusted prices.
df = ak.stock_us_hist(
    symbol='106.BAC',
    start_date="20120101",
    end_date="20160630",
    adjust="qfq",
)
df
- 绘制股价涨跌幅的变化曲线
# Draw the '涨跌幅' (price change, presumably in percent - TODO confirm)
# series once, then add horizontal reference lines at 0, +5 and -5 on the
# same axes instead of re-plotting the series for every line.
ax = df['涨跌幅'].plot(grid=True)
for level, colour in ((0, "black"), (5, "red"), (-5, "blue")):
    ax.axhline(y=level, color=colour, lw=2)
- 下载每个公司的数据集
def download_data(code, path, company_name, start_date="20120101",
                  end_date="20160630", adjust="qfq"):
    """Download US stock history for each code and save one CSV per company.

    The default date range was chosen to line up with another dataset used
    in the later analysis, with the stock data running about half a year
    longer (the original note reads "出组车数据集", presumably a typo for the
    taxi dataset "出租车" - TODO confirm).

    Args:
        code: Sequence of ticker codes, each passed to ``ak.stock_us_hist``
            as ``symbol`` (e.g. '106.BAC').
        path: Prefix prepended verbatim to every file name; to write into a
            directory it must end with a path separator.
        company_name: File-name stems, one per entry of ``code`` and in the
            same order; ``company_name[i]`` names the file for ``code[i]``.
        start_date: First date requested, as 'YYYYMMDD'.
        end_date: Last date requested, as 'YYYYMMDD'.
        adjust: Price adjustment mode forwarded to ``ak.stock_us_hist``
            ("" unadjusted, "qfq" forward-adjusted, "hfq" backward-adjusted).

    Raises:
        ValueError: If ``code`` and ``company_name`` differ in length.
    """
    # Reject mismatched inputs before any download starts, so a short or
    # long name list cannot silently produce missing or mis-paired files.
    if len(code) != len(company_name):
        raise ValueError(
            "code and company_name must have the same length, got "
            f"{len(code)} and {len(company_name)}"
        )

    for symbol, name in zip(code, company_name):
        df = ak.stock_us_hist(symbol=symbol, start_date=start_date,
                              end_date=end_date, adjust=adjust)
        # Save the data; utf_8_sig writes UTF-8 with a byte-order mark
        df.to_csv(path + name + '.csv', encoding="utf_8_sig")
    print("已完成数据的下载并保存!")
# Ticker code -> file-name stem. Each code is written next to the company it
# belongs to, so every CSV is named after the company whose data it holds
# (two parallel lists in different orders would save, for example, the
# 106.BAC data under the name of another bank).
code_to_company = {
    '106.BAC': 'Bank of America',
    '106.BCS': 'Barclays',
    '106.BK': 'BNY Mellon',
    '106.C': 'Citigroup',
    '106.CS': 'Credit Suisse',
    '106.DB': 'Deutsche Bank',
    '106.GS': 'Goldman Sachs',
    '106.JPM': 'JPMorgan Chase',
    '106.MS': 'Morgan Stanley',
    '106.RY': 'RBC Capital Markets',
    '106.UBS': 'UBS',
}
# Stock codes and the matching company names, in the same order
code = list(code_to_company)
company_name = list(code_to_company.values())
# Output directory (an absolute path). The trailing separator is required
# because download_data appends the file name directly to this string.
path = 'C:\\Users\\JunLiu\\Desktop\\数据分析\\'
download_data(code, path, company_name)
4、股票数据可视化
- python数据分析股票 https://blog.csdn.net/wretch_of_Felix/article/details/82773457
- 数据可视化:https://blog.csdn.net/s1164548515/article/details/89451185
- Python数据分析:股票数据分析案例 https://blog.csdn.net/weixin_41792682/article/details/89683880 (有时间预测)
- 可用数据抽取
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# IPython magic: render matplotlib figures inline in the notebook
%matplotlib inline
import seaborn as sns
# Control the default size of figures in this Jupyter notebook
# NOTE(review): %pylab also pulls numpy/matplotlib names into the global
# namespace - confirm this is intended.
%pylab inline
# Use SimHei for sans-serif text (presumably so Chinese text can be
# rendered - TODO confirm)
plt.rcParams['font.sans-serif'] = ['SimHei']
# Draw the minus sign as an ASCII hyphen rather than the Unicode minus
plt.rcParams['axes.unicode_minus'] = False
# NOTE(review): this sets the font family to Times New Roman after the
# SimHei setting above - confirm which font is meant to apply.
plt.rc('font',family='Times New Roman')
import warnings
# Suppress all warnings for the rest of the session
warnings.filterwarnings('ignore')
company_name = ['Citigroup', 'JPMorgan Chase' ,'Bank of America','BNY Mellon', 'Barclays' ,'Goldman Sachs' ,
                'Morgan Stanley','UBS', 'RBC Capital Markets', 'Deutsche Bank' ,'Credit Suisse']

# Directory holding the CSV files; it must be the directory the download
# step wrote to, and it ends with a separator because file names are
# appended directly.
path = 'C:\\Users\\JunLiu\\Desktop\\数据分析\\'

# (column name in `stocks`, CSV file stem), listed in the column order that
# `stocks` must have.
stock_files = [
    ("Citigroup", 'Citigroup'),
    ("JPMorgan_Chase", 'JPMorgan Chase'),
    ("Bank_of_America", 'Bank of America'),
    ("Barclays", 'Barclays'),
    ("BNY Mellon", 'BNY Mellon'),
    ("Goldman_Sachs", 'Goldman Sachs'),
    ("Morgan_Stanley", 'Morgan Stanley'),
    ("UBS", 'UBS'),
    ("RBC_Capital_Markets", 'RBC Capital Markets'),
    ("Deutsche_Bank", 'Deutsche Bank'),
    ("Credit_Suisse", 'Credit Suisse'),
]

# Read every file the same way and keep only the closing price ('收盘'),
# indexed by the date column ('日期').
closing_prices = {
    column: pd.read_csv(path + stem + '.csv', index_col="日期")["收盘"]
    for column, stem in stock_files
}

# Combine the series on their date index, so each row holds prices of one
# and the same day even if a file lacks some dates, then expose the date as
# the first column, named "Date".
stocks = (
    pd.concat(closing_prices, axis=1)
    .sort_index()
    .rename_axis("Date")
    .reset_index()
)
stocks.head()
- 数据可视化
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import matplotlib.dates as mdates
from dateutil import parser
# Columns of `stocks` to draw. Each curve is selected by column name, and
# its legend label is derived from that same name, so a label can never be
# attached to another company's data.
company_columns = ['Citigroup', 'JPMorgan_Chase', 'Bank_of_America', 'Barclays', 'BNY Mellon',
                   'Goldman_Sachs', 'Morgan_Stanley', 'UBS', 'RBC_Capital_Markets',
                   'Deutsche_Bank', 'Credit_Suisse']
company_name = [column.replace('_', ' ') for column in company_columns]

plt.rcParams['figure.figsize'] = (25, 10)  # Change the size of plots

# The date column is shared by all companies, so parse it once
data_date = [parser.parse(d) for d in stocks['Date']]

plt.subplot(1, 1, 1)
# Draw one curve per company on the same axes
for column, label in zip(company_columns, company_name):
    plt.plot(data_date, stocks[column], label=label)

# Show the legend
font1 = {'weight': 'normal', 'size': 15}
plt.legend(loc="upper right", prop=font1)
# Show grid lines
plt.grid(True)
# Configure the x-axis: one major tick per year, formatted as month/day/year
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%m/%d/%Y'))
plt.gca().xaxis.set_major_locator(mdates.YearLocator())
plt.gcf().autofmt_xdate()  # Rotate the date labels automatically
plt.title('company stock')
font2 = {'family': 'Times New Roman',
         'weight': 'normal',
         'size': 15,
         }
plt.xlabel('Date', fontdict=font2)
plt.ylabel('Value', fontdict=font2)
plt.show()
- 绘制Morgan_Stanley公司股票数据的20日和60日均线
from dateutil import parser
# Add the 20-day (about one month) and 60-day (about one quarter) moving
# averages of the Morgan_Stanley column, rounded to 2 decimals.
stocks["Morgan_Stanley_20d"] = np.round(stocks["Morgan_Stanley"].rolling(window=20, center=False).mean(), 2)
stocks["Morgan_Stanley_60d"] = np.round(stocks["Morgan_Stanley"].rolling(window=60, center=False).mean(), 2)

plt.rcParams['figure.figsize'] = (25, 10)  # Change the size of plots

# Parse the date column (first column of `stocks`) and use it as the x-axis,
# so the axis labelled 'Date' really shows dates rather than row numbers.
data_date = [parser.parse(d) for d in stocks.iloc[:, 0]]

# Draw the price and both averages; each legend entry is the column name
for column in ("Morgan_Stanley", "Morgan_Stanley_20d", "Morgan_Stanley_60d"):
    plt.plot(data_date, stocks[column], label=column)
plt.grid(True)

font1 = {'weight': 'normal', 'size': 15}
plt.legend(loc="upper right", prop=font1)
font2 = {'family': 'Times New Roman',
         'weight': 'normal',
         'size': 15,
         }
plt.xlabel('Date', fontdict=font2)
plt.ylabel('Value', fontdict=font2)
plt.show()
- 由于Morgan_Stanley 的数据趋势差异明显,删除Morgan_Stanley 后进行绘制
# Remove the Morgan_Stanley column from `stocks` in place
stocks.drop(columns=['Morgan_Stanley'], inplace=True)

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import matplotlib.dates as mdates
from dateutil import parser

# Remaining company columns of `stocks`. Each curve is selected by column
# name, and its legend label is derived from that same name, so a label can
# never be attached to another company's data.
company_columns = ['Citigroup', 'JPMorgan_Chase', 'Bank_of_America', 'Barclays', 'BNY Mellon',
                   'Goldman_Sachs', 'UBS', 'RBC_Capital_Markets', 'Deutsche_Bank',
                   'Credit_Suisse']
company_name = [column.replace('_', ' ') for column in company_columns]

plt.rcParams['figure.figsize'] = (25, 10)  # Change the size of plots

# The date column (first column of `stocks`) is shared by all companies,
# so parse it once
data_date = [parser.parse(d) for d in stocks.iloc[:, 0]]

plt.subplot(1, 1, 1)
# Draw one curve per company on the same axes
for column, label in zip(company_columns, company_name):
    plt.plot(data_date, stocks[column], label=label)

# Show the legend
font1 = {'weight': 'normal', 'size': 15}
plt.legend(loc="upper right", prop=font1)
# Show grid lines
plt.grid(True)
# Configure the x-axis: one major tick per year, formatted as month/day/year
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%m/%d/%Y'))
plt.gca().xaxis.set_major_locator(mdates.YearLocator())
plt.gcf().autofmt_xdate()  # Rotate the date labels automatically
plt.title('The stock of company', fontsize=15)
font2 = {'family': 'Times New Roman',
         'weight': 'normal',
         'size': 15,
         }
plt.xlabel('Date', fontdict=font2)
plt.ylabel('Value', fontdict=font2)
plt.show()
- 绘制20日滑动窗口均线数据
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import matplotlib.dates as mdates
from dateutil import parser
# Company columns of `stocks` to smooth and draw. Each curve is selected by
# column name, and its legend label is derived from that same name, so a
# label can never be attached to another company's data.
company_columns = ['Citigroup', 'JPMorgan_Chase', 'Bank_of_America', 'Barclays', 'BNY Mellon',
                   'Goldman_Sachs', 'UBS', 'RBC_Capital_Markets', 'Deutsche_Bank',
                   'Credit_Suisse']
company_name = [column.replace('_', ' ') for column in company_columns]

plt.rcParams['figure.figsize'] = (25, 10)  # Change the size of plots

# The date column (first column of `stocks`) is shared by all companies,
# so parse it once
data_date = [parser.parse(d) for d in stocks.iloc[:, 0]]

plt.subplot(1, 1, 1)
for column, label in zip(company_columns, company_name):
    # 20-row rolling mean rounded to 2 decimals, kept in a local variable so
    # that `stocks` is not given a scratch column on every pass
    rolling_mean_20d = np.round(stocks[column].rolling(window=20, center=False).mean(), 2)
    plt.plot(data_date, rolling_mean_20d, label=label)

# Show the legend
font1 = {'weight': 'normal', 'size': 15}
plt.legend(loc="upper right", prop=font1)
# Show grid lines
plt.grid(True)
# Configure the x-axis: one major tick per year, formatted as month/day/year
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%m/%d/%Y'))
plt.gca().xaxis.set_major_locator(mdates.YearLocator())
plt.gcf().autofmt_xdate()  # Rotate the date labels automatically
plt.title('The stock of company', fontsize=15)
font2 = {'family': 'Times New Roman',
         'weight': 'normal',
         'size': 15,
         }
plt.xlabel('Date', fontdict=font2)
plt.ylabel('Value', fontdict=font2)
plt.show()
网友评论