美文网首页
美股部分银行股票数据分析

美股部分银行股票数据分析

作者: ShowMeCoding | 来源:发表于2022-06-01 16:32 被阅读0次

1、AKShare简介

AKShare 是基于 Python 的财经数据接口库, 目的是实现对股票、期货、期权、基金、外汇、债券、指数、加密货币等金融产品的基本面数据、实时和历史行情数据、衍生数据从数据采集、数据清洗到数据落地的一套工具, 主要用于学术研究目的。
AKShare 的特点是获取的是相对权威的财经数据网站公布的原始数据, 通过利用原始数据进行各数据源之间的交叉验证, 进而再加工, 从而得出科学的结论。

2、基本操作与使用

  • 安装第三方库
# Install the AKShare package with pip
pip install akshare
  • 查看 金融类 知名美股
# Import the AKShare library
import akshare as ak

# US equities: real-time quotes for well-known US stocks in the '金融类' (finance) category
stock_us_famous_spot_em_df = ak.stock_us_famous_spot_em(symbol='金融类')
# Bare expression: shows the DataFrame when it is the last line of a notebook cell
stock_us_famous_spot_em_df
  • 查看科技类美股
# Well-known US stocks in the '科技类' (technology) category.
# Note: this rebinds the same variable used for the finance-category result.
stock_us_famous_spot_em_df = ak.stock_us_famous_spot_em(symbol='科技类')
# Bare expression: shows the DataFrame when it is the last line of a notebook cell
stock_us_famous_spot_em_df
  • 检索得到银行的股票代码
  • 查看所有在美股上市的公司,通过此来查找股票代码
# Quote table for all US-listed companies; later cells search it for tickers
# ('代码' column) and company names ('名称' column).
stock_code = ak.stock_us_spot_em()
# Bare expression: shows the DataFrame when it is the last line of a notebook cell
stock_code
  • 通过股票代码检索公司名称:就是在一个DataFrame中检索信息
import pandas as pd

# Tickers of the banks under study, written as they appear in the '代码' column.
code = ['106.BAC','106.BCS','106.BK','106.C','106.CS','106.DB','106.GS','106.JPM','106.MS','106.RY','106.UBS']

# Resolve each ticker to its company name by column label.
# The previous version passed index *labels* to ``iloc`` and picked the name by
# column position, which only works while ``stock_code`` keeps its default
# RangeIndex and its current column order.
company_name = []
for c in code:
    matches = stock_code.loc[stock_code['代码'] == c, '名称']
    if matches.empty:
        # Fail with the offending ticker instead of an opaque IndexError.
        raise KeyError(f"ticker {c!r} not found in stock_code['代码']")
    company_name.append(matches.iloc[0])
# Bare expression: shows the list when it is the last line of a notebook cell
company_name
  • 在数据集中查看公司的市值
# Total market capitalisation (a same-day snapshot; the quote table is refreshed in real time)
import pandas as pd
company_name = ['美国银行', '巴克莱', '纽约梅隆银行', '花旗集团', '瑞士信贷',
                '德意志银行', '高盛', '摩根大通', '摩根士丹利', '加拿大皇家银行', '瑞银集团']
sum_value = {}
for c in company_name:
    # Select by column label ('总市值' = total market cap) rather than by the
    # hard-coded position 9, and avoid feeding index labels to ``iloc``.
    matches = stock_code.loc[stock_code['名称'] == c, '总市值']
    if matches.empty:
        raise KeyError(f"company {c!r} not found in stock_code['名称']")
    # Store in the dict, with the unit normalised to 100 million USD (亿美元)
    sum_value[c] = float(matches.iloc[0]) / 100000000

# Sort by market cap, descending. From here on ``sum_value`` is a list of
# (name, value) tuples, no longer a dict.
sum_value = sorted(sum_value.items(), key=lambda d: d[1], reverse=True)
# Bare expression: shows the list when it is the last line of a notebook cell
sum_value
  • 绘制市值柱状图
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
plt.rcParams['font.sans-serif'] = ['Times New Roman']
plt.rcParams['axes.unicode_minus'] = False
plt.rcParams['figure.figsize'] = (25, 10)   # Change the size of plots
plt.rcParams['font.size'] = '20' # 设置字体大小 
import warnings
warnings.filterwarnings('ignore')

# plt.figure(1, figsize=(26, 13), dpi=200)
df.plot.bar('company_name', ['company_value','company_jobs']) # 指定xy轴

plt.savefig('分布图.png', dpi=500, bbox_inches='tight') # 解决图片不清晰,不完整的问题
plt.show()

3、股票数据下载

关于股价的前复权和后复权:
“总而言之,前复权与后复权一样,都是对股价和成交量进行权息修复,按照股票的实际涨跌绘制股价走势图,并把成交量调整为相同的股本口径,以避免除权给投资者带来的误导。”

  • 获得美国银行在2012年1月-2016年6月的股票数据
# Bank of America (investment banking), ticker 106.BAC
# adjust="" (the default) returns unadjusted prices; adjust="qfq" returns
# forward-adjusted prices; adjust="hfq" returns backward-adjusted prices.
df = ak.stock_us_hist(symbol='106.BAC', start_date="20120101", end_date="20160630", adjust="qfq")
# Bare expression: shows the DataFrame when it is the last line of a notebook cell
df
  • 绘制股价涨跌幅的变化曲线
# Plot the '涨跌幅' (price change) series once, then add the reference lines to
# the same axes. The original re-plotted the whole series for every reference
# line, stacking three identical curves on top of each other.
ax = df['涨跌幅'].plot(grid=True)
# Zero line plus upper/lower markers at +5 and -5 (presumably percent - confirm units)
for level, color in ((0, "black"), (5, "red"), (-5, "blue")):
    ax.axhline(y=level, color=color, lw=2)
  • 下载每个公司的数据集
def download_data(code, path, company_name, *, start_date="20120101",
                  end_date="20160630", adjust="qfq"):
    """Download price history for each ticker and save it as one CSV per company.

    Args:
        code: Sequence of tickers passed to ``ak.stock_us_hist`` (e.g. '106.BAC').
        path: Directory the CSV files are written to; created if missing.
            An empty string means the current working directory.
        company_name: Sequence of file base names, paired with ``code`` by position.
        start_date: First date to fetch, 'YYYYMMDD'.
        end_date: Last date to fetch, 'YYYYMMDD'.
        adjust: '' for unadjusted prices, 'qfq' for forward-adjusted,
            'hfq' for backward-adjusted.

    Raises:
        ValueError: If ``code`` and ``company_name`` differ in length.
    """
    import os

    # A silent length mismatch would pair tickers with the wrong file names.
    if len(code) != len(company_name):
        raise ValueError(
            f"code has {len(code)} entries but company_name has {len(company_name)}"
        )
    if path:
        os.makedirs(path, exist_ok=True)

    # NOTE(review): the default date range is said to be aligned with another
    # dataset (presumably a taxi dataset - the original note has a typo), with
    # the stock data lagging by half a year for the later analysis.
    for symbol, name in zip(code, company_name):
        df = ak.stock_us_hist(symbol=symbol, start_date=start_date,
                              end_date=end_date, adjust=adjust)
        # utf_8_sig writes a BOM along with the Chinese column headers
        df.to_csv(os.path.join(path, name + '.csv'), encoding="utf_8_sig")
    print("已完成数据的下载并保存!")

# Ticker -> CSV base name. Each pair sits on one line so the two lists built
# below cannot drift out of order; the original parallel lists were ordered
# differently, so e.g. 106.BAC (Bank of America) was saved as 'Citigroup.csv'.
bank_by_code = {
    '106.BAC': 'Bank of America',
    '106.BCS': 'Barclays',
    '106.BK': 'BNY Mellon',
    '106.C': 'Citigroup',
    '106.CS': 'Credit Suisse',
    '106.DB': 'Deutsche Bank',
    '106.GS': 'Goldman Sachs',
    '106.JPM': 'JPMorgan Chase',
    '106.MS': 'Morgan Stanley',
    '106.RY': 'RBC Capital Markets',
    '106.UBS': 'UBS',
}
code = list(bank_by_code)
company_name = list(bank_by_code.values())
# Output directory (an absolute path)
path = 'C:\\Users\\JunLiu\\Desktop\\数据分析\\'
download_data(code, path, company_name)

4、股票数据可视化

  • 可用数据抽取
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

# Control the default size of figures in this Jupyter notebook
%pylab inline
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
plt.rc('font',family='Times New Roman')

import warnings
warnings.filterwarnings('ignore')
company_name = ['Citigroup', 'JPMorgan Chase' ,'Bank of America','BNY Mellon', 'Barclays' ,'Goldman Sachs' ,
                'Morgan Stanley','UBS', 'RBC Capital Markets', 'Deutsche Bank' ,'Credit Suisse']

# 同时读取多个数据集
path = 'C:\\Users\\Desktop\\数据分析\\'
Citigroup_stock = pd.read_csv(path + 'Citigroup.csv')
JPMorgan_Chase_stock = pd.read_csv(path + 'JPMorgan Chase.csv')
Bank_of_America_stock = pd.read_csv(path + 'Bank of America.csv')
BNY_Mellon_stock = pd.read_csv(path + 'BNY Mellon.csv')
Barclays_stock = pd.read_csv(path + 'Barclays.csv')
Goldman_Sachs_stock = pd.read_csv(path + 'Goldman Sachs.csv')
Morgan_Stanley_stock = pd.read_csv(path + 'Morgan Stanley.csv')
UBS_stock = pd.read_csv(path + 'UBS.csv')
RBC_Capital_Markets_stock = pd.read_csv(path + 'RBC Capital Markets.csv')
Deutsche_Bank_stock = pd.read_csv(path + 'Deutsche Bank.csv')
Credit_Suisse_stock = pd.read_csv(path + 'Credit Suisse.csv')

stocks = pd.DataFrame({"Date": Citigroup_stock["日期"],
                       "Citigroup": Citigroup_stock["收盘"],
                       "JPMorgan_Chase": JPMorgan_Chase_stock["收盘"],
                       "Bank_of_America": Bank_of_America_stock["收盘"],
                       "Barclays": Barclays_stock["收盘"],
                       "BNY Mellon":BNY_Mellon_stock["收盘"],
                       "Goldman_Sachs": Goldman_Sachs_stock["收盘"],
                       "Morgan_Stanley": Morgan_Stanley_stock["收盘"],
                       "UBS": UBS_stock["收盘"],
                       "RBC_Capital_Markets": RBC_Capital_Markets_stock["收盘"],
                       "Deutsche_Bank": Deutsche_Bank_stock["收盘"],
                       "Credit_Suisse": Credit_Suisse_stock["收盘"]
                      })
stocks.head()
  • 数据可视化
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import matplotlib.dates as mdates
from dateutil import parser

company_name = ['Citigroup', 'JPMorgan Chase' ,'Bank of America','BNY Mellon', 'Barclays' ,'Goldman Sachs' ,
                'Morgan Stanley','UBS', 'RBC Capital Markets', 'Deutsche Bank' ,'Credit Suisse']

# Parse the date column (first column of ``stocks``) once, not once per company.
data_date_str = stocks.iloc[:, 0]
data_date = list(map(parser.parse, data_date_str))

plt.rcParams['figure.figsize'] = (25, 10)   # Change the size of plots
fig, ax = plt.subplots()

# Plot the company columns that follow the date column. The legend label is
# taken from the column that is actually plotted; labelling with
# ``company_name[i]`` mislabels curves whenever the list order and the column
# order differ.
company_columns = stocks.iloc[:, 1:len(company_name) + 1]
for column, closes in company_columns.items():
    ax.plot(data_date, closes, label=str(column).replace('_', ' '))

# Legend
font1 = {'weight': 'normal', 'size': 15}
ax.legend(loc="upper right", prop=font1)

# Grid lines
ax.grid(True)

# X axis: one major tick per year; swap YearLocator for MonthLocator or
# DayLocator to get finer ticks.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%m/%d/%Y'))
ax.xaxis.set_major_locator(mdates.YearLocator())
fig.autofmt_xdate()  # Rotate the date labels automatically

ax.set_title('company stock')

font2 = {'family': 'Times New Roman',
         'weight': 'normal',
         'size': 15,
         }
ax.set_xlabel('Date', fontdict=font2)
ax.set_ylabel('Value', fontdict=font2)
plt.show()
  • 绘制Morgan_Stanley公司股票数据的20日和60日均线
from dateutil import parser
import matplotlib.dates as mdates

# 20-day and 60-day rolling means of the Morgan Stanley column, rounded to two
# decimals and stored as new columns of ``stocks``.
stocks["Morgan_Stanley_20d"] = stocks["Morgan_Stanley"].rolling(window=20, center=False).mean().round(2)
stocks["Morgan_Stanley_60d"] = stocks["Morgan_Stanley"].rolling(window=60, center=False).mean().round(2)

plt.rcParams['figure.figsize'] = (25, 10)   # Change the size of plots

data_date_str = stocks.iloc[:, 0]
data_date = list(map(parser.parse, data_date_str))

# Plot against the parsed dates. The original computed ``data_date`` but then
# plotted against the row index, so the axis labelled 'Date' showed row numbers.
fig, ax = plt.subplots()
for column in ("Morgan_Stanley", "Morgan_Stanley_20d", "Morgan_Stanley_60d"):
    ax.plot(data_date, stocks[column], label=column)
ax.grid(True)

# X axis: one major tick per year
ax.xaxis.set_major_formatter(mdates.DateFormatter('%m/%d/%Y'))
ax.xaxis.set_major_locator(mdates.YearLocator())
fig.autofmt_xdate()  # Rotate the date labels automatically

font1 = {'weight': 'normal', 'size': 15}
ax.legend(loc="upper right", prop=font1)

font2 = {'family': 'Times New Roman',
         'weight': 'normal',
         'size': 15,
         }
ax.set_xlabel('Date', fontdict=font2)
ax.set_ylabel('Value', fontdict=font2)
plt.show()
  • 由于Morgan_Stanley 的数据趋势差异明显,删除Morgan_Stanley 后进行绘制
# errors='ignore' keeps the cell re-runnable: a second run would otherwise
# raise KeyError because the column is already gone.
stocks.drop(columns=['Morgan_Stanley'], inplace=True, errors='ignore')

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import matplotlib.dates as mdates
from dateutil import parser

company_name = ['Citigroup', 'JPMorgan Chase' ,'Bank of America','BNY Mellon', 'Barclays' ,'Goldman Sachs' ,
               'UBS', 'RBC Capital Markets', 'Deutsche Bank' ,'Credit Suisse']

# Parse the date column (first column of ``stocks``) once, not once per company.
data_date_str = stocks.iloc[:, 0]
data_date = list(map(parser.parse, data_date_str))

plt.rcParams['figure.figsize'] = (25, 10)   # Change the size of plots
fig, ax = plt.subplots()

# Plot the company columns that follow the date column. The legend label is
# taken from the column that is actually plotted; labelling with
# ``company_name[i]`` mislabels curves whenever the list order and the column
# order differ.
company_columns = stocks.iloc[:, 1:len(company_name) + 1]
for column, data_company in company_columns.items():
    ax.plot(data_date, data_company, label=str(column).replace('_', ' '))

# Legend
font1 = {'weight': 'normal', 'size': 15}
ax.legend(loc="upper right", prop=font1)
# Grid lines
ax.grid(True)

# X axis: one major tick per year; swap YearLocator for MonthLocator or
# DayLocator to get finer ticks.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%m/%d/%Y'))
ax.xaxis.set_major_locator(mdates.YearLocator())
fig.autofmt_xdate()  # Rotate the date labels automatically

ax.set_title('The stock of company', fontsize=15)

font2 = {'family': 'Times New Roman',
         'weight': 'normal',
         'size': 15,
         }
ax.set_xlabel('Date', fontdict=font2)
ax.set_ylabel('Value', fontdict=font2)
plt.show()
  • 绘制20日滑动窗口均线数据
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import matplotlib.dates as mdates
from dateutil import parser

company_name = ['Citigroup', 'JPMorgan Chase' ,'Bank of America','BNY Mellon', 'Barclays' ,'Goldman Sachs' ,
               'UBS', 'RBC Capital Markets', 'Deutsche Bank' ,'Credit Suisse']

# Parse the date column (first column of ``stocks``) once, not once per company.
data_date_str = stocks.iloc[:, 0]
data_date = list(map(parser.parse, data_date_str))

plt.rcParams['figure.figsize'] = (25, 10)   # Change the size of plots
fig, ax = plt.subplots()

# Plot the 20-day rolling mean of each company column that follows the date
# column. The mean is kept in a local variable: the original stored it in a
# scratch 'value_20d' column of ``stocks``, overwritten on every iteration and
# left behind in the DataFrame. The legend label is taken from the column that
# is actually plotted, so it cannot disagree with the data.
company_columns = stocks.iloc[:, 1:len(company_name) + 1]
for column, data_company in company_columns.items():
    data_20 = data_company.rolling(window=20, center=False).mean().round(2)
    ax.plot(data_date, data_20, label=str(column).replace('_', ' '))

# Legend
font1 = {'weight': 'normal', 'size': 15}
ax.legend(loc="upper right", prop=font1)
# Grid lines
ax.grid(True)

# X axis: one major tick per year; swap YearLocator for MonthLocator or
# DayLocator to get finer ticks.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%m/%d/%Y'))
ax.xaxis.set_major_locator(mdates.YearLocator())
fig.autofmt_xdate()  # Rotate the date labels automatically

ax.set_title('The stock of company', fontsize=15)

font2 = {'family': 'Times New Roman',
         'weight': 'normal',
         'size': 15,
         }
ax.set_xlabel('Date', fontdict=font2)
ax.set_ylabel('Value', fontdict=font2)
plt.show()

相关文章

网友评论

      本文标题:美股部分银行股票数据分析

      本文链接:https://www.haomeiwen.com/subject/toeqmrtx.html