对于交易记录有中断的股票, 我们采用向前填充(forward fill), 即用中断前最后一个有效值一直延续到下一个恢复交易的时间点, 而不是用直线连接中断前后的两点。原因是直线连接(线性插值)会创造出实际不存在的价格, 影响判断.
而对于股票第一条交易记录之前不存在的数据, 我们采用向后填充(backward fill), 即用第一个有效值回填到日期范围的第一天.
向前填充:
df.fillna(method='ffill', inplace=True)
df.fillna(method='pad', inplace=True) # 或者pad
inplace=True 表示直接在原 DataFrame 上修改, 而不是返回一个新的副本
向后填充:
df.fillna(method='bfill', inplace=True)
df.fillna(method='backfill', inplace=True) # 或者backfill
注意: 一定要先使用向前填充, 再使用向后填充。否则中间的缺口会被它之后的数据填充, 相当于提前使用了未来的值(即当时还不存在的值)
全部代码如下
"""Fill missing values"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
def fill_missing_values(df_data):
    """Fill missing values in a data frame, in place.

    Gaps are first filled forward (the last known value is carried up to the
    next valid observation) and only then filled backward, so that backward
    filling touches nothing but the leading rows that precede a column's
    first valid value.

    Args:
        df_data: DataFrame to fill; it is modified in place.

    Returns:
        The same ``df_data`` object, for convenience.
    """
    # DataFrame.ffill/bfill replace the deprecated fillna(method=...) spelling.
    # Order matters: forward fill first so interior gaps never receive values
    # taken from later rows.
    df_data.ffill(inplace=True)
    df_data.bfill(inplace=True)
    return df_data
def symbol_to_path(symbol, base_dir="data"):
    """Build the path of the CSV file that holds data for a ticker symbol.

    The file name is the string form of ``symbol`` with a ``.csv`` suffix,
    joined onto ``base_dir``.
    """
    file_name = "{}.csv".format(str(symbol))
    return os.path.join(base_dir, file_name)
def get_data(symbols, dates):
    """Read stock data (adjusted close) for given symbols from CSV files.

    Args:
        symbols: Iterable of ticker symbols. "SPY" is loaded as well (placed
            first) when it is not already listed. The caller's sequence is
            never modified.
        dates: Index of the dates to load, e.g. a ``pd.date_range``.

    Returns:
        DataFrame indexed by ``dates`` with one column per symbol holding its
        "Adj Close" values. Rows where SPY has no value are dropped.
    """
    # Work on a private copy: inserting SPY into the argument itself would
    # silently change the caller's list (and fail outright for tuples).
    symbols = list(symbols)
    if "SPY" not in symbols:  # add SPY for reference, if absent
        symbols.insert(0, "SPY")

    df_final = pd.DataFrame(index=dates)
    for symbol in symbols:
        df_temp = pd.read_csv(
            symbol_to_path(symbol),
            parse_dates=True,
            index_col="Date",
            usecols=["Date", "Adj Close"],
            na_values=["nan"],
        )
        # Name the single price column after the ticker so the joined frame
        # ends up with one column per symbol.
        df_temp = df_temp.rename(columns={"Adj Close": symbol})
        df_final = df_final.join(df_temp)
        if symbol == "SPY":  # drop dates SPY did not trade
            df_final = df_final.dropna(subset=["SPY"])
    return df_final
def plot_data(df_data, title="Stock Data", fontsize=12):
    """Plot stock data with appropriate axis labels and show the figure.

    Args:
        df_data: DataFrame to plot; each column is drawn as one line.
        title: Title placed above the plot.
        fontsize: Font size of the tick labels. The previous hard-coded
            value of 2 made them unreadable.
    """
    ax = df_data.plot(title=title, fontsize=fontsize)
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    plt.show()
def test_run():
    """Load the sample symbols, fill their gaps and plot the result."""
    tickers = ["JAVA", "FAKE1", "FAKE2"]

    # Calendar-day index covering the whole period of interest.
    date_index = pd.date_range("2005-12-31", "2014-12-07")

    prices = get_data(tickers, date_index)

    # Fills in place, so the returned frame does not need to be captured.
    fill_missing_values(prices)

    plot_data(prices)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    test_run()
网友评论