处理多只股票
- 创建空的DataFrame
#Build a DataFrame in pandas
import pandas as pd
def test_run():
    """Build an empty DataFrame indexed by a short daily date range.

    The frame has no columns; its index holds every calendar day from
    2010-01-22 through 2010-01-26. Nothing is returned or printed.
    """
    start_date = '2010-01-22'
    end_date = '2010-01-26'
    # One index entry per calendar day between the two endpoints.
    dates = pd.date_range(start_date, end_date)
    df1 = pd.DataFrame(index=dates)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    test_run()
- 连结SPY数据
#SPY(标普500指数) 可以用来参考是不是交易日
import pandas as pd
def test_run():
    """Join SPY adjusted-close prices onto a date-indexed DataFrame.

    Reads ``data/SPY.csv`` (path relative to the working directory), keeps
    only the 'Date' and 'Adj Close' columns, renames 'Adj Close' to 'SPY',
    left-joins it onto an empty frame indexed by 2010-01-22..2010-01-26 and
    drops the rows that have no SPY value. Nothing is returned or printed.
    """
    # Define date range
    start_date = '2010-01-22'
    end_date = '2010-01-26'
    dates = pd.date_range(start_date, end_date)

    # Create an empty dataframe
    df1 = pd.DataFrame(index=dates)

    # Read SPY data into temporary DataFrame
    dfSPY = pd.read_csv(
        "data/SPY.csv",
        index_col="Date",
        parse_dates=True,
        usecols=['Date', 'Adj Close'],
        na_values=['nan'],
    )

    # Rename 'Adj Close' column to 'SPY' to prevent clash
    dfSPY = dfSPY.rename(columns={'Adj Close': 'SPY'})

    # Join the two DataFrames, then drop the rows left with NaN values
    df1 = df1.join(dfSPY)
    df1 = df1.dropna()
    # The same result can be obtained with: df1 = df1.join(dfSPY, how='inner')
- 读取更多股票数据
# Read in more stocks and add one column per symbol to df1.
# NOTE(review): this fragment presumably continues the body of test_run(),
# where df1 is already defined -- confirm before running it on its own.
symbols = ['GOOG', 'IBM', 'GLD']
for symbol in symbols:
    df_temp = pd.read_csv(
        "data/{}.csv".format(symbol),
        index_col='Date',
        parse_dates=True,
        usecols=['Date', 'Adj Close'],
        na_values=['nan'],
    )
    # Rename to prevent a column-name clash between symbols
    df_temp = df_temp.rename(columns={'Adj Close': symbol})
    # join() uses the default how='left', so df1's index is kept as-is
    df1 = df1.join(df_temp)
- 读取数据的实用函数
import os
import pandas as pd
def symbol_to_path(symbol, base_dir="data"):
    """Return the CSV file path for a ticker symbol.

    Args:
        symbol: Ticker symbol; it is converted with ``str()`` before use.
        base_dir: Directory that holds the CSV files.

    Returns:
        str: ``<base_dir>/<symbol>.csv`` joined with ``os.path.join``.
    """
    return os.path.join(base_dir, "{}.csv".format(str(symbol)))
def get_data(symbols, dates):
    """Read adjusted-close prices for the given symbols from CSV files.

    Args:
        symbols: Iterable of ticker symbols. 'SPY' is put in front when it
            is absent; the caller's sequence is not modified.
        dates: Index for the resulting frame (e.g. from ``pd.date_range``).

    Returns:
        pandas.DataFrame: Indexed by ``dates`` with one column per symbol,
        each holding that symbol's 'Adj Close' values. Rows in which SPY has
        no value are dropped.
    """
    df = pd.DataFrame(index=dates)
    if 'SPY' not in symbols:
        # Add SPY for reference if absent. A new list is built rather than
        # calling insert() so the caller's list is left untouched.
        symbols = ['SPY'] + list(symbols)

    for symbol in symbols:
        df_temp = pd.read_csv(
            symbol_to_path(symbol),
            index_col='Date',
            parse_dates=True,
            usecols=['Date', 'Adj Close'],
            na_values=['nan'],
        )
        # Name the price column after the symbol so the joins do not clash.
        df_temp = df_temp.rename(columns={'Adj Close': symbol})
        df = df.join(df_temp)
        if symbol == 'SPY':  # drop dates SPY did not trade
            df = df.dropna(subset=["SPY"])
    return df
#More slicing
def test_run():
    """Load 2010 prices for a few symbols and print the January rows."""
    # Define a date range
    dates = pd.date_range('2010-01-01', '2010-12-31')

    # Choose stock symbols to read; get_data() is expected to add SPY itself
    symbols = ['GOOG', 'IBM', 'GLD']

    # Get stock data
    df = get_data(symbols, dates)

    # Slice by row range (dates) with the label-based .loc selector
    # (.ix no longer exists in current pandas): the month of January.
    print(df.loc['2010-01-01':'2010-01-31'])
- 绘制多只股票的图形
def plot_data(df, title='Stock prices'):
    """Plot stock prices from a DataFrame and show the figure.

    Args:
        df: DataFrame to plot; ``df.plot`` is called on it.
        title: Title passed to ``df.plot``.
    """
    # df.plot returns an axes object, used below to label the chart.
    ax = df.plot(title=title)
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    # The font size can be changed with: df.plot(title=title, fontsize=2)
    # NOTE(review): `plt` is not imported anywhere in the visible file; this
    # presumably needs `import matplotlib.pyplot as plt` at module level --
    # confirm.
    plt.show()
-
movement(变动):股票的相对涨跌
-
标准化 Normalizing
The best way to normalize price data so that all prices start at 1.0 is to divide every row by the first row:
df1 = df1 / df1.iloc[0]
or, equivalently: df1 = df1 / df1.iloc[0, :]
def normalize_data(df):
    """Scale every column so that its first row equals 1.0.

    Args:
        df: DataFrame of prices.

    Returns:
        pandas.DataFrame: ``df`` divided, column by column, by its first row.
    """
    # iloc[0] selects the first row by position; dividing by it aligns on
    # column labels, so each column is scaled by its own starting value.
    return df / df.iloc[0]
- 切片和绘制两只股票的图形
def plot_selected(df, columns, start_index, end_index):
    """Plot the chosen columns over the chosen index range.

    Args:
        df: DataFrame holding the data.
        columns: Column labels to plot.
        start_index: First index label of the slice.
        end_index: Last index label of the slice.
    """
    # Label-based slice of rows and columns; .loc replaces .ix, which no
    # longer exists in current pandas.
    plot_data(df.loc[start_index:end_index, columns],
              title="Selected data")
pandas可处理带有大量统计函数的ndarray
网友评论