Download a CSV file from Yahoo Finance
urllib urlretrieve
# Fetch historical quotes for ticker 000001.sz as CSV and save them locally.
#
# Python 2 equivalent of the import below:
#     from urllib import urlretrieve
# Python 3:
from urllib import request

# Download the CSV file to the local working directory as 'pingan.csv'.
# NOTE(review): this depends on the remote endpoint still being served --
# confirm the URL is reachable before relying on this snippet.
request.urlretrieve('http://table.finance.yahoo.com/table.csv?s=000001.sz', 'pingan.csv')
CSV Module
import csv
import os

# Demo of csv.reader / csv.writer: print every row of 'pingan.csv', then copy
# its first three rows into 'pingan_copy.csv'.
#
# Both files are opened in text mode with newline='' so that the csv module
# handles line endings itself (see the csv module documentation).
with open('pingan.csv', 'r', newline='') as rf:
    reader = csv.reader(rf)
    print(reader)  # prints the reader object itself, not the rows
    for row in reader:
        print(row)

    # The loop above consumed the whole file. Rewind the file pointer to the
    # start, otherwise the next(reader) calls below would raise an error.
    rf.seek(0)  # equivalent: rf.seek(0, os.SEEK_SET)

    # Text mode ('w'), not 'wb': in Python 3 csv.writer writes str, which a
    # file opened in binary mode rejects.
    with open('pingan_copy.csv', 'w', newline='') as wf:
        writer = csv.writer(wf)
        # Copy the first three rows (header + two data rows).
        writer.writerow(next(reader))
        writer.writerow(next(reader))
        writer.writerow(next(reader))
        wf.flush()  # make the rows visible in the file immediately

# Python 2 -> Python 3 porting notes for this snippet:
#
# * The open modes must be changed from 'rb'/'wb' to 'r'/'w'. With
#       with open('pingan.csv', 'rb') as rf:
#       with open('pingan_copy.csv', 'wb') as wf:
#   reading fails with
#       _csv.Error: iterator should return strings, not bytes
#       (did you open the file in text mode?)
#
# * reader.next() must be replaced by next(reader). With
#       writer.writerow(reader.next())
#   the call fails with
#       '_csv.reader' object has no attribute 'next'
Final code
import csv

# Filter 'pingan.csv' into 'pingan2.csv': keep the header row plus every row
# dated 2016-01-01 or later whose volume is at least 50,000,000.
#
# newline='' on both files lets the csv module control line endings; without
# it the writer can emit an extra blank line after each row on Windows.
with open('pingan.csv', 'r', newline='') as rf:
    reader = csv.reader(rf)
    with open('pingan2.csv', 'w', newline='') as wf:
        writer = csv.writer(wf)

        # The first row is the header, not data: copy it through unchanged.
        # next(..., None) avoids a StopIteration crash on an empty input file.
        headers = next(reader, None)
        if headers is not None:
            writer.writerow(headers)

        # Sample of the input rows:
        #   ['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close']
        #   ['2016-09-09', '9.40', '9.43', '9.36', '9.38', '32743100', '9.38']
        # Columns are accessed by index, e.g. Date is row[0] and Volume is
        # row[5]. Every value is a string, not a number, so numeric columns
        # must be converted before numeric comparison.
        for row in reader:
            if not row:
                # csv.reader yields [] for a blank line; nothing to inspect.
                continue
            # Dates in YYYY-MM-DD form can be compared directly as strings.
            # Stopping at the first older row assumes the rows are ordered
            # newest first, as in the sample above -- TODO confirm.
            if row[0] < '2016-01-01':
                break
            if int(row[5]) >= 50000000:
                writer.writerow(row)

print('end')
网友评论