一、读写.txt结尾的文件
1.读写.txt结尾的文件
相对路径:相对于当前工作目录而言
r"data/szzs.txt"#相对路径
#加r是为了减少歧义,为了保持路径在读取时不被漏读,错读。
绝对路径:总是从根文件夹开始
r"D:/Jupyter_working_path/data/szzs.txt"#绝对路径
代码打开文件:
method 1
with open(filepath) as filehandle:
for line in linehandle:
do something
method 2
with open(filepath) as filehandle:
line = filehandle.readlines()
ps: .strip()对空行的删除
filepath=r"data/szzs.txt"
with open(filepath) as filehandle:
for line in filehandle:
print(line.strip())
输出:
1990/12/28 15.871
1991/1/4 3.9462
1991/1/11 2.4041
1991/1/18 -0.2675
1991/1/25 -1.6314
1991/2/1 -1.9235
1991/2/8 0.6718
1991/2/14 1.649
1991/2/22 1.7656
1991/3/1 -1.735
1991/3/8 -1.992
1991/3/15 -3.6338
1991/3/22 -2.0372
1991/3/29 -1.9817
1991/4/5 1.1232
.......
2.写入文件
写入空文件
写入多行
附加到文件
filename = "first_file_to_write.txt"
(1)写入
#'w'表示“写”
with open(filename, 'w') as filehandle:
filehandle.write("I love programming")
#表示结束
filehandle.close
结果:会新建一个文件(有的话就打开原来的,没有就新建)
然后输出你写的文字I love programming
(2)修改
with open(filename, 'w') as filehandle:
filehandle.write("I love programming,too.\n")
接上文,文字会改变I love programming,too.
(3)追加
#'a'表示“追加”
with open(filename, 'a') as filehandle:
filehandle.write("I love programming,too.\n")
filehandle.write("I want to be an excellent financial analyst.\n")
接上文,不会修改掉之前写的,会增加。“\n”表示换行
I love programming,too.
I love programming,too.
I want to be an excellent financial analyst.
二、读写Excel文件 (.xlsx Excel文件后缀)
1.读取Excel文件
import openpyxl
用openpyxl模块打开Excel文档
从工作簿中获取工作表
从表中获取单元格
import openpyxl#调用openpyxl模块打开Excel文档
wb = openpyxl.Workbook()#新建一个名为wb的工作簿
ws = wb.active#激活worksheet,sheet名为ws
ws1 = wb.create_sheet("Wysheet")#在'wb'工作簿中新建一个名为"Wysheet"的sheet,默认的位置为最后
ws.title = "New title"#'ws'sheet的标题改为"New title"
ws['A4'] = 4#'ws'sheet的A4位置为4
cell_range = ws['A1':'c2']#取一个单元格的范围是'A1':'c2'
for cell in cell_range:#取除范围中的每一个元组
for i in cell:#取出每个元组中的元素(单元格)
i.value = 2#单元格的值为2
wb.save("first_excel_with_python.xlsx")
wb.close
2.实战练习:从EXcel表格中获取数据并分析
(1)读取Excel表格数据
(2)填充数据结构
(3)将结果写入Excel文档
例:读取Excel文件并统计分析,将各行业净利润求和并填充在右侧的相应行业旁边
# 导入Excel工作簿
wb = openpyxl.load_workbook(r"data/test.xlsx")
# 查看工作表名称
wb.sheetnames
# 选择工作表
ws = wb['profits']
# 查看数据结构
for row in ws.values:
print(row)
# 设计程序提取并存储数据
len(ws['D'])
dict = {}
for row in ws.iter_rows(min_row=2, max_col=5, max_row=len(ws['D'])):
dict[row[3].value] = dict.get(row[3].value, 0) + row[4].value
# 将数据进行填充
a = len(ws['H'])
end = 'I' + str(a)
use_range = ws['H1':end]
for i in use_range:
if dict.get(i[0].value, 0) != 0:
i[1].value = dict[i[0].value]
wb.save('test_finished.xlsx')
wb.close
例:批量修改sheet名举例
# 批量重命名Excel表格
wb = openpyxl.load_workbook(r"E:\Jupyter工作路径\data\PE_data_original.xlsx")
for i in range(0, len(wb.sheetnames)):
ws = wb.worksheets[i]
ws.title = "a" + str(i)
wb.save(r"E:\Jupyter工作路径\pe_revised.xlsx")
例:统计上证指数有史以来连续上涨情况
#第一步,读取文件
#第一步,读取数据
date = []
pct = []
with open("data/szzs.txt") as filehandle:
for line in filehandle:
a, b = line.split()
date.append(a)
pct.append(b)
#把pct的字符串形式转化为浮点形式
new_pct = []
data = {}
for i in pct:
new_pct.append(float(i))
#字典data:每个日期对应一个涨跌幅
for i in range(len(date)):
data[date[i]] = new_pct[i]
count = 0
results = {}
for k, v in data.items(): #循环字典数据
if v > 0:
count + 1
else:
date_list = []
date_list.append(k)
results[count] = results.get(count, []) + date_list
count = 0
# 导出数据
with open('上证指数连涨统计数据.txt', 'w') as filehandle:
for k, v in results.items():
if k >= 6:
filehandle.write("连涨{}周的情况对应的被终结日期是:{}".format(k, v))
filehandle.close
网友评论