1、文件:
- test1.txt
A1 A2 A3 B1 B2 B3
909 312 670 660 264 943
726 335 905 143 696 892
- test2.txt
A1 A2 A3 B1 B2 B3
633 429 534 941 124 370
223 540 845 488 878 134
2、目的:合并多个文件,并标注来源
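A1 A2 A3 B1 B2 B3 source
909 312 670 660 264 943 test1.txt
726 335 905 143 696 892 test1.txt
633 429 534 941 124 370 test2.txt
223 540 845 488 878 134 test2.txt
(注:下面的脚本写入 source 列时会去掉 .txt 后缀,即 test1、test2)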
3、脚本1: 用pandas append实现
import os
import pandas as pd
# Directory that holds the input .txt tables, and the CSV that receives the merge.
path = "E:/Script/python/xia_test/4"
output = "E:/Script/python/xia_test/4/out.csv"

# Only the .txt files in the directory are inputs.
file_list = [i for i in os.listdir(path) if i.endswith(".txt")]

frames = []
for i in file_list:
    # os.listdir returns bare file names, so join them with the directory;
    # otherwise the read only works when the current directory is `path`.
    data = pd.read_table(os.path.join(path, i))
    # Tag every row with the file it came from (extension removed).
    data["source"] = os.path.splitext(i)[0]
    frames.append(data)

# DataFrame.append was removed in pandas 2.0; build the result with a single
# concat instead. pd.concat rejects an empty list, so fall back to an empty
# frame when no input file was found.
df_w = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
print(df_w)
df_w.to_csv(output, index=False)
- 脚本2: 用pandas 的concat 实现
import pandas as pd
import os
# Directory holding the input .txt files. The trailing slash matters because
# the file paths further down are built by plain string concatenation.
dir0 = "E:/资料/201804_学习/Python/练习/xia_test/4/"
# Destination CSV for the merged table.
sum0 = 'E:/资料/201804_学习/Python/练习/xia_test/4/sum_1.csv'
def get_name(dir0):
    """Collect the base names of the ``.txt`` files found under *dir0*.

    The directory tree is walked with ``os.walk``, so files in
    sub-directories are included as well.

    Args:
        dir0: Directory to search.

    Returns:
        A list of file names with the ``.txt`` extension removed, in the
        order ``os.walk`` yields them.
    """
    xtt_name = []
    for _root, _dirnames, files in os.walk(dir0):
        for name in files:
            # splitext removes only the final extension, so "a.b.txt" keeps
            # its stem "a.b", and a name that merely ends in the letters
            # "txt" (no dot) is not mistaken for a text file.
            stem, ext = os.path.splitext(name)
            if ext == '.txt':
                xtt_name.append(stem)
    return xtt_name
list_name = get_name(dir0)

# Read every discovered file instead of exactly four hard-coded ones, so the
# script works for any number of inputs.
frames = []
for name in list_name:
    df = pd.read_table(dir0 + str(name) + '.txt')
    # Tag every row with the file it came from.
    df['source'] = name
    frames.append(df)

if frames:
    reader = pd.concat(frames, axis=0, ignore_index=True)
    reader.to_csv(sum0, index=False)
else:
    # pd.concat raises ValueError on an empty list; report the situation
    # instead of crashing when the directory has no .txt files.
    print("no .txt files found in " + dir0)
- 脚本3: 用 csv 模块中的 csv.DictWriter 实现
import os
import csv
# Directory scanned for the per-source input files.
file_dir = "E:/资料/201804_学习/Python/练习/xia_test/4/file"
# Destination CSV for the merged rows; it sits outside file_dir.
sum0 = 'E:/资料/201804_学习/Python/练习/xia_test/4/sum.csv'
def file_name(file_dir):
    """Collect the base names of the ``.csv`` files found under *file_dir*.

    The directory tree is walked with ``os.walk``, so files in
    sub-directories are included as well.

    Args:
        file_dir: Directory to search.

    Returns:
        A list of file names with the ``.csv`` extension removed, in the
        order ``os.walk`` yields them.
    """
    name_list = []
    for _dirpath, _dirnames, filenames in os.walk(file_dir):
        for i in filenames:
            # The reader for these names opens "<name>.csv", so skip other
            # files, and strip only the final extension so that dotted
            # names such as "a.b.csv" survive intact.
            stem, ext = os.path.splitext(i)
            if ext == ".csv":
                name_list.append(stem)
    return name_list
def b(name):
    """Read ``<file_dir>/<name>.csv`` and tag every row with its source.

    Args:
        name: Base name (without extension) of a CSV file located in the
            module-level ``file_dir`` directory.

    Returns:
        A list with one dict per data row, as produced by
        ``csv.DictReader``, each extended with a ``'source'`` key whose
        value is *name*.
    """
    path = os.path.join(file_dir, name + ".csv")
    res_list = []
    # newline="" leaves line-ending handling to the csv module, which is
    # required for quoted fields that contain embedded newlines.
    with open(path, 'r', newline="") as file:
        reader = csv.DictReader(file)
        # Rows must be consumed while the file is still open.
        for row in reader:
            row['source'] = name
            res_list.append(row)
    return res_list
# Merge every per-source CSV into sum0: the header is written once, then the
# tagged rows of each input file are appended in discovery order.
with open(sum0, 'w', newline="") as out_file:
    header = ['A1', 'A2', 'A3', 'B1', 'B2', 'B3', 'source']
    f_csv = csv.DictWriter(out_file, fieldnames=header)
    f_csv.writeheader()
    name_list = file_name(file_dir)
    for name in name_list:
        f_csv.writerows(b(name))
网友评论