美文网首页
test_xia4:合并列头相同的多个文件

test_xia4:合并列头相同的多个文件

作者: 夕颜00 | 来源:发表于2020-06-08 14:37 被阅读0次

    1、文件:

    • test1.txt
    A1  A2  A3  B1  B2  B3
    909 312 670 660 264 943
    726 335 905 143 696 892
    
    • test2.txt
    A1  A2  A3  B1  B2  B3
    633 429 534 941 124 370
    223 540 845 488 878 134
    

    2、目的:合并多个文件,并标注来源

    A1  A2  A3  B1  B2  B3 source
    909 312 670 660 264 943  test1.txt
    726 335 905 143 696 892  test1.txt
    633 429 534 941 124 370  test2.txt
    223 540 845 488 878 134  test2.txt
    

    3、脚本1: 用pandas append实现

    import os
    import pandas as pd
    
    # Directory holding the tab-separated input files, and the merged CSV to write.
    path = "E:/Script/python/xia_test/4"
    output = "E:/Script/python/xia_test/4/out.csv"

    # Only the *.txt files directly inside `path` are merged.
    file_list = [i for i in os.listdir(path) if i.endswith(".txt")]

    frames = []
    for i in file_list:
        # os.listdir() returns bare file names, so join them with `path`;
        # otherwise the read only works when the cwd happens to be `path`.
        data = pd.read_table(os.path.join(path, i))
        # Tag every row with its origin: the file name minus its extension.
        data["source"] = os.path.splitext(i)[0]
        frames.append(data)

    # DataFrame.append was removed in pandas 2.0, so build the result with one
    # pd.concat call. pd.concat rejects an empty list, hence the fallback to an
    # empty frame when no input file was found.
    df_w = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    print(df_w)
    df_w.to_csv(output, index=False)
    
    • 脚本2: 用pandas 的concat 实现
    import pandas as pd
    import os
    
    # Input directory (keeps its trailing "/" because paths are built by
    # plain string concatenation).
    dir0 = 'E:/资料/201804_学习/Python/练习/xia_test/4/'
    # Merged CSV, written next to the input files.
    sum0 = dir0 + "sum_1.csv"
    
    def get_name(dir0):
        """Return the names, without extension, of the ``.txt`` files under *dir0*.

        The directory tree is walked with ``os.walk``, so files in
        sub-directories are included as well; only the bare name is returned,
        not the directory it was found in.

        Args:
            dir0: Directory to scan.

        Returns:
            A list of file names with the ``.txt`` extension removed, in the
            order ``os.walk`` reports them.
        """
        xtt_name = []
        for _root, _dirnames, files in os.walk(dir0):
            for name in files:
                # splitext removes only the final extension, so "a.b.txt"
                # yields "a.b" (splitting on the first dot would give "a").
                stem, ext = os.path.splitext(name)
                if ext == ".txt":
                    xtt_name.append(stem)
        return xtt_name
    
    list_name = get_name(dir0)

    # Read every discovered file instead of a hard-coded four, so the script
    # works for any number of inputs.
    frames = []
    for name in list_name:
        df = pd.read_table(dir0 + str(name) + '.txt')
        # Record which file each row came from.
        df['source'] = name
        frames.append(df)

    # pd.concat raises on an empty list; stop with a clear message instead.
    if not frames:
        raise SystemExit("no .txt files found under " + dir0)

    reader = pd.concat(frames, axis=0, ignore_index=True)
    reader.to_csv(sum0, index=False)
    
    • 脚本3: 用csv 模块中的 csv.DictReader 和 csv.DictWriter 实现
    import os
    import csv
    
    # Merged CSV that the script writes.
    sum0 = "E:/资料/201804_学习/Python/练习/xia_test/4/sum.csv"
    # Directory that is scanned for the input files.
    file_dir = 'E:/资料/201804_学习/Python/练习/xia_test/4/file'
    
    
    def file_name(file_dir):
        """Return the names, without extension, of the ``.csv`` files in *file_dir*.

        Only the top level of *file_dir* is inspected; sub-directories are not
        descended into.

        Args:
            file_dir: Directory to scan.

        Returns:
            A list of file names with the ``.csv`` extension removed. The list
            is empty when the directory has no ``.csv`` files or cannot be
            walked (``os.walk`` yields nothing in that case).
        """
        # The first entry produced by os.walk describes file_dir itself, so
        # returning inside the loop restricts the scan to the top level.
        for _dirpath, _dirnames, filenames in os.walk(file_dir):
            # Keep only .csv files, and strip just the final extension so
            # "x.y.csv" maps to "x.y" and can be reopened as "<name>.csv".
            return [
                os.path.splitext(i)[0] for i in filenames if i.endswith(".csv")
            ]
        # Reached only when os.walk produced nothing at all.
        return []
    
    def b(name):
        """Load ``<file_dir>/<name>.csv`` and tag each row with its source.

        Args:
            name: File name without the ``.csv`` extension; it is also the
                value stored under the ``'source'`` key of every row.

        Returns:
            A list with one dict per data row, keyed by the file's header
            fields plus ``'source'``.
        """
        path = file_dir + "/" + name + ".csv"
        # The csv module expects files opened with newline="" so that newlines
        # embedded in quoted fields are parsed correctly.
        with open(path, 'r', newline="") as file:
            return [{**row, 'source': name} for row in csv.DictReader(file)]
    
    
    # Column order of the merged file: the shared header plus the origin column.
    header = ['A1', 'A2', 'A3', 'B1', 'B2', 'B3', 'source']

    with open(sum0, 'w', newline="") as new:
        f_csv = csv.DictWriter(new, header)
        f_csv.writeheader()

        # Append the tagged rows of each input file, one file at a time.
        name_list = file_name(file_dir)
        for n in name_list:
            f_csv.writerows(b(n))
    

    相关文章

      网友评论

          本文标题:test_xia4:合并列头相同的多个文件

          本文链接:https://www.haomeiwen.com/subject/anjbzhtx.html