美文网首页
使用Python合并多个excel文件或者sheet

使用Python合并多个excel文件或者sheet

作者: Stevent | 来源:发表于2016-08-19 14:05 被阅读1893次

    由于工作需要,经常会涉及到合并excel表格,而且我刚好在学习python,于是就用python来帮我完成这件事,我前后写了很多版本,下面就是我的合并excel进化史, 也是我的python学习过程,希望能对新手或者办公有所帮助。也希望能得到大家的指点,让我能够进步。直接上代码。

    python刚开始

    这时,对python才入门,就想着应该是找能操作excel的库,网上找到openpyxl

    import openpyxl
    import os
    
    '''
    @author:steven
    date: 2016-05-18
    '''
    
    def get_filenames(path):
        filenames = []
        for i in os.walk(path):
            for filename in i[-1]:
                full_filename = os.path.join(i[0],filename)
                filenames.append(full_filename)
        return filenames
    
    def read_excel(path):
    
        wb = openpyxl.load_workbook(path)
        ws = wb.active
    
        ncols = ws.max_column
        nrows= ws.max_row
        titles = []
        info = {}
        data = []
        for i in range(1, ncols+1):
            titles.append(ws.cell(row=1,column=i).value)
        # print(titles)
    
        for row in range(2,nrows+1):
            temp_list = []
            for col in range(1, ncols+1):
                temp_list.append(ws.cell(row=row,column=col).value)
            data.append(temp_list)
        # print(data)
        return data,titles
    
    
    def save_excel(data,titles,path = None):
        if path == None:
            path = 'Total.xlsx'
    
        wb = openpyxl.Workbook()
        ws = wb.active
    
        for index, title in enumerate(titles):
            ws.cell(row=1,column=index+1,value=title)
        for row,item in enumerate(data):
            for col,value in enumerate(item):
                ws.cell(row=row+2,column=col+1,value=value)
    
        wb.save(path)
    
    if __name__ == '__main__':
        print('Program is running...')
        path = r'./.'
        target_path = r'./.'
        data = []
        titles = ''
        for filename in get_filenames(path):
            titles = read_excel(filename)[-1]
            for item in read_excel(filename)[0]:
                if item not in data:
                    data.append(item)
        save_excel(data,titles,target_path+os.sep+'Total.xlsx')
        print('Success!')
    

    听说有个库叫pandas,大家都说好,到底怎么好,自己试过才知道。

    # coding:utf-8
    '''
    @auth: Steven
    @date: 2016-07-27
    func: 合并目录下的所有excel文件,去重后存入新的excel
    '''
    
    import os
    import pandas as pd
    
    def get_filenames(path):
        """获取目录内所有文件名"""
        filenames = []
        for i in os.walk(path):
            for filename in i[-1]:
                full_filename = os.path.join(i[0],filename)
                filenames.append(full_filename)
        return filenames
    
    def read_excel(filename):
        """读入excel,返回dataFrame"""
        df = pd.read_excel(filename, index_col=None, headers = 0, na_values=['NA'])
        return df
    
    def merge_excel(datas,index):
        """合并数据,index为参考去重的列名"""
        return pd.concat(datas,ignore_index=True).drop_duplicates(index)
    
    
    if __name__ == '__main__':
        print('Program is running...')
        path = r'E:\Work'
        target_path = r'E:\Work'
        data = []
        for filename in get_filenames(path):
            data.append(read_excel(filename))
    
        df = merge_excel(data)
        df.to_excel(target_path+os.sep+'All.xlsx',index=False)
        print('Success!')
    

    面虽然写了三个函数,但其实可以简单点

    #!/usr/bin/env python3.5
    # -*- coding:utf-8 -*-
    """
    Function:
    合并目录下的所有excel文件,去重后存入新的excel
    Version:    2016-10-10
    Author:     Steven
    Contact:    lucibriel (at) 163.com
    """
     
    import os
    import glob
    import pandas as pd
     
     
    def merge_excel(path, on):
        """合并数据"""
        all_data = pd.DataFrame()
        for f in glob.glob(path):
            df = pd.read_excel(f)
            all_data = all_data.append(df, ignore_index=True)
        return all_data.drop_duplicates(on)
     
     
    if __name__ == '__main__':
        print('Program is running...')
        path = r'./*'
        target_path = r'./.'
     
        df = merge_excel(path)
        df.to_excel(os.path.join(target_path,"Total.xlsx"),index=False)
        print('Success!')
    

    其实后面我又改了很多次,后面有时间,我会把新版本再分享出来。

    相关文章

      网友评论

          本文标题:使用Python合并多个excel文件或者sheet

          本文链接:https://www.haomeiwen.com/subject/mrybsttx.html