美文网首页Python
[Python] 自动化办公 Excel数据调整和样式修改

[Python] 自动化办公 Excel数据调整和样式修改

作者: 半为花间酒 | 来源:发表于2020-05-22 12:46 被阅读0次

    转载请注明:陈熹 chenx6542@foxmail.com (简书号:半为花间酒)
    若公众号内转载请联系公众号:早起Python

    本例可以学到的知识点:pandasopenpyxl协同操作excel文件

    数据表格https://pan.baidu.com/s/1mw4IagrRxoCkAvD0Wh5LRA
    提取码:l9e8

    读者的基础需求如下:

    简单来说就是每天都会对Excel中多个sheet进行更新,需要操作完后可以用程序完成第一张sheet 汇总表 的更新

    这里假设我手上现在是只有一个sheet的空表,第一步需要完成的工作是:

    一、创建多个sheet和随机数据(非核心步骤)

    为了尽可能模拟读者的生产环境多了这一步。数据已经充分则可以跳到第二步

    我已经把这个excel文件命名好放在桌面的data文件夹中

    from openpyxl import load_workbook
    import os
    import glob
    import random
    
    def GetDesktopPath():
        return os.path.join(os.path.expanduser("~"), 'Desktop')
    
    # 调用glob可以利用通配符获取指定命名格式的文件
    path = glob.glob(f'{GetDesktopPath()}/data/*.xls*')[0]
    workbook = load_workbook(filename=path)
    sheet_init = workbook.active
    

    接着是创建一些供随机的内容,可以随意写

    name_lst = ['皮卡丘', '小火龙', '杰尼龟', '妙蛙种子', '风速狗', '小拳石', '飞天螳螂']
    place_lst = [chr(i).upper() for i in range(97, 123)] 
    # 我忘记大写字母的码了哈哈哈 这样变着法换大写字母
    activity_lst = ['椭圆机', '篮球', '足球', '羽毛球', '跳绳']
    source_lst = ['朋友介绍', '微信聊天', '网页弹窗', '其他']
    
    for i in range(30):
        # 不断去拷贝第一页并重命名
        sheet = workbook.copy_worksheet(sheet_init)
        sheet.title = f'{i+1}日'
        for j in range(random.randint(10, 30)):
            # 从第三行开始行遍历
            for row in sheet.iter_rows(min_row=3+j, max_row=3+j):
                info = [f'{j+1}', f'{i+1}日', f'{random.choice(name_lst)}', f'{random.choice(place_lst)}馆',
                        f'{random.choice(activity_lst)}', f'{random.choice(source_lst)}', f'{random.randint(1, 10)}',
                        '无', f'{random.choice(["Y", "N"])}', f'{random.choice(["Y", "N"])}', f'{random.choice(["Y", "N"])}']
                # 嵌套循环,对当前行的格子进行遍历把内容写入
                for index, k in enumerate(info):
                    row[index].value = k
        print(f'第{i+1}日已完成')
    
    workbook.save(filename=f'{GetDesktopPath()}/data/results.xlsx')
    

    数据就创建好了,然后正式进入读者的问题:

    二、合并多个sheet并写入汇总sheet

    由于后面多个表的更新后需要按日期顺序在汇总表里呈现,因此有一个策略是利用openpyxl按顺序遍历各表然后写回汇总表

    但注意,表格中存在边框居中等样式修改

    这种情况下,openpyxl会识别样式,认为这些行是已经有数据的,故纯粹的sheet.append()方法是无法将数据写入这些所谓的空行,而会从没有样式的行开始写入

    所以需要在各表写入的时候不断计算所在行,并利用sheet.iter_rows()定位

    是不是有点麻烦?

    因此换了简单的思路:利用pandas,其方便的地方在于无视表格样式

    path_new = glob.glob(f'{GetDesktopPath()}/data/results.xls*')[0]
    workbook_new = load_workbook(filename=path_new)
    # 方便获取总表数便于遍历
    sheetnames = workbook.sheetnames
    df_lst = []
    
    for i in range(1, len(sheetnames)):
        df = pd.read_excel(path_new , encoding='utf-8', sheet_name=i, skiprows=1)
        df_lst.append(df)
    
    # 把获取的各表纵向合并,注意纵向合并常常需要重置索引
    df_total = pd.concat(df_lst,axis=0,ignore_index=True)
    # 索引是从0开始,利用索引+1重置各记录的编号
    df_total['编号'] = df_total.index + 1
    

    将生成的表写回汇总表即可,涉及的内容稍微比较复杂。由于直接使用dataframe.to_excel会覆盖原excel导致只有一张sheet,其他全部丢失,需要利用pd.ExcelWriter,具体见代码

    删除原来的 汇总表 并写入新的 汇总表。因为新写入的sheet会置于末尾,可以用list.insert(0, list.pop())将最后一个元素置于开头

    writer = pd.ExcelWriter(path_new, engine='openpyxl')
    writer.book = workbook
    workbook.remove(workbook['汇总表'])
    df_total.to_excel(excel_writer=writer, sheet_name=u'汇总表', index=None)
    writer.close()
    workbook._sheets.insert(0, workbook._sheets.pop())
    workbook.save(filename=f'{GetDesktopPath()}/data/results.xlsx')
    

    这就完成了吗?没有

    pandas的优势“无视样式”也成为了它的缺陷:写入文件时没有样式信息
    因此最后再用openpyxl对第一页的样式调整

    三、设置样式

    # 设置对齐、线性、边框、字体
    from openpyxl.styles import Alignment
    from openpyxl.styles import Side, Border
    from openpyxl.styles import Font
    
    sheet = workbook[sheetnames[0]]
    sheet.insert_rows(idx=0) # 插入第一行
    font = Font(name='宋体', size=18, bold=True)
    sheet['A1'] = '皮卡丘体育2020年06月新学员信息登记表'
    sheet['A1'].font = font # 设置字体大小和加粗
    
    req = ':(\w)'
    weight = re.findall(req, sheet.dimensions)[0]
    sheet.merge_cells(f'A1:{weight}1')
    
    # 样式先准备好
    alignment = Alignment(horizontal='center', vertical='center')
    side = Side(style='thin', color='000000')
    border = Border(left=side, right=side, top=side, bottom=side)
    
    # 遍历cell设置样式
    rows = sheet[f'{sheet.dimensions}']
    for row in rows:
        for cell in row:
            cell.alignment = alignment
            cell.border = border
    
    # 设置前两行的行高
    sheet.row_dimensions[1].height = 38
    sheet.row_dimensions[2].height = 38
    
    # 设置列宽
    letter_lst = [chr(i+64).upper() for i in range(2, ord(weight)-ord('A')+1+1)]
    sheet.column_dimensions['A'].width = 8
    for i in letter_lst:
         sheet.column_dimensions[f'{i}'].width = 14
    
    workbook.save(filename=f'{GetDesktopPath()}/data/results.xlsx')
    

    样式就修改完成了!

    完整代码

    from openpyxl import load_workbook
    import os
    import glob
    import random
    import pandas as pd
    import re
    from openpyxl.styles import Alignment
    from openpyxl.styles import Side, Border
    from openpyxl.styles import Font
    
    def GetDesktopPath():
        return os.path.join(os.path.expanduser("~"), 'Desktop')
    
    path = glob.glob(f'{GetDesktopPath()}/data/*.xls*')[0]
    workbook = load_workbook(filename=path)
    sheet_init = workbook.active
    name_lst = ['皮卡丘', '小火龙', '杰尼龟', '妙蛙种子', '风速狗', '小拳石', '飞天螳螂']
    place_lst = [chr(i).upper() for i in range(97, 123)]
    activity_lst = ['椭圆机', '篮球', '足球', '羽毛球', '跳绳']
    source_lst = ['朋友介绍', '微信聊天', '网页弹窗', '其他']
    for i in range(30):
        sheet = workbook.copy_worksheet(sheet_init)
        sheet.title = f'{i+1}日'
        for j in range(random.randint(10, 30)):
            for row in sheet.iter_rows(min_row=3+j, max_row=3+j):
                info = [f'{j+1}', f'{i+1}日', f'{random.choice(name_lst)}', f'{random.choice(place_lst)}馆',
                        f'{random.choice(activity_lst)}', f'{random.choice(source_lst)}', f'{random.randint(1, 10)}',
                        '无', f'{random.choice(["Y", "N"])}', f'{random.choice(["Y", "N"])}', f'{random.choice(["Y", "N"])}']
                for index, k in enumerate(info):
                    row[index].value = k
    
    workbook.save(filename=f'{GetDesktopPath()}/data/results.xlsx')
    
    path_new = glob.glob(f'{GetDesktopPath()}/data/results.xls*')[0]
    # 方便获取总表数便于遍历
    workbook = load_workbook(path_new)
    sheetnames = workbook.sheetnames
    df_lst = []
    for i in range(1, len(sheetnames)):
        df = pd.read_excel(path_new, encoding='utf-8', sheet_name=i, skiprows=1)
        df_lst.append(df)
    
    df_total = pd.concat(df_lst, axis=0, ignore_index=True)
    df_total['编号'] = df_total.index + 1
    
    writer = pd.ExcelWriter(path_new, engine='openpyxl')
    writer.book = workbook
    workbook.remove(workbook['汇总表'])
    df_total.to_excel(excel_writer=writer, sheet_name=u'汇总表', index=None)
    writer.close()
    workbook._sheets.insert(0, workbook._sheets.pop())
    
    sheet = workbook[sheetnames[0]]
    sheet.insert_rows(idx=0)
    font = Font(name='宋体', size=18, bold=True)
    sheet['A1'] = '皮卡丘体育2020年06月新学员信息登记表'
    sheet['A1'].font = font
    
    req = ':(\w)'
    weight = re.findall(req, sheet.dimensions)[0]
    sheet.merge_cells(f'A1:{weight}1')
    
    alignment = Alignment(horizontal='center', vertical='center')
    side = Side(style='thin', color='000000')
    border = Border(left=side, right=side, top=side, bottom=side)
    
    rows = sheet[f'{sheet.dimensions}']
    for row in rows:
        for cell in row:
            cell.alignment = alignment
            cell.border = border
    
    sheet.row_dimensions[1].height = 38
    sheet.row_dimensions[2].height = 38
    
    letter_lst = [chr(i+64).upper() for i in range(2, ord(weight)-ord('A')+1+1)]
    sheet.column_dimensions['A'].width = 8
    for i in letter_lst:
         sheet.column_dimensions[f'{i}'].width = 14
    
    workbook.save(filename=f'{GetDesktopPath()}/data/results.xlsx')
    

    相关文章

      网友评论

        本文标题:[Python] 自动化办公 Excel数据调整和样式修改

        本文链接:https://www.haomeiwen.com/subject/doifahtx.html