工作中,从数据库中导出一份Excel表格,里面全是未经加工的数据。需要对它们进行重新的整合,然后输出为一份报告表。以前曾经使用vba写过程序处理,但最近原始数据库中的表格架构变了。改VBA代码相当于重写。于是尝试使用openpyxl和pandas库进行一轮操作。
功能需求:
原始表格中,含有工单及其对应的一系列生产数据:

要求针对不同的产品型号,工单,生成对应的表格报告。
代码的逻辑大概如下:
- 读取原始数据,然后把它们归类为指定的文件中去
def readOrigData(TLA): #读取原始从Ipro下载出来的数据,然后把它们归类为指定的PWA文件中去
originalData=getfile()
df=pd.read_excel(str(originalData))
data=df[['整机型号','采购单号','TLA 物料号','故障原因','故障原因描述','备注','坏机数量',\
'坏件数量','OFD','坏点数量','检查台数','产品总数','产品序列号','零件位置']]
d_TLA=data[(data['整机型号']==TLA)]
for pwa in d_TLA['TLA 物料号'].drop_duplicates():
create_PWA_Report(TLA,pwa)
d_pwa=d_TLA[(d_TLA['TLA 物料号']==pwa)]
outputlink="D:\\My Documents\\Wave Solder Report\\"+TLA+"\\"+ pwa[0:10]+" Wave Solder Defect.xlsx"
book=load_workbook(outputlink)
writer=pd.ExcelWriter(outputlink,engine='openpyxl')
writer.book=book
d_pwa[::].to_excel(writer,index=False,sheet_name=pwa[0:10])
writer.save()
上面我定义了一个函数getfile(),用于获取原始数据的路径。
def getfile():
root=tk.Tk()
root.withdraw()
#Folderpath=filedialog.askdirectory() #获得选择好的文件夹路径
Filepath=filedialog.askopenfilename() #获得选择好的文件路径
return Filepath
而函数create_PWA_Report(TLA,pwa) 从模板中复制创建了新的文件,以PWA名字开头。
def create_PWA_Report(TLA,pwa): #从模板中复制创建了新的文件,以PWA名字开头
sourcefile="D:\\My Documents\\Wave Solder Report\\Wave_Solder_Report_Template.xlsx"
path="D:\\My Documents\\Wave Solder Report\\"+TLA
if os.path.isdir(path):
pass
else:
os.mkdir(path)
newfile="D:\\My Documents\\Wave Solder Report\\"+TLA+"\\"+ pwa[0:10]+" Wave Solder Defect.xlsx"
if os.path.isfile(newfile):
print("Exist file: "+pwa[0:10])
else:
print("Create file: "+pwa[0:10])
shutil.copyfile(sourcefile,newfile)
- 针对不同的excel表,填充数据及图表
def writeExcelData(filelink): #针对不同的excel表,填充数据及图表
#filelink="D:\\My Documents\\Wave Solder Report\\700-014896-0000\\790-023619 Wave Solder Defect.xlsx"
pwa=filelink.split("\\")[-1][0:10]
all_dj_data=pd.read_excel(filelink,sheet_name=pwa)
#if len(all_dj_data['零件位置'])==1:
if len(all_dj_data[['零件位置']])<2:
dj=all_dj_data[['采购单号']].iloc[0,0]
wb=load_workbook(filelink)
sht=wb.worksheets[0] #Template sheet
sht_copy=wb.copy_worksheet(sht) #复制Template sheet
sht.sheet_state ='hidden' #隐藏template页
sht_copy.title=dj #以DJ改名新增的sheet
dj_sht=wb[dj]
dj_sht.sheet_view.showGridLines = False #隐藏表格中的网格线
dj_sht["B5"]=all_dj_data.iloc[0,0] #TLA PN
dj_sht["G6"]=all_dj_data['产品总数'].max() #DJ 总数
dj_sht["U5"]=all_dj_data['OFD'].max() #OFD
wb.save(filelink)
else:
for dj in all_dj_data['采购单号'].drop_duplicates():
print(dj)
wb=load_workbook(filelink)
sht=wb.worksheets[0] #Template sheet
sht_copy=wb.copy_worksheet(sht) #复制Template sheet
sht.sheet_state ='hidden' #隐藏template页
sht_copy.title=dj #以DJ改名新增的sheet
dj_data=all_dj_data[(all_dj_data['采购单号']==dj)].sort_values(by=['坏件数量'],ascending=False) #过滤指定dj的数据
dj_sht=wb[dj]
dj_sht.sheet_view.showGridLines = False #隐藏表格中的网格线
dj_sht["B5"]=dj_data.iloc[0,0] #TLA PN
dj_sht["G6"]=dj_data['产品总数'].max() #DJ 总数
dj_sht["U5"]=dj_data['OFD'].max() #OFD
dj_data=dj_data.dropna() #删除缺失值
maxrow=27 #对应template中的problem下一个空行
original_ref_col=2 #对应template中的Circuit Code/Location
original_def_col=2 #对应template中的Defects
original_qty_col=2 #对应template中的Failed Quantity
for index, row in dj_data[['零件位置','故障原因','故障原因描述','备注','坏件数量']].iterrows():
defect_description=row['零件位置'] + ": "+ row['故障原因描述'] + " ("+ row['备注']+")" + "\n"+"坏件数量: " +str(row['坏件数量'])
#maxrow=dj_sht.max_row
dj_sht.cell(row=19,column=original_def_col).value=row['故障原因']
dj_sht.cell(row=20,column=original_ref_col).value=row['零件位置']
dj_sht.cell(row=21,column=original_qty_col).value=row['坏件数量']
dj_sht.cell(row=maxrow,column=1).value=defect_description #把不良写入problem表中
maxrow+=1
original_ref_col+=1
original_def_col+=1
original_qty_col+=1
#以下代码给表单添加barchart
chart1 = BarChart()
chart1.type = "col"
chart1.style = 10
chart1.title = "Wave Solder Defect"
chart1.y_axis.title = 'Defect number'
data = Reference(dj_sht, min_col=1, max_col=21, min_row=21, max_row=21) #定义数据的来源
cats = Reference(dj_sht, min_col=2, max_col=21, min_row=20, max_row=20) #定义x轴的分类
chart1.add_data(data=data,titles_from_data=True, from_rows=True)
chart1.set_categories(cats)
chart1.shape = 4
chart1.legend.position = "t"
chart1.height=5 #chart的高度
chart1.width=20 #chart的宽度
dj_sht.add_chart(chart1, "B8")
wb.save(filelink)
这里面的难点是利用openpyxl画图表。需要提前导入相关函数。而且要区分清楚行和列的范围。
from openpyxl.chart import BarChart, Series, Reference
from openpyxl.chart.layout import Layout, ManualLayout
下面整个程序的完整代码:
import shutil
from openpyxl import load_workbook
import os
from openpyxl.chart import BarChart, Series, Reference
from openpyxl.chart.layout import Layout, ManualLayout
from openpyxl import Workbook
import tkinter as tk
from tkinter import filedialog
def getfile():
root=tk.Tk()
root.withdraw()
#Folderpath=filedialog.askdirectory() #获得选择好的文件夹
Filepath=filedialog.askopenfilename() #获得选择好的文件
return Filepath
def create_PWA_Report(TLA,pwa): #从模板中复制创建了新的文件,以PWA名字开头
sourcefile="D:\\My Documents\\Wave Solder Report\\Wave_Solder_Report_Template.xlsx"
path="D:\\My Documents\\Wave Solder Report\\"+TLA
if os.path.isdir(path):
pass
else:
os.mkdir(path)
newfile="D:\\My Documents\\Wave Solder Report\\"+TLA+"\\"+ pwa[0:10]+" Wave Solder Defect.xlsx"
if os.path.isfile(newfile):
print("Exist file: "+pwa[0:10])
else:
print("Create file: "+pwa[0:10])
shutil.copyfile(sourcefile,newfile)
def readOrigData(TLA): #读取原始从Ipro下载出来的数据,然后把它们归类为指定的PWA文件中去
originalData=getfile()
df=pd.read_excel(str(originalData))
data=df[['整机型号','采购单号','TLA 物料号','故障原因','故障原因描述','备注','坏机数量',\
'坏件数量','OFD','坏点数量','检查台数','产品总数','产品序列号','零件位置']]
d_TLA=data[(data['整机型号']==TLA)]
for pwa in d_TLA['TLA 物料号'].drop_duplicates():
create_PWA_Report(TLA,pwa)
d_pwa=d_TLA[(d_TLA['TLA 物料号']==pwa)]
outputlink="D:\\My Documents\\Wave Solder Report\\"+TLA+"\\"+ pwa[0:10]+" Wave Solder Defect.xlsx"
book=load_workbook(outputlink)
writer=pd.ExcelWriter(outputlink,engine='openpyxl')
writer.book=book
d_pwa[::].to_excel(writer,index=False,sheet_name=pwa[0:10])
writer.save()
def writeExcelData(filelink): #针对不同的excel表,填充数据及图表
#filelink="D:\\My Documents\\Wave Solder Report\\700-014896-0000\\790-023619 Wave Solder Defect.xlsx"
pwa=filelink.split("\\")[-1][0:10]
all_dj_data=pd.read_excel(filelink,sheet_name=pwa)
#if len(all_dj_data['零件位置'])==1:
if len(all_dj_data[['零件位置']])<2:
dj=all_dj_data[['采购单号']].iloc[0,0]
wb=load_workbook(filelink)
sht=wb.worksheets[0] #Template sheet
sht_copy=wb.copy_worksheet(sht) #复制Template sheet
sht.sheet_state ='hidden' #隐藏template页
sht_copy.title=dj #以DJ改名新增的sheet
dj_sht=wb[dj]
dj_sht.sheet_view.showGridLines = False #隐藏表格中的网格线
dj_sht["B5"]=all_dj_data.iloc[0,0] #TLA PN
dj_sht["G6"]=all_dj_data['产品总数'].max() #DJ 总数
dj_sht["U5"]=all_dj_data['OFD'].max() #OFD
wb.save(filelink)
else:
for dj in all_dj_data['采购单号'].drop_duplicates():
print(dj)
wb=load_workbook(filelink)
sht=wb.worksheets[0] #Template sheet
sht_copy=wb.copy_worksheet(sht) #复制Template sheet
sht.sheet_state ='hidden' #隐藏template页
sht_copy.title=dj #以DJ改名新增的sheet
dj_data=all_dj_data[(all_dj_data['采购单号']==dj)].sort_values(by=['坏件数量'],ascending=False) #过滤指定dj的数据
dj_sht=wb[dj]
dj_sht.sheet_view.showGridLines = False #隐藏表格中的网格线
dj_sht["B5"]=dj_data.iloc[0,0] #TLA PN
dj_sht["G6"]=dj_data['产品总数'].max() #DJ 总数
dj_sht["U5"]=dj_data['OFD'].max() #OFD
dj_data=dj_data.dropna() #删除缺失值
maxrow=27 #对应template中的problem下一个空行
original_ref_col=2 #对应template中的Circuit Code/Location
original_def_col=2 #对应template中的Defects
original_qty_col=2 #对应template中的Failed Quantity
for index, row in dj_data[['零件位置','故障原因','故障原因描述','备注','坏件数量']].iterrows():
defect_description=row['零件位置'] + ": "+ row['故障原因描述'] + " ("+ row['备注']+")" + "\n"+"坏件数量: " +str(row['坏件数量'])
#maxrow=dj_sht.max_row
dj_sht.cell(row=19,column=original_def_col).value=row['故障原因']
dj_sht.cell(row=20,column=original_ref_col).value=row['零件位置']
dj_sht.cell(row=21,column=original_qty_col).value=row['坏件数量']
dj_sht.cell(row=maxrow,column=1).value=defect_description #把不良写入problem表中
maxrow+=1
original_ref_col+=1
original_def_col+=1
original_qty_col+=1
#以下代码给表单添加barchart
chart1 = BarChart()
chart1.type = "col"
chart1.style = 10
chart1.title = "Wave Solder Defect"
chart1.y_axis.title = 'Defect number'
data = Reference(dj_sht, min_col=1, max_col=21, min_row=21, max_row=21) #定义数据的来源
cats = Reference(dj_sht, min_col=2, max_col=21, min_row=20, max_row=20) #定义x轴的分类
chart1.add_data(data=data,titles_from_data=True, from_rows=True)
chart1.set_categories(cats)
chart1.shape = 4
chart1.legend.position = "t"
chart1.height=5 #chart的高度
chart1.width=20 #chart的宽度
dj_sht.add_chart(chart1, "B8")
wb.save(filelink)
def searchTLAfolder(TLA):
filelist=[]
file_dir="D:\\My Documents\\Wave Solder Report\\"+TLA
for files in os.walk(file_dir):
for file in files[2]:
filelist.append(file_dir + "\\"+file)
return filelist
print("Hi, This tool is help to generate a wave solder defect report from Ipro data")
TLA=input("Please input a TLA PN: ")
print("Select Ipro download data")
readOrigData(TLA)
for j in searchTLAfolder(TLA):
print(j)
writeExcelData(j)
print("Sir, Wave Solder Build Report Is Ready")
网友评论