美文网首页
正则表达式的应用

正则表达式的应用

作者: autumn1919 | 来源:发表于2020-10-30 19:47 被阅读0次


import pandas as pd
import xlrd
import os
import warnings
import re
#warnings.filterwarnings("ignore")



excel_name=[]
start=[]
number=[]
bname=[]
job=[]

excel_staff=[]
start_staff=[]
name_staff=[]
number_staff=[]
job_staff=[]
dept=[]

def del_null(ls):
    while '' in ls:
        ls.remove('')
    return ls


files=os.listdir('D:/company')

for k,j in enumerate(files):
    excel=r'D:/company/%s'%j
    data=xlrd.open_workbook(excel)
    sheet=data.sheets()[0]
    for rowidx in range(sheet.nrows):
            row = sheet.row(rowidx)
            for colidx, cell in enumerate(row):
                    if cell.value == "BoD":
                        excel_name.append(j)
                        bname.append(del_null(sheet.row_values(rowidx))[1])
                        searchObj = re.search( r'since(.*?)\/(.*?)\/(.*?)\)',del_null(sheet.row_values(rowidx))[3] , re.M|re.I)
                        if searchObj:
                            start.append(searchObj.group(3))
                        else:
                            start.append('n.a.')
                        job_temp=re.sub(r'\(since.*?\)',"",del_null(sheet.row_values(rowidx))[3])
                        job.append(job_temp)
                        number.append(del_null(sheet.row_values(rowidx+1))[0])
                        # searchObj2 = re.search( r'Worldbox',del_null(sheet.row_values(rowidx+1))[0] , re.M|re.I)
                        # if searchObj2:
                        #   number_staff.append(del_null(sheet.row_values(rowidx+2))[0])
                        # else:
                        #   number_staff.append(del_null(sheet.row_values(rowidx+1))[0])

                        
#print(excel_name,bname,start,job,number)
df=pd.DataFrame(columns=['excel_name','start','number','bname','job'])
df['excel_name']=excel_name
df['start']=start
df['number']=number
df['bname']=bname
df['job']=job
#df.to_csv('d:/board.csv')
#print(df)

#----------------------staff information-----------------------

for k,j in enumerate(files):
    excel=r'D:/company/%s'%j
    data=xlrd.open_workbook(excel)
    sheet=data.sheets()[0]
    for rowidx in range(sheet.nrows):
            row = sheet.row(rowidx)
            for colidx, cell in enumerate(row):
                    if cell.value == "SenMan"or cell.value == "SenMan, LegDep"or cell.value == "OthDep"or cell.value == "FinAcc"or cell.value == "CustSv"or cell.value == "PMPMan":
                        excel_staff.append(j)
                        #print(del_null(sheet.row_values(rowidx)))
           
                        dept.append(cell.value)
                        name_staff.append(del_null(sheet.row_values(rowidx))[1])
                        searchObj = re.search( r'since(.*?)\/(.*?)\/(.*?)\)',del_null(sheet.row_values(rowidx))[3] , re.M|re.I)
                        if searchObj:
                            start_staff.append(searchObj.group(3))
                        else:
                            start_staff.append('n.a.')
                        job_temp1=re.sub(r'\(since.*?\)',"",del_null(sheet.row_values(rowidx))[3])
                        job_staff.append(job_temp1)
                        number_staff.append(del_null(sheet.row_values(rowidx+1))[0])
                        # searchObj2 = re.search( r'Worldbox',del_null(sheet.row_values(rowidx+1))[0] , re.M|re.I)
                        # if searchObj2:
                        #   number_staff.append(del_null(sheet.row_values(rowidx+2))[0])
                        # else:
                        #   number_staff.append(del_null(sheet.row_values(rowidx+1))[0])
#print(excel_staff,name_staff,start_staff,job_staff,number_staff)
df1=pd.DataFrame(columns=['excel_staff','start_staff','number_staff','job_staff','name_staff','dept'])
df1['excel_staff']=excel_staff
df1['start_staff']=start_staff
df1['number_staff']=number_staff
df1['name_staff']=name_staff
df1['job_staff']=job_staff
df1['dept']=dept
df1.to_csv('d:/staff.csv')



相关文章

网友评论

      本文标题:正则表达式的应用

      本文链接:https://www.haomeiwen.com/subject/yxlcvktx.html