美文网首页
不确定年份excel面板数据的整理

不确定年份excel面板数据的整理

作者: autumn1919 | 来源:发表于2020-10-28 16:50 被阅读0次

通过搜索得到所有的年份,为了使得年份与财务指标相对应,需要在两者之间建立链接。通过对指标的排序进行编号,通过1,2,3,4...在年份与指标之间建立链接,形成面板数据。里边涉及pandas的操作。

import xlrd
import os
import warnings
warnings.filterwarnings("ignore")
import pandas as pd 


#构建236个公司的字典
companys=[]
for k in range(236):
    new_company={'excel_name':'','year_2020':'','year_2019':'','year_2018':'','year_2017':'',
    'year_2016':'','year_2015':'','year_2014':'','year_2013':'','year_2012':'','year_2011':'','year_2010':''
    ,'year_2009':'','year_2008':'','year_2007':'','year_2006':'','year_2005':'','year_2004':'','year_2003':'','year_2002':''
    ,'year_2001':'','year_2000':''}
    companys.append(new_company)
def del_null(ls):
    while '' in ls:
        ls.remove('')
    return ls

files=os.listdir('D:/company')
#---------------------------------------------------年份的获取-------------
for k,j in enumerate(files):
    excel=r'D:/company/%s'%j
    companys[k]['excel_name']=j
    data=xlrd.open_workbook(excel)
    sheet=data.sheets()[0]
    for rowidx in range(sheet.nrows):
            row = sheet.row(rowidx)
            for colidx, cell in enumerate(row):
                for t in range(2000,2021):
                    if cell.value == "31/03/%s"%t:
                        companys[k]['year_%s'%t]='%s'%t
#剔除不存在记录的年份
for k in companys:
    for j in range(2000,2021):
        if k['year_%s'%j]=='':
            k.pop('year_%s'%j)
df=pd.DataFrame(companys)
#print(companys)
dfs=df[['excel_name','year_2000']].dropna(axis=0, how='any').drop(columns=['year_2000'])
dfs['year']=2000

for k in range(2001,2021):
    dfnew=df[['excel_name','year_%s'%k]].dropna(axis=0, how='any').drop(columns=['year_%s'%k])
    dfnew['year']=k
    dfs=dfs.append(dfnew)
    print(dfnew)
dfs.to_csv('d:/企业.csv')
#print(dfs)


#--------------------------------------指标搜寻-------
#ta_temp,int_temp,ftemp,ctemp,clia_temp,operev_temp,sales_temp,gprofit_temp,pl_temp,rd_temp,ebitda_temp,export_temp=[]

ta_temp=[]
int_temp=[]
ftemp=[]
ctemp=[]
clia_temp=[]
operev_temp=[]
sales_temp=[]
gprofit_temp=[]
pl_temp=[]
rd_temp=[]
ebitda_temp=[]
export_temp=[]
empl_temp=[]
total_asset,int_asset,fix_asset,curr_asset,clia,operev,sales,gprofit,pl_ind,rd_ind,ebitda,export,employ=[],[],[],[],[],[],[],[],[],[],[],[],[]

id1,id2,id3,id4,id5,id6,id7,id8,id9,id10,id11,id12,id13=[],[],[],[],[],[],[],[],[],[],[],[],[]
excel1,excel2,excel3,excel4,excel5,excel6,excel7,excel8,excel9,excel10,excel11,excel12,excel13=[],[],[],[],[],[],[],[],[],[],[],[],[]

for k,j in enumerate(files):
    excel=r'D:/company/%s'%j
    data=xlrd.open_workbook(excel)
    sheet=data.sheets()[0]
    for rowidx in range(sheet.nrows):
            row = sheet.row(rowidx)
            for colidx, cell in enumerate(row):
                if cell.value == "   TOTAL ASSETS":
                    #print(j,del_null(sheet.row_values(rowidx)))
                    at=del_null(sheet.row_values(rowidx))
                    at[0]=j
                    #print(asset)
                    ta_temp.append(at)
                if cell.value == "      Intangible fixed assets":
                    ia=del_null(sheet.row_values(rowidx))
                    ia[0]=j
                    int_temp.append(ia)
                    
                if cell.value == "   Fixed assets":
                    #print(j,del_null(sheet.row_values(rowidx)))
                    ft=del_null(sheet.row_values(rowidx))
                    ft[0]=j
                    ftemp.append(ft)
                if cell.value == "   Current assets":
                    ct=del_null(sheet.row_values(rowidx))
                    ct[0]=j
                    ctemp.append(ct)
                    #print(j,del_null(sheet.row_values(rowidx)))
                if cell.value == "   Current liabilities":
                    cl=del_null(sheet.row_values(rowidx))
                    cl[0]=j
                    #print(clia)
                    clia_temp.append(cl)
                    #print(j,del_null(sheet.row_values(rowidx)))
                if cell.value == " Profit & loss account":
                    orv=del_null(sheet.row_values(rowidx+1))
                    orv[0]=j
                    operev_temp.append(orv)
                    #print(j,del_null(sheet.row_values(rowidx+1)))
                if cell.value == "      Sales":
                    #print(j,del_null(sheet.row_values(rowidx)))
                    sa=del_null(sheet.row_values(rowidx))
                    sa[0]=j
                    sales_temp.append(sa)
                if cell.value == "   Gross profit":
                    gp=del_null(sheet.row_values(rowidx))
                    gp[0]=j
                    gprofit_temp.append(gp)
                    #print(j,del_null(sheet.row_values(rowidx)))
                if cell.value == "   P/L for period [=Net income]":
                    pl=del_null(sheet.row_values(rowidx))
                    pl[0]=j
                    pl_temp.append(pl)
                    #print(j,del_null(sheet.row_values(rowidx)))
                if cell.value == "    Research & Development expenses":
                    rds=del_null(sheet.row_values(rowidx))
                    rds[0]=j
                    rd_temp.append(rds)

                    #print(j,del_null(sheet.row_values(rowidx)))
                if cell.value == "    EBITDA":
                    eb=del_null(sheet.row_values(rowidx))
                    eb[0]=j
                    ebitda_temp.append(eb)
                    #print(j,del_null(sheet.row_values(rowidx)))

                if cell.value == "    Export revenue":
                    ex=del_null(sheet.row_values(rowidx))
                    ex[0]=j
                    export_temp.append(ex)
                    
                if cell.value == "   Number of employees":
                    em=del_null(sheet.row_values(rowidx))
                    em[0]=j
                    empl_temp.append(em)
                    #print(j,del_null(sheet.row_values(rowidx)))
#---------------------------第一个指标
for k,j in enumerate(ta_temp):
    for m,n in enumerate(j[1:]):
        total_asset.append(n)
        n=j[0]
        excel1.append(n)
        id1.append(m)
df1=pd.DataFrame(columns=['id1','excel1','total_asset'])
df1['id1']=id1
df1['total_asset']=total_asset
df1['excel1']=excel1
df1=df1.drop_duplicates()
print(df1)
df1.to_csv('d:/df1.csv')



#--------------------------第二个指标

for k,j in enumerate(int_temp):
    for m,n in enumerate(j[1:]):
        int_asset.append(n)
        n=j[0]
        excel2.append(n)
        id2.append(m)
df2=pd.DataFrame(columns=['id2','excel2','int_asset'])
df2['id2']=id2
df2['int_asset']=int_asset
df2['excel2']=excel2
df2=df2.drop_duplicates()
df2.to_csv('d:/df2.csv')
print(df2)
#-----------------------------第三个指标

#print(ftemp)               
for k,j in enumerate(ftemp):
    for m,n in enumerate(j[1:]):
        fix_asset.append(n)
        n=j[0]
        excel3.append(n)
        id3.append(m)
#print(id3)
#print(excel3)
df3=pd.DataFrame(columns=['id3','excel3','fix_asset'])
df3['id3']=id3
df3['fix_asset']=fix_asset
df3['excel3']=excel3
df3=df3.drop_duplicates()
df3.to_csv('d:/df3.csv')
print(df3)
#------------------------------第四个指标-------
for k,j in enumerate(ctemp):
    for m,n in enumerate(j[1:]):
        curr_asset.append(n)
        n=j[0]
        excel4.append(n)
        id4.append(m)
#print(id4)
#print(excel4)
df4=pd.DataFrame(columns=['id4','excel4','curr_asset'])
df4['id4']=id4
df4['curr_asset']=curr_asset
df4['excel4']=excel4
df4=df4.drop_duplicates()
print(df4)
df4.to_csv('d:/df4.csv')
#----------------------------第五个指标

for k,j in enumerate(clia_temp):
    for m,n in enumerate(j[1:]):
        clia.append(n)
        n=j[0]
        excel5.append(n)
        id5.append(m)
#print(clia_temp)
#print(id5)
#print(excel5)
df5=pd.DataFrame(columns=['id5','excel5','clia'])
df5['id5']=id5
df5['clia']=clia
df5['excel5']=excel5
df5=df5.drop_duplicates()
print(df5)
df5.to_csv('d:/df5.csv')

#----------------------第六个指标----
for k,j in enumerate(operev_temp):
    for m,n in enumerate(j[1:]):
        operev.append(n)
        n=j[0]
        excel6.append(n)
        id6.append(m)

df6=pd.DataFrame(columns=['id6','excel6','operev'])
df6['id6']=id6
df6['operev']=operev
df6['excel6']=excel6
df6=df6.drop_duplicates()
print(df6)
df6.to_csv('d:/df6.csv')
#--------------第七个指标
for k,j in enumerate(sales_temp):
    for m,n in enumerate(j[1:]):
        sales.append(n)
        n=j[0]
        excel7.append(n)
        id7.append(m)

df7=pd.DataFrame(columns=['id7','excel7','sales'])
df7['id7']=id7
df7['sales']=sales
df7['excel7']=excel7
df7=df7.drop_duplicates()
print(df7)
df7.to_csv('d:/df7.csv')

#----------------------第八个指标-------
for k,j in enumerate(gprofit_temp):
    for m,n in enumerate(j[1:]):
        gprofit.append(n)
        n=j[0]
        excel8.append(n)
        id8.append(m)

df8=pd.DataFrame(columns=['id8','excel8','gprofit'])
df8['id8']=id8
df8['gprofit']=gprofit
df8['excel8']=excel8
df8=df8.drop_duplicates()
print(df8)
df8.to_csv('d:/df8.csv')
#------------------------------第九个指标
for k,j in enumerate(pl_temp):
    for m,n in enumerate(j[1:]):
        pl_ind.append(n)
        n=j[0]
        excel9.append(n)
        id9.append(m)

df9=pd.DataFrame(columns=['id9','excel9','pl_ind'])
df9['id9']=id9
df9['pl_ind']=pl_ind
df9['excel9']=excel9
df9=df9.drop_duplicates()
print(df9)
df9.to_csv('d:/df9.csv')
#-----------------------------第十个指标-----

for k,j in enumerate(rd_temp):
    for m,n in enumerate(j[1:]):
        rd_ind.append(n)
        n=j[0]
        excel10.append(n)
        id10.append(m)

df10=pd.DataFrame(columns=['id10','excel10','rd_ind'])
df10['id10']=id10
df10['rd_ind']=rd_ind
df10['excel10']=excel10
df10=df10.drop_duplicates()
print(df10)
df10.to_csv('d:/df10.csv')
#-------------------第11个指标---------
for k,j in enumerate(ebitda_temp):
    for m,n in enumerate(j[1:]):
        ebitda.append(n)
        n=j[0]
        excel11.append(n)
        id11.append(m)

df11=pd.DataFrame(columns=['id11','excel11','ebitda'])
df11['id11']=id11
df11['ebitda']=ebitda
df11['excel11']=excel11
df11=df11.drop_duplicates()
print(df11)
df11.to_csv('d:/df11.csv')

#--------------------------第12个指标

for k,j in enumerate(export_temp):
    for m,n in enumerate(j[1:]):
        export.append(n)
        n=j[0]
        excel12.append(n)
        id12.append(m)

df12=pd.DataFrame(columns=['id12','excel12','export'])
df12['id12']=id12
df12['export']=export
df12['excel12']=excel12
df12=df12.drop_duplicates()
print(df12)
df12.to_csv('d:/df12.csv')

#--------------------------第13个指标

for k,j in enumerate(empl_temp):
    for m,n in enumerate(j[1:]):
        employ.append(n)
        n=j[0]
        excel13.append(n)
        id13.append(m)

df13=pd.DataFrame(columns=['id13','excel13','employ'])
df13['id13']=id13
df13['employ']=employ
df13['excel13']=excel13
df13=df13.drop_duplicates()
print(df13)
df13.to_csv('d:df13.csv')






相关文章

网友评论

      本文标题:不确定年份excel面板数据的整理

      本文链接:https://www.haomeiwen.com/subject/vdkcvktx.html