通过搜索得到所有的年份,为了使得年份与财务指标相对应,需要在两者之间建立链接。通过对指标的排序进行编号,通过1,2,3,4...在年份与指标之间建立链接,形成面板数据。里边涉及pandas的操作。
import xlrd
import os
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
#构建236个公司的字典
companys=[]
for k in range(236):
new_company={'excel_name':'','year_2020':'','year_2019':'','year_2018':'','year_2017':'',
'year_2016':'','year_2015':'','year_2014':'','year_2013':'','year_2012':'','year_2011':'','year_2010':''
,'year_2009':'','year_2008':'','year_2007':'','year_2006':'','year_2005':'','year_2004':'','year_2003':'','year_2002':''
,'year_2001':'','year_2000':''}
companys.append(new_company)
def del_null(ls):
while '' in ls:
ls.remove('')
return ls
files=os.listdir('D:/company')
#---------------------------------------------------年份的获取-------------
for k,j in enumerate(files):
excel=r'D:/company/%s'%j
companys[k]['excel_name']=j
data=xlrd.open_workbook(excel)
sheet=data.sheets()[0]
for rowidx in range(sheet.nrows):
row = sheet.row(rowidx)
for colidx, cell in enumerate(row):
for t in range(2000,2021):
if cell.value == "31/03/%s"%t:
companys[k]['year_%s'%t]='%s'%t
#剔除不存在记录的年份
for k in companys:
for j in range(2000,2021):
if k['year_%s'%j]=='':
k.pop('year_%s'%j)
df=pd.DataFrame(companys)
#print(companys)
dfs=df[['excel_name','year_2000']].dropna(axis=0, how='any').drop(columns=['year_2000'])
dfs['year']=2000
for k in range(2001,2021):
dfnew=df[['excel_name','year_%s'%k]].dropna(axis=0, how='any').drop(columns=['year_%s'%k])
dfnew['year']=k
dfs=dfs.append(dfnew)
print(dfnew)
dfs.to_csv('d:/企业.csv')
#print(dfs)
#--------------------------------------指标搜寻-------
#ta_temp,int_temp,ftemp,ctemp,clia_temp,operev_temp,sales_temp,gprofit_temp,pl_temp,rd_temp,ebitda_temp,export_temp=[]
ta_temp=[]
int_temp=[]
ftemp=[]
ctemp=[]
clia_temp=[]
operev_temp=[]
sales_temp=[]
gprofit_temp=[]
pl_temp=[]
rd_temp=[]
ebitda_temp=[]
export_temp=[]
empl_temp=[]
total_asset,int_asset,fix_asset,curr_asset,clia,operev,sales,gprofit,pl_ind,rd_ind,ebitda,export,employ=[],[],[],[],[],[],[],[],[],[],[],[],[]
id1,id2,id3,id4,id5,id6,id7,id8,id9,id10,id11,id12,id13=[],[],[],[],[],[],[],[],[],[],[],[],[]
excel1,excel2,excel3,excel4,excel5,excel6,excel7,excel8,excel9,excel10,excel11,excel12,excel13=[],[],[],[],[],[],[],[],[],[],[],[],[]
for k,j in enumerate(files):
excel=r'D:/company/%s'%j
data=xlrd.open_workbook(excel)
sheet=data.sheets()[0]
for rowidx in range(sheet.nrows):
row = sheet.row(rowidx)
for colidx, cell in enumerate(row):
if cell.value == " TOTAL ASSETS":
#print(j,del_null(sheet.row_values(rowidx)))
at=del_null(sheet.row_values(rowidx))
at[0]=j
#print(asset)
ta_temp.append(at)
if cell.value == " Intangible fixed assets":
ia=del_null(sheet.row_values(rowidx))
ia[0]=j
int_temp.append(ia)
if cell.value == " Fixed assets":
#print(j,del_null(sheet.row_values(rowidx)))
ft=del_null(sheet.row_values(rowidx))
ft[0]=j
ftemp.append(ft)
if cell.value == " Current assets":
ct=del_null(sheet.row_values(rowidx))
ct[0]=j
ctemp.append(ct)
#print(j,del_null(sheet.row_values(rowidx)))
if cell.value == " Current liabilities":
cl=del_null(sheet.row_values(rowidx))
cl[0]=j
#print(clia)
clia_temp.append(cl)
#print(j,del_null(sheet.row_values(rowidx)))
if cell.value == " Profit & loss account":
orv=del_null(sheet.row_values(rowidx+1))
orv[0]=j
operev_temp.append(orv)
#print(j,del_null(sheet.row_values(rowidx+1)))
if cell.value == " Sales":
#print(j,del_null(sheet.row_values(rowidx)))
sa=del_null(sheet.row_values(rowidx))
sa[0]=j
sales_temp.append(sa)
if cell.value == " Gross profit":
gp=del_null(sheet.row_values(rowidx))
gp[0]=j
gprofit_temp.append(gp)
#print(j,del_null(sheet.row_values(rowidx)))
if cell.value == " P/L for period [=Net income]":
pl=del_null(sheet.row_values(rowidx))
pl[0]=j
pl_temp.append(pl)
#print(j,del_null(sheet.row_values(rowidx)))
if cell.value == " Research & Development expenses":
rds=del_null(sheet.row_values(rowidx))
rds[0]=j
rd_temp.append(rds)
#print(j,del_null(sheet.row_values(rowidx)))
if cell.value == " EBITDA":
eb=del_null(sheet.row_values(rowidx))
eb[0]=j
ebitda_temp.append(eb)
#print(j,del_null(sheet.row_values(rowidx)))
if cell.value == " Export revenue":
ex=del_null(sheet.row_values(rowidx))
ex[0]=j
export_temp.append(ex)
if cell.value == " Number of employees":
em=del_null(sheet.row_values(rowidx))
em[0]=j
empl_temp.append(em)
#print(j,del_null(sheet.row_values(rowidx)))
#---------------------------第一个指标
for k,j in enumerate(ta_temp):
for m,n in enumerate(j[1:]):
total_asset.append(n)
n=j[0]
excel1.append(n)
id1.append(m)
df1=pd.DataFrame(columns=['id1','excel1','total_asset'])
df1['id1']=id1
df1['total_asset']=total_asset
df1['excel1']=excel1
df1=df1.drop_duplicates()
print(df1)
df1.to_csv('d:/df1.csv')
#--------------------------第二个指标
for k,j in enumerate(int_temp):
for m,n in enumerate(j[1:]):
int_asset.append(n)
n=j[0]
excel2.append(n)
id2.append(m)
df2=pd.DataFrame(columns=['id2','excel2','int_asset'])
df2['id2']=id2
df2['int_asset']=int_asset
df2['excel2']=excel2
df2=df2.drop_duplicates()
df2.to_csv('d:/df2.csv')
print(df2)
#-----------------------------第三个指标
#print(ftemp)
for k,j in enumerate(ftemp):
for m,n in enumerate(j[1:]):
fix_asset.append(n)
n=j[0]
excel3.append(n)
id3.append(m)
#print(id3)
#print(excel3)
df3=pd.DataFrame(columns=['id3','excel3','fix_asset'])
df3['id3']=id3
df3['fix_asset']=fix_asset
df3['excel3']=excel3
df3=df3.drop_duplicates()
df3.to_csv('d:/df3.csv')
print(df3)
#------------------------------第四个指标-------
for k,j in enumerate(ctemp):
for m,n in enumerate(j[1:]):
curr_asset.append(n)
n=j[0]
excel4.append(n)
id4.append(m)
#print(id4)
#print(excel4)
df4=pd.DataFrame(columns=['id4','excel4','curr_asset'])
df4['id4']=id4
df4['curr_asset']=curr_asset
df4['excel4']=excel4
df4=df4.drop_duplicates()
print(df4)
df4.to_csv('d:/df4.csv')
#----------------------------第五个指标
for k,j in enumerate(clia_temp):
for m,n in enumerate(j[1:]):
clia.append(n)
n=j[0]
excel5.append(n)
id5.append(m)
#print(clia_temp)
#print(id5)
#print(excel5)
df5=pd.DataFrame(columns=['id5','excel5','clia'])
df5['id5']=id5
df5['clia']=clia
df5['excel5']=excel5
df5=df5.drop_duplicates()
print(df5)
df5.to_csv('d:/df5.csv')
#----------------------第六个指标----
for k,j in enumerate(operev_temp):
for m,n in enumerate(j[1:]):
operev.append(n)
n=j[0]
excel6.append(n)
id6.append(m)
df6=pd.DataFrame(columns=['id6','excel6','operev'])
df6['id6']=id6
df6['operev']=operev
df6['excel6']=excel6
df6=df6.drop_duplicates()
print(df6)
df6.to_csv('d:/df6.csv')
#--------------第七个指标
for k,j in enumerate(sales_temp):
for m,n in enumerate(j[1:]):
sales.append(n)
n=j[0]
excel7.append(n)
id7.append(m)
df7=pd.DataFrame(columns=['id7','excel7','sales'])
df7['id7']=id7
df7['sales']=sales
df7['excel7']=excel7
df7=df7.drop_duplicates()
print(df7)
df7.to_csv('d:/df7.csv')
#----------------------第八个指标-------
for k,j in enumerate(gprofit_temp):
for m,n in enumerate(j[1:]):
gprofit.append(n)
n=j[0]
excel8.append(n)
id8.append(m)
df8=pd.DataFrame(columns=['id8','excel8','gprofit'])
df8['id8']=id8
df8['gprofit']=gprofit
df8['excel8']=excel8
df8=df8.drop_duplicates()
print(df8)
df8.to_csv('d:/df8.csv')
#------------------------------第九个指标
for k,j in enumerate(pl_temp):
for m,n in enumerate(j[1:]):
pl_ind.append(n)
n=j[0]
excel9.append(n)
id9.append(m)
df9=pd.DataFrame(columns=['id9','excel9','pl_ind'])
df9['id9']=id9
df9['pl_ind']=pl_ind
df9['excel9']=excel9
df9=df9.drop_duplicates()
print(df9)
df9.to_csv('d:/df9.csv')
#-----------------------------第十个指标-----
for k,j in enumerate(rd_temp):
for m,n in enumerate(j[1:]):
rd_ind.append(n)
n=j[0]
excel10.append(n)
id10.append(m)
df10=pd.DataFrame(columns=['id10','excel10','rd_ind'])
df10['id10']=id10
df10['rd_ind']=rd_ind
df10['excel10']=excel10
df10=df10.drop_duplicates()
print(df10)
df10.to_csv('d:/df10.csv')
#-------------------第11个指标---------
for k,j in enumerate(ebitda_temp):
for m,n in enumerate(j[1:]):
ebitda.append(n)
n=j[0]
excel11.append(n)
id11.append(m)
df11=pd.DataFrame(columns=['id11','excel11','ebitda'])
df11['id11']=id11
df11['ebitda']=ebitda
df11['excel11']=excel11
df11=df11.drop_duplicates()
print(df11)
df11.to_csv('d:/df11.csv')
#--------------------------第12个指标
for k,j in enumerate(export_temp):
for m,n in enumerate(j[1:]):
export.append(n)
n=j[0]
excel12.append(n)
id12.append(m)
df12=pd.DataFrame(columns=['id12','excel12','export'])
df12['id12']=id12
df12['export']=export
df12['excel12']=excel12
df12=df12.drop_duplicates()
print(df12)
df12.to_csv('d:/df12.csv')
#--------------------------第13个指标
for k,j in enumerate(empl_temp):
for m,n in enumerate(j[1:]):
employ.append(n)
n=j[0]
excel13.append(n)
id13.append(m)
df13=pd.DataFrame(columns=['id13','excel13','employ'])
df13['id13']=id13
df13['employ']=employ
df13['excel13']=excel13
df13=df13.drop_duplicates()
print(df13)
df13.to_csv('d:df13.csv')
网友评论