微信
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 16 17:59:13 2020
@author: MW
"""
import os
import sys
import pandas as pd
import chardet
# Aggregate WeChat Pay statement CSV files found below the current directory.
#
# Every file whose path contains both 'csv' and 'All' is read with pandas.
# Detail rows and (when present) the trailing summary block are collected
# separately for "sub-merchant" and "special-merchant" statements and written
# to four pipe-separated, GB18030-encoded CSV files.

# First cell of the summary header row near the end of a statement.
SUMMARY_MARKER = '总交易单数'
# Name of the 4th column in each of the two supported statement layouts.
SUB_MERCHANT_COLUMN = '子商户号'
SPECIAL_MERCHANT_COLUMN = '特约商户号'


def detect_encoding(path):
    """Return chardet's encoding guess for the first line of *path*.

    Only the first line is read instead of loading the whole file.  The
    result is whatever ``chardet.detect`` reports under ``'encoding'``
    (presumably ``None`` when it cannot decide, e.g. for an empty file).
    """
    with open(path, 'rb') as f:
        first_line = f.readline()
    return chardet.detect(first_line)['encoding']


def strip_column_spaces(frame):
    """Remove every space character from the column names of *frame*.

    The frame is modified in place and also returned for convenience.
    """
    frame.columns = [name.replace(" ", "") for name in frame.columns]
    return frame


def has_summary_rows(frame):
    """Return True when the second-to-last row starts with the summary marker.

    Frames with fewer than two rows cannot hold a summary header plus its
    value row, so they report False instead of raising or wrapping around
    with a negative index.
    """
    if len(frame) < 2 or frame.shape[1] < 1:
        return False
    return bool(frame.iloc[len(frame) - 2, 0] == SUMMARY_MARKER)


def merchant_column(frame):
    """Return the name of the 4th column, or None if there are fewer than 4."""
    columns = frame.columns.tolist()
    return columns[3] if len(columns) > 3 else None


def combine(frames):
    """Concatenate *frames* into one DataFrame with sorted columns.

    Returns an empty DataFrame when *frames* is empty, because
    ``pd.concat`` refuses an empty sequence.
    """
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True, sort=True)


# Collect the path of every file below the current directory.
address = []
for root, dirs, files in os.walk(".", topdown=False):
    address.extend(os.path.join(root, name) for name in files)

# Statement files are those whose path contains both 'csv' and 'All'.
cwd_file = [s for s in address if 'csv' in s and 'All' in s]
cwd_count = len(cwd_file)

# Frames are gathered in lists and concatenated once at the end;
# DataFrame.append was removed in pandas 2.0 and copied data on every call.
zi_frames = []
te_frames = []
zi_sum_frames = []
te_sum_frames = []

for index, cwd in enumerate(cwd_file, start=1):
    conding = detect_encoding(cwd)
    print(index, '/', cwd_count, cwd, conding)

    try:
        # Detail rows (the summary block, if any, is still at the bottom).
        wechat_for = pd.read_table(cwd, sep=',', encoding=conding)
        strip_column_spaces(wechat_for)
        # Remember which file each row came from.
        wechat_for['file_dir'] = cwd

        summary_present = has_summary_rows(wechat_for)
        wechat_for_sum = None
        if summary_present:
            # Re-read the file starting at the summary header line.
            wechat_for_sum = pd.read_table(
                cwd,
                skiprows=len(wechat_for) - 1,
                sep=',',
                encoding=conding,
            )
            wechat_for_sum['file_dir'] = cwd
    except Exception as exc:
        # Skip this file; continuing would reuse the previous file's frame.
        print('--pandas读取出错:', cwd, ' --', exc)
        continue

    # The 4th column name decides which output table the file belongs to.
    layout = merchant_column(wechat_for)
    if layout == SUB_MERCHANT_COLUMN:
        detail_frames, summary_frames = zi_frames, zi_sum_frames
        no_summary_message = '子商户号,无总交易记录数'
    elif layout == SPECIAL_MERCHANT_COLUMN:
        detail_frames, summary_frames = te_frames, te_sum_frames
        no_summary_message = '特约商户号:无总交易记录数'
    else:
        print('--字段名称读取出错:', cwd, conding, ' --')
        continue

    if summary_present:
        # Drop the two trailing rows (summary header and summary values).
        detail_frames.append(wechat_for[:-2])
        summary_frames.append(wechat_for_sum)
    else:
        detail_frames.append(wechat_for)
        print(no_summary_message, cwd, conding)

wechat_zi = combine(zi_frames)
wechat_te = combine(te_frames)
wechat_zi_sum = combine(zi_sum_frames)
wechat_te_sum = combine(te_sum_frames)

# quoting=1 is csv.QUOTE_ALL.
wechat_zi.to_csv('子商户号.csv', sep='|', index=False, quoting=1, encoding='GB18030')
wechat_te.to_csv('特约商户号.csv', sep='|', index=False, quoting=1, encoding='GB18030')
wechat_zi_sum.to_csv('子商户号sum.csv', sep='|', index=False, quoting=1, encoding='GB18030')
wechat_te_sum.to_csv('特约商户号sum.csv', sep='|', index=False, quoting=1, encoding='GB18030')
支付宝
对账单分为四种:业务账单汇总、业务账单明细、账务账单汇总、账务账单明细。我们对账时主要用明细数据去核对账务账单明细和账务账单汇总。
# -*- coding: utf-8 -*-
"""
Created on Fri Oct 9 20:59:55 2020
@author: mw
"""
import os
# Aggregate Alipay account-ledger detail CSV files found below ``dirpath``.
#
# A file is picked up when its path ends with 'csv' and its first line
# contains the ledger title.  The rows of all such files are merged and
# written to 'zhifubao.csv' (pipe-separated, GB18030, every field quoted).
dirpath = './'
import pandas as pd

# Text that must appear in the first line of a ledger detail export.
LEDGER_TITLE = '支付宝账务明细查询'
# First cell of the row that closes the detail list.
END_MARKER = '#-----------------------------------------账务明细列表结束------------------------------------'
# Number of rows dropped from the end of a file, starting at END_MARKER.
TRAILER_ROWS = 4


def read_header_lines(path, encoding='GB18030'):
    """Return the first three lines of *path* without line-break characters.

    The second line additionally has the '#账号:[' prefix and every ']'
    removed.  The file is decoded with *encoding* (the same one pandas is
    given for the data) rather than the platform default; undecodable bytes
    are replaced so that unrelated CSV files in the tree cannot abort the run.

    Returns a ``(first, second, third)`` tuple of strings; lines missing
    from a short file come back as empty strings.
    """
    with open(path, 'r', encoding=encoding, errors='replace') as f:
        lines = [
            f.readline().replace('\n', '').replace('\r', '')
            for _ in range(3)
        ]
    first, second, third = lines
    second = second.replace('#账号:[', '').replace(']', '')
    return first, second, third


def drop_trailer(frame):
    """Strip the trailing block that starts with END_MARKER, if present.

    Returns ``(frame_without_trailer, True)`` when the first cell of the
    4th-from-last row equals END_MARKER, otherwise ``(frame, False)``.
    Frames shorter than TRAILER_ROWS are returned unchanged instead of
    being probed with a negative index.
    """
    if (
        len(frame) >= TRAILER_ROWS
        and frame.shape[1] >= 1
        and frame.iloc[len(frame) - TRAILER_ROWS, 0] == END_MARKER
    ):
        return frame[:-TRAILER_ROWS], True
    return frame, False


def combine(frames):
    """Concatenate *frames* into one DataFrame with sorted columns.

    Returns an empty DataFrame when *frames* is empty, because
    ``pd.concat`` refuses an empty sequence.
    """
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True, sort=True)


# Frames are gathered in a list and concatenated once at the end;
# DataFrame.append was removed in pandas 2.0.
ledger_frames = []
for root, dirs, files in os.walk(dirpath):
    for file in files:
        # Join directory and file name, normalising Windows separators.
        file_dir = os.path.join(root, file).replace("\\", "/")
        if not file_dir.endswith('csv'):
            continue
        print(file_dir, '*********************************************')

        a, b, c = read_header_lines(file_dir)
        print(a)
        print(b)
        print(c)
        if LEDGER_TITLE not in a:
            continue

        print('*****************增加****************************')
        # The first four lines are skipped before the column header row.
        dat = pd.read_csv(
            file_dir,
            dtype=object,
            encoding='GB18030',
            sep=',',
            engine='python',
            skiprows=range(0, 4),
        )
        dat['file_dir'] = file_dir
        dat['账号'] = b
        dat['起止日期'] = c
        # Remove tab characters from every string cell.
        dat.replace('\t', '', inplace=True, regex=True)

        dat, trailer_found = drop_trailer(dat)
        if trailer_found:
            print(file_dir, '账务明细列表结束')
        ledger_frames.append(dat)

zhifubao = combine(ledger_frames)
# quoting=1 is csv.QUOTE_ALL.
zhifubao.to_csv('zhifubao.csv', sep='|', index=False, quoting=1, encoding='GB18030')
网友评论