数据在文末啦
导入需要的包
import pandas as pd
import numpy as np
一、导入需要使用的数据
store_achivement = pd.read_csv("0401-0405门店业绩报表.csv",engine = 'python')
customer_data = pd.read_csv("0401-0405客服数据分析表.csv",engine = 'python')
project_universal = pd.read_csv("0401-0405项目普及明细表.csv",engine = 'python')
project_sale = pd.read_csv("0401-0405项目销售统计明细信息.csv",engine = 'python')
surplus_project = pd.read_csv("0401-0405顾客剩余项目疗程报表5次及以上.csv",engine = 'python')
二、处理数据
1、门店业绩表store_achivement (确定行字段)
store_achivement.head() #查看读取的结果,为什么不用excel读取要转化为csv,因为excel里面的列名没有自动补全
store_achivement.shape #查看源数据的大小,525行,59列(业绩表里面有些字段间的关系还不是很懂)
数据清洗,删除不需要的行
store_achivement['Unnamed: 4'].unique()
store_achivement = store_achivement[store_achivement['Unnamed: 4'].isin(['门店类型','纯美','综合'])]
store_achivement = store_achivement[store_achivement['Unnamed: 2'].isin(['区域', '事业一部',
'事业二部','川五区', '事业三部川区', '渝一区','事业三部渝区', '事业五部', '事业七部'])]
store_achivement = store_achivement[store_achivement['Unnamed: 6'].isin(['门店状态','正常'])]
store_achivement.head(3)
store_achivement.shape #查看数据大小
构建行字段
store_01 = store_achivement
store_01.columns = store_01.loc[0]
store_01 = store_01.drop(store_01.index[0])
store = store_01[['店号','店名','区域','门店类型','区域经理','城市经理','业务经理']]
store.head()
2、客服数据分析表customer_data
查看数据读取情况
customer_data.head()
customer_data = customer_data[customer_data['Unnamed: 7'].isin(['顾客类型','一星会员', '二星会员', '三星会员', '爱莎之星','爱莎璀璨之星', '臻爱时光卡', '老爱莎之星'])]
customer_data.columns = customer_data.iloc[0]
customer_data = customer_data.drop(customer_data.index[0])
customer_data.info() #查看数据类型并修改异动顾客的数据类型
修改数据类型
customer_data['异动顾客(6个月内到店)'] = customer_data['异动顾客(6个月内到店)'].astype('int')
customer_data.head(2)
客服数据分析表中数据的处理完成,用pivot_table进行数据透视
customer_data_01 = customer_data.pivot_table(index = ['店号'],
values = '异动顾客(6个月内到店)',
aggfunc = {'异动顾客(6个月内到店)':'sum'})
customer_data_01.head()
3、0401-0405项目销售统计明细信息project_sale :购买人数people、购买次数number、购买金额amount
查看数据读取情况
project_sale.head(3) #所有的数据格式都是字符串的格式
数据清洗,删除不需要的行
project_sale = project_sale[project_sale['Unnamed: 7'].isin(['项目编号','054','917','318', '454','928','457','458'])]
project_sale[['Unnamed: 9','Unnamed: 10', 'Unnamed: 11']].head(3)
project_sale.rename(columns = {'产品销售统计表(星级分析)':'区域','Unnamed: 1':'门店号',
'Unnamed: 2':'门店名','Unnamed: 7':'项目编号','Unnamed: 8':'项目名称',
'Unnamed: 9':'购买人数','Unnamed: 10':'购买数量','Unnamed: 11':'购买金额'},
inplace = True)
project_sale = project_sale.drop(project_sale.index[0])
project_sale.head(2)
project_sale['项目编号'].unique()
修改字段的内容
project_sale.loc[project_sale['项目编号'] == '054','项目编号—01'] = '054'
project_sale.loc[project_sale['项目编号'] == '917','项目编号—01'] = '917-318'
project_sale.loc[project_sale['项目编号'] == '318','项目编号—01'] = '917-318'
project_sale.loc[project_sale['项目编号'] == '454','项目编号—01'] = '454-928'
project_sale.loc[project_sale['项目编号'] == '928','项目编号—01'] = '454-928'
project_sale.loc[project_sale['项目编号'] == '457','项目编号—01'] = '457-458'
project_sale.loc[project_sale['项目编号'] == '458','项目编号—01'] = '457-458'
更改数据类型
project_sale['购买人数'] = project_sale['购买人数'].astype('int')
project_sale['购买数量'] = project_sale['购买数量'].astype('int')
project_sale['购买金额'] = project_sale['购买金额'].astype('int')
购买人数people
project_sale_people = project_sale.pivot_table(index = '门店号',
columns = '项目编号—01',
values = '购买人数',
aggfunc = {'购买人数':'sum'})
购买金额amount
project_sale_amount = project_sale.pivot_table(index = '门店号',
columns = '项目编号—01',
values = '购买金额',
aggfunc = {'购买金额':'sum'})
购买次数 number
project_sale_number = project_sale.pivot_table(index = '门店号',
columns = '项目编号—01',
values = '购买数量',
aggfunc = {'购买数量':'sum'})
1
2
3
4、项目普及明细表project_universal:新增人数、续购人数
查看表读取情况
清洗数据
project_universal.columns = project_universal.loc[0]
project_universal = project_universal.drop(project_universal.index[0])
project_universal.shape #查看数据大小
project_universal.info() #全是字符串的格式
筛选数据,更改字段内容
project_universal= project_universal[project_universal['项目编号'].isin(['054','917','318','454','928','457','458'])]
project_universal.shape
project_universal.loc[project_universal['项目编号'] == '054','项目编号—01'] = '054'
project_universal.loc[project_universal['项目编号'] == '917','项目编号—01'] = '917-318'
project_universal.loc[project_universal['项目编号'] == '318','项目编号—01'] = '917-318'
project_universal.loc[project_universal['项目编号'] == '454','项目编号—01'] = '454-928'
project_universal.loc[project_universal['项目编号'] == '928','项目编号—01'] = '454-928'
project_universal.loc[project_universal['项目编号'] == '457','项目编号—01'] = '457-458'
project_universal.loc[project_universal['项目编号'] == '458','项目编号—01'] = '457-458'
#重命名
project_universal['项目编号—01'] .unique()
爆品新增人数
project_universal_new = project_universal[project_universal['购买类型'].isin(['新增'])]
project_universal_new.shape
project_universal_new = project_universal_new.drop_duplicates(subset = ['顾客编号','店号','项目编号—01'])
project_universal_new = project_universal_new.pivot_table(index = '店号',
columns = '项目编号—01',
values = '顾客编号',
aggfunc = {'顾客编号':'count'})
project_universal_new = project_universal_new.fillna(0)
project_universal_new.head(3)
爆品续购人数:project_universal_add
替换列字段内容
project_universal['项目名称'] = project_universal['项目名称'].replace('脑部亚健康调理(美)','脑部亚健康调理')
project_universal['项目名称'] = project_universal['项目名称'].replace('脑部亚健康调理(足)','脑部亚健康调理') #替换行字段中的某个内容
project_universal['项目名称'].unique()
用于循环的辅助列
project_universal['顾客编号—项目名称'] = project_universal['顾客编号']+project_universal['项目名称']
project_universal.columns
project_universal_new01 = project_universal[project_universal[ '购买类型'].isin(['新增'])]
project_universal_add01 = project_universal[project_universal[ '购买类型'].isin(['续购'])]
customernumber_new = list(pd.Series(project_universal_new01['顾客编号—项目名称'])) #新增中的顾客编号,新增中每个项目对对应的顾客编号是唯一的
customernumber_add = list(pd.Series(project_universal_add01['顾客编号—项目名称'])) #续购用户中顾客的编号
数据清洗,如果新增表中A项目的顾客编号出现在续购表A项目中,在续购表中就删除该顾客编号
def drop_duplicates(x): #x中传入新增顾客中的顾客编号
df = project_universal_add01
for i in range(len(x)):
if x[i] in customernumber_add:
df = df[~df['顾客编号—项目名称'].isin([x[i]])] #如果新增表中A项目的顾客编号出现在续购表A项目中,在续购表中就删除该顾客编号
return df
把函数的结果复制给变量project_universal_add
project_universal_add = drop_duplicates(customernumber_new)
去重不完整,excel中续购人数229,python中续购人数249人
- 会出现顾客在同一个项目,续购购买类型中,有两条记录。用duplicats(项目编号,顾客编号两个进行去重)
project_universal_add = project_universal_add.drop_duplicates(subset = ['顾客编号','项目编号—01'],keep = 'first')
project_universal_add.shape
查看去重后的数据
project_universal_add = project_universal_add.pivot_table(index = '店号',
columns = '项目编号—01',
values = '顾客编号',
aggfunc = {'顾客编号':'count'})
project_universal_add = project_universal_add.fillna(0) #填充确实值
5、0401-0405顾客剩余项目疗程报表5次及以上 (求拥~字段)
这里我们只分析项目编码54,917,318,454,928,457,458
查看数据读取情况
surplus_project = surplus_project[surplus_project['项目编号'].isin([54,917,318,454,928,457,458])]
surplus_project.dtypes #数据结构
surplus_project_number =surplus_project.pivot_table(index = '门店编号',
columns = '项目编号',
values = '顾客编号',
aggfunc = {'顾客编号':'count'})
surplus_project_number.head()
填充确实值
surplus_project_number = surplus_project_number.fillna(0)
将有两个项目编号的数据汇总
surplus_project_number['917-318'] = surplus_project_number[917] + surplus_project_number[318]
surplus_project_number['454-928'] = surplus_project_number[454] + surplus_project_number[928]
surplus_project_number['457-458'] = surplus_project_number[457] + surplus_project_number[458]
surplus_project_number= surplus_project_number[[54,'917-318','454-928','457-458']]#选取需要的字段
surplus_project_number.head()
三、将爆品店面数据进行汇总 store_popular_project
主要是用pd.merge函数将两个表数据进行匹配
- 爆品店面数据汇总表:store_popular_project
- 门店行字段表:store
- 6个月到店异动会员:customer_data_01
- 购买人数表:project_sale_people
- 购买次数表:project_sale_number
- 购买金额表:project_sale_amount
- 项目新增人数表:project_universal_new
- 项目续购人数表:project_universal_add
- 项目拥有量表:surplus_project_number
1、门店行字段表:store
store_popular_project = pd.merge(store,customer_data_01,how = 'left',left_on = store['店号'],right_on = customer_data_01.index )
store_popular_project = store_popular_project.drop(columns = 'key_0')
store_popular_project.head()
3、购买人数表:project_sale_people
查看读取数据
project_sale_people.head(2)
更改字段名
project_sale_people.rename(columns = {'054':'054购买人数',
'454-928':'454-928购买人数',
'457-458':'457-458购买人数',
'917-318':'917-318购买人数'},inplace = True)
合并表
store_popular_project = pd.merge(store_popular_project,project_sale_people,how = 'left',left_on = store_popular_project['店号'],
right_on = project_sale_people.index)
store_popular_project = store_popular_project.drop(columns = 'key_0')
store_popular_project.head(3)
4、购买次数表:project_sale_number
查看读取数据
project_sale_number.head(2)
更改字段名
project_sale_number = project_sale_number.rename(columns = {'054':'054购买次数',
'454-928':'454-928购买次数',
'457-458':'457-458购买次数',
'917-318':'917-318购买次数'})
project_sale_number.fillna(0).head() #填充缺失值
数据合并
store_popular_project = pd.merge(store_popular_project,project_sale_number,how = 'left',left_on = store_popular_project['店号'],
right_on = project_sale_number.index)
store_popular_project.head()
store_popular_project = store_popular_project.drop(columns = 'key_0') #删除key_0字段
5、购买金额表:project_sale_amount
查看读取数据
project_sale_amount.head()
更改字段名称
project_sale_amount.rename(columns = {'054':'054购买金额',
'454-928':'454-928购买金额',
'457-458':'457-458购买金额',
'917-318':'917-318购买金额'},inplace = True)
合并表
store_popular_project = pd.merge(store_popular_project,project_sale_amount,how = 'left',left_on = store_popular_project['店号'],
right_on = project_sale_amount.index)
6、项目新增人数表:project_universal_new
查看导入数据
更改字段名
project_universal_new.rename(columns = {'054':'054新增',
'454-928':'454-928新增',
'457-458':'457-458新增',
'917-318':'917-318新增'},inplace = True)
合并表
store_popular_project = pd.merge(store_popular_project,project_universal_new,how = 'left',left_on = store_popular_project['店号'],
right_on = project_universal_new.index)
7、项目拥有量表:surplus_project_number
查看导入数据
更改字段名
surplus_project_number.rename(columns = {54:'54拥用量',
'917-318':'917-318拥用量',
'454-928':'454-928拥用量',
'457-458':'457-458拥用量'},inplace = True)
更改数据类型
surplus_project_number.index = surplus_project_number.index.astype('str')
合并表
store_popular_project = pd.merge(store_popular_project,surplus_project_number,how = 'left',left_on = store_popular_project['店号'],
right_on = surplus_project_number.index)
8、项目续购人数表:project_universal_add
查看读取数据
更改字段名称
project_universal_add.rename(columns = {'054':'54续购',
'454-928':'454-928续购',
'457-458':'457-458续购',
'917-318':'917-318续购',
},inplace = True)
合并数据
store_popular_project = pd.merge(store_popular_project,project_universal_add,how = 'left',
left_on = store_popular_project['店号'],right_on =project_universal_add.index )
所有的表完成了汇总。调整格式和增加业绩占比
所有的数据都完成了汇总,现在只需要转化为excel格式就可以了
writer = pd.ExcelWriter('store_popular_project.xlsx')
store_popular_project.to_excel(writer,index = True)
writer.save()
查看文件地址
%pwd
就找到我们导出的文件啦
这就是在python里面做的excel表格。像我们每周或者每天都会统计销售数据,在python里面做好模板后就不用车哦给你敷操作了,每次只需要带入新的数据,或者更改文件名就可以了,就可以实现自动化报表
当然罗通过Python连接数据库,来实现自动化报表是最理想的,目前还在学习中
**链接: https://pan.baidu.com/s/1LOiHXPovwN6qhHin-PIDlA 提取码: vp72 **
网友评论