python自动化门店报表(一)

作者: 夜希辰 | 来源:发表于2019-05-19 19:03 被阅读21次

数据在文末啦

导入需要的包

import pandas as pd
import numpy as np

一、导入需要使用的数据

store_achivement = pd.read_csv("0401-0405门店业绩报表.csv",engine = 'python') 
customer_data = pd.read_csv("0401-0405客服数据分析表.csv",engine = 'python')
project_universal = pd.read_csv("0401-0405项目普及明细表.csv",engine = 'python')
project_sale = pd.read_csv("0401-0405项目销售统计明细信息.csv",engine = 'python')
surplus_project = pd.read_csv("0401-0405顾客剩余项目疗程报表5次及以上.csv",engine = 'python')

二、处理数据

1、门店业绩表store_achivement (确定行字段)

store_achivement.head()  #查看读取的结果,为什么不用excel读取要转化为csv,因为excel里面的列名没有自动补全
store_achivement.shape   #查看源数据的大小,525行,59列(业绩表里面有些字段间的关系还不是很懂)

数据清洗,删除不需要的行

store_achivement['Unnamed: 4'].unique()
store_achivement = store_achivement[store_achivement['Unnamed: 4'].isin(['门店类型','纯美','综合'])]
store_achivement = store_achivement[store_achivement['Unnamed: 2'].isin(['区域',  '事业一部', 
                '事业二部','川五区', '事业三部川区', '渝一区','事业三部渝区', '事业五部',  '事业七部'])]
store_achivement = store_achivement[store_achivement['Unnamed: 6'].isin(['门店状态','正常'])]
store_achivement.head(3)
store_achivement.shape #查看数据大小

构建行字段

store_01 = store_achivement
store_01.columns = store_01.loc[0]
store_01 = store_01.drop(store_01.index[0])
store = store_01[['店号','店名','区域','门店类型','区域经理','城市经理','业务经理']]
store.head()

2、客服数据分析表customer_data

查看数据读取情况

customer_data.head()
customer_data = customer_data[customer_data['Unnamed: 7'].isin(['顾客类型','一星会员', '二星会员', '三星会员',  '爱莎之星','爱莎璀璨之星', '臻爱时光卡', '老爱莎之星'])]
customer_data.columns = customer_data.iloc[0]
customer_data = customer_data.drop(customer_data.index[0])
customer_data.info()  #查看数据类型并修改异动顾客的数据类型

修改数据类型
customer_data['异动顾客(6个月内到店)'] = customer_data['异动顾客(6个月内到店)'].astype('int')
customer_data.head(2)

客服数据分析表中数据的处理完成,用pivot_table进行数据透视

customer_data_01 = customer_data.pivot_table(index = ['店号'],
                         values = '异动顾客(6个月内到店)',
                         aggfunc = {'异动顾客(6个月内到店)':'sum'})
customer_data_01.head()

3、0401-0405项目销售统计明细信息project_sale :购买人数people、购买次数number、购买金额amount

查看数据读取情况

project_sale.head(3)  #所有的数据格式都是字符串的格式

数据清洗,删除不需要的行
project_sale = project_sale[project_sale['Unnamed: 7'].isin(['项目编号','054','917','318', '454','928','457','458'])]
project_sale[['Unnamed: 9','Unnamed: 10', 'Unnamed: 11']].head(3)
project_sale.rename(columns = {'产品销售统计表(星级分析)':'区域','Unnamed: 1':'门店号',
                    'Unnamed: 2':'门店名','Unnamed: 7':'项目编号','Unnamed: 8':'项目名称',
                    'Unnamed: 9':'购买人数','Unnamed: 10':'购买数量','Unnamed: 11':'购买金额'},
                    inplace = True)
project_sale = project_sale.drop(project_sale.index[0])
project_sale.head(2)
project_sale['项目编号'].unique()

修改字段的内容
project_sale.loc[project_sale['项目编号'] == '054','项目编号—01'] = '054'
project_sale.loc[project_sale['项目编号'] == '917','项目编号—01'] = '917-318'
project_sale.loc[project_sale['项目编号'] == '318','项目编号—01'] = '917-318'
project_sale.loc[project_sale['项目编号'] == '454','项目编号—01'] = '454-928'
project_sale.loc[project_sale['项目编号'] == '928','项目编号—01'] = '454-928'
project_sale.loc[project_sale['项目编号'] == '457','项目编号—01'] = '457-458'
project_sale.loc[project_sale['项目编号'] == '458','项目编号—01'] = '457-458'

更改数据类型

project_sale['购买人数'] = project_sale['购买人数'].astype('int')
project_sale['购买数量'] = project_sale['购买数量'].astype('int')
project_sale['购买金额'] = project_sale['购买金额'].astype('int')

购买人数people

project_sale_people = project_sale.pivot_table(index = '门店号',
                        columns = '项目编号—01',
                        values = '购买人数',
                        aggfunc = {'购买人数':'sum'})

购买金额amount

project_sale_amount = project_sale.pivot_table(index = '门店号',
                        columns = '项目编号—01',
                        values = '购买金额',
                        aggfunc = {'购买金额':'sum'})

购买次数 number

project_sale_number = project_sale.pivot_table(index = '门店号',
                        columns = '项目编号—01',
                        values = '购买数量',
                        aggfunc = {'购买数量':'sum'})
1
2
3

4、项目普及明细表project_universal:新增人数、续购人数

查看表读取情况

清洗数据

project_universal.columns = project_universal.loc[0]
project_universal = project_universal.drop(project_universal.index[0])
project_universal.shape  #查看数据大小
project_universal.info()   #全是字符串的格式

筛选数据,更改字段内容
project_universal= project_universal[project_universal['项目编号'].isin(['054','917','318','454','928','457','458'])]
project_universal.shape
project_universal.loc[project_universal['项目编号'] == '054','项目编号—01'] = '054'
project_universal.loc[project_universal['项目编号'] == '917','项目编号—01'] = '917-318'
project_universal.loc[project_universal['项目编号'] == '318','项目编号—01'] = '917-318'
project_universal.loc[project_universal['项目编号'] == '454','项目编号—01'] = '454-928'
project_universal.loc[project_universal['项目编号'] == '928','项目编号—01'] = '454-928'
project_universal.loc[project_universal['项目编号'] == '457','项目编号—01'] = '457-458'
project_universal.loc[project_universal['项目编号'] == '458','项目编号—01'] = '457-458'
#重命名
project_universal['项目编号—01'] .unique()

爆品新增人数

project_universal_new = project_universal[project_universal['购买类型'].isin(['新增'])]
project_universal_new.shape
project_universal_new = project_universal_new.drop_duplicates(subset = ['顾客编号','店号','项目编号—01'])
project_universal_new = project_universal_new.pivot_table(index = '店号',
                                 columns  = '项目编号—01',
                                 values = '顾客编号',
                                 aggfunc = {'顾客编号':'count'})
project_universal_new = project_universal_new.fillna(0)
project_universal_new.head(3)

爆品续购人数:project_universal_add

替换列字段内容

project_universal['项目名称'] = project_universal['项目名称'].replace('脑部亚健康调理(美)','脑部亚健康调理')
project_universal['项目名称'] = project_universal['项目名称'].replace('脑部亚健康调理(足)','脑部亚健康调理')   #替换行字段中的某个内容
project_universal['项目名称'].unique()

用于循环的辅助列
project_universal['顾客编号—项目名称'] = project_universal['顾客编号']+project_universal['项目名称']
project_universal.columns
project_universal_new01 = project_universal[project_universal[ '购买类型'].isin(['新增'])]
project_universal_add01 = project_universal[project_universal[ '购买类型'].isin(['续购'])]
customernumber_new = list(pd.Series(project_universal_new01['顾客编号—项目名称']))  #新增中的顾客编号,新增中每个项目对对应的顾客编号是唯一的
customernumber_add = list(pd.Series(project_universal_add01['顾客编号—项目名称']))  #续购用户中顾客的编号

数据清洗,如果新增表中A项目的顾客编号出现在续购表A项目中,在续购表中就删除该顾客编号
def drop_duplicates(x):  #x中传入新增顾客中的顾客编号
    df = project_universal_add01
    for i in range(len(x)):
        if x[i] in customernumber_add:
            df = df[~df['顾客编号—项目名称'].isin([x[i]])]  #如果新增表中A项目的顾客编号出现在续购表A项目中,在续购表中就删除该顾客编号
    return df

把函数的结果复制给变量project_universal_add

project_universal_add = drop_duplicates(customernumber_new)

去重不完整,excel中续购人数229,python中续购人数249人

  • 会出现顾客在同一个项目,续购购买类型中,有两条记录。用duplicats(项目编号,顾客编号两个进行去重)
project_universal_add = project_universal_add.drop_duplicates(subset = ['顾客编号','项目编号—01'],keep = 'first')
project_universal_add.shape

查看去重后的数据
project_universal_add  = project_universal_add.pivot_table(index = '店号',
                                 columns =  '项目编号—01',
                                 values = '顾客编号',
                                 aggfunc = {'顾客编号':'count'})
project_universal_add = project_universal_add.fillna(0) #填充确实值

5、0401-0405顾客剩余项目疗程报表5次及以上 (求拥~字段)

这里我们只分析项目编码54,917,318,454,928,457,458
查看数据读取情况

surplus_project = surplus_project[surplus_project['项目编号'].isin([54,917,318,454,928,457,458])]
surplus_project.dtypes  #数据结构
surplus_project_number =surplus_project.pivot_table(index = '门店编号',
                            columns = '项目编号',
                            values = '顾客编号',
                            aggfunc = {'顾客编号':'count'})
surplus_project_number.head()

填充确实值
surplus_project_number = surplus_project_number.fillna(0)

将有两个项目编号的数据汇总

surplus_project_number['917-318'] = surplus_project_number[917] + surplus_project_number[318]
surplus_project_number['454-928'] = surplus_project_number[454] + surplus_project_number[928]
surplus_project_number['457-458'] = surplus_project_number[457] + surplus_project_number[458]

surplus_project_number= surplus_project_number[[54,'917-318','454-928','457-458']]#选取需要的字段
surplus_project_number.head()

三、将爆品店面数据进行汇总 store_popular_project

主要是用pd.merge函数将两个表数据进行匹配

  • 爆品店面数据汇总表:store_popular_project
  • 门店行字段表:store
  • 6个月到店异动会员:customer_data_01
  • 购买人数表:project_sale_people
  • 购买次数表:project_sale_number
  • 购买金额表:project_sale_amount
  • 项目新增人数表:project_universal_new
  • 项目续购人数表:project_universal_add
  • 项目拥有量表:surplus_project_number

1、门店行字段表:store

store_popular_project = pd.merge(store,customer_data_01,how = 'left',left_on = store['店号'],right_on = customer_data_01.index )
store_popular_project = store_popular_project.drop(columns = 'key_0')
store_popular_project.head()

3、购买人数表:project_sale_people

查看读取数据

project_sale_people.head(2)

更改字段名
project_sale_people.rename(columns = {'054':'054购买人数',
                                      '454-928':'454-928购买人数',
                                      '457-458':'457-458购买人数',
                                      '917-318':'917-318购买人数'},inplace = True)

合并表

store_popular_project = pd.merge(store_popular_project,project_sale_people,how = 'left',left_on = store_popular_project['店号'],
        right_on = project_sale_people.index)

store_popular_project = store_popular_project.drop(columns = 'key_0')
store_popular_project.head(3)

4、购买次数表:project_sale_number

查看读取数据

project_sale_number.head(2)

更改字段名

project_sale_number = project_sale_number.rename(columns = {'054':'054购买次数',
                                      '454-928':'454-928购买次数',
                                      '457-458':'457-458购买次数',
                                      '917-318':'917-318购买次数'})
project_sale_number.fillna(0).head()  #填充缺失值

数据合并
store_popular_project =  pd.merge(store_popular_project,project_sale_number,how = 'left',left_on = store_popular_project['店号'],
        right_on = project_sale_number.index)

store_popular_project.head()
store_popular_project = store_popular_project.drop(columns = 'key_0') #删除key_0字段

5、购买金额表:project_sale_amount

查看读取数据

project_sale_amount.head()

更改字段名称
project_sale_amount.rename(columns = {'054':'054购买金额',
                                     '454-928':'454-928购买金额',
                                     '457-458':'457-458购买金额',
                                     '917-318':'917-318购买金额'},inplace = True)

合并表

store_popular_project = pd.merge(store_popular_project,project_sale_amount,how = 'left',left_on = store_popular_project['店号'],
        right_on = project_sale_amount.index)

6、项目新增人数表:project_universal_new

查看导入数据


更改字段名
project_universal_new.rename(columns = {'054':'054新增',
                                       '454-928':'454-928新增',
                                       '457-458':'457-458新增',
                                       '917-318':'917-318新增'},inplace = True)

合并表

store_popular_project = pd.merge(store_popular_project,project_universal_new,how = 'left',left_on = store_popular_project['店号'],
         right_on = project_universal_new.index)

7、项目拥有量表:surplus_project_number

查看导入数据


更改字段名
surplus_project_number.rename(columns = {54:'54拥用量',
                                       '917-318':'917-318拥用量',
                                        '454-928':'454-928拥用量',
                                        '457-458':'457-458拥用量'},inplace = True)

更改数据类型

surplus_project_number.index = surplus_project_number.index.astype('str')

合并表

store_popular_project = pd.merge(store_popular_project,surplus_project_number,how = 'left',left_on = store_popular_project['店号'],
          right_on = surplus_project_number.index)

8、项目续购人数表:project_universal_add

查看读取数据


更改字段名称
project_universal_add.rename(columns = {'054':'54续购',
                                       '454-928':'454-928续购',
                                       '457-458':'457-458续购',
                                       '917-318':'917-318续购',
                                       },inplace = True)

合并数据

store_popular_project = pd.merge(store_popular_project,project_universal_add,how = 'left',
         left_on = store_popular_project['店号'],right_on =project_universal_add.index )

所有的表完成了汇总。调整格式和增加业绩占比


所有的数据都完成了汇总,现在只需要转化为excel格式就可以了
writer = pd.ExcelWriter('store_popular_project.xlsx')
store_popular_project.to_excel(writer,index = True)
writer.save()

查看文件地址

%pwd

就找到我们导出的文件啦

这就是在python里面做的excel表格。像我们每周或者每天都会统计销售数据,在python里面做好模板后就不用车哦给你敷操作了,每次只需要带入新的数据,或者更改文件名就可以了,就可以实现自动化报表

当然罗通过Python连接数据库,来实现自动化报表是最理想的,目前还在学习中

**链接: https://pan.baidu.com/s/1LOiHXPovwN6qhHin-PIDlA 提取码: vp72 **

相关文章

网友评论

    本文标题:python自动化门店报表(一)

    本文链接:https://www.haomeiwen.com/subject/ddetzqtx.html