python自动化门店报表(一)

作者: 夜希辰 | 来源:发表于2019-05-19 19:03 被阅读21次

    数据在文末啦

    导入需要的包

    import pandas as pd
    import numpy as np
    

    一、导入需要使用的数据

    store_achivement = pd.read_csv("0401-0405门店业绩报表.csv",engine = 'python') 
    customer_data = pd.read_csv("0401-0405客服数据分析表.csv",engine = 'python')
    project_universal = pd.read_csv("0401-0405项目普及明细表.csv",engine = 'python')
    project_sale = pd.read_csv("0401-0405项目销售统计明细信息.csv",engine = 'python')
    surplus_project = pd.read_csv("0401-0405顾客剩余项目疗程报表5次及以上.csv",engine = 'python')
    

    二、处理数据

    1、门店业绩表store_achivement (确定行字段)

    store_achivement.head()  #查看读取的结果,为什么不用excel读取要转化为csv,因为excel里面的列名没有自动补全
    
    store_achivement.shape   #查看源数据的大小,525行,59列(业绩表里面有些字段间的关系还不是很懂)
    

    数据清洗,删除不需要的行

    store_achivement['Unnamed: 4'].unique()
    store_achivement = store_achivement[store_achivement['Unnamed: 4'].isin(['门店类型','纯美','综合'])]
    store_achivement = store_achivement[store_achivement['Unnamed: 2'].isin(['区域',  '事业一部', 
                    '事业二部','川五区', '事业三部川区', '渝一区','事业三部渝区', '事业五部',  '事业七部'])]
    store_achivement = store_achivement[store_achivement['Unnamed: 6'].isin(['门店状态','正常'])]
    store_achivement.head(3)
    store_achivement.shape #查看数据大小
    

    构建行字段

    store_01 = store_achivement
    store_01.columns = store_01.loc[0]
    store_01 = store_01.drop(store_01.index[0])
    store = store_01[['店号','店名','区域','门店类型','区域经理','城市经理','业务经理']]
    store.head()
    

    2、客服数据分析表customer_data

    查看数据读取情况

    customer_data.head()
    
    customer_data = customer_data[customer_data['Unnamed: 7'].isin(['顾客类型','一星会员', '二星会员', '三星会员',  '爱莎之星','爱莎璀璨之星', '臻爱时光卡', '老爱莎之星'])]
    customer_data.columns = customer_data.iloc[0]
    customer_data = customer_data.drop(customer_data.index[0])
    customer_data.info()  #查看数据类型并修改异动顾客的数据类型
    

    修改数据类型
    customer_data['异动顾客(6个月内到店)'] = customer_data['异动顾客(6个月内到店)'].astype('int')
    customer_data.head(2)
    

    客服数据分析表中数据的处理完成,用pivot_table进行数据透视

    customer_data_01 = customer_data.pivot_table(index = ['店号'],
                             values = '异动顾客(6个月内到店)',
                             aggfunc = {'异动顾客(6个月内到店)':'sum'})
    customer_data_01.head()
    

    3、0401-0405项目销售统计明细信息project_sale :购买人数people、购买次数number、购买金额amount

    查看数据读取情况

    project_sale.head(3)  #所有的数据格式都是字符串的格式
    

    数据清洗,删除不需要的行
    project_sale = project_sale[project_sale['Unnamed: 7'].isin(['项目编号','054','917','318', '454','928','457','458'])]
    project_sale[['Unnamed: 9','Unnamed: 10', 'Unnamed: 11']].head(3)
    
    project_sale.rename(columns = {'产品销售统计表(星级分析)':'区域','Unnamed: 1':'门店号',
                        'Unnamed: 2':'门店名','Unnamed: 7':'项目编号','Unnamed: 8':'项目名称',
                        'Unnamed: 9':'购买人数','Unnamed: 10':'购买数量','Unnamed: 11':'购买金额'},
                        inplace = True)
    project_sale = project_sale.drop(project_sale.index[0])
    project_sale.head(2)
    
    project_sale['项目编号'].unique()
    

    修改字段的内容
    project_sale.loc[project_sale['项目编号'] == '054','项目编号—01'] = '054'
    project_sale.loc[project_sale['项目编号'] == '917','项目编号—01'] = '917-318'
    project_sale.loc[project_sale['项目编号'] == '318','项目编号—01'] = '917-318'
    project_sale.loc[project_sale['项目编号'] == '454','项目编号—01'] = '454-928'
    project_sale.loc[project_sale['项目编号'] == '928','项目编号—01'] = '454-928'
    project_sale.loc[project_sale['项目编号'] == '457','项目编号—01'] = '457-458'
    project_sale.loc[project_sale['项目编号'] == '458','项目编号—01'] = '457-458'
    

    更改数据类型

    project_sale['购买人数'] = project_sale['购买人数'].astype('int')
    project_sale['购买数量'] = project_sale['购买数量'].astype('int')
    project_sale['购买金额'] = project_sale['购买金额'].astype('int')
    

    购买人数people

    project_sale_people = project_sale.pivot_table(index = '门店号',
                            columns = '项目编号—01',
                            values = '购买人数',
                            aggfunc = {'购买人数':'sum'})
    

    购买金额amount

    project_sale_amount = project_sale.pivot_table(index = '门店号',
                            columns = '项目编号—01',
                            values = '购买金额',
                            aggfunc = {'购买金额':'sum'})
    

    购买次数 number

    project_sale_number = project_sale.pivot_table(index = '门店号',
                            columns = '项目编号—01',
                            values = '购买数量',
                            aggfunc = {'购买数量':'sum'})
    
    1
    2
    3

    4、项目普及明细表project_universal:新增人数、续购人数

    查看表读取情况

    清洗数据

    project_universal.columns = project_universal.loc[0]
    project_universal = project_universal.drop(project_universal.index[0])
    project_universal.shape  #查看数据大小
    project_universal.info()   #全是字符串的格式
    

    筛选数据,更改字段内容
    project_universal= project_universal[project_universal['项目编号'].isin(['054','917','318','454','928','457','458'])]
    project_universal.shape
    project_universal.loc[project_universal['项目编号'] == '054','项目编号—01'] = '054'
    project_universal.loc[project_universal['项目编号'] == '917','项目编号—01'] = '917-318'
    project_universal.loc[project_universal['项目编号'] == '318','项目编号—01'] = '917-318'
    project_universal.loc[project_universal['项目编号'] == '454','项目编号—01'] = '454-928'
    project_universal.loc[project_universal['项目编号'] == '928','项目编号—01'] = '454-928'
    project_universal.loc[project_universal['项目编号'] == '457','项目编号—01'] = '457-458'
    project_universal.loc[project_universal['项目编号'] == '458','项目编号—01'] = '457-458'
    #重命名
    
    project_universal['项目编号—01'] .unique()
    

    爆品新增人数

    project_universal_new = project_universal[project_universal['购买类型'].isin(['新增'])]
    project_universal_new.shape
    
    project_universal_new = project_universal_new.drop_duplicates(subset = ['顾客编号','店号','项目编号—01'])
    project_universal_new = project_universal_new.pivot_table(index = '店号',
                                     columns  = '项目编号—01',
                                     values = '顾客编号',
                                     aggfunc = {'顾客编号':'count'})
    project_universal_new = project_universal_new.fillna(0)
    project_universal_new.head(3)
    

    爆品续购人数:project_universal_add

    替换列字段内容

    project_universal['项目名称'] = project_universal['项目名称'].replace('脑部亚健康调理(美)','脑部亚健康调理')
    project_universal['项目名称'] = project_universal['项目名称'].replace('脑部亚健康调理(足)','脑部亚健康调理')   #替换行字段中的某个内容
    project_universal['项目名称'].unique()
    

    用于循环的辅助列
    project_universal['顾客编号—项目名称'] = project_universal['顾客编号']+project_universal['项目名称']
    project_universal.columns
    
    project_universal_new01 = project_universal[project_universal[ '购买类型'].isin(['新增'])]
    project_universal_add01 = project_universal[project_universal[ '购买类型'].isin(['续购'])]
    customernumber_new = list(pd.Series(project_universal_new01['顾客编号—项目名称']))  #新增中的顾客编号,新增中每个项目对对应的顾客编号是唯一的
    customernumber_add = list(pd.Series(project_universal_add01['顾客编号—项目名称']))  #续购用户中顾客的编号
    

    数据清洗,如果新增表中A项目的顾客编号出现在续购表A项目中,在续购表中就删除该顾客编号
    def drop_duplicates(x):  #x中传入新增顾客中的顾客编号
        df = project_universal_add01
        for i in range(len(x)):
            if x[i] in customernumber_add:
                df = df[~df['顾客编号—项目名称'].isin([x[i]])]  #如果新增表中A项目的顾客编号出现在续购表A项目中,在续购表中就删除该顾客编号
        return df
    

    把函数的结果复制给变量project_universal_add

    project_universal_add = drop_duplicates(customernumber_new)
    

    去重不完整,excel中续购人数229,python中续购人数249人

    • 会出现顾客在同一个项目,续购购买类型中,有两条记录。用duplicats(项目编号,顾客编号两个进行去重)
    project_universal_add = project_universal_add.drop_duplicates(subset = ['顾客编号','项目编号—01'],keep = 'first')
    project_universal_add.shape
    

    查看去重后的数据
    project_universal_add  = project_universal_add.pivot_table(index = '店号',
                                     columns =  '项目编号—01',
                                     values = '顾客编号',
                                     aggfunc = {'顾客编号':'count'})
    project_universal_add = project_universal_add.fillna(0) #填充确实值
    

    5、0401-0405顾客剩余项目疗程报表5次及以上 (求拥~字段)

    这里我们只分析项目编码54,917,318,454,928,457,458
    查看数据读取情况

    surplus_project = surplus_project[surplus_project['项目编号'].isin([54,917,318,454,928,457,458])]
    surplus_project.dtypes  #数据结构
    
    surplus_project_number =surplus_project.pivot_table(index = '门店编号',
                                columns = '项目编号',
                                values = '顾客编号',
                                aggfunc = {'顾客编号':'count'})
    
    surplus_project_number.head()
    

    填充确实值
    surplus_project_number = surplus_project_number.fillna(0)
    

    将有两个项目编号的数据汇总

    surplus_project_number['917-318'] = surplus_project_number[917] + surplus_project_number[318]
    surplus_project_number['454-928'] = surplus_project_number[454] + surplus_project_number[928]
    surplus_project_number['457-458'] = surplus_project_number[457] + surplus_project_number[458]
    
    surplus_project_number= surplus_project_number[[54,'917-318','454-928','457-458']]#选取需要的字段
    surplus_project_number.head()
    

    三、将爆品店面数据进行汇总 store_popular_project

    主要是用pd.merge函数将两个表数据进行匹配

    • 爆品店面数据汇总表:store_popular_project
    • 门店行字段表:store
    • 6个月到店异动会员:customer_data_01
    • 购买人数表:project_sale_people
    • 购买次数表:project_sale_number
    • 购买金额表:project_sale_amount
    • 项目新增人数表:project_universal_new
    • 项目续购人数表:project_universal_add
    • 项目拥有量表:surplus_project_number

    1、门店行字段表:store

    store_popular_project = pd.merge(store,customer_data_01,how = 'left',left_on = store['店号'],right_on = customer_data_01.index )
    store_popular_project = store_popular_project.drop(columns = 'key_0')
    store_popular_project.head()
    

    3、购买人数表:project_sale_people

    查看读取数据

    project_sale_people.head(2)
    

    更改字段名
    project_sale_people.rename(columns = {'054':'054购买人数',
                                          '454-928':'454-928购买人数',
                                          '457-458':'457-458购买人数',
                                          '917-318':'917-318购买人数'},inplace = True)
    

    合并表

    store_popular_project = pd.merge(store_popular_project,project_sale_people,how = 'left',left_on = store_popular_project['店号'],
            right_on = project_sale_people.index)
    
    store_popular_project = store_popular_project.drop(columns = 'key_0')
    store_popular_project.head(3)
    

    4、购买次数表:project_sale_number

    查看读取数据

    project_sale_number.head(2)
    

    更改字段名

    project_sale_number = project_sale_number.rename(columns = {'054':'054购买次数',
                                          '454-928':'454-928购买次数',
                                          '457-458':'457-458购买次数',
                                          '917-318':'917-318购买次数'})
    project_sale_number.fillna(0).head()  #填充缺失值
    

    数据合并
    store_popular_project =  pd.merge(store_popular_project,project_sale_number,how = 'left',left_on = store_popular_project['店号'],
            right_on = project_sale_number.index)
    
    store_popular_project.head()
    store_popular_project = store_popular_project.drop(columns = 'key_0') #删除key_0字段
    

    5、购买金额表:project_sale_amount

    查看读取数据

    project_sale_amount.head()
    

    更改字段名称
    project_sale_amount.rename(columns = {'054':'054购买金额',
                                         '454-928':'454-928购买金额',
                                         '457-458':'457-458购买金额',
                                         '917-318':'917-318购买金额'},inplace = True)
    

    合并表

    store_popular_project = pd.merge(store_popular_project,project_sale_amount,how = 'left',left_on = store_popular_project['店号'],
            right_on = project_sale_amount.index)
    

    6、项目新增人数表:project_universal_new

    查看导入数据


    更改字段名
    project_universal_new.rename(columns = {'054':'054新增',
                                           '454-928':'454-928新增',
                                           '457-458':'457-458新增',
                                           '917-318':'917-318新增'},inplace = True)
    

    合并表

    store_popular_project = pd.merge(store_popular_project,project_universal_new,how = 'left',left_on = store_popular_project['店号'],
             right_on = project_universal_new.index)
    

    7、项目拥有量表:surplus_project_number

    查看导入数据


    更改字段名
    surplus_project_number.rename(columns = {54:'54拥用量',
                                           '917-318':'917-318拥用量',
                                            '454-928':'454-928拥用量',
                                            '457-458':'457-458拥用量'},inplace = True)
    

    更改数据类型

    surplus_project_number.index = surplus_project_number.index.astype('str')
    

    合并表

    store_popular_project = pd.merge(store_popular_project,surplus_project_number,how = 'left',left_on = store_popular_project['店号'],
              right_on = surplus_project_number.index)
    

    8、项目续购人数表:project_universal_add

    查看读取数据


    更改字段名称
    project_universal_add.rename(columns = {'054':'54续购',
                                           '454-928':'454-928续购',
                                           '457-458':'457-458续购',
                                           '917-318':'917-318续购',
                                           },inplace = True)
    

    合并数据

    store_popular_project = pd.merge(store_popular_project,project_universal_add,how = 'left',
             left_on = store_popular_project['店号'],right_on =project_universal_add.index )
    

    所有的表完成了汇总。调整格式和增加业绩占比


    所有的数据都完成了汇总,现在只需要转化为excel格式就可以了
    writer = pd.ExcelWriter('store_popular_project.xlsx')
    store_popular_project.to_excel(writer,index = True)
    writer.save()
    

    查看文件地址

    %pwd
    

    就找到我们导出的文件啦

    这就是在python里面做的excel表格。像我们每周或者每天都会统计销售数据,在python里面做好模板后就不用车哦给你敷操作了,每次只需要带入新的数据,或者更改文件名就可以了,就可以实现自动化报表

    当然罗通过Python连接数据库,来实现自动化报表是最理想的,目前还在学习中

    **链接: https://pan.baidu.com/s/1LOiHXPovwN6qhHin-PIDlA 提取码: vp72 **

    相关文章

      网友评论

        本文标题:python自动化门店报表(一)

        本文链接:https://www.haomeiwen.com/subject/ddetzqtx.html