data:image/s3,"s3://crabby-images/15299/15299a27577a4e23c8fe2d9af62e08c0ff744ebf" alt=""
数据在文末啦
导入需要的包
import pandas as pd
import numpy as np
一、导入需要使用的数据
store_achivement = pd.read_csv("0401-0405门店业绩报表.csv",engine = 'python')
customer_data = pd.read_csv("0401-0405客服数据分析表.csv",engine = 'python')
project_universal = pd.read_csv("0401-0405项目普及明细表.csv",engine = 'python')
project_sale = pd.read_csv("0401-0405项目销售统计明细信息.csv",engine = 'python')
surplus_project = pd.read_csv("0401-0405顾客剩余项目疗程报表5次及以上.csv",engine = 'python')
data:image/s3,"s3://crabby-images/857e0/857e0d0b665f62e18da5234f419ae689e2f584a8" alt=""
二、处理数据
1、门店业绩表store_achivement (确定行字段)
store_achivement.head() #查看读取的结果,为什么不用excel读取要转化为csv,因为excel里面的列名没有自动补全
data:image/s3,"s3://crabby-images/c524e/c524e4b82cb91de161ef874f27e6a97c1cf3cc49" alt=""
store_achivement.shape #查看源数据的大小,525行,59列(业绩表里面有些字段间的关系还不是很懂)
data:image/s3,"s3://crabby-images/06334/06334a3f657a392fc647c9fc47a86819119c89cf" alt=""
数据清洗,删除不需要的行
store_achivement['Unnamed: 4'].unique()
store_achivement = store_achivement[store_achivement['Unnamed: 4'].isin(['门店类型','纯美','综合'])]
store_achivement = store_achivement[store_achivement['Unnamed: 2'].isin(['区域', '事业一部',
'事业二部','川五区', '事业三部川区', '渝一区','事业三部渝区', '事业五部', '事业七部'])]
store_achivement = store_achivement[store_achivement['Unnamed: 6'].isin(['门店状态','正常'])]
store_achivement.head(3)
store_achivement.shape #查看数据大小
data:image/s3,"s3://crabby-images/9ee55/9ee559e3cb7bd65ab93b6d7ed66c22543e72b6ff" alt=""
构建行字段
store_01 = store_achivement
store_01.columns = store_01.loc[0]
store_01 = store_01.drop(store_01.index[0])
store = store_01[['店号','店名','区域','门店类型','区域经理','城市经理','业务经理']]
store.head()
data:image/s3,"s3://crabby-images/0dd18/0dd18f2cd8a0c8438419dab7e82affb9ffa9f0cb" alt=""
2、客服数据分析表customer_data
查看数据读取情况
customer_data.head()
data:image/s3,"s3://crabby-images/73f9d/73f9d35a0c0f3355a85f9b121631242e063bda53" alt=""
customer_data = customer_data[customer_data['Unnamed: 7'].isin(['顾客类型','一星会员', '二星会员', '三星会员', '爱莎之星','爱莎璀璨之星', '臻爱时光卡', '老爱莎之星'])]
customer_data.columns = customer_data.iloc[0]
customer_data = customer_data.drop(customer_data.index[0])
customer_data.info() #查看数据类型并修改异动顾客的数据类型
data:image/s3,"s3://crabby-images/e96f4/e96f42ab643b3b62f9dbf760f61b98281e694217" alt=""
修改数据类型
customer_data['异动顾客(6个月内到店)'] = customer_data['异动顾客(6个月内到店)'].astype('int')
customer_data.head(2)
data:image/s3,"s3://crabby-images/5abdc/5abdcdebfa82bb08fd932795bef037208ec6b0be" alt=""
客服数据分析表中数据的处理完成,用pivot_table进行数据透视
customer_data_01 = customer_data.pivot_table(index = ['店号'],
values = '异动顾客(6个月内到店)',
aggfunc = {'异动顾客(6个月内到店)':'sum'})
customer_data_01.head()
data:image/s3,"s3://crabby-images/8ce48/8ce48d9a11b50946053f59b7d02ebf558df92a3b" alt=""
3、0401-0405项目销售统计明细信息project_sale :购买人数people、购买次数number、购买金额amount
查看数据读取情况
project_sale.head(3) #所有的数据格式都是字符串的格式
data:image/s3,"s3://crabby-images/863c9/863c993a1d036c2c9092f5846cc3866a7142d05d" alt=""
数据清洗,删除不需要的行
project_sale = project_sale[project_sale['Unnamed: 7'].isin(['项目编号','054','917','318', '454','928','457','458'])]
project_sale[['Unnamed: 9','Unnamed: 10', 'Unnamed: 11']].head(3)
data:image/s3,"s3://crabby-images/47f54/47f5483d363b488493fa54a0b26ed1a2bac75533" alt=""
project_sale.rename(columns = {'产品销售统计表(星级分析)':'区域','Unnamed: 1':'门店号',
'Unnamed: 2':'门店名','Unnamed: 7':'项目编号','Unnamed: 8':'项目名称',
'Unnamed: 9':'购买人数','Unnamed: 10':'购买数量','Unnamed: 11':'购买金额'},
inplace = True)
project_sale = project_sale.drop(project_sale.index[0])
project_sale.head(2)
data:image/s3,"s3://crabby-images/1753b/1753ba20270200d1948918f96cddcfce809bc576" alt=""
project_sale['项目编号'].unique()
data:image/s3,"s3://crabby-images/0156a/0156a30cb8b6d495cfddd68b4fe59cb554fe890f" alt=""
修改字段的内容
project_sale.loc[project_sale['项目编号'] == '054','项目编号—01'] = '054'
project_sale.loc[project_sale['项目编号'] == '917','项目编号—01'] = '917-318'
project_sale.loc[project_sale['项目编号'] == '318','项目编号—01'] = '917-318'
project_sale.loc[project_sale['项目编号'] == '454','项目编号—01'] = '454-928'
project_sale.loc[project_sale['项目编号'] == '928','项目编号—01'] = '454-928'
project_sale.loc[project_sale['项目编号'] == '457','项目编号—01'] = '457-458'
project_sale.loc[project_sale['项目编号'] == '458','项目编号—01'] = '457-458'
更改数据类型
project_sale['购买人数'] = project_sale['购买人数'].astype('int')
project_sale['购买数量'] = project_sale['购买数量'].astype('int')
project_sale['购买金额'] = project_sale['购买金额'].astype('int')
data:image/s3,"s3://crabby-images/8dc92/8dc9245376e2f3c6931d40e5d5bb38f55a4bbc30" alt=""
购买人数people
project_sale_people = project_sale.pivot_table(index = '门店号',
columns = '项目编号—01',
values = '购买人数',
aggfunc = {'购买人数':'sum'})
购买金额amount
project_sale_amount = project_sale.pivot_table(index = '门店号',
columns = '项目编号—01',
values = '购买金额',
aggfunc = {'购买金额':'sum'})
购买次数 number
project_sale_number = project_sale.pivot_table(index = '门店号',
columns = '项目编号—01',
values = '购买数量',
aggfunc = {'购买数量':'sum'})
data:image/s3,"s3://crabby-images/44920/44920123a8a0450e75781b91f283f1af8dd8354c" alt=""
data:image/s3,"s3://crabby-images/21111/2111122173d931dde788423e33580e79c619bed6" alt=""
data:image/s3,"s3://crabby-images/3e86c/3e86c982080f01dccab8c838e181a2d44abda407" alt=""
4、项目普及明细表project_universal:新增人数、续购人数
查看表读取情况
data:image/s3,"s3://crabby-images/b83f5/b83f5b9f8fc3d700680fdcc86e755db6d28a1e79" alt=""
清洗数据
project_universal.columns = project_universal.loc[0]
project_universal = project_universal.drop(project_universal.index[0])
project_universal.shape #查看数据大小
project_universal.info() #全是字符串的格式
data:image/s3,"s3://crabby-images/2dcf5/2dcf5d9279417941b6e4cf02f043572ab7710cc3" alt=""
筛选数据,更改字段内容
project_universal= project_universal[project_universal['项目编号'].isin(['054','917','318','454','928','457','458'])]
project_universal.shape
project_universal.loc[project_universal['项目编号'] == '054','项目编号—01'] = '054'
project_universal.loc[project_universal['项目编号'] == '917','项目编号—01'] = '917-318'
project_universal.loc[project_universal['项目编号'] == '318','项目编号—01'] = '917-318'
project_universal.loc[project_universal['项目编号'] == '454','项目编号—01'] = '454-928'
project_universal.loc[project_universal['项目编号'] == '928','项目编号—01'] = '454-928'
project_universal.loc[project_universal['项目编号'] == '457','项目编号—01'] = '457-458'
project_universal.loc[project_universal['项目编号'] == '458','项目编号—01'] = '457-458'
#重命名
project_universal['项目编号—01'] .unique()
data:image/s3,"s3://crabby-images/76c58/76c581ac6e6c49b2dbd84d73dccb54bd4dd467af" alt=""
爆品新增人数
project_universal_new = project_universal[project_universal['购买类型'].isin(['新增'])]
project_universal_new.shape
data:image/s3,"s3://crabby-images/34c10/34c101bebf00b3e822b6d43caf72716dec249ada" alt=""
project_universal_new = project_universal_new.drop_duplicates(subset = ['顾客编号','店号','项目编号—01'])
project_universal_new = project_universal_new.pivot_table(index = '店号',
columns = '项目编号—01',
values = '顾客编号',
aggfunc = {'顾客编号':'count'})
project_universal_new = project_universal_new.fillna(0)
project_universal_new.head(3)
data:image/s3,"s3://crabby-images/34a15/34a150ff8e7529cb97ee5ac3d96eb5520e3cc0aa" alt=""
爆品续购人数:project_universal_add
替换列字段内容
project_universal['项目名称'] = project_universal['项目名称'].replace('脑部亚健康调理(美)','脑部亚健康调理')
project_universal['项目名称'] = project_universal['项目名称'].replace('脑部亚健康调理(足)','脑部亚健康调理') #替换行字段中的某个内容
project_universal['项目名称'].unique()
data:image/s3,"s3://crabby-images/f1ca0/f1ca09ac9663496d208678de09ba922c0c9da0f6" alt=""
用于循环的辅助列
project_universal['顾客编号—项目名称'] = project_universal['顾客编号']+project_universal['项目名称']
project_universal.columns
data:image/s3,"s3://crabby-images/352a5/352a5d768fc6a8b13d0d55973ee17720b2650585" alt=""
project_universal_new01 = project_universal[project_universal[ '购买类型'].isin(['新增'])]
project_universal_add01 = project_universal[project_universal[ '购买类型'].isin(['续购'])]
customernumber_new = list(pd.Series(project_universal_new01['顾客编号—项目名称'])) #新增中的顾客编号,新增中每个项目对对应的顾客编号是唯一的
customernumber_add = list(pd.Series(project_universal_add01['顾客编号—项目名称'])) #续购用户中顾客的编号
data:image/s3,"s3://crabby-images/6bc0f/6bc0fde0fcf22fb27cf00f90c01c742a40a96906" alt=""
数据清洗,如果新增表中A项目的顾客编号出现在续购表A项目中,在续购表中就删除该顾客编号
def drop_duplicates(x): #x中传入新增顾客中的顾客编号
df = project_universal_add01
for i in range(len(x)):
if x[i] in customernumber_add:
df = df[~df['顾客编号—项目名称'].isin([x[i]])] #如果新增表中A项目的顾客编号出现在续购表A项目中,在续购表中就删除该顾客编号
return df
把函数的结果复制给变量project_universal_add
project_universal_add = drop_duplicates(customernumber_new)
去重不完整,excel中续购人数229,python中续购人数249人
- 会出现顾客在同一个项目,续购购买类型中,有两条记录。用duplicats(项目编号,顾客编号两个进行去重)
project_universal_add = project_universal_add.drop_duplicates(subset = ['顾客编号','项目编号—01'],keep = 'first')
project_universal_add.shape
data:image/s3,"s3://crabby-images/f158a/f158acb87eb2437d8aeda640b7beb1df94951a29" alt=""
查看去重后的数据
data:image/s3,"s3://crabby-images/df504/df504c88f04b64408cd9250cd33cd732919e08c3" alt=""
project_universal_add = project_universal_add.pivot_table(index = '店号',
columns = '项目编号—01',
values = '顾客编号',
aggfunc = {'顾客编号':'count'})
project_universal_add = project_universal_add.fillna(0) #填充确实值
data:image/s3,"s3://crabby-images/3f054/3f054301d94cd798956bf3d949eb74c46b80712c" alt=""
5、0401-0405顾客剩余项目疗程报表5次及以上 (求拥~字段)
这里我们只分析项目编码54,917,318,454,928,457,458
查看数据读取情况
data:image/s3,"s3://crabby-images/06305/063051d45d85c5da7294d64607a3b78ef4bf852c" alt=""
surplus_project = surplus_project[surplus_project['项目编号'].isin([54,917,318,454,928,457,458])]
surplus_project.dtypes #数据结构
data:image/s3,"s3://crabby-images/09592/09592491611aa0a6a474335f5060a2a16b5b0fdd" alt=""
surplus_project_number =surplus_project.pivot_table(index = '门店编号',
columns = '项目编号',
values = '顾客编号',
aggfunc = {'顾客编号':'count'})
surplus_project_number.head()
data:image/s3,"s3://crabby-images/c34b6/c34b64bdfa044bfb5af962cec7034b22cd3aa4f5" alt=""
填充确实值
surplus_project_number = surplus_project_number.fillna(0)
将有两个项目编号的数据汇总
surplus_project_number['917-318'] = surplus_project_number[917] + surplus_project_number[318]
surplus_project_number['454-928'] = surplus_project_number[454] + surplus_project_number[928]
surplus_project_number['457-458'] = surplus_project_number[457] + surplus_project_number[458]
surplus_project_number= surplus_project_number[[54,'917-318','454-928','457-458']]#选取需要的字段
surplus_project_number.head()
data:image/s3,"s3://crabby-images/7e7b6/7e7b663bd56d3c7d2cdea89d675ca935fe541c4a" alt=""
三、将爆品店面数据进行汇总 store_popular_project
主要是用pd.merge函数将两个表数据进行匹配
- 爆品店面数据汇总表:store_popular_project
- 门店行字段表:store
- 6个月到店异动会员:customer_data_01
- 购买人数表:project_sale_people
- 购买次数表:project_sale_number
- 购买金额表:project_sale_amount
- 项目新增人数表:project_universal_new
- 项目续购人数表:project_universal_add
- 项目拥有量表:surplus_project_number
1、门店行字段表:store
data:image/s3,"s3://crabby-images/03437/0343701f22fcdfa8b5566cce00f9406ea68373fe" alt=""
store_popular_project = pd.merge(store,customer_data_01,how = 'left',left_on = store['店号'],right_on = customer_data_01.index )
store_popular_project = store_popular_project.drop(columns = 'key_0')
store_popular_project.head()
data:image/s3,"s3://crabby-images/d2a94/d2a94da66d12ab51941296fd98f20116f9da7e37" alt=""
3、购买人数表:project_sale_people
查看读取数据
project_sale_people.head(2)
data:image/s3,"s3://crabby-images/3ab88/3ab888ade7e265df2680c4a596c7500aa2000082" alt=""
更改字段名
project_sale_people.rename(columns = {'054':'054购买人数',
'454-928':'454-928购买人数',
'457-458':'457-458购买人数',
'917-318':'917-318购买人数'},inplace = True)
合并表
store_popular_project = pd.merge(store_popular_project,project_sale_people,how = 'left',left_on = store_popular_project['店号'],
right_on = project_sale_people.index)
store_popular_project = store_popular_project.drop(columns = 'key_0')
store_popular_project.head(3)
data:image/s3,"s3://crabby-images/b22da/b22da4160e8c3480b2df6209936c0427ab93b1a1" alt=""
4、购买次数表:project_sale_number
查看读取数据
project_sale_number.head(2)
更改字段名
project_sale_number = project_sale_number.rename(columns = {'054':'054购买次数',
'454-928':'454-928购买次数',
'457-458':'457-458购买次数',
'917-318':'917-318购买次数'})
project_sale_number.fillna(0).head() #填充缺失值
data:image/s3,"s3://crabby-images/713b0/713b02fd130df2268e331d2f9c3a854cad8cc051" alt=""
数据合并
store_popular_project = pd.merge(store_popular_project,project_sale_number,how = 'left',left_on = store_popular_project['店号'],
right_on = project_sale_number.index)
store_popular_project.head()
store_popular_project = store_popular_project.drop(columns = 'key_0') #删除key_0字段
data:image/s3,"s3://crabby-images/2649a/2649ae7e0d56646c0901ffe1a1ba50fc4ce2cccf" alt=""
5、购买金额表:project_sale_amount
查看读取数据
project_sale_amount.head()
data:image/s3,"s3://crabby-images/39213/39213ac2b9ba0927bac17b24d7c7d235b317fccb" alt=""
更改字段名称
project_sale_amount.rename(columns = {'054':'054购买金额',
'454-928':'454-928购买金额',
'457-458':'457-458购买金额',
'917-318':'917-318购买金额'},inplace = True)
合并表
store_popular_project = pd.merge(store_popular_project,project_sale_amount,how = 'left',left_on = store_popular_project['店号'],
right_on = project_sale_amount.index)
data:image/s3,"s3://crabby-images/ab52d/ab52d70f4ea79edd7f33b78d6bbde8595d70b0a4" alt=""
6、项目新增人数表:project_universal_new
查看导入数据
data:image/s3,"s3://crabby-images/38a73/38a73d1e0bd9e3a2cf9db8031d1d6d3e7e7e745d" alt=""
更改字段名
project_universal_new.rename(columns = {'054':'054新增',
'454-928':'454-928新增',
'457-458':'457-458新增',
'917-318':'917-318新增'},inplace = True)
合并表
store_popular_project = pd.merge(store_popular_project,project_universal_new,how = 'left',left_on = store_popular_project['店号'],
right_on = project_universal_new.index)
data:image/s3,"s3://crabby-images/2c1c4/2c1c4b1ddf441e4bff21b999dc9d1b7d7ae008af" alt=""
7、项目拥有量表:surplus_project_number
查看导入数据
data:image/s3,"s3://crabby-images/16cd9/16cd92e334ee1b936828ba3716c1394af4ab0e89" alt=""
更改字段名
surplus_project_number.rename(columns = {54:'54拥用量',
'917-318':'917-318拥用量',
'454-928':'454-928拥用量',
'457-458':'457-458拥用量'},inplace = True)
更改数据类型
surplus_project_number.index = surplus_project_number.index.astype('str')
合并表
store_popular_project = pd.merge(store_popular_project,surplus_project_number,how = 'left',left_on = store_popular_project['店号'],
right_on = surplus_project_number.index)
data:image/s3,"s3://crabby-images/66156/66156ff3ab83c070c89a2ff6127306ed62371b16" alt=""
8、项目续购人数表:project_universal_add
查看读取数据
data:image/s3,"s3://crabby-images/8be1e/8be1e505b50f11822f3e161847c318ba31395c4b" alt=""
更改字段名称
project_universal_add.rename(columns = {'054':'54续购',
'454-928':'454-928续购',
'457-458':'457-458续购',
'917-318':'917-318续购',
},inplace = True)
合并数据
store_popular_project = pd.merge(store_popular_project,project_universal_add,how = 'left',
left_on = store_popular_project['店号'],right_on =project_universal_add.index )
所有的表完成了汇总。调整格式和增加业绩占比
data:image/s3,"s3://crabby-images/8d6c4/8d6c49f8e3ab8bcc63f46229bb64a6a7aa55c1ce" alt=""
所有的数据都完成了汇总,现在只需要转化为excel格式就可以了
writer = pd.ExcelWriter('store_popular_project.xlsx')
store_popular_project.to_excel(writer,index = True)
writer.save()
查看文件地址
%pwd
data:image/s3,"s3://crabby-images/966d8/966d859e5f5ba87749c90a3fe42d6bf703fcf361" alt=""
就找到我们导出的文件啦
data:image/s3,"s3://crabby-images/8d6b2/8d6b2e27c7f0c61466604cdcfafeac71852ceb46" alt=""
这就是在python里面做的excel表格。像我们每周或者每天都会统计销售数据,在python里面做好模板后就不用车哦给你敷操作了,每次只需要带入新的数据,或者更改文件名就可以了,就可以实现自动化报表
data:image/s3,"s3://crabby-images/137a9/137a9299ea3919f2236da46f96771a5c18cc4f53" alt=""
当然罗通过Python连接数据库,来实现自动化报表是最理想的,目前还在学习中
**链接: https://pan.baidu.com/s/1LOiHXPovwN6qhHin-PIDlA 提取码: vp72 **
网友评论