本章内容包括:matplotlib、Jupyter 内显示图片、时间转化
用户个体消费分析
//input1
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Use matplotlib's built-in "ggplot" style for every chart drawn below.
plt.style.use('ggplot')

# Column names are supplied explicitly via `names=` when reading the file.
columns = ['user_id', 'order_dt', 'order_products', 'order_amount']

# Fields are separated by runs of whitespace. The separator is written as a
# raw string so that "\s" reaches the regex engine unchanged instead of being
# parsed as an (invalid) string escape, which newer Python versions warn about.
df = pd.read_table('CDNOW_master.txt', names=columns, sep=r'\s+')
//user_id:用户ID
//order_dt:购买日期
//order_products:购买产品数
//order_amount:购买金额
# Parse the YYYYMMDD order date into a proper datetime column.
df['order_dt'] = pd.to_datetime(df['order_dt'], format='%Y%m%d')

# Truncate each order date to month precision so orders can be grouped by
# calendar month (the value is shown as the first day of that month).
df['month'] = df['order_dt'].values.astype('datetime64[M]')

# Print the column dtypes, non-null counts and memory usage of the frame.
df.info()
//output1
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 69659 entries, 0 to 69658
Data columns (total 5 columns):
user_id 69659 non-null int64
order_dt 69659 non-null datetime64[ns]
order_products 69659 non-null int64
order_amount 69659 non-null float64
month 69659 non-null datetime64[ns]
dtypes: datetime64[ns](2), float64(1), int64(2)
memory usage: 2.7 MB
//input2
df.head()
//output2
user_id order_dt order_products order_amount month
0 1 1997-01-01 1 11.77 1997-01-01
1 2 1997-01-12 1 12.00 1997-01-01
2 2 1997-01-12 5 77.00 1997-01-01
3 3 1997-01-02 2 20.76 1997-01-01
4 3 1997-03-30 2 20.76 1997-03-01
//input3
//## 1.进行用户消费趋势的分析(按月)
//每月消费的金额
//每月的消费次数
//每月的产品购买量
//每月的消费人数
# Group the orders by calendar month once so the grouping can be reused.
grouped_month = df.groupby('month')

# Total amount spent in each month.
order_month_amount = grouped_month.order_amount.sum()

# Line chart of the monthly spending trend.
order_month_amount.plot()
//output3
output3
由上图可知,消费金额在前三个月达到最高峰,后续消费金额较为稳定,有轻微下降趋势。
//input4
# Number of order rows per month: count() tallies the non-null user_id
# entries in each monthly group. Grouping is done inline so this cell does
# not depend on a variable defined in another cell.
df.groupby('month')['user_id'].count().plot()
//output4
output4
//input5
# Total number of products purchased per month. Grouping is done inline so
# this cell does not depend on a variable defined in another cell.
df.groupby('month')['order_products'].sum().plot()
//output5
output5
//input6
# Number of distinct customers per month. nunique() is the built-in
# equivalent of de-duplicating each group and taking its length; user_id
# contains no nulls, so both approaches give the same counts.
df.groupby('month')['user_id'].nunique().plot()
//output6
output6
//input7
# One aggregation per column: products and amount are summed, while counting
# user_id gives the number of order rows in the month (not distinct users).
monthly_aggregations = {
    'order_products': 'sum',
    'order_amount': 'sum',
    'user_id': 'count',
}

# Build the month-indexed summary table in a single pivot.
monthly_summary = df.pivot_table(
    index='month',
    values=list(monthly_aggregations),
    aggfunc=monthly_aggregations,
)

# Show the first five months.
monthly_summary.head()
//output7
order_amount order_products user_id
month
1997-01-01 299060.17 19416 8928
1997-02-01 379590.03 24921 11272
1997-03-01 393155.27 26159 11598
1997-04-01 142824.49 9729 3781
1997-05-01 107933.30 7275 2895
//除此之外还可以计算
//每月用户平均消费金额的趋势
//每月用户平均消费次数的趋势
网友评论