# This chapter covers: seaborn
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Enable inline figure rendering when running under IPython/Jupyter.
# `get_ipython` is only injected by IPython, so without the guard a plain
# `python script.py` run of this exported notebook stops here with NameError.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass  # not running under IPython; keep matplotlib's default backend
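# Seaborn example gallery: https://seaborn.pydata.org/examples/index.html
#
# Distributions
# - distplot   distribution plot (histogram + KDE); histplot/displot in seaborn >= 0.11
# - kdeplot    kernel density estimate plot
# - jointplot  joint distribution plot
# - pairplot   pairwise multi-variable plot
#
# Categorical
# - boxplot     box plot
# - violinplot  violin plot
# - barplot     bar plot
# - factorplot  factor plot (renamed catplot in seaborn >= 0.9)
#
# Linear relationships
# - lmplot   regression plot
# - heatmap  heat map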
# In[2]:
# Load the CDNOW order log. Column names are supplied explicitly via `names`
# and fields are split on runs of whitespace.
columns = ['user_id', 'order_dt', 'order_products', 'order_amount']
# Raw string: '\s' inside a normal string literal is an invalid escape
# sequence and triggers a warning on current Python versions.
df = pd.read_table('CDNOW_master.txt', names=columns, sep=r'\s+')
# In[3]:
df.head()
# In[4]:
# Distribution of the per-order amount: density-normalised histogram with a
# KDE curve overlaid. `distplot` is deprecated since seaborn 0.11; `histplot`
# with kde=True and stat='density' is its axes-level replacement.
sns.histplot(df.order_amount, kde=True, stat='density')
# In[5]:
# Per-user totals. Only the two numeric measures are summed; adding up the
# order_dt column (still raw YYYYMMDD values here) would be meaningless.
grouped_user = df.groupby('user_id')[['order_products', 'order_amount']].sum()
# In[6]:
# Products bought vs. amount spent per user, with a regression fit.
# x/y are passed by keyword: seaborn >= 0.12 rejects them as positionals.
sns.jointplot(x='order_products', y='order_amount', data=grouped_user, kind='reg')
# In[7]:
# Build a per-user RFM table from the order log.
# order_dt is parsed from its YYYYMMDD form into real timestamps first.
df['order_dt'] = pd.to_datetime(df.order_dt, format='%Y%m%d')
rfm = df.pivot_table(
    index='user_id',
    values=['order_products', 'order_amount', 'order_dt'],
    aggfunc={
        'order_dt': 'max',        # date of the user's most recent order
        'order_amount': 'sum',    # total amount spent
        'order_products': 'sum',  # total number of products bought
    },
)
# R (recency): days between the user's last order and the latest order in the
# data set. Subtracting in this direction keeps R >= 0; the previous
# `order_dt - max` form produced non-positive values.
rfm['R'] = (rfm.order_dt.max() - rfm.order_dt) / np.timedelta64(1, 'D')
# F = products bought, M = amount spent.
rfm = rfm.rename(columns={'order_products': 'F', 'order_amount': 'M'})
rfm.head()
# In[8]:
# x/y are passed by keyword: seaborn >= 0.12 no longer accepts them as
# positional arguments. Both joint plots use kind='reg' (scatter plus
# regression fit).
sns.jointplot(x='R', y='F', data=rfm, kind='reg')  # R vs. F
# In[9]:
sns.jointplot(x='F', y='M', data=rfm, kind='reg')  # F vs. M
# In[10]:
sns.pairplot(rfm[['R', 'F', 'M']])  # 3x3 grid of pairwise plots
# In[11]:
# Select SimHei as the sans-serif font (presumably so the Chinese column
# names used as labels below render correctly; confirm the font is installed).
plt.rcParams.update({'font.sans-serif': 'SimHei'})
# Second data set: GBK-encoded CSV. Note that this rebinds `df`.
df = pd.read_csv('cy.csv', encoding='gbk')
# In[12]:
df.head()
# In[13]:
# Keep only the rows whose 城市 (city) is 上海 (Shanghai) or 北京 (Beijing).
df2 = df.query("(城市=='上海')|(城市=='北京')")
# In[14]:
# Box plot of 口味 (taste) per 类型 (type) on a wide figure.
plt.figure(figsize=(20, 5))
sns.boxplot(data=df2, x='类型', y='口味')
# In[15]:
# Violin plot of the same columns; split=True draws the two cities as the
# two halves of each violin.
plt.figure(figsize=(20, 5))
sns.violinplot(data=df2, x='类型', y='口味', hue='城市', split=True)
# In[16]:
# `factorplot` was renamed `catplot`, and its `size` argument renamed
# `height`, in seaborn 0.9; the old names were later removed.
# kind='point' reproduces factorplot's former default plot kind.
# Point plot of 口味 by 类型, one line per city, on the two-city subset.
sns.catplot(x='类型', y='口味', hue='城市', data=df2, kind='point', height=10)
# In[21]:
# Bar plot of 口味 by 类型 for the full data set, one facet per city,
# wrapped after four columns.
sns.catplot(x='类型', y='口味', col='城市', data=df, kind='bar',
            height=6, aspect=2, col_wrap=4)
# In[33]:
# Third-order polynomial fit of 环境 against 口味, one facet per city,
# restricted to rows of the two-city subset with 点评 below 2000.
sns.lmplot(
    data=df2.query('点评<2000'),
    x='口味',
    y='环境',
    col='城市',
    order=3,
)
# In[36]:
# Mean 口味 for every (城市, 类型) pair: cities as rows, types as columns.
pt = df.pivot_table(values='口味', index='城市', columns='类型', aggfunc='mean')
# In[41]:
# Large canvas so the annotated cells stay legible.
plt.figure(figsize=(20, 20))
# Heat map of the pivot table, with each cell's value written inside it.
sns.heatmap(data=pt, annot=True)
# In[ ]:









# (Footer text from the original web page: "Reader comments")