import matplotlib.pyplot as plt
import numpy as np
第一步 生成数据集
# Sample 50 evenly spaced points on the closed interval [-3, 3].
x = np.linspace(start=-3, stop=3, num=50)
x.shape
(50,)
# Straight line y = 2x + 1 evaluated at every sample point of x.
y1 = 1 + 2 * x
y1.shape
(50,)
# Parabola y = x^2 evaluated at every sample point of x.
y2 = x * x
y2
array([9.00000000e+00, 8.28029988e+00, 7.59058726e+00, 6.93086214e+00,
6.30112453e+00, 5.70137443e+00, 5.13161183e+00, 4.59183673e+00,
4.08204915e+00, 3.60224906e+00, 3.15243648e+00, 2.73261141e+00,
2.34277384e+00, 1.98292378e+00, 1.65306122e+00, 1.35318617e+00,
1.08329863e+00, 8.43398584e-01, 6.33486047e-01, 4.53561016e-01,
3.03623490e-01, 1.83673469e-01, 9.37109538e-02, 3.37359434e-02,
3.74843815e-03, 3.74843815e-03, 3.37359434e-02, 9.37109538e-02,
1.83673469e-01, 3.03623490e-01, 4.53561016e-01, 6.33486047e-01,
8.43398584e-01, 1.08329863e+00, 1.35318617e+00, 1.65306122e+00,
1.98292378e+00, 2.34277384e+00, 2.73261141e+00, 3.15243648e+00,
3.60224906e+00, 4.08204915e+00, 4.59183673e+00, 5.13161183e+00,
5.70137443e+00, 6.30112453e+00, 6.93086214e+00, 7.59058726e+00,
8.28029988e+00, 9.00000000e+00])
# Open a new figure and draw the straight line y1 against x.
plt.figure()
plt.plot(x, y1)
[<matplotlib.lines.Line2D at 0x111d0f9e8>]
data:image/s3,"s3://crabby-images/02122/02122d972213d4dcd7aad74facf308ac6debe392" alt=""
# Open a new figure and draw the parabola y2 against x.
plt.figure()
plt.plot(x, y2)
[<matplotlib.lines.Line2D at 0x111da3860>]
data:image/s3,"s3://crabby-images/a689f/a689f0d8a63837907e77990f8c51195248db0804" alt=""
# Draw the parabola and render the figure explicitly with show().
plt.plot(x, y2)
plt.show()
data:image/s3,"s3://crabby-images/ec7f8/ec7f8ff490868fc6be883d4d2630c27d82cdccf3" alt=""
# Draw y1 and y2 against the same x values; both plot calls are issued
# without creating a new figure in between.
plt.plot(x, y1)
plt.plot(x, y2)
[<matplotlib.lines.Line2D at 0x111d60fd0>]
data:image/s3,"s3://crabby-images/6b4ea/6b4ea854b01ec243e212e5bdf0a6effc8c1fedb8" alt=""
支持中文字体
# Configure matplotlib so that Chinese text can be rendered.
# Import matplotlib directly rather than through the pylab namespace, whose
# use the matplotlib documentation discourages; `mpl` is the same module.
import matplotlib as mpl

# Use the FangSong font for sans-serif text (it must be installed locally).
mpl.rcParams['font.sans-serif'] = ['FangSong']
# Use the ASCII hyphen for minus signs instead of the Unicode minus glyph,
# which the selected font may not contain.
mpl.rcParams['axes.unicode_minus'] = False
# Customising the appearance of a line.
# Compact format string '.b': point markers drawn in blue.
plt.plot(x, y1, '.b')
# Keyword form: red dotted line, 5 points wide.  linestyle accepts "-", "-."
# and ":"; the separate keywords can also be replaced by a compact format
# string such as '.r'.
plt.plot(x, y2, linestyle=':', linewidth=5.0, color='r')
[<matplotlib.lines.Line2D at 0x111f908d0>]
data:image/s3,"s3://crabby-images/d3056/d305611a7e99e314fb88d7aec42609bd00a90af6" alt=""
# Axis labels.
plt.plot([1, 2, 3, 4], [2, 3, 3, 3])
plt.ylabel('Some Num')
# Chinese text is not supported by the default font settings.
plt.xlabel('自变量')
Text(0.5,0,'自变量')
data:image/s3,"s3://crabby-images/a60ac/a60ac910cc3047643b3ce642a34b014301d8613c" alt=""
散点图
# Marker-only format string 'g^' (green triangles): the points are drawn
# without a connecting line, which gives a scatter-style plot.
plt.plot([1, 2, 3, 4], [2, 3, 3, 3], 'g^')
[<matplotlib.lines.Line2D at 0x1121b4080>]
data:image/s3,"s3://crabby-images/d10fb/d10fb9a870a3b0dc1b272c12e728803bfbb59c2f" alt=""
常用的格式字符串(颜色+标记)
ro:红色的圆点
bs:蓝色的方块
g^:绿色的三角
# Plot t^2 and t^5 over 100 evenly spaced points in [-5, 5].
t = np.linspace(start=-5, stop=5, num=100)
plt.plot(t, t ** 2)
plt.plot(t, t ** 5)
[<matplotlib.lines.Line2D at 0x1121159e8>]
data:image/s3,"s3://crabby-images/f3892/f38922a285221235b60cf68ce7992dbf7f41b0c1" alt=""
# Several curves can be drawn in a single call by passing repeated
# (x values, y values, format string) triples.
plt.plot(t, t ** 2, 'r--', t, t ** 5, 'y-.')
[<matplotlib.lines.Line2D at 0x112327390>,
<matplotlib.lines.Line2D at 0x1123274e0>]
data:image/s3,"s3://crabby-images/3c2d0/3c2d0e000fa85bc17ef4083f2e22205549368458" alt=""
结构化数据绘制散点图
# Preview the integers 0..49 produced by arange.
np.arange(50)
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])
# Column-oriented sample data: three named arrays of 50 values each.
data = dict(
    a=np.arange(50),                                # 0, 1, ..., 49
    c=np.random.randint(low=0, high=50, size=50),   # random ints in [0, 50)
    d=np.random.rand(50),                           # random floats in [0, 1)
)
data
{'a': array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]),
'c': array([21, 22, 31, 1, 30, 13, 47, 19, 16, 45, 45, 34, 24, 11, 30, 49, 3,
38, 24, 26, 9, 24, 33, 44, 48, 49, 6, 49, 8, 30, 11, 43, 16, 25,
29, 34, 14, 21, 4, 20, 13, 46, 11, 25, 20, 39, 41, 34, 47, 36]),
'd': array([0.03337497, 0.58555231, 0.6983719 , 0.3098672 , 0.0355206 ,
0.27251523, 0.968375 , 0.7585922 , 0.53316131, 0.2134523 ,
0.76735142, 0.56798347, 0.98154299, 0.07708504, 0.93535569,
0.84546409, 0.13395731, 0.24076688, 0.44660032, 0.88671819,
0.00921326, 0.39650877, 0.44355761, 0.30306934, 0.98691421,
0.39195663, 0.6424303 , 0.68474638, 0.02455291, 0.90485831,
0.7171299 , 0.18596694, 0.12510926, 0.57805232, 0.93718472,
0.21482173, 0.02909599, 0.26395894, 0.39508085, 0.74490499,
0.17457859, 0.93607408, 0.58727838, 0.76517609, 0.53999965,
0.5932926 , 0.05968155, 0.70313421, 0.72178338, 0.47063122])}
plt.scatter()绘制散点图
# Scatter column 'd' against column 'a'; because `data` is given, the
# strings are looked up as keys of that dictionary.
plt.scatter(x='a', y='d', data=data)
plt.xlabel('a 数据')
plt.ylabel('d 数据')
Text(0,0.5,'d 数据')
data:image/s3,"s3://crabby-images/b04b4/b04b462a45f907b25eb3d0446b63089324bf417c" alt=""
# Scatter column 'c' against column 'a' of the `data` dictionary.
plt.scatter(x='a', y='c', data=data)
<matplotlib.collections.PathCollection at 0x1124a3d68>
data:image/s3,"s3://crabby-images/f485f/f485f8585821d8d57e68454140bcb11cbcc8d2a1" alt=""
# Add column 'b' (absolute value of 'd'), then scatter it against 'a' with
# triangle markers; the colour argument is given as the key 'c'.
data['b'] = np.absolute(data['d'])
plt.scatter(x='a', y='b', c='c', marker='>', data=data)
<matplotlib.collections.PathCollection at 0x112557eb8>
data:image/s3,"s3://crabby-images/b3264/b32640513b41e76e191136b6fd53a102544c907c" alt=""
# Scatter column 'd' against column 'c' with triangle markers; the colour
# argument is again given as the key 'c'.
plt.scatter(x='c', y='d', c='c', marker='>', data=data)
<matplotlib.collections.PathCollection at 0x1125b8198>
data:image/s3,"s3://crabby-images/60bf3/60bf361a2810a2bc284b3e6bd001d223feb17810" alt=""
# Display the dictionary again to inspect its current columns.
data
{'a': array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]),
'c': array([21, 22, 31, 1, 30, 13, 47, 19, 16, 45, 45, 34, 24, 11, 30, 49, 3,
38, 24, 26, 9, 24, 33, 44, 48, 49, 6, 49, 8, 30, 11, 43, 16, 25,
29, 34, 14, 21, 4, 20, 13, 46, 11, 25, 20, 39, 41, 34, 47, 36]),
'd': array([0.03337497, 0.58555231, 0.6983719 , 0.3098672 , 0.0355206 ,
0.27251523, 0.968375 , 0.7585922 , 0.53316131, 0.2134523 ,
0.76735142, 0.56798347, 0.98154299, 0.07708504, 0.93535569,
0.84546409, 0.13395731, 0.24076688, 0.44660032, 0.88671819,
0.00921326, 0.39650877, 0.44355761, 0.30306934, 0.98691421,
0.39195663, 0.6424303 , 0.68474638, 0.02455291, 0.90485831,
0.7171299 , 0.18596694, 0.12510926, 0.57805232, 0.93718472,
0.21482173, 0.02909599, 0.26395894, 0.39508085, 0.74490499,
0.17457859, 0.93607408, 0.58727838, 0.76517609, 0.53999965,
0.5932926 , 0.05968155, 0.70313421, 0.72178338, 0.47063122]),
'b': array([0.03337497, 0.58555231, 0.6983719 , 0.3098672 , 0.0355206 ,
0.27251523, 0.968375 , 0.7585922 , 0.53316131, 0.2134523 ,
0.76735142, 0.56798347, 0.98154299, 0.07708504, 0.93535569,
0.84546409, 0.13395731, 0.24076688, 0.44660032, 0.88671819,
0.00921326, 0.39650877, 0.44355761, 0.30306934, 0.98691421,
0.39195663, 0.6424303 , 0.68474638, 0.02455291, 0.90485831,
0.7171299 , 0.18596694, 0.12510926, 0.57805232, 0.93718472,
0.21482173, 0.02909599, 0.26395894, 0.39508085, 0.74490499,
0.17457859, 0.93607408, 0.58727838, 0.76517609, 0.53999965,
0.5932926 , 0.05968155, 0.70313421, 0.72178338, 0.47063122])}
柱状图
# Bar chart of three categories, labelled through custom x ticks.
names = ['A类型', 'B类型', 'C类型']
value = [1, 10, 100]
positions = range(len(names))
plt.bar(positions, value)
plt.xticks(positions, names)  # put the category names on the x axis
([<matplotlib.axis.XTick at 0x112650a20>,
<matplotlib.axis.XTick at 0x112650438>,
<matplotlib.axis.XTick at 0x1126437f0>],
<a list of 3 Text xticklabel objects>)
data:image/s3,"s3://crabby-images/0c469/0c469b00d20dd960fbc476ee0e8f22b7e4ca0567" alt=""
# Scatter plot with the category names passed directly as the x values.
plt.scatter(x=names, y=value)
<matplotlib.collections.PathCollection at 0x111f32fd0>
data:image/s3,"s3://crabby-images/4f580/4f58072a2b79b59aec25be98f0678aee92849657" alt=""
# Scatter the values at integer positions, then label those positions with
# the category names.
positions = range(len(names))
plt.scatter(positions, value)
plt.xticks(positions, names)
([<matplotlib.axis.XTick at 0x111f69be0>,
<matplotlib.axis.XTick at 0x111f69518>,
<matplotlib.axis.XTick at 0x111f692b0>],
<a list of 3 Text xticklabel objects>)
data:image/s3,"s3://crabby-images/d740e/d740e3c31028e4cea3bddfce8be769849d784174" alt=""
# Same scatter plot as before, now with a title.
positions = range(len(names))
plt.scatter(positions, value)
plt.xticks(positions, names)
plt.title('离散数据散点图')
Text(0.5,1,'离散数据散点图')
data:image/s3,"s3://crabby-images/c3ded/c3dede3711c93413d0d476bc363104ff2a49f281" alt=""
子图 SubPlot
1. 将一个画布进行切分(Figure)
2.将切分后的图分配到固定的位置
3.将图可以设置成固定的大小
# Three subplots placed on one figure using different grid layouts.
plt.figure(1)
plt.subplot(1, 3, 1)  # 1 row x 3 columns, position 1
plt.bar(names, value, color='r')
plt.subplot(2, 3, 5)  # 2 rows x 3 columns, position 5
plt.scatter(names, value, color='y')
plt.subplot(2, 3, 3)  # 2 rows x 3 columns, position 3
plt.plot(names, value, color='g')
# title() is called after the last subplot() call, so it applies to that
# subplot rather than to the whole figure.
plt.title("离散数据的柱状图,散点图,折线图")
Text(0.5,1,'离散数据的柱状图,散点图,折线图')
data:image/s3,"s3://crabby-images/5fcb5/5fcb557e13d1cce4009bc294483f9e7d86073aa7" alt=""
第 2 部分 Seaborn的绘图练习
道/法/术/器
import seaborn as sns

# Load seaborn's "tips" example dataset and display it.
tips = sns.load_dataset(name='tips')
tips
# total_bill 和
2.1 带状图-离散数据和连续数据之间的关系
# Strip plot of total_bill for each day.  jitter adds random sideways
# displacement to the points (the original note says it is on by default).
sns.stripplot(x='day', y='total_bill', data=tips, jitter=True)
<matplotlib.axes._subplots.AxesSubplot at 0x1a16f45710>
data:image/s3,"s3://crabby-images/07135/071355b1f376d991bf272577f2e7797ce90910f5" alt=""
蜂群图-离散数据和连续数据之间的关系-密度排列
# Swarm plot of total_bill for each day.
sns.swarmplot(data=tips, x='day', y='total_bill')
<matplotlib.axes._subplots.AxesSubplot at 0x1a16aadc18>
data:image/s3,"s3://crabby-images/7b31b/7b31b841231ffd16a88eb6b6ee42263a71fbdbbe" alt=""
# Show the first five rows of the dataset.
tips.head(n=5)
分析每天中午餐和晚餐的账单分布
2.3 Hue 分组参数
# Swarm plot of total_bill per day, with points coloured by the `time`
# column via the hue parameter.
sns.swarmplot(data=tips, x='day', y='total_bill', hue='time')
<matplotlib.axes._subplots.AxesSubplot at 0x1a16ff30b8>
data:image/s3,"s3://crabby-images/3390a/3390ae552b857e00b1242a036639f8530b3814b3" alt=""
在每天的付账人群中的性别分布
# Swarm plot of total_bill per day, with points coloured by the `sex` column.
sns.swarmplot(data=tips, x='day', y='total_bill', hue='sex')
<matplotlib.axes._subplots.AxesSubplot at 0x1a16e2fc50>
data:image/s3,"s3://crabby-images/972b5/972b569b85ec7fc087865b579accc4268663263d" alt=""
# Swarm plot of total_bill per day, with points coloured by the `size` column.
sns.swarmplot(data=tips, x='day', y='total_bill', hue='size')
<matplotlib.axes._subplots.AxesSubplot at 0x1a1719cf60>
data:image/s3,"s3://crabby-images/1463a/1463a838216bd1ffe8c6c03d7657590aee1630d3" alt=""
# Swarm plot of total_bill grouped by the `size` column.
sns.swarmplot(data=tips, x='size', y='total_bill')
<matplotlib.axes._subplots.AxesSubplot at 0x1a1726a470>
data:image/s3,"s3://crabby-images/1b711/1b711c93f58fad70ec128a3f8f8178e645946f77" alt=""
# Pearson correlation coefficient between `size` and `total_bill`.
# NOTE(review): the original note read `size` as a pizza size with a base
# price; in seaborn's tips dataset `size` is presumably the party size --
# confirm against the dataset description.
tips['size'].corr(tips['total_bill'], method='pearson')
0.5983151309049012
2.4箱型图
# Box plot of total_bill for each day.
# x and y are passed by keyword: seaborn 0.12+ accepts only `data`
# positionally, so the positional form fails there.
sns.boxplot(x='day', y='total_bill', data=tips)
<matplotlib.axes._subplots.AxesSubplot at 0x1a17971ef0>
data:image/s3,"s3://crabby-images/42c64/42c643317bafd70a617d968ce4f810e53f1646de" alt=""
# Swarm plot of total_bill for each day, for comparison with the box plot.
# x and y are passed by keyword: seaborn 0.12+ accepts only `data`
# positionally, so the positional form fails there.
sns.swarmplot(x='day', y='total_bill', data=tips)
<matplotlib.axes._subplots.AxesSubplot at 0x1a178bc390>
data:image/s3,"s3://crabby-images/df3cc/df3cc824b693f10da09b5f6cf5ce4c6a1af4b4db" alt=""
# Box plot of total_bill for each day, split by the `time` column.
# x and y are passed by keyword: seaborn 0.12+ accepts only `data`
# positionally, so the positional form fails there.
sns.boxplot(x="day", y="total_bill", data=tips, hue='time')
<matplotlib.axes._subplots.AxesSubplot at 0x1a17b547b8>
data:image/s3,"s3://crabby-images/b264b/b264b6ca350f7089ef4ff330b733c9f1eab3ef70" alt=""
2.5 提琴图
如何来表示total_bill的概率分布
# Violin plot of total_bill for each day, one violin per `time` value.
# x and y are passed by keyword: seaborn 0.12+ accepts only `data`
# positionally, so the positional form fails there.
sns.violinplot(x='day', y='total_bill', data=tips, hue='time')
/anaconda3/lib/python3.7/site-packages/scipy/stats/stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval
<matplotlib.axes._subplots.AxesSubplot at 0x1a17c781d0>
data:image/s3,"s3://crabby-images/7e0c5/7e0c5db9815e23258913fab3a3a0567e88aa70f0" alt=""
# Violin plot of total_bill for each day with split=True, so the two `time`
# groups share one violin (one half each).
# x and y are passed by keyword: seaborn 0.12+ accepts only `data`
# positionally, so the positional form fails there.
sns.violinplot(x='day', y='total_bill', data=tips, hue='time', split=True)
/anaconda3/lib/python3.7/site-packages/scipy/stats/stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval
<matplotlib.axes._subplots.AxesSubplot at 0x1a17ddd860>
data:image/s3,"s3://crabby-images/13449/13449c6cd049febb139a2d5308fe5c65ca51ac44" alt=""
# Combine several plots: draw a violin plot, then overlay the individual
# observations as white swarm points.
# x and y are passed by keyword: seaborn 0.12+ accepts only `data`
# positionally, so the positional form fails there.
sns.violinplot(x='day', y='total_bill', data=tips)
sns.swarmplot(x='day', y='total_bill', data=tips, color='w')
/anaconda3/lib/python3.7/site-packages/scipy/stats/stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval
<matplotlib.axes._subplots.AxesSubplot at 0x1a17eb31d0>
data:image/s3,"s3://crabby-images/f07f3/f07f3ddb34e7628dac5acb337e8fca6202bc93f3" alt=""
3.0 单一变量估计---离散型变量的统计
每天的交易数量
# Count of records for each day.
# x is passed by keyword: seaborn 0.12+ accepts only `data` positionally,
# so the positional form fails there.
sns.countplot(x='day', data=tips)
<matplotlib.axes._subplots.AxesSubplot at 0x1a17f9a550>
data:image/s3,"s3://crabby-images/7d530/7d530ac6e70c1d19fc2beea29fdc989cd8d3849e" alt=""
# Count of records for each value of the `time` column.
# x is passed by keyword: seaborn 0.12+ accepts only `data` positionally,
# so the positional form fails there.
sns.countplot(x='time', data=tips)
<matplotlib.axes._subplots.AxesSubplot at 0x1a17eb33c8>
data:image/s3,"s3://crabby-images/7ac34/7ac34e917f0b76f494815c7e4471bcebac70175b" alt=""
# Count of records for each day, split by the `time` column.
# x is passed by keyword: seaborn 0.12+ accepts only `data` positionally,
# so the positional form fails there.
sns.countplot(x='day', data=tips, hue='time')
<matplotlib.axes._subplots.AxesSubplot at 0x1a180f3a90>
data:image/s3,"s3://crabby-images/9fe02/9fe0265d37a619d508485b3f144b6263f8c0f67e" alt=""
4.0 连续型数据的核密度估计
# Look at the first rows, then plot the distribution of total_bill.
# NOTE(review): distplot is deprecated in newer seaborn releases in favour
# of histplot/displot -- migrate once the minimum seaborn version allows.
tips.head()
sns.distplot(a=tips['total_bill'])
/anaconda3/lib/python3.7/site-packages/scipy/stats/stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval
<matplotlib.axes._subplots.AxesSubplot at 0x1a181b0780>
data:image/s3,"s3://crabby-images/5d24e/5d24ed38a638bd1af7b230c3f461a03222e62def" alt=""
抵消偏度(修改接近正态分布)
# Distribution of log(total_bill); the log transform is applied to reduce
# the skew of the raw values.
sns.distplot(a=np.log(tips['total_bill']))
/anaconda3/lib/python3.7/site-packages/scipy/stats/stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval
<matplotlib.axes._subplots.AxesSubplot at 0x1a18289908>
data:image/s3,"s3://crabby-images/b840d/b840d7fa6eb96e9d7b5f844131582a58afeb5999" alt=""
# Distribution of sqrt(total_bill), an alternative transform to the log.
sns.distplot(a=np.sqrt(tips['total_bill']))
/anaconda3/lib/python3.7/site-packages/scipy/stats/stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval
<matplotlib.axes._subplots.AxesSubplot at 0x1a1803e7b8>
data:image/s3,"s3://crabby-images/bb991/bb99179a0d52eb91979efc12ab79b2a92ad8a996" alt=""
抽取前99.5%的数据(去除离群值的方法)
# 99.5th percentile of total_bill.
np.percentile(tips['total_bill'], q=99.5)
48.317099999999996
# Rows whose total_bill lies above the 99.5th percentile, i.e. the extreme
# values.  The threshold is computed instead of hard-coding 48.31 so the
# filter stays correct if the data changes.
tips[tips['total_bill'] > np.percentile(tips['total_bill'], 99.5)]
带回归的散点图
# Scatter plot of total_bill against size with a fitted regression line.
# x and y are passed by keyword: seaborn 0.12+ accepts only `data`
# positionally, so the positional form fails there.
sns.lmplot(x='size', y='total_bill', data=tips)
/anaconda3/lib/python3.7/site-packages/scipy/stats/stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval
<seaborn.axisgrid.FacetGrid at 0x1a18465cf8>
data:image/s3,"s3://crabby-images/cfb52/cfb529d5edbb2b560d6d22bd4dffcf41f42607db" alt=""
联合分布(既有散点的特性,又有线性回归,同时还有概率分布)
# Joint plot of tip against total_bill using the regression kind.
# x and y are passed by keyword: seaborn 0.12+ accepts only `data`
# positionally, so the positional form fails there.
sns.jointplot(x='total_bill', y='tip', data=tips, kind='reg')
/anaconda3/lib/python3.7/site-packages/scipy/stats/stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval
<seaborn.axisgrid.JointGrid at 0x1a18556940>
data:image/s3,"s3://crabby-images/81a4f/81a4f36b3b16a30730d01f7acccc872b9ec6f5d2" alt=""
网友评论