美文网首页
机器学习基础——matplotlib.pyplot和seaborn

机器学习基础——matplotlib.pyplot和seaborn

作者: 小螳螂 | 来源:发表于2019-01-06 12:21 被阅读0次
    import matplotlib.pyplot as plt
    import numpy as np
    

    第一步 生成数据集

    x = np.linspace(-3,3,50)#平均采样,[-3,3]采样50个
    
    x.shape
    
    (50,)
    
    y1 = 2*x + 1
    
    y1.shape
    
    (50,)
    
    y2 = x**2
    y2
    
    array([9.00000000e+00, 8.28029988e+00, 7.59058726e+00, 6.93086214e+00,
           6.30112453e+00, 5.70137443e+00, 5.13161183e+00, 4.59183673e+00,
           4.08204915e+00, 3.60224906e+00, 3.15243648e+00, 2.73261141e+00,
           2.34277384e+00, 1.98292378e+00, 1.65306122e+00, 1.35318617e+00,
           1.08329863e+00, 8.43398584e-01, 6.33486047e-01, 4.53561016e-01,
           3.03623490e-01, 1.83673469e-01, 9.37109538e-02, 3.37359434e-02,
           3.74843815e-03, 3.74843815e-03, 3.37359434e-02, 9.37109538e-02,
           1.83673469e-01, 3.03623490e-01, 4.53561016e-01, 6.33486047e-01,
           8.43398584e-01, 1.08329863e+00, 1.35318617e+00, 1.65306122e+00,
           1.98292378e+00, 2.34277384e+00, 2.73261141e+00, 3.15243648e+00,
           3.60224906e+00, 4.08204915e+00, 4.59183673e+00, 5.13161183e+00,
           5.70137443e+00, 6.30112453e+00, 6.93086214e+00, 7.59058726e+00,
           8.28029988e+00, 9.00000000e+00])
    
    plt.figure()
    plt.plot(x,y1)
    
    [<matplotlib.lines.Line2D at 0x111d0f9e8>]
    
    output_7_1.png
    plt.figure()
    plt.plot(x,y2)
    
    [<matplotlib.lines.Line2D at 0x111da3860>]
    
    output_8_1.png
    plt.plot(x,y2)
    plt.show()
    
    output_9_0.png
    
    
    # 将x,y1,y2画在一起
    plt.plot(x,y1)
    plt.plot(x,y2)
    
    [<matplotlib.lines.Line2D at 0x111d60fd0>]
    
    output_11_1.png

    支持中文字体

    from pylab import mpl#import matplotlib as mpl
    mpl.rcParams['font.sans-serif']=['FangSong']
    mpl.rcParams['axes.unicode_minus']=False
    
    # 参数修改
    plt.plot(x,y1,'.b')
    plt.plot(x,y2,color='r',linewidth=5.0,linestyle=':')#linestyle取值:"-",”-.“,":".该变量是复合变量也可以省略字段直接写”.r“
    
    [<matplotlib.lines.Line2D at 0x111f908d0>]
    
    output_14_1.png
    ##label标记
    
    plt.plot([1,2,3,4],[2,3,3,3])
    plt.ylabel('Some Num')
    plt.xlabel('自变量')#默认不支持中文字体
    
    Text(0.5,0,'自变量')
    
    output_16_1.png

    散点图

    plt.plot([1,2,3,4],[2,3,3,3],'g^')
    
    [<matplotlib.lines.Line2D at 0x1121b4080>]
    
    output_18_1.png

    常用的 fmt 格式字符串(颜色+标记)

    ro:红色的圆点
    
    bs:蓝色的方块
    
    g^:绿色的三角
    
    t=np.linspace(-5,5,100)
    
    plt.plot(t,t**2)
    plt.plot(t,t**5)
    
    [<matplotlib.lines.Line2D at 0x1121159e8>]
    
    output_21_1.png
    plt.plot(t,t**2,'r--',t,t**5,'y-.')#多个函数图,可以合并为一个函数,但是要求(自变量,因变量,style字段)
    
    [<matplotlib.lines.Line2D at 0x112327390>,
     <matplotlib.lines.Line2D at 0x1123274e0>]
    
    output_22_1.png

    结构化数据绘制散点图

    np.arange(50)
    
    array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
           17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
           34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])
    
    data = {
        'a':np.arange(50),
        'c':np.random.randint(0,50,50),
        'd':np.random.rand(50)
    }
    data
    
    {'a': array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
            34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]),
     'c': array([21, 22, 31,  1, 30, 13, 47, 19, 16, 45, 45, 34, 24, 11, 30, 49,  3,
            38, 24, 26,  9, 24, 33, 44, 48, 49,  6, 49,  8, 30, 11, 43, 16, 25,
            29, 34, 14, 21,  4, 20, 13, 46, 11, 25, 20, 39, 41, 34, 47, 36]),
     'd': array([0.03337497, 0.58555231, 0.6983719 , 0.3098672 , 0.0355206 ,
            0.27251523, 0.968375  , 0.7585922 , 0.53316131, 0.2134523 ,
            0.76735142, 0.56798347, 0.98154299, 0.07708504, 0.93535569,
            0.84546409, 0.13395731, 0.24076688, 0.44660032, 0.88671819,
            0.00921326, 0.39650877, 0.44355761, 0.30306934, 0.98691421,
            0.39195663, 0.6424303 , 0.68474638, 0.02455291, 0.90485831,
            0.7171299 , 0.18596694, 0.12510926, 0.57805232, 0.93718472,
            0.21482173, 0.02909599, 0.26395894, 0.39508085, 0.74490499,
            0.17457859, 0.93607408, 0.58727838, 0.76517609, 0.53999965,
            0.5932926 , 0.05968155, 0.70313421, 0.72178338, 0.47063122])}
    

    plt.scatter()绘制散点图

    plt.scatter('a','d',data=data)
    plt.xlabel('a 数据')
    plt.ylabel('d 数据')
    
    Text(0,0.5,'d 数据')
    
    output_27_1.png
    plt.scatter('a','c',data=data)
    
    <matplotlib.collections.PathCollection at 0x1124a3d68>
    
    output_28_1.png
    data['b'] = np.abs(data['d'])
    
    plt.scatter('a','b',data = data,marker='>',c = 'c')
    
    <matplotlib.collections.PathCollection at 0x112557eb8>
    
    output_30_1.png
    plt.scatter('c','d',data = data,marker='>',c = 'c')
    
    <matplotlib.collections.PathCollection at 0x1125b8198>
    
    output_31_1.png
    data
    
    {'a': array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
            34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]),
     'c': array([21, 22, 31,  1, 30, 13, 47, 19, 16, 45, 45, 34, 24, 11, 30, 49,  3,
            38, 24, 26,  9, 24, 33, 44, 48, 49,  6, 49,  8, 30, 11, 43, 16, 25,
            29, 34, 14, 21,  4, 20, 13, 46, 11, 25, 20, 39, 41, 34, 47, 36]),
     'd': array([0.03337497, 0.58555231, 0.6983719 , 0.3098672 , 0.0355206 ,
            0.27251523, 0.968375  , 0.7585922 , 0.53316131, 0.2134523 ,
            0.76735142, 0.56798347, 0.98154299, 0.07708504, 0.93535569,
            0.84546409, 0.13395731, 0.24076688, 0.44660032, 0.88671819,
            0.00921326, 0.39650877, 0.44355761, 0.30306934, 0.98691421,
            0.39195663, 0.6424303 , 0.68474638, 0.02455291, 0.90485831,
            0.7171299 , 0.18596694, 0.12510926, 0.57805232, 0.93718472,
            0.21482173, 0.02909599, 0.26395894, 0.39508085, 0.74490499,
            0.17457859, 0.93607408, 0.58727838, 0.76517609, 0.53999965,
            0.5932926 , 0.05968155, 0.70313421, 0.72178338, 0.47063122]),
     'b': array([0.03337497, 0.58555231, 0.6983719 , 0.3098672 , 0.0355206 ,
            0.27251523, 0.968375  , 0.7585922 , 0.53316131, 0.2134523 ,
            0.76735142, 0.56798347, 0.98154299, 0.07708504, 0.93535569,
            0.84546409, 0.13395731, 0.24076688, 0.44660032, 0.88671819,
            0.00921326, 0.39650877, 0.44355761, 0.30306934, 0.98691421,
            0.39195663, 0.6424303 , 0.68474638, 0.02455291, 0.90485831,
            0.7171299 , 0.18596694, 0.12510926, 0.57805232, 0.93718472,
            0.21482173, 0.02909599, 0.26395894, 0.39508085, 0.74490499,
            0.17457859, 0.93607408, 0.58727838, 0.76517609, 0.53999965,
            0.5932926 , 0.05968155, 0.70313421, 0.72178338, 0.47063122])}
    

    柱状图

    names = ['A类型','B类型','C类型']
    value = [1,10,100]
    
    plt.bar(range(len(names)),value)
    plt.xticks(range(len(names)),names)#横坐标
    
    ([<matplotlib.axis.XTick at 0x112650a20>,
      <matplotlib.axis.XTick at 0x112650438>,
      <matplotlib.axis.XTick at 0x1126437f0>],
     <a list of 3 Text xticklabel objects>)
    
    output_35_1.png
    plt.scatter(names,value)
    
    <matplotlib.collections.PathCollection at 0x111f32fd0>
    
    output_36_1.png
    plt.scatter(range(len(names)),value)
    plt.xticks(range(len(names)),names)
    
    ([<matplotlib.axis.XTick at 0x111f69be0>,
      <matplotlib.axis.XTick at 0x111f69518>,
      <matplotlib.axis.XTick at 0x111f692b0>],
     <a list of 3 Text xticklabel objects>)
    
    output_37_1.png
    plt.scatter(range(len(names)),value)
    plt.xticks(range(len(names)),names)
    plt.title('离散数据散点图')
    
    Text(0.5,1,'离散数据散点图')
    
    output_38_1.png

    子图 SubPlot

    1. 将一个画布进行切分(Figure)
    
    2.将切分后的图分配到固定的位置
    
    3.将图可以设置成固定的大小
    
    plt.figure(1)
    plt.subplot(131)#一行三列放在第一的位置
    plt.bar(names,value,color='r')
    plt.subplot(235)#二行三列放在第五的位置
    plt.scatter(names,value,color='y')
    plt.subplot(233)#二行三列放在第三的位置
    plt.plot(names,value,color='g')
    plt.title("离散数据的柱状图,散点图,折线图")
    
    Text(0.5,1,'离散数据的柱状图,散点图,折线图')
    
    output_40_1.png
    
    

    第 2 部分 Seaborn的绘图练习

    道/法/术/器
    
    import seaborn as sns
    
    tips = sns.load_dataset('tips')
    tips
    
    # total_bill 和 
    
    

    2.1 带状图-离散数据和连续数据之间的关系

    sns.stripplot(data=tips,x='day',y='total_bill',jitter = True)#jitter抖动,默认为TRUE
    
    
    <matplotlib.axes._subplots.AxesSubplot at 0x1a16f45710>
    
    output_47_1.png

    蜂群图-离散数据和连续数据之间的关系-密度排列

    sns.swarmplot(x='day',y='total_bill',data=tips)
    
    <matplotlib.axes._subplots.AxesSubplot at 0x1a16aadc18>
    
    output_49_1.png
    tips.head()
    

    分析每天中午餐和晚餐的账单分布

    2.3 Hue 分组参数

    sns.swarmplot(x='day',y='total_bill',data=tips,hue='time')
    
    <matplotlib.axes._subplots.AxesSubplot at 0x1a16ff30b8>
    
    output_53_1.png

    在每天的付账人群中的性别分布

    sns.swarmplot(x='day',y='total_bill',data=tips,hue='sex')
    
    <matplotlib.axes._subplots.AxesSubplot at 0x1a16e2fc50>
    
    output_55_1.png
    sns.swarmplot(x='day',y='total_bill',data=tips,hue='size')
    
    <matplotlib.axes._subplots.AxesSubplot at 0x1a1719cf60>
    
    output_56_1.png
    sns.swarmplot(x='size',y='total_bill',data=tips)
    
    <matplotlib.axes._subplots.AxesSubplot at 0x1a1726a470>
    
    output_57_1.png
    # 上图展示了不同就餐人数(size)对应的账单金额(total_bill)分布
    
    # size 与 total_bill 之间的相关系数(皮尔逊系数)
    tips['size'].corr(tips['total_bill'])
    
    0.5983151309049012
    

    2.4箱型图

    sns.boxplot('day','total_bill',data=tips)
    
    <matplotlib.axes._subplots.AxesSubplot at 0x1a17971ef0>
    
    output_61_1.png
    
    
    sns.swarmplot('day','total_bill',data=tips)
    
    <matplotlib.axes._subplots.AxesSubplot at 0x1a178bc390>
    
    output_63_1.png
    sns.boxplot("day","total_bill",data=tips,hue='time')
    
    <matplotlib.axes._subplots.AxesSubplot at 0x1a17b547b8>
    
    output_64_1.png

    2.5 提琴图

    如何来表示total_bill的概率分布
    
    sns.violinplot('day','total_bill',data=tips,hue='time')
    
    /anaconda3/lib/python3.7/site-packages/scipy/stats/stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
      return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval
    
    
    
    
    
    <matplotlib.axes._subplots.AxesSubplot at 0x1a17c781d0>
    
    output_66_2.png
    sns.violinplot('day','total_bill',data=tips,hue='time',split = True)
    
    /anaconda3/lib/python3.7/site-packages/scipy/stats/stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
      return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval
    
    
    
    
    
    <matplotlib.axes._subplots.AxesSubplot at 0x1a17ddd860>
    
    output_67_2.png
    ### 多图合成展示
    
    sns.violinplot('day','total_bill',data=tips)
    sns.swarmplot('day','total_bill',data=tips,color='w')
    
    /anaconda3/lib/python3.7/site-packages/scipy/stats/stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
      return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval
    
    
    
    
    
    <matplotlib.axes._subplots.AxesSubplot at 0x1a17eb31d0>
    
    output_69_2.png
    
    

    3.0 单一变量估计---离散型变量的统计

    每天的交易数量

    sns.countplot('day',data=tips)
    
    <matplotlib.axes._subplots.AxesSubplot at 0x1a17f9a550>
    
    output_73_1.png
    sns.countplot('time',data=tips)
    
    <matplotlib.axes._subplots.AxesSubplot at 0x1a17eb33c8>
    
    output_74_1.png
    sns.countplot('day',data=tips,hue='time')
    
    <matplotlib.axes._subplots.AxesSubplot at 0x1a180f3a90>
    
    output_75_1.png

    4.0 连续型数据的核密度估计

    tips.head()
    
    sns.distplot(tips['total_bill'])
    
    /anaconda3/lib/python3.7/site-packages/scipy/stats/stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
      return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval
    
    
    
    
    
    <matplotlib.axes._subplots.AxesSubplot at 0x1a181b0780>
    
    output_78_2.png

    抵消偏度(通过变换使分布更接近正态分布)

    sns.distplot(np.log(tips['total_bill']))
    
    /anaconda3/lib/python3.7/site-packages/scipy/stats/stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
      return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval
    
    
    
    
    
    <matplotlib.axes._subplots.AxesSubplot at 0x1a18289908>
    
    output_80_2.png
    sns.distplot(np.sqrt(tips['total_bill']))
    
    /anaconda3/lib/python3.7/site-packages/scipy/stats/stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
      return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval
    
    
    
    
    
    <matplotlib.axes._subplots.AxesSubplot at 0x1a1803e7b8>
    
    output_81_2.png

    抽取前99.5%的数据(去除离群值的方法)

    np.percentile(tips['total_bill'],99.5)
    
    48.317099999999996
    
    tips[tips['total_bill']>48.31]
    

    带回归的散点图

    sns.lmplot('size','total_bill',data = tips)
    
    /anaconda3/lib/python3.7/site-packages/scipy/stats/stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
      return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval
    
    
    
    
    
    <seaborn.axisgrid.FacetGrid at 0x1a18465cf8>
    
    output_86_2.png

    联合分布(既有散点的特性,又有线性回归,同时还有概率分布)

    sns.jointplot('total_bill','tip',data=tips,kind='reg')
    
    /anaconda3/lib/python3.7/site-packages/scipy/stats/stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
      return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval
    
    
    
    
    
    <seaborn.axisgrid.JointGrid at 0x1a18556940>
    
    output_88_2.png

    相关文章

      网友评论

          本文标题:机器学习基础——matplotlib.pyplot和seaborn

          本文链接:https://www.haomeiwen.com/subject/xiqsrqtx.html