美文网首页
2.数据降维--variance

2.数据降维--variance

作者: 羽天驿 | 来源:发表于2020-04-06 16:10 被阅读0次

    一、variance特征的选择

    使用的是 low variance(小方差)过滤:方差低于给定阈值的特征会被移除。

    二、代码实例

    from sklearn.feature_selection import VarianceThreshold
    
    from sklearn import datasets
    
    import matplotlib.pyplot as plt
    
    import numpy as np
    
    d:\python3.7.4\lib\importlib\_bootstrap.py:219: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject
      return f(*args, **kwds)
    
    # Load iris as a (features, labels) pair. return_X_y must be passed by
    # keyword: newer scikit-learn releases reject it as a positional argument.
    X, y = datasets.load_iris(return_X_y=True)
    # X has 4 attributes (a 4-dimensional space), so it cannot be plotted directly.
    X.shape
    # Drop every feature whose variance is not above the 0.5 threshold.
    vt = VarianceThreshold(threshold=0.5)
    # Standard scikit-learn routine: fit first, then transform.
    vt.fit(X)
    X2 = vt.transform(X)
    # Peek at the first five rows of the reduced data.
    X2[:5]
    
    array([[5.1, 1.4, 0.2],
           [4.9, 1.4, 0.2],
           [4.7, 1.3, 0.2],
           [4.6, 1.5, 0.2],
           [5. , 1.4, 0.2]])
    

    可视化

    from mpl_toolkits.mplot3d.axes3d import Axes3D
    
    # Figure size is given in inches as (width, height).
    fig = plt.figure(figsize=(9, 6))
    # Create the 3D axes through the figure itself. Constructing Axes3D(fig)
    # and relying on it to attach itself to the figure has been deprecated
    # since Matplotlib 3.4 and can leave the figure empty on later releases.
    ax3 = fig.add_subplot(projection="3d")
    # One point per sample, using the three retained features as coordinates
    # and the class label y as the colour.
    ax3.scatter(X2[:, 0], X2[:, 1], X2[:, 2], c=y)
    # Adjust the viewing direction of the 3D plot (elev=90, azim=0).
    ax3.view_init(90, 0)
    
    output_4_0.png
    # Load wine as a (features, labels) pair. return_X_y must be passed by
    # keyword: newer scikit-learn releases reject it as a positional argument.
    X, y = datasets.load_wine(return_X_y=True)
    
    # What threshold is appropriate?
    # Check it in code: try several values and pick the one that works best.
    # In other words, validate the choice after the fact.
    vt = VarianceThreshold(threshold=1.0)
    # fit_transform learns the per-feature variances and filters in one step.
    X3 = vt.fit_transform(X)
    X3.shape
    # Show the first five rows before and after selection for comparison.
    display(X[:5], X3[:5])
    
    array([[1.423e+01, 1.710e+00, 2.430e+00, 1.560e+01, 1.270e+02, 2.800e+00,
            3.060e+00, 2.800e-01, 2.290e+00, 5.640e+00, 1.040e+00, 3.920e+00,
            1.065e+03],
           [1.320e+01, 1.780e+00, 2.140e+00, 1.120e+01, 1.000e+02, 2.650e+00,
            2.760e+00, 2.600e-01, 1.280e+00, 4.380e+00, 1.050e+00, 3.400e+00,
            1.050e+03],
           [1.316e+01, 2.360e+00, 2.670e+00, 1.860e+01, 1.010e+02, 2.800e+00,
            3.240e+00, 3.000e-01, 2.810e+00, 5.680e+00, 1.030e+00, 3.170e+00,
            1.185e+03],
           [1.437e+01, 1.950e+00, 2.500e+00, 1.680e+01, 1.130e+02, 3.850e+00,
            3.490e+00, 2.400e-01, 2.180e+00, 7.800e+00, 8.600e-01, 3.450e+00,
            1.480e+03],
           [1.324e+01, 2.590e+00, 2.870e+00, 2.100e+01, 1.180e+02, 2.800e+00,
            2.690e+00, 3.900e-01, 1.820e+00, 4.320e+00, 1.040e+00, 2.930e+00,
            7.350e+02]])
    
    
    
    array([[   1.71,   15.6 ,  127.  ,    5.64, 1065.  ],
           [   1.78,   11.2 ,  100.  ,    4.38, 1050.  ],
           [   2.36,   18.6 ,  101.  ,    5.68, 1185.  ],
           [   1.95,   16.8 ,  113.  ,    7.8 , 1480.  ],
           [   2.59,   21.  ,  118.  ,    4.32,  735.  ]])
    

    相关文章

      网友评论

          本文标题:2.数据降维--variance

          本文链接:https://www.haomeiwen.com/subject/tuajphtx.html