一、基于方差(variance)的特征选择
使用的是低方差过滤(low variance):用 VarianceThreshold 删除方差低于给定阈值的特征。
二、代码的实例
from sklearn.feature_selection import VarianceThreshold
from sklearn import datasets
import matplotlib.pyplot as plt
import numpy as np
d:\python3.7.4\lib\importlib\_bootstrap.py:219: RuntimeWarning: numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject
return f(*args, **kwds)
# Load the iris data as an (X, y) pair. ``return_X_y`` is passed by keyword:
# scikit-learn >= 1.0 rejects the positional form ``load_iris(True)``.
X, y = datasets.load_iris(return_X_y=True)
# X cannot be plotted directly: it has 4 attributes, i.e. a 4-dimensional space.
X.shape
# Variance filter with a cut-off of 0.5.
vt = VarianceThreshold(threshold=0.5)
# The usual estimator routine: fit first, then transform.
vt.fit(X)
X2 = vt.transform(X)
# Peek at the first five rows of the filtered data.
X2[:5]
array([[5.1, 1.4, 0.2],
[4.9, 1.4, 0.2],
[4.7, 1.3, 0.2],
[4.6, 1.5, 0.2],
[5. , 1.4, 0.2]])
可视化
# Kept for older matplotlib releases, where importing Axes3D is what registers
# the "3d" projection used below.
from mpl_toolkits.mplot3d.axes3d import Axes3D  # noqa: F401
fig = plt.figure(figsize=(9, 6))  # 2-D canvas
# Create the 3-D axes through the figure so they are actually attached to it;
# a bare ``Axes3D(fig)`` is no longer added to the figure automatically on
# recent matplotlib, which leaves the plot empty. The explicit rect keeps the
# axes filling the whole figure, matching the default rect of ``Axes3D(fig)``.
ax3 = fig.add_axes([0, 0, 1, 1], projection='3d')
ax3.scatter(X2[:, 0], X2[:, 1], X2[:, 2], c=y)
ax3.view_init(90, 0)  # adjust the viewing direction of the 3-D plot
output_4_0.png
# Load the wine data as an (X, y) pair. ``return_X_y`` is passed by keyword:
# scikit-learn >= 1.0 rejects the positional form ``load_wine(True)``.
X, y = datasets.load_wine(return_X_y=True)
# What threshold is appropriate?
# Check empirically: try several values and keep the one that works best.
# The choice is validated after the fact!
vt = VarianceThreshold(threshold=1.0)
X3 = vt.fit_transform(X)
X3.shape
# Show the first five rows before and after filtering.
display(X[:5], X3[:5])
array([[1.423e+01, 1.710e+00, 2.430e+00, 1.560e+01, 1.270e+02, 2.800e+00,
3.060e+00, 2.800e-01, 2.290e+00, 5.640e+00, 1.040e+00, 3.920e+00,
1.065e+03],
[1.320e+01, 1.780e+00, 2.140e+00, 1.120e+01, 1.000e+02, 2.650e+00,
2.760e+00, 2.600e-01, 1.280e+00, 4.380e+00, 1.050e+00, 3.400e+00,
1.050e+03],
[1.316e+01, 2.360e+00, 2.670e+00, 1.860e+01, 1.010e+02, 2.800e+00,
3.240e+00, 3.000e-01, 2.810e+00, 5.680e+00, 1.030e+00, 3.170e+00,
1.185e+03],
[1.437e+01, 1.950e+00, 2.500e+00, 1.680e+01, 1.130e+02, 3.850e+00,
3.490e+00, 2.400e-01, 2.180e+00, 7.800e+00, 8.600e-01, 3.450e+00,
1.480e+03],
[1.324e+01, 2.590e+00, 2.870e+00, 2.100e+01, 1.180e+02, 2.800e+00,
2.690e+00, 3.900e-01, 1.820e+00, 4.320e+00, 1.040e+00, 2.930e+00,
7.350e+02]])
array([[ 1.71, 15.6 , 127. , 5.64, 1065. ],
[ 1.78, 11.2 , 100. , 4.38, 1050. ],
[ 2.36, 18.6 , 101. , 5.68, 1185. ],
[ 1.95, 16.8 , 113. , 7.8 , 1480. ],
[ 2.59, 21. , 118. , 4.32, 735. ]])
网友评论