- 决策树
# Decision tree: fit a classifier on the iris data and report its accuracy
# on a held-out test split.
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

# Feature matrix and class labels of the iris dataset.
X, y = load_iris(return_X_y=True)

# Keep 20% of the samples for testing; the fixed seed makes the split
# itself reproducible.
train_X, test_X, train_Y, test_Y = train_test_split(
    X, y, test_size=0.2, random_state=123
)

# fit() returns the estimator itself, so construction and training can be
# chained. The tree has no random_state, so its score may vary between runs.
tree = DecisionTreeClassifier().fit(train_X, train_Y)

# score() reports the mean accuracy on the held-out samples.
accuracy = tree.score(test_X, test_Y)
print(accuracy)
# Example output: 0.9666666666666667
- 随机森林
# Random forest: fit an ensemble on the iris data, report its accuracy on a
# held-out test split, then print the predicted class of every test sample.
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Feature matrix and class labels of the iris dataset.
X, y = load_iris(return_X_y=True)

# Keep 10% of the samples for testing; the fixed seed makes the split
# itself reproducible.
train_X, test_X, train_Y, test_Y = train_test_split(
    X, y, test_size=0.1, random_state=123
)

# Ensemble of 8 trees. No random_state is passed to the forest, so the
# fitted model (and therefore the numbers below) may differ between runs.
forest = RandomForestClassifier(n_estimators=8)
forest = forest.fit(train_X, train_Y)

# Mean accuracy on the held-out samples.
print(forest.score(test_X, test_Y))
# Example output: 1.0

# Predicted class label for each held-out sample.
rf_output = forest.predict(test_X)
print(rf_output)
# Example output: [1 2 2 1 0 2 1 0 0 1 2 0 1 2 2]
- 不放回采样
# Sampling WITHOUT replacement: once an item has been drawn for a bucket it
# is not put back, so no value can repeat inside a single bucket.
from sklearn.utils import resample
import numpy as np

# resample() is called without random_state, so it draws from NumPy's global
# RNG; seeding that RNG makes the buckets reproducible.
np.random.seed(123)

data = [1, 2, 3, 4, 5, 6, 7, 8, 9]
num_divisions = 2  # number of buckets to draw
list_of_data_divisions = []

# Each bucket is an independent draw of 5 distinct items from the full data,
# so the same value may still show up in more than one bucket.
for _ in range(num_divisions):
    sample = resample(data, replace=False, n_samples=5)
    list_of_data_divisions.append(sample)

print('Sample', list_of_data_divisions)
# Example output: Sample [[8, 1, 6, 7, 4], [4, 6, 5, 3, 8]]
- 放回采样
# Sampling WITH replacement: every drawn item is put back before the next
# draw, so the same value can be picked more than once.
from sklearn.utils import resample
import numpy as np

# resample() is called without random_state, so it draws from NumPy's global
# RNG; seeding that RNG makes the buckets reproducible.
np.random.seed(123)

data = [1, 2, 3, 4, 5, 6, 7, 8, 9]
num_divisions = 3  # number of buckets to draw
list_of_data_divisions = []

# replace=True is what makes this sampling with replacement; each bucket
# holds 4 draws from the full data.
for _ in range(num_divisions):
    sample = resample(data, replace=True, n_samples=4)
    list_of_data_divisions.append(sample)

print('Sample', list_of_data_divisions)
# Prints 'Sample' followed by a list of 3 buckets with 4 values each; a value
# may appear more than once within a single bucket.
网友评论