from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

# Tune a random-forest classifier with a cross-validated grid search, then
# retrain a fresh model with the best parameters and score it on a held-out
# test set.
# NOTE(review): X (features) and Y (labels) are not defined in this snippet;
# presumably they are created earlier in the file -- confirm.

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# repeatable between runs.
train_X, test_X, train_Y, test_Y = train_test_split(
    X, Y, test_size=0.2, random_state=1
)

# Base classifier handed to GridSearchCV; the grid below overrides
# n_estimators and criterion for each candidate.
estimator = RandomForestClassifier(
    bootstrap=True,
    class_weight=None,
    criterion='gini',
    max_depth=None,
    # 'auto' was removed from scikit-learn; for classifiers it was an alias
    # of 'sqrt', so this keeps the same behaviour on every version.
    max_features='sqrt',
    max_leaf_nodes=None,
    min_samples_leaf=1,
    min_samples_split=2,
    min_weight_fraction_leaf=0.0,
    oob_score=True,
    random_state=None,
)

# Hyper-parameters to search. Keys must match the estimator's constructor
# argument names exactly (n_estimators, not n_estimator), otherwise
# GridSearchCV rejects the grid.
param = {
    'n_estimators': range(10, 100, 10),
    'criterion': ['gini', 'entropy'],
}

# 5-fold cross-validated grid search over the parameter grid.
gsearch = GridSearchCV(estimator=estimator, param_grid=param, cv=5)
# Alias kept because the original script bound the search object under this
# misspelled name (while calling .fit on the correctly spelled one).
gsearsh = gsearch
gsearch.fit(train_X, train_Y)

# Best parameter combination found by the search, as a dict.
best_param = gsearch.best_params_

# Fresh classifier configured with the best parameters from the search.
clf = RandomForestClassifier(oob_score=True, **best_param)
clf.fit(train_X, train_Y)  # train the final model

# Predict on the held-out test set (not the training set) and keep the
# results instead of discarding them.
predictions = clf.predict(test_X)

# Mean accuracy on the test set.
accuracy = clf.score(test_X, test_Y)
print("Test accuracy: {:.4f}".format(accuracy))
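# "Reader comments" -- heading carried over from the web page this snippet was copied from; not part of the program.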