主要是xgboost的回归
scikit-learn: machine learning in Python — scikit-learn 1.0.2 documentation
XGBoost Documentation — xgboost 1.5.2 documentation
视频
https://www.youtube.com/watch?v=OtD8wVaFm6E
XGBoost in Python from Start to Finish - YouTube
如何对回归结果进行评价
针对单个指标使用的情况
针对多个指标配合使用的情况
!pip install xgboost
!pwd
一,分类问题
# Classification demo: prepare the iris data set for an XGBoost multi-class model.
from sklearn.datasets import load_iris
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier
from xgboost import plot_importance
from matplotlib import pyplot as plt

iris = load_iris()
# BUG FIX: the original read "x,y=-iris.data,iris.target", which negated every
# feature value before training; the features must be used as-is.
x, y = iris.data, iris.target
# Hold out 20% of the samples for evaluation; fixed seed for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)
# Training hyper-parameters for the multi-class booster, built as an ordered
# list of pairs first (xgb.train accepts either form).
params = dict([
    ('booster', 'gbtree'),           # tree-based boosting
    ('objective', 'multi:softmax'),  # predict the class label directly
    ('gamma', 0.1),                  # min loss reduction to split a node
    ('max_depth', 5),
    ('lambda', 10),                  # L2 regularisation weight
    ('subsample', 0.7),              # row subsampling per tree
    ('colsample_bytree', 0.7),       # column subsampling per tree
    ('min_child_weight', 3),
    ('eta', 0.1),                    # learning rate
    ('seed', 1000),
    ('nthread', 4),
    ('num_class', 3),                # iris has three classes
    ('verbosity', 0),                # silence the training log
])
plst = list(params.items())
plst
# Wrap the training data in XGBoost's optimised DMatrix container and train.
dtrain = xgb.DMatrix(x_train, label=y_train)
num_rounds = 10
model = xgb.train(plst, dtrain, num_rounds)
model

# Predict class labels for the held-out samples (softmax returns labels, not
# probabilities).
dtest = xgb.DMatrix(x_test)
ans = model.predict(dtest)
ans

# IMPROVED: use the library helper instead of counting matches in a manual
# loop — accuracy_score is already imported above and does exactly this.
print("Accuracy:", accuracy_score(y_test, ans))

# Visualise the per-feature importance scores of the trained booster.
plot_importance(model)
plt.show()
二,xgboost导入数据的方法
# Three ways to feed data into XGBoost's DMatrix container.
import xgboost as xgb
import numpy as np
import scipy.sparse  # FIX: import the subpackage explicitly; "import scipy" alone does not load scipy.sparse
import pandas

# 1) From a dense NumPy array: 100 samples x 10 features, binary labels.
data = np.random.randn(100, 10)
label = np.random.randint(2, size=100)
dtrain = xgb.DMatrix(data, label=label)

# 2) From a SciPy sparse matrix.
# BUG FIX: the original passed shape (100, 2) for a 100x10 array, which is a
# shape mismatch; let csr_matrix infer the shape from the data.
scr = scipy.sparse.csr_matrix(data)  # sparse-matrix conversion
dtrain = xgb.DMatrix(scr)
scr

# 3) From a pandas DataFrame (labels may also come as a DataFrame).
data = pandas.DataFrame(np.arange(12).reshape((4, 3)), columns=['a', 'b', 'c'])
label = pandas.DataFrame(np.random.randint(2, size=4))
dtrain = xgb.DMatrix(data, label=label)
三, xgboost回归问题
# Regression demo on the Boston housing data, keeping only the first two features.
# NOTE(review): load_boston was removed from scikit-learn 1.2; this cell needs
# scikit-learn <= 1.1 (the post targets 1.0.2) — confirm the installed version.
from sklearn import datasets
boston = datasets.load_boston()
features = boston.data[:, :2]   # first two columns only
target = boston.target

import xgboost as xgb
from xgboost import plot_importance
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split

# 80/20 train/test split with a fixed seed for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=0)

# Quick sanity checks on the training matrix.
x_train.shape
x_train.ndim
len(x_train)

# Fit a gradient-boosted regressor through the scikit-learn wrapper API.
model = xgb.XGBRegressor(max_depth=3, n_estimators=100, learning_rate=0.1)
model.fit(x_train, y_train)
x_predicted = model.predict(x_test)
x_predicted

# Plot which features the booster relied on most.
plot_importance(model)
plt.show()
案例实战
import os
os.listdir(os.getcwd())
数据集下载地址：https://archive.ics.uci.edu/ml/machine-learning-databases/concrete/compressive/Concrete_Data.xls
# Case study: predict concrete compressive strength (UCI Concrete data set).
# BUG FIX: the original used "pd" without ever importing pandas as pd.
import pandas as pd

data = pd.read_excel('Concrete_Data.xls')
data.head()

# Rename the long target column to a short 'label' (note the trailing space in
# the original column name — it is part of the file's header).
data.rename(columns={"Concrete compressive strength(MPa, megapascals) ": 'label'}, inplace=True)
data.shape
data.columns

from sklearn import preprocessing
from sklearn.model_selection import train_test_split

# 80/20 split: every column except 'label' is a feature.
x_train, x_test, y_train, y_test = train_test_split(
    data.drop(['label'], axis=1), data['label'], test_size=0.2, random_state=0)

xgb_train = xgb.DMatrix(x_train, label=y_train)
xgb_test = xgb.DMatrix(x_test, label=y_test)

# FIX: 'reg:linear' is a deprecated alias of 'reg:squarederror' (renamed in
# xgboost 0.90); same objective, no deprecation warning.
params = {
    'booster': 'gbtree',
    'objective': 'reg:squarederror',
    'gamma': 0.1,
    'max_depth': 6,
    'lambda': 10,       # L2 regularisation weight
    'subsample': 0.8,
    'eta': 0.1,         # learning rate
}
num_rounds = 100
# Evaluate on both splits after every boosting round.
watchlist = [(xgb_train, 'train'), (xgb_test, 'test')]
model = xgb.train(params, xgb_train, num_rounds, watchlist)

# Persist the trained booster to disk.
model.save_model('0309testxgb.model')
加载已保存的模型（load model）
# Restore the booster from the saved file and score the held-out features.
model = xgb.Booster(model_file='0309testxgb.model')
dmat = xgb.DMatrix(x_test)
x_predicted = model.predict(dmat)
print(x_predicted)
网友评论