美文网首页
xgboost模型demo

xgboost模型demo

作者: 拼搏向上001 | 来源:发表于2019-04-04 12:10 被阅读0次
    import xgboost as xgb
    def construct_model_data(X_train,X_validation,X_test,
                             y_train,y_validation,y_test):
        """Wrap the three data splits into xgb.DMatrix objects.

        NaN is declared as the missing-value marker for every matrix.
        Returns (dtrain, dtest, dvalidation) — note the return order
        differs from the argument order.
        """
        dtrain = xgb.DMatrix(
            X_train.values,
            label=y_train.values,
            feature_names=X_train.columns,
            missing=np.nan,
        )
        # Validation reuses the training column names (splits share a schema).
        dvalidation = xgb.DMatrix(
            X_validation.values,
            label=y_validation.values,
            feature_names=X_train.columns,
            missing=np.nan,
        )
        dtest = xgb.DMatrix(
            X_test.values,
            label=y_test.values,
            feature_names=X_test.columns,
            missing=np.nan,
        )
        return dtrain, dtest, dvalidation
    
    def train_model(parameters,num_round,dtrain,dtest,dvalidation,es=10000):
        """Train a booster while reporting eval metrics on all three splits.

        es: early-stopping patience in rounds; the last watchlist entry
        (the validation split) is the one xgboost monitors for stopping.
        """
        eval_sets = [
            (dtrain, 'train'),
            (dtest, 'test'),
            (dvalidation, 'validation'),
        ]
        return xgb.train(parameters, dtrain, num_round,
                         evals=eval_sets, early_stopping_rounds=es)
    
    # Booster hyper-parameters for the AUC-evaluated binary classifier.
    parameters = {
        'eta': 0.38,
        'objective': 'binary:logistic',
        'eval_metric': 'auc',
        'max_depth': 5,
        'min_child_weight': 240,
        'scale_pos_weight': 11,   # compensates for class imbalance
        'colsample_bylevel': 0.7,
        'subsample': 0.78,
        'colsample_bytree': 0.7,
        'seed': 2019,
        'tree_method': 'approx',
        'gamma': 15,
        'lambda': 8600,           # L2 regularisation weight
    }
    
    def ceate_feature_map(features):   # inspect important features
        """Write an xgboost feature map ('xgb.fmap') for *features*.

        Each line is '<index>\\t<name>\\tq' (q = quantitative), the format
        Booster.get_score(fmap=...) expects.  NOTE: the (misspelled) public
        name is kept unchanged so existing callers still work.
        """
        # Context manager guarantees the file is closed even if a write
        # raises; enumerate replaces the manual counter.
        with open('xgb.fmap', 'w') as outfile:
            for i, feat in enumerate(features):
                outfile.write('{0}\t{1}\tq\n'.format(i, feat))
        
    # Dump the feature map, then rank features by average split gain.
    ceate_feature_map(X_train.columns)
    import operator
    gain_by_feature = bst.get_score(fmap='xgb.fmap', importance_type='gain')
    importance = sorted(gain_by_feature.items(),
                        key=operator.itemgetter(1), reverse=True)
    
    import xgboost
    from sklearn.model_selection import StratifiedKFold
    from sklearn.model_selection import GridSearchCV
    from sklearn.metrics import classification_report
    
    # Estimator with fixed regularisation; the grid below tunes the rest.
    # BUG FIX: this was bound as `xgb_classfier` (typo) but referenced as
    # `xgb_classifier` in GridSearchCV below, raising NameError at runtime;
    # the definition now uses the correctly spelled name.
    xgb_classifier = xgboost.XGBClassifier(gamma=0.2, reg_alpha=0.5,
                                           reg_lambda=2, scale_pos_weight=1,
                                           random_state=0)
    param_grid = {'n_estimators': [70, 80, 90],
                  'min_child_weight': [2, 3, 4, 5],
                  'max_depth': [6, 8, 10]}
    # Stratified folds keep the class ratio identical in every split.
    skf = StratifiedKFold(n_splits=4, shuffle=True, random_state=0)

    # Grid search + cross-validation over param_grid.
    grid_search = GridSearchCV(xgb_classifier, param_grid, n_jobs=8,
                               cv=skf, verbose=10)
    grid_search.fit(X_trainval, y_trainval)

    best_parameters = grid_search.best_params_
    print('GridSearchCV得的最优参数组合:', best_parameters, '\n')
    
    # Refit with the best grid-search parameters, then report performance
    # on the train+validation set and the held-out test set.
    xgb_config = xgboost.XGBClassifier(**best_parameters, gamma=0.2,
                                       reg_alpha=0.5, reg_lambda=2,
                                       scale_pos_weight=1, random_state=0)
    xgbf = xgb_config.fit(X_trainval, y_trainval)

    y_tvp = xgbf.predict(X_trainval)
    # BUG FIX: classification_report expects (y_true, y_pred); the original
    # passed predictions first, which swaps precision and recall per class.
    print('最优参数对应的估计器,在trainval上的分类报告:\n',
          classification_report(y_trainval, y_tvp))
    y_tp = xgbf.predict(X_test)
    print('最优参数对应的估计器,在test上的分类报告:\n',
          classification_report(y_test, y_tp))
    

    相关文章

      网友评论

          本文标题:xgboost模型demo

          本文链接:https://www.haomeiwen.com/subject/kyefiqtx.html