美文网首页
xgboost调参相关

xgboost调参相关

作者: 我永远喜欢高木同学 | 来源:发表于2020-05-13 09:09 被阅读0次

首先,因为要使用 xgboost 进行 pairwise 排序,所以 objective 应设为 `rank:pairwise`。XGBoost 文档中对两个排序相关 objective 的说明如下:

*   `rank:pairwise`: Use LambdaMART to perform pairwise ranking where the pairwise loss is minimized

*   `rank:ndcg`: Use LambdaMART to perform list-wise ranking where [Normalized Discounted Cumulative Gain (NDCG)](http://en.wikipedia.org/wiki/NDCG) is maximized

import xgboost as xgb
from xgboost import DMatrix
from sklearn.datasets import load_svmlight_file
import numpy as np
from sklearn.metrics import ndcg_score

# data prepare
# Features and relevance labels in svmlight/libsvm format, one file per split.
x_train, y_train = load_svmlight_file("libsvm_format.train.txt")
x_valid, y_valid = load_svmlight_file("libsvm_format.valid.txt")
x_test, y_test = load_svmlight_file("libsvm_format.test.txt")


def _read_group_sizes(path):
    """Return the group sizes stored in *path*, one integer per line.

    Blank lines are skipped so that a stray empty line (e.g. at the end of
    the file) does not abort loading with a ValueError.
    """
    with open(path, "r") as f:
        return [int(line) for line in f if line.strip()]


# Number of rows belonging to each query group, in file order.
group_train = _read_group_sizes("group.train.txt")
group_valid = _read_group_sizes("group.valid.txt")
group_test = _read_group_sizes("group.test.txt")

# Running totals of the test group sizes, i.e. the end offset of every test
# group.  Derived from group_test instead of re-reading group.test.txt.
group_list = []
running_total = 0
for size in group_test:
    running_total += size
    group_list.append(running_total)

train_dmatrix = DMatrix(x_train, label=y_train)
valid_dmatrix = DMatrix(x_valid, label=y_valid)
test_dmatrix = DMatrix(x_test, label=y_test)

# Ranking objectives need to know which consecutive rows form one query.
train_dmatrix.set_group(group_train)
valid_dmatrix.set_group(group_valid)
test_dmatrix.set_group(group_test)

params = {
    'max_depth': 4,
    'eta': 0.3,
    # 'silent' is deprecated in XGBoost and was replaced by 'verbosity';
    # verbosity=0 is the equivalent of the former silent=1.
    'verbosity': 0,
    'min_child_weight': 5,
    'gamma': 0,
    'subsample': 1,
    'reg_lambda': 1,
    'alpha': 0,
    'objective': 'rank:pairwise',  # LambdaMART pairwise ranking
    # "ndcg@4-" (with a trailing minus) is the standard computation; without
    # the minus, as used here, a group whose ideal DCG is 0 is scored as 1.
    # NOTE(review): the manual NDCG@4 computed later in this script does not
    # score such groups as 1, so the two numbers are not directly comparable.
    'eval_metric': 'ndcg@4',
}

# Both sets are reported every round; per the xgboost.train documentation the
# last entry of `evals` (the validation set) drives early stopping.
xgb_model = xgb.train(
    params,
    train_dmatrix,
    num_boost_round=1000,
    early_stopping_rounds=100,
    evals=[(train_dmatrix, 'train'), (valid_dmatrix, 'validation')],
)
# NOTE(review): whether predict() uses the best iteration or every trained
# round after early stopping depends on the installed XGBoost version - confirm.
pred = xgb_model.predict(test_dmatrix)

#cal metric
# Mean NDCG@4 over the test query groups.
# group_list holds the cumulative end offset of every group, so np.split
# yields one array per group plus one trailing remainder piece, which is
# dropped.
pred = np.split(pred, group_list)[:-1]
y_test = np.split(y_test, group_list)[:-1]

ndcg4score = 0
for pre, lab in zip(pred, y_test):
    # A group whose predictions or labels are all zero contributes 0 to the
    # sum.  Skip only that group: the original `break` here ended the whole
    # evaluation at the first such group while the average below was still
    # taken over every group, which understated the result.
    if not pre.any() or not lab.any():
        continue
    if len(pre) == 1:
        # Single-document groups are scored as 1 without calling ndcg_score.
        curr = 1
    else:
        # ndcg_score expects 2-D input: one row per query.
        curr = ndcg_score([lab.tolist()], [pre.tolist()], k=4)
    ndcg4score += curr

# Average over all groups, including the skipped ones; guard the empty case
# so an empty test set reports 0.0 instead of raising ZeroDivisionError.
res = ndcg4score / len(pred) if pred else 0.0
print(res)

相关文章

网友评论

      本文标题:xgboost调参相关

      本文链接:https://www.haomeiwen.com/subject/klyvnhtx.html