Source code reference:
label_weight
Two hyperparameters affect label_weight: scale_pos_weight and is_unbalance.
For binary classification, the positive and negative label_weight values both default to 1 (a 1:1 ratio). When scale_pos_weight is set, the positive-to-negative label_weight ratio becomes scale_pos_weight:1. If scale_pos_weight is not set but is_unbalance=true is, the positive-to-negative label_weight ratio is set to the inverse of the class-count ratio, so the minority class receives the larger weight.
The relevant source code:
void Init(const Metadata& metadata, data_size_t num_data) override {
  num_data_ = num_data;
  label_ = metadata.label();
  weights_ = metadata.weights();
  data_size_t cnt_positive = 0;
  data_size_t cnt_negative = 0;
  // count for positive and negative samples
  #pragma omp parallel for schedule(static) reduction(+:cnt_positive, cnt_negative)
  for (data_size_t i = 0; i < num_data_; ++i) {
    if (is_pos_(label_[i])) {
      ++cnt_positive;
    } else {
      ++cnt_negative;
    }
  }
  if (cnt_negative == 0 || cnt_positive == 0) {
    Log::Warning("Contains only one class");
    // not need to boost.
    num_data_ = 0;
  }
  Log::Info("Number of positive: %d, number of negative: %d", cnt_positive, cnt_negative);
  // use -1 for negative class, and 1 for positive class
  label_val_[0] = -1;
  label_val_[1] = 1;
  // weight for label
  label_weights_[0] = 1.0f;
  label_weights_[1] = 1.0f;
  // if using unbalance, change the labels weight
  if (is_unbalance_ && cnt_positive > 0 && cnt_negative > 0) {
    if (cnt_positive > cnt_negative) {
      label_weights_[1] = 1.0f;
      label_weights_[0] = static_cast<double>(cnt_positive) / cnt_negative;
    } else {
      label_weights_[1] = static_cast<double>(cnt_negative) / cnt_positive;
      label_weights_[0] = 1.0f;
    }
  }
  label_weights_[1] *= scale_pos_weight_;
}
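To make the effect concrete, below is a minimal standalone sketch (not LightGBM code; the class counts and parameter values are made up for illustration) that reproduces the weight logic of Init() above. With 100 positives against 900 negatives and is_unbalance=true, the positive class ends up with a label_weight of 9, i.e. the inverse of the class-count ratio:

#include <cstdio>

int main() {
  // Hypothetical class counts and hyperparameter values, for illustration only.
  const long long cnt_positive = 100;
  const long long cnt_negative = 900;
  const bool is_unbalance = true;
  const double scale_pos_weight = 1.0;  // LightGBM's default

  // index 0 = negative class, index 1 = positive class, as in Init() above
  double label_weights[2] = {1.0, 1.0};
  if (is_unbalance && cnt_positive > 0 && cnt_negative > 0) {
    if (cnt_positive > cnt_negative) {
      label_weights[0] = static_cast<double>(cnt_positive) / cnt_negative;
    } else {
      label_weights[1] = static_cast<double>(cnt_negative) / cnt_positive;
    }
  }
  // scale_pos_weight is multiplied in after the is_unbalance adjustment
  label_weights[1] *= scale_pos_weight;

  // prints: label_weight(neg)=1.0, label_weight(pos)=9.0
  std::printf("label_weight(neg)=%.1f, label_weight(pos)=%.1f\n",
              label_weights[0], label_weights[1]);
  return 0;
}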
sample_weight
LightGBM also supports per-sample weights (sample_weight). Once set, the sample weight is multiplied together with label_weight into the final gradients and hessians. A sample_weight of 0 is allowed; it means the sample does not participate in training at all, since its gradient and hessian both become 0.
The relevant source code:
void GetGradients(const double* score, score_t* gradients, score_t* hessians) const override {
  if (weights_ == nullptr) {
    #pragma omp parallel for schedule(static)
    for (data_size_t i = 0; i < num_data_; ++i) {
      // get label and label weights
      const int is_pos = is_pos_(label_[i]);
      const int label = label_val_[is_pos];
      const double label_weight = label_weights_[is_pos];
      // calculate gradients and hessians
      const double response = -label * sigmoid_ / (1.0f + std::exp(label * sigmoid_ * score[i]));
      const double abs_response = fabs(response);
      gradients[i] = static_cast<score_t>(response * label_weight);
      hessians[i] = static_cast<score_t>(abs_response * (sigmoid_ - abs_response) * label_weight);
    }
  } else {
    #pragma omp parallel for schedule(static)
    for (data_size_t i = 0; i < num_data_; ++i) {
      // get label and label weights
      const int is_pos = is_pos_(label_[i]);
      const int label = label_val_[is_pos];
      const double label_weight = label_weights_[is_pos];
      // calculate gradients and hessians
      const double response = -label * sigmoid_ / (1.0f + std::exp(label * sigmoid_ * score[i]));
      const double abs_response = fabs(response);
      gradients[i] = static_cast<score_t>(response * label_weight * weights_[i]);
      hessians[i] = static_cast<score_t>(abs_response * (sigmoid_ - abs_response) * label_weight * weights_[i]);
    }
  }
}
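To see the arithmetic in isolation, here is a minimal standalone sketch (not LightGBM code; the scores and weights below are made up) of the per-sample computation in GetGradients() above. It also shows how a sample_weight of 0 zeroes out both the gradient and the hessian, so the sample has no effect on training:

#include <cmath>
#include <cstdio>

// label is +1 or -1; sigmoid corresponds to the sigmoid_ member above
// (default 1.0 in LightGBM).
void GradHess(int label, double score, double label_weight, double sample_weight,
              double sigmoid, double* grad, double* hess) {
  const double response = -label * sigmoid / (1.0 + std::exp(label * sigmoid * score));
  const double abs_response = std::fabs(response);
  *grad = response * label_weight * sample_weight;
  *hess = abs_response * (sigmoid - abs_response) * label_weight * sample_weight;
}

int main() {
  double g, h;
  // A positive sample with raw score 0.0: response = -0.5, so with
  // label_weight=9 the result is grad=-4.500, hess=2.250.
  GradHess(+1, 0.0, /*label_weight=*/9.0, /*sample_weight=*/1.0, 1.0, &g, &h);
  std::printf("grad=%.3f, hess=%.3f\n", g, h);
  // The same sample with sample_weight=0: both become 0, i.e. the sample
  // is effectively excluded from training.
  GradHess(+1, 0.0, 9.0, 0.0, 1.0, &g, &h);
  std::printf("grad=%.3f, hess=%.3f\n", g, h);
  return 0;
}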
(To be continued~)