1.xgboost训练流程图
xgboost训练流程
二.利用xgboost进行pairwise排序
rank_obj.cu UML类图 rank:pairwise相关数据类型
/*! \brief helper information in a list */
struct ListEntry {
/*! \brief the predict score we in the data */
bst_float pred;
/*! \brief the actual label of the entry */
bst_float label;
/*! \brief row index in the data matrix */
unsigned rindex;
// constructor
ListEntry(bst_float pred, bst_float label, unsigned rindex)
: pred(pred), label(label), rindex(rindex) {}
// comparator by prediction
inline static bool CmpPred(const ListEntry &a, const ListEntry &b) {
return a.pred > b.pred;
}
// comparator by label
inline static bool CmpLabel(const ListEntry &a, const ListEntry &b) {
return a.label > b.label;
}
};
/*! \brief a pair in the lambda rank */ //pos_index neg_index weights
struct LambdaPair {
/*! \brief positive index: this is a position in the list */
unsigned pos_index;
/*! \brief negative index: this is a position in the list */
unsigned neg_index;
/*! \brief weight to be filled in */
bst_float weight;
// constructor
LambdaPair(unsigned pos_index, unsigned neg_index)
: pos_index(pos_index), neg_index(neg_index), weight(1.0f) {}
// constructor
LambdaPair(unsigned pos_index, unsigned neg_index, bst_float weight)
: pos_index(pos_index), neg_index(neg_index), weight(weight) {}
};
pairwise loss 计算相关
template <typename LambdaWeightComputerT>
class LambdaRankObj : public ObjFunction {
public:
void GetGradient(const HostDeviceVector<bst_float>& preds,
const MetaInfo& info,
int iter,
HostDeviceVector<GradientPair>* out_gpair) override {
CHECK_EQ(preds.Size(), info.labels_.Size()) << "label size predict size not match";
// quick consistency when group is not available
std::vector<unsigned> tgptr(2, 0); tgptr[1] = static_cast<unsigned>(info.labels_.Size());
const std::vector<unsigned> &gptr = info.group_ptr_.size() == 0 ? tgptr : info.group_ptr_;
CHECK(gptr.size() != 0 && gptr.back() == info.labels_.Size())
<< "group structure not consistent with #rows" << ", "
<< "group ponter size: " << gptr.size() << ", "
<< "labels size: " << info.labels_.Size() << ", "
<< "group pointer back: " << (gptr.size() == 0 ? 0 : gptr.back());
#if defined(__CUDACC__)
// Check if we have a GPU assignment; else, revert back to CPU
auto device = tparam_->gpu_id;
if (device >= 0) {
ComputeGradientsOnGPU(preds, info, iter, out_gpair, gptr);
} else {
// Revert back to CPU
#endif
ComputeGradientsOnCPU(preds, info, iter, out_gpair, gptr);
#if defined(__CUDACC__)
}
#endif
}
private:
bst_float ComputeWeightNormalizationFactor(const MetaInfo& info,
const std::vector<unsigned> &gptr) {
const auto ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
bst_float sum_weights = 0;
for (bst_omp_uint k = 0; k < ngroup; ++k) {
sum_weights += info.GetWeight(k);
}
return ngroup / sum_weights;
}
取pair举例
Lambda汇总
2.1利用cpu计算梯度值
void ComputeGradientsOnCPU(const HostDeviceVector<bst_float>& preds,
const MetaInfo& info,
int iter,
HostDeviceVector<GradientPair>* out_gpair,
const std::vector<unsigned> &gptr) {
LOG(DEBUG) << "Computing " << LambdaWeightComputerT::Name() << " gradients on CPU.";
bst_float weight_normalization_factor = ComputeWeightNormalizationFactor(info, gptr);
const auto& preds_h = preds.HostVector();
const auto& labels = info.labels_.HostVector();
std::vector<GradientPair>& gpair = out_gpair->HostVector();
const auto ngroup = static_cast<bst_omp_uint>(gptr.size() - 1);
out_gpair->Resize(preds.Size());
#pragma omp parallel
{
// parallel construct, declare random number generator here, so that each
// thread use its own random number generator, seed by thread id and current iteration
std::minstd_rand rnd((iter + 1) * 1111);
std::vector<LambdaPair> pairs;
std::vector<ListEntry> lst;
std::vector< std::pair<bst_float, unsigned> > rec;
//gpair存的是相对应的一二阶梯度
#pragma omp for schedule(static)
for (bst_omp_uint k = 0; k < ngroup; ++k) {//在同一个group里进行这个操作
lst.clear(); pairs.clear();
for (unsigned j = gptr[k]; j < gptr[k+1]; ++j) {
lst.emplace_back(preds_h[j], labels[j], j);
gpair[j] = GradientPair(0.0f, 0.0f);
}//初始化梯度对
std::stable_sort(lst.begin(), lst.end(), ListEntry::CmpPred);//按预测值排序
rec.resize(lst.size());
for (unsigned i = 0; i < lst.size(); ++i) {
rec[i] = std::make_pair(lst[i].label, i);
}//rec存储预测值对应的label以及在预测值中对应的序号
std::stable_sort(rec.begin(), rec.end(), common::CmpFirst);
// enumerate buckets with same label, for each item in the lst, grab another sample randomly
for (unsigned i = 0; i < rec.size(); ) {
unsigned j = i + 1;
while (j < rec.size() && rec[j].first == rec[i].first) ++j;//桶内元素是否增加
// bucket in [i,j), get a sample outside bucket
unsigned nleft = i, nright = static_cast<unsigned>(rec.size() - j);
if (nleft + nright != 0) {
int nsample = param_.num_pairsample;
while (nsample --) {//一个label取nsample个样本
for (unsigned pid = i; pid < j; ++pid) {//出现同一个label的情况下遍历桶内的每个点
unsigned ridx = std::uniform_int_distribution<unsigned>(0, nleft + nright - 1)(rnd);//在0到数组长度减桶长之间产生随机数
if (ridx < nleft) {
pairs.emplace_back(rec[ridx].second, rec[pid].second,//小于的话不变
info.GetWeight(k) * weight_normalization_factor);
} else {
pairs.emplace_back(rec[pid].second, rec[ridx+j-i].second,//大于的话加上桶长
info.GetWeight(k) * weight_normalization_factor);
}
}
}
}
i = j;
}
// get lambda weight for the pairs
LambdaWeightComputerT::GetLambdaWeight(lst, &pairs);
// rescale each gradient and hessian so that the lst have constant weighted
float scale = 1.0f / param_.num_pairsample;
if (param_.fix_list_weight != 0.0f) {
scale *= param_.fix_list_weight / (gptr[k + 1] - gptr[k]);
}
for (auto & pair : pairs) {
const ListEntry &pos = lst[pair.pos_index];
const ListEntry &neg = lst[pair.neg_index];
const bst_float w = pair.weight * scale;
const float eps = 1e-16f;
bst_float p = common::Sigmoid(pos.pred - neg.pred);
bst_float g = p - 1.0f;
bst_float h = std::max(p * (1.0f - p), eps);
// accumulate gradient and hessian in both pid, and nid
gpair[pos.rindex] += GradientPair(g * w, 2.0f*w*h);
gpair[neg.rindex] += GradientPair(-g * w, 2.0f*w*h);
}
}
}
}
};
class PairwiseLambdaWeightComputer {
public:
/*!
* \brief get lambda weight for existing pairs - for pairwise objective
* \param list a list that is sorted by pred score
* \param io_pairs record of pairs, containing the pairs to fill in weights
*/
static void GetLambdaWeight(const std::vector<ListEntry> &sorted_list,
std::vector<LambdaPair> *io_pairs) {}
static char const* Name() {
return "rank:pairwise";
}
#if defined(__CUDACC__)
PairwiseLambdaWeightComputer(const bst_float *dpreds,
const bst_float *dlabels,
const dh::SegmentSorter<float> &segment_label_sorter) {}
class PairwiseLambdaWeightMultiplier {
public:
// Adjust the items weight by this value
__device__ __forceinline__ bst_float GetWeight(uint32_t gidx, int pidx, int nidx) const {
return 1.0f;
}
};
inline const PairwiseLambdaWeightMultiplier GetWeightMultiplier() const {
return {};
}
#endif
};
Class Diagram1.png
void GetGradient(const HostDeviceVector<bst_float>& preds,
const MetaInfo& info,
int iter,
HostDeviceVector<GradientPair>* out_gpair) override {
CHECK_EQ(preds.Size(), info.labels_.Size()) << "label size predict size not match";
// quick consistency when group is not available
std::vector<unsigned> tgptr(2, 0); tgptr[1] = static_cast<unsigned>(info.labels_.Size());
const std::vector<unsigned> &gptr = info.group_ptr_.size() == 0 ? tgptr : info.group_ptr_;
CHECK(gptr.size() != 0 && gptr.back() == info.labels_.Size())
<< "group structure not consistent with #rows" << ", "
<< "group ponter size: " << gptr.size() << ", "
<< "labels size: " << info.labels_.Size() << ", "
<< "group pointer back: " << (gptr.size() == 0 ? 0 : gptr.back());
#if defined(__CUDACC__)
// Check if we have a GPU assignment; else, revert back to CPU
auto device = tparam_->gpu_id;
if (device >= 0) {
ComputeGradientsOnGPU(preds, info, iter, out_gpair, gptr);
} else {
// Revert back to CPU
#endif
ComputeGradientsOnCPU(preds, info, iter, out_gpair, gptr);
#if defined(__CUDACC__)
}
#endif
}
2.2利用gpu计算梯度值
#if defined(__CUDACC__)
void ComputeGradientsOnGPU(const HostDeviceVector<bst_float>& preds,
const MetaInfo& info,
int iter,
HostDeviceVector<GradientPair>* out_gpair,
const std::vector<unsigned> &gptr) {
LOG(DEBUG) << "Computing " << LambdaWeightComputerT::Name() << " gradients on GPU.";
auto device = tparam_->gpu_id;
dh::safe_cuda(cudaSetDevice(device));
bst_float weight_normalization_factor = ComputeWeightNormalizationFactor(info, gptr);
// Set the device ID and copy them to the device
out_gpair->SetDevice(device);
info.labels_.SetDevice(device);
preds.SetDevice(device);
info.weights_.SetDevice(device);
out_gpair->Resize(preds.Size());
auto d_preds = preds.ConstDevicePointer();
auto d_gpair = out_gpair->DevicePointer();
auto d_labels = info.labels_.ConstDevicePointer();
SortedLabelList slist(param_);
// Sort the labels within the groups on the device
slist.Sort(info.labels_, gptr);
// Initialize the gradients next
out_gpair->Fill(GradientPair(0.0f, 0.0f));
// Finally, compute the gradients
slist.ComputeGradients<LambdaWeightComputerT>
(d_preds, d_labels, info.weights_, iter, d_gpair, weight_normalization_factor);
}
#endif
#if defined(__CUDACC__)
class SortedLabelList : dh::SegmentSorter<float> {
private:
const LambdaRankParam ¶m_;// Objective configuration
public:
explicit SortedLabelList(const LambdaRankParam ¶m)
: param_(param) {}
// Sort the labels that are grouped by 'groups'
void Sort(const HostDeviceVector<bst_float> &dlabels, const std::vector<uint32_t> &groups) {
this->SortItems(dlabels.ConstDevicePointer(), dlabels.Size(), groups);
}
Class Diagram 4.png
// This kernel can only run *after* the kernel in sort is completed, as they
// use the default stream
template <typename LambdaWeightComputerT>
void ComputeGradients(const bst_float *dpreds,// Unsorted predictions
const bst_float *dlabels,// Unsorted labels
const HostDeviceVector<bst_float> &weights,
int iter,
GradientPair *out_gpair,
float weight_normalization_factor) {
// Group info on device
const auto &dgroups = this->GetGroupsSpan();
uint32_t ngroups = this->GetNumGroups() + 1;
uint32_t total_items = this->GetNumItems();
uint32_t niter = param_.num_pairsample * total_items;
float fix_list_weight = param_.fix_list_weight;
const auto &original_pos = this->GetOriginalPositionsSpan();
uint32_t num_weights = weights.Size();
auto dweights = num_weights ? weights.ConstDevicePointer() : nullptr;
const auto &sorted_labels = this->GetItemsSpan();
// This is used to adjust the weight of different elements based on the different ranking
// objective function policies
LambdaWeightComputerT weight_computer(dpreds, dlabels, *this);
auto wmultiplier = weight_computer.GetWeightMultiplier();
int device_id = -1;
dh::safe_cuda(cudaGetDevice(&device_id));
// For each instance in the group, compute the gradient pair concurrently
dh::LaunchN(device_id, niter, nullptr, [=] __device__(uint32_t idx) {
// First, determine the group 'idx' belongs to
uint32_t item_idx = idx % total_items;
uint32_t group_idx =
thrust::upper_bound(thrust::seq, dgroups.begin(),
dgroups.begin() + ngroups, item_idx) -
dgroups.begin();
// Span of this group within the larger labels/predictions sorted tuple
uint32_t group_begin = dgroups[group_idx - 1];
uint32_t group_end = dgroups[group_idx];
uint32_t total_group_items = group_end - group_begin;
// Are the labels diverse enough? If they are all the same, then there is nothing to pick
// from another group - bail sooner
if (sorted_labels[group_begin] == sorted_labels[group_end - 1]) return;
// Find the number of labels less than and greater than the current label
// at the sorted index position item_idx
uint32_t nleft = CountNumItemsToTheLeftOf(
sorted_labels.data() + group_begin, item_idx - group_begin + 1, sorted_labels[item_idx]);
uint32_t nright = CountNumItemsToTheRightOf(
sorted_labels.data() + item_idx, group_end - item_idx, sorted_labels[item_idx]);
// Create a minstd_rand object to act as our source of randomness
thrust::minstd_rand rng((iter + 1) * 1111);
rng.discard(((idx / total_items) * total_group_items) + item_idx - group_begin);
// Create a uniform_int_distribution to produce a sample from outside of the
// present label group
thrust::uniform_int_distribution<int> dist(0, nleft + nright - 1);
int sample = dist(rng);
int pos_idx = -1;// Bigger label
int neg_idx = -1;// Smaller label
// Are we picking a sample to the left/right of the current group?
if (sample < nleft) {
// Go left
pos_idx = sample + group_begin;
neg_idx = item_idx;
} else {
pos_idx = item_idx;
uint32_t items_in_group = total_group_items - nleft - nright;
neg_idx = sample + items_in_group + group_begin;
}
// Compute and assign the gradients now
const float eps = 1e-16f;
bst_float p = common::Sigmoid(dpreds[original_pos[pos_idx]] - dpreds[original_pos[neg_idx]]);
bst_float g = p - 1.0f;
bst_float h = thrust::max(p * (1.0f - p), eps);
// Rescale each gradient and hessian so that the group has a weighted constant
float scale = __frcp_ru(niter / total_items);
if (fix_list_weight != 0.0f) {
scale *= fix_list_weight / total_group_items;
}
float weight = num_weights ? dweights[group_idx - 1] : 1.0f;
weight *= weight_normalization_factor;
weight *= wmultiplier.GetWeight(group_idx - 1, pos_idx, neg_idx);
weight *= scale;
// Accumulate gradient and hessian in both positive and negative indices
const GradientPair in_pos_gpair(g * weight, 2.0f * weight * h);
dh::AtomicAddGpair(&out_gpair[original_pos[pos_idx]], in_pos_gpair);
const GradientPair in_neg_gpair(-g * weight, 2.0f * weight * h);
dh::AtomicAddGpair(&out_gpair[original_pos[neg_idx]], in_neg_gpair);
});
// Wait until the computations done by the kernel is complete
dh::safe_cuda(cudaStreamSynchronize(nullptr));
}
};
#endif
网友评论