交叉熵推导过程
交叉熵数损失推导 - 简书 (jianshu.com)
代码
datasets.h
#ifndef DATASETS_H__
#define DATASETS_H__
#include <iostream>
#include <string>
#include <vector>
namespace Data
{
    /// One sample: two numeric features plus an integer class label.
    struct Item
    {
        double area;
        double distance;
        int label;
    };

    /**
     * Holds the samples read from a CSV file together with the per-feature
     * mean and standard deviation used to standardize them.
     */
    class datasets
    {
    public:
        /// Remembers the CSV path; nothing is read until load_data() is called.
        /// The mean and standard-deviation records start out as all zeros.
        datasets(std::string data_path)
            : m_data_path(data_path),
              m_mean{0.0, 0.0, 0},
              m_stdval{0.0, 0.0, 0}
        {
        }

        /// Reads the file at the stored path and standardizes the samples.
        void load_data();

        /// Standardizes a single sample in place using the stored statistics.
        void preprocess(Item& item);

        /// Returns a copy of the stored samples.
        std::vector<Item> get_data() const
        {
            return m_data;
        }

        /// Returns a copy of the per-feature means.
        Item get_mean() const
        {
            return m_mean;
        }

        /// Returns a copy of the per-feature standard deviations.
        Item get_std() const
        {
            return m_stdval;
        }

    private:
        /// Fills m_mean and m_stdval from m_data.
        void compute_std_mean();

        /// Standardizes every sample in m_data.
        void normalization();

    private:
        std::string m_data_path;   // path handed to the constructor
        std::vector<Item> m_data;  // loaded samples
        Item m_mean;               // per-feature mean (label field unused by the statistics)
        Item m_stdval;             // per-feature standard deviation (label field unused)
    };

    /// Prints the stored mean / standard deviation of both features.
    /// Note that the dataset is taken by value, so it is copied per call.
    std::ostream& operator<<(std::ostream& os, datasets dt);
}
#endif
datasets.cpp
#include "datasets.h"
#include <fstream>
#include <cmath>
using namespace std;
namespace Data
{
void datasets::datasets::load_data()
{
fstream ifile(datasets::m_data_path, ios::binary | ios::in);
string line;
getline(ifile, line);
while(getline(ifile, line))
{
int p0 = line.find(",");
int p1 = line.find(",", p0 + 1);
Item item;
item.area = atof(line.substr(0, p0).c_str());
item.distance = atof(line.substr(p0+1, p1).c_str());
item.label = atof(line.substr(p1+1).c_str());
m_data.emplace_back(item);
}
compute_std_mean();
normalization();
}
/**
 * Computes the mean and the population standard deviation (divisor N) of the
 * area and distance features over m_data and stores them in m_mean / m_stdval.
 *
 * The accumulators are reset first, so the result does not depend on any
 * earlier call. With no samples the statistics are left at zero rather than
 * dividing by zero. The label fields of m_mean / m_stdval are not touched.
 */
void datasets::compute_std_mean()
{
    m_mean.area = 0;
    m_mean.distance = 0;
    m_stdval.area = 0;
    m_stdval.distance = 0;

    if (m_data.empty())
    {
        return;
    }
    const double count = static_cast<double>(m_data.size());

    // First pass: means.
    for (const auto& item : m_data)
    {
        m_mean.area += item.area;
        m_mean.distance += item.distance;
    }
    m_mean.area /= count;
    m_mean.distance /= count;

    // Second pass: sum of squared deviations from the mean.
    for (const auto& item : m_data)
    {
        const double area_dev = item.area - m_mean.area;
        const double distance_dev = item.distance - m_mean.distance;
        m_stdval.area += area_dev * area_dev;
        m_stdval.distance += distance_dev * distance_dev;
    }
    m_stdval.area = std::sqrt(m_stdval.area / count);
    m_stdval.distance = std::sqrt(m_stdval.distance / count);
}
void datasets::normalization()
{
/* 对数据进行减去均值除以标准差,使得均值为0,标准差为1 */
for(auto& item : m_data)
{
item.area = (item.area - m_mean.area) / m_stdval.area;
item.distance = (item.distance - m_mean.distance) / m_stdval.distance;
}
}
/**
 * Standardizes one sample in place with the stored statistics:
 * (value - mean) / standard deviation, for both area and distance.
 * The label is left unchanged.
 *
 * @param item sample to transform; modified in place.
 *
 * If a stored standard deviation is not strictly positive (a constant feature,
 * or statistics that were never computed), the value is only centred; dividing
 * by it would produce NaN or infinity.
 */
void datasets::preprocess(Item& item)
{
    const double area_scale = m_stdval.area > 0 ? m_stdval.area : 1.0;
    const double distance_scale = m_stdval.distance > 0 ? m_stdval.distance : 1.0;

    item.area = (item.area - m_mean.area) / area_scale;
    item.distance = (item.distance - m_mean.distance) / distance_scale;
}
ostream& operator<<(ostream& os, datasets dt)
{
os << "-----datasets params-----" << endl;
os << "area mean:" << dt.get_mean().area << endl
<< "area stdv:" << dt.get_std().area << endl
<< "dist mean:" << dt.get_mean().distance << endl
<< "dist stdv:" << dt.get_std().distance << endl;
return os;
}
}
model.h
#ifndef MODEL_H__
#define MODEL_H__
#include "datasets.h"
#include <cmath>
#include <iostream>
namespace Model
{
/**
 * Two-feature logistic-regression model (distance, area) with a bias term.
 * Gradients and loss are accumulated sample by sample through back_forward(),
 * applied with step() and cleared with zero_grad().
 */
class model
{
public:
    /// Both weights start at 0.1, the learning rate at 0.1; the bias, the loss
    /// and all gradient accumulators start at 0. Initializers are listed in
    /// member-declaration order.
    model()
        : m_lr(0.1),
          m_loss(0),
          m_k_distance(0.1),
          m_k_area(0.1),
          m_bias(0),
          m_delta_k_distance(0),
          m_delta_k_area(0),
          m_delta_bias(0)
    {
    }

    /// Runs the forward pass on one sample and returns the sigmoid output.
    double operator()(Data::Item item);

    /// Adds one sample's loss and gradient contributions to the accumulators.
    void back_forward(double p, Data::Item item);

    /// Declared only; no definition appears with the other members in model.cpp.
    double predict();

    /// Applies the accumulated gradients to the parameters.
    void step();

    /// Clears the gradient accumulators and the accumulated loss.
    void zero_grad();

    double get_m_k_distance() const
    {
        return m_k_distance;
    }

    double get_m_k_area() const
    {
        return m_k_area;
    }

    double get_m_bias() const
    {
        return m_bias;
    }

    double get_m_loss() const
    {
        return m_loss;
    }

private:
    /// Linear combination of the features followed by sigmoid().
    double forward(Data::Item item);

    /// Logistic function, evaluated with exp() of a non-positive argument in
    /// either branch, then clamped to [eps, 1 - eps].
    double sigmoid(double x) const
    {
        const double eps = 1e-5;

        double p;
        if (x > 0)
        {
            p = 1.0 / (1 + std::exp(-x));
        }
        else
        {
            const double e = std::exp(x);
            p = e / (1 + e);
        }

        // Keep p away from 0 and 1 so the log() calls in the loss stay finite.
        const double upper = 1 - eps;
        if (upper < p)
        {
            p = upper;
        }
        if (p < eps)
        {
            p = eps;
        }
        return p;
    }

private:
    double m_lr;                // learning rate
    double m_loss;              // loss accumulated since the last zero_grad()
    double m_k_distance;        // weight of the distance feature
    double m_k_area;            // weight of the area feature
    double m_bias;              // bias term
    double m_delta_k_distance;  // accumulated gradient w.r.t. m_k_distance
    double m_delta_k_area;      // accumulated gradient w.r.t. m_k_area
    double m_delta_bias;        // accumulated gradient w.r.t. m_bias
};
// Stream-insertion operator for a model; returns the stream it was given.
// The model is taken by value, so each call copies it.
std::ostream& operator<<(std::ostream &os, model md);
}
model.cpp
#include "model.h"
using namespace std;
namespace Model
{
/// Call operator: evaluates the model on one sample by delegating to forward().
double model::operator()(Data::Item item)
{
    const double probability = forward(item);
    return probability;
}
/**
 * Forward pass for one sample: weighted sum of distance and area plus the
 * bias, passed through sigmoid().
 *
 * @return the sigmoid output for the sample.
 */
double model::forward(Data::Item item)
{
    const double distance_term = m_k_distance * item.distance;
    const double area_term = m_k_area * item.area;
    const double logit = distance_term + area_term + m_bias;
    return sigmoid(logit);
}
void model::back_forward(double p, Data::Item item)
{
// 逻辑回归交叉熵损失
m_loss += -(item.label * log(p) + (1 - item.label)*log(1 - p));
m_delta_k_distance += item.distance*(p - item.label);
m_delta_k_area += item.area*(p - item.label);
m_delta_bias += p - item.label;
}
/// Gradient-descent update: moves each parameter against its accumulated
/// gradient, scaled by the learning rate.
void model::step()
{
    m_k_distance = m_k_distance - m_lr * m_delta_k_distance;
    m_k_area = m_k_area - m_lr * m_delta_k_area;
    m_bias = m_bias - m_lr * m_delta_bias;
}
/// Resets the three gradient accumulators and the accumulated loss to zero.
void model::zero_grad()
{
    m_delta_k_distance = m_delta_k_area = m_delta_bias = m_loss = 0.0;
}
ostream& operator<<(ostream& os, model md)
{
os << "-----params-----" << endl;
os << "m_k_distance:" << md.get_m_k_distance() << endl
<< "m_k_area:" << md.get_m_k_area() << endl
<< "m_bias:" << md.get_m_bias() << endl;
return os;
}
}
main.cpp
#include "datasets.h"
#include "model.h"
#include <vector>
using namespace std;
int main()
{
Data::datasets dt("./shanghai.csv");
dt.load_data();
vector<Data::Item> datas = dt.get_data();
Model::model md;
md.zero_grad();
for (int i = 0; i < 1000; i++)
{
for (auto& item : datas)
{
double predict = md(item);
md.back_forward(predict, item);
}
md.step();
if (i % 100 == 0)
{
cout << "Iter:" << i << ", loss:" << md.get_m_loss() << endl;
}
md.zero_grad();
}
cout << dt << endl;
cout << md << endl;
cout << "----predict-----" << endl;
Data::Item test{100,2000,0};
cout << "area is :" << test.area << endl;
cout << "distance is :" << test.distance << endl;
dt.preprocess(test);
double predict = md(test);
if (predict > 0.5)
{
cout << "happy" << endl;
}
else
{
cout << "not happy" << endl;
}
return 0;
}
网友评论