前言
由于研究内容需要,需要在python项目中使用SVM模型。目前大量库都提供了SVM模型,比如libsvm、opencv ml模块等。 但是在项目中,第三方采用了Matlab的 vl_svm, 为了保持一致,只能在工程中引入vl_svm。
开发测试环境
- windows10 64bit
- Anaconda 3, with python 3.7
- pybind11
- vl_feat
About VLFeat
image.png image.png image.png
The VLFeat open source library implements popular computer vision algorithms specializing in image understanding and local features extraction and matching. Algorithms include Fisher Vector, VLAD, SIFT, MSER, k-means, hierarchical k-means, agglomerative information bottleneck, SLIC superpixels, quick shift superpixels, large scale SVM training, and many others. It is written in C for efficiency and compatibility, with interfaces in MATLAB for ease of use, and detailed documentation throughout. It supports Windows, Mac OS X, and Linux. The latest version of VLFeat is 0.9.21.
VLFeat是一个开源的库,包含许多流行的计算视觉算法,比如图像理解识别、局部特征提取、匹配等。
VLFeat采用C语言实现, 目前有Matlab接口以及文档,但是没有python接口,没有python接口那就自己实现!!!
由于VLFeat库算法太多,而且工程目前只需要SVM、HOG、LBP提取算法的python接口,因此本文主要介绍SVM的python接口。
测试SVM
vlfeat提供了比较详细的document和API reference
image.png
除此之外,还提供了一个简单的例子,关于训练SVM
C语言代码
- 训练部分
输入: 4个2维的样本, 以及标签y, -1代表负样本, +1代表正样本
输出: SVM训练的参数,权重model, 偏置bias
#include <stdio.h>
#include <vl/svm.h>
/*
 * Train a linear SVM on four 2-D samples with VLFeat's SGD solver and
 * print the learned weight vector and bias.
 *
 * Labels: +1 marks a positive sample, -1 a negative one.
 */
int main()
{
    /* Enum constants are true compile-time constants in C. A
       `vl_size const` variable is not, which would turn the arrays below
       into variable-length arrays, and those cannot take initializers. */
    enum { numData = 4, dimension = 2 } ;

    /* One sample per row, `dimension` values each. */
    double x [dimension * numData] = {
        0.0, -0.5,
        0.6, -0.3,
        0.0,  0.5,
        0.6,  0.0} ;
    double y [numData] = {1, 1, -1, 1} ;
    double lambda = 0.01 ;

    /* Pointer to read-only data: the weights are owned by the solver and
       the pointer itself is assigned after training. */
    double const * model ;
    double bias ;

    VlSvm * svm = vl_svm_new(VlSvmSolverSgd,
                             x, dimension, numData,
                             y,
                             lambda) ;
    vl_svm_train(svm) ;

    model = vl_svm_get_model(svm) ;
    bias = vl_svm_get_bias(svm) ;

    printf("model w = [ %f , %f ] , bias b = %f \n",
           model[0],
           model[1],
           bias) ;

    /* `model` is only read before this call, while the solver still exists. */
    vl_svm_delete(svm) ;
    return 0 ;
}
- 测试部分
测试的代码非常简单, 就是输入一个样本,得出其判别输出。
/*
 * Evaluate the linear decision function for a single sample.
 *
 * svm_w     : weight vector, at least `dims` entries are read
 * svm_b     : bias added after the weighted sum
 * inputData : sample to score, at least `dims` entries are read
 * dims      : number of components to accumulate
 *
 * Returns svm_w[0]*inputData[0] + ... + svm_w[dims-1]*inputData[dims-1] + svm_b.
 */
double svm_test(double* svm_w, double svm_b, double* inputData, int dims) {
    double score = 0;
    int k = 0;

    /* Accumulate in index order, then add the bias last. */
    while (k < dims) {
        score += svm_w[k] * inputData[k];
        ++k;
    }
    return score + svm_b;
}
封装python接口
封装接口主要把输入、输出数据搞定即可,其他部分直接代码重用即可。
svm训练函数:
- 输入数据的转换
语言类型 | 描述 | 参数类型 |
---|---|---|
C | 多个输入样本(向量形式) | double数组/指针 |
python | 多个输入样本(向量形式) | list |
语言类型 | 描述 | 参数类型 |
---|---|---|
C | 多个标签 | double数组 |
python | 多个标签 | list |
- 输出数据的转换
语言类型 | 描述 | 参数类型 |
---|---|---|
C | 一个权值向量(向量形式) ,一个偏置bias | double数组/指针 + double数 |
python | 一个权值向量(向量形式) ,一个偏置bias | list, 包含2项 |
python接口代码实现
#include<stdexcept>
#include<vector>
#include<pybind11/pybind11.h>
#include<pybind11/stl.h>
#include<pybind11/numpy.h>
#include"svm_classifier.h"
namespace py = pybind11;
// Train a linear SVM with VLFeat's SGD solver.
//
// trainData : one sample per row; the length of the first row defines the
//             feature dimension, and only that many values are read per row.
// labels    : one label per sample; the first trainData.size() entries are used.
// lambda    : value forwarded unchanged to vl_svm_new.
//
// Returns a two-element vector: { weights, { bias } }.
//
// Throws std::out_of_range when trainData is empty (from .at(0)), and
// std::invalid_argument when labels or any row is too short to be read safely.
std::vector<std::vector<double>> train_svm(std::vector<std::vector<double>>& trainData, std::vector<double>& labels, double lambda) {
    const std::size_t numData = trainData.size();
    const std::size_t dims = trainData.at(0).size();

    if (labels.size() < numData) {
        throw std::invalid_argument("train_svm: fewer labels than training samples");
    }

    // Flatten the samples row by row into one contiguous buffer. A
    // std::vector owns the storage, so it is released on every exit path.
    std::vector<double> x;
    x.reserve(numData * dims);
    for (const std::vector<double>& row : trainData) {
        if (row.size() < dims) {
            throw std::invalid_argument("train_svm: a training sample is shorter than the first sample");
        }
        x.insert(x.end(), row.begin(), row.begin() + dims);
    }

    // x and labels both stay alive until after vl_svm_delete below, so the
    // solver can read them directly during training.
    VlSvm* svm = vl_svm_new(VlSvmSolverSgd, x.data(), dims, numData, labels.data(), lambda);
    vl_svm_train(svm);

    // Copy the results out before the solver is destroyed.
    const double* model = vl_svm_get_model(svm);
    const double bias = vl_svm_get_bias(svm);
    std::vector<double> weights(model, model + dims);

    vl_svm_delete(svm);
    return std::vector<std::vector<double>>{weights, { bias }};
}
// Defines the Python extension module `vlfeat_svm`, exposing train_svm with
// the keyword arguments train_dataset, labels and lambda_value.
PYBIND11_MODULE(vlfeat_svm, m) {
    // Enables the "name"_a shorthand, equivalent to py::arg("name").
    using namespace pybind11::literals;

    m.doc() = "Simple svm demo!";
    m.def("train_svm",
          &train_svm,
          "train_dataset"_a,
          "labels"_a,
          "lambda_value"_a);
}
编译直接生成.pyd动态链接库
python接口的测试
为了符合OO面向对象, 将代码封装到类中:
首先构造一个类SVM, 包含2个方法:
- train() 训练
- eval() 测试
import numpy as np
import detector.svm.vlfeat_svm as svm
import random
class SVM:
    """Linear SVM whose parameters come from the ``vlfeat_svm`` extension.

    Attributes are plain and public: ``weights`` (weight vector), ``bias``
    (offset), ``lambda_value`` (value used in the last training call),
    ``name`` and ``optim_method`` (descriptive labels).
    """

    def __init__(self):
        self.name = 'svm'
        self.weights = []
        self.bias = 0.0
        self.lambda_value = 0.0
        self.optim_method = 'SGD'

    def train(self, train_datas, labels, lambda_value):
        """Train on ``train_datas`` / ``labels`` and store weights and bias.

        Args:
            train_datas: training samples, passed as-is to ``svm.train_svm``.
            labels: one label per sample, passed as-is to ``svm.train_svm``.
            lambda_value: value forwarded to ``svm.train_svm`` and remembered
                in ``self.lambda_value``.
        """
        self.lambda_value = lambda_value
        # train_svm returns two items: the weight vector and a one-element
        # sequence wrapping the bias.
        self.weights, bias_ = svm.train_svm(train_datas, labels, lambda_value)
        self.bias = bias_[0]

    def eval(self, sample):
        """Return the decision value ``sum(weights * sample) + bias``.

        Args:
            sample: sequence with as many components as ``self.weights``.

        Raises:
            ValueError: if ``sample`` and ``self.weights`` differ in length.
        """
        if len(sample) != len(self.weights):
            raise ValueError(
                f'sample has {len(sample)} components, '
                f'but the model has {len(self.weights)} weights'
            )
        value = np.sum(np.array(self.weights) * np.array(sample)) + self.bias
        return value
在此基础上,再定义一个派生类 ClassifierSVM:
- setPSamples() 设置正样本训练集
- setNSamples() 设置负样本训练集
- setLabel() 设置训练标签
- train() 训练, override(重写)父类方法
- eval() 测试, 继承父类
class ClassifierSVM(SVM):
    """Two-class wrapper around :class:`SVM`.

    Positive and negative samples are registered separately; :meth:`train`
    builds the matching label lists, optionally shuffles the combined set,
    and delegates to ``SVM.train``.
    """

    def __init__(self):
        super().__init__()
        self.nSamples = []
        self.pSamples = []
        self.pLabels = []
        self.nLables = []
        self.nLable = -1
        self.pLabel = 1

    def setPSamples(self, samples):
        """Set the positive training samples."""
        self.pSamples = samples

    def setNSamples(self, samples):
        """Set the negative training samples."""
        self.nSamples = samples

    def setLabel(self, pLabel, nLabel):
        """Set the label values used for positive and negative samples."""
        self.pLabel = pLabel
        self.nLable = nLabel

    def train(self, lambda_vlue, shuffle, **kwargs):
        """Assemble the labelled training set and train the SVM.

        Args:
            lambda_vlue: value stored in ``self.lambda_value`` and forwarded
                to ``SVM.train`` as ``lambda_value``.
            shuffle: when truthy, sample/label pairs are shuffled together
                before training; otherwise positives precede negatives.
            **kwargs: accepted and ignored.
        """
        self.lambda_value = lambda_vlue
        self.nLables = [self.nLable] * len(self.nSamples)
        self.pLabels = [self.pLabel] * len(self.pSamples)

        # Copy into lists so that `+` always concatenates, whatever sequence
        # type the samples were supplied as.
        train_samples = list(self.pSamples) + list(self.nSamples)
        train_labels = self.pLabels + self.nLables

        if shuffle:
            # Shuffle pairs so every sample keeps its own label.
            pairs = list(zip(train_samples, train_labels))
            random.shuffle(pairs)
            train_samples = [sample for sample, _ in pairs]
            train_labels = [label for _, label in pairs]

        super().train(train_datas=train_samples, labels=train_labels, lambda_value=self.lambda_value)
训练结果: 权值,偏置
image.png
完整工程
import numpy as np
import detector.svm.vlfeat_svm as svm
import random
class SVM:
    """Linear SVM whose parameters come from the ``vlfeat_svm`` extension.

    Attributes are plain and public: ``weights`` (weight vector), ``bias``
    (offset), ``lambda_value`` (value used in the last training call),
    ``name`` and ``optim_method`` (descriptive labels).
    """

    def __init__(self):
        self.name = 'svm'
        self.weights = []
        self.bias = 0.0
        self.lambda_value = 0.0
        self.optim_method = 'SGD'

    def train(self, train_datas, labels, lambda_value):
        """Train on ``train_datas`` / ``labels`` and store weights and bias.

        Args:
            train_datas: training samples, passed as-is to ``svm.train_svm``.
            labels: one label per sample, passed as-is to ``svm.train_svm``.
            lambda_value: value forwarded to ``svm.train_svm`` and remembered
                in ``self.lambda_value``.
        """
        self.lambda_value = lambda_value
        # train_svm returns two items: the weight vector and a one-element
        # sequence wrapping the bias.
        self.weights, bias_ = svm.train_svm(train_datas, labels, lambda_value)
        self.bias = bias_[0]

    def eval(self, sample):
        """Return the decision value ``sum(weights * sample) + bias``.

        Args:
            sample: sequence with as many components as ``self.weights``.

        Raises:
            ValueError: if ``sample`` and ``self.weights`` differ in length.
        """
        if len(sample) != len(self.weights):
            raise ValueError(
                f'sample has {len(sample)} components, '
                f'but the model has {len(self.weights)} weights'
            )
        value = np.sum(np.array(self.weights) * np.array(sample)) + self.bias
        return value
class ClassifierSVM(SVM):
    """Two-class wrapper around :class:`SVM`.

    Positive and negative samples are registered separately; :meth:`train`
    builds the matching label lists, optionally shuffles the combined set,
    and delegates to ``SVM.train``.
    """

    def __init__(self):
        super().__init__()
        self.nSamples = []
        self.pSamples = []
        self.pLabels = []
        self.nLables = []
        self.nLable = -1
        self.pLabel = 1

    def setPSamples(self, samples):
        """Set the positive training samples."""
        self.pSamples = samples

    def setNSamples(self, samples):
        """Set the negative training samples."""
        self.nSamples = samples

    def setLabel(self, pLabel, nLabel):
        """Set the label values used for positive and negative samples."""
        self.pLabel = pLabel
        self.nLable = nLabel

    def train(self, lambda_vlue, shuffle, **kwargs):
        """Assemble the labelled training set and train the SVM.

        Args:
            lambda_vlue: value stored in ``self.lambda_value`` and forwarded
                to ``SVM.train`` as ``lambda_value``.
            shuffle: when truthy, sample/label pairs are shuffled together
                before training; otherwise positives precede negatives.
            **kwargs: accepted and ignored.
        """
        self.lambda_value = lambda_vlue
        self.nLables = [self.nLable] * len(self.nSamples)
        self.pLabels = [self.pLabel] * len(self.pSamples)

        # Copy into lists so that `+` always concatenates, whatever sequence
        # type the samples were supplied as.
        train_samples = list(self.pSamples) + list(self.nSamples)
        train_labels = self.pLabels + self.nLables

        # Honour the `shuffle` flag: shuffling unconditionally would reorder
        # the data even when the caller passes shuffle=False.
        if shuffle:
            # Shuffle pairs so every sample keeps its own label.
            pairs = list(zip(train_samples, train_labels))
            random.shuffle(pairs)
            train_samples = [sample for sample, _ in pairs]
            train_labels = [label for _, label in pairs]

        super().train(train_datas=train_samples, labels=train_labels, lambda_value=self.lambda_value)
if __name__ == '__main__':
    # Demo: train on three positive and one negative 2-D sample, then print
    # the learned weights followed by the bias.
    print('*' * 30)

    positives = [
        [0.0, -0.5],
        [0.6, -0.3],
        [0.6, 0.0],
    ]
    negatives = [[0.0, 0.5]]

    classifier = ClassifierSVM()
    classifier.setLabel(pLabel=1, nLabel=-1)
    classifier.setPSamples(samples=positives)
    classifier.setNSamples(samples=negatives)
    classifier.train(lambda_vlue=0.01, shuffle=False)

    for learned in (classifier.weights, classifier.bias):
        print(learned)
网友评论