美文网首页pybind11—Python与C++混合编程
pybind11—HOG特征提取以及python接口封装

pybind11—HOG特征提取以及python接口封装

作者: 侠之大者_7d3f | 来源:发表于2019-05-09 21:01 被阅读0次

前言

HOG(Histogram of Oriented Gradients,方向梯度直方图)是一种常用的图像特征描述子。本文使用 vlfeat 库提取 HOG 特征,并通过 pybind11 将其封装为 Python 接口。


开发测试环境

  • Windows 10, 64-bit
  • Visual Studio 2017
  • Anaconda, with python 3.7, numpy, opencv-python
  • pybind11
  • opencv C++
  • vlfeat库

测试结果

image.png

完整代码

  • python测试代码
import vlHOG.vl_hog as vl_hog
import cv2
import numpy as np

# Print the generated documentation of the compiled extension module.
help(vl_hog)

image_path = r'F:\lena\lena_gray.jpg'
image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
# cv2.imread reports failure by returning None instead of raising, so check
# explicitly before handing the image to the C++ extension.
if image is None:
    raise FileNotFoundError('Cannot read image: {}'.format(image_path))

feature = vl_hog.extract_hog_feature(img_gray=image, cell_size=4, bins=9, type=vl_hog.HOGType.HOG_VariantUoctti)
print(feature)

# Dump every feature value, one per line. ravel() walks the array in C order,
# i.e. the same order as nested loops over axes 0, 1 and 2.
with open(r'./vl_hog.dat', 'w') as f:
    for value in feature.ravel():
        f.write('{}\n'.format(value))

print('Save ok')
  • C++代码
    main.cpp
#include<iostream>
#include<memory>
#include<stdexcept>
#include<vector>
#include<opencv2/opencv.hpp>
#include<vl/hog.h>
#include<pybind11/pybind11.h>
#include<pybind11/numpy.h>
#include<pybind11/stl.h>
#include"ndarray_converter.h"


#define BUILD_FOR_PYTHON_API 1


namespace py = pybind11;

/// Selects which HOG variant extract_hog_featrue asks VLFeat to compute.
/// The numeric values are the ones the compiler assigned implicitly before.
enum class HOGType
{
    HOG_DalalTriggs = 0,   ///< mapped to VlHogVariantDalalTriggs
    HOG_VariantUoctti = 1  ///< mapped to VlHogVariantUoctti
};


/// Extracts a VLFeat HOG descriptor from a single-channel 8-bit image.
///
/// @param image_gray  Input image; must be non-empty and of type CV_8UC1.
/// @param cell_size   HOG cell size in pixels; must be positive.
/// @param bins        Number of orientation bins; must be positive.
/// @param type        HOG variant to compute.
/// @return A double array of shape (hogHeight, hogWidth, hogDimension), filled
///         from the VLFeat output buffer in linear order.
/// @throws std::invalid_argument if the image, cell_size, bins or type is invalid.
/// @throws std::runtime_error    if VLFeat does not return a HOG object.
py::array_t<double> extract_hog_featrue(cv::Mat& image_gray, int cell_size, int bins, HOGType type) {

    // The pixel copy below reads uchar values, so any other depth or channel
    // count would be misread; reject such inputs instead.
    if (image_gray.empty() || image_gray.type() != CV_8UC1) {
        throw std::invalid_argument("extract_hog_feature: img_gray must be a non-empty 8-bit single-channel image");
    }
    if (cell_size <= 0 || bins <= 0) {
        throw std::invalid_argument("extract_hog_feature: cell_size and bins must be positive");
    }

    VlHogVariant variant;
    switch (type) {
    case HOGType::HOG_VariantUoctti:
        variant = VlHogVariantUoctti;
        break;
    case HOGType::HOG_DalalTriggs:
        variant = VlHogVariantDalalTriggs;
        break;
    default:
        throw std::invalid_argument("extract_hog_feature: unknown HOGType");
    }

    const int height = image_gray.rows;
    const int width = image_gray.cols;
    const int numChannels = image_gray.channels();  // 1, guaranteed by the type check

    // Convert the image to a dense row-major float buffer. Going through
    // ptr<uchar>(row) keeps this correct for non-continuous matrices as well.
    std::vector<float> image_buffer;
    image_buffer.reserve(static_cast<std::size_t>(width) * static_cast<std::size_t>(height));
    for (int row = 0; row < height; ++row) {
        const uchar* src = image_gray.ptr<uchar>(row);
        image_buffer.insert(image_buffer.end(), src, src + width);
    }

    // The HOG object is released automatically on every exit path.
    std::unique_ptr<VlHog, decltype(&vl_hog_delete)> hog(
        vl_hog_new(variant, bins, VL_FALSE), &vl_hog_delete);
    if (!hog) {
        throw std::runtime_error("extract_hog_feature: vl_hog_new failed");
    }

    // NOTE(review): the (height, width) argument order is kept exactly as in
    // the original code. The VLFeat header appears to declare the parameters
    // as (width, height) — confirm before relying on non-square images.
    vl_hog_put_image(hog.get(), image_buffer.data(), height, width, numChannels, cell_size);

    const py::ssize_t hogWidth = static_cast<py::ssize_t>(vl_hog_get_width(hog.get()));
    const py::ssize_t hogHeight = static_cast<py::ssize_t>(vl_hog_get_height(hog.get()));
    const py::ssize_t hogDimension = static_cast<py::ssize_t>(vl_hog_get_dimension(hog.get()));

    // vl_hog_extract writes into a caller-provided buffer; a std::vector is
    // used so that the buffer is freed (the original vl_malloc block leaked).
    std::vector<float> hogArray(static_cast<std::size_t>(hogWidth) *
                                static_cast<std::size_t>(hogHeight) *
                                static_cast<std::size_t>(hogDimension));
    vl_hog_extract(hog.get(), hogArray.data());
    hog.reset();

    // Allocate the result directly as a 3-D double array.
    const std::vector<py::ssize_t> shape{ hogHeight, hogWidth, hogDimension };
    py::array_t<double> outFeature(shape);
    auto out = outFeature.mutable_unchecked<3>();

    // NOTE(review): the buffer is copied in linear order, i.e. it is treated as
    // (height, width, dimension) with the dimension varying fastest, as in the
    // original code — confirm this matches the layout VLFeat documents.
    std::size_t idx = 0;
    for (py::ssize_t i = 0; i < hogHeight; ++i)
    {
        for (py::ssize_t j = 0; j < hogWidth; ++j)
        {
            for (py::ssize_t k = 0; k < hogDimension; ++k)
            {
                out(i, j, k) = hogArray[idx++];
            }
        }
    }

    return outFeature;

}



#if !BUILD_FOR_PYTHON_API


/// Stand-alone smoke test: loads a grayscale image and runs the HOG extractor.
/// Only compiled when BUILD_FOR_PYTHON_API is 0.
///
/// NOTE(review): extract_hog_featrue returns a py::array_t, which presumably
/// needs an initialised Python interpreter; this function does not set one up.
/// Confirm before enabling this build.
///
/// @return 0 on success, 1 if the test image cannot be loaded.
int main() {

    cv::Mat img_input = cv::imread("F:\\lena\\lena_gray.jpg", 0);
    // cv::imread returns an empty matrix when the file cannot be read.
    if (img_input.empty()) {
        std::cerr << "Failed to load image: F:\\lena\\lena_gray.jpg" << std::endl;
        return 1;
    }

    const int numOrientations = 9;
    const int cellSize = 4;

    auto dst = extract_hog_featrue(img_input, cellSize, numOrientations, HOGType::HOG_VariantUoctti);


    // Keeps the console window open (Windows shell command).
    system("pause");

    return 0;
}


#else

// Python module definition: exposes the HOGType enum and the
// extract_hog_feature function of the vl_hog extension.
PYBIND11_MODULE(vl_hog, m) {

    m.doc() = "HOG feature extraction based on VLFeat";

    // init_numpy() returns false when the NumPy C-API could not be imported;
    // in that case a Python error is pending, so propagate it instead of
    // continuing with an unusable cv::Mat converter.
    if (!NDArrayConverter::init_numpy()) {
        throw py::error_already_set();
    }

    py::enum_<HOGType>(m, "HOGType")
        .value("HOG_DalalTriggs", HOGType::HOG_DalalTriggs)
        .value("HOG_VariantUoctti", HOGType::HOG_VariantUoctti)
        .export_values();

    m.def("extract_hog_feature", &extract_hog_featrue,
        "Extract a HOG descriptor from a grayscale image; returns a 3-D array of doubles.",
        py::arg("img_gray"), py::arg("cell_size"), py::arg("bins"), py::arg("type"));
}



#endif // !BUILD_FOR_PYTHON_API
  • ndarray_converter.cpp
// borrowed in spirit from https://github.com/yati-sagade/opencv-ndarray-conversion
// MIT License

#include "ndarray_converter.h"

#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include <numpy/ndarrayobject.h>

#if PY_VERSION_HEX >= 0x03000000
    #define PyInt_Check PyLong_Check
    #define PyInt_AsLong PyLong_AsLong
#endif

/// Small holder for the argument label used in conversion error messages.
struct Tmp {
    const char* name;  // stored as given; never copied or freed by this struct

    Tmp(const char* label) : name(label) {}
};

/// Label inserted into the messages that toMat passes to failmsg.
Tmp info("return value");

// Imports the NumPy C-API for this translation unit.
// Returns true on success; import_array1(false) makes the function return
// false if the import fails.
bool NDArrayConverter::init_numpy() {
    // this has to be in this file, since PyArray_API is defined as static
    import_array1(false);
    return true;
}

/*
 * The following conversion functions are taken/adapted from OpenCV's cv2.cpp file
 * inside modules/python/src2 folder (OpenCV 3.1.0)
 */

// Exception type that ERRWRAP2 passes to PyErr_SetString when a cv::Exception
// is caught. It starts out null and no assignment is visible in this file.
// NOTE(review): confirm whether it is meant to be initialised somewhere.
static PyObject* opencv_error = 0;

// Formats a printf-style message (truncated to the local buffer size) and
// sets it as a Python TypeError. Always returns 0.
static int failmsg(const char *fmt, ...)
{
    char message[1000];

    va_list args;
    va_start(args, fmt);
    vsnprintf(message, sizeof(message), fmt, args);
    va_end(args);

    PyErr_SetString(PyExc_TypeError, message);
    return 0;
}

// Scope guard: calls PyEval_SaveThread() on construction and
// PyEval_RestoreThread() with the saved state on destruction.
class PyAllowThreads
{
public:
    PyAllowThreads() : _state(PyEval_SaveThread()) {}
    ~PyAllowThreads()
    {
        PyEval_RestoreThread(_state);
    }

    // Copying would restore the same thread state twice, so forbid it.
    PyAllowThreads(const PyAllowThreads&) = delete;
    PyAllowThreads& operator=(const PyAllowThreads&) = delete;
private:
    PyThreadState* _state;
};

// Scope guard: calls PyGILState_Ensure() on construction and
// PyGILState_Release() with the returned state on destruction.
class PyEnsureGIL
{
public:
    PyEnsureGIL() : _state(PyGILState_Ensure()) {}
    ~PyEnsureGIL()
    {
        PyGILState_Release(_state);
    }

    // Copying would release the same GIL state twice, so forbid it.
    PyEnsureGIL(const PyEnsureGIL&) = delete;
    PyEnsureGIL& operator=(const PyEnsureGIL&) = delete;
private:
    PyGILState_STATE _state;
};

/*
 * ERRWRAP2(expr): evaluates expr inside a PyAllowThreads scope. If expr throws
 * a cv::Exception, the message is set as a Python error and the enclosing
 * function returns 0.
 *
 * opencv_error is null in the code visible here, and PyErr_SetString needs a
 * valid exception type, so PyExc_RuntimeError is used as a fallback.
 */
#define ERRWRAP2(expr) \
try \
{ \
    PyAllowThreads allowThreads; \
    expr; \
} \
catch (const cv::Exception &e) \
{ \
    PyErr_SetString(opencv_error ? opencv_error : PyExc_RuntimeError, e.what()); \
    return 0; \
}

using namespace cv;

// cv::MatAllocator that backs cv::Mat data with NumPy arrays: each UMatData it
// creates keeps the owning PyObject* in `userdata`.
class NumpyAllocator : public MatAllocator
{
public:
    // Remembers OpenCV's standard allocator for the calls delegated below.
    NumpyAllocator() { stdAllocator = Mat::getStdAllocator(); }
    ~NumpyAllocator() {}

    // Wraps the existing NumPy array `o` in a new UMatData.
    // Fills step[0..dims-2] from the array's strides and step[dims-1] with the
    // element size of `type`. The reference count of `o` is not changed here.
    UMatData* allocate(PyObject* o, int dims, const int* sizes, int type, size_t* step) const
    {
        UMatData* u = new UMatData(this);
        u->data = u->origdata = (uchar*)PyArray_DATA((PyArrayObject*) o);
        npy_intp* _strides = PyArray_STRIDES((PyArrayObject*) o);
        for( int i = 0; i < dims - 1; i++ )
            step[i] = (size_t)_strides[i];
        step[dims-1] = CV_ELEM_SIZE(type);
        u->size = sizes[0]*step[0];
        u->userdata = o;
        return u;
    }

    // Allocates storage for a new Mat by creating a NumPy array of matching
    // depth and shape (with an extra trailing axis when the type has more than
    // one channel) and wrapping it with the overload above.
    UMatData* allocate(int dims0, const int* sizes, int type, void* data, size_t* step, int flags, UMatUsageFlags usageFlags) const
    {
        if( data != 0 )
        {
            CV_Error(Error::StsAssert, "The data should normally be NULL!");
            // probably this is safe to do in such extreme case
            return stdAllocator->allocate(dims0, sizes, type, data, step, flags, usageFlags);
        }
        // The NumPy calls below are made while holding the GIL.
        PyEnsureGIL gil;

        int depth = CV_MAT_DEPTH(type);
        int cn = CV_MAT_CN(type);
        // f is 1 when size_t is 8 bytes wide and 0 when it is 4 bytes wide; the
        // fallback typenum below is then NPY_ULONGLONG or NPY_UINT respectively.
        const int f = (int)(sizeof(size_t)/8);
        int typenum = depth == CV_8U ? NPY_UBYTE : depth == CV_8S ? NPY_BYTE :
        depth == CV_16U ? NPY_USHORT : depth == CV_16S ? NPY_SHORT :
        depth == CV_32S ? NPY_INT : depth == CV_32F ? NPY_FLOAT :
        depth == CV_64F ? NPY_DOUBLE : f*NPY_ULONGLONG + (f^1)*NPY_UINT;
        int i, dims = dims0;
        cv::AutoBuffer<npy_intp> _sizes(dims + 1);
        for( i = 0; i < dims; i++ )
            _sizes[i] = sizes[i];
        // Channels become an additional last dimension of the NumPy array.
        if( cn > 1 )
            _sizes[dims++] = cn;
        PyObject* o = PyArray_SimpleNew(dims, _sizes, typenum);
        if(!o)
            CV_Error_(Error::StsError, ("The numpy array of typenum=%d, ndims=%d can not be created", typenum, dims));
        return allocate(o, dims0, sizes, type, step);
    }

    // Delegates to the standard allocator.
    bool allocate(UMatData* u, int accessFlags, UMatUsageFlags usageFlags) const
    {
        return stdAllocator->allocate(u, accessFlags, usageFlags);
    }

    // Once the Mat reference count has reached zero, drops the reference to
    // the NumPy array stored in userdata and deletes the UMatData.
    void deallocate(UMatData* u) const
    {
        if(!u)
            return;
        PyEnsureGIL gil;
        CV_Assert(u->urefcount >= 0);
        CV_Assert(u->refcount >= 0);
        if(u->refcount == 0)
        {
            PyObject* o = (PyObject*)u->userdata;
            Py_XDECREF(o);
            delete u;
        }
    }

    // OpenCV's standard allocator, obtained in the constructor.
    const MatAllocator* stdAllocator;
};

// Single allocator instance used by toMat and toNDArray.
NumpyAllocator g_numpyAllocator;

// Converts a Python object into a cv::Mat.
//
// Accepted inputs:
//   - NULL / None : m is left as is (its allocator is set to the NumPy
//                   allocator if it holds no data); returns true.
//   - int / float : a 4x1 CV_64F Mat holding {value, 0, 0, 0}.
//   - tuple       : an Nx1 CV_64F Mat; every item must be an int or a float.
//   - numpy array : a Mat that shares the array's memory when the layout is
//                   compatible, otherwise a Mat over a converted copy.
//
// Returns true on success. On failure returns false with a Python error set
// (a TypeError via failmsg, or whatever NumPy raised while copying).
bool NDArrayConverter::toMat(PyObject *o, Mat &m)
{
    bool allowND = true;
    if(!o || o == Py_None)
    {
        if( !m.data )
            m.allocator = &g_numpyAllocator;
        return true;
    }

    if( PyInt_Check(o) )
    {
        double v[] = {static_cast<double>(PyInt_AsLong((PyObject*)o)), 0., 0., 0.};
        m = Mat(4, 1, CV_64F, v).clone();
        return true;
    }
    if( PyFloat_Check(o) )
    {
        double v[] = {PyFloat_AsDouble((PyObject*)o), 0., 0., 0.};
        m = Mat(4, 1, CV_64F, v).clone();
        return true;
    }
    if( PyTuple_Check(o) )
    {
        int i, sz = (int)PyTuple_Size((PyObject*)o);
        m = Mat(sz, 1, CV_64F);
        for( i = 0; i < sz; i++ )
        {
            PyObject* oi = PyTuple_GET_ITEM(o, i);
            if( PyInt_Check(oi) )
                m.at<double>(i) = (double)PyInt_AsLong(oi);
            else if( PyFloat_Check(oi) )
                m.at<double>(i) = (double)PyFloat_AsDouble(oi);
            else
            {
                failmsg("%s is not a numerical tuple", info.name);
                m.release();
                return false;
            }
        }
        return true;
    }

    if( !PyArray_Check(o) )
    {
        failmsg("%s is not a numpy array, neither a scalar", info.name);
        return false;
    }

    PyArrayObject* oarr = (PyArrayObject*) o;

    // Map the NumPy element type to an OpenCV depth.
    bool needcopy = false, needcast = false;
    int typenum = PyArray_TYPE(oarr), new_typenum = typenum;
    int type = typenum == NPY_UBYTE ? CV_8U :
               typenum == NPY_BYTE ? CV_8S :
               typenum == NPY_USHORT ? CV_16U :
               typenum == NPY_SHORT ? CV_16S :
               typenum == NPY_INT ? CV_32S :
               typenum == NPY_INT32 ? CV_32S :
               typenum == NPY_FLOAT ? CV_32F :
               typenum == NPY_DOUBLE ? CV_64F : -1;

    if( type < 0 )
    {
        // 64-bit integer arrays are converted to 32-bit integers.
        if( typenum == NPY_INT64 || typenum == NPY_UINT64 || typenum == NPY_LONG )
        {
            needcopy = needcast = true;
            new_typenum = NPY_INT;
            type = CV_32S;
        }
        else
        {
            failmsg("%s data type = %d is not supported", info.name, typenum);
            return false;
        }
    }

#ifndef CV_MAX_DIM
    const int CV_MAX_DIM = 32;
#endif

    int ndims = PyArray_NDIM(oarr);
    if(ndims >= CV_MAX_DIM)
    {
        failmsg("%s dimensionality (=%d) is too high", info.name, ndims);
        return false;
    }

    int size[CV_MAX_DIM+1];
    size_t step[CV_MAX_DIM+1];
    size_t elemsize = CV_ELEM_SIZE1(type);
    const npy_intp* _sizes = PyArray_DIMS(oarr);
    const npy_intp* _strides = PyArray_STRIDES(oarr);
    // A 3-D array with a small enough last axis is treated as a 2-D
    // multi-channel image.
    bool ismultichannel = ndims == 3 && _sizes[2] <= CV_CN_MAX;

    for( int i = ndims-1; i >= 0 && !needcopy; i-- )
    {
        // these checks handle cases of
        //  a) multi-dimensional (ndims > 2) arrays, as well as simpler 1- and 2-dimensional cases
        //  b) transposed arrays, where _strides[] elements go in non-descending order
        //  c) flipped arrays, where some of _strides[] elements are negative
        // the _sizes[i] > 1 is needed to avoid spurious copies when NPY_RELAXED_STRIDES is set
        if( (i == ndims-1 && _sizes[i] > 1 && (size_t)_strides[i] != elemsize) ||
            (i < ndims-1 && _sizes[i] > 1 && _strides[i] < _strides[i+1]) )
            needcopy = true;
    }

    if( ismultichannel && _strides[1] != (npy_intp)elemsize*_sizes[2] )
        needcopy = true;

    if (needcopy)
    {
        //if (info.outputarg)
        //{
        //    failmsg("Layout of the output array %s is incompatible with cv::Mat (step[ndims-1] != elemsize or step[1] != elemsize*nchannels)", info.name);
        //    return false;
        //}

        // Both calls return a new array object, which from here on replaces
        // the caller's array as the Mat's backing store.
        if( needcast ) {
            o = PyArray_Cast(oarr, new_typenum);
            oarr = (PyArrayObject*) o;
        }
        else {
            oarr = PyArray_GETCONTIGUOUS(oarr);
            o = (PyObject*) oarr;
        }

        // The conversion can fail (for example when out of memory) and then
        // yields NULL, presumably with a Python error already set by NumPy.
        // Report failure instead of dereferencing the null pointer below.
        if( !oarr )
        {
            return false;
        }

        _strides = PyArray_STRIDES(oarr);
    }

    // Normalize strides in case NPY_RELAXED_STRIDES is set
    size_t default_step = elemsize;
    for ( int i = ndims - 1; i >= 0; --i )
    {
        size[i] = (int)_sizes[i];
        if ( size[i] > 1 )
        {
            step[i] = (size_t)_strides[i];
            default_step = step[i] * size[i];
        }
        else
        {
            step[i] = default_step;
            default_step *= size[i];
        }
    }

    // handle degenerate case
    if( ndims == 0) {
        size[ndims] = 1;
        step[ndims] = elemsize;
        ndims++;
    }

    // Fold the last axis into the channel count of the Mat type.
    if( ismultichannel )
    {
        ndims--;
        type |= CV_MAKETYPE(0, size[2]);
    }

    if( ndims > 2 && !allowND )
    {
        failmsg("%s has more than 2 dimensions", info.name);
        return false;
    }

    // Build a Mat header over the array memory and attach a UMatData whose
    // userdata points at the backing Python object.
    m = Mat(ndims, size, type, PyArray_DATA(oarr), step);
    m.u = g_numpyAllocator.allocate(o, ndims, size, type, step);
    m.addref();

    // When no copy was made, the Mat shares the caller's array and needs its
    // own reference. When a copy was made, the new reference returned by the
    // copy call is the one the Mat keeps.
    if( !needcopy )
    {
        Py_INCREF(o);
    }
    m.allocator = &g_numpyAllocator;

    return true;
}

// Converts a cv::Mat into a NumPy array object.
// Returns None for a Mat without data. A Mat that is not already backed by the
// NumPy allocator is first copied into one that is. The returned object
// carries a new reference.
PyObject* NDArrayConverter::toNDArray(const cv::Mat& m)
{
    if( m.data == nullptr )
    {
        Py_RETURN_NONE;
    }

    Mat numpyBacked;
    Mat* source = const_cast<Mat*>(&m);
    const bool ownedByNumpy = source->u != nullptr && source->allocator == &g_numpyAllocator;
    if( !ownedByNumpy )
    {
        // Copy into a Mat whose storage is created by the NumPy allocator.
        numpyBacked.allocator = &g_numpyAllocator;
        ERRWRAP2(m.copyTo(numpyBacked));
        source = &numpyBacked;
    }

    // The allocator keeps the backing Python object in userdata.
    PyObject* array = static_cast<PyObject*>(source->u->userdata);
    Py_INCREF(array);
    return array;
}

  • ndarray_converter.h
# ifndef __NDARRAY_CONVERTER_H__
# define __NDARRAY_CONVERTER_H__

#include <Python.h>
#include <opencv2/core/core.hpp>


// Static helpers that convert between Python objects and cv::Mat.
class NDArrayConverter {
public:
    // must call this first, or the other routines don't work!
    // Returns a bool status.
    static bool init_numpy();
    
    // Fills `m` from the Python object `o`; returns a bool status.
    static bool toMat(PyObject* o, cv::Mat &m);
    // Returns a Python object representing `mat`.
    static PyObject* toNDArray(const cv::Mat& mat);
};

//
// Define the type converter
//

#include <pybind11/pybind11.h>

namespace pybind11 { namespace detail {
    
template <> struct type_caster<cv::Mat> {
public:
    
    PYBIND11_TYPE_CASTER(cv::Mat, _("numpy.ndarray"));
    
    bool load(handle src, bool) {
        return NDArrayConverter::toMat(src.ptr(), value);
    }
    
    static handle cast(const cv::Mat &m, return_value_policy, handle defval) {
        return handle(NDArrayConverter::toNDArray(m));
    }
};
    
    
}} // namespace pybind11::detail

# endif

相关文章

网友评论

    本文标题:pybind11—HOG特征提取以及python接口封装

    本文链接:https://www.haomeiwen.com/subject/rvogoqtx.html