Opencv CUDA应用图像峰值查找

作者: dzqiu | 来源:发表于2018-04-30 15:36 被阅读256次

Opencv CUDA应用图像峰值查找
使用python-opencv在图像中查找最亮点
OpenCV多平台编译指南
openCV
模板匹配(OpenCV+Python)
iOS OpenCV 图像灰度处理
opencv cuda 编程
6. Caffe的安装
Python OpenCV库的常见应用2
linux下编译opencv的无cuda版本，自定义路径

图像处理中，经常通过距离变换对粘连物体图像进行分割。而距离变换常常通过峰值查找确定物体图像对质心。查找单通道图像局部峰值对方法，Matlab工具箱中有imregionalmax，python库skimage中feature.peak_local_max，但是OpenCV中没有相关查找局部峰值对函数。论坛中有人讨论使用滑动窗口，通过图像分割成几个局部，查找局部中对全局峰值来确定局部峰值，但是误差可想而知而且对于峰值出现为一个等值平面的话，是否能够找出峰值就取决于窗的大小。

论文《Morphological Grayscale Reconstruction in Image Analysis:
Applications and Effcient Algorithms》中提出了一种通过形态学灰度重建（Morphological Grayscale Reconstruction）对方法来求取局部峰值，Matlab中就是应用该方法实现得，详见imreconstruct说明中对参考文献，正是该篇论文。使用原图像数值偏移-1作为mask，对原图像进行形态学灰度重建，得到重建图。然后用原图像减去重建图就能得求出峰值的位置（大于零处）。

形态学灰度重建（图源于论文）

论文中多种形态学灰度重建算法，其中，运用并行计算的方法如下图。NG表示对应像素点对邻近像素，可取4邻近或8邻近。本文使用8邻近对算法进行实现。

并行算法实现（图源于论文）

使用CUDA对上述算法进行实现，对圆形粘连图像进行距离变换后查找峰值。（该算法的非CUDA实现，详见【图像处理】灰度图、亮度峰值极值查找）图像效果如下：

原分割图像

距离变换

峰值查找

源代码：
imregionmax.cu

#include "cuda.h"
#include "cuda_runtime.h"
#include "cuda_runtime_api.h"
#include "device_functions.h"

#include "opencv2/core/core.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/opencv.hpp"
#include "stdio.h"
#include <vector>
#include <iostream>
using namespace cv;
using namespace std;

#define Accuracy 0
typedef  unsigned char eleType;
__global__ void  DilationStep(eleType *k,eleType *j,unsigned int total)
{
    unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
    unsigned int offset = x + y*blockDim.x*gridDim.x;

    unsigned int width  = blockDim.x*gridDim.x;
    unsigned int heigth = blockDim.y*gridDim.y;

    if(offset > total) return;

    unsigned int left,right,top,bottom;
    left = offset -1;
    right = offset+1;
    if (x==0) left++;
    if (x==width-1) right--;
    top     = offset - width;
    bottom  = offset + width;
    if (y==0)           top += width;
    if (y==heigth-1)    bottom -= width;

    eleType max = j[offset];
    if(j[left]  -   max > Accuracy)     max = j[left];
    if(j[right] -   max > Accuracy)     max = j[right];
    if(j[bottom]-   max > Accuracy)     max = j[bottom];
    if(j[top]   -   max > Accuracy)     max = j[top];
    unsigned int leftbottom,lefttop,righttop,rightbottom;
    leftbottom  = bottom - 1;
    if(x==0) leftbottom++;
    rightbottom = bottom + 1;
    if(x==width-1) rightbottom--;
    lefttop     = top    - 1;
    if(x==0)    lefttop++;
    righttop    = top    + 1;
    if(x==width-1) righttop--;

    if(j[leftbottom]    -   max > Accuracy)     max = j[leftbottom];
    if(j[rightbottom]   -   max > Accuracy)     max = j[rightbottom];
    if(j[lefttop]       -   max > Accuracy)     max = j[lefttop];
    if(j[righttop]      -   max > Accuracy)     max = j[righttop];

    k[offset] =max;
}
__global__ void PointwiseMinimum(eleType *I,eleType *J,eleType *K,unsigned int total)
{
    unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
    unsigned int offset = x + y*blockDim.x*gridDim.x;
    if(I[offset] - K[offset] <Accuracy)
        J[offset] = I[offset];
    else
        J[offset] = K[offset];
}

#define DIM 16
Mat imregionmax(const Mat *src,eleType h)
{
     Mat LocMax     = src->clone();
     int width      = src->cols;
     int height     = src->rows;
     Mat Imask      = src->clone();
     Mat Jmasker    = Imask - h;
     Mat K          = Jmasker.clone();
     Mat Tmp        = src->clone();

     eleType *Jmasker_dev,*Imask_dev,*K_dev;

     cudaMalloc((void**)&Jmasker_dev,width*height*sizeof(eleType));
     cudaMemcpy(Jmasker_dev,Jmasker.data,width*height*sizeof(eleType),cudaMemcpyHostToDevice);
     cudaMalloc((void**)&Imask_dev,width*height*sizeof(eleType));
     cudaMemcpy(Imask_dev,Imask.data,width*height*sizeof(eleType),cudaMemcpyHostToDevice);
     cudaMalloc((void**)&K_dev,width*height*sizeof(eleType));

     cudaError_t cudaStatus;
     while(1)
     {

         dim3 blocks(64,64);
         dim3 threads((blocks.x+width-1)/blocks.x,(blocks.y+height-1)/blocks.y);
         DilationStep<<<blocks,threads>>>(K_dev,Jmasker_dev,width*height);
         cudaDeviceSynchronize();
         cudaStatus= cudaGetLastError();
         if (cudaStatus != cudaSuccess)
         {
             fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
             return LocMax;
         }
         cudaMemcpy(K.data,K_dev,width*height*sizeof(eleType),cudaMemcpyDeviceToHost);
         PointwiseMinimum<<<blocks,threads>>>(Imask_dev,Jmasker_dev,K_dev,width*height);
         if (cudaStatus != cudaSuccess)
         {
             fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
             return LocMax;
         }
         cudaMemcpy(Tmp.data,Jmasker_dev,width*height*sizeof(eleType),cudaMemcpyDeviceToHost);
         if (memcmp(Tmp.data,Jmasker.data,width*height*sizeof(eleType))==0) break;
         else cudaMemcpy(Jmasker.data,Jmasker_dev,width*height*sizeof(eleType),cudaMemcpyDeviceToHost);
     }
     cudaFree(Imask_dev);
     cudaFree(Jmasker_dev);
     cudaFree(K_dev);
     LocMax = (Imask-Jmasker>0);
     return LocMax;
}

main.cpp

#include <iostream>
#include "opencv2/core/core.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/opencv.hpp"
using namespace std;
using namespace cv;
typedef  unsigned char eleType;
Mat imregionmax(const Mat *src,eleType h);

void LabShow(char wname[20],Mat src,Mat Label,Size offset)
{

    Mat sImg = src.clone();
    if(sImg.channels()<3)
        cvtColor(sImg,sImg,COLOR_GRAY2RGB);
    int font_face = cv::FONT_HERSHEY_COMPLEX;
    double font_scale = 1;
    for(int x=0;x<Label.cols;x++)
    {
        for(int y=0;y<Label.cols;y++)
        {
            Point pos;
            pos.x=x+offset.width;
            pos.y=y+offset.height;
            if(Label.at<uchar>(y,x))
                putText(sImg,"x",pos,font_face,font_scale,Scalar(0,0,255),3,8);
        }
    }
    imshow(wname,sImg);
}


int main(int argc, char *argv[])
{

    Mat src(Size(1024,1024),CV_8UC1);
    src.setTo(0);
    circle(src,Point(200,200),100,Scalar(255),-1);
    circle(src,Point(300,300),100,Scalar(255),-1);
    circle(src,Point(400,600),100,Scalar(255),-1);
    circle(src,Point(550,600),100,Scalar(255),-1);
    namedWindow("input", CV_WINDOW_NORMAL);
    imshow("input",src);

    Mat dis=Mat(src.size(),CV_32FC1);
    distanceTransform(src,dis,CV_DIST_L2,3);
    normalize(dis,dis,1.0,0.0,NORM_MINMAX);
    dis = dis *255;dis.convertTo(dis,CV_8U);
    namedWindow("distance Transfrom", CV_WINDOW_NORMAL);
    imshow("distance Transfrom",dis);

    namedWindow("peeks", CV_WINDOW_NORMAL);
    Mat peeks = imregionmax(&dis,1);
    LabShow("peeks",dis,peeks,Size(0,0));

    waitKey(0);
    return 0;
}

源码下载：
GitHub
参考：
Morphological grayscale reconstruction in image analysis: applications and efficient algorithms
python数字图像处理（19）：骨架提取与分水岭算法

网友评论

ec29a5055dc3:还没学cuda, 有没有不用cuda的版本
ec29a5055dc3:@dzqiu 我在scikit-image找到了[peak_local_max](https://github.com/scikit-image/scikit-image/blob/master/skimage/feature/peak.py)函数, 正打算把它翻译成C++的
ec29a5055dc3:好的, 谢谢, 还在看论文
dzqiu:CUDA只是用于线程并发计算，论文中有提到使用队列得算法实现，可以参考来写以下。

本文标题：Opencv CUDA应用图像峰值查找

本文链接：https://www.haomeiwen.com/subject/edgzlftx.html

延伸阅读

深度阅读

您也可以注册成为美文阅读网的作者，发表您的原创作品、分享您的心情！

Opencv CUDA应用图像峰值查找

相关文章