美文网首页CUDAroi
Opencv CUDA应用 图像峰值查找

Opencv CUDA应用 图像峰值查找

作者: dzqiu | 来源:发表于2018-04-30 15:36 被阅读256次

    图像处理中,经常通过距离变换对粘连物体图像进行分割。而距离变换常常通过峰值查找确定物体图像对质心。查找单通道图像局部峰值对方法,Matlab工具箱中有imregionalmax,python库skimage中feature.peak_local_max,但是OpenCV中没有相关查找局部峰值对函数。论坛中有人讨论使用滑动窗口,通过图像分割成几个局部,查找局部中对全局峰值来确定局部峰值,但是误差可想而知而且对于峰值出现为一个等值平面的话,是否能够找出峰值就取决于窗的大小。

    论文《Morphological Grayscale Reconstruction in Image Analysis:
    Applications and Effcient Algorithms》中提出了一种通过形态学灰度重建(Morphological Grayscale Reconstruction)对方法来求取局部峰值,Matlab中就是应用该方法实现得,详见imreconstruct说明中对参考文献,正是该篇论文。使用原图像数值偏移-1作为mask,对原图像进行形态学灰度重建,得到重建图。然后用原图像减去重建图就能得求出峰值的位置(大于零处)。


    形态学灰度重建(图源于论文)

    论文中多种形态学灰度重建算法,其中,运用并行计算的方法如下图。NG表示对应像素点对邻近像素,可取4邻近或8邻近。本文使用8邻近对算法进行实现。


    并行算法实现(图源于论文)

    使用CUDA对上述算法进行实现,对圆形粘连图像进行距离变换后查找峰值。(该算法的非CUDA实现,详见 【图像处理】灰度图、亮度峰值极值查找)图像效果如下:

    原分割图像 距离变换 峰值查找

    源代码:
    imregionmax.cu

    #include "cuda.h"
    #include "cuda_runtime.h"
    #include "cuda_runtime_api.h"
    #include "device_functions.h"
    
    #include "opencv2/core/core.hpp"
    #include "opencv2/highgui/highgui.hpp"
    #include "opencv2/opencv.hpp"
    #include "stdio.h"
    #include <vector>
    #include <iostream>
    using namespace cv;
    using namespace std;
    
    #define Accuracy 0
    typedef  unsigned char eleType;
    __global__ void  DilationStep(eleType *k,eleType *j,unsigned int total)
    {
        unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
        unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
        unsigned int offset = x + y*blockDim.x*gridDim.x;
    
        unsigned int width  = blockDim.x*gridDim.x;
        unsigned int heigth = blockDim.y*gridDim.y;
    
        if(offset > total) return;
    
        unsigned int left,right,top,bottom;
        left = offset -1;
        right = offset+1;
        if (x==0) left++;
        if (x==width-1) right--;
        top     = offset - width;
        bottom  = offset + width;
        if (y==0)           top += width;
        if (y==heigth-1)    bottom -= width;
    
        eleType max = j[offset];
        if(j[left]  -   max > Accuracy)     max = j[left];
        if(j[right] -   max > Accuracy)     max = j[right];
        if(j[bottom]-   max > Accuracy)     max = j[bottom];
        if(j[top]   -   max > Accuracy)     max = j[top];
        unsigned int leftbottom,lefttop,righttop,rightbottom;
        leftbottom  = bottom - 1;
        if(x==0) leftbottom++;
        rightbottom = bottom + 1;
        if(x==width-1) rightbottom--;
        lefttop     = top    - 1;
        if(x==0)    lefttop++;
        righttop    = top    + 1;
        if(x==width-1) righttop--;
    
        if(j[leftbottom]    -   max > Accuracy)     max = j[leftbottom];
        if(j[rightbottom]   -   max > Accuracy)     max = j[rightbottom];
        if(j[lefttop]       -   max > Accuracy)     max = j[lefttop];
        if(j[righttop]      -   max > Accuracy)     max = j[righttop];
    
        k[offset] =max;
    }
    __global__ void PointwiseMinimum(eleType *I,eleType *J,eleType *K,unsigned int total)
    {
        unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
        unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
        unsigned int offset = x + y*blockDim.x*gridDim.x;
        if(I[offset] - K[offset] <Accuracy)
            J[offset] = I[offset];
        else
            J[offset] = K[offset];
    }
    
    #define DIM 16
    Mat imregionmax(const Mat *src,eleType h)
    {
         Mat LocMax     = src->clone();
         int width      = src->cols;
         int height     = src->rows;
         Mat Imask      = src->clone();
         Mat Jmasker    = Imask - h;
         Mat K          = Jmasker.clone();
         Mat Tmp        = src->clone();
    
         eleType *Jmasker_dev,*Imask_dev,*K_dev;
    
         cudaMalloc((void**)&Jmasker_dev,width*height*sizeof(eleType));
         cudaMemcpy(Jmasker_dev,Jmasker.data,width*height*sizeof(eleType),cudaMemcpyHostToDevice);
         cudaMalloc((void**)&Imask_dev,width*height*sizeof(eleType));
         cudaMemcpy(Imask_dev,Imask.data,width*height*sizeof(eleType),cudaMemcpyHostToDevice);
         cudaMalloc((void**)&K_dev,width*height*sizeof(eleType));
    
         cudaError_t cudaStatus;
         while(1)
         {
    
             dim3 blocks(64,64);
             dim3 threads((blocks.x+width-1)/blocks.x,(blocks.y+height-1)/blocks.y);
             DilationStep<<<blocks,threads>>>(K_dev,Jmasker_dev,width*height);
             cudaDeviceSynchronize();
             cudaStatus= cudaGetLastError();
             if (cudaStatus != cudaSuccess)
             {
                 fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
                 return LocMax;
             }
             cudaMemcpy(K.data,K_dev,width*height*sizeof(eleType),cudaMemcpyDeviceToHost);
             PointwiseMinimum<<<blocks,threads>>>(Imask_dev,Jmasker_dev,K_dev,width*height);
             if (cudaStatus != cudaSuccess)
             {
                 fprintf(stderr, "addKernel launch failed: %s\n", cudaGetErrorString(cudaStatus));
                 return LocMax;
             }
             cudaMemcpy(Tmp.data,Jmasker_dev,width*height*sizeof(eleType),cudaMemcpyDeviceToHost);
             if (memcmp(Tmp.data,Jmasker.data,width*height*sizeof(eleType))==0) break;
             else cudaMemcpy(Jmasker.data,Jmasker_dev,width*height*sizeof(eleType),cudaMemcpyDeviceToHost);
         }
         cudaFree(Imask_dev);
         cudaFree(Jmasker_dev);
         cudaFree(K_dev);
         LocMax = (Imask-Jmasker>0);
         return LocMax;
    }
    

    main.cpp

    #include <iostream>
    #include "opencv2/core/core.hpp"
    #include "opencv2/highgui/highgui.hpp"
    #include "opencv2/opencv.hpp"
    using namespace std;
    using namespace cv;
    typedef  unsigned char eleType;
    Mat imregionmax(const Mat *src,eleType h);
    
    void LabShow(char wname[20],Mat src,Mat Label,Size offset)
    {
    
        Mat sImg = src.clone();
        if(sImg.channels()<3)
            cvtColor(sImg,sImg,COLOR_GRAY2RGB);
        int font_face = cv::FONT_HERSHEY_COMPLEX;
        double font_scale = 1;
        for(int x=0;x<Label.cols;x++)
        {
            for(int y=0;y<Label.cols;y++)
            {
                Point pos;
                pos.x=x+offset.width;
                pos.y=y+offset.height;
                if(Label.at<uchar>(y,x))
                    putText(sImg,"x",pos,font_face,font_scale,Scalar(0,0,255),3,8);
            }
        }
        imshow(wname,sImg);
    }
    
    
    int main(int argc, char *argv[])
    {
    
        Mat src(Size(1024,1024),CV_8UC1);
        src.setTo(0);
        circle(src,Point(200,200),100,Scalar(255),-1);
        circle(src,Point(300,300),100,Scalar(255),-1);
        circle(src,Point(400,600),100,Scalar(255),-1);
        circle(src,Point(550,600),100,Scalar(255),-1);
        namedWindow("input", CV_WINDOW_NORMAL);
        imshow("input",src);
    
        Mat dis=Mat(src.size(),CV_32FC1);
        distanceTransform(src,dis,CV_DIST_L2,3);
        normalize(dis,dis,1.0,0.0,NORM_MINMAX);
        dis = dis *255;dis.convertTo(dis,CV_8U);
        namedWindow("distance Transfrom", CV_WINDOW_NORMAL);
        imshow("distance Transfrom",dis);
    
        namedWindow("peeks", CV_WINDOW_NORMAL);
        Mat peeks = imregionmax(&dis,1);
        LabShow("peeks",dis,peeks,Size(0,0));
    
        waitKey(0);
        return 0;
    }
    
    

    源码下载:
    GitHub
    参考:
    Morphological grayscale reconstruction in image analysis: applications and efficient algorithms
    python数字图像处理(19):骨架提取与分水岭算法

    相关文章

      网友评论

      • ec29a5055dc3:还没学cuda, 有没有不用cuda的版本
        ec29a5055dc3:@dzqiu 我在scikit-image找到了[peak_local_max](https://github.com/scikit-image/scikit-image/blob/master/skimage/feature/peak.py)函数, 正打算把它翻译成C++的
        ec29a5055dc3:好的, 谢谢, 还在看论文
        dzqiu:CUDA只是用于线程并发计算,论文中有提到使用队列得算法实现,可以参考来写以下。

      本文标题:Opencv CUDA应用 图像峰值查找

      本文链接:https://www.haomeiwen.com/subject/edgzlftx.html