美文网首页
cuda学习一

cuda学习一

作者: 无敌科 | 来源:发表于2019-04-24 17:31 被阅读0次

    用 CUDA 重写 OpenCV 的 resize 实现（双线性插值，即 INTER_LINEAR 方式）。

    cuda实现函数

    // Bilinear resize kernel for a single-channel float image.
    //
    // Launch layout: 2D grid of 2D blocks; each thread produces the destination
    // pixel whose column/row equals its global x/y thread index. Threads whose
    // index falls outside dst do nothing.
    //
    // src    : source image view (only read)
    // dst    : destination image view (one element written per in-range thread)
    // fy, fx : multipliers that map a destination pixel centre to a source
    //          coordinate: src = (dst + 0.5) * f - 0.5
    //
    // The four neighbouring sample positions are clamped to the source bounds
    // before reading, while the interpolation weights are derived from the
    // unclamped positions. The interpolated value is limited to [0, 255]
    // before it is stored.
    __global__ void ResizeLinear_32FC1_kernel(const cv::cuda::PtrStepSz<float> src, cv::cuda::PtrStepSz<float> dst, const float fy, const float fx)
    {
        const int col = blockDim.x * blockIdx.x + threadIdx.x;
        const int row = blockDim.y * blockIdx.y + threadIdx.y;

        // Threads past the right/bottom edge of the destination have no pixel to write.
        if (col >= dst.cols || row >= dst.rows)
        {
            return;
        }

        // Source-space position of the destination pixel centre.
        const float sx = (col + 0.5f) * fx - 0.5f;
        const float sy = (row + 0.5f) * fy - 0.5f;

        // Integer neighbours: floor (round toward -inf) and floor + 1.
        const int x_lo = __float2int_rd(sx);
        const int y_lo = __float2int_rd(sy);
        const int x_hi = x_lo + 1;
        const int y_hi = y_lo + 1;

        // Indices actually used for reading, clamped into the source image.
        const int last_col = src.cols - 1;
        const int last_row = src.rows - 1;
        const int rx_lo = ::max(0, ::min(x_lo, last_col));
        const int ry_lo = ::max(0, ::min(y_lo, last_row));
        const int rx_hi = ::max(0, ::min(x_hi, last_col));
        const int ry_hi = ::max(0, ::min(y_hi, last_row));

        // Per-axis weights; each pair sums to 1 because x_hi - x_lo == 1 (same for y).
        const float w_lo_x = x_hi - sx;
        const float w_hi_x = sx - x_lo;
        const float w_lo_y = y_hi - sy;
        const float w_hi_y = sy - y_lo;

        float acc = 0.f;
        acc = acc + src(ry_lo, rx_lo) * (w_lo_x * w_lo_y);
        acc = acc + src(ry_lo, rx_hi) * (w_hi_x * w_lo_y);
        acc = acc + src(ry_hi, rx_lo) * (w_lo_x * w_hi_y);
        acc = acc + src(ry_hi, rx_hi) * (w_hi_x * w_hi_y);

        // Limit the result to the [0, 255] range.
        if (acc > 255)
        {
            acc = 255;
        }
        if (acc < 0)
        {
            acc = 0;
        }

        dst(row, col) = acc;
    }

    将核函数的调用封装成 ResizeLinear_32FC1_call：

    // Host-side launcher for ResizeLinear_32FC1_kernel.
    //
    // src, dst : source / destination image views handed to the kernel
    // fy, fx   : scale multipliers forwarded unchanged to the kernel
    // stream   : CUDA stream the kernel is enqueued on
    //
    // Uses 32x8 thread blocks and a grid sized by divUp over the destination's
    // columns and rows. A launch error is reported through cudaSafeCall. When
    // the default stream (0) is used, the call also waits for the device to
    // finish; on any other stream it returns right after enqueueing.
    void ResizeLinear_32FC1_call(const cv::cuda::PtrStepSz<float>& src, const cv::cuda::PtrStepSz<float>& dst, float fy, float fx, cudaStream_t stream)
    {
        const dim3 threads(32, 8);
        const dim3 blocks(cv::cuda::device::divUp(dst.cols, threads.x),
                          cv::cuda::device::divUp(dst.rows, threads.y));

        ResizeLinear_32FC1_kernel<<<blocks, threads, 0, stream>>>(src, dst, fy, fx);

        // Kernel launches do not return a status; fetch it explicitly.
        cudaSafeCall(cudaGetLastError());

        if (stream == nullptr)
        {
            cudaSafeCall(cudaDeviceSynchronize());
        }
    }

    下面这个函数是供主机端 C++ 代码使用的接口，可以在 .cpp 文件中直接调用：

    // Resizes a single-channel 32-bit float GpuMat with bilinear interpolation.
    //
    // _src          : source image; must be a non-empty GpuMat of type CV_32FC1
    // _dst          : destination; (re)created with size dsize and the source type
    // dsize         : requested output size. If it equals cv::Size(), the size is
    //                 derived from the source size and fx / fy instead.
    // fx, fy        : horizontal / vertical scale factors; only consulted (and then
    //                 required to be > 0) when dsize == cv::Size()
    // interpolation : kept for signature compatibility; it is not consulted, the
    //                 linear kernel is always used
    // stream        : stream on which the copy or the kernel is enqueued
    //
    // Fails a CV_Assert if the source is empty or not CV_32FC1, if neither
    // dsize nor positive fx/fy are given, or if the resulting output size is
    // not positive in both dimensions.
    void ResizeLinear_32FC1(cv::InputArray _src, cv::OutputArray _dst, cv::Size dsize, double fx, double fy, int interpolation, cv::cuda::Stream& stream)
    {
        (void)interpolation; // intentionally unused, see header comment

        cv::cuda::GpuMat src = _src.getGpuMat();

        // The kernel reads the data as exactly one float per pixel; any other
        // depth or channel count would be misinterpreted and read out of bounds.
        CV_Assert(!src.empty() && src.type() == CV_32FC1);
        CV_Assert(!(dsize == cv::Size()) || (fx > 0 && fy > 0));

        if (dsize == cv::Size())
        {
            dsize = cv::Size(cv::saturate_cast<int>(src.cols * fx), cv::saturate_cast<int>(src.rows * fy));
        }
        else
        {
            fx = static_cast<double>(dsize.width) / src.cols;
            fy = static_cast<double>(dsize.height) / src.rows;
        }

        // A zero or negative extent would give a zero-sized launch grid and
        // infinite inverse scale factors; reject it before touching the GPU.
        CV_Assert(dsize.width > 0 && dsize.height > 0);

        _dst.create(dsize, src.type());
        cv::cuda::GpuMat dst = _dst.getGpuMat();

        // Same size: nothing to interpolate, a plain copy is enough.
        if (dsize == src.size())
        {
            src.copyTo(dst, stream);
            return;
        }

        // The kernel expects destination-to-source multipliers, i.e. the inverse scales.
        ResizeLinear_32FC1_call(src, dst, static_cast<float>(1.0 / fy), static_cast<float>(1.0 / fx), cv::cuda::StreamAccessor::getStream(stream));
    }

    相关文章

      网友评论

          本文标题:cuda学习一

          本文链接:https://www.haomeiwen.com/subject/aizggqtx.html