cuda学习一

作者: 无敌科 | 来源:发表于2019-04-24 17:31 被阅读0次

用 CUDA 重写 OpenCV 的 resize 实现（Linear 双线性插值方式）

CUDA 核函数实现

// Bilinear-resize kernel for a single-channel float image.
//
// Launch layout: 2D grid of 2D blocks; each thread computes the destination
// pixel at (blockDim.y * blockIdx.y + threadIdx.y,
//           blockDim.x * blockIdx.x + threadIdx.x).
// Threads whose coordinates fall outside dst.rows x dst.cols exit immediately,
// so the grid may overshoot the destination size.
//
// Parameters:
//   src     source image view, read via src(row, col)
//   dst     destination image view, written via dst(row, col)
//   fy, fx  factors applied to the destination row / column coordinate to
//           obtain the corresponding source coordinate
//
// Source coordinates are computed as (d + 0.5) * f - 0.5. The four tap indices
// are clamped into [0, size - 1] for reading only; the interpolation weights
// are still derived from the unclamped indices. The interpolated value is
// clamped to [0, 255] before it is stored.
__global__ void ResizeLinear_32FC1_kernel(const cv::cuda::PtrStepSz<float> src, cv::cuda::PtrStepSz<float> dst, const float fy, const float fx)
{
    const int col = blockDim.x * blockIdx.x + threadIdx.x;
    const int row = blockDim.y * blockIdx.y + threadIdx.y;

    // Guard for the partially filled blocks at the right / bottom edge.
    if (col >= dst.cols || row >= dst.rows)
        return;

    // Position of this destination pixel expressed in source coordinates.
    const float sx = (col + 0.5f) * fx - 0.5f;
    const float sy = (row + 0.5f) * fy - 0.5f;

    // Integer neighbours around (sx, sy); __float2int_rd rounds toward -inf,
    // so `left` / `top` can be -1 near the border.
    const int left = __float2int_rd(sx);
    const int top = __float2int_rd(sy);
    const int right = left + 1;
    const int bottom = top + 1;

    // Indices actually used for memory access, kept inside the source image.
    const int last_col = src.cols - 1;
    const int last_row = src.rows - 1;
    const int left_read = ::max(::min(left, last_col), 0);
    const int top_read = ::max(::min(top, last_row), 0);
    const int right_read = ::max(::min(right, last_col), 0);
    const int bottom_read = ::max(::min(bottom, last_row), 0);

    // Accumulate the four weighted taps: top-left, top-right, bottom-left,
    // bottom-right (same order as the weights are paired below).
    float acc = 0.f;
    acc = acc + src(top_read, left_read) * ((right - sx) * (bottom - sy));
    acc = acc + src(top_read, right_read) * ((sx - left) * (bottom - sy));
    acc = acc + src(bottom_read, left_read) * ((right - sx) * (sy - top));
    acc = acc + src(bottom_read, right_read) * ((sx - left) * (sy - top));

    // Clamp the result to [0, 255] before storing it.
    if (acc > 255)
        acc = 255;
    if (acc < 0)
        acc = 0;

    dst(row, col) = acc;
}

将核函数的调用封装成 ResizeLinear_32FC1_call 函数

// Host-side launcher for ResizeLinear_32FC1_kernel.
//
// Launches the kernel on `stream` with 32x8 thread blocks and a grid sized by
// divUp so that every pixel of `dst` is covered by a thread.
//
// Parameters:
//   src, dst  source and destination image views forwarded to the kernel
//   fy, fx    coordinate factors forwarded unchanged to the kernel
//   stream    CUDA stream used for the launch
//
// The launch status is checked with cudaGetLastError(). When `stream` is the
// default stream (0) the call additionally waits for the device to finish;
// for any other stream it returns right after the launch.
void ResizeLinear_32FC1_call(const cv::cuda::PtrStepSz<float>& src, const cv::cuda::PtrStepSz<float>& dst, float fy, float fx, cudaStream_t stream)
{
    const dim3 threads(32, 8);
    const dim3 blocks(cv::cuda::device::divUp(dst.cols, threads.x), cv::cuda::device::divUp(dst.rows, threads.y));

    ResizeLinear_32FC1_kernel<<<blocks, threads, 0, stream>>>(src, dst, fy, fx);

    // Surfaces launch-configuration errors from the kernel launch above.
    cudaSafeCall(cudaGetLastError());

    const bool on_default_stream = (stream == 0);
    if (on_default_stream)
    {
        cudaSafeCall(cudaDeviceSynchronize());
    }
}

下面这个函数是提供给 C++ 端使用的接口函数，可以在 .cpp 文件中直接调用。

// Resizes a single-channel 32-bit float GpuMat with bilinear interpolation.
//
// Parameters:
//   _src           source image; must be a non-empty CV_32FC1 GpuMat
//   _dst           destination; (re)created with size `dsize` and the source type
//   dsize          requested output size; pass cv::Size() to derive it from fx / fy
//   fx, fy         horizontal / vertical scale factors; only read when `dsize`
//                  is cv::Size(), otherwise recomputed from `dsize`
//   interpolation  accepted for signature compatibility; not consulted, the
//                  linear path is always used
//   stream         stream on which the copy or the kernel launch is issued
//
// Throws (via CV_Assert) when the source is empty or not CV_32FC1, when neither
// a size nor positive scale factors are supplied, or when the resolved output
// size has a non-positive dimension.
void ResizeLinear_32FC1(cv::InputArray _src, cv::OutputArray _dst, cv::Size dsize, double fx, double fy, int interpolation, cv::cuda::Stream& stream)
{
    (void)interpolation;  // unused: this function only implements linear resizing

    cv::cuda::GpuMat src = _src.getGpuMat();

    // An empty source would make the scale computation below divide by zero.
    CV_Assert(!src.empty());

    // The only code path below hands `src` to a launcher that takes
    // PtrStepSz<float>, i.e. it treats every element as one float. Any other
    // depth or channel count would be reinterpreted and read incorrectly,
    // so reject it up front.
    CV_Assert(src.type() == CV_32FC1);

    CV_Assert(!(dsize == cv::Size()) || (fx > 0 && fy > 0));

    if (dsize == cv::Size())
    {
        dsize = cv::Size(cv::saturate_cast<int>(src.cols * fx), cv::saturate_cast<int>(src.rows * fy));
    }
    else
    {
        fx = static_cast<double>(dsize.width) / src.cols;
        fy = static_cast<double>(dsize.height) / src.rows;
    }

    // Small scale factors can round a dimension to 0, and a caller may pass a
    // size with a single zero dimension; either would lead to a kernel launch
    // with an empty grid, so fail here with a clear assertion instead.
    CV_Assert(dsize.width > 0 && dsize.height > 0);

    _dst.create(dsize, src.type());
    cv::cuda::GpuMat dst = _dst.getGpuMat();

    // Same size in and out: a plain copy is sufficient.
    if (dsize == src.size())
    {
        src.copyTo(dst, stream);
        return;
    }

    // The launcher expects destination-to-source factors, hence the reciprocals.
    ResizeLinear_32FC1_call(src, dst, static_cast<float>(1.0 / fy), static_cast<float>(1.0 / fx), cv::cuda::StreamAccessor::getStream(stream));
}

网友评论

本文标题：cuda学习一

本文链接：https://www.haomeiwen.com/subject/aizggqtx.html

延伸阅读

深度阅读

您也可以注册成为美文阅读网的作者，发表您的原创作品、分享您的心情！

cuda学习一

相关文章

网友评论

延伸阅读

深度阅读

栏目导航

热点阅读