重写 OpenCV 的 resize 实现（线性插值，即 INTER_LINEAR 方式）。
CUDA 核函数实现如下：
/**
 * Bilinear resize kernel for a single-channel float image.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per destination pixel.
 * The thread's global (x, y) index is the destination coordinate; threads
 * that fall outside dst.cols x dst.rows exit without touching memory.
 *
 * @param src  Source image view (read only).
 * @param dst  Destination image view; dst(y, x) is written by thread (x, y).
 * @param fy   Factor that maps a destination row to a source row.
 * @param fx   Factor that maps a destination column to a source column.
 *
 * NOTE(review): the interpolated value is clamped to [0, 255] before it is
 * stored even though the pixels are float -- confirm that callers rely on an
 * 8-bit style value range.
 */
__global__ void ResizeLinear_32FC1_kernel(const cv::cuda::PtrStepSz<float> src, cv::cuda::PtrStepSz<float> dst, const float fy, const float fx)
{
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // Guard clause: the grid is rounded up, so edge blocks have idle threads.
    if (col >= dst.cols || row >= dst.rows)
    {
        return;
    }

    // Map the centre of the destination pixel to source coordinates.
    const float src_x = (col + 0.5f) * fx - 0.5f;
    const float src_y = (row + 0.5f) * fy - 0.5f;

    // Integer corners of the 2x2 neighbourhood (floor and floor + 1).
    const int left = __float2int_rd(src_x);
    const int top = __float2int_rd(src_y);
    const int right = left + 1;
    const int bottom = top + 1;

    // Interpolation weights are taken from the unclamped corners.
    const float w_left = right - src_x;
    const float w_right = src_x - left;
    const float w_top = bottom - src_y;
    const float w_bottom = src_y - top;

    // Only the indices used for reading are clamped into the source image,
    // so samples beyond the border repeat the nearest edge pixel.
    const int left_rd = ::max(::min(left, src.cols - 1), 0);
    const int top_rd = ::max(::min(top, src.rows - 1), 0);
    const int right_rd = ::max(::min(right, src.cols - 1), 0);
    const int bottom_rd = ::max(::min(bottom, src.rows - 1), 0);

    // Accumulate the four weighted samples: top-left, top-right,
    // bottom-left, bottom-right.
    float acc = 0.f;
    acc = acc + src(top_rd, left_rd) * (w_left * w_top);
    acc = acc + src(top_rd, right_rd) * (w_right * w_top);
    acc = acc + src(bottom_rd, left_rd) * (w_left * w_bottom);
    acc = acc + src(bottom_rd, right_rd) * (w_right * w_bottom);

    // Clamp to [0, 255]; written as comparisons so a NaN passes through unchanged.
    if (acc > 255)
    {
        acc = 255;
    }
    if (acc < 0)
    {
        acc = 0;
    }

    dst(row, col) = acc;
}
将核函数的调用封装为 ResizeLinear_32FC1_call：
/**
 * Host-side launcher for ResizeLinear_32FC1_kernel.
 *
 * Uses 32x8 thread blocks and enough blocks to cover every destination pixel.
 *
 * @param src     Source image view passed to the kernel.
 * @param dst     Destination image view; its cols/rows determine the grid size.
 * @param fy      Row scale factor forwarded to the kernel.
 * @param fx      Column scale factor forwarded to the kernel.
 * @param stream  CUDA stream the kernel is enqueued on. When it is the default
 *                stream (0) the call blocks until the device is idle;
 *                otherwise it returns right after the launch.
 */
void ResizeLinear_32FC1_call(const cv::cuda::PtrStepSz<float>& src, const cv::cuda::PtrStepSz<float>& dst, float fy, float fx, cudaStream_t stream)
{
    const dim3 threads(32, 8);
    const dim3 blocks(cv::cuda::device::divUp(dst.cols, threads.x),
                      cv::cuda::device::divUp(dst.rows, threads.y));

    ResizeLinear_32FC1_kernel<<<blocks, threads, 0, stream>>>(src, dst, fy, fx);

    // Launch-configuration errors are only visible through cudaGetLastError().
    cudaSafeCall(cudaGetLastError());

    if (stream == 0)
    {
        cudaSafeCall(cudaDeviceSynchronize());
    }
}
下面的函数是 C++ 主机端接口，可以在 .cpp 文件中直接调用：
/**
 * Resizes a single-channel float GPU image with bilinear interpolation.
 *
 * The output size is taken from dsize when it is non-zero; otherwise it is
 * derived from the source size and the scale factors fx / fy. When the output
 * size equals the source size the image is simply copied.
 *
 * @param _src           Source image; must be a non-empty CV_32FC1 GpuMat.
 * @param _dst           Destination; (re)allocated to the output size with the
 *                       source type.
 * @param dsize          Requested output size, or cv::Size() to use fx / fy.
 * @param fx             Horizontal scale factor; must be > 0 when dsize is
 *                       cv::Size(). Recomputed from dsize otherwise.
 * @param fy             Vertical scale factor; same rules as fx.
 * @param interpolation  Ignored: this function always interpolates linearly.
 *                       Kept so the signature stays unchanged for callers.
 * @param stream         Stream used for the copy or the kernel launch.
 *
 * Fails a CV_Assert if the source is empty, is not CV_32FC1, if neither a
 * size nor positive scale factors are given, or if the resulting output size
 * has a non-positive dimension.
 */
void ResizeLinear_32FC1(cv::InputArray _src, cv::OutputArray _dst, cv::Size dsize, double fx, double fy, int interpolation, cv::cuda::Stream& stream)
{
    (void)interpolation; // only the linear method is implemented

    cv::cuda::GpuMat src = _src.getGpuMat();

    // An empty source would make the scale computation below divide by zero.
    CV_Assert(!src.empty());

    // The source is handed to the kernel as PtrStepSz<float> without any
    // conversion, so any other depth or channel count would be misread
    // (and could be read out of bounds).
    CV_Assert(src.type() == CV_32FC1);

    CV_Assert(!(dsize == cv::Size()) || (fx > 0 && fy > 0));

    if (dsize == cv::Size())
    {
        dsize = cv::Size(cv::saturate_cast<int>(src.cols * fx), cv::saturate_cast<int>(src.rows * fy));
    }
    else
    {
        fx = static_cast<double>(dsize.width) / src.cols;
        fy = static_cast<double>(dsize.height) / src.rows;
    }

    // Reject degenerate outputs: they would produce a zero-sized launch grid
    // or an infinite inverse scale factor.
    CV_Assert(dsize.width > 0 && dsize.height > 0);

    _dst.create(dsize, src.type());
    cv::cuda::GpuMat dst = _dst.getGpuMat();

    if (dsize == src.size())
    {
        src.copyTo(dst, stream);
        return;
    }

    // The kernel maps destination -> source, so it takes the inverse scales,
    // in (fy, fx) order.
    ResizeLinear_32FC1_call(src, dst, static_cast<float>(1.0 / fy), static_cast<float>(1.0 / fx), cv::cuda::StreamAccessor::getStream(stream));
}
网友评论