I am trying to use the NPP library in CUDA for Gaussian filtering of images. The function I am using is `nppiFilterGauss_8u_C1R`. However, my code always fails to return the correct result: the returned image is either pure black, pure grey, or covered in irregular stripes. Here is the main implementation of my code; I have added some comments to help the code make sense (I hope the comments I wrote are correct...). The input image I used is the 512×512 Lena image.
void NppGaussianFilter(std::string strSrc, std::string strDst) {
// Convert the image to gray scale image
cv::Mat img = cv::imread(strSrc);
if (img.empty()) {
std::cerr << "Failed to load image: " << strSrc << std::endl;
return;
}
cv::Mat grayImg;
cv::cvtColor(img, grayImg, cv::COLOR_BGR2GRAY);
// Some image parameters
int nWidth = grayImg.cols;
int nHeight = grayImg.rows;
int nChannels = grayImg.channels();
Npp8u nStep = grayImg.step[0];
size_t sizeToCopy = nWidth * nHeight * nChannels * sizeof(Npp8u);
// Allocate memory of source image pointer on device and copy image data from host to device
Npp8u* pSrc_dev = nullptr;
cudaError_t err = cudaMalloc((void**)&pSrc_dev, sizeToCopy);
if (err != cudaSuccess) {
std::cerr << "Failed to allocate device memory for pSrc_dev" << std::endl;
return;
}
cudaMemcpy(pSrc_dev, grayImg.data, sizeToCopy, cudaMemcpyHostToDevice);
// Allocate memory of destination image pointer on device
Npp8u* pDst_dev = nullptr;
err = cudaMalloc((void**)&pDst_dev, sizeToCopy);
if (err != cudaSuccess) {
std::cerr << "Failed to allocate device memory for pDst_dev" << std::endl;
cudaFree(pSrc_dev);
return;
}
// Implement the gauss filter function
NppiMaskSize eMaskSize = NPP_MASK_SIZE_3_X_3;
NppiSize roiSize = { nWidth, nHeight };
nppiFilterGauss_8u_C1R(pSrc_dev, nStep, pDst_dev, nStep, roiSize, eMaskSize);
// Copy image data from device to host
cv::Mat newImg(nHeight, nWidth, CV_8UC1);
cudaMemcpy(newImg.data, pDst_dev, sizeToCopy, cudaMemcpyDeviceToHost);
cv::imwrite(strDst, newImg);
cudaFree(pSrc_dev);
cudaFree(pDst_dev);
}
I really cannot find any errors in my code. But the results don't lie. I would be very grateful if you could point out any errors in my code.
The issue is that `nStep` is defined as `Npp8u`. The `Npp8u` type is one byte (unsigned), so it can only store values in the range [0, 255].
Replace `Npp8u nStep = grayImg.step[0];` with `int nStep = static_cast<int>(grayImg.step[0]);` (NPP takes the step argument as an `int`).
When `step` is 512, the value stored in `nStep` wraps around to `0`, because 512 mod 256 = 0.
When `step = 0` is used in `nppiFilterGauss_8u_C1R(pSrc_dev, nStep, pDst_dev, nStep, roiSize, eMaskSize);`, the result is a black image (or an uninitialized image).
Corrected code sample:
Output:
