How should I correctly use the gaussian filter function in CUDA Npp library?

218 Views Asked by At

I am trying to use the NPP library in CUDA for Gaussian filtering of images. The function I am using is nppiFilterGauss_8u_C1R. However, my code always fails to return the correct result: the returned image is either pure black, pure grey, or covered in irregular stripes. Here is the main implementation of my code; I have added some comments so that the code makes sense (I hope the comments I wrote are correct...). The input image I used is the 512*512 image of Ms. Lena.

// Loads the image at strSrc, converts it to grayscale, runs
// nppiFilterGauss_8u_C1R with a 3x3 mask over the whole image on the device,
// and writes the result to strDst.
//
// strSrc: path of the image to read with cv::imread.
// strDst: path the filtered image is written to with cv::imwrite.
//
// On a failed load or a failed cudaMalloc the function prints a message to
// std::cerr and returns without writing anything.
void NppGaussianFilter(std::string strSrc, std::string strDst) {

    // Load the image and convert it to a grayscale image
    cv::Mat img = cv::imread(strSrc);
    if (img.empty()) {
        std::cerr << "Failed to load image: " << strSrc << std::endl;
        return;
    }
    cv::Mat grayImg;
    cv::cvtColor(img, grayImg, cv::COLOR_BGR2GRAY);

    // Some image parameters
    int nWidth = grayImg.cols;
    int nHeight = grayImg.rows;
    int nChannels = grayImg.channels();
    // NOTE(review): Npp8u is a one-byte unsigned type (range [0, 255]), so a
    // row step of 512 bytes cannot be represented here and wraps to 0.
    Npp8u nStep = grayImg.step[0];
    // Byte count of the image assuming tightly packed rows
    // (presumably step == width * channels for this Mat; TODO confirm).
    size_t sizeToCopy = nWidth * nHeight * nChannels * sizeof(Npp8u);

    // Allocate memory of source image pointer on device and copy image data from host to device
    Npp8u* pSrc_dev = nullptr;
    cudaError_t err = cudaMalloc((void**)&pSrc_dev, sizeToCopy);
    if (err != cudaSuccess) {
        std::cerr << "Failed to allocate device memory for pSrc_dev" << std::endl;
        return;
    }
    // The return value of this copy is not checked.
    cudaMemcpy(pSrc_dev, grayImg.data, sizeToCopy, cudaMemcpyHostToDevice);
    
    // Allocate memory of destination image pointer on device
    Npp8u* pDst_dev = nullptr;
    err = cudaMalloc((void**)&pDst_dev, sizeToCopy);
    if (err != cudaSuccess) {
        std::cerr << "Failed to allocate device memory for pDst_dev" << std::endl;
        // Release the source buffer so the early return does not leak it.
        cudaFree(pSrc_dev);
        return;
    }

    // Run the Gaussian filter with a 3x3 mask; the ROI covers the full image
    // and the same step value is passed for source and destination.
    // The status returned by the NPP call is discarded.
    NppiMaskSize eMaskSize = NPP_MASK_SIZE_3_X_3;
    NppiSize roiSize = { nWidth, nHeight };
    nppiFilterGauss_8u_C1R(pSrc_dev, nStep, pDst_dev, nStep, roiSize, eMaskSize);

    // Copy image data from device to host (return value not checked)
    cv::Mat newImg(nHeight, nWidth, CV_8UC1);
    cudaMemcpy(newImg.data, pDst_dev, sizeToCopy, cudaMemcpyDeviceToHost);

    cv::imwrite(strDst, newImg);

    // Release both device buffers
    cudaFree(pSrc_dev);
    cudaFree(pDst_dev);
}

I really cannot find any errors in my code. But the results don't lie. I would be very grateful if you could point out any errors in my code.

1

There is 1 best solution below

0
Rotem On

The issue is that nStep is defined as Npp8u.
The Npp8u type is one byte (unsigned), so it can only store values in the range [0, 255].

Replace Npp8u nStep = grayImg.step[0]; with:

int nStep = grayImg.step[0];  

When step is 512, the value stored in nStep wraps around to 0 (512 mod 256 = 0).
When step = 0 is used in nppiFilterGauss_8u_C1R(pSrc_dev, nStep, pDst_dev, nStep, roiSize, eMaskSize);, the result is a black image (or uninitialized image).


Corrected code sample:

#include <iostream>
#include <string>

#include <cuda_runtime.h>
#include <opencv2/opencv.hpp>
#include "nppi.h"

void NppGaussianFilter(std::string strSrc, std::string strDst) {

    // Convert the image to gray scale image
    cv::Mat img = cv::imread(strSrc);
    if (img.empty()) {
        std::cerr << "Failed to load image: " << strSrc << std::endl;
        return;
    }
    cv::Mat grayImg;
    cv::cvtColor(img, grayImg, cv::COLOR_BGR2GRAY);

    // Some image parameters
    int nWidth = grayImg.cols;
    int nHeight = grayImg.rows;
    int nChannels = grayImg.channels();
    //Npp8u nStep = grayImg.step[0];  <---- Here is the issue Npp8u variable can't store values above 255
    int nStep = grayImg.step[0]; // Correction: using int instead of Npp8u.
    size_t sizeToCopy = nWidth * nHeight * nChannels * sizeof(Npp8u);

    // Allocate memory of source image pointer on device and copy image data from host to device
    Npp8u* pSrc_dev = nullptr;
    cudaError_t err = cudaMalloc((void**)&pSrc_dev, sizeToCopy);
    if (err != cudaSuccess) {
        std::cerr << "Failed to allocate device memory for pSrc_dev" << std::endl;
        return;
    }
    cudaMemcpy(pSrc_dev, grayImg.data, sizeToCopy, cudaMemcpyHostToDevice);
    
    // Allocate memory of destination image pointer on device
    Npp8u* pDst_dev = nullptr;
    err = cudaMalloc((void**)&pDst_dev, sizeToCopy);
    if (err != cudaSuccess) {
        std::cerr << "Failed to allocate device memory for pDst_dev" << std::endl;
        cudaFree(pSrc_dev);
        return;
    }

    // Implement the gauss filter function
    NppiMaskSize eMaskSize = NPP_MASK_SIZE_3_X_3;
    NppiSize roiSize = { nWidth, nHeight };
    nppiFilterGauss_8u_C1R(pSrc_dev, nStep, pDst_dev, nStep, roiSize, eMaskSize);

    // Copy image data from device to host
    cv::Mat newImg(nHeight, nWidth, CV_8UC1);
    cudaMemcpy(newImg.data, pDst_dev, sizeToCopy, cudaMemcpyDeviceToHost);

    cv::imwrite(strDst, newImg);

    cudaFree(pSrc_dev);
    cudaFree(pDst_dev);
}


// Entry point: filters the bundled test image and stores the blurred result
// next to it. Always exits with status 0.
int main()
{
    const char* const inputPath = "Lenna_test_image.png";
    const char* const outputPath = "filtered_output.png";

    NppGaussianFilter(inputPath, outputPath);
    return 0;
}

Output:
enter image description here