I have the following example fortran code cufft_example.f90:
program cufft_example
use cudafor
use cufft
use iso_c_binding
implicit none
! Parameters
integer, parameter :: nx = 8
integer, parameter :: batch_size = 1
integer, parameter :: array_size = nx*batch_size
! Complex data arrays
complex(8), dimension(array_size) :: data_in, data_out
! cuFFT plan handle
type(c_ptr):: plan
! CUDA error variable
integer :: cuda_err
! Initialize data
data_in = 0.0d0
!data_in = [(dble(i), 0.0d0) :: i = 1, nx*batch_size]
! Initialize cuFFT
cuda_err = cufftCreate(plan)
if (cuda_err /= 0) then
write(*,*) 'Error creating cuFFT plan'
stop
end if
! Set cuFFT plan parameters
cuda_err = cufftSetAutoAllocation(plan, 1)
if (cuda_err /= 0) then
write(*,*) 'Error setting cuFFT plan parameters'
stop
end if
cuda_err = cufftPlan1d(plan, nx, CUFFT_Z2Z, batch_size)
if (cuda_err /= 0) then
write(*,*) 'Error setting cuFFT plan parameters'
stop
end if
! Execute cuFFT forward transform
cuda_err = cufftExecZ2Z(plan, data_in, data_out, CUFFT_FORWARD)
if (cuda_err /= 0) then
write(*,*) 'Error executing cuFFT transform'
stop
end if
! Clean up cuFFT plan
cuda_err = cufftDestroy(plan)
if (cuda_err /= 0) then
write(*,*) 'Error destroying cuFFT plan'
stop
end if
! Print results
write(*,*) 'Original Data:'
write(*,*) data_in
write(*,*)
write(*,*) 'Transformed Data:'
write(*,*) data_out
end program cufft_example
I have tried compiling it with:
nvfortran -fast -cudalib=cufft -o cufft_example cufft_example.f90
but i get the following errors:
NVFORTRAN-S-0155-Could not resolve generic procedure cufftcreate (../cufft_example2.f90: 26)
NVFORTRAN-S-0155-Could not resolve generic procedure cufftsetautoallocation (../cufft_example2.f90: 33)
NVFORTRAN-S-0155-Could not resolve generic procedure cufftplan1d (../cufft_example2.f90: 39)
NVFORTRAN-S-0155-Could not resolve generic procedure cufftexecz2z (../cufft_example2.f90: 46)
NVFORTRAN-S-0155-Could not resolve generic procedure cufftdestroy (../cufft_example2.f90: 53)
suggesting that the cufft libs are not being linked properly.
Though the example that comes with nvhpc toolkit nvhpc/23.3/Linux_x86_64/23.3/examples/CUDA-Libraries/cuFFT/test_fft_cuf/tfft1.cuf:
program cufft2dTest
use cufft
implicit none
integer, parameter :: n=450
complex :: a(n,n),b(n,n)
complex, device :: a_d(n,n), b_d(n,n)
real :: ar(n,n),br(n,n),x
real, device :: ar_d(n,n), br_d(n,n)
integer :: plan, ierr
logical passing
a = 1; a_d = a
ar = 1; ar_d = ar
ierr = cufftPlan2D(plan,n,n,CUFFT_C2C)
ierr = ierr + cufftExecC2C(plan,a_d,b_d,CUFFT_FORWARD)
b = b_d
write(*,*) maxval(real(b)),sum(b),450*450
ierr = ierr + cufftExecC2C(plan,b_d,b_d,CUFFT_INVERSE)
b = b_d
x = maxval(abs(a-b/(n*n)))
write(*,*) 'Max error C2C: ', x
passing = x .le. 1.0e-5
ierr = ierr + cufftPlan2D(plan,n,n,CUFFT_R2C)
ierr = ierr + cufftExecR2C(plan,ar_d,b_d)
ierr = ierr + cufftPlan2D(plan,n,n,CUFFT_C2R)
ierr = ierr + cufftExecC2R(plan,b_d,br_d)
br = br_d
x = maxval(abs(ar-br/(n*n)))
write(*,*) 'Max error R2C/C2R: ', x
passing = passing .and. (x .le. 1.0e-5)
ierr = ierr + cufftDestroy(plan)
print *,ierr
passing = passing .and. (ierr .eq. 0)
if (passing) then
print *,"Test PASSED"
else
print *,"Test FAILED"
endif
end program cufft2dTest
Compiles and passes the test fine with:
nvfortran -fast -cudalib=cufft -o tfft1.exe tfft1.cuf
Why would the library be linked properly when the suffix is .cuf, but not .f90?