Solve the problem when compiling CUDA fortran with PGI fortran compiler
when we have a Fortran source code as given below, named test_gemm.cuf
The code is coming from here.
module precision
! Precision control
integer, parameter, public :: Single = kind(0.0) ! Single precision
integer, parameter, public :: Double = kind(0.0d0) ! Double precision
integer, parameter, public :: fp_kind = Double
!integer, parameter, public :: fp_kind = Single
end module precision
module cublas
!
! Define the INTERFACE to the NVIDIA C code cublasSgemm and cublasDgemm
!
interface cuda_gemm
!
! void cublasSgemm (char transa, char transb, int m, int n,
! int k, float alpha, const float *A, int lda,
! const float *B, int ldb, float beta, float *C, int ldc)
!
subroutine cuda_sgemm(cta, ctb, m, n, k,&
alpha, A, lda, B, ldb, beta, c, ldc) bind(C,name='cublasSgemm')
use iso_c_binding
character(1,c_char),value :: cta, ctb
integer(c_int),value :: m,n,k,lda,ldb,ldc
real(c_float),value :: alpha,beta
real(c_float), device, dimension(lda,*) :: A
real(c_float), device, dimension(ldb,*) :: B
real(c_float), device, dimension(ldc,*) :: C
end subroutine cuda_sgemm
!
! void cublasDgemm (char transa, char transb, int m, int n,
! int k, double alpha, const double *A, int lda,
! const double *B, int ldb, double beta, double *C, int ldc)
!
subroutine cuda_dgemm(cta, ctb, m, n, k,&
alpha, A, lda, B, ldb, beta, c, ldc) bind(C,name='cublasDgemm')
use iso_c_binding
character(1,c_char),value :: cta, ctb
integer(c_int),value :: m,n,k,lda,ldb,ldc
real(c_double),value :: alpha,beta
real(c_double), device, dimension(lda,*) :: A
real(c_double), device, dimension(ldb,*) :: B
real(c_double), device, dimension(ldc,*) :: C
end subroutine cuda_dgemm
end interface
end module cublas
program gemm_test
use precision
use cublas
real(fp_kind) ,allocatable:: a(:,:),b(:,:),c(:,:)
real(fp_kind),device,allocatable:: a_d(:,:),b_d(:,:),c_d(:,:)
real(fp_kind):: alpha,beta
integer:: n,m,k
n=4
m=4
k=4
alpha=1._fp_kind
beta=2._fp_kind
! allocate arrays on the host
allocate (a(m,k))
allocate (b(k,n))
allocate (c(m,n))
! allocate arrays on the device
allocate (a_d(m,k))
allocate (b_d(k,n))
allocate (c_d(m,n))
!initialize arrays on host
a=1
b=2
c=3
!copy arrays to device
a_d=a
b_d=b
c_d=c
print *, "Matrix A:"
print *, a
print *, "Matrix B:"
print *, b
print *, "Matrix C:"
print *, c
call cuda_gemm ('N','N',m,n,k,alpha,a_d,m,b_d,k,beta,c_d,m)
c=c_d
print *, "Matrix C = alpha A*B+ beta C"
print *, c
!release memory on the host
deallocate (a,b,c)
!release memory on the device
deallocate (a_d,b_d,c_d)
end program gemm_test
Compile it from PGI BASH, using the command below will give you LINK Error: LNK1104: Can not open "libcublas.lib"
pgfortran -Mcuda -o test_gemm test_gemm.cuf -lcublas
After carefully checking, we already have the NVIDIA CUDA library installed. In PGI we also have they ready at C:Program FilesMATLABR2019bsyscudawin64cudalibx64 in my case.
Such error simply implies that the linker is trying to find the lib file named after libcublas.lib rather than cublas.lib, the latter of which is the one really exists.
The solution is, go to the directory where the library files asides, using soft link or hard link to make a copy using the name that can comfort the linker like below
for i in `ls *.lib`
do ln -s $i lib$i
done
Then the problem is solved. The linker now can find the library files it needs.