parallel_map_operators_cuda_s.f90 Source File


Contents


Source Code

submodule (parallel_map_m) parallel_map_operators_cuda_s
    !! Parallel operator routines (mapping, parallel gradient/divergence)
    !! CUDA implementation
    use perf_m, only : perf_start, perf_stop
    implicit none

contains

    module subroutine par_grad_cuda(self, ucano, ucano_fwd, par_grad_u_stag)
        !! Parallel gradient, CUDA code path.
        !!
        !! Loads `ucano_fwd` and `ucano` into the `x` buffers of the forward
        !! and backward `grad_stag_cano_*_cuda` operators, applies both
        !! operators with `spmv_cusparse`, and returns the forward `y` minus
        !! the backward `y`.
        !!
        !! NOTE(review): the helper names suggest a host-to-device copy
        !! followed by cuSPARSE sparse matrix-vector products; their
        !! definitions are not part of this file -- confirm.
        !! NOTE(review): the copy length is the `nrows` of the CUDA operator,
        !! whereas the inputs are dimensioned by the `ncol` of the matching
        !! non-CUDA operator -- confirm that these agree.
        class(parallel_map_t), intent(in) :: self
            !! Map object holding the forward/backward gradient operators
        real(GP), intent(in) :: ucano(self%grad_stag_cano_bwd%ncol)
            !! Input handed to the backward operator
        real(GP), intent(in) :: ucano_fwd(self%grad_stag_cano_fwd%ncol)
            !! Input handed to the forward operator
        real(GP), intent(out) :: par_grad_u_stag(self%grad_stag_cano_fwd%ndim)
            !! Result: forward contribution minus backward contribution

        ! perf_start / perf_stop are available through the submodule-level
        ! `use perf_m`, so no procedure-level import is required here.
        associate (fwd => self%grad_stag_cano_fwd_cuda, bwd => self%grad_stag_cano_bwd_cuda)

            ! Stage 1 (timed): load the inputs into the operators' x buffers
            ! and register those buffers with the operator handles.
            call perf_start('pgrad_cuda_copy')
            call copydoubleh2d(fwd%x, ucano_fwd, fwd%nrows)
            call copydoubleh2d(bwd%x, ucano, bwd%nrows)
            call setvecvals(fwd%cuda_struct, fwd%x)
            call setvecvals(bwd%cuda_struct, bwd%x)
            call perf_stop('pgrad_cuda_copy')

            ! Stage 2 (timed): apply the forward and the backward operator.
            call perf_start('pgrad_cuda_spmv')
            call spmv_cusparse(fwd%cuda_struct)
            call spmv_cusparse(bwd%cuda_struct)
            call perf_stop('pgrad_cuda_spmv')

            ! Combine the two operator outputs into the gradient.
            par_grad_u_stag = fwd%y - bwd%y

        end associate

    end subroutine par_grad_cuda
        
    module subroutine par_divb_cuda(self, ustag, ustag_bwd, par_pdiv_u_cano)
        !! Parallel divergence, CUDA code path.
        !!
        !! Loads `ustag` and `ustag_bwd` into the `x` buffers of the forward
        !! and backward `pdiv_cano_stag_*_cuda` operators, applies both
        !! operators with `spmv_cusparse`, and returns the backward `y` minus
        !! the forward `y` (written as `-fwd + bwd`).
        !!
        !! NOTE(review): the helper names suggest a host-to-device copy
        !! followed by cuSPARSE sparse matrix-vector products; their
        !! definitions are not part of this file -- confirm.
        !! NOTE(review): the copy length is the `nrows` of the CUDA operator,
        !! whereas the inputs are dimensioned by the `ncol` of the matching
        !! non-CUDA operator -- confirm that these agree.
        class(parallel_map_t), intent(in) :: self
            !! Map object holding the forward/backward divergence operators
        real(GP), intent(in) :: ustag(self%pdiv_cano_stag_fwd%ncol)
            !! Input handed to the forward operator
        real(GP), intent(in) :: ustag_bwd(self%pdiv_cano_stag_bwd%ncol)
            !! Input handed to the backward operator
        real(GP), intent(out) :: par_pdiv_u_cano(self%pdiv_cano_stag_fwd%ndim)
            !! Result: negated forward contribution plus backward contribution

        associate (fwd => self%pdiv_cano_stag_fwd_cuda, bwd => self%pdiv_cano_stag_bwd_cuda)

            ! Stage 1 (timed): load the inputs into the operators' x buffers
            ! and register those buffers with the operator handles.
            call perf_start('divb_cuda_copy')
            call copydoubleh2d(fwd%x, ustag, fwd%nrows)
            call copydoubleh2d(bwd%x, ustag_bwd, bwd%nrows)
            call setvecvals(fwd%cuda_struct, fwd%x)
            call setvecvals(bwd%cuda_struct, bwd%x)
            call perf_stop('divb_cuda_copy')

            ! Stage 2 (timed): apply the forward and the backward operator.
            call perf_start('divb_cuda_spmv')
            call spmv_cusparse(fwd%cuda_struct)
            call spmv_cusparse(bwd%cuda_struct)
            call perf_stop('divb_cuda_spmv')

            ! Note the sign convention: forward enters negatively here.
            par_pdiv_u_cano = -fwd%y + bwd%y

        end associate

    end subroutine par_divb_cuda
    
end submodule