submodule (parallel_map_m) parallel_map_operators_cuda_s
    !! Parallel operator routines (mapping, parallel gradient/divergence)
    !! CUDA implementation
    use perf_m, only : perf_start, perf_stop
    implicit none

contains

    module subroutine par_grad_cuda(self, ucano, ucano_fwd, par_grad_u_stag)
        !! Evaluates the parallel gradient with the CUDA sparse operators.
        !!
        !! Steps, in order:
        !!   1. `ucano_fwd` / `ucano` are handed to `copydoubleh2d`, targeting
        !!      the `x` buffers of the forward / backward CUDA objects
        !!      (presumably a host-to-device copy, judging by the routine
        !!      name -- confirm against its interface).
        !!   2. `setvecvals` is called for each CUDA object with its `x` buffer.
        !!   3. `spmv_cusparse` is called for the forward, then the backward
        !!      operator.
        !!   4. The result is the forward `y` minus the backward `y`.
        !!
        !! Steps 1-2 are timed under 'pgrad_cuda_copy', step 3 under
        !! 'pgrad_cuda_spmv'; the final subtraction is not timed.
        !!
        !! NOTE(review): the input vectors are declared with the `ncol` of the
        !! host matrices, whereas the element count passed to `copydoubleh2d`
        !! is the `nrows` of the CUDA objects -- confirm these always agree.
        class(parallel_map_t), intent(in) :: self
            !! Map object holding the host and CUDA operator data
        real(GP), dimension(self%grad_stag_cano_bwd%ncol), intent(in) :: ucano
            !! Input copied into the `x` buffer of the backward operator
        real(GP), dimension(self%grad_stag_cano_fwd%ncol), intent(in) :: ucano_fwd
            !! Input copied into the `x` buffer of the forward operator
        real(GP), dimension(self%grad_stag_cano_fwd%ndim), intent(out) :: par_grad_u_stag
            !! Forward result minus backward result

        ! --- Upload of the input vectors --------------------------------
        call perf_start('pgrad_cuda_copy')
        call copydoubleh2d(self%grad_stag_cano_fwd_cuda%x, &
                           ucano_fwd, &
                           self%grad_stag_cano_fwd_cuda%nrows)
        call copydoubleh2d(self%grad_stag_cano_bwd_cuda%x, &
                           ucano, &
                           self%grad_stag_cano_bwd_cuda%nrows)
        call setvecvals(self%grad_stag_cano_fwd_cuda%cuda_struct, &
                        self%grad_stag_cano_fwd_cuda%x)
        call setvecvals(self%grad_stag_cano_bwd_cuda%cuda_struct, &
                        self%grad_stag_cano_bwd_cuda%x)
        call perf_stop('pgrad_cuda_copy')

        ! --- Sparse matrix-vector products ------------------------------
        call perf_start('pgrad_cuda_spmv')
        call spmv_cusparse(self%grad_stag_cano_fwd_cuda%cuda_struct)
        call spmv_cusparse(self%grad_stag_cano_bwd_cuda%cuda_struct)
        call perf_stop('pgrad_cuda_spmv')

        ! --- Combine both contributions ---------------------------------
        par_grad_u_stag = self%grad_stag_cano_fwd_cuda%y &
                          -self%grad_stag_cano_bwd_cuda%y

    end subroutine par_grad_cuda

    module subroutine par_divb_cuda(self, ustag, ustag_bwd, par_pdiv_u_cano)
        !! Evaluates the parallel divergence with the CUDA sparse operators.
        !!
        !! Steps, in order:
        !!   1. `ustag` / `ustag_bwd` are handed to `copydoubleh2d`, targeting
        !!      the `x` buffers of the forward / backward CUDA objects
        !!      (presumably a host-to-device copy, judging by the routine
        !!      name -- confirm against its interface).
        !!   2. `setvecvals` is called for each CUDA object with its `x` buffer.
        !!   3. `spmv_cusparse` is called for the forward, then the backward
        !!      operator.
        !!   4. The result is the negated forward `y` plus the backward `y`
        !!      (opposite sign convention to `par_grad_cuda`).
        !!
        !! Steps 1-2 are timed under 'divb_cuda_copy', step 3 under
        !! 'divb_cuda_spmv'; the final combination is not timed.
        !!
        !! NOTE(review): the input vectors are declared with the `ncol` of the
        !! host matrices, whereas the element count passed to `copydoubleh2d`
        !! is the `nrows` of the CUDA objects -- confirm these always agree.
        class(parallel_map_t), intent(in) :: self
            !! Map object holding the host and CUDA operator data
        real(GP), dimension(self%pdiv_cano_stag_fwd%ncol), intent(in) :: ustag
            !! Input copied into the `x` buffer of the forward operator
        real(GP), dimension(self%pdiv_cano_stag_bwd%ncol), intent(in) :: ustag_bwd
            !! Input copied into the `x` buffer of the backward operator
        real(GP), dimension(self%pdiv_cano_stag_fwd%ndim), intent(out) :: par_pdiv_u_cano
            !! Backward result minus forward result

        ! --- Upload of the input vectors --------------------------------
        call perf_start('divb_cuda_copy')
        call copydoubleh2d(self%pdiv_cano_stag_fwd_cuda%x, &
                           ustag, &
                           self%pdiv_cano_stag_fwd_cuda%nrows)
        call copydoubleh2d(self%pdiv_cano_stag_bwd_cuda%x, &
                           ustag_bwd, &
                           self%pdiv_cano_stag_bwd_cuda%nrows)
        call setvecvals(self%pdiv_cano_stag_fwd_cuda%cuda_struct, &
                        self%pdiv_cano_stag_fwd_cuda%x)
        call setvecvals(self%pdiv_cano_stag_bwd_cuda%cuda_struct, &
                        self%pdiv_cano_stag_bwd_cuda%x)
        call perf_stop('divb_cuda_copy')

        ! --- Sparse matrix-vector products ------------------------------
        call perf_start('divb_cuda_spmv')
        call spmv_cusparse(self%pdiv_cano_stag_fwd_cuda%cuda_struct)
        call spmv_cusparse(self%pdiv_cano_stag_bwd_cuda%cuda_struct)
        call perf_stop('divb_cuda_spmv')

        ! --- Combine both contributions ---------------------------------
        par_pdiv_u_cano = -self%pdiv_cano_stag_fwd_cuda%y &
                          +self%pdiv_cano_stag_bwd_cuda%y

    end subroutine par_divb_cuda

end submodule parallel_map_operators_cuda_s