Skip to content

Commit

Permalink
Simplified the BUFFERED_ALLOCATOR_FIX protection
Browse files Browse the repository at this point in the history
  • Loading branch information
PaulMullowney committed Jan 16, 2025
1 parent 111d93a commit fe63f9c
Show file tree
Hide file tree
Showing 12 changed files with 31 additions and 277 deletions.
34 changes: 7 additions & 27 deletions src/trans/gpu/algor/buffered_allocator_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -102,29 +102,7 @@ SUBROUTINE INSTANTIATE_ALLOCATOR(ALLOCATOR, GROWING_ALLOCATION)
CALL REALLOCATE_GROWING_ALLOCATION(GROWING_ALLOCATION, SUM(ALLOCATOR%BUFR_SZ))
END SUBROUTINE

#ifdef BUFFERED_ALLOCATOR_FIX

! Associate ALLOCATION with the part of the allocator storage that belongs to RESERVATION.
!
! This is the subroutine form of GET_ALLOCATION, compiled only when BUFFERED_ALLOCATOR_FIX
! is defined: the pointer is handed back through the ALLOCATION argument instead of being
! returned as a function result.
!
! Arguments:
!   ALLOCATOR   - buffered allocator; its BUFR_SZ sizes and PTR%PTR storage are read here
!                 (declared INTENT(INOUT), but nothing in this routine assigns to it)
!   RESERVATION - handle supplying the buffer index (BUF) and the reserved size (SZ)
!   ALLOCATION  - on return, points at SZ elements of ALLOCATOR%PTR%PTR with lower bound 1
!
! Calls ABORT_TRANS when the reservation is larger than the buffer it refers to.
SUBROUTINE GET_ALLOCATION(ALLOCATOR, RESERVATION, ALLOCATION)
IMPLICIT NONE
TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR
TYPE(ALLOCATION_RESERVATION_HANDLE), INTENT(IN) :: RESERVATION

INTEGER(KIND=C_INT8_T), POINTER, INTENT(INOUT) :: ALLOCATION(:)

! Sanity check: the reserved size must fit into the buffer selected by RESERVATION%BUF.
IF (RESERVATION%SZ > ALLOCATOR%BUFR_SZ(RESERVATION%BUF)) THEN
CALL ABORT_TRANS( "Logical Error in GET_ALLOCATION")
ENDIF
IF (RESERVATION%BUF == 0) THEN
! Buffer 0 starts at the first element of the backing storage.
ALLOCATION(1:) => ALLOCATOR%PTR%PTR(1:RESERVATION%SZ)
ELSE
! Any other buffer starts right after the combined sizes of buffers 0 .. BUF-1.
! The (1:) bounds specification makes the returned pointer 1-based in both branches.
ALLOCATION(1:) => ALLOCATOR%PTR%PTR(SUM(ALLOCATOR%BUFR_SZ(0:RESERVATION%BUF-1))+1: &
SUM(ALLOCATOR%BUFR_SZ(0:RESERVATION%BUF-1))+RESERVATION%SZ)
ENDIF
END SUBROUTINE

#else

FUNCTION GET_ALLOCATION(ALLOCATOR, RESERVATION) RESULT(GET_ALLOCATION)
FUNCTION GET_ALLOCATION(ALLOCATOR, RESERVATION)
IMPLICIT NONE
TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR
TYPE(ALLOCATION_RESERVATION_HANDLE), INTENT(IN) :: RESERVATION
Expand All @@ -142,12 +120,10 @@ FUNCTION GET_ALLOCATION(ALLOCATOR, RESERVATION) RESULT(GET_ALLOCATION)
ENDIF
END FUNCTION GET_ALLOCATION

#endif

SUBROUTINE ASSIGN_PTR_FLOAT(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALUE, SET_STREAM)
USE ISO_C_BINDING, ONLY: C_FLOAT, C_F_POINTER, C_SIZEOF
IMPLICIT NONE
INTEGER(KIND=C_INT8_T), TARGET, INTENT(INOUT) :: SRC(:)
INTEGER(KIND=C_INT8_T), TARGET, INTENT(IN) :: SRC(:)
REAL(KIND=C_FLOAT), POINTER, INTENT(OUT) :: DST(:)
LOGICAL, INTENT(IN), OPTIONAL :: SET_VALUE
INTEGER(KIND=4), INTENT(IN), OPTIONAL :: SET_STREAM
Expand All @@ -174,6 +150,7 @@ SUBROUTINE ASSIGN_PTR_FLOAT(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALUE
#ifdef OMPGPU
#endif
ENDIF
#ifdef BUFFERED_ALLOCATOR_FIX
IF (SET_VALUE_EFF .AND. LENGTH_IN_BYTES > 0) THEN
! This option is turned off by default, but for experimentation we can turn it on. This is
! setting all bits to 1 (meaning NaN in floating point)
Expand All @@ -191,13 +168,14 @@ SUBROUTINE ASSIGN_PTR_FLOAT(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALUE
#ifdef OMPGPU
#endif
ENDIF
#endif
CALL C_F_POINTER(C_LOC(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1)), DST, &
& [C_SIZEOF(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1))/C_SIZEOF(DST(0))])
END SUBROUTINE ASSIGN_PTR_FLOAT
SUBROUTINE ASSIGN_PTR_DOUBLE(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALUE, SET_STREAM)
USE ISO_C_BINDING, ONLY: C_DOUBLE, C_F_POINTER, C_SIZEOF
IMPLICIT NONE
INTEGER(KIND=C_INT8_T), POINTER, INTENT(INOUT) :: SRC(:)
INTEGER(KIND=C_INT8_T), POINTER, INTENT(IN) :: SRC(:)
REAL(KIND=C_DOUBLE), POINTER, INTENT(OUT) :: DST(:)
LOGICAL, INTENT(IN), OPTIONAL :: SET_VALUE
INTEGER(KIND=4), INTENT(IN), OPTIONAL :: SET_STREAM
Expand All @@ -224,6 +202,7 @@ SUBROUTINE ASSIGN_PTR_DOUBLE(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALU
#ifdef OMPGPU
#endif
ENDIF
#ifdef BUFFERED_ALLOCATOR_FIX
IF (SET_VALUE_EFF .AND. LENGTH_IN_BYTES > 0) THEN
! This option is turned off by default, but for experimentation we can turn it on. This is
! setting all bits to 1 (meaning NaN in floating point)
Expand All @@ -242,6 +221,7 @@ SUBROUTINE ASSIGN_PTR_DOUBLE(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALU
#ifdef OMPGPU
#endif
ENDIF
#endif
CALL C_F_POINTER(C_LOC(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1)), DST, &
& [C_SIZEOF(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1))/C_SIZEOF(DST(0))])
END SUBROUTINE ASSIGN_PTR_DOUBLE
Expand Down
3 changes: 1 addition & 2 deletions src/trans/gpu/algor/ext_acc.F90
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,11 @@ module openacc_ext
implicit none

private
#ifdef ACCGPU
public :: ext_acc_pass, ext_acc_create, ext_acc_copyin, ext_acc_copyout, &
#ifdef ACCGPU
& ext_acc_delete, ext_acc_arr_desc, acc_handle_kind
#endif
#ifdef OMPGPU
public :: ext_acc_pass, ext_acc_create, ext_acc_copyin, ext_acc_copyout, &
& ext_acc_delete, ext_acc_arr_desc
#endif

Expand Down
15 changes: 0 additions & 15 deletions src/trans/gpu/internal/ftdir_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -81,37 +81,22 @@ SUBROUTINE FTDIR(ALLOCATOR,HFTDIR,PREEL_REAL,PREEL_COMPLEX,KFIELD)
USE TPM_HICFFT, ONLY: EXECUTE_DIR_FFT
USE MPL_MODULE, ONLY: MPL_BARRIER,MPL_ALL_MS_COMM
USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX
#ifdef BUFFERED_ALLOCATOR_FIX
USE ISO_C_BINDING, ONLY: C_INT8_T, C_SIZE_T, C_SIZEOF
#else
USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF
#endif

IMPLICIT NONE

INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD
REAL(KIND=JPRBT), INTENT(INOUT), POINTER :: PREEL_REAL(:)
REAL(KIND=JPRBT), INTENT(OUT), POINTER :: PREEL_COMPLEX(:)
#ifdef BUFFERED_ALLOCATOR_FIX
INTEGER(KIND=C_INT8_T), POINTER, INTENT(INOUT) :: TMP(:)
TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR
#else
TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR
#endif
TYPE(FTDIR_HANDLE) :: HFTDIR

INTEGER(KIND=JPIM) :: KGL

#ifdef IN_PLACE_FFT
PREEL_COMPLEX => PREEL_REAL
#else

#ifdef BUFFERED_ALLOCATOR_FIX
CALL GET_ALLOCATION(ALLOCATOR, HFTDIR%HREEL_COMPLEX, TMP)
CALL ASSIGN_PTR(PREEL_COMPLEX, TMP,&
#else
CALL ASSIGN_PTR(PREEL_COMPLEX, GET_ALLOCATION(ALLOCATOR, HFTDIR%HREEL_COMPLEX),&
#endif
& 1_JPIB, 1_JPIB*KFIELD*D%NLENGTF*C_SIZEOF(PREEL_COMPLEX(1)))
#endif

Expand Down
15 changes: 0 additions & 15 deletions src/trans/gpu/internal/ftinv_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -80,23 +80,14 @@ SUBROUTINE FTINV(ALLOCATOR,HFTINV,PREEL_COMPLEX,PREEL_REAL,KFIELD)
USE MPL_MODULE, ONLY: MPL_BARRIER,MPL_ALL_MS_COMM
USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX
USE BUFFERED_ALLOCATOR_MOD, ONLY: ASSIGN_PTR, GET_ALLOCATION
#ifdef BUFFERED_ALLOCATOR_FIX
USE ISO_C_BINDING, ONLY: C_INT8_T, C_SIZE_T, C_SIZEOF
#else
USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF
#endif

IMPLICIT NONE

INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD
REAL(KIND=JPRBT), INTENT(INOUT), POINTER :: PREEL_REAL(:)
REAL(KIND=JPRBT), INTENT(OUT), POINTER :: PREEL_COMPLEX(:)
#ifdef BUFFERED_ALLOCATOR_FIX
INTEGER(KIND=C_INT8_T), POINTER, INTENT(INOUT) :: TMP(:)
TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR
#else
TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR
#endif
TYPE(FTINV_HANDLE), INTENT(IN) :: HFTINV

INTEGER(KIND=JPIM) :: KGL
Expand All @@ -105,15 +96,9 @@ SUBROUTINE FTINV(ALLOCATOR,HFTINV,PREEL_COMPLEX,PREEL_REAL,KFIELD)

#ifdef IN_PLACE_FFT
PREEL_REAL => PREEL_COMPLEX
#else
#ifdef BUFFERED_ALLOCATOR_FIX
CALL GET_ALLOCATION(ALLOCATOR, HFTINV%HREEL_REAL, TMP)
CALL ASSIGN_PTR(PREEL_REAL, TMP,&
#else
CALL ASSIGN_PTR(PREEL_REAL, GET_ALLOCATION(ALLOCATOR, HFTINV%HREEL_REAL),&
#endif
& 1_JPIB, 1_JPIB*KFIELD*D%NLENGTF*C_SIZEOF(PREEL_REAL(1)))
#endif

#ifdef OMPGPU
#ifdef WORKAROUND_PRESENT_ALLOC
Expand Down
25 changes: 0 additions & 25 deletions src/trans/gpu/internal/ltdir_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -161,12 +161,7 @@ SUBROUTINE LTDIR(ALLOCATOR,HLTDIR,ZINPS,ZINPA,ZINPS0,ZINPA0,KF_FS,KF_UV,KF_SCALA
REAL(KIND=JPRB), POINTER :: PU(:,:,:), PV(:,:,:), PVOR(:,:,:), PDIV(:,:,:)
REAL(KIND=JPRBT), POINTER :: ZOUT(:)
REAL(KIND=JPRD), POINTER :: ZOUT0(:)
#ifdef BUFFERED_ALLOCATOR_FIX
INTEGER(KIND=C_INT8_T), POINTER, INTENT(INOUT) :: TMP(:)
TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR
#else
TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR
#endif
TYPE(LTDIR_HANDLE), INTENT(IN) :: HLTDIR
INTEGER(KIND=JPIB) :: IALLOC_POS, IALLOC_SZ
INTEGER(KIND=JPIM) :: IOUT_STRIDES0
Expand Down Expand Up @@ -194,46 +189,26 @@ SUBROUTINE LTDIR(ALLOCATOR,HLTDIR,ZINPS,ZINPA,ZINPS0,ZINPA0,KF_FS,KF_UV,KF_SCALA
IALLOC_POS = 1

IALLOC_SZ = ALIGN(2_JPIB*KF_FS*(R%NTMAX+3)*D%NUMP*C_SIZEOF(POA1_L(1)),128)
#ifdef BUFFERED_ALLOCATOR_FIX
CALL GET_ALLOCATION(ALLOCATOR, HLTDIR%HOUT_AND_POA, TMP)
CALL ASSIGN_PTR(POA1_L, TMP,&
#else
CALL ASSIGN_PTR(POA1_L, GET_ALLOCATION(ALLOCATOR, HLTDIR%HOUT_AND_POA),&
#endif
& IALLOC_POS, IALLOC_SZ, SET_STREAM=1)
CALL C_F_POINTER(C_LOC(POA1_L), POA1, (/ 2*KF_FS, R%NTMAX+3, D%NUMP /))
IALLOC_POS = IALLOC_POS + IALLOC_SZ

IALLOC_SZ = ALIGN(4_JPIB*KF_UV*(R%NTMAX+3)*D%NUMP*C_SIZEOF(POA2_L(1)),128)
#ifdef BUFFERED_ALLOCATOR_FIX
CALL GET_ALLOCATION(ALLOCATOR, HLTDIR%HOUT_AND_POA, TMP)
CALL ASSIGN_PTR(POA2_L, TMP,&
#else
CALL ASSIGN_PTR(POA2_L, GET_ALLOCATION(ALLOCATOR, HLTDIR%HOUT_AND_POA),&
#endif
& IALLOC_POS, IALLOC_SZ, SET_STREAM=1)
CALL C_F_POINTER(C_LOC(POA2_L), POA2, (/ 4*KF_UV, R%NTMAX+3, D%NUMP /))
IALLOC_POS = IALLOC_POS + IALLOC_SZ

! ZOUT
IALLOC_SZ = ALIGN(IOUT_SIZE*C_SIZEOF(ZOUT(1)),128)
#ifdef BUFFERED_ALLOCATOR_FIX
CALL GET_ALLOCATION(ALLOCATOR, HLTDIR%HOUT_AND_POA, TMP)
CALL ASSIGN_PTR(ZOUT, TMP,&
#else
CALL ASSIGN_PTR(ZOUT, GET_ALLOCATION(ALLOCATOR, HLTDIR%HOUT_AND_POA),&
#endif
& IALLOC_POS, IALLOC_SZ, SET_STREAM=1)
IALLOC_POS = IALLOC_POS + IALLOC_SZ

! ZOUT0
IALLOC_SZ = ALIGN(IOUT0_SIZE*C_SIZEOF(ZOUT0(1)),128)
#ifdef BUFFERED_ALLOCATOR_FIX
CALL GET_ALLOCATION(ALLOCATOR, HLTDIR%HOUT_AND_POA, TMP)
CALL ASSIGN_PTR(ZOUT0, TMP,&
#else
CALL ASSIGN_PTR(ZOUT0, GET_ALLOCATION(ALLOCATOR, HLTDIR%HOUT_AND_POA),&
#endif
& IALLOC_POS, IALLOC_SZ, SET_STREAM=1)
IALLOC_POS = IALLOC_POS + IALLOC_SZ

Expand Down
44 changes: 0 additions & 44 deletions src/trans/gpu/internal/ltinv_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -119,11 +119,7 @@ SUBROUTINE LTINV(ALLOCATOR,HLTINV,KF_UV,KF_SCALARS,&
USE MPL_MODULE, ONLY: MPL_BARRIER,MPL_ALL_MS_COMM
USE TPM_GEN, ONLY: LSYNC_TRANS
USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX
#ifdef BUFFERED_ALLOCATOR_FIX
USE ISO_C_BINDING, ONLY: C_INT8_T, C_SIZE_T, C_LOC, C_SIZEOF
#else
USE ISO_C_BINDING, ONLY: C_SIZE_T, C_LOC, C_SIZEOF
#endif

!**** *LTINV* - Inverse Legendre transform
!
Expand Down Expand Up @@ -195,12 +191,7 @@ SUBROUTINE LTINV(ALLOCATOR,HLTINV,KF_UV,KF_SCALARS,&
REAL(KIND=JPRB), POINTER :: PSCALARS(:,:,:), PSCALARS_NSDER(:,:,:)

REAL(KIND=JPHOOK) :: ZHOOK_HANDLE
#ifdef BUFFERED_ALLOCATOR_FIX
INTEGER(KIND=C_INT8_T), POINTER, INTENT(INOUT) :: TMP(:)
TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR
#else
TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR
#endif
TYPE(LTINV_HANDLE), INTENT(IN) :: HLTINV

INTEGER(KIND=JPIM) :: IOUT_STRIDES0
Expand Down Expand Up @@ -248,81 +239,46 @@ SUBROUTINE LTINV(ALLOCATOR,HLTINV,KF_UV,KF_SCALARS,&

! PIA
IALLOC_SZ = ALIGN(2_JPIB*IF_READIN*(R%NTMAX+3)*D%NUMP*C_SIZEOF(PIA_L(1)),128)
#ifdef BUFFERED_ALLOCATOR_FIX
CALL GET_ALLOCATION(ALLOCATOR, HLTINV%HPIA_AND_IN, TMP)
CALL ASSIGN_PTR(PIA_L, TMP,&
#else
CALL ASSIGN_PTR(PIA_L, GET_ALLOCATION(ALLOCATOR, HLTINV%HPIA_AND_IN),&
#endif
& IALLOC_POS, IALLOC_SZ)
CALL C_F_POINTER(C_LOC(PIA_L), PIA, (/ 2*IF_READIN, R%NTMAX+3, D%NUMP /))
IALLOC_POS = IALLOC_POS + IALLOC_SZ

! ZINP
IALLOC_SZ = ALIGN(IIN_SIZE*C_SIZEOF(ZINP(1)),128)
#ifdef BUFFERED_ALLOCATOR_FIX
CALL GET_ALLOCATION(ALLOCATOR, HLTINV%HPIA_AND_IN, TMP)
CALL ASSIGN_PTR(ZINP, TMP,&
#else
CALL ASSIGN_PTR(ZINP, GET_ALLOCATION(ALLOCATOR, HLTINV%HPIA_AND_IN),&
#endif
& IALLOC_POS, IALLOC_SZ)
IALLOC_POS = IALLOC_POS + IALLOC_SZ

! ZINP0
IALLOC_SZ = ALIGN(IIN0_SIZE*C_SIZEOF(ZINP0(1)),128)
#ifdef BUFFERED_ALLOCATOR_FIX
CALL GET_ALLOCATION(ALLOCATOR, HLTINV%HPIA_AND_IN, TMP)
CALL ASSIGN_PTR(ZINP0, TMP,&
#else
CALL ASSIGN_PTR(ZINP0, GET_ALLOCATION(ALLOCATOR, HLTINV%HPIA_AND_IN),&
#endif
& IALLOC_POS, IALLOC_SZ)
IALLOC_POS = IALLOC_POS + IALLOC_SZ

IALLOC_POS = 1

! ZOUTA
IALLOC_SZ = ALIGN(IOUT_SIZE*C_SIZEOF(ZOUTA(1)),128)
#ifdef BUFFERED_ALLOCATOR_FIX
CALL GET_ALLOCATION(ALLOCATOR, HLTINV%HOUTS_AND_OUTA, TMP)
CALL ASSIGN_PTR(ZOUTA, TMP,&
#else
CALL ASSIGN_PTR(ZOUTA, GET_ALLOCATION(ALLOCATOR, HLTINV%HOUTS_AND_OUTA),&
#endif
& IALLOC_POS, IALLOC_SZ)
IALLOC_POS = IALLOC_POS + IALLOC_SZ

! ZOUTS
IALLOC_SZ = ALIGN(IOUT_SIZE*C_SIZEOF(ZOUTS(1)),128)
#ifdef BUFFERED_ALLOCATOR_FIX
CALL GET_ALLOCATION(ALLOCATOR, HLTINV%HOUTS_AND_OUTA, TMP)
CALL ASSIGN_PTR(ZOUTS, TMP,&
#else
CALL ASSIGN_PTR(ZOUTS, GET_ALLOCATION(ALLOCATOR, HLTINV%HOUTS_AND_OUTA),&
#endif
& IALLOC_POS, IALLOC_SZ)
IALLOC_POS = IALLOC_POS + IALLOC_SZ

! ZOUTA0
IALLOC_SZ = ALIGN(IOUT0_SIZE*C_SIZEOF(ZOUTA0(1)),128)
#ifdef BUFFERED_ALLOCATOR_FIX
CALL GET_ALLOCATION(ALLOCATOR, HLTINV%HOUTS_AND_OUTA, TMP)
CALL ASSIGN_PTR(ZOUTA0, TMP,&
#else
CALL ASSIGN_PTR(ZOUTA0, GET_ALLOCATION(ALLOCATOR, HLTINV%HOUTS_AND_OUTA),&
#endif
& IALLOC_POS, IALLOC_SZ)
IALLOC_POS = IALLOC_POS + IALLOC_SZ

! ZOUTS0
IALLOC_SZ = ALIGN(IOUT0_SIZE*C_SIZEOF(ZOUTS0(1)),128)
#ifdef BUFFERED_ALLOCATOR_FIX
CALL GET_ALLOCATION(ALLOCATOR, HLTINV%HOUTS_AND_OUTA, TMP)
CALL ASSIGN_PTR(ZOUTS0, TMP,&
#else
CALL ASSIGN_PTR(ZOUTS0, GET_ALLOCATION(ALLOCATOR, HLTINV%HOUTS_AND_OUTA),&
#endif
& IALLOC_POS, IALLOC_SZ)
IALLOC_POS = IALLOC_POS + IALLOC_SZ

Expand Down
Loading

0 comments on commit fe63f9c

Please sign in to comment.