From fe63f9c24fceeac6b10c359ea1d0a9920b1669ef Mon Sep 17 00:00:00 2001 From: Paul Mullowney Date: Thu, 16 Jan 2025 13:15:23 -0600 Subject: [PATCH] Simplified the BUFFERED_ALLOCATOR_FIX protection --- .../gpu/algor/buffered_allocator_mod.F90 | 34 +++----------- src/trans/gpu/algor/ext_acc.F90 | 3 +- src/trans/gpu/internal/ftdir_mod.F90 | 15 ------- src/trans/gpu/internal/ftinv_mod.F90 | 15 ------- src/trans/gpu/internal/ltdir_mod.F90 | 25 ----------- src/trans/gpu/internal/ltinv_mod.F90 | 44 ------------------- src/trans/gpu/internal/trgtol_mod.F90 | 38 ++++++---------- src/trans/gpu/internal/trltog_mod.F90 | 31 ++++--------- src/trans/gpu/internal/trltom_mod.F90 | 14 ------ src/trans/gpu/internal/trltom_pack_unpack.F90 | 43 ------------------ src/trans/gpu/internal/trmtol_mod.F90 | 14 ------ src/trans/gpu/internal/trmtol_pack_unpack.F90 | 32 +------------- 12 files changed, 31 insertions(+), 277 deletions(-) diff --git a/src/trans/gpu/algor/buffered_allocator_mod.F90 b/src/trans/gpu/algor/buffered_allocator_mod.F90 index d3aaf4275..346aec0b5 100644 --- a/src/trans/gpu/algor/buffered_allocator_mod.F90 +++ b/src/trans/gpu/algor/buffered_allocator_mod.F90 @@ -102,29 +102,7 @@ SUBROUTINE INSTANTIATE_ALLOCATOR(ALLOCATOR, GROWING_ALLOCATION) CALL REALLOCATE_GROWING_ALLOCATION(GROWING_ALLOCATION, SUM(ALLOCATOR%BUFR_SZ)) END SUBROUTINE -#ifdef BUFFERED_ALLOCATOR_FIX - - SUBROUTINE GET_ALLOCATION(ALLOCATOR, RESERVATION, ALLOCATION) - IMPLICIT NONE - TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR - TYPE(ALLOCATION_RESERVATION_HANDLE), INTENT(IN) :: RESERVATION - - INTEGER(KIND=C_INT8_T), POINTER, INTENT(INOUT) :: ALLOCATION(:) - - IF (RESERVATION%SZ > ALLOCATOR%BUFR_SZ(RESERVATION%BUF)) THEN - CALL ABORT_TRANS( "Logical Error in GET_ALLOCATION") - ENDIF - IF (RESERVATION%BUF == 0) THEN - ALLOCATION(1:) => ALLOCATOR%PTR%PTR(1:RESERVATION%SZ) - ELSE - ALLOCATION(1:) => ALLOCATOR%PTR%PTR(SUM(ALLOCATOR%BUFR_SZ(0:RESERVATION%BUF-1))+1: & - SUM(ALLOCATOR%BUFR_SZ(0:RESERVATION%BUF-1))+RESERVATION%SZ) - ENDIF - END SUBROUTINE - -#else - - FUNCTION GET_ALLOCATION(ALLOCATOR, RESERVATION) RESULT(GET_ALLOCATION) + FUNCTION GET_ALLOCATION(ALLOCATOR, RESERVATION) IMPLICIT NONE TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR TYPE(ALLOCATION_RESERVATION_HANDLE), INTENT(IN) :: RESERVATION @@ -142,12 +120,10 @@ FUNCTION GET_ALLOCATION(ALLOCATOR, RESERVATION) RESULT(GET_ALLOCATION) ENDIF END FUNCTION GET_ALLOCATION -#endif - SUBROUTINE ASSIGN_PTR_FLOAT(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALUE, SET_STREAM) USE ISO_C_BINDING, ONLY: C_FLOAT, C_F_POINTER, C_SIZEOF IMPLICIT NONE - INTEGER(KIND=C_INT8_T), TARGET, INTENT(INOUT) :: SRC(:) + INTEGER(KIND=C_INT8_T), TARGET, INTENT(IN) :: SRC(:) REAL(KIND=C_FLOAT), POINTER, INTENT(OUT) :: DST(:) LOGICAL, INTENT(IN), OPTIONAL :: SET_VALUE INTEGER(KIND=4), INTENT(IN), OPTIONAL :: SET_STREAM @@ -174,6 +150,7 @@ SUBROUTINE ASSIGN_PTR_FLOAT(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALUE #ifdef OMPGPU #endif ENDIF +#ifdef BUFFERED_ALLOCATOR_FIX IF (SET_VALUE_EFF .AND. LENGTH_IN_BYTES > 0) THEN ! This option is turned off by default, but for experimentation we can turn it on. This is ! setting all bits to 1 (meaning NaN in floating point) @@ -191,13 +168,14 @@ SUBROUTINE ASSIGN_PTR_FLOAT(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALUE #ifdef OMPGPU #endif ENDIF +#endif CALL C_F_POINTER(C_LOC(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1)), DST, & & [C_SIZEOF(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1))/C_SIZEOF(DST(0))]) END SUBROUTINE ASSIGN_PTR_FLOAT SUBROUTINE ASSIGN_PTR_DOUBLE(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALUE, SET_STREAM) USE ISO_C_BINDING, ONLY: C_DOUBLE, C_F_POINTER, C_SIZEOF IMPLICIT NONE - INTEGER(KIND=C_INT8_T), POINTER, INTENT(INOUT) :: SRC(:) + INTEGER(KIND=C_INT8_T), POINTER, INTENT(IN) :: SRC(:) REAL(KIND=C_DOUBLE), POINTER, INTENT(OUT) :: DST(:) LOGICAL, INTENT(IN), OPTIONAL :: SET_VALUE INTEGER(KIND=4), INTENT(IN), OPTIONAL :: SET_STREAM @@ -224,6 +202,7 @@ SUBROUTINE ASSIGN_PTR_DOUBLE(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALU #ifdef OMPGPU #endif ENDIF +#ifdef BUFFERED_ALLOCATOR_FIX IF (SET_VALUE_EFF .AND. LENGTH_IN_BYTES > 0) THEN ! This option is turned off by default, but for experimentation we can turn it on. This is ! setting all bits to 1 (meaning NaN in floating point) @@ -242,6 +221,7 @@ SUBROUTINE ASSIGN_PTR_DOUBLE(DST, SRC, START_IN_BYTES, LENGTH_IN_BYTES, SET_VALU #ifdef OMPGPU #endif ENDIF +#endif CALL C_F_POINTER(C_LOC(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1)), DST, & & [C_SIZEOF(SRC(START_IN_BYTES:START_IN_BYTES+LENGTH_IN_BYTES-1))/C_SIZEOF(DST(0))]) END SUBROUTINE ASSIGN_PTR_DOUBLE diff --git a/src/trans/gpu/algor/ext_acc.F90 b/src/trans/gpu/algor/ext_acc.F90 index 7263cad55..fe8883a0b 100644 --- a/src/trans/gpu/algor/ext_acc.F90 +++ b/src/trans/gpu/algor/ext_acc.F90 @@ -27,12 +27,11 @@ module openacc_ext implicit none private -#ifdef ACCGPU public :: ext_acc_pass, ext_acc_create, ext_acc_copyin, ext_acc_copyout, & +#ifdef ACCGPU & ext_acc_delete, ext_acc_arr_desc, acc_handle_kind #endif #ifdef OMPGPU - public :: ext_acc_pass, ext_acc_create, ext_acc_copyin, ext_acc_copyout, & & ext_acc_delete, ext_acc_arr_desc #endif diff --git a/src/trans/gpu/internal/ftdir_mod.F90 b/src/trans/gpu/internal/ftdir_mod.F90 index f0ce1b5ec..7d373bbc3 100755 --- a/src/trans/gpu/internal/ftdir_mod.F90 +++ b/src/trans/gpu/internal/ftdir_mod.F90 @@ -81,23 +81,14 @@ SUBROUTINE FTDIR(ALLOCATOR,HFTDIR,PREEL_REAL,PREEL_COMPLEX,KFIELD) USE TPM_HICFFT, ONLY: EXECUTE_DIR_FFT USE MPL_MODULE, ONLY: MPL_BARRIER,MPL_ALL_MS_COMM USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX -#ifdef BUFFERED_ALLOCATOR_FIX - USE ISO_C_BINDING, ONLY: C_INT8_T, C_SIZE_T, C_SIZEOF -#else USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF -#endif IMPLICIT NONE INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD REAL(KIND=JPRBT), INTENT(INOUT), POINTER :: PREEL_REAL(:) REAL(KIND=JPRBT), INTENT(OUT), POINTER :: PREEL_COMPLEX(:) -#ifdef BUFFERED_ALLOCATOR_FIX - INTEGER(KIND=C_INT8_T), POINTER, INTENT(INOUT) :: TMP(:) - TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR -#else TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR -#endif TYPE(FTDIR_HANDLE) :: HFTDIR INTEGER(KIND=JPIM) :: KGL @@ -105,13 +96,7 @@ SUBROUTINE FTDIR(ALLOCATOR,HFTDIR,PREEL_REAL,PREEL_COMPLEX,KFIELD) #ifdef IN_PLACE_FFT PREEL_COMPLEX => PREEL_REAL #else - -#ifdef BUFFERED_ALLOCATOR_FIX - CALL GET_ALLOCATION(ALLOCATOR, HFTDIR%HREEL_COMPLEX, TMP) - CALL ASSIGN_PTR(PREEL_COMPLEX, TMP,& -#else CALL ASSIGN_PTR(PREEL_COMPLEX, GET_ALLOCATION(ALLOCATOR, HFTDIR%HREEL_COMPLEX),& -#endif & 1_JPIB, 1_JPIB*KFIELD*D%NLENGTF*C_SIZEOF(PREEL_COMPLEX(1))) #endif diff --git a/src/trans/gpu/internal/ftinv_mod.F90 b/src/trans/gpu/internal/ftinv_mod.F90 index 269f6034c..f41eabe93 100755 --- a/src/trans/gpu/internal/ftinv_mod.F90 +++ b/src/trans/gpu/internal/ftinv_mod.F90 @@ -80,23 +80,14 @@ SUBROUTINE FTINV(ALLOCATOR,HFTINV,PREEL_COMPLEX,PREEL_REAL,KFIELD) USE MPL_MODULE, ONLY: MPL_BARRIER,MPL_ALL_MS_COMM USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX USE BUFFERED_ALLOCATOR_MOD, ONLY: ASSIGN_PTR, GET_ALLOCATION -#ifdef BUFFERED_ALLOCATOR_FIX - USE ISO_C_BINDING, ONLY: C_INT8_T, C_SIZE_T, C_SIZEOF -#else USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF -#endif IMPLICIT NONE INTEGER(KIND=JPIM),INTENT(IN) :: KFIELD REAL(KIND=JPRBT), INTENT(INOUT), POINTER :: PREEL_REAL(:) REAL(KIND=JPRBT), INTENT(OUT), POINTER :: PREEL_COMPLEX(:) -#ifdef BUFFERED_ALLOCATOR_FIX - INTEGER(KIND=C_INT8_T), POINTER, INTENT(INOUT) :: TMP(:) - TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR -#else TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR -#endif TYPE(FTINV_HANDLE), INTENT(IN) :: HFTINV INTEGER(KIND=JPIM) :: KGL @@ -105,15 +96,9 @@ SUBROUTINE FTINV(ALLOCATOR,HFTINV,PREEL_COMPLEX,PREEL_REAL,KFIELD) #ifdef IN_PLACE_FFT PREEL_REAL => PREEL_COMPLEX -#else -#ifdef BUFFERED_ALLOCATOR_FIX - CALL GET_ALLOCATION(ALLOCATOR, HFTINV%HREEL_REAL, TMP) - CALL ASSIGN_PTR(PREEL_REAL, TMP,& #else CALL ASSIGN_PTR(PREEL_REAL, GET_ALLOCATION(ALLOCATOR, HFTINV%HREEL_REAL),& -#endif & 1_JPIB, 1_JPIB*KFIELD*D%NLENGTF*C_SIZEOF(PREEL_REAL(1))) -#endif #ifdef OMPGPU #ifdef WORKAROUND_PRESENT_ALLOC diff --git a/src/trans/gpu/internal/ltdir_mod.F90 b/src/trans/gpu/internal/ltdir_mod.F90 index 168227e8c..359782d29 100755 --- a/src/trans/gpu/internal/ltdir_mod.F90 +++ b/src/trans/gpu/internal/ltdir_mod.F90 @@ -161,12 +161,7 @@ SUBROUTINE LTDIR(ALLOCATOR,HLTDIR,ZINPS,ZINPA,ZINPS0,ZINPA0,KF_FS,KF_UV,KF_SCALA REAL(KIND=JPRB), POINTER :: PU(:,:,:), PV(:,:,:), PVOR(:,:,:), PDIV(:,:,:) REAL(KIND=JPRBT), POINTER :: ZOUT(:) REAL(KIND=JPRD), POINTER :: ZOUT0(:) -#ifdef BUFFERED_ALLOCATOR_FIX - INTEGER(KIND=C_INT8_T), POINTER, INTENT(INOUT) :: TMP(:) - TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR -#else TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR -#endif TYPE(LTDIR_HANDLE), INTENT(IN) :: HLTDIR INTEGER(KIND=JPIB) :: IALLOC_POS, IALLOC_SZ INTEGER(KIND=JPIM) :: IOUT_STRIDES0 @@ -194,46 +189,26 @@ SUBROUTINE LTDIR(ALLOCATOR,HLTDIR,ZINPS,ZINPA,ZINPS0,ZINPA0,KF_FS,KF_UV,KF_SCALA IALLOC_POS = 1 IALLOC_SZ = ALIGN(2_JPIB*KF_FS*(R%NTMAX+3)*D%NUMP*C_SIZEOF(POA1_L(1)),128) -#ifdef BUFFERED_ALLOCATOR_FIX - CALL GET_ALLOCATION(ALLOCATOR, HLTDIR%HOUT_AND_POA, TMP) - CALL ASSIGN_PTR(POA1_L, TMP,& -#else CALL ASSIGN_PTR(POA1_L, GET_ALLOCATION(ALLOCATOR, HLTDIR%HOUT_AND_POA),& -#endif & IALLOC_POS, IALLOC_SZ, SET_STREAM=1) CALL C_F_POINTER(C_LOC(POA1_L), POA1, (/ 2*KF_FS, R%NTMAX+3, D%NUMP /)) IALLOC_POS = IALLOC_POS + IALLOC_SZ IALLOC_SZ = ALIGN(4_JPIB*KF_UV*(R%NTMAX+3)*D%NUMP*C_SIZEOF(POA2_L(1)),128) -#ifdef BUFFERED_ALLOCATOR_FIX - CALL GET_ALLOCATION(ALLOCATOR, HLTDIR%HOUT_AND_POA, TMP) - CALL ASSIGN_PTR(POA2_L, TMP,& -#else CALL ASSIGN_PTR(POA2_L, GET_ALLOCATION(ALLOCATOR, HLTDIR%HOUT_AND_POA),& -#endif & IALLOC_POS, IALLOC_SZ, SET_STREAM=1) CALL C_F_POINTER(C_LOC(POA2_L), POA2, (/ 4*KF_UV, R%NTMAX+3, D%NUMP /)) IALLOC_POS = IALLOC_POS + IALLOC_SZ ! ZOUT IALLOC_SZ = ALIGN(IOUT_SIZE*C_SIZEOF(ZOUT(1)),128) -#ifdef BUFFERED_ALLOCATOR_FIX - CALL GET_ALLOCATION(ALLOCATOR, HLTDIR%HOUT_AND_POA, TMP) - CALL ASSIGN_PTR(ZOUT, TMP,& -#else CALL ASSIGN_PTR(ZOUT, GET_ALLOCATION(ALLOCATOR, HLTDIR%HOUT_AND_POA),& -#endif & IALLOC_POS, IALLOC_SZ, SET_STREAM=1) IALLOC_POS = IALLOC_POS + IALLOC_SZ ! ZOUT0 IALLOC_SZ = ALIGN(IOUT0_SIZE*C_SIZEOF(ZOUT0(1)),128) -#ifdef BUFFERED_ALLOCATOR_FIX - CALL GET_ALLOCATION(ALLOCATOR, HLTDIR%HOUT_AND_POA, TMP) - CALL ASSIGN_PTR(ZOUT0, TMP,& -#else CALL ASSIGN_PTR(ZOUT0, GET_ALLOCATION(ALLOCATOR, HLTDIR%HOUT_AND_POA),& -#endif & IALLOC_POS, IALLOC_SZ, SET_STREAM=1) IALLOC_POS = IALLOC_POS + IALLOC_SZ diff --git a/src/trans/gpu/internal/ltinv_mod.F90 b/src/trans/gpu/internal/ltinv_mod.F90 index 0543e0c8e..8d5856a16 100755 --- a/src/trans/gpu/internal/ltinv_mod.F90 +++ b/src/trans/gpu/internal/ltinv_mod.F90 @@ -119,11 +119,7 @@ SUBROUTINE LTINV(ALLOCATOR,HLTINV,KF_UV,KF_SCALARS,& USE MPL_MODULE, ONLY: MPL_BARRIER,MPL_ALL_MS_COMM USE TPM_GEN, ONLY: LSYNC_TRANS USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX -#ifdef BUFFERED_ALLOCATOR_FIX - USE ISO_C_BINDING, ONLY: C_INT8_T, C_SIZE_T, C_LOC, C_SIZEOF -#else USE ISO_C_BINDING, ONLY: C_SIZE_T, C_LOC, C_SIZEOF -#endif !**** *LTINV* - Inverse Legendre transform ! @@ -195,12 +191,7 @@ SUBROUTINE LTINV(ALLOCATOR,HLTINV,KF_UV,KF_SCALARS,& REAL(KIND=JPRB), POINTER :: PSCALARS(:,:,:), PSCALARS_NSDER(:,:,:) REAL(KIND=JPHOOK) :: ZHOOK_HANDLE -#ifdef BUFFERED_ALLOCATOR_FIX - INTEGER(KIND=C_INT8_T), POINTER, INTENT(INOUT) :: TMP(:) - TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR -#else TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR -#endif TYPE(LTINV_HANDLE), INTENT(IN) :: HLTINV INTEGER(KIND=JPIM) :: IOUT_STRIDES0 @@ -248,35 +239,20 @@ SUBROUTINE LTINV(ALLOCATOR,HLTINV,KF_UV,KF_SCALARS,& ! PIA IALLOC_SZ = ALIGN(2_JPIB*IF_READIN*(R%NTMAX+3)*D%NUMP*C_SIZEOF(PIA_L(1)),128) -#ifdef BUFFERED_ALLOCATOR_FIX - CALL GET_ALLOCATION(ALLOCATOR, HLTINV%HPIA_AND_IN, TMP) - CALL ASSIGN_PTR(PIA_L, TMP,& -#else CALL ASSIGN_PTR(PIA_L, GET_ALLOCATION(ALLOCATOR, HLTINV%HPIA_AND_IN),& -#endif & IALLOC_POS, IALLOC_SZ) CALL C_F_POINTER(C_LOC(PIA_L), PIA, (/ 2*IF_READIN, R%NTMAX+3, D%NUMP /)) IALLOC_POS = IALLOC_POS + IALLOC_SZ ! ZINP IALLOC_SZ = ALIGN(IIN_SIZE*C_SIZEOF(ZINP(1)),128) -#ifdef BUFFERED_ALLOCATOR_FIX - CALL GET_ALLOCATION(ALLOCATOR, HLTINV%HPIA_AND_IN, TMP) - CALL ASSIGN_PTR(ZINP, TMP,& -#else CALL ASSIGN_PTR(ZINP, GET_ALLOCATION(ALLOCATOR, HLTINV%HPIA_AND_IN),& -#endif & IALLOC_POS, IALLOC_SZ) IALLOC_POS = IALLOC_POS + IALLOC_SZ ! ZINP0 IALLOC_SZ = ALIGN(IIN0_SIZE*C_SIZEOF(ZINP0(1)),128) -#ifdef BUFFERED_ALLOCATOR_FIX - CALL GET_ALLOCATION(ALLOCATOR, HLTINV%HPIA_AND_IN, TMP) - CALL ASSIGN_PTR(ZINP0, TMP,& -#else CALL ASSIGN_PTR(ZINP0, GET_ALLOCATION(ALLOCATOR, HLTINV%HPIA_AND_IN),& -#endif & IALLOC_POS, IALLOC_SZ) IALLOC_POS = IALLOC_POS + IALLOC_SZ @@ -284,45 +260,25 @@ SUBROUTINE LTINV(ALLOCATOR,HLTINV,KF_UV,KF_SCALARS,& ! ZOUTA IALLOC_SZ = ALIGN(IOUT_SIZE*C_SIZEOF(ZOUTA(1)),128) -#ifdef BUFFERED_ALLOCATOR_FIX - CALL GET_ALLOCATION(ALLOCATOR, HLTINV%HOUTS_AND_OUTA, TMP) - CALL ASSIGN_PTR(ZOUTA, TMP,& -#else CALL ASSIGN_PTR(ZOUTA, GET_ALLOCATION(ALLOCATOR, HLTINV%HOUTS_AND_OUTA),& -#endif & IALLOC_POS, IALLOC_SZ) IALLOC_POS = IALLOC_POS + IALLOC_SZ ! ZOUTS IALLOC_SZ = ALIGN(IOUT_SIZE*C_SIZEOF(ZOUTS(1)),128) -#ifdef BUFFERED_ALLOCATOR_FIX - CALL GET_ALLOCATION(ALLOCATOR, HLTINV%HOUTS_AND_OUTA, TMP) - CALL ASSIGN_PTR(ZOUTS, TMP,& -#else CALL ASSIGN_PTR(ZOUTS, GET_ALLOCATION(ALLOCATOR, HLTINV%HOUTS_AND_OUTA),& -#endif & IALLOC_POS, IALLOC_SZ) IALLOC_POS = IALLOC_POS + IALLOC_SZ ! ZOUTA0 IALLOC_SZ = ALIGN(IOUT0_SIZE*C_SIZEOF(ZOUTA0(1)),128) -#ifdef BUFFERED_ALLOCATOR_FIX - CALL GET_ALLOCATION(ALLOCATOR, HLTINV%HOUTS_AND_OUTA, TMP) - CALL ASSIGN_PTR(ZOUTA0, TMP,& -#else CALL ASSIGN_PTR(ZOUTA0, GET_ALLOCATION(ALLOCATOR, HLTINV%HOUTS_AND_OUTA),& -#endif & IALLOC_POS, IALLOC_SZ) IALLOC_POS = IALLOC_POS + IALLOC_SZ ! ZOUTS0 IALLOC_SZ = ALIGN(IOUT0_SIZE*C_SIZEOF(ZOUTS0(1)),128) -#ifdef BUFFERED_ALLOCATOR_FIX - CALL GET_ALLOCATION(ALLOCATOR, HLTINV%HOUTS_AND_OUTA, TMP) - CALL ASSIGN_PTR(ZOUTS0, TMP,& -#else CALL ASSIGN_PTR(ZOUTS0, GET_ALLOCATION(ALLOCATOR, HLTINV%HOUTS_AND_OUTA),& -#endif & IALLOC_POS, IALLOC_SZ) IALLOC_POS = IALLOC_POS + IALLOC_SZ diff --git a/src/trans/gpu/internal/trgtol_mod.F90 b/src/trans/gpu/internal/trgtol_mod.F90 index 3b523cfb0..610748a1a 100755 --- a/src/trans/gpu/internal/trgtol_mod.F90 +++ b/src/trans/gpu/internal/trgtol_mod.F90 @@ -137,12 +137,7 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETUV(:), KVSETSC(:), KVSETSC3A(:), KVSETSC3B(:), KVSETSC2(:) REAL(KIND=JPRB),OPTIONAL,INTENT(IN) :: PGP(:,:,:), PGPUV(:,:,:,:), PGP3A(:,:,:,:), PGP3B(:,:,:,:), PGP2(:,:,:) -#ifdef BUFFERED_ALLOCATOR_FIX - INTEGER(KIND=C_INT8_T), POINTER, INTENT(INOUT) :: TMP(:) - TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR -#else TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR -#endif TYPE(TRGTOL_HANDLE), INTENT(IN) :: HTRGTOL ! LOCAL VARIABLES @@ -340,12 +335,7 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, ENDDO block -#ifdef BUFFERED_ALLOCATOR_FIX - CALL GET_ALLOCATION(ALLOCATOR, HTRGTOL%HCOMBUFR_AND_REEL, TMP) - CALL ASSIGN_PTR(PREEL_REAL, TMP,& -#else CALL ASSIGN_PTR(PREEL_REAL, GET_ALLOCATION(ALLOCATOR, HTRGTOL%HCOMBUFR_AND_REEL),& -#endif & 1_JPIB*KF_FS*D%NLENGTF*C_SIZEOF(PREEL_REAL(1))+1, 1_JPIB*KF_FS*D%NLENGTF*C_SIZEOF(PREEL_REAL(1))) !!CALL ASSIGN_PTR(PREEL_REAL, GET_ALLOCATION(ALLOCATOR, HTRGTOL%HCOMBUFR_AND_REEL), size1, size2) end block @@ -396,13 +386,15 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGP3B) ENDIF + + IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_CREATE(ACC_POINTERS(1:ACC_POINTERS_CNT), & #ifdef ACCGPU - IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_CREATE(ACC_POINTERS(1:ACC_POINTERS_CNT),STREAM=1_ACC_HANDLE_KIND) + & STREAM=1_ACC_HANDLE_KIND) #endif #ifdef OMPGPU - IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_CREATE(ACC_POINTERS(1:ACC_POINTERS_CNT),STREAM=1) + & STREAM=1) #endif - + #ifdef ACCGPU !$ACC WAIT(1) #endif @@ -490,12 +482,7 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, ENDDO IF (ISEND_COUNTS > 0) THEN -#ifdef BUFFERED_ALLOCATOR_FIX - CALL GET_ALLOCATION(ALLOCATOR, HTRGTOL%HCOMBUFS, TMP) - CALL ASSIGN_PTR(ZCOMBUFS, TMP, & -#else CALL ASSIGN_PTR(ZCOMBUFS, GET_ALLOCATION(ALLOCATOR, HTRGTOL%HCOMBUFS),& -#endif & 1_JPIB, ICOMBUFS_OFFSET(ISEND_COUNTS+1)*C_SIZEOF(ZCOMBUFS(1))) ENDIF @@ -604,12 +591,7 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, CALL GSTATS(411,0) IF (IRECV_COUNTS > 0) THEN -#ifdef BUFFERED_ALLOCATOR_FIX - CALL GET_ALLOCATION(ALLOCATOR, HTRGTOL%HCOMBUFR_AND_REEL, TMP) - CALL ASSIGN_PTR(ZCOMBUFR, TMP, & -#else CALL ASSIGN_PTR(ZCOMBUFR, GET_ALLOCATION(ALLOCATOR, HTRGTOL%HCOMBUFR_AND_REEL),& -#endif & 1_JPIB, ICOMBUFR_OFFSET(IRECV_COUNTS+1)*C_SIZEOF(ZCOMBUFR(1))) ENDIF #ifdef OMPGPU @@ -627,8 +609,12 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, !$ACC HOST_DATA USE_DEVICE(ZCOMBUFR,ZCOMBUFS) #endif #else +#ifdef OMPGPU +#endif +#ifdef ACCGPU !! this is safe-but-slow fallback for running without GPU-aware MPI !$ACC UPDATE HOST(ZCOMBUFS) IF(ISEND_COUNTS > 0) +#endif #endif ! Skip the own contribution because this is ok to overflow @@ -821,11 +807,13 @@ SUBROUTINE TRGTOL(ALLOCATOR,HTRGTOL,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, !$ACC END DATA !PGPUV !$ACC END DATA !PGP #endif + + IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_DELETE(ACC_POINTERS(1:ACC_POINTERS_CNT), & #ifdef ACCGPU - IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_DELETE(ACC_POINTERS(1:ACC_POINTERS_CNT),STREAM=1_ACC_HANDLE_KIND) + & STREAM=1_ACC_HANDLE_KIND) #endif #ifdef OMPGPU - IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_DELETE(ACC_POINTERS(1:ACC_POINTERS_CNT),STREAM=1) + & STREAM=1) #endif IF (LHOOK) CALL DR_HOOK('TRGTOL',1,ZHOOK_HANDLE) diff --git a/src/trans/gpu/internal/trltog_mod.F90 b/src/trans/gpu/internal/trltog_mod.F90 index 041f58969..45875540c 100755 --- a/src/trans/gpu/internal/trltog_mod.F90 +++ b/src/trans/gpu/internal/trltog_mod.F90 @@ -122,11 +122,7 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX USE TPM_TRANS, ONLY: LDIVGP, LSCDERS, LUVDER, LVORGP, NPROMA USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION -#ifdef BUFFERED_ALLOCATOR_FIX - USE ISO_C_BINDING, ONLY: C_INT8_T, C_SIZE_T, C_SIZEOF -#else USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF -#endif USE OPENACC_EXT, ONLY: EXT_ACC_ARR_DESC, EXT_ACC_PASS, EXT_ACC_CREATE, & & EXT_ACC_DELETE #ifdef ACCGPU @@ -149,12 +145,7 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC3B(:) INTEGER(KIND=JPIM) ,OPTIONAL, INTENT(IN) :: KVSETSC2(:) -#ifdef BUFFERED_ALLOCATOR_FIX - INTEGER(KIND=C_INT8_T), POINTER, INTENT(INOUT) :: TMP(:) - TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR -#else TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR -#endif TYPE(TRLTOG_HANDLE) :: HTRLTOG ! LOCAL VARIABLES @@ -528,12 +519,15 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, ACC_POINTERS_CNT = ACC_POINTERS_CNT + 1 ACC_POINTERS(ACC_POINTERS_CNT) = EXT_ACC_PASS(PGP3B) ENDIF + + IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_CREATE(ACC_POINTERS(1:ACC_POINTERS_CNT), & #ifdef ACCGPU - IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_CREATE(ACC_POINTERS(1:ACC_POINTERS_CNT),STREAM=1_ACC_HANDLE_KIND) + & STREAM=1_ACC_HANDLE_KIND) #endif #ifdef OMPGPU - IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_CREATE(ACC_POINTERS(1:ACC_POINTERS_CNT),STREAM=1) + & STREAM=1) #endif + #ifdef OMPGPU #if defined(WORKAROUND_PRESENT_ALLOC) && defined(WORKAROUND_502486) !$OMP TARGET DATA MAP(ALLOC:PGP) @@ -697,21 +691,11 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, ENDDO IF (IRECV_COUNTS > 0) THEN -#ifdef BUFFERED_ALLOCATOR_FIX - CALL GET_ALLOCATION(ALLOCATOR, HTRLTOG%HCOMBUFR_AND_COMBUFS, TMP) - CALL ASSIGN_PTR(ZCOMBUFR, TMP,& -#else CALL ASSIGN_PTR(ZCOMBUFR, GET_ALLOCATION(ALLOCATOR, HTRLTOG%HCOMBUFR_AND_COMBUFS),& -#endif & 1_JPIB, ICOMBUFR_OFFSET(IRECV_COUNTS+1)*C_SIZEOF(ZCOMBUFR(1))) ENDIF IF (ISEND_COUNTS > 0) THEN -#ifdef BUFFERED_ALLOCATOR_FIX - CALL GET_ALLOCATION(ALLOCATOR, HTRLTOG%HCOMBUFR_AND_COMBUFS, TMP) - CALL ASSIGN_PTR(ZCOMBUFS, TMP,& -#else CALL ASSIGN_PTR(ZCOMBUFS, GET_ALLOCATION(ALLOCATOR, HTRLTOG%HCOMBUFR_AND_COMBUFS),& -#endif & ALIGN(1_JPIB*KF_GP*D%NGPTOT*C_SIZEOF(ZCOMBUFR(1)),128)+1, & & ICOMBUFS_OFFSET(ISEND_COUNTS+1)*C_SIZEOF(ZCOMBUFS(1))) ENDIF @@ -1048,11 +1032,12 @@ SUBROUTINE TRLTOG(ALLOCATOR,HTRLTOG,PREEL_REAL,KF_FS,KF_GP,KF_UV_G,KF_SCALARS_G, !$ACC UPDATE HOST(PGP3B) ASYNC(1) #endif ENDIF + IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_DELETE(ACC_POINTERS(1:ACC_POINTERS_CNT), & #ifdef ACCGPU - IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_DELETE(ACC_POINTERS(1:ACC_POINTERS_CNT),STREAM=1_ACC_HANDLE_KIND) + & STREAM=1_ACC_HANDLE_KIND) #endif #ifdef OMPGPU - IF (ACC_POINTERS_CNT > 0) CALL EXT_ACC_DELETE(ACC_POINTERS(1:ACC_POINTERS_CNT),STREAM=1) + & STREAM=1) #endif IF (LSYNC_TRANS) THEN #ifdef OMPGPU diff --git a/src/trans/gpu/internal/trltom_mod.F90 b/src/trans/gpu/internal/trltom_mod.F90 index c3973f5aa..f0e23f4b6 100755 --- a/src/trans/gpu/internal/trltom_mod.F90 +++ b/src/trans/gpu/internal/trltom_mod.F90 @@ -99,11 +99,7 @@ SUBROUTINE TRLTOM(ALLOCATOR,HTRLTOM,PFBUF_IN,PFBUF,KF_FS) #endif USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION -#ifdef BUFFERED_ALLOCATOR_FIX - USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_INT8_T, C_SIZE_T, C_SIZEOF -#else USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF -#endif USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS IMPLICIT NONE @@ -118,12 +114,7 @@ SUBROUTINE TRLTOM(ALLOCATOR,HTRLTOM,PFBUF_IN,PFBUF,KF_FS) REAL(KIND=JPHOOK) :: ZHOOK_HANDLE INTEGER(KIND=JPIM) :: IERROR -#ifdef BUFFERED_ALLOCATOR_FIX - INTEGER(KIND=C_INT8_T), POINTER, INTENT(INOUT) :: TMP(:) - TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR -#else TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR -#endif TYPE(TRLTOM_HANDLE), INTENT(IN) :: HTRLTOM #if ECTRANS_HAVE_MPI TYPE(MPI_COMM) :: LOCAL_COMM @@ -143,12 +134,7 @@ SUBROUTINE TRLTOM(ALLOCATOR,HTRLTOM,PFBUF_IN,PFBUF,KF_FS) IF (LHOOK) CALL DR_HOOK('TRLTOM',0,ZHOOK_HANDLE) -#ifdef BUFFERED_ALLOCATOR_FIX - CALL GET_ALLOCATION(ALLOCATOR, HTRLTOM%HPFBUF, TMP) - CALL ASSIGN_PTR(PFBUF, TMP, & -#else CALL ASSIGN_PTR(PFBUF, GET_ALLOCATION(ALLOCATOR, HTRLTOM%HPFBUF),& -#endif & 1_JPIB, 2_JPIB*D%NLENGT1B*KF_FS*C_SIZEOF(PFBUF(1))) diff --git a/src/trans/gpu/internal/trltom_pack_unpack.F90 b/src/trans/gpu/internal/trltom_pack_unpack.F90 index 4ae40c6b0..88fa991d6 100755 --- a/src/trans/gpu/internal/trltom_pack_unpack.F90 +++ b/src/trans/gpu/internal/trltom_pack_unpack.F90 @@ -74,11 +74,7 @@ SUBROUTINE TRLTOM_PACK(ALLOCATOR,HTRLTOM_PACK,PREEL_COMPLEX,FOUBUF_IN,KF_FS) USE TPM_DISTR, ONLY: D, MYSETW USE TPM_GEOMETRY, ONLY: G USE TPM_DIM, ONLY: R -#ifdef BUFFERED_ALLOCATOR_FIX - USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_INT8_T, C_SIZE_T, C_SIZEOF -#else USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF -#endif ! IMPLICIT NONE @@ -86,12 +82,7 @@ SUBROUTINE TRLTOM_PACK(ALLOCATOR,HTRLTOM_PACK,PREEL_COMPLEX,FOUBUF_IN,KF_FS) REAL(KIND=JPRBT), INTENT(IN) :: PREEL_COMPLEX(:) REAL(KIND=JPRBT), POINTER, INTENT(OUT) :: FOUBUF_IN(:) INTEGER(KIND=JPIM),INTENT(IN) :: KF_FS -#ifdef BUFFERED_ALLOCATOR_FIX - INTEGER(KIND=C_INT8_T), POINTER, INTENT(INOUT) :: TMP(:) - TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR -#else TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR -#endif TYPE(TRLTOM_PACK_HANDLE), INTENT(IN) :: HTRLTOM_PACK INTEGER(KIND=JPIM) :: JM,JF,IGLG,OFFSET_VAR,KGL @@ -102,12 +93,7 @@ SUBROUTINE TRLTOM_PACK(ALLOCATOR,HTRLTOM_PACK,PREEL_COMPLEX,FOUBUF_IN,KF_FS) ASSOCIATE(D_NSTAGTF=>D%NSTAGTF, D_NPNTGTB0=>D%NPNTGTB0, D_NPTRLS=>D%NPTRLS, & & D_NDGL_FS=>D%NDGL_FS, G_NMEN=>G%NMEN, G_NLOEN=>G%NLOEN, R_NSMAX=>R%NSMAX) -#ifdef BUFFERED_ALLOCATOR_FIX - CALL GET_ALLOCATION(ALLOCATOR, HTRLTOM_PACK%HFOUBUF_IN, TMP) - CALL ASSIGN_PTR(FOUBUF_IN, TMP,& -#else CALL ASSIGN_PTR(FOUBUF_IN, GET_ALLOCATION(ALLOCATOR, HTRLTOM_PACK%HFOUBUF_IN),& -#endif & 1_JPIB, 2_JPIB*D%NLENGT0B*KF_FS*C_SIZEOF(FOUBUF_IN(1))) #ifdef OMPGPU @@ -197,23 +183,14 @@ SUBROUTINE TRLTOM_UNPACK(ALLOCATOR,HTRLTOM_UNPACK,FOUBUF,ZINPS,ZINPA,ZINPS0,ZINP USE TPM_FIELDS, ONLY: F USE TPM_DISTR, ONLY: D USE LEDIR_MOD, ONLY: LEDIR_STRIDES -#ifdef BUFFERED_ALLOCATOR_FIX - USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_INT8_T, C_SIZE_T, C_SIZEOF -#else USE, INTRINSIC :: ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF -#endif IMPLICIT NONE REAL(KIND=JPRBT), INTENT(IN) :: FOUBUF(:) REAL(KIND=JPRBT), POINTER, INTENT(INOUT) :: ZINPS(:), ZINPA(:) REAL(KIND=JPRD), POINTER, INTENT(INOUT) :: ZINPS0(:), ZINPA0(:) INTEGER(KIND=JPIM), INTENT(IN) :: KF_FS, KF_UV -#ifdef BUFFERED_ALLOCATOR_FIX - INTEGER(KIND=C_INT8_T), POINTER, INTENT(INOUT) :: TMP(:) - TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR -#else TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR -#endif TYPE(TRLTOM_UNPACK_HANDLE), INTENT(IN) :: HTRLTOM_UNPACK REAL(KIND=JPRBT), POINTER :: PREEL_COMPLEX(:) @@ -238,42 +215,22 @@ SUBROUTINE TRLTOM_UNPACK(ALLOCATOR,HTRLTOM_UNPACK,FOUBUF,ZINPS,ZINPA,ZINPS0,ZINP IALLOC_POS=1 IALLOC_SZ = ALIGN(IIN_SIZE*C_SIZEOF(ZINPS(0)),128) -#ifdef BUFFERED_ALLOCATOR_FIX - CALL GET_ALLOCATION(ALLOCATOR, HTRLTOM_UNPACK%HINPS_AND_ZINPA, TMP) - CALL ASSIGN_PTR(ZINPS, TMP,& -#else CALL ASSIGN_PTR(ZINPS, GET_ALLOCATION(ALLOCATOR, HTRLTOM_UNPACK%HINPS_AND_ZINPA),& -#endif & IALLOC_POS, IALLOC_SZ) IALLOC_POS=IALLOC_POS+IALLOC_SZ IALLOC_SZ = ALIGN(IIN_SIZE*C_SIZEOF(ZINPA(0)),128) -#ifdef BUFFERED_ALLOCATOR_FIX - CALL GET_ALLOCATION(ALLOCATOR, HTRLTOM_UNPACK%HINPS_AND_ZINPA, TMP) - CALL ASSIGN_PTR(ZINPA, TMP,& -#else CALL ASSIGN_PTR(ZINPA, GET_ALLOCATION(ALLOCATOR, HTRLTOM_UNPACK%HINPS_AND_ZINPA),& -#endif & IALLOC_POS, IALLOC_SZ) IALLOC_POS=IALLOC_POS+IALLOC_SZ IALLOC_SZ = ALIGN(IIN0_SIZE*C_SIZEOF(ZINPS0(0)),128) -#ifdef BUFFERED_ALLOCATOR_FIX - CALL GET_ALLOCATION(ALLOCATOR, HTRLTOM_UNPACK%HINPS_AND_ZINPA, TMP) - CALL ASSIGN_PTR(ZINPS0, TMP,& -#else CALL ASSIGN_PTR(ZINPS0, GET_ALLOCATION(ALLOCATOR, HTRLTOM_UNPACK%HINPS_AND_ZINPA),& -#endif & IALLOC_POS, IALLOC_SZ) IALLOC_POS=IALLOC_POS+IALLOC_SZ IALLOC_SZ = ALIGN(IIN0_SIZE*C_SIZEOF(ZINPA0(0)),128) -#ifdef BUFFERED_ALLOCATOR_FIX - CALL GET_ALLOCATION(ALLOCATOR, HTRLTOM_UNPACK%HINPS_AND_ZINPA, TMP) - CALL ASSIGN_PTR(ZINPA0, TMP,& -#else CALL ASSIGN_PTR(ZINPA0, GET_ALLOCATION(ALLOCATOR, HTRLTOM_UNPACK%HINPS_AND_ZINPA),& -#endif & IALLOC_POS, IALLOC_SZ) IALLOC_POS=IALLOC_POS+IALLOC_SZ diff --git a/src/trans/gpu/internal/trmtol_mod.F90 b/src/trans/gpu/internal/trmtol_mod.F90 index c0364e9b3..5d49c46a4 100755 --- a/src/trans/gpu/internal/trmtol_mod.F90 +++ b/src/trans/gpu/internal/trmtol_mod.F90 @@ -99,11 +99,7 @@ SUBROUTINE TRMTOL(ALLOCATOR,HTRMTOL,PFBUF_IN,PFBUF,KF_LEG) #endif USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION USE TPM_STATS, ONLY: GSTATS => GSTATS_NVTX -#ifdef BUFFERED_ALLOCATOR_FIX - USE ISO_C_BINDING, ONLY: C_INT8_T, C_SIZE_T, C_SIZEOF -#else USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF -#endif USE ABORT_TRANS_MOD, ONLY: ABORT_TRANS IMPLICIT NONE @@ -118,12 +114,7 @@ SUBROUTINE TRMTOL(ALLOCATOR,HTRMTOL,PFBUF_IN,PFBUF,KF_LEG) REAL(KIND=JPHOOK) :: ZHOOK_HANDLE INTEGER(KIND=JPIM) :: IERROR -#ifdef BUFFERED_ALLOCATOR_FIX - INTEGER(KIND=C_INT8_T), POINTER, INTENT(INOUT) :: TMP(:) - TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR -#else TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR -#endif TYPE(TRMTOL_HANDLE), INTENT(IN) :: HTRMTOL #if ECTRANS_HAVE_MPI @@ -144,12 +135,7 @@ SUBROUTINE TRMTOL(ALLOCATOR,HTRMTOL,PFBUF_IN,PFBUF,KF_LEG) IF (LHOOK) CALL DR_HOOK('TRMTOL',0,ZHOOK_HANDLE) -#ifdef BUFFERED_ALLOCATOR_FIX - CALL GET_ALLOCATION(ALLOCATOR, HTRMTOL%HPFBUF, TMP) - CALL ASSIGN_PTR(PFBUF, TMP,& -#else CALL ASSIGN_PTR(PFBUF, GET_ALLOCATION(ALLOCATOR, HTRMTOL%HPFBUF),& -#endif & 1_JPIB, 2_JPIB*D%NLENGT0B*KF_LEG*C_SIZEOF(PFBUF(1))) IF(NPROC > 1) THEN diff --git a/src/trans/gpu/internal/trmtol_pack_unpack.F90 b/src/trans/gpu/internal/trmtol_pack_unpack.F90 index 3db10862e..c911be9a3 100755 --- a/src/trans/gpu/internal/trmtol_pack_unpack.F90 +++ b/src/trans/gpu/internal/trmtol_pack_unpack.F90 @@ -91,22 +91,13 @@ SUBROUTINE TRMTOL_PACK(ALLOCATOR,HTRMTOL_PACK,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,FOUBUF_I USE TPM_DISTR, ONLY: D USE LEINV_MOD, ONLY: LEINV_STRIDES USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION -#ifdef BUFFERED_ALLOCATOR_FIX - USE ISO_C_BINDING, ONLY: C_INT8_T, C_SIZE_T, C_SIZEOF -#else USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF -#endif IMPLICIT NONE ! DUMMY ARGUMENTS -#ifdef BUFFERED_ALLOCATOR_FIX - INTEGER(KIND=C_INT8_T), POINTER, INTENT(INOUT) :: TMP(:) - TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR -#else TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR -#endif TYPE(TRMTOL_PACK_HANDLE), INTENT(IN) :: HTRMTOL_PACK REAL(KIND=JPRB), INTENT(OUT), POINTER :: FOUBUF_IN(:) REAL(KIND=JPRBT), INTENT(IN) :: ZOUTS(:), ZOUTA(:) @@ -129,12 +120,7 @@ SUBROUTINE TRMTOL_PACK(ALLOCATOR,HTRMTOL_PACK,ZOUTS,ZOUTA,ZOUTS0,ZOUTA0,FOUBUF_I IF (LHOOK) CALL DR_HOOK('TRMTOL_PACK',0,ZHOOK_HANDLE) -#ifdef BUFFERED_ALLOCATOR_FIX - CALL GET_ALLOCATION(ALLOCATOR, HTRMTOL_PACK%HFOUBUF_IN, TMP) - CALL ASSIGN_PTR(FOUBUF_IN, TMP,& -#else CALL ASSIGN_PTR(FOUBUF_IN, GET_ALLOCATION(ALLOCATOR, HTRMTOL_PACK%HFOUBUF_IN),& -#endif & 1_JPIB, 2_JPIB*D%NLENGT1B*KF_LEG*C_SIZEOF(FOUBUF_IN(1))) CALL LEINV_STRIDES(KF_LEG,IOUT_STRIDES0=IOUT_STRIDES0,IOUT_SIZE=IOUT_SIZE,& @@ -273,11 +259,7 @@ SUBROUTINE TRMTOL_UNPACK(ALLOCATOR,HTRMTOL_UNPACK,FOUBUF,PREEL_COMPLEX,KF_CURREN USE TPM_DISTR, ONLY: D, MYSETW USE TPM_GEOMETRY, ONLY: G USE BUFFERED_ALLOCATOR_MOD, ONLY: BUFFERED_ALLOCATOR, ASSIGN_PTR, GET_ALLOCATION -#ifdef BUFFERED_ALLOCATOR_FIX - USE ISO_C_BINDING, ONLY: C_INT8_T, C_SIZE_T, C_SIZEOF -#else - USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF -#endif +USE ISO_C_BINDING, ONLY: C_SIZE_T, C_SIZEOF ! IMPLICIT NONE @@ -285,12 +267,7 @@ SUBROUTINE TRMTOL_UNPACK(ALLOCATOR,HTRMTOL_UNPACK,FOUBUF,PREEL_COMPLEX,KF_CURREN REAL(KIND=JPRBT), INTENT(IN) :: FOUBUF(:) REAL(KIND=JPRBT), INTENT(OUT), POINTER :: PREEL_COMPLEX(:) INTEGER(KIND=JPIM),INTENT(IN) :: KF_CURRENT, KF_TOTAL -#ifdef BUFFERED_ALLOCATOR_FIX - INTEGER(KIND=C_INT8_T), POINTER, INTENT(INOUT) :: TMP(:) - TYPE(BUFFERED_ALLOCATOR), INTENT(INOUT) :: ALLOCATOR -#else - TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR -#endif +TYPE(BUFFERED_ALLOCATOR), INTENT(IN) :: ALLOCATOR TYPE(TRMTOL_UNPACK_HANDLE), INTENT(IN) :: HTRMTOL_UNPACK INTEGER(KIND=JPIM) :: JM,JF,IGLG,OFFSET_VAR,KGL,ILOEN_MAX @@ -300,12 +277,7 @@ SUBROUTINE TRMTOL_UNPACK(ALLOCATOR,HTRMTOL_UNPACK,FOUBUF,PREEL_COMPLEX,KF_CURREN ASSOCIATE(D_NDGL_FS=>D%NDGL_FS, D_NSTAGTF=>D%NSTAGTF, D_NPNTGTB0=>D%NPNTGTB0, D_NPTRLS=>D%NPTRLS, & & G_NLOEN=>G%NLOEN, G_NMEN=>G%NMEN) -#ifdef BUFFERED_ALLOCATOR_FIX -CALL GET_ALLOCATION(ALLOCATOR, HTRMTOL_UNPACK%HREEL, TMP) -CALL ASSIGN_PTR(PREEL_COMPLEX, TMP,& -#else CALL ASSIGN_PTR(PREEL_COMPLEX, GET_ALLOCATION(ALLOCATOR, HTRMTOL_UNPACK%HREEL),& -#endif & 1_JPIB, 1_JPIB*KF_TOTAL*D%NLENGTF*C_SIZEOF(PREEL_COMPLEX(1))) #ifdef OMPGPU