diff --git a/src/gpu/cuda/gpu_get2e_grad_ffff.cu b/src/gpu/cuda/gpu_get2e_grad_ffff.cu index 65829b4a..0c34d2bb 100644 --- a/src/gpu/cuda/gpu_get2e_grad_ffff.cu +++ b/src/gpu/cuda/gpu_get2e_grad_ffff.cu @@ -18,40 +18,35 @@ #include "gpu_type.h" #include "gpu_get2e_grad_ffff.h" -//#ifdef GPU_SPDF -//#endif - - /* - Constant Memory in GPU is fast but quite limited and hard to operate, usually not allocatable and - readonly. So we put the following variables into constant memory: -devSim: a gpu simluation type variable. which is to store to location of basic information about molecule and basis -set. Note it only store the location, so it's mostly a set of pointer to GPU memory. and with some non-pointer -value like the number of basis set. See gpu_type.h for details. -devTrans : arrays to save the mapping index, will be elimited by hand writing unrolling code. -Sumindex: a array to store refect how many temp variable needed in VRR. can be elimited by hand writing code. -*/ + * Constant Memory in GPU is fast but quite limited and hard to operate, usually not allocatable and + * read-only. So we put the following variables into constant memory: + * devSim: a gpu simulation type variable, which stores the location of basic information about the molecule and basis + * set. Note it only stores the location, so it's mostly a set of pointers to GPU memory, along with some non-pointer + * values like the number of basis set. See gpu_type.h for details. + * devTrans: arrays to save the mapping index, will be eliminated by hand-written unrolled code. + * Sumindex: an array that reflects how many temp variables are needed in VRR. Can be eliminated by hand-written code. 
+ */ static __constant__ gpu_simulation_type devSim; static __constant__ unsigned char devTrans[TRANSDIM * TRANSDIM * TRANSDIM]; static __constant__ int Sumindex[10] = {0, 0, 1, 4, 10, 20, 35, 56, 84, 120}; //#define USE_TEXTURE - -#ifdef USE_TEXTURE -#define USE_TEXTURE_CUTMATRIX -#define USE_TEXTURE_YCUTOFF -#define USE_TEXTURE_XCOEFF +#if defined(USE_TEXTURE) + #define USE_TEXTURE_CUTMATRIX + #define USE_TEXTURE_YCUTOFF + #define USE_TEXTURE_XCOEFF #endif -#ifdef USE_TEXTURE_CUTMATRIX -texture tex_cutMatrix; +#if defined(USE_TEXTURE_CUTMATRIX) + texture tex_cutMatrix; #endif -#ifdef USE_TEXTURE_YCUTOFF -texture tex_YCutoff; +#if defined(USE_TEXTURE_YCUTOFF) + texture tex_YCutoff; #endif -#ifdef USE_TEXTURE_XCOEFF -texture tex_Xcoeff; +#if defined(USE_TEXTURE_XCOEFF) + texture tex_Xcoeff; #endif //#define USE_ERI_GRAD_STOREADD @@ -74,55 +69,55 @@ texture tex_Xcoeff; #define ERI_GRAD_FFFF_SMEM_PTR_SIZE (1) -#define DEV_SIM_INT_PTR_KATOM smem_int_ptr[ERI_GRAD_FFFF_TPB*0+threadIdx.x] -#define DEV_SIM_INT_PTR_KPRIM smem_int_ptr[ERI_GRAD_FFFF_TPB*1+threadIdx.x] -#define DEV_SIM_INT_PTR_KSTART smem_int_ptr[ERI_GRAD_FFFF_TPB*2+threadIdx.x] -#define DEV_SIM_INT_PTR_KSUMTYPE smem_int_ptr[ERI_GRAD_FFFF_TPB*3+threadIdx.x] -#define DEV_SIM_INT_PTR_PRIM_START smem_int_ptr[ERI_GRAD_FFFF_TPB*4+threadIdx.x] -#define DEV_SIM_INT_PTR_QFBASIS smem_int_ptr[ERI_GRAD_FFFF_TPB*5+threadIdx.x] -#define DEV_SIM_INT_PTR_QSBASIS smem_int_ptr[ERI_GRAD_FFFF_TPB*6+threadIdx.x] -#define DEV_SIM_INT_PTR_QSTART smem_int_ptr[ERI_GRAD_FFFF_TPB*7+threadIdx.x] -#define DEV_SIM_INT_PTR_SORTED_Q smem_int_ptr[ERI_GRAD_FFFF_TPB*8+threadIdx.x] -#define DEV_SIM_INT_PTR_SORTED_QNUMBER smem_int_ptr[ERI_GRAD_FFFF_TPB*9+threadIdx.x] -#define DEV_SIM_INT2_PTR_SORTED_YCUTOFFIJ smem_int2_ptr[ERI_GRAD_FFFF_TPB*0+threadIdx.x] -#define DEV_SIM_CHAR_PTR_MPI_BCOMPUTE smem_char_ptr[ERI_GRAD_FFFF_TPB*0+threadIdx.x] -#define DEV_SIM_CHAR_PTR_KLMN smem_char_ptr[ERI_GRAD_FFFF_TPB*1+threadIdx.x] -#define 
DEV_SIM_DBL_PTR_CONS smem_dbl_ptr[ERI_GRAD_FFFF_TPB*0+threadIdx.x] -#define DEV_SIM_DBL_PTR_CUTMATRIX smem_dbl_ptr[ERI_GRAD_FFFF_TPB*1+threadIdx.x] -#define DEV_SIM_DBL_PTR_CUTPRIM smem_dbl_ptr[ERI_GRAD_FFFF_TPB*2+threadIdx.x] -#define DEV_SIM_DBL_PTR_DENSE smem_dbl_ptr[ERI_GRAD_FFFF_TPB*3+threadIdx.x] -#define DEV_SIM_DBL_PTR_DENSEB smem_dbl_ptr[ERI_GRAD_FFFF_TPB*4+threadIdx.x] -#define DEV_SIM_DBL_PTR_EXPOSUM smem_dbl_ptr[ERI_GRAD_FFFF_TPB*5+threadIdx.x] -#define DEV_SIM_DBL_PTR_GCEXPO smem_dbl_ptr[ERI_GRAD_FFFF_TPB*6+threadIdx.x] -#define DEV_SIM_DBL_PTR_STORE smem_dbl_ptr[ERI_GRAD_FFFF_TPB*7+threadIdx.x] -#define DEV_SIM_DBL_PTR_STORE2 smem_dbl_ptr[ERI_GRAD_FFFF_TPB*8+threadIdx.x] -#define DEV_SIM_DBL_PTR_STOREAA smem_dbl_ptr[ERI_GRAD_FFFF_TPB*9+threadIdx.x] -#define DEV_SIM_DBL_PTR_STOREBB smem_dbl_ptr[ERI_GRAD_FFFF_TPB*10+threadIdx.x] -#define DEV_SIM_DBL_PTR_STORECC smem_dbl_ptr[ERI_GRAD_FFFF_TPB*11+threadIdx.x] -#define DEV_SIM_DBL_PTR_WEIGHTEDCENTERX smem_dbl_ptr[ERI_GRAD_FFFF_TPB*12+threadIdx.x] -#define DEV_SIM_DBL_PTR_WEIGHTEDCENTERY smem_dbl_ptr[ERI_GRAD_FFFF_TPB*13+threadIdx.x] -#define DEV_SIM_DBL_PTR_WEIGHTEDCENTERZ smem_dbl_ptr[ERI_GRAD_FFFF_TPB*14+threadIdx.x] -#define DEV_SIM_DBL_PTR_XCOEFF smem_dbl_ptr[ERI_GRAD_FFFF_TPB*15+threadIdx.x] -#define DEV_SIM_DBL_PTR_XYZ smem_dbl_ptr[ERI_GRAD_FFFF_TPB*16+threadIdx.x] -#define DEV_SIM_DBL_PTR_YCUTOFF smem_dbl_ptr[ERI_GRAD_FFFF_TPB*17+threadIdx.x] -#define DEV_SIM_DBL_PTR_YVERTICALTEMP smem_dbl_ptr[ERI_GRAD_FFFF_TPB*18+threadIdx.x] -#define DEV_SIM_DBL_PRIMLIMIT smem_dbl[ERI_GRAD_FFFF_TPB*0+threadIdx.x] -#define DEV_SIM_DBL_GRADCUTOFF smem_dbl[ERI_GRAD_FFFF_TPB*1+threadIdx.x] -#define DEV_SIM_DBL_HYB_COEFF smem_dbl[ERI_GRAD_FFFF_TPB*2+threadIdx.x] -#define DEV_SIM_INT_NATOM smem_int[ERI_GRAD_FFFF_TPB*0+threadIdx.x] -#define DEV_SIM_INT_NBASIS smem_int[ERI_GRAD_FFFF_TPB*1+threadIdx.x] -#define DEV_SIM_INT_NSHELL smem_int[ERI_GRAD_FFFF_TPB*2+threadIdx.x] -#define DEV_SIM_INT_JBASIS 
smem_int[ERI_GRAD_FFFF_TPB*3+threadIdx.x] -#define DEV_SIM_INT_SQRQSHELL smem_int[ERI_GRAD_FFFF_TPB*4+threadIdx.x] -#define DEV_SIM_INT_PRIM_TOTAL smem_int[ERI_GRAD_FFFF_TPB*5+threadIdx.x] -#define DEV_SIM_INT_FFSTART smem_int[ERI_GRAD_FFFF_TPB*6+threadIdx.x] - -#define DEV_SIM_PTR_GRAD smem_grad_ptr[ERI_GRAD_FFFF_TPB*0+threadIdx.x] - -#define LOCTRANS(A,i1,i2,i3,d1,d2,d3) A[(i3+((i2)+(i1)*(d2))*(d3))*ERI_GRAD_FFFF_TPB+threadIdx.x] +#define DEV_SIM_INT_PTR_KATOM smem_int_ptr[threadIdx.x] +#define DEV_SIM_INT_PTR_KPRIM smem_int_ptr[ERI_GRAD_FFFF_TPB + threadIdx.x] +#define DEV_SIM_INT_PTR_KSTART smem_int_ptr[ERI_GRAD_FFFF_TPB * 2 + threadIdx.x] +#define DEV_SIM_INT_PTR_KSUMTYPE smem_int_ptr[ERI_GRAD_FFFF_TPB * 3 + threadIdx.x] +#define DEV_SIM_INT_PTR_PRIM_START smem_int_ptr[ERI_GRAD_FFFF_TPB * 4 + threadIdx.x] +#define DEV_SIM_INT_PTR_QFBASIS smem_int_ptr[ERI_GRAD_FFFF_TPB * 5 + threadIdx.x] +#define DEV_SIM_INT_PTR_QSBASIS smem_int_ptr[ERI_GRAD_FFFF_TPB * 6 + threadIdx.x] +#define DEV_SIM_INT_PTR_QSTART smem_int_ptr[ERI_GRAD_FFFF_TPB * 7 + threadIdx.x] +#define DEV_SIM_INT_PTR_SORTED_Q smem_int_ptr[ERI_GRAD_FFFF_TPB * 8 + threadIdx.x] +#define DEV_SIM_INT_PTR_SORTED_QNUMBER smem_int_ptr[ERI_GRAD_FFFF_TPB * 9 + threadIdx.x] +#define DEV_SIM_INT2_PTR_SORTED_YCUTOFFIJ smem_int2_ptr[threadIdx.x] +#define DEV_SIM_CHAR_PTR_MPI_BCOMPUTE smem_char_ptr[threadIdx.x] +#define DEV_SIM_CHAR_PTR_KLMN smem_char_ptr[ERI_GRAD_FFFF_TPB + threadIdx.x] +#define DEV_SIM_DBL_PTR_CONS smem_dbl_ptr[threadIdx.x] +#define DEV_SIM_DBL_PTR_CUTMATRIX smem_dbl_ptr[ERI_GRAD_FFFF_TPB + threadIdx.x] +#define DEV_SIM_DBL_PTR_CUTPRIM smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 2 + threadIdx.x] +#define DEV_SIM_DBL_PTR_DENSE smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 3 + threadIdx.x] +#define DEV_SIM_DBL_PTR_DENSEB smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 4 + threadIdx.x] +#define DEV_SIM_DBL_PTR_EXPOSUM smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 5 + threadIdx.x] +#define DEV_SIM_DBL_PTR_GCEXPO smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 6 + 
threadIdx.x] +#define DEV_SIM_DBL_PTR_STORE smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 7 + threadIdx.x] +#define DEV_SIM_DBL_PTR_STORE2 smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 8 + threadIdx.x] +#define DEV_SIM_DBL_PTR_STOREAA smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 9 + threadIdx.x] +#define DEV_SIM_DBL_PTR_STOREBB smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 10 + threadIdx.x] +#define DEV_SIM_DBL_PTR_STORECC smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 11 + threadIdx.x] +#define DEV_SIM_DBL_PTR_WEIGHTEDCENTERX smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 12 + threadIdx.x] +#define DEV_SIM_DBL_PTR_WEIGHTEDCENTERY smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 13 + threadIdx.x] +#define DEV_SIM_DBL_PTR_WEIGHTEDCENTERZ smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 14 + threadIdx.x] +#define DEV_SIM_DBL_PTR_XCOEFF smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 15 + threadIdx.x] +#define DEV_SIM_DBL_PTR_XYZ smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 16 + threadIdx.x] +#define DEV_SIM_DBL_PTR_YCUTOFF smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 17 + threadIdx.x] +#define DEV_SIM_DBL_PTR_YVERTICALTEMP smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 18 + threadIdx.x] +#define DEV_SIM_DBL_PRIMLIMIT smem_dbl[threadIdx.x] +#define DEV_SIM_DBL_GRADCUTOFF smem_dbl[ERI_GRAD_FFFF_TPB + threadIdx.x] +#define DEV_SIM_DBL_HYB_COEFF smem_dbl[ERI_GRAD_FFFF_TPB * 2 + threadIdx.x] +#define DEV_SIM_INT_NATOM smem_int[threadIdx.x] +#define DEV_SIM_INT_NBASIS smem_int[ERI_GRAD_FFFF_TPB + threadIdx.x] +#define DEV_SIM_INT_NSHELL smem_int[ERI_GRAD_FFFF_TPB * 2 + threadIdx.x] +#define DEV_SIM_INT_JBASIS smem_int[ERI_GRAD_FFFF_TPB * 3 + threadIdx.x] +#define DEV_SIM_INT_SQRQSHELL smem_int[ERI_GRAD_FFFF_TPB * 4 + threadIdx.x] +#define DEV_SIM_INT_PRIM_TOTAL smem_int[ERI_GRAD_FFFF_TPB * 5 + threadIdx.x] +#define DEV_SIM_INT_FFSTART smem_int[ERI_GRAD_FFFF_TPB * 6 + threadIdx.x] + +#define DEV_SIM_PTR_GRAD smem_grad_ptr[threadIdx.x] + +#define LOCTRANS(A,i1,i2,i3,d1,d2,d3) A[(i3 + ((i2) + (i1) * (d2)) * (d3)) * ERI_GRAD_FFFF_TPB + threadIdx.x] #define DEV_SIM_CHAR_TRANS smem_char -#ifdef GPU_SPDF +#if defined(GPU_SPDF) #define 
int_spdf4 #include "../gpu_eri_grad_vrr_ffff.h" #include "gpu_get2e_grad_ffff.cuh" @@ -131,16 +126,16 @@ texture tex_Xcoeff; //Include the kernels for open shell eri calculations #define OSHELL -#ifdef GPU_SPDF +#if defined(GPU_SPDF) #define int_spdf4 // #include "gpu_get2e_grad_ffff.cuh" -#endif + #endif #undef OSHELL // totTime is the timer for GPU 2e time. Only on under debug mode -#if defined DEBUG || defined DEBUGTIME -static float totTime; +#if defined(DEBUG) || defined(DEBUGTIME) + static float totTime; #endif @@ -186,8 +181,8 @@ void ResortERIs(_gpu_type gpu) { for (int i = 0; i < gpu->gpu_cutoff->sqrQshell; i++) { if (gpu->gpu_basis->sorted_Qnumber->_hostData[gpu->gpu_cutoff->sorted_YCutoffIJ->_hostData[i].x] == lbl_t.x && gpu->gpu_basis->sorted_Qnumber->_hostData[gpu->gpu_cutoff->sorted_YCutoffIJ->_hostData[i].y] == lbl_t.y) { - resorted_YCutoffIJ[idx1].x = gpu->gpu_cutoff->sorted_YCutoffIJ ->_hostData[i].x; - resorted_YCutoffIJ[idx1].y = gpu->gpu_cutoff->sorted_YCutoffIJ ->_hostData[i].y; + resorted_YCutoffIJ[idx1].x = gpu->gpu_cutoff->sorted_YCutoffIJ->_hostData[i].x; + resorted_YCutoffIJ[idx1].y = gpu->gpu_cutoff->sorted_YCutoffIJ->_hostData[i].y; idx1++; } } @@ -203,7 +198,7 @@ void ResortERIs(_gpu_type gpu) { if (ffset == false && gpu->gpu_basis->sorted_Qnumber->_hostData[resorted_YCutoffIJ[i].x] - + gpu->gpu_basis->sorted_Qnumber->_hostData[resorted_YCutoffIJ[i].y] == 6){ + + gpu->gpu_basis->sorted_Qnumber->_hostData[resorted_YCutoffIJ[i].y] == 6) { ffStart = i; ffset = true; } @@ -249,62 +244,62 @@ void getGrad_ffff(_gpu_type gpu) { ResortERIs(gpu); - int *int_buffer = (int*) malloc(ERI_GRAD_FFFF_SMEM_INT_SIZE*ERI_GRAD_FFFF_TPB*sizeof(int)); - int **int_ptr_buffer = (int**) malloc(ERI_GRAD_FFFF_SMEM_INT_PTR_SIZE*ERI_GRAD_FFFF_TPB*sizeof(int*)); - QUICKDouble *dbl_buffer = (QUICKDouble*) malloc(ERI_GRAD_FFFF_SMEM_DBL_SIZE*ERI_GRAD_FFFF_TPB*sizeof(QUICKDouble)); - QUICKDouble **dbl_ptr_buffer = (QUICKDouble**) 
malloc(ERI_GRAD_FFFF_SMEM_DBL_PTR_SIZE*ERI_GRAD_FFFF_TPB*sizeof(QUICKDouble*)); - int2 **int2_ptr_buffer = (int2**) malloc(ERI_GRAD_FFFF_SMEM_INT2_PTR_SIZE*ERI_GRAD_FFFF_TPB*sizeof(int2*)); - unsigned char **char_ptr_buffer = (unsigned char**) malloc(ERI_GRAD_FFFF_SMEM_CHAR_PTR_SIZE*ERI_GRAD_FFFF_TPB*sizeof(unsigned char*)); - QUICKAtomicType **grad_ptr_buffer = (QUICKAtomicType**) malloc(ERI_GRAD_FFFF_SMEM_PTR_SIZE*ERI_GRAD_FFFF_TPB*sizeof(QUICKAtomicType*)); - unsigned char trans[TRANSDIM*TRANSDIM*TRANSDIM]; - - for(int i=0; igpu_sim.natom; - int_buffer[ERI_GRAD_FFFF_TPB*1+i] = gpu->gpu_sim.nbasis; - int_buffer[ERI_GRAD_FFFF_TPB*2+i] = gpu->gpu_sim.nshell; - int_buffer[ERI_GRAD_FFFF_TPB*3+i] = gpu->gpu_sim.jbasis; - int_buffer[ERI_GRAD_FFFF_TPB*4+i] = gpu->gpu_sim.sqrQshell; - int_buffer[ERI_GRAD_FFFF_TPB*5+i] = gpu->gpu_sim.prim_total; - int_buffer[ERI_GRAD_FFFF_TPB*6+i] = gpu->gpu_sim.ffStart; - int_ptr_buffer[ERI_GRAD_FFFF_TPB*0+i] = gpu->gpu_sim.katom; - int_ptr_buffer[ERI_GRAD_FFFF_TPB*1+i] = gpu->gpu_sim.kprim; - int_ptr_buffer[ERI_GRAD_FFFF_TPB*2+i] = gpu->gpu_sim.kstart; - int_ptr_buffer[ERI_GRAD_FFFF_TPB*3+i] = gpu->gpu_sim.Ksumtype; - int_ptr_buffer[ERI_GRAD_FFFF_TPB*4+i] = gpu->gpu_sim.prim_start; - int_ptr_buffer[ERI_GRAD_FFFF_TPB*5+i] = gpu->gpu_sim.Qfbasis; - int_ptr_buffer[ERI_GRAD_FFFF_TPB*6+i] = gpu->gpu_sim.Qsbasis; - int_ptr_buffer[ERI_GRAD_FFFF_TPB*7+i] = gpu->gpu_sim.Qstart; - int_ptr_buffer[ERI_GRAD_FFFF_TPB*8+i] = gpu->gpu_sim.sorted_Q; - int_ptr_buffer[ERI_GRAD_FFFF_TPB*9+i] = gpu->gpu_sim.sorted_Qnumber; - dbl_buffer[ERI_GRAD_FFFF_TPB*0+i] = gpu->gpu_sim.primLimit; - dbl_buffer[ERI_GRAD_FFFF_TPB*1+i] = gpu->gpu_sim.gradCutoff; - dbl_buffer[ERI_GRAD_FFFF_TPB*2+i] = gpu->gpu_sim.hyb_coeff; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*0+i] = gpu->gpu_sim.cons; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*1+i] = gpu->gpu_sim.cutMatrix; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*2+i] = gpu->gpu_sim.cutPrim; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*3+i] = gpu->gpu_sim.dense; - 
dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*4+i] = gpu->gpu_sim.denseb; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*5+i] = gpu->gpu_sim.expoSum; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*6+i] = gpu->gpu_sim.gcexpo; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*7+i] = gpu->gpu_sim.store; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*8+i] = gpu->gpu_sim.store2; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*9+i] = gpu->gpu_sim.storeAA; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*10+i] = gpu->gpu_sim.storeBB; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*11+i] = gpu->gpu_sim.storeCC; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*12+i] = gpu->gpu_sim.weightedCenterX; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*13+i] = gpu->gpu_sim.weightedCenterY; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*14+i] = gpu->gpu_sim.weightedCenterZ; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*15+i] = gpu->gpu_sim.Xcoeff; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*16+i] = gpu->gpu_sim.xyz; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*17+i] = gpu->gpu_sim.YCutoff; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*18+i] = gpu->gpu_sim.YVerticalTemp; - int2_ptr_buffer[ERI_GRAD_FFFF_TPB*0+i] = gpu->gpu_sim.sorted_YCutoffIJ; - char_ptr_buffer[ERI_GRAD_FFFF_TPB*0+i] = gpu->gpu_sim.mpi_bcompute; - char_ptr_buffer[ERI_GRAD_FFFF_TPB*1+i] = gpu->gpu_sim.KLMN; + int *int_buffer = (int*) malloc(ERI_GRAD_FFFF_SMEM_INT_SIZE * ERI_GRAD_FFFF_TPB * sizeof(int)); + int **int_ptr_buffer = (int**) malloc(ERI_GRAD_FFFF_SMEM_INT_PTR_SIZE * ERI_GRAD_FFFF_TPB * sizeof(int*)); + QUICKDouble *dbl_buffer = (QUICKDouble*) malloc(ERI_GRAD_FFFF_SMEM_DBL_SIZE*ERI_GRAD_FFFF_TPB * sizeof(QUICKDouble)); + QUICKDouble **dbl_ptr_buffer = (QUICKDouble**) malloc(ERI_GRAD_FFFF_SMEM_DBL_PTR_SIZE * ERI_GRAD_FFFF_TPB*sizeof(QUICKDouble*)); + int2 **int2_ptr_buffer = (int2**) malloc(ERI_GRAD_FFFF_SMEM_INT2_PTR_SIZE*ERI_GRAD_FFFF_TPB * sizeof(int2*)); + unsigned char **char_ptr_buffer = (unsigned char**) malloc(ERI_GRAD_FFFF_SMEM_CHAR_PTR_SIZE * ERI_GRAD_FFFF_TPB * sizeof(unsigned char*)); + QUICKAtomicType **grad_ptr_buffer = (QUICKAtomicType**) malloc(ERI_GRAD_FFFF_SMEM_PTR_SIZE * 
ERI_GRAD_FFFF_TPB * sizeof(QUICKAtomicType*)); + unsigned char trans[TRANSDIM * TRANSDIM * TRANSDIM]; + + for (int i = 0; i < ERI_GRAD_FFFF_TPB; i++) { + int_buffer[i] = gpu->gpu_sim.natom; + int_buffer[ERI_GRAD_FFFF_TPB + i] = gpu->gpu_sim.nbasis; + int_buffer[ERI_GRAD_FFFF_TPB * 2 + i] = gpu->gpu_sim.nshell; + int_buffer[ERI_GRAD_FFFF_TPB * 3 + i] = gpu->gpu_sim.jbasis; + int_buffer[ERI_GRAD_FFFF_TPB * 4 + i] = gpu->gpu_sim.sqrQshell; + int_buffer[ERI_GRAD_FFFF_TPB * 5 + i] = gpu->gpu_sim.prim_total; + int_buffer[ERI_GRAD_FFFF_TPB * 6 + i] = gpu->gpu_sim.ffStart; + int_ptr_buffer[i] = gpu->gpu_sim.katom; + int_ptr_buffer[ERI_GRAD_FFFF_TPB + i] = gpu->gpu_sim.kprim; + int_ptr_buffer[ERI_GRAD_FFFF_TPB * 2 + i] = gpu->gpu_sim.kstart; + int_ptr_buffer[ERI_GRAD_FFFF_TPB * 3 + i] = gpu->gpu_sim.Ksumtype; + int_ptr_buffer[ERI_GRAD_FFFF_TPB * 4 + i] = gpu->gpu_sim.prim_start; + int_ptr_buffer[ERI_GRAD_FFFF_TPB * 5 + i] = gpu->gpu_sim.Qfbasis; + int_ptr_buffer[ERI_GRAD_FFFF_TPB * 6 + i] = gpu->gpu_sim.Qsbasis; + int_ptr_buffer[ERI_GRAD_FFFF_TPB * 7 + i] = gpu->gpu_sim.Qstart; + int_ptr_buffer[ERI_GRAD_FFFF_TPB * 8 + i] = gpu->gpu_sim.sorted_Q; + int_ptr_buffer[ERI_GRAD_FFFF_TPB * 9 + i] = gpu->gpu_sim.sorted_Qnumber; + dbl_buffer[i] = gpu->gpu_sim.primLimit; + dbl_buffer[ERI_GRAD_FFFF_TPB + i] = gpu->gpu_sim.gradCutoff; + dbl_buffer[ERI_GRAD_FFFF_TPB * 2 + i] = gpu->gpu_sim.hyb_coeff; + dbl_ptr_buffer[i] = gpu->gpu_sim.cons; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB + i] = gpu->gpu_sim.cutMatrix; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 2 + i] = gpu->gpu_sim.cutPrim; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 3 + i] = gpu->gpu_sim.dense; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 4 + i] = gpu->gpu_sim.denseb; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 5 + i] = gpu->gpu_sim.expoSum; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 6 + i] = gpu->gpu_sim.gcexpo; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 7 + i] = gpu->gpu_sim.store; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 8 + i] = gpu->gpu_sim.store2; + 
dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 9 + i] = gpu->gpu_sim.storeAA; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 10 + i] = gpu->gpu_sim.storeBB; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 11 + i] = gpu->gpu_sim.storeCC; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 12 + i] = gpu->gpu_sim.weightedCenterX; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 13 + i] = gpu->gpu_sim.weightedCenterY; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 14 + i] = gpu->gpu_sim.weightedCenterZ; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 15 + i] = gpu->gpu_sim.Xcoeff; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 16 + i] = gpu->gpu_sim.xyz; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 17 + i] = gpu->gpu_sim.YCutoff; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 18 + i] = gpu->gpu_sim.YVerticalTemp; + int2_ptr_buffer[i] = gpu->gpu_sim.sorted_YCutoffIJ; + char_ptr_buffer[i] = gpu->gpu_sim.mpi_bcompute; + char_ptr_buffer[ERI_GRAD_FFFF_TPB + i] = gpu->gpu_sim.KLMN; #if defined(USE_LEGACY_ATOMICS) - grad_ptr_buffer[ERI_GRAD_FFFF_TPB*0+i] = gpu->gpu_sim.gradULL; + grad_ptr_buffer[i] = gpu->gpu_sim.gradULL; #else - grad_ptr_buffer[ERI_GRAD_FFFF_TPB*0+i] = gpu->gpu_sim.grad; + grad_ptr_buffer[i] = gpu->gpu_sim.grad; #endif } @@ -438,24 +433,24 @@ void getGrad_ffff(_gpu_type gpu) unsigned char *dev_char_buffer; QUICKAtomicType **dev_grad_ptr_buffer; - gpuMalloc((void **) &dev_int_buffer, ERI_GRAD_FFFF_SMEM_INT_SIZE*ERI_GRAD_FFFF_TPB*sizeof(int)); - gpuMalloc((void **) &dev_int_ptr_buffer, ERI_GRAD_FFFF_SMEM_INT_PTR_SIZE*ERI_GRAD_FFFF_TPB*sizeof(int*)); - gpuMalloc((void **) &dev_dbl_buffer, ERI_GRAD_FFFF_SMEM_DBL_SIZE*ERI_GRAD_FFFF_TPB*sizeof(QUICKDouble)); - gpuMalloc((void **) &dev_dbl_ptr_buffer, ERI_GRAD_FFFF_SMEM_DBL_PTR_SIZE*ERI_GRAD_FFFF_TPB*sizeof(QUICKDouble*)); - gpuMalloc((void **) &dev_int2_ptr_buffer, ERI_GRAD_FFFF_SMEM_INT2_PTR_SIZE*ERI_GRAD_FFFF_TPB*sizeof(int2*)); - gpuMalloc((void **) &dev_char_ptr_buffer, ERI_GRAD_FFFF_SMEM_CHAR_PTR_SIZE*ERI_GRAD_FFFF_TPB*sizeof(unsigned char*)); - gpuMalloc((void **) &dev_char_buffer, 
ERI_GRAD_FFFF_SMEM_CHAR_SIZE*sizeof(unsigned char)); - gpuMalloc((void **) &dev_grad_ptr_buffer, ERI_GRAD_FFFF_SMEM_PTR_SIZE*ERI_GRAD_FFFF_TPB*sizeof(QUICKAtomicType*)); - - gpuMemcpy(dev_int_buffer, int_buffer, ERI_GRAD_FFFF_SMEM_INT_SIZE*ERI_GRAD_FFFF_TPB*sizeof(int), cudaMemcpyHostToDevice); - gpuMemcpy(dev_int_ptr_buffer, int_ptr_buffer, ERI_GRAD_FFFF_SMEM_INT_PTR_SIZE*ERI_GRAD_FFFF_TPB*sizeof(int*), cudaMemcpyHostToDevice); - gpuMemcpy(dev_dbl_buffer, dbl_buffer, ERI_GRAD_FFFF_SMEM_DBL_SIZE*ERI_GRAD_FFFF_TPB*sizeof(QUICKDouble), cudaMemcpyHostToDevice); - gpuMemcpy(dev_dbl_ptr_buffer, dbl_ptr_buffer, ERI_GRAD_FFFF_SMEM_DBL_PTR_SIZE*ERI_GRAD_FFFF_TPB*sizeof(QUICKDouble*), cudaMemcpyHostToDevice); - gpuMemcpy(dev_int2_ptr_buffer, int2_ptr_buffer, ERI_GRAD_FFFF_SMEM_INT2_PTR_SIZE*ERI_GRAD_FFFF_TPB*sizeof(int2*), cudaMemcpyHostToDevice); - gpuMemcpy(dev_char_ptr_buffer, char_ptr_buffer, ERI_GRAD_FFFF_SMEM_CHAR_PTR_SIZE*ERI_GRAD_FFFF_TPB*sizeof(unsigned + gpuMalloc((void **) &dev_int_buffer, ERI_GRAD_FFFF_SMEM_INT_SIZE * ERI_GRAD_FFFF_TPB * sizeof(int)); + gpuMalloc((void **) &dev_int_ptr_buffer, ERI_GRAD_FFFF_SMEM_INT_PTR_SIZE * ERI_GRAD_FFFF_TPB * sizeof(int*)); + gpuMalloc((void **) &dev_dbl_buffer, ERI_GRAD_FFFF_SMEM_DBL_SIZE * ERI_GRAD_FFFF_TPB * sizeof(QUICKDouble)); + gpuMalloc((void **) &dev_dbl_ptr_buffer, ERI_GRAD_FFFF_SMEM_DBL_PTR_SIZE * ERI_GRAD_FFFF_TPB * sizeof(QUICKDouble*)); + gpuMalloc((void **) &dev_int2_ptr_buffer, ERI_GRAD_FFFF_SMEM_INT2_PTR_SIZE * ERI_GRAD_FFFF_TPB * sizeof(int2*)); + gpuMalloc((void **) &dev_char_ptr_buffer, ERI_GRAD_FFFF_SMEM_CHAR_PTR_SIZE * ERI_GRAD_FFFF_TPB * sizeof(unsigned char*)); + gpuMalloc((void **) &dev_char_buffer, ERI_GRAD_FFFF_SMEM_CHAR_SIZE * sizeof(unsigned char)); + gpuMalloc((void **) &dev_grad_ptr_buffer, ERI_GRAD_FFFF_SMEM_PTR_SIZE * ERI_GRAD_FFFF_TPB * sizeof(QUICKAtomicType*)); + + gpuMemcpy(dev_int_buffer, int_buffer, ERI_GRAD_FFFF_SMEM_INT_SIZE * ERI_GRAD_FFFF_TPB * sizeof(int), cudaMemcpyHostToDevice); 
+ gpuMemcpy(dev_int_ptr_buffer, int_ptr_buffer, ERI_GRAD_FFFF_SMEM_INT_PTR_SIZE * ERI_GRAD_FFFF_TPB * sizeof(int*), cudaMemcpyHostToDevice); + gpuMemcpy(dev_dbl_buffer, dbl_buffer, ERI_GRAD_FFFF_SMEM_DBL_SIZE * ERI_GRAD_FFFF_TPB * sizeof(QUICKDouble), cudaMemcpyHostToDevice); + gpuMemcpy(dev_dbl_ptr_buffer, dbl_ptr_buffer, ERI_GRAD_FFFF_SMEM_DBL_PTR_SIZE * ERI_GRAD_FFFF_TPB * sizeof(QUICKDouble*), cudaMemcpyHostToDevice); + gpuMemcpy(dev_int2_ptr_buffer, int2_ptr_buffer, ERI_GRAD_FFFF_SMEM_INT2_PTR_SIZE * ERI_GRAD_FFFF_TPB * sizeof(int2*), cudaMemcpyHostToDevice); + gpuMemcpy(dev_char_ptr_buffer, char_ptr_buffer, ERI_GRAD_FFFF_SMEM_CHAR_PTR_SIZE * ERI_GRAD_FFFF_TPB * sizeof(unsigned char*), cudaMemcpyHostToDevice); - gpuMemcpy(dev_char_buffer, &trans, ERI_GRAD_FFFF_SMEM_CHAR_SIZE*sizeof(unsigned char), cudaMemcpyHostToDevice); - gpuMemcpy(dev_grad_ptr_buffer, grad_ptr_buffer, ERI_GRAD_FFFF_SMEM_PTR_SIZE*ERI_GRAD_FFFF_TPB*sizeof(QUICKAtomicType*), + gpuMemcpy(dev_char_buffer, &trans, ERI_GRAD_FFFF_SMEM_CHAR_SIZE * sizeof(unsigned char), cudaMemcpyHostToDevice); + gpuMemcpy(dev_grad_ptr_buffer, grad_ptr_buffer, ERI_GRAD_FFFF_SMEM_PTR_SIZE * ERI_GRAD_FFFF_TPB * sizeof(QUICKAtomicType*), cudaMemcpyHostToDevice); // Part f-3 @@ -471,7 +466,7 @@ void getGrad_ffff(_gpu_type gpu) + sizeof(QUICKAtomicType *) * ERI_GRAD_FFFF_SMEM_PTR_SIZE) * ERI_GRAD_FFFF_TPB + sizeof(unsigned char) * ERI_GRAD_FFFF_SMEM_CHAR_SIZE>>> (dev_int_buffer, dev_int_ptr_buffer, dev_dbl_buffer, dev_dbl_ptr_buffer, dev_int2_ptr_buffer, - dev_char_ptr_buffer, dev_char_buffer, dev_grad_ptr_buffer,gpu->gpu_sim.ffStart, gpu->gpu_sim.sqrQshell))) + dev_char_ptr_buffer, dev_char_buffer, dev_grad_ptr_buffer,gpu->gpu_sim.ffStart, gpu->gpu_sim.sqrQshell))); #endif } @@ -501,7 +496,7 @@ void get_oshell_eri_grad_ffff(_gpu_type gpu) // nvtxRangePushA("Gradient 2e"); // compute one electron gradients in the meantime - //get_oneen_grad_(); +// get_oneen_grad_(); // Part f-3 // if (gpu->maxL >= 3) { diff --git 
a/src/gpu/cuda/gpu_get2e_grad_ffff.cuh b/src/gpu/cuda/gpu_get2e_grad_ffff.cuh index 9b9d8e98..79d63c42 100644 --- a/src/gpu/cuda/gpu_get2e_grad_ffff.cuh +++ b/src/gpu/cuda/gpu_get2e_grad_ffff.cuh @@ -10,612 +10,580 @@ #undef STOREDIM #if defined int_spdf4 -#undef VDIM3 -#define VDIM3 VDIM3_L -#define STOREDIM STOREDIM_XL -#define STORE_INIT 4 -#define STORE_DIM 80 -#define STORE_INIT_I_AA 4 -#define STORE_INIT_J_AA 10 -#define STORE_DIM_I_AA 80 -#define STORE_DIM_J_AA 110 - -#define STORE_INIT_I_CC 10 -#define STORE_INIT_J_CC 4 -#define STORE_DIM_I_CC 110 -#define STORE_DIM_J_CC 80 + #undef VDIM3 + #define VDIM3 VDIM3_L + #define STOREDIM STOREDIM_XL + #define STORE_INIT (4) + #define STORE_DIM (80) + #define STORE_INIT_I_AA (4) + #define STORE_INIT_J_AA (10) + #define STORE_DIM_I_AA (80) + #define STORE_DIM_J_AA (110) + + #define STORE_INIT_I_CC (10) + #define STORE_INIT_J_CC (4) + #define STORE_DIM_I_CC (110) + #define STORE_DIM_J_CC (80) #endif -#ifndef new_quick_2_gpu_get2e_subs_grad_h +#if !defined(new_quick_2_gpu_get2e_subs_grad_h) #define new_quick_2_gpu_get2e_subs_grad_h #undef STOREDIM #define STOREDIM STOREDIM_XL -#ifndef OSHELL -#define FMT_NAME FmT -#include "../gpu_fmt.h" +#if !defined(OSHELL) + #define FMT_NAME FmT + #include "../gpu_fmt.h" -__device__ __inline__ int lefthrr_2(const QUICKDouble RAx, const QUICKDouble RAy, const QUICKDouble RAz, - const QUICKDouble RBx, const QUICKDouble RBy, const QUICKDouble RBz, - const int KLMNAx, const int KLMNAy, const int KLMNAz, - const int KLMNBx, const int KLMNBy, const int KLMNBz, - const int IJTYPE,QUICKDouble* coefAngularL, unsigned char* const angularL, unsigned char* const smem_char) +__device__ static inline int lefthrr_2(const QUICKDouble RAx, const QUICKDouble RAy, const QUICKDouble RAz, + const QUICKDouble RBx, const QUICKDouble RBy, const QUICKDouble RBz, + const int KLMNAx, const int KLMNAy, const int KLMNAz, + const int KLMNBx, const int KLMNBy, const int KLMNBz, + QUICKDouble * coefAngularL, 
unsigned char * const angularL, unsigned char * const smem_char) { - int numAngularL; coefAngularL[0] = 1.0; - angularL[0] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + KLMNBx, KLMNAy + KLMNBy, KLMNAz + KLMNBz, TRANSDIM, TRANSDIM, TRANSDIM); - + angularL[0] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + KLMNBx, + KLMNAy + KLMNBy, KLMNAz + KLMNBz, TRANSDIM, TRANSDIM, TRANSDIM); + + if (KLMNBx == 2 || KLMNBy == 2 || KLMNBz == 2) { + numAngularL = 3; + QUICKDouble tmp; + + if (KLMNBx == 2) { + tmp = RAx - RBx; + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } else if(KLMNBy == 2) { + tmp = RAy - RBy; + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBz == 2) { + tmp = RAz - RBz; + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + } - if (KLMNBx == 2 || KLMNBy == 2 || KLMNBz == 2) { - numAngularL = 3; - QUICKDouble tmp; - - - if (KLMNBx == 2) { - tmp = RAx - RBx; - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - }else if(KLMNBy == 2) { - tmp = RAy - RBy; - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - }else if (KLMNBz == 2 ){ - tmp = RAz - RBz; - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - } - - coefAngularL[1] = 2 * tmp; - coefAngularL[2]= tmp * tmp; - - - angularL[numAngularL - 1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - - return numAngularL; - - }else{ - - numAngularL = 4; - QUICKDouble tmp, tmp2; - - if(KLMNBx == 1 && KLMNBy == 1){ - tmp = RAx - RBx; - tmp2 = RAy - RBy; - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - - }else if (KLMNBx == 1 && KLMNBz == 1) { - tmp = RAx - 
RBx; - tmp2 = RAz - RBz; - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - }else if (KLMNBy == 1 && KLMNBz == 1) { - tmp = RAy - RBy; - tmp2 = RAz - RBz; - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - } - - - coefAngularL[1] = tmp; - coefAngularL[2] = tmp2; - coefAngularL[3] = tmp * tmp2; - - - angularL[numAngularL - 1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - - return numAngularL; + coefAngularL[1] = 2 * tmp; + coefAngularL[2]= tmp * tmp; + + angularL[numAngularL - 1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + + return numAngularL; + } else { + numAngularL = 4; + QUICKDouble tmp, tmp2; + + if (KLMNBx == 1 && KLMNBy == 1) { + tmp = RAx - RBx; + tmp2 = RAy - RBy; + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBx == 1 && KLMNBz == 1) { + tmp = RAx - RBx; + tmp2 = RAz - RBz; + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBy == 1 && KLMNBz == 1) { + tmp = RAy - RBy; + tmp2 = RAz - RBz; + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); } -} + coefAngularL[1] = tmp; + coefAngularL[2] = tmp2; + coefAngularL[3] = tmp * tmp2; + angularL[numAngularL - 1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz, TRANSDIM, 
TRANSDIM, TRANSDIM); + + return numAngularL; + } +} -__device__ __inline__ int lefthrr(const QUICKDouble RAx, const QUICKDouble RAy, const QUICKDouble RAz, - const QUICKDouble RBx, const QUICKDouble RBy, const QUICKDouble RBz, - const int KLMNAx, const int KLMNAy, const int KLMNAz, - const int KLMNBx, const int KLMNBy, const int KLMNBz, - const int IJTYPE,QUICKDouble* coefAngularL, unsigned char* const angularL, unsigned char* -const smem_char) +__device__ static inline int lefthrr(const QUICKDouble RAx, const QUICKDouble RAy, const QUICKDouble RAz, + const QUICKDouble RBx, const QUICKDouble RBy, const QUICKDouble RBz, + const int KLMNAx, const int KLMNAy, const int KLMNAz, + const int KLMNBx, const int KLMNBy, const int KLMNBz, + QUICKDouble * coefAngularL, unsigned char * const angularL, unsigned char * const smem_char) { - - int numAngularL; - + coefAngularL[0] = 1.0; angularL[0] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + KLMNBx, KLMNAy + KLMNBy, KLMNAz + KLMNBz, TRANSDIM, TRANSDIM, TRANSDIM); - if (KLMNBx == 3 || KLMNBy == 3 || KLMNBz == 3) { - numAngularL = 4; - QUICKDouble tmp; - - if (KLMNBx == 3) { - tmp = RAx - RBx; - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+2, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - }else if (KLMNBy == 3) { - tmp = RAy - RBy; - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - }else if (KLMNBz == 3) { - tmp = RAz - RBz; - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+2, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - } - - - coefAngularL[1] = 3 * tmp; - coefAngularL[2] = 3 * tmp * tmp; - coefAngularL[3] = tmp * tmp * tmp; - - - angularL[numAngularL - 1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, 
KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - - return numAngularL; - }else if (KLMNBx == 1 && KLMNBy == 1) { - numAngularL = 8; - QUICKDouble tmp = RAx - RBx; - QUICKDouble tmp2 = RAy - RBy; - QUICKDouble tmp3 = RAz - RBz; - - coefAngularL[1] = tmp; - coefAngularL[2] = tmp2; - coefAngularL[3] = tmp3; - coefAngularL[4] = tmp * tmp2; - coefAngularL[5] = tmp * tmp3; - coefAngularL[6] = tmp2 * tmp3; - coefAngularL[7] = tmp * tmp2 * tmp3; - - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - - angularL[numAngularL - 1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - - return numAngularL; - }else{ - - numAngularL = 6; - QUICKDouble tmp; - QUICKDouble tmp2; - - if (KLMNBx == 1) { - tmp = RAx - RBx; - }else if (KLMNBy == 1){ - tmp = RAy - RBy; - }else if (KLMNBz == 1){ - tmp = RAz - RBz; + if (KLMNBx == 3 || KLMNBy == 3 || KLMNBz == 3) { + numAngularL = 4; + QUICKDouble tmp; + + if (KLMNBx == 3) { + tmp = RAx - RBx; + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 2, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBy == 3) { + tmp = RAy - RBy; + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBz == 3) { + tmp = RAz - RBz; + 
angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 2, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + } + + coefAngularL[1] = 3 * tmp; + coefAngularL[2] = 3 * tmp * tmp; + coefAngularL[3] = tmp * tmp * tmp; + + angularL[numAngularL - 1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBx == 1 && KLMNBy == 1) { + numAngularL = 8; + QUICKDouble tmp = RAx - RBx; + QUICKDouble tmp2 = RAy - RBy; + QUICKDouble tmp3 = RAz - RBz; + + coefAngularL[1] = tmp; + coefAngularL[2] = tmp2; + coefAngularL[3] = tmp3; + coefAngularL[4] = tmp * tmp2; + coefAngularL[5] = tmp * tmp3; + coefAngularL[6] = tmp2 * tmp3; + coefAngularL[7] = tmp * tmp2 * tmp3; + + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + + angularL[numAngularL - 1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } else { + numAngularL = 6; + QUICKDouble tmp; + QUICKDouble tmp2; + + if (KLMNBx == 1) { + tmp = RAx - RBx; + } else if (KLMNBy == 1) { + tmp = RAy - RBy; + } else if (KLMNBz == 1) { + tmp = RAz - RBz; + } + + if (KLMNBx == 2) { + tmp2 = RAx - RBx; + } else if (KLMNBy == 2) { + tmp2 = RAy - RBy; + } else if (KLMNBz == 2) { + tmp2 = RAz - RBz; + } + + coefAngularL[1] = tmp; + coefAngularL[2] = 2 * tmp2; + coefAngularL[3] = 2 * tmp * tmp2; + coefAngularL[4] = tmp2 * tmp2; + 
coefAngularL[5] = tmp * tmp2 * tmp2; + + if (KLMNBx == 2) { + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 2, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } + + if (KLMNBy == 2) { + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } + + if (KLMNBz == 2) { + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 2, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + } + + if (KLMNBx == 1) { + // 120 + if (KLMNBy == 2) { + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + // 102 + } else { + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); } - + } + + if (KLMNBy == 1) { + // 210 if (KLMNBx == 2) { - tmp2 = RAx - RBx; - }else if (KLMNBy == 2){ - tmp2 = RAy - RBy; - }else if (KLMNBz == 2){ - tmp2 = RAz - RBz; + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + // 012 + } else { + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); } - - coefAngularL[1] = tmp; - coefAngularL[2] = 2 * tmp2; - coefAngularL[3] = 2 * tmp * tmp2; - coefAngularL[4] = tmp2 * tmp2; - coefAngularL[5] = tmp * tmp2 * tmp2; - - + } + + if (KLMNBz == 1) { + // 201 if (KLMNBx == 2) { - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+2, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - } - - if (KLMNBy == 2) { - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, 
KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - } - - if (KLMNBz == 2) { - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+2, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - } - - if (KLMNBx == 1) { - if (KLMNBy == 2) { //120 - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - }else{ //102 - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - } - } - - if (KLMNBy == 1) { - if (KLMNBx == 2) { // 210 - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - }else{ // 012 - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - } - } - - if (KLMNBz == 1) { - if (KLMNBx == 2) { // 201 - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - }else{ // 021 - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - } - } - - - if (KLMNBx == 1) { - angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + // 021 + } else { + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); } - - if (KLMNBy == 1) { - angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - } - - if (KLMNBz == 1) { - angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - } - - angularL[numAngularL - 1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - - return numAngularL; - - - } + + if (KLMNBx == 1) { + angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } + + if (KLMNBy == 1) { + 
angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } + + if (KLMNBz == 1) { + angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + } + + angularL[numAngularL - 1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } + + return numAngularL; } -__device__ __inline__ int lefthrr_4(const QUICKDouble RAx, const QUICKDouble RAy, const QUICKDouble RAz, - const QUICKDouble RBx, const QUICKDouble RBy, const QUICKDouble RBz, - const int KLMNAx, const int KLMNAy, const int KLMNAz, - const int KLMNBx, const int KLMNBy, const int KLMNBz, - const int IJTYPE,QUICKDouble* coefAngularL, unsigned char* const angularL, unsigned char* -const smem_char) +__device__ static inline int lefthrr_4(const QUICKDouble RAx, const QUICKDouble RAy, const QUICKDouble RAz, + const QUICKDouble RBx, const QUICKDouble RBy, const QUICKDouble RBz, + const int KLMNAx, const int KLMNAy, const int KLMNAz, + const int KLMNBx, const int KLMNBy, const int KLMNBz, + QUICKDouble * coefAngularL, unsigned char * const angularL, unsigned char * const smem_char) { - int numAngularL; coefAngularL[0] = 1.0; - angularL[0] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + KLMNBx, KLMNAy + KLMNBy, KLMNAz + KLMNBz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[0] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + KLMNBx, + KLMNAy + KLMNBy, KLMNAz + KLMNBz, TRANSDIM, TRANSDIM, TRANSDIM); + + if (KLMNBx == 4) { + numAngularL = 5; + QUICKDouble tmp = RAx - RBx; + + coefAngularL[1] = 4 * tmp; + coefAngularL[2] = 6 * tmp * tmp; + coefAngularL[3] = 4 * tmp * tmp * tmp; + coefAngularL[4] = tmp * tmp * tmp * tmp; + + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 3, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 2, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBy == 4) 
{ + numAngularL = 5; + QUICKDouble tmp = RAy - RBy; + coefAngularL[1] = 4 * tmp; + coefAngularL[2] = 6 * tmp * tmp; + coefAngularL[3] = 4 * tmp * tmp * tmp; + coefAngularL[4] = tmp * tmp * tmp * tmp; + + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 3, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBz == 4) { + numAngularL = 5; + + QUICKDouble tmp = RAz - RBz; + coefAngularL[1] = 4 * tmp; + coefAngularL[2] = 6 * tmp * tmp; + coefAngularL[3] = 4 * tmp * tmp * tmp; + coefAngularL[4] = tmp * tmp * tmp * tmp; + + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 3, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 2, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBx == 1 && KLMNBy == 3) { + numAngularL = 8; + QUICKDouble tmp = RAx - RBx; + QUICKDouble tmp2 = RAy - RBy; + + coefAngularL[1] = tmp; + coefAngularL[2] = 3 * tmp2; + coefAngularL[3] = 3 * tmp * tmp2; + coefAngularL[4] = 3 * tmp2 * tmp2; + coefAngularL[5] = 3 * tmp * tmp2 * tmp2; + coefAngularL[6] = tmp2 * tmp2 * tmp2; + coefAngularL[7] = tmp * tmp2 * tmp2 * tmp2; + + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 3, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy + 2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz, 
TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBx == 3 && KLMNBy == 1) { + numAngularL = 8; + QUICKDouble tmp = RAy - RBy; + QUICKDouble tmp2 = RAx - RBx; + + coefAngularL[1] = tmp; + coefAngularL[2] = 3 * tmp2; + coefAngularL[3] = 3 * tmp * tmp2; + coefAngularL[4] = 3 * tmp2 * tmp2; + coefAngularL[5] = 3 * tmp * tmp2 * tmp2; + coefAngularL[6] = tmp2 * tmp2 * tmp2; + coefAngularL[7] = tmp * tmp2 * tmp2 * tmp2; + + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 3, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 2, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 2, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBx == 1 && KLMNBz == 3) { + numAngularL = 8; + QUICKDouble tmp = RAx - RBx; + QUICKDouble tmp2 = RAz - RBz; + + coefAngularL[1] = tmp; + coefAngularL[2] = 3 * tmp2; + coefAngularL[3] = 3 * tmp * tmp2; + coefAngularL[4] = 3 * tmp2 * tmp2; + coefAngularL[5] = 3 * tmp * tmp2 * tmp2; + coefAngularL[6] = tmp2 * tmp2 * tmp2; + coefAngularL[7] = tmp * tmp2 * tmp2 * tmp2; + + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 3, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz + 2, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 2, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz, TRANSDIM, 
TRANSDIM, TRANSDIM); + + } else if (KLMNBx == 3 && KLMNBz == 1) { + numAngularL = 8; + QUICKDouble tmp = RAz - RBz; + QUICKDouble tmp2 = RAx - RBx; + + coefAngularL[1] = tmp; + coefAngularL[2] = 3 * tmp2; + coefAngularL[3] = 3 * tmp * tmp2; + coefAngularL[4] = 3 * tmp2 * tmp2; + coefAngularL[5] = 3 * tmp * tmp2 * tmp2; + coefAngularL[6] = tmp2 * tmp2 * tmp2; + coefAngularL[7] = tmp * tmp2 * tmp2 * tmp2; + + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 3, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 2, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 2, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBy == 1 && KLMNBz == 3) { + numAngularL = 8; + QUICKDouble tmp = RAy - RBy; + QUICKDouble tmp2 = RAz - RBz; + + coefAngularL[1] = tmp; + coefAngularL[2] = 3 * tmp2; + coefAngularL[3] = 3 * tmp * tmp2; + coefAngularL[4] = 3 * tmp2 * tmp2; + coefAngularL[5] = 3 * tmp * tmp2 * tmp2; + coefAngularL[6] = tmp2 * tmp2 * tmp2; + coefAngularL[7] = tmp * tmp2 * tmp2 * tmp2; + + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+3, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz+2, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+2, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } 
else if (KLMNBy == 3 && KLMNBz == 1) { + numAngularL = 8; + QUICKDouble tmp = RAz - RBz; + QUICKDouble tmp2 = RAy - RBy; + + coefAngularL[1] = tmp; + coefAngularL[2] = 3 * tmp2; + coefAngularL[3] = 3 * tmp * tmp2; + coefAngularL[4] = 3 * tmp2 * tmp2; + coefAngularL[5] = 3 * tmp * tmp2 * tmp2; + coefAngularL[6] = tmp2 * tmp2 * tmp2; + coefAngularL[7] = tmp * tmp2 * tmp2 * tmp2; + + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 3, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 2, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBx == 2 && KLMNBy == 2) { + numAngularL = 9; + QUICKDouble tmp = RAx - RBx; + QUICKDouble tmp2 = RAy - RBy; + + coefAngularL[1] = 2 * tmp; + coefAngularL[2] = 2 * tmp2; + coefAngularL[3] = 4 * tmp * tmp2; + coefAngularL[4] = tmp * tmp; + coefAngularL[5] = tmp2 * tmp2; + coefAngularL[6] = 2 * tmp * tmp2 * tmp2; + coefAngularL[7] = 2 * tmp * tmp * tmp2; + coefAngularL[8] = tmp * tmp * tmp2 * tmp2; + + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy + 2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 2, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 2, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz, TRANSDIM, 
TRANSDIM, TRANSDIM); + angularL[7] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBx == 2 && KLMNBz == 2) { + numAngularL = 9; + QUICKDouble tmp = RAx - RBx; + QUICKDouble tmp2 = RAz - RBz; + + coefAngularL[1] = 2 * tmp; + coefAngularL[2] = 2 * tmp2; + coefAngularL[3] = 4 * tmp * tmp2; + coefAngularL[4] = tmp * tmp; + coefAngularL[5] = tmp2 * tmp2; + coefAngularL[6] = 2 * tmp * tmp2 * tmp2; + coefAngularL[7] = 2 * tmp * tmp * tmp2; + coefAngularL[8] = tmp * tmp * tmp2 * tmp2; + + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz + 2, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 2, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 2, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 2, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[7] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBy == 2 && KLMNBz == 2) { + numAngularL = 9; + QUICKDouble tmp = RAy - RBy; + QUICKDouble tmp2 = RAz - RBz; + + coefAngularL[1] = 2 * tmp; + coefAngularL[2] = 2 * tmp2; + coefAngularL[3] = 4 * tmp * tmp2; + coefAngularL[4] = tmp * tmp; + coefAngularL[5] = tmp2 * tmp2; + coefAngularL[6] = 2 * tmp * tmp2 * tmp2; + coefAngularL[7] = 2 * tmp * tmp * tmp2; + coefAngularL[8] = tmp * tmp * tmp2 * tmp2; + + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz + 2, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 2, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[4] = 
LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 2, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[7] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBx == 1 && KLMNBy == 1 && KLMNBz == 2) { + numAngularL = 12; + QUICKDouble tmp = RAx - RBx; + QUICKDouble tmp2 = RAy - RBy; + QUICKDouble tmp3 = RAz - RBz; + + coefAngularL[1] = tmp; + coefAngularL[2] = tmp2; + coefAngularL[3] = 2 * tmp3; + coefAngularL[4] = tmp * tmp2; + coefAngularL[5] = 2 * tmp * tmp3; + coefAngularL[6] = 2 * tmp2 * tmp3; + coefAngularL[7] = tmp3 * tmp3; + coefAngularL[8] = 2 * tmp * tmp2 * tmp3; + coefAngularL[9] = tmp * tmp3 * tmp3; + coefAngularL[10] = tmp2 * tmp3 * tmp3; + coefAngularL[11] = tmp * tmp2 * tmp3 * tmp3; + + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz + 2, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz + 2, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy + 1, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 2, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[7] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[8] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[9] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[10] =LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBx == 
1 && KLMNBz == 1 && KLMNBy == 2) { + numAngularL = 12; + QUICKDouble tmp = RAx - RBx; + QUICKDouble tmp2 = RAz - RBz; + QUICKDouble tmp3 = RAy - RBy; + + coefAngularL[1] = tmp; + coefAngularL[2] = tmp2; + coefAngularL[3] = 2 * tmp3; + coefAngularL[4] = tmp * tmp2; + coefAngularL[5] = 2 * tmp * tmp3; + coefAngularL[6] = 2 * tmp2 * tmp3; + coefAngularL[7] = tmp3 * tmp3; + coefAngularL[8] = 2 * tmp * tmp2 * tmp3; + coefAngularL[9] = tmp * tmp3 * tmp3; + coefAngularL[10] = tmp2 * tmp3 * tmp3; + coefAngularL[11] = tmp * tmp2 * tmp3 * tmp3; + + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 2, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy + 2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy + 1, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[7] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[8] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[9] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[10] =LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBy == 1 && KLMNBz == 1 && KLMNBx == 2) { + numAngularL = 12; + QUICKDouble tmp = RAy - RBy; + QUICKDouble tmp2 = RAz - RBz; + QUICKDouble tmp3 = RAx - RBx; + + coefAngularL[1] = tmp; + coefAngularL[2] = tmp2; + coefAngularL[3] = 2 * tmp3; + coefAngularL[4] = tmp * tmp2; + coefAngularL[5] = 2 * tmp * tmp3; + coefAngularL[6] = 2 * tmp2 * tmp3; + coefAngularL[7] = tmp3 * tmp3; + coefAngularL[8] = 2 * tmp * tmp2 
* tmp3; + coefAngularL[9] = tmp * tmp3 * tmp3; + coefAngularL[10] = tmp2 * tmp3 * tmp3; + coefAngularL[11] = tmp * tmp2 * tmp3 * tmp3; + + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 2, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 2, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy + 1, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 2, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[7] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[8] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[9] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[10] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } - if (KLMNBx == 4) { - numAngularL = 5; - QUICKDouble tmp = RAx - RBx; - - coefAngularL[1] = 4 * tmp; - coefAngularL[2] = 6 * tmp * tmp; - coefAngularL[3] = 4 * tmp * tmp * tmp; - coefAngularL[4] = tmp * tmp * tmp * tmp; - - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+3, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+2, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - }else if (KLMNBy == 4) { - numAngularL = 5; - QUICKDouble tmp = RAy - RBy; - coefAngularL[1] = 4 * tmp; - coefAngularL[2] = 6 * tmp * tmp; - coefAngularL[3] = 4 * tmp * tmp * tmp; - coefAngularL[4] = tmp * tmp * tmp * tmp; - - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+3, KLMNAz, TRANSDIM, 
TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - - }else if (KLMNBz == 4) { - numAngularL = 5; - - QUICKDouble tmp = RAz - RBz; - coefAngularL[1] = 4 * tmp; - coefAngularL[2] = 6 * tmp * tmp; - coefAngularL[3] = 4 * tmp * tmp * tmp; - coefAngularL[4] = tmp * tmp * tmp * tmp; - - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+3, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+2, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - - }else if (KLMNBx == 1 && KLMNBy == 3) { - numAngularL = 8; - QUICKDouble tmp = RAx - RBx; - QUICKDouble tmp2 = RAy - RBy; - - coefAngularL[1] = tmp; - coefAngularL[2] = 3 * tmp2; - coefAngularL[3] = 3 * tmp * tmp2; - coefAngularL[4] = 3 * tmp2 * tmp2; - coefAngularL[5] = 3 * tmp * tmp2 * tmp2; - coefAngularL[6] = tmp2 * tmp2 * tmp2; - coefAngularL[7] = tmp * tmp2 * tmp2 * tmp2; - - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+3, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy+2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - }else if (KLMNBx == 3 && KLMNBy == 1) { - numAngularL = 8; - QUICKDouble tmp = RAy - RBy; - QUICKDouble tmp2 = RAx - RBx; - - coefAngularL[1] = tmp; - coefAngularL[2] = 3 * tmp2; - coefAngularL[3] = 3 * tmp * tmp2; - coefAngularL[4] = 3 * tmp2 * tmp2; - coefAngularL[5] = 3 * 
tmp * tmp2 * tmp2; - coefAngularL[6] = tmp2 * tmp2 * tmp2; - coefAngularL[7] = tmp * tmp2 * tmp2 * tmp2; - - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+3, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+2, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+2, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - } - - else if (KLMNBx == 1 && KLMNBz ==3) { - numAngularL = 8; - QUICKDouble tmp = RAx - RBx; - QUICKDouble tmp2 = RAz - RBz; - - coefAngularL[1] = tmp; - coefAngularL[2] = 3 * tmp2; - coefAngularL[3] = 3 * tmp * tmp2; - coefAngularL[4] = 3 * tmp2 * tmp2; - coefAngularL[5] = 3 * tmp * tmp2 * tmp2; - coefAngularL[6] = tmp2 * tmp2 * tmp2; - coefAngularL[7] = tmp * tmp2 * tmp2 * tmp2; - - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+3, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz+2, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+2, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - - }else if (KLMNBx == 3 && KLMNBz == 1) { - numAngularL = 8; - QUICKDouble tmp = RAz - RBz; - QUICKDouble tmp2 = RAx - RBx; - - coefAngularL[1] = tmp; - coefAngularL[2] = 3 * tmp2; - coefAngularL[3] = 3 * tmp * tmp2; - coefAngularL[4] = 3 * tmp2 * tmp2; - coefAngularL[5] = 3 * tmp * tmp2 * tmp2; - coefAngularL[6] = 
tmp2 * tmp2 * tmp2; - coefAngularL[7] = tmp * tmp2 * tmp2 * tmp2; - - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+3, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+2, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+2, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - - }else if (KLMNBy == 1 && KLMNBz == 3) { - numAngularL = 8; - QUICKDouble tmp = RAy - RBy; - QUICKDouble tmp2 = RAz - RBz; - - coefAngularL[1] = tmp; - coefAngularL[2] = 3 * tmp2; - coefAngularL[3] = 3 * tmp * tmp2; - coefAngularL[4] = 3 * tmp2 * tmp2; - coefAngularL[5] = 3 * tmp * tmp2 * tmp2; - coefAngularL[6] = tmp2 * tmp2 * tmp2; - coefAngularL[7] = tmp * tmp2 * tmp2 * tmp2; - - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+3, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz+2, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+2, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - - }else if (KLMNBy == 3 && KLMNBz == 1) { - numAngularL = 8; - QUICKDouble tmp = RAz - RBz; - QUICKDouble tmp2 = RAy - RBy; - - coefAngularL[1] = tmp; - coefAngularL[2] = 3 * tmp2; - coefAngularL[3] = 3 * tmp * tmp2; - coefAngularL[4] = 3 * tmp2 * tmp2; - coefAngularL[5] = 3 * tmp * tmp2 * tmp2; - coefAngularL[6] = tmp2 * tmp2 * tmp2; - coefAngularL[7] = 
tmp * tmp2 * tmp2 * tmp2; - - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+3, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+2, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - - }else if (KLMNBx == 2 && KLMNBy == 2) { - numAngularL = 9; - QUICKDouble tmp = RAx - RBx; - QUICKDouble tmp2 = RAy - RBy; - - coefAngularL[1] = 2 * tmp; - coefAngularL[2] = 2 * tmp2; - coefAngularL[3] = 4 * tmp * tmp2; - coefAngularL[4] = tmp * tmp; - coefAngularL[5] = tmp2 * tmp2; - coefAngularL[6] = 2 * tmp * tmp2 * tmp2; - coefAngularL[7] = 2 * tmp * tmp * tmp2; - coefAngularL[8] = tmp * tmp * tmp2 * tmp2; - - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy+2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+2, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+2, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[7] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - }else if (KLMNBx == 2 && KLMNBz == 2) { - numAngularL = 9; - QUICKDouble tmp = RAx - RBx; - QUICKDouble tmp2 = RAz - RBz; - - coefAngularL[1] = 2 * tmp; - coefAngularL[2] = 2 * tmp2; - coefAngularL[3] = 4 * tmp * tmp2; - coefAngularL[4] = tmp * tmp; - 
coefAngularL[5] = tmp2 * tmp2; - coefAngularL[6] = 2 * tmp * tmp2 * tmp2; - coefAngularL[7] = 2 * tmp * tmp * tmp2; - coefAngularL[8] = tmp * tmp * tmp2 * tmp2; - - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz+2, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+2, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+2, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+2, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[7] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - }else if (KLMNBy == 2 && KLMNBz == 2) { - numAngularL = 9; - QUICKDouble tmp = RAy - RBy; - QUICKDouble tmp2 = RAz - RBz; - - coefAngularL[1] = 2 * tmp; - coefAngularL[2] = 2 * tmp2; - coefAngularL[3] = 4 * tmp * tmp2; - coefAngularL[4] = tmp * tmp; - coefAngularL[5] = tmp2 * tmp2; - coefAngularL[6] = 2 * tmp * tmp2 * tmp2; - coefAngularL[7] = 2 * tmp * tmp * tmp2; - coefAngularL[8] = tmp * tmp * tmp2 * tmp2; - - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz+2, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+2, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+2, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[7] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - }else if (KLMNBx == 1 && KLMNBy 
== 1 && KLMNBz == 2) { - numAngularL = 12; - QUICKDouble tmp = RAx - RBx; - QUICKDouble tmp2 = RAy - RBy; - QUICKDouble tmp3 = RAz - RBz; - - coefAngularL[1] = tmp; - coefAngularL[2] = tmp2; - coefAngularL[3] = 2 * tmp3; - coefAngularL[4] = tmp * tmp2; - coefAngularL[5] = 2 * tmp * tmp3; - coefAngularL[6] = 2 * tmp2 * tmp3; - coefAngularL[7] = tmp3 * tmp3; - coefAngularL[8] = 2 * tmp * tmp2 * tmp3; - coefAngularL[9] = tmp * tmp3 * tmp3; - coefAngularL[10] = tmp2 * tmp3 * tmp3; - coefAngularL[11] = tmp * tmp2 * tmp3 * tmp3; - - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz+2, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz+2, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy+1, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+2, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[7] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[8] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[9] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[10] =LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - }else if (KLMNBx == 1 && KLMNBz == 1 && KLMNBy == 2) { - numAngularL = 12; - QUICKDouble tmp = RAx - RBx; - QUICKDouble tmp2 = RAz - RBz; - QUICKDouble tmp3 = RAy - RBy; - - coefAngularL[1] = tmp; - coefAngularL[2] = tmp2; - coefAngularL[3] = 2 * tmp3; - coefAngularL[4] = tmp * tmp2; - coefAngularL[5] = 2 * tmp * tmp3; - coefAngularL[6] = 2 * tmp2 * tmp3; - coefAngularL[7] = tmp3 * tmp3; - coefAngularL[8] = 2 * tmp * tmp2 * tmp3; - coefAngularL[9] = tmp * tmp3 * tmp3; 
- coefAngularL[10] = tmp2 * tmp3 * tmp3; - coefAngularL[11] = tmp * tmp2 * tmp3 * tmp3; - - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+2, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy+2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy+1, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[7] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[8] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[9] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[10] =LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - }else if (KLMNBy == 1 && KLMNBz == 1 && KLMNBx == 2) { - numAngularL = 12; - QUICKDouble tmp = RAy - RBy; - QUICKDouble tmp2 = RAz - RBz; - QUICKDouble tmp3 = RAx - RBx; - - coefAngularL[1] = tmp; - coefAngularL[2] = tmp2; - coefAngularL[3] = 2 * tmp3; - coefAngularL[4] = tmp * tmp2; - coefAngularL[5] = 2 * tmp * tmp3; - coefAngularL[6] = 2 * tmp2 * tmp3; - coefAngularL[7] = tmp3 * tmp3; - coefAngularL[8] = 2 * tmp * tmp2 * tmp3; - coefAngularL[9] = tmp * tmp3 * tmp3; - coefAngularL[10] = tmp2 * tmp3 * tmp3; - coefAngularL[11] = tmp * tmp2 * tmp3 * tmp3; - - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+2, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+2, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy+1, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, 
KLMNAx+2, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[7] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[8] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[9] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[10] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - } - angularL[numAngularL - 1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); return numAngularL; - - } - - - #endif -__device__ __forceinline__ void hrrwholegrad2_ffff(QUICKDouble* const Yaax, QUICKDouble* const Yaay, QUICKDouble* const Yaaz, \ - QUICKDouble* const Ybbx, QUICKDouble* const Ybby, QUICKDouble* const Ybbz, \ - QUICKDouble* const Yccx, QUICKDouble* const Yccy, QUICKDouble* const Yccz, \ - const int I, const int J, const int K, const int L, \ - const int III, int JJJ, const int KKK, const int LLL, const int IJKLTYPE, - const QUICKDouble* store, const QUICKDouble* storeAA, const QUICKDouble* storeBB, const QUICKDouble* storeCC, \ - const QUICKDouble RAx, const QUICKDouble RAy, const QUICKDouble RAz, \ - const QUICKDouble RBx, const QUICKDouble RBy, const QUICKDouble RBz, \ - const QUICKDouble RCx, const QUICKDouble RCy, const QUICKDouble RCz, \ - const QUICKDouble RDx, const QUICKDouble RDy, const QUICKDouble RDz, int* const smem_int, -int** const smem_int_ptr, QUICKDouble** const smem_dbl_ptr, unsigned char** const smem_char_ptr, unsigned char* const smem_char) +__device__ static inline void hrrwholegrad2_ffff(QUICKDouble* const Yaax, QUICKDouble* const Yaay, QUICKDouble* const Yaaz, + QUICKDouble* const Ybbx, QUICKDouble* const Ybby, QUICKDouble* const Ybbz, + QUICKDouble* 
const Yccx, QUICKDouble* const Yccy, QUICKDouble* const Yccz, + const int III, int JJJ, const int KKK, const int LLL, + const QUICKDouble* store, const QUICKDouble* storeAA, const QUICKDouble* storeBB, const QUICKDouble* storeCC, + const QUICKDouble RAx, const QUICKDouble RAy, const QUICKDouble RAz, + const QUICKDouble RBx, const QUICKDouble RBy, const QUICKDouble RBz, + const QUICKDouble RCx, const QUICKDouble RCy, const QUICKDouble RCz, + const QUICKDouble RDx, const QUICKDouble RDy, const QUICKDouble RDz, int* const smem_int, + int** const smem_int_ptr, QUICKDouble** const smem_dbl_ptr, + unsigned char** const smem_char_ptr, unsigned char* const smem_char) { unsigned char angularL[12], angularR[12]; QUICKDouble coefAngularL[12], coefAngularR[12]; - + *Yaax = 0.0; *Yaay = 0.0; *Yaaz = 0.0; @@ -625,783 +593,869 @@ int** const smem_int_ptr, QUICKDouble** const smem_dbl_ptr, unsigned char** cons *Yccx = 0.0; *Yccy = 0.0; *Yccz = 0.0; - - QUICKDouble constant = DEV_SIM_DBL_PTR_CONS[III-1] * DEV_SIM_DBL_PTR_CONS[JJJ-1] * DEV_SIM_DBL_PTR_CONS[KKK-1] * DEV_SIM_DBL_PTR_CONS[LLL-1]; + + QUICKDouble constant = DEV_SIM_DBL_PTR_CONS[III - 1] * DEV_SIM_DBL_PTR_CONS[JJJ - 1] + * DEV_SIM_DBL_PTR_CONS[KKK - 1] * DEV_SIM_DBL_PTR_CONS[LLL - 1]; int numAngularL, numAngularR; - - numAngularR = lefthrr(RCx, RCy, RCz, RDx, RDy, RDz, \ - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,KKK-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,KKK-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,KKK-1,3,DEV_SIM_INT_NBASIS), \ - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,LLL-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,LLL-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,LLL-1,3,DEV_SIM_INT_NBASIS), \ - L, coefAngularR, angularR, smem_char); - - - // Part A - x - - numAngularL = lefthrr(RAx, RAy, RAz, RBx, RBy, RBz, \ - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,III-1,3,DEV_SIM_INT_NBASIS) + 1, LOC2(DEV_SIM_CHAR_PTR_KLMN,1,III-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,III-1,3,DEV_SIM_INT_NBASIS), \ - 
LOC2(DEV_SIM_CHAR_PTR_KLMN,0,JJJ-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,JJJ-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,JJJ-1,3,DEV_SIM_INT_NBASIS), \ - J, coefAngularL, angularL, smem_char); + numAngularR = lefthrr(RCx, RCy, RCz, RDx, RDy, RDz, + LOC2(DEV_SIM_CHAR_PTR_KLMN, 0, KKK - 1, 3, DEV_SIM_INT_NBASIS), + LOC2(DEV_SIM_CHAR_PTR_KLMN, 1, KKK - 1, 3, DEV_SIM_INT_NBASIS), + LOC2(DEV_SIM_CHAR_PTR_KLMN, 2, KKK - 1, 3, DEV_SIM_INT_NBASIS), + LOC2(DEV_SIM_CHAR_PTR_KLMN, 0, LLL - 1, 3, DEV_SIM_INT_NBASIS), + LOC2(DEV_SIM_CHAR_PTR_KLMN, 1, LLL - 1, 3, DEV_SIM_INT_NBASIS), + LOC2(DEV_SIM_CHAR_PTR_KLMN, 2, LLL - 1, 3, DEV_SIM_INT_NBASIS), + coefAngularR, angularR, smem_char); - for (int i = 0; i= 1) { - + + if (LOC2(DEV_SIM_CHAR_PTR_KLMN, 0, III - 1, 3, DEV_SIM_INT_NBASIS) >= 1) { numAngularL = lefthrr(RAx, RAy, RAz, RBx, RBy, RBz, - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,III-1,3,DEV_SIM_INT_NBASIS) - 1, LOC2(DEV_SIM_CHAR_PTR_KLMN,1,III-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,III-1,3,DEV_SIM_INT_NBASIS), - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,JJJ-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,JJJ-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,JJJ-1,3,DEV_SIM_INT_NBASIS), - J, coefAngularL, angularL, smem_char); - for (int i = 0; i= 1) { - + + if (LOC2(DEV_SIM_CHAR_PTR_KLMN, 1, III - 1, 3, DEV_SIM_INT_NBASIS) >= 1) { numAngularL = lefthrr(RAx, RAy, RAz, RBx, RBy, RBz, - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,III-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,III-1,3,DEV_SIM_INT_NBASIS) - 1, LOC2(DEV_SIM_CHAR_PTR_KLMN,2,III-1,3,DEV_SIM_INT_NBASIS), - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,JJJ-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,JJJ-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,JJJ-1,3,DEV_SIM_INT_NBASIS), - J, coefAngularL, angularL, smem_char); - for (int i = 0; i= 1) { - + + if (LOC2(DEV_SIM_CHAR_PTR_KLMN, 2, III - 1, 3, DEV_SIM_INT_NBASIS) >= 1) { numAngularL = lefthrr(RAx, RAy, RAz, RBx, RBy, RBz, - 
LOC2(DEV_SIM_CHAR_PTR_KLMN,0,III-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,III-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,III-1,3,DEV_SIM_INT_NBASIS) - 1, - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,JJJ-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,JJJ-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,JJJ-1,3,DEV_SIM_INT_NBASIS), - J, coefAngularL, angularL, smem_char); - for (int i = 0; i= 1) { - + + if (LOC2(DEV_SIM_CHAR_PTR_KLMN, 0, JJJ - 1, 3, DEV_SIM_INT_NBASIS) >= 1) { numAngularL = lefthrr_2(RAx, RAy, RAz, RBx, RBy, RBz, - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,III-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,III-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,III-1,3,DEV_SIM_INT_NBASIS), - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,JJJ-1,3,DEV_SIM_INT_NBASIS) - 1, LOC2(DEV_SIM_CHAR_PTR_KLMN,1,JJJ-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,JJJ-1,3,DEV_SIM_INT_NBASIS), - J - 1, coefAngularL, angularL, smem_char); - for (int i = 0; i= 1) { - + + if (LOC2(DEV_SIM_CHAR_PTR_KLMN, 1, JJJ - 1, 3, DEV_SIM_INT_NBASIS) >= 1) { numAngularL = lefthrr_2(RAx, RAy, RAz, RBx, RBy, RBz, - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,III-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,III-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,III-1,3,DEV_SIM_INT_NBASIS), - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,JJJ-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,JJJ-1,3,DEV_SIM_INT_NBASIS) - 1, LOC2(DEV_SIM_CHAR_PTR_KLMN,2,JJJ-1,3,DEV_SIM_INT_NBASIS), - J - 1, coefAngularL, angularL, smem_char); - for (int i = 0; i= 1) { - + + if (LOC2(DEV_SIM_CHAR_PTR_KLMN, 2, JJJ - 1, 3, DEV_SIM_INT_NBASIS) >= 1) { numAngularL = lefthrr_2(RAx, RAy, RAz, RBx, RBy, RBz, - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,III-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,III-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,III-1,3,DEV_SIM_INT_NBASIS), - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,JJJ-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,JJJ-1,3,DEV_SIM_INT_NBASIS), 
LOC2(DEV_SIM_CHAR_PTR_KLMN,2,JJJ-1,3,DEV_SIM_INT_NBASIS) - 1, - J - 1, coefAngularL, angularL, smem_char); + LOC2(DEV_SIM_CHAR_PTR_KLMN, 0, III - 1, 3, DEV_SIM_INT_NBASIS), + LOC2(DEV_SIM_CHAR_PTR_KLMN, 1, III - 1, 3, DEV_SIM_INT_NBASIS), + LOC2(DEV_SIM_CHAR_PTR_KLMN, 2, III - 1, 3, DEV_SIM_INT_NBASIS), + LOC2(DEV_SIM_CHAR_PTR_KLMN, 0, JJJ - 1, 3, DEV_SIM_INT_NBASIS), + LOC2(DEV_SIM_CHAR_PTR_KLMN, 1, JJJ - 1, 3, DEV_SIM_INT_NBASIS), + LOC2(DEV_SIM_CHAR_PTR_KLMN, 2, JJJ - 1, 3, DEV_SIM_INT_NBASIS) - 1, + coefAngularL, angularL, smem_char); + for (int i = 0; i= 1) { - + + if (LOC2(DEV_SIM_CHAR_PTR_KLMN, 0, KKK - 1, 3, DEV_SIM_INT_NBASIS) >= 1) { numAngularR = lefthrr(RCx, RCy, RCz, RDx, RDy, RDz, - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,KKK-1,3,DEV_SIM_INT_NBASIS) - 1, LOC2(DEV_SIM_CHAR_PTR_KLMN,1,KKK-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,KKK-1,3,DEV_SIM_INT_NBASIS), - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,LLL-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,LLL-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,LLL-1,3,DEV_SIM_INT_NBASIS), - L, coefAngularR, angularR, smem_char); - - for (int i = 0; i= 1) { - + + if (LOC2(DEV_SIM_CHAR_PTR_KLMN, 1, KKK - 1, 3, DEV_SIM_INT_NBASIS) >= 1) { numAngularR = lefthrr(RCx, RCy, RCz, RDx, RDy, RDz, - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,KKK-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,KKK-1,3,DEV_SIM_INT_NBASIS) - 1, LOC2(DEV_SIM_CHAR_PTR_KLMN,2,KKK-1,3,DEV_SIM_INT_NBASIS), - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,LLL-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,LLL-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,LLL-1,3,DEV_SIM_INT_NBASIS), - L, coefAngularR, angularR, smem_char); - - for (int i = 0; i= 1) { - + + if (LOC2(DEV_SIM_CHAR_PTR_KLMN, 2, KKK - 1, 3, DEV_SIM_INT_NBASIS) >= 1) { numAngularR = lefthrr(RCx, RCy, RCz, RDx, RDy, RDz, - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,KKK-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,KKK-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,KKK-1,3,DEV_SIM_INT_NBASIS) - 1, 
- LOC2(DEV_SIM_CHAR_PTR_KLMN,0,LLL-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,LLL-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,LLL-1,3,DEV_SIM_INT_NBASIS), - L, coefAngularR, angularR, smem_char); - - for (int i = 0; i ---> - -> expo(I) * xyz (I) + expo(J) * xyz(J) - P = --------------------------------------- - expo(I) + expo(J) - Those two are pre-calculated in CPU stage. - - */ + In the following comments, we have I, J, K, L denote the primitive gaussian function we use, and + for example, expo(III, ksumtype(II)) stands for the expo for the IIIth primitive guassian function for II shell, + we use I to express the corresponding index. + AB = expo(I)+expo(J) + ---> ---> + -> expo(I) * xyz (I) + expo(J) * xyz(J) + P = --------------------------------------- + expo(I) + expo(J) + Those two are pre-calculated in CPU stage. + + */ int ii_start = DEV_SIM_INT_PTR_PRIM_START[II]; int jj_start = DEV_SIM_INT_PTR_PRIM_START[JJ]; - - QUICKDouble AA = LOC2(DEV_SIM_DBL_PTR_GCEXPO, III , DEV_SIM_INT_PTR_KSUMTYPE[II] - 1, MAXPRIM, DEV_SIM_INT_NBASIS); - QUICKDouble BB = LOC2(DEV_SIM_DBL_PTR_GCEXPO, JJJ , DEV_SIM_INT_PTR_KSUMTYPE[JJ] - 1, MAXPRIM, DEV_SIM_INT_NBASIS); - - QUICKDouble AB = LOC2(DEV_SIM_DBL_PTR_EXPOSUM, ii_start+III, jj_start+JJJ, DEV_SIM_INT_PRIM_TOTAL, DEV_SIM_INT_PRIM_TOTAL); - QUICKDouble Px = LOC2(DEV_SIM_DBL_PTR_WEIGHTEDCENTERX, ii_start+III, jj_start+JJJ, DEV_SIM_INT_PRIM_TOTAL, DEV_SIM_INT_PRIM_TOTAL); - QUICKDouble Py = LOC2(DEV_SIM_DBL_PTR_WEIGHTEDCENTERY, ii_start+III, jj_start+JJJ, DEV_SIM_INT_PRIM_TOTAL, DEV_SIM_INT_PRIM_TOTAL); - QUICKDouble Pz = LOC2(DEV_SIM_DBL_PTR_WEIGHTEDCENTERZ, ii_start+III, jj_start+JJJ, DEV_SIM_INT_PRIM_TOTAL, DEV_SIM_INT_PRIM_TOTAL); - - /* - X1 is the contracted coeffecient, which is pre-calcuated in CPU stage as well. - cutoffprim is used to cut too small prim gaussian function when bring density matrix into consideration. 
- */ - QUICKDouble cutoffPrim = DNMax * LOC2(DEV_SIM_DBL_PTR_CUTPRIM, kStartI+III, kStartJ+JJJ, DEV_SIM_INT_JBASIS, DEV_SIM_INT_JBASIS); - QUICKDouble X1 = LOC4(DEV_SIM_DBL_PTR_XCOEFF, kStartI+III, kStartJ+JJJ, I - DEV_SIM_INT_PTR_QSTART[II], J - DEV_SIM_INT_PTR_QSTART[JJ], DEV_SIM_INT_JBASIS, DEV_SIM_INT_JBASIS, 2, 2); - - - for (int j = 0; j DEV_SIM_DBL_PRIMLIMIT) { - - QUICKDouble CC = LOC2(DEV_SIM_DBL_PTR_GCEXPO, KKK , DEV_SIM_INT_PTR_KSUMTYPE[KK] - 1, MAXPRIM, DEV_SIM_INT_NBASIS); - /* - CD = expo(L)+expo(K) - ABCD = 1/ (AB + CD) = 1 / (expo(I)+expo(J)+expo(K)+expo(L)) - AB * CD (expo(I)+expo(J))*(expo(K)+expo(L)) - Rou(Greek Letter) = ----------- = ------------------------------------ - AB + CD expo(I)+expo(J)+expo(K)+expo(L) - - expo(I)+expo(J) expo(K)+expo(L) - ABcom = -------------------------------- CDcom = -------------------------------- - expo(I)+expo(J)+expo(K)+expo(L) expo(I)+expo(J)+expo(K)+expo(L) - - ABCDtemp = 1/2(expo(I)+expo(J)+expo(K)+expo(L)) - */ - - int kk_start = DEV_SIM_INT_PTR_PRIM_START[KK]; - int ll_start = DEV_SIM_INT_PTR_PRIM_START[LL]; - - QUICKDouble CD = LOC2(DEV_SIM_DBL_PTR_EXPOSUM, kk_start+KKK, ll_start+LLL, DEV_SIM_INT_PRIM_TOTAL, DEV_SIM_INT_PRIM_TOTAL); - - QUICKDouble ABCD = 1/(AB+CD); - - /* - X2 is the multiplication of four indices normalized coeffecient - */ - QUICKDouble X2 = sqrt(ABCD) * X1 * LOC4(DEV_SIM_DBL_PTR_XCOEFF, kStartK+KKK, kStartL+LLL, K - DEV_SIM_INT_PTR_QSTART[KK], L - DEV_SIM_INT_PTR_QSTART[LL], DEV_SIM_INT_JBASIS, DEV_SIM_INT_JBASIS, 2, 2); - - /* - Q' is the weighting center of K and L - ---> ---> - -> ------> expo(K)*xyz(K)+expo(L)*xyz(L) - Q = P'(K,L) = ------------------------------ - expo(K) + expo(L) - - W' is the weight center for I, J, K, L - - ---> ---> ---> ---> - -> expo(I)*xyz(I) + expo(J)*xyz(J) + expo(K)*xyz(K) +expo(L)*xyz(L) - W = ------------------------------------------------------------------- - expo(I) + expo(J) + expo(K) + expo(L) - -> -> 2 - RPQ =| P - Q | - - -> -> 2 - T = ROU * 
| P - Q| - */ - - QUICKDouble Qx = LOC2(DEV_SIM_DBL_PTR_WEIGHTEDCENTERX, kk_start+KKK, ll_start+LLL, DEV_SIM_INT_PRIM_TOTAL, DEV_SIM_INT_PRIM_TOTAL); - QUICKDouble Qy = LOC2(DEV_SIM_DBL_PTR_WEIGHTEDCENTERY, kk_start+KKK, ll_start+LLL, DEV_SIM_INT_PRIM_TOTAL, DEV_SIM_INT_PRIM_TOTAL); - QUICKDouble Qz = LOC2(DEV_SIM_DBL_PTR_WEIGHTEDCENTERZ, kk_start+KKK, ll_start+LLL, DEV_SIM_INT_PRIM_TOTAL, DEV_SIM_INT_PRIM_TOTAL); - - //QUICKDouble T = AB * CD * ABCD * (SQR(Px - Qx) + SQR(Py - Qy) + SQR(Pz - Qz)); - - //QUICKDouble YVerticalTemp[VDIM1*VDIM2*VDIM3]; - FmT(I + J + K + L + 2, AB * CD * ABCD * (SQR(Px - Qx) + SQR(Py - Qy) + SQR(Pz - Qz)), YVerticalTemp); - - for (int i = 0; i<=I+J+K+L+2; i++) { - VY(0, 0, i) = VY(0, 0, i) * X2; - } - - //QUICKDouble store2[STOREDIM*STOREDIM]; - -#if defined int_spdf4 - - ERint_grad_vrr_ffff_1(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_2(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_3(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_4(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, 
(Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_5(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_6(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_7(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_8(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_9(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_10(I, J, K, 
L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_11(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_12(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_13(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_14(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_15(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * 
ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_16(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_17(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_18(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_19(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_20(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_21(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - 
RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_22(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_23(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_24(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_25(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - ERint_grad_vrr_ffff_26(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + QUICKDouble AA = LOC2(DEV_SIM_DBL_PTR_GCEXPO, III, 
DEV_SIM_INT_PTR_KSUMTYPE[II] - 1, MAXPRIM, DEV_SIM_INT_NBASIS); + QUICKDouble BB = LOC2(DEV_SIM_DBL_PTR_GCEXPO, JJJ, DEV_SIM_INT_PTR_KSUMTYPE[JJ] - 1, MAXPRIM, DEV_SIM_INT_NBASIS); + QUICKDouble AB = LOC2(DEV_SIM_DBL_PTR_EXPOSUM, ii_start + III, jj_start + JJJ, + DEV_SIM_INT_PRIM_TOTAL, DEV_SIM_INT_PRIM_TOTAL); + QUICKDouble Px = LOC2(DEV_SIM_DBL_PTR_WEIGHTEDCENTERX, ii_start + III, jj_start + JJJ, + DEV_SIM_INT_PRIM_TOTAL, DEV_SIM_INT_PRIM_TOTAL); + QUICKDouble Py = LOC2(DEV_SIM_DBL_PTR_WEIGHTEDCENTERY, ii_start + III, jj_start + JJJ, + DEV_SIM_INT_PRIM_TOTAL, DEV_SIM_INT_PRIM_TOTAL); + QUICKDouble Pz = LOC2(DEV_SIM_DBL_PTR_WEIGHTEDCENTERZ, ii_start + III, jj_start + JJJ, + DEV_SIM_INT_PRIM_TOTAL, DEV_SIM_INT_PRIM_TOTAL); - ERint_grad_vrr_ffff_27(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_28(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_29(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - + /* + X1 is the contracted coeffecient, which is pre-calcuated in CPU stage as well. + cutoffprim is used to cut too small prim gaussian function when bring density matrix into consideration. 
+ */ + QUICKDouble cutoffPrim = DNMax + * LOC2(DEV_SIM_DBL_PTR_CUTPRIM, kStartI + III, kStartJ + JJJ, DEV_SIM_INT_JBASIS, DEV_SIM_INT_JBASIS); + QUICKDouble X1 = LOC4(DEV_SIM_DBL_PTR_XCOEFF, kStartI + III, kStartJ + JJJ, + I - DEV_SIM_INT_PTR_QSTART[II], J - DEV_SIM_INT_PTR_QSTART[JJ], + DEV_SIM_INT_JBASIS, DEV_SIM_INT_JBASIS, 2, 2); + + for (int j = 0; j < kPrimK * kPrimL; j++) { + int LLL = (int) j / kPrimK; + int KKK = (int) j - kPrimK * LLL; + + if (cutoffPrim * LOC2(DEV_SIM_DBL_PTR_CUTPRIM, kStartK + KKK, kStartL + LLL, DEV_SIM_INT_JBASIS, DEV_SIM_INT_JBASIS) + > DEV_SIM_DBL_PRIMLIMIT) { + QUICKDouble CC = LOC2(DEV_SIM_DBL_PTR_GCEXPO, KKK, DEV_SIM_INT_PTR_KSUMTYPE[KK] - 1, MAXPRIM, DEV_SIM_INT_NBASIS); + /* + CD = expo(L)+expo(K) + ABCD = 1/ (AB + CD) = 1 / (expo(I)+expo(J)+expo(K)+expo(L)) + AB * CD (expo(I)+expo(J))*(expo(K)+expo(L)) + Rou(Greek Letter) = ----------- = ------------------------------------ + AB + CD expo(I)+expo(J)+expo(K)+expo(L) - ERint_grad_vrr_ffff_30(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + expo(I)+expo(J) expo(K)+expo(L) + ABcom = -------------------------------- CDcom = -------------------------------- + expo(I)+expo(J)+expo(K)+expo(L) expo(I)+expo(J)+expo(K)+expo(L) + ABCDtemp = 1/2(expo(I)+expo(J)+expo(K)+expo(L)) + */ - ERint_grad_vrr_ffff_31(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + int kk_start = DEV_SIM_INT_PTR_PRIM_START[KK]; + int ll_start = DEV_SIM_INT_PTR_PRIM_START[LL]; 
+ QUICKDouble CD = LOC2(DEV_SIM_DBL_PTR_EXPOSUM, kk_start + KKK, ll_start + LLL, + DEV_SIM_INT_PRIM_TOTAL, DEV_SIM_INT_PRIM_TOTAL); - ERint_grad_vrr_ffff_32(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + QUICKDouble ABCD = 1.0 / (AB + CD); + /* + X2 is the multiplication of four indices normalized coeffecient + */ + QUICKDouble X2 = sqrt(ABCD) * X1 + * LOC4(DEV_SIM_DBL_PTR_XCOEFF, kStartK + KKK, kStartL + LLL, + K - DEV_SIM_INT_PTR_QSTART[KK], L - DEV_SIM_INT_PTR_QSTART[LL], + DEV_SIM_INT_JBASIS, DEV_SIM_INT_JBASIS, 2, 2); + /* + Q' is the weighting center of K and L + ---> ---> + -> ------> expo(K)*xyz(K)+expo(L)*xyz(L) + Q = P'(K,L) = ------------------------------ + expo(K) + expo(L) + + W' is the weight center for I, J, K, L + + ---> ---> ---> ---> + -> expo(I)*xyz(I) + expo(J)*xyz(J) + expo(K)*xyz(K) +expo(L)*xyz(L) + W = ------------------------------------------------------------------- + expo(I) + expo(J) + expo(K) + expo(L) + -> -> 2 + RPQ =| P - Q | + + -> -> 2 + T = ROU * | P - Q| + */ + + QUICKDouble Qx = LOC2(DEV_SIM_DBL_PTR_WEIGHTEDCENTERX, kk_start + KKK, ll_start + LLL, + DEV_SIM_INT_PRIM_TOTAL, DEV_SIM_INT_PRIM_TOTAL); + QUICKDouble Qy = LOC2(DEV_SIM_DBL_PTR_WEIGHTEDCENTERY, kk_start + KKK, ll_start + LLL, + DEV_SIM_INT_PRIM_TOTAL, DEV_SIM_INT_PRIM_TOTAL); + QUICKDouble Qz = LOC2(DEV_SIM_DBL_PTR_WEIGHTEDCENTERZ, kk_start + KKK, ll_start + LLL, + DEV_SIM_INT_PRIM_TOTAL, DEV_SIM_INT_PRIM_TOTAL); + + FmT(I + J + K + L + 2, AB * CD * ABCD + * (SQR(Px - Qx) + SQR(Py - Qy) + SQR(Pz - Qz)), YVerticalTemp); + + for (int i = 0; i <= I + J + K + L + 2; i++) { + VY(0, 0, i) = VY(0, 0, i) * X2; + } +#if defined(int_spdf4) + ERint_grad_vrr_ffff_1(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py 
- RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_2(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_3(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_4(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_5(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_6(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py 
- RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_7(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_8(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_9(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_10(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_11(I, J, K, L, II, JJ, KK, LL, + Px - RAx, 
Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_12(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_13(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_14(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_15(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_16(I, J, K, L, II, JJ, KK, LL, + Px - 
RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_17(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_18(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_19(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_20(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_21(I, J, K, L, II, JJ, KK, LL, + 
Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_22(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_23(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_24(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_25(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_26(I, J, K, L, II, JJ, KK, 
LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_27(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_28(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_29(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_30(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_31(I, J, K, L, II, 
JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_32(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); #endif - - for (int i = 4; i< 84; i++) { - for (int j = 4; j< 84; j++) { + for (int i = 4; i < 84; i++) { + for (int j = 4; j < 84; j++) { // if (i < STOREDIM && j < STOREDIM) { - LOCSTORE(store, j-STORE_INIT, i-STORE_INIT , STORE_DIM, STORE_DIM) += LOCSTORE(store2, j, i, STOREDIM, STOREDIM); + LOCSTORE(store, j - STORE_INIT, i - STORE_INIT, STORE_DIM, STORE_DIM) += LOCSTORE(store2, j, i, STOREDIM, STOREDIM); // } } } - - for (int i = 4; i< 84; i++) { - for (int j = 10; j< 120; j++) { + for (int i = 4; i < 84; i++) { + for (int j = 10; j < 120; j++) { // if (i < STOREDIM && j < STOREDIM) { - LOCSTORE(storeAA, j-STORE_INIT_J_AA, i-STORE_INIT_I_AA, STORE_DIM_J_AA, STORE_DIM_I_AA) += LOCSTORE(store2, j, i, STOREDIM, STOREDIM) * AA * 2 ; - LOCSTORE(storeBB, j-STORE_INIT_J_AA, i-STORE_INIT_I_AA, STORE_DIM_J_AA, STORE_DIM_I_AA) += LOCSTORE(store2, j, i, STOREDIM, STOREDIM) * BB * 2 ; + LOCSTORE(storeAA, j - STORE_INIT_J_AA, i - STORE_INIT_I_AA, STORE_DIM_J_AA, STORE_DIM_I_AA) + += LOCSTORE(store2, j, i, STOREDIM, STOREDIM) * AA * 2; // } - } } - for (int i = 10; i< 120; i++) { - for (int j = 4; j< 84; j++) { + for (int i = 4; i < 84; i++) { + for (int j = 10; j < 120; j++) { // if (i < STOREDIM && j < STOREDIM) { - 
LOCSTORE(storeCC, j-STORE_INIT_J_CC, i-STORE_INIT_I_CC, STORE_DIM_J_CC, STORE_DIM_I_CC) += LOCSTORE(store2, j, i, STOREDIM, STOREDIM) * CC * 2 ; + LOCSTORE(storeBB, j - STORE_INIT_J_AA, i - STORE_INIT_I_AA, STORE_DIM_J_AA, STORE_DIM_I_AA) + += LOCSTORE(store2, j, i, STOREDIM, STOREDIM) * BB * 2; +// } + } + } + for (int i = 10; i < 120; i++) { + for (int j = 4; j < 84; j++) { +// if (i < STOREDIM && j < STOREDIM) { + LOCSTORE(storeCC, j - STORE_INIT_J_CC, i - STORE_INIT_I_CC, STORE_DIM_J_CC, STORE_DIM_I_CC) + += LOCSTORE(store2, j, i, STOREDIM, STOREDIM) * CC * 2; // } } - } - - + } } } } - -/* - for (int i = Sumindex[K]; i< Sumindex[K+L+2]; i++) { - for (int j = Sumindex[I]; j< Sumindex[I+J+2]; j++) { - if (i < STOREDIM && j < STOREDIM) { - printf("STORE %d %d %d %d %d %d %d %d %d %d %.9f \n",II, JJ, KK, LL, I, J, K, L, j, i, LOCSTORE(store, j, i , STOREDIM, STOREDIM)); - } - } - } -*/ + +// for (int i = Sumindex[K]; i < Sumindex[K + L + 2]; i++) { +// for (int j = Sumindex[I]; j < Sumindex[I + J + 2]; j++) { +// if (i < STOREDIM && j < STOREDIM) { +// printf("STORE %d %d %d %d %d %d %d %d %d %d %.9f \n",II, JJ, KK, LL, I, J, K, L, j, i, LOCSTORE(store, j, i , STOREDIM, STOREDIM)); +// } +// } +// } + QUICKDouble AGradx = 0.0; QUICKDouble AGrady = 0.0; QUICKDouble AGradz = 0.0; @@ -1412,373 +1466,345 @@ const smem_dbl_ptr, unsigned char** const smem_char_ptr, unsigned char* const sm QUICKDouble CGrady = 0.0; QUICKDouble CGradz = 0.0; - int AStart = (DEV_SIM_INT_PTR_KATOM[II]-1) * 3; - int BStart = (DEV_SIM_INT_PTR_KATOM[JJ]-1) * 3; - int CStart = (DEV_SIM_INT_PTR_KATOM[KK]-1) * 3; - int DStart = (DEV_SIM_INT_PTR_KATOM[LL]-1) * 3; - - QUICKDouble RBx, RBy, RBz; - QUICKDouble RDx, RDy, RDz; - - RBx = LOC2(DEV_SIM_DBL_PTR_XYZ, 0 , DEV_SIM_INT_PTR_KATOM[JJ]-1, 3, DEV_SIM_INT_NATOM); - RBy = LOC2(DEV_SIM_DBL_PTR_XYZ, 1 , DEV_SIM_INT_PTR_KATOM[JJ]-1, 3, DEV_SIM_INT_NATOM); - RBz = LOC2(DEV_SIM_DBL_PTR_XYZ, 2 , DEV_SIM_INT_PTR_KATOM[JJ]-1, 3, DEV_SIM_INT_NATOM); - - - 
RDx = LOC2(DEV_SIM_DBL_PTR_XYZ, 0 , DEV_SIM_INT_PTR_KATOM[LL]-1, 3, DEV_SIM_INT_NATOM); - RDy = LOC2(DEV_SIM_DBL_PTR_XYZ, 1 , DEV_SIM_INT_PTR_KATOM[LL]-1, 3, DEV_SIM_INT_NATOM); - RDz = LOC2(DEV_SIM_DBL_PTR_XYZ, 2 , DEV_SIM_INT_PTR_KATOM[LL]-1, 3, DEV_SIM_INT_NATOM); - - int III1 = LOC2(DEV_SIM_INT_PTR_QSBASIS, II, I, DEV_SIM_INT_NSHELL, 4); - int III2 = LOC2(DEV_SIM_INT_PTR_QFBASIS, II, I, DEV_SIM_INT_NSHELL, 4); - int JJJ1 = LOC2(DEV_SIM_INT_PTR_QSBASIS, JJ, J, DEV_SIM_INT_NSHELL, 4); - int JJJ2 = LOC2(DEV_SIM_INT_PTR_QFBASIS, JJ, J, DEV_SIM_INT_NSHELL, 4); - int KKK1 = LOC2(DEV_SIM_INT_PTR_QSBASIS, KK, K, DEV_SIM_INT_NSHELL, 4); - int KKK2 = LOC2(DEV_SIM_INT_PTR_QFBASIS, KK, K, DEV_SIM_INT_NSHELL, 4); - int LLL1 = LOC2(DEV_SIM_INT_PTR_QSBASIS, LL, L, DEV_SIM_INT_NSHELL, 4); - int LLL2 = LOC2(DEV_SIM_INT_PTR_QFBASIS, LL, L, DEV_SIM_INT_NSHELL, 4); - - - int IJKLTYPE = 999; - - int nbasis = DEV_SIM_INT_NBASIS; - - for (int III = III1; III <= III2; III++) { - for (int JJJ = MAX(III,JJJ1); JJJ <= JJJ2; JJJ++) { - for (int KKK = MAX(III,KKK1); KKK <= KKK2; KKK++) { - for (int LLL = MAX(KKK,LLL1); LLL <= LLL2; LLL++) { - - if (III < KKK || - ((III == JJJ) && (III == LLL)) || - ((III == JJJ) && (III < LLL)) || - ((JJJ == LLL) && (III < JJJ)) || - ((III == KKK) && (III < JJJ) && (JJJ < LLL))) { - - QUICKDouble Yaax, Yaay, Yaaz; - QUICKDouble Ybbx, Ybby, Ybbz; - QUICKDouble Yccx, Yccy, Yccz; - - - hrrwholegrad2_ffff - (&Yaax, &Yaay, &Yaaz, \ - &Ybbx, &Ybby, &Ybbz, \ - &Yccx, &Yccy, &Yccz, \ - I, J, K, L,\ - III, JJJ, KKK, LLL, IJKLTYPE, \ - store, storeAA, storeBB, storeCC, \ - RAx, RAy, RAz, RBx, RBy, RBz, \ - RCx, RCy, RCz, RDx, RDy, RDz, smem_int, smem_int_ptr, smem_dbl_ptr, smem_char_ptr, smem_char); - - QUICKDouble constant = 0.0 ; - -#ifdef OSHELL - QUICKDouble DENSELJ = (QUICKDouble) (LOC2(DEV_SIM_DBL_PTR_DENSE, LLL-1, JJJ-1, nbasis, nbasis)+LOC2(DEV_SIM_DBL_PTR_DENSEb, LLL-1, JJJ-1, nbasis, nbasis)); - QUICKDouble DENSELI = (QUICKDouble) 
(LOC2(DEV_SIM_DBL_PTR_DENSE, LLL-1, III-1, nbasis, nbasis)+LOC2(DEV_SIM_DBL_PTR_DENSEb, LLL-1, III-1, nbasis, nbasis)); - QUICKDouble DENSELK = (QUICKDouble) (LOC2(DEV_SIM_DBL_PTR_DENSE, LLL-1, KKK-1, nbasis, nbasis)+LOC2(DEV_SIM_DBL_PTR_DENSEb, LLL-1, KKK-1, nbasis, nbasis)); - QUICKDouble DENSEJI = (QUICKDouble) (LOC2(DEV_SIM_DBL_PTR_DENSE, JJJ-1, III-1, nbasis, nbasis)+LOC2(DEV_SIM_DBL_PTR_DENSEb, JJJ-1, III-1, nbasis, nbasis)); - - QUICKDouble DENSEKIA = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, KKK-1, III-1, nbasis, nbasis); - QUICKDouble DENSEKJA = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, KKK-1, JJJ-1, nbasis, nbasis); - QUICKDouble DENSELJA = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, LLL-1, JJJ-1, nbasis, nbasis); - QUICKDouble DENSELIA = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, LLL-1, III-1, nbasis, nbasis); - QUICKDouble DENSEJIA = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, JJJ-1, III-1, nbasis, nbasis); - - QUICKDouble DENSEKIB = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSEb, KKK-1, III-1, nbasis, nbasis); - QUICKDouble DENSEKJB = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSEb, KKK-1, JJJ-1, nbasis, nbasis); - QUICKDouble DENSELJB = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSEb, LLL-1, JJJ-1, nbasis, nbasis); - QUICKDouble DENSELIB = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSEb, LLL-1, III-1, nbasis, nbasis); - QUICKDouble DENSEJIB = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSEb, JJJ-1, III-1, nbasis, nbasis); + int AStart = (DEV_SIM_INT_PTR_KATOM[II] - 1) * 3; + int BStart = (DEV_SIM_INT_PTR_KATOM[JJ] - 1) * 3; + int CStart = (DEV_SIM_INT_PTR_KATOM[KK] - 1) * 3; + int DStart = (DEV_SIM_INT_PTR_KATOM[LL] - 1) * 3; + + QUICKDouble RBx, RBy, RBz; + QUICKDouble RDx, RDy, RDz; + + RBx = LOC2(DEV_SIM_DBL_PTR_XYZ, 0, DEV_SIM_INT_PTR_KATOM[JJ] - 1, 3, DEV_SIM_INT_NATOM); + RBy = LOC2(DEV_SIM_DBL_PTR_XYZ, 1, DEV_SIM_INT_PTR_KATOM[JJ] - 1, 3, DEV_SIM_INT_NATOM); + RBz = LOC2(DEV_SIM_DBL_PTR_XYZ, 2, DEV_SIM_INT_PTR_KATOM[JJ] - 1, 3, DEV_SIM_INT_NATOM); + + RDx = 
LOC2(DEV_SIM_DBL_PTR_XYZ, 0, DEV_SIM_INT_PTR_KATOM[LL] - 1, 3, DEV_SIM_INT_NATOM); + RDy = LOC2(DEV_SIM_DBL_PTR_XYZ, 1, DEV_SIM_INT_PTR_KATOM[LL] - 1, 3, DEV_SIM_INT_NATOM); + RDz = LOC2(DEV_SIM_DBL_PTR_XYZ, 2, DEV_SIM_INT_PTR_KATOM[LL] - 1, 3, DEV_SIM_INT_NATOM); + + int III1 = LOC2(DEV_SIM_INT_PTR_QSBASIS, II, I, DEV_SIM_INT_NSHELL, 4); + int III2 = LOC2(DEV_SIM_INT_PTR_QFBASIS, II, I, DEV_SIM_INT_NSHELL, 4); + int JJJ1 = LOC2(DEV_SIM_INT_PTR_QSBASIS, JJ, J, DEV_SIM_INT_NSHELL, 4); + int JJJ2 = LOC2(DEV_SIM_INT_PTR_QFBASIS, JJ, J, DEV_SIM_INT_NSHELL, 4); + int KKK1 = LOC2(DEV_SIM_INT_PTR_QSBASIS, KK, K, DEV_SIM_INT_NSHELL, 4); + int KKK2 = LOC2(DEV_SIM_INT_PTR_QFBASIS, KK, K, DEV_SIM_INT_NSHELL, 4); + int LLL1 = LOC2(DEV_SIM_INT_PTR_QSBASIS, LL, L, DEV_SIM_INT_NSHELL, 4); + int LLL2 = LOC2(DEV_SIM_INT_PTR_QFBASIS, LL, L, DEV_SIM_INT_NSHELL, 4); + + int IJKLTYPE = 999; + + int nbasis = DEV_SIM_INT_NBASIS; + + for (int III = III1; III <= III2; III++) { + for (int JJJ = MAX(III, JJJ1); JJJ <= JJJ2; JJJ++) { + for (int KKK = MAX(III, KKK1); KKK <= KKK2; KKK++) { + for (int LLL = MAX(KKK, LLL1); LLL <= LLL2; LLL++) { + if (III < KKK + || (III == JJJ && III == LLL) + || (III == JJJ && III < LLL) + || (JJJ == LLL && III < JJJ) + || (III == KKK && III < JJJ && JJJ < LLL)) { + QUICKDouble Yaax, Yaay, Yaaz; + QUICKDouble Ybbx, Ybby, Ybbz; + QUICKDouble Yccx, Yccy, Yccz; + + hrrwholegrad2_ffff(&Yaax, &Yaay, &Yaaz, &Ybbx, &Ybby, &Ybbz, &Yccx, &Yccy, &Yccz, + III, JJJ, KKK, LLL, + store, storeAA, storeBB, storeCC, RAx, RAy, RAz, RBx, RBy, RBz, + RCx, RCy, RCz, RDx, RDy, RDz, smem_int, smem_int_ptr, smem_dbl_ptr, smem_char_ptr, smem_char); + + QUICKDouble constant = 0.0; +#if defined(OSHELL) + QUICKDouble DENSELJ = (QUICKDouble) (LOC2(DEV_SIM_DBL_PTR_DENSE, LLL - 1, JJJ - 1, nbasis, nbasis) + + LOC2(DEV_SIM_DBL_PTR_DENSEb, LLL - 1, JJJ - 1, nbasis, nbasis)); + QUICKDouble DENSELI = (QUICKDouble) (LOC2(DEV_SIM_DBL_PTR_DENSE, LLL - 1, III - 1, nbasis, nbasis) + + 
LOC2(DEV_SIM_DBL_PTR_DENSEb, LLL - 1, III - 1, nbasis, nbasis)); + QUICKDouble DENSELK = (QUICKDouble) (LOC2(DEV_SIM_DBL_PTR_DENSE, LLL - 1, KKK - 1, nbasis, nbasis) + + LOC2(DEV_SIM_DBL_PTR_DENSEb, LLL - 1, KKK - 1, nbasis, nbasis)); + QUICKDouble DENSEJI = (QUICKDouble) (LOC2(DEV_SIM_DBL_PTR_DENSE, JJJ - 1, III - 1, nbasis, nbasis) + + LOC2(DEV_SIM_DBL_PTR_DENSEb, JJJ - 1, III - 1, nbasis, nbasis)); + + QUICKDouble DENSEKIA = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, KKK - 1, III - 1, nbasis, nbasis); + QUICKDouble DENSEKJA = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, KKK - 1, JJJ - 1, nbasis, nbasis); + QUICKDouble DENSELJA = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, LLL - 1, JJJ - 1, nbasis, nbasis); + QUICKDouble DENSELIA = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, LLL - 1, III - 1, nbasis, nbasis); + QUICKDouble DENSEJIA = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, JJJ - 1, III - 1, nbasis, nbasis); + + QUICKDouble DENSEKIB = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSEb, KKK - 1, III - 1, nbasis, nbasis); + QUICKDouble DENSEKJB = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSEb, KKK - 1, JJJ - 1, nbasis, nbasis); + QUICKDouble DENSELJB = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSEb, LLL - 1, JJJ - 1, nbasis, nbasis); + QUICKDouble DENSELIB = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSEb, LLL - 1, III - 1, nbasis, nbasis); + QUICKDouble DENSEJIB = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSEb, JJJ - 1, III - 1, nbasis, nbasis); #else - QUICKDouble DENSEKI = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, KKK-1, III-1, nbasis, nbasis); - QUICKDouble DENSEKJ = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, KKK-1, JJJ-1, nbasis, nbasis); - QUICKDouble DENSELJ = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, LLL-1, JJJ-1, nbasis, nbasis); - QUICKDouble DENSELI = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, LLL-1, III-1, nbasis, nbasis); - QUICKDouble DENSELK = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, LLL-1, KKK-1, nbasis, nbasis); - QUICKDouble DENSEJI = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, JJJ-1, 
III-1, nbasis, nbasis); + QUICKDouble DENSEKI = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, KKK - 1, III - 1, nbasis, nbasis); + QUICKDouble DENSEKJ = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, KKK - 1, JJJ - 1, nbasis, nbasis); + QUICKDouble DENSELJ = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, LLL - 1, JJJ - 1, nbasis, nbasis); + QUICKDouble DENSELI = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, LLL - 1, III - 1, nbasis, nbasis); + QUICKDouble DENSELK = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, LLL - 1, KKK - 1, nbasis, nbasis); + QUICKDouble DENSEJI = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, JJJ - 1, III - 1, nbasis, nbasis); #endif - - if (II < JJ && II < KK && KK < LL || - ( III < KKK && III < JJJ && KKK < LLL)) { - //constant = ( 4.0 * DENSEJI * DENSELK - DENSEKI * DENSELJ - DENSELI * DENSEKJ); -#ifdef OSHELL - constant = ( 4.0 * DENSEJI * DENSELK - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSEKIA * DENSELJA - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSELIA * DENSEKJA - - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSEKIB * DENSELJB - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSELIB * DENSEKJB); + + if (II < JJ && II < KK && KK < LL + || (III < KKK && III < JJJ && KKK < LLL)) { + //constant = ( 4.0 * DENSEJI * DENSELK - DENSEKI * DENSELJ - DENSELI * DENSEKJ); +#if defined(OSHELL) + constant = ( 4.0 * DENSEJI * DENSELK - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSEKIA * DENSELJA - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSELIA * DENSEKJA + - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSEKIB * DENSELJB - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSELIB * DENSEKJB); #else - constant = ( 4.0 * DENSEJI * DENSELK - DEV_SIM_DBL_HYB_COEFF * DENSEKI * DENSELJ - DEV_SIM_DBL_HYB_COEFF * DENSELI * DENSEKJ); + constant = ( 4.0 * DENSEJI * DENSELK - DEV_SIM_DBL_HYB_COEFF * DENSEKI * DENSELJ - DEV_SIM_DBL_HYB_COEFF * DENSELI * DENSEKJ); #endif - }else{ - if (III < KKK) { - if( III == JJJ && KKK == LLL){ - //constant = (DENSEJI * DENSELK - 0.5 * DENSEKI * DENSEKI); -#ifdef OSHELL - constant = (DENSEJI * DENSELK - DEV_SIM_DBL_HYB_COEFF * DENSEKIA * DENSEKIA - 
DEV_SIM_DBL_HYB_COEFF * DENSEKIB * DENSEKIB); + } else { + if (III < KKK) { + if (III == JJJ && KKK == LLL) { + //constant = (DENSEJI * DENSELK - 0.5 * DENSEKI * DENSEKI); +#if defined(OSHELL) + constant = (DENSEJI * DENSELK - DEV_SIM_DBL_HYB_COEFF * DENSEKIA * DENSEKIA - DEV_SIM_DBL_HYB_COEFF * DENSEKIB * DENSEKIB); #else - constant = (DENSEJI * DENSELK - 0.5 * DEV_SIM_DBL_HYB_COEFF * DENSEKI * DENSEKI); + constant = (DENSEJI * DENSELK - 0.5 * DEV_SIM_DBL_HYB_COEFF * DENSEKI * DENSEKI); #endif - }else if (JJJ == KKK && JJJ == LLL){ - //constant = DENSELJ * DENSEJI; -#ifdef OSHELL - constant = 2.0 * DENSELJ * DENSEJI - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSELJA * DENSEJIA - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSELJB * DENSEJIB; + } else if (JJJ == KKK && JJJ == LLL) { + //constant = DENSELJ * DENSEJI; +#if defined(OSHELL) + constant = 2.0 * DENSELJ * DENSEJI - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSELJA * DENSEJIA - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSELJB * DENSEJIB; #else - constant = 2.0 * DENSELJ * DENSEJI - DEV_SIM_DBL_HYB_COEFF * DENSELJ * DENSEJI; + constant = 2.0 * DENSELJ * DENSEJI - DEV_SIM_DBL_HYB_COEFF * DENSELJ * DENSEJI; #endif - }else if (KKK == LLL && III < JJJ && JJJ != KKK){ - //constant = (2.0* DENSEJI * DENSELK - DENSEKI * DENSEKJ); -#ifdef OSHELL - constant = (2.0* DENSEJI * DENSELK - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSEKIA * DENSEKJA - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSEKIB * DENSEKJB); + } else if (KKK == LLL && III < JJJ && JJJ != KKK) { + //constant = (2.0* DENSEJI * DENSELK - DENSEKI * DENSEKJ); +#if defined(OSHELL) + constant = (2.0* DENSEJI * DENSELK - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSEKIA * DENSEKJA - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSEKIB * DENSEKJB); #else - constant = (2.0* DENSEJI * DENSELK - DEV_SIM_DBL_HYB_COEFF * DENSEKI * DENSEKJ); + constant = (2.0* DENSEJI * DENSELK - DEV_SIM_DBL_HYB_COEFF * DENSEKI * DENSEKJ); #endif - }else if ( III == JJJ && KKK < LLL){ - //constant = (2.0* DENSELK * DENSEJI - DENSEKI * DENSELI); -#ifdef OSHELL - constant 
= (2.0* DENSELK * DENSEJI - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSEKIA * DENSELIA - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSEKIB * DENSELIB); + } else if (III == JJJ && KKK < LLL) { + //constant = (2.0* DENSELK * DENSEJI - DENSEKI * DENSELI); +#if defined(OSHELL) + constant = (2.0* DENSELK * DENSEJI - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSEKIA * DENSELIA - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSEKIB * DENSELIB); #else - constant = (2.0* DENSELK * DENSEJI - DEV_SIM_DBL_HYB_COEFF * DENSEKI * DENSELI); + constant = (2.0* DENSELK * DENSEJI - DEV_SIM_DBL_HYB_COEFF * DENSEKI * DENSELI); #endif - } - } - else{ - if (JJJ <= LLL) { - if (III == JJJ && III == KKK && III == LLL) { - // Do nothing - }else if (III==JJJ && III==KKK && III < LLL){ - //constant = DENSELI * DENSEJI; -#ifdef OSHELL - constant = 2.0 * DENSELI * DENSEJI - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSELIA * DENSEJIA - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSELIB * DENSEJIB; + } + } else { + if (JJJ <= LLL) { + if (III == JJJ && III == KKK && III == LLL) { + // Do nothing + } else if (III == JJJ && III == KKK && III < LLL) { + //constant = DENSELI * DENSEJI; +#if defined(OSHELL) + constant = 2.0 * DENSELI * DENSEJI - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSELIA * DENSEJIA - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSELIB * DENSEJIB; #else - constant = 2.0 * DENSELI * DENSEJI - DEV_SIM_DBL_HYB_COEFF * DENSELI * DENSEJI; + constant = 2.0 * DENSELI * DENSEJI - DEV_SIM_DBL_HYB_COEFF * DENSELI * DENSEJI; #endif - }else if (III==KKK && JJJ==LLL && III < JJJ){ - //constant = (1.5 * DENSEJI * DENSEJI - 0.5 * DENSELJ * DENSEKI); -#ifdef OSHELL - constant = (2.0 * DENSEJI * DENSEJI - DEV_SIM_DBL_HYB_COEFF * DENSEJIA * DENSEJIA - DEV_SIM_DBL_HYB_COEFF * DENSELJA * DENSEKIA - - DEV_SIM_DBL_HYB_COEFF * DENSEJIB * DENSEJIB - DEV_SIM_DBL_HYB_COEFF * DENSELJB * DENSEKIB); + } else if (III == KKK && JJJ == LLL && III < JJJ) { + //constant = (1.5 * DENSEJI * DENSEJI - 0.5 * DENSELJ * DENSEKI); +#if defined(OSHELL) + constant = (2.0 * DENSEJI * DENSEJI - 
DEV_SIM_DBL_HYB_COEFF * DENSEJIA * DENSEJIA - DEV_SIM_DBL_HYB_COEFF * DENSELJA * DENSEKIA + - DEV_SIM_DBL_HYB_COEFF * DENSEJIB * DENSEJIB - DEV_SIM_DBL_HYB_COEFF * DENSELJB * DENSEKIB); #else - constant = (2.0 * DENSEJI * DENSEJI - 0.5 * DEV_SIM_DBL_HYB_COEFF * DENSEJI * DENSEJI - 0.5 * DEV_SIM_DBL_HYB_COEFF * DENSELJ * DENSEKI); + constant = (2.0 * DENSEJI * DENSEJI - 0.5 * DEV_SIM_DBL_HYB_COEFF * DENSEJI * DENSEJI - 0.5 * DEV_SIM_DBL_HYB_COEFF * DENSELJ * DENSEKI); #endif - }else if (III== KKK && III < JJJ && JJJ < LLL){ - //constant = (3.0 * DENSEJI * DENSELI - DENSELJ * DENSEKI); -#ifdef OSHELL - constant = (4.0 * DENSEJI * DENSELI - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSEJIA * DENSELIA - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSELJA * DENSEKIA - - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSEJIB * DENSELIB - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSELJB * DENSEKIB); + } else if (III == KKK && III < JJJ && JJJ < LLL) { + //constant = (3.0 * DENSEJI * DENSELI - DENSELJ * DENSEKI); +#if defined(OSHELL) + constant = (4.0 * DENSEJI * DENSELI - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSEJIA * DENSELIA - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSELJA * DENSEKIA + - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSEJIB * DENSELIB - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSELJB * DENSEKIB); #else - constant = (4.0 * DENSEJI * DENSELI - DEV_SIM_DBL_HYB_COEFF * DENSEJI * DENSELI - DEV_SIM_DBL_HYB_COEFF * DENSELJ * DENSEKI); + constant = (4.0 * DENSEJI * DENSELI - DEV_SIM_DBL_HYB_COEFF * DENSEJI * DENSELI - DEV_SIM_DBL_HYB_COEFF * DENSELJ * DENSEKI); #endif - } - } - } } - - AGradx += constant * Yaax; - AGrady += constant * Yaay; - AGradz += constant * Yaaz; - - BGradx += constant * Ybbx; - BGrady += constant * Ybby; - BGradz += constant * Ybbz; - - CGradx += constant * Yccx; - CGrady += constant * Yccy; - CGradz += constant * Yccz; - - - } } } + + AGradx += constant * Yaax; + AGrady += constant * Yaay; + AGradz += constant * Yaaz; + + BGradx += constant * Ybbx; + BGrady += constant * Ybby; + BGradz += constant * Ybbz; + + CGradx += 
constant * Yccx; + CGrady += constant * Yccy; + CGradz += constant * Yccz; } } - - - - /* - if ( abs(AGradx) > 0.0 || abs(AGrady) > 0.0 || abs(AGradz) > 0.0 || - abs(BGradx) > 0.0 || abs(BGrady) > 0.0 || abs(BGradz) > 0.0 || - abs(CGradx) > 0.0 || abs(CGrady) > 0.0 || abs(CGradz) > 0.0) { - - printf("%i %i %i %i %i %i %i %i %20.10e %20.10e %20.10e %20.10e %20.10e %20.10e %20.10e %20.10e %20.10e \n", II, JJ, KK, LL, \ - I, J, K, L, AGradx, AGrady, AGradz, BGradx, BGrady, BGradz, CGradx, CGrady, CGradz); - }*/ - -/* - } } } -*/ - + +// if (abs(AGradx) > 0.0 || abs(AGrady) > 0.0 || abs(AGradz) > 0.0 +// || abs(BGradx) > 0.0 || abs(BGrady) > 0.0 || abs(BGradz) > 0.0 +// || abs(CGradx) > 0.0 || abs(CGrady) > 0.0 || abs(CGradz) > 0.0) { +// printf("%i %i %i %i %i %i %i %i %20.10e %20.10e %20.10e %20.10e %20.10e %20.10e %20.10e %20.10e %20.10e \n", +// II, JJ, KK, LL, I, J, K, L, AGradx, AGrady, AGradz, +// BGradx, BGrady, BGradz, CGradx, CGrady, CGradz); +// } + #ifdef DEBUG - //printf("FILE: %s, LINE: %d, FUNCTION: %s, DEV_SIM_DBL_HYB_COEFF \n", __FILE__, __LINE__, __func__); -#endif - +// printf("FILE: %s, LINE: %d, FUNCTION: %s, DEV_SIM_DBL_HYB_COEFF \n", __FILE__, __LINE__, __func__); +#endif + GPUATOMICADD(&DEV_SIM_PTR_GRAD[AStart], AGradx, GRADSCALE); GPUATOMICADD(&DEV_SIM_PTR_GRAD[AStart + 1], AGrady, GRADSCALE); GPUATOMICADD(&DEV_SIM_PTR_GRAD[AStart + 2], AGradz, GRADSCALE); - + GPUATOMICADD(&DEV_SIM_PTR_GRAD[BStart], BGradx, GRADSCALE); GPUATOMICADD(&DEV_SIM_PTR_GRAD[BStart + 1], BGrady, GRADSCALE); GPUATOMICADD(&DEV_SIM_PTR_GRAD[BStart + 2], BGradz, GRADSCALE); - + GPUATOMICADD(&DEV_SIM_PTR_GRAD[CStart], CGradx, GRADSCALE); GPUATOMICADD(&DEV_SIM_PTR_GRAD[CStart + 1], CGrady, GRADSCALE); GPUATOMICADD(&DEV_SIM_PTR_GRAD[CStart + 2], CGradz, GRADSCALE); - + GPUATOMICADD(&DEV_SIM_PTR_GRAD[DStart], -AGradx - BGradx - CGradx, GRADSCALE); GPUATOMICADD(&DEV_SIM_PTR_GRAD[DStart + 1], -AGrady - BGrady - CGrady, GRADSCALE); GPUATOMICADD(&DEV_SIM_PTR_GRAD[DStart + 2], 
-AGradz - BGradz - CGradz, GRADSCALE); } - -#ifdef OSHELL -#if defined int_spdf4 -__global__ void -__launch_bounds__(ERI_GRAD_FFFF_TPB, ERI_GRAD_FFFF_BPSM) getGrad_oshell_kernel_ffff() -#endif +#if defined(OSHELL) + #if defined(int_spdf4) +__global__ void __launch_bounds__(ERI_GRAD_FFFF_TPB, ERI_GRAD_FFFF_BPSM) getGrad_oshell_kernel_ffff() + #endif #else -#if defined int_spdf4 -__global__ void -__launch_bounds__(ERI_GRAD_FFFF_TPB, ERI_GRAD_FFFF_BPSM) getGrad_kernel_ffff(int *dev_int_data, -int **dev_int_ptr_data, QUICKDouble *dev_dbl_data, QUICKDouble **dev_dbl_ptr_data, int2 -**dev_int2_ptr_data, unsigned char **dev_char_ptr_data, unsigned char *dev_char_data, QUICKAtomicType **dev_grad_ptr_data, const int ffStart, const int sqrQshell) -#endif + #if defined(int_spdf4) +__global__ void __launch_bounds__(ERI_GRAD_FFFF_TPB, ERI_GRAD_FFFF_BPSM) getGrad_kernel_ffff(int *dev_int_data, + int **dev_int_ptr_data, QUICKDouble *dev_dbl_data, QUICKDouble **dev_dbl_ptr_data, + int2 **dev_int2_ptr_data, unsigned char **dev_char_ptr_data, unsigned char *dev_char_data, + QUICKAtomicType **dev_grad_ptr_data, const int ffStart, const int sqrQshell) + #endif #endif { - extern __shared__ QUICKDouble smem_buffer[]; - QUICKDouble *smem_dbl = smem_buffer; - QUICKDouble **smem_dbl_ptr = (QUICKDouble**) &smem_dbl[ERI_GRAD_FFFF_SMEM_DBL_SIZE*ERI_GRAD_FFFF_TPB]; - int **smem_int_ptr = (int**) &smem_dbl_ptr[ERI_GRAD_FFFF_SMEM_DBL_PTR_SIZE*ERI_GRAD_FFFF_TPB]; - int2 **smem_int2_ptr = (int2**) &smem_int_ptr[ERI_GRAD_FFFF_SMEM_INT_PTR_SIZE*ERI_GRAD_FFFF_TPB]; - unsigned char **smem_char_ptr = (unsigned char**) &smem_int2_ptr[ERI_GRAD_FFFF_SMEM_INT2_PTR_SIZE*ERI_GRAD_FFFF_TPB]; - int *smem_int = (int*) &smem_char_ptr[ERI_GRAD_FFFF_SMEM_CHAR_PTR_SIZE*ERI_GRAD_FFFF_TPB]; - unsigned char *smem_char=(unsigned char*) &smem_int[ERI_GRAD_FFFF_SMEM_INT_SIZE*ERI_GRAD_FFFF_TPB]; - QUICKAtomicType **smem_grad_ptr = (QUICKAtomicType**) &smem_char[ERI_GRAD_FFFF_SMEM_CHAR_SIZE]; + QUICKDouble *smem_dbl = 
smem_buffer; + QUICKDouble **smem_dbl_ptr = (QUICKDouble **) &smem_dbl[ERI_GRAD_FFFF_SMEM_DBL_SIZE * ERI_GRAD_FFFF_TPB]; + int **smem_int_ptr = (int **) &smem_dbl_ptr[ERI_GRAD_FFFF_SMEM_DBL_PTR_SIZE * ERI_GRAD_FFFF_TPB]; + int2 **smem_int2_ptr = (int2 **) &smem_int_ptr[ERI_GRAD_FFFF_SMEM_INT_PTR_SIZE * ERI_GRAD_FFFF_TPB]; + unsigned char **smem_char_ptr = (unsigned char **) &smem_int2_ptr[ERI_GRAD_FFFF_SMEM_INT2_PTR_SIZE * ERI_GRAD_FFFF_TPB]; + int *smem_int = (int *) &smem_char_ptr[ERI_GRAD_FFFF_SMEM_CHAR_PTR_SIZE * ERI_GRAD_FFFF_TPB]; + unsigned char *smem_char = (unsigned char *) &smem_int[ERI_GRAD_FFFF_SMEM_INT_SIZE * ERI_GRAD_FFFF_TPB]; + QUICKAtomicType **smem_grad_ptr = (QUICKAtomicType **) &smem_char[ERI_GRAD_FFFF_SMEM_CHAR_SIZE]; - for(int i = threadIdx.x; i 0){ -#endif - - int II = DEV_SIM_INT2_PTR_SORTED_YCUTOFFIJ[a].x; - int KK = DEV_SIM_INT2_PTR_SORTED_YCUTOFFIJ[b].x; - - int ii = DEV_SIM_INT_PTR_SORTED_Q[II]; - int kk = DEV_SIM_INT_PTR_SORTED_Q[KK]; - - if (ii<=kk){ - - int JJ = DEV_SIM_INT2_PTR_SORTED_YCUTOFFIJ[a].y; - int LL = DEV_SIM_INT2_PTR_SORTED_YCUTOFFIJ[b].y; - - int iii = DEV_SIM_INT_PTR_SORTED_QNUMBER[II]; - int jjj = DEV_SIM_INT_PTR_SORTED_QNUMBER[JJ]; - int kkk = DEV_SIM_INT_PTR_SORTED_QNUMBER[KK]; - int lll = DEV_SIM_INT_PTR_SORTED_QNUMBER[LL]; - - - int jj = DEV_SIM_INT_PTR_SORTED_Q[JJ]; - int ll = DEV_SIM_INT_PTR_SORTED_Q[LL]; - - - if ( !((DEV_SIM_INT_PTR_KATOM[ii] == DEV_SIM_INT_PTR_KATOM[jj]) && - (DEV_SIM_INT_PTR_KATOM[ii] == DEV_SIM_INT_PTR_KATOM[kk]) && - (DEV_SIM_INT_PTR_KATOM[ii] == DEV_SIM_INT_PTR_KATOM[ll])) // In case 4 indices are in the same atom - ) { - - int nshell = DEV_SIM_INT_NSHELL; - - QUICKDouble DNMax = MAX(MAX(4.0*LOC2(DEV_SIM_DBL_PTR_CUTMATRIX, ii, jj, nshell, nshell), 4.0*LOC2(DEV_SIM_DBL_PTR_CUTMATRIX, kk, ll, nshell, nshell)), - MAX(MAX(LOC2(DEV_SIM_DBL_PTR_CUTMATRIX, ii, ll, nshell, nshell), LOC2(DEV_SIM_DBL_PTR_CUTMATRIX, ii, kk, nshell, nshell)), - MAX(LOC2(DEV_SIM_DBL_PTR_CUTMATRIX, jj, kk, nshell, 
nshell), LOC2(DEV_SIM_DBL_PTR_CUTMATRIX, jj, ll, nshell, nshell)))); - - - if ((LOC2(DEV_SIM_DBL_PTR_YCUTOFF, kk, ll, nshell, nshell) * LOC2(DEV_SIM_DBL_PTR_YCUTOFF, ii, jj, nshell, nshell))> DEV_SIM_DBL_GRADCUTOFF && \ - (LOC2(DEV_SIM_DBL_PTR_YCUTOFF, kk, ll, nshell, nshell) * LOC2(DEV_SIM_DBL_PTR_YCUTOFF, ii, jj, nshell, nshell) * -DNMax) > DEV_SIM_DBL_GRADCUTOFF) { - -#ifdef OSHELL -#if defined int_spdf4 - if( iii == 3 && jjj == 3 && kkk ==3 && lll ==3){ - iclass_oshell_grad_ffff(iii, jjj, kkk, lll, ii, jj, kk, ll, DNMax, DEV_SIM_DBL_PTR_YVERTICALTEMP+offset, -DEV_SIM_DBL_PTR_STORE+offset, DEV_SIM_DBL_PTR_STORE2+offset, DEV_SIM_DBL_PTR_STOREAA+offset, DEV_SIM_DBL_PTR_STOREBB+offset, -DEV_SIM_DBL_PTR_STORECC+offset, smem_int, smem_dbl, smem_int_ptr, smem_dbl_ptr, smem_char_ptr, smem_char, smem_grad_ptr); - } +#if defined(MPIV_GPU) + if (DEV_SIM_CHAR_PTR_MPI_BCOMPUTE[a] > 0) { #endif + int II = DEV_SIM_INT2_PTR_SORTED_YCUTOFFIJ[a].x; + int KK = DEV_SIM_INT2_PTR_SORTED_YCUTOFFIJ[b].x; + + int ii = DEV_SIM_INT_PTR_SORTED_Q[II]; + int kk = DEV_SIM_INT_PTR_SORTED_Q[KK]; + + if (ii <= kk) { + int JJ = DEV_SIM_INT2_PTR_SORTED_YCUTOFFIJ[a].y; + int LL = DEV_SIM_INT2_PTR_SORTED_YCUTOFFIJ[b].y; + + int iii = DEV_SIM_INT_PTR_SORTED_QNUMBER[II]; + int jjj = DEV_SIM_INT_PTR_SORTED_QNUMBER[JJ]; + int kkk = DEV_SIM_INT_PTR_SORTED_QNUMBER[KK]; + int lll = DEV_SIM_INT_PTR_SORTED_QNUMBER[LL]; + + int jj = DEV_SIM_INT_PTR_SORTED_Q[JJ]; + int ll = DEV_SIM_INT_PTR_SORTED_Q[LL]; + + // In case 4 indices are in the same atom + if (!((DEV_SIM_INT_PTR_KATOM[ii] == DEV_SIM_INT_PTR_KATOM[jj]) + && (DEV_SIM_INT_PTR_KATOM[ii] == DEV_SIM_INT_PTR_KATOM[kk]) + && (DEV_SIM_INT_PTR_KATOM[ii] == DEV_SIM_INT_PTR_KATOM[ll]))) { + int nshell = DEV_SIM_INT_NSHELL; + + QUICKDouble DNMax = MAX(MAX(4.0 * LOC2(DEV_SIM_DBL_PTR_CUTMATRIX, ii, jj, nshell, nshell), + 4.0 * LOC2(DEV_SIM_DBL_PTR_CUTMATRIX, kk, ll, nshell, nshell)), + MAX(MAX(LOC2(DEV_SIM_DBL_PTR_CUTMATRIX, ii, ll, nshell, nshell), + 
LOC2(DEV_SIM_DBL_PTR_CUTMATRIX, ii, kk, nshell, nshell)), + MAX(LOC2(DEV_SIM_DBL_PTR_CUTMATRIX, jj, kk, nshell, nshell), + LOC2(DEV_SIM_DBL_PTR_CUTMATRIX, jj, ll, nshell, nshell)))); + + if ((LOC2(DEV_SIM_DBL_PTR_YCUTOFF, kk, ll, nshell, nshell) + * LOC2(DEV_SIM_DBL_PTR_YCUTOFF, ii, jj, nshell, nshell)) + > DEV_SIM_DBL_GRADCUTOFF + && (LOC2(DEV_SIM_DBL_PTR_YCUTOFF, kk, ll, nshell, nshell) + * LOC2(DEV_SIM_DBL_PTR_YCUTOFF, ii, jj, nshell, nshell) * DNMax) + > DEV_SIM_DBL_GRADCUTOFF) { +#if defined(OSHELL) + #if defined(int_spdf4) + if (iii == 3 && jjj == 3 && kkk == 3 && lll == 3) { + iclass_oshell_grad_ffff(iii, jjj, kkk, lll, ii, jj, kk, ll, + DNMax, DEV_SIM_DBL_PTR_YVERTICALTEMP + offset, + DEV_SIM_DBL_PTR_STORE + offset, DEV_SIM_DBL_PTR_STORE2 + offset, + DEV_SIM_DBL_PTR_STOREAA + offset, DEV_SIM_DBL_PTR_STOREBB + offset, + DEV_SIM_DBL_PTR_STORECC + offset, + smem_int, smem_dbl, smem_int_ptr, smem_dbl_ptr, smem_char_ptr, smem_char, smem_grad_ptr); + } + #endif #else -#if defined int_spdf4 - - if( iii == 3 && jjj == 3 && kkk ==3 && lll ==3){ - iclass_grad_ffff(iii, jjj, kkk, lll, ii, jj, kk, ll, DNMax, DEV_SIM_DBL_PTR_YVERTICALTEMP+offset, -DEV_SIM_DBL_PTR_STORE+offset, DEV_SIM_DBL_PTR_STORE2+offset, DEV_SIM_DBL_PTR_STOREAA+offset, DEV_SIM_DBL_PTR_STOREBB+offset, -DEV_SIM_DBL_PTR_STORECC+offset, smem_int, smem_dbl, smem_int_ptr, smem_dbl_ptr,smem_char_ptr, smem_char, smem_grad_ptr); - } -#endif + #if defined(int_spdf4) + if (iii == 3 && jjj == 3 && kkk == 3 && lll == 3) { + iclass_grad_ffff(iii, jjj, kkk, lll, ii, jj, kk, ll, + DNMax, DEV_SIM_DBL_PTR_YVERTICALTEMP + offset, + DEV_SIM_DBL_PTR_STORE + offset, DEV_SIM_DBL_PTR_STORE2 + offset, + DEV_SIM_DBL_PTR_STOREAA + offset, DEV_SIM_DBL_PTR_STOREBB + offset, + DEV_SIM_DBL_PTR_STORECC + offset, + smem_int, smem_dbl, smem_int_ptr, smem_dbl_ptr,smem_char_ptr, smem_char, smem_grad_ptr); + } + #endif #endif - + } } } - - } - -#ifdef MPIV_GPU +#if defined(MPIV_GPU) } #endif - } } - diff --git 
a/src/gpu/cuda/gpu_get2e_grad_ffff.h b/src/gpu/cuda/gpu_get2e_grad_ffff.h index ee85b02e..5241f4de 100644 --- a/src/gpu/cuda/gpu_get2e_grad_ffff.h +++ b/src/gpu/cuda/gpu_get2e_grad_ffff.h @@ -1,4 +1,3 @@ - void getGrad_ffff(_gpu_type gpu); void get_oshell_eri_grad_ffff(_gpu_type gpu); void upload_sim_to_constant_ffff(_gpu_type gpu); diff --git a/src/gpu/hip/gpu_get2e_grad_ffff.cu b/src/gpu/hip/gpu_get2e_grad_ffff.cu index 9c667327..941d4324 100644 --- a/src/gpu/hip/gpu_get2e_grad_ffff.cu +++ b/src/gpu/hip/gpu_get2e_grad_ffff.cu @@ -18,40 +18,35 @@ #include "gpu_type.h" #include "gpu_get2e_grad_ffff.h" -//#ifdef GPU_SPDF -//#endif - - /* - Constant Memory in GPU is fast but quite limited and hard to operate, usually not allocatable and - readonly. So we put the following variables into constant memory: -devSim: a gpu simluation type variable. which is to store to location of basic information about molecule and basis -set. Note it only store the location, so it's mostly a set of pointer to GPU memory. and with some non-pointer -value like the number of basis set. See gpu_type.h for details. -devTrans : arrays to save the mapping index, will be elimited by hand writing unrolling code. -Sumindex: a array to store refect how many temp variable needed in VRR. can be elimited by hand writing code. -*/ + * Constant Memory in GPU is fast but quite limited and hard to operate, usually not allocatable and + * readonly. So we put the following variables into constant memory: + * devSim: a gpu simluation type variable. which is to store to location of basic information about molecule and basis + * set. Note it only store the location, so it's mostly a set of pointer to GPU memory. and with some non-pointer + * value like the number of basis set. See gpu_type.h for details. + * devTrans: arrays to save the mapping index, will be elimited by hand writing unrolling code. + * Sumindex: a array to store refect how many temp variable needed in VRR. 
can be elimited by hand writing code. + */ static __constant__ gpu_simulation_type devSim; static __constant__ unsigned char devTrans[TRANSDIM * TRANSDIM * TRANSDIM]; static __constant__ int Sumindex[10] = {0, 0, 1, 4, 10, 20, 35, 56, 84, 120}; //#define USE_TEXTURE - -#ifdef USE_TEXTURE -#define USE_TEXTURE_CUTMATRIX -#define USE_TEXTURE_YCUTOFF -#define USE_TEXTURE_XCOEFF +#if defined(USE_TEXTURE) + #define USE_TEXTURE_CUTMATRIX + #define USE_TEXTURE_YCUTOFF + #define USE_TEXTURE_XCOEFF #endif -#ifdef USE_TEXTURE_CUTMATRIX -texture tex_cutMatrix; +#if defined(USE_TEXTURE_CUTMATRIX) + texture tex_cutMatrix; #endif -#ifdef USE_TEXTURE_YCUTOFF -texture tex_YCutoff; +#if defined(USE_TEXTURE_YCUTOFF) + texture tex_YCutoff; #endif -#ifdef USE_TEXTURE_XCOEFF -texture tex_Xcoeff; +#if defined(USE_TEXTURE_XCOEFF) + texture tex_Xcoeff; #endif //#define USE_ERI_GRAD_STOREADD @@ -74,55 +69,55 @@ texture tex_Xcoeff; #define ERI_GRAD_FFFF_SMEM_PTR_SIZE (1) -#define DEV_SIM_INT_PTR_KATOM smem_int_ptr[ERI_GRAD_FFFF_TPB*0+threadIdx.x] -#define DEV_SIM_INT_PTR_KPRIM smem_int_ptr[ERI_GRAD_FFFF_TPB*1+threadIdx.x] -#define DEV_SIM_INT_PTR_KSTART smem_int_ptr[ERI_GRAD_FFFF_TPB*2+threadIdx.x] -#define DEV_SIM_INT_PTR_KSUMTYPE smem_int_ptr[ERI_GRAD_FFFF_TPB*3+threadIdx.x] -#define DEV_SIM_INT_PTR_PRIM_START smem_int_ptr[ERI_GRAD_FFFF_TPB*4+threadIdx.x] -#define DEV_SIM_INT_PTR_QFBASIS smem_int_ptr[ERI_GRAD_FFFF_TPB*5+threadIdx.x] -#define DEV_SIM_INT_PTR_QSBASIS smem_int_ptr[ERI_GRAD_FFFF_TPB*6+threadIdx.x] -#define DEV_SIM_INT_PTR_QSTART smem_int_ptr[ERI_GRAD_FFFF_TPB*7+threadIdx.x] -#define DEV_SIM_INT_PTR_SORTED_Q smem_int_ptr[ERI_GRAD_FFFF_TPB*8+threadIdx.x] -#define DEV_SIM_INT_PTR_SORTED_QNUMBER smem_int_ptr[ERI_GRAD_FFFF_TPB*9+threadIdx.x] -#define DEV_SIM_INT2_PTR_SORTED_YCUTOFFIJ smem_int2_ptr[ERI_GRAD_FFFF_TPB*0+threadIdx.x] -#define DEV_SIM_CHAR_PTR_MPI_BCOMPUTE smem_char_ptr[ERI_GRAD_FFFF_TPB*0+threadIdx.x] -#define DEV_SIM_CHAR_PTR_KLMN 
smem_char_ptr[ERI_GRAD_FFFF_TPB*1+threadIdx.x] -#define DEV_SIM_DBL_PTR_CONS smem_dbl_ptr[ERI_GRAD_FFFF_TPB*0+threadIdx.x] -#define DEV_SIM_DBL_PTR_CUTMATRIX smem_dbl_ptr[ERI_GRAD_FFFF_TPB*1+threadIdx.x] -#define DEV_SIM_DBL_PTR_CUTPRIM smem_dbl_ptr[ERI_GRAD_FFFF_TPB*2+threadIdx.x] -#define DEV_SIM_DBL_PTR_DENSE smem_dbl_ptr[ERI_GRAD_FFFF_TPB*3+threadIdx.x] -#define DEV_SIM_DBL_PTR_DENSEB smem_dbl_ptr[ERI_GRAD_FFFF_TPB*4+threadIdx.x] -#define DEV_SIM_DBL_PTR_EXPOSUM smem_dbl_ptr[ERI_GRAD_FFFF_TPB*5+threadIdx.x] -#define DEV_SIM_DBL_PTR_GCEXPO smem_dbl_ptr[ERI_GRAD_FFFF_TPB*6+threadIdx.x] -#define DEV_SIM_DBL_PTR_STORE smem_dbl_ptr[ERI_GRAD_FFFF_TPB*7+threadIdx.x] -#define DEV_SIM_DBL_PTR_STORE2 smem_dbl_ptr[ERI_GRAD_FFFF_TPB*8+threadIdx.x] -#define DEV_SIM_DBL_PTR_STOREAA smem_dbl_ptr[ERI_GRAD_FFFF_TPB*9+threadIdx.x] -#define DEV_SIM_DBL_PTR_STOREBB smem_dbl_ptr[ERI_GRAD_FFFF_TPB*10+threadIdx.x] -#define DEV_SIM_DBL_PTR_STORECC smem_dbl_ptr[ERI_GRAD_FFFF_TPB*11+threadIdx.x] -#define DEV_SIM_DBL_PTR_WEIGHTEDCENTERX smem_dbl_ptr[ERI_GRAD_FFFF_TPB*12+threadIdx.x] -#define DEV_SIM_DBL_PTR_WEIGHTEDCENTERY smem_dbl_ptr[ERI_GRAD_FFFF_TPB*13+threadIdx.x] -#define DEV_SIM_DBL_PTR_WEIGHTEDCENTERZ smem_dbl_ptr[ERI_GRAD_FFFF_TPB*14+threadIdx.x] -#define DEV_SIM_DBL_PTR_XCOEFF smem_dbl_ptr[ERI_GRAD_FFFF_TPB*15+threadIdx.x] -#define DEV_SIM_DBL_PTR_XYZ smem_dbl_ptr[ERI_GRAD_FFFF_TPB*16+threadIdx.x] -#define DEV_SIM_DBL_PTR_YCUTOFF smem_dbl_ptr[ERI_GRAD_FFFF_TPB*17+threadIdx.x] -#define DEV_SIM_DBL_PTR_YVERTICALTEMP smem_dbl_ptr[ERI_GRAD_FFFF_TPB*18+threadIdx.x] -#define DEV_SIM_DBL_PRIMLIMIT smem_dbl[ERI_GRAD_FFFF_TPB*0+threadIdx.x] -#define DEV_SIM_DBL_GRADCUTOFF smem_dbl[ERI_GRAD_FFFF_TPB*1+threadIdx.x] -#define DEV_SIM_DBL_HYB_COEFF smem_dbl[ERI_GRAD_FFFF_TPB*2+threadIdx.x] -#define DEV_SIM_INT_NATOM smem_int[ERI_GRAD_FFFF_TPB*0+threadIdx.x] -#define DEV_SIM_INT_NBASIS smem_int[ERI_GRAD_FFFF_TPB*1+threadIdx.x] -#define DEV_SIM_INT_NSHELL 
smem_int[ERI_GRAD_FFFF_TPB*2+threadIdx.x] -#define DEV_SIM_INT_JBASIS smem_int[ERI_GRAD_FFFF_TPB*3+threadIdx.x] -#define DEV_SIM_INT_SQRQSHELL smem_int[ERI_GRAD_FFFF_TPB*4+threadIdx.x] -#define DEV_SIM_INT_PRIM_TOTAL smem_int[ERI_GRAD_FFFF_TPB*5+threadIdx.x] -#define DEV_SIM_INT_FFSTART smem_int[ERI_GRAD_FFFF_TPB*6+threadIdx.x] - -#define DEV_SIM_PTR_GRAD smem_grad_ptr[ERI_GRAD_FFFF_TPB*0+threadIdx.x] - -#define LOCTRANS(A,i1,i2,i3,d1,d2,d3) A[(i3+((i2)+(i1)*(d2))*(d3))*ERI_GRAD_FFFF_TPB+threadIdx.x] +#define DEV_SIM_INT_PTR_KATOM smem_int_ptr[threadIdx.x] +#define DEV_SIM_INT_PTR_KPRIM smem_int_ptr[ERI_GRAD_FFFF_TPB + threadIdx.x] +#define DEV_SIM_INT_PTR_KSTART smem_int_ptr[ERI_GRAD_FFFF_TPB * 2 + threadIdx.x] +#define DEV_SIM_INT_PTR_KSUMTYPE smem_int_ptr[ERI_GRAD_FFFF_TPB * 3 + threadIdx.x] +#define DEV_SIM_INT_PTR_PRIM_START smem_int_ptr[ERI_GRAD_FFFF_TPB * 4 + threadIdx.x] +#define DEV_SIM_INT_PTR_QFBASIS smem_int_ptr[ERI_GRAD_FFFF_TPB * 5 + threadIdx.x] +#define DEV_SIM_INT_PTR_QSBASIS smem_int_ptr[ERI_GRAD_FFFF_TPB * 6 + threadIdx.x] +#define DEV_SIM_INT_PTR_QSTART smem_int_ptr[ERI_GRAD_FFFF_TPB * 7 + threadIdx.x] +#define DEV_SIM_INT_PTR_SORTED_Q smem_int_ptr[ERI_GRAD_FFFF_TPB * 8 + threadIdx.x] +#define DEV_SIM_INT_PTR_SORTED_QNUMBER smem_int_ptr[ERI_GRAD_FFFF_TPB * 9 + threadIdx.x] +#define DEV_SIM_INT2_PTR_SORTED_YCUTOFFIJ smem_int2_ptr[threadIdx.x] +#define DEV_SIM_CHAR_PTR_MPI_BCOMPUTE smem_char_ptr[threadIdx.x] +#define DEV_SIM_CHAR_PTR_KLMN smem_char_ptr[ERI_GRAD_FFFF_TPB + threadIdx.x] +#define DEV_SIM_DBL_PTR_CONS smem_dbl_ptr[threadIdx.x] +#define DEV_SIM_DBL_PTR_CUTMATRIX smem_dbl_ptr[ERI_GRAD_FFFF_TPB + threadIdx.x] +#define DEV_SIM_DBL_PTR_CUTPRIM smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 2 + threadIdx.x] +#define DEV_SIM_DBL_PTR_DENSE smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 3 + threadIdx.x] +#define DEV_SIM_DBL_PTR_DENSEB smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 4 + threadIdx.x] +#define DEV_SIM_DBL_PTR_EXPOSUM smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 5 + threadIdx.x] 
+#define DEV_SIM_DBL_PTR_GCEXPO smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 6 + threadIdx.x] +#define DEV_SIM_DBL_PTR_STORE smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 7 + threadIdx.x] +#define DEV_SIM_DBL_PTR_STORE2 smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 8 + threadIdx.x] +#define DEV_SIM_DBL_PTR_STOREAA smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 9 + threadIdx.x] +#define DEV_SIM_DBL_PTR_STOREBB smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 10 + threadIdx.x] +#define DEV_SIM_DBL_PTR_STORECC smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 11 + threadIdx.x] +#define DEV_SIM_DBL_PTR_WEIGHTEDCENTERX smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 12 + threadIdx.x] +#define DEV_SIM_DBL_PTR_WEIGHTEDCENTERY smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 13 + threadIdx.x] +#define DEV_SIM_DBL_PTR_WEIGHTEDCENTERZ smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 14 + threadIdx.x] +#define DEV_SIM_DBL_PTR_XCOEFF smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 15 + threadIdx.x] +#define DEV_SIM_DBL_PTR_XYZ smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 16 + threadIdx.x] +#define DEV_SIM_DBL_PTR_YCUTOFF smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 17 + threadIdx.x] +#define DEV_SIM_DBL_PTR_YVERTICALTEMP smem_dbl_ptr[ERI_GRAD_FFFF_TPB * 18 + threadIdx.x] +#define DEV_SIM_DBL_PRIMLIMIT smem_dbl[threadIdx.x] +#define DEV_SIM_DBL_GRADCUTOFF smem_dbl[ERI_GRAD_FFFF_TPB + threadIdx.x] +#define DEV_SIM_DBL_HYB_COEFF smem_dbl[ERI_GRAD_FFFF_TPB * 2 + threadIdx.x] +#define DEV_SIM_INT_NATOM smem_int[threadIdx.x] +#define DEV_SIM_INT_NBASIS smem_int[ERI_GRAD_FFFF_TPB + threadIdx.x] +#define DEV_SIM_INT_NSHELL smem_int[ERI_GRAD_FFFF_TPB * 2 + threadIdx.x] +#define DEV_SIM_INT_JBASIS smem_int[ERI_GRAD_FFFF_TPB * 3 + threadIdx.x] +#define DEV_SIM_INT_SQRQSHELL smem_int[ERI_GRAD_FFFF_TPB * 4 + threadIdx.x] +#define DEV_SIM_INT_PRIM_TOTAL smem_int[ERI_GRAD_FFFF_TPB * 5 + threadIdx.x] +#define DEV_SIM_INT_FFSTART smem_int[ERI_GRAD_FFFF_TPB * 6 + threadIdx.x] + +#define DEV_SIM_PTR_GRAD smem_grad_ptr[threadIdx.x] + +#define LOCTRANS(A,i1,i2,i3,d1,d2,d3) A[(i3 + ((i2) + (i1) * (d2)) * (d3)) * ERI_GRAD_FFFF_TPB + threadIdx.x] #define 
DEV_SIM_CHAR_TRANS smem_char -#ifdef GPU_SPDF +#if defined(GPU_SPDF) #define int_spdf4 #include "../gpu_eri_grad_vrr_ffff.h" #include "gpu_get2e_grad_ffff.cuh" @@ -131,16 +126,16 @@ texture tex_Xcoeff; //Include the kernels for open shell eri calculations #define OSHELL -#ifdef GPU_SPDF +#if defined(GPU_SPDF) #define int_spdf4 // #include "gpu_get2e_grad_ffff.cuh" -#endif + #endif #undef OSHELL // totTime is the timer for GPU 2e time. Only on under debug mode -#if defined DEBUG || defined DEBUGTIME -static float totTime; +#if defined(DEBUG) || defined(DEBUGTIME) + static float totTime; #endif @@ -186,8 +181,8 @@ void ResortERIs(_gpu_type gpu) { for (int i = 0; i < gpu->gpu_cutoff->sqrQshell; i++) { if (gpu->gpu_basis->sorted_Qnumber->_hostData[gpu->gpu_cutoff->sorted_YCutoffIJ->_hostData[i].x] == lbl_t.x && gpu->gpu_basis->sorted_Qnumber->_hostData[gpu->gpu_cutoff->sorted_YCutoffIJ->_hostData[i].y] == lbl_t.y) { - resorted_YCutoffIJ[idx1].x = gpu->gpu_cutoff->sorted_YCutoffIJ ->_hostData[i].x; - resorted_YCutoffIJ[idx1].y = gpu->gpu_cutoff->sorted_YCutoffIJ ->_hostData[i].y; + resorted_YCutoffIJ[idx1].x = gpu->gpu_cutoff->sorted_YCutoffIJ->_hostData[i].x; + resorted_YCutoffIJ[idx1].y = gpu->gpu_cutoff->sorted_YCutoffIJ->_hostData[i].y; idx1++; } } @@ -203,7 +198,7 @@ void ResortERIs(_gpu_type gpu) { if (ffset == false && gpu->gpu_basis->sorted_Qnumber->_hostData[resorted_YCutoffIJ[i].x] - + gpu->gpu_basis->sorted_Qnumber->_hostData[resorted_YCutoffIJ[i].y] == 6){ + + gpu->gpu_basis->sorted_Qnumber->_hostData[resorted_YCutoffIJ[i].y] == 6) { ffStart = i; ffset = true; } @@ -249,62 +244,62 @@ void getGrad_ffff(_gpu_type gpu) { ResortERIs(gpu); - int *int_buffer = (int*) malloc(ERI_GRAD_FFFF_SMEM_INT_SIZE*ERI_GRAD_FFFF_TPB*sizeof(int)); - int **int_ptr_buffer = (int**) malloc(ERI_GRAD_FFFF_SMEM_INT_PTR_SIZE*ERI_GRAD_FFFF_TPB*sizeof(int*)); - QUICKDouble *dbl_buffer = (QUICKDouble*) malloc(ERI_GRAD_FFFF_SMEM_DBL_SIZE*ERI_GRAD_FFFF_TPB*sizeof(QUICKDouble)); - 
QUICKDouble **dbl_ptr_buffer = (QUICKDouble**) malloc(ERI_GRAD_FFFF_SMEM_DBL_PTR_SIZE*ERI_GRAD_FFFF_TPB*sizeof(QUICKDouble*)); - int2 **int2_ptr_buffer = (int2**) malloc(ERI_GRAD_FFFF_SMEM_INT2_PTR_SIZE*ERI_GRAD_FFFF_TPB*sizeof(int2*)); - unsigned char **char_ptr_buffer = (unsigned char**) malloc(ERI_GRAD_FFFF_SMEM_CHAR_PTR_SIZE*ERI_GRAD_FFFF_TPB*sizeof(unsigned char*)); - QUICKAtomicType **grad_ptr_buffer = (QUICKAtomicType**) malloc(ERI_GRAD_FFFF_SMEM_PTR_SIZE*ERI_GRAD_FFFF_TPB*sizeof(QUICKAtomicType*)); - unsigned char trans[TRANSDIM*TRANSDIM*TRANSDIM]; - - for(int i=0; igpu_sim.natom; - int_buffer[ERI_GRAD_FFFF_TPB*1+i] = gpu->gpu_sim.nbasis; - int_buffer[ERI_GRAD_FFFF_TPB*2+i] = gpu->gpu_sim.nshell; - int_buffer[ERI_GRAD_FFFF_TPB*3+i] = gpu->gpu_sim.jbasis; - int_buffer[ERI_GRAD_FFFF_TPB*4+i] = gpu->gpu_sim.sqrQshell; - int_buffer[ERI_GRAD_FFFF_TPB*5+i] = gpu->gpu_sim.prim_total; - int_buffer[ERI_GRAD_FFFF_TPB*6+i] = gpu->gpu_sim.ffStart; - int_ptr_buffer[ERI_GRAD_FFFF_TPB*0+i] = gpu->gpu_sim.katom; - int_ptr_buffer[ERI_GRAD_FFFF_TPB*1+i] = gpu->gpu_sim.kprim; - int_ptr_buffer[ERI_GRAD_FFFF_TPB*2+i] = gpu->gpu_sim.kstart; - int_ptr_buffer[ERI_GRAD_FFFF_TPB*3+i] = gpu->gpu_sim.Ksumtype; - int_ptr_buffer[ERI_GRAD_FFFF_TPB*4+i] = gpu->gpu_sim.prim_start; - int_ptr_buffer[ERI_GRAD_FFFF_TPB*5+i] = gpu->gpu_sim.Qfbasis; - int_ptr_buffer[ERI_GRAD_FFFF_TPB*6+i] = gpu->gpu_sim.Qsbasis; - int_ptr_buffer[ERI_GRAD_FFFF_TPB*7+i] = gpu->gpu_sim.Qstart; - int_ptr_buffer[ERI_GRAD_FFFF_TPB*8+i] = gpu->gpu_sim.sorted_Q; - int_ptr_buffer[ERI_GRAD_FFFF_TPB*9+i] = gpu->gpu_sim.sorted_Qnumber; - dbl_buffer[ERI_GRAD_FFFF_TPB*0+i] = gpu->gpu_sim.primLimit; - dbl_buffer[ERI_GRAD_FFFF_TPB*1+i] = gpu->gpu_sim.gradCutoff; - dbl_buffer[ERI_GRAD_FFFF_TPB*2+i] = gpu->gpu_sim.hyb_coeff; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*0+i] = gpu->gpu_sim.cons; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*1+i] = gpu->gpu_sim.cutMatrix; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*2+i] = gpu->gpu_sim.cutPrim; - 
dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*3+i] = gpu->gpu_sim.dense; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*4+i] = gpu->gpu_sim.denseb; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*5+i] = gpu->gpu_sim.expoSum; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*6+i] = gpu->gpu_sim.gcexpo; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*7+i] = gpu->gpu_sim.store; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*8+i] = gpu->gpu_sim.store2; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*9+i] = gpu->gpu_sim.storeAA; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*10+i] = gpu->gpu_sim.storeBB; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*11+i] = gpu->gpu_sim.storeCC; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*12+i] = gpu->gpu_sim.weightedCenterX; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*13+i] = gpu->gpu_sim.weightedCenterY; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*14+i] = gpu->gpu_sim.weightedCenterZ; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*15+i] = gpu->gpu_sim.Xcoeff; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*16+i] = gpu->gpu_sim.xyz; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*17+i] = gpu->gpu_sim.YCutoff; - dbl_ptr_buffer[ERI_GRAD_FFFF_TPB*18+i] = gpu->gpu_sim.YVerticalTemp; - int2_ptr_buffer[ERI_GRAD_FFFF_TPB*0+i] = gpu->gpu_sim.sorted_YCutoffIJ; - char_ptr_buffer[ERI_GRAD_FFFF_TPB*0+i] = gpu->gpu_sim.mpi_bcompute; - char_ptr_buffer[ERI_GRAD_FFFF_TPB*1+i] = gpu->gpu_sim.KLMN; + int *int_buffer = (int*) malloc(ERI_GRAD_FFFF_SMEM_INT_SIZE * ERI_GRAD_FFFF_TPB * sizeof(int)); + int **int_ptr_buffer = (int**) malloc(ERI_GRAD_FFFF_SMEM_INT_PTR_SIZE * ERI_GRAD_FFFF_TPB * sizeof(int*)); + QUICKDouble *dbl_buffer = (QUICKDouble*) malloc(ERI_GRAD_FFFF_SMEM_DBL_SIZE*ERI_GRAD_FFFF_TPB * sizeof(QUICKDouble)); + QUICKDouble **dbl_ptr_buffer = (QUICKDouble**) malloc(ERI_GRAD_FFFF_SMEM_DBL_PTR_SIZE * ERI_GRAD_FFFF_TPB*sizeof(QUICKDouble*)); + int2 **int2_ptr_buffer = (int2**) malloc(ERI_GRAD_FFFF_SMEM_INT2_PTR_SIZE*ERI_GRAD_FFFF_TPB * sizeof(int2*)); + unsigned char **char_ptr_buffer = (unsigned char**) malloc(ERI_GRAD_FFFF_SMEM_CHAR_PTR_SIZE * ERI_GRAD_FFFF_TPB * sizeof(unsigned char*)); + QUICKAtomicType 
**grad_ptr_buffer = (QUICKAtomicType**) malloc(ERI_GRAD_FFFF_SMEM_PTR_SIZE * ERI_GRAD_FFFF_TPB * sizeof(QUICKAtomicType*)); + unsigned char trans[TRANSDIM * TRANSDIM * TRANSDIM]; + + for (int i = 0; i < ERI_GRAD_FFFF_TPB; i++) { + int_buffer[i] = gpu->gpu_sim.natom; + int_buffer[ERI_GRAD_FFFF_TPB + i] = gpu->gpu_sim.nbasis; + int_buffer[ERI_GRAD_FFFF_TPB * 2 + i] = gpu->gpu_sim.nshell; + int_buffer[ERI_GRAD_FFFF_TPB * 3 + i] = gpu->gpu_sim.jbasis; + int_buffer[ERI_GRAD_FFFF_TPB * 4 + i] = gpu->gpu_sim.sqrQshell; + int_buffer[ERI_GRAD_FFFF_TPB * 5 + i] = gpu->gpu_sim.prim_total; + int_buffer[ERI_GRAD_FFFF_TPB * 6 + i] = gpu->gpu_sim.ffStart; + int_ptr_buffer[i] = gpu->gpu_sim.katom; + int_ptr_buffer[ERI_GRAD_FFFF_TPB + i] = gpu->gpu_sim.kprim; + int_ptr_buffer[ERI_GRAD_FFFF_TPB * 2 + i] = gpu->gpu_sim.kstart; + int_ptr_buffer[ERI_GRAD_FFFF_TPB * 3 + i] = gpu->gpu_sim.Ksumtype; + int_ptr_buffer[ERI_GRAD_FFFF_TPB * 4 + i] = gpu->gpu_sim.prim_start; + int_ptr_buffer[ERI_GRAD_FFFF_TPB * 5 + i] = gpu->gpu_sim.Qfbasis; + int_ptr_buffer[ERI_GRAD_FFFF_TPB * 6 + i] = gpu->gpu_sim.Qsbasis; + int_ptr_buffer[ERI_GRAD_FFFF_TPB * 7 + i] = gpu->gpu_sim.Qstart; + int_ptr_buffer[ERI_GRAD_FFFF_TPB * 8 + i] = gpu->gpu_sim.sorted_Q; + int_ptr_buffer[ERI_GRAD_FFFF_TPB * 9 + i] = gpu->gpu_sim.sorted_Qnumber; + dbl_buffer[i] = gpu->gpu_sim.primLimit; + dbl_buffer[ERI_GRAD_FFFF_TPB + i] = gpu->gpu_sim.gradCutoff; + dbl_buffer[ERI_GRAD_FFFF_TPB * 2 + i] = gpu->gpu_sim.hyb_coeff; + dbl_ptr_buffer[i] = gpu->gpu_sim.cons; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB + i] = gpu->gpu_sim.cutMatrix; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 2 + i] = gpu->gpu_sim.cutPrim; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 3 + i] = gpu->gpu_sim.dense; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 4 + i] = gpu->gpu_sim.denseb; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 5 + i] = gpu->gpu_sim.expoSum; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 6 + i] = gpu->gpu_sim.gcexpo; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 7 + i] = gpu->gpu_sim.store; + 
dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 8 + i] = gpu->gpu_sim.store2; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 9 + i] = gpu->gpu_sim.storeAA; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 10 + i] = gpu->gpu_sim.storeBB; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 11 + i] = gpu->gpu_sim.storeCC; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 12 + i] = gpu->gpu_sim.weightedCenterX; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 13 + i] = gpu->gpu_sim.weightedCenterY; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 14 + i] = gpu->gpu_sim.weightedCenterZ; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 15 + i] = gpu->gpu_sim.Xcoeff; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 16 + i] = gpu->gpu_sim.xyz; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 17 + i] = gpu->gpu_sim.YCutoff; + dbl_ptr_buffer[ERI_GRAD_FFFF_TPB * 18 + i] = gpu->gpu_sim.YVerticalTemp; + int2_ptr_buffer[i] = gpu->gpu_sim.sorted_YCutoffIJ; + char_ptr_buffer[i] = gpu->gpu_sim.mpi_bcompute; + char_ptr_buffer[ERI_GRAD_FFFF_TPB + i] = gpu->gpu_sim.KLMN; #if defined(USE_LEGACY_ATOMICS) - grad_ptr_buffer[ERI_GRAD_FFFF_TPB*0+i] = gpu->gpu_sim.gradULL; + grad_ptr_buffer[i] = gpu->gpu_sim.gradULL; #else - grad_ptr_buffer[ERI_GRAD_FFFF_TPB*0+i] = gpu->gpu_sim.grad; + grad_ptr_buffer[i] = gpu->gpu_sim.grad; #endif } @@ -438,24 +433,24 @@ void getGrad_ffff(_gpu_type gpu) unsigned char *dev_char_buffer; QUICKAtomicType **dev_grad_ptr_buffer; - gpuMalloc((void **) &dev_int_buffer, ERI_GRAD_FFFF_SMEM_INT_SIZE*ERI_GRAD_FFFF_TPB*sizeof(int)); - gpuMalloc((void **) &dev_int_ptr_buffer, ERI_GRAD_FFFF_SMEM_INT_PTR_SIZE*ERI_GRAD_FFFF_TPB*sizeof(int*)); - gpuMalloc((void **) &dev_dbl_buffer, ERI_GRAD_FFFF_SMEM_DBL_SIZE*ERI_GRAD_FFFF_TPB*sizeof(QUICKDouble)); - gpuMalloc((void **) &dev_dbl_ptr_buffer, ERI_GRAD_FFFF_SMEM_DBL_PTR_SIZE*ERI_GRAD_FFFF_TPB*sizeof(QUICKDouble*)); - gpuMalloc((void **) &dev_int2_ptr_buffer, ERI_GRAD_FFFF_SMEM_INT2_PTR_SIZE*ERI_GRAD_FFFF_TPB*sizeof(int2*)); - gpuMalloc((void **) &dev_char_ptr_buffer, ERI_GRAD_FFFF_SMEM_CHAR_PTR_SIZE*ERI_GRAD_FFFF_TPB*sizeof(unsigned char*)); - 
gpuMalloc((void **) &dev_char_buffer, ERI_GRAD_FFFF_SMEM_CHAR_SIZE*sizeof(unsigned char)); - gpuMalloc((void **) &dev_grad_ptr_buffer, ERI_GRAD_FFFF_SMEM_PTR_SIZE*ERI_GRAD_FFFF_TPB*sizeof(QUICKAtomicType*)); - - gpuMemcpy(dev_int_buffer, int_buffer, ERI_GRAD_FFFF_SMEM_INT_SIZE*ERI_GRAD_FFFF_TPB*sizeof(int), hipMemcpyHostToDevice); - gpuMemcpy(dev_int_ptr_buffer, int_ptr_buffer, ERI_GRAD_FFFF_SMEM_INT_PTR_SIZE*ERI_GRAD_FFFF_TPB*sizeof(int*), hipMemcpyHostToDevice); - gpuMemcpy(dev_dbl_buffer, dbl_buffer, ERI_GRAD_FFFF_SMEM_DBL_SIZE*ERI_GRAD_FFFF_TPB*sizeof(QUICKDouble), hipMemcpyHostToDevice); - gpuMemcpy(dev_dbl_ptr_buffer, dbl_ptr_buffer, ERI_GRAD_FFFF_SMEM_DBL_PTR_SIZE*ERI_GRAD_FFFF_TPB*sizeof(QUICKDouble*), hipMemcpyHostToDevice); - gpuMemcpy(dev_int2_ptr_buffer, int2_ptr_buffer, ERI_GRAD_FFFF_SMEM_INT2_PTR_SIZE*ERI_GRAD_FFFF_TPB*sizeof(int2*), hipMemcpyHostToDevice); - gpuMemcpy(dev_char_ptr_buffer, char_ptr_buffer, ERI_GRAD_FFFF_SMEM_CHAR_PTR_SIZE*ERI_GRAD_FFFF_TPB*sizeof(unsigned + gpuMalloc((void **) &dev_int_buffer, ERI_GRAD_FFFF_SMEM_INT_SIZE * ERI_GRAD_FFFF_TPB * sizeof(int)); + gpuMalloc((void **) &dev_int_ptr_buffer, ERI_GRAD_FFFF_SMEM_INT_PTR_SIZE * ERI_GRAD_FFFF_TPB * sizeof(int*)); + gpuMalloc((void **) &dev_dbl_buffer, ERI_GRAD_FFFF_SMEM_DBL_SIZE * ERI_GRAD_FFFF_TPB * sizeof(QUICKDouble)); + gpuMalloc((void **) &dev_dbl_ptr_buffer, ERI_GRAD_FFFF_SMEM_DBL_PTR_SIZE * ERI_GRAD_FFFF_TPB * sizeof(QUICKDouble*)); + gpuMalloc((void **) &dev_int2_ptr_buffer, ERI_GRAD_FFFF_SMEM_INT2_PTR_SIZE * ERI_GRAD_FFFF_TPB * sizeof(int2*)); + gpuMalloc((void **) &dev_char_ptr_buffer, ERI_GRAD_FFFF_SMEM_CHAR_PTR_SIZE * ERI_GRAD_FFFF_TPB * sizeof(unsigned char*)); + gpuMalloc((void **) &dev_char_buffer, ERI_GRAD_FFFF_SMEM_CHAR_SIZE * sizeof(unsigned char)); + gpuMalloc((void **) &dev_grad_ptr_buffer, ERI_GRAD_FFFF_SMEM_PTR_SIZE * ERI_GRAD_FFFF_TPB * sizeof(QUICKAtomicType*)); + + gpuMemcpy(dev_int_buffer, int_buffer, ERI_GRAD_FFFF_SMEM_INT_SIZE * ERI_GRAD_FFFF_TPB * 
sizeof(int), hipMemcpyHostToDevice); + gpuMemcpy(dev_int_ptr_buffer, int_ptr_buffer, ERI_GRAD_FFFF_SMEM_INT_PTR_SIZE * ERI_GRAD_FFFF_TPB * sizeof(int*), hipMemcpyHostToDevice); + gpuMemcpy(dev_dbl_buffer, dbl_buffer, ERI_GRAD_FFFF_SMEM_DBL_SIZE * ERI_GRAD_FFFF_TPB * sizeof(QUICKDouble), hipMemcpyHostToDevice); + gpuMemcpy(dev_dbl_ptr_buffer, dbl_ptr_buffer, ERI_GRAD_FFFF_SMEM_DBL_PTR_SIZE * ERI_GRAD_FFFF_TPB * sizeof(QUICKDouble*), hipMemcpyHostToDevice); + gpuMemcpy(dev_int2_ptr_buffer, int2_ptr_buffer, ERI_GRAD_FFFF_SMEM_INT2_PTR_SIZE * ERI_GRAD_FFFF_TPB * sizeof(int2*), hipMemcpyHostToDevice); + gpuMemcpy(dev_char_ptr_buffer, char_ptr_buffer, ERI_GRAD_FFFF_SMEM_CHAR_PTR_SIZE * ERI_GRAD_FFFF_TPB * sizeof(unsigned char*), hipMemcpyHostToDevice); - gpuMemcpy(dev_char_buffer, &trans, ERI_GRAD_FFFF_SMEM_CHAR_SIZE*sizeof(unsigned char), hipMemcpyHostToDevice); - gpuMemcpy(dev_grad_ptr_buffer, grad_ptr_buffer, ERI_GRAD_FFFF_SMEM_PTR_SIZE*ERI_GRAD_FFFF_TPB*sizeof(QUICKAtomicType*), + gpuMemcpy(dev_char_buffer, &trans, ERI_GRAD_FFFF_SMEM_CHAR_SIZE * sizeof(unsigned char), hipMemcpyHostToDevice); + gpuMemcpy(dev_grad_ptr_buffer, grad_ptr_buffer, ERI_GRAD_FFFF_SMEM_PTR_SIZE * ERI_GRAD_FFFF_TPB * sizeof(QUICKAtomicType*), hipMemcpyHostToDevice); // Part f-3 @@ -471,7 +466,7 @@ void getGrad_ffff(_gpu_type gpu) + sizeof(QUICKAtomicType *) * ERI_GRAD_FFFF_SMEM_PTR_SIZE) * ERI_GRAD_FFFF_TPB + sizeof(unsigned char) * ERI_GRAD_FFFF_SMEM_CHAR_SIZE>>> (dev_int_buffer, dev_int_ptr_buffer, dev_dbl_buffer, dev_dbl_ptr_buffer, dev_int2_ptr_buffer, - dev_char_ptr_buffer, dev_char_buffer, dev_grad_ptr_buffer,gpu->gpu_sim.ffStart, gpu->gpu_sim.sqrQshell))) + dev_char_ptr_buffer, dev_char_buffer, dev_grad_ptr_buffer,gpu->gpu_sim.ffStart, gpu->gpu_sim.sqrQshell))); #endif } @@ -501,7 +496,7 @@ void get_oshell_eri_grad_ffff(_gpu_type gpu) // nvtxRangePushA("Gradient 2e"); // compute one electron gradients in the meantime - //get_oneen_grad_(); +// get_oneen_grad_(); // Part f-3 // if 
(gpu->maxL >= 3) { diff --git a/src/gpu/hip/gpu_get2e_grad_ffff.cuh b/src/gpu/hip/gpu_get2e_grad_ffff.cuh index 6b5b9982..a28cdbc9 100644 --- a/src/gpu/hip/gpu_get2e_grad_ffff.cuh +++ b/src/gpu/hip/gpu_get2e_grad_ffff.cuh @@ -11,612 +11,580 @@ #undef STOREDIM #if defined int_spdf4 -#undef VDIM3 -#define VDIM3 VDIM3_L -#define STOREDIM STOREDIM_XL -#define STORE_INIT 4 -#define STORE_DIM 80 -#define STORE_INIT_I_AA 4 -#define STORE_INIT_J_AA 10 -#define STORE_DIM_I_AA 80 -#define STORE_DIM_J_AA 110 - -#define STORE_INIT_I_CC 10 -#define STORE_INIT_J_CC 4 -#define STORE_DIM_I_CC 110 -#define STORE_DIM_J_CC 80 + #undef VDIM3 + #define VDIM3 VDIM3_L + #define STOREDIM STOREDIM_XL + #define STORE_INIT (4) + #define STORE_DIM (80) + #define STORE_INIT_I_AA (4) + #define STORE_INIT_J_AA (10) + #define STORE_DIM_I_AA (80) + #define STORE_DIM_J_AA (110) + + #define STORE_INIT_I_CC (10) + #define STORE_INIT_J_CC (4) + #define STORE_DIM_I_CC (110) + #define STORE_DIM_J_CC (80) #endif -#ifndef new_quick_2_gpu_get2e_subs_grad_h +#if !defined(new_quick_2_gpu_get2e_subs_grad_h) #define new_quick_2_gpu_get2e_subs_grad_h #undef STOREDIM #define STOREDIM STOREDIM_XL -#ifndef OSHELL -#define FMT_NAME FmT -#include "../gpu_fmt.h" +#if !defined(OSHELL) + #define FMT_NAME FmT + #include "../gpu_fmt.h" -__device__ __inline__ int lefthrr_2(const QUICKDouble RAx, const QUICKDouble RAy, const QUICKDouble RAz, - const QUICKDouble RBx, const QUICKDouble RBy, const QUICKDouble RBz, - const int KLMNAx, const int KLMNAy, const int KLMNAz, - const int KLMNBx, const int KLMNBy, const int KLMNBz, - const int IJTYPE,QUICKDouble* coefAngularL, unsigned char* const angularL, unsigned char* const smem_char) +__device__ static inline int lefthrr_2(const QUICKDouble RAx, const QUICKDouble RAy, const QUICKDouble RAz, + const QUICKDouble RBx, const QUICKDouble RBy, const QUICKDouble RBz, + const int KLMNAx, const int KLMNAy, const int KLMNAz, + const int KLMNBx, const int KLMNBy, const int KLMNBz, + 
QUICKDouble * coefAngularL, unsigned char * const angularL, unsigned char * const smem_char) { - int numAngularL; coefAngularL[0] = 1.0; - angularL[0] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + KLMNBx, KLMNAy + KLMNBy, KLMNAz + KLMNBz, TRANSDIM, TRANSDIM, TRANSDIM); - + angularL[0] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + KLMNBx, + KLMNAy + KLMNBy, KLMNAz + KLMNBz, TRANSDIM, TRANSDIM, TRANSDIM); + + if (KLMNBx == 2 || KLMNBy == 2 || KLMNBz == 2) { + numAngularL = 3; + QUICKDouble tmp; + + if (KLMNBx == 2) { + tmp = RAx - RBx; + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } else if(KLMNBy == 2) { + tmp = RAy - RBy; + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBz == 2) { + tmp = RAz - RBz; + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + } - if (KLMNBx == 2 || KLMNBy == 2 || KLMNBz == 2) { - numAngularL = 3; - QUICKDouble tmp; - - - if (KLMNBx == 2) { - tmp = RAx - RBx; - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - }else if(KLMNBy == 2) { - tmp = RAy - RBy; - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - }else if (KLMNBz == 2 ){ - tmp = RAz - RBz; - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - } - - coefAngularL[1] = 2 * tmp; - coefAngularL[2]= tmp * tmp; - - - angularL[numAngularL - 1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - - return numAngularL; - - }else{ - - numAngularL = 4; - QUICKDouble tmp, tmp2; - - if(KLMNBx == 1 && KLMNBy == 1){ - tmp = RAx - RBx; - tmp2 = RAy - RBy; - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - - }else if (KLMNBx == 1 && 
KLMNBz == 1) { - tmp = RAx - RBx; - tmp2 = RAz - RBz; - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - }else if (KLMNBy == 1 && KLMNBz == 1) { - tmp = RAy - RBy; - tmp2 = RAz - RBz; - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - } - - - coefAngularL[1] = tmp; - coefAngularL[2] = tmp2; - coefAngularL[3] = tmp * tmp2; - - - angularL[numAngularL - 1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - - return numAngularL; + coefAngularL[1] = 2 * tmp; + coefAngularL[2]= tmp * tmp; + + angularL[numAngularL - 1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + + return numAngularL; + } else { + numAngularL = 4; + QUICKDouble tmp, tmp2; + + if (KLMNBx == 1 && KLMNBy == 1) { + tmp = RAx - RBx; + tmp2 = RAy - RBy; + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBx == 1 && KLMNBz == 1) { + tmp = RAx - RBx; + tmp2 = RAz - RBz; + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBy == 1 && KLMNBz == 1) { + tmp = RAy - RBy; + tmp2 = RAz - RBz; + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); } -} + coefAngularL[1] = tmp; + coefAngularL[2] = tmp2; + coefAngularL[3] = tmp * tmp2; + angularL[numAngularL - 1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, 
KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + + return numAngularL; + } +} -__device__ __inline__ int lefthrr(const QUICKDouble RAx, const QUICKDouble RAy, const QUICKDouble RAz, - const QUICKDouble RBx, const QUICKDouble RBy, const QUICKDouble RBz, - const int KLMNAx, const int KLMNAy, const int KLMNAz, - const int KLMNBx, const int KLMNBy, const int KLMNBz, - const int IJTYPE,QUICKDouble* coefAngularL, unsigned char* const angularL, unsigned char* -const smem_char) +__device__ static inline int lefthrr(const QUICKDouble RAx, const QUICKDouble RAy, const QUICKDouble RAz, + const QUICKDouble RBx, const QUICKDouble RBy, const QUICKDouble RBz, + const int KLMNAx, const int KLMNAy, const int KLMNAz, + const int KLMNBx, const int KLMNBy, const int KLMNBz, + QUICKDouble * coefAngularL, unsigned char * const angularL, unsigned char * const smem_char) { - - int numAngularL; - + coefAngularL[0] = 1.0; angularL[0] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + KLMNBx, KLMNAy + KLMNBy, KLMNAz + KLMNBz, TRANSDIM, TRANSDIM, TRANSDIM); - if (KLMNBx == 3 || KLMNBy == 3 || KLMNBz == 3) { - numAngularL = 4; - QUICKDouble tmp; - - if (KLMNBx == 3) { - tmp = RAx - RBx; - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+2, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - }else if (KLMNBy == 3) { - tmp = RAy - RBy; - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - }else if (KLMNBz == 3) { - tmp = RAz - RBz; - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+2, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - } - - - coefAngularL[1] = 3 * tmp; - coefAngularL[2] = 3 * tmp * tmp; - coefAngularL[3] = tmp * tmp * tmp; - - - angularL[numAngularL - 1] = 
LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - - return numAngularL; - }else if (KLMNBx == 1 && KLMNBy == 1) { - numAngularL = 8; - QUICKDouble tmp = RAx - RBx; - QUICKDouble tmp2 = RAy - RBy; - QUICKDouble tmp3 = RAz - RBz; - - coefAngularL[1] = tmp; - coefAngularL[2] = tmp2; - coefAngularL[3] = tmp3; - coefAngularL[4] = tmp * tmp2; - coefAngularL[5] = tmp * tmp3; - coefAngularL[6] = tmp2 * tmp3; - coefAngularL[7] = tmp * tmp2 * tmp3; - - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - - angularL[numAngularL - 1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - - return numAngularL; - }else{ - - numAngularL = 6; - QUICKDouble tmp; - QUICKDouble tmp2; - - if (KLMNBx == 1) { - tmp = RAx - RBx; - }else if (KLMNBy == 1){ - tmp = RAy - RBy; - }else if (KLMNBz == 1){ - tmp = RAz - RBz; + if (KLMNBx == 3 || KLMNBy == 3 || KLMNBz == 3) { + numAngularL = 4; + QUICKDouble tmp; + + if (KLMNBx == 3) { + tmp = RAx - RBx; + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 2, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBy == 3) { + tmp = RAy - RBy; + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } else if 
(KLMNBz == 3) { + tmp = RAz - RBz; + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 2, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + } + + coefAngularL[1] = 3 * tmp; + coefAngularL[2] = 3 * tmp * tmp; + coefAngularL[3] = tmp * tmp * tmp; + + angularL[numAngularL - 1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBx == 1 && KLMNBy == 1) { + numAngularL = 8; + QUICKDouble tmp = RAx - RBx; + QUICKDouble tmp2 = RAy - RBy; + QUICKDouble tmp3 = RAz - RBz; + + coefAngularL[1] = tmp; + coefAngularL[2] = tmp2; + coefAngularL[3] = tmp3; + coefAngularL[4] = tmp * tmp2; + coefAngularL[5] = tmp * tmp3; + coefAngularL[6] = tmp2 * tmp3; + coefAngularL[7] = tmp * tmp2 * tmp3; + + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + + angularL[numAngularL - 1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } else { + numAngularL = 6; + QUICKDouble tmp; + QUICKDouble tmp2; + + if (KLMNBx == 1) { + tmp = RAx - RBx; + } else if (KLMNBy == 1) { + tmp = RAy - RBy; + } else if (KLMNBz == 1) { + tmp = RAz - RBz; + } + + if (KLMNBx == 2) { + tmp2 = RAx - RBx; + } else if (KLMNBy == 2) { + tmp2 = RAy - RBy; + } else if (KLMNBz == 2) { + tmp2 = RAz - RBz; + } + + coefAngularL[1] = tmp; + coefAngularL[2] = 2 * tmp2; + coefAngularL[3] = 2 * tmp * tmp2; + 
coefAngularL[4] = tmp2 * tmp2; + coefAngularL[5] = tmp * tmp2 * tmp2; + + if (KLMNBx == 2) { + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 2, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } + + if (KLMNBy == 2) { + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } + + if (KLMNBz == 2) { + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 2, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + } + + if (KLMNBx == 1) { + // 120 + if (KLMNBy == 2) { + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + // 102 + } else { + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); } - + } + + if (KLMNBy == 1) { + // 210 if (KLMNBx == 2) { - tmp2 = RAx - RBx; - }else if (KLMNBy == 2){ - tmp2 = RAy - RBy; - }else if (KLMNBz == 2){ - tmp2 = RAz - RBz; + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + // 012 + } else { + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); } - - coefAngularL[1] = tmp; - coefAngularL[2] = 2 * tmp2; - coefAngularL[3] = 2 * tmp * tmp2; - coefAngularL[4] = tmp2 * tmp2; - coefAngularL[5] = tmp * tmp2 * tmp2; - - + } + + if (KLMNBz == 1) { + // 201 if (KLMNBx == 2) { - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+2, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - } - - if (KLMNBy == 2) { - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = 
LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - } - - if (KLMNBz == 2) { - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+2, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - } - - if (KLMNBx == 1) { - if (KLMNBy == 2) { //120 - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - }else{ //102 - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - } - } - - if (KLMNBy == 1) { - if (KLMNBx == 2) { // 210 - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - }else{ // 012 - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - } - } - - if (KLMNBz == 1) { - if (KLMNBx == 2) { // 201 - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - }else{ // 021 - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - } - } - - - if (KLMNBx == 1) { - angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + // 021 + } else { + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); } - - if (KLMNBy == 1) { - angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - } - - if (KLMNBz == 1) { - angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - } - - angularL[numAngularL - 1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - - return numAngularL; - - - } + + if (KLMNBx == 1) { + angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, 
TRANSDIM); + } + + if (KLMNBy == 1) { + angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } + + if (KLMNBz == 1) { + angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + } + + angularL[numAngularL - 1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } + + return numAngularL; } -__device__ __inline__ int lefthrr_4(const QUICKDouble RAx, const QUICKDouble RAy, const QUICKDouble RAz, - const QUICKDouble RBx, const QUICKDouble RBy, const QUICKDouble RBz, - const int KLMNAx, const int KLMNAy, const int KLMNAz, - const int KLMNBx, const int KLMNBy, const int KLMNBz, - const int IJTYPE,QUICKDouble* coefAngularL, unsigned char* const angularL, unsigned char* -const smem_char) +__device__ static inline int lefthrr_4(const QUICKDouble RAx, const QUICKDouble RAy, const QUICKDouble RAz, + const QUICKDouble RBx, const QUICKDouble RBy, const QUICKDouble RBz, + const int KLMNAx, const int KLMNAy, const int KLMNAz, + const int KLMNBx, const int KLMNBy, const int KLMNBz, + QUICKDouble * coefAngularL, unsigned char * const angularL, unsigned char * const smem_char) { - int numAngularL; coefAngularL[0] = 1.0; - angularL[0] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + KLMNBx, KLMNAy + KLMNBy, KLMNAz + KLMNBz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[0] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + KLMNBx, + KLMNAy + KLMNBy, KLMNAz + KLMNBz, TRANSDIM, TRANSDIM, TRANSDIM); + + if (KLMNBx == 4) { + numAngularL = 5; + QUICKDouble tmp = RAx - RBx; + + coefAngularL[1] = 4 * tmp; + coefAngularL[2] = 6 * tmp * tmp; + coefAngularL[3] = 4 * tmp * tmp * tmp; + coefAngularL[4] = tmp * tmp * tmp * tmp; + + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 3, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 2, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz, TRANSDIM, 
TRANSDIM, TRANSDIM); + } else if (KLMNBy == 4) { + numAngularL = 5; + QUICKDouble tmp = RAy - RBy; + coefAngularL[1] = 4 * tmp; + coefAngularL[2] = 6 * tmp * tmp; + coefAngularL[3] = 4 * tmp * tmp * tmp; + coefAngularL[4] = tmp * tmp * tmp * tmp; + + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 3, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBz == 4) { + numAngularL = 5; + + QUICKDouble tmp = RAz - RBz; + coefAngularL[1] = 4 * tmp; + coefAngularL[2] = 6 * tmp * tmp; + coefAngularL[3] = 4 * tmp * tmp * tmp; + coefAngularL[4] = tmp * tmp * tmp * tmp; + + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 3, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 2, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBx == 1 && KLMNBy == 3) { + numAngularL = 8; + QUICKDouble tmp = RAx - RBx; + QUICKDouble tmp2 = RAy - RBy; + + coefAngularL[1] = tmp; + coefAngularL[2] = 3 * tmp2; + coefAngularL[3] = 3 * tmp * tmp2; + coefAngularL[4] = 3 * tmp2 * tmp2; + coefAngularL[5] = 3 * tmp * tmp2 * tmp2; + coefAngularL[6] = tmp2 * tmp2 * tmp2; + coefAngularL[7] = tmp * tmp2 * tmp2 * tmp2; + + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 3, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy + 2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[6] = 
LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBx == 3 && KLMNBy == 1) { + numAngularL = 8; + QUICKDouble tmp = RAy - RBy; + QUICKDouble tmp2 = RAx - RBx; + + coefAngularL[1] = tmp; + coefAngularL[2] = 3 * tmp2; + coefAngularL[3] = 3 * tmp * tmp2; + coefAngularL[4] = 3 * tmp2 * tmp2; + coefAngularL[5] = 3 * tmp * tmp2 * tmp2; + coefAngularL[6] = tmp2 * tmp2 * tmp2; + coefAngularL[7] = tmp * tmp2 * tmp2 * tmp2; + + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 3, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 2, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 2, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBx == 1 && KLMNBz == 3) { + numAngularL = 8; + QUICKDouble tmp = RAx - RBx; + QUICKDouble tmp2 = RAz - RBz; + + coefAngularL[1] = tmp; + coefAngularL[2] = 3 * tmp2; + coefAngularL[3] = 3 * tmp * tmp2; + coefAngularL[4] = 3 * tmp2 * tmp2; + coefAngularL[5] = 3 * tmp * tmp2 * tmp2; + coefAngularL[6] = tmp2 * tmp2 * tmp2; + coefAngularL[7] = tmp * tmp2 * tmp2 * tmp2; + + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 3, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz + 2, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 2, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[6] = 
LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + + } else if (KLMNBx == 3 && KLMNBz == 1) { + numAngularL = 8; + QUICKDouble tmp = RAz - RBz; + QUICKDouble tmp2 = RAx - RBx; + + coefAngularL[1] = tmp; + coefAngularL[2] = 3 * tmp2; + coefAngularL[3] = 3 * tmp * tmp2; + coefAngularL[4] = 3 * tmp2 * tmp2; + coefAngularL[5] = 3 * tmp * tmp2 * tmp2; + coefAngularL[6] = tmp2 * tmp2 * tmp2; + coefAngularL[7] = tmp * tmp2 * tmp2 * tmp2; + + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 3, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 2, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 2, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBy == 1 && KLMNBz == 3) { + numAngularL = 8; + QUICKDouble tmp = RAy - RBy; + QUICKDouble tmp2 = RAz - RBz; + + coefAngularL[1] = tmp; + coefAngularL[2] = 3 * tmp2; + coefAngularL[3] = 3 * tmp * tmp2; + coefAngularL[4] = 3 * tmp2 * tmp2; + coefAngularL[5] = 3 * tmp * tmp2 * tmp2; + coefAngularL[6] = tmp2 * tmp2 * tmp2; + coefAngularL[7] = tmp * tmp2 * tmp2 * tmp2; + + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+3, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz+2, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+2, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, 
KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBy == 3 && KLMNBz == 1) { + numAngularL = 8; + QUICKDouble tmp = RAz - RBz; + QUICKDouble tmp2 = RAy - RBy; + + coefAngularL[1] = tmp; + coefAngularL[2] = 3 * tmp2; + coefAngularL[3] = 3 * tmp * tmp2; + coefAngularL[4] = 3 * tmp2 * tmp2; + coefAngularL[5] = 3 * tmp * tmp2 * tmp2; + coefAngularL[6] = tmp2 * tmp2 * tmp2; + coefAngularL[7] = tmp * tmp2 * tmp2 * tmp2; + + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 3, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 2, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBx == 2 && KLMNBy == 2) { + numAngularL = 9; + QUICKDouble tmp = RAx - RBx; + QUICKDouble tmp2 = RAy - RBy; + + coefAngularL[1] = 2 * tmp; + coefAngularL[2] = 2 * tmp2; + coefAngularL[3] = 4 * tmp * tmp2; + coefAngularL[4] = tmp * tmp; + coefAngularL[5] = tmp2 * tmp2; + coefAngularL[6] = 2 * tmp * tmp2 * tmp2; + coefAngularL[7] = 2 * tmp * tmp * tmp2; + coefAngularL[8] = tmp * tmp * tmp2 * tmp2; + + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy + 2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 2, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 2, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[6] = 
LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[7] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBx == 2 && KLMNBz == 2) { + numAngularL = 9; + QUICKDouble tmp = RAx - RBx; + QUICKDouble tmp2 = RAz - RBz; + + coefAngularL[1] = 2 * tmp; + coefAngularL[2] = 2 * tmp2; + coefAngularL[3] = 4 * tmp * tmp2; + coefAngularL[4] = tmp * tmp; + coefAngularL[5] = tmp2 * tmp2; + coefAngularL[6] = 2 * tmp * tmp2 * tmp2; + coefAngularL[7] = 2 * tmp * tmp * tmp2; + coefAngularL[8] = tmp * tmp * tmp2 * tmp2; + + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz + 2, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 2, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 2, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 2, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[7] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBy == 2 && KLMNBz == 2) { + numAngularL = 9; + QUICKDouble tmp = RAy - RBy; + QUICKDouble tmp2 = RAz - RBz; + + coefAngularL[1] = 2 * tmp; + coefAngularL[2] = 2 * tmp2; + coefAngularL[3] = 4 * tmp * tmp2; + coefAngularL[4] = tmp * tmp; + coefAngularL[5] = tmp2 * tmp2; + coefAngularL[6] = 2 * tmp * tmp2 * tmp2; + coefAngularL[7] = 2 * tmp * tmp * tmp2; + coefAngularL[8] = tmp * tmp * tmp2 * tmp2; + + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz + 2, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 2, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 
1, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 2, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[7] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBx == 1 && KLMNBy == 1 && KLMNBz == 2) { + numAngularL = 12; + QUICKDouble tmp = RAx - RBx; + QUICKDouble tmp2 = RAy - RBy; + QUICKDouble tmp3 = RAz - RBz; + + coefAngularL[1] = tmp; + coefAngularL[2] = tmp2; + coefAngularL[3] = 2 * tmp3; + coefAngularL[4] = tmp * tmp2; + coefAngularL[5] = 2 * tmp * tmp3; + coefAngularL[6] = 2 * tmp2 * tmp3; + coefAngularL[7] = tmp3 * tmp3; + coefAngularL[8] = 2 * tmp * tmp2 * tmp3; + coefAngularL[9] = tmp * tmp3 * tmp3; + coefAngularL[10] = tmp2 * tmp3 * tmp3; + coefAngularL[11] = tmp * tmp2 * tmp3 * tmp3; + + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz + 2, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz + 2, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy + 1, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 2, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[7] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[8] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[9] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[10] =LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, 
KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBx == 1 && KLMNBz == 1 && KLMNBy == 2) { + numAngularL = 12; + QUICKDouble tmp = RAx - RBx; + QUICKDouble tmp2 = RAz - RBz; + QUICKDouble tmp3 = RAy - RBy; + + coefAngularL[1] = tmp; + coefAngularL[2] = tmp2; + coefAngularL[3] = 2 * tmp3; + coefAngularL[4] = tmp * tmp2; + coefAngularL[5] = 2 * tmp * tmp3; + coefAngularL[6] = 2 * tmp2 * tmp3; + coefAngularL[7] = tmp3 * tmp3; + coefAngularL[8] = 2 * tmp * tmp2 * tmp3; + coefAngularL[9] = tmp * tmp3 * tmp3; + coefAngularL[10] = tmp2 * tmp3 * tmp3; + coefAngularL[11] = tmp * tmp2 * tmp3 * tmp3; + + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 2, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy + 2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy + 1, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[7] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[8] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[9] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[10] =LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } else if (KLMNBy == 1 && KLMNBz == 1 && KLMNBx == 2) { + numAngularL = 12; + QUICKDouble tmp = RAy - RBy; + QUICKDouble tmp2 = RAz - RBz; + QUICKDouble tmp3 = RAx - RBx; + + coefAngularL[1] = tmp; + coefAngularL[2] = tmp2; + coefAngularL[3] = 2 * tmp3; + coefAngularL[4] = tmp * tmp2; + coefAngularL[5] = 2 * tmp * tmp3; + coefAngularL[6] = 2 * tmp2 * tmp3; + 
coefAngularL[7] = tmp3 * tmp3; + coefAngularL[8] = 2 * tmp * tmp2 * tmp3; + coefAngularL[9] = tmp * tmp3 * tmp3; + coefAngularL[10] = tmp2 * tmp3 * tmp3; + coefAngularL[11] = tmp * tmp2 * tmp3 * tmp3; + + angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 2, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 2, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy + 1, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 2, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[7] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[8] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx + 1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[9] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz + 1, TRANSDIM, TRANSDIM, TRANSDIM); + angularL[10] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy + 1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); + } - if (KLMNBx == 4) { - numAngularL = 5; - QUICKDouble tmp = RAx - RBx; - - coefAngularL[1] = 4 * tmp; - coefAngularL[2] = 6 * tmp * tmp; - coefAngularL[3] = 4 * tmp * tmp * tmp; - coefAngularL[4] = tmp * tmp * tmp * tmp; - - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+3, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+2, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - }else if (KLMNBy == 4) { - numAngularL = 5; - QUICKDouble tmp = RAy - RBy; - coefAngularL[1] = 4 * tmp; - coefAngularL[2] = 6 * tmp * tmp; - coefAngularL[3] = 4 * tmp * tmp * tmp; - coefAngularL[4] = tmp * tmp * tmp * tmp; - - angularL[1] = 
LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+3, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - - }else if (KLMNBz == 4) { - numAngularL = 5; - - QUICKDouble tmp = RAz - RBz; - coefAngularL[1] = 4 * tmp; - coefAngularL[2] = 6 * tmp * tmp; - coefAngularL[3] = 4 * tmp * tmp * tmp; - coefAngularL[4] = tmp * tmp * tmp * tmp; - - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+3, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+2, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - - }else if (KLMNBx == 1 && KLMNBy == 3) { - numAngularL = 8; - QUICKDouble tmp = RAx - RBx; - QUICKDouble tmp2 = RAy - RBy; - - coefAngularL[1] = tmp; - coefAngularL[2] = 3 * tmp2; - coefAngularL[3] = 3 * tmp * tmp2; - coefAngularL[4] = 3 * tmp2 * tmp2; - coefAngularL[5] = 3 * tmp * tmp2 * tmp2; - coefAngularL[6] = tmp2 * tmp2 * tmp2; - coefAngularL[7] = tmp * tmp2 * tmp2 * tmp2; - - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+3, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy+2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - }else if (KLMNBx == 3 && KLMNBy == 1) { - numAngularL = 8; - QUICKDouble tmp = RAy - RBy; - QUICKDouble tmp2 = RAx - RBx; - - coefAngularL[1] = tmp; - coefAngularL[2] = 3 * tmp2; - coefAngularL[3] = 3 * tmp * tmp2; 
- coefAngularL[4] = 3 * tmp2 * tmp2; - coefAngularL[5] = 3 * tmp * tmp2 * tmp2; - coefAngularL[6] = tmp2 * tmp2 * tmp2; - coefAngularL[7] = tmp * tmp2 * tmp2 * tmp2; - - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+3, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+2, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+2, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - } - - else if (KLMNBx == 1 && KLMNBz ==3) { - numAngularL = 8; - QUICKDouble tmp = RAx - RBx; - QUICKDouble tmp2 = RAz - RBz; - - coefAngularL[1] = tmp; - coefAngularL[2] = 3 * tmp2; - coefAngularL[3] = 3 * tmp * tmp2; - coefAngularL[4] = 3 * tmp2 * tmp2; - coefAngularL[5] = 3 * tmp * tmp2 * tmp2; - coefAngularL[6] = tmp2 * tmp2 * tmp2; - coefAngularL[7] = tmp * tmp2 * tmp2 * tmp2; - - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+3, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz+2, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+2, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - - }else if (KLMNBx == 3 && KLMNBz == 1) { - numAngularL = 8; - QUICKDouble tmp = RAz - RBz; - QUICKDouble tmp2 = RAx - RBx; - - coefAngularL[1] = tmp; - coefAngularL[2] = 3 * tmp2; - coefAngularL[3] = 3 * tmp * tmp2; - coefAngularL[4] = 3 * tmp2 * tmp2; - 
coefAngularL[5] = 3 * tmp * tmp2 * tmp2; - coefAngularL[6] = tmp2 * tmp2 * tmp2; - coefAngularL[7] = tmp * tmp2 * tmp2 * tmp2; - - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+3, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+2, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+2, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - - }else if (KLMNBy == 1 && KLMNBz == 3) { - numAngularL = 8; - QUICKDouble tmp = RAy - RBy; - QUICKDouble tmp2 = RAz - RBz; - - coefAngularL[1] = tmp; - coefAngularL[2] = 3 * tmp2; - coefAngularL[3] = 3 * tmp * tmp2; - coefAngularL[4] = 3 * tmp2 * tmp2; - coefAngularL[5] = 3 * tmp * tmp2 * tmp2; - coefAngularL[6] = tmp2 * tmp2 * tmp2; - coefAngularL[7] = tmp * tmp2 * tmp2 * tmp2; - - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+3, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz+2, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+2, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - - }else if (KLMNBy == 3 && KLMNBz == 1) { - numAngularL = 8; - QUICKDouble tmp = RAz - RBz; - QUICKDouble tmp2 = RAy - RBy; - - coefAngularL[1] = tmp; - coefAngularL[2] = 3 * tmp2; - coefAngularL[3] = 3 * tmp * tmp2; - coefAngularL[4] = 3 * tmp2 * tmp2; - coefAngularL[5] = 3 * tmp * tmp2 * tmp2; 
- coefAngularL[6] = tmp2 * tmp2 * tmp2; - coefAngularL[7] = tmp * tmp2 * tmp2 * tmp2; - - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+3, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+2, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - - }else if (KLMNBx == 2 && KLMNBy == 2) { - numAngularL = 9; - QUICKDouble tmp = RAx - RBx; - QUICKDouble tmp2 = RAy - RBy; - - coefAngularL[1] = 2 * tmp; - coefAngularL[2] = 2 * tmp2; - coefAngularL[3] = 4 * tmp * tmp2; - coefAngularL[4] = tmp * tmp; - coefAngularL[5] = tmp2 * tmp2; - coefAngularL[6] = 2 * tmp * tmp2 * tmp2; - coefAngularL[7] = 2 * tmp * tmp * tmp2; - coefAngularL[8] = tmp * tmp * tmp2 * tmp2; - - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy+2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+2, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+2, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[7] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - }else if (KLMNBx == 2 && KLMNBz == 2) { - numAngularL = 9; - QUICKDouble tmp = RAx - RBx; - QUICKDouble tmp2 = RAz - RBz; - - coefAngularL[1] = 2 * tmp; - coefAngularL[2] = 2 * tmp2; - coefAngularL[3] = 4 * 
tmp * tmp2; - coefAngularL[4] = tmp * tmp; - coefAngularL[5] = tmp2 * tmp2; - coefAngularL[6] = 2 * tmp * tmp2 * tmp2; - coefAngularL[7] = 2 * tmp * tmp * tmp2; - coefAngularL[8] = tmp * tmp * tmp2 * tmp2; - - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz+2, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+2, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+2, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+2, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[7] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - }else if (KLMNBy == 2 && KLMNBz == 2) { - numAngularL = 9; - QUICKDouble tmp = RAy - RBy; - QUICKDouble tmp2 = RAz - RBz; - - coefAngularL[1] = 2 * tmp; - coefAngularL[2] = 2 * tmp2; - coefAngularL[3] = 4 * tmp * tmp2; - coefAngularL[4] = tmp * tmp; - coefAngularL[5] = tmp2 * tmp2; - coefAngularL[6] = 2 * tmp * tmp2 * tmp2; - coefAngularL[7] = 2 * tmp * tmp * tmp2; - coefAngularL[8] = tmp * tmp * tmp2 * tmp2; - - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz+2, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+2, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+2, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[7] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, 
TRANSDIM); - }else if (KLMNBx == 1 && KLMNBy == 1 && KLMNBz == 2) { - numAngularL = 12; - QUICKDouble tmp = RAx - RBx; - QUICKDouble tmp2 = RAy - RBy; - QUICKDouble tmp3 = RAz - RBz; - - coefAngularL[1] = tmp; - coefAngularL[2] = tmp2; - coefAngularL[3] = 2 * tmp3; - coefAngularL[4] = tmp * tmp2; - coefAngularL[5] = 2 * tmp * tmp3; - coefAngularL[6] = 2 * tmp2 * tmp3; - coefAngularL[7] = tmp3 * tmp3; - coefAngularL[8] = 2 * tmp * tmp2 * tmp3; - coefAngularL[9] = tmp * tmp3 * tmp3; - coefAngularL[10] = tmp2 * tmp3 * tmp3; - coefAngularL[11] = tmp * tmp2 * tmp3 * tmp3; - - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz+2, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz+2, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy+1, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+2, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[7] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[8] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[9] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[10] =LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - }else if (KLMNBx == 1 && KLMNBz == 1 && KLMNBy == 2) { - numAngularL = 12; - QUICKDouble tmp = RAx - RBx; - QUICKDouble tmp2 = RAz - RBz; - QUICKDouble tmp3 = RAy - RBy; - - coefAngularL[1] = tmp; - coefAngularL[2] = tmp2; - coefAngularL[3] = 2 * tmp3; - coefAngularL[4] = tmp * tmp2; - coefAngularL[5] = 2 * tmp * tmp3; - coefAngularL[6] = 2 * tmp2 * tmp3; - coefAngularL[7] = tmp3 * tmp3; - coefAngularL[8] = 2 * tmp * tmp2 * 
tmp3; - coefAngularL[9] = tmp * tmp3 * tmp3; - coefAngularL[10] = tmp2 * tmp3 * tmp3; - coefAngularL[11] = tmp * tmp2 * tmp3 * tmp3; - - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+2, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy+2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy+1, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+2, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[7] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[8] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[9] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[10] =LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - }else if (KLMNBy == 1 && KLMNBz == 1 && KLMNBx == 2) { - numAngularL = 12; - QUICKDouble tmp = RAy - RBy; - QUICKDouble tmp2 = RAz - RBz; - QUICKDouble tmp3 = RAx - RBx; - - coefAngularL[1] = tmp; - coefAngularL[2] = tmp2; - coefAngularL[3] = 2 * tmp3; - coefAngularL[4] = tmp * tmp2; - coefAngularL[5] = 2 * tmp * tmp3; - coefAngularL[6] = 2 * tmp2 * tmp3; - coefAngularL[7] = tmp3 * tmp3; - coefAngularL[8] = 2 * tmp * tmp2 * tmp3; - coefAngularL[9] = tmp * tmp3 * tmp3; - coefAngularL[10] = tmp2 * tmp3 * tmp3; - coefAngularL[11] = tmp * tmp2 * tmp3 * tmp3; - - angularL[1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+2, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[2] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+2, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[3] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy+1, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - 
angularL[4] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+2, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[5] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[6] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[7] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[8] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx+1, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[9] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz+1, TRANSDIM, TRANSDIM, TRANSDIM); - angularL[10] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy+1, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); - } - angularL[numAngularL - 1] = LOC3(DEV_SIM_CHAR_TRANS, KLMNAx, KLMNAy, KLMNAz, TRANSDIM, TRANSDIM, TRANSDIM); return numAngularL; - - } - - - #endif -__device__ __forceinline__ void hrrwholegrad2_ffff(QUICKDouble* const Yaax, QUICKDouble* const Yaay, QUICKDouble* const Yaaz, \ - QUICKDouble* const Ybbx, QUICKDouble* const Ybby, QUICKDouble* const Ybbz, \ - QUICKDouble* const Yccx, QUICKDouble* const Yccy, QUICKDouble* const Yccz, \ - const int I, const int J, const int K, const int L, \ - const int III, int JJJ, const int KKK, const int LLL, const int IJKLTYPE, - const QUICKDouble* store, const QUICKDouble* storeAA, const QUICKDouble* storeBB, const QUICKDouble* storeCC, \ - const QUICKDouble RAx, const QUICKDouble RAy, const QUICKDouble RAz, \ - const QUICKDouble RBx, const QUICKDouble RBy, const QUICKDouble RBz, \ - const QUICKDouble RCx, const QUICKDouble RCy, const QUICKDouble RCz, \ - const QUICKDouble RDx, const QUICKDouble RDy, const QUICKDouble RDz, int* const smem_int, -int** const smem_int_ptr, QUICKDouble** const smem_dbl_ptr, unsigned char** const smem_char_ptr, unsigned char* const smem_char) +__device__ static inline void hrrwholegrad2_ffff(QUICKDouble* const Yaax, QUICKDouble* const Yaay, QUICKDouble* const Yaaz, + QUICKDouble* const Ybbx, QUICKDouble* const Ybby, 
QUICKDouble* const Ybbz, + QUICKDouble* const Yccx, QUICKDouble* const Yccy, QUICKDouble* const Yccz, + const int III, int JJJ, const int KKK, const int LLL, + const QUICKDouble* store, const QUICKDouble* storeAA, const QUICKDouble* storeBB, const QUICKDouble* storeCC, + const QUICKDouble RAx, const QUICKDouble RAy, const QUICKDouble RAz, + const QUICKDouble RBx, const QUICKDouble RBy, const QUICKDouble RBz, + const QUICKDouble RCx, const QUICKDouble RCy, const QUICKDouble RCz, + const QUICKDouble RDx, const QUICKDouble RDy, const QUICKDouble RDz, int* const smem_int, + int** const smem_int_ptr, QUICKDouble** const smem_dbl_ptr, + unsigned char** const smem_char_ptr, unsigned char* const smem_char) { unsigned char angularL[12], angularR[12]; QUICKDouble coefAngularL[12], coefAngularR[12]; - + *Yaax = 0.0; *Yaay = 0.0; *Yaaz = 0.0; @@ -626,783 +594,869 @@ int** const smem_int_ptr, QUICKDouble** const smem_dbl_ptr, unsigned char** cons *Yccx = 0.0; *Yccy = 0.0; *Yccz = 0.0; - - QUICKDouble constant = DEV_SIM_DBL_PTR_CONS[III-1] * DEV_SIM_DBL_PTR_CONS[JJJ-1] * DEV_SIM_DBL_PTR_CONS[KKK-1] * DEV_SIM_DBL_PTR_CONS[LLL-1]; + + QUICKDouble constant = DEV_SIM_DBL_PTR_CONS[III - 1] * DEV_SIM_DBL_PTR_CONS[JJJ - 1] + * DEV_SIM_DBL_PTR_CONS[KKK - 1] * DEV_SIM_DBL_PTR_CONS[LLL - 1]; int numAngularL, numAngularR; - - numAngularR = lefthrr(RCx, RCy, RCz, RDx, RDy, RDz, \ - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,KKK-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,KKK-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,KKK-1,3,DEV_SIM_INT_NBASIS), \ - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,LLL-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,LLL-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,LLL-1,3,DEV_SIM_INT_NBASIS), \ - L, coefAngularR, angularR, smem_char); - - - // Part A - x - - numAngularL = lefthrr(RAx, RAy, RAz, RBx, RBy, RBz, \ - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,III-1,3,DEV_SIM_INT_NBASIS) + 1, LOC2(DEV_SIM_CHAR_PTR_KLMN,1,III-1,3,DEV_SIM_INT_NBASIS), 
LOC2(DEV_SIM_CHAR_PTR_KLMN,2,III-1,3,DEV_SIM_INT_NBASIS), \ - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,JJJ-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,JJJ-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,JJJ-1,3,DEV_SIM_INT_NBASIS), \ - J, coefAngularL, angularL, smem_char); + numAngularR = lefthrr(RCx, RCy, RCz, RDx, RDy, RDz, + LOC2(DEV_SIM_CHAR_PTR_KLMN, 0, KKK - 1, 3, DEV_SIM_INT_NBASIS), + LOC2(DEV_SIM_CHAR_PTR_KLMN, 1, KKK - 1, 3, DEV_SIM_INT_NBASIS), + LOC2(DEV_SIM_CHAR_PTR_KLMN, 2, KKK - 1, 3, DEV_SIM_INT_NBASIS), + LOC2(DEV_SIM_CHAR_PTR_KLMN, 0, LLL - 1, 3, DEV_SIM_INT_NBASIS), + LOC2(DEV_SIM_CHAR_PTR_KLMN, 1, LLL - 1, 3, DEV_SIM_INT_NBASIS), + LOC2(DEV_SIM_CHAR_PTR_KLMN, 2, LLL - 1, 3, DEV_SIM_INT_NBASIS), + coefAngularR, angularR, smem_char); - for (int i = 0; i= 1) { - + + if (LOC2(DEV_SIM_CHAR_PTR_KLMN, 0, III - 1, 3, DEV_SIM_INT_NBASIS) >= 1) { numAngularL = lefthrr(RAx, RAy, RAz, RBx, RBy, RBz, - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,III-1,3,DEV_SIM_INT_NBASIS) - 1, LOC2(DEV_SIM_CHAR_PTR_KLMN,1,III-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,III-1,3,DEV_SIM_INT_NBASIS), - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,JJJ-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,JJJ-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,JJJ-1,3,DEV_SIM_INT_NBASIS), - J, coefAngularL, angularL, smem_char); - for (int i = 0; i= 1) { - + + if (LOC2(DEV_SIM_CHAR_PTR_KLMN, 1, III - 1, 3, DEV_SIM_INT_NBASIS) >= 1) { numAngularL = lefthrr(RAx, RAy, RAz, RBx, RBy, RBz, - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,III-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,III-1,3,DEV_SIM_INT_NBASIS) - 1, LOC2(DEV_SIM_CHAR_PTR_KLMN,2,III-1,3,DEV_SIM_INT_NBASIS), - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,JJJ-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,JJJ-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,JJJ-1,3,DEV_SIM_INT_NBASIS), - J, coefAngularL, angularL, smem_char); - for (int i = 0; i= 1) { - + + if (LOC2(DEV_SIM_CHAR_PTR_KLMN, 2, III - 1, 3, DEV_SIM_INT_NBASIS) >= 1) { numAngularL = 
lefthrr(RAx, RAy, RAz, RBx, RBy, RBz, - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,III-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,III-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,III-1,3,DEV_SIM_INT_NBASIS) - 1, - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,JJJ-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,JJJ-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,JJJ-1,3,DEV_SIM_INT_NBASIS), - J, coefAngularL, angularL, smem_char); - for (int i = 0; i= 1) { - + + if (LOC2(DEV_SIM_CHAR_PTR_KLMN, 0, JJJ - 1, 3, DEV_SIM_INT_NBASIS) >= 1) { numAngularL = lefthrr_2(RAx, RAy, RAz, RBx, RBy, RBz, - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,III-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,III-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,III-1,3,DEV_SIM_INT_NBASIS), - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,JJJ-1,3,DEV_SIM_INT_NBASIS) - 1, LOC2(DEV_SIM_CHAR_PTR_KLMN,1,JJJ-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,JJJ-1,3,DEV_SIM_INT_NBASIS), - J - 1, coefAngularL, angularL, smem_char); - for (int i = 0; i= 1) { - + + if (LOC2(DEV_SIM_CHAR_PTR_KLMN, 1, JJJ - 1, 3, DEV_SIM_INT_NBASIS) >= 1) { numAngularL = lefthrr_2(RAx, RAy, RAz, RBx, RBy, RBz, - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,III-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,III-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,III-1,3,DEV_SIM_INT_NBASIS), - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,JJJ-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,JJJ-1,3,DEV_SIM_INT_NBASIS) - 1, LOC2(DEV_SIM_CHAR_PTR_KLMN,2,JJJ-1,3,DEV_SIM_INT_NBASIS), - J - 1, coefAngularL, angularL, smem_char); - for (int i = 0; i= 1) { - + + if (LOC2(DEV_SIM_CHAR_PTR_KLMN, 2, JJJ - 1, 3, DEV_SIM_INT_NBASIS) >= 1) { numAngularL = lefthrr_2(RAx, RAy, RAz, RBx, RBy, RBz, - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,III-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,III-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,III-1,3,DEV_SIM_INT_NBASIS), - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,JJJ-1,3,DEV_SIM_INT_NBASIS), 
LOC2(DEV_SIM_CHAR_PTR_KLMN,1,JJJ-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,JJJ-1,3,DEV_SIM_INT_NBASIS) - 1, - J - 1, coefAngularL, angularL, smem_char); + LOC2(DEV_SIM_CHAR_PTR_KLMN, 0, III - 1, 3, DEV_SIM_INT_NBASIS), + LOC2(DEV_SIM_CHAR_PTR_KLMN, 1, III - 1, 3, DEV_SIM_INT_NBASIS), + LOC2(DEV_SIM_CHAR_PTR_KLMN, 2, III - 1, 3, DEV_SIM_INT_NBASIS), + LOC2(DEV_SIM_CHAR_PTR_KLMN, 0, JJJ - 1, 3, DEV_SIM_INT_NBASIS), + LOC2(DEV_SIM_CHAR_PTR_KLMN, 1, JJJ - 1, 3, DEV_SIM_INT_NBASIS), + LOC2(DEV_SIM_CHAR_PTR_KLMN, 2, JJJ - 1, 3, DEV_SIM_INT_NBASIS) - 1, + coefAngularL, angularL, smem_char); + for (int i = 0; i= 1) { - + + if (LOC2(DEV_SIM_CHAR_PTR_KLMN, 0, KKK - 1, 3, DEV_SIM_INT_NBASIS) >= 1) { numAngularR = lefthrr(RCx, RCy, RCz, RDx, RDy, RDz, - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,KKK-1,3,DEV_SIM_INT_NBASIS) - 1, LOC2(DEV_SIM_CHAR_PTR_KLMN,1,KKK-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,KKK-1,3,DEV_SIM_INT_NBASIS), - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,LLL-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,LLL-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,LLL-1,3,DEV_SIM_INT_NBASIS), - L, coefAngularR, angularR, smem_char); - - for (int i = 0; i= 1) { - + + if (LOC2(DEV_SIM_CHAR_PTR_KLMN, 1, KKK - 1, 3, DEV_SIM_INT_NBASIS) >= 1) { numAngularR = lefthrr(RCx, RCy, RCz, RDx, RDy, RDz, - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,KKK-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,KKK-1,3,DEV_SIM_INT_NBASIS) - 1, LOC2(DEV_SIM_CHAR_PTR_KLMN,2,KKK-1,3,DEV_SIM_INT_NBASIS), - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,LLL-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,LLL-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,LLL-1,3,DEV_SIM_INT_NBASIS), - L, coefAngularR, angularR, smem_char); - - for (int i = 0; i= 1) { - + + if (LOC2(DEV_SIM_CHAR_PTR_KLMN, 2, KKK - 1, 3, DEV_SIM_INT_NBASIS) >= 1) { numAngularR = lefthrr(RCx, RCy, RCz, RDx, RDy, RDz, - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,KKK-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,KKK-1,3,DEV_SIM_INT_NBASIS), 
LOC2(DEV_SIM_CHAR_PTR_KLMN,2,KKK-1,3,DEV_SIM_INT_NBASIS) - 1, - LOC2(DEV_SIM_CHAR_PTR_KLMN,0,LLL-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,1,LLL-1,3,DEV_SIM_INT_NBASIS), LOC2(DEV_SIM_CHAR_PTR_KLMN,2,LLL-1,3,DEV_SIM_INT_NBASIS), - L, coefAngularR, angularR, smem_char); - - for (int i = 0; i ---> - -> expo(I) * xyz (I) + expo(J) * xyz(J) - P = --------------------------------------- - expo(I) + expo(J) - Those two are pre-calculated in CPU stage. - - */ + In the following comments, we have I, J, K, L denote the primitive gaussian function we use, and + for example, expo(III, ksumtype(II)) stands for the expo for the IIIth primitive guassian function for II shell, + we use I to express the corresponding index. + AB = expo(I)+expo(J) + ---> ---> + -> expo(I) * xyz (I) + expo(J) * xyz(J) + P = --------------------------------------- + expo(I) + expo(J) + Those two are pre-calculated in CPU stage. + + */ int ii_start = DEV_SIM_INT_PTR_PRIM_START[II]; int jj_start = DEV_SIM_INT_PTR_PRIM_START[JJ]; - - QUICKDouble AA = LOC2(DEV_SIM_DBL_PTR_GCEXPO, III , DEV_SIM_INT_PTR_KSUMTYPE[II] - 1, MAXPRIM, DEV_SIM_INT_NBASIS); - QUICKDouble BB = LOC2(DEV_SIM_DBL_PTR_GCEXPO, JJJ , DEV_SIM_INT_PTR_KSUMTYPE[JJ] - 1, MAXPRIM, DEV_SIM_INT_NBASIS); - - QUICKDouble AB = LOC2(DEV_SIM_DBL_PTR_EXPOSUM, ii_start+III, jj_start+JJJ, DEV_SIM_INT_PRIM_TOTAL, DEV_SIM_INT_PRIM_TOTAL); - QUICKDouble Px = LOC2(DEV_SIM_DBL_PTR_WEIGHTEDCENTERX, ii_start+III, jj_start+JJJ, DEV_SIM_INT_PRIM_TOTAL, DEV_SIM_INT_PRIM_TOTAL); - QUICKDouble Py = LOC2(DEV_SIM_DBL_PTR_WEIGHTEDCENTERY, ii_start+III, jj_start+JJJ, DEV_SIM_INT_PRIM_TOTAL, DEV_SIM_INT_PRIM_TOTAL); - QUICKDouble Pz = LOC2(DEV_SIM_DBL_PTR_WEIGHTEDCENTERZ, ii_start+III, jj_start+JJJ, DEV_SIM_INT_PRIM_TOTAL, DEV_SIM_INT_PRIM_TOTAL); - - /* - X1 is the contracted coeffecient, which is pre-calcuated in CPU stage as well. - cutoffprim is used to cut too small prim gaussian function when bring density matrix into consideration. 
- */ - QUICKDouble cutoffPrim = DNMax * LOC2(DEV_SIM_DBL_PTR_CUTPRIM, kStartI+III, kStartJ+JJJ, DEV_SIM_INT_JBASIS, DEV_SIM_INT_JBASIS); - QUICKDouble X1 = LOC4(DEV_SIM_DBL_PTR_XCOEFF, kStartI+III, kStartJ+JJJ, I - DEV_SIM_INT_PTR_QSTART[II], J - DEV_SIM_INT_PTR_QSTART[JJ], DEV_SIM_INT_JBASIS, DEV_SIM_INT_JBASIS, 2, 2); - - - for (int j = 0; j DEV_SIM_DBL_PRIMLIMIT) { - - QUICKDouble CC = LOC2(DEV_SIM_DBL_PTR_GCEXPO, KKK , DEV_SIM_INT_PTR_KSUMTYPE[KK] - 1, MAXPRIM, DEV_SIM_INT_NBASIS); - /* - CD = expo(L)+expo(K) - ABCD = 1/ (AB + CD) = 1 / (expo(I)+expo(J)+expo(K)+expo(L)) - AB * CD (expo(I)+expo(J))*(expo(K)+expo(L)) - Rou(Greek Letter) = ----------- = ------------------------------------ - AB + CD expo(I)+expo(J)+expo(K)+expo(L) - - expo(I)+expo(J) expo(K)+expo(L) - ABcom = -------------------------------- CDcom = -------------------------------- - expo(I)+expo(J)+expo(K)+expo(L) expo(I)+expo(J)+expo(K)+expo(L) - - ABCDtemp = 1/2(expo(I)+expo(J)+expo(K)+expo(L)) - */ - - int kk_start = DEV_SIM_INT_PTR_PRIM_START[KK]; - int ll_start = DEV_SIM_INT_PTR_PRIM_START[LL]; - - QUICKDouble CD = LOC2(DEV_SIM_DBL_PTR_EXPOSUM, kk_start+KKK, ll_start+LLL, DEV_SIM_INT_PRIM_TOTAL, DEV_SIM_INT_PRIM_TOTAL); - - QUICKDouble ABCD = 1/(AB+CD); - - /* - X2 is the multiplication of four indices normalized coeffecient - */ - QUICKDouble X2 = sqrt(ABCD) * X1 * LOC4(DEV_SIM_DBL_PTR_XCOEFF, kStartK+KKK, kStartL+LLL, K - DEV_SIM_INT_PTR_QSTART[KK], L - DEV_SIM_INT_PTR_QSTART[LL], DEV_SIM_INT_JBASIS, DEV_SIM_INT_JBASIS, 2, 2); - - /* - Q' is the weighting center of K and L - ---> ---> - -> ------> expo(K)*xyz(K)+expo(L)*xyz(L) - Q = P'(K,L) = ------------------------------ - expo(K) + expo(L) - - W' is the weight center for I, J, K, L - - ---> ---> ---> ---> - -> expo(I)*xyz(I) + expo(J)*xyz(J) + expo(K)*xyz(K) +expo(L)*xyz(L) - W = ------------------------------------------------------------------- - expo(I) + expo(J) + expo(K) + expo(L) - -> -> 2 - RPQ =| P - Q | - - -> -> 2 - T = ROU * 
| P - Q| - */ - - QUICKDouble Qx = LOC2(DEV_SIM_DBL_PTR_WEIGHTEDCENTERX, kk_start+KKK, ll_start+LLL, DEV_SIM_INT_PRIM_TOTAL, DEV_SIM_INT_PRIM_TOTAL); - QUICKDouble Qy = LOC2(DEV_SIM_DBL_PTR_WEIGHTEDCENTERY, kk_start+KKK, ll_start+LLL, DEV_SIM_INT_PRIM_TOTAL, DEV_SIM_INT_PRIM_TOTAL); - QUICKDouble Qz = LOC2(DEV_SIM_DBL_PTR_WEIGHTEDCENTERZ, kk_start+KKK, ll_start+LLL, DEV_SIM_INT_PRIM_TOTAL, DEV_SIM_INT_PRIM_TOTAL); - - //QUICKDouble T = AB * CD * ABCD * (SQR(Px - Qx) + SQR(Py - Qy) + SQR(Pz - Qz)); - - //QUICKDouble YVerticalTemp[VDIM1*VDIM2*VDIM3]; - FmT(I + J + K + L + 2, AB * CD * ABCD * (SQR(Px - Qx) + SQR(Py - Qy) + SQR(Pz - Qz)), YVerticalTemp); - - for (int i = 0; i<=I+J+K+L+2; i++) { - VY(0, 0, i) = VY(0, 0, i) * X2; - } - - //QUICKDouble store2[STOREDIM*STOREDIM]; - -#if defined int_spdf4 - - ERint_grad_vrr_ffff_1(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_2(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_3(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_4(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, 
(Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_5(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_6(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_7(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_8(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_9(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_10(I, J, K, 
L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_11(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_12(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_13(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_14(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_15(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * 
ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_16(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_17(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_18(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_19(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_20(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_21(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - 
RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_22(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_23(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_24(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_25(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - ERint_grad_vrr_ffff_26(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + QUICKDouble AA = LOC2(DEV_SIM_DBL_PTR_GCEXPO, III, 
DEV_SIM_INT_PTR_KSUMTYPE[II] - 1, MAXPRIM, DEV_SIM_INT_NBASIS); + QUICKDouble BB = LOC2(DEV_SIM_DBL_PTR_GCEXPO, JJJ, DEV_SIM_INT_PTR_KSUMTYPE[JJ] - 1, MAXPRIM, DEV_SIM_INT_NBASIS); + QUICKDouble AB = LOC2(DEV_SIM_DBL_PTR_EXPOSUM, ii_start + III, jj_start + JJJ, + DEV_SIM_INT_PRIM_TOTAL, DEV_SIM_INT_PRIM_TOTAL); + QUICKDouble Px = LOC2(DEV_SIM_DBL_PTR_WEIGHTEDCENTERX, ii_start + III, jj_start + JJJ, + DEV_SIM_INT_PRIM_TOTAL, DEV_SIM_INT_PRIM_TOTAL); + QUICKDouble Py = LOC2(DEV_SIM_DBL_PTR_WEIGHTEDCENTERY, ii_start + III, jj_start + JJJ, + DEV_SIM_INT_PRIM_TOTAL, DEV_SIM_INT_PRIM_TOTAL); + QUICKDouble Pz = LOC2(DEV_SIM_DBL_PTR_WEIGHTEDCENTERZ, ii_start + III, jj_start + JJJ, + DEV_SIM_INT_PRIM_TOTAL, DEV_SIM_INT_PRIM_TOTAL); - ERint_grad_vrr_ffff_27(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_28(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - - - ERint_grad_vrr_ffff_29(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); - + /* + X1 is the contracted coeffecient, which is pre-calcuated in CPU stage as well. + cutoffprim is used to cut too small prim gaussian function when bring density matrix into consideration. 
+ */ + QUICKDouble cutoffPrim = DNMax + * LOC2(DEV_SIM_DBL_PTR_CUTPRIM, kStartI + III, kStartJ + JJJ, DEV_SIM_INT_JBASIS, DEV_SIM_INT_JBASIS); + QUICKDouble X1 = LOC4(DEV_SIM_DBL_PTR_XCOEFF, kStartI + III, kStartJ + JJJ, + I - DEV_SIM_INT_PTR_QSTART[II], J - DEV_SIM_INT_PTR_QSTART[JJ], + DEV_SIM_INT_JBASIS, DEV_SIM_INT_JBASIS, 2, 2); + + for (int j = 0; j < kPrimK * kPrimL; j++) { + int LLL = (int) j / kPrimK; + int KKK = (int) j - kPrimK * LLL; + + if (cutoffPrim * LOC2(DEV_SIM_DBL_PTR_CUTPRIM, kStartK + KKK, kStartL + LLL, DEV_SIM_INT_JBASIS, DEV_SIM_INT_JBASIS) + > DEV_SIM_DBL_PRIMLIMIT) { + QUICKDouble CC = LOC2(DEV_SIM_DBL_PTR_GCEXPO, KKK, DEV_SIM_INT_PTR_KSUMTYPE[KK] - 1, MAXPRIM, DEV_SIM_INT_NBASIS); + /* + CD = expo(L)+expo(K) + ABCD = 1/ (AB + CD) = 1 / (expo(I)+expo(J)+expo(K)+expo(L)) + AB * CD (expo(I)+expo(J))*(expo(K)+expo(L)) + Rou(Greek Letter) = ----------- = ------------------------------------ + AB + CD expo(I)+expo(J)+expo(K)+expo(L) - ERint_grad_vrr_ffff_30(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + expo(I)+expo(J) expo(K)+expo(L) + ABcom = -------------------------------- CDcom = -------------------------------- + expo(I)+expo(J)+expo(K)+expo(L) expo(I)+expo(J)+expo(K)+expo(L) + ABCDtemp = 1/2(expo(I)+expo(J)+expo(K)+expo(L)) + */ - ERint_grad_vrr_ffff_31(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + int kk_start = DEV_SIM_INT_PTR_PRIM_START[KK]; + int ll_start = DEV_SIM_INT_PTR_PRIM_START[LL]; 
+ QUICKDouble CD = LOC2(DEV_SIM_DBL_PTR_EXPOSUM, kk_start + KKK, ll_start + LLL, + DEV_SIM_INT_PRIM_TOTAL, DEV_SIM_INT_PRIM_TOTAL); - ERint_grad_vrr_ffff_32(I, J, K, L, II, JJ, KK, LL, - Px - RAx, Py - RAy, Pz - RAz, (Px*AB+Qx*CD)*ABCD - Px, (Py*AB+Qy*CD)*ABCD - Py, (Pz*AB+Qz*CD)*ABCD - Pz, - Qx - RCx, Qy - RCy, Qz - RCz, (Px*AB+Qx*CD)*ABCD - Qx, (Py*AB+Qy*CD)*ABCD - Qy, (Pz*AB+Qz*CD)*ABCD - Qz, - 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + QUICKDouble ABCD = 1.0 / (AB + CD); + /* + X2 is the multiplication of four indices normalized coeffecient + */ + QUICKDouble X2 = sqrt(ABCD) * X1 + * LOC4(DEV_SIM_DBL_PTR_XCOEFF, kStartK + KKK, kStartL + LLL, + K - DEV_SIM_INT_PTR_QSTART[KK], L - DEV_SIM_INT_PTR_QSTART[LL], + DEV_SIM_INT_JBASIS, DEV_SIM_INT_JBASIS, 2, 2); + /* + Q' is the weighting center of K and L + ---> ---> + -> ------> expo(K)*xyz(K)+expo(L)*xyz(L) + Q = P'(K,L) = ------------------------------ + expo(K) + expo(L) + + W' is the weight center for I, J, K, L + + ---> ---> ---> ---> + -> expo(I)*xyz(I) + expo(J)*xyz(J) + expo(K)*xyz(K) +expo(L)*xyz(L) + W = ------------------------------------------------------------------- + expo(I) + expo(J) + expo(K) + expo(L) + -> -> 2 + RPQ =| P - Q | + + -> -> 2 + T = ROU * | P - Q| + */ + + QUICKDouble Qx = LOC2(DEV_SIM_DBL_PTR_WEIGHTEDCENTERX, kk_start + KKK, ll_start + LLL, + DEV_SIM_INT_PRIM_TOTAL, DEV_SIM_INT_PRIM_TOTAL); + QUICKDouble Qy = LOC2(DEV_SIM_DBL_PTR_WEIGHTEDCENTERY, kk_start + KKK, ll_start + LLL, + DEV_SIM_INT_PRIM_TOTAL, DEV_SIM_INT_PRIM_TOTAL); + QUICKDouble Qz = LOC2(DEV_SIM_DBL_PTR_WEIGHTEDCENTERZ, kk_start + KKK, ll_start + LLL, + DEV_SIM_INT_PRIM_TOTAL, DEV_SIM_INT_PRIM_TOTAL); + + FmT(I + J + K + L + 2, AB * CD * ABCD + * (SQR(Px - Qx) + SQR(Py - Qy) + SQR(Pz - Qz)), YVerticalTemp); + + for (int i = 0; i <= I + J + K + L + 2; i++) { + VY(0, 0, i) = VY(0, 0, i) * X2; + } +#if defined(int_spdf4) + ERint_grad_vrr_ffff_1(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py 
- RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_2(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_3(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_4(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_5(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_6(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py 
- RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_7(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_8(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_9(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_10(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_11(I, J, K, L, II, JJ, KK, LL, + Px - RAx, 
Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_12(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_13(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_14(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_15(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_16(I, J, K, L, II, JJ, KK, LL, + Px - 
RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_17(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_18(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_19(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_20(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_21(I, J, K, L, II, JJ, KK, LL, + 
Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_22(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_23(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_24(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_25(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_26(I, J, K, L, II, JJ, KK, 
LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_27(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_28(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_29(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_30(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_31(I, J, K, L, II, 
JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); + + ERint_grad_vrr_ffff_32(I, J, K, L, II, JJ, KK, LL, + Px - RAx, Py - RAy, Pz - RAz, + (Px * AB + Qx * CD) * ABCD - Px, (Py * AB + Qy * CD) * ABCD - Py, (Pz * AB + Qz * CD) * ABCD - Pz, + Qx - RCx, Qy - RCy, Qz - RCz, + (Px * AB + Qx * CD) * ABCD - Qx, (Py * AB + Qy * CD) * ABCD - Qy, (Pz * AB + Qz * CD) * ABCD - Qz, + 0.5 * ABCD, 0.5 / AB, 0.5 / CD, AB * ABCD, CD * ABCD, store2, YVerticalTemp); #endif - - for (int i = 4; i< 84; i++) { - for (int j = 4; j< 84; j++) { + for (int i = 4; i < 84; i++) { + for (int j = 4; j < 84; j++) { // if (i < STOREDIM && j < STOREDIM) { - LOCSTORE(store, j-STORE_INIT, i-STORE_INIT , STORE_DIM, STORE_DIM) += LOCSTORE(store2, j, i, STOREDIM, STOREDIM); + LOCSTORE(store, j - STORE_INIT, i - STORE_INIT, STORE_DIM, STORE_DIM) += LOCSTORE(store2, j, i, STOREDIM, STOREDIM); // } } } - - for (int i = 4; i< 84; i++) { - for (int j = 10; j< 120; j++) { + for (int i = 4; i < 84; i++) { + for (int j = 10; j < 120; j++) { // if (i < STOREDIM && j < STOREDIM) { - LOCSTORE(storeAA, j-STORE_INIT_J_AA, i-STORE_INIT_I_AA, STORE_DIM_J_AA, STORE_DIM_I_AA) += LOCSTORE(store2, j, i, STOREDIM, STOREDIM) * AA * 2 ; - LOCSTORE(storeBB, j-STORE_INIT_J_AA, i-STORE_INIT_I_AA, STORE_DIM_J_AA, STORE_DIM_I_AA) += LOCSTORE(store2, j, i, STOREDIM, STOREDIM) * BB * 2 ; + LOCSTORE(storeAA, j - STORE_INIT_J_AA, i - STORE_INIT_I_AA, STORE_DIM_J_AA, STORE_DIM_I_AA) + += LOCSTORE(store2, j, i, STOREDIM, STOREDIM) * AA * 2; // } - } } - for (int i = 10; i< 120; i++) { - for (int j = 4; j< 84; j++) { + for (int i = 4; i < 84; i++) { + for (int j = 10; j < 120; j++) { // if (i < STOREDIM && j < STOREDIM) { - 
LOCSTORE(storeCC, j-STORE_INIT_J_CC, i-STORE_INIT_I_CC, STORE_DIM_J_CC, STORE_DIM_I_CC) += LOCSTORE(store2, j, i, STOREDIM, STOREDIM) * CC * 2 ; + LOCSTORE(storeBB, j - STORE_INIT_J_AA, i - STORE_INIT_I_AA, STORE_DIM_J_AA, STORE_DIM_I_AA) + += LOCSTORE(store2, j, i, STOREDIM, STOREDIM) * BB * 2; +// } + } + } + for (int i = 10; i < 120; i++) { + for (int j = 4; j < 84; j++) { +// if (i < STOREDIM && j < STOREDIM) { + LOCSTORE(storeCC, j - STORE_INIT_J_CC, i - STORE_INIT_I_CC, STORE_DIM_J_CC, STORE_DIM_I_CC) + += LOCSTORE(store2, j, i, STOREDIM, STOREDIM) * CC * 2; // } } - } - - + } } } } - -/* - for (int i = Sumindex[K]; i< Sumindex[K+L+2]; i++) { - for (int j = Sumindex[I]; j< Sumindex[I+J+2]; j++) { - if (i < STOREDIM && j < STOREDIM) { - printf("STORE %d %d %d %d %d %d %d %d %d %d %.9f \n",II, JJ, KK, LL, I, J, K, L, j, i, LOCSTORE(store, j, i , STOREDIM, STOREDIM)); - } - } - } -*/ + +// for (int i = Sumindex[K]; i < Sumindex[K + L + 2]; i++) { +// for (int j = Sumindex[I]; j < Sumindex[I + J + 2]; j++) { +// if (i < STOREDIM && j < STOREDIM) { +// printf("STORE %d %d %d %d %d %d %d %d %d %d %.9f \n",II, JJ, KK, LL, I, J, K, L, j, i, LOCSTORE(store, j, i , STOREDIM, STOREDIM)); +// } +// } +// } + QUICKDouble AGradx = 0.0; QUICKDouble AGrady = 0.0; QUICKDouble AGradz = 0.0; @@ -1413,373 +1467,345 @@ const smem_dbl_ptr, unsigned char** const smem_char_ptr, unsigned char* const sm QUICKDouble CGrady = 0.0; QUICKDouble CGradz = 0.0; - int AStart = (DEV_SIM_INT_PTR_KATOM[II]-1) * 3; - int BStart = (DEV_SIM_INT_PTR_KATOM[JJ]-1) * 3; - int CStart = (DEV_SIM_INT_PTR_KATOM[KK]-1) * 3; - int DStart = (DEV_SIM_INT_PTR_KATOM[LL]-1) * 3; - - QUICKDouble RBx, RBy, RBz; - QUICKDouble RDx, RDy, RDz; - - RBx = LOC2(DEV_SIM_DBL_PTR_XYZ, 0 , DEV_SIM_INT_PTR_KATOM[JJ]-1, 3, DEV_SIM_INT_NATOM); - RBy = LOC2(DEV_SIM_DBL_PTR_XYZ, 1 , DEV_SIM_INT_PTR_KATOM[JJ]-1, 3, DEV_SIM_INT_NATOM); - RBz = LOC2(DEV_SIM_DBL_PTR_XYZ, 2 , DEV_SIM_INT_PTR_KATOM[JJ]-1, 3, DEV_SIM_INT_NATOM); - - - 
RDx = LOC2(DEV_SIM_DBL_PTR_XYZ, 0 , DEV_SIM_INT_PTR_KATOM[LL]-1, 3, DEV_SIM_INT_NATOM); - RDy = LOC2(DEV_SIM_DBL_PTR_XYZ, 1 , DEV_SIM_INT_PTR_KATOM[LL]-1, 3, DEV_SIM_INT_NATOM); - RDz = LOC2(DEV_SIM_DBL_PTR_XYZ, 2 , DEV_SIM_INT_PTR_KATOM[LL]-1, 3, DEV_SIM_INT_NATOM); - - int III1 = LOC2(DEV_SIM_INT_PTR_QSBASIS, II, I, DEV_SIM_INT_NSHELL, 4); - int III2 = LOC2(DEV_SIM_INT_PTR_QFBASIS, II, I, DEV_SIM_INT_NSHELL, 4); - int JJJ1 = LOC2(DEV_SIM_INT_PTR_QSBASIS, JJ, J, DEV_SIM_INT_NSHELL, 4); - int JJJ2 = LOC2(DEV_SIM_INT_PTR_QFBASIS, JJ, J, DEV_SIM_INT_NSHELL, 4); - int KKK1 = LOC2(DEV_SIM_INT_PTR_QSBASIS, KK, K, DEV_SIM_INT_NSHELL, 4); - int KKK2 = LOC2(DEV_SIM_INT_PTR_QFBASIS, KK, K, DEV_SIM_INT_NSHELL, 4); - int LLL1 = LOC2(DEV_SIM_INT_PTR_QSBASIS, LL, L, DEV_SIM_INT_NSHELL, 4); - int LLL2 = LOC2(DEV_SIM_INT_PTR_QFBASIS, LL, L, DEV_SIM_INT_NSHELL, 4); - - - int IJKLTYPE = 999; - - int nbasis = DEV_SIM_INT_NBASIS; - - for (int III = III1; III <= III2; III++) { - for (int JJJ = MAX(III,JJJ1); JJJ <= JJJ2; JJJ++) { - for (int KKK = MAX(III,KKK1); KKK <= KKK2; KKK++) { - for (int LLL = MAX(KKK,LLL1); LLL <= LLL2; LLL++) { - - if (III < KKK || - ((III == JJJ) && (III == LLL)) || - ((III == JJJ) && (III < LLL)) || - ((JJJ == LLL) && (III < JJJ)) || - ((III == KKK) && (III < JJJ) && (JJJ < LLL))) { - - QUICKDouble Yaax, Yaay, Yaaz; - QUICKDouble Ybbx, Ybby, Ybbz; - QUICKDouble Yccx, Yccy, Yccz; - - - hrrwholegrad2_ffff - (&Yaax, &Yaay, &Yaaz, \ - &Ybbx, &Ybby, &Ybbz, \ - &Yccx, &Yccy, &Yccz, \ - I, J, K, L,\ - III, JJJ, KKK, LLL, IJKLTYPE, \ - store, storeAA, storeBB, storeCC, \ - RAx, RAy, RAz, RBx, RBy, RBz, \ - RCx, RCy, RCz, RDx, RDy, RDz, smem_int, smem_int_ptr, smem_dbl_ptr, smem_char_ptr, smem_char); - - QUICKDouble constant = 0.0 ; - -#ifdef OSHELL - QUICKDouble DENSELJ = (QUICKDouble) (LOC2(DEV_SIM_DBL_PTR_DENSE, LLL-1, JJJ-1, nbasis, nbasis)+LOC2(DEV_SIM_DBL_PTR_DENSEb, LLL-1, JJJ-1, nbasis, nbasis)); - QUICKDouble DENSELI = (QUICKDouble) 
(LOC2(DEV_SIM_DBL_PTR_DENSE, LLL-1, III-1, nbasis, nbasis)+LOC2(DEV_SIM_DBL_PTR_DENSEb, LLL-1, III-1, nbasis, nbasis)); - QUICKDouble DENSELK = (QUICKDouble) (LOC2(DEV_SIM_DBL_PTR_DENSE, LLL-1, KKK-1, nbasis, nbasis)+LOC2(DEV_SIM_DBL_PTR_DENSEb, LLL-1, KKK-1, nbasis, nbasis)); - QUICKDouble DENSEJI = (QUICKDouble) (LOC2(DEV_SIM_DBL_PTR_DENSE, JJJ-1, III-1, nbasis, nbasis)+LOC2(DEV_SIM_DBL_PTR_DENSEb, JJJ-1, III-1, nbasis, nbasis)); - - QUICKDouble DENSEKIA = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, KKK-1, III-1, nbasis, nbasis); - QUICKDouble DENSEKJA = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, KKK-1, JJJ-1, nbasis, nbasis); - QUICKDouble DENSELJA = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, LLL-1, JJJ-1, nbasis, nbasis); - QUICKDouble DENSELIA = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, LLL-1, III-1, nbasis, nbasis); - QUICKDouble DENSEJIA = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, JJJ-1, III-1, nbasis, nbasis); - - QUICKDouble DENSEKIB = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSEb, KKK-1, III-1, nbasis, nbasis); - QUICKDouble DENSEKJB = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSEb, KKK-1, JJJ-1, nbasis, nbasis); - QUICKDouble DENSELJB = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSEb, LLL-1, JJJ-1, nbasis, nbasis); - QUICKDouble DENSELIB = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSEb, LLL-1, III-1, nbasis, nbasis); - QUICKDouble DENSEJIB = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSEb, JJJ-1, III-1, nbasis, nbasis); + int AStart = (DEV_SIM_INT_PTR_KATOM[II] - 1) * 3; + int BStart = (DEV_SIM_INT_PTR_KATOM[JJ] - 1) * 3; + int CStart = (DEV_SIM_INT_PTR_KATOM[KK] - 1) * 3; + int DStart = (DEV_SIM_INT_PTR_KATOM[LL] - 1) * 3; + + QUICKDouble RBx, RBy, RBz; + QUICKDouble RDx, RDy, RDz; + + RBx = LOC2(DEV_SIM_DBL_PTR_XYZ, 0, DEV_SIM_INT_PTR_KATOM[JJ] - 1, 3, DEV_SIM_INT_NATOM); + RBy = LOC2(DEV_SIM_DBL_PTR_XYZ, 1, DEV_SIM_INT_PTR_KATOM[JJ] - 1, 3, DEV_SIM_INT_NATOM); + RBz = LOC2(DEV_SIM_DBL_PTR_XYZ, 2, DEV_SIM_INT_PTR_KATOM[JJ] - 1, 3, DEV_SIM_INT_NATOM); + + RDx = 
LOC2(DEV_SIM_DBL_PTR_XYZ, 0, DEV_SIM_INT_PTR_KATOM[LL] - 1, 3, DEV_SIM_INT_NATOM); + RDy = LOC2(DEV_SIM_DBL_PTR_XYZ, 1, DEV_SIM_INT_PTR_KATOM[LL] - 1, 3, DEV_SIM_INT_NATOM); + RDz = LOC2(DEV_SIM_DBL_PTR_XYZ, 2, DEV_SIM_INT_PTR_KATOM[LL] - 1, 3, DEV_SIM_INT_NATOM); + + int III1 = LOC2(DEV_SIM_INT_PTR_QSBASIS, II, I, DEV_SIM_INT_NSHELL, 4); + int III2 = LOC2(DEV_SIM_INT_PTR_QFBASIS, II, I, DEV_SIM_INT_NSHELL, 4); + int JJJ1 = LOC2(DEV_SIM_INT_PTR_QSBASIS, JJ, J, DEV_SIM_INT_NSHELL, 4); + int JJJ2 = LOC2(DEV_SIM_INT_PTR_QFBASIS, JJ, J, DEV_SIM_INT_NSHELL, 4); + int KKK1 = LOC2(DEV_SIM_INT_PTR_QSBASIS, KK, K, DEV_SIM_INT_NSHELL, 4); + int KKK2 = LOC2(DEV_SIM_INT_PTR_QFBASIS, KK, K, DEV_SIM_INT_NSHELL, 4); + int LLL1 = LOC2(DEV_SIM_INT_PTR_QSBASIS, LL, L, DEV_SIM_INT_NSHELL, 4); + int LLL2 = LOC2(DEV_SIM_INT_PTR_QFBASIS, LL, L, DEV_SIM_INT_NSHELL, 4); + + int IJKLTYPE = 999; + + int nbasis = DEV_SIM_INT_NBASIS; + + for (int III = III1; III <= III2; III++) { + for (int JJJ = MAX(III, JJJ1); JJJ <= JJJ2; JJJ++) { + for (int KKK = MAX(III, KKK1); KKK <= KKK2; KKK++) { + for (int LLL = MAX(KKK, LLL1); LLL <= LLL2; LLL++) { + if (III < KKK + || (III == JJJ && III == LLL) + || (III == JJJ && III < LLL) + || (JJJ == LLL && III < JJJ) + || (III == KKK && III < JJJ && JJJ < LLL)) { + QUICKDouble Yaax, Yaay, Yaaz; + QUICKDouble Ybbx, Ybby, Ybbz; + QUICKDouble Yccx, Yccy, Yccz; + + hrrwholegrad2_ffff(&Yaax, &Yaay, &Yaaz, &Ybbx, &Ybby, &Ybbz, &Yccx, &Yccy, &Yccz, + III, JJJ, KKK, LLL, + store, storeAA, storeBB, storeCC, RAx, RAy, RAz, RBx, RBy, RBz, + RCx, RCy, RCz, RDx, RDy, RDz, smem_int, smem_int_ptr, smem_dbl_ptr, smem_char_ptr, smem_char); + + QUICKDouble constant = 0.0; +#if defined(OSHELL) + QUICKDouble DENSELJ = (QUICKDouble) (LOC2(DEV_SIM_DBL_PTR_DENSE, LLL - 1, JJJ - 1, nbasis, nbasis) + + LOC2(DEV_SIM_DBL_PTR_DENSEb, LLL - 1, JJJ - 1, nbasis, nbasis)); + QUICKDouble DENSELI = (QUICKDouble) (LOC2(DEV_SIM_DBL_PTR_DENSE, LLL - 1, III - 1, nbasis, nbasis) + + 
LOC2(DEV_SIM_DBL_PTR_DENSEb, LLL - 1, III - 1, nbasis, nbasis)); + QUICKDouble DENSELK = (QUICKDouble) (LOC2(DEV_SIM_DBL_PTR_DENSE, LLL - 1, KKK - 1, nbasis, nbasis) + + LOC2(DEV_SIM_DBL_PTR_DENSEb, LLL - 1, KKK - 1, nbasis, nbasis)); + QUICKDouble DENSEJI = (QUICKDouble) (LOC2(DEV_SIM_DBL_PTR_DENSE, JJJ - 1, III - 1, nbasis, nbasis) + + LOC2(DEV_SIM_DBL_PTR_DENSEb, JJJ - 1, III - 1, nbasis, nbasis)); + + QUICKDouble DENSEKIA = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, KKK - 1, III - 1, nbasis, nbasis); + QUICKDouble DENSEKJA = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, KKK - 1, JJJ - 1, nbasis, nbasis); + QUICKDouble DENSELJA = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, LLL - 1, JJJ - 1, nbasis, nbasis); + QUICKDouble DENSELIA = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, LLL - 1, III - 1, nbasis, nbasis); + QUICKDouble DENSEJIA = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, JJJ - 1, III - 1, nbasis, nbasis); + + QUICKDouble DENSEKIB = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSEb, KKK - 1, III - 1, nbasis, nbasis); + QUICKDouble DENSEKJB = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSEb, KKK - 1, JJJ - 1, nbasis, nbasis); + QUICKDouble DENSELJB = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSEb, LLL - 1, JJJ - 1, nbasis, nbasis); + QUICKDouble DENSELIB = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSEb, LLL - 1, III - 1, nbasis, nbasis); + QUICKDouble DENSEJIB = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSEb, JJJ - 1, III - 1, nbasis, nbasis); #else - QUICKDouble DENSEKI = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, KKK-1, III-1, nbasis, nbasis); - QUICKDouble DENSEKJ = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, KKK-1, JJJ-1, nbasis, nbasis); - QUICKDouble DENSELJ = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, LLL-1, JJJ-1, nbasis, nbasis); - QUICKDouble DENSELI = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, LLL-1, III-1, nbasis, nbasis); - QUICKDouble DENSELK = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, LLL-1, KKK-1, nbasis, nbasis); - QUICKDouble DENSEJI = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, JJJ-1, 
III-1, nbasis, nbasis); + QUICKDouble DENSEKI = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, KKK - 1, III - 1, nbasis, nbasis); + QUICKDouble DENSEKJ = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, KKK - 1, JJJ - 1, nbasis, nbasis); + QUICKDouble DENSELJ = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, LLL - 1, JJJ - 1, nbasis, nbasis); + QUICKDouble DENSELI = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, LLL - 1, III - 1, nbasis, nbasis); + QUICKDouble DENSELK = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, LLL - 1, KKK - 1, nbasis, nbasis); + QUICKDouble DENSEJI = (QUICKDouble) LOC2(DEV_SIM_DBL_PTR_DENSE, JJJ - 1, III - 1, nbasis, nbasis); #endif - - if (II < JJ && II < KK && KK < LL || - ( III < KKK && III < JJJ && KKK < LLL)) { - //constant = ( 4.0 * DENSEJI * DENSELK - DENSEKI * DENSELJ - DENSELI * DENSEKJ); -#ifdef OSHELL - constant = ( 4.0 * DENSEJI * DENSELK - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSEKIA * DENSELJA - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSELIA * DENSEKJA - - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSEKIB * DENSELJB - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSELIB * DENSEKJB); + + if (II < JJ && II < KK && KK < LL + || (III < KKK && III < JJJ && KKK < LLL)) { + //constant = ( 4.0 * DENSEJI * DENSELK - DENSEKI * DENSELJ - DENSELI * DENSEKJ); +#if defined(OSHELL) + constant = ( 4.0 * DENSEJI * DENSELK - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSEKIA * DENSELJA - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSELIA * DENSEKJA + - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSEKIB * DENSELJB - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSELIB * DENSEKJB); #else - constant = ( 4.0 * DENSEJI * DENSELK - DEV_SIM_DBL_HYB_COEFF * DENSEKI * DENSELJ - DEV_SIM_DBL_HYB_COEFF * DENSELI * DENSEKJ); + constant = ( 4.0 * DENSEJI * DENSELK - DEV_SIM_DBL_HYB_COEFF * DENSEKI * DENSELJ - DEV_SIM_DBL_HYB_COEFF * DENSELI * DENSEKJ); #endif - }else{ - if (III < KKK) { - if( III == JJJ && KKK == LLL){ - //constant = (DENSEJI * DENSELK - 0.5 * DENSEKI * DENSEKI); -#ifdef OSHELL - constant = (DENSEJI * DENSELK - DEV_SIM_DBL_HYB_COEFF * DENSEKIA * DENSEKIA - 
DEV_SIM_DBL_HYB_COEFF * DENSEKIB * DENSEKIB); + } else { + if (III < KKK) { + if (III == JJJ && KKK == LLL) { + //constant = (DENSEJI * DENSELK - 0.5 * DENSEKI * DENSEKI); +#if defined(OSHELL) + constant = (DENSEJI * DENSELK - DEV_SIM_DBL_HYB_COEFF * DENSEKIA * DENSEKIA - DEV_SIM_DBL_HYB_COEFF * DENSEKIB * DENSEKIB); #else - constant = (DENSEJI * DENSELK - 0.5 * DEV_SIM_DBL_HYB_COEFF * DENSEKI * DENSEKI); + constant = (DENSEJI * DENSELK - 0.5 * DEV_SIM_DBL_HYB_COEFF * DENSEKI * DENSEKI); #endif - }else if (JJJ == KKK && JJJ == LLL){ - //constant = DENSELJ * DENSEJI; -#ifdef OSHELL - constant = 2.0 * DENSELJ * DENSEJI - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSELJA * DENSEJIA - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSELJB * DENSEJIB; + } else if (JJJ == KKK && JJJ == LLL) { + //constant = DENSELJ * DENSEJI; +#if defined(OSHELL) + constant = 2.0 * DENSELJ * DENSEJI - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSELJA * DENSEJIA - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSELJB * DENSEJIB; #else - constant = 2.0 * DENSELJ * DENSEJI - DEV_SIM_DBL_HYB_COEFF * DENSELJ * DENSEJI; + constant = 2.0 * DENSELJ * DENSEJI - DEV_SIM_DBL_HYB_COEFF * DENSELJ * DENSEJI; #endif - }else if (KKK == LLL && III < JJJ && JJJ != KKK){ - //constant = (2.0* DENSEJI * DENSELK - DENSEKI * DENSEKJ); -#ifdef OSHELL - constant = (2.0* DENSEJI * DENSELK - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSEKIA * DENSEKJA - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSEKIB * DENSEKJB); + } else if (KKK == LLL && III < JJJ && JJJ != KKK) { + //constant = (2.0* DENSEJI * DENSELK - DENSEKI * DENSEKJ); +#if defined(OSHELL) + constant = (2.0* DENSEJI * DENSELK - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSEKIA * DENSEKJA - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSEKIB * DENSEKJB); #else - constant = (2.0* DENSEJI * DENSELK - DEV_SIM_DBL_HYB_COEFF * DENSEKI * DENSEKJ); + constant = (2.0* DENSEJI * DENSELK - DEV_SIM_DBL_HYB_COEFF * DENSEKI * DENSEKJ); #endif - }else if ( III == JJJ && KKK < LLL){ - //constant = (2.0* DENSELK * DENSEJI - DENSEKI * DENSELI); -#ifdef OSHELL - constant 
= (2.0* DENSELK * DENSEJI - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSEKIA * DENSELIA - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSEKIB * DENSELIB); + } else if (III == JJJ && KKK < LLL) { + //constant = (2.0* DENSELK * DENSEJI - DENSEKI * DENSELI); +#if defined(OSHELL) + constant = (2.0* DENSELK * DENSEJI - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSEKIA * DENSELIA - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSEKIB * DENSELIB); #else - constant = (2.0* DENSELK * DENSEJI - DEV_SIM_DBL_HYB_COEFF * DENSEKI * DENSELI); + constant = (2.0* DENSELK * DENSEJI - DEV_SIM_DBL_HYB_COEFF * DENSEKI * DENSELI); #endif - } - } - else{ - if (JJJ <= LLL) { - if (III == JJJ && III == KKK && III == LLL) { - // Do nothing - }else if (III==JJJ && III==KKK && III < LLL){ - //constant = DENSELI * DENSEJI; -#ifdef OSHELL - constant = 2.0 * DENSELI * DENSEJI - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSELIA * DENSEJIA - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSELIB * DENSEJIB; + } + } else { + if (JJJ <= LLL) { + if (III == JJJ && III == KKK && III == LLL) { + // Do nothing + } else if (III == JJJ && III == KKK && III < LLL) { + //constant = DENSELI * DENSEJI; +#if defined(OSHELL) + constant = 2.0 * DENSELI * DENSEJI - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSELIA * DENSEJIA - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSELIB * DENSEJIB; #else - constant = 2.0 * DENSELI * DENSEJI - DEV_SIM_DBL_HYB_COEFF * DENSELI * DENSEJI; + constant = 2.0 * DENSELI * DENSEJI - DEV_SIM_DBL_HYB_COEFF * DENSELI * DENSEJI; #endif - }else if (III==KKK && JJJ==LLL && III < JJJ){ - //constant = (1.5 * DENSEJI * DENSEJI - 0.5 * DENSELJ * DENSEKI); -#ifdef OSHELL - constant = (2.0 * DENSEJI * DENSEJI - DEV_SIM_DBL_HYB_COEFF * DENSEJIA * DENSEJIA - DEV_SIM_DBL_HYB_COEFF * DENSELJA * DENSEKIA - - DEV_SIM_DBL_HYB_COEFF * DENSEJIB * DENSEJIB - DEV_SIM_DBL_HYB_COEFF * DENSELJB * DENSEKIB); + } else if (III == KKK && JJJ == LLL && III < JJJ) { + //constant = (1.5 * DENSEJI * DENSEJI - 0.5 * DENSELJ * DENSEKI); +#if defined(OSHELL) + constant = (2.0 * DENSEJI * DENSEJI - 
DEV_SIM_DBL_HYB_COEFF * DENSEJIA * DENSEJIA - DEV_SIM_DBL_HYB_COEFF * DENSELJA * DENSEKIA + - DEV_SIM_DBL_HYB_COEFF * DENSEJIB * DENSEJIB - DEV_SIM_DBL_HYB_COEFF * DENSELJB * DENSEKIB); #else - constant = (2.0 * DENSEJI * DENSEJI - 0.5 * DEV_SIM_DBL_HYB_COEFF * DENSEJI * DENSEJI - 0.5 * DEV_SIM_DBL_HYB_COEFF * DENSELJ * DENSEKI); + constant = (2.0 * DENSEJI * DENSEJI - 0.5 * DEV_SIM_DBL_HYB_COEFF * DENSEJI * DENSEJI - 0.5 * DEV_SIM_DBL_HYB_COEFF * DENSELJ * DENSEKI); #endif - }else if (III== KKK && III < JJJ && JJJ < LLL){ - //constant = (3.0 * DENSEJI * DENSELI - DENSELJ * DENSEKI); -#ifdef OSHELL - constant = (4.0 * DENSEJI * DENSELI - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSEJIA * DENSELIA - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSELJA * DENSEKIA - - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSEJIB * DENSELIB - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSELJB * DENSEKIB); + } else if (III == KKK && III < JJJ && JJJ < LLL) { + //constant = (3.0 * DENSEJI * DENSELI - DENSELJ * DENSEKI); +#if defined(OSHELL) + constant = (4.0 * DENSEJI * DENSELI - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSEJIA * DENSELIA - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSELJA * DENSEKIA + - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSEJIB * DENSELIB - 2.0 * DEV_SIM_DBL_HYB_COEFF * DENSELJB * DENSEKIB); #else - constant = (4.0 * DENSEJI * DENSELI - DEV_SIM_DBL_HYB_COEFF * DENSEJI * DENSELI - DEV_SIM_DBL_HYB_COEFF * DENSELJ * DENSEKI); + constant = (4.0 * DENSEJI * DENSELI - DEV_SIM_DBL_HYB_COEFF * DENSEJI * DENSELI - DEV_SIM_DBL_HYB_COEFF * DENSELJ * DENSEKI); #endif - } - } - } } - - AGradx += constant * Yaax; - AGrady += constant * Yaay; - AGradz += constant * Yaaz; - - BGradx += constant * Ybbx; - BGrady += constant * Ybby; - BGradz += constant * Ybbz; - - CGradx += constant * Yccx; - CGrady += constant * Yccy; - CGradz += constant * Yccz; - - - } } } + + AGradx += constant * Yaax; + AGrady += constant * Yaay; + AGradz += constant * Yaaz; + + BGradx += constant * Ybbx; + BGrady += constant * Ybby; + BGradz += constant * Ybbz; + + CGradx += 
constant * Yccx; + CGrady += constant * Yccy; + CGradz += constant * Yccz; } } - - - - /* - if ( abs(AGradx) > 0.0 || abs(AGrady) > 0.0 || abs(AGradz) > 0.0 || - abs(BGradx) > 0.0 || abs(BGrady) > 0.0 || abs(BGradz) > 0.0 || - abs(CGradx) > 0.0 || abs(CGrady) > 0.0 || abs(CGradz) > 0.0) { - - printf("%i %i %i %i %i %i %i %i %20.10e %20.10e %20.10e %20.10e %20.10e %20.10e %20.10e %20.10e %20.10e \n", II, JJ, KK, LL, \ - I, J, K, L, AGradx, AGrady, AGradz, BGradx, BGrady, BGradz, CGradx, CGrady, CGradz); - }*/ - -/* - } } } -*/ - + +// if (abs(AGradx) > 0.0 || abs(AGrady) > 0.0 || abs(AGradz) > 0.0 +// || abs(BGradx) > 0.0 || abs(BGrady) > 0.0 || abs(BGradz) > 0.0 +// || abs(CGradx) > 0.0 || abs(CGrady) > 0.0 || abs(CGradz) > 0.0) { +// printf("%i %i %i %i %i %i %i %i %20.10e %20.10e %20.10e %20.10e %20.10e %20.10e %20.10e %20.10e %20.10e \n", +// II, JJ, KK, LL, I, J, K, L, AGradx, AGrady, AGradz, +// BGradx, BGrady, BGradz, CGradx, CGrady, CGradz); +// } + #ifdef DEBUG - //printf("FILE: %s, LINE: %d, FUNCTION: %s, DEV_SIM_DBL_HYB_COEFF \n", __FILE__, __LINE__, __func__); -#endif - +// printf("FILE: %s, LINE: %d, FUNCTION: %s, DEV_SIM_DBL_HYB_COEFF \n", __FILE__, __LINE__, __func__); +#endif + GPUATOMICADD(&DEV_SIM_PTR_GRAD[AStart], AGradx, GRADSCALE); GPUATOMICADD(&DEV_SIM_PTR_GRAD[AStart + 1], AGrady, GRADSCALE); GPUATOMICADD(&DEV_SIM_PTR_GRAD[AStart + 2], AGradz, GRADSCALE); - + GPUATOMICADD(&DEV_SIM_PTR_GRAD[BStart], BGradx, GRADSCALE); GPUATOMICADD(&DEV_SIM_PTR_GRAD[BStart + 1], BGrady, GRADSCALE); GPUATOMICADD(&DEV_SIM_PTR_GRAD[BStart + 2], BGradz, GRADSCALE); - + GPUATOMICADD(&DEV_SIM_PTR_GRAD[CStart], CGradx, GRADSCALE); GPUATOMICADD(&DEV_SIM_PTR_GRAD[CStart + 1], CGrady, GRADSCALE); GPUATOMICADD(&DEV_SIM_PTR_GRAD[CStart + 2], CGradz, GRADSCALE); - + GPUATOMICADD(&DEV_SIM_PTR_GRAD[DStart], -AGradx - BGradx - CGradx, GRADSCALE); GPUATOMICADD(&DEV_SIM_PTR_GRAD[DStart + 1], -AGrady - BGrady - CGrady, GRADSCALE); GPUATOMICADD(&DEV_SIM_PTR_GRAD[DStart + 2], 
-AGradz - BGradz - CGradz, GRADSCALE); } - -#ifdef OSHELL -#if defined int_spdf4 -__global__ void -__launch_bounds__(ERI_GRAD_FFFF_TPB, ERI_GRAD_FFFF_BPSM) getGrad_oshell_kernel_ffff() -#endif +#if defined(OSHELL) + #if defined(int_spdf4) +__global__ void __launch_bounds__(ERI_GRAD_FFFF_TPB, ERI_GRAD_FFFF_BPSM) getGrad_oshell_kernel_ffff() + #endif #else -#if defined int_spdf4 -__global__ void -__launch_bounds__(ERI_GRAD_FFFF_TPB, ERI_GRAD_FFFF_BPSM) getGrad_kernel_ffff(int *dev_int_data, -int **dev_int_ptr_data, QUICKDouble *dev_dbl_data, QUICKDouble **dev_dbl_ptr_data, int2 -**dev_int2_ptr_data, unsigned char **dev_char_ptr_data, unsigned char *dev_char_data, QUICKAtomicType **dev_grad_ptr_data, const int ffStart, const int sqrQshell) -#endif + #if defined(int_spdf4) +__global__ void __launch_bounds__(ERI_GRAD_FFFF_TPB, ERI_GRAD_FFFF_BPSM) getGrad_kernel_ffff(int *dev_int_data, + int **dev_int_ptr_data, QUICKDouble *dev_dbl_data, QUICKDouble **dev_dbl_ptr_data, + int2 **dev_int2_ptr_data, unsigned char **dev_char_ptr_data, unsigned char *dev_char_data, + QUICKAtomicType **dev_grad_ptr_data, const int ffStart, const int sqrQshell) + #endif #endif { - extern __shared__ QUICKDouble smem_buffer[]; - QUICKDouble *smem_dbl = smem_buffer; - QUICKDouble **smem_dbl_ptr = (QUICKDouble**) &smem_dbl[ERI_GRAD_FFFF_SMEM_DBL_SIZE*ERI_GRAD_FFFF_TPB]; - int **smem_int_ptr = (int**) &smem_dbl_ptr[ERI_GRAD_FFFF_SMEM_DBL_PTR_SIZE*ERI_GRAD_FFFF_TPB]; - int2 **smem_int2_ptr = (int2**) &smem_int_ptr[ERI_GRAD_FFFF_SMEM_INT_PTR_SIZE*ERI_GRAD_FFFF_TPB]; - unsigned char **smem_char_ptr = (unsigned char**) &smem_int2_ptr[ERI_GRAD_FFFF_SMEM_INT2_PTR_SIZE*ERI_GRAD_FFFF_TPB]; - int *smem_int = (int*) &smem_char_ptr[ERI_GRAD_FFFF_SMEM_CHAR_PTR_SIZE*ERI_GRAD_FFFF_TPB]; - unsigned char *smem_char=(unsigned char*) &smem_int[ERI_GRAD_FFFF_SMEM_INT_SIZE*ERI_GRAD_FFFF_TPB]; - QUICKAtomicType **smem_grad_ptr = (QUICKAtomicType**) &smem_char[ERI_GRAD_FFFF_SMEM_CHAR_SIZE]; + QUICKDouble *smem_dbl = 
smem_buffer; + QUICKDouble **smem_dbl_ptr = (QUICKDouble **) &smem_dbl[ERI_GRAD_FFFF_SMEM_DBL_SIZE * ERI_GRAD_FFFF_TPB]; + int **smem_int_ptr = (int **) &smem_dbl_ptr[ERI_GRAD_FFFF_SMEM_DBL_PTR_SIZE * ERI_GRAD_FFFF_TPB]; + int2 **smem_int2_ptr = (int2 **) &smem_int_ptr[ERI_GRAD_FFFF_SMEM_INT_PTR_SIZE * ERI_GRAD_FFFF_TPB]; + unsigned char **smem_char_ptr = (unsigned char **) &smem_int2_ptr[ERI_GRAD_FFFF_SMEM_INT2_PTR_SIZE * ERI_GRAD_FFFF_TPB]; + int *smem_int = (int *) &smem_char_ptr[ERI_GRAD_FFFF_SMEM_CHAR_PTR_SIZE * ERI_GRAD_FFFF_TPB]; + unsigned char *smem_char = (unsigned char *) &smem_int[ERI_GRAD_FFFF_SMEM_INT_SIZE * ERI_GRAD_FFFF_TPB]; + QUICKAtomicType **smem_grad_ptr = (QUICKAtomicType **) &smem_char[ERI_GRAD_FFFF_SMEM_CHAR_SIZE]; - for(int i = threadIdx.x; i 0){ -#endif - - int II = DEV_SIM_INT2_PTR_SORTED_YCUTOFFIJ[a].x; - int KK = DEV_SIM_INT2_PTR_SORTED_YCUTOFFIJ[b].x; - - int ii = DEV_SIM_INT_PTR_SORTED_Q[II]; - int kk = DEV_SIM_INT_PTR_SORTED_Q[KK]; - - if (ii<=kk){ - - int JJ = DEV_SIM_INT2_PTR_SORTED_YCUTOFFIJ[a].y; - int LL = DEV_SIM_INT2_PTR_SORTED_YCUTOFFIJ[b].y; - - int iii = DEV_SIM_INT_PTR_SORTED_QNUMBER[II]; - int jjj = DEV_SIM_INT_PTR_SORTED_QNUMBER[JJ]; - int kkk = DEV_SIM_INT_PTR_SORTED_QNUMBER[KK]; - int lll = DEV_SIM_INT_PTR_SORTED_QNUMBER[LL]; - - - int jj = DEV_SIM_INT_PTR_SORTED_Q[JJ]; - int ll = DEV_SIM_INT_PTR_SORTED_Q[LL]; - - - if ( !((DEV_SIM_INT_PTR_KATOM[ii] == DEV_SIM_INT_PTR_KATOM[jj]) && - (DEV_SIM_INT_PTR_KATOM[ii] == DEV_SIM_INT_PTR_KATOM[kk]) && - (DEV_SIM_INT_PTR_KATOM[ii] == DEV_SIM_INT_PTR_KATOM[ll])) // In case 4 indices are in the same atom - ) { - - int nshell = DEV_SIM_INT_NSHELL; - - QUICKDouble DNMax = MAX(MAX(4.0*LOC2(DEV_SIM_DBL_PTR_CUTMATRIX, ii, jj, nshell, nshell), 4.0*LOC2(DEV_SIM_DBL_PTR_CUTMATRIX, kk, ll, nshell, nshell)), - MAX(MAX(LOC2(DEV_SIM_DBL_PTR_CUTMATRIX, ii, ll, nshell, nshell), LOC2(DEV_SIM_DBL_PTR_CUTMATRIX, ii, kk, nshell, nshell)), - MAX(LOC2(DEV_SIM_DBL_PTR_CUTMATRIX, jj, kk, nshell, 
nshell), LOC2(DEV_SIM_DBL_PTR_CUTMATRIX, jj, ll, nshell, nshell)))); - - - if ((LOC2(DEV_SIM_DBL_PTR_YCUTOFF, kk, ll, nshell, nshell) * LOC2(DEV_SIM_DBL_PTR_YCUTOFF, ii, jj, nshell, nshell))> DEV_SIM_DBL_GRADCUTOFF && \ - (LOC2(DEV_SIM_DBL_PTR_YCUTOFF, kk, ll, nshell, nshell) * LOC2(DEV_SIM_DBL_PTR_YCUTOFF, ii, jj, nshell, nshell) * -DNMax) > DEV_SIM_DBL_GRADCUTOFF) { - -#ifdef OSHELL -#if defined int_spdf4 - if( iii == 3 && jjj == 3 && kkk ==3 && lll ==3){ - iclass_oshell_grad_ffff(iii, jjj, kkk, lll, ii, jj, kk, ll, DNMax, DEV_SIM_DBL_PTR_YVERTICALTEMP+offset, -DEV_SIM_DBL_PTR_STORE+offset, DEV_SIM_DBL_PTR_STORE2+offset, DEV_SIM_DBL_PTR_STOREAA+offset, DEV_SIM_DBL_PTR_STOREBB+offset, -DEV_SIM_DBL_PTR_STORECC+offset, smem_int, smem_dbl, smem_int_ptr, smem_dbl_ptr, smem_char_ptr, smem_char, smem_grad_ptr); - } +#if defined(MPIV_GPU) + if (DEV_SIM_CHAR_PTR_MPI_BCOMPUTE[a] > 0) { #endif + int II = DEV_SIM_INT2_PTR_SORTED_YCUTOFFIJ[a].x; + int KK = DEV_SIM_INT2_PTR_SORTED_YCUTOFFIJ[b].x; + + int ii = DEV_SIM_INT_PTR_SORTED_Q[II]; + int kk = DEV_SIM_INT_PTR_SORTED_Q[KK]; + + if (ii <= kk) { + int JJ = DEV_SIM_INT2_PTR_SORTED_YCUTOFFIJ[a].y; + int LL = DEV_SIM_INT2_PTR_SORTED_YCUTOFFIJ[b].y; + + int iii = DEV_SIM_INT_PTR_SORTED_QNUMBER[II]; + int jjj = DEV_SIM_INT_PTR_SORTED_QNUMBER[JJ]; + int kkk = DEV_SIM_INT_PTR_SORTED_QNUMBER[KK]; + int lll = DEV_SIM_INT_PTR_SORTED_QNUMBER[LL]; + + int jj = DEV_SIM_INT_PTR_SORTED_Q[JJ]; + int ll = DEV_SIM_INT_PTR_SORTED_Q[LL]; + + // In case 4 indices are in the same atom + if (!((DEV_SIM_INT_PTR_KATOM[ii] == DEV_SIM_INT_PTR_KATOM[jj]) + && (DEV_SIM_INT_PTR_KATOM[ii] == DEV_SIM_INT_PTR_KATOM[kk]) + && (DEV_SIM_INT_PTR_KATOM[ii] == DEV_SIM_INT_PTR_KATOM[ll]))) { + int nshell = DEV_SIM_INT_NSHELL; + + QUICKDouble DNMax = MAX(MAX(4.0 * LOC2(DEV_SIM_DBL_PTR_CUTMATRIX, ii, jj, nshell, nshell), + 4.0 * LOC2(DEV_SIM_DBL_PTR_CUTMATRIX, kk, ll, nshell, nshell)), + MAX(MAX(LOC2(DEV_SIM_DBL_PTR_CUTMATRIX, ii, ll, nshell, nshell), + 
LOC2(DEV_SIM_DBL_PTR_CUTMATRIX, ii, kk, nshell, nshell)), + MAX(LOC2(DEV_SIM_DBL_PTR_CUTMATRIX, jj, kk, nshell, nshell), + LOC2(DEV_SIM_DBL_PTR_CUTMATRIX, jj, ll, nshell, nshell)))); + + if ((LOC2(DEV_SIM_DBL_PTR_YCUTOFF, kk, ll, nshell, nshell) + * LOC2(DEV_SIM_DBL_PTR_YCUTOFF, ii, jj, nshell, nshell)) + > DEV_SIM_DBL_GRADCUTOFF + && (LOC2(DEV_SIM_DBL_PTR_YCUTOFF, kk, ll, nshell, nshell) + * LOC2(DEV_SIM_DBL_PTR_YCUTOFF, ii, jj, nshell, nshell) * DNMax) + > DEV_SIM_DBL_GRADCUTOFF) { +#if defined(OSHELL) + #if defined(int_spdf4) + if (iii == 3 && jjj == 3 && kkk == 3 && lll == 3) { + iclass_oshell_grad_ffff(iii, jjj, kkk, lll, ii, jj, kk, ll, + DNMax, DEV_SIM_DBL_PTR_YVERTICALTEMP + offset, + DEV_SIM_DBL_PTR_STORE + offset, DEV_SIM_DBL_PTR_STORE2 + offset, + DEV_SIM_DBL_PTR_STOREAA + offset, DEV_SIM_DBL_PTR_STOREBB + offset, + DEV_SIM_DBL_PTR_STORECC + offset, + smem_int, smem_dbl, smem_int_ptr, smem_dbl_ptr, smem_char_ptr, smem_char, smem_grad_ptr); + } + #endif #else -#if defined int_spdf4 - - if( iii == 3 && jjj == 3 && kkk ==3 && lll ==3){ - iclass_grad_ffff(iii, jjj, kkk, lll, ii, jj, kk, ll, DNMax, DEV_SIM_DBL_PTR_YVERTICALTEMP+offset, -DEV_SIM_DBL_PTR_STORE+offset, DEV_SIM_DBL_PTR_STORE2+offset, DEV_SIM_DBL_PTR_STOREAA+offset, DEV_SIM_DBL_PTR_STOREBB+offset, -DEV_SIM_DBL_PTR_STORECC+offset, smem_int, smem_dbl, smem_int_ptr, smem_dbl_ptr,smem_char_ptr, smem_char, smem_grad_ptr); - } -#endif + #if defined(int_spdf4) + if (iii == 3 && jjj == 3 && kkk == 3 && lll == 3) { + iclass_grad_ffff(iii, jjj, kkk, lll, ii, jj, kk, ll, + DNMax, DEV_SIM_DBL_PTR_YVERTICALTEMP + offset, + DEV_SIM_DBL_PTR_STORE + offset, DEV_SIM_DBL_PTR_STORE2 + offset, + DEV_SIM_DBL_PTR_STOREAA + offset, DEV_SIM_DBL_PTR_STOREBB + offset, + DEV_SIM_DBL_PTR_STORECC + offset, + smem_int, smem_dbl, smem_int_ptr, smem_dbl_ptr,smem_char_ptr, smem_char, smem_grad_ptr); + } + #endif #endif - + } } } - - } - -#ifdef MPIV_GPU +#if defined(MPIV_GPU) } #endif - } } - diff --git 
a/src/gpu/hip/gpu_get2e_grad_ffff.h b/src/gpu/hip/gpu_get2e_grad_ffff.h index ee85b02e..5241f4de 100644 --- a/src/gpu/hip/gpu_get2e_grad_ffff.h +++ b/src/gpu/hip/gpu_get2e_grad_ffff.h @@ -1,4 +1,3 @@ - void getGrad_ffff(_gpu_type gpu); void get_oshell_eri_grad_ffff(_gpu_type gpu); void upload_sim_to_constant_ffff(_gpu_type gpu);