Merge pull request #139 from atillack/read_ligand

Major feature updates and code optimizations
ccsb-scripps · Aug 20, 2021 · 11d0a51 · 11d0a51
2 parents db7968b + f17e873
commit 11d0a51
Show file tree

Hide file tree

Showing 34 changed files with 2,296 additions and 2,374 deletions.
diff --git a/Makefile.Cuda b/Makefile.Cuda
@@ -18,8 +18,6 @@ UNAME := $(shell uname)
 TARGETS = 52 60 61 70
 CUDA_TARGETS=$(foreach target,$(TARGETS),-gencode arch=compute_$(target),code=sm_$(target))
 
-$(shell ./link_cuda.sh)
-
 ifeq ($(DEVICE), CPU)
 	DEV =-DCPU_DEVICE
 else ifeq ($(DEVICE), GPU)
@@ -37,16 +35,14 @@ KCMN_DIR=$(COMMON_DIR)
 BIN_DIR=./bin
 LIB_CUDA = kernels.o -lcurand -lcudart 
 
-
-# Host sources
-HOST_SRC=$(wildcard $(HOST_SRC_DIR)/*.cpp)
-SRC=$(HOST_SRC)
+TARGET := autodock
+TOOL_TARGET := adgpu_analysis
 
 IFLAGS=-I$(COMMON_DIR) -I$(HOST_INC_DIR) -I$(GPU_INCLUDE_PATH) -I$(KRNL_DIR)
 LFLAGS=-L$(GPU_LIBRARY_PATH) -Wl,-rpath=$(GPU_LIBRARY_PATH):$(CPU_LIBRARY_PATH)
 CFLAGS=-std=c++11 $(IFLAGS) $(LFLAGS)
+TOOL_CFLAGS=-std=c++11 -I$(COMMON_DIR) -I$(HOST_INC_DIR)
 
-TARGET := autodock
 ifeq ($(DEVICE), CPU)
 	TARGET:=$(TARGET)_cpu
 else ifeq ($(DEVICE), GPU)
@@ -96,8 +92,6 @@ endif
 CONFIG=RELEASE
 #CONFIG=FDEBUG
 
-
-
 ifeq ($(CONFIG),FDEBUG)
 	OPT =-O0 -g3 -Wall -DDOCK_DEBUG
 	CUDA_FLAGS = -G -use_fast_math --ptxas-options="-v" $(CUDA_TARGETS) -std=c++11
@@ -123,20 +117,22 @@ else
 endif
 # ------------------------------------------------------
 
-all: odock
+all: otool odock
 
 check-env-dev:
 	@if test -z "$$DEVICE"; then \
-		echo "DEVICE is undefined"; \
+		echo "Please set DEVICE to either CPU, GPU, CUDA, or OCLGPU to build docking software."; \
 		exit 1; \
 	else \
 		if [ "$$DEVICE" = "CPU" ]; then \
-			echo "DEVICE is set to $$DEVICE"; \
+			echo "DEVICE is set to $$DEVICE which is not a valid Cuda device."; \
+			exit 1; \
 		else \
 			if [ "$$DEVICE" = "GPU" ]; then \
 				echo "DEVICE is set to $$DEVICE"; \
 			else \
-				echo "DEVICE value is invalid. Set DEVICE to either CPU or GPU"; \
+				echo "DEVICE value is invalid. Please set DEVICE to either CPU, GPU, CUDA, or OCLGPU"; \
+				exit 1; \
 			fi; \
 		fi; \
 	fi; \
@@ -176,15 +172,32 @@ check-env-all: check-env-dev check-env-cpu check-env-gpu
 GIT_VERSION := $(shell ./version_string.sh)
 
 CFLAGS+=-DVERSION=\"$(GIT_VERSION)\"
+TOOL_CFLAGS+=-DVERSION=\"$(GIT_VERSION)\"
 
 # ------------------------------------------------------
 
+# Select the Cuda backend for the host build by symlinking the generic
+# performdocking.{h,cpp} names to their .Cuda variants. The link text is a bare
+# file name, so it resolves relative to the directory that holds the link.
+# -sf overwrites whatever performdocking.{h,cpp} is already there.
+link-code:
+	ln -sf performdocking.h.Cuda $(HOST_INC_DIR)/performdocking.h
+	ln -sf performdocking.cpp.Cuda $(HOST_SRC_DIR)/performdocking.cpp
+
+# Remove the generic performdocking.{h,cpp} entries so that a listing of
+# $(HOST_SRC_DIR)/*.cpp no longer contains performdocking.cpp (the otool rule
+# depends on this). rm -f keeps the rule quiet when the files are already gone.
+unlink-code:
+	rm -f $(HOST_INC_DIR)/performdocking.h $(HOST_SRC_DIR)/performdocking.cpp
+
+# Compile the Cuda kernels with nvcc (-c: compile only, no link).
+# NOTE(review): the target is called `kernels`, but LIB_CUDA links kernels.o and
+# the recipe presumably writes kernels.o (nvcc's default -c output name), so a
+# file named `kernels` is never created and this recipe reruns on every build --
+# confirm whether that is intended.
 kernels: $(KERNEL_SRC)
 	$(NVCC) $(NWI) $(REP) $(CUDA_FLAGS) $(IFLAGS) $(CUDA_INCLUDES) -c $(KRNL_DIR)/kernels.cu
 
-odock: check-env-all kernels $(SRC)
+# Build the analysis tool $(BIN_DIR)/$(TOOL_TARGET) from the host sources,
+# compiled with -DTOOLMODE and TOOL_CFLAGS (no GPU include or library flags).
+# unlink-code runs first so performdocking.cpp is absent from the source list.
+# The list comes from $(shell ls ...) inside the recipe so that it is expanded
+# when the recipe is about to run, i.e. after the prerequisite has removed the
+# file, instead of when the makefile is read.
+# NOTE(review): `all` builds both otool and odock, and odock's link-code
+# recreates the file removed here; this looks order-dependent and therefore
+# unsafe under `make -j` -- confirm.
+otool: unlink-code
+	@echo "Building" $(TOOL_TARGET) "..."
+	$(CPP) \
+	$(shell ls $(HOST_SRC_DIR)/*.cpp) \
+	$(TOOL_CFLAGS) \
+	-o$(BIN_DIR)/$(TOOL_TARGET) \
+	$(PIPELINE) $(OPT) -DTOOLMODE $(REP)
+
+odock: check-env-all kernels link-code
+	@echo "Building" $(TARGET) "..."
 	$(CPP) \
-	$(SRC) \
+	$(shell ls $(HOST_SRC_DIR)/*.cpp) \
 	$(CFLAGS) \
 	$(LIB_CUDA) \
 	-o$(BIN_DIR)/$(TARGET) \

diff --git a/Makefile.OpenCL b/Makefile.OpenCL
@@ -15,8 +15,6 @@ CPP = g++
 LIB_OPENCL = -lOpenCL
 UNAME := $(shell uname)
 
-$(shell ./link_opencl.sh)
-
 ifeq ($(UNAME), Darwin)
 # In case ScoreP (for profiling/tracing) is used,
 # need to link to a *.dylib for instrumentation
@@ -57,14 +55,16 @@ KRNL_DIR=./device
 KCMN_DIR=$(COMMON_DIR)
 BIN_DIR=./bin
 
-# Host sources
-OCL_SRC=$(wildcard $(OCL_SRC_DIR)/*.cpp)
-HOST_SRC=$(wildcard $(HOST_SRC_DIR)/*.cpp)
-SRC=$(OCL_SRC) $(HOST_SRC)
+TARGET := autodock
+TOOL_TARGET := adgpu_analysis
 
 IFLAGS=-I$(COMMON_DIR) -I$(OCL_INC_DIR) -I$(HOST_INC_DIR) -I$(KRNL_DIR) -I$(OCLA_INC_PATH)
 LFLAGS=-L$(OCLA_LIB_PATH)
 CFLAGS=-std=c++11 $(IFLAGS) $(LFLAGS)
+TOOL_CFLAGS=-std=c++11 -I$(COMMON_DIR) -I$(HOST_INC_DIR)
+
+# Host sources
+OCL_SRC=$(wildcard $(OCL_SRC_DIR)/*.cpp)
 
 # Device sources
 KRNL_MAIN=calcenergy.cl
@@ -81,7 +81,6 @@ K_NAMES=-DK1=$(K1_NAME) -DK2=$(K2_NAME) -DK3=$(K3_NAME) -DK4=$(K4_NAME) -DK5=$(K
 # Kernel flags
 KFLAGS=-DKRNL_SOURCE=$(KRNL_DIR)/$(KRNL_MAIN) -DKRNL_DIRECTORY=$(KRNL_DIR) -DKCMN_DIRECTORY=$(KCMN_DIR) $(K_NAMES)
 
-TARGET := autodock
 ifeq ($(DEVICE), CPU)
 	TARGET:=$(TARGET)_cpu
 else ifeq ($(DEVICE), GPU)
@@ -183,11 +182,11 @@ else
 endif
 # ------------------------------------------------------
 
-all: odock
+all: otool odock
 
 check-env-dev:
 	@if test -z "$$DEVICE"; then \
-		echo "DEVICE is undefined"; \
+		echo "Please set DEVICE to either CPU, GPU, CUDA, or OCLGPU to build docking software."; \
 		exit 1; \
 	else \
 		if [ "$$DEVICE" = "CPU" ]; then \
@@ -196,7 +195,8 @@ check-env-dev:
 			if [ "$$DEVICE" = "GPU" ]; then \
 				echo "DEVICE is set to $$DEVICE"; \
 			else \
-				echo "DEVICE value is invalid. Set DEVICE to either CPU or GPU"; \
+				echo "DEVICE value is invalid. Please set DEVICE to either CPU, GPU, CUDA, or OCLGPU"; \
+				exit 1; \
 			fi; \
 		fi; \
 	fi; \
@@ -236,15 +236,36 @@ check-env-all: check-env-dev check-env-cpu check-env-gpu
 GIT_VERSION := $(shell ./version_string.sh)
 
 CFLAGS+=-DVERSION=\"$(GIT_VERSION)\"
+TOOL_CFLAGS+=-DVERSION=\"$(GIT_VERSION)\"
 
 # ------------------------------------------------------
 
+# Tool host sources
+
+# Host sources
+
+# Select the OpenCL backend for the host build by symlinking the generic
+# performdocking.{h,cpp} names to their .OpenCL variants. The link text is a
+# bare file name, so it resolves relative to the directory that holds the link.
+# -sf overwrites whatever performdocking.{h,cpp} is already there.
+link-code:
+	ln -sf performdocking.h.OpenCL $(HOST_INC_DIR)/performdocking.h
+	ln -sf performdocking.cpp.OpenCL $(HOST_SRC_DIR)/performdocking.cpp
+
+# Remove the generic performdocking.{h,cpp} entries so that a listing of
+# $(HOST_SRC_DIR)/*.cpp no longer contains performdocking.cpp (the otool rule
+# depends on this). rm -f keeps the rule quiet when the files are already gone.
+unlink-code:
+	rm -f $(HOST_INC_DIR)/performdocking.h $(HOST_SRC_DIR)/performdocking.cpp
+
+# Run the kernel stringification script before the docking binary is built
+# (odock lists this target as a prerequisite).
+# NOTE(review): presumably this converts the OpenCL kernel sources into string
+# form for embedding in the host program -- confirm in stringify_ocl_krnls.sh.
 stringify:
 	./stringify_ocl_krnls.sh
 
-odock: check-env-all stringify $(SRC)
+# Build the analysis tool $(BIN_DIR)/$(TOOL_TARGET) from the host sources,
+# compiled with -DTOOLMODE and TOOL_CFLAGS (no OpenCL include or library flags).
+# unlink-code runs first so performdocking.cpp is absent from the source list.
+# The list comes from $(shell ls ...) inside the recipe so that it is expanded
+# when the recipe is about to run, i.e. after the prerequisite has removed the
+# file, instead of when the makefile is read.
+# NOTE(review): `all` builds both otool and odock, and odock's link-code
+# recreates the file removed here; this looks order-dependent and therefore
+# unsafe under `make -j` -- confirm.
+otool: unlink-code
+	@echo "Building" $(TOOL_TARGET) "..."
+	$(CPP) \
+	$(shell ls $(HOST_SRC_DIR)/*.cpp) \
+	$(TOOL_CFLAGS) \
+	-o$(BIN_DIR)/$(TOOL_TARGET) \
+	$(PIPELINE) $(OPT) -DTOOLMODE $(REP)
+
+odock: check-env-all stringify link-code
+	@echo "Building" $(TARGET) "..."
 	$(CPP) \
-	$(SRC) \
+	$(OCL_SRC) $(shell ls $(HOST_SRC_DIR)/*.cpp) \
 	$(CFLAGS) \
 	$(LIB_OPENCL) \
 	-o$(BIN_DIR)/$(TARGET) \

diff --git a/README.md b/README.md
@@ -134,7 +134,6 @@ By default the output log file is written in the current working folder. Example
 |--smooth           |   | Smoothing parameter for vdW interactions              | 0.5 (Å)          |
 |--elecmindist      |   | Min. electrostatic potential distance (w/ dpf: 0.5 Å) | 0.01 (Å)         |
 |--modqp            |   | Use modified QASP from VirtualDrug or AD4 original    | 0 (no, use AD4)  |
-|--cgmaps           |   | Use individual maps for CG-G0 instead of the same one | 0 (no, same map) |
 
 Autostop is ON by default since v1.4. The collective distribution of scores among all LGA populations
 is tested for convergence every `<asfreq>` generations, and docking is stopped if the top-scored poses

diff --git a/common/calcenergy_basic.h b/common/calcenergy_basic.h
@@ -43,38 +43,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 // WARNING: it is supposed that unsigned int is 32 bit long
 #define MAX_UINT                4294967296.0f
 
-// Macro for capturing grid values
-	// Original
-	#define GETGRIDVALUE(mempoi,gridsize_x,gridsize_y,gridsize_z,t,z,y,x)   *(mempoi + gridsize_x*(y + gridsize_y*(z + gridsize_z*t)) + x)
-
-	// Optimization 1
-	// #define GETGRIDVALUE_OPT(mempoi,gridsize_x,gridsize_y,mul_tmp,z,y,x)   *(mempoi + gridsize_x*(y + gridsize_y*(z + mul_tmp)) + x)
-
-	// Optimization 2
-	// Implemented directly in the kernel code: calcenergy_fourkernels_intel.cl
-
-typedef enum
-{
-	idx_000 = 0,
-	idx_010 = 1,
-	idx_001 = 2,
-	idx_011 = 3,
-	idx_100 = 4,
-	idx_110 = 5,
-	idx_101 = 6,
-	idx_111 = 7
-} indices;
-
-// Macro for trilinear interpolation
-#define TRILININTERPOL(cube, weights) (cube[idx_000]*weights[idx_000] + \
-                                       cube[idx_010]*weights[idx_010] + \
-                                       cube[idx_001]*weights[idx_001] + \
-                                       cube[idx_011]*weights[idx_011] + \
-                                       cube[idx_100]*weights[idx_100] + \
-                                       cube[idx_110]*weights[idx_110] + \
-                                       cube[idx_101]*weights[idx_101] + \
-                                       cube[idx_111]*weights[idx_111])
-
 // Sticking to array boundaries
 #define stick_to_bounds(x,a,b) x + (x <= a)*(a-x) + (x >= b)*(b-x)
 

diff --git a/common/defines.h b/common/defines.h
@@ -48,6 +48,18 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 	#define NUM_OF_THREADS_PER_BLOCK 16
 #endif
 
+// Named positions 0..7 for eight-element arrays.
+// NOTE(review): presumably the eight corners of a grid cube used for trilinear
+// interpolation -- confirm against the code that indexes with these names.
+// The numbering is NOT the binary value of the three digits in the name:
+// idx_010 is 1 and idx_001 is 2 (likewise idx_110 is 5 and idx_101 is 6).
+typedef enum
+{
+	idx_000 = 0,
+	idx_010 = 1,
+	idx_001 = 2,
+	idx_011 = 3,
+	idx_100 = 4,
+	idx_110 = 5,
+	idx_101 = 6,
+	idx_111 = 7
+} indices;
+
 enum {C=0,N=1,O=2,H=3,XX=4,P=5,S=6};  // see "bond_index" in the "AD4.1_bound.dat" or "AD4_parameters.dat" file.
 #define NUM_ENUM_ATOMTYPES 7 // this should be the length of the enumerated atom types above
 

diff --git a/cuda/GpuData.h b/cuda/GpuData.h
@@ -111,7 +111,8 @@ typedef struct
 
 struct GpuData {
 	int                             devnum;
-	int                             preload_gridsize;
+	int                             devid;
+	int                             preallocated_gridsize;
 	GpuDockparameters               dockpars;
 
 	// Consolidated constants and memory pointers to reduce kernel launch overhead
@@ -146,6 +147,8 @@ struct GpuTempData {
 	int*        pMem_evals_of_new_entities;
 	int*        pMem_gpu_evals_of_runs;
 	uint32_t*   pMem_prng_states;
+	char*       device_name;
+	bool        device_busy;
 };
 #endif
 
diff --git a/cuda/kernel3.cu b/cuda/kernel3.cu
@@ -62,10 +62,10 @@ gpu_perform_LS_kernel(
 	float3* calc_coords = (float3*)sFloatBuff;
 
 	// Genotype pointers
-	float* genotype_candidate = (float*)(calc_coords + cData.dockpars.num_of_atoms);
-	float* genotype_deviate = (float*)(genotype_candidate + cData.dockpars.num_of_genes);
-	float* genotype_bias = (float*)(genotype_deviate + cData.dockpars.num_of_genes);
-	float* offspring_genotype = (float*)(genotype_bias + cData.dockpars.num_of_genes);
+	float* genotype_candidate = (float*)(calc_coords + MAX_NUM_OF_ATOMS);
+	float* genotype_deviate = (float*)(genotype_candidate + ACTUAL_GENOTYPE_LENGTH);
+	float* genotype_bias = (float*)(genotype_deviate + ACTUAL_GENOTYPE_LENGTH);
+	float* offspring_genotype = (float*)(genotype_bias + ACTUAL_GENOTYPE_LENGTH);
 
 	// Determining run ID and entity ID
 	// Initializing offspring genotype
@@ -321,7 +321,7 @@ void gpu_perform_LS(
                     float*   pMem_energies_next
                    )
 {
-	size_t sz_shared = (3 * cpuData.dockpars.num_of_atoms + 4 * cpuData.dockpars.num_of_genes) * sizeof(float);
+	size_t sz_shared = (3 * MAX_NUM_OF_ATOMS + 4 * ACTUAL_GENOTYPE_LENGTH) * sizeof(float);
 	gpu_perform_LS_kernel<<<blocks, threads, sz_shared>>>(pMem_conformations_next, pMem_energies_next);
 	LAUNCHERROR("gpu_perform_LS_kernel");
 #if 0

diff --git a/cuda/kernel_ad.cu b/cuda/kernel_ad.cu
@@ -92,22 +92,22 @@ gpu_gradient_minAD_kernel(
 	// Gradient of the intermolecular energy per each ligand atom
 	// Also used to store the accumulated gradient per each ligand atom
 #ifdef FLOAT_GRADIENTS
-	float3* cartesian_gradient = (float3*)(calc_coords + cData.dockpars.num_of_atoms);
+	float3* cartesian_gradient = (float3*)(calc_coords + MAX_NUM_OF_ATOMS);
 #else
-	int3* cartesian_gradient = (int3*)(calc_coords + cData.dockpars.num_of_atoms);
+	int3* cartesian_gradient = (int3*)(calc_coords + MAX_NUM_OF_ATOMS);
 #endif
 	// Genotype pointers
-	float* genotype = (float*)(cartesian_gradient + cData.dockpars.num_of_atoms);
-	float* best_genotype = genotype + cData.dockpars.num_of_genes;
+	float* genotype = (float*)(cartesian_gradient + MAX_NUM_OF_ATOMS); // so far used 3*2*MAX_NUM_OF_ATOMS
+	float* best_genotype = genotype + ACTUAL_GENOTYPE_LENGTH;
 
 	// Partial results of the gradient step
-	float* gradient = best_genotype + cData.dockpars.num_of_genes;
+	float* gradient = best_genotype + ACTUAL_GENOTYPE_LENGTH;
 
 	// Squared updates E[dx^2]
-	float* square_delta = gradient + cData.dockpars.num_of_genes;
+	float* square_delta = gradient + ACTUAL_GENOTYPE_LENGTH;
 
 	// Vector for storing squared gradients E[g^2]
-	float* square_gradient = square_delta + cData.dockpars.num_of_genes;
+	float* square_gradient = square_delta + ACTUAL_GENOTYPE_LENGTH; // so far used 5*ACTUAL_GENOTYPE_LENGTH
 
 	// Iteration counter for the minimizer
 	uint32_t iteration_cnt = 0;
@@ -407,7 +407,7 @@ void gpu_gradient_minAD(
                         float*   pMem_energies_next
                        )
 {
-	size_t sz_shared = (6 * cpuData.dockpars.num_of_atoms + 5 * cpuData.dockpars.num_of_genes) * sizeof(float);
+	size_t sz_shared = (6 * MAX_NUM_OF_ATOMS + 5 * ACTUAL_GENOTYPE_LENGTH) * sizeof(float);
 	gpu_gradient_minAD_kernel<<<blocks, threads, sz_shared>>>(pMem_conformations_next, pMem_energies_next);
 	LAUNCHERROR("gpu_gradient_minAD_kernel");
 #if 0

diff --git a/cuda/kernel_adam.cu b/cuda/kernel_adam.cu
@@ -85,23 +85,23 @@ gpu_gradient_minAdam_kernel(
 	// Gradient of the intermolecular energy per each ligand atom
 	// Also used to store the accumulated gradient per each ligand atom
 #ifdef FLOAT_GRADIENTS
-	float3* cartesian_gradient = (float3*)(calc_coords + cData.dockpars.num_of_atoms);
+	float3* cartesian_gradient = (float3*)(calc_coords + MAX_NUM_OF_ATOMS);
 #else
-	int3* cartesian_gradient = (int3*)(calc_coords + cData.dockpars.num_of_atoms);
+	int3* cartesian_gradient = (int3*)(calc_coords + MAX_NUM_OF_ATOMS);
 #endif
 
 	// Genotype pointers
-	float* genotype = (float*)(cartesian_gradient + cData.dockpars.num_of_atoms);
-	float* best_genotype = genotype + cData.dockpars.num_of_genes;
+	float* genotype = (float*)(cartesian_gradient + MAX_NUM_OF_ATOMS);
+	float* best_genotype = genotype + ACTUAL_GENOTYPE_LENGTH;
 
 	// Partial results of the gradient step
-	float* gradient = best_genotype + cData.dockpars.num_of_genes;
+	float* gradient = best_genotype + ACTUAL_GENOTYPE_LENGTH;
 
 	// Adam mt parameter
-	float* mt = gradient + cData.dockpars.num_of_genes;
+	float* mt = gradient + ACTUAL_GENOTYPE_LENGTH;
 
 	// Adam vt parameter
-	float* vt = mt + cData.dockpars.num_of_genes;
+	float* vt = mt + ACTUAL_GENOTYPE_LENGTH;
 
 	// Iteration counter for the minimizer
 	uint32_t iteration_cnt = 0;
@@ -410,7 +410,7 @@ void gpu_gradient_minAdam(
                           float* pMem_energies_next
 )
 {
-	size_t sz_shared = (6 * cpuData.dockpars.num_of_atoms + 5 * cpuData.dockpars.num_of_genes) * sizeof(float);
+	size_t sz_shared = (6 * MAX_NUM_OF_ATOMS + 5 * ACTUAL_GENOTYPE_LENGTH) * sizeof(float);
 	gpu_gradient_minAdam_kernel<<<blocks, threads, sz_shared>>>(pMem_conformations_next, pMem_energies_next);
 	LAUNCHERROR("gpu_gradient_minAdam_kernel");
 #if 0