Skip to content

Commit

Permalink
Merge pull request #139 from atillack/read_ligand
Browse files Browse the repository at this point in the history
Major feature updates and code optimizations
  • Loading branch information
atillack authored Aug 20, 2021
2 parents db7968b + f17e873 commit 11d0a51
Show file tree
Hide file tree
Showing 34 changed files with 2,296 additions and 2,374 deletions.
43 changes: 28 additions & 15 deletions Makefile.Cuda
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@ UNAME := $(shell uname)
TARGETS = 52 60 61 70
CUDA_TARGETS=$(foreach target,$(TARGETS),-gencode arch=compute_$(target),code=sm_$(target))

$(shell ./link_cuda.sh)

ifeq ($(DEVICE), CPU)
DEV =-DCPU_DEVICE
else ifeq ($(DEVICE), GPU)
Expand All @@ -37,16 +35,14 @@ KCMN_DIR=$(COMMON_DIR)
BIN_DIR=./bin
LIB_CUDA = kernels.o -lcurand -lcudart


# Host sources
HOST_SRC=$(wildcard $(HOST_SRC_DIR)/*.cpp)
SRC=$(HOST_SRC)
TARGET := autodock
TOOL_TARGET := adgpu_analysis

IFLAGS=-I$(COMMON_DIR) -I$(HOST_INC_DIR) -I$(GPU_INCLUDE_PATH) -I$(KRNL_DIR)
LFLAGS=-L$(GPU_LIBRARY_PATH) -Wl,-rpath=$(GPU_LIBRARY_PATH):$(CPU_LIBRARY_PATH)
CFLAGS=-std=c++11 $(IFLAGS) $(LFLAGS)
TOOL_CFLAGS=-std=c++11 -I$(COMMON_DIR) -I$(HOST_INC_DIR)

TARGET := autodock
ifeq ($(DEVICE), CPU)
TARGET:=$(TARGET)_cpu
else ifeq ($(DEVICE), GPU)
Expand Down Expand Up @@ -96,8 +92,6 @@ endif
CONFIG=RELEASE
#CONFIG=FDEBUG



ifeq ($(CONFIG),FDEBUG)
OPT =-O0 -g3 -Wall -DDOCK_DEBUG
CUDA_FLAGS = -G -use_fast_math --ptxas-options="-v" $(CUDA_TARGETS) -std=c++11
Expand All @@ -123,20 +117,22 @@ else
endif
# ------------------------------------------------------

all: odock
all: otool odock

check-env-dev:
@if test -z "$$DEVICE"; then \
echo "DEVICE is undefined"; \
echo "Please set DEVICE to either CPU, GPU, CUDA, or OCLGPU to build docking software."; \
exit 1; \
else \
if [ "$$DEVICE" = "CPU" ]; then \
echo "DEVICE is set to $$DEVICE"; \
echo "DEVICE is set to $$DEVICE which is not a valid Cuda device."; \
exit 1; \
else \
if [ "$$DEVICE" = "GPU" ]; then \
echo "DEVICE is set to $$DEVICE"; \
else \
echo "DEVICE value is invalid. Set DEVICE to either CPU or GPU"; \
echo "DEVICE value is invalid. Please set DEVICE to either CPU, GPU, CUDA, or OCLGPU"; \
exit 1; \
fi; \
fi; \
fi; \
Expand Down Expand Up @@ -176,15 +172,32 @@ check-env-all: check-env-dev check-env-cpu check-env-gpu
GIT_VERSION := $(shell ./version_string.sh)

CFLAGS+=-DVERSION=\"$(GIT_VERSION)\"
TOOL_CFLAGS+=-DVERSION=\"$(GIT_VERSION)\"

# ------------------------------------------------------

# link-code: make the generic names performdocking.h / performdocking.cpp
# resolve to the Cuda variants by (re)creating symlinks in the host include
# and source directories. `ln -sf` overwrites any link left by a previous build.
link-code:
ln -sf performdocking.h.Cuda $(HOST_INC_DIR)/performdocking.h
ln -sf performdocking.cpp.Cuda $(HOST_SRC_DIR)/performdocking.cpp

# unlink-code: remove the performdocking.h / performdocking.cpp entries that
# link-code creates, so a later *.cpp listing of the host source directory
# no longer contains performdocking.cpp. `rm -f` succeeds if they are absent.
unlink-code:
rm -f $(HOST_INC_DIR)/performdocking.h $(HOST_SRC_DIR)/performdocking.cpp

# kernels: compile-only (-c) build of $(KRNL_DIR)/kernels.cu with $(NVCC).
# NOTE(review): presumably this yields the kernels.o listed in LIB_CUDA;
# no -o is given, so the output name/location is the compiler default - confirm.
kernels: $(KERNEL_SRC)
$(NVCC) $(NWI) $(REP) $(CUDA_FLAGS) $(IFLAGS) $(CUDA_INCLUDES) -c $(KRNL_DIR)/kernels.cu

odock: check-env-all kernels $(SRC)
# otool: build the analysis tool $(BIN_DIR)/$(TOOL_TARGET) from every *.cpp in
# $(HOST_SRC_DIR), compiled with -DTOOLMODE and $(TOOL_CFLAGS).
# Depends on unlink-code so the performdocking.cpp link is removed before the
# `ls` source listing in the recipe is evaluated.
otool: unlink-code
@echo "Building" $(TOOL_TARGET) "..."
$(CPP) \
$(shell ls $(HOST_SRC_DIR)/*.cpp) \
$(TOOL_CFLAGS) \
-o$(BIN_DIR)/$(TOOL_TARGET) \
$(PIPELINE) $(OPT) -DTOOLMODE $(REP)

odock: check-env-all kernels link-code
@echo "Building" $(TARGET) "..."
$(CPP) \
$(SRC) \
$(shell ls $(HOST_SRC_DIR)/*.cpp) \
$(CFLAGS) \
$(LIB_CUDA) \
-o$(BIN_DIR)/$(TARGET) \
Expand Down
45 changes: 33 additions & 12 deletions Makefile.OpenCL
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@ CPP = g++
LIB_OPENCL = -lOpenCL
UNAME := $(shell uname)

$(shell ./link_opencl.sh)

ifeq ($(UNAME), Darwin)
# In case ScoreP (for profiling/tracing) is used,
# need to link to a *.dylib for instrumentation
Expand Down Expand Up @@ -57,14 +55,16 @@ KRNL_DIR=./device
KCMN_DIR=$(COMMON_DIR)
BIN_DIR=./bin

# Host sources
OCL_SRC=$(wildcard $(OCL_SRC_DIR)/*.cpp)
HOST_SRC=$(wildcard $(HOST_SRC_DIR)/*.cpp)
SRC=$(OCL_SRC) $(HOST_SRC)
TARGET := autodock
TOOL_TARGET := adgpu_analysis

IFLAGS=-I$(COMMON_DIR) -I$(OCL_INC_DIR) -I$(HOST_INC_DIR) -I$(KRNL_DIR) -I$(OCLA_INC_PATH)
LFLAGS=-L$(OCLA_LIB_PATH)
CFLAGS=-std=c++11 $(IFLAGS) $(LFLAGS)
TOOL_CFLAGS=-std=c++11 -I$(COMMON_DIR) -I$(HOST_INC_DIR)

# Host sources
OCL_SRC=$(wildcard $(OCL_SRC_DIR)/*.cpp)

# Device sources
KRNL_MAIN=calcenergy.cl
Expand All @@ -81,7 +81,6 @@ K_NAMES=-DK1=$(K1_NAME) -DK2=$(K2_NAME) -DK3=$(K3_NAME) -DK4=$(K4_NAME) -DK5=$(K
# Kernel flags
KFLAGS=-DKRNL_SOURCE=$(KRNL_DIR)/$(KRNL_MAIN) -DKRNL_DIRECTORY=$(KRNL_DIR) -DKCMN_DIRECTORY=$(KCMN_DIR) $(K_NAMES)

TARGET := autodock
ifeq ($(DEVICE), CPU)
TARGET:=$(TARGET)_cpu
else ifeq ($(DEVICE), GPU)
Expand Down Expand Up @@ -183,11 +182,11 @@ else
endif
# ------------------------------------------------------

all: odock
all: otool odock

check-env-dev:
@if test -z "$$DEVICE"; then \
echo "DEVICE is undefined"; \
echo "Please set DEVICE to either CPU, GPU, CUDA, or OCLGPU to build docking software."; \
exit 1; \
else \
if [ "$$DEVICE" = "CPU" ]; then \
Expand All @@ -196,7 +195,8 @@ check-env-dev:
if [ "$$DEVICE" = "GPU" ]; then \
echo "DEVICE is set to $$DEVICE"; \
else \
echo "DEVICE value is invalid. Set DEVICE to either CPU or GPU"; \
echo "DEVICE value is invalid. Please set DEVICE to either CPU, GPU, CUDA, or OCLGPU"; \
exit 1; \
fi; \
fi; \
fi; \
Expand Down Expand Up @@ -236,15 +236,36 @@ check-env-all: check-env-dev check-env-cpu check-env-gpu
GIT_VERSION := $(shell ./version_string.sh)

CFLAGS+=-DVERSION=\"$(GIT_VERSION)\"
TOOL_CFLAGS+=-DVERSION=\"$(GIT_VERSION)\"

# ------------------------------------------------------

# Tool host sources

# Host sources

# link-code: make the generic names performdocking.h / performdocking.cpp
# resolve to the OpenCL variants by (re)creating symlinks in the host include
# and source directories. `ln -sf` overwrites any link left by a previous build.
link-code:
ln -sf performdocking.h.OpenCL $(HOST_INC_DIR)/performdocking.h
ln -sf performdocking.cpp.OpenCL $(HOST_SRC_DIR)/performdocking.cpp

# unlink-code: remove the performdocking.h / performdocking.cpp entries that
# link-code creates, so a later *.cpp listing of the host source directory
# no longer contains performdocking.cpp. `rm -f` succeeds if they are absent.
unlink-code:
rm -f $(HOST_INC_DIR)/performdocking.h $(HOST_SRC_DIR)/performdocking.cpp

# stringify: run the stringify_ocl_krnls.sh helper script from the current directory.
# NOTE(review): presumably converts the OpenCL kernel sources into a form the
# host code can embed - the script is not visible here, confirm.
stringify:
./stringify_ocl_krnls.sh

odock: check-env-all stringify $(SRC)
# otool: build the analysis tool $(BIN_DIR)/$(TOOL_TARGET) from every *.cpp in
# $(HOST_SRC_DIR), compiled with -DTOOLMODE and $(TOOL_CFLAGS).
# Depends on unlink-code so the performdocking.cpp link is removed before the
# `ls` source listing in the recipe is evaluated.
otool: unlink-code
@echo "Building" $(TOOL_TARGET) "..."
$(CPP) \
$(shell ls $(HOST_SRC_DIR)/*.cpp) \
$(TOOL_CFLAGS) \
-o$(BIN_DIR)/$(TOOL_TARGET) \
$(PIPELINE) $(OPT) -DTOOLMODE $(REP)

odock: check-env-all stringify link-code
@echo "Building" $(TARGET) "..."
$(CPP) \
$(SRC) \
$(OCL_SRC) $(shell ls $(HOST_SRC_DIR)/*.cpp) \
$(CFLAGS) \
$(LIB_OPENCL) \
-o$(BIN_DIR)/$(TARGET) \
Expand Down
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,6 @@ By default the output log file is written in the current working folder. Example
|--smooth | | Smoothing parameter for vdW interactions | 0.5 (Å) |
|--elecmindist | | Min. electrostatic potential distance (w/ dpf: 0.5 Å) | 0.01 (Å) |
|--modqp | | Use modified QASP from VirtualDrug or AD4 original | 0 (no, use AD4) |
|--cgmaps | | Use individual maps for CG-G0 instead of the same one | 0 (no, same map) |

Autostop is ON by default since v1.4. The collective distribution of scores among all LGA populations
is tested for convergence every `<asfreq>` generations, and docking is stopped if the top-scored poses
Expand Down
32 changes: 0 additions & 32 deletions common/calcenergy_basic.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,38 +43,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
// WARNING: it is assumed that unsigned int is 32 bits long
#define MAX_UINT 4294967296.0f

// Macro for capturing grid values
// Original
#define GETGRIDVALUE(mempoi,gridsize_x,gridsize_y,gridsize_z,t,z,y,x) *(mempoi + gridsize_x*(y + gridsize_y*(z + gridsize_z*t)) + x)

// Optimization 1
// #define GETGRIDVALUE_OPT(mempoi,gridsize_x,gridsize_y,mul_tmp,z,y,x) *(mempoi + gridsize_x*(y + gridsize_y*(z + mul_tmp)) + x)

// Optimization 2
// Implemented directly in the kernel code: calcenergy_fourkernels_intel.cl

typedef enum
{
idx_000 = 0,
idx_010 = 1,
idx_001 = 2,
idx_011 = 3,
idx_100 = 4,
idx_110 = 5,
idx_101 = 6,
idx_111 = 7
} indices;

// Macro for trilinear interpolation
#define TRILININTERPOL(cube, weights) (cube[idx_000]*weights[idx_000] + \
cube[idx_010]*weights[idx_010] + \
cube[idx_001]*weights[idx_001] + \
cube[idx_011]*weights[idx_011] + \
cube[idx_100]*weights[idx_100] + \
cube[idx_110]*weights[idx_110] + \
cube[idx_101]*weights[idx_101] + \
cube[idx_111]*weights[idx_111])

// Sticking to array boundaries
#define stick_to_bounds(x,a,b) x + (x <= a)*(a-x) + (x >= b)*(b-x)

Expand Down
12 changes: 12 additions & 0 deletions common/defines.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,18 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#define NUM_OF_THREADS_PER_BLOCK 16
#endif

// Symbolic names for the eight positions 0..7 of an 8-element array.
// Note that the numeric values do not follow a plain binary reading of the
// suffix: idx_010 is 1 and idx_001 is 2.
// NOTE(review): the suffixes look like per-axis 0/1 offsets of the corners of
// an interpolation cube - confirm the axis order against the indexing code.
typedef enum
{
idx_000 = 0,
idx_010 = 1,
idx_001 = 2,
idx_011 = 3,
idx_100 = 4,
idx_110 = 5,
idx_101 = 6,
idx_111 = 7
} indices;

enum {C=0,N=1,O=2,H=3,XX=4,P=5,S=6}; // see "bond_index" in the "AD4.1_bound.dat" or "AD4_parameters.dat" file.
#define NUM_ENUM_ATOMTYPES 7 // this should be the length of the enumerated atom types above

Expand Down
5 changes: 4 additions & 1 deletion cuda/GpuData.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,8 @@ typedef struct

struct GpuData {
int devnum;
int preload_gridsize;
int devid;
int preallocated_gridsize;
GpuDockparameters dockpars;

// Consolidated constants and memory pointers to reduce kernel launch overhead
Expand Down Expand Up @@ -146,6 +147,8 @@ struct GpuTempData {
int* pMem_evals_of_new_entities;
int* pMem_gpu_evals_of_runs;
uint32_t* pMem_prng_states;
char* device_name;
bool device_busy;
};
#endif

10 changes: 5 additions & 5 deletions cuda/kernel3.cu
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,10 @@ gpu_perform_LS_kernel(
float3* calc_coords = (float3*)sFloatBuff;

// Genotype pointers
float* genotype_candidate = (float*)(calc_coords + cData.dockpars.num_of_atoms);
float* genotype_deviate = (float*)(genotype_candidate + cData.dockpars.num_of_genes);
float* genotype_bias = (float*)(genotype_deviate + cData.dockpars.num_of_genes);
float* offspring_genotype = (float*)(genotype_bias + cData.dockpars.num_of_genes);
float* genotype_candidate = (float*)(calc_coords + MAX_NUM_OF_ATOMS);
float* genotype_deviate = (float*)(genotype_candidate + ACTUAL_GENOTYPE_LENGTH);
float* genotype_bias = (float*)(genotype_deviate + ACTUAL_GENOTYPE_LENGTH);
float* offspring_genotype = (float*)(genotype_bias + ACTUAL_GENOTYPE_LENGTH);

// Determining run ID and entity ID
// Initializing offspring genotype
Expand Down Expand Up @@ -321,7 +321,7 @@ void gpu_perform_LS(
float* pMem_energies_next
)
{
size_t sz_shared = (3 * cpuData.dockpars.num_of_atoms + 4 * cpuData.dockpars.num_of_genes) * sizeof(float);
size_t sz_shared = (3 * MAX_NUM_OF_ATOMS + 4 * ACTUAL_GENOTYPE_LENGTH) * sizeof(float);
gpu_perform_LS_kernel<<<blocks, threads, sz_shared>>>(pMem_conformations_next, pMem_energies_next);
LAUNCHERROR("gpu_perform_LS_kernel");
#if 0
Expand Down
16 changes: 8 additions & 8 deletions cuda/kernel_ad.cu
Original file line number Diff line number Diff line change
Expand Up @@ -92,22 +92,22 @@ gpu_gradient_minAD_kernel(
// Gradient of the intermolecular energy for each ligand atom
// Also used to store the accumulated gradient for each ligand atom
#ifdef FLOAT_GRADIENTS
float3* cartesian_gradient = (float3*)(calc_coords + cData.dockpars.num_of_atoms);
float3* cartesian_gradient = (float3*)(calc_coords + MAX_NUM_OF_ATOMS);
#else
int3* cartesian_gradient = (int3*)(calc_coords + cData.dockpars.num_of_atoms);
int3* cartesian_gradient = (int3*)(calc_coords + MAX_NUM_OF_ATOMS);
#endif
// Genotype pointers
float* genotype = (float*)(cartesian_gradient + cData.dockpars.num_of_atoms);
float* best_genotype = genotype + cData.dockpars.num_of_genes;
float* genotype = (float*)(cartesian_gradient + MAX_NUM_OF_ATOMS); // so far used 3*2*MAX_NUM_OF_ATOMS
float* best_genotype = genotype + ACTUAL_GENOTYPE_LENGTH;

// Partial results of the gradient step
float* gradient = best_genotype + cData.dockpars.num_of_genes;
float* gradient = best_genotype + ACTUAL_GENOTYPE_LENGTH;

// Squared updates E[dx^2]
float* square_delta = gradient + cData.dockpars.num_of_genes;
float* square_delta = gradient + ACTUAL_GENOTYPE_LENGTH;

// Vector for storing squared gradients E[g^2]
float* square_gradient = square_delta + cData.dockpars.num_of_genes;
float* square_gradient = square_delta + ACTUAL_GENOTYPE_LENGTH; // so far used 5*ACTUAL_GENOTYPE_LENGTH

// Iteration counter for the minimizer
uint32_t iteration_cnt = 0;
Expand Down Expand Up @@ -407,7 +407,7 @@ void gpu_gradient_minAD(
float* pMem_energies_next
)
{
size_t sz_shared = (6 * cpuData.dockpars.num_of_atoms + 5 * cpuData.dockpars.num_of_genes) * sizeof(float);
size_t sz_shared = (6 * MAX_NUM_OF_ATOMS + 5 * ACTUAL_GENOTYPE_LENGTH) * sizeof(float);
gpu_gradient_minAD_kernel<<<blocks, threads, sz_shared>>>(pMem_conformations_next, pMem_energies_next);
LAUNCHERROR("gpu_gradient_minAD_kernel");
#if 0
Expand Down
16 changes: 8 additions & 8 deletions cuda/kernel_adam.cu
Original file line number Diff line number Diff line change
Expand Up @@ -85,23 +85,23 @@ gpu_gradient_minAdam_kernel(
// Gradient of the intermolecular energy for each ligand atom
// Also used to store the accumulated gradient for each ligand atom
#ifdef FLOAT_GRADIENTS
float3* cartesian_gradient = (float3*)(calc_coords + cData.dockpars.num_of_atoms);
float3* cartesian_gradient = (float3*)(calc_coords + MAX_NUM_OF_ATOMS);
#else
int3* cartesian_gradient = (int3*)(calc_coords + cData.dockpars.num_of_atoms);
int3* cartesian_gradient = (int3*)(calc_coords + MAX_NUM_OF_ATOMS);
#endif

// Genotype pointers
float* genotype = (float*)(cartesian_gradient + cData.dockpars.num_of_atoms);
float* best_genotype = genotype + cData.dockpars.num_of_genes;
float* genotype = (float*)(cartesian_gradient + MAX_NUM_OF_ATOMS);
float* best_genotype = genotype + ACTUAL_GENOTYPE_LENGTH;

// Partial results of the gradient step
float* gradient = best_genotype + cData.dockpars.num_of_genes;
float* gradient = best_genotype + ACTUAL_GENOTYPE_LENGTH;

// Adam mt parameter
float* mt = gradient + cData.dockpars.num_of_genes;
float* mt = gradient + ACTUAL_GENOTYPE_LENGTH;

// Adam vt parameter
float* vt = mt + cData.dockpars.num_of_genes;
float* vt = mt + ACTUAL_GENOTYPE_LENGTH;

// Iteration counter for the minimizer
uint32_t iteration_cnt = 0;
Expand Down Expand Up @@ -410,7 +410,7 @@ void gpu_gradient_minAdam(
float* pMem_energies_next
)
{
size_t sz_shared = (6 * cpuData.dockpars.num_of_atoms + 5 * cpuData.dockpars.num_of_genes) * sizeof(float);
size_t sz_shared = (6 * MAX_NUM_OF_ATOMS + 5 * ACTUAL_GENOTYPE_LENGTH) * sizeof(float);
gpu_gradient_minAdam_kernel<<<blocks, threads, sz_shared>>>(pMem_conformations_next, pMem_energies_next);
LAUNCHERROR("gpu_gradient_minAdam_kernel");
#if 0
Expand Down
Loading

0 comments on commit 11d0a51

Please sign in to comment.