diff --git a/common/defines.h b/common/defines.h index 87a5ea66..c3b181bd 100644 --- a/common/defines.h +++ b/common/defines.h @@ -107,6 +107,13 @@ enum {C=0,N=1,O=2,H=3,XX=4,P=5,S=6}; // see "bond_index" in the "AD4.1_bound.da // Added definition to support flexrings. #define G 50.0f +// Enables full floating point gradient calculation. +// Use is not advised as: +// - the determinism gradients (aka integer gradients) are much faster *and* +// - speed up the local search convergence +// Please only use for debugging +// #define FLOAT_GRADIENTS + // Use one more coefficient in the fit to the Mehler-Solmajer dielectric in energrad implementation // Although this improves the fit (particularly for the gradient), it costs a little bit more and // does not return better accuracy overall (default: commented out, don't use) diff --git a/cuda/calcMergeEneGra.cu b/cuda/calcMergeEneGra.cu index 99bbc076..7641db45 100644 --- a/cuda/calcMergeEneGra.cu +++ b/cuda/calcMergeEneGra.cu @@ -659,13 +659,26 @@ __device__ void gpu_calc_energrad( REDUCEFLOATSUM(gz, pFloatAccumulator); global_energy = energy; +#ifndef FLOAT_GRADIENTS int* gradient_genotype = (int*)fgradient_genotype; - +#endif if (threadIdx.x == 0) { // Scaling gradient for translational genes as // their corresponding gradients were calculated in the space // where these genes are in Angstrom, // but AutoDock-GPU translational genes are within in grids +#ifdef FLOAT_GRADIENTS + fgradient_genotype[0] = gx * cData.dockpars.grid_spacing; + fgradient_genotype[1] = gy * cData.dockpars.grid_spacing; + fgradient_genotype[2] = gz * cData.dockpars.grid_spacing; + + #if defined (PRINT_GRAD_TRANSLATION_GENES) + printf("\n%s\n", "----------------------------------------------------------"); + printf("gradient_x:%f\n", fgradient_genotype [0]); + printf("gradient_y:%f\n", fgradient_genotype [1]); + printf("gradient_z:%f\n", fgradient_genotype [2]); + #endif +#else gradient_genotype[0] = lrintf(fminf(MAXTERM, fmaxf(-MAXTERM, TERMSCALE * gx * cData.dockpars.grid_spacing))); gradient_genotype[1] = lrintf(fminf(MAXTERM, fmaxf(-MAXTERM, TERMSCALE * gy * cData.dockpars.grid_spacing))); gradient_genotype[2] = lrintf(fminf(MAXTERM, fmaxf(-MAXTERM, TERMSCALE * gz * cData.dockpars.grid_spacing))); @@ -676,6 +689,7 @@ __device__ void gpu_calc_energrad( printf("gradient_y:%f\n", gradient_genotype [1]); printf("gradient_z:%f\n", gradient_genotype [2]); #endif +#endif } __syncthreads(); @@ -861,6 +875,17 @@ __device__ void gpu_calc_energrad( // Setting gradient rotation-related genotypes in cube // Multiplicating by DEG_TO_RAD is to make it uniform to DEG (see torsion gradients) +#ifdef FLOAT_GRADIENTS + fgradient_genotype[3] = (grad_phi / (dependence_on_theta * dependence_on_rotangle)) * DEG_TO_RAD; + fgradient_genotype[4] = (grad_theta / dependence_on_rotangle) * DEG_TO_RAD; + fgradient_genotype[5] = grad_rotangle * DEG_TO_RAD; + #if defined (PRINT_GRAD_ROTATION_GENES) + printf("\n%s\n", "----------------------------------------------------------"); + printf("%-30s \n", "grad_axisangle (1,2,3) - after empirical scaling: "); + printf("%-13s %-13s %-13s \n", "grad_phi", "grad_theta", "grad_rotangle"); + printf("%-13.6f %-13.6f %-13.6f\n", fgradient_genotype[3], fgradient_genotype[4], fgradient_genotype[5]); + #endif +#else gradient_genotype[3] = lrintf(fminf(MAXTERM, fmaxf(-MAXTERM, TERMSCALE * (grad_phi / (dependence_on_theta * dependence_on_rotangle)) * DEG_TO_RAD))); gradient_genotype[4] = lrintf(fminf(MAXTERM, fmaxf(-MAXTERM, TERMSCALE * (grad_theta / dependence_on_rotangle) * DEG_TO_RAD))); gradient_genotype[5] = lrintf(fminf(MAXTERM, fmaxf(-MAXTERM, TERMSCALE * grad_rotangle * DEG_TO_RAD))); @@ -870,6 +895,7 @@ __device__ void gpu_calc_energrad( printf("%-13s %-13s %-13s \n", "grad_phi", "grad_theta", "grad_rotangle"); printf("%-13.6f %-13.6f %-13.6f\n", gradient_genotype[3], gradient_genotype[4], gradient_genotype[5]); #endif +#endif } __syncthreads(); @@ -930,17 +956,22 @@ __device__ void gpu_calc_energrad( // Assignment of gene-based gradient // - this works because a * (a_1 + a_2 + ... + a_n) = a*a_1 + a*a_2 + ... + a*a_n +#ifdef FLOAT_GRADIENTS + ATOMICADDF32(&fgradient_genotype[rotbond_id+6], torque_on_axis * DEG_TO_RAD); /*(M_PI / 180.0f)*/; +#else ATOMICADDI32(&gradient_genotype[rotbond_id+6], lrintf(fminf(MAXTERM, fmaxf(-MAXTERM, TERMSCALE * torque_on_axis * DEG_TO_RAD)))); /*(M_PI / 180.0f)*/; +#endif } __syncthreads(); +#ifndef FLOAT_GRADIENTS for (uint32_t gene_cnt = threadIdx.x; gene_cnt < cData.dockpars.num_of_genes; gene_cnt+= blockDim.x) { fgradient_genotype[gene_cnt] = ONEOVERTERMSCALE * (float)gradient_genotype[gene_cnt]; } __syncthreads(); - +#endif #if defined (CONVERT_INTO_ANGSTROM_RADIAN) for (uint32_t gene_cnt = threadIdx.x+3; // Only for gene_cnt > 2 means start gene_cnt at 3 gene_cnt < cData.dockpars.num_of_genes; diff --git a/device/calcMergedEneGra.cl b/device/calcMergedEneGra.cl index 676679c9..22d0a2ee 100644 --- a/device/calcMergedEneGra.cl +++ b/device/calcMergedEneGra.cl @@ -22,18 +22,18 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ - +#ifndef FLOAT_GRADIENTS #define TERMBITS 10 #define MAXTERM ((float)(1 << (31 - TERMBITS - 8))) // 2^(31 - 10 - 8) = 2^13 = 8192 #define TERMSCALE ((float)(1 << TERMBITS)) // 2^10 = 1024 #define ONEOVERTERMSCALE (1.0f / TERMSCALE) // 1 / 1024 = 0.000977 - -// Enables full floating point gradient calculation. -// Use is not advised as: -// - the determinism gradients (aka integer gradients) are much faster *and* -// - speed up the local search convergence -// Please only use for debugging -// #define FLOAT_GRADIENTS +// inspired by: https://streamhpc.com/blog/2014-11-07/opencl-integer-rounding-c/ +int float2int_round (float number) +{ + int addsub = (int)((number < 0.0f) - (number > 0.0f)); // +1 for number < 0 (round up, i.e. 1.5 -> 2.0), -1 for number > 0 (round down, i.e. -1.5 -> -2.0) + return ((int)(number+0.5f*addsub)); +} +#endif // Enable restoring map gradient // Currently, this is not a good idea @@ -117,7 +117,7 @@ void gpu_calc_energrad( // Initializing gradients (forces) // Derived from autodockdev/maps.py for ( int atom_id = tidx; - atom_id < MAX_NUM_OF_ATOMS; // makes sure that gradient sum reductions give correct results if dockpars_num_atoms < NUM_OF_THREADS_PER_BLOCK + atom_id < dockpars_num_of_atoms; atom_id+= NUM_OF_THREADS_PER_BLOCK) { // Initialize coordinates @@ -215,7 +215,7 @@ void gpu_calc_energrad( // ================================================ // CALCULATING INTERMOLECULAR GRADIENTS // ================================================ - float inv_grid_spacing = native_recip(dockpars_grid_spacing); + float inv_grid_spacing = native_divide(1.0f,dockpars_grid_spacing); float weights[8]; float cube[8]; for ( int atom_id = tidx; @@ -233,10 +233,10 @@ void gpu_calc_energrad( if ((x < 0) || (y < 0) || (z < 0) || (x >= dockpars_gridsize_x-1) || (y >= dockpars_gridsize_y-1) || (z >= dockpars_gridsize_z-1)){ +#ifdef RESTORING_MAP_GRADIENT x -= 0.5f * dockpars_gridsize_x; y -= 0.5f * dockpars_gridsize_y; z -= 0.5f * dockpars_gridsize_z; -#ifdef RESTORING_MAP_GRADIENT partial_energies[tidx] += 21.0f * (x*x+y*y+z*z); //100000.0f; #else partial_energies[tidx] += 16777216.0f; //100000.0f; @@ -248,26 +248,32 @@ void gpu_calc_energrad( // Setting gradients (forces) penalties. // The idea here is to push the offending // molecule towards the center -#ifdef FLOAT_GRADIENTS - gradient_x[atom_id] += TERMSCALE * 42.0f * x * inv_grid_spacing; - gradient_y[atom_id] += TERMSCALE * 42.0f * y * inv_grid_spacing; - gradient_z[atom_id] += TERMSCALE * 42.0f * z * inv_grid_spacing; -#else - gradient_x[atom_id] += convert_int_rte( TERMSCALE * 42.0f * x * inv_grid_spacing ); - gradient_y[atom_id] += convert_int_rte( TERMSCALE * 42.0f * y * inv_grid_spacing ); - gradient_z[atom_id] += convert_int_rte( TERMSCALE * 42.0f * z * inv_grid_spacing ); -#endif // FLOAT_GRADIENTS + #ifdef FLOAT_GRADIENTS + gradient_x[atom_id] += 42.0f * x * inv_grid_spacing; + gradient_y[atom_id] += 42.0f * y * inv_grid_spacing; + gradient_z[atom_id] += 42.0f * z * inv_grid_spacing; + #else + gradient_x[atom_id] += float2int_round( TERMSCALE * 42.0f * x * inv_grid_spacing ); + gradient_y[atom_id] += float2int_round( TERMSCALE * 42.0f * y * inv_grid_spacing ); + gradient_z[atom_id] += float2int_round( TERMSCALE * 42.0f * z * inv_grid_spacing ); + #endif // FLOAT_GRADIENTS #else + #ifdef FLOAT_GRADIENTS gradient_x[atom_id] += 16777216.0f; gradient_y[atom_id] += 16777216.0f; gradient_z[atom_id] += 16777216.0f; + #else + gradient_x[atom_id] += 16777216; + gradient_y[atom_id] += 16777216; + gradient_z[atom_id] += 16777216; + #endif // FLOAT_GRADIENTS #endif continue; } // Getting coordinates - float x_low = floor(x); - float y_low = floor(y); - float z_low = floor(z); + int x_low = floor(x); + int y_low = floor(y); + int z_low = floor(z); // Grid value at 000 __global const float* grid_value_000 = dockpars_fgrids + ((ulong)(x_low + y_low*g1 + z_low*g2)<<2); @@ -429,9 +435,9 @@ void gpu_calc_energrad( gradient_y[atom_id] += gy; gradient_z[atom_id] += gz; #else - gradient_x[atom_id] += convert_int_rte(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * gx))); - gradient_y[atom_id] += convert_int_rte(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * gy))); - gradient_z[atom_id] += convert_int_rte(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * gz))); + gradient_x[atom_id] += float2int_round(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * gx))); + gradient_y[atom_id] += float2int_round(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * gy))); + gradient_z[atom_id] += float2int_round(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * gz))); #endif } // End atom_id for-loop (INTERMOLECULAR ENERGY) @@ -509,7 +515,7 @@ void gpu_calc_energrad( float rmn=native_powr(smoothed_distance,m-n); float rm=native_powr(smoothed_distance,-m); partial_energies[tidx] += (kerconst_intra->VWpars_AC_const[idx]-rmn*kerconst_intra->VWpars_BD_const[idx])*rm; - priv_gradient_per_intracontributor += (n*kerconst_intra->VWpars_BD_const[idx]*rmn-m*kerconst_intra->VWpars_AC_const[idx])*rm*native_recip(smoothed_distance); + priv_gradient_per_intracontributor += native_divide((n*kerconst_intra->VWpars_BD_const[idx]*rmn-m*kerconst_intra->VWpars_AC_const[idx])*rm,smoothed_distance); #if defined (DEBUG_ENERGY_KERNEL) partial_intraE[tidx] += (kerconst_intra->VWpars_AC_const[idx]-rmn*kerconst_intra->VWpars_BD_const[idx])*rm; #endif @@ -544,16 +550,16 @@ void gpu_calc_energrad( #ifndef DIEL_FIT_ABC float dist_shift=atomic_distance+1.26366f; dist2=dist_shift*dist_shift; - float diel = 1.10859f*native_recip(dist2)+0.010358f; + float diel = native_divide(1.10859f,dist2)+0.010358f; #else float dist_shift=atomic_distance+1.588f; dist2=dist_shift*dist_shift; float disth_shift=atomic_distance+0.794f; float disth4=disth_shift*disth_shift; disth4*=disth4; - float diel = 1.404f*native_recip(dist2)+0.072f*native_recip(disth4)+0.00831f; + float diel = native_divide(1.404f,dist2)+native_divide(0.072f,disth4)+0.00831f; #endif - float es_energy = dockpars_coeff_elec * q1 * q2 * native_recip(atomic_distance); + float es_energy = native_divide(dockpars_coeff_elec * q1 * q2,atomic_distance); partial_energies[tidx] += diel * es_energy + desolv_energy; #if defined (DEBUG_ENERGY_KERNEL) @@ -574,11 +580,11 @@ void gpu_calc_energrad( // priv_gradient_per_intracontributor += -dockpars_coeff_elec * q1 * q2 * native_divide (upper, lower) - // 0.0771605f * atomic_distance * desolv_energy; - priv_gradient_per_intracontributor += -es_energy*native_recip(atomic_distance) * diel + priv_gradient_per_intracontributor += native_divide(-es_energy,atomic_distance) * diel #ifndef DIEL_FIT_ABC - -es_energy * 2.21718f*native_recip(dist2*dist_shift) + -native_divide(es_energy * 2.21718f,dist2*dist_shift) #else - -es_energy * (2.808f * native_recip(dist2*dist_shift)+0.288f*native_recip(disth4*disth_shift)) + -es_energy * (native_divide(2.808f,dist2*dist_shift)+native_divide(0.288f,disth4*disth_shift)) #endif -0.0771605f * atomic_distance * desolv_energy; // 1/3.6^2 = 1/12.96 = 0.0771605 } // if cuttoff2 - internuclear-distance at 20.48A @@ -587,15 +593,15 @@ void gpu_calc_energrad( // into the contribution of each atom of the pair. // Distances in Angstroms of vector that goes from // "atom1_id"-to-"atom2_id", therefore - subx, - suby, and - subz are used - float grad_div_dist = -priv_gradient_per_intracontributor*native_recip(dist); + float grad_div_dist = native_divide(-priv_gradient_per_intracontributor,dist); #ifdef FLOAT_GRADIENTS float priv_intra_gradient_x = subx * grad_div_dist; float priv_intra_gradient_y = suby * grad_div_dist; float priv_intra_gradient_z = subz * grad_div_dist; #else - int priv_intra_gradient_x = convert_int_rte(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * subx * grad_div_dist))); - int priv_intra_gradient_y = convert_int_rte(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * suby * grad_div_dist))); - int priv_intra_gradient_z = convert_int_rte(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * subz * grad_div_dist))); + int priv_intra_gradient_x = float2int_round(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * subx * grad_div_dist))); + int priv_intra_gradient_y = float2int_round(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * suby * grad_div_dist))); + int priv_intra_gradient_z = float2int_round(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * subz * grad_div_dist))); #endif // Calculating gradients in xyz components. // Gradients for both atoms in a single contributor pair @@ -655,24 +661,37 @@ void gpu_calc_energrad( accumulator_z[tidx] += accumulator_z[tidx+off]; } } +#ifndef FLOAT_GRADIENTS __local int* i_gradient_genotype = (__local int*)gradient_genotype; +#endif if (tidx == 0) { *energy = partial_energies[0]; // Scaling gradient for translational genes as // their corresponding gradients were calculated in the space // where these genes are in Angstrom, // but AutoDock-GPU translational genes are within in grids - i_gradient_genotype[0] = convert_int_rte(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * accumulator_x[0] * dockpars_grid_spacing))); - i_gradient_genotype[1] = convert_int_rte(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * accumulator_y[0] * dockpars_grid_spacing))); - i_gradient_genotype[2] = convert_int_rte(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * accumulator_z[0] * dockpars_grid_spacing))); +#ifdef FLOAT_GRADIENTS + gradient_genotype[0] = accumulator_x[0] * dockpars_grid_spacing; + gradient_genotype[1] = accumulator_y[0] * dockpars_grid_spacing; + gradient_genotype[2] = accumulator_z[0] * dockpars_grid_spacing; + #if defined (PRINT_GRAD_TRANSLATION_GENES) + printf("\n%s\n", "----------------------------------------------------------"); + printf("gradient_x:%f\n", gradient_genotype [0]); + printf("gradient_y:%f\n", gradient_genotype [1]); + printf("gradient_z:%f\n", gradient_genotype [2]); + #endif +#else + i_gradient_genotype[0] = float2int_round(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * accumulator_x[0] * dockpars_grid_spacing))); + i_gradient_genotype[1] = float2int_round(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * accumulator_y[0] * dockpars_grid_spacing))); + i_gradient_genotype[2] = float2int_round(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * accumulator_z[0] * dockpars_grid_spacing))); #if defined (PRINT_GRAD_TRANSLATION_GENES) printf("\n%s\n", "----------------------------------------------------------"); - printf("gradient_x:%f\n", i_gradient_genotype [0]); - printf("gradient_y:%f\n", i_gradient_genotype [1]); - printf("gradient_z:%f\n", i_gradient_genotype [2]); + printf("i_gradient_x:%f\n", i_gradient_genotype [0]); + printf("i_gradient_y:%f\n", i_gradient_genotype [1]); + printf("i_gradient_z:%f\n", i_gradient_genotype [2]); #endif +#endif } - barrier(CLK_LOCAL_MEM_FENCE); // ------------------------------------------ // Obtaining rotation-related gradients @@ -736,7 +755,7 @@ void gpu_calc_energrad( // Derived from rotation.py/axisangle_to_q() // genes[3:7] = rotation.axisangle_to_q(torque, rad) - float torque_length = fast_length(torque_rot); + float torque_length = native_sqrt(torque_rot.x*torque_rot.x+torque_rot.y*torque_rot.y+torque_rot.z*torque_rot.z); torque_length += (torque_length<1e-20f)*1e-20f; #if defined (PRINT_GRAD_ROTATION_GENES) @@ -746,7 +765,7 @@ void gpu_calc_energrad( // Finding the quaternion that performs // the infinitesimal rotation around torque axis - float4 quat_torque = torque_rot * SIN_HALF_INFINITESIMAL_RADIAN * native_recip(torque_length); + float4 quat_torque = native_divide(torque_rot * SIN_HALF_INFINITESIMAL_RADIAN, torque_length); quat_torque.w = COS_HALF_INFINITESIMAL_RADIAN; #if defined (PRINT_GRAD_ROTATION_GENES) @@ -898,9 +917,15 @@ void gpu_calc_energrad( // Setting gradient rotation-related genotypes in cube // Multiplicating by DEG_TO_RAD is to make it uniform to DEG (see torsion gradients) - i_gradient_genotype[3] = convert_int_rte(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * native_divide(grad_phi, (dependence_on_theta * dependence_on_rotangle)) * DEG_TO_RAD))); - i_gradient_genotype[4] = convert_int_rte(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * native_divide(grad_theta, dependence_on_rotangle) * DEG_TO_RAD))); - i_gradient_genotype[5] = convert_int_rte(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * grad_rotangle * DEG_TO_RAD))); +#ifdef FLOAT_GRADIENTS + gradient_genotype[3] = native_divide(grad_phi, (dependence_on_theta * dependence_on_rotangle)) * DEG_TO_RAD; + gradient_genotype[4] = native_divide(grad_theta, dependence_on_rotangle) * DEG_TO_RAD; + gradient_genotype[5] = grad_rotangle * DEG_TO_RAD; +#else + i_gradient_genotype[3] = float2int_round(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * native_divide(grad_phi, (dependence_on_theta * dependence_on_rotangle)) * DEG_TO_RAD))); + i_gradient_genotype[4] = float2int_round(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * native_divide(grad_theta, dependence_on_rotangle) * DEG_TO_RAD))); + i_gradient_genotype[5] = float2int_round(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * grad_rotangle * DEG_TO_RAD))); +#endif #if defined (PRINT_GRAD_ROTATION_GENES) printf("\n%s\n", "----------------------------------------------------------"); printf("%-30s \n", "grad_axisangle (1,2,3) - after empirical scaling: "); @@ -956,9 +981,14 @@ void gpu_calc_energrad( // Assignment of gene-based gradient // - this works because a * (a_1 + a_2 + ... + a_n) = a*a_1 + a*a_2 + ... + a*a_n - atomic_add(&i_gradient_genotype[rotbond_id+6], convert_int_rte(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * torque_on_axis * DEG_TO_RAD)))); /*(M_PI / 180.0f)*/; +#ifdef FLOAT_GRADIENTS + atomicAdd_g_f(&gradient_genotype[rotbond_id+6], torque_on_axis * DEG_TO_RAD); /*(M_PI / 180.0f)*/; +#else + atomic_add(&i_gradient_genotype[rotbond_id+6], float2int_round(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * torque_on_axis * DEG_TO_RAD)))); /*(M_PI / 180.0f)*/; +#endif } barrier(CLK_LOCAL_MEM_FENCE); +#ifndef FLOAT_GRADIENTS for ( int gene_cnt = tidx; gene_cnt < dockpars_num_of_genes; gene_cnt+= NUM_OF_THREADS_PER_BLOCK) @@ -966,8 +996,9 @@ void gpu_calc_energrad( gradient_genotype[gene_cnt] = ONEOVERTERMSCALE * (float)i_gradient_genotype[gene_cnt]; } barrier(CLK_LOCAL_MEM_FENCE); +#endif - #if defined (CONVERT_INTO_ANGSTROM_RADIAN) +#if defined (CONVERT_INTO_ANGSTROM_RADIAN) for ( int gene_cnt = tidx+3; // Only for gene_cnt > 2 means start gene_cnt at 3 gene_cnt < dockpars_num_of_genes; gene_cnt+= NUM_OF_THREADS_PER_BLOCK) @@ -975,5 +1006,5 @@ void gpu_calc_energrad( gradient_genotype[gene_cnt] *= dockpars_grid_spacing * dockpars_grid_spacing * SCFACTOR_ANGSTROM_RADIAN; } barrier(CLK_LOCAL_MEM_FENCE); - #endif +#endif } diff --git a/device/calcenergy.cl b/device/calcenergy.cl index d0d8edd5..59404881 100644 --- a/device/calcenergy.cl +++ b/device/calcenergy.cl @@ -86,7 +86,7 @@ inline float positive_power(float a, uint exp) inline float fmod_pi2(float x) { - return x-(int)(invpi2*x)*PI_TIMES_2; + return x-((int)(invpi2*x))*PI_TIMES_2; } #define fast_acos_a 9.78056e-05f diff --git a/device/calcgradient.cl b/device/calcgradient.cl index b398fa69..e300eb7d 100644 --- a/device/calcgradient.cl +++ b/device/calcgradient.cl @@ -200,7 +200,7 @@ void gpu_calc_gradient( uint g1 = dockpars_gridsize_x; uint g2 = dockpars_gridsize_x_times_y; - uint g3 = dockpars_gridsize_x_times_y_times_z; + uint g3 = dockpars_gridsize_x_times_y_times_z; // ================================================ // CALCULATING ATOMIC POSITIONS AFTER ROTATIONS @@ -864,7 +864,8 @@ void gpu_calc_gradient( // Derived from rotation.py/axisangle_to_q() // genes[3:7] = rotation.axisangle_to_q(torque, rad) - float torque_length = fast_length(torque_rot); + float torque_length = native_sqrt(torque_rot.x*torque_rot.x+torque_rot.y*torque_rot.y+torque_rot.z*torque_rot.z); + torque_length += (torque_length<1e-20f)*1e-20f; #if defined (PRINT_GRAD_ROTATION_GENES) printf("\n%s\n", "----------------------------------------------------------"); diff --git a/device/kernel3.cl b/device/kernel3.cl index da7e2efd..f52c4340 100644 --- a/device/kernel3.cl +++ b/device/kernel3.cl @@ -110,7 +110,7 @@ __constant kernelconstant_conform* kerconst_conform if (tidx == 0) { run_id = get_group_id(0) / dockpars_num_of_lsentities; - entity_id = get_group_id(0) % dockpars_num_of_lsentities; + entity_id = get_group_id(0) - run_id * dockpars_num_of_lsentities; // Since entity 0 is the best one due to elitism, // it should be subjected to random selection diff --git a/device/kernel_ad.cl b/device/kernel_ad.cl index e6df819e..999500ff 100644 --- a/device/kernel_ad.cl +++ b/device/kernel_ad.cl @@ -121,7 +121,7 @@ gradient_minAD( __local float energy; __local float genotype[ACTUAL_GENOTYPE_LENGTH]; - // Iteration counter fot the minimizer + // Iteration counter for the minimizer __local uint iteration_cnt; if (tidx == 0) @@ -180,9 +180,9 @@ gradient_minAD( // Gradient of the intermolecular energy per each ligand atom // Also used to store the accummulated gradient per each ligand atom #ifdef FLOAT_GRADIENTS - __local float gradient_x[MAX_NUM_OF_ATOMS]; - __local float gradient_y[MAX_NUM_OF_ATOMS]; - __local float gradient_z[MAX_NUM_OF_ATOMS]; + __local float gradient_x[MAX_NUM_OF_ATOMS]; + __local float gradient_y[MAX_NUM_OF_ATOMS]; + __local float gradient_z[MAX_NUM_OF_ATOMS]; #else __local int gradient_x[MAX_NUM_OF_ATOMS]; __local int gradient_y[MAX_NUM_OF_ATOMS]; @@ -310,7 +310,7 @@ gradient_minAD( square_gradient[i] = 0.0f; delta[i] = 0.0f; square_delta[i] = 0.0f; - best_genotype [i] = genotype [i]; + best_genotype[i] = genotype[i]; } // Initializing best energy @@ -319,11 +319,11 @@ gradient_minAD( } #ifdef ADADELTA_AUTOSTOP - __local float rho; + __local float varrho; __local int cons_succ; __local int cons_fail; if (tidx == 0) { - rho = 1.0f; + varrho = 1.0f; cons_succ = 0; cons_fail = 0; } @@ -447,14 +447,16 @@ gradient_minAD( // Accummulating gradient^2 (eq.8 in the paper) // square_gradient corresponds to E[g^2] - square_gradient[i] = RHO * square_gradient[i] + (1.0f - RHO) * gradient[i] * gradient[i]; + square_gradient[i] *= RHO; + square_gradient[i] += (1.0f - RHO) * gradient[i] * gradient[i]; // Computing update (eq.9 in the paper) delta[i] = -1.0f * gradient[i] * native_sqrt(native_divide((float)(square_delta[i] + EPSILON), (float)(square_gradient[i] + EPSILON))); // Accummulating update^2 // square_delta corresponds to E[dx^2] - square_delta[i] = RHO * square_delta[i] + (1.0f - RHO) * delta[i] * delta [i]; + square_delta[i] *= RHO; + square_delta[i] += (1.0f - RHO) * delta[i] * delta [i]; // Applying update genotype[i] += delta[i]; @@ -504,20 +506,20 @@ gradient_minAD( #ifdef ADADELTA_AUTOSTOP if (cons_succ >= 4) { - rho *= LS_EXP_FACTOR; + varrho *= LS_EXP_FACTOR; cons_succ = 0; } else if (cons_fail >= 4) { - rho *= LS_CONT_FACTOR; + varrho *= LS_CONT_FACTOR; cons_fail = 0; } #endif } barrier(CLK_LOCAL_MEM_FENCE); // making sure that iteration_cnt is up-to-date #ifdef ADADELTA_AUTOSTOP - } while ((iteration_cnt < dockpars_max_num_of_iters) && (rho > 0.01f)); + } while ((iteration_cnt < dockpars_max_num_of_iters) && (varrho > 0.01f)); #else } while (iteration_cnt < dockpars_max_num_of_iters); #endif diff --git a/device/kernel_fire.cl b/device/kernel_fire.cl index 92f50af0..943a780a 100644 --- a/device/kernel_fire.cl +++ b/device/kernel_fire.cl @@ -207,10 +207,15 @@ gradient_minFire( // ------------------------------------------------------------------- // Gradient of the intermolecular energy per each ligand atom // Also used to store the accummulated gradient per each ligand atom - __local int i_gradient_x[MAX_NUM_OF_ATOMS]; - __local int i_gradient_y[MAX_NUM_OF_ATOMS]; - __local int i_gradient_z[MAX_NUM_OF_ATOMS]; - +#ifdef FLOAT_GRADIENTS + __local float gradient_x[MAX_NUM_OF_ATOMS]; + __local float gradient_y[MAX_NUM_OF_ATOMS]; + __local float gradient_z[MAX_NUM_OF_ATOMS]; +#else + __local int gradient_x[MAX_NUM_OF_ATOMS]; + __local int gradient_y[MAX_NUM_OF_ATOMS]; + __local int gradient_z[MAX_NUM_OF_ATOMS]; +#endif __local float f_gradient_x[MAX_NUM_OF_ATOMS]; __local float f_gradient_y[MAX_NUM_OF_ATOMS]; __local float f_gradient_z[MAX_NUM_OF_ATOMS]; @@ -354,7 +359,7 @@ gradient_minFire( dependence_on_rotangle_const, // Gradient-related arguments dockpars_num_of_genes, - (__local float*)i_gradient_x, (__local float*)i_gradient_y, (__local float*)i_gradient_z, + gradient_x, gradient_y, gradient_z, f_gradient_x, f_gradient_y, f_gradient_z, gradient ); @@ -505,7 +510,7 @@ gradient_minFire( dependence_on_rotangle_const, // Gradient-related arguments dockpars_num_of_genes, - i_gradient_x, i_gradient_y, i_gradient_z, + gradient_x, gradient_y, gradient_z, f_gradient_x, f_gradient_y, f_gradient_z, candidate_gradient ); @@ -612,7 +617,7 @@ gradient_minFire( dependence_on_rotangle_const, // Gradient-related arguments dockpars_num_of_genes, - i_gradient_x, i_gradient_y, i_gradient_z, + gradient_x, gradient_y, gradient_z, f_gradient_x, f_gradient_y, f_gradient_z, candidate_gradient ); diff --git a/device/kernel_sd.cl b/device/kernel_sd.cl index 886161d5..621a87c7 100644 --- a/device/kernel_sd.cl +++ b/device/kernel_sd.cl @@ -173,9 +173,9 @@ gradient_minSD( // ------------------------------------------------------------------- // Gradient of the intermolecular energy per each ligand atom // Also used to store the accummulated gradient per each ligand atom - __local int i_gradient_x[MAX_NUM_OF_ATOMS]; - __local int i_gradient_y[MAX_NUM_OF_ATOMS]; - __local int i_gradient_z[MAX_NUM_OF_ATOMS]; + __local float gradient_x[MAX_NUM_OF_ATOMS]; + __local float gradient_y[MAX_NUM_OF_ATOMS]; + __local float gradient_z[MAX_NUM_OF_ATOMS]; __local float f_gradient_x[MAX_NUM_OF_ATOMS]; __local float f_gradient_y[MAX_NUM_OF_ATOMS]; @@ -464,7 +464,7 @@ gradient_minSD( dependence_on_rotangle_const, // Gradient-related arguments dockpars_num_of_genes, - (__local float*)i_gradient_x, (__local float*)i_gradient_y, (__local float*)i_gradient_z, + gradient_x, gradient_y, gradient_z, f_gradient_x, f_gradient_y, f_gradient_z, gradient ); diff --git a/host/src/processgrid.cpp b/host/src/processgrid.cpp index 9caa7cd1..884d69c0 100644 --- a/host/src/processgrid.cpp +++ b/host/src/processgrid.cpp @@ -147,7 +147,7 @@ int get_gridinfo( } if(mygrid->grid_mapping.size() != 2*grid_types){ - printf("Error: Number of grid map labels (%d) and filenames (%d) mismatched in fld file.\n", grid_types, mygrid->grid_mapping.size()-grid_types); + printf("Error: Number of grid map labels (%d) and filenames (%d) mismatched in fld file.\n", grid_types, (int)mygrid->grid_mapping.size()-grid_types); return 1; } if(!have_e){ diff --git a/host/src/processligand.cpp b/host/src/processligand.cpp index c2a7afc2..67f86a2b 100644 --- a/host/src/processligand.cpp +++ b/host/src/processligand.cpp @@ -1920,9 +1920,9 @@ float calc_interE_f( } // Getting coordinates - float x_low = floor(x); - float y_low = floor(y); - float z_low = floor(z); + x_low = floor(x); + y_low = floor(y); + z_low = floor(z); // Grid value at 000 const float* grid_value_000 = mygrid->grids.data() + ((unsigned long)(x_low + y_low*g1 + z_low*g2)<<2);