Skip to content

Commit

Permalink
Merge pull request #169 from atillack/M1_fix
Browse files Browse the repository at this point in the history
Fixes for Apple
  • Loading branch information
atillack authored Dec 17, 2021
2 parents fbe1342 + 0fe6f00 commit 7b7d5ed
Show file tree
Hide file tree
Showing 11 changed files with 162 additions and 85 deletions.
7 changes: 7 additions & 0 deletions common/defines.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,13 @@ enum {C=0,N=1,O=2,H=3,XX=4,P=5,S=6}; // see "bond_index" in the "AD4.1_bound.da
// Added definition to support flexrings.
#define G 50.0f

// Enables full floating point gradient calculation.
// Use is not advised as:
// - the deterministic gradients (aka integer gradients) are much faster *and*
// - speed up the local search convergence
// Please only use for debugging
// #define FLOAT_GRADIENTS

// Use one more coefficient in the fit to the Mehler-Solmajer dielectric in energrad implementation
// Although this improves the fit (particularly for the gradient), it costs a little bit more and
// does not return better accuracy overall (default: commented out, don't use)
Expand Down
35 changes: 33 additions & 2 deletions cuda/calcMergeEneGra.cu
Original file line number Diff line number Diff line change
Expand Up @@ -659,13 +659,26 @@ __device__ void gpu_calc_energrad(
REDUCEFLOATSUM(gz, pFloatAccumulator);

global_energy = energy;
#ifndef FLOAT_GRADIENTS
int* gradient_genotype = (int*)fgradient_genotype;

#endif
if (threadIdx.x == 0) {
// Scaling gradient for translational genes as
// their corresponding gradients were calculated in the space
// where these genes are in Angstrom,
// but AutoDock-GPU translational genes are within in grids
#ifdef FLOAT_GRADIENTS
fgradient_genotype[0] = gx * cData.dockpars.grid_spacing;
fgradient_genotype[1] = gy * cData.dockpars.grid_spacing;
fgradient_genotype[2] = gz * cData.dockpars.grid_spacing;

#if defined (PRINT_GRAD_TRANSLATION_GENES)
printf("\n%s\n", "----------------------------------------------------------");
printf("gradient_x:%f\n", fgradient_genotype [0]);
printf("gradient_y:%f\n", fgradient_genotype [1]);
printf("gradient_z:%f\n", fgradient_genotype [2]);
#endif
#else
gradient_genotype[0] = lrintf(fminf(MAXTERM, fmaxf(-MAXTERM, TERMSCALE * gx * cData.dockpars.grid_spacing)));
gradient_genotype[1] = lrintf(fminf(MAXTERM, fmaxf(-MAXTERM, TERMSCALE * gy * cData.dockpars.grid_spacing)));
gradient_genotype[2] = lrintf(fminf(MAXTERM, fmaxf(-MAXTERM, TERMSCALE * gz * cData.dockpars.grid_spacing)));
Expand All @@ -676,6 +689,7 @@ __device__ void gpu_calc_energrad(
printf("gradient_y:%f\n", gradient_genotype [1]);
printf("gradient_z:%f\n", gradient_genotype [2]);
#endif
#endif
}
__syncthreads();

Expand Down Expand Up @@ -861,6 +875,17 @@ __device__ void gpu_calc_energrad(

// Setting gradient rotation-related genotypes in cube
// Multiplicating by DEG_TO_RAD is to make it uniform to DEG (see torsion gradients)
#ifdef FLOAT_GRADIENTS
fgradient_genotype[3] = (grad_phi / (dependence_on_theta * dependence_on_rotangle)) * DEG_TO_RAD;
fgradient_genotype[4] = (grad_theta / dependence_on_rotangle) * DEG_TO_RAD;
fgradient_genotype[5] = grad_rotangle * DEG_TO_RAD;
#if defined (PRINT_GRAD_ROTATION_GENES)
printf("\n%s\n", "----------------------------------------------------------");
printf("%-30s \n", "grad_axisangle (1,2,3) - after empirical scaling: ");
printf("%-13s %-13s %-13s \n", "grad_phi", "grad_theta", "grad_rotangle");
printf("%-13.6f %-13.6f %-13.6f\n", fgradient_genotype[3], fgradient_genotype[4], fgradient_genotype[5]);
#endif
#else
gradient_genotype[3] = lrintf(fminf(MAXTERM, fmaxf(-MAXTERM, TERMSCALE * (grad_phi / (dependence_on_theta * dependence_on_rotangle)) * DEG_TO_RAD)));
gradient_genotype[4] = lrintf(fminf(MAXTERM, fmaxf(-MAXTERM, TERMSCALE * (grad_theta / dependence_on_rotangle) * DEG_TO_RAD)));
gradient_genotype[5] = lrintf(fminf(MAXTERM, fmaxf(-MAXTERM, TERMSCALE * grad_rotangle * DEG_TO_RAD)));
Expand All @@ -870,6 +895,7 @@ __device__ void gpu_calc_energrad(
printf("%-13s %-13s %-13s \n", "grad_phi", "grad_theta", "grad_rotangle");
printf("%-13.6f %-13.6f %-13.6f\n", gradient_genotype[3], gradient_genotype[4], gradient_genotype[5]);
#endif
#endif
}
__syncthreads();

Expand Down Expand Up @@ -930,17 +956,22 @@ __device__ void gpu_calc_energrad(

// Assignment of gene-based gradient
// - this works because a * (a_1 + a_2 + ... + a_n) = a*a_1 + a*a_2 + ... + a*a_n
#ifdef FLOAT_GRADIENTS
ATOMICADDF32(&fgradient_genotype[rotbond_id+6], torque_on_axis * DEG_TO_RAD); /*(M_PI / 180.0f)*/;
#else
ATOMICADDI32(&gradient_genotype[rotbond_id+6], lrintf(fminf(MAXTERM, fmaxf(-MAXTERM, TERMSCALE * torque_on_axis * DEG_TO_RAD)))); /*(M_PI / 180.0f)*/;
#endif
}
__syncthreads();

#ifndef FLOAT_GRADIENTS
for (uint32_t gene_cnt = threadIdx.x;
gene_cnt < cData.dockpars.num_of_genes;
gene_cnt+= blockDim.x) {
fgradient_genotype[gene_cnt] = ONEOVERTERMSCALE * (float)gradient_genotype[gene_cnt];
}
__syncthreads();

#endif
#if defined (CONVERT_INTO_ANGSTROM_RADIAN)
for (uint32_t gene_cnt = threadIdx.x+3; // Only for gene_cnt > 2 means start gene_cnt at 3
gene_cnt < cData.dockpars.num_of_genes;
Expand Down
135 changes: 83 additions & 52 deletions device/calcMergedEneGra.cl
Original file line number Diff line number Diff line change
Expand Up @@ -22,18 +22,18 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/


#ifndef FLOAT_GRADIENTS
#define TERMBITS 10
#define MAXTERM ((float)(1 << (31 - TERMBITS - 8))) // 2^(31 - 10 - 8) = 2^13 = 8192
#define TERMSCALE ((float)(1 << TERMBITS)) // 2^10 = 1024
#define ONEOVERTERMSCALE (1.0f / TERMSCALE) // 1 / 1024 = 0.000977

// Enables full floating point gradient calculation.
// Use is not advised as:
// - the deterministic gradients (aka integer gradients) are much faster *and*
// - speed up the local search convergence
// Please only use for debugging
// #define FLOAT_GRADIENTS
// inspired by: https://streamhpc.com/blog/2014-11-07/opencl-integer-rounding-c/
// Rounds a float to the nearest integer, halfway cases away from zero
// (i.e. 1.5 -> 2, -1.5 -> -2, 0.4 -> 0).
// The (int) cast truncates toward zero, so the 0.5 offset has to carry the
// same sign as the input. For scalar operands a true comparison yields 1
// (only vector comparisons yield -1), hence the sign is (number > 0) - (number < 0).
// The input is expected to be finite and within int range (callers clamp to +/-MAXTERM).
int float2int_round (float number)
{
	int sign = (int)((number > 0.0f) - (number < 0.0f)); // +1 for number > 0, -1 for number < 0, 0 for number == 0
	return ((int)(number + 0.5f*sign)); // shift away from zero by one half, then truncate toward zero
}
#endif

// Enable restoring map gradient
// Currently, this is not a good idea
Expand Down Expand Up @@ -117,7 +117,7 @@ void gpu_calc_energrad(
// Initializing gradients (forces)
// Derived from autodockdev/maps.py
for ( int atom_id = tidx;
atom_id < MAX_NUM_OF_ATOMS; // makes sure that gradient sum reductions give correct results if dockpars_num_atoms < NUM_OF_THREADS_PER_BLOCK
atom_id < dockpars_num_of_atoms;
atom_id+= NUM_OF_THREADS_PER_BLOCK)
{
// Initialize coordinates
Expand Down Expand Up @@ -215,7 +215,7 @@ void gpu_calc_energrad(
// ================================================
// CALCULATING INTERMOLECULAR GRADIENTS
// ================================================
float inv_grid_spacing = native_recip(dockpars_grid_spacing);
float inv_grid_spacing = native_divide(1.0f,dockpars_grid_spacing);
float weights[8];
float cube[8];
for ( int atom_id = tidx;
Expand All @@ -233,10 +233,10 @@ void gpu_calc_energrad(
if ((x < 0) || (y < 0) || (z < 0) || (x >= dockpars_gridsize_x-1)
|| (y >= dockpars_gridsize_y-1)
|| (z >= dockpars_gridsize_z-1)){
#ifdef RESTORING_MAP_GRADIENT
x -= 0.5f * dockpars_gridsize_x;
y -= 0.5f * dockpars_gridsize_y;
z -= 0.5f * dockpars_gridsize_z;
#ifdef RESTORING_MAP_GRADIENT
partial_energies[tidx] += 21.0f * (x*x+y*y+z*z); //100000.0f;
#else
partial_energies[tidx] += 16777216.0f; //100000.0f;
Expand All @@ -248,26 +248,32 @@ void gpu_calc_energrad(
// Setting gradients (forces) penalties.
// The idea here is to push the offending
// molecule towards the center
#ifdef FLOAT_GRADIENTS
gradient_x[atom_id] += TERMSCALE * 42.0f * x * inv_grid_spacing;
gradient_y[atom_id] += TERMSCALE * 42.0f * y * inv_grid_spacing;
gradient_z[atom_id] += TERMSCALE * 42.0f * z * inv_grid_spacing;
#else
gradient_x[atom_id] += convert_int_rte( TERMSCALE * 42.0f * x * inv_grid_spacing );
gradient_y[atom_id] += convert_int_rte( TERMSCALE * 42.0f * y * inv_grid_spacing );
gradient_z[atom_id] += convert_int_rte( TERMSCALE * 42.0f * z * inv_grid_spacing );
#endif // FLOAT_GRADIENTS
#ifdef FLOAT_GRADIENTS
gradient_x[atom_id] += 42.0f * x * inv_grid_spacing;
gradient_y[atom_id] += 42.0f * y * inv_grid_spacing;
gradient_z[atom_id] += 42.0f * z * inv_grid_spacing;
#else
gradient_x[atom_id] += float2int_round( TERMSCALE * 42.0f * x * inv_grid_spacing );
gradient_y[atom_id] += float2int_round( TERMSCALE * 42.0f * y * inv_grid_spacing );
gradient_z[atom_id] += float2int_round( TERMSCALE * 42.0f * z * inv_grid_spacing );
#endif // FLOAT_GRADIENTS
#else
#ifdef FLOAT_GRADIENTS
gradient_x[atom_id] += 16777216.0f;
gradient_y[atom_id] += 16777216.0f;
gradient_z[atom_id] += 16777216.0f;
#else
gradient_x[atom_id] += 16777216;
gradient_y[atom_id] += 16777216;
gradient_z[atom_id] += 16777216;
#endif // FLOAT_GRADIENTS
#endif
continue;
}
// Getting coordinates
float x_low = floor(x);
float y_low = floor(y);
float z_low = floor(z);
int x_low = floor(x);
int y_low = floor(y);
int z_low = floor(z);

// Grid value at 000
__global const float* grid_value_000 = dockpars_fgrids + ((ulong)(x_low + y_low*g1 + z_low*g2)<<2);
Expand Down Expand Up @@ -429,9 +435,9 @@ void gpu_calc_energrad(
gradient_y[atom_id] += gy;
gradient_z[atom_id] += gz;
#else
gradient_x[atom_id] += convert_int_rte(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * gx)));
gradient_y[atom_id] += convert_int_rte(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * gy)));
gradient_z[atom_id] += convert_int_rte(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * gz)));
gradient_x[atom_id] += float2int_round(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * gx)));
gradient_y[atom_id] += float2int_round(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * gy)));
gradient_z[atom_id] += float2int_round(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * gz)));
#endif
} // End atom_id for-loop (INTERMOLECULAR ENERGY)

Expand Down Expand Up @@ -509,7 +515,7 @@ void gpu_calc_energrad(
float rmn=native_powr(smoothed_distance,m-n);
float rm=native_powr(smoothed_distance,-m);
partial_energies[tidx] += (kerconst_intra->VWpars_AC_const[idx]-rmn*kerconst_intra->VWpars_BD_const[idx])*rm;
priv_gradient_per_intracontributor += (n*kerconst_intra->VWpars_BD_const[idx]*rmn-m*kerconst_intra->VWpars_AC_const[idx])*rm*native_recip(smoothed_distance);
priv_gradient_per_intracontributor += native_divide((n*kerconst_intra->VWpars_BD_const[idx]*rmn-m*kerconst_intra->VWpars_AC_const[idx])*rm,smoothed_distance);
#if defined (DEBUG_ENERGY_KERNEL)
partial_intraE[tidx] += (kerconst_intra->VWpars_AC_const[idx]-rmn*kerconst_intra->VWpars_BD_const[idx])*rm;
#endif
Expand Down Expand Up @@ -544,16 +550,16 @@ void gpu_calc_energrad(
#ifndef DIEL_FIT_ABC
float dist_shift=atomic_distance+1.26366f;
dist2=dist_shift*dist_shift;
float diel = 1.10859f*native_recip(dist2)+0.010358f;
float diel = native_divide(1.10859f,dist2)+0.010358f;
#else
float dist_shift=atomic_distance+1.588f;
dist2=dist_shift*dist_shift;
float disth_shift=atomic_distance+0.794f;
float disth4=disth_shift*disth_shift;
disth4*=disth4;
float diel = 1.404f*native_recip(dist2)+0.072f*native_recip(disth4)+0.00831f;
float diel = native_divide(1.404f,dist2)+native_divide(0.072f,disth4)+0.00831f;
#endif
float es_energy = dockpars_coeff_elec * q1 * q2 * native_recip(atomic_distance);
float es_energy = native_divide(dockpars_coeff_elec * q1 * q2,atomic_distance);
partial_energies[tidx] += diel * es_energy + desolv_energy;

#if defined (DEBUG_ENERGY_KERNEL)
Expand All @@ -574,11 +580,11 @@ void gpu_calc_energrad(

// priv_gradient_per_intracontributor += -dockpars_coeff_elec * q1 * q2 * native_divide (upper, lower) -
// 0.0771605f * atomic_distance * desolv_energy;
priv_gradient_per_intracontributor += -es_energy*native_recip(atomic_distance) * diel
priv_gradient_per_intracontributor += native_divide(-es_energy,atomic_distance) * diel
#ifndef DIEL_FIT_ABC
-es_energy * 2.21718f*native_recip(dist2*dist_shift)
-native_divide(es_energy * 2.21718f,dist2*dist_shift)
#else
-es_energy * (2.808f * native_recip(dist2*dist_shift)+0.288f*native_recip(disth4*disth_shift))
-es_energy * (native_divide(2.808f,dist2*dist_shift)+native_divide(0.288f,disth4*disth_shift))
#endif
-0.0771605f * atomic_distance * desolv_energy; // 1/3.6^2 = 1/12.96 = 0.0771605
} // if cuttoff2 - internuclear-distance at 20.48A
Expand All @@ -587,15 +593,15 @@ void gpu_calc_energrad(
// into the contribution of each atom of the pair.
// Distances in Angstroms of vector that goes from
// "atom1_id"-to-"atom2_id", therefore - subx, - suby, and - subz are used
float grad_div_dist = -priv_gradient_per_intracontributor*native_recip(dist);
float grad_div_dist = native_divide(-priv_gradient_per_intracontributor,dist);
#ifdef FLOAT_GRADIENTS
float priv_intra_gradient_x = subx * grad_div_dist;
float priv_intra_gradient_y = suby * grad_div_dist;
float priv_intra_gradient_z = subz * grad_div_dist;
#else
int priv_intra_gradient_x = convert_int_rte(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * subx * grad_div_dist)));
int priv_intra_gradient_y = convert_int_rte(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * suby * grad_div_dist)));
int priv_intra_gradient_z = convert_int_rte(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * subz * grad_div_dist)));
int priv_intra_gradient_x = float2int_round(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * subx * grad_div_dist)));
int priv_intra_gradient_y = float2int_round(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * suby * grad_div_dist)));
int priv_intra_gradient_z = float2int_round(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * subz * grad_div_dist)));
#endif
// Calculating gradients in xyz components.
// Gradients for both atoms in a single contributor pair
Expand Down Expand Up @@ -655,24 +661,37 @@ void gpu_calc_energrad(
accumulator_z[tidx] += accumulator_z[tidx+off];
}
}
#ifndef FLOAT_GRADIENTS
__local int* i_gradient_genotype = (__local int*)gradient_genotype;
#endif
if (tidx == 0) {
*energy = partial_energies[0];
// Scaling gradient for translational genes as
// their corresponding gradients were calculated in the space
// where these genes are in Angstrom,
// but AutoDock-GPU translational genes are within in grids
i_gradient_genotype[0] = convert_int_rte(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * accumulator_x[0] * dockpars_grid_spacing)));
i_gradient_genotype[1] = convert_int_rte(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * accumulator_y[0] * dockpars_grid_spacing)));
i_gradient_genotype[2] = convert_int_rte(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * accumulator_z[0] * dockpars_grid_spacing)));
#ifdef FLOAT_GRADIENTS
gradient_genotype[0] = accumulator_x[0] * dockpars_grid_spacing;
gradient_genotype[1] = accumulator_y[0] * dockpars_grid_spacing;
gradient_genotype[2] = accumulator_z[0] * dockpars_grid_spacing;
#if defined (PRINT_GRAD_TRANSLATION_GENES)
printf("\n%s\n", "----------------------------------------------------------");
printf("gradient_x:%f\n", gradient_genotype [0]);
printf("gradient_y:%f\n", gradient_genotype [1]);
printf("gradient_z:%f\n", gradient_genotype [2]);
#endif
#else
i_gradient_genotype[0] = float2int_round(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * accumulator_x[0] * dockpars_grid_spacing)));
i_gradient_genotype[1] = float2int_round(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * accumulator_y[0] * dockpars_grid_spacing)));
i_gradient_genotype[2] = float2int_round(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * accumulator_z[0] * dockpars_grid_spacing)));
#if defined (PRINT_GRAD_TRANSLATION_GENES)
printf("\n%s\n", "----------------------------------------------------------");
printf("gradient_x:%f\n", i_gradient_genotype [0]);
printf("gradient_y:%f\n", i_gradient_genotype [1]);
printf("gradient_z:%f\n", i_gradient_genotype [2]);
printf("i_gradient_x:%f\n", i_gradient_genotype [0]);
printf("i_gradient_y:%f\n", i_gradient_genotype [1]);
printf("i_gradient_z:%f\n", i_gradient_genotype [2]);
#endif
#endif
}
barrier(CLK_LOCAL_MEM_FENCE);

// ------------------------------------------
// Obtaining rotation-related gradients
Expand Down Expand Up @@ -736,7 +755,7 @@ void gpu_calc_energrad(

// Derived from rotation.py/axisangle_to_q()
// genes[3:7] = rotation.axisangle_to_q(torque, rad)
float torque_length = fast_length(torque_rot);
float torque_length = native_sqrt(torque_rot.x*torque_rot.x+torque_rot.y*torque_rot.y+torque_rot.z*torque_rot.z);
torque_length += (torque_length<1e-20f)*1e-20f;

#if defined (PRINT_GRAD_ROTATION_GENES)
Expand All @@ -746,7 +765,7 @@ void gpu_calc_energrad(

// Finding the quaternion that performs
// the infinitesimal rotation around torque axis
float4 quat_torque = torque_rot * SIN_HALF_INFINITESIMAL_RADIAN * native_recip(torque_length);
float4 quat_torque = native_divide(torque_rot * SIN_HALF_INFINITESIMAL_RADIAN, torque_length);
quat_torque.w = COS_HALF_INFINITESIMAL_RADIAN;

#if defined (PRINT_GRAD_ROTATION_GENES)
Expand Down Expand Up @@ -898,9 +917,15 @@ void gpu_calc_energrad(

// Setting gradient rotation-related genotypes in cube
// Multiplicating by DEG_TO_RAD is to make it uniform to DEG (see torsion gradients)
i_gradient_genotype[3] = convert_int_rte(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * native_divide(grad_phi, (dependence_on_theta * dependence_on_rotangle)) * DEG_TO_RAD)));
i_gradient_genotype[4] = convert_int_rte(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * native_divide(grad_theta, dependence_on_rotangle) * DEG_TO_RAD)));
i_gradient_genotype[5] = convert_int_rte(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * grad_rotangle * DEG_TO_RAD)));
#ifdef FLOAT_GRADIENTS
gradient_genotype[3] = native_divide(grad_phi, (dependence_on_theta * dependence_on_rotangle)) * DEG_TO_RAD;
gradient_genotype[4] = native_divide(grad_theta, dependence_on_rotangle) * DEG_TO_RAD;
gradient_genotype[5] = grad_rotangle * DEG_TO_RAD;
#else
i_gradient_genotype[3] = float2int_round(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * native_divide(grad_phi, (dependence_on_theta * dependence_on_rotangle)) * DEG_TO_RAD)));
i_gradient_genotype[4] = float2int_round(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * native_divide(grad_theta, dependence_on_rotangle) * DEG_TO_RAD)));
i_gradient_genotype[5] = float2int_round(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * grad_rotangle * DEG_TO_RAD)));
#endif
#if defined (PRINT_GRAD_ROTATION_GENES)
printf("\n%s\n", "----------------------------------------------------------");
printf("%-30s \n", "grad_axisangle (1,2,3) - after empirical scaling: ");
Expand Down Expand Up @@ -956,24 +981,30 @@ void gpu_calc_energrad(

// Assignment of gene-based gradient
// - this works because a * (a_1 + a_2 + ... + a_n) = a*a_1 + a*a_2 + ... + a*a_n
atomic_add(&i_gradient_genotype[rotbond_id+6], convert_int_rte(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * torque_on_axis * DEG_TO_RAD)))); /*(M_PI / 180.0f)*/;
#ifdef FLOAT_GRADIENTS
atomicAdd_g_f(&gradient_genotype[rotbond_id+6], torque_on_axis * DEG_TO_RAD); /*(M_PI / 180.0f)*/;
#else
atomic_add(&i_gradient_genotype[rotbond_id+6], float2int_round(fmin(MAXTERM, fmax(-MAXTERM, TERMSCALE * torque_on_axis * DEG_TO_RAD)))); /*(M_PI / 180.0f)*/;
#endif
}
barrier(CLK_LOCAL_MEM_FENCE);
#ifndef FLOAT_GRADIENTS
for ( int gene_cnt = tidx;
gene_cnt < dockpars_num_of_genes;
gene_cnt+= NUM_OF_THREADS_PER_BLOCK)
{
gradient_genotype[gene_cnt] = ONEOVERTERMSCALE * (float)i_gradient_genotype[gene_cnt];
}
barrier(CLK_LOCAL_MEM_FENCE);
#endif

#if defined (CONVERT_INTO_ANGSTROM_RADIAN)
#if defined (CONVERT_INTO_ANGSTROM_RADIAN)
for ( int gene_cnt = tidx+3; // Only for gene_cnt > 2 means start gene_cnt at 3
gene_cnt < dockpars_num_of_genes;
gene_cnt+= NUM_OF_THREADS_PER_BLOCK)
{
gradient_genotype[gene_cnt] *= dockpars_grid_spacing * dockpars_grid_spacing * SCFACTOR_ANGSTROM_RADIAN;
}
barrier(CLK_LOCAL_MEM_FENCE);
#endif
#endif
}
Loading

0 comments on commit 7b7d5ed

Please sign in to comment.