-
Notifications
You must be signed in to change notification settings - Fork 8
/
collisions.cc
137 lines (123 loc) · 4.66 KB
/
collisions.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <stdint.h>
#include <cuda_runtime.h>
#include "collisions.cuh"
#include "collisions.h"
#include "collisions_cpu.h"
// Wraps an expression yielding a CUDA status code: passes its value, together
// with the file name and line number of the call site, to gpuAssert().
#define gpuErrChk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
// Makes names from namespace std (e.g. chrono) usable without the std:: prefix
// in the rest of this file.
using namespace std;
/**
 * Reports a failed CUDA runtime call and optionally terminates the process.
 *
 * Does nothing when `code` is cudaSuccess. Otherwise prints the CUDA error
 * string together with the originating file and line to stderr.
 *
 * @param code  status returned by a CUDA runtime call.
 * @param file  source file of the call site (normally __FILE__).
 * @param line  source line of the call site (normally __LINE__).
 * @param abort when true (the default) the process exits with `code` as its
 *              exit status after reporting; when false the error is only
 *              reported and control returns to the caller.
 */
inline void gpuAssert(cudaError_t code, const char *file, int line,
                      bool abort = true) {
    if (code != cudaSuccess) {
        fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file,
                line);
        // Honour the caller's request: previously the flag was ignored and the
        // process always exited, making abort=false impossible to use.
        if (abort) {
            exit(code);
        }
    }
}
// Default block count and threads-per-block values that main() passes to the
// cuda* helper calls. main() overwrites them from the optional NUMBLOCKS and
// THREADSPERBLOCK command-line arguments, and lowers num_blocks when fewer
// blocks of threads_per_block threads already cover every object.
unsigned int num_blocks = 100;
unsigned int threads_per_block = 512;
int main(int argc, char *argv[]) {
if (argc < 4) {
printf(
"Usage: %s NUMOBJECTS MAXSPEED MAXDIM [NUMBLOCKS [THREADSPERBLOCK]]\n",
argv[0]);
return -1;
}
unsigned int num_objects = atoi(argv[1]);
float max_speed = atof(argv[2]);
float max_dim = atof(argv[3]);
if (argc >= 5) {
num_blocks = atoi(argv[4]);
}
if (argc >= 6) {
threads_per_block = atoi(argv[5]);
}
unsigned int object_size = (num_objects - 1) / threads_per_block + 1;
if (object_size < num_blocks) {
num_blocks = object_size;
}
object_size = num_objects * DIM * sizeof(float);
unsigned int cell_size = num_objects * DIM_2 * sizeof(uint32_t);
unsigned int num_cells;
unsigned int num_collisions;
unsigned int num_collisions_cpu;
unsigned int num_tests;
unsigned int num_tests_cpu;
unsigned int *d_temp;
float *positions = (float *) malloc(object_size);
float *velocities = (float *) malloc(object_size);
float *dims = (float *) malloc(object_size);
float *d_positions;
float *d_velocities;
float *d_dims;
double time;
double time_cpu;
uint32_t *d_cells;
uint32_t *d_cells_temp;
uint32_t *d_objects;
uint32_t *d_objects_temp;
uint32_t *d_radices;
uint32_t *d_radix_sums;
chrono::time_point<chrono::system_clock> start;
chrono::duration<double> duration;
cudaMalloc((void **) &d_temp, 2 * sizeof(unsigned int));
cudaMalloc((void **) &d_positions, object_size);
cudaMalloc((void **) &d_velocities, object_size);
cudaMalloc((void **) &d_dims, object_size);
cudaMalloc((void **) &d_cells, cell_size);
cudaMalloc((void **) &d_cells_temp, cell_size);
cudaMalloc((void **) &d_objects, cell_size);
cudaMalloc((void **) &d_objects_temp, cell_size);
cudaMalloc((void **) &d_radices, NUM_BLOCKS * GROUPS_PER_BLOCK *
NUM_RADICES * sizeof(uint32_t));
cudaMalloc((void **) &d_radix_sums, NUM_RADICES * sizeof(uint32_t));
cudaInitObjects(d_positions, d_velocities, d_dims, num_objects, max_speed,
max_dim, num_blocks, threads_per_block);
cudaMemcpy(positions, d_positions, object_size, cudaMemcpyDeviceToHost);
cudaMemcpy(velocities, d_velocities, object_size, cudaMemcpyDeviceToHost);
cudaMemcpy(dims, d_dims, object_size, cudaMemcpyDeviceToHost);
start = chrono::system_clock::now();
num_cells = cudaInitCells(d_cells, d_objects, d_positions, d_dims,
num_objects, max_dim, d_temp, num_blocks,
threads_per_block);
cudaSortCells(d_cells, d_objects, d_cells_temp, d_objects_temp, d_radices,
d_radix_sums, num_objects);
num_collisions = cudaCellCollide(d_cells, d_objects, d_positions,
d_velocities, d_dims, num_objects,
num_cells, d_temp, &num_tests, num_blocks,
threads_per_block);
duration = chrono::system_clock::now() - start;
time = duration.count();
start = chrono::system_clock::now();
num_collisions_cpu = CellCollide(positions, velocities, dims, num_objects);
num_tests_cpu = num_objects * (num_objects - 1) / 2;
duration = chrono::system_clock::now() - start;
time_cpu = duration.count();
printf("Collisions encountered on GPU: %d\n", num_collisions);
printf("Collisions encountered on CPU: %d\n", num_collisions_cpu);
printf("Collision tests performed on GPU: %d\n", num_tests);
printf("Collision tests performed on CPU: %d\n", num_tests_cpu);
printf("Time spent performing tests on GPU: %f s\n", time);
printf("Time spent performing tests on CPU: %f s\n", time_cpu);
printf("Reduction in collision tests performed: %f%%\n",
100.0 * (num_tests_cpu - num_tests) / num_tests_cpu);
printf("Reduction in time spent performing tests: %f%%\n",
100.0 * (time_cpu - time) / time_cpu);
cudaFree(d_temp);
cudaFree(d_positions);
cudaFree(d_velocities);
cudaFree(d_dims);
cudaFree(d_cells);
cudaFree(d_cells_temp);
cudaFree(d_objects);
cudaFree(d_objects_temp);
cudaFree(d_radices);
cudaFree(d_radix_sums);
free(positions);
free(velocities);
free(dims);
gpuErrChk(cudaGetLastError());
return 0;
}