-
Notifications
You must be signed in to change notification settings - Fork 0
/
LinearRegression_cuBLAS.cu
174 lines (144 loc) · 5.84 KB
/
LinearRegression_cuBLAS.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
// Linear Regression implemented from scratch, but using cuBLAS.
// calculate_coefficients: cuBLAS reductions for the x/y sums + 2xcublasSdot (xy, xx)
// make_predictions: cublasSaxpy
// calculate_mse: cublasSaxpy + cublasSdot
// reference (python) -- https://www.geeksforgeeks.org/linear-regression-python-implementation/
#ifndef __LINEAR_REGRESSION_CUBLAS__
#define __LINEAR_REGRESSION_CUBLAS__
#include <cstdio>
#include <cstring>
#include <stdexcept>
#include <vector>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include "CUDA_helpers.cu"
// Single-feature linear regression (y = slope * x + intercept) fitted by
// ordinary least squares, with the vector reductions delegated to cuBLAS.
//
// Data layout: x and y each hold n samples. The first train_size samples are
// the training set; the following test_size samples are the test set.
//
// Intended call order:
//   calculate_coefficients() -> make_predictions() -> calculate_mse()
// Every step prints its result and returns false (after printing an error)
// when it is called out of order or on degenerate data.
//
// CUDA_CHECK / CUBLAS_CHECK are provided by CUDA_helpers.cu; what happens when
// a CUDA or cuBLAS call fails is defined there.
class LinearRegression_cuBLAS {
public:
    // Copies the dataset to the host and to the device and creates the cuBLAS
    // handle.
    //
    // n          - total number of samples in x and y
    // train_size - number of leading samples used for fitting
    // test_size  - number of samples following the training set used for testing
    // x, y       - host arrays with at least n elements each (copied, not owned)
    //
    // Throws std::invalid_argument when a size is negative, when
    // train_size + test_size exceeds n, or when x / y is null while n > 0.
    LinearRegression_cuBLAS (
        const int n,
        const int train_size,
        const int test_size,
        float* x,
        float* y
    ) {
        // Validate before allocating anything so a rejected call leaks nothing.
        if (n < 0 || train_size < 0 || test_size < 0) {
            throw std::invalid_argument("LinearRegression_cuBLAS: sizes must be non-negative");
        }
        // Widen before adding so the sum cannot overflow int.
        if (static_cast<long long>(train_size) + test_size > n) {
            throw std::invalid_argument("LinearRegression_cuBLAS: train_size + test_size exceeds n");
        }
        if (n > 0 && (!x || !y)) {
            throw std::invalid_argument("LinearRegression_cuBLAS: x and y must not be null");
        }

        // Plain state
        this->n = n;
        this->train_size = train_size;
        this->test_size = test_size;
        this->trained = false;
        this->made_predictions = false;
        this->calculated_mse = false;
        this->slope = 0.0f;
        this->intercept = 0.0f;
        this->mse = 0.0f;

        // Host copies of the dataset and the prediction buffer
        this->h_x = new float[n];
        this->h_y = new float[n];
        if (n > 0) {
            memcpy(h_x, x, n * sizeof(float));
            memcpy(h_y, y, n * sizeof(float));
        }
        this->h_predictions = new float[test_size];

        // Device copies of the dataset and the prediction buffer
        CUDA_CHECK( cudaMalloc(&d_x, n * sizeof(float)) );
        CUDA_CHECK( cudaMalloc(&d_y, n * sizeof(float)) );
        CUDA_CHECK( cudaMemcpy(d_x, this->h_x, n * sizeof(float), cudaMemcpyHostToDevice) );
        CUDA_CHECK( cudaMemcpy(d_y, this->h_y, n * sizeof(float), cudaMemcpyHostToDevice) );
        CUDA_CHECK( cudaMalloc(&d_predictions, test_size * sizeof(float)) );

        // Vector of ones: dot(v, ones) is the signed sum of v. cublasSasum cannot
        // be used for that because it sums absolute values.
        CUDA_CHECK( cudaMalloc(&d_ones, train_size * sizeof(float)) );
        if (train_size > 0) {
            std::vector<float> ones(train_size, 1.0f);
            CUDA_CHECK( cudaMemcpy(d_ones, &ones[0], train_size * sizeof(float), cudaMemcpyHostToDevice) );
        }

        // cuBLAS
        CUBLAS_CHECK( cublasCreate(&handle) );
    }

    // Releases the host buffers, the device buffers and the cuBLAS handle.
    ~LinearRegression_cuBLAS() {
        delete[] h_x;
        delete[] h_y;
        delete[] h_predictions;
        CUDA_CHECK( cudaFree(d_x) );
        CUDA_CHECK( cudaFree(d_y) );
        CUDA_CHECK( cudaFree(d_predictions) );
        CUDA_CHECK( cudaFree(d_ones) );
        CUBLAS_CHECK( cublasDestroy(handle) );
    }

    // Fits slope and intercept on the training set and prints them.
    //
    // Returns false (and leaves the model untouched) when predictions were
    // already made, when the training set is empty, or when x has exactly zero
    // variance so that the slope is undefined.
    bool calculate_coefficients() {
        if (made_predictions) {
            printf ("error: predictions already made\n");
            return false;
        }
        if (train_size <= 0) {
            printf ("error: empty training set\n");
            return false;
        }

        // Signed sums of x and y via dot products with the ones vector
        float x_sum = 0.0f, y_sum = 0.0f;
        CUBLAS_CHECK( cublasSdot(handle, train_size, d_x, 1, d_ones, 1, &x_sum) );
        CUBLAS_CHECK( cublasSdot(handle, train_size, d_y, 1, d_ones, 1, &y_sum) );
        const float x_mean = x_sum / train_size;
        const float y_mean = y_sum / train_size;

        // sum(x*y) and sum(x*x)
        float xy_sum = 0.0f, xx_sum = 0.0f;
        CUBLAS_CHECK( cublasSdot(handle, train_size, d_x, 1, d_y, 1, &xy_sum) );
        CUBLAS_CHECK( cublasSdot(handle, train_size, d_x, 1, d_x, 1, &xx_sum) );

        // Closed-form least squares on the host:
        //   slope     = (sum(xy) - n*mean(x)*mean(y)) / (sum(xx) - n*mean(x)^2)
        //   intercept = mean(y) - slope * mean(x)
        const float nxy = train_size * x_mean * y_mean;
        const float nxx = train_size * x_mean * x_mean;
        const float denominator = xx_sum - nxx;
        // Only the exactly-degenerate case is caught here (e.g. a single sample).
        if (denominator == 0.0f) {
            printf ("error: x has zero variance, slope is undefined\n");
            return false;
        }
        this->slope = (xy_sum - nxy) / denominator;
        this->intercept = y_mean - slope * x_mean;

        // Print slope and intercept
        printf ("Slope: %f\n", this->slope);
        printf ("Intercept: %f\n", this->intercept);

        trained = true;
        return true;
    }

    // Computes predictions for the test set (slope * x + intercept), copies them
    // to h_predictions and prints up to the first 10 of them.
    //
    // Returns false when the model is not trained or predictions already exist.
    bool make_predictions() {
        if (!trained) {
            printf ("error: not trained\n");
            return false;
        }
        if (made_predictions) {
            printf ("error: predictions already made\n");
            return false;
        }

        // Seed the output with the intercept so a single axpy yields m*x + b
        for (int i = 0; i < test_size; i++) {
            h_predictions[i] = intercept;
        }
        CUDA_CHECK( cudaMemcpy(d_predictions, h_predictions, test_size * sizeof(float), cudaMemcpyHostToDevice) );

        // d_predictions = slope * x_test + d_predictions
        float *d_x_test = d_x + train_size; // Test samples follow the training samples
        CUBLAS_CHECK( cublasSaxpy(handle, test_size, &slope, d_x_test, 1, d_predictions, 1) );

        // Copy data back to host and print
        CUDA_CHECK( cudaMemcpy(h_predictions, d_predictions, test_size * sizeof(float), cudaMemcpyDeviceToHost) );
        printf ("Predictions (first 10)\n");
        // Never print more rows than there are test samples
        const int shown = test_size < 10 ? test_size : 10;
        for (int i = 0; i < shown; i++) {
            printf ("%f: %f\n", h_x[train_size + i], h_predictions[i]);
        }

        made_predictions = true;
        return true;
    }

    // Computes and prints the mean squared error of the predictions on the test
    // set.
    //
    // Note: this overwrites d_predictions with the residuals; h_predictions keeps
    // the predicted values.
    //
    // Returns false when the MSE was already calculated, when predictions have
    // not been made, or when the test set is empty.
    bool calculate_mse() {
        if (calculated_mse) {
            printf ("error: MSE already calculated\n");
            return false;
        }
        if (!made_predictions) {
            printf ("error: predictions not made\n");
            return false;
        }
        if (test_size <= 0) {
            printf ("error: empty test set\n");
            return false;
        }

        // Residuals in place: d_predictions = -1 * y_test + d_predictions
        float alpha = -1.0f;
        float *d_y_test = d_y + train_size; // Test targets follow the training targets
        CUBLAS_CHECK( cublasSaxpy(handle, test_size, &alpha, d_y_test, 1, d_predictions, 1) );

        // Sum of squared residuals = dot(residuals, residuals)
        float squared_error_sum = 0.0f;
        CUBLAS_CHECK( cublasSdot(handle, test_size, d_predictions, 1, d_predictions, 1, &squared_error_sum) );

        // Mean
        mse = squared_error_sum / test_size;

        // Print MSE
        printf("Mean Squared Error: %f\n", mse);

        calculated_mse = true;
        return true;
    }

private:
    // Non-copyable: the object owns raw host/device buffers and a cuBLAS handle,
    // so a member-wise copy would free them twice. Declared but never defined.
    LinearRegression_cuBLAS(const LinearRegression_cuBLAS&);
    LinearRegression_cuBLAS& operator=(const LinearRegression_cuBLAS&);

    int n;                      // Dataset size
    int train_size, test_size;  // Number of elements in train and test set
    float *h_x, *h_y;           // Independent and dependent values on host
    float *d_x, *d_y;           // Independent and dependent values on device
    float *d_ones;              // train_size ones on device, used to sum via dot products
    cublasHandle_t handle;      // cuBLAS handle
    bool trained;               // If coefficients have been calculated
    float slope, intercept;     // The model weights
    bool made_predictions;      // If predictions have been made
    float *h_predictions;       // Predictions on host
    float *d_predictions;       // Predictions on device (residuals after calculate_mse)
    bool calculated_mse;        // If MSE has been calculated
    float mse;                  // Mean squared error
};
#endif // __LINEAR_REGRESSION_CUBLAS__