Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added mapping for dw conv #18

Open
wants to merge 2 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions bareMetalC/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ tests = \
padded \
mvin_scale \
conv \
conv_dw \
conv_with_pool \
conv_with_dilation \
conv_with_dilation_and_rot180 \
Expand Down
325 changes: 325 additions & 0 deletions bareMetalC/conv_dw.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,325 @@
#include <stdint.h>
#include <stddef.h>
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#ifndef BAREMETAL
#include <sys/mman.h>
#endif
#include "include/gemmini_testutils.h"

/*
 * Test geometry.
 *
 * Two configurations are provided: a larger one for hosted (non-BAREMETAL)
 * builds and a small one for bare-metal builds. In both, OUT_CHANNELS is tied
 * to IN_CHANNELS: the CPU reference conv_dw() takes a single channel count,
 * while the weight, bias and output buffers in main() are dimensioned by
 * OUT_CHANNELS, so the two must agree for the buffers to share one layout.
 */
#ifndef BAREMETAL

#define BATCH_SIZE 4
#define IN_DIM 224
#define IN_CHANNELS 3
#define OUT_CHANNELS IN_CHANNELS
#define KERNEL_DIM 3
#define PADDING 1
#define STRIDE 2

#else

#define IN_DIM 14
#define IN_CHANNELS 34
#define OUT_CHANNELS IN_CHANNELS
#define BATCH_SIZE 1
#define KERNEL_DIM 3
#define PADDING 1
#define STRIDE 2

#endif

// Guard against future edits that let the two channel counts drift apart.
#if OUT_CHANNELS != IN_CHANNELS
#error "conv_dw test requires OUT_CHANNELS == IN_CHANNELS"
#endif

// When true, the bias buffer is zero-filled and NULL is passed as the bias.
#define NO_BIAS false

// Output spatial size of a strided convolution over the padded input.
#define OUT_DIM ((IN_DIM + 2*PADDING - KERNEL_DIM) / STRIDE + 1)
// Number of taps in one (single-channel) kernel.
#define PATCH_SIZE (KERNEL_DIM * KERNEL_DIM)
// Number of output positions across the whole batch.
#define N_PATCHES (BATCH_SIZE * OUT_DIM * OUT_DIM)

/*
 * CPU reference for the depthwise convolution under test.
 *
 * Every channel is convolved independently with its own
 * kernel_size x kernel_size filter. Arrays are indexed as
 * input[batch][row][col][channel], weight[channel][krow][kcol] and
 * output[batch][row][col][channel].
 *
 * Taps that fall outside the input (because of `padding`) are skipped, so
 * they contribute nothing to the sum. `bias` may be NULL, in which case the
 * accumulator starts at zero; otherwise it starts at bias[channel]. The
 * accumulated value is saturated to [elem_t_min, elem_t_max] before being
 * stored. No scaling or activation is applied.
 */
static void conv_dw(const size_t batch_size, const size_t channels, const size_t in_dim, const size_t out_dim, const size_t kernel_size,
        const size_t padding, const size_t stride,
        const elem_t input[batch_size][in_dim][in_dim][channels],
        const elem_t weight[channels][kernel_size][kernel_size],
        const acc_t * bias,
        elem_t output [batch_size][out_dim][out_dim][channels])
{
    for (size_t batch = 0; batch < batch_size; batch++) {
        for (size_t channel = 0; channel < channels; channel++) {
            for (size_t out_row = 0; out_row < out_dim; out_row++) {
                for (size_t out_col = 0; out_col < out_dim; out_col++) {
                    // Top-left corner of the receptive field in input
                    // coordinates. Computed in a signed type because it is
                    // negative whenever the window starts inside the padding.
                    const ptrdiff_t first_row = (ptrdiff_t)(out_row * stride) - (ptrdiff_t)padding;
                    const ptrdiff_t first_col = (ptrdiff_t)(out_col * stride) - (ptrdiff_t)padding;

                    acc_t result = 0;
                    if (bias != NULL) {
                        result = bias[channel];
                    }

                    for (size_t kernel_row = 0; kernel_row < kernel_size; kernel_row++) {
                        const ptrdiff_t in_row = first_row + (ptrdiff_t)kernel_row;
                        if (in_row < 0 || in_row >= (ptrdiff_t)in_dim) {
                            continue; // whole kernel row lies in the padding
                        }

                        for (size_t kernel_col = 0; kernel_col < kernel_size; kernel_col++) {
                            const ptrdiff_t in_col = first_col + (ptrdiff_t)kernel_col;
                            if (in_col < 0 || in_col >= (ptrdiff_t)in_dim) {
                                continue; // tap lies in the padding
                            }

                            result += input[batch][in_row][in_col][channel] * weight[channel][kernel_row][kernel_col];
                        }
                    }

                    // Saturate the accumulator to the element range.
                    acc_t scaled = result;
                    if (scaled > elem_t_max) {
                        scaled = elem_t_max;
                    } else if (scaled < elem_t_min) {
                        scaled = elem_t_min;
                    }

                    output[batch][out_row][out_col][channel] = scaled;
                }
            }
        }
    }
}
/*
 * Rearranges per-channel kernels into a 2-D matrix.
 *
 * weights[c][krow][kcol] is copied to weights_mat[krow * kernel_dim + kcol][c],
 * i.e. each column of weights_mat holds one channel's kernel in row-major
 * order. patch_size must equal kernel_dim * kernel_dim (checked by assert).
 */
void flatten_weights(int out_channels, int kernel_dim,
        int patch_size,
        elem_t weights[out_channels][kernel_dim][kernel_dim],
        elem_t weights_mat[patch_size][out_channels]) {

    assert(patch_size == kernel_dim * kernel_dim);

    for (int ch = 0; ch < out_channels; ch++) {
        // Running row index into weights_mat; advances once per kernel tap,
        // which is the same as krow * kernel_dim + kcol.
        int tap = 0;

        for (int r = 0; r < kernel_dim; r++) {
            for (int c = 0; c < kernel_dim; c++) {
                weights_mat[tap][ch] = weights[ch][r][c];
                tap++;
            }
        }
    }
}

/*
 * Returns true when the first `len` elements of `a` and `b` compare equal,
 * false at the first mismatch. A non-positive `len` yields true.
 */
bool vec_is_equal(elem_t * a, elem_t * b, int len) {
    int idx = 0;

    // Advance while the elements match; stopping early means a mismatch.
    while (idx < len && a[idx] == b[idx]) {
        idx++;
    }

    return idx >= len;
}

/*
 * Fills `buf[0..len)` with test data.
 *
 * When FAST is defined every element is set to 1; otherwise each element is
 * (rand() % 5) - 2, i.e. a small value in [-2, 2].
 */
void init_random(elem_t * buf, int len) {
    for (int idx = 0; idx < len; idx++) {
#ifdef FAST
        buf[idx] = 1;
#else
        buf[idx] = (rand() % 5) - 2;
#endif
    }
}

/*
 * Accumulator-typed counterpart of init_random: fills `buf[0..len)` with 1
 * when FAST is defined, otherwise with (rand() % 5) - 2 per element.
 */
void init_random_acc(acc_t * buf, int len) {
    for (int idx = 0; idx < len; idx++) {
#ifdef FAST
        buf[idx] = 1;
#else
        buf[idx] = (rand() % 5) - 2;
#endif
    }
}

/* Sets the first `len` accumulator values of `buf` to zero. */
void init_zeros_acc(acc_t * buf, int len) {
    for (int idx = 0; idx < len; idx++) {
        buf[idx] = 0;
    }
}

#ifdef FAST
/*
 * Number of kernel taps along one axis that land inside the input for output
 * index `out_idx`, using the same bounds rule as the CPU reference conv_dw.
 */
static int valid_taps_1d(int out_idx) {
    int taps = 0;
    for (int k = 0; k < KERNEL_DIM; k++) {
        const int in_idx = out_idx * STRIDE - PADDING + k;
        if (in_idx >= 0 && in_idx < IN_DIM) {
            taps++;
        }
    }
    return taps;
}
#endif

/*
 * Test driver for the depthwise convolution.
 *
 * Fills input, weights and bias, computes a CPU reference with conv_dw
 * (skipped when FAST is defined), runs tiled_conv_A_stride_dw_auto on the
 * flattened weights, and compares the two results. On mismatch every buffer
 * is dumped to stdout and the process returns 1; on success it returns 0.
 */
int main() {
#ifndef BAREMETAL
    if (mlockall(MCL_CURRENT | MCL_FUTURE) != 0) {
      perror("mlockall failed");
      exit(1);
    }
#endif

    gemmini_flush(0);

    // OUT_DIM is an int expression, so print it with a signed conversion.
    printf("Output dimension: %d\n\n", OUT_DIM);

    static elem_t input[BATCH_SIZE][IN_DIM][IN_DIM][IN_CHANNELS];
    static elem_t weights[OUT_CHANNELS][KERNEL_DIM][KERNEL_DIM];
    static acc_t bias[OUT_CHANNELS];
    static elem_t output[BATCH_SIZE][OUT_DIM][OUT_DIM][OUT_CHANNELS];

    printf("Randomize inputs...\n");
    init_random(&input[0][0][0][0], sizeof(input) / sizeof(elem_t));

    printf("Randomize weights...\n");
    init_random(&weights[0][0][0], sizeof(weights) / sizeof(elem_t));

    printf("Randomize bias...\n");
    if (NO_BIAS)
        init_zeros_acc(&bias[0], sizeof(bias) / sizeof(acc_t));
    else
        init_random_acc(&bias[0], sizeof(bias) / sizeof(acc_t));

    printf("CPU conv...\n");
    uint64_t start_cpu = read_cycles();
#ifndef FAST
    conv_dw(BATCH_SIZE, IN_CHANNELS, IN_DIM,
            OUT_DIM, KERNEL_DIM,
            PADDING, STRIDE,
            input,
            weights,
            NO_BIAS ? NULL : bias,
            output);
#endif
    uint64_t end_cpu = read_cycles();
    // Cast so the argument type matches %llu regardless of how uint64_t is defined.
    printf("CPU conv took %llu cycles\n", (unsigned long long)(end_cpu - start_cpu));

    static elem_t weights_mat[PATCH_SIZE][OUT_CHANNELS];
    static elem_t output_mat[N_PATCHES][OUT_CHANNELS];

    printf("Flatten weights...\n");
    flatten_weights(OUT_CHANNELS, KERNEL_DIM,
            PATCH_SIZE,
            weights,
            weights_mat);

    printf("Gemmini conv...\n");
    uint64_t start_gemmini = read_cycles();
    tiled_conv_A_stride_dw_auto(
        BATCH_SIZE, IN_DIM, IN_CHANNELS,
        OUT_CHANNELS, OUT_DIM,
        STRIDE, 1, PADDING, KERNEL_DIM,

        (elem_t*)input,
        (elem_t*)weights_mat,
        NO_BIAS ? NULL : (acc_t*)bias,
        (elem_t*)output_mat,

        NO_ACTIVATION, ACC_SCALE_IDENTITY, 0, 0, 0, 0,

        WS);
    uint64_t end_gemmini = read_cycles();
    printf("Gemmini conv took %llu cycles\n", (unsigned long long)(end_gemmini - start_gemmini));

    assert(sizeof(output_mat) == sizeof(output));

#ifdef FAST
    // In FAST mode every input, weight and (if enabled) bias value is 1 and
    // the CPU reference is skipped. The reference result for an output
    // position is then the number of in-bounds kernel taps plus the bias
    // term, which is cheap to compute directly.
    bool success = true;
    for (int batch = 0; batch < BATCH_SIZE && success; batch++) {
        for (int orow = 0; orow < OUT_DIM && success; orow++) {
            for (int ocol = 0; ocol < OUT_DIM && success; ocol++) {
                const elem_t expected =
                    valid_taps_1d(orow) * valid_taps_1d(ocol) + (NO_BIAS ? 0 : 1);
                const int patch = (batch * OUT_DIM + orow) * OUT_DIM + ocol;

                for (int och = 0; och < OUT_CHANNELS; och++) {
                    if (output_mat[patch][och] != expected) {
                        success = false;
                        break;
                    }
                }
            }
        }
    }
#else
    bool success = vec_is_equal(&output[0][0][0][0], &output_mat[0][0], sizeof(output) / sizeof(elem_t));
#endif

    if (!success) {
        // Dump every buffer to help diagnose the mismatch.
        printf("bias:\n");
        for (int och = 0; och < OUT_CHANNELS; och++) {
            printf("%d,", bias[och]);
        }
        printf("\b\n\n");

        // Depthwise weights are 3-D: [channel][kernel row][kernel col].
        printf("weights:\n");
        for (int och = 0; och < OUT_CHANNELS; och++) {
            printf("[");
            for (int wrow = 0; wrow < KERNEL_DIM; wrow++) {
                printf("[");
                for (int wcol = 0; wcol < KERNEL_DIM; wcol++) {
                    printf("%d,", weights[och][wrow][wcol]);
                }
                printf("\b],");
            }
            printf("\b],\n");
        }
        printf("\b\n\n");

        printf("weights_mat:\n");
        for (int wrow = 0; wrow < KERNEL_DIM * KERNEL_DIM; wrow++) {
            printf("[");
            for (int wcol = 0; wcol < OUT_CHANNELS; wcol++) {
                printf("%d,", weights_mat[wrow][wcol]);
            }
            printf("\b],\n");
        }
        printf("\b\n\n");

        printf("input:\n");
        for (int batch = 0; batch < BATCH_SIZE; batch++) {
            printf("[");
            for (int irow = 0; irow < IN_DIM; irow++) {
                printf("[");
                for (int icol = 0; icol < IN_DIM; icol++) {
                    printf("[");
                    for (int ich = 0; ich < IN_CHANNELS; ich++) {
                        printf("%d,", input[batch][irow][icol][ich]);
                    }
                    printf("\b],");
                }
                printf("\b],\n");
            }
            printf("\b],");
        }
        printf("\b\n\n");

        printf("output:\n");
        for (int batch = 0; batch < BATCH_SIZE; batch++) {
            printf("[");
            for (int orow = 0; orow < OUT_DIM; orow++) {
                printf("[");
                for (int ocol = 0; ocol < OUT_DIM; ocol++) {
                    printf("[");
                    for (int och = 0; och < OUT_CHANNELS; och++) {
                        printf("%d,", output[batch][orow][ocol][och]);
                    }
                    printf("\b],\n");
                }
                printf("\b],\n");
            }
            printf("\b],");
        }
        printf("\b\n\n");

        printf("output_mat:\n");
        for (int orow = 0; orow < BATCH_SIZE * OUT_DIM * OUT_DIM; orow++) {
            printf("[");
            for (int ocol = 0; ocol < OUT_CHANNELS; ocol++) {
                printf("%d,", output_mat[orow][ocol]);
            }
            printf("\b],\n");
        }
        printf("\b\n\n");

        return 1;
    }

    return 0;
}
Loading