Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add testing for CL_KERNEL_LOCAL_MEM_SIZE #1235 #2089

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
1 change: 1 addition & 0 deletions test_conformance/api/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ set(${MODULE_NAME}_SOURCES
test_queries.cpp
test_create_kernels.cpp
test_kernels.cpp
test_kernel_local_memory_size.cpp
test_kernel_private_memory_size.cpp
test_api_min_max.cpp
test_kernel_arg_changes.cpp
Expand Down
1 change: 1 addition & 0 deletions test_conformance/api/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ test_definition test_list[] = {
ADD_TEST_VERSION(negative_create_command_queue_with_properties,
Version(2, 0)),
ADD_TEST(negative_create_command_queue_with_properties_khr),
ADD_TEST(kernel_local_memory_size),
};

const int test_num = ARRAY_SIZE(test_list);
Expand Down
4 changes: 4 additions & 0 deletions test_conformance/api/procs.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,10 @@ extern int test_consistency_requirements_fp16(cl_device_id deviceID,

extern int test_min_image_formats(cl_device_id deviceID, cl_context context,
cl_command_queue queue, int num_elements);
extern int test_kernel_local_memory_size(cl_device_id deviceID,
cl_context context,
cl_command_queue queue,
int num_elements);
extern int test_negative_get_platform_info(cl_device_id deviceID,
cl_context context,
cl_command_queue queue,
Expand Down
351 changes: 351 additions & 0 deletions test_conformance/api/test_kernel_local_memory_size.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,351 @@
//
// Copyright (c) 2020 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include <cinttypes>

#include "testBase.h"
#include "harness/testHarness.h"
#include <memory>

static const char* empty_kernel = R"CLC(
__kernel void empty_kernel() {
}
)CLC";

static const char* local_memory_kernel = R"CLC(
__kernel void local_memory_kernel(global int* data) {
__local int array[10];

size_t id = get_global_id(0);
array[id] = 2 * id;
data[id] = array[id];

barrier(CLK_LOCAL_MEM_FENCE);
if(id == 0)
{
for(size_t i = 0; i < 10; i++)
data[id] += array[i];
}
}
)CLC";

static const char* local_param_kernel = R"CLC(
__kernel void local_param_kernel(__local int* local_ptr, __global int* src,
__global int* dst) {

size_t id = get_global_id(0);

local_ptr[id] = src[id];
barrier(CLK_GLOBAL_MEM_FENCE);
dst[id] = local_ptr[id];
barrier(CLK_LOCAL_MEM_FENCE);
if(id == 9)
{
for(size_t i = 0; i < 10; i++)
dst[id] += local_ptr[i];
}
}
)CLC";

static const char* local_param_local_memory_kernel = R"CLC(
__kernel void local_param_local_memory_kernel(__local int* local_ptr,
__global int* src, __global int* dst) {

size_t id = get_global_id(0);

__local int local_data[10];
local_ptr[id] = src[id];

barrier(CLK_LOCAL_MEM_FENCE);
if(id / 2 == 0) {
for(size_t i = 0; i < 10; i++)
local_data[id] += local_ptr[i];
}
else
local_data[id] = local_ptr[id] * 2;
bashbaug marked this conversation as resolved.
Show resolved Hide resolved

barrier(CLK_LOCAL_MEM_FENCE);

dst[id] = local_data[id];
barrier(CLK_LOCAL_MEM_FENCE);
if(id == 9)
{
for(size_t i = 0; i < 10; i++)
dst[id] += local_data[i];
dst[id] += 666;
}
}
)CLC";

int test_kernel_local_memory_size(cl_device_id deviceID, cl_context context,
cl_command_queue queue, int num_elements)
{
int error;
clProgramWrapper program;
clKernelWrapper kernel;

// Check memory needed to execute empty kernel
if (create_single_kernel_helper(context, &program, &kernel, 1,
&empty_kernel, "empty_kernel")
!= 0)
{
return TEST_FAIL;
}

cl_ulong kernel_local_usage = 0;
size_t param_value_size_ret = 0;
error = clGetKernelWorkGroupInfo(
kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(kernel_local_usage),
&kernel_local_usage, &param_value_size_ret);
test_error(error,
"clGetKernelWorkGroupInfo for CL_KERNEL_LOCAL_MEM_SIZE failed");

test_assert_error(param_value_size_ret == sizeof(cl_ulong),
"param_value_size_ret failed");

test_assert_error(kernel_local_usage >= 0, "kernel local mem size failed");
bashbaug marked this conversation as resolved.
Show resolved Hide resolved

// Check memory needed to execute empty kernel with __local variable
if (create_single_kernel_helper(context, &program, &kernel, 1,
&local_memory_kernel, "local_memory_kernel")
!= 0)
{
return TEST_FAIL;
}

kernel_local_usage = 0;
param_value_size_ret = 0;
error = clGetKernelWorkGroupInfo(
kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(kernel_local_usage),
&kernel_local_usage, &param_value_size_ret);
test_error(error,
"clGetKernelWorkGroupInfo for CL_KERNEL_LOCAL_MEM_SIZE failed");

test_assert_error(param_value_size_ret == sizeof(cl_ulong),
"param_value_size_ret failed");

constexpr size_t size = 10;
constexpr size_t memory = size * sizeof(cl_int);

const size_t global_work_size[] = { size };

int data[size];
for (size_t i = 0; i < size; i++)
{
data[i] = 0;
}
clMemWrapper streams[2];

streams[0] =
clCreateBuffer(context, CL_MEM_READ_WRITE, memory, NULL, &error);
test_error(error, "Creating test array failed");

error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
test_error(error, "Unable to set indexed kernel arguments");

error = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, global_work_size,
nullptr, 0, NULL, nullptr);
bashbaug marked this conversation as resolved.
Show resolved Hide resolved
test_error(error, "clEnqueueNDRangeKernel failed.");

error = clEnqueueReadBuffer(queue, streams[0], CL_TRUE, 0, memory, data, 0,
NULL, NULL);
test_error(error, "clEnqueueReadBuffer failed");

auto local_memory_kernel_verify = [&]() {
constexpr size_t size = 10;
int testData[size];
for (size_t i = 0; i < size; i++)
{
testData[i] = i * 2;
testData[0] += testData[i];
}
for (size_t i = 0; i < size; i++)
{
if (data[i] != testData[i]) return false;
}
return true;
};
test_assert_error(local_memory_kernel_verify(),
"local_memory_kernel data verificaion failed");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
test_assert_error(local_memory_kernel_verify(),
"local_memory_kernel data verificaion failed");
test_assert_error(local_memory_kernel_verify(),
"local_memory_kernel data verification failed");

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are still two additional places with the same misspelling.


test_assert_error(kernel_local_usage >= memory,
"kernel local mem size failed");
Comment on lines +153 to +154
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW, this check is failing for me for POCL because the queried value is coming back as zero. I'm not sure if this is a POCL issue or if POCL is being clever and optimizing away the local memory array - maybe @pjaaskel can check?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It shouldn't optimize this case away. I opened a PoCL issue. We'll check what's going on.



// Check memory needed to execute empty kernel with __local parameter with
// setKernelArg
if (create_single_kernel_helper(context, &program, &kernel, 1,
&local_param_kernel, "local_param_kernel")
!= 0)
{
return TEST_FAIL;
}

kernel_local_usage = 0;
param_value_size_ret = 0;

for (size_t i = 0; i < size; i++)
{
data[i] = i;
}

streams[0] =
clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, memory, data, &error);
test_error(error, "Creating test array failed");
streams[1] =
clCreateBuffer(context, CL_MEM_READ_WRITE, memory, nullptr, &error);
test_error(error, "Creating test array failed");

error = clSetKernelArg(kernel, 0, memory, NULL);
test_error(error, "Unable to set indexed kernel arguments");
error = clSetKernelArg(kernel, 1, sizeof(streams[0]), &streams[0]);
test_error(error, "Unable to set indexed kernel arguments");
error = clSetKernelArg(kernel, 2, sizeof(streams[1]), &streams[1]);
test_error(error, "Unable to set indexed kernel arguments");

error = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, global_work_size,
nullptr, 0, NULL, nullptr);
test_error(error, "clEnqueueNDRangeKernel failed.");

error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, memory, data, 0,
NULL, NULL);
test_error(error, "clEnqueueReadBuffer failed");

error = clGetKernelWorkGroupInfo(
kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(kernel_local_usage),
&kernel_local_usage, &param_value_size_ret);
test_error(error,
"clGetKernelWorkGroupInfo for CL_KERNEL_LOCAL_MEM_SIZE failed");

test_assert_error(param_value_size_ret == sizeof(cl_ulong),
"param_value_size_ret failed");

auto local_param_kernel_verify = [&]() {
constexpr size_t size = 10;
int testData[size];
int sum = 0;
for (size_t i = 0; i < size; i++)
{
testData[i] = i;
sum += testData[i];
}
testData[9] += sum;
for (size_t i = 0; i < size; i++)
{
if (data[i] != testData[i]) return false;
}

return true;
};
test_assert_error(local_param_kernel_verify(),
"local_param_kernel data verificaion failed");

test_assert_error(kernel_local_usage >= memory,
"kernel local mem size failed");


// Check memory needed to execute kernel with __local variable and __local
// parameter with setKernelArg
if (create_single_kernel_helper(context, &program, &kernel, 1,
&local_param_local_memory_kernel,
"local_param_local_memory_kernel")
!= 0)
{
return TEST_FAIL;
}

kernel_local_usage = 0;
param_value_size_ret = 0;

for (size_t i = 0; i < size; i++)
{
data[i] = i;
}

streams[0] =
clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, memory, data, &error);
test_error(error, "Creating test array failed");
streams[1] =
clCreateBuffer(context, CL_MEM_READ_WRITE, memory, nullptr, &error);
test_error(error, "Creating test array failed");

error = clSetKernelArg(kernel, 0, memory, NULL);
test_error(error, "Unable to set indexed kernel arguments");
error = clSetKernelArg(kernel, 1, sizeof(streams[0]), &streams[0]);
test_error(error, "Unable to set indexed kernel arguments");
error = clSetKernelArg(kernel, 2, sizeof(streams[1]), &streams[1]);
test_error(error, "Unable to set indexed kernel arguments");

error = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, global_work_size,
nullptr, 0, NULL, nullptr);
test_error(error, "clEnqueueNDRangeKernel failed.");

error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, memory, data, 0,
NULL, NULL);
test_error(error, "clEnqueueReadBuffer failed");


error = clGetKernelWorkGroupInfo(
kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(kernel_local_usage),
&kernel_local_usage, &param_value_size_ret);
test_error(error,
"clGetKernelWorkGroupInfo for CL_KERNEL_LOCAL_MEM_SIZE failed");

test_assert_error(param_value_size_ret == sizeof(cl_ulong),
"param_value_size_ret failed");

auto local_param_local_memory_kernel_verify = [&]() {
constexpr size_t size = 10;
int testData[size];
int sum = 0;
for (size_t i = 0; i < size; i++)
{
testData[i] = i;
sum += testData[i];
}
for (size_t i = 0; i < size; i++)
{
if (i / 2 == 0)
testData[i] += sum;
else
testData[i] = testData[i] * 2;
}

int temp = testData[9];
for (size_t i = 0; i < size; i++)
{
if (i == 9)
testData[9] += temp;
else
testData[9] += testData[i];
}
testData[9] += 666;

for (size_t i = 0; i < size; i++)
{
if (data[i] != testData[i]) return false;
}

return true;
};
test_assert_error(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could be changed to:

auto local_param_local_memory_kernel_verify = & {
constexpr size_t size = 10;
int testData[size];

for (size_t i = 0; i < size; i++)
{
    testData[i] = i * 2;
}

int temp = testData[9];
for (size_t i = 0; i < size; i++)
{
    if (i == 9)
        testData[9] += temp;
    else
        testData[9] += testData[i];
}
testData[9] += 666;

for (size_t i = 0; i < size; i++)
{
    if (data[i] != testData[i]) return false;
}

return true;

};

local_param_local_memory_kernel_verify(),
"local_param_local_memory_kernel data verificaion failed");

test_assert_error(kernel_local_usage >= 2 * memory,
"kernel local mem size failed");

return CL_SUCCESS;
}
Loading