Skip to content

Commit

Permalink
Add bias parameter to Covariance algorithm (#2581) (#2603)
Browse files Browse the repository at this point in the history
  • Loading branch information
mergify[bot] authored Nov 27, 2023
1 parent e221a82 commit 66a1c19
Show file tree
Hide file tree
Showing 32 changed files with 501 additions and 74 deletions.
1 change: 1 addition & 0 deletions cpp/daal/include/algorithms/covariance/covariance_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ struct DAAL_EXPORT Parameter : public daal::algorithms::Parameter
{
/** Default constructor */
Parameter();
bool bias = false; /*!< If true, the biased covariance estimate is computed; if false (default), the unbiased one */
OutputMatrixType outputMatrixType; /*!< Type of the computed matrix */
};

Expand Down
8 changes: 7 additions & 1 deletion cpp/daal/src/algorithms/covariance/covariance_impl.i
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,12 @@ services::Status finalizeCovariance(size_t nFeatures, algorithmFPType nObservati
invNObservationsM1 = 1.0 / (nObservations - 1.0);
}

algorithmFPType multiplier = invNObservationsM1;
if (parameter->bias)
{
multiplier = invNObservations;
}

/* Calculate resulting mean vector */
for (size_t i = 0; i < nFeatures; i++)
{
Expand Down Expand Up @@ -437,7 +443,7 @@ services::Status finalizeCovariance(size_t nFeatures, algorithmFPType nObservati
{
for (size_t j = 0; j <= i; j++)
{
cov[i * nFeatures + j] = crossProduct[i * nFeatures + j] * invNObservationsM1;
cov[i * nFeatures + j] = crossProduct[i * nFeatures + j] * multiplier;
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ static compute_result<Task> call_daal_kernel(const context_cpu& ctx,

daal_covariance::Parameter daal_parameter;
daal_parameter.outputMatrixType = daal_covariance::covarianceMatrix;
daal_parameter.bias = desc.get_bias();

const daal_hyperparameters_t& hp = convert_parameters<Float, Task>(params);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ static compute_result<Task> call_daal_kernel_finalize(const context_cpu& ctx,
bool is_mean_computed = false;

daal_covariance::Parameter daal_parameter;
daal_parameter.bias = desc.get_bias();
daal_parameter.outputMatrixType = daal_covariance::covarianceMatrix;

dal::detail::check_mul_overflow(component_count, component_count);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ auto compute_covariance(sycl::queue& q,
std::int64_t row_count,
const pr::ndview<Float, 2>& xtx,
const pr::ndarray<Float, 1>& sums,
bool bias,
const bk::event_vector& deps = {}) {
ONEDAL_PROFILER_TASK(compute_covariance, q);
ONEDAL_ASSERT(sums.has_data());
Expand All @@ -88,7 +89,7 @@ auto compute_covariance(sycl::queue& q,

auto copy_event = copy(q, cov, xtx, { deps });

auto cov_event = pr::covariance(q, row_count, sums, cov, { copy_event });
auto cov_event = pr::covariance(q, row_count, sums, cov, bias, { copy_event });
return std::make_tuple(cov, cov_event);
}

Expand Down Expand Up @@ -128,6 +129,7 @@ result_t compute_kernel_dense_impl<Float>::operator()(const descriptor_t& desc,
auto rows_count_global = row_count;
const std::int64_t column_count = data.get_column_count();
ONEDAL_ASSERT(data.get_column_count() > 0);
auto bias = desc.get_bias();
auto result = compute_result<task_t>{}.set_result_options(desc.get_result_options());

const auto data_nd = pr::table2ndarray<Float>(q_, data, alloc::device);
Expand Down Expand Up @@ -159,7 +161,7 @@ result_t compute_kernel_dense_impl<Float>::operator()(const descriptor_t& desc,

if (desc.get_result_options().test(result_options::cov_matrix)) {
auto [cov, cov_event] =
compute_covariance(q_, rows_count_global, xtx, sums, { gemm_event });
compute_covariance(q_, rows_count_global, xtx, sums, bias, { gemm_event });
result.set_cov_matrix(
(homogen_table::wrap(cov.flatten(q_, { cov_event }), column_count, column_count)));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ auto compute_covariance(sycl::queue& q,
std::int64_t row_count,
const pr::ndview<Float, 2>& xtx,
const pr::ndarray<Float, 1>& sums,
bool bias,
const bk::event_vector& deps = {}) {
ONEDAL_PROFILER_TASK(compute_covariance, q);
ONEDAL_ASSERT(sums.has_data());
Expand All @@ -67,7 +68,7 @@ auto compute_covariance(sycl::queue& q,

auto copy_event = copy(q, cov, xtx, { deps });

auto cov_event = pr::covariance(q, row_count, sums, cov, { copy_event });
auto cov_event = pr::covariance(q, row_count, sums, cov, bias, { copy_event });
return std::make_tuple(cov, cov_event);
}

Expand Down Expand Up @@ -108,6 +109,7 @@ static compute_result<Task> finalize_compute(const context_gpu& ctx,
dal::detail::check_mul_overflow(column_count, column_count);
dal::detail::check_mul_overflow(component_count, column_count);

auto bias = desc.get_bias();
auto result = compute_result<task_t>{}.set_result_options(desc.get_result_options());

sycl::event event;
Expand All @@ -121,7 +123,7 @@ static compute_result<Task> finalize_compute(const context_gpu& ctx,
pr::table2ndarray<Float>(q, input.get_partial_crossproduct(), sycl::usm::alloc::device);

if (desc.get_result_options().test(result_options::cov_matrix)) {
auto [cov, cov_event] = compute_covariance(q, rows_count_global, xtx, sums);
auto [cov, cov_event] = compute_covariance(q, rows_count_global, xtx, sums, bias);
result.set_cov_matrix(
(homogen_table::wrap(cov.flatten(q, { cov_event }), column_count, column_count)));
}
Expand Down
11 changes: 11 additions & 0 deletions cpp/oneapi/dal/algo/covariance/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,17 +46,28 @@ namespace v1 {
// Private state behind descriptor_base<Task>; an instance is created by the
// descriptor_base constructor and reached through its impl_ member.
template <typename Task>
class descriptor_impl : public base {
public:
// Requests the biased covariance estimate when true; false by default.
bool bias = false;
// Set of results to compute; starts as the default option set for Task.
result_option_id result_options = get_default_result_options<Task>();
};

// Default constructor: allocates a descriptor_impl<Task> holding the default
// property values (bias == false, default result options) and stores it in impl_.
template <typename Task>
descriptor_base<Task>::descriptor_base() : impl_(new descriptor_impl<Task>{}) {}

// Returns the bias flag currently stored in the implementation object.
template <typename Task>
bool descriptor_base<Task>::get_bias() const {
return impl_->bias;
}

// Returns the result options currently stored in the implementation object.
template <typename Task>
result_option_id descriptor_base<Task>::get_result_options() const {
return impl_->result_options;
}

// Stores the bias flag in the implementation object. No validation is
// performed: both boolean values are accepted as-is.
template <typename Task>
void descriptor_base<Task>::set_bias_impl(const bool& value) {
impl_->bias = value;
}

template <typename Task>
void descriptor_base<Task>::set_result_options_impl(const result_option_id& value) {
using msg = dal::detail::error_messages;
Expand Down
12 changes: 12 additions & 0 deletions cpp/oneapi/dal/algo/covariance/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,11 @@ class descriptor_base : public base {

/// Creates a descriptor base with the default property values.
descriptor_base();

/// Returns the stored bias flag.
bool get_bias() const;
/// Returns the stored result options.
result_option_id get_result_options() const;

protected:
/// Stores the bias flag; called by the public descriptor::set_bias setter.
void set_bias_impl(const bool& value);
/// Stores the result options (defined in common.cpp).
void set_result_options_impl(const result_option_id& value);

private:
Expand Down Expand Up @@ -159,6 +161,16 @@ class descriptor : public detail::descriptor_base<Task> {
/// Creates a new instance of the class with the default property values.
descriptor() = default;

/// Returns whether the biased covariance estimate is requested.
/// The flag is false unless it has been changed with set_bias().
bool get_bias() const {
return base_t::get_bias();
}

/// Chooses whether the covariance estimate is biased (true) or unbiased (false).
/// Returns a reference to this descriptor so that setter calls can be chained.
auto& set_bias(const bool& value) {
base_t::set_bias_impl(value);
return *this;
}

/// Choose which results should be computed and returned.
result_option_id get_result_options() const {
return base_t::get_result_options();
Expand Down
70 changes: 45 additions & 25 deletions cpp/oneapi/dal/algo/covariance/test/fixture.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,55 +83,63 @@ class covariance_test : public te::crtp_algo_fixture<TestType, Derived> {
covariance::result_options::means);
INFO("run compute optional: cov cor means");
auto compute_result = this->compute(cov_desc, data);
check_compute_result(data, compute_result);
check_compute_result(cov_desc, data, compute_result);

INFO("create descriptor cov")
cov_desc =
covariance::descriptor<Float, Method, covariance::task::compute>().set_result_options(
covariance::result_options::cov_matrix);
INFO("run compute optional: cov");
compute_result = this->compute(cov_desc, data);
check_compute_result(data, compute_result);
check_compute_result(cov_desc, data, compute_result);

INFO("create descriptor cov biased")
cov_desc = covariance::descriptor<Float, Method, covariance::task::compute>()
.set_result_options(covariance::result_options::cov_matrix)
.set_bias(true);
INFO("run compute optional: cov biased");
compute_result = this->compute(cov_desc, data);
check_compute_result(cov_desc, data, compute_result);

INFO("create descriptor cor")
cov_desc =
covariance::descriptor<Float, Method, covariance::task::compute>().set_result_options(
covariance::result_options::cor_matrix);
INFO("run compute optional: cor");
compute_result = this->compute(cov_desc, data);
check_compute_result(data, compute_result);
check_compute_result(cov_desc, data, compute_result);

INFO("create descriptor means")
cov_desc =
covariance::descriptor<Float, Method, covariance::task::compute>().set_result_options(
covariance::result_options::means);
INFO("run compute optional: means");
compute_result = this->compute(cov_desc, data);
check_compute_result(data, compute_result);
check_compute_result(cov_desc, data, compute_result);

INFO("create descriptor cov cor")
cov_desc =
covariance::descriptor<Float, Method, covariance::task::compute>().set_result_options(
covariance::result_options::cov_matrix | covariance::result_options::cor_matrix);
INFO("run compute optional: cov cor");
compute_result = this->compute(cov_desc, data);
check_compute_result(data, compute_result);
check_compute_result(cov_desc, data, compute_result);

INFO("create descriptor cov means")
cov_desc =
covariance::descriptor<Float, Method, covariance::task::compute>().set_result_options(
covariance::result_options::cov_matrix | covariance::result_options::means);
INFO("run compute optional: cov means");
compute_result = this->compute(cov_desc, data);
check_compute_result(data, compute_result);
check_compute_result(cov_desc, data, compute_result);

INFO("create descriptor cor means")
cov_desc =
covariance::descriptor<Float, Method, covariance::task::compute>().set_result_options(
covariance::result_options::cor_matrix | covariance::result_options::means);
INFO("run compute optional: cor means");
compute_result = this->compute(cov_desc, data);
check_compute_result(data, compute_result);
check_compute_result(cov_desc, data, compute_result);
}

void online_general_checks(const te::dataframe& input,
Expand All @@ -150,7 +158,7 @@ class covariance_test : public te::crtp_algo_fixture<TestType, Derived> {
partial_result = this->partial_compute(cov_desc, partial_result, input_table[i]);
}
auto compute_result = this->finalize_compute(cov_desc, partial_result);
check_compute_result(data, compute_result);
check_compute_result(cov_desc, data, compute_result);

INFO("create descriptor cov")
cov_desc =
Expand All @@ -163,7 +171,7 @@ class covariance_test : public te::crtp_algo_fixture<TestType, Derived> {
partial_result = this->partial_compute(cov_desc, partial_result, input_table[i]);
}
compute_result = this->finalize_compute(cov_desc, partial_result);
check_compute_result(data, compute_result);
check_compute_result(cov_desc, data, compute_result);

INFO("create descriptor cor")
cov_desc =
Expand All @@ -176,7 +184,7 @@ class covariance_test : public te::crtp_algo_fixture<TestType, Derived> {
partial_result = this->partial_compute(cov_desc, partial_result, input_table[i]);
}
compute_result = this->finalize_compute(cov_desc, partial_result);
check_compute_result(data, compute_result);
check_compute_result(cov_desc, data, compute_result);

INFO("create descriptor means")
cov_desc =
Expand All @@ -189,7 +197,7 @@ class covariance_test : public te::crtp_algo_fixture<TestType, Derived> {
partial_result = this->partial_compute(cov_desc, partial_result, input_table[i]);
}
compute_result = this->finalize_compute(cov_desc, partial_result);
check_compute_result(data, compute_result);
check_compute_result(cov_desc, data, compute_result);

INFO("create descriptor cov cor")
cov_desc =
Expand All @@ -202,7 +210,7 @@ class covariance_test : public te::crtp_algo_fixture<TestType, Derived> {
partial_result = this->partial_compute(cov_desc, partial_result, input_table[i]);
}
compute_result = this->finalize_compute(cov_desc, partial_result);
check_compute_result(data, compute_result);
check_compute_result(cov_desc, data, compute_result);

INFO("create descriptor cov means")
cov_desc =
Expand All @@ -215,7 +223,7 @@ class covariance_test : public te::crtp_algo_fixture<TestType, Derived> {
partial_result = this->partial_compute(cov_desc, partial_result, input_table[i]);
}
compute_result = this->finalize_compute(cov_desc, partial_result);
check_compute_result(data, compute_result);
check_compute_result(cov_desc, data, compute_result);

INFO("create descriptor cor means")
cov_desc =
Expand All @@ -228,10 +236,12 @@ class covariance_test : public te::crtp_algo_fixture<TestType, Derived> {
partial_result = this->partial_compute(cov_desc, partial_result, input_table[i]);
}
compute_result = this->finalize_compute(cov_desc, partial_result);
check_compute_result(data, compute_result);
check_compute_result(cov_desc, data, compute_result);
}

void check_compute_result(const table& data, const covariance::compute_result<>& result) {
void check_compute_result(const covariance::descriptor<Float, Method>& desc,
const table& data,
const covariance::compute_result<>& result) {
if (result.get_result_options().test(result_options::cov_matrix)) {
const auto cov_matrix = result.get_cov_matrix();
INFO("check if cov matrix table shape is expected")
Expand All @@ -240,7 +250,7 @@ class covariance_test : public te::crtp_algo_fixture<TestType, Derived> {
INFO("check if there is no NaN in cov matrix table")
REQUIRE(te::has_no_nans(cov_matrix));
INFO("check if cov matrix values are expected")
check_cov_matrix_values(data, cov_matrix);
check_cov_matrix_values(desc, data, cov_matrix);
}
if (result.get_result_options().test(result_options::cor_matrix)) {
const auto cor_matrix = result.get_cor_matrix();
Expand All @@ -250,7 +260,7 @@ class covariance_test : public te::crtp_algo_fixture<TestType, Derived> {
INFO("check if there is no NaN in cor matrix table")
REQUIRE(te::has_no_nans(cor_matrix));
INFO("check if cor matrix values are expected")
check_cor_matrix_values(data, cor_matrix);
check_cor_matrix_values(desc, data, cor_matrix);
}
if (result.get_result_options().test(result_options::means)) {
const auto means = result.get_means();
Expand Down Expand Up @@ -287,45 +297,55 @@ class covariance_test : public te::crtp_algo_fixture<TestType, Derived> {
return reference_means;
}

void check_cov_matrix_values(const table& data, const table& cov_matrix) {
const auto reference_cov = compute_reference_cov(data);
void check_cov_matrix_values(const covariance::descriptor<Float, Method>& desc,
const table& data,
const table& cov_matrix) {
const auto reference_cov = compute_reference_cov(desc, data);
const auto data_matrix = la::matrix<double>::wrap(cov_matrix);
const double tol = te::get_tolerance<Float>(1e-2, 1e-9);
const double diff = te::abs_error(reference_cov, cov_matrix);
CHECK(diff < tol);
}

la::matrix<double> compute_reference_cov(const table& data) {
la::matrix<double> compute_reference_cov(const covariance::descriptor<Float, Method>& desc,
const table& data) {
const auto data_matrix = la::matrix<double>::wrap(data);
const auto row_count_data = data_matrix.get_row_count();
const auto column_count_data = data_matrix.get_column_count();
auto reference_means = compute_reference_means(data);
auto reference_cov =
la::matrix<double>::full({ column_count_data, column_count_data }, 0.0);
auto multiplier = 1 / static_cast<double>(row_count_data - 1);
if (desc.get_bias()) {
multiplier = 1 / static_cast<double>(row_count_data);
}
for (std::int64_t i = 0; i < column_count_data; i++) {
for (std::int64_t j = 0; j < column_count_data; j++) {
double elem = 0;
for (std::int64_t k = 0; k < row_count_data; k++) {
elem += (data_matrix.get(k, i) - reference_means.get(0, i)) *
(data_matrix.get(k, j) - reference_means.get(0, j));
}
reference_cov.set(i, j) = elem * (1 / static_cast<double>(row_count_data - 1));
reference_cov.set(i, j) = elem * multiplier;
}
}
return reference_cov;
}
void check_cor_matrix_values(const table& data, const table& cor_matrix) {
const auto reference_cor = compute_reference_cor(data);
void check_cor_matrix_values(const covariance::descriptor<Float, Method>& desc,
const table& data,
const table& cor_matrix) {
const auto reference_cor = compute_reference_cor(desc, data);
const double tol = te::get_tolerance<Float>(1e-2, 1e-9);
const double diff = te::abs_error(reference_cor, cor_matrix);
CHECK(diff < tol);
}

la::matrix<double> compute_reference_cor(const table& data) {
la::matrix<double> compute_reference_cor(const covariance::descriptor<Float, Method>& desc,
const table& data) {
const auto data_matrix = la::matrix<double>::wrap(data);
const auto column_count_data = data_matrix.get_column_count();
auto reference_means = compute_reference_means(data);
auto reference_cov = compute_reference_cov(data);
auto reference_cov = compute_reference_cov(desc, data);
auto reference_cor =
la::matrix<double>::full({ column_count_data, column_count_data }, 0.0);
for (std::int64_t i = 0; i < column_count_data; i++) {
Expand Down
Loading

0 comments on commit 66a1c19

Please sign in to comment.