diff --git a/cpp/daal/include/algorithms/covariance/covariance_types.h b/cpp/daal/include/algorithms/covariance/covariance_types.h index 259664d3f48..28fc1d914d5 100644 --- a/cpp/daal/include/algorithms/covariance/covariance_types.h +++ b/cpp/daal/include/algorithms/covariance/covariance_types.h @@ -259,6 +259,7 @@ struct DAAL_EXPORT Parameter : public daal::algorithms::Parameter { /** Default constructor */ Parameter(); + bool bias = false; /*!< Determines if covariance estimation biased or not*/ OutputMatrixType outputMatrixType; /*!< Type of the computed matrix */ }; diff --git a/cpp/daal/src/algorithms/covariance/covariance_impl.i b/cpp/daal/src/algorithms/covariance/covariance_impl.i index b96ab8b40a1..24cb48524c5 100644 --- a/cpp/daal/src/algorithms/covariance/covariance_impl.i +++ b/cpp/daal/src/algorithms/covariance/covariance_impl.i @@ -403,6 +403,12 @@ services::Status finalizeCovariance(size_t nFeatures, algorithmFPType nObservati invNObservationsM1 = 1.0 / (nObservations - 1.0); } + algorithmFPType multiplier = invNObservationsM1; + if (parameter->bias) + { + multiplier = invNObservations; + } + /* Calculate resulting mean vector */ for (size_t i = 0; i < nFeatures; i++) { @@ -437,7 +443,7 @@ services::Status finalizeCovariance(size_t nFeatures, algorithmFPType nObservati { for (size_t j = 0; j <= i; j++) { - cov[i * nFeatures + j] = crossProduct[i * nFeatures + j] * invNObservationsM1; + cov[i * nFeatures + j] = crossProduct[i * nFeatures + j] * multiplier; } } } diff --git a/cpp/oneapi/dal/algo/covariance/backend/cpu/compute_kernel_dense.cpp b/cpp/oneapi/dal/algo/covariance/backend/cpu/compute_kernel_dense.cpp index 1ad2b173ee0..d6e03ae77e2 100644 --- a/cpp/oneapi/dal/algo/covariance/backend/cpu/compute_kernel_dense.cpp +++ b/cpp/oneapi/dal/algo/covariance/backend/cpu/compute_kernel_dense.cpp @@ -62,6 +62,7 @@ static compute_result call_daal_kernel(const context_cpu& ctx, daal_covariance::Parameter daal_parameter; daal_parameter.outputMatrixType = 
daal_covariance::covarianceMatrix; + daal_parameter.bias = desc.get_bias(); const daal_hyperparameters_t& hp = convert_parameters(params); diff --git a/cpp/oneapi/dal/algo/covariance/backend/cpu/finalize_compute_kernel_dense.cpp b/cpp/oneapi/dal/algo/covariance/backend/cpu/finalize_compute_kernel_dense.cpp index 7244144d5a2..4091a492cbc 100644 --- a/cpp/oneapi/dal/algo/covariance/backend/cpu/finalize_compute_kernel_dense.cpp +++ b/cpp/oneapi/dal/algo/covariance/backend/cpu/finalize_compute_kernel_dense.cpp @@ -44,6 +44,7 @@ static compute_result call_daal_kernel_finalize(const context_cpu& ctx, bool is_mean_computed = false; daal_covariance::Parameter daal_parameter; + daal_parameter.bias = desc.get_bias(); daal_parameter.outputMatrixType = daal_covariance::covarianceMatrix; dal::detail::check_mul_overflow(component_count, component_count); diff --git a/cpp/oneapi/dal/algo/covariance/backend/gpu/compute_kernel_dense_impl_dpc.cpp b/cpp/oneapi/dal/algo/covariance/backend/gpu/compute_kernel_dense_impl_dpc.cpp index 04cbc37e8a7..a75d609c1d4 100644 --- a/cpp/oneapi/dal/algo/covariance/backend/gpu/compute_kernel_dense_impl_dpc.cpp +++ b/cpp/oneapi/dal/algo/covariance/backend/gpu/compute_kernel_dense_impl_dpc.cpp @@ -76,6 +76,7 @@ auto compute_covariance(sycl::queue& q, std::int64_t row_count, const pr::ndview& xtx, const pr::ndarray& sums, + bool bias, const bk::event_vector& deps = {}) { ONEDAL_PROFILER_TASK(compute_covariance, q); ONEDAL_ASSERT(sums.has_data()); @@ -88,7 +89,7 @@ auto compute_covariance(sycl::queue& q, auto copy_event = copy(q, cov, xtx, { deps }); - auto cov_event = pr::covariance(q, row_count, sums, cov, { copy_event }); + auto cov_event = pr::covariance(q, row_count, sums, cov, bias, { copy_event }); return std::make_tuple(cov, cov_event); } @@ -128,6 +129,7 @@ result_t compute_kernel_dense_impl::operator()(const descriptor_t& desc, auto rows_count_global = row_count; const std::int64_t column_count = data.get_column_count(); 
ONEDAL_ASSERT(data.get_column_count() > 0); + auto bias = desc.get_bias(); auto result = compute_result{}.set_result_options(desc.get_result_options()); const auto data_nd = pr::table2ndarray(q_, data, alloc::device); @@ -159,7 +161,7 @@ result_t compute_kernel_dense_impl::operator()(const descriptor_t& desc, if (desc.get_result_options().test(result_options::cov_matrix)) { auto [cov, cov_event] = - compute_covariance(q_, rows_count_global, xtx, sums, { gemm_event }); + compute_covariance(q_, rows_count_global, xtx, sums, bias, { gemm_event }); result.set_cov_matrix( (homogen_table::wrap(cov.flatten(q_, { cov_event }), column_count, column_count))); } diff --git a/cpp/oneapi/dal/algo/covariance/backend/gpu/finalize_compute_kernel_dense_dpc.cpp b/cpp/oneapi/dal/algo/covariance/backend/gpu/finalize_compute_kernel_dense_dpc.cpp index d78d6825718..d279adcd3c3 100644 --- a/cpp/oneapi/dal/algo/covariance/backend/gpu/finalize_compute_kernel_dense_dpc.cpp +++ b/cpp/oneapi/dal/algo/covariance/backend/gpu/finalize_compute_kernel_dense_dpc.cpp @@ -55,6 +55,7 @@ auto compute_covariance(sycl::queue& q, std::int64_t row_count, const pr::ndview& xtx, const pr::ndarray& sums, + bool bias, const bk::event_vector& deps = {}) { ONEDAL_PROFILER_TASK(compute_covariance, q); ONEDAL_ASSERT(sums.has_data()); @@ -67,7 +68,7 @@ auto compute_covariance(sycl::queue& q, auto copy_event = copy(q, cov, xtx, { deps }); - auto cov_event = pr::covariance(q, row_count, sums, cov, { copy_event }); + auto cov_event = pr::covariance(q, row_count, sums, cov, bias, { copy_event }); return std::make_tuple(cov, cov_event); } @@ -108,6 +109,7 @@ static compute_result finalize_compute(const context_gpu& ctx, dal::detail::check_mul_overflow(column_count, column_count); dal::detail::check_mul_overflow(component_count, column_count); + auto bias = desc.get_bias(); auto result = compute_result{}.set_result_options(desc.get_result_options()); sycl::event event; @@ -121,7 +123,7 @@ static compute_result 
finalize_compute(const context_gpu& ctx, pr::table2ndarray(q, input.get_partial_crossproduct(), sycl::usm::alloc::device); if (desc.get_result_options().test(result_options::cov_matrix)) { - auto [cov, cov_event] = compute_covariance(q, rows_count_global, xtx, sums); + auto [cov, cov_event] = compute_covariance(q, rows_count_global, xtx, sums, bias); result.set_cov_matrix( (homogen_table::wrap(cov.flatten(q, { cov_event }), column_count, column_count))); } diff --git a/cpp/oneapi/dal/algo/covariance/common.cpp b/cpp/oneapi/dal/algo/covariance/common.cpp index 42b98d004b3..2f276b6a789 100644 --- a/cpp/oneapi/dal/algo/covariance/common.cpp +++ b/cpp/oneapi/dal/algo/covariance/common.cpp @@ -46,17 +46,28 @@ namespace v1 { template class descriptor_impl : public base { public: + bool bias = false; result_option_id result_options = get_default_result_options(); }; template descriptor_base::descriptor_base() : impl_(new descriptor_impl{}) {} +template +bool descriptor_base::get_bias() const { + return impl_->bias; +} + template result_option_id descriptor_base::get_result_options() const { return impl_->result_options; } +template +void descriptor_base::set_bias_impl(const bool& value) { + impl_->bias = value; +} + template void descriptor_base::set_result_options_impl(const result_option_id& value) { using msg = dal::detail::error_messages; diff --git a/cpp/oneapi/dal/algo/covariance/common.hpp b/cpp/oneapi/dal/algo/covariance/common.hpp index de3c64a2bd3..1878c180282 100644 --- a/cpp/oneapi/dal/algo/covariance/common.hpp +++ b/cpp/oneapi/dal/algo/covariance/common.hpp @@ -111,9 +111,11 @@ class descriptor_base : public base { descriptor_base(); + bool get_bias() const; result_option_id get_result_options() const; protected: + void set_bias_impl(const bool& value); void set_result_options_impl(const result_option_id& value); private: @@ -159,6 +161,16 @@ class descriptor : public detail::descriptor_base { /// Creates a new instance of the class with the default property 
values. descriptor() = default; + /// Choose if result biased or not + bool get_bias() const { + return base_t::get_bias(); + } + + auto& set_bias(const bool& value) { + base_t::set_bias_impl(value); + return *this; + } + /// Choose which results should be computed and returned. result_option_id get_result_options() const { return base_t::get_result_options(); diff --git a/cpp/oneapi/dal/algo/covariance/test/fixture.hpp b/cpp/oneapi/dal/algo/covariance/test/fixture.hpp index 00c8bc4ab17..5cd74690a7f 100644 --- a/cpp/oneapi/dal/algo/covariance/test/fixture.hpp +++ b/cpp/oneapi/dal/algo/covariance/test/fixture.hpp @@ -83,7 +83,7 @@ class covariance_test : public te::crtp_algo_fixture { covariance::result_options::means); INFO("run compute optional: cov cor means"); auto compute_result = this->compute(cov_desc, data); - check_compute_result(data, compute_result); + check_compute_result(cov_desc, data, compute_result); INFO("create descriptor cov") cov_desc = @@ -91,7 +91,15 @@ class covariance_test : public te::crtp_algo_fixture { covariance::result_options::cov_matrix); INFO("run compute optional: cov"); compute_result = this->compute(cov_desc, data); - check_compute_result(data, compute_result); + check_compute_result(cov_desc, data, compute_result); + + INFO("create descriptor cov biased") + cov_desc = covariance::descriptor() + .set_result_options(covariance::result_options::cov_matrix) + .set_bias(true); + INFO("run compute optional: cov biased"); + compute_result = this->compute(cov_desc, data); + check_compute_result(cov_desc, data, compute_result); INFO("create descriptor cor") cov_desc = @@ -99,7 +107,7 @@ class covariance_test : public te::crtp_algo_fixture { covariance::result_options::cor_matrix); INFO("run compute optional: cor"); compute_result = this->compute(cov_desc, data); - check_compute_result(data, compute_result); + check_compute_result(cov_desc, data, compute_result); INFO("create descriptor means") cov_desc = @@ -107,7 +115,7 @@ class 
covariance_test : public te::crtp_algo_fixture { covariance::result_options::means); INFO("run compute optional: means"); compute_result = this->compute(cov_desc, data); - check_compute_result(data, compute_result); + check_compute_result(cov_desc, data, compute_result); INFO("create descriptor cov cor") cov_desc = @@ -115,7 +123,7 @@ class covariance_test : public te::crtp_algo_fixture { covariance::result_options::cov_matrix | covariance::result_options::cor_matrix); INFO("run compute optional: cov cor"); compute_result = this->compute(cov_desc, data); - check_compute_result(data, compute_result); + check_compute_result(cov_desc, data, compute_result); INFO("create descriptor cov means") cov_desc = @@ -123,7 +131,7 @@ class covariance_test : public te::crtp_algo_fixture { covariance::result_options::cov_matrix | covariance::result_options::means); INFO("run compute optional: cov means"); compute_result = this->compute(cov_desc, data); - check_compute_result(data, compute_result); + check_compute_result(cov_desc, data, compute_result); INFO("create descriptor cor means") cov_desc = @@ -131,7 +139,7 @@ class covariance_test : public te::crtp_algo_fixture { covariance::result_options::cor_matrix | covariance::result_options::means); INFO("run compute optional: cor means"); compute_result = this->compute(cov_desc, data); - check_compute_result(data, compute_result); + check_compute_result(cov_desc, data, compute_result); } void online_general_checks(const te::dataframe& input, @@ -150,7 +158,7 @@ class covariance_test : public te::crtp_algo_fixture { partial_result = this->partial_compute(cov_desc, partial_result, input_table[i]); } auto compute_result = this->finalize_compute(cov_desc, partial_result); - check_compute_result(data, compute_result); + check_compute_result(cov_desc, data, compute_result); INFO("create descriptor cov") cov_desc = @@ -163,7 +171,7 @@ class covariance_test : public te::crtp_algo_fixture { partial_result = this->partial_compute(cov_desc, 
partial_result, input_table[i]); } compute_result = this->finalize_compute(cov_desc, partial_result); - check_compute_result(data, compute_result); + check_compute_result(cov_desc, data, compute_result); INFO("create descriptor cor") cov_desc = @@ -176,7 +184,7 @@ class covariance_test : public te::crtp_algo_fixture { partial_result = this->partial_compute(cov_desc, partial_result, input_table[i]); } compute_result = this->finalize_compute(cov_desc, partial_result); - check_compute_result(data, compute_result); + check_compute_result(cov_desc, data, compute_result); INFO("create descriptor means") cov_desc = @@ -189,7 +197,7 @@ class covariance_test : public te::crtp_algo_fixture { partial_result = this->partial_compute(cov_desc, partial_result, input_table[i]); } compute_result = this->finalize_compute(cov_desc, partial_result); - check_compute_result(data, compute_result); + check_compute_result(cov_desc, data, compute_result); INFO("create descriptor cov cor") cov_desc = @@ -202,7 +210,7 @@ class covariance_test : public te::crtp_algo_fixture { partial_result = this->partial_compute(cov_desc, partial_result, input_table[i]); } compute_result = this->finalize_compute(cov_desc, partial_result); - check_compute_result(data, compute_result); + check_compute_result(cov_desc, data, compute_result); INFO("create descriptor cov means") cov_desc = @@ -215,7 +223,7 @@ class covariance_test : public te::crtp_algo_fixture { partial_result = this->partial_compute(cov_desc, partial_result, input_table[i]); } compute_result = this->finalize_compute(cov_desc, partial_result); - check_compute_result(data, compute_result); + check_compute_result(cov_desc, data, compute_result); INFO("create descriptor cor means") cov_desc = @@ -228,10 +236,12 @@ class covariance_test : public te::crtp_algo_fixture { partial_result = this->partial_compute(cov_desc, partial_result, input_table[i]); } compute_result = this->finalize_compute(cov_desc, partial_result); - check_compute_result(data, 
compute_result); + check_compute_result(cov_desc, data, compute_result); } - void check_compute_result(const table& data, const covariance::compute_result<>& result) { + void check_compute_result(const covariance::descriptor& desc, + const table& data, + const covariance::compute_result<>& result) { if (result.get_result_options().test(result_options::cov_matrix)) { const auto cov_matrix = result.get_cov_matrix(); INFO("check if cov matrix table shape is expected") @@ -240,7 +250,7 @@ class covariance_test : public te::crtp_algo_fixture { INFO("check if there is no NaN in cov matrix table") REQUIRE(te::has_no_nans(cov_matrix)); INFO("check if cov matrix values are expected") - check_cov_matrix_values(data, cov_matrix); + check_cov_matrix_values(desc, data, cov_matrix); } if (result.get_result_options().test(result_options::cor_matrix)) { const auto cor_matrix = result.get_cor_matrix(); @@ -250,7 +260,7 @@ class covariance_test : public te::crtp_algo_fixture { INFO("check if there is no NaN in cor matrix table") REQUIRE(te::has_no_nans(cor_matrix)); INFO("check if cor matrix values are expected") - check_cor_matrix_values(data, cor_matrix); + check_cor_matrix_values(desc, data, cor_matrix); } if (result.get_result_options().test(result_options::means)) { const auto means = result.get_means(); @@ -287,21 +297,28 @@ class covariance_test : public te::crtp_algo_fixture { return reference_means; } - void check_cov_matrix_values(const table& data, const table& cov_matrix) { - const auto reference_cov = compute_reference_cov(data); + void check_cov_matrix_values(const covariance::descriptor& desc, + const table& data, + const table& cov_matrix) { + const auto reference_cov = compute_reference_cov(desc, data); const auto data_matrix = la::matrix::wrap(cov_matrix); const double tol = te::get_tolerance(1e-2, 1e-9); const double diff = te::abs_error(reference_cov, cov_matrix); CHECK(diff < tol); } - la::matrix compute_reference_cov(const table& data) { + la::matrix 
compute_reference_cov(const covariance::descriptor& desc, + const table& data) { const auto data_matrix = la::matrix::wrap(data); const auto row_count_data = data_matrix.get_row_count(); const auto column_count_data = data_matrix.get_column_count(); auto reference_means = compute_reference_means(data); auto reference_cov = la::matrix::full({ column_count_data, column_count_data }, 0.0); + auto multiplier = 1 / static_cast(row_count_data - 1); + if (desc.get_bias()) { + multiplier = 1 / static_cast(row_count_data); + } for (std::int64_t i = 0; i < column_count_data; i++) { for (std::int64_t j = 0; j < column_count_data; j++) { double elem = 0; @@ -309,23 +326,26 @@ class covariance_test : public te::crtp_algo_fixture { elem += (data_matrix.get(k, i) - reference_means.get(0, i)) * (data_matrix.get(k, j) - reference_means.get(0, j)); } - reference_cov.set(i, j) = elem * (1 / static_cast(row_count_data - 1)); + reference_cov.set(i, j) = elem * multiplier; } } return reference_cov; } - void check_cor_matrix_values(const table& data, const table& cor_matrix) { - const auto reference_cor = compute_reference_cor(data); + void check_cor_matrix_values(const covariance::descriptor& desc, + const table& data, + const table& cor_matrix) { + const auto reference_cor = compute_reference_cor(desc, data); const double tol = te::get_tolerance(1e-2, 1e-9); const double diff = te::abs_error(reference_cor, cor_matrix); CHECK(diff < tol); } - la::matrix compute_reference_cor(const table& data) { + la::matrix compute_reference_cor(const covariance::descriptor& desc, + const table& data) { const auto data_matrix = la::matrix::wrap(data); const auto column_count_data = data_matrix.get_column_count(); auto reference_means = compute_reference_means(data); - auto reference_cov = compute_reference_cov(data); + auto reference_cov = compute_reference_cov(desc, data); auto reference_cor = la::matrix::full({ column_count_data, column_count_data }, 0.0); for (std::int64_t i = 0; i < 
column_count_data; i++) { diff --git a/cpp/oneapi/dal/algo/covariance/test/spmd.cpp b/cpp/oneapi/dal/algo/covariance/test/spmd.cpp index 99acafc3f48..568fd9cfaf0 100644 --- a/cpp/oneapi/dal/algo/covariance/test/spmd.cpp +++ b/cpp/oneapi/dal/algo/covariance/test/spmd.cpp @@ -73,7 +73,7 @@ class covariance_spmd_test : public covariance_testcompute(cov_desc, data); - base_t::check_compute_result(data, compute_result); + base_t::check_compute_result(cov_desc, data, compute_result); } private: diff --git a/cpp/oneapi/dal/algo/pca/backend/gpu/train_kernel_cov_impl_dpc.cpp b/cpp/oneapi/dal/algo/pca/backend/gpu/train_kernel_cov_impl_dpc.cpp index bc98f92cd57..13be579cf87 100644 --- a/cpp/oneapi/dal/algo/pca/backend/gpu/train_kernel_cov_impl_dpc.cpp +++ b/cpp/oneapi/dal/algo/pca/backend/gpu/train_kernel_cov_impl_dpc.cpp @@ -104,7 +104,8 @@ auto compute_covariance(sycl::queue& q, auto copy_event = copy(q, cov, xtx, { deps }); - auto cov_event = pr::covariance(q, row_count, sums, cov, { copy_event }); + const bool bias = false; // Currently we use only unbiased covariance for PCA computation. + auto cov_event = pr::covariance(q, row_count, sums, cov, bias, { copy_event }); return std::make_tuple(cov, cov_event); } @@ -124,7 +125,8 @@ auto compute_correlation_from_covariance(sycl::queue& q, auto corr = pr::ndarray::empty(q, { column_count, column_count }, alloc::device); - auto corr_event = pr::correlation_from_covariance(q, row_count, cov, corr, tmp, deps); + const bool bias = false; // Currently we use only unbiased covariance for PCA computation. 
+ auto corr_event = pr::correlation_from_covariance(q, row_count, cov, corr, tmp, bias, deps); return std::make_tuple(corr, corr_event); } diff --git a/cpp/oneapi/dal/backend/primitives/stat/cov.hpp b/cpp/oneapi/dal/backend/primitives/stat/cov.hpp index 49c1f91a2dd..e9e80b30ca3 100644 --- a/cpp/oneapi/dal/backend/primitives/stat/cov.hpp +++ b/cpp/oneapi/dal/backend/primitives/stat/cov.hpp @@ -44,12 +44,14 @@ sycl::event means(sycl::queue& queue, /// @param[in] queue The queue /// @param[in] row_count The number of rows /// @param[in] sums The [p] sums computed along each column of the data +/// @param[in] bias If true, the biased (maximum likelihood) covariance estimate is computed; otherwise the unbiased (sample) estimate /// @param[out] cov The [p x p] covariance matrix template sycl::event covariance(sycl::queue& q, std::int64_t row_count, const ndview& sums, ndview& cov, + bool bias, const event_vector& deps = {}); /// Compute variances @@ -94,12 +96,14 @@ sycl::event correlation(sycl::queue& q, /// @param[out] cov The [p x p] covariance matrix /// @param[out] corr The [p x p] correlation matrix /// @param[out] tmp The [p] temporary buffer +/// @param[in] bias Whether the provided covariance estimate is biased template sycl::event correlation_from_covariance(sycl::queue& q, std::int64_t row_count, const ndview& cov, ndview& corr, ndview& tmp, + bool bias, const event_vector& deps = {}); #endif diff --git a/cpp/oneapi/dal/backend/primitives/stat/cov_dpc.cpp b/cpp/oneapi/dal/backend/primitives/stat/cov_dpc.cpp index a63f389a19b..edcfb286542 100644 --- a/cpp/oneapi/dal/backend/primitives/stat/cov_dpc.cpp +++ b/cpp/oneapi/dal/backend/primitives/stat/cov_dpc.cpp @@ -56,6 +56,7 @@ inline sycl::event compute_covariance(sycl::queue& q, std::int64_t row_count, const ndview& sums, ndview& cov, + bool bias, const event_vector& deps) { ONEDAL_ASSERT(sums.has_data()); ONEDAL_ASSERT(cov.has_mutable_data()); @@ -66,7 +67,8 @@ inline sycl::event compute_covariance(sycl::queue& q, const std::int64_t n = row_count; 
const std::int64_t p = sums.get_count(); const Float inv_n = Float(1.0 / double(n)); - const Float inv_n1 = (n > Float(1)) ? Float(1.0 / double(n - 1)) : Float(1); + const Float inv_n1 = (n > 1) ? Float(1.0 / double(n - 1)) : Float(1); + const Float multiplier = bias ? inv_n : inv_n1; const Float* sums_ptr = sums.get_data(); Float* cov_ptr = cov.get_mutable_data(); @@ -80,9 +82,8 @@ inline sycl::event compute_covariance(sycl::queue& q, const std::int64_t j = id.get_id(1); if (i < p && j < p) { - Float c = cov_ptr[gi]; - c -= inv_n * sums_ptr[i] * sums_ptr[j]; - cov_ptr[gi] = c * inv_n1; + cov_ptr[gi] -= inv_n * sums_ptr[i] * sums_ptr[j]; + cov_ptr[gi] *= multiplier; } }); }); @@ -93,6 +94,7 @@ sycl::event covariance(sycl::queue& q, std::int64_t row_count, const ndview& sums, ndview& cov, + bool bias, const event_vector& deps) { ONEDAL_ASSERT(sums.has_data()); ONEDAL_ASSERT(cov.has_mutable_data()); @@ -100,7 +102,7 @@ sycl::event covariance(sycl::queue& q, ONEDAL_ASSERT(is_known_usm(q, sums.get_data())); ONEDAL_ASSERT(is_known_usm(q, cov.get_mutable_data())); - auto compute_event = compute_covariance(q, row_count, sums, cov, deps); + auto compute_event = compute_covariance(q, row_count, sums, cov, bias, deps); return compute_event; } @@ -237,6 +239,7 @@ inline sycl::event prepare_correlation_from_covariance(sycl::queue& q, std::int64_t row_count, const ndview& cov, ndview& tmp, + bool bias, const event_vector& deps) { ONEDAL_ASSERT(cov.has_data()); ONEDAL_ASSERT(tmp.has_mutable_data()); @@ -246,7 +249,9 @@ inline sycl::event prepare_correlation_from_covariance(sycl::queue& q, const auto n = row_count; const auto p = cov.get_dimension(1); - const Float inv_n1 = (n > Float(1)) ? Float(1.0 / double(n - 1)) : Float(1); + const Float unbiased_multiplier = (n > 1) ? Float(n - 1) : Float(1); + const Float biased_multiplier = Float(n); + const Float multiplier = bias ? 
biased_multiplier : unbiased_multiplier; const Float* cov_ptr = cov.get_data(); @@ -259,12 +264,11 @@ inline sycl::event prepare_correlation_from_covariance(sycl::queue& q, cgh.depends_on(deps); cgh.parallel_for(range, [=](sycl::id<1> idx) { - Float c = cov_ptr[idx * p + idx] / inv_n1; - const Float v = c; + Float c = cov_ptr[idx * p + idx] * multiplier; // If $Var[x_i] > 0$ is close to zero, add $\varepsilon$ // to avoid NaN/Inf in the resulting correlation matrix - tmp_ptr[idx] = v + eps * Float(v < eps); + tmp_ptr[idx] = c + eps * Float(c < eps); }); }); } @@ -275,6 +279,7 @@ inline sycl::event finalize_correlation_from_covariance(sycl::queue& q, const ndview& cov, const ndview& tmp, ndview& corr, + bool bias, const event_vector& deps) { ONEDAL_ASSERT(cov.has_data()); ONEDAL_ASSERT(corr.has_mutable_data()); @@ -288,7 +293,9 @@ inline sycl::event finalize_correlation_from_covariance(sycl::queue& q, const auto n = row_count; const auto p = cov.get_dimension(1); - const Float inv_n1 = (n > Float(1)) ? Float(1.0 / double(n - 1)) : Float(1); + const Float unbiased_multiplier = (n > 1) ? Float(n - 1) : Float(1); + const Float biased_multiplier = Float(n); + const Float multiplier = bias ? 
biased_multiplier : unbiased_multiplier; const Float* tmp_ptr = tmp.get_data(); Float* corr_ptr = corr.get_mutable_data(); const Float* cov_ptr = cov.get_data(); @@ -301,8 +308,7 @@ inline sycl::event finalize_correlation_from_covariance(sycl::queue& q, const std::int64_t j = idx[1]; const std::int64_t gi = i * p + j; const Float is_diag = Float(i == j); - - const Float c = cov_ptr[gi] / inv_n1 * sycl::rsqrt(tmp_ptr[i] * tmp_ptr[j]); + Float c = cov_ptr[gi] * multiplier * sycl::rsqrt(tmp_ptr[i] * tmp_ptr[j]); corr_ptr[gi] = c * (Float(1.0) - is_diag) + is_diag; }); }); @@ -314,6 +320,7 @@ sycl::event correlation_from_covariance(sycl::queue& q, const ndview& cov, ndview& corr, ndview& tmp, + bool bias, const event_vector& deps) { ONEDAL_ASSERT(cov.has_mutable_data()); ONEDAL_ASSERT(corr.has_mutable_data()); @@ -325,9 +332,9 @@ sycl::event correlation_from_covariance(sycl::queue& q, ONEDAL_ASSERT(is_known_usm(q, cov.get_mutable_data())); ONEDAL_ASSERT(is_known_usm(q, tmp.get_mutable_data())); - auto prepare_event = prepare_correlation_from_covariance(q, row_count, cov, tmp, deps); + auto prepare_event = prepare_correlation_from_covariance(q, row_count, cov, tmp, bias, deps); auto finalize_event = - finalize_correlation_from_covariance(q, row_count, cov, tmp, corr, { prepare_event }); + finalize_correlation_from_covariance(q, row_count, cov, tmp, corr, bias, { prepare_event }); finalize_event.wait_and_throw(); return finalize_event; } @@ -347,6 +354,7 @@ INSTANTIATE_MEANS(double) std::int64_t, \ const ndview&, \ ndview&, \ + bool, \ const event_vector&); INSTANTIATE_COV(float) @@ -358,6 +366,7 @@ INSTANTIATE_COV(double) const ndview&, \ ndview&, \ ndview&, \ + bool, \ const event_vector&); INSTANTIATE_COR_FROM_COV(float) diff --git a/cpp/oneapi/dal/backend/primitives/stat/test/cov_dpc.cpp b/cpp/oneapi/dal/backend/primitives/stat/test/cov_dpc.cpp index a6451a7da9f..66e030dfaa0 100644 --- a/cpp/oneapi/dal/backend/primitives/stat/test/cov_dpc.cpp +++ 
b/cpp/oneapi/dal/backend/primitives/stat/test/cov_dpc.cpp @@ -233,6 +233,7 @@ TEMPLATE_TEST_M(cov_test, "correlation on diagonal data", "[cor]", float, double // [ 0 0 x ] // [ 0 0 0 ] const auto data = this->generate_diagonal_data(row_count, column_count, diag_element); + const bool bias = false; auto [sums, corr, cov, means, vars, tmp] = this->allocate_arrays(column_count); auto sums_event = sums.fill(this->get_queue(), diag_element); @@ -247,8 +248,13 @@ TEMPLATE_TEST_M(cov_test, "correlation on diagonal data", "[cor]", float, double float_t(0), { gemm_event_cov }); pr::means(this->get_queue(), data.get_dimension(0), sums, means, { gemm_event_corr }); - pr::covariance(this->get_queue(), data.get_dimension(0), sums, cov, { gemm_event_corr }); - pr::variances(this->get_queue(), cov, vars, { gemm_event_corr }); + auto cov_event = pr::covariance(this->get_queue(), + data.get_dimension(0), + sums, + cov, + bias, + { gemm_event_corr }); + pr::variances(this->get_queue(), cov, vars, { cov_event }).wait_and_throw(); correlation(this->get_queue(), data.get_dimension(0), sums, corr, tmp, { gemm_event_corr }) .wait_and_throw(); @@ -287,6 +293,7 @@ TEMPLATE_TEST_M(cov_test, "correlation on one-row table", "[cor]", float) { const float data_ptr[column_count] = { 0.1f, 0.2f, 0.3f }; const auto data_host = ndarray::wrap(data_ptr, { 1, column_count }); const auto data = data_host.to_device(this->get_queue()); + const bool bias = false; auto [sums, corr, cov, means, vars, tmp] = this->allocate_arrays(column_count); @@ -296,8 +303,12 @@ TEMPLATE_TEST_M(cov_test, "correlation on one-row table", "[cor]", float) { auto gemm_event_corr = pr::gemm(this->get_queue(), data.t(), data, corr, float_t(1), float_t(0), {}); - auto cov_event = - pr::covariance(this->get_queue(), data.get_dimension(0), sums, cov, { gemm_event_cov }); + auto cov_event = pr::covariance(this->get_queue(), + data.get_dimension(0), + sums, + cov, + bias, + { gemm_event_cov }); auto var_event = 
pr::variances(this->get_queue(), cov, vars, { cov_event }); auto corr_event = correlation(this->get_queue(), data.get_dimension(0), sums, corr, tmp, { gemm_event_corr }); @@ -320,6 +331,7 @@ TEMPLATE_TEST_M(cov_test, "correlation on gold data", "[cor]", float, double) { auto [data, sums] = this->get_gold_input(); auto [_, corr, cov, means, vars, tmp] = this->allocate_arrays(data.get_dimension(1)); + const bool bias = false; INFO("run correlation"); auto gemm_event_cov = pr::gemm(this->get_queue(), data.t(), data, cov, float_t(1), float_t(0)); auto gemm_event_corr = pr::gemm(this->get_queue(), @@ -330,7 +342,7 @@ TEMPLATE_TEST_M(cov_test, "correlation on gold data", "[cor]", float, double) { float_t(0), { gemm_event_cov }); pr::means(this->get_queue(), data.get_dimension(0), sums, means, { gemm_event_corr }); - pr::covariance(this->get_queue(), data.get_dimension(0), sums, cov, { gemm_event_corr }); + pr::covariance(this->get_queue(), data.get_dimension(0), sums, cov, bias, { gemm_event_corr }); pr::variances(this->get_queue(), cov, vars, { gemm_event_corr }); correlation(this->get_queue(), data.get_dimension(0), sums, corr, tmp, { gemm_event_corr }) .wait_and_throw(); diff --git a/docs/source/includes/covariance/covariance-introduction.rst b/docs/source/includes/covariance/covariance-introduction.rst index 879b5b4457e..436086c3719 100644 --- a/docs/source/includes/covariance/covariance-introduction.rst +++ b/docs/source/includes/covariance/covariance-introduction.rst @@ -18,11 +18,11 @@ In statistics, covariance and correlation are two of the most fundamental measur The covariance and the correlation represent the joint variability of any two features. The correlation is dimensionless, while the covariance is measured in units obtained by multiplying the units of the two features. 
Another important distinction is that covariance can be affected by the higher variance of one feature, while -correalation removes the effect of the variances by normalizing the covariance of two features by their square-root of variances. +correlation removes the effect of the variances by normalizing the covariance of two features by their square-root of variances. Their usage is application-dependent. The covariance algorithm computes the following: - Means -- Covariance +- Covariance (sample and estimated by maximum likelihood method) - Correlation .. |c_math| replace:: :ref:`dense ` @@ -31,8 +31,8 @@ Their usage is application-dependent. The covariance algorithm computes the foll .. |c_result| replace:: :ref:`compute_result ` .. |c_op| replace:: :ref:`compute(...) ` -============= =============== ========= ============= =========== -**Operation** **Computational methods** **Programming Interface** -------------- -------------------------- -------------------------- -|c_math| |c_dense| |c_op| |c_input| |c_result| -============= =============== ========= ============= =========== +============= ========================== ======== =========== ============ +**Operation** **Computational methods** **Programming Interface** +------------- -------------------------- --------------------------------- + |c_math| |c_dense| |c_op| |c_input| |c_result| +============= ========================== ======== =========== ============ diff --git a/docs/source/onedal/algorithms/covariance/covariance.rst b/docs/source/onedal/algorithms/covariance/covariance.rst index 145848a16ed..02fa1bd8f68 100644 --- a/docs/source/onedal/algorithms/covariance/covariance.rst +++ b/docs/source/onedal/algorithms/covariance/covariance.rst @@ -38,16 +38,18 @@ the means is a :math:`1 \times p` matrix, the covariance and the correlation mat The means, the covariance, and the correlation are computed with the following formulas: .. 
list-table:: - :widths: 10 60 + :widths: 20 50 :header-rows: 1 :align: left * - Statistic - Definition * - Means - - :math:`M = (m(1), \ldots , m(p))`, where :math:`m\left(j\right)=\frac{1}{n}\sum _{i}{x}_{ij}` - * - Covariance matrix - - :math:`Cov = (v_{ij})`, where :math:`v_{ij}=\frac{1}{n-1}\sum_{k=1}^{n}(x_{ki}-m(i))(x_{kj}-m(j))`, :math:`i=\overline{1,p}`, :math:`j=\overline{1,p}` + - :math:`M = (m_{1}, \ldots , m_{p})`, where :math:`m_{j}=\frac{1}{n}\sum _{i}{x}_{ij}` + * - Covariance matrix (sample) + - :math:`Cov = (v_{ij})`, where :math:`v_{ij}=\frac{1}{n-1}\sum_{k=1}^{n}(x_{ki}-m_{i})(x_{kj}-m_{j})`, :math:`i=\overline{1,p}`, :math:`j=\overline{1,p}` + * - Covariance matrix (maximum likelihood) + - :math:`Cov' = (v'_{ij})`, where :math:`v'_{ij}=\frac{1}{n}\sum_{k=1}^{n}(x_{ki}-m_{i})(x_{kj}-m_{j})`, :math:`i=\overline{1,p}`, :math:`j=\overline{1,p}` * - Correlation matrix - :math:`Cor = (c_{ij})`, where :math:`c_{ij}=\frac{v_{ij}}{\sqrt{v_{ii}\cdot v_{jj}}}`, :math:`i=\overline{1,p}`, :math:`j=\overline{1,p}` @@ -68,4 +70,4 @@ Refer to :ref:`API Reference: Covariance `. Distributed mode ---------------- -The algorithm supports distributed execution in SMPD mode (only on GPU). +The algorithm supports distributed execution in SPMD mode (only on GPU).
diff --git a/examples/oneapi/cpp/source/covariance/cor_dense_batch.cpp b/examples/oneapi/cpp/source/covariance/cor_dense_batch.cpp index 4ab81e6c712..392e8da035c 100644 --- a/examples/oneapi/cpp/source/covariance/cor_dense_batch.cpp +++ b/examples/oneapi/cpp/source/covariance/cor_dense_batch.cpp @@ -31,7 +31,7 @@ int main(int argc, char const *argv[]) { const auto result = dal::compute(cov_desc, input); std::cout << "Means:\n" << result.get_means() << std::endl; - std::cout << "Cor:\n" << result.get_cor_matrix() << std::endl; + std::cout << "Correlation:\n" << result.get_cor_matrix() << std::endl; return 0; } diff --git a/examples/oneapi/cpp/source/covariance/cor_dense_online.cpp b/examples/oneapi/cpp/source/covariance/cor_dense_online.cpp index 25ac47af3a8..c94751b88b2 100644 --- a/examples/oneapi/cpp/source/covariance/cor_dense_online.cpp +++ b/examples/oneapi/cpp/source/covariance/cor_dense_online.cpp @@ -37,5 +37,5 @@ int main(int argc, char const *argv[]) { auto result = dal::finalize_compute(cov_desc, partial_result); std::cout << "Means:\n" << result.get_means() << std::endl; - std::cout << "Cor:\n" << result.get_cor_matrix() << std::endl; + std::cout << "Correlation:\n" << result.get_cor_matrix() << std::endl; } diff --git a/examples/oneapi/cpp/source/covariance/cov_dense_batch.cpp b/examples/oneapi/cpp/source/covariance/cov_dense_batch.cpp index 4a3c21ecc4e..b7d8e114caf 100644 --- a/examples/oneapi/cpp/source/covariance/cov_dense_batch.cpp +++ b/examples/oneapi/cpp/source/covariance/cov_dense_batch.cpp @@ -30,7 +30,7 @@ int main(int argc, char const *argv[]) { auto result = dal::compute(cov_desc, input); - std::cout << "Cov:\n" << result.get_cov_matrix() << std::endl; + std::cout << "Sample covariance:\n" << result.get_cov_matrix() << std::endl; return 0; } diff --git a/examples/oneapi/cpp/source/covariance/cov_dense_biased_batch.cpp b/examples/oneapi/cpp/source/covariance/cov_dense_biased_batch.cpp new file mode 100644 index 00000000000..dbf73fd33a2 --- 
/dev/null +++ b/examples/oneapi/cpp/source/covariance/cov_dense_biased_batch.cpp @@ -0,0 +1,38 @@ +/******************************************************************************* +* Copyright 2023 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "oneapi/dal/algo/covariance.hpp" +#include "oneapi/dal/io/csv.hpp" + +#include "example_util/utils.hpp" + +namespace dal = oneapi::dal; + +int main(int argc, char const *argv[]) { + const auto input_file_name = get_data_path("covcormoments_dense.csv"); + + const auto input = dal::read<dal::table>(dal::csv::data_source{ input_file_name }); + auto cov_desc = dal::covariance::descriptor{} + .set_result_options(dal::covariance::result_options::cov_matrix) + .set_bias(true); + + auto result = dal::compute(cov_desc, input); + + std::cout << "Maximum likelihood covariance estimation:\n" + << result.get_cov_matrix() << std::endl; + + return 0; +} diff --git a/examples/oneapi/cpp/source/covariance/cov_dense_biased_online.cpp b/examples/oneapi/cpp/source/covariance/cov_dense_biased_online.cpp new file mode 100644 index 00000000000..99d5db2969c --- /dev/null +++ b/examples/oneapi/cpp/source/covariance/cov_dense_biased_online.cpp @@ -0,0 +1,43 @@ +/******************************************************************************* +* Copyright 2023 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may
not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include "oneapi/dal/algo/covariance.hpp" +#include "oneapi/dal/io/csv.hpp" + +#include "example_util/utils.hpp" + +namespace dal = oneapi::dal; +int main(int argc, char const *argv[]) { + const auto input_file_name = get_data_path("covcormoments_dense.csv"); + const std::int64_t nBlocks = 10; + + const auto input = dal::read<dal::table>(dal::csv::data_source{ input_file_name }); + const auto cov_desc = dal::covariance::descriptor{} + .set_result_options(dal::covariance::result_options::cov_matrix | + dal::covariance::result_options::means) + .set_bias(true); + + dal::covariance::partial_compute_result<> partial_result; + + auto input_table = split_table_by_rows<double>(input, nBlocks); + for (std::int64_t i = 0; i < nBlocks; i++) { + partial_result = dal::partial_compute(cov_desc, partial_result, input_table[i]); + } + auto result = dal::finalize_compute(cov_desc, partial_result); + + std::cout << "Maximum likelihood covariance estimation:\n" + << result.get_cov_matrix() << std::endl; +} diff --git a/examples/oneapi/cpp/source/covariance/cov_dense_online.cpp b/examples/oneapi/cpp/source/covariance/cov_dense_online.cpp index c9b0eafd19f..d9f8c803cac 100644 --- a/examples/oneapi/cpp/source/covariance/cov_dense_online.cpp +++ b/examples/oneapi/cpp/source/covariance/cov_dense_online.cpp @@ -36,5 +36,5 @@ int main(int argc, char const *argv[]) { } auto result = dal::finalize_compute(cov_desc, partial_result); - std::cout << "Cov:\n" <<
result.get_cov_matrix() << std::endl; + std::cout << "Sample covariance:\n" << result.get_cov_matrix() << std::endl; } diff --git a/examples/oneapi/dpc/source/covariance/cor_dense_batch.cpp b/examples/oneapi/dpc/source/covariance/cor_dense_batch.cpp index f2cb5c1f16d..1f8eaf81cca 100644 --- a/examples/oneapi/dpc/source/covariance/cor_dense_batch.cpp +++ b/examples/oneapi/dpc/source/covariance/cor_dense_batch.cpp @@ -37,7 +37,7 @@ void run(sycl::queue &q) { const auto result = dal::compute(q, cov_desc, input); std::cout << "Means:\n" << result.get_means() << std::endl; - std::cout << "Cor:\n" << result.get_cor_matrix() << std::endl; + std::cout << "Correlation:\n" << result.get_cor_matrix() << std::endl; } int main(int argc, char const *argv[]) { diff --git a/examples/oneapi/dpc/source/covariance/cor_dense_online.cpp b/examples/oneapi/dpc/source/covariance/cor_dense_online.cpp index 264d213f955..e8cb80400d4 100644 --- a/examples/oneapi/dpc/source/covariance/cor_dense_online.cpp +++ b/examples/oneapi/dpc/source/covariance/cor_dense_online.cpp @@ -43,7 +43,7 @@ void run(sycl::queue &q) { auto result = dal::finalize_compute(q, cov_desc, partial_result); std::cout << "Means:\n" << result.get_means() << std::endl; - std::cout << "Cor:\n" << result.get_cor_matrix() << std::endl; + std::cout << "Correlation:\n" << result.get_cor_matrix() << std::endl; } int main(int argc, char const *argv[]) { diff --git a/examples/oneapi/dpc/source/covariance/cov_dense_batch.cpp b/examples/oneapi/dpc/source/covariance/cov_dense_batch.cpp index e6a484c7c9d..8502f4fa91c 100644 --- a/examples/oneapi/dpc/source/covariance/cov_dense_batch.cpp +++ b/examples/oneapi/dpc/source/covariance/cov_dense_batch.cpp @@ -36,7 +36,7 @@ void run(sycl::queue &q) { auto result = dal::compute(q, cov_desc, input); - std::cout << "Cov:\n" << result.get_cov_matrix() << std::endl; + std::cout << "Sample covariance:\n" << result.get_cov_matrix() << std::endl; } int main(int argc, char const *argv[]) { diff --git 
a/examples/oneapi/dpc/source/covariance/cov_dense_biased_batch.cpp b/examples/oneapi/dpc/source/covariance/cov_dense_biased_batch.cpp new file mode 100644 index 00000000000..93d6c71e78b --- /dev/null +++ b/examples/oneapi/dpc/source/covariance/cov_dense_biased_batch.cpp @@ -0,0 +1,53 @@ +/******************************************************************************* +* Copyright 2023 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include <sycl/sycl.hpp> + +#ifndef ONEDAL_DATA_PARALLEL +#define ONEDAL_DATA_PARALLEL +#endif + +#include "oneapi/dal/algo/covariance.hpp" +#include "oneapi/dal/io/csv.hpp" + +#include "example_util/utils.hpp" + +namespace dal = oneapi::dal; + +void run(sycl::queue &q) { + const auto input_file_name = get_data_path("covcormoments_dense.csv"); + + const auto input = dal::read<dal::table>(q, dal::csv::data_source{ input_file_name }); + auto cov_desc = dal::covariance::descriptor{} + .set_result_options(dal::covariance::result_options::cov_matrix) + .set_bias(true); + + auto result = dal::compute(q, cov_desc, input); + + std::cout << "Maximum likelihood covariance estimation:\n" + << result.get_cov_matrix() << std::endl; +} + +int main(int argc, char const *argv[]) { + for (auto d : list_devices()) { + std::cout << "Running on " << d.get_platform().get_info<sycl::info::platform::name>() + << ", " << d.get_info<sycl::info::device::name>() << "\n" + << std::endl; + auto q = sycl::queue{ d }; + run(q); + } + return 0; +} diff
--git a/examples/oneapi/dpc/source/covariance/cov_dense_biased_online.cpp b/examples/oneapi/dpc/source/covariance/cov_dense_biased_online.cpp new file mode 100644 index 00000000000..bf175a87775 --- /dev/null +++ b/examples/oneapi/dpc/source/covariance/cov_dense_biased_online.cpp @@ -0,0 +1,61 @@ +/******************************************************************************* +* Copyright 2023 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#include <sycl/sycl.hpp> + +#ifndef ONEDAL_DATA_PARALLEL +#define ONEDAL_DATA_PARALLEL +#endif + +#include "oneapi/dal/algo/covariance.hpp" +#include "oneapi/dal/io/csv.hpp" + +#include "example_util/utils.hpp" + +namespace dal = oneapi::dal; + +void run(sycl::queue &q) { + const auto input_file_name = get_data_path("covcormoments_dense.csv"); + const std::int64_t nBlocks = 10; + + const auto input = dal::read<dal::table>(q, dal::csv::data_source{ input_file_name }); + const auto cov_desc = dal::covariance::descriptor{} + .set_result_options(dal::covariance::result_options::cov_matrix | + dal::covariance::result_options::means) + .set_bias(true); + + dal::covariance::partial_compute_result<> partial_result; + + auto input_table = split_table_by_rows<double>(input, nBlocks); + for (std::int64_t i = 0; i < nBlocks; i++) { + partial_result = dal::partial_compute(q, cov_desc, partial_result, input_table[i]); + } + auto result = dal::finalize_compute(q, cov_desc,
partial_result); + + std::cout << "Maximum likelihood covariance estimation:\n" + << result.get_cov_matrix() << std::endl; +} + +int main(int argc, char const *argv[]) { + for (auto d : list_devices()) { + std::cout << "Running on " << d.get_platform().get_info<sycl::info::platform::name>() + << ", " << d.get_info<sycl::info::device::name>() << "\n" + << std::endl; + auto q = sycl::queue{ d }; + run(q); + } + return 0; +} diff --git a/examples/oneapi/dpc/source/covariance/cov_dense_online.cpp b/examples/oneapi/dpc/source/covariance/cov_dense_online.cpp index 62e22eab1a0..c6f124c2ac3 100644 --- a/examples/oneapi/dpc/source/covariance/cov_dense_online.cpp +++ b/examples/oneapi/dpc/source/covariance/cov_dense_online.cpp @@ -43,7 +43,7 @@ void run(sycl::queue &q) { } auto result = dal::finalize_compute(q, cov_desc, partial_result); - std::cout << "Cov:\n" << result.get_cov_matrix() << std::endl; + std::cout << "Sample covariance:\n" << result.get_cov_matrix() << std::endl; } int main(int argc, char const *argv[]) { diff --git a/samples/oneapi/dpc/ccl/sources/cov_biased_distr_ccl.cpp b/samples/oneapi/dpc/ccl/sources/cov_biased_distr_ccl.cpp new file mode 100644 index 00000000000..d78b89cbef2 --- /dev/null +++ b/samples/oneapi/dpc/ccl/sources/cov_biased_distr_ccl.cpp @@ -0,0 +1,74 @@ +/******************************************************************************* +* Copyright 2023 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License.
+*******************************************************************************/ + +#include <sycl/sycl.hpp> +#include <iomanip> +#include <iostream> +#include <mpi.h> + +#ifndef ONEDAL_DATA_PARALLEL +#define ONEDAL_DATA_PARALLEL +#endif + +#include "oneapi/dal/algo/covariance.hpp" +#include "oneapi/dal/spmd/ccl/communicator.hpp" +#include "oneapi/dal/io/csv.hpp" + +#include "utils.hpp" + +namespace dal = oneapi::dal; + +void run(sycl::queue& queue) { + const auto data_file_name = get_data_path("data/covcormoments_dense.csv"); + + const auto data = dal::read<dal::table>(queue, dal::csv::data_source{ data_file_name }); + + const auto cov_desc = dal::covariance::descriptor{} + .set_result_options(dal::covariance::result_options::cov_matrix) + .set_bias(true); + + auto comm = dal::preview::spmd::make_communicator<dal::preview::spmd::backend::ccl>(queue); + auto rank_id = comm.get_rank(); + auto rank_count = comm.get_rank_count(); + + auto input_vec = split_table_by_rows<float>(queue, data, rank_count); + + const auto result = dal::preview::compute(comm, cov_desc, input_vec[rank_id]); + if (comm.get_rank() == 0) { + std::cout << "Maximum likelihood covariance estimation:\n" + << result.get_cov_matrix() << std::endl; + } +} + +int main(int argc, char const* argv[]) { + ccl::init(); + int status = MPI_Init(nullptr, nullptr); + if (status != MPI_SUCCESS) { + throw std::runtime_error{ "Problem occurred during MPI init" }; + } + + auto device = sycl::device(sycl::gpu_selector_v); + std::cout << "Running on " << device.get_platform().get_info<sycl::info::platform::name>() + << ", " << device.get_info<sycl::info::device::name>() << std::endl; + sycl::queue q{ device }; + run(q); + + status = MPI_Finalize(); + if (status != MPI_SUCCESS) { + throw std::runtime_error{ "Problem occurred during MPI finalize" }; + } + return 0; +} diff --git a/samples/oneapi/dpc/ccl/sources/cov_distr_ccl.cpp b/samples/oneapi/dpc/ccl/sources/cov_distr_ccl.cpp index 0263ae61160..227aa473fb0 100644 --- a/samples/oneapi/dpc/ccl/sources/cov_distr_ccl.cpp +++ b/samples/oneapi/dpc/ccl/sources/cov_distr_ccl.cpp @@ -47,7 +47,7 @@ void run(sycl::queue&
queue) { const auto result = dal::preview::compute(comm, cov_desc, input_vec[rank_id]); if (comm.get_rank() == 0) { - std::cout << "Covariance:\n" << result.get_cov_matrix() << std::endl; + std::cout << "Sample covariance:\n" << result.get_cov_matrix() << std::endl; } } diff --git a/samples/oneapi/dpc/mpi/sources/cov_biased_distr_mpi.cpp b/samples/oneapi/dpc/mpi/sources/cov_biased_distr_mpi.cpp new file mode 100644 index 00000000000..52b67e22833 --- /dev/null +++ b/samples/oneapi/dpc/mpi/sources/cov_biased_distr_mpi.cpp @@ -0,0 +1,73 @@ +/******************************************************************************* +* Copyright 2023 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#include <sycl/sycl.hpp> +#include <iomanip> +#include <iostream> +#include <mpi.h> + +#ifndef ONEDAL_DATA_PARALLEL +#define ONEDAL_DATA_PARALLEL +#endif + +#include "oneapi/dal/algo/covariance.hpp" +#include "oneapi/dal/spmd/mpi/communicator.hpp" +#include "oneapi/dal/io/csv.hpp" + +#include "utils.hpp" + +namespace dal = oneapi::dal; + +void run(sycl::queue& queue) { + const auto data_file_name = get_data_path("data/covcormoments_dense.csv"); + + const auto data = dal::read<dal::table>(queue, dal::csv::data_source{ data_file_name }); + + const auto cov_desc = dal::covariance::descriptor{} + .set_result_options(dal::covariance::result_options::cov_matrix) + .set_bias(true); + + auto comm = dal::preview::spmd::make_communicator<dal::preview::spmd::backend::mpi>(queue); + auto rank_id = comm.get_rank(); + auto rank_count = comm.get_rank_count(); + + auto input_vec = split_table_by_rows<float>(queue, data, rank_count); + + const auto result = dal::preview::compute(comm, cov_desc, input_vec[rank_id]); + if (comm.get_rank() == 0) { + std::cout << "Maximum likelihood covariance estimation:\n" + << result.get_cov_matrix() << std::endl; + } +} + +int main(int argc, char const* argv[]) { + int status = MPI_Init(nullptr, nullptr); + if (status != MPI_SUCCESS) { + throw std::runtime_error{ "Problem occurred during MPI init" }; + } + + auto device = sycl::device(sycl::gpu_selector_v); + std::cout << "Running on " << device.get_platform().get_info<sycl::info::platform::name>() + << ", " << device.get_info<sycl::info::device::name>() << std::endl; + sycl::queue q{ device }; + run(q); + + status = MPI_Finalize(); + if (status != MPI_SUCCESS) { + throw std::runtime_error{ "Problem occurred during MPI finalize" }; + } + return 0; +} diff --git a/samples/oneapi/dpc/mpi/sources/cov_distr_mpi.cpp b/samples/oneapi/dpc/mpi/sources/cov_distr_mpi.cpp index 23a1c636ecd..2b8eb136ea0 100644 --- a/samples/oneapi/dpc/mpi/sources/cov_distr_mpi.cpp +++ b/samples/oneapi/dpc/mpi/sources/cov_distr_mpi.cpp @@ -47,7 +47,7 @@ void run(sycl::queue& queue) { const
auto result = dal::preview::compute(comm, cov_desc, input_vec[rank_id]); if (comm.get_rank() == 0) { - std::cout << "Covariance:\n" << result.get_cov_matrix() << std::endl; + std::cout << "Sample covariance:\n" << result.get_cov_matrix() << std::endl; } }