From a2afed5c9540bd877710710d0b9b3b393e0ccd20 Mon Sep 17 00:00:00 2001 From: janezd Date: Sat, 3 Jul 2021 22:50:13 +0200 Subject: [PATCH] PCA: Show covered variance in output tables --- Orange/widgets/unsupervised/owpca.py | 22 +++++++++++++++---- .../widgets/unsupervised/tests/test_owpca.py | 21 ++++++++++++++++++ 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/Orange/widgets/unsupervised/owpca.py b/Orange/widgets/unsupervised/owpca.py index d1904b25922..1233aba4194 100644 --- a/Orange/widgets/unsupervised/owpca.py +++ b/Orange/widgets/unsupervised/owpca.py @@ -293,22 +293,36 @@ def commit(self): self._transformed = self._pca(self.data) transformed = self._transformed + if self._variance_ratio is not None: + for var, explvar in zip( + transformed.domain.attributes, + self._variance_ratio[:self.ncomponents]): + var.attributes["variance"] = round(explvar, 6) domain = Domain( transformed.domain.attributes[:self.ncomponents], self.data.domain.class_vars, self.data.domain.metas ) transformed = transformed.from_table(domain, transformed) + # prevent caching new features by defining compute_value proposed = [a.name for a in self._pca.orig_domain.attributes] meta_name = get_unique_names(proposed, 'components') - dom = Domain( - [ContinuousVariable(name, compute_value=lambda _: None) - for name in proposed], - metas=[StringVariable(name=meta_name)]) + meta_vars = [StringVariable(name=meta_name)] metas = numpy.array([['PC{}'.format(i + 1) for i in range(self.ncomponents)]], dtype=object).T + if self._variance_ratio is not None: + variance_name = get_unique_names(proposed, "variance") + meta_vars.append(ContinuousVariable(variance_name)) + metas = numpy.hstack( + (metas, + self._variance_ratio[:self.ncomponents, None])) + + dom = Domain( + [ContinuousVariable(name, compute_value=lambda _: None) + for name in proposed], + metas=meta_vars) components = Table(dom, self._pca.components_[:self.ncomponents], metas=metas) components.name = 'components' diff --git a/Orange/widgets/unsupervised/tests/test_owpca.py b/Orange/widgets/unsupervised/tests/test_owpca.py index 55ced0798d7..9efb58876b1 100644 --- a/Orange/widgets/unsupervised/tests/test_owpca.py +++ b/Orange/widgets/unsupervised/tests/test_owpca.py @@ -95,6 +95,27 @@ def test_unique_domain_components(self): out = self.get_output(self.widget.Outputs.components) self.assertEqual(out.domain.metas[0].name, 'components (1)') + def test_variance_attr(self): + self.widget.ncomponents = 2 + self.send_signal(self.widget.Inputs.data, self.iris) + self.wait_until_stop_blocking() + self.widget._variance_ratio = np.array([0.5, 0.25, 0.2, 0.05]) + self.widget.unconditional_commit() + + result = self.get_output(self.widget.Outputs.transformed_data) + pc1, pc2 = result.domain.attributes + self.assertEqual(pc1.attributes["variance"], 0.5) + self.assertEqual(pc2.attributes["variance"], 0.25) + + result = self.get_output(self.widget.Outputs.data) + pc1, pc2 = result.domain.metas + self.assertEqual(pc1.attributes["variance"], 0.5) + self.assertEqual(pc2.attributes["variance"], 0.25) + + result = self.get_output(self.widget.Outputs.components) + np.testing.assert_almost_equal(result.get_column_view("variance")[0].T, + [0.5, 0.25]) + def test_sparse_data(self): """Check that PCA returns the same results for both dense and sparse data.""" dense_data, sparse_data = self.iris, self.iris.to_sparse()