Skip to content

Commit

Permalink
Merge pull request #5513 from janezd/pca-show-variance
Browse files (browse the repository at this point in the history)
[ENH] PCA: Output variance of components
  • Loading branch information
markotoplak authored Aug 13, 2021
2 parents 8a1bb1b + a2afed5 commit 39d3cb6
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 4 deletions.
22 changes: 18 additions & 4 deletions Orange/widgets/unsupervised/owpca.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,22 +293,36 @@ def commit(self):
self._transformed = self._pca(self.data)
transformed = self._transformed

if self._variance_ratio is not None:
for var, explvar in zip(
transformed.domain.attributes,
self._variance_ratio[:self.ncomponents]):
var.attributes["variance"] = round(explvar, 6)
domain = Domain(
transformed.domain.attributes[:self.ncomponents],
self.data.domain.class_vars,
self.data.domain.metas
)
transformed = transformed.from_table(domain, transformed)

# prevent caching new features by defining compute_value
proposed = [a.name for a in self._pca.orig_domain.attributes]
meta_name = get_unique_names(proposed, 'components')
dom = Domain(
[ContinuousVariable(name, compute_value=lambda _: None)
for name in proposed],
metas=[StringVariable(name=meta_name)])
meta_vars = [StringVariable(name=meta_name)]
metas = numpy.array([['PC{}'.format(i + 1)
for i in range(self.ncomponents)]],
dtype=object).T
if self._variance_ratio is not None:
variance_name = get_unique_names(proposed, "variance")
meta_vars.append(ContinuousVariable(variance_name))
metas = numpy.hstack(
(metas,
self._variance_ratio[:self.ncomponents, None]))

dom = Domain(
[ContinuousVariable(name, compute_value=lambda _: None)
for name in proposed],
metas=meta_vars)
components = Table(dom, self._pca.components_[:self.ncomponents],
metas=metas)
components.name = 'components'
Expand Down
21 changes: 21 additions & 0 deletions Orange/widgets/unsupervised/tests/test_owpca.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,27 @@ def test_unique_domain_components(self):
out = self.get_output(self.widget.Outputs.components)
self.assertEqual(out.domain.metas[0].name, 'components (1)')

def test_variance_attr(self):
    """Check that explained variance is reported on all three outputs.

    With two components requested and the widget's variance ratios
    overridden by known values, the first two ratios must appear as the
    ``"variance"`` attribute of the component variables in the
    transformed-data output and in the metas of the data output, and as
    the ``"variance"`` column of the components output.
    """
    widget = self.widget
    widget.ncomponents = 2
    self.send_signal(widget.Inputs.data, self.iris)
    self.wait_until_stop_blocking()

    # Substitute known ratios only after the input has been processed,
    # then force a commit so the outputs are rebuilt from these values.
    widget._variance_ratio = np.array([0.5, 0.25, 0.2, 0.05])
    widget.unconditional_commit()

    outputs = widget.Outputs

    # Unpacking into two names also checks that exactly two variables
    # are present in each domain part.
    transformed = self.get_output(outputs.transformed_data)
    first, second = transformed.domain.attributes
    self.assertEqual(first.attributes["variance"], 0.5)
    self.assertEqual(second.attributes["variance"], 0.25)

    annotated = self.get_output(outputs.data)
    first, second = annotated.domain.metas
    self.assertEqual(first.attributes["variance"], 0.5)
    self.assertEqual(second.attributes["variance"], 0.25)

    components = self.get_output(outputs.components)
    # Index 0 of the get_column_view result is the column of values.
    variance_column = components.get_column_view("variance")[0]
    np.testing.assert_almost_equal(variance_column.T, [0.5, 0.25])

def test_sparse_data(self):
"""Check that PCA returns the same results for both dense and sparse data."""
dense_data, sparse_data = self.iris, self.iris.to_sparse()
Expand Down

0 comments on commit 39d3cb6

Please sign in to comment.