Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FIX] Rank: Fix crash on dataset with missing values #3458

Merged
merged 1 commit into from
Dec 10, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion Orange/preprocess/fss.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import Orange
from Orange.util import Reprable
from Orange.preprocess.preprocess import Preprocess
from Orange.preprocess.score import ANOVA, GainRatio, UnivariateLinearRegression

__all__ = ["SelectBestFeatures", "RemoveNaNColumns", "SelectRandomFeatures"]

Expand Down Expand Up @@ -57,6 +56,10 @@ def __call__(self, data):
discr_ratio = (sum(a.is_discrete
for a in data.domain.attributes)
/ len(data.domain.attributes))

from Orange.preprocess.score import ANOVA, GainRatio, \
UnivariateLinearRegression

if data.domain.has_discrete_class:
if discr_ratio >= 0.5:
method = GainRatio()
Expand Down
13 changes: 12 additions & 1 deletion Orange/preprocess/score.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from Orange.data import Domain, Variable, DiscreteVariable, ContinuousVariable
from Orange.data.filter import HasClass
from Orange.misc.wrapper_meta import WrapperMeta
from Orange.preprocess.fss import RemoveNaNColumns
from Orange.preprocess.preprocess import Discretize, SklImpute
from Orange.preprocess.util import _RefuseDataInConstructor
from Orange.statistics import contingency, distribution
Expand Down Expand Up @@ -66,6 +67,7 @@ def __call__(self, data, feature=None):
f = data.domain[feature]
data = data.transform(Domain([f], data.domain.class_vars))

orig_domain = data.domain
for pp in self.preprocessors:
data = pp(data)

Expand All @@ -76,7 +78,14 @@ def __call__(self, data, feature=None):
.format(self.friendly_name,
self._friendly_vartype_name(type(var))))

return self.score_data(data, feature)
if feature is not None:
return self.score_data(data, feature)

scores = np.full(len(orig_domain.attributes), np.nan)
names = [a.name for a in data.domain.attributes]
mask = np.array([a.name in names for a in orig_domain.attributes])
scores[mask] = self.score_data(data, feature)
return scores

def score_data(self, data, feature):
raise NotImplementedError
Expand Down Expand Up @@ -340,6 +349,7 @@ class ReliefF(Scorer):
class_type = DiscreteVariable
supports_sparse_data = False
friendly_name = "ReliefF"
preprocessors = Scorer.preprocessors + [RemoveNaNColumns()]

def __init__(self, n_iterations=50, k_nearest=10, random_state=None):
self.n_iterations = n_iterations
Expand Down Expand Up @@ -374,6 +384,7 @@ class RReliefF(Scorer):
class_type = ContinuousVariable
supports_sparse_data = False
friendly_name = "RReliefF"
preprocessors = Scorer.preprocessors + [RemoveNaNColumns()]

def __init__(self, n_iterations=50, k_nearest=50, random_state=None):
self.n_iterations = n_iterations
Expand Down
8 changes: 7 additions & 1 deletion Orange/widgets/data/tests/test_owrank.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from Orange.regression import LinearRegressionLearner
from Orange.projection import PCA
from Orange.widgets.data.owrank import OWRank, ProblemType, CLS_SCORES, REG_SCORES
from Orange.widgets.tests.base import WidgetTest
from Orange.widgets.tests.base import WidgetTest, datasets
from Orange.widgets.widget import AttributeList


Expand Down Expand Up @@ -347,3 +347,9 @@ def test_no_attributes(self):
self.assertTrue(self.widget.Error.no_attributes.is_shown())
self.send_signal(self.widget.Inputs.data, data)
self.assertFalse(self.widget.Error.no_attributes.is_shown())

def test_dataset(self):
    """Smoke-test the widget with every scorer enabled on each dataset.

    Checks the checkbox of every scoring method in ``CLS_SCORES`` and
    ``REG_SCORES``, then sends each table yielded by
    ``datasets.datasets()`` to the widget's data input. The test makes
    no assertions; it passes as long as no exception is raised.
    """
    all_scorers = CLS_SCORES + REG_SCORES
    for scorer in all_scorers:
        checkbox = self._get_checkbox(scorer.shortname)
        checkbox.setChecked(True)

    data_input = self.widget.Inputs.data
    for table in datasets.datasets():
        self.send_signal(data_input, table)