Skip to content

Commit

Permalink
Merge pull request #2355 from kernc/fcbf-dist
Browse files Browse the repository at this point in the history
[FIX] score.FCBF: don't segfault on continuous variables w/ <0 values
  • Loading branch information
janezd authored Jun 9, 2017
2 parents 9d4e8af + 3e235f1 commit 1dfb316
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 12 deletions.
17 changes: 7 additions & 10 deletions Orange/preprocess/score.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,13 +235,11 @@ def _gini(D):
np.sum(D, axis=0) / np.sum(D))


- def _symmetrical_uncertainty(X, Y):
+ def _symmetrical_uncertainty(data, attr1, attr2):
"""Symmetrical uncertainty, Press et al., 1988."""
- from Orange.preprocess._relieff import contingency_table
- X, Y = np.around(X), np.around(Y)
- cont = contingency_table(X, Y)
+ cont = np.asarray(contingency.Discrete(data, attr1, attr2), dtype=float)
ig = InfoGain().from_contingency(cont, 1)
- return 2 * ig / (_entropy(cont.sum(0)) + _entropy(cont.sum(1)))
+ return 2 * ig / (_entropy(cont) + _entropy(cont.T))


class FCBF(ClassificationScorer):
Expand All @@ -253,9 +251,10 @@ class FCBF(ClassificationScorer):
2003. http://www.aaai.org/Papers/ICML/2003/ICML03-111.pdf
"""
def score_data(self, data, feature=None):
+ attributes = data.domain.attributes
S = []
- for i, a in enumerate(data.X.T):
- S.append((_symmetrical_uncertainty(a, data.Y), i))
+ for i, attr in enumerate(attributes):
+ S.append((_symmetrical_uncertainty(data, attr, data.domain.class_var), i))
S.sort()
worst = []

Expand All @@ -267,9 +266,7 @@ def score_data(self, data, feature=None):
while True:
try: SUqc, Fq = S[-q]
except IndexError: break
- # TODO: cache
- if _symmetrical_uncertainty(data.X.T[Fp],
- data.X.T[Fq]) >= SUqc:
+ if _symmetrical_uncertainty(data, attributes[Fp], attributes[Fq]) >= SUqc:
del S[-q]
worst.append((1e-4*SUqc, Fq))
else:
Expand Down
12 changes: 10 additions & 2 deletions Orange/tests/test_score_feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import numpy as np

- from Orange.data import Table, Domain, DiscreteVariable
+ from Orange.data import Table, Domain, DiscreteVariable, ContinuousVariable
from Orange import preprocess
from Orange.preprocess.score import InfoGain, GainRatio, Gini, Chi2, ANOVA,\
UnivariateLinearRegression, ReliefF, FCBF, RReliefF
Expand Down Expand Up @@ -131,5 +131,13 @@ def test_fcbf(self):
scorer = FCBF()
weights = scorer(self.zoo, None)
found = [self.zoo.domain[attr].name for attr in reversed(weights.argsort()[-5:])]
- reference = ['legs', 'backbone', 'toothed', 'hair', 'aquatic']
+ reference = ['legs', 'milk', 'toothed', 'feathers', 'backbone']
self.assertEqual(found, reference)
+
+ # GH-1916
+ data = Table(Domain([ContinuousVariable('1'), ContinuousVariable('2')],
+ DiscreteVariable('target')),
+ np.full((2, 2), np.nan),
+ np.r_[0., 1])
+ weights = scorer(data, None)
+ np.testing.assert_equal(weights, np.nan)

0 comments on commit 1dfb316

Please sign in to comment.