diff --git a/Orange/preprocess/score.py b/Orange/preprocess/score.py index 3d795024b7f..57c3a531a38 100644 --- a/Orange/preprocess/score.py +++ b/Orange/preprocess/score.py @@ -205,9 +205,9 @@ def _entropy(D): def _gini(D): """Gini index of class-distribution matrix""" - P = D / np.sum(D, axis=0) - return sum((np.ones(1 if len(D.shape) == 1 else D.shape[1]) - np.sum(np.square(P), axis=0)) - * 0.5 * np.sum(D, axis=0) / np.sum(D)) + P = np.asarray(D / np.sum(D, axis=0)) + return np.sum((1 - np.sum(P ** 2, axis=0)) * + np.sum(D, axis=0) / np.sum(D)) def _symmetrical_uncertainty(X, Y): @@ -287,8 +287,9 @@ def from_contingency(self, cont, nan_adjustment): class Gini(ClassificationScorer): """ - Gini index is the probability that two randomly chosen instances will have different - classes. See `Wikipedia entry on gini index <http://en.wikipedia.org/wiki/Gini_coefficient>`_. + Gini impurity is the probability that two randomly chosen instances will have different + classes. See `Wikipedia entry on Gini impurity + <https://en.wikipedia.org/wiki/Decision_tree_learning#Gini_impurity>`_. """ def from_contingency(self, cont, nan_adjustment): return (_gini(np.sum(cont, axis=1)) - _gini(cont)) * nan_adjustment diff --git a/Orange/tests/test_score_feature.py b/Orange/tests/test_score_feature.py index ccf1199e8b3..8d5aa2902fa 100644 --- a/Orange/tests/test_score_feature.py +++ b/Orange/tests/test_score_feature.py @@ -34,7 +34,7 @@ def test_gain_ratio(self): def test_gini(self): scorer = Gini() - correct = [0.11893, 0.10427, 0.13117, 0.14650, 0.05973] + correct = [0.23786, 0.20855, 0.26235, 0.29300, 0.11946] np.testing.assert_almost_equal([scorer(self.zoo, a) for a in range(5)], correct, decimal=5) diff --git a/Orange/widgets/data/owrank.py b/Orange/widgets/data/owrank.py index 378d2e07ef3..e75b02e9ce0 100644 --- a/Orange/widgets/data/owrank.py +++ b/Orange/widgets/data/owrank.py @@ -41,7 +41,7 @@ def table(shape, fill=None): SCORES = [ score_meta("Information Gain", "Inf. 
gain", score.InfoGain), score_meta("Gain Ratio", "Gain Ratio", score.GainRatio), - score_meta("Gini Gain", "Gini", score.Gini), + score_meta("Gini Decrease", "Gini", score.Gini), score_meta("ANOVA", "ANOVA", score.ANOVA), score_meta("Chi2", "Chi2", score.Chi2), score_meta("Univariate Linear Regression", "Univar. Lin. Reg.", score.UnivariateLinearRegression),