scikit-learn-contrib · adamzenith · Oct 26, 2022 · Oct 26, 2022 · Nov 2, 2022 · Nov 2, 2022
diff --git a/.gitignore b/.gitignore
@@ -19,7 +19,7 @@ doc/datasets/generated/
 doc/generated/
 
 # Distribution / packaging
-
+mapieenv
 .Python
 env/
 build/

diff --git a/AUTHORS.rst b/AUTHORS.rst
@@ -24,4 +24,5 @@ Contributors
 * Louis Lacombe <[email protected]>
 * Arnaud Capitaine <[email protected]>
 * Tarik Tazi <[email protected]>
+* Matthias Adamsen <[email protected]>
 To be continued ...
diff --git a/Makefile b/Makefile
@@ -1,7 +1,7 @@
 .PHONY: tests doc build
 
 lint:	
-	flake8 . --exclude=doc
+	flake8 . --exclude=doc,mapieenv
 
 type-check:
 	mypy mapie

diff --git a/mapie/classification.py b/mapie/classification.py
@@ -167,7 +167,13 @@ class MapieClassifier(BaseEstimator, ClassifierMixin):
      [False False  True]]
     """
 
-    valid_methods_ = ["naive", "score", "cumulated_score", "top_k", "raps"]
+    valid_methods_ = ["naive",
+                      "score",
+                      "cumulated_score",
+                      "top_k",
+                      "raps",
+                      "mondrian"
+                      ]
     fit_attributes = [
         "single_estimator_",
         "estimators_",
@@ -1003,7 +1009,11 @@ def fit(
 
         X, y = indexable(X, y)
         y = _check_y(y)
-        assert type_of_target(y) == "multiclass"
+        if type_of_target(y) != "multiclass" and self.method != "mondrian":
+            raise ValueError(
+                "Invalid method. "
+                "Binary classification problems require the mondrian method. "
+            )
         sample_weight, X, y = check_null_weight(sample_weight, X, y)
         y = cast(NDArray, y)
         n_samples = _num_samples(y)
@@ -1077,6 +1087,9 @@ def fit(
             self.conformity_scores_ = np.take_along_axis(
                 1 - y_pred_proba, y.reshape(-1, 1), axis=1
             )
+        elif self.method == "mondrian":
+            self.conformity_scores_ = 1-y_pred_proba
+
         elif self.method in ["cumulated_score", "raps"]:
             self.conformity_scores_, self.cutoff = (
                 self._get_true_label_cumsum_proba(
@@ -1255,11 +1268,20 @@ def predict(
                         self.conformity_scores_regularized,
                         alpha_np
                     )
+                elif self.method == "mondrian":
+
+                    self.quantiles_ = compute_quantiles(
+                        self.conformity_scores_,
+                        alpha_np,
+                        mondrian=True
+                    )
                 else:
+
                     self.quantiles_ = compute_quantiles(
                         self.conformity_scores_,
                         alpha_np
                     )
+
             else:
                 self.quantiles_ = (n + 1) * (1 - alpha_np)
 
@@ -1352,6 +1374,14 @@ def predict(
                 - y_pred_proba_last,
                 -EPSILON
             )
+        elif self.method == "mondrian":
+            self.quantiles_ = np.transpose(self.quantiles_, [1, 0])
+
+            prediction_sets = np.greater_equal(
+                y_pred_proba - (1 - self.quantiles_),
+                -EPSILON
+            )
+
         else:
             raise ValueError(
                 "Invalid method. "

diff --git a/mapie/tests/test_classification.py b/mapie/tests/test_classification.py
@@ -22,7 +22,7 @@
 from mapie.metrics import classification_coverage_score
 from mapie.utils import check_alpha
 
-METHODS = ["score", "cumulated_score", "raps"]
+METHODS = ["score", "cumulated_score", "raps", "mondrian"]
 WRONG_METHODS = ["scores", "cumulated", "test", "", 1, 2.5, (1, 2)]
 WRONG_INCLUDE_LABELS = ["randomised", "True", "False", "other", 1, 2.5, (1, 2)]
 Y_PRED_PROBA_WRONG = [
@@ -263,6 +263,17 @@
             agg_scores="mean"
         )
     ),
+    "mondrian": (
+        Params(
+            method="mondrian",
+            cv="prefit",
+            random_state=None
+        ),
+        ParamsPredict(
+            include_last_label="randomized",
+            agg_scores="mean"
+        )
+    ),
 }
 
 COVERAGES = {
@@ -281,7 +292,8 @@
     "naive": 5 / 9,
     "top_k": 1,
     "raps": 1,
-    "raps_randomized": 8/9
+    "raps_randomized": 8/9,
+    "mondrian": 1
 }
 
 X_toy = np.arange(9).reshape(-1, 1)
@@ -464,6 +476,17 @@
         [False, True, True],
         [False, False, True],
     ],
+    "mondrian": [
+        [True, False, False],
+        [True, False, False],
+        [True, True, False],
+        [True, True, True],
+        [True, True, True],
+        [True, True, True],
+        [False, True, True],
+        [False, True, True],
+        [False, False, True],
+    ],
 }
 
 REGULARIZATION_PARAMETERS = [
@@ -865,6 +888,7 @@ def test_toy_dataset_predictions(strategy: str) -> None:
         include_last_label=args_predict["include_last_label"],
         agg_scores=args_predict["agg_scores"]
     )
+
     np.testing.assert_allclose(y_ps[:, :, 0], y_toy_mapie[strategy])
     np.testing.assert_allclose(
         classification_coverage_score(y_toy, y_ps[:, :, 0]),

diff --git a/mapie/utils.py b/mapie/utils.py
@@ -616,7 +616,9 @@ def check_alpha_and_last_axis(vector: NDArray, alpha_np: NDArray):
         return vector, alpha_np
 
 
-def compute_quantiles(vector: NDArray, alpha: NDArray) -> NDArray:
+def compute_quantiles(vector: NDArray,
+                      alpha: NDArray,
+                      mondrian=False) -> NDArray:
     """Compute the desired quantiles of a vector.
 
     Parameters
@@ -634,15 +636,23 @@ def compute_quantiles(vector: NDArray, alpha: NDArray) -> NDArray:
         Quantiles of the vector.
     """
     n = len(vector)
-    if len(vector.shape) <= 2:
+    if len(vector.shape) <= 2 and not mondrian:
+        quantiles_ = np.stack([
+                    np_quantile(
+                        vector,
+                        ((n + 1) * (1 - _alpha)) / n,
+                        method="higher"
+                    ) for _alpha in alpha
+        ])
+    elif len(vector.shape) <= 2 and mondrian:
         quantiles_ = np.stack([
                     np_quantile(
                         vector,
                         ((n + 1) * (1 - _alpha)) / n,
                         method="higher",
+                        axis=0
                     ) for _alpha in alpha
         ])
-
     else:
         check_alpha_and_last_axis(vector, alpha)
         quantiles_ = np.stack(
diff --git a/.gitignore b/.gitignore
@@ -19,7 +19,7 @@ doc/datasets/generated/
 doc/generated/
 
 # Distribution / packaging
-
+mapieenv
 .Python
 env/
 build/