Merge pull request #82 from scikit-learn-contrib/mapieclassifier_add_classification_coverage_score

Classification coverage score
scikit-learn-contrib · Aug 27, 2021 · 2241a66 · 2241a66
2 parents b81c9ac + 208d41b
commit 2241a66
Show file tree

Hide file tree

Showing 10 changed files with 108 additions and 27 deletions.
diff --git a/HISTORY.rst b/HISTORY.rst
@@ -11,6 +11,7 @@ History
 * Modification of the documentation architecture
 * Split example gallery into separate regression and classification galleries
 * Add classification examples
+* Add function classification_coverage_score to the metrics module
 
 0.2.3 (2021-07-09)
 ------------------

diff --git a/doc/api.rst b/doc/api.rst
@@ -29,5 +29,6 @@ Metrics
    :toctree: generated/
    :template: function.rst
 
-   metrics.coverage_score
+   metrics.regression_coverage_score
+   metrics.classification_coverage_score
 
diff --git a/examples/regression/plot_barber2020_simulations.py b/examples/regression/plot_barber2020_simulations.py
@@ -35,7 +35,7 @@
 from matplotlib import pyplot as plt
 
 from mapie.regression import MapieRegressor
-from mapie.metrics import coverage_score
+from mapie.metrics import regression_coverage_score
 
 
 def PIs_vs_dimensions(
@@ -115,7 +115,9 @@ def PIs_vs_dimensions(
                 mapie.fit(X_train, y_train)
                 y_pred, y_pis = mapie.predict(X_test, alpha=alpha)
                 results[strategy][dimension]["coverage"][trial] = (
-                    coverage_score(y_test, y_pis[:, 0, 0], y_pis[:, 1, 0])
+                    regression_coverage_score(
+                        y_test, y_pis[:, 0, 0], y_pis[:, 1, 0]
+                    )
                 )
                 results[strategy][dimension]["width_mean"][trial] = (
                     y_pis[:, 1, 0] - y_pis[:, 0, 0]

diff --git a/examples/regression/plot_nested-cv.py b/examples/regression/plot_nested-cv.py
@@ -53,7 +53,7 @@
 from sklearn.metrics import mean_squared_error
 
 from mapie.regression import MapieRegressor
-from mapie.metrics import coverage_score
+from mapie.metrics import regression_coverage_score
 
 # Load the Boston data
 X_boston, y_boston = load_boston(return_X_y=True)
@@ -102,7 +102,7 @@
     X_test, alpha=alpha
 )
 widths_non_nested = y_pis_non_nested[:, 1, 0] - y_pis_non_nested[:, 0, 0]
-coverage_non_nested = coverage_score(
+coverage_non_nested = regression_coverage_score(
     y_test, y_pis_non_nested[:, 0, 0], y_pis_non_nested[:, 1, 0]
 )
 score_non_nested = mean_squared_error(
@@ -130,7 +130,7 @@
 mapie_nested.fit(X_train, y_train)
 y_pred_nested, y_pis_nested = mapie_nested.predict(X_test, alpha=alpha)
 widths_nested = y_pis_nested[:, 1, 0] - y_pis_nested[:, 0, 0]
-coverage_nested = coverage_score(
+coverage_nested = regression_coverage_score(
     y_test, y_pis_nested[:, 0, 0], y_pis_nested[:, 1, 0]
 )
 score_nested = mean_squared_error(y_test, y_pred_nested, squared=False)

diff --git a/examples/regression/plot_prefit_nn.py b/examples/regression/plot_prefit_nn.py
@@ -18,7 +18,7 @@
 from matplotlib import pyplot as plt
 
 from mapie.regression import MapieRegressor
-from mapie.metrics import coverage_score
+from mapie.metrics import regression_coverage_score
 
 
 def f(x: np.ndarray) -> np.ndarray:
@@ -52,7 +52,7 @@ def f(x: np.ndarray) -> np.ndarray:
 alpha = 0.1
 y_pred, y_pis = mapie.predict(X_test.reshape(-1, 1), alpha=alpha)
 y_pred_low, y_pred_up = y_pis[:, 0, 0], y_pis[:, 1, 0]
-coverage = coverage_score(y_test, y_pred_low, y_pred_up)
+coverage = regression_coverage_score(y_test, y_pred_low, y_pred_up)
 
 # Plot obtained prediction intervals on testing set
 theoretical_semi_width = scipy.stats.norm.ppf(1 - alpha)*sigma

diff --git a/examples/regression/plot_timeseries_example.py b/examples/regression/plot_timeseries_example.py
@@ -35,7 +35,7 @@
 from sklearn.ensemble import RandomForestRegressor
 from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit
 from mapie.regression import MapieRegressor
-from mapie.metrics import coverage_score
+from mapie.metrics import regression_coverage_score
 
 # Load input data and feature engineering
 demand_df = pd.read_csv(
@@ -94,7 +94,7 @@
 )
 mapie.fit(X_train, y_train)
 y_pred, y_pis = mapie.predict(X_test, alpha=alpha)
-coverage = coverage_score(
+coverage = regression_coverage_score(
     y_test, y_pis[:, 0, 0], y_pis[:, 1, 0]
 )
 width = (y_pis[:, 1, 0] - y_pis[:, 0, 0]).mean()

diff --git a/mapie/metrics.py b/mapie/metrics.py
@@ -1,9 +1,9 @@
 from sklearn.utils.validation import column_or_1d
-
+import numpy as np
 from ._typing import ArrayLike
 
 
-def coverage_score(
+def regression_coverage_score(
     y_true: ArrayLike,
     y_pred_low: ArrayLike,
     y_pred_up: ArrayLike,
@@ -30,16 +30,58 @@ def coverage_score(
 
     Examples
     --------
-    >>> from mapie.metrics import coverage_score
+    >>> from mapie.metrics import regression_coverage_score
     >>> import numpy as np
     >>> y_true = np.array([5, 7.5, 9.5, 10.5, 12.5])
     >>> y_pred_low = np.array([4, 6, 9, 8.5, 10.5])
     >>> y_pred_up = np.array([6, 9, 10, 12.5, 12])
-    >>> print(coverage_score(y_true, y_pred_low, y_pred_up))
+    >>> print(regression_coverage_score(y_true, y_pred_low, y_pred_up))
     0.8
     """
     y_true = column_or_1d(y_true)
     y_pred_low = column_or_1d(y_pred_low)
     y_pred_up = column_or_1d(y_pred_up)
     coverage = ((y_pred_low <= y_true) & (y_pred_up >= y_true)).mean()
     return float(coverage)
+
+
+def classification_coverage_score(
+    y_true: ArrayLike,
+    y_pred_set: ArrayLike
+) -> float:
+    """
+    Effective coverage score obtained by the prediction sets.
+
+    The effective coverage is obtained by estimating the fraction
+    of true labels that lie within the prediction sets.
+
+    Parameters
+    ----------
+    y_true : ArrayLike of shape (n_samples,)
+        True labels.
+    y_pred_set : ArrayLike of shape (n_samples, n_class)
+        Prediction sets given by booleans of labels.
+
+    Returns
+    -------
+    float
+        Effective coverage obtained by the prediction sets.
+
+    Examples
+    --------
+    >>> from mapie.metrics import classification_coverage_score
+    >>> import numpy as np
+    >>> y_true = np.array([3, 3, 1, 2, 2])
+    >>> y_pred_set = np.array([
+    ...     [False, False, False,  True],
+    ...     [False, False, False,  True],
+    ...     [False,  True, False, False],
+    ...     [False, False,  True, False],
+    ...     [False,  True, False, False]
+    ... ])
+    >>> print(classification_coverage_score(y_true, y_pred_set))
+    0.8
+    """
+    y_true = column_or_1d(y_true)
+    coverage = y_pred_set[np.arange(len(y_true)), y_true].mean()
+    return float(coverage)
diff --git a/mapie/tests/test_classification.py b/mapie/tests/test_classification.py
@@ -18,6 +18,7 @@
 from sklearn.naive_bayes import GaussianNB
 
 from mapie.classification import MapieClassifier
+from mapie.metrics import classification_coverage_score
 
 
 class DumbClassifier:
@@ -423,4 +424,7 @@ def test_toy_dataset_predictions() -> None:
     clf = GaussianNB().fit(X_toy, y_toy)
     mapie = MapieClassifier(estimator=clf, cv="prefit").fit(X_toy, y_toy)
     _, y_pi_mapie = mapie.predict(X_toy, alpha=0.1)
+    np.testing.assert_allclose(
+        classification_coverage_score(y_toy, y_pi_mapie), 7/9
+    )
     np.testing.assert_allclose(y_pi_mapie[:, :, 0], y_toy_mapie)
diff --git a/mapie/tests/test_metrics.py b/mapie/tests/test_metrics.py
@@ -3,7 +3,8 @@
 """
 import pytest
 import numpy as np
-from mapie.metrics import coverage_score
+from mapie.metrics import regression_coverage_score
+from mapie.metrics import classification_coverage_score
 
 
 X_toy = np.array([0, 1, 2, 3, 4]).reshape(-1, 1)
@@ -16,40 +17,68 @@
     [11.5, 10.5, 12.]
 ])
 
+y_true_class = np.array([3, 3, 1, 2, 2])
+y_pred_set = np.array([
+    [False, False, False,  True],
+    [False, False, False,  True],
+    [False,  True, False, False],
+    [False, False,  True, False],
+    [False,  True, False, False]
+])
+
 
 def test_ypredlow_shape() -> None:
     "Test shape of y_pred_low."
     with pytest.raises(ValueError, match=r".*y should be a 1d array*"):
-        coverage_score(y_toy, y_preds[:, :2], y_preds[:, 2])
+        regression_coverage_score(y_toy, y_preds[:, :2], y_preds[:, 2])
 
 
 def test_ypredup_shape() -> None:
-    "Test shape of y_pred_low."
+    "Test shape of y_pred_up."
     with pytest.raises(ValueError, match=r".*y should be a 1d array*"):
-        coverage_score(y_toy, y_preds[:, 1], y_preds[:, 1:])
+        regression_coverage_score(y_toy, y_preds[:, 1], y_preds[:, 1:])
 
 
 def test_same_length() -> None:
     "Test when y_true and y_preds have different lengths."
     with pytest.raises(ValueError, match=r".*could not be broadcast*"):
-        coverage_score(y_toy, y_preds[:-1, 1], y_preds[:-1, 2])
+        regression_coverage_score(y_toy, y_preds[:-1, 1], y_preds[:-1, 2])
 
 
 def test_toydata() -> None:
     "Test coverage_score for toy data"
-    assert coverage_score(y_toy, y_preds[:, 1], y_preds[:, 2]) == 0.8
+    assert regression_coverage_score(
+        y_toy, y_preds[:, 1], y_preds[:, 2]
+    ) == 0.8
 
 
 def test_ytrue_type() -> None:
     "Test that list(y_true) gives right coverage."
-    assert coverage_score(list(y_toy), y_preds[:, 1], y_preds[:, 2]) == 0.8
+    assert regression_coverage_score(
+        list(y_toy), y_preds[:, 1], y_preds[:, 2]
+    ) == 0.8
 
 
 def test_ypredlow_type() -> None:
     "Test that list(y_pred_low) gives right coverage."
-    assert coverage_score(y_toy, list(y_preds[:, 1]), y_preds[:, 2]) == 0.8
+    assert regression_coverage_score(
+        y_toy, list(y_preds[:, 1]), y_preds[:, 2]
+    ) == 0.8
 
 
 def test_ypredup_type() -> None:
     "Test that list(y_pred_up) gives right coverage."
-    assert coverage_score(y_toy, y_preds[:, 1], list(y_preds[:, 2])) == 0.8
+    assert regression_coverage_score(
+        y_toy, y_preds[:, 1], list(y_preds[:, 2])
+    ) == 0.8
+
+
+def test_same_length_y_pred_set__y_true_class() -> None:
+    "Test when y_true_class and y_pred_set have different lengths."
+    with pytest.raises(IndexError, match=r".*index 4 is out of bounds*"):
+        classification_coverage_score(y_true_class, y_pred_set[:-1, :])
+
+
+def test_classification_coverage_score() -> None:
+    "Test classification_coverage_score for y_true_class"
+    assert classification_coverage_score(y_true_class, y_pred_set) == 0.8
diff --git a/mapie/tests/test_regression.py b/mapie/tests/test_regression.py
@@ -17,7 +17,7 @@
 from sklearn.utils.validation import check_is_fitted
 
 from mapie.regression import MapieRegressor
-from mapie.metrics import coverage_score
+from mapie.metrics import regression_coverage_score
 
 
 X_toy = np.array([0, 1, 2, 3, 4, 5]).reshape(-1, 1)
@@ -447,7 +447,7 @@ def test_linear_regression_results(strategy: str) -> None:
     _, y_pis = mapie.predict(X_reg, alpha=0.05)
     y_pred_low, y_pred_up = y_pis[:, 0, 0], y_pis[:, 1, 0]
     width_mean = (y_pred_up - y_pred_low).mean()
-    coverage = coverage_score(y_reg, y_pred_low, y_pred_up)
+    coverage = regression_coverage_score(y_reg, y_pred_low, y_pred_up)
     np.testing.assert_allclose(width_mean, WIDTHS[strategy], rtol=1e-2)
     np.testing.assert_allclose(coverage, COVERAGES[strategy], rtol=1e-2)
 
@@ -573,7 +573,7 @@ def test_results_prefit_naive() -> None:
     mapie.fit(X_reg, y_reg)
     _, y_pis = mapie.predict(X_reg, alpha=0.05)
     width_mean = (y_pis[:, 1, 0] - y_pis[:, 0, 0]).mean()
-    coverage = coverage_score(y_reg, y_pis[:, 0, 0], y_pis[:, 1, 0])
+    coverage = regression_coverage_score(y_reg, y_pis[:, 0, 0], y_pis[:, 1, 0])
     np.testing.assert_allclose(width_mean, WIDTHS["naive"], rtol=1e-2)
     np.testing.assert_allclose(coverage, COVERAGES["naive"], rtol=1e-2)
 
@@ -591,6 +591,8 @@ def test_results_prefit() -> None:
     mapie.fit(X_val, y_val)
     _, y_pis = mapie.predict(X_test, alpha=0.05)
     width_mean = (y_pis[:, 1, 0] - y_pis[:, 0, 0]).mean()
-    coverage = coverage_score(y_test, y_pis[:, 0, 0], y_pis[:, 1, 0])
+    coverage = regression_coverage_score(
+        y_test, y_pis[:, 0, 0], y_pis[:, 1, 0]
+    )
     np.testing.assert_allclose(width_mean, WIDTHS["prefit"], rtol=1e-2)
     np.testing.assert_allclose(coverage, COVERAGES["prefit"], rtol=1e-2)