diff --git a/skada/metrics.py b/skada/metrics.py
index 6f3288cc..24b5030b 100644
--- a/skada/metrics.py
+++ b/skada/metrics.py
@@ -201,6 +201,18 @@ class PredictionEntropyScorer(_BaseDomainAwareScorer):
         Whether `scorer` is a score function (default), meaning high is good,
         or a loss function, meaning low is good. In the latter case, the
         scorer object will sign-flip the outcome of the `scorer`.
+    reduction : str, default='mean'
+        Specifies the reduction to apply to the entropy values.
+        Must be one of ['none', 'mean', 'sum'].
+        If 'none', the entropy values for each sample are returned (as in [1]_).
+        If 'mean', the mean of the entropy values is returned.
+        If 'sum', the sum of the entropy values is returned.
+
+    Returns
+    -------
+    entropy : float or ndarray of floats
+        If `reduction` is 'none', then ndarray of shape (n_samples,).
+        Otherwise float.
 
     References
     ----------
@@ -209,9 +221,18 @@ class PredictionEntropyScorer(_BaseDomainAwareScorer):
        ICLR, 2018.
     """
 
-    def __init__(self, greater_is_better=False):
+    def __init__(self,
+                 greater_is_better=False,
+                 reduction='mean'):
         super().__init__()
         self._sign = 1 if greater_is_better else -1
+        self.reduction = reduction
+
+        if self.reduction not in ['none', 'mean', 'sum']:
+            raise ValueError(
+                f"Unknown reduction '{self.reduction}'. "
+                "Valid options are: 'none', 'mean', 'sum'."
+            )
 
     def _score(self, estimator, X, y, sample_domain=None, **params):
         if not hasattr(estimator, "predict_proba"):
@@ -235,8 +256,20 @@ def _score(self, estimator, X, y, sample_domain=None, **params):
             )
         else:
             log_proba = np.log(proba + 1e-7)
-        entropy = np.sum(-proba * log_proba, axis=1)
-        return - np.mean(entropy)
+
+        entropy_per_sample = np.sum(-proba * log_proba, axis=1)
+
+        if self.reduction == 'none':
+            return self._sign * entropy_per_sample
+        elif self.reduction == 'sum':
+            return self._sign * np.sum(entropy_per_sample)
+        elif self.reduction == 'mean':
+            return self._sign * np.mean(entropy_per_sample)
+        else:
+            raise ValueError(
+                f"Unknown reduction '{self.reduction}'. "
+                "Valid options are: 'none', 'mean', 'sum'."
+            )
 
 
 class SoftNeighborhoodDensity(_BaseDomainAwareScorer):
diff --git a/skada/tests/test_scorer.py b/skada/tests/test_scorer.py
index 8dceb2aa..f23ef15d 100644
--- a/skada/tests/test_scorer.py
+++ b/skada/tests/test_scorer.py
@@ -127,3 +127,39 @@ def test_scorer_with_log_proba():
     )['test_score']
     assert scores.shape[0] == 3, "evaluate 3 splits"
     assert np.all(~np.isnan(scores)), "all scores are computed"
+    assert np.all(scores <= 0), "all scores are non-positive"
+
+
+def test_prediction_entropy_scorer_reduction(da_dataset):
+    X, y, sample_domain = da_dataset.pack_train(as_sources=['s'], as_targets=['t'])
+    estimator = make_da_pipeline(
+        ReweightDensityAdapter(),
+        LogisticRegression().set_fit_request(
+            sample_weight=True
+        ),
+    )
+
+    estimator.fit(X, y, sample_domain=sample_domain)
+
+    scorer = PredictionEntropyScorer(reduction='mean')
+    score_mean = scorer._score(estimator, X, y, sample_domain=sample_domain)
+    assert isinstance(score_mean, float), "score_mean is not a float"
+
+    scorer = PredictionEntropyScorer(reduction='sum')
+    score_sum = scorer._score(estimator, X, y, sample_domain=sample_domain)
+    assert isinstance(score_sum, float), "score_sum is not a float"
+
+    assert score_mean == pytest.approx(score_sum / X.shape[0], rel=1e-5)
+
+    scorer = PredictionEntropyScorer(reduction='none')
+    score_none = scorer._score(estimator, X, y, sample_domain=sample_domain)
+    assert isinstance(score_none, np.ndarray), "score_none is not a numpy array"
+
+    with pytest.raises(ValueError):
+        scorer = PredictionEntropyScorer(reduction='WRONG_REDUCTION')
+
+    # Really unlikely to happen, but still: guard against mutation after __init__
+    with pytest.raises(ValueError):
+        scorer = PredictionEntropyScorer(reduction='none')
+        scorer.reduction = 'WRONG_REDUCTION'
+        scorer._score(estimator, X, y, sample_domain=sample_domain)
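
A minimal NumPy sketch of the reduction semantics introduced above (standalone illustration with made-up probabilities, not skada code; `sign = -1` mirrors the scorer's default `greater_is_better=False`):

```python
import numpy as np

# Hypothetical predicted class probabilities: 4 samples, 3 classes.
proba = np.array([
    [0.7, 0.2, 0.1],
    [0.1, 0.8, 0.1],
    [0.4, 0.3, 0.3],
    [0.2, 0.2, 0.6],
])

# Per-sample Shannon entropy: sum over classes, one value per sample.
log_proba = np.log(proba + 1e-7)
entropy_per_sample = np.sum(-proba * log_proba, axis=1)  # shape (4,)

sign = -1  # greater_is_better=False sign-flips the outcome

print(sign * entropy_per_sample)           # reduction='none'
print(sign * np.sum(entropy_per_sample))   # reduction='sum'
print(sign * np.mean(entropy_per_sample))  # reduction='mean'

# The identity the new test pins down: mean == sum / n_samples.
assert np.isclose(np.mean(entropy_per_sample),
                  np.sum(entropy_per_sample) / proba.shape[0])
```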
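In use, the new parameter is chosen at construction and flows through the same `_score` call the tests exercise. A sketch reusing the `estimator`, `X`, `y`, and `sample_domain` names set up in `test_prediction_entropy_scorer_reduction` (the public route remains passing the scorer as `scoring=` to `cross_validate`, as `test_scorer_with_log_proba` does):

```python
from skada.metrics import PredictionEntropyScorer

# Default reduction='mean' preserves the historical behaviour:
# a single float, the sign-flipped mean entropy over the scored samples.
mean_score = PredictionEntropyScorer()._score(
    estimator, X, y, sample_domain=sample_domain
)

# reduction='none' returns one sign-flipped entropy per sample, e.g. to
# inspect which samples the classifier is least certain about.
per_sample = PredictionEntropyScorer(reduction='none')._score(
    estimator, X, y, sample_domain=sample_domain
)
```

Validating `reduction` both in `__init__` and again in `_score` is deliberate: the final test case mutates the attribute after construction and still expects a `ValueError` rather than a silent `None` return.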