diff --git a/skada/metrics.py b/skada/metrics.py
index 6f3288cc..24b5030b 100644
--- a/skada/metrics.py
+++ b/skada/metrics.py
@@ -201,6 +201,18 @@ class PredictionEntropyScorer(_BaseDomainAwareScorer):
         Whether `scorer` is a score function (default), meaning high is good,
         or a loss function, meaning low is good. In the latter case, the
         scorer object will sign-flip the outcome of the `scorer`.
+    reduction : str, default='mean'
+        Specifies the reduction to apply to the entropy values.
+        Must be one of ['none', 'mean', 'sum'].
+        If 'none', the entropy values for each sample are returned (as in [1]_).
+        If 'mean', the mean of the entropy values is returned.
+        If 'sum', the sum of the entropy values is returned.
+
+    Returns
+    -------
+    entropy : float or ndarray of floats
+        If `reduction` is 'none', then ndarray of shape (n_samples,).
+        Otherwise float.
 
     References
     ----------
@@ -209,9 +221,18 @@ class PredictionEntropyScorer(_BaseDomainAwareScorer):
        ICLR, 2018.
     """
 
-    def __init__(self, greater_is_better=False):
+    def __init__(self,
+                 greater_is_better=False,
+                 reduction='mean'):
         super().__init__()
         self._sign = 1 if greater_is_better else -1
+        self.reduction = reduction
+
+        if self.reduction not in ['none', 'mean', 'sum']:
+            raise ValueError(
+                f"Unknown reduction '{self.reduction}'. "
+                "Valid options are: 'none', 'mean', 'sum'."
+            )
 
     def _score(self, estimator, X, y, sample_domain=None, **params):
         if not hasattr(estimator, "predict_proba"):
@@ -235,8 +256,20 @@ def _score(self, estimator, X, y, sample_domain=None, **params):
             )
         else:
             log_proba = np.log(proba + 1e-7)
-        entropy = np.sum(-proba * log_proba, axis=1)
-        return - np.mean(entropy)
+
+        entropy_per_sample = np.sum(-proba * log_proba, axis=1)
+
+        if self.reduction == 'none':
+            return self._sign * entropy_per_sample
+        elif self.reduction == 'sum':
+            return self._sign * np.sum(entropy_per_sample)
+        elif self.reduction == 'mean':
+            return self._sign * np.mean(entropy_per_sample)
+        else:
+            raise ValueError(
+                f"Unknown reduction '{self.reduction}'. "
+                "Valid options are: 'none', 'mean', 'sum'."
+            )
 
 
 class SoftNeighborhoodDensity(_BaseDomainAwareScorer):
diff --git a/skada/tests/test_scorer.py b/skada/tests/test_scorer.py
index 8dceb2aa..f23ef15d 100644
--- a/skada/tests/test_scorer.py
+++ b/skada/tests/test_scorer.py
@@ -127,3 +127,39 @@ def test_scorer_with_log_proba():
     )['test_score']
     assert scores.shape[0] == 3, "evaluate 3 splits"
     assert np.all(~np.isnan(scores)), "all scores are computed"
+    assert np.all(scores <= 0), "all scores are non-positive"
+
+
+def test_prediction_entropy_scorer_reduction(da_dataset):
+    X, y, sample_domain = da_dataset.pack_train(as_sources=['s'], as_targets=['t'])
+    estimator = make_da_pipeline(
+        ReweightDensityAdapter(),
+        LogisticRegression().set_fit_request(
+            sample_weight=True
+        ),
+    )
+
+    estimator.fit(X, y, sample_domain=sample_domain)
+
+    scorer = PredictionEntropyScorer(reduction='mean')
+    score_mean = scorer._score(estimator, X, y, sample_domain=sample_domain)
+    assert isinstance(score_mean, float), "score_mean is not a float"
+
+    scorer = PredictionEntropyScorer(reduction='sum')
+    score_sum = scorer._score(estimator, X, y, sample_domain=sample_domain)
+    assert isinstance(score_sum, float), "score_sum is not a float"
+
+    assert score_mean == pytest.approx(score_sum / X.shape[0], rel=1e-5)
+
+    scorer = PredictionEntropyScorer(reduction='none')
+    score_none = scorer._score(estimator, X, y, sample_domain=sample_domain)
+    assert isinstance(score_none, np.ndarray), "score_none is not a numpy array"
+
+    with pytest.raises(ValueError):
+        scorer = PredictionEntropyScorer(reduction='WRONG_REDUCTION')
+
+    # Really unlikely to happen, but still: guard against mutation after __init__
+    with pytest.raises(ValueError):
+        scorer = PredictionEntropyScorer(reduction='none')
+        scorer.reduction = 'WRONG_REDUCTION'
+        scorer._score(estimator, X, y, sample_domain=sample_domain)
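
A minimal NumPy sketch of the reduction semantics introduced above (standalone illustration with made-up probabilities, not skada code; `sign = -1` mirrors the scorer's default `greater_is_better=False`):

```python
import numpy as np

# Hypothetical predicted class probabilities: 4 samples, 3 classes.
proba = np.array([
    [0.7, 0.2, 0.1],
    [0.1, 0.8, 0.1],
    [0.4, 0.3, 0.3],
    [0.2, 0.2, 0.6],
])

# Per-sample Shannon entropy: sum over classes, one value per sample.
log_proba = np.log(proba + 1e-7)
entropy_per_sample = np.sum(-proba * log_proba, axis=1)  # shape (4,)

sign = -1  # greater_is_better=False sign-flips the outcome

print(sign * entropy_per_sample)           # reduction='none'
print(sign * np.sum(entropy_per_sample))   # reduction='sum'
print(sign * np.mean(entropy_per_sample))  # reduction='mean'

# The identity the new test pins down: mean == sum / n_samples.
assert np.isclose(np.mean(entropy_per_sample),
                  np.sum(entropy_per_sample) / proba.shape[0])
```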
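In use, the new parameter is chosen at construction and flows through the same `_score` call the tests exercise. A sketch reusing the `estimator`, `X`, `y`, and `sample_domain` names set up in `test_prediction_entropy_scorer_reduction` (the public route remains passing the scorer as `scoring=` to `cross_validate`, as `test_scorer_with_log_proba` does):

```python
from skada.metrics import PredictionEntropyScorer

# Default reduction='mean' preserves the historical behaviour:
# a single float, the sign-flipped mean entropy over the scored samples.
mean_score = PredictionEntropyScorer()._score(
    estimator, X, y, sample_domain=sample_domain
)

# reduction='none' returns one sign-flipped entropy per sample, e.g. to
# inspect which samples the classifier is least certain about.
per_sample = PredictionEntropyScorer(reduction='none')._score(
    estimator, X, y, sample_domain=sample_domain
)
```

Validating `reduction` both in `__init__` and again in `_score` is deliberate: the final test case mutates the attribute after construction and still expects a `ValueError` rather than a silent `None` return.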