[MRG] PredictionEntropyScorer output negative scores #63

Merged
39 changes: 36 additions & 3 deletions skada/metrics.py
@@ -201,6 +201,18 @@ class PredictionEntropyScorer(_BaseDomainAwareScorer):
Whether `scorer` is a score function (default), meaning high is
good, or a loss function, meaning low is good. In the latter case, the
scorer object will sign-flip the outcome of the `scorer`.
reduction : str, default='mean'
Specifies the reduction to apply to the entropy values.
Must be one of ['none', 'mean', 'sum'].
If 'none', the per-sample entropy values are returned (the formulation used in [1]_).
If 'mean', the mean of the entropy values is returned.
If 'sum', the sum of the entropy values is returned.

Returns
-------
entropy : float or ndarray of floats
If `reduction` is 'none', then ndarray of shape (n_samples,).
Otherwise float.

References
----------
@@ -209,9 +221,18 @@ class PredictionEntropyScorer(_BaseDomainAwareScorer):
ICLR, 2018.
"""

def __init__(self, greater_is_better=False):
def __init__(self,
greater_is_better=False,
reduction='mean'):
super().__init__()
self._sign = 1 if greater_is_better else -1
self.reduction = reduction

if self.reduction not in ['none', 'mean', 'sum']:
raise ValueError(
f"Unknown reduction '{self.reduction}'. "
"Valid options are: 'none', 'mean', 'sum'."
)

def _score(self, estimator, X, y, sample_domain=None, **params):
if not hasattr(estimator, "predict_proba"):
@@ -235,8 +256,20 @@ def _score(self, estimator, X, y, sample_domain=None, **params):
)
else:
log_proba = np.log(proba + 1e-7)
entropy = np.sum(-proba * log_proba, axis=1)
return - np.mean(entropy)

# Per-sample entropy, shape (n_samples,), as promised by the docstring.
entropy_per_sample = np.sum(-proba * log_proba, axis=1)

if self.reduction == 'none':
return self._sign * entropy_per_sample
elif self.reduction == 'sum':
return self._sign * np.sum(entropy_per_sample)
elif self.reduction == 'mean':
return self._sign * np.mean(entropy_per_sample)
else:
raise ValueError(
f"Unknown reduction '{self.reduction}'. "
"Valid options are: 'none', 'mean', 'sum'."
)


class SoftNeighborhoodDensity(_BaseDomainAwareScorer):
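For reference, here is a minimal NumPy sketch of how the three reduction modes relate to each other, mirroring the logic added above; the probability matrix is made up purely for illustration and is not part of the PR:

    import numpy as np

    # Toy predicted probabilities for 3 samples and 2 classes (illustrative only).
    proba = np.array([[0.9, 0.1],
                      [0.5, 0.5],
                      [0.6, 0.4]])
    log_proba = np.log(proba + 1e-7)

    # Per-sample entropy, as returned by reduction='none' (shape: (n_samples,)).
    entropy_per_sample = np.sum(-proba * log_proba, axis=1)

    # reduction='mean' and reduction='sum' collapse the per-sample values.
    entropy_mean = np.mean(entropy_per_sample)
    entropy_sum = np.sum(entropy_per_sample)

    # With the default greater_is_better=False the scorer sign-flips the result,
    # so reported scores are non-positive and higher (closer to 0) means lower entropy.
    print(-entropy_per_sample, -entropy_mean, -entropy_sum)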
36 changes: 36 additions & 0 deletions skada/tests/test_scorer.py
@@ -127,3 +127,39 @@ def test_scorer_with_log_proba():
)['test_score']
assert scores.shape[0] == 3, "evaluate 3 splits"
assert np.all(~np.isnan(scores)), "all scores are computed"
assert np.all(scores <= 0), "all scores are negative"


def test_prediction_entropy_scorer_reduction(da_dataset):
X, y, sample_domain = da_dataset.pack_train(as_sources=['s'], as_targets=['t'])
estimator = make_da_pipeline(
ReweightDensityAdapter(),
LogisticRegression().set_fit_request(
sample_weight=True
),
)

estimator.fit(X, y, sample_domain=sample_domain)

scorer = PredictionEntropyScorer(reduction='mean')
score_mean = scorer._score(estimator, X, y, sample_domain=sample_domain)
assert isinstance(score_mean, float), "score_mean is not a float"

scorer = PredictionEntropyScorer(reduction='sum')
score_sum = scorer._score(estimator, X, y, sample_domain=sample_domain)
assert isinstance(score_sum, float), "score_sum is not a float"

assert score_mean == pytest.approx(score_sum / X.shape[0], rel=1e-5)

scorer = PredictionEntropyScorer(reduction='none')
score_none = scorer._score(estimator, X, y, sample_domain=sample_domain)
assert isinstance(score_none, np.ndarray), "score_none is not a numpy array"

with pytest.raises(ValueError):
scorer = PredictionEntropyScorer(reduction='WRONG_REDUCTION')

# Really unlikely to happen, but still
with pytest.raises(ValueError):
scorer = PredictionEntropyScorer(reduction='none')
scorer.reduction = 'WRONG_REDUCTION'
scorer._score(estimator, X, y, sample_domain=sample_domain)
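A note on the sign convention that the new "all scores are negative" assertion checks: with the default greater_is_better=False the scorer negates the entropy, so a more confident model still receives the higher (less negative) score under scikit-learn's "greater is better" convention. A small hand-computed sketch; the probability values and the entropy_score helper are illustrative assumptions, not part of skada:

    import numpy as np

    def entropy_score(proba, sign=-1):
        # Mirrors the 'mean' reduction with the default greater_is_better=False.
        log_proba = np.log(proba + 1e-7)
        return sign * np.mean(np.sum(-proba * log_proba, axis=1))

    confident = np.array([[0.99, 0.01], [0.98, 0.02]])   # low entropy
    uncertain = np.array([[0.55, 0.45], [0.50, 0.50]])   # high entropy

    # Both scores are negative, but the confident model scores higher,
    # so selecting the model that maximizes the score still prefers it.
    assert entropy_score(confident) > entropy_score(uncertain)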