Skip to content

Commit

Permalink
fix #1204: c2st with constant features; refactoring.
Browse files Browse the repository at this point in the history
  • Loading branch information
janfb committed Jul 26, 2024
1 parent 83e122a commit 644e0d1
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 121 deletions.
171 changes: 54 additions & 117 deletions sbi/utils/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# under the Apache License Version 2.0, see <https://www.apache.org/licenses/>

from logging import warning
from typing import Any, Dict, Optional, Union
from typing import Any, Callable, Dict, Optional, Union

import numpy as np
import torch
Expand All @@ -18,55 +18,72 @@ def c2st(
seed: int = 1,
n_folds: int = 5,
metric: str = "accuracy",
classifier: str = "rf",
classifier: Union[str, Callable] = "rf",
classifier_kwargs: Optional[Dict[str, Any]] = None,
z_score: bool = True,
noise_scale: Optional[float] = None,
verbosity: int = 0,
) -> Tensor:
"""
Return accuracy of classifier trained to distinguish samples from supposedly two
distributions <X> and <Y>. For details on the method, see [1,2]. If the returned
accuracy is 0.5, <X> and <Y> are considered to be from the same generating PDF, i.e.
they can not be differentiated. If the returned accuracy is around 1., <X> and <Y>
are considered to be from two different generating PDFs.
Training of the classifier with N-fold cross-validation [3] using sklearn. By
default, a `RandomForestClassifier` by from `sklearn.ensemble` is used (<classifier>
= 'rf'). Alternatively, a multi-layer perceptron is available (<classifier> =
'mlp'). For a small study on the pros and cons for this choice see [4]. Before both
samples are ingested, they are normalized (z scored) under the assumption that each
dimension in X follows a normal distribution, i.e. the mean(X) is subtracted from X
and this difference is divided by std(X) for every dimension.
If you need a more flexible interface which is able to take a sklearn compatible
classifier and more, see the `c2st_` method in this module.
Return classifier based two-sample test accuracy between X and Y.
For details on the method, see [1,2]. If the returned accuracy is 0.5, <X>
and <Y> are considered to be from the same generating PDF, i.e. they can not
be differentiated. If the returned accuracy is around 1., <X> and <Y> are
considered to be from two different generating PDFs.
Training of the classifier with N-fold cross-validation [3] using sklearn.
By default, a `RandomForestClassifier` by from `sklearn.ensemble` is used
(<classifier> = 'rf'). Alternatively, a multi-layer perceptron is available
(<classifier> = 'mlp'). For a small study on the pros and cons for this
choice see [4]. Before both samples are ingested, they are normalized (z
scored) under the assumption that each dimension in X follows a normal
distribution, i.e. the mean(X) is subtracted from X and this difference is
divided by std(X) for every dimension.
If you need a more flexible interface which is able to take a sklearn
compatible classifier and more, see the `c2st_` method in this module.
Args:
X: Samples from one distribution. Y: Samples from another distribution. seed:
Seed for the sklearn classifier and the KFold cross-validation n_folds: Number
of folds to use metric: sklearn compliant metric to use for the scoring
parameter of cross_val_score classifier: classification architecture to use,
possible values: 'rf' or 'mlp'
X: Samples from one distribution.
Y: Samples from another distribution.
seed: Seed for the sklearn classifier and the KFold cross-validation
n_folds: Number of folds to use
metric: sklearn compliant metric to use for the scoring parameter of
cross_val_score
classifier: classification architecture to use. Defaults to "rf" for a
RandomForestClassifier. Should be a sklearn classifier, or a
Callable that behaves like one.
z_score: Z-scoring using X, i.e. mean and std deviation of X is
used to normalize X and Y, i.e. Y=(Y - mean)/std
noise_scale: If passed, will add Gaussian noise with standard deviation
<noise_scale> to samples of X and of Y
verbosity: control the verbosity of sklearn.model_selection.cross_val_score
Return:
torch.tensor containing the mean accuracy score over the test sets from
cross-validation
Example: ``` py > c2st(X,Y) [0.51904464] #X and Y likely come from the same PDF or
ensemble > c2st(P,Q) [0.998456] #P and Q likely come from two different PDFs or
ensembles ```
Example: ``` py > c2st(X,Y) [0.51904464] #X and Y likely come from the same
PDF or ensemble > c2st(P,Q) [0.998456] #P and Q likely come from two
different PDFs or ensembles ```
References:
[1]: http://arxiv.org/abs/1610.06545 [2]: https://www.osti.gov/biblio/826696/
[3]: https://scikit-learn.org/stable/modules/cross_validation.html [4]:
[1]: http://arxiv.org/abs/1610.06545 [2]:
https://www.osti.gov/biblio/826696/ [3]:
https://scikit-learn.org/stable/modules/cross_validation.html [4]:
https://github.com/psteinb/c2st/
"""

# the default configuration
clf_class = RandomForestClassifier
clf_kwargs = {}

if "mlp" in classifier.lower():
if classifier == "rf":
clf_class = RandomForestClassifier
clf_kwargs = classifier_kwargs or {} # use sklearn defaults
elif classifier == "mlp":
ndim = X.shape[-1]
clf_class = MLPClassifier
clf_kwargs = {
# set defaults for the MLP
clf_kwargs = classifier_kwargs or {
"activation": "relu",
"hidden_layer_sizes": (10 * ndim, 10 * ndim),
"max_iter": 1000,
Expand All @@ -75,99 +92,19 @@ def c2st(
"n_iter_no_change": 50,
}

noise_scale = None
z_score = True
verbosity = 0

scores_ = c2st_scores(
X,
Y,
seed=seed,
n_folds=n_folds,
metric=metric,
z_score=z_score,
noise_scale=noise_scale,
verbosity=verbosity,
clf_class=clf_class,
clf_kwargs=clf_kwargs,
)

# TODO: unclear why np.asarray needs to be used here
scores = np.asarray(np.mean(scores_)).astype(np.float32)
value = torch.from_numpy(np.atleast_1d(scores))
return value


def c2st_scores(
X: Tensor,
Y: Tensor,
seed: int = 1,
n_folds: int = 5,
metric: str = "accuracy",
z_score: bool = True,
noise_scale: Optional[float] = None,
verbosity: int = 0,
clf_class: Any = RandomForestClassifier,
clf_kwargs: Optional[Dict[str, Any]] = None,
) -> Tensor:
"""
Return accuracy of classifier trained to distinguish samples from supposedly two
distributions <X> and <Y>. For details on the method, see [1,2]. If the returned
accuracy is 0.5, <X> and <Y> are considered to be from the same generating PDF, i.e.
they can not be differentiated. If the returned accuracy is around 1., <X> and <Y>
are considered to be from two different generating PDFs.
This function performs training of the classifier with N-fold cross-validation [3]
using sklearn. By default, a `RandomForestClassifier` by from `sklearn.ensemble` is
used which is recommended based on the study performed in [4]. This can be changed
using <clf_class>. This class is used in the following fashion:
``` py clf = clf_class(random_state=seed, **clf_kwargs) #... scores =
cross_val_score(
clf, data, target, cv=shuffle, scoring=scoring, verbose=verbosity
)
```
Further configuration of the classifier can be performed using <clf_kwargs>. If you
like to provide a custom class for training, it has to satisfy the internal
requirements of `sklearn.model_selection.cross_val_score`.
Args:
X: Samples from one distribution. Y: Samples from another distribution. seed:
Seed for the sklearn classifier and the KFold cross validation n_folds: Number
of folds to use for cross validation metric: sklearn compliant metric to use for
the scoring parameter of cross_val_score z_score: Z-scoring using X, i.e. mean
and std deviation of X is used to normalize Y, i.e. Y=(Y - mean)/std
noise_scale: If passed, will add Gaussian noise with standard deviation
<noise_scale> to samples of X and of Y verbosity: control the verbosity of
sklearn.model_selection.cross_val_score clf_class: a scikit-learn classifier
class clf_kwargs: key-value arguments dictionary to the class specified by
clf_class, e.g. sklearn.ensemble.RandomForestClassifier
Return:
np.ndarray containing the calculated <metric> scores over the test set folds
from cross-validation
Example: ``` py > c2st_scores(X,Y)
[0.51904464,0.5309201,0.4959452,0.5487709,0.50682926] #X and Y likely come from the
same PDF or ensemble > c2st_scores(P,Q)
[0.998456,0.9982912,0.9980476,0.9980488,0.99805826] #P and Q likely come from two
different PDFs or ensembles ```
References:
[1]: http://arxiv.org/abs/1610.06545 [2]: https://www.osti.gov/biblio/826696/
[3]: https://scikit-learn.org/stable/modules/cross_validation.html [4]:
https://github.com/psteinb/c2st/
"""
if z_score:
X_mean = torch.mean(X, dim=0)
X_std = torch.std(X, dim=0)
# Set std to 1 if it is 0
X_std[X_std == 0] = 1
X = (X - X_mean) / X_std
Y = (Y - X_mean) / X_std

if noise_scale is not None:
X += noise_scale * torch.randn(X.shape)
Y += noise_scale * torch.randn(Y.shape)

# Convert to numpy for sklearn
X = X.cpu().numpy()
Y = Y.cpu().numpy()

Expand All @@ -183,7 +120,7 @@ class clf_kwargs: key-value arguments dictionary to the class specified by
clf, data, target, cv=shuffle, scoring=metric, verbose=verbosity
)

return scores
return torch.from_numpy(scores).mean()


def unbiased_mmd_squared(x: Tensor, y: Tensor, scale: Optional[float] = None):
Expand Down
19 changes: 15 additions & 4 deletions tests/metrics_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from sbi.utils.metrics import (
biased_mmd_hypothesis_test,
c2st,
c2st_scores,
posterior_shrinkage,
posterior_zscore,
unbiased_mmd_squared_hypothesis_test,
Expand Down Expand Up @@ -103,7 +102,7 @@ def test_c2st_with_different_distributions_mlp(
"dist_sigma, c2st_lowerbound, c2st_upperbound,",
C2ST_TESTCASECONFIG,
)
def test_c2st_scores(dist_sigma, c2st_lowerbound, c2st_upperbound):
def test_c2st(dist_sigma, c2st_lowerbound, c2st_upperbound):
ndim = 10
nsamples = 1024

Expand All @@ -115,7 +114,7 @@ def test_c2st_scores(dist_sigma, c2st_lowerbound, c2st_upperbound):
X = xnormal.sample((nsamples,))
Y = ynormal.sample((nsamples,))

obs_c2st = c2st_scores(X, Y)
obs_c2st = c2st(X, Y)

assert hasattr(obs_c2st, "mean")
assert c2st_lowerbound < obs_c2st.mean()
Expand All @@ -131,7 +130,7 @@ def test_c2st_scores(dist_sigma, c2st_lowerbound, c2st_upperbound):
"n_iter_no_change": 20,
}

obs2_c2st = c2st_scores(X, Y, clf_class=clf_class, clf_kwargs=clf_kwargs)
obs2_c2st = c2st(X, Y, clf_class=clf_class, clf_kwargs=clf_kwargs)

assert hasattr(obs2_c2st, "mean")
assert c2st_lowerbound < obs2_c2st.mean()
Expand All @@ -140,6 +139,18 @@ def test_c2st_scores(dist_sigma, c2st_lowerbound, c2st_upperbound):
assert np.allclose(obs2_c2st, obs_c2st, atol=0.05)


@pytest.mark.parametrize("dims_constant", (1, 2))
def test_c2st_with_constant_features(dims_constant: int):
num_dim = 2
num_samples = 1024
x = torch.randn(num_samples, num_dim)
y = torch.randn(num_samples, num_dim)
x[:, :dims_constant] = 1.0
y[:, :dims_constant] = 1.0

c2st(x, y)


@pytest.mark.slow
@pytest.mark.parametrize(
"sigma",
Expand Down

0 comments on commit 644e0d1

Please sign in to comment.