From 9c7070cf0f682773fdab338aafbfff87885b07bf Mon Sep 17 00:00:00 2001
From: Yanis Lalou
Date: Sun, 11 Aug 2024 18:48:12 +0200
Subject: [PATCH 1/4] Add new scorer: MixValScorer

---
 README.md                            |   3 +
 docs/source/all.rst                  |   1 +
 skada/deep/tests/test_deep_scorer.py |   2 +
 skada/metrics.py                     | 124 ++++++++++++++++++++++++++-
 skada/tests/test_scorer.py           |  27 ++++++
 5 files changed, 156 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 8a3e8c26..c8087e27 100644
--- a/README.md
+++ b/README.md
@@ -235,3 +235,6 @@ The library is distributed under the 3-Clause BSD license.
 
 [31] Redko, Ievgen, Nicolas Courty, Rémi Flamary, and Devis Tuia.[ "Optimal transport for multi-source domain adaptation under target shift."](https://proceedings.mlr.press/v89/redko19a/redko19a.pdf) In The 22nd International Conference on artificial intelligence and statistics, pp. 849-858. PMLR, 2019.
 
+[32] Hu, D., Liang, J., Liew, J. H., Xue, C., Bai, S., & Wang, X. (2023). [Mixed Samples as Probes for Unsupervised Model Selection in Domain Adaptation](https://proceedings.neurips.cc/paper_files/paper/2023/file/7721f1fea280e9ffae528dc78c732576-Paper-Conference.pdf). Advances in Neural Information Processing Systems 36 (2024).
+
+
diff --git a/docs/source/all.rst b/docs/source/all.rst
index 57b61b1a..e9c05cfe 100644
--- a/docs/source/all.rst
+++ b/docs/source/all.rst
@@ -180,6 +180,7 @@ DA metrics :py:mod:`skada.metrics`
    DeepEmbeddedValidation
    SoftNeighborhoodDensity
    CircularValidation
+   MixValScorer
 
 Model Selection :py:mod:`skada.model_selection`
diff --git a/skada/deep/tests/test_deep_scorer.py b/skada/deep/tests/test_deep_scorer.py
index 79019068..7d0d56c6 100644
--- a/skada/deep/tests/test_deep_scorer.py
+++ b/skada/deep/tests/test_deep_scorer.py
@@ -13,6 +13,7 @@
 from skada.metrics import (
     CircularValidation,
     DeepEmbeddedValidation,
+    MixValScorer,
     PredictionEntropyScorer,
     SoftNeighborhoodDensity,
 )
@@ -25,6 +26,7 @@
         PredictionEntropyScorer(),
         SoftNeighborhoodDensity(),
         CircularValidation(),
+        MixValScorer(),
     ],
 )
 def test_generic_scorer_on_deepmodel(scorer, da_dataset):
diff --git a/skada/metrics.py b/skada/metrics.py
index 6625d660..e38414e8 100644
--- a/skada/metrics.py
+++ b/skada/metrics.py
@@ -12,7 +12,7 @@
 import numpy as np
 from sklearn.base import BaseEstimator, clone
 from sklearn.linear_model import LogisticRegression
-from sklearn.metrics import balanced_accuracy_score, check_scoring
+from sklearn.metrics import accuracy_score, balanced_accuracy_score, check_scoring
 from sklearn.model_selection import train_test_split
 from sklearn.neighbors import KernelDensity
 from sklearn.preprocessing import LabelEncoder, Normalizer
@@ -619,3 +619,125 @@ def _score(self, estimator, X, y, sample_domain=None):
         score = self.source_scorer(y[source_idx], y_pred_source)
 
         return self._sign * score
+
+
+class MixValScorer(_BaseDomainAwareScorer):
+    """
+    MixVal scorer for unsupervised domain adaptation.
+
+    This scorer uses mixup to create mixed samples from the target domain,
+    and evaluates the model's consistency on these mixed samples.
+
+    See [32]_ for details.
+
+    Parameters
+    ----------
+    lmbd : float, default=0.55
+        Mixing parameter for mixup.
+    random_state : int, RandomState instance or None, default=None
+        Controls the randomness of the mixing process.
+    greater_is_better : bool, default=True
+        Whether higher scores are better.
+
+    Attributes
+    ----------
+    lmbd : float
+        Mixing parameter.
+    random_state : RandomState
+        Random number generator.
+    _sign : int
+        1 if greater_is_better is True, -1 otherwise.
+
+    References
+    ----------
+    .. [32] Dapeng Hu et al. Mixed Samples as Probes for Unsupervised Model
+            Selection in Domain Adaptation.
+            NeurIPS, 2023.
+    """
+
+    def __init__(
+        self,
+        lmbd=0.55,
+        random_state=None,
+        greater_is_better=True,
+    ):
+        super().__init__()
+        self.lmbd = lmbd
+        self.random_state = random_state
+        self._sign = 1 if greater_is_better else -1
+
+    def _generate_mixed_samples(self, X, y_pred):
+        """
+        Generate mixed samples using mixup.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            The input samples.
+        y_pred : array-like of shape (n_samples, n_classes)
+            The predicted probabilities for each class.
+
+        Returns
+        -------
+        X_mixed : array-like of shape (n_samples, n_features)
+            The mixed input samples.
+        y_mixed : array-like of shape (n_samples, n_classes)
+            The mixed labels.
+        """
+        rng = check_random_state(self.random_state)
+        n_samples, n_features = X.shape
+
+        # Generate indices for mixing
+        idx = np.arange(n_samples)
+        rand_idx = rng.randint(0, n_samples, size=n_samples)
+
+        # Mix samples
+        X_mixed = self.lmbd * X[idx] + (1 - self.lmbd) * X[rand_idx]
+        y_mixed = self.lmbd * y_pred[idx] + (1 - self.lmbd) * y_pred[rand_idx]
+
+        return X_mixed, y_mixed
+
+    def _score(self, estimator, X, y=None, sample_domain=None, **params):
+        """
+        Compute the Interpolation Consistency Evaluation (ICE) score.
+
+        Parameters
+        ----------
+        estimator : object
+            The fitted estimator to evaluate.
+        X : array-like of shape (n_samples, n_features)
+            The input samples.
+        y : Ignored
+            Not used, present for API consistency by convention.
+        sample_domain : array-like, default=None
+            Domain labels for each sample.
+
+        Returns
+        -------
+        score : float
+            The ICE score.
+        """
+        X, _, sample_domain = check_X_y_domain(X, y, sample_domain)
+        source_idx = extract_source_indices(sample_domain)
+
+        # Get predictions for target samples
+        y_pred = estimator.predict_proba(
+            X[~source_idx], sample_domain=sample_domain[~source_idx]
+        )
+
+        # Generate mixed samples
+        X_mixed, y_mixed = self._generate_mixed_samples(X[~source_idx], y_pred)
+
+        # Get predictions for mixed samples
+        y_pred_mixed = estimator.predict_proba(
+            X_mixed, sample_domain=np.full(X_mixed.shape[0], -1)
+        )
+
+        # Calculate ICE score
+        ice_score = accuracy_score(y_mixed.argmax(axis=1), y_pred_mixed.argmax(axis=1))
+
+        print("y_mixed shape: ", y_mixed.shape)
+        print("y_pred_mixed shape: ", y_pred_mixed.shape)
+        print("ice_score: ", ice_score)
+
+        return self._sign * ice_score
diff --git a/skada/tests/test_scorer.py b/skada/tests/test_scorer.py
index 374981fe..dfc36785 100644
--- a/skada/tests/test_scorer.py
+++ b/skada/tests/test_scorer.py
@@ -23,6 +23,7 @@
     CircularValidation,
     DeepEmbeddedValidation,
     ImportanceWeightedScorer,
+    MixValScorer,
     PredictionEntropyScorer,
     SoftNeighborhoodDensity,
     SupervisedScorer,
@@ -246,3 +247,29 @@ def test_deep_embedding_validation_no_transform(da_dataset):
     )["test_score"]
     assert scores.shape[0] == 3, "evaluate 3 splits"
     assert np.all(~np.isnan(scores)), "all scores are computed"
+
+
+def test_mixval_scorer(da_dataset):
+    X, y, sample_domain = da_dataset.pack_train(as_sources=["s"], as_targets=["t"])
+    estimator = make_da_pipeline(
+        DensityReweightAdapter(),
+        LogisticRegression()
+        .set_fit_request(sample_weight=True)
+        .set_score_request(sample_weight=True),
+    )
+    cv = ShuffleSplit(n_splits=3, test_size=0.3, random_state=0)
+
+    # Test with default parameters
+    scorer = MixValScorer(lmbd=0.7)
+    scores = cross_validate(
+        estimator,
+        X,
+        y,
+        cv=cv,
+        params={"sample_domain": sample_domain},
+        scoring=scorer,
+    )["test_score"]
+
+    assert scores.shape[0] == 3, "evaluate 3 splits"
+    assert np.all(~np.isnan(scores)), "all scores are computed"
+    assert np.all(scores >= 0) and np.all(scores <= 1), "scores are between 0 and 1"

From c8f3d8b357feb905a111b4af33b598bed48d5127 Mon Sep 17 00:00:00 2001
From: Yanis Lalou
Date: Mon, 12 Aug 2024 10:10:19 +0200
Subject: [PATCH 2/4] Modify implementation following appendix A of the paper

---
 skada/metrics.py           | 84 ++++++++++++++++----------------------
 skada/tests/test_scorer.py |  2 +-
 2 files changed, 37 insertions(+), 49 deletions(-)

diff --git a/skada/metrics.py b/skada/metrics.py
index e38414e8..75584bc7 100644
--- a/skada/metrics.py
+++ b/skada/metrics.py
@@ -12,7 +12,7 @@
 import numpy as np
 from sklearn.base import BaseEstimator, clone
 from sklearn.linear_model import LogisticRegression
-from sklearn.metrics import accuracy_score, balanced_accuracy_score, check_scoring
+from sklearn.metrics import balanced_accuracy_score, check_scoring
 from sklearn.model_selection import train_test_split
 from sklearn.neighbors import KernelDensity
 from sklearn.preprocessing import LabelEncoder, Normalizer
@@ -632,7 +632,7 @@ class MixValScorer(_BaseDomainAwareScorer):
 
     Parameters
     ----------
-    lmbd : float, default=0.55
+    alpha : float, default=0.55
         Mixing parameter for mixup.
     random_state : int, RandomState instance or None, default=None
         Controls the randomness of the mixing process.
@@ -641,7 +641,7 @@ class MixValScorer(_BaseDomainAwareScorer):
 
     Attributes
     ----------
-    lmbd : float
+    alpha : float
         Mixing parameter.
     random_state : RandomState
         Random number generator.
@@ -657,46 +657,15 @@ class MixValScorer(_BaseDomainAwareScorer):
 
     def __init__(
         self,
-        lmbd=0.55,
+        alpha=0.55,
         random_state=None,
         greater_is_better=True,
     ):
         super().__init__()
-        self.lmbd = lmbd
+        self.alpha = alpha
         self.random_state = random_state
         self._sign = 1 if greater_is_better else -1
 
-    def _generate_mixed_samples(self, X, y_pred):
-        """
-        Generate mixed samples using mixup.
-
-        Parameters
-        ----------
-        X : array-like of shape (n_samples, n_features)
-            The input samples.
-        y_pred : array-like of shape (n_samples, n_classes)
-            The predicted probabilities for each class.
-
-        Returns
-        -------
-        X_mixed : array-like of shape (n_samples, n_features)
-            The mixed input samples.
-        y_mixed : array-like of shape (n_samples, n_classes)
-            The mixed labels.
-        """
-        rng = check_random_state(self.random_state)
-        n_samples, n_features = X.shape
-
-        # Generate indices for mixing
-        idx = np.arange(n_samples)
-        rand_idx = rng.randint(0, n_samples, size=n_samples)
-
-        # Mix samples
-        X_mixed = self.lmbd * X[idx] + (1 - self.lmbd) * X[rand_idx]
-        y_mixed = self.lmbd * y_pred[idx] + (1 - self.lmbd) * y_pred[rand_idx]
-
-        return X_mixed, y_mixed
-
     def _score(self, estimator, X, y=None, sample_domain=None, **params):
         """
         Compute the Interpolation Consistency Evaluation (ICE) score.
@@ -719,25 +688,44 @@ def _score(self, estimator, X, y=None, sample_domain=None, **params):
         """
         X, _, sample_domain = check_X_y_domain(X, y, sample_domain)
         source_idx = extract_source_indices(sample_domain)
+        X_target = X[~source_idx]
+
+        rng = check_random_state(self.random_state)
+        rand_idx = rng.permutation(X_target.shape[0])
 
         # Get predictions for target samples
-        y_pred = estimator.predict_proba(
-            X[~source_idx], sample_domain=sample_domain[~source_idx]
+        pred_a = estimator.predict_proba(
+            X_target, sample_domain=sample_domain[~source_idx]
         )
+        pl_a = pred_a.argmax(axis=1)
+        pl_b = pl_a[rand_idx]
+
+        # Intra-cluster and inter-cluster mixup
+        same_idx = (pl_a == pl_b).nonzero()[0]
+        diff_idx = (pl_a != pl_b).nonzero()[0]
 
-        # Generate mixed samples
-        X_mixed, y_mixed = self._generate_mixed_samples(X[~source_idx], y_pred)
+        # Mixup with inputs and hard pseudo labels
+        mix_inputs = self.alpha * X_target + (1 - self.alpha) * X_target[rand_idx]
+        mix_labels = pl_a if self.alpha > 0.5 else pl_b
 
-        # Get predictions for mixed samples
-        y_pred_mixed = estimator.predict_proba(
-            X_mixed, sample_domain=np.full(X_mixed.shape[0], -1)
+        # Obtain predictions for the mixed samples
+        mix_pred = estimator.predict_proba(
+            mix_inputs, sample_domain=np.full(mix_inputs.shape[0], -1)
         )
+        mix_pred_labels = mix_pred.argmax(axis=1)
 
-        # Calculate ICE score
-        ice_score = accuracy_score(y_mixed.argmax(axis=1), y_pred_mixed.argmax(axis=1))
+        # Calculate ICE scores for two-dimensional probing
+        ice_same = (
+            np.sum(mix_pred_labels[same_idx] == mix_labels[same_idx])
+            / same_idx.shape[0]
+        )
+        ice_diff = (
+            np.sum(mix_pred_labels[diff_idx] == mix_labels[diff_idx])
+            / diff_idx.shape[0]
+        )
 
-        print("y_mixed shape: ", y_mixed.shape)
-        print("y_pred_mixed shape: ", y_pred_mixed.shape)
-        print("ice_score: ", ice_score)
+        # In the paper they use the average of ranks
+        # Here we use a simple average
+        ice_score = (ice_same + ice_diff) / 2
 
         return self._sign * ice_score
diff --git a/skada/tests/test_scorer.py b/skada/tests/test_scorer.py
index dfc36785..73879dc6 100644
--- a/skada/tests/test_scorer.py
+++ b/skada/tests/test_scorer.py
@@ -260,7 +260,7 @@ def test_mixval_scorer(da_dataset):
     cv = ShuffleSplit(n_splits=3, test_size=0.3, random_state=0)
 
     # Test with default parameters
-    scorer = MixValScorer(lmbd=0.7)
+    scorer = MixValScorer(alpha=0.7)
     scores = cross_validate(
         estimator,
         X,

From 305e5d6a8809c50c17c99f36d49262b49b97f9a3 Mon Sep 17 00:00:00 2001
From: Yanis Lalou
Date: Mon, 12 Aug 2024 14:34:38 +0200
Subject: [PATCH 3/4] Add arg to compute intra/inter/both ICE scores

---
 skada/metrics.py           | 42 +++++++++++++++++++++++++-----------
 skada/tests/test_scorer.py | 26 ++++++++++++++++++++++-
 2 files changed, 55 insertions(+), 13 deletions(-)

diff --git a/skada/metrics.py b/skada/metrics.py
index 75584bc7..dbb2af3f 100644
--- a/skada/metrics.py
+++ b/skada/metrics.py
@@ -638,6 +638,11 @@ class MixValScorer(_BaseDomainAwareScorer):
         Controls the randomness of the mixing process.
     greater_is_better : bool, default=True
         Whether higher scores are better.
+    ice_type : {'both', 'intra', 'inter'}, default='both'
+        Type of ICE score to compute:
+        - 'both': Compute both intra-cluster and inter-cluster ICE scores (average).
+        - 'intra': Compute only intra-cluster ICE score.
+        - 'inter': Compute only inter-cluster ICE score.
 
     Attributes
     ----------
@@ -647,6 +652,8 @@ class MixValScorer(_BaseDomainAwareScorer):
         Random number generator.
     _sign : int
         1 if greater_is_better is True, -1 otherwise.
+    ice_type : str
+        Type of ICE score to compute.
 
     References
     ----------
     .. [32] Dapeng Hu et al. Mixed Samples as Probes for Unsupervised Model
             Selection in Domain Adaptation.
             NeurIPS, 2023.
     """
 
     def __init__(
         self,
         alpha=0.55,
         random_state=None,
         greater_is_better=True,
+        ice_type="both",
     ):
         super().__init__()
         self.alpha = alpha
         self.random_state = random_state
         self._sign = 1 if greater_is_better else -1
+        self.ice_type = ice_type
+
+        if self.ice_type not in ["both", "intra", "inter"]:
+            raise ValueError("ice_type must be 'both', 'intra', or 'inter'")
 
     def _score(self, estimator, X, y=None, sample_domain=None, **params):
         """
@@ -714,18 +726,24 @@ def _score(self, estimator, X, y=None, sample_domain=None, **params):
         )
         mix_pred_labels = mix_pred.argmax(axis=1)
 
-        # Calculate ICE scores for two-dimensional probing
-        ice_same = (
-            np.sum(mix_pred_labels[same_idx] == mix_labels[same_idx])
-            / same_idx.shape[0]
-        )
-        ice_diff = (
-            np.sum(mix_pred_labels[diff_idx] == mix_labels[diff_idx])
-            / diff_idx.shape[0]
-        )
+        # Calculate ICE scores based on ice_type
+        if self.ice_type in ["both", "intra"]:
+            ice_same = (
+                np.sum(mix_pred_labels[same_idx] == mix_labels[same_idx])
+                / same_idx.shape[0]
+            )
+
+        if self.ice_type in ["both", "inter"]:
+            ice_diff = (
+                np.sum(mix_pred_labels[diff_idx] == mix_labels[diff_idx])
+                / diff_idx.shape[0]
+            )
 
-        # In the paper they use the average of ranks
-        # Here we use a simple average
-        ice_score = (ice_same + ice_diff) / 2
+        if self.ice_type == "both":
+            ice_score = (ice_same + ice_diff) / 2
+        elif self.ice_type == "intra":
+            ice_score = ice_same
+        else:  # self.ice_type == 'inter'
+            ice_score = ice_diff
 
         return self._sign * ice_score
diff --git a/skada/tests/test_scorer.py b/skada/tests/test_scorer.py
index 73879dc6..9376a09f 100644
--- a/skada/tests/test_scorer.py
+++ b/skada/tests/test_scorer.py
@@ -260,7 +260,7 @@ def test_mixval_scorer(da_dataset):
     cv = ShuffleSplit(n_splits=3, test_size=0.3, random_state=0)
 
     # Test with default parameters
-    scorer = MixValScorer(alpha=0.7)
+    scorer = MixValScorer(alpha=0.55, random_state=42)
     scores = cross_validate(
         estimator,
         X,
@@ -273,3 +273,27 @@ def test_mixval_scorer(da_dataset):
     assert scores.shape[0] == 3, "evaluate 3 splits"
     assert np.all(~np.isnan(scores)), "all scores are computed"
     assert np.all(scores >= 0) and np.all(scores <= 1), "scores are between 0 and 1"
+
+    # Test different ice_type options
+    for ice_type in ["both", "intra", "inter"]:
+        scorer = MixValScorer(alpha=0.55, random_state=42, ice_type=ice_type)
+        scores = cross_validate(
+            estimator,
+            X,
+            y,
+            cv=cv,
+            params={"sample_domain": sample_domain},
+            scoring=scorer,
+        )["test_score"]
+
+        assert scores.shape[0] == 3, f"evaluate 3 splits for ice_type={ice_type}"
+        assert np.all(
+            ~np.isnan(scores)
+        ), f"all scores are computed for ice_type={ice_type}"
+        assert np.all(scores >= 0) and np.all(
+            scores <= 1
+        ), f"scores are between 0 and 1 for ice_type={ice_type}"
+
+    # Test invalid ice_type
+    with pytest.raises(ValueError):
+        MixValScorer(ice_type="invalid")

From 5de5d0e7815648247d324804932f03948c545480 Mon Sep 17 00:00:00 2001
From: Yanis Lalou
Date: Tue, 13 Aug 2024 12:05:56 +0200
Subject: [PATCH 4/4] Change implementation to the one in the paper's repo

---
 skada/metrics.py | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/skada/metrics.py b/skada/metrics.py
index dbb2af3f..4f0e44f2 100644
--- a/skada/metrics.py
+++ b/skada/metrics.py
@@ -706,37 +706,31 @@ def _score(self, estimator, X, y=None, sample_domain=None, **params):
         rand_idx = rng.permutation(X_target.shape[0])
 
         # Get predictions for target samples
-        pred_a = estimator.predict_proba(
-            X_target, sample_domain=sample_domain[~source_idx]
-        )
-        pl_a = pred_a.argmax(axis=1)
-        pl_b = pl_a[rand_idx]
+        labels_a = estimator.predict(X_target, sample_domain=sample_domain[~source_idx])
+        labels_b = labels_a[rand_idx]
 
         # Intra-cluster and inter-cluster mixup
-        same_idx = (pl_a == pl_b).nonzero()[0]
-        diff_idx = (pl_a != pl_b).nonzero()[0]
+        same_idx = (labels_a == labels_b).nonzero()[0]
+        diff_idx = (labels_a != labels_b).nonzero()[0]
 
         # Mixup with inputs and hard pseudo labels
         mix_inputs = self.alpha * X_target + (1 - self.alpha) * X_target[rand_idx]
-        mix_labels = pl_a if self.alpha > 0.5 else pl_b
+        mix_labels = self.alpha * labels_a + (1 - self.alpha) * labels_b
 
         # Obtain predictions for the mixed samples
-        mix_pred = estimator.predict_proba(
+        mix_pred = estimator.predict(
             mix_inputs, sample_domain=np.full(mix_inputs.shape[0], -1)
         )
-        mix_pred_labels = mix_pred.argmax(axis=1)
 
         # Calculate ICE scores based on ice_type
         if self.ice_type in ["both", "intra"]:
             ice_same = (
-                np.sum(mix_pred_labels[same_idx] == mix_labels[same_idx])
-                / same_idx.shape[0]
+                np.sum(mix_pred[same_idx] == mix_labels[same_idx]) / same_idx.shape[0]
             )
 
         if self.ice_type in ["both", "inter"]:
             ice_diff = (
-                np.sum(mix_pred_labels[diff_idx] == mix_labels[diff_idx])
-                / diff_idx.shape[0]
+                np.sum(mix_pred[diff_idx] == mix_labels[diff_idx]) / diff_idx.shape[0]
             )
 
         if self.ice_type == "both":
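
Usage sketch (not part of the patches above): a minimal example of how the new
scorer could drive model selection, mirroring the pipeline built in
test_mixval_scorer. Two assumptions are flagged in the comments:
make_shifted_datasets is assumed to be available in skada.datasets as a
toy-data helper, and recent scikit-learn versions need metadata routing
enabled before set_fit_request can be called.

    import sklearn
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import ShuffleSplit, cross_validate

    from skada import DensityReweightAdapter, make_da_pipeline
    from skada.datasets import make_shifted_datasets  # assumed toy-data helper
    from skada.metrics import MixValScorer

    # Needed on recent scikit-learn for the set_fit_request call below.
    sklearn.set_config(enable_metadata_routing=True)

    # X, y and a sample_domain vector (positive = source, negative = target).
    X, y, sample_domain = make_shifted_datasets(
        n_samples_source=50,
        n_samples_target=50,
        shift="covariate_shift",
        noise=0.3,
    )

    # Same adapter + classifier pipeline as in test_mixval_scorer.
    estimator = make_da_pipeline(
        DensityReweightAdapter(),
        LogisticRegression().set_fit_request(sample_weight=True),
    )

    # ICE measures how consistent the predictions on mixed target samples are
    # with the pseudo-labels they were mixed from; higher is better, so the
    # scorer can rank candidate models without target labels.
    for ice_type in ("both", "intra", "inter"):
        scorer = MixValScorer(alpha=0.55, ice_type=ice_type, random_state=0)
        scores = cross_validate(
            estimator,
            X,
            y,
            cv=ShuffleSplit(n_splits=3, test_size=0.3, random_state=0),
            params={"sample_domain": sample_domain},
            scoring=scorer,
        )["test_score"]
        print(f"ice_type={ice_type}: mean ICE = {scores.mean():.3f}")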