Skip to content

Commit

Permalink
Rename CE...Evaluator to CrossEncoder...Evaluator
Browse files Browse the repository at this point in the history
With backwards compatibility, old classes are still implemented in deprecated.py. I also move renamed files, but old imports like 'from sentence_transformers.cross_encoder.evaluation.CEBinaryAccuracyEvaluator import CEBinaryAccuracyEvaluator' still work (see test_deprecated_imports.py).
  • Loading branch information
tomaarsen committed Feb 26, 2025
1 parent 4ec4f13 commit 2a557f5
Show file tree
Hide file tree
Showing 30 changed files with 364 additions and 258 deletions.
80 changes: 40 additions & 40 deletions docs/cross_encoder/training_overview.md

Large diffs are not rendered by default.

16 changes: 8 additions & 8 deletions docs/package_reference/cross_encoder/evaluation.md
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
# Evaluation
CrossEncoder models have their own evaluation classes in `sentence_transformers.cross_encoder.evaluation`.

## CERerankingEvaluator
## CrossEncoderRerankingEvaluator
```{eval-rst}
.. autoclass:: sentence_transformers.cross_encoder.evaluation.CERerankingEvaluator
.. autoclass:: sentence_transformers.cross_encoder.evaluation.CrossEncoderRerankingEvaluator
```

## CENanoBEIREvaluator
## CrossEncoderNanoBEIREvaluator
```{eval-rst}
.. autoclass:: sentence_transformers.cross_encoder.evaluation.CENanoBEIREvaluator
.. autoclass:: sentence_transformers.cross_encoder.evaluation.CrossEncoderNanoBEIREvaluator
```

## CEClassificationEvaluator
## CrossEncoderClassificationEvaluator
```{eval-rst}
.. autoclass:: sentence_transformers.cross_encoder.evaluation.CEClassificationEvaluator
.. autoclass:: sentence_transformers.cross_encoder.evaluation.CrossEncoderClassificationEvaluator
```

## CECorrelationEvaluator
## CrossEncoderCorrelationEvaluator
```{eval-rst}
.. autoclass:: sentence_transformers.cross_encoder.evaluation.CECorrelationEvaluator
.. autoclass:: sentence_transformers.cross_encoder.evaluation.CrossEncoderCorrelationEvaluator
```
12 changes: 7 additions & 5 deletions examples/training/cross-encoder/training_gooaq_v4.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@

from sentence_transformers import SentenceTransformer
from sentence_transformers.cross_encoder import CrossEncoder, CrossEncoderModelCardData
from sentence_transformers.cross_encoder.evaluation.CENanoBEIREvaluator import CENanoBEIREvaluator
from sentence_transformers.cross_encoder.evaluation.CERerankingEvaluator import CERerankingEvaluator
from sentence_transformers.cross_encoder.evaluation import (
CrossEncoderNanoBEIREvaluator,
CrossEncoderRerankingEvaluator,
)
from sentence_transformers.cross_encoder.losses.BinaryCrossEntropyLoss import BinaryCrossEntropyLoss
from sentence_transformers.cross_encoder.trainer import CrossEncoderTrainer
from sentence_transformers.cross_encoder.training_args import CrossEncoderTrainingArguments
Expand Down Expand Up @@ -71,8 +73,8 @@ def main():
# pos_weight is recommended to be set as the ratio between positives to negatives, a.k.a. `num_hard_negatives`
loss = BinaryCrossEntropyLoss(model=model, pos_weight=torch.tensor(num_hard_negatives))

# 4a. Define evaluators. We use the CENanoBEIREvaluator, which is a light-weight evaluator for English reranking
nano_beir_evaluator = CENanoBEIREvaluator(
# 4a. Define evaluators. We use the CrossEncoderNanoBEIREvaluator, which is a light-weight evaluator for English reranking
nano_beir_evaluator = CrossEncoderNanoBEIREvaluator(
dataset_names=["msmarco", "nfcorpus", "nq"],
batch_size=train_batch_size,
)
Expand All @@ -91,7 +93,7 @@ def main():
use_faiss=True,
)
logging.info(hard_eval_dataset)
reranking_evaluator = CERerankingEvaluator(
reranking_evaluator = CrossEncoderRerankingEvaluator(
samples=[
{
"query": sample["question"],
Expand Down
6 changes: 3 additions & 3 deletions examples/training/cross-encoder/training_gooaq_v4_simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from datasets import load_dataset

from sentence_transformers.cross_encoder import CrossEncoder, CrossEncoderModelCardData
from sentence_transformers.cross_encoder.evaluation import CENanoBEIREvaluator
from sentence_transformers.cross_encoder.evaluation import CrossEncoderNanoBEIREvaluator
from sentence_transformers.cross_encoder.losses import CachedMultipleNegativesRankingLoss
from sentence_transformers.cross_encoder.trainer import CrossEncoderTrainer
from sentence_transformers.cross_encoder.training_args import CrossEncoderTrainingArguments
Expand Down Expand Up @@ -45,8 +45,8 @@
mini_batch_size=32, # Informs the memory usage
)

# 4. Use CENanoBEIREvaluator, a light-weight evaluator for English reranking
evaluator = CENanoBEIREvaluator(
# 4. Use CrossEncoderNanoBEIREvaluator, a light-weight evaluator for English reranking
evaluator = CrossEncoderNanoBEIREvaluator(
dataset_names=["msmarco", "nfcorpus", "nq"],
batch_size=train_batch_size,
)
Expand Down
6 changes: 3 additions & 3 deletions examples/training/cross-encoder/training_ms_marco_v4.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from torch import nn

from sentence_transformers.cross_encoder import CrossEncoder
from sentence_transformers.cross_encoder.evaluation.CENanoBEIREvaluator import CENanoBEIREvaluator
from sentence_transformers.cross_encoder.evaluation import CrossEncoderNanoBEIREvaluator
from sentence_transformers.cross_encoder.losses.BinaryCrossEntropyLoss import BinaryCrossEntropyLoss
from sentence_transformers.cross_encoder.losses.CachedMultipleNegativesRankingLoss import (
CachedMultipleNegativesRankingLoss,
Expand Down Expand Up @@ -87,8 +87,8 @@ def mnrl_mapper(batch):
else:
loss = BinaryCrossEntropyLoss(model)

# 4. Define the evaluator. We use the CENanoBEIREvaluator, which is a light-weight evaluator for English reranking
evaluator = CENanoBEIREvaluator(dataset_names=["msmarco", "nfcorpus", "nq"], batch_size=train_batch_size)
# 4. Define the evaluator. We use the CrossEncoderNanoBEIREvaluator, which is a light-weight evaluator for English reranking
evaluator = CrossEncoderNanoBEIREvaluator(dataset_names=["msmarco", "nfcorpus", "nq"], batch_size=train_batch_size)
evaluator(model)

# 5. Define the training arguments
Expand Down
8 changes: 4 additions & 4 deletions examples/training/cross-encoder/training_nli_v4.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from datasets import load_dataset

from sentence_transformers.cross_encoder import CrossEncoder
from sentence_transformers.cross_encoder.evaluation import CEClassificationEvaluator
from sentence_transformers.cross_encoder.evaluation import CrossEncoderClassificationEvaluator
from sentence_transformers.cross_encoder.losses.CrossEntropyLoss import CrossEntropyLoss
from sentence_transformers.cross_encoder.trainer import CrossEncoderTrainer
from sentence_transformers.cross_encoder.training_args import CrossEncoderTrainingArguments
Expand Down Expand Up @@ -43,8 +43,8 @@
# 3. Define our training loss:
loss = CrossEntropyLoss(model)

# 4. Before and during training, we use CEClassificationEvaluator to measure the performance on the dev set
dev_cls_evaluator = CEClassificationEvaluator(
# 4. Before and during training, we use CrossEncoderClassificationEvaluator to measure the performance on the dev set
dev_cls_evaluator = CrossEncoderClassificationEvaluator(
sentence_pairs=list(zip(eval_dataset["premise"], eval_dataset["hypothesis"])),
labels=eval_dataset["label"],
name="AllNLI-dev",
Expand Down Expand Up @@ -86,7 +86,7 @@
trainer.train()

# 7. Evaluate the final model on test dataset
test_cls_evaluator = CEClassificationEvaluator(
test_cls_evaluator = CrossEncoderClassificationEvaluator(
list(zip(test_dataset["premise"], test_dataset["hypothesis"])),
test_dataset["label"],
name="AllNLI-test",
Expand Down
12 changes: 7 additions & 5 deletions examples/training/cross-encoder/training_nq_v4.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@

from sentence_transformers import SentenceTransformer
from sentence_transformers.cross_encoder import CrossEncoder, CrossEncoderModelCardData
from sentence_transformers.cross_encoder.evaluation.CENanoBEIREvaluator import CENanoBEIREvaluator
from sentence_transformers.cross_encoder.evaluation.CERerankingEvaluator import CERerankingEvaluator
from sentence_transformers.cross_encoder.evaluation import (
CrossEncoderNanoBEIREvaluator,
CrossEncoderRerankingEvaluator,
)
from sentence_transformers.cross_encoder.losses.BinaryCrossEntropyLoss import BinaryCrossEntropyLoss
from sentence_transformers.cross_encoder.trainer import CrossEncoderTrainer
from sentence_transformers.cross_encoder.training_args import CrossEncoderTrainingArguments
Expand Down Expand Up @@ -71,8 +73,8 @@ def main():
# pos_weight is recommended to be set as the ratio between positives to negatives, a.k.a. `num_hard_negatives`
loss = BinaryCrossEntropyLoss(model=model, pos_weight=torch.tensor(num_hard_negatives))

# 4a. Define evaluators. We use the CENanoBEIREvaluator, which is a light-weight evaluator for English reranking
nano_beir_evaluator = CENanoBEIREvaluator(
# 4a. Define evaluators. We use the CrossEncoderNanoBEIREvaluator, which is a light-weight evaluator for English reranking
nano_beir_evaluator = CrossEncoderNanoBEIREvaluator(
dataset_names=["msmarco", "nfcorpus", "nq"],
batch_size=train_batch_size,
)
Expand All @@ -91,7 +93,7 @@ def main():
use_faiss=True,
)
logging.info(hard_eval_dataset)
reranking_evaluator = CERerankingEvaluator(
reranking_evaluator = CrossEncoderRerankingEvaluator(
samples=[
{
"query": sample["query"],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from datasets import load_dataset

from sentence_transformers.cross_encoder import CrossEncoder, CrossEncoderTrainingArguments
from sentence_transformers.cross_encoder.evaluation import CEClassificationEvaluator
from sentence_transformers.cross_encoder.evaluation import CrossEncoderClassificationEvaluator
from sentence_transformers.cross_encoder.losses import BinaryCrossEntropyLoss
from sentence_transformers.cross_encoder.trainer import CrossEncoderTrainer

Expand Down Expand Up @@ -45,8 +45,8 @@
# 3. Define our training loss, we use one that accepts pairs with a binary label
loss = BinaryCrossEntropyLoss(model)

# 4. Before and during training, we use CEClassificationEvaluator to measure the performance on the dev set
dev_cls_evaluator = CEClassificationEvaluator(
# 4. Before and during training, we use CrossEncoderClassificationEvaluator to measure the performance on the dev set
dev_cls_evaluator = CrossEncoderClassificationEvaluator(
sentence_pairs=list(zip(eval_dataset["sentence1"], eval_dataset["sentence2"])),
labels=eval_dataset["label"],
name="quora-duplicates-dev",
Expand Down Expand Up @@ -88,7 +88,7 @@
trainer.train()

# 7. Evaluate the final model on test dataset
test_cls_evaluator = CEClassificationEvaluator(
test_cls_evaluator = CrossEncoderClassificationEvaluator(
sentence_pairs=list(zip(eval_dataset["sentence1"], eval_dataset["sentence2"])),
labels=eval_dataset["label"],
name="quora-duplicates-test",
Expand Down
6 changes: 3 additions & 3 deletions examples/training/cross-encoder/training_stsbenchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

from sentence_transformers import InputExample, LoggingHandler, util
from sentence_transformers.cross_encoder import CrossEncoder
from sentence_transformers.cross_encoder.evaluation import CECorrelationEvaluator
from sentence_transformers.cross_encoder.evaluation import CrossEncoderCorrelationEvaluator

#### Just some code to print debug information to stdout
logging.basicConfig(
Expand Down Expand Up @@ -71,7 +71,7 @@


# We add an evaluator, which evaluates the performance during training
evaluator = CECorrelationEvaluator.from_input_examples(dev_samples, name="sts-dev")
evaluator = CrossEncoderCorrelationEvaluator.from_input_examples(dev_samples, name="sts-dev")


# Configure the training
Expand All @@ -92,5 +92,5 @@
##### Load model and eval on test set
model = CrossEncoder(model_save_path)

evaluator = CECorrelationEvaluator.from_input_examples(test_samples, name="sts-test")
evaluator = CrossEncoderCorrelationEvaluator.from_input_examples(test_samples, name="sts-test")
evaluator(model)
8 changes: 4 additions & 4 deletions examples/training/cross-encoder/training_stsbenchmark_v4.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from datasets import load_dataset

from sentence_transformers.cross_encoder import CrossEncoder
from sentence_transformers.cross_encoder.evaluation import CECorrelationEvaluator
from sentence_transformers.cross_encoder.evaluation import CrossEncoderCorrelationEvaluator
from sentence_transformers.cross_encoder.losses.BinaryCrossEntropyLoss import BinaryCrossEntropyLoss
from sentence_transformers.cross_encoder.trainer import CrossEncoderTrainer
from sentence_transformers.cross_encoder.training_args import CrossEncoderTrainingArguments
Expand All @@ -41,8 +41,8 @@
# 3. Define our training loss, we use one that accepts pairs with a binary label
loss = BinaryCrossEntropyLoss(model)

# 4. Before and during training, we use CEClassificationEvaluator to measure the performance on the dev set
eval_evaluator = CECorrelationEvaluator(
# 4. Before and during training, we use CrossEncoderCorrelationEvaluator to measure the performance on the dev set
eval_evaluator = CrossEncoderCorrelationEvaluator(
sentence_pairs=list(zip(eval_dataset["sentence1"], eval_dataset["sentence2"])),
scores=eval_dataset["score"],
name="stsb-validation",
Expand Down Expand Up @@ -84,7 +84,7 @@
trainer.train()

# 7. Evaluate the final model on test dataset
test_evaluator = CECorrelationEvaluator(
test_evaluator = CrossEncoderCorrelationEvaluator(
sentence_pairs=list(zip(test_dataset["sentence1"], test_dataset["sentence2"])),
scores=test_dataset["score"],
name="stsb-test",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@

from sentence_transformers import SentenceTransformer, losses
from sentence_transformers.cross_encoder import CrossEncoder
from sentence_transformers.cross_encoder.evaluation import CECorrelationEvaluator
from sentence_transformers.cross_encoder.evaluation import CrossEncoderCorrelationEvaluator
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator
from sentence_transformers.readers import InputExample
from sentence_transformers.similarity_functions import SimilarityFunction
Expand Down Expand Up @@ -105,7 +105,7 @@
train_dataloader = DataLoader(gold_samples, shuffle=True, batch_size=batch_size)

# We add an evaluator, which evaluates the performance during training
evaluator = CECorrelationEvaluator(
evaluator = CrossEncoderCorrelationEvaluator(
sentence_pairs=[[data["sentence1"], data["sentence2"]] for data in eval_dataset],
scores=[data["score"] for data in eval_dataset],
name="sts-dev",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

from sentence_transformers import LoggingHandler, SentenceTransformer, losses, models, util
from sentence_transformers.cross_encoder import CrossEncoder
from sentence_transformers.cross_encoder.evaluation import CECorrelationEvaluator
from sentence_transformers.cross_encoder.evaluation import CrossEncoderCorrelationEvaluator
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator
from sentence_transformers.readers import InputExample

Expand Down Expand Up @@ -127,7 +127,7 @@


# We add an evaluator, which evaluates the performance during training
evaluator = CECorrelationEvaluator.from_input_examples(dev_samples, name="sts-dev")
evaluator = CrossEncoderCorrelationEvaluator.from_input_examples(dev_samples, name="sts-dev")

# Configure the training
warmup_steps = math.ceil(len(train_dataloader) * num_epochs * 0.1) # 10% of train data for warm-up
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

from sentence_transformers import LoggingHandler, SentenceTransformer, losses, models, util
from sentence_transformers.cross_encoder import CrossEncoder
from sentence_transformers.cross_encoder.evaluation import CECorrelationEvaluator
from sentence_transformers.cross_encoder.evaluation import CrossEncoderCorrelationEvaluator
from sentence_transformers.evaluation import BinaryClassificationEvaluator
from sentence_transformers.readers import InputExample

Expand Down Expand Up @@ -136,7 +136,7 @@


# We add an evaluator, which evaluates the performance during training
evaluator = CECorrelationEvaluator.from_input_examples(dev_samples, name="sts-dev")
evaluator = CrossEncoderCorrelationEvaluator.from_input_examples(dev_samples, name="sts-dev")

# Configure the training
warmup_steps = math.ceil(len(train_dataloader) * num_epochs * 0.1) # 10% of train data for warm-up
Expand Down
4 changes: 2 additions & 2 deletions examples/training/ms_marco/train_cross-encoder_kd.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

from sentence_transformers import InputExample, LoggingHandler, util
from sentence_transformers.cross_encoder import CrossEncoder
from sentence_transformers.cross_encoder.evaluation import CERerankingEvaluator
from sentence_transformers.cross_encoder.evaluation import CrossEncoderRerankingEvaluator

#### Just some code to print debug information to stdout
logging.basicConfig(
Expand Down Expand Up @@ -152,7 +152,7 @@

# We add an evaluator, which evaluates the performance during training
# It reranks the candidate passages for each dev query and reports reranking metrics such as MRR@10 and NDCG@10
evaluator = CrossEncoderRerankingEvaluator(dev_samples, name="train-eval")
evaluator = CrossEncoderRerankingEvaluator(dev_samples, name="train-eval")

# Configure the training
warmup_steps = 5000
Expand Down
4 changes: 2 additions & 2 deletions examples/training/ms_marco/train_cross-encoder_scratch.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

from sentence_transformers import InputExample, LoggingHandler, util
from sentence_transformers.cross_encoder import CrossEncoder
from sentence_transformers.cross_encoder.evaluation import CERerankingEvaluator
from sentence_transformers.cross_encoder.evaluation import CrossEncoderRerankingEvaluator

#### Just some code to print debug information to stdout
logging.basicConfig(
Expand Down Expand Up @@ -166,7 +166,7 @@

# We add an evaluator, which evaluates the performance during training
# It reranks the candidate passages for each dev query and reports reranking metrics such as MRR@10 and NDCG@10
evaluator = CrossEncoderRerankingEvaluator(dev_samples, name="train-eval")
evaluator = CrossEncoderRerankingEvaluator(dev_samples, name="train-eval")

# Configure the training
warmup_steps = 5000
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from datasets import load_dataset, load_from_disk

from sentence_transformers.cross_encoder import CrossEncoder
from sentence_transformers.cross_encoder.evaluation.CENanoBEIREvaluator import CENanoBEIREvaluator
from sentence_transformers.cross_encoder.evaluation import CrossEncoderNanoBEIREvaluator
from sentence_transformers.cross_encoder.losses.MarginMSELoss import MarginMSELoss
from sentence_transformers.cross_encoder.trainer import CrossEncoderTrainer
from sentence_transformers.cross_encoder.training_args import CrossEncoderTrainingArguments
Expand Down Expand Up @@ -64,8 +64,8 @@ def id_to_text_map(batch):
# 3. Define our training loss
loss = MarginMSELoss(model)

# 4. Define the evaluator. We use the CENanoBEIREvaluator, which is a light-weight evaluator for English reranking
evaluator = CENanoBEIREvaluator(dataset_names=["msmarco", "nfcorpus", "nq"], batch_size=train_batch_size)
# 4. Define the evaluator. We use the CrossEncoderNanoBEIREvaluator, which is a light-weight evaluator for English reranking
evaluator = CrossEncoderNanoBEIREvaluator(dataset_names=["msmarco", "nfcorpus", "nq"], batch_size=train_batch_size)
evaluator(model)

# 5. Define the training arguments
Expand Down
6 changes: 3 additions & 3 deletions examples/training/ms_marco/train_cross_encoder_kd_mse.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from datasets import load_dataset, load_from_disk

from sentence_transformers.cross_encoder import CrossEncoder
from sentence_transformers.cross_encoder.evaluation.CENanoBEIREvaluator import CENanoBEIREvaluator
from sentence_transformers.cross_encoder.evaluation import CrossEncoderNanoBEIREvaluator
from sentence_transformers.cross_encoder.losses.MSELoss import MSELoss
from sentence_transformers.cross_encoder.trainer import CrossEncoderTrainer
from sentence_transformers.cross_encoder.training_args import CrossEncoderTrainingArguments
Expand Down Expand Up @@ -63,8 +63,8 @@ def id_to_text_map(batch):
# 3. Define our training loss
loss = MSELoss(model)

# 4. Define the evaluator. We use the CENanoBEIREvaluator, which is a light-weight evaluator for English reranking
evaluator = CENanoBEIREvaluator(dataset_names=["msmarco", "nfcorpus", "nq"], batch_size=train_batch_size)
# 4. Define the evaluator. We use the CrossEncoderNanoBEIREvaluator, which is a light-weight evaluator for English reranking
evaluator = CrossEncoderNanoBEIREvaluator(dataset_names=["msmarco", "nfcorpus", "nq"], batch_size=train_batch_size)
evaluator(model)

# 5. Define the training arguments
Expand Down
6 changes: 3 additions & 3 deletions examples/training/ms_marco/train_cross_encoder_scratch.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from datasets import load_dataset, load_from_disk

from sentence_transformers.cross_encoder import CrossEncoder
from sentence_transformers.cross_encoder.evaluation.CENanoBEIREvaluator import CENanoBEIREvaluator
from sentence_transformers.cross_encoder.evaluation import CrossEncoderNanoBEIREvaluator
from sentence_transformers.cross_encoder.losses.BinaryCrossEntropyLoss import BinaryCrossEntropyLoss
from sentence_transformers.cross_encoder.trainer import CrossEncoderTrainer
from sentence_transformers.cross_encoder.training_args import CrossEncoderTrainingArguments
Expand Down Expand Up @@ -64,8 +64,8 @@ def id_to_text_map(batch):
# 3. Define our training loss
loss = BinaryCrossEntropyLoss(model)

# 4. Define the evaluator. We use the CENanoBEIREvaluator, which is a light-weight evaluator for English reranking
evaluator = CENanoBEIREvaluator(dataset_names=["msmarco", "nfcorpus", "nq"], batch_size=train_batch_size)
# 4. Define the evaluator. We use the CrossEncoderNanoBEIREvaluator, which is a light-weight evaluator for English reranking
evaluator = CrossEncoderNanoBEIREvaluator(dataset_names=["msmarco", "nfcorpus", "nq"], batch_size=train_batch_size)
evaluator(model)

# 5. Define the training arguments
Expand Down
Loading

0 comments on commit 2a557f5

Please sign in to comment.