Skip to content

Commit 4c239b1

Browse files
I8dNLojoein
andauthored
Custom rerankers support (#496)
* Custom rerankers support * Test for reranker_custom_model * test fix * Model description type fix * Test fix * fix: fix naming * fix: remove redundant arg from tests * new: update readme --------- Co-authored-by: George Panchuk <george.panchuk@qdrant.tech>
1 parent 6acfb00 commit 4c239b1

4 files changed

Lines changed: 173 additions & 2 deletions

File tree

README.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,23 @@ scores = list(encoder.rerank(query, documents))
190190
# [-11.48061752319336, 5.472434997558594]
191191
```
192192

193+
Text cross encoders can also be extended with models that are not in the list of supported models.
194+
195+
```python
196+
from fastembed.rerank.cross_encoder import TextCrossEncoder
197+
from fastembed.common.model_description import ModelSource
198+
199+
TextCrossEncoder.add_custom_model(
200+
model="Xenova/ms-marco-MiniLM-L-4-v2",
201+
model_file="onnx/model.onnx",
202+
sources=ModelSource(hf="Xenova/ms-marco-MiniLM-L-4-v2"),
203+
)
204+
model = TextCrossEncoder(model_name="Xenova/ms-marco-MiniLM-L-4-v2")
205+
scores = list(model.rerank_pairs(
206+
[("What is AI?", "Artificial intelligence is ..."), ("What is ML?", "Machine learning is ..."),]
207+
))
208+
```
209+
193210
## ⚡️ FastEmbed on a GPU
194211

195212
FastEmbed supports running on GPU devices.
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
from typing import Optional, Sequence, Any
2+
3+
from fastembed.common import OnnxProvider
4+
from fastembed.common.model_description import BaseModelDescription
5+
from fastembed.rerank.cross_encoder.onnx_text_cross_encoder import OnnxTextCrossEncoder
6+
7+
8+
class CustomTextCrossEncoder(OnnxTextCrossEncoder):
    """ONNX cross-encoder backend backed by a user-populated model registry.

    Unlike ``OnnxTextCrossEncoder``, which ships with a fixed list of
    supported models, this class reports whatever model descriptions have
    been registered at runtime via :meth:`add_model`.
    """

    # Runtime registry of user-added model descriptions, shared class-wide.
    SUPPORTED_MODELS: list[BaseModelDescription] = []

    def __init__(
        self,
        model_name: str,
        cache_dir: Optional[str] = None,
        threads: Optional[int] = None,
        providers: Optional[Sequence[OnnxProvider]] = None,
        cuda: bool = False,
        device_ids: Optional[list[int]] = None,
        lazy_load: bool = False,
        device_id: Optional[int] = None,
        specific_model_path: Optional[str] = None,
        **kwargs: Any,
    ):
        # Pure pass-through: all download/load/inference logic lives in the parent.
        super().__init__(
            model_name=model_name,
            cache_dir=cache_dir,
            threads=threads,
            providers=providers,
            cuda=cuda,
            device_ids=device_ids,
            lazy_load=lazy_load,
            device_id=device_id,
            specific_model_path=specific_model_path,
            **kwargs,
        )

    @classmethod
    def add_model(cls, model_description: BaseModelDescription) -> None:
        """Append one more custom model description to the registry."""
        cls.SUPPORTED_MODELS.append(model_description)

    @classmethod
    def _list_supported_models(cls) -> list[BaseModelDescription]:
        """Return the registry of custom models added so far."""
        return cls.SUPPORTED_MODELS

fastembed/rerank/cross_encoder/text_cross_encoder.py

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,19 @@
33

44
from fastembed.common import OnnxProvider
55
from fastembed.rerank.cross_encoder.onnx_text_cross_encoder import OnnxTextCrossEncoder
6+
from fastembed.rerank.cross_encoder.custom_text_cross_encoder import CustomTextCrossEncoder
7+
68
from fastembed.rerank.cross_encoder.text_cross_encoder_base import TextCrossEncoderBase
7-
from fastembed.common.model_description import BaseModelDescription
9+
from fastembed.common.model_description import (
10+
ModelSource,
11+
BaseModelDescription,
12+
)
813

914

1015
class TextCrossEncoder(TextCrossEncoderBase):
1116
CROSS_ENCODER_REGISTRY: list[Type[TextCrossEncoderBase]] = [
1217
OnnxTextCrossEncoder,
18+
CustomTextCrossEncoder,
1319
]
1420

1521
@classmethod
@@ -124,3 +130,34 @@ def rerank_pairs(
124130
yield from self.model.rerank_pairs(
125131
pairs, batch_size=batch_size, parallel=parallel, **kwargs
126132
)
133+
134+
@classmethod
def add_custom_model(
    cls,
    model: str,
    sources: ModelSource,
    model_file: str = "onnx/model.onnx",
    description: str = "",
    license: str = "",
    size_in_gb: float = 0.0,
    additional_files: Optional[list[str]] = None,
) -> None:
    """Register a cross-encoder model that is not in the built-in list.

    Args:
        model: Model name (e.g. a Hugging Face repo id); must be unique.
        sources: Where the model files can be downloaded from.
        model_file: Path of the ONNX file inside the model repository.
        description: Optional human-readable model description.
        license: Optional license identifier.
        size_in_gb: Approximate model size, in gigabytes.
        additional_files: Extra files to fetch alongside the model, if any.

    Raises:
        ValueError: If a model with the same name is already registered
            (either built-in or previously added).
    """
    # Reject duplicates across both built-in and previously added models.
    if any(registered.model == model for registered in cls._list_supported_models()):
        raise ValueError(
            f"Model {model} is already registered in CrossEncoderModel, if you still want to add this model, "
            f"please use another model name"
        )

    custom_description = BaseModelDescription(
        model=model,
        sources=sources,
        model_file=model_file,
        description=description,
        license=license,
        size_in_GB=size_in_gb,
        additional_files=additional_files or [],
    )
    CustomTextCrossEncoder.add_model(custom_description)

tests/test_custom_models.py

Lines changed: 72 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,28 @@
33
import numpy as np
44
import pytest
55

6-
from fastembed.common.model_description import PoolingType, ModelSource, DenseModelDescription
6+
from fastembed.common.model_description import (
7+
PoolingType,
8+
ModelSource,
9+
DenseModelDescription,
10+
BaseModelDescription,
11+
)
712
from fastembed.common.onnx_model import OnnxOutputContext
813
from fastembed.common.utils import normalize, mean_pooling
914
from fastembed.text.custom_text_embedding import CustomTextEmbedding, PostprocessingConfig
15+
from fastembed.rerank.cross_encoder.custom_text_cross_encoder import CustomTextCrossEncoder
16+
from fastembed.rerank.cross_encoder import TextCrossEncoder
1017
from fastembed.text.text_embedding import TextEmbedding
1118
from tests.utils import delete_model_cache
1219

1320

1421
@pytest.fixture(autouse=True)
def restore_custom_models_fixture():
    """Isolate each test: wipe the custom-model registries before and after."""
    for registry_owner in (CustomTextEmbedding, CustomTextCrossEncoder):
        registry_owner.SUPPORTED_MODELS = []
    yield
    for registry_owner in (CustomTextEmbedding, CustomTextCrossEncoder):
        registry_owner.SUPPORTED_MODELS = []
1928

2029

2130
def test_text_custom_model():
@@ -65,6 +74,43 @@ def test_text_custom_model():
6574
delete_model_cache(model.model._model_dir)
6675

6776

77+
def test_cross_encoder_custom_model():
    """End-to-end custom cross-encoder: register, inspect registry, rerank."""
    running_in_ci = os.getenv("CI")
    model_name = "Xenova/ms-marco-MiniLM-L-4-v2"
    expected_size_gb = 0.08
    model_source = ModelSource(hf=model_name)
    # Reference rerank scores for the two pairs below, pinned upstream.
    expected_scores = np.array([-5.7170815, -11.112114], dtype=np.float32)

    TextCrossEncoder.add_custom_model(
        model_name,
        model_file="onnx/model.onnx",
        sources=model_source,
        size_in_gb=expected_size_gb,
    )

    # Registration must store an exact description in the custom registry.
    assert CustomTextCrossEncoder.SUPPORTED_MODELS[0] == BaseModelDescription(
        model=model_name,
        sources=model_source,
        model_file="onnx/model.onnx",
        description="",
        license="",
        size_in_GB=expected_size_gb,
    )

    encoder = TextCrossEncoder(model_name)
    query_doc_pairs = [
        ("What is AI?", "Artificial intelligence is ..."),
        ("What is ML?", "Machine learning is ..."),
    ]
    scores = np.stack(list(encoder.rerank_pairs(query_doc_pairs)), axis=0)

    assert scores.shape == (2,)
    assert np.allclose(scores, expected_scores, atol=1e-3)
    if running_in_ci:
        delete_model_cache(encoder.model._model_dir)
112+
113+
68114
def test_mock_add_custom_models():
69115
dim = 5
70116
size_in_gb = 0.1
@@ -156,3 +202,28 @@ def test_do_not_add_existing_model():
156202
dim=384,
157203
size_in_gb=0.47,
158204
)
205+
206+
207+
def test_do_not_add_existing_cross_encoder():
    """Duplicate names are rejected for built-in and custom models alike."""
    builtin_name = "Xenova/ms-marco-MiniLM-L-6-v2"
    new_name = "Xenova/ms-marco-MiniLM-L-4-v2"

    # A name already shipped with the library cannot be re-registered.
    with pytest.raises(ValueError, match=f"Model {builtin_name} is already registered"):
        TextCrossEncoder.add_custom_model(
            builtin_name,
            sources=ModelSource(hf=builtin_name),
            size_in_gb=0.08,
        )

    # A genuinely new name is accepted on the first registration...
    TextCrossEncoder.add_custom_model(
        new_name,
        sources=ModelSource(hf=builtin_name),
        size_in_gb=0.08,
    )

    # ...but registering the same name a second time must fail.
    with pytest.raises(ValueError, match=f"Model {new_name} is already registered"):
        TextCrossEncoder.add_custom_model(
            new_name,
            sources=ModelSource(hf=new_name),
            size_in_gb=0.08,
        )

0 commit comments

Comments
 (0)