two-inc
diff --git a/emm/indexing/__init__.py b/emm/indexing/__init__.py
Lines changed: 19 additions & 14 deletions
diff --git a/emm/indexing/base_indexer.py b/emm/indexing/base_indexer.py
Lines changed: 23 additions & 1 deletion
@@ -24,7 +24,7 @@
 - PandasNaiveIndexer: Simple O(n^2) indexing for small datasets
 - PandasSortedNeighbourhoodIndexer: Sorted neighborhood indexing
 
-Optional indexers (require additional dependencies):
+Optional indexers:
 - Spark indexers (requires pyspark):
   - SparkCosSimIndexer
   - SparkCandidateSelectionEstimator
@@ -35,28 +35,33 @@
 
 from __future__ import annotations
 
+# Core indexers
 from emm.indexing.pandas_cos_sim_matcher import PandasCosSimIndexer
 from emm.indexing.pandas_naive_indexer import PandasNaiveIndexer
 from emm.indexing.pandas_sni import PandasSortedNeighbourhoodIndexer
 
 __all__ = [
-    "PandasCosSimIndexer", 
+    "PandasCosSimIndexer",
     "PandasNaiveIndexer", 
-    "PandasSortedNeighbourhoodIndexer",
+    "PandasSortedNeighbourhoodIndexer"
 ]
 
-# Feature detection for sentence transformers
-HAS_SENTENCE_TRANSFORMER = False
+# Optional sentence transformer support
 try:
-    import sentence_transformers
-    HAS_SENTENCE_TRANSFORMER = True
+    from emm.indexing.pandas_sentence_transformer import PandasSentenceTransformerIndexer
+    __all__.append("PandasSentenceTransformerIndexer")
 except ImportError:
     pass
 
-# Only import if dependencies are available
-if HAS_SENTENCE_TRANSFORMER:
-    try:
-        from emm.indexing.pandas_sentence_transformer import PandasSentenceTransformerIndexer
-        __all__.append("PandasSentenceTransformerIndexer")
-    except ImportError:
-        HAS_SENTENCE_TRANSFORMER = False
+# Optional Spark support (requires pyspark)
+try:
+    from emm.indexing.spark_cos_sim_matcher import SparkCosSimIndexer
+    from emm.indexing.spark_candidate_selection import SparkCandidateSelectionEstimator
+    from emm.indexing.spark_sni import SparkSortedNeighbourhoodIndexer
+    __all__.extend([
+        "SparkCosSimIndexer",
+        "SparkCandidateSelectionEstimator", 
+        "SparkSortedNeighbourhoodIndexer"
+    ])
+except ImportError:
+    pass
@@ -21,6 +21,8 @@
 
 from emm.base.module import Module
 from emm.version import __version__
+from typing import List, Optional, Dict, Any
+import torch
 
 
 class BaseIndexer(Module):
@@ -49,7 +51,7 @@ def decrease_window_by_one_step(self):
 class CosSimBaseIndexer(BaseIndexer):
     """Base implementation of CosSimIndexer class"""
 
-    def __init__(self, num_candidates: int) -> None:
+    def __init__(self, num_candidates: int = 5) -> None:
         super().__init__()
         if num_candidates <= 0:
             msg = "Number of candidates should be a positive integer"
@@ -71,6 +73,26 @@ def decrease_window_by_one_step(self) -> None:
         self.num_candidates -= 1
 
 
+class SentenceTransformerBaseIndexer(BaseIndexer):
+    """Base class for sentence transformer based indexers"""
+    def __init__(  # records the settings below as instance attributes after calling the parent constructor
+        self,
+        model_name: str = "all-MiniLM-L6-v2",  # stored unchanged; presumably a sentence-transformers model id — TODO confirm
+        device: Optional[str] = None,  # stored unchanged; NOTE(review): meaning of None is decided by whoever consumes it — confirm
+        batch_size: int = 32,  # stored unchanged; not validated here (CosSimBaseIndexer does validate num_candidates)
+        model_kwargs: Optional[Dict[str, Any]] = None,
+        encode_kwargs: Optional[Dict[str, Any]] = None,
+        similarity_threshold: float = 0.5,  # stored unchanged; no range check is applied here
+    ):
+        super().__init__()
+        self.model_name = model_name
+        self.device = device
+        self.batch_size = batch_size
+        self.model_kwargs = model_kwargs or {}  # None or an empty dict becomes a new empty dict; a non-empty dict is kept by reference, not copied
+        self.encode_kwargs = encode_kwargs or {}  # same None/empty handling as model_kwargs
+        self.similarity_threshold = similarity_threshold
+
+
 class SNBaseIndexer(BaseIndexer):
     """Base implementation of SN Indexer class"""