Skip to content
This repository was archived by the owner on Jan 6, 2026. It is now read-only.

Commit de8c59b

Browse files
committed
featfix(transformers): lotts
1 parent a1cb2a1 commit de8c59b

13 files changed

Lines changed: 196 additions & 67 deletions

File tree

emm/indexing/__init__.py

Lines changed: 11 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -45,25 +45,18 @@
4545
"PandasSortedNeighbourhoodIndexer",
4646
]
4747

48-
# Optional Spark support
48+
# Feature detection for sentence transformers
49+
HAS_SENTENCE_TRANSFORMER = False
4950
try:
50-
import pyspark
51-
from emm.indexing.spark_cos_sim_matcher import SparkCosSimIndexer
52-
from emm.indexing.spark_candidate_selection import SparkCandidateSelectionEstimator
53-
from emm.indexing.spark_sni import SparkSortedNeighbourhoodIndexer
54-
__all__.extend([
55-
"SparkCosSimIndexer",
56-
"SparkCandidateSelectionEstimator",
57-
"SparkSortedNeighbourhoodIndexer"
58-
])
51+
import sentence_transformers
52+
HAS_SENTENCE_TRANSFORMER = True
5953
except ImportError:
6054
pass
6155

62-
# Optional Sentence Transformer support
63-
try:
64-
from emm.indexing.pandas_sentence_transformer import PandasSentenceTransformerIndexer
65-
__all__.extend([
66-
"PandasSentenceTransformerIndexer"
67-
])
68-
except ImportError:
69-
pass # Transformer features unavailable
56+
# Only import if dependencies are available
57+
if HAS_SENTENCE_TRANSFORMER:
58+
try:
59+
from emm.indexing.pandas_sentence_transformer import PandasSentenceTransformerIndexer
60+
__all__.append("PandasSentenceTransformerIndexer")
61+
except ImportError:
62+
HAS_SENTENCE_TRANSFORMER = False

emm/models/__init__.py

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,40 @@
1+
"""Optional model extensions for entity matching.
2+
3+
This module provides optional model-based features that can be enabled by installing
4+
additional dependencies:
5+
6+
1. Sentence Transformers: Install with `pip install emm[transformers]`
7+
2. Model Tuning: Install with `pip install emm[tuning]`
8+
"""
9+
110
from __future__ import annotations
211

312
__all__ = []
413

514
# Core functionality is empty - models are all optional features
615
# This makes it explicit that this module only provides optional extensions
716

8-
# Primary optional feature: Sentence Transformers
9-
try:
10-
import sentence_transformers
11-
from emm.models.sentence_transformer.base import BaseSentenceTransformerComponent
12-
__all__.append("BaseSentenceTransformerComponent")
17+
def _import_sentence_transformers():
18+
"""Helper to import sentence transformer components"""
19+
try:
20+
import sentence_transformers
21+
from emm.models.sentence_transformer.base import BaseSentenceTransformerComponent
22+
__all__.append("BaseSentenceTransformerComponent")
23+
return True
24+
except ImportError:
25+
return False
1326

14-
# Secondary optional feature: Model Tuning
15-
# Only available if sentence transformers is installed
27+
def _import_tuning():
28+
"""Helper to import tuning components"""
1629
try:
1730
import lightning
1831
import wandb
1932
from emm.models.sentence_transformer.tuning import SentenceTransformerTuner, TuningConfig
2033
__all__.extend(["SentenceTransformerTuner", "TuningConfig"])
34+
return True
2135
except ImportError:
22-
pass # Tuning features unavailable
36+
return False
2337

24-
except ImportError:
25-
pass # Transformer features unavailable
38+
# Try to import optional features
39+
HAS_TRANSFORMERS = _import_sentence_transformers()
40+
HAS_TUNING = HAS_TRANSFORMERS and _import_tuning() # Tuning requires transformers

emm/models/sentence_transformer/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""Sentence transformer functionality for entity matching.
22
3-
Note: This module assumes sentence-transformers is installed.
4-
Import checks are handled at the package level in emm.models.__init__.py
3+
This module requires the sentence-transformers package.
4+
Install with: pip install emm[transformers]
55
"""
66

77
from __future__ import annotations
@@ -10,7 +10,7 @@
1010

1111
__all__ = ["BaseSentenceTransformerComponent"]
1212

13-
# Move optional imports inside try block
13+
# Import tuning if available
1414
try:
1515
from emm.models.sentence_transformer.tuning import TuningConfig, SentenceTransformerTuner
1616
__all__ += ["TuningConfig", "SentenceTransformerTuner"]

emm/models/sentence_transformer/base.py

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,28 @@
1+
"""Base sentence transformer component for entity matching.
2+
3+
This module requires sentence-transformers and torch.
4+
Install with: pip install emm[transformers]
5+
"""
6+
17
from __future__ import annotations
28

39
from typing import List, Dict, Optional, Any, Tuple
4-
import torch
5-
import numpy as np
6-
from sentence_transformers import SentenceTransformer, util, SimilarityFunction
10+
import logging
11+
12+
# Check required dependencies at import time
13+
try:
14+
import torch
15+
import sentence_transformers
16+
from sentence_transformers import SentenceTransformer, util, SimilarityFunction
17+
except ImportError as e:
18+
raise ImportError(
19+
"sentence-transformers and torch are required for this module. "
20+
"Install with: pip install emm[transformers]"
21+
) from e
22+
23+
# Rest of imports
724
from functools import lru_cache
25+
import numpy as np
826

927
# Single numpy typing import with fallback
1028
try:
@@ -13,6 +31,8 @@
1331
from typing import Any
1432
NDArray = Any
1533

34+
logger = logging.getLogger(__name__)
35+
1636
class BaseSentenceTransformerComponent:
1737
"""Base component for sentence transformer functionality in EMM.
1838

emm/models/sentence_transformer/examples/company_name_tuning.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,22 @@
1+
"""Example of company name tuning with sentence transformers.
2+
3+
This example requires the full tuning dependencies.
4+
Install with: pip install emm[tuning]
5+
"""
6+
17
from pathlib import Path
28
from typing import List
3-
import torch
4-
from torch.utils.data import Dataset, DataLoader
9+
10+
# Check required dependencies at import time
11+
try:
12+
import torch
13+
from torch.utils.data import Dataset, DataLoader
14+
except ImportError as e:
15+
raise ImportError(
16+
"This example requires sentence-transformers and tuning dependencies. "
17+
"Install with: pip install emm[tuning]"
18+
) from e
19+
520
from emm.models.sentence_transformer.tuning import SentenceTransformerTuner, TuningConfig
621

722
class CompanyNameDataset(Dataset):

emm/models/sentence_transformer/examples/sentence_transformer_indexing.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,18 @@
1-
"""Example script demonstrating entity matching using sentence transformers for indexing."""
1+
"""Example of entity matching using sentence transformers for indexing.
22
3-
import pandas as pd
4-
from emm import PandasEntityMatching
3+
This example requires sentence-transformers.
4+
Install with: pip install emm[transformers]
5+
"""
6+
7+
# Check required dependencies at import time
8+
try:
9+
import pandas as pd
10+
from emm import PandasEntityMatching
11+
except ImportError as e:
12+
raise ImportError(
13+
"This example requires sentence-transformers. "
14+
"Install with: pip install emm[transformers]"
15+
) from e
516

617
def main():
718
# Create sample ground truth data

emm/models/sentence_transformer/tuning/__init__.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
"""Model tuning functionality for sentence transformers.
22
3-
Note: This module assumes lightning and wandb are installed.
4-
Import checks are handled at the package level in emm.models.__init__.py
3+
This module requires additional dependencies:
4+
- sentence-transformers
5+
- lightning
6+
- wandb
7+
8+
Install with: pip install emm[tuning]
59
"""
610

711
from __future__ import annotations

emm/models/sentence_transformer/tuning/config.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,25 @@
1+
"""Configuration for sentence transformer tuning.
2+
3+
This module requires lightning and wandb.
4+
Install with: pip install emm[tuning]
5+
"""
6+
7+
from __future__ import annotations
8+
19
from dataclasses import dataclass
210
from pathlib import Path
311
from typing import Optional
412

13+
# Check required dependencies at import time
14+
try:
15+
import lightning
16+
import wandb
17+
except ImportError as e:
18+
raise ImportError(
19+
"lightning and wandb are required for tuning functionality. "
20+
"Install with: pip install emm[tuning]"
21+
) from e
22+
523
@dataclass
624
class TuningConfig:
725
"""Configuration for fine-tuning sentence transformers

emm/models/sentence_transformer/tuning/tuner.py

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,45 @@
1+
"""Tuning functionality for sentence transformers.
2+
3+
This module requires additional dependencies:
4+
- sentence-transformers
5+
- lightning
6+
- wandb
7+
8+
Install with: pip install emm[tuning]
9+
"""
10+
111
from __future__ import annotations
212

313
from typing import Optional, Dict, Any, List
414
import logging
5-
import lightning as L
6-
from sentence_transformers import SentenceTransformer, losses
7-
import torch
8-
import wandb
915
from pathlib import Path
10-
from torch.utils.data import DataLoader
11-
from torch.cuda.amp import GradScaler
1216
import numpy as np
1317

18+
# Try the optional tuning imports now; failure is recorded in HAS_TUNING_DEPS and only raised later by check_tuning_dependencies()
19+
HAS_TUNING_DEPS = False
20+
try:
21+
import torch
22+
import lightning as L
23+
import wandb
24+
from sentence_transformers import SentenceTransformer, losses
25+
from torch.utils.data import DataLoader
26+
from torch.cuda.amp import GradScaler
27+
HAS_TUNING_DEPS = True
28+
except ImportError:
29+
pass
30+
1431
from emm.models.sentence_transformer.tuning.config import TuningConfig
1532

1633
logger = logging.getLogger(__name__)
1734

35+
def check_tuning_dependencies():
36+
"""Check if tuning dependencies are available"""
37+
if not HAS_TUNING_DEPS:
38+
raise ImportError(
39+
"sentence-transformers, torch, lightning, and wandb are required for tuning. "
40+
"Install with: pip install emm[tuning]"
41+
)
42+
1843
class SentenceTransformerTuner:
1944
"""Fine-tuning for sentence transformers specialized for company name matching"""
2045

@@ -24,6 +49,7 @@ def __init__(self, config: TuningConfig):
2449
Args:
2550
config: Tuning configuration object
2651
"""
52+
check_tuning_dependencies()
2753
self.config = config
2854

2955
# Setup Lightning Fabric for distributed training

emm/pipeline/__init__.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,24 @@
1717
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
1818
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1919

20-
from emm.helper import spark_installed
20+
"""Pipeline implementations for entity matching.
21+
22+
Core pipeline:
23+
- PandasEntityMatching: Pipeline using pandas DataFrames
24+
25+
Optional pipelines:
26+
- SparkEntityMatching (requires pyspark)
27+
"""
28+
29+
from __future__ import annotations
30+
2131
from emm.pipeline.pandas_entity_matching import PandasEntityMatching
2232

2333
__all__ = ["PandasEntityMatching"]
2434

25-
if spark_installed:
35+
# Optional Spark support
36+
try:
2637
from emm.pipeline.spark_entity_matching import SparkEntityMatching
27-
28-
__all__ += ["SparkEntityMatching"]
38+
__all__.append("SparkEntityMatching")
39+
except ImportError:
40+
pass

0 commit comments

Comments
 (0)