Skip to content

Commit ae9f3aa

Browse files
committed
style: address review comments and linting issues
1 parent a4d49b6 commit ae9f3aa

6 files changed

Lines changed: 13 additions & 17 deletions

File tree

src/dve/core_engine/backends/implementations/duckdb/reference_data.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -15,9 +15,7 @@
1515

1616
# pylint: disable=too-few-public-methods
1717
class DuckDBRefDataLoader(BaseRefDataLoader[DuckDBPyRelation]):
18-
"""A reference data loader using already existing DuckDB tables.
19-
reference_entity_config and dataset_config_uri (if config uses relative paths)
20-
should be supplied using setter methods for the dataset being processed before running."""
18+
"""A reference data loader using already existing DuckDB tables."""
2119

2220
def __init__(
2321
self,

src/dve/core_engine/backends/implementations/spark/backend.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,6 @@
1212
from dve.core_engine.backends.implementations.spark.rules import SparkStepImplementations
1313
from dve.core_engine.backends.implementations.spark.spark_helpers import get_type_from_annotation
1414
from dve.core_engine.backends.implementations.spark.types import SparkEntities
15-
from dve.core_engine.backends.types import EntityType
1615
from dve.core_engine.constants import RECORD_INDEX_COLUMN_NAME
1716
from dve.core_engine.loggers import get_child_logger, get_logger
1817
from dve.core_engine.models import SubmissionInfo
@@ -64,7 +63,7 @@ def load_reference_data(
6463
reference_data_loader = SparkRefDataLoader(
6564
spark=self.spark_session,
6665
reference_data_config=reference_entity_config,
67-
dataset_config_uri=self.dataset_config_uri, # type: ignore
66+
dataset_config_uri=self.dataset_config_uri, # type: ignore
6867
)
6968
if sub_info_entity is not None:
7069
reference_data_loader.entity_cache["dve_submission_info"] = sub_info_entity

src/dve/core_engine/backends/implementations/spark/reference_data.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -15,9 +15,7 @@
1515

1616
# pylint: disable=too-few-public-methods
1717
class SparkRefDataLoader(BaseRefDataLoader[DataFrame]):
18-
"""A reference data loader using already existing Apache Spark Tables.
19-
reference_entity_config and dataset_config_uri (if config uses relative paths)
20-
should be supplied using setter methods for the dataset being processed before running."""
18+
"""A reference data loader using already existing Apache Spark Tables."""
2119

2220
def __init__(
2321
self,

src/dve/pipeline/duckdb_pipeline.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
from duckdb import DuckDBPyConnection, DuckDBPyRelation
77

88
import dve.parser.file_handling as fh
9-
from dve.core_engine.backends.base.reference_data import BaseRefDataLoader, ReferenceConfig
9+
from dve.core_engine.backends.base.reference_data import ReferenceConfig
1010
from dve.core_engine.backends.implementations.duckdb.auditing import DDBAuditingManager
1111
from dve.core_engine.backends.implementations.duckdb.contract import DuckDBDataContract
1212
from dve.core_engine.backends.implementations.duckdb.duckdb_helpers import duckdb_get_entity_count
@@ -47,13 +47,13 @@ def __init__(
4747
logger,
4848
)
4949

50-
def get_reference_data_loader(
50+
def init_reference_data_loader(
5151
self, reference_data_config: dict[str, ReferenceConfig], **kwargs
5252
) -> DuckDBRefDataLoader:
5353
return DuckDBRefDataLoader(
5454
connection=self._connection,
5555
reference_data_config=reference_data_config,
56-
dataset_config_uri=fh.get_parent(self._rules_path), # type: ignore
56+
dataset_config_uri=fh.get_parent(self._rules_path), # type: ignore
5757
**kwargs
5858
)
5959

src/dve/pipeline/pipeline.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,6 @@ def __init__(
6868
self._submitted_files_path = submitted_files_path
6969
self._processed_files_path = processed_files_path
7070
self._rules_path = rules_path
71-
self._reference_data_loader = None
7271
self._job_run_id = job_run_id
7372
self._audit_tables = audit_tables
7473
self._data_contract = data_contract
@@ -113,7 +112,7 @@ def get_entity_count(entity: EntityType) -> int:
113112
"""Get a row count of an entity stored as parquet"""
114113
raise NotImplementedError()
115114

116-
def get_reference_data_loader(
115+
def init_reference_data_loader(
117116
self, reference_data_config: dict[EntityName, ReferenceConfig], **kwargs
118117
) -> BaseRefDataLoader:
119118
"""Get reference data loader if required for business rules"""
@@ -558,7 +557,9 @@ def apply_business_rules( # pylint: disable=R0914
558557
self._processed_files_path, submission_info.submission_id
559558
)
560559
ref_data = config.get_reference_data_config()
561-
reference_data: BaseRefDataLoader = self.get_reference_data_loader(reference_data_config=ref_data)
560+
reference_data: BaseRefDataLoader = self.init_reference_data_loader(
561+
reference_data_config=ref_data
562+
)
562563
rules = config.get_rule_metadata()
563564
entities = {}
564565
contract = fh.joinuri(

src/dve/pipeline/spark_pipeline.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
from pyspark.sql import DataFrame, SparkSession
88

99
import dve.parser.file_handling as fh
10-
from dve.core_engine.backends.base.reference_data import BaseRefDataLoader, ReferenceConfig
10+
from dve.core_engine.backends.base.reference_data import ReferenceConfig
1111
from dve.core_engine.backends.implementations.spark.auditing import SparkAuditingManager
1212
from dve.core_engine.backends.implementations.spark.contract import SparkDataContract
1313
from dve.core_engine.backends.implementations.spark.reference_data import SparkRefDataLoader
@@ -49,13 +49,13 @@ def __init__(
4949
logger,
5050
)
5151

52-
def get_reference_data_loader(
52+
def init_reference_data_loader(
5353
self, reference_data_config: dict[str, ReferenceConfig], **kwargs
5454
) -> SparkRefDataLoader:
5555
return SparkRefDataLoader(
5656
spark=self._spark,
5757
reference_data_config=reference_data_config,
58-
dataset_config_uri=fh.get_parent(self._rules_path), # type: ignore
58+
dataset_config_uri=fh.get_parent(self._rules_path), # type: ignore
5959
**kwargs
6060
)
6161

0 commit comments

Comments
 (0)