Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .mise.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[tools]
python="3.11"
python="3.12"
poetry="2.3.3"
java="liberica-1.8.0"
678 changes: 312 additions & 366 deletions poetry.lock

Large diffs are not rendered by default.

113 changes: 0 additions & 113 deletions pylint_checkers/check_typing_imports.py

This file was deleted.

45 changes: 21 additions & 24 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,21 @@ packages = [
]

[tool.poetry.dependencies]
python = ">=3.10,<3.12"
python = ">=3.10,<3.13" # breaking changes beyond 3.12
boto3 = ">=1.34.162,<1.36" # breaking change beyond 1.36
botocore = ">=1.34.162,<1.36" # breaking change beyond 1.36
delta-spark = "2.4.*"
duckdb = "1.1.*" # breaking changes beyond 1.1
Jinja2 = "3.1.*"
lxml = "^4.9.1"
duckdb = "1.1.3" # breaking changes beyond 1.1
Jinja2 = "3.1.6"
lxml = "4.9.4"
numpy = "1.26.4"
openpyxl = "^3.1"
pandas = "^2.2.2"
polars = "0.20.*"
pyarrow = "^17.0.0"
pydantic = "1.10.16"
openpyxl = "3.1.5"
pandas = "2.3.3"
polars = "0.20.31"
pyarrow = "17.0.0"
pydantic = "1.10.19"
pyspark = "3.4.*"
typing_extensions = "^4.6.2"
typing_extensions = "4.15.0"

[tool.poetry.group.dev]
optional = true
Expand All @@ -58,30 +58,29 @@ commitizen = "4.9.1"
pre-commit = "4.3.0"
charset-normalizer = "3.4.6"
python-discovery = "1.2.0"
requests = "2.33.0"

[tool.poetry.group.test]
optional = true

[tool.poetry.group.test.dependencies]
faker = "18.11.1"
behave = "1.3.3"
coverage = "7.11.0"
moto = {extras = ["s3"], version = "4.0.13"}
moto = {extras = ["s3"], version = "4.2.14"}
requests = "2.33.0" # dependency of `moto`
Werkzeug = "3.1.6"
pytest = "8.4.2"
pytest-lazy-fixtures = "1.4.0" # switched from https://github.com/TvoroG/pytest-lazy-fixture as it's no longer supported
xlsx2csv = "0.8.2"
xlsx2csv = "0.8.4" # polars requirement

[tool.poetry.group.lint]
optional = true

[tool.poetry.group.lint.dependencies]
black = "24.3.0"
astroid = "2.14.2"
isort = "5.11.5"
pylint = "2.16.4"
mypy = "0.991"
astroid = "3.3.9"
isort = "5.13.2"
pylint = "3.3.9"
mypy = "1.11.2"
boto3-stubs = {extras = ["essential"], version = "1.26.72"}
botocore-stubs = "1.29.72"
pandas-stubs = "1.2.0.62"
Expand All @@ -100,7 +99,7 @@ optional = true

[tool.poetry.group.docs.dependencies]
click = "8.2.1"
mkdocs = "^1.6.1"
mkdocs = "1.6.1"
mkdocstrings = { version = "1.0.3", extras = ["python"] }
griffelib = "2.0.1"
pymdown-extensions = "10.21.2"
Expand Down Expand Up @@ -142,10 +141,6 @@ source_pkgs = [
[tool.coverage.report]
show_missing = true

[tool.pylint]
init-hook = "import sys; sys.path.append('./pylint_checkers')"
load-plugins = "check_typing_imports"

[tool.pylint.main]
extension-pkg-allow-list = ["pyspark", "lxml", "pydantic"]
fail-under = 10.0
Expand Down Expand Up @@ -194,7 +189,7 @@ max-statements = 50
min-public-methods = 2

[tool.pylint.exceptions]
overgeneral-exceptions = ["BaseException", "Exception"]
overgeneral-exceptions = ["builtins.BaseException", "builtins.Exception"]

[tool.pylint.format]
ignore-long-lines = "^\\s*(# )?<?https?://\\S+>?$"
Expand Down Expand Up @@ -229,6 +224,8 @@ disable = [
"use-symbolic-message-instead",
"logging-fstring-interpolation",
"fixme",
"too-many-positional-arguments",
"too-many-arguments",
]
enable = ["c-extension-no-member"]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ def _add_cnst_field(rel: DuckDBPyRelation) -> tuple[str, DuckDBPyRelation]:
group_pl = entity.pl().pivot(
columns=[config.pivot_column],
values=agg_cols,
index=(group_cols or [const_fld]),
index=(group_cols or [const_fld]), # pylint: disable=E0606
aggregate_function=config.agg_function,
)
if const_fld in group_pl.columns:
Expand Down
4 changes: 2 additions & 2 deletions src/dve/core_engine/backends/metadata/contract.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ def schemas(self) -> dict[EntityName, type[BaseModel]]:
"""The per-entity schemas, as pydantic models."""
if not self._schemas:
for entity_name, validator in self.validators.items():
self._schemas[entity_name] = validator.model # type: ignore
return self._schemas.copy()
self._schemas[entity_name] = validator.model # type: ignore # pylint: disable=E1137
return self._schemas.copy() # pylint: disable=E1101

@root_validator(allow_reuse=True)
@classmethod
Expand Down
2 changes: 1 addition & 1 deletion src/dve/core_engine/backends/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def stringify_type(type_: Union[type, GenericAlias]) -> type:

origin = get_origin(type_)
if origin is None: # A non-generic container type, return as-is
return type_
return type_ # type: ignore

type_args = get_args(type_)
if not type_args:
Expand Down
18 changes: 9 additions & 9 deletions src/dve/core_engine/configuration/v1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@

FieldName = str
"""The name of a field within a model/schema."""
TypeOrDef = Union[
TypeOrDef = Union[ # pylint: disable=C0103
TypeName, "_CallableTypeDefinition", "_ModelTypeDefinition", "_TypeAliasDefinition"
]
"""The name or definition of a type."""
Expand Down Expand Up @@ -181,7 +181,7 @@ class V1EngineConfig(BaseEngineConfig):
@validate_arguments
def _update_rule_store(self, rule_store: dict[RuleName, BusinessComponentSpecConfigUnion]):
"""Update the rule store rules to add/override the rules from the new store."""
self._rule_store_rules.update(rule_store)
self._rule_store_rules.update(rule_store) # pylint: disable=E1101

def _load_rule_store(self, uri: URI):
"""Load a JSON rule store from the provided URI and update the stored
Expand All @@ -198,7 +198,7 @@ def _load_rule_store(self, uri: URI):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
uri_prefix = self.location.rsplit("/", 1)[0]
for rule_store_config in self.transformations.rule_stores:
for rule_store_config in self.transformations.rule_stores: # pylint: disable=E1101
uri = joinuri(uri_prefix, rule_store_config.filename)
self._load_rule_store(uri)

Expand Down Expand Up @@ -281,7 +281,7 @@ def _load_rules_and_vars(self) -> tuple[list[Rule], list[TemplateVariables]]:
rules, local_variable_list = [], []
added_rules: set[RuleName] = set()

for index, complex_rule_config in enumerate(self.transformations.complex_rules):
for index, complex_rule_config in enumerate(self.transformations.complex_rules): # pylint: disable=E1101
rule, local_params, deps = self._resolve_business_rule(complex_rule_config)
missing_rules = deps - added_rules
if missing_rules:
Expand All @@ -295,9 +295,9 @@ def _load_rules_and_vars(self) -> tuple[list[Rule], list[TemplateVariables]]:

rule, local_params = self._create_rule(
name="root",
rules=self.transformations.rules,
filters=self.transformations.filters,
post_filter_rules=self.transformations.post_filter_rules,
rules=self.transformations.rules, # pylint: disable=E1101
filters=self.transformations.filters, # pylint: disable=E1101
post_filter_rules=self.transformations.post_filter_rules, # pylint: disable=E1101
)
rules.append(rule)
local_variable_list.append(local_params)
Expand Down Expand Up @@ -338,14 +338,14 @@ def load_error_message_info(self, uri):

def get_reference_data_config(self) -> dict[EntityName, ReferenceConfig]: # type: ignore
"""Gets the reference data configuration from the transformations"""
return self.transformations.reference_data
return self.transformations.reference_data # pylint: disable=E1101

def get_rule_metadata(self) -> RuleMetadata:
"""Gets the rule metadata from the Engine configuration"""
rules, local_variables = self._load_rules_and_vars()
return RuleMetadata(
rules=rules,
local_variables=local_variables,
global_variables=self.transformations.parameters,
global_variables=self.transformations.parameters, # pylint: disable=E1101
reference_data_config=self.get_reference_data_config(),
)
20 changes: 10 additions & 10 deletions src/dve/core_engine/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def build(
debug=debug,
**kwargs,
)
self.main_log.info(f"Output path: {self.output_prefix_uri!r}")
self.main_log.info(f"Output path: {self.output_prefix_uri!r}") # pylint: disable=E1101
return self

@classmethod
Expand All @@ -155,13 +155,13 @@ def build_from_model(cls, model_str: JSONstring):
return cls.build(**EngineRunValidation(**json.loads(model_str)).dict())

def __enter__(self) -> "CoreEngine":
self.main_log.info("Entering pipeline context.")
self.main_log.info("Entering pipeline context.") # pylint: disable=E1101
if self._cache_dir is not None:
raise ValueError("Pipeline already within context")

self._cache_dir = TemporaryPrefix(self.cache_prefix_uri)
self._cache_dir.__enter__()
self.main_log.info(f"Pipeline will cache to {self.cache_prefix!r}")
self.main_log.info(f"Pipeline will cache to {self.cache_prefix!r}") # pylint: disable=E1101
return self

def __exit__(
Expand All @@ -170,14 +170,14 @@ def __exit__(
exc_value: Optional[Exception],
traceback: Optional[TracebackType],
) -> None:
self.main_log.info(f"Exiting pipeline context, clearing {self.cache_prefix!r}")
self.main_log.info(f"Exiting pipeline context, clearing {self.cache_prefix!r}") # pylint: disable=E1101
cache_dir = self._cache_dir
self._cache_dir = None

if cache_dir is not None:
cache_dir.__exit__(exc_type, exc_value, traceback)

self.main_log.info("Cleared cache.")
self.main_log.info("Cleared cache.") # pylint: disable=E1101

@property
def cache_prefix(self) -> URI:
Expand All @@ -198,17 +198,17 @@ def _write_entity_outputs(self, entities: SparkEntities) -> SparkEntities:
"""
output_entities = {}

self.main_log.info(f"Writing entities to the output location: {self.output_prefix_uri}")
self.main_log.info(f"Writing entities to the output location: {self.output_prefix_uri}") # pylint: disable=E1101
for entity_name, entity in entities.items():
entity = entity.drop(RECORD_INDEX_COLUMN_NAME)

self.main_log.info(f"Entity: {entity_name} {type(entity)}")
self.main_log.info(f"Entity: {entity_name} {type(entity)}") # pylint: disable=E1101

output_uri = joinuri(self.output_prefix_uri, entity_name)
if get_resource_exists(output_uri):
self.main_log.info(f"{output_uri} already exists - will be overwritten")
self.main_log.info(f"{output_uri} already exists - will be overwritten") # pylint: disable=E1101

self.main_log.info(f"+ Writing parquet output to {output_uri!r}")
self.main_log.info(f"+ Writing parquet output to {output_uri!r}") # pylint: disable=E1101
entity.write.mode("overwrite").parquet(output_uri)
spark_session = SparkSession.builder.getOrCreate()
output_entities[entity_name] = spark_session.read.format("parquet").load(
Expand All @@ -228,7 +228,7 @@ def _write_outputs(self, entities: SparkEntities) -> SparkEntities:

def _show_available_entities(self, entities: SparkEntities, *, verbose: bool = False) -> None:
"""Print current entities."""
self.main_log.info("Displaying available dataframes in this run:")
self.main_log.info("Displaying available dataframes in this run:") # pylint: disable=E1101

for entity_name, entity in entities.items():
# FIXME: Currently a print statement because log messages
Expand Down
Loading
Loading