Skip to content

Commit a0ec92d

Browse files
committed
style: sort formatting, linting and static typing
1 parent bf170f4 commit a0ec92d

22 files changed

Lines changed: 111 additions & 65 deletions

File tree

src/dve/core_engine/backends/base/contract.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -337,9 +337,9 @@ def read_raw_entities(
337337
successful = True
338338
for entity_name, resource in entity_locations.items():
339339
reader_metadata = contract_metadata.reader_metadata[entity_name]
340-
extension = "." + (
341-
get_file_suffix(resource) or ""
342-
).lower() # Already checked that extension supported.
340+
extension = (
341+
"." + (get_file_suffix(resource) or "").lower()
342+
) # Already checked that extension supported.
343343

344344
reader_config = reader_metadata[extension]
345345
reader_type = get_reader(reader_config.reader)
@@ -368,11 +368,11 @@ def read_raw_entities(
368368
messages.extend(new_messages)
369369

370370
return entities, dedup_messages(messages), successful
371-
371+
372372
def add_record_index(self, entity: EntityType, **kwargs) -> EntityType:
373373
"""Add a record index to the entity"""
374374
raise NotImplementedError(f"add_record_index not implemented in {self.__class__}")
375-
375+
376376
def drop_record_index(self, entity: EntityType, **kwargs) -> EntityType:
377377
"""Drop a record index from the entity"""
378378
raise NotImplementedError(f"drop_record_index not implemented in {self.__class__}")

src/dve/core_engine/backends/base/reader.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,11 +126,11 @@ def read_to_entity_type(
126126
raise ReaderLacksEntityTypeSupport(entity_type=entity_type) from err
127127

128128
return reader_func(self, resource, entity_name, schema)
129-
129+
130130
def add_record_index(self, entity: EntityType, **kwargs) -> EntityType:
131131
"""Add a record index to the entity"""
132132
raise NotImplementedError(f"add_record_index not implemented in {self.__class__}")
133-
133+
134134
def drop_record_index(self, entity: EntityType, **kwargs) -> EntityType:
135135
"""Drop a record index to the entity"""
136136
raise NotImplementedError(f"drop_record_index not implemented in {self.__class__}")

src/dve/core_engine/backends/base/utilities.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,6 @@
1212
from dve.core_engine.type_hints import ExpressionArray, MultiExpression
1313
from dve.parser.type_hints import URI
1414

15-
import polars as pl
16-
17-
from dve.core_engine.constants import RECORD_INDEX_COLUMN_NAME
18-
1915
BRACKETS = {"(": ")", "{": "}", "[": "]", "<": ">"}
2016
"""A mapping of opening brackets to their closing counterpart."""
2117
STRING_START_CHARS = {'"', "'"}

src/dve/core_engine/backends/implementations/duckdb/contract.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
from pydantic import BaseModel
1717
from pydantic.fields import ModelField
1818

19-
from dve.core_engine.constants import RECORD_INDEX_COLUMN_NAME
2019
import dve.parser.file_handling as fh
2120
from dve.common.error_utils import (
2221
BackgroundMessageWriter,
@@ -39,6 +38,7 @@
3938
from dve.core_engine.backends.metadata.contract import DataContractMetadata
4039
from dve.core_engine.backends.types import StageSuccessful
4140
from dve.core_engine.backends.utilities import get_polars_type_from_annotation, stringify_model
41+
from dve.core_engine.constants import RECORD_INDEX_COLUMN_NAME
4242
from dve.core_engine.message import FeedbackMessage
4343
from dve.core_engine.type_hints import URI, EntityLocations
4444
from dve.core_engine.validation import RowValidator, apply_row_validator_helper
@@ -55,6 +55,7 @@ def __call__(self, row: pd.Series):
5555
self.errors.extend(self.row_validator(row.to_dict())[1]) # type: ignore
5656
return row # no op
5757

58+
5859
@duckdb_record_index
5960
@duckdb_write_parquet
6061
@duckdb_read_parquet
@@ -173,7 +174,7 @@ def apply_data_contract(
173174
msg_count += len(msgs)
174175

175176
self.logger.info(f"Data contract found {msg_count} issues in {entity_name}")
176-
177+
177178
if not RECORD_INDEX_COLUMN_NAME in relation.columns:
178179
relation = self.add_record_index(relation)
179180

src/dve/core_engine/backends/implementations/duckdb/duckdb_helpers.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -288,18 +288,28 @@ def duckdb_rel_to_dictionaries(
288288
while rows := entity.fetchmany(batch_size):
289289
yield from (dict(zip(cols, rw)) for rw in rows)
290290

291-
def _add_duckdb_record_index(self, entity: DuckDBPyRelation) -> DuckDBPyRelation:
291+
292+
def _add_duckdb_record_index(
293+
self, entity: DuckDBPyRelation # pylint: disable=W0613
294+
) -> DuckDBPyRelation:
295+
"""Add record index to duckdb relation"""
292296
if RECORD_INDEX_COLUMN_NAME in entity.columns:
293297
return entity
294298

295299
return entity.select(f"*, row_number() OVER () as {RECORD_INDEX_COLUMN_NAME}")
296300

297-
def _drop_duckdb_record_index(self, entity: DuckDBPyRelation) -> DuckDBPyRelation:
301+
302+
def _drop_duckdb_record_index(
303+
self, entity: DuckDBPyRelation # pylint: disable=W0613
304+
) -> DuckDBPyRelation:
305+
"""Drop record index from duckdb relation"""
298306
if RECORD_INDEX_COLUMN_NAME not in entity.columns:
299307
return entity
300308
return entity.select(StarExpression(exclude=[RECORD_INDEX_COLUMN_NAME]))
301309

310+
302311
def duckdb_record_index(cls):
312+
"""Class decorator to add record index methods for duckdb implementations"""
303313
setattr(cls, "add_record_index", _add_duckdb_record_index)
304314
setattr(cls, "drop_record_index", _drop_duckdb_record_index)
305315
return cls

src/dve/core_engine/backends/implementations/duckdb/readers/csv.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,13 @@
66

77
import duckdb as ddb
88
import polars as pl
9-
from duckdb import DuckDBPyConnection, DuckDBPyRelation, StarExpression, default_connection, read_csv
9+
from duckdb import (
10+
DuckDBPyConnection,
11+
DuckDBPyRelation,
12+
StarExpression,
13+
default_connection,
14+
read_csv,
15+
)
1016
from pydantic import BaseModel
1117

1218
from dve.core_engine.backends.base.reader import BaseFileReader, read_function
@@ -24,6 +30,7 @@
2430
from dve.core_engine.type_hints import URI, EntityName
2531
from dve.parser.file_handling import get_content_length
2632

33+
2734
@duckdb_record_index
2835
@duckdb_write_parquet
2936
class DuckDBCSVReader(BaseFileReader):
@@ -113,6 +120,7 @@ def read_to_relation( # pylint: disable=unused-argument
113120
reader_options["columns"] = ddb_schema
114121
return self.add_record_index(read_csv(resource, **reader_options, parallel=False))
115122

123+
116124
@polars_record_index
117125
class PolarsToDuckDBCSVReader(DuckDBCSVReader):
118126
"""
@@ -144,11 +152,14 @@ def read_to_relation( # pylint: disable=unused-argument
144152
for fld in schema.__fields__.values()
145153
}
146154
reader_options["dtypes"] = polars_types
147-
148155

149156
# there is a raise_if_empty arg for 0.18+. Future reference when upgrading. Makes L85
150157
# redundant
151-
df = self.add_record_index(pl.scan_csv(resource, **reader_options).select(list(polars_types.keys()))) # type: ignore # pylint: disable=W0612
158+
df = self.add_record_index( # pylint: disable=W0612
159+
pl.scan_csv(resource, **reader_options).select( # type: ignore
160+
list(polars_types.keys())
161+
)
162+
)
152163

153164
return ddb.sql("SELECT * FROM df")
154165

@@ -192,7 +203,9 @@ def __init__(
192203
def read_to_relation( # pylint: disable=unused-argument
193204
self, resource: URI, entity_name: EntityName, schema: type[BaseModel]
194205
) -> DuckDBPyRelation:
195-
entity: DuckDBPyRelation = super().read_to_relation(resource=resource, entity_name=entity_name, schema=schema)
206+
entity: DuckDBPyRelation = super().read_to_relation(
207+
resource=resource, entity_name=entity_name, schema=schema
208+
)
196209
entity = entity.select(StarExpression(exclude=[RECORD_INDEX_COLUMN_NAME])).distinct()
197210
no_records = entity.shape[0]
198211

src/dve/core_engine/backends/implementations/duckdb/readers/json.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,14 @@
99

1010
from dve.core_engine.backends.base.reader import BaseFileReader, read_function
1111
from dve.core_engine.backends.implementations.duckdb.duckdb_helpers import (
12+
duckdb_record_index,
1213
duckdb_write_parquet,
1314
get_duckdb_type_from_annotation,
1415
)
1516
from dve.core_engine.backends.implementations.duckdb.types import SQLType
16-
from dve.core_engine.backends.implementations.duckdb.duckdb_helpers import duckdb_record_index
1717
from dve.core_engine.type_hints import URI, EntityName
1818

19+
1920
@duckdb_record_index
2021
@duckdb_write_parquet
2122
class DuckDBJSONReader(BaseFileReader):
@@ -48,4 +49,6 @@ def read_to_relation( # pylint: disable=unused-argument
4849
for fld in schema.__fields__.values()
4950
}
5051

51-
return self.add_record_index(read_json(resource, columns=ddb_schema, format=self._json_format)) # type: ignore
52+
return self.add_record_index(
53+
read_json(resource, columns=ddb_schema, format=self._json_format) # type: ignore
54+
)

src/dve/core_engine/backends/implementations/duckdb/readers/xml.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,14 @@
1111
from dve.core_engine.backends.exceptions import MessageBearingError
1212
from dve.core_engine.backends.implementations.duckdb.duckdb_helpers import duckdb_write_parquet
1313
from dve.core_engine.backends.readers.xml import XMLStreamReader
14-
from dve.core_engine.backends.utilities import get_polars_type_from_annotation, polars_record_index, stringify_model
14+
from dve.core_engine.backends.utilities import (
15+
get_polars_type_from_annotation,
16+
polars_record_index,
17+
stringify_model,
18+
)
1519
from dve.core_engine.type_hints import URI
1620

21+
1722
@polars_record_index
1823
@duckdb_write_parquet
1924
class DuckDBXMLStreamReader(XMLStreamReader):
@@ -39,7 +44,9 @@ def read_to_relation(self, resource: URI, entity_name: str, schema: type[BaseMod
3944
for fld in stringify_model(schema).__fields__.values()
4045
}
4146

42-
_lazy_frame = self.add_record_index(pl.LazyFrame(
43-
data=self.read_to_py_iterator(resource, entity_name, schema), schema=polars_schema
44-
))
47+
_lazy_frame = self.add_record_index(
48+
pl.LazyFrame(
49+
data=self.read_to_py_iterator(resource, entity_name, schema), schema=polars_schema
50+
)
51+
)
4552
return self.ddb_connection.sql("select * from _lazy_frame")

src/dve/core_engine/backends/implementations/duckdb/rules.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
from dve.core_engine.templating import template_object
5858
from dve.core_engine.type_hints import Messages
5959

60+
6061
@duckdb_record_index
6162
@duckdb_write_parquet
6263
@duckdb_read_parquet

src/dve/core_engine/backends/implementations/spark/contract.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
dump_processing_errors,
1818
get_feedback_errors_uri,
1919
)
20-
2120
from dve.core_engine.backends.base.contract import BaseDataContract, reader_override
2221
from dve.core_engine.backends.base.utilities import generate_error_casting_entity_message
2322
from dve.core_engine.backends.exceptions import (
@@ -42,6 +41,7 @@
4241
COMPLEX_TYPES: set[type[DataType]] = {StructType, ArrayType, MapType}
4342
"""Spark types indicating complex types."""
4443

44+
4545
@spark_record_index
4646
@spark_write_parquet
4747
@spark_read_parquet
@@ -86,6 +86,7 @@ def create_entity_from_py_iterator(
8686
schema=get_type_from_annotation(schema),
8787
)
8888

89+
# pylint: disable=R0915
8990
def apply_data_contract(
9091
self,
9192
working_dir: URI,

0 commit comments

Comments
 (0)