99from functools import lru_cache
1010from itertools import starmap
1111from threading import Lock
12- from typing import Optional , Union
12+ from typing import Any , Optional , Union
1313from uuid import uuid4
1414
1515import polars as pl
4949)
5050
5151
52+ # pylint: disable=R0904
5253class BaseDVEPipeline :
5354 """
5455 Base class for running a DVE Pipeline either by a given step or a full e2e process.
@@ -64,6 +65,7 @@ def __init__(
6465 submitted_files_path : Optional [URI ],
6566 job_run_id : Optional [int ] = None ,
6667 logger : Optional [logging .Logger ] = None ,
68+ backend_reader_kwargs : Optional [dict [str , Any ]] = None ,
6769 ):
6870 self ._submitted_files_path = submitted_files_path
6971 self ._processed_files_path = processed_files_path
@@ -76,6 +78,7 @@ def __init__(
7678 self ._summary_lock = Lock ()
7779 self ._rec_tracking_lock = Lock ()
7880 self ._aggregates_lock = Lock ()
81+ self ._backend_reader_kwargs = backend_reader_kwargs
7982
8083 if self ._data_contract :
8184 self ._data_contract .logger = self ._logger
@@ -107,6 +110,12 @@ def step_implementations(self) -> Optional[BaseStepImplementations[EntityType]]:
107110 """The step implementations to apply the business rules to a given dataset"""
108111 return self ._step_implementations
109112
113+ @property
114+ def backend_reader_kwargs (self ) -> dict [str , Any ] | None :
115+ """Important required arguments for all readers related to the specific backend
116+ that can't be specified at time of writing config eg. duckdb connection"""
117+ return self ._backend_reader_kwargs
118+
110119 @staticmethod
111120 def get_entity_count (entity : EntityType ) -> int :
112121 """Get a row count of an entity stored as parquet"""
@@ -203,7 +212,9 @@ def write_file_to_parquet(
203212
204213 for model_name , model in models .items ():
205214 self ._logger .info (f"Transforming { model_name } to stringified parquet" )
206- reader : BaseFileReader = load_reader (dataset , model_name , ext )
215+ reader : BaseFileReader = load_reader (
216+ dataset , model_name , ext , self .backend_reader_kwargs
217+ )
207218 try :
208219 if not entity_type :
209220 reader .write_parquet (
0 commit comments