88from typing_extensions import Annotated , Literal
99
1010from dve .core_engine .backends .base .core import get_entity_type
11- from dve .core_engine .backends .exceptions import MissingRefDataEntity
11+ from dve .core_engine .backends .exceptions import MissingRefDataEntity , RefdataLacksFileExtensionSupport
1212from dve .core_engine .backends .types import EntityType
13- from dve .core_engine .type_hints import EntityName
13+ from dve .core_engine .type_hints import URI , EntityName
14+ import dve .parser .file_handling as fh
15+ from dve .parser .file_handling .implementations .file import LocalFilesystemImplementation
16+ from dve .parser .file_handling .service import _get_implementation
17+
_FILE_EXTENSION_NAME: str = "_REFDATA_FILE_EXTENSION"
"""Name of the attribute set on reader methods to record which
reference file extension they are responsible for loading."""


def mark_refdata_file_extension(file_extension):
    """Build a decorator that tags a method as the loader for a file extension.

    The returned decorator stores ``file_extension`` on the decorated callable
    under the attribute named by ``_FILE_EXTENSION_NAME`` and returns that same
    callable; the callable itself is not wrapped or replaced.
    """

    def _attach_extension(func: Callable):
        # Tag in place so the original callable is what ends up on the class.
        setattr(func, _FILE_EXTENSION_NAME, file_extension)
        return func

    return _attach_extension
1428
1529
1630class ReferenceTable (BaseModel , frozen = True ):
@@ -37,7 +51,10 @@ class ReferenceFile(BaseModel, frozen=True):
3751 type : Literal ["filename" ]
3852 """The object type."""
3953 filename : str
40- """The path to the reference data (as Parquet) relative to the contract."""
54+ """The path to the reference data relative to the contract."""
@property
def file_extension(self) -> str:
    """The suffix of ``filename`` as reported by ``fh.get_file_suffix``."""
    suffix = fh.get_file_suffix(self.filename)
    return suffix
4158
4259
4360class ReferenceURI (BaseModel , frozen = True ):
@@ -47,6 +64,9 @@ class ReferenceURI(BaseModel, frozen=True):
4764 """The object type."""
4865 uri : str
4966 """The absolute URI of the reference data (as Parquet)."""
@property
def file_extension(self) -> str:
    """The suffix of ``uri`` as reported by ``fh.get_file_suffix``."""
    suffix = fh.get_file_suffix(self.uri)
    return suffix
5070
5171
5272ReferenceConfig = Union [ReferenceFile , ReferenceTable , ReferenceURI ]
@@ -71,6 +91,12 @@ class BaseRefDataLoader(Generic[EntityType], Mapping[EntityName, EntityType], AB
7191 A mapping between refdata config types and functions to call to load these configs
7292 into reference data entities
7393 """
94+
95+ __reader_functions__ : ClassVar [dict [str , Callable ]] = {}
96+ """
97+ A mapping between file extensions and functions to load the file uris
98+ into reference data entities
99+ """
74100 prefix : str = "refdata_"
75101
76102 def __init_subclass__ (cls , * _ , ** __ ) -> None :
@@ -91,20 +117,29 @@ class variable for the subclass.
91117 method = getattr (cls , method_name , None )
92118 if method is None or not callable (method ):
93119 continue
120+
121+ if ext := getattr (method , _FILE_EXTENSION_NAME , None ):
122+ cls .__reader_functions__ [ext ] = method
123+ continue
94124
95125 type_hints = get_type_hints (method )
96126 if set (type_hints .keys ()) != {"config" , "return" }:
97127 continue
98128 config_type = type_hints ["config" ]
99129 if not issubclass (config_type , BaseModel ):
100130 continue
131+
101132 cls .__step_functions__ [config_type ] = method # type: ignore
102133
103134 # pylint: disable=unused-argument
104135 def __init__ (
105- self , reference_entity_config : dict [EntityName , ReferenceConfig ], ** kwargs
136+ self ,
137+ reference_entity_config : dict [EntityName , ReferenceConfig ],
138+ dataset_config_uri : Optional [URI ] = None ,
139+ ** kwargs
106140 ) -> None :
107141 self .reference_entity_config = reference_entity_config
142+ self .dataset_config_uri = dataset_config_uri
108143 """
109144 Configuration options for the reference data. This is likely to vary
110145 from backend to backend (e.g. might be locations and file types for
@@ -119,15 +154,30 @@ def load_table(self, config: ReferenceTable) -> EntityType:
119154 """Load reference entity from a database table"""
120155 raise NotImplementedError ()
121156
122- @abstractmethod
def load_file(self, config: ReferenceFile) -> EntityType:
    """Load a reference entity from a path relative to the dataset config.

    The target location is built from ``self.dataset_config_uri`` and
    ``config.filename``. When the dataset config URI resolves to the local
    filesystem implementation, the location is converted to a POSIX-style
    local path before being handed to the reader.

    The reader is looked up in ``__reader_functions__`` by
    ``config.file_extension`` and called as ``reader(self, target_location)``.

    Raises:
        AttributeError: if ``dataset_config_uri`` was not supplied.
        RefdataLacksFileExtensionSupport: if no reader is registered for
            the file's extension.
    """
    if not self.dataset_config_uri:
        raise AttributeError("dataset_config_uri must be specified if using relative paths")

    target_location = fh.build_relative_uri(self.dataset_config_uri, config.filename)
    if isinstance(_get_implementation(self.dataset_config_uri), LocalFilesystemImplementation):
        target_location = fh.file_uri_to_local_path(target_location).as_posix()

    file_extension = config.file_extension
    # Only the registry lookup is guarded: a KeyError raised *inside* a reader
    # is a genuine reader failure and must not be reported as a missing
    # file-extension handler.
    try:
        reader = self.__reader_functions__[file_extension]
    except KeyError as err:
        raise RefdataLacksFileExtensionSupport(file_extension=file_extension) from err
    return reader(self, target_location)
126169
127- @abstractmethod
def load_uri(self, config: ReferenceURI) -> EntityType:
    """Load a reference entity from an absolute URI.

    When ``config.uri`` resolves to the local filesystem implementation it
    is converted to a POSIX-style local path; otherwise the URI is passed
    through unchanged.

    The reader is looked up in ``__reader_functions__`` by
    ``config.file_extension`` and called as ``reader(self, target_location)``.

    Raises:
        RefdataLacksFileExtensionSupport: if no reader is registered for
            the URI's file extension.
    """
    if isinstance(_get_implementation(config.uri), LocalFilesystemImplementation):
        target_location = fh.file_uri_to_local_path(config.uri).as_posix()
    else:
        target_location = config.uri

    file_extension = config.file_extension
    # Only the registry lookup is guarded: a KeyError raised *inside* a reader
    # is a genuine reader failure and must not be reported as a missing
    # file-extension handler.
    try:
        reader = self.__reader_functions__[file_extension]
    except KeyError as err:
        raise RefdataLacksFileExtensionSupport(file_extension=file_extension) from err
    return reader(self, target_location)
131181
132182 def load_entity (self , entity_name : EntityName , config : ReferenceConfig ) -> EntityType :
133183 """Load a reference entity given the reference config"""
0 commit comments