Skip to content

Commit 6631baa

Browse files
Merge pull request #51 from NHSDigital/release_v062
release: v0.6.2 (#50)
2 parents 82bcdbf + aa3a266 commit 6631baa

10 files changed

Lines changed: 98 additions & 19 deletions

File tree

src/dve/core_engine/backends/base/contract.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,7 @@ def read_raw_entities(
339339
reader_metadata = contract_metadata.reader_metadata[entity_name]
340340
extension = "." + (
341341
get_file_suffix(resource) or ""
342-
) # Already checked that extension supported.
342+
).lower() # Already checked that extension supported.
343343

344344
reader_config = reader_metadata[extension]
345345
reader_type = get_reader(reader_config.reader)

src/dve/core_engine/backends/implementations/duckdb/reference_data.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,4 +46,4 @@ def load_parquet_file(self, uri: str) -> DuckDBPyRelation:
4646
@mark_refdata_file_extension("arrow")
4747
def load_arrow_file(self, uri: str) -> DuckDBPyRelation:
4848
"""Load an arrow ipc file into a duckdb relation"""
49-
return self.connection.from_arrow(ipc.open_file(uri).read_all()) # type:ignore
49+
return self.connection.from_arrow(ipc.open_stream(uri).read_all()) # type:ignore

src/dve/metadata_parser/domain_types.py

Lines changed: 49 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -173,33 +173,67 @@ def permissive_nhs_number(warn_on_test_numbers: bool = False):
173173
return type("NHSNumber", (NHSNumber, *NHSNumber.__bases__), dict_)
174174

175175

176-
# TODO: Make the spacing configurable. Not all downstream consumers want a single space
177176
class Postcode(types.ConstrainedStr):
178177
"""Postcode constrained string"""
179178

180179
regex: re.Pattern = POSTCODE_REGEX
181180
strip_whitespace = True
181+
apply_normalize = True
182182

183183
@staticmethod
184-
def normalize(postcode: str) -> Optional[str]:
184+
def normalize(_postcode: str) -> Optional[str]:
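185185
"""Removes all spaces, then returns the upper-cased postcode with a single space before the last three characters (None if empty or a null postcode)"""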
186-
postcode = postcode.replace(" ", "")
187-
if not postcode or postcode.lower() in NULL_POSTCODES:
186+
_postcode = _postcode.replace(" ", "")
187+
if not _postcode or _postcode.lower() in NULL_POSTCODES:
188188
return None
189-
postcode = postcode.replace(" ", "")
190-
return " ".join((postcode[0:-3], postcode[-3:])).upper()
189+
_postcode = _postcode.replace(" ", "")
190+
return " ".join((_postcode[0:-3], _postcode[-3:])).upper()
191191

192192
@classmethod
193193
def validate(cls, value: str) -> Optional[str]: # type: ignore
194194
"""Validates the given postcode"""
195-
stripped = cls.normalize(value)
196-
if not stripped:
195+
if cls.apply_normalize and value:
196+
value = cls.normalize(value) # type: ignore
197+
198+
if not value:
197199
return None
198200

199-
if not cls.regex.match(stripped):
201+
if not cls.regex.match(value):
200202
raise ValueError("Invalid Postcode submitted")
201203

202-
return stripped
204+
return value
205+
206+
207+
@lru_cache()
208+
@validate_arguments
209+
def postcode(
210+
# pylint: disable=R0913
211+
strip_whitespace: Optional[bool] = True,
212+
to_upper: Optional[bool] = False,
213+
to_lower: Optional[bool] = False,
214+
strict: Optional[bool] = False,
215+
min_length: Optional[int] = None,
216+
max_length: Optional[int] = None,
217+
curtail_length: Optional[int] = None,
218+
regex: Optional[str] = POSTCODE_REGEX, # type: ignore
219+
apply_normalize: Optional[bool] = True,
220+
) -> type[Postcode]:
221+
"""Return a Postcode class configured with the given string constraints
222+
and normalisation setting.
223+
224+
"""
225+
dict_ = Postcode.__dict__.copy()
226+
dict_["strip_whitespace"] = strip_whitespace
227+
dict_["to_upper"] = to_upper
228+
dict_["to_lower"] = to_lower
229+
dict_["strict"] = strict
230+
dict_["min_length"] = min_length
231+
dict_["max_length"] = max_length
232+
dict_["curtail_length"] = curtail_length
233+
dict_["regex"] = regex
234+
dict_["apply_normalize"] = apply_normalize
235+
236+
return type("Postcode", (Postcode, *Postcode.__bases__), dict_)
203237

204238

205239
class OrgID(_SimpleRegexValidator):
@@ -482,6 +516,11 @@ def validate(cls, value: Union[dt.time, dt.datetime, str]) -> dt.time | None:
482516

483517
return new_time
484518

519+
@classmethod
520+
def __get_validators__(cls) -> Iterator[classmethod]:
521+
"""Gets all validators"""
522+
yield cls.validate # type: ignore
523+
485524

486525
@lru_cache()
487526
@validate_arguments

src/dve/pipeline/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def load_config(
4747

4848
def load_reader(dataset: Dataset, model_name: str, file_extension: str):
4949
"""Loads the reader for the given feed, model name and file extension"""
50-
reader_config = dataset[model_name].reader_config[f".{file_extension}"]
50+
reader_config = dataset[model_name].reader_config[f".{file_extension.lower()}"]
5151
reader = _READER_REGISTRY[reader_config.reader](**reader_config.kwargs_)
5252
return reader
5353

tests/features/books.feature

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ Feature: Pipeline tests using the books dataset
55
introduces more complex transformations that require aggregation.
66

77
Scenario: Validate complex nested XML data (spark)
8-
Given I submit the books file nested_books.xml for processing
8+
Given I submit the books file nested_books.XML for processing
99
And A spark pipeline is configured with schema file 'nested_books.dischema.json'
1010
And I add initial audit entries for the submission
1111
Then the latest audit record for the submission is marked with processing status file_transformation
@@ -32,7 +32,7 @@ Feature: Pipeline tests using the books dataset
3232
| number_warnings | 0 |
3333

3434
Scenario: Validate complex nested XML data (duckdb)
35-
Given I submit the books file nested_books.xml for processing
35+
Given I submit the books file nested_books.XML for processing
3636
And A duckdb pipeline is configured with schema file 'nested_books_ddb.dischema.json'
3737
And I add initial audit entries for the submission
3838
Then the latest audit record for the submission is marked with processing status file_transformation

tests/test_core_engine/test_backends/test_implementations/test_duckdb/test_data_contract.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def test_duckdb_data_contract_csv(temp_csv_file):
5050
"description": "test",
5151
"callable": "formattedtime",
5252
"constraints": {
53-
"time_format": "%Y-%m-%d",
53+
"time_format": "%H:%M:%S",
5454
"timezone_treatment": "forbid"
5555
}
5656
}

tests/test_core_engine/test_engine.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,8 @@ def test_dummy_books_run(self, spark, temp_dir: str):
9999
with test_instance:
100100
_, errors_uri = test_instance.run_pipeline(
101101
entity_locations={
102-
"header": get_test_file_path("books/nested_books.xml").as_posix(),
103-
"nested_books": get_test_file_path("books/nested_books.xml").as_posix(),
102+
"header": get_test_file_path("books/nested_books.XML").as_posix(),
103+
"nested_books": get_test_file_path("books/nested_books.XML").as_posix(),
104104
}
105105
)
106106

tests/test_model_generation/test_domain_types.py

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,24 @@ def test_postcode(postcode, expected):
9898
assert model.postcode == expected
9999

100100

101+
@pytest.mark.parametrize(
102+
("postcode", "should_error"),
103+
[
104+
("LS479AJ", True),
105+
("PostcodeIamNot", True),
106+
("LS47 9AJ", False)
107+
]
108+
)
109+
def test_postcode_errors_with_apply_normalize_disabled(postcode: str, should_error: bool):
110+
postcode_type = hct.postcode(apply_normalize=False)
111+
112+
if should_error:
113+
with pytest.raises(ValueError, match="Invalid Postcode submitted"):
114+
assert postcode_type.validate(postcode)
115+
else:
116+
assert postcode_type.validate(postcode)
117+
118+
101119
@pytest.mark.parametrize(("org_id", "expected"), [("AB123", "AB123"), ("ABCDE", "ABCDE")])
102120
def test_org_id_passes(org_id, expected):
103121
model = ATestModel(org_id=org_id)
@@ -347,7 +365,8 @@ def test_formattedtime(
347365
["23:00:00", "%H:%M:%S", "require",],
348366
["23:00:00Z", "%I:%M:%S", "forbid",],
349367
[dt.datetime(2025, 12, 1, 13, 0, 5, tzinfo=UTC), "%H:%M:%S", "forbid",],
350-
[dt.time(13, 0, 5, tzinfo=UTC), "%H:%M:%S", "forbid",]
368+
[dt.time(13, 0, 5, tzinfo=UTC), "%H:%M:%S", "forbid",],
369+
["12:00", "%H:%M:%S", "forbid",],
351370
]
352371
)
353372
def test_formattedtime_raises(
@@ -360,3 +379,24 @@ def test_formattedtime_raises(
360379
time_type = hct.formattedtime(time_format, timezone_treatment)
361380
with pytest.raises(ValueError):
362381
time_type.validate(time_to_validate) # pylint: disable=W0106
382+
383+
384+
class StrictTimeModel(BaseModel):
385+
time_val: hct.formattedtime(time_format="%H:%M:%S", timezone_treatment="forbid")
386+
387+
388+
@pytest.mark.parametrize(
389+
["time_to_validate", "expected_to_error"],
390+
[
391+
("12:00:00", False),
392+
("120000", True),
393+
("12:00", True),
394+
("12", True),
395+
]
396+
)
397+
def test_formattedtime_against_model(time_to_validate: str, expected_to_error: bool):
398+
if expected_to_error:
399+
with pytest.raises(ValueError):
400+
StrictTimeModel(time_val=time_to_validate)
401+
else:
402+
StrictTimeModel(time_val=time_to_validate)
-82 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)