Skip to content

Commit 8805b90

Browse files
committed
feat: further refinement of movies test dataset.
1 parent 4499041 commit 8805b90

8 files changed

Lines changed: 52 additions & 128 deletions

File tree

src/dve/core_engine/constants.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,7 @@
22

33
ROWID_COLUMN_NAME: str = "__rowid__"
44
"""The name of the column containing the row ID for each entity."""
5+
6+
CONTRACT_ERROR_VALUE_FIELD_NAME: str = "__error_value"
7+
"""The name of the field that can be used to extract the field value that caused
8+
a pydantic validation error"""

src/dve/core_engine/message.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,10 @@
1111
from pydantic import ValidationError, validator, BaseModel
1212
from pydantic.dataclasses import dataclass
1313

14-
from dve.core_engine.constants import ROWID_COLUMN_NAME
14+
from dve.core_engine.constants import (
15+
CONTRACT_ERROR_VALUE_FIELD_NAME,
16+
ROWID_COLUMN_NAME
17+
)
1518
from dve.core_engine.templating import ENVIRONMENT, template_object
1619
from dve.core_engine.type_hints import (
1720
EntityName,
@@ -24,6 +27,8 @@
2427
)
2528
from dve.parser.type_hints import FieldName
2629

30+
31+
2732
class DataContractErrorDetail(BaseModel):
2833
error_code: str
2934
error_message: Optional[str] = None
@@ -37,7 +42,9 @@ def template_message(self,
3742
def extract_error_value(records, error_location):
3843
_records = copy.copy(records)
3944
try:
40-
_records["__error_value"] = reduce(operator.getitem, error_location, _records)
45+
_records[CONTRACT_ERROR_VALUE_FIELD_NAME] = reduce(operator.getitem,
46+
error_location,
47+
_records)
4148
except KeyError:
4249
pass
4350
return _records

tests/features/movies.feature

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,11 @@ Feature: Pipeline tests using the movies dataset
1616
Then the movies entity is stored as a parquet after the file_transformation phase
1717
And the latest audit record for the submission is marked with processing status data_contract
1818
When I run the data contract phase
19-
Then there are 2 record rejections from the data_contract phase
19+
Then there are 3 record rejections from the data_contract phase
2020
And there are errors with the following details and associated error_count from the data_contract phase
21-
| ErrorCode | ErrorMessage | error_count |
22-
| BLANKYEAR | year not provided | 1 |
23-
| DODGYYEAR | year value (NOT_A_NUMBER) is invalid | 1 |
21+
| ErrorCode | ErrorMessage | error_count |
22+
| BLANKYEAR | year not provided | 1 |
23+
| DODGYYEAR | year value (NOT_A_NUMBER) is invalid | 1 |
24+
| DODGYDATE | date_joined value is not valid: daft_date | 1 |
2425
And the movies entity is stored as a parquet after the data_contract phase
2526
And the latest audit record for the submission is marked with processing status business_rules

tests/features/steps/steps_pipeline.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -186,9 +186,6 @@ def check_error_record_details_from_service(context: Context, service:str):
186186
filter_expr, error_count = err_details
187187
assert message_df.filter(filter_expr).shape[0] == error_count
188188

189-
190-
191-
192189

193190
@given("A {implementation} pipeline is configured")
194191
@given("A {implementation} pipeline is configured with schema file '{schema_file_name}'")

tests/test_core_engine/test_backends/test_implementations/test_spark/test_audit_spark.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ def test_dve_audit_using_thread_pool(spark_audit_manager_threaded: SparkAuditing
194194
_sub_info.submission_id = uuid4().hex
195195
aud.add_new_submissions([_sub_info])
196196
while not aud.queue.empty():
197-
time.sleep(2)
197+
time.sleep(0.5)
198198
assert _sub_info.submission_id
199199

200200
at_entry = (
@@ -206,7 +206,7 @@ def test_dve_audit_using_thread_pool(spark_audit_manager_threaded: SparkAuditing
206206
assert len(at_entry) == 1
207207
aud.mark_transform([_sub_info.submission_id])
208208
while not aud.queue.empty():
209-
time.sleep(2)
209+
time.sleep(0.5)
210210

211211
file_trans = aud.get_all_file_transformation_submissions()
212212
assert [rw.submission_id for rw in file_trans.collect()] == [_sub_info.submission_id]

tests/testdata/movies/movies.dischema.json

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,11 @@
11
{
22
"contract": {
33
"schemas": {
4-
"ratings": {
5-
"fields": {
6-
"IMDb": "NonNegativeFloat",
7-
"RottenTomatoes": "str"
8-
9-
}
10-
},
114
"cast": {
125
"fields": {
136
"name": "str",
14-
"role": "str"
7+
"role": "str",
8+
"date_joined": "date"
159
}
1610
}
1711
},
@@ -27,8 +21,8 @@
2721
},
2822
"duration_minutes": "int",
2923
"ratings": {
30-
"model": "ratings",
31-
"is_array": false
24+
"type": "NonNegativeFloat",
25+
"is_array": true
3226
},
3327
"cast": {
3428
"model": "cast",

tests/testdata/movies/movies.json

Lines changed: 22 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -1,141 +1,56 @@
11
[
22
{
3-
"title": "The Great Adventure",
3+
"title": "The Greatest Movie Ever",
44
"year": "NOT_A_NUMBER",
55
"genre": ["Adventure", "Family"],
66
"duration_minutes": 102,
7-
"ratings": {
8-
"IMDb": 7.4,
9-
"RottenTomatoes": "85%"
10-
},
7+
"ratings": [ 7.4, 8.5 ],
118
"cast": [
12-
{ "name": "Emma Stone", "role": "Mom" },
13-
{ "name": "Jacob Tremblay", "role": "Son" }
9+
{ "name": "A. Star", "role": "Mom", "date_joined": "2019-03-17" },
10+
{ "name": "C. Actor", "role": "Son", "date_joined": "daft_date" }
1411
]
1512
},
1613
{
17-
"title": "Magical Zoo Escape",
14+
"title": "Not a great one",
1815
"genre": ["Animation", "Comedy", "Family"],
1916
"duration_minutes": 88,
20-
"ratings": {
21-
"IMDb": 6.9,
22-
"RottenTomatoes": "78%"
23-
},
17+
"ratings": [6.9, 7.8],
2418
"cast": [
25-
{ "name": "Kevin Hart", "role": "Leo the Lion (voice)" },
26-
{ "name": "Kristen Bell", "role": "Ellie the Elephant (voice)" }
19+
{ "name": "J. Smith", "role": "Lion", "date_joined": "2015-11-12" },
20+
{ "name": "T. Car", "role": "Mouse", "date_joined": "2015-11-12" }
2721
]
2822
},
2923
{
30-
"title": "My Robot Brother",
24+
"title": "Good family movie",
3125
"year": 2022,
3226
"genre": ["Sci-Fi", "Family"],
3327
"duration_minutes": 95,
34-
"ratings": {
35-
"IMDb": 7.1,
36-
"RottenTomatoes": "81%"
37-
},
28+
"ratings": [7, 8.2, 6.3],
3829
"cast": [
39-
{ "name": "Noah Jupe", "role": "Max" },
40-
{ "name": "Gaten Matarazzo", "role": "Robo" }
30+
{ "name": "D. Farnesbarnes", "role": "Robot" },
31+
{ "name": "G. Adams", "role": "Alien", "date_joined": "2017-08-01" }
4132
]
4233
},
4334
{
44-
"title": "Grandpa's Magical Train",
35+
"title": "One with a cat and a dog",
4536
"year": 2020,
4637
"genre": ["Fantasy", "Family"],
4738
"duration_minutes": 110,
48-
"ratings": {
49-
"IMDb": 7.8,
50-
"RottenTomatoes": "88%"
51-
},
39+
"ratings": [6.1],
5240
"cast": [
53-
{ "name": "Tom Hanks", "role": "Grandpa George" },
54-
{ "name": "Millie Bobby Brown", "role": "Emily" }
41+
{ "name": "R. Williams", "role": "Cat", "date_joined": "2016-05-06" },
42+
{ "name": "T. Brown", "role": "Dog", "date_joined": "2016-05-07" }
5543
]
5644
},
5745
{
58-
"title": "Camp Mystery",
59-
"year": 2018,
46+
"title": "A bad 'un",
47+
"year": 2011,
6048
"genre": ["Mystery", "Family"],
6149
"duration_minutes": 97,
62-
"ratings": {
63-
"IMDb": 6.5,
64-
"RottenTomatoes": "70%"
65-
},
50+
"ratings": [1.2, 3.4, 5.6, 3.4],
6651
"cast": [
67-
{ "name": "Finn Wolfhard", "role": "Detective Dan" },
68-
{ "name": "McKenna Grace", "role": "Lily" }
69-
]
70-
},
71-
{
72-
"title": "The Secret Sandwich Club",
73-
"year": 2021,
74-
"genre": ["Comedy", "Family"],
75-
"duration_minutes": 89,
76-
"ratings": {
77-
"IMDb": 7.2,
78-
"RottenTomatoes": "83%"
79-
},
80-
"cast": [
81-
{ "name": "Jack Black", "role": "Chef Max" },
82-
{ "name": "Brooklynn Prince", "role": "Sally" }
83-
]
84-
},
85-
{
86-
"title": "Space Pals",
87-
"year": 2020,
88-
"genre": ["Animation", "Adventure", "Family"],
89-
"duration_minutes": 91,
90-
"ratings": {
91-
"IMDb": 6.8,
92-
"RottenTomatoes": "75%"
93-
},
94-
"cast": [
95-
{ "name": "John Cena", "role": "Buzz the Dog (voice)" },
96-
{ "name": "Zendaya", "role": "Luna the Cat (voice)" }
97-
]
98-
},
99-
{
100-
"title": "Treasure in the Treehouse",
101-
"year": 2017,
102-
"genre": ["Adventure", "Family"],
103-
"duration_minutes": 93,
104-
"ratings": {
105-
"IMDb": 7.0,
106-
"RottenTomatoes": "79%"
107-
},
108-
"cast": [
109-
{ "name": "Isabela Merced", "role": "Tina" },
110-
{ "name": "Noah Schnapp", "role": "Jamie" }
111-
]
112-
},
113-
{
114-
"title": "Pet Squad Heroes",
115-
"year": 2023,
116-
"genre": ["Animation", "Comedy", "Family"],
117-
"duration_minutes": 86,
118-
"ratings": {
119-
"IMDb": 6.7,
120-
"RottenTomatoes": "73%"
121-
},
122-
"cast": [
123-
{ "name": "Awkwafina", "role": "Penny the Poodle (voice)" },
124-
{ "name": "Josh Gad", "role": "Rocky the Raccoon (voice)" }
125-
]
126-
},
127-
{
128-
"title": "Holiday Magic",
129-
"year": 2022,
130-
"genre": ["Fantasy", "Family", "Holiday"],
131-
"duration_minutes": 99,
132-
"ratings": {
133-
"IMDb": 7.5,
134-
"RottenTomatoes": "90%"
135-
},
136-
"cast": [
137-
{ "name": "Octavia Spencer", "role": "Mrs. Claus" },
138-
{ "name": "Jaden Smith", "role": "Eli" }
52+
{ "name": "R. Green", "role": "Baby", "date_joined": "2013-11-12" },
53+
{ "name": "P. Plum", "role": "Dad", "date_joined": "2013-10-08" }
13954
]
14055
}
141-
]
56+
]

tests/testdata/movies/movies_contract_error_details.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,11 @@
1414
"error_code": "DODGYYEAR",
1515
"error_message": "year value ({{year}}) is invalid"
1616
}
17+
},
18+
"cast.date_joined": {
19+
"Bad value": {
20+
"error_code": "DODGYDATE",
21+
"error_message": "date_joined value is not valid: {{__error_value}}"
22+
}
1723
}
1824
}

0 commit comments

Comments
 (0)