 Feature: Pipeline tests using the movies dataset
-  Tests for the processing framework which use the movies dataset.
+  Tests for the processing framework which use the movies dataset.
 
-  This tests submissions in JSON format, with configuration in JSON config files.
-  Complex types are tested (arrays, nested structs)
+  This tests submissions in JSON format, with configuration in JSON config files.
+  Complex types are tested (arrays, nested structs)
 
-  Some validation of entity attributes is performed: SQL expressions and Python filter
-  functions are used, and templatable business rules feature in the transformations.
+  Some validation of entity attributes is performed: SQL expressions and Python filter
+  functions are used, and templatable business rules feature in the transformations.
 
   Scenario: Validate and filter movies (spark)
-    Given I submit the movies file movies.json for processing
-    And A spark pipeline is configured
-    And I create the following reference data tables in the database movies_refdata
-      | table_name | parquet_path |
-      | sequels | tests/testdata/movies/refdata/movies_sequels.parquet |
-    And I add initial audit entries for the submission
-    Then the latest audit record for the submission is marked with processing status file_transformation
-    When I run the file transformation phase
-    Then the movies entity is stored as a parquet after the file_transformation phase
-    And the latest audit record for the submission is marked with processing status data_contract
-    When I run the data contract phase
-    Then there are 3 record rejections from the data_contract phase
-    And there are errors with the following details and associated error_count from the data_contract phase
-      | ErrorCode | ErrorMessage | error_count |
-      | BLANKYEAR | year not provided | 1 |
-      | DODGYYEAR | year value (NOT_A_NUMBER) is invalid | 1 |
-      | DODGYDATE | date_joined value is not valid: daft_date | 1 |
-    And the movies entity is stored as a parquet after the data_contract phase
-    And the latest audit record for the submission is marked with processing status business_rules
-    When I run the business rules phase
-    Then The rules restrict "movies" to 4 qualifying records
-    And At least one row from "movies" has generated error code "LIMITED_RATINGS"
-    And At least one row from "derived" has generated error code "RUBBISH_SEQUEL"
-    And the latest audit record for the submission is marked with processing status error_report
-    When I run the error report phase
-    Then An error report is produced
-    And The statistics entry for the submission shows the following information
-      | parameter | value |
-      | record_count | 5 |
-      | number_record_rejections | 4 |
-      | number_warnings | 1 |
+    Given I submit the movies file movies.json for processing
+    And A spark pipeline is configured
+    And I create the following reference data tables in the database movies_refdata
+      | table_name | parquet_path |
+      | sequels | tests/testdata/movies/refdata/movies_sequels.parquet |
+    And I add initial audit entries for the submission
+    Then the latest audit record for the submission is marked with processing status file_transformation
+    When I run the file transformation phase
+    Then the movies entity is stored as a parquet after the file_transformation phase
+    And the latest audit record for the submission is marked with processing status data_contract
+    When I run the data contract phase
+    Then there are 3 record rejections from the data_contract phase
+    And there are errors with the following details and associated error_count from the data_contract phase
+      | ErrorCode | ErrorMessage | error_count |
+      | BLANKYEAR | year not provided | 1 |
+      | DODGYYEAR | year value (NOT_A_NUMBER) is invalid | 1 |
+      | DODGYDATE | date_joined value is not valid: daft_date | 1 |
+    And the movies entity is stored as a parquet after the data_contract phase
+    And the latest audit record for the submission is marked with processing status business_rules
+    When I run the business rules phase
+    Then The rules restrict "movies" to 4 qualifying records
+    And there are errors with the following details and associated error_count from the business_rules phase
+      | ErrorCode | ErrorMessage | error_count |
+      | LIMITED_RATINGS | Movie has too few ratings ([6.1]) | 1 |
+      | RUBBISH_SEQUEL | The movie The Greatest Movie Ever has a rubbish sequel | 1 |
+    And the latest audit record for the submission is marked with processing status error_report
+    When I run the error report phase
+    Then An error report is produced
+    And The statistics entry for the submission shows the following information
+      | parameter | value |
+      | record_count | 5 |
+      | number_record_rejections | 4 |
+      | number_warnings | 1 |
 
   Scenario: Validate and filter movies (duckdb)
     Given I submit the movies file movies.json for processing
@@ -62,8 +64,10 @@ Feature: Pipeline tests using the movies dataset
     And the latest audit record for the submission is marked with processing status business_rules
     When I run the business rules phase
     Then The rules restrict "movies" to 4 qualifying records
-    And At least one row from "movies" has generated error code "LIMITED_RATINGS"
-    And At least one row from "derived" has generated error code "RUBBISH_SEQUEL"
+    And there are errors with the following details and associated error_count from the business_rules phase
+      | ErrorCode | ErrorMessage | error_count |
+      | LIMITED_RATINGS | Movie has too few ratings ([6.1]) | 1 |
+      | RUBBISH_SEQUEL | The movie The Greatest Movie Ever has a rubbish sequel | 1 |
     And the latest audit record for the submission is marked with processing status error_report
     When I run the error report phase
    Then An error report is produced
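The diff above replaces the looser "At least one row … has generated error code" assertions with an exact table of expected errors and counts. A minimal sketch of the comparison such a step could perform (the helper name and dict shapes are assumptions for illustration, not the repo's actual step definitions):

```python
from collections import Counter

def check_expected_errors(errors, expected_rows):
    """Compare actual error records against a Gherkin-style expectation table.

    `errors`: list of dicts with 'ErrorCode' and 'ErrorMessage' keys.
    `expected_rows`: list of dicts with 'ErrorCode', 'ErrorMessage' and
    'error_count' keys, as parsed from the feature-file table.
    Returns a list of mismatch descriptions (empty when everything matches).
    """
    actual = Counter((e["ErrorCode"], e["ErrorMessage"]) for e in errors)
    mismatches = []
    for row in expected_rows:
        key = (row["ErrorCode"], row["ErrorMessage"])
        want = int(row["error_count"])
        got = actual.get(key, 0)
        if got != want:
            mismatches.append(f"{key}: expected {want}, found {got}")
    return mismatches

# Example mirroring the business_rules expectation table from the scenario:
expected = [
    {"ErrorCode": "LIMITED_RATINGS",
     "ErrorMessage": "Movie has too few ratings ([6.1])", "error_count": "1"},
    {"ErrorCode": "RUBBISH_SEQUEL",
     "ErrorMessage": "The movie The Greatest Movie Ever has a rubbish sequel",
     "error_count": "1"},
]
actual_errors = [
    {"ErrorCode": "LIMITED_RATINGS",
     "ErrorMessage": "Movie has too few ratings ([6.1])"},
    {"ErrorCode": "RUBBISH_SEQUEL",
     "ErrorMessage": "The movie The Greatest Movie Ever has a rubbish sequel"},
]
print(check_expected_errors(actual_errors, expected))  # → []
```

Checking exact counts per (code, message) pair makes the scenarios stricter than the previous existence checks: a duplicate or missing error now fails the step rather than passing silently.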