1616 get_duckdb_type_from_annotation ,
1717)
1818from dve .core_engine .backends .implementations .duckdb .types import SQLType
19- from dve .core_engine .backends .implementations . duckdb .utilities import check_csv_header_expected
19+ from dve .core_engine .backends .readers .utilities import check_csv_header_expected
2020from dve .core_engine .backends .utilities import get_polars_type_from_annotation
2121from dve .core_engine .message import FeedbackMessage
2222from dve .core_engine .type_hints import URI , EntityName
2525
2626@duckdb_write_parquet
2727class DuckDBCSVReader (BaseFileReader ):
28- """A reader for CSV files"""
28+ """A reader for CSV files including the ability to compare the passed model
29+ to the file header, if it exists.
30+
31+ field_check: flag to compare submitted file header to the accompanying pydantic model
32+ field_check_error_code: The error code to provide if the file header doesn't contain
33+ the expected fields
34+ field_check_error_message: The error message to provide if the file header doesn't contain
35+ the expected fields"""
2936
3037 # TODO - the read_to_relation should include the schema and determine whether to
3138 # TODO - stringify or not
@@ -54,14 +61,11 @@ def __init__(
5461 def perform_field_check (
5562 self , resource : URI , entity_name : str , expected_schema : type [BaseModel ]
5663 ):
64+ """Check that the header of the CSV aligns with the provided model"""
5765 if not self .header :
5866 raise ValueError ("Cannot perform field check without a CSV header" )
5967
60- if missing := check_csv_header_expected (
61- resource ,
62- expected_schema ,
63- self .delim
64- ):
68+ if missing := check_csv_header_expected (resource , expected_schema , self .delim ):
6569 raise MessageBearingError (
6670 "The CSV header doesn't match what is expected" ,
6771 messages = [
@@ -71,7 +75,7 @@ def perform_field_check(
7175 failure_type = "submission" ,
7276 error_location = "Whole File" ,
7377 error_code = self .field_check_error_code ,
74- error_message = f"{ self .field_check_error_message } - missing fields: { missing } " ,
78+ error_message = f"{ self .field_check_error_message } - missing fields: { missing } " , # pylint: disable=line-too-long
7579 )
7680 ],
7781 )
@@ -171,9 +175,14 @@ class DuckDBCSVRepeatingHeaderReader(PolarsToDuckDBCSVReader):
171175 """
172176
173177 def __init__ (
174- self , non_unique_header_error_code : Optional [str ] = "NonUniqueHeader" , * args , ** kwargs
178+ self ,
179+ * args ,
180+ non_unique_header_error_code : Optional [str ] = "NonUniqueHeader" ,
181+ non_unique_header_error_message : Optional [str ] = None ,
182+ ** kwargs ,
175183 ):
176184 self ._non_unique_header_code = non_unique_header_error_code
185+ self ._non_unique_header_message = non_unique_header_error_message
177186 super ().__init__ (* args , ** kwargs )
178187
179188 @read_function (DuckDBPyRelation )
@@ -200,6 +209,8 @@ def read_to_relation( # pylint: disable=unused-argument
200209 failure_type = "submission" ,
201210 error_message = (
202211 f"Found { no_records } distinct combination of header values."
212+ if not self ._non_unique_header_message
213+ else self ._non_unique_header_message
203214 ),
204215 error_location = entity_name ,
205216 category = "Bad file" ,
0 commit comments