|
1 | 1 | """Utilities to support reporting""" |
2 | 2 |
|
3 | 3 | import datetime as dt |
4 | | -from itertools import chain |
5 | 4 | import json |
6 | 5 | import logging |
| 6 | +from collections.abc import Iterable |
| 7 | +from itertools import chain |
7 | 8 | from multiprocessing import Queue |
8 | 9 | from threading import Thread |
9 | | -from typing import Iterable, Iterator, Optional, Union |
| 10 | +from typing import Optional, Union |
10 | 11 |
|
11 | | -from dve.core_engine.message import UserMessage |
12 | | -from dve.core_engine.loggers import get_logger |
13 | 12 | import dve.parser.file_handling as fh |
14 | 13 | from dve.core_engine.exceptions import CriticalProcessingError |
| 14 | +from dve.core_engine.loggers import get_logger |
| 15 | +from dve.core_engine.message import UserMessage |
15 | 16 | from dve.core_engine.type_hints import URI, DVEStage, Messages |
16 | 17 |
|
17 | 18 |
|
18 | 19 | def get_feedback_errors_uri(working_folder: URI, step_name: DVEStage) -> URI: |
19 | 20 | """Determine the location of json lines file containing all errors generated in a step""" |
20 | 21 | return fh.joinuri(working_folder, "errors", f"{step_name}_errors.jsonl") |
21 | 22 |
|
| 23 | + |
22 | 24 | def get_processing_errors_uri(working_folder: URI) -> URI: |
23 | 25 | """Determine the location of json lines file containing all processing |
24 | | - errors generated from DVE run""" |
| 26 | + errors generated from DVE run""" |
25 | 27 | return fh.joinuri(working_folder, "errors", "processing_errors.jsonl") |
26 | 28 |
|
27 | 29 |
|
@@ -88,74 +90,85 @@ def dump_processing_errors( |
88 | 90 | ) |
89 | 91 |
|
90 | 92 | with fh.open_stream(error_file, "a") as f: |
91 | | - f.write("\n".join([json.dumps(rec, default=str) for rec in processed]) + "\n") |
92 | | - |
| 93 | + f.write("\n".join([json.dumps(rec, default=str) for rec in processed]) + "\n") |
| 94 | + |
93 | 95 | return error_file |
94 | 96 |
|
| 97 | + |
95 | 98 | def load_feedback_messages(feedback_messages_uri: URI) -> Iterable[UserMessage]: |
| 99 | + """Load user messages from jsonl file""" |
96 | 100 | if not fh.get_resource_exists(feedback_messages_uri): |
97 | 101 | return |
98 | 102 | with fh.open_stream(feedback_messages_uri) as errs: |
99 | 103 | yield from (UserMessage(**json.loads(err)) for err in errs.readlines()) |
100 | 104 |
|
| 105 | + |
101 | 106 | def load_all_error_messages(error_directory_uri: URI) -> Iterable[UserMessage]: |
102 | | - return chain.from_iterable([load_feedback_messages(err_file) for err_file, _ in fh.iter_prefix(error_directory_uri) if err_file.endswith(".jsonl")]) |
| 107 | + "Load user messages from all jsonl files" |
| 108 | + return chain.from_iterable( |
| 109 | + [ |
| 110 | + load_feedback_messages(err_file) |
| 111 | + for err_file, _ in fh.iter_prefix(error_directory_uri) |
| 112 | + if err_file.endswith(".jsonl") |
| 113 | + ] |
| 114 | + ) |
| 115 | + |
103 | 116 |
|
104 | 117 | class BackgroundMessageWriter: |
105 | | - def __init__(self, |
106 | | - working_directory: URI, |
107 | | - dve_stage: DVEStage, |
108 | | - key_fields: Optional[dict[str, list[str]]] = None, |
109 | | - logger: Optional[logging.Logger] = None): |
| 118 | + """Controls batch writes to error jsonl files""" |
| 119 | + |
| 120 | + def __init__( |
| 121 | + self, |
| 122 | + working_directory: URI, |
| 123 | + dve_stage: DVEStage, |
| 124 | + key_fields: Optional[dict[str, list[str]]] = None, |
| 125 | + logger: Optional[logging.Logger] = None, |
| 126 | + ): |
110 | 127 | self._working_directory = working_directory |
111 | 128 | self._dve_stage = dve_stage |
112 | | - self._feedback_message_uri = get_feedback_errors_uri(self._working_directory, self._dve_stage) |
| 129 | + self._feedback_message_uri = get_feedback_errors_uri( |
| 130 | + self._working_directory, self._dve_stage |
| 131 | + ) |
113 | 132 | self._key_fields = key_fields |
114 | 133 | self.logger = logger or get_logger(type(self).__name__) |
115 | 134 | self._write_thread = None |
116 | 135 | self._queue = Queue() |
117 | | - |
| 136 | + |
118 | 137 | @property |
119 | | - def write_queue(self): |
| 138 | + def write_queue(self) -> Queue: # type: ignore |
| 139 | + """Queue for storing batches of messages to be written""" |
120 | 140 | return self._queue |
121 | | - |
| 141 | + |
122 | 142 | @property |
123 | | - def write_thread(self): |
| 143 | + def write_thread(self) -> Thread: # type: ignore |
| 144 | + """Thread to write batches of messages to jsonl file""" |
124 | 145 | if not self._write_thread: |
125 | 146 | self._write_thread = Thread(target=self._write_process_wrapper) |
126 | 147 | return self._write_thread |
127 | | - |
128 | | - |
| 148 | + |
129 | 149 | def _write_process_wrapper(self): |
130 | 150 | """Wrapper for dump feedback errors to run in background process""" |
131 | 151 | while True: |
132 | 152 | if msgs := self.write_queue.get(): |
133 | | - dump_feedback_errors(self._working_directory, self._dve_stage, msgs, self._key_fields) |
| 153 | + dump_feedback_errors( |
| 154 | + self._working_directory, self._dve_stage, msgs, self._key_fields |
| 155 | + ) |
134 | 156 | else: |
135 | 157 | break |
136 | | - |
| 158 | + |
137 | 159 | def __enter__(self) -> "BackgroundMessageWriter": |
138 | 160 | self.write_thread.start() |
139 | 161 | return self |
140 | | - |
| 162 | + |
141 | 163 | def __exit__(self, exc_type, exc_value, traceback): |
142 | 164 | if exc_type: |
143 | 165 | self.logger.exception( |
144 | 166 | "Issue occured during background write process:", |
145 | | - exc_info=(exc_type, exc_value, traceback) |
| 167 | + exc_info=(exc_type, exc_value, traceback), |
146 | 168 | ) |
147 | 169 | self.write_queue.put(None) |
148 | 170 | self.write_thread.join() |
149 | | - |
150 | | - |
151 | | - |
152 | | -def write_process_wrapper(working_directory: URI, *, queue: Queue, key_fields: Optional[dict[str, list[str]]] = None): |
153 | | - """Wrapper for dump feedback errors to run in background process""" |
154 | | - while True: |
155 | | - if msgs := queue.get(): |
156 | | - dump_feedback_errors(fh.joinuri(working_directory, "data_contract"), msgs, key_fields) |
157 | | - else: |
158 | | - break |
| 171 | + |
159 | 172 |
|
160 | 173 | def conditional_cast(value, primary_keys: list[str], value_separator: str) -> Union[list[str], str]: |
161 | 174 | """Determines what to do with a value coming back from the error list""" |
|
0 commit comments