1111"""
1212Query masking utilities
1313
14- All masking functions (SqlParse, SqlFluff, SqlGlot ) reuse the already-parsed AST
14+ Masking functions (SqlParse, SqlFluff) reuse the already-parsed AST
1515from the LineageRunner to avoid duplicate parsing and improve performance.
1616"""
1717
2121
2222from cachetools import LRUCache
2323from collate_sqllineage .core .parser .sqlfluff .analyzer import SqlFluffLineageAnalyzer
24- from collate_sqllineage .core .parser .sqlglot .analyzer import SqlGlotLineageAnalyzer
2524from collate_sqllineage .core .parser .sqlparse .analyzer import SqlParseLineageAnalyzer
2625from collate_sqllineage .runner import LineageRunner
2726from sqlparse .sql import Comparison
@@ -130,6 +129,22 @@ def replace_literals(segment):
130129 return query
131130
132131
@calculate_execution_time(context="GetSqlParseLineageRunner")
def get_sqlparse_lineage_runner(query: str) -> LineageRunner:
    """
    Build a LineageRunner for ``query`` backed by the SqlParse analyzer.

    The runner's ``source_tables`` are read once before the runner is
    returned, so any error raised by that access surfaces inside this
    function rather than at the caller's first use of the runner.
    """
    runner = LineageRunner(query, analyzer=SqlParseLineageAnalyzer)
    # NOTE(review): this access presumably forces the runner's lazy
    # analysis to run now - confirm against LineageRunner.
    len(runner.source_tables)
    return runner
137+
138+
@calculate_execution_time(context="GetSqlFluffLineageRunner")
def get_sqlfluff_lineage_runner(query: str, dialect: str) -> LineageRunner:
    """
    Build a LineageRunner for ``query`` backed by the SqlFluff analyzer.

    ``dialect`` is forwarded to the runner unchanged. The runner's
    ``source_tables`` are read once before the runner is returned, so any
    error raised by that access surfaces inside this function rather than
    at the caller's first use of the runner.
    """
    runner = LineageRunner(
        query,
        analyzer=SqlFluffLineageAnalyzer,
        dialect=dialect,
    )
    # NOTE(review): this access presumably forces the runner's lazy
    # analysis to run now - confirm against LineageRunner.
    len(runner.source_tables)
    return runner
146+
147+
133148@calculate_execution_time (context = "MaskQuery" )
134149def mask_query (
135150 query : str ,
@@ -159,7 +174,8 @@ def mask_query_impl(
159174 query_hash : Optional [str ] = None ,
160175) -> Optional [str ]:
161176 """
162- Mask a query using SqlGlot, SqlFluff, or SqlParse based on the analyzer used.
177+ Mask a query using SqlParse or SqlFluff.
178+ Only these two analyzers support literal masking (SqlGlot is excluded).
163179 """
164180 hash_prefix = f"[{ query_hash } ] " if query_hash else ""
165181
@@ -170,34 +186,27 @@ def mask_query_impl(
170186 logger .debug (f"{ hash_prefix } Query masking skipped as no parser available." )
171187 return None
172188
173- masking_parser = parser
174- # Since SqlGlot generalizes query structures/syntax, we will use
175- # SqlParse for masking if SqlGlot is used for parsing
176- if parser and isinstance (parser ._analyzer , SqlGlotLineageAnalyzer ):
177- masking_parser = LineageRunner (query , analyzer = SqlParseLineageAnalyzer )
178- len (masking_parser .source_tables )
189+ masking_parser = None
190+
191+ # Only reuse parser if it's already SqlParse or SqlFluff
192+ if parser and isinstance (
193+ parser ._analyzer , (SqlParseLineageAnalyzer , SqlFluffLineageAnalyzer )
194+ ):
195+ masking_parser = parser
179196
197+ # If no suitable parser, create one with fallback: SqlParse → SqlFluff
180198 if not masking_parser :
181- # Try to create a parser with the same fallback strategy as LineageParser
182- # but since we are not using SqlGlot for masking, we skip it here.
183- # Try SqlFluff, then SqlParse
184- # TODO: Evaluate if sqlparse should be the first choice here since it is
185- # faster and almost same support as sqlfluff for masking literals.
186199 try :
187- masking_parser = LineageRunner (
188- query , dialect = dialect , analyzer = SqlFluffLineageAnalyzer
189- )
190- len (masking_parser .source_tables )
200+ masking_parser = get_sqlparse_lineage_runner (query )
191201 except Exception :
192- masking_parser = LineageRunner (query , analyzer = SqlParseLineageAnalyzer )
193- len (masking_parser .source_tables )
202+ masking_parser = get_sqlfluff_lineage_runner (query , dialect = dialect )
194203
195204 logger .debug (
196205 f"{ hash_prefix } Query masking started using [{ masking_parser ._analyzer .__class__ .__name__ } ]"
197206 f" for parser [{ parser and parser ._analyzer .__class__ .__name__ } ]"
198207 )
199208
200- # Check which analyzer was used based on _analyzer attribute
209+ # Dispatch to appropriate masking function
201210 if isinstance (masking_parser ._analyzer , SqlFluffLineageAnalyzer ):
202211 masked_query = mask_literals_with_sqlfluff (
203212 query , masking_parser , query_hash
@@ -208,7 +217,7 @@ def mask_query_impl(
208217 )
209218 else :
210219 logger .debug (
211- f"{ hash_prefix } Query masking skipped as no parser._analyzer available."
220+ f"{ hash_prefix } Query masking skipped as no supported analyzer available."
212221 f" Analyzer: { masking_parser ._analyzer } "
213222 )
214223 return None
0 commit comments