|
16 | 16 | from unittest import TestCase |
17 | 17 |
|
18 | 18 | import pytest |
| 19 | +from collate_sqllineage.core.models import Location |
| 20 | +from collate_sqllineage.core.models import Table as LineageTable |
19 | 21 |
|
20 | 22 | from metadata.generated.schema.entity.data.table import Table |
21 | 23 | from metadata.ingestion.lineage.models import Dialect |
@@ -335,3 +337,130 @@ def test_copy_to_statements_filtered(self): |
335 | 337 | result, |
336 | 338 | f"Query should be filtered: {query}", |
337 | 339 | ) |
| 340 | + |
| 341 | + # ------------------------------------------------------------------------- |
| 342 | + # Snowflake Stage Lineage Tests |
| 343 | + # ------------------------------------------------------------------------- |
| 344 | + |
def test_copy_into_stage_from_table_not_filtered(self):
    """
    Snowflake unload statements (COPY INTO @stage FROM <table or subquery>)
    must survive LineageParser.clean_raw_query: the call must not return
    None for any of them, since they carry table -> stage lineage.
    """
    unload_statements = (
        # named stage, bare table
        "COPY INTO @my_stage FROM my_table",
        # fully qualified stage, subquery source
        "COPY INTO @db.schema.stage FROM (SELECT * FROM t)",
        # lower-case keywords, stage with a path
        "copy into @stage/path FROM table1",
        # user stage (@~) with and without trailing options / path
        "COPY INTO @~/ FROM my_table FILE_FORMAT = (TYPE = CSV COMPRESSION = GZIP)",
        "COPY INTO @~/staged FROM sales_data",
        # nested stage path, fully qualified source table
        "COPY INTO @my_stage/daily/2024/ FROM reporting.public.daily_metrics",
        # stage path combined with a filtered subquery
        "COPY INTO @external_stage/path/ FROM (SELECT col1 FROM src_table WHERE id > 100)",
    )

    for query in unload_statements:
        cleaned = LineageParser.clean_raw_query(query)
        failure_message = (
            f"COPY INTO @stage FROM table should NOT be filtered: {query}"
        )
        self.assertIsNotNone(cleaned, failure_message)
| 366 | + |
def test_stage_lineage_source_as_location_type(self):
    """
    Verify that COPY INTO table FROM @stage returns Location as source
    and Table as target with correct types.
    """
    query = "COPY INTO wine_quality FROM @demo FILE_FORMAT = wine_csv_format;"
    parser = LineageParser(query, dialect=Dialect.SNOWFLAKE)

    # Exactly one entity on each side of the load.
    self.assertEqual(len(parser.source_tables), 1)
    self.assertEqual(len(parser.target_tables), 1)

    source = parser.source_tables[0]
    target = parser.target_tables[0]

    # The stage must be modelled as a Location and not as a Table.
    self.assertIsInstance(source, Location)
    self.assertNotIsInstance(source, LineageTable)

    # The loaded table must be a regular lineage Table; this is the half of
    # the documented contract that was previously left unchecked.
    self.assertIsInstance(target, LineageTable)
| 379 | + |
def test_stage_lineage_target_as_location_type(self):
    """
    Verify that COPY INTO @stage FROM table returns Table as source
    and Location as target with correct types.
    """
    query = "COPY INTO @my_stage FROM my_table"
    parser = LineageParser(query, dialect=Dialect.SNOWFLAKE)

    # Exactly one entity on each side of the unload.
    self.assertEqual(len(parser.source_tables), 1)
    self.assertEqual(len(parser.target_tables), 1)

    source = parser.source_tables[0]
    target = parser.target_tables[0]

    # The unloaded table must be a regular lineage Table; this is the half
    # of the documented contract that was previously left unchecked.
    self.assertIsInstance(source, LineageTable)

    # The stage must be modelled as a Location and not as a Table.
    self.assertNotIsInstance(target, LineageTable)
    self.assertIsInstance(target, Location)
| 392 | + |
def test_stage_lineage_fully_qualified_names(self):
    """
    Check database.schema.stage names in both directions: the stage as the
    source of a load and as the target of an unload. In each direction the
    stage must be a Location and both sides must render (via str) as the
    fully qualified name used in the statement.
    """
    # Load: stage -> table
    load_parser = LineageParser(
        "COPY INTO db.schema.target_table FROM @db.schema.my_stage",
        dialect=Dialect.SNOWFLAKE,
    )
    self.assertEqual(len(load_parser.source_tables), 1)
    self.assertEqual(len(load_parser.target_tables), 1)

    loaded_from = load_parser.source_tables[0]
    loaded_into = load_parser.target_tables[0]
    self.assertIsInstance(loaded_from, Location)
    self.assertEqual(str(loaded_from), "db.schema.my_stage")
    self.assertEqual(str(loaded_into), "db.schema.target_table")

    # Unload: table -> stage
    unload_parser = LineageParser(
        "COPY INTO @db.schema.my_stage FROM db.schema.source_table",
        dialect=Dialect.SNOWFLAKE,
    )
    self.assertEqual(len(unload_parser.source_tables), 1)
    self.assertEqual(len(unload_parser.target_tables), 1)

    unloaded_into = unload_parser.target_tables[0]
    unloaded_from = unload_parser.source_tables[0]
    self.assertIsInstance(unloaded_into, Location)
    self.assertEqual(str(unloaded_from), "db.schema.source_table")
    self.assertEqual(str(unloaded_into), "db.schema.my_stage")
| 417 | + |
def test_stage_lineage_unload_with_select_subquery(self):
    """
    For COPY INTO @stage FROM (SELECT ...), the single source must be the
    table referenced inside the subquery and the single target must be a
    Location.
    """
    stage_path = "@external_stage/path/"
    subquery = "(SELECT col1, col2 FROM db.schema.source_table WHERE id > 100)"
    query = f"COPY INTO {stage_path} FROM {subquery}"

    parser = LineageParser(query, dialect=Dialect.SNOWFLAKE)

    self.assertEqual(len(parser.source_tables), 1)
    self.assertEqual(len(parser.target_tables), 1)

    inner_table = parser.source_tables[0]
    self.assertEqual(str(inner_table), "db.schema.source_table")

    stage = parser.target_tables[0]
    self.assertIsInstance(stage, Location)
| 433 | + |
def test_stage_lineage_user_stage(self):
    """
    Unloading into the user stage (@~/) must produce exactly one Table
    source and exactly one Location target.
    """
    parser = LineageParser(
        "COPY INTO @~/ FROM my_table FILE_FORMAT = (TYPE = CSV)",
        dialect=Dialect.SNOWFLAKE,
    )

    self.assertEqual(len(parser.source_tables), 1)
    self.assertEqual(len(parser.target_tables), 1)

    unloaded_table = parser.source_tables[0]
    user_stage = parser.target_tables[0]
    self.assertIsInstance(unloaded_table, LineageTable)
    self.assertIsInstance(user_stage, Location)
| 445 | + |
def test_stage_lineage_with_file_format_options(self):
    """
    Trailing COPY options (FILE_FORMAT, PATTERN) must not prevent lineage
    extraction: every statement below has to yield at least one source and
    at least one target.
    """
    statements_with_options = (
        # inline file format on a load
        "COPY INTO my_table FROM @stage FILE_FORMAT = (TYPE = CSV SKIP_HEADER = 1)",
        # named file format on a load
        "COPY INTO my_table FROM @stage FILE_FORMAT = wine_csv_format",
        # inline file format on an unload
        "COPY INTO @stage FROM my_table FILE_FORMAT = (TYPE = PARQUET)",
        # file-name pattern on a load
        "COPY INTO my_table FROM @stage PATTERN='.*[.]csv'",
    )

    for query in statements_with_options:
        parser = LineageParser(query, dialect=Dialect.SNOWFLAKE)

        has_sources = len(parser.source_tables) > 0
        self.assertTrue(has_sources, f"Expected source tables for query: {query}")

        has_targets = len(parser.target_tables) > 0
        self.assertTrue(has_targets, f"Expected target tables for query: {query}")
0 commit comments