From 0537d7b9e29d0250981576a1f30b1c70b8f6a660 Mon Sep 17 00:00:00 2001
From: orenniapete <peter.chwiecko@orennia.com>
Date: Wed, 10 Jun 2026 10:17:16 -0600
Subject: [PATCH] fix: populate results_sample when dbt sample_data is
 dict-shaped
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some dbt-core / elementary-dbt-package version combinations return
sample rows wrapped in a dict (e.g. {"rows": [...]}) instead of as a
bare list. The previous code used a hard cast to list which silently
left results_sample empty, so the EDR report showed failure counts but
no example rows.

Fix: add TestsAPI._normalize_results_sample() that accepts both shapes
and extract the rows list from common dict wrapper keys (rows,
sample_rows, results_sample, data). The fix is backward-compatible –
list payloads pass through unchanged and disable_samples is still
respected.

Tests:
- Unit tests for _normalize_results_sample with list, dict (all keys),
  None, and unknown dict
- Unit tests for _get_test_result_from_test_result_db_row covering list
  sample, dict sample, None sample, and disable_samples suppression
- Integration test through get_test_results() verifying the full
  serialization path populates results_sample from both payload shapes

Fixes #2269
---
 elementary/monitor/api/tests/tests.py         |  31 +++-
 tests/mocks/fetchers/tests_fetcher_mock.py    |  62 +++++++
 .../unit/monitor/api/tests/test_tests_api.py  | 162 ++++++++++++++++++
 3 files changed, 252 insertions(+), 3 deletions(-)

diff --git a/elementary/monitor/api/tests/tests.py b/elementary/monitor/api/tests/tests.py
index 41b8f407b..52306ec46 100644
--- a/elementary/monitor/api/tests/tests.py
+++ b/elementary/monitor/api/tests/tests.py
@@ -1,7 +1,7 @@
 import re
 import statistics
 from collections import defaultdict
-from typing import DefaultDict, Dict, List, Optional, Union, cast
+from typing import DefaultDict, Dict, List, Optional, Union
 
 from dateutil import tz
 
@@ -445,6 +445,30 @@ def _parse_test_db_row(cls, test_db_row: TestDBRowSchema) -> TestSchema:
             ),
         )
 
+    @staticmethod
+    def _normalize_results_sample(
+        sample_data: Optional[Union[dict, list]],
+    ) -> Optional[list]:
+        """Normalize sample_data to a list of row dicts.
+
+        The dbt macro get_test_results can return sample_data as either:
+          - a list of row dicts (expected shape, used by most dbt versions), or
+          - a dict wrapping the rows under a key such as "rows", "sample_rows",
+            "results_sample", or "data" (observed in some dbt-core 1.x / package
+            version combinations, causing results_sample to be silently empty in
+            the EDR report).
+
+        See: https://github.com/elementary-data/elementary/issues/2269
+        """
+        if isinstance(sample_data, list):
+            return sample_data
+        if isinstance(sample_data, dict):
+            for key in ("rows", "sample_rows", "results_sample", "data"):
+                value = sample_data.get(key)
+                if isinstance(value, list):
+                    return value
+        return None
+
     @staticmethod
     def _get_test_result_from_test_result_db_row(
         test_result_db_row: TestResultDBRowSchema,
@@ -453,8 +477,9 @@ def _get_test_result_from_test_result_db_row(
         test_results: Optional[Union[DbtTestResultSchema, ElementaryTestResultSchema]]
 
         if test_result_db_row.test_type == "dbt_test":
-            # Sample data is always a list for non-elementary tests
-            sample_data = cast(Optional[list], test_result_db_row.sample_data)
+            sample_data = TestsAPI._normalize_results_sample(
+                test_result_db_row.sample_data
+            )
             if disable_samples:
                 sample_data = None
 
diff --git a/tests/mocks/fetchers/tests_fetcher_mock.py b/tests/mocks/fetchers/tests_fetcher_mock.py
index bea35b740..a8b57f0fe 100644
--- a/tests/mocks/fetchers/tests_fetcher_mock.py
+++ b/tests/mocks/fetchers/tests_fetcher_mock.py
@@ -183,6 +183,68 @@ def get_all_test_results_db_rows(self, *args, **kwargs):
                 days_diff=1,
                 invocations_rank_index=1,
             ),
+            # Row with list-shaped sample_data — should populate results_sample directly.
+            TestResultDBRowSchema(
+                id="mock_id_7",
+                model_unique_id="model_id_1",
+                test_unique_id="test_id_5",
+                elementary_unique_id="test_id_5.generic",
+                detected_at="2023-01-02 10:00:00",
+                database_name="test_db",
+                schema_name="test_schema",
+                table_name="table",
+                column_name="column",
+                test_type="dbt_test",
+                test_sub_type="generic",
+                test_results_description="Got 3 results, configured to fail if != 0",
+                original_path="tests/elementary/tests/test_elementary.py",
+                owners='["Jeff"]',
+                tags='["awesome-o"]',
+                meta="{}",
+                model_meta="{}",
+                test_results_query="select * from table",
+                other=None,
+                test_name="The test list sample",
+                test_params="{}",
+                severity="ERROR",
+                status="fail",
+                test_created_at="2023-01-01 09:00:00",
+                days_diff=1,
+                invocations_rank_index=1,
+                sample_data=[{"id": 1, "val": "a"}, {"id": 2, "val": "b"}, {"id": 3, "val": "c"}],
+            ),
+            # Row with dict-shaped sample_data — regression case for issue #2269.
+            # Some dbt-core / elementary package version combinations return sample rows
+            # nested under a "rows" key rather than as a bare list.
+            TestResultDBRowSchema(
+                id="mock_id_8",
+                model_unique_id="model_id_1",
+                test_unique_id="test_id_6",
+                elementary_unique_id="test_id_6.generic",
+                detected_at="2023-01-02 10:00:00",
+                database_name="test_db",
+                schema_name="test_schema",
+                table_name="table",
+                column_name="column",
+                test_type="dbt_test",
+                test_sub_type="generic",
+                test_results_description="Got 2 results, configured to fail if != 0",
+                original_path="tests/elementary/tests/test_elementary.py",
+                owners='["Jeff"]',
+                tags='["awesome-o"]',
+                meta="{}",
+                model_meta="{}",
+                test_results_query="select * from table",
+                other=None,
+                test_name="The test dict sample",
+                test_params="{}",
+                severity="ERROR",
+                status="fail",
+                test_created_at="2023-01-01 09:00:00",
+                days_diff=1,
+                invocations_rank_index=1,
+                sample_data={"rows": [{"id": 10, "val": "x"}, {"id": 11, "val": "y"}]},
+            ),
         ]
         return [*ELEMENTARY_TEST_RESULT_DB_ROWS, *DBT_TEST_RESULT_DB_ROWS]
 
diff --git a/tests/unit/monitor/api/tests/test_tests_api.py b/tests/unit/monitor/api/tests/test_tests_api.py
index a9e51a403..d01a13055 100644
--- a/tests/unit/monitor/api/tests/test_tests_api.py
+++ b/tests/unit/monitor/api/tests/test_tests_api.py
@@ -1,5 +1,6 @@
 import pytest
 
+from elementary.monitor.fetchers.tests.schema import TestResultDBRowSchema
 from tests.mocks.api.tests_api_mock import MockTestsAPI
 
 
@@ -67,3 +68,164 @@ def test_parse_test_db_row(
 @pytest.fixture
 def tests_api_mock() -> MockTestsAPI:
     return MockTestsAPI()
+
+
+# ---------------------------------------------------------------------------
+# _normalize_results_sample
+# ---------------------------------------------------------------------------
+
+
+def test_normalize_results_sample_with_list():
+    """List-shaped sample_data passes through unchanged."""
+    rows = [{"id": 1}, {"id": 2}]
+    result = MockTestsAPI._normalize_results_sample(rows)
+    assert result == rows
+
+
+def test_normalize_results_sample_with_dict_rows_key():
+    """Dict payload with 'rows' key is unwrapped correctly (issue #2269)."""
+    rows = [{"id": 10}, {"id": 11}]
+    result = MockTestsAPI._normalize_results_sample({"rows": rows})
+    assert result == rows
+
+
+@pytest.mark.parametrize("key", ["sample_rows", "results_sample", "data"])
+def test_normalize_results_sample_with_dict_other_keys(key):
+    """Other common dict wrapper keys are also unwrapped."""
+    rows = [{"id": 1}]
+    result = MockTestsAPI._normalize_results_sample({key: rows})
+    assert result == rows
+
+
+def test_normalize_results_sample_with_none():
+    assert MockTestsAPI._normalize_results_sample(None) is None
+
+
+def test_normalize_results_sample_with_unknown_dict():
+    """Dict with no recognized key returns None rather than crashing."""
+    result = MockTestsAPI._normalize_results_sample({"unknown_key": [{"id": 1}]})
+    assert result is None
+
+
+# ---------------------------------------------------------------------------
+# _get_test_result_from_test_result_db_row – results_sample population
+# ---------------------------------------------------------------------------
+
+
+def _make_dbt_row(**overrides) -> TestResultDBRowSchema:
+    defaults = dict(
+        id="r1",
+        model_unique_id="model_id_1",
+        test_unique_id="test_id_x",
+        elementary_unique_id="test_id_x.generic",
+        detected_at="2023-01-01 10:00:00",
+        database_name="db",
+        schema_name="schema",
+        table_name="table",
+        column_name="col",
+        test_type="dbt_test",
+        test_sub_type="generic",
+        test_results_description="Got 2 results, configured to fail if != 0",
+        test_description=None,
+        original_path="models/test.sql",
+        owners="[]",
+        model_owner="[]",
+        tags="[]",
+        test_tags="[]",
+        model_tags="[]",
+        meta="{}",
+        model_meta="{}",
+        test_results_query=None,
+        other=None,
+        test_name="my_test",
+        test_params="{}",
+        severity="ERROR",
+        status="fail",
+        test_created_at="2023-01-01 09:00:00",
+        days_diff=1,
+        invocations_rank_index=1,
+        sample_data=None,
+        failures=2,
+        package_name=None,
+        execution_time=None,
+        invocation_id=None,
+        test_execution_id=None,
+    )
+    defaults.update(overrides)
+    return TestResultDBRowSchema(**defaults)
+
+
+def test_results_sample_populated_from_list_sample_data():
+    """List-shaped sample_data → results_sample is populated (existing behaviour)."""
+    rows = [{"col": "bad_val_1"}, {"col": "bad_val_2"}]
+    row = _make_dbt_row(sample_data=rows)
+    result = MockTestsAPI._get_test_result_from_test_result_db_row(row)
+    assert result is not None
+    assert result.results_sample == rows
+
+
+def test_results_sample_populated_from_dict_sample_data():
+    """Dict-shaped sample_data → results_sample is still populated (fix for #2269)."""
+    rows = [{"col": "bad_val_1"}, {"col": "bad_val_2"}]
+    row = _make_dbt_row(sample_data={"rows": rows})
+    result = MockTestsAPI._get_test_result_from_test_result_db_row(row)
+    assert result is not None
+    assert result.results_sample == rows
+
+
+def test_results_sample_empty_when_none():
+    row = _make_dbt_row(sample_data=None)
+    result = MockTestsAPI._get_test_result_from_test_result_db_row(row)
+    assert result is not None
+    assert result.results_sample is None
+
+
+def test_results_sample_suppressed_when_disable_samples():
+    rows = [{"col": "bad_val"}]
+    row = _make_dbt_row(sample_data=rows)
+    result = MockTestsAPI._get_test_result_from_test_result_db_row(row, disable_samples=True)
+    assert result is not None
+    assert result.results_sample is None
+
+
+def test_results_sample_suppressed_for_dict_sample_data_when_disable_samples():
+    """disable_samples=True must suppress rows even when sample_data is dict-shaped."""
+    rows = [{"col": "bad_val"}]
+    row = _make_dbt_row(sample_data={"rows": rows})
+    result = MockTestsAPI._get_test_result_from_test_result_db_row(row, disable_samples=True)
+    assert result is not None
+    assert result.results_sample is None
+
+
+# ---------------------------------------------------------------------------
+# Integration: get_test_results includes sample rows from dict-shaped payload
+# ---------------------------------------------------------------------------
+
+
+def test_get_test_results_includes_dict_sample_rows(tests_api_mock: MockTestsAPI):
+    """End-to-end: dict-shaped sample_data in the fetcher mock reaches the report payload."""
+    test_results = tests_api_mock.get_test_results(invocation_id=None)
+
+    # Flatten all result objects from all model keys
+    all_results = [r for results in test_results.values() for r in results]
+
+    # Find the two new mock rows added for issue #2269
+    list_sample_result = next(
+        (r for r in all_results if r.metadata.test_unique_id == "test_id_5"), None
+    )
+    dict_sample_result = next(
+        (r for r in all_results if r.metadata.test_unique_id == "test_id_6"), None
+    )
+
+    assert list_sample_result is not None, "mock row test_id_5 (list sample) not found"
+    assert dict_sample_result is not None, "mock row test_id_6 (dict sample) not found"
+
+    assert list_sample_result.test_results.results_sample == [
+        {"id": 1, "val": "a"},
+        {"id": 2, "val": "b"},
+        {"id": 3, "val": "c"},
+    ]
+    assert dict_sample_result.test_results.results_sample == [
+        {"id": 10, "val": "x"},
+        {"id": 11, "val": "y"},
+    ]