diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7de7977..589991d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,6 +2,7 @@ name: CI on: pull_request: + types: [opened, synchronize, reopened, labeled] push: branches: [main] diff --git a/db2sql/domain/policy/identifier.py b/db2sql/domain/policy/identifier.py index 656926e..474c7d2 100644 --- a/db2sql/domain/policy/identifier.py +++ b/db2sql/domain/policy/identifier.py @@ -4,12 +4,22 @@ import re -_CAMEL_RE = re.compile(r"(? str: - """Convert CamelCase to snake_case (idempotent on already-snake_case input).""" - return _CAMEL_RE.sub("_", name).lower() + """Convert CamelCase / PascalCase to snake_case. + + Keeps acronym runs intact (``HTTPServer`` → ``http_server``, + ``UserID`` → ``user_id``) and leaves all-caps identifiers as a single + word (``MYTABLE`` → ``mytable``). Idempotent on snake_case input. + """ + name = _CAMEL_BOUNDARY_RE.sub(r"\1_\2", name) + name = _LOWER_UPPER_RE.sub(r"\1_\2", name) + return name.lower() def normalize_identifier(name: str, preserve_case: bool) -> str: diff --git a/docs/cli.rst b/docs/cli.rst index 2d9c483..a446620 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -193,7 +193,19 @@ Output column names) are kept exactly as they appear in the source database. When disabled (the default), identifiers are converted to ``snake_case`` - so they work without quoting in PostgreSQL. + so they work without quoting in PostgreSQL. The conversion keeps + acronym runs glued together and collapses all-caps identifiers to a + single word: + + ===================== ===================== + Source identifier Converted + ===================== ===================== + ``CamelCase`` ``camel_case`` + ``HTTPServer`` ``http_server`` + ``UserID`` ``user_id`` + ``MYTABLE`` ``mytable`` + ``customer_ID`` ``customer_id`` + ===================== ===================== .. option:: --transaction / --no-transaction diff --git a/docs/configuration.rst b/docs/configuration.rst index 879e1cc..475c316 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -269,7 +269,11 @@ dump * - ``preserve_case`` - ``false`` - Preserve identifier case as-is. When ``false``, identifiers are - converted to ``snake_case``. + converted to ``snake_case`` — acronym runs stay glued + (``HTTPServer`` → ``http_server``, ``UserID`` → ``user_id``) and + all-caps names collapse to a single word + (``MYTABLE`` → ``mytable``). See :option:`--preserve-case` for + the full table of examples. * - ``limit_records`` - ``-1`` - Maximum rows per table. ``-1`` means no limit. @@ -642,7 +646,8 @@ Oracle connections are configured almost entirely through preserve_case: false default_data_format: copy mapping_schemas: - HR: hr # rewrite Oracle's upper-case owner to snake_case + HR: human_resources # optional rename; snake_case normalization + # alone would already produce "hr" Oracle → MSSQL ~~~~~~~~~~~~~~ diff --git a/tests/unit/domain/policy/test_identifier.py b/tests/unit/domain/policy/test_identifier.py index 1e26f8a..26aed42 100644 --- a/tests/unit/domain/policy/test_identifier.py +++ b/tests/unit/domain/policy/test_identifier.py @@ -11,17 +11,27 @@ "name, expected", [ ("CamelCase", "camel_case"), - ("HTTPServer", "h_t_t_p_server"), + ("HTTPServer", "http_server"), ("already_snake", "already_snake"), ("lower", "lower"), ("Single", "single"), - ("UserID", "user_i_d"), + ("UserID", "user_id"), + ("XMLParser", "xml_parser"), + ("getHTTPResponseCode", "get_http_response_code"), + ("MYTABLE", "mytable"), + ("customer_ID", "customer_id"), + ("", ""), ], ) def test_to_snake_case(name: str, expected: str) -> None: assert to_snake_case(name) == expected +def test_to_snake_case_is_idempotent() -> None: + once = to_snake_case("HTTPServerName") + assert to_snake_case(once) == once + + def test_normalize_identifier_preserve_case_returns_input() -> None: assert normalize_identifier("UserName", preserve_case=True) == "UserName"