Skip to content

Commit e5a06cf

Browse files
TeddyCr and ulixius9 committed
MINOR - Column name Length patch (#26530)
* chore(bulkEndpoint): create bulk bundle suite body object
* patch(columnName): remove maxLength for column name
* patch(columnName): remove files from commit
* patch(columnName): remove unused code
* fix unit tests

---------

Co-authored-by: ulixius9 <mayursingal9@gmail.com>
1 parent 498ffb5 commit e5a06cf

3 files changed

Lines changed: 9 additions & 148 deletions

File tree

ingestion/src/metadata/ingestion/models/custom_basemodel_validation.py

Lines changed: 9 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
RESERVED_COLON_KEYWORD = "__reserved__colon__"
2323
RESERVED_ARROW_KEYWORD = "__reserved__arrow__"
2424
RESERVED_QUOTE_KEYWORD = "__reserved__quote__"
25-
COLUMN_NAME_MAX_LENGTH = 256
2625

2726

2827
class TransformDirection(Enum):
@@ -149,13 +148,6 @@ def get_transformer(model: Optional[Any]) -> Optional[Callable]:
149148
return None
150149

151150

152-
def _truncate_if_encoding(value: str, transformer) -> str:
153-
"""Truncate encoded value to COLUMN_NAME_MAX_LENGTH if encoding."""
154-
if transformer == replace_separators and len(value) > COLUMN_NAME_MAX_LENGTH:
155-
return value[:COLUMN_NAME_MAX_LENGTH]
156-
return value
157-
158-
159151
def transform_all_names(obj, transformer):
160152
"""Transform all name fields recursively"""
161153
if not obj:
@@ -164,9 +156,9 @@ def transform_all_names(obj, transformer):
164156
# Transform name field if it exists (supports both obj.name.root and obj.root)
165157
name = getattr(obj, "name", None)
166158
if name and hasattr(name, "root") and name.root is not None:
167-
name.root = _truncate_if_encoding(transformer(name.root), transformer)
159+
name.root = transformer(name.root)
168160
elif hasattr(obj, "root") and obj.root is not None:
169-
obj.root = _truncate_if_encoding(transformer(obj.root), transformer)
161+
obj.root = transformer(obj.root)
170162

171163
# Transform nested collections in a single loop each
172164
for attr_name in ["columns", "children"]:
@@ -183,12 +175,11 @@ def transform_all_names(obj, transformer):
183175
for constraint in table_constraints:
184176
if hasattr(constraint, "columns"):
185177
constraint.columns = [
186-
_truncate_if_encoding(transformer(col), transformer)
187-
for col in constraint.columns
178+
transformer(col) for col in constraint.columns
188179
]
189180

190181
if transformer == replace_separators and type(name) == str:
191-
obj.name = _truncate_if_encoding(transformer(name), transformer)
182+
obj.name = transformer(name)
192183

193184

194185
def transform_entity_names(entity: Any, model: Optional[Any]) -> Any:
@@ -201,15 +192,11 @@ def transform_entity_names(entity: Any, model: Optional[Any]) -> Any:
201192

202193
# Root attribute handling
203194
if hasattr(entity, "root") and entity.root is not None:
204-
if model_name.startswith("Create"):
205-
encoded = replace_separators(entity.root)
206-
entity.root = (
207-
encoded[:COLUMN_NAME_MAX_LENGTH]
208-
if len(encoded) > COLUMN_NAME_MAX_LENGTH
209-
else encoded
210-
)
211-
else:
212-
entity.root = revert_separators(entity.root)
195+
entity.root = (
196+
replace_separators(entity.root)
197+
if model_name.startswith("Create")
198+
else revert_separators(entity.root)
199+
)
213200
return entity
214201

215202
# Get model-specific transformer

ingestion/tests/unit/models/test_custom_basemodel_validation.py

Lines changed: 0 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -992,131 +992,6 @@ def test_dashboard_datamodel_round_trip_validation(self):
992992
self.assertEqual(fetch_result.columns[0].name.root, original_name)
993993

994994

995-
class TestColumnNameEncodingTruncation:
996-
"""
997-
Test that column names expanded beyond 256 chars by replace_separators
998-
are truncated to fit the ColumnName maxLength constraint.
999-
1000-
Real-world scenario: PowerBI column names with double quotes (e.g. survey
1001-
questions) where each '"' expands to '__reserved__quote__' (+19 chars).
1002-
"""
1003-
1004-
REAL_WORLD_COLUMN_NAME = (
1005-
"Q8 (Root Cause) - Was the communication clear and structured? "
1006-
'following the relevant stages of communication, a "what, why, how" '
1007-
"format for the answer, and any major grammar/language mistakes "
1008-
"affecting communication. | Human Support - 33%"
1009-
)
1010-
1011-
def test_replace_separators_expands_name_beyond_256(self):
1012-
encoded = replace_separators(self.REAL_WORLD_COLUMN_NAME)
1013-
assert len(self.REAL_WORLD_COLUMN_NAME) <= 256
1014-
assert len(encoded) > 256
1015-
1016-
def test_create_table_request_truncates_encoded_column_name(self):
1017-
from metadata.generated.schema.type.basic import (
1018-
EntityName,
1019-
FullyQualifiedEntityName,
1020-
)
1021-
1022-
req = CreateTableRequest(
1023-
name=EntityName("test_table"),
1024-
databaseSchema=FullyQualifiedEntityName("service.db.schema"),
1025-
columns=[
1026-
Column(
1027-
name=ColumnName(self.REAL_WORLD_COLUMN_NAME),
1028-
displayName=self.REAL_WORLD_COLUMN_NAME,
1029-
dataType=DataType.STRING,
1030-
)
1031-
],
1032-
)
1033-
encoded_root = req.columns[0].name.root
1034-
assert "__reserved__quote__" in encoded_root
1035-
assert len(encoded_root) <= 256
1036-
1037-
def test_create_dashboard_data_model_truncates_encoded_column_name(self):
1038-
from metadata.generated.schema.api.data.createDashboardDataModel import (
1039-
CreateDashboardDataModelRequest,
1040-
)
1041-
from metadata.generated.schema.entity.data.dashboardDataModel import (
1042-
DataModelType,
1043-
)
1044-
from metadata.generated.schema.type.basic import (
1045-
EntityName,
1046-
FullyQualifiedEntityName,
1047-
)
1048-
1049-
req = CreateDashboardDataModelRequest(
1050-
name=EntityName("test_model"),
1051-
displayName="Test Model",
1052-
dataModelType=DataModelType.PowerBIDataModel,
1053-
service=FullyQualifiedEntityName("service.test"),
1054-
columns=[
1055-
Column(
1056-
name=ColumnName(self.REAL_WORLD_COLUMN_NAME),
1057-
displayName=self.REAL_WORLD_COLUMN_NAME,
1058-
dataType=DataType.STRING,
1059-
)
1060-
],
1061-
)
1062-
encoded_root = req.columns[0].name.root
1063-
assert "__reserved__quote__" in encoded_root
1064-
assert len(encoded_root) <= 256
1065-
1066-
def test_short_names_with_special_chars_not_truncated(self):
1067-
short_name = 'column "with" quotes'
1068-
encoded = replace_separators(short_name)
1069-
assert len(encoded) <= 256
1070-
1071-
from metadata.generated.schema.type.basic import (
1072-
EntityName,
1073-
FullyQualifiedEntityName,
1074-
)
1075-
1076-
req = CreateTableRequest(
1077-
name=EntityName("test_table"),
1078-
databaseSchema=FullyQualifiedEntityName("service.db.schema"),
1079-
columns=[
1080-
Column(
1081-
name=ColumnName(short_name),
1082-
displayName=short_name,
1083-
dataType=DataType.STRING,
1084-
)
1085-
],
1086-
)
1087-
assert req.columns[0].name.root == encoded
1088-
1089-
def test_names_without_special_chars_unchanged(self):
1090-
plain_name = "a" * 200
1091-
from metadata.generated.schema.type.basic import (
1092-
EntityName,
1093-
FullyQualifiedEntityName,
1094-
)
1095-
1096-
req = CreateTableRequest(
1097-
name=EntityName("test_table"),
1098-
databaseSchema=FullyQualifiedEntityName("service.db.schema"),
1099-
columns=[
1100-
Column(
1101-
name=ColumnName(plain_name),
1102-
displayName=plain_name,
1103-
dataType=DataType.STRING,
1104-
)
1105-
],
1106-
)
1107-
assert req.columns[0].name.root == plain_name
1108-
1109-
def test_transform_entity_names_root_attribute_truncated(self):
1110-
long_name_with_quotes = "a" * 230 + '"' + "b" * 10 + '"'
1111-
assert len(long_name_with_quotes) <= 256
1112-
encoded = replace_separators(long_name_with_quotes)
1113-
assert len(encoded) > 256
1114-
1115-
cn = ColumnName(long_name_with_quotes)
1116-
result = transform_entity_names(cn, ColumnName)
1117-
assert len(result.root) <= 256
1118-
1119-
1120995
if __name__ == "__main__":
1121996
import unittest
1122997

openmetadata-spec/src/main/resources/json/schema/entity/data/table.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,6 @@
243243
"description": "Local name (not fully qualified name) of the column. ColumnName is `-` when the column is not named in struct dataType. For example, BigQuery supports struct with unnamed fields.",
244244
"type": "string",
245245
"minLength": 1,
246-
"maxLength": 256,
247246
"pattern": "^((?!::).)*$"
248247
},
249248
"partitionIntervalTypes": {

0 commit comments

Comments
 (0)