Skip to content

Commit 199546a

Browse files
committed
Use CAST instead of SAFE_CAST for casts between numeric types
Signed-off-by: Yuanchun Shen <yuanchu@amazon.com>
1 parent 6d291b8 commit 199546a

19 files changed

Lines changed: 109 additions & 49 deletions

core/src/main/java/org/opensearch/sql/calcite/validate/PplTypeCoercion.java

Lines changed: 48 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import org.apache.calcite.sql.SqlOperator;
2222
import org.apache.calcite.sql.fun.SqlLibraryOperators;
2323
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
24+
import org.apache.calcite.sql.type.SqlTypeAssignmentRule;
2425
import org.apache.calcite.sql.type.SqlTypeFamily;
2526
import org.apache.calcite.sql.type.SqlTypeMappingRule;
2627
import org.apache.calcite.sql.type.SqlTypeName;
@@ -172,7 +173,7 @@ protected boolean coerceOperandType(
172173
}
173174
// Fix up nullable attr.
174175
RelDataType targetType1 = ValidationUtils.syncAttributes(factory, operandType, targetType);
175-
SqlNode desired = castTo(operand, targetType1);
176+
SqlNode desired = castTo(operand, operandType, targetType1);
176177
call.setOperand(index, desired);
177178
// SAFE_CAST always results in nullable return type. See
178179
// SqlCastFunction#createTypeWithNullabilityFromExpr
@@ -183,9 +184,23 @@ protected boolean coerceOperandType(
183184
return true;
184185
}
185186

186-
private static SqlNode castTo(SqlNode node, RelDataType type) {
187-
if (OpenSearchTypeUtil.isDatetime(type) || OpenSearchTypeUtil.isIp(type)) {
188-
ExprType exprType = OpenSearchTypeFactory.convertRelDataTypeToExprType(type);
187+
/**
188+
* Creates a cast expression from the source node to the target type.
189+
*
190+
* <p>This method determines whether to use regular CAST or SAFE_CAST based on the following
191+
* rules:
192+
*
193+
* <ul>
194+
* <li>For user-defined types: use specialized conversion functions
195+
* <li>For non-string literals: use regular CAST (safe, folded at compile time)
196+
* <li>For safe numeric widening (e.g., SMALLINT → INTEGER): use regular CAST (no data loss
197+
* possible)
198+
* <li>For all other cases: use SAFE_CAST to handle malformed values gracefully
199+
* </ul>
200+
*/
201+
private static SqlNode castTo(SqlNode node, RelDataType sourceType, RelDataType targetType) {
202+
if (OpenSearchTypeUtil.isDatetime(targetType) || OpenSearchTypeUtil.isIp(targetType)) {
203+
ExprType exprType = OpenSearchTypeFactory.convertRelDataTypeToExprType(targetType);
189204
return switch (exprType) {
190205
case ExprCoreType.DATE ->
191206
PPLBuiltinOperators.DATE.createCall(node.getParserPosition(), node);
@@ -197,15 +212,37 @@ private static SqlNode castTo(SqlNode node, RelDataType type) {
197212
default -> throw new UnsupportedOperationException("Unsupported type: " + exprType);
198213
};
199214
}
200-
// Use CAST when node is a literal AND not a string literal
201-
// Use SAFE_CAST in rest cases to avoid throwing errors when the source node is malformatted
202-
SqlOperator cast =
203-
(node.getKind() == SqlKind.LITERAL && !(node instanceof SqlCharStringLiteral))
204-
? SqlStdOperatorTable.CAST
205-
: SqlLibraryOperators.SAFE_CAST;
215+
216+
SqlOperator cast;
217+
// Use CAST for non-string literals (safe, folded at compile time)
218+
if (node.getKind() == SqlKind.LITERAL && !(node instanceof SqlCharStringLiteral)) {
219+
cast = SqlStdOperatorTable.CAST;
220+
}
221+
// Use CAST for safe numeric widening (no data loss possible, avoids script generation)
222+
else if (isSafeNumericWidening(sourceType, targetType)) {
223+
cast = SqlStdOperatorTable.CAST;
224+
}
225+
// Use SAFE_CAST for all other cases to handle malformed values gracefully
226+
else {
227+
cast = SqlLibraryOperators.SAFE_CAST;
228+
}
206229
return cast.createCall(
207230
node.getParserPosition(),
208231
node,
209-
SqlTypeUtil.convertTypeToSpec(type).withNullable(type.isNullable()));
232+
SqlTypeUtil.convertTypeToSpec(targetType).withNullable(targetType.isNullable()));
233+
}
234+
235+
/**
236+
* Checks if the cast from sourceType to targetType is a safe numeric widening operation.
237+
*
238+
* <p>The cast is regarded safe when both types are numeric and the source can be assigned to the
239+
* target.
240+
*/
241+
private static boolean isSafeNumericWidening(RelDataType sourceType, RelDataType targetType) {
242+
if (!SqlTypeUtil.isNumeric(sourceType) || !SqlTypeUtil.isNumeric(targetType)) {
243+
return false;
244+
}
245+
return SqlTypeAssignmentRule.instance()
246+
.canApplyFrom(targetType.getSqlTypeName(), sourceType.getSqlTypeName());
210247
}
211248
}

integ-test/src/test/resources/expectedOutput/calcite/clickbench/q2.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ calcite:
22
logical: |
33
LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
44
LogicalAggregate(group=[{}], count()=[COUNT()])
5-
LogicalFilter(condition=[<>(SAFE_CAST($19), 0)])
5+
LogicalFilter(condition=[<>(CAST($19):INTEGER, 0)])
66
CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
77
physical: |
8-
CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[SCRIPT-><>(SAFE_CAST($0), 0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},count()=COUNT()), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCSnsKICAib3AiOiB7CiAgICAibmFtZSI6ICI8PiIsCiAgICAia2luZCI6ICJOT1RfRVFVQUxTIiwKICAgICJzeW50YXgiOiAiQklOQVJZIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAib3AiOiB7CiAgICAgICAgIm5hbWUiOiAiU0FGRV9DQVNUIiwKICAgICAgICAia2luZCI6ICJTQUZFX0NBU1QiLAogICAgICAgICJzeW50YXgiOiAiU1BFQ0lBTCIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlNNQUxMSU5UIiwKICAgICAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiSU5URUdFUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICB9CiAgICB9LAogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiQklHSU5UIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlCiAgICAgIH0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["AdvEngineID",0]}},"boost":1.0}},"track_total_hits":2147483647}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
8+
CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER-><>(CAST($0):INTEGER, 0), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},count()=COUNT()), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"exists":{"field":"AdvEngineID","boost":1.0}}],"must_not":[{"term":{"AdvEngineID":{"value":0,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"track_total_hits":2147483647}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])

integ-test/src/test/resources/expectedOutput/calcite/clickbench/q37.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ calcite:
77
LogicalProject(URL=[$26])
88
LogicalFilter(condition=[IS NOT NULL($26)])
99
LogicalProject(EventDate=[$0], URLRegionID=[$1], HasGCLID=[$2], Income=[$3], Interests=[$4], Robotness=[$5], BrowserLanguage=[$6], CounterClass=[$7], BrowserCountry=[$8], OriginalURL=[$9], ClientTimeZone=[$10], RefererHash=[$11], TraficSourceID=[$12], HitColor=[$13], RefererRegionID=[$14], URLCategoryID=[$15], LocalEventTime=[$16], EventTime=[$17], UTMTerm=[$18], AdvEngineID=[$19], UserAgentMinor=[$20], UserAgentMajor=[$21], RemoteIP=[$22], Sex=[$23], JavaEnable=[$24], URLHash=[$25], URL=[$26], ParamOrderID=[$27], OpenstatSourceID=[$28], HTTPError=[$29], SilverlightVersion3=[$30], MobilePhoneModel=[$31], SilverlightVersion4=[$32], SilverlightVersion1=[$33], SilverlightVersion2=[$34], IsDownload=[$35], IsParameter=[$36], CLID=[$37], FlashMajor=[$38], FlashMinor=[$39], UTMMedium=[$40], WatchID=[$41], DontCountHits=[$42], CookieEnable=[$43], HID=[$44], SocialAction=[$45], WindowName=[$46], ConnectTiming=[$47], PageCharset=[$48], IsLink=[$49], IsArtifical=[$50], JavascriptEnable=[$51], ClientEventTime=[$52], DNSTiming=[$53], CodeVersion=[$54], ResponseEndTiming=[$55], FUniqID=[$56], WindowClientHeight=[$57], OpenstatServiceName=[$58], UTMContent=[$59], HistoryLength=[$60], IsOldCounter=[$61], MobilePhone=[$62], SearchPhrase=[$63], FlashMinor2=[$64], SearchEngineID=[$65], IsEvent=[$66], UTMSource=[$67], RegionID=[$68], OpenstatAdID=[$69], UTMCampaign=[$70], GoodEvent=[$71], IsRefresh=[$72], ParamCurrency=[$73], Params=[$74], ResolutionHeight=[$75], ClientIP=[$76], FromTag=[$77], ParamCurrencyID=[$78], ResponseStartTiming=[$79], ResolutionWidth=[$80], SendTiming=[$81], RefererCategoryID=[$82], OpenstatCampaignID=[$83], UserID=[$84], WithHash=[$85], UserAgent=[$86], ParamPrice=[$87], ResolutionDepth=[$88], IsMobile=[$89], Age=[$90], SocialSourceNetworkID=[$91], OpenerName=[$92], OS=[$93], IsNotBounce=[$94], Referer=[$95], NetMinor=[$96], Title=[$97], NetMajor=[$98], IPNetworkID=[$99], FetchTiming=[$100], SocialNetwork=[$101], SocialSourcePage=[$102], CounterID=[$103], WindowClientWidth=[$104], _id=[$105], _index=[$106], _score=[$107], _maxscore=[$108], _sort=[$109], _routing=[$110])
10-
LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00')), <=($0, TIMESTAMP('2013-07-31 00:00:00')), =(SAFE_CAST($42), 0), =(SAFE_CAST($72), 0), <>($26, ''))])
10+
LogicalFilter(condition=[AND(=($103, 62), >=($0, TIMESTAMP('2013-07-01 00:00:00')), <=($0, TIMESTAMP('2013-07-31 00:00:00')), =(CAST($42):INTEGER, 0), =(CAST($72):INTEGER, 0), <>($26, ''))])
1111
CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
1212
physical: |
13-
CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[SCRIPT->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':EXPR_TIMESTAMP VARCHAR..'2013-07-31 00:00:00':EXPR_TIMESTAMP VARCHAR]]:EXPR_TIMESTAMP VARCHAR), =(SAFE_CAST($2), 0), =(SAFE_CAST($3), 0), <>($1, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},PageViews=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[PageViews, URL], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCRXsKICAib3AiOiB7CiAgICAibmFtZSI6ICI9IiwKICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIlNBRkVfQ0FTVCIsCiAgICAgICAgImtpbmQiOiAiU0FGRV9DQVNUIiwKICAgICAgICAic3ludGF4IjogIlNQRUNJQUwiCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJTTUFMTElOVCIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0sCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["DontCountHits",0]}},"boost":1.0}},{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCRXsKICAib3AiOiB7CiAgICAibmFtZSI6ICI9IiwKICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIlNBRkVfQ0FTVCIsCiAgICAgICAgImtpbmQiOiAiU0FGRV9DQVNUIiwKICAgICAgICAic3ludGF4IjogIlNQRUNJQUwiCiAgICAgIH0sCiAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICB7CiAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAidHlwZSI6ICJTTUFMTElOVCIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0sCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIklOVEVHRVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["IsRefresh",0]}},"boost":1.0}},{"bool":{"must":[{"exists":{"field":"URL","boost":1.0}}],"must_not":[{"term":{"URL":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"URL":{"terms":{"field":"URL","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
13+
CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(=($4, 62), SEARCH($0, Sarg[['2013-07-01 00:00:00':EXPR_TIMESTAMP VARCHAR..'2013-07-31 00:00:00':EXPR_TIMESTAMP VARCHAR]]:EXPR_TIMESTAMP VARCHAR), =(CAST($2):INTEGER, 0), =(CAST($3):INTEGER, 0), <>($1, '')), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},PageViews=COUNT()), SORT_AGG_METRICS->[1 DESC LAST], PROJECT->[PageViews, URL], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"term":{"CounterID":{"value":62,"boost":1.0}}},{"range":{"EventDate":{"from":"2013-07-01T00:00:00.000Z","to":"2013-07-31T00:00:00.000Z","include_lower":true,"include_upper":true,"format":"date_time","boost":1.0}}},{"term":{"DontCountHits":{"value":0,"boost":1.0}}},{"term":{"IsRefresh":{"value":0,"boost":1.0}}},{"bool":{"must":[{"exists":{"field":"URL","boost":1.0}}],"must_not":[{"term":{"URL":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"URL":{"terms":{"field":"URL","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"_count":"desc"},{"_key":"asc"}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])

0 commit comments

Comments
 (0)