Skip to content

Commit 306e276

Browse files
committed
Add validation infrastructure and type system (not yet enabled)
This PR adds the foundation for PPL operand type validation:
- Add OperandTypeChecker interface and implementations (fixed-arity, variadic, composite, etc.)
- Add TypeFamily enum for categorizing SQL/PPL types
- Add ValidationRule and ValidationContext for the validation pipeline
- Add ValidatingRelNodeVisitor for walking Calcite rel trees
- Wire validation infrastructure into CalcitePPLAbstractModule
- Update expected output files for explain tests
- None of the validation logic is enabled yet; it will be turned on in a subsequent PR

Signed-off-by: Yuanchun Shen <yuanchu@amazon.com>
1 parent 9dc0060 commit 306e276

110 files changed

Lines changed: 4457 additions & 257 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

core/src/main/java/org/opensearch/sql/calcite/CalcitePlanContext.java

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,20 @@
1717
import java.util.function.BiFunction;
1818
import lombok.Getter;
1919
import lombok.Setter;
20+
import org.apache.calcite.config.NullCollation;
2021
import org.apache.calcite.rex.RexCorrelVariable;
2122
import org.apache.calcite.rex.RexLambdaRef;
2223
import org.apache.calcite.rex.RexNode;
24+
import org.apache.calcite.sql.validate.SqlValidator;
2325
import org.apache.calcite.tools.FrameworkConfig;
2426
import org.opensearch.sql.ast.expression.UnresolvedExpression;
2527
import org.opensearch.sql.calcite.utils.CalciteToolsHelper;
2628
import org.opensearch.sql.calcite.utils.CalciteToolsHelper.OpenSearchRelBuilder;
29+
import org.opensearch.sql.calcite.validate.OpenSearchSparkSqlDialect;
30+
import org.opensearch.sql.calcite.validate.PplTypeCoercion;
31+
import org.opensearch.sql.calcite.validate.PplTypeCoercionRule;
32+
import org.opensearch.sql.calcite.validate.PplValidator;
33+
import org.opensearch.sql.calcite.validate.SqlOperatorTableProvider;
2734
import org.opensearch.sql.common.setting.Settings;
2835
import org.opensearch.sql.executor.QueryType;
2936
import org.opensearch.sql.expression.function.FunctionProperties;
@@ -72,6 +79,14 @@ public class CalcitePlanContext {
7279
/** Whether we're currently inside a lambda context. */
7380
@Getter @Setter private boolean inLambdaContext = false;
7481

82+
/**
83+
* -- SETTER -- Sets the SQL operator table provider. This must be called during initialization by
84+
* the opensearch module.
85+
*
86+
* @param provider the provider to use for obtaining operator tables
87+
*/
88+
@Setter private static SqlOperatorTableProvider operatorTableProvider;
89+
7590
private CalcitePlanContext(FrameworkConfig config, SysLimit sysLimit, QueryType queryType) {
7691
this.config = config;
7792
this.sysLimit = sysLimit;
@@ -101,6 +116,34 @@ private CalcitePlanContext(CalcitePlanContext parent) {
101116
this.inLambdaContext = true; // Mark that we're inside a lambda
102117
}
103118

119+
/**
120+
* Creates a new SqlValidator instance. SqlValidator is stateful and should not be reused across
121+
* validations, so a new instance is created for each call.
122+
*
123+
* @return new SqlValidator instance
124+
*/
125+
public SqlValidator getValidator() {
126+
if (operatorTableProvider == null) {
127+
throw new IllegalStateException(
128+
"SqlOperatorTableProvider must be set before creating CalcitePlanContext");
129+
}
130+
SqlValidator.Config validatorConfig =
131+
SqlValidator.Config.DEFAULT
132+
.withTypeCoercionRules(PplTypeCoercionRule.instance())
133+
.withTypeCoercionFactory(PplTypeCoercion::create)
134+
// Use lenient conformance for PPL compatibility
135+
.withConformance(OpenSearchSparkSqlDialect.DEFAULT.getConformance())
136+
// Use Spark SQL's NULL collation (NULLs sorted LOW/FIRST)
137+
.withDefaultNullCollation(NullCollation.LOW)
138+
// This ensures that coerced arguments are replaced with cast version in sql
139+
// select list because coercion is performed during select list expansion during
140+
// sql validation. Affects 4356.yml
141+
// See SqlValidatorImpl#validateSelectList and AggConverter#translateAgg
142+
.withIdentifierExpansion(true);
143+
return PplValidator.create(
144+
config, operatorTableProvider.getOperatorTable(), TYPE_FACTORY, validatorConfig);
145+
}
146+
104147
public RexNode resolveJoinCondition(
105148
UnresolvedExpression expr,
106149
BiFunction<UnresolvedExpression, CalcitePlanContext, RexNode> transformFunction) {

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 88 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,12 @@
4343
import java.util.stream.IntStream;
4444
import java.util.stream.Stream;
4545
import lombok.AllArgsConstructor;
46+
import lombok.NonNull;
4647
import org.apache.calcite.adapter.enumerable.RexToLixTranslator;
4748
import org.apache.calcite.plan.RelOptTable;
4849
import org.apache.calcite.plan.ViewExpanders;
50+
import org.apache.calcite.rel.RelCollation;
51+
import org.apache.calcite.rel.RelFieldCollation;
4952
import org.apache.calcite.rel.RelNode;
5053
import org.apache.calcite.rel.core.Aggregate;
5154
import org.apache.calcite.rel.core.JoinRelType;
@@ -56,10 +59,14 @@
5659
import org.apache.calcite.rex.RexBuilder;
5760
import org.apache.calcite.rex.RexCall;
5861
import org.apache.calcite.rex.RexCorrelVariable;
62+
import org.apache.calcite.rex.RexFieldCollation;
5963
import org.apache.calcite.rex.RexInputRef;
6064
import org.apache.calcite.rex.RexLiteral;
6165
import org.apache.calcite.rex.RexNode;
66+
import org.apache.calcite.rex.RexOver;
67+
import org.apache.calcite.rex.RexShuttle;
6268
import org.apache.calcite.rex.RexVisitorImpl;
69+
import org.apache.calcite.rex.RexWindow;
6370
import org.apache.calcite.rex.RexWindowBounds;
6471
import org.apache.calcite.sql.SqlKind;
6572
import org.apache.calcite.sql.fun.SqlLibraryOperators;
@@ -765,8 +772,8 @@ public RelNode visitTranspose(
765772
.map(
766773
f ->
767774
Map.entry(
768-
ImmutableList.of(rx.makeLiteral(f)),
769-
ImmutableList.of((RexNode) rx.makeCast(varchar, b.field(f), true))))
775+
ImmutableList.of((RexLiteral) rx.makeLiteral(f, varchar, true)),
776+
ImmutableList.of(rx.makeCast(varchar, b.field(f), true, true))))
770777
.collect(Collectors.toList()));
771778

772779
// Step 3: Trim spaces from columnName column before pivot
@@ -1795,6 +1802,7 @@ public RelNode visitStreamWindow(StreamWindow node, CalcitePlanContext context)
17951802
// Default: first get rawExpr
17961803
List<RexNode> overExpressions =
17971804
node.getWindowFunctionList().stream().map(w -> rexVisitor.analyze(w, context)).toList();
1805+
overExpressions = embedExistingCollationsIntoOver(overExpressions, context);
17981806

17991807
if (hasGroup) {
18001808
// only build sequence when there is by condition
@@ -1836,6 +1844,84 @@ public RelNode visitStreamWindow(StreamWindow node, CalcitePlanContext context)
18361844
return context.relBuilder.peek();
18371845
}
18381846

1847+
/**
1848+
* Embed existing collation into window function's over clauses.
1849+
*
1850+
* <p>Window functions with frame specifications like {@code ROWS n PRECEDING} require ORDER BY to
1851+
* determine row order. Without it, results are non-deterministic.
1852+
*
1853+
* <p>Without this fix, the initial plan has ORDER BY separate from window functions:
1854+
*
1855+
* <pre>
1856+
* LogicalProject(SUM($5) OVER (ROWS 1 PRECEDING)) ← Missing ORDER BY
1857+
* LogicalSort(sort0=[$5])
1858+
* </pre>
1859+
*
1860+
* <p>This causes problems during validation as the order is not bound to the window. With this
1861+
* fix, sort collations are embeded into each {@code RexOver} window:
1862+
*
1863+
* <pre>
1864+
* LogicalProject(SUM($5) OVER (ORDER BY $5 ROWS 1 PRECEDING)) ← ORDER BY embedded
1865+
* </pre>
1866+
*
1867+
* @param overExpressions Window function expressions (may contain nested {@link RexOver})
1868+
* @param context Plan context for building RexNodes
1869+
* @return Expressions with ORDER BY embedded in all window specifications
1870+
*/
1871+
private List<RexNode> embedExistingCollationsIntoOver(
1872+
List<RexNode> overExpressions, CalcitePlanContext context) {
1873+
RelCollation existingCollation = context.relBuilder.peek().getTraitSet().getCollation();
1874+
List<@NonNull RelFieldCollation> relCollations =
1875+
existingCollation == null ? List.of() : existingCollation.getFieldCollations();
1876+
ImmutableList<@NonNull RexFieldCollation> rexCollations =
1877+
relCollations.stream()
1878+
.map(f -> relCollationToRexCollation(f, context.relBuilder))
1879+
.collect(ImmutableList.toImmutableList());
1880+
return overExpressions.stream()
1881+
.map(
1882+
n ->
1883+
n.accept(
1884+
new RexShuttle() {
1885+
@Override
1886+
public RexNode visitOver(RexOver over) {
1887+
RexWindow window = over.getWindow();
1888+
return context.rexBuilder.makeOver(
1889+
over.getType(),
1890+
over.getAggOperator(),
1891+
over.getOperands(),
1892+
window.partitionKeys,
1893+
rexCollations,
1894+
window.getLowerBound(),
1895+
window.getUpperBound(),
1896+
window.isRows(),
1897+
true,
1898+
false,
1899+
over.isDistinct(),
1900+
over.ignoreNulls());
1901+
}
1902+
}))
1903+
.collect(Collectors.toList());
1904+
}
1905+
1906+
private static RexFieldCollation relCollationToRexCollation(
1907+
RelFieldCollation relCollation, RelBuilder builder) {
1908+
RexNode fieldRef = builder.field(relCollation.getFieldIndex());
1909+
1910+
// Convert direction flags to SqlKind set
1911+
Set<SqlKind> flags = new HashSet<>();
1912+
if (relCollation.direction == RelFieldCollation.Direction.DESCENDING
1913+
|| relCollation.direction == RelFieldCollation.Direction.STRICTLY_DESCENDING) {
1914+
flags.add(SqlKind.DESCENDING);
1915+
}
1916+
if (relCollation.nullDirection == RelFieldCollation.NullDirection.FIRST) {
1917+
flags.add(SqlKind.NULLS_FIRST);
1918+
} else if (relCollation.nullDirection == RelFieldCollation.NullDirection.LAST) {
1919+
flags.add(SqlKind.NULLS_LAST);
1920+
}
1921+
1922+
return new RexFieldCollation(fieldRef, flags);
1923+
}
1924+
18391925
private List<RexNode> wrapWindowFunctionsWithGroupNotNull(
18401926
List<RexNode> overExpressions, RexNode groupNotNull, CalcitePlanContext context) {
18411927
List<RexNode> wrappedOverExprs = new ArrayList<>(overExpressions.size());

core/src/main/java/org/opensearch/sql/calcite/ExtendedRexBuilder.java

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,19 @@
1414
import org.apache.calcite.rex.RexBuilder;
1515
import org.apache.calcite.rex.RexLiteral;
1616
import org.apache.calcite.rex.RexNode;
17+
import org.apache.calcite.sql.SqlCallBinding;
1718
import org.apache.calcite.sql.SqlIntervalQualifier;
19+
import org.apache.calcite.sql.SqlKind;
20+
import org.apache.calcite.sql.SqlOperator;
1821
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
1922
import org.apache.calcite.sql.parser.SqlParserPos;
2023
import org.apache.calcite.sql.type.SqlTypeName;
2124
import org.apache.calcite.sql.type.SqlTypeUtil;
25+
import org.apache.calcite.sql.validate.implicit.TypeCoercionImpl;
2226
import org.opensearch.sql.ast.expression.SpanUnit;
2327
import org.opensearch.sql.calcite.type.AbstractExprRelDataType;
2428
import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory;
29+
import org.opensearch.sql.calcite.utils.OpenSearchTypeUtil;
2530
import org.opensearch.sql.data.type.ExprCoreType;
2631
import org.opensearch.sql.exception.ExpressionEvaluationException;
2732
import org.opensearch.sql.exception.SemanticCheckException;
@@ -146,7 +151,7 @@ public RexNode makeCast(
146151
// SqlStdOperatorTable.NOT_EQUALS,
147152
// ImmutableList.of(exp, makeZeroLiteral(sourceType)));
148153
}
149-
} else if (OpenSearchTypeFactory.isUserDefinedType(type)) {
154+
} else if (OpenSearchTypeUtil.isUserDefinedType(type)) {
150155
if (RexLiteral.isNullLiteral(exp)) {
151156
return super.makeCast(pos, type, exp, matchNullability, safe, format);
152157
}
@@ -185,4 +190,33 @@ else if ((SqlTypeUtil.isApproximateNumeric(sourceType) || SqlTypeUtil.isDecimal(
185190
}
186191
return super.makeCast(pos, type, exp, matchNullability, safe, format);
187192
}
193+
194+
/**
195+
* Derives the return type of call to an operator.
196+
*
197+
* <p>In Calcite, coercion between STRING and NUMERIC operands takes place during converting SQL
198+
* to RelNode. However, as we are building logical plans directly, the coercion is not yet
199+
* implemented at this point. Hence, we duplicate {@link
200+
* TypeCoercionImpl#binaryArithmeticWithStrings} here to infer the correct type, enabling
201+
* operations like {@code "5" / 10}. The actual coercion will be inserted later when performing
202+
* validation on SqlNode.
203+
*
204+
* @see TypeCoercionImpl#binaryArithmeticCoercion(SqlCallBinding)
205+
* @param op the operator being called
206+
* @param exprs actual operands
207+
* @return derived type
208+
*/
209+
@Override
210+
public RelDataType deriveReturnType(SqlOperator op, List<? extends RexNode> exprs) {
211+
if (op.getKind().belongsTo(SqlKind.BINARY_ARITHMETIC) && exprs.size() == 2) {
212+
final RelDataType type1 = exprs.get(0).getType();
213+
final RelDataType type2 = exprs.get(1).getType();
214+
if (SqlTypeUtil.isNumeric(type1) && OpenSearchTypeUtil.isCharacter(type2)) {
215+
return type1;
216+
} else if (OpenSearchTypeUtil.isCharacter(type1) && SqlTypeUtil.isNumeric(type2)) {
217+
return type2;
218+
}
219+
}
220+
return super.deriveReturnType(op, exprs);
221+
}
188222
}

core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java

Lines changed: 4 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -62,13 +62,11 @@
6262
import org.apache.calcite.plan.RelOptRule;
6363
import org.apache.calcite.plan.RelOptSchema;
6464
import org.apache.calcite.plan.RelOptTable;
65-
import org.apache.calcite.plan.RelOptTable.ViewExpander;
6665
import org.apache.calcite.plan.hep.HepPlanner;
6766
import org.apache.calcite.plan.hep.HepProgram;
6867
import org.apache.calcite.plan.hep.HepProgramBuilder;
6968
import org.apache.calcite.prepare.CalciteCatalogReader;
7069
import org.apache.calcite.prepare.CalcitePrepareImpl;
71-
import org.apache.calcite.prepare.Prepare.CatalogReader;
7270
import org.apache.calcite.rel.RelHomogeneousShuttle;
7371
import org.apache.calcite.rel.RelNode;
7472
import org.apache.calcite.rel.RelRoot;
@@ -88,7 +86,6 @@
8886
import org.apache.calcite.sql.SqlKind;
8987
import org.apache.calcite.sql.parser.SqlParserPos;
9088
import org.apache.calcite.sql.validate.SqlValidator;
91-
import org.apache.calcite.sql2rel.RelFieldTrimmer;
9289
import org.apache.calcite.sql2rel.SqlRexConvertletTable;
9390
import org.apache.calcite.sql2rel.SqlToRelConverter;
9491
import org.apache.calcite.tools.FrameworkConfig;
@@ -98,12 +95,12 @@
9895
import org.apache.calcite.tools.RelRunner;
9996
import org.apache.calcite.util.Holder;
10097
import org.apache.calcite.util.Util;
101-
import org.checkerframework.checker.nullness.qual.Nullable;
10298
import org.opensearch.sql.calcite.CalcitePlanContext;
10399
import org.opensearch.sql.calcite.plan.Scannable;
104100
import org.opensearch.sql.calcite.plan.rule.OpenSearchRules;
105101
import org.opensearch.sql.calcite.plan.rule.PPLSimplifyDedupRule;
106102
import org.opensearch.sql.calcite.profile.PlanProfileBuilder;
103+
import org.opensearch.sql.calcite.validate.converters.OpenSearchSqlToRelConverter;
107104
import org.opensearch.sql.expression.function.PPLBuiltinOperators;
108105
import org.opensearch.sql.monitor.profile.ProfileContext;
109106
import org.opensearch.sql.monitor.profile.ProfileMetric;
@@ -259,7 +256,7 @@ private void registerCustomizedRules(RelOptPlanner planner) {
259256
* return {@link OpenSearchCalcitePreparingStmt}
260257
*/
261258
@Override
262-
protected CalcitePrepareImpl.CalcitePreparingStmt getPreparingStmt(
259+
public CalcitePrepareImpl.CalcitePreparingStmt getPreparingStmt(
263260
CalcitePrepare.Context context,
264261
Type elementType,
265262
CalciteCatalogReader catalogReader,
@@ -369,34 +366,6 @@ protected SqlToRelConverter getSqlToRelConverter(
369366
}
370367
}
371368

372-
public static class OpenSearchSqlToRelConverter extends SqlToRelConverter {
373-
protected final RelBuilder relBuilder;
374-
375-
public OpenSearchSqlToRelConverter(
376-
ViewExpander viewExpander,
377-
@Nullable SqlValidator validator,
378-
CatalogReader catalogReader,
379-
RelOptCluster cluster,
380-
SqlRexConvertletTable convertletTable,
381-
Config config) {
382-
super(viewExpander, validator, catalogReader, cluster, convertletTable, config);
383-
this.relBuilder =
384-
config
385-
.getRelBuilderFactory()
386-
.create(
387-
cluster,
388-
validator != null
389-
? validator.getCatalogReader().unwrap(RelOptSchema.class)
390-
: null)
391-
.transform(config.getRelBuilderConfigTransform());
392-
}
393-
394-
@Override
395-
protected RelFieldTrimmer newFieldTrimmer() {
396-
return new OpenSearchRelFieldTrimmer(validator, this.relBuilder);
397-
}
398-
}
399-
400369
public static class OpenSearchRelRunners {
401370
/**
402371
* Runs a relational expression by existing connection. This class copied from {@link
@@ -438,7 +407,8 @@ public RelNode visit(TableScan scan) {
438407
"The 'bins' parameter on timestamp fields requires: (1) pushdown to be enabled"
439408
+ " (controlled by plugins.calcite.pushdown.enabled, enabled by default), and"
440409
+ " (2) the timestamp field to be used as an aggregation bucket (e.g., 'stats"
441-
+ " count() by @timestamp').");
410+
+ " count() by @timestamp').",
411+
e);
442412
}
443413
throw Util.throwAsRuntime(e);
444414
}

0 commit comments

Comments
 (0)