Skip to content

Commit 5b792da

Browse files
authored
Add relevance search function support for unified SQL query (#5279)
* feat: Register full-text search functions in unified SQL path Add UnifiedFunctionSpec with fluent builder to define relevance function signatures (match, match_phrase, multi_match, etc.) as a composable SqlOperatorTable chained into Calcite's FrameworkConfig. Functions are language-level primitives, always resolvable regardless of default schema. Add SQL and PPL test coverage for all 7 relevance functions. Signed-off-by: Chen Dai <daichen@amazon.com> * feat: V2 named-argument syntax with NamedArgRewriter Add NamedArgRewriter SqlShuttle that normalizes V2/PPL relevance syntax into MAP-based form before Calcite validation. Transforms positional and key=value arguments into MAP[paramName, value] pairs matching PPL's internal representation for uniform pushdown rules. Refactor UnifiedFunctionSpec to instance-based design with fluent builder and Category record for grouping. Use SqlUserDefinedFunction for consistency with PPL path. Add error tests and QueryErrorAssert to test base. Signed-off-by: Chen Dai <daichen@amazon.com> * test: Add unit tests for NamedArgRewriter Add dedicated NamedArgRewriterTest covering: - Positional args rewritten to MAPs with correct param names - V2 equals syntax (key=value) flattened to MAP entries - Multi-field functions use 'fields' param name - Non-relevance functions pass through unchanged - Edge cases: all-equals args, mixed order, extra positional args Add high-level regression test in UnifiedRelevanceSearchSqlTest verifying non-relevance functions (upper) are unaffected by rewriter. Parser config matches production (Casing.UNCHANGED). Signed-off-by: Chen Dai <daichen@amazon.com> * fix: Address review feedback for NamedArgRewriter - Add bounds check for positional args with descriptive error message - Use SqlIdentifier.getSimple() to avoid backtick-decorated keys for reserved words like 'escape' in named arguments - Add null guard in QueryErrorAssert.assertErrorMessage to prevent NPE when root cause exception has null message - Update tests to assert on IllegalArgumentException with error message - Add test for reserved word as named argument key (query_string escape) Signed-off-by: Chen Dai <daichen@amazon.com> --------- Signed-off-by: Chen Dai <daichen@amazon.com>
1 parent c2c97db commit 5b792da

8 files changed

Lines changed: 669 additions & 1 deletion

File tree

api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,16 @@
2323
import org.apache.calcite.rel.metadata.DefaultRelMetadataProvider;
2424
import org.apache.calcite.schema.Schema;
2525
import org.apache.calcite.schema.SchemaPlus;
26+
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
2627
import org.apache.calcite.sql.parser.SqlParser;
28+
import org.apache.calcite.sql.util.SqlOperatorTables;
2729
import org.apache.calcite.tools.FrameworkConfig;
2830
import org.apache.calcite.tools.Frameworks;
2931
import org.apache.calcite.tools.Programs;
3032
import org.opensearch.sql.api.parser.CalciteSqlQueryParser;
3133
import org.opensearch.sql.api.parser.PPLQueryParser;
3234
import org.opensearch.sql.api.parser.UnifiedQueryParser;
35+
import org.opensearch.sql.api.spec.UnifiedFunctionSpec;
3336
import org.opensearch.sql.calcite.CalcitePlanContext;
3437
import org.opensearch.sql.calcite.SysLimit;
3538
import org.opensearch.sql.common.setting.Settings;
@@ -243,6 +246,9 @@ private FrameworkConfig buildFrameworkConfig() {
243246
SchemaPlus defaultSchema = findSchemaByPath(rootSchema, defaultNamespace);
244247
return Frameworks.newConfigBuilder()
245248
.parserConfig(buildParserConfig())
249+
.operatorTable(
250+
SqlOperatorTables.chain(
251+
SqlStdOperatorTable.instance(), UnifiedFunctionSpec.RELEVANCE.operatorTable()))
246252
.defaultSchema(defaultSchema)
247253
.traitDefs((List<RelTraitDef>) null)
248254
.programs(Programs.calc(DefaultRelMetadataProvider.INSTANCE))

api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import org.apache.calcite.sql.SqlNode;
1818
import org.apache.calcite.tools.Frameworks;
1919
import org.apache.calcite.tools.Planner;
20+
import org.opensearch.sql.api.parser.NamedArgRewriter;
2021
import org.opensearch.sql.api.parser.UnifiedQueryParser;
2122
import org.opensearch.sql.ast.tree.UnresolvedPlan;
2223
import org.opensearch.sql.calcite.CalciteRelNodeVisitor;
@@ -81,7 +82,8 @@ private static class CalciteNativeStrategy implements PlanningStrategy {
8182
public RelNode plan(String query) throws Exception {
8283
try (Planner planner = Frameworks.getPlanner(context.getPlanContext().config)) {
8384
SqlNode parsed = planner.parse(query);
84-
SqlNode validated = planner.validate(parsed);
85+
SqlNode rewritten = parsed.accept(NamedArgRewriter.INSTANCE);
86+
SqlNode validated = planner.validate(rewritten);
8587
RelRoot relRoot = planner.rel(validated);
8688
return relRoot.project();
8789
}
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.api.parser;
7+
8+
import java.util.List;
9+
import lombok.AccessLevel;
10+
import lombok.NoArgsConstructor;
11+
import org.apache.calcite.sql.SqlCall;
12+
import org.apache.calcite.sql.SqlIdentifier;
13+
import org.apache.calcite.sql.SqlKind;
14+
import org.apache.calcite.sql.SqlLiteral;
15+
import org.apache.calcite.sql.SqlNode;
16+
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
17+
import org.apache.calcite.sql.parser.SqlParserPos;
18+
import org.apache.calcite.sql.util.SqlShuttle;
19+
import org.checkerframework.checker.nullness.qual.Nullable;
20+
import org.opensearch.sql.api.spec.UnifiedFunctionSpec;
21+
22+
/**
23+
* Pre-validation rewriter for backward compatibility with non-standard named-argument syntax (e.g.,
24+
* {@code operator='AND'} instead of {@code operator => 'AND'}). Normalizes relevance function calls
25+
* into MAP-based form so SQL and PPL paths produce identical query plans for pushdown rules.
26+
*
27+
* <p>This rewriter is subject to removal if we adopt standard SQL named-argument syntax.
28+
*/
29+
@NoArgsConstructor(access = AccessLevel.PRIVATE)
30+
public final class NamedArgRewriter extends SqlShuttle {
31+
32+
public static final NamedArgRewriter INSTANCE = new NamedArgRewriter();
33+
34+
@Override
35+
public @Nullable SqlNode visit(SqlCall call) {
36+
SqlCall visited = (SqlCall) super.visit(call);
37+
return UnifiedFunctionSpec.of(visited.getOperator().getName())
38+
.filter(UnifiedFunctionSpec.RELEVANCE::contains)
39+
.map(spec -> (SqlNode) rewriteToMaps(visited, spec.getParamNames()))
40+
.orElse(visited);
41+
}
42+
43+
/**
44+
* Rewrites each argument into a MAP entry. For match(name, 'John', operator='AND'):
45+
* <li>Positional arg: name → MAP('field', name)
46+
* <li>Named arg: operator='AND' → MAP('operator', 'AND')
47+
*/
48+
private static SqlCall rewriteToMaps(SqlCall call, List<String> paramNames) {
49+
List<SqlNode> operands = call.getOperandList();
50+
SqlNode[] maps = new SqlNode[operands.size()];
51+
for (int i = 0; i < operands.size(); i++) {
52+
SqlNode op = operands.get(i);
53+
if (op instanceof SqlCall eq && op.getKind() == SqlKind.EQUALS) {
54+
SqlNode key = eq.operand(0);
55+
String name =
56+
key instanceof SqlIdentifier ident
57+
? ident.getSimple()
58+
: key.toString(); // avoid backtick-decorated keys for reserved words
59+
maps[i] = toMap(name, eq.operand(1));
60+
} else {
61+
if (i >= paramNames.size()) {
62+
throw new IllegalArgumentException(
63+
String.format("Invalid arguments for function '%s'", call.getOperator().getName()));
64+
}
65+
maps[i] = toMap(paramNames.get(i), op);
66+
}
67+
}
68+
return call.getOperator().createCall(call.getParserPosition(), maps);
69+
}
70+
71+
private static SqlNode toMap(String key, SqlNode value) {
72+
return SqlStdOperatorTable.MAP_VALUE_CONSTRUCTOR.createCall(
73+
SqlParserPos.ZERO, SqlLiteral.createCharString(key, SqlParserPos.ZERO), value);
74+
}
75+
}
Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.api.spec;
7+
8+
import static org.apache.calcite.sql.type.ReturnTypes.BOOLEAN;
9+
10+
import java.util.List;
11+
import java.util.Map;
12+
import java.util.Objects;
13+
import java.util.Optional;
14+
import java.util.stream.Collectors;
15+
import java.util.stream.Stream;
16+
import lombok.AccessLevel;
17+
import lombok.EqualsAndHashCode;
18+
import lombok.Getter;
19+
import lombok.RequiredArgsConstructor;
20+
import lombok.ToString;
21+
import org.apache.calcite.rel.type.RelDataType;
22+
import org.apache.calcite.rel.type.RelDataTypeFactory;
23+
import org.apache.calcite.sql.SqlCallBinding;
24+
import org.apache.calcite.sql.SqlIdentifier;
25+
import org.apache.calcite.sql.SqlKind;
26+
import org.apache.calcite.sql.SqlOperandCountRange;
27+
import org.apache.calcite.sql.SqlOperator;
28+
import org.apache.calcite.sql.SqlOperatorTable;
29+
import org.apache.calcite.sql.parser.SqlParserPos;
30+
import org.apache.calcite.sql.type.InferTypes;
31+
import org.apache.calcite.sql.type.SqlOperandCountRanges;
32+
import org.apache.calcite.sql.type.SqlOperandMetadata;
33+
import org.apache.calcite.sql.type.SqlReturnTypeInference;
34+
import org.apache.calcite.sql.util.SqlOperatorTables;
35+
import org.apache.calcite.sql.validate.SqlUserDefinedFunction;
36+
37+
/**
38+
* Declarative registry of language-level functions for the unified query engine. Functions defined
39+
* here are part of the language spec — always resolvable regardless of the underlying data source.
40+
* They are grouped into {@link Category categories} that callers chain into Calcite's operator
41+
* table. Data-source capability is enforced at optimization time by pushdown rules.
42+
*/
43+
@Getter
44+
@ToString(of = "funcName")
45+
@EqualsAndHashCode(of = "funcName")
46+
@RequiredArgsConstructor(access = AccessLevel.PRIVATE)
47+
public final class UnifiedFunctionSpec {
48+
49+
/** Function name as registered in the operator table (e.g., "match", "multi_match"). */
50+
private final String funcName;
51+
52+
/** Calcite operator for chaining into the framework config's operator table. */
53+
private final SqlOperator operator;
54+
55+
/** Full-text search functions. */
56+
public static final Category RELEVANCE =
57+
new Category(
58+
List.of(
59+
function("match").vararg("field", "query").returnType(BOOLEAN).build(),
60+
function("match_phrase").vararg("field", "query").returnType(BOOLEAN).build(),
61+
function("match_bool_prefix").vararg("field", "query").returnType(BOOLEAN).build(),
62+
function("match_phrase_prefix").vararg("field", "query").returnType(BOOLEAN).build(),
63+
function("multi_match").vararg("fields", "query").returnType(BOOLEAN).build(),
64+
function("simple_query_string").vararg("fields", "query").returnType(BOOLEAN).build(),
65+
function("query_string").vararg("fields", "query").returnType(BOOLEAN).build()));
66+
67+
/** All registered function specs, keyed by function name. */
68+
private static final Map<String, UnifiedFunctionSpec> ALL_SPECS =
69+
Stream.of(RELEVANCE)
70+
.flatMap(c -> c.specs().stream())
71+
.collect(Collectors.toMap(UnifiedFunctionSpec::getFuncName, s -> s));
72+
73+
/**
74+
* Looks up a function spec by name across all categories.
75+
*
76+
* @param name function name (case-insensitive)
77+
* @return the spec, or empty if not found
78+
*/
79+
public static Optional<UnifiedFunctionSpec> of(String name) {
80+
return Optional.ofNullable(ALL_SPECS.get(name.toLowerCase()));
81+
}
82+
83+
/**
84+
* @return required param names from {@link SqlOperandMetadata}, or empty if not available.
85+
*/
86+
public List<String> getParamNames() {
87+
return operator.getOperandTypeChecker() instanceof SqlOperandMetadata metadata
88+
? metadata.paramNames()
89+
: List.of();
90+
}
91+
92+
/** A group of function specs that can be chained into Calcite's operator table. */
93+
public record Category(List<UnifiedFunctionSpec> specs) {
94+
public SqlOperatorTable operatorTable() {
95+
return SqlOperatorTables.of(specs.stream().map(UnifiedFunctionSpec::getOperator).toList());
96+
}
97+
98+
/** Returns true if this category contains the given spec. */
99+
public boolean contains(UnifiedFunctionSpec spec) {
100+
return specs.contains(spec);
101+
}
102+
}
103+
104+
public static Builder function(String name) {
105+
return new Builder(name);
106+
}
107+
108+
/** Fluent builder for function specs. */
109+
@RequiredArgsConstructor(access = AccessLevel.PRIVATE)
110+
public static class Builder {
111+
private final String funcName;
112+
private List<String> paramNames = List.of();
113+
private SqlReturnTypeInference returnType;
114+
115+
public Builder vararg(String... names) {
116+
this.paramNames = List.of(names);
117+
return this;
118+
}
119+
120+
public Builder returnType(SqlReturnTypeInference type) {
121+
this.returnType = type;
122+
return this;
123+
}
124+
125+
public UnifiedFunctionSpec build() {
126+
Objects.requireNonNull(returnType, "returnType is required");
127+
return new UnifiedFunctionSpec(
128+
funcName,
129+
new SqlUserDefinedFunction(
130+
new SqlIdentifier(funcName, SqlParserPos.ZERO),
131+
SqlKind.OTHER_FUNCTION,
132+
returnType,
133+
InferTypes.ANY_NULLABLE,
134+
new VariadicOperandMetadata(paramNames),
135+
List::of)); // Pushdown-only: no local implementation
136+
}
137+
}
138+
139+
/**
140+
* Custom operand metadata that bypasses Calcite's built-in type checking. Calcite's {@code
141+
* FamilyOperandTypeChecker} rejects variadic calls (CALCITE-5366), so this implementation accepts
142+
* any operand types and delegates validation to pushdown.
143+
*/
144+
private record VariadicOperandMetadata(List<String> paramNames) implements SqlOperandMetadata {
145+
146+
@Override
147+
public List<String> paramNames() {
148+
return paramNames;
149+
}
150+
151+
@Override
152+
public List<RelDataType> paramTypes(RelDataTypeFactory tf) {
153+
return List.of();
154+
}
155+
156+
@Override
157+
public boolean checkOperandTypes(SqlCallBinding binding, boolean throwOnFailure) {
158+
return true; // Bypass: CALCITE-5366 breaks optional argument type checking
159+
}
160+
161+
@Override
162+
public SqlOperandCountRange getOperandCountRange() {
163+
return SqlOperandCountRanges.from(paramNames.size());
164+
}
165+
166+
@Override
167+
public String getAllowedSignatures(SqlOperator op, String opName) {
168+
return opName + "(" + String.join(", ", paramNames) + "[, option=value ...])";
169+
}
170+
}
171+
}

0 commit comments

Comments
 (0)