Skip to content

Commit d18c514

Browse files
Add nomv command (#5130)
* Add nomv command + parser/AST/calcite wiring + tests + docs Signed-off-by: Srikanth Padakanti <srikanth_padakanti@apple.com> # Conflicts: # core/src/main/java/org/opensearch/sql/ast/analysis/FieldResolutionVisitor.java # integ-test/src/test/java/org/opensearch/sql/security/CalciteCrossClusterSearchIT.java # ppl/src/main/antlr/OpenSearchPPLParser.g4 # ppl/src/test/java/org/opensearch/sql/ppl/parser/FieldResolutionVisitorTest.java * Address coderrabbit suggestions. Signed-off-by: Srikanth Padakanti <srikanth_padakanti@apple.com> * Correct the no_push_down yaml expected result Signed-off-by: Srikanth Padakanti <srikanth_padakanti@apple.com> * Fix Windows CI: Override verifyPPLToSparkSQL to preserve ARRAY_JOIN '\n' delimiter Signed-off-by: Srikanth Padakanti <srikanth_padakanti@apple.com> * Fix Windows CI: Override verifyPPLToSparkSQL to preserve ARRAY_JOIN '\n' delimiter Signed-off-by: Srikanth Padakanti <srikanth_padakanti@apple.com> * Fix Windows CI: Override verifyPPLToSparkSQL to preserve ARRAY_JOIN '\n' delimiter Signed-off-by: Srikanth Padakanti <srikanth_padakanti@apple.com> * Address code review comments Signed-off-by: Srikanth Padakanti <srikanth_padakanti@apple.com> --------- Signed-off-by: Srikanth Padakanti <srikanth_padakanti@apple.com> Co-authored-by: Srikanth Padakanti <srikanth_padakanti@apple.com>
1 parent be44a8e commit d18c514

25 files changed

Lines changed: 1481 additions & 16 deletions

File tree

core/src/main/java/org/opensearch/sql/analysis/Analyzer.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@
8181
import org.opensearch.sql.ast.tree.ML;
8282
import org.opensearch.sql.ast.tree.Multisearch;
8383
import org.opensearch.sql.ast.tree.MvCombine;
84+
import org.opensearch.sql.ast.tree.NoMv;
8485
import org.opensearch.sql.ast.tree.Paginate;
8586
import org.opensearch.sql.ast.tree.Parse;
8687
import org.opensearch.sql.ast.tree.Patterns;
@@ -546,6 +547,11 @@ public LogicalPlan visitMvCombine(MvCombine node, AnalysisContext context) {
546547
throw getOnlyForCalciteException("mvcombine");
547548
}
548549

550+
@Override
551+
public LogicalPlan visitNoMv(NoMv node, AnalysisContext context) {
552+
throw getOnlyForCalciteException("nomv");
553+
}
554+
549555
/** Build {@link ParseExpression} to context and skip to child nodes. */
550556
@Override
551557
public LogicalPlan visitParse(Parse node, AnalysisContext context) {

core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
import org.opensearch.sql.ast.tree.ML;
7070
import org.opensearch.sql.ast.tree.Multisearch;
7171
import org.opensearch.sql.ast.tree.MvCombine;
72+
import org.opensearch.sql.ast.tree.NoMv;
7273
import org.opensearch.sql.ast.tree.Paginate;
7374
import org.opensearch.sql.ast.tree.Parse;
7475
import org.opensearch.sql.ast.tree.Patterns;
@@ -475,4 +476,8 @@ public T visitAddColTotals(AddColTotals node, C context) {
475476
public T visitMvCombine(MvCombine node, C context) {
476477
return visitChildren(node, context);
477478
}
479+
480+
public T visitNoMv(NoMv node, C context) {
481+
return visitChildren(node, context);
482+
}
478483
}
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.ast.tree;
7+
8+
import com.google.common.collect.ImmutableList;
9+
import java.util.List;
10+
import javax.annotation.Nullable;
11+
import lombok.EqualsAndHashCode;
12+
import lombok.Getter;
13+
import lombok.ToString;
14+
import org.opensearch.sql.ast.AbstractNodeVisitor;
15+
import org.opensearch.sql.ast.expression.DataType;
16+
import org.opensearch.sql.ast.expression.Field;
17+
import org.opensearch.sql.ast.expression.Function;
18+
import org.opensearch.sql.ast.expression.Let;
19+
import org.opensearch.sql.ast.expression.Literal;
20+
21+
/**
22+
* AST node for the NOMV command. Converts multi-value fields to single-value fields by joining
23+
* array elements with newline delimiter.
24+
*/
25+
@Getter
26+
@ToString(callSuper = true)
27+
@EqualsAndHashCode(callSuper = false)
28+
public class NoMv extends UnresolvedPlan {
29+
30+
private final Field field;
31+
@Nullable private UnresolvedPlan child;
32+
33+
public NoMv(Field field) {
34+
this.field = field;
35+
}
36+
37+
public NoMv attach(UnresolvedPlan child) {
38+
this.child = child;
39+
return this;
40+
}
41+
42+
@Override
43+
public List<UnresolvedPlan> getChild() {
44+
return child == null ? ImmutableList.of() : ImmutableList.of(child);
45+
}
46+
47+
@Override
48+
public <T, C> T accept(AbstractNodeVisitor<T, C> nodeVisitor, C context) {
49+
return nodeVisitor.visitNoMv(this, context);
50+
}
51+
52+
/**
53+
* Rewrites the nomv command as an eval command using mvjoin function with null filtering. nomv
54+
* <field> is rewritten to: eval <field> = coalesce(mvjoin(array_compact(<field>), "\n"), "")
55+
*
56+
* <p>The array_compact removes null elements from the array, and coalesce ensures empty arrays
57+
* return empty string instead of null.
58+
*
59+
* @return an Eval node representing the equivalent mvjoin operation with null filtering
60+
*/
61+
public UnresolvedPlan rewriteAsEval() {
62+
Function arrayCompactFunc = new Function("array_compact", ImmutableList.of(field));
63+
64+
Function mvjoinFunc =
65+
new Function(
66+
"mvjoin", ImmutableList.of(arrayCompactFunc, new Literal("\n", DataType.STRING)));
67+
68+
Function coalesceFunc =
69+
new Function("coalesce", ImmutableList.of(mvjoinFunc, new Literal("", DataType.STRING)));
70+
71+
Let letExpr = new Let(field, coalesceFunc);
72+
73+
Eval eval = new Eval(ImmutableList.of(letExpr));
74+
if (this.child != null) {
75+
eval.attach(this.child);
76+
}
77+
return eval;
78+
}
79+
}

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@
126126
import org.opensearch.sql.ast.tree.ML;
127127
import org.opensearch.sql.ast.tree.Multisearch;
128128
import org.opensearch.sql.ast.tree.MvCombine;
129+
import org.opensearch.sql.ast.tree.NoMv;
129130
import org.opensearch.sql.ast.tree.Paginate;
130131
import org.opensearch.sql.ast.tree.Parse;
131132
import org.opensearch.sql.ast.tree.Patterns;
@@ -3290,6 +3291,26 @@ private void restoreColumnOrderAfterArrayAgg(
32903291
relBuilder.project(projections, projectionNames, /* force= */ true);
32913292
}
32923293

3294+
/**
3295+
* Visits a NoMv (no multivalue) node by rewriting it as an Eval node.
3296+
*
3297+
* <p>The NoMv command converts multivalue (array) fields to single-value strings by joining array
3298+
* elements with newline delimiters. Internally, NoMv rewrites itself to an Eval node containing a
3299+
* mvjoin function call: {@code eval field = mvjoin(field, "\n")}.
3300+
*
3301+
* <p>The explicit cast to Eval is safe because {@link NoMv#rewriteAsEval()} always returns a
3302+
* newly constructed Eval instance and never returns null or other types.
3303+
*
3304+
* @param node the NoMv node to visit
3305+
* @param context the Calcite plan context containing schema and optimization information
3306+
* @return the RelNode resulting from visiting the rewritten Eval node
3307+
* @see NoMv#rewriteAsEval()
3308+
*/
3309+
@Override
3310+
public RelNode visitNoMv(NoMv node, CalcitePlanContext context) {
3311+
return visitEval((Eval) node.rewriteAsEval(), context);
3312+
}
3313+
32933314
@Override
32943315
public RelNode visitValues(Values values, CalcitePlanContext context) {
32953316
if (values.getValues() == null || values.getValues().isEmpty()) {

core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ public enum BuiltinFunctionName {
7171
ARRAY(FunctionName.of("array")),
7272
ARRAY_LENGTH(FunctionName.of("array_length")),
7373
ARRAY_SLICE(FunctionName.of("array_slice"), true),
74+
ARRAY_COMPACT(FunctionName.of("array_compact")),
7475
MAP_APPEND(FunctionName.of("map_append"), true),
7576
MAP_CONCAT(FunctionName.of("map_concat"), true),
7677
MAP_REMOVE(FunctionName.of("map_remove"), true),

core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/ArrayFunctionImpl.java

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
*/
3636
public class ArrayFunctionImpl extends ImplementorUDF {
3737
public ArrayFunctionImpl() {
38-
super(new ArrayImplementor(), NullPolicy.ANY);
38+
super(new ArrayImplementor(), NullPolicy.NONE);
3939
}
4040

4141
/**
@@ -81,7 +81,8 @@ public Expression implement(
8181

8282
/**
8383
* The asList will generate the List<Object>. We need to convert internally, otherwise, the
84-
* calcite will directly cast like DOUBLE -> INTEGER, which throw error
84+
* calcite will directly cast like DOUBLE -> INTEGER, which throw error. Null elements are
85+
* preserved in the array.
8586
*/
8687
public static Object internalCast(Object... args) {
8788
List<Object> originalList = (List<Object>) args[0];
@@ -93,7 +94,9 @@ public static Object internalCast(Object... args) {
9394
originalList.stream()
9495
.map(
9596
num -> {
96-
if (num instanceof BigDecimal) {
97+
if (num == null) {
98+
return null;
99+
} else if (num instanceof BigDecimal) {
97100
return (BigDecimal) num;
98101
} else {
99102
return BigDecimal.valueOf(((Number) num).doubleValue());
@@ -104,17 +107,20 @@ public static Object internalCast(Object... args) {
104107
case DOUBLE:
105108
result =
106109
originalList.stream()
107-
.map(i -> (Object) ((Number) i).doubleValue())
110+
.map(i -> i == null ? null : (Object) ((Number) i).doubleValue())
108111
.collect(Collectors.toList());
109112
break;
110113
case FLOAT:
111114
result =
112115
originalList.stream()
113-
.map(i -> (Object) ((Number) i).floatValue())
116+
.map(i -> i == null ? null : (Object) ((Number) i).floatValue())
114117
.collect(Collectors.toList());
115118
break;
116119
case VARCHAR, CHAR:
117-
result = originalList.stream().map(i -> (Object) i.toString()).collect(Collectors.toList());
120+
result =
121+
originalList.stream()
122+
.map(i -> i == null ? null : (Object) i.toString())
123+
.collect(Collectors.toList());
118124
break;
119125
default:
120126
result = originalList;

core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import static org.opensearch.sql.expression.function.BuiltinFunctionName.ADDTIME;
1717
import static org.opensearch.sql.expression.function.BuiltinFunctionName.AND;
1818
import static org.opensearch.sql.expression.function.BuiltinFunctionName.ARRAY;
19+
import static org.opensearch.sql.expression.function.BuiltinFunctionName.ARRAY_COMPACT;
1920
import static org.opensearch.sql.expression.function.BuiltinFunctionName.ARRAY_LENGTH;
2021
import static org.opensearch.sql.expression.function.BuiltinFunctionName.ARRAY_SLICE;
2122
import static org.opensearch.sql.expression.function.BuiltinFunctionName.ASCII;
@@ -990,12 +991,7 @@ void populate() {
990991
PPLTypeChecker.family(SqlTypeFamily.ANY));
991992

992993
// Register MVJOIN to use Calcite's ARRAY_JOIN
993-
register(
994-
MVJOIN,
995-
(FunctionImp2)
996-
(builder, array, delimiter) ->
997-
builder.makeCall(SqlLibraryOperators.ARRAY_JOIN, array, delimiter),
998-
PPLTypeChecker.family(SqlTypeFamily.ARRAY, SqlTypeFamily.CHARACTER));
994+
registerOperator(MVJOIN, SqlLibraryOperators.ARRAY_JOIN);
999995

1000996
// Register SPLIT with custom logic for empty delimiter
1001997
// Case 1: Delimiter is not empty string, use SPLIT
@@ -1048,6 +1044,7 @@ void populate() {
10481044
registerOperator(MAP_REMOVE, PPLBuiltinOperators.MAP_REMOVE);
10491045
registerOperator(ARRAY_LENGTH, SqlLibraryOperators.ARRAY_LENGTH);
10501046
registerOperator(ARRAY_SLICE, SqlLibraryOperators.ARRAY_SLICE);
1047+
registerOperator(ARRAY_COMPACT, SqlLibraryOperators.ARRAY_COMPACT);
10511048
registerOperator(FORALL, PPLBuiltinOperators.FORALL);
10521049
registerOperator(EXISTS, PPLBuiltinOperators.EXISTS);
10531050
registerOperator(FILTER, PPLBuiltinOperators.FILTER);

0 commit comments

Comments
 (0)