Skip to content

Commit be44a8e

Browse files
authored
Add auto-extract mode for spath command (#5140)
* Add auto extraction mode in spath command Signed-off-by: Chen Dai <daichen@amazon.com> * Change json_extract_all to return map<string,string> and fix null preserve issue Signed-off-by: Chen Dai <daichen@amazon.com> * Refactor all unit test and integration tests Signed-off-by: Chen Dai <daichen@amazon.com> * Refactor json_extract_all and fix stringify issue Signed-off-by: Chen Dai <daichen@amazon.com> * Fix broken IT and doctest Signed-off-by: Chen Dai <daichen@amazon.com> * Address PR comments Signed-off-by: Chen Dai <daichen@amazon.com> * Mark auto extract mode as experimental Signed-off-by: Chen Dai <daichen@amazon.com> --------- Signed-off-by: Chen Dai <daichen@amazon.com>
1 parent 2e42e3f commit be44a8e

13 files changed

Lines changed: 569 additions & 279 deletions

File tree

core/src/main/java/org/opensearch/sql/ast/tree/SPath.java

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ public class SPath extends UnresolvedPlan {
3030

3131
@Nullable private final String outField;
3232

33-
private final String path;
33+
@Nullable private final String path;
3434

3535
@Override
3636
public UnresolvedPlan attach(UnresolvedPlan child) {
@@ -48,7 +48,20 @@ public <T, C> T accept(AbstractNodeVisitor<T, C> nodeVisitor, C context) {
4848
return nodeVisitor.visitSpath(this, context);
4949
}
5050

51+
/**
52+
* Rewrites this spath node to an equivalent {@link Eval} node.
53+
*
54+
* <p>In path mode, rewrites to {@code eval output = json_extract(input, path)}. In auto-extract
55+
* mode (path is null), rewrites to {@code eval output = json_extract_all(input)}.
56+
*/
5157
public Eval rewriteAsEval() {
58+
if (path != null) {
59+
return rewritePathMode();
60+
}
61+
return rewriteAutoExtractMode();
62+
}
63+
64+
private Eval rewritePathMode() {
5265
String outField = this.outField;
5366
String unquotedPath = unquoteText(this.path);
5467
if (outField == null) {
@@ -62,4 +75,12 @@ public Eval rewriteAsEval() {
6275
AstDSL.function(
6376
"json_extract", AstDSL.field(inField), AstDSL.stringLiteral(unquotedPath))));
6477
}
78+
79+
private Eval rewriteAutoExtractMode() {
80+
String output = (outField != null) ? outField : inField;
81+
return AstDSL.eval(
82+
child,
83+
AstDSL.let(
84+
AstDSL.field(output), AstDSL.function("json_extract_all", AstDSL.field(inField))));
85+
}
6586
}

core/src/main/java/org/opensearch/sql/expression/function/jsonUDF/JsonExtractAllFunctionImpl.java

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
package org.opensearch.sql.expression.function.jsonUDF;
77

8+
import static java.util.stream.Collectors.toMap;
89
import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.TYPE_FACTORY;
910

1011
import com.fasterxml.jackson.core.JsonFactory;
@@ -51,7 +52,7 @@ public SqlReturnTypeInference getReturnTypeInference() {
5152
return ReturnTypes.explicit(
5253
TYPE_FACTORY.createMapType(
5354
TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR),
54-
TYPE_FACTORY.createSqlType(SqlTypeName.ANY),
55+
TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR),
5556
true));
5657
}
5758

@@ -72,6 +73,11 @@ public Expression implement(
7273
}
7374
}
7475

76+
/**
77+
* Evaluate the JSON extract-all function. Returns a {@code Map<String, String>} where keys are
78+
* dot-separated JSON paths (with {@code {}} suffix for arrays) and all values are strings. Merged
79+
* array values use {@code [a, b, c]} format.
80+
*/
7581
public static Object eval(Object... args) {
7682
if (args.length < 1) {
7783
return null;
@@ -82,7 +88,18 @@ public static Object eval(Object... args) {
8288
return null;
8389
}
8490

85-
return parseJson(jsonStr);
91+
Map<String, Object> parsed = parseJson(jsonStr);
92+
return parsed == null ? null : stringifyMap(parsed);
93+
}
94+
95+
// TODO: JSON parsing dominates cost; consider stringify scalars in place during parsing
96+
// to avoid this extra pass.
97+
private static Map<String, String> stringifyMap(Map<String, Object> map) {
98+
return map.entrySet().stream()
99+
.collect(
100+
toMap(
101+
Map.Entry::getKey,
102+
e -> String.valueOf(e.getValue()))); // relies on List.toString() for [a, b, c]
86103
}
87104

88105
private static Map<String, Object> parseJson(String jsonStr) {
@@ -150,7 +167,7 @@ private static Map<String, Object> parseJson(String jsonStr) {
150167
@SuppressWarnings("unchecked")
151168
private static void appendValue(Map<String, Object> resultMap, String path, Object value) {
152169
Object existingValue = resultMap.get(path);
153-
if (existingValue == null) {
170+
if (existingValue == null && !resultMap.containsKey(path)) { // key absent, not null value
154171
resultMap.put(path, value);
155172
} else if (existingValue instanceof List) {
156173
((List<Object>) existingValue).add(value);

0 commit comments

Comments
 (0)