Skip to content

Commit 624f5e6

Browse files
authored
[BugFix] Fix the bug when boolean comparison condition is simplifed to field (#5071)
* Fix the bug when boolean comparison condition is simplifed to field Signed-off-by: Songkan Tang <songkant@amazon.com> * Update tests and cover more cases Signed-off-by: Songkan Tang <songkant@amazon.com> * Correct the logic of not boolean comparison Signed-off-by: Songkan Tang <songkant@amazon.com> * Add missing IS_FALSE RexNode translation Signed-off-by: Songkan Tang <songkant@amazon.com> * Remove unnecessary boolean expression conversion Signed-off-by: Songkan Tang <songkant@amazon.com> * Fix spotless check Signed-off-by: Songkan Tang <songkant@amazon.com> * Refactor PredicateAnalyzer logic a bit Signed-off-by: Songkan Tang <songkant@amazon.com> * Add more strict not expression match for field Signed-off-by: Songkan Tang <songkant@amazon.com> * Fix spotless check and flaky test Signed-off-by: Songkan Tang <songkant@amazon.com> * Cover more cases for IS_FALSE, IS_NOT_TRUE, IS_NOT_FALSE Signed-off-by: Songkan Tang <songkant@amazon.com> * Complement the truth tests for expressions Signed-off-by: Songkan Tang <songkant@amazon.com> * Fix logic Signed-off-by: Songkan Tang <songkant@amazon.com> * Fix spotless check Signed-off-by: Songkan Tang <songkant@amazon.com> * Add additional boolean filter only pushdown explain test cases Signed-off-by: Songkan Tang <songkant@amazon.com> --------- Signed-off-by: Songkan Tang <songkant@amazon.com>
1 parent b60ba30 commit 624f5e6

13 files changed

Lines changed: 544 additions & 37 deletions

File tree

core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java

Lines changed: 80 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,12 @@
2727
import org.apache.calcite.rel.type.RelDataTypeFactory;
2828
import org.apache.calcite.rex.RexBuilder;
2929
import org.apache.calcite.rex.RexCall;
30+
import org.apache.calcite.rex.RexInputRef;
3031
import org.apache.calcite.rex.RexLambdaRef;
32+
import org.apache.calcite.rex.RexLiteral;
3133
import org.apache.calcite.rex.RexNode;
3234
import org.apache.calcite.sql.SqlIntervalQualifier;
35+
import org.apache.calcite.sql.SqlOperator;
3336
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
3437
import org.apache.calcite.sql.type.ArraySqlType;
3538
import org.apache.calcite.sql.type.SqlTypeName;
@@ -190,8 +193,15 @@ public RexNode visitXor(Xor node, CalcitePlanContext context) {
190193

191194
@Override
192195
public RexNode visitNot(Not node, CalcitePlanContext context) {
193-
final RexNode expr = analyze(node.getExpression(), context);
194-
return context.relBuilder.not(expr);
196+
// Special handling for NOT(boolean_field = true/false) - see boolean comparison helpers below
197+
UnresolvedExpression inner = node.getExpression();
198+
if (inner instanceof Compare compare && "=".equals(compare.getOperator())) {
199+
RexNode result = tryMakeBooleanNotEquals(compare, context);
200+
if (result != null) {
201+
return result;
202+
}
203+
}
204+
return context.relBuilder.not(analyze(node.getExpression(), context));
195205
}
196206

197207
@Override
@@ -221,7 +231,15 @@ public RexNode visitIn(In node, CalcitePlanContext context) {
221231
public RexNode visitCompare(Compare node, CalcitePlanContext context) {
222232
RexNode left = analyze(node.getLeft(), context);
223233
RexNode right = analyze(node.getRight(), context);
224-
return PPLFuncImpTable.INSTANCE.resolve(context.rexBuilder, node.getOperator(), left, right);
234+
String op = node.getOperator();
235+
// Handle boolean_field != literal -> IS_NOT_TRUE/IS_NOT_FALSE
236+
if ("!=".equals(op) || "<>".equals(op)) {
237+
RexNode result = tryMakeBooleanNotEquals(left, right, context);
238+
if (result != null) {
239+
return result;
240+
}
241+
}
242+
return PPLFuncImpTable.INSTANCE.resolve(context.rexBuilder, op, left, right);
225243
}
226244

227245
@Override
@@ -251,6 +269,65 @@ public RexNode visitEqualTo(EqualTo node, CalcitePlanContext context) {
251269
return context.rexBuilder.equals(left, right);
252270
}
253271

272+
// ==================== Boolean NOT comparison helpers ====================
273+
// Calcite's RexSimplify transforms:
274+
// - "field = true" -> "field" (handled by PredicateAnalyzer detecting boolean field)
275+
// - "field = false" -> "NOT(field)" (handled by PredicateAnalyzer.prefix())
276+
// - "NOT(field = true)" -> "NOT(field)" -> would generate term{false}, have conflicted semantics
277+
// - "NOT(field = false)" -> "NOT(NOT(field))" -> "field" -> would generate term{true}, have
278+
// conflicted semantics
279+
// We intercept NOT(field = true/false) at AST level before Calcite optimization:
280+
// - "NOT(field = true)" -> IS_NOT_TRUE(field): matches false, null, missing
281+
// - "NOT(field = false)" -> IS_NOT_FALSE(field): matches true, null, missing
282+
283+
/**
284+
* Try to convert boolean_field != literal or NOT(boolean_field = literal) to
285+
* IS_NOT_TRUE/IS_NOT_FALSE. This preserves correct null-handling semantics.
286+
*/
287+
private RexNode tryMakeBooleanNotEquals(RexNode left, RexNode right, CalcitePlanContext context) {
288+
BooleanFieldComparison cmp = extractBooleanFieldComparison(left, right);
289+
if (cmp == null) {
290+
return null;
291+
}
292+
SqlOperator op =
293+
Boolean.FALSE.equals(cmp.literalValue)
294+
? SqlStdOperatorTable.IS_NOT_FALSE
295+
: SqlStdOperatorTable.IS_NOT_TRUE;
296+
return context.rexBuilder.makeCall(op, cmp.field);
297+
}
298+
299+
/** Overload for NOT(Compare) AST pattern. */
300+
private RexNode tryMakeBooleanNotEquals(Compare compare, CalcitePlanContext context) {
301+
return tryMakeBooleanNotEquals(
302+
analyze(compare.getLeft(), context), analyze(compare.getRight(), context), context);
303+
}
304+
305+
/** Represents a comparison between a boolean field and a boolean literal. */
306+
private record BooleanFieldComparison(RexNode field, Boolean literalValue) {}
307+
308+
/**
309+
* Extract boolean field and literal value from a comparison, normalizing operand order. Returns
310+
* null if the comparison is not between a boolean field and a boolean literal.
311+
*/
312+
private BooleanFieldComparison extractBooleanFieldComparison(RexNode left, RexNode right) {
313+
if (isBooleanField(left) && isBooleanLiteral(right)) {
314+
return new BooleanFieldComparison(left, ((RexLiteral) right).getValueAs(Boolean.class));
315+
}
316+
if (isBooleanField(right) && isBooleanLiteral(left)) {
317+
return new BooleanFieldComparison(right, ((RexLiteral) left).getValueAs(Boolean.class));
318+
}
319+
return null;
320+
}
321+
322+
private boolean isBooleanField(RexNode node) {
323+
// Only match actual field references, not arbitrary boolean expressions like CASE
324+
return node instanceof RexInputRef && node.getType().getSqlTypeName() == SqlTypeName.BOOLEAN;
325+
}
326+
327+
private boolean isBooleanLiteral(RexNode node) {
328+
return node instanceof RexLiteral && node.getType().getSqlTypeName() == SqlTypeName.BOOLEAN;
329+
}
330+
254331
/** Resolve qualified name. Note, the name should be case-sensitive. */
255332
@Override
256333
public RexNode visitQualifiedName(QualifiedName node, CalcitePlanContext context) {

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2566,4 +2566,118 @@ private String explainQueryWithFetchSizeYaml(String query, int fetchSize) throws
25662566
Assert.assertEquals(200, response.getStatusLine().getStatusCode());
25672567
return getResponseBody(response, true);
25682568
}
2569+
2570+
// Only for Calcite - Test for issue #5054: query_string combined with boolean comparison
2571+
// This mimics the query pattern: "source=test url=http | where is_internal=true"
2572+
// where query_string search is combined with boolean field comparison.
2573+
@Test
2574+
public void testFilterQueryStringWithBooleanFieldPushDown() throws IOException {
2575+
enabledOnlyWhenPushdownIsEnabled();
2576+
// Verifies that query_string combined with boolean field comparison produces pure DSL query
2577+
// without embedded script. The boolean comparison should be pushed down as a term query.
2578+
String query =
2579+
StringUtils.format(
2580+
"source=%s firstname=Amber | where male = true | fields firstname", TEST_INDEX_BANK);
2581+
var result = explainQueryYaml(query);
2582+
String expected = loadExpectedPlan("explain_filter_query_string_with_boolean.yaml");
2583+
assertYamlEqualsIgnoreId(expected, result);
2584+
}
2585+
2586+
@Test
2587+
public void testFilterBooleanFieldWithTRUE() throws IOException {
2588+
enabledOnlyWhenPushdownIsEnabled();
2589+
// Test boolean literal with uppercase TRUE
2590+
String query =
2591+
StringUtils.format(
2592+
"source=%s firstname=Amber | where male = TRUE | fields firstname", TEST_INDEX_BANK);
2593+
var result = explainQueryYaml(query);
2594+
String expected = loadExpectedPlan("explain_filter_query_string_with_boolean.yaml");
2595+
assertYamlEqualsIgnoreId(expected, result);
2596+
}
2597+
2598+
@Test
2599+
public void testFilterBooleanFieldWithStringLiteral() throws IOException {
2600+
enabledOnlyWhenPushdownIsEnabled();
2601+
// Test boolean field with string literal 'TRUE' - Calcite converts to boolean true
2602+
// and generates same term query as boolean literal
2603+
String query =
2604+
StringUtils.format(
2605+
"source=%s firstname=Amber | where male = 'TRUE' | fields firstname", TEST_INDEX_BANK);
2606+
var result = explainQueryYaml(query);
2607+
String expected = loadExpectedPlan("explain_filter_query_string_with_boolean.yaml");
2608+
assertYamlEqualsIgnoreId(expected, result);
2609+
}
2610+
2611+
@Test
2612+
public void testFilterBooleanFieldFalse() throws IOException {
2613+
enabledOnlyWhenPushdownIsEnabled();
2614+
// male = false is converted to IS_FALSE(male) which generates term query {value: false}.
2615+
// This only matches documents where male is explicitly false (not null or missing).
2616+
String query =
2617+
StringUtils.format(
2618+
"source=%s firstname=Amber | where male = false | fields firstname", TEST_INDEX_BANK);
2619+
var result = explainQueryYaml(query);
2620+
String expected = loadExpectedPlan("explain_filter_query_string_with_boolean_false.yaml");
2621+
assertYamlEqualsIgnoreId(expected, result);
2622+
}
2623+
2624+
@Test
2625+
public void testFilterBooleanFieldNotTrue() throws IOException {
2626+
enabledOnlyWhenPushdownIsEnabled();
2627+
// NOT(male = true) generates IS_NOT_TRUE which produces mustNot(term query {value: true})
2628+
// This matches documents where male is false, null, or missing
2629+
String query =
2630+
StringUtils.format(
2631+
"source=%s firstname=Amber | where NOT male = true | fields firstname",
2632+
TEST_INDEX_BANK);
2633+
var result = explainQueryYaml(query);
2634+
String expected = loadExpectedPlan("explain_filter_query_string_with_boolean_not_true.yaml");
2635+
assertYamlEqualsIgnoreId(expected, result);
2636+
}
2637+
2638+
@Test
2639+
public void testFilterBooleanFieldNotEquals() throws IOException {
2640+
enabledOnlyWhenPushdownIsEnabled();
2641+
// male != true generates IS_NOT_TRUE which produces mustNot(term query {value: true})
2642+
// This matches documents where male is false, null, or missing
2643+
String query =
2644+
StringUtils.format(
2645+
"source=%s firstname=Amber | where male != true | fields firstname", TEST_INDEX_BANK);
2646+
var result = explainQueryYaml(query);
2647+
String expected = loadExpectedPlan("explain_filter_query_string_with_boolean_not_true.yaml");
2648+
assertYamlEqualsIgnoreId(expected, result);
2649+
}
2650+
2651+
@Test
2652+
public void testFilterBooleanFieldOnlyTrue() throws IOException {
2653+
enabledOnlyWhenPushdownIsEnabled();
2654+
// Test single boolean filter without query_string
2655+
String query =
2656+
StringUtils.format("source=%s | where male = true | fields firstname", TEST_INDEX_BANK);
2657+
var result = explainQueryYaml(query);
2658+
String expected = loadExpectedPlan("explain_filter_boolean_only_true.yaml");
2659+
assertYamlEqualsIgnoreId(expected, result);
2660+
}
2661+
2662+
@Test
2663+
public void testFilterBooleanFieldOnlyFalse() throws IOException {
2664+
enabledOnlyWhenPushdownIsEnabled();
2665+
// Test single boolean filter with false value without query_string
2666+
String query =
2667+
StringUtils.format("source=%s | where male = false | fields firstname", TEST_INDEX_BANK);
2668+
var result = explainQueryYaml(query);
2669+
String expected = loadExpectedPlan("explain_filter_boolean_only_false.yaml");
2670+
assertYamlEqualsIgnoreId(expected, result);
2671+
}
2672+
2673+
@Test
2674+
public void testFilterBooleanFieldOnlyNotTrue() throws IOException {
2675+
enabledOnlyWhenPushdownIsEnabled();
2676+
// Test single NOT boolean filter without query_string
2677+
String query =
2678+
StringUtils.format("source=%s | where NOT male = true | fields firstname", TEST_INDEX_BANK);
2679+
var result = explainQueryYaml(query);
2680+
String expected = loadExpectedPlan("explain_filter_boolean_only_not_true.yaml");
2681+
assertYamlEqualsIgnoreId(expected, result);
2682+
}
25692683
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
calcite:
2+
logical: |
3+
LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
4+
LogicalProject(firstname=[$1])
5+
LogicalFilter(condition=[NOT($12)])
6+
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
7+
physical: |
8+
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->NOT($1), PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"term":{"male":{"value":false,"boost":1.0}}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
calcite:
2+
logical: |
3+
LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
4+
LogicalProject(firstname=[$1])
5+
LogicalFilter(condition=[IS NOT TRUE($12)])
6+
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
7+
physical: |
8+
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->IS NOT TRUE($1), PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must_not":[{"term":{"male":{"value":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
calcite:
2+
logical: |
3+
LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
4+
LogicalProject(firstname=[$1])
5+
LogicalFilter(condition=[$12])
6+
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
7+
physical: |
8+
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->$1, PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"term":{"male":{"value":true,"boost":1.0}}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
calcite:
2+
logical: |
3+
LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
4+
LogicalProject(firstname=[$1])
5+
LogicalFilter(condition=[$12])
6+
LogicalFilter(condition=[query_string(MAP('query', 'firstname:Amber':VARCHAR))])
7+
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
8+
physical: |
9+
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->AND(query_string(MAP('query', 'firstname:Amber':VARCHAR)), $1), PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"firstname:Amber","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"term":{"male":{"value":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
calcite:
2+
logical: |
3+
LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
4+
LogicalProject(firstname=[$1])
5+
LogicalFilter(condition=[NOT($12)])
6+
LogicalFilter(condition=[query_string(MAP('query', 'firstname:Amber':VARCHAR))])
7+
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
8+
physical: |
9+
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->AND(query_string(MAP('query', 'firstname:Amber':VARCHAR)), NOT($1)), PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"firstname:Amber","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"term":{"male":{"value":false,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
calcite:
2+
logical: |
3+
LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
4+
LogicalProject(firstname=[$1])
5+
LogicalFilter(condition=[IS NOT TRUE($12)])
6+
LogicalFilter(condition=[query_string(MAP('query', 'firstname:Amber':VARCHAR))])
7+
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
8+
physical: |
9+
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[firstname, male], FILTER->AND(query_string(MAP('query', 'firstname:Amber':VARCHAR)), IS NOT TRUE($1)), PROJECT->[firstname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"query_string":{"query":"firstname:Amber","fields":[],"type":"best_fields","default_operator":"or","max_determinized_states":10000,"enable_position_increments":true,"fuzziness":"AUTO","fuzzy_prefix_length":0,"fuzzy_max_expansions":50,"phrase_slop":0,"escape":false,"auto_generate_synonyms_phrase_query":true,"fuzzy_transpositions":true,"boost":1.0}},{"bool":{"must_not":[{"term":{"male":{"value":true,"boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["firstname"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])

integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4866.yml

Lines changed: 6 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -42,24 +42,10 @@ teardown:
4242
ppl:
4343
body:
4444
query: 'source=hdfs_logs | patterns content method=brain mode=aggregation max_sample_count=2 variable_count_threshold=3'
45-
- match: {"total": 2}
46-
- match: {"schema": [{"name": "patterns_field", "type": "string"}, {"name": "pattern_count", "type": "bigint"}, {"name": "sample_logs", "type": "array"}]}
47-
- match: {"datarows": [
48-
[
49-
"PacketResponder failed for blk_<*>",
50-
2,
51-
[
52-
"PacketResponder failed for blk_6996194389878584395",
53-
"PacketResponder failed for blk_-1547954353065580372"
54-
]
55-
],
56-
[
57-
"BLOCK* NameSystem.addStoredBlock: blockMap updated: <*IP*> is added to blk_<*> size <*>",
58-
2,
59-
[
60-
"BLOCK* NameSystem.addStoredBlock: blockMap updated: 10.251.31.85:50010 is added to blk_-7017553867379051457 size 67108864",
61-
"BLOCK* NameSystem.addStoredBlock: blockMap updated: 10.251.107.19:50010 is added to blk_-3249711809227781266 size 67108864"
62-
]
63-
]
64-
]}
45+
- match: { total: 2 }
46+
- match: { schema: [{"name": "patterns_field", "type": "string"}, {"name": "pattern_count", "type": "bigint"}, {"name": "sample_logs", "type": "array"}] }
47+
- length: { datarows: 2 }
48+
# Verify each pattern has exactly 2 sample logs (max_sample_count=2)
49+
- length: { datarows.0.2: 2 }
50+
- length: { datarows.1.2: 2 }
6551

0 commit comments

Comments
 (0)