Skip to content

Commit c7c4130

Browse files
committed
Implement efficient filter pushdown branching and build-time validation in VectorSearchQueryBuilder
Signed-off-by: Eric Wei <mengwei.eric@gmail.com>
1 parent 4ab27ae commit c7c4130

2 files changed

Lines changed: 98 additions & 6 deletions

File tree

opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchQueryBuilder.java

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
package org.opensearch.sql.opensearch.storage.scan;
77

88
import java.util.Map;
9+
import java.util.function.Function;
910
import org.apache.commons.lang3.tuple.Pair;
1011
import org.opensearch.index.query.BoolQueryBuilder;
1112
import org.opensearch.index.query.QueryBuilder;
@@ -16,6 +17,7 @@
1617
import org.opensearch.sql.expression.Expression;
1718
import org.opensearch.sql.expression.ReferenceExpression;
1819
import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder;
20+
import org.opensearch.sql.opensearch.storage.FilterType;
1921
import org.opensearch.sql.opensearch.storage.script.filter.FilterQueryBuilder;
2022
import org.opensearch.sql.opensearch.storage.serde.DefaultExpressionSerializer;
2123
import org.opensearch.sql.planner.logical.LogicalFilter;
@@ -27,31 +29,62 @@
2729
* WHERE filters in a non-scoring (filter) context. This prevents the knn relevance scores from
2830
* being destroyed when a WHERE clause is pushed down.
2931
*
30-
* <p>Without this, the default pushDownFilter wraps both queries into bool.filter, which is a
31-
* non-scoring context.
32+
* <p>Supports two filter placement strategies via {@link FilterType}:
33+
*
34+
* <ul>
35+
* <li>{@code POST} — WHERE in {@code bool.filter} outside knn (post-filtering, default)
36+
* <li>{@code EFFICIENT} — WHERE inside {@code knn.filter} for pre-filtering during ANN search
37+
* </ul>
3238
*/
3339
public class VectorSearchQueryBuilder extends OpenSearchIndexScanQueryBuilder {
3440

3541
private final QueryBuilder knnQuery;
3642
private final Map<String, String> options;
43+
private final FilterType filterType;
44+
private final boolean filterTypeExplicit;
45+
private final Function<QueryBuilder, QueryBuilder> rebuildKnnWithFilter;
46+
private boolean filterPushed = false;
3747

48+
/** Full constructor with filter type support. */
3849
public VectorSearchQueryBuilder(
39-
OpenSearchRequestBuilder requestBuilder, QueryBuilder knnQuery, Map<String, String> options) {
50+
OpenSearchRequestBuilder requestBuilder,
51+
QueryBuilder knnQuery,
52+
Map<String, String> options,
53+
FilterType filterType,
54+
boolean filterTypeExplicit,
55+
Function<QueryBuilder, QueryBuilder> rebuildKnnWithFilter) {
4056
super(requestBuilder);
4157
requestBuilder.getSourceBuilder().query(knnQuery);
4258
this.knnQuery = knnQuery;
4359
this.options = options;
60+
this.filterType = filterType != null ? filterType : FilterType.POST;
61+
this.filterTypeExplicit = filterTypeExplicit;
62+
this.rebuildKnnWithFilter = rebuildKnnWithFilter;
63+
}
64+
65+
/**
 * Backward-compatible three-argument constructor.
 *
 * <p>Delegates to the full constructor with {@link FilterType#POST}, a non-explicit filter type,
 * and no knn rebuild callback.
 *
 * @param requestBuilder request builder passed through to the full constructor
 * @param knnQuery the knn query passed through to the full constructor
 * @param options options map passed through to the full constructor
 */
public VectorSearchQueryBuilder(
    OpenSearchRequestBuilder requestBuilder, QueryBuilder knnQuery, Map<String, String> options) {
  this(
      requestBuilder,
      knnQuery,
      options,
      FilterType.POST,
      /* filterTypeExplicit= */ false,
      /* rebuildKnnWithFilter= */ null);
}
4572

4673
@Override
4774
public boolean pushDownFilter(LogicalFilter filter) {
4875
FilterQueryBuilder queryBuilder = new FilterQueryBuilder(new DefaultExpressionSerializer());
4976
Expression queryCondition = filter.getCondition();
5077
QueryBuilder whereQuery = queryBuilder.build(queryCondition);
78+
filterPushed = true;
5179

52-
// Combine: knn in must (scores), WHERE in filter (no scoring impact)
53-
BoolQueryBuilder combined = QueryBuilders.boolQuery().must(knnQuery).filter(whereQuery);
54-
requestBuilder.getSourceBuilder().query(combined);
80+
if (filterType == FilterType.EFFICIENT) {
81+
QueryBuilder rebuiltKnn = rebuildKnnWithFilter.apply(whereQuery);
82+
requestBuilder.getSourceBuilder().query(rebuiltKnn);
83+
} else {
84+
// POST mode: knn in must (scores), WHERE in filter (no scoring impact)
85+
BoolQueryBuilder combined = QueryBuilders.boolQuery().must(knnQuery).filter(whereQuery);
86+
requestBuilder.getSourceBuilder().query(combined);
87+
}
5588
return true;
5689
}
5790

@@ -94,4 +127,13 @@ public boolean pushDownSort(LogicalSort sort) {
94127
}
95128
return true;
96129
}
130+
131+
@Override
132+
public OpenSearchRequestBuilder build() {
133+
if (filterTypeExplicit && !filterPushed) {
134+
throw new ExpressionEvaluationException(
135+
"filter_type requires a pushdownable WHERE clause");
136+
}
137+
return super.build();
138+
}
97139
}

opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchQueryBuilderTest.java

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import java.util.Collections;
1515
import java.util.List;
1616
import java.util.Map;
17+
import java.util.function.Function;
1718
import org.junit.jupiter.api.Test;
1819
import org.opensearch.index.query.BoolQueryBuilder;
1920
import org.opensearch.index.query.QueryBuilder;
@@ -25,6 +26,7 @@
2526
import org.opensearch.sql.expression.ReferenceExpression;
2627
import org.opensearch.sql.opensearch.data.value.OpenSearchExprValueFactory;
2728
import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder;
29+
import org.opensearch.sql.opensearch.storage.FilterType;
2830
import org.opensearch.sql.planner.logical.LogicalFilter;
2931
import org.opensearch.sql.planner.logical.LogicalLimit;
3032
import org.opensearch.sql.planner.logical.LogicalValues;
@@ -267,6 +269,54 @@ void pushDownFilterCompoundPredicateSurvives() {
267269
assertEquals(1, boolQuery.filter().size(), "compound WHERE should be in filter (non-scoring)");
268270
}
269271

272+
@Test
273+
void pushDownFilterEfficientPlacesInsideKnn() {
274+
var requestBuilder = createRequestBuilder();
275+
var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}");
276+
// Callback simulates VectorSearchIndex rebuilding knn with filter
277+
Function<QueryBuilder, QueryBuilder> rebuildWithFilter =
278+
whereQuery -> new WrapperQueryBuilder("{\"knn\":{\"filter\":\"embedded\"}}");
279+
var builder =
280+
new VectorSearchQueryBuilder(
281+
requestBuilder, knnQuery, Map.of("k", "5"),
282+
FilterType.EFFICIENT, true, rebuildWithFilter);
283+
284+
var condition = DSL.equal(new ReferenceExpression("city", STRING), DSL.literal("Miami"));
285+
var dummyChild = new LogicalValues(Collections.emptyList());
286+
var filter = new LogicalFilter(dummyChild, condition);
287+
288+
boolean pushed = builder.pushDownFilter(filter);
289+
290+
assertTrue(pushed, "pushDownFilter should succeed");
291+
QueryBuilder resultQuery = requestBuilder.getSourceBuilder().query();
292+
assertTrue(
293+
resultQuery instanceof WrapperQueryBuilder,
294+
"Efficient filter should produce a WrapperQueryBuilder (rebuilt knn), not BoolQuery");
295+
}
296+
297+
@Test
298+
void pushDownFilterExplicitPostProducesBool() {
299+
var requestBuilder = createRequestBuilder();
300+
var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}");
301+
var builder =
302+
new VectorSearchQueryBuilder(
303+
requestBuilder, knnQuery, Map.of("k", "5"),
304+
FilterType.POST, true, null);
305+
306+
var condition = DSL.equal(new ReferenceExpression("name", STRING), DSL.literal("John"));
307+
var dummyChild = new LogicalValues(Collections.emptyList());
308+
var filter = new LogicalFilter(dummyChild, condition);
309+
310+
boolean pushed = builder.pushDownFilter(filter);
311+
312+
assertTrue(pushed);
313+
QueryBuilder resultQuery = requestBuilder.getSourceBuilder().query();
314+
assertTrue(resultQuery instanceof BoolQueryBuilder);
315+
BoolQueryBuilder boolQuery = (BoolQueryBuilder) resultQuery;
316+
assertEquals(1, boolQuery.must().size());
317+
assertEquals(1, boolQuery.filter().size());
318+
}
319+
270320
private OpenSearchRequestBuilder createRequestBuilder() {
271321
return new OpenSearchRequestBuilder(
272322
mock(OpenSearchExprValueFactory.class), 10000, mock(Settings.class));

0 commit comments

Comments
 (0)