Skip to content

Commit ca81010

Browse files
committed
Add radial size policy, sort restriction, and integration tests
- Cap radial mode (max_distance/min_score) results at maxResultWindow to prevent unbounded result sets - Reject ORDER BY on non-_score fields and _score ASC in vectorSearch since knn results are naturally sorted by _score DESC - Add 12 integration tests: 4 _explain DSL shape verification tests and 8 validation error path tests Signed-off-by: Eric Wei <mengwei.eric@gmail.com>
1 parent c1a6db0 commit ca81010

4 files changed

Lines changed: 277 additions & 10 deletions

File tree

Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,205 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.sql;
7+
8+
import static org.hamcrest.Matchers.containsString;
9+
10+
import java.io.IOException;
11+
import org.junit.Test;
12+
import org.opensearch.client.ResponseException;
13+
import org.opensearch.sql.legacy.SQLIntegTestCase;
14+
import org.opensearch.sql.legacy.TestsConstants;
15+
16+
/**
17+
* Integration tests for vectorSearch SQL table function. These tests verify DSL push-down shape via
18+
* _explain and validation error paths. They do NOT require the k-NN plugin since _explain only
19+
* parses and plans the query without executing it against a knn index.
20+
*/
21+
public class VectorSearchIT extends SQLIntegTestCase {
22+
23+
@Override
24+
protected void init() throws Exception {
25+
// _explain needs the index to exist for field resolution.
26+
loadIndex(Index.ACCOUNT);
27+
}
28+
29+
private static final String TEST_INDEX = TestsConstants.TEST_INDEX_ACCOUNT;
30+
31+
// ── DSL shape verification via _explain ───────────────────────────────
32+
33+
@Test
34+
public void testExplainTopKProducesKnnQuery() throws IOException {
35+
String explain =
36+
explainQuery(
37+
"SELECT v._id, v._score "
38+
+ "FROM vectorSearch(table='"
39+
+ TEST_INDEX
40+
+ "', field='embedding', "
41+
+ "vector='[1.0, 2.0, 3.0]', option='k=5') AS v "
42+
+ "LIMIT 5");
43+
44+
// WrapperQueryBuilder wraps the knn JSON — verify the wrapper is present
45+
// and track_scores is enabled for score preservation.
46+
assertTrue("Explain should contain wrapper query:\n" + explain, explain.contains("wrapper"));
47+
assertTrue(
48+
"Explain should contain track_scores:\n" + explain, explain.contains("track_scores"));
49+
}
50+
51+
@Test
52+
public void testExplainRadialMaxDistanceProducesKnnQuery() throws IOException {
53+
String explain =
54+
explainQuery(
55+
"SELECT v._id, v._score "
56+
+ "FROM vectorSearch(table='"
57+
+ TEST_INDEX
58+
+ "', field='embedding', "
59+
+ "vector='[1.0, 2.0]', option='max_distance=10.5') AS v "
60+
+ "LIMIT 100");
61+
62+
assertTrue("Explain should contain wrapper query:\n" + explain, explain.contains("wrapper"));
63+
}
64+
65+
@Test
66+
public void testExplainRadialMinScoreProducesKnnQuery() throws IOException {
67+
String explain =
68+
explainQuery(
69+
"SELECT v._id, v._score "
70+
+ "FROM vectorSearch(table='"
71+
+ TEST_INDEX
72+
+ "', field='embedding', "
73+
+ "vector='[1.0, 2.0]', option='min_score=0.8') AS v "
74+
+ "LIMIT 100");
75+
76+
assertTrue("Explain should contain wrapper query:\n" + explain, explain.contains("wrapper"));
77+
}
78+
79+
@Test
80+
public void testExplainPostFilterProducesBoolQuery() throws IOException {
81+
String explain =
82+
explainQuery(
83+
"SELECT v._id, v._score "
84+
+ "FROM vectorSearch(table='"
85+
+ TEST_INDEX
86+
+ "', field='embedding', "
87+
+ "vector='[1.0, 2.0, 3.0]', option='k=10') AS v "
88+
+ "WHERE v.state = 'TX' "
89+
+ "LIMIT 10");
90+
91+
assertTrue("Explain should contain bool query:\n" + explain, explain.contains("bool"));
92+
assertTrue(
93+
"Explain should contain must clause (knn in scoring context):\n" + explain,
94+
explain.contains("must"));
95+
assertTrue(
96+
"Explain should contain filter clause (WHERE in non-scoring context):\n" + explain,
97+
explain.contains("filter"));
98+
}
99+
100+
// ── Validation error paths ────────────────────────────────────────────
101+
102+
@Test
103+
public void testMutualExclusivityRejectsKAndMaxDistance() throws IOException {
104+
ResponseException ex =
105+
expectThrows(
106+
ResponseException.class,
107+
() ->
108+
executeQuery(
109+
"SELECT v._id FROM vectorSearch(table='t', field='f', "
110+
+ "vector='[1.0]', option='k=5,max_distance=10') AS v"));
111+
112+
assertThat(ex.getMessage(), containsString("Only one of"));
113+
}
114+
115+
@Test
116+
public void testMutualExclusivityRejectsKAndMinScore() throws IOException {
117+
ResponseException ex =
118+
expectThrows(
119+
ResponseException.class,
120+
() ->
121+
executeQuery(
122+
"SELECT v._id FROM vectorSearch(table='t', field='f', "
123+
+ "vector='[1.0]', option='k=5,min_score=0.5') AS v"));
124+
125+
assertThat(ex.getMessage(), containsString("Only one of"));
126+
}
127+
128+
@Test
129+
public void testKTooLargeRejects() throws IOException {
130+
ResponseException ex =
131+
expectThrows(
132+
ResponseException.class,
133+
() ->
134+
executeQuery(
135+
"SELECT v._id FROM vectorSearch(table='t', field='f', "
136+
+ "vector='[1.0]', option='k=10001') AS v"));
137+
138+
assertThat(ex.getMessage(), containsString("k must be between 1 and 10000"));
139+
}
140+
141+
@Test
142+
public void testKZeroRejects() throws IOException {
143+
ResponseException ex =
144+
expectThrows(
145+
ResponseException.class,
146+
() ->
147+
executeQuery(
148+
"SELECT v._id FROM vectorSearch(table='t', field='f', "
149+
+ "vector='[1.0]', option='k=0') AS v"));
150+
151+
assertThat(ex.getMessage(), containsString("k must be between 1 and 10000"));
152+
}
153+
154+
@Test
155+
public void testUnknownOptionKeyRejects() throws IOException {
156+
ResponseException ex =
157+
expectThrows(
158+
ResponseException.class,
159+
() ->
160+
executeQuery(
161+
"SELECT v._id FROM vectorSearch(table='t', field='f', "
162+
+ "vector='[1.0]', option='k=5,method.ef_search=100') AS v"));
163+
164+
assertThat(ex.getMessage(), containsString("Unknown option key"));
165+
}
166+
167+
@Test
168+
public void testEmptyVectorRejects() throws IOException {
169+
ResponseException ex =
170+
expectThrows(
171+
ResponseException.class,
172+
() ->
173+
executeQuery(
174+
"SELECT v._id FROM vectorSearch(table='t', field='f', "
175+
+ "vector='[]', option='k=5') AS v"));
176+
177+
assertThat(ex.getMessage(), containsString("must not be empty"));
178+
}
179+
180+
@Test
181+
public void testInvalidFieldNameRejects() throws IOException {
182+
ResponseException ex =
183+
expectThrows(
184+
ResponseException.class,
185+
() ->
186+
executeQuery(
187+
"SELECT v._id FROM vectorSearch(table='t', "
188+
+ "field='field\\\"injection', vector='[1.0]', option='k=5') AS v"));
189+
190+
assertThat(ex.getMessage(), containsString("Invalid field name"));
191+
}
192+
193+
@Test
194+
public void testMissingRequiredOptionRejects() throws IOException {
195+
ResponseException ex =
196+
expectThrows(
197+
ResponseException.class,
198+
() ->
199+
executeQuery(
200+
"SELECT v._id FROM vectorSearch(table='t', field='f', "
201+
+ "vector='[1.0]', option='') AS v"));
202+
203+
assertThat(ex.getMessage(), containsString("Missing required option"));
204+
}
205+
}

opensearch/src/main/java/org/opensearch/sql/opensearch/storage/VectorSearchIndex.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,14 @@ public TableScanBuilder createScanBuilder() {
5151
var queryBuilder = new VectorSearchQueryBuilder(requestBuilder, buildKnnQuery(), options);
5252
requestBuilder.pushDownTrackedScore(true);
5353

54-
// Top-k mode: default size to k so queries without LIMIT return k results
55-
// instead of falling into the generic large-scan path.
56-
// LIMIT pushdown will further reduce this if present.
54+
// Default size policy: LIMIT pushdown will further reduce if present.
5755
if (options.containsKey("k")) {
56+
// Top-k mode: default size to k so queries without LIMIT return k results.
5857
requestBuilder.pushDownLimitToRequestTotal(Integer.parseInt(options.get("k")), 0);
58+
} else {
59+
// Radial mode (max_distance/min_score): cap at maxResultWindow.
60+
// Without an explicit cap, radial queries could return unbounded results.
61+
requestBuilder.pushDownLimitToRequestTotal(getMaxResultWindow(), 0);
5962
}
6063

6164
Function<OpenSearchRequestBuilder, OpenSearchIndexScan> createScanOperator =

opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchQueryBuilder.java

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,15 @@
66
package org.opensearch.sql.opensearch.storage.scan;
77

88
import java.util.Map;
9+
import org.apache.commons.lang3.tuple.Pair;
910
import org.opensearch.index.query.BoolQueryBuilder;
1011
import org.opensearch.index.query.QueryBuilder;
1112
import org.opensearch.index.query.QueryBuilders;
13+
import org.opensearch.sql.ast.tree.Sort;
14+
import org.opensearch.sql.ast.tree.Sort.SortOption;
1215
import org.opensearch.sql.exception.ExpressionEvaluationException;
1316
import org.opensearch.sql.expression.Expression;
17+
import org.opensearch.sql.expression.ReferenceExpression;
1418
import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder;
1519
import org.opensearch.sql.opensearch.storage.script.filter.FilterQueryBuilder;
1620
import org.opensearch.sql.opensearch.storage.serde.DefaultExpressionSerializer;
@@ -66,9 +70,23 @@ public boolean pushDownLimit(LogicalLimit limit) {
6670
@Override
6771
public boolean pushDownSort(LogicalSort sort) {
6872
// Vector search returns results sorted by _score DESC by default.
69-
// Reject non-trivial sort pushdowns — only _score DESC is meaningful.
70-
// For now, let the parent handle it; unsupported sort rejection is
71-
// deferred until we can inspect the sort expression for _score references.
72-
return super.pushDownSort(sort);
73+
// Only _score DESC is meaningful; reject all other sort expressions.
74+
for (Pair<SortOption, Expression> sortItem : sort.getSortList()) {
75+
Expression expr = sortItem.getRight();
76+
if (!(expr instanceof ReferenceExpression)
77+
|| !"_score".equals(((ReferenceExpression) expr).getAttr())) {
78+
throw new ExpressionEvaluationException(
79+
String.format(
80+
"vectorSearch only supports ORDER BY _score DESC; "
81+
+ "unsupported sort expression: %s",
82+
expr));
83+
}
84+
if (sortItem.getLeft().getSortOrder() != Sort.SortOrder.DESC) {
85+
throw new ExpressionEvaluationException(
86+
"vectorSearch only supports ORDER BY _score DESC; _score ASC is not supported");
87+
}
88+
}
89+
// _score DESC is the natural knn order — accept without pushing sort to OpenSearch
90+
return true;
7391
}
7492
}

opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchQueryBuilderTest.java

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import org.opensearch.index.query.QueryBuilder;
2020
import org.opensearch.index.query.WrapperQueryBuilder;
2121
import org.opensearch.sql.common.setting.Settings;
22+
import org.opensearch.sql.data.type.ExprCoreType;
2223
import org.opensearch.sql.exception.ExpressionEvaluationException;
2324
import org.opensearch.sql.expression.DSL;
2425
import org.opensearch.sql.expression.ReferenceExpression;
@@ -136,7 +137,26 @@ void pushDownLimitMinScoreModeNoRestriction() {
136137
}
137138

138139
@Test
139-
void pushDownSortDelegatesToParent() {
140+
void pushDownSortScoreDescAccepted() {
141+
var requestBuilder = createRequestBuilder();
142+
var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}");
143+
var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5"));
144+
145+
var dummyChild = new LogicalValues(Collections.emptyList());
146+
var sort =
147+
new org.opensearch.sql.planner.logical.LogicalSort(
148+
dummyChild,
149+
List.of(
150+
org.apache.commons.lang3.tuple.ImmutablePair.of(
151+
org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_DESC,
152+
new ReferenceExpression("_score", ExprCoreType.FLOAT))));
153+
154+
boolean pushed = builder.pushDownSort(sort);
155+
assertTrue(pushed, "ORDER BY _score DESC should be accepted");
156+
}
157+
158+
@Test
159+
void pushDownSortNonScoreFieldRejected() {
140160
var requestBuilder = createRequestBuilder();
141161
var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}");
142162
var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5"));
@@ -150,8 +170,29 @@ void pushDownSortDelegatesToParent() {
150170
org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_ASC,
151171
new ReferenceExpression("name", STRING))));
152172

153-
boolean pushed = builder.pushDownSort(sort);
154-
assertTrue(pushed, "pushDownSort should delegate to parent and succeed");
173+
ExpressionEvaluationException ex =
174+
assertThrows(ExpressionEvaluationException.class, () -> builder.pushDownSort(sort));
175+
assertTrue(ex.getMessage().contains("unsupported sort expression"));
176+
}
177+
178+
@Test
179+
void pushDownSortScoreAscRejected() {
180+
var requestBuilder = createRequestBuilder();
181+
var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}");
182+
var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5"));
183+
184+
var dummyChild = new LogicalValues(Collections.emptyList());
185+
var sort =
186+
new org.opensearch.sql.planner.logical.LogicalSort(
187+
dummyChild,
188+
List.of(
189+
org.apache.commons.lang3.tuple.ImmutablePair.of(
190+
org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_ASC,
191+
new ReferenceExpression("_score", ExprCoreType.FLOAT))));
192+
193+
ExpressionEvaluationException ex =
194+
assertThrows(ExpressionEvaluationException.class, () -> builder.pushDownSort(sort));
195+
assertTrue(ex.getMessage().contains("_score ASC is not supported"));
155196
}
156197

157198
private OpenSearchRequestBuilder createRequestBuilder() {

0 commit comments

Comments
 (0)