Skip to content

Commit 26473bf

Browse files
committed
wip
1 parent 7d1e223 commit 26473bf

2 files changed

Lines changed: 719 additions & 0 deletions

File tree

Lines changed: 399 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,399 @@
1+
/*******************************************************************************
2+
* Copyright (c) 2026 Eclipse RDF4J contributors.
3+
*
4+
* All rights reserved. This program and the accompanying materials
5+
* are made available under the terms of the Eclipse Distribution License v1.0
6+
* which accompanies this distribution, and is available at
7+
* http://www.eclipse.org/org/documents/edl-v10.php.
8+
*
9+
* SPDX-License-Identifier: BSD-3-Clause
10+
*******************************************************************************/
11+
// Some portions generated by Codex
12+
package org.eclipse.rdf4j.sail.lmdb.benchmark;
13+
14+
import static org.junit.jupiter.api.Assertions.assertEquals;
15+
import static org.junit.jupiter.api.Assertions.assertTrue;
16+
17+
import java.io.IOException;
18+
import java.nio.file.Files;
19+
import java.nio.file.Path;
20+
import java.util.ArrayList;
21+
import java.util.LinkedHashMap;
22+
import java.util.List;
23+
import java.util.Map;
24+
import java.util.regex.Matcher;
25+
import java.util.regex.Pattern;
26+
import java.util.stream.Collectors;
27+
28+
import org.eclipse.rdf4j.benchmark.common.ThemeQueryCatalog;
29+
import org.eclipse.rdf4j.benchmark.rio.util.ThemeDataSetGenerator;
30+
import org.eclipse.rdf4j.benchmark.rio.util.ThemeDataSetGenerator.Theme;
31+
import org.eclipse.rdf4j.common.transaction.IsolationLevels;
32+
import org.eclipse.rdf4j.query.explanation.Explanation;
33+
import org.eclipse.rdf4j.repository.sail.SailRepository;
34+
import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection;
35+
import org.eclipse.rdf4j.repository.util.RDFInserter;
36+
import org.eclipse.rdf4j.sail.lmdb.LmdbStore;
37+
import org.junit.jupiter.api.Test;
38+
import org.junit.jupiter.api.io.TempDir;
39+
40+
class LmdbImprovedQueryPlanSnapshotTest {
41+
42+
private static final Pattern BENCHMARK_PARAMETERS = Pattern.compile(
43+
"# Parameters: \\(themeName = ([A-Z_]+), z_queryIndex = ([0-9]+)\\)");
44+
private static final Pattern BENCHMARK_ROW = Pattern.compile(
45+
"^ThemeQueryBenchmark\\.executeQuery\\s+(\\S+)\\s+(\\d+)\\s+avgt\\s+([0-9.]+)\\s+ms/op$");
46+
private static final String QUERY_KEYS_PROPERTY = "rdf4j.lmdb.improvedPlanSnapshot.queryKeys";
47+
private static final String RESULT_DIRECTORY = "src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/theme-query-benchmark-results";
48+
private static final String RECORDED_RESULTS_FILE = "results-2026-04-24-2.md";
49+
private static final List<TargetQuery> TARGET_QUERIES = List.of(
50+
target(Theme.ENGINEERING, 1, "results-2026-04-17.md", 138.312d, 99.772d),
51+
target(Theme.LIBRARY, 1, "results-2026-04-17.md", 143.142d, 105.387d),
52+
target(Theme.MEDICAL_RECORDS, 1, "results-2026-04-17.md", 99.712d, 53.748d),
53+
target(Theme.TRAIN, 1, "results-2026-04-17.md", 39.073d, 29.381d));
54+
55+
@Test
56+
void recordedResultsStillRepresentTwentyPercentWins() throws Exception {
57+
Map<String, Double> currentScores = parseBenchmarkScores(resultsFile(RECORDED_RESULTS_FILE));
58+
for (TargetQuery targetQuery : selectedTargetQueries()) {
59+
Map<String, Double> previousScores = parseBenchmarkScores(resultsFile(targetQuery.previousBestSource()));
60+
assertScore(previousScores, targetQuery.key(), targetQuery.previousBestScore(),
61+
targetQuery.previousBestSource());
62+
assertScore(currentScores, targetQuery.key(), targetQuery.currentScore(), RECORDED_RESULTS_FILE);
63+
assertTrue(targetQuery.improvementPercent() > 20.0d,
64+
targetQuery.key() + " should stay above the 20% improvement threshold");
65+
}
66+
}
67+
68+
@Test
69+
void optimizedPlansMatchRecordedImprovementSnapshots(@TempDir Path dataDir) throws Exception {
70+
Map<String, RecordedPlanSnapshot> expectedPlans = parseRecordedPlanSignatures(
71+
resultsFile(RECORDED_RESULTS_FILE));
72+
List<String> mismatches = new ArrayList<>();
73+
for (Map.Entry<Theme, List<TargetQuery>> entry : targetsByTheme(selectedTargetQueries()).entrySet()) {
74+
Path storeDirectory = prepareThemeStore(dataDir, entry.getKey());
75+
LmdbStore store = new LmdbStore(storeDirectory.toFile(), ConfigUtil.createConfig());
76+
SailRepository repository = new SailRepository(store);
77+
try {
78+
for (TargetQuery targetQuery : entry.getValue()) {
79+
RecordedPlanSnapshot expectedPlan = expectedPlans.get(targetQuery.key());
80+
assertTrue(expectedPlan != null,
81+
"Missing optimized plan in " + RECORDED_RESULTS_FILE + " for " + targetQuery.key());
82+
83+
primeLearnedFilterStats(repository, targetQuery);
84+
String actualPlan = explainOptimized(repository, targetQuery);
85+
assertPlanUsesRobustPlanner(targetQuery, actualPlan);
86+
87+
PlanSignature actualSignature = planSignature(actualPlan);
88+
if (!expectedPlan.signature().equals(actualSignature.lines())) {
89+
mismatches.add(mismatch(targetQuery, expectedPlan, actualSignature, actualPlan));
90+
}
91+
BenchmarkJoinEstimatorSupport.releaseEstimatorMemory(store);
92+
}
93+
} finally {
94+
shutdownAndRelease(repository, store);
95+
BenchmarkJoinEstimatorSupport.deleteStoreDirectory(storeDirectory);
96+
}
97+
}
98+
99+
assertTrue(mismatches.isEmpty(), String.join("\n\n", mismatches));
100+
}
101+
102+
private static void assertScore(Map<String, Double> scores, String key, double expectedScore, String source) {
103+
Double actualScore = scores.get(key);
104+
assertTrue(actualScore != null, "Missing benchmark row in " + source + " for " + key);
105+
assertEquals(expectedScore, actualScore.doubleValue(), 0.0001d,
106+
"Unexpected benchmark score in " + source + " for " + key);
107+
}
108+
109+
private static List<TargetQuery> selectedTargetQueries() {
110+
String selectedKeys = System.getProperty(QUERY_KEYS_PROPERTY, "").trim();
111+
if (selectedKeys.isEmpty()) {
112+
return TARGET_QUERIES;
113+
}
114+
115+
List<String> requestedKeys = List.of(selectedKeys.split(","));
116+
List<TargetQuery> selected = TARGET_QUERIES.stream()
117+
.filter(targetQuery -> requestedKeys.stream()
118+
.map(String::trim)
119+
.anyMatch(key -> key.equals(targetQuery.key()) || key.equals(targetQuery.theme().name())))
120+
.collect(Collectors.toList());
121+
assertTrue(!selected.isEmpty(), "No target queries matched " + QUERY_KEYS_PROPERTY + "=" + selectedKeys);
122+
return selected;
123+
}
124+
125+
private static Map<Theme, List<TargetQuery>> targetsByTheme(List<TargetQuery> targetQueries) {
126+
Map<Theme, List<TargetQuery>> targets = new LinkedHashMap<>();
127+
for (TargetQuery targetQuery : targetQueries) {
128+
targets.computeIfAbsent(targetQuery.theme(), ignored -> new ArrayList<>())
129+
.add(targetQuery);
130+
}
131+
return targets;
132+
}
133+
134+
private static Map<String, Double> parseBenchmarkScores(Path path) throws IOException {
135+
Map<String, Double> scores = new LinkedHashMap<>();
136+
for (String line : Files.readAllLines(path)) {
137+
Matcher matcher = BENCHMARK_ROW.matcher(line.strip().replace("`", ""));
138+
if (!matcher.matches()) {
139+
continue;
140+
}
141+
scores.put(matcher.group(1) + ":" + matcher.group(2), Double.parseDouble(matcher.group(3)));
142+
}
143+
return scores;
144+
}
145+
146+
private static Map<String, RecordedPlanSnapshot> parseRecordedPlanSignatures(Path path) throws IOException {
147+
Map<String, RecordedPlanSnapshot> signatures = new LinkedHashMap<>();
148+
List<String> lines = Files.readAllLines(path);
149+
String currentKey = null;
150+
int currentLine = -1;
151+
List<String> currentPlan = null;
152+
for (int i = 0; i < lines.size(); i++) {
153+
String line = lines.get(i);
154+
Matcher matcher = BENCHMARK_PARAMETERS.matcher(line);
155+
if (matcher.matches()) {
156+
currentKey = matcher.group(1) + ":" + matcher.group(2);
157+
currentLine = i + 1;
158+
continue;
159+
}
160+
if (line.contains("### Optimized Query ###")) {
161+
currentPlan = new ArrayList<>();
162+
continue;
163+
}
164+
if (currentPlan == null) {
165+
continue;
166+
}
167+
if (line.isBlank()) {
168+
if (currentKey != null && !currentPlan.isEmpty()) {
169+
signatures.put(currentKey, new RecordedPlanSnapshot(currentLine,
170+
planSignature(String.join("\n", currentPlan)).lines()));
171+
}
172+
currentPlan = null;
173+
continue;
174+
}
175+
currentPlan.add(line);
176+
}
177+
return signatures;
178+
}
179+
180+
private static Path resultsFile(String fileName) {
181+
Path basedirFile = Path.of(System.getProperty("basedir", "."), RESULT_DIRECTORY, fileName);
182+
if (Files.isRegularFile(basedirFile)) {
183+
return basedirFile;
184+
}
185+
186+
Path repositoryFile = Path.of("core/sail/lmdb", RESULT_DIRECTORY, fileName);
187+
if (Files.isRegularFile(repositoryFile)) {
188+
return repositoryFile;
189+
}
190+
191+
throw new AssertionError("Unable to locate benchmark results file " + fileName);
192+
}
193+
194+
private static Path prepareThemeStore(Path dataDir, Theme theme) throws Exception {
195+
Path storeDirectory = dataDir.resolve("improved-plan-snapshot-" + theme.name());
196+
LmdbStore store = new LmdbStore(storeDirectory.toFile(), ConfigUtil.createConfig());
197+
SailRepository repository = new SailRepository(store);
198+
boolean prepared = false;
199+
try {
200+
BenchmarkJoinEstimatorSupport.prepareEstimatorForBulkLoad(repository, store);
201+
loadData(repository, theme);
202+
BenchmarkJoinEstimatorSupport.persistEstimatorAfterBulkLoad(repository, store);
203+
BenchmarkJoinEstimatorSupport.persistStoreStatistics(store);
204+
prepared = true;
205+
return storeDirectory;
206+
} finally {
207+
shutdownAndRelease(repository, store);
208+
if (!prepared) {
209+
BenchmarkJoinEstimatorSupport.deleteStoreDirectory(storeDirectory);
210+
}
211+
}
212+
}
213+
214+
private static void loadData(SailRepository repository, Theme theme) throws IOException {
215+
try (SailRepositoryConnection connection = repository.getConnection()) {
216+
connection.begin(IsolationLevels.NONE);
217+
RDFInserter inserter = new RDFInserter(connection);
218+
ThemeDataSetGenerator.generate(theme, inserter);
219+
connection.commit();
220+
}
221+
}
222+
223+
private static void primeLearnedFilterStats(SailRepository repository, TargetQuery targetQuery) {
224+
String query = ThemeQueryCatalog.queryFor(targetQuery.theme(), targetQuery.queryIndex());
225+
try (SailRepositoryConnection connection = repository.getConnection()) {
226+
connection.prepareTupleQuery(query)
227+
.evaluate()
228+
.stream()
229+
.count();
230+
}
231+
}
232+
233+
private static String explainOptimized(SailRepository repository, TargetQuery targetQuery) {
234+
try (SailRepositoryConnection connection = repository.getConnection()) {
235+
String query = ThemeQueryCatalog.queryFor(targetQuery.theme(), targetQuery.queryIndex());
236+
return connection.prepareTupleQuery(query)
237+
.explain(Explanation.Level.Optimized)
238+
.toString();
239+
}
240+
}
241+
242+
private static void shutdownAndRelease(SailRepository repository, LmdbStore store) throws IOException {
243+
try {
244+
BenchmarkJoinEstimatorSupport.releaseEstimatorMemory(store);
245+
} finally {
246+
repository.shutDown();
247+
}
248+
}
249+
250+
private static void assertPlanUsesRobustPlanner(TargetQuery targetQuery, String actualPlan) {
251+
assertTrue(actualPlan.contains("plannerId=lmdb-sketch"),
252+
targetQuery.key() + " should use LMDB sketch planning:\n" + actualPlan);
253+
assertTrue(actualPlan.contains("plannerPath=ROBUST_USED"),
254+
targetQuery.key() + " should use the robust planner path:\n" + actualPlan);
255+
assertTrue(!actualPlan.contains("plannerPath=UNSUPPORTED_SHAPE"),
256+
targetQuery.key() + " should not reject the winning plan shape:\n" + actualPlan);
257+
}
258+
259+
private static PlanSignature planSignature(String plan) {
260+
List<String> signature = plan.lines()
261+
.map(LmdbImprovedQueryPlanSnapshotTest::canonicalPlanLine)
262+
.filter(line -> !line.isEmpty())
263+
.collect(Collectors.toList());
264+
return new PlanSignature(normalizeAggregateHavingWrapper(signature));
265+
}
266+
267+
private static List<String> normalizeAggregateHavingWrapper(List<String> signature) {
268+
List<String> normalized = new ArrayList<>(signature);
269+
for (int i = 0; i + 3 < normalized.size(); i++) {
270+
if (normalized.get(i).equals("Extension") && normalized.get(i + 1).equals("Filter")
271+
&& normalized.get(i + 2).equals("Extension") && normalized.get(i + 3).startsWith("Group ")) {
272+
normalized.set(i + 1, "Extension");
273+
normalized.set(i + 2, "Filter");
274+
i += 3;
275+
}
276+
}
277+
return normalized;
278+
}
279+
280+
private static String canonicalPlanLine(String line) {
281+
String value = stripTreePrefix(line);
282+
if (value.isEmpty()) {
283+
return "";
284+
}
285+
if (value.startsWith("s: Var") || value.startsWith("p: Var") || value.startsWith("o: Var")) {
286+
return canonicalVarLine(value);
287+
}
288+
if (value.startsWith("BindingSetAssignment")) {
289+
return stripTrailingMetadata(value);
290+
}
291+
if (value.startsWith("StatementPattern")) {
292+
return "StatementPattern";
293+
}
294+
if (value.startsWith("LeftJoin")) {
295+
return value.contains("LeftJoinIterator") ? "LeftJoin (LeftJoinIterator)" : "LeftJoin";
296+
}
297+
if (value.startsWith("Join")) {
298+
return value.contains("JoinIterator") ? "Join (JoinIterator)" : "Join";
299+
}
300+
if (value.startsWith("Group ")) {
301+
return stripTrailingMetadata(value);
302+
}
303+
if (value.startsWith("Filter")) {
304+
return "Filter";
305+
}
306+
if (value.startsWith("Projection")) {
307+
return "Projection";
308+
}
309+
if (value.startsWith("Extension")) {
310+
return "Extension";
311+
}
312+
if (value.startsWith("Union")) {
313+
return "Union";
314+
}
315+
if (value.startsWith("Difference")) {
316+
return "Difference";
317+
}
318+
return "";
319+
}
320+
321+
private static String stripTreePrefix(String line) {
322+
int start = 0;
323+
while (start < line.length() && !Character.isLetterOrDigit(line.charAt(start))) {
324+
start++;
325+
}
326+
return line.substring(start).trim();
327+
}
328+
329+
private static String stripTrailingMetadata(String value) {
330+
int metadataIndex = value.indexOf(") (");
331+
if (metadataIndex >= 0) {
332+
value = value.substring(0, metadataIndex + 1);
333+
}
334+
if (value.endsWith(" [left]") || value.endsWith(" [right]")) {
335+
value = value.substring(0, value.lastIndexOf(" ["));
336+
}
337+
return value;
338+
}
339+
340+
private static String canonicalVarLine(String value) {
341+
String role = value.substring(0, 3);
342+
return role + " " + canonicalVar(value.substring(3).trim());
343+
}
344+
345+
private static String canonicalVar(String value) {
346+
int valueIndex = value.indexOf("value=");
347+
if (valueIndex >= 0) {
348+
int valueEnd = value.indexOf(", anonymous", valueIndex);
349+
if (valueEnd < 0) {
350+
valueEnd = value.indexOf(") (", valueIndex);
351+
}
352+
if (valueEnd < 0) {
353+
valueEnd = value.indexOf(')', valueIndex);
354+
}
355+
return "Var (value=" + value.substring(valueIndex + "value=".length(), valueEnd) + ")";
356+
}
357+
358+
int nameIndex = value.indexOf("name=");
359+
if (nameIndex >= 0) {
360+
int nameEnd = value.indexOf(',', nameIndex);
361+
if (nameEnd < 0) {
362+
nameEnd = value.indexOf(')', nameIndex);
363+
}
364+
return "Var (name=" + value.substring(nameIndex + "name=".length(), nameEnd) + ")";
365+
}
366+
return value;
367+
}
368+
369+
private static String mismatch(TargetQuery targetQuery, RecordedPlanSnapshot expected, PlanSignature actual,
370+
String actualPlan) {
371+
return targetQuery.key() + " plan drifted from " + RECORDED_RESULTS_FILE + " line "
372+
+ expected.lineNumber() + "\nExpected canonical plan:\n" + String.join("\n", expected.signature())
373+
+ "\nActual canonical plan:\n" + String.join("\n", actual.lines()) + "\nActual plan:\n"
374+
+ actualPlan;
375+
}
376+
377+
private static TargetQuery target(Theme theme, int queryIndex, String previousBestSource, double previousBestScore,
378+
double currentScore) {
379+
return new TargetQuery(theme, queryIndex, previousBestSource, previousBestScore, currentScore);
380+
}
381+
382+
private record TargetQuery(Theme theme, int queryIndex, String previousBestSource, double previousBestScore,
383+
double currentScore) {
384+
385+
private String key() {
386+
return theme.name() + ":" + queryIndex;
387+
}
388+
389+
private double improvementPercent() {
390+
return ((previousBestScore - currentScore) / previousBestScore) * 100.0d;
391+
}
392+
}
393+
394+
private record RecordedPlanSnapshot(int lineNumber, List<String> signature) {
395+
}
396+
397+
private record PlanSignature(List<String> lines) {
398+
}
399+
}

0 commit comments

Comments
 (0)