|
| 1 | +/******************************************************************************* |
| 2 | + * Copyright (c) 2026 Eclipse RDF4J contributors. |
| 3 | + * |
| 4 | + * All rights reserved. This program and the accompanying materials |
| 5 | + * are made available under the terms of the Eclipse Distribution License v1.0 |
| 6 | + * which accompanies this distribution, and is available at |
| 7 | + * http://www.eclipse.org/org/documents/edl-v10.php. |
| 8 | + * |
| 9 | + * SPDX-License-Identifier: BSD-3-Clause |
| 10 | + *******************************************************************************/ |
| 11 | +// Some portions generated by Codex |
| 12 | +package org.eclipse.rdf4j.sail.lmdb.benchmark; |
| 13 | + |
| 14 | +import static org.junit.jupiter.api.Assertions.assertEquals; |
| 15 | +import static org.junit.jupiter.api.Assertions.assertTrue; |
| 16 | + |
| 17 | +import java.io.IOException; |
| 18 | +import java.nio.file.Files; |
| 19 | +import java.nio.file.Path; |
| 20 | +import java.util.ArrayList; |
| 21 | +import java.util.LinkedHashMap; |
| 22 | +import java.util.List; |
| 23 | +import java.util.Map; |
| 24 | +import java.util.regex.Matcher; |
| 25 | +import java.util.regex.Pattern; |
| 26 | +import java.util.stream.Collectors; |
| 27 | + |
| 28 | +import org.eclipse.rdf4j.benchmark.common.ThemeQueryCatalog; |
| 29 | +import org.eclipse.rdf4j.benchmark.rio.util.ThemeDataSetGenerator; |
| 30 | +import org.eclipse.rdf4j.benchmark.rio.util.ThemeDataSetGenerator.Theme; |
| 31 | +import org.eclipse.rdf4j.common.transaction.IsolationLevels; |
| 32 | +import org.eclipse.rdf4j.query.explanation.Explanation; |
| 33 | +import org.eclipse.rdf4j.repository.sail.SailRepository; |
| 34 | +import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection; |
| 35 | +import org.eclipse.rdf4j.repository.util.RDFInserter; |
| 36 | +import org.eclipse.rdf4j.sail.lmdb.LmdbStore; |
| 37 | +import org.junit.jupiter.api.Test; |
| 38 | +import org.junit.jupiter.api.io.TempDir; |
| 39 | + |
| 40 | +class LmdbImprovedQueryPlanSnapshotTest { |
| 41 | + |
| 42 | + private static final Pattern BENCHMARK_PARAMETERS = Pattern.compile( |
| 43 | + "# Parameters: \\(themeName = ([A-Z_]+), z_queryIndex = ([0-9]+)\\)"); |
| 44 | + private static final Pattern BENCHMARK_ROW = Pattern.compile( |
| 45 | + "^ThemeQueryBenchmark\\.executeQuery\\s+(\\S+)\\s+(\\d+)\\s+avgt\\s+([0-9.]+)\\s+ms/op$"); |
| 46 | + private static final String QUERY_KEYS_PROPERTY = "rdf4j.lmdb.improvedPlanSnapshot.queryKeys"; |
| 47 | + private static final String RESULT_DIRECTORY = "src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/theme-query-benchmark-results"; |
| 48 | + private static final String RECORDED_RESULTS_FILE = "results-2026-04-24-2.md"; |
| 49 | + private static final List<TargetQuery> TARGET_QUERIES = List.of( |
| 50 | + target(Theme.ENGINEERING, 1, "results-2026-04-17.md", 138.312d, 99.772d), |
| 51 | + target(Theme.LIBRARY, 1, "results-2026-04-17.md", 143.142d, 105.387d), |
| 52 | + target(Theme.MEDICAL_RECORDS, 1, "results-2026-04-17.md", 99.712d, 53.748d), |
| 53 | + target(Theme.TRAIN, 1, "results-2026-04-17.md", 39.073d, 29.381d)); |
| 54 | + |
| 55 | + @Test |
| 56 | + void recordedResultsStillRepresentTwentyPercentWins() throws Exception { |
| 57 | + Map<String, Double> currentScores = parseBenchmarkScores(resultsFile(RECORDED_RESULTS_FILE)); |
| 58 | + for (TargetQuery targetQuery : selectedTargetQueries()) { |
| 59 | + Map<String, Double> previousScores = parseBenchmarkScores(resultsFile(targetQuery.previousBestSource())); |
| 60 | + assertScore(previousScores, targetQuery.key(), targetQuery.previousBestScore(), |
| 61 | + targetQuery.previousBestSource()); |
| 62 | + assertScore(currentScores, targetQuery.key(), targetQuery.currentScore(), RECORDED_RESULTS_FILE); |
| 63 | + assertTrue(targetQuery.improvementPercent() > 20.0d, |
| 64 | + targetQuery.key() + " should stay above the 20% improvement threshold"); |
| 65 | + } |
| 66 | + } |
| 67 | + |
| 68 | + @Test |
| 69 | + void optimizedPlansMatchRecordedImprovementSnapshots(@TempDir Path dataDir) throws Exception { |
| 70 | + Map<String, RecordedPlanSnapshot> expectedPlans = parseRecordedPlanSignatures( |
| 71 | + resultsFile(RECORDED_RESULTS_FILE)); |
| 72 | + List<String> mismatches = new ArrayList<>(); |
| 73 | + for (Map.Entry<Theme, List<TargetQuery>> entry : targetsByTheme(selectedTargetQueries()).entrySet()) { |
| 74 | + Path storeDirectory = prepareThemeStore(dataDir, entry.getKey()); |
| 75 | + LmdbStore store = new LmdbStore(storeDirectory.toFile(), ConfigUtil.createConfig()); |
| 76 | + SailRepository repository = new SailRepository(store); |
| 77 | + try { |
| 78 | + for (TargetQuery targetQuery : entry.getValue()) { |
| 79 | + RecordedPlanSnapshot expectedPlan = expectedPlans.get(targetQuery.key()); |
| 80 | + assertTrue(expectedPlan != null, |
| 81 | + "Missing optimized plan in " + RECORDED_RESULTS_FILE + " for " + targetQuery.key()); |
| 82 | + |
| 83 | + primeLearnedFilterStats(repository, targetQuery); |
| 84 | + String actualPlan = explainOptimized(repository, targetQuery); |
| 85 | + assertPlanUsesRobustPlanner(targetQuery, actualPlan); |
| 86 | + |
| 87 | + PlanSignature actualSignature = planSignature(actualPlan); |
| 88 | + if (!expectedPlan.signature().equals(actualSignature.lines())) { |
| 89 | + mismatches.add(mismatch(targetQuery, expectedPlan, actualSignature, actualPlan)); |
| 90 | + } |
| 91 | + BenchmarkJoinEstimatorSupport.releaseEstimatorMemory(store); |
| 92 | + } |
| 93 | + } finally { |
| 94 | + shutdownAndRelease(repository, store); |
| 95 | + BenchmarkJoinEstimatorSupport.deleteStoreDirectory(storeDirectory); |
| 96 | + } |
| 97 | + } |
| 98 | + |
| 99 | + assertTrue(mismatches.isEmpty(), String.join("\n\n", mismatches)); |
| 100 | + } |
| 101 | + |
| 102 | + private static void assertScore(Map<String, Double> scores, String key, double expectedScore, String source) { |
| 103 | + Double actualScore = scores.get(key); |
| 104 | + assertTrue(actualScore != null, "Missing benchmark row in " + source + " for " + key); |
| 105 | + assertEquals(expectedScore, actualScore.doubleValue(), 0.0001d, |
| 106 | + "Unexpected benchmark score in " + source + " for " + key); |
| 107 | + } |
| 108 | + |
| 109 | + private static List<TargetQuery> selectedTargetQueries() { |
| 110 | + String selectedKeys = System.getProperty(QUERY_KEYS_PROPERTY, "").trim(); |
| 111 | + if (selectedKeys.isEmpty()) { |
| 112 | + return TARGET_QUERIES; |
| 113 | + } |
| 114 | + |
| 115 | + List<String> requestedKeys = List.of(selectedKeys.split(",")); |
| 116 | + List<TargetQuery> selected = TARGET_QUERIES.stream() |
| 117 | + .filter(targetQuery -> requestedKeys.stream() |
| 118 | + .map(String::trim) |
| 119 | + .anyMatch(key -> key.equals(targetQuery.key()) || key.equals(targetQuery.theme().name()))) |
| 120 | + .collect(Collectors.toList()); |
| 121 | + assertTrue(!selected.isEmpty(), "No target queries matched " + QUERY_KEYS_PROPERTY + "=" + selectedKeys); |
| 122 | + return selected; |
| 123 | + } |
| 124 | + |
| 125 | + private static Map<Theme, List<TargetQuery>> targetsByTheme(List<TargetQuery> targetQueries) { |
| 126 | + Map<Theme, List<TargetQuery>> targets = new LinkedHashMap<>(); |
| 127 | + for (TargetQuery targetQuery : targetQueries) { |
| 128 | + targets.computeIfAbsent(targetQuery.theme(), ignored -> new ArrayList<>()) |
| 129 | + .add(targetQuery); |
| 130 | + } |
| 131 | + return targets; |
| 132 | + } |
| 133 | + |
| 134 | + private static Map<String, Double> parseBenchmarkScores(Path path) throws IOException { |
| 135 | + Map<String, Double> scores = new LinkedHashMap<>(); |
| 136 | + for (String line : Files.readAllLines(path)) { |
| 137 | + Matcher matcher = BENCHMARK_ROW.matcher(line.strip().replace("`", "")); |
| 138 | + if (!matcher.matches()) { |
| 139 | + continue; |
| 140 | + } |
| 141 | + scores.put(matcher.group(1) + ":" + matcher.group(2), Double.parseDouble(matcher.group(3))); |
| 142 | + } |
| 143 | + return scores; |
| 144 | + } |
| 145 | + |
| 146 | + private static Map<String, RecordedPlanSnapshot> parseRecordedPlanSignatures(Path path) throws IOException { |
| 147 | + Map<String, RecordedPlanSnapshot> signatures = new LinkedHashMap<>(); |
| 148 | + List<String> lines = Files.readAllLines(path); |
| 149 | + String currentKey = null; |
| 150 | + int currentLine = -1; |
| 151 | + List<String> currentPlan = null; |
| 152 | + for (int i = 0; i < lines.size(); i++) { |
| 153 | + String line = lines.get(i); |
| 154 | + Matcher matcher = BENCHMARK_PARAMETERS.matcher(line); |
| 155 | + if (matcher.matches()) { |
| 156 | + currentKey = matcher.group(1) + ":" + matcher.group(2); |
| 157 | + currentLine = i + 1; |
| 158 | + continue; |
| 159 | + } |
| 160 | + if (line.contains("### Optimized Query ###")) { |
| 161 | + currentPlan = new ArrayList<>(); |
| 162 | + continue; |
| 163 | + } |
| 164 | + if (currentPlan == null) { |
| 165 | + continue; |
| 166 | + } |
| 167 | + if (line.isBlank()) { |
| 168 | + if (currentKey != null && !currentPlan.isEmpty()) { |
| 169 | + signatures.put(currentKey, new RecordedPlanSnapshot(currentLine, |
| 170 | + planSignature(String.join("\n", currentPlan)).lines())); |
| 171 | + } |
| 172 | + currentPlan = null; |
| 173 | + continue; |
| 174 | + } |
| 175 | + currentPlan.add(line); |
| 176 | + } |
| 177 | + return signatures; |
| 178 | + } |
| 179 | + |
| 180 | + private static Path resultsFile(String fileName) { |
| 181 | + Path basedirFile = Path.of(System.getProperty("basedir", "."), RESULT_DIRECTORY, fileName); |
| 182 | + if (Files.isRegularFile(basedirFile)) { |
| 183 | + return basedirFile; |
| 184 | + } |
| 185 | + |
| 186 | + Path repositoryFile = Path.of("core/sail/lmdb", RESULT_DIRECTORY, fileName); |
| 187 | + if (Files.isRegularFile(repositoryFile)) { |
| 188 | + return repositoryFile; |
| 189 | + } |
| 190 | + |
| 191 | + throw new AssertionError("Unable to locate benchmark results file " + fileName); |
| 192 | + } |
| 193 | + |
| 194 | + private static Path prepareThemeStore(Path dataDir, Theme theme) throws Exception { |
| 195 | + Path storeDirectory = dataDir.resolve("improved-plan-snapshot-" + theme.name()); |
| 196 | + LmdbStore store = new LmdbStore(storeDirectory.toFile(), ConfigUtil.createConfig()); |
| 197 | + SailRepository repository = new SailRepository(store); |
| 198 | + boolean prepared = false; |
| 199 | + try { |
| 200 | + BenchmarkJoinEstimatorSupport.prepareEstimatorForBulkLoad(repository, store); |
| 201 | + loadData(repository, theme); |
| 202 | + BenchmarkJoinEstimatorSupport.persistEstimatorAfterBulkLoad(repository, store); |
| 203 | + BenchmarkJoinEstimatorSupport.persistStoreStatistics(store); |
| 204 | + prepared = true; |
| 205 | + return storeDirectory; |
| 206 | + } finally { |
| 207 | + shutdownAndRelease(repository, store); |
| 208 | + if (!prepared) { |
| 209 | + BenchmarkJoinEstimatorSupport.deleteStoreDirectory(storeDirectory); |
| 210 | + } |
| 211 | + } |
| 212 | + } |
| 213 | + |
| 214 | + private static void loadData(SailRepository repository, Theme theme) throws IOException { |
| 215 | + try (SailRepositoryConnection connection = repository.getConnection()) { |
| 216 | + connection.begin(IsolationLevels.NONE); |
| 217 | + RDFInserter inserter = new RDFInserter(connection); |
| 218 | + ThemeDataSetGenerator.generate(theme, inserter); |
| 219 | + connection.commit(); |
| 220 | + } |
| 221 | + } |
| 222 | + |
| 223 | + private static void primeLearnedFilterStats(SailRepository repository, TargetQuery targetQuery) { |
| 224 | + String query = ThemeQueryCatalog.queryFor(targetQuery.theme(), targetQuery.queryIndex()); |
| 225 | + try (SailRepositoryConnection connection = repository.getConnection()) { |
| 226 | + connection.prepareTupleQuery(query) |
| 227 | + .evaluate() |
| 228 | + .stream() |
| 229 | + .count(); |
| 230 | + } |
| 231 | + } |
| 232 | + |
| 233 | + private static String explainOptimized(SailRepository repository, TargetQuery targetQuery) { |
| 234 | + try (SailRepositoryConnection connection = repository.getConnection()) { |
| 235 | + String query = ThemeQueryCatalog.queryFor(targetQuery.theme(), targetQuery.queryIndex()); |
| 236 | + return connection.prepareTupleQuery(query) |
| 237 | + .explain(Explanation.Level.Optimized) |
| 238 | + .toString(); |
| 239 | + } |
| 240 | + } |
| 241 | + |
| 242 | + private static void shutdownAndRelease(SailRepository repository, LmdbStore store) throws IOException { |
| 243 | + try { |
| 244 | + BenchmarkJoinEstimatorSupport.releaseEstimatorMemory(store); |
| 245 | + } finally { |
| 246 | + repository.shutDown(); |
| 247 | + } |
| 248 | + } |
| 249 | + |
| 250 | + private static void assertPlanUsesRobustPlanner(TargetQuery targetQuery, String actualPlan) { |
| 251 | + assertTrue(actualPlan.contains("plannerId=lmdb-sketch"), |
| 252 | + targetQuery.key() + " should use LMDB sketch planning:\n" + actualPlan); |
| 253 | + assertTrue(actualPlan.contains("plannerPath=ROBUST_USED"), |
| 254 | + targetQuery.key() + " should use the robust planner path:\n" + actualPlan); |
| 255 | + assertTrue(!actualPlan.contains("plannerPath=UNSUPPORTED_SHAPE"), |
| 256 | + targetQuery.key() + " should not reject the winning plan shape:\n" + actualPlan); |
| 257 | + } |
| 258 | + |
| 259 | + private static PlanSignature planSignature(String plan) { |
| 260 | + List<String> signature = plan.lines() |
| 261 | + .map(LmdbImprovedQueryPlanSnapshotTest::canonicalPlanLine) |
| 262 | + .filter(line -> !line.isEmpty()) |
| 263 | + .collect(Collectors.toList()); |
| 264 | + return new PlanSignature(normalizeAggregateHavingWrapper(signature)); |
| 265 | + } |
| 266 | + |
| 267 | + private static List<String> normalizeAggregateHavingWrapper(List<String> signature) { |
| 268 | + List<String> normalized = new ArrayList<>(signature); |
| 269 | + for (int i = 0; i + 3 < normalized.size(); i++) { |
| 270 | + if (normalized.get(i).equals("Extension") && normalized.get(i + 1).equals("Filter") |
| 271 | + && normalized.get(i + 2).equals("Extension") && normalized.get(i + 3).startsWith("Group ")) { |
| 272 | + normalized.set(i + 1, "Extension"); |
| 273 | + normalized.set(i + 2, "Filter"); |
| 274 | + i += 3; |
| 275 | + } |
| 276 | + } |
| 277 | + return normalized; |
| 278 | + } |
| 279 | + |
| 280 | + private static String canonicalPlanLine(String line) { |
| 281 | + String value = stripTreePrefix(line); |
| 282 | + if (value.isEmpty()) { |
| 283 | + return ""; |
| 284 | + } |
| 285 | + if (value.startsWith("s: Var") || value.startsWith("p: Var") || value.startsWith("o: Var")) { |
| 286 | + return canonicalVarLine(value); |
| 287 | + } |
| 288 | + if (value.startsWith("BindingSetAssignment")) { |
| 289 | + return stripTrailingMetadata(value); |
| 290 | + } |
| 291 | + if (value.startsWith("StatementPattern")) { |
| 292 | + return "StatementPattern"; |
| 293 | + } |
| 294 | + if (value.startsWith("LeftJoin")) { |
| 295 | + return value.contains("LeftJoinIterator") ? "LeftJoin (LeftJoinIterator)" : "LeftJoin"; |
| 296 | + } |
| 297 | + if (value.startsWith("Join")) { |
| 298 | + return value.contains("JoinIterator") ? "Join (JoinIterator)" : "Join"; |
| 299 | + } |
| 300 | + if (value.startsWith("Group ")) { |
| 301 | + return stripTrailingMetadata(value); |
| 302 | + } |
| 303 | + if (value.startsWith("Filter")) { |
| 304 | + return "Filter"; |
| 305 | + } |
| 306 | + if (value.startsWith("Projection")) { |
| 307 | + return "Projection"; |
| 308 | + } |
| 309 | + if (value.startsWith("Extension")) { |
| 310 | + return "Extension"; |
| 311 | + } |
| 312 | + if (value.startsWith("Union")) { |
| 313 | + return "Union"; |
| 314 | + } |
| 315 | + if (value.startsWith("Difference")) { |
| 316 | + return "Difference"; |
| 317 | + } |
| 318 | + return ""; |
| 319 | + } |
| 320 | + |
| 321 | + private static String stripTreePrefix(String line) { |
| 322 | + int start = 0; |
| 323 | + while (start < line.length() && !Character.isLetterOrDigit(line.charAt(start))) { |
| 324 | + start++; |
| 325 | + } |
| 326 | + return line.substring(start).trim(); |
| 327 | + } |
| 328 | + |
| 329 | + private static String stripTrailingMetadata(String value) { |
| 330 | + int metadataIndex = value.indexOf(") ("); |
| 331 | + if (metadataIndex >= 0) { |
| 332 | + value = value.substring(0, metadataIndex + 1); |
| 333 | + } |
| 334 | + if (value.endsWith(" [left]") || value.endsWith(" [right]")) { |
| 335 | + value = value.substring(0, value.lastIndexOf(" [")); |
| 336 | + } |
| 337 | + return value; |
| 338 | + } |
| 339 | + |
| 340 | + private static String canonicalVarLine(String value) { |
| 341 | + String role = value.substring(0, 3); |
| 342 | + return role + " " + canonicalVar(value.substring(3).trim()); |
| 343 | + } |
| 344 | + |
| 345 | + private static String canonicalVar(String value) { |
| 346 | + int valueIndex = value.indexOf("value="); |
| 347 | + if (valueIndex >= 0) { |
| 348 | + int valueEnd = value.indexOf(", anonymous", valueIndex); |
| 349 | + if (valueEnd < 0) { |
| 350 | + valueEnd = value.indexOf(") (", valueIndex); |
| 351 | + } |
| 352 | + if (valueEnd < 0) { |
| 353 | + valueEnd = value.indexOf(')', valueIndex); |
| 354 | + } |
| 355 | + return "Var (value=" + value.substring(valueIndex + "value=".length(), valueEnd) + ")"; |
| 356 | + } |
| 357 | + |
| 358 | + int nameIndex = value.indexOf("name="); |
| 359 | + if (nameIndex >= 0) { |
| 360 | + int nameEnd = value.indexOf(',', nameIndex); |
| 361 | + if (nameEnd < 0) { |
| 362 | + nameEnd = value.indexOf(')', nameIndex); |
| 363 | + } |
| 364 | + return "Var (name=" + value.substring(nameIndex + "name=".length(), nameEnd) + ")"; |
| 365 | + } |
| 366 | + return value; |
| 367 | + } |
| 368 | + |
| 369 | + private static String mismatch(TargetQuery targetQuery, RecordedPlanSnapshot expected, PlanSignature actual, |
| 370 | + String actualPlan) { |
| 371 | + return targetQuery.key() + " plan drifted from " + RECORDED_RESULTS_FILE + " line " |
| 372 | + + expected.lineNumber() + "\nExpected canonical plan:\n" + String.join("\n", expected.signature()) |
| 373 | + + "\nActual canonical plan:\n" + String.join("\n", actual.lines()) + "\nActual plan:\n" |
| 374 | + + actualPlan; |
| 375 | + } |
| 376 | + |
| 377 | + private static TargetQuery target(Theme theme, int queryIndex, String previousBestSource, double previousBestScore, |
| 378 | + double currentScore) { |
| 379 | + return new TargetQuery(theme, queryIndex, previousBestSource, previousBestScore, currentScore); |
| 380 | + } |
| 381 | + |
| 382 | + private record TargetQuery(Theme theme, int queryIndex, String previousBestSource, double previousBestScore, |
| 383 | + double currentScore) { |
| 384 | + |
| 385 | + private String key() { |
| 386 | + return theme.name() + ":" + queryIndex; |
| 387 | + } |
| 388 | + |
| 389 | + private double improvementPercent() { |
| 390 | + return ((previousBestScore - currentScore) / previousBestScore) * 100.0d; |
| 391 | + } |
| 392 | + } |
| 393 | + |
| 394 | + private record RecordedPlanSnapshot(int lineNumber, List<String> signature) { |
| 395 | + } |
| 396 | + |
| 397 | + private record PlanSignature(List<String> lines) { |
| 398 | + } |
| 399 | +} |
0 commit comments