Skip to content

Commit 3d6cb91

Browse files
committed
maybe better in some cases but also mostly worse
1 parent 6d2f28f commit 3d6cb91

4 files changed

Lines changed: 21700 additions & 43 deletions

File tree

core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSketchJoinOptimizer.java

Lines changed: 117 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
import java.util.List;
2323
import java.util.Map;
2424
import java.util.Optional;
25+
import java.util.OptionalDouble;
2526
import java.util.Set;
2627

2728
import org.eclipse.rdf4j.model.Literal;
@@ -50,6 +51,7 @@
5051
import org.eclipse.rdf4j.query.algebra.Var;
5152
import org.eclipse.rdf4j.query.algebra.evaluation.QueryOptimizer;
5253
import org.eclipse.rdf4j.query.algebra.evaluation.impl.EvaluationStatistics;
54+
import org.eclipse.rdf4j.query.algebra.evaluation.optimizer.JoinFactorCostModel;
5355
import org.eclipse.rdf4j.query.algebra.evaluation.optimizer.JoinOrderPlanner;
5456
import org.eclipse.rdf4j.query.algebra.evaluation.optimizer.QueryOptimizationScopeProvider;
5557
import org.eclipse.rdf4j.query.algebra.helpers.AbstractSimpleQueryModelVisitor;
@@ -61,6 +63,7 @@
6163
final class LmdbSketchJoinOptimizer implements QueryOptimizer {
6264

6365
private static final String RDF_TYPE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";
66+
private static final double BROAD_PREFIX_SCAN_MIN_WORK_ROWS = 100.0d;
6467

6568
private final EvaluationStatistics statistics;
6669
private final boolean trackResultSize;
@@ -517,7 +520,8 @@ private Deque<TupleExpr> orderSegment(List<TupleExpr> segment, Set<String> bound
517520
return new ArrayDeque<>(segment);
518521
}
519522
applyPlannerStepEstimates(plan.get());
520-
return new ArrayDeque<>(preferUnboundTypeGuards(plan.get().getOrderedArgs(), boundBeforeSegment));
523+
return new ArrayDeque<>(preferSafeSubjectTypeAnchors(plan.get().getOrderedArgs(), boundBeforeSegment,
524+
filters));
521525
}
522526

523527
private JoinOrderPlanner.Algorithm plannerAlgorithm(int segmentSize) {
@@ -638,27 +642,26 @@ private boolean isValidPlannerOrder(List<TupleExpr> originalSegment, JoinOrderPl
638642
return true;
639643
}
640644

641-
private List<TupleExpr> preferUnboundTypeGuards(List<TupleExpr> orderedArgs, Set<String> boundBeforeSegment) {
645+
private List<TupleExpr> preferSafeSubjectTypeAnchors(List<TupleExpr> orderedArgs,
646+
Set<String> boundBeforeSegment, List<DeferredFilter> filters) {
642647
if (orderedArgs.size() < 2) {
643648
return orderedArgs;
644649
}
645650
List<TupleExpr> reordered = new ArrayList<>(orderedArgs);
646-
preferCheaperIndependentAnchor(reordered, boundBeforeSegment);
647651
Set<String> bound = new HashSet<>(boundBeforeSegment);
648652
for (int i = 0; i < reordered.size(); i++) {
649653
TupleExpr current = reordered.get(i);
650-
if (!isTypeGuard(current) && current instanceof StatementPattern currentPattern
651-
&& Collections.disjoint(bound, currentPattern.getBindingNames())) {
654+
if (current instanceof StatementPattern currentPattern
655+
&& canConsiderSubjectTypeAnchor(currentPattern, filters)) {
652656
String subjectName = unboundName(currentPattern.getSubjectVar());
653-
if (subjectName != null) {
654-
for (int j = i + 1; j < reordered.size(); j++) {
655-
TupleExpr candidate = reordered.get(j);
656-
if (isTypeGuardForSubject(candidate, subjectName)) {
657-
reordered.remove(j);
658-
reordered.add(i, candidate);
659-
current = candidate;
660-
break;
661-
}
657+
for (int j = i + 1; j < reordered.size(); j++) {
658+
TupleExpr candidate = reordered.get(j);
659+
if (isTypeGuardForSubject(candidate, subjectName)
660+
&& isCheaperSubjectTypeOrder(currentPattern, candidate, bound)) {
661+
reordered.remove(j);
662+
reordered.add(i, candidate);
663+
current = candidate;
664+
break;
662665
}
663666
}
664667
}
@@ -667,41 +670,112 @@ private List<TupleExpr> preferUnboundTypeGuards(List<TupleExpr> orderedArgs, Set
667670
return List.copyOf(reordered);
668671
}
669672

670-
private void preferCheaperIndependentAnchor(List<TupleExpr> reordered, Set<String> boundBeforeSegment) {
671-
if (!boundBeforeSegment.isEmpty() || !isTypeGuard(reordered.get(0))) {
672-
return;
673+
private boolean canConsiderSubjectTypeAnchor(StatementPattern pattern, List<DeferredFilter> filters) {
674+
if (isTypeGuard(pattern) || hasPatternLocalDeferredFilter(pattern, filters)) {
675+
return false;
673676
}
674-
StatementPattern typeGuard = (StatementPattern) reordered.get(0);
675-
String typeSubject = unboundName(typeGuard.getSubjectVar());
676-
if (typeSubject == null) {
677-
return;
677+
String subjectName = unboundName(pattern.getSubjectVar());
678+
if (subjectName == null) {
679+
return false;
678680
}
679-
double typeRows = typeGuard.getResultSizeEstimate();
680-
if (!LmdbJoinPlanSupport.isFiniteNonNegative(typeRows)) {
681-
return;
681+
Var object = pattern.getObjectVar();
682+
return object != null && !object.hasValue();
683+
}
684+
685+
private boolean isCheaperSubjectTypeOrder(StatementPattern currentPattern, TupleExpr typeGuard,
686+
Set<String> bound) {
687+
if (!(statistics instanceof JoinFactorCostModel)) {
688+
return false;
682689
}
683-
int bestIndex = -1;
684-
double bestRows = typeRows;
685-
for (int i = 1; i < reordered.size(); i++) {
686-
TupleExpr candidate = reordered.get(i);
687-
if (!(candidate instanceof StatementPattern candidatePattern) || isTypeGuard(candidate)
688-
|| !Collections.disjoint(boundBeforeSegment, candidate.getBindingNames())) {
689-
continue;
690-
}
691-
String candidateSubject = unboundName(candidatePattern.getSubjectVar());
692-
if (typeSubject.equals(candidateSubject)) {
693-
continue;
694-
}
695-
double candidateRows = candidate.getResultSizeEstimate();
696-
if (LmdbJoinPlanSupport.isFiniteNonNegative(candidateRows) && candidateRows < bestRows) {
697-
bestRows = candidateRows;
698-
bestIndex = i;
690+
JoinFactorCostModel costModel = (JoinFactorCostModel) statistics;
691+
OptionalDouble currentThenType = estimateAdjacentOrderCost(costModel, currentPattern, typeGuard, bound);
692+
OptionalDouble typeThenCurrent = estimateAdjacentOrderCost(costModel, typeGuard, currentPattern, bound);
693+
return currentThenType.isPresent() && typeThenCurrent.isPresent()
694+
&& (typeThenCurrent.getAsDouble() < currentThenType.getAsDouble()
695+
|| isDirectTypeLookupTieBreak(currentPattern, typeGuard)
696+
&& isPairRowsNoWorse(typeGuard, currentPattern, currentPattern, typeGuard));
697+
}
698+
699+
private boolean isDirectTypeLookupTieBreak(StatementPattern currentPattern, TupleExpr typeGuard) {
700+
String currentMissing = currentPattern
701+
.getStringMetricPlanned(TelemetryMetricNames.PLANNED_MISSING_LOOKUP_COMPONENTS);
702+
String typeAccessMode = typeGuard.getStringMetricPlanned(TelemetryMetricNames.PLANNED_INDEX_ACCESS_MODE);
703+
double currentWork = currentPattern.getDoubleMetricPlanned(TelemetryMetricNames.PLANNED_WORK_ROWS);
704+
double typeWork = typeGuard.getDoubleMetricPlanned(TelemetryMetricNames.PLANNED_WORK_ROWS);
705+
return currentMissing != null && !currentMissing.isBlank()
706+
&& "directLookup".equals(typeAccessMode)
707+
&& LmdbJoinPlanSupport.isFiniteNonNegative(currentWork)
708+
&& LmdbJoinPlanSupport.isFiniteNonNegative(typeWork)
709+
&& currentWork > BROAD_PREFIX_SCAN_MIN_WORK_ROWS
710+
&& typeWork <= currentWork;
711+
}
712+
713+
private boolean isPairRowsNoWorse(TupleExpr promotedFirst, TupleExpr promotedSecond, TupleExpr currentFirst,
714+
TupleExpr currentSecond) {
715+
OptionalDouble promotedRows = estimatePairRows(promotedFirst, promotedSecond);
716+
OptionalDouble currentRows = estimatePairRows(currentFirst, currentSecond);
717+
return promotedRows.isPresent() && currentRows.isPresent()
718+
&& promotedRows.getAsDouble() <= currentRows.getAsDouble();
719+
}
720+
721+
private OptionalDouble estimateAdjacentOrderCost(JoinFactorCostModel costModel, TupleExpr first,
722+
TupleExpr second, Set<String> bound) {
723+
Optional<JoinFactorCostModel.FactorCostEstimate> firstEstimate = costModel.estimateFactorCost(first,
724+
bound);
725+
if (firstEstimate.isEmpty() || !isUsableCost(firstEstimate.get())) {
726+
return OptionalDouble.empty();
727+
}
728+
Set<String> boundAfterFirst = new HashSet<>(bound);
729+
boundAfterFirst.addAll(first.getBindingNames());
730+
Optional<JoinFactorCostModel.FactorCostEstimate> secondEstimate = costModel.estimateFactorCost(second,
731+
boundAfterFirst);
732+
if (secondEstimate.isEmpty() || !isUsableCost(secondEstimate.get())) {
733+
return OptionalDouble.empty();
734+
}
735+
double secondInvocations = repeatsForNewBinding(first, second, bound)
736+
? repeatedLookupInvocations(firstEstimate.get())
737+
: 1.0d;
738+
double cost = firstEstimate.get().getWorkRows()
739+
+ secondEstimate.get().getWorkRows() * secondInvocations
740+
+ estimatePairRows(first, second).orElse(0.0d);
741+
return LmdbJoinPlanSupport.isFiniteNonNegative(cost) ? OptionalDouble.of(cost) : OptionalDouble.empty();
742+
}
743+
744+
private OptionalDouble estimatePairRows(TupleExpr first, TupleExpr second) {
745+
if (!statistics.supportsJoinEstimation()) {
746+
return OptionalDouble.empty();
747+
}
748+
double rows = statistics.getCardinality(new Join(first.clone(), second.clone()));
749+
return LmdbJoinPlanSupport.isFiniteNonNegative(rows) ? OptionalDouble.of(rows) : OptionalDouble.empty();
750+
}
751+
752+
private boolean isUsableCost(JoinFactorCostModel.FactorCostEstimate estimate) {
753+
return LmdbJoinPlanSupport.isFiniteNonNegative(estimate.getWorkRows())
754+
&& LmdbJoinPlanSupport.isFiniteNonNegative(estimate.getOutputRows());
755+
}
756+
757+
private boolean repeatsForNewBinding(TupleExpr first, TupleExpr second, Set<String> bound) {
758+
Set<String> secondBindings = second.getBindingNames();
759+
for (String firstBinding : first.getBindingNames()) {
760+
if (!bound.contains(firstBinding) && secondBindings.contains(firstBinding)) {
761+
return true;
699762
}
700763
}
701-
if (bestIndex > 0) {
702-
TupleExpr anchor = reordered.remove(bestIndex);
703-
reordered.add(0, anchor);
764+
return false;
765+
}
766+
767+
private double repeatedLookupInvocations(JoinFactorCostModel.FactorCostEstimate prefixEstimate) {
768+
double prefixRows = Math.max(prefixEstimate.getOutputRows(), prefixEstimate.getWorkRows());
769+
return prefixRows > 1.0d ? prefixRows : 1.0d;
770+
}
771+
772+
private boolean hasPatternLocalDeferredFilter(StatementPattern pattern, List<DeferredFilter> filters) {
773+
for (DeferredFilter filter : filters) {
774+
if (filter.patternLocalBase == pattern || filter.originPatterns.contains(pattern)) {
775+
return true;
776+
}
704777
}
778+
return false;
705779
}
706780

707781
private boolean isTypeGuardForSubject(TupleExpr tupleExpr, String subjectName) {

core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/LmdbThemeQueryRegressionTest.java

Lines changed: 59 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -377,6 +377,65 @@ void trainScheduledTimeSeedStaysAheadOfBroadTypeAnchor(@TempDir Path dataDir) th
377377
}
378378
}
379379

380+
@Test
381+
void medicalEncounterDateFilterStaysAheadOfBroadTypeAnchor(@TempDir Path dataDir) throws Exception {
382+
Theme theme = Theme.MEDICAL_RECORDS;
383+
Path themeDir = prepareThemeStore(dataDir, theme, 2);
384+
try {
385+
LmdbStore store = new LmdbStore(themeDir.toFile(), ConfigUtil.createConfig());
386+
SailRepository repository = new SailRepository(store);
387+
try {
388+
assertQueryRegressionPasses(repository, theme, 2, snapshot -> {
389+
assertPlannerDiagnosticsPresent(theme, 2, snapshot.plan());
390+
assertBefore(snapshot.renderedQuery(), "<http://example.com/theme/medical/recordedOn> ?date",
391+
"FILTER (?date IN (\"2024-01-01\"^^<http://www.w3.org/2001/XMLSchema#date>, "
392+
+ "\"2024-02-01\"^^<http://www.w3.org/2001/XMLSchema#date>))",
393+
"Medical q2 should keep the date filter attached to the recordedOn lookup\n"
394+
+ snapshot.plan());
395+
assertBefore(snapshot.renderedQuery(),
396+
"FILTER (?date IN (\"2024-01-01\"^^<http://www.w3.org/2001/XMLSchema#date>, "
397+
+ "\"2024-02-01\"^^<http://www.w3.org/2001/XMLSchema#date>))",
398+
"?enc a <http://example.com/theme/medical/Encounter>",
399+
"Medical q2 should apply the selective date filter before the broad Encounter type scan\n"
400+
+ snapshot.plan());
401+
});
402+
} finally {
403+
shutdownAndRelease(repository, store);
404+
}
405+
} finally {
406+
BenchmarkJoinEstimatorSupport.deleteStoreDirectory(themeDir);
407+
}
408+
}
409+
410+
@Test
411+
void medicalOptionalNotExistsQueryCompletes(@TempDir Path dataDir) throws Exception {
412+
Theme theme = Theme.MEDICAL_RECORDS;
413+
int queryIndex = 10;
414+
Path themeDir = prepareThemeStore(dataDir, theme);
415+
try {
416+
LmdbStore store = new LmdbStore(themeDir.toFile(), ConfigUtil.createConfig());
417+
SailRepository repository = new SailRepository(store);
418+
try {
419+
String query = ThemeQueryCatalog.queryFor(theme, queryIndex);
420+
long expected = ThemeQueryCatalog.expectedCountFor(theme, queryIndex);
421+
BenchmarkJoinEstimatorSupport.assertQueryRegressionPassesWithinThirtySeconds(
422+
theme.name() + ":" + queryIndex,
423+
() -> {
424+
long actual = executeQuery(repository, query);
425+
if (actual != expected) {
426+
throw new AssertionError("LMDB theme query mismatch: theme=" + theme
427+
+ ", queryIndex=" + queryIndex + ", expected=" + expected + ", actual="
428+
+ actual + "\n" + explainBestEffort(repository, query));
429+
}
430+
});
431+
} finally {
432+
shutdownAndRelease(repository, store);
433+
}
434+
} finally {
435+
BenchmarkJoinEstimatorSupport.deleteStoreDirectory(themeDir);
436+
}
437+
}
438+
380439
@Test
381440
void electricalGridGeneratorCapacityThresholdUsesFastestKnownShape(@TempDir Path dataDir) throws Exception {
382441
Theme theme = Theme.ELECTRICAL_GRID;

0 commit comments

Comments
 (0)