Skip to content

Commit f46403c

Browse files
committed
wip
1 parent b7821d4 commit f46403c

9 files changed

Lines changed: 352 additions & 18 deletions

File tree

core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/evaluationsteps/JoinQueryEvaluationStep.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import org.eclipse.rdf4j.query.algebra.evaluation.iterator.JoinIterator;
2929
import org.eclipse.rdf4j.query.algebra.evaluation.iterator.JoinKeyCacheIterator;
3030
import org.eclipse.rdf4j.query.algebra.helpers.TupleExprs;
31+
import org.eclipse.rdf4j.query.algebra.helpers.collectors.VarNameCollector;
3132

3233
public class JoinQueryEvaluationStep implements QueryEvaluationStep {
3334

@@ -55,8 +56,12 @@ public JoinQueryEvaluationStep(EvaluationStrategy strategy, Join join, QueryEval
5556
} else {
5657
boolean nonDeterministicRight = DeterminismChecks.containsNonDeterministicFunction(join.getRightArg());
5758
Set<String> rightBindingNames = join.getRightArg().getBindingNames();
59+
Set<String> leftVarNames = join.getLeftArg() instanceof BindingSetAssignment
60+
? join.getLeftArg().getBindingNames()
61+
: VarNameCollector.process(join.getLeftArg());
62+
Set<String> rightVarNames = VarNameCollector.process(join.getRightArg());
5863
boolean cacheableRight = !(join.getRightArg() instanceof BindingSetAssignment)
59-
&& joinAttributes.length < rightBindingNames.size();
64+
&& rightVarNames.stream().anyMatch(varName -> !leftVarNames.contains(varName));
6065
if (JoinKeyCacheIterator.isEnabled(joinAttributes) && !nonDeterministicRight && cacheableRight) {
6166
eval = bindings -> JoinKeyCacheIterator.getInstance(leftPrepared, rightPrepared, bindings,
6267
joinAttributes, rightBindingNames, context);

core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/optimizer/ExistsSemiJoinOptimizer.java

Lines changed: 63 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
package org.eclipse.rdf4j.query.algebra.evaluation.optimizer;
1313

1414
import java.util.ArrayList;
15+
import java.util.Collection;
1516
import java.util.Collections;
1617
import java.util.HashMap;
1718
import java.util.HashSet;
@@ -26,6 +27,7 @@
2627
import org.eclipse.rdf4j.query.algebra.And;
2728
import org.eclipse.rdf4j.query.algebra.BinaryTupleOperator;
2829
import org.eclipse.rdf4j.query.algebra.BinaryValueOperator;
30+
import org.eclipse.rdf4j.query.algebra.BindingSetAssignment;
2931
import org.eclipse.rdf4j.query.algebra.Distinct;
3032
import org.eclipse.rdf4j.query.algebra.Exists;
3133
import org.eclipse.rdf4j.query.algebra.Extension;
@@ -144,14 +146,73 @@ private boolean shouldKeepExistsAsFilter(TupleExpr leftArg, TupleExpr subQuery,
144146
return true;
145147
}
146148

147-
double leftCardinality = evaluationStatistics.getCardinality(leftArg);
149+
double leftCardinality = estimateLeftArgCardinality(leftArg, joinVars);
148150
double rightCardinality = evaluationStatistics.getCardinality(subQuery);
149151
if (!Double.isFinite(leftCardinality) || !Double.isFinite(rightCardinality) || rightCardinality <= 0.0) {
150152
return true;
151153
}
152154
return leftCardinality < rightCardinality * MIN_LEFT_TO_RIGHT_RATIO_FOR_SINGLE_STATEMENT_PATTERN;
153155
}
154156

157+
private double estimateLeftArgCardinality(TupleExpr leftArg, Set<String> joinVars) {
158+
if (joinVars.isEmpty()) {
159+
return evaluationStatistics.getCardinality(leftArg);
160+
}
161+
double[] min = { Double.POSITIVE_INFINITY };
162+
leftArg.visit(new StopAtScopeChange(true) {
163+
@Override
164+
public void meet(BindingSetAssignment node) {
165+
if (node.getBindingNames().containsAll(joinVars)) {
166+
min[0] = Math.min(min[0], estimateBindingSetSize(node.getBindingSets()));
167+
}
168+
super.meet(node);
169+
}
170+
171+
@Override
172+
public void meet(StatementPattern node) {
173+
if (!statementPatternContainsAllNames(node, joinVars)) {
174+
return;
175+
}
176+
double cardinality = evaluationStatistics.getCardinality(node);
177+
if (Double.isFinite(cardinality)) {
178+
min[0] = Math.min(min[0], cardinality);
179+
}
180+
}
181+
});
182+
if (Double.isFinite(min[0]) && min[0] != Double.POSITIVE_INFINITY) {
183+
return min[0];
184+
}
185+
return evaluationStatistics.getCardinality(leftArg);
186+
}
187+
188+
private int estimateBindingSetSize(Iterable<BindingSet> bindingSets) {
189+
if (bindingSets == null) {
190+
return 0;
191+
}
192+
if (bindingSets instanceof Collection<?>) {
193+
return ((Collection<?>) bindingSets).size();
194+
}
195+
return 1;
196+
}
197+
198+
private boolean statementPatternContainsAllNames(StatementPattern pattern, Set<String> requiredNames) {
199+
for (String name : requiredNames) {
200+
if (!statementPatternContainsName(pattern, name)) {
201+
return false;
202+
}
203+
}
204+
return true;
205+
}
206+
207+
private boolean statementPatternContainsName(StatementPattern pattern, String name) {
208+
return varHasName(pattern.getSubjectVar(), name) || varHasName(pattern.getPredicateVar(), name)
209+
|| varHasName(pattern.getObjectVar(), name) || varHasName(pattern.getContextVar(), name);
210+
}
211+
212+
private boolean varHasName(Var var, String name) {
213+
return var != null && name.equals(var.getName());
214+
}
215+
155216
private boolean shouldRewrite(TupleExpr left, TupleExpr right) {
156217
if (allowNonImprovingTransforms) {
157218
return true;
@@ -209,9 +270,7 @@ private static Set<String> assuredBindingsWithAliases(TupleExpr expr) {
209270

210271
private static Set<String> collectSharedUnboundNames(TupleExpr left, TupleExpr right) {
211272
Set<String> leftUnbound = collectUnboundVarNames(left);
212-
if (leftUnbound.isEmpty()) {
213-
return Set.of();
214-
}
273+
leftUnbound.addAll(left.getBindingNames());
215274
Set<String> rightUnbound = collectUnboundVarNames(right);
216275
if (rightUnbound.isEmpty()) {
217276
return Set.of();

core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/optimizer/MinusOptimizer.java

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
import org.eclipse.rdf4j.query.algebra.BinaryTupleOperator;
2828
import org.eclipse.rdf4j.query.algebra.BinaryValueOperator;
2929
import org.eclipse.rdf4j.query.algebra.Difference;
30-
import org.eclipse.rdf4j.query.algebra.Distinct;
3130
import org.eclipse.rdf4j.query.algebra.EmptySet;
3231
import org.eclipse.rdf4j.query.algebra.Extension;
3332
import org.eclipse.rdf4j.query.algebra.ExtensionElem;
@@ -170,19 +169,18 @@ private static void applyJoinKeyProjection(Difference difference) {
170169
if (!hasStatementPatternCoveringVars(rightArg, shared)) {
171170
return;
172171
}
173-
TupleExpr projected = buildDistinctProjection(rightArg.clone(), shared);
172+
TupleExpr projected = buildProjection(rightArg.clone(), shared);
174173
difference.setRightArg(projected);
175174
}
176175

177-
private static TupleExpr buildDistinctProjection(TupleExpr subQuery, Set<String> joinVars) {
176+
private static TupleExpr buildProjection(TupleExpr subQuery, Set<String> joinVars) {
178177
List<String> ordered = new ArrayList<>(joinVars);
179178
Collections.sort(ordered);
180179
ProjectionElemList projectionElemList = new ProjectionElemList();
181180
for (String name : ordered) {
182181
projectionElemList.addElement(new ProjectionElem(name));
183182
}
184-
Projection projection = new Projection(subQuery, projectionElemList);
185-
return new Distinct(projection);
183+
return new Projection(subQuery, projectionElemList);
186184
}
187185

188186
private static boolean containsService(TupleExpr subQuery) {

core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/optimizer/NotExistsSemiJoinOptimizer.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -184,9 +184,7 @@ private static Set<String> assuredBindingsWithAliases(TupleExpr expr) {
184184

185185
private static Set<String> collectSharedUnboundNames(TupleExpr left, TupleExpr right) {
186186
Set<String> leftUnbound = collectUnboundVarNames(left);
187-
if (leftUnbound.isEmpty()) {
188-
return Set.of();
189-
}
187+
leftUnbound.addAll(left.getBindingNames());
190188
Set<String> rightUnbound = collectUnboundVarNames(right);
191189
if (rightUnbound.isEmpty()) {
192190
return Set.of();

core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/optimizer/SparqlUoQueryOptimizerPipeline.java

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,21 @@
1313

1414
import java.util.ArrayList;
1515
import java.util.List;
16+
import java.util.Set;
1617

1718
import org.eclipse.rdf4j.query.BindingSet;
1819
import org.eclipse.rdf4j.query.Dataset;
20+
import org.eclipse.rdf4j.query.algebra.BindingSetAssignment;
21+
import org.eclipse.rdf4j.query.algebra.Join;
22+
import org.eclipse.rdf4j.query.algebra.LeftJoin;
1923
import org.eclipse.rdf4j.query.algebra.TupleExpr;
2024
import org.eclipse.rdf4j.query.algebra.evaluation.EvaluationStrategy;
2125
import org.eclipse.rdf4j.query.algebra.evaluation.QueryOptimizer;
2226
import org.eclipse.rdf4j.query.algebra.evaluation.QueryOptimizerPipeline;
2327
import org.eclipse.rdf4j.query.algebra.evaluation.TripleSource;
2428
import org.eclipse.rdf4j.query.algebra.evaluation.impl.EvaluationStatistics;
2529
import org.eclipse.rdf4j.query.algebra.evaluation.optimizer.sparqluo.SparqlUoConfig;
30+
import org.eclipse.rdf4j.query.algebra.helpers.AbstractSimpleQueryModelVisitor;
2631

2732
public class SparqlUoQueryOptimizerPipeline implements QueryOptimizerPipeline {
2833

@@ -40,6 +45,7 @@ public class SparqlUoQueryOptimizerPipeline implements QueryOptimizerPipeline {
4045
private final ExistsSemiJoinOptimizer existsSemiJoinOptimizer;
4146
private final NotExistsSemiJoinOptimizer notExistsSemiJoinOptimizer;
4247
private final QueryJoinOptimizer joinOptimizer;
48+
private final QueryOptimizer boundJoinRightArgOptimizer;
4349
private final FilterOptimizer preJoinFilterOptimizer = new LimitAwareFilterOptimizer();
4450
private final boolean enableOptionalFilterJoin;
4551
private final boolean enableUnionCommonPullUp;
@@ -70,6 +76,7 @@ public SparqlUoQueryOptimizerPipeline(EvaluationStrategy strategy, TripleSource
7076
config.allowNonImprovingTransforms());
7177
this.joinOptimizer = new QueryJoinOptimizer(evaluationStatistics, strategy.isTrackResultSize(), tripleSource,
7278
false);
79+
this.boundJoinRightArgOptimizer = new BoundJoinRightArgOptimizer(this.joinOptimizer);
7380
this.enableOptionalFilterJoin = config.enableOptionalFilterJoin();
7481
this.enableUnionCommonPullUp = config.allowNonImprovingTransforms();
7582
}
@@ -96,6 +103,7 @@ public Iterable<QueryOptimizer> getOptimizers() {
96103
optimizers.add(optionalFilterJoinOptimizer);
97104
optimizers.add(optionalNotBoundFilterOptimizer);
98105
optimizers.add(optionalBindLeftJoinOptimizer);
106+
optimizers.add(boundJoinRightArgOptimizer);
99107
optionalFilterJoinInserted = true;
100108
}
101109
optimizers.add(existsSemiJoinOptimizer);
@@ -131,6 +139,7 @@ public Iterable<QueryOptimizer> getOptimizers() {
131139
optimizers.add(optionalFilterJoinOptimizer);
132140
optimizers.add(optionalNotBoundFilterOptimizer);
133141
optimizers.add(optionalBindLeftJoinOptimizer);
142+
optimizers.add(boundJoinRightArgOptimizer);
134143
optionalFilterJoinInserted = true;
135144
}
136145
optimizers.add(existsSemiJoinOptimizer);
@@ -148,6 +157,51 @@ public Iterable<QueryOptimizer> getOptimizers() {
148157
return optimizers;
149158
}
150159

160+
private static final class BoundJoinRightArgOptimizer implements QueryOptimizer {
161+
162+
private final QueryJoinOptimizer joinOptimizer;
163+
164+
private BoundJoinRightArgOptimizer(QueryJoinOptimizer joinOptimizer) {
165+
this.joinOptimizer = joinOptimizer;
166+
}
167+
168+
@Override
169+
public void optimize(TupleExpr tupleExpr, Dataset dataset, BindingSet bindings) {
170+
if (UnorderedSliceDetector.hasUnorderedSlice(tupleExpr)) {
171+
return;
172+
}
173+
tupleExpr.visit(new AbstractSimpleQueryModelVisitor<RuntimeException>() {
174+
@Override
175+
public void meet(Join node) throws RuntimeException {
176+
super.meet(node);
177+
178+
Set<String> boundNames = node.getLeftArg().getBindingNames();
179+
if (boundNames.isEmpty()) {
180+
return;
181+
}
182+
Set<String> shared = node.getRightArg().getBindingNames();
183+
boolean sharesBindings = false;
184+
for (String name : boundNames) {
185+
if (shared.contains(name)) {
186+
sharesBindings = true;
187+
break;
188+
}
189+
}
190+
if (!sharesBindings) {
191+
return;
192+
}
193+
BindingSetAssignment seed = new BindingSetAssignment();
194+
seed.setBindingNames(boundNames);
195+
seed.setBindingSets(List.of());
196+
TupleExpr optimizedRight = (TupleExpr) node.getRightArg().clone();
197+
LeftJoin leftJoin = new LeftJoin(seed, optimizedRight);
198+
joinOptimizer.optimize(leftJoin, dataset, bindings);
199+
node.setRightArg(leftJoin.getRightArg());
200+
}
201+
});
202+
}
203+
}
204+
151205
private static SparqlUoConfig disableOptionalFilterJoin(SparqlUoConfig config) {
152206
if (!config.enableOptionalFilterJoin()) {
153207
return config;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
/*******************************************************************************
2+
* Copyright (c) 2025 Eclipse RDF4J contributors.
3+
*
4+
* All rights reserved. This program and the accompanying materials
5+
* are made available under the terms of the Eclipse Distribution License v1.0
6+
* which accompanies this distribution, and is available at
7+
* http://www.eclipse.org/org/documents/edl-v10.php.
8+
*
9+
* SPDX-License-Identifier: BSD-3-Clause
10+
*******************************************************************************/
11+
// Some portions generated by Codex
12+
package org.eclipse.rdf4j.query.algebra.evaluation.impl;
13+
14+
import static org.assertj.core.api.Assertions.assertThat;
15+
16+
import java.util.List;
17+
import java.util.concurrent.atomic.AtomicBoolean;
18+
19+
import org.eclipse.rdf4j.model.ValueFactory;
20+
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
21+
import org.eclipse.rdf4j.query.BindingSet;
22+
import org.eclipse.rdf4j.query.algebra.BindingSetAssignment;
23+
import org.eclipse.rdf4j.query.algebra.Compare;
24+
import org.eclipse.rdf4j.query.algebra.Compare.CompareOp;
25+
import org.eclipse.rdf4j.query.algebra.Difference;
26+
import org.eclipse.rdf4j.query.algebra.Exists;
27+
import org.eclipse.rdf4j.query.algebra.Filter;
28+
import org.eclipse.rdf4j.query.algebra.Join;
29+
import org.eclipse.rdf4j.query.algebra.Not;
30+
import org.eclipse.rdf4j.query.algebra.QueryRoot;
31+
import org.eclipse.rdf4j.query.algebra.StatementPattern;
32+
import org.eclipse.rdf4j.query.algebra.TupleExpr;
33+
import org.eclipse.rdf4j.query.algebra.Var;
34+
import org.eclipse.rdf4j.query.algebra.evaluation.QueryBindingSet;
35+
import org.eclipse.rdf4j.query.algebra.evaluation.optimizer.NotExistsSemiJoinOptimizer;
36+
import org.eclipse.rdf4j.query.algebra.helpers.AbstractQueryModelVisitor;
37+
import org.eclipse.rdf4j.query.impl.EmptyBindingSet;
38+
import org.junit.jupiter.api.Test;
39+
40+
class NotExistsSemiJoinOptimizerCorrelationTest {
41+
42+
@Test
43+
void doesNotRewriteWhenSubqueryUsesExternalValuesVar() {
44+
ValueFactory vf = SimpleValueFactory.getInstance();
45+
46+
BindingSetAssignment values = new BindingSetAssignment();
47+
QueryBindingSet bindings = new QueryBindingSet();
48+
bindings.addBinding("threshold", vf.createLiteral(10));
49+
values.setBindingSets(List.<BindingSet>of(bindings));
50+
51+
TupleExpr left = new Join(values,
52+
new StatementPattern(Var.of("service"), Var.of("p"), Var.of("o")));
53+
54+
TupleExpr subQuery = new Filter(
55+
new StatementPattern(Var.of("service"), Var.of("p2"), Var.of("late")),
56+
new Compare(Var.of("late"), Var.of("threshold"), CompareOp.GT));
57+
58+
TupleExpr expr = new QueryRoot(new Filter(left, new Not(new Exists(subQuery))));
59+
60+
new NotExistsSemiJoinOptimizer(new EvaluationStatistics(), true)
61+
.optimize(expr, null, EmptyBindingSet.getInstance());
62+
63+
assertThat(containsDifference(expr)).isFalse();
64+
}
65+
66+
private static boolean containsDifference(TupleExpr expr) {
67+
AtomicBoolean found = new AtomicBoolean(false);
68+
expr.visit(new AbstractQueryModelVisitor<RuntimeException>() {
69+
@Override
70+
public void meet(Difference node) {
71+
found.set(true);
72+
}
73+
});
74+
return found.get();
75+
}
76+
}

core/queryalgebra/evaluation/src/test/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/SparqlUoMinusOptimizerTest.java

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
import org.eclipse.rdf4j.query.BindingSet;
2424
import org.eclipse.rdf4j.query.QueryLanguage;
2525
import org.eclipse.rdf4j.query.algebra.Difference;
26-
import org.eclipse.rdf4j.query.algebra.Distinct;
2726
import org.eclipse.rdf4j.query.algebra.Projection;
2827
import org.eclipse.rdf4j.query.algebra.ProjectionElem;
2928
import org.eclipse.rdf4j.query.algebra.TupleExpr;
@@ -123,11 +122,9 @@ void projectsMinusRightSideToSharedKeysWhenAssured() {
123122

124123
Difference difference = findFirstDifference(expr);
125124
assertThat(difference).isNotNull();
126-
assertThat(difference.getRightArg()).isInstanceOf(Distinct.class);
125+
assertThat(difference.getRightArg()).isInstanceOf(Projection.class);
127126

128-
TupleExpr right = ((Distinct) difference.getRightArg()).getArg();
129-
assertThat(right).isInstanceOf(Projection.class);
130-
Projection projection = (Projection) right;
127+
Projection projection = (Projection) difference.getRightArg();
131128
assertThat(projection.getProjectionElemList().getElements())
132129
.extracting(ProjectionElem::getName)
133130
.containsExactly("s");

0 commit comments

Comments
 (0)