Skip to content

Commit f2b45fe

Browse files
committed
maybe better in some cases but also mostly worse
1 parent d97d5f1 commit f2b45fe

16 files changed

Lines changed: 10039 additions & 77 deletions

core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SketchBasedJoinEstimator.java

Lines changed: 48 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@ public enum Component {
198198

199199
public enum Pair {
200200
SP(Component.S, Component.P, Component.O, Component.C),
201-
SO(Component.S, Component.O, Component.P, Component.C),
201+
// SO(Component.S, Component.O, Component.P, Component.C),
202202
SC(Component.S, Component.C, Component.P, Component.O),
203203
PO(Component.P, Component.O, Component.S, Component.C),
204204
PC(Component.P, Component.C, Component.S, Component.O),
@@ -411,8 +411,8 @@ enum SketchPlannerPath {
411411
private static final int SKETCH_PAYLOAD_FORMAT_NATIVE = -1;
412412
private static final int SKETCH_PAYLOAD_FRAME_HEADER_BYTES = Integer.BYTES + Integer.BYTES;
413413
private static final int TARGET_SKETCH_PART_FILES = 128;
414-
private static final int DEFAULT_BUCKET_COUNT = 1024;
415-
private static final int DEFAULT_SKETCH_NOMINAL_ENTRIES = 4096;
414+
private static final int DEFAULT_BUCKET_COUNT = 4*1024;
415+
private static final int DEFAULT_SKETCH_NOMINAL_ENTRIES = 64;
416416
private static final String ESTIMATE_CACHE_SECONDS_PROPERTY = "estimateCacheSeconds";
417417
private static final String ZERO_INTERSECTION_EXACT_DISTINCT_LIMIT_PROPERTY = "zeroIntersectionExactDistinctLimit";
418418
private static final String ZERO_INTERSECTION_SKEW_RATIO_PROPERTY = "zeroIntersectionSkewRatio";
@@ -2369,8 +2369,8 @@ private void accumulateIngestEvent(BatchUpdateAccumulator updates, IngestEvent e
23692369
accumulateSingleUpdates(updates, event);
23702370
accumulatePair(updates, Pair.SP, event.isDelete, event.spKey, event.thetaSig, event.thetaHo,
23712371
event.thetaHc);
2372-
accumulatePair(updates, Pair.SO, event.isDelete, event.soKey, event.thetaSig, event.thetaHp,
2373-
event.thetaHc);
2372+
// accumulatePair(updates, Pair.SO, event.isDelete, event.soKey, event.thetaSig, event.thetaHp,
2373+
// event.thetaHc);
23742374
if (contextPairSketchesEnabled) {
23752375
accumulatePair(updates, Pair.SC, event.isDelete, event.scKey, event.thetaSig, event.thetaHp,
23762376
event.thetaHo);
@@ -2393,10 +2393,10 @@ private void accumulateIngestEvent(BatchUpdateAccumulator updates, IngestEvent e
23932393
accumulatePair(updates, Pair.SP, event.isDelete, event.spKey, event.thetaSig, event.thetaHo,
23942394
event.thetaHc);
23952395
break;
2396-
case SO:
2397-
accumulatePair(updates, Pair.SO, event.isDelete, event.soKey, event.thetaSig, event.thetaHp,
2398-
event.thetaHc);
2399-
break;
2396+
// case SO:
2397+
// accumulatePair(updates, Pair.SO, event.isDelete, event.soKey, event.thetaSig, event.thetaHp,
2398+
// event.thetaHc);
2399+
// break;
24002400
case SC:
24012401
if (!contextPairSketchesEnabled) {
24022402
break;
@@ -3158,6 +3158,9 @@ private static final class State {
31583158

31593159
State(int k, int subjectBuckets, int predicateBuckets, int objectBuckets, int contextBuckets,
31603160
boolean contextPairSketchesEnabled) {
3161+
System.out.println("Initializing state: k=" + k + ", subjectBuckets=" + subjectBuckets + ", predicateBuckets="
3162+
+ predicateBuckets + ", objectBuckets=" + objectBuckets + ", contextBuckets=" + contextBuckets
3163+
+ ", contextPairSketchesEnabled=" + contextPairSketchesEnabled);
31613164
this.k = k;
31623165
this.subjectBuckets = subjectBuckets;
31633166
this.predicateBuckets = predicateBuckets;
@@ -3311,11 +3314,11 @@ private int hash(Component component, String v) {
33113314

33123315
private int componentBucketCount(Component component) {
33133316
return switch (component) {
3314-
case S -> subjectBucketCount;
3315-
case P -> predicateBucketCount;
3316-
case O -> objectBucketCount;
3317-
case C -> contextBucketCount;
3318-
default -> throw new IllegalStateException("Unsupported component: " + component);
3317+
case S -> subjectBucketCount;
3318+
case P -> predicateBucketCount;
3319+
case O -> objectBucketCount;
3320+
case C -> contextBucketCount;
3321+
default -> throw new IllegalStateException("Unsupported component: " + component);
33193322
};
33203323
}
33213324

@@ -7919,12 +7922,40 @@ private ArrayOfDoublesUpdatableSketch getSketchForWrite(State state, byte recTyp
79197922
}
79207923
}
79217924
if (sketch == null) {
7922-
sketch = newSk(state.k);
7925+
SketchAddress address = new SketchAddress(recType, isDelete, axisA, axisB, x, y);
7926+
sketch = newSketchForWrite(state, address, entryId);
79237927
setResidentSketch(state, recType, isDelete, axisA, axisB, x, y, sketch);
79247928
}
79257929
return sketch;
79267930
}
79277931

7932+
/**
 * Creates the sketch to install for {@code address} when no sketch is resident yet.
 *
 * <p>
 * Returns a plain {@code newSk(state.k)} sketch when {@code entryId} is negative, persistence is disabled, there is
 * no persistence file or store, or the low bit of {@code rebuildEpoch} is clear. Otherwise it allocates a framed
 * payload of {@code state.maxSketchBytes} from the persistence store, builds the sketch over that payload and
 * records the allocation's reference for {@code entryId} in {@code cacheDirectory}.
 *
 * @param state   state whose {@code k} and {@code maxSketchBytes} size the new sketch
 * @param address address of the sketch; selects the file kind and is included in the failure message
 * @param entryId cache directory entry to attach the persisted reference to; negative skips persistence
 * @return the newly created sketch
 * @throws IllegalStateException if the store allocation fails with an {@link IOException}
 */
private ArrayOfDoublesUpdatableSketch newSketchForWrite(State state, SketchAddress address, int entryId) {
7933+
// Read the store field once so the null check and the later use see the same reference.
SketchEstimatorPersistenceStore store = persistenceStore;
7934+
if (entryId < 0 || !persistenceEnabled || persistenceFile == null || store == null
7935+
|| (rebuildEpoch.get() & 1L) == 0L) {
7936+
// NOTE(review): an odd rebuildEpoch presumably marks a rebuild in progress - confirm against the writer.
return newSk(state.k);
7937+
}
7938+
7939+
byte slot = slotByte(slotOf(state));
7940+
try {
7941+
// Lock order here is persistLock first, then sketchCacheLock.
synchronized (persistLock) {
7942+
SketchEstimatorPersistenceStore.FramedPayloadAllocation allocation = store.allocateFramedPayload(slot,
7943+
fileKindFor(address), SKETCH_PAYLOAD_FORMAT_NATIVE, state.maxSketchBytes,
7944+
slotGenerations[slot]);
7945+
// The sketch is created directly over the payload handed back by the store.
ArrayOfDoublesUpdatableSketch sketch = TupleSketchOps.newSketch(state.k, allocation.payload);
7946+
SketchEstimatorPersistenceStore.Ref persistedRef = allocation.ref;
7947+
synchronized (sketchCacheLock) {
7948+
cacheDirectory.setPersistedRef(entryId, slot, persistedRef.fileKind, persistedRef.offset,
7949+
persistedRef.length, persistedRef.generation);
7950+
// Flag the index as changed now that the directory entry points at the new allocation.
indexDirty.set(true);
7951+
}
7952+
return sketch;
7953+
}
7954+
} catch (IOException e) {
7955+
throw new IllegalStateException("Failed to allocate mapped join estimator sketch " + address, e);
7956+
}
7957+
}
7958+
79287959
private void updateSingleSketchRaw(State state, boolean isDelete, Component component, int idx, long thetaHash) {
79297960
updateSketchRaw(state, REC_SINGLE_TRIPLE, isDelete, (byte) component.ordinal(), (byte) 0, idx, 0, thetaHash);
79307961
}
@@ -8590,9 +8621,9 @@ public static final class Config {
85908621
int nominalEntries = DEFAULT_BUCKET_COUNT;
85918622
boolean bucketCountExplicit;
85928623
int subjectBucketCount = DEFAULT_BUCKET_COUNT;
8593-
int predicateBucketCount = DEFAULT_BUCKET_COUNT;
8624+
int predicateBucketCount = 64;
85948625
int objectBucketCount = DEFAULT_BUCKET_COUNT;
8595-
int contextBucketCount = DEFAULT_BUCKET_COUNT;
8626+
int contextBucketCount = 16;
85968627
boolean subjectBucketCountExplicit;
85978628
boolean predicateBucketCountExplicit;
85988629
boolean objectBucketCountExplicit;

core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SketchJoinOrderPlanner.java

Lines changed: 128 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -766,17 +766,22 @@ private long candidatesMask(long mask) {
766766
private long computeCandidatesMask(long mask) {
767767
long preferred = 0L;
768768
long disconnected = 0L;
769+
long disconnectedSmallBindingAssignments = 0L;
769770
for (int i = 0; i < factors.size(); i++) {
770771
if (contains(mask, i)) {
771772
continue;
772773
}
773774
if (hasConnection(mask, i)) {
774775
preferred |= bit(i);
775776
} else {
776-
disconnected |= bit(i);
777+
long bit = bit(i);
778+
disconnected |= bit;
779+
if (isSmallBindingSetAssignment(i)) {
780+
disconnectedSmallBindingAssignments |= bit;
781+
}
777782
}
778783
}
779-
return preferred != 0L ? preferred : disconnected;
784+
return preferred != 0L ? preferred | disconnectedSmallBindingAssignments : disconnected;
780785
}
781786

782787
private SketchBasedJoinEstimator.JoinStepEstimate estimateTransition(long mask,
@@ -1380,6 +1385,7 @@ private int compareBridgeUnlockOrder(StatePlan left, StatePlan right) {
13801385
int size = Math.min(left.order().size(), right.order().size());
13811386
long leftMask = 0L;
13821387
long rightMask = 0L;
1388+
long lastIntroducedVars = 0L;
13831389
for (int i = 0; i < size; i++) {
13841390
int leftIndex = left.order().get(i).intValue();
13851391
int rightIndex = right.order().get(i).intValue();
@@ -1391,20 +1397,78 @@ private int compareBridgeUnlockOrder(StatePlan left, StatePlan right) {
13911397
}
13921398
boolean leftLeaf = isLeafUnlock(leftIndex, leftMask);
13931399
boolean rightLeaf = isLeafUnlock(rightIndex, rightMask);
1400+
boolean leftOlderBoundLeaf = isOlderBoundRuntimeJoin(leftIndex, leftMask, lastIntroducedVars);
1401+
boolean rightOlderBoundLeaf = isOlderBoundRuntimeJoin(rightIndex, rightMask, lastIntroducedVars);
13941402
if (leftBridge && rightLeaf
1403+
&& !rightOlderBoundLeaf
13951404
&& bridgeUnlockStepComparable(left.physicalStepRanks().get(i),
13961405
right.physicalStepRanks().get(i))) {
13971406
return -1;
13981407
}
13991408
if (rightBridge && leftLeaf
1409+
&& !leftOlderBoundLeaf
14001410
&& bridgeUnlockStepComparable(right.physicalStepRanks().get(i),
14011411
left.physicalStepRanks().get(i))) {
14021412
return 1;
14031413
}
14041414
return 0;
14051415
}
1416+
long boundBeforeStep = boundVariableMask(leftMask);
14061417
leftMask |= bit(leftIndex);
14071418
rightMask |= bit(rightIndex);
1419+
lastIntroducedVars = bindingVarMasks[leftIndex] & ~boundBeforeStep;
1420+
}
1421+
return 0;
1422+
}
1423+
1424+
/**
 * Tells whether the factor is a leaf unlock whose join step role is {@code OLDER_BOUND_JOIN}.
 *
 * <p>
 * Always {@code false} when {@code lastIntroducedVars} is zero.
 *
 * NOTE(review): no call site is visible in the surrounding changes, which call
 * {@code isOlderBoundRuntimeJoin} instead - confirm this helper is still used.
 *
 * @param factorIndex        index of the factor being placed
 * @param previousMask       mask of the factors placed before this step
 * @param lastIntroducedVars mask passed through to {@code joinStepRole}; zero short-circuits to {@code false}
 * @return {@code true} if all three conditions hold
 */
private boolean isOlderBoundLeaf(int factorIndex, long previousMask, long lastIntroducedVars) {
1425+
return lastIntroducedVars != 0L
1426+
&& isLeafUnlock(factorIndex, previousMask)
1427+
&& joinStepRole(factorIndex, previousMask, lastIntroducedVars) == JoinStepRole.OLDER_BOUND_JOIN;
1428+
}
1429+
1430+
/**
 * Tells whether the factor's runtime mask overlaps only bits that were bound before the most recent step.
 *
 * <p>
 * Returns {@code true} when {@code runtimeVarMasks[factorIndex]} is non-zero, shares no bit with
 * {@code lastIntroducedVars}, and shares at least one bit with {@code runtimeBoundVariableMask(previousMask)} once
 * the {@code lastIntroducedVars} bits are removed from it. Returns {@code false} when {@code lastIntroducedVars} is
 * zero.
 *
 * @param factorIndex        index into {@code runtimeVarMasks}
 * @param previousMask       mask of the factors placed before this step
 * @param lastIntroducedVars mask of the bits newly introduced by the preceding step
 * @return {@code true} if the factor joins solely on older bound bits
 */
private boolean isOlderBoundRuntimeJoin(int factorIndex, long previousMask, long lastIntroducedVars) {
1431+
if (lastIntroducedVars == 0L) {
1432+
return false;
1433+
}
1434+
long runtimeVars = runtimeVarMasks[factorIndex];
1435+
// Reject factors with an empty runtime mask and factors that touch anything the last step introduced.
if (runtimeVars == 0L || (runtimeVars & lastIntroducedVars) != 0L) {
1436+
return false;
1437+
}
1438+
// Bound bits that predate the last step.
long olderBoundVars = runtimeBoundVariableMask(previousMask) & ~lastIntroducedVars;
1439+
return (runtimeVars & olderBoundVars) != 0L;
1440+
}
1441+
1442+
/**
 * Compares two plans at the first position where their factor orders differ, preferring the side whose factor
 * satisfies {@code isOlderBoundRuntimeJoin}.
 *
 * <p>
 * Only the first differing position within the common length is examined. If both sides classify the same way the
 * result is 0. Otherwise the side that qualifies wins, provided {@code stepWorkComparable} accepts its step work
 * against the other side's under {@code BRIDGE_CONTINUATION_MAX_STEP_WORK_RATIO}.
 *
 * @param left  first plan
 * @param right second plan
 * @return -1 if {@code left} is preferred, 1 if {@code right} is preferred, 0 if this rule does not decide
 */
private int compareOlderBoundLeafOrder(StatePlan left, StatePlan right) {
1443+
int size = Math.min(left.order().size(), right.order().size());
1444+
long leftMask = 0L;
1445+
long rightMask = 0L;
1446+
// Bits newly bound by the most recent shared step; zero before the first step.
long lastIntroducedVars = 0L;
1447+
for (int i = 0; i < size; i++) {
1448+
int leftIndex = left.order().get(i).intValue();
1449+
int rightIndex = right.order().get(i).intValue();
1450+
if (leftIndex != rightIndex) {
1451+
boolean leftOlderBoundLeaf = isOlderBoundRuntimeJoin(leftIndex, leftMask, lastIntroducedVars);
1452+
boolean rightOlderBoundLeaf = isOlderBoundRuntimeJoin(rightIndex, rightMask, lastIntroducedVars);
1453+
// Same classification on both sides: this rule cannot separate the plans.
if (leftOlderBoundLeaf == rightOlderBoundLeaf) {
1454+
return 0;
1455+
}
1456+
PhysicalStepRank leftStep = left.physicalStepRanks().get(i);
1457+
PhysicalStepRank rightStep = right.physicalStepRanks().get(i);
1458+
if (leftOlderBoundLeaf && stepWorkComparable(leftStep.workRows(), rightStep.workRows(),
1459+
BRIDGE_CONTINUATION_MAX_STEP_WORK_RATIO)) {
1460+
return -1;
1461+
}
1462+
if (rightOlderBoundLeaf && stepWorkComparable(rightStep.workRows(), leftStep.workRows(),
1463+
BRIDGE_CONTINUATION_MAX_STEP_WORK_RATIO)) {
1464+
return 1;
1465+
}
1466+
// The qualifying side's step work was not accepted as comparable, so stay neutral.
return 0;
1467+
}
1468+
// Both orders agree at this position, so the left-side values describe both plans.
long boundBeforeStep = boundVariableMask(leftMask);
1469+
leftMask |= bit(leftIndex);
1470+
rightMask |= bit(rightIndex);
1471+
lastIntroducedVars = bindingVarMasks[leftIndex] & ~boundBeforeStep;
14081472
}
14091473
return 0;
14101474
}
@@ -1830,19 +1894,25 @@ private boolean isBetter(StatePlan candidate, StatePlan incumbent) {
18301894
if (incumbent == null) {
18311895
return true;
18321896
}
1833-
boolean structurallyComparableWork = structurallyComparableWork(candidate.totalWork(), incumbent.totalWork());
1834-
if (structurallyComparableWork && candidate.mask() == incumbent.mask()) {
1897+
if (candidate.mask() == incumbent.mask()) {
18351898
int deferredFilterComparison = compareDeferredFilterOrder(candidate.order(), incumbent.order());
18361899
if (deferredFilterComparison != 0) {
18371900
return deferredFilterComparison < 0;
18381901
}
1902+
int guardedBindingComparison = compareGuardedBindingAssignmentOrder(candidate, incumbent);
1903+
if (guardedBindingComparison != 0) {
1904+
return guardedBindingComparison < 0;
1905+
}
1906+
}
1907+
boolean structurallyComparableWork = structurallyComparableWork(candidate.totalWork(), incumbent.totalWork());
1908+
if (structurallyComparableWork && candidate.mask() == incumbent.mask()) {
18391909
int boundGuardComparison = compareBoundGuardOrder(candidate.order(), incumbent.order());
18401910
if (boundGuardComparison != 0) {
18411911
return boundGuardComparison < 0;
18421912
}
1843-
int guardedBindingComparison = compareGuardedBindingAssignmentOrder(candidate, incumbent);
1844-
if (guardedBindingComparison != 0) {
1845-
return guardedBindingComparison < 0;
1913+
int narrowAnchorComparison = compareNarrowAnchorOrder(candidate, incumbent);
1914+
if (narrowAnchorComparison != 0) {
1915+
return narrowAnchorComparison < 0;
18461916
}
18471917
}
18481918
if (structurallyComparableWork && candidate.mask() == incumbent.mask()) {
@@ -1851,9 +1921,15 @@ private boolean isBetter(StatePlan candidate, StatePlan incumbent) {
18511921
return bridgeUnlockComparison < 0;
18521922
}
18531923
}
1854-
int connectivityComparison = compareOrderConnectivity(candidate.order(), incumbent.order());
1855-
if (connectivityComparison != 0 && structurallyComparableWork) {
1856-
return connectivityComparison < 0;
1924+
if (!structurallyComparableWork && candidate.mask() == incumbent.mask()) {
1925+
int narrowAnchorComparison = compareNarrowAnchorOrder(candidate, incumbent);
1926+
if (narrowAnchorComparison != 0) {
1927+
return narrowAnchorComparison < 0;
1928+
}
1929+
}
1930+
int olderBoundLeafComparison = compareOlderBoundLeafOrder(candidate, incumbent);
1931+
if (olderBoundLeafComparison != 0) {
1932+
return olderBoundLeafComparison < 0;
18571933
}
18581934
int guardExpansionComparison = compareBoundGuardExpansionOrder(candidate, incumbent);
18591935
if (guardExpansionComparison != 0) {
@@ -1867,6 +1943,10 @@ private boolean isBetter(StatePlan candidate, StatePlan incumbent) {
18671943
if (continuationComparison != 0) {
18681944
return continuationComparison < 0;
18691945
}
1946+
int connectivityComparison = compareOrderConnectivity(candidate.order(), incumbent.order());
1947+
if (connectivityComparison != 0 && structurallyComparableWork) {
1948+
return connectivityComparison < 0;
1949+
}
18701950
int workComparison = Double.compare(candidate.totalWork(), incumbent.totalWork());
18711951
if (workComparison != 0) {
18721952
return workComparison < 0;
@@ -1885,6 +1965,41 @@ private boolean isBetter(StatePlan candidate, StatePlan incumbent) {
18851965
return compareOrder(candidate.order(), incumbent.order()) < 0;
18861966
}
18871967

1968+
/**
 * Compares two plans at the first position where their factor orders differ, preferring the side whose factor
 * introduces a strict subset of the bits the other side's factor would introduce.
 *
 * <p>
 * Only the first differing position within the common length is examined. The rule applies only when the two
 * factors' {@code runtimeVarMasks} entries share at least one bit. The narrower side wins only if
 * {@code stepWorkComparable} accepts its step work against the other side's under
 * {@code ANCHOR_BRIDGE_MAX_STEP_WORK_RATIO}.
 *
 * @param left  first plan
 * @param right second plan
 * @return -1 if {@code left} is preferred, 1 if {@code right} is preferred, 0 if this rule does not decide
 */
private int compareNarrowAnchorOrder(StatePlan left, StatePlan right) {
1969+
int size = Math.min(left.order().size(), right.order().size());
1970+
long leftMask = 0L;
1971+
long rightMask = 0L;
1972+
for (int i = 0; i < size; i++) {
1973+
int leftIndex = left.order().get(i).intValue();
1974+
int rightIndex = right.order().get(i).intValue();
1975+
if (leftIndex != rightIndex) {
1976+
// Factors with disjoint runtime masks are not compared by this rule.
if ((runtimeVarMasks[leftIndex] & runtimeVarMasks[rightIndex]) == 0L) {
1977+
return 0;
1978+
}
1979+
// Bits each factor would bind that are not already bound by the preceding factors.
long leftIntroduced = bindingVarMasks[leftIndex] & ~boundVariableMask(leftMask);
1980+
long rightIntroduced = bindingVarMasks[rightIndex] & ~boundVariableMask(rightMask);
1981+
if (isStrictSubset(leftIntroduced, rightIntroduced)
1982+
&& stepWorkComparable(left.physicalStepRanks().get(i).workRows(),
1983+
right.physicalStepRanks().get(i).workRows(), ANCHOR_BRIDGE_MAX_STEP_WORK_RATIO)) {
1984+
return -1;
1985+
}
1986+
if (isStrictSubset(rightIntroduced, leftIntroduced)
1987+
&& stepWorkComparable(right.physicalStepRanks().get(i).workRows(),
1988+
left.physicalStepRanks().get(i).workRows(), ANCHOR_BRIDGE_MAX_STEP_WORK_RATIO)) {
1989+
return 1;
1990+
}
1991+
return 0;
1992+
}
1993+
leftMask |= bit(leftIndex);
1994+
rightMask |= bit(rightIndex);
1995+
}
1996+
return 0;
1997+
}
1998+
1999+
/**
 * Tests whether {@code subset} is a non-empty, proper subset of {@code superset} when both are read as bit sets.
 *
 * <p>
 * An empty {@code subset} (zero) yields {@code false}, as does {@code subset == superset}.
 *
 * @param subset   candidate bit set
 * @param superset bit set that must contain every bit of {@code subset} plus at least one more
 * @return {@code true} if every set bit of {@code subset} is set in {@code superset} and the two differ
 */
private static boolean isStrictSubset(long subset, long superset) {
2000+
return subset != 0L && subset != superset && (subset & superset) == subset;
2001+
}
2002+
18882003
private boolean structurallyComparableWork(double leftWorkRows, double rightWorkRows) {
18892004
if (!isFiniteNonNegative(leftWorkRows) || !isFiniteNonNegative(rightWorkRows)) {
18902005
return false;
@@ -1958,6 +2073,9 @@ private double deferredFilterOrderingWeight(JoinOrderPlanner.FilterConstraint fi
19582073
&& passRatio <= DEFERRED_FILTER_ORDERING_MAX_PASS_RATIO) {
19592074
return 1.0d + (DEFERRED_FILTER_ORDERING_MAX_PASS_RATIO - passRatio);
19602075
}
2076+
if (filter.getConditionCost() > JoinOrderPlanner.FILTER_COST_CHEAP) {
2077+
return 1.0d;
2078+
}
19612079
return 0.0d;
19622080
}
19632081

0 commit comments

Comments
 (0)