2222import java .util .List ;
2323import java .util .Map ;
2424import java .util .Optional ;
25+ import java .util .OptionalDouble ;
2526import java .util .Set ;
2627
2728import org .eclipse .rdf4j .model .Literal ;
5051import org .eclipse .rdf4j .query .algebra .Var ;
5152import org .eclipse .rdf4j .query .algebra .evaluation .QueryOptimizer ;
5253import org .eclipse .rdf4j .query .algebra .evaluation .impl .EvaluationStatistics ;
54+ import org .eclipse .rdf4j .query .algebra .evaluation .optimizer .JoinFactorCostModel ;
5355import org .eclipse .rdf4j .query .algebra .evaluation .optimizer .JoinOrderPlanner ;
5456import org .eclipse .rdf4j .query .algebra .evaluation .optimizer .QueryOptimizationScopeProvider ;
5557import org .eclipse .rdf4j .query .algebra .helpers .AbstractSimpleQueryModelVisitor ;
6163final class LmdbSketchJoinOptimizer implements QueryOptimizer {
6264
6365 private static final String RDF_TYPE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" ;
66+ private static final double BROAD_PREFIX_SCAN_MIN_WORK_ROWS = 100.0d ;
6467
6568 private final EvaluationStatistics statistics ;
6669 private final boolean trackResultSize ;
@@ -517,7 +520,8 @@ private Deque<TupleExpr> orderSegment(List<TupleExpr> segment, Set<String> bound
517520 return new ArrayDeque <>(segment );
518521 }
519522 applyPlannerStepEstimates (plan .get ());
520- return new ArrayDeque <>(preferUnboundTypeGuards (plan .get ().getOrderedArgs (), boundBeforeSegment ));
523+ return new ArrayDeque <>(preferSafeSubjectTypeAnchors (plan .get ().getOrderedArgs (), boundBeforeSegment ,
524+ filters ));
521525 }
522526
523527 private JoinOrderPlanner .Algorithm plannerAlgorithm (int segmentSize ) {
@@ -638,27 +642,26 @@ private boolean isValidPlannerOrder(List<TupleExpr> originalSegment, JoinOrderPl
638642 return true ;
639643 }
640644
641- private List <TupleExpr > preferUnboundTypeGuards (List <TupleExpr > orderedArgs , Set <String > boundBeforeSegment ) {
645+ private List <TupleExpr > preferSafeSubjectTypeAnchors (List <TupleExpr > orderedArgs ,
646+ Set <String > boundBeforeSegment , List <DeferredFilter > filters ) {
642647 if (orderedArgs .size () < 2 ) {
643648 return orderedArgs ;
644649 }
645650 List <TupleExpr > reordered = new ArrayList <>(orderedArgs );
646- preferCheaperIndependentAnchor (reordered , boundBeforeSegment );
647651 Set <String > bound = new HashSet <>(boundBeforeSegment );
648652 for (int i = 0 ; i < reordered .size (); i ++) {
649653 TupleExpr current = reordered .get (i );
650- if (! isTypeGuard ( current ) && current instanceof StatementPattern currentPattern
651- && Collections . disjoint ( bound , currentPattern . getBindingNames () )) {
654+ if (current instanceof StatementPattern currentPattern
655+ && canConsiderSubjectTypeAnchor ( currentPattern , filters )) {
652656 String subjectName = unboundName (currentPattern .getSubjectVar ());
653- if (subjectName != null ) {
654- for (int j = i + 1 ; j < reordered .size (); j ++) {
655- TupleExpr candidate = reordered .get (j );
656- if (isTypeGuardForSubject (candidate , subjectName )) {
657- reordered .remove (j );
658- reordered .add (i , candidate );
659- current = candidate ;
660- break ;
661- }
657+ for (int j = i + 1 ; j < reordered .size (); j ++) {
658+ TupleExpr candidate = reordered .get (j );
659+ if (isTypeGuardForSubject (candidate , subjectName )
660+ && isCheaperSubjectTypeOrder (currentPattern , candidate , bound )) {
661+ reordered .remove (j );
662+ reordered .add (i , candidate );
663+ current = candidate ;
664+ break ;
662665 }
663666 }
664667 }
@@ -667,41 +670,112 @@ private List<TupleExpr> preferUnboundTypeGuards(List<TupleExpr> orderedArgs, Set
667670 return List .copyOf (reordered );
668671 }
669672
670- private void preferCheaperIndependentAnchor ( List < TupleExpr > reordered , Set < String > boundBeforeSegment ) {
671- if (! boundBeforeSegment . isEmpty ( ) || ! isTypeGuard ( reordered . get ( 0 ) )) {
672- return ;
673+ private boolean canConsiderSubjectTypeAnchor ( StatementPattern pattern , List < DeferredFilter > filters ) {
674+ if (isTypeGuard ( pattern ) || hasPatternLocalDeferredFilter ( pattern , filters )) {
675+ return false ;
673676 }
674- StatementPattern typeGuard = (StatementPattern ) reordered .get (0 );
675- String typeSubject = unboundName (typeGuard .getSubjectVar ());
676- if (typeSubject == null ) {
677- return ;
677+ String subjectName = unboundName (pattern .getSubjectVar ());
678+ if (subjectName == null ) {
679+ return false ;
678680 }
679- double typeRows = typeGuard .getResultSizeEstimate ();
680- if (!LmdbJoinPlanSupport .isFiniteNonNegative (typeRows )) {
681- return ;
681+ Var object = pattern .getObjectVar ();
682+ return object != null && !object .hasValue ();
683+ }
684+
685+ private boolean isCheaperSubjectTypeOrder (StatementPattern currentPattern , TupleExpr typeGuard ,
686+ Set <String > bound ) {
687+ if (!(statistics instanceof JoinFactorCostModel )) {
688+ return false ;
682689 }
683- int bestIndex = -1 ;
684- double bestRows = typeRows ;
685- for (int i = 1 ; i < reordered .size (); i ++) {
686- TupleExpr candidate = reordered .get (i );
687- if (!(candidate instanceof StatementPattern candidatePattern ) || isTypeGuard (candidate )
688- || !Collections .disjoint (boundBeforeSegment , candidate .getBindingNames ())) {
689- continue ;
690- }
691- String candidateSubject = unboundName (candidatePattern .getSubjectVar ());
692- if (typeSubject .equals (candidateSubject )) {
693- continue ;
694- }
695- double candidateRows = candidate .getResultSizeEstimate ();
696- if (LmdbJoinPlanSupport .isFiniteNonNegative (candidateRows ) && candidateRows < bestRows ) {
697- bestRows = candidateRows ;
698- bestIndex = i ;
690+ JoinFactorCostModel costModel = (JoinFactorCostModel ) statistics ;
691+ OptionalDouble currentThenType = estimateAdjacentOrderCost (costModel , currentPattern , typeGuard , bound );
692+ OptionalDouble typeThenCurrent = estimateAdjacentOrderCost (costModel , typeGuard , currentPattern , bound );
693+ return currentThenType .isPresent () && typeThenCurrent .isPresent ()
694+ && (typeThenCurrent .getAsDouble () < currentThenType .getAsDouble ()
695+ || isDirectTypeLookupTieBreak (currentPattern , typeGuard )
696+ && isPairRowsNoWorse (typeGuard , currentPattern , currentPattern , typeGuard ));
697+ }
698+
699+ private boolean isDirectTypeLookupTieBreak (StatementPattern currentPattern , TupleExpr typeGuard ) {
700+ String currentMissing = currentPattern
701+ .getStringMetricPlanned (TelemetryMetricNames .PLANNED_MISSING_LOOKUP_COMPONENTS );
702+ String typeAccessMode = typeGuard .getStringMetricPlanned (TelemetryMetricNames .PLANNED_INDEX_ACCESS_MODE );
703+ double currentWork = currentPattern .getDoubleMetricPlanned (TelemetryMetricNames .PLANNED_WORK_ROWS );
704+ double typeWork = typeGuard .getDoubleMetricPlanned (TelemetryMetricNames .PLANNED_WORK_ROWS );
705+ return currentMissing != null && !currentMissing .isBlank ()
706+ && "directLookup" .equals (typeAccessMode )
707+ && LmdbJoinPlanSupport .isFiniteNonNegative (currentWork )
708+ && LmdbJoinPlanSupport .isFiniteNonNegative (typeWork )
709+ && currentWork > BROAD_PREFIX_SCAN_MIN_WORK_ROWS
710+ && typeWork <= currentWork ;
711+ }
712+
713+ private boolean isPairRowsNoWorse (TupleExpr promotedFirst , TupleExpr promotedSecond , TupleExpr currentFirst ,
714+ TupleExpr currentSecond ) {
715+ OptionalDouble promotedRows = estimatePairRows (promotedFirst , promotedSecond );
716+ OptionalDouble currentRows = estimatePairRows (currentFirst , currentSecond );
717+ return promotedRows .isPresent () && currentRows .isPresent ()
718+ && promotedRows .getAsDouble () <= currentRows .getAsDouble ();
719+ }
720+
721+ private OptionalDouble estimateAdjacentOrderCost (JoinFactorCostModel costModel , TupleExpr first ,
722+ TupleExpr second , Set <String > bound ) {
723+ Optional <JoinFactorCostModel .FactorCostEstimate > firstEstimate = costModel .estimateFactorCost (first ,
724+ bound );
725+ if (firstEstimate .isEmpty () || !isUsableCost (firstEstimate .get ())) {
726+ return OptionalDouble .empty ();
727+ }
728+ Set <String > boundAfterFirst = new HashSet <>(bound );
729+ boundAfterFirst .addAll (first .getBindingNames ());
730+ Optional <JoinFactorCostModel .FactorCostEstimate > secondEstimate = costModel .estimateFactorCost (second ,
731+ boundAfterFirst );
732+ if (secondEstimate .isEmpty () || !isUsableCost (secondEstimate .get ())) {
733+ return OptionalDouble .empty ();
734+ }
735+ double secondInvocations = repeatsForNewBinding (first , second , bound )
736+ ? repeatedLookupInvocations (firstEstimate .get ())
737+ : 1.0d ;
738+ double cost = firstEstimate .get ().getWorkRows ()
739+ + secondEstimate .get ().getWorkRows () * secondInvocations
740+ + estimatePairRows (first , second ).orElse (0.0d );
741+ return LmdbJoinPlanSupport .isFiniteNonNegative (cost ) ? OptionalDouble .of (cost ) : OptionalDouble .empty ();
742+ }
743+
744+ private OptionalDouble estimatePairRows (TupleExpr first , TupleExpr second ) {
745+ if (!statistics .supportsJoinEstimation ()) {
746+ return OptionalDouble .empty ();
747+ }
748+ double rows = statistics .getCardinality (new Join (first .clone (), second .clone ()));
749+ return LmdbJoinPlanSupport .isFiniteNonNegative (rows ) ? OptionalDouble .of (rows ) : OptionalDouble .empty ();
750+ }
751+
752+ private boolean isUsableCost (JoinFactorCostModel .FactorCostEstimate estimate ) {
753+ return LmdbJoinPlanSupport .isFiniteNonNegative (estimate .getWorkRows ())
754+ && LmdbJoinPlanSupport .isFiniteNonNegative (estimate .getOutputRows ());
755+ }
756+
757+ private boolean repeatsForNewBinding (TupleExpr first , TupleExpr second , Set <String > bound ) {
758+ Set <String > secondBindings = second .getBindingNames ();
759+ for (String firstBinding : first .getBindingNames ()) {
760+ if (!bound .contains (firstBinding ) && secondBindings .contains (firstBinding )) {
761+ return true ;
699762 }
700763 }
701- if (bestIndex > 0 ) {
702- TupleExpr anchor = reordered .remove (bestIndex );
703- reordered .add (0 , anchor );
764+ return false ;
765+ }
766+
767+ private double repeatedLookupInvocations (JoinFactorCostModel .FactorCostEstimate prefixEstimate ) {
768+ double prefixRows = Math .max (prefixEstimate .getOutputRows (), prefixEstimate .getWorkRows ());
769+ return prefixRows > 1.0d ? prefixRows : 1.0d ;
770+ }
771+
772+ private boolean hasPatternLocalDeferredFilter (StatementPattern pattern , List <DeferredFilter > filters ) {
773+ for (DeferredFilter filter : filters ) {
774+ if (filter .patternLocalBase == pattern || filter .originPatterns .contains (pattern )) {
775+ return true ;
776+ }
704777 }
778+ return false ;
705779 }
706780
707781 private boolean isTypeGuardForSubject (TupleExpr tupleExpr , String subjectName ) {
0 commit comments