Skip to content

Commit 97042c5

Browse files
committed
wip
1 parent ccac5ff commit 97042c5

1 file changed

Lines changed: 75 additions & 24 deletions

File tree

testsuites/benchmark-common/src/main/java/org/eclipse/rdf4j/benchmark/common/ThemeQueryCatalog.java

Lines changed: 75 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -176,7 +176,9 @@ public final class ThemeQueryCatalog {
176176
" OPTIONAL { ?enc med:hasObservation ?obs . ?obs med:value ?value . BIND(?value AS ?optValue) }",
177177
" FILTER(?optValue > 95)",
178178
"}"),
179-
7280L),
179+
7280L), // Optimization: avoid the broad `?enc ?anyP ?anyO` scan by starting from selective
180+
// patterns (DX-200, date window, high observation), and rewrite OPTIONAL+FILTER into
181+
// inner joins with filter pushdown.
180182
query("Medical: MED-1000 patients excluding any DX-202 encounter (anti-join stress)",
181183
medicalPrefix + String.join("\n",
182184
"SELECT ?patient ?m ?optCode WHERE {",
@@ -197,7 +199,9 @@ public final class ThemeQueryCatalog {
197199
" OPTIONAL { ?patient med:name ?n . }",
198200
" FILTER(?n != \"\")",
199201
"}"),
200-
9863L),
202+
9863L), // Optimization: remove the redundant UNION branch, turn the medication OPTIONAL+FILTER
203+
// into a selective inner join, and implement the DX-202 exclusion as an indexed
204+
// anti-join evaluated early.
201205
query("Medical: practitioners treating 'patient 1*' with DX-201 (correlated EXISTS)",
202206
medicalPrefix + String.join("\n",
203207
"SELECT ?practitioner ?pp ?po WHERE {",
@@ -214,7 +218,10 @@ public final class ThemeQueryCatalog {
214218
" FILTER(?optC = \"DX-201\")",
215219
" }",
216220
"}"),
217-
146L)));
221+
146L) // Optimization: decorrelate EXISTS into a semi-join, push down the patient-name + DX-201
222+
// filters, and avoid expanding `?practitioner ?pp ?po` until after the selective join
223+
// has reduced candidates.
224+
));
218225

219226
String socialPrefix = String.join("\n",
220227
"PREFIX social: <http://example.com/theme/social/>",
@@ -468,7 +475,9 @@ public final class ThemeQueryCatalog {
468475
" OPTIONAL { ?post social:content ?content . BIND(LCASE(?content) AS ?lc) }",
469476
" FILTER(CONTAINS(?lc, \"alpha\"))",
470477
"} LIMIT 181843"),
471-
181843L),
478+
181843L), // Optimization: recognize the 4-way likedBy self-join as an “at least 4 likes”
479+
// pattern, push down tag/content predicates before expanding likes, and (ideally)
480+
// rewrite to a precomputed like-degree path/index.
472481
query("Social: user1* with mutual follower who liked their tag1 post (join-order trap)",
473482
socialPrefix + String.join("\n",
474483
"SELECT ?u ?v ?post WHERE {",
@@ -483,7 +492,9 @@ public final class ThemeQueryCatalog {
483492
" OPTIONAL { ?post social:createdAt ?t . BIND(?t AS ?optT) }",
484493
" FILTER(?optT > \"2024-01-10T00:00:00\"^^xsd:dateTime)",
485494
"}"),
486-
0L),
495+
0L), // Optimization: start from selective tag/time constraints (tag1 + createdAt) and join
496+
// outward to post→author/liker→mutual-follows, instead of scanning large user-name
497+
// prefixes then exploding joins.
487498
query("Social: posts with likes but no comments (left-join anti-pattern)",
488499
socialPrefix + String.join("\n",
489500
"SELECT ?post ?liker ?optLn WHERE {",
@@ -494,7 +505,10 @@ public final class ThemeQueryCatalog {
494505
" OPTIONAL { ?liker social:name ?ln . BIND(?ln AS ?optLn) }",
495506
" FILTER(?optLn != \"\")",
496507
"}"),
497-
0L)));
508+
0L) // Optimization: rewrite OPTIONAL+FILTER(!BOUND) into an anti-join
509+
// (`FILTER NOT EXISTS { ?post social:hasComment ?c }`) and reorder so the engine doesn’t
510+
// build massive NULL-extended intermediates.
511+
));
498512

499513
String libraryPrefix = String.join("\n",
500514
"PREFIX lib: <http://example.com/theme/library/>",
@@ -650,7 +664,9 @@ public final class ThemeQueryCatalog {
650664
" OPTIONAL { ?book lib:title ?title . BIND(LCASE(STR(?title)) AS ?titleLc) }",
651665
" FILTER(CONTAINS(?titleLc, \"book\"))",
652666
"}"),
653-
0L),
667+
0L), // Optimization: drop/reorder the `?copy ?p ?o` scan, turn OPTIONAL+FILTER blocks into
668+
// inner joins, and start from the highly selective Branch/Author constraints before
669+
// touching the huge Book space.
654670
query("Library: loans for Branch 0 books due after Jan 10 (join-order nightmare)",
655671
libraryPrefix + String.join("\n",
656672
"SELECT ?loan ?book ?copy ?optDue WHERE {",
@@ -663,7 +679,9 @@ public final class ThemeQueryCatalog {
663679
" OPTIONAL { ?loan a lib:Loan ; lib:loanedCopy ?copy ; lib:dueDate ?due . BIND(?due AS ?optDue) }",
664680
" FILTER(?optDue > \"2024-01-10\"^^xsd:date)",
665681
"}"),
666-
14377L),
682+
14377L), // Optimization: start from the smaller Loan/Copy/Branch side (or Branch→Copy→Loan) and
683+
// push the dueDate filter down; avoid scanning Books and then “discovering” loans
684+
// late.
667685
query("Library: omega-title books that were ever loaned (correlated EXISTS)",
668686
libraryPrefix + String.join("\n",
669687
"SELECT ?book ?copy ?loan WHERE {",
@@ -674,7 +692,9 @@ public final class ThemeQueryCatalog {
674692
" ?loan lib:loanedCopy ?copy .",
675693
" FILTER EXISTS { ?loan lib:borrowedBy ?m . }",
676694
"}"),
677-
339L)));
695+
339L) // Optimization: treat the EXISTS as a semi-join (or remove it as redundant), and reorder
696+
// to start from the smaller Loan→Copy→Book path instead of scanning all book titles.
697+
));
678698

679699
String engineeringPrefix = String.join("\n",
680700
"PREFIX eng: <http://example.com/theme/engineering/>",
@@ -812,7 +832,9 @@ public final class ThemeQueryCatalog {
812832
" }",
813833
" FILTER(CONTAINS(?optAn, \"Assembly 1\"))",
814834
"}"),
815-
57L),
835+
57L), // Optimization: convert OPTIONAL+FILTER chains into inner joins, push the
836+
// `measuredValue > 0.99` filter down, and join to assemblies only after the measurement
837+
// filter has reduced candidates.
816838
query("Engineering: components whose dependency is in the same assembly (redundant UNION)",
817839
engineeringPrefix + String.join("\n",
818840
"SELECT ?component ?dep ?optA1 ?optA2 WHERE {",
@@ -823,7 +845,8 @@ public final class ThemeQueryCatalog {
823845
" OPTIONAL { ?component eng:dependsOn ?dep . ?dep eng:partOf ?a2 . BIND(?a2 AS ?optA2) }",
824846
" FILTER(?optA1 = ?optA2)",
825847
"}"),
826-
326L),
848+
326L), // Optimization: eliminate the redundant UNION, rewrite OPTIONAL+FILTER into inner joins,
849+
// and start from the selective dependsOn edge to avoid scanning all components.
827850
query("Engineering: requirements for Component 1/2 with no low measurements (anti-join + string filter)",
828851
engineeringPrefix + String.join("\n",
829852
"SELECT ?requirement ?component WHERE {",
@@ -837,7 +860,10 @@ public final class ThemeQueryCatalog {
837860
" FILTER(?v < 0.85)",
838861
" }",
839862
"}"),
840-
79L)));
863+
79L) // Optimization: push the component-name predicate down (prefer exact IRIs over CONTAINS),
864+
// and execute the NOT EXISTS as an indexed anti-join so low-measurement requirements
865+
// are eliminated early.
866+
));
841867

842868
String connectedPrefix = String.join("\n",
843869
"PREFIX conn: <http://example.com/theme/connected/>",
@@ -964,7 +990,9 @@ public final class ThemeQueryCatalog {
964990
" OPTIONAL { ?node conn:weight ?w . BIND(?w AS ?optW) }",
965991
" FILTER(?optW > 8)",
966992
"}"),
967-
26949L),
993+
26949L), // Optimization: rewrite CONTAINS(STR(?nbr),"node/0") to an equality against the
994+
// concrete IRI, turn OPTIONAL+FILTER into inner joins, and start from selective
995+
// weight/edge bindings instead of scanning all nodes.
968996
query("Connected: weight-10 nodes participating in a mutual edge (UNION + EXISTS redundancy)",
969997
connectedPrefix + String.join("\n",
970998
"SELECT ?node ?other ?optW WHERE {",
@@ -975,7 +1003,8 @@ public final class ThemeQueryCatalog {
9751003
" FILTER(?optW = 10)",
9761004
" FILTER EXISTS { ?node conn:connectsTo ?other . ?other conn:connectsTo ?node . }",
9771005
"}"),
978-
54L),
1006+
54L), // Optimization: drop the redundant UNION and implement mutual-edge checking as a single
1007+
// join/semi-join; push `weight=10` down to shrink candidates before testing mutuality.
9791008
query("Connected: low-weight nodes with no very-low-weight neighbors (anti-join over UNION)",
9801009
connectedPrefix + String.join("\n",
9811010
"SELECT ?node ?optW WHERE {",
@@ -990,7 +1019,9 @@ public final class ThemeQueryCatalog {
9901019
" FILTER(?w2 < 3)",
9911020
" }",
9921021
"}"),
993-
2L)));
1022+
2L) // Optimization: collapse incoming/outgoing neighbor scans into efficient index scans
1023+
// feeding one anti-join, and execute the NOT EXISTS with early cutoff.
1024+
));
9941025

9951026
String trainPrefix = String.join("\n",
9961027
"PREFIX train: <http://example.com/theme/train/>",
@@ -1128,7 +1159,9 @@ public final class ThemeQueryCatalog {
11281159
" }",
11291160
" FILTER(CONTAINS(?lnLc, \"line 1\"))",
11301161
"}"),
1131-
276L),
1162+
276L), // Optimization: rewrite the time constraint into a single indexed anti-join, and push
1163+
// the line-name restriction down (ideally avoid CONTAINS by binding concrete line IRIs
1164+
// when possible).
11321165
query("Train: operational points on Line 1 via track back-link (OPTIONAL self-join)",
11331166
trainPrefix + String.join("\n",
11341167
"SELECT ?op ?section ?track WHERE {",
@@ -1147,7 +1180,9 @@ public final class ThemeQueryCatalog {
11471180
" }",
11481181
" FILTER(?optSection2 = ?section)",
11491182
"}"),
1150-
17794L),
1183+
17794L), // Optimization: drop/reorder the redundant backlink self-join (`trackSectionOf`) and
1184+
// start from selective Line/Section bindings before touching the huge
1185+
// OperationalPoint set.
11511186
query("Train: services passing through OP 1* and OP 2* (Cartesian-product trap)",
11521187
trainPrefix + String.join("\n",
11531188
"SELECT ?service ?opA ?opB WHERE {",
@@ -1156,7 +1191,10 @@ public final class ThemeQueryCatalog {
11561191
" OPTIONAL { ?service train:passesThrough ?opB . ?opB train:name ?nB . BIND(LCASE(STR(?nB)) AS ?optNB) }",
11571192
" FILTER(CONTAINS(?optNA, \"op 1\") && CONTAINS(?optNB, \"op 2\"))",
11581193
"}"),
1159-
7849L)));
1194+
7849L) // Optimization: rewrite the two OPTIONALs into two semi-joins/EXISTS checks to avoid the
1195+
// quadratic cross-product over multi-valued passesThrough, and push name filters down
1196+
// to the op binding.
1197+
));
11601198

11611199
String gridPrefix = String.join("\n",
11621200
"PREFIX grid: <http://example.com/theme/grid/>",
@@ -1285,7 +1323,9 @@ public final class ThemeQueryCatalog {
12851323
" OPTIONAL { ?substation grid:name ?n . BIND(LCASE(STR(?n)) AS ?nlc) }",
12861324
" FILTER(CONTAINS(?nlc, \"substation\"))",
12871325
"}"),
1288-
6714L),
1326+
6714L), // Optimization: push the `loadValue > 190` predicate down to the loadValue index and
1327+
// rewrite OPTIONAL+FILTER into a proper inner join so the engine doesn’t generate
1328+
// NULL-extended intermediates.
12891329
query("Grid: lines connecting substations '1*' and '2*' (IRI string + multi-join trap)",
12901330
gridPrefix + String.join("\n",
12911331
"SELECT ?line ?s1 ?s2 WHERE {",
@@ -1300,7 +1340,9 @@ public final class ThemeQueryCatalog {
13001340
" (CONTAINS(?s1Str, \"substation/2\") || CONTAINS(?s2Str, \"substation/2\"))",
13011341
" ))",
13021342
"}"),
1303-
1168L),
1343+
1168L), // Optimization: replace CONTAINS-on-IRI with equality against concrete IRIs (or
1344+
// pre-bound substation groups), and bind the target substations first so only matching
1345+
// lines are scanned.
13041346
query("Grid: transformers with no load <60 (anti-join via MINUS)",
13051347
gridPrefix + String.join("\n",
13061348
"SELECT ?transformer ?substation WHERE {",
@@ -1319,7 +1361,10 @@ public final class ThemeQueryCatalog {
13191361
" FILTER(?v2 < 60)",
13201362
" }",
13211363
"}"),
1322-
83387L)));
1364+
83387L) // Optimization: execute the MINUS as an indexed anti-join (subtract transformers seen
1365+
// with <60 loads early), and avoid building large intermediate join results before
1366+
// applying the exclusion.
1367+
));
13231368

13241369
String pharmaPrefix = String.join("\n",
13251370
"PREFIX pharma: <http://example.com/theme/pharma/>",
@@ -1501,7 +1546,9 @@ public final class ThemeQueryCatalog {
15011546
" OPTIONAL { ?result pharma:biomarkerValue ?bv . BIND(?bv AS ?optBv) }",
15021547
" FILTER(?optBv > 1.0)",
15031548
"}"),
1504-
1535L),
1549+
1535L), // Optimization: turn OPTIONAL+FILTER blocks into inner joins with pushdown (especially
1550+
// effect/pValue/biomarker thresholds), and run the “no Severe side effects” MINUS as an
1551+
// indexed anti-join early.
15051552
query("Pharma: phase-3 trials with extreme biomarker or p-value (common-subexpression UNION)",
15061553
pharmaPrefix + String.join("\n",
15071554
"SELECT ?trial ?result ?bv WHERE {",
@@ -1524,7 +1571,8 @@ public final class ThemeQueryCatalog {
15241571
" OPTIONAL { ?result pharma:biomarkerValue ?bv . }",
15251572
" }",
15261573
"}"),
1527-
65L),
1574+
65L), // Optimization: factor out the duplicated `trial→arm→result` prefix (evaluate once) and
1575+
// convert UNION to a single scan with an OR filter when beneficial.
15281576
query("Pharma: high-synergy combinations whose member drugs share a target (O(n^2) member join)",
15291577
pharmaPrefix + String.join("\n",
15301578
"SELECT ?combo ?a ?b ?t WHERE {",
@@ -1535,7 +1583,10 @@ public final class ThemeQueryCatalog {
15351583
" FILTER(?a != ?b)",
15361584
" FILTER EXISTS { ?a pharma:targets ?t . ?b pharma:targets ?t . }",
15371585
"}"),
1538-
0L)));
1586+
0L) // Optimization: avoid the quadratic self-join over combination members by using a
1587+
// grouped/member-scan strategy (or specialized intersection join), and evaluate the
1588+
// shared-target EXISTS as a semi-join with early pruning.
1589+
));
15391590

15401591
validateQueries();
15411592
}

0 commit comments

Comments
 (0)