Skip to content

Commit efc0240

Browse files
committed
wip
1 parent 4431f9e commit efc0240

4 files changed

Lines changed: 266 additions & 13 deletions

File tree

testsuites/benchmark-common/src/main/java/org/eclipse/rdf4j/benchmark/common/plan/QueryPlanCapture.java

Lines changed: 84 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,10 @@
2222
import java.time.Instant;
2323
import java.time.ZoneOffset;
2424
import java.time.format.DateTimeFormatter;
25+
import java.util.ArrayList;
26+
import java.util.Collections;
2527
import java.util.LinkedHashMap;
28+
import java.util.List;
2629
import java.util.Locale;
2730
import java.util.Map;
2831
import java.util.Objects;
@@ -127,6 +130,12 @@ public QueryPlanSnapshot capture(QueryPlanCaptureContext context,
127130
if (context.getBenchmark() != null && !context.getBenchmark().isBlank()) {
128131
metadata.putIfAbsent("benchmark", context.getBenchmark());
129132
}
133+
if (context.getQueryString() != null && !context.getQueryString().isBlank()) {
134+
metadata.putIfAbsent("queryString.sha256", sha256Hex(context.getQueryString()));
135+
metadata.putIfAbsent("queryString.normalizedWhitespaceSha256",
136+
sha256Hex(normalizeWhitespace(context.getQueryString())));
137+
}
138+
copyUnoptimizedInputShapeMetadata(explanations, metadata);
130139
metadata.putIfAbsent("gitCommit", resolveGitCommit());
131140
metadata.putIfAbsent("gitBranch", resolveGitBranch());
132141
metadata.putIfAbsent("javaVersion", System.getProperty("java.version", FeatureFlagCollector.NULL_VALUE));
@@ -150,6 +159,31 @@ public QueryPlanSnapshot capture(QueryPlanCaptureContext context,
150159
return snapshot;
151160
}
152161

162+
private static void copyUnoptimizedInputShapeMetadata(Map<String, QueryPlanExplanation> explanations,
163+
Map<String, String> metadata) {
164+
QueryPlanExplanation unoptimized = explanations.get(levelKey(Explanation.Level.Unoptimized));
165+
if (unoptimized == null || unoptimized.getDebugMetrics() == null) {
166+
return;
167+
}
168+
Map<String, String> metrics = unoptimized.getDebugMetrics();
169+
copyMetric(metrics, "structureSignatureRawSha256", metadata, "optimizerInput.unoptimizedStructureRawSha256");
170+
copyMetric(metrics, "structureSignatureNormalizedSha256", metadata,
171+
"optimizerInput.unoptimizedStructureNormalizedSha256");
172+
copyMetric(metrics, "anonymousTypeTokenCount", metadata, "optimizerInput.unoptimizedAnonymousTypeTokenCount");
173+
}
174+
175+
private static void copyMetric(Map<String, String> source, String sourceKey, Map<String, String> target,
176+
String targetKey) {
177+
String value = source.get(sourceKey);
178+
if (value != null && !value.isBlank()) {
179+
target.putIfAbsent(targetKey, value);
180+
}
181+
}
182+
183+
private static String normalizeWhitespace(String value) {
184+
return value.trim().replaceAll("\\s+", " ");
185+
}
186+
153187
public Path captureAndWrite(QueryPlanCaptureContext context, Supplier<? extends TupleQuery> tupleQuerySupplier)
154188
throws IOException {
155189
QueryPlanSnapshot snapshot = capture(context, tupleQuerySupplier);
@@ -218,7 +252,7 @@ private QueryPlanExplanation captureLevel(Explanation.Level level, Explanation e
218252
return captured;
219253
}
220254

221-
private static Map<String, String> extractDebugMetrics(String explanationJson) {
255+
public static Map<String, String> extractDebugMetrics(String explanationJson) {
222256
LinkedHashMap<String, String> metrics = new LinkedHashMap<>();
223257
if (explanationJson == null || explanationJson.isBlank()) {
224258
return metrics;
@@ -235,8 +269,18 @@ private static Map<String, String> extractDebugMetrics(String explanationJson) {
235269
DebugMetricAccumulator accumulator = new DebugMetricAccumulator();
236270
appendDebugSignatures(root, 1, accumulator);
237271

272+
String rootType = readText(root, "type");
273+
String rootTypeNormalized = canonicalizeType(rootType);
274+
metrics.put("rootType", rootType);
275+
metrics.put("rootTypeNormalized", rootTypeNormalized);
276+
metrics.put("rootAlgorithm", readText(root, "algorithm"));
277+
metrics.put("rootCostEstimate", readNumberToken(root, "costEstimate"));
278+
metrics.put("rootResultSizeEstimate", readNumberToken(root, "resultSizeEstimate"));
279+
metrics.put("rootResultSizeActual", readNumberToken(root, "resultSizeActual"));
238280
metrics.put("planNodeCount", Integer.toString(accumulator.planNodeCount));
239281
metrics.put("maxDepth", Integer.toString(accumulator.maxDepth));
282+
metrics.put("leafNodeCount", Integer.toString(accumulator.leafNodeCount));
283+
metrics.put("maxBranchingFactor", Integer.toString(accumulator.maxBranchingFactor));
240284
metrics.put("joinNodeCount", Integer.toString(accumulator.joinNodeCount));
241285
metrics.put("filterNodeCount", Integer.toString(accumulator.filterNodeCount));
242286
metrics.put("statementPatternCount", Integer.toString(accumulator.statementPatternCount));
@@ -248,6 +292,16 @@ private static Map<String, String> extractDebugMetrics(String explanationJson) {
248292
metrics.put("joinAlgorithmSignatureSha256", sha256Hex(accumulator.joinSignature.toString()));
249293
metrics.put("actualResultSizesSignatureSha256", sha256Hex(accumulator.actualSignature.toString()));
250294
metrics.put("estimatesSignatureSha256", sha256Hex(accumulator.estimatesSignature.toString()));
295+
metrics.put("joinAlgorithmMultisetSignatureSha256",
296+
multisetSignatureSha256(accumulator.joinAlgorithmMultisetTokens));
297+
metrics.put("actualResultSizesMultisetSignatureSha256",
298+
multisetSignatureSha256(accumulator.actualResultMultisetTokens));
299+
metrics.put("estimatesMultisetSignatureSha256",
300+
multisetSignatureSha256(accumulator.estimatesMultisetTokens));
301+
metrics.put("statementPatternEstimatesMultisetSignatureSha256",
302+
multisetSignatureSha256(accumulator.statementPatternEstimatesMultisetTokens));
303+
metrics.put("statementPatternEstimateTokenCount",
304+
Integer.toString(accumulator.statementPatternEstimatesMultisetTokens.size()));
251305

252306
if (accumulator.costEstimateCount > 0) {
253307
metrics.put("costEstimateSum", toPlainString(accumulator.costEstimateSum));
@@ -273,10 +327,16 @@ private static void appendDebugSignatures(JsonNode node, int depth, DebugMetricA
273327

274328
String rawType = readText(node, "type");
275329
String normalizedType = canonicalizeType(rawType);
330+
JsonNode plans = node.get("plans");
331+
int childCount = plans != null && plans.isArray() ? plans.size() : 0;
276332

277333
accumulator.planNodeCount++;
278334
accumulator.maxDepth = Math.max(accumulator.maxDepth, depth);
335+
accumulator.maxBranchingFactor = Math.max(accumulator.maxBranchingFactor, childCount);
279336
accumulator.anonymousTypeTokenCount += countAnonymousTokens(rawType);
337+
if (childCount == 0) {
338+
accumulator.leafNodeCount++;
339+
}
280340
if (normalizedType.contains("Join")) {
281341
accumulator.joinNodeCount++;
282342
}
@@ -295,13 +355,15 @@ private static void appendDebugSignatures(JsonNode node, int depth, DebugMetricA
295355
if (normalizedType.contains("Join")) {
296356
accumulator.joinSignature.append("|algorithm=").append(algorithm);
297357
accumulator.joinAlgorithmCounts.merge(algorithm, 1, Integer::sum);
358+
accumulator.joinAlgorithmMultisetTokens.add(normalizedType + "|algorithm=" + algorithm);
298359
}
299360

300361
String actual = readNumberToken(node, "resultSizeActual");
301362
accumulator.actualSignature.append('(')
302363
.append(normalizedType)
303364
.append("|resultSizeActual=")
304365
.append(actual);
366+
accumulator.actualResultMultisetTokens.add(normalizedType + "|resultSizeActual=" + actual);
305367
updateAggregate(actual, AggregateKind.ACTUAL_RESULT_SIZE, accumulator);
306368

307369
String cost = readNumberToken(node, "costEstimate");
@@ -312,10 +374,15 @@ private static void appendDebugSignatures(JsonNode node, int depth, DebugMetricA
312374
.append(cost)
313375
.append("|resultSizeEstimate=")
314376
.append(estimate);
377+
accumulator.estimatesMultisetTokens
378+
.add(normalizedType + "|costEstimate=" + cost + "|resultSizeEstimate=" + estimate);
379+
if (normalizedType.startsWith("StatementPattern")) {
380+
accumulator.statementPatternEstimatesMultisetTokens
381+
.add("costEstimate=" + cost + "|resultSizeEstimate=" + estimate);
382+
}
315383
updateAggregate(cost, AggregateKind.COST_ESTIMATE, accumulator);
316384
updateAggregate(estimate, AggregateKind.RESULT_SIZE_ESTIMATE, accumulator);
317385

318-
JsonNode plans = node.get("plans");
319386
if (plans != null && plans.isArray()) {
320387
for (JsonNode child : plans) {
321388
appendDebugSignatures(child, depth + 1, accumulator);
@@ -435,6 +502,15 @@ private static String toPlainString(BigDecimal value) {
435502
return value.stripTrailingZeros().toPlainString();
436503
}
437504

505+
private static String multisetSignatureSha256(List<String> tokens) {
506+
if (tokens.isEmpty()) {
507+
return sha256Hex("<none>");
508+
}
509+
ArrayList<String> sorted = new ArrayList<>(tokens);
510+
Collections.sort(sorted);
511+
return sha256Hex(String.join("|", sorted));
512+
}
513+
438514
private static String sha256Hex(String input) {
439515
try {
440516
MessageDigest digest = MessageDigest.getInstance("SHA-256");
@@ -547,6 +623,8 @@ private enum AggregateKind {
547623
private static final class DebugMetricAccumulator {
548624
private int planNodeCount;
549625
private int maxDepth;
626+
private int leafNodeCount;
627+
private int maxBranchingFactor;
550628
private int joinNodeCount;
551629
private int filterNodeCount;
552630
private int statementPatternCount;
@@ -557,6 +635,10 @@ private static final class DebugMetricAccumulator {
557635
private final StringBuilder actualSignature = new StringBuilder();
558636
private final StringBuilder estimatesSignature = new StringBuilder();
559637
private final LinkedHashMap<String, Integer> joinAlgorithmCounts = new LinkedHashMap<>();
638+
private final ArrayList<String> joinAlgorithmMultisetTokens = new ArrayList<>();
639+
private final ArrayList<String> actualResultMultisetTokens = new ArrayList<>();
640+
private final ArrayList<String> estimatesMultisetTokens = new ArrayList<>();
641+
private final ArrayList<String> statementPatternEstimatesMultisetTokens = new ArrayList<>();
560642
private BigDecimal costEstimateSum = BigDecimal.ZERO;
561643
private BigDecimal costEstimateMax;
562644
private int costEstimateCount;

testsuites/benchmark-common/src/test/java/org/eclipse/rdf4j/benchmark/common/plan/QueryPlanCaptureTest.java

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -128,10 +128,21 @@ void capturesPlanMetricsFieldsForPerformanceDebugging() throws IOException {
128128
.build();
129129

130130
Path outputFile = capture.captureAndWrite(context, () -> stubTupleQueryFor(query));
131-
String snapshotJson = Files.readString(outputFile);
132-
assertTrue(snapshotJson.contains("\"planNodeCount\""), snapshotJson);
133-
assertTrue(snapshotJson.contains("\"maxDepth\""), snapshotJson);
134-
assertTrue(snapshotJson.contains("\"joinAlgorithmCounts\""), snapshotJson);
131+
QueryPlanSnapshot snapshot = capture.readSnapshot(outputFile);
132+
QueryPlanExplanation optimized = snapshot.getExplanations().get("optimized");
133+
assertNotNull(optimized);
134+
assertTrue(optimized.getDebugMetrics().containsKey("planNodeCount"));
135+
assertTrue(optimized.getDebugMetrics().containsKey("maxDepth"));
136+
assertTrue(optimized.getDebugMetrics().containsKey("joinAlgorithmCounts"));
137+
assertTrue(optimized.getDebugMetrics().containsKey("leafNodeCount"));
138+
assertTrue(optimized.getDebugMetrics().containsKey("maxBranchingFactor"));
139+
assertTrue(optimized.getDebugMetrics().containsKey("estimatesMultisetSignatureSha256"));
140+
assertTrue(optimized.getDebugMetrics().containsKey("statementPatternEstimatesMultisetSignatureSha256"));
141+
assertTrue(snapshot.getMetadata().containsKey("queryString.sha256"));
142+
assertTrue(snapshot.getMetadata().containsKey("queryString.normalizedWhitespaceSha256"));
143+
assertTrue(snapshot.getMetadata().containsKey("optimizerInput.unoptimizedStructureRawSha256"));
144+
assertTrue(snapshot.getMetadata().containsKey("optimizerInput.unoptimizedStructureNormalizedSha256"));
145+
assertTrue(snapshot.getMetadata().containsKey("optimizerInput.unoptimizedAnonymousTypeTokenCount"));
135146
}
136147

137148
@Test

0 commit comments

Comments
 (0)