Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ CLAUDE.md @DataDog/apm-common-components-core
.gitlab-ci.yml @DataDog/apm-common-components-core
.gitlab/benchmarks.yml @DataDog/apm-common-components-core
.gitlab/fuzz.yml @DataDog/chaos-platform
.gitlab/impacted-crates.yml @DataDog/apm-common-components-core
benchmark/ @DataDog/apm-common-components-core
bin_tests/ @DataDog/libdatadog-profiling
build-common/ @DataDog/apm-common-components-core
Expand Down
1 change: 1 addition & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ variables:
description: "downstream jobs are triggered on this branch"

include:
- local: .gitlab/impacted-crates.yml
- local: .gitlab/benchmarks.yml
- local: .gitlab/fuzz.yml

Expand Down
84 changes: 80 additions & 4 deletions .gitlab/benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,17 @@ variables:
# The Dockerfile to this image is located at:
# https://github.com/DataDog/benchmarking-platform/tree/libdatadog/benchmarks

# The benchmark suite is sharded across parallel jobs to reduce wall-clock time. Each shard runs
# both candidate and baseline for its assigned crates on the same runner (so the comparison stays
# noise-controlled), then benchmarks_combine merges the shards' results into a single PR comment.
benchmarks:
parallel: 2
tags: ["runner:apm-k8s-tweaked-metal"]
needs: []
needs:
# Not created on main, merge-queue branches, or scheduled pipelines (see its rules) -- hence optional.
- job: compute_impacted_crates
artifacts: true
optional: true
image:
name: $BASE_CI_IMAGE
rules:
Expand All @@ -17,6 +25,26 @@ benchmarks:
- interruptible: true
timeout: 80m
script:
# Decide which crates THIS shard benchmarks. Runs first, while $CI_PROJECT_DIR is the libdatadog
# checkout (so ./benchmark/... and the compute_impacted_crates artifacts are available). The
# helper prints SKIP (nothing to do), FULL (whole workspace), or a space-separated crate list;
# BENCH_PACKAGES is consumed by benchmark/run_benchmarks_ci.sh.
- |
DECISION="$(./benchmark/select_bench_packages.sh "${CI_NODE_INDEX:-1}" "${CI_NODE_TOTAL:-1}")"
case "$DECISION" in
SKIP)
echo "Shard ${CI_NODE_INDEX:-1}/${CI_NODE_TOTAL:-1}: nothing to benchmark."
exit 0
;;
FULL)
echo "Shard ${CI_NODE_INDEX:-1}/${CI_NODE_TOTAL:-1}: full-workspace benchmark run."
export BENCH_PACKAGES=""
;;
*)
export BENCH_PACKAGES="$DECISION"
echo "Shard ${CI_NODE_INDEX:-1}/${CI_NODE_TOTAL:-1} crates: $BENCH_PACKAGES"
;;
esac
- export ARTIFACTS_DIR="$(pwd)/reports" && (mkdir "${ARTIFACTS_DIR}" || :)
- git clone --branch libdatadog/benchmarks https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.ddbuild.io/DataDog/benchmarking-platform /platform && cd /platform
- ./steps/capture-hardware-software-info.sh
Expand Down Expand Up @@ -47,11 +75,12 @@ benchmarks:
- ./steps/analyze-results.sh
- "./steps/upload-results-to-s3.sh || :"
- "./steps/upload-results-to-benchmarking-api.sh || :"
- "./steps/post-pr-comment.sh || :"
# Hand this shard's reports to benchmarks_combine, which posts the single PR comment.
- mv "${ARTIFACTS_DIR}" "${CI_PROJECT_DIR}/reports-${CI_NODE_INDEX:-1}"
artifacts:
name: "reports"
name: "reports-${CI_NODE_INDEX}"
paths:
- reports/
- reports-*/
expire_in: 3 months
variables:
UPSTREAM_PROJECT_ID: $CI_PROJECT_ID # The ID of the current project. This ID is unique across all projects on the GitLab instance.
Expand All @@ -62,3 +91,50 @@ benchmarks:

KUBERNETES_SERVICE_ACCOUNT_OVERWRITE: libdatadog
FF_USE_LEGACY_KUBERNETES_EXECUTION_STRATEGY: "true"

# Merges the parallel benchmark shards and posts a single PR comment. Each shard already uploaded its
# own results to S3 / the Benchmarking API with correct per-run metadata; this job only assembles the
# combined comment (pr-commenter uses --on-duplicate=replace, so exactly one job may comment).
benchmarks_combine:
tags: ["arch:amd64"]
# Download the artifacts (reports-*/ directories) of the benchmarks job. The dependency is
# optional so this job remains valid in pipelines where the benchmarks job is not created.
needs:
- job: benchmarks
artifacts: true
optional: true
image:
name: $BASE_CI_IMAGE
# Never runs on merge-queue working branches. On main the job is not interruptible; on every
# other ref it is.
rules:
- if: '$CI_COMMIT_BRANCH =~ /^mq-working-branch-/'
when: never
- if: $CI_COMMIT_BRANCH == "main"
interruptible: false
- interruptible: true
# Steps: bail out early when no shard produced a reports-*/ directory, clone the benchmarking
# platform, concatenate the per-shard markdown into reports/, then post the PR comment
# (best-effort: a failure of post-pr-comment.sh does not fail the job).
script:
- |
if ! ls -d "${CI_PROJECT_DIR}"/reports-*/ >/dev/null 2>&1; then
echo "No shard reports produced -> nothing to combine."
exit 0
fi
- git clone --branch libdatadog/benchmarks https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.ddbuild.io/DataDog/benchmarking-platform /platform
- |
# Assemble the files post-pr-comment.sh reads by concatenating each shard's markdown.
export ARTIFACTS_DIR="${CI_PROJECT_DIR}/reports"
mkdir -p "${ARTIFACTS_DIR}/candidate" "${ARTIFACTS_DIR}/baseline"
cat "${CI_PROJECT_DIR}"/reports-*/comparison-baseline-vs-candidate.md > "${ARTIFACTS_DIR}/comparison-baseline-vs-candidate.md" 2>/dev/null || :
cat "${CI_PROJECT_DIR}"/reports-*/candidate/analysis-candidate.md > "${ARTIFACTS_DIR}/candidate/analysis-candidate.md" 2>/dev/null || :
cat "${CI_PROJECT_DIR}"/reports-*/baseline/analysis-baseline.md > "${ARTIFACTS_DIR}/baseline/analysis-baseline.md" 2>/dev/null || :
(cd /platform && ./steps/post-pr-comment.sh) || :
# Keep the combined reports/ directory (the concatenated markdown) as a job artifact.
artifacts:
name: "reports-combined"
paths:
- reports/
expire_in: 3 months
variables:
UPSTREAM_PROJECT_ID: $CI_PROJECT_ID
UPSTREAM_PROJECT_NAME: $CI_PROJECT_NAME # libdatadog
UPSTREAM_BRANCH: $CI_COMMIT_REF_NAME
UPSTREAM_COMMIT_SHA: $CI_COMMIT_SHA
UPSTREAM_REPO_URL: "https://github.com/DataDog/libdatadog"

KUBERNETES_SERVICE_ACCOUNT_OVERWRITE: libdatadog
FF_USE_LEGACY_KUBERNETES_EXECUTION_STRATEGY: "true"
56 changes: 56 additions & 0 deletions .gitlab/impacted-crates.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/
# SPDX-License-Identifier: Apache-2.0

# Computes the set of crates impacted by a PR (the changed crates plus their transitive
# dependents) and publishes it for downstream test-stage jobs to consume.
#
# It reuses the crates-reporter tool that already backs GitHub CI (.github/actions/). That
# tool writes its results to the file named by $GITHUB_OUTPUT, so we point $GITHUB_OUTPUT at
# a temp file and parse it here -- no changes to the Rust code are required.
#
# Only the benchmarks job consumes this today; other test jobs can depend on it later.

variables:
BASE_CI_IMAGE: registry.ddbuild.io/ci/benchmarking-platform:libdatadog-benchmarks

compute_impacted_crates:
stage: test
tags: ["arch:amd64"]
image:
name: $BASE_CI_IMAGE
needs: []
rules:
# main runs the full suite, so nothing to compute there; downstream jobs mark the dependency
# optional. Same for merge-queue branches (benchmarks is skipped there) and scheduled runs.
- if: '$CI_COMMIT_BRANCH == "main"'
when: never
- if: '$CI_COMMIT_BRANCH =~ /^mq-working-branch-/'
when: never
- if: '$CI_PIPELINE_SOURCE == "schedule"'
when: never
- when: on_success
variables:
# Full history so the three-dot merge-base diff against origin/main resolves.
GIT_DEPTH: 0
# A failure leaves the outputs absent; downstream jobs then fall back to running everything.
allow_failure: true
script:
- git fetch --no-tags origin main
- (cd .github/actions && cargo build --release -p crates-reporter)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's more of a nitpick / future work, but would there be a way to distribute a binary of crates-reporter instead? I suppose it doesn't change often. But sometimes getting a binary in a job is just so annoying that maybe building from source is simpler 🤷

- export GITHUB_OUTPUT="$(mktemp)"
- ./.github/actions/target/release/crates-reporter main || echo "status=skipped" >> "$GITHUB_OUTPUT"
- |
# Surface the crates-reporter outputs as dotenv variables for downstream jobs.
AFFECTED_CRATES=$(sed -n 's/^affected_crates=//p' "$GITHUB_OUTPUT" | tail -n1)
IMPACTED_STATUS=$(sed -n 's/^status=//p' "$GITHUB_OUTPUT" | tail -n1)
: "${AFFECTED_CRATES:=[]}"
: "${IMPACTED_STATUS:=skipped}"
printf 'AFFECTED_CRATES=%s\nIMPACTED_STATUS=%s\n' "$AFFECTED_CRATES" "$IMPACTED_STATUS" | tee impacted-crates.env
# Record the changed files so downstream jobs can make path-based decisions.
git diff --name-only origin/main...HEAD > changed_files.txt || : > changed_files.txt
artifacts:
reports:
dotenv: impacted-crates.env
paths:
- changed_files.txt
expire_in: 1 week
34 changes: 33 additions & 1 deletion benchmark/run_benchmarks_ci.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,41 @@ OUTPUT_DIR="${1:-}"

pushd "${PROJECT_DIR}" > /dev/null

# Some bench targets need crate-specific features enabled; when scoping the run to a subset of
# crates we must pass only the features for the selected crates (cargo errors on --features for a
# crate that isn't part of the selection).
bench_features_for_crate() {
    # Print the space-separated cargo features that the bench targets of crate $1 need.
    # Crates with no special requirement produce an empty line.
    local crate_name="$1"
    local needed=""

    if [[ "${crate_name}" == "libdd-crashtracker" ]]; then
        needed="libdd-crashtracker/benchmarking"
    elif [[ "${crate_name}" == "libdd-sampling" ]]; then
        needed="libdd-sampling/v04_span libdd-sampling/bench-internals"
    elif [[ "${crate_name}" == "libdd-trace-utils" ]]; then
        needed="libdd-trace-utils/bench-internals"
    fi

    echo "${needed}"
}

# Run benchmarks
message "Running benchmarks"
cargo bench --workspace --features libdd-crashtracker/benchmarking,libdd-sampling/v04_span,libdd-sampling/bench-internals,libdd-trace-utils/bench-internals -- --warm-up-time 1 --measurement-time 5 --sample-size=200
# BENCH_PACKAGES (optional, space-separated crate names) scopes the run to specific crates -- set by
# the GitLab benchmarks job so a PR only benchmarks the crates it impacts. When empty (e.g. on main)
# the full workspace is benchmarked.
if [[ -n "${BENCH_PACKAGES:-}" ]]; then
package_args=()
features=()
for crate in ${BENCH_PACKAGES}; do
package_args+=(-p "${crate}")
for feature in $(bench_features_for_crate "${crate}"); do
features+=("${feature}")
done
done
feature_args=()
if (( ${#features[@]} > 0 )); then
feature_args=(--features "$(IFS=,; echo "${features[*]}")")
fi
message "Benchmarking selected crates: ${BENCH_PACKAGES}"
cargo bench "${package_args[@]}" "${feature_args[@]}" -- --warm-up-time 1 --measurement-time 5 --sample-size=200
else
cargo bench --workspace --features libdd-crashtracker/benchmarking,libdd-sampling/v04_span,libdd-sampling/bench-internals,libdd-trace-utils/bench-internals -- --warm-up-time 1 --measurement-time 5 --sample-size=200

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: this is probably simpler with this one liner, but I wonder if we shouldn't generate a BENCH_PACKAGES unconditionally (just putting all the crates that are known to bench when it's empty) and then use a single code path for the cargo bench command and package features. Otherwise there are two places where we define which features a specific crate needs for benchmarking (this line and in bench_features_for_crate), and they could disagree/drift.

fi
message "Finished running benchmarks"

# Copy the benchmark results to the output directory
Expand Down
113 changes: 113 additions & 0 deletions benchmark/select_bench_packages.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
#!/usr/bin/env bash

# Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/
# SPDX-License-Identifier: Apache-2.0

# Decides which benchmark crates THIS shard should run, and prints exactly one of:
# SKIP - this shard has nothing to run (the job should exit 0)
# FULL - run the whole workspace (run_benchmarks_ci.sh uses --workspace)
# <crate ...> - space-separated crates this shard should benchmark
#
# Inputs (from the compute_impacted_crates job, consumed via the benchmarks job env):
# $CI_COMMIT_BRANCH, $IMPACTED_STATUS, $AFFECTED_CRATES (JSON array), and
# ./changed_files.txt in the current directory. Crates with benchmarks are discovered
# from `cargo metadata`.
#
# Usage: select_bench_packages.sh <node_index_1based> <node_total>
#
# Only diagnostics go to stderr; stdout is solely the decision token above.
set -eu

node_index="${1:-1}"
node_total="${2:-1}"

# Approximate per-crate benchmark cost (~minutes of candidate wall-time) used to balance the shards.
# Measured 2026-07-02 from a full run (all 11 benchmarked crates); only relative magnitudes matter.
# The seven crates that fall through to the default each take roughly 1.5 min or less
# (trace-obfuscation ~1.4, normalization ~0.7, profiling ~0.6, ffe ~0.4, ipc ~0.2, trace-stats ~0.1,
# crashtracker ~0.1), so they act as interchangeable filler. Retune as benchmarks are
# added/removed; unknown crates default to 1.
crate_weight() {
    # Print the relative benchmark cost of crate $1. Any crate without an explicit entry
    # below weighs 1.
    local cost=1

    case "$1" in
        libdd-trace-utils) cost=9 ;;
        libdd-sampling) cost=6 ;;
        libdd-ddsketch | libdd-data-pipeline) cost=2 ;;
    esac

    echo "$cost"
}

# Print a diagnostic message to stderr, keeping stdout reserved for the decision token.
log() { echo "$@" >&2; }

# All workspace crates that declare a benchmark target, as a sorted JSON array.
# stderr of both cargo and jq is discarded and a failing jq is replaced by an empty string, so
# bench_json is expected to end up empty when discovery fails; the selection logic below treats
# an empty all_bench as "undetermined".
bench_json="$(cargo metadata --no-deps --format-version 1 2>/dev/null \
| jq -c '[.packages[] | select(any(.targets[]?; .kind[]? == "bench")) | .name] | sort' 2>/dev/null \
|| echo "")"
# The same crate names flattened into one space-separated string (empty when bench_json is empty).
all_bench="$(printf '%s' "${bench_json:-[]}" | jq -r '.[]?' 2>/dev/null | tr '\n' ' ')"

# Determine the full set of crates to benchmark (before sharding).
#   - main, or any situation where the impacted set is unknown -> every benchmarked crate.
#   - otherwise -> the intersection of AFFECTED_CRATES with the benchmarked crates; an empty
#     intersection means the PR touches nothing that is benchmarked, so the shard prints SKIP.
packages=""
if [ "${CI_COMMIT_BRANCH:-}" = "main" ]; then
  log "main -> full benchmark suite."
  packages="$all_bench"
elif [ "${IMPACTED_STATUS:-}" != "success" ] || [ -z "${AFFECTED_CRATES:-}" ] || [ -z "$all_bench" ]; then
  log "Impacted crates undetermined -> full benchmark suite."
  packages="$all_bench"
# TEMP (DO NOT MERGE): infra-change guard disabled so this branch scopes to the impacted crates
# (for testing) even though it modifies the benchmark infra itself. Restore before merging.
# elif grep -qE '^(benchmark/|\.gitlab/benchmarks\.yml|\.gitlab/impacted-crates\.yml)' changed_files.txt 2>/dev/null; then
# log "Benchmark infrastructure changed -> full benchmark suite."
# packages="$all_bench"
# jq runs on its own (not piped into tr) so that its exit status is observable: a malformed or
# non-array AFFECTED_CRATES must not be mistaken for "no impacted crates".
elif impacted="$(jq -nr --argjson a "$AFFECTED_CRATES" --argjson b "$bench_json" \
  '($a - ($a - $b)) | .[]' 2>/dev/null)"; then
  packages="$(printf '%s' "$impacted" | tr '\n' ' ')"
  if [ -z "$(printf '%s' "$packages" | tr -d '[:space:]')" ]; then
    echo "SKIP"
    exit 0
  fi
  log "Impacted benchmarked crates: $packages"
else
  # The intersection itself failed, so the impacted set is unknown: run everything rather than
  # silently skipping the benchmarks.
  log "Could not intersect AFFECTED_CRATES with the benchmarked crates -> full benchmark suite."
  packages="$all_bench"
fi

# If the crate list could not be determined, fall back to a single full-workspace run
# on shard 1 only (so we don't run the whole workspace on every shard).
if [ -z "$(printf '%s' "$packages" | tr -d '[:space:]')" ]; then
  if [ "$node_index" = "1" ]; then echo "FULL"; else echo "SKIP"; fi
  exit 0
fi

# Shard assignment via longest-processing-time: process crates heaviest-first and give each to
# the currently-lightest shard. Deterministic (stable sort by weight desc, then name asc), so every
# shard computes the same assignment and just reads its own bucket.
# Each line of $weighted is "<weight> <crate>".
weighted="$(for crate in $packages; do echo "$(crate_weight "$crate") $crate"; done | LC_ALL=C sort -k1,1nr -k2,2)"

# Initialise one load counter and one (empty) crate list per shard; shard slots are 0-based here.
idx=0
while [ "$idx" -lt "$node_total" ]; do
loads[$idx]=0
assigned[$idx]=""
idx=$(( idx + 1 ))
done

# $weighted is fed through a here-document rather than a pipe -- presumably so the loop runs in
# the current shell and its updates to loads/assigned survive the loop; verify before changing.
while read -r weight crate; do
[ -z "$crate" ] && continue
# Find the shard with the smallest accumulated load. The comparison is strictly less-than, so
# ties go to the lowest-numbered shard.
min_idx=0
min_load="${loads[0]}"
j=1
while [ "$j" -lt "$node_total" ]; do
if [ "${loads[$j]}" -lt "$min_load" ]; then
min_load="${loads[$j]}"
min_idx="$j"
fi
j=$(( j + 1 ))
done
# Give the crate to that shard and account for its weight.
loads[$min_idx]=$(( min_load + weight ))
assigned[$min_idx]="${assigned[$min_idx]} $crate"
done <<EOF
$weighted
EOF

# node_index is 1-based while the arrays are 0-based; sed trims the leading space left by the
# append above.
out="$(printf '%s' "${assigned[$(( node_index - 1 ))]}" | sed 's/^ *//;s/ *$//')"

# An empty bucket means this shard received no crates.
if [ -z "$out" ]; then
echo "SKIP"
else
echo "$out"
fi
2 changes: 2 additions & 0 deletions libdd-trace-utils/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/
// SPDX-License-Identifier: Apache-2.0

// TEMP (DO NOT MERGE): trivial change to exercise PR benchmark crate scoping.

#![cfg_attr(not(test), deny(clippy::panic))]
#![cfg_attr(not(test), deny(clippy::unwrap_used))]
#![cfg_attr(not(test), deny(clippy::expect_used))]
Expand Down
Loading