-
Notifications
You must be signed in to change notification settings - Fork 21
chore(ci): shard and only run perf benchmarks on impacted crates in PRs #2191
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,56 @@ | ||
| # Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
|
|
||
| # Computes the set of crates impacted by a PR (the changed crates plus their transitive | ||
| # dependents) and publishes it for downstream test-stage jobs to consume. | ||
| # | ||
| # It reuses the crates-reporter tool that already backs GitHub CI (.github/actions/). That | ||
| # tool writes its results to the file named by $GITHUB_OUTPUT, so we point $GITHUB_OUTPUT at | ||
| # a temp file and parse it here -- no changes to the Rust code are required. | ||
| # | ||
| # Only the benchmarks job consumes this today; other test jobs can depend on it later. | ||
|
|
||
variables:
  # Container image used by the compute_impacted_crates job below.
  BASE_CI_IMAGE: "registry.ddbuild.io/ci/benchmarking-platform:libdatadog-benchmarks"
|
|
||
# Builds crates-reporter, runs it against main, and exports its outputs:
#   - impacted-crates.env (dotenv report): AFFECTED_CRATES and IMPACTED_STATUS
#   - changed_files.txt (artifact): files changed relative to origin/main
compute_impacted_crates:
  stage: test
  tags:
    - "arch:amd64"
  image:
    name: $BASE_CI_IMAGE
  # No upstream dependencies.
  needs: []
  rules:
    # main runs the full suite, so nothing to compute there; downstream jobs mark the dependency
    # optional. Same for merge-queue branches (benchmarks is skipped there) and scheduled runs.
    - if: '$CI_COMMIT_BRANCH == "main"'
      when: never
    - if: '$CI_COMMIT_BRANCH =~ /^mq-working-branch-/'
      when: never
    - if: '$CI_PIPELINE_SOURCE == "schedule"'
      when: never
    - when: on_success
  variables:
    # Full history so the three-dot merge-base diff against origin/main resolves.
    GIT_DEPTH: 0
  # A failure leaves the outputs absent; downstream jobs then fall back to running everything.
  allow_failure: true
  script:
    - git fetch --no-tags origin main
    - (cd .github/actions && cargo build --release -p crates-reporter)
    # crates-reporter writes its results to the file named by $GITHUB_OUTPUT.
    - export GITHUB_OUTPUT="$(mktemp)"
    # If the tool fails, record a "skipped" status instead of failing the job.
    - ./.github/actions/target/release/crates-reporter main || echo "status=skipped" >> "$GITHUB_OUTPUT"
    - |
      # Surface the crates-reporter outputs as dotenv variables for downstream jobs.
      AFFECTED_CRATES=$(sed -n 's/^affected_crates=//p' "$GITHUB_OUTPUT" | tail -n1)
      IMPACTED_STATUS=$(sed -n 's/^status=//p' "$GITHUB_OUTPUT" | tail -n1)
      : "${AFFECTED_CRATES:=[]}"
      : "${IMPACTED_STATUS:=skipped}"
      printf 'AFFECTED_CRATES=%s\nIMPACTED_STATUS=%s\n' "$AFFECTED_CRATES" "$IMPACTED_STATUS" | tee impacted-crates.env
      # Record the changed files so downstream jobs can make path-based decisions.
      git diff --name-only origin/main...HEAD > changed_files.txt || : > changed_files.txt
  artifacts:
    reports:
      dotenv: impacted-crates.env
    paths:
      - changed_files.txt
    expire_in: 1 week
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,9 +20,41 @@ OUTPUT_DIR="${1:-}" | |
|
|
||
| pushd "${PROJECT_DIR}" > /dev/null | ||
|
|
||
| # Some bench targets need crate-specific features enabled; when scoping the run to a subset of | ||
| # crates we must pass only the features for the selected crates (cargo errors on --features for a | ||
| # crate that isn't part of the selection). | ||
bench_features_for_crate() {
  # Prints the space-separated cargo features that the benchmarks of crate $1 require.
  # Crates without special requirements produce an empty line.
  local crate_name="$1"
  local required=""

  if [[ "${crate_name}" == "libdd-crashtracker" ]]; then
    required="libdd-crashtracker/benchmarking"
  elif [[ "${crate_name}" == "libdd-sampling" ]]; then
    required="libdd-sampling/v04_span libdd-sampling/bench-internals"
  elif [[ "${crate_name}" == "libdd-trace-utils" ]]; then
    required="libdd-trace-utils/bench-internals"
  fi

  echo "${required}"
}
|
|
||
| # Run benchmarks | ||
| message "Running benchmarks" | ||
| cargo bench --workspace --features libdd-crashtracker/benchmarking,libdd-sampling/v04_span,libdd-sampling/bench-internals,libdd-trace-utils/bench-internals -- --warm-up-time 1 --measurement-time 5 --sample-size=200 | ||
| # BENCH_PACKAGES (optional, space-separated crate names) scopes the run to specific crates -- set by | ||
| # the GitLab benchmarks job so a PR only benchmarks the crates it impacts. When empty (e.g. on main) | ||
| # the full workspace is benchmarked. | ||
# Scoped run: turn BENCH_PACKAGES into "-p <crate>" arguments plus only the features that the
# selected crates need; otherwise fall through to the full-workspace run in the else branch.
| if [[ -n "${BENCH_PACKAGES:-}" ]]; then | ||
| package_args=() | ||
| features=() | ||
# Unquoted on purpose: BENCH_PACKAGES is a space-separated list and must be word-split.
| for crate in ${BENCH_PACKAGES}; do | ||
| package_args+=(-p "${crate}") | ||
| for feature in $(bench_features_for_crate "${crate}"); do | ||
| features+=("${feature}") | ||
| done | ||
| done | ||
# Only pass --features when at least one selected crate asked for some.
| feature_args=() | ||
| if (( ${#features[@]} > 0 )); then | ||
# Join the collected features with commas. IFS is changed inside the command substitution
# only, so the surrounding shell keeps its normal word splitting.
| feature_args=(--features "$(IFS=,; echo "${features[*]}")") | ||
| fi | ||
| message "Benchmarking selected crates: ${BENCH_PACKAGES}" | ||
| cargo bench "${package_args[@]}" "${feature_args[@]}" -- --warm-up-time 1 --measurement-time 5 --sample-size=200 | ||
| else | ||
| cargo bench --workspace --features libdd-crashtracker/benchmarking,libdd-sampling/v04_span,libdd-sampling/bench-internals,libdd-trace-utils/bench-internals -- --warm-up-time 1 --measurement-time 5 --sample-size=200 | ||

Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: this is probably simpler with this one liner, but I wonder if we shouldn't generate a |
||
| fi | ||
| message "Finished running benchmarks" | ||
|
|
||
| # Copy the benchmark results to the output directory | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,113 @@ | ||
| #!/usr/bin/env bash | ||
|
|
||
| # Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
|
|
||
| # Decides which benchmark crates THIS shard should run, and prints exactly one of: | ||
| # SKIP - this shard has nothing to run (the job should exit 0) | ||
| # FULL - run the whole workspace (run_benchmarks_ci.sh uses --workspace) | ||
| # <crate ...> - space-separated crates this shard should benchmark | ||
| # | ||
| # Inputs (from the compute_impacted_crates job, consumed via the benchmarks job env): | ||
| # $CI_COMMIT_BRANCH, $IMPACTED_STATUS, $AFFECTED_CRATES (JSON array), and | ||
| # ./changed_files.txt in the current directory. Crates with benchmarks are discovered | ||
| # from `cargo metadata`. | ||
| # | ||
| # Usage: select_bench_packages.sh <node_index_1based> <node_total> | ||
| # | ||
| # Only diagnostics go to stderr; stdout is solely the decision token above. | ||
set -eu

# Abort with a diagnostic on stderr; stdout stays reserved for the decision token.
die() {
  echo "select_bench_packages.sh: $*" >&2
  exit 2
}

# Succeeds only when $1 is a positive decimal integer without leading zeros.
is_positive_int() {
  case "${1:-}" in
    '' | 0* | *[!0-9]*) return 1 ;;
    *) return 0 ;;
  esac
}

# Shard coordinates: 1-based index of this shard and the total number of shards.
# Both default to 1 (a single shard that owns everything) when omitted or empty.
node_index="${1:-1}"
node_total="${2:-1}"

# Validate up front. Without this, index 0 would silently select the last shard through a
# negative array subscript, and an index above the total or a non-numeric value would only
# fail much later with an opaque shell error.
is_positive_int "$node_index" \
  || die "node_index must be a positive integer, got '$node_index' (usage: select_bench_packages.sh <node_index_1based> <node_total>)"
is_positive_int "$node_total" \
  || die "node_total must be a positive integer, got '$node_total' (usage: select_bench_packages.sh <node_index_1based> <node_total>)"
[ "$node_index" -le "$node_total" ] \
  || die "node_index ($node_index) must not exceed node_total ($node_total)"
|
|
||
| # Approximate per-crate benchmark cost (~minutes of candidate wall-time) used to balance the shards. | ||
| # Measured 2026-07-02 from a full run (all 11 benchmarked crates); only relative magnitudes matter. | ||
| # The seven crates that fall through to the default are all short, at most ~1.4 min (normalization ~0.7, profiling | ||
| # ~0.6, ffe ~0.4, ipc ~0.2, trace-stats ~0.1, crashtracker ~0.1, trace-obfuscation ~1.4), so they | ||
| # act as interchangeable filler. Retune as benchmarks are added/removed; unknown crates default to 1. | ||
crate_weight() {
  # Prints the relative benchmark cost of crate $1; any crate not listed here costs 1.
  local cost=1
  case "$1" in
    libdd-trace-utils) cost=9 ;;
    libdd-sampling) cost=6 ;;
    libdd-ddsketch | libdd-data-pipeline) cost=2 ;;
  esac
  echo "$cost"
}
|
|
||
# Writes its arguments to stderr as one diagnostic line, keeping stdout free for the result.
log() {
  echo "$@" 1>&2
}
|
|
||
| # All workspace crates that declare a benchmark target, as a sorted JSON array. | ||
# jq filter: names of all packages that have at least one target of kind "bench", sorted.
bench_filter='[.packages[] | select(any(.targets[]?; .kind[]? == "bench")) | .name] | sort'

# bench_json: the JSON array produced by the filter above. It is empty when the lookup yields
# nothing; errors from cargo and jq are deliberately silenced.
bench_json="$(
  { cargo metadata --no-deps --format-version 1 2>/dev/null | jq -c "$bench_filter" 2>/dev/null; } \
    || echo ""
)"

# all_bench: the same crate names as one space-separated string (empty if none were found).
all_bench="$(jq -r '.[]?' <<<"${bench_json:-[]}" 2>/dev/null | tr '\n' ' ')"
|
|
||
| # Determine the full set of crates to benchmark (before sharding). | ||
# Resolve `packages`, the space-separated set of crates to benchmark before sharding:
#   - on main, or when the impacted set is unknown, every benchmarked crate;
#   - otherwise the intersection of AFFECTED_CRATES with the benchmarked crates.
# Prints SKIP and exits 0 only when that intersection was computed successfully and is empty.
packages=""
if [ "${CI_COMMIT_BRANCH:-}" = "main" ]; then
  log "main -> full benchmark suite."
  packages="$all_bench"
elif [ "${IMPACTED_STATUS:-}" != "success" ] || [ -z "${AFFECTED_CRATES:-}" ] || [ -z "$all_bench" ]; then
  log "Impacted crates undetermined -> full benchmark suite."
  packages="$all_bench"
# TEMP (DO NOT MERGE): infra-change guard disabled so this branch scopes to the impacted crates
# (for testing) even though it modifies the benchmark infra itself. Restore before merging.
# elif grep -qE '^(benchmark/|\.gitlab/benchmarks\.yml|\.gitlab/impacted-crates\.yml)' changed_files.txt 2>/dev/null; then
# log "Benchmark infrastructure changed -> full benchmark suite."
# packages="$all_bench"
# jq runs on its own (not at the head of a pipeline) so its exit status is visible. If
# AFFECTED_CRATES is malformed JSON or not an array, jq fails, and that must mean "run
# everything" rather than being mistaken for an empty intersection and skipping benchmarks.
elif ! impacted="$(jq -nr --argjson a "$AFFECTED_CRATES" --argjson b "$bench_json" \
  '($a - ($a - $b)) | .[]' 2>/dev/null)"; then
  log "AFFECTED_CRATES could not be parsed as a JSON array -> full benchmark suite."
  packages="$all_bench"
else
  # $a - ($a - $b) keeps the elements of $a that also appear in $b (set intersection).
  packages="$(printf '%s\n' "$impacted" | tr '\n' ' ')"
  if [ -z "$(printf '%s' "$packages" | tr -d '[:space:]')" ]; then
    # The PR touches no crate that has benchmarks.
    echo "SKIP"
    exit 0
  fi
  log "Impacted benchmarked crates: $packages"
fi
|
|
||
| # If the crate list could not be determined, fall back to a single full-workspace run | ||
| # on shard 1 only (so we don't run the whole workspace on every shard). | ||
# No crate name at all (only whitespace or nothing): shard 1 takes a single full-workspace run
# and every other shard is told to skip.
if [[ "$packages" != *[![:space:]]* ]]; then
  case "$node_index" in
    1) echo "FULL" ;;
    *) echo "SKIP" ;;
  esac
  exit 0
fi
|
|
||
| # Shard assignment via longest-processing-time: process crates heaviest-first and give each to | ||
| # the currently-lightest shard. Deterministic (stable sort by weight desc, then name asc), so every | ||
| # shard computes the same assignment and just reads its own bucket. | ||
# One "<weight> <crate>" line per crate, ordered by weight (descending) and then by crate name
# (ascending, byte-wise), so the order is the same wherever this runs.
weighted="$(
  for name in $packages; do
    printf '%s %s\n' "$(crate_weight "$name")" "$name"
  done | LC_ALL=C sort -k1,1nr -k2,2
)"

# Every shard starts with zero accumulated weight and no crates assigned.
loads=()
assigned=()
shard=0
while [ "$shard" -lt "$node_total" ]; do
  loads+=(0)
  assigned+=("")
  shard=$(( shard + 1 ))
done
|
|
||
# Greedy placement: crates arrive heaviest-first and each goes to the shard that currently
# carries the least weight. Only a strictly smaller load replaces the candidate, so ties go
# to the lowest shard index.
while read -r cost name; do
  if [ -z "$name" ]; then
    continue # blank line, e.g. when the weighted list is empty
  fi
  target=0
  lightest="${loads[0]}"
  for (( shard = 1; shard < node_total; shard++ )); do
    if (( loads[shard] < lightest )); then
      lightest="${loads[shard]}"
      target="$shard"
    fi
  done
  loads[target]=$(( lightest + cost ))
  assigned[target]="${assigned[target]} $name"
done <<<"$weighted"
|
|
||
# Take this shard's bucket (node_index is 1-based) and trim the surrounding spaces.
mine="${assigned[$(( node_index - 1 ))]}"
mine="${mine#"${mine%%[! ]*}"}"
mine="${mine%"${mine##*[! ]}"}"

# Print the decision token: the crate list, or SKIP when this shard received nothing.
if [ -n "$mine" ]; then
  echo "$mine"
else
  echo "SKIP"
fi
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's more of a nitpick / future work, but would there be a way to distribute a binary of crates-reporter instead? I suppose it doesn't change often. But sometimes getting a binary in a job is just so annoying that maybe building from source is simpler 🤷