diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 4836f91915..7d901c41d5 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -18,6 +18,7 @@ CLAUDE.md                            @DataDog/apm-common-components-core
 .gitlab-ci.yml                       @DataDog/apm-common-components-core
 .gitlab/benchmarks.yml               @DataDog/apm-common-components-core
 .gitlab/fuzz.yml                     @DataDog/chaos-platform
+.gitlab/impacted-crates.yml          @DataDog/apm-common-components-core
 benchmark/                           @DataDog/apm-common-components-core
 bin_tests/                           @DataDog/libdatadog-profiling
 build-common/                        @DataDog/apm-common-components-core
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 255c62d7c6..02992f7d0f 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -6,6 +6,7 @@ variables:
     description: "downstream jobs are triggered on this branch"
 
 include:
+  - local: .gitlab/impacted-crates.yml
   - local: .gitlab/benchmarks.yml
   - local: .gitlab/fuzz.yml
 
diff --git a/.gitlab/benchmarks.yml b/.gitlab/benchmarks.yml
index 19975eda7e..144a30ef3e 100644
--- a/.gitlab/benchmarks.yml
+++ b/.gitlab/benchmarks.yml
@@ -3,9 +3,17 @@ variables:
   # The Dockerfile to this image is located at:
   # https://github.com/DataDog/benchmarking-platform/tree/libdatadog/benchmarks
 
+# The benchmark suite is sharded across parallel jobs to reduce wall-clock time. Each shard runs
+# both candidate and baseline for its assigned crates on the same runner (so the comparison stays
+# noise-controlled), then benchmarks_combine merges the shards' results into a single PR comment.
 benchmarks:
+  parallel: 2
   tags: ["runner:apm-k8s-tweaked-metal"]
-  needs: []
+  needs:
+    # Not created on main, merge-queue working branches or scheduled pipelines -- hence optional.
+    - job: compute_impacted_crates
+      artifacts: true
+      optional: true
   image:
     name: $BASE_CI_IMAGE
   rules:
@@ -17,6 +25,26 @@ benchmarks:
     - interruptible: true
   timeout: 80m
   script:
+    # Shard selection: work out which crates THIS shard benchmarks. It has to be the first step,
+    # while $CI_PROJECT_DIR is still the libdatadog checkout, so that ./benchmark/... and the
+    # compute_impacted_crates artifacts are reachable. The helper prints exactly one decision:
+    # SKIP (nothing to do), FULL (whole workspace) or a space-separated crate list. The outcome is
+    # exported as BENCH_PACKAGES, which benchmark/run_benchmarks_ci.sh consumes.
+    - |
+      DECISION="$(./benchmark/select_bench_packages.sh "${CI_NODE_INDEX:-1}" "${CI_NODE_TOTAL:-1}")"
+      if [ "$DECISION" = "SKIP" ]; then
+        # Nothing to run for this shard, so finish the job successfully right here.
+        echo "Shard ${CI_NODE_INDEX:-1}/${CI_NODE_TOTAL:-1}: nothing to benchmark."
+        exit 0
+      elif [ "$DECISION" = "FULL" ]; then
+        # An empty BENCH_PACKAGES makes run_benchmarks_ci.sh benchmark the whole workspace.
+        echo "Shard ${CI_NODE_INDEX:-1}/${CI_NODE_TOTAL:-1}: full-workspace benchmark run."
+        export BENCH_PACKAGES=""
+      else
+        # Anything else is the crate list assigned to this shard.
+        export BENCH_PACKAGES="$DECISION"
+        echo "Shard ${CI_NODE_INDEX:-1}/${CI_NODE_TOTAL:-1} crates: $BENCH_PACKAGES"
+      fi
     - export ARTIFACTS_DIR="$(pwd)/reports" && (mkdir "${ARTIFACTS_DIR}" || :)
     - git clone --branch libdatadog/benchmarks https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.ddbuild.io/DataDog/benchmarking-platform /platform && cd /platform
     - ./steps/capture-hardware-software-info.sh
@@ -47,11 +75,12 @@ benchmarks:
     - ./steps/analyze-results.sh
     - "./steps/upload-results-to-s3.sh || :"
     - "./steps/upload-results-to-benchmarking-api.sh || :"
-    - "./steps/post-pr-comment.sh || :"
+    # Hand this shard's reports to benchmarks_combine, which posts the single PR comment.
+    - mv "${ARTIFACTS_DIR}" "${CI_PROJECT_DIR}/reports-${CI_NODE_INDEX:-1}"
   artifacts:
-    name: "reports"
+    name: "reports-${CI_NODE_INDEX}"
     paths:
-      - reports/
+      - reports-*/
     expire_in: 3 months
   variables:
     UPSTREAM_PROJECT_ID: $CI_PROJECT_ID # The ID of the current project. This ID is unique across all projects on the GitLab instance.
@@ -62,3 +91,50 @@ benchmarks:
 
     KUBERNETES_SERVICE_ACCOUNT_OVERWRITE: libdatadog
     FF_USE_LEGACY_KUBERNETES_EXECUTION_STRATEGY: "true"
+
+# Merges the parallel benchmark shards and posts a single PR comment. Each shard already uploaded its
+# own results to S3 / the Benchmarking API with correct per-run metadata; this job only assembles the
+# combined comment (pr-commenter uses --on-duplicate=replace, so exactly one job may comment).
+benchmarks_combine:
+  tags: ["arch:amd64"]
+  needs:
+    - job: benchmarks
+      artifacts: true  # pulls in each shard's reports-N/ directory
+      optional: true  # keeps the pipeline valid when no benchmarks job was created
+  image:
+    name: $BASE_CI_IMAGE
+  rules:
+    - if: '$CI_COMMIT_BRANCH =~ /^mq-working-branch-/'
+      when: never  # no combined comment on merge-queue working branches
+    - if: $CI_COMMIT_BRANCH == "main"
+      interruptible: false  # runs on main are not cancelled by newer pipelines
+    - interruptible: true  # every other pipeline may be cancelled when superseded
+  script:
+    - |  # bail out successfully when no shard left a reports-N/ directory
+      if ! ls -d "${CI_PROJECT_DIR}"/reports-*/ >/dev/null 2>&1; then
+        echo "No shard reports produced -> nothing to combine."
+        exit 0
+      fi
+    - git clone --branch libdatadog/benchmarks https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.ddbuild.io/DataDog/benchmarking-platform /platform
+    - |  # build reports/ from the shards' markdown, then post the comment (best effort: errors are ignored)
+      # Assemble the files post-pr-comment.sh reads by concatenating each shard's markdown.
+      export ARTIFACTS_DIR="${CI_PROJECT_DIR}/reports"
+      mkdir -p "${ARTIFACTS_DIR}/candidate" "${ARTIFACTS_DIR}/baseline"
+      cat "${CI_PROJECT_DIR}"/reports-*/comparison-baseline-vs-candidate.md > "${ARTIFACTS_DIR}/comparison-baseline-vs-candidate.md" 2>/dev/null || :
+      cat "${CI_PROJECT_DIR}"/reports-*/candidate/analysis-candidate.md     > "${ARTIFACTS_DIR}/candidate/analysis-candidate.md"     2>/dev/null || :
+      cat "${CI_PROJECT_DIR}"/reports-*/baseline/analysis-baseline.md       > "${ARTIFACTS_DIR}/baseline/analysis-baseline.md"       2>/dev/null || :
+      (cd /platform && ./steps/post-pr-comment.sh) || :
+  artifacts:
+    name: "reports-combined"
+    paths:
+      - reports/  # the concatenated markdown assembled by the script above
+    expire_in: 3 months
+  variables:
+    UPSTREAM_PROJECT_ID: $CI_PROJECT_ID
+    UPSTREAM_PROJECT_NAME: $CI_PROJECT_NAME # libdatadog
+    UPSTREAM_BRANCH: $CI_COMMIT_REF_NAME
+    UPSTREAM_COMMIT_SHA: $CI_COMMIT_SHA
+    UPSTREAM_REPO_URL: "https://github.com/DataDog/libdatadog"
+
+    KUBERNETES_SERVICE_ACCOUNT_OVERWRITE: libdatadog
+    FF_USE_LEGACY_KUBERNETES_EXECUTION_STRATEGY: "true"
diff --git a/.gitlab/impacted-crates.yml b/.gitlab/impacted-crates.yml
new file mode 100644
index 0000000000..583e9c2b52
--- /dev/null
+++ b/.gitlab/impacted-crates.yml
@@ -0,0 +1,56 @@
+# Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/
+# SPDX-License-Identifier: Apache-2.0
+
+# Computes the set of crates impacted by a PR (the changed crates plus their transitive
+# dependents) and publishes it for downstream test-stage jobs to consume.
+#
+# It reuses the crates-reporter tool that already backs GitHub CI (.github/actions/). That
+# tool writes its results to the file named by $GITHUB_OUTPUT, so we point $GITHUB_OUTPUT at
+# a temp file and parse it here -- no changes to the Rust code are required.
+#
+# Only the benchmarks job consumes this today; other test jobs can depend on it later.
+
+variables:
+  BASE_CI_IMAGE: registry.ddbuild.io/ci/benchmarking-platform:libdatadog-benchmarks
+
+compute_impacted_crates:
+  stage: test
+  tags: ["arch:amd64"]
+  image:
+    name: $BASE_CI_IMAGE
+  needs: []  # no upstream jobs to wait for
+  rules:
+    # main runs the full suite, so nothing to compute there; downstream jobs mark the dependency
+    # optional. Same for merge-queue branches (benchmarks is skipped there) and scheduled runs.
+    - if: '$CI_COMMIT_BRANCH == "main"'
+      when: never
+    - if: '$CI_COMMIT_BRANCH =~ /^mq-working-branch-/'
+      when: never
+    - if: '$CI_PIPELINE_SOURCE == "schedule"'
+      when: never
+    - when: on_success  # every other pipeline computes the impacted set
+  variables:
+    # Full history so the three-dot merge-base diff against origin/main resolves.
+    GIT_DEPTH: 0
+  # A failure leaves the outputs absent; downstream jobs then fall back to running everything.
+  allow_failure: true
+  script:
+    - git fetch --no-tags origin main  # make origin/main available for the diff below
+    - (cd .github/actions && cargo build --release -p crates-reporter)
+    - export GITHUB_OUTPUT="$(mktemp)"  # crates-reporter writes its key=value results to this file
+    - ./.github/actions/target/release/crates-reporter main || echo "status=skipped" >> "$GITHUB_OUTPUT"  # a tool failure is recorded as status=skipped
+    - |
+      # Surface the crates-reporter outputs as dotenv variables for downstream jobs.
+      AFFECTED_CRATES=$(sed -n 's/^affected_crates=//p' "$GITHUB_OUTPUT" | tail -n1)
+      IMPACTED_STATUS=$(sed -n 's/^status=//p' "$GITHUB_OUTPUT" | tail -n1)
+      : "${AFFECTED_CRATES:=[]}"
+      : "${IMPACTED_STATUS:=skipped}"
+      printf 'AFFECTED_CRATES=%s\nIMPACTED_STATUS=%s\n' "$AFFECTED_CRATES" "$IMPACTED_STATUS" | tee impacted-crates.env
+      # Record the changed files so downstream jobs can make path-based decisions.
+      git diff --name-only origin/main...HEAD > changed_files.txt || : > changed_files.txt
+  artifacts:
+    reports:
+      dotenv: impacted-crates.env  # AFFECTED_CRATES / IMPACTED_STATUS become variables in jobs that need this one
+    paths:
+      - changed_files.txt  # read by benchmark/select_bench_packages.sh
+    expire_in: 1 week
diff --git a/benchmark/run_benchmarks_ci.sh b/benchmark/run_benchmarks_ci.sh
index 1bcce8009f..f3c4dce8cb 100755
--- a/benchmark/run_benchmarks_ci.sh
+++ b/benchmark/run_benchmarks_ci.sh
@@ -20,9 +20,41 @@ OUTPUT_DIR="${1:-}"
 
 pushd "${PROJECT_DIR}" > /dev/null
 
+# Some bench targets need crate-specific features enabled; when scoping the run to a subset of
+# crates we must pass only the features for the selected crates (cargo errors on --features for a
+# crate that isn't part of the selection).
+bench_features_for_crate() {
+  case "$1" in
+    libdd-crashtracker) echo "libdd-crashtracker/benchmarking" ;;
+    libdd-sampling) echo "libdd-sampling/v04_span libdd-sampling/bench-internals" ;;
+    libdd-trace-utils) echo "libdd-trace-utils/bench-internals" ;;
+    *) echo "" ;;
+  esac
+}
+
 # Run benchmarks
 message "Running benchmarks"
-cargo bench --workspace --features libdd-crashtracker/benchmarking,libdd-sampling/v04_span,libdd-sampling/bench-internals,libdd-trace-utils/bench-internals -- --warm-up-time 1 --measurement-time 5 --sample-size=200
+# BENCH_PACKAGES is an optional, space-separated list of crate names. The GitLab benchmarks job sets
+# it so that a PR only benchmarks the crates it impacts; when it is empty or unset (e.g. on main)
+# the whole workspace is benchmarked with every bench feature enabled.
+if [[ -z "${BENCH_PACKAGES:-}" ]]; then
+  cargo bench --workspace --features libdd-crashtracker/benchmarking,libdd-sampling/v04_span,libdd-sampling/bench-internals,libdd-trace-utils/bench-internals -- --warm-up-time 1 --measurement-time 5 --sample-size=200
+else
+  package_args=()
+  features=()
+  # Unquoted on purpose: the crate list and each crate's feature list are split on whitespace.
+  for crate in ${BENCH_PACKAGES}; do
+    package_args+=(-p "${crate}")
+    features+=($(bench_features_for_crate "${crate}"))
+  done
+  # cargo takes one comma-separated --features value; leave the flag out when nothing is needed.
+  feature_args=()
+  if [[ ${#features[@]} -gt 0 ]]; then
+    feature_args=(--features "$(IFS=,; echo "${features[*]}")")
+  fi
+  message "Benchmarking selected crates: ${BENCH_PACKAGES}"
+  cargo bench "${package_args[@]}" "${feature_args[@]}" -- --warm-up-time 1 --measurement-time 5 --sample-size=200
+fi
 message "Finished running benchmarks"
 
 # Copy the benchmark results to the output directory
diff --git a/benchmark/select_bench_packages.sh b/benchmark/select_bench_packages.sh
new file mode 100755
index 0000000000..0add16bc98
--- /dev/null
+++ b/benchmark/select_bench_packages.sh
@@ -0,0 +1,113 @@
+#!/usr/bin/env bash
+
+# Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/
+# SPDX-License-Identifier: Apache-2.0
+
+# Decides which benchmark crates THIS shard should run, and prints exactly one of:
+#   SKIP        - this shard has nothing to run (the job should exit 0)
+#   FULL        - run the whole workspace (run_benchmarks_ci.sh uses --workspace)
+#   <crate ...> - space-separated crates this shard should benchmark
+#
+# Inputs (from the compute_impacted_crates job, consumed via the benchmarks job env):
+#   $CI_COMMIT_BRANCH, $IMPACTED_STATUS, $AFFECTED_CRATES (JSON array), and
+#   ./changed_files.txt in the current directory. Crates with benchmarks are discovered
+#   from `cargo metadata`.
+#
+# Usage: select_bench_packages.sh <node_index_1based> <node_total>
+#
+# Only diagnostics go to stderr; stdout is solely the decision token above.
+set -eu  # stop on any failing command and on use of an unset variable
+
+node_index="${1:-1}"  # 1-based index of this shard; defaults to 1
+node_total="${2:-1}"  # number of shards; defaults to 1
+
+# Approximate per-crate benchmark cost (~minutes of candidate wall-time) used to balance the shards.
+# Measured 2026-07-02 from a full run (all 11 benchmarked crates); only relative magnitudes matter.
+# The seven crates that fall through to the default are all <1 min (normalization ~0.7, profiling
+# ~0.6, ffe ~0.4, ipc ~0.2, trace-stats ~0.1, crashtracker ~0.1, trace-obfuscation ~1.4), so they
+# act as interchangeable filler. Retune as benchmarks are added/removed; unknown crates default to 1.
+crate_weight() {
+  case "$1" in
+    libdd-trace-utils) echo 9 ;;
+    libdd-sampling) echo 6 ;;
+    libdd-ddsketch) echo 2 ;;
+    libdd-data-pipeline) echo 2 ;;
+    *) echo 1 ;;
+  esac
+}
+
+log() { echo "$@" >&2; }
+
+# All workspace crates that declare a benchmark target, as a sorted JSON array.
+bench_json="$(cargo metadata --no-deps --format-version 1 2>/dev/null \
+  | jq -c '[.packages[] | select(any(.targets[]?; .kind[]? == "bench")) | .name] | sort' 2>/dev/null \
+  || echo "")"
+all_bench="$(printf '%s' "${bench_json:-[]}" | jq -r '.[]?' 2>/dev/null | tr '\n' ' ')"
+
+# Pick the crates to benchmark (before sharding). Anything that cannot be determined -> full suite.
+packages=""
+if [ "${CI_COMMIT_BRANCH:-}" = "main" ]; then
+  log "main -> full benchmark suite."
+  packages="$all_bench"
+elif [ "${IMPACTED_STATUS:-}" != "success" ] || [ -z "${AFFECTED_CRATES:-}" ] || [ -z "$all_bench" ]; then
+  log "Impacted crates undetermined -> full benchmark suite."
+  packages="$all_bench"
+# Changes to the benchmark infrastructure can shift every crate's numbers -> always the full suite.
+elif grep -qE '^(benchmark/|\.gitlab/benchmarks\.yml|\.gitlab/impacted-crates\.yml)' changed_files.txt 2>/dev/null; then
+  log "Benchmark infrastructure changed -> full benchmark suite."
+  packages="$all_bench"
+elif ! packages="$(jq -nr --argjson a "$AFFECTED_CRATES" --argjson b "$bench_json" \
+    '($a - ($a - $b)) | join(" ")' 2>/dev/null)"; then
+  log "AFFECTED_CRATES is not a valid JSON array -> full benchmark suite."
+  packages="$all_bench"  # malformed input must not silently skip the benchmarks
+elif [ -z "$(printf '%s' "$packages" | tr -d '[:space:]')" ]; then
+  echo "SKIP"  # none of the impacted crates has benchmarks
+  exit 0
+else
+  log "Impacted benchmarked crates: $packages"
+fi
+
+# No crate list could be determined: run the whole workspace once, on shard 1, and let the
+# remaining shards skip (rather than repeating the full run on every shard).
+case "$packages" in
+  *[![:space:]]*) ;;
+  *)
+    if [ "$node_index" = "1" ]; then echo "FULL"; else echo "SKIP"; fi
+    exit 0
+    ;;
+esac
+
+# Longest-processing-time assignment: walk the crates heaviest-first and hand each one to the shard
+# with the smallest load so far (lowest index wins ties). The order is deterministic -- weight
+# descending, then name ascending -- so every shard derives the same plan and reads its own bucket.
+weighted="$(
+  for crate in $packages; do
+    printf '%s %s\n' "$(crate_weight "$crate")" "$crate"
+  done | LC_ALL=C sort -k1,1nr -k2,2
+)"
+
+# Every shard starts with no load and no crates.
+for (( shard = 0; shard < node_total; shard++ )); do
+  loads[shard]=0
+  assigned[shard]=""
+done
+
+while read -r weight crate; do
+  [ -n "$crate" ] || continue
+  # Find the least-loaded shard; the strict comparison keeps the lowest index on a tie.
+  lightest=0
+  for (( shard = 1; shard < node_total; shard++ )); do
+    if (( loads[shard] < loads[lightest] )); then
+      lightest=$shard
+    fi
+  done
+  loads[lightest]=$(( loads[lightest] + weight ))
+  assigned[lightest]+=" $crate"
+done <<< "$weighted"
+
+# Emit this shard's bucket (node_index is 1-based), or SKIP when it received no crates.
+mine="$(printf '%s' "${assigned[node_index - 1]}" | sed -e 's/^ *//' -e 's/ *$//')"
+case "$mine" in
+  "") echo "SKIP" ;;
+  *) echo "$mine" ;;
+esac
diff --git a/libdd-trace-utils/src/lib.rs b/libdd-trace-utils/src/lib.rs
index 11d229eebf..34f087cf88 100644
--- a/libdd-trace-utils/src/lib.rs
+++ b/libdd-trace-utils/src/lib.rs
@@ -1,6 +1,8 @@
 // Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/
 // SPDX-License-Identifier: Apache-2.0
 
+// TEMP (DO NOT MERGE): trivial change to exercise PR benchmark crate scoping.
+
 #![cfg_attr(not(test), deny(clippy::panic))]
 #![cfg_attr(not(test), deny(clippy::unwrap_used))]
 #![cfg_attr(not(test), deny(clippy::expect_used))]