diff --git a/.github/workflows/_mirror-image.yml b/.github/workflows/_mirror-image.yml index a43cdcd..2f19b82 100644 --- a/.github/workflows/_mirror-image.yml +++ b/.github/workflows/_mirror-image.yml @@ -38,6 +38,11 @@ on: required: false default: "" type: string + copy_referrers: + description: "Also copy OCI referrer artifacts (SBOMs, provenance, VEX, signatures) attached to the image. Uses oras for the whole copy. Works for any image that has referrers." + required: false + default: false + type: boolean secrets: source_registry_username: description: "Username for source_login_registry. Required only when source_login_registry is set." @@ -56,6 +61,14 @@ jobs: - name: Set up crane uses: imjasonh/setup-crane@31b88efe9de28ae0ffa220711af4b60be9435f6e # v0.4 + - name: Set up oras + # Only needed when copying referrers; the default crane-only path keeps + # existing anonymous mirrors lightweight. + if: ${{ inputs.copy_referrers }} + uses: oras-project/setup-oras@38de303aac69abb66f3e6255b7198bff35f323e3 # v2.0.0 + with: + version: 1.3.1 + - name: Log in to source registry # Only runs for authenticated sources (e.g. Docker Hardened Images on # dhi.io). 
Anonymous public sources leave source_login_registry empty @@ -65,6 +78,7 @@ jobs: SOURCE_LOGIN_REGISTRY: "${{ inputs.source_login_registry }}" SOURCE_REGISTRY_USERNAME: "${{ secrets.source_registry_username }}" SOURCE_REGISTRY_PASSWORD: "${{ secrets.source_registry_password }}" + COPY_REFERRERS: "${{ inputs.copy_referrers }}" run: | set -euo pipefail if [ -z "${SOURCE_REGISTRY_USERNAME}" ] || [ -z "${SOURCE_REGISTRY_PASSWORD}" ]; then @@ -73,17 +87,31 @@ jobs: fi echo "${SOURCE_REGISTRY_PASSWORD}" | crane auth login "${SOURCE_LOGIN_REGISTRY}" \ --username "${SOURCE_REGISTRY_USERNAME}" --password-stdin + if [ "${COPY_REFERRERS}" = "true" ]; then + echo "${SOURCE_REGISTRY_PASSWORD}" | oras login "${SOURCE_LOGIN_REGISTRY}" \ + --username "${SOURCE_REGISTRY_USERNAME}" --password-stdin + fi - name: Log in to GHCR + env: + COPY_REFERRERS: "${{ inputs.copy_referrers }}" run: | + set -euo pipefail echo "${{ github.token }}" | crane auth login ghcr.io \ --username "${{ github.actor }}" --password-stdin + if [ "${COPY_REFERRERS}" = "true" ]; then + echo "${{ github.token }}" | oras login ghcr.io \ + --username "${{ github.actor }}" --password-stdin + fi - name: Compare digests and copy if changed env: + SOURCE_IMAGE: "${{ inputs.source_image }}" + DEST_IMAGE: "${{ inputs.dest_image }}" SOURCE_REF: "${{ inputs.source_image }}:${{ inputs.source_tag }}" DEST_REF: "${{ inputs.dest_image }}:${{ inputs.dest_tag }}" FORCE: "${{ inputs.force }}" + COPY_REFERRERS: "${{ inputs.copy_referrers }}" run: | set -euo pipefail @@ -100,7 +128,12 @@ jobs: echo "Destination digest: " fi - if [ "${FORCE}" != "true" ] && [ "${source_digest}" = "${dest_digest}" ]; then + # The digest short-circuit is intentionally skipped when copying + # referrers: OCI referrers (SBOM/VEX/provenance/signatures) can change + # independently of the subject manifest digest, so a matching image + # digest does not guarantee the referrers are in sync. 
In that mode we + # always re-run the referrer-aware copy (oras cp is itself idempotent). + if [ "${FORCE}" != "true" ] && [ "${COPY_REFERRERS}" != "true" ] && [ "${source_digest}" = "${dest_digest}" ]; then echo "Image is already up to date; nothing to copy." { echo "### Mirror image: up to date :white_check_mark:" @@ -114,11 +147,62 @@ jobs: if [ "${FORCE}" = "true" ]; then echo "Force enabled; copying regardless of digest match." + elif [ "${COPY_REFERRERS}" = "true" ] && [ "${source_digest}" = "${dest_digest}" ]; then + echo "Image digest matches, but copy_referrers is enabled; re-syncing image and referrers." else echo "Digests differ; copying updated image." fi - crane copy "${SOURCE_REF}" "${DEST_REF}" + referrers_note="" + if [ "${COPY_REFERRERS}" = "true" ]; then + # Some registries (notably dhi.io) intermittently return transient + # "not found"/5xx errors while oras fans out the many blob requests + # an `oras cp -r` makes. Retry the copy a few times with backoff so a + # single flaky response does not fail the whole mirror. + oras_cp_retry() { + local attempt=1 max=4 delay=5 + while true; do + if oras cp -r "$1" "$2"; then + return 0 + fi + if [ "${attempt}" -ge "${max}" ]; then + echo "::error::oras cp -r '$1' -> '$2' failed after ${max} attempts." + return 1 + fi + echo "::warning::oras cp -r '$1' -> '$2' failed (attempt ${attempt}/${max}); retrying in ${delay}s." + sleep "${delay}" + attempt=$((attempt + 1)) + delay=$((delay * 2)) + done + } + + # Copy the image graph and all referrers with oras. `oras cp -r` + # copies the index, its child manifests/blobs, and the referrers of + # the index itself. + echo "Copying image and referrers with oras." + oras_cp_retry "${SOURCE_REF}" "${DEST_REF}" + + # Referrers attached to each per-platform child manifest (e.g. DHI + # cosign SBOM/provenance/VEX attestations whose subject is a platform + # digest) are not pulled by the index-level copy, so copy each child + # manifest and its referrers explicitly. 
Digests are preserved, so
+            # every referrer's subject link stays valid against the copied image.
+            child_digests="$(crane manifest "${SOURCE_REF}" | jq -r '.manifests[]?.digest // empty')"
+            child_count=0
+            for d in ${child_digests}; do
+              [ -n "${d}" ] || continue
+              echo "Copying child manifest ${d} and its referrers."
+              oras_cp_retry "${SOURCE_IMAGE}@${d}" "${DEST_IMAGE}@${d}"
+              child_count=$((child_count + 1))
+            done
+
+            # Best-effort count of referrers now on the destination image.
+            ref_count="$(oras discover --format json "${DEST_REF}" 2>/dev/null \
+              | jq '[.. | objects | select(has("artifactType")) | .artifactType] | length' 2>/dev/null || echo "")"
+            referrers_note="- **Referrers:** copied (child manifests processed: ${child_count}${ref_count:+, image-level referrers: ${ref_count}})"
+          else
+            crane copy "${SOURCE_REF}" "${DEST_REF}"
+          fi
 
           new_digest="$(crane digest "${DEST_REF}")"
           echo "Copied. New destination digest: ${new_digest}"
@@ -129,4 +213,9 @@ jobs:
             echo "- **Destination:** \`${DEST_REF}\`"
             echo "- **Previous digest:** \`${dest_digest:-}\`"
             echo "- **New digest:** \`${new_digest}\`"
+            # Use `if`, not `[ ... ] && echo`: this group is the script's final
+            # command, so a false test would make the step exit 1 on the crane-only path.
+            if [ -n "${referrers_note}" ]; then
+              echo "${referrers_note}"
+            fi
           } >> "${GITHUB_STEP_SUMMARY}"
diff --git a/.github/workflows/_scan-sbom-image.yml b/.github/workflows/_scan-sbom-image.yml
new file mode 100644
index 0000000..09febfd
--- /dev/null
+++ b/.github/workflows/_scan-sbom-image.yml
@@ -0,0 +1,451 @@
+# Reusable workflow: scan a quarantined image via its SBOM attestation and
+# promote clean images.
+#
+# This workflow is internal (note the leading underscore in the filename) and is
+# not meant to be triggered directly. It is called by per-image "scan-*"
+# workflows via `uses:`.
+#
+# Unlike _scan-image.yml (which runs `trivy image` against the image filesystem),
+# this workflow is built for distroless / hardened images such as Docker
+# Hardened Images (DHI), which contain no package-manager metadata.
Instead it +# scans the **SBOM attestation** that the mirror copied alongside the image. +# +# For every image tag in `source_repo` it: +# 1. enumerates the platforms in the image index, +# 2. for each platform, locates the SBOM referrer (an in-toto attestation whose +# `in-toto.io/predicate-type` annotation matches `sbom_predicate_type`), +# pulls it from GHCR, and extracts the embedded SBOM predicate, +# 3. scans each platform SBOM with `trivy sbom`, +# 4. applies a severity threshold plus an optional CVE exception list across +# ALL platforms (blocks if any platform fails), +# 5. promotes passing images into `dest_repo` with `oras cp -r` (carrying the +# image and all its referrers — SBOMs, provenance, VEX, signatures), +# 6. attaches an empty OCI scan-report referrer with `oras attach`, and +# 7. deletes the promoted tag from quarantine (when a delete token is set). +# +# See docs/architecture/workflows/scan-and-promote-workflows.md. +name: _reusable / scan-sbom-image + +on: + workflow_call: + inputs: + source_repo: + description: "Quarantine repository to scan, without tag (e.g. ghcr.io/toddysm/quarantine/hardened/python)." + required: true + type: string + dest_repo: + description: "Promotion-target repository, without tag (e.g. ghcr.io/toddysm/base/hardened/python)." + required: true + type: string + severity_threshold: + description: "Blocking severity floor: LOW, MEDIUM, HIGH, or CRITICAL." + required: false + default: HIGH + type: string + cve_exceptions: + description: "Pipe-separated allow-list of CVE IDs (e.g. CVE-2024-1234|CVE-2024-5678)." + required: false + default: "" + type: string + sbom_predicate_type: + description: "in-toto predicate type of the SBOM attestation to scan (e.g. https://cyclonedx.org/bom/v1.6 or https://spdx.dev/Document)." + required: false + default: https://cyclonedx.org/bom/v1.6 + type: string + delete_source: + description: "Delete the tag from quarantine after a successful promotion." 
+ required: false + default: true + type: boolean + trivy_version: + description: "Trivy version to install (e.g. v0.71.0); also recorded in the referrer." + required: false + default: v0.71.0 + type: string + dry_run: + description: "Scan and report only; never copy, attach, or delete." + required: false + default: false + type: boolean + secrets: + ghcr_delete_token: + description: "PAT with delete:packages used to remove quarantine tags. Optional; deletion is skipped when absent." + required: false + +jobs: + scan: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + steps: + - name: Set up crane + uses: imjasonh/setup-crane@31b88efe9de28ae0ffa220711af4b60be9435f6e # v0.4 + + - name: Set up oras + uses: oras-project/setup-oras@38de303aac69abb66f3e6255b7198bff35f323e3 # v2.0.0 + with: + version: 1.3.1 + + - name: Set up Trivy + uses: aquasecurity/setup-trivy@3fb12ec12f41e471780db15c232d5dd185dcb514 # v0.2.6 + with: + version: ${{ inputs.trivy_version }} + + - name: Log in to GHCR + run: | + set -euo pipefail + echo "${{ github.token }}" | crane auth login ghcr.io \ + --username "${{ github.actor }}" --password-stdin + echo "${{ github.token }}" | oras login ghcr.io \ + --username "${{ github.actor }}" --password-stdin + + - name: Scan SBOMs, gate, promote, attest, and clean up + env: + SOURCE_REPO: "${{ inputs.source_repo }}" + DEST_REPO: "${{ inputs.dest_repo }}" + THRESHOLD: "${{ inputs.severity_threshold }}" + EXCEPTIONS: "${{ inputs.cve_exceptions }}" + SBOM_PREDICATE_TYPE: "${{ inputs.sbom_predicate_type }}" + DELETE_SOURCE: "${{ inputs.delete_source }}" + DRY_RUN: "${{ inputs.dry_run }}" + TRIVY_VERSION: "${{ inputs.trivy_version }}" + DELETE_TOKEN: "${{ secrets.ghcr_delete_token }}" + run: | + set -euo pipefail + export LC_ALL=C + + work="$(mktemp -d)" + + # --- Resolve the blocking severity set from the threshold floor. 
------
+          case "${THRESHOLD}" in
+            LOW)      SEVERITIES="LOW,MEDIUM,HIGH,CRITICAL" ;;
+            MEDIUM)   SEVERITIES="MEDIUM,HIGH,CRITICAL" ;;
+            HIGH)     SEVERITIES="HIGH,CRITICAL" ;;
+            CRITICAL) SEVERITIES="CRITICAL" ;;
+            *) echo "::error::Invalid severity_threshold '${THRESHOLD}' (expected LOW|MEDIUM|HIGH|CRITICAL)"; exit 1 ;;
+          esac
+
+          # --- Normalise exceptions: one ID per line (newlines kept), sorted, deduped.
+          exceptions_file="${work}/exceptions.txt"
+          : > "${exceptions_file}"
+          if [ -n "${EXCEPTIONS}" ]; then
+            printf '%s\n' "${EXCEPTIONS}" | tr '|' '\n' \
+              | sed -e 's/[[:space:]]//g' -e '/^$/d' | sort -u > "${exceptions_file}"
+          fi
+
+          # --- Resolve the actual Trivy version for the referrer annotation. ----
+          trivy_resolved="$(trivy version -f json 2>/dev/null | jq -r '.Version // empty' || true)"
+          [ -n "${trivy_resolved}" ] || trivy_resolved="${TRIVY_VERSION#v}"
+
+          # --- Work out the GHCR package coordinates for deletion. --------------
+          owner="$(printf '%s' "${SOURCE_REPO}" | awk -F/ '{print $2}')"
+          pkg_path="$(printf '%s' "${SOURCE_REPO}" | cut -d/ -f3-)"
+          pkg_enc="$(printf '%s' "${pkg_path}" | sed 's:/:%2F:g')"
+          owner_type=""
+          if [ "${DELETE_SOURCE}" = "true" ] && [ -n "${DELETE_TOKEN}" ]; then
+            owner_type="$(GH_TOKEN="${DELETE_TOKEN}" gh api "users/${owner}" --jq '.type' 2>/dev/null || true)"
+          fi
+
+          delete_quarantine_tag() {
+            local tag="$1" base vid
+            if [ "${owner_type}" = "Organization" ]; then
+              base="orgs/${owner}/packages/container/${pkg_enc}"
+            else
+              base="user/packages/container/${pkg_enc}"
+            fi
+            vid="$(GH_TOKEN="${DELETE_TOKEN}" gh api --paginate "${base}/versions" \
+              --jq ".[] | select(.metadata.container.tags[]? == \"${tag}\") | .id" 2>/dev/null \
+              | head -n1 || true)"
+            if [ -z "${vid}" ]; then
+              echo "::warning::Could not resolve a package version id for tag '${tag}'; skipping deletion."
+              return 1
+            fi
+            GH_TOKEN="${DELETE_TOKEN}" gh api -X DELETE "${base}/versions/${vid}" >/dev/null
+          }
+
+          # --- Retry an oras cp -r to absorb transient registry errors. --------
+          # An `oras cp -r` fans out many blob requests; registries occasionally
+          # return a transient "not found"/5xx for one of them. Retry with
+          # backoff so a single flaky response does not fail promotion.
+          oras_cp_retry() {
+            local attempt=1 max=4 delay=5
+            while true; do
+              if oras cp -r "$1" "$2"; then
+                return 0
+              fi
+              if [ "${attempt}" -ge "${max}" ]; then
+                echo "::error::oras cp -r '$1' -> '$2' failed after ${max} attempts."
+                return 1
+              fi
+              echo "::warning::oras cp -r '$1' -> '$2' failed (attempt ${attempt}/${max}); retrying in ${delay}s."
+              sleep "${delay}"
+              attempt=$((attempt + 1))
+              delay=$((delay * 2))
+            done
+          }
+
+          # --- Copy an image and all its referrers from source to destination. --
+          # Mirrors the oras-based copy used by _mirror-image.yml: copy the index
+          # plus its referrers, then each per-platform child manifest plus its
+          # own referrers (DHI attestations are attached per platform).
+          copy_with_referrers() {
+            local src_ref="$1" dst_ref="$2"
+            oras_cp_retry "${src_ref}" "${dst_ref}"
+            local child
+            for child in $(crane manifest "${src_ref}" | jq -r '.manifests[]?.digest // empty'); do
+              [ -n "${child}" ] || continue
+              oras_cp_retry "${SOURCE_REPO}@${child}" "${DEST_REPO}@${child}"
+            done
+          }
+
+          # --- Extract the SBOM predicate for one platform manifest. ------------
+          # Writes the BOM to the path in $2. Returns non-zero when no matching SBOM
+          # referrer is found or readable; checks are explicit because `if !` callers suspend `set -e`.
+          extract_platform_sbom() {
+            local plat_digest="$1" out="$2" ref_digest layer_digest stmt
+            ref_digest="$(oras discover --format json "${SOURCE_REPO}@${plat_digest}" 2>/dev/null \
+              | jq -r --arg PT "${SBOM_PREDICATE_TYPE}" '
+                  [.. | objects | select(.artifactType? == "application/vnd.in-toto+json")]
+                  | map(select(.annotations["in-toto.io/predicate-type"] == $PT))
+                  | .[0].digest // empty')"
+            [ -n "${ref_digest}" ] || return 1
+            layer_digest="$(oras manifest fetch "${SOURCE_REPO}@${ref_digest}" \
+              | jq -r '.layers[0].digest // empty')"
+            [ -n "${layer_digest}" ] || return 1
+            stmt="${work}/stmt.json"; rm -f "${stmt}" "${out}"  # never reuse a previous platform's statement
+            oras blob fetch --output "${stmt}" "${SOURCE_REPO}@${layer_digest}" || return 1
+            # The blob is an in-toto Statement (DHI) or a DSSE envelope wrapping
+            # one. Extract the `.predicate` (the BOM); a null/absent predicate is a failure.
+            if jq -e '.payload and .payloadType' "${stmt}" >/dev/null 2>&1; then
+              jq -r '.payload' "${stmt}" | base64 -d | jq -e '.predicate // empty' > "${out}" || return 1
+            else
+              jq -e '.predicate // empty' "${stmt}" > "${out}" || return 1
+            fi
+            [ -s "${out}" ] || return 1
+            return 0
+          }
+
+          # --- Enumerate the quarantine tags (skip referrer fallback tags). -----
+          ls_err="${work}/crane-ls.err"
+          if all_tags="$(crane ls "${SOURCE_REPO}" 2>"${ls_err}")"; then
+            :
+          else
+            if grep -qiE 'NAME_UNKNOWN|not found|MANIFEST_UNKNOWN|UNAUTHORIZED.*not found|repository name not known' "${ls_err}"; then
+              echo "Source repository ${SOURCE_REPO} has no tags yet; nothing to scan."
+              all_tags=""
+            else
+              echo "::error::Failed to list tags for ${SOURCE_REPO}:"
+              cat "${ls_err}" >&2
+              exit 1
+            fi
+          fi
+          tags="$(printf '%s\n' "${all_tags}" | grep -vE '^sha256-' || true)"
+          if [ -z "${tags}" ]; then
+            echo "No image tags found in ${SOURCE_REPO}; nothing to scan."
+            {
+              echo "### SBOM scan & promote: \`${SOURCE_REPO}\` → \`${DEST_REPO}\`"
+              echo ""
+              echo "No image tags found in the source repository; nothing to do."
+ } >> "${GITHUB_STEP_SUMMARY}" + exit 0 + fi + + promoted=0 + blocked=0 + rows="${work}/rows.md" + details_md="${work}/details.md" + : > "${rows}" + : > "${details_md}" + + while IFS= read -r tag; do + [ -n "${tag}" ] || continue + src="${SOURCE_REPO}:${tag}" + dest="${DEST_REPO}:${tag}" + safe="${tag//[^A-Za-z0-9_.-]/_}" + + echo "::group::Scanning SBOMs for ${src}" + + # --- Determine the platforms to scan. ------------------------------ + manifest_json="$(crane manifest "${src}")" + media_type="$(printf '%s' "${manifest_json}" | jq -r '.mediaType // empty')" + platforms_file="${work}/platforms-${safe}.tsv" # digestplatform + : > "${platforms_file}" + case "${media_type}" in + *image.index*|*manifest.list*) + printf '%s' "${manifest_json}" | jq -r ' + .manifests[] + | select((.platform.os // "") != "" and (.platform.os // "") != "unknown") + | [ .digest, "\(.platform.os)/\(.platform.architecture)\(if .platform.variant then "/"+.platform.variant else "" end)" ] + | @tsv' >> "${platforms_file}" + ;; + *) + printf '%s\t%s\n' "$(crane digest "${src}")" "single" >> "${platforms_file}" + ;; + esac + + tag_blocking="${work}/blocking-${safe}.txt" # union across platforms + : > "${tag_blocking}" + plat_detail="${work}/platdetail-${safe}.md" + : > "${plat_detail}" + missing_sbom=0 + platform_count=0 + + while IFS="$(printf '\t')" read -r plat_digest plat_name; do + [ -n "${plat_digest}" ] || continue + platform_count=$((platform_count + 1)) + echo "Platform ${plat_name} (${plat_digest})" + + sbom_file="${work}/sbom-${safe}-$(printf '%s' "${plat_name}" | tr '/' '_').json" + if ! extract_platform_sbom "${plat_digest}" "${sbom_file}"; then + echo "::warning::No SBOM referrer of type ${SBOM_PREDICATE_TYPE} found for ${src} (${plat_name})." 
+ missing_sbom=$((missing_sbom + 1)) + printf '| `%s` | :warning: no SBOM | — | — |\n' "${plat_name}" >> "${plat_detail}" + continue + fi + + report="${work}/report-${safe}-$(printf '%s' "${plat_name}" | tr '/' '_').json" + trivy sbom --quiet --scanners vuln --severity "${SEVERITIES}" \ + --format json --output "${report}" "${sbom_file}" + + plat_ids="${work}/ids-${safe}.txt" + jq -r '[.Results[]?.Vulnerabilities[]?.VulnerabilityID] | unique | .[]' \ + "${report}" | sort -u > "${plat_ids}" + cat "${plat_ids}" >> "${tag_blocking}" + + # Per-platform counts that mirror the gate: total findings at/above + # the threshold, and the blocking subset after removing exceptions. + plat_total_ids="${work}/plattotal-${safe}.txt" + plat_block_ids="${work}/platblock-${safe}.txt" + sort -u "${plat_ids}" > "${plat_total_ids}" + comm -23 "${plat_total_ids}" "${exceptions_file}" > "${plat_block_ids}" + plat_total="$(wc -l < "${plat_total_ids}" | tr -d ' ')" + plat_block="$(wc -l < "${plat_block_ids}" | tr -d ' ')" + plat_block_list="$(paste -sd ',' "${plat_block_ids}" | sed 's/,/, /g')" + printf '| `%s` | %s | %s | %s |\n' \ + "${plat_name}" "${plat_total}" "${plat_block}" "${plat_block_list:-—}" >> "${plat_detail}" + done < "${platforms_file}" + echo "::endgroup::" + + # --- Aggregate the gate across all platforms. 
----------------------
+            blocking_file="${work}/blockingall-${safe}.txt"
+            remaining_file="${work}/remaining-${safe}.txt"
+            excepted_file="${work}/excepted-${safe}.txt"
+            sort -u "${tag_blocking}" > "${blocking_file}"
+            comm -23 "${blocking_file}" "${exceptions_file}" > "${remaining_file}"
+            comm -12 "${blocking_file}" "${exceptions_file}" > "${excepted_file}"
+
+            blocking_count="$(wc -l < "${blocking_file}" | tr -d ' ')"
+            remaining_count="$(wc -l < "${remaining_file}" | tr -d ' ')"
+            excepted_count="$(wc -l < "${excepted_file}" | tr -d ' ')"
+            excepted_str="$(paste -sd '|' "${excepted_file}")"
+            remaining_md="$(paste -sd ',' "${remaining_file}" | sed 's/,/, /g')"
+            excepted_md="$(paste -sd ',' "${excepted_file}" | sed 's/,/, /g')"
+
+            # --- Decide the outcome for this tag. ------------------------------
+            # A missing SBOM, or an image with no scannable platform at all, blocks:
+            # we cannot gate (and must not promote) an image whose SBOM we cannot read.
+            if [ "${missing_sbom}" -gt 0 ] || [ "${platform_count}" -eq 0 ]; then
+              blocked=$((blocked + 1))
+              result="blocked (missing SBOM)"
+              icon=":no_entry:"
+              promoted_yesno="no (left in quarantine)"
+            elif [ "${remaining_count}" -gt 0 ]; then
+              blocked=$((blocked + 1))
+              result="blocked"
+              icon=":no_entry:"
+              promoted_yesno="no (left in quarantine)"
+            elif [ "${DRY_RUN}" = "true" ]; then
+              promoted=$((promoted + 1))
+              result="would promote"
+              icon=":mag:"
+              promoted_yesno="no (dry run)"
+            else
+              copy_with_referrers "${src}" "${dest}"
+
+              created="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
+              oras attach --artifact-type application/vnd.cssc.scan-report.v1+json \
+                --annotation "org.opencontainers.image.created=${created}" \
+                --annotation "com.cssc.scan.source=${SOURCE_REPO}" \
+                --annotation "com.cssc.scan.tag=${tag}" \
+                --annotation "com.cssc.scan.threshold=${THRESHOLD}" \
+                --annotation "com.cssc.scan.exceptions=${excepted_str}" \
+                --annotation "com.cssc.scan.scanner=trivy" \
+                --annotation "com.cssc.scan.scanner-version=${trivy_resolved}" \
+                --annotation "com.cssc.scan.method=sbom" \
+                --annotation
"com.cssc.scan.sbom-predicate-type=${SBOM_PREDICATE_TYPE}" \ + "${dest}" + + del_status="kept" + if [ "${DELETE_SOURCE}" = "true" ]; then + if [ -z "${DELETE_TOKEN}" ]; then + echo "::warning::delete_source=true but no ghcr_delete_token provided; skipping deletion of ${src}." + del_status="skipped (no token)" + elif delete_quarantine_tag "${tag}"; then + del_status="deleted" + else + del_status="delete failed" + fi + fi + + promoted=$((promoted + 1)) + result="promoted (source ${del_status})" + icon=":white_check_mark:" + promoted_yesno="yes → ${dest}" + fi + + # --- Human-readable report to the run log. ------------------------- + { + echo "================================================================" + echo "Image: ${src}" + echo "Result: ${result}" + echo "Promoted: ${promoted_yesno}" + echo "SBOM predicate: ${SBOM_PREDICATE_TYPE}" + echo "Platforms: ${platform_count} (missing SBOM: ${missing_sbom})" + echo "Threshold: ${THRESHOLD} (severities: ${SEVERITIES})" + echo "CVEs ≥ ${THRESHOLD}: ${blocking_count} (blocking: ${remaining_count}, excepted: ${excepted_count})" + } + + # --- Summary table row. -------------------------------------------- + printf '| `%s` | %s %s | %s | %s | %s | %s |\n' \ + "${tag}" "${icon}" "${result}" "${platform_count}" "${blocking_count}" \ + "${remaining_md:-—}" "${excepted_md:-—}" >> "${rows}" + + # --- Collapsible per-image detail for the job summary. ------------- + { + echo "" + echo "
${tag} — ${icon} ${result} · ${blocking_count} CVE(s) ≥ ${THRESHOLD} across ${platform_count} platform(s)" + echo "" + echo "- **Source:** \`${src}\`" + echo "- **Destination:** \`${dest}\`" + echo "- **Promoted:** ${promoted_yesno}" + echo "- **SBOM predicate type:** \`${SBOM_PREDICATE_TYPE}\`" + echo "- **Blocking (not excepted):** ${remaining_count} · **Excepted:** ${excepted_count}" + echo "" + echo "| Platform | CVEs ≥ ${THRESHOLD} | Blocking | Blocking CVE IDs |" + echo "| --- | --- | --- | --- |" + cat "${plat_detail}" + echo "" + echo "
" + } >> "${details_md}" + done <> "${GITHUB_STEP_SUMMARY}" diff --git a/.github/workflows/mirror-hardened-python.yml b/.github/workflows/mirror-hardened-python.yml index abdbdc0..8ed7fb7 100644 --- a/.github/workflows/mirror-hardened-python.yml +++ b/.github/workflows/mirror-hardened-python.yml @@ -42,6 +42,7 @@ jobs: source_login_registry: dhi.io dest_image: ghcr.io/toddysm/quarantine/hardened/python dest_tag: 3.14-alpine3.23 + copy_referrers: true force: ${{ github.event_name == 'workflow_dispatch' && inputs.force || false }} secrets: source_registry_username: ${{ secrets.DOCKERHUB_USERNAME }} diff --git a/.github/workflows/scan-hardened-python.yml b/.github/workflows/scan-hardened-python.yml index 15b7abb..38a5c62 100644 --- a/.github/workflows/scan-hardened-python.yml +++ b/.github/workflows/scan-hardened-python.yml @@ -1,10 +1,13 @@ -# Scan quarantine/hardened/python with Trivy and promote clean images into -# base/hardened/python. +# Scan quarantine/hardened/python via its SBOM attestation and promote clean +# images into base/hardened/python. # # This caller only defines triggers and the repository-specific inputs; the -# scan/gate/promote/attest/cleanup logic lives in the reusable _scan-image.yml -# workflow. The source image already lives in GHCR, so no dhi.io/Docker Hub -# authentication is required here. +# scan/gate/promote/attest/cleanup logic lives in the reusable +# _scan-sbom-image.yml workflow. Hardened (DHI) images are distroless and carry +# no package-manager metadata, so they are scanned via the SBOM attestation that +# the mirror copied into quarantine rather than with `trivy image`. The source +# image already lives in GHCR, so no dhi.io/Docker Hub authentication is required +# here. 
# # Promotion target exception: this workflow promotes into "base/hardened/python" # rather than the "golden/" scheme described in @@ -38,6 +41,14 @@ on: required: false default: "" type: string + sbom_predicate_type: + description: "in-toto predicate type of the SBOM attestation to scan." + required: false + default: https://cyclonedx.org/bom/v1.6 + type: choice + options: + - https://cyclonedx.org/bom/v1.6 + - https://spdx.dev/Document dry_run: description: "Scan and report only; never copy, attach, or delete." required: false @@ -55,12 +66,13 @@ permissions: jobs: scan-hardened-python: - uses: ./.github/workflows/_scan-image.yml + uses: ./.github/workflows/_scan-sbom-image.yml with: source_repo: ghcr.io/toddysm/quarantine/hardened/python dest_repo: ghcr.io/toddysm/base/hardened/python severity_threshold: ${{ github.event_name == 'workflow_dispatch' && inputs.severity_threshold || 'HIGH' }} cve_exceptions: ${{ github.event_name == 'workflow_dispatch' && inputs.cve_exceptions || '' }} + sbom_predicate_type: ${{ github.event_name == 'workflow_dispatch' && inputs.sbom_predicate_type || 'https://cyclonedx.org/bom/v1.6' }} dry_run: ${{ github.event_name == 'workflow_dispatch' && inputs.dry_run || false }} secrets: ghcr_delete_token: ${{ secrets.GHCR_DELETE_TOKEN }} diff --git a/docs/architecture/workflows/image-mirror-workflows.md b/docs/architecture/workflows/image-mirror-workflows.md index 9973f79..aea5b9a 100644 --- a/docs/architecture/workflows/image-mirror-workflows.md +++ b/docs/architecture/workflows/image-mirror-workflows.md @@ -37,7 +37,7 @@ caller workflow that only supplies configuration. [`_mirror-image.yml`](../../../.github/workflows/_mirror-image.yml) is an internal workflow (the leading underscore marks it as "do not run directly"). 
-It is triggered only through `workflow_call` and exposes five inputs: +It is triggered only through `workflow_call` and exposes these inputs: | Input | Required | Default | Description | | ----- | -------- | ------- | ----------- | @@ -46,15 +46,30 @@ It is triggered only through `workflow_call` and exposes five inputs: | `dest_image` | yes | — | Fully qualified destination image without tag (e.g. `ghcr.io//quarantine/python`). | | `dest_tag` | yes | — | Destination image tag. | | `force` | no | `false` | Copy even when the source and destination digests match. | +| `source_login_registry` | no | `""` | Registry to authenticate to before pulling the source (e.g. `dhi.io`). Empty means an anonymous public pull. | +| `copy_referrers` | no | `false` | Also copy OCI referrer artifacts (SBOMs, provenance, VEX, signatures) attached to the image. Switches the copy to `oras`. Works for any image that has referrers, not just hardened images. | + +It also accepts two optional secrets, used only for authenticated sources: + +| Secret | Required | Description | +| ------ | -------- | ----------- | +| `source_registry_username` | no | Username for `source_login_registry`. Required only when `source_login_registry` is set. | +| `source_registry_password` | no | Password/PAT for `source_login_registry`. Required only when `source_login_registry` is set. | It defines a single `mirror` job that runs on `ubuntu-latest` with the minimal -permissions `contents: read` and `packages: write`, and performs three steps: +permissions `contents: read` and `packages: write`, and performs these steps: 1. **Set up crane** — installs the `crane` CLI. -2. **Log in to GHCR** — authenticates to `ghcr.io` using the built-in +2. **Set up oras** — installs `oras`, only when `copy_referrers` is `true`. +3. **Log in to source registry** — only when `source_login_registry` is set + (e.g. Docker Hardened Images on `dhi.io`); authenticates `crane` (and `oras` + when copying referrers). +4. 
**Log in to GHCR** — authenticates to `ghcr.io` using the built-in `GITHUB_TOKEN` and the triggering actor. -3. **Compare digests and copy if changed** — the core idempotent-sync logic - (described below). +5. **Compare digests and copy if changed** — the core idempotent-sync logic + (described below). When `copy_referrers` is `true` the copy uses + `oras cp -r` for the index and each per-platform child manifest so the + attached referrers travel with the image; otherwise it uses `crane copy`. ### Caller workflows (`mirror-.yml`) @@ -106,6 +121,7 @@ flowchart TD | ---- | ---- | | **GitHub Actions** | Orchestration: scheduling, manual dispatch, reusable-workflow composition, concurrency control, and job summaries. | | **[`crane`](https://github.com/google/go-containerregistry/blob/main/cmd/crane/README.md)** | Registry client used for all image operations — `crane digest` to read manifest digests and `crane copy` to transfer images. | +| **[`oras`](https://oras.land)** | Used only when `copy_referrers` is enabled, to copy the image together with its OCI referrer artifacts (`oras cp -r`). | | **[`imjasonh/setup-crane`](https://github.com/imjasonh/setup-crane)** | Action that installs `crane` on the runner. It is pinned to a commit SHA (`31b88ef…`, v0.4) for supply-chain safety. | | **`GITHUB_TOKEN`** | The built-in, automatically scoped token used to authenticate to GHCR with `packages: write`. No long-lived registry secrets are required. | | **Bash** (`set -euo pipefail`) | The digest-compare-and-copy step is a single defensive shell script. | @@ -128,6 +144,16 @@ Key tooling characteristics: `crane digest`, reads the destination digest (treating a missing destination as empty), and copies only when the two differ — avoiding redundant transfers. - **Multi-architecture preservation** via `crane copy`. 
+- **Optional referrer copying.** When `copy_referrers` is enabled the mirror + copies the image together with its OCI referrer artifacts — SBOMs, provenance, + VEX statements, and signatures — using `oras cp -r` for the index and each + per-platform child manifest. This is what lets downstream SBOM-based scanning + read attestations straight from quarantine. It works for any image that has + referrers, not just Docker Hardened Images. +- **Authenticated sources.** Setting `source_login_registry` (plus the + `source_registry_username` / `source_registry_password` secrets) lets the + mirror pull from private or non–Docker Hub upstreams such as `dhi.io`. Public + sources leave it empty and pull anonymously. - **Scheduled refresh.** A daily cron (06:00 UTC) checks upstream for changes. - **Manual runs with force.** `workflow_dispatch` allows on-demand execution, and the optional `force` input copies even when digests already match (useful for @@ -147,17 +173,16 @@ Key tooling characteristics: - **No image scanning or vulnerability gating.** Despite the `quarantine/` naming, the workflows do not scan images or block copying based on CVEs, policy, or signatures — they perform a straight mirror. -- **No signing or attestation.** Mirrored images are not signed (e.g. cosign) - and no provenance/SBOM attestations are produced or verified. +- **No signing or attestation generation.** Mirrored images are not signed (e.g. + cosign) and no new provenance/SBOM attestations are produced or verified. When + `copy_referrers` is enabled, existing referrers (including the upstream's + SBOMs and signatures) are copied verbatim but are not re-verified. - **No automatic tag discovery.** Each workflow mirrors one explicitly pinned source tag (e.g. `python:3.14-slim`). New tags or floating/`latest` tags are not discovered or tracked automatically; updating a tag is a manual edit to the caller. 
- **No deletion / retention management.** Stale or superseded tags in GHCR are never pruned; the workflows only add or update. -- **No cross-registry generality.** Sources are assumed to be public Docker Hub - images pulled anonymously; private or non–Docker Hub upstreams would require - additional authentication that is not wired in. - **No build step.** These workflows only mirror base images. Building the application images under `apps/` on top of those bases is handled separately by planned `build-<image>.yml` workflows. diff --git a/docs/architecture/workflows/scan-and-promote-workflows.md b/docs/architecture/workflows/scan-and-promote-workflows.md index 5baa4db..456a70f 100644 --- a/docs/architecture/workflows/scan-and-promote-workflows.md +++ b/docs/architecture/workflows/scan-and-promote-workflows.md @@ -48,10 +48,12 @@ configuration. ```text .github/workflows/ -├── _scan-image.yml # reusable workflow — all the logic -├── scan-python.yml # caller — quarantine/python → golden/python -├── scan-node.yml # caller — quarantine/node → golden/node -├── scan-openjdk.yml # caller — quarantine/openjdk → golden/openjdk +├── _scan-image.yml # reusable workflow — image-filesystem scan +├── _scan-sbom-image.yml # reusable workflow — SBOM-attestation scan (hardened images) +├── scan-python.yml # caller — quarantine/python → golden/python +├── scan-node.yml # caller — quarantine/node → golden/node +├── scan-openjdk.yml # caller — quarantine/openjdk → golden/openjdk +└── scan-hardened-python.yml # caller — quarantine/hardened/python → base/hardened/python (SBOM-based) ``` - **Display name:** `scan / quarantine/<image>` (e.g. `scan / quarantine/python`). @@ -156,6 +158,51 @@ For each tag the gate is evaluated as follows: offending CVEs. Blocked images never fail the whole job; the run finishes and the summary lists every outcome.
+## SBOM-based scanning for hardened images (`_scan-sbom-image.yml`) + +Distroless images such as [Docker Hardened Images](https://docs.docker.com/dhi/) +(DHI) ship no package-manager metadata, so `trivy image` cannot enumerate their +packages. Those images instead carry their package inventory as an **SBOM +attestation** — an in-toto statement attached to each platform manifest as an +OCI referrer. The mirror copies these referrers into quarantine (see +[`copy_referrers`](image-mirror-workflows.md)), and a dedicated reusable +workflow, `_scan-sbom-image.yml`, gates on the SBOM rather than the image +filesystem. + +It mirrors `_scan-image.yml` (same inputs, gate semantics, scan-report referrer, +source deletion, and reporting) with these differences: + +- **Extra input `sbom_predicate_type`** (default `https://cyclonedx.org/bom/v1.6`; + `https://spdx.dev/Document` also supported) selects which SBOM attestation to + scan. +- **No upstream re-pull.** The SBOM is read from the GHCR quarantine copy, so + only GHCR authentication is needed. +- **Per-platform SBOM extraction.** For every platform in the image index the + workflow: + 1. runs `oras discover` on the platform manifest and finds the referrer whose + `in-toto.io/predicate-type` annotation matches `sbom_predicate_type`, + 2. fetches that referrer manifest and pulls its first layer blob, + 3. extracts the in-toto statement's `.predicate` (the CycloneDX/SPDX BOM), + handling both plain statements and DSSE-wrapped envelopes, and + 4. scans the BOM with `trivy sbom`. +- **All platforms gated together.** CVE findings are unioned across platforms; + promotion is blocked if **any** platform has a blocking CVE after exceptions. + A platform whose SBOM cannot be found is also treated as a blocking failure, + since an image whose inventory cannot be read cannot be cleared. 
+- **Referrer-preserving promotion.** Passing images are promoted with + `oras cp -r` (index plus per-platform children) so the SBOMs, provenance, VEX, + and signatures travel into the destination alongside the scan-report referrer. + +The scan-report referrer adds two annotations on top of the common set: +`com.cssc.scan.method=sbom` and +`com.cssc.scan.sbom-predicate-type=<sbom_predicate_type>`. + +The `scan-hardened-python.yml` caller wires this workflow for +`quarantine/hardened/python → base/hardened/python`. Hardened images are +promoted into a dedicated `base/hardened/` namespace rather than the +`golden/` scheme; see the +[workflow naming conventions](../../contributing/workflow-naming.md). + ## What tooling is used | Tool | Role | @@ -240,9 +287,11 @@ The workflow therefore treats deletion as configurable: ### Not implemented (deliberately out of scope) -- **No signing or SBOM attestation.** Promoted images are not signed (e.g. - cosign) and no SBOM/provenance attestations are produced. The only attached - metadata is the scan-report referrer. +- **No signing.** Promoted images are not signed (e.g. cosign). The image-based + scanner (`_scan-image.yml`) produces no SBOM/provenance; the SBOM-based + scanner (`_scan-sbom-image.yml`) does not generate attestations but copies the + upstream's existing SBOM/provenance/VEX/signature referrers verbatim during + promotion. - **No automatic remediation.** Blocked images are left in quarantine; the workflow does not patch, rebuild, or open tickets for them. - **No cross-scanner support.** Trivy is the only scanner; the referrer schema