Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions client/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ apiVersion: v2
name: client
description: A unified Helm chart for tracebloc on AKS, EKS, bare-metal, and OpenShift
type: application
version: 1.7.0
appVersion: "1.7.0"
version: 1.7.1
appVersion: "1.7.1"
keywords:
- tracebloc
- kubernetes
Expand Down
85 changes: 85 additions & 0 deletions client/templates/egress-enforcement-check.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
{{- /*
Egress-lockdown enforcement check (SECURITY §8.2 / client-runtime#104).

Renders ONLY when all three hold:
  * networkPolicy.training.enabled is truthy,
  * networkPolicy.training.allowExternalHttps is false (lockdown on; a missing
    key counts as true), and
  * networkPolicy.training.enforcementProbeHost is non-empty (a missing key
    falls back to 1.1.1.1; set it to "" to disable, e.g. on air-gapped
    clusters with no external host to test against).

This is a `helm test` hook: run `helm test <release>` after flipping the
lockdown to verify it. Because it is a test hook it never runs during
install/upgrade, so it can never block them or the hourly auto-upgrade.

The probe pod carries `tracebloc.io/workload: training`, so the training-egress
NetworkPolicy governs it. It curls the canary host DIRECTLY (no proxy):
  * any HTTP status received => egress is NOT enforced => the lockdown is a
    silent no-op => the test FAILS (exit 1);
  * no HTTP response (http_code 000) => treated as blocked => the test PASSES.

The nil-guarded training map is computed once and reused by every lookup below,
so the guard does not depend on `and` short-circuiting.
*/ -}}
{{- $training := default dict .Values.networkPolicy.training }}
{{- $host := dig "enforcementProbeHost" "1.1.1.1" $training }}
{{- if and $training.enabled (not (dig "allowExternalHttps" true $training)) $host }}
apiVersion: batch/v1
kind: Job
metadata:
  name: {{ .Release.Name }}-egress-enforcement-check
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "tracebloc.labels" . | nindent 4 }}
  annotations:
    "helm.sh/hook": test
    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
spec:
  # One attempt only: a reachable canary must fail the test, not be retried.
  backoffLimit: 0
  ttlSecondsAfterFinished: 120
  template:
    metadata:
      labels:
        {{- include "tracebloc.selectorLabels" . | nindent 8 }}
        # This label is what makes the training-egress NetworkPolicy apply.
        tracebloc.io/workload: training
    spec:
      restartPolicy: Never
      automountServiceAccountToken: false
      securityContext:
        runAsNonRoot: true
        # curlimages/curl's default user is non-numeric (curl_user); runAsNonRoot
        # can't verify that, so pin the image's uid explicitly.
        runAsUser: 100
        seccompProfile:
          type: RuntimeDefault
      containers:
        - name: probe
          image: {{ include "tracebloc.image" (dict "repository" "curlimages/curl" "tag" "8.20.0" "digest" "sha256:b3f1fb2a51d923260350d21b8654bbc607164a987e2f7c84a0ac199a67df812a" "registry" "docker.io") | quote }}
          imagePullPolicy: IfNotPresent
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop: ["ALL"]
            readOnlyRootFilesystem: true
          command:
            - sh
            - -c
            - |
              HOST={{ $host | quote }}
              echo "[egress-enforcement-check] probing direct egress to https://$HOST (must be BLOCKED when the lockdown is enforced)..."
              # -k: only reachability matters. Without it, a certificate-verification
              # failure also yields http_code 000 and would be misreported as "blocked".
              # No `set -e` is in effect, so a failing curl does not abort the script;
              # its exit status is kept for the log line below.
              code=$(curl --noproxy '*' -k -s -m 5 -o /dev/null -w '%{http_code}' "https://$HOST" 2>/dev/null)
              rc=$?
              if [ -n "$code" ] && [ "$code" != "000" ]; then
                echo "WARNING ================================================================"
                echo "WARNING EGRESS LOCKDOWN NOT ENFORCED on this cluster."
                echo "WARNING A tracebloc.io/workload=training pod reached https://$HOST"
                echo "WARNING directly (HTTP $code). networkPolicy.training.allowExternalHttps"
                echo "WARNING is false, but the CNI is NOT enforcing egress NetworkPolicy, so"
                echo "WARNING the SECURITY §8.2 lockdown is INACTIVE and training pods can"
                echo "WARNING still reach the open internet."
                echo "WARNING Fix: enable egress NetworkPolicy on the CNI (Calico/Cilium, or"
                echo "WARNING EKS VPC-CNI enableNetworkPolicy=true), then re-run 'helm test'."
                echo "WARNING ================================================================"
                exit 1
              fi
              echo "OK egress lockdown verified: direct external egress is blocked (curl -> ${code:-blocked}, curl exit $rc)."
              exit 0
          resources:
            requests:
              cpu: "10m"
              memory: "32Mi"
            limits:
              cpu: "100m"
              memory: "64Mi"
      {{- if include "tracebloc.useImagePullSecrets" . }}
      imagePullSecrets:
        - name: {{ include "tracebloc.registrySecretName" . }}
      {{- end }}
{{- end }}
104 changes: 104 additions & 0 deletions client/tests/egress_enforcement_check_test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
suite: Egress-lockdown enforcement check (helm test hook)
# SECURITY §8.2 / client-runtime#104. The template under test is a `helm test`
# hook Job (NOT a post-install/post-upgrade hook): when the lockdown is enabled
# (allowExternalHttps=false) it runs a training-labelled probe that curls a canary
# host directly and exits 1 — failing `helm test` — if the host is reachable, i.e.
# the CNI is not enforcing egress. It never runs during install/upgrade.
# Guards: renders ONLY when the lockdown is on + a probe host is set; never otherwise.
templates:
  - templates/egress-enforcement-check.yaml
set:
  clientId: "test-id"
  clientPassword: "test"
tests:
  - it: does NOT render by default (lockdown off — allowExternalHttps defaults true)
    asserts:
      - hasDocuments:
          count: 0

  - it: does NOT render when training NetworkPolicy is disabled
    set:
      networkPolicy:
        training:
          enabled: false
          allowExternalHttps: false
    asserts:
      - hasDocuments:
          count: 0

  - it: does NOT render when the probe host is empty (disabled, e.g. air-gapped)
    set:
      networkPolicy:
        training:
          allowExternalHttps: false
          enforcementProbeHost: ""
    asserts:
      - hasDocuments:
          count: 0

  - it: renders as a helm test hook Job when the lockdown is enabled
    set:
      networkPolicy:
        training:
          allowExternalHttps: false
    asserts:
      - hasDocuments:
          count: 1
      - isKind:
          of: Job
      - equal:
          path: metadata.annotations["helm.sh/hook"]
          value: test
      - equal:
          path: metadata.annotations["helm.sh/hook-delete-policy"]
          value: before-hook-creation,hook-succeeded
      # A reachable canary must fail the test on the first attempt, never retry.
      - equal:
          path: spec.backoffLimit
          value: 0
      - equal:
          path: spec.template.spec.restartPolicy
          value: Never

  - it: probe pod is training-labelled (so the lockdown netpol governs it) and PSA-restricted
    set:
      networkPolicy:
        training:
          allowExternalHttps: false
    asserts:
      - equal:
          path: spec.template.metadata.labels["tracebloc.io/workload"]
          value: training
      - equal:
          path: spec.template.spec.securityContext.runAsNonRoot
          value: true
      - equal:
          path: spec.template.spec.securityContext.runAsUser
          value: 100
      - equal:
          path: spec.template.spec.containers[0].securityContext.readOnlyRootFilesystem
          value: true
      - contains:
          path: spec.template.spec.containers[0].securityContext.capabilities.drop
          content: "ALL"
      - equal:
          path: spec.template.spec.automountServiceAccountToken
          value: false

  - it: probes the configured host directly (no proxy) and fails the test on non-enforcement
    set:
      networkPolicy:
        training:
          allowExternalHttps: false
          enforcementProbeHost: "canary.example.net"
    asserts:
      # command[2] is the inline shell script passed to `sh -c`.
      - matchRegex:
          path: spec.template.spec.containers[0].command[2]
          pattern: "HOST=.?canary\\.example\\.net"
      - matchRegex:
          path: spec.template.spec.containers[0].command[2]
          pattern: "curl --noproxy '\\*'"
      - matchRegex:
          path: spec.template.spec.containers[0].command[2]
          pattern: "exit 1"

  - it: pins the curl probe image by digest
    set:
      networkPolicy:
        training:
          allowExternalHttps: false
    asserts:
      - matchRegex:
          path: spec.template.spec.containers[0].image
          pattern: "^docker\\.io/curlimages/curl@sha256:[a-f0-9]{64}$"
4 changes: 4 additions & 0 deletions client/values.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,10 @@
"default": true,
"description": "When false, drop the 0.0.0.0/0:443 egress rule so training pods reach only DNS, MySQL, requests-proxy and the egress gateway (SECURITY §8.2 / client-runtime#102). Default true keeps existing behaviour; flip per-fleet after verifying the egress gateway works (G2)."
},
"enforcementProbeHost": {
  "type": "string",
  "default": "1.1.1.1",
  "description": "Host the `helm test` enforcement check curls directly (when allowExternalHttps=false) to verify the CNI blocks egress; non-enforcement fails the test (a test hook never affects install/upgrade) — client-runtime#104. Empty string disables it (e.g. air-gapped clusters)."
},
"dnsNamespace": {
"type": "string",
"default": "kube-system",
Expand Down
6 changes: 6 additions & 0 deletions client/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,12 @@ networkPolicy:
# nil-guards this key, so a `helm upgrade --reuse-values` from a release
# predating it keeps the old behaviour (rule present).
allowExternalHttps: true
# client-runtime#104: enforcement check, run via `helm test <release>` after
# flipping the lockdown (allowExternalHttps=false). The test curls this host
# directly from a training-labelled pod; reachable => the CNI isn't enforcing
# egress => the test FAILS (a test hook never affects install/upgrade). Set ""
# to disable (e.g. air-gapped clusters with no external host to test against).
enforcementProbeHost: "1.1.1.1"
dnsNamespace: kube-system
# CoreDNS pod selector — varies per platform. Override in ci/<platform>-values.yaml.
# When empty, the template falls back to {k8s-app: kube-dns}, which works
Expand Down
Loading