From 2fb5b8fc1fcbc3f3354d3cb516e1110078100760 Mon Sep 17 00:00:00 2001 From: "Asad Iqbal (Saadi)" Date: Thu, 11 Jun 2026 13:33:47 +0500 Subject: [PATCH] Merge pull request #247 from tracebloc/feat/102-egress-gateway MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit feat(egress-proxy): training-pod egress lockdown — squid gateway, gated rollout (client-runtime#102) --- .github/workflows/helm-ci.yaml | 19 ++ .github/workflows/installer-tests.yaml | 4 +- .github/workflows/standard-checks.yml | 2 +- client/Chart.yaml | 4 +- client/templates/egress-proxy-configmap.yaml | 52 ++++ client/templates/egress-proxy-deployment.yaml | 88 +++++++ client/templates/egress-proxy-service.yaml | 23 ++ client/templates/jobs-manager-deployment.yaml | 7 + client/templates/network-policy-training.yaml | 26 +- client/tests/egress_proxy_test.yaml | 246 ++++++++++++++++++ client/values.schema.json | 48 ++++ client/values.yaml | 46 ++++ docs/SECURITY.md | 20 +- scripts/tests/e2e-auto-upgrade.sh | 147 +++++++++++ 14 files changed, 718 insertions(+), 14 deletions(-) create mode 100644 client/templates/egress-proxy-configmap.yaml create mode 100644 client/templates/egress-proxy-deployment.yaml create mode 100644 client/templates/egress-proxy-service.yaml create mode 100644 client/tests/egress_proxy_test.yaml create mode 100755 scripts/tests/e2e-auto-upgrade.sh diff --git a/.github/workflows/helm-ci.yaml b/.github/workflows/helm-ci.yaml index 1813eeb..cff0662 100644 --- a/.github/workflows/helm-ci.yaml +++ b/.github/workflows/helm-ci.yaml @@ -6,12 +6,14 @@ on: paths: - 'client/**' - 'ingestor/**' + - 'scripts/tests/e2e-auto-upgrade.sh' - '.github/workflows/helm-ci.yaml' pull_request: branches: [main, develop, openshift] paths: - 'client/**' - 'ingestor/**' + - 'scripts/tests/e2e-auto-upgrade.sh' - '.github/workflows/helm-ci.yaml' jobs: @@ -161,6 +163,23 @@ jobs: echo "images.ingestor.digest empty (default) — spawning by floating tag; no pinned digest to 
check." fi + upgrade-e2e: + # Fleet auto-upgrade non-regression gate (client-runtime#102 / #245-class + # regressions): installs the LAST PUBLISHED chart from gh-pages on a real + # k3d cluster, then upgrades to THIS working tree via both + # `--reuse-values` (manual-operator habit; nil-guards must hold, lockdown + # must not engage) and `--reset-then-reuse-values` (the auto-upgrade + # cronjob's path; new defaults must flow in inert), then flips the #102 + # egress-lockdown flags and proves the next auto-upgrade preserves them. + # Pods are never waited on — published images need real credentials; the + # regression class this guards lives in Helm templating/values semantics. + name: Fleet auto-upgrade E2E (k3d) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Upgrade from last published release through both flag paths + run: bash scripts/tests/e2e-auto-upgrade.sh + # Installer script tests (bats + Pester) + the cross-distro prerequisite matrix # live in their own workflow: .github/workflows/installer-tests.yaml # (triggered on scripts/** changes). diff --git a/.github/workflows/installer-tests.yaml b/.github/workflows/installer-tests.yaml index 95518fe..581f9e2 100644 --- a/.github/workflows/installer-tests.yaml +++ b/.github/workflows/installer-tests.yaml @@ -55,11 +55,11 @@ jobs: # below for visibility but don't fail the gate. 
shellcheck --severity=error --shell=bash \ scripts/install.sh scripts/install-k8s.sh scripts/lib/*.sh \ - scripts/tests/distro-prereqs.sh scripts/tests/e2e-cluster.sh scripts/tests/e2e-proxy.sh scripts/tests/check-drift.sh + scripts/tests/distro-prereqs.sh scripts/tests/e2e-cluster.sh scripts/tests/e2e-proxy.sh scripts/tests/e2e-auto-upgrade.sh scripts/tests/check-drift.sh echo "── shellcheck warnings (advisory, non-blocking) ──" shellcheck --severity=warning --shell=bash \ scripts/install.sh scripts/install-k8s.sh scripts/lib/*.sh \ - scripts/tests/distro-prereqs.sh scripts/tests/e2e-cluster.sh scripts/tests/e2e-proxy.sh scripts/tests/check-drift.sh || true + scripts/tests/distro-prereqs.sh scripts/tests/e2e-cluster.sh scripts/tests/e2e-proxy.sh scripts/tests/e2e-auto-upgrade.sh scripts/tests/check-drift.sh || true - name: PSScriptAnalyzer (PowerShell installer) shell: pwsh diff --git a/.github/workflows/standard-checks.yml b/.github/workflows/standard-checks.yml index d5b449c..70ec98e 100644 --- a/.github/workflows/standard-checks.yml +++ b/.github/workflows/standard-checks.yml @@ -44,7 +44,7 @@ jobs: shellcheck --version | grep version shellcheck --severity=error --shell=bash \ scripts/install.sh scripts/install-k8s.sh scripts/lib/*.sh \ - scripts/tests/distro-prereqs.sh scripts/tests/e2e-cluster.sh scripts/tests/e2e-proxy.sh + scripts/tests/distro-prereqs.sh scripts/tests/e2e-cluster.sh scripts/tests/e2e-proxy.sh scripts/tests/e2e-auto-upgrade.sh unit-tests: name: Unit tests diff --git a/client/Chart.yaml b/client/Chart.yaml index acf7b17..9c8a3f7 100644 --- a/client/Chart.yaml +++ b/client/Chart.yaml @@ -2,8 +2,8 @@ apiVersion: v2 name: client description: A unified Helm chart for tracebloc on AKS, EKS, bare-metal, and OpenShift type: application -version: 1.6.1 -appVersion: "1.6.1" +version: 1.7.0 +appVersion: "1.7.0" keywords: - tracebloc - kubernetes diff --git a/client/templates/egress-proxy-configmap.yaml b/client/templates/egress-proxy-configmap.yaml new 
file mode 100644 index 0000000..5724406 --- /dev/null +++ b/client/templates/egress-proxy-configmap.yaml @@ -0,0 +1,52 @@ +{{- if (default dict .Values.egressProxy).enabled }} +{{- /* + Egress gateway (squid) config — SECURITY §8.2 / client-runtime#102. + A forward proxy that ONLY permits HTTPS CONNECT to an FQDN allowlist, so that + once the training NetworkPolicy drops the 0.0.0.0/0:443 rule, a locked-down + training pod can still reach the tracebloc backend + App Insights through this + gateway — and nothing else. Service Bus is NOT here; it stays on the + requests-proxy:8888 path. The allowlist falls CLOSED: an empty list renders no + allow rule, so everything is denied. +*/ -}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ .Release.Name }}-egress-proxy + namespace: {{ .Release.Namespace }} + labels: + {{- include "tracebloc.labels" . | nindent 4 }} +data: + squid.conf: | + visible_hostname tracebloc-egress-proxy + http_port {{ .Values.egressProxy.port | default 3128 }} + + # Tunnel-only: no caching, no pid file, logs to the container's std streams — + # squid needs no writable paths, so the root filesystem stays read-only. + cache deny all + cache_store_log none + pid_filename none + access_log stdio:/dev/stdout + cache_log stdio:/dev/stderr + # No peers/cache → the ICMP pinger (needs CAP_NET_RAW, which the pod drops) is + # just noise/errors. Disable it. + pinger_enable off + + acl SSL_ports port 443 + acl CONNECT method CONNECT + {{- with .Values.egressProxy.allowlist }} + # dstdomain: a leading dot matches subdomains (e.g. .in.applicationinsights.azure.com), + # a bare host is an exact match (e.g. api.tracebloc.io). + acl allowed_fqdns dstdomain {{ join " " . }} + http_access deny CONNECT !SSL_ports + http_access allow CONNECT allowed_fqdns + {{- end }} + # Fail closed — anything not explicitly allowed above is denied. 
+ http_access deny all + {{- if .Values.env.HTTP_PROXY_HOST }} + + # Corporate-proxy chaining: when the cluster sits behind a corporate proxy, + # forward upstream through it instead of going direct (mirrors tracebloc.proxyEnv). + cache_peer {{ .Values.env.HTTP_PROXY_HOST }} parent {{ .Values.env.HTTP_PROXY_PORT | default 8080 }} 0 no-query default{{ if .Values.env.HTTP_PROXY_USERNAME }} login={{ .Values.env.HTTP_PROXY_USERNAME }}:{{ .Values.env.HTTP_PROXY_PASSWORD }}{{ end }} + never_direct allow all + {{- end }} +{{- end }} diff --git a/client/templates/egress-proxy-deployment.yaml b/client/templates/egress-proxy-deployment.yaml new file mode 100644 index 0000000..b811bf1 --- /dev/null +++ b/client/templates/egress-proxy-deployment.yaml @@ -0,0 +1,88 @@ +{{- if (default dict .Values.egressProxy).enabled }} +{{- /* + Egress gateway (squid) — SECURITY §8.2 / client-runtime#102. + Carries label app=egress-proxy, NOT tracebloc.io/workload=training, so it is + deliberately OUTSIDE the training NetworkPolicy and can egress to the + allowlisted FQDNs on the locked-down training pod's behalf. + Nil-guards: a `helm upgrade --reuse-values` from a release predating + egressProxy leaves .Values.egressProxy nil → the outer `if` renders nothing + (no gateway, no behaviour change). image/resources use default-through-dict so + a partial --set can't nil-pointer. +*/ -}} +{{- $ep := default dict .Values.egressProxy }} +{{- $img := default dict $ep.image }} +{{- $epRes := default dict $ep.resources }} +{{- $epReq := default dict $epRes.requests }} +{{- $epLim := default dict $epRes.limits }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ .Release.Name }}-egress-proxy + namespace: {{ .Release.Namespace }} + labels: + {{- include "tracebloc.labels" . 
| nindent 4 }} +spec: + replicas: 1 + selector: + matchLabels: + app: egress-proxy + template: + metadata: + labels: + app: egress-proxy + annotations: + checksum/config: {{ include (print $.Template.BasePath "/egress-proxy-configmap.yaml") . | sha256sum }} + spec: + automountServiceAccountToken: false + securityContext: + runAsNonRoot: true + # squid's non-root user (Ubuntu `proxy` uid). Configurable because it is + # image-specific — VERIFY against the chosen egressProxy.image. + runAsUser: {{ $ep.runAsUser | default 13 }} + seccompProfile: + type: RuntimeDefault + containers: + - name: squid + image: {{ include "tracebloc.image" (dict "repository" ($img.repository | default "ubuntu/squid") "tag" ($img.tag | default "6.6-24.04_beta") "digest" ($img.digest | default "") "registry" ($img.registry | default "docker.io")) | quote }} + imagePullPolicy: IfNotPresent + command: ["squid"] + # -N: no daemon (run in foreground). Logs go to the std streams via + # squid.conf (access_log/cache_log = stdio); omit -d to avoid duplicate lines. + args: ["-N", "-f", "/etc/squid/squid.conf"] + ports: + - containerPort: {{ $ep.port | default 3128 }} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: ["ALL"] + readOnlyRootFilesystem: true + resources: + requests: + cpu: {{ $epReq.cpu | default "50m" | quote }} + memory: {{ $epReq.memory | default "64Mi" | quote }} + limits: + cpu: {{ $epLim.cpu | default "500m" | quote }} + memory: {{ $epLim.memory | default "256Mi" | quote }} + volumeMounts: + - name: squid-config + mountPath: /etc/squid/squid.conf + subPath: squid.conf + readOnly: true + - name: tmp + mountPath: /tmp + - name: var-run + mountPath: /var/run + volumes: + - name: squid-config + configMap: + name: {{ .Release.Name }}-egress-proxy + - name: tmp + emptyDir: {} + - name: var-run + emptyDir: {} + {{- if include "tracebloc.useImagePullSecrets" . }} + imagePullSecrets: + - name: {{ include "tracebloc.registrySecretName" . 
}} + {{- end }} + restartPolicy: Always +{{- end }} diff --git a/client/templates/egress-proxy-service.yaml b/client/templates/egress-proxy-service.yaml new file mode 100644 index 0000000..7043793 --- /dev/null +++ b/client/templates/egress-proxy-service.yaml @@ -0,0 +1,23 @@ +{{- if (default dict .Values.egressProxy).enabled }} +{{- /* + ClusterIP for the egress gateway. Training pods reach it as + egress-proxy-service:PORT (egressProxy.port, default 3128) via their HTTPS_PROXY env (wired in Step 1). +*/ -}} +apiVersion: v1 +kind: Service +metadata: + name: egress-proxy-service + namespace: {{ .Release.Namespace }} + labels: + {{- include "tracebloc.labels" . | nindent 4 }} + app: egress-proxy +spec: + selector: + app: egress-proxy + ports: + - name: http-proxy + port: {{ .Values.egressProxy.port | default 3128 }} + targetPort: {{ .Values.egressProxy.port | default 3128 }} + protocol: TCP + type: ClusterIP +{{- end }} diff --git a/client/templates/jobs-manager-deployment.yaml b/client/templates/jobs-manager-deployment.yaml index ff0f03c..911dd1f 100644 --- a/client/templates/jobs-manager-deployment.yaml +++ b/client/templates/jobs-manager-deployment.yaml @@ -127,6 +127,13 @@ spec: value: {{ (default dict .Values.images.ingestor).digest | default "" | quote }} - name: REQUESTS_PROXY_URL value: "http://requests-proxy-service:8888" + {{- if and (default dict .Values.egressProxy).enabled (default dict .Values.egressProxy).routeWorkloads }} + # client-runtime#102: when routing is enabled, jobs-manager injects + # HTTPS_PROXY=$EGRESS_PROXY_URL into training pods (and suppresses raw HTTP_PROXY_HOST). + # Inert until egressProxy.routeWorkloads=true. 
+ - name: EGRESS_PROXY_URL + value: "http://egress-proxy-service:{{ (default dict .Values.egressProxy).port | default 3128 }}" + {{- end }} - name: JOB_IMAGE_HOST value: "docker.io/" - name: CLIENT_ENV diff --git a/client/templates/network-policy-training.yaml b/client/templates/network-policy-training.yaml index 643d96a..4ad1131 100644 --- a/client/templates/network-policy-training.yaml +++ b/client/templates/network-policy-training.yaml @@ -61,10 +61,16 @@ spec: protocol: UDP - port: 53 protocol: TCP + {{- /* Rule 2 (external HTTPS) is the egress hole SECURITY §8.2 closes. Gated on + networkPolicy.training.allowExternalHttps via `dig` with a default of TRUE: + an absent key (helm upgrade --reuse-values from a release predating it) + keeps the rule — old behaviour — so only an explicit `false` drops it, once + an operator has verified the egress gateway on that cluster (#102). */}} + {{- if dig "allowExternalHttps" true .Values.networkPolicy.training }} # 2. External HTTPS — everything NOT in the cluster's pod/service CIDRs. - # Training pods call backend, Azure Service Bus, App Insights, etc. - # This blocks pod-to-pod, ClusterIPs, jobs-manager, K8s API. MySQL is - # explicitly re-permitted by the next rule. + # Training pods reach the backend / Azure Service Bus / App Insights directly. + # This blocks pod-to-pod, ClusterIPs, jobs-manager, K8s API. MySQL and the + # in-cluster proxies are explicitly re-permitted by the rules below. - to: - ipBlock: cidr: 0.0.0.0/0 @@ -75,6 +81,7 @@ spec: ports: - port: 443 protocol: TCP + {{- end }} # 3. MySQL — training pods read the training dataset from the # in-namespace mysql-client pod. podSelector with no namespaceSelector # matches pods in the same namespace as this NetworkPolicy. @@ -100,4 +107,17 @@ spec: ports: - port: 8888 protocol: TCP + {{- if (default dict .Values.egressProxy).enabled }} + # 5. 
egress gateway — training pods reach the in-cluster squid egress gateway + # (egress-proxy-service) for allowlisted external HTTPS, used once the + # external-HTTPS rule (rule 2) is dropped. Re-permitted explicitly like MySQL / + # requests-proxy above (rule 2's `except` blocks ClusterIP egress). + - to: + - podSelector: + matchLabels: + app: egress-proxy + ports: + - port: {{ (default dict .Values.egressProxy).port | default 3128 }} + protocol: TCP + {{- end }} {{- end }} diff --git a/client/tests/egress_proxy_test.yaml b/client/tests/egress_proxy_test.yaml new file mode 100644 index 0000000..b0a972a --- /dev/null +++ b/client/tests/egress_proxy_test.yaml @@ -0,0 +1,246 @@ +suite: Egress gateway (squid) +# SECURITY §8.2 / client-runtime#102. The egress gateway lets a locked-down +# training pod reach an FQDN allowlist (backend + App Insights) and nothing +# else. These guards pin: the on/off flag, the fail-closed allowlist, the +# PSA-restricted security context, the nil-guards against `helm upgrade +# --reuse-values` from a release predating egressProxy, and — critically — that +# the gateway is NOT labelled as a training workload (so the lockdown netpol +# never selects it and it keeps its own egress). 
+templates: + - templates/egress-proxy-deployment.yaml + - templates/egress-proxy-service.yaml + - templates/egress-proxy-configmap.yaml + - templates/jobs-manager-deployment.yaml + - templates/network-policy-training.yaml +set: + clientId: "test-id" + clientPassword: "test" +tests: + - it: renders the Deployment, Service and ConfigMap when enabled (default) + asserts: + - hasDocuments: + count: 1 + template: templates/egress-proxy-deployment.yaml + - hasDocuments: + count: 1 + template: templates/egress-proxy-service.yaml + - hasDocuments: + count: 1 + template: templates/egress-proxy-configmap.yaml + + - it: renders nothing when disabled + set: + egressProxy: + enabled: false + asserts: + - hasDocuments: + count: 0 + template: templates/egress-proxy-deployment.yaml + - hasDocuments: + count: 0 + template: templates/egress-proxy-service.yaml + + - it: renders nothing when egressProxy is absent (helm upgrade --reuse-values replay) + # A pre-#102 stored values set has no egressProxy key; the nil parent must + # render no gateway rather than crash with "nil pointer evaluating interface". 
+ set: + egressProxy: null + asserts: + - hasDocuments: + count: 0 + template: templates/egress-proxy-deployment.yaml + + - it: is a ClusterIP service on the proxy port + template: templates/egress-proxy-service.yaml + asserts: + - equal: + path: metadata.name + value: egress-proxy-service + - equal: + path: spec.type + value: ClusterIP + - equal: + path: spec.ports[0].port + value: 3128 + + - it: is NOT labelled as a training workload (must stay outside the lockdown netpol) + template: templates/egress-proxy-deployment.yaml + asserts: + - equal: + path: spec.template.metadata.labels.app + value: egress-proxy + - notExists: + path: spec.template.metadata.labels["tracebloc.io/workload"] + + - it: enforces a PSA-restricted security context + template: templates/egress-proxy-deployment.yaml + asserts: + - equal: + path: spec.template.spec.securityContext.runAsNonRoot + value: true + - equal: + path: spec.template.spec.securityContext.seccompProfile.type + value: RuntimeDefault + - equal: + path: spec.template.spec.containers[0].securityContext.allowPrivilegeEscalation + value: false + - equal: + path: spec.template.spec.containers[0].securityContext.readOnlyRootFilesystem + value: true + - contains: + path: spec.template.spec.containers[0].securityContext.capabilities.drop + content: "ALL" + - equal: + path: spec.template.spec.automountServiceAccountToken + value: false + + - it: does not automount the service account token + template: templates/egress-proxy-deployment.yaml + asserts: + - equal: + path: spec.template.spec.automountServiceAccountToken + value: false + + - it: renders default resources and honors an override through the nil-guard + template: templates/egress-proxy-deployment.yaml + set: + egressProxy: + resources: + limits: + memory: 1Gi + asserts: + - equal: + path: spec.template.spec.containers[0].resources.requests.cpu + value: 50m + - equal: + path: spec.template.spec.containers[0].resources.limits.memory + value: 1Gi + + - it: allowlists the 
backend + App Insights and fails closed + template: templates/egress-proxy-configmap.yaml + asserts: + - matchRegex: + path: data["squid.conf"] + pattern: "acl allowed_fqdns dstdomain .*api\\.tracebloc\\.io" + - matchRegex: + path: data["squid.conf"] + pattern: "\\.in\\.applicationinsights\\.azure\\.com" + - matchRegex: + path: data["squid.conf"] + pattern: "http_access deny all" + + - it: does not chain to a corporate proxy unless one is configured + template: templates/egress-proxy-configmap.yaml + asserts: + - notMatchRegex: + path: data["squid.conf"] + pattern: "cache_peer" + + - it: chains to the corporate proxy when env.HTTP_PROXY_HOST is set + template: templates/egress-proxy-configmap.yaml + set: + env: + HTTP_PROXY_HOST: corp-proxy.internal + HTTP_PROXY_PORT: "8080" + asserts: + - matchRegex: + path: data["squid.conf"] + pattern: "cache_peer corp-proxy\\.internal parent 8080" + - matchRegex: + path: data["squid.conf"] + pattern: "never_direct allow all" + + # --- Step 1: routing training pods through the gateway --- + + - it: jobs-manager gets EGRESS_PROXY_URL only when routeWorkloads is enabled + template: templates/jobs-manager-deployment.yaml + documentIndex: 0 + set: + egressProxy: + routeWorkloads: true + asserts: + - contains: + path: spec.template.spec.containers[0].env + content: + name: EGRESS_PROXY_URL + value: "http://egress-proxy-service:3128" + + - it: jobs-manager does NOT get EGRESS_PROXY_URL by default (routeWorkloads false) + template: templates/jobs-manager-deployment.yaml + documentIndex: 0 + asserts: + - notContains: + path: spec.template.spec.containers[0].env + content: + name: EGRESS_PROXY_URL + value: "http://egress-proxy-service:3128" + + - it: training netpol permits egress to the gateway and keeps a stable training podSelector + template: templates/network-policy-training.yaml + asserts: + # auto-upgrade safety: the lockdown netpol must select ONLY training pods, + # never the auto-upgrade / image-refresh cronjobs. 
+ - equal: + path: spec.podSelector.matchLabels + value: + tracebloc.io/workload: training + - contains: + path: spec.egress + content: + to: + - podSelector: + matchLabels: + app: egress-proxy + ports: + - port: 3128 + protocol: TCP + + # --- Step 3: the lockdown — drop the external 0.0.0.0/0:443 rule --- + + - it: keeps the external 443 egress rule by default (allowExternalHttps true) + template: templates/network-policy-training.yaml + asserts: + - contains: + path: spec.egress + content: + to: + - ipBlock: + cidr: 0.0.0.0/0 + except: + - 10.0.0.0/8 + - 172.16.0.0/12 + - 192.168.0.0/16 + ports: + - port: 443 + protocol: TCP + + - it: drops the external 443 rule but keeps the gateway path when allowExternalHttps is false + template: templates/network-policy-training.yaml + set: + networkPolicy: + training: + allowExternalHttps: false + asserts: + - notContains: + path: spec.egress + content: + to: + - ipBlock: + cidr: 0.0.0.0/0 + except: + - 10.0.0.0/8 + - 172.16.0.0/12 + - 192.168.0.0/16 + ports: + - port: 443 + protocol: TCP + - contains: + path: spec.egress + content: + to: + - podSelector: + matchLabels: + app: egress-proxy + ports: + - port: 3128 + protocol: TCP diff --git a/client/values.schema.json b/client/values.schema.json index c5a16d3..2eb42dc 100644 --- a/client/values.schema.json +++ b/client/values.schema.json @@ -221,6 +221,11 @@ "default": true, "description": "Create the training-egress NetworkPolicy. Set false on clusters without an enforcing CNI." }, + "allowExternalHttps": { + "type": "boolean", + "default": true, + "description": "When false, drop the 0.0.0.0/0:443 egress rule so training pods reach only DNS, MySQL, requests-proxy and the egress gateway (SECURITY §8.2 / client-runtime#102). Default true keeps existing behaviour; flip per-fleet after verifying the egress gateway works (G2)." 
+ }, "dnsNamespace": { "type": "string", "default": "kube-system", @@ -256,6 +261,49 @@ } } }, + "egressProxy": { + "type": "object", + "description": "In-cluster squid egress gateway (SECURITY §8.2 / client-runtime#102). Forward proxy that permits HTTPS CONNECT only to the allowlist, so a locked-down training pod can reach the backend + App Insights and nothing else.", + "properties": { + "enabled": { "type": "boolean", "default": true }, + "routeWorkloads": { "type": "boolean", "default": false, "description": "Route training-pod outbound HTTPS through the gateway (jobs-manager injects HTTPS_PROXY). Default false — enable per-fleet, verify a run, then drop the direct egress rule (networkPolicy.training.allowExternalHttps=false)." }, + "port": { "type": "integer", "minimum": 1, "maximum": 65535, "default": 3128 }, + "runAsUser": { "type": "integer", "minimum": 1 }, + "image": { + "type": "object", + "properties": { + "registry": { "type": "string" }, + "repository": { "type": "string", "minLength": 1 }, + "tag": { "type": "string", "not": { "const": "latest" } }, + "digest": { "type": "string", "pattern": "^(sha256:[a-f0-9]{64})?$" } + } + }, + "allowlist": { + "type": "array", + "description": "FQDNs the gateway permits HTTPS CONNECT to (squid dstdomain syntax: leading dot = subdomain match, bare host = exact).", + "items": { "type": "string", "minLength": 1 } + }, + "resources": { + "type": "object", + "properties": { + "requests": { + "type": "object", + "properties": { + "cpu": { "type": "string", "pattern": "^[0-9]+m?$" }, + "memory": { "type": "string", "pattern": "^[0-9]+(Ki|Mi|Gi|Ti)$" } + } + }, + "limits": { + "type": "object", + "properties": { + "cpu": { "type": "string", "pattern": "^[0-9]+m?$" }, + "memory": { "type": "string", "pattern": "^[0-9]+(Ki|Mi|Gi|Ti)$" } + } + } + } + } + } + }, "images": { "type": "object", "description": "Container image pinning. 
Prefer digest over tag for immutability.", diff --git a/client/values.yaml b/client/values.yaml index 8115d62..9565b1f 100644 --- a/client/values.yaml +++ b/client/values.yaml @@ -168,6 +168,14 @@ namespace: networkPolicy: training: enabled: true + # Egress lockdown (SECURITY §8.2 / client-runtime#102). When false, the + # training NetworkPolicy DROPS the 0.0.0.0/0:443 rule, so training pods can + # reach only DNS, in-cluster MySQL, the requests-proxy, and the egress + # gateway. Default true keeps the fleet unchanged; flip OFF per-fleet AFTER + # verifying the egress gateway works on that cluster (G2). The template + # nil-guards this key, so a `helm upgrade --reuse-values` from a release + # predating it keeps the old behaviour (rule present). + allowExternalHttps: true dnsNamespace: kube-system # CoreDNS pod selector — varies per platform. Override in ci/-values.yaml. # When empty, the template falls back to {k8s-app: kube-dns}, which works @@ -184,6 +192,44 @@ networkPolicy: - "172.16.0.0/12" - "192.168.0.0/16" +# -- Egress gateway (squid) — SECURITY §8.2 / client-runtime#102. +# In-cluster forward proxy that lets a locked-down training pod reach an FQDN +# allowlist (backend + App Insights) and nothing else. Labelled app=egress-proxy +# so the training NetworkPolicy never selects it (it keeps its own egress). +egressProxy: + enabled: true + # Route training-pod outbound HTTPS through the gateway (Step 1 of #102). Default + # FALSE so the gateway ships inert; flip per-fleet to true, verify a real training + # run, THEN set networkPolicy.training.allowExternalHttps=false to drop direct egress. + routeWorkloads: false + # squid image, pinned by multi-arch (amd64+arm64) index digest — tracebloc pins + # all images by digest. `tag` stays for readability; the digest is authoritative. + # ubuntu/squid:6.6-24.04_beta (Ubuntu 24.04 LTS base), resolved 2026-06-10. 
+ image: + registry: docker.io + repository: ubuntu/squid + tag: "6.6-24.04_beta" + digest: "sha256:6a097f68bae708cedbabd6188d68c7e2e7a38cedd05a176e1cc0ba29e3bbe029" + # squid's non-root uid (Ubuntu `proxy`). Image-specific — verify if you swap image. + runAsUser: 13 + port: 3128 + # FQDNs the gateway permits HTTPS CONNECT to. squid dstdomain syntax: a leading + # dot matches subdomains; a bare host is exact. Fails CLOSED when empty. + allowlist: + - dev-api.tracebloc.io + - stg-api.tracebloc.io + - api.tracebloc.io + - xray-backend.azurewebsites.net + - .in.applicationinsights.azure.com + - dc.services.visualstudio.com + resources: + requests: + cpu: "50m" + memory: "64Mi" + limits: + cpu: "500m" + memory: "256Mi" + # -- Container image pinning. # For each image, set `digest` to a sha256 (e.g. "sha256:abc123...") to pin # the image by content hash. Digest pinning is strongly preferred: tags are diff --git a/docs/SECURITY.md b/docs/SECURITY.md index 5f70321..45d3732 100644 --- a/docs/SECURITY.md +++ b/docs/SECURITY.md @@ -147,9 +147,10 @@ spec: **What this still allows:** - DNS lookups (needed to resolve backend + Azure endpoints) -- Outbound HTTPS/443 to the public internet (needed today for the training container to reach the tracebloc backend and Azure Service Bus; see §8.2) +- In-cluster egress to MySQL (3306), the requests-proxy (8888), and the egress gateway (3128) +- Outbound HTTPS/443 to the public internet — **only while `networkPolicy.training.allowExternalHttps: true` (the current default).** Set it to `false` and this rule is dropped, so training pods reach external services only through the in-cluster egress gateway (see §8.2). -**Configuration:** `networkPolicy.training.enabled: true` (the default). +**Configuration:** `networkPolicy.training.enabled: true` (the default). Egress lockdown: `networkPolicy.training.allowExternalHttps` + `egressProxy.*` (see §8.2). 
### 4.3 Kubernetes API access (G3) @@ -408,13 +409,20 @@ Known gaps between the current state and a fully-hardened setup, with the owner **Mitigation plan:** backend endpoint that mints short-TTL, entity-scoped, send-only SAS tokens per experiment. Backend team owns the design and implementation. -**Interim mitigation:** the `NetworkPolicy` in §4.2 still allows outbound HTTPS, so a training pod can reach Azure Service Bus directly. The only way to hard-block forgery before backend support lands is to deny external egress entirely — not currently possible because training pods legitimately call the backend + App Insights + Service Bus. See §8.2. +**Interim mitigation:** with the §8.2 egress lockdown enabled (`networkPolicy.training.allowExternalHttps: false`), a training pod can no longer reach Azure Service Bus directly — SB traffic goes through the in-cluster requests-proxy (which holds the connection strings), and the conn-strings are no longer injected into the pod. Until a fleet enables the lockdown the NetworkPolicy still allows direct outbound HTTPS. The scoped/short-TTL SAS-token plan above remains the durable fix. See §8.2. -### 8.2 Training pods still have outbound HTTPS (G2) — **platform team** +### 8.2 Training-pod outbound HTTPS (G2) — **mechanism shipped (1.7.0), gated rollout** -The NetworkPolicy blocks in-cluster traffic and non-443 egress but must allow outbound HTTPS to let training pods function (backend API, Azure Service Bus, App Insights). A malicious pod can still `requests.post()` to an arbitrary endpoint. +By default the NetworkPolicy still allows outbound HTTPS/443 so training pods can reach the backend, Azure Service Bus, and App Insights — so a malicious pod can still `requests.post()` to an arbitrary endpoint until the lockdown is enabled. -**Final fix:** route all training-pod ↔ tracebloc communication through the jobs-manager sidecar, so training pods egress only to a cluster-internal IP and hold no external-facing credentials. 
Medium-size architectural change; not scheduled for this quarter. +**Mechanism (chart 1.7.0, client-runtime#102):** an in-cluster **egress gateway** (`egressProxy` — a squid forward proxy) permits HTTPS CONNECT only to an FQDN allowlist (backend + App Insights) and chains to a corporate proxy via `cache_peer`. With routing on, jobs-manager injects `HTTPS_PROXY=egress-proxy-service:3128` into each training pod (and drops the raw `HTTP_PROXY_HOST`), so backend + App-Insights traffic flows through the gateway; Service Bus already goes via the requests-proxy. The pod then needs no direct internet, and the external-443 rule can be dropped. + +**Rollout (per fleet, progressive — each step reversible):** +1. Upgrade to ≥ 1.7.0 — the gateway deploys, inert (`egressProxy.routeWorkloads: false`). +2. Set `egressProxy.routeWorkloads: true`; verify a training run completes via the gateway. +3. Set `networkPolicy.training.allowExternalHttps: false` to drop the external-443 rule, and verify **G2** (a training pod cannot reach an arbitrary external host). Requires a NetworkPolicy-enforcing CNI (§4.2). + +**Residual:** the pod still holds `BACKEND_TOKEN` (it authenticates to the backend through the gateway). Scoping / short-TTL of that token is tracked under §8.1. ### 8.3 Backend tokens never expire — **backend team** diff --git a/scripts/tests/e2e-auto-upgrade.sh b/scripts/tests/e2e-auto-upgrade.sh new file mode 100755 index 0000000..47433e6 --- /dev/null +++ b/scripts/tests/e2e-auto-upgrade.sh @@ -0,0 +1,147 @@ +#!/usr/bin/env bash +# ============================================================================= +# e2e-auto-upgrade.sh — fleet auto-upgrade non-regression gate +# ----------------------------------------------------------------------------- +# The fleet self-upgrades hourly via auto-upgrade-cronjob.yaml: +# helm upgrade RELEASE tracebloc/client --version LATEST_VERSION --reset-then-reuse-values +# and operators habitually run `helm upgrade --reuse-values` by hand. 
Both
+# replay OLD stored values against the NEW chart, which is the failure mode
+# that has repeatedly bitten this chart (nil-pointer templating on keys the
+# stored values predate; see requests_proxy_test.yaml / resource_monitor_test.yaml).
+#
+# The gate installs the LAST PUBLISHED chart from gh-pages on a real k3d
+# cluster, upgrades it to the LOCAL working-tree chart through both flag
+# paths, and asserts the contract that keeps the fleet safe:
+#   1. `--reuse-values`             -> upgrade succeeds (nil-guards hold) and
+#                                      the egress lockdown does NOT engage.
+#   2. `--reset-then-reuse-values`  -> upgrade succeeds, new defaults flow in
+#                                      (egress gateway deploys, inert), and
+#                                      image-refresh annotations survive.
+#   3. flip the #102 lockdown flags -> the external-443 NetworkPolicy rule
+#                                      ("rule 2") drops; jobs-manager gets EGRESS_PROXY_URL.
+#   4. the next plain auto-upgrade  -> the operator's flip PERSISTS.
+#
+# Pods are NEVER waited on: the published images cannot go healthy without
+# real credentials, and the regression class guarded here lives entirely in
+# Helm templating / values semantics. No secrets; stock GitHub runners.
+#
+# Usage: bash scripts/tests/e2e-auto-upgrade.sh
+# =============================================================================
+set -o errexit -o nounset -o pipefail
+
+HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+LIB="${HERE}/../lib"
+CHART_DIR="${HERE}/../../client"
+
+# Use a dedicated cluster + release so a real 'tracebloc' install is never
+# touched; opt out of autostart so docker.service on the host is left alone.
+export USER="${USER:-$(id -un)}"
+export CLUSTER_NAME="${CLUSTER_NAME:-tbupg}"
+export TRACEBLOC_NO_AUTOSTART=1
+NS="tbupg"
+REPO_NAME="tracebloc"
+REPO_URL="https://tracebloc.github.io/client"
+
+# shellcheck source=/dev/null
+source "$LIB/common.sh"
+# shellcheck source=/dev/null
+source "$LIB/setup-linux.sh"
+# shellcheck source=/dev/null
+source "$LIB/cluster.sh"
+# shellcheck source=/dev/null
+source "$LIB/preflight.sh" # provides _pf_recheck_runtime_mem (called by create_cluster)
+
+cleanup() { k3d cluster delete "$CLUSTER_NAME" >/dev/null 2>&1 || true; }  # best-effort teardown
+trap cleanup EXIT
+fail() { echo "FAIL: $*" >&2; exit 1; }  # print the reason on stderr and abort the gate
+
+# --- assertion helpers (read live cluster state, not helm output) -----------
+# Succeeds iff the training NetworkPolicy still lists `cidr: 0.0.0.0/0`. A failed
+# read aborts the gate: it must never be mistaken for "external 443 rule dropped",
+# otherwise the `&& fail` lockdown checks would pass on a missing policy.
+netpol_has_external_443() {
+  local np; np="$(kubectl get networkpolicy "${NS}-training-egress" -n "$NS" -o yaml)" \
+    || fail "could not read NetworkPolicy ${NS}-training-egress in namespace ${NS}"
+  grep -q 'cidr: 0.0.0.0/0' <<<"$np"  # here-string: no pipeline, so no SIGPIPE under pipefail
+}
+
+jm_deploy() { kubectl get deploy -n "$NS" -o name | grep -m1 'jobs-manager'; }  # first match, as printed by -o name
+# Prints EGRESS_PROXY_URL from the first container of that Deployment; empty when unset.
+jm_egress_proxy_url() {
+  kubectl get -n "$NS" "$(jm_deploy)" \
+    -o jsonpath='{.spec.template.spec.containers[0].env[?(@.name=="EGRESS_PROXY_URL")].value}'
+}
+
+echo "═══════════════════════════════════════════════════════════════════════"
+echo " E2E auto-upgrade gate arch: $(uname -m) kernel: $(uname -r)"
+echo "═══════════════════════════════════════════════════════════════════════"
+
+has docker || error "Docker is not available on this host."
+umask 022
+install_kubectl
+install_k3d
+install_helm
+
+echo "── create_cluster() — the installer's real cluster-bring-up path ──"
+create_cluster
+kubectl wait --for=condition=Ready nodes --all --timeout=180s
+
+echo "── install the LAST PUBLISHED chart (what the fleet runs today) ──"
+helm repo add "$REPO_NAME" "$REPO_URL" >/dev/null
+helm repo update >/dev/null
+# Same idiom the auto-upgrade cronjob uses to pick the newest version.
+PREV="$(helm search repo "${REPO_NAME}/client" -o yaml \
+  | awk '/^[[:space:]]*version:/ {print $2; exit}')"
+if [[ -z "$PREV" ]]; then fail "could not resolve the latest published chart version from $REPO_URL"; fi
+LOCAL_VERSION="$(awk '/^version:/ {print $2; exit}' "$CHART_DIR/Chart.yaml")"
+echo " published: $PREV local working tree: $LOCAL_VERSION"
+# Release name and namespace are both $NS; clientId/clientPassword are dummy CI values.
+helm install "$NS" "${REPO_NAME}/client" \
+  --version "$PREV" --namespace "$NS" --create-namespace \
+  --set storageClass.provisioner=rancher.io/local-path \
+  --set clientId=ci-e2e-upgrade \
+  --set clientPassword=ci-e2e-upgrade
+
+echo "── simulate an image-refresh-managed annotation (must survive upgrades) ──"
+kubectl annotate --namespace "$NS" --overwrite "$(jm_deploy)" \
+  "tracebloc.io/last-refreshed-jobs-manager-digest=sha256:e2e-sentinel"
+
+echo "── path 1: manual-operator habit — helm upgrade --reuse-values ──"
+# The stored values predate every key the new chart adds, so each is absent on
+# replay: the nil-guards must hold, and the lockdown must NOT engage by accident.
+helm upgrade "$NS" "$CHART_DIR" --reuse-values --namespace "$NS"
+if ! netpol_has_external_443; then fail "--reuse-values upgrade dropped the external 443 rule (lockdown engaged by accident)"; fi
+if [[ -n "$(jm_egress_proxy_url)" ]]; then fail "--reuse-values upgrade injected EGRESS_PROXY_URL (routing engaged by accident)"; fi
+echo " OK: upgrade succeeded, lockdown stayed off"
+
+echo "── path 2: the fleet auto-upgrade — helm upgrade --reset-then-reuse-values ──"
+helm upgrade "$NS" "$CHART_DIR" --reset-then-reuse-values --namespace "$NS"
+if ! netpol_has_external_443; then fail "auto-upgrade dropped the external 443 rule (allowExternalHttps default did not flow)"; fi
+if [[ -n "$(jm_egress_proxy_url)" ]]; then fail "auto-upgrade injected EGRESS_PROXY_URL (routeWorkloads should default false)"; fi
+kubectl get deploy "${NS}-egress-proxy" --namespace "$NS" >/dev/null \
+  || fail "auto-upgrade did not deploy the egress gateway (new defaults did not flow)"
+digest_annot="$(kubectl get --namespace "$NS" "$(jm_deploy)" \
+  -o jsonpath='{.metadata.annotations.tracebloc\.io/last-refreshed-jobs-manager-digest}')"
+if [[ "$digest_annot" != "sha256:e2e-sentinel" ]]; then fail "image-refresh annotation was clobbered by the upgrade"; fi
+deployed_chart="$(helm list -n "$NS" --filter "^${NS}\$" -o yaml \
+  | awk '/^[[:space:]]*chart:/ {print $2; exit}')"
+if [[ "$deployed_chart" != "client-${LOCAL_VERSION}" ]]; then fail "deployed chart is $deployed_chart, expected client-${LOCAL_VERSION}"; fi
+echo " OK: new defaults flowed in (gateway deployed, inert), annotations survived"
+
+echo "── path 3: operator flips the #102 lockdown ──"
+helm upgrade "$NS" "$CHART_DIR" --reset-then-reuse-values --namespace "$NS" \
+  --set networkPolicy.training.allowExternalHttps=false \
+  --set egressProxy.routeWorkloads=true
+if netpol_has_external_443; then fail "lockdown flip did NOT drop the external 443 rule"; fi
+[[ "$(jm_egress_proxy_url)" == "http://egress-proxy-service:3128" ]] \
+  || fail "lockdown flip did not point jobs-manager at the egress gateway"
+echo " OK: rule 2 dropped, training pods route via the gateway"
+
+echo "── path 4: the NEXT hourly auto-upgrade must preserve the flip ──"
+helm upgrade "$NS" "$CHART_DIR" --reset-then-reuse-values --namespace "$NS"  # no --set: the flip must come from stored values
+if netpol_has_external_443; then fail "auto-upgrade after the flip re-opened the external 443 rule (override lost)"; fi
+[[ "$(jm_egress_proxy_url)" == "http://egress-proxy-service:3128" ]] \
+  || fail "auto-upgrade after the flip lost EGRESS_PROXY_URL (override lost)"
+echo " OK: the operator's lockdown persists across auto-upgrades"
+
+echo
+echo "E2E PASS: ${PREV} -> ${LOCAL_VERSION} upgrades safe on both flag paths; #102 flip engages and persists."