From 54bac391f63c84a0d4037c1ef7cd965b5daccf67 Mon Sep 17 00:00:00 2001 From: Eitan Yarmush Date: Mon, 1 Jun 2026 16:17:47 -0400 Subject: [PATCH 01/13] enable websockets (#4) Signed-off-by: Peter Jausovec Co-authored-by: Peter Jausovec --- cmd/atenet/internal/app/router/xds.go | 8 ++++++++ cmd/atenet/internal/app/router/xds_test.go | 14 ++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/cmd/atenet/internal/app/router/xds.go b/cmd/atenet/internal/app/router/xds.go index 964fc5e92..6b248f53b 100644 --- a/cmd/atenet/internal/app/router/xds.go +++ b/cmd/atenet/internal/app/router/xds.go @@ -62,6 +62,7 @@ const ( IngressHTTPSListener = "ingress_https_listener" RouteName = "substrate_routes" ClusterName = "ate-cluster" + websocketUpgradeType = "websocket" ) // XdsServer implements an aggregated discovery service server for dynamic Envoy router nodes. @@ -287,6 +288,9 @@ func (x *XdsServer) buildRoutes() *routev3.RouteConfiguration { Cluster: "dynamic_forward_proxy_cluster", }, Timeout: durationpb.New(10 * time.Second), + UpgradeConfigs: []*routev3.RouteAction_UpgradeConfig{ + {UpgradeType: websocketUpgradeType}, + }, }, }, }, @@ -334,6 +338,10 @@ func (x *XdsServer) buildHcm(statPrefix string) *anypb.Any { hcm, _ := anypb.New(&hcmv3.HttpConnectionManager{ StatPrefix: statPrefix, GenerateRequestId: &wrapperspb.BoolValue{Value: true}, + UpgradeConfigs: []*hcmv3.HttpConnectionManager_UpgradeConfig{ + {UpgradeType: websocketUpgradeType}, + }, + StreamIdleTimeout: durationpb.New(0), AccessLog: []*accesslogv3.AccessLog{ { Name: "envoy.access_loggers.stdout", diff --git a/cmd/atenet/internal/app/router/xds_test.go b/cmd/atenet/internal/app/router/xds_test.go index 92e347648..f6548f41a 100644 --- a/cmd/atenet/internal/app/router/xds_test.go +++ b/cmd/atenet/internal/app/router/xds_test.go @@ -24,6 +24,7 @@ import ( clusterv3 "github.com/envoyproxy/go-control-plane/envoy/config/cluster/v3" listenerv3 "github.com/envoyproxy/go-control-plane/envoy/config/listener/v3" 
routev3 "github.com/envoyproxy/go-control-plane/envoy/config/route/v3" + hcmv3 "github.com/envoyproxy/go-control-plane/envoy/extensions/filters/network/http_connection_manager/v3" cachev3 "github.com/envoyproxy/go-control-plane/pkg/cache/v3" resourcev3 "github.com/envoyproxy/go-control-plane/pkg/resource/v3" ) @@ -116,6 +117,10 @@ func TestXdsServer_UpdateSnapshot(t *testing.T) { if fallbackRoute.GetMatch().GetPrefix() != "/" { t.Errorf("Expected path mapping prefix '/', got '%s'", fallbackRoute.GetMatch().GetPrefix()) } + routeAction := fallbackRoute.GetRoute() + if len(routeAction.GetUpgradeConfigs()) != 1 || routeAction.GetUpgradeConfigs()[0].GetUpgradeType() != websocketUpgradeType { + t.Errorf("Expected route websocket upgrade config, got %+v", routeAction.GetUpgradeConfigs()) + } } // Verify listeners generated @@ -135,6 +140,15 @@ func TestXdsServer_UpdateSnapshot(t *testing.T) { if sa.GetAddress() != "0.0.0.0" { t.Errorf("Expected address '0.0.0.0', got %s", sa.GetAddress()) } + + hcmAny := l.GetFilterChains()[0].GetFilters()[0].GetTypedConfig() + hcm := &hcmv3.HttpConnectionManager{} + if err := hcmAny.UnmarshalTo(hcm); err != nil { + t.Fatalf("Failed to unmarshal HCM: %v", err) + } + if len(hcm.GetUpgradeConfigs()) != 1 || hcm.GetUpgradeConfigs()[0].GetUpgradeType() != websocketUpgradeType { + t.Errorf("Expected HCM websocket upgrade config, got %+v", hcm.GetUpgradeConfigs()) + } } } From 298d621bb1cd7930d67f72e165e628e017db494a Mon Sep 17 00:00:00 2001 From: Yuval Kohavi Date: Fri, 29 May 2026 16:25:54 -0400 Subject: [PATCH 02/13] feat: allow running with vanilla k8s - add a helm chart - allow JWT auth instead of mTLS --- Makefile | 11 + README.md | 21 +- charts/substrate/Chart.yaml | 13 + charts/substrate/README.md | 86 +++++++ charts/substrate/templates/NOTES.txt | 20 ++ charts/substrate/templates/_helpers.tpl | 38 +++ .../substrate/templates/ate-api-server.yaml | 194 ++++++++++++++ .../substrate/templates/ate-controller.yaml | 86 +++++++ 
charts/substrate/templates/atelet.yaml | 87 +++++++ charts/substrate/templates/atenet-dns.yaml | 161 ++++++++++++ charts/substrate/templates/atenet-router.yaml | 235 +++++++++++++++++ charts/substrate/templates/namespace.yaml | 7 + .../templates/pod-certificate-controller.yaml | 184 ++++++++++++++ .../substrate/templates}/role.yaml | 2 +- charts/substrate/templates/valkey.yaml | 237 ++++++++++++++++++ charts/substrate/values.yaml | 90 +++++++ cmd/ateapi/main.go | 41 ++- cmd/atecontroller/main.go | 30 ++- cmd/atenet/internal/app/router/provider.go | 6 +- cmd/atenet/internal/app/router/router.go | 34 ++- hack/create-kind-cluster.sh | 13 +- hack/gen-rbac.sh | 22 ++ hack/install-ate-kind-jwt.sh | 112 +++++++++ hack/install-ate.sh | 9 +- hack/render-manifests.sh | 102 ++++++++ hack/values-kind-jwt.yaml | 36 +++ internal/ateapiauth/client.go | 110 ++++++++ internal/ateapiauth/server.go | 152 +++++++++++ internal/ateapiauth/server_test.go | 103 ++++++++ internal/controllers/gen.go | 2 +- manifests/ate-install/ate-api-server.yaml | 121 ++++----- manifests/ate-install/ate-controller.yaml | 12 +- manifests/ate-install/atelet.yaml | 8 +- manifests/ate-install/atenet-dns.yaml | 64 ++--- manifests/ate-install/atenet-router.yaml | 95 +++---- manifests/ate-install/kind/kustomization.yaml | 1 + ...e-system-namespace.yaml => namespace.yaml} | 6 +- .../pod-certificate-controller.yaml | 3 + manifests/ate-install/role.yaml | 98 ++++++++ manifests/ate-install/valkey.yaml | 4 +- 40 files changed, 2461 insertions(+), 195 deletions(-) create mode 100644 charts/substrate/Chart.yaml create mode 100644 charts/substrate/README.md create mode 100644 charts/substrate/templates/NOTES.txt create mode 100644 charts/substrate/templates/_helpers.tpl create mode 100644 charts/substrate/templates/ate-api-server.yaml create mode 100644 charts/substrate/templates/ate-controller.yaml create mode 100644 charts/substrate/templates/atelet.yaml create mode 100644 charts/substrate/templates/atenet-dns.yaml 
create mode 100644 charts/substrate/templates/atenet-router.yaml create mode 100644 charts/substrate/templates/namespace.yaml create mode 100644 charts/substrate/templates/pod-certificate-controller.yaml rename {manifests/ate-install/generated => charts/substrate/templates}/role.yaml (95%) create mode 100644 charts/substrate/templates/valkey.yaml create mode 100644 charts/substrate/values.yaml create mode 100755 hack/gen-rbac.sh create mode 100755 hack/install-ate-kind-jwt.sh create mode 100755 hack/render-manifests.sh create mode 100644 hack/values-kind-jwt.yaml create mode 100644 internal/ateapiauth/client.go create mode 100644 internal/ateapiauth/server.go create mode 100644 internal/ateapiauth/server_test.go rename manifests/ate-install/{ate-system-namespace.yaml => namespace.yaml} (80%) create mode 100644 manifests/ate-install/role.yaml diff --git a/Makefile b/Makefile index c6b70cc5e..c6fed696c 100644 --- a/Makefile +++ b/Makefile @@ -92,3 +92,14 @@ verify: test .PHONY: clean clean: rm -rf $(BINDIR) + +# Render the substrate Helm chart into manifests/ate-install/ (mTLS mode, +# the historical default install). Run this whenever charts/substrate/ changes. +.PHONY: helm-template +helm-template: + @./hack/render-manifests.sh + +# Verify that manifests/ate-install/ matches the chart output. Used in CI. +.PHONY: verify-helm-template +verify-helm-template: + @./hack/render-manifests.sh --check diff --git a/README.md b/README.md index fb63afb54..6bb2de64c 100644 --- a/README.md +++ b/README.md @@ -102,7 +102,7 @@ To quickly set up the complete environment: 2. 
Run the following steps: ```shell -# create cluster and local registry +# create cluster and local registry (enables podcert feature gates for mTLS) hack/create-kind-cluster.sh # install ate, valkey, rustfs @@ -126,6 +126,25 @@ kubectl port-forward -n ate-system svc/atenet-router 8000:80 curl -X POST -H "Host: my-counter-1.actors.resources.substrate.ate.dev" -i http://localhost:8000/ ``` +#### JWT mode (no feature gates) + +For clusters where you can't enable the `ClusterTrustBundle` / +`PodCertificateRequest` feature gates (most managed Kubernetes), use the +JWT install path. Authentication is via projected ServiceAccount tokens +verified against the cluster's OIDC issuer; server certs come from a +self-signed pair bootstrapped by the install script. + +```shell +# create cluster WITHOUT podcert feature gates +KIND_ENABLE_PODCERT=false hack/create-kind-cluster.sh + +# install ate via Helm in JWT mode (auto-bootstraps Secret/ConfigMap) +hack/install-ate-kind-jwt.sh + +# the demo + kubectl-ate + port-forward steps from the mTLS Quickstart +# above work identically from here. +``` + ### GKE Quickstart (Development) 1. Create and configure your environment file: diff --git a/charts/substrate/Chart.yaml b/charts/substrate/Chart.yaml new file mode 100644 index 000000000..70961fba7 --- /dev/null +++ b/charts/substrate/Chart.yaml @@ -0,0 +1,13 @@ +apiVersion: v2 +name: substrate +description: Agent Substrate — actor runtime, control plane, and data-plane router. +type: application +version: 0.1.0 +appVersion: "0.1.0" +home: https://github.com/agent-substrate/substrate +sources: +- https://github.com/agent-substrate/substrate +keywords: +- agent +- actor +- substrate diff --git a/charts/substrate/README.md b/charts/substrate/README.md new file mode 100644 index 000000000..4ca8d08ff --- /dev/null +++ b/charts/substrate/README.md @@ -0,0 +1,86 @@ +# substrate + +Helm chart for installing Agent Substrate. + +## Install modes + +| Mode | Default? 
| Cluster requirements | Trade-off | +|------|----------|----------------------|-----------| +| `mtls` | yes | feature gates `ClusterTrustBundle`, `ClusterTrustBundleProjection`, `PodCertificateRequest` + `certificates.k8s.io/v1beta1` API | Full in-cluster mTLS via the bundled `podcertcontroller`. | +| `jwt` | | none beyond stock K8s | Server certs come from a user-provided Secret; clients authenticate via projected ServiceAccount tokens. Valkey runs plaintext intra-cluster. | + +```bash +# mTLS mode (default) +helm upgrade --install substrate ./charts/substrate + +# JWT mode (no off-by-default feature gates) +helm upgrade --install substrate ./charts/substrate \ + --set auth.mode=jwt \ + --set auth.jwt.issuer=https://kubernetes.default.svc.cluster.local +``` + +## JWT-mode prerequisites + +You provide two resources out-of-band: + +1. `Secret/ateapi-tls` (type `kubernetes.io/tls`) in the release namespace. + This is the server cert for `ateapi` and the Envoy data-plane listener. +2. `ConfigMap/ateapi-ca` with key `ca.crt` in the release namespace. + This is the CA bundle clients use to verify the server. + +Bootstrap snippet using `openssl`: + +```bash +NS=ate-system +kubectl create ns "$NS" --dry-run=client -o yaml | kubectl apply -f - + +# 1. Self-signed CA. +openssl req -x509 -newkey rsa:2048 -nodes -days 3650 \ + -subj "/CN=ateapi-ca" \ + -keyout ca.key -out ca.crt + +# 2. Server key + CSR + signed cert. 
+openssl req -newkey rsa:2048 -nodes \ + -subj "/CN=api.ate-system.svc" \ + -keyout server.key -out server.csr +cat > server.ext < /run/ateapi-tls/credential-bundle.pem + volumeMounts: + - { name: ateapi-tls-src, mountPath: /run/ateapi-tls-src, readOnly: true } + - { name: ateapi-tls, mountPath: /run/ateapi-tls } +{{- end }} + containers: + - name: ate-api-server + image: {{ .Values.images.ateapi }} + args: + - "--grpc-listen-addr=0.0.0.0:443" +{{- if eq .Values.auth.mode "mtls" }} + - "--grpc-server-cred-bundle=/run/servicedns.podcert.ate.dev/credential-bundle.pem" + - "--redis-cluster-address=@env" + - "--redis-ca-certs=/etc/valkey-ca/ca.crt" + - "--redis-use-iam-auth=@env" + - "--redis-tls-server-name=@env" + - "--redis-client-cert=@env" + - "--client-jwt-issuer=@env" + - "--client-jwt-audience={{ .Values.auth.jwt.audience }}" + - "--session-id-jwt-pool=/run/session-id-jwt-pool/pool.json" + - "--session-id-ca-pool=/run/session-id-ca-pool/pool.json" + - "--workerpool-ca-certs=/run/workerpool-ca-certs/trust-bundle.pem" +{{- else }} + - "--grpc-server-cred-bundle=/run/ateapi-tls/credential-bundle.pem" + - "--auth-mode=jwt" + - "--redis-cluster-address=@env" + - "--redis-no-tls=true" + - "--redis-use-iam-auth=@env" + - "--client-jwt-issuer={{ .Values.auth.jwt.issuer }}" + - "--client-jwt-audience={{ .Values.auth.jwt.audience }}" + - "--session-id-jwt-pool=/run/session-id-jwt-pool/pool.json" + - "--session-id-ca-pool=/run/session-id-ca-pool/pool.json" +{{- end }} + env: + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: {{ .Values.otel.endpoint }} + envFrom: + - configMapRef: + name: {{ .Values.ateApiServerEnvVarsConfigMap }} + optional: true + volumeMounts: +{{- if eq .Values.auth.mode "mtls" }} + - { name: servicedns, mountPath: /run/servicedns.podcert.ate.dev } + - { name: session-id-jwt-pool, mountPath: /run/session-id-jwt-pool } + - { name: valkey-ca-certs, mountPath: /etc/valkey-ca, readOnly: true } + - { name: session-id-ca-pool, mountPath: 
/run/session-id-ca-pool, readOnly: true } + - { name: workerpool-ca-certs, mountPath: /run/workerpool-ca-certs, readOnly: true } +{{- else }} + - { name: ateapi-tls, mountPath: /run/ateapi-tls, readOnly: true } + - { name: session-id-jwt-pool, mountPath: /run/session-id-jwt-pool } + - { name: session-id-ca-pool, mountPath: /run/session-id-ca-pool, readOnly: true } +{{- end }} + ports: + - containerPort: 443 + - name: prometheus + containerPort: 9090 + readinessProbe: + httpGet: + path: /readyz + port: 9090 + initialDelaySeconds: 5 + periodSeconds: 2 + volumes: +{{- if eq .Values.auth.mode "mtls" }} + - name: servicedns + projected: + sources: + - podCertificate: + signerName: servicedns.podcert.ate.dev/identity + keyType: ECDSAP256 + credentialBundlePath: credential-bundle.pem + - name: session-id-jwt-pool + projected: + sources: + - secret: + name: session-id-jwt-pool + items: + - { key: pool, path: pool.json } + - name: valkey-ca-certs + projected: + sources: + - secret: + name: valkey-ca-certs + items: + - { key: ca.crt, path: ca.crt } + - name: session-id-ca-pool + projected: + sources: + - secret: + name: session-id-ca-pool + items: + - { key: pool, path: pool.json } + - name: workerpool-ca-certs + projected: + sources: + - clusterTrustBundle: + signerName: podidentity.podcert.ate.dev/identity + labelSelector: + matchLabels: + podcert.ate.dev/canarying: live + path: trust-bundle.pem +{{- else }} + - name: ateapi-tls-src + secret: + secretName: {{ .Values.auth.jwt.serverCertSecret }} + - name: ateapi-tls + emptyDir: {} + - name: session-id-jwt-pool + projected: + sources: + - secret: + name: session-id-jwt-pool + items: + - { key: pool, path: pool.json } + - name: session-id-ca-pool + projected: + sources: + - secret: + name: session-id-ca-pool + items: + - { key: pool, path: pool.json } +{{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ include "substrate.fullname" (list "api" .) 
}} + namespace: {{ .Release.Namespace }} +spec: + type: ClusterIP + selector: + app: ate-api-server + ports: + - name: grpc + protocol: TCP + port: 443 + targetPort: 443 diff --git a/charts/substrate/templates/ate-controller.yaml b/charts/substrate/templates/ate-controller.yaml new file mode 100644 index 000000000..739744356 --- /dev/null +++ b/charts/substrate/templates/ate-controller.yaml @@ -0,0 +1,86 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "substrate.fullname" (list "ate-controller" .) }} + namespace: {{ .Release.Namespace }} + labels: + apps: ate-controller +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "substrate.fullname" (list "ate-controller" .) }} +subjects: +- kind: ServiceAccount + name: {{ include "substrate.fullname" (list "ate-controller" .) }} + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: {{ include "substrate.fullname" (list "ate-controller" .) }} + apiGroup: rbac.authorization.k8s.io +--- +kind: Service +apiVersion: v1 +metadata: + name: {{ include "substrate.fullname" (list "ate-controller" .) }} + namespace: {{ .Release.Namespace }} + labels: + app: ate-controller +spec: + selector: + app: ate-controller + ports: + - name: metrics + port: 8080 + targetPort: metrics + protocol: TCP +--- +kind: Deployment +apiVersion: apps/v1 +metadata: + name: {{ include "substrate.fullname" (list "ate-controller" .) }} + namespace: {{ .Release.Namespace }} +spec: + replicas: 1 + selector: + matchLabels: + app: ate-controller + template: + metadata: + labels: + app: ate-controller + spec: + serviceAccountName: {{ include "substrate.fullname" (list "ate-controller" .) }} + containers: + - name: ate-controller + image: {{ .Values.images.atecontroller }} +{{- if eq .Values.auth.mode "jwt" }} + args: + - "--ateapi-auth=jwt" + - "--ateapi-ca-file=/run/ateapi-ca/ca.crt" + - "--ateapi-server-name={{ include "substrate.fullname" (list "api" .) 
}}.{{ .Release.Namespace }}.svc" + - "--ateapi-token-file=/var/run/secrets/tokens/ateapi/token" +{{- end }} + ports: + - name: metrics + containerPort: 8080 + protocol: TCP + - name: healthz + containerPort: 8081 + protocol: TCP +{{- if eq .Values.auth.mode "jwt" }} + volumeMounts: + - { name: ateapi-ca, mountPath: /run/ateapi-ca, readOnly: true } + - { name: ateapi-token, mountPath: /var/run/secrets/tokens/ateapi, readOnly: true } + volumes: + - name: ateapi-ca + configMap: + name: {{ .Values.auth.jwt.caBundleConfigMap }} + - name: ateapi-token + projected: + sources: + - serviceAccountToken: + audience: {{ .Values.auth.jwt.audience }} + expirationSeconds: 3600 + path: token +{{- end }} diff --git a/charts/substrate/templates/atelet.yaml b/charts/substrate/templates/atelet.yaml new file mode 100644 index 000000000..055c5887a --- /dev/null +++ b/charts/substrate/templates/atelet.yaml @@ -0,0 +1,87 @@ +# atelet — identical across auth modes (does not dial ateapi). +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "substrate.fullname" (list "atelet" .) }} + namespace: {{ .Release.Namespace }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "substrate.fullname" (list "atelet-role" .) }} +rules: +- apiGroups: [""] + resources: ["pods"] + verbs: ["get", "watch", "list"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "substrate.fullname" (list "atelet-binding" .) }} +subjects: +- kind: ServiceAccount + name: {{ include "substrate.fullname" (list "atelet" .) }} + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: {{ include "substrate.fullname" (list "atelet-role" .) }} + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: {{ include "substrate.fullname" (list "atelet" .) 
}} + namespace: {{ .Release.Namespace }} + labels: + app: atelet +spec: + selector: + matchLabels: + app: atelet + template: + metadata: + labels: + app: atelet + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9090" + spec: + serviceAccountName: {{ include "substrate.fullname" (list "atelet" .) }} + containers: + - name: atelet + image: {{ .Values.images.atelet }} + args: + - --gcp-auth-for-image-pulls={{ .Values.atelet.gcpAuthForImagePulls }} +{{- with .Values.atelet.extraArgs }} +{{ toYaml . | indent 8 }} +{{- end }} + securityContext: + privileged: true + env: + - name: MY_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: {{ .Values.otel.endpoint }} + - name: ATE_STORAGE_BACKEND + value: {{ .Values.atelet.storageBackend | quote }} +{{- with .Values.atelet.extraEnv }} +{{ toYaml . | indent 8 }} +{{- end }} + ports: + - name: grpc + containerPort: 8085 + hostPort: 8085 + - name: prometheus + containerPort: 9090 + hostPort: 9090 + protocol: TCP + volumeMounts: + - name: run-ateom + mountPath: /var/lib/ateom-gvisor + volumes: + - name: run-ateom + hostPath: + path: /var/lib/ateom-gvisor + type: DirectoryOrCreate diff --git a/charts/substrate/templates/atenet-dns.yaml b/charts/substrate/templates/atenet-dns.yaml new file mode 100644 index 000000000..287bf9b6d --- /dev/null +++ b/charts/substrate/templates/atenet-dns.yaml @@ -0,0 +1,161 @@ +# atenet-dns — identical across auth modes (does not dial ateapi). +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "substrate.fullname" (list "atenet-dns" .) }} + namespace: {{ .Release.Namespace }} + labels: + app: dns +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "substrate.fullname" (list "atenet-dns" .) 
}} + namespace: {{ .Release.Namespace }} +rules: +- apiGroups: [""] + resources: ["services"] + verbs: ["get", "list", "watch"] +- apiGroups: [""] + resources: ["configmaps"] + verbs: ["get", "list", "watch", "create", "update", "patch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "substrate.fullname" (list "atenet-dns" .) }} + namespace: {{ .Release.Namespace }} +subjects: +- kind: ServiceAccount + name: {{ include "substrate.fullname" (list "atenet-dns" .) }} + namespace: {{ .Release.Namespace }} +roleRef: + kind: Role + name: {{ include "substrate.fullname" (list "atenet-dns" .) }} + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "substrate.fullname" (list "atenet-dns" .) }} + namespace: kube-system +rules: +- apiGroups: [""] + resources: ["configmaps"] + verbs: ["get", "list", "watch", "create", "update", "patch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "substrate.fullname" (list "atenet-dns" .) }} + namespace: kube-system +subjects: +- kind: ServiceAccount + name: {{ include "substrate.fullname" (list "atenet-dns" .) }} + namespace: {{ .Release.Namespace }} +roleRef: + kind: Role + name: {{ include "substrate.fullname" (list "atenet-dns" .) }} + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "substrate.fullname" (list "dns" .) }} + namespace: {{ .Release.Namespace }} + labels: + app: dns +spec: + replicas: 1 + selector: + matchLabels: + app: dns + template: + metadata: + labels: + app: dns + spec: + serviceAccountName: {{ include "substrate.fullname" (list "atenet-dns" .) 
}} + shareProcessNamespace: true + initContainers: + - name: init-dns + image: {{ .Values.images.busybox }} + command: ["sh", "-c"] + args: + - | + cat <<'EOF' > /etc/coredns/Corefile + .:53 { + errors + health :8080 + ready :8181 + reload + } + EOF + volumeMounts: + - name: dns-config-volume + mountPath: /etc/coredns + containers: + - name: coredns + image: {{ .Values.images.coredns }} + imagePullPolicy: IfNotPresent + args: [ "-conf", "/etc/coredns/Corefile" ] + volumeMounts: + - name: dns-config-volume + mountPath: /etc/coredns + ports: + - name: dns + containerPort: 53 + protocol: UDP + - name: dns-tcp + containerPort: 53 + protocol: TCP + livenessProbe: + httpGet: + path: /health + port: 8080 + scheme: HTTP + initialDelaySeconds: 10 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 5 + readinessProbe: + httpGet: + path: /ready + port: 8181 + scheme: HTTP + initialDelaySeconds: 5 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 3 + - name: dns-controller + image: {{ .Values.images.atenet }} + args: + - "dns" + - "--log-level=debug" + - "--interval=10s" + - "--corefile-path=/etc/coredns/Corefile" + volumeMounts: + - name: dns-config-volume + mountPath: /etc/coredns + volumes: + - name: dns-config-volume + emptyDir: {} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ include "substrate.fullname" (list "dns" .) }} + namespace: {{ .Release.Namespace }} + labels: + app: dns +spec: + selector: + app: dns + type: ClusterIP + ports: + - name: dns + port: 53 + protocol: UDP + - name: dns-tcp + port: 53 + protocol: TCP diff --git a/charts/substrate/templates/atenet-router.yaml b/charts/substrate/templates/atenet-router.yaml new file mode 100644 index 000000000..6caa2ee68 --- /dev/null +++ b/charts/substrate/templates/atenet-router.yaml @@ -0,0 +1,235 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "substrate.fullname" (list "atenet-router" .) 
}} + namespace: {{ .Release.Namespace }} + labels: + app: atenet-router +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "substrate.fullname" (list "atenet-router" .) }} +rules: +- apiGroups: + - "ate.dev" + resources: + - actortemplates + verbs: + - get + - watch + - list +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "substrate.fullname" (list "atenet-router" .) }} +subjects: +- kind: ServiceAccount + name: {{ include "substrate.fullname" (list "atenet-router" .) }} + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: {{ include "substrate.fullname" (list "atenet-router" .) }} + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "substrate.fullname" (list "atenet-router-envoy-config" .) }} + namespace: {{ .Release.Namespace }} +data: + envoy.yaml: | + admin: + address: + socket_address: + address: 0.0.0.0 + port_value: 9901 + + node: + id: substrate-envoy-node + cluster: substrate-router-cluster + + dynamic_resources: + lds_config: + resource_api_version: V3 + ads: {} + cds_config: + resource_api_version: V3 + ads: {} + ads_config: + api_type: GRPC + transport_api_version: V3 + grpc_services: + - envoy_grpc: + cluster_name: xds_cluster + + static_resources: + clusters: + - name: xds_cluster + connect_timeout: 0.25s + type: STRICT_DNS + lb_policy: ROUND_ROBIN + typed_extension_protocol_options: + envoy.extensions.upstreams.http.v3.HttpProtocolOptions: + "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions + explicit_http_config: + http2_protocol_options: {} + load_assignment: + cluster_name: xds_cluster + endpoints: + - lb_endpoints: + - endpoint: + address: + socket_address: + address: 127.0.0.1 + port_value: 18000 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "substrate.fullname" (list "atenet-router" .) 
}} + namespace: {{ .Release.Namespace }} + labels: + app: atenet-router +spec: + replicas: 1 + selector: + matchLabels: + app: atenet-router + template: + metadata: + labels: + app: atenet-router + spec: + serviceAccountName: {{ include "substrate.fullname" (list "atenet-router" .) }} +{{- if eq .Values.auth.mode "jwt" }} + initContainers: + - name: assemble-envoy-cred-bundle + image: {{ .Values.images.busybox }} + command: + - sh + - -c + - cat /run/ateapi-tls-src/tls.crt /run/ateapi-tls-src/tls.key > /run/envoy-tls/credential-bundle.pem + volumeMounts: + - name: ateapi-tls-src + mountPath: /run/ateapi-tls-src + readOnly: true + - name: envoy-tls + mountPath: /run/envoy-tls +{{- end }} + containers: + - name: atenet-router + image: {{ .Values.images.atenet }} + args: + - "router" + - "--standalone" + - "--namespace={{ .Release.Namespace }}" + - "--port-http=8080" + - "--port-xds=18000" + - "--port-extproc=50051" + - "--extproc-address=127.0.0.1" + - "--ateapi-address={{ include "substrate.fullname" (list "api" .) }}.{{ .Release.Namespace }}.svc:443" +{{- if eq .Values.auth.mode "jwt" }} + - "--ateapi-auth=jwt" + - "--ateapi-ca-file=/run/ateapi-ca/ca.crt" + - "--ateapi-server-name={{ include "substrate.fullname" (list "api" .) 
}}.{{ .Release.Namespace }}.svc" + - "--ateapi-token-file=/var/run/secrets/tokens/ateapi/token" +{{- end }} + - "--status-port=4040" + - "--port-https=8443" +{{- if eq .Values.auth.mode "mtls" }} + - "--envoy-cert-path=/run/servicedns.podcert.ate.dev/credential-bundle.pem" +{{- else }} + - "--envoy-cert-path=/run/envoy-tls/credential-bundle.pem" +{{- end }} + ports: + - name: xds + containerPort: 18000 + - name: extproc + containerPort: 50051 + - name: status + containerPort: 4040 +{{- if eq .Values.auth.mode "jwt" }} + volumeMounts: + - name: ateapi-ca + mountPath: /run/ateapi-ca + readOnly: true + - name: ateapi-token + mountPath: /var/run/secrets/tokens/ateapi + readOnly: true +{{- end }} + - name: envoy + image: {{ .Values.images.envoy }} + command: + - "/usr/local/bin/envoy" + - "-c" + - "/etc/envoy/envoy.yaml" + - "--component-log-level" + - "upstream:debug,router:debug,ext_proc:debug" + ports: + - name: http + containerPort: 8080 + - name: https + containerPort: 8443 + - name: admin + containerPort: 9901 + volumeMounts: + - name: envoy-config + mountPath: /etc/envoy +{{- if eq .Values.auth.mode "mtls" }} + - name: "servicedns" + mountPath: "/run/servicedns.podcert.ate.dev" +{{- else }} + - name: envoy-tls + mountPath: /run/envoy-tls + readOnly: true +{{- end }} + volumes: + - name: envoy-config + configMap: + name: {{ include "substrate.fullname" (list "atenet-router-envoy-config" .) 
}} +{{- if eq .Values.auth.mode "mtls" }} + - name: "servicedns" + projected: + sources: + - podCertificate: + signerName: servicedns.podcert.ate.dev/identity + keyType: ECDSAP256 + credentialBundlePath: credential-bundle.pem +{{- else }} + - name: ateapi-tls-src + secret: + secretName: {{ .Values.auth.jwt.serverCertSecret }} + - name: envoy-tls + emptyDir: {} + - name: ateapi-ca + configMap: + name: {{ .Values.auth.jwt.caBundleConfigMap }} + - name: ateapi-token + projected: + sources: + - serviceAccountToken: + audience: {{ .Values.auth.jwt.audience }} + expirationSeconds: 3600 + path: token +{{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ include "substrate.fullname" (list "atenet-router" .) }} + namespace: {{ .Release.Namespace }} +spec: + type: ClusterIP + selector: + app: atenet-router + ports: + - name: http + port: 80 + targetPort: 8080 + protocol: TCP + - name: https + port: 443 + targetPort: 8443 + protocol: TCP diff --git a/charts/substrate/templates/namespace.yaml b/charts/substrate/templates/namespace.yaml new file mode 100644 index 000000000..d63920621 --- /dev/null +++ b/charts/substrate/templates/namespace.yaml @@ -0,0 +1,7 @@ +{{- include "substrate.validateAuthMode" . -}} +{{- if .Values.createNamespace }} +apiVersion: v1 +kind: Namespace +metadata: + name: {{ .Release.Namespace }} +{{- end }} diff --git a/charts/substrate/templates/pod-certificate-controller.yaml b/charts/substrate/templates/pod-certificate-controller.yaml new file mode 100644 index 000000000..3654ec3b1 --- /dev/null +++ b/charts/substrate/templates/pod-certificate-controller.yaml @@ -0,0 +1,184 @@ +{{- if eq .Values.auth.mode "mtls" -}} +apiVersion: v1 +kind: Namespace +metadata: + name: podcertificate-controller-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "substrate.fullname" (list "podcert-ate-dev-signer" .) }} +rules: +# The service signer needs to be able to read services and pods. 
+- apiGroups: + - "" + resources: + - services + - pods + verbs: + - get + - list + - watch +- apiGroups: + - certificates.k8s.io + resources: + - podcertificaterequests + verbs: + - get + - list + - watch + - update +- apiGroups: + - certificates.k8s.io + resources: + - clustertrustbundles + verbs: + - create + - get + - list + - watch + - update + - delete +- apiGroups: + - certificates.k8s.io + resources: + - podcertificaterequests/status + verbs: + - update +- apiGroups: + - certificates.k8s.io + resources: + - signers + resourceNames: + - servicedns.podcert.ate.dev/* + - podidentity.podcert.ate.dev/* + verbs: + - sign + - attest +- apiGroups: + - events.k8s.io + resources: + - events + verbs: + - create +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "substrate.fullname" (list "podcert-ate-dev-signer" .) }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ include "substrate.fullname" (list "podcert-ate-dev-signer" .) 
}} +subjects: +- kind: ServiceAccount + namespace: podcertificate-controller-system + name: default +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + namespace: podcertificate-controller-system + name: coordinator +rules: +- apiGroups: + - "coordination.k8s.io" + resources: + - "leases" + verbs: + - create + - get + - list + - watch + - update + - delete +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: podcertificate-controller-is-a-coordinator + namespace: podcertificate-controller-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: coordinator +subjects: +- kind: ServiceAccount + namespace: podcertificate-controller-system + name: default +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: podcertificate-controller + namespace: podcertificate-controller-system + labels: + app: podcertificate-controller +spec: + replicas: 1 + selector: + matchLabels: + app: podcertificate-controller + template: + metadata: + labels: + app: podcertificate-controller + spec: + containers: + - name: controller + image: {{ .Values.images.podcertcontroller }} + args: + - --in-cluster=true + - --sharding-pod-namespace=$(POD_NAMESPACE) + - --sharding-pod-name=$(POD_NAME) + - --sharding-pod-uid=$(POD_UID) + - --sharding-application-name=podcertificate-controller + - --service-dns-ca-pool=/run/ca-state/service-dns-pool.json + - --pod-identity-ca-pool=/run/ca-state/pod-identity-pool.json + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_UID + valueFrom: + fieldRef: + fieldPath: metadata.uid + volumeMounts: + - name: "ca-state" + mountPath: "/run/ca-state" + securityContext: + allowPrivilegeEscalation: false + capabilities: + add: + - NET_BIND_SERVICE + drop: + - ALL + readOnlyRootFilesystem: true + volumes: + - name: "ca-state" + projected: + sources: + - secret: + name: 
"service-dns-ca-pool" + items: + - key: "pool" + path: "service-dns-pool.json" + - secret: + name: "pod-identity-ca-pool" + items: + - key: "pool" + path: "pod-identity-pool.json" + dnsPolicy: Default + nodeSelector: + kubernetes.io/os: linux + restartPolicy: Always + schedulerName: default-scheduler + securityContext: {} + serviceAccountName: default + terminationGracePeriodSeconds: 30 +{{- end }} diff --git a/manifests/ate-install/generated/role.yaml b/charts/substrate/templates/role.yaml similarity index 95% rename from manifests/ate-install/generated/role.yaml rename to charts/substrate/templates/role.yaml index 7341d28dd..8e3f7117d 100644 --- a/manifests/ate-install/generated/role.yaml +++ b/charts/substrate/templates/role.yaml @@ -16,7 +16,7 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: - name: ate-controller + name: {{ include "substrate.fullname" (list "ate-controller" .) }} rules: - apiGroups: - "" diff --git a/charts/substrate/templates/valkey.yaml b/charts/substrate/templates/valkey.yaml new file mode 100644 index 000000000..63b4758ad --- /dev/null +++ b/charts/substrate/templates/valkey.yaml @@ -0,0 +1,237 @@ +{{- if .Values.valkey.enabled -}} +{{- $sts := include "substrate.fullname" (list "valkey-cluster" .) -}} +{{- $headless := include "substrate.fullname" (list "valkey-cluster-service" .) -}} +{{- $ns := .Release.Namespace -}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "substrate.fullname" (list "valkey-config" .) 
}} + namespace: {{ .Release.Namespace }} +data: + valkey.conf: | +{{- if eq .Values.auth.mode "mtls" }} + # Enforce TLS and disable standard port + port 0 + tls-port 6379 + tls-cluster yes + tls-replication yes + + # Load certificates from projected volume + tls-cert-file /run/servicedns.podcert.ate.dev/credential-bundle.pem + tls-key-file /run/servicedns.podcert.ate.dev/credential-bundle.pem + tls-ca-cert-file /etc/valkey-ca/ca.crt + tls-auth-clients yes + + # Enable cluster mode +{{- else }} + # Plaintext: serve on the standard port, no TLS. + port 6379 + +{{- end }} + cluster-enabled yes + cluster-config-file nodes.conf + cluster-node-timeout 5000 + appendonly yes + protected-mode no +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ $headless }} + namespace: {{ .Release.Namespace }} +spec: + clusterIP: None + selector: + app: valkey-cluster + ports: + - name: valkey + port: 6379 + targetPort: 6379 + - name: bus + port: 16379 + targetPort: 16379 +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ $sts }} + namespace: {{ .Release.Namespace }} +spec: + selector: + app: valkey-cluster + ports: + - name: valkey + port: 6379 + targetPort: 6379 +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{ $sts }} + namespace: {{ .Release.Namespace }} +spec: + serviceName: {{ $headless }} + replicas: {{ .Values.valkey.replicas }} + podManagementPolicy: Parallel + selector: + matchLabels: + app: valkey-cluster + template: + metadata: + labels: + app: valkey-cluster + spec: + containers: + - name: valkey + image: {{ .Values.images.valkey }} + command: ["valkey-server", "/etc/valkey/valkey.conf"] + ports: + - name: valkey + containerPort: 6379 + - name: bus + containerPort: 16379 + volumeMounts: + - name: config + mountPath: /etc/valkey +{{- if eq .Values.auth.mode "mtls" }} + - name: servicedns + mountPath: /run/servicedns.podcert.ate.dev + - name: valkey-ca-certs + mountPath: /etc/valkey-ca + readOnly: true +{{- end }} + - name: data + mountPath: /data 
+ volumes: + - name: config + configMap: + name: {{ include "substrate.fullname" (list "valkey-config" .) }} +{{- if eq .Values.auth.mode "mtls" }} + - name: servicedns + projected: + sources: + - podCertificate: + signerName: servicedns.podcert.ate.dev/identity + keyType: ECDSAP256 + credentialBundlePath: credential-bundle.pem + - name: valkey-ca-certs + projected: + sources: + - secret: + name: valkey-ca-certs + items: + - key: ca.crt + path: ca.crt +{{- end }} + volumeClaimTemplates: + - metadata: + name: data + spec: + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: {{ .Values.valkey.storageSize }} +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ include "substrate.fullname" (list "valkey-cluster-init" .) }} + namespace: {{ .Release.Namespace }} +spec: + template: + metadata: + labels: + app: valkey-cluster-init + spec: + restartPolicy: OnFailure + containers: + - name: init + image: {{ .Values.images.valkey }} +{{- if eq .Values.auth.mode "mtls" }} + volumeMounts: + - name: servicedns + mountPath: /run/servicedns.podcert.ate.dev + - name: valkey-ca-certs + mountPath: /etc/valkey-ca + readOnly: true +{{- end }} + command: + - /bin/sh + - -c + - | + set -e + echo "Waiting for all Valkey pods to resolve..." + for i in 0 1 2 3 4 5; do + until getent hosts {{ $sts }}-${i}.{{ $headless }}.{{ $ns }}.svc >/dev/null 2>&1; do + echo "Waiting for {{ $sts }}-${i} DNS..." + sleep 2 + done + done + + echo "All pods resolved. Getting IPs..." + POD_IPS="" + for i in 0 1 2 3 4 5; do + ip=$(getent hosts {{ $sts }}-${i}.{{ $headless }}.{{ $ns }}.svc | awk '{print $1}') + POD_IPS="${POD_IPS} ${ip}:6379" + done + + echo "Checking if Valkey cluster is already initialized..." 
+{{- if eq .Values.auth.mode "mtls" }} + until valkey-cli --tls --cacert /etc/valkey-ca/ca.crt --cert /run/servicedns.podcert.ate.dev/credential-bundle.pem --key /run/servicedns.podcert.ate.dev/credential-bundle.pem -h {{ $sts }}-0.{{ $headless }}.{{ $ns }}.svc ping >/dev/null 2>&1; do + echo "Waiting for {{ $sts }}-0 to respond to ping..." + sleep 2 + done + + INIT_STATUS=$(valkey-cli --tls --cacert /etc/valkey-ca/ca.crt --cert /run/servicedns.podcert.ate.dev/credential-bundle.pem --key /run/servicedns.podcert.ate.dev/credential-bundle.pem -h {{ $sts }}-0.{{ $headless }}.{{ $ns }}.svc cluster info 2>/dev/null | grep cluster_state || true) + + if [ -z "${INIT_STATUS}" ] || ! echo "${INIT_STATUS}" | grep -q "cluster_state:ok"; then + echo "Initializing Valkey cluster..." + valkey-cli --tls \ + --cacert /etc/valkey-ca/ca.crt \ + --cert /run/servicedns.podcert.ate.dev/credential-bundle.pem \ + --key /run/servicedns.podcert.ate.dev/credential-bundle.pem \ + --cluster create ${POD_IPS} \ + --cluster-replicas 1 \ + --cluster-yes + echo "Cluster initialization complete!" + else + echo "Cluster already initialized." + fi +{{- else }} + until valkey-cli -h {{ $sts }}-0.{{ $headless }}.{{ $ns }}.svc -p 6379 ping >/dev/null 2>&1; do + echo "Waiting for {{ $sts }}-0 to respond to ping..." + sleep 2 + done + + INIT_STATUS=$(valkey-cli -h {{ $sts }}-0.{{ $headless }}.{{ $ns }}.svc -p 6379 cluster info 2>/dev/null | grep cluster_state || true) + + if [ -z "${INIT_STATUS}" ] || ! echo "${INIT_STATUS}" | grep -q "cluster_state:ok"; then + echo "Initializing Valkey cluster..." + valkey-cli \ + --cluster create ${POD_IPS} \ + --cluster-replicas 1 \ + --cluster-yes + echo "Cluster initialization complete!" + else + echo "Cluster already initialized." 
+ fi +{{- end }} +{{- if eq .Values.auth.mode "mtls" }} + volumes: + - name: servicedns + projected: + sources: + - podCertificate: + signerName: servicedns.podcert.ate.dev/identity + keyType: ECDSAP256 + credentialBundlePath: credential-bundle.pem + - name: valkey-ca-certs + projected: + sources: + - secret: + name: valkey-ca-certs + items: + - key: ca.crt + path: ca.crt +{{- end }} +{{- end }} diff --git a/charts/substrate/values.yaml b/charts/substrate/values.yaml new file mode 100644 index 000000000..ae0b673fe --- /dev/null +++ b/charts/substrate/values.yaml @@ -0,0 +1,90 @@ +# Default values for the substrate chart. +# +# The chart supports two installation modes via `auth.mode`: +# +# - "mtls" (default): Today's behavior. Server certs are issued by the +# in-cluster podcertcontroller via PodCertificateRequest + projected +# into pods via the ClusterTrustBundle / podCertificate projection +# sources. Valkey runs with full TLS + client-cert verification. +# REQUIRES the off-by-default Kubernetes feature gates: +# ClusterTrustBundle, ClusterTrustBundleProjection, PodCertificateRequest +# and the v1beta1 certificates API. +# +# - "jwt": No PodCertificateRequest / ClusterTrustBundle usage. Server +# certs come from a user-provided Secret (auth.jwt.serverCertSecret). +# Clients verify the server with a user-provided CA bundle +# (auth.jwt.caBundleConfigMap) and authenticate to ateapi with a +# projected Kubernetes ServiceAccount token. Valkey runs plaintext. + +auth: + mode: mtls # mtls | jwt + + jwt: + # OIDC issuer URL the cluster uses to mint SA tokens. Required when + # mode=jwt. Examples: + # GKE: https://container.googleapis.com/v1/projects//locations//clusters/ + # kind: https://kubernetes.default.svc.cluster.local + # EKS: https://oidc.eks..amazonaws.com/id/ + issuer: "" + + # Audience SA tokens are minted for, and that ateapi expects. 
+ audience: api.ate-system.svc + + # Name of a kubernetes.io/tls Secret in the release namespace, with keys + # tls.crt and tls.key. The chart references this Secret; the user creates + # it out-of-band (see chart README for an openssl bootstrap snippet). + serverCertSecret: ateapi-tls + + # Name of a ConfigMap in the release namespace with key "ca.crt" holding + # the CA(s) that signed serverCertSecret. Clients mount it to verify the + # ateapi server certificate. + caBundleConfigMap: ateapi-ca + +# Set to true to have the chart create the release namespace. +# Off by default — most helm workflows expect the namespace to already exist +# (helm install -n --create-namespace). Enable for the generated +# manifests/ate-install/ install path (kubectl apply). +createNamespace: false + +valkey: + enabled: true + replicas: 6 + storageSize: 1Gi + +# atelet daemonset overrides. Defaults match GKE; kind/dev installs override +# via hack/values-kind-jwt.yaml. extraArgs / extraEnv are appended verbatim +# for installer-specific knobs (e.g. AWS_* for rustfs/S3 storage). +atelet: + gcpAuthForImagePulls: true + storageBackend: gcs + extraArgs: [] + extraEnv: [] + +redis: + # Override the cluster address. Empty -> derived from valkey.enabled + # (defaults to "valkey-cluster.ate-system.svc:6379"). + clusterAddress: "" + # Google IAM auth (for managed Memorystore / cloud Valkey). + useIAMAuth: false + # Override TLS server name for Redis hostname verification (mtls mode). + tlsServerName: "" + +# Name of a ConfigMap in the release namespace that supplies per-environment +# overrides for ate-api-server (ATE_API_REDIS_*, ATE_API_K8SJWT_ISSUER, ...). +# Mounted via envFrom with optional=true so installs work without it. Created +# out-of-band by hack/install-ate.sh `create_api_server_env_vars` for kind/dev. 
+ateApiServerEnvVarsConfigMap: ate-api-server-envvars + +otel: + endpoint: http://opentelemetry-collector.gke-managed-otel.svc.cluster.local:4317 + +images: + ateapi: ko://github.com/agent-substrate/substrate/cmd/ateapi + atecontroller: ko://github.com/agent-substrate/substrate/cmd/atecontroller + atelet: ko://github.com/agent-substrate/substrate/cmd/atelet + atenet: ko://github.com/agent-substrate/substrate/cmd/atenet + podcertcontroller: ko://github.com/agent-substrate/substrate/cmd/podcertcontroller + valkey: valkey/valkey:8.0 + envoy: envoyproxy/envoy:v1.30-latest + coredns: coredns/coredns:1.11.1 + busybox: busybox:1.36 diff --git a/cmd/ateapi/main.go b/cmd/ateapi/main.go index 87d5a7433..c09d74154 100644 --- a/cmd/ateapi/main.go +++ b/cmd/ateapi/main.go @@ -27,6 +27,7 @@ import ( "github.com/agent-substrate/substrate/cmd/ateapi/internal/controlapi" "github.com/agent-substrate/substrate/cmd/ateapi/internal/sessionidentity" "github.com/agent-substrate/substrate/cmd/ateapi/internal/store/ateredis" + "github.com/agent-substrate/substrate/internal/ateapiauth" "github.com/agent-substrate/substrate/internal/ateinterceptors" "github.com/agent-substrate/substrate/internal/credbundle" "github.com/agent-substrate/substrate/internal/serverboot" @@ -56,6 +57,7 @@ var ( redisUseIAMAuth = pflag.String("redis-use-iam-auth", "true", "Whether to use Google IAM authentication for Redis/Valkey.") redisTLSServerName = pflag.String("redis-tls-server-name", "", "The ServerName to use for Redis TLS hostname verification.") redisClientCert = pflag.String("redis-client-cert", "", "The file containing client TLS certificate/key credential bundle for Redis/Valkey.") + redisNoTLS = pflag.Bool("redis-no-tls", false, "If true, connect to Redis/Valkey in plaintext (no TLS). 
For development / installs that don't enable Valkey TLS.") clientJWTIssuer = pflag.String("client-jwt-issuer", "", "The expected issuer URL for client JWTs.") clientJWTAudience = pflag.String("client-jwt-audience", "", "The expected audience for client JWTs.") @@ -65,6 +67,7 @@ var ( workerpoolCACerts = pflag.String("workerpool-ca-certs", "", "The file that contains the CA for verifying workerpool client certificates.") showVersion = pflag.Bool("version", false, "Print version and exit.") + authMode = pflag.String("auth-mode", "mtls", "Auth mode for incoming gRPC: mtls|jwt. 'mtls' (default) relies on transport-level mTLS for client identity. 'jwt' additionally requires a Kubernetes ServiceAccount Bearer token on every RPC.") ) func main() { @@ -94,6 +97,11 @@ func main() { loadFlagsFromEnv() logFlagValues(ctx) + authModeParsed, err := ateapiauth.ParseMode(*authMode) + if err != nil { + serverboot.Fatal(ctx, "Invalid --auth-mode", err) + } + redisClient, err := connectRedis(ctx) if err != nil { serverboot.Fatal(ctx, "Failed to set up Redis/Valkey", err) @@ -141,10 +149,22 @@ func main() { serverboot.Fatal(ctx, "Failed to start listener", err) } + authCfg := ateapiauth.ServerConfig{ + Mode: authModeParsed, + Issuer: *clientJWTIssuer, + Audience: *clientJWTAudience, + } + mux := grpc.NewServer( grpc.Creds(serverCreds), grpc.StatsHandler(otelgrpc.NewServerHandler()), - grpc.UnaryInterceptor(ateinterceptors.ServerUnaryInterceptor), + grpc.ChainUnaryInterceptor( + ateapiauth.UnaryServerInterceptor(authCfg), + ateinterceptors.ServerUnaryInterceptor, + ), + grpc.ChainStreamInterceptor( + ateapiauth.StreamServerInterceptor(authCfg), + ), ) reflection.Register(mux) ateapipb.RegisterControlServer(mux, sm) @@ -191,25 +211,30 @@ func logFlagValues(ctx context.Context) { slog.String("redis-use-iam-auth", *redisUseIAMAuth), slog.String("redis-tls-server-name", *redisTLSServerName), slog.String("redis-client-cert", *redisClientCert), + slog.Bool("redis-no-tls", *redisNoTLS), 
slog.String("client-jwt-issuer", *clientJWTIssuer), slog.String("client-jwt-audience", *clientJWTAudience), slog.String("session-id-jwt-pool", *sessionIDJWTPoolFile), slog.String("session-id-ca-pool", *sessionIDCAPoolFile), slog.String("workerpool-ca-certs", *workerpoolCACerts), + slog.String("auth-mode", *authMode), ) } // connectRedis builds the Redis/Valkey TLS config, plumbs IAM auth if // requested, opens the cluster client, and pings with retries. func connectRedis(ctx context.Context) (*redis.ClusterClient, error) { - tlsConfig, err := buildRedisTLSConfig(ctx) - if err != nil { - return nil, err - } - clusterOpts := &redis.ClusterOptions{ - Addrs: []string{*redisClusterAddress}, - TLSConfig: tlsConfig, + Addrs: []string{*redisClusterAddress}, + } + if *redisNoTLS { + slog.InfoContext(ctx, "Connecting to Redis/Valkey without TLS (--redis-no-tls=true)") + } else { + tlsConfig, err := buildRedisTLSConfig(ctx) + if err != nil { + return nil, err + } + clusterOpts.TLSConfig = tlsConfig } if *redisUseIAMAuth != "false" { diff --git a/cmd/atecontroller/main.go b/cmd/atecontroller/main.go index f7e922273..4db44cc02 100644 --- a/cmd/atecontroller/main.go +++ b/cmd/atecontroller/main.go @@ -14,15 +14,14 @@ package main import ( - "crypto/tls" "os" + "github.com/agent-substrate/substrate/internal/ateapiauth" "github.com/agent-substrate/substrate/internal/controllers" clientv1alpha1 "github.com/agent-substrate/substrate/pkg/api/v1alpha1" "github.com/agent-substrate/substrate/pkg/proto/ateapipb" "github.com/spf13/pflag" "google.golang.org/grpc" - "google.golang.org/grpc/credentials" "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" @@ -39,6 +38,11 @@ var ( setupLog = ctrl.Log.WithName("setup") ateAPIConnSpec = pflag.String("ateapi-conn-spec", "dns:///api.ate-system.svc:443", "") + + ateapiAuthMode = pflag.String("ateapi-auth", "mtls", "Client auth to ateapi: mtls|jwt. 
'mtls' (default) dials with insecure TLS and relies on pod-projected mTLS credentials for identity. 'jwt' verifies the server cert and sends a Bearer SA token.") + ateapiCAFile = pflag.String("ateapi-ca-file", "", "PEM file with CAs trusted to verify the ateapi server cert. Required for jwt.") + ateapiServerName = pflag.String("ateapi-server-name", "", "SNI / hostname expected on the ateapi server cert. Optional.") + ateapiTokenFile = pflag.String("ateapi-token-file", "", "Projected SA token file used as Bearer credential. Required for jwt.") ) func init() { @@ -47,15 +51,27 @@ func init() { } func main() { + pflag.Parse() ctrl.SetLogger(zap.New(zap.UseDevMode(true))) - // TODO: Verify server certificate, pass client certificate. - clientTLSConfig := &tls.Config{ - InsecureSkipVerify: true, // Temporarily bypass standard checks + mode, err := ateapiauth.ParseMode(*ateapiAuthMode) + if err != nil { + setupLog.Error(err, "invalid --ateapi-auth") + os.Exit(1) + } + + dialOpts, err := ateapiauth.DialOptions(ateapiauth.ClientConfig{ + Mode: mode, + CAFile: *ateapiCAFile, + ServerName: *ateapiServerName, + TokenFile: *ateapiTokenFile, + }) + if err != nil { + setupLog.Error(err, "building ateapi dial options") + os.Exit(1) } - clientCreds := credentials.NewTLS(clientTLSConfig) - ateapiConn, err := grpc.NewClient(*ateAPIConnSpec, grpc.WithTransportCredentials(clientCreds)) + ateapiConn, err := grpc.NewClient(*ateAPIConnSpec, dialOpts...) 
if err != nil { setupLog.Error(err, "Error creating grpc connection to ate api") os.Exit(1) diff --git a/cmd/atenet/internal/app/router/provider.go b/cmd/atenet/internal/app/router/provider.go index ccd68cf79..ecab5f8ae 100644 --- a/cmd/atenet/internal/app/router/provider.go +++ b/cmd/atenet/internal/app/router/provider.go @@ -44,10 +44,10 @@ type proxyProvider interface { func newProxyProvider(cfg RouterConfig) (proxyProvider, error) { switch strings.ToLower(cfg.NetworkingMode) { - case "", NetworkingModeEnvoy: - return envoyProvider{cfg: cfg}, nil - case NetworkingModeAgentgateway: + case "", NetworkingModeAgentgateway: return agentgatewayProvider{cfg: cfg}, nil + case NetworkingModeEnvoy: + return envoyProvider{cfg: cfg}, nil default: return nil, fmt.Errorf("unsupported networking mode %q", cfg.NetworkingMode) } diff --git a/cmd/atenet/internal/app/router/router.go b/cmd/atenet/internal/app/router/router.go index 0449aacbb..15c757078 100644 --- a/cmd/atenet/internal/app/router/router.go +++ b/cmd/atenet/internal/app/router/router.go @@ -18,7 +18,6 @@ import ( "context" "crypto/rand" "crypto/rsa" - "crypto/tls" "crypto/x509" "crypto/x509/pkix" "encoding/pem" @@ -37,7 +36,6 @@ import ( "github.com/spf13/cobra" "golang.org/x/sync/errgroup" "google.golang.org/grpc" - "google.golang.org/grpc/credentials" "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/client-go/kubernetes" @@ -46,6 +44,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/config" + "github.com/agent-substrate/substrate/internal/ateapiauth" "github.com/agent-substrate/substrate/internal/serverboot" v1alpha1 "github.com/agent-substrate/substrate/pkg/api/v1alpha1" "github.com/agent-substrate/substrate/pkg/proto/ateapipb" @@ -81,6 +80,11 @@ type RouterConfig struct { TLSKeyPath string LogLevel string MetricsAddr string + + AteapiAuthMode string + AteapiCAFile string + AteapiServerName string + AteapiTokenFile 
string } // RouterServer instantiates and coordinates runtime threads executing system modules. @@ -154,7 +158,7 @@ func NewCmd() *cobra.Command { cmd.Flags().IntVar(&cfg.XdsPort, "port-xds", 18000, "TCP port listening for the xDS dynamic Envoy connections") cmd.Flags().IntVar(&cfg.ExtprocPort, "port-extproc", 50051, "Listen port for the Envoy dynamic External Processing (ext_proc) server") cmd.Flags().StringVar(&cfg.ExtprocAddr, "extproc-address", "127.0.0.1", "Host IP or address of the Envoy External Processing (ext_proc) server") - cmd.Flags().StringVar(&cfg.NetworkingMode, "networking-mode", NetworkingModeEnvoy, "Networking proxy mode: envoy or agentgateway") + cmd.Flags().StringVar(&cfg.NetworkingMode, "networking-mode", NetworkingModeAgentgateway, "Networking proxy mode: agentgateway or envoy") cmd.Flags().StringVar(&cfg.EnvoyImage, "envoy-image", "envoyproxy/envoy:v1.30-latest", "Image URI used for dynamically launched router instances") cmd.Flags().StringVar(&cfg.AgentgatewayImage, "agentgateway-image", "cr.agentgateway.dev/agentgateway:v1.3.0-alpha.1", "Image URI used for Agentgateway router instances") cmd.Flags().StringVar(&cfg.TemplatesFile, "actor-templates-file", "", "Path to offline YAML configuration file listing ActorTemplates") @@ -164,12 +168,17 @@ func NewCmd() *cobra.Command { cmd.Flags().StringVar(&cfg.TLSCertPath, "tls-cert-path", "", "Path to the proxy TLS certificate file") cmd.Flags().StringVar(&cfg.TLSKeyPath, "tls-key-path", "", "Path to the proxy TLS private key file") + cmd.Flags().StringVar(&cfg.AteapiAuthMode, "ateapi-auth", "mtls", "Client auth to ateapi: mtls|jwt. 'mtls' (default) dials with insecure TLS and relies on pod-projected mTLS credentials for identity. 'jwt' verifies the server cert and sends a Bearer SA token.") + cmd.Flags().StringVar(&cfg.AteapiCAFile, "ateapi-ca-file", "", "PEM file with CAs trusted to verify the ateapi server cert. 
Required for jwt.") + cmd.Flags().StringVar(&cfg.AteapiServerName, "ateapi-server-name", "", "SNI / hostname expected on the ateapi server cert. Optional.") + cmd.Flags().StringVar(&cfg.AteapiTokenFile, "ateapi-token-file", "", "Projected SA token file used as Bearer credential. Required for jwt.") + return cmd } func NewRouterServer(cfg RouterConfig) (*RouterServer, error) { if cfg.NetworkingMode == "" { - cfg.NetworkingMode = NetworkingModeEnvoy + cfg.NetworkingMode = NetworkingModeAgentgateway } var k8sClient client.Client @@ -203,11 +212,24 @@ func NewRouterServer(cfg RouterConfig) (*RouterServer, error) { } } - conn, err := grpc.NewClient(cfg.AteapiAddr, grpc.WithTransportCredentials(credentials.NewTLS(&tls.Config{InsecureSkipVerify: true}))) + authMode, err := ateapiauth.ParseMode(cfg.AteapiAuthMode) + if err != nil { + return nil, fmt.Errorf("invalid --ateapi-auth: %w", err) + } + dialOpts, err := ateapiauth.DialOptions(ateapiauth.ClientConfig{ + Mode: authMode, + CAFile: cfg.AteapiCAFile, + ServerName: cfg.AteapiServerName, + TokenFile: cfg.AteapiTokenFile, + }) + if err != nil { + return nil, fmt.Errorf("building ateapi dial options: %w", err) + } + conn, err := grpc.NewClient(cfg.AteapiAddr, dialOpts...) if err != nil { return nil, fmt.Errorf("failed to establish grpc channel to ateapi client: %w", err) } - slog.Info("Connecting to ateapi", slog.String("address", cfg.AteapiAddr)) + slog.Info("Connecting to ateapi", slog.String("address", cfg.AteapiAddr), slog.String("auth", string(authMode))) apiClient := ateapipb.NewControlClient(conn) diff --git a/hack/create-kind-cluster.sh b/hack/create-kind-cluster.sh index 2915a95a2..bd3ecebef 100755 --- a/hack/create-kind-cluster.sh +++ b/hack/create-kind-cluster.sh @@ -18,6 +18,12 @@ set -o errexit -o nounset -o pipefail ROOT="$(cd "$(dirname "$0")/.." 
&& pwd)" KIND_CLUSTER_NAME="${KIND_CLUSTER_NAME:-kind}" +# Enable the off-by-default certificate feature gates required by the mTLS +# install path (cmd/podcertcontroller). On by default — the Quickstart's +# `hack/install-ate-kind.sh --deploy-ate-system` uses mTLS. Opt out +# (KIND_ENABLE_PODCERT=false) only when installing JWT-mode manifests, which +# do not require these gates. +KIND_ENABLE_PODCERT="${KIND_ENABLE_PODCERT:-true}" reg_name="kind-registry" reg_port="5001" @@ -43,12 +49,16 @@ if [ "$(docker inspect -f '{{.State.Running}}' "${reg_name}" 2>/dev/null || true fi # 2. Create kind configuration with containerdConfigPatches and feature gates -echo "Creating kind configuration for cluster '${KIND_CLUSTER_NAME}'..." +echo "Creating kind configuration for cluster '${KIND_CLUSTER_NAME}' (KIND_ENABLE_PODCERT=${KIND_ENABLE_PODCERT})..." cat < "${ROOT}/bin/kind-config.yaml" kind: Cluster apiVersion: kind.x-k8s.io/v1alpha4 nodes: - role: control-plane +EOF + +if [ "${KIND_ENABLE_PODCERT}" = "true" ]; then +cat <> "${ROOT}/bin/kind-config.yaml" # cmd/podcertcontroller depends on ClusterTrustBundle & PodCertificateRequest. # They are not enabled by default as of Kubernetes v1.36 # https://github.com/kubernetes/kubernetes/blob/master/test/compatibility_lifecycle/reference/versioned_feature_list.yaml @@ -59,6 +69,7 @@ featureGates: runtimeConfig: "certificates.k8s.io/v1beta1": "true" EOF +fi echo "Deleting existing kind cluster '${KIND_CLUSTER_NAME}' if it exists..." "${ROOT}"/hack/kind.sh delete cluster --name "${KIND_CLUSTER_NAME}" || true diff --git a/hack/gen-rbac.sh b/hack/gen-rbac.sh new file mode 100755 index 000000000..af806d29f --- /dev/null +++ b/hack/gen-rbac.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# Generate the controller ClusterRole into the Helm chart and templatize its +# name so multi-release installs do not collide on a cluster-scoped resource. +# +# controller-gen emits a YAML file with a fixed `roleName=` value. 
We post- +# process that file to swap the static name for the chart's fullname helper, +# matching the convention used by every other resource in charts/substrate/. +# +# Invoked via `go generate ./internal/controllers/...`. +set -o errexit -o nounset -o pipefail + +ROOT="$(cd "$(dirname "$0")/.." && pwd)" +OUT="${ROOT}/charts/substrate/templates/role.yaml" + +bash "${ROOT}/hack/run-tool.sh" controller-gen \ + "rbac:headerFile=${ROOT}/hack/boilerplate/yaml.txt,roleName=ate-controller" \ + paths="${ROOT}/internal/controllers/..." \ + "output:rbac:artifacts:config=${ROOT}/charts/substrate/templates/" + +# Templatize the ClusterRole name. controller-gen emits ` name: ate-controller` +# at column 0; the substitution is exact-match to stay robust. +sed -i 's|^ name: ate-controller$| name: {{ include "substrate.fullname" (list "ate-controller" .) }}|' "${OUT}" diff --git a/hack/install-ate-kind-jwt.sh b/hack/install-ate-kind-jwt.sh new file mode 100755 index 000000000..c3ef609e0 --- /dev/null +++ b/hack/install-ate-kind-jwt.sh @@ -0,0 +1,112 @@ +#!/usr/bin/env bash +# Install Agent Substrate on a kind cluster in JWT auth mode. +# +# Unlike the mTLS install path (hack/install-ate-kind.sh), this works on a +# stock Kubernetes cluster — no ClusterTrustBundle / PodCertificateRequest +# feature gates required. Suitable for a kind cluster created with +# KIND_ENABLE_PODCERT=false hack/create-kind-cluster.sh. +# +# Steps: +# 1. Bootstrap a self-signed Secret/ateapi-tls + ConfigMap/ateapi-ca via +# openssl (the chart references these but expects them to exist +# out-of-band). +# 2. Render the chart with auth.mode=jwt + kind-specific values, resolve +# ko:// image refs against a local registry, and apply. +# 3. Apply the kind-only extras (rustfs storage, OTel collector) from +# manifests/ate-install/kind/. +set -o errexit -o nounset -o pipefail + +ROOT="$(cd "$(dirname "$0")/.." 
&& pwd)" +NS="${NS:-ate-system}" +KUBECTL_CONTEXT="${KUBECTL_CONTEXT:-}" +KO_DOCKER_REPO="${KO_DOCKER_REPO:-localhost:5001}" +KO_DEFAULTPLATFORMS="${KO_DEFAULTPLATFORMS:-linux/$(go env GOARCH)}" + +export KO_DOCKER_REPO KO_DEFAULTPLATFORMS + +run_kubectl() { + kubectl ${KUBECTL_CONTEXT:+--context=${KUBECTL_CONTEXT}} "$@" +} + +log_step() { + echo -e "\033[1;36m[step]:\033[0m $1" +} + +ensure_namespace() { + log_step "ensure_namespace ${NS}" + run_kubectl create namespace "${NS}" --dry-run=client -o yaml | run_kubectl apply -f - +} + +# Generate a self-signed CA + server cert pair and install them as the +# Secret/ConfigMap pair the chart references. Idempotent: skips if both +# resources already exist. +bootstrap_jwt_tls() { + log_step "bootstrap_jwt_tls" + if run_kubectl get secret -n "${NS}" ateapi-tls >/dev/null 2>&1 \ + && run_kubectl get configmap -n "${NS}" ateapi-ca >/dev/null 2>&1; then + echo "Secret/ateapi-tls and ConfigMap/ateapi-ca already present — skipping." + return + fi + + local tmp + tmp=$(mktemp -d) + trap 'rm -rf "$tmp"' RETURN + + openssl req -x509 -newkey rsa:2048 -nodes -days 3650 \ + -subj "/CN=ateapi-ca" \ + -keyout "${tmp}/ca.key" -out "${tmp}/ca.crt" >/dev/null 2>&1 + + openssl req -newkey rsa:2048 -nodes \ + -subj "/CN=api.${NS}.svc" \ + -keyout "${tmp}/server.key" -out "${tmp}/server.csr" >/dev/null 2>&1 + + cat > "${tmp}/server.ext" </dev/null 2>&1 + + run_kubectl -n "${NS}" create secret tls ateapi-tls \ + --cert="${tmp}/server.crt" --key="${tmp}/server.key" \ + --dry-run=client -o yaml | run_kubectl apply -f - + + run_kubectl -n "${NS}" create configmap ateapi-ca \ + --from-file=ca.crt="${tmp}/ca.crt" \ + --dry-run=client -o yaml | run_kubectl apply -f - +} + +apply_chart() { + log_step "apply_chart (helm template | ko resolve | kubectl apply)" + local rendered + rendered=$(helm template substrate "${ROOT}/charts/substrate" \ + --namespace "${NS}" \ + -f "${ROOT}/hack/values-kind-jwt.yaml") + + # ko resolve replaces ko:// refs with 
built+pushed image refs. + echo "${rendered}" | bash "${ROOT}/hack/run-tool.sh" ko resolve -f - \ + | run_kubectl apply -f - +} + +apply_kind_extras() { + log_step "apply_kind_extras (rustfs + otel-collector)" + run_kubectl apply -f "${ROOT}/manifests/ate-install/kind/rustfs.yaml" + run_kubectl apply -f "${ROOT}/manifests/ate-install/kind/otel-collector.yaml" +} + +wait_rollouts() { + log_step "wait_rollouts" + run_kubectl -n "${NS}" rollout status deployment/ate-api-server-deployment --timeout=180s + run_kubectl -n "${NS}" rollout status deployment/ate-controller --timeout=180s + run_kubectl -n "${NS}" rollout status deployment/atenet-router --timeout=180s + run_kubectl -n "${NS}" rollout status daemonset/atelet --timeout=180s + run_kubectl -n "${NS}" rollout status statefulset/valkey-cluster --timeout=180s +} + +ensure_namespace +bootstrap_jwt_tls +apply_chart +apply_kind_extras +wait_rollouts + +echo "Substrate (JWT mode) installed in namespace ${NS}." diff --git a/hack/install-ate.sh b/hack/install-ate.sh index c8abaf3c4..729a308d0 100755 --- a/hack/install-ate.sh +++ b/hack/install-ate.sh @@ -271,6 +271,7 @@ ensure_crds() { deploy_crds() { log_step "deploy_crds" run_ko apply -f manifests/ate-install/generated + run_kubectl apply -f manifests/ate-install/role.yaml } deploy_ate_system() { @@ -278,7 +279,7 @@ deploy_ate_system() { ensure_crds # Ensure namespace exists - run_kubectl apply -f manifests/ate-install/ate-system-namespace.yaml \ + run_kubectl apply -f manifests/ate-install/namespace.yaml \ && run_kubectl wait --for=jsonpath='{.status.phase}'=Active namespace/ate-system --timeout=60s ensure_apiserver_prerequisites @@ -335,7 +336,7 @@ deploy_ate_apiserver() { ensure_crds # Ensure namespace exists - run_kubectl apply -f manifests/ate-install/ate-system-namespace.yaml \ + run_kubectl apply -f manifests/ate-install/namespace.yaml \ && run_kubectl wait --for=jsonpath='{.status.phase}'=Active namespace/ate-system --timeout=60s ensure_apiserver_prerequisites 
@@ -349,7 +350,7 @@ deploy_atelet() { ensure_crds # Ensure namespace exists - run_kubectl apply -f manifests/ate-install/ate-system-namespace.yaml \ + run_kubectl apply -f manifests/ate-install/namespace.yaml \ && run_kubectl wait --for=jsonpath='{.status.phase}'=Active namespace/ate-system --timeout=60s local manifest="" @@ -369,7 +370,7 @@ deploy_atenet() { ensure_crds # Ensure namespace exists - run_kubectl apply -f manifests/ate-install/ate-system-namespace.yaml \ + run_kubectl apply -f manifests/ate-install/namespace.yaml \ && run_kubectl wait --for=jsonpath='{.status.phase}'=Active namespace/ate-system --timeout=60s run_ko apply -f "$(atenet_router_manifest)" diff --git a/hack/render-manifests.sh b/hack/render-manifests.sh new file mode 100755 index 000000000..b326949b5 --- /dev/null +++ b/hack/render-manifests.sh @@ -0,0 +1,102 @@ +#!/usr/bin/env bash +# Render the substrate Helm chart into manifests/ate-install/ (mTLS-mode +# install) — the canonical kubectl-apply install path. The chart at +# charts/substrate/ is the single source of truth; this script only renders. +# +# Usage: +# hack/render-manifests.sh # write into manifests/ate-install/ +# hack/render-manifests.sh --check # fail if rendered output differs +# +set -o errexit -o nounset -o pipefail + +ROOT="$(cd "$(dirname "$0")/.." && pwd)" +OUT_DIR="${ROOT}/manifests/ate-install" +CHART_DIR="${ROOT}/charts/substrate" +CHECK_MODE="false" + +if [ "${1:-}" = "--check" ]; then + CHECK_MODE="true" +fi + +if ! command -v helm >/dev/null 2>&1; then + echo "helm not found in PATH" >&2 + exit 1 +fi + +TMP_DIR="$(mktemp -d)" +trap 'rm -rf "$TMP_DIR"' EXIT + +helm template substrate "${CHART_DIR}" \ + --namespace ate-system \ + --set auth.mode=mtls \ + --set createNamespace=true \ + > "${TMP_DIR}/all.yaml" + +# Split into per-source files so the directory structure mirrors the chart +# templates, making diffs friendlier. 
+python3 - "${TMP_DIR}/all.yaml" "${TMP_DIR}/out" <<'PY'
+# Split the single `helm template` stream (argv[1]) into one file per chart
+# template under the directory argv[2]. Every written file starts with the
+# license / DO-NOT-EDIT header below. Only the standard library is used.
+import os, re, sys
+in_path, out_dir = sys.argv[1], sys.argv[2]
+os.makedirs(out_dir, exist_ok=True)
+
+with open(in_path) as f:
+    raw = f.read()
+
+
+def has_yaml_content(text):
+    """Return True if text has a line that is neither blank nor a comment."""
+    return any(
+        line.strip() and not line.lstrip().startswith("#")
+        for line in text.splitlines()
+    )
+
+
+# Helm prepends a "# Source: <chart>/templates/<file>" comment to each doc.
+docs_by_source = {}
+# Prepending "\n" lets the separator also match a "---" on the very first
+# line of the stream, so the first document is not written with a stray
+# leading "---".
+for doc in ("\n" + raw).split('\n---\n'):
+    m = re.search(r'#\s*Source:\s*\S+/templates/(\S+)', doc)
+    src = m.group(1) if m else "misc.yaml"
+    # Drop the leading "# Source:" line from the written file.
+    cleaned = re.sub(r'^\s*#\s*Source:.*\n', '', doc, count=1, flags=re.MULTILINE)
+    # Skip documents that are empty or hold only comments (for example a
+    # template that rendered nothing but its license header); writing them
+    # would leave a dangling "---" followed by comments in the output file.
+    if not has_yaml_content(cleaned):
+        continue
+    docs_by_source.setdefault(src, []).append(cleaned.strip())
+
+for src, docs in docs_by_source.items():
+    header = (
+        "# Copyright 2026 Google LLC\n"
+        "#\n"
+        "# Licensed under the Apache License, Version 2.0 (the \"License\");\n"
+        "# you may not use this file except in compliance with the License.\n"
+        "# You may obtain a copy of the License at\n"
+        "#\n"
+        "# http://www.apache.org/licenses/LICENSE-2.0\n"
+        "#\n"
+        "# Unless required by applicable law or agreed to in writing, software\n"
+        "# distributed under the License is distributed on an \"AS IS\" BASIS,\n"
+        "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n"
+        "# See the License for the specific language governing permissions and\n"
+        "# limitations under the License.\n"
+        "\n"
+        "# DO NOT EDIT — generated from charts/substrate by hack/render-manifests.sh.\n"
+        "# Run `make helm-template` to regenerate.\n"
+        "\n"
+    )
+    # One output file per template; its documents are re-joined with "---".
+    with open(os.path.join(out_dir, src), "w") as out:
+        out.write(header)
+        out.write("\n---\n".join(docs))
+        out.write("\n")
+PY
+
+if [ "${CHECK_MODE}" = "true" ]; then
+  # Only compare top-level files; subdirs like generated/ and kind/ are not
+  # produced by the chart and live alongside it intentionally.
+ CHECK_TMP="$(mktemp -d)" + trap 'rm -rf "$TMP_DIR" "$CHECK_TMP"' EXIT + mkdir -p "${CHECK_TMP}/current" + find "${OUT_DIR}" -maxdepth 1 -type f -name '*.yaml' -exec cp {} "${CHECK_TMP}/current/" \; + if ! diff -ruN "${CHECK_TMP}/current" "${TMP_DIR}/out" >/dev/null 2>&1; then + echo "manifests/ate-install/ is out of date. Run: make helm-template" >&2 + diff -ruN "${CHECK_TMP}/current" "${TMP_DIR}/out" | head -60 >&2 || true + exit 1 + fi + echo "manifests/ate-install/ matches chart output." + exit 0 +fi + +# Replace contents (preserve kind/ and generated/ subdirs which are not chart output). +mkdir -p "${OUT_DIR}" +find "${OUT_DIR}" -maxdepth 1 -type f -name '*.yaml' -delete +cp "${TMP_DIR}/out/"*.yaml "${OUT_DIR}/" +echo "Rendered $(ls "${OUT_DIR}"/*.yaml | wc -l | xargs) manifest files into ${OUT_DIR}" diff --git a/hack/values-kind-jwt.yaml b/hack/values-kind-jwt.yaml new file mode 100644 index 000000000..493ee9af5 --- /dev/null +++ b/hack/values-kind-jwt.yaml @@ -0,0 +1,36 @@ +# Helm values for installing substrate on a kind cluster in JWT mode. +# Used by hack/install-ate-kind-jwt.sh — does NOT require the off-by-default +# certificate feature gates that the mTLS install path needs. + +auth: + mode: jwt + jwt: + # Kind's default API server issuer. + issuer: https://kubernetes.default.svc.cluster.local + audience: api.ate-system.svc + serverCertSecret: ateapi-tls + caBundleConfigMap: ateapi-ca + +createNamespace: true + +# In-cluster OTel collector deployed alongside via manifests/ate-install/kind/otel-collector.yaml +otel: + endpoint: http://opentelemetry-collector.otel-system.svc:4317 + +atelet: + gcpAuthForImagePulls: false + storageBackend: s3 + extraArgs: + - --localhost-registry-replacement=kind-registry:5000 + extraEnv: + - name: AWS_REGION + value: us-east-1 + - name: AWS_ENDPOINT_URL + value: http://rustfs.ate-system.svc:9000 + - name: AWS_S3_USE_PATH_STYLE + value: "true" + # TODO: use a secret / identity management for rustfs credentials. 
+ - name: AWS_ACCESS_KEY_ID + value: rustfsadmin + - name: AWS_SECRET_ACCESS_KEY + value: rustfsadmin diff --git a/internal/ateapiauth/client.go b/internal/ateapiauth/client.go new file mode 100644 index 000000000..db00807f7 --- /dev/null +++ b/internal/ateapiauth/client.go @@ -0,0 +1,110 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ateapiauth + +import ( + "context" + "crypto/tls" + "crypto/x509" + "fmt" + "os" + "strings" + + "google.golang.org/grpc" + "google.golang.org/grpc/credentials" +) + +// ClientConfig configures how to dial the ateapi gRPC server. +// +// - Mode=ModeMTLS: insecure TLS dial (InsecureSkipVerify=true). Client +// identity is expected to come from mTLS credentials projected into +// the pod (servicedns.podcert.ate.dev). No app-level credentials. +// - Mode=ModeJWT: validates the server cert against CAFile, sends a Bearer +// token from TokenFile as per-RPC credentials. +type ClientConfig struct { + Mode Mode + + // CAFile is a PEM file containing CA certs that sign the server cert. + // Required for ModeJWT. Ignored for ModeMTLS. + CAFile string + + // ServerName overrides SNI / hostname verification. Optional. + ServerName string + + // TokenFile is a path to a Kubernetes projected ServiceAccount token used + // as a Bearer credential. Required for ModeJWT. + TokenFile string +} + +// DialOptions returns the grpc.DialOption set described by cfg, suitable to +// pass to grpc.NewClient. 
+//
+// With ModeMTLS (or an empty Mode) the returned options dial over TLS without
+// verifying the server certificate and attach no per-RPC credentials. With
+// ModeJWT they verify the server against the CA bundle in cfg.CAFile and send
+// the token stored at cfg.TokenFile as a Bearer credential on every RPC.
+//
+// An error is returned when ModeJWT is selected and CAFile or TokenFile is
+// empty, when the CA file cannot be read or contains no certificates, or when
+// cfg.Mode is not a known mode.
+func DialOptions(cfg ClientConfig) ([]grpc.DialOption, error) {
+	switch cfg.Mode {
+	case "", ModeMTLS:
+		// Server verification is deliberately skipped in this mode. The TLS
+		// version floor is still pinned so both modes share the same minimum.
+		tlsCfg := &tls.Config{
+			MinVersion:         tls.VersionTLS12,
+			InsecureSkipVerify: true, //nolint:gosec // explicit opt-in
+		}
+		return []grpc.DialOption{
+			grpc.WithTransportCredentials(credentials.NewTLS(tlsCfg)),
+		}, nil
+
+	case ModeJWT:
+		if cfg.CAFile == "" {
+			return nil, fmt.Errorf("ateapiauth: jwt mode requires CAFile")
+		}
+		if cfg.TokenFile == "" {
+			return nil, fmt.Errorf("ateapiauth: jwt mode requires TokenFile")
+		}
+		caPEM, err := os.ReadFile(cfg.CAFile)
+		if err != nil {
+			return nil, fmt.Errorf("ateapiauth: reading CA file: %w", err)
+		}
+		// Trust only the CAs from the supplied bundle, not the system roots.
+		pool := x509.NewCertPool()
+		if !pool.AppendCertsFromPEM(caPEM) {
+			return nil, fmt.Errorf("ateapiauth: no certificates found in CA file %q", cfg.CAFile)
+		}
+		tlsCfg := &tls.Config{
+			MinVersion: tls.VersionTLS12,
+			RootCAs:    pool,
+			ServerName: cfg.ServerName,
+		}
+		return []grpc.DialOption{
+			grpc.WithTransportCredentials(credentials.NewTLS(tlsCfg)),
+			// The token file is re-read per RPC, so only its path is captured here.
+			grpc.WithPerRPCCredentials(&fileTokenCreds{path: cfg.TokenFile}),
+		}, nil
+
+	default:
+		return nil, fmt.Errorf("ateapiauth: unknown client mode %q", cfg.Mode)
+	}
+}
+
+// fileTokenCreds reads a Kubernetes projected SA token from disk for every
+// RPC. Kubernetes refreshes the file in place; reading it each time picks up
+// rotations.
+type fileTokenCreds struct {
+	// path is the token file that GetRequestMetadata re-reads on each call.
+	path string
+}
+
+// GetRequestMetadata reads the token file and returns its contents as an
+// "authorization: Bearer <token>" metadata entry. Surrounding whitespace is
+// trimmed. A file that cannot be read, or that is empty after trimming, is
+// reported as an error instead of producing an empty credential. The context
+// and URI arguments are not used.
+func (c *fileTokenCreds) GetRequestMetadata(_ context.Context, _ ...string) (map[string]string, error) {
+	b, err := os.ReadFile(c.path)
+	if err != nil {
+		return nil, fmt.Errorf("ateapiauth: reading token file %q: %w", c.path, err)
+	}
+	tok := strings.TrimSpace(string(b))
+	if tok == "" {
+		return nil, fmt.Errorf("ateapiauth: token file %q is empty", c.path)
+	}
+	return map[string]string{"authorization": "Bearer " + tok}, nil
+}
+
+// RequireTransportSecurity always reports true, declaring that these
+// credentials need a secure transport.
+func (c *fileTokenCreds) RequireTransportSecurity() bool { return true }
diff --git a/internal/ateapiauth/server.go b/internal/ateapiauth/server.go
new file mode 100644
index 000000000..784506207
--- /dev/null
+++ b/internal/ateapiauth/server.go
@@ -0,0 +1,152 @@
+// Copyright 2026 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package ateapiauth adds optional Kubernetes ServiceAccount JWT
+// authentication on top of the ateapi gRPC server, and a matching client
+// dial helper. It does not replace the existing TLS / mTLS path — the
+// server's transport credentials still apply unchanged. Set Mode=ModeJWT
+// on the server to require an `authorization: Bearer <token>` header
+// on every RPC; Mode=ModeMTLS (the default) leaves identity to the
+// transport-layer mTLS credentials.
+package ateapiauth
+
+import (
+	"context"
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/agent-substrate/substrate/internal/k8sjwt"
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/codes"
+	"google.golang.org/grpc/metadata"
+	"google.golang.org/grpc/status"
+)
+
+// Mode selects whether the JWT interceptor enforces a Bearer token.
+type Mode string
+
+const (
+	// ModeMTLS leaves authentication to the transport; the interceptor
+	// passes every RPC through unchanged.
+	ModeMTLS Mode = "mtls"
+	// ModeJWT makes the interceptor require a verified Bearer token.
+	ModeJWT Mode = "jwt"
+)
+
+// ParseMode parses a flag value into a Mode, defaulting to ModeMTLS on empty.
+// ModeMTLS means identity is established by the transport-layer mTLS
+// credentials; the interceptor performs no app-level checks. ModeJWT
+// additionally requires a Kubernetes SA Bearer token on every RPC.
+// Any other value yields an empty Mode and an error naming the accepted values.
+func ParseMode(s string) (Mode, error) {
+	switch Mode(s) {
+	case "", ModeMTLS:
+		return ModeMTLS, nil
+	case ModeJWT:
+		return ModeJWT, nil
+	default:
+		return "", fmt.Errorf("unknown auth mode %q (want mtls|jwt)", s)
+	}
+}
+
+// ServerConfig configures the server-side JWT interceptor.
+type ServerConfig struct {
+	Mode     Mode
+	Issuer   string // OIDC issuer URL for JWT verification
+	Audience string // expected audience claim for JWT verification
+
+	// Now returns the current time; nil uses time.Now. Exposed for tests.
+	Now func() time.Time
+}
+
+// ctxKey is the unexported context key under which verified claims are
+// stored, so only this package can set or read them.
+type ctxKey struct{}
+
+// ClaimsFromContext returns the verified Kubernetes JWT claims that the
+// interceptor attached to ctx, if any.
+func ClaimsFromContext(ctx context.Context) (*k8sjwt.KubernetesClaims, bool) {
+	c, ok := ctx.Value(ctxKey{}).(*k8sjwt.KubernetesClaims)
+	return c, ok
+}
+
+// contextWithClaims returns a copy of ctx carrying c under ctxKey, where
+// ClaimsFromContext looks it up.
+func contextWithClaims(ctx context.Context, c *k8sjwt.KubernetesClaims) context.Context {
+	return context.WithValue(ctx, ctxKey{}, c)
+}
+
+// UnaryServerInterceptor returns a gRPC unary interceptor enforcing cfg.
+// A request that fails authentication is rejected before the handler runs;
+// otherwise the handler receives the context returned by authenticate.
+func UnaryServerInterceptor(cfg ServerConfig) grpc.UnaryServerInterceptor {
+	return func(ctx context.Context, req any, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (any, error) {
+		newCtx, err := authenticate(ctx, cfg)
+		if err != nil {
+			return nil, err
+		}
+		return handler(newCtx, req)
+	}
+}
+
+// StreamServerInterceptor returns a gRPC stream interceptor enforcing cfg.
+// Authentication runs once, when the stream is opened; the handler then sees
+// a stream whose Context() is the one returned by authenticate.
+func StreamServerInterceptor(cfg ServerConfig) grpc.StreamServerInterceptor {
+	return func(srv any, ss grpc.ServerStream, info *grpc.StreamServerInfo, handler grpc.StreamHandler) error {
+		newCtx, err := authenticate(ss.Context(), cfg)
+		if err != nil {
+			return err
+		}
+		return handler(srv, &wrappedStream{ServerStream: ss, ctx: newCtx})
+	}
+}
+
+// wrappedStream is a grpc.ServerStream whose Context is replaced by ctx; all
+// other methods are forwarded to the embedded stream.
+type wrappedStream struct {
+	grpc.ServerStream
+	ctx context.Context
+}
+
+// Context returns the replacement context instead of the embedded stream's.
+func (w *wrappedStream) Context() context.Context { return w.ctx }
+
+// authenticate applies cfg to one incoming RPC. With ModeMTLS or an empty
+// Mode it returns ctx unchanged. Otherwise it extracts the Bearer token from
+// the incoming metadata, verifies it with k8sjwt.Verify against cfg.Issuer
+// and cfg.Audience at the current time (cfg.Now, or time.Now when nil), and
+// returns a context carrying the verified claims. A missing or invalid token
+// yields a codes.Unauthenticated status error.
+func authenticate(ctx context.Context, cfg ServerConfig) (context.Context, error) {
+	if cfg.Mode == "" || cfg.Mode == ModeMTLS {
+		return ctx, nil
+	}
+
+	now := time.Now
+	if cfg.Now != nil {
+		now = cfg.Now
+	}
+
+	bearer, ok := bearerToken(ctx)
+	if !ok {
+		return nil, status.Error(codes.Unauthenticated, "missing bearer token")
+	}
+	claims, err := k8sjwt.Verify(ctx, bearer, cfg.Issuer, cfg.Audience, now())
+	if err != nil {
+		return nil, status.Errorf(codes.Unauthenticated, "invalid bearer token: %v", err)
+	}
+	return contextWithClaims(ctx, claims), nil
+}
+
+// bearerToken extracts the token from the first "authorization" value of the
+// incoming metadata. It reports false when there is no incoming metadata, no
+// authorization value, the value does not use the Bearer scheme, or the token
+// is empty after trimming whitespace.
+func bearerToken(ctx context.Context) (string, bool) {
+	md, ok := metadata.FromIncomingContext(ctx)
+	if !ok {
+		return "", false
+	}
+	vals := md.Get("authorization")
+	if len(vals) == 0 {
+		return "", false
+	}
+	const prefix = "Bearer "
+	v := vals[0]
+	// HTTP auth-scheme names are case-insensitive (RFC 7235), so accept
+	// "bearer <token>" and "BEARER <token>" as well as "Bearer <token>".
+	if len(v) < len(prefix) || !strings.EqualFold(v[:len(prefix)], prefix) {
+		return "", false
+	}
+	tok := strings.TrimSpace(v[len(prefix):])
+	if tok == "" {
+		return "", false
+	}
+	return tok, true
+}
diff --git a/internal/ateapiauth/server_test.go b/internal/ateapiauth/server_test.go
new file mode 100644
index 000000000..cc8cd35d9 --- /dev/null +++ b/internal/ateapiauth/server_test.go @@ -0,0 +1,103 @@ +// Copyright 2026 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ateapiauth + +import ( + "context" + "testing" + + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/metadata" + "google.golang.org/grpc/status" +) + +func TestParseMode(t *testing.T) { + cases := []struct { + in string + want Mode + wantErr bool + }{ + {"", ModeMTLS, false}, + {"mtls", ModeMTLS, false}, + {"jwt", ModeJWT, false}, + {"none", "", true}, + {"bogus", "", true}, + } + for _, tc := range cases { + got, err := ParseMode(tc.in) + if (err != nil) != tc.wantErr { + t.Errorf("ParseMode(%q) err=%v wantErr=%v", tc.in, err, tc.wantErr) + } + if !tc.wantErr && got != tc.want { + t.Errorf("ParseMode(%q)=%v want %v", tc.in, got, tc.want) + } + } +} + +func TestAuthenticate_MTLS_AllowsAnonymous(t *testing.T) { + _, err := authenticate(context.Background(), ServerConfig{Mode: ModeMTLS}) + if err != nil { + t.Fatalf("ModeMTLS should not error: %v", err) + } +} + +func TestAuthenticate_JWT_RequiresBearer(t *testing.T) { + cfg := ServerConfig{Mode: ModeJWT, Issuer: "https://example", Audience: "ateapi"} + + // Missing header -> Unauthenticated. 
+ _, err := authenticate(context.Background(), cfg) + if code := status.Code(err); code != codes.Unauthenticated { + t.Fatalf("missing bearer: want Unauthenticated, got %v (err=%v)", code, err) + } + + // Garbage bearer -> Unauthenticated (k8sjwt.Verify will fail). + ctx := metadata.NewIncomingContext(context.Background(), metadata.Pairs("authorization", "Bearer not-a-jwt")) + _, err = authenticate(ctx, cfg) + if code := status.Code(err); code != codes.Unauthenticated { + t.Fatalf("bad bearer: want Unauthenticated, got %v (err=%v)", code, err) + } +} + +func TestBearerToken(t *testing.T) { + cases := []struct { + name string + hdr string + want string + found bool + }{ + {"missing", "", "", false}, + {"no prefix", "abc", "", false}, + {"prefix", "Bearer abc", "abc", true}, + {"prefix with spaces", "Bearer abc ", "abc", true}, + {"empty after prefix", "Bearer ", "", false}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + var ctx context.Context = context.Background() + if tc.hdr != "" { + ctx = metadata.NewIncomingContext(ctx, metadata.Pairs("authorization", tc.hdr)) + } + got, ok := bearerToken(ctx) + if ok != tc.found || got != tc.want { + t.Errorf("bearerToken=(%q,%v) want (%q,%v)", got, ok, tc.want, tc.found) + } + }) + } +} + +// Build-time check. +var _ grpc.UnaryServerInterceptor = UnaryServerInterceptor(ServerConfig{}) +var _ grpc.StreamServerInterceptor = StreamServerInterceptor(ServerConfig{}) diff --git a/internal/controllers/gen.go b/internal/controllers/gen.go index df7df00a8..7ac77eff7 100644 --- a/internal/controllers/gen.go +++ b/internal/controllers/gen.go @@ -14,4 +14,4 @@ package controllers -//go:generate bash ../../hack/run-tool.sh controller-gen rbac:headerFile=../../hack/boilerplate/sh.txt,roleName=ate-controller paths="./..." 
output:rbac:artifacts:config=../../manifests/ate-install/generated/ +//go:generate bash ../../hack/gen-rbac.sh diff --git a/manifests/ate-install/ate-api-server.yaml b/manifests/ate-install/ate-api-server.yaml index 8d3c17086..f88f02173 100644 --- a/manifests/ate-install/ate-api-server.yaml +++ b/manifests/ate-install/ate-api-server.yaml @@ -12,8 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +# DO NOT EDIT — generated from charts/substrate by hack/render-manifests.sh. +# Run `make helm-template` to regenerate. -# Define Permissions (Read-Only for Pods) +apiVersion: v1 +kind: ServiceAccount +metadata: + name: ate-api-server + namespace: ate-system +--- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: @@ -31,14 +38,6 @@ rules: # ActorTemplates (e.g. via a namespace-scoped Role + RoleBinding using # resourceNames). --- -# Create Service Account for Workload Identity -apiVersion: v1 -kind: ServiceAccount -metadata: - name: ate-api-server - namespace: ate-system ---- -# 4. Bind Identity to Permissions apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: @@ -52,7 +51,21 @@ roleRef: name: ate-api-server-role apiGroup: rbac.authorization.k8s.io --- -# 5. 
Deploy the API Server +apiVersion: v1 +kind: Service +metadata: + name: api + namespace: ate-system +spec: + type: ClusterIP + selector: + app: ate-api-server + ports: + - name: grpc + protocol: TCP + port: 443 + targetPort: 443 +--- apiVersion: apps/v1 kind: Deployment metadata: @@ -76,18 +89,18 @@ spec: - name: ate-api-server image: ko://github.com/agent-substrate/substrate/cmd/ateapi args: - - "--grpc-listen-addr=0.0.0.0:443" - - "--grpc-server-cred-bundle=/run/servicedns.podcert.ate.dev/credential-bundle.pem" - - --redis-cluster-address=@env - - --redis-ca-certs=/etc/valkey-ca/ca.crt - - --redis-use-iam-auth=@env - - --redis-tls-server-name=@env - - --redis-client-cert=@env - - --client-jwt-issuer=@env - - --client-jwt-audience=api.ate-system.svc - - --session-id-jwt-pool=/run/session-id-jwt-pool/pool.json - - --session-id-ca-pool=/run/session-id-ca-pool/pool.json - - --workerpool-ca-certs=/run/workerpool-ca-certs/trust-bundle.pem + - "--grpc-listen-addr=0.0.0.0:443" + - "--grpc-server-cred-bundle=/run/servicedns.podcert.ate.dev/credential-bundle.pem" + - "--redis-cluster-address=@env" + - "--redis-ca-certs=/etc/valkey-ca/ca.crt" + - "--redis-use-iam-auth=@env" + - "--redis-tls-server-name=@env" + - "--redis-client-cert=@env" + - "--client-jwt-issuer=@env" + - "--client-jwt-audience=api.ate-system.svc" + - "--session-id-jwt-pool=/run/session-id-jwt-pool/pool.json" + - "--session-id-ca-pool=/run/session-id-ca-pool/pool.json" + - "--workerpool-ca-certs=/run/workerpool-ca-certs/trust-bundle.pem" env: - name: POD_NAME valueFrom: @@ -105,29 +118,16 @@ spec: value: k8s.namespace.name=$(POD_NAMESPACE),k8s.pod.name=$(POD_NAME),k8s.pod.uid=$(POD_UID),service.instance.id=$(POD_UID) - name: OTEL_EXPORTER_OTLP_ENDPOINT value: http://opentelemetry-collector.gke-managed-otel.svc.cluster.local:4317 - # Inject env vars from a ConfigMap created by each developer. 
This lets - # each developer customize their own redis address, etc, without having - # to edit this manifest, which can remain constant across all - # developers. envFrom: - configMapRef: name: ate-api-server-envvars optional: true volumeMounts: - - name: "servicedns" - mountPath: "/run/servicedns.podcert.ate.dev" - - name: "session-id-jwt-pool" - mountPath: "/run/session-id-jwt-pool" - # Note: See README.md for how to generate this secret. - - name: "valkey-ca-certs" - mountPath: "/etc/valkey-ca" - readOnly: true - - name: "session-id-ca-pool" - mountPath: "/run/session-id-ca-pool" - readOnly: true - - name: "workerpool-ca-certs" - mountPath: "/run/workerpool-ca-certs" - readOnly: true + - { name: servicedns, mountPath: /run/servicedns.podcert.ate.dev } + - { name: session-id-jwt-pool, mountPath: /run/session-id-jwt-pool } + - { name: valkey-ca-certs, mountPath: /etc/valkey-ca, readOnly: true } + - { name: session-id-ca-pool, mountPath: /run/session-id-ca-pool, readOnly: true } + - { name: workerpool-ca-certs, mountPath: /run/workerpool-ca-certs, readOnly: true } ports: - containerPort: 443 - name: prometheus @@ -139,38 +139,35 @@ spec: initialDelaySeconds: 5 periodSeconds: 2 volumes: - - name: "servicedns" + - name: servicedns projected: sources: - podCertificate: signerName: servicedns.podcert.ate.dev/identity keyType: ECDSAP256 credentialBundlePath: credential-bundle.pem - - name: "session-id-jwt-pool" + - name: session-id-jwt-pool projected: sources: - secret: - name: "session-id-jwt-pool" + name: session-id-jwt-pool items: - - key: "pool" - path: "pool.json" - - name: "valkey-ca-certs" + - { key: pool, path: pool.json } + - name: valkey-ca-certs projected: sources: - secret: - name: "valkey-ca-certs" + name: valkey-ca-certs items: - - key: "ca.crt" - path: "ca.crt" - - name: "session-id-ca-pool" + - { key: ca.crt, path: ca.crt } + - name: session-id-ca-pool projected: sources: - secret: - name: "session-id-ca-pool" + name: session-id-ca-pool items: - - key: 
"pool" - path: "pool.json" - - name: "workerpool-ca-certs" + - { key: pool, path: pool.json } + - name: workerpool-ca-certs projected: sources: - clusterTrustBundle: @@ -179,19 +176,3 @@ spec: matchLabels: podcert.ate.dev/canarying: live path: trust-bundle.pem ---- -# 6. Expose the Session Assigner -apiVersion: v1 -kind: Service -metadata: - name: api - namespace: ate-system -spec: - type: ClusterIP - selector: - app: ate-api-server - ports: - - name: grpc - protocol: TCP - port: 443 - targetPort: 443 diff --git a/manifests/ate-install/ate-controller.yaml b/manifests/ate-install/ate-controller.yaml index 1f2756a27..534481ae0 100644 --- a/manifests/ate-install/ate-controller.yaml +++ b/manifests/ate-install/ate-controller.yaml @@ -12,12 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -kind: Namespace -apiVersion: v1 -metadata: - name: ate-system +# DO NOT EDIT — generated from charts/substrate by hack/render-manifests.sh. +# Run `make helm-template` to regenerate. ---- apiVersion: v1 kind: ServiceAccount metadata: @@ -25,7 +22,6 @@ metadata: namespace: ate-system labels: apps: ate-controller - --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding @@ -39,9 +35,7 @@ roleRef: kind: ClusterRole name: ate-controller apiGroup: rbac.authorization.k8s.io - --- - kind: Service apiVersion: v1 metadata: @@ -57,9 +51,7 @@ spec: port: 8080 targetPort: metrics protocol: TCP - --- - kind: Deployment apiVersion: apps/v1 metadata: diff --git a/manifests/ate-install/atelet.yaml b/manifests/ate-install/atelet.yaml index c9564c40d..4c6e26cb8 100644 --- a/manifests/ate-install/atelet.yaml +++ b/manifests/ate-install/atelet.yaml @@ -12,14 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -# 1. Create Service Account +# DO NOT EDIT — generated from charts/substrate by hack/render-manifests.sh. +# Run `make helm-template` to regenerate. 
+ +# atelet — identical across auth modes (does not dial ateapi). apiVersion: v1 kind: ServiceAccount metadata: name: atelet namespace: ate-system --- -# 2. Define Permissions apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: @@ -29,7 +31,6 @@ rules: resources: ["pods"] verbs: ["get", "watch", "list"] --- -# 3. Bind Identity to Permissions apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: @@ -43,7 +44,6 @@ roleRef: name: atelet-role apiGroup: rbac.authorization.k8s.io --- -# 4. Create DaemonSet apiVersion: apps/v1 kind: DaemonSet metadata: diff --git a/manifests/ate-install/atenet-dns.yaml b/manifests/ate-install/atenet-dns.yaml index 46968a386..36d11af3f 100644 --- a/manifests/ate-install/atenet-dns.yaml +++ b/manifests/ate-install/atenet-dns.yaml @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +# DO NOT EDIT — generated from charts/substrate by hack/render-manifests.sh. +# Run `make helm-template` to regenerate. + +# atenet-dns — identical across auth modes (does not dial ateapi). 
apiVersion: v1 kind: ServiceAccount metadata: @@ -34,6 +38,16 @@ rules: verbs: ["get", "list", "watch", "create", "update", "patch"] --- apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: atenet-dns + namespace: kube-system +rules: +- apiGroups: [""] + resources: ["configmaps"] + verbs: ["get", "list", "watch", "create", "update", "patch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: name: atenet-dns @@ -48,16 +62,6 @@ roleRef: apiGroup: rbac.authorization.k8s.io --- apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: atenet-dns - namespace: kube-system -rules: -- apiGroups: [""] - resources: ["configmaps"] - verbs: ["get", "list", "watch", "create", "update", "patch"] ---- -apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: name: atenet-dns @@ -71,6 +75,25 @@ roleRef: name: atenet-dns apiGroup: rbac.authorization.k8s.io --- +apiVersion: v1 +kind: Service +metadata: + name: dns + namespace: ate-system + labels: + app: dns +spec: + selector: + app: dns + type: ClusterIP + ports: + - name: dns + port: 53 + protocol: UDP + - name: dns-tcp + port: 53 + protocol: TCP +--- apiVersion: apps/v1 kind: Deployment metadata: @@ -94,8 +117,6 @@ spec: - name: init-dns image: busybox:1.36 command: ["sh", "-c"] - # Initial core file is sufficient to start CoreDNS but does not contain - # any additional configuration. The controller will update the Corefile. 
args: - | cat <<'EOF' > /etc/coredns/Corefile @@ -155,22 +176,3 @@ spec: volumes: - name: dns-config-volume emptyDir: {} ---- -apiVersion: v1 -kind: Service -metadata: - name: dns - namespace: ate-system - labels: - app: dns -spec: - selector: - app: dns - type: ClusterIP - ports: - - name: dns - port: 53 - protocol: UDP - - name: dns-tcp - port: 53 - protocol: TCP \ No newline at end of file diff --git a/manifests/ate-install/atenet-router.yaml b/manifests/ate-install/atenet-router.yaml index cddf7d367..7c1d4e052 100644 --- a/manifests/ate-install/atenet-router.yaml +++ b/manifests/ate-install/atenet-router.yaml @@ -12,6 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +# DO NOT EDIT — generated from charts/substrate by hack/render-manifests.sh. +# Run `make helm-template` to regenerate. + apiVersion: v1 kind: ServiceAccount metadata: @@ -20,33 +23,6 @@ metadata: labels: app: atenet-router --- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: atenet-router -rules: -- apiGroups: - - "ate.dev" - resources: - - actortemplates - verbs: - - get - - watch - - list ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: atenet-router -subjects: -- kind: ServiceAccount - name: atenet-router - namespace: ate-system -roleRef: - kind: ClusterRole - name: atenet-router - apiGroup: rbac.authorization.k8s.io ---- apiVersion: v1 kind: ConfigMap metadata: @@ -99,6 +75,52 @@ data: address: 127.0.0.1 port_value: 18000 --- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: atenet-router +rules: +- apiGroups: + - "ate.dev" + resources: + - actortemplates + verbs: + - get + - watch + - list +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: atenet-router +subjects: +- kind: ServiceAccount + name: atenet-router + namespace: ate-system +roleRef: + kind: ClusterRole + name: atenet-router + 
apiGroup: rbac.authorization.k8s.io +--- +apiVersion: v1 +kind: Service +metadata: + name: atenet-router + namespace: ate-system +spec: + type: ClusterIP + selector: + app: atenet-router + ports: + - name: http + port: 80 + targetPort: 8080 + protocol: TCP + - name: https + port: 443 + targetPort: 8443 + protocol: TCP +--- apiVersion: apps/v1 kind: Deployment metadata: @@ -194,22 +216,3 @@ spec: signerName: servicedns.podcert.ate.dev/identity keyType: ECDSAP256 credentialBundlePath: credential-bundle.pem ---- -apiVersion: v1 -kind: Service -metadata: - name: atenet-router - namespace: ate-system -spec: - type: ClusterIP - selector: - app: atenet-router - ports: - - name: http - port: 80 - targetPort: 8080 - protocol: TCP - - name: https - port: 443 - targetPort: 8443 - protocol: TCP diff --git a/manifests/ate-install/kind/kustomization.yaml b/manifests/ate-install/kind/kustomization.yaml index 43683e075..26d5d170f 100644 --- a/manifests/ate-install/kind/kustomization.yaml +++ b/manifests/ate-install/kind/kustomization.yaml @@ -23,6 +23,7 @@ resources: - ../atenet-router.yaml - ../valkey.yaml - ../pod-certificate-controller.yaml + - ../role.yaml - rustfs.yaml - ./otel-collector.yaml - ./prometheus.yaml diff --git a/manifests/ate-install/ate-system-namespace.yaml b/manifests/ate-install/namespace.yaml similarity index 80% rename from manifests/ate-install/ate-system-namespace.yaml rename to manifests/ate-install/namespace.yaml index 4fa19da0a..a74a4b502 100644 --- a/manifests/ate-install/ate-system-namespace.yaml +++ b/manifests/ate-install/namespace.yaml @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +# DO NOT EDIT — generated from charts/substrate by hack/render-manifests.sh. +# Run `make helm-template` to regenerate. 
+ +--- apiVersion: v1 kind: Namespace metadata: - name: ate-system \ No newline at end of file + name: ate-system diff --git a/manifests/ate-install/pod-certificate-controller.yaml b/manifests/ate-install/pod-certificate-controller.yaml index 17c7b6fd2..e62bebe78 100644 --- a/manifests/ate-install/pod-certificate-controller.yaml +++ b/manifests/ate-install/pod-certificate-controller.yaml @@ -12,6 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +# DO NOT EDIT — generated from charts/substrate by hack/render-manifests.sh. +# Run `make helm-template` to regenerate. + apiVersion: v1 kind: Namespace metadata: diff --git a/manifests/ate-install/role.yaml b/manifests/ate-install/role.yaml new file mode 100644 index 000000000..dc7a16399 --- /dev/null +++ b/manifests/ate-install/role.yaml @@ -0,0 +1,98 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT — generated from charts/substrate by hack/render-manifests.sh. +# Run `make helm-template` to regenerate. 
+ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: ate-controller +rules: +- apiGroups: + - "" + resources: + - configmaps + - secrets + verbs: + - get + - list + - watch +- apiGroups: + - "" + resources: + - pods + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - apps + resources: + - deployments + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - ate.dev + resources: + - actortemplates + - workerpools + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - ate.dev + resources: + - actortemplates/finalizers + - workerpools/finalizers + verbs: + - update +- apiGroups: + - ate.dev + resources: + - actortemplates/status + - workerpools/status + verbs: + - get + - patch + - update +--- +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/manifests/ate-install/valkey.yaml b/manifests/ate-install/valkey.yaml index ac649a555..2de10f6e4 100644 --- a/manifests/ate-install/valkey.yaml +++ b/manifests/ate-install/valkey.yaml @@ -12,6 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +# DO NOT EDIT — generated from charts/substrate by hack/render-manifests.sh. +# Run `make helm-template` to regenerate. 
+ apiVersion: v1 kind: ConfigMap metadata: @@ -210,4 +213,3 @@ spec: items: - key: ca.crt path: ca.crt - From 1b5549a9118326fb32e2e21e25a90ba398ed49ed Mon Sep 17 00:00:00 2001 From: Yuval Kohavi Date: Mon, 1 Jun 2026 17:05:27 -0400 Subject: [PATCH 03/13] update helm chart images --- charts/substrate/templates/_helpers.tpl | 26 ++++++++++++++++++ .../substrate/templates/ate-api-server.yaml | 2 +- .../substrate/templates/ate-controller.yaml | 2 +- charts/substrate/templates/atelet.yaml | 2 +- charts/substrate/templates/atenet-dns.yaml | 2 +- charts/substrate/templates/atenet-router.yaml | 2 +- .../templates/pod-certificate-controller.yaml | 2 +- charts/substrate/values.yaml | 10 +++---- hack/install-ate-kind-jwt.sh | 27 +++++++++++++++++++ hack/render-manifests.sh | 2 ++ 10 files changed, 66 insertions(+), 11 deletions(-) diff --git a/charts/substrate/templates/_helpers.tpl b/charts/substrate/templates/_helpers.tpl index 4a297dec1..9a40859a2 100644 --- a/charts/substrate/templates/_helpers.tpl +++ b/charts/substrate/templates/_helpers.tpl @@ -23,6 +23,32 @@ colliding. {{- end -}} {{- end -}} +{{/* +Build an image reference for a substrate component binary. + +Usage: + {{ include "substrate.componentImage" (list "ateapi" .) }} + +Produces {image.registry}/{name}:{tag} where tag is resolved as: + 1. image.tag value, if set and not the sentinel "" + 2. .Chart.AppVersion, if image.tag is empty + 3. no tag (no colon) when image.tag is the sentinel "" + +The "" sentinel is used by hack/render-manifests.sh so that ko:// refs +are emitted without a tag, letting `ko resolve` supply the digest at build time. +*/}} +{{- define "substrate.componentImage" -}} +{{- $name := index . 0 -}} +{{- $ctx := index . 
1 -}} +{{- $registry := $ctx.Values.image.registry -}} +{{- $tag := $ctx.Values.image.tag | default $ctx.Chart.AppVersion -}} +{{- if ne $tag "" -}} +{{- printf "%s/%s:%s" $registry $name $tag -}} +{{- else -}} +{{- printf "%s/%s" $registry $name -}} +{{- end -}} +{{- end -}} + {{/* Validate auth.mode at template time. */}} diff --git a/charts/substrate/templates/ate-api-server.yaml b/charts/substrate/templates/ate-api-server.yaml index fb964d3e2..b6c8d4017 100644 --- a/charts/substrate/templates/ate-api-server.yaml +++ b/charts/substrate/templates/ate-api-server.yaml @@ -62,7 +62,7 @@ spec: {{- end }} containers: - name: ate-api-server - image: {{ .Values.images.ateapi }} + image: {{ include "substrate.componentImage" (list "ateapi" .) }} args: - "--grpc-listen-addr=0.0.0.0:443" {{- if eq .Values.auth.mode "mtls" }} diff --git a/charts/substrate/templates/ate-controller.yaml b/charts/substrate/templates/ate-controller.yaml index 739744356..f41c4345c 100644 --- a/charts/substrate/templates/ate-controller.yaml +++ b/charts/substrate/templates/ate-controller.yaml @@ -53,7 +53,7 @@ spec: serviceAccountName: {{ include "substrate.fullname" (list "ate-controller" .) }} containers: - name: ate-controller - image: {{ .Values.images.atecontroller }} + image: {{ include "substrate.componentImage" (list "atecontroller" .) }} {{- if eq .Values.auth.mode "jwt" }} args: - "--ateapi-auth=jwt" diff --git a/charts/substrate/templates/atelet.yaml b/charts/substrate/templates/atelet.yaml index 055c5887a..112343f16 100644 --- a/charts/substrate/templates/atelet.yaml +++ b/charts/substrate/templates/atelet.yaml @@ -49,7 +49,7 @@ spec: serviceAccountName: {{ include "substrate.fullname" (list "atelet" .) }} containers: - name: atelet - image: {{ .Values.images.atelet }} + image: {{ include "substrate.componentImage" (list "atelet" .) 
}} args: - --gcp-auth-for-image-pulls={{ .Values.atelet.gcpAuthForImagePulls }} {{- with .Values.atelet.extraArgs }} diff --git a/charts/substrate/templates/atenet-dns.yaml b/charts/substrate/templates/atenet-dns.yaml index 287bf9b6d..8003c2ace 100644 --- a/charts/substrate/templates/atenet-dns.yaml +++ b/charts/substrate/templates/atenet-dns.yaml @@ -128,7 +128,7 @@ spec: successThreshold: 1 failureThreshold: 3 - name: dns-controller - image: {{ .Values.images.atenet }} + image: {{ include "substrate.componentImage" (list "atenet" .) }} args: - "dns" - "--log-level=debug" diff --git a/charts/substrate/templates/atenet-router.yaml b/charts/substrate/templates/atenet-router.yaml index 6caa2ee68..6addd6992 100644 --- a/charts/substrate/templates/atenet-router.yaml +++ b/charts/substrate/templates/atenet-router.yaml @@ -120,7 +120,7 @@ spec: {{- end }} containers: - name: atenet-router - image: {{ .Values.images.atenet }} + image: {{ include "substrate.componentImage" (list "atenet" .) }} args: - "router" - "--standalone" diff --git a/charts/substrate/templates/pod-certificate-controller.yaml b/charts/substrate/templates/pod-certificate-controller.yaml index 3654ec3b1..b52d006b3 100644 --- a/charts/substrate/templates/pod-certificate-controller.yaml +++ b/charts/substrate/templates/pod-certificate-controller.yaml @@ -126,7 +126,7 @@ spec: spec: containers: - name: controller - image: {{ .Values.images.podcertcontroller }} + image: {{ include "substrate.componentImage" (list "podcertcontroller" .) 
}} args: - --in-cluster=true - --sharding-pod-namespace=$(POD_NAMESPACE) diff --git a/charts/substrate/values.yaml b/charts/substrate/values.yaml index ae0b673fe..be9cc2f6c 100644 --- a/charts/substrate/values.yaml +++ b/charts/substrate/values.yaml @@ -78,13 +78,13 @@ ateApiServerEnvVarsConfigMap: ate-api-server-envvars otel: endpoint: http://opentelemetry-collector.gke-managed-otel.svc.cluster.local:4317 +image: + registry: ko://github.com/agent-substrate/substrate/cmd + tag: "" + images: - ateapi: ko://github.com/agent-substrate/substrate/cmd/ateapi - atecontroller: ko://github.com/agent-substrate/substrate/cmd/atecontroller - atelet: ko://github.com/agent-substrate/substrate/cmd/atelet - atenet: ko://github.com/agent-substrate/substrate/cmd/atenet - podcertcontroller: ko://github.com/agent-substrate/substrate/cmd/podcertcontroller valkey: valkey/valkey:8.0 envoy: envoyproxy/envoy:v1.30-latest coredns: coredns/coredns:1.11.1 busybox: busybox:1.36 + diff --git a/hack/install-ate-kind-jwt.sh b/hack/install-ate-kind-jwt.sh index c3ef609e0..644aa1578 100755 --- a/hack/install-ate-kind-jwt.sh +++ b/hack/install-ate-kind-jwt.sh @@ -88,6 +88,30 @@ apply_chart() { | run_kubectl apply -f - } +apply_crds() { + log_step "apply_crds" + run_kubectl apply -f "${ROOT}/manifests/ate-install/generated" +} + +bootstrap_session_id_secrets() { + log_step "bootstrap_session_id_secrets" + run_kubectl get secret -n "${NS}" session-id-jwt-pool >/dev/null 2>&1 \ + || kubectl-ate admin make-jwt-pool --key-id="1" --name="session-id-jwt-pool" --secret-namespace="${NS}" + run_kubectl get secret -n "${NS}" session-id-ca-pool >/dev/null 2>&1 \ + || kubectl-ate admin make-ca-pool --ca-id="1" --name="session-id-ca-pool" --secret-namespace="${NS}" +} + +bootstrap_envvars_configmap() { + log_step "bootstrap_envvars_configmap" + run_kubectl get configmap -n "${NS}" ate-api-server-envvars >/dev/null 2>&1 && return + run_kubectl create configmap -n "${NS}" ate-api-server-envvars \ + 
--from-literal=ATE_API_REDIS_ADDRESS="valkey-cluster.${NS}.svc:6379" \ + --from-literal=ATE_API_REDIS_USE_IAM_AUTH="false" \ + --from-literal=ATE_API_REDIS_TLS_SERVER_NAME="" \ + --from-literal=ATE_API_REDIS_CLIENT_CERT="" \ + --dry-run=client -o yaml | run_kubectl apply -f - +} + apply_kind_extras() { log_step "apply_kind_extras (rustfs + otel-collector)" run_kubectl apply -f "${ROOT}/manifests/ate-install/kind/rustfs.yaml" @@ -104,7 +128,10 @@ wait_rollouts() { } ensure_namespace +apply_crds bootstrap_jwt_tls +bootstrap_session_id_secrets +bootstrap_envvars_configmap apply_chart apply_kind_extras wait_rollouts diff --git a/hack/render-manifests.sh b/hack/render-manifests.sh index b326949b5..a1f3d0e3e 100755 --- a/hack/render-manifests.sh +++ b/hack/render-manifests.sh @@ -30,6 +30,8 @@ helm template substrate "${CHART_DIR}" \ --namespace ate-system \ --set auth.mode=mtls \ --set createNamespace=true \ + --set image.registry=ko://github.com/agent-substrate/substrate/cmd \ + --set image.tag="" \ > "${TMP_DIR}/all.yaml" # Split into per-source files so the directory structure mirrors the chart From 393acf5638caab7cea3b30fe09896ad2c91a6168 Mon Sep 17 00:00:00 2001 From: Yuval Kohavi Date: Tue, 2 Jun 2026 11:13:06 -0400 Subject: [PATCH 04/13] fix rbac. 
note that JWT verification is not cached and might not work on some k8s distributions that do not expose the JWKS --- .../substrate/templates/ate-api-server.yaml | 1 + charts/substrate/templates/jwt-oidc-rbac.yaml | 26 ++++++++ .../sessionidentity/sessionidentity.go | 7 ++- cmd/ateapi/main.go | 63 +++++++++++++++++-- hack/install-ate-kind-jwt.sh | 3 +- internal/ateapiauth/server.go | 8 ++- internal/k8sjwt/k8sjwt.go | 20 +++--- 7 files changed, 110 insertions(+), 18 deletions(-) create mode 100644 charts/substrate/templates/jwt-oidc-rbac.yaml diff --git a/charts/substrate/templates/ate-api-server.yaml b/charts/substrate/templates/ate-api-server.yaml index b6c8d4017..1afbd6036 100644 --- a/charts/substrate/templates/ate-api-server.yaml +++ b/charts/substrate/templates/ate-api-server.yaml @@ -87,6 +87,7 @@ spec: - "--client-jwt-audience={{ .Values.auth.jwt.audience }}" - "--session-id-jwt-pool=/run/session-id-jwt-pool/pool.json" - "--session-id-ca-pool=/run/session-id-ca-pool/pool.json" + - "--client-jwt-ca-cert=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" {{- end }} env: - name: OTEL_EXPORTER_OTLP_ENDPOINT diff --git a/charts/substrate/templates/jwt-oidc-rbac.yaml b/charts/substrate/templates/jwt-oidc-rbac.yaml new file mode 100644 index 000000000..44fb71ede --- /dev/null +++ b/charts/substrate/templates/jwt-oidc-rbac.yaml @@ -0,0 +1,26 @@ +{{- if eq .Values.auth.mode "jwt" }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "substrate.fullname" (list "oidc-discovery-viewer" .) }} +rules: +- nonResourceURLs: + - /.well-known/openid-configuration + - /openid/v1/jwks + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "substrate.fullname" (list "oidc-discovery-viewer" .) }} +subjects: +- kind: ServiceAccount + name: {{ include "substrate.fullname" (list "ate-api-server" .) 
}} + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: {{ include "substrate.fullname" (list "oidc-discovery-viewer" .) }} + apiGroup: rbac.authorization.k8s.io +{{- end }} diff --git a/cmd/ateapi/internal/sessionidentity/sessionidentity.go b/cmd/ateapi/internal/sessionidentity/sessionidentity.go index 71fa2a6bc..c69fbbb48 100644 --- a/cmd/ateapi/internal/sessionidentity/sessionidentity.go +++ b/cmd/ateapi/internal/sessionidentity/sessionidentity.go @@ -21,6 +21,7 @@ import ( "crypto/x509/pkix" "fmt" "log/slog" + "net/http" "net/url" "os" "path" @@ -51,17 +52,19 @@ type Server struct { sessionIDCAPoolFile string workerCACerts string + httpClient *http.Client } var _ ateapipb.SessionIdentityServer = (*Server)(nil) -func New(clientJWTIssuer, clientJWTAudience, sessionIDJWTPoolFile, sessionIDCAPoolFile, workerCACerts string) *Server { +func New(clientJWTIssuer, clientJWTAudience, sessionIDJWTPoolFile, sessionIDCAPoolFile, workerCACerts string, httpClient *http.Client) *Server { return &Server{ clientJWTIssuer: clientJWTIssuer, clientJWTAudience: clientJWTAudience, sessionIDJWTPoolFile: sessionIDJWTPoolFile, sessionIDCAPoolFile: sessionIDCAPoolFile, workerCACerts: workerCACerts, + httpClient: httpClient, } } @@ -78,7 +81,7 @@ func (s *Server) MintJWT(ctx context.Context, req *ateapipb.MintJWTRequest) (*at clientJWT := strings.TrimPrefix(authorization[0], "Bearer ") - clientClaims, err := k8sjwt.Verify(ctx, clientJWT, s.clientJWTIssuer, s.clientJWTAudience, time.Now()) + clientClaims, err := k8sjwt.Verify(ctx, s.httpClient, clientJWT, s.clientJWTIssuer, s.clientJWTAudience, time.Now()) if err != nil { slog.ErrorContext(ctx, "Error while verifying client JWT", slog.Any("err", err)) return nil, status.Errorf(codes.Unauthenticated, "Unauthenticated") diff --git a/cmd/ateapi/main.go b/cmd/ateapi/main.go index c09d74154..76705db6e 100644 --- a/cmd/ateapi/main.go +++ b/cmd/ateapi/main.go @@ -21,7 +21,9 @@ import ( "fmt" "log/slog" "net" + "net/http" 
"os" + "strings" "time" "github.com/agent-substrate/substrate/cmd/ateapi/internal/controlapi" @@ -66,8 +68,9 @@ var ( sessionIDCAPoolFile = pflag.String("session-id-ca-pool", "", "The file that contains the CA pool for signing session JWTs") workerpoolCACerts = pflag.String("workerpool-ca-certs", "", "The file that contains the CA for verifying workerpool client certificates.") - showVersion = pflag.Bool("version", false, "Print version and exit.") - authMode = pflag.String("auth-mode", "mtls", "Auth mode for incoming gRPC: mtls|jwt. 'mtls' (default) relies on transport-level mTLS for client identity. 'jwt' additionally requires a Kubernetes ServiceAccount Bearer token on every RPC.") + showVersion = pflag.Bool("version", false, "Print version and exit.") + authMode = pflag.String("auth-mode", "mtls", "Auth mode for incoming gRPC: mtls|jwt. 'mtls' (default) relies on transport-level mTLS for client identity. 'jwt' additionally requires a Kubernetes ServiceAccount Bearer token on every RPC.") + clientJWTCAFile = pflag.String("client-jwt-ca-cert", "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt", "CA cert file used to verify TLS when fetching the OIDC discovery document and JWKS for JWT authentication. 
Defaults to the in-cluster service account CA.") ) func main() { @@ -141,7 +144,9 @@ func main() { dialer := controlapi.NewAteletDialer(workerPodInformer.GetIndexer(), ateletPodInformer.GetIndexer()) sm := controlapi.NewService(redisPersistence, actorTemplateLister, dialer, clientset) - sessionIdentitySrv := sessionidentity.New(*clientJWTIssuer, *clientJWTAudience, *sessionIDJWTPoolFile, *sessionIDCAPoolFile, *workerpoolCACerts) + jwtHTTPClient := buildJWTHTTPClient(ctx, *clientJWTCAFile) + + sessionIdentitySrv := sessionidentity.New(*clientJWTIssuer, *clientJWTAudience, *sessionIDJWTPoolFile, *sessionIDCAPoolFile, *workerpoolCACerts, jwtHTTPClient) lisCfg := &net.ListenConfig{} lis, err := lisCfg.Listen(ctx, "tcp", *listenAddr) @@ -150,9 +155,10 @@ func main() { } authCfg := ateapiauth.ServerConfig{ - Mode: authModeParsed, - Issuer: *clientJWTIssuer, - Audience: *clientJWTAudience, + Mode: authModeParsed, + Issuer: *clientJWTIssuer, + Audience: *clientJWTAudience, + HTTPClient: jwtHTTPClient, } mux := grpc.NewServer( @@ -350,3 +356,48 @@ func buildServerCreds(ctx context.Context) (credentials.TransportCredentials, er ClientCAs: clientCAs, }), nil } + +const saTokenFile = "/var/run/secrets/kubernetes.io/serviceaccount/token" + +// buildJWTHTTPClient returns an *http.Client that trusts caFile for TLS +// verification and injects the pod's ServiceAccount Bearer token, used when +// fetching the OIDC discovery document and JWKS from the in-cluster Kubernetes +// API server. Returns nil (callers then fall back to http.DefaultClient) if caFile is empty, unreadable, or holds no parseable PEM certificate. 
+func buildJWTHTTPClient(ctx context.Context, caFile string) *http.Client { + if caFile == "" { + return nil + } + ca, err := os.ReadFile(caFile) + if err != nil { + slog.WarnContext(ctx, "Could not read JWT CA cert file; OIDC discovery will use system trust", slog.String("path", caFile), slog.Any("err", err)) + return nil + } + pool := x509.NewCertPool() + if !pool.AppendCertsFromPEM(ca) { + slog.WarnContext(ctx, "Could not parse JWT CA cert file; OIDC discovery will use system trust", slog.String("path", caFile)) + return nil + } + return &http.Client{ + Transport: &saTokenTransport{ + base: &http.Transport{ + TLSClientConfig: &tls.Config{RootCAs: pool}, + }, + }, + } +} + +// saTokenTransport injects the pod's ServiceAccount Bearer token on every +// request. Reads the token file fresh on each request so token rotation is +// handled automatically. +type saTokenTransport struct { + base http.RoundTripper +} + +func (t *saTokenTransport) RoundTrip(req *http.Request) (*http.Response, error) { + token, err := os.ReadFile(saTokenFile) + if err == nil && len(token) > 0 { + req = req.Clone(req.Context()) + req.Header.Set("Authorization", "Bearer "+strings.TrimSpace(string(token))) + } + return t.base.RoundTrip(req) +} diff --git a/hack/install-ate-kind-jwt.sh b/hack/install-ate-kind-jwt.sh index 644aa1578..1d8c04edc 100755 --- a/hack/install-ate-kind-jwt.sh +++ b/hack/install-ate-kind-jwt.sh @@ -81,7 +81,8 @@ apply_chart() { local rendered rendered=$(helm template substrate "${ROOT}/charts/substrate" \ --namespace "${NS}" \ - -f "${ROOT}/hack/values-kind-jwt.yaml") + -f "${ROOT}/hack/values-kind-jwt.yaml" \ + --set 'image.tag=') # ko resolve replaces ko:// refs with built+pushed image refs. 
echo "${rendered}" | bash "${ROOT}/hack/run-tool.sh" ko resolve -f - \ diff --git a/internal/ateapiauth/server.go b/internal/ateapiauth/server.go index 784506207..e924b0c79 100644 --- a/internal/ateapiauth/server.go +++ b/internal/ateapiauth/server.go @@ -24,6 +24,7 @@ package ateapiauth import ( "context" "fmt" + "net/http" "strings" "time" @@ -63,6 +64,11 @@ type ServerConfig struct { Issuer string // OIDC issuer URL for JWT verification Audience string // expected audience claim for JWT verification + // HTTPClient is used for OIDC discovery and JWKS fetches. Nil uses http.DefaultClient. + // Set this to a client that trusts the cluster CA when verifying tokens issued by + // the in-cluster Kubernetes API server (https://kubernetes.default.svc.cluster.local). + HTTPClient *http.Client + // Now returns the current time; nil uses time.Now. Exposed for tests. Now func() time.Time } @@ -123,7 +129,7 @@ func authenticate(ctx context.Context, cfg ServerConfig) (context.Context, error if !ok { return nil, status.Error(codes.Unauthenticated, "missing bearer token") } - claims, err := k8sjwt.Verify(ctx, bearer, cfg.Issuer, cfg.Audience, now()) + claims, err := k8sjwt.Verify(ctx, cfg.HTTPClient, bearer, cfg.Issuer, cfg.Audience, now()) if err != nil { return nil, status.Errorf(codes.Unauthenticated, "invalid bearer token: %v", err) } diff --git a/internal/k8sjwt/k8sjwt.go b/internal/k8sjwt/k8sjwt.go index e4403d0df..5b343ec4a 100644 --- a/internal/k8sjwt/k8sjwt.go +++ b/internal/k8sjwt/k8sjwt.go @@ -119,7 +119,9 @@ var permittedSkew = 5 * time.Minute // the object binding claims. If needed for your use case, you will need check the object bindings // by connecting to the cluster and seeing if the object(s) the bindings name still exist within the // cluster. 
-func Verify(ctx context.Context, jwt string, expectedIssuer, expectedAudience string, now time.Time) (*KubernetesClaims, error) { +// +// httpClient is used for OIDC discovery and JWKS fetches; nil uses http.DefaultClient. +func Verify(ctx context.Context, httpClient *http.Client, jwt string, expectedIssuer, expectedAudience string, now time.Time) (*KubernetesClaims, error) { segments := strings.Split(jwt, ".") if len(segments) != 3 { return nil, fmt.Errorf("malformed JWT") @@ -169,7 +171,7 @@ func Verify(ctx context.Context, jwt string, expectedIssuer, expectedAudience st } // TODO: Cache keys, and only fetch new keys if the JWT's key ID is not in the cache. - keys, err := discoverKeysForIssuer(ctx, rawClaims.Issuer) + keys, err := discoverKeysForIssuer(ctx, httpClient, rawClaims.Issuer) if err != nil { return nil, fmt.Errorf("while discovering keys from issuer: %w", err) } @@ -358,7 +360,7 @@ type jwkT struct { RSAE string `json:"e"` } -func discoverKeysForIssuer(ctx context.Context, issuer string) ([]*KeyAndID, error) { +func discoverKeysForIssuer(ctx context.Context, httpClient *http.Client, issuer string) ([]*KeyAndID, error) { var discoveryDocURL string if strings.HasSuffix(issuer, "/") { discoveryDocURL = issuer + ".well-known/openid-configuration" @@ -366,14 +368,14 @@ func discoverKeysForIssuer(ctx context.Context, issuer string) ([]*KeyAndID, err discoveryDocURL = issuer + "/.well-known/openid-configuration" } - oidcConfig, err := fetchJSON[oidcConfigT](discoveryDocURL) + oidcConfig, err := fetchJSON[oidcConfigT](httpClient, discoveryDocURL) if err != nil { return nil, fmt.Errorf("while fetching OIDC Discovery document: %w", err) } slog.InfoContext(ctx, "Fetched discovery doc", slog.Any("doc", oidcConfig)) - jwkSet, err := fetchJSON[jwkSetT](oidcConfig.JWKSURI) + jwkSet, err := fetchJSON[jwkSetT](httpClient, oidcConfig.JWKSURI) if err != nil { return nil, fmt.Errorf("while fetching JWKS: %w", err) } @@ -424,10 +426,12 @@ func discoverKeysForIssuer(ctx 
context.Context, issuer string) ([]*KeyAndID, err return ret, nil } -func fetchJSON[T any](url string) (T, error) { +func fetchJSON[T any](httpClient *http.Client, url string) (T, error) { var parsedBody T - - resp, err := http.Get(url) + if httpClient == nil { + httpClient = http.DefaultClient + } + resp, err := httpClient.Get(url) if err != nil { return parsedBody, fmt.Errorf("while making HTTP request: %w", err) } From ab7575a2df0b65f5106aff20cc732e04ff9d6434 Mon Sep 17 00:00:00 2001 From: Eitan Yarmush Date: Tue, 9 Jun 2026 14:20:35 +0000 Subject: [PATCH 05/13] fix: add chart boilerplate headers --- charts/substrate/Chart.yaml | 14 +++++++ charts/substrate/templates/_helpers.tpl | 16 ++++++++ .../substrate/templates/ate-api-server.yaml | 35 +++++++++++++++++ .../substrate/templates/ate-controller.yaml | 16 ++++++++ charts/substrate/templates/atelet.yaml | 16 ++++++++ charts/substrate/templates/atenet-dns.yaml | 16 ++++++++ .../templates/atenet-router-monitoring.yaml | 37 ++++++++++++++++++ charts/substrate/templates/atenet-router.yaml | 38 +++++++++++++++++++ charts/substrate/templates/jwt-oidc-rbac.yaml | 16 ++++++++ charts/substrate/templates/namespace.yaml | 16 ++++++++ .../templates/pod-certificate-controller.yaml | 16 ++++++++ charts/substrate/templates/valkey.yaml | 16 ++++++++ charts/substrate/values.yaml | 15 +++++++- hack/gen-rbac.sh | 17 ++++++++- hack/install-ate-kind-jwt.sh | 15 ++++++++ hack/render-manifests.sh | 18 ++++++++- hack/values-kind-jwt.yaml | 14 +++++++ internal/ateapiauth/server_test.go | 2 +- manifests/ate-install/ate-api-server.yaml | 20 +++++----- manifests/ate-install/ate-controller.yaml | 20 +++++----- manifests/ate-install/atelet.yaml | 34 +++++------------ manifests/ate-install/atenet-dns.yaml | 20 +++++----- .../ate-install/atenet-router-monitoring.yaml | 23 ++++++----- manifests/ate-install/atenet-router.yaml | 20 +++++----- manifests/ate-install/namespace.yaml | 20 +++++----- .../pod-certificate-controller.yaml | 20 +++++----- 
manifests/ate-install/role.yaml | 20 +++++----- manifests/ate-install/valkey.yaml | 20 +++++----- 28 files changed, 432 insertions(+), 118 deletions(-) create mode 100644 charts/substrate/templates/atenet-router-monitoring.yaml diff --git a/charts/substrate/Chart.yaml b/charts/substrate/Chart.yaml index 70961fba7..52bd74800 100644 --- a/charts/substrate/Chart.yaml +++ b/charts/substrate/Chart.yaml @@ -1,3 +1,17 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + apiVersion: v2 name: substrate description: Agent Substrate — actor runtime, control plane, and data-plane router. diff --git a/charts/substrate/templates/_helpers.tpl b/charts/substrate/templates/_helpers.tpl index 9a40859a2..2f8ecc8e6 100644 --- a/charts/substrate/templates/_helpers.tpl +++ b/charts/substrate/templates/_helpers.tpl @@ -1,3 +1,19 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + {{/* Qualified resource name for a chart component. 
diff --git a/charts/substrate/templates/ate-api-server.yaml b/charts/substrate/templates/ate-api-server.yaml index 1afbd6036..cbe952de5 100644 --- a/charts/substrate/templates/ate-api-server.yaml +++ b/charts/substrate/templates/ate-api-server.yaml @@ -1,3 +1,19 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: @@ -9,6 +25,11 @@ rules: - apiGroups: ["ate.dev"] resources: ["actortemplates"] verbs: ["get", "watch", "list"] +# Secret reads for env source resolution are intentionally NOT granted +# cluster-wide here. Each demo / tenant is responsible for granting +# ate-api-server read access only to the specific Secrets referenced by its +# ActorTemplates (e.g. via a namespace-scoped Role + RoleBinding using +# resourceNames). 
--- apiVersion: v1 kind: ServiceAccount @@ -90,6 +111,20 @@ spec: - "--client-jwt-ca-cert=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" {{- end }} env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_UID + valueFrom: + fieldRef: + fieldPath: metadata.uid + - name: OTEL_RESOURCE_ATTRIBUTES + value: k8s.namespace.name=$(POD_NAMESPACE),k8s.pod.name=$(POD_NAME),k8s.pod.uid=$(POD_UID),service.instance.id=$(POD_UID) - name: OTEL_EXPORTER_OTLP_ENDPOINT value: {{ .Values.otel.endpoint }} envFrom: diff --git a/charts/substrate/templates/ate-controller.yaml b/charts/substrate/templates/ate-controller.yaml index f41c4345c..5c403837c 100644 --- a/charts/substrate/templates/ate-controller.yaml +++ b/charts/substrate/templates/ate-controller.yaml @@ -1,3 +1,19 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + apiVersion: v1 kind: ServiceAccount metadata: diff --git a/charts/substrate/templates/atelet.yaml b/charts/substrate/templates/atelet.yaml index 112343f16..fef93f547 100644 --- a/charts/substrate/templates/atelet.yaml +++ b/charts/substrate/templates/atelet.yaml @@ -1,3 +1,19 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + # atelet — identical across auth modes (does not dial ateapi). apiVersion: v1 kind: ServiceAccount diff --git a/charts/substrate/templates/atenet-dns.yaml b/charts/substrate/templates/atenet-dns.yaml index 8003c2ace..0838d2c0b 100644 --- a/charts/substrate/templates/atenet-dns.yaml +++ b/charts/substrate/templates/atenet-dns.yaml @@ -1,3 +1,19 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + # atenet-dns — identical across auth modes (does not dial ateapi). apiVersion: v1 kind: ServiceAccount diff --git a/charts/substrate/templates/atenet-router-monitoring.yaml b/charts/substrate/templates/atenet-router-monitoring.yaml new file mode 100644 index 000000000..17e58c6bf --- /dev/null +++ b/charts/substrate/templates/atenet-router-monitoring.yaml @@ -0,0 +1,37 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +# Scrape the Envoy sidecar's admin /stats/prometheus endpoint so its end-to-end +# request-latency histogram (envoy_http_downstream_rq_time, milliseconds) reaches +# Google Managed Prometheus. This is E2E *context* for the per-stage latency +# dashboard, not an SLI we own (the SLI is the OTLP atenet.router.route.duration +# histogram). Envoy only speaks Prometheus, so it needs an explicit scrape; the +# admin port (9901) is already exposed by the envoy container in the atenet-router pod spec. +apiVersion: monitoring.googleapis.com/v1 +kind: PodMonitoring +metadata: + name: {{ include "substrate.fullname" (list "atenet-router-envoy" .) }} + namespace: {{ .Release.Namespace }} + labels: + app: atenet-router +spec: + selector: + matchLabels: + app: atenet-router + endpoints: + - port: admin + path: /stats/prometheus + interval: 30s diff --git a/charts/substrate/templates/atenet-router.yaml b/charts/substrate/templates/atenet-router.yaml index 6addd6992..687dcea47 100644 --- a/charts/substrate/templates/atenet-router.yaml +++ b/charts/substrate/templates/atenet-router.yaml @@ -1,3 +1,19 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+See the License for the specific language governing permissions and +limitations under the License. +*/}} + apiVersion: v1 kind: ServiceAccount metadata: @@ -101,6 +117,9 @@ spec: metadata: labels: app: atenet-router + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9090" spec: serviceAccountName: {{ include "substrate.fullname" (list "atenet-router" .) }} {{- if eq .Values.auth.mode "jwt" }} @@ -143,6 +162,23 @@ spec: {{- else }} - "--envoy-cert-path=/run/envoy-tls/credential-bundle.pem" {{- end }} + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_UID + valueFrom: + fieldRef: + fieldPath: metadata.uid + - name: OTEL_RESOURCE_ATTRIBUTES + value: k8s.namespace.name=$(POD_NAMESPACE),k8s.pod.name=$(POD_NAME),k8s.pod.uid=$(POD_UID),service.instance.id=$(POD_UID) + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: {{ .Values.otel.endpoint }} ports: - name: xds containerPort: 18000 @@ -150,6 +186,8 @@ spec: containerPort: 50051 - name: status containerPort: 4040 + - name: metrics + containerPort: 9090 {{- if eq .Values.auth.mode "jwt" }} volumeMounts: - name: ateapi-ca diff --git a/charts/substrate/templates/jwt-oidc-rbac.yaml b/charts/substrate/templates/jwt-oidc-rbac.yaml index 44fb71ede..a9fd499e9 100644 --- a/charts/substrate/templates/jwt-oidc-rbac.yaml +++ b/charts/substrate/templates/jwt-oidc-rbac.yaml @@ -1,3 +1,19 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/}} + {{- if eq .Values.auth.mode "jwt" }} --- apiVersion: rbac.authorization.k8s.io/v1 diff --git a/charts/substrate/templates/namespace.yaml b/charts/substrate/templates/namespace.yaml index d63920621..63401c00d 100644 --- a/charts/substrate/templates/namespace.yaml +++ b/charts/substrate/templates/namespace.yaml @@ -1,3 +1,19 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + {{- include "substrate.validateAuthMode" . -}} {{- if .Values.createNamespace }} apiVersion: v1 diff --git a/charts/substrate/templates/pod-certificate-controller.yaml b/charts/substrate/templates/pod-certificate-controller.yaml index b52d006b3..3aaaa9df9 100644 --- a/charts/substrate/templates/pod-certificate-controller.yaml +++ b/charts/substrate/templates/pod-certificate-controller.yaml @@ -1,3 +1,19 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/}} + {{- if eq .Values.auth.mode "mtls" -}} apiVersion: v1 kind: Namespace diff --git a/charts/substrate/templates/valkey.yaml b/charts/substrate/templates/valkey.yaml index 63b4758ad..b8233eea9 100644 --- a/charts/substrate/templates/valkey.yaml +++ b/charts/substrate/templates/valkey.yaml @@ -1,3 +1,19 @@ +{{/* +Copyright 2026 Google LLC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + {{- if .Values.valkey.enabled -}} {{- $sts := include "substrate.fullname" (list "valkey-cluster" .) -}} {{- $headless := include "substrate.fullname" (list "valkey-cluster-service" .) -}} diff --git a/charts/substrate/values.yaml b/charts/substrate/values.yaml index be9cc2f6c..6f4daf6cb 100644 --- a/charts/substrate/values.yaml +++ b/charts/substrate/values.yaml @@ -1,3 +1,17 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Default values for the substrate chart. 
# # The chart supports two installation modes via `auth.mode`: @@ -87,4 +101,3 @@ images: envoy: envoyproxy/envoy:v1.30-latest coredns: coredns/coredns:1.11.1 busybox: busybox:1.36 - diff --git a/hack/gen-rbac.sh b/hack/gen-rbac.sh index af806d29f..b8f9d9647 100755 --- a/hack/gen-rbac.sh +++ b/hack/gen-rbac.sh @@ -1,4 +1,19 @@ #!/usr/bin/env bash + +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Generate the controller ClusterRole into the Helm chart and templatize its # name so multi-release installs do not collide on a cluster-scoped resource. # @@ -13,7 +28,7 @@ ROOT="$(cd "$(dirname "$0")/.." && pwd)" OUT="${ROOT}/charts/substrate/templates/role.yaml" bash "${ROOT}/hack/run-tool.sh" controller-gen \ - "rbac:headerFile=${ROOT}/hack/boilerplate/yaml.txt,roleName=ate-controller" \ + "rbac:headerFile=${ROOT}/hack/boilerplate/sh.txt,roleName=ate-controller" \ paths="${ROOT}/internal/controllers/..." \ "output:rbac:artifacts:config=${ROOT}/charts/substrate/templates/" diff --git a/hack/install-ate-kind-jwt.sh b/hack/install-ate-kind-jwt.sh index 1d8c04edc..205c3d9fe 100755 --- a/hack/install-ate-kind-jwt.sh +++ b/hack/install-ate-kind-jwt.sh @@ -1,4 +1,19 @@ #!/usr/bin/env bash + +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Install Agent Substrate on a kind cluster in JWT auth mode. # # Unlike the mTLS install path (hack/install-ate-kind.sh), this works on a diff --git a/hack/render-manifests.sh b/hack/render-manifests.sh index a1f3d0e3e..bbe50befb 100755 --- a/hack/render-manifests.sh +++ b/hack/render-manifests.sh @@ -1,4 +1,19 @@ #!/usr/bin/env bash + +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Render the substrate Helm chart into manifests/ate-install/ (mTLS-mode # install) — the canonical kubectl-apply install path. The chart at # charts/substrate/ is the single source of truth; this script only renders. 
@@ -101,4 +116,5 @@ fi mkdir -p "${OUT_DIR}" find "${OUT_DIR}" -maxdepth 1 -type f -name '*.yaml' -delete cp "${TMP_DIR}/out/"*.yaml "${OUT_DIR}/" -echo "Rendered $(ls "${OUT_DIR}"/*.yaml | wc -l | xargs) manifest files into ${OUT_DIR}" +rendered_count="$(find "${OUT_DIR}" -maxdepth 1 -type f -name '*.yaml' | wc -l | xargs)" +echo "Rendered ${rendered_count} manifest files into ${OUT_DIR}" diff --git a/hack/values-kind-jwt.yaml b/hack/values-kind-jwt.yaml index 493ee9af5..82d90e8c7 100644 --- a/hack/values-kind-jwt.yaml +++ b/hack/values-kind-jwt.yaml @@ -1,3 +1,17 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Helm values for installing substrate on a kind cluster in JWT mode. # Used by hack/install-ate-kind-jwt.sh — does NOT require the off-by-default # certificate feature gates that the mTLS install path needs. 
diff --git a/internal/ateapiauth/server_test.go b/internal/ateapiauth/server_test.go index cc8cd35d9..a2dda17aa 100644 --- a/internal/ateapiauth/server_test.go +++ b/internal/ateapiauth/server_test.go @@ -86,7 +86,7 @@ func TestBearerToken(t *testing.T) { } for _, tc := range cases { t.Run(tc.name, func(t *testing.T) { - var ctx context.Context = context.Background() + ctx := context.Background() if tc.hdr != "" { ctx = metadata.NewIncomingContext(ctx, metadata.Pairs("authorization", tc.hdr)) } diff --git a/manifests/ate-install/ate-api-server.yaml b/manifests/ate-install/ate-api-server.yaml index f88f02173..0955e8adc 100644 --- a/manifests/ate-install/ate-api-server.yaml +++ b/manifests/ate-install/ate-api-server.yaml @@ -1,16 +1,16 @@ -# Copyright 2026 Google LLC +# Copyright 2026 Google LLC # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. # DO NOT EDIT — generated from charts/substrate by hack/render-manifests.sh. 
# Run `make helm-template` to regenerate. diff --git a/manifests/ate-install/ate-controller.yaml b/manifests/ate-install/ate-controller.yaml index 534481ae0..2cfbeecb4 100644 --- a/manifests/ate-install/ate-controller.yaml +++ b/manifests/ate-install/ate-controller.yaml @@ -1,16 +1,16 @@ -# Copyright 2026 Google LLC +# Copyright 2026 Google LLC # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. # DO NOT EDIT — generated from charts/substrate by hack/render-manifests.sh. # Run `make helm-template` to regenerate. diff --git a/manifests/ate-install/atelet.yaml b/manifests/ate-install/atelet.yaml index 4c6e26cb8..e2c75a07e 100644 --- a/manifests/ate-install/atelet.yaml +++ b/manifests/ate-install/atelet.yaml @@ -1,16 +1,16 @@ -# Copyright 2026 Google LLC +# Copyright 2026 Google LLC # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. # DO NOT EDIT — generated from charts/substrate by hack/render-manifests.sh. # Run `make helm-template` to regenerate. 
@@ -76,20 +76,6 @@ spec: valueFrom: fieldRef: fieldPath: spec.nodeName - - name: POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: POD_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - - name: POD_UID - valueFrom: - fieldRef: - fieldPath: metadata.uid - - name: OTEL_RESOURCE_ATTRIBUTES - value: k8s.namespace.name=$(POD_NAMESPACE),k8s.pod.name=$(POD_NAME),k8s.pod.uid=$(POD_UID),service.instance.id=$(POD_UID) - name: OTEL_EXPORTER_OTLP_ENDPOINT value: http://opentelemetry-collector.gke-managed-otel.svc.cluster.local:4317 - name: ATE_STORAGE_BACKEND diff --git a/manifests/ate-install/atenet-dns.yaml b/manifests/ate-install/atenet-dns.yaml index 36d11af3f..c925ada82 100644 --- a/manifests/ate-install/atenet-dns.yaml +++ b/manifests/ate-install/atenet-dns.yaml @@ -1,16 +1,16 @@ -# Copyright 2026 Google LLC +# Copyright 2026 Google LLC # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# DO NOT EDIT — generated from charts/substrate by hack/render-manifests.sh. # Run `make helm-template` to regenerate. diff --git a/manifests/ate-install/atenet-router-monitoring.yaml b/manifests/ate-install/atenet-router-monitoring.yaml index 6a42ac904..a52edc067 100644 --- a/manifests/ate-install/atenet-router-monitoring.yaml +++ b/manifests/ate-install/atenet-router-monitoring.yaml @@ -1,16 +1,19 @@ -# Copyright 2026 Google LLC +# Copyright 2026 Google LLC # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT — generated from charts/substrate by hack/render-manifests.sh. +# Run `make helm-template` to regenerate. 
# Scrape the Envoy sidecar's admin /stats/prometheus endpoint so its end-to-end # request-latency histogram (envoy_http_downstream_rq_time, milliseconds) reaches diff --git a/manifests/ate-install/atenet-router.yaml b/manifests/ate-install/atenet-router.yaml index 7c1d4e052..1eaffa2d9 100644 --- a/manifests/ate-install/atenet-router.yaml +++ b/manifests/ate-install/atenet-router.yaml @@ -1,16 +1,16 @@ -# Copyright 2026 Google LLC +# Copyright 2026 Google LLC # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. # DO NOT EDIT — generated from charts/substrate by hack/render-manifests.sh. # Run `make helm-template` to regenerate. 
diff --git a/manifests/ate-install/namespace.yaml b/manifests/ate-install/namespace.yaml index a74a4b502..9b6e06c5a 100644 --- a/manifests/ate-install/namespace.yaml +++ b/manifests/ate-install/namespace.yaml @@ -1,16 +1,16 @@ -# Copyright 2026 Google LLC +# Copyright 2026 Google LLC # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. # DO NOT EDIT — generated from charts/substrate by hack/render-manifests.sh. # Run `make helm-template` to regenerate. diff --git a/manifests/ate-install/pod-certificate-controller.yaml b/manifests/ate-install/pod-certificate-controller.yaml index e62bebe78..987c1974b 100644 --- a/manifests/ate-install/pod-certificate-controller.yaml +++ b/manifests/ate-install/pod-certificate-controller.yaml @@ -1,16 +1,16 @@ -# Copyright 2026 Google LLC +# Copyright 2026 Google LLC # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. # DO NOT EDIT — generated from charts/substrate by hack/render-manifests.sh. # Run `make helm-template` to regenerate. diff --git a/manifests/ate-install/role.yaml b/manifests/ate-install/role.yaml index dc7a16399..6a3558617 100644 --- a/manifests/ate-install/role.yaml +++ b/manifests/ate-install/role.yaml @@ -83,16 +83,16 @@ rules: - patch - update --- -# Copyright 2026 Google LLC +# Copyright 2026 Google LLC # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/manifests/ate-install/valkey.yaml b/manifests/ate-install/valkey.yaml index 2de10f6e4..f36724382 100644 --- a/manifests/ate-install/valkey.yaml +++ b/manifests/ate-install/valkey.yaml @@ -1,16 +1,16 @@ -# Copyright 2026 Google LLC +# Copyright 2026 Google LLC # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
+# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. # DO NOT EDIT — generated from charts/substrate by hack/render-manifests.sh. # Run `make helm-template` to regenerate. From c03eab050908e3cbfa8779f824f339e165ab29d1 Mon Sep 17 00:00:00 2001 From: Eitan Yarmush Date: Tue, 9 Jun 2026 14:57:25 +0000 Subject: [PATCH 06/13] fix: support jwt helm install on plain kind --- .../templates/atenet-router-monitoring.yaml | 2 + charts/substrate/values.yaml | 4 ++ hack/install-ate-kind-jwt.sh | 50 +++++++++++++++++++ hack/values-kind-jwt.yaml | 4 ++ 4 files changed, 60 insertions(+) diff --git a/charts/substrate/templates/atenet-router-monitoring.yaml b/charts/substrate/templates/atenet-router-monitoring.yaml index 17e58c6bf..e2c522213 100644 --- a/charts/substrate/templates/atenet-router-monitoring.yaml +++ b/charts/substrate/templates/atenet-router-monitoring.yaml @@ -14,6 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */}} +{{- if .Values.monitoring.gkePodMonitoring.enabled }} # Scrape the Envoy sidecar's admin /stats/prometheus endpoint so its end-to-end # request-latency histogram (envoy_http_downstream_rq_time, milliseconds) reaches # Google Managed Prometheus. 
This is E2E *context* for the per-stage latency @@ -35,3 +36,4 @@ spec: - port: admin path: /stats/prometheus interval: 30s +{{- end }} diff --git a/charts/substrate/values.yaml b/charts/substrate/values.yaml index 6f4daf6cb..5bac5e48b 100644 --- a/charts/substrate/values.yaml +++ b/charts/substrate/values.yaml @@ -92,6 +92,10 @@ ateApiServerEnvVarsConfigMap: ate-api-server-envvars otel: endpoint: http://opentelemetry-collector.gke-managed-otel.svc.cluster.local:4317 +monitoring: + gkePodMonitoring: + enabled: true + image: registry: ko://github.com/agent-substrate/substrate/cmd tag: "" diff --git a/hack/install-ate-kind-jwt.sh b/hack/install-ate-kind-jwt.sh index 205c3d9fe..7d7958f87 100755 --- a/hack/install-ate-kind-jwt.sh +++ b/hack/install-ate-kind-jwt.sh @@ -33,9 +33,12 @@ set -o errexit -o nounset -o pipefail ROOT="$(cd "$(dirname "$0")/.." && pwd)" NS="${NS:-ate-system}" +KIND_CLUSTER_NAME="${KIND_CLUSTER_NAME:-kind}" KUBECTL_CONTEXT="${KUBECTL_CONTEXT:-}" KO_DOCKER_REPO="${KO_DOCKER_REPO:-localhost:5001}" KO_DEFAULTPLATFORMS="${KO_DEFAULTPLATFORMS:-linux/$(go env GOARCH)}" +reg_name="kind-registry" +reg_port="5001" export KO_DOCKER_REPO KO_DEFAULTPLATFORMS @@ -52,6 +55,52 @@ ensure_namespace() { run_kubectl create namespace "${NS}" --dry-run=client -o yaml | run_kubectl apply -f - } +ensure_kind_local_registry() { + log_step "ensure_kind_local_registry" + + if [ "$(docker inspect -f '{{.State.Running}}' "${reg_name}" 2>/dev/null || true)" == "true" ]; then + if ! docker port "${reg_name}" | grep -q "${reg_port}"; then + echo "Registry exists but is not mapped to port ${reg_port}. Recreating..." 
+ docker rm -f "${reg_name}" + fi + fi + + if [ "$(docker inspect -f '{{.State.Running}}' "${reg_name}" 2>/dev/null || true)" != "true" ]; then + docker run \ + -d --restart=always \ + --label created-by=agent-substrate \ + -p "127.0.0.1:${reg_port}:5000" \ + -p "[::1]:${reg_port}:5000" \ + --network bridge --name "${reg_name}" \ + registry:3 + fi + + if [ "$(docker inspect -f='{{json .NetworkSettings.Networks.kind}}' "${reg_name}")" = "null" ]; then + docker network connect "kind" "${reg_name}" + fi + + local registry_dir="/etc/containerd/certs.d/localhost:${reg_port}" + local node + for node in $("${ROOT}"/hack/kind.sh get nodes --name "${KIND_CLUSTER_NAME}"); do + docker exec "${node}" mkdir -p "${registry_dir}" + cat < Date: Tue, 9 Jun 2026 15:15:45 +0000 Subject: [PATCH 07/13] feat: add substrate crds helm chart --- Makefile | 5 + charts/substrate-crds/Chart.yaml | 28 ++ charts/substrate-crds/README.md | 13 + .../templates/ate.dev_actortemplates.yaml | 331 ++++++++++++++++++ .../templates/ate.dev_workerpools.yaml | 97 +++++ charts/substrate/README.md | 6 +- hack/install-ate-kind-jwt.sh | 6 +- hack/verify/crd-chart.sh | 47 +++ 8 files changed, 531 insertions(+), 2 deletions(-) create mode 100644 charts/substrate-crds/Chart.yaml create mode 100644 charts/substrate-crds/README.md create mode 100644 charts/substrate-crds/templates/ate.dev_actortemplates.yaml create mode 100644 charts/substrate-crds/templates/ate.dev_workerpools.yaml create mode 100755 hack/verify/crd-chart.sh diff --git a/Makefile b/Makefile index c6fed696c..3b1e95d76 100644 --- a/Makefile +++ b/Makefile @@ -103,3 +103,8 @@ helm-template: .PHONY: verify-helm-template verify-helm-template: @./hack/render-manifests.sh --check + +# Verify that the CRD chart mirrors the generated CRDs. 
+.PHONY: verify-crd-chart +verify-crd-chart: + @./hack/verify/crd-chart.sh diff --git a/charts/substrate-crds/Chart.yaml b/charts/substrate-crds/Chart.yaml new file mode 100644 index 000000000..a69dcee0e --- /dev/null +++ b/charts/substrate-crds/Chart.yaml @@ -0,0 +1,28 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: v2 +name: substrate-crds +description: Agent Substrate CustomResourceDefinitions. +type: application +version: 0.1.0 +appVersion: "0.1.0" +home: https://github.com/agent-substrate/substrate +sources: +- https://github.com/agent-substrate/substrate +keywords: +- agent +- actor +- substrate +- crds diff --git a/charts/substrate-crds/README.md b/charts/substrate-crds/README.md new file mode 100644 index 000000000..12fa31f0a --- /dev/null +++ b/charts/substrate-crds/README.md @@ -0,0 +1,13 @@ +# substrate-crds + +Helm chart for installing the Agent Substrate CRDs. + +Install this chart before installing the main `substrate` chart: + +```bash +helm upgrade --install substrate-crds ./charts/substrate-crds +helm upgrade --install substrate ./charts/substrate --namespace ate-system --create-namespace +``` + +The CRD YAMLs in `templates/` mirror `manifests/ate-install/generated/`. +Run `hack/verify/crd-chart.sh` to verify they are in sync. 
diff --git a/charts/substrate-crds/templates/ate.dev_actortemplates.yaml b/charts/substrate-crds/templates/ate.dev_actortemplates.yaml new file mode 100644 index 000000000..25c8fabf9 --- /dev/null +++ b/charts/substrate-crds/templates/ate.dev_actortemplates.yaml @@ -0,0 +1,331 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.20.1 + name: actortemplates.ate.dev +spec: + group: ate.dev + names: + kind: ActorTemplate + listKind: ActorTemplateList + plural: actortemplates + shortNames: + - actortemplate + singular: actortemplate + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: spec defines the desired state of ActorTemplate + properties: + containers: + description: Containers is the workload definition. + items: + description: A single application container that you want to run + within a WorkerPool. + properties: + command: + description: Entrypoint array. Not executed within a shell. + items: + type: string + type: array + x-kubernetes-list-type: atomic + env: + description: Environment variables to set in the worker replicas. + items: + description: |- + EnvVar represents an environment variable supplied to a container in an + ActorTemplate. It models only a subset of Kubernetes Pod env behavior: + literal values are not expanded with Kubernetes-style $(VAR) references, + envFrom is not supported, and valueFrom currently supports only secretKeyRef. + properties: + name: + description: Name of the environment variable. Must be + a C_IDENTIFIER. + type: string + value: + description: Variable value. Defaults to "". Mutually + exclusive with ValueFrom. + type: string + valueFrom: + description: |- + Source for the environment variable's value. Mutually exclusive with + Value. + maxProperties: 1 + minProperties: 1 + properties: + secretKeyRef: + description: Selects a key of a Secret in the ActorTemplate's + namespace. + properties: + key: + description: Key to select within the Secret. + minLength: 1 + type: string + name: + description: Name of the referent Secret. + minLength: 1 + type: string + optional: + description: Specify whether the Secret or its + key must be defined. 
+ type: boolean + required: + - key + - name + type: object + type: object + required: + - name + type: object + x-kubernetes-validations: + - message: value and valueFrom are mutually exclusive + rule: '!(has(self.value) && has(self.valueFrom))' + type: array + image: + description: Image to use for the worker replicas. + type: string + x-kubernetes-validations: + - message: All images must be pinned (changing the image invalidates + snapshots) + rule: self.contains('@') + name: + description: Name of the container. + type: string + required: + - name + type: object + maxItems: 10 + type: array + pauseImage: + description: |- + PauseImage is the container to use as the root sandbox container. + + Typically, set it to [1] for on-gcp, and [2] for off-gcp + + - [1] gcr.io/gke-release/pause@sha256:bcbd57ba5653580ec647b16d8163cdd1112df3609129b01f912a8032e48265da + - [2] registry.k8s.io/pause:3.10.2@sha256:f548e0e8e3dc1896ca956272154dde3314e8cc4fde0a57577ee9fa1c63f5baf4 + type: string + x-kubernetes-validations: + - message: All images must be pinned (changing the image invalidates + snapshots) + rule: self.contains('@') + runsc: + description: Parameters for fetching the runsc binary to use. + properties: + amd64: + description: Configuration for the amd64 binary. + properties: + sha256Hash: + description: |- + The SHA256 hash of the binary to download. Used both to name the + downloaded file (for preventing conflicts), and to check the integrity of + the downloaded file. + type: string + url: + description: |- + A gs:// URL pointing to a runsc binary that can be downloaded (possibly + with atelet's credentials). + type: string + required: + - sha256Hash + - url + type: object + arm64: + description: Configuration for the arm64 binary. + properties: + sha256Hash: + description: |- + The SHA256 hash of the binary to download. Used both to name the + downloaded file (for preventing conflicts), and to check the integrity of + the downloaded file. 
+ type: string + url: + description: |- + A gs:// URL pointing to a runsc binary that can be downloaded (possibly + with atelet's credentials). + type: string + required: + - sha256Hash + - url + type: object + authentication: + description: How should atelet authenticate to download the runsc + binary? + properties: + gcp: + description: Use GCP application-default credentials. + type: object + type: object + type: object + snapshotsConfig: + description: Snapshots configuration for the actor. + properties: + location: + description: Location to store snapshots in. + type: string + required: + - location + type: object + workerPoolRef: + description: Name of the worker pool to use for the actor. + properties: + apiVersion: + description: API version of the referent. + type: string + fieldPath: + description: |- + If referring to a piece of an object instead of an entire object, this string + should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. + For example, if the object reference is to a container within a pod, this would take on a value like: + "spec.containers{name}" (where "name" refers to the name of the container that triggered + the event) or if no container name is specified "spec.containers[2]" (container with + index 2 in this pod). This syntax is chosen only to have some well-defined way of + referencing a part of an object. + type: string + kind: + description: |- + Kind of the referent. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + namespace: + description: |- + Namespace of the referent. 
+ More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ + type: string + resourceVersion: + description: |- + Specific resourceVersion to which this reference is made, if any. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency + type: string + uid: + description: |- + UID of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids + type: string + type: object + x-kubernetes-map-type: atomic + required: + - pauseImage + - runsc + - snapshotsConfig + - workerPoolRef + type: object + status: + description: status is the observed state of ActorTemplate + properties: + conditions: + description: conditions defines the status conditions array + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. 
+ Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + goldenActorID: + type: string + goldenSnapshot: + type: string + phase: + description: Phase of the actor template. + type: string + takeGoldenSnapshotAt: + format: date-time + type: string + type: object + required: + - spec + type: object + served: true + storage: true + subresources: + status: {} diff --git a/charts/substrate-crds/templates/ate.dev_workerpools.yaml b/charts/substrate-crds/templates/ate.dev_workerpools.yaml new file mode 100644 index 000000000..8fce0acbd --- /dev/null +++ b/charts/substrate-crds/templates/ate.dev_workerpools.yaml @@ -0,0 +1,97 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.20.1 + name: workerpools.ate.dev +spec: + group: ate.dev + names: + kind: WorkerPool + listKind: WorkerPoolList + plural: workerpools + shortNames: + - workerpool + singular: workerpool + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .spec.replicas + name: Desired + type: integer + - jsonPath: .status.replicas + name: Replicas + type: integer + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: WorkerPool is the Schema for the workerpools API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: spec defines the desired state of WorkerPool + properties: + ateomImage: + description: AteomImage is the ateom container image to deploy as + workers. + type: string + replicas: + description: Replicas is the number of worker pods to run. + format: int32 + minimum: 0 + type: integer + required: + - ateomImage + - replicas + type: object + status: + description: status is the observed state of WorkerPool + properties: + replicas: + description: Replicas is the total number of worker pods. 
+ format: int32 + type: integer + type: object + required: + - spec + type: object + served: true + storage: true + subresources: + scale: + specReplicasPath: .spec.replicas + statusReplicasPath: .status.replicas + status: {} diff --git a/charts/substrate/README.md b/charts/substrate/README.md index 4ca8d08ff..132b171e0 100644 --- a/charts/substrate/README.md +++ b/charts/substrate/README.md @@ -10,6 +10,9 @@ Helm chart for installing Agent Substrate. | `jwt` | | none beyond stock K8s | Server certs come from a user-provided Secret; clients authenticate via projected ServiceAccount tokens. Valkey runs plaintext intra-cluster. | ```bash +# CRDs +helm upgrade --install substrate-crds ./charts/substrate-crds + # mTLS mode (default) helm upgrade --install substrate ./charts/substrate @@ -67,7 +70,8 @@ helm template substrate ./charts/substrate --set auth.mode=jwt \ ``` `manifests/ate-install/` in the repo is the rendered mTLS output and is -regenerated by `make helm-template`. +regenerated by `make helm-template`. The separate `substrate-crds` chart +mirrors `manifests/ate-install/generated/`. 
## Values diff --git a/hack/install-ate-kind-jwt.sh b/hack/install-ate-kind-jwt.sh index 7d7958f87..9937a5b8f 100755 --- a/hack/install-ate-kind-jwt.sh +++ b/hack/install-ate-kind-jwt.sh @@ -46,6 +46,10 @@ run_kubectl() { kubectl ${KUBECTL_CONTEXT:+--context=${KUBECTL_CONTEXT}} "$@" } +run_helm() { + helm ${KUBECTL_CONTEXT:+--kube-context=${KUBECTL_CONTEXT}} "$@" +} + log_step() { echo -e "\033[1;36m[step]:\033[0m $1" } @@ -155,7 +159,7 @@ apply_chart() { apply_crds() { log_step "apply_crds" - run_kubectl apply -f "${ROOT}/manifests/ate-install/generated" + run_helm upgrade --install substrate-crds "${ROOT}/charts/substrate-crds" } bootstrap_session_id_secrets() { diff --git a/hack/verify/crd-chart.sh b/hack/verify/crd-chart.sh new file mode 100755 index 000000000..0837a8473 --- /dev/null +++ b/hack/verify/crd-chart.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash + +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -o errexit -o nounset -o pipefail + +ROOT="$(git rev-parse --show-toplevel)" +cd "${ROOT}" + +GENERATED_DIR="manifests/ate-install/generated" +CHART_TEMPLATES_DIR="charts/substrate-crds/templates" + +TMP_DIR="$(mktemp -d)" +trap 'rm -rf "${TMP_DIR}"' EXIT + +mkdir -p "${TMP_DIR}/generated" "${TMP_DIR}/chart" +cp "${GENERATED_DIR}/"*.yaml "${TMP_DIR}/generated/" +cp "${CHART_TEMPLATES_DIR}/"*.yaml "${TMP_DIR}/chart/" + +# The generated CRDs start with a leading document separator after the +# boilerplate header. 
In chart templates that separator renders as a +# comment-only YAML document, so the chart copies intentionally omit it. +for file in "${TMP_DIR}/generated/"*.yaml; do + awk 'BEGIN { removed = 0 } /^---$/ && removed == 0 { removed = 1; next } { print }' "${file}" > "${file}.tmp" + mv "${file}.tmp" "${file}" +done + +if ! diff -ruN "${TMP_DIR}/generated" "${TMP_DIR}/chart" >/dev/null 2>&1; then + echo "charts/substrate-crds/templates is out of sync with ${GENERATED_DIR}" >&2 + echo "Copy updated CRDs into charts/substrate-crds/templates." >&2 + diff -ruN "${TMP_DIR}/generated" "${TMP_DIR}/chart" | head -80 >&2 || true + exit 1 +fi + +echo "charts/substrate-crds/templates matches generated CRDs." From 8b9905780465f683fcbed665bb64542406dae298 Mon Sep 17 00:00:00 2001 From: Eitan Yarmush Date: Tue, 9 Jun 2026 16:49:58 +0000 Subject: [PATCH 08/13] feat: make jwt helm installs standalone --- .github/workflows/release.yaml | 35 ++++++++- Makefile | 9 ++- charts/substrate/README.md | 49 ++++--------- charts/substrate/templates/NOTES.txt | 15 ++-- .../templates/ate-api-server-envvars.yaml | 27 +++++++ charts/substrate/templates/jwt-bootstrap.yaml | 73 +++++++++++++++++++ charts/substrate/values.yaml | 31 +++++--- hack/install-ate-kind-jwt.sh | 69 +----------------- hack/values-kind-jwt.yaml | 2 +- internal/localca/localca.go | 44 ++++++++++- internal/localca/localca_test.go | 51 +++++++++++++ .../localjwtauthority/localjwtauthority.go | 26 ++++++- .../localjwtauthority_test.go | 62 ++++++++++++++++ .../ate-install/ate-api-server-envvars.yaml | 28 +++++++ 14 files changed, 394 insertions(+), 127 deletions(-) create mode 100644 charts/substrate/templates/ate-api-server-envvars.yaml create mode 100644 charts/substrate/templates/jwt-bootstrap.yaml create mode 100644 internal/localjwtauthority/localjwtauthority_test.go create mode 100644 manifests/ate-install/ate-api-server-envvars.yaml diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 
e87842781..2688d5e37 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -67,6 +67,9 @@ jobs: - name: Install ko uses: ko-build/setup-ko@v0.7 + - name: Install Helm + uses: azure/setup-helm@v4 + - name: Log in to GHCR uses: docker/login-action@v3 with: @@ -85,7 +88,7 @@ jobs: run: | set -o errexit -o nounset -o pipefail - for component in ateapi atelet ateom-gvisor podcertcontroller atenet; do + for component in ateapi atecontroller atelet ateom-gvisor podcertcontroller atenet; do KO_DOCKER_REPO="${IMAGE_REPOSITORY}/${component}" \ ./hack/run-tool.sh ko build \ --tags "${IMAGE_TAGS}" \ @@ -94,6 +97,36 @@ jobs: "./cmd/${component}" done + - name: Package and push Helm charts + if: inputs.create_release + env: + HELM_EXPERIMENTAL_OCI: "1" + CHART_REPOSITORY: oci://ghcr.io/kagent-dev/substrate/helm + run: | + set -o errexit -o nounset -o pipefail + + tag="${{ steps.tag.outputs.value }}" + chart_version="${tag#v}" + package_dir="${RUNNER_TEMP}/helm-packages" + mkdir -p "${package_dir}" + + echo "${{ secrets.GITHUB_TOKEN }}" \ + | helm registry login ghcr.io \ + --username "${{ github.actor }}" \ + --password-stdin + + helm package charts/substrate-crds \ + --destination "${package_dir}" \ + --version "${chart_version}" \ + --app-version "${tag}" + helm package charts/substrate \ + --destination "${package_dir}" \ + --version "${chart_version}" \ + --app-version "${tag}" + + helm push "${package_dir}/substrate-crds-${chart_version}.tgz" "${CHART_REPOSITORY}" + helm push "${package_dir}/substrate-${chart_version}.tgz" "${CHART_REPOSITORY}" + - name: Create GitHub Release if: inputs.create_release uses: softprops/action-gh-release@v2 diff --git a/Makefile b/Makefile index 3b1e95d76..770d426e7 100644 --- a/Makefile +++ b/Makefile @@ -44,10 +44,11 @@ build: build-images build-atectl .PHONY: build-images build-images: - $(KO) build --ldflags "$(LDFLAGS)" ./cmd/ateapi - $(KO) build --ldflags "$(LDFLAGS)" ./cmd/atelet - $(KO) build --ldflags 
"$(LDFLAGS)" ./cmd/podcertcontroller - $(KO) build --ldflags "$(LDFLAGS)" ./cmd/atenet + $(KO) build --base-import-paths --ldflags "$(LDFLAGS)" ./cmd/ateapi + $(KO) build --base-import-paths --ldflags "$(LDFLAGS)" ./cmd/atecontroller + $(KO) build --base-import-paths --ldflags "$(LDFLAGS)" ./cmd/atelet + $(KO) build --base-import-paths --ldflags "$(LDFLAGS)" ./cmd/podcertcontroller + $(KO) build --base-import-paths --ldflags "$(LDFLAGS)" ./cmd/atenet .PHONY: build-atectl build-atectl: diff --git a/charts/substrate/README.md b/charts/substrate/README.md index 132b171e0..142b8d808 100644 --- a/charts/substrate/README.md +++ b/charts/substrate/README.md @@ -7,7 +7,7 @@ Helm chart for installing Agent Substrate. | Mode | Default? | Cluster requirements | Trade-off | |------|----------|----------------------|-----------| | `mtls` | yes | feature gates `ClusterTrustBundle`, `ClusterTrustBundleProjection`, `PodCertificateRequest` + `certificates.k8s.io/v1beta1` API | Full in-cluster mTLS via the bundled `podcertcontroller`. | -| `jwt` | | none beyond stock K8s | Server certs come from a user-provided Secret; clients authenticate via projected ServiceAccount tokens. Valkey runs plaintext intra-cluster. | +| `jwt` | | none beyond stock K8s | Server certs and session signing pools are generated by the chart; clients authenticate via projected ServiceAccount tokens. Valkey runs plaintext intra-cluster. | ```bash # CRDs @@ -22,44 +22,22 @@ helm upgrade --install substrate ./charts/substrate \ --set auth.jwt.issuer=https://kubernetes.default.svc.cluster.local ``` -## JWT-mode prerequisites +By default, component images are pulled from `ghcr.io/kagent-dev/substrate` +using the chart `appVersion` as the tag. Override `image.registry` and +`image.tag` to install from a different image repository or tag. -You provide two resources out-of-band: +## JWT-mode bootstrap -1. `Secret/ateapi-tls` (type `kubernetes.io/tls`) in the release namespace. 
- This is the server cert for `ateapi` and the Envoy data-plane listener. -2. `ConfigMap/ateapi-ca` with key `ca.crt` in the release namespace. - This is the CA bundle clients use to verify the server. +JWT mode is standalone by default. The chart generates: -Bootstrap snippet using `openssl`: +- `Secret/ateapi-tls` +- `ConfigMap/ateapi-ca` +- `Secret/session-id-jwt-pool` +- `Secret/session-id-ca-pool` -```bash -NS=ate-system -kubectl create ns "$NS" --dry-run=client -o yaml | kubectl apply -f - - -# 1. Self-signed CA. -openssl req -x509 -newkey rsa:2048 -nodes -days 3650 \ - -subj "/CN=ateapi-ca" \ - -keyout ca.key -out ca.crt - -# 2. Server key + CSR + signed cert. -openssl req -newkey rsa:2048 -nodes \ - -subj "/CN=api.ate-system.svc" \ - -keyout server.key -out server.csr -cat > server.ext </dev/null 2>&1 \ - && run_kubectl get configmap -n "${NS}" ateapi-ca >/dev/null 2>&1; then - echo "Secret/ateapi-tls and ConfigMap/ateapi-ca already present — skipping." - return - fi - - local tmp - tmp=$(mktemp -d) - trap 'rm -rf "$tmp"' RETURN - - openssl req -x509 -newkey rsa:2048 -nodes -days 3650 \ - -subj "/CN=ateapi-ca" \ - -keyout "${tmp}/ca.key" -out "${tmp}/ca.crt" >/dev/null 2>&1 - - openssl req -newkey rsa:2048 -nodes \ - -subj "/CN=api.${NS}.svc" \ - -keyout "${tmp}/server.key" -out "${tmp}/server.csr" >/dev/null 2>&1 - - cat > "${tmp}/server.ext" </dev/null 2>&1 - - run_kubectl -n "${NS}" create secret tls ateapi-tls \ - --cert="${tmp}/server.crt" --key="${tmp}/server.key" \ - --dry-run=client -o yaml | run_kubectl apply -f - - - run_kubectl -n "${NS}" create configmap ateapi-ca \ - --from-file=ca.crt="${tmp}/ca.crt" \ - --dry-run=client -o yaml | run_kubectl apply -f - -} - apply_chart() { log_step "apply_chart (helm template | ko resolve | kubectl apply)" local rendered rendered=$(helm template substrate "${ROOT}/charts/substrate" \ --namespace "${NS}" \ -f "${ROOT}/hack/values-kind-jwt.yaml" \ + --set 
image.registry=ko://github.com/agent-substrate/substrate/cmd \ --set 'image.tag=') # ko resolve replaces ko:// refs with built+pushed image refs. @@ -162,25 +121,6 @@ apply_crds() { run_helm upgrade --install substrate-crds "${ROOT}/charts/substrate-crds" } -bootstrap_session_id_secrets() { - log_step "bootstrap_session_id_secrets" - run_kubectl get secret -n "${NS}" session-id-jwt-pool >/dev/null 2>&1 \ - || kubectl-ate admin make-jwt-pool --key-id="1" --name="session-id-jwt-pool" --secret-namespace="${NS}" - run_kubectl get secret -n "${NS}" session-id-ca-pool >/dev/null 2>&1 \ - || kubectl-ate admin make-ca-pool --ca-id="1" --name="session-id-ca-pool" --secret-namespace="${NS}" -} - -bootstrap_envvars_configmap() { - log_step "bootstrap_envvars_configmap" - run_kubectl get configmap -n "${NS}" ate-api-server-envvars >/dev/null 2>&1 && return - run_kubectl create configmap -n "${NS}" ate-api-server-envvars \ - --from-literal=ATE_API_REDIS_ADDRESS="valkey-cluster.${NS}.svc:6379" \ - --from-literal=ATE_API_REDIS_USE_IAM_AUTH="false" \ - --from-literal=ATE_API_REDIS_TLS_SERVER_NAME="" \ - --from-literal=ATE_API_REDIS_CLIENT_CERT="" \ - --dry-run=client -o yaml | run_kubectl apply -f - -} - apply_kind_extras() { log_step "apply_kind_extras (rustfs + otel-collector)" run_kubectl apply -f "${ROOT}/manifests/ate-install/kind/rustfs.yaml" @@ -199,9 +139,6 @@ wait_rollouts() { ensure_namespace ensure_kind_local_registry apply_crds -bootstrap_jwt_tls -bootstrap_session_id_secrets -bootstrap_envvars_configmap apply_chart apply_kind_extras wait_rollouts diff --git a/hack/values-kind-jwt.yaml b/hack/values-kind-jwt.yaml index a881ce8f5..00cc324b7 100644 --- a/hack/values-kind-jwt.yaml +++ b/hack/values-kind-jwt.yaml @@ -25,7 +25,7 @@ auth: serverCertSecret: ateapi-tls caBundleConfigMap: ateapi-ca -createNamespace: true +createNamespace: false # In-cluster OTel collector deployed alongside via manifests/ate-install/kind/otel-collector.yaml otel: diff --git 
// parsePrivateKey decodes a CA signing key from its serialized form.
//
// A non-empty pkcs8 takes precedence and must hold a DER-encoded PKCS #8 key;
// pemData is not consulted in that case. Otherwise pemData is scanned block by
// block and the first PEM block whose payload parses as a private key is
// returned. Blocks that are not keys are skipped, so the layout written by
// `openssl ecparam -genkey` (an "EC PARAMETERS" block ahead of the
// "EC PRIVATE KEY" block) is accepted.
//
// It returns "missing PEM block" when pemData holds no PEM block at all, and
// "unsupported private key PEM type ..." (naming the first block and wrapping
// the parser failure) when blocks exist but none of them is a usable key.
func parsePrivateKey(pkcs8 []byte, pemData string) (crypto.PrivateKey, error) {
	if len(pkcs8) != 0 {
		return x509.ParsePKCS8PrivateKey(pkcs8)
	}

	var firstErr error
	rest := []byte(pemData)
	for {
		var block *pem.Block
		block, rest = pem.Decode(rest)
		if block == nil {
			break
		}

		key, err := parsePrivateKeyDER(block.Bytes)
		if err == nil {
			return key, nil
		}
		// Report the first block on failure: that is the one the caller most
		// likely intended to be the key.
		if firstErr == nil {
			firstErr = fmt.Errorf("unsupported private key PEM type %q: %w", block.Type, err)
		}
	}

	if firstErr != nil {
		return nil, firstErr
	}
	return nil, fmt.Errorf("missing PEM block")
}

// parsePrivateKeyDER tries the DER encodings a PEM private key block may carry,
// in order: PKCS #8, SEC 1 (EC) and PKCS #1 (RSA). The PEM block type is
// deliberately not used to pick the parser, so a mislabelled block still loads.
func parsePrivateKeyDER(der []byte) (crypto.PrivateKey, error) {
	pkcs8Key, pkcs8Err := x509.ParsePKCS8PrivateKey(der)
	if pkcs8Err == nil {
		return pkcs8Key, nil
	}
	ecKey, ecErr := x509.ParseECPrivateKey(der)
	if ecErr == nil {
		return ecKey, nil
	}
	rsaKey, rsaErr := x509.ParsePKCS1PrivateKey(der)
	if rsaErr == nil {
		return rsaKey, nil
	}
	return nil, fmt.Errorf("pkcs8: %v; sec1: %v; pkcs1: %v", pkcs8Err, ecErr, rsaErr)
}

// parseCertificate decodes a root certificate from its serialized form.
//
// A non-empty der takes precedence and is parsed as a DER-encoded X.509
// certificate. Otherwise the first PEM block in pemData is used and must be of
// type "CERTIFICATE".
func parseCertificate(der []byte, pemData string) (*x509.Certificate, error) {
	if len(der) != 0 {
		return x509.ParseCertificate(der)
	}

	block, _ := pem.Decode([]byte(pemData))
	if block == nil {
		return nil, fmt.Errorf("missing PEM block")
	}
	if block.Type != "CERTIFICATE" {
		return nil, fmt.Errorf("unsupported certificate PEM type %q", block.Type)
	}
	return x509.ParseCertificate(block.Bytes)
}
// parsePrivateKey decodes a JWT signing key from its serialized form.
//
// A non-empty pkcs8 takes precedence and must hold a DER-encoded PKCS #8 key;
// pemData is not consulted in that case. Otherwise pemData is scanned block by
// block and the first PEM block whose payload parses as a private key is
// returned. Blocks that are not keys are skipped, so the layout written by
// `openssl ecparam -genkey` (an "EC PARAMETERS" block ahead of the
// "EC PRIVATE KEY" block) is accepted.
//
// It returns "missing PEM block" when pemData holds no PEM block at all, and
// "unsupported private key PEM type ..." (naming the first block and wrapping
// the parser failure) when blocks exist but none of them is a usable key.
func parsePrivateKey(pkcs8 []byte, pemData string) (crypto.PrivateKey, error) {
	if len(pkcs8) != 0 {
		return x509.ParsePKCS8PrivateKey(pkcs8)
	}

	var firstErr error
	rest := []byte(pemData)
	for {
		var block *pem.Block
		block, rest = pem.Decode(rest)
		if block == nil {
			break
		}

		key, err := parsePrivateKeyDER(block.Bytes)
		if err == nil {
			return key, nil
		}
		// Report the first block on failure: that is the one the caller most
		// likely intended to be the key.
		if firstErr == nil {
			firstErr = fmt.Errorf("unsupported private key PEM type %q: %w", block.Type, err)
		}
	}

	if firstErr != nil {
		return nil, firstErr
	}
	return nil, fmt.Errorf("missing PEM block")
}

// parsePrivateKeyDER tries the DER encodings a PEM private key block may carry,
// in order: PKCS #8, SEC 1 (EC) and PKCS #1 (RSA). The PEM block type is
// deliberately not used to pick the parser, so a mislabelled block still loads.
func parsePrivateKeyDER(der []byte) (crypto.PrivateKey, error) {
	pkcs8Key, pkcs8Err := x509.ParsePKCS8PrivateKey(der)
	if pkcs8Err == nil {
		return pkcs8Key, nil
	}
	ecKey, ecErr := x509.ParseECPrivateKey(der)
	if ecErr == nil {
		return ecKey, nil
	}
	rsaKey, rsaErr := x509.ParsePKCS1PrivateKey(der)
	if rsaErr == nil {
		return rsaKey, nil
	}
	return nil, fmt.Errorf("pkcs8: %v; sec1: %v; pkcs1: %v", pkcs8Err, ecErr, rsaErr)
}
+ +package localjwtauthority + +import ( + "crypto/ecdsa" + "crypto/elliptic" + "crypto/rand" + "crypto/x509" + "encoding/json" + "encoding/pem" + "testing" +) + +func TestUnmarshalPEMSigningKey(t *testing.T) { + key, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) + if err != nil { + t.Fatalf("GenerateKey(): %v", err) + } + keyDER, err := x509.MarshalECPrivateKey(key) + if err != nil { + t.Fatalf("MarshalECPrivateKey(): %v", err) + } + keyPEM := string(pem.EncodeToMemory(&pem.Block{Type: "EC PRIVATE KEY", Bytes: keyDER})) + + data, err := json.Marshal(&serializedPool{ + Authorities: []*serializedAuthority{{ + ID: "1", + Algorithm: "ES256", + SigningKeyPEM: keyPEM, + }}, + }) + if err != nil { + t.Fatalf("Marshal(): %v", err) + } + + pool, err := Unmarshal(data) + if err != nil { + t.Fatalf("Unmarshal(): %v", err) + } + if len(pool.Authorities) != 1 { + t.Fatalf("Authorities length = %d, want 1", len(pool.Authorities)) + } + if pool.Authorities[0].Algorithm != "ES256" { + t.Fatalf("Algorithm = %q, want ES256", pool.Authorities[0].Algorithm) + } + if _, ok := pool.Authorities[0].SigningKey.(*ecdsa.PrivateKey); !ok { + t.Fatalf("SigningKey type = %T, want *ecdsa.PrivateKey", pool.Authorities[0].SigningKey) + } +} diff --git a/manifests/ate-install/ate-api-server-envvars.yaml b/manifests/ate-install/ate-api-server-envvars.yaml new file mode 100644 index 000000000..f0df64a0f --- /dev/null +++ b/manifests/ate-install/ate-api-server-envvars.yaml @@ -0,0 +1,28 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# DO NOT EDIT — generated from charts/substrate by hack/render-manifests.sh. +# Run `make helm-template` to regenerate. + +apiVersion: v1 +kind: ConfigMap +metadata: + name: ate-api-server-envvars + namespace: ate-system +data: + ATE_API_REDIS_ADDRESS: "valkey-cluster.ate-system.svc:6379" + ATE_API_REDIS_USE_IAM_AUTH: "false" + ATE_API_REDIS_TLS_SERVER_NAME: "" + ATE_API_REDIS_CLIENT_CERT: "" + ATE_API_K8SJWT_ISSUER: "" From ddc796ac2ad416824c33e55fbf37f2bf00e8ced9 Mon Sep 17 00:00:00 2001 From: Eitan Yarmush Date: Tue, 9 Jun 2026 19:46:17 +0000 Subject: [PATCH 09/13] fix: make helm defaults cloud-neutral --- charts/substrate/README.md | 3 ++ .../substrate/templates/ate-api-server.yaml | 4 +- charts/substrate/templates/atelet.yaml | 4 +- charts/substrate/templates/atenet-router.yaml | 4 +- charts/substrate/values.yaml | 12 +++--- manifests/ate-install/ate-api-server.yaml | 2 - manifests/ate-install/atelet.yaml | 4 +- .../ate-install/atenet-router-monitoring.yaml | 38 ------------------- manifests/ate-install/atenet-router.yaml | 2 - 9 files changed, 19 insertions(+), 54 deletions(-) delete mode 100644 manifests/ate-install/atenet-router-monitoring.yaml diff --git a/charts/substrate/README.md b/charts/substrate/README.md index 142b8d808..4a3ccb37c 100644 --- a/charts/substrate/README.md +++ b/charts/substrate/README.md @@ -67,3 +67,6 @@ See `values.yaml` for the full set; the important keys: | `valkey.replicas` | `6` | StatefulSet size | | `redis.clusterAddress` | `""` (in-cluster) | Override to use external Redis | | `redis.useIAMAuth` | `false` | Google IAM auth | +| `atelet.gcpAuthForImagePulls` | `false` | Enable only when using GCP registry auth | +| `otel.endpoint` | `""` | Set to an OTLP endpoint to export traces/metrics | +| `monitoring.gkePodMonitoring.enabled` | `false` | Enable only on clusters with the GKE/GMP `PodMonitoring` CRD | diff --git 
a/charts/substrate/templates/ate-api-server.yaml b/charts/substrate/templates/ate-api-server.yaml index cbe952de5..fcf5ccc7e 100644 --- a/charts/substrate/templates/ate-api-server.yaml +++ b/charts/substrate/templates/ate-api-server.yaml @@ -125,8 +125,10 @@ spec: fieldPath: metadata.uid - name: OTEL_RESOURCE_ATTRIBUTES value: k8s.namespace.name=$(POD_NAMESPACE),k8s.pod.name=$(POD_NAME),k8s.pod.uid=$(POD_UID),service.instance.id=$(POD_UID) +{{- if .Values.otel.endpoint }} - name: OTEL_EXPORTER_OTLP_ENDPOINT - value: {{ .Values.otel.endpoint }} + value: {{ .Values.otel.endpoint | quote }} +{{- end }} envFrom: - configMapRef: name: {{ .Values.ateApiServerEnvVarsConfigMap }} diff --git a/charts/substrate/templates/atelet.yaml b/charts/substrate/templates/atelet.yaml index fef93f547..c2726fa36 100644 --- a/charts/substrate/templates/atelet.yaml +++ b/charts/substrate/templates/atelet.yaml @@ -78,8 +78,10 @@ spec: valueFrom: fieldRef: fieldPath: spec.nodeName +{{- if .Values.otel.endpoint }} - name: OTEL_EXPORTER_OTLP_ENDPOINT - value: {{ .Values.otel.endpoint }} + value: {{ .Values.otel.endpoint | quote }} +{{- end }} - name: ATE_STORAGE_BACKEND value: {{ .Values.atelet.storageBackend | quote }} {{- with .Values.atelet.extraEnv }} diff --git a/charts/substrate/templates/atenet-router.yaml b/charts/substrate/templates/atenet-router.yaml index 687dcea47..9a4e79e4f 100644 --- a/charts/substrate/templates/atenet-router.yaml +++ b/charts/substrate/templates/atenet-router.yaml @@ -177,8 +177,10 @@ spec: fieldPath: metadata.uid - name: OTEL_RESOURCE_ATTRIBUTES value: k8s.namespace.name=$(POD_NAMESPACE),k8s.pod.name=$(POD_NAME),k8s.pod.uid=$(POD_UID),service.instance.id=$(POD_UID) +{{- if .Values.otel.endpoint }} - name: OTEL_EXPORTER_OTLP_ENDPOINT - value: {{ .Values.otel.endpoint }} + value: {{ .Values.otel.endpoint | quote }} +{{- end }} ports: - name: xds containerPort: 18000 diff --git a/charts/substrate/values.yaml b/charts/substrate/values.yaml index 
05071afeb..48820cecf 100644 --- a/charts/substrate/values.yaml +++ b/charts/substrate/values.yaml @@ -75,11 +75,11 @@ valkey: replicas: 6 storageSize: 1Gi -# atelet daemonset overrides. Defaults match GKE; kind/dev installs override -# via hack/values-kind-jwt.yaml. extraArgs / extraEnv are appended verbatim -# for installer-specific knobs (e.g. AWS_* for rustfs/S3 storage). +# atelet daemonset overrides. Defaults avoid cloud-specific integrations. +# extraArgs / extraEnv are appended verbatim for installer-specific knobs +# (e.g. AWS_* for rustfs/S3 storage). atelet: - gcpAuthForImagePulls: true + gcpAuthForImagePulls: false storageBackend: gcs extraArgs: [] extraEnv: [] @@ -101,11 +101,11 @@ redis: ateApiServerEnvVarsConfigMap: ate-api-server-envvars otel: - endpoint: http://opentelemetry-collector.gke-managed-otel.svc.cluster.local:4317 + endpoint: "" monitoring: gkePodMonitoring: - enabled: true + enabled: false image: registry: ghcr.io/kagent-dev/substrate diff --git a/manifests/ate-install/ate-api-server.yaml b/manifests/ate-install/ate-api-server.yaml index 0955e8adc..46b5daf4a 100644 --- a/manifests/ate-install/ate-api-server.yaml +++ b/manifests/ate-install/ate-api-server.yaml @@ -116,8 +116,6 @@ spec: fieldPath: metadata.uid - name: OTEL_RESOURCE_ATTRIBUTES value: k8s.namespace.name=$(POD_NAMESPACE),k8s.pod.name=$(POD_NAME),k8s.pod.uid=$(POD_UID),service.instance.id=$(POD_UID) - - name: OTEL_EXPORTER_OTLP_ENDPOINT - value: http://opentelemetry-collector.gke-managed-otel.svc.cluster.local:4317 envFrom: - configMapRef: name: ate-api-server-envvars diff --git a/manifests/ate-install/atelet.yaml b/manifests/ate-install/atelet.yaml index e2c75a07e..e43ae9ab0 100644 --- a/manifests/ate-install/atelet.yaml +++ b/manifests/ate-install/atelet.yaml @@ -68,7 +68,7 @@ spec: - name: atelet image: ko://github.com/agent-substrate/substrate/cmd/atelet args: - - --gcp-auth-for-image-pulls=true + - --gcp-auth-for-image-pulls=false securityContext: privileged: true env: @@ 
-76,8 +76,6 @@ spec: valueFrom: fieldRef: fieldPath: spec.nodeName - - name: OTEL_EXPORTER_OTLP_ENDPOINT - value: http://opentelemetry-collector.gke-managed-otel.svc.cluster.local:4317 - name: ATE_STORAGE_BACKEND value: "gcs" ports: diff --git a/manifests/ate-install/atenet-router-monitoring.yaml b/manifests/ate-install/atenet-router-monitoring.yaml deleted file mode 100644 index a52edc067..000000000 --- a/manifests/ate-install/atenet-router-monitoring.yaml +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# DO NOT EDIT — generated from charts/substrate by hack/render-manifests.sh. -# Run `make helm-template` to regenerate. - -# Scrape the Envoy sidecar's admin /stats/prometheus endpoint so its end-to-end -# request-latency histogram (envoy_http_downstream_rq_time, milliseconds) reaches -# Google Managed Prometheus. This is E2E *context* for the per-stage latency -# dashboard, not an SLI we own (the SLI is the OTLP atenet.router.route.duration -# histogram). Envoy only speaks Prometheus, so it needs an explicit scrape; the -# admin port (9901) is already exposed by the envoy container above. 
-apiVersion: monitoring.googleapis.com/v1 -kind: PodMonitoring -metadata: - name: atenet-router-envoy - namespace: ate-system - labels: - app: atenet-router -spec: - selector: - matchLabels: - app: atenet-router - endpoints: - - port: admin - path: /stats/prometheus - interval: 30s diff --git a/manifests/ate-install/atenet-router.yaml b/manifests/ate-install/atenet-router.yaml index 1eaffa2d9..b63acf208 100644 --- a/manifests/ate-install/atenet-router.yaml +++ b/manifests/ate-install/atenet-router.yaml @@ -174,8 +174,6 @@ spec: fieldPath: metadata.uid - name: OTEL_RESOURCE_ATTRIBUTES value: k8s.namespace.name=$(POD_NAMESPACE),k8s.pod.name=$(POD_NAME),k8s.pod.uid=$(POD_UID),service.instance.id=$(POD_UID) - - name: OTEL_EXPORTER_OTLP_ENDPOINT - value: http://opentelemetry-collector.gke-managed-otel.svc.cluster.local:4317 ports: - name: xds containerPort: 18000 From 79294aa57f049f3f1f97884742d6cf9e0aced7ae Mon Sep 17 00:00:00 2001 From: Eitan Yarmush Date: Tue, 9 Jun 2026 20:02:41 +0000 Subject: [PATCH 10/13] fix: sync crd chart templates --- .../templates/ate.dev_actortemplates.yaml | 46 +++++++++++++++---- .../templates/ate.dev_workerpools.yaml | 2 + 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/charts/substrate-crds/templates/ate.dev_actortemplates.yaml b/charts/substrate-crds/templates/ate.dev_actortemplates.yaml index 25c8fabf9..cdb07e788 100644 --- a/charts/substrate-crds/templates/ate.dev_actortemplates.yaml +++ b/charts/substrate-crds/templates/ate.dev_actortemplates.yaml @@ -63,6 +63,7 @@ spec: description: Entrypoint array. Not executed within a shell. items: type: string + maxItems: 64 type: array x-kubernetes-list-type: atomic env: @@ -75,12 +76,19 @@ spec: envFrom is not supported, and valueFrom currently supports only secretKeyRef. properties: name: - description: Name of the environment variable. Must be - a C_IDENTIFIER. + description: |- + Name is the name of the environment variable. 
May be any printable ASCII + character except '='. + minLength: 1 + pattern: ^[ -<>-~]+$ type: string value: - description: Variable value. Defaults to "". Mutually - exclusive with ValueFrom. + description: |- + Variable value. Mutually exclusive with ValueFrom. + Value is the literal value of the environment variable. Unlike in + Kubernetes pods, this value is not interpolated, and $(VAR) + references are not expanded. + minLength: 0 type: string valueFrom: description: |- @@ -96,11 +104,15 @@ spec: key: description: Key to select within the Secret. minLength: 1 + pattern: ^[-._a-zA-Z0-9]+$ type: string name: description: Name of the referent Secret. - minLength: 1 + maxLength: 253 type: string + x-kubernetes-validations: + - message: Name must be a valid DNS subdomain + rule: '!format.dns1123Subdomain().validate(self).hasValue()' optional: description: Specify whether the Secret or its key must be defined. @@ -114,8 +126,11 @@ spec: - name type: object x-kubernetes-validations: - - message: value and valueFrom are mutually exclusive - rule: '!(has(self.value) && has(self.valueFrom))' + - message: exactly one of the fields in [value valueFrom] + must be set + rule: '[has(self.value),has(self.valueFrom)].filter(x,x==true).size() + == 1' + maxItems: 32 type: array image: description: Image to use for the worker replicas. @@ -126,8 +141,13 @@ spec: rule: self.contains('@') name: description: Name of the container. + maxLength: 63 type: string + x-kubernetes-validations: + - message: Name must be a valid DNS label + rule: '!format.dns1123Label().validate(self).hasValue()' required: + - image - name type: object maxItems: 10 @@ -156,11 +176,13 @@ spec: The SHA256 hash of the binary to download. Used both to name the downloaded file (for preventing conflicts), and to check the integrity of the downloaded file. 
+ pattern: ^[a-z0-9]+$ type: string url: - description: |- + description: | A gs:// URL pointing to a runsc binary that can be downloaded (possibly with atelet's credentials). + minLength: 1 type: string required: - sha256Hash @@ -174,11 +196,13 @@ spec: The SHA256 hash of the binary to download. Used both to name the downloaded file (for preventing conflicts), and to check the integrity of the downloaded file. + pattern: ^[a-z0-9]+$ type: string url: - description: |- + description: | A gs:// URL pointing to a runsc binary that can be downloaded (possibly with atelet's credentials). + minLength: 1 type: string required: - sha256Hash @@ -198,12 +222,14 @@ spec: properties: location: description: Location to store snapshots in. + minLength: 1 type: string required: - location type: object workerPoolRef: - description: Name of the worker pool to use for the actor. + description: | + Name of the worker pool to use for the actor. properties: apiVersion: description: API version of the referent. diff --git a/charts/substrate-crds/templates/ate.dev_workerpools.yaml b/charts/substrate-crds/templates/ate.dev_workerpools.yaml index 8fce0acbd..3e2387802 100644 --- a/charts/substrate-crds/templates/ate.dev_workerpools.yaml +++ b/charts/substrate-crds/templates/ate.dev_workerpools.yaml @@ -67,6 +67,7 @@ spec: ateomImage: description: AteomImage is the ateom container image to deploy as workers. + minLength: 1 type: string replicas: description: Replicas is the number of worker pods to run. @@ -83,6 +84,7 @@ spec: replicas: description: Replicas is the total number of worker pods. 
format: int32 + minimum: 0 type: integer type: object required: From e1388dc13e729d252cbe549754771b39e9fd445b Mon Sep 17 00:00:00 2001 From: Eitan Yarmush Date: Wed, 10 Jun 2026 14:29:14 +0000 Subject: [PATCH 11/13] fix: use agentgateway in helm chart --- charts/substrate/README.md | 1 - .../templates/atenet-router-monitoring.yaml | 39 ---- charts/substrate/templates/atenet-router.yaml | 170 ++++++++------- charts/substrate/values.yaml | 6 +- .../atenet-router-agentgateway.yaml | 197 ------------------ manifests/ate-install/atenet-router.yaml | 138 ++++++------ 6 files changed, 157 insertions(+), 394 deletions(-) delete mode 100644 charts/substrate/templates/atenet-router-monitoring.yaml delete mode 100644 manifests/ate-install/atenet-router-agentgateway.yaml diff --git a/charts/substrate/README.md b/charts/substrate/README.md index 4a3ccb37c..b515c26f1 100644 --- a/charts/substrate/README.md +++ b/charts/substrate/README.md @@ -69,4 +69,3 @@ See `values.yaml` for the full set; the important keys: | `redis.useIAMAuth` | `false` | Google IAM auth | | `atelet.gcpAuthForImagePulls` | `false` | Enable only when using GCP registry auth | | `otel.endpoint` | `""` | Set to an OTLP endpoint to export traces/metrics | -| `monitoring.gkePodMonitoring.enabled` | `false` | Enable only on clusters with the GKE/GMP `PodMonitoring` CRD | diff --git a/charts/substrate/templates/atenet-router-monitoring.yaml b/charts/substrate/templates/atenet-router-monitoring.yaml deleted file mode 100644 index e2c522213..000000000 --- a/charts/substrate/templates/atenet-router-monitoring.yaml +++ /dev/null @@ -1,39 +0,0 @@ -{{/* -Copyright 2026 Google LLC - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/}} - -{{- if .Values.monitoring.gkePodMonitoring.enabled }} -# Scrape the Envoy sidecar's admin /stats/prometheus endpoint so its end-to-end -# request-latency histogram (envoy_http_downstream_rq_time, milliseconds) reaches -# Google Managed Prometheus. This is E2E *context* for the per-stage latency -# dashboard, not an SLI we own (the SLI is the OTLP atenet.router.route.duration -# histogram). Envoy only speaks Prometheus, so it needs an explicit scrape; the -# admin port (9901) is already exposed by the envoy container above. -apiVersion: monitoring.googleapis.com/v1 -kind: PodMonitoring -metadata: - name: {{ include "substrate.fullname" (list "atenet-router-envoy" .) }} - namespace: {{ .Release.Namespace }} - labels: - app: atenet-router -spec: - selector: - matchLabels: - app: atenet-router - endpoints: - - port: admin - path: /stats/prometheus - interval: 30s -{{- end }} diff --git a/charts/substrate/templates/atenet-router.yaml b/charts/substrate/templates/atenet-router.yaml index 9a4e79e4f..18e34ee6c 100644 --- a/charts/substrate/templates/atenet-router.yaml +++ b/charts/substrate/templates/atenet-router.yaml @@ -52,54 +52,68 @@ roleRef: apiVersion: v1 kind: ConfigMap metadata: - name: {{ include "substrate.fullname" (list "atenet-router-envoy-config" .) }} + name: {{ include "substrate.fullname" (list "atenet-router-agentgateway-config" .) 
}} namespace: {{ .Release.Namespace }} data: - envoy.yaml: | - admin: - address: - socket_address: - address: 0.0.0.0 - port_value: 9901 - - node: - id: substrate-envoy-node - cluster: substrate-router-cluster - - dynamic_resources: - lds_config: - resource_api_version: V3 - ads: {} - cds_config: - resource_api_version: V3 - ads: {} - ads_config: - api_type: GRPC - transport_api_version: V3 - grpc_services: - - envoy_grpc: - cluster_name: xds_cluster - - static_resources: - clusters: - - name: xds_cluster - connect_timeout: 0.25s - type: STRICT_DNS - lb_policy: ROUND_ROBIN - typed_extension_protocol_options: - envoy.extensions.upstreams.http.v3.HttpProtocolOptions: - "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions - explicit_http_config: - http2_protocol_options: {} - load_assignment: - cluster_name: xds_cluster - endpoints: - - lb_endpoints: - - endpoint: - address: - socket_address: - address: 127.0.0.1 - port_value: 18000 + config.yaml: | + # yaml-language-server: $schema=https://agentgateway.dev/schema/config + config: + adminAddr: "127.0.0.1:15000" + readinessAddr: "0.0.0.0:15021" + statsAddr: "0.0.0.0:15020" + binds: + - port: 8080 + listeners: + - name: http + protocol: HTTP + routes: + - name: substrate-http + matches: + - path: + pathPrefix: / + policies: + extProc: + host: "127.0.0.1:50051" + failureMode: failClosed + processingOptions: + requestBodyMode: none + responseBodyMode: none + requestHeaderMode: send + responseHeaderMode: skip + requestTrailerMode: skip + responseTrailerMode: skip + backends: + - dynamic: {} + - port: 8443 + listeners: + - name: https + protocol: HTTPS + tls: +{{ if eq .Values.auth.mode "mtls" }} + cert: "/run/servicedns.podcert.ate.dev/credential-bundle.pem" + key: "/run/servicedns.podcert.ate.dev/credential-bundle.pem" +{{ else }} + cert: "/run/agentgateway-tls/tls.crt" + key: "/run/agentgateway-tls/tls.key" +{{ end }} + routes: + - name: substrate-https + matches: + - path: + pathPrefix: / 
+ policies: + extProc: + host: "127.0.0.1:50051" + failureMode: failClosed + processingOptions: + requestBodyMode: none + responseBodyMode: none + requestHeaderMode: send + responseHeaderMode: skip + requestTrailerMode: skip + responseTrailerMode: skip + backends: + - dynamic: {} --- apiVersion: apps/v1 kind: Deployment @@ -122,30 +136,15 @@ spec: prometheus.io/port: "9090" spec: serviceAccountName: {{ include "substrate.fullname" (list "atenet-router" .) }} -{{- if eq .Values.auth.mode "jwt" }} - initContainers: - - name: assemble-envoy-cred-bundle - image: {{ .Values.images.busybox }} - command: - - sh - - -c - - cat /run/ateapi-tls-src/tls.crt /run/ateapi-tls-src/tls.key > /run/envoy-tls/credential-bundle.pem - volumeMounts: - - name: ateapi-tls-src - mountPath: /run/ateapi-tls-src - readOnly: true - - name: envoy-tls - mountPath: /run/envoy-tls -{{- end }} containers: - name: atenet-router image: {{ include "substrate.componentImage" (list "atenet" .) }} args: - "router" - "--standalone" + - "--networking-mode=agentgateway" - "--namespace={{ .Release.Namespace }}" - "--port-http=8080" - - "--port-xds=18000" - "--port-extproc=50051" - "--extproc-address=127.0.0.1" - "--ateapi-address={{ include "substrate.fullname" (list "api" .) 
}}.{{ .Release.Namespace }}.svc:443" @@ -157,11 +156,6 @@ spec: {{- end }} - "--status-port=4040" - "--port-https=8443" -{{- if eq .Values.auth.mode "mtls" }} - - "--envoy-cert-path=/run/servicedns.podcert.ate.dev/credential-bundle.pem" -{{- else }} - - "--envoy-cert-path=/run/envoy-tls/credential-bundle.pem" -{{- end }} env: - name: POD_NAME valueFrom: @@ -182,8 +176,6 @@ spec: value: {{ .Values.otel.endpoint | quote }} {{- end }} ports: - - name: xds - containerPort: 18000 - name: extproc containerPort: 50051 - name: status @@ -199,36 +191,40 @@ spec: mountPath: /var/run/secrets/tokens/ateapi readOnly: true {{- end }} - - name: envoy - image: {{ .Values.images.envoy }} - command: - - "/usr/local/bin/envoy" - - "-c" - - "/etc/envoy/envoy.yaml" - - "--component-log-level" - - "upstream:debug,router:debug,ext_proc:debug" + - name: agentgateway + image: {{ .Values.images.agentgateway }} + args: + - "-f" + - "/etc/agentgateway/config.yaml" ports: - name: http containerPort: 8080 - name: https containerPort: 8443 - - name: admin - containerPort: 9901 + - name: readiness + containerPort: 15021 + - name: gw-metrics + containerPort: 15020 volumeMounts: - - name: envoy-config - mountPath: /etc/envoy + - name: agentgateway-config + mountPath: /etc/agentgateway {{- if eq .Values.auth.mode "mtls" }} - name: "servicedns" mountPath: "/run/servicedns.podcert.ate.dev" {{- else }} - - name: envoy-tls - mountPath: /run/envoy-tls + - name: agentgateway-tls + mountPath: /run/agentgateway-tls readOnly: true {{- end }} + readinessProbe: + httpGet: + path: /healthz/ready + port: readiness + periodSeconds: 10 volumes: - - name: envoy-config + - name: agentgateway-config configMap: - name: {{ include "substrate.fullname" (list "atenet-router-envoy-config" .) }} + name: {{ include "substrate.fullname" (list "atenet-router-agentgateway-config" .) 
}} {{- if eq .Values.auth.mode "mtls" }} - name: "servicedns" projected: @@ -238,11 +234,9 @@ spec: keyType: ECDSAP256 credentialBundlePath: credential-bundle.pem {{- else }} - - name: ateapi-tls-src + - name: agentgateway-tls secret: secretName: {{ .Values.auth.jwt.serverCertSecret }} - - name: envoy-tls - emptyDir: {} - name: ateapi-ca configMap: name: {{ .Values.auth.jwt.caBundleConfigMap }} diff --git a/charts/substrate/values.yaml b/charts/substrate/values.yaml index 48820cecf..e34cc244d 100644 --- a/charts/substrate/values.yaml +++ b/charts/substrate/values.yaml @@ -103,16 +103,12 @@ ateApiServerEnvVarsConfigMap: ate-api-server-envvars otel: endpoint: "" -monitoring: - gkePodMonitoring: - enabled: false - image: registry: ghcr.io/kagent-dev/substrate tag: "" images: valkey: valkey/valkey:8.0 - envoy: envoyproxy/envoy:v1.30-latest + agentgateway: cr.agentgateway.dev/agentgateway:v1.3.0-alpha.1 coredns: coredns/coredns:1.11.1 busybox: busybox:1.36 diff --git a/manifests/ate-install/atenet-router-agentgateway.yaml b/manifests/ate-install/atenet-router-agentgateway.yaml deleted file mode 100644 index d39972906..000000000 --- a/manifests/ate-install/atenet-router-agentgateway.yaml +++ /dev/null @@ -1,197 +0,0 @@ -# Copyright 2026 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: atenet-router - namespace: ate-system - labels: - app: atenet-router ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: atenet-router -rules: -- apiGroups: - - "ate.dev" - resources: - - actortemplates - verbs: - - get - - watch - - list ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: atenet-router -subjects: -- kind: ServiceAccount - name: atenet-router - namespace: ate-system -roleRef: - kind: ClusterRole - name: atenet-router - apiGroup: rbac.authorization.k8s.io ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: atenet-router-agentgateway-config - namespace: ate-system -data: - config.yaml: | - binds: - - port: 8080 - listeners: - - name: http - protocol: HTTP - routes: - - name: substrate-http - matches: - - path: - pathPrefix: / - policies: - extProc: - host: "127.0.0.1:50051" - processingOptions: - requestBodyMode: none - responseBodyMode: none - requestHeaderMode: send - responseHeaderMode: skip - requestTrailerMode: skip - responseTrailerMode: skip - backends: - - dynamic: {} - - port: 8443 - listeners: - - name: https - protocol: HTTPS - tls: - cert: "/run/servicedns.podcert.ate.dev/cert.pem" - key: "/run/servicedns.podcert.ate.dev/key.pem" - routes: - - name: substrate-https - matches: - - path: - pathPrefix: / - policies: - extProc: - host: "127.0.0.1:50051" - processingOptions: - requestBodyMode: none - responseBodyMode: none - requestHeaderMode: send - responseHeaderMode: skip - requestTrailerMode: skip - responseTrailerMode: skip - backends: - - dynamic: {} ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: atenet-router - namespace: ate-system - labels: - app: atenet-router -spec: - replicas: 1 - selector: - matchLabels: - app: atenet-router - template: - metadata: - labels: - app: atenet-router - spec: - serviceAccountName: atenet-router - containers: - - name: atenet-router - image: 
ko://github.com/agent-substrate/substrate/cmd/atenet - args: - - "router" - - "--networking-mode=agentgateway" - - "--standalone" - - "--namespace=ate-system" - - "--port-http=8080" - - "--port-extproc=50051" - - "--extproc-address=127.0.0.1" - - "--ateapi-address=api.ate-system.svc:443" - - "--status-port=4040" - - "--port-https=8443" - - "--tls-cert-path=/run/servicedns.podcert.ate.dev/cert.pem" - - "--tls-key-path=/run/servicedns.podcert.ate.dev/key.pem" - ports: - - name: extproc - containerPort: 50051 - - name: status - containerPort: 4040 - - name: agentgateway - image: cr.agentgateway.dev/agentgateway:v1.3.0-alpha.1 - args: - - "-f" - - "/etc/agentgateway/config.yaml" - ports: - - name: http - containerPort: 8080 - - name: https - containerPort: 8443 - - name: readiness - containerPort: 15021 - - name: metrics - containerPort: 15020 - readinessProbe: - httpGet: - path: /healthz/ready - port: 15021 - periodSeconds: 10 - volumeMounts: - - name: agentgateway-config - mountPath: /etc/agentgateway - - name: "servicedns" - mountPath: "/run/servicedns.podcert.ate.dev" - volumes: - - name: agentgateway-config - configMap: - name: atenet-router-agentgateway-config - - name: "servicedns" - projected: - sources: - - podCertificate: - signerName: servicedns.podcert.ate.dev/identity - keyType: ECDSAP256 - certificateChainPath: cert.pem - keyPath: key.pem ---- -apiVersion: v1 -kind: Service -metadata: - name: atenet-router - namespace: ate-system -spec: - type: ClusterIP - selector: - app: atenet-router - ports: - - name: http - port: 80 - targetPort: 8080 - protocol: TCP - - name: https - port: 443 - targetPort: 8443 - protocol: TCP diff --git a/manifests/ate-install/atenet-router.yaml b/manifests/ate-install/atenet-router.yaml index b63acf208..bc21752e7 100644 --- a/manifests/ate-install/atenet-router.yaml +++ b/manifests/ate-install/atenet-router.yaml @@ -26,54 +26,65 @@ metadata: apiVersion: v1 kind: ConfigMap metadata: - name: atenet-router-envoy-config + name: 
atenet-router-agentgateway-config namespace: ate-system data: - envoy.yaml: | - admin: - address: - socket_address: - address: 0.0.0.0 - port_value: 9901 + config.yaml: | + # yaml-language-server: $schema=https://agentgateway.dev/schema/config + config: + adminAddr: "127.0.0.1:15000" + readinessAddr: "0.0.0.0:15021" + statsAddr: "0.0.0.0:15020" + binds: + - port: 8080 + listeners: + - name: http + protocol: HTTP + routes: + - name: substrate-http + matches: + - path: + pathPrefix: / + policies: + extProc: + host: "127.0.0.1:50051" + failureMode: failClosed + processingOptions: + requestBodyMode: none + responseBodyMode: none + requestHeaderMode: send + responseHeaderMode: skip + requestTrailerMode: skip + responseTrailerMode: skip + backends: + - dynamic: {} + - port: 8443 + listeners: + - name: https + protocol: HTTPS + tls: - node: - id: substrate-envoy-node - cluster: substrate-router-cluster + cert: "/run/servicedns.podcert.ate.dev/credential-bundle.pem" + key: "/run/servicedns.podcert.ate.dev/credential-bundle.pem" - dynamic_resources: - lds_config: - resource_api_version: V3 - ads: {} - cds_config: - resource_api_version: V3 - ads: {} - ads_config: - api_type: GRPC - transport_api_version: V3 - grpc_services: - - envoy_grpc: - cluster_name: xds_cluster - - static_resources: - clusters: - - name: xds_cluster - connect_timeout: 0.25s - type: STRICT_DNS - lb_policy: ROUND_ROBIN - typed_extension_protocol_options: - envoy.extensions.upstreams.http.v3.HttpProtocolOptions: - "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions - explicit_http_config: - http2_protocol_options: {} - load_assignment: - cluster_name: xds_cluster - endpoints: - - lb_endpoints: - - endpoint: - address: - socket_address: - address: 127.0.0.1 - port_value: 18000 + routes: + - name: substrate-https + matches: + - path: + pathPrefix: / + policies: + extProc: + host: "127.0.0.1:50051" + failureMode: failClosed + processingOptions: + requestBodyMode: none + 
responseBodyMode: none + requestHeaderMode: send + responseHeaderMode: skip + requestTrailerMode: skip + responseTrailerMode: skip + backends: + - dynamic: {} --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole @@ -147,18 +158,15 @@ spec: image: ko://github.com/agent-substrate/substrate/cmd/atenet args: - "router" - - "--networking-mode=envoy" - "--standalone" + - "--networking-mode=agentgateway" - "--namespace=ate-system" - "--port-http=8080" - - "--port-xds=18000" - "--port-extproc=50051" - "--extproc-address=127.0.0.1" - "--ateapi-address=api.ate-system.svc:443" - "--status-port=4040" - "--port-https=8443" - - "--tls-cert-path=/run/servicedns.podcert.ate.dev/credential-bundle.pem" - - "--tls-key-path=/run/servicedns.podcert.ate.dev/credential-bundle.pem" env: - name: POD_NAME valueFrom: @@ -175,38 +183,40 @@ spec: - name: OTEL_RESOURCE_ATTRIBUTES value: k8s.namespace.name=$(POD_NAMESPACE),k8s.pod.name=$(POD_NAME),k8s.pod.uid=$(POD_UID),service.instance.id=$(POD_UID) ports: - - name: xds - containerPort: 18000 - name: extproc containerPort: 50051 - name: status containerPort: 4040 - name: metrics containerPort: 9090 - - name: envoy - image: envoyproxy/envoy:v1.30-latest - command: - - "/usr/local/bin/envoy" - - "-c" - - "/etc/envoy/envoy.yaml" - - "--component-log-level" - - "upstream:debug,router:debug,ext_proc:debug" + - name: agentgateway + image: cr.agentgateway.dev/agentgateway:v1.3.0-alpha.1 + args: + - "-f" + - "/etc/agentgateway/config.yaml" ports: - name: http containerPort: 8080 - name: https containerPort: 8443 - - name: admin - containerPort: 9901 + - name: readiness + containerPort: 15021 + - name: gw-metrics + containerPort: 15020 volumeMounts: - - name: envoy-config - mountPath: /etc/envoy + - name: agentgateway-config + mountPath: /etc/agentgateway - name: "servicedns" mountPath: "/run/servicedns.podcert.ate.dev" + readinessProbe: + httpGet: + path: /healthz/ready + port: readiness + periodSeconds: 10 volumes: - - name: envoy-config + - 
name: agentgateway-config configMap: - name: atenet-router-envoy-config + name: atenet-router-agentgateway-config - name: "servicedns" projected: sources: From 96145dbb8626d53204abfe82dd859147f793e440 Mon Sep 17 00:00:00 2001 From: Eitan Yarmush Date: Wed, 10 Jun 2026 15:20:27 +0000 Subject: [PATCH 12/13] fix: update agentgateway install overlays --- hack/install-ate.sh | 27 +++++++------------ .../base-agentgateway/kustomization.yaml | 2 +- .../kind-agentgateway/kustomization.yaml | 2 +- 3 files changed, 11 insertions(+), 20 deletions(-) diff --git a/hack/install-ate.sh b/hack/install-ate.sh index 729a308d0..12140c130 100755 --- a/hack/install-ate.sh +++ b/hack/install-ate.sh @@ -62,7 +62,7 @@ function usage() { echo "Overall infrastructure (all infrastructure components):" echo "" echo " --deploy-ate-system Deploy core system (CRDs, atelet, apiserver)" - echo " --router=envoy|agentgateway Select atenet-router implementation (default: agentgateway)" + echo " --router=agentgateway Select atenet-router implementation (default: agentgateway)" echo " --delete-ate-system Delete core system" echo " --delete-all Delete core system and all registered demos" echo "" @@ -120,11 +120,11 @@ run_ko() { set_atenet_router() { case "$1" in - envoy|agentgateway) + agentgateway) ATE_INSTALL_ATENET_ROUTER="$1" ;; *) - echo "unsupported atenet router mode: $1" >&2 + echo "unsupported atenet router mode: $1 (only agentgateway is supported)" >&2 exit 1 ;; esac @@ -132,14 +132,11 @@ set_atenet_router() { atenet_router_manifest() { case "${ATE_INSTALL_ATENET_ROUTER}" in - envoy) - echo "manifests/ate-install/atenet-router.yaml" - ;; agentgateway) - echo "manifests/ate-install/atenet-router-agentgateway.yaml" + echo "manifests/ate-install/atenet-router.yaml" ;; *) - echo "unsupported atenet router mode: ${ATE_INSTALL_ATENET_ROUTER}" >&2 + echo "unsupported atenet router mode: ${ATE_INSTALL_ATENET_ROUTER} (only agentgateway is supported)" >&2 exit 1 ;; esac @@ -147,14 +144,11 @@ 
atenet_router_manifest() { ate_install_kustomize_base_dir() { case "${ATE_INSTALL_ATENET_ROUTER}" in - envoy) - echo "manifests/ate-install/base" - ;; agentgateway) - echo "manifests/ate-install/base-agentgateway" + echo "manifests/ate-install/base" ;; *) - echo "unsupported atenet router mode: ${ATE_INSTALL_ATENET_ROUTER}" >&2 + echo "unsupported atenet router mode: ${ATE_INSTALL_ATENET_ROUTER} (only agentgateway is supported)" >&2 exit 1 ;; esac @@ -162,14 +156,11 @@ ate_install_kustomize_base_dir() { ate_install_kustomize_dir() { case "${ATE_INSTALL_ATENET_ROUTER}" in - envoy) - echo "manifests/ate-install/kind" - ;; agentgateway) - echo "manifests/ate-install/kind-agentgateway" + echo "manifests/ate-install/kind" ;; *) - echo "unsupported atenet router mode: ${ATE_INSTALL_ATENET_ROUTER}" >&2 + echo "unsupported atenet router mode: ${ATE_INSTALL_ATENET_ROUTER} (only agentgateway is supported)" >&2 exit 1 ;; esac diff --git a/manifests/ate-install/base-agentgateway/kustomization.yaml b/manifests/ate-install/base-agentgateway/kustomization.yaml index d5883ba79..61cde5ace 100644 --- a/manifests/ate-install/base-agentgateway/kustomization.yaml +++ b/manifests/ate-install/base-agentgateway/kustomization.yaml @@ -20,6 +20,6 @@ resources: - ../ate-controller.yaml - ../atelet.yaml - ../atenet-dns.yaml - - ../atenet-router-agentgateway.yaml + - ../atenet-router.yaml - ../valkey.yaml - ../pod-certificate-controller.yaml diff --git a/manifests/ate-install/kind-agentgateway/kustomization.yaml b/manifests/ate-install/kind-agentgateway/kustomization.yaml index d9c9192b9..b3f849937 100644 --- a/manifests/ate-install/kind-agentgateway/kustomization.yaml +++ b/manifests/ate-install/kind-agentgateway/kustomization.yaml @@ -20,7 +20,7 @@ resources: - ../ate-controller.yaml - ../kind/atelet - ../atenet-dns.yaml - - ../atenet-router-agentgateway.yaml + - ../atenet-router.yaml - ../valkey.yaml - ../pod-certificate-controller.yaml - ../kind/rustfs.yaml From 
5e9bf9b3b54475a7ba8f54bfa3153973a4fff661 Mon Sep 17 00:00:00 2001 From: Eitan Yarmush Date: Wed, 10 Jun 2026 15:33:53 +0000 Subject: [PATCH 13/13] fix: project agentgateway tls key separately --- charts/substrate/templates/atenet-router.yaml | 7 ++++--- manifests/ate-install/atenet-router.yaml | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/charts/substrate/templates/atenet-router.yaml b/charts/substrate/templates/atenet-router.yaml index 18e34ee6c..01536184a 100644 --- a/charts/substrate/templates/atenet-router.yaml +++ b/charts/substrate/templates/atenet-router.yaml @@ -90,8 +90,8 @@ data: protocol: HTTPS tls: {{ if eq .Values.auth.mode "mtls" }} - cert: "/run/servicedns.podcert.ate.dev/credential-bundle.pem" - key: "/run/servicedns.podcert.ate.dev/credential-bundle.pem" + cert: "/run/servicedns.podcert.ate.dev/cert.pem" + key: "/run/servicedns.podcert.ate.dev/key.pem" {{ else }} cert: "/run/agentgateway-tls/tls.crt" key: "/run/agentgateway-tls/tls.key" @@ -232,7 +232,8 @@ spec: - podCertificate: signerName: servicedns.podcert.ate.dev/identity keyType: ECDSAP256 - credentialBundlePath: credential-bundle.pem + certificateChainPath: cert.pem + keyPath: key.pem {{- else }} - name: agentgateway-tls secret: diff --git a/manifests/ate-install/atenet-router.yaml b/manifests/ate-install/atenet-router.yaml index bc21752e7..54eb77e05 100644 --- a/manifests/ate-install/atenet-router.yaml +++ b/manifests/ate-install/atenet-router.yaml @@ -64,8 +64,8 @@ data: protocol: HTTPS tls: - cert: "/run/servicedns.podcert.ate.dev/credential-bundle.pem" - key: "/run/servicedns.podcert.ate.dev/credential-bundle.pem" + cert: "/run/servicedns.podcert.ate.dev/cert.pem" + key: "/run/servicedns.podcert.ate.dev/key.pem" routes: - name: substrate-https @@ -223,4 +223,5 @@ spec: - podCertificate: signerName: servicedns.podcert.ate.dev/identity keyType: ECDSAP256 - credentialBundlePath: credential-bundle.pem + certificateChainPath: cert.pem + keyPath: key.pem