Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions chart/templates/deployment-operator-controller-manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ spec:
- --redirect-ingress-class={{ .Values.redirect.ingressClass }}
- --redirect-cluster-issuer={{ .Values.redirect.clusterIssuer.name }}
{{- end }}
{{- if .Values.redirect.blockedIPv6CIDRs }}
- --redirect-blocked-ipv6={{ join "," .Values.redirect.blockedIPv6CIDRs }}
{{- end }}
{{- if .Values.redirect.namespace }}
- --redirect-namespace={{ .Values.redirect.namespace }}
{{- end }}
Expand Down
1 change: 1 addition & 0 deletions chart/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ operatorApi:
redirect:
namespace: "deco-redirect-system"
ingressClass: "" # set to enable DecoRedirect controller (e.g. "redirect-nginx")
blockedIPv6CIDRs: [] # IPv6 CIDRs that block cert issuance when present in AAAA records (e.g. ["2600:1901::/32"])
clusterIssuer:
enabled: false # set true to create the Let's Encrypt ClusterIssuer
name: "" # ClusterIssuer name (e.g. "letsencrypt")
Expand Down
27 changes: 23 additions & 4 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"crypto/tls"
"flag"
"fmt"
"net"
"os"
"path/filepath"
"strings"
Expand Down Expand Up @@ -135,6 +136,10 @@ func main() {
flag.StringVar(&redirectClusterIssuer, "redirect-cluster-issuer",
getEnvOrDefault("REDIRECT_CLUSTER_ISSUER", "letsencrypt"),
"cert-manager ClusterIssuer name (matches redirect.clusterIssuer.name in values).")
var redirectBlockedIPv6 string
flag.StringVar(&redirectBlockedIPv6, "redirect-blocked-ipv6",
getEnvOrDefault("REDIRECT_BLOCKED_IPV6", ""),
"Comma-separated IPv6 CIDRs that block cert issuance when present in a domain's AAAA records (e.g. 2600:1901::/32).")
var controllersFlag string
flag.StringVar(&controllersFlag, "controllers", "*",
"Comma-separated list of controllers to enable. Use \"*\" to enable all. Valid values: "+
Expand Down Expand Up @@ -371,11 +376,25 @@ func main() {
}

if enabled(controller.DecoRedirectControllerName) {
var blockedIPv6CIDRs []*net.IPNet
for _, cidr := range strings.Split(redirectBlockedIPv6, ",") {
cidr = strings.TrimSpace(cidr)
if cidr == "" {
continue
}
_, ipNet, cidrErr := net.ParseCIDR(cidr)
if cidrErr != nil {
setupLog.Error(cidrErr, "invalid CIDR in --redirect-blocked-ipv6", "cidr", cidr)
os.Exit(1)
}
blockedIPv6CIDRs = append(blockedIPv6CIDRs, ipNet)
}
if err = (&controller.DecoRedirectReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
IngressClass: redirectIngressClass,
ClusterIssuer: redirectClusterIssuer,
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
IngressClass: redirectIngressClass,
ClusterIssuer: redirectClusterIssuer,
BlockedIPv6CIDRs: blockedIPv6CIDRs,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "DecoRedirect")
os.Exit(1)
Expand Down
3 changes: 3 additions & 0 deletions hack/helm-generator/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,9 @@ func addRedirectControllerArgs(templatesDir string) error {
- --redirect-ingress-class={{ .Values.redirect.ingressClass }}
- --redirect-cluster-issuer={{ .Values.redirect.clusterIssuer.name }}
{{- end }}
{{- if .Values.redirect.blockedIPv6CIDRs }}
- --redirect-blocked-ipv6={{ join "," .Values.redirect.blockedIPv6CIDRs }}
{{- end }}
{{- if .Values.redirect.namespace }}
- --redirect-namespace={{ .Values.redirect.namespace }}
{{- end }}`
Expand Down
110 changes: 110 additions & 0 deletions internal/controller/decoredirect_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import (
"context"
"crypto/sha256"
"fmt"
"net"
"net/http"
"strconv"
"strings"
"time"
Expand All @@ -30,6 +32,14 @@ type DecoRedirectReconciler struct {
Scheme *runtime.Scheme
IngressClass string // nginx ingress class name, e.g. "nginx"
ClusterIssuer string // cert-manager ClusterIssuer name, e.g. "letsencrypt"
// BlockedIPv6CIDRs is a list of IPv6 CIDR ranges that, if present in a domain's
// AAAA records, indicate DNS is not ready for cert issuance. Typically legacy
// infrastructure addresses that intercept Let's Encrypt validation incorrectly.
// When empty, no AAAA check is performed.
BlockedIPv6CIDRs []*net.IPNet
// DNSReadyFunc checks if the domain DNS is correctly pointing to the redirect infrastructure.
// Defaults to isDNSReady. Injectable for testing.
DNSReadyFunc func(ctx context.Context, domain string) bool
}

// dummyBackendName satisfies the k8s Ingress API requirement for a backend on every path.
Expand All @@ -51,6 +61,15 @@ func (r *DecoRedirectReconciler) Reconcile(ctx context.Context, req ctrl.Request
return ctrl.Result{}, client.IgnoreNotFound(err)
}

// Auto-heal: if Certificate is stuck in Failed backoff and DNS is now correct, delete it
// so reconcileCertificate recreates it fresh and cert-manager retries without backoff.
if healed, err := r.maybeHealCertificate(ctx, rd); err != nil {
log.Error(err, "failed to heal Certificate")
return ctrl.Result{}, err
} else if healed {
return ctrl.Result{RequeueAfter: 2 * time.Second}, nil
}

if err := r.reconcileCertificate(ctx, rd); err != nil {
log.Error(err, "failed to reconcile Certificate")
return ctrl.Result{}, err
Expand Down Expand Up @@ -86,6 +105,10 @@ func (r *DecoRedirectReconciler) reconcileCertificate(ctx context.Context, rd *d
}

_, err := controllerutil.CreateOrUpdate(ctx, r.Client, cert, func() error {
// Skip mutation while the object is being deleted — the Watch will re-trigger once gone.
if cert.DeletionTimestamp != nil {
return nil
}
cert.Spec.SecretName = tlsSecretName(rd.Spec.From)
cert.Spec.DNSNames = []string{rd.Spec.From}
cert.Spec.IssuerRef = cmmeta.ObjectReference{
Expand Down Expand Up @@ -191,6 +214,93 @@ func (r *DecoRedirectReconciler) updateStatus(ctx context.Context, rd *decosites
return certReady, r.Status().Patch(ctx, patch, client.MergeFrom(rd))
}

// maybeHealCertificate removes the Certificate belonging to rd when cert-manager
// has parked it in the Failed state and the DNS probe says the domain is ready.
//
// It returns true only when a delete was issued successfully, in which case the
// caller is expected to requeue before recreating the Certificate. A missing
// Certificate (on lookup or on delete) is not treated as an error and yields
// (false, nil). Any other API error is returned unchanged.
func (r *DecoRedirectReconciler) maybeHealCertificate(ctx context.Context, rd *decositesv1alpha1.DecoRedirect) (bool, error) {
	logger := logf.FromContext(ctx)
	domain := rd.Spec.From

	key := types.NamespacedName{Namespace: rd.Namespace, Name: resourceName(domain)}
	var existing cmv1.Certificate
	if getErr := r.Get(ctx, key, &existing); getErr != nil {
		return false, client.IgnoreNotFound(getErr)
	}

	// Nothing to heal when the object is already on its way out, or when it is
	// not in the Failed backoff state.
	switch {
	case existing.DeletionTimestamp != nil:
		return false, nil
	case !isCertFailed(&existing):
		return false, nil
	}

	// Use the injected probe when one is configured; otherwise fall back to the
	// real network check.
	probe := r.isDNSReady
	if r.DNSReadyFunc != nil {
		probe = r.DNSReadyFunc
	}
	if ready := probe(ctx, domain); !ready {
		logger.Info("certificate in Failed backoff but DNS not ready yet", "domain", domain)
		return false, nil
	}

	logger.Info("certificate in Failed backoff and DNS is ready — deleting to trigger retry", "domain", domain)
	if delErr := r.Delete(ctx, &existing); delErr != nil {
		return false, client.IgnoreNotFound(delErr)
	}
	return true, nil
}

// isCertFailed reports whether cert carries an Issuing condition whose status is
// False with reason "Failed" — the state in which a failed issuance attempt is
// waiting out cert-manager's backoff.
//
// Only the first Issuing condition found is consulted; a Certificate without
// any Issuing condition is reported as not failed.
func isCertFailed(cert *cmv1.Certificate) bool {
	conds := cert.Status.Conditions
	for i := range conds {
		if conds[i].Type != cmv1.CertificateConditionIssuing {
			continue
		}
		stuck := conds[i].Status == cmmeta.ConditionFalse && conds[i].Reason == "Failed"
		return stuck
	}
	return false
}

// isDNSReady checks that the domain is correctly pointing to the redirect infrastructure:
//  1. A plain-HTTP GET to the domain (redirects are not followed) answers with the
//     header "X-Redirect-By: deco", i.e. the response was served by the redirect nginx.
//  2. No AAAA record falls within any BlockedIPv6CIDRs range, which would cause
//     Let's Encrypt's IPv6 validation to reach the wrong server and fail the challenge.
//     This step is skipped entirely when BlockedIPv6CIDRs is empty.
//
// ctx cancels both network steps. Each step is additionally bounded by its own
// 5-second timeout so a slow server or resolver cannot stall the caller.
//
// It returns true only when every applicable check passes; any request, transport,
// or resolver error is reported as "not ready" (false).
//
// NOTE(review): domain is concatenated into the probe URL unescaped; presumably it
// is validated upstream as a bare hostname — confirm.
func (r *DecoRedirectReconciler) isDNSReady(ctx context.Context, domain string) bool {
	// probeTimeout bounds each network step (HTTP probe, DNS lookup) separately.
	const probeTimeout = 5 * time.Second

	httpClient := &http.Client{
		// Inspect the first response itself instead of following the redirect.
		CheckRedirect: func(*http.Request, []*http.Request) error { return http.ErrUseLastResponse },
		Timeout:       probeTimeout,
	}
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://"+domain+"/", nil)
	if err != nil {
		return false
	}
	resp, err := httpClient.Do(req)
	if err != nil {
		return false
	}
	// Only the response headers matter; the body is closed unread.
	_ = resp.Body.Close()
	if resp.Header.Get("X-Redirect-By") != "deco" {
		return false
	}

	if len(r.BlockedIPv6CIDRs) == 0 {
		return true
	}

	// The HTTP client timeout does not cover this lookup, so give the resolver
	// its own deadline rather than relying on the caller's context alone.
	lookupCtx, cancel := context.WithTimeout(ctx, probeTimeout)
	defer cancel()

	addrs, err := net.DefaultResolver.LookupIPAddr(lookupCtx, domain)
	if err != nil {
		return false
	}
	return !hasBlockedIPv6(addrs, blockedOrNil(r.BlockedIPv6CIDRs))
}

// blockedOrNil returns cidrs unchanged; it exists only to keep the call site readable.
func blockedOrNil(cidrs []*net.IPNet) []*net.IPNet { return cidrs }

// hasBlockedIPv6 reports whether any non-IPv4 address in addrs lies inside one of
// the blocked networks. IPv4 addresses are ignored, and nil entries in blocked
// are skipped instead of causing a nil-pointer panic.
func hasBlockedIPv6(addrs []net.IPAddr, blocked []*net.IPNet) bool {
	for _, a := range addrs {
		ip := a.IP
		if ip.To4() != nil {
			continue
		}
		for _, cidr := range blocked {
			if cidr == nil {
				continue
			}
			if cidr.Contains(ip) {
				return true
			}
		}
	}
	return false
}

// resourceName returns a deterministic k8s-safe name for a domain, capped at 253 chars.
// "client.com" → "redirect-client-com"
func resourceName(domain string) string {
Expand Down
157 changes: 157 additions & 0 deletions internal/controller/decoredirect_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,4 +184,161 @@ var _ = Describe("DecoRedirect Controller", func() {
Expect(ing.Annotations["nginx.ingress.kubernetes.io/permanent-redirect-code"]).To(Equal("301"))
})
})

// Specs for maybeHealCertificate: the Certificate must be deleted only when its
// Issuing condition is False/Failed AND the (stubbed) DNS probe reports ready.
// In every other state the Certificate must be left in place.
Context("Auto-healing: maybeHealCertificate", func() {
// Namespace in which all objects for these specs are created.
const healNS = "default"
ctx := context.Background()

// newReconciler builds a reconciler whose DNS probe is replaced by a stub that
// always returns dnsReady, so these specs never perform the real network check.
newReconciler := func(dnsReady bool) *DecoRedirectReconciler {
return &DecoRedirectReconciler{
Client: k8sClient,
Scheme: k8sClient.Scheme(),
IngressClass: "nginx",
ClusterIssuer: "letsencrypt",
DNSReadyFunc: func(_ context.Context, _ string) bool { return dnsReady },
}
}

// Each test uses a unique name to avoid state sharing between tests.
// setup creates a DecoRedirect for the domain "heal-<suffix>.com" and returns:
//   - nn:      the key of the created DecoRedirect ("heal-<suffix>-com"),
//   - certNN:  the key under which its Certificate is expected ("redirect-heal-<suffix>-com"),
//   - cleanup: a best-effort teardown that deletes the DecoRedirect and the
//     Certificate if they still exist (delete errors are ignored).
setup := func(suffix string) (nn, certNN types.NamespacedName, cleanup func()) {
name := "heal-" + suffix
domain := name + ".com"
nn = types.NamespacedName{Name: name + "-com", Namespace: healNS}
certNN = types.NamespacedName{Name: "redirect-" + name + "-com", Namespace: healNS}

rd := &decositesv1alpha1.DecoRedirect{
ObjectMeta: metav1.ObjectMeta{Name: name + "-com", Namespace: healNS},
Spec: decositesv1alpha1.DecoRedirectSpec{
From: domain,
To: "https://www." + domain,
},
}
Expect(k8sClient.Create(ctx, rd)).To(Succeed())

cleanup = func() {
r := &decositesv1alpha1.DecoRedirect{}
if err := k8sClient.Get(ctx, nn, r); err == nil {
_ = k8sClient.Delete(ctx, r)
}
c := &cmv1.Certificate{}
if err := k8sClient.Get(ctx, certNN, c); err == nil {
_ = k8sClient.Delete(ctx, c)
}
}
return nn, certNN, cleanup
}

// patchCertFailed overwrites the Certificate's status conditions with
// Ready=False (DoesNotExist) and Issuing=False (Failed) — the state that
// isCertFailed recognises. The `&[]metav1.Time{metav1.Now()}[0]` expression
// is just a way to take the address of a freshly created metav1.Time.
patchCertFailed := func(certNN types.NamespacedName) {
cert := &cmv1.Certificate{}
Expect(k8sClient.Get(ctx, certNN, cert)).To(Succeed())
patch := cert.DeepCopy()
patch.Status.Conditions = []cmv1.CertificateCondition{
{Type: cmv1.CertificateConditionReady, Status: "False", Reason: "DoesNotExist", Message: "secret not found", LastTransitionTime: &[]metav1.Time{metav1.Now()}[0]},
{Type: cmv1.CertificateConditionIssuing, Status: "False", Reason: "Failed", Message: "cert request failed", LastTransitionTime: &[]metav1.Time{metav1.Now()}[0]},
}
Expect(k8sClient.Status().Patch(ctx, patch, client.MergeFrom(cert))).To(Succeed())
}

// Failed + DNS ready: the only combination in which healing deletes the Certificate.
It("should delete the Certificate when it is in Failed backoff and DNS is ready", func() {
nn, certNN, cleanup := setup("delete")
DeferCleanup(cleanup)

// An initial reconcile is run so the Certificate exists before its status is patched.
_, err := newReconciler(true).Reconcile(ctx, reconcile.Request{NamespacedName: nn})
Expect(err).NotTo(HaveOccurred())

patchCertFailed(certNN)

rd := &decositesv1alpha1.DecoRedirect{}
Expect(k8sClient.Get(ctx, nn, rd)).To(Succeed())

healed, err := newReconciler(true).maybeHealCertificate(ctx, rd)
Expect(err).NotTo(HaveOccurred())
Expect(healed).To(BeTrue())

// The Certificate must be gone after a successful heal.
cert := &cmv1.Certificate{}
Expect(k8sClient.Get(ctx, certNN, cert)).To(MatchError(ContainSubstring("not found")))
})

// Failed + DNS not ready: healing must be a no-op.
It("should NOT delete the Certificate when DNS is not ready", func() {
nn, certNN, cleanup := setup("dns-wrong")
DeferCleanup(cleanup)

// An initial reconcile is run so the Certificate exists before its status is patched.
_, err := newReconciler(false).Reconcile(ctx, reconcile.Request{NamespacedName: nn})
Expect(err).NotTo(HaveOccurred())

patchCertFailed(certNN)

rd := &decositesv1alpha1.DecoRedirect{}
Expect(k8sClient.Get(ctx, nn, rd)).To(Succeed())

healed, err := newReconciler(false).maybeHealCertificate(ctx, rd)
Expect(err).NotTo(HaveOccurred())
Expect(healed).To(BeFalse())

// The Certificate must still be retrievable.
cert := &cmv1.Certificate{}
Expect(k8sClient.Get(ctx, certNN, cert)).To(Succeed())
})

// Issuing=True is not the Failed state, so healing must not touch the Certificate
// even though the DNS stub reports ready.
It("should NOT delete the Certificate when it is Issuing (actively trying)", func() {
nn, certNN, cleanup := setup("issuing")
DeferCleanup(cleanup)

_, err := newReconciler(true).Reconcile(ctx, reconcile.Request{NamespacedName: nn})
Expect(err).NotTo(HaveOccurred())

// Mark the Certificate as actively issuing.
cert := &cmv1.Certificate{}
Expect(k8sClient.Get(ctx, certNN, cert)).To(Succeed())
patch := cert.DeepCopy()
patch.Status.Conditions = []cmv1.CertificateCondition{
{Type: cmv1.CertificateConditionIssuing, Status: "True", Reason: "Issuing", LastTransitionTime: &[]metav1.Time{metav1.Now()}[0]},
}
Expect(k8sClient.Status().Patch(ctx, patch, client.MergeFrom(cert))).To(Succeed())

rd := &decositesv1alpha1.DecoRedirect{}
Expect(k8sClient.Get(ctx, nn, rd)).To(Succeed())

healed, err := newReconciler(true).maybeHealCertificate(ctx, rd)
Expect(err).NotTo(HaveOccurred())
Expect(healed).To(BeFalse())

Expect(k8sClient.Get(ctx, certNN, cert)).To(Succeed())
})

// A Certificate with only a Ready=True condition has no Issuing condition at all,
// so it is not considered failed and must be left alone.
It("should NOT delete the Certificate when it is Ready", func() {
nn, certNN, cleanup := setup("ready")
DeferCleanup(cleanup)

_, err := newReconciler(true).Reconcile(ctx, reconcile.Request{NamespacedName: nn})
Expect(err).NotTo(HaveOccurred())

// Mark the Certificate as Ready.
cert := &cmv1.Certificate{}
Expect(k8sClient.Get(ctx, certNN, cert)).To(Succeed())
patch := cert.DeepCopy()
patch.Status.Conditions = []cmv1.CertificateCondition{
{Type: cmv1.CertificateConditionReady, Status: "True", Reason: "Ready", LastTransitionTime: &[]metav1.Time{metav1.Now()}[0]},
}
Expect(k8sClient.Status().Patch(ctx, patch, client.MergeFrom(cert))).To(Succeed())

rd := &decositesv1alpha1.DecoRedirect{}
Expect(k8sClient.Get(ctx, nn, rd)).To(Succeed())

healed, err := newReconciler(true).maybeHealCertificate(ctx, rd)
Expect(err).NotTo(HaveOccurred())
Expect(healed).To(BeFalse())

Expect(k8sClient.Get(ctx, certNN, cert)).To(Succeed())
})

// No reconcile is run here, so healing is invoked without a prior Certificate:
// it must report "not healed" and no error.
It("should do nothing when the Certificate does not exist yet", func() {
nn, _, cleanup := setup("no-cert")
DeferCleanup(cleanup)

rd := &decositesv1alpha1.DecoRedirect{}
Expect(k8sClient.Get(ctx, nn, rd)).To(Succeed())

healed, err := newReconciler(true).maybeHealCertificate(ctx, rd)
Expect(err).NotTo(HaveOccurred())
Expect(healed).To(BeFalse())
})
})
})
Loading