Skip to content

Commit 7c45eb0

Browse files
stuggi and claude committed
dataplane: Make MachineConfig CRD dependency optional
Problem:

When deploying the openstack-operator on clusters where the OpenShift Machine Config Operator (MCO) is not installed, the openstackdataplanenodeset controller fails to start with the error:

    ERROR setup problem running manager {"error": "failed to wait for openstackdataplanenodeset caches to sync: timed out waiting for cache to be synced for Kind *v1.MachineConfig"}

This occurs because the controller unconditionally sets up a watch for MachineConfig resources in SetupWithManager(). When the MachineConfig CRD doesn't exist (e.g., on non-OpenShift Kubernetes clusters or clusters without MCO), the informer cache sync times out and the controller fails to start. Similarly, the disconnected environment detection code (IsDisconnectedOCP) would fail if the ImageContentSourcePolicy or ImageDigestMirrorSet CRDs don't exist.

Solution:

Implement conditional/dynamic CRD watching for MachineConfig:

1. Remove the MachineConfig watch from SetupWithManager() so the controller can start without the CRD being present.
2. Add an ensureMachineConfigWatch() function that:
   - Checks if the MachineConfig CRD exists by querying apiextensions.k8s.io/v1/CustomResourceDefinition
   - Dynamically adds the watch using Controller.Watch() if the CRD exists
   - Tracks watched resources to avoid duplicate watch registration
   - Logs an informational message if the CRD is not available
3. Call ensureMachineConfigWatch() at the start of each reconciliation to attempt setting up the watch when the CRD becomes available.
4. Update IsDisconnectedOCP() to handle missing ICSP/IDMS CRDs gracefully instead of returning an error.
5. Update inventory generation error handling to distinguish between:
   - IsNoMatchError (CRD not installed): Log warning and continue. This is expected on non-OpenShift clusters or clusters without MCO.
   - IsNotFound (CRD exists but resource missing): Return error. If MCO is installed and a disconnected environment is detected, the registry MachineConfig should exist.
Missing resource indicates misconfiguration.

This allows the operator to:

- Start successfully even without the MachineConfig CRD
- Work on non-OpenShift Kubernetes clusters
- Gracefully degrade disconnected environment support
- Automatically enable MachineConfig watching if the CRD becomes available later
- Report actual misconfigurations when MCO is present but the registry MachineConfig is missing

The MachineConfig watch is used for disconnected/mirrored environments to detect changes to the 99-master-generated-registries MachineConfig and propagate registry configuration to dataplane nodes. This feature is now optional rather than required.

Manual registry configuration without MCO:

If the MachineConfig CRD is not available but you need to configure container registries on dataplane nodes, you can set the ansible variables directly in the OpenStackDataPlaneNodeSet spec:

    spec:
      nodeTemplate:
        ansible:
          ansibleVars:
            edpm_podman_disconnected_ocp: true
            edpm_podman_registries_conf: |
              unqualified-search-registries = ["registry.access.redhat.com"]
              [[registry]]
              prefix = ""
              location = "quay.io/openstack-k8s-operators"
              [[registry.mirror]]
              location = "my-registry.example.com/openstack-k8s-operators"

Fixes: cache sync timeout when MachineConfig CRD is not installed
Related: OSPRH-24026

Co-Authored-By: Claude <noreply@anthropic.com>
Signed-off-by: Martin Schuppert <mschuppert@redhat.com>
1 parent 42a817d commit 7c45eb0

3 files changed

Lines changed: 145 additions & 20 deletions

File tree

internal/controller/dataplane/openstackdataplanenodeset_controller.go

Lines changed: 93 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,17 +29,22 @@ import (
2929
rbacv1 "k8s.io/api/rbac/v1"
3030
k8s_errors "k8s.io/apimachinery/pkg/api/errors"
3131
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
32+
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
3233
"k8s.io/apimachinery/pkg/fields"
3334
"k8s.io/apimachinery/pkg/runtime"
35+
"k8s.io/apimachinery/pkg/runtime/schema"
3436
"k8s.io/apimachinery/pkg/types"
3537
"k8s.io/client-go/kubernetes"
3638
ctrl "sigs.k8s.io/controller-runtime"
3739
"sigs.k8s.io/controller-runtime/pkg/builder"
40+
"sigs.k8s.io/controller-runtime/pkg/cache"
3841
"sigs.k8s.io/controller-runtime/pkg/client"
42+
"sigs.k8s.io/controller-runtime/pkg/controller"
3943
"sigs.k8s.io/controller-runtime/pkg/handler"
4044
"sigs.k8s.io/controller-runtime/pkg/log"
4145
"sigs.k8s.io/controller-runtime/pkg/predicate"
4246
"sigs.k8s.io/controller-runtime/pkg/reconcile"
47+
"sigs.k8s.io/controller-runtime/pkg/source"
4348

4449
"github.com/go-logr/logr"
4550
infranetworkv1 "github.com/openstack-k8s-operators/infra-operator/apis/network/v1beta1"
@@ -68,8 +73,11 @@ const (
6873
// OpenStackDataPlaneNodeSetReconciler reconciles a OpenStackDataPlaneNodeSet object
6974
type OpenStackDataPlaneNodeSetReconciler struct {
7075
client.Client
71-
Kclient kubernetes.Interface
72-
Scheme *runtime.Scheme
76+
Kclient kubernetes.Interface
77+
Scheme *runtime.Scheme
78+
Controller controller.Controller
79+
Cache cache.Cache
80+
Watching map[string]bool
7381
}
7482

7583
// GetLogger returns a logger object with a prefix of "controller.name" and additional controller context fields
@@ -141,6 +149,10 @@ func (r *OpenStackDataPlaneNodeSetReconciler) Reconcile(ctx context.Context, req
141149
Log := r.GetLogger(ctx)
142150
Log.Info("Reconciling NodeSet")
143151

152+
// Try to set up MachineConfig watch if not already done
153+
// This is done conditionally because MachineConfig CRD may not exist on all clusters
154+
r.ensureMachineConfigWatch(ctx)
155+
144156
validate := validator.New()
145157

146158
// Fetch the OpenStackDataPlaneNodeSet instance
@@ -669,7 +681,12 @@ func (r *OpenStackDataPlaneNodeSetReconciler) SetupWithManager(
669681
}); err != nil {
670682
return err
671683
}
672-
return ctrl.NewControllerManagedBy(mgr).
684+
// Initialize the Watching map for conditional CRD watches
685+
r.Watching = make(map[string]bool)
686+
r.Cache = mgr.GetCache()
687+
688+
// Build the controller without MachineConfig watch (added conditionally later)
689+
c, err := ctrl.NewControllerManagedBy(mgr).
673690
For(&dataplanev1.OpenStackDataPlaneNodeSet{},
674691
builder.WithPredicates(predicate.Or(
675692
predicate.GenerationChangedPredicate{},
@@ -692,10 +709,15 @@ func (r *OpenStackDataPlaneNodeSetReconciler) SetupWithManager(
692709
builder.WithPredicates(predicate.ResourceVersionChangedPredicate{})).
693710
Watches(&openstackv1.OpenStackVersion{},
694711
handler.EnqueueRequestsFromMapFunc(r.genericWatcherFn)).
695-
Watches(&machineconfig.MachineConfig{},
696-
handler.EnqueueRequestsFromMapFunc(r.machineConfigWatcherFn),
697-
builder.WithPredicates(predicate.ResourceVersionChangedPredicate{})).
698-
Complete(r)
712+
// NOTE: MachineConfig watch is added conditionally during reconciliation
713+
// to avoid failures when the MachineConfig CRD doesn't exist
714+
Build(r)
715+
716+
if err != nil {
717+
return err
718+
}
719+
r.Controller = c
720+
return nil
699721
}
700722

701723
// machineConfigWatcherFn - watches for changes to the registries MachineConfig resource and queues
@@ -734,6 +756,70 @@ func (r *OpenStackDataPlaneNodeSetReconciler) machineConfigWatcherFn(
734756
return requests
735757
}
736758

759+
// machineConfigWatcherFnTyped - typed version of machineConfigWatcherFn for use with source.Kind
760+
func (r *OpenStackDataPlaneNodeSetReconciler) machineConfigWatcherFnTyped(
761+
ctx context.Context, obj *machineconfig.MachineConfig,
762+
) []reconcile.Request {
763+
return r.machineConfigWatcherFn(ctx, obj)
764+
}
765+
766+
// machineConfigCRDName is the object name of the MachineConfig CustomResourceDefinition.
// It is used to look the CRD up before registering the MachineConfig watch, and it
// doubles as the key under which the reconciler's Watching map records that the
// watch has been registered.
const machineConfigCRDName = "machineconfigs.machineconfiguration.openshift.io"
767+
768+
// ensureMachineConfigWatch attempts to set up a watch for MachineConfig resources.
769+
// This is done conditionally because the MachineConfig CRD may not exist on all clusters
770+
// (e.g., non-OpenShift Kubernetes clusters or clusters without the Machine Config Operator).
771+
// Returns true if the CRD is available (watch was set up or already exists), false otherwise.
772+
func (r *OpenStackDataPlaneNodeSetReconciler) ensureMachineConfigWatch(ctx context.Context) bool {
773+
Log := r.GetLogger(ctx)
774+
775+
// Check if we're already watching
776+
if r.Watching[machineConfigCRDName] {
777+
return true
778+
}
779+
780+
// Check if the MachineConfig CRD exists
781+
crd := &unstructured.Unstructured{}
782+
crd.SetGroupVersionKind(schema.GroupVersionKind{
783+
Group: "apiextensions.k8s.io",
784+
Kind: "CustomResourceDefinition",
785+
Version: "v1",
786+
})
787+
788+
err := r.Get(ctx, client.ObjectKey{Name: machineConfigCRDName}, crd)
789+
if err != nil {
790+
if k8s_errors.IsNotFound(err) {
791+
Log.Info("MachineConfig CRD not found, disconnected environment features disabled")
792+
} else {
793+
Log.Error(err, "Error checking for MachineConfig CRD")
794+
}
795+
return false
796+
}
797+
798+
// CRD exists, set up the watch
799+
Log.Info("MachineConfig CRD found, enabling watch for disconnected environment support")
800+
err = r.Controller.Watch(
801+
source.Kind(
802+
r.Cache,
803+
&machineconfig.MachineConfig{},
804+
handler.TypedEnqueueRequestsFromMapFunc(r.machineConfigWatcherFnTyped),
805+
predicate.TypedResourceVersionChangedPredicate[*machineconfig.MachineConfig]{},
806+
),
807+
)
808+
if err != nil {
809+
Log.Error(err, "Failed to set up MachineConfig watch")
810+
return false
811+
}
812+
813+
r.Watching[machineConfigCRDName] = true
814+
Log.Info("Successfully set up MachineConfig watch")
815+
return true
816+
}
817+
818+
// IsMachineConfigAvailable returns true if the MachineConfig CRD is available and being watched
819+
func (r *OpenStackDataPlaneNodeSetReconciler) IsMachineConfigAvailable() bool {
820+
return r.Watching[machineConfigCRDName]
821+
}
822+
737823
func (r *OpenStackDataPlaneNodeSetReconciler) secretWatcherFn(
738824
ctx context.Context, obj client.Object,
739825
) []reconcile.Request {

internal/dataplane/inventory.go

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -144,13 +144,28 @@ func GenerateNodeSetInventory(ctx context.Context, helper *helper.Helper,
144144
if isDisconnected {
145145
registryConfig, err := util.GetMCRegistryConf(ctx, helper)
146146
if err != nil {
147-
return "", err
148-
}
149-
helper.GetLogger().Info("disconnected registry was identified via the ImageContentSourcePolicy. Using OCP registry.")
147+
// CRD not installed (non-OpenShift or no MCO) - log warning and continue.
148+
// This allows graceful degradation when running on non-OpenShift clusters.
149+
// Users can manually configure registries.conf via ansibleVars.
150+
if util.IsNoMatchError(err) {
151+
helper.GetLogger().Info("Disconnected environment detected but MachineConfig CRD not available. "+
152+
"Registry configuration will not be propagated to dataplane nodes. "+
153+
"You may need to configure registries.conf manually using ansibleVars "+
154+
"(edpm_podman_disconnected_ocp and edpm_podman_registries_conf).",
155+
"error", err.Error())
156+
} else {
157+
// CRD exists but resource not found, or other errors (network issues,
158+
// permissions, etc.) - return the error. If MCO is installed but the
159+
// registry MachineConfig doesn't exist, this indicates a misconfiguration.
160+
return "", fmt.Errorf("failed to get MachineConfig registry configuration: %w", err)
161+
}
162+
} else {
163+
helper.GetLogger().Info("disconnected registry was identified via the ImageContentSourcePolicy. Using OCP registry.")
150164

151-
// Use OCP registries.conf for disconnected deployments
152-
nodeSetGroup.Vars["edpm_podman_registries_conf"] = registryConfig
153-
nodeSetGroup.Vars["edpm_podman_disconnected_ocp"] = isDisconnected
165+
// Use OCP registries.conf for disconnected deployments
166+
nodeSetGroup.Vars["edpm_podman_registries_conf"] = registryConfig
167+
nodeSetGroup.Vars["edpm_podman_disconnected_ocp"] = isDisconnected
168+
}
154169
}
155170

156171
// add TLS ansible variable

internal/dataplane/util/image_registry.go

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -38,27 +38,51 @@ type machineConfigIgnition struct {
3838

3939
// IsDisconnectedOCP - Will retrieve a CR's related to disconnected OCP deployments. If the list is not
4040
// empty, we can infer that the OCP cluster is a disconnected deployment.
41+
// Returns false without error if the CRDs don't exist (non-OpenShift cluster).
4142
func IsDisconnectedOCP(ctx context.Context, helper *helper.Helper) (bool, error) {
4243
icspList := ocpicsp.ImageContentSourcePolicyList{}
4344
idmsList := ocpidms.ImageDigestMirrorSetList{}
4445

4546
listOpts := []client.ListOption{}
4647

47-
var err error
48-
err = helper.GetClient().List(ctx, &icspList, listOpts...)
48+
var icspCount, idmsCount int
49+
50+
err := helper.GetClient().List(ctx, &icspList, listOpts...)
4951
if err != nil {
50-
return false, err
52+
// If the CRD doesn't exist, this is not an OpenShift cluster or ICSP is not available
53+
// This is not an error condition - just means we're not in a disconnected environment
54+
if IsNoMatchError(err) {
55+
helper.GetLogger().Info("ImageContentSourcePolicy CRD not available, assuming not a disconnected environment")
56+
} else {
57+
return false, err
58+
}
59+
} else {
60+
icspCount = len(icspList.Items)
5161
}
62+
5263
err = helper.GetClient().List(ctx, &idmsList, listOpts...)
5364
if err != nil {
54-
return false, err
65+
// If the CRD doesn't exist, this is not an OpenShift cluster or IDMS is not available
66+
if IsNoMatchError(err) {
67+
helper.GetLogger().Info("ImageDigestMirrorSet CRD not available, assuming not a disconnected environment")
68+
} else {
69+
return false, err
70+
}
71+
} else {
72+
idmsCount = len(idmsList.Items)
5573
}
5674

57-
if len(icspList.Items) != 0 || len(idmsList.Items) != 0 {
58-
return true, err
75+
if icspCount != 0 || idmsCount != 0 {
76+
return true, nil
5977
}
6078

61-
return false, err
79+
return false, nil
80+
}
81+
82+
// IsNoMatchError reports whether err indicates that the requested resource kind is
// not known to the API server, which is what a missing CRD looks like to the client.
//
// Detection is a substring match on the error text ("no matches for kind"), so it
// also matches wrapped errors whose message still contains that text. A nil err
// returns false rather than panicking.
//
// NOTE(review): a type-based check (e.g. apimachinery's meta.IsNoMatchError) would be
// more robust than matching on the message — consider switching if that package is
// acceptable as an import here.
func IsNoMatchError(err error) bool {
	if err == nil {
		return false
	}
	return strings.Contains(err.Error(), "no matches for kind")
}
6387

6488
// GetMCRegistryConf - will unmarshal the MachineConfig ignition file the machineConfigIgnition object.

0 commit comments

Comments
 (0)