From bf4fb36a3c3f8ada1deb98c92fa099702ca1d215 Mon Sep 17 00:00:00 2001 From: Julius Clausnitzer Date: Wed, 17 Jun 2026 16:32:28 +0200 Subject: [PATCH 1/8] domain resolver --- .../reservations/commitments/config.go | 7 + .../commitments/domain_resolver.go | 65 ++++++ .../commitments/domain_resolver_test.go | 181 +++++++++++++++ .../commitments/reservation_controller.go | 56 ++++- .../reservation_controller_test.go | 212 ++++++++++++++++++ 5 files changed, 516 insertions(+), 5 deletions(-) create mode 100644 internal/scheduling/reservations/commitments/domain_resolver.go create mode 100644 internal/scheduling/reservations/commitments/domain_resolver_test.go diff --git a/internal/scheduling/reservations/commitments/config.go b/internal/scheduling/reservations/commitments/config.go index 8e18c2f9f..ffe316d2a 100644 --- a/internal/scheduling/reservations/commitments/config.go +++ b/internal/scheduling/reservations/commitments/config.go @@ -8,6 +8,7 @@ import ( "time" "github.com/go-logr/logr" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -67,6 +68,12 @@ type ReservationControllerConfig struct { PipelineDefault string `json:"pipelineDefault"` // FlavorGroupPipelines maps flavor group IDs to pipeline names; "*" acts as catch-all. FlavorGroupPipelines map[string]string `json:"flavorGroupPipelines,omitempty"` + // KeystoneSecretRef references a Kubernetes Secret that holds OpenStack credentials + // used to resolve domain IDs to domain names for the domain_name scheduler hint. + // The secret must contain the same keys as the syncer's keystoneSecretRef. + // When empty, domain name resolution is skipped and filter_external_customer will + // not enforce domain restrictions for CR reservations. + KeystoneSecretRef corev1.SecretReference `json:"keystoneSecretRef,omitempty"` } // CommittedResourceControllerConfig holds tuning knobs for the CommittedResource CRD controller. diff --git a/internal/scheduling/reservations/commitments/domain_resolver.go b/internal/scheduling/reservations/commitments/domain_resolver.go new file mode 100644 index 000000000..2d5fc4eab --- /dev/null +++ b/internal/scheduling/reservations/commitments/domain_resolver.go @@ -0,0 +1,65 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package commitments + +import ( + "context" + "fmt" + "sync" + + "github.com/gophercloud/gophercloud/v2" + "github.com/gophercloud/gophercloud/v2/openstack/identity/v3/domains" +) + +// DomainResolver resolves OpenStack domain IDs to their human-readable names. +// Implementations must be safe for concurrent use. +type DomainResolver interface { + // ResolveDomainName returns the name of the domain with the given ID. + // Returns an error if the domain cannot be found or the lookup fails. + ResolveDomainName(ctx context.Context, domainID string) (string, error) +} + +// keystoneDomainResolver resolves domain IDs via the Keystone identity API. +// Names are cached indefinitely — domain names are immutable for the lifetime +// of an OpenStack deployment, and the controller is a long-lived process. +type keystoneDomainResolver struct { + sc *gophercloud.ServiceClient + mu sync.RWMutex + cache map[string]string // domainID → name +} + +// newKeystoneDomainResolver creates a resolver backed by the given Keystone service client. +// The caller is responsible for authenticating the provider before passing sc here. +func newKeystoneDomainResolver(sc *gophercloud.ServiceClient) *keystoneDomainResolver { + return &keystoneDomainResolver{ + sc: sc, + cache: make(map[string]string), + } +} + +// ResolveDomainName returns the domain name for domainID, fetching it from Keystone on +// first access and serving subsequent calls from the in-process cache. +func (r *keystoneDomainResolver) ResolveDomainName(ctx context.Context, domainID string) (string, error) { + r.mu.RLock() + if name, ok := r.cache[domainID]; ok { + r.mu.RUnlock() + return name, nil + } + r.mu.RUnlock() + + // Upgrade to write-lock. Re-check after acquiring the write-lock to avoid a + // redundant Keystone call when two goroutines race on the same uncached ID. + r.mu.Lock() + defer r.mu.Unlock() + if name, ok := r.cache[domainID]; ok { + return name, nil + } + + domain, err := domains.Get(ctx, r.sc, domainID).Extract() + if err != nil { + return "", fmt.Errorf("keystone: failed to resolve domain %q: %w", domainID, err) + } + r.cache[domainID] = domain.Name + return domain.Name, nil +} diff --git a/internal/scheduling/reservations/commitments/domain_resolver_test.go b/internal/scheduling/reservations/commitments/domain_resolver_test.go new file mode 100644 index 000000000..69009fbcb --- /dev/null +++ b/internal/scheduling/reservations/commitments/domain_resolver_test.go @@ -0,0 +1,181 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package commitments + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + "net/http/httptest" + "sync" + "sync/atomic" + "testing" + + "github.com/gophercloud/gophercloud/v2" +) + +// newDomainResolverTestServer creates an httptest.Server that serves Keystone +// GET /domains/{id} responses. The provided map controls what is returned for +// each domain ID. Unknown IDs get a 404. +func newDomainResolverTestServer(t *testing.T, domainsByID map[string]string) (*httptest.Server, *atomic.Int32) { + t.Helper() + var callCount atomic.Int32 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Keystone endpoint is registered as the base URL, so gophercloud appends + // the path "/domains/" directly. + // Path format: /domains/ + const prefix = "/domains/" + if len(r.URL.Path) <= len(prefix) { + http.Error(w, "bad path", http.StatusBadRequest) + return + } + id := r.URL.Path[len(prefix):] + callCount.Add(1) + name, ok := domainsByID[id] + if !ok { + http.Error(w, fmt.Sprintf("domain %q not found", id), http.StatusNotFound) + return + } + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]any{ + "domain": map[string]any{"id": id, "name": name}, + }) + })) + return server, &callCount +} + +// newTestServiceClient builds a gophercloud.ServiceClient whose Endpoint points +// at the given test server URL (with trailing slash as gophercloud expects). +func newTestServiceClient(serverURL string) *gophercloud.ServiceClient { + provider := &gophercloud.ProviderClient{} + return &gophercloud.ServiceClient{ + ProviderClient: provider, + Endpoint: serverURL + "/", + Type: "identity", + } +} + +func TestKeystoneDomainResolver_ResolveDomainName(t *testing.T) { + server, callCount := newDomainResolverTestServer(t, map[string]string{ + "domain-abc": "monsoon3", + "domain-xyz": "cc3test", + }) + defer server.Close() + + resolver := newKeystoneDomainResolver(newTestServiceClient(server.URL)) + + name, err := resolver.ResolveDomainName(context.Background(), "domain-abc") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if name != "monsoon3" { + t.Errorf("expected %q, got %q", "monsoon3", name) + } + if callCount.Load() != 1 { + t.Errorf("expected 1 Keystone call, got %d", callCount.Load()) + } +} + +func TestKeystoneDomainResolver_CacheHit(t *testing.T) { + server, callCount := newDomainResolverTestServer(t, map[string]string{ + "domain-abc": "monsoon3", + }) + defer server.Close() + + resolver := newKeystoneDomainResolver(newTestServiceClient(server.URL)) + + for i := range 5 { + name, err := resolver.ResolveDomainName(context.Background(), "domain-abc") + if err != nil { + t.Fatalf("call %d: unexpected error: %v", i, err) + } + if name != "monsoon3" { + t.Errorf("call %d: expected %q, got %q", i, "monsoon3", name) + } + } + + // Only one HTTP request should have been made regardless of call count. + if callCount.Load() != 1 { + t.Errorf("expected exactly 1 Keystone call due to caching, got %d", callCount.Load()) + } +} + +func TestKeystoneDomainResolver_MultipleDomains(t *testing.T) { + server, _ := newDomainResolverTestServer(t, map[string]string{ + "domain-a": "alpha", + "domain-b": "beta", + }) + defer server.Close() + + resolver := newKeystoneDomainResolver(newTestServiceClient(server.URL)) + + for _, tc := range []struct{ id, want string }{ + {"domain-a", "alpha"}, + {"domain-b", "beta"}, + } { + name, err := resolver.ResolveDomainName(context.Background(), tc.id) + if err != nil { + t.Errorf("domain %q: unexpected error: %v", tc.id, err) + continue + } + if name != tc.want { + t.Errorf("domain %q: expected %q, got %q", tc.id, tc.want, name) + } + } +} + +func TestKeystoneDomainResolver_NotFound(t *testing.T) { + server, _ := newDomainResolverTestServer(t, map[string]string{}) + defer server.Close() + + resolver := newKeystoneDomainResolver(newTestServiceClient(server.URL)) + + _, err := resolver.ResolveDomainName(context.Background(), "nonexistent") + if err == nil { + t.Fatal("expected an error for unknown domain, got nil") + } +} + +func TestKeystoneDomainResolver_ConcurrentAccess(t *testing.T) { + server, callCount := newDomainResolverTestServer(t, map[string]string{ + "domain-concurrent": "shared", + }) + defer server.Close() + + resolver := newKeystoneDomainResolver(newTestServiceClient(server.URL)) + + const goroutines = 20 + errs := make(chan error, goroutines) + var wg sync.WaitGroup + for range goroutines { + wg.Add(1) + go func() { + defer wg.Done() + name, err := resolver.ResolveDomainName(context.Background(), "domain-concurrent") + if err != nil { + errs <- err + return + } + if name != "shared" { + errs <- fmt.Errorf("expected %q, got %q", "shared", name) + } + }() + } + wg.Wait() + close(errs) + + for err := range errs { + t.Errorf("concurrent call error: %v", err) + } + + // Due to the double-checked-lock pattern, at most a small number of calls + // may race before the cache is warm. The important property is correctness: + // all goroutines must have received the right answer. + // We allow up to goroutines calls in the absolute worst case (no caching), + // but the real invariant is just that no data races occurred (enforced by -race). + if n := callCount.Load(); n > int32(goroutines) { + t.Errorf("unexpectedly high Keystone call count: %d", n) + } +} diff --git a/internal/scheduling/reservations/commitments/reservation_controller.go b/internal/scheduling/reservations/commitments/reservation_controller.go index 387e463f5..cd03622c9 100644 --- a/internal/scheduling/reservations/commitments/reservation_controller.go +++ b/internal/scheduling/reservations/commitments/reservation_controller.go @@ -26,9 +26,11 @@ import ( "github.com/cobaltcore-dev/cortex/api/scheduling" "github.com/cobaltcore-dev/cortex/api/v1alpha1" "github.com/cobaltcore-dev/cortex/internal/scheduling/reservations" + "github.com/cobaltcore-dev/cortex/pkg/keystone" "github.com/cobaltcore-dev/cortex/pkg/multicluster" hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" "github.com/go-logr/logr" + "github.com/gophercloud/gophercloud/v2" ) // CommitmentReservationController reconciles commitment Reservation objects @@ -41,6 +43,10 @@ type CommitmentReservationController struct { Conf ReservationControllerConfig // SchedulerClient for making scheduler API calls. SchedulerClient *reservations.SchedulerClient + // DomainResolver resolves OpenStack domain IDs to domain names so the + // domain_name scheduler hint can be populated for filter_external_customer. + // Nil when KeystoneSecretRef is not configured; hint is omitted in that case. + DomainResolver DomainResolver } // echoParentGeneration copies Spec.CommittedResourceReservation.ParentGeneration to @@ -213,6 +219,25 @@ func (r *CommitmentReservationController) Reconcile(ctx context.Context, req ctr availabilityZone = res.Spec.AvailabilityZone } + // Resolve domain name for the domain_name scheduler hint consumed by + // filter_external_customer to enforce host restrictions for external customers. + // Resolution is best-effort: if the DomainResolver is not configured (no + // KeystoneSecretRef) or the lookup fails, we log and proceed without the hint. + // filter_external_customer already handles a missing hint by skipping the filter, + // so omitting it degrades gracefully rather than blocking scheduling. + schedulerHints := map[string]any{ + "_nova_check_type": string(schedulerdelegationapi.ReserveForCommittedResourceIntent), + } + if r.DomainResolver != nil && res.Spec.CommittedResourceReservation != nil && res.Spec.CommittedResourceReservation.DomainID != "" { + domainName, err := r.DomainResolver.ResolveDomainName(ctx, res.Spec.CommittedResourceReservation.DomainID) + if err != nil { + logger.Error(err, "failed to resolve domain name for scheduler hint, proceeding without it", + "domainID", res.Spec.CommittedResourceReservation.DomainID) + } else { + schedulerHints["domain_name"] = domainName + } + } + // Get flavor details from flavor group knowledge CRD knowledge := &reservations.FlavorGroupKnowledgeClient{Client: r.Client} flavorGroups, err := knowledge.GetAllFlavorGroups(ctx, nil) @@ -281,11 +306,7 @@ func (r *CommitmentReservationController) Reconcile(ctx context.Context, req ctr EligibleHosts: eligibleHosts, Pipeline: pipelineName, AvailabilityZone: availabilityZone, - // Set hint to indicate this is a CR reservation scheduling request. - // This prevents other CR reservations from being unlocked during capacity filtering. - SchedulerHints: map[string]any{ - "_nova_check_type": string(schedulerdelegationapi.ReserveForCommittedResourceIntent), - }, + SchedulerHints: schedulerHints, } scheduleOpts := scheduling.Options{ ReadOnly: false, // mutates state (reservation placement) @@ -602,6 +623,31 @@ func (r *CommitmentReservationController) hypervisorToReservations(ctx context.C func (r *CommitmentReservationController) Init(ctx context.Context, conf ReservationControllerConfig) error { r.SchedulerClient = reservations.NewSchedulerClient(conf.SchedulerURL) logf.FromContext(ctx).Info("scheduler client initialized for commitment reservation controller", "url", conf.SchedulerURL) + + if conf.KeystoneSecretRef.Name != "" { + keystoneClient, err := keystone.Connector{Client: r.Client}.FromSecretRef(ctx, conf.KeystoneSecretRef) + if err != nil { + return fmt.Errorf("failed to initialize keystone client for domain resolver: %w", err) + } + provider := keystoneClient.Client() + identityURL, err := provider.EndpointLocator(gophercloud.EndpointOpts{ + Type: "identity", + Availability: gophercloud.Availability(keystoneClient.Availability()), + }) + if err != nil { + return fmt.Errorf("failed to locate keystone identity endpoint for domain resolver: %w", err) + } + sc := &gophercloud.ServiceClient{ + ProviderClient: provider, + Endpoint: identityURL, + Type: "identity", + } + r.DomainResolver = newKeystoneDomainResolver(sc) + logf.FromContext(ctx).Info("domain resolver initialized for commitment reservation controller") + } else { + logf.FromContext(ctx).Info("keystoneSecretRef not configured — domain_name scheduler hint will not be set for CR reservations") + } + return nil } diff --git a/internal/scheduling/reservations/commitments/reservation_controller_test.go b/internal/scheduling/reservations/commitments/reservation_controller_test.go index 6ef9b0c66..07a145a39 100644 --- a/internal/scheduling/reservations/commitments/reservation_controller_test.go +++ b/internal/scheduling/reservations/commitments/reservation_controller_test.go @@ -6,6 +6,7 @@ package commitments import ( "context" "encoding/json" + "errors" "net/http" "net/http/httptest" "testing" @@ -21,8 +22,27 @@ import ( schedulerdelegationapi "github.com/cobaltcore-dev/cortex/api/external/nova" "github.com/cobaltcore-dev/cortex/api/v1alpha1" + "github.com/cobaltcore-dev/cortex/internal/scheduling/reservations" ) +// mockDomainResolver is a test double for DomainResolver. +type mockDomainResolver struct { + // names maps domainID → name returned on success. + names map[string]string + // err is returned for all calls when non-nil. + err error + // calls records every domainID passed to ResolveDomainName. + calls []string +} + +func (m *mockDomainResolver) ResolveDomainName(_ context.Context, domainID string) (string, error) { + m.calls = append(m.calls, domainID) + if m.err != nil { + return "", m.err + } + return m.names[domainID], nil +} + func TestCommitmentReservationController_Reconcile(t *testing.T) { scheme := newCRTestScheme(t) @@ -846,3 +866,195 @@ func TestCommitmentReservationController_reconcileInstanceReservation_Success(t t.Errorf("Expected host %v, got %v", "test-host-1", updated.Status.Host) } } + +// ============================================================================ +// Test: domain_name scheduler hint +// ============================================================================ + +// newTestSchedulerServer creates an httptest server that captures the decoded +// ExternalSchedulerRequest and returns a single host. The caller can inspect +// the captured request after the Reconcile call. +func newTestSchedulerServer(t *testing.T, captured *schedulerdelegationapi.ExternalSchedulerRequest) *httptest.Server { + t.Helper() + return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if err := json.NewDecoder(r.Body).Decode(captured); err != nil { + t.Errorf("failed to decode scheduler request: %v", err) + w.WriteHeader(http.StatusBadRequest) + return + } + resp := schedulerdelegationapi.ExternalSchedulerResponse{Hosts: []string{"test-host-1"}} + if err := json.NewEncoder(w).Encode(resp); err != nil { + t.Fatalf("failed to write scheduler response: %v", err) + } + })) +} + +// TestCommitmentReservationController_DomainNameHint_Present verifies that when +// DomainResolver is configured and the reservation carries a DomainID, the +// domain_name scheduler hint is forwarded to the scheduling pipeline so that +// filter_external_customer can enforce domain-based host restrictions. +func TestCommitmentReservationController_DomainNameHint_Present(t *testing.T) { + scheme := newCRTestScheme(t) + + reservation := &v1alpha1.Reservation{ + ObjectMeta: ctrl.ObjectMeta{Name: "test-reservation"}, + Spec: v1alpha1.ReservationSpec{ + Type: v1alpha1.ReservationTypeCommittedResource, + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ + ProjectID: "test-project", + DomainID: "domain-uuid-1", + ResourceName: "test-flavor", + }, + Resources: map[hv1.ResourceName]resource.Quantity{ + hv1.ResourceMemory: resource.MustParse("4Gi"), + hv1.ResourceCPU: resource.MustParse("2"), + }, + }, + } + hypervisor := &hv1.Hypervisor{ObjectMeta: metav1.ObjectMeta{Name: "test-host-1"}} + k8sClient := newCRTestClient(scheme, reservation, newTestFlavorKnowledge(), hypervisor) + + var captured schedulerdelegationapi.ExternalSchedulerRequest + server := newTestSchedulerServer(t, &captured) + defer server.Close() + + resolver := &mockDomainResolver{names: map[string]string{"domain-uuid-1": "monsoon3"}} + conf := ReservationControllerConfig{SchedulerURL: server.URL} + reconciler := &CommitmentReservationController{ + Client: k8sClient, + Scheme: scheme, + Conf: conf, + SchedulerClient: reservations.NewSchedulerClient(server.URL), + DomainResolver: resolver, + } + + req := ctrl.Request{NamespacedName: types.NamespacedName{Name: reservation.Name}} + if _, err := reconciler.Reconcile(context.Background(), req); err != nil { + t.Fatalf("Reconcile() error = %v", err) + } + + // Verify the resolver was called with the correct domain ID. + if len(resolver.calls) != 1 || resolver.calls[0] != "domain-uuid-1" { + t.Errorf("expected ResolveDomainName called with %q, got calls=%v", "domain-uuid-1", resolver.calls) + } + + // Verify domain_name hint was forwarded to the scheduler. + gotDomain, err := captured.Spec.Data.GetSchedulerHintStr("domain_name") + if err != nil { + t.Fatalf("domain_name scheduler hint missing: %v", err) + } + if gotDomain != "monsoon3" { + t.Errorf("expected domain_name hint %q, got %q", "monsoon3", gotDomain) + } + + // Verify _nova_check_type hint is still present. + gotIntent, err := captured.Spec.Data.GetSchedulerHintStr("_nova_check_type") + if err != nil || gotIntent != string(schedulerdelegationapi.ReserveForCommittedResourceIntent) { + t.Errorf("expected _nova_check_type hint %q, got %q (err=%v)", schedulerdelegationapi.ReserveForCommittedResourceIntent, gotIntent, err) + } +} + +// TestCommitmentReservationController_DomainNameHint_ResolverError verifies that +// when domain name resolution fails the controller does not return an error and +// proceeds with scheduling — omitting only the domain_name hint. This ensures +// that a transient Keystone outage never blocks CR reservation placement. +func TestCommitmentReservationController_DomainNameHint_ResolverError(t *testing.T) { + scheme := newCRTestScheme(t) + + reservation := &v1alpha1.Reservation{ + ObjectMeta: ctrl.ObjectMeta{Name: "test-reservation"}, + Spec: v1alpha1.ReservationSpec{ + Type: v1alpha1.ReservationTypeCommittedResource, + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ + ProjectID: "test-project", + DomainID: "domain-uuid-1", + ResourceName: "test-flavor", + }, + Resources: map[hv1.ResourceName]resource.Quantity{ + hv1.ResourceMemory: resource.MustParse("4Gi"), + hv1.ResourceCPU: resource.MustParse("2"), + }, + }, + } + hypervisor := &hv1.Hypervisor{ObjectMeta: metav1.ObjectMeta{Name: "test-host-1"}} + k8sClient := newCRTestClient(scheme, reservation, newTestFlavorKnowledge(), hypervisor) + + var captured schedulerdelegationapi.ExternalSchedulerRequest + server := newTestSchedulerServer(t, &captured) + defer server.Close() + + resolver := &mockDomainResolver{err: errors.New("keystone unavailable")} + conf := ReservationControllerConfig{SchedulerURL: server.URL} + reconciler := &CommitmentReservationController{ + Client: k8sClient, + Scheme: scheme, + Conf: conf, + SchedulerClient: reservations.NewSchedulerClient(server.URL), + DomainResolver: resolver, + } + + req := ctrl.Request{NamespacedName: types.NamespacedName{Name: reservation.Name}} + // Must not return an error — resolution failure is non-fatal. + if _, err := reconciler.Reconcile(context.Background(), req); err != nil { + t.Fatalf("Reconcile() must not fail on resolver error, got: %v", err) + } + + // domain_name hint must be absent when resolution failed. + if _, err := captured.Spec.Data.GetSchedulerHintStr("domain_name"); err == nil { + t.Error("expected domain_name hint to be absent when resolver fails, but it was present") + } + + // _nova_check_type must still be present — unrelated to domain resolution. + gotIntent, err := captured.Spec.Data.GetSchedulerHintStr("_nova_check_type") + if err != nil || gotIntent != string(schedulerdelegationapi.ReserveForCommittedResourceIntent) { + t.Errorf("expected _nova_check_type hint %q, got %q (err=%v)", schedulerdelegationapi.ReserveForCommittedResourceIntent, gotIntent, err) + } +} + +// TestCommitmentReservationController_DomainNameHint_NilResolver verifies that +// when no DomainResolver is configured (KeystoneSecretRef absent), the controller +// schedules normally without setting the domain_name hint. +func TestCommitmentReservationController_DomainNameHint_NilResolver(t *testing.T) { + scheme := newCRTestScheme(t) + + reservation := &v1alpha1.Reservation{ + ObjectMeta: ctrl.ObjectMeta{Name: "test-reservation"}, + Spec: v1alpha1.ReservationSpec{ + Type: v1alpha1.ReservationTypeCommittedResource, + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ + ProjectID: "test-project", + DomainID: "domain-uuid-1", + ResourceName: "test-flavor", + }, + Resources: map[hv1.ResourceName]resource.Quantity{ + hv1.ResourceMemory: resource.MustParse("4Gi"), + hv1.ResourceCPU: resource.MustParse("2"), + }, + }, + } + hypervisor := &hv1.Hypervisor{ObjectMeta: metav1.ObjectMeta{Name: "test-host-1"}} + k8sClient := newCRTestClient(scheme, reservation, newTestFlavorKnowledge(), hypervisor) + + var captured schedulerdelegationapi.ExternalSchedulerRequest + server := newTestSchedulerServer(t, &captured) + defer server.Close() + + conf := ReservationControllerConfig{SchedulerURL: server.URL} + reconciler := &CommitmentReservationController{ + Client: k8sClient, + Scheme: scheme, + Conf: conf, + SchedulerClient: reservations.NewSchedulerClient(server.URL), + DomainResolver: nil, // not configured + } + + req := ctrl.Request{NamespacedName: types.NamespacedName{Name: reservation.Name}} + if _, err := reconciler.Reconcile(context.Background(), req); err != nil { + t.Fatalf("Reconcile() error = %v", err) + } + + // domain_name hint must be absent. + if _, err := captured.Spec.Data.GetSchedulerHintStr("domain_name"); err == nil { + t.Error("expected domain_name hint to be absent when DomainResolver is nil, but it was present") + } +} From 1d7af233edad084d351cbe69431b664713228068 Mon Sep 17 00:00:00 2001 From: Julius Clausnitzer Date: Thu, 18 Jun 2026 09:52:22 +0200 Subject: [PATCH 2/8] add e2e checks --- cmd/manager/main.go | 6 ++ internal/scheduling/nova/e2e_checks.go | 94 +++++++++++++++++++++++++- 2 files changed, 99 insertions(+), 1 deletion(-) diff --git a/cmd/manager/main.go b/cmd/manager/main.go index d58be2a29..f9ca0b81f 100644 --- a/cmd/manager/main.go +++ b/cmd/manager/main.go @@ -115,6 +115,12 @@ func main() { switch os.Args[1] { case "e2e-nova": novaChecksConfig := conf.GetConfigOrDie[nova.ChecksConfig]() + if len(os.Args) >= 3 { + if err := json.Unmarshal([]byte(os.Args[2]), &novaChecksConfig); err != nil { + slog.Error("invalid json override for e2e-nova", "err", err) + os.Exit(1) + } + } nova.RunChecks(ctx, client, novaChecksConfig) return case "e2e-cinder": diff --git a/internal/scheduling/nova/e2e_checks.go b/internal/scheduling/nova/e2e_checks.go index ad65a57b5..052fa7433 100644 --- a/internal/scheduling/nova/e2e_checks.go +++ b/internal/scheduling/nova/e2e_checks.go @@ -36,11 +36,36 @@ const ( nRandomRequestsToSend = 50 ) +// ChecksConfig holds configuration for nova e2e checks. type ChecksConfig struct { // Secret ref to keystone credentials stored in a k8s secret. KeystoneSecretRef corev1.SecretReference `json:"keystoneSecretRef"` // Secret ref to SSO credentials stored in a k8s secret, if applicable. SSOSecretRef *corev1.SecretReference `json:"ssoSecretRef"` + // DomainNameHintCheck holds optional configuration for the domain_name hint check. + // When nil the check is skipped. Provide DomainName, EligibleHosts, and FlavorName + // to exercise the filter_external_customer path for CR reservation scheduling. + DomainNameHintCheck *DomainNameHintConfig `json:"domainNameHintCheck,omitempty"` +} + +// DomainNameHintConfig holds parameters for CheckDomainNameHintRouting. +type DomainNameHintConfig struct { + // DomainName is the OpenStack domain name passed as the domain_name scheduler hint. + // Use a real domain name from the target environment so filter_external_customer + // can apply its prefix-matching logic. + DomainName string `json:"domainName"` + // EligibleHosts is the list of compute hostnames offered to the scheduler. + // Include at least one host with CUSTOM_EXTERNAL_CUSTOMER_EXCLUSIVE and one without + // to give the filter something to act on. + EligibleHosts []string `json:"eligibleHosts"` + // FlavorName is the Nova flavor name to use in the request (e.g. "g_k_c1_m2_v2"). + FlavorName string `json:"flavorName"` + // FlavorExtraSpecs are the extra specs for the flavor. Required by most pipeline + // filters to determine hypervisor type, capabilities, and traits. + // Example: {"capabilities:hypervisor_type": "CH", "quota:hw_version": "2101"} + FlavorExtraSpecs map[string]string `json:"flavorExtraSpecs,omitempty"` + // Pipeline is the scheduler pipeline to target. Empty means default. + Pipeline string `json:"pipeline,omitempty"` } // Data necessary to generate a somewhat valid nova scheduler request. @@ -356,6 +381,73 @@ func checkNovaSchedulerReturnsValidHosts( return resp.Hosts } +// CheckDomainNameHintRouting verifies that CR reservation scheduling passes the +// domain_name scheduler hint through to the pipeline and that filter_external_customer +// acts on it correctly. +// +// It sends a synthetic ExternalSchedulerRequest with: +// - _nova_check_type = reserve_for_committed_resource (CR intent, not failover) +// - domain_name = config.DomainName +// +// to the nova external scheduler and asserts the call succeeds (HTTP 200). The check +// does not assert which specific hosts are returned — that depends on cluster state — +// but a successful response confirms the full request/filter path is wired correctly. +// +// To observe actual filtering, include at least one host with +// CUSTOM_EXTERNAL_CUSTOMER_EXCLUSIVE and one without, then inspect the logs. +func CheckDomainNameHintRouting(ctx context.Context, config ChecksConfig) { + cfg := config.DomainNameHintCheck + if cfg == nil { + slog.Info("domain_name hint check skipped: DomainNameHintCheck not configured") + return + } + if cfg.FlavorName == "" || len(cfg.EligibleHosts) == 0 { + slog.Info("domain_name hint check skipped: FlavorName or EligibleHosts not set") + return + } + + hosts := make([]api.ExternalSchedulerHost, len(cfg.EligibleHosts)) + weights := make(map[string]float64, len(cfg.EligibleHosts)) + for i, h := range cfg.EligibleHosts { + hosts[i] = api.ExternalSchedulerHost{ComputeHost: h} + weights[h] = 0.0 + } + + req := api.ExternalSchedulerRequest{ + Pipeline: cfg.Pipeline, + Hosts: hosts, + Weights: weights, + Spec: api.NovaObject[api.NovaSpec]{ + Data: api.NovaSpec{ + InstanceUUID: "e2e-domain-hint-check", + Flavor: api.NovaObject[api.NovaFlavor]{ + Data: api.NovaFlavor{ + Name: cfg.FlavorName, + ExtraSpecs: cfg.FlavorExtraSpecs, + }, + }, + SchedulerHints: map[string]any{ + "_nova_check_type": string(api.ReserveForCommittedResourceIntent), + "domain_name": cfg.DomainName, + }, + }, + }, + } + + slog.Info("domain_name hint check: sending CR reservation scheduling request", + "domainName", cfg.DomainName, + "eligibleHosts", cfg.EligibleHosts, + "flavorName", cfg.FlavorName, + ) + + hosts2 := checkNovaSchedulerReturnsValidHosts(ctx, req) + slog.Info("domain_name hint check passed", + "domainName", cfg.DomainName, + "hostsReturned", len(hosts2), + "hosts", hosts2, + ) +} + // Run all checks. func RunChecks(ctx context.Context, client client.Client, config ChecksConfig) { datacenter := prepare(ctx, client, config) @@ -370,10 +462,10 @@ func RunChecks(ctx context.Context, client client.Client, config ChecksConfig) { requestsWithNoHostsReturned++ } } - // Print a summary. slog.Info( "summary", "requestsWithHostsReturned", requestsWithHostsReturned, "requestsWithNoHostsReturned", requestsWithNoHostsReturned, ) + CheckDomainNameHintRouting(ctx, config) } From d4a7b10c1191920d3eb21a1c48017e5908e71dc6 Mon Sep 17 00:00:00 2001 From: Julius Clausnitzer Date: Thu, 18 Jun 2026 09:53:15 +0200 Subject: [PATCH 3/8] tests --- .../filters/filter_external_customer_test.go | 53 +++++++++++++ .../commitments/domain_resolver_test.go | 75 +++++++++++++++++++ 2 files changed, 128 insertions(+) diff --git a/internal/scheduling/nova/plugins/filters/filter_external_customer_test.go b/internal/scheduling/nova/plugins/filters/filter_external_customer_test.go index 97c9d6925..290709f05 100644 --- a/internal/scheduling/nova/plugins/filters/filter_external_customer_test.go +++ b/internal/scheduling/nova/plugins/filters/filter_external_customer_test.go @@ -63,6 +63,15 @@ func TestFilterExternalCustomerStep_Run(t *testing.T) { Traits: []string{"CUSTOM_EXTERNAL_CUSTOMER_EXCLUSIVE"}, }, }, + &hv1.Hypervisor{ + ObjectMeta: v1.ObjectMeta{ + Name: "host-custom-trait", + }, + Spec: hv1.HypervisorSpec{ + CustomTraits: []string{"CUSTOM_EXTERNAL_CUSTOMER_EXCLUSIVE"}, + }, + // Status.Traits intentionally empty — trait comes solely from Spec.CustomTraits. + }, } tests := []struct { @@ -392,6 +401,50 @@ func TestFilterExternalCustomerStep_Run(t *testing.T) { expectedHosts: []string{"host1", "host3"}, filteredHosts: []string{}, }, + { + name: "ReserveForCommittedResourceIntent with external customer domain - filter applies", + opts: FilterExternalCustomerStepOpts{ + CustomerDomainNamePrefixes: []string{"ext-"}, + }, + request: api.ExternalSchedulerRequest{ + Spec: api.NovaObject[api.NovaSpec]{ + Data: api.NovaSpec{ + SchedulerHints: map[string]any{ + "_nova_check_type": string(api.ReserveForCommittedResourceIntent), + "domain_name": "ext-customer1", + }, + }, + }, + Hosts: []api.ExternalSchedulerHost{ + {ComputeHost: "host1"}, + {ComputeHost: "host3"}, + {ComputeHost: "host4"}, + }, + }, + expectedHosts: []string{"host1"}, + filteredHosts: []string{"host3", "host4"}, + }, + { + name: "Trait from Spec.CustomTraits (not Status.Traits) grants host inclusion", + opts: FilterExternalCustomerStepOpts{ + CustomerDomainNamePrefixes: []string{"ext-"}, + }, + request: api.ExternalSchedulerRequest{ + Spec: api.NovaObject[api.NovaSpec]{ + Data: api.NovaSpec{ + SchedulerHints: map[string]any{ + "domain_name": "ext-customer1", + }, + }, + }, + Hosts: []api.ExternalSchedulerHost{ + {ComputeHost: "host-custom-trait"}, + {ComputeHost: "host4"}, + }, + }, + expectedHosts: []string{"host-custom-trait"}, + filteredHosts: []string{"host4"}, + }, } for _, tt := range tests { diff --git a/internal/scheduling/reservations/commitments/domain_resolver_test.go b/internal/scheduling/reservations/commitments/domain_resolver_test.go index 69009fbcb..7098f1e05 100644 --- a/internal/scheduling/reservations/commitments/domain_resolver_test.go +++ b/internal/scheduling/reservations/commitments/domain_resolver_test.go @@ -9,6 +9,7 @@ import ( "fmt" "net/http" "net/http/httptest" + "strings" "sync" "sync/atomic" "testing" @@ -138,6 +139,80 @@ func TestKeystoneDomainResolver_NotFound(t *testing.T) { } } +func TestKeystoneDomainResolver_ErrorNotCached(t *testing.T) { + // First request fails (5xx), second request succeeds. + // The resolver must NOT cache the error, so the second call retries Keystone + // and returns the correct name. + var callCount atomic.Int32 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + n := callCount.Add(1) + if n == 1 { + http.Error(w, "internal server error", http.StatusInternalServerError) + return + } + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]any{ + "domain": map[string]any{"id": "domain-flaky", "name": "recovered"}, + }) + })) + defer server.Close() + + resolver := newKeystoneDomainResolver(newTestServiceClient(server.URL)) + + _, err := resolver.ResolveDomainName(context.Background(), "domain-flaky") + if err == nil { + t.Fatal("expected error on first (failing) call, got nil") + } + + name, err := resolver.ResolveDomainName(context.Background(), "domain-flaky") + if err != nil { + t.Fatalf("expected success on second call after Keystone recovered, got: %v", err) + } + if name != "recovered" { + t.Errorf("expected %q, got %q", "recovered", name) + } + if callCount.Load() != 2 { + t.Errorf("expected 2 Keystone calls (error not cached), got %d", callCount.Load()) + } +} + +func TestKeystoneDomainResolver_ErrorWrapping(t *testing.T) { + server, _ := newDomainResolverTestServer(t, map[string]string{}) + defer server.Close() + + resolver := newKeystoneDomainResolver(newTestServiceClient(server.URL)) + + _, err := resolver.ResolveDomainName(context.Background(), "missing-domain") + if err == nil { + t.Fatal("expected error, got nil") + } + // The error message must include the domain ID so callers can diagnose which + // lookup failed without having to add their own context. + if !strings.Contains(err.Error(), "missing-domain") { + t.Errorf("expected error to contain domain ID %q, got: %v", "missing-domain", err) + } +} + +func TestKeystoneDomainResolver_ContextCancelled(t *testing.T) { + // Server that blocks until the context is cancelled. + blocked := make(chan struct{}) + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + <-blocked // never unblocked; request hangs until client cancels + })) + defer server.Close() + defer close(blocked) + + resolver := newKeystoneDomainResolver(newTestServiceClient(server.URL)) + + ctx, cancel := context.WithCancel(context.Background()) + cancel() // cancel immediately + + _, err := resolver.ResolveDomainName(ctx, "domain-cancelled") + if err == nil { + t.Fatal("expected error when context is cancelled, got nil") + } +} + func TestKeystoneDomainResolver_ConcurrentAccess(t *testing.T) { server, callCount := newDomainResolverTestServer(t, map[string]string{ "domain-concurrent": "shared", From d7d6c3491a59453a62bcf25f76ce6ad822dfb1e4 Mon Sep 17 00:00:00 2001 From: Julius Clausnitzer Date: Thu, 18 Jun 2026 10:48:27 +0200 Subject: [PATCH 4/8] add sso --- .../scheduling/reservations/commitments/config.go | 4 ++++ .../commitments/reservation_controller.go | 12 +++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/internal/scheduling/reservations/commitments/config.go b/internal/scheduling/reservations/commitments/config.go index ffe316d2a..9f750c365 100644 --- a/internal/scheduling/reservations/commitments/config.go +++ b/internal/scheduling/reservations/commitments/config.go @@ -74,6 +74,10 @@ type ReservationControllerConfig struct { // When empty, domain name resolution is skipped and filter_external_customer will // not enforce domain restrictions for CR reservations. KeystoneSecretRef corev1.SecretReference `json:"keystoneSecretRef,omitempty"` + // SSOSecretRef is an optional reference to a Secret holding SSO credentials. + // Required in environments that use SSO-based Keystone authentication. + // When nil, http.DefaultClient is used, which will fail in SSO-only environments. + SSOSecretRef *corev1.SecretReference `json:"ssoSecretRef,omitempty"` } // CommittedResourceControllerConfig holds tuning knobs for the CommittedResource CRD controller. diff --git a/internal/scheduling/reservations/commitments/reservation_controller.go b/internal/scheduling/reservations/commitments/reservation_controller.go index cd03622c9..598f3a667 100644 --- a/internal/scheduling/reservations/commitments/reservation_controller.go +++ b/internal/scheduling/reservations/commitments/reservation_controller.go @@ -28,9 +28,11 @@ import ( "github.com/cobaltcore-dev/cortex/internal/scheduling/reservations" "github.com/cobaltcore-dev/cortex/pkg/keystone" "github.com/cobaltcore-dev/cortex/pkg/multicluster" + "github.com/cobaltcore-dev/cortex/pkg/sso" hv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" "github.com/go-logr/logr" "github.com/gophercloud/gophercloud/v2" + "net/http" ) // CommitmentReservationController reconciles commitment Reservation objects @@ -625,7 +627,15 @@ func (r *CommitmentReservationController) Init(ctx context.Context, conf Reserva logf.FromContext(ctx).Info("scheduler client initialized for commitment reservation controller", "url", conf.SchedulerURL) if conf.KeystoneSecretRef.Name != "" { - keystoneClient, err := keystone.Connector{Client: r.Client}.FromSecretRef(ctx, conf.KeystoneSecretRef) + var authenticatedHTTP *http.Client + if conf.SSOSecretRef != nil { + var err error + authenticatedHTTP, err = sso.Connector{Client: r.Client}.FromSecretRef(ctx, *conf.SSOSecretRef) + if err != nil { + return fmt.Errorf("failed to initialize SSO client for domain resolver: %w", err) + } + } + keystoneClient, err := keystone.Connector{Client: r.Client, HTTPClient: authenticatedHTTP}.FromSecretRef(ctx, conf.KeystoneSecretRef) if err != nil { return fmt.Errorf("failed to initialize keystone client for domain resolver: %w", err) } From 2a25eafd0b8aa1b8c62e3d3c36344a362dcd3848 Mon Sep 17 00:00:00 2001 From: Julius Clausnitzer Date: Thu, 18 Jun 2026 11:06:50 +0200 Subject: [PATCH 5/8] chart --- helm/bundles/cortex-nova/values.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/helm/bundles/cortex-nova/values.yaml b/helm/bundles/cortex-nova/values.yaml index d7ef28094..2300e17e6 100644 --- a/helm/bundles/cortex-nova/values.yaml +++ b/helm/bundles/cortex-nova/values.yaml @@ -179,6 +179,15 @@ cortex-scheduling-controllers: allocationGracePeriod: "15m" # URL of the nova external scheduler API for placement decisions schedulerURL: "http://localhost:8080/scheduler/nova/external" + # Keystone credentials used to resolve domain IDs to domain names for the + # domain_name scheduler hint consumed by filter_external_customer. + # Must match the credentials used by the commitments syncer. + keystoneSecretRef: + name: cortex-nova-openstack-keystone + namespace: default + # ssoSecretRef: + # name: cortex-nova-openstack-sso + # namespace: default committedResourceController: # Back-off interval while CommittedResource placement is pending or failed (base for exponential backoff) requeueIntervalRetry: "1m" From 14d84b2cc01cc2cfb32655aa672a6ab7179c313c Mon Sep 17 00:00:00 2001 From: Julius Clausnitzer Date: Thu, 18 Jun 2026 11:13:58 +0200 Subject: [PATCH 6/8] shorten tests --- .../commitments/domain_resolver_test.go | 194 ++++++------------ .../reservation_controller_test.go | 8 +- 2 files changed, 66 insertions(+), 136 deletions(-) diff --git a/internal/scheduling/reservations/commitments/domain_resolver_test.go b/internal/scheduling/reservations/commitments/domain_resolver_test.go index 7098f1e05..60996fa60 100644 --- a/internal/scheduling/reservations/commitments/domain_resolver_test.go +++ b/internal/scheduling/reservations/commitments/domain_resolver_test.go @@ -17,16 +17,12 @@ import ( "github.com/gophercloud/gophercloud/v2" ) -// newDomainResolverTestServer creates an httptest.Server that serves Keystone -// GET /domains/{id} responses. The provided map controls what is returned for -// each domain ID. Unknown IDs get a 404. +// newDomainResolverTestServer creates an httptest.Server serving Keystone +// GET /domains/{id} responses. Unknown IDs get 404. func newDomainResolverTestServer(t *testing.T, domainsByID map[string]string) (*httptest.Server, *atomic.Int32) { t.Helper() var callCount atomic.Int32 server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - // Keystone endpoint is registered as the base URL, so gophercloud appends - // the path "/domains/" directly. - // Path format: /domains/ const prefix = "/domains/" if len(r.URL.Path) <= len(prefix) { http.Error(w, "bad path", http.StatusBadRequest) @@ -40,133 +36,99 @@ func newDomainResolverTestServer(t *testing.T, domainsByID map[string]string) (* return } w.Header().Set("Content-Type", "application/json") - _ = json.NewEncoder(w).Encode(map[string]any{ - "domain": map[string]any{"id": id, "name": name}, - }) + _ = json.NewEncoder(w).Encode(map[string]any{"domain": map[string]any{"id": id, "name": name}}) })) return server, &callCount } -// newTestServiceClient builds a gophercloud.ServiceClient whose Endpoint points -// at the given test server URL (with trailing slash as gophercloud expects). +// newTestServiceClient builds a gophercloud.ServiceClient pointing at serverURL. func newTestServiceClient(serverURL string) *gophercloud.ServiceClient { - provider := &gophercloud.ProviderClient{} return &gophercloud.ServiceClient{ - ProviderClient: provider, + ProviderClient: &gophercloud.ProviderClient{}, Endpoint: serverURL + "/", Type: "identity", } } -func TestKeystoneDomainResolver_ResolveDomainName(t *testing.T) { - server, callCount := newDomainResolverTestServer(t, map[string]string{ - "domain-abc": "monsoon3", - "domain-xyz": "cc3test", - }) - defer server.Close() - - resolver := newKeystoneDomainResolver(newTestServiceClient(server.URL)) - - name, err := resolver.ResolveDomainName(context.Background(), "domain-abc") - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if name != "monsoon3" { - t.Errorf("expected %q, got %q", "monsoon3", name) - } - if callCount.Load() != 1 { - t.Errorf("expected 1 Keystone call, got %d", callCount.Load()) - } -} - -func TestKeystoneDomainResolver_CacheHit(t *testing.T) { +func TestKeystoneDomainResolver(t *testing.T) { server, callCount := newDomainResolverTestServer(t, map[string]string{ - "domain-abc": "monsoon3", + "domain-a": "alpha", + "domain-b": "beta", }) defer server.Close() - resolver := newKeystoneDomainResolver(newTestServiceClient(server.URL)) - for i := range 5 { - name, err := resolver.ResolveDomainName(context.Background(), "domain-abc") - if err != nil { - t.Fatalf("call %d: unexpected error: %v", i, err) - } - if name != "monsoon3" { - t.Errorf("call %d: expected %q, got %q", i, "monsoon3", name) - } + tests := []struct { + name string + domainID string + wantName string + wantErr bool + errContains string + }{ + {name: "resolves domain name", domainID: "domain-a", wantName: "alpha"}, + {name: "resolves second domain independently", domainID: "domain-b", wantName: "beta"}, + {name: "not found returns error", domainID: "nonexistent", wantErr: true}, + {name: "error contains domain ID", domainID: "missing-domain", wantErr: true, errContains: "missing-domain"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + name, err := resolver.ResolveDomainName(context.Background(), tt.domainID) + if tt.wantErr { + if err == nil { + t.Fatal("expected error, got nil") + } + if tt.errContains != "" && !strings.Contains(err.Error(), tt.errContains) { + t.Errorf("expected error to contain %q, got: %v", tt.errContains, err) + } + return + } + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if name != tt.wantName { + t.Errorf("expected %q, got %q", tt.wantName, name) + } + }) } - // Only one HTTP request should have been made regardless of call count. - if callCount.Load() != 1 { - t.Errorf("expected exactly 1 Keystone call due to caching, got %d", callCount.Load()) + // Each successful domain ID fetched exactly once; subsequent calls served from cache. + // Error cases (not_found, missing-domain) also hit the server but must not be cached. + // We have 2 success lookups (domain-a, domain-b) + 2 error lookups = 4 total calls. + if n := callCount.Load(); n != 4 { + t.Errorf("expected 4 Keystone calls (2 success + 2 error), got %d", n) } -} - -func TestKeystoneDomainResolver_MultipleDomains(t *testing.T) { - server, _ := newDomainResolverTestServer(t, map[string]string{ - "domain-a": "alpha", - "domain-b": "beta", - }) - defer server.Close() - - resolver := newKeystoneDomainResolver(newTestServiceClient(server.URL)) - for _, tc := range []struct{ id, want string }{ - {"domain-a", "alpha"}, - {"domain-b", "beta"}, - } { - name, err := resolver.ResolveDomainName(context.Background(), tc.id) - if err != nil { - t.Errorf("domain %q: unexpected error: %v", tc.id, err) - continue + t.Run("cache hit does not re-fetch", func(t *testing.T) { + before := callCount.Load() + if _, err := resolver.ResolveDomainName(context.Background(), "domain-a"); err != nil { + t.Fatalf("unexpected error on cache hit: %v", err) } - if name != tc.want { - t.Errorf("domain %q: expected %q, got %q", tc.id, tc.want, name) + if after := callCount.Load(); after != before { + t.Errorf("expected no additional Keystone calls on cache hit, got %d new call(s)", after-before) } - } -} - -func TestKeystoneDomainResolver_NotFound(t *testing.T) { - server, _ := newDomainResolverTestServer(t, map[string]string{}) - defer server.Close() - - resolver := newKeystoneDomainResolver(newTestServiceClient(server.URL)) - - _, err := resolver.ResolveDomainName(context.Background(), "nonexistent") - if err == nil { - t.Fatal("expected an error for unknown domain, got nil") - } + }) } func TestKeystoneDomainResolver_ErrorNotCached(t *testing.T) { - // First request fails (5xx), second request succeeds. - // The resolver must NOT cache the error, so the second call retries Keystone - // and returns the correct name. + // First call fails (5xx); second must retry and succeed — errors must not be cached. var callCount atomic.Int32 server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - n := callCount.Add(1) - if n == 1 { + if callCount.Add(1) == 1 { http.Error(w, "internal server error", http.StatusInternalServerError) return } w.Header().Set("Content-Type", "application/json") - _ = json.NewEncoder(w).Encode(map[string]any{ - "domain": map[string]any{"id": "domain-flaky", "name": "recovered"}, - }) + _ = json.NewEncoder(w).Encode(map[string]any{"domain": map[string]any{"id": "domain-flaky", "name": "recovered"}}) })) defer server.Close() - resolver := newKeystoneDomainResolver(newTestServiceClient(server.URL)) - _, err := resolver.ResolveDomainName(context.Background(), "domain-flaky") - if err == nil { + if _, err := resolver.ResolveDomainName(context.Background(), "domain-flaky"); err == nil { t.Fatal("expected error on first (failing) call, got nil") } - name, err := resolver.ResolveDomainName(context.Background(), "domain-flaky") if err != nil { - t.Fatalf("expected success on second call after Keystone recovered, got: %v", err) + t.Fatalf("expected success after Keystone recovered, got: %v", err) } if name != "recovered" { t.Errorf("expected %q, got %q", "recovered", name) @@ -176,49 +138,26 @@ func TestKeystoneDomainResolver_ErrorNotCached(t *testing.T) { } } -func TestKeystoneDomainResolver_ErrorWrapping(t *testing.T) { - server, _ := newDomainResolverTestServer(t, map[string]string{}) - defer server.Close() - - resolver := newKeystoneDomainResolver(newTestServiceClient(server.URL)) - - _, err := resolver.ResolveDomainName(context.Background(), "missing-domain") - if err == nil { - t.Fatal("expected error, got nil") - } - // The error message must include the domain ID so callers can diagnose which - // lookup failed without having to add their own context. - if !strings.Contains(err.Error(), "missing-domain") { - t.Errorf("expected error to contain domain ID %q, got: %v", "missing-domain", err) - } -} - func TestKeystoneDomainResolver_ContextCancelled(t *testing.T) { - // Server that blocks until the context is cancelled. blocked := make(chan struct{}) server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - <-blocked // never unblocked; request hangs until client cancels + <-blocked })) defer server.Close() defer close(blocked) - resolver := newKeystoneDomainResolver(newTestServiceClient(server.URL)) - ctx, cancel := context.WithCancel(context.Background()) - cancel() // cancel immediately + cancel() - _, err := resolver.ResolveDomainName(ctx, "domain-cancelled") + _, err := newKeystoneDomainResolver(newTestServiceClient(server.URL)).ResolveDomainName(ctx, "domain-x") if err == nil { t.Fatal("expected error when context is cancelled, got nil") } } func TestKeystoneDomainResolver_ConcurrentAccess(t *testing.T) { - server, callCount := newDomainResolverTestServer(t, map[string]string{ - "domain-concurrent": "shared", - }) + server, callCount := newDomainResolverTestServer(t, map[string]string{"domain-c": "shared"}) defer server.Close() - resolver := newKeystoneDomainResolver(newTestServiceClient(server.URL)) const goroutines = 20 @@ -228,28 +167,19 @@ func TestKeystoneDomainResolver_ConcurrentAccess(t *testing.T) { wg.Add(1) go func() { defer wg.Done() - name, err := resolver.ResolveDomainName(context.Background(), "domain-concurrent") + name, err := resolver.ResolveDomainName(context.Background(), "domain-c") if err != nil { errs <- err - return - } - if name != "shared" { + } else if name != "shared" { errs <- fmt.Errorf("expected %q, got %q", "shared", name) } }() } wg.Wait() close(errs) - for err := range errs { t.Errorf("concurrent call error: %v", err) } - - // Due to the double-checked-lock pattern, at most a small number of calls - // may race before the cache is warm. The important property is correctness: - // all goroutines must have received the right answer. - // We allow up to goroutines calls in the absolute worst case (no caching), - // but the real invariant is just that no data races occurred (enforced by -race). if n := callCount.Load(); n > int32(goroutines) { t.Errorf("unexpectedly high Keystone call count: %d", n) } diff --git a/internal/scheduling/reservations/commitments/reservation_controller_test.go b/internal/scheduling/reservations/commitments/reservation_controller_test.go index 07a145a39..14ab254a4 100644 --- a/internal/scheduling/reservations/commitments/reservation_controller_test.go +++ b/internal/scheduling/reservations/commitments/reservation_controller_test.go @@ -921,11 +921,11 @@ func TestCommitmentReservationController_DomainNameHint_Present(t *testing.T) { resolver := &mockDomainResolver{names: map[string]string{"domain-uuid-1": "monsoon3"}} conf := ReservationControllerConfig{SchedulerURL: server.URL} reconciler := &CommitmentReservationController{ - Client: k8sClient, - Scheme: scheme, - Conf: conf, + Client: k8sClient, + Scheme: scheme, + Conf: conf, SchedulerClient: reservations.NewSchedulerClient(server.URL), - DomainResolver: resolver, + DomainResolver: resolver, } req := ctrl.Request{NamespacedName: types.NamespacedName{Name: reservation.Name}} From 41f042c775365a8f6bfb023b6a06b4e168569f52 Mon Sep 17 00:00:00 2001 From: Julius Clausnitzer Date: Thu, 18 Jun 2026 11:21:01 +0200 Subject: [PATCH 7/8] unify tests --- internal/scheduling/nova/e2e_checks.go | 19 +- .../reservation_controller_test.go | 244 ++++++------------ 2 files changed, 89 insertions(+), 174 deletions(-) diff --git a/internal/scheduling/nova/e2e_checks.go b/internal/scheduling/nova/e2e_checks.go index 052fa7433..94aaa8642 100644 --- a/internal/scheduling/nova/e2e_checks.go +++ b/internal/scheduling/nova/e2e_checks.go @@ -381,20 +381,11 @@ func checkNovaSchedulerReturnsValidHosts( return resp.Hosts } -// CheckDomainNameHintRouting verifies that CR reservation scheduling passes the -// domain_name scheduler hint through to the pipeline and that filter_external_customer -// acts on it correctly. -// -// It sends a synthetic ExternalSchedulerRequest with: -// - _nova_check_type = reserve_for_committed_resource (CR intent, not failover) -// - domain_name = config.DomainName -// -// to the nova external scheduler and asserts the call succeeds (HTTP 200). The check -// does not assert which specific hosts are returned — that depends on cluster state — -// but a successful response confirms the full request/filter path is wired correctly. -// -// To observe actual filtering, include at least one host with -// CUSTOM_EXTERNAL_CUSTOMER_EXCLUSIVE and one without, then inspect the logs. +// CheckDomainNameHintRouting sends a synthetic CR reservation scheduling request +// with _nova_check_type=reserve_for_committed_resource and domain_name=config.DomainName +// to the nova external scheduler and asserts HTTP 200. Confirms the hint flows through +// the pipeline and filter_external_customer evaluates it. Inspect the manager logs to +// see which hosts were kept or dropped. func CheckDomainNameHintRouting(ctx context.Context, config ChecksConfig) { cfg := config.DomainNameHintCheck if cfg == nil { diff --git a/internal/scheduling/reservations/commitments/reservation_controller_test.go b/internal/scheduling/reservations/commitments/reservation_controller_test.go index 14ab254a4..651852c2c 100644 --- a/internal/scheduling/reservations/commitments/reservation_controller_test.go +++ b/internal/scheduling/reservations/commitments/reservation_controller_test.go @@ -871,9 +871,7 @@ func TestCommitmentReservationController_reconcileInstanceReservation_Success(t // Test: domain_name scheduler hint // ============================================================================ -// newTestSchedulerServer creates an httptest server that captures the decoded -// ExternalSchedulerRequest and returns a single host. The caller can inspect -// the captured request after the Reconcile call. +// newTestSchedulerServer captures the decoded request and returns a single host. func newTestSchedulerServer(t *testing.T, captured *schedulerdelegationapi.ExternalSchedulerRequest) *httptest.Server { t.Helper() return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -889,172 +887,98 @@ func newTestSchedulerServer(t *testing.T, captured *schedulerdelegationapi.Exter })) } -// TestCommitmentReservationController_DomainNameHint_Present verifies that when -// DomainResolver is configured and the reservation carries a DomainID, the -// domain_name scheduler hint is forwarded to the scheduling pipeline so that -// filter_external_customer can enforce domain-based host restrictions. -func TestCommitmentReservationController_DomainNameHint_Present(t *testing.T) { - scheme := newCRTestScheme(t) - - reservation := &v1alpha1.Reservation{ - ObjectMeta: ctrl.ObjectMeta{Name: "test-reservation"}, - Spec: v1alpha1.ReservationSpec{ - Type: v1alpha1.ReservationTypeCommittedResource, - CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ - ProjectID: "test-project", - DomainID: "domain-uuid-1", - ResourceName: "test-flavor", - }, - Resources: map[hv1.ResourceName]resource.Quantity{ - hv1.ResourceMemory: resource.MustParse("4Gi"), - hv1.ResourceCPU: resource.MustParse("2"), - }, +func TestCommitmentReservationController_DomainNameHint(t *testing.T) { + tests := []struct { + name string + resolver DomainResolver + wantDomainName string // empty means hint must be absent + wantResolverCall string // expected domainID passed to resolver; empty means no call expected + }{ + { + name: "hint present when resolver succeeds", + resolver: &mockDomainResolver{names: map[string]string{"domain-uuid-1": "monsoon3"}}, + wantDomainName: "monsoon3", + wantResolverCall: "domain-uuid-1", }, - } - hypervisor := &hv1.Hypervisor{ObjectMeta: metav1.ObjectMeta{Name: "test-host-1"}} - k8sClient := newCRTestClient(scheme, reservation, newTestFlavorKnowledge(), hypervisor) - - var captured schedulerdelegationapi.ExternalSchedulerRequest - server := newTestSchedulerServer(t, &captured) - defer server.Close() - - resolver := &mockDomainResolver{names: map[string]string{"domain-uuid-1": "monsoon3"}} - conf := ReservationControllerConfig{SchedulerURL: server.URL} - reconciler := &CommitmentReservationController{ - Client: k8sClient, - Scheme: scheme, - Conf: conf, - SchedulerClient: reservations.NewSchedulerClient(server.URL), - DomainResolver: resolver, - } - - req := ctrl.Request{NamespacedName: types.NamespacedName{Name: reservation.Name}} - if _, err := reconciler.Reconcile(context.Background(), req); err != nil { - t.Fatalf("Reconcile() error = %v", err) - } - - // Verify the resolver was called with the correct domain ID. - if len(resolver.calls) != 1 || resolver.calls[0] != "domain-uuid-1" { - t.Errorf("expected ResolveDomainName called with %q, got calls=%v", "domain-uuid-1", resolver.calls) - } - - // Verify domain_name hint was forwarded to the scheduler. - gotDomain, err := captured.Spec.Data.GetSchedulerHintStr("domain_name") - if err != nil { - t.Fatalf("domain_name scheduler hint missing: %v", err) - } - if gotDomain != "monsoon3" { - t.Errorf("expected domain_name hint %q, got %q", "monsoon3", gotDomain) - } - - // Verify _nova_check_type hint is still present. - gotIntent, err := captured.Spec.Data.GetSchedulerHintStr("_nova_check_type") - if err != nil || gotIntent != string(schedulerdelegationapi.ReserveForCommittedResourceIntent) { - t.Errorf("expected _nova_check_type hint %q, got %q (err=%v)", schedulerdelegationapi.ReserveForCommittedResourceIntent, gotIntent, err) - } -} - -// TestCommitmentReservationController_DomainNameHint_ResolverError verifies that -// when domain name resolution fails the controller does not return an error and -// proceeds with scheduling — omitting only the domain_name hint. This ensures -// that a transient Keystone outage never blocks CR reservation placement. -func TestCommitmentReservationController_DomainNameHint_ResolverError(t *testing.T) { - scheme := newCRTestScheme(t) - - reservation := &v1alpha1.Reservation{ - ObjectMeta: ctrl.ObjectMeta{Name: "test-reservation"}, - Spec: v1alpha1.ReservationSpec{ - Type: v1alpha1.ReservationTypeCommittedResource, - CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ - ProjectID: "test-project", - DomainID: "domain-uuid-1", - ResourceName: "test-flavor", - }, - Resources: map[hv1.ResourceName]resource.Quantity{ - hv1.ResourceMemory: resource.MustParse("4Gi"), - hv1.ResourceCPU: resource.MustParse("2"), - }, + { + name: "hint absent when resolver fails, scheduling continues", + resolver: &mockDomainResolver{err: errors.New("keystone unavailable")}, + wantDomainName: "", + }, + { + name: "hint absent when resolver is nil (keystoneSecretRef not configured)", + resolver: nil, + wantDomainName: "", }, - } - hypervisor := &hv1.Hypervisor{ObjectMeta: metav1.ObjectMeta{Name: "test-host-1"}} - k8sClient := newCRTestClient(scheme, reservation, newTestFlavorKnowledge(), hypervisor) - - var captured schedulerdelegationapi.ExternalSchedulerRequest - server := newTestSchedulerServer(t, &captured) - defer server.Close() - - resolver := &mockDomainResolver{err: errors.New("keystone unavailable")} - conf := ReservationControllerConfig{SchedulerURL: server.URL} - reconciler := &CommitmentReservationController{ - Client: k8sClient, - Scheme: scheme, - Conf: conf, - SchedulerClient: reservations.NewSchedulerClient(server.URL), - DomainResolver: resolver, - } - - req := ctrl.Request{NamespacedName: types.NamespacedName{Name: reservation.Name}} - // Must not return an error — resolution failure is non-fatal. - if _, err := reconciler.Reconcile(context.Background(), req); err != nil { - t.Fatalf("Reconcile() must not fail on resolver error, got: %v", err) - } - - // domain_name hint must be absent when resolution failed. - if _, err := captured.Spec.Data.GetSchedulerHintStr("domain_name"); err == nil { - t.Error("expected domain_name hint to be absent when resolver fails, but it was present") } - // _nova_check_type must still be present — unrelated to domain resolution. - gotIntent, err := captured.Spec.Data.GetSchedulerHintStr("_nova_check_type") - if err != nil || gotIntent != string(schedulerdelegationapi.ReserveForCommittedResourceIntent) { - t.Errorf("expected _nova_check_type hint %q, got %q (err=%v)", schedulerdelegationapi.ReserveForCommittedResourceIntent, gotIntent, err) - } -} + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + scheme := newCRTestScheme(t) + reservation := &v1alpha1.Reservation{ + ObjectMeta: ctrl.ObjectMeta{Name: "test-reservation"}, + Spec: v1alpha1.ReservationSpec{ + Type: v1alpha1.ReservationTypeCommittedResource, + CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ + ProjectID: "test-project", + DomainID: "domain-uuid-1", + ResourceName: "test-flavor", + }, + Resources: map[hv1.ResourceName]resource.Quantity{ + hv1.ResourceMemory: resource.MustParse("4Gi"), + hv1.ResourceCPU: resource.MustParse("2"), + }, + }, + } + k8sClient := newCRTestClient(scheme, reservation, newTestFlavorKnowledge(), + &hv1.Hypervisor{ObjectMeta: metav1.ObjectMeta{Name: "test-host-1"}}) -// TestCommitmentReservationController_DomainNameHint_NilResolver verifies that -// when no DomainResolver is configured (KeystoneSecretRef absent), the controller -// schedules normally without setting the domain_name hint. -func TestCommitmentReservationController_DomainNameHint_NilResolver(t *testing.T) { - scheme := newCRTestScheme(t) + var captured schedulerdelegationapi.ExternalSchedulerRequest + server := newTestSchedulerServer(t, &captured) + defer server.Close() - reservation := &v1alpha1.Reservation{ - ObjectMeta: ctrl.ObjectMeta{Name: "test-reservation"}, - Spec: v1alpha1.ReservationSpec{ - Type: v1alpha1.ReservationTypeCommittedResource, - CommittedResourceReservation: &v1alpha1.CommittedResourceReservationSpec{ - ProjectID: "test-project", - DomainID: "domain-uuid-1", - ResourceName: "test-flavor", - }, - Resources: map[hv1.ResourceName]resource.Quantity{ - hv1.ResourceMemory: resource.MustParse("4Gi"), - hv1.ResourceCPU: resource.MustParse("2"), - }, - }, - } - hypervisor := &hv1.Hypervisor{ObjectMeta: metav1.ObjectMeta{Name: "test-host-1"}} - k8sClient := newCRTestClient(scheme, reservation, newTestFlavorKnowledge(), hypervisor) + reconciler := &CommitmentReservationController{ + Client: k8sClient, + Scheme: scheme, + Conf: ReservationControllerConfig{SchedulerURL: server.URL}, + SchedulerClient: reservations.NewSchedulerClient(server.URL), + DomainResolver: tt.resolver, + } - var captured schedulerdelegationapi.ExternalSchedulerRequest - server := newTestSchedulerServer(t, &captured) - defer server.Close() + if _, err := reconciler.Reconcile(context.Background(), ctrl.Request{ + NamespacedName: types.NamespacedName{Name: reservation.Name}, + }); err != nil { + t.Fatalf("Reconcile() error = %v", err) + } - conf := ReservationControllerConfig{SchedulerURL: server.URL} - reconciler := &CommitmentReservationController{ - Client: k8sClient, - Scheme: scheme, - Conf: conf, - SchedulerClient: reservations.NewSchedulerClient(server.URL), - DomainResolver: nil, // not configured - } + // Verify resolver was called correctly. + if tt.wantResolverCall != "" { + m := tt.resolver.(*mockDomainResolver) + if len(m.calls) != 1 || m.calls[0] != tt.wantResolverCall { + t.Errorf("expected resolver called with %q, got %v", tt.wantResolverCall, m.calls) + } + } - req := ctrl.Request{NamespacedName: types.NamespacedName{Name: reservation.Name}} - if _, err := reconciler.Reconcile(context.Background(), req); err != nil { - t.Fatalf("Reconcile() error = %v", err) - } + // Verify domain_name hint. + gotDomain, err := captured.Spec.Data.GetSchedulerHintStr("domain_name") + if tt.wantDomainName == "" { + if err == nil { + t.Errorf("expected domain_name hint absent, got %q", gotDomain) + } + } else { + if err != nil { + t.Fatalf("expected domain_name hint %q, got error: %v", tt.wantDomainName, err) + } + if gotDomain != tt.wantDomainName { + t.Errorf("expected domain_name hint %q, got %q", tt.wantDomainName, gotDomain) + } + } - // domain_name hint must be absent. - if _, err := captured.Spec.Data.GetSchedulerHintStr("domain_name"); err == nil { - t.Error("expected domain_name hint to be absent when DomainResolver is nil, but it was present") + // _nova_check_type must always be present regardless of resolver outcome. + gotIntent, err := captured.Spec.Data.GetSchedulerHintStr("_nova_check_type") + if err != nil || gotIntent != string(schedulerdelegationapi.ReserveForCommittedResourceIntent) { + t.Errorf("expected _nova_check_type=%q, got %q (err=%v)", schedulerdelegationapi.ReserveForCommittedResourceIntent, gotIntent, err) + } + }) } } From 501f18d88993afff9b0f40e1dea136746a9d0211 Mon Sep 17 00:00:00 2001 From: Julius Clausnitzer Date: Thu, 18 Jun 2026 11:21:33 +0200 Subject: [PATCH 8/8] fix --- .../reservations/commitments/domain_resolver_test.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/internal/scheduling/reservations/commitments/domain_resolver_test.go b/internal/scheduling/reservations/commitments/domain_resolver_test.go index 60996fa60..0e9dd30b9 100644 --- a/internal/scheduling/reservations/commitments/domain_resolver_test.go +++ b/internal/scheduling/reservations/commitments/domain_resolver_test.go @@ -36,7 +36,9 @@ func newDomainResolverTestServer(t *testing.T, domainsByID map[string]string) (* return } w.Header().Set("Content-Type", "application/json") - _ = json.NewEncoder(w).Encode(map[string]any{"domain": map[string]any{"id": id, "name": name}}) + if err := json.NewEncoder(w).Encode(map[string]any{"domain": map[string]any{"id": id, "name": name}}); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + } })) return server, &callCount } @@ -118,7 +120,9 @@ func TestKeystoneDomainResolver_ErrorNotCached(t *testing.T) { return } w.Header().Set("Content-Type", "application/json") - _ = json.NewEncoder(w).Encode(map[string]any{"domain": map[string]any{"id": "domain-flaky", "name": "recovered"}}) + if err := json.NewEncoder(w).Encode(map[string]any{"domain": map[string]any{"id": "domain-flaky", "name": "recovered"}}); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + } })) defer server.Close() resolver := newKeystoneDomainResolver(newTestServiceClient(server.URL))