From 5f1deef978e007181e6f475d6b9ef85f124c0f7f Mon Sep 17 00:00:00 2001 From: "Kaveesh Dubey (from Dev Box)" Date: Thu, 11 Jun 2026 07:55:15 -0700 Subject: [PATCH 1/6] {AKS} az aks create/update: add --enable/--disable-control-plane-metrics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Surface `azureMonitorProfile.metrics.controlPlane.enabled` so users can opt clusters in/out of Azure Monitor managed Prometheus control-plane metrics (kube-apiserver, etcd, scheduler, controller-manager) via the first-class API property — replaces the AFEC-gated preview. New flags: az aks create: --enable-control-plane-metrics (--enable-cp-metrics) az aks update: --enable-control-plane-metrics (--enable-cp-metrics) --disable-control-plane-metrics (--disable-cp-metrics) --enable-control-plane-metrics requires Azure Monitor metrics to already be on or to be enabled in the same command via --enable-azure-monitor-metrics; otherwise it is rejected client-side with RequiredArgumentMissingError. Passing --enable-control-plane-metrics together with --disable-control-plane-metrics, or together with --disable-azure-monitor-metrics, is rejected client-side with MutuallyExclusiveArgumentError. Greenfield race fix: On `aks create`, `metrics.controlPlane.enabled=true` is intentionally NOT set on the initial cluster PUT. Otherwise the RP would schedule the control-plane-metrics collection (CCP) pod before the DCRA is created in postprocessing (link_azure_monitor_profile_artifacts), causing the CCP pod to crash-loop with "DCRA not found" until reconciliation. The flip is deferred to the post-DCRA addon PUT, issued by a new `_addon_put_with_control_plane` helper that is used in place of `addon_put` when the flag is set, so the CCP pod is scheduled only after its DCRA exists. The update path is not deferred and sets the property directly on the update PUT — brownfield updates target a cluster whose DCRA already exists, so there is no race. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../azure/cli/command_modules/acs/_help.py | 9 + .../azure/cli/command_modules/acs/_params.py | 29 +++ .../azuremonitorprofile.py | 51 ++++- .../azure/cli/command_modules/acs/custom.py | 3 + .../acs/managed_cluster_decorator.py | 120 ++++++++++++ .../acs/tests/latest/test_aks_commands.py | 133 +++++++++++++ .../latest/test_managed_cluster_decorator.py | 178 ++++++++++++++++++ 7 files changed, 521 insertions(+), 2 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/_help.py b/src/azure-cli/azure/cli/command_modules/acs/_help.py index 191345789b1..d3775fb1436 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/_help.py +++ b/src/azure-cli/azure/cli/command_modules/acs/_help.py @@ -558,6 +558,9 @@ - name: --enable-windows-recording-rules type: bool short-summary: Enable Windows Recording Rules when enabling the Azure Monitor Metrics addon + - name: --enable-control-plane-metrics --enable-cp-metrics + type: bool + short-summary: Enable collection of Azure Monitor managed Prometheus control plane metrics for managed cluster components (kube-apiserver, etcd, etc). Requires Azure Monitor metrics to be enabled (already enabled or via --enable-azure-monitor-metrics). See aka.ms/aks/controlplane-metrics. - name: --enable-azure-monitor-app-monitoring type: bool short-summary: Enable Azure Monitor Application Monitoring auto-instrumentation for a Kubernetes cluster. @@ -1102,6 +1105,12 @@ - name: --disable-azure-monitor-metrics type: bool short-summary: Disable Azure Monitor Metrics Profile. This will delete all DCRA's associated with the cluster, any linked DCRs with the data stream = prometheus-stream and the recording rule groups created by the addon for this AKS cluster. 
+ - name: --enable-control-plane-metrics --enable-cp-metrics + type: bool + short-summary: Enable collection of Azure Monitor managed Prometheus control plane metrics for managed cluster components (kube-apiserver, etcd, etc). Requires Azure Monitor metrics to be enabled (already enabled or via --enable-azure-monitor-metrics). See aka.ms/aks/controlplane-metrics. + - name: --disable-control-plane-metrics --disable-cp-metrics + type: bool + short-summary: Disable collection of Azure Monitor managed Prometheus control plane metrics. Leaves Azure Monitor metrics enabled. See aka.ms/aks/controlplane-metrics. - name: --enable-azure-monitor-app-monitoring type: bool short-summary: Enable Azure Monitor Application Monitoring auto-instrumentation for a Kubernetes cluster. diff --git a/src/azure-cli/azure/cli/command_modules/acs/_params.py b/src/azure-cli/azure/cli/command_modules/acs/_params.py index 91a66f7f286..d516acd18e9 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/_params.py +++ b/src/azure-cli/azure/cli/command_modules/acs/_params.py @@ -573,6 +573,16 @@ def load_arguments(self, _): c.argument('ksm_metric_annotations_allow_list') c.argument('grafana_resource_id', validator=validate_grafanaresourceid) c.argument('enable_windows_recording_rules', action='store_true') + c.argument( + 'enable_control_plane_metrics', + options_list=['--enable-control-plane-metrics', '--enable-cp-metrics'], + action='store_true', + help=( + 'Enable collection of Azure Monitor managed Prometheus control plane metrics for managed ' + 'cluster components (kube-apiserver, etcd, etc). Requires Azure Monitor metrics to be enabled ' + '(already enabled or via --enable-azure-monitor-metrics). See aka.ms/aks/controlplane-metrics.' 
+ ), + ) c.argument('enable_azure_monitor_app_monitoring', action='store_true') c.argument('node_public_ip_tags', arg_type=tags_type, validator=validate_node_public_ip_tags, help='space-separated tags: key[=value] [key[=value] ...].') @@ -812,6 +822,25 @@ def load_arguments(self, _): c.argument('grafana_resource_id', validator=validate_grafanaresourceid) c.argument('enable_windows_recording_rules', action='store_true') c.argument('disable_azure_monitor_metrics', action='store_true') + c.argument( + 'enable_control_plane_metrics', + options_list=['--enable-control-plane-metrics', '--enable-cp-metrics'], + action='store_true', + help=( + 'Enable collection of Azure Monitor managed Prometheus control plane metrics for managed ' + 'cluster components (kube-apiserver, etcd, etc). Requires Azure Monitor metrics to be enabled ' + '(already enabled or via --enable-azure-monitor-metrics). See aka.ms/aks/controlplane-metrics.' + ), + ) + c.argument( + 'disable_control_plane_metrics', + options_list=['--disable-control-plane-metrics', '--disable-cp-metrics'], + action='store_true', + help=( + 'Disable collection of Azure Monitor managed Prometheus control plane metrics. ' + 'Sets azureMonitorProfile.metrics.controlPlane.enabled=false on the cluster.' + ), + ) c.argument('enable_azure_monitor_app_monitoring', action='store_true') c.argument('disable_azure_monitor_app_monitoring', action='store_true') # azure container storage diff --git a/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/azuremonitorprofile.py b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/azuremonitorprofile.py index 2c94f879b23..b77362ba835 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/azuremonitorprofile.py +++ b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/azuremonitorprofile.py @@ -2,6 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
See License.txt in the project root for license information. # -------------------------------------------------------------------------------------------- +from azure.cli.command_modules.acs._client_factory import get_container_service_client from azure.cli.command_modules.acs.azuremonitormetrics.addonput import addon_put from azure.cli.command_modules.acs.azuremonitormetrics.amg.link import link_grafana_instance from azure.cli.command_modules.acs.azuremonitormetrics.amw.helper import get_azure_monitor_workspace_resource @@ -18,13 +19,51 @@ ) from azure.cli.command_modules.acs.azuremonitormetrics.recordingrules.create import create_rules from azure.cli.command_modules.acs.azuremonitormetrics.recordingrules.delete import delete_rules -from azure.cli.core.azclierror import InvalidArgumentValueError +from azure.cli.core.azclierror import CLIError, InvalidArgumentValueError, UnknownError from knack.log import get_logger logger = get_logger(__name__) +# pylint: disable=line-too-long +def _addon_put_with_control_plane(cmd, cluster_subscription, cluster_resource_group_name, cluster_name): + """Sibling of ``addon_put`` that ALSO flips ``metrics.controlPlane.enabled=True``. + + Used by the greenfield ``aks create --enable-control-plane-metrics`` path. The + initial cluster PUT intentionally leaves ``control_plane`` unset so the RP does + not schedule the control-plane-metrics collection (CCP) pod before the DCRA is + created in postprocessing. Once the DCRA exists, we issue this PUT so the CCP + pod is scheduled with its DCRA already in place (race-free). + """ + client = get_container_service_client(cmd.cli_ctx, cluster_subscription).managed_clusters + try: + mc = client.get(cluster_resource_group_name, cluster_name) + except CLIError as e: + raise UnknownError(e) + # Enable metrics if present and not already enabled (mirrors addon_put). 
+ if hasattr(mc, "azure_monitor_profile") and mc.azure_monitor_profile: + if hasattr(mc.azure_monitor_profile, "metrics") and mc.azure_monitor_profile.metrics: + if getattr(mc.azure_monitor_profile.metrics, "enabled", None) is False: + mc.azure_monitor_profile.metrics.enabled = True + # Flip control plane now that DCRA exists. + try: + from azure.mgmt.containerservice.models import ( + ManagedClusterAzureMonitorProfileMetricsControlPlane, + ) + mc.azure_monitor_profile.metrics.control_plane = ( + ManagedClusterAzureMonitorProfileMetricsControlPlane(enabled=True) + ) + except ImportError: + # Fallback for SDK versions that don't expose the model directly: + # set a dict that the generated client will serialize as the property. + mc.azure_monitor_profile.metrics.control_plane = {"enabled": True} + try: + client.begin_create_or_update(cluster_resource_group_name, cluster_name, mc) + except Exception as e: + raise UnknownError(e) + + # pylint: disable=line-too-long def link_azure_monitor_profile_artifacts( cmd, @@ -49,7 +88,15 @@ def link_azure_monitor_profile_artifacts( create_rules(cmd, cluster_subscription, cluster_resource_group_name, cluster_name, azure_monitor_workspace_resource_id, azure_monitor_workspace_location, raw_parameters) # if aks cluster create flow -> do a PUT on the AKS cluster to enable the addon if create_flow: - addon_put(cmd, cluster_subscription, cluster_resource_group_name, cluster_name) + # If --enable-control-plane-metrics was specified on create, flip + # metrics.controlPlane.enabled HERE (after DCRA creation) instead of on + # the initial cluster PUT. This avoids the CCP pod being scheduled before + # its DCRA exists (which would cause CrashLoopBackOff until reconciliation). 
+ enable_cp = bool(raw_parameters and raw_parameters.get("enable_control_plane_metrics")) + if enable_cp: + _addon_put_with_control_plane(cmd, cluster_subscription, cluster_resource_group_name, cluster_name) + else: + addon_put(cmd, cluster_subscription, cluster_resource_group_name, cluster_name) # pylint: disable=line-too-long diff --git a/src/azure-cli/azure/cli/command_modules/acs/custom.py b/src/azure-cli/azure/cli/command_modules/acs/custom.py index a81309c2a21..7f0b5385c90 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/custom.py +++ b/src/azure-cli/azure/cli/command_modules/acs/custom.py @@ -1008,6 +1008,7 @@ def aks_create( ksm_metric_annotations_allow_list=None, grafana_resource_id=None, enable_windows_recording_rules=False, + enable_control_plane_metrics=False, enable_azure_monitor_app_monitoring=False, # azure container storage enable_azure_container_storage=None, @@ -1209,6 +1210,8 @@ def aks_update( grafana_resource_id=None, enable_windows_recording_rules=False, disable_azure_monitor_metrics=False, + enable_control_plane_metrics=False, + disable_control_plane_metrics=False, enable_azure_monitor_app_monitoring=False, disable_azure_monitor_app_monitoring=False, # azure container storage diff --git a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py index b2b8e1b124a..122661486b5 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py @@ -5956,6 +5956,91 @@ def get_disable_azure_monitor_metrics(self) -> bool: """ return self._get_disable_azure_monitor_metrics(enable_validation=True) + def _get_enable_control_plane_metrics(self, enable_validation: bool = False) -> bool: + """Internal function to obtain the value of enable_control_plane_metrics. + This function supports the option of enable_validation. 
When enabled, if both + enable_control_plane_metrics and disable_control_plane_metrics are specified, raise a + MutuallyExclusiveArgumentError. Additionally, --enable-control-plane-metrics requires + Azure Monitor metrics to either already be enabled on the cluster or to be enabled in + the same command via --enable-azure-monitor-metrics. + + :return: bool + """ + # Read the original value passed by the command. + enable_control_plane_metrics = self.raw_param.get("enable_control_plane_metrics") + # In create mode, try to read the property value corresponding to the parameter from the `mc` object. + if self.decorator_mode == DecoratorMode.CREATE: + if ( + self.mc and + self.mc.azure_monitor_profile and + self.mc.azure_monitor_profile.metrics and + self.mc.azure_monitor_profile.metrics.control_plane + ): + enable_control_plane_metrics = self.mc.azure_monitor_profile.metrics.control_plane.enabled + # This parameter does not need dynamic completion. + if enable_validation: + if enable_control_plane_metrics and self._get_disable_control_plane_metrics(False): + raise MutuallyExclusiveArgumentError( + "Cannot specify --enable-control-plane-metrics and --disable-control-plane-metrics " + "at the same time." + ) + if enable_control_plane_metrics: + # Reject combining enable-control-plane-metrics with disable-azure-monitor-metrics + # in the same command — the resulting payload would be inconsistent. + if self._get_disable_azure_monitor_metrics(False): + raise MutuallyExclusiveArgumentError( + "Cannot specify --enable-control-plane-metrics together with " + "--disable-azure-monitor-metrics." + ) + # Must have Azure Monitor metrics enabled (either already or in this command). 
+ already_enabled = ( + self.mc and + self.mc.azure_monitor_profile and + self.mc.azure_monitor_profile.metrics and + self.mc.azure_monitor_profile.metrics.enabled + ) + enabling_now = self._get_enable_azure_monitor_metrics(False) + if not already_enabled and not enabling_now: + raise RequiredArgumentMissingError( + "--enable-control-plane-metrics requires Azure Monitor metrics to be enabled. " + "Specify --enable-azure-monitor-metrics or run on a cluster that already has " + "Azure Monitor metrics enabled." + ) + return enable_control_plane_metrics + + def get_enable_control_plane_metrics(self) -> bool: + """Obtain the value of enable_control_plane_metrics. + This function will verify the parameter by default. If both enable_control_plane_metrics and + disable_control_plane_metrics are specified, raise a MutuallyExclusiveArgumentError. + :return: bool + """ + return self._get_enable_control_plane_metrics(enable_validation=True) + + def _get_disable_control_plane_metrics(self, enable_validation: bool = False) -> bool: + """Internal function to obtain the value of disable_control_plane_metrics. + This function supports the option of enable_validation. When enabled, if both + enable_control_plane_metrics and disable_control_plane_metrics are specified, raise a + MutuallyExclusiveArgumentError. + :return: bool + """ + # Read the original value passed by the command. + disable_control_plane_metrics = self.raw_param.get("disable_control_plane_metrics") + if enable_validation: + if disable_control_plane_metrics and self._get_enable_control_plane_metrics(False): + raise MutuallyExclusiveArgumentError( + "Cannot specify --enable-control-plane-metrics and --disable-control-plane-metrics " + "at the same time." + ) + return disable_control_plane_metrics + + def get_disable_control_plane_metrics(self) -> bool: + """Obtain the value of disable_control_plane_metrics. + This function will verify the parameter by default. 
If both enable_control_plane_metrics and + disable_control_plane_metrics are specified, raise a MutuallyExclusiveArgumentError. + :return: bool + """ + return self._get_disable_control_plane_metrics(enable_validation=True) + def _get_enable_azure_monitor_app_monitoring(self, enable_validation: bool = False) -> bool: """Internal function to obtain the value of enable_azure_monitor_app_monitoring. This function supports the option of enable_validation. When enabled, if both @@ -7714,6 +7799,10 @@ def set_up_azure_monitor_profile(self, mc: ManagedCluster) -> ManagedCluster: ksm_metric_labels_allow_list = "" if ksm_metric_annotations_allow_list is None: ksm_metric_annotations_allow_list = "" + # Trigger control-plane-metrics validation even if the parent metrics flag was + # not specified, so users get a clear error instead of silent ignore when they + # pass --enable-control-plane-metrics on its own. + self.context.get_enable_control_plane_metrics() if self.context.get_enable_azure_monitor_metrics(): if mc.azure_monitor_profile is None: mc.azure_monitor_profile = self.models.ManagedClusterAzureMonitorProfile() @@ -7721,6 +7810,13 @@ def set_up_azure_monitor_profile(self, mc: ManagedCluster) -> ManagedCluster: mc.azure_monitor_profile.metrics.kube_state_metrics = self.models.ManagedClusterAzureMonitorProfileKubeStateMetrics( # pylint:disable=line-too-long metric_labels_allowlist=str(ksm_metric_labels_allow_list), metric_annotations_allow_list=str(ksm_metric_annotations_allow_list)) + # NOTE: control_plane.enabled is intentionally NOT set here on the create flow. + # If we set it on this initial PUT, the RP would schedule the control-plane-metrics + # collection pod (CCP) before the DCRA (Data Collection Rule Association) has been + # created in postprocessing. The CCP would then crash-loop with "DCRA not found" + # until the next reconciliation. 
Instead, we defer the flip to the addon_put step + # inside link_azure_monitor_profile_artifacts (postprocessing_after_mc_created), + # which runs *after* DCRA creation. # set intermediate self.context.set_intermediate("azuremonitormetrics_addon_enabled", True, overwrite_exists=True) if self.context.get_enable_azure_monitor_app_monitoring(): @@ -9810,6 +9906,30 @@ def update_azure_monitor_profile(self, mc: ManagedCluster) -> ManagedCluster: self.context.get_disable_azure_monitor_metrics(), False) + # Handle enable / disable of control plane metrics independently of the parent metrics flag, + # so users can toggle control plane metrics on a cluster that already has metrics enabled. + if self.context.get_enable_control_plane_metrics(): + if mc.azure_monitor_profile is None: + mc.azure_monitor_profile = self.models.ManagedClusterAzureMonitorProfile() + if mc.azure_monitor_profile.metrics is None: + # Should not normally happen — validation requires metrics to be enabled — but guard + # against partially-populated profiles to avoid AttributeError. 
+ mc.azure_monitor_profile.metrics = ( + self.models.ManagedClusterAzureMonitorProfileMetrics(enabled=True) + ) + mc.azure_monitor_profile.metrics.control_plane = ( + self.models.ManagedClusterAzureMonitorProfileMetricsControlPlane(enabled=True) + ) + + if self.context.get_disable_control_plane_metrics(): + if ( + mc.azure_monitor_profile and + mc.azure_monitor_profile.metrics + ): + mc.azure_monitor_profile.metrics.control_plane = ( + self.models.ManagedClusterAzureMonitorProfileMetricsControlPlane(enabled=False) + ) + if self.context.get_enable_azure_monitor_app_monitoring(): if mc.azure_monitor_profile is None: mc.azure_monitor_profile = self.models.ManagedClusterAzureMonitorProfile() diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py index 7be8614c087..23354bced09 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py @@ -8331,6 +8331,139 @@ def test_aks_update_with_azuremonitormetrics(self, resource_group, resource_grou self.is_empty(), ]) + @live_only() + @AllowLargeResponse() + @AKSCustomResourceGroupPreparer(random_name_length=17, name_prefix='clitest', location='westus2') + def test_aks_create_with_control_plane_metrics(self, resource_group, resource_group_location): + # reset the count so in replay mode the random names will start with 0 + self.test_resources_count = 0 + aks_name = self.create_random_name('cliakstest', 16) + node_vm_size = 'standard_d2s_v3' + self.kwargs.update({ + 'resource_group': resource_group, + 'name': aks_name, + 'location': resource_group_location, + 'ssh_key_value': self.generate_ssh_keys(), + 'node_vm_size': node_vm_size, + }) + + # create: --enable-azure-monitor-metrics + --enable-control-plane-metrics + create_cmd = 'aks create --resource-group={resource_group} --name={name} 
--location={location} ' \ + '--ssh-key-value={ssh_key_value} --node-vm-size={node_vm_size} --enable-managed-identity ' \ + '--enable-azure-monitor-metrics --enable-control-plane-metrics --output=json' + self.cmd(create_cmd, checks=[ + self.check('provisioningState', 'Succeeded'), + self.check('azureMonitorProfile.metrics.enabled', True), + self.check('azureMonitorProfile.metrics.controlPlane.enabled', True), + ]) + + wait_cmd = 'aks wait --resource-group={resource_group} --name={name} --created ' \ + '--interval 60 --timeout 1800' + self.cmd(wait_cmd, checks=[self.is_empty()]) + + # delete + self.cmd('aks delete --resource-group={resource_group} --name={name} --yes --no-wait', + checks=[self.is_empty()]) + + @live_only() + @AllowLargeResponse() + @AKSCustomResourceGroupPreparer(random_name_length=17, name_prefix='clitest', location='westus2') + def test_aks_update_with_control_plane_metrics(self, resource_group, resource_group_location): + aks_name = self.create_random_name('cliakstest', 16) + node_vm_size = 'standard_d2s_v3' + self.kwargs.update({ + 'resource_group': resource_group, + 'name': aks_name, + 'location': resource_group_location, + 'ssh_key_value': self.generate_ssh_keys(), + 'node_vm_size': node_vm_size, + }) + + # create: with azure monitor metrics but without control plane metrics + create_cmd = 'aks create --resource-group={resource_group} --name={name} --location={location} ' \ + '--ssh-key-value={ssh_key_value} --node-vm-size={node_vm_size} --enable-managed-identity ' \ + '--enable-azure-monitor-metrics --output=json' + self.cmd(create_cmd, checks=[ + self.check('provisioningState', 'Succeeded'), + self.check('azureMonitorProfile.metrics.enabled', True), + ]) + + # wait for AMW background setup to complete before issuing update + wait_cmd = 'aks wait --resource-group={resource_group} --name={name} --updated --timeout=1800' + self.cmd(wait_cmd, checks=[self.is_empty()]) + + # update: enable-control-plane-metrics on a cluster that already has AM 
metrics + update_cmd = 'aks update --resource-group={resource_group} --name={name} --yes --output=json ' \ + '--enable-control-plane-metrics' + self.cmd(update_cmd, checks=[ + self.check('provisioningState', 'Succeeded'), + self.check('azureMonitorProfile.metrics.controlPlane.enabled', True), + ]) + + self.cmd(wait_cmd, checks=[self.is_empty()]) + + # update: disable-control-plane-metrics + update_cmd = 'aks update --resource-group={resource_group} --name={name} --yes --output=json ' \ + '--disable-control-plane-metrics' + self.cmd(update_cmd, checks=[ + self.check('provisioningState', 'Succeeded'), + self.check('azureMonitorProfile.metrics.controlPlane.enabled', False), + ]) + + self.cmd(wait_cmd, checks=[self.is_empty()]) + + # delete + self.cmd('aks delete --resource-group={resource_group} --name={name} --yes --no-wait', + checks=[self.is_empty()]) + + @live_only() + @AllowLargeResponse() + @AKSCustomResourceGroupPreparer(random_name_length=17, name_prefix='clitest', location='westus2') + def test_aks_control_plane_metrics_negative(self, resource_group, resource_group_location): + aks_name = self.create_random_name('cliakstest', 16) + node_vm_size = 'standard_d2s_v3' + self.kwargs.update({ + 'resource_group': resource_group, + 'name': aks_name, + 'location': resource_group_location, + 'ssh_key_value': self.generate_ssh_keys(), + 'node_vm_size': node_vm_size, + }) + + # negative: --enable-control-plane-metrics without --enable-azure-monitor-metrics on create + create_missing_parent = 'aks create --resource-group={resource_group} --name={name} --location={location} ' \ + '--ssh-key-value={ssh_key_value} --node-vm-size={node_vm_size} --enable-managed-identity ' \ + '--enable-control-plane-metrics --output=json' + self.cmd(create_missing_parent, expect_failure=True) + + # create a baseline cluster (no AM metrics) so we can exercise the update-time negatives + create_cmd = 'aks create --resource-group={resource_group} --name={name} --location={location} ' \ + 
'--ssh-key-value={ssh_key_value} --node-vm-size={node_vm_size} --enable-managed-identity ' \ + '--output=json' + self.cmd(create_cmd, checks=[ + self.check('provisioningState', 'Succeeded'), + self.not_exists('azureMonitorProfile.metrics'), + ]) + + # negative: update --enable-control-plane-metrics on a cluster without AM metrics enabled + update_missing_parent = 'aks update --resource-group={resource_group} --name={name} --yes --output=json ' \ + '--enable-control-plane-metrics' + self.cmd(update_missing_parent, expect_failure=True) + + # negative: --enable-control-plane-metrics with --disable-azure-monitor-metrics on update + update_conflicting = 'aks update --resource-group={resource_group} --name={name} --yes --output=json ' \ + '--enable-azure-monitor-metrics --enable-control-plane-metrics --disable-azure-monitor-metrics' + self.cmd(update_conflicting, expect_failure=True) + + # negative: both --enable-control-plane-metrics and --disable-control-plane-metrics on update + update_both = 'aks update --resource-group={resource_group} --name={name} --yes --output=json ' \ + '--enable-azure-monitor-metrics --enable-control-plane-metrics --disable-control-plane-metrics' + self.cmd(update_both, expect_failure=True) + + # delete + self.cmd('aks delete --resource-group={resource_group} --name={name} --yes --no-wait', + checks=[self.is_empty()]) + # live only due to dependency `_add_role_assignment` is not mocked @live_only() @AllowLargeResponse() diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py index 0ad0ca1be32..211269efdb9 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_managed_cluster_decorator.py @@ -9042,6 +9042,56 @@ def test_set_up_azure_monitor_profile_app_monitoring_with_existing_metrics(self) 
self.assertIsNotNone(dec_mc_1.azure_monitor_profile.app_monitoring.auto_instrumentation) self.assertTrue(dec_mc_1.azure_monitor_profile.app_monitoring.auto_instrumentation.enabled) + def test_set_up_azure_monitor_profile_defers_control_plane_on_create(self): + # Greenfield --enable-control-plane-metrics must NOT set control_plane.enabled + # on the initial cluster PUT. It is deferred to the addon_put step in + # postprocessing (after DCRA creation) so the CCP collector pod is only + # scheduled once its DCRA exists. + dec = AKSManagedClusterCreateDecorator( + self.cmd, + self.client, + { + "enable_azure_monitor_metrics": True, + "enable_control_plane_metrics": True, + }, + ResourceType.MGMT_CONTAINERSERVICE, + ) + mc = self.models.ManagedCluster( + location="test_location", + identity=self.models.ManagedClusterIdentity(type="SystemAssigned"), + ) + dec.context.attach_mc(mc) + dec_mc = dec.set_up_azure_monitor_profile(mc) + + # Parent AMP metrics profile is populated on the initial PUT... + self.assertIsNotNone(dec_mc.azure_monitor_profile) + self.assertIsNotNone(dec_mc.azure_monitor_profile.metrics) + # The initial PUT sets enabled=False; addon_put flips it to True after prerequisites. + self.assertFalse(dec_mc.azure_monitor_profile.metrics.enabled) + # ...but control_plane is deferred and must be None here. + self.assertIsNone(dec_mc.azure_monitor_profile.metrics.control_plane) + # Intermediate flag still set so postprocessing runs the prereqs/addon_put. + self.assertTrue(dec.context.get_intermediate("azuremonitormetrics_addon_enabled")) + + def test_set_up_azure_monitor_profile_create_cp_without_amp_raises(self): + # --enable-control-plane-metrics without --enable-azure-monitor-metrics on create + # must fail validation early. 
+ dec = AKSManagedClusterCreateDecorator( + self.cmd, + self.client, + { + "enable_control_plane_metrics": True, + }, + ResourceType.MGMT_CONTAINERSERVICE, + ) + mc = self.models.ManagedCluster( + location="test_location", + identity=self.models.ManagedClusterIdentity(type="SystemAssigned"), + ) + dec.context.attach_mc(mc) + with self.assertRaises(RequiredArgumentMissingError): + dec.set_up_azure_monitor_profile(mc) + def test_set_up_azure_service_mesh(self): dec_1 = AKSManagedClusterCreateDecorator( self.cmd, @@ -16961,6 +17011,134 @@ def test_update_azure_monitor_profile_disable_app_monitoring_no_existing_app_mon self.assertIsNotNone(dec_mc_1.azure_monitor_profile.app_monitoring.auto_instrumentation) self.assertFalse(dec_mc_1.azure_monitor_profile.app_monitoring.auto_instrumentation.enabled) + def test_update_enable_control_plane_metrics_requires_parent_metrics(self): + # Update path: --enable-control-plane-metrics on a cluster that has neither + # Azure Monitor metrics already enabled nor --enable-azure-monitor-metrics in + # the same command must raise RequiredArgumentMissingError. + dec = AKSManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_control_plane_metrics": True, + }, + ResourceType.MGMT_CONTAINERSERVICE, + ) + mc = self.models.ManagedCluster( + location="test_location", + identity=self.models.ManagedClusterIdentity(type="SystemAssigned"), + ) + dec.context.attach_mc(mc) + + with self.assertRaises(RequiredArgumentMissingError): + dec.context.get_enable_control_plane_metrics() + + def test_update_enable_control_plane_metrics_already_enabled_cluster_succeeds(self): + # Update path: --enable-control-plane-metrics on a cluster that already has + # Azure Monitor metrics enabled should succeed without requiring + # --enable-azure-monitor-metrics. 
+ dec = AKSManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_control_plane_metrics": True, + }, + ResourceType.MGMT_CONTAINERSERVICE, + ) + mc = self.models.ManagedCluster( + location="test_location", + identity=self.models.ManagedClusterIdentity(type="SystemAssigned"), + azure_monitor_profile=self.models.ManagedClusterAzureMonitorProfile( + metrics=self.models.ManagedClusterAzureMonitorProfileMetrics(enabled=True) + ), + ) + dec.context.attach_mc(mc) + + self.assertTrue(dec.context.get_enable_control_plane_metrics()) + + dec_mc = dec.update_azure_monitor_profile(mc) + + self.assertIsNotNone(dec_mc.azure_monitor_profile.metrics.control_plane) + self.assertTrue(dec_mc.azure_monitor_profile.metrics.control_plane.enabled) + + def test_update_enable_control_plane_metrics_with_disable_metrics_raises(self): + # Update path: --enable-control-plane-metrics combined with + # --disable-azure-monitor-metrics in the same command must be rejected to + # avoid producing an inconsistent payload. + dec = AKSManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_control_plane_metrics": True, + "disable_azure_monitor_metrics": True, + }, + ResourceType.MGMT_CONTAINERSERVICE, + ) + mc = self.models.ManagedCluster( + location="test_location", + identity=self.models.ManagedClusterIdentity(type="SystemAssigned"), + azure_monitor_profile=self.models.ManagedClusterAzureMonitorProfile( + metrics=self.models.ManagedClusterAzureMonitorProfileMetrics(enabled=True) + ), + ) + dec.context.attach_mc(mc) + + with self.assertRaises(MutuallyExclusiveArgumentError): + dec.context.get_enable_control_plane_metrics() + + def test_update_enable_control_plane_metrics_with_disable_control_plane_raises(self): + # --enable-control-plane-metrics together with --disable-control-plane-metrics + # in the same command must raise MutuallyExclusiveArgumentError. 
+ dec = AKSManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_control_plane_metrics": True, + "disable_control_plane_metrics": True, + }, + ResourceType.MGMT_CONTAINERSERVICE, + ) + mc = self.models.ManagedCluster( + location="test_location", + identity=self.models.ManagedClusterIdentity(type="SystemAssigned"), + azure_monitor_profile=self.models.ManagedClusterAzureMonitorProfile( + metrics=self.models.ManagedClusterAzureMonitorProfileMetrics(enabled=True) + ), + ) + dec.context.attach_mc(mc) + + with self.assertRaises(MutuallyExclusiveArgumentError): + dec.context.get_enable_control_plane_metrics() + + def test_update_disable_control_plane_metrics_sets_enabled_false(self): + # --disable-control-plane-metrics on a cluster that has it enabled should + # produce a payload with control_plane.enabled=False. + dec = AKSManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "disable_control_plane_metrics": True, + }, + ResourceType.MGMT_CONTAINERSERVICE, + ) + mc = self.models.ManagedCluster( + location="test_location", + identity=self.models.ManagedClusterIdentity(type="SystemAssigned"), + azure_monitor_profile=self.models.ManagedClusterAzureMonitorProfile( + metrics=self.models.ManagedClusterAzureMonitorProfileMetrics( + enabled=True, + control_plane=self.models.ManagedClusterAzureMonitorProfileMetricsControlPlane( + enabled=True + ), + ) + ), + ) + dec.context.attach_mc(mc) + + dec_mc = dec.update_azure_monitor_profile(mc) + + self.assertIsNotNone(dec_mc.azure_monitor_profile.metrics.control_plane) + self.assertFalse(dec_mc.azure_monitor_profile.metrics.control_plane.enabled) + if __name__ == "__main__": unittest.main() From 56ba118c87212d1b9057b37b345d82edea6b9788 Mon Sep 17 00:00:00 2001 From: "Kaveesh Dubey (from Dev Box)" Date: Thu, 11 Jun 2026 16:52:07 -0700 Subject: [PATCH 2/6] Address Copilot review feedback - Wait on the LRO in _addon_put_with_control_plane via poller.result(). 
This is the only place controlPlane.enabled is set during the greenfield create flow, so the CP flip must be durably persisted before the create command returns. Without the wait, callers and tests that read the cluster immediately could observe the pre-flip state. (The sibling addon_put intentionally remains fire-and-forget because metrics.enabled was already persisted on the initial cluster PUT.) - Replace raise UnknownError(e) with raise UnknownError(str(e)) from e so the message is readable and the original traceback is preserved. - Coerce _get_enable_control_plane_metrics / _get_disable_control_plane_metrics return values to bool() to match the declared -> bool return type when the parameter dict omits the key. - Make the live test_aks_create_with_control_plane_metrics assertion robust: the controlPlane.enabled check is moved out of the immediate create response into an explicit aks show after aks wait, since the flip is intentionally deferred to post-DCRA postprocessing. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../acs/azuremonitormetrics/azuremonitorprofile.py | 13 ++++++++++--- .../acs/managed_cluster_decorator.py | 4 ++-- .../acs/tests/latest/test_aks_commands.py | 14 +++++++++++++- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/azuremonitorprofile.py b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/azuremonitorprofile.py index b77362ba835..b218b9eb33e 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/azuremonitorprofile.py +++ b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/azuremonitorprofile.py @@ -40,7 +40,7 @@ def _addon_put_with_control_plane(cmd, cluster_subscription, cluster_resource_gr try: mc = client.get(cluster_resource_group_name, cluster_name) except CLIError as e: - raise UnknownError(e) + raise UnknownError(str(e)) from e # Enable metrics if present and not already 
enabled (mirrors addon_put). if hasattr(mc, "azure_monitor_profile") and mc.azure_monitor_profile: if hasattr(mc.azure_monitor_profile, "metrics") and mc.azure_monitor_profile.metrics: @@ -58,10 +58,17 @@ def _addon_put_with_control_plane(cmd, cluster_subscription, cluster_resource_gr # Fallback for SDK versions that don't expose the model directly: # set a dict that the generated client will serialize as the property. mc.azure_monitor_profile.metrics.control_plane = {"enabled": True} + # Unlike the sibling ``addon_put`` (where ``metrics.enabled`` is already true on the + # cluster from the initial PUT and the postprocessing PUT is just a safety re-affirm), + # this is the ONLY place where ``controlPlane.enabled`` is set during ``aks create``. + # Wait for the LRO so the CP flip is durably persisted before the create command + # returns; otherwise callers / tests that read the cluster immediately could see the + # pre-flip state. try: - client.begin_create_or_update(cluster_resource_group_name, cluster_name, mc) + poller = client.begin_create_or_update(cluster_resource_group_name, cluster_name, mc) + poller.result() except Exception as e: - raise UnknownError(e) + raise UnknownError(str(e)) from e # pylint: disable=line-too-long diff --git a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py index 122661486b5..8965762f63e 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py @@ -6006,7 +6006,7 @@ def _get_enable_control_plane_metrics(self, enable_validation: bool = False) -> "Specify --enable-azure-monitor-metrics or run on a cluster that already has " "Azure Monitor metrics enabled." 
) - return enable_control_plane_metrics + return bool(enable_control_plane_metrics) def get_enable_control_plane_metrics(self) -> bool: """Obtain the value of enable_control_plane_metrics. @@ -6031,7 +6031,7 @@ def _get_disable_control_plane_metrics(self, enable_validation: bool = False) -> "Cannot specify --enable-control-plane-metrics and --disable-control-plane-metrics " "at the same time." ) - return disable_control_plane_metrics + return bool(disable_control_plane_metrics) def get_disable_control_plane_metrics(self) -> bool: """Obtain the value of disable_control_plane_metrics. diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py index 23354bced09..160e4c954d9 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py @@ -8351,16 +8351,28 @@ def test_aks_create_with_control_plane_metrics(self, resource_group, resource_gr create_cmd = 'aks create --resource-group={resource_group} --name={name} --location={location} ' \ '--ssh-key-value={ssh_key_value} --node-vm-size={node_vm_size} --enable-managed-identity ' \ '--enable-azure-monitor-metrics --enable-control-plane-metrics --output=json' + # NOTE: ``--enable-control-plane-metrics`` on create is intentionally deferred to a + # postprocessing PUT (after DCRA creation) to avoid scheduling the CCP pod before its + # DCRA exists. The create response may therefore reflect the pre-flip state; assert + # the final state via ``aks show`` after the cluster settles. 
self.cmd(create_cmd, checks=[ self.check('provisioningState', 'Succeeded'), self.check('azureMonitorProfile.metrics.enabled', True), - self.check('azureMonitorProfile.metrics.controlPlane.enabled', True), ]) wait_cmd = 'aks wait --resource-group={resource_group} --name={name} --created ' \ '--interval 60 --timeout 1800' self.cmd(wait_cmd, checks=[self.is_empty()]) + # Verify the deferred control-plane-metrics flip landed on the cluster. + self.cmd( + 'aks show --resource-group={resource_group} --name={name} --output=json', + checks=[ + self.check('azureMonitorProfile.metrics.enabled', True), + self.check('azureMonitorProfile.metrics.controlPlane.enabled', True), + ], + ) + # delete self.cmd('aks delete --resource-group={resource_group} --name={name} --yes --no-wait', checks=[self.is_empty()]) From 591106861d7e28d5132a2ace7ba47c0094e744f3 Mon Sep 17 00:00:00 2001 From: "Kaveesh Dubey (from Dev Box)" Date: Fri, 12 Jun 2026 08:28:13 -0700 Subject: [PATCH 3/6] Separate control-plane-metrics validation from the getters Per FumingZhang review feedback on PR #33537: calling get_enable_control_plane_metrics() purely to trigger validation and discarding the return value is a confusing pattern. Extract the validation block into a new private _validate_control_plane_metrics_params method, expose a public validate_control_plane_metrics_params, and have the getters delegate to it when enable_validation=True (preserves existing API). The set_up_azure_monitor_profile call site now calls the validator directly instead of discarding a getter result. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../acs/managed_cluster_decorator.py | 117 +++++++++++------- 1 file changed, 72 insertions(+), 45 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py index 8965762f63e..e724303d1ba 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py @@ -5956,13 +5956,70 @@ def get_disable_azure_monitor_metrics(self) -> bool: """ return self._get_disable_azure_monitor_metrics(enable_validation=True) + def _validate_control_plane_metrics_params(self) -> None: + """Validate the --enable/--disable-control-plane-metrics flag combo and + its interaction with --enable/--disable-azure-monitor-metrics. + + Raises MutuallyExclusiveArgumentError or RequiredArgumentMissingError on + an invalid combination. Returns nothing — use this when you want to + surface validation errors without consuming a parameter value. + + Reads raw_param directly to avoid recursing back into the getters that + themselves delegate here when enable_validation is True. + """ + enable_cp = self.raw_param.get("enable_control_plane_metrics") + disable_cp = self.raw_param.get("disable_control_plane_metrics") + # On create, the property may already be set on the incoming mc object. + if self.decorator_mode == DecoratorMode.CREATE: + if ( + self.mc and + self.mc.azure_monitor_profile and + self.mc.azure_monitor_profile.metrics and + self.mc.azure_monitor_profile.metrics.control_plane + ): + enable_cp = self.mc.azure_monitor_profile.metrics.control_plane.enabled + + if enable_cp and disable_cp: + raise MutuallyExclusiveArgumentError( + "Cannot specify --enable-control-plane-metrics and --disable-control-plane-metrics " + "at the same time." 
+ ) + + if enable_cp: + # Reject combining enable-control-plane-metrics with disable-azure-monitor-metrics + # in the same command — the resulting payload would be inconsistent. + if self._get_disable_azure_monitor_metrics(False): + raise MutuallyExclusiveArgumentError( + "Cannot specify --enable-control-plane-metrics together with " + "--disable-azure-monitor-metrics." + ) + # Must have Azure Monitor metrics enabled (either already or in this command). + already_enabled = ( + self.mc and + self.mc.azure_monitor_profile and + self.mc.azure_monitor_profile.metrics and + self.mc.azure_monitor_profile.metrics.enabled + ) + enabling_now = self._get_enable_azure_monitor_metrics(False) + if not already_enabled and not enabling_now: + raise RequiredArgumentMissingError( + "--enable-control-plane-metrics requires Azure Monitor metrics to be enabled. " + "Specify --enable-azure-monitor-metrics or run on a cluster that already has " + "Azure Monitor metrics enabled." + ) + + def validate_control_plane_metrics_params(self) -> None: + """Public entry-point for validating the control-plane-metrics flag combo + without consuming a parameter value. Useful in code paths that need to + surface validation errors even when the parent --enable-azure-monitor-metrics + flag was not passed. + """ + self._validate_control_plane_metrics_params() + def _get_enable_control_plane_metrics(self, enable_validation: bool = False) -> bool: """Internal function to obtain the value of enable_control_plane_metrics. - This function supports the option of enable_validation. When enabled, if both - enable_control_plane_metrics and disable_control_plane_metrics are specified, raise a - MutuallyExclusiveArgumentError. Additionally, --enable-control-plane-metrics requires - Azure Monitor metrics to either already be enabled on the cluster or to be enabled in - the same command via --enable-azure-monitor-metrics. 
+ When enable_validation is True, the flag combinations are validated via + _validate_control_plane_metrics_params before the value is returned. :return: bool """ @@ -5977,35 +6034,8 @@ def _get_enable_control_plane_metrics(self, enable_validation: bool = False) -> self.mc.azure_monitor_profile.metrics.control_plane ): enable_control_plane_metrics = self.mc.azure_monitor_profile.metrics.control_plane.enabled - # This parameter does not need dynamic completion. if enable_validation: - if enable_control_plane_metrics and self._get_disable_control_plane_metrics(False): - raise MutuallyExclusiveArgumentError( - "Cannot specify --enable-control-plane-metrics and --disable-control-plane-metrics " - "at the same time." - ) - if enable_control_plane_metrics: - # Reject combining enable-control-plane-metrics with disable-azure-monitor-metrics - # in the same command — the resulting payload would be inconsistent. - if self._get_disable_azure_monitor_metrics(False): - raise MutuallyExclusiveArgumentError( - "Cannot specify --enable-control-plane-metrics together with " - "--disable-azure-monitor-metrics." - ) - # Must have Azure Monitor metrics enabled (either already or in this command). - already_enabled = ( - self.mc and - self.mc.azure_monitor_profile and - self.mc.azure_monitor_profile.metrics and - self.mc.azure_monitor_profile.metrics.enabled - ) - enabling_now = self._get_enable_azure_monitor_metrics(False) - if not already_enabled and not enabling_now: - raise RequiredArgumentMissingError( - "--enable-control-plane-metrics requires Azure Monitor metrics to be enabled. " - "Specify --enable-azure-monitor-metrics or run on a cluster that already has " - "Azure Monitor metrics enabled." 
- ) + self._validate_control_plane_metrics_params() return bool(enable_control_plane_metrics) def get_enable_control_plane_metrics(self) -> bool: @@ -6018,19 +6048,15 @@ def get_enable_control_plane_metrics(self) -> bool: def _get_disable_control_plane_metrics(self, enable_validation: bool = False) -> bool: """Internal function to obtain the value of disable_control_plane_metrics. - This function supports the option of enable_validation. When enabled, if both - enable_control_plane_metrics and disable_control_plane_metrics are specified, raise a - MutuallyExclusiveArgumentError. + When enable_validation is True, the flag combinations are validated via + _validate_control_plane_metrics_params before the value is returned. + :return: bool """ # Read the original value passed by the command. disable_control_plane_metrics = self.raw_param.get("disable_control_plane_metrics") if enable_validation: - if disable_control_plane_metrics and self._get_enable_control_plane_metrics(False): - raise MutuallyExclusiveArgumentError( - "Cannot specify --enable-control-plane-metrics and --disable-control-plane-metrics " - "at the same time." - ) + self._validate_control_plane_metrics_params() return bool(disable_control_plane_metrics) def get_disable_control_plane_metrics(self) -> bool: @@ -7799,10 +7825,11 @@ def set_up_azure_monitor_profile(self, mc: ManagedCluster) -> ManagedCluster: ksm_metric_labels_allow_list = "" if ksm_metric_annotations_allow_list is None: ksm_metric_annotations_allow_list = "" - # Trigger control-plane-metrics validation even if the parent metrics flag was - # not specified, so users get a clear error instead of silent ignore when they - # pass --enable-control-plane-metrics on its own. 
- self.context.get_enable_control_plane_metrics() + # Surface control-plane-metrics flag combination errors even when the + # parent metrics flag was not specified, so users get a clear error + # instead of a silent ignore when they pass --enable-control-plane-metrics + # on its own. + self.context.validate_control_plane_metrics_params() if self.context.get_enable_azure_monitor_metrics(): if mc.azure_monitor_profile is None: mc.azure_monitor_profile = self.models.ManagedClusterAzureMonitorProfile() From 138467b962b4d3068ae39cf906076249e93403b0 Mon Sep 17 00:00:00 2001 From: "Kaveesh Dubey (from Dev Box)" Date: Fri, 12 Jun 2026 08:31:56 -0700 Subject: [PATCH 4/6] Drop redundant private validator wrapper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Other validators in this file (e.g. validate_byo_hosted_system_subnets) are a single public def validate_xxx(self) -> None — no private companion. Collapse the extra _validate_control_plane_metrics_params indirection so the new validator matches the file's convention. Tests + behavior unchanged. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../acs/managed_cluster_decorator.py | 22 ++++++------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py index e724303d1ba..518c735df8e 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py @@ -5956,7 +5956,7 @@ def get_disable_azure_monitor_metrics(self) -> bool: """ return self._get_disable_azure_monitor_metrics(enable_validation=True) - def _validate_control_plane_metrics_params(self) -> None: + def validate_control_plane_metrics_params(self) -> None: """Validate the --enable/--disable-control-plane-metrics flag combo and its interaction with --enable/--disable-azure-monitor-metrics. @@ -5964,8 +5964,8 @@ def _validate_control_plane_metrics_params(self) -> None: an invalid combination. Returns nothing — use this when you want to surface validation errors without consuming a parameter value. - Reads raw_param directly to avoid recursing back into the getters that - themselves delegate here when enable_validation is True. + Reads raw_param directly so the getters can also delegate here from + their enable_validation=True path without recursing. """ enable_cp = self.raw_param.get("enable_control_plane_metrics") disable_cp = self.raw_param.get("disable_control_plane_metrics") @@ -6008,18 +6008,10 @@ def _validate_control_plane_metrics_params(self) -> None: "Azure Monitor metrics enabled." ) - def validate_control_plane_metrics_params(self) -> None: - """Public entry-point for validating the control-plane-metrics flag combo - without consuming a parameter value. Useful in code paths that need to - surface validation errors even when the parent --enable-azure-monitor-metrics - flag was not passed. 
- """ - self._validate_control_plane_metrics_params() - def _get_enable_control_plane_metrics(self, enable_validation: bool = False) -> bool: """Internal function to obtain the value of enable_control_plane_metrics. When enable_validation is True, the flag combinations are validated via - _validate_control_plane_metrics_params before the value is returned. + validate_control_plane_metrics_params before the value is returned. :return: bool """ @@ -6035,7 +6027,7 @@ def _get_enable_control_plane_metrics(self, enable_validation: bool = False) -> ): enable_control_plane_metrics = self.mc.azure_monitor_profile.metrics.control_plane.enabled if enable_validation: - self._validate_control_plane_metrics_params() + self.validate_control_plane_metrics_params() return bool(enable_control_plane_metrics) def get_enable_control_plane_metrics(self) -> bool: @@ -6049,14 +6041,14 @@ def get_enable_control_plane_metrics(self) -> bool: def _get_disable_control_plane_metrics(self, enable_validation: bool = False) -> bool: """Internal function to obtain the value of disable_control_plane_metrics. When enable_validation is True, the flag combinations are validated via - _validate_control_plane_metrics_params before the value is returned. + validate_control_plane_metrics_params before the value is returned. :return: bool """ # Read the original value passed by the command. disable_control_plane_metrics = self.raw_param.get("disable_control_plane_metrics") if enable_validation: - self._validate_control_plane_metrics_params() + self.validate_control_plane_metrics_params() return bool(disable_control_plane_metrics) def get_disable_control_plane_metrics(self) -> bool: From a3455563e3a17edb0c56eaa7880739abdfa6836b Mon Sep 17 00:00:00 2001 From: "Kaveesh Dubey (from Dev Box)" Date: Fri, 12 Jun 2026 08:44:48 -0700 Subject: [PATCH 5/6] Drop made-up aka.ms link from control-plane-metrics help The aka.ms/aks/controlplane-metrics shortlink does not resolve. 
Drop the trailing reference from the four help strings (create + update, both _help.py and _params.py). The remaining help text already explains the flag and its prerequisite, matching the sibling --enable-azure-monitor-metrics line which has no docs URL. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/azure-cli/azure/cli/command_modules/acs/_help.py | 6 +++--- src/azure-cli/azure/cli/command_modules/acs/_params.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/_help.py b/src/azure-cli/azure/cli/command_modules/acs/_help.py index d3775fb1436..7f2da970141 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/_help.py +++ b/src/azure-cli/azure/cli/command_modules/acs/_help.py @@ -560,7 +560,7 @@ short-summary: Enable Windows Recording Rules when enabling the Azure Monitor Metrics addon - name: --enable-control-plane-metrics --enable-cp-metrics type: bool - short-summary: Enable collection of Azure Monitor managed Prometheus control plane metrics for managed cluster components (kube-apiserver, etcd, etc). Requires Azure Monitor metrics to be enabled (already enabled or via --enable-azure-monitor-metrics). See aka.ms/aks/controlplane-metrics. + short-summary: Enable collection of Azure Monitor managed Prometheus control plane metrics for managed cluster components (kube-apiserver, etcd, etc). Requires Azure Monitor metrics to be enabled (already enabled or via --enable-azure-monitor-metrics). - name: --enable-azure-monitor-app-monitoring type: bool short-summary: Enable Azure Monitor Application Monitoring auto-instrumentation for a Kubernetes cluster. @@ -1107,10 +1107,10 @@ short-summary: Disable Azure Monitor Metrics Profile. This will delete all DCRA's associated with the cluster, any linked DCRs with the data stream = prometheus-stream and the recording rule groups created by the addon for this AKS cluster. 
- name: --enable-control-plane-metrics --enable-cp-metrics type: bool - short-summary: Enable collection of Azure Monitor managed Prometheus control plane metrics for managed cluster components (kube-apiserver, etcd, etc). Requires Azure Monitor metrics to be enabled (already enabled or via --enable-azure-monitor-metrics). See aka.ms/aks/controlplane-metrics. + short-summary: Enable collection of Azure Monitor managed Prometheus control plane metrics for managed cluster components (kube-apiserver, etcd, etc). Requires Azure Monitor metrics to be enabled (already enabled or via --enable-azure-monitor-metrics). - name: --disable-control-plane-metrics --disable-cp-metrics type: bool - short-summary: Disable collection of Azure Monitor managed Prometheus control plane metrics. Leaves Azure Monitor metrics enabled. See aka.ms/aks/controlplane-metrics. + short-summary: Disable collection of Azure Monitor managed Prometheus control plane metrics. Leaves Azure Monitor metrics enabled. - name: --enable-azure-monitor-app-monitoring type: bool short-summary: Enable Azure Monitor Application Monitoring auto-instrumentation for a Kubernetes cluster. diff --git a/src/azure-cli/azure/cli/command_modules/acs/_params.py b/src/azure-cli/azure/cli/command_modules/acs/_params.py index d516acd18e9..8203de72e68 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/_params.py +++ b/src/azure-cli/azure/cli/command_modules/acs/_params.py @@ -580,7 +580,7 @@ def load_arguments(self, _): help=( 'Enable collection of Azure Monitor managed Prometheus control plane metrics for managed ' 'cluster components (kube-apiserver, etcd, etc). Requires Azure Monitor metrics to be enabled ' - '(already enabled or via --enable-azure-monitor-metrics). See aka.ms/aks/controlplane-metrics.' + '(already enabled or via --enable-azure-monitor-metrics).' 
), ) c.argument('enable_azure_monitor_app_monitoring', action='store_true') @@ -829,7 +829,7 @@ def load_arguments(self, _): help=( 'Enable collection of Azure Monitor managed Prometheus control plane metrics for managed ' 'cluster components (kube-apiserver, etcd, etc). Requires Azure Monitor metrics to be enabled ' - '(already enabled or via --enable-azure-monitor-metrics). See aka.ms/aks/controlplane-metrics.' + '(already enabled or via --enable-azure-monitor-metrics).' ), ) c.argument( From e18c65d85a3f8695b96294419c637c0858a8dcef Mon Sep 17 00:00:00 2001 From: "Kaveesh Dubey (from Dev Box)" Date: Fri, 12 Jun 2026 08:47:45 -0700 Subject: [PATCH 6/6] Use real scrape-job names in control-plane-metrics help Replace 'kube-apiserver, etcd, etc' with the actual default Prometheus scrape job names: controlplane-apiserver and controlplane-etcd. These are the targets users see in AMW and what the AKS docs reference. The 'etc' was also misleading since scheduler / controller-manager / NAP targets are opt-in via MinimalIngestionProfile and are not flipped on by this flag. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/azure-cli/azure/cli/command_modules/acs/_help.py | 4 ++-- src/azure-cli/azure/cli/command_modules/acs/_params.py | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/azure-cli/azure/cli/command_modules/acs/_help.py b/src/azure-cli/azure/cli/command_modules/acs/_help.py index 7f2da970141..933449564ca 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/_help.py +++ b/src/azure-cli/azure/cli/command_modules/acs/_help.py @@ -560,7 +560,7 @@ short-summary: Enable Windows Recording Rules when enabling the Azure Monitor Metrics addon - name: --enable-control-plane-metrics --enable-cp-metrics type: bool - short-summary: Enable collection of Azure Monitor managed Prometheus control plane metrics for managed cluster components (kube-apiserver, etcd, etc). 
Requires Azure Monitor metrics to be enabled (already enabled or via --enable-azure-monitor-metrics). + short-summary: Enable collection of Azure Monitor managed Prometheus control plane metrics for managed cluster components (controlplane-apiserver and controlplane-etcd targets by default). Requires Azure Monitor metrics to be enabled (already enabled or via --enable-azure-monitor-metrics). - name: --enable-azure-monitor-app-monitoring type: bool short-summary: Enable Azure Monitor Application Monitoring auto-instrumentation for a Kubernetes cluster. @@ -1107,7 +1107,7 @@ short-summary: Disable Azure Monitor Metrics Profile. This will delete all DCRA's associated with the cluster, any linked DCRs with the data stream = prometheus-stream and the recording rule groups created by the addon for this AKS cluster. - name: --enable-control-plane-metrics --enable-cp-metrics type: bool - short-summary: Enable collection of Azure Monitor managed Prometheus control plane metrics for managed cluster components (kube-apiserver, etcd, etc). Requires Azure Monitor metrics to be enabled (already enabled or via --enable-azure-monitor-metrics). + short-summary: Enable collection of Azure Monitor managed Prometheus control plane metrics for managed cluster components (controlplane-apiserver and controlplane-etcd targets by default). Requires Azure Monitor metrics to be enabled (already enabled or via --enable-azure-monitor-metrics). - name: --disable-control-plane-metrics --disable-cp-metrics type: bool short-summary: Disable collection of Azure Monitor managed Prometheus control plane metrics. Leaves Azure Monitor metrics enabled. 
diff --git a/src/azure-cli/azure/cli/command_modules/acs/_params.py b/src/azure-cli/azure/cli/command_modules/acs/_params.py index 8203de72e68..3b9c48a5e88 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/_params.py +++ b/src/azure-cli/azure/cli/command_modules/acs/_params.py @@ -579,7 +579,8 @@ def load_arguments(self, _): action='store_true', help=( 'Enable collection of Azure Monitor managed Prometheus control plane metrics for managed ' - 'cluster components (kube-apiserver, etcd, etc). Requires Azure Monitor metrics to be enabled ' + 'cluster components (controlplane-apiserver and controlplane-etcd targets by default). ' + 'Requires Azure Monitor metrics to be enabled ' '(already enabled or via --enable-azure-monitor-metrics).' ), ) @@ -828,7 +829,8 @@ def load_arguments(self, _): action='store_true', help=( 'Enable collection of Azure Monitor managed Prometheus control plane metrics for managed ' - 'cluster components (kube-apiserver, etcd, etc). Requires Azure Monitor metrics to be enabled ' + 'cluster components (controlplane-apiserver and controlplane-etcd targets by default). ' + 'Requires Azure Monitor metrics to be enabled ' '(already enabled or via --enable-azure-monitor-metrics).' ), )