diff --git a/src/azure-cli/azure/cli/command_modules/acs/_help.py b/src/azure-cli/azure/cli/command_modules/acs/_help.py index 191345789b1..933449564ca 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/_help.py +++ b/src/azure-cli/azure/cli/command_modules/acs/_help.py @@ -558,6 +558,9 @@ - name: --enable-windows-recording-rules type: bool short-summary: Enable Windows Recording Rules when enabling the Azure Monitor Metrics addon + - name: --enable-control-plane-metrics --enable-cp-metrics + type: bool + short-summary: Enable collection of Azure Monitor managed Prometheus control plane metrics for managed cluster components (controlplane-apiserver and controlplane-etcd targets by default). Requires Azure Monitor metrics to be enabled (already enabled or via --enable-azure-monitor-metrics). - name: --enable-azure-monitor-app-monitoring type: bool short-summary: Enable Azure Monitor Application Monitoring auto-instrumentation for a Kubernetes cluster. @@ -1102,6 +1105,12 @@ - name: --disable-azure-monitor-metrics type: bool short-summary: Disable Azure Monitor Metrics Profile. This will delete all DCRA's associated with the cluster, any linked DCRs with the data stream = prometheus-stream and the recording rule groups created by the addon for this AKS cluster. + - name: --enable-control-plane-metrics --enable-cp-metrics + type: bool + short-summary: Enable collection of Azure Monitor managed Prometheus control plane metrics for managed cluster components (controlplane-apiserver and controlplane-etcd targets by default). Requires Azure Monitor metrics to be enabled (already enabled or via --enable-azure-monitor-metrics). + - name: --disable-control-plane-metrics --disable-cp-metrics + type: bool + short-summary: Disable collection of Azure Monitor managed Prometheus control plane metrics. Leaves Azure Monitor metrics enabled. 
- name: --enable-azure-monitor-app-monitoring type: bool short-summary: Enable Azure Monitor Application Monitoring auto-instrumentation for a Kubernetes cluster. diff --git a/src/azure-cli/azure/cli/command_modules/acs/_params.py b/src/azure-cli/azure/cli/command_modules/acs/_params.py index 91a66f7f286..3b9c48a5e88 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/_params.py +++ b/src/azure-cli/azure/cli/command_modules/acs/_params.py @@ -573,6 +573,17 @@ def load_arguments(self, _): c.argument('ksm_metric_annotations_allow_list') c.argument('grafana_resource_id', validator=validate_grafanaresourceid) c.argument('enable_windows_recording_rules', action='store_true') + c.argument( + 'enable_control_plane_metrics', + options_list=['--enable-control-plane-metrics', '--enable-cp-metrics'], + action='store_true', + help=( + 'Enable collection of Azure Monitor managed Prometheus control plane metrics for managed ' + 'cluster components (controlplane-apiserver and controlplane-etcd targets by default). ' + 'Requires Azure Monitor metrics to be enabled ' + '(already enabled or via --enable-azure-monitor-metrics).' + ), + ) c.argument('enable_azure_monitor_app_monitoring', action='store_true') c.argument('node_public_ip_tags', arg_type=tags_type, validator=validate_node_public_ip_tags, help='space-separated tags: key[=value] [key[=value] ...].') @@ -812,6 +823,26 @@ def load_arguments(self, _): c.argument('grafana_resource_id', validator=validate_grafanaresourceid) c.argument('enable_windows_recording_rules', action='store_true') c.argument('disable_azure_monitor_metrics', action='store_true') + c.argument( + 'enable_control_plane_metrics', + options_list=['--enable-control-plane-metrics', '--enable-cp-metrics'], + action='store_true', + help=( + 'Enable collection of Azure Monitor managed Prometheus control plane metrics for managed ' + 'cluster components (controlplane-apiserver and controlplane-etcd targets by default). 
' + 'Requires Azure Monitor metrics to be enabled ' + '(already enabled or via --enable-azure-monitor-metrics).' + ), + ) + c.argument( + 'disable_control_plane_metrics', + options_list=['--disable-control-plane-metrics', '--disable-cp-metrics'], + action='store_true', + help=( + 'Disable collection of Azure Monitor managed Prometheus control plane metrics. ' + 'Sets azureMonitorProfile.metrics.controlPlane.enabled=false on the cluster.' + ), + ) c.argument('enable_azure_monitor_app_monitoring', action='store_true') c.argument('disable_azure_monitor_app_monitoring', action='store_true') # azure container storage diff --git a/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/azuremonitorprofile.py b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/azuremonitorprofile.py index 2c94f879b23..b218b9eb33e 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/azuremonitorprofile.py +++ b/src/azure-cli/azure/cli/command_modules/acs/azuremonitormetrics/azuremonitorprofile.py @@ -2,6 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. See License.txt in the project root for license information. 
# pylint: disable=line-too-long
def _addon_put_with_control_plane(cmd, cluster_subscription, cluster_resource_group_name, cluster_name):
    """Re-PUT the cluster with ``metrics.enabled`` and ``metrics.controlPlane.enabled`` set to True.

    Sibling of ``addon_put`` used by the greenfield
    ``aks create --enable-control-plane-metrics`` path. The initial cluster PUT
    intentionally leaves ``control_plane`` unset so the RP does not schedule the
    control-plane-metrics collection (CCP) pod before the DCRA is created in
    postprocessing. Once the DCRA exists, this PUT is issued so the CCP pod is
    scheduled with its DCRA already in place.

    :param cmd: CLI command object; only ``cmd.cli_ctx`` is read, to build the client.
    :param cluster_subscription: subscription used to build the container service client.
    :param cluster_resource_group_name: resource group of the managed cluster.
    :param cluster_name: name of the managed cluster.
    :raises UnknownError: if the cluster cannot be fetched, if it has no
        ``azure_monitor_profile.metrics`` section to update, or if the PUT fails.
    """
    client = get_container_service_client(cmd.cli_ctx, cluster_subscription).managed_clusters
    try:
        mc = client.get(cluster_resource_group_name, cluster_name)
    except CLIError as e:
        raise UnknownError(str(e)) from e

    # Resolve the metrics section defensively. Without this check a cluster that
    # comes back without the profile would fail below with a bare AttributeError
    # instead of an actionable CLI error.
    profile = getattr(mc, "azure_monitor_profile", None)
    metrics = getattr(profile, "metrics", None) if profile else None
    if metrics is None:
        raise UnknownError(
            "Cannot enable control plane metrics on cluster '{}': the cluster has no "
            "Azure Monitor metrics profile (azureMonitorProfile.metrics).".format(cluster_name)
        )

    # Enable metrics if not already enabled (mirrors addon_put).
    if getattr(metrics, "enabled", None) is False:
        metrics.enabled = True

    # Flip control plane now that DCRA exists. Only the import is guarded, so an
    # unrelated failure while building the model is not mistaken for a missing model.
    try:
        from azure.mgmt.containerservice.models import (
            ManagedClusterAzureMonitorProfileMetricsControlPlane,
        )
    except ImportError:
        # Fallback for SDK versions that don't expose the model directly.
        # NOTE(review): assumes the generated client serializes a plain dict for
        # this property - confirm against the SDK before relying on it.
        metrics.control_plane = {"enabled": True}
    else:
        metrics.control_plane = ManagedClusterAzureMonitorProfileMetricsControlPlane(enabled=True)

    # This is the ONLY place where ``controlPlane.enabled`` is set during
    # ``aks create``. Wait for the LRO so the flip is durably persisted before the
    # create command returns; otherwise callers / tests that read the cluster
    # immediately could see the pre-flip state.
    try:
        poller = client.begin_create_or_update(cluster_resource_group_name, cluster_name, mc)
        poller.result()
    except Exception as e:
        raise UnknownError(str(e)) from e
+ enable_control_plane_metrics=False, + disable_control_plane_metrics=False, enable_azure_monitor_app_monitoring=False, disable_azure_monitor_app_monitoring=False, # azure container storage diff --git a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py index b2b8e1b124a..518c735df8e 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py +++ b/src/azure-cli/azure/cli/command_modules/acs/managed_cluster_decorator.py @@ -5956,6 +5956,109 @@ def get_disable_azure_monitor_metrics(self) -> bool: """ return self._get_disable_azure_monitor_metrics(enable_validation=True) + def validate_control_plane_metrics_params(self) -> None: + """Validate the --enable/--disable-control-plane-metrics flag combo and + its interaction with --enable/--disable-azure-monitor-metrics. + + Raises MutuallyExclusiveArgumentError or RequiredArgumentMissingError on + an invalid combination. Returns nothing — use this when you want to + surface validation errors without consuming a parameter value. + + Reads raw_param directly so the getters can also delegate here from + their enable_validation=True path without recursing. + """ + enable_cp = self.raw_param.get("enable_control_plane_metrics") + disable_cp = self.raw_param.get("disable_control_plane_metrics") + # On create, the property may already be set on the incoming mc object. + if self.decorator_mode == DecoratorMode.CREATE: + if ( + self.mc and + self.mc.azure_monitor_profile and + self.mc.azure_monitor_profile.metrics and + self.mc.azure_monitor_profile.metrics.control_plane + ): + enable_cp = self.mc.azure_monitor_profile.metrics.control_plane.enabled + + if enable_cp and disable_cp: + raise MutuallyExclusiveArgumentError( + "Cannot specify --enable-control-plane-metrics and --disable-control-plane-metrics " + "at the same time." 
+ ) + + if enable_cp: + # Reject combining enable-control-plane-metrics with disable-azure-monitor-metrics + # in the same command — the resulting payload would be inconsistent. + if self._get_disable_azure_monitor_metrics(False): + raise MutuallyExclusiveArgumentError( + "Cannot specify --enable-control-plane-metrics together with " + "--disable-azure-monitor-metrics." + ) + # Must have Azure Monitor metrics enabled (either already or in this command). + already_enabled = ( + self.mc and + self.mc.azure_monitor_profile and + self.mc.azure_monitor_profile.metrics and + self.mc.azure_monitor_profile.metrics.enabled + ) + enabling_now = self._get_enable_azure_monitor_metrics(False) + if not already_enabled and not enabling_now: + raise RequiredArgumentMissingError( + "--enable-control-plane-metrics requires Azure Monitor metrics to be enabled. " + "Specify --enable-azure-monitor-metrics or run on a cluster that already has " + "Azure Monitor metrics enabled." + ) + + def _get_enable_control_plane_metrics(self, enable_validation: bool = False) -> bool: + """Internal function to obtain the value of enable_control_plane_metrics. + When enable_validation is True, the flag combinations are validated via + validate_control_plane_metrics_params before the value is returned. + + :return: bool + """ + # Read the original value passed by the command. + enable_control_plane_metrics = self.raw_param.get("enable_control_plane_metrics") + # In create mode, try to read the property value corresponding to the parameter from the `mc` object. 
+ if self.decorator_mode == DecoratorMode.CREATE: + if ( + self.mc and + self.mc.azure_monitor_profile and + self.mc.azure_monitor_profile.metrics and + self.mc.azure_monitor_profile.metrics.control_plane + ): + enable_control_plane_metrics = self.mc.azure_monitor_profile.metrics.control_plane.enabled + if enable_validation: + self.validate_control_plane_metrics_params() + return bool(enable_control_plane_metrics) + + def get_enable_control_plane_metrics(self) -> bool: + """Obtain the value of enable_control_plane_metrics. + This function will verify the parameter by default. If both enable_control_plane_metrics and + disable_control_plane_metrics are specified, raise a MutuallyExclusiveArgumentError. + :return: bool + """ + return self._get_enable_control_plane_metrics(enable_validation=True) + + def _get_disable_control_plane_metrics(self, enable_validation: bool = False) -> bool: + """Internal function to obtain the value of disable_control_plane_metrics. + When enable_validation is True, the flag combinations are validated via + validate_control_plane_metrics_params before the value is returned. + + :return: bool + """ + # Read the original value passed by the command. + disable_control_plane_metrics = self.raw_param.get("disable_control_plane_metrics") + if enable_validation: + self.validate_control_plane_metrics_params() + return bool(disable_control_plane_metrics) + + def get_disable_control_plane_metrics(self) -> bool: + """Obtain the value of disable_control_plane_metrics. + This function will verify the parameter by default. If both enable_control_plane_metrics and + disable_control_plane_metrics are specified, raise a MutuallyExclusiveArgumentError. + :return: bool + """ + return self._get_disable_control_plane_metrics(enable_validation=True) + def _get_enable_azure_monitor_app_monitoring(self, enable_validation: bool = False) -> bool: """Internal function to obtain the value of enable_azure_monitor_app_monitoring. 
This function supports the option of enable_validation. When enabled, if both @@ -7714,6 +7817,11 @@ def set_up_azure_monitor_profile(self, mc: ManagedCluster) -> ManagedCluster: ksm_metric_labels_allow_list = "" if ksm_metric_annotations_allow_list is None: ksm_metric_annotations_allow_list = "" + # Surface control-plane-metrics flag combination errors even when the + # parent metrics flag was not specified, so users get a clear error + # instead of a silent ignore when they pass --enable-control-plane-metrics + # on its own. + self.context.validate_control_plane_metrics_params() if self.context.get_enable_azure_monitor_metrics(): if mc.azure_monitor_profile is None: mc.azure_monitor_profile = self.models.ManagedClusterAzureMonitorProfile() @@ -7721,6 +7829,13 @@ def set_up_azure_monitor_profile(self, mc: ManagedCluster) -> ManagedCluster: mc.azure_monitor_profile.metrics.kube_state_metrics = self.models.ManagedClusterAzureMonitorProfileKubeStateMetrics( # pylint:disable=line-too-long metric_labels_allowlist=str(ksm_metric_labels_allow_list), metric_annotations_allow_list=str(ksm_metric_annotations_allow_list)) + # NOTE: control_plane.enabled is intentionally NOT set here on the create flow. + # If we set it on this initial PUT, the RP would schedule the control-plane-metrics + # collection pod (CCP) before the DCRA (Data Collection Rule Association) has been + # created in postprocessing. The CCP would then crash-loop with "DCRA not found" + # until the next reconciliation. Instead, we defer the flip to the addon_put step + # inside link_azure_monitor_profile_artifacts (postprocessing_after_mc_created), + # which runs *after* DCRA creation. 
# set intermediate self.context.set_intermediate("azuremonitormetrics_addon_enabled", True, overwrite_exists=True) if self.context.get_enable_azure_monitor_app_monitoring(): @@ -9810,6 +9925,30 @@ def update_azure_monitor_profile(self, mc: ManagedCluster) -> ManagedCluster: self.context.get_disable_azure_monitor_metrics(), False) + # Handle enable / disable of control plane metrics independently of the parent metrics flag, + # so users can toggle control plane metrics on a cluster that already has metrics enabled. + if self.context.get_enable_control_plane_metrics(): + if mc.azure_monitor_profile is None: + mc.azure_monitor_profile = self.models.ManagedClusterAzureMonitorProfile() + if mc.azure_monitor_profile.metrics is None: + # Should not normally happen — validation requires metrics to be enabled — but guard + # against partially-populated profiles to avoid AttributeError. + mc.azure_monitor_profile.metrics = ( + self.models.ManagedClusterAzureMonitorProfileMetrics(enabled=True) + ) + mc.azure_monitor_profile.metrics.control_plane = ( + self.models.ManagedClusterAzureMonitorProfileMetricsControlPlane(enabled=True) + ) + + if self.context.get_disable_control_plane_metrics(): + if ( + mc.azure_monitor_profile and + mc.azure_monitor_profile.metrics + ): + mc.azure_monitor_profile.metrics.control_plane = ( + self.models.ManagedClusterAzureMonitorProfileMetricsControlPlane(enabled=False) + ) + if self.context.get_enable_azure_monitor_app_monitoring(): if mc.azure_monitor_profile is None: mc.azure_monitor_profile = self.models.ManagedClusterAzureMonitorProfile() diff --git a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py index 7be8614c087..160e4c954d9 100644 --- a/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py +++ b/src/azure-cli/azure/cli/command_modules/acs/tests/latest/test_aks_commands.py @@ -8331,6 +8331,151 @@ def 
test_aks_update_with_azuremonitormetrics(self, resource_group, resource_grou self.is_empty(), ]) + @live_only() + @AllowLargeResponse() + @AKSCustomResourceGroupPreparer(random_name_length=17, name_prefix='clitest', location='westus2') + def test_aks_create_with_control_plane_metrics(self, resource_group, resource_group_location): + # reset the count so in replay mode the random names will start with 0 + self.test_resources_count = 0 + aks_name = self.create_random_name('cliakstest', 16) + node_vm_size = 'standard_d2s_v3' + self.kwargs.update({ + 'resource_group': resource_group, + 'name': aks_name, + 'location': resource_group_location, + 'ssh_key_value': self.generate_ssh_keys(), + 'node_vm_size': node_vm_size, + }) + + # create: --enable-azure-monitor-metrics + --enable-control-plane-metrics + create_cmd = 'aks create --resource-group={resource_group} --name={name} --location={location} ' \ + '--ssh-key-value={ssh_key_value} --node-vm-size={node_vm_size} --enable-managed-identity ' \ + '--enable-azure-monitor-metrics --enable-control-plane-metrics --output=json' + # NOTE: ``--enable-control-plane-metrics`` on create is intentionally deferred to a + # postprocessing PUT (after DCRA creation) to avoid scheduling the CCP pod before its + # DCRA exists. The create response may therefore reflect the pre-flip state; assert + # the final state via ``aks show`` after the cluster settles. + self.cmd(create_cmd, checks=[ + self.check('provisioningState', 'Succeeded'), + self.check('azureMonitorProfile.metrics.enabled', True), + ]) + + wait_cmd = 'aks wait --resource-group={resource_group} --name={name} --created ' \ + '--interval 60 --timeout 1800' + self.cmd(wait_cmd, checks=[self.is_empty()]) + + # Verify the deferred control-plane-metrics flip landed on the cluster. 
+ self.cmd( + 'aks show --resource-group={resource_group} --name={name} --output=json', + checks=[ + self.check('azureMonitorProfile.metrics.enabled', True), + self.check('azureMonitorProfile.metrics.controlPlane.enabled', True), + ], + ) + + # delete + self.cmd('aks delete --resource-group={resource_group} --name={name} --yes --no-wait', + checks=[self.is_empty()]) + + @live_only() + @AllowLargeResponse() + @AKSCustomResourceGroupPreparer(random_name_length=17, name_prefix='clitest', location='westus2') + def test_aks_update_with_control_plane_metrics(self, resource_group, resource_group_location): + aks_name = self.create_random_name('cliakstest', 16) + node_vm_size = 'standard_d2s_v3' + self.kwargs.update({ + 'resource_group': resource_group, + 'name': aks_name, + 'location': resource_group_location, + 'ssh_key_value': self.generate_ssh_keys(), + 'node_vm_size': node_vm_size, + }) + + # create: with azure monitor metrics but without control plane metrics + create_cmd = 'aks create --resource-group={resource_group} --name={name} --location={location} ' \ + '--ssh-key-value={ssh_key_value} --node-vm-size={node_vm_size} --enable-managed-identity ' \ + '--enable-azure-monitor-metrics --output=json' + self.cmd(create_cmd, checks=[ + self.check('provisioningState', 'Succeeded'), + self.check('azureMonitorProfile.metrics.enabled', True), + ]) + + # wait for AMW background setup to complete before issuing update + wait_cmd = 'aks wait --resource-group={resource_group} --name={name} --updated --timeout=1800' + self.cmd(wait_cmd, checks=[self.is_empty()]) + + # update: enable-control-plane-metrics on a cluster that already has AM metrics + update_cmd = 'aks update --resource-group={resource_group} --name={name} --yes --output=json ' \ + '--enable-control-plane-metrics' + self.cmd(update_cmd, checks=[ + self.check('provisioningState', 'Succeeded'), + self.check('azureMonitorProfile.metrics.controlPlane.enabled', True), + ]) + + self.cmd(wait_cmd, checks=[self.is_empty()]) 
+ + # update: disable-control-plane-metrics + update_cmd = 'aks update --resource-group={resource_group} --name={name} --yes --output=json ' \ + '--disable-control-plane-metrics' + self.cmd(update_cmd, checks=[ + self.check('provisioningState', 'Succeeded'), + self.check('azureMonitorProfile.metrics.controlPlane.enabled', False), + ]) + + self.cmd(wait_cmd, checks=[self.is_empty()]) + + # delete + self.cmd('aks delete --resource-group={resource_group} --name={name} --yes --no-wait', + checks=[self.is_empty()]) + + @live_only() + @AllowLargeResponse() + @AKSCustomResourceGroupPreparer(random_name_length=17, name_prefix='clitest', location='westus2') + def test_aks_control_plane_metrics_negative(self, resource_group, resource_group_location): + aks_name = self.create_random_name('cliakstest', 16) + node_vm_size = 'standard_d2s_v3' + self.kwargs.update({ + 'resource_group': resource_group, + 'name': aks_name, + 'location': resource_group_location, + 'ssh_key_value': self.generate_ssh_keys(), + 'node_vm_size': node_vm_size, + }) + + # negative: --enable-control-plane-metrics without --enable-azure-monitor-metrics on create + create_missing_parent = 'aks create --resource-group={resource_group} --name={name} --location={location} ' \ + '--ssh-key-value={ssh_key_value} --node-vm-size={node_vm_size} --enable-managed-identity ' \ + '--enable-control-plane-metrics --output=json' + self.cmd(create_missing_parent, expect_failure=True) + + # create a baseline cluster (no AM metrics) so we can exercise the update-time negatives + create_cmd = 'aks create --resource-group={resource_group} --name={name} --location={location} ' \ + '--ssh-key-value={ssh_key_value} --node-vm-size={node_vm_size} --enable-managed-identity ' \ + '--output=json' + self.cmd(create_cmd, checks=[ + self.check('provisioningState', 'Succeeded'), + self.not_exists('azureMonitorProfile.metrics'), + ]) + + # negative: update --enable-control-plane-metrics on a cluster without AM metrics enabled + 
    def test_set_up_azure_monitor_profile_defers_control_plane_on_create(self):
        # Greenfield --enable-control-plane-metrics must NOT set control_plane.enabled
        # on the initial cluster PUT. It is deferred to the PUT issued in
        # postprocessing (after DCRA creation) so the CCP collector pod is only
        # scheduled once its DCRA exists.
        dec = AKSManagedClusterCreateDecorator(
            self.cmd,
            self.client,
            {
                "enable_azure_monitor_metrics": True,
                "enable_control_plane_metrics": True,
            },
            ResourceType.MGMT_CONTAINERSERVICE,
        )
        mc = self.models.ManagedCluster(
            location="test_location",
            identity=self.models.ManagedClusterIdentity(type="SystemAssigned"),
        )
        dec.context.attach_mc(mc)
        dec_mc = dec.set_up_azure_monitor_profile(mc)

        # The metrics profile section is populated on the initial PUT payload...
        self.assertIsNotNone(dec_mc.azure_monitor_profile)
        self.assertIsNotNone(dec_mc.azure_monitor_profile.metrics)
        # ...but with enabled=False: the payload built here does not turn metrics on.
        # The postprocessing PUT is expected to flip it to True after the prerequisites.
        self.assertFalse(dec_mc.azure_monitor_profile.metrics.enabled)
        # control_plane is likewise deferred and must be None here.
        self.assertIsNone(dec_mc.azure_monitor_profile.metrics.control_plane)
        # Intermediate flag is still set; presumably this is what makes postprocessing
        # run the prerequisites and the follow-up PUT.
        self.assertTrue(dec.context.get_intermediate("azuremonitormetrics_addon_enabled"))
+ dec = AKSManagedClusterCreateDecorator( + self.cmd, + self.client, + { + "enable_control_plane_metrics": True, + }, + ResourceType.MGMT_CONTAINERSERVICE, + ) + mc = self.models.ManagedCluster( + location="test_location", + identity=self.models.ManagedClusterIdentity(type="SystemAssigned"), + ) + dec.context.attach_mc(mc) + with self.assertRaises(RequiredArgumentMissingError): + dec.set_up_azure_monitor_profile(mc) + def test_set_up_azure_service_mesh(self): dec_1 = AKSManagedClusterCreateDecorator( self.cmd, @@ -16961,6 +17011,134 @@ def test_update_azure_monitor_profile_disable_app_monitoring_no_existing_app_mon self.assertIsNotNone(dec_mc_1.azure_monitor_profile.app_monitoring.auto_instrumentation) self.assertFalse(dec_mc_1.azure_monitor_profile.app_monitoring.auto_instrumentation.enabled) + def test_update_enable_control_plane_metrics_requires_parent_metrics(self): + # Update path: --enable-control-plane-metrics on a cluster that has neither + # Azure Monitor metrics already enabled nor --enable-azure-monitor-metrics in + # the same command must raise RequiredArgumentMissingError. + dec = AKSManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_control_plane_metrics": True, + }, + ResourceType.MGMT_CONTAINERSERVICE, + ) + mc = self.models.ManagedCluster( + location="test_location", + identity=self.models.ManagedClusterIdentity(type="SystemAssigned"), + ) + dec.context.attach_mc(mc) + + with self.assertRaises(RequiredArgumentMissingError): + dec.context.get_enable_control_plane_metrics() + + def test_update_enable_control_plane_metrics_already_enabled_cluster_succeeds(self): + # Update path: --enable-control-plane-metrics on a cluster that already has + # Azure Monitor metrics enabled should succeed without requiring + # --enable-azure-monitor-metrics. 
+ dec = AKSManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_control_plane_metrics": True, + }, + ResourceType.MGMT_CONTAINERSERVICE, + ) + mc = self.models.ManagedCluster( + location="test_location", + identity=self.models.ManagedClusterIdentity(type="SystemAssigned"), + azure_monitor_profile=self.models.ManagedClusterAzureMonitorProfile( + metrics=self.models.ManagedClusterAzureMonitorProfileMetrics(enabled=True) + ), + ) + dec.context.attach_mc(mc) + + self.assertTrue(dec.context.get_enable_control_plane_metrics()) + + dec_mc = dec.update_azure_monitor_profile(mc) + + self.assertIsNotNone(dec_mc.azure_monitor_profile.metrics.control_plane) + self.assertTrue(dec_mc.azure_monitor_profile.metrics.control_plane.enabled) + + def test_update_enable_control_plane_metrics_with_disable_metrics_raises(self): + # Update path: --enable-control-plane-metrics combined with + # --disable-azure-monitor-metrics in the same command must be rejected to + # avoid producing an inconsistent payload. + dec = AKSManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_control_plane_metrics": True, + "disable_azure_monitor_metrics": True, + }, + ResourceType.MGMT_CONTAINERSERVICE, + ) + mc = self.models.ManagedCluster( + location="test_location", + identity=self.models.ManagedClusterIdentity(type="SystemAssigned"), + azure_monitor_profile=self.models.ManagedClusterAzureMonitorProfile( + metrics=self.models.ManagedClusterAzureMonitorProfileMetrics(enabled=True) + ), + ) + dec.context.attach_mc(mc) + + with self.assertRaises(MutuallyExclusiveArgumentError): + dec.context.get_enable_control_plane_metrics() + + def test_update_enable_control_plane_metrics_with_disable_control_plane_raises(self): + # --enable-control-plane-metrics together with --disable-control-plane-metrics + # in the same command must raise MutuallyExclusiveArgumentError. 
+ dec = AKSManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "enable_control_plane_metrics": True, + "disable_control_plane_metrics": True, + }, + ResourceType.MGMT_CONTAINERSERVICE, + ) + mc = self.models.ManagedCluster( + location="test_location", + identity=self.models.ManagedClusterIdentity(type="SystemAssigned"), + azure_monitor_profile=self.models.ManagedClusterAzureMonitorProfile( + metrics=self.models.ManagedClusterAzureMonitorProfileMetrics(enabled=True) + ), + ) + dec.context.attach_mc(mc) + + with self.assertRaises(MutuallyExclusiveArgumentError): + dec.context.get_enable_control_plane_metrics() + + def test_update_disable_control_plane_metrics_sets_enabled_false(self): + # --disable-control-plane-metrics on a cluster that has it enabled should + # produce a payload with control_plane.enabled=False. + dec = AKSManagedClusterUpdateDecorator( + self.cmd, + self.client, + { + "disable_control_plane_metrics": True, + }, + ResourceType.MGMT_CONTAINERSERVICE, + ) + mc = self.models.ManagedCluster( + location="test_location", + identity=self.models.ManagedClusterIdentity(type="SystemAssigned"), + azure_monitor_profile=self.models.ManagedClusterAzureMonitorProfile( + metrics=self.models.ManagedClusterAzureMonitorProfileMetrics( + enabled=True, + control_plane=self.models.ManagedClusterAzureMonitorProfileMetricsControlPlane( + enabled=True + ), + ) + ), + ) + dec.context.attach_mc(mc) + + dec_mc = dec.update_azure_monitor_profile(mc) + + self.assertIsNotNone(dec_mc.azure_monitor_profile.metrics.control_plane) + self.assertFalse(dec_mc.azure_monitor_profile.metrics.control_plane.enabled) + if __name__ == "__main__": unittest.main()