Skip to content

Commit 80b9555

Browse files
welkinhemonrog2
andauthored
vmm_inventory_partial_sync fault check (#274)
* updated vmm_inventory_partial_sync to align with v3.0 paradigm * Enhanced by looking for changeset partial-inv to avoid flagging on the other reasons. * merge master + fix pytest --------- Co-authored-by: Gabriel <gmonroy@cisco.com>
1 parent a129e87 commit 80b9555

7 files changed

Lines changed: 138 additions & 3 deletions

File tree

aci-preupgrade-validation-script.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5962,6 +5962,43 @@ def configpush_shard_check(tversion, **kwargs):
59625962

59635963
return Result(result=result, headers=headers, data=data, recommended_action=recommended_action, doc_url=doc_url)
59645964

5965+
5966+
@check_wrapper(check_title='APIC VMM inventory sync fault (F0132)')
def apic_vmm_inventory_sync_faults_check(**kwargs):
    """Report F0132 faults whose change set mentions a partial VMM inventory.

    All `faultInst` objects with code F0132 are queried. Only faults whose
    `changeSet` attribute contains `partial-inv` are reported, so F0132 faults
    raised for other reasons are ignored.

    Faults whose DN matches the VMware controller pattern are listed with the
    VMM domain and controller extracted from the DN. Any other matching fault
    is listed with its raw DN in the unformatted table.

    Returns:
        Result: `MANUAL` when at least one matching fault was found,
        otherwise `PASS`.
    """
    result = PASS
    headers = ['Fault', 'VMM Domain', 'Controller']
    data = []
    unformatted_headers = ["Fault", "Fault DN"]
    unformatted_data = []
    recommended_action = "Please look for Faults under VM and Host and fix them via VCenter, then manually re-trigger inventory sync on APIC"
    doc_url = 'https://datacenter.github.io/ACI-Pre-Upgrade-Validation-Script/validations/#vmm-inventory-partially-synced'
    vmm_regex = r'comp/prov-VMware/ctrlr-\[(?P<domain>.+?)\]-(?P<controller>.+?)/fault-F0132'
    faultInsts = icurl('class', 'faultInst.json?query-target-filter=eq(faultInst.code,"F0132")')

    for faultInst in faultInsts:
        attrs = faultInst['faultInst']['attributes']

        # F0132 is raised for several reasons; only the partial inventory
        # case is relevant to this check.
        if "partial-inv" not in attrs['changeSet']:
            continue

        fc = attrs['code']
        dn = attrs['dn']
        dn_match = re.search(vmm_regex, dn)
        if dn_match:
            data.append([fc, dn_match.group("domain"), dn_match.group("controller")])
        else:
            # DN did not have the expected shape; report it as-is.
            unformatted_data.append([fc, dn])

    if data or unformatted_data:
        result = MANUAL

    return Result(
        result=result,
        headers=headers,
        data=data,
        unformatted_headers=unformatted_headers,
        unformatted_data=unformatted_data,
        recommended_action=recommended_action,
        doc_url=doc_url)
6001+
59656002
# ---- Script Execution ----
59666003

59676004

@@ -6069,6 +6106,7 @@ class CheckManager:
60696106
scalability_faults_check,
60706107
fabric_port_down_check,
60716108
equipment_disk_limits_exceeded,
6109+
apic_vmm_inventory_sync_faults_check,
60726110

60736111
# Configurations
60746112
vpc_paired_switches_check,

docs/docs/validations.md

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ Items | Faults | This Script
6868
[L3 Port Config][f7] | F0467: port-configured-as-l2 | :white_check_mark: | :white_check_mark: 5.2(4d)
6969
[L2 Port Config][f8] | F0467: port-configured-as-l3 | :white_check_mark: | :white_check_mark: 5.2(4d)
7070
[Access (Untagged) Port Config][f9] | F0467: native-or-untagged-encap-failure | :white_check_mark: | :no_entry_sign:
71-
[Encap Already in Use][f10] | F0467: encap-already-in-use | :white_check_mark: | :no_entry_sign: | :no_entry_sign:
71+
[Encap Already in Use][f10] | F0467: encap-already-in-use | :white_check_mark: | :no_entry_sign:
7272
[L3Out Subnets][f11] | F0467: prefix-entry-already-in-use | :white_check_mark: | :white_check_mark: 6.0(1g)
7373
[BD Subnets][f12] | F0469: duplicate-subnets-within-ctx | :white_check_mark: | :white_check_mark: 5.2(4d)
7474
[BD Subnets][f13] | F1425: subnet-overlap | :white_check_mark: | :white_check_mark: 5.2(4d)
@@ -79,7 +79,7 @@ Items | Faults | This Script
7979
[Scalability (faults related to Capacity Dashboard)][f18] | TCA faults for eqptcapacityEntity | :white_check_mark: | :no_entry_sign:
8080
[Fabric Port Status][f19] | F1394: ethpm-if-port-down-fabric | :white_check_mark: | :no_entry_sign:
8181
[Equipment Disk Limits][f20] | F1820: 80% -minor<br>F1821: -major<br>F1822: -critical | :white_check_mark: | :no_entry_sign:
82-
82+
[VMM Inventory Partially Synced][f21] | F0132: comp-ctrlr-operational-issues | :white_check_mark: | :no_entry_sign:
8383

8484

8585
[f1]: #apic-disk-space-usage
@@ -102,7 +102,7 @@ Items | Faults | This Script
102102
[f18]: #scalability-faults-related-to-capacity-dashboard
103103
[f19]: #fabric-port-status
104104
[f20]: #equipment-disk-limits
105-
105+
[f21]: #vmm-inventory-partially-synced
106106

107107
### Configuration Checks
108108

@@ -1506,6 +1506,16 @@ To recover from this fault, try the following action
15061506
userdom : all
15071507
```
15081508

1509+
### VMM Inventory Partially Synced
1510+
1511+
This script checks for fault code F0132 with rule comp-ctrlr-operational-issues and change set `partial-inv`. This fault is raised when APICs report a partially synchronized inventory with vCenter servers.
1512+
1513+
EPGs using the `immediate` or `on-demand` resolution immediacy (this is typical) rely on the VMM Inventory to determine VLAN programming. If the known inventory changes during an upgrade and the APIC is reporting its last sync to be partial, a VMM inventory resync response with inventory changes could result in VLANs being unexpectedly removed.
1514+
1515+
EPGs using the `pre-provision` resolution immediacy do not rely on the VMM inventory for VLAN deployment, so unexpected inventory changes will not change VLAN programming.
1516+
1517+
This check returns a `MANUAL` result as there are many reasons for a partial inventory sync to be reported. The goal is to ensure that the VMM inventory sync has fully completed before triggering the APIC upgrade to reduce any chance for unexpected inventory changes to occur.
1518+
15091519
## Configuration Check Details
15101520

15111521
### VPC-paired Leaf switches
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[]
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
[
2+
{
3+
"faultInst": {
4+
"attributes": {
5+
"cause": "operational-issues",
6+
"code": "F0132",
7+
"changeSet": "remoteOperIssues (Old: , New: event-channel-down)",
8+
"descr": "Operational issues detected for VMM controller: 192.168.1.1 with name MY_CONTROLLER in datacenter MY_DC in domain: MY_DOMAIN due to error: Received partial inventory in the last inventory sync. Please look for Faults under VM and Host and fix them via VCenter, then manually re-trigger inventory sync on APIC",
9+
"dn": "comp/prov-VMware/ctrlr-[MY_DOMAIN]-MY_CONTROLLER/fault-F0132",
10+
"rule": "comp-ctrlr-operational-issues"
11+
}
12+
}
13+
}
14+
15+
]
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
[
2+
{
3+
"faultInst": {
4+
"attributes": {
5+
"cause": "operational-issues",
6+
"code": "F0132",
7+
"changeSet": "remoteOperIssues (Old: event-channel-down,partial-inv, New: partial-inv)",
8+
"descr": "Operational issues detected for VMM controller: 192.168.1.1 with name MY_CONTROLLER in datacenter MY_DC in domain: MY_DOMAIN due to error: Received partial inventory in the last inventory sync. Please look for Faults under VM and Host and fix them via VCenter, then manually re-trigger inventory sync on APIC",
9+
"dn": "comp/prov-VMware/ctrlr-[MY_DOMAIN]-MY_CONTROLLER/fault-F0132",
10+
"rule": "comp-ctrlr-operational-issues"
11+
}
12+
}
13+
}
14+
15+
]
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
[
2+
{
3+
"faultInst": {
4+
"attributes": {
5+
"cause": "operational-issues",
6+
"code": "F0132",
7+
"changeSet": "remoteOperIssues (Old: event-channel-down,partial-inv, New: partial-inv)",
8+
"descr": "Operational issues detected for VMM controller: 192.168.1.1 with name MY_CONTROLLER in datacenter MY_DC in domain: MY_DOMAIN due to error: Received partial inventory in the last inventory sync. Please look for Faults under VM and Host and fix them via VCenter, then manually re-trigger inventory sync on APIC",
9+
"dn": "comp/prov-VMware/ctrlrx-[MY_DOMAIN]-MY_CONTROLLER/fault-F0132",
10+
"rule": "comp-ctrlr-operational-issues"
11+
}
12+
}
13+
}
14+
15+
]
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import os
2+
import pytest
3+
import logging
4+
import importlib
5+
from helpers.utils import read_data
6+
7+
# The script's file name contains dashes, so it is loaded through importlib
# rather than a plain `import` statement.
script = importlib.import_module("aci-preupgrade-validation-script")

log = logging.getLogger(__name__)
dir = os.path.dirname(os.path.abspath(__file__))

# Name of the check under test.
# NOTE(review): presumably read by the shared `run_check` fixture - confirm.
test_function = "apic_vmm_inventory_sync_faults_check"

# icurl query issued by the check for F0132 faults.
f0132_api = (
    'faultInst.json'
    '?query-target-filter=eq(faultInst.code,"F0132")'
)

# (fixture file, expected check result), in the order the cases run.
_CASES = (
    ("faultInst_neg.json", script.PASS),
    ("faultInst_neg1.json", script.PASS),
    ("faultInst_pos.json", script.MANUAL),
    ("faultInst_pos2.json", script.MANUAL),
)


@pytest.mark.parametrize(
    "icurl_outputs, expected_result",
    [
        ({f0132_api: read_data(dir, fixture_name)}, expected)
        for fixture_name, expected in _CASES
    ],
)
def test_logic(run_check, mock_icurl, expected_result):
    """Run the check against the mocked icurl output and compare the result."""
    outcome = run_check()
    assert outcome.result == expected_result

0 commit comments

Comments
 (0)