--- # # Copyright (c) 2013-2023 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # ############################################################################ # # ALARM & CUSTOMER LOG DOCUMENTATION # ############################################################################ ############################################################################ # # Record Format ... for documentation # # 100.001: # Type: < Alarm | Log > # Description: < yaml string > # OR # [ < yaml string >, // list of yaml strings # < yaml string > ] # OR # critical: < yaml string > // i.e. dictionary of yaml strings indexed by severity # major: < yaml string > # minor: < yaml string > # warning: < yaml string > # Entity_Instance_ID: < yaml string ... e.g. host=.interface= > # OR # [ < yaml string >, // list of yaml strings # < yaml string > ] # Severity: < critical | major | minor | warning > # OR # [ critical, major ] // list of severity values # Proposed_Repair_Action: < yaml string > // NOTE ALARM ONLY FIELD # OR # critical: < yaml string > // i.e. dictionary of yaml strings indexed by severity # major: < yaml string > # minor: < yaml string > # warning: < yaml string > # Maintenance_Action: < yaml string > // NOTE ALARM ONLY FIELD # OR # critical: < yaml string > // i.e. dictionary of yaml strings indexed by severity # major: < yaml string > # minor: < yaml string > # warning: < yaml string > # Inhibit_Alarms: < True | False > // NOTE ALARM ONLY FIELD # Alarm_Type: < operational-violation | ... > # Probable_Cause: < timing-problem | ... > # OR # [ < timing-problem | ... >, // list of probable-causes # < timing-problem | ... 
> ] # Service_Affecting: < True | False > # Suppression: < True | False > // NOTE ALARM ONLY FIELD # Management_Affecting_Severity: < none | critical | major | minor | warning > # // lowest alarm level of this type that will block forced upgrades & orchestration actions # Degrade_Affecting_Severity: < none | critical | major | minor > # // lowest alarm level of this type sets a host to 'degraded' # Context: < none | starlingx | openstack > # // Identifies where the alarm/log is used. If it should be ignored by # // the documentation generating scripts, the value has to be 'none'. # // If any of the other values is used, the alarm/log will be included # // in the documentation and classified by the chosen value. # # # Other Notes: # - use general record format above # - the only dictionaries allowed are ones indexed by severity # - if there are multiple lists in a record, # then they should all have the same # of items and corresponding list items represent instance of alarm # - if you can't describe the alarm/log based on the above rules, # then you can use a multi-line string format # - DELETING alarms from events.yaml: alarms should only be deleted when going to a new Titanium Cloud release # - if all possible alarm severities are mgmt affecting, the convention is to # use 'warning' as the Management_Affecting_Severity, even if warning is not a possible severity for that alarm # # Testing: # - Testing of events.yaml can be done by running regular make command # and specifying fm-doc: # nice -n 20 ionice -c Idle make -C build fm-doc.rebuild # - When building, events.yaml will be parsed for correct format, and also # to ensure that Alarm IDs defined in constants.py and fmAlarm.h are # listed in events.yaml # ############################################################################ #--------------------------------------------------------------------------- # Monitored Resource Alarms #--------------------------------------------------------------------------- 
100.101: Type: Alarm Description: |- Platform CPU threshold exceeded; threshold x%, actual y% . CRITICAL @ 95% MAJOR @ 90% Entity_Instance_ID: host= Severity: [critical, major] Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support." Maintenance_Action: critical: degrade major: degrade Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: threshold-crossed Service_Affecting: False Suppression: True Management_Affecting_Severity: major Degrade_Affecting_Severity: critical Context: starlingx 100.102: Type: Alarm Description: |- VSwitch CPU threshold exceeded; threshold x%, actual y% . CRITICAL @ 95% MAJOR @ 90% MINOR @ 80% Entity_Instance_ID: host= Severity: [critical, major, minor] Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support." Maintenance_Action: critical: degrade major: degrade Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: threshold-crossed Service_Affecting: False Suppression: True Management_Affecting_Severity: none Degrade_Affecting_Severity: none Context: none 100.103: Type: Alarm Description: |- Memory threshold exceeded; threshold x%, actual y% . CRITICAL @ 90% MAJOR @ 80% Entity_Instance_ID: |- host= OR host=.memory=total OR host=.memory=platform OR host=.numa=node Severity: [critical, major] Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support; may require additional memory on Host." Maintenance_Action: critical: degrade major: degrade Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: threshold-crossed Service_Affecting: False Suppression: True Management_Affecting_Severity: none Degrade_Affecting_Severity: critical Context: starlingx 100.104: # NOTE This should really be split into two different Alarms. Type: Alarm Description: |- host=.filesystem= File System threshold exceeded; threshold x%, actual y% . 
CRITICAL @ 90% MAJOR @ 80% OR host=.volumegroup= Monitor and if condition persists, consider adding additional physical volumes to the volume group. Entity_Instance_ID: |- host=.filesystem= OR host=.volumegroup= Severity: [critical, major] Proposed_Repair_Action: "Reduce usage or resize filesystem." Maintenance_Action: critical: degrade major: degrade Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: threshold-crossed Service_Affecting: False Suppression: True Management_Affecting_Severity: critical Degrade_Affecting_Severity: critical Context: starlingx 100.105: Type: Alarm Description: |- Filesystem Alarm Condition: filesystem is not added on both controllers and/or does not have the same size: . Entity_Instance_ID: fs_name= Severity: critical Proposed_Repair_Action: "Add image-conversion filesystem on both controllers. See the |prod-long| documentation at |docs-url| for more details. If problem persists, contact next level of support." Maintenance_Action: degrade Inhibit_Alarms: Alarm_Type: equipment Probable_Cause: configuration-or-customization-error Service_Affecting: True Suppression: False Management_Affecting_Severity: major Degrade_Affecting_Severity: none Context: openstack #-------- # 100.105 (original definition): Retired (with R2 release): previously monitored the /etc/nova/instances # NFS mount from controller to computes. # NOTE: the 100.105 ID has since been reused by the filesystem alarm defined directly above. #-------- 100.106: Type: Alarm Description: "'OAM' Port failed." Entity_Instance_ID: host=.port= Severity: major Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment. Maintenance_Action: degrade Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: unknown Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: major Context: starlingx 100.107: Type: Alarm Description: |- 'OAM' Interface degraded. OR 'OAM' Interface failed.
Entity_Instance_ID: host=.interface= Severity: [critical, major] Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment. Maintenance_Action: critical: degrade major: degrade Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: unknown Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: major Context: starlingx 100.108: Type: Alarm Description: "'MGMT' Port failed." Entity_Instance_ID: host=.port= Severity: major Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment. Maintenance_Action: degrade Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: unknown Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: major Context: starlingx 100.109: Type: Alarm Description: |- 'MGMT' Interface degraded. OR 'MGMT' Interface failed. Entity_Instance_ID: host=.interface= Severity: [critical, major] Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment. Maintenance_Action: critical: degrade major: degrade Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: unknown Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: major Context: starlingx 100.110: Type: Alarm Description: "'CLUSTER-HOST' Port failed." Entity_Instance_ID: host=.port= Severity: major Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment. Maintenance_Action: degrade Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: unknown Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: major Context: starlingx 100.111: Type: Alarm Description: |- 'CLUSTER-HOST' Interface degraded. OR 'CLUSTER-HOST' Interface failed. 
Entity_Instance_ID: host=.interface= Severity: [critical, major] Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment. Maintenance_Action: critical: degrade major: degrade Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: unknown Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: major Context: starlingx 100.112: Type: Alarm Description: "'DATA-VRS' Port down." Entity_Instance_ID: host=.port= Severity: major Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment. Maintenance_Action: degrade Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: unknown Service_Affecting: True Suppression: True Management_Affecting_Severity: none Degrade_Affecting_Severity: major Context: openstack 100.113: Type: Alarm Description: |- 'DATA-VRS' Interface degraded. OR 'DATA-VRS' Interface down. Entity_Instance_ID: host=.interface= Severity: [critical, major] Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment. Maintenance_Action: major: degrade Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: unknown Service_Affecting: True Suppression: True Management_Affecting_Severity: none Degrade_Affecting_Severity: major Context: openstack 100.114: Type: Alarm Description: major: "NTP configuration does not contain any valid or reachable NTP servers." minor: "NTP address is not a valid or a reachable NTP server." Entity_Instance_ID: major: host=.ntp minor: host=.ntp= Severity: [major, minor] Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support." 
Maintenance_Action: none Inhibit_Alarms: Alarm_Type: communication Probable_Cause: unknown Service_Affecting: False Suppression: False Management_Affecting_Severity: none Degrade_Affecting_Severity: none Context: starlingx 100.115: Type: Alarm Description: "VSwitch Memory Usage, processor threshold exceeded; threshold x%, actual y% ." Entity_Instance_ID: host=.processor= Severity: [critical, major, minor] Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support." Maintenance_Action: critical: degrade major: degrade Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: threshold-crossed Service_Affecting: False Suppression: True Management_Affecting_Severity: none Degrade_Affecting_Severity: critical Context: none 100.116: Type: Alarm Description: "Cinder LVM Thinpool Usage threshold exceeded; threshold x%, actual y% ." Entity_Instance_ID: host= Severity: [critical, major, minor] Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support." Maintenance_Action: critical: degrade major: degrade Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: threshold-crossed Service_Affecting: False Suppression: True Management_Affecting_Severity: none Degrade_Affecting_Severity: critical Context: none 100.117: Type: Alarm Description: "Nova LVM Thinpool Usage threshold exceeded; threshold x%, actual y% ." Entity_Instance_ID: host= Severity: [critical, major, minor] Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support." Maintenance_Action: critical: degrade major: degrade Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: threshold-crossed Service_Affecting: False Suppression: True Management_Affecting_Severity: major Degrade_Affecting_Severity: critical Context: none 100.118: Type: Alarm Description: Controller cannot establish connection with remote logging server. 
Entity_Instance_ID: host= Severity: minor Proposed_Repair_Action: "Ensure Remote Log Server IP is reachable from Controller through OAM interface; otherwise contact next level of support." Maintenance_Action: none Inhibit_Alarms: False Alarm_Type: communication Probable_Cause: communication-subsystem-failure Service_Affecting: False Suppression: False Management_Affecting_Severity: none Degrade_Affecting_Severity: none Context: starlingx 100.119: Type: Alarm Description: |- does not support the provisioned PTP mode OR PTP clocking is out-of-tolerance OR is not locked to remote PTP Primary source OR GNSS signal loss state: OR 1PPS signal loss state: Entity_Instance_ID: |- host=.ptp OR host=.ptp=no-lock OR host=.ptp=.unsupported=hardware-timestamping OR host=.ptp=.unsupported=software-timestamping OR host=.ptp=.unsupported=legacy-timestamping OR host=.ptp=out-of-tolerance OR host=.instance=.ptp=out-of-tolerance OR host=.interface=.ptp=signal-loss Severity: [major, minor] Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support." Maintenance_Action: none Inhibit_Alarms: Alarm_Type: communication Probable_Cause: unknown Service_Affecting: False Suppression: False Management_Affecting_Severity: none Degrade_Affecting_Severity: none Context: starlingx 100.120: Type: Alarm Description: Controllers running mismatched kernels. Entity_Instance_ID: host=.kernel= Severity: minor Proposed_Repair_Action: "Modify controllers using 'system host-kernel-modify' so that both are running the desired 'standard' or 'lowlatency' kernel." Maintenance_Action: none Inhibit_Alarms: False Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Suppression: False Management_Affecting_Severity: none Degrade_Affecting_Severity: none Context: starlingx 100.121: Type: Alarm Description: Host not running the provisioned kernel. 
Entity_Instance_ID: host=.kernel= Severity: major Proposed_Repair_Action: "Retry 'system host-kernel-modify' and if condition persists, contact next level of support." Maintenance_Action: none Inhibit_Alarms: False Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Suppression: False Management_Affecting_Severity: major Degrade_Affecting_Severity: none Context: starlingx 100.150: Type: Alarm Description: critical: "service open file descriptor has reached its limit" major: "service open file descriptor is approaching its limit" Entity_Instance_ID: |- host=.resource_type=file-descriptor.service_name= Severity: [critical, major] Proposed_Repair_Action: "swact to the other controller if it is available" Maintenance_Action: none Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: threshold-crossed Service_Affecting: True Suppression: False Management_Affecting_Severity: critical Degrade_Affecting_Severity: critical Context: starlingx #--------------------------------------------------------------------------- # MAINTENANCE #--------------------------------------------------------------------------- 200.001: Type: Alarm Description: was administratively locked to take it out-of-service. Entity_Instance_ID: host= Severity: warning Proposed_Repair_Action: Administratively unlock Host to bring it back in-service. Maintenance_Action: none Inhibit_Alarms: True Alarm_Type: operational-violation Probable_Cause: out-of-service Service_Affecting: True Suppression: False Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 200.004: Type: Alarm Description: |- experienced a service-affecting failure. Host is being auto recovered by Reboot. Entity_Instance_ID: host= Severity: critical Proposed_Repair_Action: If auto-recovery is consistently unable to recover host to the unlocked-enabled state contact next level of support or lock and replace failing host.
Maintenance_Action: auto recover Inhibit_Alarms: False Alarm_Type: operational-violation Probable_Cause: application-subsystem-failure Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 200.011: Type: Alarm Description: experienced a configuration failure during initialization. Host is being re-configured by Reboot. Entity_Instance_ID: host= Severity: critical Proposed_Repair_Action: If auto-recovery is consistently unable to recover host to the unlocked-enabled state contact next level of support or lock and replace failing host. Maintenance_Action: auto-recover Inhibit_Alarms: False Alarm_Type: operational-violation Probable_Cause: configuration-or-customization-error Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 200.010: Type: Alarm Description: access to board management module has failed. Entity_Instance_ID: host= Severity: warning Proposed_Repair_Action: Check Host's board management configuration and connectivity. Maintenance_Action: auto recover Inhibit_Alarms: False Alarm_Type: operational-violation Probable_Cause: communication-subsystem-failure Service_Affecting: False Suppression: False Management_Affecting_Severity: none Degrade_Affecting_Severity: none Context: starlingx 200.013: Type: Alarm Description: compute service of the only available controller is not operational. Auto-recovery is disabled. Degrading host instead. Entity_Instance_ID: host= Severity: major Proposed_Repair_Action: Enable second controller and Switch Activity (Swact) over to it as soon as possible. Then Lock and Unlock host to recover its local compute service.
Maintenance_Action: "degrade - requires manual action" Inhibit_Alarms: False Alarm_Type: operational-violation Probable_Cause: communication-subsystem-failure Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: major Context: starlingx 200.005: Type: Alarm Description: |- Degrade: is experiencing an intermittent 'Management Network' communication failure that has exceeded its lower alarming threshold. Failure: is experiencing a persistent critical 'Management Network' communication failure. Entity_Instance_ID: host= Severity: [critical, major] Proposed_Repair_Action: "Check 'Management Network' connectivity and support for multicast messaging. If problem consistently occurs after that and Host is reset, then contact next level of support or lock and replace failing host." Maintenance_Action: auto recover Inhibit_Alarms: False Alarm_Type: communication Probable_Cause: unknown Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 200.009: Type: Alarm Description: |- Degrade: is experiencing an intermittent 'Cluster-host Network' communication failure that has exceeded its lower alarming threshold. Failure: is experiencing a persistent critical 'Cluster-host Network' communication failure. Entity_Instance_ID: host= Severity: [critical, major] Proposed_Repair_Action: "Check 'Cluster-host Network' connectivity and support for multicast messaging. If problem consistently occurs after that and Host is reset, then contact next level of support or lock and replace failing host."
Maintenance_Action: auto recover Inhibit_Alarms: False Alarm_Type: communication Probable_Cause: unknown Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 200.006: Type: Alarm Description: |- Main Process Monitor Daemon Failure (major): 'Process Monitor' (pmond) process is not running or functioning properly. The system is trying to recover this process. Monitored Process Failure (critical/major/minor): Critical: critical '' process has failed and could not be auto-recovered gracefully. Auto-recovery progression by host reboot is required and in progress. Major: is degraded due to the failure of its '' process. Auto recovery of this major process is in progress. Minor: '' process has failed. Auto recovery of this minor process is in progress. OR '' process has failed. Manual recovery is required. Entity_Instance_ID: host=.process= Severity: [critical, major, minor] Proposed_Repair_Action: |- If this alarm does not automatically clear after some time and continues to be asserted after Host is locked and unlocked then contact next level of support for root cause analysis and recovery. If problem consistently occurs after Host is locked and unlocked then contact next level of support for root cause analysis and recovery. Maintenance_Action: critical: auto-recover major: degrade minor: Inhibit_Alarms: False Alarm_Type: operational-violation Probable_Cause: unknown Service_Affecting: critical: True major: True minor: False Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: major Context: starlingx # 200.006: // NOTE using duplicate ID of a completely analogous Alarm for this # Type: Log # Description: |- # Main Process Monitor Daemon Failure (major) # 'Process Monitor' (pmond) process is not running or functioning properly. # The system is trying to recover this process.
# # Monitored Process Failure (critical/major/minor) # critical: critical '' process has failed and could not be auto-recovered gracefully. # Auto-recovery progression by host reboot is required and in progress. # major: is degraded due to the failure of its '' process. Auto recovery of this major process is in progress. # minor: '' process has failed. Auto recovery of this minor process is in progress. # OR # '' process has failed. Manual recovery is required. # Entity_Instance_ID: host=.process= # Severity: minor # Alarm_Type: other # Probable_Cause: unspecified-reason # Service_Affecting: True 200.007: Type: Alarm Description: critical: "Host is degraded due to a 'critical' out-of-tolerance reading from the '' sensor" major: "Host is degraded due to a 'major' out-of-tolerance reading from the '' sensor" minor: "Host is reporting a 'minor' out-of-tolerance reading from the '' sensor" Entity_Instance_ID: host=.sensor= Severity: [critical, major, minor] Proposed_Repair_Action: "If problem consistently occurs after Host is power cycled and or reset, contact next level of support or lock and replace failing host." Maintenance_Action: critical: degrade major: degrade minor: auto-recover (polling) Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: unspecified-reason Service_Affecting: critical: True major: False minor: False Suppression: True Management_Affecting_Severity: none Degrade_Affecting_Severity: critical Context: starlingx 200.014: Type: Alarm Description: The Hardware Monitor was unable to load, configure and monitor one or more hardware sensors. Entity_Instance_ID: host= Severity: minor Proposed_Repair_Action: Check Board Management Controller provisioning. Try reprovisioning the BMC. If problem persists, try power cycling the host and then the entire server including the BMC power. If problem persists, then contact next level of support. 
Maintenance_Action: None Inhibit_Alarms: False Alarm_Type: operational-violation Probable_Cause: unknown Service_Affecting: False Suppression: True Management_Affecting_Severity: none Degrade_Affecting_Severity: none Context: starlingx 200.015: Type: Alarm Description: Unable to read one or more sensor groups from this host's board management controller Entity_Instance_ID: host= Severity: major Proposed_Repair_Action: Check board management connectivity and try rebooting the board management controller. If problem persists, contact next level of support or lock and replace failing host. Maintenance_Action: None Inhibit_Alarms: False Alarm_Type: operational-violation Probable_Cause: unknown Service_Affecting: False Suppression: False Management_Affecting_Severity: none Degrade_Affecting_Severity: none Context: starlingx 200.020: Type: Log Description: |- has been 'discovered' on the network OR has been 'added' to the system OR has 'entered' multi-node failure avoidance OR has 'exited' multi-node failure avoidance Entity_Instance_ID: host=.event=discovered OR host=.event=add OR host=.event=mnfa_enter OR host=.event=mnfa_exit Severity: warning Alarm_Type: other Probable_Cause: unspecified-reason Service_Affecting: True Context: starlingx 200.021: Type: Log Description: |- board management controller has been 'provisioned' OR board management controller has been 're-provisioned' OR board management controller has been 'de-provisioned' OR manual 'unlock' request OR manual 'reboot' request OR manual 'reset' request OR manual 'power-off' request OR manual 'power-on' request OR manual 'reinstall' request OR manual 'force-lock' request OR manual 'delete' request OR manual 'controller switchover' request Entity_Instance_ID: |- host=.command=provision OR host=.command=reprovision OR host=.command=deprovision OR host=.command=unlock OR host=.command=reboot OR host=.command=reset OR host=.command=power-off OR host=.command=power-on OR host=.command=reinstall OR 
host=.command=force-lock OR host=.command=delete OR host=.command=swact Severity: warning Alarm_Type: other Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 200.022: Type: Log Description: |- is now 'disabled' OR is now 'enabled' OR is now 'online' OR is now 'offline' OR is 'disabled-failed' to the system OR reinstall failed OR reinstall completed successfully Entity_Instance_ID: |- host=.state=disabled OR host=.state=enabled OR host=.status=online OR host=.status=offline OR host=.status=failed OR host=.status=reinstall-failed OR host=.status=reinstall-complete Severity: warning Alarm_Type: other Probable_Cause: unspecified-reason Service_Affecting: True Context: starlingx #--------------------------------------------------------------------------- # BACKUP AND RESTORE #--------------------------------------------------------------------------- 210.001: Type: Alarm Description: System Backup in progress. Entity_Instance_ID: host=controller Severity: minor Proposed_Repair_Action: No action required. Maintenance_Action: Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: unspecified-reason Service_Affecting: False Suppression: False Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 210.002: Type: Alarm Description: System Restore in progress. Entity_Instance_ID: host=controller Severity: minor Proposed_Repair_Action: Run 'system restore-complete' to complete restore if running restore manually. Maintenance_Action: Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: unspecified-reason Service_Affecting: False Suppression: False Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx #--------------------------------------------------------------------------- # SYSTEM CONFIGURATION #--------------------------------------------------------------------------- 250.001: Type: Alarm Description: Configuration is out-of-date. 
Entity_Instance_ID: host= Severity: major Proposed_Repair_Action: Administratively lock and unlock to update config. Maintenance_Action: Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: unspecified-reason Service_Affecting: True Suppression: False Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 250.003: Type: Alarm Description: "Kubernetes certificates rotation failed on host[, reason = ]" Entity_Instance_ID: host= Severity: major Proposed_Repair_Action: Lock and unlock the host to update services with new certificates (Manually renew kubernetes certificates first if renewal failed). Maintenance_Action: Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: unspecified-reason Service_Affecting: False Suppression: False Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx #--------------------------------------------------------------------------- # DEPLOYMENT #--------------------------------------------------------------------------- 260.001: Type: Alarm Description: "Deployment resource not reconciled: " Entity_Instance_ID: resource=,name= Severity: major Proposed_Repair_Action: Monitor and if condition persists, validate deployment configuration. Maintenance_Action: Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: configuration-out-of-date Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 260.002: Type: Alarm Description: "Deployment resource not synchronized: " Entity_Instance_ID: resource=,name= Severity: minor Proposed_Repair_Action: Monitor and if condition persists, validate deployment configuration. 
Maintenance_Action: Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: configuration-out-of-date Service_Affecting: False Suppression: True Management_Affecting_Severity: none Degrade_Affecting_Severity: none Context: starlingx #--------------------------------------------------------------------------- # VM Compute Services #--------------------------------------------------------------------------- 270.101: Type: Log Description: "Host compute services failure[, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: none 270.102: Type: Log Description: Host compute services enabled Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: none 270.103: Type: Log Description: Host compute services disabled Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: none 275.001: Type: Log Description: Host hypervisor is now - Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: none #--------------------------------------------------------------------------- # DISTRIBUTED CLOUD #--------------------------------------------------------------------------- 280.001: Type: Alarm Description: is offline Entity_Instance_ID: subcloud= Severity: critical Proposed_Repair_Action: Wait for subcloud to become online; if problem persists contact next level of support Maintenance_Action: Inhibit_Alarms: Alarm_Type: communication Probable_Cause: loss-of-signal Service_Affecting: False Suppression: False Management_Affecting_Severity: none Degrade_Affecting_Severity: none Context: starlingx 280.002: Type: Alarm Description: sync_status is out-of-sync Entity_Instance_ID: [subcloud=.resource=] 
Severity: major Proposed_Repair_Action: If problem persists contact next level of support Maintenance_Action: Inhibit_Alarms: Alarm_Type: other Probable_Cause: application-subsystem-failure Service_Affecting: False Suppression: False Management_Affecting_Severity: none Degrade_Affecting_Severity: none Context: starlingx 280.003: Type: Alarm Description: Subcloud backup failure Entity_Instance_ID: subcloud= Severity: minor Proposed_Repair_Action: Retry subcloud backup after checking backup input file. If problem persists, contact next level of support. Maintenance_Action: Inhibit_Alarms: Alarm_Type: processing-error Probable_Cause: unknown Service_Affecting: False Suppression: False Management_Affecting_Severity: none Degrade_Affecting_Severity: none Context: none #--------------------------------------------------------------------------- # NETWORK #--------------------------------------------------------------------------- 300.001: Type: Alarm Description: "'Data' Port failed." Entity_Instance_ID: host=.port= Severity: major Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment. Maintenance_Action: Inhibit_Alarms: Alarm_Type: equipment Probable_Cause: loss-of-signal Service_Affecting: True Suppression: False Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 300.002: Type: Alarm Description: |- 'Data' Interface degraded. OR 'Data' Interface failed. Entity_Instance_ID: host=.interface= Severity: [critical, major] Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment. Maintenance_Action: Inhibit_Alarms: Alarm_Type: equipment Probable_Cause: loss-of-signal Service_Affecting: True Suppression: False Management_Affecting_Severity: warning Degrade_Affecting_Severity: critical Context: openstack 300.003: Type: Alarm Description: Networking Agent not responding. 
Entity_Instance_ID: host=.agent= Severity: major Proposed_Repair_Action: "If condition persists, attempt to clear issue by administratively locking and unlocking the Host." Maintenance_Action: Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: underlying-resource-unavailable Service_Affecting: True Suppression: False Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: openstack 300.004: Type: Alarm Description: No enabled compute host with connectivity to provider network. Entity_Instance_ID: service=networking.providernet= Severity: major Proposed_Repair_Action: Enable compute hosts with required provider network connectivity. Maintenance_Action: Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: underlying-resource-unavailable Service_Affecting: True Suppression: False Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: openstack 300.005: Type: Alarm Description: |- Communication failure detected over provider network x% for ranges y% on host z%. OR Communication failure detected over provider network x% on host z%. Entity_Instance_ID: host=.service=networking.providernet= Severity: major Proposed_Repair_Action: Check neighbor switch port VLAN assignments. Maintenance_Action: Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: underlying-resource-unavailable Service_Affecting: True Suppression: False Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: openstack 300.010: Type: Alarm Description: |- ML2 Driver Agent non-reachable OR ML2 Driver Agent reachable but non-responsive OR ML2 Driver Agent authentication failure OR ML2 Driver Agent is unable to sync Neutron database Entity_Instance_ID: host=.ml2driver= Severity: major Proposed_Repair_Action: "Monitor and if condition persists, contact next level of support." 
Maintenance_Action: Inhibit_Alarms: Alarm_Type: processing-error Probable_Cause: underlying-resource-unavailable Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: openstack 300.012: Type: Alarm Description: "Openflow Controller connection failed." Entity_Instance_ID: host=.openflow-controller= Severity: major Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment. Maintenance_Action: Inhibit_Alarms: Alarm_Type: equipment Probable_Cause: loss-of-signal Service_Affecting: True Suppression: False Management_Affecting_Severity: warning Degrade_Affecting_Severity: critical Context: openstack 300.013: Type: Alarm Description: |- No active Openflow controller connections found for this network. OR One or more Openflow controller connections in disconnected state for this network. Entity_Instance_ID: host=.openflow-network= Severity: [critical, major] Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment. Maintenance_Action: Inhibit_Alarms: Alarm_Type: equipment Probable_Cause: loss-of-signal Service_Affecting: True Suppression: False Management_Affecting_Severity: warning Degrade_Affecting_Severity: critical Context: openstack 300.014: Type: Alarm Description: "OVSDB Manager connection failed." Entity_Instance_ID: host=.sdn-controller= Severity: major Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment. Maintenance_Action: Inhibit_Alarms: Alarm_Type: equipment Probable_Cause: loss-of-signal Service_Affecting: True Suppression: False Management_Affecting_Severity: warning Degrade_Affecting_Severity: critical Context: none 300.015: Type: Alarm Description: "No active OVSDB connections found." Entity_Instance_ID: host= Severity: critical Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment. 
Maintenance_Action: Inhibit_Alarms: Alarm_Type: equipment Probable_Cause: loss-of-signal Service_Affecting: True Suppression: False Management_Affecting_Severity: warning Degrade_Affecting_Severity: critical Context: openstack 300.016: Type: Alarm Description: "Dynamic routing agent x% lost connectivity to peer y%." Entity_Instance_ID: host=,agent=,bgp-peer= Severity: major Proposed_Repair_Action: If condition persists, fix connectivity to peer. Maintenance_Action: Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: loss-of-signal Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: openstack #--------------------------------------------------------------------------- # HIGH AVAILABILITY #--------------------------------------------------------------------------- 400.001: Type: Alarm Description: |- Service group failure; . OR Service group degraded; . OR Service group warning; . Entity_Instance_ID: service_domain=.service_group=.host= Severity: [critical, major, minor] Proposed_Repair_Action: Contact next level of support. Maintenance_Action: Inhibit_Alarms: False Alarm_Type: processing-error Probable_Cause: underlying-resource-unavailable Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: major Context: starlingx 400.002: Type: Alarm Description: |- Service group loss of redundancy; expected standby member but only standby member available. OR Service group loss of redundancy; expected standby member but only standby member available. OR Service group loss of redundancy; expected active member but no active members available. OR Service group loss of redundancy; expected active member but only active member available. Entity_Instance_ID: service_domain=.service_group= Severity: major Proposed_Repair_Action: "Bring a controller node back into service, otherwise contact next level of support."
Maintenance_Action: Inhibit_Alarms: False Alarm_Type: processing-error Probable_Cause: underlying-resource-unavailable Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 400.003: Type: Alarm Description: |- License key is not installed; a valid license key is required for operation. OR License key has expired or is invalid; a valid license key is required for operation. OR Evaluation license key will expire on ; there are days remaining in this evaluation. OR Evaluation license key will expire on ; there is only 1 day remaining in this evaluation. Entity_Instance_ID: host= Severity: critical Proposed_Repair_Action: Contact next level of support to obtain a new license key. Maintenance_Action: Inhibit_Alarms: False Alarm_Type: processing-error Probable_Cause: key-expired Service_Affecting: True Suppression: False Management_Affecting_Severity: critical Degrade_Affecting_Severity: none Context: starlingx # 400.004: // NOTE Removed # Type: Alarm # Description: Service group software modification detected; . # Entity_Instance_ID: host= # Severity: major # Proposed_Repair_Action: Contact next level of support. # Maintenance_Action: # Inhibit_Alarms: False # Alarm_Type: processing-error # Probable_Cause: software-program-error # Service_Affecting: True # Suppression: False 400.005: Type: Alarm Description: |- Communication failure detected with peer over port . OR Communication failure detected with peer over port within the last 30 seconds. Entity_Instance_ID: host=.network= Severity: major Proposed_Repair_Action: Check cabling and far-end port configuration and status on adjacent equipment. 
Maintenance_Action: Inhibit_Alarms: False Alarm_Type: communication Probable_Cause: underlying-resource-unavailable Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx #--------------------------------------------------------------------------- # SM #--------------------------------------------------------------------------- 401.001: Type: Log Description: Service group state change from to on host Entity_Instance_ID: service_domain=.service_group=.host= Severity: critical Alarm_Type: processing-error Probable_Cause: unspecified-reason Service_Affecting: True Context: openstack 401.002: Type: Log Description: |- Service group loss of redundancy; expected standby member but no standby members available or Service group loss of redundancy; expected standby member but only standby member(s) available or Service group has no active members available; expected active member(s) or Service group loss of redundancy; expected active member(s) but only active member(s) available Entity_Instance_ID: service_domain=.service_group= Severity: critical Alarm_Type: processing-error Probable_Cause: unspecified-reason Service_Affecting: True Context: openstack 401.003: Type: Log Description: |- License key has expired or is invalid or Evaluation license key will expire on or License key is valid Entity_Instance_ID: host= Severity: critical Alarm_Type: processing-error Probable_Cause: unspecified-reason Service_Affecting: True Context: starlingx 401.005: Type: Log Description: |- Communication failure detected with peer over port on host or Communication failure detected with peer over port on host within the last seconds or Communication established with peer over port on host Entity_Instance_ID: host=.network= Severity: critical Alarm_Type: processing-error Probable_Cause: unspecified-reason Service_Affecting: True Context: starlingx 401.007: Type: Log Description: Swact or swact-force 
Entity_Instance_ID: host= Severity: critical Alarm_Type: processing-error Probable_Cause: unspecified-reason Service_Affecting: True Context: starlingx #--------------------------------------------------------------------------- # SECURITY #--------------------------------------------------------------------------- 500.100: Type: Alarm Description: TPM initialization failed on host. Entity_Instance_ID: host= Severity: major Proposed_Repair_Action: Reinstall HTTPS certificate; if problem persists contact next level of support. Maintenance_Action: degrade Inhibit_Alarms: Alarm_Type: equipment Probable_Cause: procedural-error Service_Affecting: True Suppression: False Management_Affecting_Severity: none Degrade_Affecting_Severity: none Context: none 500.101: Type: Alarm Description: Developer patch certificate enabled. Entity_Instance_ID: host=controller Severity: critical Proposed_Repair_Action: Reinstall system to disable developer certificate and remove untrusted patches. Maintenance_Action: Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: unspecified-reason Service_Affecting: False Suppression: False Management_Affecting_Severity: none Degrade_Affecting_Severity: none Context: starlingx 500.200: Type: Alarm Description: |- Certificate 'system certificate-show ' (mode=) expiring soon on . OR Certificate '/' expiring soon on . OR Certificate '' expiring soon on . Entity_Instance_ID: |- system.certificate.mode=.uuid= OR namespace=.certificate= OR namespace=.secret= OR system.certificate.k8sRootCA Severity: major Proposed_Repair_Action: Check certificate expiration time. Renew certificate for the entity identified. Maintenance_Action: Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: certificate-expiration Service_Affecting: False Suppression: False Management_Affecting_Severity: none Degrade_Affecting_Severity: none Context: starlingx 500.210: Type: Alarm Description: |- Certificate 'system certificate-show ' (mode=) expired.
OR Certificate '/' expired. OR Certificate '' expired. Entity_Instance_ID: |- system.certificate.mode=.uuid= OR namespace=.certificate= OR namespace=.secret= OR system.certificate.k8sRootCA Severity: critical Proposed_Repair_Action: Check certificate expiration time. Renew certificate for the entity identified. Maintenance_Action: Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: certificate-expiration Service_Affecting: False Suppression: False Management_Affecting_Severity: none Degrade_Affecting_Severity: none Context: starlingx 500.500: Type: Log Description: "Host has IMA Appraisal failure for service when executing [, reason = ]" Entity_Instance_ID: host=.service= Severity: major Alarm_Type: integrity-violation Probable_Cause: information-modification-detected Service_Affecting: False Context: none #--------------------------------------------------------------------------- # VM #--------------------------------------------------------------------------- 700.001: Type: Alarm Description: |- Instance owned by has failed on host Instance owned by has failed to schedule Entity_Instance_ID: tenant=.instance= Severity: critical Proposed_Repair_Action: The system will attempt recovery; no repair action required Maintenance_Action: Inhibit_Alarms: Alarm_Type: processing-error Probable_Cause: software-error Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: openstack 700.002: Type: Alarm Description: Instance owned by is paused on host Entity_Instance_ID: tenant=.instance= Severity: critical Proposed_Repair_Action: Un-pause the instance Maintenance_Action: Inhibit_Alarms: Alarm_Type: processing-error Probable_Cause: procedural-error Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: openstack 700.003: Type: Alarm Description: Instance owned by is suspended on host Entity_Instance_ID: tenant=.instance= Severity:
critical Proposed_Repair_Action: Resume the instance Maintenance_Action: Inhibit_Alarms: Alarm_Type: processing-error Probable_Cause: procedural-error Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: openstack 700.004: Type: Alarm Description: Instance owned by is stopped on host Entity_Instance_ID: tenant=.instance= Severity: critical Proposed_Repair_Action: Start the instance Maintenance_Action: Inhibit_Alarms: Alarm_Type: processing-error Probable_Cause: procedural-error Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: openstack 700.005: Type: Alarm Description: Instance owned by is rebooting on host Entity_Instance_ID: tenant=.instance= Severity: critical Proposed_Repair_Action: Wait for reboot to complete; if problem persists contact next level of support Maintenance_Action: Inhibit_Alarms: Alarm_Type: processing-error Probable_Cause: unspecified-reason Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: openstack 700.006: Type: Alarm Description: Instance owned by is rebuilding on host Entity_Instance_ID: tenant=.instance= Severity: critical Proposed_Repair_Action: Wait for rebuild to complete; if problem persists contact next level of support Maintenance_Action: Inhibit_Alarms: Alarm_Type: processing-error Probable_Cause: underlying-resource-unavailable Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: openstack 700.007: Type: Alarm Description: Instance owned by is evacuating from host Entity_Instance_ID: tenant=.instance= Severity: critical Proposed_Repair_Action: Wait for evacuate to complete; if problem persists contact next level of support Maintenance_Action: Inhibit_Alarms: Alarm_Type: processing-error Probable_Cause: underlying-resource-unavailable 
Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: openstack 700.008: Type: Alarm Description: Instance owned by is live migrating from host Entity_Instance_ID: tenant=.instance= Severity: warning Proposed_Repair_Action: Wait for live migration to complete; if problem persists contact next level of support Maintenance_Action: Inhibit_Alarms: Alarm_Type: processing-error Probable_Cause: unspecified-reason Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: openstack 700.009: Type: Alarm Description: Instance owned by is cold migrating from host Entity_Instance_ID: tenant=.instance= Severity: critical Proposed_Repair_Action: Wait for cold migration to complete; if problem persists contact next level of support Maintenance_Action: Inhibit_Alarms: Alarm_Type: processing-error Probable_Cause: unspecified-reason Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: openstack 700.010: Type: Alarm Description: Instance owned by has been cold-migrated to host waiting for confirmation Entity_Instance_ID: tenant=.instance= Severity: critical Proposed_Repair_Action: Confirm or revert cold-migrate of instance Maintenance_Action: Inhibit_Alarms: Alarm_Type: processing-error Probable_Cause: unspecified-reason Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: openstack 700.011: Type: Alarm Description: Instance owned by is reverting cold migrate to host Entity_Instance_ID: tenant=.instance= Severity: critical Proposed_Repair_Action: "Wait for cold migration revert to complete; if problem persists contact next level of support" Maintenance_Action: Inhibit_Alarms: Alarm_Type: other Probable_Cause: unspecified-reason Service_Affecting: True Suppression: True Management_Affecting_Severity: warning 
Degrade_Affecting_Severity: none Context: openstack 700.012: Type: Alarm Description: Instance owned by is resizing on host Entity_Instance_ID: tenant=.instance= Severity: critical Proposed_Repair_Action: Wait for resize to complete; if problem persists contact next level of support Maintenance_Action: Inhibit_Alarms: Alarm_Type: processing-error Probable_Cause: unspecified-reason Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: openstack 700.013: Type: Alarm Description: Instance owned by has been resized on host waiting for confirmation Entity_Instance_ID: tenant=.instance= Severity: critical Proposed_Repair_Action: Confirm or revert resize of instance Maintenance_Action: Inhibit_Alarms: Alarm_Type: processing-error Probable_Cause: unspecified-reason Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: openstack 700.014: Type: Alarm Description: Instance owned by is reverting resize on host Entity_Instance_ID: tenant=.instance= Severity: critical Proposed_Repair_Action: "Wait for resize revert to complete; if problem persists contact next level of support" Maintenance_Action: Inhibit_Alarms: Alarm_Type: other Probable_Cause: unspecified-reason Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: openstack 700.015: Type: Alarm Description: Guest Heartbeat not established for instance owned by on host Entity_Instance_ID: tenant=.instance= Severity: major Proposed_Repair_Action: "Verify that the instance is running the Guest-Client daemon, or disable Guest Heartbeat for the instance if no longer needed, otherwise contact next level of support" Maintenance_Action: Inhibit_Alarms: Alarm_Type: communication Probable_Cause: procedural-error Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none
Context: none 700.016: Type: Alarm Description: Multi-Node Recovery Mode Entity_Instance_ID: subsystem=vim Severity: minor Proposed_Repair_Action: "Wait for the system to exit out of this mode" Maintenance_Action: Inhibit_Alarms: Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: openstack 700.017: Type: Alarm Description: Server group policy was not satisfied Entity_Instance_ID: server-group Severity: minor Proposed_Repair_Action: "Migrate instances in an attempt to satisfy the policy; if problem persists contact next level of support" Maintenance_Action: Inhibit_Alarms: Alarm_Type: processing-error Probable_Cause: procedural-error Service_Affecting: True Suppression: True Management_Affecting_Severity: none Degrade_Affecting_Severity: none Context: openstack 700.101: Type: Log Description: Instance is enabled on host Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.102: Type: Log Description: Instance owned by has failed[, reason = ] Instance owned by has failed to schedule[, reason = ] Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.103: Type: Log Description: Create issued |by the system> against owned by Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.104: Type: Log Description: Creating instance owned by Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.105: Type: Log Description: "Create rejected for instance [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical 
Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.106: Type: Log Description: "Create cancelled for instance [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.107: Type: Log Description: "Create failed for instance [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.108: Type: Log Description: Instance owned by has been created Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.109: Type: Log Description: "Delete issued |by the system> against instance owned by on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.110: Type: Log Description: Deleting instance owned by Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.111: Type: Log Description: "Delete rejected for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.112: Type: Log Description: "Delete cancelled for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.113: Type: Log Description: "Delete failed for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context:
openstack 700.114: Type: Log Description: Deleted instance owned by Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.115: Type: Log Description: "Pause issued |by the system> against instance owned by on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.116: Type: Log Description: Pause in progress for instance on host Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.117: Type: Log Description: "Pause rejected for instance enabled on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.118: Type: Log Description: "Pause cancelled for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.119: Type: Log Description: "Pause failed for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.120: Type: Log Description: Pause complete for instance now paused on host Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.121: Type: Log Description: "Un-pause issued |by the system> against instance owned by on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.122: Type: Log Description: 
Un-pause in-progress for instance on host Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.123: Type: Log Description: "Un-pause rejected for instance paused on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.124: Type: Log Description: "Un-pause cancelled for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.125: Type: Log Description: "Un-pause failed for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.126: Type: Log Description: Un-pause complete for instance now enabled on host Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.127: Type: Log Description: "Suspend issued |by the system> against instance owned by on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.128: Type: Log Description: Suspend in-progress for instance on host Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.129: Type: Log Description: "Suspend rejected for instance enabled on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.130: Type: Log Description: "Suspend cancelled for instance 
on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.131: Type: Log Description: "Suspend failed for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.132: Type: Log Description: Suspend complete for instance now suspended on host Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.133: Type: Log Description: "Resume issued |by the system> against instance owned by on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.134: Type: Log Description: Resume in-progress for instance on host Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.135: Type: Log Description: "Resume rejected for instance suspended on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.136: Type: Log Description: "Resume cancelled for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.137: Type: Log Description: "Resume failed for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.138: Type: Log Description: Resume complete for instance now enabled on host 
Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.139: Type: Log Description: "Start issued |by the system> against instance owned by on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.140: Type: Log Description: Start in-progress for instance on host Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.141: Type: Log Description: "Start rejected for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.142: Type: Log Description: "Start cancelled for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.143: Type: Log Description: "Start failed for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.144: Type: Log Description: Start complete for instance now enabled on host Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.145: Type: Log Description: "Stop issued |by the system|by the instance> against instance owned by on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.146: Type: Log Description: Stop in progress for instance on host Entity_Instance_ID: 
tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.147: Type: Log Description: "Stop rejected for instance enabled on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.148: Type: Log Description: "Stop cancelled for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.149: Type: Log Description: "Stop failed for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.150: Type: Log Description: Stop complete for instance now disabled on host Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.151: Type: Log Description: "Live-Migrate issued |by the system> against instance owned by from host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.152: Type: Log Description: Live-Migrate in progress for instance from host Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.153: Type: Log Description: "Live-Migrate rejected for instance now on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.154: Type: Log Description: "Live-Migrate cancelled for instance now on host [, reason = ]" Entity_Instance_ID: 
tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.155: Type: Log Description: "Live-Migrate failed for instance now on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.156: Type: Log Description: Live-Migrate complete for instance now enabled on host Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.157: Type: Log Description: "Cold-Migrate issued |by the system> against instance owned by from host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.158: Type: Log Description: Cold-Migrate in progress for instance from host Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.159: Type: Log Description: "Cold-Migrate rejected for instance now on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.160: Type: Log Description: "Cold-Migrate cancelled for instance now on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.161: Type: Log Description: "Cold-Migrate failed for instance now on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.162: Type: Log Description: Cold-Migrate complete for instance now enabled on host 
Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.163: Type: Log Description: "Cold-Migrate-Confirm issued |by the system> against instance owned by on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.164: Type: Log Description: Cold-Migrate-Confirm in progress for instance on host Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.165: Type: Log Description: "Cold-Migrate-Confirm rejected for instance now enabled on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.166: Type: Log Description: "Cold-Migrate-Confirm cancelled for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.167: Type: Log Description: "Cold-Migrate-Confirm failed for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.168: Type: Log Description: Cold-Migrate-Confirm complete for instance enabled on host Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.169: Type: Log Description: "Cold-Migrate-Revert issued |by the system> against instance owned by on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 
700.170: Type: Log Description: Cold-Migrate-Revert in progress for instance from host Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.171: Type: Log Description: "Cold-Migrate-Revert rejected for instance now on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.172: Type: Log Description: "Cold-Migrate-Revert cancelled for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.173: Type: Log Description: "Cold-Migrate-Revert failed for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.174: Type: Log Description: Cold-Migrate-Revert complete for instance now enabled on host Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.175: Type: Log Description: "Evacuate issued |by the system> against instance owned by on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.176: Type: Log Description: Evacuating instance owned by from host Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.177: Type: Log Description: "Evacuate rejected for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False 
Context: openstack 700.178: Type: Log Description: "Evacuate cancelled for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.179: Type: Log Description: "Evacuate failed for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.180: Type: Log Description: Evacuate complete for instance now enabled on host Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.181: Type: Log Description: "Reboot <(soft-reboot)|(hard-reboot)> issued |by the system|by the instance> against instance owned by on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.182: Type: Log Description: Reboot in progress for instance on host Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.183: Type: Log Description: "Reboot rejected for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.184: Type: Log Description: "Reboot cancelled for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.185: Type: Log Description: "Reboot failed for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: 
False Context: openstack 700.186: Type: Log Description: Reboot complete for instance now enabled on host Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.187: Type: Log Description: "Rebuild issued |by the system> against instance using image on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.188: Type: Log Description: Rebuild in progress for instance on host Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.189: Type: Log Description: "Rebuild rejected for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.190: Type: Log Description: "Rebuild cancelled for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.191: Type: Log Description: "Rebuild failed for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.192: Type: Log Description: Rebuild complete for instance now enabled on host Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.193: Type: Log Description: "Resize issued |by the system> against instance owned by on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: 
openstack 700.194: Type: Log Description: Resize in progress for instance on host Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.195: Type: Log Description: "Resize rejected for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.196: Type: Log Description: "Resize cancelled for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.197: Type: Log Description: "Resize failed for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.198: Type: Log Description: Resize complete for instance enabled on host waiting for confirmation Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.199: Type: Log Description: "Resize-Confirm issued |by the system> against instance owned by on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.200: Type: Log Description: Resize-Confirm in progress for instance on host Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.201: Type: Log Description: "Resize-Confirm rejected for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 
700.202: Type: Log Description: "Resize-Confirm cancelled for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.203: Type: Log Description: "Resize-Confirm failed for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.204: Type: Log Description: Resize-Confirm complete for instance enabled on host Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.205: Type: Log Description: "Resize-Revert issued |by the system> against instance owned by on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.206: Type: Log Description: Resize-Revert in progress for instance on host Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.207: Type: Log Description: "Resize-Revert rejected for instance owned by on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.208: Type: Log Description: "Resize-Revert cancelled for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.209: Type: Log Description: "Resize-Revert failed for instance on host [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False 
Context: openstack 700.210: Type: Log Description: Resize-Revert complete for instance enabled on host Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.211: Type: Log Description: Guest Heartbeat established for instance on host Entity_Instance_ID: tenant=.instance= Severity: major Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: none 700.212: Type: Log Description: Guest Heartbeat disconnected for instance on host Entity_Instance_ID: tenant=.instance= Severity: major Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: none 700.213: Type: Log Description: "Guest Heartbeat failed for instance [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: none 700.214: Type: Log Description: Instance has been renamed to owned by on host Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.215: Type: Log Description: "Guest Health Check failed for instance [, reason = ]" Entity_Instance_ID: tenant=.instance= Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.216: Type: Log Description: "Entered Multi-Node Recovery Mode" Entity_Instance_ID: subsystem=vim Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack 700.217: Type: Log Description: "Exited Multi-Node Recovery Mode" Entity_Instance_ID: subsystem=vim Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: openstack #--------------------------------------------------------------------------- # APPLICATION 
#--------------------------------------------------------------------------- 750.001: Type: Alarm Description: "Application Upload Failure" Entity_Instance_ID: k8s_application= Severity: warning Proposed_Repair_Action: "Check system inventory log for cause." Maintenance_Action: Inhibit_Alarms: Alarm_Type: processing-error Probable_Cause: unknown Service_Affecting: False Suppression: True Management_Affecting_Severity: none Degrade_Affecting_Severity: none Context: starlingx 750.002: Type: Alarm Description: "Application Apply Failure" Entity_Instance_ID: k8s_application= Severity: major Proposed_Repair_Action: "Retry applying the application. Check application is managed by the system application framework. If the issue persists, please check system inventory log for cause." Maintenance_Action: Inhibit_Alarms: Alarm_Type: processing-error Probable_Cause: unknown Service_Affecting: True Suppression: True Management_Affecting_Severity: none Degrade_Affecting_Severity: none Context: starlingx 750.003: Type: Alarm Description: "Application Remove Failure" Entity_Instance_ID: k8s_application= Severity: major Proposed_Repair_Action: "Retry removing the application. If the issue persists, please check system inventory log for cause." Maintenance_Action: Inhibit_Alarms: Alarm_Type: processing-error Probable_Cause: unknown Service_Affecting: True Suppression: True Management_Affecting_Severity: none Degrade_Affecting_Severity: none Context: starlingx 750.004: Type: Alarm Description: "Application Apply In Progress" Entity_Instance_ID: k8s_application= Severity: warning Proposed_Repair_Action: "No action required." 
Maintenance_Action: Inhibit_Alarms: Alarm_Type: other Probable_Cause: unknown Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 750.005: Type: Alarm Description: "Application Update In Progress" Entity_Instance_ID: k8s_application= Severity: warning Proposed_Repair_Action: "No action required." Maintenance_Action: Inhibit_Alarms: Alarm_Type: other Probable_Cause: unknown Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 750.006: Type: Alarm Description: "Automatic Application Re-Apply Is Pending" Entity_Instance_ID: k8s_application= Severity: warning Proposed_Repair_Action: "Ensure all hosts are either locked or unlocked. When the system is stable the application will be automatically reapplied." Maintenance_Action: Inhibit_Alarms: Alarm_Type: other Probable_Cause: unknown Service_Affecting: False Suppression: True Management_Affecting_Severity: none Degrade_Affecting_Severity: none Context: starlingx #--------------------------------------------------------------------------- # STORAGE #--------------------------------------------------------------------------- 800.001: Type: Alarm Description: |- Possible data loss. Any mds, mon or osd is unavailable in storage replication group. Entity_Instance_ID: cluster= Severity: [critical, major] Proposed_Repair_Action: "Manually restart Ceph processes and check the state of the Ceph cluster with 'ceph -s'. If problem persists, contact next level of support." Maintenance_Action: Inhibit_Alarms: Alarm_Type: equipment Probable_Cause: equipment-malfunction Service_Affecting: critical: True major: False Suppression: False Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 800.010: Type: Alarm Description: |- Potential data loss. No available OSDs in storage replication group.
Entity_Instance_ID: cluster=.peergroup= Severity: [critical] Proposed_Repair_Action: "Ensure storage hosts from replication group are unlocked and available. Check replication group state with 'system host-list'. Check if OSDs of each storage host are up and running. Manually restart Ceph processes and check the state of the Ceph OSDs with 'ceph osd stat' OR 'ceph osd tree'. If problem persists, contact next level of support." Maintenance_Action: Inhibit_Alarms: Alarm_Type: equipment Probable_Cause: equipment-malfunction Service_Affecting: critical: True Suppression: False Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 800.011: Type: Alarm Description: |- Loss of replication in peergroup. Entity_Instance_ID: cluster=.peergroup= Severity: [major] Proposed_Repair_Action: "Ensure storage hosts from replication group are unlocked and available. Check replication group state with 'system host-list'. Check if OSDs of each storage host are up and running. Manually restart Ceph processes and check the state of the Ceph OSDs with 'ceph osd stat' AND/OR 'ceph osd tree'. If problem persists, contact next level of support." Maintenance_Action: Inhibit_Alarms: Alarm_Type: equipment Probable_Cause: equipment-malfunction Service_Affecting: major: True Suppression: False Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 800.002: Type: Alarm Description: |- Image storage media is full: There is not enough disk space on the image storage media. OR Instance snapshot failed: There is not enough disk space on the image storage media. OR Supplied () and generated from uploaded image () did not match. Setting image status to 'killed'. OR Error in store configuration. Adding images to store is disabled. OR Forbidden upload attempt: . OR Insufficient permissions on image storage media: . OR Denying attempt to upload image larger than bytes. OR Denying attempt to upload image because it exceeds the quota: .
OR Received HTTP error while uploading image . OR Client disconnected before sending all data to backend. OR Failed to upload image . Entity_Instance_ID: image= instance= OR tenant= instance= OR image= instance= OR image= instance= OR image= instance= OR image= instance= OR image= instance= OR image= instance= OR image= instance= OR image= instance= OR image= instance= Alarm_Type: [physical-violation, physical-violation, integrity-violation, integrity-violation, security-service-or-mechanism-violation, security-service-or-mechanism-violation, security-service-or-mechanism-violation, security-service-or-mechanism-violation, communication, communication, operational-violation] Severity: warning Proposed_Repair_Action: Maintenance_Action: Inhibit_Alarms: Probable_Cause: unspecified-reason Service_Affecting: False Suppression: False Management_Affecting_Severity: none Degrade_Affecting_Severity: none Context: openstack 800.100: Type: Alarm Description: |- Storage Alarm Condition: Cinder I/O Congestion is above normal range and is building Entity_Instance_ID: cinder_io_monitor Severity: major Proposed_Repair_Action: "Reduce the I/O load on the Cinder LVM backend. Use Cinder QoS mechanisms on high usage volumes." Maintenance_Action: Inhibit_Alarms: Alarm_Type: qos Probable_Cause: congestion Service_Affecting: False Suppression: False Management_Affecting_Severity: none Degrade_Affecting_Severity: none Context: openstack 800.101: Type: Alarm Description: |- Storage Alarm Condition: Cinder I/O Congestion is high and impacting guest performance Entity_Instance_ID: cinder_io_monitor Severity: critical Proposed_Repair_Action: "Reduce the I/O load on the Cinder LVM backend. Cinder actions may fail until congestion is reduced. Use Cinder QoS mechanisms on high usage volumes." 
Maintenance_Action: Inhibit_Alarms: Alarm_Type: qos Probable_Cause: congestion Service_Affecting: False Suppression: False Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: openstack 800.104: Type: Alarm Description: |- Storage Alarm Condition: configuration failed to apply on host: . Entity_Instance_ID: storage_backend= Severity: critical Proposed_Repair_Action: "Update backend setting to reapply configuration. Use the following commands to try again: 'system storage-backend-delete ' AND 'system storage-backend-add '. See the |prod-long| documentation at |docs-url| for more details. If problem persists, contact next level of support." Maintenance_Action: Inhibit_Alarms: Alarm_Type: equipment Probable_Cause: configuration-or-customization-error Service_Affecting: True Suppression: False Management_Affecting_Severity: major Degrade_Affecting_Severity: none Context: starlingx #--------------------------------------------------------------------------- # KUBERNETES #--------------------------------------------------------------------------- 850.001: Type: Alarm Description: Persistent Volume Migration Error Entity_Instance_ID: kubernetes=PV-migration-failed Severity: major Proposed_Repair_Action: "Manually execute /usr/bin/ceph_k8s_update_monitors.sh to confirm PVs are updated, then lock/unlock to clear alarms. If problem persists, contact next level of support." Maintenance_Action: Inhibit_Alarms: Alarm_Type: processing-error Probable_Cause: communication-subsystem-failure Service_Affecting: False Suppression: False Management_Affecting_Severity: none Degrade_Affecting_Severity: none Context: none #--------------------------------------------------------------------------- # SOFTWARE #--------------------------------------------------------------------------- 900.001: Type: Alarm Description: Patching operation in progress. Entity_Instance_ID: host=controller Severity: minor Proposed_Repair_Action: Complete reboots of affected hosts.
Maintenance_Action: Inhibit_Alarms: Alarm_Type: environmental Probable_Cause: unspecified-reason Service_Affecting: False Suppression: False Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 900.002: Type: Alarm Description: Patch host install failure. Command "sw-patch host-install" failed. Entity_Instance_ID: host= Severity: major Proposed_Repair_Action: Undo patching operation. Check patch logs on the target host (i.e. /var/log/patching.log) Maintenance_Action: Inhibit_Alarms: Alarm_Type: environmental Probable_Cause: unspecified-reason Service_Affecting: False Suppression: False Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 900.003: Type: Alarm Description: A patch with state 'obsolete' in its metadata has been uploaded. Entity_Instance_ID: host=controller Severity: warning Proposed_Repair_Action: Remove and delete obsolete patches. Maintenance_Action: Inhibit_Alarms: Alarm_Type: environmental Probable_Cause: unspecified-reason Service_Affecting: False Suppression: False Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 900.004: Type: Alarm Description: The upgrade and running software version do not match. Command host-upgrade failed. Entity_Instance_ID: host= Severity: major Proposed_Repair_Action: Reinstall host to update applied load. Maintenance_Action: Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: unspecified-reason Service_Affecting: True Suppression: False Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 900.005: Type: Alarm Description: System Upgrade in progress. Entity_Instance_ID: host=controller Severity: minor Proposed_Repair_Action: No action required. 
Maintenance_Action: Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: unspecified-reason Service_Affecting: False Suppression: False Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 900.006: Type: Alarm Description: Device image update operation in progress. Entity_Instance_ID: host=controller Severity: minor Proposed_Repair_Action: Complete reboots of affected hosts. Maintenance_Action: Inhibit_Alarms: Alarm_Type: environmental Probable_Cause: unspecified-reason Service_Affecting: False Suppression: False Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 900.007: Type: Alarm Description: Kubernetes upgrade in progress. Entity_Instance_ID: host=controller Severity: minor Proposed_Repair_Action: No action required. Maintenance_Action: Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: unspecified-reason Service_Affecting: False Suppression: False Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 900.008: Type: Alarm Description: Kubernetes rootca update in progress Entity_Instance_ID: host=controller Severity: minor Proposed_Repair_Action: Wait for kubernetes rootca procedure to complete Maintenance_Action: Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: unspecified-reason Service_Affecting: False Suppression: False Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 900.009: Type: Alarm Description: Kubernetes root CA update aborted, certificates may not be fully updated. Command "system kube-rootca-update-abort" has been run. Entity_Instance_ID: host=controller Severity: minor Proposed_Repair_Action: Fully update certificates by a new root CA update. 
Maintenance_Action: Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: unspecified-reason Service_Affecting: False Suppression: False Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 900.010: Type: Alarm Description: System Config update in progress Entity_Instance_ID: host=controller Severity: minor Proposed_Repair_Action: Wait for system config update to complete Maintenance_Action: Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: unspecified-reason Service_Affecting: False Suppression: False Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 900.011: Type: Alarm Description: System Config update aborted, configurations may not be fully updated Entity_Instance_ID: host= Severity: minor Proposed_Repair_Action: Lock the host, wait for the host resource in the deployment namespace to become in-sync, then unlock the host Maintenance_Action: Inhibit_Alarms: Alarm_Type: operational-violation Probable_Cause: unspecified-reason Service_Affecting: False Suppression: False Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 900.101: Type: Alarm Description: Software patch auto-apply in progress Entity_Instance_ID: orchestration=sw-patch Severity: major Proposed_Repair_Action: Wait for software patch auto-apply to complete; if problem persists contact next level of support Maintenance_Action: Inhibit_Alarms: Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 900.102: Type: Alarm Description: Software patch auto-apply aborting Entity_Instance_ID: orchestration=sw-patch Severity: major Proposed_Repair_Action: Wait for software patch auto-apply abort to complete; if problem persists contact next level of support Maintenance_Action: Inhibit_Alarms: Alarm_Type: equipment Probable_Cause: 
unspecified-reason Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 900.103: Type: Alarm Description: Software patch auto-apply failed. Command "sw-manager patch-strategy apply" failed. Entity_Instance_ID: orchestration=sw-patch Severity: critical Proposed_Repair_Action: Attempt to apply software patches manually; if problem persists contact next level of support Maintenance_Action: Inhibit_Alarms: Alarm_Type: equipment Probable_Cause: underlying-resource-unavailable Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 900.111: Type: Log Description: Software patch auto-apply start Entity_Instance_ID: orchestration=sw-patch Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.112: Type: Log Description: Software patch auto-apply in progress Entity_Instance_ID: orchestration=sw-patch Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.113: Type: Log Description: Software patch auto-apply rejected Entity_Instance_ID: orchestration=sw-patch Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.114: Type: Log Description: Software patch auto-apply cancelled Entity_Instance_ID: orchestration=sw-patch Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.115: Type: Log Description: Software patch auto-apply failed Entity_Instance_ID: orchestration=sw-patch Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.116: Type: Log Description: Software patch auto-apply completed Entity_Instance_ID: orchestration=sw-patch Severity: critical Alarm_Type: equipment 
Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.117: Type: Log Description: Software patch auto-apply abort Entity_Instance_ID: orchestration=sw-patch Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.118: Type: Log Description: Software patch auto-apply aborting Entity_Instance_ID: orchestration=sw-patch Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.119: Type: Log Description: Software patch auto-apply abort rejected Entity_Instance_ID: orchestration=sw-patch Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.120: Type: Log Description: Software patch auto-apply abort failed Entity_Instance_ID: orchestration=sw-patch Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.121: Type: Log Description: Software patch auto-apply aborted Entity_Instance_ID: orchestration=sw-patch Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.201: Type: Alarm Description: Software upgrade auto-apply in progress Entity_Instance_ID: orchestration=sw-upgrade Severity: major Proposed_Repair_Action: Wait for software upgrade auto-apply to complete; if problem persists contact next level of support Maintenance_Action: Inhibit_Alarms: Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 900.202: Type: Alarm Description: Software upgrade auto-apply aborting Entity_Instance_ID: orchestration=sw-upgrade Severity: major Proposed_Repair_Action: Wait for software upgrade auto-apply abort to complete; if problem persists contact next level of support 
Maintenance_Action: Inhibit_Alarms: Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 900.203: Type: Alarm Description: Software upgrade auto-apply failed. Command "sw-manager upgrade-strategy apply" failed Entity_Instance_ID: orchestration=sw-upgrade Severity: critical Proposed_Repair_Action: Attempt to apply software upgrade manually; if problem persists contact next level of support Maintenance_Action: Inhibit_Alarms: Alarm_Type: equipment Probable_Cause: underlying-resource-unavailable Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 900.211: Type: Log Description: Software upgrade auto-apply start Entity_Instance_ID: orchestration=sw-upgrade Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.212: Type: Log Description: Software upgrade auto-apply in progress Entity_Instance_ID: orchestration=sw-upgrade Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.213: Type: Log Description: Software upgrade auto-apply rejected Entity_Instance_ID: orchestration=sw-upgrade Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.214: Type: Log Description: Software upgrade auto-apply cancelled Entity_Instance_ID: orchestration=sw-upgrade Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.215: Type: Log Description: Software upgrade auto-apply failed Entity_Instance_ID: orchestration=sw-upgrade Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.216: Type: Log Description: Software upgrade auto-apply
completed Entity_Instance_ID: orchestration=sw-upgrade Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.217: Type: Log Description: Software upgrade auto-apply abort Entity_Instance_ID: orchestration=sw-upgrade Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.218: Type: Log Description: Software upgrade auto-apply aborting Entity_Instance_ID: orchestration=sw-upgrade Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.219: Type: Log Description: Software upgrade auto-apply abort rejected Entity_Instance_ID: orchestration=sw-upgrade Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.220: Type: Log Description: Software upgrade auto-apply abort failed Entity_Instance_ID: orchestration=sw-upgrade Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.221: Type: Log Description: Software upgrade auto-apply aborted Entity_Instance_ID: orchestration=sw-upgrade Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.301: Type: Alarm Description: Firmware Update auto-apply in progress Entity_Instance_ID: orchestration=fw-update Severity: major Proposed_Repair_Action: Wait for firmware update auto-apply to complete; if problem persists contact next level of support Maintenance_Action: Inhibit_Alarms: Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 900.302: Type: Alarm Description: Firmware Update auto-apply aborting Entity_Instance_ID: orchestration=fw-update Severity: major Proposed_Repair_Action: Wait for 
firmware update auto-apply abort to complete; if problem persists contact next level of support Maintenance_Action: Inhibit_Alarms: Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 900.303: Type: Alarm Description: Firmware Update auto-apply failed. Command "sw-manager fw-update-strategy apply" failed. Entity_Instance_ID: orchestration=fw-update Severity: critical Proposed_Repair_Action: Attempt to apply firmware update manually; if problem persists contact next level of support Maintenance_Action: Inhibit_Alarms: Alarm_Type: equipment Probable_Cause: underlying-resource-unavailable Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 900.311: Type: Log Description: Firmware update auto-apply start Entity_Instance_ID: orchestration=fw-update Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.312: Type: Log Description: Firmware update auto-apply in progress Entity_Instance_ID: orchestration=fw-update Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.313: Type: Log Description: Firmware update auto-apply rejected Entity_Instance_ID: orchestration=fw-update Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.314: Type: Log Description: Firmware update auto-apply cancelled Entity_Instance_ID: orchestration=fw-update Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.315: Type: Log Description: Firmware update auto-apply failed Entity_Instance_ID: orchestration=fw-update Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason
Service_Affecting: False Context: starlingx 900.316: Type: Log Description: Firmware update auto-apply completed Entity_Instance_ID: orchestration=fw-update Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.317: Type: Log Description: Firmware update auto-apply abort Entity_Instance_ID: orchestration=fw-update Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.318: Type: Log Description: Firmware update auto-apply aborting Entity_Instance_ID: orchestration=fw-update Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.319: Type: Log Description: Firmware update auto-apply abort rejected Entity_Instance_ID: orchestration=fw-update Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.320: Type: Log Description: Firmware update auto-apply abort failed Entity_Instance_ID: orchestration=fw-update Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.321: Type: Log Description: Firmware update auto-apply aborted Entity_Instance_ID: orchestration=fw-update Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.401: Type: Alarm Description: Kubernetes upgrade auto-apply in progress Entity_Instance_ID: orchestration=kube-upgrade Severity: major Proposed_Repair_Action: Wait for kubernetes upgrade auto-apply to complete; if problem persists contact next level of support Maintenance_Action: Inhibit_Alarms: Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: none 900.402: Type: Alarm Description: Kubernetes upgrade auto-apply 
aborting Entity_Instance_ID: orchestration=kube-upgrade Severity: major Proposed_Repair_Action: Wait for kubernetes upgrade auto-apply abort to complete; if problem persists contact next level of support Maintenance_Action: Inhibit_Alarms: Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: none 900.403: Type: Alarm Description: Kubernetes upgrade auto-apply failed Entity_Instance_ID: orchestration=kube-upgrade Severity: critical Proposed_Repair_Action: Attempt to apply kubernetes upgrade manually; if problem persists contact next level of support Maintenance_Action: Inhibit_Alarms: Alarm_Type: equipment Probable_Cause: underlying-resource-unavailable Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: none 900.411: Type: Log Description: Kubernetes upgrade auto-apply start Entity_Instance_ID: orchestration=kube-upgrade Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: none 900.412: Type: Log Description: Kubernetes upgrade auto-apply in progress Entity_Instance_ID: orchestration=kube-upgrade Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: none 900.413: Type: Log Description: Kubernetes upgrade auto-apply rejected Entity_Instance_ID: orchestration=kube-upgrade Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: none 900.414: Type: Log Description: Kubernetes upgrade auto-apply cancelled Entity_Instance_ID: orchestration=kube-upgrade Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: none 900.415: Type: Log Description: Kubernetes upgrade auto-apply failed Entity_Instance_ID: orchestration=kube-upgrade Severity: critical Alarm_Type: 
equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: none 900.416: Type: Log Description: Kubernetes upgrade auto-apply completed Entity_Instance_ID: orchestration=kube-upgrade Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: none 900.417: Type: Log Description: Kubernetes upgrade auto-apply abort Entity_Instance_ID: orchestration=kube-upgrade Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: none 900.418: Type: Log Description: Kubernetes upgrade auto-apply aborting Entity_Instance_ID: orchestration=kube-upgrade Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: none 900.419: Type: Log Description: Kubernetes upgrade auto-apply abort rejected Entity_Instance_ID: orchestration=kube-upgrade Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: none 900.420: Type: Log Description: Kubernetes upgrade auto-apply abort failed Entity_Instance_ID: orchestration=kube-upgrade Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: none 900.421: Type: Log Description: Kubernetes upgrade auto-apply aborted Entity_Instance_ID: orchestration=kube-upgrade Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: none 900.501: Type: Alarm Description: Kubernetes rootca update auto-apply in progress Entity_Instance_ID: orchestration=kube-rootca-update Severity: major Proposed_Repair_Action: Wait for kubernetes rootca update auto-apply to complete; if problem persists contact next level of support Maintenance_Action: Inhibit_Alarms: Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 
900.502: Type: Alarm Description: Kubernetes rootca update auto-apply aborting Entity_Instance_ID: orchestration=kube-rootca-update Severity: major Proposed_Repair_Action: Wait for kubernetes rootca update auto-apply abort to complete; if problem persists contact next level of support Maintenance_Action: Inhibit_Alarms: Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 900.503: Type: Alarm Description: Kubernetes rootca update auto-apply failed. Command "sw-manager kube-rootca-update-strategy apply" failed. Entity_Instance_ID: orchestration=kube-rootca-update Severity: critical Proposed_Repair_Action: Attempt to apply kubernetes rootca update manually; if problem persists contact next level of support Maintenance_Action: Inhibit_Alarms: Alarm_Type: equipment Probable_Cause: underlying-resource-unavailable Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 900.511: Type: Log Description: Kubernetes rootca update auto-apply start Entity_Instance_ID: orchestration=kube-rootca-update Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.512: Type: Log Description: Kubernetes rootca update auto-apply in progress Entity_Instance_ID: orchestration=kube-rootca-update Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.513: Type: Log Description: Kubernetes rootca update auto-apply rejected Entity_Instance_ID: orchestration=kube-rootca-update Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.514: Type: Log Description: Kubernetes rootca update auto-apply cancelled Entity_Instance_ID: orchestration=kube-rootca-update Severity: critical Alarm_Type:
equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.515: Type: Log Description: Kubernetes rootca update auto-apply failed Entity_Instance_ID: orchestration=kube-rootca-update Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.516: Type: Log Description: Kubernetes rootca update auto-apply completed Entity_Instance_ID: orchestration=kube-rootca-update Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.517: Type: Log Description: Kubernetes rootca update auto-apply abort Entity_Instance_ID: orchestration=kube-rootca-update Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.518: Type: Log Description: Kubernetes rootca update auto-apply aborting Entity_Instance_ID: orchestration=kube-rootca-update Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.519: Type: Log Description: Kubernetes rootca update auto-apply abort rejected Entity_Instance_ID: orchestration=kube-rootca-update Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.520: Type: Log Description: Kubernetes rootca update auto-apply abort failed Entity_Instance_ID: orchestration=kube-rootca-update Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.521: Type: Log Description: Kubernetes rootca update auto-apply aborted Entity_Instance_ID: orchestration=kube-rootca-update Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.601: Type: Alarm Description: System config update auto-apply in progress Entity_Instance_ID: orchestration=system-config-update Severity: major 
Proposed_Repair_Action: Wait for system config update auto-apply to complete; if problem persists contact next level of support Maintenance_Action: Inhibit_Alarms: Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 900.602: Type: Alarm Description: System config update auto-apply aborting Entity_Instance_ID: orchestration=system-config-update Severity: major Proposed_Repair_Action: Wait for system config update auto-apply abort to complete; if problem persists contact next level of support Maintenance_Action: Inhibit_Alarms: Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 900.603: Type: Alarm Description: System config update auto-apply failed. Command "sw-manager system-config-update-strategy apply" failed Entity_Instance_ID: orchestration=system-config-update Severity: critical Proposed_Repair_Action: Attempt to apply system config update manually; if problem persists contact next level of support Maintenance_Action: Inhibit_Alarms: Alarm_Type: equipment Probable_Cause: underlying-resource-unavailable Service_Affecting: True Suppression: True Management_Affecting_Severity: warning Degrade_Affecting_Severity: none Context: starlingx 900.611: Type: Log Description: System config update auto-apply start Entity_Instance_ID: orchestration=system-config-update Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.612: Type: Log Description: System config update auto-apply in progress Entity_Instance_ID: orchestration=system-config-update Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.613: Type: Log Description: System config update auto-apply rejected
Entity_Instance_ID: orchestration=system-config-update Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.614: Type: Log Description: System config update auto-apply cancelled Entity_Instance_ID: orchestration=system-config-update Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.615: Type: Log Description: System config update auto-apply failed Entity_Instance_ID: orchestration=system-config-update Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.616: Type: Log Description: System config update auto-apply completed Entity_Instance_ID: orchestration=system-config-update Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.617: Type: Log Description: System config update auto-apply abort Entity_Instance_ID: orchestration=system-config-update Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.618: Type: Log Description: System config update auto-apply aborting Entity_Instance_ID: orchestration=system-config-update Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.619: Type: Log Description: System config update auto-apply abort rejected Entity_Instance_ID: orchestration=system-config-update Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.620: Type: Log Description: System config update auto-apply abort failed Entity_Instance_ID: orchestration=system-config-update Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx 900.621: Type: Log Description: System config update auto-apply aborted 
Entity_Instance_ID: orchestration=system-config-update Severity: critical Alarm_Type: equipment Probable_Cause: unspecified-reason Service_Affecting: False Context: starlingx ...