diff --git a/fm-doc/fm_doc/events.yaml b/fm-doc/fm_doc/events.yaml index 72c9fdbf..f16037c5 100755 --- a/fm-doc/fm_doc/events.yaml +++ b/fm-doc/fm_doc/events.yaml @@ -3109,11 +3109,12 @@ 800.001: Type: Alarm Description: |- - Storage Alarm Condition: - 1 mons down, quorum 1,2 controller-1,storage-0 + Possible data loss. Any mds, mon or osd is unavailable in storage replication group. Entity_Instance_ID: cluster=<dist-fs-uuid> Severity: [critical, major] - Proposed_Repair_Action: "If problem persists, contact next level of support." + Proposed_Repair_Action: "Manually restart Ceph processes and check the state of the Ceph cluster with + 'ceph -s'. + If problem persists, contact next level of support." Maintenance_Action: Inhibit_Alarms: Alarm_Type: equipment @@ -3133,7 +3134,10 @@ Entity_Instance_ID: cluster=<dist-fs-uuid>.peergroup=<group-x> Severity: [critical] Proposed_Repair_Action: "Ensure storage hosts from replication group are unlocked and available. + Check replication group state with 'system host-list'. Check if OSDs of each storage host are up and running. + Manually restart Ceph processes and check the state of the Ceph OSDs with + 'ceph osd stat' OR 'ceph osd tree'. If problem persists, contact next level of support." Maintenance_Action: Inhibit_Alarms: @@ -3153,7 +3157,10 @@ Entity_Instance_ID: cluster=<dist-fs-uuid>.peergroup=<group-x> Severity: [major] Proposed_Repair_Action: "Ensure storage hosts from replication group are unlocked and available. + Check replication group state with 'system host-list'. Check if OSDs of each storage host are up and running. + Manually restart Ceph processes and check the state of the Ceph OSDs with + 'ceph osd stat' AND/OR 'ceph osd tree'. If problem persists, contact next level of support." Maintenance_Action: Inhibit_Alarms: @@ -3282,6 +3289,9 @@ Entity_Instance_ID: <hostname>.lvmthinpool=<VG name>/<Pool name> Severity: critical Proposed_Repair_Action: "Increase Storage Space Allotment for Cinder on the 'lvm' backend.
+ Try the following commands: + 'vgextend <VG name> <PV name>' or 'vgextend -L +<size extension> <PV name>'. + Check status with 'vgdisplay'. Consult the System Administration Manual for more details. If problem persists, contact next level of support." Maintenance_Action: @@ -3302,6 +3312,10 @@ Entity_Instance_ID: storage_backend=<storage-backend-name> Severity: critical Proposed_Repair_Action: "Update backend setting to reapply configuration. + Use the following commands to try again: + 'system storage-backend-delete <storage-backend-name>' + AND + 'system storage-backend-add <storage-backend-name>'. Consult the System Administration Manual for more details. If problem persists, contact next level of support." Maintenance_Action: