diff --git a/base/lighttpd/centos/build_srpm.data b/base/lighttpd/centos/build_srpm.data index f057a5a58..6d94d3e28 100755 --- a/base/lighttpd/centos/build_srpm.data +++ b/base/lighttpd/centos/build_srpm.data @@ -6,4 +6,4 @@ COPY_LIST="lighttpd-1.4.35/index.html.lighttpd \ lighttpd-1.4.35/lighttpd-csr.conf \ lighttpd-1.4.35/check-content-length.patch \ lighttpd-1.4.35/lighttpd-tpm-support.patch" -TIS_PATCH_VER=5 +TIS_PATCH_VER=6 diff --git a/base/lighttpd/lighttpd-1.4.35/lighttpd.conf b/base/lighttpd/lighttpd-1.4.35/lighttpd.conf index 48ada9b6d..31b294800 100755 --- a/base/lighttpd/lighttpd-1.4.35/lighttpd.conf +++ b/base/lighttpd/lighttpd-1.4.35/lighttpd.conf @@ -243,6 +243,9 @@ $HTTP["url"] !~ "^/(rel-[^/]*|feed|updates|static)/" { # ".cgi" => "/usr/bin/perl" ) # +#### Listen to IPv6 +$SERVER["socket"] == "[::]:80" { } + #### status module #status.status-url = "/server-status" #status.config-url = "/server-config" diff --git a/base/rsync/centos/build_srpm.data b/base/rsync/centos/build_srpm.data index 2c93764a1..69cb924ed 100644 --- a/base/rsync/centos/build_srpm.data +++ b/base/rsync/centos/build_srpm.data @@ -1,2 +1,2 @@ COPY_LIST="$PKG_BASE/files/rsyncd.conf" -TIS_PATCH_VER=1 +TIS_PATCH_VER=2 diff --git a/base/rsync/files/rsyncd.conf b/base/rsync/files/rsyncd.conf index 8b56742b8..f7a26e1df 100644 --- a/base/rsync/files/rsyncd.conf +++ b/base/rsync/files/rsyncd.conf @@ -49,3 +49,9 @@ read only = yes comment = SSL ca certificate uid = root read only = no + +[helm_charts] + path = /www/pages/helm_charts + comment = Helm chart repo + uid = root + read only = no diff --git a/centos_guest_image.inc b/centos_guest_image.inc new file mode 100644 index 000000000..c68985da3 --- /dev/null +++ b/centos_guest_image.inc @@ -0,0 +1,48 @@ +# List of packages to be included/installed in guest image +# If these have dependencies, they will be pulled in automatically +# + +# build-info +build-info + +# kernel +perf + +# +# Network Drivers +# + +# i40evf-kmod +kmod-i40evf + +# i40e-kmod +kmod-i40e + +# ixgbevf-kmod +kmod-ixgbevf + +# ixgbe-kmod +kmod-ixgbe + +# qat17 +qat17 + +# +# TPM2 libs to enable vTPM on Guest +# + +# tpm-kmod +kmod-tpm + +# tss2 +tss2 + +# tpm2-tools +tpm2-tools + +# +# ima support +# + +# rpm +rpm-plugin-systemd-inhibit diff --git a/centos_guest_image_rt.inc b/centos_guest_image_rt.inc new file mode 100644 index 000000000..c98982df3 --- /dev/null +++ b/centos_guest_image_rt.inc @@ -0,0 +1,45 @@ +# List of packages to be included/installed in RT guest image +# If these have dependencies, they will be pulled in automatically +# + +# build-info +build-info + +# +# Network Drivers +# + +# i40evf-kmod +kmod-i40evf-rt + +# i40e-kmod +kmod-i40e-rt + +# ixgbevf-kmod +kmod-ixgbevf-rt + +# ixgbe-kmod +kmod-ixgbe-rt + +# qat17 +qat17-rt + +# +# TPM2 libs to enable vTPM on Guest +# + +# tpm-kmod +kmod-tpm-rt + +# tss2 +tss2 + +# tpm2-tools +tpm2-tools + +# +# ima support +# + +# rpm +rpm-plugin-systemd-inhibit diff --git a/centos_iso_image.inc b/centos_iso_image.inc new file mode 100644 index 000000000..3ebf82576 --- /dev/null +++ b/centos_iso_image.inc @@ -0,0 +1,334 @@ +# List of packages to be included/installed in ISO +# If these have dependencies, they will be pulled in automatically +# + +# vm-topology +vm-topology + +# namespace-utils +namespace-utils + +# qemu-kvm-ev +qemu-kvm-ev +qemu-img-ev +qemu-kvm-tools-ev + +# nfscheck +nfscheck + +# libvirt +libvirt +libvirt-docs +libvirt-daemon +libvirt-daemon-config-network +libvirt-daemon-config-nwfilter +libvirt-daemon-driver-network 
+libvirt-daemon-driver-nwfilter +libvirt-daemon-driver-nodedev +libvirt-daemon-driver-secret +libvirt-daemon-driver-storage +libvirt-daemon-driver-qemu +libvirt-daemon-driver-lxc +libvirt-client + +# python-cephclient +python-cephclient + +# python-ryu +python2-ryu +python-ryu-common + +# python-smartpm +python-smartpm + +# lldpd +lldpd + +# nova-utils +nova-utils + +# mlx4-config +mlx4-config + +# wrs-ssl +wrs-ssl + +# tss2 +tss2 + +# tpm2-openssl-engine +tpm2-openssl-engine + +# libtpms +libtpms + +# swtpm +swtpm +swtpm-cuse +swtpm-tools + +# tis-extensions +tis-extensions +tis-extensions-controller + +# python-3parclient +python-3parclient + +# python-lefthandclient +python-lefthandclient + +# collectd-extensions +collectd-extensions + +# influxdb-extensions +influxdb-extensions + +# docker-distribution +docker-distribution + +# helm +helm + +# logmgmt +logmgmt + +# filesystem-scripts +filesystem-scripts + +# io-scheduler +io-scheduler + +# collector +collector + +# platform-util +platform-util +platform-util-noncontroller + +# monitor-tools +monitor-tools + +# e1000e-kmod +kmod-e1000e +kmod-e1000e-rt + +# i40e-kmod +kmod-i40e +kmod-i40e-rt + +# ixgbevf-kmod +kmod-ixgbevf + +# ixgbe-kmod +kmod-ixgbe +kmod-ixgbe-rt + +# qat17 +qat17 +qat17-rt + +# tpm-kmod +kmod-tpm +kmod-tpm-rt + +# integrity-kmod +kmod-integrity +kmod-integrity-rt + +# drbd-kernel +kmod-drbd +kmod-drbd-rt + +# rpm +rpm-plugin-systemd-inhibit + +# dpkg +dpkg + +# cgcs-users +cgcs-users + +# ldapscripts +ldapscripts + +# drbd +drbd +drbd-utils +drbd-udev +drbd-pacemaker +drbd-heartbeat +drbd-bash-completion + +# build-info +build-info + +# initscripts +initscripts + +# setup +setup + +# lshell +lshell + +# nss-pam-ldapd +nss-pam-ldapd + +# centos-release +centos-release + +# nfs-utils +nfs-utils + +# dhcp +dhcp +dhclient + +# openssh +openssh +openssh-clients +openssh-server + +# facter +facter + +# vim +vim-enhanced + +# python +python + +# libvirt-python +libvirt-python + +# lighttpd +lighttpd +lighttpd-fastcgi +lighttpd-mod_geoip +lighttpd-mod_mysql_vhost + +# logrotate +logrotate + +# ntp +ntp +ntp-perl +ntpdate + +# pam +pam + +# shadow-utils +shadow-utils + +# syslog-ng +syslog-ng +syslog-ng-libdbi + +# novnc +novnc + +# sudo +sudo + +# net-snmp +net-snmp-utils +net-snmp-libs +net-snmp-python + +# openldap +openldap +openldap-servers +openldap-clients + +# openvswitch +openvswitch + +# libevent +libevent + +# tpm2-tools +tpm2-tools + +# audit +audit + +# kernel +kernel +kernel-tools +kernel-tools-libs +perf +python-perf + +# puppet +puppet + +# puppet-gnocchi +puppet-gnocchi + +# systemd +systemd + +# python-gunicorn +python2-gunicorn + +# tboot +tboot + +# memcached +memcached + +# kubernetes +kubernetes +kubernetes-master +kubernetes-node +kubernetes-kubeadm +kubernetes-client + +# resource-agents +resource-agents + +# bash +bash + +# haproxy +haproxy + +# iscsi-initiator-utils +iscsi-initiator-utils +iscsi-initiator-utils-iscsiuio + +# iptables +iptables +iptables-services +iptables-utils + +# python-psycopg2 +python-psycopg2 + +# dnsmasq +dnsmasq +dnsmasq-utils + +# rsync +rsync + +# parted +parted + +# python-keyring +python-keyring + +# grub2 +grub2-tools +grub2-efi-x64-modules + +# kernel-rt +kernel-rt +kernel-rt-kvm +kernel-rt-tools + +# mellanox drivers +rdma-core +mlnx-ofa_kernel-modules +mlnx-ofa_kernel-rt-modules diff --git a/centos_pkg_dirs b/centos_pkg_dirs index ad08ec0a9..d04e605a4 100644 --- a/centos_pkg_dirs +++ b/centos_pkg_dirs @@ -143,3 +143,5 @@ filesystem/parted security/python-keyring 
grub/grub2 utilities/build-info +ceph/ceph +ceph/ceph-manager diff --git a/ceph/ceph-manager/.gitignore b/ceph/ceph-manager/.gitignore new file mode 100644 index 000000000..78868598f --- /dev/null +++ b/ceph/ceph-manager/.gitignore @@ -0,0 +1,6 @@ +!.distro +.distro/centos7/rpmbuild/RPMS +.distro/centos7/rpmbuild/SRPMS +.distro/centos7/rpmbuild/BUILD +.distro/centos7/rpmbuild/BUILDROOT +.distro/centos7/rpmbuild/SOURCES/ceph-manager*tar.gz diff --git a/ceph/ceph-manager/LICENSE b/ceph/ceph-manager/LICENSE new file mode 100644 index 000000000..d64569567 --- /dev/null +++ b/ceph/ceph-manager/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/ceph/ceph-manager/PKG-INFO b/ceph/ceph-manager/PKG-INFO new file mode 100644 index 000000000..5b6746d87 --- /dev/null +++ b/ceph/ceph-manager/PKG-INFO @@ -0,0 +1,13 @@ +Metadata-Version: 1.1 +Name: ceph-manager +Version: 1.0 +Summary: Handle Ceph API calls and provide status updates via alarms +Home-page: +Author: Windriver +Author-email: info@windriver.com +License: Apache-2.0 + +Description: Handle Ceph API calls and provide status updates via alarms + + +Platform: UNKNOWN diff --git a/ceph/ceph-manager/centos/build_srpm.data b/ceph/ceph-manager/centos/build_srpm.data new file mode 100644 index 000000000..d01510bde --- /dev/null +++ b/ceph/ceph-manager/centos/build_srpm.data @@ -0,0 +1,3 @@ +SRC_DIR="ceph-manager" +COPY_LIST_TO_TAR="files scripts" +TIS_PATCH_VER=4 diff --git a/ceph/ceph-manager/centos/ceph-manager.spec b/ceph/ceph-manager/centos/ceph-manager.spec new file mode 100644 index 000000000..2f54deb5f --- /dev/null +++ b/ceph/ceph-manager/centos/ceph-manager.spec @@ -0,0 +1,70 @@ +Summary: Handle Ceph API calls and provide status updates via alarms +Name: ceph-manager +Version: 1.0 +Release: %{tis_patch_ver}%{?_tis_dist} +License: Apache-2.0 +Group: base +Packager: Wind River +URL: unknown +Source0: %{name}-%{version}.tar.gz + +BuildRequires: python-setuptools +BuildRequires: systemd-units +BuildRequires: systemd-devel +Requires: sysinv + +%description +Handle Ceph API calls and provide status updates via alarms. 
+Handle sysinv RPC calls for long running Ceph API operations: +- cache tiering enable +- cache tiering disable + +%define local_bindir /usr/bin/ +%define local_etc_initd /etc/init.d/ +%define local_etc_logrotated /etc/logrotate.d/ +%define pythonroot /usr/lib64/python2.7/site-packages + +%define debug_package %{nil} + +%prep +%setup + +%build +%{__python} setup.py build + +%install +%{__python} setup.py install --root=$RPM_BUILD_ROOT \ + --install-lib=%{pythonroot} \ + --prefix=/usr \ + --install-data=/usr/share \ + --single-version-externally-managed + +install -d -m 755 %{buildroot}%{local_etc_initd} +install -p -D -m 700 scripts/init.d/ceph-manager %{buildroot}%{local_etc_initd}/ceph-manager + +install -d -m 755 %{buildroot}%{local_bindir} +install -p -D -m 700 scripts/bin/ceph-manager %{buildroot}%{local_bindir}/ceph-manager + +install -d -m 755 %{buildroot}%{local_etc_logrotated} +install -p -D -m 644 files/ceph-manager.logrotate %{buildroot}%{local_etc_logrotated}/ceph-manager.logrotate + +install -d -m 755 %{buildroot}%{_unitdir} +install -m 644 -p -D files/%{name}.service %{buildroot}%{_unitdir}/%{name}.service + +%clean +rm -rf $RPM_BUILD_ROOT + +# Note: The package name is ceph-manager but the import name is ceph_manager so +# can't use '%{name}'. +%files +%defattr(-,root,root,-) +%doc LICENSE +%{local_bindir}/* +%{local_etc_initd}/* +%{_unitdir}/%{name}.service +%dir %{local_etc_logrotated} +%{local_etc_logrotated}/* +%dir %{pythonroot}/ceph_manager +%{pythonroot}/ceph_manager/* +%dir %{pythonroot}/ceph_manager-%{version}.0-py2.7.egg-info +%{pythonroot}/ceph_manager-%{version}.0-py2.7.egg-info/* diff --git a/ceph/ceph-manager/ceph-manager/LICENSE b/ceph/ceph-manager/ceph-manager/LICENSE new file mode 100644 index 000000000..d64569567 --- /dev/null +++ b/ceph/ceph-manager/ceph-manager/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/ceph/ceph-manager/ceph-manager/ceph_manager/__init__.py b/ceph/ceph-manager/ceph-manager/ceph_manager/__init__.py new file mode 100644 index 000000000..754a8f4ef --- /dev/null +++ b/ceph/ceph-manager/ceph-manager/ceph_manager/__init__.py @@ -0,0 +1,5 @@ +# +# Copyright (c) 2016 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# diff --git a/ceph/ceph-manager/ceph-manager/ceph_manager/ceph.py b/ceph/ceph-manager/ceph-manager/ceph_manager/ceph.py new file mode 100644 index 000000000..a143b5775 --- /dev/null +++ b/ceph/ceph-manager/ceph-manager/ceph_manager/ceph.py @@ -0,0 +1,159 @@ +# +# Copyright (c) 2016-2018 Wind River Systems, Inc. 
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+import exception
+from i18n import _LI
+# noinspection PyUnresolvedReferences
+from oslo_log import log as logging
+
+
+LOG = logging.getLogger(__name__)
+
+
+def osd_pool_set_quota(ceph_api, pool_name, max_bytes=0, max_objects=0):
+    """Set the quota for an OSD pool_name
+    Setting max_bytes or max_objects to 0 will disable that quota param
+    :param pool_name: OSD pool_name
+    :param max_bytes: maximum bytes for OSD pool_name
+    :param max_objects: maximum objects for OSD pool_name
+    """
+
+    # Update quota if needed
+    prev_quota = osd_pool_get_quota(ceph_api, pool_name)
+    if prev_quota["max_bytes"] != max_bytes:
+        resp, b = ceph_api.osd_set_pool_quota(pool_name, 'max_bytes',
+                                              max_bytes, body='json')
+        if resp.ok:
+            LOG.info(_LI("Set OSD pool_name quota: "
+                         "pool_name={}, max_bytes={}").format(
+                pool_name, max_bytes))
+        else:
+            e = exception.CephPoolSetQuotaFailure(
+                pool=pool_name, name='max_bytes',
+                value=max_bytes, reason=resp.reason)
+            LOG.error(e)
+            raise e
+    if prev_quota["max_objects"] != max_objects:
+        resp, b = ceph_api.osd_set_pool_quota(pool_name, 'max_objects',
+                                              max_objects,
+                                              body='json')
+        if resp.ok:
+            LOG.info(_LI("Set OSD pool_name quota: "
+                         "pool_name={}, max_objects={}").format(
+                pool_name, max_objects))
+        else:
+            e = exception.CephPoolSetQuotaFailure(
+                pool=pool_name, name='max_objects',
+                value=max_objects, reason=resp.reason)
+            LOG.error(e)
+            raise e
+
+
+def osd_pool_get_quota(ceph_api, pool_name):
+    resp, quota = ceph_api.osd_get_pool_quota(pool_name, body='json')
+    if not resp.ok:
+        e = exception.CephPoolGetQuotaFailure(
+            pool=pool_name, reason=resp.reason)
+        LOG.error(e)
+        raise e
+    else:
+        return {"max_objects": quota["output"]["quota_max_objects"],
+                "max_bytes": quota["output"]["quota_max_bytes"]}
+
+
+def osd_pool_exists(ceph_api, pool_name):
+    response, body = ceph_api.osd_pool_get(
+        pool_name, "pg_num", body='json')
+    if response.ok:
+        return True
+    return False
+
+
+def osd_pool_create(ceph_api, pool_name, pg_num, pgp_num):
+    # ruleset 0: is the default ruleset if no crushmap is loaded or
+    # the ruleset for the backing tier if loaded:
+    # Name: storage_tier_ruleset
+    ruleset = 0
+    response, body = ceph_api.osd_pool_create(
+        pool_name, pg_num, pgp_num, pool_type="replicated",
+        ruleset=ruleset, body='json')
+    if response.ok:
+        LOG.info(_LI("Created OSD pool: "
+                     "pool_name={}, pg_num={}, pgp_num={}, "
+                     "pool_type=replicated, ruleset={}").format(
+            pool_name, pg_num, pgp_num, ruleset))
+    else:
+        e = exception.CephPoolCreateFailure(
+            name=pool_name, reason=response.reason)
+        LOG.error(e)
+        raise e
+
+    # Explicitly assign the ruleset to the pool on creation since it is
+    # ignored in the create call
+    response, body = ceph_api.osd_set_pool_param(
+        pool_name, "crush_ruleset", ruleset, body='json')
+    if response.ok:
+        LOG.info(_LI("Assigned crush ruleset to OSD pool: "
+                     "pool_name={}, ruleset={}").format(
+            pool_name, ruleset))
+    else:
+        e = exception.CephPoolRulesetFailure(
+            name=pool_name, reason=response.reason)
+        LOG.error(e)
+        ceph_api.osd_pool_delete(
+            pool_name, pool_name,
+            sure='--yes-i-really-really-mean-it',
+            body='json')
+        raise e
+
+
+def osd_pool_delete(ceph_api, pool_name):
+    """Delete an osd pool
+    :param pool_name: pool name
+    """
+    response, body = ceph_api.osd_pool_delete(
+        pool_name, pool_name,
+        sure='--yes-i-really-really-mean-it',
+        body='json')
+    if response.ok:
+        LOG.info(_LI("Deleted OSD pool {}").format(pool_name))
+    else:
+        e = exception.CephPoolDeleteFailure(
+            name=pool_name, reason=response.reason)
+        LOG.warn(e)
+        raise e
+
+
+def osd_set_pool_param(ceph_api, pool_name, param, value):
+    response, body = ceph_api.osd_set_pool_param(
+        pool_name, param, value,
+        force=None, body='json')
+    if response.ok:
+        LOG.info('OSD set pool param: '
+                 'pool={}, name={}, value={}'.format(
+                     pool_name, param, value))
+    else:
+        raise exception.CephPoolSetParamFailure(
+            pool_name=pool_name,
+            param=param,
+            value=str(value),
+            reason=response.reason)
+    return response, body
+
+
+def osd_get_pool_param(ceph_api, pool_name, param):
+    response, body = ceph_api.osd_get_pool_param(
+        pool_name, param, body='json')
+    if response.ok:
+        LOG.debug('OSD get pool param: '
+                  'pool={}, name={}, value={}'.format(
+                      pool_name, param, body['output'][param]))
+    else:
+        raise exception.CephPoolGetParamFailure(
+            pool_name=pool_name,
+            param=param,
+            reason=response.reason)
+    return body['output'][param]
diff --git a/ceph/ceph-manager/ceph-manager/ceph_manager/constants.py b/ceph/ceph-manager/ceph-manager/ceph_manager/constants.py
new file mode 100644
index 000000000..6cfbba4f8
--- /dev/null
+++ b/ceph/ceph-manager/ceph-manager/ceph_manager/constants.py
@@ -0,0 +1,90 @@
+#
+# Copyright (c) 2016-2018 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+from i18n import _
+# noinspection PyUnresolvedReferences
+from sysinv.common import constants as sysinv_constants
+
+CEPH_POOL_OBJECT_GATEWAY_NAME_JEWEL = \
+    sysinv_constants.CEPH_POOL_OBJECT_GATEWAY_NAME_JEWEL
+CEPH_POOL_OBJECT_GATEWAY_NAME_HAMMER = \
+    sysinv_constants.CEPH_POOL_OBJECT_GATEWAY_NAME_HAMMER
+CEPH_POOLS = sysinv_constants.CEPH_POOLS
+CEPH_REPLICATION_FACTOR = sysinv_constants.CEPH_REPLICATION_FACTOR_DEFAULT
+
+# Cache flush parameters
+CACHE_FLUSH_OBJECTS_THRESHOLD = 1000
+CACHE_FLUSH_MIN_WAIT_OBJ_COUNT_DECREASE_SEC = 1
+CACHE_FLUSH_MAX_WAIT_OBJ_COUNT_DECREASE_SEC = 128
+
+FM_ALARM_REASON_MAX_SIZE = 256
+
+# TODO this will later change based on parsed health
+# clock skew is vm malfunction, mon or osd is equipment mal
+ALARM_CAUSE = 'equipment-malfunction'
+ALARM_TYPE = 'equipment'
+
+# Ceph health check interval (in seconds)
+CEPH_HEALTH_CHECK_INTERVAL = 60
+
+# Ceph health statuses
+CEPH_HEALTH_OK = 'HEALTH_OK'
+CEPH_HEALTH_WARN = 'HEALTH_WARN'
+CEPH_HEALTH_ERR = 'HEALTH_ERR'
+CEPH_HEALTH_DOWN = 'CEPH_DOWN'
+
+# Statuses not reported by Ceph
+CEPH_STATUS_CUSTOM = [CEPH_HEALTH_DOWN]
+
+SEVERITY = {CEPH_HEALTH_DOWN: 'critical',
+            CEPH_HEALTH_ERR: 'critical',
+            CEPH_HEALTH_WARN: 'warning'}
+
+SERVICE_AFFECTING = {CEPH_HEALTH_DOWN: True,
+                     CEPH_HEALTH_ERR: True,
+                     CEPH_HEALTH_WARN: False}
+
+# TODO this will later change based on parsed health
+ALARM_REASON_NO_OSD = _('no OSDs')
+ALARM_REASON_OSDS_DOWN = _('OSDs are down')
+ALARM_REASON_OSDS_OUT = _('OSDs are out')
+ALARM_REASON_OSDS_DOWN_OUT = _('OSDs are down/out')
+ALARM_REASON_PEER_HOST_DOWN = _('peer host down')
+
+REPAIR_ACTION_MAJOR_CRITICAL_ALARM = _(
+    'Ensure storage hosts from replication group are unlocked and available. '
+    'Check if OSDs of each storage host are up and running. '
+    'If problem persists, contact next level of support.')
+REPAIR_ACTION = _('If problem persists, contact next level of support.')
+
+SYSINV_CONDUCTOR_TOPIC = 'sysinv.conductor_manager'
+CEPH_MANAGER_TOPIC = 'sysinv.ceph_manager'
+SYSINV_CONFIG_FILE = '/etc/sysinv/sysinv.conf'
+
+# Titanium Cloud version strings
+TITANIUM_SERVER_VERSION_18_03 = '18.03'
+
+CEPH_HEALTH_WARN_REQUIRE_JEWEL_OSDS_NOT_SET = (
+    "all OSDs are running jewel or later but the "
+    "'require_jewel_osds' osdmap flag is not set")
+
+UPGRADE_COMPLETED = \
+    sysinv_constants.UPGRADE_COMPLETED
+UPGRADE_ABORTING = \
+    sysinv_constants.UPGRADE_ABORTING
+UPGRADE_ABORT_COMPLETING = \
+    sysinv_constants.UPGRADE_ABORT_COMPLETING
+UPGRADE_ABORTING_ROLLBACK = \
+    sysinv_constants.UPGRADE_ABORTING_ROLLBACK
+
+CEPH_FLAG_REQUIRE_JEWEL_OSDS = 'require_jewel_osds'
+
+# Tiers
+CEPH_CRUSH_TIER_SUFFIX = sysinv_constants.CEPH_CRUSH_TIER_SUFFIX
+SB_TIER_TYPE_CEPH = sysinv_constants.SB_TIER_TYPE_CEPH
+SB_TIER_SUPPORTED = sysinv_constants.SB_TIER_SUPPORTED
+SB_TIER_DEFAULT_NAMES = sysinv_constants.SB_TIER_DEFAULT_NAMES
+SB_TIER_CEPH_POOLS = sysinv_constants.SB_TIER_CEPH_POOLS
diff --git a/ceph/ceph-manager/ceph-manager/ceph_manager/exception.py b/ceph/ceph-manager/ceph-manager/ceph_manager/exception.py
new file mode 100644
index 000000000..3ef078252
--- /dev/null
+++ b/ceph/ceph-manager/ceph-manager/ceph_manager/exception.py
@@ -0,0 +1,78 @@
+#
+# Copyright (c) 2016-2018 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+# noinspection PyUnresolvedReferences
+from i18n import _, _LW
+# noinspection PyUnresolvedReferences
+from oslo_log import log as logging
+
+
+LOG = logging.getLogger(__name__)
+
+
+class CephManagerException(Exception):
+    message = _("An unknown exception occurred.")
+
+    def __init__(self, message=None, **kwargs):
+        self.kwargs = kwargs
+        if not message:
+            try:
+                message = self.message % kwargs
+            except TypeError:
+                LOG.warn(_LW('Exception in string format operation'))
+                for name, value in kwargs.iteritems():
+                    LOG.error("%s: %s" % (name, value))
+                # at least get the core message out if something happened
+                message = self.message
+        super(CephManagerException, self).__init__(message)
+
+
+class CephPoolSetQuotaFailure(CephManagerException):
+    message = _("Error setting the OSD pool "
+                "quota %(name)s for %(pool)s to %(value)s") \
+        + ": %(reason)s"
+
+
+class CephPoolGetQuotaFailure(CephManagerException):
+    message = _("Error getting the OSD pool quota for %(pool)s") \
+        + ": %(reason)s"
+
+
+class CephPoolCreateFailure(CephManagerException):
+    message = _("Creating OSD pool %(name)s failed: %(reason)s")
+
+
+class CephPoolDeleteFailure(CephManagerException):
+    message = _("Deleting OSD pool %(name)s failed: %(reason)s")
+
+
+class CephPoolRulesetFailure(CephManagerException):
+    message = _("Assigning crush ruleset to OSD "
+                "pool %(name)s failed: %(reason)s")
+
+
+class CephPoolSetParamFailure(CephManagerException):
+    message = _("Cannot set Ceph OSD pool parameter: "
+                "pool_name=%(pool_name)s, param=%(param)s, value=%(value)s. "
+                "Reason: %(reason)s")
+
+
+class CephPoolGetParamFailure(CephManagerException):
+    message = _("Cannot get Ceph OSD pool parameter: "
+                "pool_name=%(pool_name)s, param=%(param)s. "
+                "Reason: %(reason)s")
+
+
+class CephSetKeyFailure(CephManagerException):
+    message = _("Error setting the Ceph flag "
+                "'%(flag)s' %(extra)s: "
+                "response=%(response_status_code)s:%(response_reason)s, "
+                "status=%(status)s, output=%(output)s")
+
+
+class CephApiFailure(CephManagerException):
+    message = _("API failure: "
+                "call=%(call)s, reason=%(reason)s")
diff --git a/ceph/ceph-manager/ceph-manager/ceph_manager/i18n.py b/ceph/ceph-manager/ceph-manager/ceph_manager/i18n.py
new file mode 100644
index 000000000..67977ceae
--- /dev/null
+++ b/ceph/ceph-manager/ceph-manager/ceph_manager/i18n.py
@@ -0,0 +1,15 @@
+#
+# Copyright (c) 2016 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+import oslo_i18n
+
+DOMAIN = 'ceph-manager'
+
+_translators = oslo_i18n.TranslatorFactory(domain=DOMAIN)
+_ = _translators.primary
+
+_LI = _translators.log_info
+_LW = _translators.log_warning
+_LE = _translators.log_error
diff --git a/ceph/ceph-manager/ceph-manager/ceph_manager/monitor.py b/ceph/ceph-manager/ceph-manager/ceph_manager/monitor.py
new file mode 100644
index 000000000..2a13f88a1
--- /dev/null
+++ b/ceph/ceph-manager/ceph-manager/ceph_manager/monitor.py
@@ -0,0 +1,874 @@
+#
+# Copyright (c) 2013-2018 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+import time
+
+# noinspection PyUnresolvedReferences
+from fm_api import fm_api
+# noinspection PyUnresolvedReferences
+from fm_api import constants as fm_constants
+# noinspection PyUnresolvedReferences
+from oslo_log import log as logging
+
+# noinspection PyProtectedMember
+from i18n import _, _LI, _LW, _LE
+
+import constants
+import exception
+
+LOG = logging.getLogger(__name__)
+
+
+# In 18.03 R5, ceph cache tiering was disabled and prevented from being
+# re-enabled. When upgrading from 18.03 (R5) to R6 we need to remove the
When upgrading from 18.03 (R5) to R6 we need to remove the +# cache-tier from the crushmap ceph-cache-tiering +# +# This class is needed only when upgrading from R5 to R6 +# TODO: remove it after 1st R6 release +# +class HandleUpgradesMixin(object): + + def __init__(self, service): + self.service = service + self.wait_for_upgrade_complete = False + + def setup(self, config): + self._set_upgrade(self.service.retry_get_software_upgrade_status()) + + def _set_upgrade(self, upgrade): + state = upgrade.get('state') + from_version = upgrade.get('from_version') + if (state + and state != constants.UPGRADE_COMPLETED + and from_version == constants.TITANIUM_SERVER_VERSION_18_03): + + LOG.info(_LI("Wait for ceph upgrade to complete before monitoring cluster.")) + self.wait_for_upgrade_complete = True + + def set_flag_require_jewel_osds(self): + try: + response, body = self.service.ceph_api.osd_set_key( + constants.CEPH_FLAG_REQUIRE_JEWEL_OSDS, + body='json') + LOG.info(_LI("Set require_jewel_osds flag")) + except IOError as e: + raise exception.CephApiFailure( + call="osd_set_key", + reason=e.message) + else: + if not response.ok: + raise exception.CephSetKeyFailure( + flag=constants.CEPH_FLAG_REQUIRE_JEWEL_OSDS, + extra=_("needed to complete upgrade to Jewel"), + response_status_code=response.status_code, + response_reason=response.reason, + status=body.get('status'), + output=body.get('output')) + + def filter_health_status(self, health): + health = self.auto_heal(health) + # filter out require_jewel_osds warning + # + if not self.wait_for_upgrade_complete: + return health + if health['health'] != constants.CEPH_HEALTH_WARN: + return health + if (constants.CEPH_HEALTH_WARN_REQUIRE_JEWEL_OSDS_NOT_SET + not in health['detail']): + return health + return self._remove_require_jewel_osds_warning(health) + + def _remove_require_jewel_osds_warning(self, health): + reasons_list = [] + for reason in health['detail'].split(';'): + reason = reason.strip() + if len(reason) == 0: + continue + if constants.CEPH_HEALTH_WARN_REQUIRE_JEWEL_OSDS_NOT_SET in reason: + continue + reasons_list.append(reason) + if len(reasons_list) == 0: + health = { + 'health': constants.CEPH_HEALTH_OK, + 'detail': ''} + else: + health['detail'] = '; '.join(reasons_list) + return health + + def auto_heal(self, health): + if (health['health'] == constants.CEPH_HEALTH_WARN + and (constants.CEPH_HEALTH_WARN_REQUIRE_JEWEL_OSDS_NOT_SET + in health['detail'])): + try: + upgrade = self.service.get_software_upgrade_status() + except Exception as ex: + LOG.warn(_LW( + "Getting software upgrade status failed " + "with: %s. Skip auto-heal attempt " + "(will retry on next ceph status poll).") % str(ex)) + return health + state = upgrade.get('state') + # surpress require_jewel_osds in case upgrade is + # in progress but not completed or aborting + if (not self.wait_for_upgrade_complete + and (upgrade.get('from_version') + == constants.TITANIUM_SERVER_VERSION_18_03) + and state not in [ + None, + constants.UPGRADE_COMPLETED, + constants.UPGRADE_ABORTING, + constants.UPGRADE_ABORT_COMPLETING, + constants.UPGRADE_ABORTING_ROLLBACK]): + self.wait_for_upgrade_complete = True + # set require_jewel_osds in case upgrade is + # not in progress or completed + if (state in [None, constants.UPGRADE_COMPLETED]): + LOG.warn(_LW( + "No upgrade in progress or update completed " + "and require_jewel_osds health warning raised. 
" + "Set require_jewel_osds flag.")) + self.set_flag_require_jewel_osds() + health = self._remove_require_jewel_osds_warning(health) + LOG.info(_LI("Unsurpress require_jewel_osds health warning")) + self.wait_for_upgrade_complete = False + # unsurpress require_jewel_osds in case upgrade + # is aborting + if (state in [ + constants.UPGRADE_ABORTING, + constants.UPGRADE_ABORT_COMPLETING, + constants.UPGRADE_ABORTING_ROLLBACK]): + self.wait_for_upgrade_complete = False + return health + + +class Monitor(HandleUpgradesMixin): + + def __init__(self, service): + self.service = service + self.current_ceph_health = "" + self.tiers_size = {} + self.known_object_pool_name = None + self.primary_tier_name = constants.SB_TIER_DEFAULT_NAMES[ + constants.SB_TIER_TYPE_CEPH] + constants.CEPH_CRUSH_TIER_SUFFIX + self.cluster_is_up = False + super(Monitor, self).__init__(service) + + def setup(self, config): + super(Monitor, self).setup(config) + + def run(self): + # Wait until Ceph cluster is up and we can get the fsid + while True: + try: + self.ceph_get_fsid() + except Exception: + LOG.exception("Error getting fsid, " + "will retry in %ss" % constants.CEPH_HEALTH_CHECK_INTERVAL) + if self.service.entity_instance_id: + break + time.sleep(constants.CEPH_HEALTH_CHECK_INTERVAL) + + # Start monitoring ceph status + while True: + try: + self.ceph_poll_status() + self.ceph_poll_quotas() + except Exception: + LOG.exception("Error running periodic monitoring of ceph status, " + "will retry in %ss" % constants.CEPH_HEALTH_CHECK_INTERVAL) + time.sleep(constants.CEPH_HEALTH_CHECK_INTERVAL) + + def ceph_get_fsid(self): + # Check whether an alarm has already been raised + self._get_current_alarms() + if self.current_health_alarm: + LOG.info(_LI("Current alarm: %s") % + str(self.current_health_alarm.__dict__)) + + fsid = self._get_fsid() + if not fsid: + # Raise alarm - it will not have an entity_instance_id + self._report_fault({'health': constants.CEPH_HEALTH_DOWN, + 'detail': 'Ceph cluster is down.'}, + fm_constants.FM_ALARM_ID_STORAGE_CEPH) + else: + # Clear alarm with no entity_instance_id + self._clear_fault(fm_constants.FM_ALARM_ID_STORAGE_CEPH) + self.service.entity_instance_id = 'cluster=%s' % fsid + + def ceph_poll_status(self): + # get previous data every time in case: + # * daemon restarted + # * alarm was cleared manually but stored as raised in daemon + self._get_current_alarms() + if self.current_health_alarm: + LOG.info(_LI("Current alarm: %s") % + str(self.current_health_alarm.__dict__)) + + # get ceph health + health = self._get_health() + LOG.info(_LI("Current Ceph health: " + "%(health)s detail: %(detail)s") % health) + + health = self.filter_health_status(health) + if health['health'] != constants.CEPH_HEALTH_OK: + self._report_fault(health, fm_constants.FM_ALARM_ID_STORAGE_CEPH) + self._report_alarm_osds_health() + else: + self._clear_fault(fm_constants.FM_ALARM_ID_STORAGE_CEPH) + self.clear_all_major_critical() + + def filter_health_status(self, health): + return super(Monitor, self).filter_health_status(health) + + def ceph_poll_quotas(self): + self._get_current_alarms() + if self.current_quota_alarms: + LOG.info(_LI("Current quota alarms %s") % + self.current_quota_alarms) + + # Get current current size of each tier + previous_tiers_size = self.tiers_size + self.tiers_size = self._get_tiers_size() + + # Make sure any removed tiers have the alarms cleared + for t in (set(previous_tiers_size)-set(self.tiers_size)): + self._clear_fault(fm_constants.FM_ALARM_ID_STORAGE_CEPH_FREE_SPACE, + 
"{0}.tier={1}".format( + self.service.entity_instance_id, + t[:-len(constants.CEPH_CRUSH_TIER_SUFFIX)])) + + # Check the quotas on each tier + for tier in self.tiers_size: + # Extract the tier name from the crush equivalent + tier_name = tier[:-len(constants.CEPH_CRUSH_TIER_SUFFIX)] + + if self.tiers_size[tier] == 0: + LOG.info(_LI("'%s' tier cluster size not yet available") + % tier_name) + continue + + pools_quota_sum = 0 + if tier == self.primary_tier_name: + for pool in constants.CEPH_POOLS: + if (pool['pool_name'] == + constants.CEPH_POOL_OBJECT_GATEWAY_NAME_JEWEL or + pool['pool_name'] == + constants.CEPH_POOL_OBJECT_GATEWAY_NAME_HAMMER): + object_pool_name = self._get_object_pool_name() + if object_pool_name is None: + LOG.error("Rados gateway object data pool does " + "not exist.") + else: + pools_quota_sum += \ + self._get_osd_pool_quota(object_pool_name) + else: + pools_quota_sum += self._get_osd_pool_quota( + pool['pool_name']) + else: + for pool in constants.SB_TIER_CEPH_POOLS: + pool_name = "{0}-{1}".format(pool['pool_name'], tier_name) + pools_quota_sum += self._get_osd_pool_quota(pool_name) + + # Currently, there is only one pool on the addtional tier(s), + # therefore allow a quota of 0 + if (pools_quota_sum != self.tiers_size[tier] and + pools_quota_sum != 0): + self._report_fault( + {'tier_name': tier_name, + 'tier_eid': "{0}.tier={1}".format( + self.service.entity_instance_id, + tier_name)}, + fm_constants.FM_ALARM_ID_STORAGE_CEPH_FREE_SPACE) + else: + self._clear_fault( + fm_constants.FM_ALARM_ID_STORAGE_CEPH_FREE_SPACE, + "{0}.tier={1}".format(self.service.entity_instance_id, + tier_name)) + + # CEPH HELPERS + + def _get_fsid(self): + try: + response, fsid = self.service.ceph_api.fsid( + body='text', timeout=30) + except IOError as e: + LOG.warning(_LW("ceph_api.fsid failed: %s") % str(e.message)) + self.cluster_is_up = False + return None + + if not response.ok: + LOG.warning(_LW("Get fsid failed: %s") % response.reason) + self.cluster_is_up = False + return None + + self.cluster_is_up = True + return fsid.strip() + + def _get_health(self): + try: + # we use text since it has all info + response, body = self.service.ceph_api.health( + body='text', timeout=30) + except IOError as e: + LOG.warning(_LW("ceph_api.health failed: %s") % str(e.message)) + self.cluster_is_up = False + return {'health': constants.CEPH_HEALTH_DOWN, + 'detail': 'Ceph cluster is down.'} + + if not response.ok: + LOG.warning(_LW("CEPH health check failed: %s") % response.reason) + health_info = [constants.CEPH_HEALTH_DOWN, response.reason] + self.cluster_is_up = False + else: + health_info = body.split(' ', 1) + self.cluster_is_up = True + + health = health_info[0] + + if len(health_info) > 1: + detail = health_info[1] + else: + detail = health_info[0] + + return {'health': health.strip(), + 'detail': detail.strip()} + + def _get_object_pool_name(self): + if self.known_object_pool_name is None: + response, body = self.service.ceph_api.osd_pool_get( + constants.CEPH_POOL_OBJECT_GATEWAY_NAME_JEWEL, + "pg_num", + body='json') + + if response.ok: + self.known_object_pool_name = \ + constants.CEPH_POOL_OBJECT_GATEWAY_NAME_JEWEL + return self.known_object_pool_name + + response, body = self.service.ceph_api.osd_pool_get( + constants.CEPH_POOL_OBJECT_GATEWAY_NAME_HAMMER, + "pg_num", + body='json') + + if response.ok: + self.known_object_pool_name = \ + constants.CEPH_POOL_OBJECT_GATEWAY_NAME_HAMMER + return self.known_object_pool_name + + return self.known_object_pool_name + + def 
_get_osd_pool_quota(self, pool_name): + try: + resp, quota = self.service.ceph_api.osd_get_pool_quota( + pool_name, body='json') + except IOError: + return 0 + + if not resp.ok: + LOG.error(_LE("Getting the quota for " + "%(name)s pool failed:%(reason)s)") % + {"name": pool_name, "reason": resp.reason}) + return 0 + else: + try: + quota_gib = int(quota["output"]["quota_max_bytes"])/(1024**3) + return quota_gib + except IOError: + return 0 + + # we have two root nodes 'cache-tier' and 'storage-tier' + # to calculate the space that is used by the pools, we must only + # use 'storage-tier' + # this function determines if a certain node is under a certain + # tree + def host_is_in_root(self, search_tree, node, root_name): + if node['type'] == 'root': + if node['name'] == root_name: + return True + else: + return False + return self.host_is_in_root(search_tree, + search_tree[node['parent']], + root_name) + + # The information received from ceph is not properly + # structured for efficient parsing and searching, so + # it must be processed and transformed into a more + # structured form. + # + # Input received from ceph is an array of nodes with the + # following structure: + # [{'id':, 'children':, ....}, + # ...] + # + # We process this array and transform it into a dictionary + # (for efficient access) The transformed "search tree" is a + # dictionary with the following structure: + # { : {'children':} + def _get_tiers_size(self): + try: + resp, body = self.service.ceph_api.osd_df( + body='json', + output_method='tree') + except IOError: + return 0 + if not resp.ok: + LOG.error(_LE("Getting the cluster usage " + "information failed: %(reason)s - " + "%(body)s") % {"reason": resp.reason, + "body": body}) + return {} + + # A node is a crushmap element: root, chassis, host, osd. Create a + # dictionary for the nodes with the key as the id used for efficient + # searching through nodes. + # + # For example: storage-0's node has one child node => OSD 0 + # { + # "id": -4, + # "name": "storage-0", + # "type": "host", + # "type_id": 1, + # "reweight": -1.000000, + # "kb": 51354096, + # "kb_used": 1510348, + # "kb_avail": 49843748, + # "utilization": 2.941047, + # "var": 1.480470, + # "pgs": 0, + # "children": [ + # 0 + # ] + # }, + search_tree = {} + for node in body['output']['nodes']: + search_tree[node['id']] = node + + # Extract the tiers as we will return a dict for the size of each tier + tiers = {k: v for k, v in search_tree.items() if v['type'] == 'root'} + + # For each tier, traverse the heirarchy from the root->chassis->host. 
+    # ALARM HELPERS
+
+    @staticmethod
+    def _check_storage_group(osd_tree, group_id,
+                             hosts, osds, fn_report_alarm):
+        reasons = set()
+        degraded_hosts = set()
+        severity = fm_constants.FM_ALARM_SEVERITY_CRITICAL
+        for host_id in hosts:
+            if len(osds[host_id]) == 0:
+                reasons.add(constants.ALARM_REASON_NO_OSD)
+                degraded_hosts.add(host_id)
+            else:
+                for osd_id in osds[host_id]:
+                    if osd_tree[osd_id]['status'] == 'up':
+                        if osd_tree[osd_id]['reweight'] == 0.0:
+                            reasons.add(constants.ALARM_REASON_OSDS_OUT)
+                            degraded_hosts.add(host_id)
+                        else:
+                            severity = fm_constants.FM_ALARM_SEVERITY_MAJOR
+                    elif osd_tree[osd_id]['status'] == 'down':
+                        reasons.add(constants.ALARM_REASON_OSDS_DOWN)
+                        degraded_hosts.add(host_id)
+        if constants.ALARM_REASON_OSDS_OUT in reasons \
+                and constants.ALARM_REASON_OSDS_DOWN in reasons:
+            reasons.add(constants.ALARM_REASON_OSDS_DOWN_OUT)
+            reasons.remove(constants.ALARM_REASON_OSDS_OUT)
+        if constants.ALARM_REASON_OSDS_DOWN in reasons \
+                and constants.ALARM_REASON_OSDS_DOWN_OUT in reasons:
+            reasons.remove(constants.ALARM_REASON_OSDS_DOWN)
+        reason = "/".join(list(reasons))
+        if severity == fm_constants.FM_ALARM_SEVERITY_CRITICAL:
+            reason = "{} {}: {}".format(
+                fm_constants.ALARM_CRITICAL_REPLICATION,
+                osd_tree[group_id]['name'],
+                reason)
+        elif severity == fm_constants.FM_ALARM_SEVERITY_MAJOR:
+            reason = "{} {}: {}".format(
+                fm_constants.ALARM_MAJOR_REPLICATION,
+                osd_tree[group_id]['name'],
+                reason)
+        if len(degraded_hosts) == 0:
+            if len(hosts) < 2:
+                fn_report_alarm(
+                    osd_tree[group_id]['name'],
+                    "{} {}: {}".format(
+                        fm_constants.ALARM_MAJOR_REPLICATION,
+                        osd_tree[group_id]['name'],
+                        constants.ALARM_REASON_PEER_HOST_DOWN),
+                    fm_constants.FM_ALARM_SEVERITY_MAJOR)
+        elif len(degraded_hosts) == 1:
+            fn_report_alarm(
+                "{}.host={}".format(
+                    osd_tree[group_id]['name'],
+                    osd_tree[list(degraded_hosts)[0]]['name']),
+                reason, severity)
+        else:
+            fn_report_alarm(
+                osd_tree[group_id]['name'],
+                reason, severity)
+
+    def _check_storage_tier(self, osd_tree, tier_name, fn_report_alarm):
+        for tier_id in osd_tree:
+            if osd_tree[tier_id]['type'] != 'root':
+                continue
+            if osd_tree[tier_id]['name'] != tier_name:
+                continue
+            for group_id in osd_tree[tier_id]['children']:
+                if osd_tree[group_id]['type'] != 'chassis':
+                    continue
+                if not osd_tree[group_id]['name'].startswith('group-'):
+                    continue
+                hosts = []
+                osds = {}
+                for host_id in osd_tree[group_id]['children']:
+                    if osd_tree[host_id]['type'] != 'host':
+                        continue
+                    hosts.append(host_id)
+                    osds[host_id] = []
+                    for osd_id in osd_tree[host_id]['children']:
+                        if osd_tree[osd_id]['type'] == 'osd':
+                            osds[host_id].append(osd_id)
+                self._check_storage_group(osd_tree, group_id, hosts,
+                                          osds, fn_report_alarm)
+            break
+
+    def _current_health_alarm_equals(self, reason, severity):
+        if not self.current_health_alarm:
+            return False
+        if getattr(self.current_health_alarm, 'severity', None) != severity:
+            return False
+        if getattr(self.current_health_alarm, 'reason_text', None) != reason:
+            return False
+        return True
+
+    @staticmethod
+    def 
_report_alarm_osds_health(self): + response, osd_tree = self.service.ceph_api.osd_tree(body='json') + if not response.ok: + LOG.error(_LE("Failed to retrieve Ceph OSD tree: " + "status_code: %(status_code)s, reason: %(reason)s") % + {"status_code": response.status_code, + "reason": response.reason}) + return + osd_tree = dict([(n['id'], n) for n in osd_tree['output']['nodes']]) + alarms = [] + + self._check_storage_tier(osd_tree, "storage-tier", + lambda *args: alarms.append(args)) + + old_alarms = {} + for alarm_id in [ + fm_constants.FM_ALARM_ID_STORAGE_CEPH_MAJOR, + fm_constants.FM_ALARM_ID_STORAGE_CEPH_CRITICAL]: + alarm_list = self.service.fm_api.get_faults_by_id(alarm_id) + if not alarm_list: + continue + for alarm in alarm_list: + if alarm.entity_instance_id not in old_alarms: + old_alarms[alarm.entity_instance_id] = [] + old_alarms[alarm.entity_instance_id].append( + (alarm.alarm_id, alarm.reason_text)) + + for peer_group, reason, severity in alarms: + if self._current_health_alarm_equals(reason, severity): + continue + alarm_critical_major = fm_constants.FM_ALARM_ID_STORAGE_CEPH_MAJOR + if severity == fm_constants.FM_ALARM_SEVERITY_CRITICAL: + alarm_critical_major = ( + fm_constants.FM_ALARM_ID_STORAGE_CEPH_CRITICAL) + entity_instance_id = ( + self.service.entity_instance_id + '.peergroup=' + peer_group) + alarm_already_exists = False + if entity_instance_id in old_alarms: + for alarm_id, old_reason in old_alarms[entity_instance_id]: + if (reason == old_reason and + alarm_id == alarm_critical_major): + # if the alarm is exactly the same, we don't need + # to recreate it + old_alarms[entity_instance_id].remove( + (alarm_id, old_reason)) + alarm_already_exists = True + elif (alarm_id == alarm_critical_major): + # if we change just the reason, then we just remove the + # alarm from the list so we don't remove it at the + # end of the function + old_alarms[entity_instance_id].remove( + (alarm_id, old_reason)) + + if (len(old_alarms[entity_instance_id]) == 0): + del old_alarms[entity_instance_id] + + # in case the alarm is exactly the same, we skip the alarm set + if alarm_already_exists is True: + continue + major_repair_action = constants.REPAIR_ACTION_MAJOR_CRITICAL_ALARM + fault = fm_api.Fault( + alarm_id=alarm_critical_major, + alarm_type=fm_constants.FM_ALARM_TYPE_4, + alarm_state=fm_constants.FM_ALARM_STATE_SET, + entity_type_id=fm_constants.FM_ENTITY_TYPE_CLUSTER, + entity_instance_id=entity_instance_id, + severity=severity, + reason_text=reason, + probable_cause=fm_constants.ALARM_PROBABLE_CAUSE_15, + proposed_repair_action=major_repair_action, + service_affecting=constants.SERVICE_AFFECTING['HEALTH_WARN']) + alarm_uuid = self.service.fm_api.set_fault(fault) + if alarm_uuid: + LOG.info(_LI( + "Created storage alarm %(alarm_uuid)s - " + "severity: %(severity)s, reason: %(reason)s, " + "service_affecting: %(service_affecting)s") % { + "alarm_uuid": str(alarm_uuid), + "severity": str(severity), + "reason": reason, + "service_affecting": str( + constants.SERVICE_AFFECTING['HEALTH_WARN'])}) + else: + LOG.error(_LE( + "Failed to create storage alarm - " + "severity: %(severity)s, reason: %(reason)s, " + "service_affecting: %(service_affecting)s") % { + "severity": str(severity), + "reason": reason, + "service_affecting": str( + constants.SERVICE_AFFECTING['HEALTH_WARN'])}) + + for entity_instance_id in old_alarms: + for alarm_id, old_reason in old_alarms[entity_instance_id]: + self.service.fm_api.clear_fault(alarm_id, entity_instance_id) + + @staticmethod + def 
_parse_reason(health): + """ Parse reason strings received from Ceph """ + if health['health'] in constants.CEPH_STATUS_CUSTOM: + # Don't parse reason messages that we added + return "Storage Alarm Condition: %(health)s. %(detail)s" % health + + reasons_lst = health['detail'].split(';') + + parsed_reasons_text = "" + + # Check if PGs have issues - we can't safely store the entire message + # as it tends to be long + for reason in reasons_lst: + if "pgs" in reason: + parsed_reasons_text += "PGs are degraded/stuck or undersized" + break + + # Extract recovery status + parsed_reasons = [r.strip() for r in reasons_lst if 'recovery' in r] + if parsed_reasons: + parsed_reasons_text += ";" + ";".join(parsed_reasons) + + # We need to keep the most important parts of the messages when storing + # them to fm alarms, therefore text between [] brackets is truncated if + # max size is reached. + + # Add brackets, if needed + if len(parsed_reasons_text): + lbracket = " [" + rbracket = "]" + else: + lbracket = "" + rbracket = "" + + msg = {"head": "Storage Alarm Condition: ", + "tail": ". Please check 'ceph -s' for more details."} + max_size = constants.FM_ALARM_REASON_MAX_SIZE - \ + len(msg["head"]) - len(msg["tail"]) + + return ( + msg['head'] + + (health['health'] + lbracket + parsed_reasons_text)[:max_size-1] + + rbracket + msg['tail']) + + def _report_fault(self, health, alarm_id): + if alarm_id == fm_constants.FM_ALARM_ID_STORAGE_CEPH: + new_severity = constants.SEVERITY[health['health']] + new_reason_text = self._parse_reason(health) + new_service_affecting = \ + constants.SERVICE_AFFECTING[health['health']] + + # Raise or update alarm if necessary + if ((not self.current_health_alarm) or + (self.current_health_alarm.__dict__['severity'] != + new_severity) or + (self.current_health_alarm.__dict__['reason_text'] != + new_reason_text) or + (self.current_health_alarm.__dict__['service_affecting'] != + str(new_service_affecting))): + + fault = fm_api.Fault( + alarm_id=fm_constants.FM_ALARM_ID_STORAGE_CEPH, + alarm_type=fm_constants.FM_ALARM_TYPE_4, + alarm_state=fm_constants.FM_ALARM_STATE_SET, + entity_type_id=fm_constants.FM_ENTITY_TYPE_CLUSTER, + entity_instance_id=self.service.entity_instance_id, + severity=new_severity, + reason_text=new_reason_text, + probable_cause=fm_constants.ALARM_PROBABLE_CAUSE_15, + proposed_repair_action=constants.REPAIR_ACTION, + service_affecting=new_service_affecting) + + alarm_uuid = self.service.fm_api.set_fault(fault) + if alarm_uuid: + LOG.info(_LI( + "Created storage alarm %(alarm_uuid)s - " + "severity: %(severity)s, reason: %(reason)s, " + "service_affecting: %(service_affecting)s") % { + "alarm_uuid": alarm_uuid, + "severity": new_severity, + "reason": new_reason_text, + "service_affecting": new_service_affecting}) + else: + LOG.error(_LE( + "Failed to create storage alarm - " + "severity: %(severity)s, reason: %(reason)s " + "service_affecting: %(service_affecting)s") % { + "severity": new_severity, + "reason": new_reason_text, + "service_affecting": new_service_affecting}) + + # Log detailed reason for later analysis + if (self.current_ceph_health != health['health'] or + self.detailed_health_reason != health['detail']): + LOG.info(_LI("Ceph status changed: %(health)s " + "detailed reason: %(detail)s") % health) + self.current_ceph_health = health['health'] + self.detailed_health_reason = health['detail'] + + elif (alarm_id == fm_constants.FM_ALARM_ID_STORAGE_CEPH_FREE_SPACE and + not health['tier_eid'] in self.current_quota_alarms): + + quota_reason_text = 
("Quota/Space mismatch for the %s tier. The " + "sum of Ceph pool quotas does not match the " + "tier size." % health['tier_name']) + fault = fm_api.Fault( + alarm_id=fm_constants.FM_ALARM_ID_STORAGE_CEPH_FREE_SPACE, + alarm_state=fm_constants.FM_ALARM_STATE_SET, + entity_type_id=fm_constants.FM_ENTITY_TYPE_CLUSTER, + entity_instance_id=health['tier_eid'], + severity=fm_constants.FM_ALARM_SEVERITY_MINOR, + reason_text=quota_reason_text, + alarm_type=fm_constants.FM_ALARM_TYPE_7, + probable_cause=fm_constants.ALARM_PROBABLE_CAUSE_75, + proposed_repair_action=( + "Update ceph storage pool quotas to use all available " + "cluster space for the %s tier." % health['tier_name']), + service_affecting=False) + + alarm_uuid = self.service.fm_api.set_fault(fault) + if alarm_uuid: + LOG.info(_LI( + "Created storage quota storage alarm %(alarm_uuid)s. " + "Reason: %(reason)s") % { + "alarm_uuid": alarm_uuid, "reason": quota_reason_text}) + else: + LOG.error(_LE("Failed to create quota " + "storage alarm. Reason: %s") % quota_reason_text) + + def _clear_fault(self, alarm_id, entity_instance_id=None): + # Only clear alarm if there is one already raised + if (alarm_id == fm_constants.FM_ALARM_ID_STORAGE_CEPH and + self.current_health_alarm): + LOG.info(_LI("Clearing health alarm")) + self.service.fm_api.clear_fault( + fm_constants.FM_ALARM_ID_STORAGE_CEPH, + self.service.entity_instance_id) + elif (alarm_id == fm_constants.FM_ALARM_ID_STORAGE_CEPH_FREE_SPACE and + entity_instance_id in self.current_quota_alarms): + LOG.info(_LI("Clearing quota alarm with entity_instance_id %s") + % entity_instance_id) + self.service.fm_api.clear_fault( + fm_constants.FM_ALARM_ID_STORAGE_CEPH_FREE_SPACE, + entity_instance_id) + + def clear_critical_alarm(self, group_name): + alarm_list = self.service.fm_api.get_faults_by_id( + fm_constants.FM_ALARM_ID_STORAGE_CEPH_CRITICAL) + if alarm_list: + for alarm in range(len(alarm_list)): + group_id = alarm_list[alarm].entity_instance_id.find("group-") + group_instance_name = ( + "group-" + + alarm_list[alarm].entity_instance_id[group_id + 6]) + if group_name == group_instance_name: + self.service.fm_api.clear_fault( + fm_constants.FM_ALARM_ID_STORAGE_CEPH_CRITICAL, + alarm_list[alarm].entity_instance_id) + + def clear_all_major_critical(self, group_name=None): + # clear major alarms + alarm_list = self.service.fm_api.get_faults_by_id( + fm_constants.FM_ALARM_ID_STORAGE_CEPH_MAJOR) + if alarm_list: + for alarm in range(len(alarm_list)): + if group_name is not None: + group_id = ( + alarm_list[alarm].entity_instance_id.find("group-")) + group_instance_name = ( + "group-" + + alarm_list[alarm].entity_instance_id[group_id+6]) + if group_name == group_instance_name: + self.service.fm_api.clear_fault( + fm_constants.FM_ALARM_ID_STORAGE_CEPH_MAJOR, + alarm_list[alarm].entity_instance_id) + else: + self.service.fm_api.clear_fault( + fm_constants.FM_ALARM_ID_STORAGE_CEPH_MAJOR, + alarm_list[alarm].entity_instance_id) + # clear critical alarms + alarm_list = self.service.fm_api.get_faults_by_id( + fm_constants.FM_ALARM_ID_STORAGE_CEPH_CRITICAL) + if alarm_list: + for alarm in range(len(alarm_list)): + if group_name is not None: + group_id = ( + alarm_list[alarm].entity_instance_id.find("group-")) + group_instance_name = ( + "group-" + + alarm_list[alarm].entity_instance_id[group_id + 6]) + if group_name == group_instance_name: + self.service.fm_api.clear_fault( + fm_constants.FM_ALARM_ID_STORAGE_CEPH_CRITICAL, + alarm_list[alarm].entity_instance_id) + else: + 
+                    self.service.fm_api.clear_fault(
+                        fm_constants.FM_ALARM_ID_STORAGE_CEPH_CRITICAL,
+                        alarm_list[alarm].entity_instance_id)
+
+    def _get_current_alarms(self):
+        """ Retrieve currently raised alarms """
+        self.current_health_alarm = self.service.fm_api.get_fault(
+            fm_constants.FM_ALARM_ID_STORAGE_CEPH,
+            self.service.entity_instance_id)
+        quota_faults = self.service.fm_api.get_faults_by_id(
+            fm_constants.FM_ALARM_ID_STORAGE_CEPH_FREE_SPACE)
+        if quota_faults:
+            self.current_quota_alarms = [f.entity_instance_id
+                                         for f in quota_faults]
+        else:
+            self.current_quota_alarms = []
diff --git a/ceph/ceph-manager/ceph-manager/ceph_manager/server.py b/ceph/ceph-manager/ceph-manager/ceph_manager/server.py
new file mode 100644
index 000000000..72edf406b
--- /dev/null
+++ b/ceph/ceph-manager/ceph-manager/ceph_manager/server.py
@@ -0,0 +1,175 @@
+# vim: tabstop=4 shiftwidth=4 softtabstop=4
+#
+# Copyright (c) 2016-2018 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+# https://chrigl.de/posts/2014/08/27/oslo-messaging-example.html
+# http://docs.openstack.org/developer/oslo.messaging/server.html
+
+import sys
+
+# noinspection PyUnresolvedReferences
+import eventlet
+# noinspection PyUnresolvedReferences
+import oslo_messaging as messaging
+# noinspection PyUnresolvedReferences
+from fm_api import fm_api
+# noinspection PyUnresolvedReferences
+from oslo_config import cfg
+# noinspection PyUnresolvedReferences
+from oslo_log import log as logging
+# noinspection PyUnresolvedReferences
+from oslo_service import service
+# noinspection PyUnresolvedReferences
+from oslo_service.periodic_task import PeriodicTasks
+# noinspection PyUnresolvedReferences
+from oslo_service import loopingcall
+
+# noinspection PyUnresolvedReferences
+from cephclient import wrapper
+
+from monitor import Monitor
+import exception
+import constants
+
+from i18n import _LI, _LW
+from retrying import retry
+
+eventlet.monkey_patch(all=True)
+
+CONF = cfg.CONF
+CONF.register_opts([
+    cfg.StrOpt('sysinv_api_bind_ip',
+               default='0.0.0.0',
+               help='IP for the Ceph Manager server to bind to')])
+CONF.logging_default_format_string = (
+    '%(asctime)s.%(msecs)03d %(process)d '
+    '%(levelname)s %(name)s [-] %(message)s')
+logging.register_options(CONF)
+logging.setup(CONF, __name__)
+LOG = logging.getLogger(__name__)
+CONF.rpc_backend = 'rabbit'
+
+
+class RpcEndpoint(PeriodicTasks):
+
+    def __init__(self, service=None):
+        self.service = service
+
+    def get_primary_tier_size(self, _):
+        """Get the ceph size for the primary tier.
+
+        returns: an int for the size (in GB) of the tier
+        """
+
+        tiers_size = self.service.monitor.tiers_size
+        primary_tier_size = tiers_size.get(
+            self.service.monitor.primary_tier_name, 0)
+        LOG.debug(_LI("Ceph cluster primary tier size: %s GB") %
+                  str(primary_tier_size))
+        return primary_tier_size
+
+    def get_tiers_size(self, _):
+        """Get the ceph cluster tier sizes.
+
+        returns: a dict of sizes (in GB) by tier name
+        """
+
+        tiers_size = self.service.monitor.tiers_size
+        LOG.debug(_LI("Ceph cluster tiers (size in GB): %s") %
+                  str(tiers_size))
+        return tiers_size
+
+    def is_cluster_up(self, _):
+        """Report if the last health check was successful.
+
+        This is an independent view of the cluster accessibility that can be
+        used by the sysinv conductor to gate ceph API calls which would time
+        out and potentially block other operations.
+ + This view is only updated at the rate the monitor checks for a cluster + uuid or a health check (CEPH_HEALTH_CHECK_INTERVAL) + + returns: boolean True if last health check was successful else False + """ + return self.service.monitor.cluster_is_up + + +class SysinvConductorUpgradeApi(object): + def __init__(self): + self.sysinv_conductor = None + super(SysinvConductorUpgradeApi, self).__init__() + + def get_software_upgrade_status(self): + LOG.info(_LI("Getting software upgrade status from sysinv")) + cctxt = self.sysinv_conductor.prepare(timeout=2) + upgrade = cctxt.call({}, 'get_software_upgrade_status') + LOG.info(_LI("Software upgrade status: %s") % str(upgrade)) + return upgrade + + @retry(wait_fixed=1000, + retry_on_exception=lambda e: + LOG.warn(_LW( + "Getting software upgrade status failed " + "with: %s. Retrying... ") % str(e)) or True) + def retry_get_software_upgrade_status(self): + return self.get_software_upgrade_status() + + +class Service(SysinvConductorUpgradeApi, service.Service): + + def __init__(self, conf): + super(Service, self).__init__() + self.conf = conf + self.rpc_server = None + self.sysinv_conductor = None + self.ceph_api = None + self.entity_instance_id = '' + self.fm_api = fm_api.FaultAPIs() + self.monitor = Monitor(self) + self.config = None + self.config_desired = None + self.config_applied = None + + def start(self): + super(Service, self).start() + transport = messaging.get_transport(self.conf) + self.sysinv_conductor = messaging.RPCClient( + transport, + messaging.Target( + topic=constants.SYSINV_CONDUCTOR_TOPIC)) + + self.ceph_api = wrapper.CephWrapper( + endpoint='http://localhost:5001/api/v0.1/') + + # Get initial config from sysinv and send it to + # services that need it before starting them + self.rpc_server = messaging.get_rpc_server( + transport, + messaging.Target(topic=constants.CEPH_MANAGER_TOPIC, + server=self.conf.sysinv_api_bind_ip), + [RpcEndpoint(self)], + executor='eventlet') + self.rpc_server.start() + eventlet.spawn_n(self.monitor.run) + + def stop(self): + try: + self.rpc_server.stop() + self.rpc_server.wait() + except Exception: + pass + super(Service, self).stop() + + +def run_service(): + CONF(sys.argv[1:]) + logging.setup(CONF, "ceph-manager") + launcher = service.launch(CONF, Service(CONF), workers=1) + launcher.wait() + + +if __name__ == "__main__": + run_service() diff --git a/ceph/ceph-manager/ceph-manager/ceph_manager/tests/__init__.py b/ceph/ceph-manager/ceph-manager/ceph_manager/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/ceph/ceph-manager/ceph-manager/setup.py b/ceph/ceph-manager/ceph-manager/setup.py new file mode 100644 index 000000000..40cf5012b --- /dev/null +++ b/ceph/ceph-manager/ceph-manager/setup.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python +# +# Copyright (c) 2013-2014, 2016 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + + +import setuptools + +setuptools.setup( + name='ceph_manager', + version='1.0.0', + description='CEPH manager', + license='Apache-2.0', + packages=['ceph_manager'], + entry_points={ + } +) diff --git a/ceph/ceph-manager/ceph-manager/test-requirements.txt b/ceph/ceph-manager/ceph-manager/test-requirements.txt new file mode 100644 index 000000000..1fdf20563 --- /dev/null +++ b/ceph/ceph-manager/ceph-manager/test-requirements.txt @@ -0,0 +1,10 @@ +# The order of packages is significant, because pip processes them in the order +# of appearance. 
Changing the order has an impact on the overall integration +# process, which may cause wedges in the gate later. + +mock +flake8 +eventlet +pytest +oslo.log +oslo.i18n \ No newline at end of file diff --git a/ceph/ceph-manager/ceph-manager/tox.ini b/ceph/ceph-manager/ceph-manager/tox.ini new file mode 100644 index 000000000..41d3854b2 --- /dev/null +++ b/ceph/ceph-manager/ceph-manager/tox.ini @@ -0,0 +1,29 @@ +# adapted from glance tox.ini + +[tox] +minversion = 1.6 +envlist = py27,pep8 +skipsdist = True +# tox does not work if the path to the workdir is too long, so move it to /tmp +toxworkdir = /tmp/{env:USER}_ceph_manager_tox + +[testenv] +setenv = VIRTUAL_ENV={envdir} +usedevelop = True +install_command = pip install --no-use-wheel -U --force-reinstall {opts} {packages} +deps = -r{toxinidir}/test-requirements.txt +commands = py.test {posargs} +whitelist_externals = bash +passenv = http_proxy HTTP_PROXY https_proxy HTTPS_PROXY no_proxy NO_PROXY + +[testenv:py27] +basepython = python2.7 +setenv = + PYTHONPATH={toxinidir}/../../../../sysinv/recipes-common/sysinv/sysinv:{toxinidir}/../../../../config/recipes-common/tsconfig/tsconfig + +[testenv:pep8] +commands = + flake8 {posargs} + +[flake8] +exclude = .venv,.git,.tox,dist,doc,etc,*glance/locale*,*lib/python*,*egg,build diff --git a/ceph/ceph-manager/files/ceph-manager.logrotate b/ceph/ceph-manager/files/ceph-manager.logrotate new file mode 100644 index 000000000..8d7a16ab1 --- /dev/null +++ b/ceph/ceph-manager/files/ceph-manager.logrotate @@ -0,0 +1,11 @@ +/var/log/ceph-manager.log { + nodateext + size 10M + start 1 + rotate 10 + missingok + notifempty + compress + delaycompress + copytruncate +} diff --git a/ceph/ceph-manager/files/ceph-manager.service b/ceph/ceph-manager/files/ceph-manager.service new file mode 100644 index 000000000..e8bf26cf9 --- /dev/null +++ b/ceph/ceph-manager/files/ceph-manager.service @@ -0,0 +1,17 @@ +[Unit] +Description=Handle Ceph API calls and provide status updates via alarms +After=ceph.target + +[Service] +Type=forking +Restart=no +KillMode=process +RemainAfterExit=yes +ExecStart=/etc/rc.d/init.d/ceph-manager start +ExecStop=/etc/rc.d/init.d/ceph-manager stop +ExecReload=/etc/rc.d/init.d/ceph-manager reload +PIDFile=/var/run/ceph/ceph-manager.pid + +[Install] +WantedBy=multi-user.target + diff --git a/ceph/ceph-manager/scripts/bin/ceph-manager b/ceph/ceph-manager/scripts/bin/ceph-manager new file mode 100644 index 000000000..9aa4330db --- /dev/null +++ b/ceph/ceph-manager/scripts/bin/ceph-manager @@ -0,0 +1,17 @@ +#!/usr/bin/env python +# +# Copyright (c) 2016 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + + +import sys + +try: + from ceph_manager.server import run_service +except EnvironmentError as e: + print >> sys.stderr, "Error importing ceph_manager: ", str(e) + sys.exit(1) + +run_service() diff --git a/ceph/ceph-manager/scripts/init.d/ceph-manager b/ceph/ceph-manager/scripts/init.d/ceph-manager new file mode 100644 index 000000000..88bdddfb8 --- /dev/null +++ b/ceph/ceph-manager/scripts/init.d/ceph-manager @@ -0,0 +1,103 @@ +#!/bin/sh +# +# Copyright (c) 2013-2014, 2016 Wind River Systems, Inc. 
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+
+### BEGIN INIT INFO
+# Provides:          ceph-manager
+# Required-Start:    $ceph
+# Required-Stop:     $ceph
+# Default-Start:     2 3 4 5
+# Default-Stop:      0 1 6
+# Short-Description: Daemon for polling ceph status
+# Description:       Daemon for polling ceph status
+### END INIT INFO
+
+DESC="ceph-manager"
+DAEMON="/usr/bin/ceph-manager"
+RUNDIR="/var/run/ceph"
+PIDFILE=$RUNDIR/$DESC.pid
+
+CONFIGFILE="/etc/sysinv/sysinv.conf"
+LOGFILE="/var/log/ceph-manager.log"
+
+start()
+{
+    if [ -e $PIDFILE ]; then
+        PIDDIR=/proc/$(cat $PIDFILE)
+        if [ -d ${PIDDIR} ]; then
+            echo "$DESC already running."
+            exit 0
+        else
+            echo "Removing stale PID file $PIDFILE"
+            rm -f $PIDFILE
+        fi
+    fi
+
+    echo -n "Starting $DESC..."
+    mkdir -p $RUNDIR
+    start-stop-daemon --start --quiet \
+        --pidfile ${PIDFILE} --exec ${DAEMON} \
+        --make-pidfile --background \
+        -- --log-file=$LOGFILE --config-file=$CONFIGFILE
+
+    if [ $? -eq 0 ]; then
+        echo "done."
+    else
+        echo "failed."
+        exit 1
+    fi
+}
+
+stop()
+{
+    echo -n "Stopping $DESC..."
+    start-stop-daemon --stop --quiet --pidfile $PIDFILE --retry 60
+    if [ $? -eq 0 ]; then
+        echo "done."
+    else
+        echo "failed."
+    fi
+    rm -f $PIDFILE
+}
+
+status()
+{
+    pid=`cat $PIDFILE 2>/dev/null`
+    if [ -n "$pid" ]; then
+        if ps -p $pid &> /dev/null ; then
+            echo "$DESC is running"
+            exit 0
+        else
+            echo "$DESC is not running but has pid file"
+            exit 1
+        fi
+    fi
+    echo "$DESC is not running"
+    exit 3
+}
+
+case "$1" in
+    start)
+        start
+        ;;
+    stop)
+        stop
+        ;;
+    restart|force-reload|reload)
+        stop
+        start
+        ;;
+    status)
+        status
+        ;;
+    *)
+        echo "Usage: $0 {start|stop|force-reload|restart|reload|status}"
+        exit 1
+        ;;
esac
+
+exit 0
diff --git a/ceph/ceph/centos/build_srpm.data b/ceph/ceph/centos/build_srpm.data
new file mode 100644
index 000000000..ca131ddd5
--- /dev/null
+++ b/ceph/ceph/centos/build_srpm.data
@@ -0,0 +1,5 @@
+SRC_DIR="$CGCS_BASE/git/ceph"
+TIS_BASE_SRCREV=3f07f7ff1a5c7bfa8d0de12c966594d5fb7cf4ec
+TIS_PATCH_VER=GITREVCOUNT
+BUILD_IS_BIG=40
+BUILD_IS_SLOW=26
diff --git a/ceph/ceph/centos/ceph.spec b/ceph/ceph/centos/ceph.spec
new file mode 120000
index 000000000..5502d2f3f
--- /dev/null
+++ b/ceph/ceph/centos/ceph.spec
@@ -0,0 +1 @@
+../../../../git/ceph/ceph.spec
\ No newline at end of file
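The stale-PID-file test in start() above relies on /proc/<pid> existing only while the process is alive. A minimal Python sketch of the same check, using the pidfile path from the script:

    import os

    def pid_running(pidfile='/var/run/ceph/ceph-manager.pid'):
        # A PID is alive iff /proc/<pid> exists (Linux procfs)
        try:
            with open(pidfile) as f:
                pid = int(f.read().strip())
        except (IOError, ValueError):
            return False
        return os.path.isdir('/proc/%d' % pid)
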
diff --git a/ceph/ceph/files/ceph-manage-journal.py b/ceph/ceph/files/ceph-manage-journal.py
new file mode 100644
index 000000000..b3312e0cb
--- /dev/null
+++ b/ceph/ceph/files/ceph-manage-journal.py
@@ -0,0 +1,326 @@
+#!/usr/bin/python
+#
+# Copyright (c) 2016 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+import ast
+import os
+import os.path
+import re
+import subprocess
+import sys
+
+
+#########
+# Utils #
+#########
+
+def command(arguments, **kwargs):
+    """ Execute a command and capture stdout, stderr & return code """
+    process = subprocess.Popen(
+        arguments,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        **kwargs)
+    out, err = process.communicate()
+    return out, err, process.returncode
+
+
+def get_input(arg, valid_keys):
+    """Convert the input to a dict and perform basic validation"""
+    json_string = arg.replace("\\n", "\n")
+    try:
+        input_dict = ast.literal_eval(json_string)
+        if not all(k in input_dict for k in valid_keys):
+            return None
+    except Exception:
+        return None
+
+    return input_dict
+
+
+def get_partition_uuid(dev):
+    output, _, _ = command(['blkid', dev])
+    try:
+        return re.search('PARTUUID=\"(.+?)\"', output).group(1)
+    except AttributeError:
+        return None
+
+
+def device_path_to_device_node(device_path):
+    try:
+        output, _, _ = command(["udevadm", "settle", "-E", device_path])
+        out, err, retcode = command(["readlink", "-f", device_path])
+        out = out.rstrip()
+    except Exception as e:
+        return None
+
+    return out
+
+
+###########################################
+# Manage Journal Disk Partitioning Scheme #
+###########################################
+
+DISK_BY_PARTUUID = "/dev/disk/by-partuuid/"
+JOURNAL_UUID = '45b0969e-9b03-4f30-b4c6-b4b80ceff106'  # Type of a journal partition
+
+
+def is_partitioning_correct(disk_path, partition_sizes):
+    """ Validate the existence and size of journal partitions"""
+
+    # Obtain the device node from the device path.
+    disk_node = device_path_to_device_node(disk_path)
+
+    # Check that partition table format is GPT
+    output, _, _ = command(["udevadm", "settle", "-E", disk_node])
+    output, _, _ = command(["parted", "-s", disk_node, "print"])
+    if not re.search('Partition Table: gpt', output):
+        print "Format of disk node %s is not GPT, zapping disk" % disk_node
+        return False
+
+    # Check each partition size
+    partition_index = 1
+    for size in partition_sizes:
+        # Check that each partition size matches the one in input
+        partition_node = disk_node + str(partition_index)
+        output, _, _ = command(["udevadm", "settle", "-E", partition_node])
+        cmd = ["parted", "-s", partition_node, "unit", "MiB", "print"]
+        output, _, _ = command(cmd)
+
+        regex = ("^Disk " + str(partition_node) + ":\\s*" +
+                 str(size) + "[\\.0]*MiB")
+        if not re.search(regex, output, re.MULTILINE):
+            print ("Journal partition %(node)s size is not %(size)s, "
+                   "zapping disk" % {"node": partition_node, "size": size})
+            return False
+
+        partition_index += 1
+
+    output, _, _ = command(["udevadm", "settle", "-t", "10"])
+    return True
+
+
+def create_partitions(disk_path, partition_sizes):
+    """ Recreate partitions """
+
+    # Obtain the device node from the device path.
+    disk_node = device_path_to_device_node(disk_path)
+
+    # Issue: After creating a new partition table on a device, Udev does not
+    # always remove old symlinks (i.e. to previous partitions on that device).
+    # Also, even if links are erased before zapping the disk, some of them will
+    # be recreated even though there is no partition to back them!
+    # Therefore, we have to remove the links AFTER we erase the partition table
+    # Issue: DISK_BY_PARTUUID directory is not present at all if there are no
+    # GPT partitions on the storage node so nothing to remove in this case
+    links = []
+    if os.path.isdir(DISK_BY_PARTUUID):
+        links = [os.path.join(DISK_BY_PARTUUID, l) for l in os.listdir(DISK_BY_PARTUUID)
+                 if os.path.islink(os.path.join(DISK_BY_PARTUUID, l))]
+
+    # Erase all partitions on current node by creating a new GPT table
+    _, err, ret = command(["parted", "-s", disk_node, "mktable", "gpt"])
+    if ret:
+        print ("Error erasing partition table of %(node)s\n"
+               "Return code: %(ret)s reason: %(reason)s" %
+               {"node": disk_node, "ret": ret, "reason": err})
+        exit(1)
+
+    # Erase old symlinks
+    for l in links:
+        if disk_node in os.path.realpath(l):
+            os.remove(l)
+
+    # Create partitions in order
+    used_space_mib = 1  # leave 1 MB at the beginning of the disk
+    num = 1
+    for size in partition_sizes:
+        cmd = ['parted', '-s', disk_node, 'unit', 'mib',
+               'mkpart', 'primary',
+               str(used_space_mib), str(used_space_mib + size)]
+        _, err, ret = command(cmd)
+        parms = {"disk_node": disk_node,
+                 "start": used_space_mib,
+                 "end": used_space_mib + size,
+                 "reason": err}
+        print ("Created partition from start=%(start)s MiB to end=%(end)s MiB"
+               " on %(disk_node)s" % parms)
+        if ret:
+            print ("Failed to create partition with "
+                   "start=%(start)s, end=%(end)s "
+                   "on %(disk_node)s reason: %(reason)s" % parms)
+            exit(1)
+        # Set partition type to ceph journal
+        # noncritical operation, it makes 'ceph-disk list' output correct info
+        cmd = ['sgdisk',
+               '--change-name={num}:ceph journal'.format(num=num),
+               '--typecode={num}:{uuid}'.format(
+                   num=num,
+                   uuid=JOURNAL_UUID,
+               ),
+               disk_node]
+        _, err, ret = command(cmd)
+        if ret:
+            print ("WARNING: Failed to set partition name and typecode")
+        used_space_mib += size
+        num += 1
+
+
+###########################
+# Manage Journal Location #
+###########################
+
+OSD_PATH = "/var/lib/ceph/osd/"
+
+
+def mount_data_partition(data_path, osdid):
+    """ Mount an OSD data partition and return the mounted path """
+
+    # Obtain the device node from the device path.
+    data_node = device_path_to_device_node(data_path)
+
+    mount_path = OSD_PATH + "ceph-" + str(osdid)
+    output, _, _ = command(['mount'])
+    regex = "^" + data_node + ".*" + mount_path
+    if not re.search(regex, output, re.MULTILINE):
+        cmd = ['mount', '-t', 'xfs', data_node, mount_path]
+        _, _, ret = command(cmd)
+        params = {"node": data_node, "path": mount_path}
+        if ret:
+            print "Failed to mount %(node)s to %(path)s, aborting" % params
+            exit(1)
+        else:
+            print "Mounted %(node)s to %(path)s" % params
+    return mount_path
+
+
+def is_location_correct(path, journal_path, osdid):
+    """ Check if location points to the correct device """
+
+    # Obtain the device node from the device path.
+    journal_node = device_path_to_device_node(journal_path)
+
+    cur_node = os.path.realpath(path + "/journal")
+    if cur_node == journal_node:
+        return True
+    else:
+        return False
+
+
+def fix_location(mount_point, journal_path, osdid):
+    """ Move the journal to the new partition """
+
+    # Obtain the device node from the device path.
+    journal_node = device_path_to_device_node(journal_path)
+
+    # Fix symlink
+    path = mount_point + "/journal"  # 'journal' symlink path used by ceph-osd
+    journal_uuid = get_partition_uuid(journal_node)
+    new_target = DISK_BY_PARTUUID + journal_uuid
+    params = {"path": path, "target": new_target}
+    try:
+        if os.path.lexists(path):
+            os.unlink(path)  # delete the old symlink
+        os.symlink(new_target, path)
+        print "Symlink created: %(path)s -> %(target)s" % params
+    except:
+        print "Failed to create symlink: %(path)s -> %(target)s" % params
+        exit(1)
+    # Fix journal_uuid
+    path = mount_point + "/journal_uuid"
+    try:
+        with open(path, 'w') as f:
+            f.write(journal_uuid)
+    except Exception as ex:
+        # The operation is noncritical, it only makes 'ceph-disk list'
+        # display complete output. We log and continue.
+        params = {"path": path, "uuid": journal_uuid}
+        print "WARNING: Failed to set uuid of %(path)s to %(uuid)s" % params
+
+    # Clean the journal partition
+    # even if erasing the partition table, if another journal was present here
+    # it's going to be reused. Journals are always bigger than 100MB.
+    command(['dd', 'if=/dev/zero', 'of=%s' % journal_node,
+             'bs=1M', 'count=100'])
+
+    # Format the journal
+    cmd = ['/usr/bin/ceph-osd', '-i', str(osdid),
+           '--pid-file', '/var/run/ceph/osd.%s.pid' % osdid,
+           '-c', '/etc/ceph/ceph.conf',
+           '--cluster', 'ceph',
+           '--mkjournal']
+    out, err, ret = command(cmd)
+    params = {"journal_node": journal_node,
+              "osdid": osdid,
+              "ret": ret,
+              "reason": err}
+    if not ret:
+        print ("Prepared new journal partition: %(journal_node)s "
+               "for osd id: %(osdid)s") % params
+    else:
+        print ("Error initializing journal node: "
+               "%(journal_node)s for osd id: %(osdid)s "
+               "ceph-osd return code: %(ret)s reason: %(reason)s" % params)
+
+
+########
+# Main #
+########
+
+def main(argv):
+    # parse and validate arguments
+    err = False
+    partitions = None
+    location = None
+    if len(argv) != 2:
+        err = True
+    elif argv[0] == "partitions":
+        valid_keys = ['disk_path', 'journals']
+        partitions = get_input(argv[1], valid_keys)
+        if not partitions:
+            err = True
+        elif not isinstance(partitions['journals'], list):
+            err = True
+    elif argv[0] == "location":
+        valid_keys = ['data_path', 'journal_path', 'osdid']
+        location = get_input(argv[1], valid_keys)
+        if not location:
+            err = True
+        elif not isinstance(location['osdid'], int):
+            err = True
+    else:
+        err = True
+    if err:
+        print "Command intended for internal use only"
+        exit(-1)
+
+    if partitions:
+        # Recreate partitions only if the existing ones don't match input
+        if not is_partitioning_correct(partitions['disk_path'],
+                                       partitions['journals']):
+            create_partitions(partitions['disk_path'], partitions['journals'])
+        else:
+            print ("Partition table for %s is correct, "
+                   "no need to repartition" %
+                   device_path_to_device_node(partitions['disk_path']))
+    elif location:
+        # we need to have the data partition mounted & we can let it mounted
+        mount_point = mount_data_partition(location['data_path'],
+                                           location['osdid'])
+        # Update journal location only if link point to another partition
+        if not is_location_correct(mount_point,
+                                   location['journal_path'],
+                                   location['osdid']):
+            print ("Fixing journal location for "
+                   "OSD id: %(id)s" % {"node": location['data_path'],
+                                       "id": location['osdid']})
+            fix_location(mount_point,
+                         location['journal_path'],
+                         location['osdid'])
+        else:
+            print ("Journal location for %s is correct, "
+                   "no need to change it" % location['data_path'])
+
+main(sys.argv[1:])
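For reference, a sketch of how the two modes of the script above expect to be invoked. The argument is a Python-literal dict parsed by get_input() via ast.literal_eval; the install path and the device paths below are hypothetical:

    import subprocess

    SCRIPT = '/usr/sbin/ceph-manage-journal.py'  # hypothetical install path

    # Mode 1: (re)create journal partitions (sizes in MiB) on a journal disk
    subprocess.call(['python', SCRIPT, 'partitions',
                     "{'disk_path': '/dev/disk/by-path/pci-0000:00:1f.2-ata-2.0', "
                     "'journals': [1024, 1024]}"])

    # Mode 2: point an OSD's 'journal' symlink at its journal partition
    subprocess.call(['python', SCRIPT, 'location',
                     "{'data_path': '/dev/disk/by-path/pci-0000:00:1f.2-ata-1.0-part1', "
                     "'journal_path': '/dev/disk/by-path/pci-0000:00:1f.2-ata-2.0-part1', "
                     "'osdid': 0}"])

diff --git 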
a/config/puppet-modules/puppet-lvm/centos/build_srpm.data b/config/puppet-modules/puppet-lvm/centos/build_srpm.data index 9bfbdd412..6c97141c9 100644 --- a/config/puppet-modules/puppet-lvm/centos/build_srpm.data +++ b/config/puppet-modules/puppet-lvm/centos/build_srpm.data @@ -9,4 +9,4 @@ COPY_LIST="$CGCS_BASE/downloads/puppet/$PREFIX-$MODULE-$GIT_SHA.tar.gz $FILES_BA -TIS_PATCH_VER=4 +TIS_PATCH_VER=5 diff --git a/config/puppet-modules/puppet-lvm/centos/files/Fix-the-logical-statement-for-nuke_fs_on_resize.patch b/config/puppet-modules/puppet-lvm/centos/files/Fix-the-logical-statement-for-nuke_fs_on_resize.patch new file mode 100644 index 000000000..e1796ba3c --- /dev/null +++ b/config/puppet-modules/puppet-lvm/centos/files/Fix-the-logical-statement-for-nuke_fs_on_resize.patch @@ -0,0 +1,45 @@ +From 21d2c4e714611ad08e5aa999e555e1e7591f2717 Mon Sep 17 00:00:00 2001 +From: Kristine Bujold +Date: Thu, 19 Jul 2018 09:02:27 -0400 +Subject: [PATCH 1/1] Patch4: + Fix-the-logical-statement-for-nuke_fs_on_resize_2.patch + +--- + .../puppet/modules/lvm/lib/puppet/provider/logical_volume/lvm.rb | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/packstack/puppet/modules/lvm/lib/puppet/provider/logical_volume/lvm.rb b/packstack/puppet/modules/lvm/lib/puppet/provider/logical_volume/lvm.rb +index 2abfea3..f9b1c66 100755 +--- a/packstack/puppet/modules/lvm/lib/puppet/provider/logical_volume/lvm.rb ++++ b/packstack/puppet/modules/lvm/lib/puppet/provider/logical_volume/lvm.rb +@@ -184,13 +184,15 @@ Puppet::Type.type(:logical_volume).provide :lvm do + exec_cmd('umount', path) + exec_cmd('fsadm', '-y', 'check', path ) + r = exec_cmd('fsadm', '-y', 'resize', path, "#{new_size}k") +- if r[:exit] != 0 and @resource[:nuke_fs_on_resize_failure] ++ if r[:exit] != 0 and [:true, "true", true ].include? @resource[:nuke_fs_on_resize_failure] ++ info( "Failed 'fsadm resize' erase the disk #{r}" ) + exec_cmd('dd', 'if=/dev/zero', "of=#{path}", "bs=512", "count=16", "conv=notrunc") + blkid('-g') + end + r = exec_cmd('lvresize', '-r', '-f', '-L', "#{new_size}k", path) + if r[:exit] != 0 +- if @resource[:nuke_fs_on_resize_failure] ++ if [:true, "true", true ].include? @resource[:nuke_fs_on_resize_failure] ++ info( "Failed 'fsadm resize' erase the disk #{r}" ) + exec_cmd('dd', 'if=/dev/zero', "of=#{path}", "bs=512", "count=16", "conv=notrunc") + blkid('-g') + lvresize( '-f', '-L', "#{new_size}k", path) || fail( "Cannot reduce to size #{new_size} because lvresize failed." ) +@@ -215,7 +217,8 @@ Puppet::Type.type(:logical_volume).provide :lvm do + exec_cmd('umount', path) + exec_cmd('fsadm', '-y', 'check', path ) + r = exec_cmd('fsadm', '-y', 'resize', path, "#{new_size}k") +- if r[:exit] != 0 and @resource[:nuke_fs_on_resize_failure] ++ if r[:exit] != 0 and [:true, "true", true ].include? 
@resource[:nuke_fs_on_resize_failure] ++ info( "Failed 'fsadm resize' erase the disk #{r}" ) + exec_cmd('dd', 'if=/dev/zero', "of=#{path}", "bs=512", "count=16", "conv=notrunc") + blkid('-g') + end +-- +1.8.3.1 + diff --git a/config/puppet-modules/puppet-lvm/centos/puppet-lvm.spec b/config/puppet-modules/puppet-lvm/centos/puppet-lvm.spec index 8f7252092..247dc03c3 100644 --- a/config/puppet-modules/puppet-lvm/centos/puppet-lvm.spec +++ b/config/puppet-modules/puppet-lvm/centos/puppet-lvm.spec @@ -16,6 +16,7 @@ Patch0: 0001-puppet-lvm-kilo-quilt-changes.patch Patch1: 0002-UEFI-pvcreate-fix.patch Patch2: 0003-US94222-Persistent-Dev-Naming.patch Patch3: 0004-extendind-nuke_fs_on_resize_failure-functionality.patch +Patch4: Fix-the-logical-statement-for-nuke_fs_on_resize.patch BuildArch: noarch @@ -34,6 +35,7 @@ A Puppet module for Logical Resource Management (LVM) %patch1 -p1 %patch2 -p1 %patch3 -p1 +%patch4 -p1 %install install -d -m 0755 %{buildroot}/%{_datadir}/puppet/modules/%{module_dir} diff --git a/kernel/kernel-rt/centos/build_srpm.data b/kernel/kernel-rt/centos/build_srpm.data index 699bea4d8..ae352ddf0 100644 --- a/kernel/kernel-rt/centos/build_srpm.data +++ b/kernel/kernel-rt/centos/build_srpm.data @@ -1,4 +1,4 @@ COPY_LIST="files/*" TIS_PATCH_VER=43 -BUILD_IS_BIG=10 +BUILD_IS_BIG=11 BUILD_IS_SLOW=12 diff --git a/kernel/kernel-rt/centos/patches/US103091-IMA-System-Configuration.patch b/kernel/kernel-rt/centos/patches/US103091-IMA-System-Configuration.patch index 1e6c79c41..936b2a0db 100644 --- a/kernel/kernel-rt/centos/patches/US103091-IMA-System-Configuration.patch +++ b/kernel/kernel-rt/centos/patches/US103091-IMA-System-Configuration.patch @@ -58,7 +58,7 @@ index d357e7d..f333b29 100644 + +############################################################################### +# -+# We will roll in the IMA X.509 certificate and pull it in the the kernel ++# We will roll in the IMA X.509 certificate and pull it in the kernel +# so that it gets loaded into the _ima keyring during boot. +# +# Ideally, this should have been treated similar to other .x509 certificates diff --git a/kernel/kernel-std/centos/build_srpm.data b/kernel/kernel-std/centos/build_srpm.data index 0de47ad03..2789e6f6e 100644 --- a/kernel/kernel-std/centos/build_srpm.data +++ b/kernel/kernel-std/centos/build_srpm.data @@ -1,4 +1,4 @@ COPY_LIST="files/*" TIS_PATCH_VER=36 -BUILD_IS_BIG=10 +BUILD_IS_BIG=11 BUILD_IS_SLOW=12 diff --git a/kernel/kernel-std/centos/patches/US103091-IMA-System-Configuration.patch b/kernel/kernel-std/centos/patches/US103091-IMA-System-Configuration.patch index 16b4e4f05..382fcc75e 100644 --- a/kernel/kernel-std/centos/patches/US103091-IMA-System-Configuration.patch +++ b/kernel/kernel-std/centos/patches/US103091-IMA-System-Configuration.patch @@ -68,7 +68,7 @@ index 44a82c1..000b9a8 100644 + +############################################################################### +# -+# We will roll in the IMA X.509 certificate and pull it in the the kernel ++# We will roll in the IMA X.509 certificate and pull it in the kernel +# so that it gets loaded into the _ima keyring during boot. 
+# +# Ideally, this should have been treated similar to other .x509 certificates diff --git a/kubernetes/helm/centos/build_srpm.data b/kubernetes/helm/centos/build_srpm.data index 1d35a996a..d18ca5832 100644 --- a/kubernetes/helm/centos/build_srpm.data +++ b/kubernetes/helm/centos/build_srpm.data @@ -1,7 +1,6 @@ VERSION=2.9.1 TAR_NAME=helm TAR="$TAR_NAME-v$VERSION-linux-amd64.tar.gz" -#COPY_LIST="${CGCS_BASE}/downloads/$TAR ${CGCS_BASE}/downloads/tiller-2.9.1-docker-image.tgz" -COPY_LIST="${CGCS_BASE}/downloads/$TAR" +COPY_LIST="${CGCS_BASE}/downloads/$TAR $FILES_BASE/*" -TIS_PATCH_VER=2 +TIS_PATCH_VER=3 diff --git a/kubernetes/helm/centos/files/helm-upload b/kubernetes/helm/centos/files/helm-upload new file mode 100644 index 000000000..a7f8dcde5 --- /dev/null +++ b/kubernetes/helm/centos/files/helm-upload @@ -0,0 +1,79 @@ +#!/bin/bash + +# +# Copyright (c) 2018 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +# This script takes the names of packaged helm charts as arguments. +# It installs them in the on-node helm chart repository and regenerates +# the repository index. + + +# We want to run as the "www" user and scripts can't be setuid. The +# sudoers permissions are set up to allow wrsroot to run this script +# as the "www" user without a password. +if [ $USER != "www" ]; then + exec sudo -u www $0 $@ +fi + + +RETVAL=0 +REINDEX=0 + +REPO_DIR='/www/pages/helm_charts' + +for FILE in "$@"; do + if [ -r $FILE ]; then + # QUESTION: should we disallow overwriting an existing file? + # The versions are embedded in the filename, so it shouldn't + # cause problems. + cp $FILE $REPO_DIR + if [ $? -ne 0 ]; then + echo Problem adding $FILE to helm chart registry. + RETVAL=1 + else + REINDEX=1 + fi + else + echo Cannot read file ${FILE}. + RETVAL=1 + fi +done + + +# Now re-index the helm repository if we successfully copied in +# any new charts. +if [ $REINDEX -eq 1 ]; then + /usr/sbin/helm repo index $REPO_DIR +fi + +if [ ! -f "/etc/platform/simplex" ]; then + # We're not a one node system, copy the files to the other + # controller if we can + if [ $HOSTNAME == "controller-0" ]; then + TARGET="controller-1" + else + TARGET="controller-0" + fi + + # We've modified etc/rsyncd.conf to allow access to /www/helm_charts + # To avoid races, copy over the index file last. + rsync -acv --exclude=index.yaml ${REPO_DIR}/ rsync://${TARGET}/helm_charts + if [ $? -ne 0 ]; then + echo Problem syncing helm charts to $TARGET + RETVAL=1 + fi + + rsync -acv ${REPO_DIR}/index.yaml rsync://${TARGET}/helm_charts + if [ $? -ne 0 ]; then + echo Problem syncing helm chart index file to $TARGET + RETVAL=1 + fi +fi + +# We also need to sync the helm charts on node startup +# in case they were added while the node was down. 
+ +exit $RETVAL diff --git a/kubernetes/helm/centos/files/helm.sudo b/kubernetes/helm/centos/files/helm.sudo new file mode 100644 index 000000000..48e02bfbb --- /dev/null +++ b/kubernetes/helm/centos/files/helm.sudo @@ -0,0 +1,3 @@ +wrsroot ALL=(www) NOPASSWD: /usr/local/sbin/helm-upload + +Defaults lecture=never, secure_path=/usr/local/bin:/usr/bin:/bin:/usr/local/sbin:/usr/sbin:/sbin diff --git a/kubernetes/helm/centos/helm.spec b/kubernetes/helm/centos/helm.spec index 6afe901ef..f4f56e31c 100644 --- a/kubernetes/helm/centos/helm.spec +++ b/kubernetes/helm/centos/helm.spec @@ -7,7 +7,8 @@ Group: devel Packager: Wind River URL: https://github.com/kubernetes/helm/releases Source0: %{name}-v%{version}-linux-amd64.tar.gz -#Source1: tiller-2.9.1-docker-image.tgz +Source1: helm-upload +Source2: helm.sudo Requires: /bin/bash @@ -20,11 +21,13 @@ Requires: /bin/bash %install install -d %{buildroot}%{_sbindir} install -m 755 ${RPM_BUILD_DIR}/linux-amd64/helm %{buildroot}%{_sbindir}/helm -#install -d %{buildroot}%{_sharedstatedir}/tiller -#install -m 400 %{SOURCE1} %{buildroot}%{_sharedstatedir}/tiller/tiller-2.9.1-docker-image.tgz +install -d %{buildroot}/usr/local/sbin +install -m 755 %{SOURCE1} %{buildroot}/usr/local/sbin/helm-upload +install -d %{buildroot}%{_sysconfdir}/sudoers.d +install -m 440 %{SOURCE2} %{buildroot}%{_sysconfdir}/sudoers.d/helm %files %defattr(-,root,root,-) %{_sbindir}/helm -#%{_sharedstatedir}/tiller/tiller-2.9.1-docker-image.tgz - +/usr/local/sbin/helm-upload +%{_sysconfdir}/sudoers.d/helm diff --git a/monitoring/collectd-extensions/src/mtce_notifier.py b/monitoring/collectd-extensions/src/mtce_notifier.py index c18977eab..1ffa88a2a 100755 --- a/monitoring/collectd-extensions/src/mtce_notifier.py +++ b/monitoring/collectd-extensions/src/mtce_notifier.py @@ -58,7 +58,7 @@ FAIL = 1 MTCE_CMD_RX_PORT = 2101 # same state message throttle count. -# ... only send the the degrade message every 'this' number +# ... only send the degrade message every 'this' number # while the state of assert or clear remains the same. 
ONE_EVERY = 10 diff --git a/mwa-sparta.map b/mwa-sparta.map deleted file mode 100644 index 78cf86cf3..000000000 --- a/mwa-sparta.map +++ /dev/null @@ -1,20 +0,0 @@ -cgcs/recipes-3rdparty/python|python -cgcs/recipes-base|base -cgcs/common-bsp/recipes-kernel/linux|kernel-std -cgcs/common-bsp/recipes-kernel/linux-rt|kernel-rt -cgcs/recipes-cgi|cgi -cgcs/recipes-connectivity|connectivity -cgcs/recipes-core|core -cgcs/recipes-devtools|devtools -cgcs/recipes-extended|extended -cgcs/recipes-kernel|kernel -cgcs/recipes-networking|networking -cgcs/recipes-power|power -cgcs/recipes-restapi-doc/restapi-doc|restapi-doc -cgcs/recipes-security|security -cgcs/recipes-support|support -avs/drivers/mellanox/libibverbs|mellanox/libibverbs -avs/drivers/mellanox/libmlx4|mellanox/libmlx4 -avs/drivers/mellanox/libmlx5|mellanox/libmlx5 -avs/drivers/mellanox/mlnx-ofa_kernel|mellanox/mlnx-ofa_kernel -avs/drivers/mellanox/rdma-core|mellanox/rdma-core diff --git a/networking/mellanox/libibverbs/centos/meta_patches/0001-Update-package-versioning-for-TIS-format.patch b/networking/mellanox/libibverbs/centos/meta_patches/0001-Update-package-versioning-for-TIS-format.patch index 7c13df876..2e617d9e6 100644 --- a/networking/mellanox/libibverbs/centos/meta_patches/0001-Update-package-versioning-for-TIS-format.patch +++ b/networking/mellanox/libibverbs/centos/meta_patches/0001-Update-package-versioning-for-TIS-format.patch @@ -16,8 +16,8 @@ index e55433c..74cb4d2 100644 Name: libibverbs Version: 41mlnx1 --Release: OFED.4.2.1.0.6.42120 -+Release: OFED.4.2.1.0.6.42120%{?_tis_dist}.%{tis_patch_ver} +-Release: OFED.4.3.2.1.6.43302 ++Release: OFED.4.3.2.1.6.43302%{?_tis_dist}.%{tis_patch_ver} Summary: A library for direct userspace use of RDMA (InfiniBand/iWARP) hardware Group: System Environment/Libraries diff --git a/networking/mellanox/libibverbs/centos/srpm_path b/networking/mellanox/libibverbs/centos/srpm_path index c58521323..7cd73d918 100644 --- a/networking/mellanox/libibverbs/centos/srpm_path +++ b/networking/mellanox/libibverbs/centos/srpm_path @@ -1 +1 @@ -repo:stx/downloads/libibverbs-41mlnx1-OFED.4.2.1.0.6.42120.src.rpm +repo:stx/downloads/libibverbs-41mlnx1-OFED.4.3.2.1.6.43302.src.rpm diff --git a/networking/openvswitch/centos/build_srpm.data b/networking/openvswitch/centos/build_srpm.data index 6e36f5d7f..b98158431 100644 --- a/networking/openvswitch/centos/build_srpm.data +++ b/networking/openvswitch/centos/build_srpm.data @@ -1,3 +1,3 @@ COPY_LIST="files/*" -TIS_PATCH_VER=0 +TIS_PATCH_VER=1 BUILD_IS_SLOW=12 diff --git a/networking/openvswitch/centos/meta_patches/0007-enable-mlx-pmds.patch b/networking/openvswitch/centos/meta_patches/0007-enable-mlx-pmds.patch new file mode 100644 index 000000000..a20174ede --- /dev/null +++ b/networking/openvswitch/centos/meta_patches/0007-enable-mlx-pmds.patch @@ -0,0 +1,45 @@ +diff --git a/SOURCES/x86_64-native-linuxapp-gcc-config b/SOURCES/x86_64-native-linuxapp-gcc-config +index f81d420..eab161c 100644 +--- a/SOURCES/x86_64-native-linuxapp-gcc-config ++++ b/SOURCES/x86_64-native-linuxapp-gcc-config +@@ -197,12 +197,12 @@ CONFIG_RTE_LIBRTE_FM10K_DEBUG_DRIVER=n + CONFIG_RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE=y + CONFIG_RTE_LIBRTE_FM10K_INC_VECTOR=y + # Compile burst-oriented Mellanox ConnectX-3 (MLX4) PMD +-CONFIG_RTE_LIBRTE_MLX4_PMD=n ++CONFIG_RTE_LIBRTE_MLX4_PMD=y + CONFIG_RTE_LIBRTE_MLX4_DEBUG=n + CONFIG_RTE_LIBRTE_MLX4_DEBUG_BROKEN_VERBS=n + CONFIG_RTE_LIBRTE_MLX4_TX_MP_CACHE=8 + # Compile burst-oriented Mellanox ConnectX-4 & ConnectX-5 (MLX5) PMD 
+-CONFIG_RTE_LIBRTE_MLX5_PMD=n ++CONFIG_RTE_LIBRTE_MLX5_PMD=y + CONFIG_RTE_LIBRTE_MLX5_DEBUG=n + CONFIG_RTE_LIBRTE_MLX5_TX_MP_CACHE=8 + # Compile burst-oriented Broadcom PMD driver +diff --git a/SPECS/openvswitch.spec b/SPECS/openvswitch.spec +index 29255d5..f392e95 100644 +--- a/SPECS/openvswitch.spec ++++ b/SPECS/openvswitch.spec +@@ -155,6 +155,7 @@ BuildRequires: libcap-ng libcap-ng-devel + %ifarch %{dpdkarches} + # DPDK driver dependencies + BuildRequires: zlib-devel libpcap-devel numactl-devel ++BuildRequires: rdma-core-devel + Requires: python-pyelftools + + # Virtual provide for depending on DPDK-enabled OVS +@@ -356,7 +357,12 @@ cd - + --dpdk \ + < rhel/usr_lib_systemd_system_ovs-vswitchd.service.in \ + > rhel/usr_lib_systemd_system_ovs-vswitchd.service +-make %{?_smp_mflags} ++make %{?_smp_mflags} \ ++%if %{with dpdk} ++%ifarch %{dpdkarches} ++ LDFLAGS="-libverbs -lmlx4 -lmlx5" ++%endif ++%endif + + %install + rm -rf $RPM_BUILD_ROOT diff --git a/networking/openvswitch/centos/meta_patches/PATCH_ORDER b/networking/openvswitch/centos/meta_patches/PATCH_ORDER index 12dcf854c..1c6551c98 100644 --- a/networking/openvswitch/centos/meta_patches/PATCH_ORDER +++ b/networking/openvswitch/centos/meta_patches/PATCH_ORDER @@ -4,3 +4,4 @@ 0004-add-pmon-conf-files.patch 0005-log-rotation-config.patch 0006-rpm-check-with-condition.patch +0007-enable-mlx-pmds.patch diff --git a/restapi-doc/centos/build_srpm.data b/restapi-doc/centos/build_srpm.data index deff7763a..e74282c78 100644 --- a/restapi-doc/centos/build_srpm.data +++ b/restapi-doc/centos/build_srpm.data @@ -3,5 +3,5 @@ COPY_LIST="$SRC_DIR/* \ $CGCS_BASE/downloads/mvn.repo.tgz \ " -TIS_PATCH_VER=26 +TIS_PATCH_VER=27 BUILD_IS_SLOW=3 diff --git a/restapi-doc/centos/restapi-doc.spec b/restapi-doc/centos/restapi-doc.spec index 11655f56b..c4d18f8f2 100644 --- a/restapi-doc/centos/restapi-doc.spec +++ b/restapi-doc/centos/restapi-doc.spec @@ -1,6 +1,6 @@ Summary: RestAPI-Doc Name: restapi-doc -Version: 1.9.0 +Version: 1.9.1 Release: %{tis_patch_ver}%{?_tis_dist} License: Apache-2.0 Group: devel diff --git a/restapi-doc/restapi-doc/Makefile b/restapi-doc/restapi-doc/Makefile index 05e9fbc6f..a8997f0af 100644 --- a/restapi-doc/restapi-doc/Makefile +++ b/restapi-doc/restapi-doc/Makefile @@ -1,5 +1,5 @@ # increment this every release -API_VERSION := "1.9.0" +API_VERSION := "1.9.1" build: @git status > /dev/null ; \ diff --git a/restapi-doc/restapi-doc/README.mvn_cache b/restapi-doc/restapi-doc/README.mvn_cache index d91e0f804..1e2e235f9 100644 --- a/restapi-doc/restapi-doc/README.mvn_cache +++ b/restapi-doc/restapi-doc/README.mvn_cache @@ -5,11 +5,10 @@ Steps to produce mvn.repo.tgz [Maven cache] cd $MY_REPO/stx/stx-integ/restapi-doc/restapi-doc cp Makefile Makefile.backup cp Makefile.mvn_cache Makefile -build_srpms restapi-doc +build-srpms restapi-doc mock -r $MY_BUILD_CFG_STD "FILE_NAME_TO_THE_BUILT_SRPM" -mock -r $MY_BUILD_CFG_STD --copyout /builddir/build/BUILD/restapi-doc-1.6.0/mvn.repo.tgz ~/ +mock -r $MY_BUILD_CFG_STD --copyout /builddir/build/BUILD/restapi-doc-1.9.1/mvn.repo.tgz ~/ cp ~/mvn.repo.tgz $MY_REPO/stx/downloads/ -cd $MY_REPO/stx/downloads/ # only the first time # ln -s ../../../downloads/mvn.repo.tgz mvn.repo.tgz diff --git a/restapi-doc/restapi-doc/api-ref-guides/pom.xml b/restapi-doc/restapi-doc/api-ref-guides/pom.xml index eed2b9c7e..f2669c716 100644 --- a/restapi-doc/restapi-doc/api-ref-guides/pom.xml +++ b/restapi-doc/restapi-doc/api-ref-guides/pom.xml @@ -2,7 +2,7 @@ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> + + + + + + + +GET'> +PUT'> +POST'> +DELETE'> +]> + + + Titanium Fault Management API v1 + API Reference + + + + + + + Wind River + + + + 2018 + Wind River + + Titanium Cloud + + + + Copyright details are filled in by the + template. + + + + + + diff --git a/restapi-doc/restapi-doc/api-ref-guides/src/bk-api-ref-smapi-v1.xml b/restapi-doc/restapi-doc/api-ref-guides/src/bk-api-ref-smapi-v1.xml new file mode 100644 index 000000000..a3ce010fe --- /dev/null +++ b/restapi-doc/restapi-doc/api-ref-guides/src/bk-api-ref-smapi-v1.xml @@ -0,0 +1,57 @@ + + + + + + + + + +GET'> +PUT'> +POST'> +DELETE'> +]> + + + Service Management API v1 + API Reference + + + + + + + Wind River + + + + 2018 + Wind River + + Titanium Cloud + + + + Copyright details are filled in by the + template. + + + + + + diff --git a/restapi-doc/restapi-doc/api-ref-guides/src/bk-api-ref.xml b/restapi-doc/restapi-doc/api-ref-guides/src/bk-api-ref.xml index 1a6b0f9a4..299f16579 100644 --- a/restapi-doc/restapi-doc/api-ref-guides/src/bk-api-ref.xml +++ b/restapi-doc/restapi-doc/api-ref-guides/src/bk-api-ref.xml @@ -1,6 +1,6 @@ diff --git a/restapi-doc/restapi-doc/api-ref/src/docbkx/api-ref-fm-v1.xml b/restapi-doc/restapi-doc/api-ref/src/docbkx/api-ref-fm-v1.xml new file mode 100644 index 000000000..fca013489 --- /dev/null +++ b/restapi-doc/restapi-doc/api-ref/src/docbkx/api-ref-fm-v1.xml @@ -0,0 +1,30 @@ + + + + + Titanium Fault Management API v1 + + 2017 + Wind River + + + + + + + + + + diff --git a/restapi-doc/restapi-doc/api-ref/src/docbkx/api-ref-smapi-v1.xml b/restapi-doc/restapi-doc/api-ref/src/docbkx/api-ref-smapi-v1.xml new file mode 100644 index 000000000..ad1b3eed4 --- /dev/null +++ b/restapi-doc/restapi-doc/api-ref/src/docbkx/api-ref-smapi-v1.xml @@ -0,0 +1,30 @@ + + + + + Titanium Service Management API v1 + + 2018 + Wind River + + + + + + + + + + diff --git a/restapi-doc/restapi-doc/api-ref/src/docbkx/ch_fm-api-v1.xml b/restapi-doc/restapi-doc/api-ref/src/docbkx/ch_fm-api-v1.xml new file mode 100644 index 000000000..2884f048b --- /dev/null +++ b/restapi-doc/restapi-doc/api-ref/src/docbkx/ch_fm-api-v1.xml @@ -0,0 +1,110 @@ + + + + Fault Management API v1 + The API supports alarm and event collection of the cloud platform itself. + The typical port used for the FM REST API is 18002. + However, proper technique would be to look up the FM service endpoint in Keystone. + + + + + + +
+ API versions
+ Alarms
+ These APIs allow the display of the Active Alarms in the system.
+ Event Log
+ These APIs allow the display of the Event Log in the system. The Event Log contains both historical alarms and customer logs.
+ Event Suppression
+ These APIs allow the display of the Event Suppression state in the system.
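A short client sketch may make the FM chapter above concrete. It is illustrative only: the host address, the /v1/alarms path, the "alarms" response key, and the OS_TOKEN environment variable are assumptions rather than a published client interface, and in practice the endpoint should be resolved from the Keystone catalog as the chapter advises.

import os
import requests

# Assumptions (not from a published client): FM host and port, the
# /v1/alarms path, the "alarms" response key, and a Keystone token
# already exported in $OS_TOKEN.
FM_ENDPOINT = "http://192.168.204.2:18002"
headers = {"X-Auth-Token": os.environ["OS_TOKEN"]}

# include_suppress=true mirrors the query option described above.
resp = requests.get(FM_ENDPOINT + "/v1/alarms",
                    headers=headers,
                    params={"include_suppress": "true"})
resp.raise_for_status()
for alarm in resp.json().get("alarms", []):
    # alarm_id plus entity_instance_id uniquely identify an alarm instance.
    print("{} [{}] {}".format(alarm["alarm_id"],
                              alarm["severity"],
                              alarm["entity_instance_id"]))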
diff --git a/restapi-doc/restapi-doc/api-ref/src/docbkx/ch_smapi-v1.xml b/restapi-doc/restapi-doc/api-ref/src/docbkx/ch_smapi-v1.xml
new file mode 100644
index 000000000..8c83ab76e
--- /dev/null
+++ b/restapi-doc/restapi-doc/api-ref/src/docbkx/ch_smapi-v1.xml
@@ -0,0 +1,104 @@
+ SM API v1
+ Interact with Service Management.
+ The typical port used for the SM REST API is 7777.
+ However, the proper technique is to look up the smapi service endpoint in Keystone.
+ API versions
+ Services
+ These APIs allow the display of the running services and their attributes.
+ Service Nodes
+ These APIs allow the display of the service nodes and their attributes.
+ Service Groups
+ These APIs allow the display of the service groups and their attributes.
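The same pattern applies to the SM API. The sketch below assumes token authentication via OS_TOKEN; the port, the /v1/services path, and the response shape follow the versionv1-get-response.json and service_list-response.json samples added later in this patch.

import os
import requests

# Assumptions: SM API host and token auth via $OS_TOKEN; path and
# response shape are taken from the api_samples in this patch.
SMAPI_ENDPOINT = "http://10.10.10.2:7777"
headers = {"X-Auth-Token": os.environ["OS_TOKEN"]}

resp = requests.get(SMAPI_ENDPOINT + "/v1/services", headers=headers)
resp.raise_for_status()

# Response shape per service_list-response.json: {"services": [{...}]}
for svc in resp.json()["services"]:
    marker = "" if svc["state"] == svc["desired_state"] else " <-- attention"
    print("{:<16} state={} desired={}{}".format(
        svc["name"], svc["state"], svc["desired_state"], marker))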
diff --git a/restapi-doc/restapi-doc/api-ref/src/docbkx/ch_sysinv-api-v1.xml b/restapi-doc/restapi-doc/api-ref/src/docbkx/ch_sysinv-api-v1.xml
index 0d0aa76b7..3983dfbf0 100644
--- a/restapi-doc/restapi-doc/api-ref/src/docbkx/ch_sysinv-api-v1.xml
+++ b/restapi-doc/restapi-doc/api-ref/src/docbkx/ch_sysinv-api-v1.xml
@@ -16,8 +16,8 @@ SPDX-License-Identifier: Apache-2.0
 SysInv API v1
 Manage physical servers with the Titanium System Inventory API. This includes inventory collection and configuration of nodes, ports, interfaces, CPUs, disks,
- partitions, memory, and sensors. The API also supports alarm collection for fault
- events of the cloud itself as well as configuration of the cloud's SNMP interface.
+ partitions, memory, and sensors. The API also supports configuration of the
+ cloud's SNMP interface.
 The typical port used for the SysInv REST API is 6385.
 However, proper technique would be to look up the sysinv service endpoint in Keystone.
@@ -633,77 +633,6 @@ configuration entity for the system.
- Alarms
- These APIs allow the display of the Active Alarms in the system.
- Event Log
- These APIs allow the display of the Event Log in the system. The Event log contains both historical alarms and customer logs.
- Event Suppression
- These APIs allow the display of the Event Suppression state in the system.
@@ -1429,4 +1358,26 @@ configuration entity for the system.
+ Labels
+ diff --git a/restapi-doc/restapi-doc/api-ref/src/docbkx/itemizedlist-service-list.xml b/restapi-doc/restapi-doc/api-ref/src/docbkx/itemizedlist-service-list.xml index cb86f8045..4516ec933 100644 --- a/restapi-doc/restapi-doc/api-ref/src/docbkx/itemizedlist-service-list.xml +++ b/restapi-doc/restapi-doc/api-ref/src/docbkx/itemizedlist-service-list.xml @@ -1,6 +1,6 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + '> + + + + + + '> + + + + + + '> + + + + + '> + + + + + + '> + + + + + + '> + + + + + + + The universally unique identifier for this object. + + + + + For convenience, resources contain links to themselves. + This allows a client to easily obtain rather than construct + resource URIs. The following types of link relations are + associated with resources: a self link containing a versioned + link to the resource, and a bookmark link containing a permanent + link to a resource that is appropriate for long term storage. + + + + + The time when the object was created. + + + + + The time when the object was last updated. + + + '> + + + + + + + The alarm ID; each type of alarm has a unique ID. Note + the alarm_id and the entity_instance_id uniquely identify + an alarm instance. + + + + + The instance of the object raising alarm. A . separated list + of sub-entity-type=instance-value pairs, representing the containment + structure of the overall entity instance. Note + the alarm_id and the entity_instance_id uniquely identify + an alarm instance. + + + + + The text description of the alarm. + + + + + The severity of the alarm; critical, + major, minor, or warning. + + + + + The time in UTC at which the alarm has last been updated. + + + + + The unique identifier of the alarm. + + + '> + + + + The state of the alarm; set or clear + + + + + Indicates whether the alarm affects the service. + + + + + The proposed action to clear the alarm. + + + + + The type of the alarm. + + + + + The type of the object raising the alarm. A . separated list + of sub-entity-type, representing the containment structure of the + overall entity type. + + + + + The probable cause of the alarm. + + + + + Indicates whether suppression of the specific alarm is allowed. + + + '> + + + + UUID of the system. + + + + + Overall system status based on alarms present; critical, + degraded, or OK. + + + + + Count of critical alarms on the system + + + + + Count of major alarms on the system + + + + + Count of minor alarms on the system + + + + + Count of warnings on the system + + + '> + + + + + + + The event log ID; each type of event log has a unique ID. Note + the event_log_id and the entity_instance_id uniquely identify + an event log instance. + + + + + The state of the event; set, clear or log + + + + + The instance of the object generating the event log. A . separated list + of sub-entity-type=instance-value pairs, representing the containment + structure of the overall entity instance. Note + the event_log_id and the entity_instance_id uniquely identify + an event log instance. + + + + + The text description of the event log. + + + + + The severity of the event log; critical, + major, minor or warning. + + + + + The time in UTC at which the event log has last been updated. + + + + + The unique identifier of the event log. + + + + + The next attribute is the request to use to get the next n + items. It is used to paginate the event log list. + + + '> + + + + The state of the event; set, clear or log + + + + + Indicates whether the event affects the service. 
+ + + + + The proposed action to clear the event. + + + + + The type of the event. + + + + + The type of the object raising the alarm. A . separated list + of sub-entity-type, representing the containment structure of the + overall entity type. + + + + + The probable cause of the event. + + + + + Indicates whether suppression of the specific event is allowed. + + + '> + + + + + + + The alarm ID type (event ID type) that can be suppressed or unsuppressed. + + + + + + The text description of the event type. + + + + + The suppression status for the event ID type; suppressed or unsuppressed + + + '> + + + + GET'> + PUT'> + POST'> + DELETE'> diff --git a/restapi-doc/restapi-doc/api-ref/src/wadls/fm-api/v1/fm-api-v1.wadl b/restapi-doc/restapi-doc/api-ref/src/wadls/fm-api/v1/fm-api-v1.wadl new file mode 100644 index 000000000..07689fe08 --- /dev/null +++ b/restapi-doc/restapi-doc/api-ref/src/wadls/fm-api/v1/fm-api-v1.wadl @@ -0,0 +1,469 @@ + + + +%common;]> + + + + + + + + + + + + + + + + + + + + + + + + + + + The unique identifier of an existing active alarm. + + + + + + + + + + + + + + + + + + + + + + + + The unique identifier of an event log. + + + + + + + + + + + + + + + + + The unique identifier of an event suppression. + + + + + + + + + + + + + + + + API version details. + + + + + + + + + + + + + + + + + Lists information about all Fault Management API versions. + + + + + + + + + &commonFaults; &getFaults; + + + + + Shows details for Fault Management API v1. + + + + + + + + + &commonFaults; &getFaults; + + + + + + + + + + + + + + + Lists all active alarms based on specified query. + The supported query options are alarm_id, entity_type_id, entity_instance_id, + severity and alarm_type. + + + + + + + + This optional parameter when set to true (include_suppress=true) specifies + to include suppressed alarms in output. + + + + + + + + + + + + + + + + The list of active alarms based on the specified query. + + + &alarmListShowParameters; + + + + + + + + &commonFaults; &getFaults; + + + + + + Shows information about a specific alarm. + + + + + + + &alarmListShowParameters; + &alarmDetailShowParameters; + &commonListShowParameters; + + + + + + + + &commonFaults; &getFaults; + + + + + + Deletes a specific alarm. + NOTE Typically this command should NOT be used. I.e typically + alarms will be and should be cleared by the system + when the alarm condition clears. This command is only provided + in the event that the alarm has cleared but for some reason the + system has not removed the alarm. + + + + + + + + + + + + + Summarize all active alarms by severity. + + + + + + + + This optional parameter when set to true (include_suppress=true) specifies + to include suppressed alarms in the summations (default false). + + + + + + + + + &alarmSummaryShowParameters; + + + + + + + + &commonFaults; + + + + + + + + + + + + + Lists all event logs (historical alarms and customer logs) based on specified query. The logs + are returned in reverse chronological order. + The supported query options are event_log_id, entity_type_id, entity_instance_id, + severity, event_log_type, start and end. + + + + + + + + This parameter specifies filter rules for the logs to + be returned. + + + + + This parameter specifies the maximum number of event logs to + be returned. + + + + + This optional parameter when set to true (alarms=true) specifies + that only alarm event log records should be returned. + + + + + This optional parameter when set to true (logs=true) specifies + that only customer log records should be returned. 
+ + + + + This optional parameter when set to true (include_suppress=true) specifies + to include suppressed alarms in output. + + + + + + + + + + + + + + + + The list of events log based on the specified query. + + + &eventLogListShowParameters; + + + + + + + + &commonFaults; &getFaults; + + + + + Shows information about a specific event log. + + + + + + + &eventLogListShowParameters; + &commonListShowParameters; + + + + + + + + &commonFaults; &getFaults; + + + + + + + + + + + + + Lists suppressed event id's. + + + + + + + + + The list of suppressed event types. + + + &EventSuppressionListShowParameters; + &commonListShowParameters; + + + + + + + + &commonFaults; &getFaults; + + + + + Modifies the value of an event suppression. + + + + + + + + The suppression status of an event suppression; suppressed or unsuppressed + + + + + + + + + + + + + + + + URIs to the modified event suppression. + + + + &EventSuppressionListShowParameters; + &commonListShowParameters; + + + + + + + + &postPutFaults; + + + + diff --git a/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/api_samples/service_group_list-response.json b/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/api_samples/service_group_list-response.json new file mode 100644 index 000000000..737ce14a7 --- /dev/null +++ b/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/api_samples/service_group_list-response.json @@ -0,0 +1,81 @@ +{ + "sm_servicegroup":[ + { + "status":"", + "name":"controller", + "service_group_name":"web-services", + "node_name":"controller-1", + "state":"active", + "desired_state":"active", + "id":1, + "condition":"", + "uuid":"e3aa5e50-030b-4ab6-a339-929f0be50e5d" + }, + { + "status":"", + "name":"controller", + "service_group_name":"directory-services", + "node_name":"controller-1", + "state":"active", + "desired_state":"active", + "id":2, + "condition":"", + "uuid":"f7b01783-ea3d-44b8-8dd3-9a0c4a1cae9d" + }, + { + "status":"", + "name":"controller", + "service_group_name":"patching-services", + "node_name":"controller-1", + "state":"active", + "desired_state":"active", + "id":3, + "condition":"", + "uuid":"f64bc693-62fa-4f31-b96e-9851c42669ec" + }, + { + "status":"", + "name":"controller", + "service_group_name":"vim-services", + "node_name":"controller-1", + "state":"active", + "desired_state":"active", + "id":4, + "condition":"", + "uuid":"e7dab99d-7bdc-4756-b8b3-b069e7b26e0d" + }, + { + "status":"", + "name":"controller", + "service_group_name":"cloud-services", + "node_name":"controller-1", + "state":"active", + "desired_state":"active", + "id":5, + "condition":"", + "uuid":"149e9f4e-13ba-4d91-9e0e-09905073fda6" + }, + { + "status":"", + "name":"controller", + "service_group_name":"controller-services", + "node_name":"controller-1", + "state":"active", + "desired_state":"active", + "id":6, + "condition":"", + "uuid":"54d46994-9c0e-43bd-8d83-be7396f04f70" + }, + { + "status":"", + "name":"controller", + "service_group_name":"oam-services", + "node_name":"controller-1", + "state":"active", + "desired_state":"active", + "id":7, + "condition":"", + "uuid":"f7b532bf-0dc0-41bd-b38a-75b7747da754" + } + ] +} diff --git a/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/api_samples/service_group_show-response.json b/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/api_samples/service_group_show-response.json new file mode 100644 index 000000000..538d18436 --- /dev/null +++ b/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/api_samples/service_group_show-response.json @@ -0,0 +1,11 @@ +{ + "status":"", + 
"name":"controller", + "service_group_name":"oam-services", + "node_name":"controller-1", + "state":"active", + "desired_state":"active", + "id":7, + "condition":"", + "uuid":"f7b532bf-0dc0-41bd-b38a-75b7747da754" +} diff --git a/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/api_samples/service_list-response.json b/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/api_samples/service_list-response.json new file mode 100644 index 000000000..a9f2199ee --- /dev/null +++ b/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/api_samples/service_list-response.json @@ -0,0 +1,39 @@ +{ + "services":[ + { + "status":"", + "state":"enabled-active", + "id":5, + "desired_state":"enabled-active", + "name":"drbd-cgcs" + }, + { + "status":"", + "state":"enabled-active", + "id":3, + "desired_state":"enabled-active", + "name":"drbd-pg" + }, + { + "status":"", + "state":"enabled-active", + "id":4, + "desired_state":"enabled-active", + "name":"drbd-rabbit" + }, + { + "status":"", + "state":"enabled-active", + "id":2, + "desired_state":"enabled-active", + "name":"management-ip" + }, + { + "status":"", + "state":"enabled-active", + "id":1, + "desired_state":"enabled-active", + "name":"oam-ip" + } + ] +} diff --git a/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/api_samples/service_node_list-response.json b/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/api_samples/service_node_list-response.json new file mode 100644 index 000000000..e8da62855 --- /dev/null +++ b/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/api_samples/service_node_list-response.json @@ -0,0 +1,20 @@ +{ + "nodes":[ + { + "administrative_state":"unlocked", + "ready_state":"disabled", + "name":"controller-0", + "operational_state":"disabled", + "availability_status":"unknown", + "id":2 + }, + { + "administrative_state":"unlocked", + "ready_state":"enabled", + "name":"controller-1", + "operational_state":"enabled", + "availability_status":"available", + "id":1 + } + ] +} diff --git a/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/api_samples/service_node_show-response.json b/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/api_samples/service_node_show-response.json new file mode 100644 index 000000000..fbff27b50 --- /dev/null +++ b/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/api_samples/service_node_show-response.json @@ -0,0 +1,8 @@ +{ + "administrative_state":"unlocked", + "ready_state":"enabled", + "name":"controller-1", + "operational_state":"enabled", + "availability_status":"available", + "id":1 +} diff --git a/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/api_samples/service_parameter_list-response.json b/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/api_samples/service_parameter_list-response.json new file mode 100644 index 000000000..dfe3af2f5 --- /dev/null +++ b/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/api_samples/service_parameter_list-response.json @@ -0,0 +1,73 @@ +{ + "parameters":[ + { + "uuid":"7694eca1-21e0-4998-bf2c-15f71b3bddc5", + "links":[ + { + "href":"http://10.10.10.2:6385/v1/parameters/7694eca1-21e0-4998-bf2c-15f71b3bddc5", + "rel":"self" + }, + { + "href":"http://10.10.10.2:6385/parameters/7694eca1-21e0-4998-bf2c-15f71b3bddc5", + "rel":"bookmark" + } + ], + "section":"assignment", + "value":"keystone.assignment.backends.sql.Assignment", + "service":"identity", + "name":"driver" + }, + { + "uuid":"5eeebd50-4809-4d2e-b4ce-1acd9cfeadab", + "links":[ + { + "href":"http://10.10.10.2:6385/v1/parameters/5eeebd50-4809-4d2e-b4ce-1acd9cfeadab", + "rel":"self" + }, + 
{ + "href":"http://10.10.10.2:6385/parameters/5eeebd50-4809-4d2e-b4ce-1acd9cfeadab", + "rel":"bookmark" + } + ], + "section":"identity", + "value":"keystone.identity.backends.sql.Identity", + "service":"identity", + "name":"driver" + }, + { + "uuid":"b84378ae-6e0a-48f0-b394-f8a519fc14f4", + "links":[ + { + "href":"http://10.10.10.2:6385/v1/parameters/b84378ae-6e0a-48f0-b394-f8a519fc14f4", + "rel":"self" + }, + { + "href":"http://10.10.10.2:6385/parameters/b84378ae-6e0a-48f0-b394-f8a519fc14f4", + "rel":"bookmark" + } + ], + "section":"resource", + "value":"keystone.resource.backends.sql.Resource", + "service":"identity", + "name":"driver" + }, + { + "uuid":"6634285f-428e-4ebe-becd-cbb0ab7f30ad", + "links":[ + { + "href":"http://10.10.10.2:6385/v1/parameters/6634285f-428e-4ebe-becd-cbb0ab7f30ad", + "rel":"self" + }, + { + "href":"http://10.10.10.2:6385/parameters/6634285f-428e-4ebe-becd-cbb0ab7f30ad", + "rel":"bookmark" + } + ], + "section":"role", + "value":"keystone.assignment.role_backends.sql.Role", + "service":"identity", + "name":"driver" + } + ] +} + diff --git a/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/api_samples/service_parameter_show-response.json b/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/api_samples/service_parameter_show-response.json new file mode 100644 index 000000000..ad1b80bd6 --- /dev/null +++ b/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/api_samples/service_parameter_show-response.json @@ -0,0 +1,20 @@ +{ + "uuid":"fd5e5e4c-2723-430a-b162-b06b49d94313", + "links":[ + { + "href":"http://192.168.204.2:6385/v1/parameters/fd5e5e4c-2723-430a-b162-b06b49d94313", + "rel":"self" + }, + { + "href":"http://192.168.204.2:6385/parameters/fd5e5e4c-2723-430a-b162-b06b49d94313", + "rel":"bookmark" + } + ], + "section":"identity", + "updated_at":"2015-12-23T19:07:41.257052+00:00", + "value":"keystone.identity.backends.sql.Identity", + "service":"identity", + "created_at":"2015-12-23T18:54:53.676200+00:00", + "name":"driver" +} + diff --git a/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/api_samples/service_show-response.json b/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/api_samples/service_show-response.json new file mode 100644 index 000000000..d37f86a3c --- /dev/null +++ b/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/api_samples/service_show-response.json @@ -0,0 +1,7 @@ +{ + "status":"", + "state":"enabled-active", + "id":1, + "desired_state":"enabled-active", + "name":"oam-ip" +} diff --git a/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/api_samples/smapi-versions-response.json b/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/api_samples/smapi-versions-response.json new file mode 100644 index 000000000..794c6d56b --- /dev/null +++ b/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/api_samples/smapi-versions-response.json @@ -0,0 +1,24 @@ +{ + "default_version": { + "id": "v1", + "links": [ + { + "href": "http://10.10.10.2:7777/v1/", + "rel": "self" + } + ] + }, + "version": [ + { + "id": "v1", + "links": [ + { + "href": "http://10.10.10.2:7777/v1/", + "rel": "self" + } + ] + } + ], + "name": "System Management API", + "description": "System Management API from Wind River" +} diff --git a/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/api_samples/versionv1-get-response.json b/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/api_samples/versionv1-get-response.json new file mode 100644 index 000000000..d7984e965 --- /dev/null +++ 
b/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/api_samples/versionv1-get-response.json @@ -0,0 +1,59 @@ +{ + "links": [ + { + "href": "http://10.10.10.2:7777/v1/", + "rel": "self" + } + ], + "id": "v1", + "servicenode": [ + { + "href": "http://10.10.10.2:7777/v1/servicenode/", + "rel": "self" + }, + { + "href": "http://10.10.10.2:7777/servicenode/", + "rel": "bookmark" + } + ], + "services": [ + { + "href": "http://10.10.10.2:7777/v1/services/", + "rel": "self" + }, + { + "href": "http://10.10.10.2:7777/services/", + "rel": "bookmark" + } + ], + "sm_sda": [ + { + "href": "http://10.10.10.2:7777/v1/sm_sda/", + "rel": "self" + }, + { + "href": "http://10.10.10.2:7777/sm_sda/", + "rel": "bookmark" + } + ], + "nodes": [ + { + "href": "http://10.10.10.2:7777/v1/nodes/", + "rel": "self" + }, + { + "href": "http://10.10.10.2:7777/nodes/", + "rel": "bookmark" + } + ], + "service_groups": [ + { + "href": "http://10.10.10.2:7777/v1/service_groups/", + "rel": "self" + }, + { + "href": "http://10.10.10.2:7777/service_groups/", + "rel": "bookmark" + } + ] +} \ No newline at end of file diff --git a/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/common.ent b/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/common.ent new file mode 100644 index 000000000..237ee8f9a --- /dev/null +++ b/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/common.ent @@ -0,0 +1,244 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + '> + + + + + + '> + + + + + + '> + + + + + '> + + + + + + '> + + + + + + '> + + + + + + + The universally unique identifier for this object. + + + + + For convenience, resources contain links to themselves. + This allows a client to easily obtain rather than construct + resource URIs. The following types of link relations are + associated with resources: a self link containing a versioned + link to the resource, and a bookmark link containing a permanent + link to a resource that is appropriate for long term storage. + + + '> + + + + + + + Administrative state of the node. + + + + + The operational state of the node. + + + + + The name of the node. + + + + + The operational state of the node + + + + + The availability status of the node. + + + + + The id of the node. + + + '> + + + + + + The type of host that the service is running on. + + + + + The name of the service group. + + + + + The name of the node that the service is running on. + + + + + The state of the service. + + + + + The uuid of the service group. + + + '> + + + + + + + The operational state of the service. + + + + + The id of the service. + + + + + The desired state of the service + + + + + The name of the service. + + + + + The name of the host which the service is running on. + + + '> + + + + GET'> + PUT'> + POST'> + DELETE'> diff --git a/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/sm-api-v1.wadl b/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/sm-api-v1.wadl new file mode 100644 index 000000000..bce8aa1d4 --- /dev/null +++ b/restapi-doc/restapi-doc/api-ref/src/wadls/sm-api/v1/sm-api-v1.wadl @@ -0,0 +1,298 @@ + + + +%common;]> + + + + + + + + + + + + + + + + + + + + + + + + + + + The unique identifier of an existing service. + + + + + + + + + The name of an existing service. + + + + + + + + + + + + + + The unique identifier of an existing node. + + + + + + + + + + + + + + + The unique identifier of an existing service group. + + + + + + + + + + + + + + + + + + API version details. + + + + + + + + + + + + + + + + + Lists information about all Titanium Cloud SM API versions. 
+ + + + + + + + + &commonFaults; &getFaults; + + + + + Shows details for SM API v1. + + + + + + + + + &commonFaults; &getFaults; + + + + + + + + + + List all services running. + + + + + + + + The list of services. + + + &serviceListShowParameters; + + + + + + + + &commonFaults; &getFaults; + + + + + + Shows the attributes of a specific service. + + + + + + &serviceListShowParameters; + + + + + + + + &commonFaults; &getFaults; + + + + + + + + + + List all controller nodes in the system. + + + + + + + + The list of controller nodes. + + + &serviceNodeListShowParameters; + + + + + + + + &commonFaults; &getFaults; + + + + + + Shows the attributes of a specific node. + + + + + + &serviceNodeListShowParameters; + + + + + + + + &commonFaults; &getFaults; + + + + + + + + + List all service groups in the system. + + + + + + + + The list of service groups. + + + &serviceGroupListShowParameters; + + + + + + + + &commonFaults; &getFaults; + + + + + + Shows the attributes of a specific service group. + + + + + + &serviceGroupListShowParameters; + + + + + + + + &commonFaults; &getFaults; + + + diff --git a/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/api_samples/alarm_list-request.json b/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/api_samples/alarm_list-request.json deleted file mode 100644 index 4aa914e46..000000000 --- a/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/api_samples/alarm_list-request.json +++ /dev/null @@ -1 +0,0 @@ -http://192.168.204.2:6385/v1/ialarms?q.field=severity&q.op=eq&q.type=&q.value=major&include_suppress=True diff --git a/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/api_samples/event_log_list-request.json b/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/api_samples/event_log_list-request.json deleted file mode 100644 index f779ac846..000000000 --- a/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/api_samples/event_log_list-request.json +++ /dev/null @@ -1 +0,0 @@ -http://192.168.204.2:6385/v1/event_log?q.field=start&q.field=end&q.op=eq&q.op=eq&q.type=&q.type=&q.value=2014-11-28T16%3A56%3A44&q.value=2014-11-28T16%3A56%3A45&limit=2 diff --git a/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/api_samples/event_suppression_list-request.json b/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/api_samples/event_suppression_list-request.json deleted file mode 100644 index 0de69f46a..000000000 --- a/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/api_samples/event_suppression_list-request.json +++ /dev/null @@ -1 +0,0 @@ -http://192.168.204.2:6385/v1/event_suppression diff --git a/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/api_samples/host_label_assign-request.json b/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/api_samples/host_label_assign-request.json new file mode 100644 index 000000000..7afc33d53 --- /dev/null +++ b/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/api_samples/host_label_assign-request.json @@ -0,0 +1,4 @@ +{ + "key1": "value1", + "key2": "value2" +} diff --git a/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/api_samples/host_label_list-response.json b/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/api_samples/host_label_list-response.json new file mode 100644 index 000000000..ba30258d2 --- /dev/null +++ b/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/api_samples/host_label_list-response.json @@ -0,0 +1,14 @@ +{ + "labels": [ + { + "uuid": "a7d37730-c58e-4b18-9046-6bd0f4fe03a8", + "host_uuid": 
"42e30882-ab1a-41b0-9f65-696f6d804888", + "label": "key1=value1" + }, + { + "uuid": "c9d3aca9-d360-406c-80c7-a059404471c1", + "host_uuid": "42e30882-ab1a-41b0-9f65-696f6d804888", + "label": "key2=value2" + } + ] +} diff --git a/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/api_samples/sysinv-versions-response.json b/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/api_samples/sysinv-versions-response.json index 31a632c5b..964df10ec 100644 --- a/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/api_samples/sysinv-versions-response.json +++ b/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/api_samples/sysinv-versions-response.json @@ -19,6 +19,6 @@ ] } ], - "description":"Titanium Cloud System API allows for the management of physical servers. This includes inventory collection and configuration of hosts, ports, interfaces, CPUs, disk, memory, and system configuration. The API also supports alarms and fault collection for the cloud itself as well as the configuration of the cloud's SNMP interface. ", + "description":"Titanium Cloud System API allows for the management of physical servers. This includes inventory collection and configuration of hosts, ports, interfaces, CPUs, disk, memory, and system configuration. The API also supports the configuration of the cloud's SNMP interface. ", "name":"Titanium SysInv API" } diff --git a/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/api_samples/version-get-response.json b/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/api_samples/version-get-response.json index 31a632c5b..964df10ec 100644 --- a/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/api_samples/version-get-response.json +++ b/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/api_samples/version-get-response.json @@ -19,6 +19,6 @@ ] } ], - "description":"Titanium Cloud System API allows for the management of physical servers. This includes inventory collection and configuration of hosts, ports, interfaces, CPUs, disk, memory, and system configuration. The API also supports alarms and fault collection for the cloud itself as well as the configuration of the cloud's SNMP interface. ", + "description":"Titanium Cloud System API allows for the management of physical servers. This includes inventory collection and configuration of hosts, ports, interfaces, CPUs, disk, memory, and system configuration. The API also supports the configuration of the cloud's SNMP interface. 
", "name":"Titanium SysInv API" } diff --git a/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/api_samples/versionv1-get-response.json b/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/api_samples/versionv1-get-response.json index dbb2ae95a..2ad1ad8d5 100644 --- a/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/api_samples/versionv1-get-response.json +++ b/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/api_samples/versionv1-get-response.json @@ -136,36 +136,6 @@ "rel":"bookmark" } ], - "ialarms":[ - { - "href":"http://128.224.150.54:6385/v1/ialarms/", - "rel":"self" - }, - { - "href":"http://128.224.150.54:6385/ialarms/", - "rel":"bookmark" - } - ], - "event_log":[ - { - "href":"http://128.224.150.54:6385/v1/event_log/", - "rel":"self" - }, - { - "href":"http://128.224.150.54:6385/event_log/", - "rel":"bookmark" - } - ], - "event_suppression":[ - { - "href":"http://128.224.150.54:6385/v1/event_suppression/", - "rel":"self" - }, - { - "href":"http://128.224.150.54:6385/event_suppression/", - "rel":"bookmark" - } - ], "icommunity":[ { "href":"http://128.224.150.54:6385/v1/icommunity/", diff --git a/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/common.ent b/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/common.ent index 334e56b65..87418597c 100644 --- a/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/common.ent +++ b/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/common.ent @@ -168,6 +168,55 @@ The user-specified location of the cloud system. + + + System capabilities. + + + sdn_enabled : (Boolean) Software Defined Networking enabled. + + + region_config : (Boolean) region selection: + + + true : Secondary region. + + + false : Primary region. + + + + + shared_services : Services provided by Primary region. + + + bm_region : Board Management controller network selection: + + + External : OAM network. + + + Internal : Management network. + + + + + cinder_backend : backend selection for Cinder. + + + vswitch_type : vSwitch selection. + + + security_feature : Selection of Spectre and Meltdown mitigation options. + + + https_enabled : (Boolean) selection of https mode for public URLs. + + + + - - - - - The alarm ID; each type of alarm has a unique ID. Note - the alarm_id and the entity_instance_id uniquely identify - an alarm instance. - - - - - The instance of the object raising alarm. A . separated list - of sub-entity-type=instance-value pairs, representing the containment - structure of the overall entity instance. Note - the alarm_id and the entity_instance_id uniquely identify - an alarm instance. - - - - - The text description of the alarm. - - - - - The severity of the alarm; critical, - major, minor, or warning. - - - - - The time in UTC at which the alarm has last been updated. - - - - - The unique identifier of the alarm. - - - '> - - - - The state of the alarm; set or clear - - - - - Indicates whether the alarm affects the service. - - - - - The proposed action to clear the alarm. - - - - - The type of the alarm. - - - - - The type of the object raising the alarm. A . separated list - of sub-entity-type, representing the containment structure of the - overall entity type. - - - - - The probable cause of the alarm. - - - - - Indicates whether suppression of the specific alarm is allowed. - - - '> - - - - UUID of the system. - - - - - Overall system status based on alarms present; critical, - degraded, or OK. 
- - - - - Count of critical alarms on the system - - - - - Count of major alarms on the system - - - - - Count of minor alarms on the system - - - - - Count of warnings on the system - - - - - '> - - - - - - - The event log ID; each type of event log has a unique ID. Note - the event_log_id and the entity_instance_id uniquely identify - an event log instance. - - - - - The state of the event; set, clear or log - - - - - The instance of the object generating the event log. A . separated list - of sub-entity-type=instance-value pairs, representing the containment - structure of the overall entity instance. Note - the event_log_id and the entity_instance_id uniquely identify - an event log instance. - - - - - The text description of the event log. - - - - - The severity of the event log; critical, - major, minor or warning. - - - - - The time in UTC at which the event log has last been updated. - - - - - The unique identifier of the event log. - - - - - The next attribute is the request to use to get the next n - items. It is used to paginate the event log list. - - - '> - - - - The state of the event; set, clear or log - - - - - Indicates whether the event affects the service. - - - - - The proposed action to clear the event. - - - - - The type of the event. - - - - - The type of the object raising the alarm. A . separated list - of sub-entity-type, representing the containment structure of the - overall entity type. - - - - - The probable cause of the event. - - - - - Indicates whether suppression of the specific event is allowed. - - - '> - - - - - - The alarm ID type (event ID type) that can be suppressed or unsuppressed. - - - - - - The text description of the event type. - - - - - The suppression status for the event ID type; suppressed or unsuppressed - - - '> - - '> + + + + + The universally unique identifier for this object. + + + + + The uuid for the host. + + + + + The label provisioned for the host. + + + '> + GET'> PUT'> diff --git a/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/sysinv-api-v1.wadl b/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/sysinv-api-v1.wadl index cbdd9ab6c..5c4a9f821 100644 --- a/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/sysinv-api-v1.wadl +++ b/restapi-doc/restapi-doc/api-ref/src/wadls/sysinv-api/v1/sysinv-api-v1.wadl @@ -199,6 +199,11 @@ SPDX-License-Identifier: Apache-2.0 + + + + + @@ -520,29 +525,6 @@ SPDX-License-Identifier: Apache-2.0 - - - - - - - - - The unique identifier of an existing active alarm. - - - - - - - - - - - - - - @@ -581,40 +563,6 @@ SPDX-License-Identifier: Apache-2.0 - - - - - - - - - The unique identifier of an event log. - - - - - - - - - - - - - - - - - The unique identifier of an event suppression. - - - - - - - @@ -1060,6 +1008,26 @@ SPDX-License-Identifier: Apache-2.0 + + + + + The unique identifier of an existing host. + + + + + + + + + + The unique identifier of an existing host label. + + + + + @@ -3616,140 +3584,6 @@ SPDX-License-Identifier: Apache-2.0 - - - - - - - - - - - - Lists all active alarms based on specified query. - The supported query options are alarm_id, entity_type_id, entity_instance_id, - severity and alarm_type. - - - - - - - - This optional parameter when set to true (include_suppress=true) specifies - to include suppressed alarms in output. - - - - - - - - - - - - - - - - The list of active alarms based on the specified query. - - - &alarmListShowParameters; - - - - - - - - &commonFaults; &getFaults; - - - - - - Shows information about a specific alarm. 
- - - - - - - &alarmListShowParameters; - &alarmDetailShowParameters; - &commonListShowParameters; - - - - - - - - &commonFaults; &getFaults; - - - - - - Deletes a specific alarm. - NOTE Typically this command should NOT be used. I.e typically - alarms will be and should be cleared by the system - when the alarm condition clears. This command is only provided - in the event that the alarm has cleared but for some reason the - system has not removed the alarm. - - - - - - - - - - - - - Summarize all active alarms by severity. - - - - - - - - This optional parameter when set to true (include_suppress=true) specifies - to include suppressed alarms in the summations (default false). - - - - - - - - - &alarmSummaryShowParameters; - - - - - - - - &commonFaults; - - - @@ -3980,7 +3814,7 @@ SPDX-License-Identifier: Apache-2.0 name="community" style="plain" type="xsd:string" > - This parameter specifies the the community of which the trap destination is a member. + This parameter specifies the community of which the trap destination is a member. @@ -4570,197 +4404,6 @@ OAM Controller-1 IP Address. &postPutFaults; - - - - - - - - - - Lists all event logs (historical alarms and customer logs) based on specified query. The logs - are returned in reverse chronological order. - The supported query options are event_log_id, entity_type_id, entity_instance_id, - severity, event_log_type, start and end. - - - - - - - - This parameter specifies filter rules for the logs to - be returned. - - - - - This parameter specifies the maximum number of event logs to - be returned. - - - - - This optional parameter when set to true (alarms=true) specifies - that only alarm event log records should be returned. - - - - - This optional parameter when set to true (logs=true) specifies - that only customer log records should be returned. - - - - - This optional parameter when set to true (include_suppress=true) specifies - to include suppressed alarms in output. - - - - - - - - - - - - - - - - The list of events log based on the specified query. - - - &eventLogListShowParameters; - - - - - - - - &commonFaults; &getFaults; - - - - - Shows information about a specific event log. - - - - - - - &eventLogListShowParameters; - &commonListShowParameters; - - - - - - - - &commonFaults; &getFaults; - - - - - - - - - - - - Lists suppressed event id's. - - - - - - - - - The list of suppressed event types. - - - &EventSuppressionListShowParameters; - &commonListShowParameters; - - - - - - - - &commonFaults; &getFaults; - - - - - Modifies the value of an event suppression. - - - - - - - - The suppression status of an event suppression; suppressed or unsuppressed - - - - - - - - - - - - - - - - URIs to the modified event suppression. - - - - &EventSuppressionListShowParameters; - &commonListShowParameters; - - - - - - - - &postPutFaults; - - @@ -7645,4 +7288,75 @@ OAM Controller-1 IP Address. &commonFaults; &getFaults; + + + + + + + + Assign label to a host. + + + + + + + + This parameter specifies the label key value pairs. + + + + + + + + + + + + + + + + + &commonFaults; &postPutFaults; + + + + + List host label. + + + + + + &labelListParameters; + + + + + + + + &commonFaults; &getFaults; + + + + + Remove label from a host. 
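The host label operations documented above can be exercised with a similar sketch. The /v1/ihosts/{uuid}/labels route is an assumption, since the surviving WADL text names only the operations; the request and response bodies follow host_label_assign-request.json and host_label_list-response.json from earlier in this patch.

import os
import requests

# Assumptions: the /v1/ihosts/{uuid}/labels route and the sysinv host;
# the request and response bodies match the api_samples above.
SYSINV_ENDPOINT = "http://192.168.204.2:6385"
HOST_UUID = "42e30882-ab1a-41b0-9f65-696f6d804888"  # from the sample response
headers = {"X-Auth-Token": os.environ["OS_TOKEN"]}

# Assign labels: the body is the plain key/value map from
# host_label_assign-request.json.
assign = requests.post(
    "{}/v1/ihosts/{}/labels".format(SYSINV_ENDPOINT, HOST_UUID),
    headers=headers, json={"key1": "value1", "key2": "value2"})
assign.raise_for_status()

# List labels: each entry carries uuid, host_uuid, and a "key=value" label.
listing = requests.get(
    "{}/v1/ihosts/{}/labels".format(SYSINV_ENDPOINT, HOST_UUID),
    headers=headers)
listing.raise_for_status()
for item in listing.json()["labels"]:
    print(item["label"])  # e.g. "key1=value1"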
diff --git a/tools/collector/scripts/collect_mask_passwords b/tools/collector/scripts/collect_mask_passwords
index b7f0e2461..0dd5fa9ab 100644
--- a/tools/collector/scripts/collect_mask_passwords
+++ b/tools/collector/scripts/collect_mask_passwords
@@ -13,7 +13,7 @@ for conffile in \
     ${COLLECT_NAME_DIR}/etc/aodh/aodh.conf \
     ${COLLECT_NAME_DIR}/etc/ceilometer/ceilometer.conf \
     ${COLLECT_NAME_DIR}/etc/cinder/cinder.conf \
-    ${COLLECT_NAME_DIR}/etc/fm.conf \
+    ${COLLECT_NAME_DIR}/etc/fm/fm.conf \
     ${COLLECT_NAME_DIR}/etc/glance/glance-api.conf \
     ${COLLECT_NAME_DIR}/etc/glance/glance-registry.conf \
     ${COLLECT_NAME_DIR}/etc/heat/heat.conf \
diff --git a/tools/engtools/hostdata-collectors/centos/build_srpm.data b/tools/engtools/hostdata-collectors/centos/build_srpm.data
index 81d405878..ac9c374eb 100644
--- a/tools/engtools/hostdata-collectors/centos/build_srpm.data
+++ b/tools/engtools/hostdata-collectors/centos/build_srpm.data
@@ -1,2 +1,2 @@
 SRC_DIR="scripts"
-TIS_PATCH_VER=1
+TIS_PATCH_VER=2
diff --git a/tools/engtools/hostdata-collectors/scripts/cfg/engtools.conf b/tools/engtools/hostdata-collectors/scripts/cfg/engtools.conf
index efe1b9b74..a6c06eac6 100644
--- a/tools/engtools/hostdata-collectors/scripts/cfg/engtools.conf
+++ b/tools/engtools/hostdata-collectors/scripts/cfg/engtools.conf
@@ -47,10 +47,11 @@ netstats=10
 postgres=30
 rabbitmq=3600
 vswitch=120
+api_requests=5

 [AdditionalOptions]
 # Set this option to Y/N to enable/disable Openstack API GET/POST collection
-API_REQUESTS=N
+API_REQUESTS=Y

 # Set this option to Y/N to enable/disable the collection of all services and not just the ones listed below. Note that this hasn't been tested thoroughly
 ALL_SERVICES=N
@@ -74,4 +75,25 @@ STORAGE_SERVICE_LIST=ceph-mon ceph-osd ceph-manager ceph-rest-api
 RABBITMQ_QUEUE_LIST=notifications.info versioned_notifications.info

 [CommonServices]
-COMMON_SERVICE_LIST=dnsmasq ceilometer-polling haproxy hwmond pmond rmond fsmond sw-patch-agent sysinv-agent syslog-ng hostwd iscsid io-monitor-manager acpid hbsClient logmgmt mtcClient mtcalarmd mtclogd sshd ntpd smartd sm sm-eru sm-watchdog sm-api ceilometer keyring cinder-rtstool tuned polkitd lldpd IPaddr2 dnsmasq systemd-udevd systemd-journald logrotate collectd
+COMMON_SERVICE_LIST=dnsmasq ceilometer-polling haproxy hwmond pmond rmond fsmond sw-patch-agent sysinv-agent syslog-ng hostwd iscsid io-monitor-manager acpid hbsClient logmgmt mtcClient mtcalarmd mtclogd sshd ntpd ptp4l phc2sys smartd sm sm-eru sm-watchdog sm-api ceilometer keyring cinder-rtstool tuned polkitd lldpd IPaddr2 dnsmasq systemd-udevd systemd-journald logrotate collectd
+
+[StaticServices]
+STATIC_SERVICE_LIST=occtop memtop schedtop top.sh iostat.sh netstats.sh diskstats.sh memstats.sh filestats.sh ceph.sh postgres.sh rabbitmq.sh vswitch.sh
+
+[OpenStackServices]
+OPEN_STACK_SERVICE_LIST=nova cinder aodh ceilometer heat glance ceph horizon keystone puppet sysinv neutron nova_api postgres panko nova_cell0 magnum ironic murano gnocchi
+
+[SkipList]
+SKIP_LIST=ps top sh curl awk wc sleep lsof cut grep ip tail su
+
+[ExcludeList]
+EXCLUDE_LIST=python python2 bash perl sudo init
+
+[ApiStatsConstantPorts]
+DB_PORT_NUMBER=5432
+RABBIT_PORT_NUMBER=5672
+
+# The api stats data structure has three fields: the name displayed in ps -ef, the name displayed in lsof -Pn -i tcp, and the specific api port of the service.
+[ApiStatsServices] +API_STATS_STRUCTURE=ironic-conductor;ironic-co;|ironic-api;ironic-ap;6485|radosgw-swift;radosgw;8|magnum-conductor;magnum-co;|magnum-api;magnum-ap;9511|murano-api;murano-ap;8082|murano-engine;murano-en;|keystone-public;gunicorn;5000|openstack_dashboard.wsgi;gunicorn;8080|gnocchi-api;gunicorn;8041|aodh-api;gunicorn;8042|panko-api;gunicorn;8977|sysinv-conductor;sysinv-co ;|neutron-server;neutron-s;9696|nova-conductor;nova-cond ;|sysinv-agent;sysinv-ag;|sysinv-api;sysinv-ap;6385|nova-api;nova-api ;18774|cinder-api;cinder-a;8776|glance-api;glance-a;9292|vim;nfv-vim;4545|heat-api;heat-a;8004|heat-engine;heat-e;8004 + diff --git a/tools/engtools/hostdata-collectors/scripts/live_stream.py b/tools/engtools/hostdata-collectors/scripts/live_stream.py index d96773d39..aed8f5520 100644 --- a/tools/engtools/hostdata-collectors/scripts/live_stream.py +++ b/tools/engtools/hostdata-collectors/scripts/live_stream.py @@ -14,6 +14,8 @@ import psutil import fcntl import logging import ConfigParser +import itertools +import six from multiprocessing import Process, cpu_count from subprocess import Popen, PIPE from collections import OrderedDict @@ -1114,60 +1116,50 @@ def collectCpuCount(influx_info, node, ci): except Exception: logging.error("cpu_count collection stopped unexpectedly with error: {}. Restarting process...".format(sys.exc_info())) - -# collect API GET and POST requests/sec -def collectApi(influx_info, node, ci, openstack_svcs): +def collectApiStats(influx_info, node, ci, services, db_port, rabbit_port): logging.basicConfig(filename="/tmp/livestream.log", filemode="a", format="%(asctime)s %(levelname)s %(message)s", level=logging.INFO) logging.info("api_request data starting collection with a collection interval of {}s".format(ci["cpu_count"])) measurement = "api_requests" tags = {"node": node} - openstack_services = openstack_svcs influx_string = "" + lsof_args = ['lsof', '-Pn', '-i', 'tcp'] while True: try: fields = {} - tmp = {} - tmp1 = {} - # get initial values - for s in openstack_services: - fields[s] = {"get": 0, "post": 0} - tmp[s] = {"get": 0, "post": 0} - log = "/var/log/{0}/{0}-api.log".format(s) - if os.path.exists(log): - if s == "ceilometer": - p = Popen("awk '/INFO/ && /500/' {} | wc -l".format(log), shell=True, stdout=PIPE) - else: - p = Popen("awk '/INFO/ && /GET/' {} | wc -l".format(log), shell=True, stdout=PIPE) - init_api_get = int(p.stdout.readline()) - tmp[s]["get"] = init_api_get - p.kill() - p = Popen("awk '/INFO/ && /POST/' {} | wc -l".format(log), shell=True, stdout=PIPE) - init_api_post = int(p.stdout.readline()) - tmp[s]["post"] = init_api_post - p.kill() - time.sleep(1) - # get new values - for s in openstack_services: - tmp1[s] = {"get": 0, "post": 0} - log = "/var/log/{0}/{0}-api.log".format(s) - if os.path.exists(log): - if s == "ceilometer": - p = Popen("awk '/INFO/ && /500/' {} | wc -l".format(log), shell=True, stdout=PIPE) - else: - p = Popen("awk '/INFO/ && /GET/' {} | wc -l".format(log), shell=True, stdout=PIPE) - api_get = int(p.stdout.readline()) - tmp1[s]["get"] = api_get - p.kill() - p = Popen("awk '/INFO/ && /POST/' {} | wc -l".format(log), shell=True, stdout=PIPE) - api_post = int(p.stdout.readline()) - tmp1[s]["post"] = api_post - p.kill() - # take difference - for key in fields: - if (key in tmp and key in tmp1) and (tmp1[key]["get"] >= tmp[key]["get"]) and (tmp1[key]["post"] >= tmp[key]["post"]): - fields[key]["get"] = (tmp1[key]["get"] - tmp[key]["get"]) - fields[key]["post"] = (tmp1[key]["post"] - tmp[key]["post"]) - 
influx_string += "{},'{}'='{}','{}'='{}' '{}'='{}','{}'='{}'".format(measurement, "node", tags["node"], "service", key, "get_requests", fields[key]["get"], "post_requests", fields[key]["post"]) + "\n" + lsof_result = Popen(lsof_args, shell=False, stdout=PIPE) + lsof_lines = list() + while True: + line = lsof_result.stdout.readline().strip("\n") + if not line: + break + lsof_lines.append(line) + lsof_result.kill() + for name, service in services.iteritems(): + pid_list = list() + check_pid = False + if name == "keystone-public": + check_pid = True + ps_result = Popen("pgrep -f --delimiter=' ' keystone-public", shell=True, stdout=PIPE) + pid_list = ps_result.stdout.readline().strip().split(' ') + ps_result.kill() + elif name == "gnocchi-api": + check_pid = True + ps_result = Popen("pgrep -f --delimiter=' ' gnocchi-api", shell=True, stdout=PIPE) + pid_list = ps_result.stdout.readline().strip().split(' ') + ps_result.kill() + api_count = 0 + db_count = 0 + rabbit_count = 0 + for line in lsof_lines: + if service['name'] is not None and service['name'] in line and (not check_pid or any(pid in line for pid in pid_list)): + if service['api-port'] is not None and service['api-port'] in line: + api_count += 1 + elif db_port is not None and db_port in line: + db_count += 1 + elif rabbit_port is not None and rabbit_port in line: + rabbit_count += 1 + fields[name] = {"api": api_count, "db": db_count, "rabbit": rabbit_count} + influx_string += "{},'{}'='{}','{}'='{}' '{}'='{}','{}'='{}','{}'='{}'".format(measurement, "node", tags["node"], "service", name, "api", fields[name]["api"], "db", fields[name]["db"], "rabbit", fields[name]["rabbit"]) + "\n" p = Popen("curl -s -o /dev/null 'http://'{}':'{}'/write?db='{}'' --data-binary '{}'".format(influx_info[0], influx_info[1], influx_info[2], influx_string), shell=True) p.communicate() influx_string = "" @@ -1177,7 +1169,6 @@ def collectApi(influx_info, node, ci, openstack_svcs): logging.error("api_request collection stopped unexpectedly with error: {}. Restarting process...".format(sys.exc_info())) time.sleep(3) - # returns the cores dedicated to platform use def getPlatformCores(node, cpe): if cpe is True or node.startswith("compute"): @@ -1347,12 +1338,7 @@ if __name__ == "__main__": common_services = list() services = {} live_svc = ("live_stream.py",) - static_svcs = ("occtop", "memtop", "schedtop", "top.sh", "iostat.sh", "netstats.sh", "diskstats.sh", "memstats.sh", "filestats.sh", "ceph.sh", "postgres.sh", "rabbitmq.sh", "vswitch.sh") collection_intervals = {"memtop": None, "memstats": None, "occtop": None, "schedtop": None, "load_avg": None, "cpu_count": None, "diskstats": None, "iostat": None, "filestats": None, "netstats": None, "postgres": None, "rabbitmq": None, "vswitch": None} - openstack_services = ("nova", "cinder", "aodh", "ceilometer", "heat", "glance", "ceph", "horizon", "keystone", "puppet", "sysinv", "neutron", "nova_api", "postgres", "panko", "nova_cell0", "magnum", "ironic", "murano", "gnocchi") - # memstats, schedtop, and filestats must skip/exclude certain fields when collect_all is enabled. 
No need to collect this stuff - exclude_list = ("python", "python2", "bash", "perl", "sudo", "init") - skip_list = ("ps", "top", "sh", "", "curl", "awk", "wc", "sleep", "lsof", "cut", "grep", "ip", "tail", "su") duration = None unconverted_duration = "" collect_api_requests = False @@ -1423,12 +1409,27 @@ if __name__ == "__main__": storage_services = tuple(config.get("StorageServices", "STORAGE_SERVICE_LIST").split()) rabbit_services = tuple(config.get("RabbitmqServices", "RABBITMQ_QUEUE_LIST").split()) common_services = tuple(config.get("CommonServices", "COMMON_SERVICE_LIST").split()) + static_svcs = tuple(config.get("StaticServices", "STATIC_SERVICE_LIST").split()) + openstack_services = tuple(config.get("OpenStackServices", "OPEN_STACK_SERVICE_LIST").split()) + skip_list = tuple(config.get("SkipList", "SKIP_LIST").split()) + exclude_list = tuple(config.get("ExcludeList", "EXCLUDE_LIST").split()) # get collection intervals for i in config.options("Intervals"): if config.get("Intervals", i) == "" or config.get("Intervals", i) is None: collection_intervals[i] = None else: collection_intervals[i] = int(config.get("Intervals", i)) + # get api-stats services + DB_PORT_NUMBER = config.get("ApiStatsConstantPorts", "DB_PORT_NUMBER") + RABBIT_PORT_NUMBER = config.get("ApiStatsConstantPorts", "RABBIT_PORT_NUMBER") + SERVICES = OrderedDict() + SERVICES_INFO = tuple(config.get("ApiStatsServices", "API_STATS_STRUCTURE").split('|')) + for service_string in SERVICES_INFO: + service_tuple = tuple(service_string.split(';')) + if service_tuple[2] != "" and service_tuple[2] != None: + SERVICES[service_tuple[0]] = {'name': service_tuple[1], 'api-port': service_tuple[2]} + else: + SERVICES[service_tuple[0]] = {'name': service_tuple[1], 'api-port': None} except Exception: print "An error has occurred when parsing the engtools.conf configuration file: {}".format(sys.exc_info()) sys.exit(0) @@ -1551,7 +1552,7 @@ if __name__ == "__main__": tasks.append(p) p.start() if collect_api_requests is True and node_type == "controller": - p = Process(target=collectApi, args=(influx_info, node, collection_intervals, openstack_services), name="api_requests") + p = Process(target=collectApiStats, args=(influx_info, node, collection_intervals, SERVICES, DB_PORT_NUMBER, RABBIT_PORT_NUMBER), name="api_requests") tasks.append(p) p.start() diff --git a/virt/libvirt/libvirt-2.0.0/libvirt.logrotate b/virt/libvirt/libvirt-2.0.0/libvirt.logrotate deleted file mode 100644 index a60915995..000000000 --- a/virt/libvirt/libvirt-2.0.0/libvirt.logrotate +++ /dev/null @@ -1,14 +0,0 @@ -/var/log/libvirt/libvirtd.log -{ - nodateext - size 10M - start 1 - rotate 20 - missingok - notifempty - compress - sharedscripts - postrotate - /etc/init.d/syslog reload > /dev/null 2>&1 || true - endscript -} diff --git a/virt/libvirt/libvirt-2.0.0/libvirt.lxc b/virt/libvirt/libvirt-2.0.0/libvirt.lxc deleted file mode 100644 index 81ea6210b..000000000 --- a/virt/libvirt/libvirt-2.0.0/libvirt.lxc +++ /dev/null @@ -1,15 +0,0 @@ -/var/log/libvirt/lxc/*.log -{ - nodateext - size 10M - start 1 - rotate 20 - missingok - notifempty - compress - sharedscripts - postrotate - /etc/init.d/syslog reload > /dev/null 2>&1 || true - endscript -} - diff --git a/virt/libvirt/libvirt-2.0.0/libvirt.qemu b/virt/libvirt/libvirt-2.0.0/libvirt.qemu deleted file mode 100644 index 470ef8cda..000000000 --- a/virt/libvirt/libvirt-2.0.0/libvirt.qemu +++ /dev/null @@ -1,15 +0,0 @@ -/var/log/libvirt/qemu/*.log -{ - nodateext - size 10M - start 1 - rotate 4 - missingok - notifempty - 
compress - sharedscripts - postrotate - /etc/init.d/syslog reload > /dev/null 2>&1 || true - endscript -} - diff --git a/virt/libvirt/libvirt-2.0.0/libvirt.uml b/virt/libvirt/libvirt-2.0.0/libvirt.uml deleted file mode 100644 index 1c26219f0..000000000 --- a/virt/libvirt/libvirt-2.0.0/libvirt.uml +++ /dev/null @@ -1,15 +0,0 @@ -/var/log/libvirt/uml/*.log -{ - nodateext - size 10M - start 1 - rotate 4 - missingok - notifempty - compress - sharedscripts - postrotate - /etc/init.d/syslog reload > /dev/null 2>&1 || true - endscript -} -
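The API_STATS_STRUCTURE records that engtools.conf introduces earlier in this patch are parsed by live_stream.py into an ordered mapping of service name to lsof name and API port. The standalone helper below mirrors that parsing for reference; the function name is illustrative and is not part of the shipped script.

from collections import OrderedDict

def parse_api_stats_structure(raw):
    # Each record is "ps name;lsof name;api port", records joined by "|";
    # an empty third field means the service has no dedicated API port
    # (the constant DB and RabbitMQ ports are configured separately).
    services = OrderedDict()
    for entry in raw.split('|'):
        fields = entry.split(';')
        ps_name, lsof_name = fields[0], fields[1]
        port = fields[2] if len(fields) > 2 and fields[2].strip() else None
        services[ps_name] = {'name': lsof_name, 'api-port': port}
    return services

# Example, using two records from the engtools.conf value above:
# parse_api_stats_structure("sysinv-api;sysinv-ap;6385|sysinv-conductor;sysinv-co ;")
# -> {'sysinv-api':       {'name': 'sysinv-ap',  'api-port': '6385'},
#     'sysinv-conductor': {'name': 'sysinv-co ', 'api-port': None}}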