# Copyright (c) 2020-2023 Wind River Systems, Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import collections
from datetime import datetime
import functools
import os
import random
import re
import threading
import time

from eventlet.green import subprocess
from oslo_log import log as logging
from oslo_utils import timeutils

from dccommon import consts
from dccommon import exceptions
from dccommon.exceptions import PlaybookExecutionFailed
from dccommon.exceptions import PlaybookExecutionTimeout
from dccommon.subprocess_cleanup import kill_subprocess_group
from dccommon.subprocess_cleanup import SubprocessCleanup
from dcorch.common.i18n import _

LOG = logging.getLogger(__name__)
ANSIBLE_PASSWD_PARMS = ['ansible_ssh_pass', 'ansible_become_pass']
SCRIPT_PASSWD_PARMS = ['sysadmin_password', 'password']

# Gap, in seconds, to determine whether the given token is about to expire
# These values are used to randomize the token early renewal duration and
# to distribute the new keystone creation to different audit cycles
STALE_TOKEN_DURATION_MIN = 300
STALE_TOKEN_DURATION_MAX = 480
STALE_TOKEN_DURATION_STEP = 20

# Exitcode from 'timeout' command on timeout:
TIMEOUT_EXITCODE = 124

LAST_SW_VERSION_IN_CENTOS = "22.06"


class memoized(object):
    """Decorator. Caches a function's return value each time it is called.

    If called later with the same positional arguments, the cached value is
    returned (not reevaluated). Calls whose arguments are not hashable
    (a list, for instance) are passed straight through without caching.

    WARNING: This function should not be used for class methods since it
    does not provide weak references; thus would prevent the instance from
    being garbage collected.
    """

    def __init__(self, func):
        self.func = func
        self.cache = {}

    def __call__(self, *args):
        # 'args' is always a tuple, so an isinstance(..., Hashable) test can
        # never detect an unhashable element. Attempting the lookup is the
        # only reliable check: it raises TypeError when any argument cannot
        # be hashed. Only the lookup is guarded, so a TypeError raised by
        # the wrapped function itself still propagates.
        try:
            is_cached = args in self.cache
        except TypeError:
            # uncacheable. a list, for instance.
            # better to not cache than blow up.
            return self.func(*args)

        if is_cached:
            return self.cache[args]

        value = self.func(*args)
        self.cache[args] = value
        return value

    def __repr__(self):
        """Return the function's docstring, or its repr if it has none."""
        # __repr__ must return a str; func.__doc__ is None for
        # undocumented functions.
        return self.func.__doc__ or repr(self.func)

    def __get__(self, obj, objtype):
        """Support instance methods."""
        return functools.partial(self.__call__, obj)


class RunAnsible(object):
    """Class to run Ansible playbooks with the abort option

    Approach:

    At the start of the playbook execution, the abort status
    (default value is False) and PID of the subprocess for the
    specified subcloud are set on the class variable dict (abort_status).
    When the user sends the abort command, the subcloud_manager changes
    the abort status to True and the subprocess is killed.

    If Ansible is currently executing a task that cannot be interrupted,
    a deploy_not_abortable flag is created in the overrides folder by the
    playbook itself, and the abort process will wait for said flag to be
    deleted before killing the subprocess. If the task fails while abort
    is waiting, the playbook_failed flag will indicate to the
    original process to raise PlaybookExecutionFailed.
    """
    # Maps subcloud name -> {'abort': bool, 'subp': Popen object}.
    # Shared by all instances; guard every access with 'lock'.
    abort_status = {}
    lock = threading.Lock()

    def _unregister_subcloud(self, subcloud_name):
        """Drop the abort_status entry for subcloud_name, if any."""
        with RunAnsible.lock:
            RunAnsible.abort_status.pop(subcloud_name, None)

    def run_abort(self, subcloud_name, timeout=600):
        """Set abort status for a subcloud and kill its playbook process.

        After flagging the abort, polls once per second (for at most
        ``timeout`` iterations) while the subcloud's
        ``.<subcloud_name>_deploy_not_abortable`` flag file exists, then
        calls kill_subprocess_group on the registered subprocess.

        :param subcloud_name: Name of the subcloud
        :param timeout: Timeout in seconds.
        :return: True
        :raises KeyError: if no playbook is registered for subcloud_name
        """
        with RunAnsible.lock:
            status = RunAnsible.abort_status[subcloud_name]
            status['abort'] = True
            # Fetch the subprocess handle under the same lock so that a
            # concurrent _unregister_subcloud cannot remove the entry
            # between setting the flag and reading the handle.
            subp = status['subp']
        unabortable_flag = os.path.join(
            consts.ANSIBLE_OVERRIDES_PATH,
            '.%s_deploy_not_abortable' % subcloud_name)
        while os.path.exists(unabortable_flag) and timeout > 0:
            time.sleep(1)
            timeout -= 1
        kill_subprocess_group(subp)
        return True

    def exec_playbook(self, log_file, playbook_command, subcloud_name,
                      timeout=None, register_cleanup=True):
        """Run ansible playbook via subprocess.

        :param log_file: Logs output to file
        :param playbook_command: Command (list of arguments) passed to
            subprocess.Popen
        :param subcloud_name: Name of the subcloud; key under which the
            subprocess and its abort status are registered
        :param timeout: Timeout in seconds. Raises PlaybookExecutionTimeout
            on timeout
        :param register_cleanup: Register the subprocess group for cleanup on
            shutdown, if the underlying service supports cleanup.
        :return: True if the abort status was set while the playbook was
            running, False otherwise
        :raises PlaybookExecutionFailed: if the playbook exits non-zero
            without being aborted, or while the unabortable flag exists
        :raises PlaybookExecutionTimeout: if the playbook does not finish
            within ``timeout`` seconds
        """
        exec_env = os.environ.copy()
        exec_env["ANSIBLE_LOG_PATH"] = "/dev/null"

        aborted = False

        if timeout:
            timeout_log_str = " (timeout: %ss)" % timeout
        else:
            timeout_log_str = ''

        with open(log_file, "a+") as f_out_log:
            try:
                logged_playbook_command = \
                    _strip_password_from_command(playbook_command)
                txt = "%s Executing playbook command%s: %s\n" \
                    % (datetime.today().strftime('%Y-%m-%d-%H:%M:%S'),
                       timeout_log_str, logged_playbook_command)
                f_out_log.write(txt)
                f_out_log.flush()

                # Remove unabortable flag created by the playbook
                # if present from previous executions
                unabortable_flag = os.path.join(
                    consts.ANSIBLE_OVERRIDES_PATH,
                    '.%s_deploy_not_abortable' % subcloud_name)
                if os.path.exists(unabortable_flag):
                    os.remove(unabortable_flag)

                subp = subprocess.Popen(playbook_command,
                                        stdout=f_out_log,
                                        stderr=f_out_log,
                                        env=exec_env,
                                        start_new_session=register_cleanup)
                try:
                    if register_cleanup:
                        SubprocessCleanup.register_subprocess_group(subp)
                    with RunAnsible.lock:
                        RunAnsible.abort_status[subcloud_name] = {
                            'abort': False,
                            'subp': subp}

                    subp.wait(timeout)
                    subp_rc = subp.poll()

                    # There are 5 possible outcomes of the subprocess
                    # execution:
                    # 1: Playbook completed (process exited)
                    #    - playbook_failure is False with subp_rc == 0,
                    #      aborted is False, unabortable_flag_exists is False
                    # 2: Playbook was aborted (process killed)
                    #    - playbook_failure is False with subp_rc != 0,
                    #      aborted is True, unabortable_flag_exists is False
                    # 3: Playbook failed (process exited)
                    #    - playbook_failure is True with subp_rc != 0,
                    #      aborted is False, unabortable_flag_exists is False
                    # 4: Playbook failed during unabortable task (process
                    #    exited)
                    #    - playbook_failure is True with subp_rc != 0,
                    #      aborted is False, unabortable_flag_exists is True
                    # 5: Playbook failed while waiting to be aborted (process
                    #    exited)
                    #    - playbook_failure is True with subp_rc != 0,
                    #      aborted is True, unabortable_flag_exists is False
                    with RunAnsible.lock:
                        aborted = \
                            RunAnsible.abort_status[subcloud_name]['abort']
                    unabortable_flag_exists = os.path.exists(unabortable_flag)
                    playbook_failure = (
                        subp_rc != 0 and
                        (not aborted or unabortable_flag_exists))

                    # Raise PlaybookExecutionFailed if the playbook fails when
                    # on normal conditions (no abort issued) or fails while
                    # waiting for the unabortable flag to be cleared.
                    if playbook_failure:
                        raise PlaybookExecutionFailed(
                            playbook_cmd=playbook_command)

                except subprocess.TimeoutExpired:
                    kill_subprocess_group(subp)
                    f_out_log.write(
                        "%s TIMEOUT (%ss) - playbook is terminated\n" %
                        (datetime.today().strftime('%Y-%m-%d-%H:%M:%S'),
                         timeout)
                    )
                    raise PlaybookExecutionTimeout(
                        playbook_cmd=playbook_command, timeout=timeout)
                finally:
                    f_out_log.flush()
                    if register_cleanup:
                        SubprocessCleanup.unregister_subprocess_group(subp)
                    self._unregister_subcloud(subcloud_name)

            except PlaybookExecutionFailed:
                # Expected failure path: propagate without extra logging.
                raise
            except Exception as ex:
                LOG.error(str(ex))
                raise
        return aborted


def _strip_password_from_command(script_command):
    """Strip out any known password arguments from given command

    Each element of ``script_command`` that mentions one of
    SCRIPT_PASSWD_PARMS is split on whitespace; every resulting token that
    mentions a password parameter and contains '=' is truncated right after
    its first '=' (``password=secret`` becomes ``password=``).

    Tokens that mention a password parameter but carry no '=' (for example
    a file path containing the word "password") are left untouched rather
    than raising ValueError.

    :param script_command: iterable of command argument strings
    :return: new list of argument strings, safe to write to a log
    """
    logged_command = []
    for item in script_command:
        if not any(parm in item for parm in SCRIPT_PASSWD_PARMS):
            logged_command.append(item)
            continue

        sanitized_tokens = []
        for token in item.split():
            is_password_token = any(
                parm in token for parm in SCRIPT_PASSWD_PARMS)
            if is_password_token and '=' in token:
                # Keep "KEY=" and drop the value.
                token = token[:token.index('=') + 1]
            sanitized_tokens.append(token)
        logged_command.append(' '.join(sanitized_tokens))
    return logged_command


# TODO(vgluzrom): remove this function and replace all calls
# with RunAnsible class
def run_playbook(log_file, playbook_command,
                 timeout=None, register_cleanup=True):
    """Run ansible playbook via subprocess.

    :param log_file: Logs output to file
    :param playbook_command: Command (list of arguments) to execute. When a
        timeout is given it is prefixed with the 'timeout' command.
    :param timeout: Timeout in seconds. Raises PlaybookExecutionTimeout
        on timeout
    :param register_cleanup: Register the subprocess group for cleanup on
        shutdown, if the underlying service supports cleanup.
    :raises PlaybookExecutionTimeout: if the process exits with
        TIMEOUT_EXITCODE while a timeout was requested
    :raises PlaybookExecutionFailed: if the process exits with any other
        non-zero return code
    """
    exec_env = os.environ.copy()
    exec_env["ANSIBLE_LOG_PATH"] = "/dev/null"

    if timeout:
        # Invoke ansible-playbook via the 'timeout' command.
        # Using --kill-after=5s which will force a kill -9 if the process
        # hasn't terminated within 5s:
        timeout_log_str = " (timeout: %ss)" % timeout
        playbook_command = ["/usr/bin/timeout", "--kill-after=5s",
                            "%ss" % timeout] + playbook_command
    else:
        timeout_log_str = ''

    with open(log_file, "a+") as f_out_log:
        try:
            logged_playbook_command = \
                _strip_password_from_command(playbook_command)
            txt = "%s Executing playbook command%s: %s\n" \
                % (datetime.today().strftime('%Y-%m-%d-%H:%M:%S'),
                   timeout_log_str, logged_playbook_command)
            f_out_log.write(txt)
            f_out_log.flush()

            if register_cleanup:
                # Use the same process group / session for all children
                # This makes it easier to kill the entire process group
                # on cleanup
                preexec_fn = os.setsid
            else:
                preexec_fn = None

            # TODO(kmacleod) future considerations:
            # - In python3, this code can be simplified to use the new
            #   subprocess.run(timeout=val) method or Popen with
            #   subp.wait(timeout=val)
            # - Beginning with ansible 2.10, we can introduce the
            #   ANSIBLE_TASK_TIMEOUT value to set a task-level timeout.
            #   This is not available in our current version of ansible
            #   (2.7.5)
            subp = subprocess.Popen(playbook_command,
                                    stdout=f_out_log,
                                    stderr=f_out_log,
                                    env=exec_env,
                                    preexec_fn=preexec_fn)
            try:
                if register_cleanup:
                    SubprocessCleanup.register_subprocess_group(subp)

                subp.communicate()  # wait for process to exit

                if timeout and subp.returncode == TIMEOUT_EXITCODE:
                    f_out_log.write(
                        "%s TIMEOUT (%ss) - playbook is terminated\n" %
                        (datetime.today().strftime('%Y-%m-%d-%H:%M:%S'),
                         timeout)
                    )
                    raise PlaybookExecutionTimeout(
                        playbook_cmd=playbook_command, timeout=timeout)
                if subp.returncode != 0:
                    raise PlaybookExecutionFailed(
                        playbook_cmd=playbook_command)
            finally:
                f_out_log.flush()
                if register_cleanup:
                    SubprocessCleanup.unregister_subprocess_group(subp)

        except PlaybookExecutionFailed:
            # Expected failure path: propagate without extra logging.
            raise
        except Exception as ex:
            LOG.error(str(ex))
            raise


def is_token_expiring_soon(token,
                           stale_token_duration_min=STALE_TOKEN_DURATION_MIN,
                           stale_token_duration_max=STALE_TOKEN_DURATION_MAX,
                           stale_token_duration_step=STALE_TOKEN_DURATION_STEP):
    """Tell whether a token expires within a randomized time window.

    The window length (seconds) is drawn with random.randrange from
    [stale_token_duration_min, stale_token_duration_max) in increments of
    stale_token_duration_step.

    :param token: mapping with an 'expires_at' ISO 8601 timestamp
    :return: True if the token expires within the drawn window, else False
    """
    expiry_time = timeutils.normalize_time(
        timeutils.parse_isotime(token['expires_at']))
    duration = random.randrange(stale_token_duration_min,
                                stale_token_duration_max,
                                stale_token_duration_step)
    if timeutils.is_soon(expiry_time, duration):
        return True
    return False


def _get_key_from_file(file_contents, key):
    """Extract value from KEY=VALUE entries.

    Ignore newline, ignore apostrophe, ignore quotation mark.

    :param file_contents: contents of file
    :param key: key to search (matched literally at the start of a line)
    :return: found value or ''
    """
    # Raw string avoids the invalid '\=' escape of a normal literal;
    # re.escape keeps the key from being interpreted as a regex.
    pattern = re.compile(
        r"^{}=['\"]*([^'\"\n]*)".format(re.escape(key)), re.MULTILINE)
    match = pattern.search(file_contents)
    if match:
        return match.group(1)
    return ''


@memoized
def get_os_release(release_file=consts.OS_RELEASE_FILE):
    """Function to read release information.

    Ignore newline, ignore apostrophe, ignore quotation mark.

    The result is cached per distinct positional argument tuple by the
    memoized decorator.

    :param release_file: file to read from
    :return: a tuple of (ID, VERSION)
    :raises DCCommonException: if the file cannot be read, has no ID entry,
        or the ID is not in consts.SUPPORTED_OS_TYPES
    """
    linux_distro = ('', '')

    try:
        with open(release_file, 'r') as f:
            data = f.read()
            linux_distro = (
                _get_key_from_file(data, 'ID'),
                _get_key_from_file(data, 'VERSION'))
    except Exception as e:
        raise exceptions.DCCommonException(
            msg=_("Failed to open %s : %s" % (release_file, str(e))))

    if linux_distro[0] == '':
        raise exceptions.DCCommonException(
            msg=_("Could not determine os type from %s" % release_file))

    # Hint: This code is added here to aid future unit test.
    # Probably running unit tests on a non-supported OS (example at
    # time of writing: ubuntu), which is perfect, because code reaching
    # here will fail, and we just identified a place that would split
    # logic between OSs. The failing tests should mock this function
    # (get_os_release) for each supported OS.
    if linux_distro[0] not in consts.SUPPORTED_OS_TYPES:
        raise exceptions.DCCommonException(
            msg=_("Unsupported OS detected %s" % linux_distro[0]))

    return linux_distro


def get_os_type(release_file=consts.OS_RELEASE_FILE):
    """Return the ID element of get_os_release(release_file)."""
    return get_os_release(release_file)[0]


def is_debian(software_version=None):
    """Check target version or underlying OS type.

    Check either the given software_version (e.g. for checking a subcloud,
    or prestaging operation), or the underlying OS type (for this running
    instance)
    """
    if software_version:
        return not is_centos(software_version)
    return get_os_type() == consts.OS_DEBIAN


def is_centos(software_version=None):
    """Check target version or underlying OS type.

    Check either the given software_version (e.g. for checking a subcloud,
    or prestaging operation), or the underlying OS type (for this running
    instance)
    """
    if software_version:
        # NOTE(review): this is a plain string comparison, so it only
        # orders versions correctly when they share the zero-padded
        # "NN.NN" shape of LAST_SW_VERSION_IN_CENTOS - confirm callers
        # never pass another format.
        return software_version <= LAST_SW_VERSION_IN_CENTOS
    return get_os_type() == consts.OS_CENTOS


def get_ssl_cert_ca_file():
    """Return the CA certificate path for the running OS type."""
    return os.path.join(
        consts.SSL_CERT_CA_DIR,
        consts.CERT_CA_FILE_DEBIAN if is_debian()
        else consts.CERT_CA_FILE_CENTOS)