Merge "Provide progress update for upload, apply & remove"

This commit is contained in:
Zuul 2018-11-27 19:54:24 +00:00 committed by Gerrit Code Review
commit c548f1ec97
8 changed files with 216 additions and 37 deletions

View File

@ -18,11 +18,16 @@ def _print_application_show(app):
utils.print_dict(ordereddata, wrap=72) utils.print_dict(ordereddata, wrap=72)
def _print_reminder_msg(app_name):
print("Please use 'system application-list' or 'system "
"application-show %s' to view the current progress." % app_name)
def do_application_list(cc, args): def do_application_list(cc, args):
"""List all containerized applications""" """List all containerized applications"""
apps = cc.app.list() apps = cc.app.list()
labels = ['application', 'manifest name', 'manifest file', 'status'] labels = ['application', 'manifest name', 'manifest file', 'status', 'progress']
fields = ['name', 'manifest_name', 'manifest_file', 'status'] fields = ['name', 'manifest_name', 'manifest_file', 'status', 'progress']
utils.print_list(apps, fields, labels, sortby=0) utils.print_list(apps, fields, labels, sortby=0)
@ -59,6 +64,7 @@ def do_application_upload(cc, args):
'tarfile': tarfile} 'tarfile': tarfile}
response = cc.app.upload(data) response = cc.app.upload(data)
_print_application_show(response) _print_application_show(response)
_print_reminder_msg(args.name)
@utils.arg('name', metavar='<app name>', @utils.arg('name', metavar='<app name>',
@ -68,6 +74,7 @@ def do_application_apply(cc, args):
try: try:
response = cc.app.apply(args.name) response = cc.app.apply(args.name)
_print_application_show(response) _print_application_show(response)
_print_reminder_msg(args.name)
except exc.HTTPNotFound: except exc.HTTPNotFound:
raise exc.CommandError('Application not found: %s' % args.name) raise exc.CommandError('Application not found: %s' % args.name)
@ -79,6 +86,7 @@ def do_application_remove(cc, args):
try: try:
response = cc.app.remove(args.name) response = cc.app.remove(args.name)
_print_application_show(response) _print_application_show(response)
_print_reminder_msg(args.name)
except exc.HTTPNotFound: except exc.HTTPNotFound:
raise exc.CommandError('Application not found: %s' % args.name) raise exc.CommandError('Application not found: %s' % args.name)

View File

@ -68,6 +68,9 @@ class KubeApp(base.APIBase):
status = wtypes.text status = wtypes.text
"Represents the installation status of the application" "Represents the installation status of the application"
progress = wtypes.text
"Represents the installation progress of the application"
def __init__(self, **kwargs): def __init__(self, **kwargs):
self.fields = objects.kube_app.fields.keys() self.fields = objects.kube_app.fields.keys()
for k in self.fields: for k in self.fields:
@ -80,7 +83,7 @@ class KubeApp(base.APIBase):
app = KubeApp(**rpc_app.as_dict()) app = KubeApp(**rpc_app.as_dict())
if not expand: if not expand:
app.unset_fields_except(['name', 'manifest_name', app.unset_fields_except(['name', 'manifest_name',
'manifest_file', 'status']) 'manifest_file', 'status', 'progress'])
# skip the id # skip the id
app.id = wtypes.Unset app.id = wtypes.Unset
@ -320,6 +323,7 @@ class KubeAppController(rest.RestController):
"Application-apply rejected: operation is not allowed " "Application-apply rejected: operation is not allowed "
"while the current status is {}.".format(db_app.status))) "while the current status is {}.".format(db_app.status)))
db_app.status = constants.APP_APPLY_IN_PROGRESS db_app.status = constants.APP_APPLY_IN_PROGRESS
db_app.progress = None
db_app.save() db_app.save()
pecan.request.rpcapi.perform_app_apply(pecan.request.context, pecan.request.rpcapi.perform_app_apply(pecan.request.context,
db_app) db_app)
@ -332,6 +336,7 @@ class KubeAppController(rest.RestController):
"Application-remove rejected: operation is not allowed while " "Application-remove rejected: operation is not allowed while "
"the current status is {}.".format(db_app.status))) "the current status is {}.".format(db_app.status)))
db_app.status = constants.APP_REMOVE_IN_PROGRESS db_app.status = constants.APP_REMOVE_IN_PROGRESS
db_app.progress = None
db_app.save() db_app.save()
pecan.request.rpcapi.perform_app_remove(pecan.request.context, pecan.request.rpcapi.perform_app_remove(pecan.request.context,
db_app) db_app)

View File

@ -1507,6 +1507,16 @@ APP_APPLY_OP = 'apply'
APP_REMOVE_OP = 'remove' APP_REMOVE_OP = 'remove'
APP_DELETE_OP = 'delete' APP_DELETE_OP = 'delete'
# Progress constants
APP_PROGRESS_ABORTED = 'operation aborted, check logs for detail'
APP_PROGRESS_APPLY_MANIFEST = 'applying application manifest'
APP_PROGRESS_COMPLETED = 'completed'
APP_PROGRESS_DELETE_MANIFEST = 'deleting application manifest'
APP_PROGRESS_DOWNLOAD_IMAGES = 'retrieving docker images'
APP_PROGRESS_EXTRACT_TARFILE = 'extracting application tar file'
APP_PROGRESS_GENERATE_OVERRIDES = 'generating application overrides'
APP_PROGRESS_VALIDATE_UPLOAD_CHARTS = 'validating and uploading charts'
# Node label operation constants # Node label operation constants
LABEL_ASSIGN_OP = 'assign' LABEL_ASSIGN_OP = 'assign'
LABEL_REMOVE_OP = 'remove' LABEL_REMOVE_OP = 'remove'

View File

@ -1093,6 +1093,10 @@ class SysInvSignalTimeout(SysinvException):
message = "Sysinv Timeout." message = "Sysinv Timeout."
class KubeAppProgressMonitorTimeout(SysinvException):
message = "Armada execution progress monitor timed out."
class InvalidEndpoint(SysinvException): class InvalidEndpoint(SysinvException):
message = "The provided endpoint is invalid" message = "The provided endpoint is invalid"

View File

@ -10,7 +10,6 @@
""" System Inventory Kubernetes Application Operator.""" """ System Inventory Kubernetes Application Operator."""
import docker import docker
import eventlet
import grp import grp
import os import os
import re import re
@ -22,6 +21,10 @@ import time
import yaml import yaml
from collections import namedtuple from collections import namedtuple
from eventlet import greenpool
from eventlet import greenthread
from eventlet import queue
from eventlet import Timeout
from oslo_config import cfg from oslo_config import cfg
from oslo_log import log as logging from oslo_log import log as logging
from sysinv.common import constants from sysinv.common import constants
@ -44,6 +47,9 @@ CONF.register_opts(kube_app_opts)
ARMADA_CONTAINER_NAME = 'armada_service' ARMADA_CONTAINER_NAME = 'armada_service'
MAX_DOWNLOAD_THREAD = 20 MAX_DOWNLOAD_THREAD = 20
INSTALLATION_TIMEOUT = 3600 INSTALLATION_TIMEOUT = 3600
APPLY_SEARCH_PATTERN = 'Processing Chart,'
DELETE_SEARCH_PATTERN = 'Deleting release'
CONTAINER_ABNORMAL_EXIT_CODE = 137
Chart = namedtuple('Chart', 'name namespace') Chart = namedtuple('Chart', 'name namespace')
@ -73,18 +79,28 @@ class AppOperator(object):
except OSError as e: except OSError as e:
LOG.exception(e) LOG.exception(e)
def _update_app_status(self, app, new_status): def _update_app_status(self, app, new_status=None, new_progress=None):
""" Persist new app status """ """ Persist new app status """
if new_status is None:
new_status = app.status
elif (new_status in [constants.APP_UPLOAD_SUCCESS,
constants.APP_APPLY_SUCCESS]):
new_progress = constants.APP_PROGRESS_COMPLETED
with self._lock: with self._lock:
app.status = new_status app.update_status(new_status, new_progress)
def _abort_operation(self, app, operation): def _abort_operation(self, app, operation):
if (app.status == constants.APP_UPLOAD_IN_PROGRESS): if (app.status == constants.APP_UPLOAD_IN_PROGRESS):
self._update_app_status(app, constants.APP_UPLOAD_FAILURE) self._update_app_status(app, constants.APP_UPLOAD_FAILURE,
constants.APP_PRORESS_ABORTED)
elif (app.status == constants.APP_APPLY_IN_PROGRESS): elif (app.status == constants.APP_APPLY_IN_PROGRESS):
self._update_app_status(app, constants.APP_APPLY_FAILURE) self._update_app_status(app, constants.APP_APPLY_FAILURE,
constants.APP_PROGRESS_ABORTED)
elif (app.status == constants.APP_REMOVE_IN_PROGRESS): elif (app.status == constants.APP_REMOVE_IN_PROGRESS):
self._update_app_status(app, constants.APP_REMOVE_FAILURE) self._update_app_status(app, constants.APP_REMOVE_FAILURE,
constants.APP_PROGRESS_ABORTED)
LOG.error("Application %s aborted!." % operation) LOG.error("Application %s aborted!." % operation)
def _extract_tarfile(self, app): def _extract_tarfile(self, app):
@ -163,10 +179,8 @@ class AppOperator(object):
def _register_embedded_images(self, app): def _register_embedded_images(self, app):
""" """
TODO(tngo): TODO(tngo): When we're ready to support air-gap scenario and private
============= images, the following need to be done:
When we're ready to support air-gapped scenario and private images, the
following need to be done:
a. load the embedded images a. load the embedded images
b. tag and push them to the docker registery on the controller b. tag and push them to the docker registery on the controller
c. find image tag IDs in each chart and replace their values with c. find image tag IDs in each chart and replace their values with
@ -186,9 +200,8 @@ class AppOperator(object):
if app.system_app: if app.system_app:
# Grab the image tags from the overrides. If they don't exist # Grab the image tags from the overrides. If they don't exist
# then mine them from the chart paths. # then mine them from the chart paths.
charts = self._get_list_of_charts(app.mfile_abs)
images_to_download = self._get_image_tags_by_charts(app.charts_dir, images_to_download = self._get_image_tags_by_charts(app.charts_dir,
charts) app.charts)
else: else:
# For custom apps, mine image tags from application path # For custom apps, mine image tags from application path
images_to_download = self._get_image_tags_by_path(app.path) images_to_download = self._get_image_tags_by_path(app.path)
@ -203,7 +216,7 @@ class AppOperator(object):
failed_downloads = [] failed_downloads = []
start = time.time() start = time.time()
pool = eventlet.greenpool.GreenPool(size=threads) pool = greenpool.GreenPool(size=threads)
for tag, rc in pool.imap(self._docker.download_an_image, for tag, rc in pool.imap(self._docker.download_an_image,
images_to_download): images_to_download):
if not rc: if not rc:
@ -437,6 +450,96 @@ class AppOperator(object):
self._helm.remove_helm_chart_overrides(chart.name, self._helm.remove_helm_chart_overrides(chart.name,
chart.namespace) chart.namespace)
def _make_armada_request_with_monitor(self, app, request, overrides_str=None):
"""Initiate armada request with monitoring
This method delegates the armada request to docker helper and starts
a monitoring thread to persist status and progress along the way.
:param app: application data object
:param request: type of request (apply or delete)
:param overrides_str: list of overrides in string format to be applied
"""
def _get_armada_log_stats(pattern, logfile):
"""
TODO(tngo): In the absence of an Armada API that provides the current
status of an apply/delete manifest operation, the progress is derived
from specific log entries extracted from the execution logs. This
inner method is to be replaced with an official API call when
it becomes available.
"""
p1 = subprocess.Popen(['docker', 'exec', ARMADA_CONTAINER_NAME,
'grep', pattern, logfile],
stdout=subprocess.PIPE)
p2 = subprocess.Popen(['awk', '{print $NF}'], stdin=p1.stdout,
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
p1.stdout.close()
result, err = p2.communicate()
if result:
# Strip out ANSI color code that might be in the text stream
r = re.compile("\x1b\[[0-9;]*m")
result = r.sub('', result)
matches = result.split()
num_chart_processed = len(matches)
last_chart_processed = matches[num_chart_processed - 1]
if '=' in last_chart_processed:
last_chart_processed = last_chart_processed.split('=')[1]
return last_chart_processed, num_chart_processed
return None, None
def _check_progress(monitor_flag, app, pattern, logfile):
""" Progress monitoring task, to be run in a separate thread """
LOG.info("Starting progress monitoring thread for app %s" % app.name)
try:
with Timeout(INSTALLATION_TIMEOUT,
exception.KubeAppProgressMonitorTimeout()):
while True:
try:
monitor_flag.get_nowait()
LOG.debug("Received monitor stop signal for %s" % app.name)
monitor_flag.task_done()
break
except queue.Empty:
last, num = _get_armada_log_stats(pattern, logfile)
if last:
if app.system_app:
# helm-toolkit doesn't count
percent = \
round(float(num) / (len(app.charts) - 1) * 100)
else:
percent = round(float(num) / len(app.charts) * 100)
progress_str = 'processing chart: ' + last +\
', overall completion: ' + str(percent) + '%'
if app.progress != progress_str:
LOG.info("%s" % progress_str)
self._update_app_status(
app, new_progress=progress_str)
greenthread.sleep(1)
except Exception as e:
# timeout or subprocess error
LOG.exception(e)
finally:
LOG.info("Exiting progress monitoring thread for app %s" % app.name)
# Body of the outer method
mqueue = queue.Queue()
rc = True
logfile = app.name + '-' + request + '.log'
if request == constants.APP_APPLY_OP:
pattern = APPLY_SEARCH_PATTERN
else:
pattern = DELETE_SEARCH_PATTERN
monitor = greenthread.spawn_after(1, _check_progress, mqueue, app,
pattern, logfile)
rc = self._docker.make_armada_request(request, app.armada_mfile,
overrides_str, logfile)
mqueue.put('done')
monitor.kill()
return rc
def perform_app_upload(self, rpc_app, tarfile): def perform_app_upload(self, rpc_app, tarfile):
"""Process application upload request """Process application upload request
@ -456,6 +559,8 @@ class AppOperator(object):
# Full extraction of application tarball at /scratch/apps. # Full extraction of application tarball at /scratch/apps.
# Manifest file is placed under /opt/platform/armada # Manifest file is placed under /opt/platform/armada
# which is managed by drbd-sync and visible to Armada. # which is managed by drbd-sync and visible to Armada.
self._update_app_status(
app, new_progress=constants.APP_PROGRESS_EXTRACT_TARFILE)
orig_mode = stat.S_IMODE(os.lstat("/scratch").st_mode) orig_mode = stat.S_IMODE(os.lstat("/scratch").st_mode)
app.tarfile = tarfile app.tarfile = tarfile
self._extract_tarfile(app) self._extract_tarfile(app)
@ -463,6 +568,9 @@ class AppOperator(object):
if not self._docker.make_armada_request('validate', app.armada_mfile): if not self._docker.make_armada_request('validate', app.armada_mfile):
return self._abort_operation(app, constants.APP_UPLOAD_OP) return self._abort_operation(app, constants.APP_UPLOAD_OP)
self._update_app_status(
app, new_progress=constants.APP_PROGRESS_VALIDATE_UPLOAD_CHARTS)
if os.path.isdir(app.charts_dir): if os.path.isdir(app.charts_dir):
self._validate_helm_charts(app) self._validate_helm_charts(app)
# Temporarily allow read and execute access to /scratch so www # Temporarily allow read and execute access to /scratch so www
@ -504,13 +612,15 @@ class AppOperator(object):
overrides_str = '' overrides_str = ''
ready = True ready = True
try: try:
app.charts = self._get_list_of_charts(app.mfile_abs)
if app.system_app: if app.system_app:
charts = self._get_list_of_charts(app.mfile_abs) self._update_app_status(
app, new_progress=constants.APP_PROGRESS_GENERATE_OVERRIDES)
LOG.info("Generating application overrides...") LOG.info("Generating application overrides...")
self._helm.generate_helm_application_overrides( self._helm.generate_helm_application_overrides(
app.name, cnamespace=None, armada_format=True, app.name, cnamespace=None, armada_format=True,
combined=True) combined=True)
overrides_files = self._get_overrides_files(charts) overrides_files = self._get_overrides_files(app.charts)
if overrides_files: if overrides_files:
LOG.info("Application overrides generated.") LOG.info("Application overrides generated.")
# Ensure all chart overrides are readable by Armada # Ensure all chart overrides are readable by Armada
@ -518,18 +628,24 @@ class AppOperator(object):
os.chmod(file, 0644) os.chmod(file, 0644)
overrides_str =\ overrides_str =\
self._generate_armada_overrides_str(overrides_files) self._generate_armada_overrides_str(overrides_files)
self._update_app_status(
app, new_progress=constants.APP_PROGRESS_DOWNLOAD_IMAGES)
self._download_images(app) self._download_images(app)
else: else:
ready = False ready = False
else: else:
# No support for custom app overrides at this point, just # No support for custom app overrides at this point, just
# download the needed images. # download the needed images.
self._update_app_status(
app, new_progress=constants.APP_PROGRESS_DOWNLOAD_IMAGES)
self._download_images(app) self._download_images(app)
if ready: if ready:
if self._docker.make_armada_request('apply', self._update_app_status(
app.armada_mfile, app, new_progress=constants.APP_PROGRESS_APPLY_MANIFEST)
overrides_str): if self._make_armada_request_with_monitor(app,
constants.APP_APPLY_OP,
overrides_str):
self._update_app_status(app, self._update_app_status(app,
constants.APP_APPLY_SUCCESS) constants.APP_APPLY_SUCCESS)
LOG.info("Application (%s) apply completed." % app.name) LOG.info("Application (%s) apply completed." % app.name)
@ -552,7 +668,11 @@ class AppOperator(object):
app = AppOperator.Application(rpc_app) app = AppOperator.Application(rpc_app)
LOG.info("Application (%s) remove started." % app.name) LOG.info("Application (%s) remove started." % app.name)
if self._docker.make_armada_request('delete', app.armada_mfile): app.charts = self._get_list_of_charts(app.mfile_abs)
self._update_app_status(
app, new_progress=constants.APP_PROGRESS_DELETE_MANIFEST)
if self._make_armada_request_with_monitor(app, constants.APP_DELETE_OP):
if app.system_app: if app.system_app:
try: try:
p1 = subprocess.Popen(['kubectl', 'get', 'pods', '-n', p1 = subprocess.Popen(['kubectl', 'get', 'pods', '-n',
@ -628,6 +748,7 @@ class AppOperator(object):
os.path.join(constants.APP_INSTALL_PATH, os.path.join(constants.APP_INSTALL_PATH,
self._kube_app.get('name'), self._kube_app.get('name'),
self._kube_app.get('manifest_file')) self._kube_app.get('manifest_file'))
self.charts = []
@property @property
def name(self): def name(self):
@ -641,9 +762,14 @@ class AppOperator(object):
def status(self): def status(self):
return self._kube_app.get('status') return self._kube_app.get('status')
@status.setter @property
def status(self, new_status): def progress(self):
return self._kube_app.get('progress')
def update_status(self, new_status, new_progress):
self._kube_app.status = new_status self._kube_app.status = new_status
if new_progress:
self._kube_app.progress = new_progress
self._kube_app.save() self._kube_app.save()
@ -696,8 +822,14 @@ class DockerHelper(object):
os.unlink(kube_config) os.unlink(kube_config)
return None return None
def make_armada_request(self, request, manifest_file, overrides_str=''): def make_armada_request(self, request, manifest_file, overrides_str='',
logfile=None):
if logfile is None:
logfile = request + '.log'
rc = True rc = True
try: try:
client = docker.from_env(timeout=INSTALLATION_TIMEOUT) client = docker.from_env(timeout=INSTALLATION_TIMEOUT)
armada_svc = self._start_armada_service(client) armada_svc = self._start_armada_service(client)
@ -710,10 +842,16 @@ class DockerHelper(object):
manifest_file) manifest_file)
else: else:
rc = False rc = False
LOG.error("Validation of the armada manifest %s " if exit_code == CONTAINER_ABNORMAL_EXIT_CODE:
"failed: %s" % (manifest_file, exec_logs)) LOG.error("Failed to validate application manifest %s. "
elif request == 'apply': "Armada service has exited abnormally." %
cmd = 'armada apply --debug ' + manifest_file + overrides_str manifest_file)
else:
LOG.error("Failed to validate application manifest "
"%s: %s." % (manifest_file, exec_logs))
elif request == constants.APP_APPLY_OP:
cmd = "/bin/bash -c 'armada apply --debug " + manifest_file +\
overrides_str + " | tee " + logfile + "'"
LOG.info("Armada apply command = %s" % cmd) LOG.info("Armada apply command = %s" % cmd)
(exit_code, exec_logs) = armada_svc.exec_run(cmd) (exit_code, exec_logs) = armada_svc.exec_run(cmd)
if exit_code == 0: if exit_code == 0:
@ -721,18 +859,29 @@ class DockerHelper(object):
"applied/re-applied." % manifest_file) "applied/re-applied." % manifest_file)
else: else:
rc = False rc = False
LOG.error("Failed to apply application manifest: %s" % if exit_code == CONTAINER_ABNORMAL_EXIT_CODE:
exec_logs) LOG.error("Failed to apply application manifest %s. "
elif request == 'delete': "Armada service has exited abnormally." %
cmd = 'armada delete --debug --manifest ' + manifest_file manifest_file)
else:
LOG.error("Failed to apply application manifest %s: "
"%s." % (manifest_file, exec_logs))
elif request == constants.APP_DELETE_OP:
cmd = "/bin/bash -c 'armada delete --debug --manifest " +\
manifest_file + " | tee " + logfile + "'"
(exit_code, exec_logs) = armada_svc.exec_run(cmd) (exit_code, exec_logs) = armada_svc.exec_run(cmd)
if exit_code == 0: if exit_code == 0:
LOG.info("Application charts were successfully " LOG.info("Application charts were successfully "
"deleted.") "deleted.")
else: else:
rc = False rc = False
LOG.error("Delete the application manifest failed: %s" % if exit_code == CONTAINER_ABNORMAL_EXIT_CODE:
exec_logs) LOG.error("Failed to delete application manifest %s. "
"Armada service has exited abnormally." %
manifest_file)
else:
LOG.error("Failed to delete application manifest %s: "
"%s" % (manifest_file, exec_logs))
else: else:
rc = False rc = False
LOG.error("Unsupported armada request: %s." % request) LOG.error("Unsupported armada request: %s." % request)
@ -751,8 +900,8 @@ class DockerHelper(object):
start = time.time() start = time.time()
try: try:
LOG.info("Image %s download started" % img_tag) LOG.info("Image %s download started" % img_tag)
c = docker.from_env(timeout=INSTALLATION_TIMEOUT) client = docker.from_env(timeout=INSTALLATION_TIMEOUT)
c.images.pull(img_tag) client.images.pull(img_tag)
except Exception as e: except Exception as e:
rc = False rc = False
LOG.error("Image %s download failed: %s" % (img_tag, e)) LOG.error("Image %s download failed: %s" % (img_tag, e))

View File

@ -36,6 +36,7 @@ def upgrade(migrate_engine):
Column('manifest_name', String(255), nullable=False), Column('manifest_name', String(255), nullable=False),
Column('manifest_file', String(255), nullable=True), Column('manifest_file', String(255), nullable=True),
Column('status', String(255), nullable=False), Column('status', String(255), nullable=False),
Column('progress', String(255), nullable=True),
mysql_engine=ENGINE, mysql_engine=ENGINE,
mysql_charset=CHARSET, mysql_charset=CHARSET,

View File

@ -1644,3 +1644,4 @@ class KubeApp(Base):
manifest_name = Column(String(255), nullable=False) manifest_name = Column(String(255), nullable=False)
manifest_file = Column(String(255), nullable=False) manifest_file = Column(String(255), nullable=False)
status = Column(String(255), nullable=False) status = Column(String(255), nullable=False)
progress = Column(String(255), nullable=True)

View File

@ -21,6 +21,7 @@ class KubeApp(base.SysinvObject):
'manifest_name': utils.str_or_none, 'manifest_name': utils.str_or_none,
'manifest_file': utils.str_or_none, 'manifest_file': utils.str_or_none,
'status': utils.str_or_none, 'status': utils.str_or_none,
'progress': utils.str_or_none,
} }
@base.remotable_classmethod @base.remotable_classmethod